[med-svn] [trinityrnaseq] 01/03: Imported Upstream version 2.0.6+dfsg
Michael Crusoe
misterc-guest at moszumanska.debian.org
Sat Aug 22 01:46:32 UTC 2015
This is an automated email from the git hooks/post-receive script.
misterc-guest pushed a commit to branch master
in repository trinityrnaseq.
commit 7558f2f368d1b221d7e6f0db580ea9902c243ebc
Author: Michael R. Crusoe <mcrusoe at msu.edu>
Date: Sun Mar 8 19:08:20 2015 -0400
Imported Upstream version 2.0.6+dfsg
---
Butterfly/jar-in-jar-loader.zip | Bin 7269 -> 0 bytes
Butterfly/src/src/PathExpressionComparator.java | 4 +-
Inchworm/configure | 6 +-
Inchworm/configure.ac | 6 +-
Trinity | 20 +--
util/align_and_estimate_abundance.pl | 51 +++++--
util/misc/SRA_to_fastq.pl | 36 ++---
util/misc/run_GSNAP.pl | 14 +-
util/misc/run_HISAT.pl | 173 ++++++++++++++++++++++++
9 files changed, 260 insertions(+), 50 deletions(-)
diff --git a/Butterfly/jar-in-jar-loader.zip b/Butterfly/jar-in-jar-loader.zip
deleted file mode 100644
index 6ee1217..0000000
Binary files a/Butterfly/jar-in-jar-loader.zip and /dev/null differ
diff --git a/Butterfly/src/src/PathExpressionComparator.java b/Butterfly/src/src/PathExpressionComparator.java
index 7ed59e9..a3a96fb 100644
--- a/Butterfly/src/src/PathExpressionComparator.java
+++ b/Butterfly/src/src/PathExpressionComparator.java
@@ -278,7 +278,9 @@ public class PathExpressionComparator implements Comparator<Object> {
return(transcript_to_fractional_expr.get(path));
}
else {
- System.err.println("WARNING: no expr value stored for path: " + path);
+ if (BFLY_GLOBALS.VERBOSE_LEVEL >= 10) {
+ System.err.println("WARNING: no expr value stored for path: " + path);
+ }
return(0f);
}
}
diff --git a/Inchworm/configure b/Inchworm/configure
index a43d96b..533ece1 100755
--- a/Inchworm/configure
+++ b/Inchworm/configure
@@ -3123,11 +3123,11 @@ fi
AM_CXXFLAGS=-m64
case $CXX in
- g++*) AM_CXXFLAGS="-pedantic -fopenmp -Wall -Wextra -Wno-long-long -Wno-deprecated $AM_CXXFLAGS"
+ g++*) AM_CXXFLAGS="-std=c++0x -pedantic -fopenmp -Wall -Wextra -Wno-deprecated $AM_CXXFLAGS"
;;
- sunCC*) AM_CXXFLAGS="-library=stlport4 -xopenmp -xvpara -fast $AM_CXXFLAGS"
+ sunCC*) AM_CXXFLAGS="-std=c++0x -library=stlport4 -xopenmp -xvpara -fast $AM_CXXFLAGS"
;;
- icpc*) AM_CXXFLAGS="-Wall -openmp $AM_CXXFLAGS"
+ icpc*) AM_CXXFLAGS="-std=c++0x -Wall -openmp $AM_CXXFLAGS"
;;
esac
diff --git a/Inchworm/configure.ac b/Inchworm/configure.ac
index 890fe68..9b14c77 100644
--- a/Inchworm/configure.ac
+++ b/Inchworm/configure.ac
@@ -5,9 +5,9 @@ AC_PROG_CXX
#AC_OPENMP # requires autoconf >= 2.62
AC_SUBST([AM_CXXFLAGS], [-m64])
case $CXX in
- g++*) AC_SUBST([AM_CXXFLAGS],["-pedantic -fopenmp -Wall -Wextra -Wno-long-long -Wno-deprecated $AM_CXXFLAGS"]);;
- sunCC*) AC_SUBST([AM_CXXFLAGS], ["-library=stlport4 -xopenmp -xvpara -fast $AM_CXXFLAGS"]) ;;
- icpc*) AC_SUBST([AM_CXXFLAGS], ["-Wall -openmp $AM_CXXFLAGS"]) ;;
+ g++*) AC_SUBST([AM_CXXFLAGS],["-std=c++0x -pedantic -fopenmp -Wall -Wextra -Wno-deprecated $AM_CXXFLAGS"]);;
+ sunCC*) AC_SUBST([AM_CXXFLAGS], ["-std=c++0x -library=stlport4 -xopenmp -xvpara -fast $AM_CXXFLAGS"]) ;;
+ icpc*) AC_SUBST([AM_CXXFLAGS], ["-std=c++0x -Wall -openmp $AM_CXXFLAGS"]) ;;
esac
AC_SEARCH_LIBS([cos], [m])
AC_CONFIG_HEADERS([config.h])
diff --git a/Trinity b/Trinity
index c27d31f..73f8264 100755
--- a/Trinity
+++ b/Trinity
@@ -17,7 +17,7 @@ use Pipeliner;
use Fasta_reader;
-my $VERSION = "v2.0.4";
+my $VERSION = "v2.0.6";
BEGIN {
@@ -227,11 +227,11 @@ my $basic_usage = qq^
# provied in Gb of RAM, ie. '--max_memory 10G'
#
# If paired reads:
-# --left <string> :left reads, one or more (separated by space)
-# --right <string> :right reads, one or more (separated by space)
+# --left <string> :left reads, one or more file names (separated by commas, no spaces)
+# --right <string> :right reads, one or more file names (separated by commas, no spaces)
#
# Or, if unpaired reads:
-# --single <string> :single reads, one or more (note, if single file contains pairs, can use flag: --run_as_paired )
+# --single <string> :single reads, one or more file names, comma-delimited (note, if single file contains pairs, can use flag: --run_as_paired )
#
####################################
## Misc: #########################
@@ -1751,9 +1751,9 @@ sub run_chrysalis {
sub run_recursive_trinity {
my ($reads_sorted_by_component_file) = @_;
- my $target_files_per_dir = 100;
- my $file_bins_per_dir = 10000;
-
+ my $target_files_per_dir = 100; # new Fb_\d/CBin_\d every (#components/100)
+ my $file_bins_per_dir = 1000 * $target_files_per_dir; # new Fb_\d dir
+
my $component_counter = 0;
my $prev_component_id = -1;
@@ -1782,10 +1782,13 @@ sub run_recursive_trinity {
}
my $readsfile = "$currdir/$base_dir/c$component_id.trinity.reads.fa";
+ $component_counter++;
+
open ($ofh, ">$readsfile") or die "Error, cannot write to file $readsfile";
print $ofh_read_filenames "$readsfile\n";
}
print $ofh "$read_name\n$read_seq\n";
+
}
close $ofh if $ofh;
close $ofh_read_filenames;
@@ -1799,8 +1802,7 @@ sub run_recursive_trinity {
exit(0);
}
-
-
+
if (! -e "recursive_trinity.cmds.ok") {
&write_trinity_partitioned_cmds($read_filenames, "recursive_trinity.cmds");
&process_cmd("touch recursive_trinity.cmds.ok");
diff --git a/util/align_and_estimate_abundance.pl b/util/align_and_estimate_abundance.pl
index c4ad94b..88e4f3a 100755
--- a/util/align_and_estimate_abundance.pl
+++ b/util/align_and_estimate_abundance.pl
@@ -21,6 +21,14 @@ my %aligner_params = ( 'bowtie_RSEM' => '--all --best --strata -m 300 --chunkmbs
'bowtie2_eXpress' => '--end-to-end',
+
+
+ 'bowtie_none' => '--all --best --strata -m 300 --chunkmbs 512',
+
+ 'bowtie2_none' => '--no-mixed --no-discordant --gbar 1000 --end-to-end',
+
+
+
);
my $rsem_add_opts = "";
@@ -255,7 +263,7 @@ elsif ($aln_method !~ /bowtie2?/) {
die "Error, --aln_method must be either 'bowtie' or 'bowtie2' ";
}
-unless ($est_method =~ /^(RSEM|eXpress)$/) {
+unless ($est_method =~ /^(RSEM|eXpress|none)$/) {
die "Error, --est_method 'RSEM' or 'eXpress' only, and capitalization matters. :) \n";
}
@@ -490,33 +498,46 @@ main: {
elsif ($est_method eq "RSEM") {
&run_RSEM($bam_file, $rsem_prefix, $output_prefix);
}
+ elsif ($est_method eq "none") {
+ print STDERR "Not running abundance estimation, stopping now after alignment.\n";
+ }
else {
die "Error, --est_method $est_method is not supported";
}
if ($coordsort_bam_flag) {
- my $sorted_bam_file = $bam_file;
- $sorted_bam_file =~ s/bam$/csorted/;
- if (! -e "$sorted_bam_file.bam.ok") {
- ## sort the bam file
-
- my $cmd = "samtools sort $bam_file $sorted_bam_file";
- &process_cmd($cmd);
- $cmd = "samtools index $sorted_bam_file.bam";
- &process_cmd($cmd);
-
- &process_cmd("touch $sorted_bam_file.bam.ok");
- }
+ &sort_bam_file($bam_file);
+
}
-
-
exit(0);
}
+
+####
+# Coordinate-sort and index a BAM file via samtools, at most once: a
+# "<prefix>.bam.ok" marker file records that the work was already done.
+sub sort_bam_file {
+    my ($bam_file) = @_;
+
+    # Swap the trailing "bam" for "csorted" to form the sort output prefix;
+    # the index step below expects the sorted result at "<prefix>.bam".
+    (my $csorted_prefix = $bam_file) =~ s/bam$/csorted/;
+
+    return if -e "$csorted_prefix.bam.ok";
+
+    &process_cmd("samtools sort $bam_file $csorted_prefix");
+    &process_cmd("samtools index $csorted_prefix.bam");
+    &process_cmd("touch $csorted_prefix.bam.ok");
+
+    return;
+}
+
+
+
####
sub run_eXpress {
my ($bam_file) = @_;
diff --git a/util/misc/SRA_to_fastq.pl b/util/misc/SRA_to_fastq.pl
index 849606d..fa66fb4 100755
--- a/util/misc/SRA_to_fastq.pl
+++ b/util/misc/SRA_to_fastq.pl
@@ -16,7 +16,7 @@ my $prefix;
my @sra_files = @ARGV;
-unless ($prefix) {
+unless (@ARGV) {
die $usage;
}
@@ -42,25 +42,29 @@ foreach my $sra_file (@sra_files) {
}
-my @final_cmds;
-my @tmp_files;
-for my $end ("1", "2") {
-
- my $cmd = "cat";
- foreach my $core_name (@core_names) {
+if ($prefix) {
+
+ my @final_cmds;
+ my @tmp_files;
+ for my $end ("1", "2") {
- my $file = "$core_name" . "_$end.fastq";
- $cmd .= " $file ";
- push (@tmp_files, $file);
+ my $cmd = "cat";
+ foreach my $core_name (@core_names) {
+
+ my $file = "$core_name" . "_$end.fastq";
+ $cmd .= " $file ";
+ push (@tmp_files, $file);
+ }
+ $cmd .= ">$prefix" . "_$end.fastq";
+ &process_cmd($cmd);
+ }
+
+ ## remove tmp files:
+ foreach my $file (@tmp_files) {
+ unlink($file);
}
- $cmd .= ">$prefix" . "_$end.fastq";
- &process_cmd($cmd);
}
-## remove tmp files:
-foreach my $file (@tmp_files) {
- unlink($file);
-}
exit(0);
diff --git a/util/misc/run_GSNAP.pl b/util/misc/run_GSNAP.pl
index 0681b7d..fdaea5a 100755
--- a/util/misc/run_GSNAP.pl
+++ b/util/misc/run_GSNAP.pl
@@ -21,11 +21,12 @@ my $usage = <<__EOUSAGE__;
#
# Optional:
# -N <int> number of top hits (default: 1)
-# -I <int> max intron length (default: 500000)
+# -I <int> max intron length (default: 1000000)
# -G <string> GTF file for incorporating reference splice site info.
# --CPU <int> number of threads (default: 2)
# --out_prefix <string> output prefix (default: gsnap)
# --no_sarray skip the sarray in the gmap-build
+# --proper_pairs_only require proper pairing of reads
#
#######################################################################
@@ -37,7 +38,7 @@ __EOUSAGE__
my ($genome, $reads);
-my $max_intron = 500000;
+my $max_intron = 1000000;
my $CPU = 2;
my $help_flag;
@@ -46,6 +47,7 @@ my $num_top_hits = 1;
my $out_prefix = "gsnap";
my $gtf_file;
my $no_sarray = "";
+my $proper_pairs_only_flag = 0;
&GetOptions( 'h' => \$help_flag,
'genome=s' => \$genome,
@@ -56,6 +58,7 @@ my $no_sarray = "";
'out_prefix=s' => \$out_prefix,
'G=s' => \$gtf_file,
'no_sarray' => \$no_sarray,
+ 'proper_pairs_only' => \$proper_pairs_only_flag,
);
@@ -108,7 +111,12 @@ main: {
$reads = &add_zcat_fifo($reads);
- my $cmd = "bash -c \"set -o pipefail && gsnap -D $genomeBaseDir -d $genomeDir -A sam -N 1 -w $max_intron $gsnap_use_sarray -n $num_top_hits -t $CPU $reads $splice_param | samtools view -bS -F 4 - | samtools sort -@ $CPU - $out_prefix.cSorted \"";
+ my $require_proper_pairs = "";
+ if ($proper_pairs_only_flag) {
+ $require_proper_pairs = " -f 2 ";
+ }
+
+ my $cmd = "bash -c \"set -o pipefail && gsnap -D $genomeBaseDir -d $genomeDir -A sam -N 1 -w $max_intron $gsnap_use_sarray -n $num_top_hits -t $CPU $reads $splice_param | samtools view -bS -F 4 $require_proper_pairs - | samtools sort -@ $CPU - $out_prefix.cSorted \"";
&process_cmd($cmd);
if (-s "$out_prefix.cSorted.bam") {
diff --git a/util/misc/run_HISAT.pl b/util/misc/run_HISAT.pl
new file mode 100755
index 0000000..5e6a69b
--- /dev/null
+++ b/util/misc/run_HISAT.pl
@@ -0,0 +1,173 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use FindBin;
+use File::Basename;
+use Cwd;
+
+use Carp;
+use Getopt::Long qw(:config no_ignore_case bundling pass_through);
+
+my $HISAT_HOME = $ENV{HISAT_HOME} or die "Error, need env var HISAT_HOME set to the HISAT installation directory.\n\n";
+
+
+
+my $usage = <<__EOUSAGE__;
+
+######################################################################
+#
+# Required:
+# --genome <string> target genome to align to
+# --reads <string> fastq files. If pairs, indicate both in quotes, ie. "left.fq right.fq"
+#
+# Optional:
+# -N <int> max number of alignments to report. (default: 1)
+# -G <string> GTF file for incorporating reference splice site info.
+# --CPU <int> number of threads (default: 2)
+# --out_prefix <string> output prefix (default: hisat)
+# --run_as_single_reads if paired, run as single reads
+#
+#######################################################################
+
+
+__EOUSAGE__
+
+ ;
+
+
+# Command-line settings and their defaults.
+my ($genome, $reads);            # required: --genome, --reads
+my $CPU = 2;                     # --CPU
+my $help_flag;                   # -h
+my $out_prefix = "hisat";        # --out_prefix
+my $gtf_file;                    # -G (optional)
+my $run_as_single_flag = 0;      # --run_as_single_reads
+my $num_top_hits = 1;            # -N
+
+# Getopt::Long runs with pass_through, so unrecognized options stay in @ARGV.
+&GetOptions( 'h' => \$help_flag,
+             'genome=s' => \$genome,
+             'reads=s' => \$reads,
+             'CPU=i' => \$CPU,
+             'out_prefix=s' => \$out_prefix,
+             'G=s' => \$gtf_file,
+             'run_as_single_reads' => \$run_as_single_flag,
+             'N=i' => \$num_top_hits,
+    );
+
+# Print usage when help is requested (-h used to be parsed but never
+# checked) or when a required argument is missing.
+if ($help_flag || ! ($genome && $reads)) {
+    die $usage;
+}
+
+
+# Entry point: build the HISAT index if absent, optionally extract known
+# splice sites from the GTF, align the reads, then index the sorted BAM.
+main: {
+    my $hisat_index = "$genome.hisat.idx";
+    if (! -s "$hisat_index.1.bt2") {
+        ## build hisat index (only when the first index file is missing or empty)
+        my $cmd = "$HISAT_HOME/hisat-build $genome $hisat_index";
+        &process_cmd($cmd);
+    }
+
+    my $splice_incl = "";
+    if ($gtf_file) {
+        # Build the splice-site filename only when -G was supplied: interpolating
+        # an undefined $gtf_file raises an uninitialized-value warning.
+        my $gtf_splice = "$gtf_file.hisat.splice";
+        unless (-s $gtf_splice) {
+            my $cmd = "$HISAT_HOME/extract_splice_sites.py $gtf_file > $gtf_splice";
+            &process_cmd($cmd);
+        }
+        $splice_incl = " --known-splicesite-infile $gtf_splice ";
+    }
+
+    ## run HISAT
+    # Turn the --reads string into HISAT read arguments.
+    $reads = &add_zcat_fifo_and_add_hisat_params($reads);
+
+    # Ask for multiple alignments per read only when -N is greater than 1.
+    my $top_hits_count = "";
+    if ($num_top_hits > 1) {
+        $top_hits_count = " -k $num_top_hits ";
+    }
+
+    # Options Getopt did not recognize remain in @ARGV (pass_through) and go to hisat.
+    my $cmd = "bash -c \"set -o pipefail && $HISAT_HOME/hisat -x $hisat_index -q $reads $splice_incl -p $CPU $top_hits_count @ARGV | samtools view -@ $CPU -F 4 -Sb - | samtools sort -@ $CPU -o - - > $out_prefix.cSorted.bam \"";
+    &process_cmd($cmd);
+
+    # Index the result only if a non-empty BAM was written.
+    if (-s "$out_prefix.cSorted.bam") {
+        $cmd = "samtools index $out_prefix.cSorted.bam";
+        &process_cmd($cmd);
+    }
+
+    exit(0);
+}
+
+
+####
+# Convert the whitespace-separated --reads value into HISAT read arguments.
+#
+# Input:   $reads - one or more read file names in a single string.
+# Returns: a string of read options:
+#            "-U f1,f2,..."  when $run_as_single_flag is set,
+#            "-1 f1 -2 f2"   when exactly two files are given,
+#            "-U f ..."      (one -U per file) otherwise.
+# File names ending in .gz are replaced by "<(zcat file)".
+sub add_zcat_fifo_and_add_hisat_params {
+    my ($reads) = @_;
+
+    # Trim surrounding whitespace, then break the string into file names.
+    $reads =~ s/^\s+|\s+$//g;
+    my @input_files = split(/\s+/, $reads);
+
+    # Exactly two files are treated as a mate pair (-1/-2); any other
+    # count gets a -U flag per file.
+    my $is_pair = (scalar(@input_files) == 2);
+
+    my @stream_names;   # file names, gzipped ones wrapped in <(zcat ...)
+    my @flagged_args;   # same names, each preceded by its HISAT flag
+
+    for my $idx (0 .. $#input_files) {
+        my $name = $input_files[$idx];
+
+        # Gzipped input is decompressed on the fly via bash process substitution.
+        $name = "<(zcat $name)" if $name =~ /\.gz$/;
+        push (@stream_names, $name);
+
+        # Mate numbers are 1-based positions in the input list.
+        my $mate_num = $idx + 1;
+        push (@flagged_args, $is_pair ? "-$mate_num $name" : "-U $name");
+    }
+
+    # Forced single-end mode: one -U followed by all files, comma-joined.
+    return("-U " . join(",", @stream_names)) if $run_as_single_flag;
+
+    return(join(" ", @flagged_args));
+}
+
+
+
+
+
+####
+# Echo a shell command to STDERR, run it through system(), and die if
+# system() reports a non-zero status.
+sub process_cmd {
+    my ($cmd) = @_;
+
+    print STDERR "CMD: $cmd\n";
+
+    # Any true (non-zero) return value from system() is treated as fatal.
+    my $status = system($cmd);
+    die "Error, cmd: $cmd died with ret ($status)" if $status;
+
+    return;
+}
+
+
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/trinityrnaseq.git
More information about the debian-med-commit
mailing list