[med-svn] [trinityrnaseq] 01/03: Imported Upstream version 2.0.6+dfsg

Michael Crusoe misterc-guest at moszumanska.debian.org
Sat Aug 22 01:46:32 UTC 2015


This is an automated email from the git hooks/post-receive script.

misterc-guest pushed a commit to branch master
in repository trinityrnaseq.

commit 7558f2f368d1b221d7e6f0db580ea9902c243ebc
Author: Michael R. Crusoe <mcrusoe at msu.edu>
Date:   Sun Mar 8 19:08:20 2015 -0400

    Imported Upstream version 2.0.6+dfsg
---
 Butterfly/jar-in-jar-loader.zip                 | Bin 7269 -> 0 bytes
 Butterfly/src/src/PathExpressionComparator.java |   4 +-
 Inchworm/configure                              |   6 +-
 Inchworm/configure.ac                           |   6 +-
 Trinity                                         |  20 +--
 util/align_and_estimate_abundance.pl            |  51 +++++--
 util/misc/SRA_to_fastq.pl                       |  36 ++---
 util/misc/run_GSNAP.pl                          |  14 +-
 util/misc/run_HISAT.pl                          | 173 ++++++++++++++++++++++++
 9 files changed, 260 insertions(+), 50 deletions(-)

diff --git a/Butterfly/jar-in-jar-loader.zip b/Butterfly/jar-in-jar-loader.zip
deleted file mode 100644
index 6ee1217..0000000
Binary files a/Butterfly/jar-in-jar-loader.zip and /dev/null differ
diff --git a/Butterfly/src/src/PathExpressionComparator.java b/Butterfly/src/src/PathExpressionComparator.java
index 7ed59e9..a3a96fb 100644
--- a/Butterfly/src/src/PathExpressionComparator.java
+++ b/Butterfly/src/src/PathExpressionComparator.java
@@ -278,7 +278,9 @@ public class PathExpressionComparator implements Comparator<Object> {
 			return(transcript_to_fractional_expr.get(path));
 		}
 		else {
-			System.err.println("WARNING: no expr value stored for path: " + path);
+			if (BFLY_GLOBALS.VERBOSE_LEVEL >= 10) {
+				System.err.println("WARNING: no expr value stored for path: " + path);
+			}
 			return(0f);
 		}
 	}
diff --git a/Inchworm/configure b/Inchworm/configure
index a43d96b..533ece1 100755
--- a/Inchworm/configure
+++ b/Inchworm/configure
@@ -3123,11 +3123,11 @@ fi
 AM_CXXFLAGS=-m64
 
 case $CXX in
-  g++*) AM_CXXFLAGS="-pedantic -fopenmp -Wall -Wextra -Wno-long-long -Wno-deprecated $AM_CXXFLAGS"
+  g++*) AM_CXXFLAGS="-std=c++0x -pedantic -fopenmp -Wall -Wextra -Wno-deprecated $AM_CXXFLAGS"
 ;;
-  sunCC*) AM_CXXFLAGS="-library=stlport4 -xopenmp -xvpara -fast $AM_CXXFLAGS"
+  sunCC*) AM_CXXFLAGS="-std=c++0x -library=stlport4 -xopenmp -xvpara -fast $AM_CXXFLAGS"
  ;;
-  icpc*) AM_CXXFLAGS="-Wall -openmp $AM_CXXFLAGS"
+  icpc*) AM_CXXFLAGS="-std=c++0x -Wall -openmp $AM_CXXFLAGS"
  ;;
 esac
 
diff --git a/Inchworm/configure.ac b/Inchworm/configure.ac
index 890fe68..9b14c77 100644
--- a/Inchworm/configure.ac
+++ b/Inchworm/configure.ac
@@ -5,9 +5,9 @@ AC_PROG_CXX
 #AC_OPENMP # requires autoconf >= 2.62
 AC_SUBST([AM_CXXFLAGS], [-m64])
 case $CXX in
-  g++*) AC_SUBST([AM_CXXFLAGS],["-pedantic -fopenmp -Wall -Wextra -Wno-long-long -Wno-deprecated $AM_CXXFLAGS"]);;
-  sunCC*) AC_SUBST([AM_CXXFLAGS], ["-library=stlport4 -xopenmp -xvpara -fast $AM_CXXFLAGS"]) ;;
-  icpc*) AC_SUBST([AM_CXXFLAGS], ["-Wall -openmp $AM_CXXFLAGS"]) ;;
+  g++*) AC_SUBST([AM_CXXFLAGS],["-std=c++0x -pedantic -fopenmp -Wall -Wextra -Wno-deprecated $AM_CXXFLAGS"]);;
+  sunCC*) AC_SUBST([AM_CXXFLAGS], ["-std=c++0x -library=stlport4 -xopenmp -xvpara -fast $AM_CXXFLAGS"]) ;;
+  icpc*) AC_SUBST([AM_CXXFLAGS], ["-std=c++0x -Wall -openmp $AM_CXXFLAGS"]) ;;
 esac
 AC_SEARCH_LIBS([cos], [m])
 AC_CONFIG_HEADERS([config.h])
diff --git a/Trinity b/Trinity
index c27d31f..73f8264 100755
--- a/Trinity
+++ b/Trinity
@@ -17,7 +17,7 @@ use Pipeliner;
 use Fasta_reader;
 
 
-my $VERSION = "v2.0.4"; 
+my $VERSION = "v2.0.6"; 
 
 
 BEGIN {
@@ -227,11 +227,11 @@ my $basic_usage = qq^
 #                            provied in Gb of RAM, ie.  '--max_memory 10G'
 #
 #  If paired reads:
-#      --left  <string>    :left reads, one or more (separated by space)
-#      --right <string>    :right reads, one or more (separated by space)
+#      --left  <string>    :left reads, one or more file names (separated by commas, no spaces)
+#      --right <string>    :right reads, one or more file names (separated by commas, no spaces)
 #
 #  Or, if unpaired reads:
-#      --single <string>   :single reads, one or more (note, if single file contains pairs, can use flag: --run_as_paired )
+#      --single <string>   :single reads, one or more file names, comma-delimited (note, if single file contains pairs, can use flag: --run_as_paired )
 #
 ####################################
 ##  Misc:  #########################
@@ -1751,9 +1751,9 @@ sub run_chrysalis {
 sub run_recursive_trinity {
     my ($reads_sorted_by_component_file) = @_;
 
-    my $target_files_per_dir = 100;
-    my $file_bins_per_dir = 10000;
-
+    my $target_files_per_dir = 100; # new Fb_\d/CBin_\d every (#components/100)
+    my $file_bins_per_dir = 1000 * $target_files_per_dir; # new Fb_\d dir 
+    
     my $component_counter = 0;
     
     my $prev_component_id = -1;
@@ -1782,10 +1782,13 @@ sub run_recursive_trinity {
                 }
                 my $readsfile = "$currdir/$base_dir/c$component_id.trinity.reads.fa";
                 
+                $component_counter++;
+                                
                 open ($ofh, ">$readsfile") or die "Error, cannot write to file $readsfile";
                 print $ofh_read_filenames "$readsfile\n";
             }
             print $ofh "$read_name\n$read_seq\n";
+        
         }
         close $ofh if $ofh;
         close $ofh_read_filenames;
@@ -1799,8 +1802,7 @@ sub run_recursive_trinity {
         exit(0);
     }
     
-
-
+        
     if (! -e "recursive_trinity.cmds.ok") {
         &write_trinity_partitioned_cmds($read_filenames, "recursive_trinity.cmds");
         &process_cmd("touch recursive_trinity.cmds.ok");
diff --git a/util/align_and_estimate_abundance.pl b/util/align_and_estimate_abundance.pl
index c4ad94b..88e4f3a 100755
--- a/util/align_and_estimate_abundance.pl
+++ b/util/align_and_estimate_abundance.pl
@@ -21,6 +21,14 @@ my %aligner_params = ( 'bowtie_RSEM' => '--all --best --strata -m 300 --chunkmbs
                        
                        'bowtie2_eXpress' => '--end-to-end',
                        
+                       
+
+                       'bowtie_none' => '--all --best --strata -m 300 --chunkmbs 512',
+                       
+                       'bowtie2_none' => '--no-mixed --no-discordant --gbar 1000 --end-to-end', 
+                       
+                       
+
     );
 
 my $rsem_add_opts = "";
@@ -255,7 +263,7 @@ elsif ($aln_method !~ /bowtie2?/) {
     die "Error, --aln_method must be either 'bowtie' or 'bowtie2' ";
 }
 
-unless ($est_method =~ /^(RSEM|eXpress)$/) {
+unless ($est_method =~ /^(RSEM|eXpress|none)$/) {
     die "Error, --est_method  'RSEM' or 'eXpress' only, and capitalization matters. :) \n";
 }
 
@@ -490,33 +498,46 @@ main: {
     elsif ($est_method eq "RSEM") {
         &run_RSEM($bam_file, $rsem_prefix, $output_prefix);
     }
+    elsif ($est_method eq "none") {
+        print STDERR "Not running abundance estimation, stopping now after alignment.\n";
+    }
     else {
         die "Error, --est_method $est_method is not supported";
     }
     
     if ($coordsort_bam_flag) {
         
-        my $sorted_bam_file = $bam_file;
-        $sorted_bam_file =~ s/bam$/csorted/;
-        if (! -e "$sorted_bam_file.bam.ok") {
-            ## sort the bam file
-            
-            my $cmd = "samtools sort $bam_file $sorted_bam_file";
-            &process_cmd($cmd);
-            $cmd = "samtools index $sorted_bam_file.bam";
-            &process_cmd($cmd);
-            
-            &process_cmd("touch $sorted_bam_file.bam.ok");
-        }
+        &sort_bam_file($bam_file);
+        
     }
     
-
-    
     exit(0);
     
 }
 
 
+
+####
+sub sort_bam_file {
+    my ($bam_file) = @_;
+
+    # Sort and index $bam_file with samtools. The output prefix swaps the
+    # trailing 'bam' for 'csorted'; '<prefix>.bam.ok' marks a finished run.
+    (my $csorted_prefix = $bam_file) =~ s/bam$/csorted/;
+    my $checkpoint = "$csorted_prefix.bam.ok";
+
+    return if -e $checkpoint;
+    for my $cmd ("samtools sort $bam_file $csorted_prefix",
+                 "samtools index $csorted_prefix.bam",
+                 "touch $checkpoint") {
+        &process_cmd($cmd);
+    }
+
+    return;
+}
+
+
+
 ####
 sub run_eXpress {
     my ($bam_file) = @_;
diff --git a/util/misc/SRA_to_fastq.pl b/util/misc/SRA_to_fastq.pl
index 849606d..fa66fb4 100755
--- a/util/misc/SRA_to_fastq.pl
+++ b/util/misc/SRA_to_fastq.pl
@@ -16,7 +16,7 @@ my $prefix;
 
 my @sra_files = @ARGV;
 
-unless ($prefix) {
+unless (@ARGV) {
     die $usage;
 }
 
@@ -42,25 +42,29 @@ foreach my $sra_file (@sra_files) {
     
 }
 
-my @final_cmds;
-my @tmp_files;
-for my $end ("1", "2") {
-
-    my $cmd = "cat";
-    foreach my $core_name (@core_names) {
+if ($prefix) {
+    
+    my @final_cmds;
+    my @tmp_files;
+    for my $end ("1", "2") {
         
-        my $file = "$core_name" . "_$end.fastq";
-        $cmd .= " $file ";
-        push (@tmp_files, $file);
+        my $cmd = "cat";
+        foreach my $core_name (@core_names) {
+            
+            my $file = "$core_name" . "_$end.fastq";
+            $cmd .= " $file ";
+            push (@tmp_files, $file);
+        }
+        $cmd .= ">$prefix" . "_$end.fastq";
+        &process_cmd($cmd);
+    }
+    
+    ## remove tmp files:
+    foreach my $file (@tmp_files) {
+        unlink($file);
     }
-    $cmd .= ">$prefix" . "_$end.fastq";
-    &process_cmd($cmd);
 }
 
-## remove tmp files:
-foreach my $file (@tmp_files) {
-    unlink($file);
-}
 
 exit(0);
 
diff --git a/util/misc/run_GSNAP.pl b/util/misc/run_GSNAP.pl
index 0681b7d..fdaea5a 100755
--- a/util/misc/run_GSNAP.pl
+++ b/util/misc/run_GSNAP.pl
@@ -21,11 +21,12 @@ my $usage = <<__EOUSAGE__;
 #
 #  Optional:
 #  -N <int>                    number of top hits (default: 1)
-#  -I <int>                    max intron length (default: 500000)
+#  -I <int>                    max intron length (default: 1000000)
 #  -G <string>                 GTF file for incorporating reference splice site info.
 #  --CPU <int>                 number of threads (default: 2)
 #  --out_prefix <string>       output prefix (default: gsnap)
 #  --no_sarray                 skip the sarray in the gmap-build 
+#  --proper_pairs_only         require proper pairing of reads
 #
 #######################################################################
 
@@ -37,7 +38,7 @@ __EOUSAGE__
 
 my ($genome, $reads);
 
-my $max_intron = 500000;
+my $max_intron = 1000000;
 my $CPU = 2;
 
 my $help_flag;
@@ -46,6 +47,7 @@ my $num_top_hits = 1;
 my $out_prefix = "gsnap";
 my $gtf_file;
 my $no_sarray = "";
+my $proper_pairs_only_flag = 0;
 
 &GetOptions( 'h' => \$help_flag,
              'genome=s' => \$genome,
@@ -56,6 +58,7 @@ my $no_sarray = "";
              'out_prefix=s' => \$out_prefix,
              'G=s' => \$gtf_file,
              'no_sarray' => \$no_sarray,
+             'proper_pairs_only' => \$proper_pairs_only_flag,
     );
 
 
@@ -108,7 +111,12 @@ main: {
 
     $reads = &add_zcat_fifo($reads);
 
-    my $cmd = "bash -c \"set -o pipefail && gsnap -D $genomeBaseDir -d $genomeDir -A sam -N 1 -w $max_intron $gsnap_use_sarray -n $num_top_hits -t $CPU $reads $splice_param | samtools view -bS -F 4 - | samtools sort -@ $CPU - $out_prefix.cSorted \"";
+    my $require_proper_pairs = "";
+    if ($proper_pairs_only_flag) {
+        $require_proper_pairs = " -f 2 ";
+    }
+
+    my $cmd = "bash -c \"set -o pipefail && gsnap -D $genomeBaseDir -d $genomeDir -A sam -N 1 -w $max_intron $gsnap_use_sarray -n $num_top_hits -t $CPU $reads $splice_param | samtools view -bS -F 4 $require_proper_pairs - | samtools sort -@ $CPU - $out_prefix.cSorted \"";
     &process_cmd($cmd);
 
     if (-s "$out_prefix.cSorted.bam") {
diff --git a/util/misc/run_HISAT.pl b/util/misc/run_HISAT.pl
new file mode 100755
index 0000000..5e6a69b
--- /dev/null
+++ b/util/misc/run_HISAT.pl
@@ -0,0 +1,173 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use FindBin;
+use File::Basename;
+use Cwd;
+
+use Carp;
+use Getopt::Long qw(:config no_ignore_case bundling pass_through);
+
+my $HISAT_HOME = $ENV{HISAT_HOME} or die "Error, need env var HISAT_HOME set to the HISAT installation directory.\n\n";
+ 
+
+
+my $usage = <<__EOUSAGE__;
+
+######################################################################
+#
+#  Required:
+#  --genome <string>           target genome to align to
+#  --reads  <string>           fastq files. If pairs, indicate both in quotes, ie. "left.fq right.fq"
+#
+#  Optional:
+#  -N <int>                    max number of alignments to report. (default: 1)
+#  -G <string>                 GTF file for incorporating reference splice site info.
+#  --CPU <int>                 number of threads (default: 2)
+#  --out_prefix <string>       output prefix (default: hisat)
+#  --run_as_single_reads       if paired, run as single reads
+#
+#######################################################################
+
+
+__EOUSAGE__
+
+    ;
+
+
+my ($genome, $reads);
+
+my $CPU = 2;
+
+my $help_flag;
+
+my $out_prefix = "hisat";
+my $gtf_file;
+my $run_as_single_flag = 0;
+my $num_top_hits = 1;
+## options not listed here stay in @ARGV (Getopt pass_through is configured)
+&GetOptions( 'h' => \$help_flag,
+             'genome=s' => \$genome,
+             'reads=s' => \$reads,
+             'CPU=i' => \$CPU,
+             'out_prefix=s' => \$out_prefix,
+             'G=s' => \$gtf_file,
+             'run_as_single_reads' => \$run_as_single_flag,
+             'N=i' => \$num_top_hits,
+    );
+
+## show usage when -h is given or when a required option is missing
+if ($help_flag || ! ($genome && $reads)) {
+    die $usage;
+}
+
+
+main: {
+    ## Build the index if needed, align with hisat, then sort and index the BAM.
+    my $hisat_index = "$genome.hisat.idx";
+    if (! -s "$hisat_index.1.bt2") {
+        ## build hisat index
+
+        my $cmd = "$HISAT_HOME/hisat-build $genome $hisat_index";
+        &process_cmd($cmd);
+    }
+
+    ## -G is optional: only derive the splice-site file name when it was given
+    my $splice_incl = "";
+    
+    if ($gtf_file) {
+        my $gtf_splice = "$gtf_file.hisat.splice";
+        unless (-s $gtf_splice) {
+            my $cmd = "$HISAT_HOME/extract_splice_sites.py $gtf_file > $gtf_splice";
+            &process_cmd($cmd);
+        }
+        
+        $splice_incl = " --known-splicesite-infile $gtf_splice ";
+    }
+    
+    ## run HISAT
+    
+    $reads = &add_zcat_fifo_and_add_hisat_params($reads);
+    
+    my $top_hits_count = "";
+    if ($num_top_hits > 1) {
+        $top_hits_count = " -k $num_top_hits ";
+    }
+    ## @ARGV holds the options Getopt passed through; they go to hisat verbatim
+    my $cmd = "bash -c \"set -o pipefail && $HISAT_HOME/hisat -x $hisat_index -q $reads $splice_incl -p $CPU $top_hits_count @ARGV | samtools view -@ $CPU -F 4 -Sb - | samtools sort -@ $CPU -o - - > $out_prefix.cSorted.bam \"";
+    
+    &process_cmd($cmd);
+
+    if (-s "$out_prefix.cSorted.bam") {
+        $cmd = "samtools index $out_prefix.cSorted.bam";
+        &process_cmd($cmd);
+    }
+    
+	exit(0);
+}
+
+
+####
+sub add_zcat_fifo_and_add_hisat_params {
+    my ($reads) = @_;
+
+    # Convert the whitespace-separated --reads value into hisat input
+    # arguments and return them as a single string.
+
+    $reads =~ s/^\s+|\s+$//g;
+
+    # Names ending in .gz are wrapped in a <(zcat ...) process substitution;
+    # all other names are passed along unchanged.
+    my @inputs;
+    foreach my $name (split(/\s+/, $reads)) {
+        push (@inputs, ($name =~ /\.gz$/) ? "<(zcat $name)" : $name);
+    }
+
+    # Single-read mode: every file goes into one comma-joined -U list.
+    if ($run_as_single_flag) {
+        return("-U " . join(",", @inputs));
+    }
+
+    # Exactly two files are treated as a pair and labelled -1 / -2 in the
+    # order given; any other count yields one -U argument per file.
+    my $is_pair = (scalar(@inputs) == 2);
+
+    my @decorated;
+    my $ordinal = 0;
+
+    foreach my $input (@inputs) {
+
+        $ordinal++;
+
+        my $flag = $is_pair ? "-$ordinal" : "-U";
+
+        push (@decorated, "$flag $input");
+    }
+
+
+    return(join(" ", @decorated));
+}
+
+    
+
+
+
+####
+sub process_cmd {
+    # Echo a shell command to STDERR, run it with system(), and die if the
+    # status returned by system() is non-zero.
+    my ($cmd) = @_;
+
+    print STDERR "CMD: $cmd\n";
+    my $status = system($cmd);
+
+    die "Error, cmd: $cmd died with ret ($status)" if $status;
+
+    # Success: there is no value to hand back to the caller.
+    return;
+}
+
+
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/trinityrnaseq.git



More information about the debian-med-commit mailing list