[med-svn] [Git][med-team/trinityrnaseq][upstream] New upstream version 2.14.0+dfsg

Michael R. Crusoe (@crusoe) gitlab at salsa.debian.org
Sun Apr 10 17:42:27 BST 2022



Michael R. Crusoe pushed to branch upstream at Debian Med / trinityrnaseq


Commits:
9773f0b9 by Michael R. Crusoe at 2022-04-10T17:59:31+02:00
New upstream version 2.14.0+dfsg
- - - - -


27 changed files:

- Analysis/DifferentialExpression/run_GOseq.pl
- Analysis/FL_reconstruction_analysis/FL_trans_analysis_pipeline.pl
- Analysis/FL_reconstruction_analysis/util/blat_full_length_mappings.pl
- Changelog.txt
- Docker/VERSION.txt
- Docker/cleanMe.sh
- Makefile
- PerlLib/CDNA/PASA_alignment_assembler.pm
- PerlLib/COMMON.pm
- Trinity
- sample_data/Makefile
- trinity-plugins/Makefile
- trinity-plugins/ParaFly/Makefile
- trinity-plugins/ParaFly/config.log
- trinity-plugins/ParaFly/config.status
- trinity-plugins/ParaFly/src/Makefile
- trinity-plugins/scaffold_iworm_contigs/Makefile
- util/align_and_estimate_abundance.pl
- util/misc/SRA_to_fastq.pl
- util/misc/blat_util/blat_to_sam.pl
- util/misc/run_HISAT.pl
- util/misc/run_STAR.pl
- util/misc/run_STAR_via_samples_file.pl
- + util/misc/validate_fastqs.py
- util/retrieve_sequences_from_fasta.pl
- util/support_scripts/bowtie2_wrapper.pl
- util/support_scripts/prep_rnaseq_alignments_for_genome_assisted_assembly.pl


Changes:

=====================================
Analysis/DifferentialExpression/run_GOseq.pl
=====================================
@@ -184,6 +184,13 @@ main: {
     
     print $ofh "    descr = unlist(lapply(result_table\$category, get_GO_term_descr))\n";
     print $ofh "    result_table\$go_term = descr;\n";
+   
+    print $ofh "    result_table\$gene_ids = do.call(rbind, lapply(result_table\$category, function(x) { \n" .
+               "            gene_list = GO_to_gene_list[[x]]\n" .
+               "            gene_list = gene_list[gene_list %in% gene_ids_in_feature_cat]\n" .
+               "            paste(gene_list, collapse=', ');\n" .
+               "     }) )\n";
+
     print $ofh "    write.table(result_table[order(result_table\$under_represented_pvalue),], file=go_depleted_filename, sep='\t', quote=F, row.names=F)\n";
     
             


=====================================
Analysis/FL_reconstruction_analysis/FL_trans_analysis_pipeline.pl
=====================================
@@ -92,7 +92,7 @@ my $util_dir = "$FindBin::RealBin/util";
 my $found_all_tools = 1;
 my @required_tools = qw (blat slclust);
 foreach my $tool (@required_tools) {
-    my $path = `which $tool`;
+    my $path = `sh -c "command -v $tool"`;
     unless ($path =~ /\w/) {
         print STDERR "Error, cannot locate required tool: $tool\n";
         $found_all_tools = 0;


=====================================
Analysis/FL_reconstruction_analysis/util/blat_full_length_mappings.pl
=====================================
@@ -103,7 +103,7 @@ main: {
     
 	my $blat_output = "$out_prefix.pslx";
 	
-    my  $blat_prog = `which blat`;
+    my  $blat_prog = `sh -c "command -v blat"`;
     chomp $blat_prog;
     unless ($blat_prog) {
         die "Error, cannot find a blat program in your path.\n";


=====================================
Changelog.txt
=====================================
@@ -1,3 +1,13 @@
+# Trinity-v2.14.0 Mar 11, 2022
+- bugfix for the rarely occurring butterfly error:  "after topo sort, still have edge unaccounted for: Edge..."
+- setting max value for max_mem to 200G to avoid potential problems
+- adding validate_fastqs.py
+- setting an imposed absolute min contig length parameter setting to 100
+- exit zero on version check
+- updates from M. Crusoe to make system
+- updated bowtie2 and samtools sort command
+- added trinity ids for the goseq depleted files
+
 
 # Trinity-v2.13.2 Sep 4, 2021
 -bugfix - Trinity-GG final output files werent being written in v2.13, so restored here under v2.13.2, and integrated into the test regression suite to check in future releases.


=====================================
Docker/VERSION.txt
=====================================
@@ -1 +1 @@
-2.13.2
+2.14.0


=====================================
Docker/cleanMe.sh
=====================================
@@ -1,3 +1,5 @@
 sudo rm -rf ./trinity_out_dir
 sudo rm -rf ./trinity_ext_sample_data_docker
 sudo rm -rf ./trinity_ext_sample_data_singularity
+sudo rm -rf ./trinity_ext_sample_data
+sudo rm -rf ./trinity_out_dir_docker


=====================================
Makefile
=====================================
@@ -60,19 +60,19 @@ clean:
 
 
 test_trinity:
-	cd sample_data/test_Trinity_Assembly && make test
+	cd sample_data/test_Trinity_Assembly && $(MAKE) test
 
 
 # note 'test_all': ** this is for a more advanced installation including devel features **
 
 test_all:
-	cd sample_data/ && make test_all
+	cd sample_data/ && $(MAKE) test_all
 	./__pull_trinity_ext_sample_data.sh
-	cd trinity_ext_sample_data/ && make test
+	cd trinity_ext_sample_data/ && $(MAKE) test
 
 test_clean:
-	cd sample_data/ && make clean
-	cd trinity_ext_sample_data/ && make clean
+	cd sample_data/ && $(MAKE) clean
+	cd trinity_ext_sample_data/ && $(MAKE) clean
 
 ###################################################################
 


=====================================
PerlLib/CDNA/PASA_alignment_assembler.pm
=====================================
@@ -60,7 +60,7 @@ sub _init {
     $self->{assemblies} = []; #contains list of all singletons and assemblies.
     $self->{fuzzlength} = $FUZZLENGTH;  #default setting.
     
-    my $pasa_bin = `which pasa`;
+    my $pasa_bin = `sh -c "command -v pasa"`;
     $pasa_bin =~ s/\s//g;
     
     unless (-x $pasa_bin) {


=====================================
PerlLib/COMMON.pm
=====================================
@@ -13,7 +13,7 @@ sub get_sort_exec {
     # check it like so:
     #  perl -MCOMMON -e 'print COMMON::get_sort_exec(4);'
 
-    my $sort_exec = `which sort`;
+    my $sort_exec = `sh -c "command -v sort"`;
     unless ($sort_exec =~ /\w/) {
         confess "Error, cannot find sort utility";
     }


=====================================
Trinity
=====================================
@@ -21,10 +21,13 @@ use List::Util qw(min max);
 use Data::Dumper;
 
 
-my $VERSION = "Trinity-v2.13.2";
+my $VERSION = "Trinity-v2.14.0";
 #my $VERSION = "__BLEEDING_EDGE__"; 
 
 
+my $ABSOLUTE_MIN_CONTIG_LENGTH = 100;  # going shorter might lead to making too many intermediate files during the run.
+
+
 BEGIN {
 
     $ENV{TRINITY_HOME} = "$FindBin::RealBin";
@@ -276,8 +279,6 @@ $trinity_banner
 ####################################
 ##  Misc:  #########################
 #
-#  --include_supertranscripts      :yield supertranscripts fasta and gtf files as outputs.
-#
 #  --SS_lib_type <string>          :Strand-specific RNA-Seq read orientation.
 #                                   if paired: RF or FR,
 #                                   if single: F or R.   (dUTP method = RF)
@@ -285,7 +286,7 @@ $trinity_banner
 #
 #  --CPU <int>                     :number of CPUs to use, default: $CPU
 #  --min_contig_length <int>       :minimum assembled contig length to report
-#                                   (def=$min_contig_length)
+#                                   (def=$min_contig_length, must be >= $ABSOLUTE_MIN_CONTIG_LENGTH)
 #
 #  --long_reads <string>           :fasta file containing error-corrected or circular consensus (CCS) pac bio reads
 #                                   (** note: experimental parameter **, this functionality continues to be under development)
@@ -938,9 +939,15 @@ if ($help_flag) {
 if ($show_version_flag) {
     &version_check();
     
-    exit(1);
+    exit(0);
+}
+
+
+if ($min_contig_length < $ABSOLUTE_MIN_CONTIG_LENGTH) {
+    die "sorry, min contig length set at $min_contig_length is below our imposed threshold of $ABSOLUTE_MIN_CONTIG_LENGTH and might lead to undesirably long runtimes and numbers of transcript clusters to pursue (and number of intermediate files generated).";
 }
 
+
 ## basic options check:
 unless ($max_memory &&
 
@@ -957,7 +964,7 @@ unless ($max_memory &&
 ## make sure properly installed
 {
     foreach my $trinity_tool ("ParaFly", "seqtk-trinity") {
-        my $loc = `which $trinity_tool`;
+        my $loc = `sh -c "command -v $trinity_tool"`;
         unless ($loc =~ /\w/) {
             die "\n\n\tError, cannot locate Trinity-specific tool: $trinity_tool in the PATH setting: $ENV{PATH},  be sure to install Trinity by running 'make' in the base installation directory\n\n";
         }
@@ -1106,6 +1113,15 @@ if ($min_iso_ratio > 1) {
     die "Error, --min_iso_ratio should be <= 1 \n";
 }
 
+if ($max_memory =~ /^(\d+)G/) {
+    my $mem_val = $1;
+    if ($mem_val > 200) {
+        print STDERR "-shouldn't require more than 200G RAM, so resetting max memory suggestion value to 200G to avoid potential problems.\n";
+        $max_memory = "200G";
+    }
+}
+
+
 ## keep the original 'xG' format string for the --JM option, then calculate the numerical value for jellyfish_ram
 my $jellyfish_ram = $max_memory;    ## this one is used in the Chrysalis exec string
 if ($jellyfish_ram) {
@@ -2049,8 +2065,8 @@ sub run_chrysalis {
 
             my $bowtie2_scoring = "G,20,8"; # default bowtie2 params for local alignment
                         
-            $cmd = "bash -c \" set -o pipefail;$bowtie2_path --local -k 2 --no-unal --threads $CPU -f --score-min $bowtie2_scoring -x $iworm_min100_fa_file $bowtie_reads_fa  | samtools view $PARALLEL_SAMTOOLS_SORT_TOKEN -F4 -Sb - | samtools sort -m $samtools_max_memory $PARALLEL_SAMTOOLS_SORT_TOKEN -no - - > $bowtie_sam_file\" ";  
-                        
+            $cmd = "bash -c \" set -o pipefail;$bowtie2_path --local -k 2 --no-unal --threads $CPU -f --score-min $bowtie2_scoring -x $iworm_min100_fa_file $bowtie_reads_fa  | samtools view $PARALLEL_SAMTOOLS_SORT_TOKEN -F4 -Sb - | samtools sort -m $samtools_max_memory $PARALLEL_SAMTOOLS_SORT_TOKEN -no $bowtie_sam_file\" ";  
+            
             $pipeliner->add_commands( new Command($cmd, "$bowtie_sam_file.ok"));
             
             ## generate the scaffold info
@@ -2937,7 +2953,7 @@ sub test_java_failure_capture {
         print "Running Java Tests\n";
     }
     
-    my $java_prog = `which java`;
+    my $java_prog = `sh -c "command -v java"`;
     unless ($java_prog) {
         die "Error, cannot find 'java'.  Please be sure it is available within your \${PATH} setting and then try again.";
     }
@@ -3988,7 +4004,7 @@ sub check_required_3rd_party_tool_installations {
 
 
     # samtools
-    my $samtools_path = `which samtools`;
+    my $samtools_path = `sh -c "command -v samtools"`;
     if ($samtools_path =~ /\w/) {
         print "Found samtools at: $samtools_path\n" if $VERBOSE;
     
@@ -4005,7 +4021,7 @@ sub check_required_3rd_party_tool_installations {
     }
 
     # jellyfish
-    my $jellyfish_path = `which jellyfish`;
+    my $jellyfish_path = `sh -c "command -v jellyfish"`;
     if ($jellyfish_path =~ /\w/) {
         print "Found jellyfish at: $jellyfish_path\n" if $VERBOSE;
 
@@ -4023,8 +4039,8 @@ sub check_required_3rd_party_tool_installations {
     ## bowtie2
     if (!$NO_BOWTIE) {
         ## be sure we can find 'bowtie', since we use it as part of the iworm pair scaffolding step
-        $bowtie2_path = `which bowtie2`;
-        $bowtie2_build_path = `which bowtie2-build`;
+        $bowtie2_path = `sh -c "command -v bowtie2"`;
+        $bowtie2_build_path = `sh -c "command -v bowtie2-build"`;
         if ($bowtie2_path =~ /\w/ && $bowtie2_build_path =~ /\w/) {
             chomp $bowtie2_path;
             chomp $bowtie2_build_path;
@@ -4037,7 +4053,7 @@ sub check_required_3rd_party_tool_installations {
 
     # salmon
     if (! $NO_SALMON) {
-        my $salmon_path = `which salmon`;
+        my $salmon_path = `sh -c "command -v salmon"`;
         if ($salmon_path =~ /\w/) {
             print "Found salmon installed at $salmon_path\n" if $VERBOSE;
 


=====================================
sample_data/Makefile
=====================================
@@ -3,7 +3,7 @@ DIRS = test_Trinity_Assembly
 test_all: test_trin_assembly
 
 test_trin_assembly:
-	cd test_Trinity_Assembly && make test_all
+	cd test_Trinity_Assembly && $(MAKE) test_all
 
 
 clean:


=====================================
trinity-plugins/Makefile
=====================================
@@ -51,8 +51,8 @@ clean:
 	rm -f ./Trimmomatic # rm symlink
 	cd slclust && $(MAKE) clean
 	cd COLLECTL && rm -rf ${COLLECTL_CODE} && rm -f collectl
-	cd htslib && make clean
-	cd bamsifter && make clean
+	cd htslib && $(MAKE) clean
+	cd bamsifter && $(MAKE) clean
 	rm -f ./htslib.ok
 	@echo "\n\n** Done cleaning plugins area **"
 


=====================================
trinity-plugins/ParaFly/Makefile
=====================================
@@ -98,12 +98,12 @@ DIST_ARCHIVES = $(distdir).tar.gz
 GZIP_ENV = --best
 distuninstallcheck_listfiles = find . -type f -print
 distcleancheck_listfiles = find . -type f -print
-ACLOCAL = ${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run aclocal-1.11
-AMTAR = ${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run tar
+ACLOCAL = ${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run aclocal-1.11
+AMTAR = ${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run tar
 AM_CXXFLAGS = -pedantic -fopenmp -Wall -Wextra -Wno-long-long -Wno-deprecated -m64
-AUTOCONF = ${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run autoconf
-AUTOHEADER = ${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run autoheader
-AUTOMAKE = ${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run automake-1.11
+AUTOCONF = ${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run autoconf
+AUTOHEADER = ${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run autoheader
+AUTOMAKE = ${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run automake-1.11
 AWK = gawk
 CPPFLAGS = 
 CXX = g++
@@ -125,8 +125,8 @@ LDFLAGS =
 LIBOBJS = 
 LIBS = 
 LTLIBOBJS = 
-MAKEINFO = ${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run makeinfo
-MKDIR_P = /bin/mkdir -p
+MAKEINFO = ${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run makeinfo
+MKDIR_P = /usr/bin/mkdir -p
 OBJEXT = o
 PACKAGE = parafly
 PACKAGE_BUGREPORT = bug-report at address
@@ -139,10 +139,10 @@ SET_MAKE =
 SHELL = /bin/bash
 STRIP = 
 VERSION = 0.1
-abs_builddir = /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly
-abs_srcdir = /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly
-abs_top_builddir = /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly
-abs_top_srcdir = /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly
+abs_builddir = /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly
+abs_srcdir = /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly
+abs_top_builddir = /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly
+abs_top_srcdir = /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly
 ac_ct_CXX = 
 am__include = include
 am__leading_dot = .
@@ -161,13 +161,13 @@ host_alias =
 htmldir = ${docdir}
 includedir = ${prefix}/include
 infodir = ${datarootdir}/info
-install_sh = ${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/install-sh
+install_sh = ${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/install-sh
 libdir = ${exec_prefix}/lib
 libexecdir = ${exec_prefix}/libexec
 localedir = ${datarootdir}/locale
 localstatedir = ${prefix}/var
 mandir = ${datarootdir}/man
-mkdir_p = /bin/mkdir -p
+mkdir_p = /usr/bin/mkdir -p
 oldincludedir = /usr/include
 pdfdir = ${docdir}
 prefix = /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly


=====================================
trinity-plugins/ParaFly/config.log
=====================================
@@ -4,22 +4,22 @@ running configure, to aid debugging if configure makes a mistake.
 It was created by Parafly configure 0.1, which was
 generated by GNU Autoconf 2.63.  Invocation command line was
 
-  $ ./configure --prefix=/home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly CXX=g++ CC=gcc CFLAGS=-fopenmp CXXFLAGS=-fopenmp
+  $ ./configure --prefix=/home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly CXX=g++ CC=gcc CFLAGS=-fopenmp CXXFLAGS=-fopenmp --no-create --no-recursion
 
 ## --------- ##
 ## Platform. ##
 ## --------- ##
 
-hostname = smileemptysoul
+hostname = methods01.broadinstitute.org
 uname -m = x86_64
-uname -r = 5.4.0-1044-gcp
+uname -r = 3.10.0-1160.15.2.el7.x86_64
 uname -s = Linux
-uname -v = #47~18.04.2-Ubuntu SMP Mon May 17 03:48:15 UTC 2021
+uname -v = #1 SMP Thu Jan 21 16:15:07 EST 2021
 
-/usr/bin/uname -p = unknown
+/usr/bin/uname -p = x86_64
 /bin/uname -X     = unknown
 
-/bin/arch              = unknown
+/bin/arch              = x86_64
 /usr/bin/arch -k       = unknown
 /usr/convex/getsysinfo = unknown
 /usr/bin/hostinfo      = unknown
@@ -27,22 +27,38 @@ uname -v = #47~18.04.2-Ubuntu SMP Mon May 17 03:48:15 UTC 2021
 /usr/bin/oslevel       = unknown
 /bin/universe          = unknown
 
-PATH: /home/bhaas/.local/bin
-PATH: /usr/local/go/bin
+PATH: /broad/software/free/Linux/redhat_7_x86_64/pkgs/google-cloud-sdk/bin
+PATH: /home/unix/bhaas/miniconda2/envs/py3/bin
+PATH: /home/unix/bhaas/miniconda2/condabin
+PATH: /home/unix/bhaas/utilities/emboss
+PATH: .
+PATH: /broad/software/free/Linux/redhat_7_x86_64/pkgs/gcc_7.3.0/bin
+PATH: /broad/software/free/Linux/redhat_7_x86_64/pkgs/hmmer_3.1/bin
+PATH: /broad/software/free/Linux/redhat_7_x86_64/pkgs/mysql_5.6.20/bin
+PATH: /broad/software/free/Linux/redhat_7_x86_64/pkgs/git_2.12.0/bin
+PATH: /broad/software/free/Linux/redhat_7_x86_64/pkgs/jdk1.8.0_181/bin
+PATH: /broad/software/free/Linux/redhat_7_x86_64/pkgs/ncbi-blast_2.2.30+-x86_64/bin
+PATH: /broad/software/free/Linux/redhat_7_x86_64/pkgs/perl_5.8.9/bin
+PATH: /broad/software/free/Linux/redhat_7_x86_64/pkgs/openssl_1.0.2g/bin
+PATH: /broad/software/free/Linux/redhat_7_x86_64/pkgs/dbd-oracle_1.23-perl-5.8.9-oracle-instantclient-10.2.0.4.0/bin
+PATH: /broad/software/nonfree/Linux/redhat_7_x86_64/pkgs/oracle_instantclient-10.2.0.4.0/instantclient_10_2
+PATH: /broad/uge/8.5.5/bin/lx-amd64
+PATH: /home/unix/bhaas/utilities
+PATH: /home/unix/bhaas/GITHUB/pasapipeline/misc_utilities/
+PATH: /home/unix/bhaas/bin
+PATH: /seq/regev_genome_portal/SOFTWARE/SRA_TOOLKIT/sratoolkit.2.5.1-centos_linux64/bin
 PATH: /usr/local/bin
-PATH: /home/bhaas/anaconda3/bin
+PATH: /usr/bin
 PATH: /usr/local/sbin
-PATH: /usr/local/bin
 PATH: /usr/sbin
-PATH: /usr/bin
-PATH: /sbin
-PATH: /bin
-PATH: /usr/games
-PATH: /usr/local/games
-PATH: /snap/bin
-PATH: /home/bhaas/.local/bin
-PATH: /home/bhaas/utilities
-PATH: /home/bhaas/BIN
+PATH: /opt/puppetlabs/bin
+PATH: /opt/dell/srvadmin/bin
+PATH: /home/unix/bhaas/.local/bin
+PATH: /seq/RNASEQ/TOOLS/SPADES/SPAdes-3.14.1-Linux/bin
+PATH: /seq/regev_genome_portal/SOFTWARE/OpenMPI/openmpi-1.6.5/bin
+PATH: /home/unix/bhaas/GITHUB/Trinity_CTAT/fusion
+PATH: /home/unix/bhaas/GITHUB/trinityrnaseq
+PATH: /home/unix/bhaas/.firecloud-cli/ubin
 
 
 ## ----------- ##
@@ -54,7 +70,7 @@ configure:1899: result: /usr/bin/install -c
 configure:1910: checking whether build environment is sane
 configure:1970: result: yes
 configure:2111: checking for a thread-safe mkdir -p
-configure:2150: result: /bin/mkdir -p
+configure:2150: result: /usr/bin/mkdir -p
 configure:2163: checking for gawk
 configure:2179: found /usr/bin/gawk
 configure:2190: result: gawk
@@ -62,7 +78,7 @@ configure:2201: checking whether make sets $(MAKE)
 configure:2223: result: yes
 configure:2423: checking for C++ compiler version
 configure:2431: g++ --version >&5
-g++ (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
+g++ (GCC) 7.3.0
 Copyright (C) 2017 Free Software Foundation, Inc.
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
@@ -71,13 +87,11 @@ configure:2435: $? = 0
 configure:2442: g++ -v >&5
 Using built-in specs.
 COLLECT_GCC=g++
-COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/7/lto-wrapper
-OFFLOAD_TARGET_NAMES=nvptx-none
-OFFLOAD_TARGET_DEFAULT=1
-Target: x86_64-linux-gnu
-Configured with: ../src/configure -v --with-pkgversion='Ubuntu 7.5.0-3ubuntu1~18.04' --with-bugurl=file:///usr/share/doc/gcc-7/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++ --prefix=/usr --with-gcc-major-version-only --program-suffix=-7 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-libmpx --enable-plugin --enable-default-pie --with-system-zlib --with-target-system-zlib --enable-objc-gc=auto --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
+COLLECT_LTO_WRAPPER=/broad/software/free/Linux/redhat_7_x86_64/pkgs/gcc_7.3.0/libexec/gcc/x86_64-redhat-linux/7.3.0/lto-wrapper
+Target: x86_64-redhat-linux
+Configured with: /tmp/redhat_7_x86_64/build/tmp/gcc-7.3.0/configure --prefix=/broad/software/free/Linux/redhat_7_x86_64/pkgs/gcc_7.3.0 --enable-lto --with-cloog --enable-plugins --enable-languages=c,c++,objc,obj-c++,fortran --build=x86_64-redhat-linux --disable-multilib --with-gmp=/broad/software/free/Linux/redhat_7_x86_64/pkgs/gcc_7.3.0 --with-mpfr=/broad/software/free/Linux/redhat_7_x86_64/pkgs/gcc_7.3.0
 Thread model: posix
-gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) 
+gcc version 7.3.0 (GCC) 
 configure:2446: $? = 0
 configure:2453: g++ -V >&5
 g++: error: unrecognized command line option '-V'
@@ -123,27 +137,6 @@ configure:3182: $? = 0
 configure:3213: result: none required
 configure:3349: creating ./config.status
 
-## ---------------------- ##
-## Running config.status. ##
-## ---------------------- ##
-
-This file was extended by Parafly config.status 0.1, which was
-generated by GNU Autoconf 2.63.  Invocation command line was
-
-  CONFIG_FILES    = 
-  CONFIG_HEADERS  = 
-  CONFIG_LINKS    = 
-  CONFIG_COMMANDS = 
-  $ ./config.status 
-
-on smileemptysoul
-
-config.status:760: creating Makefile
-config.status:760: creating src/Makefile
-config.status:760: creating config.h
-config.status:982: config.h is unchanged
-config.status:1034: executing depfiles commands
-
 ## ---------------- ##
 ## Cache variables. ##
 ## ---------------- ##
@@ -169,7 +162,7 @@ ac_cv_env_target_alias_set=
 ac_cv_env_target_alias_value=
 ac_cv_objext=o
 ac_cv_path_install='/usr/bin/install -c'
-ac_cv_path_mkdir=/bin/mkdir
+ac_cv_path_mkdir=/usr/bin/mkdir
 ac_cv_prog_AWK=gawk
 ac_cv_prog_cxx_g=yes
 ac_cv_prog_make_make_set=yes
@@ -180,15 +173,15 @@ am_cv_CXX_dependencies_compiler_type=gcc3
 ## Output variables. ##
 ## ----------------- ##
 
-ACLOCAL='${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run aclocal-1.11'
+ACLOCAL='${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run aclocal-1.11'
 AMDEPBACKSLASH='\'
 AMDEP_FALSE='#'
 AMDEP_TRUE=''
-AMTAR='${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run tar'
+AMTAR='${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run tar'
 AM_CXXFLAGS='-pedantic -fopenmp -Wall -Wextra -Wno-long-long -Wno-deprecated -m64'
-AUTOCONF='${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run autoconf'
-AUTOHEADER='${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run autoheader'
-AUTOMAKE='${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run automake-1.11'
+AUTOCONF='${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run autoconf'
+AUTOHEADER='${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run autoheader'
+AUTOMAKE='${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run automake-1.11'
 AWK='gawk'
 CPPFLAGS=''
 CXX='g++'
@@ -209,8 +202,8 @@ LDFLAGS=''
 LIBOBJS=''
 LIBS=''
 LTLIBOBJS=''
-MAKEINFO='${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run makeinfo'
-MKDIR_P='/bin/mkdir -p'
+MAKEINFO='${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run makeinfo'
+MKDIR_P='/usr/bin/mkdir -p'
 OBJEXT='o'
 PACKAGE='parafly'
 PACKAGE_BUGREPORT='bug-report at address'
@@ -245,13 +238,13 @@ host_alias=''
 htmldir='${docdir}'
 includedir='${prefix}/include'
 infodir='${datarootdir}/info'
-install_sh='${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/install-sh'
+install_sh='${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/install-sh'
 libdir='${exec_prefix}/lib'
 libexecdir='${exec_prefix}/libexec'
 localedir='${datarootdir}/locale'
 localstatedir='${prefix}/var'
 mandir='${datarootdir}/man'
-mkdir_p='/bin/mkdir -p'
+mkdir_p='/usr/bin/mkdir -p'
 oldincludedir='/usr/include'
 pdfdir='${docdir}'
 prefix='/home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly'
@@ -275,3 +268,24 @@ target_alias=''
 #define VERSION "0.1"
 
 configure: exit 0
+
+## ---------------------- ##
+## Running config.status. ##
+## ---------------------- ##
+
+This file was extended by Parafly config.status 0.1, which was
+generated by GNU Autoconf 2.63.  Invocation command line was
+
+  CONFIG_FILES    = 
+  CONFIG_HEADERS  = 
+  CONFIG_LINKS    = 
+  CONFIG_COMMANDS = 
+  $ ./config.status 
+
+on methods01.broadinstitute.org
+
+config.status:760: creating Makefile
+config.status:760: creating src/Makefile
+config.status:760: creating config.h
+config.status:982: config.h is unchanged
+config.status:1034: executing depfiles commands


=====================================
trinity-plugins/ParaFly/config.status
=====================================
@@ -363,10 +363,10 @@ Copyright (C) 2008 Free Software Foundation, Inc.
 This config.status script is free software; the Free Software Foundation
 gives unlimited permission to copy, distribute and modify it."
 
-ac_pwd='/home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly'
+ac_pwd='/broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly'
 srcdir='.'
 INSTALL='/usr/bin/install -c'
-MKDIR_P='/bin/mkdir -p'
+MKDIR_P='/usr/bin/mkdir -p'
 AWK='gawk'
 test -n "$AWK" || AWK=awk
 # The default lists apply if the user does not specify any file.
@@ -556,20 +556,20 @@ S["CXXFLAGS"]="-fopenmp"
 S["CXX"]="g++"
 S["am__untar"]="${AMTAR} xf -"
 S["am__tar"]="${AMTAR} chof - \"$$tardir\""
-S["AMTAR"]="${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run tar"
+S["AMTAR"]="${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run tar"
 S["am__leading_dot"]="."
 S["SET_MAKE"]=""
 S["AWK"]="gawk"
-S["mkdir_p"]="/bin/mkdir -p"
-S["MKDIR_P"]="/bin/mkdir -p"
+S["mkdir_p"]="/usr/bin/mkdir -p"
+S["MKDIR_P"]="/usr/bin/mkdir -p"
 S["INSTALL_STRIP_PROGRAM"]="$(install_sh) -c -s"
 S["STRIP"]=""
-S["install_sh"]="${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/install-sh"
-S["MAKEINFO"]="${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run makeinfo"
-S["AUTOHEADER"]="${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run autoheader"
-S["AUTOMAKE"]="${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run automake-1.11"
-S["AUTOCONF"]="${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run autoconf"
-S["ACLOCAL"]="${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run aclocal-1.11"
+S["install_sh"]="${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/install-sh"
+S["MAKEINFO"]="${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run makeinfo"
+S["AUTOHEADER"]="${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run autoheader"
+S["AUTOMAKE"]="${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run automake-1.11"
+S["AUTOCONF"]="${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run autoconf"
+S["ACLOCAL"]="${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run aclocal-1.11"
 S["VERSION"]="0.1"
 S["PACKAGE"]="parafly"
 S["CYGPATH_W"]="echo"


=====================================
trinity-plugins/ParaFly/src/Makefile
=====================================
@@ -62,12 +62,12 @@ DIST_SOURCES = $(ParaFly_SOURCES)
 ETAGS = etags
 CTAGS = ctags
 DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = ${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run aclocal-1.11
-AMTAR = ${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run tar
+ACLOCAL = ${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run aclocal-1.11
+AMTAR = ${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run tar
 AM_CXXFLAGS = -pedantic -fopenmp -Wall -Wextra -Wno-long-long -Wno-deprecated -m64
-AUTOCONF = ${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run autoconf
-AUTOHEADER = ${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run autoheader
-AUTOMAKE = ${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run automake-1.11
+AUTOCONF = ${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run autoconf
+AUTOHEADER = ${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run autoheader
+AUTOMAKE = ${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run automake-1.11
 AWK = gawk
 CPPFLAGS = 
 CXX = g++
@@ -89,8 +89,8 @@ LDFLAGS =
 LIBOBJS = 
 LIBS = 
 LTLIBOBJS = 
-MAKEINFO = ${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/missing --run makeinfo
-MKDIR_P = /bin/mkdir -p
+MAKEINFO = ${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/missing --run makeinfo
+MKDIR_P = /usr/bin/mkdir -p
 OBJEXT = o
 PACKAGE = parafly
 PACKAGE_BUGREPORT = bug-report at address
@@ -103,10 +103,10 @@ SET_MAKE =
 SHELL = /bin/bash
 STRIP = 
 VERSION = 0.1
-abs_builddir = /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/src
-abs_srcdir = /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/src
-abs_top_builddir = /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly
-abs_top_srcdir = /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly
+abs_builddir = /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/src
+abs_srcdir = /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/src
+abs_top_builddir = /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly
+abs_top_srcdir = /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly
 ac_ct_CXX = 
 am__include = include
 am__leading_dot = .
@@ -125,13 +125,13 @@ host_alias =
 htmldir = ${docdir}
 includedir = ${prefix}/include
 infodir = ${datarootdir}/info
-install_sh = ${SHELL} /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly/install-sh
+install_sh = ${SHELL} /broad/hptmp/bhaas/trinityrnaseq/trinity-plugins/ParaFly/install-sh
 libdir = ${exec_prefix}/lib
 libexecdir = ${exec_prefix}/libexec
 localedir = ${datarootdir}/locale
 localstatedir = ${prefix}/var
 mandir = ${datarootdir}/man
-mkdir_p = /bin/mkdir -p
+mkdir_p = /usr/bin/mkdir -p
 oldincludedir = /usr/include
 pdfdir = ${docdir}
 prefix = /home/bhaas/GITHUB/trinityrnaseq/trinity-plugins/ParaFly


=====================================
trinity-plugins/scaffold_iworm_contigs/Makefile
=====================================
@@ -2,7 +2,7 @@ CXX    ?= g++
 prefix = ../htslib
 
 ScaffoldIwormContigs:
-	$(CXX) $(LDFLAGS) -I$(prefix) -L$(prefix) ScaffoldIwormContigs.cpp error_checker.cpp -lhts -o scaffold_iworm_contigs
+	$(CXX) $(LDFLAGS) -I$(prefix) -L$(prefix) $(CPPFLAGS) $(CXXFLAGS) ScaffoldIwormContigs.cpp error_checker.cpp -lhts -o scaffold_iworm_contigs
 
 clean:
 	rm -f scaffold_iworm_contigs


=====================================
util/align_and_estimate_abundance.pl
=====================================
@@ -401,7 +401,7 @@ if ( $thread_count !~ /^\d+$/ ) {
     
         
     foreach my $tool (@tools) {
-        my $p = `which $tool`;
+        my $p = `sh -c "command -v $tool"`;
         unless ($p =~ /\w/) {
             warn("ERROR, cannot find $tool in PATH setting: $ENV{PATH}\n\n");
             $missing = 1;


=====================================
util/misc/SRA_to_fastq.pl
=====================================
@@ -24,7 +24,7 @@ unless (@sra_files) {
     die $usage;
 }
 
-my $fastq_dump_path = `which fastq-dump`;
+my $fastq_dump_path = `sh -c "command -v fastq-dump"`;
 unless ($fastq_dump_path && $fastq_dump_path =~ /\w/) {
     die "Error, cannot find 'fastq-dump' utility in your PATH. Be sure you have SRA toolkit installed and fastq-dump in your PATH setting. ";
 }


=====================================
util/misc/blat_util/blat_to_sam.pl
=====================================
@@ -61,7 +61,7 @@ unless ($genome_fa && $reads_fa) {
 	my @required_progs = qw (blat psl2sam.pl);
 
 	foreach my $prog (@required_progs) {
-		my $path = `which $prog`;
+		my $path = `sh -c "command -v $prog"`;
 		unless ($path =~ /^\//) {
 			die "Error, cannot locate required program: $prog";
 		}


=====================================
util/misc/run_HISAT.pl
=====================================
@@ -20,7 +20,7 @@ BEGIN {
         $HISAT_HOME = $ENV{HISAT_HOME};
     }
     else {
-        my $hisat_prog = `which hisat`;
+        my $hisat_prog = `sh -c "command -v hisat"`;
         if ($hisat_prog) {
             chomp $hisat_prog;
             $HISAT_HOME = dirname($hisat_prog);


=====================================
util/misc/run_STAR.pl
=====================================
@@ -30,6 +30,7 @@ my $usage = <<__EOUSAGE__;
 #  --star_path <string>        full path to the STAR program to use.
 #  --patch <string>            genomic targets to patch the genome fasta with.
 #  --chim_search               include Chimeric.junction outputs
+#  --max_intron <int>          max intron length (and PE gap size)
 #
 #######################################################################
 
@@ -53,6 +54,7 @@ my $ADV = 0;
 my $star_path = "STAR";
 my $patch;
 my $chim_search;
+my $max_intron;
 
 &GetOptions( 'h' => \$help_flag,
              'genome=s' => \$genome,
@@ -65,6 +67,7 @@ my $chim_search;
              'star_path=s' => \$star_path,
              'patch=s' => \$patch,
              'chim_search' => \$chim_search,
+             "max_intron=i" => \$max_intron,
     );
 
 
@@ -81,7 +84,7 @@ if (@ARGV) {
 }
 
 
-my $star_prog = `which $star_path`;
+my $star_prog = `sh -c "command -v $star_path"`;
 chomp $star_prog;
 unless ($star_prog =~ /\w/) {
     die "Error, cannot locate STAR program. Be sure it's in your PATH setting.  ";
@@ -150,21 +153,40 @@ main: {
         . " --alignSJDBoverhangMin 10 "
         . " --outSAMstrandField intronMotif "
         . " --outSAMunmapped Within "
+        . " --outReadsUnmapped Fastx "
+        . " --alignInsertionFlush Right "
+        . " --alignSplicedMateMapLminOverLmate 0 "
+        . " --alignSplicedMateMapLmin 30 "
+        . " --alignSJstitchMismatchNmax 5 -1 5 5 "  #which allows for up to 5 mismatches for non-canonical GC/AG, and AT/AC junctions, and any number of mismatches for canonical junctions (the default values 0 -1 0 0 replicate the old behavior (from AlexD)      
+        . " --peOverlapNbasesMin 12 "
+        . " --peOverlapMMp 0.1 "
         . " --limitBAMsortRAM 20000000000";
 
+
+    if ($max_intron) {
+    
+        $cmd .= " --alignMatesGapMax $max_intron "
+            . " --alignIntronMax $max_intron ";
+    }
+    
+    
     if ($chim_search) {
-        $cmd .= " --chimJunctionOverhangMin 12 "
+        $cmd .= " --chimJunctionOverhangMin 8 "
+             .  " --chimOutJunctionFormat 1 "
              .  " --chimSegmentMin 12 "
              .  " --chimSegmentReadGapMax parameter 3 "
+             .  " --chimMultimapNmax 20 "
+             .  " --chimOutType Junctions WithinBAM "
+             .  " --chimScoreJunctionNonGTAG -4 "
+             .  " --chimNonchimScoreDropMin 10 "
+             .  " --chimMultimapScoreRange 10 ";
     }
-        
+    
     if ($patch) {
         $cmd .= " --genomeFastaFiles $patch ";
     }
         
-    
-    $cmd .= " --alignSJstitchMismatchNmax 5 -1 5 5 ";  #which allows for up to 5 mismatches for non-canonical GC/AG, and AT/AC junctions, and any number of mismatches for canonical junctions (the default values 0 -1 0 0 replicate the old behavior (from AlexD)
-    
+        
     
     if ($reads =~ /\.gz$/) {
         $cmd .= " --readFilesCommand 'gunzip -c' ";


=====================================
util/misc/run_STAR_via_samples_file.pl
=====================================
@@ -72,7 +72,7 @@ if (@ARGV) {
 }
 
 
-my $star_prog = `which STAR`;
+my $star_prog = `sh -c "command -v STAR"`;
 chomp $star_prog;
 unless ($star_prog =~ /\w/) {
     die "Error, cannot locate STAR program. Be sure it's in your PATH setting.  ";


=====================================
util/misc/validate_fastqs.py
=====================================
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+
+import os, sys, re
+import logging
+import argparse
+import gzip
+from collections import defaultdict
+
+def main():
+    """Validate one fastq file, or a left/right pair of fastq files.
+
+    Every record must have sequence and quality strings of equal length.
+    When --right_fq is given, records are read in lockstep and each pair
+    must share a core read name and both files must hold the same number
+    of records.  Prints "fastq(s) validate" and exits 0 on success; the
+    first violation raises AssertionError.
+    """
+
+    parser = argparse.ArgumentParser(description="validate fastqs", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument("--left_fq", required=True, type=str, help="left fastq file")
+
+    parser.add_argument("--right_fq", required=False, type=str, help="right fastq file")
+
+    args = parser.parse_args()
+
+    left_fq = args.left_fq
+    right_fq = args.right_fq
+
+    left_fq_iterator = fastq_iterator(left_fq)
+    right_fq_iterator = None
+    if right_fq:
+        right_fq_iterator = fastq_iterator(right_fq)
+
+    counter = 0
+
+    for left_read_tuple in left_fq_iterator:
+        left_readname, left_readseq, left_L3, left_quals = left_read_tuple
+
+        counter += 1
+        if counter % 10000 == 0:
+            sys.stderr.write(f"\r[{counter}]  ")
+
+        assert len(left_readseq) == len(left_quals), f"Error, left seqlen and quals len dont match:\n{left_readname}\n{left_readseq}\n{left_L3}\n{left_quals}\n"
+
+        if right_fq_iterator is not None:
+            # A default keeps a short right file from surfacing as a bare StopIteration.
+            right_read_tuple = next(right_fq_iterator, None)
+            assert right_read_tuple is not None, f"Error, right fastq ends before left fastq at record {counter}: {left_readname}"
+            right_readname, right_readseq, right_L3, right_quals = right_read_tuple
+
+            assert len(right_readseq) == len(right_quals), f"Error, right seqlen and quals len dont match:\n{right_readname}\n{right_readseq}\n{right_L3}\n{right_quals}\n"
+
+            assert core_readname(left_readname) == core_readname(right_readname), f"Error, fastq pair read names aren't consistent: {left_readname} vs. {right_readname}"
+
+    if right_fq_iterator is not None:
+        # Leftover right records mean the two files are not properly paired.
+        extra_read_tuple = next(right_fq_iterator, None)
+        assert extra_read_tuple is None, f"Error, right fastq has more records than left fastq ({counter}): {extra_read_tuple[0]}"
+
+    print("fastq(s) validate")
+
+    sys.exit(0)
+
+
+
+def core_readname(readname):
+    """Return the text before the first space in readname, minus a trailing /1 or /2."""
+    # partition() hands back everything ahead of the first space, or the
+    # whole string when there is no space.
+    name_token = readname.partition(" ")[0]
+    # Drop the mate suffix so the names of paired reads can be compared.
+    return re.sub("/[12]$", "", name_token)
+
+
+
+def fastq_iterator(fastq_filename):
+    """Yield (readname, readseq, L3, quals) tuples, one per 4-line fastq record.
+
+    Names ending in '.gz' are opened with gzip, anything else as plain text.
+    Iteration stops at end of file or at a blank header line; a record cut
+    short by end of file raises ValueError.
+    """
+    opener = gzip.open if fastq_filename.endswith(".gz") else open
+    # 'with' closes the handle when the generator finishes or is discarded.
+    with opener(fastq_filename, 'rt', encoding='utf-8') as fh:
+        while True:
+            # readline() returns '' at EOF, whereas next(fh) raises StopIteration,
+            # which PEP 479 converts to RuntimeError inside a generator.
+            record = [fh.readline() for _ in range(4)]
+            if not record[0].strip():
+                return
+            if not record[3]:
+                raise ValueError(f"Error, truncated fastq record at end of {fastq_filename}: {record[0].rstrip()}")
+            yield tuple(line.rstrip() for line in record)
+
+        
+
+
+
+if __name__=='__main__':
+    main()


=====================================
util/retrieve_sequences_from_fasta.pl
=====================================
@@ -11,7 +11,7 @@ my $target_db = $ARGV[1] or die $usage;
 
 main: {
 
-    my $samtools = `which samtools`;
+    my $samtools = `sh -c "command -v samtools"`;
     unless ($samtools =~ /\w/) {
         die "Error, need samtools in your PATH setting.";
     }


=====================================
util/support_scripts/bowtie2_wrapper.pl
=====================================
@@ -163,7 +163,7 @@ if ($SS_lib_type && $SS_lib_type !~ /^(F|R|FR|RF)$/) {
     my @required_progs = qw(samtools bowtie2-build bowtie2);
     
     foreach my $prog (@required_progs) {
-        my $path = `which $prog`;
+        my $path = `sh -c "command -v $prog"`;
         unless ($path =~ /^\//) {
             die "Error, path to required $prog cannot be found";
         }


=====================================
util/support_scripts/prep_rnaseq_alignments_for_genome_assisted_assembly.pl
=====================================
@@ -120,7 +120,7 @@ main: {
 	}
 	else {
         if (cwd() ne dirname(File::Spec->rel2abs($SAM_file))) {
-            &process_cmd("$SYMLINK $SAM_file");
+            &process_cmd("$SYMLINK $SAM_file " . basename($SAM_file));
             $SAM_file = basename($SAM_file);
         }
 		push (@sam_info, [$SAM_file, '+']);



View it on GitLab: https://salsa.debian.org/med-team/trinityrnaseq/-/commit/9773f0b9f65a9f78758e08c1618799fe8743de09

-- 
View it on GitLab: https://salsa.debian.org/med-team/trinityrnaseq/-/commit/9773f0b9f65a9f78758e08c1618799fe8743de09
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20220410/b8af1495/attachment-0001.htm>


More information about the debian-med-commit mailing list