[med-svn] [trinityrnaseq] 01/01: Imported Upstream version 2.0.3+dfsg
Michael Crusoe
misterc-guest at moszumanska.debian.org
Thu Feb 12 22:38:32 UTC 2015
This is an automated email from the git hooks/post-receive script.
misterc-guest pushed a commit to branch upstream
in repository trinityrnaseq.
commit 1a1969db83df2c2eeb43eeebc947025608a5a1e7
Author: Michael R. Crusoe <mcrusoe at msu.edu>
Date: Thu Feb 12 13:59:08 2015 -0500
Imported Upstream version 2.0.3+dfsg
---
Butterfly/Butterfly.jar | Bin 1426909 -> 1427334 bytes
Butterfly/src/lib/Jaligner.jar | Bin 266463 -> 0 bytes
Butterfly/src/lib/collections-generic-4.01.jar | Bin 531557 -> 0 bytes
Butterfly/src/lib/java-getopt-1.0.13.jar | Bin 56709 -> 0 bytes
Butterfly/src/lib/jung-algorithms-2.0.1.jar | Bin 233113 -> 0 bytes
Butterfly/src/lib/jung-api-2.0.1.jar | Bin 40975 -> 0 bytes
Butterfly/src/lib/jung-graph-impl-2.0.1.jar | Bin 62329 -> 0 bytes
Butterfly/src/src/SeqVertex.java | 1 +
Butterfly/src/src/TransAssembly_allProbPaths.java | 71 +++-
Chrysalis/analysis/ReadsToTranscripts.cc | 2 +-
Makefile | 8 +-
PerlLib/{HTC => HPC}/Base_handler.pm | 2 +-
PerlLib/{HTC => HPC}/FarmIt.pm | 4 +-
PerlLib/{HTC => HPC}/GridRunner.pm | 93 +++--
PerlLib/{HTC => HPC}/LSF_handler.pm | 4 +-
PerlLib/{HTC => HPC}/PBS_handler.pm | 4 +-
PerlLib/{HTC => HPC}/SGE_handler.pm | 4 +-
PerlLib/{HTC => HPC}/SLURM_handler.pm | 4 +-
PerlLib/Pipeliner.pm | 11 +-
Release.Notes | 16 +
Trinity | 406 +++++++++++++++------
galaxy-plugin/EdgeR_differentialExpression.xml | 47 +++
galaxy-plugin/abundance_estimation_to_matrix.xml | 42 +++
.../abundance_estimation_to_matrix_wrapper.py | 40 ++
{htc_conf => hpc_conf}/BroadInst_LSF.hour.10.conf | 0
.../BroadInst_LSF.neurolab.1.conf | 0
.../BroadInst_LSF.neurolab.10.conf | 0
.../BroadInst_LSF.neurolab.100.conf | 0
{htc_conf => hpc_conf}/BroadInst_LSF.regev.1.conf | 0
{htc_conf => hpc_conf}/BroadInst_LSF.regev.10.conf | 0
.../BroadInst_LSF.regev.100.conf | 0
{htc_conf => hpc_conf}/BroadInst_LSF.test.conf | 0
{htc_conf => hpc_conf}/BroadInst_LSF.week.1.conf | 0
{htc_conf => hpc_conf}/BroadInst_SGE.test.conf | 0
{htc_conf => hpc_conf}/PBS.test.conf | 0
{htc_conf => hpc_conf}/SLURM.test.conf | 0
.../deprecated/BroadInst_LSF.Trinity.conf | 0
.../deprecated/BroadInst_LSF.blast.conf | 0
.../deprecated/BroadInst_LSF.conf | 0
.../deprecated/BroadInst_LSF.regev.10.conf | 0
.../deprecated/BroadInst_LSF.regev.100.conf | 0
.../deprecated/BroadInst_LSF.regev.1000.conf | 0
.../deprecated/BroadInst_LSF.test.conf | 0
.../deprecated/BroadInst_SGE.conf | 0
.../deprecated/BroadInst_SGE.test.conf | 0
{htc_conf => hpc_conf}/deprecated/SLURM.FAS.conf | 0
trinity-plugins/Makefile | 43 ++-
trinity-plugins/Trimmomatic | 1 -
trinity-plugins/collectl/make_data_files.py | 17 +-
trinity-plugins/fastool | 1 -
trinity-plugins/jellyfish-2.1.4.tar.gz | Bin 958038 -> 0 bytes
trinity-plugins/parafly | 1 -
.../{parafly-r2013-01-21 => parafly-code}/LICENSE | 0
.../Makefile.am | 0
.../Makefile.in | 0
.../aclocal.m4 | 0
.../{parafly-r2013-01-21 => parafly-code}/config.h | 0
.../config.h.in | 0
.../configure | 0
.../configure.ac | 0
.../{parafly-r2013-01-21 => parafly-code}/depcomp | 0
.../install-sh | 0
.../{parafly-r2013-01-21 => parafly-code}/missing | 0
.../src/Makefile.am | 0
.../src/Makefile.in | 0
.../src/ParaFly.cpp | 10 +-
.../src/argProcessor.cpp | 0
.../src/argProcessor.hpp | 0
.../{parafly-r2013-01-21 => parafly-code}/stamp-h1 | 0
util/HTC_helpers/bsub_fasta_bin_cmd_processor.pl | 201 ----------
util/HTC_helpers/get_failed_cmds.pl | 48 ---
util/HTC_helpers/htc_bin_blastPlus.pl | 224 ------------
util/HTC_helpers/run_cmds_on_grid.pl | 92 -----
util/support_scripts/ExitTester.jar | Bin 8259 -> 0 bytes
.../get_Trinity_gene_to_trans_map.pl | 2 +-
.../partition_chrysalis_graphs_n_reads.pl | 2 +-
util/support_scripts/tests.py | 40 +-
77 files changed, 661 insertions(+), 780 deletions(-)
diff --git a/Butterfly/Butterfly.jar b/Butterfly/Butterfly.jar
index 8a01081..21e2d54 100644
Binary files a/Butterfly/Butterfly.jar and b/Butterfly/Butterfly.jar differ
diff --git a/Butterfly/src/lib/Jaligner.jar b/Butterfly/src/lib/Jaligner.jar
deleted file mode 100644
index b0bb501..0000000
Binary files a/Butterfly/src/lib/Jaligner.jar and /dev/null differ
diff --git a/Butterfly/src/lib/collections-generic-4.01.jar b/Butterfly/src/lib/collections-generic-4.01.jar
deleted file mode 100755
index 92d009c..0000000
Binary files a/Butterfly/src/lib/collections-generic-4.01.jar and /dev/null differ
diff --git a/Butterfly/src/lib/java-getopt-1.0.13.jar b/Butterfly/src/lib/java-getopt-1.0.13.jar
deleted file mode 100644
index d108633..0000000
Binary files a/Butterfly/src/lib/java-getopt-1.0.13.jar and /dev/null differ
diff --git a/Butterfly/src/lib/jung-algorithms-2.0.1.jar b/Butterfly/src/lib/jung-algorithms-2.0.1.jar
deleted file mode 100755
index 5b98f9c..0000000
Binary files a/Butterfly/src/lib/jung-algorithms-2.0.1.jar and /dev/null differ
diff --git a/Butterfly/src/lib/jung-api-2.0.1.jar b/Butterfly/src/lib/jung-api-2.0.1.jar
deleted file mode 100755
index 6dcac89..0000000
Binary files a/Butterfly/src/lib/jung-api-2.0.1.jar and /dev/null differ
diff --git a/Butterfly/src/lib/jung-graph-impl-2.0.1.jar b/Butterfly/src/lib/jung-graph-impl-2.0.1.jar
deleted file mode 100755
index a64f6f7..0000000
Binary files a/Butterfly/src/lib/jung-graph-impl-2.0.1.jar and /dev/null differ
diff --git a/Butterfly/src/src/SeqVertex.java b/Butterfly/src/src/SeqVertex.java
index 7274649..f3550eb 100644
--- a/Butterfly/src/src/SeqVertex.java
+++ b/Butterfly/src/src/SeqVertex.java
@@ -32,6 +32,7 @@ public class SeqVertex {
public static HashMap<Integer, ArrayList<SeqVertex>> origIDnodeTracker = new HashMap<Integer,ArrayList<SeqVertex>>();
public Vector<Integer> __tmp_compressed_vertices;
+ public boolean is_replacement_vertex;
//constructors
/*
diff --git a/Butterfly/src/src/TransAssembly_allProbPaths.java b/Butterfly/src/src/TransAssembly_allProbPaths.java
index a5ab2ca..5a3bc09 100644
--- a/Butterfly/src/src/TransAssembly_allProbPaths.java
+++ b/Butterfly/src/src/TransAssembly_allProbPaths.java
@@ -1917,7 +1917,7 @@ public class TransAssembly_allProbPaths {
// do a DFS-based graph reconstruction starting from a root node.
-
+ SeqVertex.set_graph(seqvertex_graph);
HashSet<Path> visited = new HashSet<Path>();
@@ -1976,6 +1976,18 @@ public class TransAssembly_allProbPaths {
while (count_zip_up_merged_in_round > 0) {
zip_round++;
+
+ debugMes("\n\n## Round: " + zip_round + " Zipping up.", 10);
+
+ if (graph_contains_loops(seqvertex_graph)) {
+ throw new RuntimeException("Error, detected cycles in seqvertex_graph, so not a DAG as expected!");
+ }
+
+ init_replacement_vertices(seqvertex_graph);
+
+ // ensure DAG
+ topo_sorted_vertices = TopologicalSort.topoSortSeqVerticesDAG(seqvertex_graph);
+
count_zip_up_merged_in_round = zipper_collapse_DAG_zip_up(seqvertex_graph);
sum_merged += count_zip_up_merged_in_round;
@@ -1999,6 +2011,17 @@ public class TransAssembly_allProbPaths {
while (count_zip_down_merged_in_round > 0) {
zip_round++;
+ debugMes("\n\n## Round: " + zip_round + " Zipping down.", 10);
+
+ if (graph_contains_loops(seqvertex_graph)) {
+ throw new RuntimeException("Error, detected cycles in seqvertex_graph, so not a DAG as expected!");
+ }
+
+ init_replacement_vertices(seqvertex_graph);
+
+ // ensure DAG
+ topo_sorted_vertices = TopologicalSort.topoSortSeqVerticesDAG(seqvertex_graph);
+
count_zip_down_merged_in_round = zipper_collapse_DAG_zip_down(seqvertex_graph);
@@ -2022,7 +2045,7 @@ public class TransAssembly_allProbPaths {
- // doubly test. :)
+ // test again. :)
if (graph_contains_loops(seqvertex_graph)) {
throw new RuntimeException("Error, detected cycles in seqvertex_graph, so not a DAG as expected!");
}
@@ -2084,6 +2107,19 @@ public class TransAssembly_allProbPaths {
}
+ private static void init_replacement_vertices(
+ DirectedSparseGraph<SeqVertex, SimpleEdge> seqvertex_graph) {
+
+
+ for (SeqVertex v : seqvertex_graph.getVertices()) {
+ v.is_replacement_vertex = false;
+ }
+
+ return;
+
+ }
+
+
private static int zipper_collapse_DAG_zip_up(
DirectedSparseGraph<SeqVertex, SimpleEdge> seqvertex_graph) {
@@ -2104,6 +2140,8 @@ public class TransAssembly_allProbPaths {
for (SeqVertex v : topo_sorted_vertices) {
+ if (v.is_replacement_vertex) { continue; }
+
if (! seqvertex_graph.containsVertex(v)) { continue; }
@@ -2143,8 +2181,11 @@ public class TransAssembly_allProbPaths {
for (SeqVertex v : topo_sorted_vertices) {
+ if (v.is_replacement_vertex) { continue; }
+
if (! seqvertex_graph.containsVertex(v)) { continue; }
+
count_total_zip_merged += zip_down(seqvertex_graph, v);
@@ -2166,17 +2207,21 @@ public class TransAssembly_allProbPaths {
DirectedSparseGraph<SeqVertex, SimpleEdge> seqvertex_graph,
SeqVertex v) {
+
+
List<SeqVertex> pred_list = new ArrayList<SeqVertex>(seqvertex_graph.getPredecessors(v));
if (pred_list.size() <= 1) { return (0); } // must have multiple parents
-
+ debugMes("## zip_up()", 15);
// get list of parent nodes having the same original ID
HashMap<Integer,HashSet<SeqVertex>> pred_orig_id_to_vertex_list = new HashMap<Integer,HashSet<SeqVertex>>();
for (SeqVertex pred : pred_list) {
+ if (pred.is_replacement_vertex) { return(0); } // delay to next round.
+
if (! seqvertex_graph.containsVertex(pred)) { continue; }
Integer orig_pred_id = pred.getOrigButterflyID();
@@ -2210,17 +2255,23 @@ public class TransAssembly_allProbPaths {
DirectedSparseGraph<SeqVertex, SimpleEdge> seqvertex_graph,
SeqVertex v) {
+
+
List<SeqVertex> child_list = new ArrayList<SeqVertex>(seqvertex_graph.getSuccessors(v));
if (child_list.size() <= 1) { return (0); } // must have multiple parents
+ debugMes("##zip_down()", 15);
+
// get list of children nodes having the same original ID
HashMap<Integer,HashSet<SeqVertex>> child_orig_id_to_vertex_list = new HashMap<Integer,HashSet<SeqVertex>>();
for (SeqVertex child : child_list) {
+ if (child.is_replacement_vertex) { return(0); } // delay to next round
+
if (! seqvertex_graph.containsVertex(child) ) { continue; }
Integer orig_child_id = child.getOrigButterflyID();
@@ -2354,12 +2405,24 @@ public class TransAssembly_allProbPaths {
Integer replacement_vertex_depth = (dir.equals("min")) ? min_val(target_depths) : max_val(target_depths);
+ String zipDir = (dir.equals("min")) ? "Up" : "Down";
+
replacement_vertex_obj.setDepth(replacement_vertex_depth);
replacement_vertex_obj.setNodeDepth(replacement_vertex_depth);
+ replacement_vertex_obj.is_replacement_vertex = true;
+
+ // fix local environment for this round
+ for (SeqVertex p : parent_vertices) {
+ p.is_replacement_vertex = true;
+ }
+ for (SeqVertex c : child_vertices) {
+ c.is_replacement_vertex = true;
+ }
+
replacement_vertex_obj.__tmp_compressed_vertices.addAll(merged_vertex_ids);
- debugMes("ZipMerging nodes: " + pred_same_orig_id_set + " to " + replacement_vertex_obj, 15);
+ debugMes(zipDir + "ZipMerging nodes: " + pred_same_orig_id_set + " to " + replacement_vertex_obj, 15);
int count_merged = pred_same_orig_id_set.size();
diff --git a/Chrysalis/analysis/ReadsToTranscripts.cc b/Chrysalis/analysis/ReadsToTranscripts.cc
index 533431f..1b8f0e3 100644
--- a/Chrysalis/analysis/ReadsToTranscripts.cc
+++ b/Chrysalis/analysis/ReadsToTranscripts.cc
@@ -105,7 +105,7 @@ int main(int argc,char** argv)
vecDNAVector dna;
if(max_mem_reads > 0){
- cout << "Setting maximum number of reads to load in memory to " << max_mem_reads << endl;
+ cerr << "Setting maximum number of reads to load in memory to " << max_mem_reads << endl;
} else {
max_mem_reads = 2147483647; // max int
}
diff --git a/Makefile b/Makefile
index 2b1be89..3ec8304 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ all: inchworm_target chrysalis_target trinity_essentials
inchworm_target:
@echo Using $(TRINITY_COMPILER) compiler for Inchworm and Chrysalis
cd Inchworm && (test -e configure || autoreconf) \
- && ./configure --prefix=`pwd` $(INCHWORM_CONFIGURE_FLAGS) && $(MAKE) install
+ && sh ./configure --prefix=`pwd` $(INCHWORM_CONFIGURE_FLAGS) && $(MAKE) install
chrysalis_target:
cd Chrysalis && $(MAKE) UNSUPPORTED=yes $(CHRYSALIS_MAKE_FLAGS)
@@ -49,15 +49,15 @@ clean:
cd sample_data/ && make clean
-testTrinity:
+test_trinity:
cd sample_data/test_Trinity_Assembly && make test
cd sample_data/test_GenomeGuidedTrinity && make test
-testall:
+test_all:
cd sample_data/ && make test
cd sample_data/test_Trinity_Assembly && make test_full
-testclean:
+test_clean:
cd sample_data/ && make clean
###################################################################
diff --git a/PerlLib/HTC/Base_handler.pm b/PerlLib/HPC/Base_handler.pm
similarity index 98%
rename from PerlLib/HTC/Base_handler.pm
rename to PerlLib/HPC/Base_handler.pm
index 7cf229d..b0b81ec 100644
--- a/PerlLib/HTC/Base_handler.pm
+++ b/PerlLib/HPC/Base_handler.pm
@@ -1,4 +1,4 @@
-package HTC::Base_handler;
+package HPC::Base_handler;
use strict;
use warnings;
diff --git a/PerlLib/HTC/FarmIt.pm b/PerlLib/HPC/FarmIt.pm
old mode 100755
new mode 100644
similarity index 99%
rename from PerlLib/HTC/FarmIt.pm
rename to PerlLib/HPC/FarmIt.pm
index 5f85e4b..26b4bca
--- a/PerlLib/HTC/FarmIt.pm
+++ b/PerlLib/HPC/FarmIt.pm
@@ -3,7 +3,7 @@
package main;
our $SEE;
-package HTC::FarmIt;
+package HPC::FarmIt;
use strict;
use warnings;
@@ -38,7 +38,7 @@ use List::Util qw (shuffle);
my $farmer = new Farmit({cmds=>\@cmds,
- handler => HTC::LSF->new(...), or HTC::SGE->new(...)
+ handler => HPC::LSF->new(...), or HPC::SGE->new(...)
# optional, have defaults:
log_base_dir => cwd(),
diff --git a/PerlLib/HTC/GridRunner.pm b/PerlLib/HPC/GridRunner.pm
similarity index 66%
rename from PerlLib/HTC/GridRunner.pm
rename to PerlLib/HPC/GridRunner.pm
index fe6b2d9..2039b72 100644
--- a/PerlLib/HTC/GridRunner.pm
+++ b/PerlLib/HPC/GridRunner.pm
@@ -1,4 +1,4 @@
-package HTC::GridRunner;
+package HPC::GridRunner;
use strict;
use warnings;
@@ -7,16 +7,13 @@ use List::Util qw (shuffle);
use FindBin;
use Cwd;
-use HTC::FarmIt;
-use HTC::LSF_handler;
-use HTC::SGE_handler;
-use HTC::SLURM_handler;
-use HTC::PBS_handler;
+use HPC::FarmIt;
+use HPC::LSF_handler;
+use HPC::SGE_handler;
+use HPC::SLURM_handler;
+use HPC::PBS_handler;
BEGIN {
- unless ($ENV{TRINITY_HOME}) {
- confess "ERROR, must have env var TRINITY_HOME set to the base installation directory of Trinity ";
- }
unless ($ENV{HOSTNAME}) {
if ($ENV{HOST}) {
@@ -29,6 +26,25 @@ BEGIN {
}
+my $PARAFLY_PROG;
+
+## static method:
+sub use_parafly {
+ my $parafly_prog = `which ParaFly`;
+ unless ($parafly_prog =~ /\w/) {
+ confess "Error, cannot find ParaFly. Please be sure that ParaFly is installed in yuor PATH env setting. ";
+ }
+ chomp $parafly_prog;
+
+ $PARAFLY_PROG = $parafly_prog;
+
+ print STDERR "* Found ParaFly installed at: $parafly_prog\n\n";
+
+ return;
+}
+
+
+
####
sub new {
my $packagename = shift;
@@ -61,16 +77,16 @@ sub new {
my $handler;
if ($grid_type eq "LSF") {
- $handler = HTC::LSF_handler->new(\%config);
+ $handler = HPC::LSF_handler->new(\%config);
}
elsif ($grid_type eq "SGE") {
- $handler = HTC::SGE_handler->new(\%config);
+ $handler = HPC::SGE_handler->new(\%config);
}
elsif ($grid_type eq "SLURM") {
- $handler = HTC::SLURM_handler->new(\%config);
+ $handler = HPC::SLURM_handler->new(\%config);
}
elsif ($grid_type eq "PBS") {
- $handler = HTC::PBS_handler->new(\%config);
+ $handler = HPC::PBS_handler->new(\%config);
}
else {
confess "Error, grid type: $grid_type is not supported";
@@ -148,7 +164,7 @@ sub run_on_grid {
$params{cmds_per_node} = $cmds_per_node;
}
- my $farmer = new HTC::FarmIt(\%params);
+ my $farmer = new HPC::FarmIt(\%params);
$farmer->submit_jobs();
@@ -165,13 +181,23 @@ sub run_on_grid {
}
print STDERR "$num_failed_cmds commands failed during grid computing.\n";
- print STDERR "Failed cmds:\n" . join("\n", @cmds_remaining) . "\n";
-
-
- ## try running them via parafly
- print STDERR "\n\nTrying to run them using parafly...\n\n";
- return($self->run_parafly(@cmds_remaining));
+
+ my $cache_failed_cmds = "$cache_completed_cmds_file.__failures";
+ print STDERR "-failed commands written to: $cache_failed_cmds\n\n";
+ open (my $ofh, ">$cache_failed_cmds") or die "Error, cannot write to $cache_failed_cmds";
+ foreach my $cmd (@cmds_remaining) {
+ print $ofh "$cmd\n";
+ }
+ close $ofh;
+ if ($PARAFLY_PROG) {
+ ## try running them via parafly
+ print STDERR "\n\nTrying to run them using parafly...\n\n";
+ return($self->run_parafly($cache_failed_cmds));
+ }
+ else {
+ return(1);
+ }
}
else {
print "All commands completed successfully on the computing grid.\n";
@@ -182,32 +208,21 @@ sub run_on_grid {
####
sub run_parafly {
my $self = shift;
- my (@cmds) = @_;
+ my ($cmds_file_for_parafly) = @_;
my $cache_file = $self->{cache_completed_cmds_file};
+
+ my $num_cpus = $ENV{OMP_NUM_THREADS} || 1;
- my $cmds_file = "$cache_file.failed_for_parafly.txt";
- open (my $ofh, ">$cmds_file") or die "Error, cannot write to file $cmds_file";
- foreach my $cmd (@cmds) {
- print $ofh $cmd . "\n";
- }
-
- my $num_cpus = $ENV{OMP_NUM_THREADS} || 10;
-
- my $cmd;
- if (my $trin_home = $ENV{TRINITY_HOME}) {
- $cmd = "$trin_home/trinity-plugins/parafly/bin/ParaFly";
- }
- else {
- $cmd = "ParaFly";
- }
-
- $cmd .= " -c $cmds_file -CPU $num_cpus -v -shuffle -failed_cmds $cmds_file.FAILED_DURING_PARAFLY";
+ my $cmd = "$PARAFLY_PROG -c $cmds_file_for_parafly -CPU $num_cpus -v -shuffle -failed_cmds $cmds_file_for_parafly.FAILED_DURING_PARAFLY";
my $ret = system($cmd);
if ($ret) {
- die "Error, cmd: $cmd died with ret: $ret";
+ die "\n\nError, cmd: $cmd died with ret: $ret.\n\n"
+ . "###########\n"
+ . "## See $cmds_file_for_parafly.FAILED_DURING_PARAFLY for final set of commands that could not be executed successfully.\n"
+ . "###########\n\n\n";
}
return(0);
diff --git a/PerlLib/HTC/LSF_handler.pm b/PerlLib/HPC/LSF_handler.pm
similarity index 97%
rename from PerlLib/HTC/LSF_handler.pm
rename to PerlLib/HPC/LSF_handler.pm
index 93f5960..0538b71 100644
--- a/PerlLib/HTC/LSF_handler.pm
+++ b/PerlLib/HPC/LSF_handler.pm
@@ -1,8 +1,8 @@
-package HTC::LSF_handler;
+package HPC::LSF_handler;
use strict;
use warnings;
-use base qw(HTC::Base_handler);
+use base qw(HPC::Base_handler);
use Carp;
use Cwd;
diff --git a/PerlLib/HTC/PBS_handler.pm b/PerlLib/HPC/PBS_handler.pm
similarity index 93%
rename from PerlLib/HTC/PBS_handler.pm
rename to PerlLib/HPC/PBS_handler.pm
index 040056a..a8c7b98 100644
--- a/PerlLib/HTC/PBS_handler.pm
+++ b/PerlLib/HPC/PBS_handler.pm
@@ -1,8 +1,8 @@
-package HTC::PBS_handler;
+package HPC::PBS_handler;
use strict;
use warnings;
-use base qw(HTC::Base_handler);
+use base qw(HPC::Base_handler);
use Carp;
use Cwd;
diff --git a/PerlLib/HTC/SGE_handler.pm b/PerlLib/HPC/SGE_handler.pm
similarity index 97%
rename from PerlLib/HTC/SGE_handler.pm
rename to PerlLib/HPC/SGE_handler.pm
index 7aea1d5..9712f0d 100644
--- a/PerlLib/HTC/SGE_handler.pm
+++ b/PerlLib/HPC/SGE_handler.pm
@@ -1,8 +1,8 @@
-package HTC::SGE_handler;
+package HPC::SGE_handler;
use strict;
use warnings;
-use base qw(HTC::Base_handler);
+use base qw(HPC::Base_handler);
use Carp;
use Cwd;
diff --git a/PerlLib/HTC/SLURM_handler.pm b/PerlLib/HPC/SLURM_handler.pm
similarity index 93%
rename from PerlLib/HTC/SLURM_handler.pm
rename to PerlLib/HPC/SLURM_handler.pm
index 96a5e17..1685aba 100644
--- a/PerlLib/HTC/SLURM_handler.pm
+++ b/PerlLib/HPC/SLURM_handler.pm
@@ -1,8 +1,8 @@
-package HTC::SLURM_handler;
+package HPC::SLURM_handler;
use strict;
use warnings;
-use base qw(HTC::Base_handler);
+use base qw(HPC::Base_handler);
use Carp;
use Cwd;
diff --git a/PerlLib/Pipeliner.pm b/PerlLib/Pipeliner.pm
index adf9cde..285527c 100644
--- a/PerlLib/Pipeliner.pm
+++ b/PerlLib/Pipeliner.pm
@@ -4,9 +4,16 @@ use strict;
use warnings;
use Carp;
+my $VERBOSE = 0;
+
####
sub new {
my $packagename = shift;
+ my %params = @_;
+
+ if ($params{-verbose}) {
+ $VERBOSE = 1;
+ }
my $self = { cmd_objs => [],
};
@@ -41,10 +48,10 @@ sub run {
my $checkpoint_file = $cmd_obj->get_checkpoint_file();
if (-e $checkpoint_file) {
- print STDERR "-skipping cmd: $cmdstr, checkpoint exists.\n";
+ print STDERR "--Skipping cmd: $cmdstr, checkpoint exists.\n" if $VERBOSE;
}
else {
- print STDERR "-running cmd: $cmdstr\n";
+ print STDERR "Running cmd: $cmdstr\n" if $VERBOSE;
my $ret = system($cmdstr);
if ($ret) {
confess "Error, cmd: $cmdstr died with ret $ret";
diff --git a/Release.Notes b/Release.Notes
index 704f9a4..92ace01 100644
--- a/Release.Notes
+++ b/Release.Notes
@@ -1,3 +1,19 @@
+
+## pre-release v2.0.3
+
+
+ -Trinity is by default less verbose. For a more verbose run, use the new --verbose flag.
+ -Matt MacManes incorporated his optimized trimmomatic settings from his earlier published study (PMID: .... include ref ... ).
+ -less verbose during a run, easier to monitor progress.
+ -butterfly bugfixes for edge cases dealing with overlap graph -> seq vertex graph retaining it as a DAG.
+ -use Jellyfish for only phase 1 of Trinity, with inchworm doing its own kmer counting in phase 2 (faster this way).
+ -moved the HTC code over to the HPC GridRunner codebase and synched.
+
+
+
+
+
+
## Trinity v2.0.2 release:
-Makefile: split into
diff --git a/Trinity b/Trinity
index 074589c..2d76cfe 100755
--- a/Trinity
+++ b/Trinity
@@ -24,11 +24,29 @@ BEGIN {
}
-use HTC::GridRunner;
+use HPC::GridRunner;
open (STDERR, ">&STDOUT"); ## capturing stderr and stdout in a single stdout stream
+#directory definitions
+my $ROOTDIR = "$FindBin::RealBin";
+my $UTILDIR = "$ROOTDIR/util";
+my $MISCDIR = "$UTILDIR/misc";
+my $INCHWORM_DIR = "$ROOTDIR/Inchworm/bin/";
+my $CHRYSALIS_DIR = "$ROOTDIR/Chrysalis";
+my $BUTTERFLY_DIR = "$ROOTDIR/Butterfly";
+my $JELLYFISH_DIR = "$ROOTDIR/trinity-plugins/jellyfish";
+my $FASTOOL_DIR = "$ROOTDIR/trinity-plugins/fastool";
+my $COLLECTL_DIR = "$ROOTDIR/trinity-plugins/collectl/bin";
+my $COREUTILS_DIR = "$ROOTDIR/trinity-plugins/coreutils/bin";
+my $PARAFLY = "$ROOTDIR/trinity-plugins/parafly/bin/ParaFly";
+my $TRIMMOMATIC = "$ROOTDIR/trinity-plugins/Trimmomatic/trimmomatic.jar";
+my $TRIMMOMATIC_DIR = "$ROOTDIR/trinity-plugins/Trimmomatic";
+
+#&version_check();
+
+
# Site specific setup
my $KMER_SIZE = 25;
@@ -173,7 +191,7 @@ my $NO_PARALLEL_IWORM = 0;
## Quality trimming params
my $RUN_TRIMMOMATIC_FLAG = 0;
-my $trimmomatic_quality_trim_params = "LEADING:5 TRAILING:5 MINLEN:36";
+my $trimmomatic_quality_trim_params = "ILLUMINACLIP:$TRIMMOMATIC_DIR/adapters/TruSeq3-PE.fa:2:30:10 SLIDINGWINDOW:4:5 LEADING:5 TRAILING:5 MINLEN:25";
## Normalize reads
my $NORMALIZE_READS_FLAG = 0;
@@ -261,6 +279,8 @@ my $basic_usage = qq^
#
# --cite :show the Trinity literature citation
#
+# --verbose :provide additional job status info during the run.
+#
# --version :reports Trinity version ($VERSION) and exits.
#
# --show_full_usage_info :show the many many more options available for running Trinity (expert usage).
@@ -394,7 +414,7 @@ my $full_usage = qq^
#################################
# Grid-computing options: #######
#
-# --grid_conf_file <string> :configuration file for supported compute farms
+# --grid_conf <string> :configuration file for supported compute farms
# ex. TRINITY_HOME/htc_conf/BroadInst_LSF.conf
# currently supported computing grids: LSF, SGE
#
@@ -405,8 +425,8 @@ my $full_usage = qq^
#
^;
-my $usage_synopsis = qq^
-
+my $usage_synopsis = qq^#
+#
###############################################################################
#
# *Note, a typical Trinity command might be:
@@ -465,8 +485,7 @@ my $advanced_usage = <<_ADVANCEDUSAGE_;
# Other:
# --monitoring :use collectl to monitor all steps of Trinity
#
-# --compdir|component_directory : use a temporary or local directory for Components_bin
-#
+# --bypass_java_version_check : skip check for required java version 1.7
#
@@ -475,20 +494,6 @@ _ADVANCEDUSAGE_
;
-my $ROOTDIR = "$FindBin::RealBin";
-my $UTILDIR = "$ROOTDIR/util";
-my $MISCDIR = "$UTILDIR/misc";
-my $INCHWORM_DIR = "$ROOTDIR/Inchworm/bin/";
-my $CHRYSALIS_DIR = "$ROOTDIR/Chrysalis";
-my $BUTTERFLY_DIR = "$ROOTDIR/Butterfly";
-my $JELLYFISH_DIR = "$ROOTDIR/trinity-plugins/jellyfish";
-my $FASTOOL_DIR = "$ROOTDIR/trinity-plugins/fastool";
-my $COLLECTL_DIR = "$ROOTDIR/trinity-plugins/collectl/bin";
-my $COREUTILS_DIR = "$ROOTDIR/trinity-plugins/coreutils/bin";
-my $PARAFLY = "$ROOTDIR/trinity-plugins/parafly/bin/ParaFly";
-my $TRIMMOMATIC = "$ROOTDIR/trinity-plugins/Trimmomatic/trimmomatic.jar";
-
-#&version_check();
my $usage = $basic_usage . $usage_synopsis;
@@ -539,6 +544,11 @@ my @ORIG_ARGS = @ARGV;
my $CHRYSALIS_DEBUG_WELD_ALL = 0;
my $iworm_opts = "";
+my $bypass_java_version_check = 0;
+
+my $SEE = 0;
+
+my $ANANAS_DIR = "";
&GetOptions(
@@ -546,7 +556,9 @@ my $iworm_opts = "";
'h|help' => \$help_flag,
'advanced_help' => \$advanced_help_flag,
'show_full_usage_info' => \$full_usage_info_flag,
-
+
+ 'verbose' => \$SEE,
+
## general opts
"seqType=s" => \$seqType,
"left=s{,}" => \@left_files,
@@ -629,7 +641,7 @@ my $iworm_opts = "";
"no_run_chrysalis" => \$NO_RUN_CHRYSALIS_FLAG,
# Grid computing options
- 'grid_conf_file=s' => \$grid_conf_file,
+ 'grid_conf=s' => \$grid_conf_file,
"show_advanced_options" => \$show_advanced_options,
@@ -646,7 +658,7 @@ my $iworm_opts = "";
# hidden (don't look here! ;)
'KMER_SIZE=i' => \$KMER_SIZE,
'jelly_s=i' => \$JELLY_S,
- 'compdir|component_directory=s' => \$component_directory,
+
'NO_PARALLEL_IWORM' => \$NO_PARALLEL_IWORM,
'chrysalis_debug_weld_all' => \$CHRYSALIS_DEBUG_WELD_ALL,
@@ -664,9 +676,12 @@ my $iworm_opts = "";
"grid_node_CPU=i" => \$grid_node_CPU,
"grid_node_max_memory=s" => \$grid_node_max_memory,
+ "bypass_java_version_check" => \$bypass_java_version_check,
- );
+ "ananas_dir=s" => \$ANANAS_DIR,
+
+ );
@@ -756,6 +771,12 @@ if ($SS_lib_type) {
unless ($genome_guided_bam || (@left_files && @right_files) || @single_files ) {
die "Error, need either options 'left' and 'right' or option 'single' or 'genome_guided_bam'\n";
}
+unless ($genome_guided_bam) {
+ unless ($seqType) {
+ die "Error, need --seqType specified\n";
+ }
+}
+
if (@left_files) {
@left_files = split(",", join(",", @left_files));
@@ -786,14 +807,20 @@ else {
## Check Java version:
-unless ($NO_RUN_INCHWORM_FLAG || $NO_RUN_CHRYSALIS_FLAG) {
+unless ($NO_RUN_INCHWORM_FLAG || $NO_RUN_CHRYSALIS_FLAG || $bypass_java_version_check) {
my $java_version = `java -Xmx64m -version 2>&1 `;
- unless ($java_version =~ /(java|openjdk) version \"1\.(\d)\./) {
- my $version_id = $1;
- if ($version_id < 7) {
+ if ($java_version =~ /(java|openjdk) version \"1\.(\d)\./) {
+ my $version_id = $2;
+ if ($version_id != 7) {
die "Error, Trinity requires access to Java version 1.7. Currently installed version is: $java_version";
}
}
+ else {
+ print STDERR "\n\n\n********************************************************************\n"
+ . "** Warning, Trinity cannot determine which version of Java is being used. Version 1.7 is required. \n\nAttempting to continue in 30 seconds\n"
+ . "********************************************************************\n\n\n";
+ sleep(30);
+ }
}
# Give the variable with memory size and a user-oriented name
@@ -850,7 +877,7 @@ if ($PAIRED_MODE && (!$NO_RUN_CHRYSALIS_FLAG) && (!$NO_BOWTIE)) {
my $bowtie_path = `which bowtie`;
my $bowtie_build_path = `which bowtie-build`;
if ($bowtie_path =~ /\w/ && $bowtie_build_path =~ /\w/) {
- print "Paired mode requires bowtie. Found bowtie at: $bowtie_path\n and bowtie-build at $bowtie_build_path\n\n";
+ print "Paired mode requires bowtie. Found bowtie at: $bowtie_path\n and bowtie-build at $bowtie_build_path\n\n" if $SEE;
}
else {
die "Error, cannot find path to bowtie ($bowtie_path) or bowtie-build ($bowtie_build_path), which is now needed as part of Chrysalis' read scaffolding step. If you should choose to not run bowtie, include the --no_bowtie in your Trinity command.\n\n";
@@ -858,7 +885,7 @@ if ($PAIRED_MODE && (!$NO_RUN_CHRYSALIS_FLAG) && (!$NO_BOWTIE)) {
my $samtools_path = `which samtools`;
if ($samtools_path =~ /\w/) {
- print "Found samtools at: $samtools_path\n";
+ print "Found samtools at: $samtools_path\n" if $SEE;
}
else {
die "Error, cannot find samtools. Please be sure samtools is installed and included in your PATH setting.\n";
@@ -903,12 +930,19 @@ unless (basename($output_directory) =~ /trinity/i) {
}
+if ($grid_conf_file) {
+ # Prep for HPC parafly use.
+ $ENV{PATH} = dirname($PARAFLY) . ":$ENV{PATH}";
+ &HPC::GridRunner::use_parafly();
+}
+
+
main: {
$ENV{OMP_NUM_THREADS} = $CPU;
unless ($NO_RUN_INCHWORM_FLAG || $NO_RUN_CHRYSALIS_FLAG) {
- print STDERR "-since butterfly will eventually be run, lets test for proper execution of java\n";
+ print STDERR "-since butterfly will eventually be run, lets test for proper execution of java\n" if $SEE;
&test_java_failure_capture();
}
@@ -980,7 +1014,16 @@ main: {
collectl_start() unless ($FULL_CLEANUP);
&perfmon_start() unless ($FULL_CLEANUP);
+
+ unless ($TRINITY_COMPLETE_FLAG) {
+ print STDERR "\n\n";
+ print STDERR "----------------------------------------------------------------------------------\n"
+ . "-------------- Trinity Phase 1: Clustering of RNA-Seq Reads ---------------------\n"
+ . "----------------------------------------------------------------------------------\n\n";
+
+ }
+
##########################
## Run Quality Trimming
##########################
@@ -1052,28 +1095,28 @@ main: {
};
if ($@) {
- print STDERR "$@\n";
+ print STDERR "$@\n" if $SEE;
if ($@ !~ /^NON_FATAL_EXCEPTION/) {
- die "Trinity run failed. Must investigate error above.";
+ die "Trinity run failed. Must investigate error above.\n";
}
}
if ($FULL_CLEANUP) {
- print "Fully cleaning up.\n";
+ print "Fully cleaning up.\n" if $SEE;
$output_directory =~ s|/+$||g; # remove any trailing directory slash
if (-s "Trinity.fasta.tmp") {
rename("Trinity.fasta.tmp", "$output_directory.Trinity.fasta") or die "Error, cannot rename Trinity.fasta.tmp to $output_directory.Trinity.fasta";
- print "\n\n";
- print "###################################################################\n";
- print "Butterfly assemblies are written to $output_directory.Trinity.fasta\n";
- print "###################################################################\n\n\n";
+ print "\n\n"
+ . "###################################################################\n"
+ . "Butterfly assemblies are written to $output_directory.Trinity.fasta\n"
+ . "###################################################################\n\n\n" unless $TRINITY_COMPLETE_FLAG;
}
else {
- print "# No butterfly assemblies to report.\n";
+ print "# No butterfly assemblies to report.\n" unless $TRINITY_COMPLETE_FLAG;
}
if ($MKDIR_OUTDIR_FLAG && basename($output_directory) =~ /trinity/i) { # ensure output directory has trinity in the name, just to be sure we don't delete something non-trinity related!!!
@@ -1096,7 +1139,7 @@ main: {
print "\n\n";
print "###################################################################\n";
print "Butterfly assemblies are written to $output_directory/$butterfly_output_filename\n";
- print "###################################################################\n\n\n";
+ print "###################################################################\n\n\n" unless $TRINITY_COMPLETE_FLAG;
}
else {
die "ERROR, no butterfly assemblies reported.";
@@ -1105,6 +1148,7 @@ main: {
}
&perfmon_end() unless ($FULL_CLEANUP);
+
exit(0);
@@ -1132,7 +1176,7 @@ sub run_Trinity {
print "\n\n#######################################################################\n"
. "Inchworm file: $inchworm_file detected.\n"
. "Skipping Inchworm Step, Using Previous Inchworm Assembly\n"
- . "#######################################################################\n\n";
+ . "#######################################################################\n\n" if ($SEE || ! $TRINITY_COMPLETE_FLAG);
#sleep(2);
}
else {
@@ -1167,7 +1211,7 @@ sub run_Trinity {
die "Error prepping sequences.";
}
- print("Done converting input files.");
+ print("Done converting input files.") if $SEE;
## Calculate input file sizes for performance monitoring
# this should be set as the created fasta otherwise results will differ for same data passed as .fq and .fa?
my $pm_temp = -s "left.fa";
@@ -1186,17 +1230,17 @@ sub run_Trinity {
unlink ("left.fa", "right.fa") unless $jaccard_clip; # no longer needed now that we have 'both.fa', which is needed by chryaslis
}
- foreach my $f ((@left_files,@right_files)){
- if (-s $f.'.readcount'){
- open (IN,$f.'.readcount');
- my $s = <IN>;
- close IN;
- $s=~/([0-9]+)$/;
- $count_of_reads += $1 if $1;
- }
- }
-
-
+ foreach my $f ((@left_files,@right_files)){
+ if (-s $f.'.readcount'){
+ open (IN,$f.'.readcount');
+ my $s = <IN>;
+ close IN;
+ $s=~/([0-9]+)$/;
+ $count_of_reads += $1 if $1;
+ }
+ }
+
+
}
elsif (@single_files) {
@@ -1206,13 +1250,13 @@ sub run_Trinity {
$pm_temp = $pm_temp / 1024 / 1024;
$pm_single_fa_size = sprintf('%.0f', $pm_temp);
foreach my $f (@single_files){
- if (-s $f.'.readcount'){
- open (IN,$f.'.readcount');
- my $s = <IN>;
- close IN;
- $s=~/([0-9]+)$/;
- $count_of_reads += $1 if $1;
- }
+ if (-s $f.'.readcount'){
+ open (IN,$f.'.readcount');
+ my $s = <IN>;
+ close IN;
+ $s=~/([0-9]+)$/;
+ $count_of_reads += $1 if $1;
+ }
}
}
@@ -1316,33 +1360,33 @@ sub run_Trinity {
$pm_chrysalis_end = `date +%s`;
- print "Butterfly_cmds: $butterfly_cmds\n";
+ print STDERR "Butterfly_cmds: $butterfly_cmds\n" if $SEE;
if ($butterfly_cmds && -s $butterfly_cmds) {
## Run Butterfly
- print "Inchworm and Chrysalis complete. Butterfly commands to execute are provided here:\n"
- . $butterfly_cmds . "\n\n";
+ print STDERR "Inchworm and Chrysalis complete. Butterfly commands to execute are provided here:\n"
+ . $butterfly_cmds . "\n\n" if $SEE;
print STDERR "---------------------------------------------------------------\n"
. "-------------------- Butterfly --------------------------------\n"
. "-- (Reconstruct transcripts from reads and de Bruijn graphs) --\n"
- . "---------------------------------------------------------------\n\n";
+ . "---------------------------------------------------------------\n\n" if $SEE;
$pm_butterfly_start = `date +%s`;
if ($grid_conf_file) {
my @bfly_cmds = `cat $butterfly_cmds`;
chomp @bfly_cmds;
- my $grid_runner = new HTC::GridRunner($grid_conf_file, "chrysalis/butterfly_on_grid.cacheSuccess");
+ my $grid_runner = new HPC::GridRunner($grid_conf_file, "chrysalis/butterfly_on_grid.cacheSuccess");
my $ret = $grid_runner->run_on_grid(@bfly_cmds);
if ($ret) {
die "Error, not all butterfly commands could complete successfully... cannot continue.";
}
}
else {
- my $cmd = "$PARAFLY -c $butterfly_cmds -shuffle -CPU $bflyCPU -failed_cmds failed_butterfly_commands.$$.txt -v "; # shuffle them since the first ones are usually the longest-running ones.
+ my $cmd = "$PARAFLY -c $butterfly_cmds -shuffle -CPU $bflyCPU -failed_cmds failed_butterfly_commands.$$.txt "; # shuffle them since the first ones are usually the longest-running ones.
&process_cmd($cmd);
}
$pm_butterfly_end = `date +%s`;
@@ -1350,6 +1394,7 @@ sub run_Trinity {
## capture results:
# my $cmd = 'find ./chrysalis -name "*allProbPaths.fasta" -exec cat {} + > Trinity.fasta.tmp';
# no longer scan the file system... we know which files should exist
+ print STDERR "\n\n** Harvesting all assembled transcripts into a single multi-fasta file...\n" unless ($TRINITY_COMPLETE_FLAG);
my $cmd = "$UTILDIR/support_scripts/print_butterfly_assemblies.pl $chrysalis_output_dir/component_base_listing.txt > Trinity.fasta.tmp";
&process_cmd($cmd);
@@ -1375,7 +1420,7 @@ sub run_chrysalis {
## Define iworm links via paired-end read mappings:
if ($PAIRED_MODE && ! $NO_BOWTIE) {
- my $pipeliner = new Pipeliner();
+ my $pipeliner = new Pipeliner(-verbose => ($TRINITY_COMPLETE_FLAG) ? 0 : 1);
# generate the pair links.
my $iworm_min100_fa_file = "$chrysalis_output_dir/" . basename("$inchworm_file.min100");
@@ -1383,19 +1428,30 @@ sub run_chrysalis {
$pipeliner->add_commands( new Command($cmd, "$iworm_min100_fa_file.ok"));
$pipeliner->run();
- $pipeliner = new Pipeliner();
+ $pipeliner = new Pipeliner(-verbose => $SEE);
$cmd = "bowtie-build -q $iworm_min100_fa_file $iworm_min100_fa_file";
+ unless ($SEE) {
+ $cmd .= " 2>/dev/null";
+ }
if (-s "$iworm_min100_fa_file") {
$pipeliner->add_commands( new Command($cmd, "$iworm_min100_fa_file.bowtie_build.ok"));
my $bowtie_sam_file = "$chrysalis_output_dir/iworm.bowtie.nameSorted.bam";
$cmd = "bash -c \" set -o pipefail; bowtie -a -m 20 --best --strata --threads $CPU --chunkmbs 512 -q -S -f $iworm_min100_fa_file $bowtie_reads_fa | samtools view -F4 -Sb - | samtools sort $PARALLEL_SAMTOOLS_SORT_TOKEN -no - - > $bowtie_sam_file\" ";
+ unless ($SEE) {
+ $cmd .= " 2>/dev/null";
+ }
+
$pipeliner->add_commands( new Command($cmd, "$bowtie_sam_file.ok"));
## generate the scaffold info
my $iworm_scaffolds_file = "$chrysalis_output_dir/iworm_scaffolds.txt";
$cmd = "$UTILDIR/support_scripts/scaffold_iworm_contigs.pl $bowtie_sam_file $inchworm_file > $iworm_scaffolds_file"; # important, must use original inchworm file because positions are indexed for later use by GraphFromFasta
+ unless ($SEE) {
+ $cmd .= " 2>/dev/null";
+ }
+
$pipeliner->add_commands( new Command($cmd, "$iworm_scaffolds_file.ok"));
$pair_links_file = $iworm_scaffolds_file;
@@ -1407,7 +1463,7 @@ sub run_chrysalis {
- my $chrysalis_pipeliner = new Pipeliner();
+ my $chrysalis_pipeliner = new Pipeliner(-verbose => ($TRINITY_COMPLETE_FLAG) ? 0 : 1);
my $graphFromFasta_outfile = "$chrysalis_output_dir/GraphFromIwormFasta.out";
@@ -1434,6 +1490,11 @@ sub run_chrysalis {
$graphFromFasta_cmd .= " > $graphFromFasta_outfile";
+ if ($TRINITY_COMPLETE_FLAG) {
+ $graphFromFasta_cmd .= " 2>/dev/null";
+ }
+
+
my $checkpoint = "$graphFromFasta_outfile.ok";
$chrysalis_pipeliner->add_commands( new Command($graphFromFasta_cmd, $checkpoint) );
}
@@ -1445,6 +1506,11 @@ sub run_chrysalis {
my $cmd = "$CHRYSALIS_DIR/CreateIwormFastaBundle -i $graphFromFasta_outfile -o $iworm_bundles_fasta_file -min $min_contig_length";
+
+ unless ($SEE) {
+ $cmd .= " 2>/dev/null ";
+ }
+
$chrysalis_pipeliner->add_commands( new Command($cmd, "$iworm_bundles_fasta_file.ok"));
}
@@ -1453,13 +1519,13 @@ sub run_chrysalis {
my $reads_to_components_output_file = "$chrysalis_output_dir/readsToComponents.out";
{
## map reads to chrysalis components:
-
-
+
+
my $cmd = ($BOWTIE_COMP) ? "$CHRYSALIS_DIR/ReadsToComponents.pl" : "$CHRYSALIS_DIR/ReadsToTranscripts";
if ("$cmd" eq "$CHRYSALIS_DIR/ReadsToTranscripts" ) {
- if ($np > 1) {
- $cmd = "$mpiexec -np $np $CHRYSALIS_DIR/ReadsToTranscripts_MPI"
- }
+ if ($np > 1) {
+ $cmd = "$mpiexec -np $np $CHRYSALIS_DIR/ReadsToTranscripts_MPI"
+ }
}
$cmd .= " -i $reads_file -f $iworm_bundles_fasta_file -o $reads_to_components_output_file -t $CPU -max_mem_reads $max_reads_per_loop ";
@@ -1469,6 +1535,11 @@ sub run_chrysalis {
if ($min_pct_read_mapping) {
$cmd .= " -min_pct_read_mapping $min_pct_read_mapping ";
}
+
+ unless ($SEE) {
+ $cmd .= " 2>/dev/null";
+ }
+
$chrysalis_pipeliner->add_commands( new Command($cmd, "$reads_to_components_output_file.ok"));
}
@@ -1478,6 +1549,11 @@ sub run_chrysalis {
## sort the read mappings:
my $cmd = "$sort_exec -T . -S $max_memory -k 1,1n $reads_to_components_output_file > $sorted_reads_to_components_file";
+
+ unless ($SEE) {
+ $cmd .= " 2>/dev/null ";
+ }
+
$chrysalis_pipeliner->add_commands( new Command($cmd, "$sorted_reads_to_components_file.ok"));
}
@@ -1506,6 +1582,10 @@ sub run_chrysalis {
$cmd .= " > $deBruijnGraph_outfile";
+ unless ($SEE) {
+ $cmd .= " 2>/dev/null";
+ }
+
$chrysalis_pipeliner->add_commands(new Command($cmd, "$deBruijnGraph_outfile.ok") );
}
$chrysalis_pipeliner->run();
@@ -1523,8 +1603,12 @@ sub run_chrysalis {
my $partitioning_checkpoint_file = "$chrysalis_output_dir/file_partitioning.ok";
- my $cmd = "$UTILDIR/support_scripts/partition_chrysalis_graphs_n_reads.pl --deBruijns $chrysalis_output_dir/bundled_iworm_contigs.fasta.deBruijn --componentReads $chrysalis_output_dir/readsToComponents.out.sort -N 1000 -L $min_contig_length --compdir $component_directory ";
-
+ my $cmd = "$UTILDIR/support_scripts/partition_chrysalis_graphs_n_reads.pl --deBruijns $chrysalis_output_dir/bundled_iworm_contigs.fasta.deBruijn --componentReads $chrysalis_output_dir/readsToComponents.out.sort -N 1000 -L $min_contig_length ";
+
+ unless ($SEE) {
+ $cmd .= " 2>/dev/null";
+ }
+
&process_cmd($cmd) unless (-e $partitioning_checkpoint_file);
&process_cmd("touch $partitioning_checkpoint_file") unless (-e $partitioning_checkpoint_file);
@@ -1564,6 +1648,10 @@ sub run_chrysalis {
$quantify_graph_cmd .= " -no_cleanup ";
}
+
+ unless ($SEE) {
+ $quantify_graph_cmd .= " 2>/dev/null";
+ }
print $qgraph_cmd_ofh $quantify_graph_cmd . "\n";
@@ -1625,14 +1713,14 @@ sub run_chrysalis {
print STDERR "---------------------------------------------------\n"
. "----------- Chrysalis: QuantifyGraph --------------\n"
. "-- (Integrate mapped reads into de Bruijn graph) --\n"
- . "---------------------------------------------------\n\n";
+ . "---------------------------------------------------\n\n" if $SEE;
if ($grid_conf_file) {
my @quantify_graph_cmds = `cat $quantify_graph_cmds`;
chomp @quantify_graph_cmds;
- my $grid_runner = new HTC::GridRunner($grid_conf_file, "chrysalis/chrysalis_quantify_graph_on_grid.cacheSuccess");
+ my $grid_runner = new HPC::GridRunner($grid_conf_file, "chrysalis/chrysalis_quantify_graph_on_grid.cacheSuccess");
my $ret = $grid_runner->run_on_grid(@quantify_graph_cmds);
if ($ret) {
die "Error, not all Chrysalis quantify_graph commands completed successfully. Cannot continue.";
@@ -1640,7 +1728,7 @@ sub run_chrysalis {
}
else {
- my $cmd = "$PARAFLY -c $quantify_graph_cmds -CPU $CPU -failed_cmds failed_quantify_graph_commands.$$.txt -v -shuffle ";
+ my $cmd = "$PARAFLY -c $quantify_graph_cmds -CPU $CPU -failed_cmds failed_quantify_graph_commands.$$.txt -shuffle ";
&process_cmd($cmd);
}
@@ -1701,10 +1789,18 @@ sub run_recursive_trinity {
&process_cmd("touch $read_filenames_ok");
}
+
+ if ($ANANAS_DIR) {
+ &run_ANANAS($read_filenames, "read_partitions");
+ exit(0);
+ }
+
+
+
if (! -e "recursive_trinity.cmds.ok") {
&write_trinity_partitioned_cmds($read_filenames, "recursive_trinity.cmds");
&process_cmd("touch recursive_trinity.cmds.ok");
- print STDERR "Done prepping partitioned cmds.";
+ print STDERR "Done prepping partitioned cmds." if $SEE;
}
if ($NO_DISTRIBUTED_TRINITY_EXEC) {
@@ -1714,7 +1810,9 @@ sub run_recursive_trinity {
exit(0);
}
- &run_trinity_partitioned_cmds("recursive_trinity.cmds");
+ &run_partitioned_cmds("recursive_trinity.cmds");
+
+ print STDERR "\n\n** Harvesting all assembled transcripts into a single multi-fasta file...\n\n" unless ($TRINITY_COMPLETE_FLAG);
my $cmd = "find read_partitions/ -name '*inity.fasta' | $UTILDIR/support_scripts/partitioned_trinity_aggregator.pl TR > Trinity.fasta.tmp";
&process_cmd($cmd);
@@ -1751,11 +1849,11 @@ sub run_inchworm {
#####################################################
- ## Using Jellyfish kmer method
+ ## Using Jellyfish kmer method (if in initial read partitioning phase (1) )
#####################################################
- if (! $FORCE_INCHWORM_KMER_METHOD) {
-
+ if (! ($FORCE_INCHWORM_KMER_METHOD || $TRINITY_COMPLETE_FLAG) ) {
+
my $jelly_kmer_fa_file = "jellyfish.kmers.fa";
my $jelly_finished_checkpoint_file = "jellyfish.$min_kmer_cov.finished";
unless (-e $jelly_finished_checkpoint_file) {
@@ -1790,6 +1888,10 @@ sub run_inchworm {
$cmd .= " $reads";
+ unless ($SEE) {
+ $cmd .= " 2> /dev/null ";
+ }
+
&process_cmd($cmd);
@@ -1800,12 +1902,20 @@ sub run_inchworm {
my $jelly_db = "mer_counts.jf";
$cmd = "$JELLYFISH_DIR/bin/jellyfish dump -L $min_kmer_cov $jelly_db > $jelly_kmer_fa_file";
+
+ unless ($SEE) {
+ $cmd .= " 2>/dev/null";
+ }
&process_cmd($cmd);
## write a histogram of the kmer counts.
$cmd = "$JELLYFISH_DIR/bin/jellyfish histo -t $CPU -o $jelly_kmer_fa_file.histo $jelly_db";
+
+ unless ($SEE) {
+ $cmd .= " 2>/dev/null";
+ }
&process_cmd($cmd);
@@ -1833,7 +1943,7 @@ sub run_inchworm {
else {
######################################################
- ## Using Inchworm kmer method (original, slow method)
+ ## Using Inchworm kmer method (original, slow w/ large data)
######################################################
$inchworm_cmd = "$INCHWORM_DIR/inchworm --reads $reads --run_inchworm -K $KMER_SIZE -L $MIN_IWORM_LEN --monitor 1 ";
@@ -1842,13 +1952,12 @@ sub run_inchworm {
}
}
-
## finish constructing the inchworm command to execute
unless ($strand_specific_flag) {
$inchworm_cmd .= " --DS ";
}
-
+
unless ($NO_CLEANUP) {
$inchworm_cmd .= " --keep_tmp_files ";
}
@@ -1871,8 +1980,12 @@ sub run_inchworm {
print STDERR "----------------------------------------------\n"
. "--------------- Inchworm ---------------------\n"
. "-- (Linear contig construction from k-mers) --\n"
- . "----------------------------------------------\n\n";
+ . "----------------------------------------------\n\n" if (! $TRINITY_COMPLETE_FLAG);
+ unless ($SEE) {
+ $inchworm_cmd .= " 2>/dev/null";
+ }
+
eval {
@@ -1987,7 +2100,7 @@ sub create_full_path {
sub process_cmd {
my ($cmd) = @_;
- print &mytime."CMD: $cmd\n";
+ print STDERR &mytime."CMD: $cmd\n" unless ($TRINITY_COMPLETE_FLAG);
my $start_time = time();
my $ret = system("bash", "-c", $cmd);
@@ -1997,8 +2110,8 @@ sub process_cmd {
die "Error, cmd: $cmd died with ret $ret";
}
- print "CMD finished (" . ($end_time - $start_time) . " seconds)\n";
-
+ print STDERR "CMD finished (" . ($end_time - $start_time) . " seconds)\n" if $SEE;
+
return;
}
@@ -2064,8 +2177,10 @@ sub run_jaccard_clip_single_but_really_paired {
####
sub test_java_failure_capture {
- print "#######################################\n";
- print "Running Java Tests\n";
+ if ($SEE) {
+ print "#######################################\n";
+ print "Running Java Tests\n";
+ }
my $java_prog = `which java`;
unless ($java_prog) {
@@ -2091,15 +2206,15 @@ sub test_java_failure_capture {
};
if ($@) {
- print "-we properly captured the java failure status, as needed. Looking good.\n";
+ print "-we properly captured the java failure status, as needed. Looking good.\n" if $SEE;
}
else {
print STDERR "-we are unable to properly capture java failure status. Please be sure that java (or any wrapper around java that's being used) can properly capture and propagate failure status before proceeding.\n";
exit(1);
}
- print "Java tests succeeded.\n";
- print "###################################\n\n";
+ print "Java tests succeeded.\n" if $SEE;
+ print "###################################\n\n" if $SEE;
return;
}
@@ -2511,9 +2626,9 @@ sub normalize {
my $checkpoint = "$normalize_outdir/normalization.ok";
if (&files_exist(@ret_files, $checkpoint)) {
- print STDERR "###############################################################################\n";
- print STDERR "#### Normalization process was previously completed. Skipping it and using existing normalized files: @ret_files\n";
- print STDERR "###############################################################################\n";
+ print STDERR "###############################################################################\n"
+ . "#### Normalization process was previously completed. Skipping it and using existing normalized files: @ret_files\n"
+ . "###############################################################################\n" if $SEE;
}
else {
@@ -2538,7 +2653,7 @@ sub files_exist {
return(0); # not exists
}
}
-
+
return(1); # all exist
}
@@ -2561,6 +2676,14 @@ sub run_genome_guided_Trinity {
&process_cmd($cmd) unless (-s "read_files.list" && -e "read_files.list.ok");
&process_cmd("touch read_files.list.ok") unless (-e "read_files.list.ok"); # checkpoint
+
+ if ($ANANAS_DIR) {
+ &run_ANANAS("read_files.list", "Dir_\*");
+ exit(0);
+ }
+
+
+
unless (-e "trinity_GG.cmds.ok") {
&write_trinity_partitioned_cmds("read_files.list", "trinity_GG.cmds");
&process_cmd("touch trinity_GG.cmds.ok");
@@ -2573,7 +2696,7 @@ sub run_genome_guided_Trinity {
exit(0);
}
- &run_trinity_partitioned_cmds("trinity_GG.cmds");
+ &run_partitioned_cmds("trinity_GG.cmds");
## pull together the final outputs:
@@ -2588,15 +2711,26 @@ sub run_genome_guided_Trinity {
}
####
-sub run_trinity_partitioned_cmds {
+sub run_partitioned_cmds {
my ($cmds_file) = @_;
- ## execute the commands:
+
+ print STDERR "\n\n";
+ print STDERR "--------------------------------------------------------------------------------\n"
+ . "------------ Trinity Phase 2: Assembling Clusters of Reads ---------------------\n"
+ . "--------------------------------------------------------------------------------\n\n";
+
+
+ if ($ANANAS_DIR) {
+ print STDERR " ************ Using Ananas Assembler ***************** \n\n";
+ }
+
+ ## Execute the commands:
if ($grid_conf_file) {
my @cmds = `cat $cmds_file`;
chomp @cmds;
- my $grid_runner = new HTC::GridRunner($grid_conf_file, "$cmds_file.htc_cache_success");
+ my $grid_runner = new HPC::GridRunner($grid_conf_file, "$cmds_file.htc_cache_success");
my $ret = $grid_runner->run_on_grid(@cmds);
if ($ret) {
die "Error, not all $cmds_file commands completed successfully. Cannot continue.";
@@ -2721,3 +2855,65 @@ sub version_check {
print "Version check failed";
}
}
+
+
+####
+sub run_ANANAS {
+ my ($read_filenames, $base_out_dir) = @_;
+
+ ## Create Ananas command set
+ my $ananas_cmds_file = "ananas.cmds";
+ my $ananas_cmds_checkpoint = "$ananas_cmds_file.ok";
+
+ my $ananas_SS = "";
+ my $ananas_dir = "na";
+ if ($SS_lib_type) {
+ $ananas_SS = " -strand 1 ";
+ if ($PAIRED_MODE) {
+ $ananas_dir = "ff";
+ }
+ }
+ elsif ($PAIRED_MODE) {
+ $ananas_dir = "fr";
+ }
+
+ unless (-e $ananas_cmds_checkpoint) {
+ open (my $fh, $read_filenames) or die "Error, cannot open file $read_filenames";
+ open (my $ofh, ">$ananas_cmds_file") or die "Error, cannot write to $ananas_cmds_file";
+ while (<$fh>) {
+ chomp;
+ my $reads_file = $_;
+
+ my $ananas_cmd = "$ANANAS_DIR/Ananas -i $reads_file -o $reads_file.out_dir -dir $ananas_dir $ananas_SS";
+ print $ofh "$ananas_cmd\n";
+ }
+ close $fh;
+ close $ofh;
+
+ &process_cmd("touch $ananas_cmds_checkpoint");
+ }
+
+ if ($NO_DISTRIBUTED_TRINITY_EXEC) {
+ print STDERR "\n\n###################################################################\n"
+ . "## Stopping here due to --no_distributed_trinity_exec in effect ##\n"
+ . "###################################################################\n\n";
+ exit(0);
+ }
+
+
+ &run_partitioned_cmds($ananas_cmds_file);
+
+
+ ## capture all outputs into a single output file.
+
+ my $cmd = "find $base_out_dir -name \"*final.fa\" | $UTILDIR/support_scripts/partitioned_trinity_aggregator.pl ANANAS > Ananas.fasta.tmp";
+ &process_cmd($cmd);
+
+ rename("Ananas.fasta.tmp", "Ananas.fasta");
+
+ print STDERR "Done. See Ananas.fasta \n\n";
+
+
+
+ exit(0);
+}
diff --git a/galaxy-plugin/EdgeR_differentialExpression.xml b/galaxy-plugin/EdgeR_differentialExpression.xml
new file mode 100644
index 0000000..774a3f4
--- /dev/null
+++ b/galaxy-plugin/EdgeR_differentialExpression.xml
@@ -0,0 +1,47 @@
+<tool id="EdgeR_differentialExpression" name="EdgeR_differentialExpression" version="0.0.1">
+
+ <description>Identify Differentially Expressed Transcripts Using EdgeR</description>
+ <requirements>
+ <requirement type="package">trinity</requirement>
+ </requirements>
+ <command interpreter="python">
+
+ trinityToolWrapper.py Analysis/DifferentialExpression/run_DE_analysis.pl
+ --matrix $counts_matrix
+ --method edgeR
+ --output edgeR_results
+ --dispersion $dispersion
+
+ > stdout.txt
+
+
+ </command>
+ <inputs>
+
+ <param type="data" format="txt" name="counts_matrix" label="Matrix of RNA-Seq fragment counts for transcripts per condition" />
+ <param type="data" format="fasta" name="transcripts_fasta_file" label="Transcripts fasta file corresponding to matrix" />
+ <param type="float" name="dispersion" value="0.1" min="0" label="dispersion value" help="Dispersion value to be used in the negative binomial" />
+
+ </inputs>
+ <outputs>
+
+ <data format="txt" name="diff_expressed_edgeR_results" label="${tool.name} on ${on_string}: differentially expressed transcripts per pair of conditions" from_work_dir="edgeR_results/all_diff_expression_results.txt" />
+
+ <data format="txt" name="matrix_FPKM" label="${tool.name} on ${on_string}: matrix.TMM_normalized.FPKM" from_work_dir="edgeR_results/matrix.TMM_normalized.FPKM" />
+
+ <data format="txt" name="TMM_info" label="${tool.name} on ${on_string}: TMM library size estimates" from_work_dir="edgeR_results/TMM_info.txt" />
+
+ </outputs>
+ <tests>
+
+ <test>
+ <param name="myname" value="This is just a simple test" />
+
+ </test>
+
+
+ </tests>
+ <help>
+ help info here.
+ </help>
+</tool>
diff --git a/galaxy-plugin/abundance_estimation_to_matrix.xml b/galaxy-plugin/abundance_estimation_to_matrix.xml
new file mode 100644
index 0000000..46ef57e
--- /dev/null
+++ b/galaxy-plugin/abundance_estimation_to_matrix.xml
@@ -0,0 +1,42 @@
+<tool id="abundance_estimation_to_matrix" name="abundance_estimation_to_matrix" version="0.0.1">
+
+ <description>Join RSEM estimates from multiple samples into a single matrix</description>
+ <requirements>
+ <requirement type="package">trinity</requirement>
+ </requirements>
+ <command interpreter="python">
+
+ abundance_estimation_to_matrix_wrapper.py
+ #for $q in $RSEM_samples
+ ${q.file} "${q.column_label}"
+ #end for
+
+ </command>
+ <inputs>
+
+ <repeat name="RSEM_samples" title="RSEM abundance estimates for samples">
+ <param name="file" label="Add file" type="data" format="text"/>
+ <param name="column_label" label="column label" type="text" />
+ </repeat>
+
+ </inputs>
+ <outputs>
+ <data format="text" name="counts_matrix" label="${tool.name} on ${on_string}: Counts Matrix" from_work_dir="matrix.counts.matrix"/>
+ </outputs>
+ <tests>
+
+
+ <test>
+ <param name="target" value="trinity/Trinity.fasta" />
+ <param name="aligner" value="bowtie" />
+ <param name="paired_or_single" value="single" />
+ <param name="library_type" value="None" />
+ <param name="input" value="trinity/reads.left.fq" />
+ </test>
+
+
+ </tests>
+ <help>
+ .. _Trinity: http://trinityrnaseq.sourceforge.net
+ </help>
+</tool>
diff --git a/galaxy-plugin/abundance_estimation_to_matrix_wrapper.py b/galaxy-plugin/abundance_estimation_to_matrix_wrapper.py
new file mode 100644
index 0000000..33df387
--- /dev/null
+++ b/galaxy-plugin/abundance_estimation_to_matrix_wrapper.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+import sys, os, string, subprocess
+
+#aliasing the filenames using the labels
+
+
+def run_command(command):
+ print "Running command: " + command
+
+ err_capture_file = open("my.stderr", 'w') # writing stderr to a file
+ cmd_run = subprocess.Popen(args=command, shell=True, stderr=err_capture_file, stdout=sys.stdout)
+ err = cmd_run.wait() # get exit code from command execution
+ err_capture_file.close()
+
+ if err:
+ # report the error messages we captured, and exit non-zero
+ sys.stderr.write("Error, cmd: " + command + " died with ret: " + `err`)
+ for line in open("my.stderr"):
+ sys.stderr.write(line)
+ sys.exit(err)
+ return
+
+label_list = [] # symlink files to the labels
+for i in range(1, len(sys.argv), 2):
+ filename=sys.argv[i]
+ label= sys.argv[i+1]
+ cmd= "ln -sf " + filename + " " + label
+ label_list.append(label)
+ run_command(cmd)
+
+
+# run the abundance estimation script
+
+cmd = os.path.dirname(sys.argv[0]) + "/trinityToolWrapper.py " + " util/abundance_estimates_to_matrix.pl --est_method RSEM --cross_sample_fpkm_norm none " + " ".join(label_list)
+
+run_command(cmd)
+
+sys.exit(0)
+
diff --git a/htc_conf/BroadInst_LSF.hour.10.conf b/hpc_conf/BroadInst_LSF.hour.10.conf
similarity index 100%
rename from htc_conf/BroadInst_LSF.hour.10.conf
rename to hpc_conf/BroadInst_LSF.hour.10.conf
diff --git a/htc_conf/BroadInst_LSF.neurolab.1.conf b/hpc_conf/BroadInst_LSF.neurolab.1.conf
similarity index 100%
rename from htc_conf/BroadInst_LSF.neurolab.1.conf
rename to hpc_conf/BroadInst_LSF.neurolab.1.conf
diff --git a/htc_conf/BroadInst_LSF.neurolab.10.conf b/hpc_conf/BroadInst_LSF.neurolab.10.conf
similarity index 100%
rename from htc_conf/BroadInst_LSF.neurolab.10.conf
rename to hpc_conf/BroadInst_LSF.neurolab.10.conf
diff --git a/htc_conf/BroadInst_LSF.neurolab.100.conf b/hpc_conf/BroadInst_LSF.neurolab.100.conf
similarity index 100%
rename from htc_conf/BroadInst_LSF.neurolab.100.conf
rename to hpc_conf/BroadInst_LSF.neurolab.100.conf
diff --git a/htc_conf/BroadInst_LSF.regev.1.conf b/hpc_conf/BroadInst_LSF.regev.1.conf
similarity index 100%
rename from htc_conf/BroadInst_LSF.regev.1.conf
rename to hpc_conf/BroadInst_LSF.regev.1.conf
diff --git a/htc_conf/BroadInst_LSF.regev.10.conf b/hpc_conf/BroadInst_LSF.regev.10.conf
similarity index 100%
rename from htc_conf/BroadInst_LSF.regev.10.conf
rename to hpc_conf/BroadInst_LSF.regev.10.conf
diff --git a/htc_conf/BroadInst_LSF.regev.100.conf b/hpc_conf/BroadInst_LSF.regev.100.conf
similarity index 100%
rename from htc_conf/BroadInst_LSF.regev.100.conf
rename to hpc_conf/BroadInst_LSF.regev.100.conf
diff --git a/htc_conf/BroadInst_LSF.test.conf b/hpc_conf/BroadInst_LSF.test.conf
similarity index 100%
rename from htc_conf/BroadInst_LSF.test.conf
rename to hpc_conf/BroadInst_LSF.test.conf
diff --git a/htc_conf/BroadInst_LSF.week.1.conf b/hpc_conf/BroadInst_LSF.week.1.conf
similarity index 100%
rename from htc_conf/BroadInst_LSF.week.1.conf
rename to hpc_conf/BroadInst_LSF.week.1.conf
diff --git a/htc_conf/BroadInst_SGE.test.conf b/hpc_conf/BroadInst_SGE.test.conf
similarity index 100%
rename from htc_conf/BroadInst_SGE.test.conf
rename to hpc_conf/BroadInst_SGE.test.conf
diff --git a/htc_conf/PBS.test.conf b/hpc_conf/PBS.test.conf
similarity index 100%
rename from htc_conf/PBS.test.conf
rename to hpc_conf/PBS.test.conf
diff --git a/htc_conf/SLURM.test.conf b/hpc_conf/SLURM.test.conf
similarity index 100%
rename from htc_conf/SLURM.test.conf
rename to hpc_conf/SLURM.test.conf
diff --git a/htc_conf/deprecated/BroadInst_LSF.Trinity.conf b/hpc_conf/deprecated/BroadInst_LSF.Trinity.conf
similarity index 100%
rename from htc_conf/deprecated/BroadInst_LSF.Trinity.conf
rename to hpc_conf/deprecated/BroadInst_LSF.Trinity.conf
diff --git a/htc_conf/deprecated/BroadInst_LSF.blast.conf b/hpc_conf/deprecated/BroadInst_LSF.blast.conf
similarity index 100%
rename from htc_conf/deprecated/BroadInst_LSF.blast.conf
rename to hpc_conf/deprecated/BroadInst_LSF.blast.conf
diff --git a/htc_conf/deprecated/BroadInst_LSF.conf b/hpc_conf/deprecated/BroadInst_LSF.conf
similarity index 100%
rename from htc_conf/deprecated/BroadInst_LSF.conf
rename to hpc_conf/deprecated/BroadInst_LSF.conf
diff --git a/htc_conf/deprecated/BroadInst_LSF.regev.10.conf b/hpc_conf/deprecated/BroadInst_LSF.regev.10.conf
similarity index 100%
rename from htc_conf/deprecated/BroadInst_LSF.regev.10.conf
rename to hpc_conf/deprecated/BroadInst_LSF.regev.10.conf
diff --git a/htc_conf/deprecated/BroadInst_LSF.regev.100.conf b/hpc_conf/deprecated/BroadInst_LSF.regev.100.conf
similarity index 100%
rename from htc_conf/deprecated/BroadInst_LSF.regev.100.conf
rename to hpc_conf/deprecated/BroadInst_LSF.regev.100.conf
diff --git a/htc_conf/deprecated/BroadInst_LSF.regev.1000.conf b/hpc_conf/deprecated/BroadInst_LSF.regev.1000.conf
similarity index 100%
rename from htc_conf/deprecated/BroadInst_LSF.regev.1000.conf
rename to hpc_conf/deprecated/BroadInst_LSF.regev.1000.conf
diff --git a/htc_conf/deprecated/BroadInst_LSF.test.conf b/hpc_conf/deprecated/BroadInst_LSF.test.conf
similarity index 100%
rename from htc_conf/deprecated/BroadInst_LSF.test.conf
rename to hpc_conf/deprecated/BroadInst_LSF.test.conf
diff --git a/htc_conf/deprecated/BroadInst_SGE.conf b/hpc_conf/deprecated/BroadInst_SGE.conf
similarity index 100%
rename from htc_conf/deprecated/BroadInst_SGE.conf
rename to hpc_conf/deprecated/BroadInst_SGE.conf
diff --git a/htc_conf/deprecated/BroadInst_SGE.test.conf b/hpc_conf/deprecated/BroadInst_SGE.test.conf
similarity index 100%
rename from htc_conf/deprecated/BroadInst_SGE.test.conf
rename to hpc_conf/deprecated/BroadInst_SGE.test.conf
diff --git a/htc_conf/deprecated/SLURM.FAS.conf b/hpc_conf/deprecated/SLURM.FAS.conf
similarity index 100%
rename from htc_conf/deprecated/SLURM.FAS.conf
rename to hpc_conf/deprecated/SLURM.FAS.conf
diff --git a/trinity-plugins/Makefile b/trinity-plugins/Makefile
index 99228e7..040defe 100644
--- a/trinity-plugins/Makefile
+++ b/trinity-plugins/Makefile
@@ -17,7 +17,14 @@ endif
### Required by Trinity
#######################
-trinity_essentials: jellyfish fastool_target parafly_target
+FASTOOL_CODE=fstrozzi-Fastool-7c3e034f05
+PARAFLY_CODE=parafly-code
+TRIMMOMATIC_CODE=Trimmomatic-0.32
+
+trinity_essentials: jellyfish fastool_target parafly_target trimmomatic_target
+
+trimmomatic_target:
+ ln -sf ${TRIMMOMATIC_CODE} trimmomatic
jellyfish:
@@ -26,31 +33,32 @@ jellyfish:
mv tmp.jellyfish jellyfish
fastool_target:
- cd fastool && $(MAKE)
+ cd ${FASTOOL_CODE} && $(MAKE)
+ ln -sf ${FASTOOL_CODE} fastool
parafly_target:
- cd parafly && ./configure --prefix=`pwd` && $(MAKE) install
-
+ cd ${PARAFLY_CODE} && sh ./configure --prefix=`pwd` && $(MAKE) install
+ ln -sf ${PARAFLY_CODE} parafly
##################################
### Needed for downstream analyses
##################################
-plugins: transdecoder slclust_target collectl_target rsem
+plugins: transdecoder_target slclust_target collectl_target rsem
@echo "\n\n** Done building plugins **\n\n"
rsem:
- tar -zxvf ${RSEM_CODE}.tar.gz && ln -sf ${RSEM_CODE} tmp.rsem
- cd ./tmp.rsem && $(MAKE) LIBCURSES="${RSEM_LIBCURSES}"
- mv tmp.rsem rsem
+ tar -zxvf ${RSEM_CODE}.tar.gz
+ cd ${RSEM_CODE} && $(MAKE) LIBCURSES="${RSEM_LIBCURSES}"
+ ln -sf ${RSEM_CODE} rsem
-transdecoder:
- tar -zxvf ${TRANSDECODER_CODE}.tar.gz && ln -sf ${TRANSDECODER_CODE} tmp.transdecoder
- cd ./tmp.transdecoder && ${MAKE} simple
- mv ./tmp.transdecoder transdecoder
+transdecoder_target:
+ tar -zxvf ${TRANSDECODER_CODE}.tar.gz
+ cd ${TRANSDECODER_CODE} && ${MAKE} simple
+ ln -sf ${TRANSDECODER_CODE} transdecoder
slclust_target:
cd slclust && $(MAKE) install
@@ -59,11 +67,14 @@ collectl_target:
cd collectl && ./build_collectl.sh
clean:
- rm -rf ./${JELLYFISH_CODE} ./jellyfish
- rm -rf ./${RSEM_CODE} ./rsem
- rm -rf ./${TRANSDECODER_CODE} ./transdecoder
- cd fastool && $(MAKE) clean
+ rm -rf ./${JELLYFISH_CODE} ./jellyfish # has tar.gz
+ rm -rf ./${RSEM_CODE} ./rsem # has tar.gz
+ rm -rf ./${TRANSDECODER_CODE} ./transdecoder # has tar.gz
+ cd ${FASTOOL_CODE} && $(MAKE) clean
+ rm -f ./fastool # rm symlink
cd parafly && $(MAKE) clean
+ rm -f ./parafly # rm symlink
+ rm -f ./trimmomatic # rm symlink
cd slclust && $(MAKE) clean
cd collectl && rm -rf bin doc man
@echo "\n\n** Done cleaning plugins area **"
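The reworked Makefile adopts a build-then-symlink convention: each plugin is compiled inside its versioned source directory (FASTOOL_CODE, PARAFLY_CODE, RSEM_CODE, and so on) and a stable name such as fastool, parafly, or rsem is maintained as a symlink that the clean target later removes. A small sketch of that convention, written in Python only as an illustration (the directory name and build command here are placeholders):

    # Illustration of the build-then-symlink layout used by trinity-plugins/Makefile.
    import os
    import subprocess

    def build_and_link(versioned_dir, stable_name, build_cmd):
        """Build inside the versioned directory, then refresh the stable symlink."""
        subprocess.check_call(build_cmd, shell=True, cwd=versioned_dir)
        if os.path.islink(stable_name):
            os.remove(stable_name)          # same effect as `ln -sf`
        os.symlink(versioned_dir, stable_name)

    # hypothetical call; the real targets drive $(MAKE) inside each plugin directory
    build_and_link("parafly-code", "parafly",
                   "sh ./configure --prefix=`pwd` && make install")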
diff --git a/trinity-plugins/Trimmomatic b/trinity-plugins/Trimmomatic
deleted file mode 120000
index d52230e..0000000
--- a/trinity-plugins/Trimmomatic
+++ /dev/null
@@ -1 +0,0 @@
-Trimmomatic-0.32
\ No newline at end of file
diff --git a/trinity-plugins/collectl/make_data_files.py b/trinity-plugins/collectl/make_data_files.py
index e42f522..a6d9f7c 100644
--- a/trinity-plugins/collectl/make_data_files.py
+++ b/trinity-plugins/collectl/make_data_files.py
@@ -71,21 +71,21 @@ def prettyprocess(line):
s = line.split()[29:]
if not s:
return None
- if s[0] in ("-bash", 'sh', '/bin/bash', 'bash'):
+ if s[0] in ("-bash", 'sh', '/bin/bash', 'bash', 'ln', '/bin/pwd', 'mkdir', 'date', 'touch', '/usr/bin/env'):
return None
exes = ['fastool', 'ParaFly','Butterfly','ReadsToTranscripts', 'jellyfish',
- 'inchworm', 'FastaToDeBruijn', 'QuantifyGraph', 'GraphFromFasta'
- , 'bowtie-build', 'bowtie', 'Chrysalis', 'cat', 'sort', 'cp', 'wc', 'rm']
+ 'inchworm', 'FastaToDeBruijn', 'QuantifyGraph', 'GraphFromFasta', 'CreateIwormFastaBundle',
+ 'bowtie-build', 'bowtie', 'Chrysalis', 'cat', 'sort', 'cp', 'wc', 'rm', 'find']
perl_scripts = ['scaffold_iworm_contigs', 'Trinity', 'collectl', 'print_butterfly_assemblies',
- 'partition_chrysalis_graphs_n_reads', 'fasta_filter_by_min_length']
+ 'partition_chrysalis_graphs_n_reads', 'fasta_filter_by_min_length', 'partitioned_trinity_aggregator']
for k in exes:
if s[0].endswith(k):
return k
if s[0] == 'samtools':
- return 'samtools_' + s[1]
+ return ('samtools_' + s[1]) if len(s) > 1 else 'samtools'
if s[0] == '/bin/sort':
return 'sort'
@@ -93,11 +93,18 @@ def prettyprocess(line):
if s[0] == 'java':
if 'Butterfly.jar' in " ".join(s):
return 'Butterfly'
+ if 'ExitTester.jar' in " ".join(s):
+ return 'ExitTester'
+ if '-version' in " ".join(s):
+ return 'java_version'
+
+ return 'java'
if s[0] == 'perl':
for k in perl_scripts:
if k in s[1]:
return k
+ return 'perl'
if s[0] == '/usr/bin/perl' and 'collectl' in s[2]:
return 'collectl'
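The edits to make_data_files.py broaden how collectl process lines are bucketed: trivial shell commands are now ignored, bare java and perl invocations fall back to generic labels, and a samtools line with no subcommand no longer indexes past the end of the token list. A simplified stand-in for that classification logic, shown only to make the rules concrete (prettyprocess itself works on collectl's column layout):

    # Hypothetical, simplified stand-in for the classification rules patched above.
    IGNORED = ("-bash", "sh", "/bin/bash", "bash", "ln", "/bin/pwd",
               "mkdir", "date", "touch", "/usr/bin/env")

    def classify(tokens):
        """Map a command token list to a short process label, or None to skip it."""
        if not tokens or tokens[0] in IGNORED:
            return None
        if tokens[0] == "samtools":
            # guard against a bare `samtools` call with no subcommand
            return "samtools_" + tokens[1] if len(tokens) > 1 else "samtools"
        if tokens[0] == "java":
            joined = " ".join(tokens)
            if "Butterfly.jar" in joined:
                return "Butterfly"
            if "ExitTester.jar" in joined:
                return "ExitTester"
            return "java_version" if "-version" in joined else "java"
        return tokens[0].split("/")[-1]

    print(classify(["samtools"]))               # samtools
    print(classify(["java", "-version"]))       # java_version
    print(classify(["/bin/bash", "-c", "ls"]))  # None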
diff --git a/trinity-plugins/fastool b/trinity-plugins/fastool
deleted file mode 120000
index 1fbadca..0000000
--- a/trinity-plugins/fastool
+++ /dev/null
@@ -1 +0,0 @@
-fstrozzi-Fastool-7c3e034f05
\ No newline at end of file
diff --git a/trinity-plugins/jellyfish-2.1.4.tar.gz b/trinity-plugins/jellyfish-2.1.4.tar.gz
deleted file mode 100644
index 64c6034..0000000
Binary files a/trinity-plugins/jellyfish-2.1.4.tar.gz and /dev/null differ
diff --git a/trinity-plugins/parafly b/trinity-plugins/parafly
deleted file mode 120000
index 397e3f1..0000000
--- a/trinity-plugins/parafly
+++ /dev/null
@@ -1 +0,0 @@
-parafly-r2013-01-21/
\ No newline at end of file
diff --git a/trinity-plugins/parafly-r2013-01-21/LICENSE b/trinity-plugins/parafly-code/LICENSE
similarity index 100%
rename from trinity-plugins/parafly-r2013-01-21/LICENSE
rename to trinity-plugins/parafly-code/LICENSE
diff --git a/trinity-plugins/parafly-r2013-01-21/Makefile.am b/trinity-plugins/parafly-code/Makefile.am
similarity index 100%
rename from trinity-plugins/parafly-r2013-01-21/Makefile.am
rename to trinity-plugins/parafly-code/Makefile.am
diff --git a/trinity-plugins/parafly-r2013-01-21/Makefile.in b/trinity-plugins/parafly-code/Makefile.in
similarity index 100%
rename from trinity-plugins/parafly-r2013-01-21/Makefile.in
rename to trinity-plugins/parafly-code/Makefile.in
diff --git a/trinity-plugins/parafly-r2013-01-21/aclocal.m4 b/trinity-plugins/parafly-code/aclocal.m4
similarity index 100%
rename from trinity-plugins/parafly-r2013-01-21/aclocal.m4
rename to trinity-plugins/parafly-code/aclocal.m4
diff --git a/trinity-plugins/parafly-r2013-01-21/config.h b/trinity-plugins/parafly-code/config.h
similarity index 100%
rename from trinity-plugins/parafly-r2013-01-21/config.h
rename to trinity-plugins/parafly-code/config.h
diff --git a/trinity-plugins/parafly-r2013-01-21/config.h.in b/trinity-plugins/parafly-code/config.h.in
similarity index 100%
rename from trinity-plugins/parafly-r2013-01-21/config.h.in
rename to trinity-plugins/parafly-code/config.h.in
diff --git a/trinity-plugins/parafly-r2013-01-21/configure b/trinity-plugins/parafly-code/configure
similarity index 100%
rename from trinity-plugins/parafly-r2013-01-21/configure
rename to trinity-plugins/parafly-code/configure
diff --git a/trinity-plugins/parafly-r2013-01-21/configure.ac b/trinity-plugins/parafly-code/configure.ac
similarity index 100%
rename from trinity-plugins/parafly-r2013-01-21/configure.ac
rename to trinity-plugins/parafly-code/configure.ac
diff --git a/trinity-plugins/parafly-r2013-01-21/depcomp b/trinity-plugins/parafly-code/depcomp
similarity index 100%
rename from trinity-plugins/parafly-r2013-01-21/depcomp
rename to trinity-plugins/parafly-code/depcomp
diff --git a/trinity-plugins/parafly-r2013-01-21/install-sh b/trinity-plugins/parafly-code/install-sh
similarity index 100%
rename from trinity-plugins/parafly-r2013-01-21/install-sh
rename to trinity-plugins/parafly-code/install-sh
diff --git a/trinity-plugins/parafly-r2013-01-21/missing b/trinity-plugins/parafly-code/missing
similarity index 100%
rename from trinity-plugins/parafly-r2013-01-21/missing
rename to trinity-plugins/parafly-code/missing
diff --git a/trinity-plugins/parafly-r2013-01-21/src/Makefile.am b/trinity-plugins/parafly-code/src/Makefile.am
similarity index 100%
rename from trinity-plugins/parafly-r2013-01-21/src/Makefile.am
rename to trinity-plugins/parafly-code/src/Makefile.am
diff --git a/trinity-plugins/parafly-r2013-01-21/src/Makefile.in b/trinity-plugins/parafly-code/src/Makefile.in
similarity index 100%
rename from trinity-plugins/parafly-r2013-01-21/src/Makefile.in
rename to trinity-plugins/parafly-code/src/Makefile.in
diff --git a/trinity-plugins/parafly-r2013-01-21/src/ParaFly.cpp b/trinity-plugins/parafly-code/src/ParaFly.cpp
similarity index 94%
rename from trinity-plugins/parafly-r2013-01-21/src/ParaFly.cpp
rename to trinity-plugins/parafly-code/src/ParaFly.cpp
index 240c1b9..fdf1437 100644
--- a/trinity-plugins/parafly-r2013-01-21/src/ParaFly.cpp
+++ b/trinity-plugins/parafly-code/src/ParaFly.cpp
@@ -129,9 +129,11 @@ int main(int argc, char* argv[]) {
}
getline(in,line);
}
+
+ if (VERBOSE_LEVEL) {
+ cerr << "Number of Commands: " << NumberofCommands << endl;
+ }
- cerr << "Number of Commands: " << NumberofCommands << endl;
-
//Parrell Execution of Individual Commands
vector<string> failedCommands;
int num_failed_commands = 0;
@@ -229,7 +231,9 @@ int main(int argc, char* argv[]) {
exit(1);
}
else {
- cout << endl << endl << "All commands completed successfully. :-)" << endl << endl;
+ if (VERBOSE_LEVEL) {
+ cout << endl << endl << "All commands completed successfully. :-)" << endl << endl;
+ }
exit(0); // used to be return(0), but sometimes in OMP land this would not exit 0....?!?!
}
diff --git a/trinity-plugins/parafly-r2013-01-21/src/argProcessor.cpp b/trinity-plugins/parafly-code/src/argProcessor.cpp
similarity index 100%
rename from trinity-plugins/parafly-r2013-01-21/src/argProcessor.cpp
rename to trinity-plugins/parafly-code/src/argProcessor.cpp
diff --git a/trinity-plugins/parafly-r2013-01-21/src/argProcessor.hpp b/trinity-plugins/parafly-code/src/argProcessor.hpp
similarity index 100%
rename from trinity-plugins/parafly-r2013-01-21/src/argProcessor.hpp
rename to trinity-plugins/parafly-code/src/argProcessor.hpp
diff --git a/trinity-plugins/parafly-r2013-01-21/stamp-h1 b/trinity-plugins/parafly-code/stamp-h1
similarity index 100%
rename from trinity-plugins/parafly-r2013-01-21/stamp-h1
rename to trinity-plugins/parafly-code/stamp-h1
diff --git a/util/HTC_helpers/bsub_fasta_bin_cmd_processor.pl b/util/HTC_helpers/bsub_fasta_bin_cmd_processor.pl
deleted file mode 100755
index ae86e02..0000000
--- a/util/HTC_helpers/bsub_fasta_bin_cmd_processor.pl
+++ /dev/null
@@ -1,201 +0,0 @@
-#!/usr/bin/env perl
-
-use strict;
-use FindBin;
-use lib ("FindBin::Bin/../PerlLib", "$FindBin::Bin/PerlLib");
-use Fasta_reader;
-use Getopt::Std;
-use strict;
-use Carp;
-use Cwd;
-use Bsub;
-use List::Util qw (shuffle);
-
-our ($opt_d, $opt_q, $opt_s, $opt_Q, $opt_b, $opt_p, $opt_O, $opt_h, $opt_c, $opt_B, $opt_X, $opt_M, $opt_S, $opt_o);
-
-&getopts ('dq:s:bp:O:hbc:B:Q:XM:S:o:');
-
-my $usage = <<_EOH_;
-
-############################# Options ###############################
-#
-# Required:
-# -q query multiFastaFile (full or relative path)
-# -p program command line template: eg. "/path/to/prog [opts] __QUERY_FILE__ [other opts]"
-# -o outdir
-#
-# Optional:
-# -c cmds per node
-# -S number of fasta seqs per job submission (default: 1)
-# -B bin size (input seqs per directory) (default 5000)
-# -Q bsub queue
-# -M memory (4000 = 4G is the default setting). use nubmers only... don't say 4G!!!
-# -X commands only! don't launch.
-#
-###################### Process Args and Options #####################
-
-_EOH_
-
-
- ;
-
-
-if ($opt_h) {
- die $usage;
-}
-
-my $CMDS_ONLY = $opt_X;
-
-my $bin_size = $opt_B || 5000;
-
-my $queue = $opt_Q || "week";
-
-our $DEBUG = $opt_d;
-
-my $CMDS_PER_NODE = $opt_c;
-
-my $num_seqs_per_job = $opt_S || 1;
-
-unless ($opt_q && $opt_p && $opt_o) {
- die $usage;
-}
-
-my $queryFile = $opt_q;
-unless ($queryFile =~ /^\//) {
- $queryFile = cwd() . "/$queryFile";
-}
-
-my $program_cmd_template = $opt_p;
-unless ($program_cmd_template =~ /__QUERY_FILE__/) {
- die "Error, program cmd template must include '__QUERY_FILE__' placeholder in the command";
-}
-
-
-my $memory = $opt_M || 4000;
-
-my $out_dir = $opt_o;
-
-## Create files to search
-
-my $fastaReader = new Fasta_reader($queryFile);
-
-my @searchFileList;
-
-my $count = 0;
-my $current_bin = 1;
-
-mkdir $out_dir or die "Error, cannot mkdir $out_dir";
-
-my $bindir = "$out_dir/grp_" . sprintf ("%04d", $current_bin);
-mkdir ($bindir) or die "Error, cannot mkdir $bindir";
-
-
-
-while (my $fastaSet = &get_next_fasta_entries($fastaReader, $num_seqs_per_job) ) {
-
- $count++;
-
- my $filename = "$bindir/$count.fa";
-
- push (@searchFileList, $filename);
-
- open (TMP, ">$filename") or die "Can't create file ($filename)\n";
- print TMP $fastaSet;
- close TMP;
- chmod (0666, $filename);
-
- if ($count % $bin_size == 0) {
- # make a new bin:
- $current_bin++;
- $bindir = "$out_dir/grp_" . sprintf ("%04d", $current_bin);
- mkdir ($bindir) or die "Error, cannot mkdir $bindir";
- }
-}
-
-print STDERR "Sequences to search: @searchFileList\n";
-my $numFiles = @searchFileList;
-print STDERR "There are $numFiles blast search jobs to run.\n";
-
-my $curr_dir = cwd;
-
-if ($numFiles) {
-
- my @cmds;
- ## formulate blast commands:
- foreach my $searchFile (@searchFileList) {
- $searchFile = "$curr_dir/$searchFile";
-
- my $cmd = $program_cmd_template;
- $cmd =~ s/__QUERY_FILE__/$searchFile/g;
-
-
- $cmd .= " > $searchFile.OUT ";
-
- unless ($CMDS_ONLY) {
- $cmd .= " 2>$searchFile.ERR";
- }
- push (@cmds, $cmd);
- }
-
-
- @cmds = shuffle(@cmds);
-
- my $cmds_per_node;
- if ($CMDS_PER_NODE) {
- $cmds_per_node = $CMDS_PER_NODE;
- }
- else {
- $cmds_per_node = int ( scalar(@cmds) / 400); # split job across complete set of nodes available.
- if ($cmds_per_node < 1) {
- # use 10 as default.
- $cmds_per_node = 1;
- }
- }
-
-
- open (my $fh, ">cmds.list") or die $!;
- foreach my $cmd (@cmds) {
- print $fh "$cmd\n";
- }
- close $fh;
-
- unless ($CMDS_ONLY) {
- my $bsubber = new Bsub({cmds=>\@cmds,
- log_dir => $out_dir,
- cmds_per_node => $cmds_per_node,
- queue => $queue,
- memory => $memory,
- }
- );
-
- $bsubber->bsub_jobs();
-
- }
-
-} else {
- print STDERR "Sorry, no searches to perform. Results already exist here\n";
-}
-## Cleanup
-
-exit(0);
-
-
-####
-sub get_next_fasta_entries {
- my ($fastaReader, $num_seqs) = @_;
-
-
- my $fasta_entries_txt = "";
-
- for (1..$num_seqs) {
- my $seq_obj = $fastaReader->next();
- unless ($seq_obj) {
- last;
- }
-
- my $entry_txt = $seq_obj->get_FASTA_format();
- $fasta_entries_txt .= $entry_txt;
- }
-
- return($fasta_entries_txt);
-}
diff --git a/util/HTC_helpers/get_failed_cmds.pl b/util/HTC_helpers/get_failed_cmds.pl
deleted file mode 100755
index 126ce8a..0000000
--- a/util/HTC_helpers/get_failed_cmds.pl
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-my $usage = "usage: $0 cmds.list cmds.list.cache_success\n\n";
-
-
-my $cmds_list_file = $ARGV[0] or die $usage;
-my $cached_successes_file = $ARGV[1] or die $usage;
-
-
-
-main: {
-
-
- my %OK;
-
- {
- # get cached successes
-
- open (my $fh, $cached_successes_file) or die $!;
- while (<$fh>) {
- chomp;
- my $cmd = $_;
-
- $OK{$cmd} = 1;
- }
- close $fh;
-
- }
-
- open (my $fh, $cmds_list_file) or die $!;
- while (<$fh>) {
- chomp;
- my $cmd = $_;
- unless ($OK{$cmd}) {
- print "$cmd\n";
- }
- }
-
- close $fh;
-
- exit(0);
-}
-
-
-
diff --git a/util/HTC_helpers/htc_bin_blastPlus.pl b/util/HTC_helpers/htc_bin_blastPlus.pl
deleted file mode 100755
index c7501a1..0000000
--- a/util/HTC_helpers/htc_bin_blastPlus.pl
+++ /dev/null
@@ -1,224 +0,0 @@
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-use FindBin;
-use lib ("$FindBin::Bin/../../PerlLib");
-use Fasta_reader;
-use Getopt::Long qw(:config no_ignore_case bundling pass_through);
-use strict;
-use Carp;
-use Cwd;
-use HTC::GridRunner;
-use List::Util qw (shuffle);
-use File::Basename;
-
-my $usage = <<_EOH_;
-
-############################# Options ###############################
-#
-## Required:
-#
-# --grid_conf|G <string> grid config file
-#
-# --query|Q <string> query multiFastaFile (full or relative path)
-# --search|S <string> search multiFastaFile (full or relative path)
-# --program|P <string> program (e.g. blastn blastx tblastn)
-#
-## Optional
-#
-# --options|O <string> blast options, default "-max_target_seqs 1 -outfmt 6 -evalue 1e-5"
-#
-# --outdir|o <string> outdir (default: "htc_blast_outdir")
-# --fasta_per_job|F <int> number of fasta seqs per job submission (default: 100)
-#
-# -X commands only! don't launch.
-#
-# --RESUME resume from earlier search attempt
-#
-###################### Process Args and Options #####################
-
-
-
-_EOH_
-
-
- ;
-
-my $help_flag;
-my $grid_conf_file;
-my $queryFile;
-my $searchDB;
-my $program;
-my $progOptions = "-max_target_seqs 1 -outfmt 6 -evalue 1e-5";
-my $num_seqs_per_job = 100;
-my $out_dir = "htc_blast_outdir";
-my $CMDS_ONLY;
-our $DEBUG;
-my $bin_size = 2000; # files per directory
-
-my $RESUME_FLAG = 0;
-
-&GetOptions ( 'help|h' => \$help_flag,
- 'grid_conf|G=s' => \$grid_conf_file,
- 'queryFile|Q=s' => \$queryFile,
- 'searchDB|S=s' => \$searchDB,
- 'program|P=s' => \$program,
- 'options|O=s' => \$progOptions,
- 'fasta_per_job|F=i' => \$num_seqs_per_job,
- 'out_dir|o=s' => \$out_dir,
- 'X' => \$CMDS_ONLY,
- 'd' => \$DEBUG,
- 'RESUME' => \$RESUME_FLAG,
-
-);
-
-
-
-if ($help_flag) {
- die $usage;
-}
-
-if (@ARGV) {
- die "Error, not recognizing parameters: @ARGV ";
-}
-
-
-unless ($queryFile && $searchDB && $program) {
- print STDERR "Need to specify -Q, -S, and -P ";
- die $usage;
-}
-
-unless ($queryFile =~ /^\//) {
- $queryFile = cwd() . "/$queryFile";
-}
-
-unless ($searchDB =~ /^\//) {
- $searchDB = cwd() . "/$searchDB";
-}
-
-unless (-s $searchDB || "-s $searchDB.pal") {
- die "Error, can't find $searchDB\n";
-}
-
-
-unless ($out_dir =~ /^\//) {
- # create full path
- $out_dir = cwd() . "/$out_dir";
-}
-
-
-my $cache_file_prefix = "htc-" . join("-", basename($queryFile), basename($searchDB), $program);
-my $cache_file = "$cache_file_prefix.cache_success";
-my $cache_cmds = "$cache_file_prefix.cmds";
-my @cmds;
-
-if ($RESUME_FLAG) {
- @cmds = `cat $cache_cmds`;
- unless (@cmds) {
- die "Error, cannot resume, no cmds found - expecting file: $cache_cmds";
- }
- chomp @cmds;
-}
-else {
-
- ## Create files to search
-
- my $fastaReader = new Fasta_reader($queryFile);
-
- my @searchFileList;
-
- my $count = 0;
- my $current_bin = 1;
-
- mkdir $out_dir or die "Error, cannot mkdir $out_dir";
-
- my $bindir = "$out_dir/grp_" . sprintf ("%04d", $current_bin);
- mkdir ($bindir) or die "Error, cannot mkdir $bindir";
-
-
-
- while (my $fastaSet = &get_next_fasta_entries($fastaReader, $num_seqs_per_job) ) {
-
- $count++;
-
- my $filename = "$bindir/$count.fa";
-
- push (@searchFileList, $filename);
-
- open (TMP, ">$filename") or die "Can't create file ($filename)\n";
- print TMP $fastaSet;
- close TMP;
- chmod (0666, $filename);
-
- if ($count % $bin_size == 0) {
- # make a new bin:
- $current_bin++;
- $bindir = "$out_dir/grp_" . sprintf ("%04d", $current_bin);
- mkdir ($bindir) or die "Error, cannot mkdir $bindir";
- }
- }
-
- print "Sequences to search: @searchFileList\n";
- my $numFiles = @searchFileList;
- print "There are $numFiles blast search jobs to run.\n";
-
- if ($numFiles) {
-
- ## formulate blast commands:
- foreach my $searchFile (@searchFileList) {
-
- my $cmd = "$program -db $searchDB -query $searchFile $progOptions > $searchFile.$program.result ";
- unless ($CMDS_ONLY) {
- $cmd .= "2>$searchFile.$program.stderr";
- }
- push (@cmds, $cmd);
- }
-
- open (my $fh, ">$cache_cmds") or die $!;
- foreach my $cmd (@cmds) {
- print $fh "$cmd\n";
- }
- close $fh;
- }
-}
-
-if (@cmds) {
- unless ($CMDS_ONLY) {
-
- my $grid_runner = new HTC::GridRunner($grid_conf_file, $cache_file);
- my $ret = $grid_runner->run_on_grid(@cmds);
-
- if ($ret) {
- die "Error, not all butterfly commands could complete successfully... cannot continue.";
- }
- }
-
-} else {
- print STDERR "Sorry, no searches to perform. Results already exist here\n";
-}
-
-## Cleanup
-
-exit(0);
-
-
-####
-sub get_next_fasta_entries {
- my ($fastaReader, $num_seqs) = @_;
-
-
- my $fasta_entries_txt = "";
-
- for (1..$num_seqs) {
- my $seq_obj = $fastaReader->next();
- unless ($seq_obj) {
- last;
- }
-
- my $entry_txt = $seq_obj->get_FASTA_format();
- $fasta_entries_txt .= $entry_txt;
- }
-
- return($fasta_entries_txt);
-}
diff --git a/util/HTC_helpers/run_cmds_on_grid.pl b/util/HTC_helpers/run_cmds_on_grid.pl
deleted file mode 100755
index 11714ab..0000000
--- a/util/HTC_helpers/run_cmds_on_grid.pl
+++ /dev/null
@@ -1,92 +0,0 @@
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-use FindBin;
-use lib ("$FindBin::Bin/../../PerlLib");
-use HTC::GridRunner;
-use List::Util qw (shuffle);
-
-use Getopt::Long qw(:config no_ignore_case bundling);
-
-my $usage = <<_EOUSAGE_;
-
-################################################################
-# Required:
-#
-# -c <string> file containing list of commands
-# --grid_conf|G <string> grid config file
-#
-####################################################################
-
-_EOUSAGE_
-
- ;
-
-
-my $grid_conf_file;
-my $cmd_file;
-my $help_flag;
-
-&GetOptions ( 'h' => \$help_flag,
- 'c=s' => \$cmd_file,
-
- 'grid_conf|G=s' => \$grid_conf_file,
-
- );
-
-
-unless ($cmd_file && $grid_conf_file) {
- die $usage;
-}
-
-if ($help_flag) {
- die $usage;
-}
-
-
-
-## add Parafly to path
-$ENV{PATH} .= ":" . "$FindBin::Bin/../trinity-plugins/parafly/bin/";
-
-
-main: {
-
- my $uname = `uname -n`;
- chomp $uname;
-
- print "SERVER: $uname, PID: $$\n";
-
-
- open (my $fh, $cmd_file) or die "Error, cannot open $cmd_file";
- my @cmds;
-
- while (<$fh>) {
- chomp;
- if (/\w/) {
- push (@cmds, $_);
- }
- }
- close $fh;
-
- @cmds = shuffle @cmds; ## to even out load on grid nodes. Some may topload their jobs!
-
- my $cache_file = "$cmd_file.htc-cache_success";
-
- my $grid_runner = new HTC::GridRunner($grid_conf_file, $cache_file);
- my $ret = $grid_runner->run_on_grid(@cmds);
-
- if ($ret) {
-
- print STDERR "Error, not all commands could complete successfully... cannot continue.";
-
- exit(1);
- }
- else {
- ## all good
- exit(0);
- }
-}
-
-
-
diff --git a/util/support_scripts/ExitTester.jar b/util/support_scripts/ExitTester.jar
deleted file mode 100644
index a75d6d8..0000000
Binary files a/util/support_scripts/ExitTester.jar and /dev/null differ
diff --git a/util/support_scripts/get_Trinity_gene_to_trans_map.pl b/util/support_scripts/get_Trinity_gene_to_trans_map.pl
index 8e003ba..4559164 100755
--- a/util/support_scripts/get_Trinity_gene_to_trans_map.pl
+++ b/util/support_scripts/get_Trinity_gene_to_trans_map.pl
@@ -13,7 +13,7 @@ while (<>) {
print "$gene\t$trans\n";
}
- elsif ($acc =~ /^(comp\d+_c\d+)/) {
+ elsif ($acc =~ /^(.*comp\d+_c\d+)/) {
my $gene = $1;
my $trans = $acc;
print "$gene\t$trans\n";
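The widened pattern above lets the gene-to-transcript map handle accessions that carry a prefix ahead of the compN_cN component identifier. A quick illustration of the difference in Python (the accession string is invented for the example):

    import re

    acc = "GG1|comp3_c0_seq1"   # hypothetical prefixed Trinity accession

    old = re.match(r"^(comp\d+_c\d+)", acc)      # None: the prefix blocks the match
    new = re.match(r"^(.*comp\d+_c\d+)", acc)    # captures the gene id

    print(old)                                   # None
    print(new.group(1))                          # GG1|comp3_c0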
diff --git a/util/support_scripts/partition_chrysalis_graphs_n_reads.pl b/util/support_scripts/partition_chrysalis_graphs_n_reads.pl
index 20276b1..2ac78d8 100755
--- a/util/support_scripts/partition_chrysalis_graphs_n_reads.pl
+++ b/util/support_scripts/partition_chrysalis_graphs_n_reads.pl
@@ -51,7 +51,7 @@ unless ($deBruijns_file && $componentReads_file && $num_graphs_per_partition &&
main: {
- print "Partitioning chrysalis graphs and reads\n";
+ print STDERR "Partitioning chrysalis graphs and reads\n";
my $outdir_base = dirname($deBruijns_file);
my $components_directory = $outdir_base . '/Component_bins';
diff --git a/util/support_scripts/tests.py b/util/support_scripts/tests.py
index 249b68a..0f24adc 100644
--- a/util/support_scripts/tests.py
+++ b/util/support_scripts/tests.py
@@ -11,12 +11,10 @@ import time
# module load samtools
# Trinity
# Copy the .gz files in sample_data/test_Trinity_Assembly to current directory
-# also unzip the left file using zcat
# Run using nosetests
-TEMP_FILES = ['both.fa', 'inchworm.K25.L25.fa', 'jellyfish.kmers.fa',
- 'scaffolding_entries.sam', 'both.fa.read_count', 'jellyfish.kmers.fa',
- 'inchworm.kmer_count','jellyfish.kmers.fa.histo', 'scaffolding_entries.sam',
- 'chrysalis/GraphFromIwormFasta.out']
+MEM_FLAG = "--max_memory 2G"
+TEMP_FILES = ['both.fa', 'bowtie.nameSorted.bam', 'inchworm.K25.L25.fa', 'jellyfish.kmers.fa',
+ 'scaffolding_entries.sam', 'target.fa', 'target.1.ebwt']
class TestTrinity(unittest.TestCase):
@@ -33,7 +31,7 @@ class TestTrinity(unittest.TestCase):
def test_sample_data_seq_count(self):
self.trinity(
- "Trinity --seqType fq --JM 2G --left reads.left.fq.gz,reads2.left.fq.gz --right reads.right.fq.gz,reads2.right.fq.gz --SS_lib_type RF --CPU 4 --no_cleanup")
+ "Trinity --seqType fq %s --left reads.left.fq.gz,reads2.left.fq.gz --right reads.right.fq.gz,reads2.right.fq.gz --SS_lib_type RF --CPU 4 --no_cleanup" % MEM_FLAG)
handle = open("trinity_out_dir/Trinity.fasta", "rU")
seq_count = len([x for x in SeqIO.parse(handle, "fasta")])
handle.close()
@@ -41,7 +39,7 @@ class TestTrinity(unittest.TestCase):
def test_sample_data_trimmed_and_normalized(self):
self.trinity(
- "Trinity --seqType fq --JM 2G --left reads.left.fq.gz,reads2.left.fq.gz --right reads.right.fq.gz,reads2.right.fq.gz --SS_lib_type RF --CPU 4 --trimmomatic --normalize_reads --no_cleanup")
+ "Trinity --seqType fq %s --left reads.left.fq.gz,reads2.left.fq.gz --right reads.right.fq.gz,reads2.right.fq.gz --SS_lib_type RF --CPU 4 --trimmomatic --normalize_reads --no_cleanup" % MEM_FLAG)
handle = open("trinity_out_dir/Trinity.fasta", "rU")
seq_count = len([x for x in SeqIO.parse(handle, "fasta")])
handle.close()
@@ -49,13 +47,13 @@ class TestTrinity(unittest.TestCase):
def test_no_cleanup_leaves_temp_files(self):
self.trinity(
- "Trinity --seqType fq --JM 2G --left reads.left.fq.gz,reads2.left.fq.gz --right reads.right.fq.gz,reads2.right.fq.gz --SS_lib_type RF --CPU 4 --no_cleanup")
+ "Trinity --seqType fq %s --left reads.left.fq.gz,reads2.left.fq.gz --right reads.right.fq.gz,reads2.right.fq.gz --SS_lib_type RF --CPU 4 --no_cleanup" % MEM_FLAG)
for f in TEMP_FILES:
self.assertTrue(os.path.exists("trinity_out_dir/%s" % f), msg="%s not found with no_cleanup" % f)
def test_cleanup_removes_temp_files(self):
self.trinity(
- "Trinity --seqType fq --JM 2G --left reads.left.fq.gz,reads2.left.fq.gz --right reads.right.fq.gz,reads2.right.fq.gz --SS_lib_type RF --CPU 4 --full_cleanup")
+ "Trinity --seqType fq %s --left reads.left.fq.gz,reads2.left.fq.gz --right reads.right.fq.gz,reads2.right.fq.gz --SS_lib_type RF --CPU 4 --full_cleanup" % MEM_FLAG)
time.sleep(5) # Make sure the system has time to recognize the directory is gone
self.assertFalse(os.path.exists("trinity_out_dir"), msg="Did full_cleanup but trinity_out_dir exists")
self.assertTrue(os.path.isfile("trinity_out_dir.Trinity.fasta"),
@@ -68,33 +66,33 @@ class TestTrinity(unittest.TestCase):
self.assertTrue("Error, with --single reads, the --SS_lib_type can be 'F' or 'R' only." in e.output)
def test_single_end_with_fq(self):
- self.trinity("Trinity --JM 2G --seqType fq --single reads.left.fq --SS_lib_type F")
+ self.trinity("Trinity %s --seqType fq --single reads.left.fq --SS_lib_type F" % MEM_FLAG)
def test_no_run_chrysalis(self):
- self.trinity("Trinity --JM 2G --seqType fq --single reads.left.fq --SS_lib_type F --no_run_chrysalis")
+ self.trinity("Trinity %s --seqType fq --single reads.left.fq --SS_lib_type F --no_run_chrysalis" % MEM_FLAG)
self.assertEquals(0, len(os.listdir('trinity_out_dir/chrysalis')))
def test_no_run_butterfly(self):
- self.trinity("Trinity --JM 2G --seqType fq --single reads.left.fq --SS_lib_type F --no_run_butterfly")
+ self.trinity("Trinity %s --seqType fq --single reads.left.fq --SS_lib_type F --no_run_butterfly" % MEM_FLAG)
self.assertTrue(os.path.isfile("trinity_out_dir/chrysalis/butterfly_commands"),
msg="butterfly_commands file does not exist")
self.assertFalse(os.path.isfile("trinity_out_dir/butterfly_commands.completed"),
msg="Butterfly appears to have run although no_run_butterfly was specified")
def test_no_run_inchworm(self):
- self.trinity("Trinity --JM 2G --seqType fq --single reads.left.fq --SS_lib_type F --no_run_inchworm")
+ self.trinity("Trinity %s --seqType fq --single reads.left.fq --SS_lib_type F --no_run_inchworm" % MEM_FLAG)
self.assertFalse(os.path.isfile("trinity_out_dir/inchworm.K25.L25.fa.finished"),
msg="Inchworm appears to have run although no_run_inchworm was specified")
self.assertTrue(os.path.isfile("trinity_out_dir/jellyfish.kmers.fa"),
msg="jellyfish.kmers.fa was not created")
def test_no_bowtie(self):
- self.trinity("Trinity --JM 2G --seqType fq --single reads.left.fq --SS_lib_type F --no_bowtie")
+ self.trinity("Trinity %s --seqType fq --single reads.left.fq --SS_lib_type F --no_bowtie" % MEM_FLAG)
self.assertFalse(os.path.isfile("trinity_out_dir/bowtie.nameSorted.bam"),
msg="Bowtie appears to have run although no_bowtie was specified")
def test_no_run_quantifygraph(self):
- self.trinity("Trinity --JM 2G --seqType fq --single reads.left.fq --SS_lib_type F --no_run_quantifygraph")
+ self.trinity("Trinity %s --seqType fq --single reads.left.fq --SS_lib_type F --no_run_quantifygraph" % MEM_FLAG)
self.assertTrue(os.path.isfile("trinity_out_dir/chrysalis/file_partitioning.ok"),
msg="file_partitioning.ok flag file does not exist")
self.assertTrue(os.path.isfile("trinity_out_dir/chrysalis/quantifyGraph_commands"),
@@ -104,16 +102,17 @@ class TestTrinity(unittest.TestCase):
def test_single_end_with_fa_and_reverse(self):
self.fq2fa()
- self.trinity("Trinity --JM 2G --seqType fa --single reads.fa --SS_lib_type R")
+ self.trinity("Trinity %s --seqType fa --single reads.fa --SS_lib_type R" % MEM_FLAG)
def test_output_correctly_changes_dir(self):
shutil.rmtree('trinity_test', True)
- self.trinity("Trinity --JM 2G --seqType fq --single reads.left.fq --SS_lib_type F --output trinity_test")
+ self.trinity("Trinity %s --seqType fq --single reads.left.fq --SS_lib_type F --output trinity_test" % MEM_FLAG)
self.assertTrue(os.path.exists("trinity_test"), msg="Changed output directory but it was not created")
shutil.rmtree('trinity_test', True)
def test_chrysalis_output_correctly_changes_dir(self):
- self.trinity("Trinity --JM 2G --seqType fq --single reads.left.fq --SS_lib_type F --chrysalis_output chrysalis_test")
+ self.trinity(
+ "Trinity %s --seqType fq --single reads.left.fq --SS_lib_type F --chrysalis_output chrysalis_test" % MEM_FLAG)
self.assertTrue(os.path.exists("trinity_out_dir/chrysalis_test"),
msg="Changed chrysalis output directory but it was not created")
@@ -145,10 +144,11 @@ class TestTrinity(unittest.TestCase):
### Invalid command line tests
def test_no_JM_specified_error(self):
error = self.get_error("Trinity --seqType fq --single reads.left.fq --SS_lib_type F")
- self.assertTrue("Error, must specify max memory for jellyfish to use, eg. --JM 10G" in error)
+ self.assertTrue("Error, must specify max memory for jellyfish to use, eg. --max_memory 10G" in error)
def test_chrysalis_output_must_include_chrysalis(self):
- error = self.get_error("Trinity --JM 2G --seqType fq --single reads.left.fq --SS_lib_type F --chrysalis_output invalid_name")
+ error = self.get_error(
+ "Trinity %s --seqType fq --single reads.left.fq --SS_lib_type F --chrysalis_output invalid_name" % MEM_FLAG)
self.assertTrue("Error, chrysalis output directory name must include 'chrysalis' in the name" in error)
def test_invalid_option_error(self):
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/trinityrnaseq.git