[med-svn] [sprai] 01/05: Imported Upstream version 0.9.9.14+dfsg
Afif Elghraoui
afif at moszumanska.debian.org
Wed Apr 27 02:59:47 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository sprai.
commit eea0dd6cd9761f9b86d61e33edbd82a114db743c
Author: Afif Elghraoui <afif at ghraoui.name>
Date: Tue Apr 26 19:35:08 2016 -0700
Imported Upstream version 0.9.9.14+dfsg
---
ChangeLog.txt | 1 +
ca_ikki_v5.pl | 4 +-
doc/_build/html/_sources/Download.txt | 4 ++
doc/_build/html/_sources/Example.txt | 28 ++++++++
doc/_build/html/_sources/README.txt | 37 ++++++++++-
doc/_build/html/_sources/index.txt | 4 ++
ezez4qsub_vx1.pl | 120 +++++++++++++++++++---------------
ezez_vx1.pl | 25 +++++--
makefile | 2 +-
wscript | 3 +-
10 files changed, 164 insertions(+), 64 deletions(-)
diff --git a/ChangeLog.txt b/ChangeLog.txt
new file mode 100644
index 0000000..dd03243
--- /dev/null
+++ b/ChangeLog.txt
@@ -0,0 +1 @@
+Please see doc/_build/html/index.html
diff --git a/ca_ikki_v5.pl b/ca_ikki_v5.pl
index b382c09..e4d6e2a 100755
--- a/ca_ikki_v5.pl
+++ b/ca_ikki_v5.pl
@@ -19,11 +19,11 @@ my @msg=(
"USAGE: <this> <asm.spec> estimated_genome_size",
#"[-from integer]",
#"[-to integer ]",
-"[-d directory in which fin.fq.gzs exist (default: $fastqdir)]",
+"[-d directory in which fin.idfq.gzs exist (default: $fastqdir)]",
"[-ca_path /path/to/your/wgs/Linux-amd64/bin (default: $ca_path)]",
#"[-tmp_dir temporary directory (default: $tmp_dir)]",
"[-out_dir output directory (default: $out_dir)]",
-"[-sprai_path the path to sprai installed (default: $sprai_path)]",
+"[-sprai_path the path to get_top_20x_fa.pl installed (default: $sprai_path)]",
"[-coverage int : use longer than N(coverage) reads for assembly (default: $coverage)]",
"",
"[-raw_fastq in.fq : use all reads in in.fq (default: off)]",
diff --git a/doc/_build/html/_sources/Download.txt b/doc/_build/html/_sources/Download.txt
index 3c584c0..3edb882 100644
--- a/doc/_build/html/_sources/Download.txt
+++ b/doc/_build/html/_sources/Download.txt
@@ -1,6 +1,10 @@
========
Download
========
+http://zombie.cb.k.u-tokyo.ac.jp/sprai/dist/sprai-0.9.9.14.tar.gz
+
+http://zombie.cb.k.u-tokyo.ac.jp/sprai/dist/sprai-0.9.9.13.tar.gz
+
http://zombie.cb.k.u-tokyo.ac.jp/sprai/dist/sprai-0.9.9.12.tar.gz
http://zombie.cb.k.u-tokyo.ac.jp/sprai/dist/sprai-0.9.9.11.tar.gz
diff --git a/doc/_build/html/_sources/Example.txt b/doc/_build/html/_sources/Example.txt
index 165b3b0..dcf1860 100644
--- a/doc/_build/html/_sources/Example.txt
+++ b/doc/_build/html/_sources/Example.txt
@@ -18,6 +18,9 @@ Go to `pacbiotoca wiki <http://sourceforge.net/apps/mediawiki/wgs-assembler/inde
tar xvzf sampleData.tar.gz
Convert fasta to fastq. You can use a fa2fq.pl script in Sprai.
+
+(Sprai version 0.9.9.13 or newer can be fed both fastq and fasta format. So you can skip converting fasta to fastq.)
+
::
fa2fq.pl sampleData/pacbio.filtered_subreads.fasta > pacbio.filtered_subreads.fq
@@ -61,6 +64,8 @@ If you do not know it, set 0.
*trim* is the number of nucleotides Sprai cut from both sides of alignments.
+*ca_path* is the path to your wgs-assembler (Celera Assembler) installed.
+
*word_size* is used by blastn.
Correct errors & assemble
@@ -83,6 +88,29 @@ Sprai extracts longest 20X reads of the *estimated_genome_size* from *c01.fin.id
And feed them to Celera Assembler.
Celera Assembler outputs files into *CA* directory.
+If you only correct errors and don't assemble, do
+
+::
+
+ ezez_vx1.pl ec.spec -ec_only > log.txt 2>&1 &
+
+or
+
+::
+
+ ezez_vx1.pl ec.spec > log.txt 2>&1 &
+
+After error correction, if you want to assemble corrected reads using Celera Assembler, do
+
+::
+
+ ca_ikki_v5.pl pbasm.spec estimated_genome_size \
+ -d directory in which fin.idfq.gzs exist \
+ -ca_path /path/to/your/wgs/Linux-amd64/bin \
+ -sprai_path the path to get_top_20x_fa.pl installed
+
+
+
Find contigs
===================
You will find contigs in a *CA/9-terminator/asm.ctg.fasta* file.
diff --git a/doc/_build/html/_sources/README.txt b/doc/_build/html/_sources/README.txt
index bc64a07..05e9e18 100644
--- a/doc/_build/html/_sources/README.txt
+++ b/doc/_build/html/_sources/README.txt
@@ -179,11 +179,13 @@ Edit *ec.spec*, and give *ca_path* parameter, which is the directory (full-path)
.. You can confirm what will happen by using fs2ctg_v4.pl with '-n' option.
Then, type the following commands
+
::
ezez_vx1.pl ec.spec pbasm.spec > log 2>&1 &
This will do sequencing-error correction, and contigs will be created.
+Note that parameter files for Sprai and Celera assembler are independent; you can run Celera with multiple nodes (machines) even with Sprai single node mode.
If you only need error-corrected reads and do not want Sprai (Celera assembler) to assemble them, do as follows
@@ -195,7 +197,27 @@ If you only need error-corrected reads and do not want Sprai (Celera assembler)
ezez_vx1.pl ec.spec /dev/null -ec_only > log 2>&1 &
-Note that parameter files for Sprai and Celera assembler are independent; you can run Celera with multiple nodes (machines) even with Sprai single node mode.
+or
+
+::
+
+ ezez_vx1.pl ec.spec -ec_only > log 2>&1 &
+
+or
+
+::
+
+ ezez_vx1.pl ec.spec > log 2>&1 &
+
+
+After error correction, if you want to assemble corrected reads using Celera Assembler, do
+
+::
+
+ ca_ikki_v5.pl pbasm.spec estimated_genome_size \
+ -d directory in which fin.idfq.gzs exist \
+ -ca_path /path/to/your/wgs/Linux-amd64/bin \
+ -sprai_path the path to get_top_20x_fa.pl installed
Multi-node mode 1 (qsub mode)
------------------------------
@@ -216,10 +238,23 @@ To correct sequencing errors of PacBio Continuous Long Reads and also would like
.. ezez4makefile.pl ec.spec asm.spec > ezez4makefile.log 2>&1 && make &
If you only use error-corrected reads and do not want Sprai (Celera assembler) to assemble them, do as follows
+
::
ezez4qsub_vx1.pl ec.spec /dev/null -ec_only > log 2>&1 &
+or
+
+::
+
+ ezez4qsub_vx1.pl ec.spec -ec_only > log 2>&1 &
+
+or
+
+::
+
+ ezez4qsub_vx1.pl ec.spec > log 2>&1 &
+
.. \or
.. ::
diff --git a/doc/_build/html/_sources/index.txt b/doc/_build/html/_sources/index.txt
index 0f79d24..b729ca9 100644
--- a/doc/_build/html/_sources/index.txt
+++ b/doc/_build/html/_sources/index.txt
@@ -20,6 +20,10 @@ Contents
Changelogs
=============
+2016.4.15: v0.9.9.14: -ec_only mode can be run without a spec file of Celera Assembler. (Thanks to Afif Elghraoui for a report)
+
+2016.4.13: v0.9.9.13: myrealigner.c: dynamic memory allocation for element & col pools. (Thanks to Tomoaki Nishiyama for code modifications)
+
2016.4.12: v0.9.9.12: Sprai is released under MIT license. See LICENSE.txt .
2015.10.20: v0.9.9.11: nss2v_v3.c: variable max read length. (Thanks to Tomoaki Nishiyama for code modifications)
diff --git a/ezez4qsub_vx1.pl b/ezez4qsub_vx1.pl
index 233994d..5f5e5c9 100755
--- a/ezez4qsub_vx1.pl
+++ b/ezez4qsub_vx1.pl
@@ -43,33 +43,41 @@ if($DEBUG){
my %params;
-my @emsgs=(
+my @msgs=(
'USAGE: <this> <ec.spec> <asm.spec>',
- '[-debug: outputs intermediate files (not implemented)]',
+ "or: <this> <ec.spec> -ec_only",
+ #'[-debug: outputs intermediate files (not implemented)]',
'[-n: outputs qsub scripts and does NOT qsub]',
'[-now yyyymmdd_hhmmss: use a XXX_yyyymmdd_hhmmss directories, detect unfinished jobs and restart at the appropriate stage.]',
- '[-ec_only: not assemble]',
+ "[-ec_only: does error correction and does NOT assemble]",
);
-if(@ARGV != 2){
- my $msg = join "\n\t",@emsgs;
- die "$msg\n";
+if(@ARGV == 0 || @ARGV > 2){
+ my $msg = join "\n\t",@msgs;
+ printf STDERR ("%s\n",$msg);
+ exit(1);
+}
+if(@ARGV == 1 && !$opt_ec_only){
+ printf STDERR ("WARNING: %s\n", "-ec_only was added");
+ $opt_ec_only = 1;
}
my $pwd = `pwd`;
chomp $pwd;
-my $asm_spec = $ARGV[1];
-$asm_spec =~ s/^\s+//;
-if(!-e $asm_spec){
- die "$asm_spec does not exist.\n";
-}
-
-if($asm_spec =~ /^\//){
- # real path; do nothing
-}
-else{
- $asm_spec = "$pwd/$asm_spec";
+my $asm_spec="";
+if(@ARGV == 2){
+ $asm_spec = $ARGV[1];
+ $asm_spec =~ s/^\s+//;
+ if(!-e $asm_spec){
+ die "$asm_spec does not exist.\n";
+ }
+ if($asm_spec =~ /^\//){
+ # real path; do nothing
+ }
+ else{
+ $asm_spec = "$pwd/$asm_spec";
+ }
}
{
@@ -242,7 +250,9 @@ printf STDERR ("evalue %g\n",$evalue);
printf STDERR ("num_threads %d\n",$num_threads);
printf STDERR ("valid_voters %s\n",$valid_voters);
printf STDERR ("trim %d\n",$trim);
-printf STDERR ("ca_path %s\n",$ca_path);
+if($ca_path){
+ printf STDERR ("ca_path %s\n",$ca_path);
+}
printf STDERR ("blast_path %s\n",$blast_path);
printf STDERR ("sprai_path %s\n",$sprai_path);
if($queue_req){
@@ -315,6 +325,10 @@ if(!-e "$bindir/bfmt72s"){
if(!-e "$path2blast/blastn"){
die "$path2blast/blastn does not exist in $path2blast\n"
}
+if(!$opt_ec_only && !-e $ca_path){
+ die "ca_path $ca_path does not exist.\n";
+}
+
$scriptdir = "$pwd/${scriptdir}_$now";
if(!-d $scriptdir){
@@ -666,7 +680,7 @@ for(my $index=$from; $index<$to; ++$index){
}
#my $PG3="$bindir/nss2v_v3 -v $valid_voters -q";
my $PG3="$bindir/nss2v_v3 -v $valid_voters";
- my $PG4="$bindir/myrealigner -f -B $valid_voters -b 3 -d 0.5";
+ my $PG4="$bindir/myrealigner -f -B $valid_voters -b 3 -d 0.5 -l 131072";
#my $PG3="$bindir/nss2v_v3 -q";
#my $PG3="$bindir/nss2v_v3 -q -s";
#my $PG3="$bindir/nss2v_v3 -v $valid_voters";
@@ -1187,25 +1201,26 @@ for(my $index=$from; $index<$to; ++$index){
if(!$opt_ec_only){
push(@post_array_jobs,$script);
push(@do_qsub_postaj,$do_qsub);
- }
- open my $fh, ">", $script or die $!;
- printf $fh ("#!/bin/bash\n");
- printf $fh ("#\$ -S /bin/bash\n");
- printf $fh ("#\$ -cwd\n");
- printf $fh ("#\$ -V\n");
- my $jobname = sprintf("$preprefix%02d_fastqToCA_$now",$index);
- push @runCA_holdjids,$jobname;
- printf $fh ("#\$ -N $jobname\n");
- if($queue_req){
- printf $fh ("#\$ $queue_req\n");
- }
- printf $fh ("#\$ -o $logdir\n");
- printf $fh ("#\$ -e $logdir\n");
- printf $fh ("#\$ -hold_jid $holdlist\n");
- printf $fh ("time ($command)\n");
+ open my $fh, ">", $script or die $!;
- close $fh;
+ printf $fh ("#!/bin/bash\n");
+ printf $fh ("#\$ -S /bin/bash\n");
+ printf $fh ("#\$ -cwd\n");
+ printf $fh ("#\$ -V\n");
+ my $jobname = sprintf("$preprefix%02d_fastqToCA_$now",$index);
+ push @runCA_holdjids,$jobname;
+ printf $fh ("#\$ -N $jobname\n");
+ if($queue_req){
+ printf $fh ("#\$ $queue_req\n");
+ }
+ printf $fh ("#\$ -o $logdir\n");
+ printf $fh ("#\$ -e $logdir\n");
+ printf $fh ("#\$ -hold_jid $holdlist\n");
+ printf $fh ("time ($command)\n");
+
+ close $fh;
+ }
}
if($index+1 == $to)
{
@@ -1244,24 +1259,25 @@ for(my $index=$from; $index<$to; ++$index){
if(!$opt_ec_only){
push(@post_array_jobs,$script);
push(@do_qsub_postaj,$do_qsub);
- }
- open my $fh, ">", $script or die $!;
- printf $fh ("#!/bin/bash\n");
- printf $fh ("#\$ -S /bin/bash\n");
- printf $fh ("#\$ -cwd\n");
- printf $fh ("#\$ -V\n");
- my $jobname = sprintf("$preprefix%02d_runCA_$now",$index);
- printf $fh ("#\$ -N $jobname\n");
- if($queue_req){
- printf $fh ("#\$ $queue_req\n");
- }
- printf $fh ("#\$ -o $logdir\n");
- printf $fh ("#\$ -e $logdir\n");
- printf $fh ("#\$ -hold_jid $holdlist\n");
- printf $fh ("time ($command)\n");
+ open my $fh, ">", $script or die $!;
- close $fh;
+ printf $fh ("#!/bin/bash\n");
+ printf $fh ("#\$ -S /bin/bash\n");
+ printf $fh ("#\$ -cwd\n");
+ printf $fh ("#\$ -V\n");
+ my $jobname = sprintf("$preprefix%02d_runCA_$now",$index);
+ printf $fh ("#\$ -N $jobname\n");
+ if($queue_req){
+ printf $fh ("#\$ $queue_req\n");
+ }
+ printf $fh ("#\$ -o $logdir\n");
+ printf $fh ("#\$ -e $logdir\n");
+ printf $fh ("#\$ -hold_jid $holdlist\n");
+ printf $fh ("time ($command)\n");
+
+ close $fh;
+ }
}
# print STDERR "post_array_jobs printed\n";
diff --git a/ezez_vx1.pl b/ezez_vx1.pl
index afa7827..35d7c01 100755
--- a/ezez_vx1.pl
+++ b/ezez_vx1.pl
@@ -31,19 +31,28 @@ my %params;
my @msgs = (
"USAGE: <this> <ec.spec> <asm.spec>",
+ "or: <this> <ec.spec> -ec_only",
"[-n: only shows parameters in ec.spec and exit.]",
"[-ec_only: does error correction and does NOT assemble]",
#'[-debug: outputs intermediate files (not implemented)]',
'[-now yyyymmdd_hhmmss: use a result_yyyymmdd_hhmmss directory, detect unfinished jobs and restart at the appropriate stage.]',
);
-if(@ARGV != 2){
+if(@ARGV == 0 || @ARGV > 2){
my $msg = join "\n\t",@msgs;
printf STDERR ("%s\n",$msg);
exit(1);
}
+if(@ARGV == 1 && !$opt_ec_only){
+ printf STDERR ("WARNING: %s\n", "-ec_only was added");
+ $opt_ec_only = 1;
+}
+#print "@ARGV\n";
-my $spec=$ARGV[1];
+my $spec="";
+if(@ARGV == 2){
+ $spec=$ARGV[1];
+}
{
my $ec_spec = $ARGV[0];
@@ -80,7 +89,7 @@ my $max_target_seqs=100;
my $valid_voters=11;
my $trim=42;
my $estimated_genome_size=0;
-my $ca_path="/home/imai/wgs-7.0/Linux-amd64/bin/";
+my $ca_path="";
my $word_size=0;
my $min_len_for_query=1;
my $max_len_for_query=1000000000000000;
@@ -187,7 +196,9 @@ printf STDERR ("evalue %g\n",$evalue);
printf STDERR ("num_threads %d\n",$num_threads);
printf STDERR ("valid_voters %s\n",$valid_voters);
printf STDERR ("trim %d\n",$trim);
-printf STDERR ("ca_path %s\n",$ca_path);
+if($ca_path){
+ printf STDERR ("ca_path %s\n",$ca_path);
+}
if($sprai_path){
printf STDERR ("sprai_path %s\n",$sprai_path);
}
@@ -211,8 +222,8 @@ if($max_target_seqs){
}
printf STDERR ("#>- params -<#\n");
-if(!-e $ca_path){
- die "$ca_path does not exist.\n";
+if(!$opt_ec_only && !-e $ca_path){
+ die "ca_path $ca_path does not exist.\n";
}
if(!$opt_ec_only && !-e $spec){
@@ -390,7 +401,7 @@ for(my $index=$from; $index<$to; ++$index){
if($sprai_path){
$PG3 = "$sprai_path/$PG3";
}
- my $PG4="myrealigner -f -B $valid_voters -b 3 -d 0.5";
+ my $PG4="myrealigner -f -B $valid_voters -b 3 -d 0.5 -l 131072";
if($sprai_path){
$PG4 = "$sprai_path/$PG4";
}
diff --git a/makefile b/makefile
index b1ca723..84ad6f2 100644
--- a/makefile
+++ b/makefile
@@ -1,5 +1,5 @@
APPNAME = 'sprai'
-VERSION = '0.9.9.13'
+VERSION = '0.9.9.14'
PREFIX=$(PWD)
COMPILED= \
diff --git a/wscript b/wscript
index bdf8f74..69d8d4e 100644
--- a/wscript
+++ b/wscript
@@ -1,5 +1,5 @@
APPNAME = 'sprai'
-VERSION = '0.9.9.13'
+VERSION = '0.9.9.14'
srcdir = '.'
blddir = 'build'
@@ -131,6 +131,7 @@ def dist(ctx):
'bfmt72s.c',
'col2fqcell.h',
'LICENSE.txt',
+ 'ChangeLog.txt',
'myrealigner.c',
'nss2v_v3.c',
'waf',
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/sprai.git
More information about the debian-med-commit
mailing list