[med-svn] [sprai] 01/05: Imported Upstream version 0.9.9.14+dfsg

Wed Apr 27 02:59:47 UTC 2016

This is an automated email from the git hooks/post-receive script.

afif pushed a commit to branch master
in repository sprai.

commit eea0dd6cd9761f9b86d61e33edbd82a114db743c
Author: Afif Elghraoui <afif at ghraoui.name>
Date:   Tue Apr 26 19:35:08 2016 -0700

    Imported Upstream version 0.9.9.14+dfsg
---
 ChangeLog.txt                         |   1 +
 ca_ikki_v5.pl                         |   4 +-
 doc/_build/html/_sources/Download.txt |   4 ++
 doc/_build/html/_sources/Example.txt  |  28 ++++++++
 doc/_build/html/_sources/README.txt   |  37 ++++++++++-
 doc/_build/html/_sources/index.txt    |   4 ++
 ezez4qsub_vx1.pl                      | 120 +++++++++++++++++++---------------
 ezez_vx1.pl                           |  25 +++++--
 makefile                              |   2 +-
 wscript                               |   3 +-
 10 files changed, 164 insertions(+), 64 deletions(-)

diff --git a/ChangeLog.txt b/ChangeLog.txt
new file mode 100644
index 0000000..dd03243
--- /dev/null
+++ b/ChangeLog.txt
@@ -0,0 +1 @@
+Please see doc/_build/html/index.html
diff --git a/ca_ikki_v5.pl b/ca_ikki_v5.pl
index b382c09..e4d6e2a 100755
--- a/ca_ikki_v5.pl
+++ b/ca_ikki_v5.pl
@@ -19,11 +19,11 @@ my @msg=(
 "USAGE: <this> <asm.spec> estimated_genome_size",
 #"[-from integer]",
 #"[-to integer ]",
-"[-d directory in which fin.fq.gzs exist (default: $fastqdir)]",
+"[-d directory in which fin.idfq.gzs exist (default: $fastqdir)]",
 "[-ca_path /path/to/your/wgs/Linux-amd64/bin (default: $ca_path)]",
 #"[-tmp_dir temporary directory (default: $tmp_dir)]",
 "[-out_dir output directory (default: $out_dir)]",
-"[-sprai_path the path to sprai installed (default: $sprai_path)]",
+"[-sprai_path the path to get_top_20x_fa.pl installed (default: $sprai_path)]",
 "[-coverage int : use longer than N(coverage) reads for assembly (default: $coverage)]",
 "",
 "[-raw_fastq in.fq : use all reads in in.fq (default: off)]",
diff --git a/doc/_build/html/_sources/Download.txt b/doc/_build/html/_sources/Download.txt
index 3c584c0..3edb882 100644
--- a/doc/_build/html/_sources/Download.txt
+++ b/doc/_build/html/_sources/Download.txt
@@ -1,6 +1,10 @@
 ========
 Download
 ========
+http://zombie.cb.k.u-tokyo.ac.jp/sprai/dist/sprai-0.9.9.14.tar.gz
+
+http://zombie.cb.k.u-tokyo.ac.jp/sprai/dist/sprai-0.9.9.13.tar.gz
+
 http://zombie.cb.k.u-tokyo.ac.jp/sprai/dist/sprai-0.9.9.12.tar.gz
 
 http://zombie.cb.k.u-tokyo.ac.jp/sprai/dist/sprai-0.9.9.11.tar.gz
diff --git a/doc/_build/html/_sources/Example.txt b/doc/_build/html/_sources/Example.txt
index 165b3b0..dcf1860 100644
--- a/doc/_build/html/_sources/Example.txt
+++ b/doc/_build/html/_sources/Example.txt
@@ -18,6 +18,9 @@ Go to `pacbiotoca wiki <http://sourceforge.net/apps/mediawiki/wgs-assembler/inde
    tar xvzf sampleData.tar.gz
 
 Convert fasta to fastq. You can use a fa2fq.pl script in Sprai.
+
+(Sprai version 0.9.9.13 or newer can be fed both fastq and fasta format. So you can skip converting fasta to fastq.)
+
 ::
 
    fa2fq.pl sampleData/pacbio.filtered_subreads.fasta > pacbio.filtered_subreads.fq
@@ -61,6 +64,8 @@ If you do not know it, set 0.
 
 *trim* is the number of nucleotides Sprai cut from both sides of alignments.
 
+*ca_path* is the path to your wgs-assembler (Celera Assembler) installed.
+
 *word_size* is used by blastn.
 
 Correct errors & assemble
@@ -83,6 +88,29 @@ Sprai extracts longest 20X reads of the *estimated_genome_size* from *c01.fin.id
 And feed them to Celera Assembler.
 Celera Assembler outputs files into *CA* directory.
 
+If you only correct errors and don't assemble, do
+
+::
+
+   ezez_vx1.pl ec.spec -ec_only > log.txt 2>&1 &
+
+or
+
+::
+
+   ezez_vx1.pl ec.spec > log.txt 2>&1 &
+
+After error correction, if you want to assemble corrected reads using Celera Assembler, do
+
+::
+
+   ca_ikki_v5.pl pbasm.spec estimated_genome_size \
+     -d directory in which fin.idfq.gzs exist \
+     -ca_path /path/to/your/wgs/Linux-amd64/bin \
+     -sprai_path the path to get_top_20x_fa.pl installed 
+
+
+
 Find contigs
 ===================
 You will find contigs in a *CA/9-terminator/asm.ctg.fasta* file.
diff --git a/doc/_build/html/_sources/README.txt b/doc/_build/html/_sources/README.txt
index bc64a07..05e9e18 100644
--- a/doc/_build/html/_sources/README.txt
+++ b/doc/_build/html/_sources/README.txt
@@ -179,11 +179,13 @@ Edit *ec.spec*, and give *ca_path* parameter, which is the directory (full-path)
 .. You can confirm what will happen by using fs2ctg_v4.pl with '-n' option.
 
 Then, type the following commands
+
 ::
 
    ezez_vx1.pl ec.spec pbasm.spec > log 2>&1 &
 
 This will do sequencing-error correction, and contigs will be created.
+Note that parameter files for Sprai and Celera assembler are independent; you can run Celera with multiple nodes (machines) even with Sprai single node mode.
 
 If you only need error-corrected reads and do not want Sprai (Celera assembler) to assemble them, do as follows
 
@@ -195,7 +197,27 @@ If you only need error-corrected reads and do not want Sprai (Celera assembler)
 
    ezez_vx1.pl ec.spec /dev/null -ec_only > log 2>&1 &
 
-Note that parameter files for Sprai and Celera assembler are independent; you can run Celera with multiple nodes (machines) even with Sprai single node mode.
+or
+
+::
+
+   ezez_vx1.pl ec.spec -ec_only > log 2>&1 &
+
+or
+
+::
+
+   ezez_vx1.pl ec.spec > log 2>&1 &
+
+
+After error correction, if you want to assemble corrected reads using Celera Assembler, do
+
+::
+
+   ca_ikki_v5.pl pbasm.spec estimated_genome_size \
+     -d directory in which fin.idfq.gzs exist \
+     -ca_path /path/to/your/wgs/Linux-amd64/bin \
+     -sprai_path the path to get_top_20x_fa.pl installed 
 
 Multi-node mode 1 (qsub mode)
 ------------------------------
@@ -216,10 +238,23 @@ To correct sequencing errors of PacBio Continuous Long Reads and also would like
 ..    ezez4makefile.pl ec.spec asm.spec > ezez4makefile.log 2>&1 && make &
 
 If you only use error-corrected reads and do not want Sprai (Celera assembler) to assemble them, do as follows
+
 ::
 
    ezez4qsub_vx1.pl ec.spec /dev/null -ec_only > log 2>&1 &
 
+or
+
+::
+
+   ezez4qsub_vx1.pl ec.spec -ec_only > log 2>&1 &
+
+or
+
+::
+
+   ezez4qsub_vx1.pl ec.spec > log 2>&1 &
+
 .. \or
 .. ::
 
diff --git a/doc/_build/html/_sources/index.txt b/doc/_build/html/_sources/index.txt
index 0f79d24..b729ca9 100644
--- a/doc/_build/html/_sources/index.txt
+++ b/doc/_build/html/_sources/index.txt
@@ -20,6 +20,10 @@ Contents
 
 Changelogs
 =============
+2016.4.15: v0.9.9.14: -ec_only mode can be run without a spec file of Celera Assembler. (Thanks to Afif Elghraoui for a report)
+
+2016.4.13: v0.9.9.13: myrealigner.c: dynamic memory allocation for element & col pools. (Thanks to Tomoaki Nishiyama for code modifications)
+
 2016.4.12: v0.9.9.12: Sprai is released under MIT license. See LICENSE.txt .
 
 2015.10.20: v0.9.9.11: nss2v_v3.c: variable max read length. (Thanks to Tomoaki Nishiyama for code modifications)
diff --git a/ezez4qsub_vx1.pl b/ezez4qsub_vx1.pl
index 233994d..5f5e5c9 100755
--- a/ezez4qsub_vx1.pl
+++ b/ezez4qsub_vx1.pl
@@ -43,33 +43,41 @@ if($DEBUG){
 
 my %params;
 
-my @emsgs=(
+my @msgs=(
   'USAGE: <this> <ec.spec> <asm.spec>',
-  '[-debug: outputs intermediate files (not implemented)]',
+  "or: <this> <ec.spec> -ec_only",
+  #'[-debug: outputs intermediate files (not implemented)]',
   '[-n: outputs qsub scripts and does NOT qsub]',
   '[-now yyyymmdd_hhmmss: use a XXX_yyyymmdd_hhmmss directories, detect unfinished jobs and restart at the appropriate stage.]',
-  '[-ec_only: not assemble]',
+  "[-ec_only: does error correction and does NOT assemble]",
 );
 
-if(@ARGV != 2){
-  my $msg = join "\n\t",@emsgs;
-  die "$msg\n";
+if(@ARGV == 0 || @ARGV > 2){
+  my $msg = join "\n\t",@msgs;
+  printf STDERR ("%s\n",$msg);
+  exit(1);
+}
+if(@ARGV == 1 && !$opt_ec_only){
+  printf STDERR ("WARNING: %s\n", "-ec_only was added");
+  $opt_ec_only = 1;
 }
 
 my $pwd = `pwd`;
 chomp $pwd;
 
-my $asm_spec = $ARGV[1];
-$asm_spec =~ s/^\s+//;
-if(!-e $asm_spec){
-  die "$asm_spec does not exist.\n";
-}
-
-if($asm_spec =~ /^\//){
-  # real path; do nothing
-}
-else{
-  $asm_spec = "$pwd/$asm_spec";
+my $asm_spec="";
+if(@ARGV == 2){
+  $asm_spec = $ARGV[1];
+  $asm_spec =~ s/^\s+//;
+  if(!-e $asm_spec){
+    die "$asm_spec does not exist.\n";
+  }
+  if($asm_spec =~ /^\//){
+    # real path; do nothing
+  }
+  else{
+    $asm_spec = "$pwd/$asm_spec";
+  }
 }
 
 {
@@ -242,7 +250,9 @@ printf STDERR ("evalue %g\n",$evalue);
 printf STDERR ("num_threads %d\n",$num_threads);
 printf STDERR ("valid_voters %s\n",$valid_voters);
 printf STDERR ("trim %d\n",$trim);
-printf STDERR ("ca_path %s\n",$ca_path);
+if($ca_path){
+  printf STDERR ("ca_path %s\n",$ca_path);
+}
 printf STDERR ("blast_path %s\n",$blast_path);
 printf STDERR ("sprai_path %s\n",$sprai_path);
 if($queue_req){
@@ -315,6 +325,10 @@ if(!-e "$bindir/bfmt72s"){
 if(!-e "$path2blast/blastn"){
   die "$path2blast/blastn does not exist in $path2blast\n"
 }
+if(!$opt_ec_only && !-e $ca_path){
+  die "ca_path $ca_path does not exist.\n";
+}
+
 
 $scriptdir = "$pwd/${scriptdir}_$now";
 if(!-d $scriptdir){
@@ -666,7 +680,7 @@ for(my $index=$from; $index<$to; ++$index){
       }
       #my $PG3="$bindir/nss2v_v3 -v $valid_voters -q";
       my $PG3="$bindir/nss2v_v3 -v $valid_voters";
-      my $PG4="$bindir/myrealigner -f -B $valid_voters -b 3 -d 0.5";
+      my $PG4="$bindir/myrealigner -f -B $valid_voters -b 3 -d 0.5 -l 131072";
       #my $PG3="$bindir/nss2v_v3 -q";
       #my $PG3="$bindir/nss2v_v3 -q -s";
       #my $PG3="$bindir/nss2v_v3 -v $valid_voters";
@@ -1187,25 +1201,26 @@ for(my $index=$from; $index<$to; ++$index){
     if(!$opt_ec_only){
       push(@post_array_jobs,$script);
       push(@do_qsub_postaj,$do_qsub);
-    }
-    open my $fh, ">", $script or die $!;
 
-    printf $fh ("#!/bin/bash\n");
-    printf $fh ("#\$ -S /bin/bash\n");
-    printf $fh ("#\$ -cwd\n");
-    printf $fh ("#\$ -V\n");
-    my $jobname = sprintf("$preprefix%02d_fastqToCA_$now",$index);
-    push @runCA_holdjids,$jobname;
-    printf $fh ("#\$ -N $jobname\n");
-    if($queue_req){
-      printf $fh ("#\$ $queue_req\n");
-    }
-    printf $fh ("#\$ -o $logdir\n");
-    printf $fh ("#\$ -e $logdir\n");
-    printf $fh ("#\$ -hold_jid $holdlist\n");
-    printf $fh ("time ($command)\n");
+      open my $fh, ">", $script or die $!;
 
-    close $fh;
+      printf $fh ("#!/bin/bash\n");
+      printf $fh ("#\$ -S /bin/bash\n");
+      printf $fh ("#\$ -cwd\n");
+      printf $fh ("#\$ -V\n");
+      my $jobname = sprintf("$preprefix%02d_fastqToCA_$now",$index);
+      push @runCA_holdjids,$jobname;
+      printf $fh ("#\$ -N $jobname\n");
+      if($queue_req){
+        printf $fh ("#\$ $queue_req\n");
+      }
+      printf $fh ("#\$ -o $logdir\n");
+      printf $fh ("#\$ -e $logdir\n");
+      printf $fh ("#\$ -hold_jid $holdlist\n");
+      printf $fh ("time ($command)\n");
+
+      close $fh;
+    }
   }
   if($index+1 == $to)
   {
@@ -1244,24 +1259,25 @@ for(my $index=$from; $index<$to; ++$index){
     if(!$opt_ec_only){
       push(@post_array_jobs,$script);
       push(@do_qsub_postaj,$do_qsub);
-    }
-    open my $fh, ">", $script or die $!;
 
-    printf $fh ("#!/bin/bash\n");
-    printf $fh ("#\$ -S /bin/bash\n");
-    printf $fh ("#\$ -cwd\n");
-    printf $fh ("#\$ -V\n");
-    my $jobname = sprintf("$preprefix%02d_runCA_$now",$index);
-    printf $fh ("#\$ -N $jobname\n");
-    if($queue_req){
-      printf $fh ("#\$ $queue_req\n");
-    }
-    printf $fh ("#\$ -o $logdir\n");
-    printf $fh ("#\$ -e $logdir\n");
-    printf $fh ("#\$ -hold_jid $holdlist\n");
-    printf $fh ("time ($command)\n");
+      open my $fh, ">", $script or die $!;
 
-    close $fh;
+      printf $fh ("#!/bin/bash\n");
+      printf $fh ("#\$ -S /bin/bash\n");
+      printf $fh ("#\$ -cwd\n");
+      printf $fh ("#\$ -V\n");
+      my $jobname = sprintf("$preprefix%02d_runCA_$now",$index);
+      printf $fh ("#\$ -N $jobname\n");
+      if($queue_req){
+        printf $fh ("#\$ $queue_req\n");
+      }
+      printf $fh ("#\$ -o $logdir\n");
+      printf $fh ("#\$ -e $logdir\n");
+      printf $fh ("#\$ -hold_jid $holdlist\n");
+      printf $fh ("time ($command)\n");
+
+      close $fh;
+    }
   }
 
 #  print STDERR "post_array_jobs printed\n";
diff --git a/ezez_vx1.pl b/ezez_vx1.pl
index afa7827..35d7c01 100755
--- a/ezez_vx1.pl
+++ b/ezez_vx1.pl
@@ -31,19 +31,28 @@ my %params;
 
 my @msgs = (
   "USAGE: <this> <ec.spec> <asm.spec>",
+  "or: <this> <ec.spec> -ec_only",
   "[-n: only shows parameters in ec.spec and exit.]",
   "[-ec_only: does error correction and does NOT assemble]",
   #'[-debug: outputs intermediate files (not implemented)]',
   '[-now yyyymmdd_hhmmss: use a result_yyyymmdd_hhmmss directory, detect unfinished jobs and restart at the appropriate stage.]',
 );
 
-if(@ARGV != 2){
+if(@ARGV == 0 || @ARGV > 2){
   my $msg = join "\n\t",@msgs;
   printf STDERR ("%s\n",$msg);
   exit(1);
 }
+if(@ARGV == 1 && !$opt_ec_only){
+  printf STDERR ("WARNING: %s\n", "-ec_only was added");
+  $opt_ec_only = 1;
+}
+#print "@ARGV\n";
 
-my $spec=$ARGV[1];
+my $spec="";
+if(@ARGV == 2){
+  $spec=$ARGV[1];
+}
 
 {
   my $ec_spec = $ARGV[0];
@@ -80,7 +89,7 @@ my $max_target_seqs=100;
 my $valid_voters=11;
 my $trim=42;
 my $estimated_genome_size=0;
-my $ca_path="/home/imai/wgs-7.0/Linux-amd64/bin/";
+my $ca_path="";
 my $word_size=0;
 my $min_len_for_query=1;
 my $max_len_for_query=1000000000000000;
@@ -187,7 +196,9 @@ printf STDERR ("evalue %g\n",$evalue);
 printf STDERR ("num_threads %d\n",$num_threads);
 printf STDERR ("valid_voters %s\n",$valid_voters);
 printf STDERR ("trim %d\n",$trim);
-printf STDERR ("ca_path %s\n",$ca_path);
+if($ca_path){
+  printf STDERR ("ca_path %s\n",$ca_path);
+}
 if($sprai_path){
   printf STDERR ("sprai_path %s\n",$sprai_path);
 }
@@ -211,8 +222,8 @@ if($max_target_seqs){
 }
 printf STDERR ("#>- params -<#\n");
 
-if(!-e $ca_path){
-  die "$ca_path does not exist.\n";
+if(!$opt_ec_only && !-e $ca_path){
+  die "ca_path $ca_path does not exist.\n";
 }
 
 if(!$opt_ec_only && !-e $spec){
@@ -390,7 +401,7 @@ for(my $index=$from; $index<$to; ++$index){
       if($sprai_path){
         $PG3 = "$sprai_path/$PG3";
       }
-      my $PG4="myrealigner -f -B $valid_voters -b 3 -d 0.5";
+      my $PG4="myrealigner -f -B $valid_voters -b 3 -d 0.5 -l 131072";
       if($sprai_path){
         $PG4 = "$sprai_path/$PG4";
       }
diff --git a/makefile b/makefile
index b1ca723..84ad6f2 100644
--- a/makefile
+++ b/makefile
@@ -1,5 +1,5 @@
 APPNAME = 'sprai'
-VERSION = '0.9.9.13'
+VERSION = '0.9.9.14'
 
 PREFIX=$(PWD)
 COMPILED= \
diff --git a/wscript b/wscript
index bdf8f74..69d8d4e 100644
--- a/wscript
+++ b/wscript
@@ -1,5 +1,5 @@
 APPNAME = 'sprai'
-VERSION = '0.9.9.13'
+VERSION = '0.9.9.14'
 
 srcdir = '.'
 blddir = 'build'
@@ -131,6 +131,7 @@ def dist(ctx):
         'bfmt72s.c',
         'col2fqcell.h',
         'LICENSE.txt',
+        'ChangeLog.txt',
         'myrealigner.c',
         'nss2v_v3.c',
         'waf',

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/sprai.git