[med-svn] [SCM] gmap branch, master, updated. upstream/2010-07-21-20-g95ea5b7

Wed Mar 23 18:26:41 UTC 2011

The following commit has been merged in the master branch:
commit 95ea5b7cc34322fd2106a539c605d63304554905
Author: Shaun Jackman <sjackman at debian.org>
Date:   Wed Mar 23 11:25:43 2011 -0700

    * New upstream release.

diff --git a/debian/changelog b/debian/changelog
index 2bbaf86..9cb0e8c 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+gmap (2011-03-11-1) unstable; urgency=low
+
+  * New upstream release.
+
+ -- Shaun Jackman <sjackman at debian.org>  Wed, 23 Mar 2011 10:16:38 -0700
+
 gmap (2010-07-27-1) unstable; urgency=low
 
   * New upstream release.
diff --git a/debian/gmap.1 b/debian/gmap.1
index a5d313c..aa488f0 100644
--- a/debian/gmap.1
+++ b/debian/gmap.1
@@ -1,4 +1,4 @@
-.TH GMAP "1" "Aug 2010" "GMAP 2010-07-27" "User Commands"
+.TH GMAP "1" "Mar 2011" "GMAP 2011-03-11" "User Commands"
 .SH NAME
 gmap \- Genomic Mapping and Alignment Program
 .SH SYNOPSIS
@@ -26,15 +26,27 @@ compressed version
 \fB\-g\fR, \fB\-\-gseg\fR=\fIfilename\fR
 User\-suppled genomic segment
 .TP
-\fB\-q\fR, \fB\-\-jobdiv\fR=\fIINT/INT\fR
-Process only i out of every n sequences
-e.g., 0/100 or 99/100
+\fB\-q\fR, \fB\-\-part\fR=\fIINT\fR/\fIINT\fR
+Process only the i-th out of every n sequences
+e.g., 0/100 or 99/100 (useful for distributing jobs
+to a computer farm).
+.TP
+\fB\-\-input\-buffer\fR=\fIINT\fR
+Size of input buffer (program reads this many sequences
+at a time for efficiency) (default 1000)
 .SS
 Computation options
 .TP
 \fB\-B\fR, \fB\-\-batch\fR=\fIINT\fR
-Batch mode (0 = no pre\-loading, 1 = pre\-load only indices;
-2 (default) = pre\-load both indices and genome)
+ Mode     Offsets       Positions       Genome
+   0      allocate      mmap            mmap
+   1      allocate      mmap & preload  mmap
+   2      allocate      mmap & preload  mmap & preload (default)
+   3      allocate      allocate        mmap & preload
+   4      allocate      allocate        allocate
+
+Note: For a single sequence, all data structures use mmap.
+If mmap not available and allocate not chosen, then will use fileio (slow)
 .TP
 \fB\-K\fR, \fB\-\-intronlength\fR=\fIINT\fR
 Max length for one intron (default 1000000)
@@ -42,13 +54,10 @@ Max length for one intron (default 1000000)
 \fB\-L\fR, \fB\-\-totallength\fR=\fIINT\fR
 Max total intron length (default 2400000)
 .TP
-\fB\-x\fR, \fB\-\-chimera_margin\fR=\fIINT\fR
+\fB\-x\fR, \fB\-\-chimera-margin\fR=\fIINT\fR
 Amount of unaligned sequence that triggers
 search for a chimera (default off)
 .TP
-\fB\-w\fR, \fB\-\-reference\fR=\fIfilename\fR
-Reference cDNA sequence for relative alignment
-.TP
 \fB\-t\fR, \fB\-\-nthreads\fR=\fIINT\fR
 Number of worker threads
 .TP
@@ -62,7 +71,8 @@ User\-suppled chromosome subset file
 Chromosome subset to search
 .TP
 \fB\-z\fR, \fB\-\-direction\fR=\fISTRING\fR
-cDNA direction (sense, antisense, or auto (default))
+cDNA direction (sense_force, antisense_force,
+sense_filter, antisense_filter, or auto (default))
 .TP
 \fB\-H\fR, \fB\-\-trimendexons\fR=\fIINT\fR
 Trim end exons with fewer than given number of matches
@@ -88,7 +98,7 @@ Show alignments
 \fB\-3\fR, \fB\-\-continuous\fR
 Show alignment in three continuous lines
 .TP
-\fB\-4\fR, \fB\-\-alignedexons\fR
+\fB\-4\fR, \fB\-\-continuous-by-exon\fR
 Show alignment in three lines per exon
 .TP
 \fB\-Z\fR, \fB\-\-compress\fR
@@ -105,14 +115,16 @@ Print protein sequence (genomic)
 .TP
 \fB\-f\fR, \fB\-\-format\fR=\fIINT\fR
 Format for output
- 1 = PSL (BLAT) format,
- 2 = GFF3 gene format,
- 3 = GFF3 cDNA_match format,
- 4 = GFF3 EST_match format,
- 6 = splicesites output (for GSNAP),
- 7 = IIT FASTA exon map format,
- 8 = IIT FASTA map format,
- 9 = coords in table format
+ 1 or psl = PSL (BLAT) format,
+ 2 or gff3_gene = GFF3 gene format,
+ 3 or gff3_match_cdna = GFF3 cDNA_match format,
+ 4 or gff3_match_est = GFF3 EST_match format,
+ 6 or splicesites = splicesites output (for GSNAP),
+ 7 or map_exons = IIT FASTA exon map format,
+ 8 or map_genes = IIT FASTA map format,
+ 9 or coords = coords in table format,
+ sampe = SAM format (setting paired_read bit in flag),
+ samse = SAM format (without setting paired_read bit)
 .SS
 Output options
 .TP
@@ -127,16 +139,33 @@ only if there is more than one worker thread)
 \fB\-5\fR, \fB\-\-md5\fR
 Print MD5 checksum for each query sequence
 .TP
-\fB\-o\fR, \fB\-\-chimera_overlap\fR
+\fB\-o\fR, \fB\-\-chimera-overlap\fR
 Overlap to show, if any, at chimera breakpoint
 .TP
+\fB\-\-failsonly\fR
+Print only failed alignments, those with no results
+.TP
+\fB\-\-nofails\fR
+Exclude printing of failed alignments
+.TP
+\fB\-\-fails\-as\-input\fR=\fISTRING\fR
+Print completely failed alignments as input FASTA or FASTQ format
+Allowed values: yes, no
+.TP
 \fB\-V\fR, \fB\-\-usesnps\fR=\fISTRING\fR
 Use database containing known SNPs (in <STRING>.iit, built
 previously using snpindex) for reporting output
 .TP
+\fB\-\-split-output\fR=\fISTRING\fR
+Basename for multiple-file output, separately for nomapping,
+uniq, mult, (and chimera, if --chimera-margin is selected)
+.TP
 \fB\-F\fR, \fB\-\-fulllength\fR
 Assume full\-length protein, starting with Met
 .TP
+\fB\-\-cdsstart\fR=\fIINT\fR
+Translate codons from given nucleotide (1-based)
+.TP
 \fB\-T\fR, \fB\-\-truncate\fR
 Truncate alignment around full\-length protein, Met to Stop
 Implies \fB\-F\fR flag.
@@ -144,6 +173,21 @@ Implies \fB\-F\fR flag.
 \fB\-Y\fR, \fB\-\-tolerant\fR
 Translates cDNA with corrections for frameshifts
 .SS
+Options for SAM output
+.TP
+\fB\-\-no\-sam\-headers\fR
+Do not print headers beginning with '@'
+.TP
+\fB\-\-noncanonical\-splices\fR=\fISTRING\fR
+Print non-canonical genomic gaps greater than 20 nt
+in CIGAR string as STRING. Allowed values: N (default), D.
+.TP
+\fB\-\-read\-group\-id\fR=\fISTRING\fR
+Value to put into read-group id (RG-ID) field
+.TP
+\fB\-\-read\-group\-name\fR=\fISTRING\fR
+Value to put into read-group name (RG-SM) field
+.SS
 External map file options
 .TP
 \fB\-M\fR, \fB\-\-mapdir\fR=\fIdirectory\fR
@@ -161,6 +205,9 @@ Report hits from both strands of genome
 .TP
 \fB\-u\fR, \fB\-\-flanking\fR=\fIINT\fR
 Show flanking hits (default 0)
+.TP
+\fB\-\-print\-comment\fR
+Show comment line for each hit
 .SS
 Alignment output options
 .TP
@@ -181,10 +228,10 @@ Wrap length for alignment (default=50)
 .SS
 Help options
 .TP
-\fB\-v\fR, \fB\-\-version\fR
+\fB\-\-version\fR
 Show version
 .TP
-\fB\-?\fR, \fB\-\-help\fR
+\fB\-\-help\fR
 Show this help message
 .SH ENVIRONMENT
 .TP
diff --git a/debian/gsnap.1 b/debian/gsnap.1
index 059e5a1..bc725a3 100644
--- a/debian/gsnap.1
+++ b/debian/gsnap.1
@@ -1,4 +1,4 @@
-.TH GSNAP "1" "Aug 2010" "GMAP 2010-07-27" "User Commands"
+.TH GSNAP "1" "Mar 2011" "GMAP 2011-03-11" "User Commands"
 .SH NAME
 gsnap \- Genomic Short-read Nucleotide Alignment Program
 .SH SYNOPSIS
@@ -19,10 +19,22 @@ Genome database
 .TP
 \fB\-q\fR, \fB\-\-part\fR=\fIINT/INT\fR
 Process only the i\-th out of every n sequences
-e.g., 0/100 or 99/100
+e.g., 0/100 or 99/100 (useful for distributing jobs to a computer farm).
 .TP
-\fB\-c\fR, \fB\-\-circular\-input\fR
-Circular\-end data (paired reads are on same strand)
+\fB\-\-input\-buffer\fR=\fIINT\fR
+Size of input buffer (program reads this many sequences
+at a time for efficiency) (default 1000)
+.TP
+\fB\-\-barcode\-length\fR=\fIINT\fR
+Amount of barcode to remove from start of read (default 0)
+.TP
+\fB\-\-pc\-linefeeds\fR
+Strip PC line feeds (ASCII 13) from input
+.TP
+\fB\-o\fR, \fB\-\-orientation\fR=\fISTRING\fR
+Orientation of paired-end reads
+Allowed values: FR (fwd-rev, or typical Illumina; default),
+RF (rev-fwd, for circularized inserts), or FF (fwd-fwd, same strand)
 .SS
 Computation options
 .PP
@@ -33,8 +45,15 @@ Also, indels, especially end indels, take longer to compute, although the algori
 is still designed to be fast.
 .TP
 \fB\-B\fR, \fB\-\-batch\fR=\fIINT\fR
-Batch mode (0 = no pre\-loading, 1 = pre\-load only indices;
-2 (default) = pre\-load both indices and genome)
+ Mode     Offsets       Positions       Genome
+   0      allocate      mmap            mmap
+   1      allocate      mmap & preload  mmap
+   2      allocate      mmap & preload  mmap & preload (default)
+   3      allocate      allocate        mmap & preload
+   4      allocate      allocate        allocate
+
+Note: For a single sequence, all data structures use mmap.
+If mmap not available and allocate not chosen, then will use fileio (slow)
 .TP
 \fB\-m\fR, \fB\-\-max\-mismatches\fR=\fIFLOAT\fR
 Maximum number of mismatches allowed (if not specified, then
@@ -45,8 +64,12 @@ of mismatches (including indel and splicing penalties)
 For RNA-Seq, you may need to increase this value slightly
 to align reads extending past the ends of an exon.
 .TP
+\fB\-\-terminal\-penalty\fR=\fIINT\fR
+Penalty for a terminal alignment (alignment from one end of the read
+to the best possible position at the other end) (default 1)
+.TP
 \fB\-i\fR, \fB\-\-indel\-penalty\fR=\fIINT\fR
-Penalty for an indel (default 1000, essentially turning it off).
+Penalty for an indel (default 1).
 Counts against mismatches allowed. To find indels, make
 indel\-penalty less than or equal to max\-mismatches
 For 2\-base reads, need to set indel\-penalty somewhat high
@@ -63,12 +86,12 @@ Maximum number of middle deletions allowed (default 30)
 \fB\-Y\fR, \fB\-\-max\-end\-insertions\fR=\fIINT\fR
 Maximum number of end insertions allowed (default 3)
 .TP
-\fB\-Y\fR, \fB\-\-max\-end\-deletions\fR=\fIINT\fR
+\fB\-Z\fR, \fB\-\-max\-end\-deletions\fR=\fIINT\fR
 Maximum number of end deletions allowed (default 6)
 .TP
-\fB\-M\fR, \fB\-\-suboptimal\-score\fR=\fIINT\fR
+\fB\-M\fR, \fB\-\-suboptimal\-levels\fR=\fIINT\fR
 Report suboptimal hits beyond best hit (default 0)
-All hits with best score plus suboptimal\-score are reported
+All hits with best score plus suboptimal-levels are reported
 .TP
 \fB\-R\fR, \fB\-\-masking\fR=\fIINT\fR
 Masking of frequent/repetitive oligomers to avoid spending time
@@ -82,25 +105,25 @@ on non\-unique or repetitive reads
 \fB-a\fR, \fB--adapter-strip\fR=\fISTRING\fR
 Method for removing adapters from reads. Currently allowed values: paired
 .TP
-\fB\-T\fR, \fB\-\-trimlength\fR=\fIINT\fR
-Maximum amount of trimming of mismatches at ends (default is 1000;
+\fB\-\-trim\-mismatch\-score\fR=\fIINT\fR
+Score to use for mismatches when trimming at ends (default is -3;
 to turn off trimming, specify 0)
 .TP
-\fB\-2\fR, \fB\-\-dibase\fR
-Input is 2\-base encoded (e.g., SOLiD), with database built
-previously using dibaseindex)
-.TP
-\fB\-C\fR, \fB\-\-cmet\fR
-Use database for methylcytosine experiments, built
-previously using cmetindex)
+\fB\-V\fR, \fB\-\-snpsdir\fR=\fISTRING\fR
+Directory for SNPs index files (created using snpindex) (default is
+location of genome index files specified using -D and -d)
 .TP
-\fB\-V\fR, \fB\-\-usesnps\fR=\fISTRING\fR
+\fB\-v\fR, \fB\-\-use\-snps\fR=\fISTRING\fR
 Use database containing known SNPs (in <STRING>.iit, built
 previously using snpindex) for tolerance to SNPs
 .TP
-\fB\-g\fR, \fB\-\-geneprob\fR=\fISTRING\fR
-Use IIT file containing geneprob (in <STRING>.iit, of cumulative
-format >(count) (genomicpos) to resolve ties
+\fB\-C\fR, \fB\-\-cmetdir\fR=\fISTRING\fR
+Directory for methylcytosine index files (created using cmetindex)
+(default is location of genome index files specified using -D, -V, and -d)
+.TP
+\fB\-c\fR, \fB\-\-cmet\fR
+Use database for methylcytosine experiments (built
+previously using cmetindex)
 .TP
 \fB\-t\fR, \fB\-\-nthreads\fR=\fIINT\fR
 Number of worker threads
@@ -111,9 +134,23 @@ Splicing options for RNA\-Seq
 Look for splicing involving known splice sites
 (in <STRING>.iit), at short or long distances
 .TP
+\fB\-S\fR, \fB\-\-splicetrie\-precompute\fR=\fIINT\fR
+Pre-compute splicetrie for all known splice sites
+(0=no, 1=yes (default)). Requires --splicesites flag
+and multiple sequence input.
+.TP
 \fB\-N\fR, \fB\-\-novelsplicing\fR=\fIINT\fR
 Look for novel splicing, not in known splice sites (if \fB\-s\fR provided)
-within shortsplicedist (\fB\-w\fR flag) or with novelspliceprob (\fB\-x\fR flag)
+.TP
+\fB\-\-novel\-doublesplices\fR
+Allow GSNAP to look for two splices in a single-end involving novel
+splice sites (default is not to allow this). Caution: this option
+can slow down the program considerably. A better way to detect
+double splices is with known splice sites, using the
+\fB\-\-splicesites\fR option.
+.TP
+\fB-w\fR, \fB\-\-localsplicedist\fR=\fIINT\fR
+Definition of local novel splicing event (default 200000)
 .TP
 \fB\-w\fR, \fB\-\-localsplicedist\fR=\fIINT\fR
 Definition of local novel splicing event (default 200000)
@@ -132,17 +169,51 @@ Minimum length at end required for local spliced alignments (default 15, min is
 \fB\-K\fR, \fB\-\-distant\-splice\-endlength\fR=\fIINT\fR
 Minimum length at end required for distant spliced alignments (default 16, min is 14)
 .TP
-\fB\-J\fR, \fB\-\-distant\-splice\-identity\fR=\fIFLOAT\fR
+\fB\-l\fR, \fB\-\-shortend\-splice\-endlength\fR=\fIINT\fR
+Minimum length at end required for short-end spliced alignments (default 2)
+.TP
+\fB\-\-distant\-splice\-identity\fR=\fIFLOAT\fR
 Minimum identity at end required for distant spliced alignments (default 0.95)
 .SS
 Options for paired\-end reads
 .TP
-\fB\-P\fR, \fB\-\-pairmax\fR=\fIINT\fR
+\fB\-\-pairmax-dna\fR=\fIINT\fR
 Max total genomic length for paired reads
-(default 1000). Should increase for RNA\-Seq reads.
+(default 1000). Should increase for RNA-Seq reads.
 .TP
-\fB\-p\fR, \fB\-\-pairlength\fR=\fIINT\fR
-Expected paired\-end length (default 200)
+\fB\-\-pairmax\-rna\fR=\fIINT\fR
+Max total genomic length for RNA-Seq paired reads, or other reads
+that could have a splice (default 200000). Used if -N or -s is specified.
+Should probably match the value for -w, --localsplicedist.
+.TP
+\fB\-\-pairexpect\fR=\fIINT\fR
+Expected paired-end length (default 200)
+.TP
+\fB\-\-pairdev\fR=\fIINT\fR
+Allowable deviation from expected paired-end length, used for
+discriminating between alternative alignments (default 50)
+.SS
+Options for quality scores
+.TP
+\fB\-\-quality\-protocol\fR=\fISTRING\fR
+Protocol for input quality scores.  Allowed values:
+
+ illumina (ASCII 64-126) (equivalent to -J 64 -j -31)
+ sanger   (ASCII 33-126) (equivalent to -J 33 -j 0)
+
+Default is sanger (no quality print shift)
+SAM output files should have quality scores in sanger protocol
+
+Or you can customize this behavior with these flags:
+.TP
+\fB-J\fR, \fB\-\-quality\-zero\-score\fR=\fIINT\fR
+FASTQ quality scores are zero at this ASCII value
+(default is 33 for sanger protocol; for Illumina, select 64)
+.TP
+\fB-j\fR, \fB\-\-quality\-print\-shift\fR=\fIINT\fR
+Shift FASTQ quality scores by this amount in output
+(default is 0 for sanger protocol; to change Illumina input
+to Sanger output, select -31)
 .SS
 Output options
 .TP
@@ -157,29 +228,51 @@ then nothing is printed.
 Print output in same order as input (relevant
 only if there is more than one worker thread)
 .TP
-\fB\-S\fR, \fB\-\-print\-snps\fR=\fIINT\fR
-Print detailed information about SNPs in reads (works only if \fB\-V\fR also selected)
-(0=no (default), 1=positions and labels)
+\fB\-\-show\-refdiff\fR
+For GSNAP output in SNP-tolerant alignment, shows all differences
+relative to the reference genome as lower case (otherwise, it shows
+all differences relative to both the reference and alternate genome)
 .TP
-\fB\-F\fR, \fB\-\-failsonly\fR
+\fB\-\-print\-snps\fR
+Print detailed information about SNPs in reads (works only if \fB\-v\fR also selected)
+(not fully implemented yet)
+.TP
+\fB\-\-failsonly\fR
 Print only failed alignments, those with no results
 .TP
-\fB\-f\fR, \fB\-\-nofails\fR
+\fB\-\-nofails\fR
 Exclude printing of failed alignments
 .TP
+\fB\-\-fails\-as\-input\fR=\fISTRING\fR
+Print completely failed alignments as input FASTA or FASTQ format
+Allowed values: yes, no
+.TP
 \fB\-A\fR, \fB\-\-format\fR=\fISTRING\fR
 Another format type, other than default.
 Currently implemented: sam
+Also allowed, but not installed at compile-time: goby
+(To install, need to re-compile with appropriate options)
+.SS
+Options for SAM output
+.TP
+\fB\-\-no\-sam\-headers\fR
+Do not print headers beginning with '@'
+.TP
+\fB\-\-sam\-headers\-batch\fR=\fIINT\fR
+Print headers only for this batch, as specified by -q
+.TP
+\fB\-\-read\-group\-id\fR=\fISTRING\fR
+Value to put into read-group id (RG-ID) field
 .TP
-\fB-j\fR, \fB--quality-shift\fR=\fIINT\fR
-Shift FASTQ quality scores by this amount in SAM output (default -31)
+\fB\-\-read\-group\-name\fR=\fISTRING\fR
+Value to put into read-group name (RG-SM) field
 .SS
 Help options
 .TP
-\fB\-v\fR, \fB\-\-version\fR
+\fB\-\-version\fR
 Show version
 .TP
-\fB\-?\fR, \fB\-\-help\fR
+\fB\-\-help\fR
 Show this help message
 .SH ENVIRONMENT
 .TP
diff --git a/debian/patches/install-data-local b/debian/patches/install-data-local
index ea8278b..56ee76a 100644
--- a/debian/patches/install-data-local
+++ b/debian/patches/install-data-local
@@ -2,7 +2,7 @@ Description: Add DESTDIR to install-data-local
 
 --- gmap.orig/Makefile.in
 +++ gmap/Makefile.in
-@@ -642,7 +642,7 @@
+@@ -650,7 +650,7 @@
  
  
  install-data-local:
@@ -10,4 +10,4 @@ Description: Add DESTDIR to install-data-local
 +	$(mkinstalldirs) $(DESTDIR)$(GMAPDB)
  
  dist-hook:
- 	if test -d CVS; then \
+ #	svn log -v --xml | ./svncl.pl > ChangeLog

-- 
Align mRNA and EST sequences to a genome