[med-svn] [SCM] gmap branch, master, updated. upstream/2010-07-21-20-g95ea5b7
Shaun Jackman
sjackman at debian.org
Wed Mar 23 18:26:41 UTC 2011
The following commit has been merged in the master branch:
commit 95ea5b7cc34322fd2106a539c605d63304554905
Author: Shaun Jackman <sjackman at debian.org>
Date: Wed Mar 23 11:25:43 2011 -0700
* New upstream release.
diff --git a/debian/changelog b/debian/changelog
index 2bbaf86..9cb0e8c 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+gmap (2011-03-11-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Shaun Jackman <sjackman at debian.org> Wed, 23 Mar 2011 10:16:38 -0700
+
gmap (2010-07-27-1) unstable; urgency=low
* New upstream release.
diff --git a/debian/gmap.1 b/debian/gmap.1
index a5d313c..aa488f0 100644
--- a/debian/gmap.1
+++ b/debian/gmap.1
@@ -1,4 +1,4 @@
-.TH GMAP "1" "Aug 2010" "GMAP 2010-07-27" "User Commands"
+.TH GMAP "1" "Mar 2011" "GMAP 2011-03-11" "User Commands"
.SH NAME
gmap \- Genomic Mapping and Alignment Program
.SH SYNOPSIS
@@ -26,15 +26,27 @@ compressed version
\fB\-g\fR, \fB\-\-gseg\fR=\fIfilename\fR
User\-supplied genomic segment
.TP
-\fB\-q\fR, \fB\-\-jobdiv\fR=\fIINT/INT\fR
-Process only i out of every n sequences
-e.g., 0/100 or 99/100
+\fB\-q\fR, \fB\-\-part\fR=\fIINT\fR/\fIINT\fR
+Process only the i-th out of every n sequences
+e.g., 0/100 or 99/100 (useful for distributing jobs
+to a computer farm).
+.TP
+\fB\-\-input\-buffer\fR=\fIINT\fR
+Size of input buffer (program reads this many sequences
+at a time for efficiency) (default 1000)
.SS
Computation options
.TP
\fB\-B\fR, \fB\-\-batch\fR=\fIINT\fR
-Batch mode (0 = no pre\-loading, 1 = pre\-load only indices;
-2 (default) = pre\-load both indices and genome)
+ Mode Offsets Positions Genome
+ 0 allocate mmap mmap
+ 1 allocate mmap & preload mmap
+ 2 allocate mmap & preload mmap & preload (default)
+ 3 allocate allocate mmap & preload
+ 4 allocate allocate allocate
+
+Note: For a single sequence, all data structures use mmap.
+If mmap not available and allocate not chosen, then will use fileio (slow)
.TP
\fB\-K\fR, \fB\-\-intronlength\fR=\fIINT\fR
Max length for one intron (default 1000000)
@@ -42,13 +54,10 @@ Max length for one intron (default 1000000)
\fB\-L\fR, \fB\-\-totallength\fR=\fIINT\fR
Max total intron length (default 2400000)
.TP
-\fB\-x\fR, \fB\-\-chimera_margin\fR=\fIINT\fR
+\fB\-x\fR, \fB\-\-chimera-margin\fR=\fIINT\fR
Amount of unaligned sequence that triggers
search for a chimera (default off)
.TP
-\fB\-w\fR, \fB\-\-reference\fR=\fIfilename\fR
-Reference cDNA sequence for relative alignment
-.TP
\fB\-t\fR, \fB\-\-nthreads\fR=\fIINT\fR
Number of worker threads
.TP
@@ -62,7 +71,8 @@ User\-suppled chromosome subset file
Chromosome subset to search
.TP
\fB\-z\fR, \fB\-\-direction\fR=\fISTRING\fR
-cDNA direction (sense, antisense, or auto (default))
+cDNA direction (sense_force, antisense_force,
+sense_filter, antisense_filter, or auto (default))
.TP
\fB\-H\fR, \fB\-\-trimendexons\fR=\fIINT\fR
Trim end exons with fewer than given number of matches
@@ -88,7 +98,7 @@ Show alignments
\fB\-3\fR, \fB\-\-continuous\fR
Show alignment in three continuous lines
.TP
-\fB\-4\fR, \fB\-\-alignedexons\fR
+\fB\-4\fR, \fB\-\-continuous-by-exon\fR
Show alignment in three lines per exon
.TP
\fB\-Z\fR, \fB\-\-compress\fR
@@ -105,14 +115,16 @@ Print protein sequence (genomic)
.TP
\fB\-f\fR, \fB\-\-format\fR=\fIINT\fR
Format for output
- 1 = PSL (BLAT) format,
- 2 = GFF3 gene format,
- 3 = GFF3 cDNA_match format,
- 4 = GFF3 EST_match format,
- 6 = splicesites output (for GSNAP),
- 7 = IIT FASTA exon map format,
- 8 = IIT FASTA map format,
- 9 = coords in table format
+ 1 or psl = PSL (BLAT) format,
+ 2 or gff3_gene = GFF3 gene format,
+ 3 or gff3_match_cdna = GFF3 cDNA_match format,
+ 4 or gff3_match_est = GFF3 EST_match format,
+ 6 or splicesites = splicesites output (for GSNAP),
+ 7 or map_exons = IIT FASTA exon map format,
+ 8 or map_genes = IIT FASTA map format,
+ 9 or coords = coords in table format,
+ sampe = SAM format (setting paired_read bit in flag),
+ samse = SAM format (without setting paired_read bit)
.SS
Output options
.TP
@@ -127,16 +139,33 @@ only if there is more than one worker thread)
\fB\-5\fR, \fB\-\-md5\fR
Print MD5 checksum for each query sequence
.TP
-\fB\-o\fR, \fB\-\-chimera_overlap\fR
+\fB\-o\fR, \fB\-\-chimera-overlap\fR
Overlap to show, if any, at chimera breakpoint
.TP
+\fB\-\-failsonly\fR
+Print only failed alignments, those with no results
+.TP
+\fB\-\-nofails\fR
+Exclude printing of failed alignments
+.TP
+\fB\-\-fails\-as\-input\fR=\fISTRING\fR
+Print completely failed alignments as input FASTA or FASTQ format
+Allowed values: yes, no
+.TP
\fB\-V\fR, \fB\-\-usesnps\fR=\fISTRING\fR
Use database containing known SNPs (in <STRING>.iit, built
previously using snpindex) for reporting output
.TP
+\fB\-\-split-output\fR=\fISTRING\fR
+Basename for multiple-file output, separately for nomapping,
+uniq, mult, (and chimera, if --chimera-margin is selected)
+.TP
\fB\-F\fR, \fB\-\-fulllength\fR
Assume full\-length protein, starting with Met
.TP
+\fB\-\-cdsstart\fR=\fIINT\fR
+Translate codons from given nucleotide (1-based)
+.TP
\fB\-T\fR, \fB\-\-truncate\fR
Truncate alignment around full\-length protein, Met to Stop
Implies \fB\-F\fR flag.
@@ -144,6 +173,21 @@ Implies \fB\-F\fR flag.
\fB\-Y\fR, \fB\-\-tolerant\fR
Translates cDNA with corrections for frameshifts
.SS
+Options for SAM output
+.TP
+\fB\-\-no\-sam\-headers\fR
+Do not print headers beginning with '@'
+.TP
+\fB\-\-noncanonical\-splices\fR=\fISTRING\fR
+Print non-canonical genomic gaps greater than 20 nt
+in CIGAR string as STRING. Allowed values: N (default), D.
+.TP
+\fB\-\-read\-group\-id\fR=\fISTRING\fR
+Value to put into read-group id (RG-ID) field
+.TP
+\fB\-\-read\-group\-name\fR=\fISTRING\fR
+Value to put into read-group name (RG-SM) field
+.SS
External map file options
.TP
\fB\-M\fR, \fB\-\-mapdir\fR=\fIdirectory\fR
@@ -161,6 +205,9 @@ Report hits from both strands of genome
.TP
\fB\-u\fR, \fB\-\-flanking\fR=\fIINT\fR
Show flanking hits (default 0)
+.TP
+\fB\-\-print\-comment\fR
+Show comment line for each hit
.SS
Alignment output options
.TP
@@ -181,10 +228,10 @@ Wrap length for alignment (default=50)
.SS
Help options
.TP
-\fB\-v\fR, \fB\-\-version\fR
+\fB\-\-version\fR
Show version
.TP
-\fB\-?\fR, \fB\-\-help\fR
+\fB\-\-help\fR
Show this help message
.SH ENVIRONMENT
.TP
diff --git a/debian/gsnap.1 b/debian/gsnap.1
index 059e5a1..bc725a3 100644
--- a/debian/gsnap.1
+++ b/debian/gsnap.1
@@ -1,4 +1,4 @@
-.TH GSNAP "1" "Aug 2010" "GMAP 2010-07-27" "User Commands"
+.TH GSNAP "1" "Mar 2011" "GMAP 2011-03-11" "User Commands"
.SH NAME
gsnap \- Genomic Short-read Nucleotide Alignment Program
.SH SYNOPSIS
@@ -19,10 +19,22 @@ Genome database
.TP
\fB\-q\fR, \fB\-\-part\fR=\fIINT/INT\fR
Process only the i\-th out of every n sequences
-e.g., 0/100 or 99/100
+e.g., 0/100 or 99/100 (useful for distributing jobs to a computer farm).
.TP
-\fB\-c\fR, \fB\-\-circular\-input\fR
-Circular\-end data (paired reads are on same strand)
+\fB\-\-input\-buffer\fR=\fIINT\fR
+Size of input buffer (program reads this many sequences
+at a time for efficiency) (default 1000)
+.TP
+\fB\-\-barcode\-length\fR=\fIINT\fR
+Amount of barcode to remove from start of read (default 0)
+.TP
+\fB\-\-pc\-linefeeds\fR
+Strip PC line feeds (ASCII 13) from input
+.TP
+\fB\-o\fR, \fB\-\-orientation\fR=\fISTRING\fR
+Orientation of paired-end reads
+Allowed values: FR (fwd-rev, or typical Illumina; default),
+RF (rev-fwd, for circularized inserts), or FF (fwd-fwd, same strand)
.SS
Computation options
.PP
@@ -33,8 +45,15 @@ Also, indels, especially end indels, take longer to compute, although the algori
is still designed to be fast.
.TP
\fB\-B\fR, \fB\-\-batch\fR=\fIINT\fR
-Batch mode (0 = no pre\-loading, 1 = pre\-load only indices;
-2 (default) = pre\-load both indices and genome)
+ Mode Offsets Positions Genome
+ 0 allocate mmap mmap
+ 1 allocate mmap & preload mmap
+ 2 allocate mmap & preload mmap & preload (default)
+ 3 allocate allocate mmap & preload
+ 4 allocate allocate allocate
+
+Note: For a single sequence, all data structures use mmap.
+If mmap not available and allocate not chosen, then will use fileio (slow)
.TP
\fB\-m\fR, \fB\-\-max\-mismatches\fR=\fIFLOAT\fR
Maximum number of mismatches allowed (if not specified, then
@@ -45,8 +64,12 @@ of mismatches (including indel and splicing penalties)
For RNA-Seq, you may need to increase this value slightly
to align reads extending past the ends of an exon.
.TP
+\fB\-\-terminal\-penalty\fR=\fIINT\fR
+Penalty for a terminal alignment (alignment from one end of the read
+to the best possible position at the other end) (default 1)
+.TP
\fB\-i\fR, \fB\-\-indel\-penalty\fR=\fIINT\fR
-Penalty for an indel (default 1000, essentially turning it off).
+Penalty for an indel (default 1).
Counts against mismatches allowed. To find indels, make
indel\-penalty less than or equal to max\-mismatches
For 2\-base reads, need to set indel\-penalty somewhat high
@@ -63,12 +86,12 @@ Maximum number of middle deletions allowed (default 30)
\fB\-Y\fR, \fB\-\-max\-end\-insertions\fR=\fIINT\fR
Maximum number of end insertions allowed (default 3)
.TP
-\fB\-Y\fR, \fB\-\-max\-end\-deletions\fR=\fIINT\fR
+\fB\-Z\fR, \fB\-\-max\-end\-deletions\fR=\fIINT\fR
Maximum number of end deletions allowed (default 6)
.TP
-\fB\-M\fR, \fB\-\-suboptimal\-score\fR=\fIINT\fR
+\fB\-M\fR, \fB\-\-suboptimal\-levels\fR=\fIINT\fR
Report suboptimal hits beyond best hit (default 0)
-All hits with best score plus suboptimal\-score are reported
+All hits with best score plus suboptimal-levels are reported
.TP
\fB\-R\fR, \fB\-\-masking\fR=\fIINT\fR
Masking of frequent/repetitive oligomers to avoid spending time
@@ -82,25 +105,25 @@ on non\-unique or repetitive reads
\fB-a\fR, \fB--adapter-strip\fR=\fISTRING\fR
Method for removing adapters from reads. Currently allowed values: paired
.TP
-\fB\-T\fR, \fB\-\-trimlength\fR=\fIINT\fR
-Maximum amount of trimming of mismatches at ends (default is 1000;
+\fB\-\-trim\-mismatch\-score\fR=\fIINT\fR
+Score to use for mismatches when trimming at ends (default is -3;
to turn off trimming, specify 0)
.TP
-\fB\-2\fR, \fB\-\-dibase\fR
-Input is 2\-base encoded (e.g., SOLiD), with database built
-previously using dibaseindex)
-.TP
-\fB\-C\fR, \fB\-\-cmet\fR
-Use database for methylcytosine experiments, built
-previously using cmetindex)
+\fB\-V\fR, \fB\-\-snpsdir\fR=\fISTRING\fR
+Directory for SNPs index files (created using snpindex) (default is
+location of genome index files specified using -D and -d)
.TP
-\fB\-V\fR, \fB\-\-usesnps\fR=\fISTRING\fR
+\fB\-v\fR, \fB\-\-use\-snps\fR=\fISTRING\fR
Use database containing known SNPs (in <STRING>.iit, built
previously using snpindex) for tolerance to SNPs
.TP
-\fB\-g\fR, \fB\-\-geneprob\fR=\fISTRING\fR
-Use IIT file containing geneprob (in <STRING>.iit, of cumulative
-format >(count) (genomicpos) to resolve ties
+\fB\-C\fR, \fB\-\-cmetdir\fR=\fISTRING\fR
+Directory for methylcytosine index files (created using cmetindex)
+(default is location of genome index files specified using -D, -V, and -d)
+.TP
+\fB\-c\fR, \fB\-\-cmet\fR
+Use database for methylcytosine experiments (built
+previously using cmetindex)
.TP
\fB\-t\fR, \fB\-\-nthreads\fR=\fIINT\fR
Number of worker threads
@@ -111,9 +134,23 @@ Splicing options for RNA\-Seq
Look for splicing involving known splice sites
(in <STRING>.iit), at short or long distances
.TP
+\fB\-S\fR, \fB\-\-splicetrie\-precompute\fR=\fIINT\fR
+Pre-compute splicetrie for all known splice sites
+(0=no, 1=yes (default)). Requires --splicesites flag
+and multiple sequence input.
+.TP
\fB\-N\fR, \fB\-\-novelsplicing\fR=\fIINT\fR
Look for novel splicing, not in known splice sites (if \fB\-s\fR provided)
-within shortsplicedist (\fB\-w\fR flag) or with novelspliceprob (\fB\-x\fR flag)
+.TP
+\fB\-\-novel\-doublesplices\fR
+Allow GSNAP to look for two splices in a single-end involving novel
+splice sites (default is not to allow this). Caution: this option
+can slow down the program considerably. A better way to detect
+double splices is with known splice sites, using the
+\fB\-\-splicesites\fR option.
+.TP
+\fB-w\fR, \fB\-\-localsplicedist\fR=\fIINT\fR
+Definition of local novel splicing event (default 200000)
.TP
\fB\-w\fR, \fB\-\-localsplicedist\fR=\fIINT\fR
Definition of local novel splicing event (default 200000)
@@ -132,17 +169,51 @@ Minimum length at end required for local spliced alignments (default 15, min is
\fB\-K\fR, \fB\-\-distant\-splice\-endlength\fR=\fIINT\fR
Minimum length at end required for distant spliced alignments (default 16, min is 14)
.TP
-\fB\-J\fR, \fB\-\-distant\-splice\-identity\fR=\fIFLOAT\fR
+\fB\-l\fR, \fB\-\-shortend\-splice\-endlength\fR=\fIINT\fR
+Minimum length at end required for short-end spliced alignments (default 2)
+.TP
+\fB\-\-distant\-splice\-identity\fR=\fIFLOAT\fR
Minimum identity at end required for distant spliced alignments (default 0.95)
.SS
Options for paired\-end reads
.TP
-\fB\-P\fR, \fB\-\-pairmax\fR=\fIINT\fR
+\fB\-\-pairmax-dna\fR=\fIINT\fR
Max total genomic length for paired reads
-(default 1000). Should increase for RNA\-Seq reads.
+(default 1000). Should increase for RNA-Seq reads.
.TP
-\fB\-p\fR, \fB\-\-pairlength\fR=\fIINT\fR
-Expected paired\-end length (default 200)
+\fB\-\-pairmax\-rna\fR=\fIINT\fR
+Max total genomic length for RNA-Seq paired reads, or other reads
+that could have a splice (default 200000). Used if -N or -s is specified.
+Should probably match the value for -w, --localsplicedist.
+.TP
+\fB\-\-pairexpect\fR=\fIINT\fR
+Expected paired-end length (default 200)
+.TP
+\fB\-\-pairdev\fR=\fIINT\fR
+Allowable deviation from expected paired-end length, used for
+discriminating between alternative alignments (default 50)
+.SS
+Options for quality scores
+.TP
+\fB\-\-quality\-protocol\fR=\fISTRING\fR
+Protocol for input quality scores. Allowed values:
+
+ illumina (ASCII 64-126) (equivalent to -J 64 -j -31)
+ sanger (ASCII 33-126) (equivalent to -J 33 -j 0)
+
+Default is sanger (no quality print shift)
+SAM output files should have quality scores in sanger protocol
+
+Or you can customize this behavior with these flags:
+.TP
+\fB-J\fR, \fB\-\-quality\-zero\-score\fR=\fIINT\fR
+FASTQ quality scores are zero at this ASCII value
+(default is 33 for sanger protocol; for Illumina, select 64)
+.TP
+\fB-j\fR, \fB\-\-quality\-print\-shift\fR=\fIINT\fR
+Shift FASTQ quality scores by this amount in output
+(default is 0 for sanger protocol; to change Illumina input
+to Sanger output, select -31)
.SS
Output options
.TP
@@ -157,29 +228,51 @@ then nothing is printed.
Print output in same order as input (relevant
only if there is more than one worker thread)
.TP
-\fB\-S\fR, \fB\-\-print\-snps\fR=\fIINT\fR
-Print detailed information about SNPs in reads (works only if \fB\-V\fR also selected)
-(0=no (default), 1=positions and labels)
+\fB\-\-show\-refdiff\fR
+For GSNAP output in SNP-tolerant alignment, shows all differences
+relative to the reference genome as lower case (otherwise, it shows
+all differences relative to both the reference and alternate genome)
.TP
-\fB\-F\fR, \fB\-\-failsonly\fR
+\fB\-\-print\-snps\fR
+Print detailed information about SNPs in reads (works only if \fB\-v\fR also selected)
+(not fully implemented yet)
+.TP
+\fB\-\-failsonly\fR
Print only failed alignments, those with no results
.TP
-\fB\-f\fR, \fB\-\-nofails\fR
+\fB\-\-nofails\fR
Exclude printing of failed alignments
.TP
+\fB\-\-fails\-as\-input\fR=\fISTRING\fR
+Print completely failed alignments as input FASTA or FASTQ format
+Allowed values: yes, no
+.TP
\fB\-A\fR, \fB\-\-format\fR=\fISTRING\fR
Another format type, other than default.
Currently implemented: sam
+Also allowed, but not installed at compile-time: goby
+(To install, need to re-compile with appropriate options)
+.SS
+Options for SAM output
+.TP
+\fB\-\-no\-sam\-headers\fR
+Do not print headers beginning with '@'
+.TP
+\fB\-\-sam\-headers\-batch\fR=\fIINT\fR
+Print headers only for this batch, as specified by -q
+.TP
+\fB\-\-read\-group\-id\fR=\fISTRING\fR
+Value to put into read-group id (RG-ID) field
.TP
-\fB-j\fR, \fB--quality-shift\fR=\fIINT\fR
-Shift FASTQ quality scores by this amount in SAM output (default -31)
+\fB\-\-read\-group\-name\fR=\fISTRING\fR
+Value to put into read-group name (RG-SM) field
.SS
Help options
.TP
-\fB\-v\fR, \fB\-\-version\fR
+\fB\-\-version\fR
Show version
.TP
-\fB\-?\fR, \fB\-\-help\fR
+\fB\-\-help\fR
Show this help message
.SH ENVIRONMENT
.TP
diff --git a/debian/patches/install-data-local b/debian/patches/install-data-local
index ea8278b..56ee76a 100644
--- a/debian/patches/install-data-local
+++ b/debian/patches/install-data-local
@@ -2,7 +2,7 @@ Description: Add DESTDIR to install-data-local
--- gmap.orig/Makefile.in
+++ gmap/Makefile.in
-@@ -642,7 +642,7 @@
+@@ -650,7 +650,7 @@
install-data-local:
@@ -10,4 +10,4 @@ Description: Add DESTDIR to install-data-local
+ $(mkinstalldirs) $(DESTDIR)$(GMAPDB)
dist-hook:
- if test -d CVS; then \
+ # svn log -v --xml | ./svncl.pl > ChangeLog
--
Align mRNA and EST sequences to a genome
More information about the debian-med-commit
mailing list