[med-svn] [SCM] gmap branch, master, updated. upstream/2011-12-28-34-ge4624a2

Andreas Tille tille at debian.org
Sun Apr 1 07:22:48 UTC 2012


The following commit has been merged in the master branch:
commit 7c28f90d045057de1fa0d807dfd1ce30897f784d
Author: Andreas Tille <tille at debian.org>
Date:   Sun Apr 1 09:19:06 2012 +0200

    I have the burning feeling that I missed something in the git-buildpackage docs because I somehow can not get back the debian/ dir.  So I just readd it and hope this does not break anything.

diff --git a/debian/README.Debian b/debian/README.Debian
new file mode 100644
index 0000000..3fcf07d
--- /dev/null
+++ b/debian/README.Debian
@@ -0,0 +1,5 @@
+Prebuilt genome databases for GMAP and GSNAP are available from the
+GMAP web site:
+http://research-pub.gene.com/gmap/
+
+ -- Shaun Jackman <sjackman at debian.org>  2010-04-28
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000..a6732d3
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,68 @@
+gmap (2011-12-28-1) unstable; urgency=low
+
+  * New upstream release.
+
+ -- Shaun Jackman <sjackman at debian.org>  Thu, 29 Dec 2011 12:32:19 -0800
+
+gmap (2011-11-30-1) unstable; urgency=low
+
+  * New upstream release.
+
+ -- Shaun Jackman <sjackman at debian.org>  Wed, 07 Dec 2011 10:46:12 -0800
+
+gmap (2011-10-16-1) unstable; urgency=low
+
+  * New upstream release.
+
+ -- Shaun Jackman <sjackman at debian.org>  Wed, 19 Oct 2011 15:40:36 -0700
+
+gmap (2011-09-14-1) unstable; urgency=low
+
+  * New upstream release.
+
+ -- Shaun Jackman <sjackman at debian.org>  Thu, 15 Sep 2011 14:36:34 -0700
+
+gmap (2011-09-09-1) unstable; urgency=low
+
+  * New upstream release.
+
+ -- Shaun Jackman <sjackman at debian.org>  Wed, 14 Sep 2011 17:09:21 -0700
+
+gmap (2011-08-15-1) unstable; urgency=low
+
+  * New upstream release.
+  * Bump Standards-Version to 3.9.2.
+
+ -- Shaun Jackman <sjackman at debian.org>  Tue, 23 Aug 2011 10:45:46 -0700
+
+gmap (2011-03-11-1) unstable; urgency=low
+
+  * New upstream release.
+
+ -- Shaun Jackman <sjackman at debian.org>  Wed, 23 Mar 2011 10:16:38 -0700
+
+gmap (2010-07-27-1) unstable; urgency=low
+
+  * New upstream release.
+  * Bump Standards-Version to 3.9.1.
+
+ -- Shaun Jackman <sjackman at debian.org>  Fri, 06 Aug 2010 10:12:23 -0700
+
+gmap (2010-07-21-1) unstable; urgency=low
+
+  * New upstream release.
+
+ -- Shaun Jackman <sjackman at debian.org>  Thu, 22 Jul 2010 11:36:34 -0700
+
+gmap (2010-07-20-1) unstable; urgency=low
+
+  * New upstream release.
+  * Bump Standards-Version to 3.9.0.
+
+ -- Shaun Jackman <sjackman at debian.org>  Wed, 21 Jul 2010 11:07:28 -0700
+
+gmap (2010-03-09-1) unstable; urgency=low
+
+  * Initial release. Closes: #580277.
+
+ -- Shaun Jackman <sjackman at debian.org>  Wed, 28 Apr 2010 11:55:31 -0700
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 0000000..7f8f011
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+7
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..c11fbfd
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,26 @@
+Source: gmap
+Section: non-free/science
+Priority: optional
+Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
+DM-Upload-Allowed: yes
+Uploaders: Shaun Jackman <sjackman at debian.org>
+Build-Depends: debhelper (>= 7.0.50~), autotools-dev
+Standards-Version: 3.9.2
+Homepage: http://research-pub.gene.com/gmap/
+Vcs-Git: git://git.debian.org/git/debian-med/gmap.git
+Vcs-Browser: http://git.debian.org/?p=debian-med/gmap.git
+
+Package: gmap
+Architecture: any
+Depends: ${shlibs:Depends}, ${misc:Depends}, ${perl:Depends}
+Description: spliced and SNP-tolerant alignment for mRNA and short reads
+ This package contains the programs GMAP and GSNAP as well as
+ utilities to manage genome databases in GMAP/GSNAP format.
+ GMAP (Genomic Mapping and Alignment Program) is a tool for aligning
+ EST, mRNA and cDNA sequences.
+ GSNAP (Genomic Short-read Nucleotide Alignment Program) is a tool for
+ aligning single-end and paired-end transcriptome reads.
+ Both tools can use a database of
+ * known splice sites and identify novel splice sites.
+ * known single-nucleotide polymorphisms (SNPs).
+ GSNAP can align bisulfite-treated DNA.
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 0000000..465e2a2
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,50 @@
+Format-Specification: http://svn.debian.org/wsvn/dep/web/deps/dep5.mdwn?op=file&rev=135
+Name: GMAP
+Maintainer: Thomas Wu <twu at gene.com>, Colin K. Watanabe <ckw at gene.com>
+Source: http://research-pub.gene.com/gmap/
+
+Copyright: 2011 Genentech, Inc.
+License: other
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Package"), to use, copy, and distribute copies of the Package,
+ without modifications, provided that the above copyright notice and
+ this permission notice are included in all copies or substantial
+ portions of the Package.  Distribution of this Package as part of a
+ commercial software product requires prior arrangement with the
+ Developers.
+ .
+ Permission is also hereby granted, free of charge, to any person
+ obtaining a copy of this Package, to modify your copy or copies of the
+ Package or any portion of it, provided that you use the modified
+ Package only within your corporation or organization.  Distribution of
+ a modified version of this Package requires prior arrangement with the
+ Developers.
+ .
+ Genome databases, map files, and other result files produced as output
+ from software in this Package do not automatically fall under the
+ copyright of this Package, but belong to whoever generated them, and
+ may be distributed freely.
+ .
+ IN NO EVENT SHALL GENENTECH, INC. BE LIABLE TO ANY PARTY FOR DIRECT,
+ INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR OTHER
+ LIABILITY, INCLUDING LOST PROFITS, ARISING FROM THE USE OF THIS
+ SOFTWARE.
+ .
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ GENENTECH, INC. HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
+ UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+
+Files: src/getopt*
+Copyright: 2002 Free Software Foundation, Inc.
+License: LGPL-2.1+
+ See `/usr/share/common-licenses/LGPL-2.1'.
+
+Files: debian/*
+Copyright: 2011 Shaun Jackman <sjackman at debian.org>
+License: ISC
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
diff --git a/debian/docs b/debian/docs
new file mode 100644
index 0000000..e845566
--- /dev/null
+++ b/debian/docs
@@ -0,0 +1 @@
+README
diff --git a/debian/gmap.1 b/debian/gmap.1
new file mode 100644
index 0000000..37a581a
--- /dev/null
+++ b/debian/gmap.1
@@ -0,0 +1,325 @@
+.TH GMAP "1" "Nov 2011" "GMAP 2011-11-30" "User Commands"
+.SH NAME
+gmap \- Genomic Mapping and Alignment Program
+.SH SYNOPSIS
+.B gmap
+\fB-d\fR\fIDB\fR|\fB-g\fR\fIFASTA\fR [\fIOPTION\fR]... [\fIQUERY\fR]...
+.SH DESCRIPTION
+Align the sequences QUERY to the reference, specified with
+\fB-d\fR or \fB-g\fR.
+With no QUERY, read standard input.
+.SH OPTIONS
+.SS Input options
+.TP
+\fB\-D\fR, \fB\-\-dir\fR=\fIdirectory\fR
+Genome directory
+.TP
+\fB\-d\fR, \fB\-\-db\fR=\fISTRING\fR
+Genome database. If argument is '?' (with
+the quotes), this command lists available databases.
+.TP
+\fB-k\fR, \fB--kmer\fR=\fIINT\fR
+kmer size to use in genome database (allowed values: 12-15). If not
+specified, the program will find the highest available kmer size in
+the genome database
+.TP
+\fB\-G\fR, \fB\-\-genomefull\fR
+Use full genome (all ASCII chars allowed;
+built explicitly during setup), not
+compressed version
+.TP
+\fB\-g\fR, \fB\-\-gseg\fR=\fIfilename\fR
+User-supplied genomic segment
+.TP
+\fB-2\fR, \fB--pairalign\fR
+Align two sequences in FASTA format via stdin, first one being
+genomic and second one being cDNA
+.TP
+\fB--cmdline\fR=\fISTRING\fR,\fISTRING\fR
+Align these two sequences provided on the command line,
+first one being genomic and second one being cDNA
+.TP
+\fB\-q\fR, \fB\-\-part\fR=\fIINT\fR/\fIINT\fR
+Process only the i-th out of every n sequences
+e.g., 0/100 or 99/100 (useful for distributing jobs
+to a computer farm).
+.TP
+\fB\-\-input\-buffer\fR=\fIINT\fR
+Size of input buffer (program reads this many sequences
+at a time for efficiency) (default 1000)
+.SS
+Computation options
+.TP
+\fB\-B\fR, \fB\-\-batch\fR=\fIINT\fR
+ Mode     Offsets       Positions       Genome
+   0      allocate      mmap            mmap
+   1      allocate      mmap & preload  mmap
+   2      allocate      mmap & preload  mmap & preload (default)
+   3      allocate      allocate        mmap & preload
+   4      allocate      allocate        allocate
+   5      expand        allocate        allocate
+
+Note: For a single sequence, all data structures use mmap.
+If mmap not available and allocate not chosen, then will use fileio
+(very slow)
+.TP
+\fB--nosplicing\fR
+Turns off splicing (useful for aligning genomic sequences
+onto a genome)
+.TP
+\fB--min-intronlength\fR=\fIINT\fR
+Min length for one internal intron (default 9).  Below this size,
+a genomic gap will be considered a deletion rather than an intron.
+.TP
+\fB-K\fR, \fB--intronlength\fR=\fIINT\fR
+Max length for one internal intron (default 1000000)
+.TP
+\fB-w\fR, \fB--localsplicedist\fR=\fIINT\fR
+Max length for known splice sites at ends of sequence (default 200000)
+.TP
+\fB\-L\fR, \fB\-\-totallength\fR=\fIINT\fR
+Max total intron length (default 2400000)
+.TP
+\fB\-x\fR, \fB\-\-chimera-margin\fR=\fIINT\fR
+Amount of unaligned sequence that triggers
+search for the remaining sequence (default 40).
+Enables alignment of chimeric reads, and may help
+with some non-chimeric reads. To turn off, set to
+a large value (greater than the query length).
+.TP
+\fB\-t\fR, \fB\-\-nthreads\fR=\fIINT\fR
+Number of worker threads
+.TP
+\fB\-C\fR, \fB\-\-chrsubsetfile\fR=\fIfilename\fR
+User\-supplied chromosome subset file
+.TP
+\fB\-c\fR, \fB\-\-chrsubset\fR=\fIstring\fR
+Chromosome subset to search
+.TP
+\fB\-z\fR, \fB\-\-direction\fR=\fISTRING\fR
+cDNA direction (sense_force, antisense_force,
+sense_filter, antisense_filter, or auto (default))
+.TP
+\fB\-H\fR, \fB\-\-trimendexons\fR=\fIINT\fR
+Trim end exons with fewer than given number of matches
+(in nt, default 12)
+.TP
+\fB--cross-species\fR
+For cross-species alignments, use a more sensitive search for
+canonical splicing
+.TP
+\fB--canonical-mode\fR=\fIINT\fR
+Reward for canonical and semi-canonical introns
+0=low reward, 1=high reward (default), 2=low reward for
+high\-identity sequences and high reward otherwise
+.TP
+\fB--allow-close-indels\fR=\fIINT\fR
+Allow an insertion and deletion close to each other
+(0=no, 1=yes (default), 2=only for high-quality alignments)
+.TP
+\fB--microexon-spliceprob\fR=\fIFLOAT\fR
+Allow microexons only if one of the splice site probabilities is
+greater than this value (default 0.90)
+.TP
+\fB\-p\fR, \fB\-\-prunelevel\fR
+Pruning level: 0=no pruning (default), 1=poor seqs,
+2=repetitive seqs, 3=poor and repetitive
+.SS
+Output types
+.TP
+\fB\-S\fR, \fB\-\-summary\fR
+Show summary of alignments only
+.TP
+\fB\-A\fR, \fB\-\-align\fR
+Show alignments
+.TP
+\fB\-3\fR, \fB\-\-continuous\fR
+Show alignment in three continuous lines
+.TP
+\fB\-4\fR, \fB\-\-continuous-by-exon\fR
+Show alignment in three lines per exon
+.TP
+\fB\-Z\fR, \fB\-\-compress\fR
+Print output in compressed format
+.TP
+\fB\-E\fR, \fB\-\-exons\fR=\fISTRING\fR
+Print exons ("cdna" or "genomic")
+.TP
+\fB\-P\fR, \fB\-\-protein_dna\fR
+Print protein sequence (cDNA)
+.TP
+\fB\-Q\fR, \fB\-\-protein_gen\fR
+Print protein sequence (genomic)
+.TP
+\fB\-f\fR, \fB\-\-format\fR=\fIINT\fR
+Other format for output (also note the -A and -S options and other
+options listed under Output types):
+ psl (or 1)= PSL (BLAT) format,
+ gff3_gene (or 2)= GFF3 gene format,
+ gff3_match_cdna (or 3)= GFF3 cDNA_match format,
+ gff3_match_est (or 4) = GFF3 EST_match format,
+ splicesites (or 6) = splicesites output (for GSNAP splicing file),
+ introns = introns output (for GSNAP splicing file),
+ map_exons (or 7) = IIT FASTA exon map format,
+ map_genes (or 8) = IIT FASTA map format,
+ coords (or 9) = coords in table format,
+ sampe = SAM format (setting paired_read bit in flag),
+ samse = SAM format (without setting paired_read bit)
+.SS
+Output options
+.TP
+\fB\-n\fR, \fB\-\-npaths\fR=\fIINT\fR
+Maximum number of paths to show. If set to 0,
+prints two paths if chimera detected, else one.
+.TP
+\fB--quiet-if-excessive\fR
+If more than maximum number of paths are found, then nothing is
+printed.
+.TP
+\fB--suboptimal-score\fR=\fIINT\fR
+Report only paths whose score is within this value of the
+best path. By default, if this option is not provided,
+the program prints all paths found.
+.TP
+\fB\-O\fR, \fB\-\-ordered\fR
+Print output in same order as input (relevant
+only if there is more than one worker thread)
+.TP
+\fB\-5\fR, \fB\-\-md5\fR
+Print MD5 checksum for each query sequence
+.TP
+\fB\-o\fR, \fB\-\-chimera-overlap\fR
+Overlap to show, if any, at chimera breakpoint
+.TP
+\fB\-\-failsonly\fR
+Print only failed alignments, those with no results
+.TP
+\fB\-\-nofails\fR
+Exclude printing of failed alignments
+.TP
+\fB\-\-fails\-as\-input\fR=\fISTRING\fR
+Print completely failed alignments as input FASTA or FASTQ format.
+Allowed values: yes, no
+.TP
+\fB\-V\fR, \fB\-\-usesnps\fR=\fISTRING\fR
+Use database containing known SNPs (in <STRING>.iit, built
+previously using snpindex) for reporting output
+.TP
+\fB\-\-split-output\fR=\fISTRING\fR
+Basename for multiple-file output, separately for nomapping,
+uniq, mult, (and chimera, if --chimera-margin is selected)
+.TP
+\fB--output-buffer-size\fR=\fIINT\fR
+Buffer size, in queries, for output thread (default 1000). When the
+number of results to be printed exceeds this size, the worker threads
+are halted until the backlog is cleared
+.TP
+\fB\-F\fR, \fB\-\-fulllength\fR
+Assume full\-length protein, starting with Met
+.TP
+\fB\-\-cdsstart\fR=\fIINT\fR
+Translate codons from given nucleotide (1-based)
+.TP
+\fB\-T\fR, \fB\-\-truncate\fR
+Truncate alignment around full\-length protein, Met to Stop.
+Implies \fB\-F\fR flag.
+.TP
+\fB\-Y\fR, \fB\-\-tolerant\fR
+Translates cDNA with corrections for frameshifts
+.SS
+Options for SAM output
+.TP
+\fB\-\-no\-sam\-headers\fR
+Do not print headers beginning with '@'
+.TP
+\fB\-\-read\-group\-id\fR=\fISTRING\fR
+Value to put into read-group id (RG-ID) field
+.TP
+\fB\-\-read\-group\-name\fR=\fISTRING\fR
+Value to put into read-group name (RG-SM) field
+.TP
+\fB--read-group-library\fR=\fISTRING\fR
+Value to put into read-group library (RG-LB) field
+.TP
+\fB--read-group-platform\fR=\fISTRING\fR
+Value to put into read-group platform (RG-PL) field
+.SS
+Options for quality scores
+.TP
+\fB--quality-protocol\fR=\fISTRING\fR
+Protocol for input quality scores. Allowed values:
+ illumina (ASCII 64-126) (equivalent to -J 64 -j -31)
+ sanger   (ASCII 33-126) (equivalent to -J 33 -j 0)
+
+Default is sanger (no quality print shift)
+SAM output files should have quality scores in sanger protocol.
+Or you can specify the print shift with this flag:
+.TP
+\fB-j\fR, \fB--quality-print-shift\fR=\fIINT\fR
+Shift FASTQ quality scores by this amount in output
+(default is 0 for sanger protocol; to change Illumina input to Sanger
+output, select -31)
+.SS
+External map file options
+.TP
+\fB\-M\fR, \fB\-\-mapdir\fR=\fIdirectory\fR
+Map directory
+.TP
+\fB\-m\fR, \fB\-\-map\fR=\fIiitfile\fR
+Map file. If argument is '?' (with the quotes),
+this lists available map files.
+.TP
+\fB\-e\fR, \fB\-\-mapexons\fR
+Map each exon separately
+.TP
+\fB\-b\fR, \fB\-\-mapboth\fR
+Report hits from both strands of genome
+.TP
+\fB\-u\fR, \fB\-\-flanking\fR=\fIINT\fR
+Show flanking hits (default 0)
+.TP
+\fB\-\-print\-comment\fR
+Show comment line for each hit
+.SS
+Alignment output options
+.TP
+\fB\-N\fR, \fB\-\-nolengths\fR
+No intron lengths in alignment
+.TP
+\fB\-I\fR, \fB\-\-invertmode\fR=\fIINT\fR
+Mode for alignments to genomic (\-) strand:
+ 0=Don't invert the cDNA (default)
+ 1=Invert cDNA and print genomic (\-) strand
+ 2=Invert cDNA and print genomic (+) strand
+.TP
+\fB\-i\fR, \fB\-\-introngap\fR=\fIINT\fR
+Nucleotides to show on each end of intron (default=3)
+.TP
+\fB\-l\fR, \fB\-\-wraplength\fR=\fIINT\fR
+Wrap length for alignment (default=50)
+.SS
+Help options
+.TP
+\fB\-\-version\fR
+Show version
+.TP
+\fB\-\-help\fR
+Show this help message
+.SH ENVIRONMENT
+.TP
+\fBGMAPDB\fR
+genome directory (equivalent to \fB-D\fR)
+.SH FILES
+.TP
+~/.gmaprc
+configuration file
+.SH AUTHOR
+Thomas D. Wu and Colin K. Watanabe
+.SH "REPORTING BUGS"
+Report bugs to Thomas Wu <twu at gene.com>.
+.SH COPYRIGHT
+Copyright 2005 Genentech, Inc. All rights reserved.
+.SH "SEE ALSO"
+\fBgmap_setup\fR(1), \fBgsnap\fR(1)
+.br
+http://research-pub.gene.com/gmap/
diff --git a/debian/gmap_setup.1 b/debian/gmap_setup.1
new file mode 100644
index 0000000..aab9a93
--- /dev/null
+++ b/debian/gmap_setup.1
@@ -0,0 +1,102 @@
+.TH GMAP_SETUP "1" "Nov 2011" "GMAP 2011-11-30" "User Commands"
+.SH NAME
+gmap_setup \- create a genome database for GMAP or GSNAP
+.SH SYNOPSIS
+.B gmap_setup
+\fB-d\fR\fIgenomename\fR [\fB-D\fR\fIdestdir\fR]
+[\fB-o\fR\fIMakefile\fR] \fIFASTA\fR
+.SH OPTIONS
+.TP
+\fB\-d\fR
+genome name
+.TP
+\fB\-D\fR
+destination directory for installation (defaults to gmapdb directory specified at configure time)
+.TP
+\fB\-o\fR
+name of output Makefile (default is "Makefile.<genome>")
+.TP
+\fB\-M\fR
+use coordinates from an .md file (e.g., seq_contig.md file from NCBI)
+.TP
+\fB\-C\fR
+try to parse chromosomal coordinates from each FASTA header
+.TP
+\fB\-E\fR
+interpret argument as a command, instead of a list of FASTA files
+.TP
+\fB\-O\fR
+order chromosomes in numeric/alphabetic order (0 = no, 1 = yes (default))
+.SS Advanced options
+.TP
+\fB\-W\fR
+write some output directly to file, instead of using RAM (use only if RAM is limited)
+.TP
+\fB\-q\fR
+GMAP indexing interval (default: 3 nt)
+.TP
+\fB\-Q\fR
+PMAP indexing interval (default: 6 aa)
+.SH DESCRIPTION
+.PP
+If you want to treat each FASTA entry as a separate chromosome (either
+because it is in fact an entire chromosome or because you have contigs
+without any chromosomal information), you can simply call gmap_setup
+like this:
+.IP
+gmap_setup \fB\-d\fR <genome> <fasta_file>...
+.PP
+The accession of each FASTA header (the word following each ">") will
+be the name of each chromosome. GMAP can handle an unlimited number
+of "chromosomes", with arbitrarily long names. In this way, GMAP
+could be used as a general search program for near\-identity matches
+against a FASTA file.
+.TP
+\fB\-M\fR and \fB\-C\fR
+If your sequences represent contigs that have
+mapping information to specific chromosomal regions, then you can
+have gmap_setup try to read each header to determine its chromosomal
+region (the \fB\-C\fR flag) or read an .md file that contains information
+about chromosomal regions (the \fB\-M\fR flag). The .md files are often
+provided in NCBI releases, but since the formats change often,
+gmap_setup will prompt you to make sure it parses it correctly.
+.TP
+\fB\-E\fR
+If you need to pre\-process the FASTA files before using
+these programs, perhaps because they are compressed or because you
+need to insert chromosomal information in the header lines, you can
+specify a command instead of multiple fasta_files, like these
+examples:
+
+ gmap_setup \fB\-d\fR <genome> \fB\-E\fR 'gunzip \fB\-c\fR genomefiles.gz'
+ gmap_setup \fB\-d\fR <genome> \fB\-E\fR 'cat *.fa | ./add\-chromosomal\-info.pl'
+.TP
+\fB\-W\fR
+The gmap_setup process works best if you have a
+computer with enough RAM to hold the entire genome (e.g., 3
+gigabytes for a human\- or mouse\-sized genome). Since the resulting
+genome files work across all machine architectures, you can find any
+machine with sufficient RAM to build the genome files and then
+transfer the files to another machine. (GMAP itself runs fine on
+machines with limited RAM.) If you cannot find any machine with
+sufficient RAM for gmap_setup, you can run the program with the \fB\-W\fR
+flag to write the files directly, but this can be very slow.
+.TP
+\fB\-q\fR and \fB\-Q\fR
+If you specify a smaller interval (for example,
+3 for the GMAP interval), you can create a higher\-resolution
+database, which can be useful for mapping small oligomers (smaller
+than 18 nt). However, the corresponding genome index files will be
+larger (twice as big if you specify \fB\-q\fR 3). These index files may
+exceed the 2 gigabyte file offset limit on some computers, and will
+therefore fail to work on those computers.
+.SH AUTHOR
+Thomas D. Wu and Colin K. Watanabe
+.SH "REPORTING BUGS"
+Report bugs to Thomas Wu <twu at gene.com>.
+.SH COPYRIGHT
+Copyright 2005 Genentech, Inc. All rights reserved.
+.SH "SEE ALSO"
+\fBgmap\fR(1), \fBgsnap\fR(1)
+.br
+http://research-pub.gene.com/gmap/
diff --git a/debian/gsnap.1 b/debian/gsnap.1
new file mode 100644
index 0000000..1898337
--- /dev/null
+++ b/debian/gsnap.1
@@ -0,0 +1,421 @@
+.TH GSNAP "1" "Nov 2011" "GMAP 2011-11-30" "User Commands"
+.SH NAME
+gsnap \- Genomic Short-read Nucleotide Alignment Program
+.SH SYNOPSIS
+.B gsnap
+\fB-d\fR\fIDB\fR [\fIOPTION\fR]... [\fIQUERY\fR]...
+.SH DESCRIPTION
+Align the sequences QUERY to the reference DB.
+With no QUERY, read standard input.
+.SH OPTIONS
+.SS
+Input options
+.TP
+\fB\-D\fR, \fB\-\-dir\fR=\fIdirectory\fR
+Genome directory
+.TP
+\fB\-d\fR, \fB\-\-db\fR=\fISTRING\fR
+Genome database
+.TP
+\fB-k\fR, \fB--kmer\fR=\fIINT\fR
+kmer size to use in genome database (allowed values: 12-15). If not
+specified, the program will find the highest available kmer size in
+the genome database
+.TP
+\fB\-q\fR, \fB\-\-part\fR=\fIINT/INT\fR
+Process only the i\-th out of every n sequences
+e.g., 0/100 or 99/100 (useful for distributing jobs to a computer farm).
+.TP
+\fB\-\-input\-buffer\fR=\fIINT\fR
+Size of input buffer (program reads this many sequences
+at a time for efficiency) (default 1000)
+.TP
+\fB\-\-barcode\-length\fR=\fIINT\fR
+Amount of barcode to remove from start of read (default 0)
+.TP
+\fB\-o\fR, \fB\-\-orientation\fR=\fISTRING\fR
+Orientation of paired-end reads
+Allowed values: FR (fwd-rev, or typical Illumina; default),
+RF (rev-fwd, for circularized inserts), or FF (fwd-fwd, same strand)
+.TP
+\fB--fastq-id-start\fR=\fIINT\fR
+Starting position of identifier in FASTQ header, space-delimited (>= 1)
+.TP
+\fB--fastq-id-end\fR=\fIINT\fR
+Ending position of identifier in FASTQ header, space-delimited (>= 1)
+ Examples:
+ @HWUSI-EAS100R:6:73:941:1973#0/1
+  start=1, end=1 (default)
+   => identifier is HWUSI-EAS100R:6:73:941:1973#0
+ @SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=36
+  start=1, end=1
+   => identifier is SRR001666.1
+  start=2, end=2
+   => identifier is 071112_SLXA-EAS1_s_7:5:1:817:345
+  start=1, end=2
+   => identifier is SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345
+.TP
+\fB--filter-chastity\fR=\fISTRING\fR
+Skips reads marked by the Illumina chastity program.  Expecting a string
+after the accession having a 'Y' after the first colon, like this:
+ @accession 1:Y:0:CTTGTA
+where the 'Y' signifies filtering by chastity.
+Values: off (default), either, both.  For 'either', a 'Y' on either end
+of a paired-end read will be filtered.  For 'both', a 'Y' is required
+on both ends of a paired-end read (or on the only end of a single-end read).
+.TP
+\fB--gunzip\fR
+Uncompress gzipped input files
+.SS
+Computation options
+.PP
+Note: GSNAP has an ultrafast algorithm for calculating mismatches up to and including
+((readlength+2)/kmer \- 2) ("ultrafast mismatches"). The program will run fastest if
+max\-mismatches (plus suboptimal\-levels) is within that value.
+Also, indels, especially end indels, take longer to compute, although the algorithm
+is still designed to be fast.
+.TP
+\fB\-B\fR, \fB\-\-batch\fR=\fIINT\fR
+ Mode     Offsets       Positions       Genome
+   0      allocate      mmap            mmap
+   1      allocate      mmap & preload  mmap
+   2      allocate      mmap & preload  mmap & preload (default)
+   3      allocate      allocate        mmap & preload
+   4      allocate      allocate        allocate
+   5      expand        allocate        allocate
+
+Note: For a single sequence, all data structures use mmap.
+If mmap not available and allocate not chosen, then will use fileio
+(very slow)
+.TP
+\fB\-m\fR, \fB\-\-max\-mismatches\fR=\fIFLOAT\fR
+Maximum number of mismatches allowed (if not specified, then
+defaults to the ultrafast level of ((readlength+2)/kmer \- 2))
+If specified between 0.0 and 1.0, then treated as a fraction
+of each read length. Otherwise, treated as an integral number
+of mismatches (including indel and splicing penalties)
+For RNA-Seq, you may need to increase this value slightly
+to align reads extending past the ends of an exon.
+.TP
+\fB--query-unk-mismatch\fR=\fIINT\fR
+Whether to count unknown (N) characters in the query as a mismatch
+(0=no (default), 1=yes)
+.TP
+\fB--genome-unk-mismatch\fR=\fIINT\fR
+Whether to count unknown (N) characters in the genome as a mismatch
+(0=no, 1=yes (default))
+.TP
+\fB--terminal-threshold\fR=\fIINT\fR
+Threshold for searching for a terminal alignment (from one end of the
+read to the best possible position at the other end) (default 2).
+For example, if this value is 2, then if GSNAP finds an exact or
+1-mismatch alignment, it will not try to find a terminal alignment.
+Note that this default value may not be low enough if you want to
+obtain terminal alignments for very short reads, although such reads
+probably don't have enough specificity for terminal alignments anyway.
+To turn off terminal alignments, set this to a high value, greater
+than the value for --max-mismatches.
+.TP
+\fB\-i\fR, \fB\-\-indel\-penalty\fR=\fIINT\fR
+Penalty for an indel (default 2).
+Counts against mismatches allowed. To find indels, make
+indel-penalty less than or equal to max-mismatches.
+A value < 2 can lead to false positives at read ends
+.TP
+\fB\-\-indel\-endlength\fR=\fIINT\fR
+Minimum length at end required for indel alignments (default 4)
+.TP
+\fB\-y\fR, \fB\-\-max\-middle\-insertions\fR=\fIINT\fR
+Maximum number of middle insertions allowed (default 9)
+.TP
+\fB\-z\fR, \fB\-\-max\-middle\-deletions\fR=\fIINT\fR
+Maximum number of middle deletions allowed (default 30)
+.TP
+\fB\-Y\fR, \fB\-\-max\-end\-insertions\fR=\fIINT\fR
+Maximum number of end insertions allowed (default 3)
+.TP
+\fB\-Z\fR, \fB\-\-max\-end\-deletions\fR=\fIINT\fR
+Maximum number of end deletions allowed (default 6)
+.TP
+\fB\-M\fR, \fB\-\-suboptimal\-levels\fR=\fIINT\fR
+Report suboptimal hits beyond best hit (default 0)
+All hits with best score plus suboptimal-levels are reported
+.TP
+\fB-a\fR, \fB--adapter-strip\fR=\fISTRING\fR
+Method for removing adapters from reads. Currently allowed values:
+off, paired.
+Default is "paired", which removes adapters from paired-end reads if a
+concordant or paired alignment cannot be found from the original read.
+To turn off, use the value "off".
+.TP
+\fB\-\-trim\-mismatch\-score\fR=\fIINT\fR
+Score to use for mismatches when trimming at ends (default is -3;
+to turn off trimming, specify 0). Warning: turning trimming off
+will give false positive mismatches at the ends of reads
+.TP
+\fB--trim-indel-score\fR=\fIINT\fR
+Score to use for indels when trimming at ends (default is -4;
+to turn off trimming, specify 0). Warning: turning trimming off
+will give false positive indels at the ends of reads
+.TP
+\fB\-V\fR, \fB\-\-snpsdir\fR=\fISTRING\fR
+Directory for SNPs index files (created using snpindex) (default is
+location of genome index files specified using -D and -d)
+.TP
+\fB\-v\fR, \fB\-\-use\-snps\fR=\fISTRING\fR
+Use database containing known SNPs (in <STRING>.iit, built
+previously using snpindex) for tolerance to SNPs
+.TP
+\fB\-\-cmetdir\fR=\fISTRING\fR
+Directory for methylcytosine index files (created using cmetindex)
+(default is location of genome index files specified using -D, -V, and -d)
+.TP
+\fB--atoidir\fR=\fISTRING\fR
+Directory for A-to-I RNA editing index files (created using atoiindex)
+(default is location of genome index files specified using -D, -V, and
+-d)
+.TP
+\fB--mode\fR=\fISTRING\fR
+Alignment mode: standard (default), cmet-stranded, cmet-nonstranded,
+atoi-stranded, or atoi-nonstranded. Non-standard modes require you
+to have previously run the cmetindex or atoiindex programs on the genome
+.TP
+\fB--tallydir\fR=\fISTRING\fR
+Directory for tally IIT file to resolve concordant multiple results
+(default is location of genome index files specified using -D and -d).
+Note: can just give full path name to --use\-tally instead.
+.TP
+\fB--use-tally\fR=\fISTRING\fR
+Use this tally IIT file to resolve concordant multiple results
+.TP
+\fB--runlengthdir\fR=\fISTRING\fR
+Directory for runlength IIT file to resolve concordant multiple
+results (default is location of genome index files specified using -D
+and -d).
+Note: can just give full path name to --use\-runlength instead.
+.TP
+\fB--use-runlength\fR=\fISTRING\fR
+Use this runlength IIT file to resolve concordant multiple results
+.TP
+\fB\-t\fR, \fB\-\-nthreads\fR=\fIINT\fR
+Number of worker threads
+.SS
+Options for GMAP alignment within GSNAP
+.TP
+\fB--gmap-mode\fR=\fISTRING\fR
+Cases to use GMAP for complex alignments containing multiple splices
+or indels.
+Allowed values: none, pairsearch, terminal, improve (or multiple,
+separated by commas). Default: pairsearch,terminal,improve
+.TP
+\fB--trigger-score-for-gmap\fR=\fIINT\fR
+Try GMAP pairsearch on nearby genomic regions if best score (the total
+of both ends if paired-end) exceeds this value (default 5)
+.TP
+\fB--max-gmap-pairsearch\fR=\fIINT\fR
+Perform GMAP pairsearch on nearby genomic regions up to this many
+candidate ends (default 3). Requires pairsearch in --gmap-mode
+.TP
+\fB--max-gmap-terminal\fR=\fIINT\fR
+Perform GMAP terminal on nearby genomic regions up to this many
+candidate ends (default 3). Requires terminal in --gmap-mode
+.TP
+\fB--max-gmap-improvement\fR=\fIINT\fR
+Perform GMAP improvement on nearby genomic regions up to this many candidate ends. Requires improve in --gmap-mode
+.TP
+\fB--microexon-spliceprob\fR=\fIFLOAT\fR
+Allow microexons only if one of the splice site probabilities is
+greater than this value (default 0.90)
+.SS
+Splicing options for RNA\-Seq
+.TP
+.TP
+\fB-N,\fR \fB--novelsplicing\fR=\fIINT\fR
+Look for novel splicing (0=no (default), 1=yes)
+.TP
+\fB--splicingdir\fR=\fISTRING\fR
+Directory for splicing involving known sites or known introns,
+as specified by the -s or --use-splicing flag (default is
+directory computed from -D and -d flags).
+Note: can just give full pathname to the -s flag instead.
+.TP
+\fB\-s\fR, \fB--use-splicing\fR=\fISTRING\fR
+Look for splicing involving known sites or known introns
+(in <STRING>.iit), at short or long distances.
+See README instructions for the distinction between known sites and
+known introns
+.TP
+\fB--ambig-splice-noclip\fR
+For ambiguous known splicing at ends of the read, do not clip at the
+splice site, but extend instead into the intron. This flag makes
+sense only if you provide the --use-splicing flag, and you are trying
+to eliminate all soft clipping with --trim-mismatch-score=0
+.TP
+\fB\-w\fR, \fB\-\-localsplicedist\fR=\fIINT\fR
+Definition of local novel splicing event (default 200000)
+.TP
+\fB\-e\fR, \fB\-\-local\-splice\-penalty\fR=\fIINT\fR
+Penalty for a local splice (default 0). Counts against mismatches allowed
+.TP
+\fB\-E\fR, \fB\-\-distant\-splice\-penalty\fR=\fIINT\fR
+Penalty for a distant splice (default 3). A distant splice is one where
+the intron length exceeds the value of -w, or --localsplicedist, or is an
+inversion, scramble, or translocation between two different chromosomes.
+Counts against mismatches allowed
+.TP
+\fB\-K\fR, \fB\-\-distant\-splice\-endlength\fR=\fIINT\fR
+Minimum length at end required for distant spliced alignments (default 16, min
+allowed is the value of -k, or kmer size)
+.TP
+\fB-l,\fR \fB\-\-shortend\-splice\-endlength\fR=\fIINT\fR
+Minimum length at end required for short-end spliced alignments (default 2)
+but unless known splice sites are provided with the -s flag, GSNAP may still
+need the end length to be the value of -k, or kmer size to find a given splice
+.TP
+\fB\-\-distant\-splice\-identity\fR=\fIFLOAT\fR
+Minimum identity at end required for distant spliced alignments (default 0.95)
+.TP
+\fB--antistranded-penalty\fR=\fIINT\fR
+Penalty for antistranded splicing when using stranded RNA-Seq
+protocols. A positive value, such as 1, expects antisense on the
+first read and sense on the second read. Default is 0, which treats
+sense and antisense equally well
+.TP
+\fB--merge-distant-samechr\fR
+Report distant splices on the same chromosome as a single splice, if possible.
+Will produce a single SAM line instead of two SAM lines, which is also done
+for translocations, inversions, and scramble events
+.SS
+Options for paired\-end reads
+.TP
+\fB\-\-pairmax-dna\fR=\fIINT\fR
+Max total genomic length for DNA-Seq paired reads, or other reads
+without splicing (default 1000).  Used if -N or -s is not specified.
+.TP
+\fB\-\-pairmax\-rna\fR=\fIINT\fR
+Max total genomic length for RNA-Seq paired reads, or other reads
+that could have a splice (default 200000). Used if -N or -s is specified.
+Should probably match the value for -w, --localsplicedist.
+.TP
+\fB--pairexpect\fR=\fIINT\fR
+Expected paired-end length, used for calling splices in medial part of
+paired-end reads (default 200)
+.TP
+\fB--pairdev\fR=\fIINT\fR
+Allowable deviation from expected paired-end length, used for
+calling splices in medial part of paired-end reads (default 25)
+.SS
+Options for quality scores
+.TP
+\fB\-\-quality\-protocol\fR=\fISTRING\fR
+Protocol for input quality scores. Allowed values:
+
+ illumina (ASCII 64-126) (equivalent to -J 64 -j -31)
+ sanger   (ASCII 33-126) (equivalent to -J 33 -j 0)
+
+Default is sanger (no quality print shift)
+SAM output files should have quality scores in sanger protocol
+
+Or you can customize this behavior with these flags:
+.TP
+\fB-J\fR, \fB\-\-quality\-zero\-score\fR=\fIINT\fR
+FASTQ quality scores are zero at this ASCII value
+(default is 33 for sanger protocol; for Illumina, select 64)
+.TP
+\fB-j\fR, \fB\-\-quality\-print\-shift\fR=\fIINT\fR
+Shift FASTQ quality scores by this amount in output
+(default is 0 for sanger protocol; to change Illumina input
+to Sanger output, select -31)
+.SS
+Output options
+.TP
+\fB\-n\fR, \fB\-\-npaths\fR=\fIINT\fR
+Maximum number of paths to print (default 100).
+.TP
+\fB\-Q\fR, \fB\-\-quiet\-if\-excessive\fR
+If more than maximum number of paths are found,
+then nothing is printed.
+.TP
+\fB\-O\fR, \fB\-\-ordered\fR
+Print output in same order as input (relevant
+only if there is more than one worker thread)
+.TP
+\fB\-\-show\-refdiff\fR
+For GSNAP output in SNP-tolerant alignment, shows all differences
+relative to the reference genome as lower case (otherwise, it shows
+all differences relative to both the reference and alternate genome)
+.TP
+\fB--clip-overlap\fR
+For paired-end reads whose alignments overlap, clip the overlapping
+region.
+.TP
+\fB\-\-print\-snps\fR
+Print detailed information about SNPs in reads (works only if \fB\-v\fR also selected)
+(not fully implemented yet)
+.TP
+\fB\-\-failsonly\fR
+Print only failed alignments, those with no results
+.TP
+\fB\-\-nofails\fR
+Exclude printing of failed alignments
+.TP
+\fB\-\-fails\-as\-input\fR=\fISTRING\fR
+Print completely failed alignments as input FASTA or FASTQ format.
+Allowed values: yes, no
+.TP
+\fB\-A\fR, \fB\-\-format\fR=\fISTRING\fR
+Another format type, other than default.
+Currently implemented: sam
+Also allowed, but not installed at compile-time: goby
+(To install, need to re-compile with appropriate options)
+.TP
+\fB--output-buffer-size\fR=\fIINT\fR
+Buffer size, in queries, for output thread (default 1000). When the
+number of results to be printed exceeds this size, the worker threads
+are halted until the backlog is cleared
+.SS
+Options for SAM output
+.TP
+\fB\-\-no\-sam\-headers\fR
+Do not print headers beginning with '@'
+.TP
+\fB\-\-sam\-headers\-batch\fR=\fIINT\fR
+Print headers only for this batch, as specified by -q
+.TP
+\fB\-\-read\-group\-id\fR=\fISTRING\fR
+Value to put into read-group id (RG-ID) field
+.TP
+\fB\-\-read\-group\-name\fR=\fISTRING\fR
+Value to put into read-group name (RG-SM) field
+.TP
+\fB--read-group-library\fR=\fISTRING\fR
+Value to put into read-group library (RG-LB) field
+.TP
+\fB--read-group-platform\fR=\fISTRING\fR
+Value to put into read-group platform (RG-PL) field
+.SS
+Help options
+.TP
+\fB\-\-version\fR
+Show version
+.TP
+\fB\-\-help\fR
+Show this help message
+.SH ENVIRONMENT
+.TP
+\fBGMAPDB\fR
+genome directory (equivalent to \fB-D\fR)
+.SH FILES
+.TP
+~/.gmaprc
+configuration file
+.SH AUTHOR
+Thomas D. Wu and Colin K. Watanabe
+.SH "REPORTING BUGS"
+Report bugs to Thomas Wu <twu at gene.com>.
+.SH COPYRIGHT
+Copyright 2005 Genentech, Inc. All rights reserved.
+.SH "SEE ALSO"
+\fBgmap_setup\fR(1), \fBgmap\fR(1)
+.br
+http://research-pub.gene.com/gmap/
diff --git a/debian/install b/debian/install
new file mode 100644
index 0000000..8dc8108
--- /dev/null
+++ b/debian/install
@@ -0,0 +1,24 @@
+usr/bin/gmap
+usr/bin/gsnap
+usr/bin/gmap_setup
+usr/bin/atoiindex /usr/lib/gmap
+usr/bin/cmetindex /usr/lib/gmap
+usr/bin/dbsnp_iit /usr/lib/gmap
+usr/bin/fa_coords /usr/lib/gmap
+usr/bin/get-genome /usr/lib/gmap
+usr/bin/gmap_build /usr/lib/gmap
+usr/bin/gmap_compress /usr/lib/gmap
+usr/bin/gmap_process /usr/lib/gmap
+usr/bin/gmap_reassemble /usr/lib/gmap
+usr/bin/gmap_uncompress /usr/lib/gmap
+usr/bin/gmapindex /usr/lib/gmap
+usr/bin/gsnap_tally /usr/lib/gmap
+usr/bin/gtf_genes /usr/lib/gmap
+usr/bin/gtf_splicesites /usr/lib/gmap
+usr/bin/iit_dump /usr/lib/gmap
+usr/bin/iit_get /usr/lib/gmap
+usr/bin/iit_store /usr/lib/gmap
+usr/bin/md_coords /usr/lib/gmap
+usr/bin/psl_genes /usr/lib/gmap
+usr/bin/psl_splicesites /usr/lib/gmap
+usr/bin/snpindex /usr/lib/gmap
diff --git a/debian/manpages b/debian/manpages
new file mode 100644
index 0000000..5ba1fda
--- /dev/null
+++ b/debian/manpages
@@ -0,0 +1,3 @@
+debian/gmap.1
+debian/gmap_setup.1
+debian/gsnap.1
diff --git a/debian/patches/install-data-local b/debian/patches/install-data-local
new file mode 100644
index 0000000..e09331b
--- /dev/null
+++ b/debian/patches/install-data-local
@@ -0,0 +1,13 @@
+Description: Add DESTDIR to install-data-local
+
+--- gmap.orig/Makefile.in
++++ gmap/Makefile.in
+@@ -653,7 +653,7 @@
+ 
+ 
+ install-data-local:
+-	$(mkinstalldirs) $(GMAPDB)
++	$(mkinstalldirs) $(DESTDIR)$(GMAPDB)
+ 
+ dist-hook:
+ #	svn log -v --xml | ./svncl.pl > ChangeLog
diff --git a/debian/patches/series b/debian/patches/series
new file mode 100644
index 0000000..f01aaaa
--- /dev/null
+++ b/debian/patches/series
@@ -0,0 +1 @@
+install-data-local
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 0000000..c15901b
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,12 @@
+#!/usr/bin/make -f
+# debian/rules: exports DH_OPTIONS to the environment and delegates all targets to dh.
+export DH_OPTIONS
+# Catch-all pattern rule: run dh with whatever target name was requested ($@).
+%:
+	dh  $@
+# Configure override: call dh_auto_configure, passing --with-gmapdb=/var/cache/gmap after `--`.
+override_dh_auto_configure:
+	dh_auto_configure -- --with-gmapdb=/var/cache/gmap
+# Install override: call dh_auto_install with debian/tmp as the destination directory.
+override_dh_auto_install:
+	dh_auto_install --destdir=debian/tmp
diff --git a/debian/source/format b/debian/source/format
new file mode 100644
index 0000000..163aaf8
--- /dev/null
+++ b/debian/source/format
@@ -0,0 +1 @@
+3.0 (quilt)
diff --git a/debian/watch b/debian/watch
new file mode 100644
index 0000000..6d4a198
--- /dev/null
+++ b/debian/watch
@@ -0,0 +1,2 @@
+version=3
+http://research-pub.gene.com/gmap/ src/gmap-gsnap-(.*)\.tar\.gz

-- 
Align mRNA and EST sequences to a genome



More information about the debian-med-commit mailing list