[med-svn] r6030 - trunk/packages/transtermhp/trunk/debian

Alexandre Mestiashvili malex-guest at alioth.debian.org
Sat Feb 19 20:10:21 UTC 2011


Author: malex-guest
Date: 2011-02-19 20:10:19 +0000 (Sat, 19 Feb 2011)
New Revision: 6030

Added:
   trunk/packages/transtermhp/trunk/debian/2ndscore.1
   trunk/packages/transtermhp/trunk/debian/transterm.1
   trunk/packages/transtermhp/trunk/debian/transtermhp.lintian-overrides
   trunk/packages/transtermhp/trunk/debian/transtermhp.manpages
Modified:
   trunk/packages/transtermhp/trunk/debian/README.source
   trunk/packages/transtermhp/trunk/debian/copyright
   trunk/packages/transtermhp/trunk/debian/docs
   trunk/packages/transtermhp/trunk/debian/transtermhp.install
Log:
some cleaning , added manual pages .


Added: trunk/packages/transtermhp/trunk/debian/2ndscore.1
===================================================================
--- trunk/packages/transtermhp/trunk/debian/2ndscore.1	                        (rev 0)
+++ trunk/packages/transtermhp/trunk/debian/2ndscore.1	2011-02-19 20:10:19 UTC (rev 6030)
@@ -0,0 +1,240 @@
+.\" Automatically generated by Pod::Man 2.22 (Pod::Simple 3.07)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings.  \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
+.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+.    ds -- \(*W-
+.    ds PI pi
+.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
+.    ds L" ""
+.    ds R" ""
+.    ds C` ""
+.    ds C' ""
+'br\}
+.el\{\
+.    ds -- \|\(em\|
+.    ds PI \(*p
+.    ds L" ``
+.    ds R" ''
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\"
+.\" If the F register is turned on, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD.  Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.ie \nF \{\
+.    de IX
+.    tm Index:\\$1\t\\n%\t"\\$2"
+..
+.    nr % 0
+.    rr F
+.\}
+.el \{\
+.    de IX
+..
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
+.    \" fudge factors for nroff and troff
+.if n \{\
+.    ds #H 0
+.    ds #V .8m
+.    ds #F .3m
+.    ds #[ \f1
+.    ds #] \fP
+.\}
+.if t \{\
+.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+.    ds #V .6m
+.    ds #F 0
+.    ds #[ \&
+.    ds #] \&
+.\}
+.    \" simple accents for nroff and troff
+.if n \{\
+.    ds ' \&
+.    ds ` \&
+.    ds ^ \&
+.    ds , \&
+.    ds ~ ~
+.    ds /
+.\}
+.if t \{\
+.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+.    \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+.    \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+.    \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+.    ds : e
+.    ds 8 ss
+.    ds o a
+.    ds d- d\h'-1'\(ga
+.    ds D- D\h'-1'\(hy
+.    ds th \o'bp'
+.    ds Th \o'LP'
+.    ds ae ae
+.    ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "2NDSCORE 1"
+.TH 2NDSCORE 1 "2011-02-19" "perl v5.10.1" "User Contributed Documentation"
+.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+.Vb 1
+\&  2ndscore  \- find the best hairpin anchored at each position.
+.Ve
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 1
+\& 2ndscore in.fasta > out.hairpins
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+For every position in the sequence this will output a line:
+.PP
+.Vb 2
+\&   \-0.6  52 ..  62      TTCCTAAAGGTTCCA  GCG CAAAA TGC  CATAAGCACCACATT
+\& (score) (start .. end) (left context)   (hairpin)      (right context)
+.Ve
+.PP
+For positions near the ends of the sequences, the context may be padded with
+\&'x' characters. If no hairpin can be found, the score will be 'None'.
+.PP
+Multiple fasta files can be given and multiple sequences can be in each fasta
+file. The output for each sequence will be separated by a line starting with
+\&'>' and containing the \s-1FASTA\s0 description of the sequence.
+.PP
+Because the hairpin scores of the plus-strand and minus-strand may differ (due
+to \s-1GU\s0 binding in \s-1RNA\s0), by default 2ndscore outputs two sets of hairpins for
+every sequence: the \s-1FORWARD\s0 hairpins and the \s-1REVERSE\s0 hairpins. All the forward
+hairpins are output first, and are identified by having the word '\s-1FORWARD\s0' at
+the end of the '>' line preceding them. Similarly, the \s-1REVERSE\s0 hairpins are
+listed after a '>' line ending with '\s-1REVERSE\s0'. If you want to search only one
+or the other strand, you can use:
+.PP
+.Vb 2
+\&    \-\-no\-fwd    Don\*(Aqt print the FORWARD hairpins
+\&    \-\-no\-rvs    Don\*(Aqt print the REVERSE hairpins
+.Ve
+.PP
+You can set the energy function used, just as with transterm with the \-\-gc,
+\&\-\-au, \-\-gu, \-\-mm, \-\-gap options. The \-\-min\-loop, \-\-max\-loop, and \-\-max\-len
+options are also supported.
+.SS "\s-1FORMAT\s0 \s-1OF\s0 \s-1THE\s0 .BAG \s-1FILES\s0"
+.IX Subsection "FORMAT OF THE .BAG FILES"
+The columns for the .bag files are, in order:
+.PP
+.Vb 6
+\&        1. gene_name
+\&        2. terminator_start
+\&        3. terminator_end
+\&        4. hairpin_score
+\&        5. tail_score
+\&        6. terminator_sequence
+\&
+\&    7. terminator_confidence: a combination of the hairpin and tail score that
+\&       takes into account how likely such scores are in a random sequence. This
+\&       is the main "score" for the terminator and is computed as described in
+\&       the paper.
+\&
+\&    8. APPROXIMATE_distance_from_end_of_gene: The *approximate* number of base
+\&       pairs between the end of the gene and the start of the terminator. This
+\&       is approximate in several ways: First, (and most important) TransTermHP
+\&       doesn\*(Aqt always use the real gene ends. Depending on the options you give
+\&       it may trim some off the ends of genes to handle terminators that
+\&       partially overlap with genes. Second, where the terminator "begins"
+\&       isn\*(Aqt that well defined.  This field is intended only for a sanity check
+\&       (terminators reported to be the best near the ends of genes shouldn\*(Aqt be
+\&       _too far_ from the end of the gene).
+.Ve
+.SS "\s-1USING\s0 \s-1TRANSTERM\s0 \s-1WITHOUT\s0 \s-1GENOME\s0 \s-1ANNOTATIONS\s0"
+.IX Subsection "USING TRANSTERM WITHOUT GENOME ANNOTATIONS"
+TransTermHP uses known gene information for only 3 things: (1) tagging the
+putative terminators as either \*(L"inside genes\*(R" or \*(L"intergenic,\*(R" (2) choosing the
+background GC-content percentage to compute the scores, because genes often
+have different \s-1GC\s0 content than the intergenic regions, and (3) producing
+slightly more readable output. Items (1) and (3) are not really necessary, and
+(2) has no effect if your genes have about the same GC-content as your
+intergenic regions.
+.PP
+Unfortunately, TransTermHP doesn't yet have a simple option to run without an
+annotation file (either .ptt or .coords), and requires at least 2 genes to be
+present. The solution is to create fake, small genes that flank each
+chromosome. To do this, make a fake.coords file that contains only these two
+lines:
+.PP
+.Vb 2
+\&        fakegene1       1 2     chrom_id
+\&        fakegene2       L\-1 L   chrom_id
+.Ve
+.PP
+where L is the length of the input sequence and L\-1 is 1 less than the length
+of the input sequence. \*(L"chrom_id\*(R" should be the word directly following the \*(L">\*(R"
+in the .fasta file containing your sequence. (If, for example, your .fasta file
+began with \*(L">seq1\*(R", then chrom_id = seq1).
+.PP
+This creates a \*(L"fake\*(R" annotation with two 1\-base\-long genes flanking the
+sequence in a tail-to-tail arrangement: \-\-> <\-\-. TransTermHP can then be run
+with:
+.PP
+.Vb 1
+\&        transterm \-p expterm.dat sequence.fasta fake.coords
+.Ve
+.PP
+If the G/C content of your intergenic regions is about the same as your genes,
+then this won't have too much of an effect on the scores terminators receive.
+On the other hand, this use of TransTermHP hasn't been tested much at all, so
+it's hard to vouch for its accuracy.
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+\&\fItransterm\fR\|(1)

Modified: trunk/packages/transtermhp/trunk/debian/README.source
===================================================================
--- trunk/packages/transtermhp/trunk/debian/README.source	2011-02-19 15:14:02 UTC (rev 6029)
+++ trunk/packages/transtermhp/trunk/debian/README.source	2011-02-19 20:10:19 UTC (rev 6030)
@@ -1,8 +1,17 @@
 transtermhp for Debian
 ----------------------
 
-test target is broken .
+7. PORTING NOTES
 
+If you want to run TransTermHP on a non-UNIX-like system, you should take note
+of the following:
 
+* gene-reader.cc assumes that the filename extension separator is "." and the
+  path separator is "/".
 
+* getopt_long() is used to process the command line arguments.
 
+
+
+
+

Modified: trunk/packages/transtermhp/trunk/debian/copyright
===================================================================
--- trunk/packages/transtermhp/trunk/debian/copyright	2011-02-19 15:14:02 UTC (rev 6029)
+++ trunk/packages/transtermhp/trunk/debian/copyright	2011-02-19 20:10:19 UTC (rev 6030)
@@ -6,6 +6,15 @@
 Copyright: Steven Salzberg salzberg at umiacs.umd.edu
 License: GPL-2.0+
 
+Files: 2ndscore.cc
+Copyright: 2005-2006 Carl Kingsford 
+License: GPL-2.0+
+
+Files: transterm.cc
+Copyright: 2005-2006 Carl Kingsford
+License: GPL-2.0+
+
+
 Files: debian/*
 Copyright: 2011 Alex Mestiashvili <alex at biotec.tu-dresden.de>
 License: GPL-2.0+

Modified: trunk/packages/transtermhp/trunk/debian/docs
===================================================================
--- trunk/packages/transtermhp/trunk/debian/docs	2011-02-19 15:14:02 UTC (rev 6029)
+++ trunk/packages/transtermhp/trunk/debian/docs	2011-02-19 20:10:19 UTC (rev 6030)
@@ -1,3 +1,2 @@
-LICENSE.txt
 RELEASE-NOTES.txt
 USAGE.txt

Added: trunk/packages/transtermhp/trunk/debian/transterm.1
===================================================================
--- trunk/packages/transtermhp/trunk/debian/transterm.1	                        (rev 0)
+++ trunk/packages/transtermhp/trunk/debian/transterm.1	2011-02-19 20:10:19 UTC (rev 6030)
@@ -0,0 +1,392 @@
+.\" Automatically generated by Pod::Man 2.22 (Pod::Simple 3.07)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings.  \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
+.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+.    ds -- \(*W-
+.    ds PI pi
+.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
+.    ds L" ""
+.    ds R" ""
+.    ds C` ""
+.    ds C' ""
+'br\}
+.el\{\
+.    ds -- \|\(em\|
+.    ds PI \(*p
+.    ds L" ``
+.    ds R" ''
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\"
+.\" If the F register is turned on, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD.  Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.ie \nF \{\
+.    de IX
+.    tm Index:\\$1\t\\n%\t"\\$2"
+..
+.    nr % 0
+.    rr F
+.\}
+.el \{\
+.    de IX
+..
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
+.    \" fudge factors for nroff and troff
+.if n \{\
+.    ds #H 0
+.    ds #V .8m
+.    ds #F .3m
+.    ds #[ \f1
+.    ds #] \fP
+.\}
+.if t \{\
+.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+.    ds #V .6m
+.    ds #F 0
+.    ds #[ \&
+.    ds #] \&
+.\}
+.    \" simple accents for nroff and troff
+.if n \{\
+.    ds ' \&
+.    ds ` \&
+.    ds ^ \&
+.    ds , \&
+.    ds ~ ~
+.    ds /
+.\}
+.if t \{\
+.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+.    \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+.    \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+.    \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+.    ds : e
+.    ds 8 ss
+.    ds o a
+.    ds d- d\h'-1'\(ga
+.    ds D- D\h'-1'\(hy
+.    ds th \o'bp'
+.    ds Th \o'LP'
+.    ds ae ae
+.    ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "TRANSTERM 1"
+.TH TRANSTERM 1 "2011-02-19" "perl v5.10.1" "User Contributed Documentation"
+.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+.Vb 1
+\& transterm  \- Finds rho\-independent transcription terminators in bacterial genomes.
+.Ve
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+transterm \-p expterm.dat seq.fasta annotation.ptt > output.tt
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+Any number of fasta and annotation files can be listed but fasta files should
+come before annotation files. The type of the file is determined by the
+extension:
+.PP
+.Vb 2
+\&    .ptt               a GenBank ptt annotation file
+\&    .coords or .crd    a simple annotation file
+.Ve
+.PP
+Each line of a .coords or .crd file has the format:
+.PP
+.Vb 1
+\&    gene_name  start  end  chrom_id
+.Ve
+.PP
+The chrom_id specifies which sequence the annotation should apply to. For a
+\&.ptt file, the chrom_id is taken to be the filename with the path and
+extension removed. A filename with any other extension is assumed to be a
+fasta file.
+.PP
+When processing an annotation for a chromosome with id = \s-1ID\s0, the first word of
+the '>' lines of the input sequences are searched for \s-1ID\s0.  Because there is no
+good standard for how the '>' line is formatted, several heuristics are tried
+to find \s-1ID\s0 in the '>' line. In the order tried, they are:
+.PP
+.Vb 4
+\&    >ID
+\&    >junk|cmr:ID|junk or junk|ID|junk
+\&    >junk|gi|ID|junk or >junk|gi|ID.junk|junk
+\&    >junk:ID
+.Ve
+.PP
+The option '\-p expterm.dat' uses the newest confidence scheme, where
+expterm.dat is the path to the file of that name supplied with TransTermHP. If
+\&'\-p expterm.dat' is omitted, the version 1.0 confidence scheme is used. See
+section '\s-1COMMAND\s0 \s-1LINE\s0 \s-1OPTIONS\s0' for more detail.
+.SS "\s-1FORMAT\s0 \s-1OF\s0 \s-1THE\s0 \s-1TRANSTERM\s0 \s-1OUTPUT\s0"
+.IX Subsection "FORMAT OF THE TRANSTERM OUTPUT"
+The organism's genes are listed sorted by their end coordinate and terminators
+are output between them. A terminator entry looks like this:
+.PP
+.Vb 2
+\&    TERM 19  15310 \- 15327  \-      F     99      \-12.7 \-4.0 |bidir
+\&    (name)   (start \- end)  (sense)(loc) (conf) (hp) (tail) (notes)
+.Ve
+.PP
+where 'conf' is the overall confidence score, 'hp' is the hairpin score, and
+\&'tail' is the tail score. 'Conf' (which ranges from 0 to 100) is what you
+probably want to use to assess the quality of a terminator. Higher is better.
+The confidence, hp score, and tail scores are described in the paper cited
+above.  'Loc' gives the type of region the terminator is in:
+.PP
+.Vb 6
+\&    \*(AqG\*(Aq = in the interior of a gene (at least 50bp from an end),
+\&    \*(AqF\*(Aq = between two +strand genes,
+\&    \*(AqR\*(Aq = between two \-strand genes,
+\&    \*(AqT\*(Aq = between the ends of a +strand gene and a \-strand gene,
+\&    \*(AqH\*(Aq = between the starts of a +strand gene and a \-strand gene,
+\&    \*(AqN\*(Aq = none of the above (for the start and end of the DNA)
+.Ve
+.PP
+Because of how overlapping genes are handled, these designations are not
+exclusive. 'G', 'F', or 'R' can also be given in lowercase, indicating that
+the terminator is on the opposite strand as the region.  Unless the
+\&\-\-all\-context option is given, only candidate terminators that appear to be in
+an appropriate genome context (e.g. T, F, R) are output.
+.PP
+Following the \s-1TERM\s0 line is the sequence of the hairpin and the 5' and 3'
+tails, always written 5' to 3'.
+.SS "\s-1TRANSTERM\s0 \s-1COMMAND\s0 \s-1LINE\s0 \s-1OPTIONS\s0"
+.IX Subsection "TRANSTERM COMMAND LINE OPTIONS"
+You can also set how large a hairpin must be to be considered:
+.PP
+.Vb 2
+\&    \-\-min\-stem=n    Stem must be n nucleotides long
+\&    \-\-min\-loop=n    Loop portion of the hairpin must be at least n long
+.Ve
+.PP
+You can also set the maximum size of the hairpin that will be found:
+.PP
+.Vb 2
+\&    \-\-max\-len=n     Total extent of hairpin <= n NT long
+\&    \-\-max\-loop=n    The loop portion can be no longer than n
+.Ve
+.PP
+The maximum length is the total length for the hairpin portion (2 stems, 1
+loop) and does not include the U\-tail. It's measured in nucleotides in the
+input sequence, so because of gaps, the actual structure may be longer than
+max-len.  Max-len must be less than the compiled-in constant \s-1REALLY_MAX_UP\s0
+(which by default is 1000). To increase the size of structures found recompile
+after increasing this constant.
+.PP
+TransTermHP assigns a score to the hairpin and tail portions of potential
+terminators. Lower scores are considered better. Many of the constants used in
+scoring hairpins can be set from the command line:
+.PP
+.Vb 5
+\&    \-\-gc=f       Score of a G\-C pair
+\&    \-\-au=f       Score of an A\-U pair
+\&    \-\-gu=f       Score of a G\-U pair
+\&    \-\-mm=f       Score of any other pair
+\&    \-\-gap=f      Score of a gap in the hairpin
+.Ve
+.PP
+The cost of loops of various lengths can be set using:
+.PP
+.Vb 1
+\&    \-\-loop\-penalty=f1,f2,f3,f4,f5,...fn
+.Ve
+.PP
+where f1 is the cost of a loop of length \-\-min\-loop, f2 is the cost of a loop
+of length \-\-min\-loop+1, and so on. If there are too few terms to cover up to
+max-loop, the last term is repeated. Thus \-\-loop\-penalty=0,2 would assign cost
+0 to any loop of length min-loop, and 2 to any longer loop (up to max-loop,
+after which longer loops are given infinite scores). Extra terms are ignored.
+.PP
+Note that if you are using the \-\-pval\-conf confidence scheme (see below), you
+must regenerate the expterm.dat file if you change any of the above constants.
+.PP
+To weed out any potential terminator with tail or hairpin scores that are too
+large, you can use the following options:
+.PP
+.Vb 2
+\&    \-\-max\-hp\-score=f    Maximum allowable hairpin score
+\&    \-\-max\-tail\-score=f  Maximum allowable tail score
+.Ve
+.PP
+Terminator hairpins must be adjacent to a \*(L"U\-rich\*(R" region. You can adjust the
+constants that define what constitutes a U\-rich region. Using the options:
+.PP
+.Vb 2
+\&    \-\-uwin\-size=s
+\&    \-\-uwin\-require=r
+.Ve
+.PP
+requires that there are at least r 'U' nucleotides in the s\-nucleotide-long
+window adjacent to the hairpin. Again, if you change these constants, you
+should regenerate expterms.dat.
+.PP
+Before the main output, TransTermHP will output the values of the above options
+in a format suitable to be used on the command line.
+.PP
+In addition to the tail and hairpin scores, each possible terminator is
+assigned a confidence \-\-\- a value between 0 and 100 that indicates how likely
+it is that the sequence is a terminator. The scoring scheme needs a background
+file (supplied with TransTermHP) that is specified using:
+.PP
+.Vb 1
+\&    \-\-pval\-conf expterms.dat
+.Ve
+.PP
+This will use the distribution in the file expterms.dat as the background. (You can
+abbreviate this as \*(L"\-p expterms.dat\*(R".) Though the supplied expterms.dat file is
+derived from random sequences, any background distribution can be used by
+supplying your own expterms.dat file.  See below for the format of
+expterms.dat.  The values in expterms.dat depend on the scoring constants,
+definition of u\-rich regions, and the maximum allowed tail and hp scores.
+Thus, if you change any of these constants using the options above, you should
+regenerate expterms.dat.
+.PP
+The main output of TransTermHP is a list of terminators interleaved between a
+listing of the gene annotations that were provided as input. This output can
+be customized in a few ways:
+.PP
+.Vb 3
+\&    \-S              Don\*(Aqt output the terminator sequences
+\&    \-\-min\-conf=n    Only output terminators with confidence >= n (can
+\&                    abbreviate this as \-c n; default is 76.)
+.Ve
+.PP
+Additional analysis output can be obtained with the following options:
+.PP
+.Vb 3
+\&    \-\-bag\-output file.bag  Output the Best terminator After Gene
+\&    \-\-t2t\-perf file.t2t    Output a summary of which tail\-to\-tail regions
+\&                           have good terminators
+.Ve
+.SS "\s-1RECALIBRATING\s0 \s-1USING\s0 \s-1DIFFERENT\s0 \s-1PARAMETERS\s0"
+.IX Subsection "RECALIBRATING USING DIFFERENT PARAMETERS"
+As mentioned above, if you change any of the basic scoring function and search
+parameters and are using the version 2.0 confidence scheme (recommended) then
+you have to recompute the values in the expterm.dat file. If you have python
+installed this is easy (though perhaps time consuming). You can issue the
+command:
+.PP
+.Vb 1
+\&    % calibrate.sh newexpterms.dat [OPTIONS TO TRANSTERM]
+.Ve
+.PP
+where \*(L"[\s-1OPTIONS\s0 \s-1TO\s0 \s-1TRANSTERM\s0]\*(R" are TransTermHP options (discussed above) that
+set the parameters to what you want them to be. After calibrate.sh finishes,
+newexpterms.dat will be in the current directory and can serve as an argument
+to \-p when using the same parameters you passed to calibrate.sh.
+.PP
+Note that for the newexpterms.dat to be valid, you must supply the same basic
+parameters to TransTermHP on subsequent runs. TransTerm (or newexpterms.dat)
+will not remember these parameters for you. The best way to handle this is to
+make a shell script wrapper around transterm that always passes in your new
+parameters.
+.PP
+Output formatting parameters do not require regeneration of expterms.dat \-\-\-
+see discussion above for which parameters expterm.dat depends on.
+.PP
+calibrate.sh can be found in /usr/share/doc/transtermhp/examples directory.
+.SS "\s-1FORMAT\s0 \s-1OF\s0 \s-1THE\s0 \s-1EXPTERMS\s0.DAT \s-1FILE\s0"
+.IX Subsection "FORMAT OF THE EXPTERMS.DAT FILE"
+The 'pval\-conf' confidence scheme, selected with the option \*(L"\-\-pval\-conf
+expterms.dat\*(R" (or '\-p expterms.dat') computes the confidence of a terminator
+with \s-1HP\s0 energy E and tail energy T as follows.  First, the ranges of \s-1HP\s0
+energies and tail energies are evenly divided into bins, and the appropriate
+bins e and t are found for E and T. Then the confidence is computed as
+described in [2].
+.PP
+The first line of expterms.dat contains 2 numbers:
+.PP
+.Vb 1
+\&   seqlen  num_bins
+.Ve
+.PP
+The (low_hp, high_hp) and (low_tail, high_tail) ranges give the bounds on the
+hairpin and tail scores. The integer num_bins gives the number of
+equally-sized bins into which those ranges are divided. Seqlen gives the
+length of the random sequence that was used to generate the data in the rest
+of the file.
+.PP
+Following this line are any number of (at, R, M) triples, where 'at' is the \s-1AT\s0
+content, R is a 4\-tuple (low_hp, high_hp, low_tail, high_tail) giving the
+range of the \s-1HP\s0 and tail scores observed in random sequences of this \s-1AT\s0
+content, and M is the distribution matrix.  These (at, R, M) triples are
+formatted as follows:
+.PP
+.Vb 5
+\&   at  low_hp  high_hp  low_tail  high_tail
+\&   n11 n12 n13 n14 ... n1,num_bins
+\&   n21  ...
+\&   ...
+\&   n_num_bins,1 ...
+.Ve
+.PP
+The mu_r(e,t) term is computed by selecting the matrix with the at value
+closest to the computed \f(CW%AT\fR of the region r. If the total length of region r
+sequence is L_r, then
+.PP
+.Vb 1
+\&  mu_r(e,t) = n_t_e * L_r/seqlen
+.Ve
+.PP
+where n_t_e is the entry in the t\-th row and e\-th column of the selected
+matrix, and seqlen is the first number in the first line of the file.
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+2ndscore(1)

Modified: trunk/packages/transtermhp/trunk/debian/transtermhp.install
===================================================================
--- trunk/packages/transtermhp/trunk/debian/transtermhp.install	2011-02-19 15:14:02 UTC (rev 6029)
+++ trunk/packages/transtermhp/trunk/debian/transtermhp.install	2011-02-19 20:10:19 UTC (rev 6030)
@@ -1,4 +1,6 @@
-transterm	/usr/bin
-2ndscore	/usr/bin
-calibrate.sh	/usr/share/transtermhp
-make_expterm.py	/usr/share/transtermhp
+transterm	usr/bin
+2ndscore	usr/bin
+calibrate.sh	usr/share/doc/transtermhp/examples
+mfold_rna.sh    usr/share/doc/transtermhp/examples
+make_expterm.py	usr/share/doc/transtermhp/examples
+random_fasta.py usr/share/doc/transtermhp/examples

Added: trunk/packages/transtermhp/trunk/debian/transtermhp.lintian-overrides
===================================================================
--- trunk/packages/transtermhp/trunk/debian/transtermhp.lintian-overrides	                        (rev 0)
+++ trunk/packages/transtermhp/trunk/debian/transtermhp.lintian-overrides	2011-02-19 20:10:19 UTC (rev 6030)
@@ -0,0 +1,2 @@
+#scientific words are so long ...
+transtermhp: extended-description-line-too-long

Added: trunk/packages/transtermhp/trunk/debian/transtermhp.manpages
===================================================================
--- trunk/packages/transtermhp/trunk/debian/transtermhp.manpages	                        (rev 0)
+++ trunk/packages/transtermhp/trunk/debian/transtermhp.manpages	2011-02-19 20:10:19 UTC (rev 6030)
@@ -0,0 +1 @@
+debian/*.1




More information about the debian-med-commit mailing list