[med-svn] r6030 - trunk/packages/transtermhp/trunk/debian
Alexandre Mestiashvili
malex-guest at alioth.debian.org
Sat Feb 19 20:10:21 UTC 2011
Author: malex-guest
Date: 2011-02-19 20:10:19 +0000 (Sat, 19 Feb 2011)
New Revision: 6030
Added:
trunk/packages/transtermhp/trunk/debian/2ndscore.1
trunk/packages/transtermhp/trunk/debian/transterm.1
trunk/packages/transtermhp/trunk/debian/transtermhp.lintian-overrides
trunk/packages/transtermhp/trunk/debian/transtermhp.manpages
Modified:
trunk/packages/transtermhp/trunk/debian/README.source
trunk/packages/transtermhp/trunk/debian/copyright
trunk/packages/transtermhp/trunk/debian/docs
trunk/packages/transtermhp/trunk/debian/transtermhp.install
Log:
some cleaning , added manual pages .
Added: trunk/packages/transtermhp/trunk/debian/2ndscore.1
===================================================================
--- trunk/packages/transtermhp/trunk/debian/2ndscore.1 (rev 0)
+++ trunk/packages/transtermhp/trunk/debian/2ndscore.1 2011-02-19 20:10:19 UTC (rev 6030)
@@ -0,0 +1,240 @@
+.\" Automatically generated by Pod::Man 2.22 (Pod::Simple 3.07)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is turned on, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.ie \nF \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. nr % 0
+. rr F
+.\}
+.el \{\
+. de IX
+..
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "2NDSCORE 1"
+.TH 2NDSCORE 1 "2011-02-19" "perl v5.10.1" "User Contributed Documentation"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+.Vb 1
+\& 2ndscore \- find the best hairpin anchored at each position.
+.Ve
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 1
+\& 2ndscore in.fasta > out.hairpins
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+For every position in the sequence this will output a line:
+.PP
+.Vb 2
+\& \-0.6 52 .. 62 TTCCTAAAGGTTCCA GCG CAAAA TGC CATAAGCACCACATT
+\& (score) (start .. end) (left context) (hairpin) (right context)
+.Ve
+.PP
+For positions near the ends of the sequences, the context may be padded with
+\&'x' characters. If no hairpin can be found, the score will be 'None'.
+.PP
+Multiple fasta files can be given and multiple sequences can be in each fasta
+file. The output for each sequence will be separated by a line starting with
+\&'>' and containing the \s-1FASTA\s0 description of the sequence.
+.PP
+Because the hairpin scores of the plus-strand and minus-strand may differ (due
+to \s-1GU\s0 binding in \s-1RNA\s0), by default 2ndscore outputs two sets of hairpins for
+every sequence: the \s-1FORWARD\s0 hairpins and the \s-1REVERSE\s0 hairpins. All the forward
+hairpins are output first, and are identified by having the word '\s-1FORWARD\s0' at
+the end of the '>' line preceding them. Similarly, the \s-1REVERSE\s0 hairpins are
+listed after a '>' line ending with '\s-1REVERSE\s0'. If you want to search only one
+or the other strand, you can use:
+.PP
+.Vb 2
+\& \-\-no\-fwd Don\*(Aqt print the FORWARD hairpins
+\& \-\-no\-rvs Don\*(Aqt print the REVERSE hairpins
+.Ve
+.PP
+You can set the energy function used, just as with transterm with the \-\-gc,
+\&\-\-au, \-\-gu, \-\-mm, \-\-gap options. The \-\-min\-loop, \-\-max\-loop, and \-\-max\-len
+options are also supported.
+.SS "\s-1FORMAT\s0 \s-1OF\s0 \s-1THE\s0 .BAG \s-1FILES\s0"
+.IX Subsection "FORMAT OF THE .BAG FILES"
+The columns for the .bag files are, in order:
+.PP
+.Vb 6
+\& 1. gene_name
+\& 2. terminator_start
+\& 3. terminator_end
+\& 4. hairpin_score
+\& 5. tail_score
+\& 6. terminator_sequence
+\&
+\& 7. terminator_confidence: a combination of the hairpin and tail score that
+\& takes into account how likely such scores are in a random sequence. This
+\& is the main "score" for the terminator and is computed as described in
+\& the paper.
+\&
+\& 8. APPROXIMATE_distance_from_end_of_gene: The *approximate* number of base
+\& pairs between the end of the gene and the start of the terminator. This
+\& is approximate in several ways: First, (and most important) TransTermHP
+\& doesn\*(Aqt always use the real gene ends. Depending on the options you give
+\& it may trim some off the ends of genes to handle terminators that
+\& partially overlap with genes. Second, where the terminator "begins"
+\& isn\*(Aqt that well defined. This field is intended only for a sanity check
+\& (terminators reported to be the best near the ends of genes shouldn\*(Aqt be
+\& _too far_ from the end of the gene).
+.Ve
+.SS "\s-1USING\s0 \s-1TRANSTERM\s0 \s-1WITHOUT\s0 \s-1GENOME\s0 \s-1ANNOTATIONS\s0"
+.IX Subsection "USING TRANSTERM WITHOUT GENOME ANNOTATIONS"
+TransTermHP uses known gene information for only 3 things: (1) tagging the
+putative terminators as either \*(L"inside genes\*(R" or \*(L"intergenic,\*(R" (2) choosing the
+background GC-content percentage to compute the scores, because genes often
+have different \s-1GC\s0 content than the intergenic regions, and (3) producing
+slightly more readable output. Items (1) and (3) are not really necessary, and
+(2) has no effect if your genes have about the same GC-content as your
+intergenic regions.
+.PP
+Unfortunately, TransTermHP doesn't yet have a simple option to run without an
+annotation file (either .ptt or .coords), and requires at least 2 genes to be
+present. The solution is to create fake, small genes that flank each
+chromosome. To do this, make a fake.coords file that contains only these two
+lines:
+.PP
+.Vb 2
+\& fakegene1 1 2 chrom_id
+\& fakegene2 L\-1 L chrom_id
+.Ve
+.PP
+where L is the length of the input sequence and L\-1 is 1 less than the length
+of the input sequence. \*(L"chrom_id\*(R" should be the word directly following the \*(L">\*(R"
+in the .fasta file containing your sequence. (If, for example, your .fasta file
+began with \*(L">seq1\*(R", then chrom_id = seq1).
+.PP
+This creates a \*(L"fake\*(R" annotation with two 1\-base\-long genes flanking the
+sequence in a tail-to-tail arrangement: \-\-> <\-\-. TransTermHP can then be run
+with:
+.PP
+.Vb 1
+\& transterm \-p expterm.dat sequence.fasta fake.coords
+.Ve
+.PP
+If the G/C content of your intergenic regions is about the same as your genes,
+then this won't have too much of an effect on the scores terminators receive.
+On the other hand, this use of TransTermHP hasn't been tested much at all, so
+it's hard to vouch for its accuracy.
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+\&\fItransterm\fR\|(1)
Modified: trunk/packages/transtermhp/trunk/debian/README.source
===================================================================
--- trunk/packages/transtermhp/trunk/debian/README.source 2011-02-19 15:14:02 UTC (rev 6029)
+++ trunk/packages/transtermhp/trunk/debian/README.source 2011-02-19 20:10:19 UTC (rev 6030)
@@ -1,8 +1,17 @@
transtermhp for Debian
----------------------
-test target is broken .
+7. PORTING NOTES
+If you want to run TransTermHP on a non-UNIX-like system, you should take note
+of the following:
+* gene-reader.cc assumes that the filename extension separator is "." and the
+ path separator is "/".
+* getopt_long() is used to process the command line arguments.
+
+
+
+
Modified: trunk/packages/transtermhp/trunk/debian/copyright
===================================================================
--- trunk/packages/transtermhp/trunk/debian/copyright 2011-02-19 15:14:02 UTC (rev 6029)
+++ trunk/packages/transtermhp/trunk/debian/copyright 2011-02-19 20:10:19 UTC (rev 6030)
@@ -6,6 +6,15 @@
Copyright: Steven Salzberg salzberg at umiacs.umd.edu
License: GPL-2.0+
+Files: 2ndscore.cc
+Copyright: 2005-2006 Carl Kingsford
+License: GPL-2.0+
+
+Files: transterm.cc
+Copyright: 2005-2006 Carl Kingsford
+License: GPL-2.0+
+
+
Files: debian/*
Copyright: 2011 Alex Mestiashvili <alex at biotec.tu-dresden.de>
License: GPL-2.0+
Modified: trunk/packages/transtermhp/trunk/debian/docs
===================================================================
--- trunk/packages/transtermhp/trunk/debian/docs 2011-02-19 15:14:02 UTC (rev 6029)
+++ trunk/packages/transtermhp/trunk/debian/docs 2011-02-19 20:10:19 UTC (rev 6030)
@@ -1,3 +1,2 @@
-LICENSE.txt
RELEASE-NOTES.txt
USAGE.txt
Added: trunk/packages/transtermhp/trunk/debian/transterm.1
===================================================================
--- trunk/packages/transtermhp/trunk/debian/transterm.1 (rev 0)
+++ trunk/packages/transtermhp/trunk/debian/transterm.1 2011-02-19 20:10:19 UTC (rev 6030)
@@ -0,0 +1,392 @@
+.\" Automatically generated by Pod::Man 2.22 (Pod::Simple 3.07)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is turned on, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.ie \nF \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. nr % 0
+. rr F
+.\}
+.el \{\
+. de IX
+..
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "TRANSTERM 1"
+.TH TRANSTERM 1 "2011-02-19" "perl v5.10.1" "User Contributed Documentation"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+.Vb 1
+\& transterm \- Finds rho\-independent transcription terminators in bacterial genomes.
+.Ve
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+transterm \-p expterm.dat seq.fasta annotation.ptt > output.tt
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+Any number of fasta and annotation files can be listed but fasta files should
+come before annotation files. The type of the file is determined by the
+extension:
+.PP
+.Vb 2
+\& .ptt a GenBank ptt annotation file
+\& .coords or .crd a simple annotation file
+.Ve
+.PP
+Each line of a .coords or .crd file has the format:
+.PP
+.Vb 1
+\& gene_name start end chrom_id
+.Ve
+.PP
+The chrom_id specifies which sequence the annotation should apply to. For a
+\&.ptt file, the chrom_id is taken to be the filename with the path and
+extension removed. A filename with any other extension is assumed to be a
+fasta file.
+.PP
+When processing an annotation for a chromosome with id = \s-1ID\s0, the first word of
+the '>' lines of the input sequences are searched for \s-1ID\s0. Because there is no
+good standard for how the '>' line is formatted, several heuristics are tried
+to find \s-1ID\s0 in the '>' line. In the order tried, they are:
+.PP
+.Vb 4
+\& >ID
+\& >junk|cmr:ID|junk or junk|ID|junk
+\& >junk|gi|ID|junk or >junk|gi|ID.junk|junk
+\& >junk:ID
+.Ve
+.PP
+The option '\-p expterm.dat' uses the newest confidence scheme, where
+expterm.dat is the path to the file of that name supplied with TransTermHP. If
+\&'\-p expterm.dat' is omitted, the version 1.0 confidence scheme is used. See
+section '\s-1COMMAND\s0 \s-1LINE\s0 \s-1OPTIONS\s0' for more detail.
+.SS "\s-1FORMAT\s0 \s-1OF\s0 \s-1THE\s0 \s-1TRANSTERM\s0 \s-1OUTPUT\s0"
+.IX Subsection "FORMAT OF THE TRANSTERM OUTPUT"
+The organism's genes are listed sorted by their end coordinate and terminators
+are output between them. A terminator entry looks like this:
+.PP
+.Vb 2
+\& TERM 19 15310 \- 15327 \- F 99 \-12.7 \-4.0 |bidir
+\& (name) (start \- end) (sense)(loc) (conf) (hp) (tail) (notes)
+.Ve
+.PP
+where 'conf' is the overall confidence score, 'hp' is the hairpin score, and
+\&'tail' is the tail score. 'Conf' (which ranges from 0 to 100) is what you
+probably want to use to assess the quality of a terminator. Higher is better.
+The confidence, hp score, and tail scores are described in the paper cited
+above. 'Loc' gives the type of region the terminator is in:
+.PP
+.Vb 6
+\& \*(AqG\*(Aq = in the interior of a gene (at least 50bp from an end),
+\& \*(AqF\*(Aq = between two +strand genes,
+\& \*(AqR\*(Aq = between two \-strand genes,
+\& \*(AqT\*(Aq = between the ends of a +strand gene and a \-strand gene,
+\& \*(AqH\*(Aq = between the starts of a +strand gene and a \-strand gene,
+\& \*(AqN\*(Aq = none of the above (for the start and end of the DNA)
+.Ve
+.PP
+Because of how overlapping genes are handled, these designations are not
+exclusive. 'G', 'F', or 'R' can also be given in lowercase, indicating that
+the terminator is on the opposite strand as the region. Unless the
+\&\-\-all\-context option is given, only candidate terminators that appear to be in
+an appropriate genome context (e.g. T, F, R) are output.
+.PP
+Following the \s-1TERM\s0 line is the sequence of the hairpin and the 5' and 3'
+tails, always written 5' to 3'.
+.SS "\s-1TRANSTERM\s0 \s-1COMMAND\s0 \s-1LINE\s0 \s-1OPTIONS\s0"
+.IX Subsection "TRANSTERM COMMAND LINE OPTIONS"
+You can also set how large a hairpin must be to be considered:
+.PP
+.Vb 2
+\& \-\-min\-stem=n Stem must be n nucleotides long
+\& \-\-min\-loop=n Loop portion of the hairpin must be at least n long
+.Ve
+.PP
+You can also set the maximum size of the hairpin that will be found:
+.PP
+.Vb 2
+\& \-\-max\-len=n Total extent of hairpin <= n NT long
+\& \-\-max\-loop=n The loop portion can be no longer than n
+.Ve
+.PP
+The maximum length is the total length for the hairpin portion (2 stems, 1
+loop) and does not include the U\-tail. It's measured in nucleotides in the
+input sequence, so because of gaps, the actual structure may be longer than
+max-len. Max-len must be less than the compiled-in constant \s-1REALLY_MAX_UP\s0
+(which by default is 1000). To increase the size of structures found recompile
+after increasing this constant.
+.PP
+TransTermHP assigns a score to the hairpin and tail portions of potential
+terminators. Lower scores are considered better. Many of the constants used in
+scoring hairpins can be set from the command line:
+.PP
+.Vb 5
+\& \-\-gc=f Score of a G\-C pair
+\& \-\-au=f Score of an A\-U pair
+\& \-\-gu=f Score of a G\-U pair
+\& \-\-mm=f Score of any other pair
+\& \-\-gap=f Score of a gap in the hairpin
+.Ve
+.PP
+The cost of loops of various lengths can be set using:
+.PP
+.Vb 1
+\& \-\-loop\-penalty=f1,f2,f3,f4,f5,...fn
+.Ve
+.PP
+where f1 is the cost of a loop of length \-\-min\-loop, f2 is the cost of a loop
+of length \-\-min\-loop+1, and so on. If there are too few terms to cover up to
+max-loop, the last term is repeated. Thus \-\-loop\-penalty=0,2 would assign cost
+0 to any loop of length min-loop, and 2 to any longer loop (up to max-loop,
+after which longer loops are given infinite scores). Extra terms are ignored.
+.PP
+Note that if you are using the \-\-pval\-conf confidence scheme (see below), you
+must regenerate the expterm.dat file if you change any of the above constants.
+.PP
+To weed out any potential terminator with tail or hairpin scores that are too
+large, you can use the following options:
+.PP
+.Vb 2
+\& \-\-max\-hp\-score=f Maximum allowable hairpin score
+\& \-\-max\-tail\-score=f Maximum allowable tail score
+.Ve
+.PP
+Terminator hairpins must be adjacent to a \*(L"U\-rich\*(R" region. You can adjust the
+constants that define what constitutes a U\-rich region. Using the options:
+.PP
+.Vb 2
+\& \-\-uwin\-size=s
+\& \-\-uwin\-require=r
+.Ve
+.PP
+requires that there are at least r 'U' nucleotides in the s\-nucleotide-long
+window adjacent to the hairpin. Again, if you change these constants, you
+should regenerate expterms.dat.
+.PP
+Before the main output, TransTermHP will output the values of the above options
+in a format suitable to be used on the command line.
+.PP
+In addition to the tail and hairpin scores, each possible terminator is
+assigned a confidence \-\-\- a value between 0 and 100 that indicates how likely
+it is that the sequence is a terminator. The scoring scheme needs a background
+file (supplied with TransTermHP) that is specified using:
+.PP
+.Vb 1
+\& \-\-pval\-conf expterms.dat
+.Ve
+.PP
+This will use the distribution in the file expterms.dat as the background. (You can
+abbreviate this as \*(L"\-p expterms.dat\*(R".) Though the supplied expterms.dat file is
+derived from random sequences, any background distribution can be used by
+supplying your own expterms.dat file. See below for the format of
+expterms.dat. The values in expterms.dat depend on the scoring constants,
+definition of u\-rich regions, and the maximum allowed tail and hp scores.
+Thus, if you change any of these constants using the options above, you should
+regenerate expterms.dat.
+.PP
+The main output of TransTermHP is a list of terminators interleaved between a
+listing of the gene annotations that were provided as input. This output can
+be customized in a few ways:
+.PP
+.Vb 3
+\& \-S Don\*(Aqt output the terminator sequences
+\& \-\-min\-conf=n Only output terminators with confidence >= n (can
+\& abbreviate this as \-c n; default is 76.)
+.Ve
+.PP
+Additional analysis output can be obtained with the following options:
+.PP
+.Vb 3
+\& \-\-bag\-output file.bag Output the Best terminator After Gene
+\& \-\-t2t\-perf file.t2t Output a summary of which tail\-to\-tail regions
+\& have good terminators
+.Ve
+.SS "\s-1RECALIBRATING\s0 \s-1USING\s0 \s-1DIFFERENT\s0 \s-1PARAMETERS\s0"
+.IX Subsection "RECALIBRATING USING DIFFERENT PARAMETERS"
+As mentioned above, if you change any of the basic scoring function and search
+parameters and are using the version 2.0 confidence scheme (recommended) then
+you have to recompute the values in the expterm.dat file. If you have python
+installed this is easy (though perhaps time consuming). You can issue the
+command:
+.PP
+.Vb 1
+\& % calibrate.sh newexpterms.dat [OPTIONS TO TRANSTERM]
+.Ve
+.PP
+where \*(L"[\s-1OPTIONS\s0 \s-1TO\s0 \s-1TRANSTERM\s0]\*(R" are TransTermHP options (discussed above) that
+set the parameters to what you want them to be. After calibrate.sh finishes,
+newexpterms.dat will be in the current directory and can serve as an argument
+to \-p when using the same parameters you passed to calibrate.sh.
+.PP
+Note that for the newexpterms.dat to be valid, you must supply the same basic
+parameters to TransTermHP on subsequent runs. TransTerm (or newexpterms.dat)
+will not remember these parameters for you. The best way to handle this is to
+make a shell script wrapper around transterm that always passes in your new
+parameters.
+.PP
+Output formatting parameters do not require regeneration of expterms.dat \-\-\-
+see discussion above for which parameters expterm.dat depends on.
+.PP
+calibrate.sh can be found in the /usr/share/doc/transtermhp/examples directory.
+.SS "\s-1FORMAT\s0 \s-1OF\s0 \s-1THE\s0 \s-1EXPTERMS\s0.DAT \s-1FILE\s0"
+.IX Subsection "FORMAT OF THE EXPTERMS.DAT FILE"
+The 'pval\-conf' confidence scheme, selected with the option \*(L"\-\-pval\-conf
+expterms.dat\*(R" (or '\-p expterms.dat') computes the confidence of a terminator
+with \s-1HP\s0 energy E and tail energy T as follows. First, the ranges of \s-1HP\s0
+energies and tail energies are evenly divided into bins, and the appropriate
+bins e and t are found for E and T. Then the confidence is computed as
+described in [2].
+.PP
+The first line of expterms.dat contains 2 numbers:
+.PP
+.Vb 1
+\& seqlen num_bins
+.Ve
+.PP
+The (low_hp, high_hp) and (low_tail, high_tail) ranges give the bounds on the
+hairpin and tail scores. The integer num_bins gives the number of
+equally-sized bins into which those ranges are divided. Seqlen gives the
+length of the random sequence that was used to generate the data in the rest
+of the file.
+.PP
+Following this line are any number of (at, R, M) triples, where 'at' is the \s-1AT\s0
+content, R is a 4\-tuple (low_hp, high_hp, low_tail, high_tail) giving the
+range of the \s-1HP\s0 and tail scores observed in random sequences of this \s-1AT\s0
+content, and M is the distribution matrix. These (at, R, M) triples are
+formatted as follows:
+.PP
+.Vb 5
+\& at low_hp high_hp low_tail high_tail
+\& n11 n12 n13 n14 ... n1,num_bins
+\& n21 ...
+\& ...
+\& n_num_bins,1 ...
+.Ve
+.PP
+The mu_r(e,t) term is computed by selecting the matrix with the at value
+closest to the computed \f(CW%AT\fR of the region r. If the total length of region r
+sequence is L_r, then
+.PP
+.Vb 1
+\& mu_r(e,t) = n_t_e * L_r/seqlen
+.Ve
+.PP
+where n_t_e is the entry in the t\-th row and e\-th column of the selected
+matrix, and seqlen is the first number in the first line of the file.
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+2ndscore(1)
Modified: trunk/packages/transtermhp/trunk/debian/transtermhp.install
===================================================================
--- trunk/packages/transtermhp/trunk/debian/transtermhp.install 2011-02-19 15:14:02 UTC (rev 6029)
+++ trunk/packages/transtermhp/trunk/debian/transtermhp.install 2011-02-19 20:10:19 UTC (rev 6030)
@@ -1,4 +1,6 @@
-transterm /usr/bin
-2ndscore /usr/bin
-calibrate.sh /usr/share/transtermhp
-make_expterm.py /usr/share/transtermhp
+transterm usr/bin
+2ndscore usr/bin
+calibrate.sh usr/share/doc/transtermhp/examples
+mfold_rna.sh usr/share/doc/transtermhp/examples
+make_expterm.py usr/share/doc/transtermhp/examples
+random_fasta.py usr/share/doc/transtermhp/examples
Added: trunk/packages/transtermhp/trunk/debian/transtermhp.lintian-overrides
===================================================================
--- trunk/packages/transtermhp/trunk/debian/transtermhp.lintian-overrides (rev 0)
+++ trunk/packages/transtermhp/trunk/debian/transtermhp.lintian-overrides 2011-02-19 20:10:19 UTC (rev 6030)
@@ -0,0 +1,2 @@
+#scientific words are so long ...
+transtermhp: extended-description-line-too-long
Added: trunk/packages/transtermhp/trunk/debian/transtermhp.manpages
===================================================================
--- trunk/packages/transtermhp/trunk/debian/transtermhp.manpages (rev 0)
+++ trunk/packages/transtermhp/trunk/debian/transtermhp.manpages 2011-02-19 20:10:19 UTC (rev 6030)
@@ -0,0 +1 @@
+debian/*.1
More information about the debian-med-commit
mailing list