[med-svn] [Git][med-team/vmatch][master] 5 commits: more extended description
Sascha Steinbiss
gitlab at salsa.debian.org
Thu Jan 2 00:05:57 GMT 2020
Sascha Steinbiss pushed to branch master at Debian Med / vmatch
Commits:
4ae5e4c0 by Sascha Steinbiss at 2020-01-02T01:02:17+01:00
more extended description
- - - - -
5e7ad947 by Sascha Steinbiss at 2020-01-02T01:02:29+01:00
install .pr files in dev pkg
- - - - -
1f933a2e by Sascha Steinbiss at 2020-01-02T01:03:00+01:00
add man pages
- - - - -
0e22fabb by Sascha Steinbiss at 2020-01-02T01:03:16+01:00
include scripts outside /usr/bin
- - - - -
d5473510 by Sascha Steinbiss at 2020-01-02T01:05:35+01:00
include source for selector functions
- - - - -
16 changed files:
- debian/control
- debian/libvmatch-dev.install
- + debian/mansrc/chain2dim.1.adoc
- + debian/mansrc/matchcluster.1.adoc
- + debian/mansrc/mkdna6idx.1.adoc
- + debian/mansrc/mkvtree.1.adoc
- + debian/mansrc/vendian.1.adoc
- + debian/mansrc/vmatch.1.adoc
- + debian/mansrc/vmatchselect.1.adoc
- + debian/mansrc/vseqinfo.1.adoc
- + debian/mansrc/vseqselect.1.adoc
- + debian/mansrc/vstree2tex.1.adoc
- + debian/mansrc/vsubseqselect.1.adoc
- debian/rules
- debian/vmatch.install
- + debian/vmatch.manpages
Changes:
=====================================
debian/control
=====================================
@@ -32,4 +32,5 @@ Depends: ${shlibs:Depends},
${misc:Depends}
Description: development headers and static library for Vmatch
This package contains C development headers and a static library to
- build custom programs utilizing Vmatch.
+ build custom programs and selector functions utilizing Vmatch.
+ It also includes example source code for selector functions.
=====================================
debian/libvmatch-dev.install
=====================================
@@ -1,3 +1,5 @@
src/lib/libfiles/*.a /usr/lib
src/include/*.h /usr/include/vmatch
src/include/*.pr /usr/include/vmatch
+src/Vmatch/*.pr /usr/include/vmatch
+src/Vmatch/SELECT /usr/share/vmatch
=====================================
debian/mansrc/chain2dim.1.adoc
=====================================
@@ -0,0 +1,63 @@
+# chain2dim(1)
+
+## NAME
+
+chain2dim - two-dimensional match chaining
+
+## SYNOPSIS
+
+*chain2dim* [options] <matchfile>
+
+## OPTIONS
+
+*-global* <param>::
+ Global chaining. Optional parameter "gc" switches on gap costs (according to
+ L1-model). Optional parameter "ov" means that overlaps between matches are
+ allowed.
+
+*-local* <param>::
+ Compute local chains (according to L1-model).
+ If no parameter is given, compute local chains with maximum score.
+ If parameter is given, this must be a positive number optionally followed by
+ the character b or p. If only the number, say k, is given, this is the
+ minimum score of the chains output.
+ If a number is followed by character b, then output all chains with the
+ largest k scores. If a number is followed by character p, then output all
+ chains with scores at most k percent away from the best score.
+
+*-wf* <factor>::
+ Specify weight factor > 0.0 to obtain the score of a fragment. Requires one
+ of the options *-local*, *-global gc* or *-global ov*.
+
+*-maxgap* <width>::
+ Maximal width of gap in chain.
+
+*-outprefix* <prefix>::
+ Specify prefix of files to output chains.
+
+*-withinborders*::
+ Only compute chains which do not cross sequence borders (not possible for
+ matches in open format).
+
+*-thread* <keywords...>::
+ Thread the chains, i.e. close the gaps. Accepts an optional list of keywords
+ "minlen1 minlen2 maxerror1 maxerror2", each followed by a number specifying
+ the minimum length and the maximum error rate of thread.
+ 1 refers to match instance in indexed sequence, 2 refers to matching
+ instance in query.
+
+*-silent*::
+ Do not output the chains and only report their lengths and scores.
+
+*-v*::
+ Be verbose.
+
+*-version*::
+ Show the version of the Vmatch package.
+
+*-help*::
+ Show help.
+
+## SEE ALSO
+
+vmatch(1)
=====================================
debian/mansrc/matchcluster.1.adoc
=====================================
@@ -0,0 +1,33 @@
+# matchcluster(1)
+
+## NAME
+
+matchcluster - match clustering
+
+## SYNOPSIS
+
+*matchcluster* [options] <matchfile>
+
+## OPTIONS
+
+*-erate* <value>::
+ Specify maximum error rate in range [0,100] for similarity clustering.
+
+*-gapsize* <size>::
+ Specify maximum gap size for gap clustering.
+
+*-overlap* <percentage>::
+ Specify minimum percentage of overlap for overlap clustering.
+
+*-outprefix* <string>::
+ Specify prefix of files to output clusters.
+
+*-version*::
+ Show the version of the Vmatch package.
+
+*-help*::
+ Show help.
+
+## SEE ALSO
+
+vmatch(1)
=====================================
debian/mansrc/mkdna6idx.1.adoc
=====================================
@@ -0,0 +1,83 @@
+# mkdna6idx(1)
+
+## NAME
+
+mkdna6idx - generate a six frame translation index
+
+## SYNOPSIS
+
+*mkdna6idx* [options] <indexname>
+
+## DESCRIPTION
+
+*mkdna6idx* is very similar to *mkvtree*. While *mkvtree* can handle sequences
+over arbitrary alphabets, *mkdna6idx* requires DNA-sequences as input. It
+generates two indices, namely:
+
+* A flat index "indexname" for the given DNA sequences. It mainly consists
+ of the two files "indexname.tis" and "indexname.ois". This index is mainly
+ used for output purpose.
+* An index "indexname.6fr" for the given DNA sequences translated in all six
+ reading frames. This is used for computing the matches.
+
+Please also see the Vmatch manual for a more detailed explanation of the usage.
+
+## OPTIONS
+
+*-db* <file>::
+ Specify database files (mandatory).
+
+*-smap* <file>::
+ Specify file containing a symbol mapping. This describes the grouping of
+ symbols. It is possible to set the environment variable MKVTREESMAPDIR
+ to the path where these files can be found.
+
+*-transnum* <table>::
+ Perform six frame translation. Specify codon translation table by a number
+ in the range [1,23] except for 7, 8, 17, 18, 19 and 20; (default is 1):
+
+ 1 Standard
+ 2 Vertebrate Mitochondrial
+ 3 Yeast Mitochondrial
+ 4 Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma
+ 5 Invertebrate Mitochondrial
+ 6 Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear
+ 9 Echinoderm Mitochondrial
+ 10 Euplotid Nuclear
+ 11 Bacterial
+ 12 Alternative Yeast Nuclear
+ 13 Ascidian Mitochondrial
+ 14 Flatworm Mitochondrial
+ 15 Blepharisma Macronuclear
+ 16 Chlorophycean Mitochondrial
+ 21 Trematode Mitochondrial
+ 22 Scenedesmus Obliquus Mitochondrial
+ 23 Thraustochytrium Mitochondrial
+
+*-indexname* <string>::
+ Specify name for index to be generated.
+
+*-cpl*::
+ Use reverse complement of the input sequence.
+
+*-tis*::
+ Output transformed input sequences (tistab) to file.
+
+*-ois*::
+ Output original input sequences (oistab) to file.
+
+*-maxdepth* <len>::
+ Restrict the sorting to prefixes of the given length.
+
+*-v*::
+ Verbose mode.
+
+*-version*::
+ Show the version of the Vmatch package.
+
+*-help*::
+ Show help.
+
+## SEE ALSO
+
+mkvtree(1)
=====================================
debian/mansrc/mkvtree.1.adoc
=====================================
@@ -0,0 +1,131 @@
+# mkvtree(1)
+
+## NAME
+
+mkvtree - construct index for sequence
+
+## SYNOPSIS
+
+*mkvtree* [options]
+
+## DESCRIPTION
+
+The program *mkvtree* constructs an index for a given set of sequences. These
+are given as a list of input files. The sequences are referred to as database
+sequences. They can be over any given alphabet. The alphabet can be the DNA
+alphabet, or the protein alphabet, or any other alphabet consisting of
+printable characters. An alphabet is specified by a file storing a symbol
+mapping. The index consists of several files, the index files. Each such file
+stores a different table. The user specifies which tables (i.e. which part of
+the index) is written to a file, using one of eight output options, or a
+single option specifying that all tables are written to file.
+
+We support the following formats for the input files. They are recognized
+according to the first non-whitespace symbol in the file.
+
+* multiple FASTA format: If the file begins with the symbol ">", then this
+ file is considered to be a file in multiple FASTA format (i.e. it contains
+ one or more sequences). Each line starting with the symbol ">" contains
+ the description of the sequence following it. Each line not
+ starting with the symbol ">" contains the sequence. Empty lines are allowed
+ and ignored when reading the input.
+* multiple EMBL/SWISSPROT format: If the file begins with the string "ID",
+ then this file is considered to be a file in multiple EMBL format (i.e.
+ containing one or more sequences, each in EMBL format). The information
+ contained in the "ID" and "DE" lines is taken as the description of the
+ corresponding sequence. The EMBL format is identical to the SWISSPROT
+ format (w.r.t. the information we need to extract from such entries).
+ So one can also use files in multiple SWISSPROT format as input.
+* multiple GENBANK format: If the file begins with the string "LOCUS", then
+ this file is considered to be a file in multiple GENBANK format (i.e.
+ containing one or more entries in GENBANK format). The information
+ contained in the "LOCUS" and the "DEFINITION" lines is taken as the
+ description of the corresponding sequence.
+* plain format: If the file does not begin with the symbol ">" or the strings
+ "ID" or "LOCUS", then the file is taken verbatim. That is, the entire file
+ is considered to be the input sequence (whitespaces are not ignored).
+
+There is no special option necessary to tell the program the sequence format.
+It automatically detects the appropriate format, according to the rules given
+above. If none of the above rules apply, then the program cannot recognize the
+input format and exits with error code 1. In such a case please check whether
+your input files conform to the input formats above. Another good
+solution is to use a more versatile sequence format transformation program
+(e.g. *readseq*) to first generate multiple FASTA files and then feed this
+into *mkvtree*.
+
+Today many files containing sequence files are provided compressed by the
+program *gzip*. To simplify the use of these files, *mkvtree* also accepts
+gzipped input files. These files must have the ending ".gz". The gzipped
+formatted files are gunzipped internally and then processed as any other
+file.
+
+## OPTIONS
+
+*-db* <file>::
+ Specify database files (mandatory).
+
+*-smap* <file>::
+ Specify file containing a symbol mapping. This describes the grouping of
+ symbols. It is possible to set the environment variable MKVTREESMAPDIR
+ to the path where these files can be found.
+
+*-dna*::
+ Input is DNA sequence.
+
+*-protein*::
+ Input is Protein sequence.
+
+*-indexname* <string>::
+ Specify name for index to be generated.
+
+*-pl* <length>::
+ Specify prefix length for bucket sort.
+ Recommendation: use without argument; then a reasonable prefix length is automatically determined.
+
+*-tis*::
+ Output transformed input sequences (tistab) to file.
+
+*-ois*::
+ Output original input sequences (oistab) to file.
+
+*-suf*::
+ Output suffix array (suftab) to file.
+
+*-sti1*::
+ Output reduced inverse suffix array (sti1tab) to file.
+
+*-bwt*::
+ Output Burrows-Wheeler Transformation (bwttab) to file.
+
+*-bck*::
+ Output bucket boundaries (bcktab) to file.
+
+*-skp*::
+ Output skip values (skptab) to file.
+
+*-lcp*::
+ Output longest common prefix lengths (lcptab) to file.
+
+*-allout*::
+ Output all index tables to files.
+
+*-maxdepth* <len>::
+ Restrict the sorting to prefixes of the given length.
+
+*-v*::
+ Verbose mode.
+
+*-version*::
+ Show the version of the Vmatch package.
+
+*-help*::
+ Show help.
+
+## RETURNS
+
+If an error occurs, the program exits with error code 1. Otherwise, the exit code is 0.
+
+## SEE ALSO
+
+mkdna6idx(1)
=====================================
debian/mansrc/vendian.1.adoc
=====================================
@@ -0,0 +1,13 @@
+# vendian(1)
+
+## NAME
+
+vendian - helper tool for endianness conversion
+
+## SYNOPSIS
+
+*vendian* bytes filename
+
+## DESCRIPTION
+
+This is used by the *vmigrate.sh* script to perform index conversion.
=====================================
debian/mansrc/vmatch.1.adoc
=====================================
@@ -0,0 +1,293 @@
+# vmatch(1)
+
+## NAME
+
+vmatch - solve matching tasks
+
+## SYNOPSIS
+
+*vmatch* [options] indexname
+
+## DESCRIPTION
+
+
+The program *vmatch* allows one to solve a multitude of different matching
+tasks over an index constructed by *mkvtree*. Each matching task is solved by
+a combination of options specifying
+
+* the input,
+* the kind of matches sought,
+* additional constraints on the matches,
+* the direction of the matches (in case of DNA),
+* the kind of postprocessing to be done,
+* the output mode and output format.
+
+Additionally, if there is more than one algorithm to solve a certain matching
+task, *vmatch* allows one to specify which algorithm is to be used.
+*vmatch* allows one to compute the following kinds of matches:
+
+. match all substrings of the database sequences against itself. The matches
+ can be one of the following kinds:
+.. branching tandem repeats, i.e. repeats where the two instances of the
+ repeat occur at consecutive positions
+.. maximal repeats, i.e. pairs of maximal substrings occurring more than
+ once in the database sequences
+.. supermaximal repeats, i.e. pairs of maximal substrings occurring more than
+ once in the database sequences, but not in any other maximal repeat
+. match a set of query sequences (given in an extra query file) against the
+ index. The matches can be one of the following kinds:
+.. maximal substring matches, i.e. the substrings of the query sequences
+ matching substrings of the database sequences. All matches exceeding some
+ minimum length, extended maximally to the left and to the right, are reported.
+.. maximal unique matches, i.e. the substrings of the query sequences matching
+ substrings of the database sequences. A match is reported if it is unique in
+ the database sequences as well as in the query sequences.
+.. complete matches, i.e. a query sequence must completely match (i.e. from the
+ first character to the last character) a substring of the database sequences.
+
+For all these match kinds, the matches themselves can be direct or palindromic
+(i.e. on the reverse strand, in case of DNA sequences). If required, DNA
+sequences are translated into six reading frames and the matches are computed
+on the protein level, and reported on the DNA level. Besides exact matches,
+also degenerate matches with a maximal number of errors (insertions, deletions,
+and mismatches) are supported. Moreover, degenerate matches can be derived
+from exact matches by extending these using a greedy extension strategy. This
+does not apply to complete matches. For all different match kinds, the matches
+delivered by *vmatch* can be selected according to their E-value, their
+identity value, or their match score.
+
+In the default case, a match is reported as a formatted row of numbers,
+containing its lengths, the positions where it occurs, the E-value, the number
+of errors it contains, the match score, and the identity value. Optionally, an
+alignment of the sequences that are involved in the match can be reported.
+An important feature of *vmatch* is the capability of directly postprocessing
+the matches found in the following ways:
+
+. inverse output, i.e. report substrings of the database sequences or the query
+ sequences not covered by a match
+. masking substrings of the database sequences or the query sequences covered
+ by a match
+. clustering of a set of database sequences according to the matches found
+ between these sequences. The output of this option can be a representation of
+ the clusters, or a set of sequences each being representative for a cluster.
+. chaining of a set of matches, i.e. finding optimal subsets of all matches
+ which do not cross
+. clustering of matches according to the pairwise similarities on the sequences
+ involved in the match
+. clustering of matches according to the positions where they occur
+
+Finally, to accommodate many more kinds of user defined post processing tasks,
+*vmatch* provides the concept of selection functions. These provide an open
+interface which allow arbitrary on-the-fly postprocessing of the matches
+without output and parsing of the matches. For more details on this concept,
+see the manual.
+
+## OPTIONS
+
+*-q* <file>::
+ Specify files containing queries to be matched.
+
+*-dnavsprot* <table>::
+ Perform six frame translation. Specify codon translation table by a number
+ in the range [1,23] except for 7, 8, 17, 18, 19 and 20; (default is 1):
+ 1 Standard
+ 2 Vertebrate Mitochondrial
+ 3 Yeast Mitochondrial
+ 4 Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma
+ 5 Invertebrate Mitochondrial
+ 6 Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear
+ 9 Echinoderm Mitochondrial
+ 10 Euplotid Nuclear
+ 11 Bacterial
+ 12 Alternative Yeast Nuclear
+ 13 Ascidian Mitochondrial
+ 14 Flatworm Mitochondrial
+ 15 Blepharisma Macronuclear
+ 16 Chlorophycean Mitochondrial
+ 21 Trematode Mitochondrial
+ 22 Scenedesmus Obliquus Mitochondrial
+ 23 Thraustochytrium Mitochondrial
+
+*-tandem*::
+ Compute right branching tandem repeats.
+
+*-supermax*::
+ Compute supermaximal matches.
+
+*-mum*::
+ Compute maximal unique matches.
+
+*-complete*::
+ Specify that query sequences must match completely.
+
+*-dbnomatch* <arg>::
+ Mask all database substrings containing a match; optional argument:
+ * keepleft means to not mask the left instance
+ of a match
+ * keepright means to not mask the right instance
+ of a match
+ * keepleftifsamesequence means to not mask the left instance
+ of the match if the right instance occurs
+ in the same sequence
+ * keeprightifsamesequence means to not mask the right instance
+ of the match if the left instance occurs
+ in the same sequence
+
+*-qnomatch*::
+ Show all query substrings not containing a match.
+
+*-dbmaskmatch* <arg>::
+ Mask all database substrings containing a match; optional argument:
+ * keepleft means to not mask the left instance
+ of a match
+ * keepright means to not mask the right instance
+ of a match
+ * keepleftifsamesequence means to not mask the left instance
+ of the match if the right instance occurs
+ in the same sequence
+ * keeprightifsamesequence means to not mask the right instance
+ of the match if the left instance occurs
+ in the same sequence
+
+*-qmaskmatch*::
+ Mask all query substrings containing a match.
+
+*-pp*::
+ Generic postprocessing of matches.
+
+*-online*::
+ Run algorithms online without using the index.
+
+*-qspeedup* <level>::
+ Specify speedup level when matching queries (0: fast, 2: faster; default is 2),
+ beware of time/space tradeoff.
+
+*-d*::
+ Compute direct matches (default).
+
+*-p*::
+ Compute palindromic (i.e. reverse complemented) matches.
+
+*-h* <dist>::
+ Specify the allowed hamming distance > 0. In combination with option
+ *-complete* one can switch on the percentage search mode or the best
+ search mode. For the percentage search mode use an argument of the
+ form ip (where i is a positive integer). This means that up to
+ i*100/m mismatches are allowed in a match of a query of length m.
+ For the best search mode use an argument of the form ib where i is a
+ positive integer. This means that in a first phase the minimum threshold q
+ is determined such that there is still a match with q mismatches. q is in
+ the range 0 to i*100/m.
+
+*-e* <dist>::
+ Specify the allowed edit distance > 0. In combination with option
+ *-complete* one can switch on the percentage search mode or the best
+ search mode. For the percentage search mode use an argument of the
+ form ip (where i is a positive integer). This means that up to
+ i*100/m differences are allowed in a match of a query of length m.
+ For the best search mode use an argument of the form ib where i is a
+ positive integer. This means that in a first phase the minimum threshold q
+ is determined such that there is still a match with q differences. q is in
+ the range 0 to i*100/m.
+
+*-allmax*::
+ Show all maximal matches in the order of their computation.
+
+*-seedlength* <length>::
+ Specify the seed length.
+
+*-hxdrop* <value>::
+ Specify the xdrop value for hamming distance extension.
+
+*-exdrop* <value>::
+ Specify the xdrop value for edit distance extension.
+
+*-i*::
+ Give information about number of different matches.
+
+*-dbcluster* <args>::
+ Cluster the database sequences.
+ * first argument is percentage of shorter string
+ to be included in match,
+ * second argument is percentage of larger string
+ to be included in match,
+ * third optional argument is filenameprefix,
+ * fourth optional argument is (minclustersize, maxclustersize)
+
+*-nonredundant*::
+ Generate file with non-redundant set of sequences; only works together
+ with option *-dbcluster*.
+
+*-selfun* <file>::
+ Specify shared object file containing selection function.
+
+*-l* <length>::
+ Specify that match must have the given length, optionally specify minimum
+ and maximum size of gaps between repeat instances.
+
+*-leastscore* <score>::
+ Specify the minimum score of a match.
+
+*-evalue* <value>::
+ Specify the maximum E-value of a match.
+
+*-identity* <value>::
+ Specify minimum identity of match in range [1..100%].
+
+*-sort* <mode>::
+ Sort the matches, additional argument is mode:
+ la: ascending order of length
+ ld: descending order of length
+ ia: ascending order of first position
+ id: descending order of first position
+ ja: ascending order of second position
+ jd: descending order of second position
+ ea: ascending order of Evalue
+ ed: descending order of Evalue
+ sa: ascending order of score
+ sd: descending order of score
+ ida: ascending order of identity
+ idd: descending order of identity
+
+*-best* <n>::
+ Show the best matches (those with smallest E-values), default is best 50.
+
+*-s*::
+ Show the alignment of matching sequences.
+
+*-showdesc*::
+ Show sequence description of match.
+
+*-f*::
+ Show filename where match occurs.
+
+*-absolute*::
+ Show absolute positions.
+
+*-nodist*::
+ Do not show distance of match.
+
+*-noevalue*::
+ Do not show E-value of match.
+
+*-noscore*::
+ Do not show score of match.
+
+*-noidentity*::
+ Do not show identity of match.
+
+*-v*::
+ Verbose mode.
+
+*-version*::
+ Show the version of the Vmatch package.
+
+*-help*::
+ Show basic options.
+
+*-help+*::
+ Show all options.
+
+## SEE ALSO
+
+vmatchselect(1)
=====================================
debian/mansrc/vmatchselect.1.adoc
=====================================
@@ -0,0 +1,135 @@
+# vmatchselect(1)
+
+## NAME
+
+vmatchselect - sort and select matches
+
+## SYNOPSIS
+
+*vmatchselect* [options] matchfile
+
+## DESCRIPTION
+
+*vmatchselect* allows one to select interesting matches from the output of
+vmatch as specified by user-defined criteria. It delivers matches of chosen
+length, degeneracy or significance into further analysis routines.
+
+*vmatchselect* removes from the input all those matches that are contained in
+another match. To do this efficiently, the matches are sorted by their
+position in the database sequence, and hence in the order in which the matches
+are output, unless the user specifies otherwise. Moreover, the sequences of
+the virtual suffix tree for which the match file was produced can be clustered
+according to the matches. The input for *vmatchselect* is a file produced by
+vmatch, called a match file.
+
+The output of *vmatchselect* goes to standard output and is sorted in
+ascending order of the positions of the left instance of a match. Two matches
+where the left instance occurs at the same position, are sorted in descending
+order of their length. Two matches of the same length where the left instance
+occurs in the same position, are sorted in ascending order of the position of
+the right instance of the match.
+
+*vmatchselect* provides a subset of the options of *vmatch*.
+The main difference to *vmatch* is that *vmatchselect* gets the matches from
+a match file, while *vmatch* computes the matches from scratch. Therefore
+options specifying the index and/or the query sequences to be matched, as well
+as options specifying how to match are not available in *vmatchselect*.
+The options of *vmatchselect* have the same meaning as in the program *vmatch*.
+Thus, for a description, see the corresponding documentation. Note that
+*vmatchselect* also allows one to use the option "-dbcluster". If *vmatchselect*
+is called with this option, then it parses the given match file and performs
+single linkage clustering based on the matches in this file.
+Thus *vmatch* and *vmatchselect* allow one to perform hierarchical clustering.
+In a first step an initial set of matches with loose matching criteria is
+computed, using *vmatch*. Then one clusters these matches by calling
+*vmatchselect*. In a second round one applies more strict choices for the
+matches by using the options "-l", "-leastscore", "-evalue", or
+"-identity", etc. This allows stepwise refinement of clusters without much
+computational effort and no new index construction for the sequence of a
+cluster. The output of *vmatchselect* is the same as the output of *vmatch*.
+
+## OPTIONS
+
+*-dbcluster* <args>::
+ Cluster the database sequences.
+ * first argument is percentage of shorter string
+ to be included in match,
+ * second argument is percentage of larger string
+ to be included in match,
+ * third optional argument is filenameprefix,
+ * fourth optional argument is (minclustersize, maxclustersize)
+
+*-nonredundant*::
+ Generate file with non-redundant set of sequences; only works together
+ with option *-dbcluster*.
+
+*-selfun* <file>::
+ Specify shared object file containing selection function.
+
+*-l* <length>::
+ Specify that match must have the given length, optionally specify minimum
+ and maximum size of gaps between repeat instances.
+
+*-leastscore* <score>::
+ Specify the minimum score of a match.
+
+*-evalue* <value>::
+ Specify the maximum E-value of a match.
+
+*-identity* <value>::
+ Specify minimum identity of match in range [1..100%].
+
+*-sort* <mode>::
+ Sort the matches, additional argument is mode:
+ la: ascending order of length
+ ld: descending order of length
+ ia: ascending order of first position
+ id: descending order of first position
+ ja: ascending order of second position
+ jd: descending order of second position
+ ea: ascending order of Evalue
+ ed: descending order of Evalue
+ sa: ascending order of score
+ sd: descending order of score
+ ida: ascending order of identity
+ idd: descending order of identity
+
+*-best* <n>::
+ Show the best matches (those with smallest E-values), default is best 50.
+
+*-s*::
+ Show the alignment of matching sequences.
+
+*-showdesc*::
+ Show sequence description of match.
+
+*-f*::
+ Show filename where match occurs.
+
+*-absolute*::
+ Show absolute positions.
+
+*-nodist*::
+ Do not show distance of match.
+
+*-noevalue*::
+ Do not show E-value of match.
+
+*-noscore*::
+ Do not show score of match.
+
+*-noidentity*::
+ Do not show identity of match.
+
+*-v*::
+ Verbose mode.
+
+*-version*::
+ Show the version of the Vmatch package.
+
+*-help*::
+ Show help.
+
+## SEE ALSO
+
+vmatch(1)
=====================================
debian/mansrc/vseqinfo.1.adoc
=====================================
@@ -0,0 +1,19 @@
+# vseqinfo(1)
+
+## NAME
+
+vseqinfo - obtain sequence information from index
+
+## SYNOPSIS
+
+*vseqinfo* indexname
+
+## DESCRIPTION
+
+*vseqinfo* echoes for each database sequence its length and its description.
+The program has no options. It takes exactly one argument, namely the index
+name. The output goes to standard output.
+
+## SEE ALSO
+
+vseqselect(1)
=====================================
debian/mansrc/vseqselect.1.adoc
=====================================
@@ -0,0 +1,37 @@
+# vseqselect(1)
+
+## NAME
+
+vseqselect - print selected sequences from index
+
+## SYNOPSIS
+
+*vseqselect* [options] indexname
+
+## DESCRIPTION
+
+The program *vseqselect* selects sequences from a given index and prints them
+on standard output.
+
+## OPTIONS
+
+*-minlength*::
+ Specify the minimal length of the sequences to be selected.
+
+*-maxlength* <length>::
+ Specify the maximal length of the sequences to be selected.
+
+*-randomnum* <n>::
+ Specify the number of random sequences to be selected.
+
+*-randomlength* <length>::
+ Specify the minimal total length of the random sequences to be selected.
+
+*-seqnum* <filename>::
+ Select the sequences with numbers given in filename.
+
+*-version*::
+ Show the version of the Vmatch package.
+
+*-help*::
+ Show help.
=====================================
debian/mansrc/vstree2tex.1.adoc
=====================================
@@ -0,0 +1,81 @@
+# vstree2tex(1)
+
+## NAME
+
+vstree2tex - pretty-print a virtual tree
+
+## SYNOPSIS
+
+*vstree2tex* [options] indexname
+
+## DESCRIPTION
+
+The program *vstree2tex* produces a representation of a virtual suffix tree
+in LATEX format and prints it to standard output. Note that *vstree2tex*
+should only be used for very small indexes since it produces large output
+files.
+
+Suppose the total length of all sequences in the index is n. If the option
+*-s* is not used, then the output size of *vstree2tex* is about 10n bytes
+per option (plus some constant number of bytes for the header and the footer
+of the LATEX file). If the option *-s* is used, then the size of the output
+is proportional to n^2.
+
+The program is mainly designed for debugging a program based on the index and
+for educational purposes.
+
+## OPTIONS
+
+*-s*::
+ Output suffixes.
+
+*-tis*::
+ Output tistab.
+
+*-ois*::
+ Output oistab.
+
+*-suf*::
+ Output suftab.
+
+*-sti1*::
+ Output small inverse suftab.
+
+*-bwt*::
+ Output bwttab.
+
+*-bck*::
+ Output bcktab in vertical mode.
+
+*-bckhz*::
+ Output bcktab in horizontal mode.
+
+*-lcp*::
+ Output lcptab.
+
+*-skp*::
+ Output skptab.
+
+*-cfr*::
+ Output cfrtab.
+
+*-crf*::
+ Output crftab.
+
+*-lsf*::
+ Output lsftab.
+
+*-sti*::
+ Output inverse suftab.
+
+*-cld*::
+ Output cldtab.
+
+*-iso*::
+ Output isotab.
+
+*-version*::
+ Show the version of the Vmatch package.
+
+*-help*::
+ Show help.
\ No newline at end of file
=====================================
debian/mansrc/vsubseqselect.1.adoc
=====================================
@@ -0,0 +1,40 @@
+# vsubseqselect(1)
+
+## NAME
+
+vsubseqselect - print selected subsequences from index
+
+## SYNOPSIS
+
+*vsubseqselect* [options] indexname
+
+## DESCRIPTION
+
+The program *vsubseqselect* selects subsequences from a given index and prints
+them on standard output, either line by line or in FASTA format. The selection
+can either be random or according to position ranges specified by the user.
+
+Please refer to the manual for more detailed explanations.
+
+## OPTIONS
+
+*-minlength*::
+ Specify the minimal length of the substrings to be selected.
+
+*-maxlength* <length>::
+ Specify the maximal length of the substrings to be selected.
+
+*-snum* <n>::
+ Specify the number of random substrings to be selected.
+
+*-range* <pos> <pos>::
+ Specify the first and last position of the substring to be selected.
+
+*-seq* <length> <number> <pos>::
+ Specify length, number, and relative position of the substring to be selected.
+
+*-version*::
+ Show the version of the Vmatch package.
+
+*-help*::
+ Show help.
=====================================
debian/rules
=====================================
@@ -1,6 +1,6 @@
#!/usr/bin/make -f
-# DH_VERBOSE := 1
+DH_VERBOSE := 1
export LC_ALL=C.UTF-8
export DEB_BUILD_MAINT_OPTIONS=hardening=+all
export PATH:=$(PATH):$(CURDIR)/src/bin
@@ -9,6 +9,9 @@ export WORKVSTREESRC=$(CURDIR)/src
%:
dh $@
+override_dh_auto_clean:
+ rm -rf debian/man
+
override_dh_auto_build:
cd src && mklink.sh linux-gcc-64
cd src && make licensemanager=no $*
@@ -18,7 +21,7 @@ override_dh_auto_install:
dh_auto_install
override_dh_installman:
- #mkdir -p $(CURDIR)/debian/man
- #asciidoctor -a docdate='' -b manpage $(CURDIR)/debian/man_src/*.adoc
- #cp $(CURDIR)/debian/man_src/*.? $(CURDIR)/debian/man
+ mkdir -p $(CURDIR)/debian/man
+ asciidoctor -a docdate='' -b manpage $(CURDIR)/debian/mansrc/*.adoc
+ mv $(CURDIR)/debian/mansrc/*.? $(CURDIR)/debian/man
dh_installman --
=====================================
debian/vmatch.install
=====================================
@@ -1,12 +1,12 @@
src/dist/vmatch/chain2dim /usr/bin
-#src/dist/vmatch/cleanpp.sh /usr/bin
+src/dist/vmatch/cleanpp.sh /usr/share/vmatch
src/dist/vmatch/matchcluster /usr/bin
src/dist/vmatch/mkdna6idx /usr/bin
src/dist/vmatch/mkvtree /usr/bin
src/dist/vmatch/vendian /usr/bin
src/dist/vmatch/vmatch /usr/bin
src/dist/vmatch/vmatchselect /usr/bin
-#src/dist/vmatch/vmigrate.sh /usr/bin
+src/dist/vmatch/vmigrate.sh /usr/share/vmatch
src/dist/vmatch/vseqinfo /usr/bin
src/dist/vmatch/vseqselect /usr/bin
src/dist/vmatch/vstree2tex /usr/bin
=====================================
debian/vmatch.manpages
=====================================
@@ -0,0 +1 @@
+debian/man/*.1
View it on GitLab: https://salsa.debian.org/med-team/vmatch/compare/365ac97ac74d2f233fc20d2a5f9625b226a9d63c...d54735101a5a0a8a2fced27bef58e60540a1ed7b
--
View it on GitLab: https://salsa.debian.org/med-team/vmatch/compare/365ac97ac74d2f233fc20d2a5f9625b226a9d63c...d54735101a5a0a8a2fced27bef58e60540a1ed7b
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200102/b465073c/attachment-0001.html>
More information about the debian-med-commit
mailing list