[med-svn] [Git][med-team/last-align][upstream] New upstream version 1471
Charles Plessy (@plessy)
gitlab at salsa.debian.org
Thu Aug 24 03:17:06 BST 2023
Charles Plessy pushed to branch upstream at Debian Med / last-align
Commits:
e54f9ef3 by Charles Plessy at 2023-08-24T01:51:54+00:00
New upstream version 1471
- - - - -
24 changed files:
- bin/last-train
- doc/last-train.rst
- doc/last-tuning.rst
- − src/DiagonalTable.cc
- src/DiagonalTable.hh
- src/LastEvaluer.cc
- src/LastalArguments.cc
- src/LastalArguments.hh
- src/MultiSequence.cc
- src/MultiSequence.hh
- src/MultiSequenceQual.cc
- src/SubsetSuffixArray.cc
- src/SubsetSuffixArray.hh
- src/SubsetSuffixArraySearch.cc
- src/SubsetSuffixArraySort.cc
- src/gaplessXdrop.hh
- src/lastal.cc
- src/lastdb.cc
- src/makefile
- src/mcf_aligment_path_adder.cc → src/mcf_alignment_path_adder.cc
- src/mcf_aligment_path_adder.hh → src/mcf_alignment_path_adder.hh
- test/last-test.out
- test/last-test.sh
- test/last-train-test.out
Changes:
=====================================
bin/last-train
=====================================
@@ -64,11 +64,13 @@ def homogeneousLetterFreqs(scale, matScores):
m[k], m[iMax] = m[iMax], m[k]
if abs(m[k][k]) <= 0:
raise ArithmeticError("singular matrix")
+ mk = m[k]
for i in range(n):
if i != k:
- mul = m[i][k] / m[k][k]
+ mi = m[i]
+ mul = mi[k] / mk[k]
for j in range(k + 1, n + 1):
- m[i][j] -= m[k][j] * mul
+ mi[j] -= mk[j] * mul
return [m[k][n] / m[k][k] for k in range(n)]
def randomSample(things, sampleSize):
@@ -323,12 +325,6 @@ def gapProbsFromCounts(counts, opts, maxGapGrowProb):
print("# alignments:", alignments)
print("# mean delete size: %g" % (deletes / delOpens))
print("# mean insert size: %g" % (inserts / insOpens))
- print("# matchProb: %g" % matchProb)
- print("# delOpenProb: %g" % delOpenProb)
- print("# insOpenProb: %g" % insOpenProb)
- print("# delExtendProb: %g" % delGrowProb)
- print("# insExtendProb: %g" % insGrowProb)
- print()
delGrowProb = min(delGrowProb, maxGapGrowProb)
insGrowProb = min(insGrowProb, maxGapGrowProb)
return matchProb, (delOpenProb, delGrowProb), (insOpenProb, insGrowProb)
@@ -357,6 +353,13 @@ def gapRatiosFromProbs(matchProb, delProbs, insProbs):
insGrowRatio = insGrowProb / endProb
insRatios = firstInsRatio, insGrowRatio
+ print("# matchProb: %g" % matchProb)
+ print("# delOpenProb: %g" % delOpenProb)
+ print("# insOpenProb: %g" % insOpenProb)
+ print("# delExtendProb: %g" % delGrowProb)
+ print("# insExtendProb: %g" % insGrowProb)
+ print("# endProb: %g" % endProb)
+ print()
return matchRatio, delRatios, insRatios
def scoreFromLetterProbs(scale, matchRatio, pairProb, rowProb, colProb):
@@ -392,6 +395,7 @@ def scoreImbalance(scale, matScores, delCosts, insCosts):
def balancedScale(imbalanceFunc, nearScale, args):
# Find a scale, near nearScale, with balanced length probability
+ sys.stdout.flush()
bump = 1.000001
rootFinders = rootOfDecreasingFunction, rootOfIncreasingFunction
value = imbalanceFunc(nearScale, *args)
@@ -557,13 +561,6 @@ def frameshiftProbsFromCounts(counts, opts):
print("# frameshifts del-1,del-2,ins+1,ins+2: %.12g,%.12g,%.12g,%.12g"
% (delOpens1, delOpens2, insOpens1, insOpens2))
print("# alignments:", alignments)
- print("# matchProb: %g" % matchProb)
- print("# delOpenProb: %g" % delOpenProb)
- print("# insOpenProb: %g" % insOpenProb)
- print("# delExtendProb: %g" % delGrowProb)
- print("# insExtendProb: %g" % insGrowProb)
- print("# frameshiftProbs del-1,del-2,ins+1,ins+2: %g,%g,%g,%g"
- % (delProb1, delProb2, insProb1, insProb2))
print()
delProbs = delOpenProb, delGrowProb, delProb1, delProb2
insProbs = insOpenProb, insGrowProb, insProb1, insProb2
@@ -593,6 +590,15 @@ def frameshiftRatiosFromProbs(matchProb, delProbs, insProbs):
delRatio2 = del2 * (1 - del1) / (delAdj * (delMean * endProb) ** 2)
delRatios = delOpenRatio, delRatio0, delRatio1, delRatio2
+ print("# matchProb: %g" % matchProb)
+ print("# delOpenProb: %g" % delOpenProb)
+ print("# insOpenProb: %g" % insOpenProb)
+ print("# delExtendProb: %g" % delGrowProb)
+ print("# insExtendProb: %g" % insGrowProb)
+ print("# frameshiftProbs del-1,del-2,ins+1,ins+2: %g,%g,%g,%g"
+ % (del1, del2, ins1, ins2))
+ print("# endProb: %g" % endProb)
+ print()
return matchRatio, delRatios, insRatios
def frameshiftCostsFromProbRatios(scale, gapRatios):
@@ -760,9 +766,10 @@ def doTraining(opts, args):
lastalProgName, alphabet = readLastdbData(args[0])
lastalVersion = versionFromLastal()
- if not opts.p and (not opts.Q or opts.Q in ("0", "fastx", "keep")):
- if not opts.r: opts.r = "5" if len(alphabet) < 20 else "12"
- if not opts.q: opts.q = "5" if len(alphabet) < 20 else "7"
+ if (not opts.p and len(alphabet) < 20 and
+ (not opts.Q or opts.Q in ("0", "fastx", "keep"))):
+ if not opts.r: opts.r = "5"
+ if not opts.q: opts.q = "5"
if not opts.a: opts.a = "15"
if not opts.b: opts.b = "3"
@@ -920,14 +927,15 @@ if __name__ == "__main__":
og = optparse.OptionGroup(op, "Initial parameter options")
og.add_option("-r", metavar="SCORE", help=
- "match score (default: 6 if Q>=1, or 5 if DNA, or 12)")
+ "match score (default: 6 if Q>=1, else 5 if DNA)")
og.add_option("-q", metavar="COST", help=
- "mismatch cost (default: 18 if Q>=1, or 5 if DNA, or 7)")
- og.add_option("-p", metavar="NAME", help="match/mismatch score matrix")
- og.add_option("-a", metavar="COST",
- help="gap existence cost (default: 21 if Q>=1, else 15)")
- og.add_option("-b", metavar="COST",
- help="gap extension cost (default: 9 if Q>=1, else 3)")
+ "mismatch cost (default: 18 if Q>=1, else 5 if DNA)")
+ og.add_option("-p", metavar="NAME", help=
+ "match/mismatch score matrix (default: BL62 if protein)")
+ og.add_option("-a", metavar="COST", help="gap existence cost "
+ "(default: 15 if DNA, 11 if protein, 21 if Q>=1)")
+ og.add_option("-b", metavar="COST", help="gap extension cost "
+ "(default: 3 if DNA, 2 if protein, 9 if Q>=1)")
og.add_option("-A", metavar="COST", help="insertion existence cost")
og.add_option("-B", metavar="COST", help="insertion extension cost")
og.add_option("-F", metavar="LIST", help="frameshift probabilities: "
=====================================
doc/last-train.rst
=====================================
@@ -20,20 +20,16 @@ How it works
of ``queries.fasta``.
2. It starts with an initial guess for substitution and gap
- parameters.
+ rates/scores.
-3. Using these parameters, it finds similar segments between the
- chunks and ``reference.fasta``.
+3. Using these rates, it finds similar segments between the chunks and
+ ``reference.fasta``. (If one part of the chunks matches several
+ parts of ``reference.fasta``, only the best matches are kept.)
- If one part of the chunks matches several parts of
- ``reference.fasta``, only the best matches are kept.
+4. It gets substitution and gap rates from these similar segments.
-4. It gets substitution and gap parameters from these similar
- segments.
-
-5. It uses these parameters to find similar segments more accurately,
- then gets parameters again, and repeats until the result stops
- changing.
+5. It uses these rates to find similar segments more accurately, then
+ gets rates again, and repeats until the result stops changing.
last-train prints a summary of each iteration, followed by the final
score parameters in a format that can be read by `lastal's -p option
@@ -62,8 +58,7 @@ Training options
--pid=PID
Ignore similar segments with > PID% identity (matches /
[matches + mismatches]). This aims to optimize the parameters
- for low-similarity alignments (similarly to the BLOSUM
- matrices).
+ for low-similarity alignments.
--postmask=NUMBER
By default, last-train ignores alignments of mostly-lowercase
sequence (by using `last-postmask <doc/last-postmask.rst>`_).
@@ -171,13 +166,32 @@ Alignment options
Details
-------
-* last-train (and lastal) uses "Model A", in Figure 5A of btz576_.
+last-train shows the gap probabilities at each iteration. They
+correspond to "Model A" in Figure 5A of btz576_:
+
+============= ========================
+last-train btz576_
+============= ========================
+delOpenProb α\ :sub:`D`
+insOpenProb α\ :sub:`I`
+delExtendProb β\ :sub:`D`
+insExtendProb β\ :sub:`I`
+matchProb γ
+endProb ω\ :sub:`D`, ω\ :sub:`I`
+============= ========================
+
+* last-train gets most of the probabilities from the similar sequence
+ segments that it finds. But it gets these probabilities in a
+ different way:
-* last-train (and lastal) converts between path and alignment
- parameters as in Supplementary Section 3.1 of btz576_.
+ - It assumes that ω\ :sub:`D` = ω\ :sub:`I`, and gets the unique
+ value that satisfies "balanced length probability" (btz576_).
-* last-train uses parameters with "homogeneous letter probabilities"
- and "balanced length probability" (btz576_).
+ - It gets φ\ :sub:`x` and ψ\ :sub:`y` by assuming "homogeneous
+ letter probabilities" (btz576_).
+
+* last-train converts between gap probabilities and gap scores as in
+ Supplementary Section 3.1 of btz576_.
* last-train rounds the scores to integers, which makes them slightly
inaccurate. It then finds an adjusted scale factor (without
@@ -191,7 +205,27 @@ Details
(to reduce the inaccuracy of integer rounding), until the problem
goes away.
-.. _btz576: https://doi.org/10.1093/bioinformatics/btz576
+When ``--codon`` is used, the gap probabilities correspond to Figure 2
+of DNA-versus-protein_:
+
+============= ==========================
+last-train DNA-versus-protein_
+============= ==========================
+delOpenProb α\ :sub:`D`
+insOpenProb α\ :sub:`I`
+delExtendProb β\ :sub:`D`
+insExtendProb β\ :sub:`I`
+del-1 1 - δ\ :sub:`D`
+del-2 1 - ε\ :sub:`D`
+ins+1 1 - δ\ :sub:`I`
+ins+2 1 - ε\ :sub:`I`
+matchProb Γ
+endProb ω\ :sub:`i`, ∛ ω\ :sub:`D`
+============= ==========================
+
+It assumes that ω\ :sub:`D` = ω\ :sub:`i`\ :sup:`3`, and gets the
+unique value that satisfies "balanced length probability"
+(DNA-versus-protein_).
Bugs
----
@@ -205,4 +239,6 @@ Bugs
increasing the sample number, or reducing the alignment
significance_ threshold with option ``-D``.
+.. _btz576: https://doi.org/10.1093/bioinformatics/btz576
+.. _DNA-versus-protein: https://doi.org/10.1109/TCBB.2022.3177855
.. _significance: doc/last-evalues.rst
=====================================
doc/last-tuning.rst
=====================================
@@ -147,6 +147,19 @@ lastal -f
Option ``-fTAB`` **reduces the output size**, which can improve speed.
+lastdb -S2
+----------
+
+This makes lastdb index both strands of DNA. This **doubles memory
+and disk use**, but **makes lastal faster** because it just scans one
+strand of the query DNA.
+
+lastdb -B
+---------
+
+Lower values (e.g. 1) make lastal **faster**, but use **more memory
+and disk**. This has no effect on the results.
+
lastdb -i
---------
@@ -161,12 +174,6 @@ makes new versions of lastal slower. If you already used this option,
you can undo it by deleting the ``.chi2`` files (or move/rename them
to test which is faster).
-lastdb -B
----------
-
-Lower values (e.g. 1) make lastal **faster**, but use **more memory
-and disk**. This has no effect on the results.
-
Repeat masking
--------------
=====================================
src/DiagonalTable.cc deleted
=====================================
@@ -1,34 +0,0 @@
-// Copyright 2008 Martin C. Frith
-
-#include "DiagonalTable.hh"
-
-namespace cbrc{
-
-bool DiagonalTable::isCovered( indexT sequentialPos, indexT randomPos ){
-
- indexT diagonal = sequentialPos - randomPos; // wrap-around is OK
- indexT bin = diagonal % BINS;
- std::vector<pairT>& v = hits[bin];
-
- for( std::vector<pairT>::iterator i = v.begin(); i < v.end(); /* noop */ ){
- if( i->first >= sequentialPos ){
- if( i->second == diagonal ) return true;
- ++i;
- }else{
- i = v.erase(i); // hopefully we rarely get here
- }
- }
-
- return false;
-}
-
-void DiagonalTable::addEndpoint( indexT sequentialPos, indexT randomPos ){
-
- indexT diagonal = sequentialPos - randomPos; // wrap-around is OK
- indexT bin = diagonal % BINS;
- std::vector<pairT>& v = hits[bin];
-
- v.push_back( pairT( sequentialPos, diagonal ) );
-}
-
-} // end namespace cbrc
=====================================
src/DiagonalTable.hh
=====================================
@@ -1,4 +1,5 @@
-// Copyright 2008, 2010 Martin C. Frith
+// Author Martin C. Frith 2023
+// SPDX-License-Identifier: GPL-3.0-or-later
// This struct records coverage of "diagonals" by gapless alignments,
// when comparing two sequences. The diagonal is the coordinate in
@@ -20,26 +21,35 @@
#include <utility> // pair
#include <vector>
-namespace cbrc{
+namespace cbrc {
-struct DiagonalTable{
+struct DiagonalTable {
-#if LAST_POS_BYTES > 4
- typedef size_t indexT;
-#else
- typedef unsigned indexT;
-#endif
-
- typedef std::pair<indexT, indexT> pairT;
+ typedef std::pair<size_t, size_t> pairT;
enum { BINS = 256 }; // use a power-of-two for faster modulus (maybe)
// 256 is much faster than 65536 in my tests
// is this position on this diagonal already covered by an alignment?
- bool isCovered( indexT sequentialPos, indexT randomPos );
+ bool isCovered(size_t diagonal, size_t sequentialPos) {
+ std::vector<pairT> &v = hits[diagonal % BINS];
+
+ for (std::vector<pairT>::iterator i = v.begin(); i < v.end(); ) {
+ if (i->first >= sequentialPos) {
+ if (i->second == diagonal) return true;
+ ++i;
+ } else {
+ i = v.erase(i); // hopefully we rarely get here
+ }
+ }
+
+ return false;
+ }
// add an alignment endpoint to the table:
- void addEndpoint( indexT sequentialPos, indexT randomPos );
+ void addEndpoint(size_t diagonal, size_t sequentialPos) {
+ hits[diagonal % BINS].push_back(pairT(sequentialPos, diagonal));
+ }
std::vector<pairT> hits[BINS];
};
=====================================
src/LastEvaluer.cc
=====================================
@@ -3,7 +3,7 @@
#include "LastEvaluer.hh"
#include "GeneticCode.hh"
-#include "mcf_aligment_path_adder.hh"
+#include "mcf_alignment_path_adder.hh"
#include "alp/sls_falp_alignment_evaluer.hpp"
=====================================
src/LastalArguments.cc
=====================================
@@ -163,7 +163,7 @@ Initial-match options (default settings):\n\
-W use \"minimum\" positions in sliding windows of W consecutive positions\n\
\n\
Miscellaneous options (default settings):\n\
- -s strand: 0=reverse, 1=forward, 2=both (2 for DNA, 1 for protein)\n\
+ -s strand: 0=reverse, 1=forward, 2=both (2 if DNA and not lastdb -S2, else 1)\n\
-S score matrix applies to forward strand of: 0=reference, 1=query ("
+ stringify(isQueryStrandMatrix) + ")\n\
-K omit alignments whose query range lies in >= K others with > score (off)\n\
@@ -534,13 +534,14 @@ const char* LastalArguments::matrixName( bool isProtein ) const{
return matrixFile.c_str();
}
-void LastalArguments::setDefaultsFromAlphabet( bool isDna, bool isProtein,
- bool isKeepRefLowercase,
- int refTantanSetting,
- bool isCaseSensitiveSeeds,
- bool isVolumes,
- size_t refMinimizerWindow ){
- if( strand < 0 ) strand = (isDna || isTranslated()) ? 2 : 1;
+void LastalArguments::setDefaultsFromAlphabet(bool isDna, bool isProtein,
+ int refStrand,
+ bool isKeepRefLowercase,
+ int refTantanSetting,
+ bool isCaseSensitiveSeeds,
+ bool isVolumes,
+ size_t refMinimizerWindow) {
+ if (strand < 0) strand = 1 + ((isDna || isTranslated()) && refStrand < 2);
if( isGreedy ){
if( matchScore < 0 ) matchScore = 2;
=====================================
src/LastalArguments.hh
=====================================
@@ -36,10 +36,10 @@ struct LastalArguments{
const char* matrixName( bool isProtein ) const;
// set default option values that depend on input files:
- void setDefaultsFromAlphabet( bool isDna, bool isProtein,
- bool isKeepRefLowercase, int refTantanSetting,
- bool isCaseSensitiveSeeds, bool isVolumes,
- size_t refMinimizerWindow );
+ void setDefaultsFromAlphabet(bool isDna, bool isProtein, int refStrand,
+ bool isKeepRefLowercase, int refTantanSetting,
+ bool isCaseSensitiveSeeds, bool isVolumes,
+ size_t refMinimizerWindow);
void setDefaultsFromMatrix(double lambda, double minScore,
double maxEvalueDefault);
=====================================
src/MultiSequence.cc
=====================================
@@ -3,13 +3,12 @@
#include "MultiSequence.hh"
#include "io.hh"
#include <sstream>
-#include <algorithm> // upper_bound
#include <cassert>
#include <streambuf>
using namespace cbrc;
-void MultiSequence::initForAppending(indexT padSizeIn,
+void MultiSequence::initForAppending(size_t padSizeIn,
bool isAppendStopSymbol) {
padSize = padSizeIn;
seq.v.assign( padSize, ' ' );
@@ -82,7 +81,7 @@ void MultiSequence::readFastxName(std::istream& stream) {
}
std::istream&
-MultiSequence::appendFromFasta( std::istream& stream, indexT maxSeqLen ){
+MultiSequence::appendFromFasta(std::istream &stream, size_t maxSeqLen) {
if( isFinished() ){
char c = '>';
stream >> c;
@@ -112,12 +111,6 @@ MultiSequence::appendFromFasta( std::istream& stream, indexT maxSeqLen ){
return stream;
}
-size_t MultiSequence::whichSequence(indexT coordinate) const {
- const indexT* u = std::upper_bound( ends.begin(), ends.end(), coordinate );
- assert( u != ends.begin() && u != ends.end() );
- return u - ends.begin() - 1;
-}
-
static void reverseComplementPssm(int *beg, int *end,
const uchar *complement) {
while (beg < end) {
=====================================
src/MultiSequence.hh
=====================================
@@ -13,6 +13,7 @@
#include "ScoreMatrixRow.hh"
#include "VectorOrMmap.hh"
+#include <algorithm> // upper_bound
#include <string>
#include <iosfwd>
@@ -30,7 +31,7 @@ class MultiSequence{
#endif
// initialize with leftmost delimiter pad, ready for appending sequences
- void initForAppending(indexT padSizeIn, bool isAppendStopSymbol = false);
+ void initForAppending(size_t padSizeIn, bool isAppendStopSymbol = false);
// re-initialize, but keep the last sequence if it is unfinished
void reinitForAppending();
@@ -51,26 +52,26 @@ class MultiSequence{
// Append a sequence with delimiters. Don't let the total size of
// the concatenated sequences plus pads exceed maxSeqLen: thus it
// may not finish reading the sequence.
- std::istream& appendFromFasta( std::istream& stream, indexT maxSeqLen );
+ std::istream &appendFromFasta(std::istream &stream, size_t maxSeqLen);
// As above, but read FASTQ format.
- std::istream& appendFromFastq(std::istream& stream, indexT maxSeqLen,
+ std::istream &appendFromFastq(std::istream &stream, size_t maxSeqLen,
bool isKeepQualityData);
// As above, but read either FASTA or FASTQ format. The first
// sequence may have either format, but subsequent sequences must
// have the same format.
- std::istream& appendFromFastx(std::istream& stream, indexT maxSeqLen,
+ std::istream &appendFromFastx(std::istream &stream, size_t maxSeqLen,
bool isKeepQualityData);
// As above, but read quality scores too.
- std::istream& appendFromPrb( std::istream& stream, indexT maxSeqLen,
- unsigned alphSize, const uchar decode[] );
+ std::istream &appendFromPrb(std::istream &stream, size_t maxSeqLen,
+ unsigned alphSize, const uchar decode[]);
// As above, but read a PSSM too, in PSI-BLAST ASCII format.
- std::istream& appendFromPssm( std::istream& stream, indexT maxSeqLen,
- const uchar* lettersToNumbers,
- bool isMaskLowercase );
+ std::istream &appendFromPssm(std::istream &stream, size_t maxSeqLen,
+ const uchar *lettersToNumbers,
+ bool isMaskLowercase);
// did we finish reading the last sequence?
bool isFinished() const{ return ends.size() == nameEnds.size(); }
@@ -82,7 +83,10 @@ class MultiSequence{
size_t unfinishedSize() const{ return seq.size(); }
// which sequence is the coordinate in?
- size_t whichSequence(indexT coordinate) const;
+ size_t whichSequence(indexT coordinate) const {
+ return std::upper_bound(ends.begin(), ends.end(), coordinate)
+ - ends.begin() - 1;
+ }
size_t padBeg(size_t seqNum) const { return ends[seqNum] - padSize; }
size_t seqBeg(size_t seqNum) const { return ends[seqNum]; }
@@ -149,7 +153,7 @@ class MultiSequence{
}
private:
- indexT padSize; // number of delimiter chars between sequences
+ size_t padSize; // number of delimiter chars between sequences
VectorOrMmap<uchar> seq; // concatenated sequences
VectorOrMmap<indexT> ends; // coordinates of ends of delimiter pads
VectorOrMmap<char> names; // concatenated sequence names (to save memory)
=====================================
src/MultiSequenceQual.cc
=====================================
@@ -12,7 +12,7 @@
using namespace cbrc;
std::istream&
-MultiSequence::appendFromFastx(std::istream& stream, indexT maxSeqLen,
+MultiSequence::appendFromFastx(std::istream &stream, size_t maxSeqLen,
bool isKeepQualityData) {
if (names.empty()) {
isReadingFastq = false;
@@ -33,7 +33,7 @@ MultiSequence::appendFromFastx(std::istream& stream, indexT maxSeqLen,
}
std::istream&
-MultiSequence::appendFromFastq(std::istream& stream, indexT maxSeqLen,
+MultiSequence::appendFromFastq(std::istream &stream, size_t maxSeqLen,
bool isKeepQualityData) {
// initForAppending:
qualityScoresPerLetter = isKeepQualityData;
@@ -83,8 +83,8 @@ MultiSequence::appendFromFastq(std::istream& stream, indexT maxSeqLen,
}
std::istream&
-MultiSequence::appendFromPrb( std::istream& stream, indexT maxSeqLen,
- unsigned alphSize, const uchar decode[] ){
+MultiSequence::appendFromPrb(std::istream &stream, size_t maxSeqLen,
+ unsigned alphSize, const uchar decode[]) {
// initForAppending:
qualityScoresPerLetter = alphSize;
if( qualityScores.v.empty() ) appendQualPad();
@@ -162,9 +162,9 @@ std::istream& MultiSequence::readPssmHeader( std::istream& stream ){
}
std::istream&
-MultiSequence::appendFromPssm( std::istream& stream, indexT maxSeqLen,
- const uchar* lettersToNumbers,
- bool isMaskLowercase ){
+MultiSequence::appendFromPssm(std::istream &stream, size_t maxSeqLen,
+ const uchar *lettersToNumbers,
+ bool isMaskLowercase) {
// initForAppending:
if( pssm.empty() ) appendPssmPad();
=====================================
src/SubsetSuffixArray.cc
=====================================
@@ -202,7 +202,7 @@ void SubsetSuffixArray::toFiles( const std::string& baseName,
}
static size_t bucketPos(const uchar *text, const CyclicSubsetSeed &seed,
- const SubsetSuffixArray::indexT *steps, unsigned depth,
+ const size_t *steps, unsigned depth,
const PosPart *textPosPtr) {
const uchar *textPtr = text + posGet(textPosPtr);
@@ -224,8 +224,8 @@ static size_t bucketPos(const uchar *text, const CyclicSubsetSeed &seed,
}
static void makeSomeBuckets(const uchar *text, const CyclicSubsetSeed &seed,
- const SubsetSuffixArray::indexT *steps,
- unsigned depth, OffPart *buckBeg, OffPart *buckPtr,
+ const size_t *steps, unsigned depth,
+ OffPart *buckBeg, OffPart *buckPtr,
const PosPart *sa, size_t saBeg, size_t saEnd) {
for (size_t i = saBeg; i < saEnd; ++i) {
OffPart *b = buckBeg + bucketPos(text, seed, steps, depth, sa);
@@ -237,7 +237,7 @@ static void makeSomeBuckets(const uchar *text, const CyclicSubsetSeed &seed,
}
static void runThreads(const uchar *text, const CyclicSubsetSeed *seedPtr,
- const SubsetSuffixArray::indexT *steps, unsigned depth,
+ const size_t *steps, unsigned depth,
OffPart *buckBeg, OffPart *buckPtr, const PosPart *sa,
size_t saBeg, size_t saEnd, size_t numOfThreads) {
const CyclicSubsetSeed &seed = *seedPtr;
@@ -287,7 +287,7 @@ void SubsetSuffixArray::makeBuckets(const uchar *text,
for (size_t s = 0; s < seeds.size(); ++s) {
const CyclicSubsetSeed &seed = seeds[s];
unsigned depth = bucketDepths[s];
- const indexT *steps = bucketStepEnds[s];
+ const size_t *steps = bucketStepEnds[s];
size_t saEnd = cumulativeCounts[s];
if (saEnd > saBeg) {
runThreads(text, &seed, steps, depth, buckBeg,
@@ -304,11 +304,10 @@ void SubsetSuffixArray::makeBuckets(const uchar *text,
}
}
-static void makeBucketStepsForOneSeed(SubsetSuffixArray::indexT *steps,
- unsigned depth,
+static void makeBucketStepsForOneSeed(size_t *steps, unsigned depth,
const CyclicSubsetSeed &seed,
size_t wordLength) {
- SubsetSuffixArray::indexT step = offParts;
+ size_t step = offParts;
steps[depth] = step;
while (depth > 0) {
@@ -333,7 +332,7 @@ void SubsetSuffixArray::makeBucketSteps(const unsigned *bucketDepths,
}
bucketSteps.resize(numOfBucketSteps);
bucketStepEnds.resize(numOfSeeds + 1);
- indexT *steps = &bucketSteps[0];
+ size_t *steps = &bucketSteps[0];
for (size_t i = 0; i < numOfSeeds; ++i) {
bucketStepEnds[i] = steps;
unsigned depth = bucketDepths[i];
=====================================
src/SubsetSuffixArray.hh
=====================================
@@ -83,7 +83,7 @@ public:
typedef unsigned indexT;
#endif
- struct Range {PosPart *beg; PosPart *end; indexT depth;};
+ struct Range {PosPart *beg; PosPart *end; size_t depth;};
std::vector<CyclicSubsetSeed> &getSeeds() { return seeds; }
const std::vector<CyclicSubsetSeed> &getSeeds() const { return seeds; }
@@ -148,11 +148,11 @@ public:
private:
std::vector<CyclicSubsetSeed> seeds;
std::vector<const OffPart *> bucketEnds;
- std::vector<const indexT *> bucketStepEnds;
+ std::vector<const size_t *> bucketStepEnds;
VectorOrMmap<PosPart> suffixArray; // sorted indices
VectorOrMmap<OffPart> buckets;
- std::vector<indexT> bucketSteps; // step size for each k-mer
+ std::vector<size_t> bucketSteps; // step size for each k-mer
VectorOrMmap<indexT> childTable;
VectorOrMmap<unsigned short> kiddyTable; // smaller child table
@@ -190,27 +190,27 @@ private:
void sort2( const uchar* text, const CyclicSubsetSeed& seed,
PosPart *beg, const uchar* subsetMap );
- void radixSort1( std::vector<Range>& rangeStack,
- const uchar* text, const uchar* subsetMap,
- PosPart *beg, PosPart *end, indexT depth );
- void radixSort2( std::vector<Range>& rangeStack,
- const uchar* text, const uchar* subsetMap,
- PosPart *beg, PosPart *end, indexT depth );
- void radixSort3( std::vector<Range>& rangeStack,
- const uchar* text, const uchar* subsetMap,
- PosPart *beg, PosPart *end, indexT depth );
- void radixSort4( std::vector<Range>& rangeStack,
- const uchar* text, const uchar* subsetMap,
- PosPart *beg, PosPart *end, indexT depth );
- void radixSortN( std::vector<Range>& rangeStack,
- const uchar* text, const uchar* subsetMap,
- PosPart *beg, PosPart *end, indexT depth,
- unsigned subsetCount, indexT* bucketSize );
-
- void sortRanges( std::vector<Range>* stacks, indexT* bucketSizes,
- const uchar* text,
- unsigned wordLength, const CyclicSubsetSeed& seed,
- size_t maxUnsortedInterval, size_t numOfThreads );
+ void radixSort1(std::vector<Range> &rangeStack,
+ const uchar *text, const uchar *subsetMap,
+ PosPart *beg, PosPart *end, size_t depth);
+ void radixSort2(std::vector<Range> &rangeStack,
+ const uchar *text, const uchar *subsetMap,
+ PosPart *beg, PosPart *end, size_t depth);
+ void radixSort3(std::vector<Range> &rangeStack,
+ const uchar *text, const uchar *subsetMap,
+ PosPart *beg, PosPart *end, size_t depth);
+ void radixSort4(std::vector<Range> &rangeStack,
+ const uchar *text, const uchar *subsetMap,
+ PosPart *beg, PosPart *end, size_t depth);
+ void radixSortN(std::vector<Range> &rangeStack,
+ const uchar *text, const uchar *subsetMap,
+ PosPart *beg, PosPart *end, size_t depth,
+ unsigned subsetCount, size_t *bucketSize);
+
+ void sortRanges(std::vector<Range> *stacks, size_t *bucketSizes,
+ const uchar *text,
+ unsigned wordLength, const CyclicSubsetSeed &seed,
+ size_t maxUnsortedInterval, size_t numOfThreads);
size_t getChildForward(size_t from) const {
return
@@ -254,7 +254,7 @@ private:
bool isChildDirectionForward(const PosPart *beg) const {
const PosPart *origin = &suffixArray.v[0];
- indexT i = posCount(origin, beg);
+ size_t i = posCount(origin, beg);
return
!childTable.v.empty() ? childTable.v[i] == 0 :
!kiddyTable.v.empty() ? kiddyTable.v[i] == USHRT_MAX :
=====================================
src/SubsetSuffixArraySearch.cc
=====================================
@@ -347,7 +347,7 @@ void SubsetSuffixArray::match(const PosPart *&begPtr, const PosPart *&endPtr,
size_t bucketDepth = maxBucketPrefix(seedNum);
size_t startDepth = std::min( bucketDepth, maxDepth );
const OffPart *bucketPtr = bucketEnds[seedNum];
- const indexT* myBucketSteps = bucketStepEnds[seedNum];
+ const size_t *myBucketSteps = bucketStepEnds[seedNum];
while( depth < startDepth ){
uchar subset = subsetMap[ queryPtr[depth] ];
@@ -428,7 +428,7 @@ void SubsetSuffixArray::countMatches(std::vector<unsigned long long> &counts,
// match using buckets:
size_t bucketDepth = maxBucketPrefix(seedNum);
const OffPart *bucketPtr = bucketEnds[seedNum];
- const indexT* myBucketSteps = bucketStepEnds[seedNum];
+ const size_t *myBucketSteps = bucketStepEnds[seedNum];
size_t beg = offGet(bucketPtr);
size_t end = offGet(bucketPtr + myBucketSteps[depth]);
=====================================
src/SubsetSuffixArraySort.cc
=====================================
@@ -22,12 +22,11 @@ static void posSwap(PosPart *x, PosPart *y) {
}
namespace{
- typedef SubsetSuffixArray::indexT indexT;
typedef SubsetSuffixArray::Range Range;
}
static void pushRange(std::vector<Range> &v,
- PosPart *beg, PosPart *end, indexT depth) {
+ PosPart *beg, PosPart *end, size_t depth) {
if (end - beg > posParts) {
Range r = {beg, end, depth};
v.push_back(r);
@@ -77,9 +76,9 @@ void SubsetSuffixArray::sort2(const uchar* text, const CyclicSubsetSeed &seed,
// Specialized sort for 1 symbol + 1 delimiter.
// E.g. wildcard positions in spaced seeds.
-void SubsetSuffixArray::radixSort1( std::vector<Range>& rangeStack,
- const uchar* text, const uchar* subsetMap,
- PosPart *beg, PosPart *end, indexT depth ){
+void SubsetSuffixArray::radixSort1(std::vector<Range> &rangeStack,
+ const uchar *text, const uchar *subsetMap,
+ PosPart *beg, PosPart *end, size_t depth) {
PosPart *end0 = beg; // end of '0's
PosPart *begN = end; // beginning of delimiters
@@ -109,9 +108,9 @@ void SubsetSuffixArray::radixSort1( std::vector<Range>& rangeStack,
// Specialized sort for 2 symbols + 1 delimiter.
// E.g. transition-constrained positions in subset seeds.
-void SubsetSuffixArray::radixSort2( std::vector<Range>& rangeStack,
- const uchar* text, const uchar* subsetMap,
- PosPart *beg, PosPart *end, indexT depth ){
+void SubsetSuffixArray::radixSort2(std::vector<Range> &rangeStack,
+ const uchar *text, const uchar *subsetMap,
+ PosPart *beg, PosPart *end, size_t depth) {
PosPart *end0 = beg; // end of '0's
PosPart *end1 = beg; // end of '1's
PosPart *begN = end; // beginning of delimiters
@@ -151,9 +150,9 @@ void SubsetSuffixArray::radixSort2( std::vector<Range>& rangeStack,
// Specialized sort for 3 symbols + 1 delimiter.
// E.g. subset seeds for bisulfite-converted DNA.
-void SubsetSuffixArray::radixSort3( std::vector<Range>& rangeStack,
- const uchar* text, const uchar* subsetMap,
- PosPart *beg, PosPart *end, indexT depth ){
+void SubsetSuffixArray::radixSort3(std::vector<Range> &rangeStack,
+ const uchar *text, const uchar *subsetMap,
+ PosPart *beg, PosPart *end, size_t depth) {
PosPart *end0 = beg; // end of '0's
PosPart *end1 = beg; // end of '1's
PosPart *beg2 = end; // beginning of '2's
@@ -203,9 +202,9 @@ void SubsetSuffixArray::radixSort3( std::vector<Range>& rangeStack,
}
// Specialized sort for 4 symbols + 1 delimiter. E.g. DNA.
-void SubsetSuffixArray::radixSort4( std::vector<Range>& rangeStack,
- const uchar* text, const uchar* subsetMap,
- PosPart *beg, PosPart *end, indexT depth ){
+void SubsetSuffixArray::radixSort4(std::vector<Range> &rangeStack,
+ const uchar *text, const uchar *subsetMap,
+ PosPart *beg, PosPart *end, size_t depth) {
PosPart *end0 = beg; // end of '0's
PosPart *end1 = beg; // end of '1's
PosPart *end2 = beg; // end of '2's
@@ -267,10 +266,10 @@ void SubsetSuffixArray::radixSort4( std::vector<Range>& rangeStack,
const unsigned numOfBuckets = 256;
-void SubsetSuffixArray::radixSortN( std::vector<Range>& rangeStack,
- const uchar* text, const uchar* subsetMap,
- PosPart *beg, PosPart *end, indexT depth,
- unsigned subsetCount, indexT* bucketSize ){
+void SubsetSuffixArray::radixSortN(std::vector<Range> &rangeStack,
+ const uchar *text, const uchar *subsetMap,
+ PosPart *beg, PosPart *end, size_t depth,
+ unsigned subsetCount, size_t *bucketSize) {
PosPart *bucketEnd[numOfBuckets];
// get bucket sizes (i.e. letter counts):
@@ -361,7 +360,7 @@ static size_t numOfThreadsForOneRange(size_t numOfThreads,
}
void SubsetSuffixArray::sortRanges(std::vector<Range> *stacks,
- indexT *bucketSizes,
+ size_t *bucketSizes,
const uchar *text,
unsigned wordLength,
const CyclicSubsetSeed &seed,
@@ -411,11 +410,11 @@ void SubsetSuffixArray::sortRanges(std::vector<Range> *stacks,
PosPart *beg = myStack.back().beg;
PosPart *end = myStack.back().end;
- indexT depth = myStack.back().depth;
+ size_t depth = myStack.back().depth;
myStack.pop_back();
size_t interval = posCount(beg, end);
- const indexT minLength = 1;
+ const size_t minLength = 1;
if( interval <= maxUnsortedInterval && depth >= minLength ) continue;
const uchar* textBase = text + depth;
@@ -465,7 +464,7 @@ void SubsetSuffixArray::sortIndex( const uchar* text,
if (childTableType == 3) childTable.v.assign(size(), 0);
std::vector< std::vector<Range> > stacks(numOfThreads);
- std::vector<indexT> bucketSizes(numOfThreads * numOfBuckets);
+ std::vector<size_t> bucketSizes(numOfThreads * numOfBuckets);
PosPart *a = &suffixArray.v[0];
PosPart *beg = a;
=====================================
src/gaplessXdrop.hh
=====================================
@@ -31,21 +31,19 @@ static void gaplessXdropScores(BigPtr seq1, const uchar *seq2,
const uchar *fwd2 = seq2;
int fScore = 0, f = 0;
- while (true) {
+ do {
f += scorer[getNext(fwd1)][*fwd2++]; // overflow risk
- if (f < fScore - maxScoreDrop) break;
if (f > fScore) fScore = f;
- }
+ } while (f >= fScore - maxScoreDrop);
if (fScore - f < 0)
throw std::overflow_error("score overflow in forward gapless extension");
fwdScore = fScore;
int rScore = 0, r = 0;
- while (true) {
+ do {
r += scorer[getPrev(seq1)][*--seq2]; // overflow risk
- if (r < rScore - maxScoreDrop) break;
if (r > rScore) rScore = r;
- }
+ } while (r >= rScore - maxScoreDrop);
if (rScore - r < 0)
throw std::overflow_error("score overflow in reverse gapless extension");
revScore = rScore;
@@ -62,25 +60,31 @@ static bool gaplessXdropEnds(BigSeq seq1, const uchar *seq2,
size_t end1 = beg1;
size_t beg2 = pos2;
size_t end2 = beg2;
- while (fwdScore) fwdScore -= scorer[seq1[end1++]][seq2[end2++]];
- while (revScore) revScore -= scorer[seq1[--beg1]][seq2[--beg2]];
+
+ int fDrop = 0;
+ int fScore = fwdScore;
+ while (fScore) {
+ fScore -= scorer[seq1[end1++]][seq2[end2++]];
+ if (fScore > fDrop) fDrop = fScore;
+ }
+
+ int rDrop = 0;
+ int rScore = revScore;
+ while (rScore) {
+ rScore -= scorer[seq1[--beg1]][seq2[--beg2]];
+ if (rScore > rDrop) rDrop = rScore;
+ }
+
pos1 = beg1;
pos2 = beg2;
length = end1 - beg1;
// Check whether the alignment has no prefix with score <= 0, no
// suffix with score <= 0, and no region with score < -maxScoreDrop
- int score = 0;
- int maxScore = 0;
- while (beg1 < end1) {
- score += scorer[seq1[beg1++]][seq2[beg2++]];
- if (score > maxScore) {
- maxScore = score;
- } else if (score <= 0 || beg1 == end1 || score < maxScore - maxScoreDrop) {
- return false;
- }
- }
- return true;
+
+ fDrop -= fwdScore;
+ rDrop -= revScore;
+ return fDrop < revScore && rDrop < fwdScore && rDrop + fDrop <= maxScoreDrop;
}
// Returns the score, and sets the reverse and forward extension
=====================================
src/lastal.cc
=====================================
@@ -169,6 +169,7 @@ calculateSubstitutionScoreMatrixStatistics(const std::string &matrixName) {
}
} else {
stats.calcFromScale(scoreMat, alph.size, args.temperature);
+ LOG("score matrix bias=" << stats.bias());
}
}
@@ -375,7 +376,7 @@ static void calculateScoreStatistics(const std::string& matrixName,
// Read the .prj file for the whole database
void readOuterPrj(const std::string &fileName, size_t &refMinimizerWindow,
size_t &minSeedLimit, bool &isKeepRefLowercase,
- int &refTantanSetting, countT &numOfRefSeqs,
+ int &refTantanSetting, int &refStrand, countT &numOfRefSeqs,
countT &refLetters, countT &refMaxSeqLen, int &bitsPerBase) {
std::ifstream f( fileName.c_str() );
if( !f ) ERR( "can't open file: " + fileName );
@@ -394,6 +395,7 @@ void readOuterPrj(const std::string &fileName, size_t &refMinimizerWindow,
getline( iss, word, '=' );
if( word == "version" ) iss >> version;
if( word == "alphabet" ) iss >> alphabetLetters;
+ if( word == "strand" ) iss >> refStrand;
if( word == "numofsequences" ) iss >> numOfRefSeqs;
if( word == "numofletters" ) iss >> refLetters;
if( word == "maxsequenceletters" ) iss >> refMaxSeqLen;
@@ -633,7 +635,8 @@ void alignGapless1(LastAligner &aligner, SegmentPairPot &gaplessAlns,
if (maxAlignments == 0) break;
size_t refPos = posGet(beg); // coordinate in the reference sequence
- if (dt.isCovered(qryPos, refPos)) continue;
+ size_t diagonal = qryPos - refPos;
+ if (dt.isCovered(diagonal, qryPos)) continue;
++counts.gaplessExtensionCount;
int score;
@@ -642,7 +645,7 @@ void alignGapless1(LastAligner &aligner, SegmentPairPot &gaplessAlns,
score = dis.gaplessOverlap(refPos, qryPos, revLen, fwdLen);
if (score < minScoreGapless) continue;
SegmentPair sp(refPos - revLen, qryPos - revLen, revLen + fwdLen, score);
- dt.addEndpoint(sp.end2(), sp.end1());
+ dt.addEndpoint(diagonal, sp.end2());
writeSegmentPair(aligner, qrySeqs, qryData, sp);
} else {
int fwdScore, revScore;
@@ -654,7 +657,7 @@ void alignGapless1(LastAligner &aligner, SegmentPairPot &gaplessAlns,
size_t length;
if (!dis.gaplessEnds(fwdScore, revScore, rPos, qPos, length)) continue;
SegmentPair sp(rPos, qPos, length, score);
- dt.addEndpoint(sp.end2(), sp.end1());
+ dt.addEndpoint(diagonal, sp.end2());
if (args.outputType == 1) { // we just want gapless alignments
writeSegmentPair(aligner, qrySeqs, qryData, sp);
@@ -1484,6 +1487,7 @@ void lastal( int argc, char** argv ){
args.fromArgs( argc, argv );
args.resetCumulativeOptions(); // because we will do fromArgs again
+ int refStrand = 1; // assume this value, if not specified
size_t refMinimizerWindow = 1; // assume this value, if not specified
size_t minSeedLimit = 0;
countT numOfRefSeqs = -1;
@@ -1493,7 +1497,7 @@ void lastal( int argc, char** argv ){
int refTantanSetting = 0;
int bitsPerBase = CHAR_BIT;
readOuterPrj(args.lastdbName + ".prj", refMinimizerWindow, minSeedLimit,
- isKeepRefLowercase, refTantanSetting,
+ isKeepRefLowercase, refTantanSetting, refStrand,
numOfRefSeqs, refLetters, refMaxSeqLen, bitsPerBase);
bool isDna = (alph.letters == alph.dna);
bool isProtein = alph.isProtein();
@@ -1527,10 +1531,10 @@ void lastal( int argc, char** argv ){
aligners.resize( decideNumberOfThreads( args.numOfThreads,
args.programName, args.verbosity ) );
bool isMultiVolume = (numOfVolumes + 1 > 0 && numOfVolumes > 1);
- args.setDefaultsFromAlphabet( isDna, isProtein,
- isKeepRefLowercase, refTantanSetting,
- isCaseSensitiveSeeds, numOfVolumes + 1 > 0,
- refMinimizerWindow );
+ args.setDefaultsFromAlphabet(isDna, isProtein, refStrand,
+ isKeepRefLowercase, refTantanSetting,
+ isCaseSensitiveSeeds, numOfVolumes + 1 > 0,
+ refMinimizerWindow);
makeScoreMatrix(matrixName, matrixFile);
gapCosts.assign(args.delOpenCosts, args.delGrowCosts,
args.insOpenCosts, args.insGrowCosts,
=====================================
src/lastdb.cc
=====================================
@@ -299,10 +299,10 @@ void makeVolume(std::vector<CyclicSubsetSeed>& seeds,
// is likely to be less than volumeSize bytes. (This is crude, it
// neglects memory for the sequence names, and the fact that
// lowercase-masked letters and DNA "N"s aren't indexed.)
-static indexT maxLettersPerVolume( const LastdbArguments& args,
- const DnaWordsFinder& wordsFinder,
- size_t qualityCodesPerLetter,
- unsigned numOfSeeds ){
+static size_t maxLettersPerVolume(const LastdbArguments &args,
+ const DnaWordsFinder &wordsFinder,
+ size_t qualityCodesPerLetter,
+ unsigned numOfSeeds) {
double b = args.minIndexedPositionsPerBucket;
double x = posSize + offSize / b; // bytes per indexed position
if (wordsFinder.wordLength) {
@@ -316,7 +316,8 @@ static indexT maxLettersPerVolume( const LastdbArguments& args,
}
}
double y = args.volumeSize / (1 + qualityCodesPerLetter + x);
- return (y < posLimit) ? y : posLimit;
+ if (y < posLimit) return y;
+ return posLimit;
}
static bool isRoomToDuplicateTheLastSequence(const MultiSequence &multi,
@@ -432,8 +433,8 @@ void lastdb( int argc, char** argv ){
unsigned volumeNumber = 0;
countT sequenceCount = 0;
std::vector<countT> letterCounts( alph.size );
- indexT maxLetters = 0;
- indexT maxSeqLen = posLimit;
+ size_t maxLetters = 0;
+ size_t maxSeqLen = posLimit;
size_t maxSeqLenSeen = 0;
char defaultInputName[] = "-";
=====================================
src/makefile
=====================================
@@ -32,16 +32,16 @@ TantanMasker.o dna_words_finder.o fileMap.o tantan.o \
LastalArguments.o GappedXdropAligner.o GappedXdropAlignerDna.o \
GappedXdropAlignerPssm.o GappedXdropAligner2qual.o \
GappedXdropAligner3frame.o GappedXdropAlignerFrame.o \
-mcf_aligment_path_adder.o mcf_frameshift_xdrop_aligner.o \
+mcf_alignment_path_adder.o mcf_frameshift_xdrop_aligner.o \
mcf_gap_costs.o GeneticCode.o GreedyXdropAligner.o LastEvaluer.o \
OneQualityScoreMatrix.o QualityPssmMaker.o TwoQualityScoreMatrix.o \
cbrc_linalg.o mcf_substitution_matrix_stats.o \
split/cbrc_unsplit_alignment.o split/last_split_options.o $(alpObj)
alignObj4 = Alignment.o AlignmentPot.o AlignmentWrite.o \
-DiagonalTable.o MultiSequence.o MultiSequenceQual.o SegmentPair.o \
-SegmentPairPot.o SubsetSuffixArray.o SubsetSuffixArraySearch.o \
-lastal.o split/cbrc_split_aligner.o split/mcf_last_splitter.o
+MultiSequence.o MultiSequenceQual.o SegmentPair.o SegmentPairPot.o \
+SubsetSuffixArray.o SubsetSuffixArraySearch.o lastal.o \
+split/cbrc_split_aligner.o split/mcf_last_splitter.o
splitObj0 = Alphabet.o LambdaCalculator.o fileMap.o cbrc_linalg.o \
mcf_substitution_matrix_stats.o split/cbrc_unsplit_alignment.o \
@@ -143,7 +143,7 @@ ScoreMatrixData.hh: ../data/*.mat
../build/mat-inc.sh ../data/*.mat > $@
VERSION1 = git describe --dirty
-VERSION2 = echo ' (HEAD -> main, tag: 1460) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
+VERSION2 = echo ' (HEAD -> main, tag: 1471) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
VERSION = \"`test -e ../.git && $(VERSION1) || $(VERSION2)`\"
@@ -188,7 +188,6 @@ Centroid.o Centroid.o5 Centroid.o8: Centroid.cc Centroid.hh GappedXdropAligner.h
mcf_substitution_matrix_stats.hh GappedXdropAlignerInl.hh
CyclicSubsetSeed.o CyclicSubsetSeed.o5 CyclicSubsetSeed.o8: CyclicSubsetSeed.cc CyclicSubsetSeed.hh \
CyclicSubsetSeedData.hh zio.hh mcf_zstream.hh stringify.hh
-DiagonalTable.o DiagonalTable.o5 DiagonalTable.o8: DiagonalTable.cc DiagonalTable.hh
dna_words_finder.o dna_words_finder.o5 dna_words_finder.o8: dna_words_finder.cc dna_words_finder.hh
fileMap.o fileMap.o5 fileMap.o8: fileMap.cc fileMap.hh stringify.hh
GappedXdropAligner2qual.o GappedXdropAligner2qual.o5 GappedXdropAligner2qual.o8: GappedXdropAligner2qual.cc \
@@ -253,14 +252,14 @@ lastdb.o lastdb.o5 lastdb.o8: lastdb.cc last.hh Alphabet.hh mcf_big_seq.hh \
LastEvaluer.o LastEvaluer.o5 LastEvaluer.o8: LastEvaluer.cc LastEvaluer.hh ScoreMatrixRow.hh \
mcf_frameshift_xdrop_aligner.hh mcf_gap_costs.hh \
alp/sls_alignment_evaluer.hpp alp/sls_pvalues.hpp alp/sls_basic.hpp \
- GeneticCode.hh mcf_aligment_path_adder.hh \
+ GeneticCode.hh mcf_alignment_path_adder.hh \
alp/sls_falp_alignment_evaluer.hpp alp/sls_fsa1_pvalues.hpp
last-pair-probs.o last-pair-probs.o5 last-pair-probs.o8: last-pair-probs.cc last-pair-probs.hh zio.hh \
mcf_zstream.hh stringify.hh
last-pair-probs-main.o last-pair-probs-main.o5 last-pair-probs-main.o8: last-pair-probs-main.cc last-pair-probs.hh \
stringify.hh version.hh
-mcf_aligment_path_adder.o mcf_aligment_path_adder.o5 mcf_aligment_path_adder.o8: mcf_aligment_path_adder.cc \
- mcf_aligment_path_adder.hh
+mcf_alignment_path_adder.o mcf_alignment_path_adder.o5 mcf_alignment_path_adder.o8: mcf_alignment_path_adder.cc \
+ mcf_alignment_path_adder.hh
mcf_frameshift_xdrop_aligner.o mcf_frameshift_xdrop_aligner.o5 mcf_frameshift_xdrop_aligner.o8: mcf_frameshift_xdrop_aligner.cc \
mcf_frameshift_xdrop_aligner.hh mcf_gap_costs.hh
mcf_gap_costs.o mcf_gap_costs.o5 mcf_gap_costs.o8: mcf_gap_costs.cc mcf_gap_costs.hh
=====================================
src/mcf_aligment_path_adder.cc → src/mcf_alignment_path_adder.cc
=====================================
@@ -1,7 +1,7 @@
// Author: Martin C. Frith 2021
// SPDX-License-Identifier: GPL-3.0-or-later
-#include "mcf_aligment_path_adder.hh"
+#include "mcf_alignment_path_adder.hh"
#include <algorithm>
=====================================
src/mcf_aligment_path_adder.hh → src/mcf_alignment_path_adder.hh
=====================================
@@ -28,8 +28,8 @@
// [Yao21]: "Improved DNA-versus-protein homology search for protein
// fossils", Y Yao & MC Frith
-#ifndef MCF_ALIGMENT_PATH_ADDER_HH
-#define MCF_ALIGMENT_PATH_ADDER_HH
+#ifndef MCF_ALIGNMENT_PATH_ADDER_HH
+#define MCF_ALIGNMENT_PATH_ADDER_HH
#include <vector>
=====================================
test/last-test.out
=====================================
@@ -5263,3 +5263,38 @@ q ERR4570987.11099 <FGFBGHHHHHHHEGBG1GG?0HFBGHFFGHHHG22F?2FG/G
107 chrM 15871 42 - 16775 chrM 16325 42 - 16571 42 EG2=1.7e+07 E=0.01
106 chrM 9486 72 - 16775 chrM 13183 72 - 16571 72 EG2=2.2e+07 E=0.013
# Query sequences=1 normal letters=16571
+#
+# a=29 b=2 A=28 B=1 e=105 d=-1 x=104 y=47 z=104 D=1e+06 E=2.68446e+07
+# R=01 u=0 s=1 S=1 M=0 T=0 m=10 l=1 n=10 k=1 w=1000 t=4.72795 j=3 Q=0
+# /tmp/last-test
+# Reference sequences=4 normal letters=35606
+# lambda=0.21678 K=0.20616
+#
+# A C G T M S K W R Y B D H V
+# A 4 -3 -2 -5 2 -3 -4 1 2 -4 -3 1 0 1
+# C -3 4 -7 -2 2 2 -3 -2 -4 2 1 -3 1 1
+# G -4 -8 7 -9 -6 3 3 -6 3 -9 1 1 -7 1
+# T -5 -2 -7 4 -3 -3 2 2 -6 2 1 0 1 -4
+# M 2 2 -4 -3 2 0 -3 0 0 0 -1 -1 0 1
+# S -3 2 3 -4 0 2 0 -3 0 0 1 -1 -1 1
+# K -5 -3 2 2 -4 0 2 0 -1 0 1 1 -1 -1
+# W 1 -2 -4 1 0 -3 0 1 0 0 -1 1 0 -1
+# R 2 -4 3 -6 0 0 0 0 3 -5 -1 1 -1 1
+# Y -4 2 -7 2 0 0 0 0 -5 2 1 -1 1 -1
+# B -4 1 1 1 -1 1 1 -1 -2 1 1 0 0 -1
+# D 1 -3 1 0 -1 -1 1 1 1 -1 0 1 0 0
+# H 0 1 -5 1 0 -1 -1 0 -1 1 0 0 1 0
+# V 1 1 1 -4 1 1 -1 -1 1 -1 -1 0 0 1
+#
+# Coordinates are 0-based. For - strand matches, coordinates
+# in the reverse complement of the 2nd sequence are used.
+#
+# score name1 start1 alnSize1 strand1 seqSize1 name2 start2 alnSize2 strand2 seqSize2 blocks
+# batch 0
+21390 chrM 1234 13651 + 16775 chrM 585 13564 + 16571 9,0:1,56,0:1,82,3:0,63,1:0,19,1:0,55,0:1,58,1:0,98,5:0,14,3:0,114,2:0,95,4:0,30,1:0,34,1:0,38,0:1,33,0:1,60,6:0,57,5:0,34,0:7,60,0:1,9,2:0,33,16:0,21,0:9,25,0:2,36,14:0,21,1:0,16,4:0,26,4:0,11,2:0,56,0:1,43,1:0,24,3:0,93,1:0,41,0:4,11,16:0,38,0:1,40,3:0,30,1:0,16,0:8,38,0:1,24,0:2,37,1:0,11,0:9,23,0:4,25,3:0,13,3:0,41,0:2,37,1:0,33,2:0,37,3:0,212,2:0,8,4:0,70,12:0,16,0:1,15,0:4,21,1:0,148,1:0,53,0:1,67,6:0,19,3:0,21,5:0,15,1:0,44,0:1,29,7:0,20,21:0,930,0:2,29,2:0,20,0:1,11,10:0,70,0:3,63,1:0,673,0:1,26,1:0,263,0:5,29,2:0,79,5:0,32,3:0,21,0:1,68,2:0,80,0:30,81,2:0,35,3:0,17,0:8,450,0:3,1085,0:3,67,5:0,58,1:0,18,1:0,152,0:3,514,0:17,18,0:4,14,2:0,27,0:4,62,0:9,33,0:5,52,8:0,138,3:0,1406,1:0,21,3:0,340,3:0,48,3:0,458,0:6,12,6:0,1281,8:0,129,9:0,51,3:0,23,0:4,29,0:5,515,0:3,900,8:0,17,0:8,171,3:0,84 EG2=0 E=0
+2669 chrM 14904 1273 + 16775 chrM 14756 1268 + 16571 1127,2:0,22,0:1,24,4:0,94 EG2=1.1e-234 E=5.4e-244
+367 chrM 16213 562 + 16775 chrM 14155 588 + 16571 135,0:27,75,0:5,29,5:0,246,2:0,54,0:1,16 EG2=5.8e-18 E=3.3e-27
+130 chrM 5538 96 + 16775 chrM 12989 96 + 16571 96 EG2=1.2e+05 E=7e-05
+107 chrM 862 42 + 16775 chrM 204 42 + 16571 42 EG2=1.7e+07 E=0.01
+106 chrM 7217 72 + 16775 chrM 3316 72 + 16571 72 EG2=2.2e+07 E=0.013
+# Query sequences=1 normal letters=16571
=====================================
test/last-test.sh
=====================================
@@ -253,6 +253,7 @@ trap 'rm -f $db*' EXIT
# lastdb strands & volumes
lastdb --bits=4 -S2 -s1 -m1 $db galGal3-M-32.fa
lastal -s0 -fTAB -p hufu.train $db hg19-M.fa
+ lastal -fTAB -p hufu.train $db hg19-M.fa
} 2>&1 |
grep -v version | diff -u last-test.out -
=====================================
test/last-train-test.out
=====================================
@@ -12,12 +12,6 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# alignments: 3
# mean delete size: 1.87622
# mean insert size: 1.77096
-# matchProb: 0.964909
-# delOpenProb: 0.0173743
-# insOpenProb: 0.0174649
-# delExtendProb: 0.467014
-# insExtendProb: 0.435335
-
# substitution percent identity: 75.1959
# count matrix (query letters = columns, reference letters = rows):
@@ -34,6 +28,13 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# G 0.0203692 0.00358088 0.101869 0.00661055
# T 0.022286 0.0241058 0.00321536 0.198356
+# matchProb: 0.964909
+# delOpenProb: 0.0173743
+# insOpenProb: 0.0174649
+# delExtendProb: 0.467014
+# insExtendProb: 0.435335
+# endProb: 0.999874
+
# delExistCost: 358
# insExistCost: 346
# delExtendCost: 68
@@ -55,12 +56,6 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# alignments: 3
# mean delete size: 1.86233
# mean insert size: 1.73676
-# matchProb: 0.982101
-# delOpenProb: 0.00850861
-# insOpenProb: 0.00913514
-# delExtendProb: 0.463038
-# insExtendProb: 0.424217
-
# substitution percent identity: 73.6782
# count matrix (query letters = columns, reference letters = rows):
@@ -77,6 +72,13 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# G 0.0222071 0.00308742 0.101417 0.0055018
# T 0.0228341 0.0276589 0.00256418 0.194526
+# matchProb: 0.982101
+# delOpenProb: 0.00850861
+# insOpenProb: 0.00913514
+# delExtendProb: 0.463038
+# insExtendProb: 0.424217
+# endProb: 0.999872
+
# delExistCost: 421
# insExistCost: 401
# delExtendCost: 69
@@ -98,12 +100,6 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# alignments: 3
# mean delete size: 1.90468
# mean insert size: 1.72985
-# matchProb: 0.988561
-# delOpenProb: 0.00530891
-# insOpenProb: 0.00587501
-# delExtendProb: 0.474978
-# insExtendProb: 0.421916
-
# substitution percent identity: 72.8361
# count matrix (query letters = columns, reference letters = rows):
@@ -120,6 +116,13 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# G 0.0226865 0.00331155 0.100579 0.00561951
# T 0.0240677 0.0286165 0.00271094 0.192097
+# matchProb: 0.988561
+# delOpenProb: 0.00530891
+# insOpenProb: 0.00587501
+# delExtendProb: 0.474978
+# insExtendProb: 0.421916
+# endProb: 0.999872
+
# delExistCost: 468
# insExistCost: 439
# delExtendCost: 67
@@ -141,12 +144,6 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# alignments: 4
# mean delete size: 1.95701
# mean insert size: 1.72236
-# matchProb: 0.99109
-# delOpenProb: 0.00406366
-# insOpenProb: 0.00452481
-# delExtendProb: 0.489016
-# insExtendProb: 0.4194
-
# substitution percent identity: 72.5763
# count matrix (query letters = columns, reference letters = rows):
@@ -163,6 +160,13 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# G 0.0225757 0.00349725 0.10061 0.00579745
# T 0.0246472 0.0288421 0.00282099 0.191128
+# matchProb: 0.99109
+# delOpenProb: 0.00406366
+# insOpenProb: 0.00452481
+# delExtendProb: 0.489016
+# insExtendProb: 0.4194
+# endProb: 0.999839
+
# delExistCost: 497
# insExistCost: 462
# delExtendCost: 65
@@ -184,12 +188,6 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# alignments: 4
# mean delete size: 1.98367
# mean insert size: 1.7349
-# matchProb: 0.992013
-# delOpenProb: 0.0036166
-# insOpenProb: 0.00404933
-# delExtendProb: 0.495883
-# insExtendProb: 0.423597
-
# substitution percent identity: 72.4303
# count matrix (query letters = columns, reference letters = rows):
@@ -206,6 +204,13 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# G 0.0225355 0.00358142 0.100374 0.0058772
# T 0.0250104 0.0289294 0.00288441 0.190718
+# matchProb: 0.992013
+# delOpenProb: 0.0036166
+# insOpenProb: 0.00404933
+# delExtendProb: 0.495883
+# insExtendProb: 0.423597
+# endProb: 0.999839
+
# delExistCost: 511
# insExistCost: 474
# delExtendCost: 63
@@ -227,12 +232,6 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# alignments: 4
# mean delete size: 2.0056
# mean insert size: 1.74324
-# matchProb: 0.992369
-# delOpenProb: 0.00344063
-# insOpenProb: 0.00386967
-# delExtendProb: 0.501396
-# insExtendProb: 0.426354
-
# substitution percent identity: 72.3895
# count matrix (query letters = columns, reference letters = rows):
@@ -249,6 +248,13 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# G 0.0225356 0.00361317 0.100269 0.00591981
# T 0.0251034 0.0289261 0.00292041 0.190576
+# matchProb: 0.992369
+# delOpenProb: 0.00344063
+# insOpenProb: 0.00386967
+# delExtendProb: 0.501396
+# insExtendProb: 0.426354
+# endProb: 0.999839
+
# delExistCost: 517
# insExistCost: 479
# delExtendCost: 63
@@ -270,12 +276,6 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# alignments: 4
# mean delete size: 2.01205
# mean insert size: 1.75233
-# matchProb: 0.992502
-# delOpenProb: 0.00337994
-# insOpenProb: 0.00379696
-# delExtendProb: 0.502994
-# insExtendProb: 0.42933
-
# substitution percent identity: 72.3637
# count matrix (query letters = columns, reference letters = rows):
@@ -292,6 +292,13 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# G 0.0225307 0.00362649 0.100244 0.00592609
# T 0.0251049 0.0289208 0.00292802 0.190522
+# matchProb: 0.992502
+# delOpenProb: 0.00337994
+# insOpenProb: 0.00379696
+# delExtendProb: 0.502994
+# insExtendProb: 0.42933
+# endProb: 0.999839
+
# delExistCost: 520
# insExistCost: 482
# delExtendCost: 62
@@ -313,12 +320,6 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# alignments: 4
# mean delete size: 2.02605
# mean insert size: 1.75344
-# matchProb: 0.992543
-# delOpenProb: 0.00335323
-# insOpenProb: 0.00378298
-# delExtendProb: 0.50643
-# insExtendProb: 0.429692
-
# substitution percent identity: 72.3502
# count matrix (query letters = columns, reference letters = rows):
@@ -335,6 +336,13 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# G 0.0225293 0.00363263 0.100235 0.00593332
# T 0.0251751 0.0289195 0.00292737 0.190505
+# matchProb: 0.992543
+# delOpenProb: 0.00335323
+# insOpenProb: 0.00378298
+# delExtendProb: 0.50643
+# insExtendProb: 0.429692
+# endProb: 0.999839
+
# delExistCost: 521
# insExistCost: 483
# delExtendCost: 62
@@ -356,12 +364,6 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# alignments: 4
# mean delete size: 2.02231
# mean insert size: 1.75743
-# matchProb: 0.992569
-# delOpenProb: 0.00334619
-# insOpenProb: 0.00376317
-# delExtendProb: 0.505515
-# insExtendProb: 0.430986
-
# substitution percent identity: 72.3486
# count matrix (query letters = columns, reference letters = rows):
@@ -378,6 +380,13 @@ TEST last-train -m1 /tmp/last-train-test < ../examples/mouseMito.fa
# G 0.0225288 0.00363255 0.100233 0.00593319
# T 0.0251745 0.0289195 0.00293448 0.190501
+# matchProb: 0.992569
+# delOpenProb: 0.00334619
+# insOpenProb: 0.00376317
+# delExtendProb: 0.505515
+# insExtendProb: 0.430986
+# endProb: 0.999839
+
# delExistCost: 521
# insExistCost: 483
# delExtendCost: 62
@@ -420,12 +429,6 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# alignments: 3
# mean delete size: 1.87622
# mean insert size: 1.77096
-# matchProb: 0.964909
-# delOpenProb: 0.0173743
-# insOpenProb: 0.0174649
-# delExtendProb: 0.467014
-# insExtendProb: 0.435335
-
# substitution percent identity: 75.1959
# count matrix (query letters = columns, reference letters = rows):
@@ -442,6 +445,13 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# G 0.0384778 0.00490905 0.149975 0.0272531
# T 0.0230148 0.0189822 0.0113838 0.226004
+# matchProb: 0.964909
+# delOpenProb: 0.0173743
+# insOpenProb: 0.0174649
+# delExtendProb: 0.467014
+# insExtendProb: 0.435335
+# endProb: 0.999874
+
# delExistCost: 358
# insExistCost: 346
# delExtendCost: 68
@@ -463,12 +473,6 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# alignments: 3
# mean delete size: 1.85047
# mean insert size: 1.72338
-# matchProb: 0.980824
-# delOpenProb: 0.00912028
-# insOpenProb: 0.00980058
-# delExtendProb: 0.459595
-# insExtendProb: 0.419746
-
# substitution percent identity: 73.8937
# count matrix (query letters = columns, reference letters = rows):
@@ -485,6 +489,13 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# G 0.0415456 0.00411683 0.148205 0.0281907
# T 0.0237883 0.021166 0.0117238 0.221263
+# matchProb: 0.980824
+# delOpenProb: 0.00912028
+# insOpenProb: 0.00980058
+# delExtendProb: 0.459595
+# insExtendProb: 0.419746
+# endProb: 0.999872
+
# delExistCost: 414
# insExistCost: 393
# delExtendCost: 70
@@ -506,12 +517,6 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# alignments: 3
# mean delete size: 1.88193
# mean insert size: 1.7102
-# matchProb: 0.987463
-# delOpenProb: 0.00583898
-# insOpenProb: 0.00644286
-# delExtendProb: 0.46863
-# insExtendProb: 0.415271
-
# substitution percent identity: 73.0662
# count matrix (query letters = columns, reference letters = rows):
@@ -528,6 +533,13 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# G 0.0422692 0.00426486 0.146493 0.028917
# T 0.024942 0.0218414 0.0124345 0.218838
+# matchProb: 0.987463
+# delOpenProb: 0.00583898
+# insOpenProb: 0.00644286
+# delExtendProb: 0.46863
+# insExtendProb: 0.415271
+# endProb: 0.999872
+
# delExistCost: 458
# insExistCost: 429
# delExtendCost: 68
@@ -549,12 +561,6 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# alignments: 3
# mean delete size: 1.9244
# mean insert size: 1.72341
-# matchProb: 0.990139
-# delOpenProb: 0.00452675
-# insOpenProb: 0.00507839
-# delExtendProb: 0.480358
-# insExtendProb: 0.419755
-
# substitution percent identity: 72.6658
# count matrix (query letters = columns, reference letters = rows):
@@ -571,6 +577,13 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# G 0.0425339 0.00440009 0.145869 0.0292641
# T 0.025593 0.0220682 0.0128117 0.21746
+# matchProb: 0.990139
+# delOpenProb: 0.00452675
+# insOpenProb: 0.00507839
+# delExtendProb: 0.480358
+# insExtendProb: 0.419755
+# endProb: 0.999872
+
# delExistCost: 485
# insExistCost: 452
# delExtendCost: 66
@@ -592,12 +605,6 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# alignments: 3
# mean delete size: 1.95593
# mean insert size: 1.73855
-# matchProb: 0.991151
-# delOpenProb: 0.00403159
-# insOpenProb: 0.00456142
-# delExtendProb: 0.488734
-# insExtendProb: 0.424807
-
# substitution percent identity: 72.4953
# count matrix (query letters = columns, reference letters = rows):
@@ -614,6 +621,13 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# G 0.0426446 0.00445707 0.145337 0.0294193
# T 0.0258584 0.0221722 0.012972 0.217139
+# matchProb: 0.991151
+# delOpenProb: 0.00403159
+# insOpenProb: 0.00456142
+# delExtendProb: 0.488734
+# insExtendProb: 0.424807
+# endProb: 0.999872
+
# delExistCost: 498
# insExistCost: 464
# delExtendCost: 65
@@ -635,12 +649,6 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# alignments: 3
# mean delete size: 1.97178
# mean insert size: 1.74744
-# matchProb: 0.99152
-# delOpenProb: 0.00384991
-# insOpenProb: 0.00437452
-# delExtendProb: 0.492845
-# insExtendProb: 0.427736
-
# substitution percent identity: 72.4478
# count matrix (query letters = columns, reference letters = rows):
@@ -657,6 +665,13 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# G 0.0426225 0.00446783 0.145245 0.029466
# T 0.0260087 0.0221611 0.0130349 0.216994
+# matchProb: 0.99152
+# delOpenProb: 0.00384991
+# insOpenProb: 0.00437452
+# delExtendProb: 0.492845
+# insExtendProb: 0.427736
+# endProb: 0.999872
+
# delExistCost: 504
# insExistCost: 468
# delExtendCost: 64
@@ -678,12 +693,6 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# alignments: 3
# mean delete size: 1.98206
# mean insert size: 1.75126
-# matchProb: 0.991659
-# delOpenProb: 0.00377696
-# insOpenProb: 0.00430867
-# delExtendProb: 0.495475
-# insExtendProb: 0.428983
-
# substitution percent identity: 72.4132
# count matrix (query letters = columns, reference letters = rows):
@@ -700,6 +709,13 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# G 0.0426736 0.00447948 0.145305 0.0294792
# T 0.0260542 0.0222039 0.0130435 0.216761
+# matchProb: 0.991659
+# delOpenProb: 0.00377696
+# insOpenProb: 0.00430867
+# delExtendProb: 0.495475
+# insExtendProb: 0.428983
+# endProb: 0.999872
+
# delExistCost: 506
# insExistCost: 470
# delExtendCost: 64
@@ -721,12 +737,6 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# alignments: 3
# mean delete size: 1.98244
# mean insert size: 1.75262
-# matchProb: 0.991719
-# delOpenProb: 0.0037497
-# insOpenProb: 0.0042757
-# delExtendProb: 0.495572
-# insExtendProb: 0.429424
-
# substitution percent identity: 72.4062
# count matrix (query letters = columns, reference letters = rows):
@@ -743,6 +753,13 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# G 0.0426699 0.00448916 0.145293 0.0295091
# T 0.0260528 0.0222023 0.0130456 0.216738
+# matchProb: 0.991719
+# delOpenProb: 0.0037497
+# insOpenProb: 0.0042757
+# delExtendProb: 0.495572
+# insExtendProb: 0.429424
+# endProb: 0.999872
+
# delExistCost: 507
# insExistCost: 472
# delExtendCost: 64
@@ -764,12 +781,6 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# alignments: 3
# mean delete size: 1.98566
# mean insert size: 1.75772
-# matchProb: 0.991738
-# delOpenProb: 0.00374338
-# insOpenProb: 0.004263
-# delExtendProb: 0.496389
-# insExtendProb: 0.431082
-
# substitution percent identity: 72.4067
# count matrix (query letters = columns, reference letters = rows):
@@ -786,6 +797,13 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# G 0.0426705 0.00448915 0.145293 0.0295093
# T 0.026053 0.0222027 0.0130421 0.21674
+# matchProb: 0.991738
+# delOpenProb: 0.00374338
+# insOpenProb: 0.004263
+# delExtendProb: 0.496389
+# insExtendProb: 0.431082
+# endProb: 0.999872
+
# delExistCost: 508
# insExistCost: 472
# delExtendCost: 63
@@ -807,12 +825,6 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# alignments: 3
# mean delete size: 1.98907
# mean insert size: 1.75546
-# matchProb: 0.991725
-# delOpenProb: 0.00374334
-# insOpenProb: 0.00427572
-# delExtendProb: 0.497253
-# insExtendProb: 0.430347
-
# substitution percent identity: 72.4067
# count matrix (query letters = columns, reference letters = rows):
@@ -829,6 +841,13 @@ TEST last-train -m1 -C2 --revsym /tmp/last-train-test ../examples/mouseMito.fa
# G 0.0426705 0.00448915 0.145294 0.029509
# T 0.026053 0.0222028 0.0130421 0.21674
+# matchProb: 0.991725
+# delOpenProb: 0.00374334
+# insOpenProb: 0.00427572
+# delExtendProb: 0.497253
+# insExtendProb: 0.430347
+# endProb: 0.999872
+
# delExistCost: 508
# insExistCost: 472
# delExtendCost: 63
@@ -871,12 +890,6 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# alignments: 3
# mean delete size: 1.87622
# mean insert size: 1.77096
-# matchProb: 0.964909
-# delOpenProb: 0.0174196
-# insOpenProb: 0.0174196
-# delExtendProb: 0.451591
-# insExtendProb: 0.451591
-
# substitution percent identity: 75.1959
# count matrix (query letters = columns, reference letters = rows):
@@ -893,6 +906,13 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# G 0.0171139 0.00490905 0.101869 0.00491296
# T 0.0230148 0.0403461 0.00491296 0.198356
+# matchProb: 0.964909
+# delOpenProb: 0.0174196
+# insOpenProb: 0.0174196
+# delExtendProb: 0.451591
+# insExtendProb: 0.451591
+# endProb: 0.999874
+
# delExistCost: 353
# insExistCost: 353
# delExtendCost: 70
@@ -914,12 +934,6 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# alignments: 3
# mean delete size: 1.85041
# mean insert size: 1.75796
-# matchProb: 0.982371
-# delOpenProb: 0.00868684
-# insOpenProb: 0.00868684
-# delExtendProb: 0.445361
-# insExtendProb: 0.445361
-
# substitution percent identity: 73.5727
# count matrix (query letters = columns, reference letters = rows):
@@ -936,6 +950,13 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# G 0.0186794 0.00430299 0.101447 0.00405408
# T 0.0237533 0.0449419 0.00405408 0.193941
+# matchProb: 0.982371
+# delOpenProb: 0.00868684
+# insOpenProb: 0.00868684
+# delExtendProb: 0.445361
+# insExtendProb: 0.445361
+# endProb: 0.999872
+
# delExistCost: 413
# insExistCost: 413
# delExtendCost: 73
@@ -957,12 +978,6 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# alignments: 3
# mean delete size: 1.87231
# mean insert size: 1.74329
-# matchProb: 0.989059
-# delOpenProb: 0.00534266
-# insOpenProb: 0.00534266
-# delExtendProb: 0.446086
-# insExtendProb: 0.446086
-
# substitution percent identity: 72.7368
# count matrix (query letters = columns, reference letters = rows):
@@ -979,6 +994,13 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# G 0.0191525 0.00450126 0.100525 0.00419646
# T 0.025066 0.0458016 0.00419646 0.192005
+# matchProb: 0.989059
+# delOpenProb: 0.00534266
+# insOpenProb: 0.00534266
+# delExtendProb: 0.446086
+# insExtendProb: 0.446086
+# endProb: 0.999872
+
# delExistCost: 457
# insExistCost: 457
# delExtendCost: 73
@@ -1000,12 +1022,6 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# alignments: 3
# mean delete size: 1.9141
# mean insert size: 1.75894
-# matchProb: 0.991273
-# delOpenProb: 0.00423585
-# insOpenProb: 0.00423585
-# delExtendProb: 0.454422
-# insExtendProb: 0.454422
-
# substitution percent identity: 72.3952
# count matrix (query letters = columns, reference letters = rows):
@@ -1022,6 +1038,13 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# G 0.0191736 0.00462203 0.100304 0.00435878
# T 0.0257083 0.0460427 0.00435878 0.190513
+# matchProb: 0.991273
+# delOpenProb: 0.00423585
+# insOpenProb: 0.00423585
+# delExtendProb: 0.454422
+# insExtendProb: 0.454422
+# endProb: 0.999872
+
# delExistCost: 482
# insExistCost: 482
# delExtendCost: 71
@@ -1043,12 +1066,6 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# alignments: 4
# mean delete size: 1.95468
# mean insert size: 1.76157
-# matchProb: 0.992236
-# delOpenProb: 0.00372153
-# insOpenProb: 0.00372153
-# delExtendProb: 0.460683
-# insExtendProb: 0.460683
-
# substitution percent identity: 72.3763
# count matrix (query letters = columns, reference letters = rows):
@@ -1065,6 +1082,13 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# G 0.0190254 0.00463843 0.100396 0.00441427
# T 0.0258439 0.0459483 0.00441427 0.19076
+# matchProb: 0.992236
+# delOpenProb: 0.00372153
+# insOpenProb: 0.00372153
+# delExtendProb: 0.460683
+# insExtendProb: 0.460683
+# endProb: 0.999839
+
# delExistCost: 495
# insExistCost: 495
# delExtendCost: 70
@@ -1086,12 +1110,6 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# alignments: 4
# mean delete size: 1.97143
# mean insert size: 1.77121
-# matchProb: 0.992504
-# delOpenProb: 0.00358761
-# insOpenProb: 0.00358761
-# delExtendProb: 0.464405
-# insExtendProb: 0.464405
-
# substitution percent identity: 72.331
# count matrix (query letters = columns, reference letters = rows):
@@ -1108,6 +1126,13 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# G 0.0190181 0.00465221 0.100285 0.00444288
# T 0.0258923 0.0460323 0.00444288 0.190605
+# matchProb: 0.992504
+# delOpenProb: 0.00358761
+# insOpenProb: 0.00358761
+# delExtendProb: 0.464405
+# insExtendProb: 0.464405
+# endProb: 0.999839
+
# delExistCost: 500
# insExistCost: 500
# delExtendCost: 69
@@ -1129,12 +1154,6 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# alignments: 4
# mean delete size: 1.98307
# mean insert size: 1.77908
-# matchProb: 0.99265
-# delOpenProb: 0.00351446
-# insOpenProb: 0.00351446
-# delExtendProb: 0.467134
-# insExtendProb: 0.467134
-
# substitution percent identity: 72.3022
# count matrix (query letters = columns, reference letters = rows):
@@ -1151,6 +1170,13 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# G 0.0190127 0.00465733 0.100192 0.00444515
# T 0.0259897 0.0459912 0.00444515 0.190545
+# matchProb: 0.99265
+# delOpenProb: 0.00351446
+# insOpenProb: 0.00351446
+# delExtendProb: 0.467134
+# insExtendProb: 0.467134
+# endProb: 0.999839
+
# delExistCost: 503
# insExistCost: 503
# delExtendCost: 69
@@ -1172,12 +1198,6 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# alignments: 4
# mean delete size: 1.98706
# mean insert size: 1.77931
-# matchProb: 0.992718
-# delOpenProb: 0.00348055
-# insOpenProb: 0.00348055
-# delExtendProb: 0.467697
-# insExtendProb: 0.467697
-
# substitution percent identity: 72.2973
# count matrix (query letters = columns, reference letters = rows):
@@ -1194,6 +1214,13 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# G 0.0190138 0.00466392 0.100196 0.00445254
# T 0.0259982 0.0459905 0.00445254 0.190546
+# matchProb: 0.992718
+# delOpenProb: 0.00348055
+# insOpenProb: 0.00348055
+# delExtendProb: 0.467697
+# insExtendProb: 0.467697
+# endProb: 0.999839
+
# delExistCost: 504
# insExistCost: 504
# delExtendCost: 69
@@ -1215,12 +1242,6 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# alignments: 4
# mean delete size: 1.98732
# mean insert size: 1.77849
-# matchProb: 0.992745
-# delOpenProb: 0.00346706
-# insOpenProb: 0.00346706
-# delExtendProb: 0.467604
-# insExtendProb: 0.467604
-
# substitution percent identity: 72.2948
# count matrix (query letters = columns, reference letters = rows):
@@ -1237,6 +1258,13 @@ TEST last-train -m1 -k16 --matsym --gapsym /tmp/last-train-test ../examples/mous
# G 0.01902 0.00466307 0.100192 0.00445642
# T 0.0260012 0.0459892 0.00445642 0.19054
+# matchProb: 0.992745
+# delOpenProb: 0.00346706
+# insOpenProb: 0.00346706
+# delExtendProb: 0.467604
+# insExtendProb: 0.467604
+# endProb: 0.999839
+
# delExistCost: 504
# insExistCost: 504
# delExtendCost: 69
@@ -1279,12 +1307,6 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# alignments: 42
# mean delete size: 1.74792
# mean insert size: 1.80972
-# matchProb: 0.984026
-# delOpenProb: 0.000500826
-# insOpenProb: 0.000453786
-# delExtendProb: 0.427891
-# insExtendProb: 0.447427
-
# substitution percent identity: 92.2405
# count matrix (query letters = columns, reference letters = rows):
@@ -1301,6 +1323,13 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# G 0.000836933 0.000414928 0.294448 0.00115497
# T 0.00077815 0.000464734 0.000483526 0.310307
+# matchProb: 0.984026
+# delOpenProb: 0.000500826
+# insOpenProb: 0.000453786
+# delExtendProb: 0.427891
+# insExtendProb: 0.447427
+# endProb: 0.992461
+
# delExistCost: 639
# insExistCost: 653
# delExtendCost: 73
@@ -1322,12 +1351,6 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# alignments: 94
# mean delete size: 1.92029
# mean insert size: 1.96852
-# matchProb: 0.987603
-# delOpenProb: 0.000161607
-# insOpenProb: 0.000152944
-# delExtendProb: 0.479244
-# insExtendProb: 0.492005
-
# substitution percent identity: 77.3599
# count matrix (query letters = columns, reference letters = rows):
@@ -1344,6 +1367,13 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# G 0.00041627 0.000304489 0.182988 0.000415098
# T 0.000343844 0.000350039 0.00030737 0.269468
+# matchProb: 0.987603
+# delOpenProb: 0.000161607
+# insOpenProb: 0.000152944
+# delExtendProb: 0.479244
+# insExtendProb: 0.492005
+# endProb: 0.99394
+
# delExistCost: 755
# insExistCost: 765
# delExtendCost: 64
@@ -1365,12 +1395,6 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# alignments: 94
# mean delete size: 1.98911
# mean insert size: 1.99145
-# matchProb: 0.987578
-# delOpenProb: 0.000136313
-# insOpenProb: 0.000135176
-# delExtendProb: 0.497264
-# insExtendProb: 0.497852
-
# substitution percent identity: 77.0026
# count matrix (query letters = columns, reference letters = rows):
@@ -1387,6 +1411,13 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# G 0.000396203 0.00032824 0.184417 0.000320187
# T 0.000283261 0.000400239 0.000328767 0.268577
+# matchProb: 0.987578
+# delOpenProb: 0.000136313
+# insOpenProb: 0.000135176
+# delExtendProb: 0.497264
+# insExtendProb: 0.497852
+# endProb: 0.993906
+
# delExistCost: 777
# insExistCost: 778
# delExtendCost: 60
@@ -1408,12 +1439,6 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# alignments: 94
# mean delete size: 1.99494
# mean insert size: 1.9932
-# matchProb: 0.987582
-# delOpenProb: 0.000135467
-# insOpenProb: 0.000134419
-# delExtendProb: 0.498731
-# insExtendProb: 0.498294
-
# substitution percent identity: 76.9893
# count matrix (query letters = columns, reference letters = rows):
@@ -1430,6 +1455,13 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# G 0.000393193 0.000341364 0.184406 0.000296234
# T 0.000269104 0.000464531 0.000334926 0.268564
+# matchProb: 0.987582
+# delOpenProb: 0.000135467
+# insOpenProb: 0.000134419
+# delExtendProb: 0.498731
+# insExtendProb: 0.498294
+# endProb: 0.993907
+
# delExistCost: 778
# insExistCost: 778
# delExtendCost: 60
@@ -1451,12 +1483,6 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# alignments: 94
# mean delete size: 1.99489
# mean insert size: 1.99325
-# matchProb: 0.987582
-# delOpenProb: 0.000135378
-# insOpenProb: 0.000134413
-# delExtendProb: 0.49872
-# insExtendProb: 0.498306
-
# substitution percent identity: 76.9857
# count matrix (query letters = columns, reference letters = rows):
@@ -1473,6 +1499,13 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# G 0.000392534 0.00034512 0.184382 0.00028924
# T 0.000265772 0.000507 0.000337057 0.268515
+# matchProb: 0.987582
+# delOpenProb: 0.000135378
+# insOpenProb: 0.000134413
+# delExtendProb: 0.49872
+# insExtendProb: 0.498306
+# endProb: 0.993908
+
# delExistCost: 778
# insExistCost: 778
# delExtendCost: 60
@@ -1494,12 +1527,6 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# alignments: 94
# mean delete size: 1.99487
# mean insert size: 1.99324
-# matchProb: 0.987582
-# delOpenProb: 0.00013538
-# insOpenProb: 0.000134414
-# delExtendProb: 0.498715
-# insExtendProb: 0.498304
-
# substitution percent identity: 76.9838
# count matrix (query letters = columns, reference letters = rows):
@@ -1516,6 +1543,13 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# G 0.000392531 0.000345957 0.184379 0.000287251
# T 0.000264875 0.000535695 0.000337017 0.268485
+# matchProb: 0.987582
+# delOpenProb: 0.00013538
+# insOpenProb: 0.000134414
+# delExtendProb: 0.498715
+# insExtendProb: 0.498304
+# endProb: 0.993908
+
# delExistCost: 778
# insExistCost: 778
# delExtendCost: 60
@@ -1537,12 +1571,6 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# alignments: 94
# mean delete size: 1.99489
# mean insert size: 1.99319
-# matchProb: 0.98758
-# delOpenProb: 0.000135377
-# insOpenProb: 0.000134412
-# delExtendProb: 0.49872
-# insExtendProb: 0.498291
-
# substitution percent identity: 76.9819
# count matrix (query letters = columns, reference letters = rows):
@@ -1559,6 +1587,13 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# G 0.000392503 0.000345765 0.184398 0.000286342
# T 0.000264096 0.000550139 0.000336968 0.268448
+# matchProb: 0.98758
+# delOpenProb: 0.000135377
+# insOpenProb: 0.000134412
+# delExtendProb: 0.49872
+# insExtendProb: 0.498291
+# endProb: 0.993907
+
# delExistCost: 778
# insExistCost: 778
# delExtendCost: 60
@@ -1580,12 +1615,6 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# alignments: 94
# mean delete size: 1.99483
# mean insert size: 1.99314
-# matchProb: 0.987582
-# delOpenProb: 0.000135373
-# insOpenProb: 0.000134409
-# delExtendProb: 0.498704
-# insExtendProb: 0.498278
-
# substitution percent identity: 76.982
# count matrix (query letters = columns, reference letters = rows):
@@ -1602,6 +1631,13 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# G 0.000392526 0.000345925 0.184403 0.000286342
# T 0.00026411 0.000561859 0.000337017 0.268443
+# matchProb: 0.987582
+# delOpenProb: 0.000135373
+# insOpenProb: 0.000134409
+# delExtendProb: 0.498704
+# insExtendProb: 0.498278
+# endProb: 0.993907
+
# delExistCost: 778
# insExistCost: 778
# delExtendCost: 60
@@ -1623,12 +1659,6 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# alignments: 94
# mean delete size: 1.99482
# mean insert size: 1.99314
-# matchProb: 0.987582
-# delOpenProb: 0.000135373
-# insOpenProb: 0.000134409
-# delExtendProb: 0.498703
-# insExtendProb: 0.49828
-
# substitution percent identity: 76.9814
# count matrix (query letters = columns, reference letters = rows):
@@ -1645,6 +1675,13 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# G 0.000392529 0.000345929 0.184404 0.000286344
# T 0.00026411 0.000566013 0.000337016 0.268432
+# matchProb: 0.987582
+# delOpenProb: 0.000135373
+# insOpenProb: 0.000134409
+# delExtendProb: 0.498703
+# insExtendProb: 0.49828
+# endProb: 0.993907
+
# delExistCost: 778
# insExistCost: 778
# delExtendCost: 60
@@ -1666,12 +1703,6 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# alignments: 94
# mean delete size: 1.99483
# mean insert size: 1.99314
-# matchProb: 0.987582
-# delOpenProb: 0.000135373
-# insOpenProb: 0.000134409
-# delExtendProb: 0.498704
-# insExtendProb: 0.49828
-
# substitution percent identity: 76.9811
# count matrix (query letters = columns, reference letters = rows):
@@ -1688,6 +1719,13 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# G 0.000392541 0.000345929 0.184404 0.000286343
# T 0.000264103 0.00057002 0.000337013 0.268431
+# matchProb: 0.987582
+# delOpenProb: 0.000135373
+# insOpenProb: 0.000134409
+# delExtendProb: 0.498704
+# insExtendProb: 0.49828
+# endProb: 0.993907
+
# delExistCost: 778
# insExistCost: 778
# delExtendCost: 60
@@ -1709,12 +1747,6 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# alignments: 94
# mean delete size: 1.99491
# mean insert size: 1.99314
-# matchProb: 0.987582
-# delOpenProb: 0.000135384
-# insOpenProb: 0.000134409
-# delExtendProb: 0.498724
-# insExtendProb: 0.498279
-
# substitution percent identity: 76.9808
# count matrix (query letters = columns, reference letters = rows):
@@ -1731,6 +1763,13 @@ TEST last-train -Q1 /tmp/last-train-test bs100.fastq
# G 0.000392541 0.000345932 0.184403 0.000286356
# T 0.0002641 0.000573637 0.000337007 0.26843
+# matchProb: 0.987582
+# delOpenProb: 0.000135384
+# insOpenProb: 0.000134409
+# delExtendProb: 0.498724
+# insExtendProb: 0.498279
+# endProb: 0.993907
+
# delExistCost: 778
# insExistCost: 778
# delExtendCost: 60
View it on GitLab: https://salsa.debian.org/med-team/last-align/-/commit/e54f9ef3844a2987badd28b0e814d83606ca29f6
--
View it on GitLab: https://salsa.debian.org/med-team/last-align/-/commit/e54f9ef3844a2987badd28b0e814d83606ca29f6
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230824/e97f4a9c/attachment-0001.htm>
More information about the debian-med-commit
mailing list