[med-svn] [last-align] 01/03: Imported Upstream version 746
Charles Plessy
plessy at moszumanska.debian.org
Fri Jul 15 04:35:24 UTC 2016
This is an automated email from the git hooks/post-receive script.
plessy pushed a commit to branch master
in repository last-align.
commit d1b9bf48ecde01dfd16f8942016c1b8c736a06e9
Author: Charles Plessy <plessy at debian.org>
Date: Fri Jul 15 13:31:29 2016 +0900
Imported Upstream version 746
---
ChangeLog.txt | 51 ++++++++++++++++++-
src/Centroid.cc | 110 ++++++++++++++++++++--------------------
src/Centroid.hh | 17 ++++---
src/GappedXdropAligner.cc | 29 ++++-------
src/GappedXdropAligner.hh | 35 +++++++------
src/GappedXdropAligner2qual.cc | 6 +--
src/GappedXdropAligner3frame.cc | 12 ++---
src/GappedXdropAlignerPssm.cc | 6 +--
src/LastalArguments.cc | 15 +++---
src/SubsetSuffixArraySearch.cc | 3 +-
src/alp/sls_alp_data.cpp | 20 +-------
src/alp/sls_alp_data.hpp | 4 --
src/alp/sls_basic.cpp | 26 +++++++++-
src/alp/sls_basic.hpp | 4 +-
src/alp/sls_fsa1.cpp | 19 +------
src/alp/sls_fsa1_utils.cpp | 18 +------
src/alp/sls_fsa1_utils.hpp | 6 ---
src/lastal.cc | 3 ++
src/version.hh | 2 +-
19 files changed, 202 insertions(+), 184 deletions(-)
diff --git a/ChangeLog.txt b/ChangeLog.txt
index 42d258b..214187c 100644
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -1,8 +1,57 @@
+2016-07-05 Martin C. Frith <Martin C. Frith>
+
+ * src/SubsetSuffixArraySearch.cc:
+ Bugfix: lastal with -l0 was not reliable, I think.
+ [60738c7a24e3] [tip]
+
+ * src/LastalArguments.cc:
+ Allowed gap open costs < 0.
+ [4513e0b7e26d]
+
+2016-05-20 Martin C. Frith <Martin C. Frith>
+
+ * src/lastal.cc:
+ Maybe made lastal faster in some cases.
+ [bb03593936dc]
+
+ * src/GappedXdropAligner.hh, src/GappedXdropAligner3frame.cc:
+ Refactoring.
+ [788dd86f42f0]
+
+ * src/Centroid.cc, src/GappedXdropAligner.cc,
+ src/GappedXdropAligner.hh, src/GappedXdropAligner2qual.cc,
+ src/GappedXdropAlignerPssm.cc:
+ Refactoring.
+ [eea957b121e8]
+
+ * src/Centroid.cc:
+ Refactoring.
+ [db4de3a2e6a3]
+
+ * src/Centroid.hh, src/GappedXdropAligner.cc,
+ src/GappedXdropAligner.hh, src/GappedXdropAligner3frame.cc:
+ Refactoring.
+ [a317e3410f24]
+
+2016-05-19 Martin C. Frith <Martin C. Frith>
+
+ * src/Centroid.cc, src/Centroid.hh, src/GappedXdropAligner.cc,
+ src/GappedXdropAligner.hh, src/GappedXdropAligner3frame.cc:
+ Refactoring.
+ [12fa981e66d0]
+
+ * src/alp/sls_alp_data.cpp, src/alp/sls_alp_data.hpp,
+ src/alp/sls_basic.cpp, src/alp/sls_basic.hpp, src/alp/sls_fsa1.cpp,
+ src/alp/sls_fsa1_utils.cpp, src/alp/sls_fsa1_utils.hpp:
+ Tried to fix compile error on Cygwin (I don't fully understand
+ this).
+ [21d485934bef]
+
2016-03-31 Martin C. Frith <Martin C. Frith>
* doc/lastal.txt, src/LastalArguments.cc:
Tried to document lastal options more clearly.
- [9ecd0219342e] [tip]
+ [9ecd0219342e]
2016-03-30 Martin C. Frith <Martin C. Frith>
diff --git a/src/Centroid.cc b/src/Centroid.cc
index ba8866a..792ed45 100644
--- a/src/Centroid.cc
+++ b/src/Centroid.cc
@@ -96,8 +96,8 @@ namespace cbrc{
}
void Centroid::initForwardMatrix(){
- scale.assign ( numAntidiagonals, 1.0 ); // scaling
- std::size_t n = xa.scoreEndIndex( numAntidiagonals );
+ scale.assign ( numAntidiagonals + 2, 1.0 ); // scaling
+ size_t n = xa.scoreEndIndex( numAntidiagonals );
if ( fM.size() < n ) {
fM.resize( n );
@@ -111,12 +111,12 @@ namespace cbrc{
void Centroid::initBackwardMatrix(){
pp.resize( fM.size() );
- mD.assign( numAntidiagonals, 0.0 );
- mI.assign( numAntidiagonals, 0.0 );
- mX1.assign ( numAntidiagonals, 1.0 );
- mX2.assign ( numAntidiagonals, 1.0 );
+ mD.assign( numAntidiagonals + 2, 0.0 );
+ mI.assign( numAntidiagonals + 2, 0.0 );
+ mX1.assign ( numAntidiagonals + 2, 1.0 );
+ mX2.assign ( numAntidiagonals + 2, 1.0 );
- std::size_t n = xa.scoreEndIndex( numAntidiagonals );
+ size_t n = xa.scoreEndIndex( numAntidiagonals );
bM.assign( n, 0.0 );
bD.assign( n, 0.0 );
bI.assign( n, 0.0 );
@@ -169,26 +169,26 @@ namespace cbrc{
assert( gap.insExist == gap.delExist || eP <= 0.0 );
- for( size_t k = 2; k < numAntidiagonals; ++k ){ // loop over antidiagonals
+ for( size_t k = 0; k < numAntidiagonals; ++k ){ // loop over antidiagonals
double sum_f = 0.0; // sum of forward values
- const size_t seq1beg = xa.seq1start( k );
- const std::size_t seq2pos = k - 2 - seq1beg;
- const double scale12 = 1.0 / ( scale[k-1] * scale[k-2] );
- const double scale1 = 1.0 / scale[k-1];
+ const size_t seq1beg = seq1start( k );
+ const size_t seq2pos = k - seq1beg;
+ const double scale12 = 1.0 / ( scale[k+1] * scale[k] );
+ const double scale1 = 1.0 / scale[k+1];
const double seE = eE * scale1;
const double seEI = eEI * scale1;
const double seP = eP * scale12;
- const std::size_t scoreEnd = xa.scoreEndIndex( k );
+ const size_t scoreEnd = xa.scoreEndIndex( k );
double* fM0 = &fM[ scoreEnd ];
double* fD0 = &fD[ scoreEnd ];
double* fI0 = &fI[ scoreEnd ];
double* fP0 = &fP[ scoreEnd ];
- const std::size_t horiBeg = xa.hori( k, seq1beg );
- const std::size_t vertBeg = xa.vert( k, seq1beg );
- const std::size_t diagBeg = xa.diag( k, seq1beg );
+ const size_t horiBeg = xa.hori( k, seq1beg );
+ const size_t vertBeg = xa.vert( k, seq1beg );
+ const size_t diagBeg = xa.diag( k, seq1beg );
const double* fD1 = &fD[ horiBeg ];
const double* fI1 = &fI[ vertBeg ];
const double* fM2 = &fM[ diagBeg ];
@@ -270,12 +270,12 @@ namespace cbrc{
}
}
if( !globality ) Z += sum_f;
- scale[k] = sum_f + 1.0; // seems ugly
- Z /= scale[k]; // scaling
+ scale[k+2] = sum_f + 1.0; // seems ugly
+ Z /= scale[k+2]; // scaling
} // k
//std::cout << "# Z=" << Z << std::endl;
assert( Z > 0.0 );
- scale[ numAntidiagonals - 1 ] *= Z; // this causes scaled Z to equal 1
+ scale[ numAntidiagonals + 1 ] *= Z; // this causes scaled Z to equal 1
}
// added by M. Hamada
@@ -309,18 +309,18 @@ namespace cbrc{
assert( gap.insExist == gap.delExist || eP <= 0.0 );
- for( size_t k = numAntidiagonals-1; k > 1; --k ){
- const size_t seq1beg = xa.seq1start( k );
- const std::size_t seq2pos = k - 2 - seq1beg;
- const double scale12 = 1.0 / ( scale[k-1] * scale[k-2] );
- const double scale1 = 1.0 / scale[k-1];
- scaledUnit /= scale[k];
+ for( size_t k = numAntidiagonals; k-- > 0; ){
+ const size_t seq1beg = seq1start( k );
+ const size_t seq2pos = k - seq1beg;
+ const double scale12 = 1.0 / ( scale[k+1] * scale[k] );
+ const double scale1 = 1.0 / scale[k+1];
+ scaledUnit /= scale[k+2];
const double seE = eE * scale1;
const double seEI = eEI * scale1;
const double seP = eP * scale12;
- const std::size_t scoreEnd = xa.scoreEndIndex( k );
+ const size_t scoreEnd = xa.scoreEndIndex( k );
const double* bM0 = &bM[ scoreEnd + 1 ];
const double* bD0 = &bD[ scoreEnd + 1 ];
const double* bI0 = &bI[ scoreEnd + 1 ];
@@ -328,9 +328,9 @@ namespace cbrc{
double* pp0 = &pp[ scoreEnd ];
- const std::size_t horiBeg = xa.hori( k, seq1beg );
- const std::size_t vertBeg = xa.vert( k, seq1beg );
- const std::size_t diagBeg = xa.diag( k, seq1beg );
+ const size_t horiBeg = xa.hori( k, seq1beg );
+ const size_t vertBeg = xa.vert( k, seq1beg );
+ const size_t diagBeg = xa.diag( k, seq1beg );
double* bD1 = &bD[ horiBeg ];
double* bI1 = &bI[ vertBeg ];
double* bM2 = &bM[ diagBeg ];
@@ -503,11 +503,11 @@ namespace cbrc{
initDecodingMatrix();
- for( size_t k = 3; k < numAntidiagonals; ++k ){ // loop over antidiagonals
- const std::size_t scoreEnd = xa.scoreEndIndex( k );
+ for( size_t k = 1; k < numAntidiagonals; ++k ){ // loop over antidiagonals
+ const size_t scoreEnd = xa.scoreEndIndex( k );
double* X0 = &X[ scoreEnd ];
const double* P0 = &pp[ scoreEnd ];
- size_t cur = xa.seq1start( k );
+ size_t cur = seq1start( k );
const double* const x0end = X0 + xa.numCellsAndPads( k );
const double* X1 = &X[xa.hori(k, cur)];
@@ -536,7 +536,7 @@ namespace cbrc{
size_t i = bestPos1;
size_t oldPos1 = i;
- while( k > 2 ){
+ while( k > 0 ){
const int m =
maxIndex( diagx( X, k, i ) + ( gamma + 1 ) * cellx( pp, k, i ) - 1,
horix( X, k, i ),
@@ -545,8 +545,8 @@ namespace cbrc{
k -= 2;
i -= 1;
}
- if( (m > 0 && oldPos1 != i) || k == 2 ){
- chunks.push_back( SegmentPair( i, k - i - 2, oldPos1 - i ) );
+ if( (m > 0 && oldPos1 != i) || k == 0 ){
+ chunks.push_back( SegmentPair( i, k - i, oldPos1 - i ) );
}
if( m > 0 ){
k -= 1;
@@ -560,12 +560,12 @@ namespace cbrc{
initDecodingMatrix();
- for( size_t k = 3; k < numAntidiagonals; ++k ){ // loop over antidiagonals
- const std::size_t scoreEnd = xa.scoreEndIndex( k );
+ for( size_t k = 1; k < numAntidiagonals; ++k ){ // loop over antidiagonals
+ const size_t scoreEnd = xa.scoreEndIndex( k );
double* X0 = &X[ scoreEnd ];
const double* P0 = &pp[ scoreEnd ];
- size_t cur = xa.seq1start( k );
- size_t seq2pos = k - 2 - cur;
+ size_t cur = seq1start( k );
+ size_t seq2pos = k - cur;
const double* const x0end = X0 + xa.numCellsAndPads( k );
const double* X1 = &X[ xa.hori(k, cur) ];
@@ -597,8 +597,8 @@ namespace cbrc{
size_t i = bestPos1;
size_t oldPos1 = i;
- while( k > 2 ){
- const size_t j = k - i - 2;
+ while( k > 0 ){
+ const size_t j = k - i;
const double s = 2 * gamma * cellx( pp, k, i ) - ( mX1[ i ] + mX2[ j ] );
const double t = gamma * mI[ j ] - mX2[ j ];
const double u = gamma * mD[ i ] - mX1[ i ];
@@ -610,8 +610,8 @@ namespace cbrc{
k -= 2;
i -= 1;
}
- if( (m > 0 && oldPos1 != i) || k == 2 ){
- chunks.push_back( SegmentPair( i, k - i - 2, oldPos1 - i ) );
+ if( (m > 0 && oldPos1 != i) || k == 0 ){
+ chunks.push_back( SegmentPair( i, k - i, oldPos1 - i ) );
}
if( m > 0 ){
k -= 1;
@@ -653,7 +653,7 @@ namespace cbrc{
size_t seq2pos = i->end2();
for( size_t j = 0; j < i->size; ++j ){
- double p = cellx( pp, seq1pos + seq2pos + 2, seq1pos );
+ double p = cellx( pp, seq1pos + seq2pos, seq1pos );
ambiguityCodes.push_back( asciiProbability(p) );
--seq1pos;
--seq2pos;
@@ -678,8 +678,8 @@ namespace cbrc{
double Centroid::logPartitionFunction() const{
double x = 0.0;
- for( std::size_t k = 2; k < numAntidiagonals; ++k ){
- x += std::log( scale[k] );
+ for( size_t k = 0; k < numAntidiagonals; ++k ){
+ x += std::log( scale[k+2] );
}
return T * x;
}
@@ -708,11 +708,11 @@ namespace cbrc{
assert( gap.insExist == gap.delExist || eP <= 0.0 );
- for( size_t k = 2; k < numAntidiagonals; ++k ){ // loop over antidiagonals
- const size_t seq1beg = xa.seq1start( k );
- const std::size_t seq2pos = k - 2 - seq1beg;
- const double scale12 = 1.0 / ( scale[k-1] * scale[k-2] );
- const double scale1 = 1.0 / scale[k-1];
+ for( size_t k = 0; k < numAntidiagonals; ++k ){ // loop over antidiagonals
+ const size_t seq1beg = seq1start( k );
+ const size_t seq2pos = k - seq1beg;
+ const double scale12 = 1.0 / ( scale[k+1] * scale[k] );
+ const double scale1 = 1.0 / scale[k+1];
const double seE = eE * scale1;
const double seEI = eEI * scale1;
@@ -721,15 +721,15 @@ namespace cbrc{
const uchar* s1 = seqPtr( seq1, isForward, seq1beg );
const uchar* s2 = seqPtr( seq2, isForward, seq2pos );
- const std::size_t scoreEnd = xa.scoreEndIndex( k );
+ const size_t scoreEnd = xa.scoreEndIndex( k );
const double* bM0 = &bM[ scoreEnd + 1 ];
const double* bD0 = &bD[ scoreEnd + 1 ];
const double* bI0 = &bI[ scoreEnd + 1 ];
const double* bP0 = &bP[ scoreEnd + 1 ];
- const std::size_t horiBeg = xa.hori( k, seq1beg );
- const std::size_t vertBeg = xa.vert( k, seq1beg );
- const std::size_t diagBeg = xa.diag( k, seq1beg );
+ const size_t horiBeg = xa.hori( k, seq1beg );
+ const size_t vertBeg = xa.vert( k, seq1beg );
+ const size_t diagBeg = xa.diag( k, seq1beg );
const double* fD1 = &fD[ horiBeg ];
const double* fI1 = &fI[ vertBeg ];
const double* fM2 = &fM[ diagBeg ];
diff --git a/src/Centroid.hh b/src/Centroid.hh
index e597c4d..14cef9e 100644
--- a/src/Centroid.hh
+++ b/src/Centroid.hh
@@ -7,7 +7,6 @@
#include "GeneralizedAffineGapCosts.hh"
#include "SegmentPair.hh"
#include "OneQualityScoreMatrix.hh"
-#include <cassert>
#include <stddef.h> // size_t
#include <vector>
#include <iostream> // for debug
@@ -123,30 +122,34 @@ namespace cbrc{
void updateScore( double score, size_t antiDiagonal, size_t cur );
+ // start of the x-drop region (i.e. number of skipped seq1 letters
+ // before the x-drop region) for this antidiagonal
+ size_t seq1start( size_t antidiagonal ) const {
+ return xa.scoreEndIndex( antidiagonal ) - xa.scoreOrigin( antidiagonal );
+ }
+
// get DP matrix value at the given position
double cellx( const dvec_t& matrix,
size_t antiDiagonal, size_t seq1pos ) const{
- return matrix[ xa.scoreEndIndex( antiDiagonal ) + seq1pos - xa.seq1start( antiDiagonal ) ];
+ return matrix[ xa.scoreOrigin( antiDiagonal ) + seq1pos ];
}
// get DP matrix value "left of" the given position
double horix( const dvec_t& matrix,
size_t antiDiagonal, size_t seq1pos ) const{
- assert( antiDiagonal > 0 );
- return cellx( matrix, antiDiagonal-1, seq1pos );
+ return matrix[ xa.hori( antiDiagonal, seq1pos ) ];
}
// get DP matrix value "above" the given position
double vertx( const dvec_t& matrix,
size_t antiDiagonal, size_t seq1pos ) const{
- assert( antiDiagonal > 0 );
- return cellx( matrix, antiDiagonal-1, seq1pos+1 );
+ return matrix[ xa.vert( antiDiagonal, seq1pos ) ];
}
// get DP matrix value "diagonal from" the given position
double diagx( const dvec_t& matrix,
size_t antiDiagonal, size_t seq1pos ) const{
- return cellx( matrix, antiDiagonal-2, seq1pos );
+ return matrix[ xa.diag( antiDiagonal, seq1pos ) ];
}
// get a pointer into a sequence, taking start and direction into account
diff --git a/src/GappedXdropAligner.cc b/src/GappedXdropAligner.cc
index 91d9f41..4c8a532 100644
--- a/src/GappedXdropAligner.cc
+++ b/src/GappedXdropAligner.cc
@@ -55,7 +55,7 @@ namespace cbrc {
// Puts 2 "dummy" antidiagonals at the start, so that we can safely
// look-back from subsequent antidiagonals.
void GappedXdropAligner::init() {
- seq1starts.resize(0);
+ scoreOrigins.resize(0);
scoreEnds.resize(1);
initAntidiagonal(0, 0, 0);
@@ -68,21 +68,15 @@ void GappedXdropAligner::init() {
yScores[1] = -INF;
zScores[1] = -INF;
- bestAntidiagonal = 2;
+ bestAntidiagonal = 0;
}
void GappedXdropAligner::initAntidiagonal(std::size_t seq1beg,
std::size_t scoreEnd,
std::size_t numCells) {
+ scoreOrigins.push_back(scoreEnd - seq1beg);
std::size_t newEnd = scoreEnd + numCells + 1; // + 1 pad cell
-
- if (xScores.size() < newEnd) {
- xScores.resize(newEnd);
- yScores.resize(newEnd);
- zScores.resize(newEnd);
- }
-
- seq1starts.push_back(seq1beg);
+ resizeScoresIfSmaller(newEnd);
scoreEnds.push_back(newEnd);
}
@@ -108,12 +102,12 @@ int GappedXdropAligner::align(const uchar *seq1,
int bestScore = 0;
int bestEdgeScore = -INF;
- std::size_t bestEdgeAntidiagonal = 2;
+ std::size_t bestEdgeAntidiagonal = 0;
std::size_t bestEdgeSeq1position = 0;
init();
- for (std::size_t antidiagonal = 2; /* noop */; ++antidiagonal) {
+ for (std::size_t antidiagonal = 0; /* noop */; ++antidiagonal) {
std::size_t seq1beg = std::min(maxSeq1begs[0], maxSeq1begs[1]);
std::size_t seq1end = std::max(minSeq1ends[0], minSeq1ends[1]);
@@ -124,7 +118,7 @@ int GappedXdropAligner::align(const uchar *seq1,
initAntidiagonal(seq1beg, scoreEnd, numCells);
- std::size_t seq2pos = antidiagonal - 2 - seq1beg;
+ std::size_t seq2pos = antidiagonal - seq1beg;
const uchar *s1 = isForward ? seq1 + seq1beg : seq1 - seq1beg - 1;
const uchar *s2 = isForward ? seq2 + seq2pos : seq2 - seq2pos - 1;
@@ -243,18 +237,17 @@ bool GappedXdropAligner::getNextChunk(std::size_t &end1,
int insExistenceCost,
int insExtensionCost,
int gapUnalignedCost) {
- if (bestAntidiagonal == 2) return false;
+ if (bestAntidiagonal == 0) return false;
end1 = bestSeq1position;
- end2 = bestAntidiagonal - 2 - bestSeq1position;
+ end2 = bestAntidiagonal - bestSeq1position;
const std::size_t undefined = -1;
length = undefined;
int state = 0;
while (1) {
- assert(bestAntidiagonal >= 2);
- assert(bestSeq1position <= bestAntidiagonal - 2);
+ assert(bestSeq1position <= bestAntidiagonal);
std::size_t h = hori(bestAntidiagonal, bestSeq1position);
std::size_t v = vert(bestAntidiagonal, bestSeq1position);
@@ -278,7 +271,7 @@ bool GappedXdropAligner::getNextChunk(std::size_t &end1,
state = maxIndex(x, y, z, a, b);
- if (length == undefined && (state > 0 || bestAntidiagonal == 2)) {
+ if (length == undefined && (state > 0 || bestAntidiagonal == 0)) {
length = end1 - bestSeq1position;
assert(length != undefined);
}
diff --git a/src/GappedXdropAligner.hh b/src/GappedXdropAligner.hh
index 8103344..5a0607f 100644
--- a/src/GappedXdropAligner.hh
+++ b/src/GappedXdropAligner.hh
@@ -165,57 +165,62 @@ class GappedXdropAligner {
// The next 4 functions are for use by Centroid. If the Centroid
// code gets updated, it might make sense to change these functions too.
- // The number of antidiagonals, including dummy ones at the beginning.
+ // The number of antidiagonals, excluding dummy ones at the beginning.
std::size_t numAntidiagonals() const
- { return seq1starts.size(); }
+ { return scoreOrigins.size() - 2; }
- std::size_t seq1start(std::size_t antidiagonal) const
- { return seq1starts[antidiagonal]; }
+ std::size_t scoreOrigin(std::size_t antidiagonal) const
+ { return scoreOrigins[antidiagonal + 2]; }
std::size_t numCellsAndPads(std::size_t antidiagonal) const
- { return scoreEnds[antidiagonal + 1] - scoreEnds[antidiagonal]; }
+ { return scoreEnds[antidiagonal + 3] - scoreEnds[antidiagonal + 2]; }
std::size_t scoreEndIndex(std::size_t antidiagonal) const
- { return scoreEnds[antidiagonal]; }
+ { return scoreEnds[antidiagonal + 2]; }
// The index in the score vectors, of the previous "horizontal" cell.
std::size_t hori(std::size_t antidiagonal, std::size_t seq1coordinate) const
- { return scoreBase(antidiagonal - 1) + seq1coordinate; }
+ { return scoreOrigins[antidiagonal + 1] + seq1coordinate; }
// The index in the score vectors, of the previous "vertical" cell.
std::size_t vert(std::size_t antidiagonal, std::size_t seq1coordinate) const
- { return scoreBase(antidiagonal - 1) + seq1coordinate + 1; }
+ { return scoreOrigins[antidiagonal + 1] + seq1coordinate + 1; }
// The index in the score vectors, of the previous "diagonal" cell.
std::size_t diag(std::size_t antidiagonal, std::size_t seq1coordinate) const
- { return scoreBase(antidiagonal - 2) + seq1coordinate; }
+ { return scoreOrigins[antidiagonal] + seq1coordinate; }
// The index in the score vectors, of the previous in-frame horizontal cell.
std::size_t hori3(std::size_t antidiagonal, std::size_t seq1coordinate) const
- { return scoreBase(antidiagonal - 3) + seq1coordinate + 1; }
+ { return scoreOrigins[antidiagonal - 3] + seq1coordinate; }
// The index in the score vectors, of the previous in-frame vertical cell.
std::size_t vert3(std::size_t antidiagonal, std::size_t seq1coordinate) const
- { return scoreBase(antidiagonal - 3) + seq1coordinate + 2; }
+ { return scoreOrigins[antidiagonal - 3] + seq1coordinate + 1; }
// The index in the score vectors, of the previous in-frame diagonal cell.
std::size_t diag3(std::size_t antidiagonal, std::size_t seq1coordinate) const
- { return scoreBase(antidiagonal - 6) + seq1coordinate + 1; }
+ { return scoreOrigins[antidiagonal - 6] + seq1coordinate; }
private:
std::vector<int> xScores; // best score ending with aligned letters
std::vector<int> yScores; // best score ending with insertion in seq1
std::vector<int> zScores; // best score ending with insertion in seq2
- std::vector<std::size_t> seq1starts; // seq1 start pos for each antidiagonal
+ std::vector<std::size_t> scoreOrigins; // score origin for each antidiagonal
std::vector<std::size_t> scoreEnds; // score end pos for each antidiagonal
// Our position during the trace-back:
std::size_t bestAntidiagonal;
std::size_t bestSeq1position;
- std::size_t scoreBase(std::size_t antidiagonal) const
- { return scoreEnds[antidiagonal] - seq1starts[antidiagonal]; }
+ void resizeScoresIfSmaller(std::size_t size) {
+ if (xScores.size() < size) {
+ xScores.resize(size);
+ yScores.resize(size);
+ zScores.resize(size);
+ }
+ }
void init();
diff --git a/src/GappedXdropAligner2qual.cc b/src/GappedXdropAligner2qual.cc
index e42c4f5..00c1559 100644
--- a/src/GappedXdropAligner2qual.cc
+++ b/src/GappedXdropAligner2qual.cc
@@ -34,12 +34,12 @@ int GappedXdropAligner::align2qual(const uchar *seq1,
int bestScore = 0;
int bestEdgeScore = -INF;
- std::size_t bestEdgeAntidiagonal = 2;
+ std::size_t bestEdgeAntidiagonal = 0;
std::size_t bestEdgeSeq1position = 0;
init();
- for (std::size_t antidiagonal = 2; /* noop */; ++antidiagonal) {
+ for (std::size_t antidiagonal = 0; /* noop */; ++antidiagonal) {
std::size_t seq1beg = std::min(maxSeq1begs[0], maxSeq1begs[1]);
std::size_t seq1end = std::max(minSeq1ends[0], minSeq1ends[1]);
@@ -50,7 +50,7 @@ int GappedXdropAligner::align2qual(const uchar *seq1,
initAntidiagonal(seq1beg, scoreEnd, numCells);
- std::size_t seq2pos = antidiagonal - 2 - seq1beg;
+ std::size_t seq2pos = antidiagonal - seq1beg;
const uchar *s1 = isForward ? seq1 + seq1beg : seq1 - seq1beg - 1;
const uchar *q1 = isForward ? qual1 + seq1beg : qual1 - seq1beg - 1;
diff --git a/src/GappedXdropAligner3frame.cc b/src/GappedXdropAligner3frame.cc
index ab0d70b..87d428a 100644
--- a/src/GappedXdropAligner3frame.cc
+++ b/src/GappedXdropAligner3frame.cc
@@ -59,7 +59,7 @@ namespace cbrc {
// Puts 7 "dummy" antidiagonals at the start, so that we can safely
// look-back from subsequent antidiagonals.
void GappedXdropAligner::init3() {
- seq1starts.resize(0);
+ scoreOrigins.resize(0);
scoreEnds.resize(1);
initAntidiagonal3(0, 0, 0);
@@ -82,15 +82,9 @@ void GappedXdropAligner::init3() {
void GappedXdropAligner::initAntidiagonal3(std::size_t seq1beg,
std::size_t scoreEnd,
std::size_t numCells) {
+ scoreOrigins.push_back(scoreEnd - seq1beg + 1);
std::size_t newEnd = scoreEnd + numCells + 2; // + 2 pad cells
-
- if (xScores.size() < newEnd) {
- xScores.resize(newEnd);
- yScores.resize(newEnd);
- zScores.resize(newEnd);
- }
-
- seq1starts.push_back(seq1beg);
+ resizeScoresIfSmaller(newEnd);
scoreEnds.push_back(newEnd);
}
diff --git a/src/GappedXdropAlignerPssm.cc b/src/GappedXdropAlignerPssm.cc
index e6f2ff4..6d776d3 100644
--- a/src/GappedXdropAlignerPssm.cc
+++ b/src/GappedXdropAlignerPssm.cc
@@ -26,12 +26,12 @@ int GappedXdropAligner::alignPssm(const uchar *seq,
int bestScore = 0;
int bestEdgeScore = -INF;
- std::size_t bestEdgeAntidiagonal = 2;
+ std::size_t bestEdgeAntidiagonal = 0;
std::size_t bestEdgeSeq1position = 0;
init();
- for (std::size_t antidiagonal = 2; /* noop */; ++antidiagonal) {
+ for (std::size_t antidiagonal = 0; /* noop */; ++antidiagonal) {
std::size_t seq1beg = std::min(maxSeq1begs[0], maxSeq1begs[1]);
std::size_t seq1end = std::max(minSeq1ends[0], minSeq1ends[1]);
@@ -42,7 +42,7 @@ int GappedXdropAligner::alignPssm(const uchar *seq,
initAntidiagonal(seq1beg, scoreEnd, numCells);
- std::size_t seq2pos = antidiagonal - 2 - seq1beg;
+ std::size_t seq2pos = antidiagonal - seq1beg;
const uchar *s1 = isForward ? seq + seq1beg : seq - seq1beg - 1;
const ScoreMatrixRow *s2 = isForward ? pssm + seq2pos : pssm - seq2pos - 1;
diff --git a/src/LastalArguments.cc b/src/LastalArguments.cc
index 6a568fa..82ecca1 100644
--- a/src/LastalArguments.cc
+++ b/src/LastalArguments.cc
@@ -9,6 +9,7 @@
#include <vector>
#include <stdexcept>
#include <cctype>
+#include <climits>
#include <cmath> // log
#include <cstring> // strtok
#include <cstdlib> // EXIT_SUCCESS
@@ -58,9 +59,9 @@ LastalArguments::LastalArguments() :
minScoreGapless(-1), // depends on minScoreGapped and the outputType
matchScore(-1), // depends on the alphabet
mismatchCost(-1), // depends on the alphabet
- gapExistCost(-1), // depends on the alphabet
+ gapExistCost(INT_MIN), // depends on the alphabet
gapExtendCost(-1), // depends on the alphabet
- insExistCost(-1), // depends on gapExistCost
+ insExistCost(INT_MIN), // depends on gapExistCost
insExtendCost(-1), // depends on gapExtendCost
gapPairCost(-1), // this means: OFF
frameshiftCost(-1), // this means: ordinary, non-translated alignment
@@ -199,7 +200,6 @@ LAST home page: http://last.cbrc.jp/\n\
break;
case 'a':
unstringify( gapExistCost, optarg );
- if( gapExistCost < 0 ) badopt( c, optarg );
break;
case 'b':
unstringify( gapExtendCost, optarg );
@@ -207,7 +207,6 @@ LAST home page: http://last.cbrc.jp/\n\
break;
case 'A':
unstringify( insExistCost, optarg );
- if( insExistCost < 0 ) badopt( c, optarg );
break;
case 'B':
unstringify( insExtendCost, optarg );
@@ -428,19 +427,19 @@ void LastalArguments::setDefaultsFromAlphabet( bool isDna, bool isProtein,
// default match & mismatch scores: Blosum62 matrix
if( matchScore < 0 && mismatchCost >= 0 ) matchScore = 1; // idiot-proof
if( mismatchCost < 0 && matchScore >= 0 ) mismatchCost = 1; // idiot-proof
- if( gapExistCost < 0 ) gapExistCost = 11;
+ if( gapExistCost == INT_MIN ) gapExistCost = 11;
if( gapExtendCost < 0 ) gapExtendCost = 2;
}
else if( !isQuality( inputFormat ) ){
if( matchScore < 0 ) matchScore = 1;
if( mismatchCost < 0 ) mismatchCost = 1;
- if( gapExistCost < 0 ) gapExistCost = 7;
+ if( gapExistCost == INT_MIN ) gapExistCost = 7;
if( gapExtendCost < 0 ) gapExtendCost = 1;
}
else{ // sequence quality scores will be used:
if( matchScore < 0 ) matchScore = 6;
if( mismatchCost < 0 ) mismatchCost = 18;
- if( gapExistCost < 0 ) gapExistCost = 21;
+ if( gapExistCost == INT_MIN ) gapExistCost = 21;
if( gapExtendCost < 0 ) gapExtendCost = 9;
// With this scoring scheme for DNA, gapless lambda ~= ln(10)/10,
// so these scores should be comparable to PHRED scores.
@@ -455,7 +454,7 @@ void LastalArguments::setDefaultsFromAlphabet( bool isDna, bool isProtein,
maxEvalue = 1e18 / (numOfStrands() * r * queryLettersPerRandomAlignment);
}
- if( insExistCost < 0 ) insExistCost = gapExistCost;
+ if( insExistCost == INT_MIN ) insExistCost = gapExistCost;
if( insExtendCost < 0 ) insExtendCost = gapExtendCost;
if( tantanSetting < 0 ){
diff --git a/src/SubsetSuffixArraySearch.cc b/src/SubsetSuffixArraySearch.cc
index 0629559..2c0efff 100644
--- a/src/SubsetSuffixArraySearch.cc
+++ b/src/SubsetSuffixArraySearch.cc
@@ -39,7 +39,8 @@ void SubsetSuffixArray::match( const indexT*& begPtr, const indexT*& endPtr,
uchar subset = oldMap[ queryPtr[depth-1] ];
bucketPtr -= subset * bucketSteps[depth];
indexT oldBeg = *bucketPtr;
- indexT oldEnd = *(bucketPtr + bucketSteps[depth-1]);
+ indexT oldEnd =
+ (depth > 1) ? *(bucketPtr + bucketSteps[depth-1]) : suffixArray.size();
if( oldEnd - oldBeg > maxHits ) break;
subsetMap = oldMap;
beg = oldBeg;
diff --git a/src/alp/sls_alp_data.cpp b/src/alp/sls_alp_data.cpp
index c752f07..6d32520 100644
--- a/src/alp/sls_alp_data.cpp
+++ b/src/alp/sls_alp_data.cpp
@@ -27,7 +27,7 @@
File name: sls_alp_data.cpp
-Author: Sergey Sheetlin
+Author: Sergey Sheetlin, Martin Frith
Contents: Input data for the ascending ladder points simulation
@@ -228,23 +228,7 @@ bool insertions_after_deletions_)//if true, then insertions after deletions are
if(!d_rand_flag&&rand_<0)
{
-
- rand_=(long int)time(NULL);
- #ifndef _MSC_VER //UNIX program
- struct timeval tv;
- struct timezone tz;
- gettimeofday(&tv, &tz);
- rand_+=tv.tv_usec*10000000;
- #else
- struct _timeb timebuffer;
- char *timeline;
- _ftime( &timebuffer );
- timeline = ctime( & ( timebuffer.time ) );
- rand_+=timebuffer.millitm*10000000;
- #endif
-
- rand_=abs(rand_);
-
+ rand_=sls_basic::random_seed_from_time();
d_rand_flag=false;
};
diff --git a/src/alp/sls_alp_data.hpp b/src/alp/sls_alp_data.hpp
index 24e6150..39a81de 100644
--- a/src/alp/sls_alp_data.hpp
+++ b/src/alp/sls_alp_data.hpp
@@ -53,13 +53,9 @@ Contents: Contains input data
#ifndef _MSC_VER //UNIX program
-#include <sys/time.h>
#else
-#include <sys/timeb.h>
-
#define _CRTDBG_MAP_ALLOC
#include <crtdbg.h>
-
#endif
diff --git a/src/alp/sls_basic.cpp b/src/alp/sls_basic.cpp
index f5c5a17..7b8ebd2 100644
--- a/src/alp/sls_basic.cpp
+++ b/src/alp/sls_basic.cpp
@@ -27,13 +27,19 @@
File name: sls_basic.cpp
-Author: Sergey Sheetlin
+Author: Sergey Sheetlin, Martin Frith
Contents: Some basic functions and types
******************************************************************************/
+// 2016: this voodoo is needed to compile on Cygwin, with g++ options
+// such as -std=c++11 or -std=c++03, else it complains about gettimeofday
+#define _DEFAULT_SOURCE 1
+
#include "sls_basic.hpp"
+#include <cstdlib> // std::abs
+#include <ctime>
using namespace Sls;
@@ -67,6 +73,24 @@ double &seconds_)
#endif
}
+long int sls_basic::random_seed_from_time()
+{
+ long int random_factor=(long int)std::time(NULL);
+#ifndef _MSC_VER //UNIX program
+ struct timeval tv;
+ struct timezone tz;
+ gettimeofday(&tv, &tz);
+ random_factor+=tv.tv_usec*10000000;
+#else
+ struct _timeb timebuffer;
+ char *timeline;
+ _ftime( &timebuffer );
+ timeline = ctime( & ( timebuffer.time ) );
+ random_factor+=timebuffer.millitm*10000000;
+#endif
+ return std::abs(random_factor);
+}
+
double sls_basic::one_minus_exp_function(
double y_)
{
diff --git a/src/alp/sls_basic.hpp b/src/alp/sls_basic.hpp
index 1c6e7f1..d289ffd 100644
--- a/src/alp/sls_basic.hpp
+++ b/src/alp/sls_basic.hpp
@@ -30,7 +30,7 @@
File name: sls_basic.hpp
-Author: Sergey Sheetlin
+Author: Sergey Sheetlin, Martin Frith
Contents: Some basic functions and types
@@ -184,6 +184,8 @@ namespace Sls {
static void get_current_time(
double &seconds_);
+ static long int random_seed_from_time();
+
static double one_minus_exp_function(
double y_);
diff --git a/src/alp/sls_fsa1.cpp b/src/alp/sls_fsa1.cpp
index 8b93f60..eb247e8 100644
--- a/src/alp/sls_fsa1.cpp
+++ b/src/alp/sls_fsa1.cpp
@@ -27,7 +27,7 @@
File name: sls_repwords.cpp
-Author: Sergey Sheetlin
+Author: Sergey Sheetlin, Martin Frith
Contents: Frameshift alignment algorithms
@@ -181,22 +181,7 @@ long int seq_number_)//number of tested alignments
if(random_factor<0)
{
- random_factor=(long int)time(NULL);
- #ifndef _MSC_VER //UNIX program
- struct timeval tv;
- struct timezone tz;
- gettimeofday(&tv, &tz);
- random_factor+=tv.tv_usec*10000000;
- #else
- struct _timeb timebuffer;
- char *timeline;
- _ftime( &timebuffer );
- timeline = ctime( & ( timebuffer.time ) );
- rand_+=timebuffer.millitm*10000000;
- #endif
-
- random_factor=abs(random_factor);
-
+ random_factor=sls_basic::random_seed_from_time();
d_rand_flag=false;
};
diff --git a/src/alp/sls_fsa1_utils.cpp b/src/alp/sls_fsa1_utils.cpp
index 18aa419..617e50b 100644
--- a/src/alp/sls_fsa1_utils.cpp
+++ b/src/alp/sls_fsa1_utils.cpp
@@ -27,7 +27,7 @@
File name: sls_fsa1_utils.cpp
-Author: Sergey Sheetlin
+Author: Sergey Sheetlin, Martin Frith
Contents: Frameshift alignment algorithms
@@ -1881,21 +1881,7 @@ bool *rand_flag_)
if(random_factor_<0)
{
- random_factor_=(long int)time(NULL);
- #ifndef _MSC_VER //UNIX program
- struct timeval tv;
- struct timezone tz;
- gettimeofday(&tv, &tz);
- random_factor_+=tv.tv_usec*10000000;
- #else
- struct _timeb timebuffer;
- char *timeline;
- _ftime( &timebuffer );
- timeline = ctime( & ( timebuffer.time ) );
- random_factor_+=timebuffer.millitm*10000000;
- #endif
-
- random_factor_=abs(random_factor_);
+ random_factor_=sls_basic::random_seed_from_time();
if(rand_flag_)
{
diff --git a/src/alp/sls_fsa1_utils.hpp b/src/alp/sls_fsa1_utils.hpp
index e991047..45e6ed8 100644
--- a/src/alp/sls_fsa1_utils.hpp
+++ b/src/alp/sls_fsa1_utils.hpp
@@ -51,12 +51,6 @@ Contents: Frameshift alignment algorithms utilities
#include <climits>
#include <errno.h>
-#ifndef _MSC_VER //UNIX program
-#include <sys/time.h>
-#else
-#include <sys/timeb.h>
-#endif
-
#include "njn_random.hpp"
#include "njn_uniform.hpp"
diff --git a/src/lastal.cc b/src/lastal.cc
index 324833b..41732e5 100644
--- a/src/lastal.cc
+++ b/src/lastal.cc
@@ -826,6 +826,7 @@ void scan( LastAligner& aligner,
SegmentPairPot gaplessAlns;
alignGapless( aligner, gaplessAlns, queryNum, strand, querySeq );
if( args.outputType == 1 ) return; // we just want gapless alignments
+ if( gaplessAlns.size() == 0 ) return;
if( args.maskLowercase == 1 || args.maskLowercase == 2 )
makeQualityPssm( aligner, queryNum, strand, querySeq, false );
@@ -840,11 +841,13 @@ void scan( LastAligner& aligner,
alignGapped( aligner, gappedAlns, gaplessAlns,
queryNum, strand, querySeq, Phase::final );
+ if( gappedAlns.size() == 0 ) return;
if (args.maskLowercase == 2) {
makeQualityPssm(aligner, queryNum, strand, querySeq, true);
eraseWeakAlignments(aligner, gappedAlns, queryNum, strand, querySeq);
LOG2("lowercase-filtered alignments=" << gappedAlns.size());
+ if (gappedAlns.size() == 0) return;
if (args.outputType > 3)
makeQualityPssm(aligner, queryNum, strand, querySeq, false);
}
diff --git a/src/version.hh b/src/version.hh
index aefb9eb..2db7e02 100644
--- a/src/version.hh
+++ b/src/version.hh
@@ -1 +1 @@
-"737"
+"746"
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/last-align.git
More information about the debian-med-commit
mailing list