[med-svn] [Git][med-team/last-align][upstream] New upstream version 1170
Nilesh Patra
gitlab at salsa.debian.org
Thu Dec 31 11:44:20 GMT 2020
Nilesh Patra pushed to branch upstream at Debian Med / last-align
Commits:
214cac8d by Nilesh Patra at 2020-12-31T17:03:58+05:30
New upstream version 1170
- - - - -
5 changed files:
- ChangeLog.txt
- src/LastEvaluer.cc
- src/mcf_frameshift_xdrop_aligner.cc
- src/mcf_frameshift_xdrop_aligner.hh
- src/version.hh
Changes:
=====================================
ChangeLog.txt
=====================================
@@ -1,8 +1,15 @@
+2020-12-28 Martin C. Frith <Martin C. Frith>
+
+ * src/LastEvaluer.cc, src/mcf_frameshift_xdrop_aligner.cc,
+ src/mcf_frameshift_xdrop_aligner.hh, test/last-test.out:
+ Fix E-values for new-style frameshifts
+ [a55b81199602] [tip]
+
2020-12-23 Martin C. Frith <Martin C. Frith>
* scripts/last-train, src/LastEvaluer.cc:
Try to fix floating-point infinite loops
- [829c63f50494] [tip]
+ [829c63f50494]
2020-12-15 Martin C. Frith <Martin C. Frith>
=====================================
src/LastEvaluer.cc
=====================================
@@ -427,6 +427,7 @@ void LastEvaluer::initFrameshift(const const_dbl_ptr *substitutionProbs,
FrameshiftXdropAligner aligner;
int proteinLength = 200; // xxx long enough to avoid edge effects ???
int tranDnaLength = proteinLength * 3;
+ int origDnaLength = tranDnaLength + 2;
int numOfAlignments = 50; // suggested by Y-K Yu, R Bundschuh, T Hwa, 2002
if (verbosity > 1) numOfAlignments = 1000;
@@ -446,18 +447,18 @@ void LastEvaluer::initFrameshift(const const_dbl_ptr *substitutionProbs,
for (int j = 0; j < proteinLength; ++j) protein[j] = pDist(randGen);
for (int j = 0; j < tranDnaLength; ++j) tranDna[j] = tDist(randGen);
double p = aligner.maxSumOfProbRatios(protein, proteinLength,
- tranDna, tranDnaLength,
+ tranDna, origDnaLength,
substitutionProbs, gapCosts);
probRatioSum += 1 / p;
if (verbosity > 1) std::cerr << "simScore: " << (log(p) / scale) << "\n";
}
// max likelihood k = 1 / (m * n * avg[exp(-lambda * score)])
- double k = numOfAlignments / (proteinLength * tranDnaLength * probRatioSum);
+ double k = numOfAlignments / (proteinLength * origDnaLength * probRatioSum);
if (verbosity > 1) {
std::cerr << "lambda k m n: " << scale << " " << k << " "
- << proteinLength << " " << tranDnaLength << "\n";
+ << proteinLength << " " << origDnaLength << "\n";
}
Sls::AlignmentEvaluerParameters p = {scale, k, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
=====================================
src/mcf_frameshift_xdrop_aligner.cc
=====================================
@@ -381,7 +381,7 @@ void FrameshiftXdropAligner::count(bool isRightwardExtension,
double FrameshiftXdropAligner::maxSumOfProbRatios(const uchar *protein,
int proteinLength,
const uchar *tranDna,
- int tranDnaLength,
+ int origDnaLength,
const const_dbl_ptr *substitutionProbs,
const GapCosts &gapCosts) {
const double delOpenProb = gapCosts.delProbPieces[0].openProb;
@@ -394,45 +394,27 @@ double FrameshiftXdropAligner::maxSumOfProbRatios(const uchar *protein,
const double insProb3 = gapCosts.insProb3;
const int proteinSize = proteinLength + 1;
- const int tranDnaSize = tranDnaLength + 1;
- xFwdProbs.resize(tranDnaSize + proteinSize * tranDnaSize);
+ const int origDnaSize = origDnaLength + 1;
+ xFwdProbs.resize(origDnaSize + proteinSize * origDnaSize);
double *delRow = &xFwdProbs[0];
- double *newRow = delRow + tranDnaSize;
+ double *newRow = delRow + origDnaSize;
double Y1, Y2, Y3, Z1, Z2, Z3;
+ double maxValue = 0;
- Z1 = Z2 = Z3 = 0;
- for (int j = 0; j < tranDnaSize; ++j) {
- double z1 = Z1 * insProb1;
- double z2 = Z2 * insProb2;
- double z3 = Z3 * insProb3;
- double b = z1 + z2 + z3 + 1;
- newRow[j] = b;
- delRow[j] = b * delOpenProb;
- Z3 = Z2;
- Z2 = Z1;
- Z1 = b * insOpenProb + z3;
- }
-
- for (int i = 1; i < proteinSize; ++i) {
- const double *substitutionRow = substitutionProbs[protein[i-1]];
- const double *oldRow = newRow;
- newRow += tranDnaSize;
- Y2 = Z2 = Z3 = 0;
+ std::fill_n(delRow, origDnaSize, 0.0);
- {
- Y3 = delRow[0];
- double y3 = Y3 * delProb3;
- double b = y3 + 1;
- newRow[0] = b;
- delRow[0] = b * delOpenProb + y3;
- Z1 = b * insOpenProb;
- }
+ for (int i = 0; i < proteinSize; ++i) {
+ bool isIn = (i > 0);
+ const double *substitutionRow = isIn ? substitutionProbs[protein[i-1]] : 0;
+ const double *oldRow = newRow - origDnaSize;
+ Y2 = Y3 = Z1 = Z2 = Z3 = 0;
- for (int j = 1; j < tranDnaSize; ++j) {
+ for (int j = 0; j < origDnaSize; ++j) {
Y1 = Y2;
Y2 = Y3;
Y3 = delRow[j];
- double x = oldRow[j-1] * substitutionRow[tranDna[j-1]];
+ bool isMain = (isIn && j > 2);
+ double x = isMain ? oldRow[j-3] * substitutionRow[tranDna[j-3]] : 0;
double y1 = Y1 * delProb1;
double y2 = Y2 * delProb2;
double y3 = Y3 * delProb3;
@@ -446,45 +428,24 @@ double FrameshiftXdropAligner::maxSumOfProbRatios(const uchar *protein,
Z2 = Z1;
Z1 = b * insOpenProb + z3;
}
+ newRow += origDnaSize;
}
- double maxValue = 0;
+ std::fill_n(delRow, origDnaSize, 0.0);
- Z1 = Z2 = Z3 = 0;
- for (int j = tranDnaSize; j-- > 0;) {
- double z1 = Z1 * insProb1;
- double z2 = Z2 * insProb2;
- double z3 = Z3 * insProb3;
- double b = z1 + z2 + z3 + 1;
- maxValue = std::max(maxValue, b * newRow[j]);
- newRow[j] = b;
- delRow[j] = b * delOpenProb;
- Z3 = Z2;
- Z2 = Z1;
- Z1 = b * insOpenProb + z3;
- }
-
- for (int i = proteinLength; i-- > 0;) {
- const double *substitutionRow = substitutionProbs[protein[i]];
- const double *oldRow = newRow;
- newRow -= tranDnaSize;
- Y2 = Z2 = Z3 = 0;
-
- {
- Y3 = delRow[tranDnaLength];
- double y3 = Y3 * delProb3;
- double b = y3 + 1;
- maxValue = std::max(maxValue, b * newRow[tranDnaLength]);
- newRow[tranDnaLength] = b;
- delRow[tranDnaLength] = b * delOpenProb + y3;
- Z1 = b * insOpenProb;
- }
+ for (int i = proteinSize; i-- > 0;) {
+ newRow -= origDnaSize;
+ bool isIn = (i < proteinLength);
+ const double *substitutionRow = isIn ? substitutionProbs[protein[i]] : 0;
+ const double *oldRow = newRow + origDnaSize;
+ Y2 = Y3 = Z1 = Z2 = Z3 = 0;
- for (int j = tranDnaLength; j-- > 0;) {
+ for (int j = origDnaSize; j-- > 0;) {
Y1 = Y2;
Y2 = Y3;
Y3 = delRow[j];
- double x = oldRow[j+1] * substitutionRow[tranDna[j]];
+ bool isMain = (isIn && j < origDnaLength - 2);
+ double x = isMain ? oldRow[j+3] * substitutionRow[tranDna[j]] : 0;
double y1 = Y1 * delProb1;
double y2 = Y2 * delProb2;
double y3 = Y3 * delProb3;
=====================================
src/mcf_frameshift_xdrop_aligner.hh
=====================================
@@ -55,8 +55,9 @@ public:
}
// tranDna should point to translated DNA: frame 012012012...
+ // origDnaLength is the length of the untranslated DNA sequence
double maxSumOfProbRatios(const uchar *protein, int proteinLength,
- const uchar *tranDna, int tranDnaLength,
+ const uchar *tranDna, int origDnaLength,
const const_dbl_ptr *substitutionProbs,
const GapCosts &gapCosts);
=====================================
src/version.hh
=====================================
@@ -1 +1 @@
-"1169"
+"1170"
View it on GitLab: https://salsa.debian.org/med-team/last-align/-/commit/214cac8d75173432c71c2ec177d2b93e542bd07e
--
View it on GitLab: https://salsa.debian.org/med-team/last-align/-/commit/214cac8d75173432c71c2ec177d2b93e542bd07e
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201231/d71b1dfa/attachment-0001.html>
More information about the debian-med-commit
mailing list