[med-svn] [Git][med-team/last-align][upstream] New upstream version 1170

Nilesh Patra gitlab at salsa.debian.org
Thu Dec 31 11:44:20 GMT 2020



Nilesh Patra pushed to branch upstream at Debian Med / last-align


Commits:
214cac8d by Nilesh Patra at 2020-12-31T17:03:58+05:30
New upstream version 1170
- - - - -


5 changed files:

- ChangeLog.txt
- src/LastEvaluer.cc
- src/mcf_frameshift_xdrop_aligner.cc
- src/mcf_frameshift_xdrop_aligner.hh
- src/version.hh


Changes:

=====================================
ChangeLog.txt
=====================================
@@ -1,8 +1,15 @@
+2020-12-28  Martin C. Frith  <Martin C. Frith>
+
+	* src/LastEvaluer.cc, src/mcf_frameshift_xdrop_aligner.cc,
+	src/mcf_frameshift_xdrop_aligner.hh, test/last-test.out:
+	Fix E-values for new-style frameshifts
+	[a55b81199602] [tip]
+
 2020-12-23  Martin C. Frith  <Martin C. Frith>
 
 	* scripts/last-train, src/LastEvaluer.cc:
 	Try to fix floating-point infinite loops
-	[829c63f50494] [tip]
+	[829c63f50494]
 
 2020-12-15  Martin C. Frith  <Martin C. Frith>
 


=====================================
src/LastEvaluer.cc
=====================================
@@ -427,6 +427,7 @@ void LastEvaluer::initFrameshift(const const_dbl_ptr *substitutionProbs,
   FrameshiftXdropAligner aligner;
   int proteinLength = 200;  // xxx long enough to avoid edge effects ???
   int tranDnaLength = proteinLength * 3;
+  int origDnaLength = tranDnaLength + 2;
   int numOfAlignments = 50;  // suggested by Y-K Yu, R Bundschuh, T Hwa, 2002
   if (verbosity > 1) numOfAlignments = 1000;
 
@@ -446,18 +447,18 @@ void LastEvaluer::initFrameshift(const const_dbl_ptr *substitutionProbs,
     for (int j = 0; j < proteinLength; ++j) protein[j] = pDist(randGen);
     for (int j = 0; j < tranDnaLength; ++j) tranDna[j] = tDist(randGen);
     double p = aligner.maxSumOfProbRatios(protein, proteinLength,
-					  tranDna, tranDnaLength,
+					  tranDna, origDnaLength,
 					  substitutionProbs, gapCosts);
     probRatioSum += 1 / p;
     if (verbosity > 1) std::cerr << "simScore: " << (log(p) / scale) << "\n";
   }
 
   // max likelihood k  =  1 / (m * n * avg[exp(-lambda * score)])
-  double k = numOfAlignments / (proteinLength * tranDnaLength * probRatioSum);
+  double k = numOfAlignments / (proteinLength * origDnaLength * probRatioSum);
 
   if (verbosity > 1) {
     std::cerr << "lambda k m n: " << scale << " " << k << " "
-	      << proteinLength << " " << tranDnaLength << "\n";
+	      << proteinLength << " " << origDnaLength << "\n";
   }
 
   Sls::AlignmentEvaluerParameters p = {scale, k, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};


=====================================
src/mcf_frameshift_xdrop_aligner.cc
=====================================
@@ -381,7 +381,7 @@ void FrameshiftXdropAligner::count(bool isRightwardExtension,
 double FrameshiftXdropAligner::maxSumOfProbRatios(const uchar *protein,
 						  int proteinLength,
 						  const uchar *tranDna,
-						  int tranDnaLength,
+						  int origDnaLength,
 						  const const_dbl_ptr *substitutionProbs,
 						  const GapCosts &gapCosts) {
   const double delOpenProb = gapCosts.delProbPieces[0].openProb;
@@ -394,45 +394,27 @@ double FrameshiftXdropAligner::maxSumOfProbRatios(const uchar *protein,
   const double insProb3 = gapCosts.insProb3;
 
   const int proteinSize = proteinLength + 1;
-  const int tranDnaSize = tranDnaLength + 1;
-  xFwdProbs.resize(tranDnaSize + proteinSize * tranDnaSize);
+  const int origDnaSize = origDnaLength + 1;
+  xFwdProbs.resize(origDnaSize + proteinSize * origDnaSize);
   double *delRow = &xFwdProbs[0];
-  double *newRow = delRow + tranDnaSize;
+  double *newRow = delRow + origDnaSize;
   double Y1, Y2, Y3, Z1, Z2, Z3;
+  double maxValue = 0;
 
-  Z1 = Z2 = Z3 = 0;
-  for (int j = 0; j < tranDnaSize; ++j) {
-    double z1 = Z1 * insProb1;
-    double z2 = Z2 * insProb2;
-    double z3 = Z3 * insProb3;
-    double b  = z1 + z2 + z3 + 1;
-    newRow[j] = b;
-    delRow[j] = b * delOpenProb;
-    Z3 = Z2;
-    Z2 = Z1;
-    Z1 = b * insOpenProb + z3;
-  }
-
-  for (int i = 1; i < proteinSize; ++i) {
-    const double *substitutionRow = substitutionProbs[protein[i-1]];
-    const double *oldRow = newRow;
-    newRow += tranDnaSize;
-    Y2 = Z2 = Z3 = 0;
+  std::fill_n(delRow, origDnaSize, 0.0);
 
-    {
-      Y3 = delRow[0];
-      double y3 = Y3 * delProb3;
-      double b  = y3 + 1;
-      newRow[0] = b;
-      delRow[0] = b * delOpenProb + y3;
-      Z1 = b * insOpenProb;
-    }
+  for (int i = 0; i < proteinSize; ++i) {
+    bool isIn = (i > 0);
+    const double *substitutionRow = isIn ? substitutionProbs[protein[i-1]] : 0;
+    const double *oldRow = newRow - origDnaSize;
+    Y2 = Y3 = Z1 = Z2 = Z3 = 0;
 
-    for (int j = 1; j < tranDnaSize; ++j) {
+    for (int j = 0; j < origDnaSize; ++j) {
       Y1 = Y2;
       Y2 = Y3;
       Y3 = delRow[j];
-      double x  = oldRow[j-1] * substitutionRow[tranDna[j-1]];
+      bool isMain = (isIn && j > 2);
+      double x  = isMain ? oldRow[j-3] * substitutionRow[tranDna[j-3]] : 0;
       double y1 = Y1 * delProb1;
       double y2 = Y2 * delProb2;
       double y3 = Y3 * delProb3;
@@ -446,45 +428,24 @@ double FrameshiftXdropAligner::maxSumOfProbRatios(const uchar *protein,
       Z2 = Z1;
       Z1 = b * insOpenProb + z3;
     }
+    newRow += origDnaSize;
   }
 
-  double maxValue = 0;
+  std::fill_n(delRow, origDnaSize, 0.0);
 
-  Z1 = Z2 = Z3 = 0;
-  for (int j = tranDnaSize; j-- > 0;) {
-    double z1 = Z1 * insProb1;
-    double z2 = Z2 * insProb2;
-    double z3 = Z3 * insProb3;
-    double b  = z1 + z2 + z3 + 1;
-    maxValue  = std::max(maxValue, b * newRow[j]);
-    newRow[j] = b;
-    delRow[j] = b * delOpenProb;
-    Z3 = Z2;
-    Z2 = Z1;
-    Z1 = b * insOpenProb + z3;
-  }
-
-  for (int i = proteinLength; i-- > 0;) {
-    const double *substitutionRow = substitutionProbs[protein[i]];
-    const double *oldRow = newRow;
-    newRow -= tranDnaSize;
-    Y2 = Z2 = Z3 = 0;
-
-    {
-      Y3 = delRow[tranDnaLength];
-      double y3 = Y3 * delProb3;
-      double b  = y3 + 1;
-      maxValue  = std::max(maxValue, b * newRow[tranDnaLength]);
-      newRow[tranDnaLength] = b;
-      delRow[tranDnaLength] = b * delOpenProb + y3;
-      Z1 = b * insOpenProb;
-    }
+  for (int i = proteinSize; i-- > 0;) {
+    newRow -= origDnaSize;
+    bool isIn = (i < proteinLength);
+    const double *substitutionRow = isIn ? substitutionProbs[protein[i]] : 0;
+    const double *oldRow = newRow + origDnaSize;
+    Y2 = Y3 = Z1 = Z2 = Z3 = 0;
 
-    for (int j = tranDnaLength; j-- > 0;) {
+    for (int j = origDnaSize; j-- > 0;) {
       Y1 = Y2;
       Y2 = Y3;
       Y3 = delRow[j];
-      double x  = oldRow[j+1] * substitutionRow[tranDna[j]];
+      bool isMain = (isIn && j < origDnaLength - 2);
+      double x  = isMain ? oldRow[j+3] * substitutionRow[tranDna[j]] : 0;
       double y1 = Y1 * delProb1;
       double y2 = Y2 * delProb2;
       double y3 = Y3 * delProb3;


=====================================
src/mcf_frameshift_xdrop_aligner.hh
=====================================
@@ -55,8 +55,9 @@ public:
   }
 
   // tranDna should point to translated DNA: frame 012012012...
+  // origDnaLength is the length of the untranslated DNA sequence
   double maxSumOfProbRatios(const uchar *protein, int proteinLength,
-			    const uchar *tranDna, int tranDnaLength,
+			    const uchar *tranDna, int origDnaLength,
 			    const const_dbl_ptr *substitutionProbs,
 			    const GapCosts &gapCosts);
 


=====================================
src/version.hh
=====================================
@@ -1 +1 @@
-"1169"
+"1170"



View it on GitLab: https://salsa.debian.org/med-team/last-align/-/commit/214cac8d75173432c71c2ec177d2b93e542bd07e

-- 
View it on GitLab: https://salsa.debian.org/med-team/last-align/-/commit/214cac8d75173432c71c2ec177d2b93e542bd07e
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201231/d71b1dfa/attachment-0001.html>


More information about the debian-med-commit mailing list