[med-svn] [Git][med-team/tantan][upstream] New upstream version 49

Sascha Steinbiss (@satta) gitlab at salsa.debian.org
Sat Dec 23 22:06:44 GMT 2023



Sascha Steinbiss pushed to branch upstream at Debian Med / tantan


Commits:
6cf87cf6 by Sascha Steinbiss at 2023-12-23T23:03:16+01:00
New upstream version 49
- - - - -


5 changed files:

- src/Makefile
- src/tantan_repeat_finder.cc
- src/tantan_repeat_finder.hh
- test/tantan_test.out
- test/tantan_test.sh


Changes:

=====================================
src/Makefile
=====================================
@@ -10,7 +10,7 @@ clean:
 	rm -f ../bin/tantan
 
 VERSION1 = git describe --dirty
-VERSION2 = echo ' (HEAD -> main, tag: 40) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
+VERSION2 = echo ' (HEAD -> main, tag: 49) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
 
 VERSION = \"`test -e ../.git && $(VERSION1) || $(VERSION2)`\"
 


=====================================
src/tantan_repeat_finder.cc
=====================================
@@ -71,9 +71,14 @@ void RepeatFinder::init(int maxRepeatOffset,
   f2f1 = myLog(1 - repeatEndProb - firstGapProb);
   f2f2 = myLog(1 - repeatEndProb - firstGapProb * 2);
 
-  double x = 1 / repeatOffsetProbDecay;
-  b2fGrowth = myLog(x);
-  b2fLast = myLog(repeatProb * firstRepeatOffsetProb(x, maxRepeatOffset));
+  double b2fDecay = myLog(repeatOffsetProbDecay);
+  double b2fFirst = myLog(firstRepeatOffsetProb(repeatOffsetProbDecay,
+						maxRepeatOffset) * repeatProb);
+
+  b2fScores.resize(maxRepeatOffset + 1);
+  for (int i = 0; i < maxRepeatOffset; ++i) {
+    b2fScores[i+1] = b2fFirst + i * b2fDecay;
+  }
 
   dpScoresPerLetter = numOfDpScoresPerLetter(maxRepeatOffset, endGapScore);
 }
@@ -86,48 +91,16 @@ void RepeatFinder::initializeBackwardScores() {
   }
 }
 
-void RepeatFinder::calcBackwardTransitionScoresWithGaps() {
-  double toBackground = f2b + scoresPtr[0];
-  double *foregroundPtr = scoresPtr + 1;
-  double f = *foregroundPtr;
-  double toForeground = f;
-
-  double *insertionPtr = scoresPtr + 1 + maxRepeatOffset;
-  double i = *insertionPtr;
-  *foregroundPtr = max3(toBackground, f2f1 + f, i);
-  double d = endGapScore + f;
-  ++foregroundPtr;
-  toForeground += b2fGrowth;
-
-  while (foregroundPtr < scoresPtr + maxRepeatOffset) {
-    f = *foregroundPtr;
-    toForeground = std::max(toForeground, f);
-    i = *(insertionPtr + 1);
-    *foregroundPtr = max3(toBackground, f2f2 + f, std::max(i, d));
-    double oneGapScore_f = oneGapScore + f;
-    *insertionPtr = std::max(oneGapScore_f, g2g + i);
-    d = std::max(oneGapScore_f, g2g + d);
-    ++foregroundPtr;
-    ++insertionPtr;
-    toForeground += b2fGrowth;
-  }
-
-  f = *foregroundPtr;
-  toForeground = std::max(toForeground, f);
-  *foregroundPtr = max3(toBackground, f2f1 + f, d);
-  *insertionPtr = endGapScore + f;
-
-  scoresPtr[0] = std::max(b2b + scoresPtr[0], b2fLast + toForeground);
-}
-
-void RepeatFinder::calcEmissionScores() {
+void RepeatFinder::calcScoresForOneSequencePositionWithGaps() {
+  const double *b2f = &b2fScores[0];
+  const double *oldScores = scoresPtr - dpScoresPerLetter;
+  const double *oldInsertionScores = oldScores + maxRepeatOffset;
   const double *matrixRow = substitutionMatrix[*seqPtr];
-  double *oldScores = scoresPtr - dpScoresPerLetter;
+  double *newInsertionScores = scoresPtr + maxRepeatOffset - 1;
+  double toBackground = f2b + oldScores[0];
   int maxOffset = maxOffsetInTheSequence();
   int i = 1;
 
-  scoresPtr[0] = oldScores[0];
-
   for (; i <= maxOffset; ++i) {
     scoresPtr[i] = oldScores[i] + matrixRow[seqPtr[-i]];
   }
@@ -136,37 +109,56 @@ void RepeatFinder::calcEmissionScores() {
     scoresPtr[i] = -HUGE_VAL;
   }
 
-  std::copy(oldScores + i, scoresPtr, scoresPtr + i);
+  i = 1;
+  double f = scoresPtr[i];
+  double toForeground = f + b2f[i];
+  double ins = oldInsertionScores[i];
+  scoresPtr[i] = max3(toBackground, f2f1 + f, ins);
+  double del = endGapScore + f;
+
+  for (i = 2; i < maxRepeatOffset; ++i) {
+    f = scoresPtr[i];
+    toForeground = std::max(toForeground, f + b2f[i]);
+    ins = oldInsertionScores[i];
+    scoresPtr[i] = max3(toBackground, f2f2 + f, std::max(ins, del));
+    double oneGapScore_f = oneGapScore + f;
+    newInsertionScores[i] = std::max(oneGapScore_f, g2g + ins);
+    del = std::max(oneGapScore_f, g2g + del);
+  }
+
+  f = scoresPtr[i];
+  toForeground = std::max(toForeground, f + b2f[i]);
+  scoresPtr[i] = max3(toBackground, f2f1 + f, del);
+  newInsertionScores[i] = endGapScore + f;
+
+  scoresPtr[0] = std::max(b2b + oldScores[0], toForeground);
 }
 
 void RepeatFinder::calcScoresForOneSequencePosition() {
   if (endGapScore > -HUGE_VAL) {
-    calcEmissionScores();
-    calcBackwardTransitionScoresWithGaps();
+    calcScoresForOneSequencePositionWithGaps();
     return;
   }
 
-  double *oldScores = scoresPtr - dpScoresPerLetter;
-  double toBackground = f2b + oldScores[0];
+  const double *b2f = &b2fScores[0];
+  const double *oldScores = scoresPtr - dpScoresPerLetter;
   const double *matrixRow = substitutionMatrix[*seqPtr];
+  double toBackground = f2b + oldScores[0];
   int maxOffset = maxOffsetInTheSequence();
   double toForeground = -HUGE_VAL;
   int i = 1;
 
   for (; i <= maxOffset; ++i) {
     double f = oldScores[i] + matrixRow[seqPtr[-i]];
-    toForeground = std::max(toForeground + b2fGrowth, f);
+    toForeground = std::max(toForeground, f + b2f[i]);
     scoresPtr[i] = std::max(toBackground, f2f0 + f);
   }
 
   for (; i <= maxRepeatOffset; ++i) {
-    toForeground += b2fGrowth;
     scoresPtr[i] = toBackground;
   }
 
-  std::copy(oldScores + i, scoresPtr, scoresPtr + i);
-
-  scoresPtr[0] = std::max(b2b + oldScores[0], b2fLast + toForeground);
+  scoresPtr[0] = std::max(b2b + oldScores[0], toForeground);
 }
 
 void RepeatFinder::makeCheckpoint() {
@@ -222,8 +214,7 @@ int RepeatFinder::offsetWithMaxScore() const {
   double toForeground = -HUGE_VAL;
 
   for (int i = 1; i <= maxOffset; ++i) {
-    toForeground += b2fGrowth;
-    double f = scoreWithEmission(matrixRow, i);
+    double f = scoreWithEmission(matrixRow, i) + b2fScores[i];
     if (f > toForeground) {
       toForeground = f;
       bestOffset = i;


=====================================
src/tantan_repeat_finder.hh
=====================================
@@ -51,8 +51,8 @@ private:
   double f2f0;
   double f2f1;
   double f2f2;
-  double b2fGrowth;
-  double b2fLast;
+
+  std::vector<double> b2fScores;
 
   int maxRepeatOffset;
   int dpScoresPerLetter;
@@ -68,8 +68,7 @@ private:
   int state;
 
   void initializeBackwardScores();
-  void calcBackwardTransitionScoresWithGaps();
-  void calcEmissionScores();
+  void calcScoresForOneSequencePositionWithGaps();
   void calcScoresForOneSequencePosition();
   void makeCheckpoint();
   void redoCheckpoint();


=====================================
test/tantan_test.out
=====================================
@@ -1533,3 +1533,5 @@ SRR019778.65	2	13	2	5.5	TG	TG,TG,TG,TG,TG,T
 SRR019778.78	9	31	10	2.2	TGCCTTACTA	TGCCTTACTA,TGCCTTACTA,TG
 SRR019778.95	2	18	4	4	TGAT	TGAT,TGAT,TGAT,TGAT
 SRR019778.95	22	45	4	5.75	ATAG	ATAG,ATAG,ATAG,ATAG,ATAG,ATA
+
+hard	0	40	5	7.6	ATCAT	CATCAT,CATCAT,CAT-AT,CATAT,CATAT,CATAT,CATAT,CAT


=====================================
test/tantan_test.sh
=====================================
@@ -39,4 +39,6 @@ countLowercaseLetters () {
     tantan -f4 -b0 panda.fastq
     echo
     tantan -f4 -b0 -j0 panda.fastq
+    echo
+    tantan -f4 -w7000 hard.fa
 } 2>&1 | diff -u tantan_test.out -



View it on GitLab: https://salsa.debian.org/med-team/tantan/-/commit/6cf87cf68cee77560671714d4772ac9d9810a368

-- 
View it on GitLab: https://salsa.debian.org/med-team/tantan/-/commit/6cf87cf68cee77560671714d4772ac9d9810a368
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20231223/49c4a64b/attachment-0001.htm>


More information about the debian-med-commit mailing list