[med-svn] [Git][med-team/snap-aligner][upstream] New upstream version 2.0.3+dfsg

Andreas Tille (@tille) gitlab at salsa.debian.org
Thu Aug 24 10:22:30 BST 2023



Andreas Tille pushed to branch upstream at Debian Med / snap-aligner


Commits:
fd2267ee by Andreas Tille at 2023-08-24T11:08:21+02:00
New upstream version 2.0.3+dfsg
- - - - -


8 changed files:

- SNAPLib/Bam.cpp
- SNAPLib/BaseAligner.cpp
- SNAPLib/CommandProcessor.cpp
- SNAPLib/DataReader.cpp
- SNAPLib/DataReader.h
- SNAPLib/IntersectingPairedEndAligner.cpp
- SNAPLib/SingleAligner.cpp
- SNAPLib/SortedDataWriter.cpp


Changes:

=====================================
SNAPLib/Bam.cpp
=====================================
@@ -1025,6 +1025,7 @@ BAMFormat::writeHeader(
     }
 
     *headerActualSize = cursor;
+
     return true;
 }
 


=====================================
SNAPLib/BaseAligner.cpp
=====================================
@@ -693,7 +693,7 @@ Return Value:
             //
             // And finally, try scoring.
             //
-            if (score(
+            bool success = score(
                 false,
                 read,
                 primaryResult,
@@ -706,18 +706,21 @@ Return Value:
                 maxCandidatesForAffineGapBufferSize,
                 nCandidatesForAffineGap,
                 candidatesForAffineGap,
-                useHamming)) {
+                useHamming);
 
+            if (overflowedSecondaryResultsBuffer) {
+                return false;
+            }
+
+            if (success) {
 #ifdef  _DEBUG
                 if (_DumpAlignments) printf("Final result score %d MAPQ %d at %s:%llu\n", primaryResult->score, primaryResult->mapq,
                     genome->getContigAtLocation(primaryResult->location)->name, primaryResult->location - genome->getContigAtLocation(primaryResult->location)->beginningLocation);
 #endif  // _DEBUG
-                if (overflowedSecondaryResultsBuffer) {
-                    return false;
-                }
+
                 finalizeSecondaryResults(read[FORWARD], primaryResult, nSecondaryResults, secondaryResults, maxSecondaryResults, maxEditDistanceForSecondaryResults, primaryResult->score);
                 return true;
-            } // If score says we have a difinitive answer
+            } // If score says we have a definitive answer
         } // If we applied a seed, and so something's changed.
     } // While we're still applying seeds
 


=====================================
SNAPLib/CommandProcessor.cpp
=====================================
@@ -36,7 +36,7 @@ Revision History:
 #include "Compat.h"
 #include "HitDepth.h"
 
-const char *SNAP_VERSION = "2.0.2";
+const char *SNAP_VERSION = "2.0.3";
 
 static void usage()
 {


=====================================
SNAPLib/DataReader.cpp
=====================================
@@ -60,6 +60,8 @@ public:
 
     virtual bool getData(char** o_buffer, _int64* o_validBytes, _int64* o_startBytes = NULL);
 
+    virtual void dumpState();
+
     virtual void advance(_int64 bytes);
 
     virtual void nextBatch();
@@ -165,6 +167,16 @@ protected:
     const size_t         bufferSize;
 };
 
+void
+ReadBasedDataReader::dumpState()
+{
+    WriteErrorMessage("ReadBasedDataReader at 0x%llx state:\n", this);
+    WriteErrorMessage("\theaderBufferSize %lld, headerExtraSize %lld, amountAdvancedThroughUnderlyingStoreByUs %lld, nHeaderBuffersAllocated %d, hitEOFReadingHeader %d, bufferSize %lld\n",
+        headerBufferSize, headerExtraSize, amountAdvancedThroughUnderlyingStoreByUs, nHeaderBuffersAllocated, hitEOFReadingHeader, bufferSize);
+    WriteErrorMessage("\tnBuffers %d, headerBuffersOutstanding, %d, startedReadingHeader %d, extraBytes %lld, overflowBytes %lld, nextBatchID %d, nextBufferForReader %d, nextBufferForConsumer %d, lastBufferForConsumer %d\n",
+        nBuffers, headerBuffersOutstanding, startedReadingHeader, extraBytes, overflowBytes, nextBatchID, nextBufferForReader, nextBufferForConsumer, lastBufferForConsumer);
+}
+
 ReadBasedDataReader::ReadBasedDataReader(
     unsigned i_nBuffers,
     _int64 i_overflowBytes,
@@ -186,12 +198,20 @@ ReadBasedDataReader::ReadBasedDataReader(
     _ASSERT(extraFactor >= 0 && i_nBuffers > 0);
     bufferInfo = new BufferInfo[maxBuffers];
     extraBytes = max((_int64) 0, (_int64) ((bufferSize + overflowBytes) * extraFactor));
+
+    if (bufferSize <= 2 * (size_t)overflowBytes) {
+        WriteErrorMessage("ReadBasedDataReader::ReadBasedDataReader: the buffer size %lld isn't more than twice the overflow size %lld, so we will fail to make progres.  This is a code bug, please create a github issue.\n",
+            bufferSize, overflowBytes);
+        soft_exit(1);
+    }
+
     char* allocated = (char*) BigReserve(maxBuffers * (bufferSize + extraBytes + overflowBytes));
     BigCommit(allocated, nBuffers * (bufferSize + extraBytes + overflowBytes));
     if (NULL == allocated) {
         WriteErrorMessage("ReadBasedDataReader: unable to allocate IO buffer\n");
         soft_exit(1);
     }
+
     for (unsigned i = 0 ; i < nBuffers; i++) {
         bufferInfo[i].buffer = allocated;
         allocated += bufferSize + overflowBytes;
@@ -206,6 +226,7 @@ ReadBasedDataReader::ReadBasedDataReader(
         bufferInfo[i].holds = 0;
 		bufferInfo[i].headerBuffer = false;
     }
+
     nextBatchID = 1;
  
     nextBufferForConsumer = -1;
@@ -1331,6 +1352,8 @@ public:
         return fileName;
     }
 
+    virtual void dumpState();
+
 protected:
 
     // must hold the lock to call
@@ -1354,11 +1377,28 @@ protected:
 
 };
 
+void
+AsyncFileDataReader::dumpState()
+{
+    WriteErrorMessage("AsyncFileDataReader (0x%llx) state:\n", this);
+    WriteErrorMessage("\tfileName %s, fileSize %lld, readOffset %lld, endingOffset %lld\n",
+        fileName, fileSize, readOffset, endingOffset);
+    
+    ReadBasedDataReader::dumpState();
+}
+
 AsyncFileDataReader::AsyncFileDataReader(unsigned i_nBuffers, _int64 i_overflowBytes, double extraFactor, size_t bufferSpace) :
     ReadBasedDataReader(i_nBuffers, i_overflowBytes, extraFactor, bufferSpace), fileName(NULL), asyncFile(NULL), endingOffset(0)
 {
     readOffset = 0;
     bufferReaders = (AsyncFile::Reader**)malloc(sizeof(AsyncFile::Reader*) * maxBuffers);
+
+    if (NULL == bufferReaders) {
+        WriteErrorMessage("AsyncFileDataReader::AsyncFileDataReader(): malloc(%d) failed\n", sizeof(AsyncFile::Reader*) * maxBuffers);
+        soft_exit(1);
+        return; // Just to avoid the compiler warning
+    }
+
     for (unsigned i = 0; i < i_nBuffers; i++) {
         bufferReaders[i] = NULL;
     }


=====================================
SNAPLib/DataReader.h
=====================================
@@ -145,6 +145,11 @@ public:
     // timing for performance tuning (in nanos)
     static volatile _int64 ReadWaitTime;
     static volatile _int64 ReleaseWaitTime;
+
+    //
+    // debugging
+    //
+    virtual void dumpState() {} // Override in a subclass if needed
 };
 
 class DataSupplier


=====================================
SNAPLib/IntersectingPairedEndAligner.cpp
=====================================
@@ -879,10 +879,11 @@ IntersectingPairedEndAligner::alignLandauVishkin(
                 scoreLimit = computeScoreLimit(nonALTAlignment, &scoresForAllAlignments, &scoresForNonAltAlignments, __max(mate->largestBigIndelDetected, __min(candidate->largestBigIndelDetected, fewerEndScore)));
 
                 _ASSERT(genomeLocationIsWithin(mate->readWithMoreHitsGenomeLocation, candidate->readWithFewerHitsGenomeLocation, maxSpacing));
+
                 //
-                // Exclude it if it's strictly smaller than minSpacing; hence, minSpacing -1.
+                // Exclude it if it's strictly smaller than minSpacing; hence, minSpacing - 1.
                 //
-                if (!genomeLocationIsWithin(mate->readWithMoreHitsGenomeLocation, candidate->readWithFewerHitsGenomeLocation, minSpacing -1) && ((mate->bestPossibleScore <= scoreLimit - fewerEndScore))) {
+                if (!genomeLocationIsWithin(mate->readWithMoreHitsGenomeLocation, candidate->readWithFewerHitsGenomeLocation, minSpacing - 1) && ((mate->bestPossibleScore <= scoreLimit - fewerEndScore))) {
                     //
                     // It's within the range and not necessarily too poor of a match.  Consider it.
                     //
@@ -1063,7 +1064,7 @@ IntersectingPairedEndAligner::alignLandauVishkin(
                                 }
 
                                 (*nLVCandidatesForAffineGap)++;
-                            }
+                            } // if not eliminatedByMerge
 
                             if (nonALTAlignment) {
                                 scoresForNonAltAlignments.updateBestHitIfNeeded(pairScore, pairAGScore, pairProbability, fewerEndScore, readWithMoreHits, fewerEndGenomeLocationOffset, candidate, mate);
@@ -3369,7 +3370,7 @@ IntersectingPairedEndAligner::scoreLocation(
         *totalIndelsLV = totalIndels1 + totalIndels2;
     } else {
         *score = ScoreAboveLimit;
-        *agScore = -1;
+        *agScore = ScoreAboveLimit;
         *matchProbability = 0.0;
     }
 


=====================================
SNAPLib/SingleAligner.cpp
=====================================
@@ -247,7 +247,7 @@ SingleAlignerContext::runIterationThreadImpl(Read *& read)
         }
 #endif
         SingleAlignmentResult firstALTResult;
-        while (!aligner->AlignRead(read, alignmentResults, &firstALTResult, maxSecondaryAlignmentAdditionalEditDistance, alignmentResultBufferCount - 1, &nSecondaryResults, maxSecondaryAlignments, alignmentResults + 1, 0, NULL, NULL, useSoftClipping)) {
+        while (!aligner->AlignRead(read, alignmentResults, &firstALTResult, maxSecondaryAlignmentAdditionalEditDistance, alignmentResultBufferCount - 1, &nSecondaryResults, maxSecondaryAlignments, alignmentResults + 1, 0, NULL, NULL)) {
             //
             // Out of secondary alignment buffer.  Reallocate.
             //


=====================================
SNAPLib/SortedDataWriter.cpp
=====================================
@@ -1260,7 +1260,10 @@ SortedDataFilterSupplier::mergeSort()
         {
             i->reader = readerSupplier->getDataReader(1, MAX_READ_LENGTH * 8, 0.0,
                 __min(1UL << 23, __max(1UL << 17, bufferSpace / blocks.size()))); // 128kB to 8MB buffer space per block
-            i->reader->init(tempFileName);
+            if (!i->reader->init(tempFileName)) {
+                WriteErrorMessage("SortedDataFilterSupplier::mergeSort: reader->init(%s) failed\n", tempFileName);
+                soft_exit(1);
+            }
             i->reader->reinit(i->start, i->bytes);
         }
     }
@@ -1274,8 +1277,11 @@ SortedDataFilterSupplier::mergeSort()
         DataReader* headerReader;
         if (blocks[0].dataReaderIsBuffer) 
         {
-            headerReader = readerSupplier->getDataReader(1, MAX_READ_LENGTH * 8, 0.0, headerSize + 4096);
-            headerReader->init(tempFileName);
+            headerReader = readerSupplier->getDataReader(1, 0, 0.0, 0);
+            if (!headerReader->init(tempFileName)) {
+                WriteErrorMessage("SortedDataFilterSupplier::mergeSort: reader->init(%s) failed for headerReader\n", tempFileName);
+                soft_exit(1);
+            }
         } else {
             headerReader = blocks[0].reader;
         }
@@ -1289,10 +1295,12 @@ SortedDataFilterSupplier::mergeSort()
 			if ((!headerReader->getData(&rbuffer, &rbytes)) || rbytes == 0) {
                 headerReader->nextBatch();
 				if (!headerReader->getData(&rbuffer, &rbytes)) {
-					WriteErrorMessage( "read header failed\n");
+					WriteErrorMessage( "read header failed, left %lld, headerSize %lld\n", left, headerSize);
+                    headerReader->dumpState();
 					soft_exit(1);
 				}
 			}
+
 			if ((! writer->getBuffer(&wbuffer, &wbytes)) || wbytes == 0) {
 				writer->nextBatch();
 				if (! writer->getBuffer(&wbuffer, &wbytes)) {
@@ -1303,6 +1311,7 @@ SortedDataFilterSupplier::mergeSort()
 			size_t xfer = min(left, min((size_t) rbytes, wbytes));
 			_ASSERT(xfer > 0 && xfer <= UINT32_MAX);
 			memcpy(wbuffer, rbuffer, xfer);
+
             headerReader->advance(xfer);
 			writer->advance((unsigned) xfer);
 			left -= xfer;



View it on GitLab: https://salsa.debian.org/med-team/snap-aligner/-/commit/fd2267ee08146eeac05c98010e6c8bdefc5e2f66

-- 
View it on GitLab: https://salsa.debian.org/med-team/snap-aligner/-/commit/fd2267ee08146eeac05c98010e6c8bdefc5e2f66
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230824/9def1dd4/attachment-0001.htm>


More information about the debian-med-commit mailing list