[med-svn] [Git][med-team/snap-aligner][master] 5 commits: New upstream version 2.0.3+dfsg

Andreas Tille (@tille) gitlab at salsa.debian.org
Thu Aug 24 10:22:10 BST 2023



Andreas Tille pushed to branch master at Debian Med / snap-aligner


Commits:
fd2267ee by Andreas Tille at 2023-08-24T11:08:21+02:00
New upstream version 2.0.3+dfsg
- - - - -
b9eb6d3f by Andreas Tille at 2023-08-24T11:08:21+02:00
routine-update: New upstream version

- - - - -
e8a03a3f by Andreas Tille at 2023-08-24T11:08:24+02:00
Update upstream source from tag 'upstream/2.0.3+dfsg'

Update to upstream version '2.0.3+dfsg'
with Debian dir 2dd050d623cd164baa6de19460e7d5fff98aa0f7
- - - - -
418e2a4e by Andreas Tille at 2023-08-24T11:17:43+02:00
Remove program version from reference file to not being forced to adapt the reference for every new release

- - - - -
6b8d176f by Andreas Tille at 2023-08-24T11:20:06+02:00
Upload to unstable

- - - - -


11 changed files:

- SNAPLib/Bam.cpp
- SNAPLib/BaseAligner.cpp
- SNAPLib/CommandProcessor.cpp
- SNAPLib/DataReader.cpp
- SNAPLib/DataReader.h
- SNAPLib/IntersectingPairedEndAligner.cpp
- SNAPLib/SingleAligner.cpp
- SNAPLib/SortedDataWriter.cpp
- debian/changelog
- debian/tests/ref/output.sam
- debian/tests/run-unit-test


Changes:

=====================================
SNAPLib/Bam.cpp
=====================================
@@ -1025,6 +1025,7 @@ BAMFormat::writeHeader(
     }
 
     *headerActualSize = cursor;
+
     return true;
 }
 


=====================================
SNAPLib/BaseAligner.cpp
=====================================
@@ -693,7 +693,7 @@ Return Value:
             //
             // And finally, try scoring.
             //
-            if (score(
+            bool success = score(
                 false,
                 read,
                 primaryResult,
@@ -706,18 +706,21 @@ Return Value:
                 maxCandidatesForAffineGapBufferSize,
                 nCandidatesForAffineGap,
                 candidatesForAffineGap,
-                useHamming)) {
+                useHamming);
 
+            if (overflowedSecondaryResultsBuffer) {
+                return false;
+            }
+
+            if (success) {
 #ifdef  _DEBUG
                 if (_DumpAlignments) printf("Final result score %d MAPQ %d at %s:%llu\n", primaryResult->score, primaryResult->mapq,
                     genome->getContigAtLocation(primaryResult->location)->name, primaryResult->location - genome->getContigAtLocation(primaryResult->location)->beginningLocation);
 #endif  // _DEBUG
-                if (overflowedSecondaryResultsBuffer) {
-                    return false;
-                }
+
                 finalizeSecondaryResults(read[FORWARD], primaryResult, nSecondaryResults, secondaryResults, maxSecondaryResults, maxEditDistanceForSecondaryResults, primaryResult->score);
                 return true;
-            } // If score says we have a difinitive answer
+            } // If score says we have a definitive answer
         } // If we applied a seed, and so something's changed.
     } // While we're still applying seeds
 


=====================================
SNAPLib/CommandProcessor.cpp
=====================================
@@ -36,7 +36,7 @@ Revision History:
 #include "Compat.h"
 #include "HitDepth.h"
 
-const char *SNAP_VERSION = "2.0.2";
+const char *SNAP_VERSION = "2.0.3";
 
 static void usage()
 {


=====================================
SNAPLib/DataReader.cpp
=====================================
@@ -60,6 +60,8 @@ public:
 
     virtual bool getData(char** o_buffer, _int64* o_validBytes, _int64* o_startBytes = NULL);
 
+    virtual void dumpState();
+
     virtual void advance(_int64 bytes);
 
     virtual void nextBatch();
@@ -165,6 +167,16 @@ protected:
     const size_t         bufferSize;
 };
 
+void
+ReadBasedDataReader::dumpState()
+{
+    WriteErrorMessage("ReadBasedDataReader at 0x%llx state:\n", this);
+    WriteErrorMessage("\theaderBufferSize %lld, headerExtraSize %lld, amountAdvancedThroughUnderlyingStoreByUs %lld, nHeaderBuffersAllocated %d, hitEOFReadingHeader %d, bufferSize %lld\n",
+        headerBufferSize, headerExtraSize, amountAdvancedThroughUnderlyingStoreByUs, nHeaderBuffersAllocated, hitEOFReadingHeader, bufferSize);
+    WriteErrorMessage("\tnBuffers %d, headerBuffersOutstanding, %d, startedReadingHeader %d, extraBytes %lld, overflowBytes %lld, nextBatchID %d, nextBufferForReader %d, nextBufferForConsumer %d, lastBufferForConsumer %d\n",
+        nBuffers, headerBuffersOutstanding, startedReadingHeader, extraBytes, overflowBytes, nextBatchID, nextBufferForReader, nextBufferForConsumer, lastBufferForConsumer);
+}
+
 ReadBasedDataReader::ReadBasedDataReader(
     unsigned i_nBuffers,
     _int64 i_overflowBytes,
@@ -186,12 +198,20 @@ ReadBasedDataReader::ReadBasedDataReader(
     _ASSERT(extraFactor >= 0 && i_nBuffers > 0);
     bufferInfo = new BufferInfo[maxBuffers];
     extraBytes = max((_int64) 0, (_int64) ((bufferSize + overflowBytes) * extraFactor));
+
+    if (bufferSize <= 2 * (size_t)overflowBytes) {
+        WriteErrorMessage("ReadBasedDataReader::ReadBasedDataReader: the buffer size %lld isn't more than twice the overflow size %lld, so we will fail to make progres.  This is a code bug, please create a github issue.\n",
+            bufferSize, overflowBytes);
+        soft_exit(1);
+    }
+
     char* allocated = (char*) BigReserve(maxBuffers * (bufferSize + extraBytes + overflowBytes));
     BigCommit(allocated, nBuffers * (bufferSize + extraBytes + overflowBytes));
     if (NULL == allocated) {
         WriteErrorMessage("ReadBasedDataReader: unable to allocate IO buffer\n");
         soft_exit(1);
     }
+
     for (unsigned i = 0 ; i < nBuffers; i++) {
         bufferInfo[i].buffer = allocated;
         allocated += bufferSize + overflowBytes;
@@ -206,6 +226,7 @@ ReadBasedDataReader::ReadBasedDataReader(
         bufferInfo[i].holds = 0;
 		bufferInfo[i].headerBuffer = false;
     }
+
     nextBatchID = 1;
  
     nextBufferForConsumer = -1;
@@ -1331,6 +1352,8 @@ public:
         return fileName;
     }
 
+    virtual void dumpState();
+
 protected:
 
     // must hold the lock to call
@@ -1354,11 +1377,28 @@ protected:
 
 };
 
+void
+AsyncFileDataReader::dumpState()
+{
+    WriteErrorMessage("AsyncFileDataReader (0x%llx) state:\n", this);
+    WriteErrorMessage("\tfileName %s, fileSize %lld, readOffset %lld, endingOffset %lld\n",
+        fileName, fileSize, readOffset, endingOffset);
+    
+    ReadBasedDataReader::dumpState();
+}
+
 AsyncFileDataReader::AsyncFileDataReader(unsigned i_nBuffers, _int64 i_overflowBytes, double extraFactor, size_t bufferSpace) :
     ReadBasedDataReader(i_nBuffers, i_overflowBytes, extraFactor, bufferSpace), fileName(NULL), asyncFile(NULL), endingOffset(0)
 {
     readOffset = 0;
     bufferReaders = (AsyncFile::Reader**)malloc(sizeof(AsyncFile::Reader*) * maxBuffers);
+
+    if (NULL == bufferReaders) {
+        WriteErrorMessage("AsyncFileDataReader::AsyncFileDataReader(): malloc(%d) failed\n", sizeof(AsyncFile::Reader*) * maxBuffers);
+        soft_exit(1);
+        return; // Just to avoid the compiler warning
+    }
+
     for (unsigned i = 0; i < i_nBuffers; i++) {
         bufferReaders[i] = NULL;
     }


=====================================
SNAPLib/DataReader.h
=====================================
@@ -145,6 +145,11 @@ public:
     // timing for performance tuning (in nanos)
     static volatile _int64 ReadWaitTime;
     static volatile _int64 ReleaseWaitTime;
+
+    //
+    // debugging
+    //
+    virtual void dumpState() {} // Override in a subclass if needed
 };
 
 class DataSupplier


=====================================
SNAPLib/IntersectingPairedEndAligner.cpp
=====================================
@@ -879,10 +879,11 @@ IntersectingPairedEndAligner::alignLandauVishkin(
                 scoreLimit = computeScoreLimit(nonALTAlignment, &scoresForAllAlignments, &scoresForNonAltAlignments, __max(mate->largestBigIndelDetected, __min(candidate->largestBigIndelDetected, fewerEndScore)));
 
                 _ASSERT(genomeLocationIsWithin(mate->readWithMoreHitsGenomeLocation, candidate->readWithFewerHitsGenomeLocation, maxSpacing));
+
                 //
-                // Exclude it if it's strictly smaller than minSpacing; hence, minSpacing -1.
+                // Exclude it if it's strictly smaller than minSpacing; hence, minSpacing - 1.
                 //
-                if (!genomeLocationIsWithin(mate->readWithMoreHitsGenomeLocation, candidate->readWithFewerHitsGenomeLocation, minSpacing -1) && ((mate->bestPossibleScore <= scoreLimit - fewerEndScore))) {
+                if (!genomeLocationIsWithin(mate->readWithMoreHitsGenomeLocation, candidate->readWithFewerHitsGenomeLocation, minSpacing - 1) && ((mate->bestPossibleScore <= scoreLimit - fewerEndScore))) {
                     //
                     // It's within the range and not necessarily too poor of a match.  Consider it.
                     //
@@ -1063,7 +1064,7 @@ IntersectingPairedEndAligner::alignLandauVishkin(
                                 }
 
                                 (*nLVCandidatesForAffineGap)++;
-                            }
+                            } // if not eliminatedByMerge
 
                             if (nonALTAlignment) {
                                 scoresForNonAltAlignments.updateBestHitIfNeeded(pairScore, pairAGScore, pairProbability, fewerEndScore, readWithMoreHits, fewerEndGenomeLocationOffset, candidate, mate);
@@ -3369,7 +3370,7 @@ IntersectingPairedEndAligner::scoreLocation(
         *totalIndelsLV = totalIndels1 + totalIndels2;
     } else {
         *score = ScoreAboveLimit;
-        *agScore = -1;
+        *agScore = ScoreAboveLimit;
         *matchProbability = 0.0;
     }
 


=====================================
SNAPLib/SingleAligner.cpp
=====================================
@@ -247,7 +247,7 @@ SingleAlignerContext::runIterationThreadImpl(Read *& read)
         }
 #endif
         SingleAlignmentResult firstALTResult;
-        while (!aligner->AlignRead(read, alignmentResults, &firstALTResult, maxSecondaryAlignmentAdditionalEditDistance, alignmentResultBufferCount - 1, &nSecondaryResults, maxSecondaryAlignments, alignmentResults + 1, 0, NULL, NULL, useSoftClipping)) {
+        while (!aligner->AlignRead(read, alignmentResults, &firstALTResult, maxSecondaryAlignmentAdditionalEditDistance, alignmentResultBufferCount - 1, &nSecondaryResults, maxSecondaryAlignments, alignmentResults + 1, 0, NULL, NULL)) {
             //
             // Out of secondary alignment buffer.  Reallocate.
             //


=====================================
SNAPLib/SortedDataWriter.cpp
=====================================
@@ -1260,7 +1260,10 @@ SortedDataFilterSupplier::mergeSort()
         {
             i->reader = readerSupplier->getDataReader(1, MAX_READ_LENGTH * 8, 0.0,
                 __min(1UL << 23, __max(1UL << 17, bufferSpace / blocks.size()))); // 128kB to 8MB buffer space per block
-            i->reader->init(tempFileName);
+            if (!i->reader->init(tempFileName)) {
+                WriteErrorMessage("SortedDataFilterSupplier::mergeSort: reader->init(%s) failed\n", tempFileName);
+                soft_exit(1);
+            }
             i->reader->reinit(i->start, i->bytes);
         }
     }
@@ -1274,8 +1277,11 @@ SortedDataFilterSupplier::mergeSort()
         DataReader* headerReader;
         if (blocks[0].dataReaderIsBuffer) 
         {
-            headerReader = readerSupplier->getDataReader(1, MAX_READ_LENGTH * 8, 0.0, headerSize + 4096);
-            headerReader->init(tempFileName);
+            headerReader = readerSupplier->getDataReader(1, 0, 0.0, 0);
+            if (!headerReader->init(tempFileName)) {
+                WriteErrorMessage("SortedDataFilterSupplier::mergeSort: reader->init(%s) failed for headerReader\n", tempFileName);
+                soft_exit(1);
+            }
         } else {
             headerReader = blocks[0].reader;
         }
@@ -1289,10 +1295,12 @@ SortedDataFilterSupplier::mergeSort()
 			if ((!headerReader->getData(&rbuffer, &rbytes)) || rbytes == 0) {
                 headerReader->nextBatch();
 				if (!headerReader->getData(&rbuffer, &rbytes)) {
-					WriteErrorMessage( "read header failed\n");
+					WriteErrorMessage( "read header failed, left %lld, headerSize %lld\n", left, headerSize);
+                    headerReader->dumpState();
 					soft_exit(1);
 				}
 			}
+
 			if ((! writer->getBuffer(&wbuffer, &wbytes)) || wbytes == 0) {
 				writer->nextBatch();
 				if (! writer->getBuffer(&wbuffer, &wbytes)) {
@@ -1303,6 +1311,7 @@ SortedDataFilterSupplier::mergeSort()
 			size_t xfer = min(left, min((size_t) rbytes, wbytes));
 			_ASSERT(xfer > 0 && xfer <= UINT32_MAX);
 			memcpy(wbuffer, rbuffer, xfer);
+
             headerReader->advance(xfer);
 			writer->advance((unsigned) xfer);
 			left -= xfer;


=====================================
debian/changelog
=====================================
@@ -1,3 +1,12 @@
+snap-aligner (2.0.3+dfsg-1) unstable; urgency=medium
+
+  * Team upload.
+  * New upstream version
+  * Remove program version from reference file to not being forced to
+    adapt the reference for every new release
+
+ -- Andreas Tille <tille at debian.org>  Thu, 24 Aug 2023 11:17:57 +0200
+
 snap-aligner (2.0.2+dfsg-1) unstable; urgency=medium
 
   * Team upload.


=====================================
debian/tests/ref/output.sam
=====================================
@@ -1,6 +1,6 @@
 @HD	VN:1.6	GO:query
 @RG	ID:FASTQ	PL:Illumina	PU:pu	LB:lb	SM:sm
-@PG	ID:SNAP	PN:SNAP	CL:single datatest datatest.fq -o output.sam	VN:2.0.2
+@PG	ID:SNAP	PN:SNAP	CL:single datatest datatest.fq -o output.sam	VN:
 @SQ	SN:ref1	LN:202
 read1	0	ref1	1	70	101M	*	0	0	GTCACAAATGCCACAGAGCAAATGGTCCTGAACAAGCAAACAGAACAGGCCCAGAACACGCCAACCTGTTGAAGACAGAAAGTAGCTTCGTGGCCGGGGGG	-@4>3.>,;$B;A>@&A<<5:@5A?<6<1,>='=7A99=<;7;61></'3+5(<&5,0)30%/=:(&(842&54-+,578)776;.*,&/538)/%$(1,-	PG:Z:SNAP	NM:i:0	RG:Z:FASTQ	PL:Z:Illumina	PU:Z:pu	LB:Z:lb	SM:Z:sm
 read1	0	ref1	102	70	101M	*	0	0	CCACAGCTCTGACTCCTGCATCCTTCTCCTGTGAAGGGGAGGGAGGTGGTGCTGCAGGGGAGGGGAGGGGGCTAGGAGATGTCACTGGGAGCGGAAACGGC	9;6@;:>:2(5.293?+,72$78:974?C>382;A?=:83;96:AB1>=D@4A;C=AD+<E9=;CBC$<EB2@A;BF=EE.FA5>ECE(%FFCDBB1A??F	PG:Z:SNAP	NM:i:0	RG:Z:FASTQ	PL:Z:Illumina	PU:Z:pu	LB:Z:lb	SM:Z:sm


=====================================
debian/tests/run-unit-test
=====================================
@@ -31,6 +31,8 @@ echo
 
 echo -e "\e[93m\e[1mTest 3\e[0m"
 snap-aligner single datatest datatest.fq -o output.sam
+# Remove program version from output.sam to not being forced to adapt the reference for every new release
+sed -i 's/\(^@PG.*single datatest.*VN:\)[0-9.]*/\1/' output.sam
 diff output.sam ref/output.sam
 echo -e "\e[92m\e[1mPassed\e[0m"
 echo



View it on GitLab: https://salsa.debian.org/med-team/snap-aligner/-/compare/d318cd7ff8c751f7c95cf47627d2da3775473eae...6b8d176f3a4ad34de0fc50229dd1bd57d0416a5a

-- 
View it on GitLab: https://salsa.debian.org/med-team/snap-aligner/-/compare/d318cd7ff8c751f7c95cf47627d2da3775473eae...6b8d176f3a4ad34de0fc50229dd1bd57d0416a5a
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230824/75756135/attachment-0001.htm>


More information about the debian-med-commit mailing list