[med-svn] [Git][med-team/last-align][master] 4 commits: New upstream version 1418

Charles Plessy (@plessy) gitlab at salsa.debian.org
Wed Oct 19 01:44:58 BST 2022



Charles Plessy pushed to branch master at Debian Med / last-align


Commits:
65a863b5 by Charles Plessy at 2022-10-19T09:29:15+09:00
New upstream version 1418
- - - - -
3ed49664 by Charles Plessy at 2022-10-19T09:29:15+09:00
routine-update: New upstream version

- - - - -
f0b46b54 by Charles Plessy at 2022-10-19T09:29:18+09:00
Update upstream source from tag 'upstream/1418'

Update to upstream version '1418'
with Debian dir 18b921c6730843bb31bf1f9865f4337be52808b0
- - - - -
75b5dac6 by Charles Plessy at 2022-10-19T09:33:29+09:00
routine-update: Ready to upload to unstable

- - - - -


5 changed files:

- debian/changelog
- src/SubsetSuffixArray.cc
- src/SubsetSuffixArray.hh
- src/lastdb.cc
- src/makefile


Changes:

=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+last-align (1418-1) unstable; urgency=medium
+
+  * New upstream version
+
+ -- Charles Plessy <plessy at debian.org>  Wed, 19 Oct 2022 09:29:30 +0900
+
 last-align (1411-1) unstable; urgency=medium
 
   * New upstream version


=====================================
src/SubsetSuffixArray.cc
=====================================
@@ -7,6 +7,8 @@
 #include <cstdio>  // remove
 #include <sstream>
 
+#include <thread>
+
 using namespace cbrc;
 
 static void err(const std::string &s) {
@@ -199,11 +201,68 @@ void SubsetSuffixArray::toFiles( const std::string& baseName,
   memoryToBinaryFile( chibiTable.begin(), chibiTable.end(), fileName );
 }
 
+static size_t bucketPos(const uchar *text, const CyclicSubsetSeed &seed,
+			const SubsetSuffixArray::indexT *steps, unsigned depth,
+			const PosPart *textPosPtr) {
+  const uchar *textPtr = text + posGet(textPosPtr);
+
+  size_t bucketIndex = 0;
+  const uchar *subsetMap = seed.firstMap();
+  unsigned d = 0;
+  while (d < depth) {
+    uchar subset = subsetMap[*textPtr];
+    if (subset == CyclicSubsetSeed::DELIMITER) {
+      return bucketIndex + steps[d];  // d > 0
+    }
+    ++textPtr;
+    ++d;
+    bucketIndex += subset * steps[d];
+    subsetMap = seed.nextMap(subsetMap);
+  }
+
+  return bucketIndex + offParts;
+}
+
+static void makeSomeBuckets(const uchar *text, const CyclicSubsetSeed &seed,
+			    const SubsetSuffixArray::indexT *steps,
+			    unsigned depth, OffPart *buckBeg, OffPart *buckPtr,
+			    const PosPart *sa, size_t saBeg, size_t saEnd) {
+  for (size_t i = saBeg; i < saEnd; ++i) {
+    OffPart *b = buckBeg + bucketPos(text, seed, steps, depth, sa);
+    for (; buckPtr < b; buckPtr += offParts) {
+      offSet(buckPtr, i);
+    }
+    sa += posParts;
+  }
+}
+
+static void runThreads(const uchar *text, const CyclicSubsetSeed *seedPtr,
+		       const SubsetSuffixArray::indexT *steps, unsigned depth,
+		       OffPart *buckBeg, OffPart *buckPtr, const PosPart *sa,
+		       size_t saBeg, size_t saEnd, size_t numOfThreads) {
+  const CyclicSubsetSeed &seed = *seedPtr;
+  if (numOfThreads > 1) {
+    size_t len = (saEnd - saBeg + numOfThreads - 1) / numOfThreads;
+    size_t mid = saBeg + len;
+    const PosPart *m = sa + posParts * len;
+    OffPart *b = buckBeg + bucketPos(text, seed, steps, depth, m - posParts);
+    std::thread t(runThreads, text, seedPtr, steps, depth, buckBeg,
+		  b, m, mid, saEnd, numOfThreads - 1);
+    makeSomeBuckets(text, seed, steps, depth, buckBeg,
+		    buckPtr, sa, saBeg, mid);
+    t.join();
+  } else {
+    makeSomeBuckets(text, seed, steps, depth, buckBeg,
+		    buckPtr, sa, saBeg, saEnd);
+  }
+}
+
 void SubsetSuffixArray::makeBuckets(const uchar *text,
 				    unsigned wordLength,
 				    const size_t *cumulativeCounts,
 				    size_t minPositionsPerBucket,
-				    unsigned bucketDepth) {
+				    unsigned bucketDepth,
+				    size_t numOfThreads) {
   std::vector<unsigned> bucketDepths(seeds.size(), bucketDepth);
   if (bucketDepth+1 == 0) {
     assert(minPositionsPerBucket > 0);
@@ -220,48 +279,28 @@ void SubsetSuffixArray::makeBuckets(const uchar *text,
   buckets.v.resize(bucketsSize());
   initBucketEnds();
 
-  OffPart *myBuckets = &buckets.v[0];
-  OffPart *bucketPtr = myBuckets;
-  indexT posInSuffixArray = 0;
-  const PosPart *suffixArrayPtr = suffixArray.begin();
+  const PosPart *sa = suffixArray.begin();
+  OffPart *buckBeg = &buckets.v[0];
+  OffPart *buckPtr = buckBeg;
+  size_t saBeg = 0;
+
   for (size_t s = 0; s < seeds.size(); ++s) {
     const CyclicSubsetSeed &seed = seeds[s];
-    unsigned myBucketDepth = bucketDepths[s];
+    unsigned depth = bucketDepths[s];
     const indexT *steps = bucketStepEnds[s];
-    indexT endInSuffixArray = cumulativeCounts[s];
-
-    while (posInSuffixArray < endInSuffixArray) {
-      const uchar* textPtr = text + posGet(suffixArrayPtr);
-      const uchar* subsetMap = seed.firstMap();
-      indexT bucketIndex = 0;
-      unsigned depth = 0;
-
-      while (depth < myBucketDepth) {
-	uchar subset = subsetMap[ *textPtr ];
-	if( subset == CyclicSubsetSeed::DELIMITER ){
-	  bucketIndex += steps[depth] - offParts;  // depth > 0
-	  break;
-	}
-	++textPtr;
-	++depth;
-	bucketIndex += subset * steps[depth];
-	subsetMap = seed.nextMap( subsetMap );
-      }
-
-      OffPart *lastBucketPtr = myBuckets + bucketIndex;
-      for (; bucketPtr <= lastBucketPtr; bucketPtr += offParts) {
-	offSet(bucketPtr, posInSuffixArray);
-      }
-
-      ++posInSuffixArray;
-      suffixArrayPtr += posParts;
+    size_t saEnd = cumulativeCounts[s];
+    if (saEnd > saBeg) {
+      runThreads(text, &seed, steps, depth, buckBeg,
+		 buckPtr, sa, saBeg, saEnd, numOfThreads);
+      sa += posParts * (saEnd - saBeg);
+      buckPtr = buckBeg + bucketPos(text, seed, steps, depth, sa - posParts);
+      saBeg = saEnd;
     }
-
-    myBuckets += steps[0];
+    buckBeg += steps[0];
   }
 
-  for (; bucketPtr <= myBuckets; bucketPtr += offParts) {
-    offSet(bucketPtr, posInSuffixArray);
+  for (; buckPtr <= buckBeg; buckPtr += offParts) {
+    offSet(buckPtr, saBeg);
   }
 }
 


=====================================
src/SubsetSuffixArray.hh
=====================================
@@ -117,7 +117,8 @@ public:
   // (memory use of stored positions) / minPositionsPerBucket.
   void makeBuckets(const uchar *text,
 		   unsigned wordLength, const size_t *cumulativeCounts,
-		   size_t minPositionsPerBucket, unsigned bucketDepth);
+		   size_t minPositionsPerBucket, unsigned bucketDepth,
+		   size_t numOfThreads);
 
   void fromFiles(const std::string &baseName,
 		 bool isMaskLowercase, const uchar letterCode[],


=====================================
src/lastdb.cc
=====================================
@@ -273,7 +273,8 @@ void makeVolume(std::vector<CyclicSubsetSeed>& seeds,
 
     LOG( "bucketing..." );
     myIndex.makeBuckets(seq, wordsFinder.wordLength, wordCounts,
-			args.minIndexedPositionsPerBucket, args.bucketDepth);
+			args.minIndexedPositionsPerBucket, args.bucketDepth,
+			numOfThreads);
 
     LOG( "writing..." );
     if( numOfIndexes > 1 ){


=====================================
src/makefile
=====================================
@@ -144,7 +144,7 @@ ScoreMatrixData.hh: ../data/*.mat
 	../build/mat-inc.sh ../data/*.mat > $@
 
 VERSION1 = git describe --dirty
-VERSION2 = echo ' (HEAD -> main, tag: 1411) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
+VERSION2 = echo ' (HEAD -> main, tag: 1418) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
 
 VERSION = \"`test -e ../.git && $(VERSION1) || $(VERSION2)`\"
 



View it on GitLab: https://salsa.debian.org/med-team/last-align/-/compare/d7b8961a6bff49a839692eae811ca5850b81c51c...75b5dac638dbf3553c116536e0c99ef3d1be4a97

-- 
View it on GitLab: https://salsa.debian.org/med-team/last-align/-/compare/d7b8961a6bff49a839692eae811ca5850b81c51c...75b5dac638dbf3553c116536e0c99ef3d1be4a97
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20221019/d943f222/attachment-0001.htm>


More information about the debian-med-commit mailing list