[med-svn] [Git][med-team/last-align][master] 4 commits: New upstream version 1418
Charles Plessy (@plessy)
gitlab at salsa.debian.org
Wed Oct 19 01:44:58 BST 2022
Charles Plessy pushed to branch master at Debian Med / last-align
Commits:
65a863b5 by Charles Plessy at 2022-10-19T09:29:15+09:00
New upstream version 1418
- - - - -
3ed49664 by Charles Plessy at 2022-10-19T09:29:15+09:00
routine-update: New upstream version
- - - - -
f0b46b54 by Charles Plessy at 2022-10-19T09:29:18+09:00
Update upstream source from tag 'upstream/1418'
Update to upstream version '1418'
with Debian dir 18b921c6730843bb31bf1f9865f4337be52808b0
- - - - -
75b5dac6 by Charles Plessy at 2022-10-19T09:33:29+09:00
routine-update: Ready to upload to unstable
- - - - -
5 changed files:
- debian/changelog
- src/SubsetSuffixArray.cc
- src/SubsetSuffixArray.hh
- src/lastdb.cc
- src/makefile
Changes:
=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+last-align (1418-1) unstable; urgency=medium
+
+ * New upstream version
+
+ -- Charles Plessy <plessy at debian.org> Wed, 19 Oct 2022 09:29:30 +0900
+
last-align (1411-1) unstable; urgency=medium
* New upstream version
=====================================
src/SubsetSuffixArray.cc
=====================================
@@ -7,6 +7,8 @@
#include <cstdio> // remove
#include <sstream>
+#include <thread>
+
using namespace cbrc;
static void err(const std::string &s) {
@@ -199,11 +201,68 @@ void SubsetSuffixArray::toFiles( const std::string& baseName,
memoryToBinaryFile( chibiTable.begin(), chibiTable.end(), fileName );
}
+static size_t bucketPos(const uchar *text, const CyclicSubsetSeed &seed,
+ const SubsetSuffixArray::indexT *steps, unsigned depth,
+ const PosPart *textPosPtr) {
+ const uchar *textPtr = text + posGet(textPosPtr);
+
+ size_t bucketIndex = 0;
+ const uchar *subsetMap = seed.firstMap();
+ unsigned d = 0;
+ while (d < depth) {
+ uchar subset = subsetMap[*textPtr];
+ if (subset == CyclicSubsetSeed::DELIMITER) {
+ return bucketIndex + steps[d]; // d > 0
+ }
+ ++textPtr;
+ ++d;
+ bucketIndex += subset * steps[d];
+ subsetMap = seed.nextMap(subsetMap);
+ }
+
+ return bucketIndex + offParts;
+}
+
+static void makeSomeBuckets(const uchar *text, const CyclicSubsetSeed &seed,
+ const SubsetSuffixArray::indexT *steps,
+ unsigned depth, OffPart *buckBeg, OffPart *buckPtr,
+ const PosPart *sa, size_t saBeg, size_t saEnd) {
+ for (size_t i = saBeg; i < saEnd; ++i) {
+ OffPart *b = buckBeg + bucketPos(text, seed, steps, depth, sa);
+ for (; buckPtr < b; buckPtr += offParts) {
+ offSet(buckPtr, i);
+ }
+ sa += posParts;
+ }
+}
+
+static void runThreads(const uchar *text, const CyclicSubsetSeed *seedPtr,
+ const SubsetSuffixArray::indexT *steps, unsigned depth,
+ OffPart *buckBeg, OffPart *buckPtr, const PosPart *sa,
+ size_t saBeg, size_t saEnd, size_t numOfThreads) {
+ const CyclicSubsetSeed &seed = *seedPtr;
+ if (numOfThreads > 1) {
+ size_t len = (saEnd - saBeg + numOfThreads - 1) / numOfThreads;
+ size_t mid = saBeg + len;
+ const PosPart *m = sa + posParts * len;
+ OffPart *b = buckBeg + bucketPos(text, seed, steps, depth, m - posParts);
+ std::thread t(runThreads, text, seedPtr, steps, depth, buckBeg,
+ b, m, mid, saEnd, numOfThreads - 1);
+ makeSomeBuckets(text, seed, steps, depth, buckBeg,
+ buckPtr, sa, saBeg, mid);
+ t.join();
+ } else {
+ makeSomeBuckets(text, seed, steps, depth, buckBeg,
+ buckPtr, sa, saBeg, saEnd);
+ }
+}
+
void SubsetSuffixArray::makeBuckets(const uchar *text,
unsigned wordLength,
const size_t *cumulativeCounts,
size_t minPositionsPerBucket,
- unsigned bucketDepth) {
+ unsigned bucketDepth,
+ size_t numOfThreads) {
std::vector<unsigned> bucketDepths(seeds.size(), bucketDepth);
if (bucketDepth+1 == 0) {
assert(minPositionsPerBucket > 0);
@@ -220,48 +279,28 @@ void SubsetSuffixArray::makeBuckets(const uchar *text,
buckets.v.resize(bucketsSize());
initBucketEnds();
- OffPart *myBuckets = &buckets.v[0];
- OffPart *bucketPtr = myBuckets;
- indexT posInSuffixArray = 0;
- const PosPart *suffixArrayPtr = suffixArray.begin();
+ const PosPart *sa = suffixArray.begin();
+ OffPart *buckBeg = &buckets.v[0];
+ OffPart *buckPtr = buckBeg;
+ size_t saBeg = 0;
+
for (size_t s = 0; s < seeds.size(); ++s) {
const CyclicSubsetSeed &seed = seeds[s];
- unsigned myBucketDepth = bucketDepths[s];
+ unsigned depth = bucketDepths[s];
const indexT *steps = bucketStepEnds[s];
- indexT endInSuffixArray = cumulativeCounts[s];
-
- while (posInSuffixArray < endInSuffixArray) {
- const uchar* textPtr = text + posGet(suffixArrayPtr);
- const uchar* subsetMap = seed.firstMap();
- indexT bucketIndex = 0;
- unsigned depth = 0;
-
- while (depth < myBucketDepth) {
- uchar subset = subsetMap[ *textPtr ];
- if( subset == CyclicSubsetSeed::DELIMITER ){
- bucketIndex += steps[depth] - offParts; // depth > 0
- break;
- }
- ++textPtr;
- ++depth;
- bucketIndex += subset * steps[depth];
- subsetMap = seed.nextMap( subsetMap );
- }
-
- OffPart *lastBucketPtr = myBuckets + bucketIndex;
- for (; bucketPtr <= lastBucketPtr; bucketPtr += offParts) {
- offSet(bucketPtr, posInSuffixArray);
- }
-
- ++posInSuffixArray;
- suffixArrayPtr += posParts;
+ size_t saEnd = cumulativeCounts[s];
+ if (saEnd > saBeg) {
+ runThreads(text, &seed, steps, depth, buckBeg,
+ buckPtr, sa, saBeg, saEnd, numOfThreads);
+ sa += posParts * (saEnd - saBeg);
+ buckPtr = buckBeg + bucketPos(text, seed, steps, depth, sa - posParts);
+ saBeg = saEnd;
}
-
- myBuckets += steps[0];
+ buckBeg += steps[0];
}
- for (; bucketPtr <= myBuckets; bucketPtr += offParts) {
- offSet(bucketPtr, posInSuffixArray);
+ for (; buckPtr <= buckBeg; buckPtr += offParts) {
+ offSet(buckPtr, saBeg);
}
}
=====================================
src/SubsetSuffixArray.hh
=====================================
@@ -117,7 +117,8 @@ public:
// (memory use of stored positions) / minPositionsPerBucket.
void makeBuckets(const uchar *text,
unsigned wordLength, const size_t *cumulativeCounts,
- size_t minPositionsPerBucket, unsigned bucketDepth);
+ size_t minPositionsPerBucket, unsigned bucketDepth,
+ size_t numOfThreads);
void fromFiles(const std::string &baseName,
bool isMaskLowercase, const uchar letterCode[],
=====================================
src/lastdb.cc
=====================================
@@ -273,7 +273,8 @@ void makeVolume(std::vector<CyclicSubsetSeed>& seeds,
LOG( "bucketing..." );
myIndex.makeBuckets(seq, wordsFinder.wordLength, wordCounts,
- args.minIndexedPositionsPerBucket, args.bucketDepth);
+ args.minIndexedPositionsPerBucket, args.bucketDepth,
+ numOfThreads);
LOG( "writing..." );
if( numOfIndexes > 1 ){
=====================================
src/makefile
=====================================
@@ -144,7 +144,7 @@ ScoreMatrixData.hh: ../data/*.mat
../build/mat-inc.sh ../data/*.mat > $@
VERSION1 = git describe --dirty
-VERSION2 = echo ' (HEAD -> main, tag: 1411) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
+VERSION2 = echo ' (HEAD -> main, tag: 1418) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
VERSION = \"`test -e ../.git && $(VERSION1) || $(VERSION2)`\"
View it on GitLab: https://salsa.debian.org/med-team/last-align/-/compare/d7b8961a6bff49a839692eae811ca5850b81c51c...75b5dac638dbf3553c116536e0c99ef3d1be4a97
--
View it on GitLab: https://salsa.debian.org/med-team/last-align/-/compare/d7b8961a6bff49a839692eae811ca5850b81c51c...75b5dac638dbf3553c116536e0c99ef3d1be4a97
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20221019/d943f222/attachment-0001.htm>
More information about the debian-med-commit mailing list