[med-svn] [Git][med-team/lamassemble][upstream] New upstream version 1.7.0

Étienne Mollier (@emollier) gitlab at salsa.debian.org
Wed Dec 13 17:02:34 GMT 2023



Étienne Mollier pushed to branch upstream at Debian Med / lamassemble


Commits:
1a45d6c3 by Étienne Mollier at 2023-12-13T17:47:08+01:00
New upstream version 1.7.0
- - - - -


3 changed files:

- README.md
- lamassemble
- setup.py


Changes:

=====================================
README.md
=====================================
@@ -87,7 +87,7 @@ You may see a warning message like this:
 This could mean that some sequences are not similar to the others, or
 that `LAST` didn't find the similarities.  (To save time, `LAST` does
 not find all pairwise similarities.)  You can make `LAST` find more
-similarities by increasing option `-m` (and/or decreasing `-W`).
+similarities by increasing option `-m` (and/or decreasing `-u` or `-W`).
 
 ## Options
 
@@ -136,6 +136,10 @@ similarities by increasing option `-m` (and/or decreasing `-W`).
 
 - `-P P`: number of parallel threads.
 
+- `-u U`: go faster by getting `U`-fold fewer initial matches between
+  2 sequences.  `U` must be 4, 8, 16, 32, or 64 (values allowed by
+  `lastdb -uRY`).
+
 - `-W W`: get initial matches between 2 sequences starting at any base
   that is the "minimum" in any window of `W` consecutive bases.
   ("Minimum" means that the sequence starting here is alphabetically


=====================================
lamassemble
=====================================
@@ -486,27 +486,6 @@ def alignedRangePerSeq(keptAlignments, isRevPerSeq):
         updateRange(begPerSeq, endPerSeq, isRevPerSeq, qry)
     return begPerSeq, endPerSeq
 
-def updateSegmentPairs(segPairs, refBeg, qryBeg):
-    if segPairs:
-        size, r, q = segPairs[-1]
-        if r + size == refBeg and q + size == qryBeg:
-            segPairs[-1] = size + 1, r, q
-            return
-    segPairs.append((1, refBeg, qryBeg))
-
-def bestSegmentPairs(refBeg, refAln, qryBeg, qryAln, probCodes, minProbCode):
-    segPairs = []
-    for p, q, r in zip(probCodes, qryAln, refAln):
-        if q != "-":
-            if r != "-":
-                if p >= minProbCode:
-                    updateSegmentPairs(segPairs, refBeg, qryBeg)
-                refBeg += 1
-            qryBeg += 1
-        elif r != "-":
-            refBeg += 1
-    return segPairs
-
 def pairwiseAnchors(opts, minProbCode, alns, seqRanks, isRevPerSeq, begPerSeq):
     for negScore, qry, ref, probCodes in alns:
         if qry[0] > ref[0]:
@@ -526,14 +505,27 @@ def pairwiseAnchors(opts, minProbCode, alns, seqRanks, isRevPerSeq, begPerSeq):
         if not opts.all:
             refBeg -= begPerSeq[refNum]
             qryBeg -= begPerSeq[qryNum]
-        s = bestSegmentPairs(refBeg, refAln, qryBeg, qryAln, probCodes,
-                             minProbCode)
-        for size, rBeg, qBeg in s:
+        qBeg = rBeg = size = 0
+        for p, q, r in zip(probCodes, qryAln, refAln):
+            if q != "-":
+                if r != "-":
+                    if p >= minProbCode:
+                        if qBeg + size < qryBeg or rBeg + size < refBeg:
+                            if size:
+                                yield qryRank, refRank, qBeg, rBeg, size
+                            qBeg = qryBeg
+                            rBeg = refBeg
+                            size = 0
+                        size += 1
+                    refBeg += 1
+                qryBeg += 1
+            elif r != "-":
+                refBeg += 1
+        if size:
             yield qryRank, refRank, qBeg, rBeg, size
 
 def prepareToAlign(opts, scores, sequences, tmpdir):
     optP = "-P" + str(opts.P)
-    optW = "-W" + str(opts.W)
     fwdMatrix, revMatrix, gapExistCost, gapExtendCost = scores
 
     fwdMat = os.path.join(tmpdir, "fwd.mat")
@@ -549,13 +541,16 @@ def prepareToAlign(opts, scores, sequences, tmpdir):
         printNumberedSequences(sequences, f)
 
     db = os.path.join(tmpdir, "db")
-    cmd = "lastdb", "-uNEAR", "-c", "-R01", optP, optW, db, seqFile
+    if opts.u:
+        cmd = "lastdb", "-c", optP, "-uRY" + str(opts.u), db, seqFile
+    else:
+        cmd = "lastdb", "-c", optP, "-uNEAR", "-W" + str(opts.W), db, seqFile
     logging.info(" ".join(cmd))
     subprocess.check_call(cmd)
 
     return fwdMat, revMat, seqFile, db
 
-def strandAlignments(opts, db, seqFile, matFile, strandNum):
+def addStrandAlignments(alignments, opts, db, seqFile, matFile, strandNum):
     optP = "-P" + str(opts.P)
     optD = "-D1e9"
     mOpt = "-m" + str(opts.m)
@@ -566,12 +561,15 @@ def strandAlignments(opts, db, seqFile, matFile, strandNum):
     logging.info(" ".join(cmd))
     proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             universal_newlines=True)
-    return alignmentInput(opts, proc.stdout)
+    alignments.extend(alignmentInput(opts, proc.stdout))
+    retcode = proc.wait()
+    if retcode:
+        raise subprocess.CalledProcessError(retcode, cmd)
 
 def pairwiseAlignments(opts, fwdMat, revMat, seqFile, db):
     alignments = []
-    alignments.extend(strandAlignments(opts, db, seqFile, fwdMat, 1))
-    alignments.extend(strandAlignments(opts, db, seqFile, revMat, 0))
+    addStrandAlignments(alignments, opts, db, seqFile, fwdMat, 1)
+    addStrandAlignments(alignments, opts, db, seqFile, revMat, 0)
     alignments.sort()
     return alignments
 
@@ -742,6 +740,8 @@ if __name__ == "__main__":
     og = optparse.OptionGroup(op, "LAST options")
     og.add_option("-P", type="int", default=1,
                   help="number of parallel threads (default=%default)")
+    og.add_option("-u", metavar="RY", type="int", help=
+                  "use ~1 per this many initial matches")
     og.add_option("-W", type="int", default=19, help="use minimum positions "
                   "in length-W windows (default=%default)")
     og.add_option("-m", type="int", default=5, help=


=====================================
setup.py
=====================================
@@ -1,6 +1,6 @@
 import setuptools
 
-commitInfo = " (HEAD -> master, tag: 1.6.1)".strip("( )").split()
+commitInfo = " (HEAD -> master, tag: 1.7.0)".strip("( )").split()
 version = commitInfo[commitInfo.index("tag:") + 1].rstrip(",")
 
 setuptools.setup(



View it on GitLab: https://salsa.debian.org/med-team/lamassemble/-/commit/1a45d6c3ee4ecabc511e05706079539d69b8bd5d

-- 
View it on GitLab: https://salsa.debian.org/med-team/lamassemble/-/commit/1a45d6c3ee4ecabc511e05706079539d69b8bd5d
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20231213/74fdb8d4/attachment-0001.htm>


More information about the debian-med-commit mailing list