[med-svn] [Git][med-team/lamassemble][upstream] New upstream version 1.7.0
Étienne Mollier (@emollier)
gitlab at salsa.debian.org
Wed Dec 13 17:02:34 GMT 2023
Étienne Mollier pushed to branch upstream at Debian Med / lamassemble
Commits:
1a45d6c3 by Étienne Mollier at 2023-12-13T17:47:08+01:00
New upstream version 1.7.0
- - - - -
3 changed files:
- README.md
- lamassemble
- setup.py
Changes:
=====================================
README.md
=====================================
@@ -87,7 +87,7 @@ You may see a warning message like this:
This could mean that some sequences are not similar to the others, or
that `LAST` didn't find the similarities. (To save time, `LAST` does
not find all pairwise similarities.) You can make `LAST` find more
-similarities by increasing option `-m` (and/or decreasing `-W`).
+similarities by increasing option `-m` (and/or decreasing `-u` or `-W`).
## Options
@@ -136,6 +136,10 @@ similarities by increasing option `-m` (and/or decreasing `-W`).
- `-P P`: number of parallel threads.
+- `-u U`: go faster by getting `U`-fold fewer initial matches between
+ 2 sequences. `U` must be 4, 8, 16, 32, or 64 (values allowed by
+ `lastdb -uRY`).
+
- `-W W`: get initial matches between 2 sequences starting at any base
that is the "minimum" in any window of `W` consecutive bases.
("Minimum" means that the sequence starting here is alphabetically
=====================================
lamassemble
=====================================
@@ -486,27 +486,6 @@ def alignedRangePerSeq(keptAlignments, isRevPerSeq):
updateRange(begPerSeq, endPerSeq, isRevPerSeq, qry)
return begPerSeq, endPerSeq
-def updateSegmentPairs(segPairs, refBeg, qryBeg):
- if segPairs:
- size, r, q = segPairs[-1]
- if r + size == refBeg and q + size == qryBeg:
- segPairs[-1] = size + 1, r, q
- return
- segPairs.append((1, refBeg, qryBeg))
-
-def bestSegmentPairs(refBeg, refAln, qryBeg, qryAln, probCodes, minProbCode):
- segPairs = []
- for p, q, r in zip(probCodes, qryAln, refAln):
- if q != "-":
- if r != "-":
- if p >= minProbCode:
- updateSegmentPairs(segPairs, refBeg, qryBeg)
- refBeg += 1
- qryBeg += 1
- elif r != "-":
- refBeg += 1
- return segPairs
-
def pairwiseAnchors(opts, minProbCode, alns, seqRanks, isRevPerSeq, begPerSeq):
for negScore, qry, ref, probCodes in alns:
if qry[0] > ref[0]:
@@ -526,14 +505,27 @@ def pairwiseAnchors(opts, minProbCode, alns, seqRanks, isRevPerSeq, begPerSeq):
if not opts.all:
refBeg -= begPerSeq[refNum]
qryBeg -= begPerSeq[qryNum]
- s = bestSegmentPairs(refBeg, refAln, qryBeg, qryAln, probCodes,
- minProbCode)
- for size, rBeg, qBeg in s:
+ qBeg = rBeg = size = 0
+ for p, q, r in zip(probCodes, qryAln, refAln):
+ if q != "-":
+ if r != "-":
+ if p >= minProbCode:
+ if qBeg + size < qryBeg or rBeg + size < refBeg:
+ if size:
+ yield qryRank, refRank, qBeg, rBeg, size
+ qBeg = qryBeg
+ rBeg = refBeg
+ size = 0
+ size += 1
+ refBeg += 1
+ qryBeg += 1
+ elif r != "-":
+ refBeg += 1
+ if size:
yield qryRank, refRank, qBeg, rBeg, size
def prepareToAlign(opts, scores, sequences, tmpdir):
optP = "-P" + str(opts.P)
- optW = "-W" + str(opts.W)
fwdMatrix, revMatrix, gapExistCost, gapExtendCost = scores
fwdMat = os.path.join(tmpdir, "fwd.mat")
@@ -549,13 +541,16 @@ def prepareToAlign(opts, scores, sequences, tmpdir):
printNumberedSequences(sequences, f)
db = os.path.join(tmpdir, "db")
- cmd = "lastdb", "-uNEAR", "-c", "-R01", optP, optW, db, seqFile
+ if opts.u:
+ cmd = "lastdb", "-c", optP, "-uRY" + str(opts.u), db, seqFile
+ else:
+ cmd = "lastdb", "-c", optP, "-uNEAR", "-W" + str(opts.W), db, seqFile
logging.info(" ".join(cmd))
subprocess.check_call(cmd)
return fwdMat, revMat, seqFile, db
-def strandAlignments(opts, db, seqFile, matFile, strandNum):
+def addStrandAlignments(alignments, opts, db, seqFile, matFile, strandNum):
optP = "-P" + str(opts.P)
optD = "-D1e9"
mOpt = "-m" + str(opts.m)
@@ -566,12 +561,15 @@ def strandAlignments(opts, db, seqFile, matFile, strandNum):
logging.info(" ".join(cmd))
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
universal_newlines=True)
- return alignmentInput(opts, proc.stdout)
+ alignments.extend(alignmentInput(opts, proc.stdout))
+ retcode = proc.wait()
+ if retcode:
+ raise subprocess.CalledProcessError(retcode, cmd)
def pairwiseAlignments(opts, fwdMat, revMat, seqFile, db):
alignments = []
- alignments.extend(strandAlignments(opts, db, seqFile, fwdMat, 1))
- alignments.extend(strandAlignments(opts, db, seqFile, revMat, 0))
+ addStrandAlignments(alignments, opts, db, seqFile, fwdMat, 1)
+ addStrandAlignments(alignments, opts, db, seqFile, revMat, 0)
alignments.sort()
return alignments
@@ -742,6 +740,8 @@ if __name__ == "__main__":
og = optparse.OptionGroup(op, "LAST options")
og.add_option("-P", type="int", default=1,
help="number of parallel threads (default=%default)")
+ og.add_option("-u", metavar="RY", type="int", help=
+ "use ~1 per this many initial matches")
og.add_option("-W", type="int", default=19, help="use minimum positions "
"in length-W windows (default=%default)")
og.add_option("-m", type="int", default=5, help=
=====================================
setup.py
=====================================
@@ -1,6 +1,6 @@
import setuptools
-commitInfo = " (HEAD -> master, tag: 1.6.1)".strip("( )").split()
+commitInfo = " (HEAD -> master, tag: 1.7.0)".strip("( )").split()
version = commitInfo[commitInfo.index("tag:") + 1].rstrip(",")
setuptools.setup(
View it on GitLab: https://salsa.debian.org/med-team/lamassemble/-/commit/1a45d6c3ee4ecabc511e05706079539d69b8bd5d
--
View it on GitLab: https://salsa.debian.org/med-team/lamassemble/-/commit/1a45d6c3ee4ecabc511e05706079539d69b8bd5d
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20231213/74fdb8d4/attachment-0001.htm>
More information about the debian-med-commit mailing list