[med-svn] [Git][med-team/last-align][upstream] New upstream version 1460
Charles Plessy (@plessy)
gitlab at salsa.debian.org
Fri Jul 14 02:07:11 BST 2023
Charles Plessy pushed to branch upstream at Debian Med / last-align
Commits:
25ca6128 by Charles Plessy at 2023-07-14T09:36:16+09:00
New upstream version 1460
- - - - -
8 changed files:
- bin/last-dotplot
- bin/last-train
- bin/maf-cut
- src/lastal.cc
- src/makefile
- test/last-test.out
- test/maf-cut-test.out
- test/maf-cut-test.sh
Changes:
=====================================
bin/last-dotplot
=====================================
@@ -393,11 +393,19 @@ def allSortedRanges(opts, alignments, alignmentsB,
t2 = mySortedRanges(seqRangesB2, oB2, 1, alignmentsB, s1)
return s1 + t1, s2 + t2
+def myTextsize(textDraw, font, text):
+ try:
+ out = textDraw.textsize(text, font=font)
+ except AttributeError:
+ a, b, c, d = textDraw.textbbox((0, 0), text, font=font)
+ out = c, d
+ return out
+
def sizesPerText(texts, font, textDraw):
sizes = 0, 0
for t in texts:
if textDraw is not None:
- sizes = textDraw.textsize(t, font=font)
+ sizes = myTextsize(textDraw, font, t)
yield t, sizes
def prettyNum(n):
@@ -432,7 +440,7 @@ def rangeLabels(seqRanges, labelOpt, font, textDraw, textRot):
for r in seqRanges:
text = labelText(r, labelOpt)
if textDraw is not None:
- x, y = textDraw.textsize(text, font=font)
+ x, y = myTextsize(textDraw, font, text)
if textRot:
x, y = y, x
yield text, x, y, r[3]
=====================================
bin/last-train
=====================================
@@ -861,8 +861,8 @@ def doTraining(opts, args):
rowProbs = [i / rowSum for i in rowFreqs]
colProbs = [i / colSum for i in colFreqs]
print("# substitution percent identity: {0:.6}".format(100 * pid))
- print("# ref letter %:", *(format(100 * i, "#.3") for i in rowProbs))
- print("# qry letter %:", *(format(100 * i, "#.3") for i in colProbs))
+ print("# ref letter %:", *("%#.3g" % (100 * i) for i in rowProbs))
+ print("# qry letter %:", *("%#.3g" % (100 * i) for i in colProbs))
if opts.X: print("#last -X", opts.X)
if opts.R: print("#last -R", opts.R)
if opts.Q: print("#last -Q", opts.Q)
=====================================
bin/maf-cut
=====================================
@@ -38,14 +38,15 @@ def alignmentRange(cutBeg, cutEnd, sLineFields):
beg = int(sLineFields[2])
if beg >= cutEnd:
return 0, 0
- sequenceWithGaps = sLineFields[6]
- span = len(sequenceWithGaps) - sequenceWithGaps.count("-")
+ span = int(sLineFields[3])
end = beg + span
if end <= cutBeg:
return 0, 0
- seqBeg = max(cutBeg - beg, 0)
+ sequenceWithGaps = sLineFields[6]
+ monomerLen = (len(sequenceWithGaps) - sequenceWithGaps.count("-")) // span
+ seqBeg = max(cutBeg - beg, 0) * monomerLen
alnBeg = alnBegFromSeqBeg(sequenceWithGaps, seqBeg)
- seqEnd = min(cutEnd - beg, span)
+ seqEnd = min(cutEnd - beg, span) * monomerLen
alnEnd = alnEndFromSeqEnd(sequenceWithGaps, seqEnd)
return alnBeg, alnEnd
@@ -57,6 +58,17 @@ def findTheSpecifiedSequence(seqName, mafLines):
return fields
return None
+def sLineCutData(fields, alnBeg, alnEnd):
+ span = int(fields[3])
+ seq = fields[6]
+ n = len(seq)
+ begSpan = alnBeg - seq.count("-", 0, alnBeg)
+ endSpan = n - alnEnd - seq.count("-", alnEnd, n)
+ mul = (n - seq.count("-")) // span
+ b = begSpan // mul
+ e = endSpan // mul
+ return int(fields[2]) + b, span - b - e, begSpan % mul, endSpan % mul
+
def addMafData(dataPerSeq, mafLines, alnBeg, alnEnd):
for line in mafLines:
if line[0] == "s":
@@ -64,9 +76,10 @@ def addMafData(dataPerSeq, mafLines, alnBeg, alnEnd):
seqName = fields[1] # xxx omit theSpecifiedSequence ?
strand = fields[4]
seqLen = int(fields[5])
- oldSeq = fields[6]
- beg = int(fields[2]) + alnBeg - oldSeq.count("-", 0, alnBeg)
- end = beg + alnEnd - alnBeg - oldSeq.count("-", alnBeg, alnEnd)
+ beg, span, begRem, endRem = sLineCutData(fields, alnBeg, alnEnd)
+ alnBeg -= begRem
+ alnEnd += endRem
+ end = beg + span
if strand == "-":
beg, end = seqLen - end, seqLen - beg
seqData = seqLen, beg, end
@@ -76,10 +89,10 @@ def cutMafRecords(mafLines, alnBeg, alnEnd):
for line in mafLines:
fields = line.split()
if line[0] == "s":
- oldSeq = fields[6]
- newSeq = oldSeq[alnBeg:alnEnd]
- beg = int(fields[2]) + alnBeg - oldSeq.count("-", 0, alnBeg)
- span = len(newSeq) - newSeq.count("-")
+ beg, span, begRem, endRem = sLineCutData(fields, alnBeg, alnEnd)
+ alnBeg -= begRem
+ alnEnd += endRem
+ newSeq = fields[6][alnBeg:alnEnd]
yield fields[:2] + [str(beg), str(span)] + fields[4:6] + [newSeq]
elif line[0] == "q":
yield fields[:2] + [fields[2][alnBeg:alnEnd]]
=====================================
src/lastal.cc
=====================================
@@ -1467,7 +1467,8 @@ void writeHeader(countT numOfRefSeqs, countT refLetters, std::ostream &out) {
out << "# Fields: query id, subject id, % identity, alignment length, "
<< "mismatches, gap opens, q. start, q. end, s. start, s. end";
if( evaluer.isGood() ) out << ", evalue, bit score";
- if( args.outputFormat == 'B' ) out << ", query length, subject length";
+ if( args.outputFormat == 'B' )
+ out << ", query length, subject length, raw score";
out << '\n';
}
=====================================
src/makefile
=====================================
@@ -143,7 +143,7 @@ ScoreMatrixData.hh: ../data/*.mat
../build/mat-inc.sh ../data/*.mat > $@
VERSION1 = git describe --dirty
-VERSION2 = echo ' (HEAD -> main, tag: 1456) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
+VERSION2 = echo ' (HEAD -> main, tag: 1460) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
VERSION = \"`test -e ../.git && $(VERSION1) || $(VERSION2)`\"
=====================================
test/last-test.out
=====================================
@@ -3581,7 +3581,7 @@ TEST lastal -fBlastTab+ -pBL62 -b1 -F15 -D1e3 /tmp/last-test galGal3-M-32.fa
# X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -4
# * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1
#
-# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score, query length, subject length
+# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score, query length, subject length, raw score
chrM Q2LCP8 24.22 322 211 8 5259 6128 111 431 2.6e-19 84.9 16775 491 179
chrM Q2LCP8 27.27 88 60 2 11753 12007 102 188 0.0097 30.5 16775 491 59
chrM Q2LCP8 32.00 50 30 2 13893 14033 171 219 0.034 28.7 16775 491 55
=====================================
test/maf-cut-test.out
=====================================
@@ -31,3 +31,15 @@ TAGTAGGTATGTTTGCTTGTAATATTGAATGTAGGAGCGATAGATAAGAG
TTATTATTTATCGTATT
> 95 87 chrMa 131 +
TTATTATTTATCGTATTTACGTTTAATATTATAGGTGAATATATTTATTG
+a score=1607.4 EG2=5e-147 E=2.5e-152
+s UN-L1MA6_pol#LINE/L1 867 11 + 1275 GlyIleThrLeu-ProAspPheLysIleTyrTyr
+s chrUn_KI270748v1 90711 31 - 93321 GGCCTCACA---ATCTGACTTCAAAACATATTAC
+
+a score=3177.2 EG2=0 E=0
+s UN-L1MA6_pol#LINE/L1 884 8 + 1275 ThrAlaTrpTyrTrpHisLysAsn
+s chrUn_KI270748v1 14577 24 + 93321 ACACCATGGTACTGGCATAAACGC
+
+a score=1559.7 EG2=3.8e-142 E=2e-147
+s UN-L1MA6_pol#LINE/L1 884 8 + 1275 ThrAlaTrpTyrTrpHisLys--Asn
+s chrUn_KI270748v1 3235 23 - 93321 tcagcctggtaatcaCGT---AAAAC
+
=====================================
test/maf-cut-test.sh
=====================================
@@ -11,4 +11,8 @@ PATH=../bin:$PATH
awk '(NR-1) % 4 < 2' bs100.fastq | tr '@' '>' |
maf-cut chrM:150-200 bs100.maf -
+
+ maf-cut chrUn_KI270748v1:2579-2609 frameshift-new.maf
+
+ maf-cut UN-L1MA6_pol#LINE/L1:884-892 frameshift-new.maf
} | diff -u maf-cut-test.out -
View it on GitLab: https://salsa.debian.org/med-team/last-align/-/commit/25ca61286794d676ccb767eae7d52c7019dd922b
--
View it on GitLab: https://salsa.debian.org/med-team/last-align/-/commit/25ca61286794d676ccb767eae7d52c7019dd922b
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230714/f5407c2d/attachment-0001.htm>
More information about the debian-med-commit mailing list