[med-svn] [Git][med-team/last-align][upstream] New upstream version 1460

Charles Plessy (@plessy) gitlab at salsa.debian.org
Fri Jul 14 02:07:11 BST 2023



Charles Plessy pushed to branch upstream at Debian Med / last-align


Commits:
25ca6128 by Charles Plessy at 2023-07-14T09:36:16+09:00
New upstream version 1460
- - - - -


8 changed files:

- bin/last-dotplot
- bin/last-train
- bin/maf-cut
- src/lastal.cc
- src/makefile
- test/last-test.out
- test/maf-cut-test.out
- test/maf-cut-test.sh


Changes:

=====================================
bin/last-dotplot
=====================================
@@ -393,11 +393,19 @@ def allSortedRanges(opts, alignments, alignmentsB,
     t2 = mySortedRanges(seqRangesB2, oB2, 1, alignmentsB, s1)
     return s1 + t1, s2 + t2
 
+def myTextsize(textDraw, font, text):
+    try:
+        out = textDraw.textsize(text, font=font)
+    except AttributeError:
+        a, b, c, d = textDraw.textbbox((0, 0), text, font=font)
+        out = c, d
+    return out
+
 def sizesPerText(texts, font, textDraw):
     sizes = 0, 0
     for t in texts:
         if textDraw is not None:
-            sizes = textDraw.textsize(t, font=font)
+            sizes = myTextsize(textDraw, font, t)
         yield t, sizes
 
 def prettyNum(n):
@@ -432,7 +440,7 @@ def rangeLabels(seqRanges, labelOpt, font, textDraw, textRot):
     for r in seqRanges:
         text = labelText(r, labelOpt)
         if textDraw is not None:
-            x, y = textDraw.textsize(text, font=font)
+            x, y = myTextsize(textDraw, font, text)
             if textRot:
                 x, y = y, x
         yield text, x, y, r[3]


=====================================
bin/last-train
=====================================
@@ -861,8 +861,8 @@ def doTraining(opts, args):
         rowProbs = [i / rowSum for i in rowFreqs]
         colProbs = [i / colSum for i in colFreqs]
         print("# substitution percent identity: {0:.6}".format(100 * pid))
-        print("# ref letter %:", *(format(100 * i, "#.3") for i in rowProbs))
-        print("# qry letter %:", *(format(100 * i, "#.3") for i in colProbs))
+        print("# ref letter %:", *("%#.3g" % (100 * i) for i in rowProbs))
+        print("# qry letter %:", *("%#.3g" % (100 * i) for i in colProbs))
     if opts.X: print("#last -X", opts.X)
     if opts.R: print("#last -R", opts.R)
     if opts.Q: print("#last -Q", opts.Q)


=====================================
bin/maf-cut
=====================================
@@ -38,14 +38,15 @@ def alignmentRange(cutBeg, cutEnd, sLineFields):
     beg = int(sLineFields[2])
     if beg >= cutEnd:
         return 0, 0
-    sequenceWithGaps = sLineFields[6]
-    span = len(sequenceWithGaps) - sequenceWithGaps.count("-")
+    span = int(sLineFields[3])
     end = beg + span
     if end <= cutBeg:
         return 0, 0
-    seqBeg = max(cutBeg - beg, 0)
+    sequenceWithGaps = sLineFields[6]
+    monomerLen = (len(sequenceWithGaps) - sequenceWithGaps.count("-")) // span
+    seqBeg = max(cutBeg - beg, 0) * monomerLen
     alnBeg = alnBegFromSeqBeg(sequenceWithGaps, seqBeg)
-    seqEnd = min(cutEnd - beg, span)
+    seqEnd = min(cutEnd - beg, span) * monomerLen
     alnEnd = alnEndFromSeqEnd(sequenceWithGaps, seqEnd)
     return alnBeg, alnEnd
 
@@ -57,6 +58,17 @@ def findTheSpecifiedSequence(seqName, mafLines):
                 return fields
     return None
 
+def sLineCutData(fields, alnBeg, alnEnd):
+    span = int(fields[3])
+    seq = fields[6]
+    n = len(seq)
+    begSpan = alnBeg - seq.count("-", 0, alnBeg)
+    endSpan = n - alnEnd - seq.count("-", alnEnd, n)
+    mul = (n - seq.count("-")) // span
+    b = begSpan // mul
+    e = endSpan // mul
+    return int(fields[2]) + b, span - b - e, begSpan % mul, endSpan % mul
+
 def addMafData(dataPerSeq, mafLines, alnBeg, alnEnd):
     for line in mafLines:
         if line[0] == "s":
@@ -64,9 +76,10 @@ def addMafData(dataPerSeq, mafLines, alnBeg, alnEnd):
             seqName = fields[1]  # xxx omit theSpecifiedSequence ?
             strand = fields[4]
             seqLen = int(fields[5])
-            oldSeq = fields[6]
-            beg = int(fields[2]) + alnBeg - oldSeq.count("-", 0, alnBeg)
-            end = beg + alnEnd - alnBeg - oldSeq.count("-", alnBeg, alnEnd)
+            beg, span, begRem, endRem = sLineCutData(fields, alnBeg, alnEnd)
+            alnBeg -= begRem
+            alnEnd += endRem
+            end = beg + span
             if strand == "-":
                 beg, end = seqLen - end, seqLen - beg
             seqData = seqLen, beg, end
@@ -76,10 +89,10 @@ def cutMafRecords(mafLines, alnBeg, alnEnd):
     for line in mafLines:
         fields = line.split()
         if line[0] == "s":
-            oldSeq = fields[6]
-            newSeq = oldSeq[alnBeg:alnEnd]
-            beg = int(fields[2]) + alnBeg - oldSeq.count("-", 0, alnBeg)
-            span = len(newSeq) - newSeq.count("-")
+            beg, span, begRem, endRem = sLineCutData(fields, alnBeg, alnEnd)
+            alnBeg -= begRem
+            alnEnd += endRem
+            newSeq = fields[6][alnBeg:alnEnd]
             yield fields[:2] + [str(beg), str(span)] + fields[4:6] + [newSeq]
         elif line[0] == "q":
             yield fields[:2] + [fields[2][alnBeg:alnEnd]]


=====================================
src/lastal.cc
=====================================
@@ -1467,7 +1467,8 @@ void writeHeader(countT numOfRefSeqs, countT refLetters, std::ostream &out) {
       out << "# Fields: query id, subject id, % identity, alignment length, "
 	  << "mismatches, gap opens, q. start, q. end, s. start, s. end";
       if( evaluer.isGood() ) out << ", evalue, bit score";
-      if( args.outputFormat == 'B' ) out << ", query length, subject length";
+      if( args.outputFormat == 'B' )
+	out << ", query length, subject length, raw score";
       out << '\n';
     }
 


=====================================
src/makefile
=====================================
@@ -143,7 +143,7 @@ ScoreMatrixData.hh: ../data/*.mat
 	../build/mat-inc.sh ../data/*.mat > $@
 
 VERSION1 = git describe --dirty
-VERSION2 = echo ' (HEAD -> main, tag: 1456) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
+VERSION2 = echo ' (HEAD -> main, tag: 1460) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
 
 VERSION = \"`test -e ../.git && $(VERSION1) || $(VERSION2)`\"
 


=====================================
test/last-test.out
=====================================
@@ -3581,7 +3581,7 @@ TEST lastal -fBlastTab+ -pBL62 -b1 -F15 -D1e3 /tmp/last-test galGal3-M-32.fa
 # X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -4
 # * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4  1
 #
-# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score, query length, subject length
+# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score, query length, subject length, raw score
 chrM	Q2LCP8	24.22	322	211	8	5259	6128	111	431	2.6e-19	84.9	16775	491	179
 chrM	Q2LCP8	27.27	88	60	2	11753	12007	102	188	0.0097	30.5	16775	491	59
 chrM	Q2LCP8	32.00	50	30	2	13893	14033	171	219	0.034	28.7	16775	491	55


=====================================
test/maf-cut-test.out
=====================================
@@ -31,3 +31,15 @@ TAGTAGGTATGTTTGCTTGTAATATTGAATGTAGGAGCGATAGATAAGAG
 TTATTATTTATCGTATT
 > 95 87 chrMa 131 +
 TTATTATTTATCGTATTTACGTTTAATATTATAGGTGAATATATTTATTG
+a score=1607.4 EG2=5e-147 E=2.5e-152
+s UN-L1MA6_pol#LINE/L1   867 11 +  1275 GlyIleThrLeu-ProAspPheLysIleTyrTyr
+s chrUn_KI270748v1     90711 31 - 93321 GGCCTCACA---ATCTGACTTCAAAACATATTAC
+
+a score=3177.2 EG2=0 E=0
+s UN-L1MA6_pol#LINE/L1   884  8 +  1275 ThrAlaTrpTyrTrpHisLysAsn
+s chrUn_KI270748v1     14577 24 + 93321 ACACCATGGTACTGGCATAAACGC
+
+a score=1559.7 EG2=3.8e-142 E=2e-147
+s UN-L1MA6_pol#LINE/L1  884  8 +  1275 ThrAlaTrpTyrTrpHisLys--Asn
+s chrUn_KI270748v1     3235 23 - 93321 tcagcctggtaatcaCGT---AAAAC
+


=====================================
test/maf-cut-test.sh
=====================================
@@ -11,4 +11,8 @@ PATH=../bin:$PATH
 
     awk '(NR-1) % 4 < 2' bs100.fastq | tr '@' '>' |
 	maf-cut chrM:150-200 bs100.maf -
+
+    maf-cut chrUn_KI270748v1:2579-2609 frameshift-new.maf
+
+    maf-cut UN-L1MA6_pol#LINE/L1:884-892 frameshift-new.maf
 } | diff -u maf-cut-test.out -



View it on GitLab: https://salsa.debian.org/med-team/last-align/-/commit/25ca61286794d676ccb767eae7d52c7019dd922b

-- 
View it on GitLab: https://salsa.debian.org/med-team/last-align/-/commit/25ca61286794d676ccb767eae7d52c7019dd922b
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230714/f5407c2d/attachment-0001.htm>


More information about the debian-med-commit mailing list