[med-svn] [Git][med-team/last-align][master] 4 commits: routine-update: New upstream version
Nilesh Patra (@nilesh)
gitlab at salsa.debian.org
Sun Aug 22 20:38:34 BST 2021
Nilesh Patra pushed to branch master at Debian Med / last-align
Commits:
fd8c7492 by Nilesh Patra at 2021-08-23T00:56:24+05:30
routine-update: New upstream version
- - - - -
b3ca634d by Nilesh Patra at 2021-08-23T00:56:26+05:30
New upstream version 1256
- - - - -
3e205da7 by Nilesh Patra at 2021-08-23T00:56:35+05:30
Update upstream source from tag 'upstream/1256'
Update to upstream version '1256'
with Debian dir 83550923f4c3323e5d8d894fa86b696262a3696e
- - - - -
6ff84ca5 by Nilesh Patra at 2021-08-23T00:57:43+05:30
routine-update: Ready to upload to unstable
- - - - -
5 changed files:
- bin/last-dotplot
- debian/changelog
- doc/last-dotplot.rst
- src/makefile
- src/mcf_frameshift_xdrop_aligner.cc
Changes:
=====================================
bin/last-dotplot
=====================================
@@ -589,112 +589,121 @@ def expandedSeqDict(seqDict):
newDict[base] = x
return newDict
-def readBed(fileName, rangeDict):
- for line in myOpen(fileName):
- w = line.split()
- if not w: continue
- if not w[1].isdigit():
- w = w[1:]
- seqName = w[0]
- if seqName not in rangeDict: continue
- seqRanges = rangeDict[seqName]
- beg = int(w[1])
- end = int(w[2])
- if all(beg >= i[2] or end <= i[1] for i in seqRanges):
- continue
- itemName = w[3] if len(w) > 3 and w[3] != "." else ""
+def commaSeparatedInts(text):
+ return map(int, text.rstrip(",").split(","))
+
+def annotsFromBedOrAgp(opts, rangeDict, fields):
+ seqName = fields[0]
+ if seqName not in rangeDict: return
+ end = int(fields[2])
+ if len(fields) > 7 and fields[4].isalpha(): # agp format, or gap.txt
+ if fields[4] in "NU" and fields[5].isdigit():
+ beg = end - int(fields[5]) # zero-based coordinate
+ if fields[7] == "yes":
+ yield 30000, opts.bridged_color, seqName, beg, end, ""
+ else:
+ yield 20000, opts.unbridged_color, seqName, beg, end, ""
+ else: # BED format
+ beg = int(fields[1])
+ itemName = fields[3] if len(fields) > 3 and fields[3] != "." else ""
layer = 900
color = "#fbf"
- if len(w) > 4:
- if w[4] != ".":
- layer = float(w[4])
- if len(w) > 5:
- if len(w) > 8 and w[8].count(",") == 2:
- color = "rgb(" + w[8] + ")"
+ if len(fields) > 4:
+ if fields[4] != ".":
+ layer = float(fields[4])
+ if len(fields) > 5:
+ if len(fields) > 8 and fields[8].count(",") == 2:
+ color = "rgb(" + fields[8] + ")"
else:
- strand = w[5]
- isRev = (seqRanges[0][3] > 1)
+ strand = fields[5]
+ isRev = (rangeDict[seqName][0][3] > 1)
if strand == "+" and not isRev or strand == "-" and isRev:
color = "#ffe8e8"
if strand == "-" and not isRev or strand == "+" and isRev:
color = "#e8e8ff"
yield layer, color, seqName, beg, end, itemName
-def commaSeparatedInts(text):
- return map(int, text.rstrip(",").split(","))
-
-def readGenePred(opts, fileName, rangeDict):
- for line in myOpen(fileName):
- fields = line.split() # xxx tab ???
- if not fields: continue
- geneName = fields[12 if len(fields) > 12 else 0] # XXX ???
- if fields[2] not in "+-":
- fields = fields[1:]
- seqName = fields[1]
- if seqName not in rangeDict: continue
- seqRanges = rangeDict[seqName]
- #strand = fields[2]
+def annotsFromGpd(opts, rangeDict, fields, geneName): # xxx split on tabs?
+ seqName = fields[1]
+ if seqName in rangeDict:
cdsBeg = int(fields[5])
cdsEnd = int(fields[6])
exonBegs = commaSeparatedInts(fields[8])
exonEnds = commaSeparatedInts(fields[9])
for beg, end in zip(exonBegs, exonEnds):
- if all(beg >= i[2] or end <= i[1] for i in seqRanges):
- continue
yield 300, opts.exon_color, seqName, beg, end, geneName
b = max(beg, cdsBeg)
e = min(end, cdsEnd)
if b < e: yield 400, opts.cds_color, seqName, b, e, ""
-def readRmsk(fileName, rangeDict):
- for line in myOpen(fileName):
- fields = line.split()
- if len(fields) == 17: # rmsk.txt
- seqName = fields[5]
- if seqName not in rangeDict: continue # do this ASAP for speed
- beg = int(fields[6])
- end = int(fields[7])
- strand = fields[9]
- repeatName = fields[10]
- repeatClass = fields[11]
- elif len(fields) == 15: # .out
- seqName = fields[4]
- if seqName not in rangeDict: continue
- beg = int(fields[5]) - 1
- end = int(fields[6])
- strand = fields[8]
- repeatName = fields[9]
- repeatClass = fields[10].split("/")[0]
- else:
- continue
- seqRanges = rangeDict[seqName]
- if all(beg >= i[2] or end <= i[1] for i in seqRanges):
- continue
- if repeatClass in ("Low_complexity", "Simple_repeat", "Satellite"):
- yield 200, "#fbf", seqName, beg, end, repeatName
- elif (strand == "+") != (seqRanges[0][3] > 1):
- yield 100, "#ffe8e8", seqName, beg, end, repeatName
- else:
- yield 100, "#e8e8ff", seqName, beg, end, repeatName
-
-def isExtraFirstGapField(fields):
- return fields[4].isdigit()
-
-def readGaps(opts, fileName, rangeDict):
- '''Read locations of unsequenced gaps, from an agp or gap file.'''
- for line in myOpen(fileName):
- w = line.split()
- if not w or w[0][0] == "#": continue
- if isExtraFirstGapField(w): w = w[1:]
- if w[4] not in "NU": continue
- seqName = w[0]
- if seqName not in rangeDict: continue
- end = int(w[2])
- beg = end - int(w[5]) # zero-based coordinate
- if w[7] == "yes":
- yield 3000, opts.bridged_color, seqName, beg, end, ""
- else:
- yield 2000, opts.unbridged_color, seqName, beg, end, ""
+def annotsFromGff(opts, line, seqName):
+ fields = line.rstrip().split("\t")
+ feature = fields[2]
+ beg = int(fields[3]) - 1
+ end = int(fields[4])
+ if feature == "exon":
+ geneName = fields[8]
+ if ";" in geneName or "=" in geneName:
+ parts = geneName.rstrip(";").split(";")
+ attrs = dict(re.split('[= ]', i.strip(), 1) for i in parts)
+ if "gene" in attrs:
+ geneName = attrs["gene"] # seems good for NCBI gff
+ elif "Name" in attrs:
+ geneName = attrs["Name"]
+ else:
+ geneName = ""
+ yield 300, opts.exon_color, seqName, beg, end, geneName
+ elif feature == "CDS":
+ yield 400, opts.cds_color, seqName, beg, end, ""
+
+def annotsFromRep(rangeDict, seqName, beg, end, strand, repName, repClass):
+ simple = "Low_complexity", "Simple_repeat", "Satellite"
+ if repClass.startswith(simple):
+ yield 200, "#fbf", seqName, beg, end, repName
+ elif (strand == "+") != (rangeDict[seqName][0][3] > 1):
+ yield 100, "#ffe8e8", seqName, beg, end, repName
+ else:
+ yield 100, "#e8e8ff", seqName, beg, end, repName
+
+def annotsFromFiles(opts, fileNames, rangeDict):
+ isDig = str.isdigit
+ for fileName in fileNames:
+ for line in myOpen(fileName):
+ w = line.split()
+ n = len(w)
+ if n > 10 and w[8] in "+C-" and isDig(w[5]) and isDig(w[6]):
+ seq = w[4] # RepeatMasker .out
+ if seq not in rangeDict: continue # do this ASAP for speed
+ beg = int(w[5]) - 1
+ end = int(w[6])
+ g = annotsFromRep(rangeDict, seq, beg, end, w[8], w[9], w[10])
+ elif n > 11 and w[9] in "+-" and isDig(w[6]) and isDig(w[7]):
+ seq = w[5] # rmsk.txt
+ if seq not in rangeDict: continue
+ beg = int(w[6])
+ end = int(w[7])
+ g = annotsFromRep(rangeDict, seq, beg, end, w[9], w[10], w[11])
+ elif n > 8 and w[6] in "+-." and isDig(w[3]) and isDig(w[4]):
+ seqName = w[0]
+ if seqName not in rangeDict: continue
+ g = annotsFromGff(opts, line, seqName)
+ elif n > 9 and w[2] in "+-" and isDig(w[4] + w[5] + w[6]):
+ geneName = w[12 if n > 12 else 0] # XXX ???
+ g = annotsFromGpd(opts, rangeDict, w, geneName)
+ elif n > 10 and w[3] in "+-" and isDig(w[5] + w[6] + w[7]):
+ geneName = w[12 if n > 12 else 0] # XXX ???
+ g = annotsFromGpd(opts, rangeDict, w[1:], geneName)
+ elif n > 2 and isDig(w[1]) and isDig(w[2]):
+ g = annotsFromBedOrAgp(opts, rangeDict, w)
+ elif n > 3 and isDig(w[2]) and isDig(w[3]):
+ g = annotsFromBedOrAgp(opts, rangeDict, w[1:])
+ else:
+ continue
+ if line[0] == "#": continue
+ for i in g:
+ layer, color, seqName, beg, end, name = i
+ if any(beg < r[2] and end > r[1] for r in rangeDict[seqName]):
+ yield i
def bedBoxes(annots, rangeDict, limit, isTop, bpPerPix):
beds, textSizes, margin = annots
@@ -707,7 +716,7 @@ def bedBoxes(annots, rangeDict, limit, isTop, bpPerPix):
if beg >= end: continue
if isReverseRange:
beg, end = -end, -beg
- if layer <= 1000:
+ if layer <= 10000:
# include partly-covered pixels
pixBeg = (origin + beg) // bpPerPix
pixEnd = div_ceil(origin + end, bpPerPix)
@@ -855,13 +864,9 @@ def remainingSequenceRanges(seqRanges, alignments, seqIndex):
remainingSequences = set(i[seqIndex] for i in alignments)
return [i for i in seqRanges if i[0] in remainingSequences]
-def readAnnotations(opts, font, textDraw, sortedRanges, totalLength,
- bedFile, repFile, geneFile, gapFile):
+def readAnnots(opts, font, textDraw, sortedRanges, totalLength, fileNames):
rangeDict = expandedSeqDict(dict(rangesPerSeq(sortedRanges)))
- annots = sorted(itertools.chain(readBed(bedFile, rangeDict),
- readRmsk(repFile, rangeDict),
- readGenePred(opts, geneFile, rangeDict),
- readGaps(opts, gapFile, rangeDict)))
+ annots = sorted(annotsFromFiles(opts, fileNames, rangeDict))
names = set(i[5] for i in annots)
textSizes = dict(sizesPerText(names, font, textDraw))
maxTextLength = totalLength // 2
@@ -937,12 +942,12 @@ def lastDotplot(opts, args):
logging.info("reading annotations...")
- annots1 = readAnnotations(opts, font, textDraw, sortedRanges1, opts.height,
- opts.bed1, opts.rmsk1, opts.genePred1, opts.gap1)
+ annots1 = readAnnots(opts, font, textDraw, sortedRanges1, opts.height,
+ opts.bed1)
bMargin = annots1[-1]
- annots2 = readAnnotations(opts, font, textDraw, sortedRanges2, opts.width,
- opts.bed2, opts.rmsk2, opts.genePred2, opts.gap2)
+ annots2 = readAnnots(opts, font, textDraw, sortedRanges2, opts.width,
+ opts.bed2)
rMargin = annots2[-1]
maxPixels1 = opts.width - lMargin - rMargin
@@ -1057,10 +1062,6 @@ if __name__ == "__main__":
op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
default=[],
help="which sequences to show from the 2nd genome")
- op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
- help="color for forward alignments (default: %default)")
- op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
- help="color for reverse alignments (default: %default)")
op.add_option("--alignments", metavar="FILE", help="secondary alignments")
op.add_option("--sort1", default="1", metavar="N",
help="genome1 sequence order: 0=input order, 1=name order, "
@@ -1088,11 +1089,12 @@ if __name__ == "__main__":
"2=alignments adjacent in genome2 (default=%default)")
op.add_option("--border-pixels", metavar="INT", type="int", default=1,
help="number of pixels between sequences (default=%default)")
- op.add_option("--border-color", metavar="COLOR", default="black",
- help="color for pixels between sequences (default=%default)")
- # --break-color and/or --break-pixels for intra-sequence breaks?
- op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
- help="margin color")
+ op.add_option("-a", "--bed1", "--rmsk1", "--genePred1", "--gap1",
+ action="append", default=[], metavar="FILE",
+ help="read genome1 annotations")
+ op.add_option("-b", "--bed2", "--rmsk2", "--genePred2", "--gap2",
+ action="append", default=[], metavar="FILE",
+ help="read genome2 annotations")
og = optparse.OptionGroup(op, "Text options")
og.add_option("-f", "--fontfile", metavar="FILE",
@@ -1111,33 +1113,20 @@ if __name__ == "__main__":
help="text rotation for the 2nd genome (default=%default)")
op.add_option_group(og)
- og = optparse.OptionGroup(op, "Annotation options")
- og.add_option("--bed1", metavar="FILE",
- help="read genome1 annotations from BED file")
- og.add_option("--bed2", metavar="FILE",
- help="read genome2 annotations from BED file")
- og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
- "RepeatMasker .out or rmsk.txt file")
- og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
- "RepeatMasker .out or rmsk.txt file")
- op.add_option_group(og)
-
- og = optparse.OptionGroup(op, "Gene options")
- og.add_option("--genePred1", metavar="FILE",
- help="read genome1 genes from genePred file")
- og.add_option("--genePred2", metavar="FILE",
- help="read genome2 genes from genePred file")
+ og = optparse.OptionGroup(op, "Color options")
+ og.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
+ help="color for forward alignments (default: %default)")
+ og.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
+ help="color for reverse alignments (default: %default)")
+ og.add_option("--border-color", metavar="COLOR", default="black",
+ help="color for pixels between sequences (default=%default)")
+ # --break-color and/or --break-pixels for intra-sequence breaks?
+ og.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
+ help="margin color")
og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
help="color for exons (default=%default)")
og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
help="color for protein-coding regions (default=%default)")
- op.add_option_group(og)
-
- og = optparse.OptionGroup(op, "Unsequenced gap options")
- og.add_option("--gap1", metavar="FILE",
- help="read genome1 unsequenced gaps from agp or gap file")
- og.add_option("--gap2", metavar="FILE",
- help="read genome2 unsequenced gaps from agp or gap file")
og.add_option("--bridged-color", metavar="COLOR", default="yellow",
help="color for bridged gaps (default: %default)")
og.add_option("--unbridged-color", metavar="COLOR", default="orange",
=====================================
debian/changelog
=====================================
@@ -1,7 +1,7 @@
-last-align (1254-1) UNRELEASED; urgency=medium
+last-align (1256-1) unstable; urgency=medium
* d/watch: Fix fetch URL - last-align has moved to gitlab
- * New upstream version 1254
+ * New upstream version 1256
* d/p/helpMakefiles.patch: Refresh helpmakefile.patch
* d/p/simde: Attempt adapting simde to new upstream
* Drop Patches:
@@ -25,7 +25,7 @@ last-align (1254-1) UNRELEASED; urgency=medium
* d/salsa-ci.yml: Disable blhc and reprotest
* d/lintian-overrides: Add lintian override for maf-cut manpage
- -- Nilesh Patra <nilesh at debian.org> Mon, 09 Aug 2021 17:25:05 +0530
+ -- Nilesh Patra <nilesh at debian.org> Mon, 23 Aug 2021 00:56:44 +0530
last-align (1179-1) unstable; urgency=medium
=====================================
doc/last-dotplot.rst
=====================================
@@ -2,7 +2,7 @@ last-dotplot
============
This script makes a dotplot, a.k.a. Oxford Grid, of pair-wise sequence
-alignments in MAF or LAST tabular format. It requires the `Python
+alignments in MAF_ or LAST tabular format. It requires the `Python
Imaging Library`_ to be installed. It can be used like this::
last-dotplot my-alignments my-plot.png
@@ -11,6 +11,10 @@ The output can be in any format supported by the Imaging Library::
last-dotplot alns alns.gif
+It can read alignments from a pipe like this::
+
+ ... | last-dotplot - my-plot.png
+
Terminology
-----------
@@ -37,10 +41,14 @@ Options
Which sequences to show from the 1st (horizontal) genome.
-2 PATTERN, --seq2=PATTERN
Which sequences to show from the 2nd (vertical) genome.
--c COLOR, --forwardcolor=COLOR
- Color for forward alignments.
--r COLOR, --reversecolor=COLOR
- Color for reverse alignments.
+-a FILE
+ Read annotations for the 1st (horizontal) genome, and draw them as
+ vertical stripes. For backwards compatibility, these options have
+ the same meaning: ``--bed1 --rmsk1 --genePred1 --gap1``.
+-b FILE
+ Read annotations for the 2nd (vertical) genome, and draw them as
+ horizontal stripes. For backwards compatibility, these options
+ have the same meaning: ``--bed2 --rmsk2 --genePred2 --gap2``.
--alignments=FILE
Read secondary alignments. For example: we could use primary
alignment data with one human DNA read aligned to the human
@@ -91,10 +99,6 @@ Options
in the 2nd (vertical) genome.
--border-pixels=INT
Number of pixels between sequences.
---border-color=COLOR
- Color for pixels between sequences.
---margin-color=COLOR
- Color for the margins.
Text options
~~~~~~~~~~~~
@@ -116,58 +120,49 @@ Text options
--rot2=ROT
Text rotation for the 2nd genome: h(orizontal) or v(ertical).
-Annotation options
-~~~~~~~~~~~~~~~~~~
-
-These options read annotations of sequence segments, and draw them as
-colored horizontal or vertical stripes. This looks good only if the
-annotations are reasonably sparse: e.g. you can't sensibly view 20000
-gene annotations in one small dotplot.
-
---bed1=FILE
- Read BED-format_ annotations for the 1st genome. They are drawn
- as stripes, with coordinates given by the first three BED fields.
- The color is specified by the RGB field if present, else pale red
- if the strand is "+", pale blue if "-", or pale purple.
---bed2=FILE
- Read BED-format annotations for the 2nd genome.
---rmsk1=FILE
- Read repeat annotations for the 1st genome, in RepeatMasker .out
- or rmsk.txt format. The color is pale purple for "low
- complexity", "simple repeats", and "satellites", else pale red
- for "+" strand and pale blue for "-" strand.
---rmsk2=FILE
- Read repeat annotations for the 2nd genome.
-
-Gene options
-~~~~~~~~~~~~
+Color options
+~~~~~~~~~~~~~
---genePred1=FILE
- Read gene annotations for the 1st genome in `genePred format`_.
---genePred2=FILE
- Read gene annotations for the 2nd genome.
+-c COLOR, --forwardcolor=COLOR
+ Color for forward alignments.
+-r COLOR, --reversecolor=COLOR
+ Color for reverse alignments.
+--border-color=COLOR
+ Color for pixels between sequences.
+--margin-color=COLOR
+ Color for the margins.
--exon-color=COLOR
Color for exons.
--cds-color=COLOR
Color for protein-coding regions.
+--bridged-color=COLOR
+ Color for unsequenced gaps with "yes" evidence of linkage.
+--unbridged-color=COLOR
+ Color for unsequenced gaps with "no" evidence of linkage.
-Unsequenced gap options
-~~~~~~~~~~~~~~~~~~~~~~~
+Annotations
+-----------
-Note: these "gaps" are *not* alignment gaps (indels): they are regions
-of unknown sequence.
+Options ``-a`` and ``-b`` can read annotations in these formats:
---gap1=FILE
- Read unsequenced gaps in the 1st genome from an agp or gap file.
---gap2=FILE
- Read unsequenced gaps in the 2nd genome from an agp or gap file.
---bridged-color=COLOR
- Color for bridged gaps.
---unbridged-color=COLOR
- Color for unbridged gaps.
+* BED_: The color is specified by the RGB field if present, else pale
+ red if the strand is "+", pale blue if "-", or pale purple. BED
+ lines with higher score are drawn on top of ones with lower score.
+
+* Repeatmasker_ .out, rmsk.txt: The color is pale purple for "low
+ complexity", "simple repeats", and "satellites", else pale red for
+ "+" strand and pale blue for "-" strand.
+
+* genePred_, GFF/GTF: Exons are shown in green, with a darker shade
+ for protein-coding regions.
+
+* AGP_, gap.txt: Unsequenced gaps are shown, but only if the gap
+ covers at least one whole pixel.
-An unsequenced gap will be shown only if it covers at least one whole
-pixel.
+You can use these options multiple times, e.g. ``-a stuff.bed -a
+more.bed -a rmsk.txt``. Annotations look good only if reasonably
+sparse, e.g. you can't sensibly view 20000 gene annotations in one
+small dotplot.
Choosing sequences
------------------
@@ -224,5 +219,8 @@ Colors can be specified in `various ways described here
<https://pillow.readthedocs.io/en/stable/reference/ImageColor.html>`_.
.. _Python Imaging Library: https://pillow.readthedocs.io/
-.. _BED-format: https://genome.ucsc.edu/FAQ/FAQformat.html#format1
-.. _genePred format: https://genome.ucsc.edu/FAQ/FAQformat.html#format9
+.. _MAF: https://genome.ucsc.edu/FAQ/FAQformat.html#format5
+.. _BED: https://genome.ucsc.edu/FAQ/FAQformat.html#format1
+.. _genePred: https://genome.ucsc.edu/FAQ/FAQformat.html#format9
+.. _RepeatMasker: http://www.repeatmasker.org/
+.. _AGP: https://www.ncbi.nlm.nih.gov/assembly/agp/
=====================================
src/makefile
=====================================
@@ -143,7 +143,7 @@ ScoreMatrixData.hh: ../data/*.mat
../build/mat-inc.sh ../data/*.mat > $@
VERSION1 = git describe --dirty
-VERSION2 = echo ' (HEAD -> main, tag: 1254) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
+VERSION2 = echo ' (HEAD -> main, tag: 1256) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
VERSION = \"`test -e ../.git && $(VERSION1) || $(VERSION2)`\"
=====================================
src/mcf_frameshift_xdrop_aligner.cc
=====================================
@@ -19,46 +19,59 @@ double FrameshiftXdropAligner::forward(const uchar *protein,
const const_dbl_ptr *substitutionProbs,
const GapCosts &gapCosts,
double probDropLimit) {
+ const int seqIncrement = isRightwardExtension ? 1 : -1;
if (isRightwardExtension) {
--protein; --frame0; --frame1; --frame2;
}
proteinPtr = protein;
- frames[0] = frame0;
+ frames[0] = frame0 + seqIncrement;
frames[1] = frame1;
frames[2] = frame2;
- const int seqIncrement = isRightwardExtension ? 1 : -1;
-
- const double delOpenProb = gapCosts.delProbPieces[0].openProb;
- const double insOpenProb = gapCosts.insProbPieces[0].openProb;
- const double delProb1 = gapCosts.delProb1;
- const double delProb2 = gapCosts.delProb2;
- const double delProb3 = gapCosts.delProb3;
- const double insProb1 = gapCosts.insProb1;
- const double insProb2 = gapCosts.insProb2;
- const double insProb3 = gapCosts.insProb3;
- int runOfDrops = 2;
- int runOfEdges = 0;
- int numCells = 1;
+ int numCells = 1; // number of DynProg cells in this antidiagonal
size_t proteinEnd = 1;
- size_t diagPos6 = padSize - 1; // xxx or 0 ?
+ size_t diagPos6 = 2 * padSize - 1;
size_t horiPos5 = diagPos6 + padSize;
size_t horiPos4 = horiPos5 + padSize;
size_t horiPos3 = horiPos4 + padSize;
size_t vertPos2 = horiPos3 + padSize + 1;
size_t vertPos1 = vertPos2 + padSize;
- size_t thisPos = vertPos1; // xxx or + padSize ?
- size_t antidiagonal = 0;
+ size_t thisPos = vertPos1 + numCells;
+ size_t antidiagonal = 1;
+
+ if (xdropShape.empty()) {
+ xdropShape.resize(3);
+ xdropShape[0] = vertPos2;
+ xdropShape[1] = proteinEnd;
+ xdropShape[2] = thisPos;
+ xFwdProbs.resize(thisPos);
+ yFwdProbs.resize(thisPos);
+ zFwdProbs.resize(thisPos);
+ xFwdProbs[vertPos1] = 1;
+ yFwdProbs[vertPos1] = 1;
+ zFwdProbs[vertPos1] = 1;
+ }
- double sumOfProbRatios = 0;
+ double sumOfProbRatios = 1;
double logSumOfProbRatios = 0;
- if (xdropShape.empty()) xdropShape.assign(1, thisPos);
+ if (substitutionProbs[0][*(frames[0])] <= 0) { // at end of DNA sequence
+ numOfAntidiagonals = 1;
+ rescaledSumOfProbRatios = 1;
+ return 0;
+ }
+
+ const double delOpenProb = gapCosts.delProbPieces[0].openProb;
+ const double insOpenProb = gapCosts.insProbPieces[0].openProb;
+ const double delProb1 = gapCosts.delProb1;
+ const double delProb2 = gapCosts.delProb2;
+ const double delProb3 = gapCosts.delProb3;
+ const double insProb1 = gapCosts.insProb1;
+ const double insProb2 = gapCosts.insProb2;
+ const double insProb3 = gapCosts.insProb3;
- resizeFwdProbsIfSmaller(thisPos);
- double behindProb = substitutionProbs[*protein][*frame0];
- assert(behindProb > 0); // XXX might fail?
- xFwdProbs[padSize - 1] = 1 / behindProb; // xxx or padSize ?
+ int runOfDrops = 0;
+ int runOfEdges = (substitutionProbs[*(protein + seqIncrement)][0] > 0);
while (1) {
const uchar *s1 = proteinPtr;
@@ -147,7 +160,6 @@ double FrameshiftXdropAligner::forward(const uchar *protein,
}
if (antidiagonal % rescaleStep == 0) {
- assert(sumOfProbRatios > 0);
double scale = 1 / sumOfProbRatios;
size_t numOfRescales = antidiagonal / rescaleStep;
if (rescales.size() < numOfRescales) rescales.resize(numOfRescales);
View it on GitLab: https://salsa.debian.org/med-team/last-align/-/compare/ad50112696d3c33953d390de717219dda0fc6525...6ff84ca5c95964eec6989226c209afacdcc937c8
--
View it on GitLab: https://salsa.debian.org/med-team/last-align/-/compare/ad50112696d3c33953d390de717219dda0fc6525...6ff84ca5c95964eec6989226c209afacdcc937c8
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210822/51a64f6d/attachment-0001.htm>
More information about the debian-med-commit
mailing list