[med-svn] [Git][med-team/dnarrange][upstream] New upstream version 1.6.3
Nilesh Patra (@nilesh)
gitlab at salsa.debian.org
Sat Nov 1 16:37:01 GMT 2025
Nilesh Patra pushed to branch upstream at Debian Med / dnarrange
Commits:
5299fb6b by Nilesh Patra at 2025-11-01T21:59:56+05:30
New upstream version 1.6.3
- - - - -
7 changed files:
- README.md
- dnarrange
- dnarrange-genes
- dnarrange-link
- dnarrange-merge
- last-multiplot
- setup.py
Changes:
=====================================
README.md
=====================================
@@ -2,9 +2,10 @@
This is a method to find rearrangements in "long" DNA reads relative
to a genome sequence. It can characterize changes such as chromosome
-shattering, gene conversion, and processed pseudogene insertion. For
-more details, please see: [A pipeline for complete characterization of
-complex germline rearrangements from long DNA reads][paper].
+shattering, gene conversion, virus insertion, and processed pseudogene
+insertion. For more details, please see: [A pipeline for complete
+characterization of complex germline rearrangements from long DNA
+reads][paper].
You can install dnarrange (together with all other software that it
uses) from [Bioconda][] or [Debian Med][].
@@ -65,6 +66,17 @@ whose "strongest" rearrangement type is shared with a control read,
where "strength" is defined by: inter-chromosome > inter-strand >
non-colinear > big gap.
+#### Insertions of foreign DNA
+
+You can find insertions of (e.g.) a virus genome into a host genome.
+First, align the DNA reads to a combined "genome" with host and virus
+chromosomes. Then do:
+
+ dnarrange --insert=VirusSeqName alignments.maf > groups.maf
+
+This will get DNA reads that align partly to the sequence called
+`VirusSeqName` and partly to other sequences.
+
### Step 3: Draw pictures of the groups
Draw a picture of each group, showing the read-to-genome alignments,
@@ -81,6 +93,7 @@ This in turn requires the [Python Imaging
Library](https://pillow.readthedocs.io/) to be installed.
* A useful option is `-a`, to display files of genes, repeats, etc.
+* It shows at most 10 reads per group: you can change that with option `-m`.
Tips for viewing the pictures on a Mac: open the folder in Finder, and
@@ -254,11 +267,16 @@ You can also get groups near genes, with option `-o1`:
- `-m PROB`, `--max-mismap=PROB`: discard any alignment with mismap
probability > PROB (default=1).
+- `--insert=NAME`: find insertions of the sequence with this name.
+
- `--shrink`: write the output in a compact format. This format can
be read by `dnarrange`.
- `-v`, `--verbose`: show progress messages.
+- `-w W`, `--width=W`: line-wrap width of group summary lines
+ (default=79). 0 means no wrapping.
+
## `dnarrange-merge` options
You can get the rearranged reads, without merging them, like this:
=====================================
dnarrange
=====================================
@@ -1,9 +1,10 @@
-#! /usr/bin/env python
+#! /usr/bin/env python3
# Author: Martin C. Frith 2018
# SPDX-License-Identifier: GPL-3.0-or-later
from __future__ import print_function
+import collections
import functools
import gzip
import heapq
@@ -221,6 +222,9 @@ def isBigGap(sortedAlignmentsOfOneQuery, opts):
return False
def rearrangementType(opts, alignmentsOfOneQuery):
+ if opts.insert:
+ seqNames = set(i[3] for i in alignmentsOfOneQuery)
+ return "I" if len(seqNames) > 1 and opts.insert in seqNames else None
if "C" in opts.types and isInterChromosome(alignmentsOfOneQuery):
return "C"
if "S" in opts.types and isInterStrand(alignmentsOfOneQuery):
@@ -331,6 +335,8 @@ def isAdjacent(alnX, alnY):
def isNonlinearPair(opts, types, alnX, alnY):
if alnX[3] != alnY[3]:
+ if "I" in types and opts.insert in (alnX[3], alnY[3]):
+ return True
return "C" in types and isDifferentChromosomes(alnX[3], alnY[3])
if alnX[8] is not alnY[8]:
return "S" in types
@@ -428,11 +434,12 @@ def isNoSharedRearrangement(opts, okAlignments, ngAlignments,
def linksBetweenQueries(opts, alignments, alignmentsPerQuery):
logging.info("linking...")
+ types = "I" if opts.insert else opts.types
for alignmentsOfOneQuery in alignmentsPerQuery:
qryNumA = alignmentsOfOneQuery[0][0]
overlaps = sorted(overlapsOfOneQuery(alignments, alignmentsOfOneQuery))
for qryNumB, g in groupby(overlaps, itemgetter(0)):
- strand = sharedRearrangement(opts, opts.types,
+ strand = sharedRearrangement(opts, types,
alignments, alignments, g)
if strand:
yield qryNumA, qryNumB, strand == "-"
@@ -452,52 +459,34 @@ def linksBetweenClumps(alignments, alignmentsPerQuery, clumps):
if b < len(clumps) and b != a:
yield min(a, b), max(a, b), False
-def addJumpsIfShared(opts, alignments, alnA, alnB):
- if alnA[6] == 0: return
- alnAX = alignments[alnA[6] - 1]
- if alnA[8] is alnB[8]:
- if alnB[6] == 0: return
- alnBX = alignments[alnB[6] - 1]
- if (alnAX[8] is alnBX[8] and alnAX[3] == alnBX[3]
- and alnAX[4] < alnBX[5] and alnAX[5] > alnBX[4]
- and alnAX[0] == alnA[0] and alnBX[0] == alnB[0]
- and isSharedRearrangement(opts, alnAX, alnA, alnBX, alnB)):
- alnA[7].append(alnB[0])
- alnB[7].append(alnA[0])
- else:
- if alnB[6] + 1 == len(alignments): return
- alnBX = alignments[alnB[6] + 1]
- if (alnAX[8] is not alnBX[8] and alnAX[3] == alnBX[3]
- and alnAX[4] < alnBX[5] and alnAX[5] > alnBX[4]
- and alnAX[0] == alnA[0] and alnBX[0] == alnB[0]
- and isSharedRearrangement(opts, alnAX, alnA, alnBX, alnB)):
- alnA[7].append(alnB[0])
- alnBX[7].append(alnAX[0])
-
-def addSharedJumps(opts, alignments, myAlignmentsInGenomeOrder):
- logging.info("finding shared jumps...")
+def addSharedJumps(opts, jumpsInGenomeOrder):
stash = []
- refName = ""
- for alnB in myAlignmentsInGenomeOrder:
- if alnB[3] != refName:
- n = 0
- refName = alnB[3]
- qryNum = alnB[0]
- refBeg = alnB[4]
+ n = 0
+ for jumpB in jumpsInGenomeOrder:
+ alnBX, alnBY, overlapsB = jumpB
+ qryNumB = alnBX[0]
+ refBegBX = alnBX[4]
+ refBegBY = alnBY[4]
+ refEndBY = alnBY[5]
+ isRevB = (alnBX[6] > alnBY[6])
+ if isRevB: alnBX, alnBY = alnBY, alnBX
j = 0
- for alnA in islice(stash, n):
- if alnA[5] > refBeg: # overlap in ref
- if alnA[0] < qryNum:
- addJumpsIfShared(opts, alignments, alnA, alnB)
- elif alnA[0] > qryNum:
- addJumpsIfShared(opts, alignments, alnB, alnA)
- stash[j] = alnA
+ for jumpA in islice(stash, n):
+ alnAX, alnAY, overlapsA = jumpA
+ if alnAX[5] > refBegBX:
+ qryNumA = alnAX[0]
+ if (alnAY[4] < refEndBY and alnAY[5] > refBegBY and
+ qryNumA != qryNumB):
+ if isRevB: alnAX, alnAY = alnAY, alnAX
+ if isSharedRearrangement(opts, alnBX, alnBY, alnAX, alnAY):
+ overlapsA.append(qryNumB)
+ overlapsB.append(qryNumA)
+ stash[j] = jumpA
j += 1
else:
- overlapsA = alnA[7]
overlapsA[:] = list(set(overlapsA)) # try to save memory
if len(stash) == j: stash.append(None)
- stash[j] = alnB
+ stash[j] = jumpB
n = j + 1
def isAllJumpsSupported(opts, goodQryNums, alignmentsOfOneQuery):
@@ -516,6 +505,37 @@ def clumpSortKey(alignmentsPerQuery, clump):
k = min(querySortKey(alignmentsPerQuery[i]) for i, isFlipped in clump)
return -len(clump), k
+def isInsertInRevStrand(insertSeqName, alignmentsOfOneQuery):
+ for j, y in enumerate(alignmentsOfOneQuery):
+ if j:
+ x = alignmentsOfOneQuery[j - 1]
+ if x[3] == insertSeqName and y[3] != insertSeqName:
+ return y[8]
+ if x[3] != insertSeqName and y[3] == insertSeqName:
+ return x[8]
+
+def isFlipQryStrand(opts, alignmentsOfOneQuery):
+ if opts.insert:
+ return isInsertInRevStrand(opts.insert, alignmentsOfOneQuery)
+ return alignmentsOfOneQuery[0][8] and alignmentsOfOneQuery[-1][8]
+
+def insertSortKey(opts, alignmentsPerQuery, clump):
+ shift = 50
+ seqName = opts.insert
+ qryNum, isFlipped = clump[0]
+ alns = alignmentsPerQuery[qryNum]
+ for j, y in enumerate(alns):
+ if j:
+ x = alns[j - 1]
+ xSeq, xBeg, xEnd = x[3:6]
+ ySeq, yBeg, yEnd = y[3:6]
+ if xSeq == seqName and ySeq != seqName:
+ pos = yEnd - shift if y[8] else yBeg + shift
+ return ySeq, pos
+ if xSeq != seqName and ySeq == seqName:
+ pos = xBeg + shift if x[8] else xEnd - shift
+ return xSeq, pos
+
def isMergedGroupName(qryName):
return re.match(r"(group|merged?)\d+-", qryName)
@@ -533,9 +553,33 @@ def alignmentsInGenomeOrder(alignmentsPerQuery):
alignments = chain.from_iterable(alignmentsPerQuery)
return sorted(alignments, key=itemgetter(3, 4))
-def alignmentsOfCoveredQueries(opts, alignments, alignmentsPerQuery):
- myAlignmentsInGenomeOrder = alignmentsInGenomeOrder(alignmentsPerQuery)
- addSharedJumps(opts, alignments, myAlignmentsInGenomeOrder)
+def jumpsPerChromosomeStrands(alignmentsPerQuery):
+ jumpsDict = collections.defaultdict(list)
+ for alignmentsOfOneQuery in alignmentsPerQuery:
+ x = None
+ for y in alignmentsOfOneQuery:
+ yRefName = y[3]
+ yStrand = y[8]
+ if x:
+ emptyListOfOverlaps = y[7]
+ fwdKey = xStrand, xRefName, yStrand, yRefName
+ revKey = not yStrand, yRefName, not xStrand, xRefName
+ if fwdKey <= revKey:
+ jumpsDict[fwdKey].append((x, y, emptyListOfOverlaps))
+ if revKey <= fwdKey:
+ jumpsDict[revKey].append((y, x, emptyListOfOverlaps))
+ x, xRefName, xStrand = y, yRefName, yStrand
+ return jumpsDict
+
+def jumpSortKey(jump):
+ return jump[0][4]
+
+def alignmentsOfCoveredQueries(opts, alignmentsPerQuery):
+ jumpsDict = jumpsPerChromosomeStrands(alignmentsPerQuery)
+ logging.info("finding shared jumps...")
+ for jumpsList in jumpsDict.values():
+ jumpsList.sort(key=jumpSortKey)
+ addSharedJumps(opts, jumpsList)
while True:
logging.info("excluding...")
qryNums = set(i[0][0] for i in alignmentsPerQuery)
@@ -544,7 +588,7 @@ def alignmentsOfCoveredQueries(opts, alignments, alignmentsPerQuery):
if len(alignmentsPerQuery) == len(qryNums):
break
logging.info("queries: " + str(len(alignmentsPerQuery)))
- delOverlaps(myAlignmentsInGenomeOrder)
+ delOverlaps(chain.from_iterable(alignmentsPerQuery))
return alignmentsPerQuery
def newStrand(strand, isFlipped):
@@ -558,7 +602,7 @@ def qryNameWithStrand(qryName, isFlipped):
return newQryName, isAddChar
def printMaf(lines, isFlipped):
- lines = [re.split(r"(\s+)", i, 5) for i in lines]
+ lines = [re.split(r"(\s+)", i, maxsplit=5) for i in lines]
sLines = [i for i in lines if i[0] == "s"]
qryLine = sLines[-1]
qryName = qryLine[2]
@@ -732,8 +776,7 @@ def main(opts, args):
args[colonEnd:])
if opts.min_cov:
- alnsPerKeptQuery = alignmentsOfCoveredQueries(opts, alignments,
- alnsPerKeptQuery)
+ alnsPerKeptQuery = alignmentsOfCoveredQueries(opts, alnsPerKeptQuery)
print("#", os.path.basename(sys.argv[0]), *sys.argv[1:])
print()
@@ -746,7 +789,7 @@ def main(opts, args):
adjacencyList = adjacencyListFromLinks(len(alignmentsPerQuery), links)
logging.info("grouping...")
- isRevStrand = [i[0][8] and i[-1][8] for i in alignmentsPerQuery]
+ isRevStrand = [isFlipQryStrand(opts, i) for i in alignmentsPerQuery]
qryPriorities = [(-len(i), -alignedQueryLength(j))
for i, j in zip(adjacencyList, alignmentsPerQuery)]
allClumps = connectedComponents(adjacencyList, qryPriorities, isRevStrand)
@@ -755,6 +798,8 @@ def main(opts, args):
clumps = [i for i in clumps if i[0][0] in keptQueryNums]
isEachQueryOneMergedGroup = all(map(isMergedGroupName, queryNames))
sortKey = functools.partial(clumpSortKey, alignmentsPerQuery)
+ if opts.insert:
+ sortKey = functools.partial(insertSortKey, opts, alignmentsPerQuery)
if isEachQueryOneMergedGroup:
sortKey = functools.partial(groupSortKey, queryNames)
clumps.sort(key=sortKey)
@@ -772,7 +817,8 @@ def main(opts, args):
for newQryName, ranges in s:
paddedName = format(newQryName, str(width))
para = " ".join([paddedName] + ranges)
- print(textwrap.fill(para, 79, break_long_words=False,
+ wrapWidth = opts.width if opts.width else len(para) + 2
+ print(textwrap.fill(para, wrapWidth, break_long_words=False,
break_on_hyphens=False,
initial_indent="# ",
subsequent_indent="# "))
@@ -827,9 +873,13 @@ if __name__ == "__main__":
op.add_option("-m", "--max-mismap", type="float", default=1.0,
metavar="PROB", help="discard any alignment with "
"mismap probability > PROB (default=%default)")
+ op.add_option("--insert", metavar="NAME",
+ help="find insertions of the sequence with this name")
op.add_option("--shrink", action="count", help="shrink the output")
op.add_option("-v", "--verbose", action="count",
help="show progress messages")
+ op.add_option("-w", "--width", type="int", default=79, metavar="W", help=
+ "line-wrap width of group summary lines (default=%default)")
opts, args = op.parse_args()
if opts.min_cov is None:
opts.min_cov = 1 if opts.min_seqs > 1 else 0
=====================================
dnarrange-genes
=====================================
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#! /usr/bin/env python3
# Author: Martin C. Frith 2020
# SPDX-License-Identifier: GPL-3.0-or-later
=====================================
dnarrange-link
=====================================
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#! /usr/bin/env python3
# Author: Martin C. Frith 2019
# SPDX-License-Identifier: GPL-3.0-or-later
=====================================
dnarrange-merge
=====================================
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#! /usr/bin/env python3
# Author: Martin C. Frith 2019
# SPDX-License-Identifier: GPL-3.0-or-later
@@ -72,7 +72,10 @@ def main(opts, args):
if opts.verbose:
cmd.append("-" + "v" * opts.verbose)
cmd.append("-P" + str(opts.P))
- cmd.append("-W" + str(opts.W))
+ if opts.u:
+ cmd.append("-u" + str(opts.u))
+ else:
+ cmd.append("-W" + str(opts.W))
cmd.append("-m" + str(opts.m))
cmd.append("-z" + str(opts.z))
cmd += [args[1], "-"]
@@ -105,6 +108,8 @@ if __name__ == "__main__":
og = optparse.OptionGroup(op, "LAST options")
og.add_option("-P", type="int", default=1,
help="number of parallel threads (default=%default)")
+ og.add_option("-u", metavar="RY", type="int", help=
+ "use ~1 per this many initial matches")
og.add_option("-W", type="int", default=19, help="use minimum positions "
"in length-W windows (default=%default)")
og.add_option("-m", type="int", default=5, help=
=====================================
last-multiplot
=====================================
@@ -30,7 +30,7 @@ for i in "$arg2"/*
do
test -e "$i" || continue
- last-dotplot -j2 --sort2=0 --rot1=v --rot2=h --labels1=2 "$@" "$i" "$i".png
+ last-dotplot -m10 -j2 --sort2=0 --rot1=v --rot2=h --labels1=2 "$@" "$i" "$i".png
rm "$i"
done
=====================================
setup.py
=====================================
@@ -1,6 +1,6 @@
import setuptools
-commitInfo = " (HEAD -> master, tag: 1.5.3)".strip("( )").split()
+commitInfo = " (HEAD -> master, tag: 1.6.3)".strip("( )").split()
version = commitInfo[commitInfo.index("tag:") + 1].rstrip(",")
setuptools.setup(
View it on GitLab: https://salsa.debian.org/med-team/dnarrange/-/commit/5299fb6b4020bc5475ba27bd6509e84e43baaee0
--
View it on GitLab: https://salsa.debian.org/med-team/dnarrange/-/commit/5299fb6b4020bc5475ba27bd6509e84e43baaee0
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20251101/73053cca/attachment-0001.htm>
More information about the debian-med-commit
mailing list