[med-svn] [Git][med-team/pilon][master] 4 commits: New upstream version 1.24
Andreas Tille
gitlab at salsa.debian.org
Thu Feb 4 21:59:20 GMT 2021
Andreas Tille pushed to branch master at Debian Med / pilon
Commits:
70bbfe31 by Andreas Tille at 2021-02-04T22:54:41+01:00
New upstream version 1.24
- - - - -
1c82a6f3 by Andreas Tille at 2021-02-04T22:54:41+01:00
routine-update: New upstream version
- - - - -
56f58c49 by Andreas Tille at 2021-02-04T22:54:42+01:00
Update upstream source from tag 'upstream/1.24'
Update to upstream version '1.24'
with Debian dir 8c3f397ea25b771347ee3cc0f6d91ed845499be3
- - - - -
4b8359ec by Andreas Tille at 2021-02-04T22:54:42+01:00
routine-update: Standards-Version: 4.5.1
- - - - -
14 changed files:
- build.sbt
- build.sh
- debian/changelog
- debian/control
- project/plugins.sbt
- src/main/scala/org/broadinstitute/pilon/BamFile.scala
- src/main/scala/org/broadinstitute/pilon/GapFiller.scala
- src/main/scala/org/broadinstitute/pilon/GenomeFile.scala
- src/main/scala/org/broadinstitute/pilon/GenomeRegion.scala
- src/main/scala/org/broadinstitute/pilon/PileUp.scala
- src/main/scala/org/broadinstitute/pilon/PileUpRegion.scala
- src/main/scala/org/broadinstitute/pilon/Pilon.scala
- src/main/scala/org/broadinstitute/pilon/Scaffold.scala
- src/main/scala/org/broadinstitute/pilon/Vcf.scala
Changes:
=====================================
build.sbt
=====================================
@@ -1,12 +1,11 @@
name := "pilon"
-version := "1.23"
+version := "1.24"
-scalaVersion := "2.12.7"
+scalaVersion := "2.13.4"
scalacOptions += "-deprecation"
scalacOptions += "-feature"
-libraryDependencies += "commons-lang" % "commons-lang" % "2.6"
-
+libraryDependencies += "com.github.samtools" % "htsjdk" % "2.23.0"
=====================================
build.sh
=====================================
@@ -12,5 +12,5 @@ sed -e "s/\(date.*=\).*/\\1 \"$commitdate\"/" \
<$tmp >$f
#sbt $* package
sbt $* assembly
-ln -sf `pwd`/target/scala-2.12/pilon-assembly-$version.jar ~/lib/pilon/pilon-dev.jar
+ln -sf `pwd`/target/scala-2.13/pilon-assembly-$version.jar ~/lib/pilon/pilon-dev.jar
mv $tmp $f
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+pilon (1.24-1) UNRELEASED; urgency=medium
+
+ * New upstream version
+ * Standards-Version: 4.5.1 (routine-update)
+
+ -- Andreas Tille <tille at debian.org> Thu, 04 Feb 2021 22:54:41 +0100
+
pilon (1.23+dfsg-2) unstable; urgency=medium
* Team upload.
=====================================
debian/control
=====================================
@@ -7,7 +7,7 @@ Build-Depends: debhelper-compat (= 13),
javahelper,
scala,
libhtsjdk-java
-Standards-Version: 4.5.0
+Standards-Version: 4.5.1
Vcs-Browser: https://salsa.debian.org/med-team/pilon
Vcs-Git: https://salsa.debian.org/med-team/pilon.git
Homepage: https://github.com/broadinstitute/pilon/wiki
=====================================
project/plugins.sbt
=====================================
@@ -1 +1 @@
-addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.6")
+addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.15.0")
=====================================
src/main/scala/org/broadinstitute/pilon/BamFile.scala
=====================================
@@ -21,12 +21,12 @@ package org.broadinstitute.pilon
import collection.mutable.{HashMap,Map}
import java.io.File
-import scala.collection.JavaConverters._
+import scala.jdk.CollectionConverters._
import htsjdk.samtools._
object BamFile {
val indexSuffix = ".bai"
- val maxInsertSizes = Map(('frags -> 500), ('jumps -> 10000), ('unpaired -> 10000), ('bam -> 10000))
+ val maxInsertSizes = Map(("frags" -> 500), ("jumps" -> 10000), ("unpaired" -> 10000), ("bam" -> 10000))
val minOrientationPct = 10
val maxFragInsertSize = 700
// long read type codes
@@ -35,14 +35,14 @@ object BamFile {
val pacbioLongRead = 2
}
-class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'none) {
+class BamFile(val bamFile: File, var bamType: String, val subType: String = "none") {
import BamFile._
val path = bamFile.getCanonicalPath()
var baseCount: Long = 0
- val longReadType = if (bamType == 'unpaired) {
- if (subType == 'nanopore) nanoporeLongRead
- else if (subType == 'pacbio) pacbioLongRead
+ val longReadType = if (bamType == "unpaired") {
+ if (subType == "nanopore") nanoporeLongRead
+ else if (subType == "pacbio") pacbioLongRead
else 0
} else 0
@@ -75,7 +75,7 @@ class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'non
}
def validateSeqs(seqsOfInterest: Set[String]) = {
- require(!getSeqNames.intersect(seqsOfInterest).isEmpty, bamFile + " doesn't have sequence for any of " + seqsOfInterest.mkString(", "))
+ require(getSeqNames.intersect(seqsOfInterest).nonEmpty, bamFile.toString + " doesn't have sequence for any of " + seqsOfInterest.mkString(", "))
}
@@ -112,7 +112,8 @@ class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'non
// the pairs for which only one end overlaps? Adding 10k slop until
// that's figured out.
//pileUpRegion.addReads(bam.reader.queryOverlapping(name, start, stop))
- val longRead = (bamType == 'unpaired) && (subType == 'nanopore || subType == 'pacbio)
+ val longRead = (bamType == "unpaired") && (subType == "nanopore" || subType == "pacbio")
+
val r = reader
val reads = r.queryOverlapping(region.name,
(region.start-10000) max 0, (region.stop+10000) min region.contig.length).asScala
@@ -272,22 +273,22 @@ class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'non
}
- def nStrays = pairs.length
+ def nStrays = pairs().length
def printDebug = println("mm: " + readMap1.size + "+" + readMap2.size + "=" + nStrays/2)
}
val strayMateMap = new MateMap()
- def autoBam(): Symbol = {
+ def autoBam(): String = {
val fr = pctFR
val rf = pctRF
val un = pctUnpaired
- if (un >= fr && un >= rf) 'unpaired
+ if (un >= fr && un >= rf) "unpaired"
else {
val insertSize = if (rf > fr) insertStatsRF.mean else insertStatsFR.mean
- if (insertSize >= maxFragInsertSize) 'jumps else 'frags
+ if (insertSize >= maxFragInsertSize) "jumps" else "frags"
}
}
@@ -318,7 +319,7 @@ class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'non
r.close
val totalReads = mapped + unmapped + filtered
- var summary = bamFile + ": %d reads, %d filtered, %d mapped, %d proper".format(totalReads, filtered, mapped, proper)
+ var summary = bamFile.toString + ": %d reads, %d filtered, %d mapped, %d proper".format(totalReads, filtered, mapped, proper)
if (Pilon.strays) summary += ", " + strayMateMap.nStrays + " stray"
val insertCount = insertStatsFR.count + insertStatsRF.count
if (pctFR >= minOrientationPct)
@@ -328,9 +329,9 @@ class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'non
if (pctUnpaired >= minOrientationPct)
summary += ", Unpaired " + pctUnpaired + "% " + insertStatsUnpaired
summary += ", max " + maxInsertSize
- if (bamType == 'bam) {
- bamType = autoBam
- summary += " " + bamType.name
+ if (bamType == "bam") {
+ bamType = autoBam()
+ summary += " " + bamType
}
println(summary)
}
@@ -346,7 +347,7 @@ class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'non
val flanks = flankRegion(region)
var reads = readsInRegion(flanks)
if (Pilon.debug) println("readsInRegion flanks: " + flanks + " " + reads.length + " reads")
- if (Pilon.strays && bamType != 'unpaired) {
+ if (Pilon.strays && bamType != "unpaired") {
val mm = new MateMap(reads)
if (Pilon.debug) mm.printDebug
reads ++= mm.findStrays
@@ -377,7 +378,7 @@ class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'non
val frPct = pctFR
var mates: List[SAMRecord] = Nil
- for ((r1, r2) <- mateMap.pairs) {
+ for ((r1, r2) <- mateMap.pairs()) {
if (!(r1.getReadUnmappedFlag || r1.getProperPairFlag)) {
val rc = r1.getReadNegativeStrandFlag
val start = r1.getAlignmentStart
=====================================
src/main/scala/org/broadinstitute/pilon/GapFiller.scala
=====================================
@@ -73,7 +73,7 @@ class GapFiller(val region: GenomeRegion) {
else if (Pilon.debug) println("Gap closed but bad size: " + break + " " + closedLength)
}
if (solutionOK) solution
- else if (isGap || ((Pilon.fixList contains 'breaks) && loop.length == 0)) {
+ else if (isGap || (Pilon.fixBreaks && loop.length == 0)) {
// build partial solution using consensus from each side, opening gap if necessary
val fromRight = consensusFromRight(pathsFromRight)
val fromLeft = consensusFromLeft(pathsFromLeft)
@@ -127,12 +127,10 @@ class GapFiller(val region: GenomeRegion) {
assembler.addReads(reads)
if (Pilon.dumpReads) writeBam(break.toString, reads)
assembler.buildGraph
- if (Pilon.fixList.contains('novel) && Pilon.novelContigs != Nil) {
+ if (Pilon.fixNovel && Pilon.novelContigs != Nil) {
assembler.addGraphSeqs(Pilon.novelContigs)
- }
+ }
if (Pilon.debug) println("assembleIntoBreak: " + break + " " + assembler)
-
- //if (Pilon.fixList contains 'novelbreaks) assembler.novel
val startOffset = breakRadius
var start = (break.start - startOffset) max region.start
@@ -169,22 +167,22 @@ class GapFiller(val region: GenomeRegion) {
solutionSet += s
}
}
- val solutions = solutionSet.toList sortWith { (a,b) => a._3.length - a._2.length > b._3.length - b._2.length }
- var solutionLengths = Set[Int]()
- solutions map {s => solutionLengths += s._3.length - s._2.length}
+
+ val solutions = solutionSet.toList sortWith { (a,b) => a._3.length + a._2.length < b._3.length + b._2.length }
+ var solutionLengths = solutionSet map {s => s._3.length - s._2.length}
if (Pilon.debug) {
println("breakJoins: " + solutions.length + " solutions; lengths " + solutionLengths)
for (s <- solutions) println(" " + s)
}
if (solutionLengths.size == 1) {
- if (Pilon.debug) println("...all solutions of same length, using first above")
+ if (Pilon.debug) println("...all solutions of same length, using smallest total change above")
List(solutions.head)
} else{
solutions
}
}
-
+
def joinBreak(startArg: Int, forward: String, reverse: String, stopArg: Int) = {
var start = startArg
var stop = stopArg
@@ -203,13 +201,13 @@ class GapFiller(val region: GenomeRegion) {
if (patch != "") {
val solution = trimPatch(start, patch, stop)
start = solution._1
-
+
if (solution._2 == solution._3) {
if (Pilon.debug) println("...no change")
solution
} else {
if (Pilon.debug)
- println("...start=" + start + " ref=(" + solution._2.length + ")" + solution._2
+ println("...start=" + start + " ref=(" + solution._2.length + ")" + solution._2
+ " new=(" + solution._3.length + ")" + solution._3)
solution
}
@@ -218,7 +216,7 @@ class GapFiller(val region: GenomeRegion) {
noSolution
}
}
-
+
def properOverlap(left: String, right: String, minOverlap: Int): String = {
def substrMatch(a: String, aOffset: Int, b: String, bOffset: Int, len: Int): Boolean = {
@@ -304,7 +302,7 @@ class GapFiller(val region: GenomeRegion) {
if (estimatedLength == 0) {
reads = recruitReads(region)
} else {
- val unpairedBams = region.bamsOfType('unpaired)
+ val unpairedBams = region.bamsOfType("unpaired")
for (bam <- unpairedBams) {
reads ++= bam.readsInRegion(leftFlank)
reads ++= bam.readsInRegion(rightFlank)
@@ -324,14 +322,14 @@ class GapFiller(val region: GenomeRegion) {
assembler.addReads(reads)
assembler.buildGraph
val (forward, reverse, loop) = assembler.multiBridge(left, right)
- if (Pilon.verbose && !loop.isEmpty)
+ if (Pilon.verbose && loop.nonEmpty)
println(" loop: " + loop.length + " " + loop)
var patches = Set[String]()
for (f <- forward; r <- reverse) {
val patch = properOverlap(f, r, GapFiller.k)
// We should be very close in our closure estimation from alignments. Don't allow
- if (!patch.isEmpty && (estimatedLength == 0 || (patch.length - 2 * breakRadius).abs < estimatedLengthSlop)) {
+ if (patch.nonEmpty && (estimatedLength == 0 || (patch.length - 2 * breakRadius).abs < estimatedLengthSlop)) {
patches += patch
}
}
@@ -340,7 +338,7 @@ class GapFiller(val region: GenomeRegion) {
}
val lengths = patches.map(_.length).toSet
if (Pilon.verbose)
- println(patches.size + " patches; lengths " + lengths)
+ println(s"{$patches.size} patches; lengths $lengths")
if (lengths.size == 1 && (estimatedLength == 0 || loop.isEmpty || (loop.length - estimatedLength).abs < estimatedLengthSlop)) {
val patch = patches.head
@@ -358,11 +356,11 @@ class GapFiller(val region: GenomeRegion) {
def breakRadius = {
val minRadius = 3 * Assembler.K
- val inserts = region.bamsOfType('frags) map {bam => bam.insertSizeMean /*+ bam.insertSizeSigma*/}
+ val inserts = region.bamsOfType("frags") map {bam => bam.insertSizeMean /*+ bam.insertSizeSigma*/}
val insertMean = if (inserts.length > 0) (inserts.sum / inserts.length).round.toInt else 0
minRadius max insertMean
}
-
+
def recruitReadsFromBams(reg: Region, bams: List[BamFile]) = {
var reads = List[SAMRecord]()
for (b <- bams) {
@@ -373,37 +371,37 @@ class GapFiller(val region: GenomeRegion) {
reads
}
- def recruitReadsOfType(reg: Region, bamType: Symbol) = {
+ def recruitReadsOfType(reg: Region, bamType: String) = {
recruitReadsFromBams(reg, region.bamsOfType(bamType))
}
//def recruitFrags(reg: Region) = mateMapOfType(reg, 'frags).values.toList
- def recruitFrags(reg: Region) = recruitReadsOfType(reg, 'frags)
+ def recruitFrags(reg: Region) = recruitReadsOfType(reg, "frags")
def recruitJumps(reg: Region) = {
var reads = List[SAMRecord]()
- for (b <- region.bamsOfType('jumps)) {
+ for (b <- region.bamsOfType("jumps")) {
reads ++= b.recruitBadMates(reg)
}
- if (Pilon.debug)
+ if (Pilon.debug)
println("# Recruiting jump bad mates: count=" + reads.length)
reads
}
def recruitUnpaired(reg: Region) =
- if (!Pilon.longread) recruitReadsOfType(reg, 'unpaired) else Nil
+ if (!Pilon.longread) recruitReadsOfType(reg, "unpaired") else Nil
def recruitLocalReads(reg: Region) = recruitFrags(reg) ++ recruitUnpaired(reg)
def recruitLongReads(reg: Region) = {
- recruitReadsFromBams(reg, region.bamsOfType('unpaired).filter(_.longReadType > 0))
+ recruitReadsFromBams(reg, region.bamsOfType("unpaired").filter(_.longReadType > 0))
}
def recruitReads(reg: Region) = {
recruitLocalReads(reg) ++ recruitJumps(reg) // ++ recruitLongReads(reg)
}
- def writeBam(fileName: String, reads: List[SAMRecord]) {
+ def writeBam(fileName: String, reads: List[SAMRecord]) = {
val file = new File(fileName + ".sam")
val header = new SAMFileHeader()
header.addSequence(new SAMSequenceRecord(region.contig.getName, region.contig.getBases.length))
=====================================
src/main/scala/org/broadinstitute/pilon/GenomeFile.scala
=====================================
@@ -86,43 +86,36 @@ class GenomeFile(val referenceFile: File, val targets : String = "") {
bamFiles foreach {_.validateSeqs(contigsOfInterest)}
- if (Pilon.strays || Pilon.fixList.contains('circles)) {
+ if (Pilon.strays || Pilon.fixCircles) {
println("Scanning BAMs")
- // Scan BAMs in parallel
- //bamFiles.filter({_.bamType != 'unpaired}).par.map(_.scan(contigsOfInterest))
- bamFiles.par.map(_.scan(contigsOfInterest))
+ bamFiles.map(_.scan(contigsOfInterest))
}
val circles = Scaffold.findHgapCircles(bamFiles)
// If assemble novel sequence up front, so that we can potentially place the
// contigs into scaffolds when we process them.
- if (Pilon.fixList contains 'novel)
+ if (Pilon.fixNovel)
Pilon.novelContigs = assembleNovel(bamFiles)
var chunks = regions.map(_._2).flatten
- if (Pilon.threads > 1) {
- // Do parallel processing randomly to even out load if all the
- // big chunks are early in the file
- chunks = Random.shuffle(chunks)
- }
- chunks.par foreach { r =>
+ chunks foreach { r =>
println("Processing " + r)
r.initializePileUps
bamFiles foreach { r.processBam(_) }
r.postProcess
- if ((Pilon.fixList contains 'circles) &&
+ if ((Pilon.fixCircles) &&
/*(r.contig.length < 5000 && r.contig.length >= 1000) ||*/ (circles contains r.contig.getName)) {
- if (Pilon.verbose) println(r + " might be a circle!")
+ if (Pilon.verbose) println(s"$r might be a circle!")
r.closeCircle(circles.getOrElse(r.contig.getName, 0))
}
- if (Pilon.vcf || !Pilon.fixList.intersect(Set('snps, 'indels, 'gaps, 'local)).isEmpty) {
+ if (Pilon.vcf || Pilon.fixSnps || Pilon.fixIndels || Pilon.fixGaps || Pilon.fixLocal) {
r.identifyAndFixIssues
// If we don't need pileups for VCF later, free up the memory now!
if (!Pilon.vcf) r.finalizePileUps
}
- println(r + " log:")
- r.printLog
+ println(s"$r log:")
+ r.printLog()
println("Finished processing " + r)
}
@@ -131,7 +124,7 @@ class GenomeFile(val referenceFile: File, val targets : String = "") {
new PrintWriter(new BufferedWriter(new FileWriter(changesFile)))
else null
val fastaFile = Pilon.outputFile(".fasta")
- val fastaWriter = if (!Pilon.fixList.isEmpty)
+ val fastaWriter = if (Pilon.fixSnps || Pilon.fixIndels || Pilon.fixGaps || Pilon.fixLocal || Pilon.fixNovel)
new PrintWriter(new BufferedWriter(new FileWriter(fastaFile)))
else null
@@ -160,7 +153,7 @@ class GenomeFile(val referenceFile: File, val targets : String = "") {
}
}
// Write the FASTA all at once rather than in chunks for formatting reasons
- if (!Pilon.fixList.isEmpty) {
+ if (fastaWriter != null) {
println("Writing updated " + newName + " to " + fastaFile)
val fixedRegions = reg._2 map { _.bases }
val bases = fixedRegions reduceLeft {_ ++ _} map {_.toChar} mkString ""
@@ -168,7 +161,7 @@ class GenomeFile(val referenceFile: File, val targets : String = "") {
}
}
- if (Pilon.fixList contains 'novel) {
+ if (Pilon.fixNovel) {
val novelContigs = Pilon.novelContigs
for (n <- 0 until novelContigs.length) {
val header = "pilon_novel_%03d".format(n + 1)
@@ -176,7 +169,7 @@ class GenomeFile(val referenceFile: File, val targets : String = "") {
writeFastaElement(fastaWriter, header, novelContigs(n))
}
}
- if (!Pilon.fixList.isEmpty) fastaWriter.close
+ if (fastaWriter != null) fastaWriter.close
if (Pilon.vcf) vcf.close
if (Pilon.changes) changesWriter.close
coverageSummary(bamFiles)
@@ -187,7 +180,7 @@ class GenomeFile(val referenceFile: File, val targets : String = "") {
var totalBaseCount: Long = 0
for ((t,files) <- bamTypes) {
val typeBaseCount = files.map(_.baseCount).sum
- println("Mean " + t.name + " coverage: " + roundDiv(typeBaseCount, genomeSize))
+ println("Mean " + t + " coverage: " + roundDiv(typeBaseCount, genomeSize))
totalBaseCount += typeBaseCount
}
println("Mean total coverage: " + roundDiv(totalBaseCount, genomeSize))
@@ -204,16 +197,16 @@ class GenomeFile(val referenceFile: File, val targets : String = "") {
if (Pilon.verbose) print("..." + contig.getName)
}
//genomeGraph.buildGraph
- if (Pilon.verbose) println
+ if (Pilon.verbose) println()
val assembler = new Assembler()
- bamFiles filter {_.bamType != 'jumps} foreach { bam =>
+ bamFiles filter {_.bamType != "jumps"} foreach { bam =>
val reads = bam.getUnaligned
print("..." + reads.length + " unmapped reads")
assembler.addReads(reads)
}
print("...assembling contigs")
val contigs = assembler.novel(genomeGraph)
- println
+ println()
val contigLengths = contigs map {_.length}
println("Assembled %d novel contigs containing %d bases".format(contigs.length, contigLengths.sum))
contigs
@@ -244,7 +237,7 @@ class GenomeFile(val referenceFile: File, val targets : String = "") {
def parseTargetFile(fileName: String) = {
try {
- Source.fromFile(fileName).getLines.flatMap({parseTargetString(_)}).toList
+ Source.fromFile(fileName).getLines().flatMap({parseTargetString(_)}).toList
} catch {
case ioe: Exception => Nil
}
=====================================
src/main/scala/org/broadinstitute/pilon/GenomeRegion.scala
=====================================
@@ -63,25 +63,27 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
var logString = ""
- // Hack alert...if we're running multithreaded, buffer our output for atomic
- // write when we're done. Otherwise, we would rather see output as it happens.
def log(s: String) = {
- if (Pilon.threads > 1)
- logString += s
- else
- print(s)
+ print(s)
}
def logln(s: String = "") = {
log(s + "\n")
}
- def printLog() = if (Pilon.threads > 1) print(logString)
+ def printLog() = print(logString)
- var changeMap = Map.empty[Int, (Symbol, PileUp)]
- def addChange(loc: Int, kind: Symbol, pu: PileUp) = {
- if (kind == 'amb) ambiguous(loc) = true
+ object ChangeKind extends Enumeration {
+ type ChangeKind = Value
+ val SNP, INS, DEL, AMB = Value
+ }
+ import ChangeKind._
+
+ var changeMap = Map.empty[Int, (ChangeKind, PileUp)]
+
+ def addChange(loc: Int, kind: ChangeKind, pu: PileUp) = {
+ if (kind == AMB) ambiguous(loc) = true
else changed(loc) = true
changeMap += (loc -> (kind, pu))
}
@@ -134,13 +136,13 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
//val bams = Map.empty[Symbol, BamRegion]
var bams = List[BamFile]()
- def bamsOfType(bamType: Symbol) = bams filter { _.bamType == bamType }
+ def bamsOfType(bamType: String) = bams filter { _.bamType == bamType }
- def nanoporeBams = bamsOfType('unpaired) filter { _.subType == 'nanopore}
+ def nanoporeBams = bamsOfType("unpaired") filter { _.subType == "nanopore"}
- def pacbioBams = bamsOfType('unpaired) filter { _.subType == 'pacbio }
+ def pacbioBams = bamsOfType("unpaired") filter { _.subType == "pacbio" }
- def fragBams = bamsOfType('frags)
+ def fragBams = bamsOfType("frags")
def longReadOnly = Pilon.longread && fragBams.isEmpty
@@ -230,7 +232,7 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
//meanReadLength = pileUpRegion.meanReadLength
// Pass 1: pull out values from pileups & call base changes
- val fixamb = Pilon.iupac || (Pilon.fixList contains 'amb)
+ val fixamb = Pilon.iupac || Pilon.fixAmb
for (i <- 0 until size) {
val pu = pileUpRegion(i)
@@ -253,9 +255,9 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
if (n >= minDepth && r != 'N' && !deleted(i) && bc.called) {
if (homo && b == r && bc.highConfidence && !bc.indel)
confirmed(i) = true
- else if (bc.isInsertion && bc.homoIndel) addChange(i, 'ins, pu)
+ else if (bc.isInsertion && bc.homoIndel) addChange(i, INS, pu)
else if (bc.isDeletion && bc.homoIndel) {
- addChange(i, 'del, pu)
+ addChange(i, DEL, pu)
for (j <- 1 until bc.deletion.length) {
deleted(i + j) = true
pileUpRegion(i + j).deletions += pu.deletions
@@ -263,8 +265,8 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
} else if (b != r && bc.score > 0) {
// for ambiguous bases, fix them if --fix fixamb or if original base
// not one of the top two alternatives
- if (homo) addChange(i, 'snp, pu)
- else if (fixamb || bc.altBase != r) addChange(i, 'amb, pu)
+ if (homo) addChange(i, SNP, pu)
+ else if (fixamb || bc.altBase != r) addChange(i, AMB, pu)
}
}
}
@@ -283,15 +285,15 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
}
def processBam(bam: BamFile) = {
- log(bam.bamType.name + " " + bam.bamFile + ": ")
+ log(bam.bamType + " " + bam.bamFile + ": ")
// This is a real kludge...
val covBefore = new Array[Int](size)
- if (bam.bamType != 'jumps)
+ if (bam.bamType != "jumps")
for (i <- 0 until size)
covBefore(i) = pileUpRegion.pileups(i).depth.toInt
val coverage = bam.process(this)
logln("coverage " + coverage.toString)
- if (bam.bamType != 'jumps)
+ if (bam.bamType != "jumps")
for (i <- 0 until size)
fragCoverage(i) += pileUpRegion.pileups(i).depth.toInt - covBefore(i)
bams ::= bam
@@ -307,8 +309,8 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
logln("# IdentifyIssues: " + this)
identifyIssueRegions
}
- val fixSnps = Pilon.fixList contains 'snps
- val fixIndels = Pilon.fixList contains 'indels
+ val fixSnps = Pilon.fixSnps
+ val fixIndels = Pilon.fixIndels
var snps = 0
var ins = 0
var dels = 0
@@ -323,10 +325,10 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
val cBase = bc.base
if (!excluded(i)) {
kind match {
- case 'snp =>
+ case SNP =>
if (fixSnps) snpFixList ::= (loc, rBase.toString, cBase.toString)
snps += 1
- case 'amb =>
+ case AMB =>
if (fixSnps && !Pilon.longread) {
if (Pilon.iupac) {
// we put these on the small fix list because iupac codes can mess up assembly
@@ -337,12 +339,12 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
}
amb += 1
}
- case 'ins =>
+ case INS =>
val insert = bc.insertion
if (fixIndels) smallFixList ::= (loc, "", insert)
ins += 1
insBases += insert.length
- case 'del =>
+ case DEL =>
val deletion = bc.deletion
if (fixIndels) smallFixList ::= (loc, deletion, "")
dels += 1
@@ -357,15 +359,15 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
val nonN = originalBases count {x => x != 'N'}
logln("Confirmed " + nConfirmed + " of " + nonN + " bases (" +
(nConfirmed * 100.0 / nonN).formatted("%.2f") + "%)")
- if (Pilon.fixList contains 'snps) log("Corrected ") else log("Found ")
- if (Pilon.diploid) log((snps + amb) + " snps")
+ if (Pilon.fixSnps) log("Corrected ") else log("Found ")
+ if (Pilon.diploid) log(s"${snps+amb} snps")
else {
- log(snps + " snps; ")
- log(amb + " ambiguous bases")
+ log(s"$snps snps; ")
+ log(s"$amb ambiguous bases")
}
- if (Pilon.fixList contains 'indels) log("; corrected ") else log("; found ")
- log(ins + " small insertions totaling " + insBases + " bases")
- logln(", " + dels + " small deletions totaling " + delBases + " bases")
+ if (Pilon.fixIndels) log("; corrected ") else log("; found ")
+ log(s"$ins small insertions totaling $insBases bases")
+ logln(s", $dels small deletions totaling $delBases bases")
// Report large collapsed regions (possible segmental duplication)
val duplications = duplicationEvents
@@ -378,7 +380,7 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
fixIssues(snpFixList)
// Try to fill gaps
- if ((Pilon.fixList contains 'gaps) && gaps.nonEmpty) {
+ if (Pilon.fixGaps && gaps.nonEmpty) {
logln("# Attempting to fill gaps")
for (gap <- gaps) {
val filler = new GapFiller(this)
@@ -392,7 +394,7 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
// Try to reassemble around possible contiguity breaks, but stay away from gaps
val breaks = possibleBreaks
- if ((Pilon.fixList contains 'local) && breaks.nonEmpty) {
+ if (Pilon.fixLocal && breaks.nonEmpty) {
logln("# Attempting to fix local continuity breaks")
for (break <- breaks) {
val filler = new GapFiller(this)
@@ -520,21 +522,21 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
val rBase = refBase(locus(i))
val cBase = pu.baseCall.base
kind match {
- case 'snp =>
- log(name + " " + locus(i) + " " + kind.name + " " + rBase + " " + cBase)
+ case SNP =>
+ log(name + " " + locus(i) + " snp " + rBase + " " + cBase)
if (Pilon.debug) log(" " + pu)
log(endLine)
- case 'ins =>
- log(name + " " + locus(i) + " " + kind.name + " " + "." + " " + pu.baseCall.insertion)
+ case INS =>
+ log(name + " " + locus(i) + " ins " + "." + " " + pu.baseCall.insertion)
if (Pilon.debug) log(" " + pu)
log(endLine)
- case 'del =>
- log(name + " " + locus(i) + " " + kind.name + " " + pu.baseCall.deletion + " " + ".")
+ case DEL =>
+ log(name + " " + locus(i) + " del " + pu.baseCall.deletion + " " + ".")
if (Pilon.debug) log(" " + pu)
log(endLine)
- case 'amb =>
+ case AMB =>
if (Pilon.verbose && rBase != cBase) {
- log(name + " " + locus(i) + " " + kind.name + " " + rBase + " " + cBase)
+ log(name + " " + locus(i) + " amb " + rBase + " " + cBase)
if (Pilon.debug) log(" " + pu)
log(endLine)
}
@@ -640,7 +642,7 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
}
}
- def writeChanges(changes: PrintWriter, newName: String = name, offset: Int = 0) {
+ def writeChanges(changes: PrintWriter, newName: String = name, offset: Int = 0) = {
val fixes = fixFixList(snpFixList ++ smallFixList ++ bigFixList)
var delta = 0
for (fix <- fixes) {
=====================================
src/main/scala/org/broadinstitute/pilon/PileUp.scala
=====================================
@@ -172,7 +172,7 @@ class PileUp {
else base.toString //+ (if (!homo) "/" + altBase else "")
}
- def baseMatch(refBase: Char) {//
+ def baseMatch(refBase: Char) = {//
refBase == base // TODO: handle IUPAC codes
}
=====================================
src/main/scala/org/broadinstitute/pilon/PileUpRegion.scala
=====================================
@@ -19,7 +19,7 @@
package org.broadinstitute.pilon
-import scala.collection.JavaConverters._
+import scala.jdk.CollectionConverters._
import htsjdk.samtools._
import Utils._
@@ -220,7 +220,7 @@ class PileUpRegion(name: String, start: Int, stop: Int)
}
def dump = {
- for (i <- 0 to size - 1) println(locus(i) + ": " + pileups(i))
+ for (i <- 0 to size - 1) println(s"${locus(i)}: ${pileups(i)}")
}
def postProcess = {
=====================================
src/main/scala/org/broadinstitute/pilon/Pilon.scala
=====================================
@@ -22,9 +22,18 @@ package org.broadinstitute.pilon
import java.io.File
object Pilon {
- // types of fixing we know about
- val fixChoices = Set('snps, 'indels, 'gaps, 'local)
- val experimentalFixChoices = Set('amb, 'breaks, 'circles, 'novel, 'scaffolds)
+ val fixChoices= Set("snps", "indels", "gaps", "local")
+ val experimentalFixChoices = Set("amb", "breaks", "circles", "novel", "scaffolds")
+
+ var fixSnps = false
+ var fixIndels = false
+ var fixGaps = false
+ var fixLocal = false
+ var fixAmb = false
+ var fixBreaks = false
+ var fixCircles = false
+ var fixNovel = false
+ var fixScaffolds = false
// input parameters
var bamFiles = List[BamFile]()
@@ -61,7 +70,6 @@ object Pilon {
var pacbio = false
var nanopore = false
var strays = true
- var threads = 1
var trSafe = true
// Global computed data
@@ -70,7 +78,7 @@ object Pilon {
// for logging to output files
var commandArgs = Array[String]()
- def main(args: Array[String]) {
+ def main(args: Array[String]) = {
commandArgs = args
println(Version.version)
optionParse(args.toList)
@@ -84,11 +92,9 @@ object Pilon {
}
}
- setDefaultParallelism(threads)
-
// Stray computation is expensive up front, so only turn it on
// if we're doing local reassembly
- strays &= (fixList contains 'gaps) || (fixList contains 'local) || (fixList contains 'scaffolds)
+ strays &= fixGaps || fixLocal || fixScaffolds
if (bamFiles.length == 0) {
println(usage)
@@ -101,7 +107,7 @@ object Pilon {
println("Genome: " + genomePath)
val genome = new GenomeFile(new File(genomePath), targets)
- println("Fixing " + (fixList map {_.name} mkString(", ")))
+ println("Fixing " + (fixList mkString(", ")))
genome.processRegions(bamFiles)
if (tracks) {
@@ -111,9 +117,8 @@ object Pilon {
}
- def optionParse(list: List[String]) : Unit = {
+ def optionParse(list: List[String]) : Unit = {
list match {
- case Nil => Nil
case "--help" :: tail =>
println(usage)
print(help)
@@ -123,7 +128,7 @@ object Pilon {
// println(Version.version)
sys.exit(0)
case "--bam" :: value :: tail =>
- bamFiles ::= new BamFile(new File(value), 'bam)
+ bamFiles ::= new BamFile(new File(value), "bam")
optionParse(tail)
case "--changes" :: tail =>
changes = true
@@ -157,7 +162,7 @@ object Pilon {
flank = value.toInt
optionParse(tail)
case "--frags" :: value :: tail =>
- bamFiles ::= new BamFile(new File(value), 'frags)
+ bamFiles ::= new BamFile(new File(value), "frags")
optionParse(tail)
case "--gapmargin" :: value :: tail =>
gapMargin = value.toInt
@@ -166,7 +171,7 @@ object Pilon {
genomePath = value
optionParse(tail)
case "--jumps" :: value :: tail =>
- bamFiles ::= new BamFile(new File(value), 'jumps)
+ bamFiles ::= new BamFile(new File(value), "jumps")
optionParse(tail)
case "--K" :: value :: tail =>
Assembler.K = value.toInt
@@ -187,7 +192,7 @@ object Pilon {
multiClosure = true
optionParse(tail)
case "--nanopore" :: value :: tail =>
- bamFiles ::= new BamFile(new File(value), 'unpaired, 'nanopore)
+ bamFiles ::= new BamFile(new File(value), "unpaired", "nanopore")
longread = true
nanopore = true
optionParse(tail)
@@ -204,7 +209,7 @@ object Pilon {
outdir = value
optionParse(tail)
case "--pacbio" :: value :: tail =>
- bamFiles ::= new BamFile(new File(value), 'unpaired, 'pacbio)
+ bamFiles ::= new BamFile(new File(value), "unpaired", "pacbio")
longread = true
pacbio = true
optionParse(tail)
@@ -212,7 +217,8 @@ object Pilon {
targets = value
optionParse(tail)
case "--threads" :: value :: tail =>
- threads = value.toInt
+ println("--threads argument no longer supported; ignoring!")
+ //threads = value.toInt
optionParse(tail)
case "--tracks" :: tail =>
tracks = true
@@ -221,12 +227,12 @@ object Pilon {
// trSafe = true
// optionParse(tail)
case "--unpaired" :: value :: tail =>
- bamFiles ::= new BamFile(new File(value), 'unpaired)
+ bamFiles ::= new BamFile(new File(value), "unpaired")
optionParse(tail)
case "--variant" :: tail =>
// variant calling mode
vcf = true
- fixList += 'breaks
+ fixList += "breaks"
optionParse(tail)
//multiClosure = true
case "--vcf" :: tail =>
@@ -244,10 +250,21 @@ object Pilon {
case option :: tail =>
println("Unknown option " + option)
sys.exit(1)
+ case Nil =>
}
+ fixSnps = fixList contains "snps"
+ fixIndels = fixList contains "indels"
+ fixGaps = fixList contains "gaps"
+ fixLocal = fixList contains "local"
+ fixAmb = fixList contains "amb"
+ fixBreaks = fixList contains "breaks"
+ fixCircles = fixList contains "circles"
+ fixNovel = fixList contains "novel"
+ fixScaffolds = fixList contains "scaffolds"
}
def parseFixList(fix: String) = {
+ // types of fixing we know about
val fixes = fix.split(",")
if (fix(0) != '+' && fix(0) != '-') fixList = Set.empty
for (f <- fixes) {
@@ -255,7 +272,7 @@ object Pilon {
else if (f == "none") fixList = Set.empty
else {
val plusMinus = if (f(0) == '-') f(0) else '+'
- val fsym = if (f(0) == plusMinus) Symbol(f.substring(1)) else Symbol(f)
+ val fsym = if (f(0) == plusMinus) f.substring(1) else f
if (fixChoices contains fsym) {
if (plusMinus == '+') fixList += fsym
else fixList -= fsym
@@ -264,14 +281,14 @@ object Pilon {
println("Warning: experimental fix option " + f)
if (plusMinus == '+') fixList += fsym
else fixList -= fsym
- } else if (fsym == 'bases) {
+ } else if (fsym == "bases") {
// for backward compatibility, 'bases applies to both 'snps and 'indels
if (plusMinus == '+') {
- fixList += 'snps
- fixList += 'indels
+ fixList += "snps"
+ fixList += "indels"
} else {
- fixList -= 'snps
- fixList -= 'indels
+ fixList -= "snps"
+ fixList -= "indels"
}
} else {
println("Error: unknown fix option " + f)
@@ -286,21 +303,8 @@ object Pilon {
val filePath = if (Pilon.outdir == "") fileName else Pilon.outdir + "/" + fileName
new File(filePath)
}
-
- // Ugly, but thank you http://stackoverflow.com/questions/17865823
- def setDefaultParallelism(numThreads: Int): Unit = {
- val parPkgObj = scala.collection.parallel.`package`
- val defaultTaskSupportField = parPkgObj.getClass.getDeclaredFields.find{
- _.getName == "defaultTaskSupport"
- }.get
- defaultTaskSupportField.setAccessible(true)
- defaultTaskSupportField.set(
- parPkgObj,
- new scala.collection.parallel.ForkJoinTaskSupport(
- new scala.concurrent.forkjoin.ForkJoinPool(numThreads)))
- }
-
+
val usage = """
Usage: pilon --genome genome.fasta [--frags frags.bam] [--jumps jumps.bam] [--unpaired unpaired.bam]
[...other options...]
@@ -324,6 +328,10 @@ object Pilon {
--bam any.bam
A bam file of unknown type; Pilon will scan it and attempt to classify it as one
of the above bam types.
+ --nanopore ont.bam
+ A bam file containing Oxford Nanopore read alignments. Experimental.
+ --pacbio pb.bam
+ A bam file containing Pacific Biosciences read alignments. Experimental.
OUTPUTS:
--output prefix
Prefix for output files
@@ -377,8 +385,6 @@ object Pilon {
scaffold00001 and coordinates 10000-20000 of scaffold00002.
If "targetlist" is the name of a file, each line will be treated as a target
specification.
- --threads
- Degree of parallelism to use for certain processing (default 1). Experimental.
--verbose
More verbose output.
--debug
=====================================
src/main/scala/org/broadinstitute/pilon/Scaffold.scala
=====================================
@@ -28,7 +28,7 @@ package org.broadinstitute.pilon
*/
-import scala.collection.JavaConverters._
+import scala.jdk.CollectionConverters._
import collection.mutable.{ Map, HashMap }
import htsjdk.samtools._
@@ -221,7 +221,7 @@ object Scaffold {
}
def analyzeStrays(bam: BamFile) = {
- val mm = bam.strayMateMap.pairs
+ val mm = bam.strayMateMap.pairs()
val sigma = bam.insertSizeSigma
val scaffolds = bam.getSeqs
val scaffoldSizes = scaffolds.map({_.getSequenceLength})
@@ -269,16 +269,16 @@ object Scaffold {
}
}
- def analyze(bamFiles: List[BamFile]) {
+ def analyze(bamFiles: List[BamFile]) = {
println("Analyze scaffolds")
- for (bam <- bamFiles filter {_.bamType == 'jumps})
+ for (bam <- bamFiles filter {_.bamType == "jumps"})
analyzeStrays(bam)
}
def findHgapCircles(bamFiles: List[BamFile]): Map[String, Int] = {
- if (Pilon.fixList contains 'circles) {
- val endAlignments = bamFiles filter {_.bamType == 'unpaired} flatMap findEndAlignments
+ if (Pilon.fixCircles) {
+ val endAlignments = bamFiles filter {_.bamType == "unpaired"} flatMap findEndAlignments
findCircles(endAlignments)
} else return new HashMap()
}
=====================================
src/main/scala/org/broadinstitute/pilon/Vcf.scala
=====================================
@@ -90,7 +90,7 @@ class Vcf(val file: File, val contigsWithSizes: List[(String, Int)] = Nil) {
val p = pileUp.delPct
//(rBase + bcString, rBase.toString, "1/1", depth - pileUp.deletions, pileUp.deletions)
//(rBase + bcString, rBase.toString, callType, pileUp.mqSum - pileUp.delQual, pileUp.delQual)
- (rBase + bcString, rBase.toString, callType, 100 - p, p)
+ (rBase.toString + bcString, rBase.toString, callType, 100 - p, p)
} else if (indelOk && !embedded && bc.isInsertion) {
loc -= 1
val rBase = region.refBase(loc)
@@ -98,7 +98,7 @@ class Vcf(val file: File, val contigsWithSizes: List[(String, Int)] = Nil) {
val p = pileUp.insPct
//(rBase.toString, rBase + bcString, "1/1", depth - pileUp.insertions, pileUp.insertions)
//(rBase.toString, rBase + bcString, callType, pileUp.mqSum - pileUp.insQual, pileUp.insQual)
- (rBase.toString, rBase + bcString, callType, 100 - p, p)
+ (rBase.toString, rBase.toString + bcString, callType, 100 - p, p)
} else if (bc.homo) {
val rBase = region.refBase(loc)
if (rBase == bc.base || bcString == "N")
@@ -178,8 +178,8 @@ class Vcf(val file: File, val contigsWithSizes: List[(String, Int)] = Nil) {
def writeFixRecord(region: GenomeRegion, fix: GenomeRegion.Fix) = {
val loc = fix._1 - 1
val rBase = region.refBase(loc)
- val ref = rBase + fix._2
- val alt = rBase + fix._3
+ val ref = rBase.toString + fix._2
+ val alt = rBase.toString + fix._3
val svlen = alt.length - ref.length
val svend = loc + ref.length - 1
val svtype = if (svlen < 0) "DEL" else "INS"
@@ -195,7 +195,7 @@ class Vcf(val file: File, val contigsWithSizes: List[(String, Int)] = Nil) {
var loc = dup.start - 1
val rBase = region.refBase(loc)
var line = region.name + tab + loc + tab + "." + tab
- line += rBase + tab + "<DUP>" + tab + "." + tab + "PASS" + tab
+ line += rBase.toString + tab + "<DUP>" + tab + "." + tab + "PASS" + tab
line += "SVTYPE=DUP;SVLEN=" + dup.size + ";END=" + dup.stop + ";IMPRECISE"
line += tab + "GT" + tab + "./."
writer.println(line)
View it on GitLab: https://salsa.debian.org/med-team/pilon/-/compare/b51bae118d0a257eb81cf8b4b4da817a9e1e21f1...4b8359ec03af8333fa579fef248e56dea2f035e7
--
View it on GitLab: https://salsa.debian.org/med-team/pilon/-/compare/b51bae118d0a257eb81cf8b4b4da817a9e1e21f1...4b8359ec03af8333fa579fef248e56dea2f035e7
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210204/deda0367/attachment-0001.html>
More information about the debian-med-commit
mailing list