[med-svn] [Git][med-team/pilon][upstream] New upstream version 1.24

Andreas Tille gitlab at salsa.debian.org
Thu Feb 4 21:59:29 GMT 2021



Andreas Tille pushed to branch upstream at Debian Med / pilon


Commits:
70bbfe31 by Andreas Tille at 2021-02-04T22:54:41+01:00
New upstream version 1.24
- - - - -


12 changed files:

- build.sbt
- build.sh
- project/plugins.sbt
- src/main/scala/org/broadinstitute/pilon/BamFile.scala
- src/main/scala/org/broadinstitute/pilon/GapFiller.scala
- src/main/scala/org/broadinstitute/pilon/GenomeFile.scala
- src/main/scala/org/broadinstitute/pilon/GenomeRegion.scala
- src/main/scala/org/broadinstitute/pilon/PileUp.scala
- src/main/scala/org/broadinstitute/pilon/PileUpRegion.scala
- src/main/scala/org/broadinstitute/pilon/Pilon.scala
- src/main/scala/org/broadinstitute/pilon/Scaffold.scala
- src/main/scala/org/broadinstitute/pilon/Vcf.scala


Changes:

=====================================
build.sbt
=====================================
@@ -1,12 +1,11 @@
 name := "pilon"
 
-version := "1.23"
+version := "1.24"
 
-scalaVersion := "2.12.7"
+scalaVersion := "2.13.4"
 
 scalacOptions += "-deprecation"
 
 scalacOptions += "-feature"
 
-libraryDependencies += "commons-lang" % "commons-lang" % "2.6"
-
+libraryDependencies += "com.github.samtools" % "htsjdk" % "2.23.0"


=====================================
build.sh
=====================================
@@ -12,5 +12,5 @@ sed -e "s/\(date.*=\).*/\\1 \"$commitdate\"/" \
     <$tmp >$f
 #sbt $* package
 sbt $* assembly
-ln -sf `pwd`/target/scala-2.12/pilon-assembly-$version.jar ~/lib/pilon/pilon-dev.jar
+ln -sf `pwd`/target/scala-2.13/pilon-assembly-$version.jar ~/lib/pilon/pilon-dev.jar
 mv $tmp $f


=====================================
project/plugins.sbt
=====================================
@@ -1 +1 @@
-addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.6")
+addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.15.0")


=====================================
src/main/scala/org/broadinstitute/pilon/BamFile.scala
=====================================
@@ -21,12 +21,12 @@ package org.broadinstitute.pilon
 
 import collection.mutable.{HashMap,Map}
 import java.io.File
-import scala.collection.JavaConverters._
+import scala.jdk.CollectionConverters._
 import htsjdk.samtools._
 
 object BamFile {
   val indexSuffix = ".bai"
-  val maxInsertSizes = Map(('frags -> 500), ('jumps -> 10000), ('unpaired -> 10000), ('bam -> 10000))
+  val maxInsertSizes = Map(("frags" -> 500), ("jumps" -> 10000), ("unpaired" -> 10000), ("bam" -> 10000))
   val minOrientationPct = 10
   val maxFragInsertSize = 700
   // long read type codes
@@ -35,14 +35,14 @@ object BamFile {
   val pacbioLongRead = 2
 }
 
-class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'none) {
+class BamFile(val bamFile: File, var bamType: String, val subType: String = "none") {
   import BamFile._
   val path = bamFile.getCanonicalPath()
   var baseCount: Long = 0
 
-  val longReadType = if (bamType == 'unpaired) {
-    if (subType == 'nanopore) nanoporeLongRead
-    else if (subType == 'pacbio) pacbioLongRead
+  val longReadType = if (bamType == "unpaired") {
+    if (subType == "nanopore") nanoporeLongRead
+    else if (subType == "pacbio") pacbioLongRead
     else 0
   } else 0
 
@@ -75,7 +75,7 @@ class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'non
   }
 
   def validateSeqs(seqsOfInterest: Set[String]) = {
-    require(!getSeqNames.intersect(seqsOfInterest).isEmpty, bamFile + " doesn't have sequence for any of " + seqsOfInterest.mkString(", "))
+    require(getSeqNames.intersect(seqsOfInterest).nonEmpty, bamFile.toString + " doesn't have sequence for any of " + seqsOfInterest.mkString(", "))
   }
 
 
@@ -112,7 +112,8 @@ class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'non
     // the pairs for which only one end overlaps?  Adding 10k slop until
     // that's figured out.
     //pileUpRegion.addReads(bam.reader.queryOverlapping(name, start, stop))
-    val longRead = (bamType == 'unpaired) && (subType == 'nanopore || subType == 'pacbio)
+    val longRead = (bamType == "unpaired") && (subType == "nanopore" || subType == "pacbio")
+
     val r = reader
     val reads = r.queryOverlapping(region.name,
       (region.start-10000) max 0, (region.stop+10000) min region.contig.length).asScala
@@ -272,22 +273,22 @@ class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'non
     }
 
 
-    def nStrays = pairs.length
+    def nStrays = pairs().length
 
     def printDebug = println("mm: " + readMap1.size + "+" + readMap2.size + "=" + nStrays/2)
   }
   
   val strayMateMap = new MateMap()
 
-  def autoBam(): Symbol = {
+  def autoBam(): String = {
     val fr = pctFR
     val rf = pctRF
     val un = pctUnpaired
 
-    if (un >= fr && un >= rf) 'unpaired
+    if (un >= fr && un >= rf) "unpaired"
     else {
       val insertSize = if (rf > fr) insertStatsRF.mean else insertStatsFR.mean
-      if (insertSize >= maxFragInsertSize) 'jumps else 'frags
+      if (insertSize >= maxFragInsertSize) "jumps" else "frags"
     }
   }
   
@@ -318,7 +319,7 @@ class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'non
     r.close
 
     val totalReads = mapped + unmapped + filtered
-    var summary = bamFile + ": %d reads, %d filtered, %d mapped, %d proper".format(totalReads, filtered, mapped, proper)
+    var summary = bamFile.toString + ": %d reads, %d filtered, %d mapped, %d proper".format(totalReads, filtered, mapped, proper)
     if (Pilon.strays) summary += ", " + strayMateMap.nStrays + " stray"
     val insertCount = insertStatsFR.count + insertStatsRF.count
     if (pctFR >= minOrientationPct)
@@ -328,9 +329,9 @@ class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'non
     if (pctUnpaired >= minOrientationPct)
       summary += ", Unpaired " + pctUnpaired + "% " + insertStatsUnpaired
     summary += ", max " + maxInsertSize
-    if (bamType == 'bam) {
-      bamType = autoBam
-      summary += " " + bamType.name
+    if (bamType == "bam") {
+      bamType = autoBam()
+      summary += " " + bamType
     }
     println(summary)
   }
@@ -346,7 +347,7 @@ class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'non
     val flanks = flankRegion(region)
     var reads = readsInRegion(flanks) 
     if (Pilon.debug) println("readsInRegion flanks: " + flanks + " " + reads.length + " reads")
-    if (Pilon.strays && bamType != 'unpaired) {
+    if (Pilon.strays && bamType != "unpaired") {
       val mm = new MateMap(reads)
       if (Pilon.debug) mm.printDebug
       reads ++= mm.findStrays
@@ -377,7 +378,7 @@ class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'non
     val frPct = pctFR
 
     var mates: List[SAMRecord] = Nil
-    for ((r1, r2) <- mateMap.pairs) {
+    for ((r1, r2) <- mateMap.pairs()) {
       if (!(r1.getReadUnmappedFlag || r1.getProperPairFlag)) {
         val rc = r1.getReadNegativeStrandFlag
         val start = r1.getAlignmentStart


=====================================
src/main/scala/org/broadinstitute/pilon/GapFiller.scala
=====================================
@@ -73,7 +73,7 @@ class GapFiller(val region: GenomeRegion) {
       else if (Pilon.debug) println("Gap closed but bad size: " + break + " " + closedLength)
     }
     if (solutionOK) solution
-    else if (isGap || ((Pilon.fixList contains 'breaks) && loop.length == 0)) {
+    else if (isGap || (Pilon.fixBreaks && loop.length == 0)) {
       // build partial solution using consensus from each side, opening gap if necessary
       val fromRight = consensusFromRight(pathsFromRight)
       val fromLeft = consensusFromLeft(pathsFromLeft)
@@ -127,12 +127,10 @@ class GapFiller(val region: GenomeRegion) {
     assembler.addReads(reads)
     if (Pilon.dumpReads) writeBam(break.toString, reads)
     assembler.buildGraph
-    if (Pilon.fixList.contains('novel) && Pilon.novelContigs != Nil) {
+    if (Pilon.fixNovel && Pilon.novelContigs != Nil) {
       assembler.addGraphSeqs(Pilon.novelContigs)
-    } 
+    }
     if (Pilon.debug) println("assembleIntoBreak: " + break + " " + assembler)
-      
-    //if (Pilon.fixList contains 'novelbreaks) assembler.novel
 
     val startOffset = breakRadius
     var start = (break.start - startOffset) max region.start
@@ -169,22 +167,22 @@ class GapFiller(val region: GenomeRegion) {
         solutionSet += s
       }
     }
-    val solutions = solutionSet.toList sortWith { (a,b) => a._3.length - a._2.length > b._3.length - b._2.length }
-    var solutionLengths = Set[Int]()
-    solutions map {s => solutionLengths += s._3.length - s._2.length}
+
+    val solutions = solutionSet.toList sortWith { (a,b) => a._3.length + a._2.length < b._3.length + b._2.length }
+    var solutionLengths = solutionSet map {s => s._3.length - s._2.length}
 
     if (Pilon.debug) {
       println("breakJoins: " + solutions.length + " solutions; lengths " + solutionLengths)
       for (s <- solutions) println("  " + s)
     }
     if (solutionLengths.size == 1) {
-      if (Pilon.debug) println("...all solutions of same length, using first above")
+      if (Pilon.debug) println("...all solutions of same length, using smallest total change above")
       List(solutions.head)
     } else{
       solutions
     }
   }
-  
+
   def joinBreak(startArg: Int, forward: String, reverse: String, stopArg: Int) = {
     var start = startArg
     var stop = stopArg
@@ -203,13 +201,13 @@ class GapFiller(val region: GenomeRegion) {
     if (patch != "") {
     	val solution = trimPatch(start, patch, stop)
     	start = solution._1
-    	
+
     	if (solution._2 == solution._3) {
     	  if (Pilon.debug) println("...no change")
     	  solution
     	} else {
     	  if (Pilon.debug)
-    	    println("...start=" + start + " ref=(" + solution._2.length + ")" + solution._2 
+    	    println("...start=" + start + " ref=(" + solution._2.length + ")" + solution._2
     	    		+ " new=(" + solution._3.length + ")" + solution._3)
     	  solution
     	}
@@ -218,7 +216,7 @@ class GapFiller(val region: GenomeRegion) {
     	noSolution
     }
   }
-  
+
   def properOverlap(left: String, right: String, minOverlap: Int): String = {
 
     def substrMatch(a: String, aOffset: Int, b: String, bOffset: Int, len: Int): Boolean = {
@@ -304,7 +302,7 @@ class GapFiller(val region: GenomeRegion) {
     if (estimatedLength == 0) {
       reads = recruitReads(region)
     } else {
-      val unpairedBams = region.bamsOfType('unpaired)
+      val unpairedBams = region.bamsOfType("unpaired")
       for (bam <- unpairedBams) {
         reads ++= bam.readsInRegion(leftFlank)
         reads ++= bam.readsInRegion(rightFlank)
@@ -324,14 +322,14 @@ class GapFiller(val region: GenomeRegion) {
     assembler.addReads(reads)
     assembler.buildGraph
     val (forward, reverse, loop) = assembler.multiBridge(left, right)
-    if (Pilon.verbose && !loop.isEmpty)
+    if (Pilon.verbose && loop.nonEmpty)
       println(" loop: " + loop.length + " " + loop)
 
     var patches = Set[String]()
     for (f <- forward; r <- reverse) {
       val patch = properOverlap(f, r, GapFiller.k)
       // We should be very close in our closure estimation from alignments. Don't allow
-      if (!patch.isEmpty && (estimatedLength == 0 || (patch.length - 2 * breakRadius).abs < estimatedLengthSlop)) {
+      if (patch.nonEmpty && (estimatedLength == 0 || (patch.length - 2 * breakRadius).abs < estimatedLengthSlop)) {
         patches += patch
       }
     }
@@ -340,7 +338,7 @@ class GapFiller(val region: GenomeRegion) {
     }
     val lengths = patches.map(_.length).toSet
     if (Pilon.verbose)
-      println(patches.size + " patches; lengths " + lengths)
+      println(s"{$patches.size} patches; lengths $lengths")
 
     if (lengths.size == 1 && (estimatedLength == 0 || loop.isEmpty || (loop.length - estimatedLength).abs < estimatedLengthSlop)) {
       val patch = patches.head
@@ -358,11 +356,11 @@ class GapFiller(val region: GenomeRegion) {
 
   def breakRadius = {
     val minRadius = 3 * Assembler.K
-    val inserts = region.bamsOfType('frags) map {bam => bam.insertSizeMean /*+ bam.insertSizeSigma*/}
+    val inserts = region.bamsOfType("frags") map {bam => bam.insertSizeMean /*+ bam.insertSizeSigma*/}
     val insertMean = if (inserts.length > 0) (inserts.sum / inserts.length).round.toInt else 0
     minRadius max insertMean
   }
-    
+
   def recruitReadsFromBams(reg: Region, bams: List[BamFile]) = {
     var reads = List[SAMRecord]()
     for (b <- bams) {
@@ -373,37 +371,37 @@ class GapFiller(val region: GenomeRegion) {
     reads
   }
 
-  def recruitReadsOfType(reg: Region, bamType: Symbol) = {
+  def recruitReadsOfType(reg: Region, bamType: String) = {
     recruitReadsFromBams(reg, region.bamsOfType(bamType))
   }
 
   //def recruitFrags(reg: Region) = mateMapOfType(reg, 'frags).values.toList
-  def recruitFrags(reg: Region) = recruitReadsOfType(reg, 'frags)
+  def recruitFrags(reg: Region) = recruitReadsOfType(reg, "frags")
 
   def recruitJumps(reg: Region) = {
     var reads = List[SAMRecord]()
-    for (b <- region.bamsOfType('jumps)) {
+    for (b <- region.bamsOfType("jumps")) {
       reads ++= b.recruitBadMates(reg)
     }
-    if (Pilon.debug) 
+    if (Pilon.debug)
       println("# Recruiting jump bad mates: count=" + reads.length)
     reads
   }
 
   def recruitUnpaired(reg: Region) =
-    if (!Pilon.longread) recruitReadsOfType(reg, 'unpaired) else Nil
+    if (!Pilon.longread) recruitReadsOfType(reg, "unpaired") else Nil
 
   def recruitLocalReads(reg: Region) = recruitFrags(reg) ++ recruitUnpaired(reg)
 
   def recruitLongReads(reg: Region) = {
-    recruitReadsFromBams(reg, region.bamsOfType('unpaired).filter(_.longReadType > 0))
+    recruitReadsFromBams(reg, region.bamsOfType("unpaired").filter(_.longReadType > 0))
   }
 
   def recruitReads(reg: Region) = {
     recruitLocalReads(reg) ++ recruitJumps(reg) // ++ recruitLongReads(reg)
   }
 
-  def writeBam(fileName: String, reads: List[SAMRecord]) {
+  def writeBam(fileName: String, reads: List[SAMRecord]) = {
     val file = new File(fileName + ".sam")
     val header = new SAMFileHeader()
     header.addSequence(new SAMSequenceRecord(region.contig.getName, region.contig.getBases.length))


=====================================
src/main/scala/org/broadinstitute/pilon/GenomeFile.scala
=====================================
@@ -86,43 +86,36 @@ class GenomeFile(val referenceFile: File, val targets : String = "") {
 
     bamFiles foreach {_.validateSeqs(contigsOfInterest)}
 
-    if (Pilon.strays || Pilon.fixList.contains('circles)) {
+    if (Pilon.strays || Pilon.fixCircles) {
       println("Scanning BAMs")
-      // Scan BAMs in parallel
-      //bamFiles.filter({_.bamType != 'unpaired}).par.map(_.scan(contigsOfInterest))
-      bamFiles.par.map(_.scan(contigsOfInterest))
+      bamFiles.map(_.scan(contigsOfInterest))
     }
 
     val circles = Scaffold.findHgapCircles(bamFiles)
 
     // If assemble novel sequence up front, so that we can potentially place the
     // contigs into scaffolds when we process them.
-    if (Pilon.fixList contains 'novel)
+    if (Pilon.fixNovel)
       Pilon.novelContigs = assembleNovel(bamFiles)    
 
     var chunks = regions.map(_._2).flatten
-    if (Pilon.threads > 1) {
-      // Do parallel processing randomly to even out load if all the 
-      // big chunks are early in the file
-      chunks = Random.shuffle(chunks)
-    }
-    chunks.par foreach { r =>
+    chunks foreach { r =>
       println("Processing " + r)
       r.initializePileUps
       bamFiles foreach { r.processBam(_) }
       r.postProcess
-      if ((Pilon.fixList contains 'circles) &&
+      if ((Pilon.fixCircles) &&
         /*(r.contig.length < 5000 && r.contig.length >= 1000) ||*/ (circles contains r.contig.getName)) {
-        if (Pilon.verbose) println(r + " might be a circle!")
+        if (Pilon.verbose) println(s"$r might be a circle!")
         r.closeCircle(circles.getOrElse(r.contig.getName, 0))
       }
-      if (Pilon.vcf || !Pilon.fixList.intersect(Set('snps, 'indels, 'gaps, 'local)).isEmpty) {
+      if (Pilon.vcf || Pilon.fixSnps || Pilon.fixIndels || Pilon.fixGaps || Pilon.fixLocal) {
         r.identifyAndFixIssues
         // If we don't need pileups for VCF later, free up the memory now!
         if (!Pilon.vcf) r.finalizePileUps
       }
-      println(r + " log:")
-      r.printLog
+      println(s"$r log:")
+      r.printLog()
       println("Finished processing " + r)
     }
 
@@ -131,7 +124,7 @@ class GenomeFile(val referenceFile: File, val targets : String = "") {
                           new PrintWriter(new BufferedWriter(new FileWriter(changesFile)))
                         else null
     val fastaFile = Pilon.outputFile(".fasta")
-    val fastaWriter = if (!Pilon.fixList.isEmpty)
+    val fastaWriter = if (Pilon.fixSnps || Pilon.fixIndels || Pilon.fixGaps || Pilon.fixLocal || Pilon.fixNovel)
 	                      new PrintWriter(new BufferedWriter(new FileWriter(fastaFile)))
                       else null
 
@@ -160,7 +153,7 @@ class GenomeFile(val referenceFile: File, val targets : String = "") {
         }
       }
       // Write the FASTA all at once rather than in chunks for formatting reasons
-      if (!Pilon.fixList.isEmpty) {
+      if (fastaWriter != null) {
         println("Writing updated " + newName + " to " + fastaFile)
         val fixedRegions = reg._2 map { _.bases }
         val bases = fixedRegions reduceLeft {_ ++ _} map {_.toChar} mkString ""
@@ -168,7 +161,7 @@ class GenomeFile(val referenceFile: File, val targets : String = "") {
       }
     }
 
-    if (Pilon.fixList contains 'novel) {
+    if (Pilon.fixNovel) {
       val novelContigs = Pilon.novelContigs
       for (n <- 0 until novelContigs.length) {
         val header = "pilon_novel_%03d".format(n + 1)
@@ -176,7 +169,7 @@ class GenomeFile(val referenceFile: File, val targets : String = "") {
         writeFastaElement(fastaWriter, header, novelContigs(n))
       }
     }
-    if (!Pilon.fixList.isEmpty) fastaWriter.close
+    if (fastaWriter != null) fastaWriter.close
     if (Pilon.vcf) vcf.close
     if (Pilon.changes) changesWriter.close
     coverageSummary(bamFiles)
@@ -187,7 +180,7 @@ class GenomeFile(val referenceFile: File, val targets : String = "") {
     var totalBaseCount: Long = 0
     for ((t,files) <- bamTypes) {
       val typeBaseCount = files.map(_.baseCount).sum
-      println("Mean " + t.name + " coverage: " + roundDiv(typeBaseCount, genomeSize))
+      println("Mean " + t + " coverage: " + roundDiv(typeBaseCount, genomeSize))
       totalBaseCount += typeBaseCount
     }
     println("Mean total coverage: " + roundDiv(totalBaseCount, genomeSize))
@@ -204,16 +197,16 @@ class GenomeFile(val referenceFile: File, val targets : String = "") {
       if (Pilon.verbose) print("..." + contig.getName)
     }
     //genomeGraph.buildGraph
-    if (Pilon.verbose) println
+    if (Pilon.verbose) println()
     val assembler = new Assembler()
-    bamFiles filter {_.bamType != 'jumps} foreach { bam =>
+    bamFiles filter {_.bamType != "jumps"} foreach { bam =>
       val reads = bam.getUnaligned
       print("..." + reads.length + " unmapped reads")
       assembler.addReads(reads)
     }
     print("...assembling contigs")
     val contigs = assembler.novel(genomeGraph)
-    println
+    println()
     val contigLengths = contigs map {_.length}
     println("Assembled %d novel contigs containing %d bases".format(contigs.length, contigLengths.sum))
     contigs
@@ -244,7 +237,7 @@ class GenomeFile(val referenceFile: File, val targets : String = "") {
 
   def parseTargetFile(fileName: String) = {
     try {
-      Source.fromFile(fileName).getLines.flatMap({parseTargetString(_)}).toList
+      Source.fromFile(fileName).getLines().flatMap({parseTargetString(_)}).toList
     } catch {
       case ioe: Exception => Nil
     }


=====================================
src/main/scala/org/broadinstitute/pilon/GenomeRegion.scala
=====================================
@@ -63,25 +63,27 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
 
   var logString = ""
 
-  // Hack alert...if we're running multithreaded, buffer our output for atomic
-  // write when we're done. Otherwise, we would rather see output as it happens.
   def log(s: String) = {
-    if (Pilon.threads > 1)
-      logString += s
-    else
-      print(s)
+    print(s)
   }
   
   def logln(s: String = "") = {
     log(s + "\n")
   }
   
-  def printLog() = if (Pilon.threads > 1) print(logString)
+  def printLog() = print(logString)
   
-  var changeMap = Map.empty[Int, (Symbol, PileUp)]
 
-  def addChange(loc: Int, kind: Symbol, pu: PileUp) = {
-    if (kind == 'amb) ambiguous(loc) = true
+  object ChangeKind extends Enumeration {
+    type ChangeKind = Value
+    val SNP, INS, DEL, AMB = Value
+  }
+  import ChangeKind._
+
+  var changeMap = Map.empty[Int, (ChangeKind, PileUp)]
+
+  def addChange(loc: Int, kind: ChangeKind, pu: PileUp) = {
+    if (kind == AMB) ambiguous(loc) = true
     else changed(loc) = true
     changeMap += (loc -> (kind, pu))
   }
@@ -134,13 +136,13 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
   //val bams = Map.empty[Symbol, BamRegion]
   var bams = List[BamFile]()
 
-  def bamsOfType(bamType: Symbol) = bams filter { _.bamType == bamType }
+  def bamsOfType(bamType: String) = bams filter { _.bamType == bamType }
 
-  def nanoporeBams = bamsOfType('unpaired) filter { _.subType == 'nanopore}
+  def nanoporeBams = bamsOfType("unpaired") filter { _.subType == "nanopore"}
 
-  def pacbioBams = bamsOfType('unpaired) filter { _.subType == 'pacbio }
+  def pacbioBams = bamsOfType("unpaired") filter { _.subType == "pacbio" }
 
-  def fragBams = bamsOfType('frags)
+  def fragBams = bamsOfType("frags")
 
   def longReadOnly = Pilon.longread && fragBams.isEmpty
 
@@ -230,7 +232,7 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
     //meanReadLength = pileUpRegion.meanReadLength
 
     // Pass 1: pull out values from pileups & call base changes
-    val fixamb = Pilon.iupac || (Pilon.fixList contains 'amb)
+    val fixamb = Pilon.iupac || Pilon.fixAmb
 
     for (i <- 0 until size) {
       val pu = pileUpRegion(i)
@@ -253,9 +255,9 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
       if (n >= minDepth && r != 'N' && !deleted(i) && bc.called) {
         if (homo && b == r && bc.highConfidence && !bc.indel)
           confirmed(i) = true
-        else if (bc.isInsertion && bc.homoIndel) addChange(i, 'ins, pu)
+        else if (bc.isInsertion && bc.homoIndel) addChange(i, INS, pu)
         else if (bc.isDeletion && bc.homoIndel) {
-          addChange(i, 'del, pu)
+          addChange(i, DEL, pu)
           for (j <- 1 until bc.deletion.length) {
             deleted(i + j) = true
             pileUpRegion(i + j).deletions += pu.deletions
@@ -263,8 +265,8 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
         } else if (b != r && bc.score > 0) {
           // for ambiguous bases, fix them if --fix fixamb or if original base
           // not one of the top two alternatives
-          if (homo) addChange(i, 'snp, pu)
-          else if (fixamb || bc.altBase != r) addChange(i, 'amb, pu)
+          if (homo) addChange(i, SNP, pu)
+          else if (fixamb || bc.altBase != r) addChange(i, AMB, pu)
         }
       }
     }
@@ -283,15 +285,15 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
   }
 
   def processBam(bam: BamFile) = {
-    log(bam.bamType.name + " " + bam.bamFile + ": ")
+    log(bam.bamType + " " + bam.bamFile + ": ")
     // This is a real kludge...
     val covBefore = new Array[Int](size)
-    if (bam.bamType != 'jumps)
+    if (bam.bamType != "jumps")
       for (i <- 0 until size) 
         covBefore(i) = pileUpRegion.pileups(i).depth.toInt
     val coverage = bam.process(this)
     logln("coverage " + coverage.toString)
-    if (bam.bamType != 'jumps)
+    if (bam.bamType != "jumps")
       for (i <- 0 until size) 
         fragCoverage(i) += pileUpRegion.pileups(i).depth.toInt - covBefore(i)
     bams ::= bam
@@ -307,8 +309,8 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
       logln("# IdentifyIssues: " + this)
       identifyIssueRegions
     }
-    val fixSnps = Pilon.fixList contains 'snps
-    val fixIndels = Pilon.fixList contains 'indels
+    val fixSnps = Pilon.fixSnps
+    val fixIndels = Pilon.fixIndels
     var snps = 0
     var ins = 0
     var dels = 0
@@ -323,10 +325,10 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
       val cBase = bc.base
       if (!excluded(i)) {
         kind match {
-          case 'snp =>
+          case SNP =>
             if (fixSnps) snpFixList ::= (loc, rBase.toString, cBase.toString)
             snps += 1
-          case 'amb =>
+          case AMB =>
             if (fixSnps && !Pilon.longread) {
               if (Pilon.iupac) {
                 // we put these on the small fix list because iupac codes can mess up assembly
@@ -337,12 +339,12 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
               }
               amb += 1
             }
-          case 'ins =>
+          case INS =>
             val insert = bc.insertion
             if (fixIndels) smallFixList ::= (loc, "", insert)
             ins += 1
             insBases += insert.length
-          case 'del =>
+          case DEL =>
             val deletion = bc.deletion
             if (fixIndels) smallFixList ::= (loc, deletion, "")
             dels += 1
@@ -357,15 +359,15 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
     val nonN = originalBases count {x => x != 'N'}
     logln("Confirmed " + nConfirmed + " of " + nonN + " bases (" +
       (nConfirmed * 100.0 / nonN).formatted("%.2f") + "%)")
-    if (Pilon.fixList contains 'snps) log("Corrected ") else log("Found ")
-    if (Pilon.diploid) log((snps + amb) + " snps")
+    if (Pilon.fixSnps) log("Corrected ") else log("Found ")
+    if (Pilon.diploid) log(s"${snps+amb} snps")
     else {
-      log(snps + " snps; ")
-      log(amb + " ambiguous bases")
+      log(s"$snps snps; ")
+      log(s"$amb ambiguous bases")
     }
-    if (Pilon.fixList contains 'indels) log("; corrected ") else log("; found ")
-    log(ins + " small insertions totaling " + insBases + " bases")
-    logln(", " + dels + " small deletions totaling " + delBases + " bases")
+    if (Pilon.fixIndels) log("; corrected ") else log("; found ")
+    log(s"$ins small insertions totaling $insBases bases")
+    logln(s", $dels small deletions totaling $delBases bases")
 
     // Report large collapsed regions (possible segmental duplication)
     val duplications = duplicationEvents
@@ -378,7 +380,7 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
     fixIssues(snpFixList)
 
     // Try to fill gaps
-    if ((Pilon.fixList contains 'gaps) && gaps.nonEmpty) {
+    if (Pilon.fixGaps && gaps.nonEmpty) {
       logln("# Attempting to fill gaps")
       for (gap <- gaps) {
         val filler = new GapFiller(this)
@@ -392,7 +394,7 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
 
     // Try to reassemble around possible contiguity breaks, but stay away from gaps
     val breaks = possibleBreaks
-    if ((Pilon.fixList contains 'local) && breaks.nonEmpty) {
+    if (Pilon.fixLocal && breaks.nonEmpty) {
       logln("# Attempting to fix local continuity breaks")
       for (break <- breaks) {
         val filler = new GapFiller(this)
@@ -520,21 +522,21 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
     val rBase = refBase(locus(i))
     val cBase = pu.baseCall.base
     kind match {
-      case 'snp =>
-        log(name + " " + locus(i) + " " + kind.name + " " + rBase + " " + cBase)
+      case SNP =>
+        log(name + " " + locus(i) + " snp " + rBase + " " + cBase)
         if (Pilon.debug) log(" " + pu)
         log(endLine)
-      case 'ins =>
-        log(name + " " + locus(i) + " " + kind.name + " " + "." + " " + pu.baseCall.insertion)
+      case INS =>
+        log(name + " " + locus(i) + " ins " + "." + " " + pu.baseCall.insertion)
         if (Pilon.debug) log(" " + pu)
         log(endLine)
-      case 'del =>
-        log(name + " " + locus(i) + " " + kind.name + " " + pu.baseCall.deletion + " " + ".")
+      case DEL =>
+        log(name + " " + locus(i) + " del " + pu.baseCall.deletion + " " + ".")
         if (Pilon.debug) log(" " + pu)
         log(endLine)
-      case 'amb =>
+      case AMB =>
         if (Pilon.verbose && rBase != cBase) {
-          log(name + " " + locus(i) + " " + kind.name + " " + rBase + " " + cBase)
+          log(name + " " + locus(i) + " amb " + rBase + " " + cBase)
           if (Pilon.debug) log(" " + pu)
           log(endLine)
         }
@@ -640,7 +642,7 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
     }
   }
 
-  def writeChanges(changes: PrintWriter, newName: String = name, offset: Int = 0) {
+  def writeChanges(changes: PrintWriter, newName: String = name, offset: Int = 0)  = {
     val fixes = fixFixList(snpFixList ++ smallFixList ++ bigFixList)
     var delta = 0
     for (fix <- fixes) {


=====================================
src/main/scala/org/broadinstitute/pilon/PileUp.scala
=====================================
@@ -172,7 +172,7 @@ class PileUp {
       else base.toString //+ (if (!homo) "/" + altBase else "")
     }
 
-    def baseMatch(refBase: Char) {//
+    def baseMatch(refBase: Char)  = {//
       refBase == base	// TODO: handle IUPAC codes
     }
 


=====================================
src/main/scala/org/broadinstitute/pilon/PileUpRegion.scala
=====================================
@@ -19,7 +19,7 @@
 
 package org.broadinstitute.pilon
 
-import scala.collection.JavaConverters._
+import scala.jdk.CollectionConverters._
 import htsjdk.samtools._
 import Utils._
 
@@ -220,7 +220,7 @@ class PileUpRegion(name: String, start: Int, stop: Int)
   }
 
   def dump = {
-    for (i <- 0 to size - 1) println(locus(i) + ": " + pileups(i))
+    for (i <- 0 to size - 1) println(s"${locus(i)}: ${pileups(i)}")
   }
 
   def postProcess = {


=====================================
src/main/scala/org/broadinstitute/pilon/Pilon.scala
=====================================
@@ -22,9 +22,18 @@ package org.broadinstitute.pilon
 import java.io.File
 
 object Pilon {
-  // types of fixing we know about
-  val fixChoices = Set('snps, 'indels, 'gaps, 'local)
-  val experimentalFixChoices = Set('amb, 'breaks, 'circles, 'novel, 'scaffolds)
+  val fixChoices= Set("snps", "indels", "gaps", "local")
+  val experimentalFixChoices = Set("amb", "breaks", "circles", "novel", "scaffolds")
+
+  var fixSnps = false
+  var fixIndels = false
+  var fixGaps = false
+  var fixLocal = false
+  var fixAmb = false
+  var fixBreaks = false
+  var fixCircles = false
+  var fixNovel = false
+  var fixScaffolds = false
 
   // input parameters
   var bamFiles = List[BamFile]()
@@ -61,7 +70,6 @@ object Pilon {
   var pacbio = false
   var nanopore = false
   var strays = true
-  var threads = 1
   var trSafe = true
 
   // Global computed data
@@ -70,7 +78,7 @@ object Pilon {
   // for logging to output files
   var commandArgs = Array[String]()
 
-  def main(args: Array[String]) {
+  def main(args: Array[String]) = {
     commandArgs = args
     println(Version.version)
     optionParse(args.toList)
@@ -84,11 +92,9 @@ object Pilon {
       }
     }
 
-    setDefaultParallelism(threads)
-
     // Stray computation is expensive up front, so only turn it on
     // if we're doing local reassembly
-    strays &= (fixList contains 'gaps) || (fixList contains 'local) || (fixList contains 'scaffolds)
+    strays &= fixGaps || fixLocal || fixScaffolds
 
     if (bamFiles.length == 0) {
       println(usage)
@@ -101,7 +107,7 @@ object Pilon {
     println("Genome: " + genomePath)
     val genome = new GenomeFile(new File(genomePath), targets)
 
-    println("Fixing " + (fixList map {_.name} mkString(", ")))
+    println("Fixing " + (fixList mkString(", ")))
     genome.processRegions(bamFiles)
 
     if (tracks) {
@@ -111,9 +117,8 @@ object Pilon {
 
   }
 
-  def optionParse(list: List[String]) : Unit = {
+  def optionParse(list: List[String]) : Unit  = {
     list match {
-      case Nil => Nil
       case "--help" :: tail =>
         println(usage)
         print(help)
@@ -123,7 +128,7 @@ object Pilon {
         // println(Version.version)
         sys.exit(0)
       case "--bam" :: value :: tail =>
-        bamFiles ::= new BamFile(new File(value), 'bam)
+        bamFiles ::= new BamFile(new File(value), "bam")
         optionParse(tail)
       case "--changes" :: tail =>
         changes = true
@@ -157,7 +162,7 @@ object Pilon {
         flank = value.toInt
         optionParse(tail)
       case "--frags" :: value :: tail =>
-        bamFiles ::= new BamFile(new File(value), 'frags)
+        bamFiles ::= new BamFile(new File(value), "frags")
         optionParse(tail)
       case "--gapmargin" :: value :: tail =>
         gapMargin = value.toInt
@@ -166,7 +171,7 @@ object Pilon {
         genomePath = value
         optionParse(tail)
       case "--jumps" :: value :: tail =>
-        bamFiles ::= new BamFile(new File(value), 'jumps)
+        bamFiles ::= new BamFile(new File(value), "jumps")
         optionParse(tail)
       case "--K" :: value :: tail =>
         Assembler.K = value.toInt
@@ -187,7 +192,7 @@ object Pilon {
         multiClosure = true
         optionParse(tail)
       case "--nanopore" :: value :: tail =>
-        bamFiles ::= new BamFile(new File(value), 'unpaired, 'nanopore)
+        bamFiles ::= new BamFile(new File(value), "unpaired", "nanopore")
         longread = true
         nanopore = true
         optionParse(tail)
@@ -204,7 +209,7 @@ object Pilon {
         outdir = value
         optionParse(tail)
       case "--pacbio" :: value :: tail =>
-        bamFiles ::= new BamFile(new File(value), 'unpaired, 'pacbio)
+        bamFiles ::= new BamFile(new File(value), "unpaired", "pacbio")
         longread = true
         pacbio = true
         optionParse(tail)
@@ -212,7 +217,8 @@ object Pilon {
         targets = value
         optionParse(tail)
       case "--threads" :: value :: tail =>
-        threads = value.toInt
+        println("--threads argument no longer supported; ignoring!")
+        //threads = value.toInt
         optionParse(tail)
       case "--tracks" :: tail =>
         tracks = true
@@ -221,12 +227,12 @@ object Pilon {
       //  trSafe = true
       //  optionParse(tail)
       case "--unpaired" :: value :: tail =>
-        bamFiles ::= new BamFile(new File(value), 'unpaired)
+        bamFiles ::= new BamFile(new File(value), "unpaired")
         optionParse(tail)
       case "--variant" :: tail =>
         // variant calling mode
         vcf = true
-        fixList += 'breaks
+        fixList += "breaks"
         optionParse(tail)
         //multiClosure = true
       case "--vcf" :: tail =>
@@ -244,10 +250,21 @@ object Pilon {
       case option :: tail =>
         println("Unknown option " + option)
         sys.exit(1)
+      case Nil =>
     }
+    fixSnps = fixList contains "snps"
+    fixIndels = fixList contains "indels"
+    fixGaps = fixList contains "gaps"
+    fixLocal = fixList contains "local"
+    fixAmb = fixList contains "amb"
+    fixBreaks = fixList contains "breaks"
+    fixCircles = fixList contains "circles"
+    fixNovel = fixList contains "novel"
+    fixScaffolds = fixList contains "scaffolds"
   }
   
   def parseFixList(fix: String) = {
+    // types of fixing we know about
     val fixes = fix.split(",")
     if (fix(0) != '+' && fix(0) != '-') fixList = Set.empty
     for (f <- fixes) {
@@ -255,7 +272,7 @@ object Pilon {
       else if (f == "none") fixList = Set.empty
       else {
         val plusMinus = if (f(0) == '-') f(0) else '+'
-        val fsym = if (f(0) == plusMinus) Symbol(f.substring(1)) else Symbol(f)
+        val fsym = if (f(0) == plusMinus) f.substring(1) else f
         if (fixChoices contains fsym) {
           if (plusMinus == '+') fixList += fsym
           else fixList -= fsym
@@ -264,14 +281,14 @@ object Pilon {
           println("Warning: experimental fix option " + f)
           if (plusMinus == '+') fixList += fsym
           else fixList -= fsym
-        } else if (fsym == 'bases) {
+        } else if (fsym == "bases") {
           // for backward compatibility, 'bases applies to both 'snps and 'indels
           if (plusMinus == '+') {
-            fixList += 'snps
-            fixList += 'indels
+            fixList += "snps"
+            fixList += "indels"
           } else {
-            fixList -= 'snps
-            fixList -= 'indels
+            fixList -= "snps"
+            fixList -= "indels"
           }
         } else {
           println("Error: unknown fix option " + f)
@@ -286,21 +303,8 @@ object Pilon {
     val filePath = if (Pilon.outdir == "") fileName else Pilon.outdir + "/" + fileName
     new File(filePath)
   }
-  
-  // Ugly, but thank you http://stackoverflow.com/questions/17865823
-  def setDefaultParallelism(numThreads: Int): Unit = {
-    val parPkgObj = scala.collection.parallel.`package`
-    val defaultTaskSupportField = parPkgObj.getClass.getDeclaredFields.find{
-      _.getName == "defaultTaskSupport"
-    }.get
 
-    defaultTaskSupportField.setAccessible(true)
-    defaultTaskSupportField.set(
-      parPkgObj,
-      new scala.collection.parallel.ForkJoinTaskSupport(
-        new scala.concurrent.forkjoin.ForkJoinPool(numThreads)))
-  }
-  
+
   val usage = """
     Usage: pilon --genome genome.fasta [--frags frags.bam] [--jumps jumps.bam] [--unpaired unpaired.bam]
                  [...other options...]
@@ -324,6 +328,10 @@ object Pilon {
            --bam any.bam
               A bam file of unknown type; Pilon will scan it and attempt to classify it as one
               of the above bam types.
+           --nanopore ont.bam
+              A bam file containing Oxford Nanopore read alignments. Experimental.
+           --pacbio pb.bam
+              A bam file containing Pacific Biosciences read alignments. Experimental.
          OUTPUTS:
            --output prefix
               Prefix for output files
@@ -377,8 +385,6 @@ object Pilon {
               scaffold00001 and coordinates 10000-20000 of scaffold00002.
               If "targetlist" is the name of a file, each line will be treated as a target
               specification.
-           --threads
-              Degree of parallelism to use for certain processing (default 1). Experimental.
            --verbose
               More verbose output.
            --debug


=====================================
src/main/scala/org/broadinstitute/pilon/Scaffold.scala
=====================================
@@ -28,7 +28,7 @@ package org.broadinstitute.pilon
  */
 
 
-import scala.collection.JavaConverters._
+import scala.jdk.CollectionConverters._
 import collection.mutable.{ Map, HashMap }
 import htsjdk.samtools._
 
@@ -221,7 +221,7 @@ object Scaffold {
   }
 
   def analyzeStrays(bam: BamFile) = {
-    val mm = bam.strayMateMap.pairs
+    val mm = bam.strayMateMap.pairs()
     val sigma = bam.insertSizeSigma
     val scaffolds = bam.getSeqs
     val scaffoldSizes = scaffolds.map({_.getSequenceLength})
@@ -269,16 +269,16 @@ object Scaffold {
     }
   }
 
-  def analyze(bamFiles: List[BamFile]) {
+  def analyze(bamFiles: List[BamFile]) = {
     println("Analyze scaffolds")
-    for (bam <- bamFiles filter {_.bamType == 'jumps})
+    for (bam <- bamFiles filter {_.bamType == "jumps"})
       analyzeStrays(bam)
   }
 
 
   def findHgapCircles(bamFiles: List[BamFile]): Map[String, Int] = {
-    if (Pilon.fixList contains 'circles) {
-      val endAlignments = bamFiles filter {_.bamType == 'unpaired} flatMap findEndAlignments
+    if (Pilon.fixCircles) {
+      val endAlignments = bamFiles filter {_.bamType == "unpaired"} flatMap findEndAlignments
       findCircles(endAlignments)
     } else return new HashMap()
   }


=====================================
src/main/scala/org/broadinstitute/pilon/Vcf.scala
=====================================
@@ -90,7 +90,7 @@ class Vcf(val file: File, val contigsWithSizes: List[(String, Int)] = Nil) {
         val p = pileUp.delPct
         //(rBase + bcString, rBase.toString, "1/1", depth - pileUp.deletions, pileUp.deletions)
         //(rBase + bcString, rBase.toString, callType, pileUp.mqSum - pileUp.delQual, pileUp.delQual)
-        (rBase + bcString, rBase.toString, callType, 100 - p, p)
+        (rBase.toString + bcString, rBase.toString, callType, 100 - p, p)
       } else if (indelOk && !embedded && bc.isInsertion) {
         loc -= 1
         val rBase = region.refBase(loc)
@@ -98,7 +98,7 @@ class Vcf(val file: File, val contigsWithSizes: List[(String, Int)] = Nil) {
         val p = pileUp.insPct
         //(rBase.toString, rBase + bcString, "1/1", depth - pileUp.insertions, pileUp.insertions)
         //(rBase.toString, rBase + bcString, callType, pileUp.mqSum - pileUp.insQual, pileUp.insQual)
-        (rBase.toString, rBase + bcString, callType, 100 - p, p)
+        (rBase.toString, rBase.toString + bcString, callType, 100 - p, p)
       } else if (bc.homo) {
         val rBase = region.refBase(loc)
         if (rBase == bc.base || bcString == "N")
@@ -178,8 +178,8 @@ class Vcf(val file: File, val contigsWithSizes: List[(String, Int)] = Nil) {
   def writeFixRecord(region: GenomeRegion, fix: GenomeRegion.Fix) = {
     val loc = fix._1 - 1
     val rBase = region.refBase(loc)
-    val ref = rBase + fix._2
-    val alt = rBase + fix._3
+    val ref = rBase.toString + fix._2
+    val alt = rBase.toString + fix._3
     val svlen = alt.length - ref.length
     val svend = loc + ref.length - 1
     val svtype = if (svlen < 0) "DEL" else "INS"
@@ -195,7 +195,7 @@ class Vcf(val file: File, val contigsWithSizes: List[(String, Int)] = Nil) {
     var loc = dup.start - 1
     val rBase = region.refBase(loc)
     var line = region.name + tab + loc + tab + "." + tab
-    line += rBase + tab + "<DUP>" + tab + "." + tab + "PASS" + tab
+    line += rBase.toString + tab + "<DUP>" + tab + "." + tab + "PASS" + tab
     line += "SVTYPE=DUP;SVLEN=" + dup.size + ";END=" + dup.stop + ";IMPRECISE"
     line += tab + "GT" + tab + "./."
     writer.println(line)



View it on GitLab: https://salsa.debian.org/med-team/pilon/-/commit/70bbfe31222b393a7de4c2ff3491f78747990f58

-- 
View it on GitLab: https://salsa.debian.org/med-team/pilon/-/commit/70bbfe31222b393a7de4c2ff3491f78747990f58
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210204/593a0d73/attachment-0001.html>


More information about the debian-med-commit mailing list