[med-svn] [Git][med-team/pilon][master] 4 commits: New upstream version 1.23+dfsg

Andreas Tille gitlab at salsa.debian.org
Sat Dec 8 20:21:35 GMT 2018


Andreas Tille pushed to branch master at Debian Med / pilon


Commits:
debb98f9 by Andreas Tille at 2018-12-08T20:13:45Z
New upstream version 1.23+dfsg
- - - - -
42844f49 by Andreas Tille at 2018-12-08T20:13:45Z
Update upstream source from tag 'upstream/1.23+dfsg'

Update to upstream version '1.23+dfsg'
with Debian dir 60e5722e6743120daccc1d952dcc9ae64d59b14b
- - - - -
339251dc by Andreas Tille at 2018-12-08T20:13:45Z
New upstream version

- - - - -
90e6b3a4 by Andreas Tille at 2018-12-08T20:17:01Z
Upload to unstable

- - - - -


22 changed files:

- COPYRIGHT
- build.sbt
- build.sh
- debian/changelog
- project/plugins.sbt
- src/main/scala/org/broadinstitute/pilon/Assembler.scala
- src/main/scala/org/broadinstitute/pilon/BamFile.scala
- src/main/scala/org/broadinstitute/pilon/BaseSum.scala
- src/main/scala/org/broadinstitute/pilon/Bases.scala
- src/main/scala/org/broadinstitute/pilon/GapFiller.scala
- src/main/scala/org/broadinstitute/pilon/GenomeFile.scala
- src/main/scala/org/broadinstitute/pilon/GenomeRegion.scala
- src/main/scala/org/broadinstitute/pilon/NormalDistribution.scala
- src/main/scala/org/broadinstitute/pilon/PileUp.scala
- src/main/scala/org/broadinstitute/pilon/PileUpRegion.scala
- src/main/scala/org/broadinstitute/pilon/Pilon.scala
- src/main/scala/org/broadinstitute/pilon/Region.scala
- src/main/scala/org/broadinstitute/pilon/Scaffold.scala
- src/main/scala/org/broadinstitute/pilon/Tracks.scala
- src/main/scala/org/broadinstitute/pilon/Utils.scala
- src/main/scala/org/broadinstitute/pilon/Vcf.scala
- src/main/scala/org/broadinstitute/pilon/Version.scala


Changes:

=====================================
COPYRIGHT
=====================================
@@ -1,4 +1,4 @@
-Copyright 2012-2014 Broad Institute, Inc.
+Copyright (c) 2012-2018 Broad Institute, Inc.
 
 Pilon is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License version 2


=====================================
build.sbt
=====================================
@@ -1,13 +1,12 @@
 name := "pilon"
 
-version := "1.22"
+version := "1.23"
 
-scalaVersion := "2.11.8"
+scalaVersion := "2.12.7"
 
 scalacOptions += "-deprecation"
 
 scalacOptions += "-feature"
 
-Seq(com.github.retronym.SbtOneJar.oneJarSettings: _*)
-
 libraryDependencies += "commons-lang" % "commons-lang" % "2.6"
+


=====================================
build.sh
=====================================
@@ -11,6 +11,6 @@ sed -e "s/\(date.*=\).*/\\1 \"$commitdate\"/" \
     -e "s/\(sbt.*=\).*/\\1 \"$version\"/" \
     <$tmp >$f
 #sbt $* package
-sbt $* one-jar
-ln -sf `pwd`/target/scala-2.11/pilon_2.11-$version-one-jar.jar ~/lib/pilon/pilon-dev.jar
+sbt $* assembly
+ln -sf `pwd`/target/scala-2.12/pilon-assembly-$version.jar ~/lib/pilon/pilon-dev.jar
 mv $tmp $f


=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+pilon (1.23+dfsg-1) unstable; urgency=medium
+
+  * New upstream version
+
+ -- Andreas Tille <tille at debian.org>  Sat, 08 Dec 2018 21:13:54 +0100
+
 pilon (1.22+dfsg-3) unstable; urgency=medium
 
   * Drop versoin restriction from default-jre


=====================================
project/plugins.sbt
=====================================
@@ -1 +1 @@
-addSbtPlugin("com.github.retronym" % "sbt-onejar" % "0.8")
+addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.6")


=====================================
src/main/scala/org/broadinstitute/pilon/Assembler.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,12 +14,11 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon
-import scala.annotation.tailrec
-import collection.JavaConversions._
-import collection.mutable.{ Map, HashMap, Set, HashSet }
+import collection.mutable.{ HashMap, HashSet }
 import htsjdk.samtools._
 
 object Assembler {


=====================================
src/main/scala/org/broadinstitute/pilon/BamFile.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,13 +14,14 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon
 
 import collection.mutable.{HashMap,Map}
 import java.io.File
-import scala.collection.JavaConversions._
+import scala.collection.JavaConverters._
 import htsjdk.samtools._
 
 object BamFile {
@@ -28,13 +29,23 @@ object BamFile {
   val maxInsertSizes = Map(('frags -> 500), ('jumps -> 10000), ('unpaired -> 10000), ('bam -> 10000))
   val minOrientationPct = 10
   val maxFragInsertSize = 700
+  // long read type codes
+  val notLongRead = 0
+  val nanoporeLongRead = 1
+  val pacbioLongRead = 2
 }
 
-class BamFile(val bamFile: File, var bamType: Symbol) {
+class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'none) {
   import BamFile._
   val path = bamFile.getCanonicalPath()
   var baseCount: Long = 0
 
+  val longReadType = if (bamType == 'unpaired) {
+    if (subType == 'nanopore) nanoporeLongRead
+    else if (subType == 'pacbio) pacbioLongRead
+    else 0
+  } else 0
+
   def reader = {
     //val r = new SAMFileReader(bamFile, new File(path + BamFile.indexSuffix))
     val r = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(bamFile)
@@ -51,7 +62,7 @@ class BamFile(val bamFile: File, var bamType: Symbol) {
   def getSeqs = {
     // returns an array of sequence records indexed by bam seq index
     val r = reader
-    val seqs = r.getFileHeader.getSequenceDictionary.getSequences
+    val seqs = r.getFileHeader.getSequenceDictionary.getSequences.asScala
     r.close
     val seqArray = new Array[SAMSequenceRecord](seqs.length)
     for (s <- seqs) seqArray(s.getSequenceIndex) = s
@@ -82,7 +93,7 @@ class BamFile(val bamFile: File, var bamType: Symbol) {
 
   def getUnaligned = {
     val r = reader
-    val reads = r.queryUnmapped.filter(validateRead(_)).toList
+    val reads = r.queryUnmapped.asScala.filter(validateRead(_)).toList
     r.close
     reads
   }
@@ -90,21 +101,21 @@ class BamFile(val bamFile: File, var bamType: Symbol) {
   def validateRead(read: SAMRecord) = {
     (Pilon.nonPf || !read.getReadFailsVendorQualityCheckFlag) &&
       (Pilon.duplicates || !read.getDuplicateReadFlag) &&
-    !read.getNotPrimaryAlignmentFlag
+    !read.isSecondaryAlignment
   }
 
   
   def process(region: GenomeRegion, printInterval: Int = 100000) = {
-
     val pileUpRegion = region.pileUpRegion
 
     // Ugh. Is there a way for samtools to do the right thing here and get
     // the pairs for which only one end overlaps?  Adding 10k slop until
     // that's figured out.
     //pileUpRegion.addReads(bam.reader.queryOverlapping(name, start, stop))
+    val longRead = (bamType == 'unpaired) && (subType == 'nanopore || subType == 'pacbio)
     val r = reader
     val reads = r.queryOverlapping(region.name,
-      (region.start-10000) max 0, (region.stop+10000) min region.contig.length)
+      (region.start-10000) max 0, (region.stop+10000) min region.contig.length).asScala
     val readsBefore = pileUpRegion.readCount
     val baseCountBefore = pileUpRegion.baseCount
     val covBefore = pileUpRegion.coverage
@@ -118,7 +129,7 @@ class BamFile(val bamFile: File, var bamType: Symbol) {
         print("..." + lastLoc)
       }
       if (validateRead(read)) {
-        val insertSize = pileUpRegion.addRead(read, region.contigBases)
+        val insertSize = pileUpRegion.addRead(read, region.contigBases, longReadType)
         if (insertSize > huge) {
           //if (Pilon.debug) println("WARNING: huge insert " + insertSize + " " + read)
         }
@@ -282,7 +293,7 @@ class BamFile(val bamFile: File, var bamType: Symbol) {
   
   def scan(seqsOfInterest: Set[String]) = {
     val r = reader
-    for (read <- r.iterator) {
+    for (read <- r.iterator.asScala) {
       if (!validateRead(read)) filtered += 1
       else if (read.getReadUnmappedFlag) unmapped += 1
       else {
@@ -326,8 +337,8 @@ class BamFile(val bamFile: File, var bamType: Symbol) {
   
   def readsInRegion(region: Region) = {
     val r = reader
-    val reads = r.queryOverlapping(region.name, region.start, region.stop).filter(validateRead(_)).toList
-    r.close
+    val reads = r.queryOverlapping(region.name, region.start, region.stop).asScala.filter(validateRead(_)).toList
+    r.close()
     reads
   }
 


=====================================
src/main/scala/org/broadinstitute/pilon/BaseSum.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,6 +14,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon


=====================================
src/main/scala/org/broadinstitute/pilon/Bases.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,6 +14,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon


=====================================
src/main/scala/org/broadinstitute/pilon/GapFiller.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,11 +14,12 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon
 
-import collection.JavaConversions._
+
 import collection.mutable.Map
 import java.io.File
 import htsjdk.samtools._
@@ -48,6 +49,8 @@ class GapFiller(val region: GenomeRegion) {
   def assembleAcrossBreak(break: Region, isGap: Boolean) = {
     // TODO: ugh, this is ugly and really wants to be re-written.
     //val reads = if (break.size < 100 && isGap) recruitLocalReads(break) else recruitReads(break)
+    //val longReads = recruitLongReads(break)
+    //if (Pilon.verbose) println("%d long reads".format(longReads.length))
     val reads = recruitReads(break)
     var (start, pathsFromLeft, pathsFromRight, stop, loop) = assembleIntoBreak(break, reads)
     if (Pilon.verbose) println("L=%d R=%d".format(pathsFromLeft.length, pathsFromRight.length))
@@ -360,17 +363,20 @@ class GapFiller(val region: GenomeRegion) {
     minRadius max insertMean
   }
     
-  def recruitReadsOfType(reg: Region, bamType: Symbol) = {
-    val bams = region.bamsOfType(bamType)
+  def recruitReadsFromBams(reg: Region, bams: List[BamFile]) = {
     var reads = List[SAMRecord]()
     for (b <- bams) {
       reads ++= b.recruitFlankReads(reg)
     }
-    if (Pilon.debug) 
-      println("# Recruiting " + bamType.name + " reads: count=" + reads.length)
+    if (Pilon.debug)
+      println("# Recruiting reads from " + bams.toString + ": count=" + reads.length)
     reads
   }
 
+  def recruitReadsOfType(reg: Region, bamType: Symbol) = {
+    recruitReadsFromBams(reg, region.bamsOfType(bamType))
+  }
+
   //def recruitFrags(reg: Region) = mateMapOfType(reg, 'frags).values.toList
   def recruitFrags(reg: Region) = recruitReadsOfType(reg, 'frags)
 
@@ -384,11 +390,18 @@ class GapFiller(val region: GenomeRegion) {
     reads
   }
 
-  def recruitUnpaired(reg: Region) = recruitReadsOfType(reg, 'unpaired)
+  def recruitUnpaired(reg: Region) =
+    if (!Pilon.longread) recruitReadsOfType(reg, 'unpaired) else Nil
 
   def recruitLocalReads(reg: Region) = recruitFrags(reg) ++ recruitUnpaired(reg)
 
-  def recruitReads(reg: Region) = recruitLocalReads(reg) ++ recruitJumps(reg)
+  def recruitLongReads(reg: Region) = {
+    recruitReadsFromBams(reg, region.bamsOfType('unpaired).filter(_.longReadType > 0))
+  }
+
+  def recruitReads(reg: Region) = {
+    recruitLocalReads(reg) ++ recruitJumps(reg) // ++ recruitLongReads(reg)
+  }
 
   def writeBam(fileName: String, reads: List[SAMRecord]) {
     val file = new File(fileName + ".sam")


=====================================
src/main/scala/org/broadinstitute/pilon/GenomeFile.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,6 +14,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon


=====================================
src/main/scala/org/broadinstitute/pilon/GenomeRegion.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,6 +14,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon
@@ -45,6 +46,7 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
   var ambiguous = new Array[Boolean](size)
   var changed = new Array[Boolean](size)
   var deleted = new Array[Boolean](size)
+  var excluded = new Array[Boolean](size)
   //var lowCoverage = new Array[Boolean](size)
   //var lowConfidence = new Array[Boolean](size)
 
@@ -131,8 +133,17 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
 
   //val bams = Map.empty[Symbol, BamRegion]
   var bams = List[BamFile]()
+
   def bamsOfType(bamType: Symbol) = bams filter { _.bamType == bamType }
 
+  def nanoporeBams = bamsOfType('unpaired) filter { _.subType == 'nanopore}
+
+  def pacbioBams = bamsOfType('unpaired) filter { _.subType == 'pacbio }
+
+  def fragBams = bamsOfType('frags)
+
+  def longReadOnly = Pilon.longread && fragBams.isEmpty
+
   def initializePileUps = {
     pileUpRegion = new PileUpRegion(name, start, stop)
   }
@@ -203,6 +214,8 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
     val meanCoverage = pileUpRegion.coverage
     val nReads = pileUpRegion.readCount
 
+    if (longReadOnly) excludeMotifs()
+
     minDepth = {
       if (Pilon.minDepth >= 1) Pilon.minDepth.toInt
       else (Pilon.minDepth * meanCoverage).round.toInt max Pilon.minMinDepth
@@ -247,7 +260,7 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
             deleted(i + j) = true
             pileUpRegion(i + j).deletions += pu.deletions
           }
-        } else if (b != r) {
+        } else if (b != r && bc.score > 0) {
           // for ambiguous bases, fix them if --fix fixamb or if original base
           // not one of the top two alternatives
           if (homo) addChange(i, 'snp, pu)
@@ -304,38 +317,41 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
     var amb = 0
     for (i <- changeList) {
       val (kind, pu) = changeMap(i)
-      val rBase = refBase(locus(i))
+      val loc = locus(i)
+      val rBase = refBase(loc)
       val bc = pu.baseCall
       val cBase = bc.base
-      kind match {
-        case 'snp =>
-          if (fixSnps) snpFixList ::= (locus(i), rBase.toString, cBase.toString)
-          snps += 1
-        case 'amb =>
-          if (fixSnps) {
-            if (Pilon.iupac) {
-              // we put these on the small fix list because iupac codes can mess up assembly
-              // flank anchor kmers
-              smallFixList ::= (locus(i), rBase.toString, Bases.toIUPAC(cBase, bc.altBase).toString)
-            } else {
-              snpFixList ::= (locus(i), rBase.toString, cBase.toString)
+      if (!excluded(i)) {
+        kind match {
+          case 'snp =>
+            if (fixSnps) snpFixList ::= (loc, rBase.toString, cBase.toString)
+            snps += 1
+          case 'amb =>
+            if (fixSnps && !Pilon.longread) {
+              if (Pilon.iupac) {
+                // we put these on the small fix list because iupac codes can mess up assembly
+                // flank anchor kmers
+                smallFixList ::= (loc, rBase.toString, Bases.toIUPAC(cBase, bc.altBase).toString)
+              } else {
+                snpFixList ::= (loc, rBase.toString, cBase.toString)
+              }
+              amb += 1
             }
-          }
-          amb += 1
-        case 'ins =>
-          val insert = bc.insertion
-          if (fixIndels) smallFixList ::= (locus(i), "", insert)
-          ins += 1
-          insBases += insert.length
-        case 'del =>
-          val deletion = bc.deletion
-          if (fixIndels) smallFixList ::= (locus(i), deletion, "")
-          dels += 1
-          delBases += deletion.length
+          case 'ins =>
+            val insert = bc.insertion
+            if (fixIndels) smallFixList ::= (loc, "", insert)
+            ins += 1
+            insBases += insert.length
+          case 'del =>
+            val deletion = bc.deletion
+            if (fixIndels) smallFixList ::= (loc, deletion, "")
+            dels += 1
+            delBases += deletion.length
+        }
       }
       if (Pilon.verbose) logChange(i)
     }
-    
+
     // Report some stats
     val nConfirmed = confirmed count {x => x}
     val nonN = originalBases count {x => x != 'N'}
@@ -350,19 +366,19 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
     if (Pilon.fixList contains 'indels) log("; corrected ") else log("; found ")
     log(ins + " small insertions totaling " + insBases + " bases")
     logln(", " + dels + " small deletions totaling " + delBases + " bases")
-    
+
     // Report large collapsed regions (possible segmental duplication)
     val duplications = duplicationEvents
     if (duplications.size > 0) {
       for (d <- duplications) logln("Large collapsed region: " + d + " size " + d.size)
     }
-    
-    // Apply SNP fixes prior to reassemblies...it helps by giving better anchor sequence!  
+
+    // Apply SNP fixes prior to reassemblies...it helps by giving better anchor sequence!
     // We can't change coords here, though, so no indels!
     fixIssues(snpFixList)
 
     // Try to fill gaps
-    if ((Pilon.fixList contains 'gaps) && gaps.length > 0) {
+    if ((Pilon.fixList contains 'gaps) && gaps.nonEmpty) {
       logln("# Attempting to fill gaps")
       for (gap <- gaps) {
         val filler = new GapFiller(this)
@@ -376,7 +392,7 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
 
     // Try to reassemble around possible contiguity breaks, but stay away from gaps
     val breaks = possibleBreaks
-    if ((Pilon.fixList contains 'local) && !breaks.isEmpty) {
+    if ((Pilon.fixList contains 'local) && breaks.nonEmpty) {
       logln("# Attempting to fix local continuity breaks")
       for (break <- breaks) {
         val filler = new GapFiller(this)
@@ -395,6 +411,37 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
     fixIssues(smallFixList ++ bigFixList)
   }
 
+  def homoRun(loc: Int): Int = {
+    val baseAtLoc = baseAt(loc)
+    for (i <- loc to stop) {
+      if (baseAt(i) != baseAtLoc) return i - loc
+    }
+    return 1 + stop - loc
+  }
+
+  def nanoporeExclude(loc: Int) = {
+    inRegion(loc-2) && inRegion(loc+2) &&
+      baseAt(loc - 2) == 'C' &&
+      baseAt(loc - 1) == 'C' &&
+      baseAt(loc + 1) == 'G' &&
+      baseAt(loc + 2) == 'G'
+  }
+
+  def excludeMotifs() = {
+    val pb = pacbioBams.nonEmpty
+    val nano = nanoporeBams.nonEmpty
+    val lr = pb || nano
+
+    for (i <- 0 until size)
+      excluded(i) = homoRun(locus(i)) >= 4 || (nano && nanoporeExclude(locus(i)))
+  }
+
+  def longReadChangeFilter(loc: Int): Boolean = {
+    if (homoRun(index(loc)) >= 4) false
+    else if (Pilon.nanopore && nanoporeExclude(index(loc))) false
+    else true
+  }
+
   val reassemblyFixes = Map.empty[Region, String]
 
   def closeCircle(estimatedLength: Int) = {


=====================================
src/main/scala/org/broadinstitute/pilon/NormalDistribution.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,11 +14,11 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon
 
-import collection.mutable.ArrayBuffer
 import math.pow
 
 // Calculates the moments of a sample distribution.


=====================================
src/main/scala/org/broadinstitute/pilon/PileUp.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,6 +14,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon
@@ -131,8 +132,8 @@ class PileUp {
   class BaseCall {
     val n = count
     val (baseIndex, altBaseIndex) = {
-      val order = qualSum.order
-      (order(0), order(1))
+      val order = if (qSum > 0) qualSum.order else baseCount.order
+        (order(0), order(1))
     }
     val base = if (n > 0) indexBase(baseIndex) else 'N'
     val baseSum = qualSum.sums(baseIndex)
@@ -143,7 +144,7 @@ class PileUp {
       val homoScore = baseSum - (total - baseSum)
       val halfTotal = total / 2
       val heteroScore = total - (halfTotal - baseSum).abs - (halfTotal - altBaseSum).abs
-      val homo = homoScore >= heteroScore
+      val homo = /* homoScore > 0 && */ homoScore >= heteroScore
       val score = if (mqSum > 0) (homoScore - heteroScore).abs  * n / mqSum else 0
       (homo, score)
     }
@@ -180,12 +181,12 @@ class PileUp {
     }
 
     def insertCall = {
-      if (insertions > 0) hetIndelCall(insertionList, insPct)
+      if (insertions > 2 && insertions > deletions) hetIndelCall(insertionList, insPct)
       else ("", true)
     }
 
     def deletionCall = {
-      if (deletions > 0) hetIndelCall(deletionList, delPct)
+      if (deletions > 2 && deletions > insertions) hetIndelCall(deletionList, delPct)
       else ("", true)
     }
 
@@ -216,7 +217,7 @@ class PileUp {
         map(indelStr) = map.getOrElse(indelStr, 0) + 1
       }
       val winner = map.toSeq.sortBy({ _._2 }).last
-      if (winner._2 < indelList.length / 2) return ("", true)
+      if (winner._2 < 2 || winner._2 <= indelList.length / 2) return ("", true)
       val winStr = winner._1
       if (winStr contains 'N') return ("", true)
       if (Pilon.oldIndel) {


=====================================
src/main/scala/org/broadinstitute/pilon/PileUpRegion.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,11 +14,12 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon
 
-import scala.collection.JavaConversions._
+import scala.collection.JavaConverters._
 import htsjdk.samtools._
 import Utils._
 
@@ -98,7 +99,7 @@ class PileUpRegion(name: String, start: Int, stop: Int)
         pileups(i).insertSize /= pileups(i).physCov
   }
 
-  def addRead(r: SAMRecord, refBases: Array[Byte]) = {
+  def addRead(r: SAMRecord, refBases: Array[Byte], longRead: Int = 0) = {
     val length = r.getReadLength
     val bases = r.getReadBases
     val mq = r.getMappingQuality
@@ -116,12 +117,29 @@ class PileUpRegion(name: String, start: Int, stop: Int)
     def baseString(bases: Array[Byte]) = bases map { _.toChar } mkString ""
     def trusted(offset: Int) = offset >= trustedFlank && length - trustedFlank > offset
 
-    val cigarElements = cigar.getCigarElements
+    def homoRun(i0: Int): Int = {
+      val baseAtLoc = refBases(i0)
+      for (i <- i0 + 1 until refBases.length) {
+        if (refBases(i) != baseAtLoc) return i - i0
+      }
+      return refBases.length - i0
+    }
+
+    def nanoporeExclude(i0: Int) = {
+      inRegion(locus(i0-2)) && inRegion(locus(i0+2)) &&
+        refBases(i0 - 2) == 'C' &&
+        refBases(i0 - 1) == 'C' &&
+        refBases(i0 + 1) == 'G' &&
+        refBases(i0 + 2) == 'G'
+    }
+
+    val cigarElements = cigar.getCigarElements.asScala
 
     // First count up number of clipped bases, so we can use to weight alignment
     val clippedBases = (cigarElements map {e => if (e.getOperator == CigarOperator.S) e.getLength else 0}).sum
     // de-rate mq proportionally to fraction of bases clipped
     val adjMq = Utils.roundDiv(mq * (length - clippedBases), length)
+    val indelMq = if (longRead > 0) (adjMq min 8) else adjMq
 
     // parse read alignment and add to pileups
     for (ele <- cigarElements) {
@@ -139,7 +157,8 @@ class PileUpRegion(name: String, start: Int, stop: Int)
               iloc -= 1
               insertion = insertion.slice(len - 1, len) ++ insertion.slice(0, len - 1)
             }
-            pileups(index(iloc)).addInsertion(insertion, quals(readOffset), adjMq)
+            if (!(longRead > 0 && homoRun(iloc) >= 4))
+              pileups(index(iloc)).addInsertion(insertion, quals(readOffset), indelMq)
           }
         case CigarOperator.D =>
           var dloc = locus
@@ -158,7 +177,9 @@ class PileUpRegion(name: String, start: Int, stop: Int)
                   add(dloc + len, base, qual, adjMq, valid)
               }
             }
-            pileups(index(dloc)).addDeletion(refBases.slice(dloc - 1, dloc + len - 1), quals(readOffset), adjMq)
+            if (!(longRead > 0 && ((homoRun(index(dloc)) >= 4)
+              || (longRead == BamFile.nanoporeLongRead && nanoporeExclude(index(dloc))))))
+              pileups(index(dloc)).addDeletion(refBases.slice(dloc - 1, dloc + len - 1), quals(readOffset), indelMq)
           }
         case CigarOperator.M | CigarOperator.EQ | CigarOperator.X =>
           for (i <- 0 until len) {
@@ -166,10 +187,8 @@ class PileUpRegion(name: String, start: Int, stop: Int)
             if (trusted(rOff)) {
               val locusPlus = locus + i
               val base = bases(rOff).toChar
-              val qual = quals(rOff)
-              if (inRegion(locusPlus)) {
-                add(locusPlus, base, qual, adjMq, valid)
-              }
+              val qual = if (longRead == BamFile.nanoporeLongRead && nanoporeExclude(index(locusPlus))) 0 else quals(rOff)
+              add(locusPlus, base, qual, adjMq, valid)
             }
           }
         case CigarOperator.S =>
@@ -200,28 +219,10 @@ class PileUpRegion(name: String, start: Int, stop: Int)
     physCovIncr(aStart, aEnd, insert, paired, valid)
   }
 
-  def addReads(reads: SAMRecordIterator, refBases: Array[Byte], printInterval: Int = 100000) = {
-    var lastLoc = 0
-    for (r <- reads) {
-      val loc = addRead(r, refBases)
-      if (Pilon.verbose && printInterval > 0 && loc > lastLoc + printInterval) {
-        lastLoc = printInterval * (loc / printInterval)
-        print("..." + lastLoc)
-      }
-    }
-  }
-
   def dump = {
     for (i <- 0 to size - 1) println(locus(i) + ": " + pileups(i))
   }
 
-  def callRegion = {
-    for (i <- 0 until size) {
-      var bc = pileups(i).baseCall
-      if (bc.q < 20 && !bc.homo) println(locus(i) + ": bc=" + bc + " pu=" + pileups(i))
-    }
-  }
-
   def postProcess = {
     // To be called after all reads have been added to pileUps
     computePhysCov


=====================================
src/main/scala/org/broadinstitute/pilon/Pilon.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,6 +14,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon
@@ -40,7 +41,7 @@ object Pilon {
   var debug = false
   // heuristics and control parameters
   var chunkSize = 10000000
-  var defaultQual: Byte = 15
+  var defaultQual: Byte = 10
   var diploid = false
   var duplicates = false
   var dumpReads = false
@@ -56,16 +57,19 @@ object Pilon {
   var multiClosure = false
   var nonPf = false
   var oldIndel = false
+  var longread = false
+  var pacbio = false
+  var nanopore = false
   var strays = true
   var threads = 1
   var trSafe = true
 
   // Global computed data
   var novelContigs = List[String]()
-  
+
   // for logging to output files
   var commandArgs = Array[String]()
-  
+
   def main(args: Array[String]) {
     commandArgs = args
     println(Version.version)
@@ -85,7 +89,7 @@ object Pilon {
     // Stray computation is expensive up front, so only turn it on
     // if we're doing local reassembly
     strays &= (fixList contains 'gaps) || (fixList contains 'local) || (fixList contains 'scaffolds)
-    
+
     if (bamFiles.length == 0) {
       println(usage)
       sys.exit(0)
@@ -100,11 +104,11 @@ object Pilon {
     println("Fixing " + (fixList map {_.name} mkString(", ")))
     genome.processRegions(bamFiles)
 
-    if (tracks) { 
+    if (tracks) {
       val tracks = new Tracks(genome)
       tracks.standardTracks
     }
-    
+
   }
 
   def optionParse(list: List[String]) : Unit = {
@@ -182,6 +186,14 @@ object Pilon {
       case "--multiclosure" :: tail =>      // undocumented experimental option
         multiClosure = true
         optionParse(tail)
+      case "--nanopore" :: value :: tail =>
+        bamFiles ::= new BamFile(new File(value), 'unpaired, 'nanopore)
+        longread = true
+        nanopore = true
+        optionParse(tail)
+      case "--nonpf" :: tail =>
+        nonPf = true
+        optionParse(tail)
       case "--oldindel" :: tail =>      // undocumented experimental option
         oldIndel = true
         optionParse(tail)
@@ -191,8 +203,10 @@ object Pilon {
       case "--outdir" :: value :: tail =>
         outdir = value
         optionParse(tail)
-      case "--nonpf" :: tail =>
-        nonPf = true
+      case "--pacbio" :: value :: tail =>
+        bamFiles ::= new BamFile(new File(value), 'unpaired, 'pacbio)
+        longread = true
+        pacbio = true
         optionParse(tail)
       case "--targets" :: value :: tail =>
         targets = value
@@ -275,16 +289,16 @@ object Pilon {
   
   // Ugly, but thank you http://stackoverflow.com/questions/17865823
   def setDefaultParallelism(numThreads: Int): Unit = {
-	val parPkgObj = scala.collection.parallel.`package`
-	val defaultTaskSupportField = parPkgObj.getClass.getDeclaredFields.find{
-		_.getName == "defaultTaskSupport"
-	}.get
+    val parPkgObj = scala.collection.parallel.`package`
+    val defaultTaskSupportField = parPkgObj.getClass.getDeclaredFields.find{
+      _.getName == "defaultTaskSupport"
+    }.get
 
-	defaultTaskSupportField.setAccessible(true)
-	defaultTaskSupportField.set(
-			parPkgObj, 
-			new scala.collection.parallel.ForkJoinTaskSupport(
-					new scala.concurrent.forkjoin.ForkJoinPool(numThreads)))
+    defaultTaskSupportField.setAccessible(true)
+    defaultTaskSupportField.set(
+      parPkgObj,
+      new scala.collection.parallel.ForkJoinTaskSupport(
+        new scala.concurrent.forkjoin.ForkJoinPool(numThreads)))
   }
   
   val usage = """
@@ -373,7 +387,7 @@ object Pilon {
               Print version string and exit.
          HEURISTICS:
            --defaultqual qual
-              Assumes bases are of this quality if quals are no present in input BAMs (default 15).
+              Assumes bases are of this quality if quals are no present in input BAMs (default 10).
            --flank nbases
               Controls how much of the well-aligned reads will be used; this many bases at each
               end of the good reads will be ignored (default 10).


=====================================
src/main/scala/org/broadinstitute/pilon/Region.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,6 +14,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon


=====================================
src/main/scala/org/broadinstitute/pilon/Scaffold.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,6 +14,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon
@@ -27,8 +28,8 @@ package org.broadinstitute.pilon
  */
 
 
-import scala.collection.JavaConversions._
-import collection.mutable.{ Map, HashMap, Set, HashSet }
+import scala.collection.JavaConverters._
+import collection.mutable.{ Map, HashMap }
 import htsjdk.samtools._
 
 
@@ -347,7 +348,7 @@ object Scaffold {
 
     val reader = bam.reader
     var eaList: List[EndAlignment] = Nil
-    for (read <- reader.iterator if (!read.getReadUnmappedFlag)) {
+    for (read <- reader.iterator.asScala if (!read.getReadUnmappedFlag)) {
       val contig = scaffolds(read.getReferenceIndex())
       val ca = new EndAlignment(read, contig)
       //println(ca)


=====================================
src/main/scala/org/broadinstitute/pilon/Tracks.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,6 +14,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon
@@ -27,9 +28,6 @@ package org.broadinstitute.pilon
  */
 import java.io._
 
-import scala.collection.JavaConversions._
-import htsjdk.samtools._
-
 class Tracks(val reference: GenomeFile) {
   def standardTracks = {
     makeBedTrack("Pilon.bed", "Pilon")


=====================================
src/main/scala/org/broadinstitute/pilon/Utils.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,6 +14,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon


=====================================
src/main/scala/org/broadinstitute/pilon/Vcf.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,6 +14,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon
@@ -71,7 +72,8 @@ class Vcf(val file: File, val contigsWithSizes: List[(String, Int)] = Nil) {
   def close = writer.close
 
   def writeRecord(region: GenomeRegion, index: Int,
-      embedded: Boolean = false, indelOk: Boolean = true): Unit = {
+                  embedded: Boolean = false, indelOkArg: Boolean = true): Unit = {
+    val indelOk = indelOkArg && index > 0
     val locus = region.locus(index)
     val pileUp = region.pileUpRegion(index)
     val bc = pileUp.baseCall


=====================================
src/main/scala/org/broadinstitute/pilon/Version.scala
=====================================
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
  *
  * This file is part of Pilon.
  *
@@ -14,6 +14,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with Pilon.  If not, see <http://www.gnu.org/licenses/>.
+ *
  */
 
 package org.broadinstitute.pilon



View it on GitLab: https://salsa.debian.org/med-team/pilon/compare/691e0fae19090ad680d8a4158abe91d574999c5e...90e6b3a48e4916913050160d67263c5e6dffe31a

-- 
View it on GitLab: https://salsa.debian.org/med-team/pilon/compare/691e0fae19090ad680d8a4158abe91d574999c5e...90e6b3a48e4916913050160d67263c5e6dffe31a
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20181208/59cbacb6/attachment-0001.html>


More information about the debian-med-commit mailing list