[med-svn] [Git][med-team/pilon][upstream] New upstream version 1.23+dfsg
Andreas Tille
gitlab at salsa.debian.org
Sat Dec 8 20:21:39 GMT 2018
Andreas Tille pushed to branch upstream at Debian Med / pilon
Commits:
debb98f9 by Andreas Tille at 2018-12-08T20:13:45Z
New upstream version 1.23+dfsg
- - - - -
21 changed files:
- COPYRIGHT
- build.sbt
- build.sh
- project/plugins.sbt
- src/main/scala/org/broadinstitute/pilon/Assembler.scala
- src/main/scala/org/broadinstitute/pilon/BamFile.scala
- src/main/scala/org/broadinstitute/pilon/BaseSum.scala
- src/main/scala/org/broadinstitute/pilon/Bases.scala
- src/main/scala/org/broadinstitute/pilon/GapFiller.scala
- src/main/scala/org/broadinstitute/pilon/GenomeFile.scala
- src/main/scala/org/broadinstitute/pilon/GenomeRegion.scala
- src/main/scala/org/broadinstitute/pilon/NormalDistribution.scala
- src/main/scala/org/broadinstitute/pilon/PileUp.scala
- src/main/scala/org/broadinstitute/pilon/PileUpRegion.scala
- src/main/scala/org/broadinstitute/pilon/Pilon.scala
- src/main/scala/org/broadinstitute/pilon/Region.scala
- src/main/scala/org/broadinstitute/pilon/Scaffold.scala
- src/main/scala/org/broadinstitute/pilon/Tracks.scala
- src/main/scala/org/broadinstitute/pilon/Utils.scala
- src/main/scala/org/broadinstitute/pilon/Vcf.scala
- src/main/scala/org/broadinstitute/pilon/Version.scala
Changes:
=====================================
COPYRIGHT
=====================================
@@ -1,4 +1,4 @@
-Copyright 2012-2014 Broad Institute, Inc.
+Copyright (c) 2012-2018 Broad Institute, Inc.
Pilon is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2
=====================================
build.sbt
=====================================
@@ -1,13 +1,12 @@
name := "pilon"
-version := "1.22"
+version := "1.23"
-scalaVersion := "2.11.8"
+scalaVersion := "2.12.7"
scalacOptions += "-deprecation"
scalacOptions += "-feature"
-Seq(com.github.retronym.SbtOneJar.oneJarSettings: _*)
-
libraryDependencies += "commons-lang" % "commons-lang" % "2.6"
+
=====================================
build.sh
=====================================
@@ -11,6 +11,6 @@ sed -e "s/\(date.*=\).*/\\1 \"$commitdate\"/" \
-e "s/\(sbt.*=\).*/\\1 \"$version\"/" \
<$tmp >$f
#sbt $* package
-sbt $* one-jar
-ln -sf `pwd`/target/scala-2.11/pilon_2.11-$version-one-jar.jar ~/lib/pilon/pilon-dev.jar
+sbt $* assembly
+ln -sf `pwd`/target/scala-2.12/pilon-assembly-$version.jar ~/lib/pilon/pilon-dev.jar
mv $tmp $f
=====================================
project/plugins.sbt
=====================================
@@ -1 +1 @@
-addSbtPlugin("com.github.retronym" % "sbt-onejar" % "0.8")
+addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.6")
=====================================
src/main/scala/org/broadinstitute/pilon/Assembler.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,12 +14,11 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
-import scala.annotation.tailrec
-import collection.JavaConversions._
-import collection.mutable.{ Map, HashMap, Set, HashSet }
+import collection.mutable.{ HashMap, HashSet }
import htsjdk.samtools._
object Assembler {
=====================================
src/main/scala/org/broadinstitute/pilon/BamFile.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,13 +14,14 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
import collection.mutable.{HashMap,Map}
import java.io.File
-import scala.collection.JavaConversions._
+import scala.collection.JavaConverters._
import htsjdk.samtools._
object BamFile {
@@ -28,13 +29,23 @@ object BamFile {
val maxInsertSizes = Map(('frags -> 500), ('jumps -> 10000), ('unpaired -> 10000), ('bam -> 10000))
val minOrientationPct = 10
val maxFragInsertSize = 700
+ // long read type codes
+ val notLongRead = 0
+ val nanoporeLongRead = 1
+ val pacbioLongRead = 2
}
-class BamFile(val bamFile: File, var bamType: Symbol) {
+class BamFile(val bamFile: File, var bamType: Symbol, val subType: Symbol = 'none) {
import BamFile._
val path = bamFile.getCanonicalPath()
var baseCount: Long = 0
+ val longReadType = if (bamType == 'unpaired) {
+ if (subType == 'nanopore) nanoporeLongRead
+ else if (subType == 'pacbio) pacbioLongRead
+ else 0
+ } else 0
+
def reader = {
//val r = new SAMFileReader(bamFile, new File(path + BamFile.indexSuffix))
val r = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(bamFile)
@@ -51,7 +62,7 @@ class BamFile(val bamFile: File, var bamType: Symbol) {
def getSeqs = {
// returns an array of sequence records indexed by bam seq index
val r = reader
- val seqs = r.getFileHeader.getSequenceDictionary.getSequences
+ val seqs = r.getFileHeader.getSequenceDictionary.getSequences.asScala
r.close
val seqArray = new Array[SAMSequenceRecord](seqs.length)
for (s <- seqs) seqArray(s.getSequenceIndex) = s
@@ -82,7 +93,7 @@ class BamFile(val bamFile: File, var bamType: Symbol) {
def getUnaligned = {
val r = reader
- val reads = r.queryUnmapped.filter(validateRead(_)).toList
+ val reads = r.queryUnmapped.asScala.filter(validateRead(_)).toList
r.close
reads
}
@@ -90,21 +101,21 @@ class BamFile(val bamFile: File, var bamType: Symbol) {
def validateRead(read: SAMRecord) = {
(Pilon.nonPf || !read.getReadFailsVendorQualityCheckFlag) &&
(Pilon.duplicates || !read.getDuplicateReadFlag) &&
- !read.getNotPrimaryAlignmentFlag
+ !read.isSecondaryAlignment
}
def process(region: GenomeRegion, printInterval: Int = 100000) = {
-
val pileUpRegion = region.pileUpRegion
// Ugh. Is there a way for samtools to do the right thing here and get
// the pairs for which only one end overlaps? Adding 10k slop until
// that's figured out.
//pileUpRegion.addReads(bam.reader.queryOverlapping(name, start, stop))
+ val longRead = (bamType == 'unpaired) && (subType == 'nanopore || subType == 'pacbio)
val r = reader
val reads = r.queryOverlapping(region.name,
- (region.start-10000) max 0, (region.stop+10000) min region.contig.length)
+ (region.start-10000) max 0, (region.stop+10000) min region.contig.length).asScala
val readsBefore = pileUpRegion.readCount
val baseCountBefore = pileUpRegion.baseCount
val covBefore = pileUpRegion.coverage
@@ -118,7 +129,7 @@ class BamFile(val bamFile: File, var bamType: Symbol) {
print("..." + lastLoc)
}
if (validateRead(read)) {
- val insertSize = pileUpRegion.addRead(read, region.contigBases)
+ val insertSize = pileUpRegion.addRead(read, region.contigBases, longReadType)
if (insertSize > huge) {
//if (Pilon.debug) println("WARNING: huge insert " + insertSize + " " + read)
}
@@ -282,7 +293,7 @@ class BamFile(val bamFile: File, var bamType: Symbol) {
def scan(seqsOfInterest: Set[String]) = {
val r = reader
- for (read <- r.iterator) {
+ for (read <- r.iterator.asScala) {
if (!validateRead(read)) filtered += 1
else if (read.getReadUnmappedFlag) unmapped += 1
else {
@@ -326,8 +337,8 @@ class BamFile(val bamFile: File, var bamType: Symbol) {
def readsInRegion(region: Region) = {
val r = reader
- val reads = r.queryOverlapping(region.name, region.start, region.stop).filter(validateRead(_)).toList
- r.close
+ val reads = r.queryOverlapping(region.name, region.start, region.stop).asScala.filter(validateRead(_)).toList
+ r.close()
reads
}
=====================================
src/main/scala/org/broadinstitute/pilon/BaseSum.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,6 +14,7 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
=====================================
src/main/scala/org/broadinstitute/pilon/Bases.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,6 +14,7 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
=====================================
src/main/scala/org/broadinstitute/pilon/GapFiller.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,11 +14,12 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
-import collection.JavaConversions._
+
import collection.mutable.Map
import java.io.File
import htsjdk.samtools._
@@ -48,6 +49,8 @@ class GapFiller(val region: GenomeRegion) {
def assembleAcrossBreak(break: Region, isGap: Boolean) = {
// TODO: ugh, this is ugly and really wants to be re-written.
//val reads = if (break.size < 100 && isGap) recruitLocalReads(break) else recruitReads(break)
+ //val longReads = recruitLongReads(break)
+ //if (Pilon.verbose) println("%d long reads".format(longReads.length))
val reads = recruitReads(break)
var (start, pathsFromLeft, pathsFromRight, stop, loop) = assembleIntoBreak(break, reads)
if (Pilon.verbose) println("L=%d R=%d".format(pathsFromLeft.length, pathsFromRight.length))
@@ -360,17 +363,20 @@ class GapFiller(val region: GenomeRegion) {
minRadius max insertMean
}
- def recruitReadsOfType(reg: Region, bamType: Symbol) = {
- val bams = region.bamsOfType(bamType)
+ def recruitReadsFromBams(reg: Region, bams: List[BamFile]) = {
var reads = List[SAMRecord]()
for (b <- bams) {
reads ++= b.recruitFlankReads(reg)
}
- if (Pilon.debug)
- println("# Recruiting " + bamType.name + " reads: count=" + reads.length)
+ if (Pilon.debug)
+ println("# Recruiting reads from " + bams.toString + ": count=" + reads.length)
reads
}
+ def recruitReadsOfType(reg: Region, bamType: Symbol) = {
+ recruitReadsFromBams(reg, region.bamsOfType(bamType))
+ }
+
//def recruitFrags(reg: Region) = mateMapOfType(reg, 'frags).values.toList
def recruitFrags(reg: Region) = recruitReadsOfType(reg, 'frags)
@@ -384,11 +390,18 @@ class GapFiller(val region: GenomeRegion) {
reads
}
- def recruitUnpaired(reg: Region) = recruitReadsOfType(reg, 'unpaired)
+ def recruitUnpaired(reg: Region) =
+ if (!Pilon.longread) recruitReadsOfType(reg, 'unpaired) else Nil
def recruitLocalReads(reg: Region) = recruitFrags(reg) ++ recruitUnpaired(reg)
- def recruitReads(reg: Region) = recruitLocalReads(reg) ++ recruitJumps(reg)
+ def recruitLongReads(reg: Region) = {
+ recruitReadsFromBams(reg, region.bamsOfType('unpaired).filter(_.longReadType > 0))
+ }
+
+ def recruitReads(reg: Region) = {
+ recruitLocalReads(reg) ++ recruitJumps(reg) // ++ recruitLongReads(reg)
+ }
def writeBam(fileName: String, reads: List[SAMRecord]) {
val file = new File(fileName + ".sam")
=====================================
src/main/scala/org/broadinstitute/pilon/GenomeFile.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,6 +14,7 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
=====================================
src/main/scala/org/broadinstitute/pilon/GenomeRegion.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,6 +14,7 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
@@ -45,6 +46,7 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
var ambiguous = new Array[Boolean](size)
var changed = new Array[Boolean](size)
var deleted = new Array[Boolean](size)
+ var excluded = new Array[Boolean](size)
//var lowCoverage = new Array[Boolean](size)
//var lowConfidence = new Array[Boolean](size)
@@ -131,8 +133,17 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
//val bams = Map.empty[Symbol, BamRegion]
var bams = List[BamFile]()
+
def bamsOfType(bamType: Symbol) = bams filter { _.bamType == bamType }
+ def nanoporeBams = bamsOfType('unpaired) filter { _.subType == 'nanopore}
+
+ def pacbioBams = bamsOfType('unpaired) filter { _.subType == 'pacbio }
+
+ def fragBams = bamsOfType('frags)
+
+ def longReadOnly = Pilon.longread && fragBams.isEmpty
+
def initializePileUps = {
pileUpRegion = new PileUpRegion(name, start, stop)
}
@@ -203,6 +214,8 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
val meanCoverage = pileUpRegion.coverage
val nReads = pileUpRegion.readCount
+ if (longReadOnly) excludeMotifs()
+
minDepth = {
if (Pilon.minDepth >= 1) Pilon.minDepth.toInt
else (Pilon.minDepth * meanCoverage).round.toInt max Pilon.minMinDepth
@@ -247,7 +260,7 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
deleted(i + j) = true
pileUpRegion(i + j).deletions += pu.deletions
}
- } else if (b != r) {
+ } else if (b != r && bc.score > 0) {
// for ambiguous bases, fix them if --fix fixamb or if original base
// not one of the top two alternatives
if (homo) addChange(i, 'snp, pu)
@@ -304,38 +317,41 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
var amb = 0
for (i <- changeList) {
val (kind, pu) = changeMap(i)
- val rBase = refBase(locus(i))
+ val loc = locus(i)
+ val rBase = refBase(loc)
val bc = pu.baseCall
val cBase = bc.base
- kind match {
- case 'snp =>
- if (fixSnps) snpFixList ::= (locus(i), rBase.toString, cBase.toString)
- snps += 1
- case 'amb =>
- if (fixSnps) {
- if (Pilon.iupac) {
- // we put these on the small fix list because iupac codes can mess up assembly
- // flank anchor kmers
- smallFixList ::= (locus(i), rBase.toString, Bases.toIUPAC(cBase, bc.altBase).toString)
- } else {
- snpFixList ::= (locus(i), rBase.toString, cBase.toString)
+ if (!excluded(i)) {
+ kind match {
+ case 'snp =>
+ if (fixSnps) snpFixList ::= (loc, rBase.toString, cBase.toString)
+ snps += 1
+ case 'amb =>
+ if (fixSnps && !Pilon.longread) {
+ if (Pilon.iupac) {
+ // we put these on the small fix list because iupac codes can mess up assembly
+ // flank anchor kmers
+ smallFixList ::= (loc, rBase.toString, Bases.toIUPAC(cBase, bc.altBase).toString)
+ } else {
+ snpFixList ::= (loc, rBase.toString, cBase.toString)
+ }
+ amb += 1
}
- }
- amb += 1
- case 'ins =>
- val insert = bc.insertion
- if (fixIndels) smallFixList ::= (locus(i), "", insert)
- ins += 1
- insBases += insert.length
- case 'del =>
- val deletion = bc.deletion
- if (fixIndels) smallFixList ::= (locus(i), deletion, "")
- dels += 1
- delBases += deletion.length
+ case 'ins =>
+ val insert = bc.insertion
+ if (fixIndels) smallFixList ::= (loc, "", insert)
+ ins += 1
+ insBases += insert.length
+ case 'del =>
+ val deletion = bc.deletion
+ if (fixIndels) smallFixList ::= (loc, deletion, "")
+ dels += 1
+ delBases += deletion.length
+ }
}
if (Pilon.verbose) logChange(i)
}
-
+
// Report some stats
val nConfirmed = confirmed count {x => x}
val nonN = originalBases count {x => x != 'N'}
@@ -350,19 +366,19 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
if (Pilon.fixList contains 'indels) log("; corrected ") else log("; found ")
log(ins + " small insertions totaling " + insBases + " bases")
logln(", " + dels + " small deletions totaling " + delBases + " bases")
-
+
// Report large collapsed regions (possible segmental duplication)
val duplications = duplicationEvents
if (duplications.size > 0) {
for (d <- duplications) logln("Large collapsed region: " + d + " size " + d.size)
}
-
- // Apply SNP fixes prior to reassemblies...it helps by giving better anchor sequence!
+
+ // Apply SNP fixes prior to reassemblies...it helps by giving better anchor sequence!
// We can't change coords here, though, so no indels!
fixIssues(snpFixList)
// Try to fill gaps
- if ((Pilon.fixList contains 'gaps) && gaps.length > 0) {
+ if ((Pilon.fixList contains 'gaps) && gaps.nonEmpty) {
logln("# Attempting to fill gaps")
for (gap <- gaps) {
val filler = new GapFiller(this)
@@ -376,7 +392,7 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
// Try to reassemble around possible contiguity breaks, but stay away from gaps
val breaks = possibleBreaks
- if ((Pilon.fixList contains 'local) && !breaks.isEmpty) {
+ if ((Pilon.fixList contains 'local) && breaks.nonEmpty) {
logln("# Attempting to fix local continuity breaks")
for (break <- breaks) {
val filler = new GapFiller(this)
@@ -395,6 +411,37 @@ class GenomeRegion(val contig: ReferenceSequence, start: Int, stop: Int)
fixIssues(smallFixList ++ bigFixList)
}
+ def homoRun(loc: Int): Int = {
+ val baseAtLoc = baseAt(loc)
+ for (i <- loc to stop) {
+ if (baseAt(i) != baseAtLoc) return i - loc
+ }
+ return 1 + stop - loc
+ }
+
+ def nanoporeExclude(loc: Int) = {
+ inRegion(loc-2) && inRegion(loc+2) &&
+ baseAt(loc - 2) == 'C' &&
+ baseAt(loc - 1) == 'C' &&
+ baseAt(loc + 1) == 'G' &&
+ baseAt(loc + 2) == 'G'
+ }
+
+ def excludeMotifs() = {
+ val pb = pacbioBams.nonEmpty
+ val nano = nanoporeBams.nonEmpty
+ val lr = pb || nano
+
+ for (i <- 0 until size)
+ excluded(i) = homoRun(locus(i)) >= 4 || (nano && nanoporeExclude(locus(i)))
+ }
+
+ def longReadChangeFilter(loc: Int): Boolean = {
+ if (homoRun(index(loc)) >= 4) false
+ else if (Pilon.nanopore && nanoporeExclude(index(loc))) false
+ else true
+ }
+
val reassemblyFixes = Map.empty[Region, String]
def closeCircle(estimatedLength: Int) = {
=====================================
src/main/scala/org/broadinstitute/pilon/NormalDistribution.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,11 +14,11 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
-import collection.mutable.ArrayBuffer
import math.pow
// Calculates the moments of a sample distribution.
=====================================
src/main/scala/org/broadinstitute/pilon/PileUp.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,6 +14,7 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
@@ -131,8 +132,8 @@ class PileUp {
class BaseCall {
val n = count
val (baseIndex, altBaseIndex) = {
- val order = qualSum.order
- (order(0), order(1))
+ val order = if (qSum > 0) qualSum.order else baseCount.order
+ (order(0), order(1))
}
val base = if (n > 0) indexBase(baseIndex) else 'N'
val baseSum = qualSum.sums(baseIndex)
@@ -143,7 +144,7 @@ class PileUp {
val homoScore = baseSum - (total - baseSum)
val halfTotal = total / 2
val heteroScore = total - (halfTotal - baseSum).abs - (halfTotal - altBaseSum).abs
- val homo = homoScore >= heteroScore
+ val homo = /* homoScore > 0 && */ homoScore >= heteroScore
val score = if (mqSum > 0) (homoScore - heteroScore).abs * n / mqSum else 0
(homo, score)
}
@@ -180,12 +181,12 @@ class PileUp {
}
def insertCall = {
- if (insertions > 0) hetIndelCall(insertionList, insPct)
+ if (insertions > 2 && insertions > deletions) hetIndelCall(insertionList, insPct)
else ("", true)
}
def deletionCall = {
- if (deletions > 0) hetIndelCall(deletionList, delPct)
+ if (deletions > 2 && deletions > insertions) hetIndelCall(deletionList, delPct)
else ("", true)
}
@@ -216,7 +217,7 @@ class PileUp {
map(indelStr) = map.getOrElse(indelStr, 0) + 1
}
val winner = map.toSeq.sortBy({ _._2 }).last
- if (winner._2 < indelList.length / 2) return ("", true)
+ if (winner._2 < 2 || winner._2 <= indelList.length / 2) return ("", true)
val winStr = winner._1
if (winStr contains 'N') return ("", true)
if (Pilon.oldIndel) {
=====================================
src/main/scala/org/broadinstitute/pilon/PileUpRegion.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,11 +14,12 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
-import scala.collection.JavaConversions._
+import scala.collection.JavaConverters._
import htsjdk.samtools._
import Utils._
@@ -98,7 +99,7 @@ class PileUpRegion(name: String, start: Int, stop: Int)
pileups(i).insertSize /= pileups(i).physCov
}
- def addRead(r: SAMRecord, refBases: Array[Byte]) = {
+ def addRead(r: SAMRecord, refBases: Array[Byte], longRead: Int = 0) = {
val length = r.getReadLength
val bases = r.getReadBases
val mq = r.getMappingQuality
@@ -116,12 +117,29 @@ class PileUpRegion(name: String, start: Int, stop: Int)
def baseString(bases: Array[Byte]) = bases map { _.toChar } mkString ""
def trusted(offset: Int) = offset >= trustedFlank && length - trustedFlank > offset
- val cigarElements = cigar.getCigarElements
+ def homoRun(i0: Int): Int = {
+ val baseAtLoc = refBases(i0)
+ for (i <- i0 + 1 until refBases.length) {
+ if (refBases(i) != baseAtLoc) return i - i0
+ }
+ return refBases.length - i0
+ }
+
+ def nanoporeExclude(i0: Int) = {
+ inRegion(locus(i0-2)) && inRegion(locus(i0+2)) &&
+ refBases(i0 - 2) == 'C' &&
+ refBases(i0 - 1) == 'C' &&
+ refBases(i0 + 1) == 'G' &&
+ refBases(i0 + 2) == 'G'
+ }
+
+ val cigarElements = cigar.getCigarElements.asScala
// First count up number of clipped bases, so we can use to weight alignment
val clippedBases = (cigarElements map {e => if (e.getOperator == CigarOperator.S) e.getLength else 0}).sum
// de-rate mq proportionally to fraction of bases clipped
val adjMq = Utils.roundDiv(mq * (length - clippedBases), length)
+ val indelMq = if (longRead > 0) (adjMq min 8) else adjMq
// parse read alignment and add to pileups
for (ele <- cigarElements) {
@@ -139,7 +157,8 @@ class PileUpRegion(name: String, start: Int, stop: Int)
iloc -= 1
insertion = insertion.slice(len - 1, len) ++ insertion.slice(0, len - 1)
}
- pileups(index(iloc)).addInsertion(insertion, quals(readOffset), adjMq)
+ if (!(longRead > 0 && homoRun(iloc) >= 4))
+ pileups(index(iloc)).addInsertion(insertion, quals(readOffset), indelMq)
}
case CigarOperator.D =>
var dloc = locus
@@ -158,7 +177,9 @@ class PileUpRegion(name: String, start: Int, stop: Int)
add(dloc + len, base, qual, adjMq, valid)
}
}
- pileups(index(dloc)).addDeletion(refBases.slice(dloc - 1, dloc + len - 1), quals(readOffset), adjMq)
+ if (!(longRead > 0 && ((homoRun(index(dloc)) >= 4)
+ || (longRead == BamFile.nanoporeLongRead && nanoporeExclude(index(dloc))))))
+ pileups(index(dloc)).addDeletion(refBases.slice(dloc - 1, dloc + len - 1), quals(readOffset), indelMq)
}
case CigarOperator.M | CigarOperator.EQ | CigarOperator.X =>
for (i <- 0 until len) {
@@ -166,10 +187,8 @@ class PileUpRegion(name: String, start: Int, stop: Int)
if (trusted(rOff)) {
val locusPlus = locus + i
val base = bases(rOff).toChar
- val qual = quals(rOff)
- if (inRegion(locusPlus)) {
- add(locusPlus, base, qual, adjMq, valid)
- }
+ val qual = if (longRead == BamFile.nanoporeLongRead && nanoporeExclude(index(locusPlus))) 0 else quals(rOff)
+ add(locusPlus, base, qual, adjMq, valid)
}
}
case CigarOperator.S =>
@@ -200,28 +219,10 @@ class PileUpRegion(name: String, start: Int, stop: Int)
physCovIncr(aStart, aEnd, insert, paired, valid)
}
- def addReads(reads: SAMRecordIterator, refBases: Array[Byte], printInterval: Int = 100000) = {
- var lastLoc = 0
- for (r <- reads) {
- val loc = addRead(r, refBases)
- if (Pilon.verbose && printInterval > 0 && loc > lastLoc + printInterval) {
- lastLoc = printInterval * (loc / printInterval)
- print("..." + lastLoc)
- }
- }
- }
-
def dump = {
for (i <- 0 to size - 1) println(locus(i) + ": " + pileups(i))
}
- def callRegion = {
- for (i <- 0 until size) {
- var bc = pileups(i).baseCall
- if (bc.q < 20 && !bc.homo) println(locus(i) + ": bc=" + bc + " pu=" + pileups(i))
- }
- }
-
def postProcess = {
// To be called after all reads have been added to pileUps
computePhysCov
=====================================
src/main/scala/org/broadinstitute/pilon/Pilon.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,6 +14,7 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
@@ -40,7 +41,7 @@ object Pilon {
var debug = false
// heuristics and control parameters
var chunkSize = 10000000
- var defaultQual: Byte = 15
+ var defaultQual: Byte = 10
var diploid = false
var duplicates = false
var dumpReads = false
@@ -56,16 +57,19 @@ object Pilon {
var multiClosure = false
var nonPf = false
var oldIndel = false
+ var longread = false
+ var pacbio = false
+ var nanopore = false
var strays = true
var threads = 1
var trSafe = true
// Global computed data
var novelContigs = List[String]()
-
+
// for logging to output files
var commandArgs = Array[String]()
-
+
def main(args: Array[String]) {
commandArgs = args
println(Version.version)
@@ -85,7 +89,7 @@ object Pilon {
// Stray computation is expensive up front, so only turn it on
// if we're doing local reassembly
strays &= (fixList contains 'gaps) || (fixList contains 'local) || (fixList contains 'scaffolds)
-
+
if (bamFiles.length == 0) {
println(usage)
sys.exit(0)
@@ -100,11 +104,11 @@ object Pilon {
println("Fixing " + (fixList map {_.name} mkString(", ")))
genome.processRegions(bamFiles)
- if (tracks) {
+ if (tracks) {
val tracks = new Tracks(genome)
tracks.standardTracks
}
-
+
}
def optionParse(list: List[String]) : Unit = {
@@ -182,6 +186,14 @@ object Pilon {
case "--multiclosure" :: tail => // undocumented experimental option
multiClosure = true
optionParse(tail)
+ case "--nanopore" :: value :: tail =>
+ bamFiles ::= new BamFile(new File(value), 'unpaired, 'nanopore)
+ longread = true
+ nanopore = true
+ optionParse(tail)
+ case "--nonpf" :: tail =>
+ nonPf = true
+ optionParse(tail)
case "--oldindel" :: tail => // undocumented experimental option
oldIndel = true
optionParse(tail)
@@ -191,8 +203,10 @@ object Pilon {
case "--outdir" :: value :: tail =>
outdir = value
optionParse(tail)
- case "--nonpf" :: tail =>
- nonPf = true
+ case "--pacbio" :: value :: tail =>
+ bamFiles ::= new BamFile(new File(value), 'unpaired, 'pacbio)
+ longread = true
+ pacbio = true
optionParse(tail)
case "--targets" :: value :: tail =>
targets = value
@@ -275,16 +289,16 @@ object Pilon {
// Ugly, but thank you http://stackoverflow.com/questions/17865823
def setDefaultParallelism(numThreads: Int): Unit = {
- val parPkgObj = scala.collection.parallel.`package`
- val defaultTaskSupportField = parPkgObj.getClass.getDeclaredFields.find{
- _.getName == "defaultTaskSupport"
- }.get
+ val parPkgObj = scala.collection.parallel.`package`
+ val defaultTaskSupportField = parPkgObj.getClass.getDeclaredFields.find{
+ _.getName == "defaultTaskSupport"
+ }.get
- defaultTaskSupportField.setAccessible(true)
- defaultTaskSupportField.set(
- parPkgObj,
- new scala.collection.parallel.ForkJoinTaskSupport(
- new scala.concurrent.forkjoin.ForkJoinPool(numThreads)))
+ defaultTaskSupportField.setAccessible(true)
+ defaultTaskSupportField.set(
+ parPkgObj,
+ new scala.collection.parallel.ForkJoinTaskSupport(
+ new scala.concurrent.forkjoin.ForkJoinPool(numThreads)))
}
val usage = """
@@ -373,7 +387,7 @@ object Pilon {
Print version string and exit.
HEURISTICS:
--defaultqual qual
- Assumes bases are of this quality if quals are no present in input BAMs (default 15).
+ Assumes bases are of this quality if quals are no present in input BAMs (default 10).
--flank nbases
Controls how much of the well-aligned reads will be used; this many bases at each
end of the good reads will be ignored (default 10).
=====================================
src/main/scala/org/broadinstitute/pilon/Region.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,6 +14,7 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
=====================================
src/main/scala/org/broadinstitute/pilon/Scaffold.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,6 +14,7 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
@@ -27,8 +28,8 @@ package org.broadinstitute.pilon
*/
-import scala.collection.JavaConversions._
-import collection.mutable.{ Map, HashMap, Set, HashSet }
+import scala.collection.JavaConverters._
+import collection.mutable.{ Map, HashMap }
import htsjdk.samtools._
@@ -347,7 +348,7 @@ object Scaffold {
val reader = bam.reader
var eaList: List[EndAlignment] = Nil
- for (read <- reader.iterator if (!read.getReadUnmappedFlag)) {
+ for (read <- reader.iterator.asScala if (!read.getReadUnmappedFlag)) {
val contig = scaffolds(read.getReferenceIndex())
val ca = new EndAlignment(read, contig)
//println(ca)
=====================================
src/main/scala/org/broadinstitute/pilon/Tracks.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,6 +14,7 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
@@ -27,9 +28,6 @@ package org.broadinstitute.pilon
*/
import java.io._
-import scala.collection.JavaConversions._
-import htsjdk.samtools._
-
class Tracks(val reference: GenomeFile) {
def standardTracks = {
makeBedTrack("Pilon.bed", "Pilon")
=====================================
src/main/scala/org/broadinstitute/pilon/Utils.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,6 +14,7 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
=====================================
src/main/scala/org/broadinstitute/pilon/Vcf.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,6 +14,7 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
@@ -71,7 +72,8 @@ class Vcf(val file: File, val contigsWithSizes: List[(String, Int)] = Nil) {
def close = writer.close
def writeRecord(region: GenomeRegion, index: Int,
- embedded: Boolean = false, indelOk: Boolean = true): Unit = {
+ embedded: Boolean = false, indelOkArg: Boolean = true): Unit = {
+ val indelOk = indelOkArg && index > 0
val locus = region.locus(index)
val pileUp = region.pileUpRegion(index)
val bc = pileUp.baseCall
=====================================
src/main/scala/org/broadinstitute/pilon/Version.scala
=====================================
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2015 Broad Institute, Inc.
+ * Copyright (c) 2012-2018 Broad Institute, Inc.
*
* This file is part of Pilon.
*
@@ -14,6 +14,7 @@
*
* You should have received a copy of the GNU General Public License
* along with Pilon. If not, see <http://www.gnu.org/licenses/>.
+ *
*/
package org.broadinstitute.pilon
View it on GitLab: https://salsa.debian.org/med-team/pilon/commit/debb98f957c35c9e47f4f20a0d26b0279554a394
--
View it on GitLab: https://salsa.debian.org/med-team/pilon/commit/debb98f957c35c9e47f4f20a0d26b0279554a394
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20181208/2a022445/attachment-0001.html>
More information about the debian-med-commit
mailing list