[med-svn] [varscan] 02/07: New upstream version 2.4.3
Andreas Tille
tille at debian.org
Sun Dec 4 07:41:40 UTC 2016
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository varscan.
commit 061067cf377c8a075bbd46fc98209aea1954c99b
Author: Andreas Tille <tille at debian.org>
Date: Sun Dec 4 08:26:08 2016 +0100
New upstream version 2.4.3
---
sf/varscan/CallMpileup.java     |   8 +-
sf/varscan/CallPileup.java      |   2 +-
sf/varscan/Comparison.java      |  10 +-
sf/varscan/CopyCaller.java      |   5 +-
sf/varscan/Copynumber.java      | 696 +++++++++++++++++++++-------------------
sf/varscan/Coverage.java        |   6 +-
sf/varscan/FilterSomatic.java   |   6 +-
sf/varscan/FilterVariants.java  |   6 +-
sf/varscan/FishersExact.java    |   2 +-
sf/varscan/FpFilter.java        | 305 ++++++++++++++----
sf/varscan/LimitVariants.java   |   6 +-
sf/varscan/ProcessSomatic.java  |   6 +-
sf/varscan/ReadCounts.java      |   6 +-
sf/varscan/SmartFileReader.java |  37 +++
sf/varscan/Somatic.java         | 253 ++++++++++++---
sf/varscan/Trio.java            | 105 ++++--
sf/varscan/VarScan.java         |  43 +--
17 files changed, 979 insertions(+), 523 deletions(-)
diff --git a/sf/varscan/CallMpileup.java b/sf/varscan/CallMpileup.java
index ebba0b3..2f25809 100644
--- a/sf/varscan/CallMpileup.java
+++ b/sf/varscan/CallMpileup.java
@@ -10,7 +10,7 @@ package net.sf.varscan;
import java.io.BufferedReader;
import java.io.File;
-import java.io.FileReader;
+import net.sf.varscan.SmartFileReader;
import java.text.DecimalFormat;
import java.util.HashMap;
import java.lang.Math;
@@ -18,7 +18,7 @@ import java.lang.Math;
/**
* A class for calling variants or consensus bases from a pileup file
*
- * @version 2.3
+ * @version 2.4
*
* @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
*
@@ -121,7 +121,7 @@ public class CallMpileup {
// Parse sample list //
if(samplefile.exists())
{
- BufferedReader in = new BufferedReader(new FileReader(samplefile));
+ BufferedReader in = new BufferedReader(new SmartFileReader(samplefile));
String line = "";
if(in.ready())
{
@@ -782,7 +782,7 @@ public class CallMpileup {
// For the smaller deletion, determine ref bases to add //
if(varAllele.length() < maxDelSize)
{
- String varEntry = maxDelBases.replace(varAllele, "");
+ String varEntry = maxDelBases.replaceFirst(varAllele, "");
varColumn = varColumn + refBase + varEntry;
}
else
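The replace() -> replaceFirst() change above fixes how the shorter of two co-occurring deletion alleles is padded. A minimal, runnable sketch of the difference (the allele values are invented for illustration):

    public class ReplaceFirstDemo {
        public static void main(String[] args) {
            // Hypothetical site: the longest deletion removes "AGA",
            // a shorter co-occurring deletion removes only "A".
            String maxDelBases = "AGA";
            String varAllele = "A";

            // String.replace() strips EVERY occurrence of the substring,
            // so the ref bases appended for the shorter allele come out wrong:
            System.out.println(maxDelBases.replace(varAllele, ""));      // prints "G"

            // String.replaceFirst() strips only the leftmost occurrence,
            // which is what the padded variant entry needs:
            System.out.println(maxDelBases.replaceFirst(varAllele, "")); // prints "GA"
        }
    }

Note that replaceFirst() treats its first argument as a regular expression; that is safe here because deletion alleles consist only of A/C/G/T/N. The same fix recurs in the Somatic.java hunk below.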
diff --git a/sf/varscan/CallPileup.java b/sf/varscan/CallPileup.java
index 72c6bd0..39cbf48 100644
--- a/sf/varscan/CallPileup.java
+++ b/sf/varscan/CallPileup.java
@@ -14,7 +14,7 @@ import java.util.HashMap;
/**
* A class for calling variants or consensus bases from a pileup file
*
- * @version 2.3
+ * @version 2.4
*
* @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
*
diff --git a/sf/varscan/Comparison.java b/sf/varscan/Comparison.java
index 1ac4e4b..82139f7 100644
--- a/sf/varscan/Comparison.java
+++ b/sf/varscan/Comparison.java
@@ -10,7 +10,7 @@ package net.sf.varscan;
import java.io.BufferedReader;
import java.io.FileOutputStream;
-import java.io.FileReader;
+import net.sf.varscan.SmartFileReader;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.BitSet;
@@ -18,7 +18,7 @@ import java.util.BitSet;
/**
* A class for comparing positions (variants) between two files
*
- * @version 2.3
+ * @version 2.4
*
* @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
*
@@ -55,8 +55,8 @@ public class Comparison {
outFile = new PrintStream( new FileOutputStream(outFileName) );
- BufferedReader file1 = new BufferedReader(new FileReader(fileName1));
- BufferedReader file2 = new BufferedReader(new FileReader(fileName2));
+ BufferedReader file1 = new BufferedReader(new SmartFileReader(fileName1));
+ BufferedReader file2 = new BufferedReader(new SmartFileReader(fileName2));
if(!(file1.ready() && file2.ready()))
{
@@ -241,7 +241,7 @@ public class Comparison {
try
{
- BufferedReader infile = new BufferedReader(new FileReader(fileName));
+ BufferedReader infile = new BufferedReader(new SmartFileReader(fileName));
String line = "";
int lineCounter = 0;
diff --git a/sf/varscan/CopyCaller.java b/sf/varscan/CopyCaller.java
index 0eaabff..a9b3efb 100644
--- a/sf/varscan/CopyCaller.java
+++ b/sf/varscan/CopyCaller.java
@@ -10,17 +10,14 @@ package net.sf.varscan;
import java.io.BufferedReader;
import java.io.FileOutputStream;
-import java.io.FileReader;
import java.io.PrintStream;
import java.text.DecimalFormat;
-import java.util.Arrays;
-import java.util.BitSet;
import java.util.HashMap;
/**
* A class for calling/GC-adjusting copy number variants from raw somatic copynumber output
*
- * @version 2.3
+ * @version 2.4
*
* @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
*
diff --git a/sf/varscan/Copynumber.java b/sf/varscan/Copynumber.java
index 443c8ae..ad94d4f 100644
--- a/sf/varscan/Copynumber.java
+++ b/sf/varscan/Copynumber.java
@@ -2,7 +2,7 @@ package net.sf.varscan;
import java.io.BufferedReader;
import java.io.FileOutputStream;
-import java.io.FileReader;
+import net.sf.varscan.SmartFileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.text.DecimalFormat;
@@ -11,7 +11,7 @@ import java.util.HashMap;
/**
* A class for calling copy number variants between a tumor and a matched normal sample
*
- * @version 2.3
+ * @version 2.4
*
* @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
*
@@ -62,6 +62,7 @@ public class Copynumber {
int maxSegmentSize = 100;
double dataRatio = 1.00;
double pValueThreshold = 0.01;
+ long numBases = 0;
// Try adjusting any provided parameters based on user input //
try
@@ -199,6 +200,7 @@ public class Copynumber {
while ((line = in.readLine()) != null)
{
+ numBases++;
// Begin try-catch for line parsing //
@@ -208,7 +210,16 @@ public class Copynumber {
// Verify expected pileup format //
- if(lineContents.length > 5 && lineContents[0].length() > 0 && lineContents[1].length() > 0 && lineContents[2].length() > 0 && lineContents[3].length() > 0)
+// if(lineContents.length > 5 && lineContents[0].length() > 0 && lineContents[1].length() > 0 && lineContents[2].length() > 0 && lineContents[3].length() > 0)
+ if(lineContents.length < 8)
+ {
+ // This is an incomplete mpileup line, so skip it. If verbose, throw a warning //
+ if(params.containsKey("verbose"))
+ {
+ System.err.println("Incomplete mpileup at line " + numBases + "; line being skipped.");
+ }
+ }
+ else
{
sharedPositions++;
@@ -224,15 +235,25 @@ public class Copynumber {
// Parse normal, which should be first sample //
int normalOffset = 3;
- int pileupDepthNormal = Integer.parseInt(lineContents[normalOffset]);
- //String normalBases = lineContents[normalOffset + 1];
- String normalQualities = lineContents[normalOffset + 2];
+ int pileupDepthNormal = 0;
+ String normalQualities = "";
+ if(lineContents.length >= (normalOffset + 1))
+ {
+ pileupDepthNormal = Integer.parseInt(lineContents[normalOffset]);
+ //String normalBases = lineContents[normalOffset + 1];
+ normalQualities = lineContents[normalOffset + 2];
+ }
// Parse tumor, which should be second sample //
int tumorOffset = 6;
- int pileupDepthTumor = Integer.parseInt(lineContents[tumorOffset]);
- //String tumorBases = lineContents[tumorOffset + 1];
- String tumorQualities = lineContents[tumorOffset + 2];
+ int pileupDepthTumor = 0;
+ String tumorQualities = "";
+ if(lineContents.length >= (tumorOffset + 2 + 1))
+ {
+ pileupDepthTumor = Integer.parseInt(lineContents[tumorOffset]);
+ //String tumorBases = lineContents[tumorOffset + 1];
+ tumorQualities = lineContents[tumorOffset + 2];
+ }
// If either sample met the minimum coverage and both had at least one read //
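The guards above replace the old all-or-nothing format check: short lines are now skipped (with a warning under --verbose) instead of aborting the run. A condensed sketch of the same column layout, chrom/pos/ref followed by depth, bases, and quals for normal (indexes 3-5) and tumor (indexes 6-8); the helper name is assumed, not upstream code:

    static void parseTwoSampleMpileupLine(String line, long lineNumber, boolean verbose) {
        String[] f = line.split("\t");

        // Fewer than 8 columns: one sample's depth/bases/quals is missing //
        if (f.length < 8) {
            if (verbose)
                System.err.println("Incomplete mpileup at line " + lineNumber + "; line being skipped.");
            return;
        }

        int pileupDepthNormal = Integer.parseInt(f[3]);
        String normalQualities = f[5];

        int pileupDepthTumor = 0;
        String tumorQualities = "";
        if (f.length >= 9) {            // tumor columns fully present
            pileupDepthTumor = Integer.parseInt(f[6]);
            tumorQualities = f[8];
        }
        // ... coverage comparison and segmentation continue from here ...
    }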
@@ -372,11 +393,7 @@ public class Copynumber {
}
}
- else
- {
- System.err.println("Error: Invalid format or not enough samples in mpileup: " + line + "\n");
- return;
- }
+
}
catch(Exception e)
{
@@ -473,6 +490,7 @@ public class Copynumber {
System.err.println("Normal Pileup: " + normalPileupFile);
System.err.println("Tumor Pileup: " + tumorPileupFile);
+ System.err.println("NOTICE: While dual input files are still supported, using a single mpileup file (normal-tumor) with the --mpileup 1 setting is strongly recommended.");
// Set parameter defaults //
@@ -555,8 +573,8 @@ public class Copynumber {
// Prepare file readers for normal and tumor pileups //
- BufferedReader normal = new BufferedReader(new FileReader(normalPileupFile));
- BufferedReader tumor = new BufferedReader(new FileReader(tumorPileupFile));
+ BufferedReader normal = new BufferedReader(new SmartFileReader(normalPileupFile));
+ BufferedReader tumor = new BufferedReader(new SmartFileReader(tumorPileupFile));
if(!(normal.ready() && tumor.ready()))
{
@@ -612,347 +630,365 @@ public class Copynumber {
DecimalFormat oneDigit = new DecimalFormat("#0.0");
DecimalFormat threeDigits = new DecimalFormat("#0.000");
+ try {
+ // Get first line of Normal //
- // Get first line of Normal //
+ if((lineNormal = normal.readLine()) != null)
+ {
+ String[] normalContents = lineNormal.split("\t");
- if((lineNormal = normal.readLine()) != null)
- {
- String[] normalContents = lineNormal.split("\t");
-
- if(normalContents.length > 1)
- {
- chromNormal = normalContents[0];
- posNormal = Integer.parseInt(normalContents[1]);
- }
- }
-
- // Loop through lines in tumor //
+ if(normalContents.length > 1)
+ {
+ chromNormal = normalContents[0];
+ posNormal = Integer.parseInt(normalContents[1]);
+ }
+ }
- while ((lineTumor = tumor.readLine()) != null)
- {
- tumorPositions++;
- String[] tumorContents = lineTumor.split("\t");
+ // Loop through lines in tumor //
- if(tumorContents.length > 1)
+ while ((lineTumor = tumor.readLine()) != null)
{
- chromTumor = tumorContents[0];
- posTumor = Integer.parseInt(tumorContents[1]);
- }
+ tumorPositions++;
+ String[] tumorContents = lineTumor.split("\t");
- // Parse normal lines until we get the same chromosome //
- boolean flagEOF = false;
- boolean normalWasReset = false;
-
- // Advance in normal file if tumor is changed but normal is not, or if tumor is higher //
- while(!chromNormal.equals(chromTumor) && !chromTumor.equals(prevChromTumor) && !flagEOF && (chromNormal.equals(prevChromTumor) || inSortOrder(chromNormal, chromTumor)))
- {
- //System.err.println("Normal (" + chromNormal + ") catching up to " + chromTumor);
- // Get next line from normal pileup //
- if((lineNormal = normal.readLine()) != null)
- {
- String[] normalContents = lineNormal.split("\t");
-
- if(normalContents.length > 1)
- {
- chromNormal = normalContents[0];
- posNormal = Integer.parseInt(normalContents[1]);
- }
- }
- else
- {
- flagEOF = true;
- }
+ if(tumorContents.length > 1)
+ {
+ chromTumor = tumorContents[0];
+ posTumor = Integer.parseInt(tumorContents[1]);
+ }
+ // Parse normal lines until we get the same chromosome //
+ boolean flagEOF = false;
+ boolean normalWasReset = false;
- }
+ // Advance in normal file if tumor is changed but normal is not, or if tumor is higher //
+ while(!chromNormal.equals(chromTumor) && !chromTumor.equals(prevChromTumor) && !flagEOF && (chromNormal.equals(prevChromTumor) || inSortOrder(chromNormal, chromTumor)))
+ {
+ //System.err.println("Normal (" + chromNormal + ") catching up to " + chromTumor);
+ // Get next line from normal pileup //
+ if((lineNormal = normal.readLine()) != null)
+ {
+ String[] normalContents = lineNormal.split("\t");
- // If chromosomes match and are non-blank, attempt to get matching positions //
- if(chromNormal.equals(chromTumor) && !chromNormal.equals(""))
- {
- normalWasReset = false;
- // Seek to matching Normal Position //
-
- while(chromNormal.equals(chromTumor) && posNormal < posTumor && ((lineNormal = normal.readLine()) != null))
- {
- String[] normalContents = lineNormal.split("\t");
- if(normalContents.length > 1)
- {
- chromNormal = normalContents[0];
- posNormal = Integer.parseInt(normalContents[1]);
-
- // If still less than tumor position, look for homozygous del //
- if(posNormal < posTumor)
+ if(normalContents.length > 1)
{
- int pileupDepthNormal = 0;
- String normalQualities = "";
+ chromNormal = normalContents[0];
+ posNormal = Integer.parseInt(normalContents[1]);
+ }
+ }
+ else
+ {
+ flagEOF = true;
+ }
- // Pileup Files have 6-7 columns //
- if(normalContents.length <= 7)
- {
- pileupDepthNormal = Integer.parseInt(normalContents[3]);
- normalQualities = normalContents[5];
- }
- // Pileup lines in CNS files have 10-11 columns
- else if (normalContents.length >= 10 && normalContents.length <= 11)
- {
- pileupDepthNormal = Integer.parseInt(normalContents[7]);
- normalQualities = normalContents[9];
- }
- }
- else
- {
+ }
+
+ // If chromosomes match and are non-blank, attempt to get matching positions //
+ if(chromNormal.equals(chromTumor) && !chromNormal.equals(""))
+ {
+ normalWasReset = false;
+ // Seek to matching Normal Position //
+ while(chromNormal.equals(chromTumor) && posNormal < posTumor && ((lineNormal = normal.readLine()) != null))
+ {
+ String[] normalContents = lineNormal.split("\t");
+ if(normalContents.length > 1)
+ {
+ chromNormal = normalContents[0];
+ posNormal = Integer.parseInt(normalContents[1]);
+
+ // If still less than tumor position, look for homozygous del //
+ if(posNormal < posTumor)
+ {
+ int pileupDepthNormal = 0;
+ String normalQualities = "";
+
+ try
+ {
+ // Pileup Files have 6-7 columns //
+ if(normalContents.length <= 7)
+ {
+ pileupDepthNormal = Integer.parseInt(normalContents[3]);
+ normalQualities = normalContents[5];
+ }
+ // Pileup lines in CNS files have 10-11 columns
+ else if (normalContents.length >= 10 && normalContents.length <= 11)
+ {
+ pileupDepthNormal = Integer.parseInt(normalContents[7]);
+ normalQualities = normalContents[9];
+ }
+ }
+ catch(Exception e)
+ {
+
+ }
+
+ }
+ else
+ {
+
+ }
}
- }
- }
-
- // Seek to matching Tumor Position //
-
- while(chromNormal.equals(chromTumor) && posTumor < posNormal && ((lineTumor = tumor.readLine()) != null))
- {
- tumorContents = lineTumor.split("\t");
- if(tumorContents.length > 1)
- {
- chromTumor = tumorContents[0];
- posTumor = Integer.parseInt(tumorContents[1]);
- }
- }
-
- // Proceed if normal and tumor positions match //
-
- if(chromNormal.equals(chromTumor) && chromNormal.equals(chromTumor) && posNormal == posTumor)
- {
- //stats.put("sharedPositions", (stats.get("sharedPositions") + 1));
- sharedPositions++;
- refBase = tumorContents[2];
-
-// Parse out base qualities //
- String[] normalContents = lineNormal.split("\t");
- int pileupDepthNormal = 0;
- int pileupDepthTumor = 0;
- String normalQualities = "";
- String tumorQualities = "";
-
- // Pileup Files have 6-7 columns //
- if(normalContents.length >= 6 && normalContents.length <= 7)
- {
- pileupDepthNormal = Integer.parseInt(normalContents[3]);
- normalQualities = normalContents[5];
- }
- // Pileup lines in CNS files have 10-11 columns
- else if (normalContents.length >= 10 && normalContents.length <= 11)
- {
- pileupDepthNormal = Integer.parseInt(normalContents[7]);
- normalQualities = normalContents[9];
- }
-
- // Pileup Files have 6-7 columns //
- if(tumorContents.length >= 6 && tumorContents.length <= 7)
- {
- tumorQualities = tumorContents[5];
- pileupDepthTumor = Integer.parseInt(tumorContents[3]);
- }
- // Pileup lines in CNS files have 10-11 columns
- else if (tumorContents.length >= 10 && tumorContents.length <= 11)
- {
- tumorQualities = tumorContents[9];
- pileupDepthTumor = Integer.parseInt(tumorContents[7]);
- }
-
- // If either sample met the minimum coverage and both had at least one read //
-
-// if((pileupDepthNormal >= minCoverage || pileupDepthTumor >= minCoverage) && normalQualities.length() > 0 && tumorQualities.length() > 0)
-
- // We want the normal sample to meet the minimum coverage because that's the comparator //
- if(pileupDepthNormal >= minCoverage && normalQualities.length() > 0) // && tumorQualities.length() > 0)
- {
- comparedPositions++;
-// Get the depth of bases above minimum quality //
-
- int normalDepth = VarScan.qualityDepth(normalQualities, minBaseQual);
- int tumorDepth = VarScan.qualityDepth(tumorQualities, minBaseQual);
-
- // Determine if we have a copy changepoint //
- // If this base is not contiguous with the copyRegion
- // If the normal or tumor depth changes //
-
- int diffNormal = Math.abs(copyDepthNormal - normalDepth);
- int diffTumor = Math.abs(copyDepthTumor - tumorDepth);
- int posDiff = posTumor - copyStop;
-
- // DETERMINE IF WE CONTINUE THIS REGION OR PROCESS IT AND START A NEW ONE //
-
- boolean continueFlag = false;
-
- // If chromosomes differ or contiguity broken, process the region //
-
- if(posDiff > 2 || !(copyChrom.equals(chromTumor)))
- {
- continueFlag = false;
- }
- else
- {
- if(copyPositions >= maxSegmentSize)
- {
- continueFlag = false;
- }
- else if(diffNormal <= 2 && diffTumor <= 2)
- {
- continueFlag = true;
- }
- else
- {
- // Do a Fisher's exact test on the copy number changes. ##
-
- double changePvalue = VarScan.getSignificance(copyDepthNormal, copyDepthTumor, normalDepth, tumorDepth);
-
- // If depth change not significant, continue with region //
- if(changePvalue >= pValueThreshold)
- {
- continueFlag = true;
- }
- else
- {
- continueFlag = false;
- }
-
- }
- }
-
-
- // If continuing, extend this region and don't process yet //
-
- if(continueFlag)
- {
- copySumNormal += normalDepth;
- copySumTumor += tumorDepth;
- copyPositions++;
- if(refBase.equals("G") || refBase.equals("C") || refBase.equals("g") || refBase.equals("c"))
- copyPositionsGC++;
- copyStop = posTumor;
- }
-
- // Otherwise, process this region (if it qualifies) and start a new one //
-
- else
- {
- if(copyPositions >= minSegmentSize)
- {
- rawCopySegments++;
- String regionResults = processCopyRegion(copyChrom, copyStart, copyStop, copyPositions, copyPositionsGC, copySumNormal, copySumTumor, minCoverage, dataRatio);
-
- if(regionResults.length() > 0)
- {
- outCopySegments.println(regionResults);
- goodCopySegments++;
- }
- }
-
- // Start a new copyNumber region //
- copyChrom = chromTumor;
- copyStart = posTumor;
- copyStop = posTumor;
- copyDepthNormal = normalDepth;
- copyDepthTumor = tumorDepth;
- copySumNormal = normalDepth;
- copySumTumor = tumorDepth;
- copyPositions = 1;
- if(refBase.equals("G") || refBase.equals("C") || refBase.equals("g") || refBase.equals("c"))
- copyPositionsGC = 1;
- else
- copyPositionsGC = 0;
- }
-
-
- }
- else
- {
- // If minimum coverage was not met, print region //
- // If we had a copyNumber region that met minimum coverage, report it //
- if(copyPositions >= minSegmentSize)
- {
- rawCopySegments++;
- String regionResults = processCopyRegion(copyChrom, copyStart, copyStop, copyPositions, copyPositionsGC, copySumNormal, copySumTumor, minCoverage, dataRatio);
-
- if(regionResults.length() > 0)
- {
- outCopySegments.println(regionResults);
- goodCopySegments++;
- }
- }
+ }
- // Reset the copyNumber region //
- copyChrom = "";
- copyStart = 0;
- copyStop = 0;
- copyDepthNormal = 0;
- copyDepthTumor = 0;
- copySumNormal = 0;
- copySumTumor = 0;
- copyPositions = 0;
- copyPositionsGC = 0;
- }
-
- // Record this chromosome //
-
- prevChromNormal = chromNormal;
- prevChromTumor = chromTumor;
- }
- else
- {
- //System.err.println("Failed to match positions " + chromNormal + " " + posNormal + " to Tumor " + chromTumor + " " + posTumor);
- }
- }
- // If they're in sort order, do nothing so that tumor can catch up //
- else if(inSortOrder(chromNormal, chromTumor))
- {
- System.err.println("Not resetting normal file because " + chromNormal + " < " + chromTumor);
- }
- // If we reached the end of the normal file but never saw this chromosome, //
- // fast-forward until tumor chromosome changes and reset normal file //
- else if(flagEOF)
- {
- flagEOF = false;
+ // Seek to matching Tumor Position //
- while(prevChromTumor.equals(chromTumor) && !flagEOF)
- {
- if((lineTumor = tumor.readLine()) != null)
- {
+ while(chromNormal.equals(chromTumor) && posTumor < posNormal && ((lineTumor = tumor.readLine()) != null))
+ {
tumorContents = lineTumor.split("\t");
-
if(tumorContents.length > 1)
{
chromTumor = tumorContents[0];
posTumor = Integer.parseInt(tumorContents[1]);
}
- }
- else
- {
- flagEOF = true;
- }
- }
+ }
- // Reset the normal file if we've already passed this chromosome in normal //
+ // Proceed if normal and tumor positions match //
- if(!flagEOF && !normalWasReset)
- {
- if(inSortOrder(chromNormal, chromTumor))
- {
- System.err.println("Not resetting normal file because " + chromNormal + " < " + chromTumor);
- }
- else
- {
- System.err.println("Resetting normal file because " + chromNormal + " > " + chromTumor);
- normalWasReset = true;
- normal.close();
- normal = new BufferedReader(new FileReader(normalPileupFile));
- }
+ if(chromNormal.equals(chromTumor) && chromNormal.equals(chromTumor) && posNormal == posTumor)
+ {
+ //stats.put("sharedPositions", (stats.get("sharedPositions") + 1));
+ sharedPositions++;
+ refBase = tumorContents[2];
+
+// Parse out base qualities //
+ String[] normalContents = lineNormal.split("\t");
+ int pileupDepthNormal = 0;
+ int pileupDepthTumor = 0;
+ String normalQualities = "";
+ String tumorQualities = "";
+
+ // Pileup Files have 6-7 columns //
+ if(normalContents.length >= 6 && normalContents.length <= 7)
+ {
+ pileupDepthNormal = Integer.parseInt(normalContents[3]);
+ normalQualities = normalContents[5];
+ }
+ // Pileup lines in CNS files have 10-11 columns
+ else if (normalContents.length >= 10 && normalContents.length <= 11)
+ {
+ pileupDepthNormal = Integer.parseInt(normalContents[7]);
+ normalQualities = normalContents[9];
+ }
+
+ // Pileup Files have 6-7 columns //
+ if(tumorContents.length >= 6 && tumorContents.length <= 7)
+ {
+ tumorQualities = tumorContents[5];
+ pileupDepthTumor = Integer.parseInt(tumorContents[3]);
+ }
+ // Pileup lines in CNS files have 10-11 columns
+ else if (tumorContents.length >= 10 && tumorContents.length <= 11)
+ {
+ tumorQualities = tumorContents[9];
+ pileupDepthTumor = Integer.parseInt(tumorContents[7]);
+ }
+
+ // If either sample met the minimum coverage and both had at least one read //
+
+// if((pileupDepthNormal >= minCoverage || pileupDepthTumor >= minCoverage) && normalQualities.length() > 0 && tumorQualities.length() > 0)
+
+ // We want the normal sample to meet the minimum coverage because that's the comparator //
+ if(pileupDepthNormal >= minCoverage && normalQualities.length() > 0) // && tumorQualities.length() > 0)
+ {
+ comparedPositions++;
+// Get the depth of bases above minimum quality //
+
+ int normalDepth = VarScan.qualityDepth(normalQualities, minBaseQual);
+ int tumorDepth = VarScan.qualityDepth(tumorQualities, minBaseQual);
+
+ // Determine if we have a copy changepoint //
+ // If this base is not contiguous with the copyRegion
+ // If the normal or tumor depth changes //
+
+ int diffNormal = Math.abs(copyDepthNormal - normalDepth);
+ int diffTumor = Math.abs(copyDepthTumor - tumorDepth);
+ int posDiff = posTumor - copyStop;
+
+ // DETERMINE IF WE CONTINUE THIS REGION OR PROCESS IT AND START A NEW ONE //
+
+ boolean continueFlag = false;
+
+ // If chromosomes differ or contiguity broken, process the region //
+
+ if(posDiff > 2 || !(copyChrom.equals(chromTumor)))
+ {
+ continueFlag = false;
+ }
+ else
+ {
+ if(copyPositions >= maxSegmentSize)
+ {
+ continueFlag = false;
+ }
+ else if(diffNormal <= 2 && diffTumor <= 2)
+ {
+ continueFlag = true;
+ }
+ else
+ {
+ // Do a Fisher's exact test on the copy number changes. ##
+
+ double changePvalue = VarScan.getSignificance(copyDepthNormal, copyDepthTumor, normalDepth, tumorDepth);
+
+ // If depth change not significant, continue with region //
+ if(changePvalue >= pValueThreshold)
+ {
+ continueFlag = true;
+ }
+ else
+ {
+ continueFlag = false;
+ }
+
+ }
+ }
+
+
+ // If continuing, extend this region and don't process yet //
+
+ if(continueFlag)
+ {
+ copySumNormal += normalDepth;
+ copySumTumor += tumorDepth;
+ copyPositions++;
+ if(refBase.equals("G") || refBase.equals("C") || refBase.equals("g") || refBase.equals("c"))
+ copyPositionsGC++;
+ copyStop = posTumor;
+ }
+
+ // Otherwise, process this region (if it qualifies) and start a new one //
+
+ else
+ {
+ if(copyPositions >= minSegmentSize)
+ {
+ rawCopySegments++;
+ String regionResults = processCopyRegion(copyChrom, copyStart, copyStop, copyPositions, copyPositionsGC, copySumNormal, copySumTumor, minCoverage, dataRatio);
+
+ if(regionResults.length() > 0)
+ {
+ outCopySegments.println(regionResults);
+ goodCopySegments++;
+ }
+ }
+
+ // Start a new copyNumber region //
+ copyChrom = chromTumor;
+ copyStart = posTumor;
+ copyStop = posTumor;
+ copyDepthNormal = normalDepth;
+ copyDepthTumor = tumorDepth;
+ copySumNormal = normalDepth;
+ copySumTumor = tumorDepth;
+ copyPositions = 1;
+ if(refBase.equals("G") || refBase.equals("C") || refBase.equals("g") || refBase.equals("c"))
+ copyPositionsGC = 1;
+ else
+ copyPositionsGC = 0;
+ }
+
+
+ }
+ else
+ {
+ // If minimum coverage was not met, print region //
+ // If we had a copyNumber region that met minimum coverage, report it //
+ if(copyPositions >= minSegmentSize)
+ {
+ rawCopySegments++;
+ String regionResults = processCopyRegion(copyChrom, copyStart, copyStop, copyPositions, copyPositionsGC, copySumNormal, copySumTumor, minCoverage, dataRatio);
+
+ if(regionResults.length() > 0)
+ {
+ outCopySegments.println(regionResults);
+ goodCopySegments++;
+ }
+ }
+
+ // Reset the copyNumber region //
+ copyChrom = "";
+ copyStart = 0;
+ copyStop = 0;
+ copyDepthNormal = 0;
+ copyDepthTumor = 0;
+ copySumNormal = 0;
+ copySumTumor = 0;
+ copyPositions = 0;
+ copyPositionsGC = 0;
+ }
+
+ // Record this chromosome //
+
+ prevChromNormal = chromNormal;
+ prevChromTumor = chromTumor;
+ }
+ else
+ {
+ //System.err.println("Failed to match positions " + chromNormal + " " + posNormal + " to Tumor " + chromTumor + " " + posTumor);
+ }
+ }
+ // If they're in sort order, do nothing so that tumor can catch up //
+ else if(inSortOrder(chromNormal, chromTumor))
+ {
+ System.err.println("Not resetting normal file because " + chromNormal + " < " + chromTumor);
+ }
+ // If we reached the end of the normal file but never saw this chromosome, //
+ // fast-forward until tumor chromosome changes and reset normal file //
+ else if(flagEOF)
+ {
+ flagEOF = false;
+
+ while(prevChromTumor.equals(chromTumor) && !flagEOF)
+ {
+ if((lineTumor = tumor.readLine()) != null)
+ {
+ tumorContents = lineTumor.split("\t");
+
+ if(tumorContents.length > 1)
+ {
+ chromTumor = tumorContents[0];
+ posTumor = Integer.parseInt(tumorContents[1]);
+ }
+ }
+ else
+ {
+ flagEOF = true;
+ }
+ }
+
+ // Reset the normal file if we've already passed this chromosome in normal //
+
+ if(!flagEOF && !normalWasReset)
+ {
+ if(inSortOrder(chromNormal, chromTumor))
+ {
+ System.err.println("Not resetting normal file because " + chromNormal + " < " + chromTumor);
+ }
+ else
+ {
+ System.err.println("Resetting normal file because " + chromNormal + " > " + chromTumor);
+ normalWasReset = true;
+ normal.close();
+ normal = new BufferedReader(new SmartFileReader(normalPileupFile));
+ }
+
+ }
+ }
- }
}
- }
+ }
+ catch (Exception e)
+ {
+ System.err.println("Exception encountered while parsing normal/tumor files: " + e.getMessage());
+ System.err.println("Note: It is HIGHLY recommended that you use a two-sample mpileup input rather than separate pileup files for normal/tumor.");
+ return;
+ }
+
+
+
normal.close();
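Most of the Copynumber churn above is a re-indent that wraps the dual-pileup loop in a try/catch, but the segmentation core it preserves is worth restating: a region is extended while positions stay contiguous and depths stay stable, and otherwise closed out. A condensed sketch of that decision (helper name assumed, not upstream code; VarScan.getSignificance is the Fisher's exact helper used in the diff):

    static boolean continueRegion(String copyChrom, String chromTumor,
                                  int posDiff, int copyPositions, int maxSegmentSize,
                                  int diffNormal, int diffTumor,
                                  int copyDepthNormal, int copyDepthTumor,
                                  int normalDepth, int tumorDepth,
                                  double pValueThreshold) {
        // Contiguity broken or chromosome changed: close the region
        if (posDiff > 2 || !copyChrom.equals(chromTumor))
            return false;
        // Hard cap on segment length
        if (copyPositions >= maxSegmentSize)
            return false;
        // Small depth fluctuations never trigger a changepoint
        if (diffNormal <= 2 && diffTumor <= 2)
            return true;
        // Otherwise, Fisher's exact test on the old depths vs the new depths;
        // an insignificant change lets the region continue
        double changePvalue = VarScan.getSignificance(copyDepthNormal, copyDepthTumor,
                                                      normalDepth, tumorDepth);
        return changePvalue >= pValueThreshold;
    }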
diff --git a/sf/varscan/Coverage.java b/sf/varscan/Coverage.java
index bc83bf0..0ebe4b8 100644
--- a/sf/varscan/Coverage.java
+++ b/sf/varscan/Coverage.java
@@ -10,7 +10,7 @@ package net.sf.varscan;
import java.io.BufferedReader;
import java.io.FileOutputStream;
-import java.io.FileReader;
+import net.sf.varscan.SmartFileReader;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.BitSet;
@@ -19,7 +19,7 @@ import java.util.HashMap;
/**
* A class for assessing coverage of target regions (experimental)
*
- * @version 2.3
+ * @version 2.4
*
* @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
*
@@ -332,7 +332,7 @@ public class Coverage {
try
{
- BufferedReader infile = new BufferedReader(new FileReader(fileName));
+ BufferedReader infile = new BufferedReader(new SmartFileReader(fileName));
String line = "";
int lineCounter = 0;
diff --git a/sf/varscan/FilterSomatic.java b/sf/varscan/FilterSomatic.java
index 02ff284..28e4470 100644
--- a/sf/varscan/FilterSomatic.java
+++ b/sf/varscan/FilterSomatic.java
@@ -11,7 +11,7 @@ package net.sf.varscan;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
-import java.io.FileReader;
+import net.sf.varscan.SmartFileReader;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.HashMap;
@@ -19,7 +19,7 @@ import java.util.HashMap;
/**
* A class for filtering VarScan variant predictions
*
- * @version 2.3
+ * @version 2.4
*
* @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
*
@@ -417,7 +417,7 @@ public class FilterSomatic {
File infile = new File(filename);
if(infile.exists())
{
- BufferedReader in = new BufferedReader(new FileReader(infile));
+ BufferedReader in = new BufferedReader(new SmartFileReader(infile));
if(in.ready())
{
diff --git a/sf/varscan/FilterVariants.java b/sf/varscan/FilterVariants.java
index c684df0..0c0d960 100644
--- a/sf/varscan/FilterVariants.java
+++ b/sf/varscan/FilterVariants.java
@@ -11,14 +11,14 @@ package net.sf.varscan;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
-import java.io.FileReader;
+import net.sf.varscan.SmartFileReader;
import java.io.PrintStream;
import java.util.HashMap;
/**
* A class for filtering VarScan variant predictions
*
- * @version 2.3
+ * @version 2.4
*
* @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
*
@@ -544,7 +544,7 @@ public class FilterVariants {
File infile = new File(filename);
if(infile.exists())
{
- BufferedReader in = new BufferedReader(new FileReader(infile));
+ BufferedReader in = new BufferedReader(new SmartFileReader(infile));
if(in.ready())
{
diff --git a/sf/varscan/FishersExact.java b/sf/varscan/FishersExact.java
index c9827ae..a85befe 100644
--- a/sf/varscan/FishersExact.java
+++ b/sf/varscan/FishersExact.java
@@ -19,7 +19,7 @@ package net.sf.varscan;
* which might not be very accurate if the marginal is very uneven or if there is a small value (less than five)
* in one of the cells.
*
- * @version 2.3
+ * @version 2.4
*
* @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
*
diff --git a/sf/varscan/FpFilter.java b/sf/varscan/FpFilter.java
index e7ab8c8..d4701fe 100644
--- a/sf/varscan/FpFilter.java
+++ b/sf/varscan/FpFilter.java
@@ -11,7 +11,7 @@ package net.sf.varscan;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
-import java.io.FileReader;
+import net.sf.varscan.SmartFileReader;
import java.io.PrintStream;
import java.text.DecimalFormat;
import java.util.HashMap;
@@ -19,7 +19,7 @@ import java.util.HashMap;
/**
* A class for applying the false-positive filter to VarScan variant calls
*
- * @version 2.3
+ * @version 2.4
*
* @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
*
@@ -28,52 +28,75 @@ public class FpFilter {
public FpFilter(String[] args)
{
- // Define the usage message //
- String usage = "USAGE: java -jar VarScan.jar fpfilter [variant file] [readcount file] OPTIONS\n" +
- "\tvariant file - A file of SNPs or indels in VarScan-native or VCF format\n" +
- "\treadcount file - The output file from bam-readcount for those positions\n" +
- "\t***For detailed filtering instructions, please visit http://varscan.sourceforge.net***\n" +
- "\n" +
- "\tOPTIONS:\n" +
- "\t--output-file\t\tOptional output file for filter-pass variants\n" +
- "\t--filtered-file\t\tOptional output file for filter-fail variants\n" +
- "\t--keep-failures\t\tIf set to 1, include failures in the output file\n\n" +
- "\t--min-var-count\t\tMinimum number of variant-supporting reads [4]\n" +
- "\t--min-var-freq\t\tMinimum variant allele frequency [0.05]\n" +
- "\t--min-var-readpos\t\tMinimum average read position of var-supporting reads [0.10]\n" +
- "\t--min-var-dist3\t\tMinimum average relative distance to effective 3' end [0.10]\n" +
- "\t--min-strandedness\tMinimum fraction of variant reads from each strand [0.01]\n" +
- "\t--min-strand-reads\tMinimum allele depth required to perform the strand tests [5]\n" +
- "\t--min-ref-basequal\t\tMinimum average base quality for ref allele [30]\n" +
- "\t--min-var-basequal\t\tMinimum average base quality for var allele [30]\n" +
- "\t--max-rl-diff\t\tMaximum average relative read length difference (ref - var) [0.25]\n" +
- "\t--max-var-mmqs\t\tMaximum mismatch quality sum of variant-supporting reads [100]\n" +
- "\t--max-mmqs-diff\t\tMaximum average mismatch quality sum (var - ref) [50]\n" +
- "\t--min-ref-mapqual\t\tMinimum average mapping quality for ref allele [30]\n" +
- "\t--min-var-mapqual\t\tMinimum average mapping quality for var allele [30]\n" +
- "\t--max-mapqual-diff\tMaximum average mapping quality (ref - var) [50]";
-
-
// Set parameter defaults //
+ double minRefReadPos = 0.10;
+ double minRefDist3 = 0.10;
double minVarReadPos = 0.10;
double minVarDist3 = 0.10;
double minStrandedness = 0.01;
int minStrandReads = 5;
+
+ int minRefAvgRL = 90;
+ int minVarAvgRL = 90;
double maxReadLenDiff = 0.25;
double minVarFreq = 0.05;
int minVarCount = 4;
-
- int maxVarMMQS = 150;
- int maxMMQSdiff = 150;
-
- int minRefBaseQual = 30;
- int minVarBaseQual = 30;
- int minRefMapQual = 30;
- int minVarMapQual = 30;
+ int minVarCountLC = 2; // Min variant count for low-coverage sites
+ double maxSomaticP = 0.05;
+ int maxSomaticPdepth = 10;
+
+ int maxVarMMQS = 100;
+ int maxRefMMQS = 100;
+ int maxMMQSdiff = 50;
+ int minMMQSdiff = 0;
+ int maxBAQdiff = 50;
+
+ int minRefBaseQual = 15;
+ int minVarBaseQual = 15;
+ int minRefMapQual = 15;
+ int minVarMapQual = 15;
int maxMapQualDiff = 50;
+ // Define the usage message //
+ String usage = "USAGE: java -jar VarScan.jar fpfilter [variant file] [readcount file] OPTIONS\n" +
+ "\tvariant file - A file of SNPs or indels in VarScan-native or VCF format\n" +
+ "\treadcount file - The output file from bam-readcount for those positions\n" +
+ "\t***For detailed filtering instructions, please visit http://varscan.sourceforge.net***\n" +
+ "\n" +
+ "\tOPTIONS:\n" +
+ "\t--output-file\t\tOptional output file for filter-pass variants\n" +
+ "\t--filtered-file\t\tOptional output file for filter-fail variants\n" +
+ "\t--dream3-settings\tIf set to 1, optimizes filter parameters based on TCGA-ICGC DREAM-3 SNV Challenge results\n" +
+ "\t--keep-failures\t\tIf set to 1, includes failures in the output file\n\n" +
+ "\n" +
+ "\tFILTERING PARAMETERS:\n" +
+ "\t--min-var-count\t\tMinimum number of variant-supporting reads [" + minVarCount + "]\n" +
+ "\t--min-var-count-lc\tMinimum number of variant-supporting reads when depth below somaticPdepth [" + minVarCountLC + "]\n" +
+ "\t--min-var-freq\t\tMinimum variant allele frequency [" + minVarFreq + "]\n" +
+ "\t--max-somatic-p\t\tMaximum somatic p-value [" + maxSomaticP + "]\n" +
+ "\t--max-somatic-p-depth\tDepth required to test max somatic p-value [" + maxSomaticPdepth + "]\n" +
+ "\t--min-ref-readpos\tMinimum average read position of ref-supporting reads [" + minRefReadPos + "]\n" +
+ "\t--min-var-readpos\tMinimum average read position of var-supporting reads [" + minVarReadPos + "]\n" +
+ "\t--min-ref-dist3\t\tMinimum average distance to effective 3' end (ref) [" + minRefDist3 + "]\n" +
+ "\t--min-var-dist3\t\tMinimum average distance to effective 3' end (var) [" + minVarDist3 + "]\n" +
+ "\t--min-strandedness\tMinimum fraction of variant reads from each strand [" + minStrandedness + "]\n" +
+ "\t--min-strand-reads\tMinimum allele depth required to perform the strand tests [" + minStrandReads + "]\n" +
+ "\t--min-ref-basequal\tMinimum average base quality for ref allele [" + minRefBaseQual + "]\n" +
+ "\t--min-var-basequal\tMinimum average base quality for var allele [" + minVarBaseQual + "]\n" +
+ "\t--max-basequal-diff\t\tMaximum average base quality diff (ref - var) [" + maxBAQdiff + "]\n" +
+ "\t--min-ref-avgrl\t\tMinimum average trimmed read length for ref allele [" + minRefAvgRL + "]\n" +
+ "\t--min-var-avgrl\t\tMinimum average trimmed read length for var allele [" + minVarAvgRL + "]\n" +
+ "\t--max-rl-diff\t\tMaximum average relative read length difference (ref - var) [" + maxReadLenDiff + "]\n" +
+ "\t--max-ref-mmqs\t\tMaximum mismatch quality sum of reference-supporting reads [" + maxRefMMQS + "]\n" +
+ "\t--max-var-mmqs\t\tMaximum mismatch quality sum of variant-supporting reads [" + maxVarMMQS + "]\n" +
+ "\t--min-mmqs-diff\t\tMinimum average mismatch quality sum (var - ref) [" + minMMQSdiff + "]\n" +
+ "\t--max-mmqs-diff\t\tMaximum average mismatch quality sum (var - ref) [" + maxMMQSdiff + "]\n" +
+ "\t--min-ref-mapqual\tMinimum average mapping quality for ref allele [" + minRefMapQual + "]\n" +
+ "\t--min-var-mapqual\tMinimum average mapping quality for var allele [" + minVarMapQual + "]\n" +
+ "\t--max-mapqual-diff\tMaximum average mapping quality (ref - var) [" + maxMapQualDiff + "]";
+
String outFileName = "";
String filteredFileName = "";
@@ -90,45 +113,94 @@ public class FpFilter {
if(params.containsKey("filtered-file"))
filteredFileName = params.get("filtered-file");
+ if(params.containsKey("dream3-settings"))
+ {
+ // Alter the parameters for optimal DREAM-3 performance //
+ minVarCount = 3;
+ minVarCountLC = 1;
+ minStrandedness = 0;
+ minVarBaseQual = 30;
+ minRefReadPos = 0.20;
+ minRefDist3 = 0.20;
+ minVarReadPos = 0.15;
+ minVarDist3 = 0.15;
+ maxReadLenDiff = 0.05;
+ maxMapQualDiff = 10;
+ minRefMapQual = 20;
+ minVarMapQual = 30;
+ maxVarMMQS = 100;
+ maxRefMMQS = 50;
+ }
+
+ if(params.containsKey("min-var-count"))
+ minVarCount = Integer.parseInt(params.get("min-var-count"));
+
+ if(params.containsKey("min-var-count-lc"))
+ minVarCountLC = Integer.parseInt(params.get("min-var-count-lc"));
+
if(params.containsKey("min-var-freq"))
minVarFreq = Double.parseDouble(params.get("min-var-freq"));
+ if(params.containsKey("max-somatic-p"))
+ maxSomaticP = Double.parseDouble(params.get("max-somatic-p"));
+
+ if(params.containsKey("max-somatic-p-depth"))
+ maxSomaticPdepth = Integer.parseInt(params.get("max-somatic-p-depth"));
+
+ if(params.containsKey("min-ref-readpos"))
+ minRefReadPos = Double.parseDouble(params.get("min-ref-readpos"));
+
if(params.containsKey("min-var-readpos"))
minVarReadPos = Double.parseDouble(params.get("min-var-readpos"));
+ if(params.containsKey("min-ref-dist3"))
+ minRefDist3 = Double.parseDouble(params.get("min-ref-dist3"));
+
if(params.containsKey("min-var-dist3"))
minVarDist3 = Double.parseDouble(params.get("min-var-dist3"));
- if(params.containsKey("max-rl-diff"))
- maxReadLenDiff = Double.parseDouble(params.get("max-rl-diff"));
-
if(params.containsKey("min-strandedness"))
minStrandedness = Double.parseDouble(params.get("min-strandedness"));
if(params.containsKey("min-strand-reads"))
minStrandReads = Integer.parseInt(params.get("min-strand-reads"));
- if(params.containsKey("min-var-count"))
- minVarCount = Integer.parseInt(params.get("min-var-count"));
-
if(params.containsKey("min-ref-mapqual"))
minRefMapQual = Integer.parseInt(params.get("min-ref-mapqual"));
if(params.containsKey("min-var-mapqual"))
minVarMapQual = Integer.parseInt(params.get("min-var-mapqual"));
+ if(params.containsKey("min-ref-avgrl"))
+ minRefAvgRL = Integer.parseInt(params.get("min-ref-avgrl"));
+
+ if(params.containsKey("min-var-avgrl"))
+ minVarAvgRL = Integer.parseInt(params.get("min-var-avgrl"));
+
+ if(params.containsKey("max-rl-diff"))
+ maxReadLenDiff = Double.parseDouble(params.get("max-rl-diff"));
+
if(params.containsKey("min-ref-basequal"))
minRefBaseQual = Integer.parseInt(params.get("min-ref-basequal"));
if(params.containsKey("min-var-basequal"))
minVarBaseQual = Integer.parseInt(params.get("min-var-basequal"));
+ if(params.containsKey("max-basequal-diff"))
+ maxBAQdiff = Integer.parseInt(params.get("max-basequal-diff"));
+
if(params.containsKey("max-var-mmqs"))
maxVarMMQS = Integer.parseInt(params.get("max-var-mmqs"));
+ if(params.containsKey("max-ref-mmqs"))
+ maxRefMMQS = Integer.parseInt(params.get("max-ref-mmqs"));
+
if(params.containsKey("max-mmqs-diff"))
maxMMQSdiff = Integer.parseInt(params.get("max-mmqs-diff"));
+ if(params.containsKey("min-mmqs-diff"))
+ minMMQSdiff = Integer.parseInt(params.get("min-mmqs-diff"));
+
if(params.containsKey("max-mapqual-diff"))
maxMapQualDiff = Integer.parseInt(params.get("max-mapqual-diff"));
}
@@ -168,16 +240,23 @@ public class FpFilter {
stats.put("numFailVarCount", 0);
stats.put("numFailVarFreq", 0);
stats.put("numFailStrand", 0);
+ stats.put("numFailRefReadPos", 0);
+ stats.put("numFailRefDist3", 0);
stats.put("numFailVarReadPos", 0);
stats.put("numFailVarDist3", 0);
stats.put("numFailVarMMQS", 0);
+ stats.put("numFailRefMMQS", 0);
stats.put("numFailMMQSdiff", 0);
+ stats.put("numFailMinMMQSdiff", 0);
stats.put("numFailRefMapQual", 0);
stats.put("numFailVarMapQual", 0);
stats.put("numFailRefBaseQual", 0);
stats.put("numFailVarBaseQual", 0);
+ stats.put("numFailMaxBAQdiff", 0);
stats.put("numFailMapQualDiff", 0);
stats.put("numFailReadLenDiff", 0);
+ stats.put("numFailRefAvgRL", 0);
+ stats.put("numFailVarAvgRL", 0);
try
@@ -210,7 +289,7 @@ public class FpFilter {
if(variantFile.exists())
{
- BufferedReader in = new BufferedReader(new FileReader(variantFile));
+ BufferedReader in = new BufferedReader(new SmartFileReader(variantFile));
if(in.ready())
{
@@ -278,7 +357,7 @@ public class FpFilter {
}
else
{
- String filterHeader = "ref_reads\tvar_reads\tref_strand\tvar_strand\tref_basequal\tvar_basequal\tref_readpos\tvar_readpos\tref_dist3\tvar_dist3\tref_mapqual\tvar_mapqual\tmapqual_diff\tref_mmqs\tvar_mmqs\tmmqs_diff\tfilter_status";
+ String filterHeader = "ref_reads\tvar_reads\tref_strand\tvar_strand\tref_basequal\tvar_basequal\tref_readpos\tvar_readpos\tref_dist3\tvar_dist3\tref_mapqual\tvar_mapqual\tmapqual_diff\tref_mmqs\tvar_mmqs\tmmqs_diff\tref_avg_rl\tvar_avg_rl\tavg_rl_diff\tfilter_status";
if(params.containsKey("output-file"))
outFile.println(line + "\t" + filterHeader);
if(params.containsKey("filtered-file"))
@@ -322,6 +401,7 @@ public class FpFilter {
double varStrandedness = -1;
double refStrandedness = -1;
double mmqsDiff = 0;
+ double basequalDiff = 0;
double mapQualDiff = 0;
double avgReadLenDiff = 0;
@@ -363,6 +443,13 @@ public class FpFilter {
{
ref = lineContents[2];
String cns = lineContents[3];
+ if(ref.matches("-?\\d+(\\.\\d+)?"))
+ {
+ // Adjust if the file lists chr_start and chr_stop instead of position //
+ ref = lineContents[3];
+ cns = lineContents[4];
+ }
+
if(cns.length() > 1)
{
isIndel = true;
@@ -476,16 +563,81 @@ public class FpFilter {
refMMQS = Double.parseDouble(refContents[9]);
refRL = Double.parseDouble(refContents[12]);
refDist3 = Double.parseDouble(refContents[13]);
+
+ // ONLY APPLY THESE FILTERS IF WE HAVE 2+ REF-SUPPORTING READS //
+
+ if(refReads >= 2)
+ {
+ // Variant read position for ref-supporting reads //
+
+ if(refPos < minRefReadPos)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "RefReadPos";
+ stats.put("numFailRefReadPos", (stats.get("numFailRefReadPos") + 1));
+ }
+
+ if(refDist3 < minRefDist3)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "RefDist3";
+ stats.put("numFailRefDist3", (stats.get("numFailRefDist3") + 1));
+ }
+
+ // Look at ref average mapping quality //
+
+ if(refReads > 0 && refMapQual < minRefMapQual)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "RefMapQual";
+ stats.put("numFailRefMapQual", (stats.get("numFailRefMapQual") + 1));
+
+ }
+
+ // Look at ref MMQS //
+
+ if(refMMQS > maxRefMMQS)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "RefMMQS";
+ stats.put("numFailRefMMQS", (stats.get("numFailRefMMQS") + 1));
+ }
+
+ // Look at ref average read length //
+
+ if(!isIndel && refReads > 0 && refRL < minRefAvgRL)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "RefAvgRL";
+ stats.put("numFailRefAvgRL", (stats.get("numFailRefAvgRL") + 1));
+ }
+ }
+
+
}
- // If variant fails any criteria we have at this point, fail it //
+ // APPLY VARIANT-READ-BASED FILTERS //
+ int totalDepth = refReads + varReads;
if(varReads < minVarCount)
{
- if(failReason.length() > 0)
- failReason += ",";
- failReason += "VarCount";
- stats.put("numFailVarCount", (stats.get("numFailVarCount") + 1));
+ // Pass this site if variant-supporting read count meets min for low-cov //
+ if(totalDepth < maxSomaticPdepth && varReads >= minVarCountLC)
+ {
+ // PASS//
+ }
+ else
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "VarCount";
+ stats.put("numFailVarCount", (stats.get("numFailVarCount") + 1));
+ }
}
// Compute variant allele frequency //
@@ -530,14 +682,6 @@ public class FpFilter {
// Apply minimum average mapqual checks //
- if(refReads > 0 && refMapQual < minRefMapQual)
- {
- if(failReason.length() > 0)
- failReason += ",";
- failReason += "RefMapQual";
- stats.put("numFailRefMapQual", (stats.get("numFailRefMapQual") + 1));
-
- }
if(varReads > 0 && varMapQual < minVarMapQual)
{
@@ -566,6 +710,14 @@ public class FpFilter {
stats.put("numFailVarBaseQual", (stats.get("numFailVarBaseQual") + 1));
}
+ if(!isIndel && varReads > 0 && varRL < minVarAvgRL)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "VarAvgRL";
+ stats.put("numFailVarAvgRL", (stats.get("numFailVarAvgRL") + 1));
+ }
+
// IF we have enough reads supporting the variant, check strand //
if(refReads >= minStrandReads && refReads > 0)
@@ -588,11 +740,23 @@ public class FpFilter {
}
+ // REF AND VAR COMPARISONS: REQUIRE 2+ READS FOR EACH ALLELE //
+
if(refReads >= 2 && varReads >= 2)
{
stats.put("numWithReads1", (stats.get("numWithReads1") + 1));
// Only make difference comparisons if we had reference reads //
+ basequalDiff = refBaseQual - varBaseQual;
+
+ if(basequalDiff > maxBAQdiff)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "MaxBAQdiff";
+ stats.put("numFailMaxBAQdiff", (stats.get("numFailMaxBAQdiff") + 1));
+ }
+
mmqsDiff = varMMQS - refMMQS;
if(mmqsDiff > maxMMQSdiff)
@@ -603,6 +767,14 @@ public class FpFilter {
stats.put("numFailMMQSdiff", (stats.get("numFailMMQSdiff") + 1));
}
+ if(mmqsDiff < minMMQSdiff)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "MinMMQSdiff";
+ stats.put("numFailMinMMQSdiff", (stats.get("numFailMinMMQSdiff") + 1));
+ }
+
// Compare average mapping qualities //
mapQualDiff = refMapQual - varMapQual;
@@ -629,13 +801,6 @@ public class FpFilter {
}
-
- // Check to see if it failed any filters. If so, mark it for failure //
-
- if(failReason.length() > 0)
- filterFlag = true;
-
-
}
catch (Exception e)
{
@@ -644,6 +809,12 @@ public class FpFilter {
return;
}
+ // Check to see if it failed any filters. If so, mark it for failure //
+
+ if(failReason.length() > 0)
+ filterFlag = true;
+
+
}
else
{
@@ -674,6 +845,7 @@ public class FpFilter {
filterColumns += "\t" + refBaseQual + "\t" + varBaseQual + "\t" + refPos + "\t" + varPos;
filterColumns += "\t" + refDist3 + "\t" + varDist3 + "\t" + refMapQual + "\t" + varMapQual;
filterColumns += "\t" + twoDigits.format(mapQualDiff) + "\t" + refMMQS + "\t" + varMMQS + "\t" + twoDigits.format(mmqsDiff);
+ filterColumns += "\t" + refRL + "\t" + varRL + "\t" + twoDigits.format(avgReadLenDiff);
if(filterFlag)
{
@@ -746,8 +918,11 @@ public class FpFilter {
System.err.println("\t" + stats.get("numFailVarCount") + " failed minimim variant count < " + minVarCount);
System.err.println("\t" + stats.get("numFailVarFreq") + " failed minimum variant freq < " + minVarFreq);
System.err.println("\t" + stats.get("numFailStrand") + " failed minimum strandedness < " + minStrandedness);
+ System.err.println("\t" + stats.get("numFailRefReadPos") + " failed minimum reference readpos < " + minRefReadPos);
System.err.println("\t" + stats.get("numFailVarReadPos") + " failed minimum variant readpos < " + minVarReadPos);
+ System.err.println("\t" + stats.get("numFailRefDist3") + " failed minimum reference dist3 < " + minRefDist3);
System.err.println("\t" + stats.get("numFailVarDist3") + " failed minimum variant dist3 < " + minVarDist3);
+ System.err.println("\t" + stats.get("numFailRefMMQS") + " failed maximum reference MMQS > " + maxRefMMQS);
System.err.println("\t" + stats.get("numFailVarMMQS") + " failed maximum variant MMQS > " + maxVarMMQS);
System.err.println("\t" + stats.get("numFailMMQSdiff") + " failed maximum MMQS diff (var - ref) > " + maxMMQSdiff);
System.err.println("\t" + stats.get("numFailMapQualDiff") + " failed maximum mapqual diff (ref - var) > " + maxMapQualDiff);
@@ -801,7 +976,7 @@ public class FpFilter {
File infile = new File(filename);
if(infile.exists())
{
- BufferedReader in = new BufferedReader(new FileReader(infile));
+ BufferedReader in = new BufferedReader(new SmartFileReader(infile));
if(in.ready())
{
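The reworked VarCount test above no longer hard-fails shallow sites: when total depth is below --max-somatic-p-depth, the lower --min-var-count-lc threshold applies instead of --min-var-count. A compact sketch of that decision (method name assumed, not upstream code):

    static boolean failsVarCountFilter(int refReads, int varReads,
                                       int minVarCount, int minVarCountLC,
                                       int maxSomaticPdepth) {
        if (varReads >= minVarCount)
            return false;                       // meets the standard threshold
        int totalDepth = refReads + varReads;
        // Low-coverage rescue: a shallow site with enough supporting reads still passes
        return !(totalDepth < maxSomaticPdepth && varReads >= minVarCountLC);
    }

With the defaults (min-var-count 4, min-var-count-lc 2, max-somatic-p-depth 10), a site with 6 reference reads and 2 variant reads now passes this check, while the same 2 variant reads at 30x depth still fail.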
diff --git a/sf/varscan/LimitVariants.java b/sf/varscan/LimitVariants.java
index 75852ba..b9a3c75 100644
--- a/sf/varscan/LimitVariants.java
+++ b/sf/varscan/LimitVariants.java
@@ -10,7 +10,7 @@ package net.sf.varscan;
import java.io.BufferedReader;
import java.io.FileOutputStream;
-import java.io.FileReader;
+import net.sf.varscan.SmartFileReader;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.BitSet;
@@ -18,7 +18,7 @@ import java.util.BitSet;
/**
* A class for restricting a list of variants to a given set of positions or regions
*
- * @version 2.3
+ * @version 2.4
*
* @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
*
@@ -223,7 +223,7 @@ public class LimitVariants {
try
{
- BufferedReader infile = new BufferedReader(new FileReader(fileName));
+ BufferedReader infile = new BufferedReader(new SmartFileReader(fileName));
String line = "";
int lineCounter = 0;
diff --git a/sf/varscan/ProcessSomatic.java b/sf/varscan/ProcessSomatic.java
index 7af5bc3..3669d04 100644
--- a/sf/varscan/ProcessSomatic.java
+++ b/sf/varscan/ProcessSomatic.java
@@ -2,7 +2,7 @@ package net.sf.varscan;
import java.io.BufferedReader;
import java.io.FileOutputStream;
-import java.io.FileReader;
+import net.sf.varscan.SmartFileReader;
import java.io.PrintStream;
import java.util.BitSet;
import java.util.HashMap;
@@ -10,7 +10,7 @@ import java.util.HashMap;
/**
* A class for processing VarScan output by somatic status and confidence
*
- * @version 2.3
+ * @version 2.4
*
* @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
*
@@ -348,7 +348,7 @@ public class ProcessSomatic {
try
{
- BufferedReader infile = new BufferedReader(new FileReader(fileName));
+ BufferedReader infile = new BufferedReader(new SmartFileReader(fileName));
String line = "";
int lineCounter = 0;
diff --git a/sf/varscan/ReadCounts.java b/sf/varscan/ReadCounts.java
index f0edc36..e3b9c6c 100644
--- a/sf/varscan/ReadCounts.java
+++ b/sf/varscan/ReadCounts.java
@@ -10,7 +10,7 @@ package net.sf.varscan;
import java.io.BufferedReader;
import java.io.FileOutputStream;
-import java.io.FileReader;
+import net.sf.varscan.SmartFileReader;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.BitSet;
@@ -19,7 +19,7 @@ import java.util.HashMap;
/**
* A class for obtaining read counts for a list of variants
*
- * @version 2.3
+ * @version 2.4
*
* @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
*
@@ -355,7 +355,7 @@ public class ReadCounts {
try
{
- BufferedReader infile = new BufferedReader(new FileReader(fileName));
+ BufferedReader infile = new BufferedReader(new SmartFileReader(fileName));
String line = "";
int lineCounter = 0;
diff --git a/sf/varscan/SmartFileReader.java b/sf/varscan/SmartFileReader.java
new file mode 100644
index 0000000..34e65f1
--- /dev/null
+++ b/sf/varscan/SmartFileReader.java
@@ -0,0 +1,37 @@
+package net.sf.varscan;
+
+import java.io.File;
+import java.io.FileDescriptor;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+
+/**
+ * A class for reliable file reading that works around a ready() bug in the Java API; contributed by Bina Technologies
+ *
+ * @version 2.4
+ *
+ * @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
+ *
+ */
+public class SmartFileReader extends FileReader {
+
+ public SmartFileReader(File file) throws FileNotFoundException {
+ super(file);
+ }
+
+ public SmartFileReader(FileDescriptor fd) {
+ super(fd);
+ }
+
+ public SmartFileReader(String fileName) throws FileNotFoundException {
+ super(fileName);
+ }
+
+ public boolean ready() throws IOException {
+ super.ready();
+ // Implemented because sun.nio.cs.StreamDecoder doesn't implement ready() properly.
+ return true;
+ }
+
+}
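The override matters because file handling throughout this diff is gated on BufferedReader.ready(), and sun.nio.cs.StreamDecoder can report not-ready for a perfectly readable file, causing input to be silently skipped. A minimal usage sketch (the file name is a placeholder):

    import java.io.BufferedReader;
    import java.io.IOException;
    import net.sf.varscan.SmartFileReader;

    public class SmartFileReaderDemo {
        public static void main(String[] args) throws IOException {
            BufferedReader in = new BufferedReader(new SmartFileReader("variants.txt"));
            if (in.ready()) {       // now always true for an open, readable stream
                String line;
                while ((line = in.readLine()) != null)
                    System.out.println(line);
            }
            in.close();
        }
    }

Because the override still calls super.ready() first, a closed or otherwise invalid stream still surfaces as an IOException rather than being masked.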
diff --git a/sf/varscan/Somatic.java b/sf/varscan/Somatic.java
index f652dd3..8e7bf58 100644
--- a/sf/varscan/Somatic.java
+++ b/sf/varscan/Somatic.java
@@ -18,7 +18,7 @@ import java.lang.Math.*;
/**
* A class for determining somatic status of variants from Normal/Tumor pileup files
*
- * @version 2.3
+ * @version 2.4
*
* @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
*
@@ -111,6 +111,7 @@ public class Somatic {
double somaticPvalue = 0.05; //1.0e-04;
double minFreqForHom = 0.75;
boolean doStrandFilter = true;
+ boolean doValidation = false;
// Try adjusting any provided parameters based on user input //
try
@@ -181,6 +182,21 @@ public class Somatic {
doStrandFilter = false;
}
+ if(params.containsKey("validation"))
+ {
+ try{
+ int validation = Integer.parseInt(params.get("validation"));
+ if(validation > 0)
+ doValidation = true;
+ else
+ doValidation = false;
+ }
+ catch(Exception e)
+ {
+ doValidation = true;
+ }
+ }
+
// System.err.println("Min coverage:\t" + minCoverage);
System.err.println("Min coverage:\t" + minCoverageNormal + "x for Normal, " + minCoverageTumor + "x for Tumor");
System.err.println("Min reads2:\t" + minReads2);
@@ -192,7 +208,7 @@ public class Somatic {
System.err.println("Min avg qual:\t" + minAvgQual);
System.err.println("P-value thresh:\t" + pValueThreshold);
System.err.println("Somatic p-value:\t" + somaticPvalue);
- if(params.containsKey("validation"))
+ if(doValidation)
System.err.println("Validation mode: on");
}
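Taken together, the new block gives --validation three behaviors: a numeric value toggles the mode, a present-but-unparseable value enables it, and an absent flag leaves it off. A small sketch of the resolution (helper name assumed, not upstream code):

    static boolean resolveValidation(java.util.HashMap<String, String> params) {
        if (!params.containsKey("validation"))
            return false;                       // flag absent: validation off
        try {
            return Integer.parseInt(params.get("validation")) > 0;  // --validation 1 or 0
        } catch (Exception e) {
            return true;                        // present but non-numeric: on
        }
    }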
@@ -312,7 +328,7 @@ public class Somatic {
outIndel.println(vcfHeader);
}
- if(params.containsKey("validation"))
+ if(doValidation)
{
outValidation = new PrintStream( new FileOutputStream(outputName + ".validation") );
if(!params.containsKey("no-headers") && !params.containsKey("output-vcf"))
@@ -434,25 +450,130 @@ public class Somatic {
int totalDepth = pileupDepthNormal + pileupDepthTumor;
- if(allele2.startsWith("+"))
+ // Determine the ref and var columns and alleles //
+ String varBases = allele2.replace("/", ",");
+ String refColumn = "";
+ String varColumn = "";
+
+ int normalVarAlleleNumber = 1;
+ int tumorVarAlleleNumber = 1;
+ if(varBases.contains(","))
+ {
+ tumorVarAlleleNumber = 2;
+ }
+
+ // Handle complex positions with multiple alleles including at least one indel //
+
+ if(varBases.contains(",") && (varBases.contains("-") || varBases.contains("+")))
+ {
+ // Multi-allele indel //
+ int maxDelSize = 0;
+ String maxDelBases = "";
+ // Go through each varAllele to find longest deletion //
+ String[] varBaseContents = varBases.split(",");
+ for(String varAllele : varBaseContents)
+ {
+ // Address possible deletions//
+ if(varAllele.startsWith("-"))
+ {
+ varAllele = varAllele.replace("-", "");
+ if(varAllele.length() > maxDelSize)
+ {
+ maxDelBases = varAllele;
+ maxDelSize = varAllele.length();
+ }
+ }
+ }
+
+ // Set refBase to maximum del //
+ refColumn = refBase + maxDelBases;
+
+ // Establish each allele in var Column //
+ varColumn = "";
+
+ for(String varAllele : varBaseContents)
+ {
+ if(varColumn.length() > 0)
+ varColumn = varColumn + ",";
+
+ if(varAllele.startsWith("-"))
+ {
+ varAllele = varAllele.replace("-", "");
+
+ // For the smaller deletion, determine ref bases to add //
+ if(varAllele.length() < maxDelSize)
+ {
+ String varEntry = maxDelBases.replaceFirst(varAllele, "");
+ varColumn = varColumn + refBase + varEntry;
+ }
+ else
+ {
+ varColumn = varColumn + refBase;
+ }
+ }
+ else if(varAllele.startsWith("+"))
+ {
+ varAllele = varAllele.replace("+", "");
+ String varEntry = refBase + varAllele + maxDelBases;
+ varColumn = varColumn + varEntry;
+ }
+ else
+ {
+ String varEntry = varAllele + maxDelBases;
+ varColumn = varColumn + varEntry;
+ }
+ }
+
+
+ }
+
+ else if(varBases.startsWith("+"))
{
// INSERTION //
// Ref = ref base; Var = ref base followed by inserted bases //
- String varColumn = allele1 + allele2.replace("+", "");
- compareResult = "." + "\t" + allele1 + "\t" + varColumn + "\t" + ".";
+ refColumn = refBase;
+ varColumn = refBase + varBases.replace("+", "");
}
- else if(allele2.startsWith("-"))
+ else if(varBases.startsWith("-"))
{
// DELETION //
// Ref = ref base followed by deleted bases; var = ref base //
- String refColumn = allele1 + allele2.replace("-", "");
- compareResult = "." + "\t" + refColumn + "\t" + allele1 + "\t" + ".";
+ refColumn = refBase + varBases.replace("-", "");
+ varColumn = refBase;
}
else
{
- compareResult = "." + "\t" + allele1 + "\t" + allele2 + "\t" + ".";
+ refColumn = refBase;
+ varColumn = varBases;
}
+ // Ensure that varColumn does not contain any +/- //
+ varColumn = varColumn.replace("+", "");
+ varColumn = varColumn.replace("-", "");
+
+ compareResult = "." + "\t" + refColumn + "\t" + varColumn + "\t" + ".";
+
+
+
+// if(allele2.startsWith("+"))
+// {
+// // INSERTION //
+// // Ref = ref base; Var = ref base followed by inserted bases //
+// String varColumn = allele1 + allele2.replace("+", "");
+// compareResult = "." + "\t" + allele1 + "\t" + varColumn + "\t" + ".";
+// }
+// else if(allele2.startsWith("-"))
+// {
+// // DELETION //
+// // Ref = ref base followed by deleted bases; var = ref base //
+// String refColumn = allele1 + allele2.replace("-", "");
+// compareResult = "." + "\t" + refColumn + "\t" + allele1 + "\t" + ".";
+// }
+// else
+// {
+// compareResult = "." + "\t" + allele1 + "\t" + allele2 + "\t" + ".";
+// }
+
// Decide on filter field //
if(doStrandFilter && strandednessDiff > 0.10 && (strandedness2 < 0.10 || strandedness2 > 0.90))
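
To see what the new multi-allele branch in this hunk produces, here is a worked example with invented alleles: a tumor call of "+T/-AG" against refBase "C" (jshell-style fragment):

    // Alleles invented for illustration.
    String refBase = "C";
    String varBases = "+T/-AG".replace("/", ",");    // "+T,-AG"

    // The longest deletion anchors REF: refColumn = refBase + maxDelBases.
    String maxDelBases = "AG";
    String refColumn = refBase + maxDelBases;        // "CAG"

    // Each ALT is rewritten against that REF: the insertion "+T" becomes
    // refBase + inserted bases + maxDelBases, while the full-length
    // deletion "-AG" collapses to refBase alone.
    String varColumn = (refBase + "T" + maxDelBases) // "CTAG"
                     + "," + refBase;                // "CTAG,C"
    // Emitted VCF columns: REF=CAG  ALT=CTAG,C
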
@@ -566,7 +687,8 @@ public class Somatic {
else
compareResult += "\tGT:GQ:DP:RD:AD:FREQ";
- // Determine normal genotype //
+
+ // Determine normal genotype //
String normalGt = ".";
String tumorGt = ".";
if(normalCall.equals(refBase))
@@ -575,11 +697,11 @@ public class Somatic {
}
else if(VarScan.isHeterozygous(normalCall))
{
- normalGt = "0/1";
+ normalGt = "0/" + normalVarAlleleNumber;
}
else
{
- normalGt = "1/1";
+ normalGt = normalVarAlleleNumber + "/" + normalVarAlleleNumber;
}
if(tumorCall.equals(refBase))
@@ -588,11 +710,11 @@ public class Somatic {
}
else if(VarScan.isHeterozygous(tumorCall))
{
- tumorGt = "0/1";
+ tumorGt = "0/" + tumorVarAlleleNumber;
}
else
{
- tumorGt = "1/1";
+ tumorGt = tumorVarAlleleNumber + "/" + tumorVarAlleleNumber;
}
if(tumorDP4.length() > 0)
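
With two ALT alleles, as in the REF=CAG ALT=CTAG,C example above, tumorVarAlleleNumber is 2, so the genotype strings built in this hunk reference the second ALT; single-ALT sites keep the old 0/1 and 1/1 spellings. Briefly (jshell-style, values invented):

    int tumorVarAlleleNumber = 2;                                      // two ALT alleles at this site
    String hetGt = "0/" + tumorVarAlleleNumber;                        // "0/2"
    String homGt = tumorVarAlleleNumber + "/" + tumorVarAlleleNumber;  // "2/2"
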
@@ -609,12 +731,12 @@ public class Somatic {
// Print to master file for validation //
- if(params.containsKey("validation"))
+ if(doValidation)
{
outValidation.println(chromNormal + "\t" + posNormal + "\t" + compareResult);
}
- if(!params.containsKey("validation") && (compareResult.contains("Reference") || compareResult.contains("SS=0") || compareResult.contains("Filter")))
+ if(!doValidation && (compareResult.contains("Reference") || compareResult.contains("SS=0") || compareResult.contains("Filter")))
{
// Don't print reference/indelfilter positions unless doing validation //
}
@@ -779,6 +901,7 @@ public class Somatic {
System.err.println("Normal Pileup: " + normalPileupFile);
System.err.println("Tumor Pileup: " + tumorPileupFile);
+ System.err.println("NOTICE: While dual input files are still supported, using a single mpileup file (normal-tumor) with the --mpileup 1 setting is strongly recommended.");
// Set parameter defaults //
@@ -795,6 +918,8 @@ public class Somatic {
double pValueThreshold = 0.99;
double somaticPvalue = 0.05; //1.0e-04;
double minFreqForHom = 0.75;
+ boolean doStrandFilter = true;
+ boolean doValidation = false;
// Parse command-line parameters //
HashMap<String, String> params = VarScan.getParams(args);
@@ -856,7 +981,31 @@ public class Somatic {
{
tumorPurity = Double.parseDouble(params.get("tumor-purity"));
if(tumorPurity > 1)
- tumorPurity = normalPurity / 100.00;
+ tumorPurity = tumorPurity / 100.00;
+ }
+
+ if(params.containsKey("strand-filter"))
+ {
+ int filter = Integer.parseInt(params.get("strand-filter"));
+ if(filter > 0)
+ doStrandFilter = true;
+ else
+ doStrandFilter = false;
+ }
+
+ if(params.containsKey("validation"))
+ {
+ try{
+ int validation = Integer.parseInt(params.get("validation"));
+ if(validation > 0)
+ doValidation = true;
+ else
+ doValidation = false;
+ }
+ catch(Exception e)
+ {
+ doValidation = true;
+ }
}
// System.err.println("Min coverage:\t" + minCoverage);
@@ -870,7 +1019,7 @@ public class Somatic {
System.err.println("Min avg qual:\t" + minAvgQual);
System.err.println("P-value thresh:\t" + pValueThreshold);
System.err.println("Somatic p-value:\t" + somaticPvalue);
- if(params.containsKey("validation"))
+ if(doValidation)
System.err.println("Validation mode: on");
}
@@ -942,7 +1091,7 @@ public class Somatic {
outIndel.println(vcfHeader);
}
- if(params.containsKey("validation"))
+ if(doValidation)
{
outValidation = new PrintStream( new FileOutputStream(outputName + ".validation") );
if(!params.containsKey("no-headers") && !params.containsKey("output-vcf"))
@@ -954,8 +1103,9 @@ public class Somatic {
}
- BufferedReader normal = new BufferedReader(new FileReader(normalPileupFile));
- BufferedReader tumor = new BufferedReader(new FileReader(tumorPileupFile));
+ BufferedReader normal = new BufferedReader(new SmartFileReader(normalPileupFile));
+ BufferedReader tumor = new BufferedReader(new SmartFileReader(tumorPileupFile));
+
// If input file not ready, give it a few seconds //
int numNaps = 0;
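
SmartFileReader itself is a new 37-line file in this commit but is not shown in this excerpt. The likely reason a drop-in FileReader replacement matters here is that ready() can report false in some environments even when a regular file still has unread data, which would trip the in.ready() gates and the nap loop below; the actual motivation is not stated in this excerpt. Purely as an assumed sketch, not the upstream class:

    import java.io.File;
    import java.io.FileReader;
    import java.io.IOException;

    // Hypothetical stand-in only, NOT the actual SmartFileReader:
    // answers ready() from the underlying file so that ready() gates
    // do not fail spuriously on freshly opened, non-empty files.
    public class SmartFileReaderSketch extends FileReader {
        private final File file;
        public SmartFileReaderSketch(File file) throws IOException {
            super(file);
            this.file = file;
        }
        public SmartFileReaderSketch(String fileName) throws IOException {
            this(new File(fileName));
        }
        @Override
        public boolean ready() throws IOException {
            super.ready(); // still surfaces IOExceptions from a closed stream
            return file.length() > 0;
        }
    }
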
@@ -1183,7 +1333,7 @@ public class Somatic {
// Decide on filter field //
- if(params.containsKey("strand-filter") && strandednessDiff > 0.10 && (strandedness2 < 0.10 || strandedness2 > 0.90))
+ if(doStrandFilter && strandednessDiff > 0.10 && (strandedness2 < 0.10 || strandedness2 > 0.90))
{
compareResult += "\t" + "str10";
}
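
For intuition on the str10 condition above: the thresholds come from the code, while the definitions here are my reading, with invented numbers; strandedness2 is the forward-strand fraction of variant-supporting reads, and strandednessDiff its distance from the reference reads' strandedness.

    // jshell-style fragment; numbers invented.
    double strandedness2 = 19.0 / 20.0;                        // 19 of 20 variant reads forward
    double strandednessDiff = Math.abs(0.55 - strandedness2);  // ref reads ~55% forward
    boolean str10 = strandednessDiff > 0.10
                 && (strandedness2 < 0.10 || strandedness2 > 0.90); // true -> flagged
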
@@ -1336,12 +1486,12 @@ public class Somatic {
}
// Print to master file for validation //
- if(params.containsKey("validation"))
+ if(doValidation)
{
outValidation.println(chromNormal + "\t" + posNormal + "\t" + compareResult);
}
- if(!params.containsKey("validation") && (compareResult.contains("Reference") || compareResult.contains("SS=0") || compareResult.contains("Filter")))
+ if(!doValidation && (compareResult.contains("Reference") || compareResult.contains("SS=0") || compareResult.contains("Filter")))
{
// Don't print reference/indelfilter positions unless doing validation //
}
@@ -1392,7 +1542,8 @@ public class Somatic {
// If they're in sort order, do nothing so that tumor can catch up //
else if(inSortOrder(chromNormal, chromTumor))
{
- System.err.println("Not resetting normal file because " + chromNormal + " < " + chromTumor);
+ if(params.containsKey("verbose"))
+ System.err.println("Not resetting normal file because " + chromNormal + " < " + chromTumor);
}
// If we reached the end of the normal file but never saw this chromosome, //
// fast-forward until tumor chromosome changes and reset normal file //
@@ -1424,14 +1575,16 @@ public class Somatic {
{
if(inSortOrder(chromNormal, chromTumor))
{
- System.err.println("Not resetting normal file because " + chromNormal + " < " + chromTumor);
+ if(params.containsKey("verbose"))
+ System.err.println("Not resetting normal file because " + chromNormal + " < " + chromTumor);
}
else
{
- System.err.println("Resetting normal file because " + chromNormal + " > " + chromTumor);
+ if(params.containsKey("verbose"))
+ System.err.println("Resetting normal file because " + chromNormal + " > " + chromTumor);
normalWasReset = true;
normal.close();
- normal = new BufferedReader(new FileReader(normalPileupFile));
+ normal = new BufferedReader(new SmartFileReader(normalPileupFile));
}
}
@@ -1600,6 +1753,13 @@ public class Somatic {
}
else if(normalDepth >= minCoverage)
{
+ // Parse out the read counts in tumor //
+ int tumorReads1 = Integer.parseInt(tumorConsensusContents[1]);
+ int tumorReads2 = Integer.parseInt(tumorConsensusContents[2]);
+ int tumorCoverage = tumorReads1 + tumorReads2;
+ String tumorAllele2 = VarScan.getVarAllele(refBase, tumorConsensusContents[0]);
+
+
// Adjust for normal purity (i.e., tumor contamination of normal in AML) //
double normalMinVarFreq = minVarFreq;
@@ -1609,6 +1769,25 @@ public class Somatic {
}
HashMap<String, String> readCountsNormal = VarScan.getReadCounts(refBase, normalBases, normalQualities, minAvgQual, normalMapQuals);
+
+ // Before calling the normal consensus, if we observe a tumor variant allele, only consider normal bases from wild-type or that variant allele
+ if(!tumorAllele2.equals(refBase))
+ {
+ HashMap<String, String> newNormalCounts = new HashMap<String, String>();
+ String[] sortedKeys = (String[]) readCountsNormal.keySet().toArray(new String[0]);
+
+ // Put alleles into this array in their order of occurrence in VCF line //
+ for(String allele : sortedKeys)
+ {
+ String result = readCountsNormal.get(allele);
+ if(allele.equals(refBase) || allele.equals(tumorAllele2))
+ newNormalCounts.put(allele, result);
+ }
+
+ readCountsNormal = newNormalCounts;
+
+ }
+
String normalConsensusLine = VarScan.callPosition(refBase, readCountsNormal, "CNS", minReads2, normalMinVarFreq, minAvgQual, 0.99, minFreqForHom); //pValueThreshold, minFreqForHom);
String[] normalConsensusContents = normalConsensusLine.split("\t");
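
The restriction step above keeps a stray third allele in the normal from deciding the normal consensus: only reads matching the reference or the tumor's variant allele survive into callPosition. A toy illustration with invented counts (real map values are tab-delimited stat strings):

    // jshell-style fragment; counts invented.
    HashMap<String, String> readCountsNormal = new HashMap<String, String>();
    readCountsNormal.put("A", "20\t...");  // reference allele, kept
    readCountsNormal.put("G", "3\t...");   // matches tumorAllele2, kept
    readCountsNormal.put("T", "1\t...");   // third allele, dropped
    HashMap<String, String> newNormalCounts = new HashMap<String, String>();
    for(String allele : readCountsNormal.keySet())
        if(allele.equals("A") || allele.equals("G"))
            newNormalCounts.put(allele, readCountsNormal.get(allele));
    // newNormalCounts holds only A and G, so the lone T read can no
    // longer pull the normal consensus toward a third allele.
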
@@ -1621,11 +1800,6 @@ public class Somatic {
}
else
{
- // Parse out the read counts in tumor //
- int tumorReads1 = Integer.parseInt(tumorConsensusContents[1]);
- int tumorReads2 = Integer.parseInt(tumorConsensusContents[2]);
- int tumorCoverage = tumorReads1 + tumorReads2;
- String tumorAllele2 = VarScan.getVarAllele(refBase, tumorConsensusContents[0]);
// Parse out strand support in tumor //
int tumorReads1plus = 0;
@@ -1660,7 +1834,6 @@ public class Somatic {
int normalCoverage = normalReads1 + normalReads2;
String normalAllele2 = VarScan.getVarAllele(refBase, normalConsensusContents[0]);
-
// Get the Normal Read counts for the tumor variant allele //
if(!tumorAllele2.equals(refBase)) // normalAllele2.equals(refBase) &&
@@ -1671,7 +1844,15 @@ public class Somatic {
String[] alleleContents = readCountsNormal.get(tumorAllele2).split("\t");
normalReads2 = Integer.parseInt(alleleContents[0]);
normalCoverage = normalReads1 + normalReads2;
+ normalAllele2 = tumorAllele2;
}
+ else // Suggested completion logic from Chris Smowton caused logic issues with somatic calling
+ {
+ normalReads2 = 0;
+ normalCoverage = normalReads1;
+ normalAllele2 = tumorAllele2;
+ }
+
}
else if(!normalAllele2.equals(refBase))
{
@@ -1949,7 +2130,7 @@ public class Somatic {
String[] unsorted = {chrom1, chrom2};
String[] sorted = {chrom1, chrom2};
Arrays.sort(sorted);
- System.err.println("Sorted order is " + sorted[0] + " " + sorted[1]);
+
try{
if(sorted[0].equals(unsorted[0]))
{
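
This hunk shows the tail of inSortOrder; from the visible lines, the whole check reduces to a lexical comparison of the two chromosome names. A minimal restatement (my reconstruction, not the verbatim method):

    // Reconstructed from the visible lines: true when chrom1 already
    // sorts at or before chrom2 in plain lexical (String) order.
    static boolean inSortOrder(String chrom1, String chrom2) {
        String[] sorted = {chrom1, chrom2};
        java.util.Arrays.sort(sorted);
        return sorted[0].equals(chrom1);
    }
    // Note the lexical gotcha: inSortOrder("chr2", "chr11") == false,
    // because "chr11" sorts before "chr2" as a string.
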
diff --git a/sf/varscan/Trio.java b/sf/varscan/Trio.java
index f1c0271..b7ce616 100644
--- a/sf/varscan/Trio.java
+++ b/sf/varscan/Trio.java
@@ -11,7 +11,7 @@ package net.sf.varscan;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
-import java.io.FileReader;
+import net.sf.varscan.SmartFileReader;
import java.io.PrintStream;
import java.text.DecimalFormat;
import java.util.*;
@@ -20,7 +20,7 @@ import java.lang.Math;
/**
* A class for calling variants in a mother-father-child trio
*
- * @version 2.3
+ * @version 2.4
*
* @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
*
@@ -143,7 +143,7 @@ public class Trio {
// Parse sample list //
if(samplefile.exists())
{
- BufferedReader in = new BufferedReader(new FileReader(samplefile));
+ BufferedReader in = new BufferedReader(new SmartFileReader(samplefile));
String line = "";
if(in.ready())
{
@@ -289,7 +289,7 @@ public class Trio {
vcfHeader += "\n" + "##source=VarScan2";
vcfHeader += "\n" + "##INFO=<ID=ADP,Number=1,Type=Integer,Description=\"Average per-sample depth of bases with Phred score >= " + minAvgQual + "\">";
- vcfHeader += "\n" + "##INFO=<ID=STATUS,Number=1,Type=String,Description=\"Variant status in trio (1=untransmitted, 2=transmitted, 3=denovo, 4=MIE)\">";
+ vcfHeader += "\n" + "##INFO=<ID=STATUS,Number=1,Type=String,Description=\"Variant status in trio (0=unknown, 1=untransmitted, 2=transmitted, 3=denovo, 4=MIE)\">";
vcfHeader += "\n" + "##INFO=<ID=DENOVO,Number=0,Type=Flag,Description=\"Indicates apparent de novo mutations unique to the child\">";
vcfHeader += "\n" + "##FILTER=<ID=str10,Description=\"Less than 10% or more than 90% of variant supporting reads on one strand\">";
vcfHeader += "\n" + "##FILTER=<ID=indelError,Description=\"Likely artifact due to indel reads at this position\">";
@@ -377,26 +377,65 @@ public class Trio {
System.err.println("Warning: More than 3 samples in pileup; but only first 3 will be used and they should be father, mother child");
}
- // Get Father Call //
- int offset = 3;
- int fatherDepth = Integer.parseInt(lineContents[offset]);
- String fatherBases = lineContents[offset + 1];
- String fatherQualities = lineContents[offset + 2];
- int fatherQualityDepth = VarScan.qualityDepth(fatherQualities, minAvgQual);
-
- // Get Mother Call //
- offset = 6;
- int motherDepth = Integer.parseInt(lineContents[offset]);
- String motherBases = lineContents[offset + 1];
- String motherQualities = lineContents[offset + 2];
- int motherQualityDepth = VarScan.qualityDepth(motherQualities, minAvgQual);
-
- // Get Child Call //
- offset = 9;
- int childDepth = Integer.parseInt(lineContents[offset]);
- String childBases = lineContents[offset + 1];
- String childQualities = lineContents[offset + 2];
- int childQualityDepth = VarScan.qualityDepth(childQualities, minAvgQual);
+ // Set variables to zero values //
+ int fatherDepth = 0;
+ int motherDepth = 0;
+ int childDepth = 0;
+ int fatherQualityDepth = 0;
+ int motherQualityDepth = 0;
+ int childQualityDepth = 0;
+ String fatherBases = null;
+ String motherBases = null;
+ String childBases = null;
+ String fatherQualities = null;
+ String motherQualities = null;
+ String childQualities = null;
+
+ try
+ {
+ if(lineContents.length < 10)
+ {
+ // This is an incomplete mpileup line, so skip it. If verbose, print a warning //
+ if(params.containsKey("verbose"))
+ {
+ System.err.println("Incomplete mpileup at line " + numBases + "; line being skipped.");
+ }
+ }
+ else
+ {
+ // Get Father Call //
+ int offset = 3;
+ fatherDepth = Integer.parseInt(lineContents[offset]);
+ fatherBases = lineContents[offset + 1];
+ fatherQualities = lineContents[offset + 2];
+ fatherQualityDepth = VarScan.qualityDepth(fatherQualities, minAvgQual);
+
+ // Get Mother Call //
+ offset = 6;
+ motherDepth = Integer.parseInt(lineContents[offset]);
+ motherBases = lineContents[offset + 1];
+ motherQualities = lineContents[offset + 2];
+ motherQualityDepth = VarScan.qualityDepth(motherQualities, minAvgQual);
+
+ // Get Child Call //
+ offset = 9;
+ childDepth = Integer.parseInt(lineContents[offset]);
+ childBases = lineContents[offset + 1];
+ childQualities = lineContents[offset + 2];
+ childQualityDepth = VarScan.qualityDepth(childQualities, minAvgQual);
+
+ }
+
+ }
+ catch(Exception e)
+ {
+ if(params.containsKey("verbose"))
+ {
+ System.err.println("Exception thrown while parsing mpileup at line " + numBases + ": " + e.getMessage());
+ }
+ // Exception thrown while parsing mpileup, which likely means one sample had no coverage //
+ // In this case the coverage check below will fail, which is the intended behavior //
+ }
if(fatherQualityDepth >= minCoverage && motherQualityDepth >= minCoverage && childQualityDepth >= minCoverage)
{
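
The layout assumed by the offsets above is the standard multi-sample mpileup format: three fixed columns (chrom, pos, ref) followed by a (depth, bases, quals) triple per sample, so father, mother, and child start at indices 3, 6, and 9, and a complete trio line has 12 fields; the guard above skips clearly truncated lines and the try/catch absorbs the remainder. For example (jshell-style, line contents invented):

    String line = "chr1\t12345\tA" +
        "\t12\t..,,..,,..,,\tIIIIIIIIIIII" +        // father: depth, bases, quals
        "\t15\t..,,..A,,..,,..\tIIIIIIIIIIIIIII" +  // mother
        "\t10\t..,,AA,,..\tIIIIIIIIII";             // child
    String[] f = line.split("\t");
    int fatherDepth = Integer.parseInt(f[3]);  // 12
    int motherDepth = Integer.parseInt(f[6]);  // 15
    int childDepth  = Integer.parseInt(f[9]);  // 10
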
@@ -533,8 +572,8 @@ public class Trio {
String childAllele = refBase;
// BUILD FATHER VCF //
-
- String fatherVCF = "./.:.:" + fatherQualityDepth;
+ //GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR
+ String fatherVCF = "./.:.:" + fatherQualityDepth + ":.:.:.:.:.:.:.:.:.:.:.";
if(fatherContents.length >= 15)
{
@@ -586,7 +625,7 @@ public class Trio {
}
// Father is variant //
- else if(fatherAllele.length() > 0 && !fatherAllele.equals("N") && !fatherAllele.equals("."))
+ else if(fatherAllele.length() > 0 && !fatherAllele.equals(refBase) && !fatherAllele.equals("N") && !fatherAllele.equals("."))
{
// Determine how many variant alleles have been seen //
@@ -630,7 +669,7 @@ public class Trio {
// BUILD MOTHER VCF //
- String motherVCF = "./.:.:" + motherQualityDepth;
+ String motherVCF = "./.:.:" + motherQualityDepth + ":.:.:.:.:.:.:.:.:.:.:.";
if(motherContents.length >= 15)
{
@@ -682,7 +721,7 @@ public class Trio {
}
// mother is variant //
- else if(motherAllele.length() > 0 && !motherAllele.equals("N") && !motherAllele.equals("."))
+ else if(motherAllele.length() > 0 && !motherAllele.equals(refBase) && !motherAllele.equals("N") && !motherAllele.equals("."))
{
// Determine how many variant alleles have been seen //
@@ -726,7 +765,7 @@ public class Trio {
// BUILD CHILD VCF //
- String childVCF = "./.:.:" + childQualityDepth;
+ String childVCF = "./.:.:" + childQualityDepth + ":.:.:.:.:.:.:.:.:.:.:.";
if(childContents.length >= 15)
{
@@ -778,7 +817,7 @@ public class Trio {
}
// child is variant //
- else if(childAllele.length() > 0 && !childAllele.equals("N") && !childAllele.equals("."))
+ else if(childAllele.length() > 0 && !childAllele.equals(refBase) && !childAllele.equals("N") && !childAllele.equals("."))
{
// Determine how many variant alleles have been seen //
@@ -994,6 +1033,10 @@ public class Trio {
{
outLine += "4";
}
+ else
+ {
+ outLine += "0";
+ }
outLine += "\t" + "GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR" + "\t";
diff --git a/sf/varscan/VarScan.java b/sf/varscan/VarScan.java
index 0bc9f38..73f2687 100644
--- a/sf/varscan/VarScan.java
+++ b/sf/varscan/VarScan.java
@@ -18,9 +18,9 @@ import java.text.*;
/**
* A set of tools for variant detection in next-generation sequence data.
*
- * @version 2.3
+ * @version 2.4
*
- * @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
+ * @author Daniel C. Koboldt <dankoboldt at gmail.com>
*
* <BR>
* <pre>
@@ -110,7 +110,7 @@ public class VarScan {
*/
public static void main(String[] args) {
- String usage = "VarScan v2.3\n\nUSAGE: java -jar VarScan.jar [COMMAND] [OPTIONS] \n\n";
+ String usage = "VarScan v2.4.3\n\n***NON-COMMERCIAL VERSION***\n\nUSAGE: java -jar VarScan.jar [COMMAND] [OPTIONS] \n\n";
usage = usage + "COMMANDS:\n" +
"\tpileup2snp\t\tIdentify SNPs from a pileup file\n" +
"\tpileup2indel\t\tIdentify indels a pileup file\n" +
@@ -217,24 +217,7 @@ public class VarScan {
{
coverage(args, params);
}
- else if(args[0].equals("test"))
- {
- System.err.println("Testing...");
- try
- {
- RandomAccessFile ref = new RandomAccessFile("test.fasta", "r");
- ref.seek(52);
- byte[] buffer = new byte[5];
- ref.read(buffer);
- String thisBase = new String(buffer);
- System.err.println("Got " + thisBase);
- }
- catch(Exception e)
- {
- System.err.println("Error: Reference file: " + e.getLocalizedMessage());
- }
- }
else
{
@@ -496,7 +479,7 @@ public class VarScan {
{
// Parse the infile //
System.err.println("Reading input from " + args[1]);
- in = new BufferedReader(new FileReader(args[1]));
+ in = new BufferedReader(new SmartFileReader(args[1]));
}
else
{
@@ -781,11 +764,13 @@ public class VarScan {
if(readBase.equals("+"))
{
- indelType = "INS";
+ //indelType = "INS"; Changed October 2016 to fix VCF issue //
+ indelType = "+";
}
else
{
- indelType = "DEL";
+ //indelType = "DEL"; Changed October 2016 to fix VCF issue //
+ indelType = "-";
}
// If the previous base was a reference, count this read as reference but with indel //
@@ -851,7 +836,8 @@ public class VarScan {
// Build an indel key //
- String indelKey = indelType + "-" + indel_size + "-" + indelBases;
+// String indelKey = indelType + "-" + indel_size + "-" + indelBases; Changed October 2016 to fix VCF issue
+ String indelKey = indelType + indelBases;
// Count the read //
if(readCounts.containsKey(indelKey))
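
The key-format change is easiest to see side by side (example indel invented): a two-base insertion of AG was previously keyed as "INS-2-AG" and is now keyed as "+AG". This is why the downstream checks later in this patch accept either spelling, and why getShortIndel is now applied only to the legacy form.

    // jshell-style fragment; example indel invented.
    String indelType = "+", indelBases = "AG";
    int indel_size = indelBases.length();

    String legacyKey = "INS" + "-" + indel_size + "-" + indelBases;  // "INS-2-AG"
    String newKey    = indelType + indelBases;                       // "+AG"

    // Downstream indel tests now accept both spellings:
    boolean isIndel = newKey.contains("INS") || newKey.contains("DEL")
                   || newKey.startsWith("+") || newKey.startsWith("-");  // true
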
@@ -1174,7 +1160,7 @@ public class VarScan {
thisReads2minus = Integer.parseInt(alleleContents[5]);
// If this is an indel, make note of it //
- if(allele.contains("INS") || allele.contains("DEL"))
+ if(allele.contains("INS") || allele.contains("DEL") || allele.startsWith("+") || allele.startsWith("-"))
{
readsWithIndels += thisReads2;
}
@@ -1191,7 +1177,7 @@ public class VarScan {
double thisVarFreq = (double) thisReads2 / (double) totalReadCounts;
double thisPvalue = 1;
// For indels, adjust the read1 count //
- if(allele.contains("INS") || allele.contains("DEL"))
+ if(allele.contains("INS") || allele.contains("DEL") || allele.startsWith("+") || allele.startsWith("-"))
{
//System.err.println(allele + " gets " + thisReads2 + " " + thisVarFreq);
// Adjust the reads1 counts which include reads supporting indels //
@@ -1257,14 +1243,15 @@ public class VarScan {
// Determine type of variant //
String thisVarType = "SNP";
- if(allele.contains("INS") || allele.contains("DEL"))
+ if(allele.startsWith("+") || allele.startsWith("-") || allele.contains("INS") || allele.contains("DEL"))
{
thisVarType = "INDEL";
thisReads1 = reads1;
if(thisReads1 < 0)
thisReads1 = 0;
// Change allele to short indel version //
- allele = getShortIndel(allele);
+ if(allele.contains("INS") || allele.contains("DEL"))
+ allele = getShortIndel(allele);
}
if(thisPvalue <= pValueThreshold)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/varscan.git