[med-svn] [varscan] 01/06: Imported Upstream version 2.3.9+dfsg
Andreas Tille
tille at debian.org
Wed Jul 8 03:25:49 UTC 2015
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository varscan.
commit 4557f3b78993f12db727061f603eab909fda1e21
Author: Andreas Tille <tille at debian.org>
Date: Wed Jul 8 04:54:21 2015 +0200
Imported Upstream version 2.3.9+dfsg
---
{net/sf => sf}/varscan/CallMpileup.java | 0
{net/sf => sf}/varscan/CallPileup.java | 0
{net/sf => sf}/varscan/Comparison.java | 0
{net/sf => sf}/varscan/CopyCaller.java | 0
{net/sf => sf}/varscan/Copynumber.java | 0
{net/sf => sf}/varscan/Coverage.java | 0
{net/sf => sf}/varscan/FilterSomatic.java | 0
{net/sf => sf}/varscan/FilterVariants.java | 0
{net/sf => sf}/varscan/FishersExact.java | 0
sf/varscan/FpFilter.java | 874 +++++++++++++++++++++++++++++
{net/sf => sf}/varscan/LimitVariants.java | 0
{net/sf => sf}/varscan/ProcessSomatic.java | 0
{net/sf => sf}/varscan/ReadCounts.java | 0
{net/sf => sf}/varscan/Somatic.java | 0
{net/sf => sf}/varscan/Trio.java | 0
{net/sf => sf}/varscan/VarScan.java | 22 +
16 files changed, 896 insertions(+)
diff --git a/net/sf/varscan/CallMpileup.java b/sf/varscan/CallMpileup.java
similarity index 100%
rename from net/sf/varscan/CallMpileup.java
rename to sf/varscan/CallMpileup.java
diff --git a/net/sf/varscan/CallPileup.java b/sf/varscan/CallPileup.java
similarity index 100%
rename from net/sf/varscan/CallPileup.java
rename to sf/varscan/CallPileup.java
diff --git a/net/sf/varscan/Comparison.java b/sf/varscan/Comparison.java
similarity index 100%
rename from net/sf/varscan/Comparison.java
rename to sf/varscan/Comparison.java
diff --git a/net/sf/varscan/CopyCaller.java b/sf/varscan/CopyCaller.java
similarity index 100%
rename from net/sf/varscan/CopyCaller.java
rename to sf/varscan/CopyCaller.java
diff --git a/net/sf/varscan/Copynumber.java b/sf/varscan/Copynumber.java
similarity index 100%
rename from net/sf/varscan/Copynumber.java
rename to sf/varscan/Copynumber.java
diff --git a/net/sf/varscan/Coverage.java b/sf/varscan/Coverage.java
similarity index 100%
rename from net/sf/varscan/Coverage.java
rename to sf/varscan/Coverage.java
diff --git a/net/sf/varscan/FilterSomatic.java b/sf/varscan/FilterSomatic.java
similarity index 100%
rename from net/sf/varscan/FilterSomatic.java
rename to sf/varscan/FilterSomatic.java
diff --git a/net/sf/varscan/FilterVariants.java b/sf/varscan/FilterVariants.java
similarity index 100%
rename from net/sf/varscan/FilterVariants.java
rename to sf/varscan/FilterVariants.java
diff --git a/net/sf/varscan/FishersExact.java b/sf/varscan/FishersExact.java
similarity index 100%
rename from net/sf/varscan/FishersExact.java
rename to sf/varscan/FishersExact.java
diff --git a/sf/varscan/FpFilter.java b/sf/varscan/FpFilter.java
new file mode 100644
index 0000000..e7ab8c8
--- /dev/null
+++ b/sf/varscan/FpFilter.java
@@ -0,0 +1,874 @@
+/**
+ * @(#)FpFilter.java
+ *
+ * Copyright (c) 2009-2010 Daniel C. Koboldt and Washington University in St. Louis
+ *
+ * COPYRIGHT
+ */
+
+package net.sf.varscan;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.PrintStream;
+import java.text.DecimalFormat;
+import java.util.HashMap;
+
+/**
+ * A class for applying the false-positive filter to VarScan variant calls
+ *
+ * @version 2.3
+ *
+ * @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
+ *
+ */
+public class FpFilter {
+
+ public FpFilter(String[] args)
+ {
+ // Define the usage message //
+ String usage = "USAGE: java -jar VarScan.jar fpfilter [variant file] [readcount file] OPTIONS\n" +
+ "\tvariant file - A file of SNPs or indels in VarScan-native or VCF format\n" +
+ "\treadcount file - The output file from bam-readcount for those positions\n" +
+ "\t***For detailed filtering instructions, please visit http://varscan.sourceforge.net***\n" +
+ "\n" +
+ "\tOPTIONS:\n" +
+ "\t--output-file\t\tOptional output file for filter-pass variants\n" +
+ "\t--filtered-file\t\tOptional output file for filter-fail variants\n" +
+ "\t--keep-failures\t\tIf set to 1, include failures in the output file\n\n" +
+ "\t--min-var-count\t\tMinimum number of variant-supporting reads [4]\n" +
+ "\t--min-var-freq\t\tMinimum variant allele frequency [0.05]\n" +
+ "\t--min-var-readpos\t\tMinimum average read position of var-supporting reads [0.10]\n" +
+ "\t--min-var-dist3\t\tMinimum average relative distance to effective 3' end [0.10]\n" +
+ "\t--min-strandedness\tMinimum fraction of variant reads from each strand [0.01]\n" +
+ "\t--min-strand-reads\tMinimum allele depth required to perform the strand tests [5]\n" +
+ "\t--min-ref-basequal\t\tMinimum average base quality for ref allele [30]\n" +
+ "\t--min-var-basequal\t\tMinimum average base quality for var allele [30]\n" +
+ "\t--max-rl-diff\t\tMaximum average relative read length difference (ref - var) [0.25]\n" +
+ "\t--max-var-mmqs\t\tMaximum mismatch quality sum of variant-supporting reads [100]\n" +
+ "\t--max-mmqs-diff\t\tMaximum average mismatch quality sum (var - ref) [50]\n" +
+ "\t--min-ref-mapqual\t\tMinimum average mapping quality for ref allele [30]\n" +
+ "\t--min-var-mapqual\t\tMinimum average mapping quality for var allele [30]\n" +
+ "\t--max-mapqual-diff\tMaximum average mapping quality (ref - var) [50]";
+
+
+ // Set parameter defaults //
+
+ double minVarReadPos = 0.10;
+ double minVarDist3 = 0.10;
+ double minStrandedness = 0.01;
+ int minStrandReads = 5;
+ double maxReadLenDiff = 0.25;
+
+ double minVarFreq = 0.05;
+ int minVarCount = 4;
+
+ int maxVarMMQS = 150;
+ int maxMMQSdiff = 150;
+
+ int minRefBaseQual = 30;
+ int minVarBaseQual = 30;
+ int minRefMapQual = 30;
+ int minVarMapQual = 30;
+ int maxMapQualDiff = 50;
+
+
+ String outFileName = "";
+ String filteredFileName = "";
+
+ // Adjust parameters based on user input //
+
+ HashMap<String, String> params = VarScan.getParams(args);
+
+ try
+ {
+ if(params.containsKey("output-file"))
+ outFileName = params.get("output-file");
+
+ if(params.containsKey("filtered-file"))
+ filteredFileName = params.get("filtered-file");
+
+ if(params.containsKey("min-var-freq"))
+ minVarFreq = Double.parseDouble(params.get("min-var-freq"));
+
+ if(params.containsKey("min-var-readpos"))
+ minVarReadPos = Double.parseDouble(params.get("min-var-readpos"));
+
+ if(params.containsKey("min-var-dist3"))
+ minVarDist3 = Double.parseDouble(params.get("min-var-dist3"));
+
+ if(params.containsKey("max-rl-diff"))
+ maxReadLenDiff = Double.parseDouble(params.get("max-rl-diff"));
+
+ if(params.containsKey("min-strandedness"))
+ minStrandedness = Double.parseDouble(params.get("min-strandedness"));
+
+ if(params.containsKey("min-strand-reads"))
+ minStrandReads = Integer.parseInt(params.get("min-strand-reads"));
+
+ if(params.containsKey("min-var-count"))
+ minVarCount = Integer.parseInt(params.get("min-var-count"));
+
+ if(params.containsKey("min-ref-mapqual"))
+ minRefMapQual = Integer.parseInt(params.get("min-ref-mapqual"));
+
+ if(params.containsKey("min-var-mapqual"))
+ minVarMapQual = Integer.parseInt(params.get("min-var-mapqual"));
+
+ if(params.containsKey("min-ref-basequal"))
+ minRefBaseQual = Integer.parseInt(params.get("min-ref-basequal"));
+
+ if(params.containsKey("min-var-basequal"))
+ minVarBaseQual = Integer.parseInt(params.get("min-var-basequal"));
+
+ if(params.containsKey("max-var-mmqs"))
+ maxVarMMQS = Integer.parseInt(params.get("max-var-mmqs"));
+
+ if(params.containsKey("max-mmqs-diff"))
+ maxMMQSdiff = Integer.parseInt(params.get("max-mmqs-diff"));
+
+ if(params.containsKey("max-mapqual-diff"))
+ maxMapQualDiff = Integer.parseInt(params.get("max-mapqual-diff"));
+ }
+ catch(Exception e)
+ {
+ System.err.println("Input Parameter Threw Exception: " + e.getLocalizedMessage());
+ e.printStackTrace(System.err);
+ System.exit(1);
+ }
+
+ // Print usage if -h or --help invoked //
+ if(params.containsKey("help") || params.containsKey("h"))
+ {
+ System.err.println(usage);
+ return;
+ }
+ // Print usage if -h or --help invoked //
+ else if(args.length < 3)
+ {
+ System.err.println("ERROR: Input files not provided!\n");
+ System.err.println(usage);
+ return;
+ }
+
+
+ // Define two-decimal-place format and statistics hash //
+ DecimalFormat twoDigits = new DecimalFormat("#0.00");
+ DecimalFormat threeDigits = new DecimalFormat("#0.000");
+
+ HashMap<String, Integer> stats = new HashMap<String, Integer>();
+ stats.put("numVariants", 0);
+ stats.put("numWithRC", 0);
+ stats.put("numWithReads1", 0);
+ stats.put("numPassFilter", 0);
+ stats.put("numFailFilter", 0);
+ stats.put("numFailNoRC", 0);
+ stats.put("numFailVarCount", 0);
+ stats.put("numFailVarFreq", 0);
+ stats.put("numFailStrand", 0);
+ stats.put("numFailVarReadPos", 0);
+ stats.put("numFailVarDist3", 0);
+ stats.put("numFailVarMMQS", 0);
+ stats.put("numFailMMQSdiff", 0);
+ stats.put("numFailRefMapQual", 0);
+ stats.put("numFailVarMapQual", 0);
+ stats.put("numFailRefBaseQual", 0);
+ stats.put("numFailVarBaseQual", 0);
+ stats.put("numFailMapQualDiff", 0);
+ stats.put("numFailReadLenDiff", 0);
+
+
+ try
+ {
+ // Check for presence of files //
+ File variantFile = new File(args[1]);
+ File readcountFile = new File(args[2]);
+
+ if(!variantFile.exists() || !readcountFile.exists())
+ {
+ System.err.println("ERROR: One of your input files is missing!\n");
+ System.err.println(usage);
+ return;
+ }
+
+
+ // Declare file-parsing variables //
+
+ String line;
+ int lineCounter = 0;
+ boolean isVCF = false;
+
+// if(args[1].endsWith(".vcf"))
+// isVCF = true;
+
+ HashMap<String, String> readcounts = new HashMap<String, String>();
+ System.err.println("Loading readcounts from " + args[2] + "...");
+ readcounts = loadReadcounts(args[2]);
+ System.err.println("Parsing variants from " + args[1] + "...");
+
+ if(variantFile.exists())
+ {
+ BufferedReader in = new BufferedReader(new FileReader(variantFile));
+
+ if(in.ready())
+ {
+ // Declare output files //
+ PrintStream outFile = null;
+ if(params.containsKey("output-file"))
+ outFile = new PrintStream( new FileOutputStream(outFileName) );
+
+ PrintStream filteredFile = null;
+ if(params.containsKey("filtered-file"))
+ filteredFile = new PrintStream( new FileOutputStream(filteredFileName) );
+
+ String vcfHeaderInfo = "";
+ vcfHeaderInfo = "##FILTER=<ID=VarCount,Description=\"Fewer than " + minVarCount + " variant-supporting reads\">";
+ vcfHeaderInfo += "\n##FILTER=<ID=VarFreq,Description=\"Variant allele frequency below " + minVarFreq + "\">";
+ vcfHeaderInfo += "\n##FILTER=<ID=VarReadPos,Description=\"Relative average read position < " + minVarReadPos + "\">";
+ vcfHeaderInfo += "\n##FILTER=<ID=VarDist3,Description=\"Average distance to effective 3' end < " + minVarDist3 + "\">";
+ vcfHeaderInfo += "\n##FILTER=<ID=VarMMQS,Description=\"Average mismatch quality sum for variant reads > " + maxVarMMQS + "\">";
+ vcfHeaderInfo += "\n##FILTER=<ID=VarMapQual,Description=\"Average mapping quality of variant reads < " + minVarMapQual + "\">";
+ vcfHeaderInfo += "\n##FILTER=<ID=VarBaseQual,Description=\"Average base quality of variant reads < " + minVarBaseQual + "\">";
+ vcfHeaderInfo += "\n##FILTER=<ID=Strand,Description=\"Strand representation of variant reads < " + minStrandedness + "\">";
+ vcfHeaderInfo += "\n##FILTER=<ID=RefMapQual,Description=\"Average mapping quality of reference reads < " + minRefMapQual + "\">";
+ vcfHeaderInfo += "\n##FILTER=<ID=RefBaseQual,Description=\"Average base quality of reference reads < " + minRefBaseQual + "\">";
+ vcfHeaderInfo += "\n##FILTER=<ID=MMQSdiff,Description=\"Mismatch quality sum difference (ref - var) > " + maxMMQSdiff + "\">";
+ vcfHeaderInfo += "\n##FILTER=<ID=MapQualDiff,Description=\"Mapping quality difference (ref - var) > " + maxMapQualDiff + "\">";
+ vcfHeaderInfo += "\n##FILTER=<ID=ReadLenDiff,Description=\"Average supporting read length difference (ref - var) > " + maxReadLenDiff + "\">";
+
+
+ // Parse the infile line by line //
+
+ while ((line = in.readLine()) != null)
+ {
+ lineCounter++;
+
+ try
+ {
+ String[] lineContents = line.split("\t");
+ String chrom = lineContents[0];
+ String failReason = "";
+ boolean filterFlag = false;
+
+ if(chrom.equals("Chrom") || chrom.equals("chrom") || line.startsWith("#"))
+ {
+ if(line.startsWith("#"))
+ isVCF = true;
+
+ // Print header //
+ if(isVCF)
+ {
+ if(params.containsKey("output-file"))
+ {
+ if(line.startsWith("#CHROM"))
+ outFile.println(vcfHeaderInfo);
+
+ outFile.println(line);
+ }
+
+ if(params.containsKey("filtered-file"))
+ {
+ if(line.startsWith("#CHROM"))
+ filteredFile.println(vcfHeaderInfo);
+
+ filteredFile.println(line);
+ }
+ }
+ else
+ {
+ String filterHeader = "ref_reads\tvar_reads\tref_strand\tvar_strand\tref_basequal\tvar_basequal\tref_readpos\tvar_readpos\tref_dist3\tvar_dist3\tref_mapqual\tvar_mapqual\tmapqual_diff\tref_mmqs\tvar_mmqs\tmmqs_diff\tfilter_status";
+ if(params.containsKey("output-file"))
+ outFile.println(line + "\t" + filterHeader);
+ if(params.containsKey("filtered-file"))
+ filteredFile.println(line + "\t" + filterHeader);
+ }
+
+ }
+ else
+ {
+ stats.put("numVariants", (stats.get("numVariants") + 1));
+ int position = Integer.parseInt(lineContents[1]);
+ String positionKey = chrom + "\t" + position;
+ boolean isIndel = false;
+
+ // Reference-allele values //
+ int refReads = 0;
+ double refMapQual = 0;
+ double refBaseQual = 0;
+ int refReadsPlus = 0;
+ int refReadsMinus = 0;
+ double refPos = 0;
+ double refSubs = 0;
+ double refMMQS = 0;
+ double refRL = 0;
+ double refDist3 = 0;
+
+ // Variant-allele values //
+ int varReads = 0;
+ double varMapQual = 0;
+ double varBaseQual = 0;
+ int varReadsPlus = 0;
+ int varReadsMinus = 0;
+ double varPos = 0;
+ double varSubs = 0;
+ double varMMQS = 0;
+ double varRL = 0;
+ double varDist3 = 0;
+
+ // Calculated values //
+ double varFreq = 0;
+ double varStrandedness = -1;
+ double refStrandedness = -1;
+ double mmqsDiff = 0;
+ double mapQualDiff = 0;
+ double avgReadLenDiff = 0;
+
+
+// if(readcounts.containsKey(positionKey))
+ // {
+ try {
+ String refCounts = "";
+ String varCounts = "";
+ String ref = "";
+ String alt = "";
+
+ if(isVCF)
+ {
+ // Filter only applied to the first alt //
+ ref = lineContents[3];
+ String alts = lineContents[4];
+
+ String[] altContents = alts.split(",");
+ alt = altContents[0];
+ // Check for and convert indel //
+ if(ref.length() > 1)
+ {
+ isIndel = true;
+ // Deletion //
+ String thisVar = ref.replaceFirst(alt, "");
+ ref = alt;
+ alt = "-" + thisVar;
+ }
+ else if(alt.length() > 1)
+ {
+ // Insertion //
+ isIndel = true;
+ String thisVar = alt.replaceFirst(ref, "");
+ alt = "+" + thisVar;
+ }
+ }
+ else
+ {
+ ref = lineContents[2];
+ String cns = lineContents[3];
+ if(cns.length() > 1)
+ {
+ isIndel = true;
+ // CONVERT INDEL //
+ if(cns.contains("/"))
+ {
+ String[] indelContents = cns.split("/");
+ if(indelContents.length > 1)
+ alt = indelContents[1];
+ }
+ else
+ {
+ alt = cns;
+ }
+
+ }
+ else
+ {
+ // CONVERT SNV //
+ alt = VarScan.getVarAllele(ref, cns);
+
+ }
+
+ }
+
+ if(alt.length() > 0)
+ {
+ // Set a SNV style variant key as the default //
+ String varKey = chrom + "\t" + position + "\t" + alt;
+
+ // Determine if we have an indel. If so, muddle around until we find the right varKey //
+ if(ref.length() > 1 || alt.length() > 1)
+ {
+ isIndel = true;
+ int indelSize = 0;
+ if(ref.length() > 1)
+ indelSize = ref.length() - 1;
+ else
+ indelSize = alt.length() - 1;
+
+ if(readcounts.containsKey(varKey))
+ {
+ // Keep this perfect match //
+
+ }
+ else
+ {
+ // Shimmy back and forth out to size of indel to look for this //
+ for(int windowSize = indelSize + 1; windowSize >= 1; windowSize--)
+ {
+ String testKey1 = chrom + "\t" + (position - windowSize) + "\t" + alt;
+ String testKey2 = chrom + "\t" + (position + windowSize) + "\t" + alt;
+
+ if(readcounts.containsKey(testKey1))
+ {
+ varKey = testKey1;
+ }
+ else if(readcounts.containsKey(testKey2))
+ {
+ varKey = testKey2;
+ }
+
+ }
+ }
+
+
+ }
+
+
+ if(readcounts.containsKey(varKey))
+ {
+ stats.put("numWithRC", (stats.get("numWithRC") + 1));
+
+ // Try to split out values and proceed with filter comparison //
+ try {
+ String[] rcContents = readcounts.get(varKey).split("\t");
+ int readDepth = Integer.parseInt(rcContents[0]);
+ varCounts = rcContents[1];
+ String[] varContents = varCounts.split(":");
+
+ // Parse out variant allele values //
+
+ varReads = Integer.parseInt(varContents[1]);
+ varMapQual = Double.parseDouble(varContents[2]);
+ varBaseQual = Double.parseDouble(varContents[3]);
+ varReadsPlus = Integer.parseInt(varContents[5]);
+ varReadsMinus = Integer.parseInt(varContents[6]);
+ varPos = Double.parseDouble(varContents[7]);
+ varSubs = Double.parseDouble(varContents[8]);
+ varMMQS = Double.parseDouble(varContents[9]);
+ varRL = Double.parseDouble(varContents[12]);
+ varDist3 = Double.parseDouble(varContents[13]);
+
+ String refKey = chrom + "\t" + position + "\t" + ref;
+ if(readcounts.containsKey(refKey))
+ {
+ String[] rcContents2 = readcounts.get(refKey).split("\t");
+ int readDepth2 = Integer.parseInt(rcContents2[0]);
+
+ refCounts = rcContents2[1];
+ // Parse out reference allele values //
+ String[] refContents = refCounts.split(":");
+
+ refReads = Integer.parseInt(refContents[1]);
+ refMapQual = Double.parseDouble(refContents[2]);
+ refBaseQual = Double.parseDouble(refContents[3]);
+ refReadsPlus = Integer.parseInt(refContents[5]);
+ refReadsMinus = Integer.parseInt(refContents[6]);
+ refPos = Double.parseDouble(refContents[7]);
+ refSubs = Double.parseDouble(refContents[8]);
+ refMMQS = Double.parseDouble(refContents[9]);
+ refRL = Double.parseDouble(refContents[12]);
+ refDist3 = Double.parseDouble(refContents[13]);
+ }
+
+ // If variant fails any criteria we have at this point, fail it //
+
+ if(varReads < minVarCount)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "VarCount";
+ stats.put("numFailVarCount", (stats.get("numFailVarCount") + 1));
+ }
+
+ // Compute variant allele frequency //
+
+ varFreq = (double) varReads / (double) readDepth;
+
+ if(varFreq < minVarFreq)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "VarFreq";
+ stats.put("numFailVarFreq", (stats.get("numFailVarFreq") + 1));
+ }
+
+ // Variant read position //
+ if(varPos < minVarReadPos)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "VarReadPos";
+ stats.put("numFailVarReadPos", (stats.get("numFailVarReadPos") + 1));
+ }
+
+ if(varDist3 < minVarDist3)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "VarDist3";
+ stats.put("numFailVarDist3", (stats.get("numFailVarDist3") + 1));
+ }
+
+
+ // Look at variant MMQS and MMQS diff //
+
+ if(varMMQS > maxVarMMQS)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "VarMMQS";
+ stats.put("numFailVarMMQS", (stats.get("numFailVarMMQS") + 1));
+ }
+
+ // Apply minimum average mapqual checks //
+
+ if(refReads > 0 && refMapQual < minRefMapQual)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "RefMapQual";
+ stats.put("numFailRefMapQual", (stats.get("numFailRefMapQual") + 1));
+
+ }
+
+ if(varReads > 0 && varMapQual < minVarMapQual)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "VarMapQual";
+ stats.put("numFailVarMapQual", (stats.get("numFailVarMapQual") + 1));
+ }
+
+ // Apply minimum average basequal checks //
+
+ if(refReads > 0 && refBaseQual < minRefBaseQual)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "RefBaseQual";
+ stats.put("numFailRefBaseQual", (stats.get("numFailRefBaseQual") + 1));
+
+ }
+
+ if(!isIndel && varReads > 0 && varBaseQual < minVarBaseQual)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "VarBaseQual";
+ stats.put("numFailVarBaseQual", (stats.get("numFailVarBaseQual") + 1));
+ }
+
+ // IF we have enough reads supporting the variant, check strand //
+
+ if(refReads >= minStrandReads && refReads > 0)
+ {
+ refStrandedness = (double) refReadsPlus / (double) refReads;
+ }
+
+ if(varReads >= minStrandReads && varReads > 0)
+ {
+ varStrandedness = (double) varReadsPlus / (double) varReads;
+
+ if(varStrandedness < minStrandedness || (1 - varStrandedness) < minStrandedness)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "Strand";
+ stats.put("numFailStrand", (stats.get("numFailStrand") + 1));
+// System.err.println(positionKey + "\t" + refReadsPlus + "\t" + refReadsMinus + "\t" + refStrandedness + "\t" + varReadsPlus + "\t" + varReadsMinus + "\t" + varStrandedness);
+ }
+
+ }
+
+ if(refReads >= 2 && varReads >= 2)
+ {
+ stats.put("numWithReads1", (stats.get("numWithReads1") + 1));
+ // Only make difference comparisons if we had reference reads //
+
+ mmqsDiff = varMMQS - refMMQS;
+
+ if(mmqsDiff > maxMMQSdiff)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "MMQSdiff";
+ stats.put("numFailMMQSdiff", (stats.get("numFailMMQSdiff") + 1));
+ }
+
+ // Compare average mapping qualities //
+
+ mapQualDiff = refMapQual - varMapQual;
+
+// System.err.println(refReads + "/" + readDepth + "\t" + refCounts + "\tMapqual diff: " + refMapQual + " - " + varMapQual + " = " + mapQualDiff);
+
+ if(mapQualDiff > maxMapQualDiff)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "MapQualDiff";
+ stats.put("numFailMapQualDiff", (stats.get("numFailMapQualDiff") + 1));
+ }
+
+ avgReadLenDiff = (refRL - varRL) / refRL;
+
+ if(avgReadLenDiff > maxReadLenDiff)
+ {
+ if(failReason.length() > 0)
+ failReason += ",";
+ failReason += "ReadLenDiff";
+ stats.put("numFailReadLenDiff", (stats.get("numFailReadLenDiff") + 1));
+ }
+
+ }
+
+
+ // Check to see if it failed any filters. If so, mark it for failure //
+
+ if(failReason.length() > 0)
+ filterFlag = true;
+
+
+ }
+ catch (Exception e)
+ {
+ System.err.println("Exception thrown while processing readcounts at " + positionKey + ": " + e.getLocalizedMessage());
+ e.printStackTrace(System.err);
+ return;
+ }
+
+ }
+ else
+ {
+ // Unable to get readcounts for ref or var allele //
+ failReason = "NoReadCounts";
+ stats.put("numFailNoRC", (stats.get("numFailNoRC") + 1));
+ filterFlag = true;
+ }
+
+
+ } // End of else for non-VCF output
+
+ }
+ catch(Exception e)
+ {
+ System.err.println("Exception thrown while filtering at " + positionKey + ": " + e.getLocalizedMessage());
+ e.printStackTrace(System.err);
+ return;
+ }
+
+
+// }
+
+ // Prepare filter output line //
+
+ String filterColumns = "";
+ filterColumns = refReads + "\t" + varReads + "\t" + threeDigits.format(refStrandedness) + "\t" + threeDigits.format(varStrandedness);
+ filterColumns += "\t" + refBaseQual + "\t" + varBaseQual + "\t" + refPos + "\t" + varPos;
+ filterColumns += "\t" + refDist3 + "\t" + varDist3 + "\t" + refMapQual + "\t" + varMapQual;
+ filterColumns += "\t" + twoDigits.format(mapQualDiff) + "\t" + refMMQS + "\t" + varMMQS + "\t" + twoDigits.format(mmqsDiff);
+
+ if(filterFlag)
+ {
+ // VARIANT FAILED THE FILTER //
+ filterColumns += "\t" + failReason;
+ stats.put("numFailFilter", (stats.get("numFailFilter") + 1));
+ }
+ else
+ {
+ // VARIANT PASSED THE FILTER
+ filterColumns += "\t" + "PASS";
+ stats.put("numPassFilter", (stats.get("numPassFilter") + 1));
+ }
+
+ // Prepare new VCF line //
+
+ if(isVCF)
+ {
+ String newVCFline = "";
+
+ for(int colCounter = 0; colCounter < lineContents.length; colCounter++)
+ {
+ if(newVCFline.length() > 0)
+ newVCFline += "\t";
+
+ if(filterFlag && colCounter == 6)
+ newVCFline += failReason;
+ else
+ newVCFline += lineContents[colCounter];
+ }
+ // Print the output line if it passed, or if the user specified to keep failures //
+
+ if(params.containsKey("output-file") && (!filterFlag || params.containsKey("keep-failures")))
+ outFile.println(newVCFline);
+
+ if(filterFlag && params.containsKey("filtered-file"))
+ filteredFile.println(newVCFline);
+ }
+ else
+ {
+ // Print the output line if it passed, or if the user specified to keep failures //
+ if(params.containsKey("output-file") && (!filterFlag || params.containsKey("keep-failures")))
+ outFile.println(line + "\t" + filterColumns);
+
+ // If it failed and the user specified a failure file, print it //
+ if(filterFlag && params.containsKey("filtered-file"))
+ filteredFile.println(line + "\t" + filterColumns);
+
+ }
+
+ } // End of else for non-header line //
+ }
+ catch(Exception e)
+ {
+ System.err.println("Parsing Exception on line:\n" + line + "\n" + e.getLocalizedMessage());
+ e.printStackTrace(System.err);
+ return;
+ }
+
+ }
+
+ // Report summary of results //
+
+ System.err.println(stats.get("numVariants") + " variants in input file");
+ System.err.println(stats.get("numWithRC") + " had a bam-readcount result");
+ System.err.println(stats.get("numWithReads1") + " had reads1>=2");
+ System.err.println(stats.get("numPassFilter") + " passed filters");
+ System.err.println(stats.get("numFailFilter") + " failed filters");
+ System.err.println("\t" + stats.get("numFailNoRC") + " failed because no readcounts were returned");
+ System.err.println("\t" + stats.get("numFailVarCount") + " failed minimim variant count < " + minVarCount);
+ System.err.println("\t" + stats.get("numFailVarFreq") + " failed minimum variant freq < " + minVarFreq);
+ System.err.println("\t" + stats.get("numFailStrand") + " failed minimum strandedness < " + minStrandedness);
+ System.err.println("\t" + stats.get("numFailVarReadPos") + " failed minimum variant readpos < " + minVarReadPos);
+ System.err.println("\t" + stats.get("numFailVarDist3") + " failed minimum variant dist3 < " + minVarDist3);
+ System.err.println("\t" + stats.get("numFailVarMMQS") + " failed maximum variant MMQS > " + maxVarMMQS);
+ System.err.println("\t" + stats.get("numFailMMQSdiff") + " failed maximum MMQS diff (var - ref) > " + maxMMQSdiff);
+ System.err.println("\t" + stats.get("numFailMapQualDiff") + " failed maximum mapqual diff (ref - var) > " + maxMapQualDiff);
+ System.err.println("\t" + stats.get("numFailRefMapQual") + " failed minimim ref mapqual < " + minRefMapQual);
+ System.err.println("\t" + stats.get("numFailVarMapQual") + " failed minimim var mapqual < " + minVarMapQual);
+ System.err.println("\t" + stats.get("numFailRefBaseQual") + " failed minimim ref basequal < " + minRefBaseQual);
+ System.err.println("\t" + stats.get("numFailVarBaseQual") + " failed minimim var basequal < " + minVarBaseQual);
+ System.err.println("\t" + stats.get("numFailReadLenDiff") + " failed maximum RL diff (ref - var) > " + maxReadLenDiff);
+
+ in.close();
+
+ }
+ else
+ {
+ System.err.println("Unable to open SNVs file for reading");
+ }
+
+ in.close();
+ }
+ else
+ {
+ // Infile doesn't exist //
+ }
+ }
+ catch(Exception e)
+ {
+ System.err.println("Error Parsing SNV File: " + e.getMessage() + "\n" + e.getLocalizedMessage());
+ e.printStackTrace(System.err);
+ }
+
+
+ }
+
+
+ /**
+ * Loads snvs to be filtered
+ *
+ * @param filename Path to file of snvs
+ * @return snvs HashMap of SNV positions (chrom\tposition\alt)
+ */
+ static HashMap<String, String> loadReadcounts(String filename)
+ {
+ HashMap<String, String> readcounts = new HashMap<String, String>();
+
+ try
+ {
+ // Declare file-parsing variables //
+
+ String line;
+
+ File infile = new File(filename);
+ if(infile.exists())
+ {
+ BufferedReader in = new BufferedReader(new FileReader(infile));
+
+ if(in.ready())
+ {
+ while ((line = in.readLine()) != null)
+ {
+ String[] lineContents = line.split("\t");
+ try {
+ String chrom = lineContents[0];
+ String position = lineContents[1];
+// String snvKey = chrom + "\t" + position;
+
+ // Start resultLine as depth //
+
+
+ for(int colCounter = 4; colCounter < lineContents.length; colCounter++)
+ {
+ String[] alleleContents = lineContents[colCounter].split(":");
+ String thisAllele = alleleContents[0];
+ int thisReads = Integer.parseInt(alleleContents[1]);
+
+ if(thisAllele.equals("N") || thisAllele.equals("="))
+ {
+ // Skip non-informative alleles //
+ }
+ else if(thisReads == 0)
+ {
+ // Skip empty results //
+ }
+ else
+ {
+ String rcLine = lineContents[3];
+ rcLine = rcLine + "\t" + lineContents[colCounter];
+ // Save the readcount result line //
+ String snvKey = chrom + "\t" + position + "\t" + thisAllele;
+ readcounts.put(snvKey, rcLine);
+ }
+
+
+ }
+
+
+ }
+ catch (Exception e)
+ {
+ // If we encounter an issue, print a warning //
+ System.err.println("Warning: Exception thrown while loading bam-readcount: " + e.getMessage());
+ System.err.println("Attempting to continue, but please double-check file format and completeness");
+ }
+
+
+ }
+ }
+ else
+ {
+ System.err.println("Unable to open SNVs file for reading");
+ }
+
+ in.close();
+ }
+ }
+ catch(Exception e)
+ {
+ System.err.println("Error Parsing SNV File: " + e.getLocalizedMessage());
+ }
+
+ return(readcounts);
+ }
+
+
+}
\ No newline at end of file
diff --git a/net/sf/varscan/LimitVariants.java b/sf/varscan/LimitVariants.java
similarity index 100%
rename from net/sf/varscan/LimitVariants.java
rename to sf/varscan/LimitVariants.java
diff --git a/net/sf/varscan/ProcessSomatic.java b/sf/varscan/ProcessSomatic.java
similarity index 100%
rename from net/sf/varscan/ProcessSomatic.java
rename to sf/varscan/ProcessSomatic.java
diff --git a/net/sf/varscan/ReadCounts.java b/sf/varscan/ReadCounts.java
similarity index 100%
rename from net/sf/varscan/ReadCounts.java
rename to sf/varscan/ReadCounts.java
diff --git a/net/sf/varscan/Somatic.java b/sf/varscan/Somatic.java
similarity index 100%
rename from net/sf/varscan/Somatic.java
rename to sf/varscan/Somatic.java
diff --git a/net/sf/varscan/Trio.java b/sf/varscan/Trio.java
similarity index 100%
rename from net/sf/varscan/Trio.java
rename to sf/varscan/Trio.java
diff --git a/net/sf/varscan/VarScan.java b/sf/varscan/VarScan.java
similarity index 98%
rename from net/sf/varscan/VarScan.java
rename to sf/varscan/VarScan.java
index 3c509d5..0bc9f38 100644
--- a/net/sf/varscan/VarScan.java
+++ b/sf/varscan/VarScan.java
@@ -65,6 +65,11 @@ import java.text.*;
* Input: VarScan output for SNPs or Indels (varscan.output.snp)
* Output: Variants passing all filters (varscan.output.snp.filter)
*
+ * fpFilter [variant-file] [readcount-file] OPTIONS
+ * Apply the false-positive filter to VarScan variant calls
+ * Input: VarScan output for SNPs or Indels (varscan.output.snp) and bam-readcount output
+ * Output: Variants passing all filters (varscan.output.snp.fpfilter)
+ *
* processSomatic [somatic-status file] OPTIONS
* Process VarScan output by somatic status and confidence
* Input: VarScan output for SNPs or Indels (varscan.output.snp)
@@ -121,6 +126,7 @@ public class VarScan {
"\tfilter\t\t\tFilter SNPs by coverage, frequency, p-value, etc.\n" +
"\tsomaticFilter\t\tFilter somatic variants for clusters/indels\n" +
+ "\tfpfilter\t\tApply the false-positive filter\n\n" +
"\tprocessSomatic\t\tIsolate Germline/LOH/Somatic calls from output\n" +
"\tcopyCaller\t\tGC-adjust and process copy number changes from VarScan copynumber output\n" +
@@ -163,6 +169,11 @@ public class VarScan {
somaticFilter(args, params);
}
+ else if(args[0].equals("fpfilter"))
+ {
+ fpfilter(args, params);
+ }
+
else if(args[0].equals("processSomatic"))
{
processSomatic(args, params);
@@ -330,6 +341,17 @@ public class VarScan {
FilterVariants myFilter = new FilterVariants(args);
}
+
+ /**
+ * Applies false positive filter using bam-readcount information
+ *
+ * @param args Command-line arguments
+ */
+ public static void fpfilter(String[] args, HashMap<String, String> params)
+ {
+ FpFilter myFilter = new FpFilter(args);
+ }
+
/**
* Filters variants by coverage, significance, frequency, etc.
*
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/varscan.git
More information about the debian-med-commit
mailing list