[med-svn] [varscan] 01/06: Imported Upstream version 2.3.9+dfsg

Andreas Tille tille at debian.org
Wed Jul 8 03:25:49 UTC 2015


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository varscan.

commit 4557f3b78993f12db727061f603eab909fda1e21
Author: Andreas Tille <tille at debian.org>
Date:   Wed Jul 8 04:54:21 2015 +0200

    Imported Upstream version 2.3.9+dfsg
---
 {net/sf => sf}/varscan/CallMpileup.java    |   0
 {net/sf => sf}/varscan/CallPileup.java     |   0
 {net/sf => sf}/varscan/Comparison.java     |   0
 {net/sf => sf}/varscan/CopyCaller.java     |   0
 {net/sf => sf}/varscan/Copynumber.java     |   0
 {net/sf => sf}/varscan/Coverage.java       |   0
 {net/sf => sf}/varscan/FilterSomatic.java  |   0
 {net/sf => sf}/varscan/FilterVariants.java |   0
 {net/sf => sf}/varscan/FishersExact.java   |   0
 sf/varscan/FpFilter.java                   | 874 +++++++++++++++++++++++++++++
 {net/sf => sf}/varscan/LimitVariants.java  |   0
 {net/sf => sf}/varscan/ProcessSomatic.java |   0
 {net/sf => sf}/varscan/ReadCounts.java     |   0
 {net/sf => sf}/varscan/Somatic.java        |   0
 {net/sf => sf}/varscan/Trio.java           |   0
 {net/sf => sf}/varscan/VarScan.java        |  22 +
 16 files changed, 896 insertions(+)

diff --git a/net/sf/varscan/CallMpileup.java b/sf/varscan/CallMpileup.java
similarity index 100%
rename from net/sf/varscan/CallMpileup.java
rename to sf/varscan/CallMpileup.java
diff --git a/net/sf/varscan/CallPileup.java b/sf/varscan/CallPileup.java
similarity index 100%
rename from net/sf/varscan/CallPileup.java
rename to sf/varscan/CallPileup.java
diff --git a/net/sf/varscan/Comparison.java b/sf/varscan/Comparison.java
similarity index 100%
rename from net/sf/varscan/Comparison.java
rename to sf/varscan/Comparison.java
diff --git a/net/sf/varscan/CopyCaller.java b/sf/varscan/CopyCaller.java
similarity index 100%
rename from net/sf/varscan/CopyCaller.java
rename to sf/varscan/CopyCaller.java
diff --git a/net/sf/varscan/Copynumber.java b/sf/varscan/Copynumber.java
similarity index 100%
rename from net/sf/varscan/Copynumber.java
rename to sf/varscan/Copynumber.java
diff --git a/net/sf/varscan/Coverage.java b/sf/varscan/Coverage.java
similarity index 100%
rename from net/sf/varscan/Coverage.java
rename to sf/varscan/Coverage.java
diff --git a/net/sf/varscan/FilterSomatic.java b/sf/varscan/FilterSomatic.java
similarity index 100%
rename from net/sf/varscan/FilterSomatic.java
rename to sf/varscan/FilterSomatic.java
diff --git a/net/sf/varscan/FilterVariants.java b/sf/varscan/FilterVariants.java
similarity index 100%
rename from net/sf/varscan/FilterVariants.java
rename to sf/varscan/FilterVariants.java
diff --git a/net/sf/varscan/FishersExact.java b/sf/varscan/FishersExact.java
similarity index 100%
rename from net/sf/varscan/FishersExact.java
rename to sf/varscan/FishersExact.java
diff --git a/sf/varscan/FpFilter.java b/sf/varscan/FpFilter.java
new file mode 100644
index 0000000..e7ab8c8
--- /dev/null
+++ b/sf/varscan/FpFilter.java
@@ -0,0 +1,874 @@
+/**
+ * @(#)FpFilter.java
+ *
+ * Copyright (c) 2009-2010 Daniel C. Koboldt and Washington University in St. Louis
+ *
+ * COPYRIGHT
+ */
+
+package net.sf.varscan;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.PrintStream;
+import java.text.DecimalFormat;
+import java.util.HashMap;
+
+/**
+ * A class for applying the false-positive filter to VarScan variant calls
+ *
+ * @version	2.3
+ *
+ * @author Daniel C. Koboldt <dkoboldt at genome.wustl.edu>
+ *
+ */
+public class FpFilter {
+
+	public FpFilter(String[] args)
+	{
+		//		 Define the usage message //
+		String usage = "USAGE: java -jar VarScan.jar fpfilter [variant file] [readcount file] OPTIONS\n" +
+		"\tvariant file - A file of SNPs or indels in VarScan-native or VCF format\n" +
+		"\treadcount file - The output file from bam-readcount for those positions\n" +
+		"\t***For detailed filtering instructions, please visit http://varscan.sourceforge.net***\n" +
+		"\n" +
+		"\tOPTIONS:\n" +
+		"\t--output-file\t\tOptional output file for filter-pass variants\n" +
+		"\t--filtered-file\t\tOptional output file for filter-fail variants\n" +
+		"\t--keep-failures\t\tIf set to 1, include failures in the output file\n\n" +
+		"\t--min-var-count\t\tMinimum number of variant-supporting reads [4]\n" +
+		"\t--min-var-freq\t\tMinimum variant allele frequency [0.05]\n" +
+		"\t--min-var-readpos\t\tMinimum average read position of var-supporting reads [0.10]\n" +
+		"\t--min-var-dist3\t\tMinimum average relative distance to effective 3' end [0.10]\n" +
+		"\t--min-strandedness\tMinimum fraction of variant reads from each strand [0.01]\n" +
+		"\t--min-strand-reads\tMinimum allele depth required to perform the strand tests [5]\n" +
+		"\t--min-ref-basequal\t\tMinimum average base quality for ref allele [30]\n" +
+		"\t--min-var-basequal\t\tMinimum average base quality for var allele [30]\n" +
+		"\t--max-rl-diff\t\tMaximum average relative read length difference (ref - var) [0.25]\n" +
+		"\t--max-var-mmqs\t\tMaximum mismatch quality sum of variant-supporting reads [100]\n" +
+		"\t--max-mmqs-diff\t\tMaximum average mismatch quality sum (var - ref) [50]\n" +
+		"\t--min-ref-mapqual\t\tMinimum average mapping quality for ref allele [30]\n" +
+		"\t--min-var-mapqual\t\tMinimum average mapping quality for var allele [30]\n" +
+		"\t--max-mapqual-diff\tMaximum average mapping quality (ref - var) [50]";
+
+
+		// Set parameter defaults //
+
+		double minVarReadPos = 0.10;
+		double minVarDist3 = 0.10;
+		double minStrandedness = 0.01;
+		int minStrandReads = 5;
+		double maxReadLenDiff = 0.25;
+
+		double minVarFreq = 0.05;
+		int minVarCount = 	4;
+
+		int maxVarMMQS = 150;
+		int maxMMQSdiff = 150;
+
+		int minRefBaseQual = 30;
+		int minVarBaseQual = 30;
+		int minRefMapQual = 30;
+		int minVarMapQual = 30;
+		int maxMapQualDiff = 50;
+
+
+		String outFileName = "";
+		String filteredFileName = "";
+
+		// Adjust parameters based on user input //
+
+		HashMap<String, String> params = VarScan.getParams(args);
+
+		try
+		{
+			if(params.containsKey("output-file"))
+				outFileName = params.get("output-file");
+
+			if(params.containsKey("filtered-file"))
+				filteredFileName = params.get("filtered-file");
+
+			if(params.containsKey("min-var-freq"))
+				 minVarFreq = Double.parseDouble(params.get("min-var-freq"));
+
+			if(params.containsKey("min-var-readpos"))
+				 minVarReadPos = Double.parseDouble(params.get("min-var-readpos"));
+
+			if(params.containsKey("min-var-dist3"))
+				 minVarDist3 = Double.parseDouble(params.get("min-var-dist3"));
+
+			if(params.containsKey("max-rl-diff"))
+				 maxReadLenDiff = Double.parseDouble(params.get("max-rl-diff"));
+
+			if(params.containsKey("min-strandedness"))
+				 minStrandedness = Double.parseDouble(params.get("min-strandedness"));
+
+			if(params.containsKey("min-strand-reads"))
+				 minStrandReads = Integer.parseInt(params.get("min-strand-reads"));
+
+			if(params.containsKey("min-var-count"))
+				 minVarCount = Integer.parseInt(params.get("min-var-count"));
+
+			if(params.containsKey("min-ref-mapqual"))
+				 minRefMapQual = Integer.parseInt(params.get("min-ref-mapqual"));
+
+			if(params.containsKey("min-var-mapqual"))
+				 minVarMapQual = Integer.parseInt(params.get("min-var-mapqual"));
+
+			if(params.containsKey("min-ref-basequal"))
+				 minRefBaseQual = Integer.parseInt(params.get("min-ref-basequal"));
+
+			if(params.containsKey("min-var-basequal"))
+				 minVarBaseQual = Integer.parseInt(params.get("min-var-basequal"));
+
+			if(params.containsKey("max-var-mmqs"))
+				 maxVarMMQS = Integer.parseInt(params.get("max-var-mmqs"));
+
+			if(params.containsKey("max-mmqs-diff"))
+				 maxMMQSdiff = Integer.parseInt(params.get("max-mmqs-diff"));
+
+			if(params.containsKey("max-mapqual-diff"))
+				 maxMapQualDiff = Integer.parseInt(params.get("max-mapqual-diff"));
+		}
+		catch(Exception e)
+		{
+	    	System.err.println("Input Parameter Threw Exception: " + e.getLocalizedMessage());
+	    	e.printStackTrace(System.err);
+	    	System.exit(1);
+		}
+
+		// Print usage if -h or --help invoked //
+		if(params.containsKey("help") || params.containsKey("h"))
+		{
+			System.err.println(usage);
+			return;
+		}
+		// Print usage if -h or --help invoked //
+		else if(args.length < 3)
+		{
+			System.err.println("ERROR: Input files not provided!\n");
+			System.err.println(usage);
+			return;
+		}
+
+
+	    // Define two-decimal-place format and statistics hash //
+	    DecimalFormat twoDigits = new DecimalFormat("#0.00");
+	    DecimalFormat threeDigits = new DecimalFormat("#0.000");
+
+	    HashMap<String, Integer> stats = new HashMap<String, Integer>();
+	    stats.put("numVariants", 0);
+	    stats.put("numWithRC", 0);
+	    stats.put("numWithReads1", 0);
+	    stats.put("numPassFilter", 0);
+	    stats.put("numFailFilter", 0);
+	    stats.put("numFailNoRC", 0);
+	    stats.put("numFailVarCount", 0);
+	    stats.put("numFailVarFreq", 0);
+	    stats.put("numFailStrand", 0);
+	    stats.put("numFailVarReadPos", 0);
+	    stats.put("numFailVarDist3", 0);
+	    stats.put("numFailVarMMQS", 0);
+	    stats.put("numFailMMQSdiff", 0);
+	    stats.put("numFailRefMapQual", 0);
+	    stats.put("numFailVarMapQual", 0);
+	    stats.put("numFailRefBaseQual", 0);
+	    stats.put("numFailVarBaseQual", 0);
+	    stats.put("numFailMapQualDiff", 0);
+	    stats.put("numFailReadLenDiff", 0);
+
+
+	    try
+	    {
+	    	// Check for presence of files //
+	    	File variantFile = new File(args[1]);
+	    	File readcountFile = new File(args[2]);
+
+	    	if(!variantFile.exists() || !readcountFile.exists())
+	    	{
+				System.err.println("ERROR: One of your input files is missing!\n");
+				System.err.println(usage);
+				return;
+	    	}
+
+
+	    	// Declare file-parsing variables //
+
+	    	String line;
+	    	int lineCounter = 0;
+	    	boolean isVCF = false;
+
+//	    	if(args[1].endsWith(".vcf"))
+//	    		isVCF = true;
+
+	    	HashMap<String, String> readcounts = new HashMap<String, String>();
+	    	System.err.println("Loading readcounts from " + args[2] + "...");
+	    	readcounts = loadReadcounts(args[2]);
+	    	System.err.println("Parsing variants from " + args[1] + "...");
+
+    		if(variantFile.exists())
+    		{
+    			BufferedReader in = new BufferedReader(new FileReader(variantFile));
+
+    			if(in.ready())
+    			{
+    				// Declare output files //
+    				PrintStream outFile = null;
+    				if(params.containsKey("output-file"))
+    					outFile = new PrintStream( new FileOutputStream(outFileName) );
+
+    				PrintStream filteredFile = null;
+    				if(params.containsKey("filtered-file"))
+    					filteredFile = new PrintStream( new FileOutputStream(filteredFileName) );
+
+    				String vcfHeaderInfo = "";
+    				vcfHeaderInfo = "##FILTER=<ID=VarCount,Description=\"Fewer than " + minVarCount + " variant-supporting reads\">";
+    				vcfHeaderInfo += "\n##FILTER=<ID=VarFreq,Description=\"Variant allele frequency below " + minVarFreq + "\">";
+    				vcfHeaderInfo += "\n##FILTER=<ID=VarReadPos,Description=\"Relative average read position < " + minVarReadPos + "\">";
+    				vcfHeaderInfo += "\n##FILTER=<ID=VarDist3,Description=\"Average distance to effective 3' end < " + minVarDist3 + "\">";
+    				vcfHeaderInfo += "\n##FILTER=<ID=VarMMQS,Description=\"Average mismatch quality sum for variant reads > " + maxVarMMQS + "\">";
+    				vcfHeaderInfo += "\n##FILTER=<ID=VarMapQual,Description=\"Average mapping quality of variant reads < " + minVarMapQual + "\">";
+    				vcfHeaderInfo += "\n##FILTER=<ID=VarBaseQual,Description=\"Average base quality of variant reads < " + minVarBaseQual + "\">";
+    				vcfHeaderInfo += "\n##FILTER=<ID=Strand,Description=\"Strand representation of variant reads < " + minStrandedness + "\">";
+    				vcfHeaderInfo += "\n##FILTER=<ID=RefMapQual,Description=\"Average mapping quality of reference reads < " + minRefMapQual + "\">";
+    				vcfHeaderInfo += "\n##FILTER=<ID=RefBaseQual,Description=\"Average base quality of reference reads < " + minRefBaseQual + "\">";
+    				vcfHeaderInfo += "\n##FILTER=<ID=MMQSdiff,Description=\"Mismatch quality sum difference (ref - var) > " + maxMMQSdiff + "\">";
+    				vcfHeaderInfo += "\n##FILTER=<ID=MapQualDiff,Description=\"Mapping quality difference (ref - var) > " + maxMapQualDiff + "\">";
+    				vcfHeaderInfo += "\n##FILTER=<ID=ReadLenDiff,Description=\"Average supporting read length difference (ref - var) > " + maxReadLenDiff + "\">";
+
+
+    	    		// Parse the infile line by line //
+
+    	    		while ((line = in.readLine()) != null)
+    	    		{
+    	    			lineCounter++;
+
+    	    			try
+    	    			{
+    		    			String[] lineContents = line.split("\t");
+    		    			String chrom = lineContents[0];
+    		    			String failReason = "";
+    		    			boolean filterFlag = false;
+
+    		    			if(chrom.equals("Chrom") || chrom.equals("chrom") || line.startsWith("#"))
+    		    			{
+        	    				if(line.startsWith("#"))
+        	    					isVCF = true;
+
+    		    				// Print header //
+    		    				if(isVCF)
+    		    				{
+	    	    					if(params.containsKey("output-file"))
+	    	    					{
+	    	    						if(line.startsWith("#CHROM"))
+	    	    							outFile.println(vcfHeaderInfo);
+
+	    	    						outFile.println(line);
+	    	    					}
+
+	    	    					if(params.containsKey("filtered-file"))
+	    	    					{
+	    	    						if(line.startsWith("#CHROM"))
+	    	    							filteredFile.println(vcfHeaderInfo);
+
+	    	    						filteredFile.println(line);
+	    	    					}
+    		    				}
+    		    				else
+    		    				{
+    		    					String filterHeader = "ref_reads\tvar_reads\tref_strand\tvar_strand\tref_basequal\tvar_basequal\tref_readpos\tvar_readpos\tref_dist3\tvar_dist3\tref_mapqual\tvar_mapqual\tmapqual_diff\tref_mmqs\tvar_mmqs\tmmqs_diff\tfilter_status";
+	    	    					if(params.containsKey("output-file"))
+	    	    						outFile.println(line + "\t" + filterHeader);
+	    	    					if(params.containsKey("filtered-file"))
+	    	    						filteredFile.println(line + "\t" + filterHeader);
+    		    				}
+
+    		    			}
+    		    			else
+    		    			{
+    		    				stats.put("numVariants", (stats.get("numVariants") + 1));
+    		    				int position = Integer.parseInt(lineContents[1]);
+    			    			String positionKey = chrom + "\t" + position;
+    			    			boolean isIndel = false;
+
+    			    			// Reference-allele values //
+    							int refReads = 0;
+    							double refMapQual = 0;
+    							double refBaseQual = 0;
+    							int refReadsPlus = 0;
+    							int refReadsMinus = 0;
+    							double refPos = 0;
+    							double refSubs = 0;
+    							double refMMQS = 0;
+    							double refRL = 0;
+    							double refDist3 = 0;
+
+    							// Variant-allele values //
+    							int varReads = 0;
+    							double varMapQual = 0;
+    							double varBaseQual = 0;
+    							int varReadsPlus = 0;
+    							int varReadsMinus = 0;
+    							double varPos = 0;
+    							double varSubs = 0;
+    							double varMMQS = 0;
+    							double varRL = 0;
+    							double varDist3 = 0;
+
+    							// Calculated values //
+    							double varFreq = 0;
+    							double varStrandedness = -1;
+    							double refStrandedness = -1;
+    							double mmqsDiff = 0;
+    							double mapQualDiff = 0;
+    							double avgReadLenDiff = 0;
+
+
+//    			    			if(readcounts.containsKey(positionKey))
+  //  			    			{
+			    				try {
+    			    				String refCounts = "";
+    			    				String varCounts = "";
+    			    				String ref = "";
+    			    				String alt = "";
+
+        	    					if(isVCF)
+        	    					{
+        	    						// Filter only applied to the first alt //
+        	    						ref = lineContents[3];
+        	    						String alts = lineContents[4];
+
+        	    						String[] altContents = alts.split(",");
+        	    						alt = altContents[0];
+        	    						// Check for and convert indel //
+        	    						if(ref.length() > 1)
+        	    						{
+        	    							isIndel = true;
+        	    							// Deletion //
+        	    							String thisVar = ref.replaceFirst(alt, "");
+        	    							ref = alt;
+        	    							alt = "-" + thisVar;
+        	    						}
+        	    						else if(alt.length() > 1)
+        	    						{
+        	    							// Insertion //
+        	    							isIndel = true;
+        	    							String thisVar = alt.replaceFirst(ref, "");
+        	    							alt = "+" + thisVar;
+        	    						}
+        	    					}
+        	    					else
+        	    					{
+        	    						ref = lineContents[2];
+        	    						String cns = lineContents[3];
+        	    						if(cns.length() > 1)
+        	    						{
+        	    							isIndel = true;
+        	    							// CONVERT INDEL //
+        	    							if(cns.contains("/"))
+        	    							{
+            	    							String[] indelContents = cns.split("/");
+            	    							if(indelContents.length > 1)
+            	    								alt = indelContents[1];
+        	    							}
+        	    							else
+        	    							{
+        	    								alt = cns;
+        	    							}
+
+        	    						}
+        	    						else
+        	    						{
+        	    							// CONVERT SNV //
+            	    						alt = VarScan.getVarAllele(ref, cns);
+
+        	    						}
+
+        	    					}
+
+        	    					if(alt.length() > 0)
+        	    					{
+        	    						// Set a SNV style variant key as the default //
+        	    						String varKey = chrom + "\t" + position + "\t" + alt;
+
+        	    						// Determine if we have an indel. If so, muddle around until we find the right varKey //
+        	    						if(ref.length() > 1 || alt.length() > 1)
+        	    						{
+        	    							isIndel = true;
+        	    							int indelSize = 0;
+        	    							if(ref.length() > 1)
+        	    								indelSize = ref.length() - 1;
+        	    							else
+        	    								indelSize = alt.length() - 1;
+
+        	    							if(readcounts.containsKey(varKey))
+        	    							{
+        	    								// Keep this perfect match //
+
+        	    							}
+        	    							else
+        	    							{
+        	    								// Shimmy back and forth out to size of indel to look for this //
+            	    							for(int windowSize = indelSize + 1; windowSize >= 1; windowSize--)
+            	    							{
+                	    							String testKey1 = chrom + "\t" + (position - windowSize) + "\t" + alt;
+                	    							String testKey2 = chrom + "\t" + (position + windowSize) + "\t" + alt;
+
+                	    							if(readcounts.containsKey(testKey1))
+                	    							{
+                	    								varKey = testKey1;
+                	    							}
+                	    							else if(readcounts.containsKey(testKey2))
+                	    							{
+                	    								varKey = testKey2;
+                	    							}
+
+            	    							}
+        	    							}
+
+
+        	    						}
+
+
+        	    						if(readcounts.containsKey(varKey))
+        	    						{
+        	    							stats.put("numWithRC", (stats.get("numWithRC") + 1));
+
+        	    							// Try to split out values and proceed with filter comparison //
+        	    							try {
+        	    								String[] rcContents = readcounts.get(varKey).split("\t");
+        	    								int readDepth = Integer.parseInt(rcContents[0]);
+        	    								varCounts = rcContents[1];
+            	    							String[] varContents = varCounts.split(":");
+
+            	    							// Parse out variant allele values //
+
+            	    							varReads = Integer.parseInt(varContents[1]);
+            	    							varMapQual = Double.parseDouble(varContents[2]);
+            	    							varBaseQual = Double.parseDouble(varContents[3]);
+            	    							varReadsPlus = Integer.parseInt(varContents[5]);
+            	    							varReadsMinus = Integer.parseInt(varContents[6]);
+            	    							varPos = Double.parseDouble(varContents[7]);
+            	    							varSubs = Double.parseDouble(varContents[8]);
+            	    							varMMQS = Double.parseDouble(varContents[9]);
+            	    							varRL = Double.parseDouble(varContents[12]);
+            	    							varDist3 = Double.parseDouble(varContents[13]);
+
+            	    							String refKey = chrom + "\t" + position + "\t" + ref;
+            	    							if(readcounts.containsKey(refKey))
+            	    							{
+            	    								String[] rcContents2 = readcounts.get(refKey).split("\t");
+            	    								int readDepth2 = Integer.parseInt(rcContents2[0]);
+
+            	    								refCounts = rcContents2[1];
+                	    							// Parse out reference allele values //
+                	    							String[] refContents = refCounts.split(":");
+
+                	    							refReads = Integer.parseInt(refContents[1]);
+                	    							refMapQual = Double.parseDouble(refContents[2]);
+                	    							refBaseQual = Double.parseDouble(refContents[3]);
+                	    							refReadsPlus = Integer.parseInt(refContents[5]);
+                	    							refReadsMinus = Integer.parseInt(refContents[6]);
+                	    							refPos = Double.parseDouble(refContents[7]);
+                	    							refSubs = Double.parseDouble(refContents[8]);
+                	    							refMMQS = Double.parseDouble(refContents[9]);
+                	    							refRL = Double.parseDouble(refContents[12]);
+                	    							refDist3 = Double.parseDouble(refContents[13]);
+            	    							}
+
+            	    							// If variant fails any criteria we have at this point, fail it //
+
+            	    							if(varReads < minVarCount)
+            	    							{
+            	    								if(failReason.length() > 0)
+            	    									failReason += ",";
+            	    								failReason += "VarCount";
+            	    								stats.put("numFailVarCount", (stats.get("numFailVarCount") + 1));
+            	    							}
+
+            	    							// Compute variant allele frequency //
+
+            	    							varFreq = (double) varReads / (double) readDepth;
+
+            	    							if(varFreq < minVarFreq)
+            	    							{
+            	    								if(failReason.length() > 0)
+            	    									failReason += ",";
+            	    								failReason += "VarFreq";
+            	    								stats.put("numFailVarFreq", (stats.get("numFailVarFreq") + 1));
+            	    							}
+
+            	    							// Variant read position //
+            	    							if(varPos < minVarReadPos)
+            	    							{
+            	    								if(failReason.length() > 0)
+            	    									failReason += ",";
+            	    								failReason += "VarReadPos";
+            	    								stats.put("numFailVarReadPos", (stats.get("numFailVarReadPos") + 1));
+            	    							}
+
+            	    							if(varDist3 < minVarDist3)
+            	    							{
+            	    								if(failReason.length() > 0)
+            	    									failReason += ",";
+            	    								failReason += "VarDist3";
+            	    								stats.put("numFailVarDist3", (stats.get("numFailVarDist3") + 1));
+            	    							}
+
+
+            	    							// Look at variant MMQS and MMQS diff //
+
+            	    							if(varMMQS > maxVarMMQS)
+            	    							{
+            	    								if(failReason.length() > 0)
+            	    									failReason += ",";
+            	    								failReason += "VarMMQS";
+            	    								stats.put("numFailVarMMQS", (stats.get("numFailVarMMQS") + 1));
+            	    							}
+
+            	    							// Apply minimum average mapqual checks //
+
+            	    							if(refReads > 0 && refMapQual < minRefMapQual)
+            	    							{
+            	    								if(failReason.length() > 0)
+            	    									failReason += ",";
+            	    								failReason += "RefMapQual";
+            	    								stats.put("numFailRefMapQual", (stats.get("numFailRefMapQual") + 1));
+
+            	    							}
+
+            	    							if(varReads > 0 && varMapQual < minVarMapQual)
+            	    							{
+            	    								if(failReason.length() > 0)
+            	    									failReason += ",";
+            	    								failReason += "VarMapQual";
+            	    								stats.put("numFailVarMapQual", (stats.get("numFailVarMapQual") + 1));
+            	    							}
+
+            	    							// Apply minimum average basequal checks //
+
+            	    							if(refReads > 0 && refBaseQual < minRefBaseQual)
+            	    							{
+            	    								if(failReason.length() > 0)
+            	    									failReason += ",";
+            	    								failReason += "RefBaseQual";
+            	    								stats.put("numFailRefBaseQual", (stats.get("numFailRefBaseQual") + 1));
+
+            	    							}
+
+            	    							if(!isIndel && varReads > 0 && varBaseQual < minVarBaseQual)
+            	    							{
+            	    								if(failReason.length() > 0)
+            	    									failReason += ",";
+            	    								failReason += "VarBaseQual";
+            	    								stats.put("numFailVarBaseQual", (stats.get("numFailVarBaseQual") + 1));
+            	    							}
+
+            	    							// IF we have enough reads supporting the variant, check strand //
+
+            	    							if(refReads >= minStrandReads && refReads > 0)
+            	    							{
+            	    								refStrandedness = (double) refReadsPlus / (double) refReads;
+            	    							}
+
+            	    							if(varReads >= minStrandReads && varReads > 0)
+            	    							{
+            	    								varStrandedness = (double) varReadsPlus / (double) varReads;
+
+        	    									if(varStrandedness < minStrandedness || (1 - varStrandedness) < minStrandedness)
+        	    									{
+                	    								if(failReason.length() > 0)
+                	    									failReason += ",";
+                	    								failReason += "Strand";
+                	    								stats.put("numFailStrand", (stats.get("numFailStrand") + 1));
+//                    	    								System.err.println(positionKey + "\t" + refReadsPlus + "\t" + refReadsMinus + "\t" + refStrandedness + "\t" + varReadsPlus + "\t" + varReadsMinus + "\t" + varStrandedness);
+        	    									}
+
+            	    							}
+
+            	    							if(refReads >= 2 && varReads >= 2)
+            	    							{
+            	    								stats.put("numWithReads1", (stats.get("numWithReads1") + 1));
+            	    								// Only make difference comparisons if we had reference reads //
+
+                	    							mmqsDiff = varMMQS - refMMQS;
+
+                	    							if(mmqsDiff > maxMMQSdiff)
+                	    							{
+                	    								if(failReason.length() > 0)
+                	    									failReason += ",";
+                	    								failReason += "MMQSdiff";
+                	    								stats.put("numFailMMQSdiff", (stats.get("numFailMMQSdiff") + 1));
+                	    							}
+
+                	    							// Compare average mapping qualities //
+
+                	    							mapQualDiff = refMapQual - varMapQual;
+
+//                    	    							System.err.println(refReads + "/" + readDepth + "\t" + refCounts + "\tMapqual diff: " + refMapQual + " - " + varMapQual + " = " + mapQualDiff);
+
+                	    							if(mapQualDiff > maxMapQualDiff)
+                	    							{
+                	    								if(failReason.length() > 0)
+                	    									failReason += ",";
+                	    								failReason += "MapQualDiff";
+                	    								stats.put("numFailMapQualDiff", (stats.get("numFailMapQualDiff") + 1));
+                	    							}
+
+                	    							avgReadLenDiff = (refRL - varRL) / refRL;
+
+                	    							if(avgReadLenDiff > maxReadLenDiff)
+                	    							{
+                	    								if(failReason.length() > 0)
+                	    									failReason += ",";
+                	    								failReason += "ReadLenDiff";
+                	    								stats.put("numFailReadLenDiff", (stats.get("numFailReadLenDiff") + 1));
+                	    							}
+
+            	    							}
+
+
+            	    							// Check to see if it failed any filters. If so, mark it for failure //
+
+            	    							if(failReason.length() > 0)
+            	    								filterFlag = true;
+
+
+        	    							}
+        	    							catch (Exception e)
+        	    							{
+        	    								System.err.println("Exception thrown while processing readcounts at " + positionKey + ": " + e.getLocalizedMessage());
+        	    								e.printStackTrace(System.err);
+        	    								return;
+        	    							}
+
+        	    						}
+        	    						else
+        	    						{
+        	    							// Unable to get readcounts for ref or var allele //
+        	    							failReason = "NoReadCounts";
+        	    							stats.put("numFailNoRC", (stats.get("numFailNoRC") + 1));
+        	    							filterFlag = true;
+        	    						}
+
+
+        	    					}	// End of else for non-VCF output
+
+			    				}
+			    				catch(Exception e)
+			    				{
+			    					System.err.println("Exception thrown while filtering at " + positionKey + ": " + e.getLocalizedMessage());
+			    					e.printStackTrace(System.err);
+			    					return;
+			    				}
+
+
+//    			    			}
+
+    			    			// Prepare filter output line //
+
+    			    			String filterColumns = "";
+    			    			filterColumns = refReads + "\t" + varReads + "\t" + threeDigits.format(refStrandedness) + "\t" + threeDigits.format(varStrandedness);
+    			    			filterColumns += "\t" + refBaseQual + "\t" + varBaseQual + "\t" + refPos + "\t" + varPos;
+    			    			filterColumns += "\t" + refDist3 + "\t" + varDist3 + "\t" + refMapQual + "\t" + varMapQual;
+    			    			filterColumns += "\t" + twoDigits.format(mapQualDiff) + "\t" + refMMQS + "\t" + varMMQS + "\t" + twoDigits.format(mmqsDiff);
+
+    			    			if(filterFlag)
+    							{
+    			    				// VARIANT FAILED THE FILTER //
+    								filterColumns += "\t" + failReason;
+    								stats.put("numFailFilter", (stats.get("numFailFilter") + 1));
+    							}
+    			    			else
+    			    			{
+    			    				// VARIANT PASSED THE FILTER
+    			    				filterColumns += "\t" + "PASS";
+    			    				stats.put("numPassFilter", (stats.get("numPassFilter") + 1));
+    			    			}
+
+    			    			// Prepare new VCF line //
+
+    			    			if(isVCF)
+    			    			{
+    			    				String newVCFline = "";
+
+    			    				for(int colCounter = 0; colCounter < lineContents.length; colCounter++)
+    			    				{
+    			    					if(newVCFline.length() > 0)
+    			    						newVCFline += "\t";
+
+    			    					if(filterFlag && colCounter == 6)
+   			    							newVCFline += failReason;
+    			    					else
+    			    						newVCFline += lineContents[colCounter];
+    			    				}
+    			    				// Print the output line if it passed, or if the user specified to keep failures //
+
+    			    				if(params.containsKey("output-file") && (!filterFlag || params.containsKey("keep-failures")))
+    		    						outFile.println(newVCFline);
+
+    			    				if(filterFlag && params.containsKey("filtered-file"))
+    		    						filteredFile.println(newVCFline);
+    			    			}
+    			    			else
+    			    			{
+    			    				// Print the output line if it passed, or if the user specified to keep failures //
+    			    				if(params.containsKey("output-file") && (!filterFlag || params.containsKey("keep-failures")))
+    		    						outFile.println(line + "\t" + filterColumns);
+
+    			    				// If it failed and the user specified a failure file, print it //
+    			    				if(filterFlag && params.containsKey("filtered-file"))
+    		    						filteredFile.println(line + "\t" + filterColumns);
+
+    			    			}
+
+    		    			}	// End of else for non-header line //
+    	    			}
+    	    			catch(Exception e)
+    	    			{
+    		    				System.err.println("Parsing Exception on line:\n" + line + "\n" + e.getLocalizedMessage());
+    		    				e.printStackTrace(System.err);
+    		    				return;
+    	    			}
+
+    	    		}
+
+    	    		// Report summary of results //
+
+    	    		System.err.println(stats.get("numVariants") + " variants in input file");
+    	    		System.err.println(stats.get("numWithRC") + " had a bam-readcount result");
+    	    		System.err.println(stats.get("numWithReads1") + " had reads1>=2");
+    	    		System.err.println(stats.get("numPassFilter") + " passed filters");
+    	    		System.err.println(stats.get("numFailFilter") + " failed filters");
+    	    		System.err.println("\t" + stats.get("numFailNoRC") + " failed because no readcounts were returned");
+    	    		System.err.println("\t" + stats.get("numFailVarCount") + " failed minimim variant count < " + minVarCount);
+    	    		System.err.println("\t" + stats.get("numFailVarFreq") + " failed minimum variant freq < " + minVarFreq);
+    	    		System.err.println("\t" + stats.get("numFailStrand") + " failed minimum strandedness < " + minStrandedness);
+    	    		System.err.println("\t" + stats.get("numFailVarReadPos") + " failed minimum variant readpos < " + minVarReadPos);
+    	    		System.err.println("\t" + stats.get("numFailVarDist3") + " failed minimum variant dist3 < " + minVarDist3);
+    	    		System.err.println("\t" + stats.get("numFailVarMMQS") + " failed maximum variant MMQS > " + maxVarMMQS);
+    	    		System.err.println("\t" + stats.get("numFailMMQSdiff") + " failed maximum MMQS diff (var - ref) > " + maxMMQSdiff);
+    	    		System.err.println("\t" + stats.get("numFailMapQualDiff") + " failed maximum mapqual diff (ref - var) > " + maxMapQualDiff);
+    	    		System.err.println("\t" + stats.get("numFailRefMapQual") + " failed minimim ref mapqual < " + minRefMapQual);
+    	    		System.err.println("\t" + stats.get("numFailVarMapQual") + " failed minimim var mapqual < " + minVarMapQual);
+    	    		System.err.println("\t" + stats.get("numFailRefBaseQual") + " failed minimim ref basequal < " + minRefBaseQual);
+    	    		System.err.println("\t" + stats.get("numFailVarBaseQual") + " failed minimim var basequal < " + minVarBaseQual);
+    	    		System.err.println("\t" + stats.get("numFailReadLenDiff") + " failed maximum RL diff (ref - var) > " + maxReadLenDiff);
+
+    	    		in.close();
+
+    			}
+    			else
+    			{
+    				System.err.println("Unable to open SNVs file for reading");
+    			}
+
+    			in.close();
+    		}
+    		else
+    		{
+    			// Infile doesn't exist //
+    		}
+	    }
+	    catch(Exception e)
+	    {
+	    	System.err.println("Error Parsing SNV File: " + e.getMessage() + "\n" + e.getLocalizedMessage());
+	    	e.printStackTrace(System.err);
+	    }
+
+
+	}
+
+
+	/**
+	 * Loads snvs to be filtered
+	 *
+	 * @param	filename	Path to file of snvs
+	 * @return	snvs		HashMap of SNV positions (chrom\tposition\alt)
+	 */
+	static HashMap<String, String> loadReadcounts(String filename)
+	{
+    	HashMap<String, String> readcounts = new HashMap<String, String>();
+
+	    try
+	    {
+	    	// Declare file-parsing variables //
+
+	    	String line;
+
+    		File infile = new File(filename);
+    		if(infile.exists())
+    		{
+    			BufferedReader in = new BufferedReader(new FileReader(infile));
+
+    			if(in.ready())
+    			{
+    				while ((line = in.readLine()) != null)
+    				{
+    					String[] lineContents = line.split("\t");
+						try {
+							String chrom = lineContents[0];
+	    					String position = lineContents[1];
+//	    					String snvKey = chrom + "\t" + position;
+
+	    					// Start resultLine as depth //
+
+
+	    					for(int colCounter = 4; colCounter < lineContents.length; colCounter++)
+	    					{
+	    						String[] alleleContents = lineContents[colCounter].split(":");
+	    						String thisAllele = alleleContents[0];
+	    						int thisReads = Integer.parseInt(alleleContents[1]);
+
+	    						if(thisAllele.equals("N") || thisAllele.equals("="))
+	    						{
+	    							// Skip non-informative alleles //
+	    						}
+	    						else if(thisReads == 0)
+	    						{
+	    							// Skip empty results //
+	    						}
+	    						else
+	    						{
+		    						String rcLine = lineContents[3];
+		    						rcLine = rcLine + "\t" + lineContents[colCounter];
+		    						// Save the readcount result line //
+		    						String snvKey = chrom + "\t" + position + "\t" + thisAllele;
+			    					readcounts.put(snvKey, rcLine);
+	    						}
+
+
+	    					}
+
+
+						}
+						catch (Exception e)
+						{
+							// If we encounter an issue, print a warning //
+							System.err.println("Warning: Exception thrown while loading bam-readcount: " + e.getMessage());
+							System.err.println("Attempting to continue, but please double-check file format and completeness");
+						}
+
+
+    				}
+    			}
+    			else
+    			{
+    				System.err.println("Unable to open SNVs file for reading");
+    			}
+
+    			in.close();
+    		}
+	    }
+	    catch(Exception e)
+	    {
+	    	System.err.println("Error Parsing SNV File: " + e.getLocalizedMessage());
+	    }
+
+	    return(readcounts);
+	}
+
+
+}
\ No newline at end of file
diff --git a/net/sf/varscan/LimitVariants.java b/sf/varscan/LimitVariants.java
similarity index 100%
rename from net/sf/varscan/LimitVariants.java
rename to sf/varscan/LimitVariants.java
diff --git a/net/sf/varscan/ProcessSomatic.java b/sf/varscan/ProcessSomatic.java
similarity index 100%
rename from net/sf/varscan/ProcessSomatic.java
rename to sf/varscan/ProcessSomatic.java
diff --git a/net/sf/varscan/ReadCounts.java b/sf/varscan/ReadCounts.java
similarity index 100%
rename from net/sf/varscan/ReadCounts.java
rename to sf/varscan/ReadCounts.java
diff --git a/net/sf/varscan/Somatic.java b/sf/varscan/Somatic.java
similarity index 100%
rename from net/sf/varscan/Somatic.java
rename to sf/varscan/Somatic.java
diff --git a/net/sf/varscan/Trio.java b/sf/varscan/Trio.java
similarity index 100%
rename from net/sf/varscan/Trio.java
rename to sf/varscan/Trio.java
diff --git a/net/sf/varscan/VarScan.java b/sf/varscan/VarScan.java
similarity index 98%
rename from net/sf/varscan/VarScan.java
rename to sf/varscan/VarScan.java
index 3c509d5..0bc9f38 100644
--- a/net/sf/varscan/VarScan.java
+++ b/sf/varscan/VarScan.java
@@ -65,6 +65,11 @@ import java.text.*;
  * 			Input:	VarScan output for SNPs or Indels (varscan.output.snp)
  * 			Output: Variants passing all filters (varscan.output.snp.filter)
  *
+ * fpFilter [variant-file] [readcount-file] OPTIONS
+ * 			Apply the false-positive filter to VarScan variant calls
+ * 			Input:	VarScan output for SNPs or Indels (varscan.output.snp) and bam-readcount output
+ * 			Output: Variants passing all filters (varscan.output.snp.fpfilter)
+ *
  * processSomatic [somatic-status file] OPTIONS
  * 			Process VarScan output by somatic status and confidence
  * 			Input:	VarScan output for SNPs or Indels (varscan.output.snp)
@@ -121,6 +126,7 @@ public class VarScan {
 
 				"\tfilter\t\t\tFilter SNPs by coverage, frequency, p-value, etc.\n" +
 				"\tsomaticFilter\t\tFilter somatic variants for clusters/indels\n" +
+				"\tfpfilter\t\tApply the false-positive filter\n\n" +
 				"\tprocessSomatic\t\tIsolate Germline/LOH/Somatic calls from output\n" +
 				"\tcopyCaller\t\tGC-adjust and process copy number changes from VarScan copynumber output\n" +
 
@@ -163,6 +169,11 @@ public class VarScan {
 				somaticFilter(args, params);
 			}
 
+			else if(args[0].equals("fpfilter"))
+			{
+				fpfilter(args, params);
+			}
+
 			else if(args[0].equals("processSomatic"))
 			{
 				processSomatic(args, params);
@@ -330,6 +341,17 @@ public class VarScan {
 		FilterVariants myFilter = new FilterVariants(args);
 	}
 
+
+	/**
+	 * Applies false positive filter using bam-readcount information
+	 *
+	 * @param	args	Command-line arguments
+	 */
+	public static void fpfilter(String[] args, HashMap<String, String> params)
+	{
+		FpFilter myFilter = new FpFilter(args);
+	}
+
 	/**
 	 * Filters variants by coverage, significance, frequency, etc.
 	 *

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/varscan.git



More information about the debian-med-commit mailing list