[med-svn] [qcumber] 14/15: New upstream version 1.0.0+dfsg
Andreas Tille
tille at debian.org
Wed Nov 23 08:21:09 UTC 2016
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository qcumber.
commit 1df4686ff044b06133621fbf15cf67843e849458
Author: Andreas Tille <tille at debian.org>
Date: Wed Nov 23 09:19:08 2016 +0100
New upstream version 1.0.0+dfsg
---
.gitignore | 5 +
QCumber.py | 508 +++++++++++++++++++++++++++++++++++++++++++++++++++
barplot.R | 28 +++
batch_report.html | 530 +++++++++++++++++++++++++++++++++++++-----------------
boxplot.R | 23 +++
classes.py | 440 ++++++++++++++++++++-------------------------
combineFastQC.py | 431 --------------------------------------------
config.txt | 8 +-
helper.py | 240 +++++++++++++++++++++++--
lib/box.js | 320 +++++++++++++++++++++++++++++++++
lib/qcstyle.css | 84 +++++++++
parameter.txt | 9 +-
readme.md | 119 ++++++++++++
readme.txt | 111 ------------
runQCPipeline.py | 484 -------------------------------------------------
sav.R | 150 ++++++++++++++++
16 files changed, 2027 insertions(+), 1463 deletions(-)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4e330dc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+.Rhistory
+.idea
+.svn
+__pycache__
+config.txt
diff --git a/QCumber.py b/QCumber.py
new file mode 100755
index 0000000..c21b0eb
--- /dev/null
+++ b/QCumber.py
@@ -0,0 +1,508 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+__author__ = 'LieuV'
+__version__ = "1.0.0"
+
+from classes import *
+from helper import *
+import os
+from itertools import groupby
+from jinja2 import Environment,FileSystemLoader
+import shutil
+import configparser
+from collections import OrderedDict
+import json
+
#dependencies
try:
    import argparse
except ImportError:
    # NOTE(review): this relies on 'sys' coming in via the star imports above
    # (there is no explicit 'import sys' in this file) -- verify against helper.py.
    print('The "argparse" module is not available. Use Python >3.2.')
    sys.exit(1)


# Pipeline-wide settings read from parameter.txt next to this script.
parameter = configparser.ConfigParser()
parameter.read(join(dirname(__file__), "parameter.txt"))

# Recognized fastq extensions and the filename patterns marking R1/R2 and lane.
file_extensions = parameter["Fileextension"]["fastq"]
r1_pattern = parameter["Pattern"]["R1"]
r2_pattern = parameter["Pattern"]["R2"]
lane_pattern = parameter["Pattern"]["lane"]
+
def get_illumina_reads(output, tmp):
    """Build the list of ReadSet objects for one Illumina sample.

    When arguments["r1"] is a list, the files are grouped by lane (using the
    configured lane pattern) and files of the same lane are concatenated into
    a single FastQFile; otherwise a single read set is returned directly.

    :param output: sample output directory (currently unused here)
    :param tmp: temp directory for concatenated lane files
    :return: list of ReadSet objects
    """
    readsets = []
    # Multiple files per read direction: split by lane.
    if isinstance(arguments["r1"], list):
        for lane in sorted(set([re.search(lane_pattern, x).group() for x in arguments["r1"]])):
            # concat files belonging to the same lane
            r1_reads = [x for x in arguments["r1"] if lane in x]
            readname = re.sub(r1_pattern + ".*", "", os.path.basename(r1_reads[0]))
            # BUGFIX: decide based on the number of R1 files *for this lane*
            # (was len(arguments["r1"]), i.e. all lanes), mirroring the R2 logic below.
            if len(r1_reads) != 1:
                r1 = FastQFile(join_reads(r1_reads, tmp, readname + "_R1"), [toLatex(os.path.basename(x)) for x in r1_reads])
            else:
                r1 = FastQFile(r1_reads[0])
            if arguments["r2"]:
                r2_reads = [x for x in arguments["r2"] if lane in x]
                if len(r2_reads) != 1:
                    r2 = FastQFile(join_reads(r2_reads, tmp, readname + "_R2"), [toLatex(os.path.basename(x)) for x in r2_reads])
                else:
                    r2 = FastQFile(r2_reads[0])
                readsets.append(ReadSet(r1, r2))
            else:
                readsets.append(ReadSet(r1))
    else:
        # Single file per read direction: no lane handling needed.
        r1 = FastQFile(arguments["r1"])
        if arguments["r2"]:
            r2 = FastQFile(arguments["r2"])
            return [ReadSet(r1, r2)]
        else:
            return [ReadSet(r1)]

    return readsets
+
def run_analysis(readset, arguments, output, tmp):
    """Run the QC chain on one read set: FastQC, Trimmomatic, FastQC again.

    The FastQC results of the trimmed reads are stored back on
    readset.trimRes; the (mutated) read set is returned.
    """
    print("Run FastQC...")
    readset.run_FastQC(join(output, "FastQC"))
    # Only hand a trimming percentage to Trimmomatic when -trimBetter was requested.
    trimming_perc = arguments["trimBetter_threshold"] if arguments["trimBetter"] else ""
    readset.run_Trimmomatic(arguments["adapter"], arguments["palindrome"],
                            arguments["minlen"], arguments["trimOption"],
                            trimming_perc)
    readset.trimRes = readset.trimRes.run_FastQC(tmp)
    return readset
+
+
def check_input(arguments):
    """Resolve the list of input files from the parsed CLI arguments.

    Explicit -1/-2 files take precedence over -input. For an Illumina input
    folder the contained fastq files are returned; for IonTorrent a single
    .bam path is returned. Exits via sys.exit() with a message on invalid
    input.

    :param arguments: dict of parsed command-line arguments
    :return: list of file paths (or a single bam path for IonTorrent)
    """
    # Explicit single-sample input.
    if arguments["r1"]:
        if arguments["r2"]:
            return [arguments["r1"], arguments["r2"]]
        else:
            return [arguments["r1"]]

    if arguments["technology"] == "Illumina":
        if os.path.isdir(arguments["input"]):
            files = os.listdir(arguments["input"])
            files = [os.path.join(arguments["input"], x) for x in files]
            # Keep regular files whose name contains a known fastq extension.
            # Uses short-circuit 'and' instead of the original bitwise '&'.
            files = [x for x in files if os.path.isfile(x) and any([ext in x for ext in file_extensions])]
            if len(files) == 0:
                sys.exit(arguments["input"] + " does not contain fastq files.")
            return files
        else:
            return [arguments["input"]]
    elif arguments["technology"] == "IonTorrent":
        if os.path.isfile(arguments["input"]):
            if arguments["input"].endswith(".bam"):
                return arguments["input"]
            else:
                sys.exit("IonTorrent PGM input has to be a bam-file. Otherwise use -b. Use --help for more information.")
        else:
            sys.exit("Incorrect file input")
    else:
        sys.exit("Incorrect file input")
    return None
+
def getSetname():
    """Derive the sample/set name from the global r1 argument.

    For Illumina file lists the last three underscore-separated fields
    (lane/read/number) are stripped from the first file's basename; in all
    other cases the bare filename (without extension) is used.
    """
    try:
        if arguments["technology"] == "Illumina":
            if isinstance(arguments["r1"], list):
                return "_".join(basename(arguments["r1"][0]).split("_")[:-3])
            else:
                return getFilenameWithoutExtension(arguments["r1"], True)
        else:
            return getFilenameWithoutExtension(arguments["r1"], getBase=True)
    except Exception:
        # Best-effort fallback to the plain filename. Narrowed from a bare
        # 'except:' so SystemExit/KeyboardInterrupt are not swallowed.
        return getFilenameWithoutExtension(arguments["r1"], getBase=True)
+
def fillClasses(temp_bowtie_path="", tmp=''):
    """Run the full per-sample pipeline and return the populated Sample.

    Builds the read sets for the configured technology, runs FastQC/trimming
    per read set and, unless disabled, Bowtie2 mapping and Kraken
    classification. The per-sample temp directory is always removed, even on
    failure.

    :param temp_bowtie_path: directory holding/receiving the bowtie2 index
    :param tmp: per-sample temporary directory (deleted on exit)
    :return: the populated Sample object
    """
    try:
        output = getDir([arguments["output"], "QCResults"], True)

        sample = Sample(getSetname(), output, arguments["reference"], arguments["threads"], arguments["technology"])
        if arguments["technology"].startswith("Illumina"):
            readsets = get_illumina_reads(output, tmp)
        elif arguments["technology"] == "IonTorrent":
            print("IonTorrent", arguments["r1"])
            fastqfile = bam_to_fastq(arguments["r1"], tmp)
            readsets = [ReadSet(fastqfile)]
        else:
            # BUGFIX: previously fell through with 'readsets' unbound,
            # raising an obscure NameError for unknown technologies.
            raise ValueError("Unsupported technology: " + str(arguments["technology"]))

        for rs in readsets:
            readset = run_analysis(rs, arguments, output, tmp)
            sample = sample.add_readSet(readset)
        if not arguments["nomapping"]:
            if arguments["save_mapping"]:
                sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, join(arguments["output"], "QCResults", sample.name + ".bam"))
            else:
                # Mapping statistics only; discard the alignment itself.
                sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, "/dev/null")
        if not arguments["nokraken"]:
            print("Run Kraken.")
            sample = sample.run_Kraken(arguments["db"])

        sample.stop = datetime.datetime.strftime(datetime.datetime.now(), "%d.%m.%Y, %H:%M:%S")
    finally:
        # Always clean up the per-sample temp dir, even when a step failed.
        shutil.rmtree(tmp)
    return sample
+
def writeReport(sample, arguments):
    """Render the per-sample LaTeX report and compile it to PDF.

    Writes summary_<sample>_<timestamp>.tex from the report.tex Jinja2
    template, runs pdflatex on it, and removes the LaTeX intermediate files
    afterwards (the .pdf is kept).
    """
    report_name = os.path.join(sample.mainResultsPath, "Report",
                               "summary_" + sample.name + "_" + datetime.datetime.strftime(datetime.datetime.now(), "%d-%m-%y_%H-%M"))
    print("Writing latex ", report_name)
    env = Environment()
    env.loader = FileSystemLoader(os.path.dirname(__file__))

    template = env.get_template("report.tex")
    pdf_latex = template.render(sample=sample, pipeline=Pipeline(),
                                trim_param=sample.readSets[0].trimRes.print_param(arguments["palindrome"], arguments["minlen"], arguments["trimOption"]))

    # 'with' guarantees the .tex file is closed before pdflatex reads it.
    with open(report_name + ".tex", "w") as latex:
        latex.write(pdf_latex)

    process = subprocess.Popen(["pdflatex", "-interaction=nonstopmode",
                                "-output-directory=" + join(sample.mainResultsPath, "Report"),
                                report_name + ".tex"],
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # BUGFIX: read both pipes with communicate() instead of draining stdout
    # to EOF first -- the old sequential readline loops could deadlock once
    # the stderr pipe buffer filled while stdout was still being consumed.
    _, stderr = process.communicate()
    for line in stderr.splitlines():
        print(line)

    # Remove LaTeX intermediates; missing files are not an error.
    for ext in (".tex", ".aux", ".log", ".toc", ".lof", ".lot", ".synctex.gz"):
        try:
            os.remove(report_name + ext)
        except OSError:
            pass
+
def main(arguments):
    """Validate arguments, run the QC pipeline over all samples, and write
    the batch outputs (summary.json, boxplots/barplots, batch_report.html,
    summary.latex). Temporary files are removed in all cases."""

    # --- output directory skeleton -------------------------------------
    tmp = join(arguments["output"], "QCResults", "qc_tmp")
    makedirs(tmp, exist_ok=True)
    makedirs(join(arguments["output"], "QCResults"), exist_ok=True)
    makedirs(join(arguments["output"], "QCResults", "FastQC"), exist_ok=True)
    makedirs(join(arguments["output"], "QCResults", "Trimmed"), exist_ok=True)
    makedirs(join(arguments["output"], "QCResults", "Trimmed", "FastQC"), exist_ok=True)
    makedirs(join(arguments["output"], "QCResults", "Report"), exist_ok=True)
    makedirs(join(arguments["output"], "QCResults", "Report", "src"), exist_ok=True)
    makedirs(join(arguments["output"], "QCResults", "Report", "src", "img"), exist_ok=True)

    # Fresh CSVs collecting per-sample FastQC distributions for the boxplots.
    for csv in ["Per_sequence_quality_scores.csv", "Per_sequence_GC_content.csv", "Sequence_Length_Distribution.csv"]:
        new_csv = open(join(arguments["output"], "QCResults", "Report", "src", csv), "w")
        new_csv.write("Sample,Read,Lane,Trimmed,Value,Count\n")
        new_csv.close()

    # Sequencer (SAV) information -- best effort, failures are non-fatal.
    try:
        write_SAV(arguments["sav"], join(arguments["output"], "QCResults", "Report", "src"))
    except Exception:
        print("Couldnt write SAV.")

    if not os.path.exists(join(arguments["output"], "QCResults", "Report", "lib")):
        shutil.copytree(join(os.path.dirname(__file__), "lib"), join(arguments["output"], "QCResults", "Report", "lib"))
    if arguments["technology"].startswith("Illumina"):
        technology = "Illumina"
    else:
        technology = "IonTorrent"

    # Default trimming parameters, depending on the optimization target.
    if arguments["forAssembly"]:
        if not arguments["trimBetter_threshold"]:
            arguments["trimBetter_threshold"] = parameter["forAssembly." + technology]["trimBetter_threshold"]
        if not arguments["trimOption"]:
            arguments["trimOption"] = parameter["forAssembly." + technology]["trimOption"]

    elif arguments["forMapping"]:
        if not arguments["trimBetter_threshold"]:
            arguments["trimBetter_threshold"] = parameter["forMapping." + technology]["trimBetter_threshold"]
        if not arguments["trimOption"]:
            arguments["trimOption"] = parameter["forMapping." + technology]["trimOption"]

    if not arguments["trimOption"]:
        arguments["trimOption"] = parameter["Trimmomatic"]["trimOption"]
    if not arguments["trimBetter_threshold"]:
        arguments["trimBetter_threshold"] = parameter["Trimmomatic"]["trimBetter_threshold"]

    # --- input validation ----------------------------------------------
    pattern = re.match(r"(?P<option>\w+):(?P<value1>\d+):(?P<value2>\d+)", arguments["trimOption"])
    if arguments["trimOption"] != "":
        # BUGFIX: a malformed non-empty trimOption used to crash with
        # AttributeError on pattern.group(); fail with a readable message.
        if pattern is None:
            sys.exit("Check trimOption. Expected <option>:<value>:<value>.")
        if not ((pattern.group("option") == "SLIDINGWINDOW") or (pattern.group("option") == "MAXINFO")):
            sys.exit("Check trimOption. Only 'SLIDINGWINDOW' or 'MAXINFO' allowed")
        try:
            int(pattern.group("value1"))
            int(pattern.group("value2"))
        except ValueError:
            sys.exit("Check trimOptions.")

    if arguments['technology'] == "Illumina":
        if not arguments['adapter']:
            sys.exit("Illumina projects requires an adapter file")
    if arguments["nomapping"]:
        arguments["reference"] = ""
    else:
        if not arguments["reference"]:
            sys.exit("Mapping needs a reference.")
        else:
            if not os.path.exists(arguments["reference"]):
                sys.exit("Reference does not exist.")

    if not arguments["nokraken"]:
        if not os.path.exists(arguments["db"]):
            sys.exit(arguments["db"] + " does not exist. Enter a valid database for kraken")
        else:
            if "database.kdb" not in os.listdir(arguments["db"]):
                sys.exit("database " + arguments["db"] + " does not contain necessary file database.kdb")
    if not arguments["input"]:
        if arguments["r1"]:
            print(os.path.abspath(arguments["r1"]), os.path.exists(arguments["r1"]))
            if not os.path.exists(arguments["r1"]):
                sys.exit(arguments["r1"] + " does not exist.")
        else:
            sys.exit("Input file required. Use option -input or -1 / -2")

        if arguments["r2"]:
            if not os.path.exists(arguments["r2"]):
                sys.exit(arguments["r2"] + " does not exist.")

    if arguments["index"]:
        mapping_index_path = arguments["index"]
    else:
        mapping_index_path = join(tmp, "bowtie2_index")
        os.makedirs(mapping_index_path, exist_ok=True)

    # --- run -----------------------------------------------------------
    try:
        # BUGFIX: keep the summary dict itself (json.dump returns None, so the
        # old 'batch_json = json.dump(...)' stored None until the first
        # successful sample reloaded it from disk).
        batch_json = {"commandline": arguments, "summary": [], "kraken": {}, "versions": Pipeline().__dict__}
        json.dump(batch_json, open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "w"))
        kraken_reports = OrderedDict()
        i = 1
        project = []
        tmp_overview = open(join(tmp, "overview.csv"), "w")
        tmp_overview.write("Sample,Trimmed,Mapped,Classified\n")
        myFiles = []

        # Collect the per-sample file sets.
        if arguments["input"]:
            input = [join(arguments["input"], x) for x in sorted(os.listdir(arguments["input"]))]
            #### check input format ####
            # Case 1: the folder contains fastq files directly.
            if len([x for x in input if isfile(x) and any([x.endswith(ext) for ext in file_extensions])]) > 0:
                input = [x for x in input if isfile(x) and any([x.endswith(ext) for ext in file_extensions])]
                if arguments["technology"].startswith("Illumina"):
                    # Group files by everything before the last three "_" fields
                    # (<lane>_<R1|R2>_<number>).
                    for setname, files in groupby(input, key=lambda x: "_".join(x.split("_")[:-3])):
                        temp_file = sorted(list(files))
                        r1_reads = [x for x in temp_file if r1_pattern in x]
                        r2_reads = [x for x in temp_file if r2_pattern in x]
                        myFiles.append([r1_reads, r2_reads])
                else:
                    # IonTorrent: one bam file per sample.
                    myFiles = input

            # Case 2: the folder contains one subfolder per sample.
            else:
                input = [x for x in input if os.path.isdir(x)]

                if len(input) > 0:
                    for sample in input:
                        files = sorted([join(sample, x) for x in listdir(sample) if any([x.endswith(ext) for ext in file_extensions])])
                        if arguments["technology"].startswith("Illumina"):
                            for setname, files in groupby(files, key=lambda x: "_".join(x.split("_")[:-3])):
                                temp_file = sorted(list(files))
                                r1_reads = [x for x in temp_file if r1_pattern in x]
                                r2_reads = [x for x in temp_file if r2_pattern in x]
                                myFiles.append([r1_reads, r2_reads])
                        else:
                            myFiles.append(sorted(list(files)))
                        files = None
                if len(myFiles) == 0:
                    sys.exit("Enter a valid folder, which contain sample folders")
        else:
            if arguments["r1"]:
                if not os.path.exists(arguments["r1"]):
                    sys.exit(arguments["r1"] + " does not exist.")

            if arguments["r2"]:
                myFiles.append([arguments["r1"], arguments["r2"]])
            else:
                myFiles.append([arguments["r1"]])

        print("Found " + str(len(myFiles)) + " sample(s).")
        for subset in myFiles:
            print("Analyse " + str(i) + "/" + str(len(myFiles)))

            sample_tmp = join(tmp, "Sample_" + str(i))
            os.makedirs(sample_tmp, exist_ok=True)

            # A bare string is a single IonTorrent bam; otherwise [r1[, r2]].
            if isinstance(subset, str):
                arguments["r1"] = subset
            else:
                arguments["r1"] = subset[0]
                if len(subset) > 1:
                    arguments["r2"] = subset[1]

            sample = fillClasses(mapping_index_path, sample_tmp)
            if sample is not None:
                project.append(sample)
                try:
                    writeReport(sample, arguments)
                except Exception:
                    print("Couldnt write pdf.")
                try:
                    # Reload, extend and rewrite summary.json per sample so a
                    # crash mid-batch still leaves the completed samples on disk.
                    batch_json = json.load(open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "r"), object_pairs_hook=OrderedDict)
                    if sample.mappingRes is None:
                        pMapping = 0
                        nMapping = 0
                    else:
                        pMapping = sample.mappingRes.percentAlignedReads
                        nMapping = sample.mappingRes.numberOfAlignedReads
                    if sample.krakenRes is None:
                        kraken = 0
                        kraken_reports = {}
                    else:
                        kraken = sample.krakenRes.pClassified
                        batch_json["kraken"][sample.name] = json.loads(str_to_json(sample.krakenRes.report))
                    tmp_overview.write(",".join([sample.name, str(sample.nTrimmed()), str(nMapping), str(kraken)]) + "\n")
                    batch_json["summary"].append(
                        OrderedDict([("setname", sample.name),
                                     ("Map to reference [#]", nMapping),
                                     ("Map to reference [%]", pMapping),
                                     ("Reads after trimming [#]", sample.nTrimmed()),
                                     ("Reads after trimming [%]", sample.pTrimmed()),
                                     ("Classified (Kraken)", kraken),
                                     ("Shorter fragments", sample.get_mean_trimRes()),
                                     ("Trim Parameter", sample.readSets[0].trimRes.print_param(arguments["palindrome"], arguments["minlen"], arguments["trimOption"])),
                                     ("images", [x.get_all_qc_dict() for x in sample.readSets])]))

                    json.dump(batch_json, open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "w"))
                except Exception:
                    print("Couldnt produce HTML report.")
            sample = None
            i += 1

        try:
            # Boxplots over the collected FastQC distributions.
            boxplots = [{"file": "Per_sequence_quality_scores.csv",
                         "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_quality_scores.png"),
                         "title": "Per sequence quality scores",
                         "ylab": "Mean Sequence Quality (Phred Score)",
                         "xlab": "Sample"},
                        {"file": "Sequence_Length_Distribution.csv",
                         "output": join(arguments["output"], "QCResults/Report/src/img", "Sequence_Length_Distribution.png"),
                         "title": "Sequence Length Distribution",
                         "ylab": "Sequence Length (bp)",
                         "xlab": "Sample"},
                        {"file": "Per_sequence_GC_content.csv",
                         "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_GC_content.png"),
                         "title": "Per sequence GC content",
                         "ylab": "Mean GC content (%)",
                         "xlab": "Sample"}]
            for plot in boxplots:
                process = subprocess.Popen(" ".join(["Rscript --vanilla ", join(os.path.dirname(__file__), "boxplot.R"),
                                                     join(arguments["output"], "QCResults", "Report", "src", plot["file"]), plot["output"], '"' + plot["title"] + '"', '"' + plot["xlab"] + '"', '"' + plot["ylab"] + '"']),
                                           stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
                for line in iter(process.stderr.readline, b''):
                    print(line)
                process.communicate()
            # Barplots for the overview table.
            tmp_overview.close()
            process = subprocess.Popen(" ".join(["Rscript --vanilla ", join(os.path.dirname(__file__), "barplot.R"), join(tmp, "overview.csv"),
                                                 join(arguments["output"], "QCResults/Report/src/img")]), stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
            for line in iter(process.stderr.readline, b''):
                print(line)
            process.communicate()
            shutil.copy(join(os.path.dirname(__file__), "batch_report.html"), join(arguments["output"], "QCResults", "Report", "batch_report.html"))

            summary_latex_name = join(arguments["output"], "QCResults", "Report", "summary.latex")
            with open(summary_latex_name, "w") as summary_latex:
                # BUGFIX: 7 data columns -> column spec 'lcccccc' (was 'lcc'),
                # and the header row now ends with a real row terminator '\\'.
                summary_latex.write("\\rowcolors{2}{gray!10}{}\n\\begin{table}[H]\\begin{tabular}{lcccccc}\n")
                summary_latex.write(
                    "\\rowcolor{tableBlue} \\textbf{Sample} & \\textbf{Map to reference [\#]} & \\textbf{Map to reference[\%]} & \\textbf{Reads after trimming [\#]} & \\textbf{Reads after trimming [\%]} & \\textbf{Classified (Kraken)} & \\textbf{Shorter fragments [\%]} \\\\ \n")

                # BUGFIX: iterate the entries actually collected in
                # batch_json["summary"]; the old loop ranged over the
                # never-filled 'jsonfile_dict', so the table was always empty.
                for entry in batch_json["summary"]:
                    summary_latex.write(
                        str(entry["setname"]) + " & " +
                        str(entry["Map to reference [#]"]) + " & " +
                        str(entry["Map to reference [%]"]) + " & " +
                        str(entry["Reads after trimming [#]"]) + " & " +
                        str(entry["Reads after trimming [%]"]) + " & " +
                        str(entry["Classified (Kraken)"]) + " & " +
                        str(entry["Shorter fragments"]) + "\\\\\n"
                    )
                summary_latex.write("\\end{tabular}\n")
                summary_latex.write("\\caption{Statistics on reads after trimming. }")
        except Exception:
            print("Couldnt produce html file.")
    finally:
        # Always drop the temp tree and the bulky per-file FastQC html pages.
        shutil.rmtree(tmp)
        delete_files(join(arguments["output"], "QCResults"), "FastQC", "_fastqc.html")
        delete_files(join(arguments["output"], "QCResults"), "Trimmed/FastQC", "_fastqc.html")
+
if __name__ == "__main__":
    # Read the optional default Kraken database location from config.txt
    # placed next to this script.
    config = configparser.ConfigParser()
    config.read(join(dirname(__file__), "config.txt"))
    kraken_db = ""
    if "DEFAULT" in config:
        if "kraken_db" in config["DEFAULT"].keys():
            kraken_db = config["DEFAULT"]["kraken_db"]

    # Command-line interface. Either -input (folder/batch mode) or -1/-2
    # (explicit files) must be supplied; main() enforces this.
    parser = argparse.ArgumentParser()
    parser.add_argument( '-input', dest='input', help = "input sample folder. Illumina filenames have to end with _<lane>_<R1|R2>_number, e.g. Sample_12_345_R1_001.fastq", required=False)
    parser.add_argument('-1' , dest='r1', help = "input file. Illumina filename must not match <project>_<lane>_<R1|R2>_<number> name pattern", required=False)
    parser.add_argument( '-2', dest='r2', help = "input file", required=False)

    parser.add_argument('-output', dest='output', default="")
    parser.add_argument('-technology', dest='technology',choices=['Illumina',"IonTorrent"] ,required = True)
    parser.add_argument('-threads', dest='threads', default = 4, type = int)
    parser.add_argument('-adapter', dest = 'adapter', choices = ['TruSeq2-PE', 'TruSeq2-SE' , 'TruSeq3-PE' , 'TruSeq3-SE' , 'TruSeq3-PE-2', 'NexteraPE-PE' ])
    parser.add_argument('-reference', dest='reference', required = False, help = "Map reads against reference")
    parser.add_argument('-index', dest='index', required = False, help = "Mapping index")
    parser.add_argument('-sav', dest='sav', required=False,help="Illumina folder for SAV. Requires RunInfo.xml, RunParamter.xml and Interop folder.")

    # Tool toggles and tuning parameters.
    parser.add_argument('-save_mapping', action = "store_true")
    parser.add_argument('-db', dest='db', help='Kraken database', required = False, default= kraken_db)
    parser.add_argument('-palindrome', dest='palindrome', help= 'Use palindrome parameter 30 or 1000 for further analysis. Default:30', default=30, type = int)
    parser.add_argument('-minlen', dest='minlen',help='Minlen parameter for Trimmomatic. Default:50', default=50, type=int)
    parser.add_argument('-trimOption', dest="trimOption", help='Use maxinfo or slidinginfo for Trimmomatic.MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>. default: SLIDINGWINDOW:4:15', type= str)
    parser.add_argument('-version', action='version', version='%(prog)s v' + __version__)
    parser.add_argument('-nokraken', action = "store_true")
    parser.add_argument('-nomapping', action = "store_true")
    parser.add_argument('-trimBetter', action = "store_true")
    parser.add_argument('-trimBetter_threshold', dest='trimBetter_threshold', help = "Default setting for Illumina: 0.15 and for IonTorrent: 0.25.", required=False, type=float)
    parser.add_argument('-forAssembly', action = "store_true", help = "Trim parameter are optimized for assemblies (trim more aggressive).")
    parser.add_argument('-forMapping', action = "store_true", help = "Trim parameter are optimized for mapping (allow more errors).")

    # main() works on a plain dict of the parsed arguments.
    arguments = vars(parser.parse_args())
    main(arguments)
diff --git a/barplot.R b/barplot.R
new file mode 100755
index 0000000..bcf3b94
--- /dev/null
+++ b/barplot.R
@@ -0,0 +1,28 @@
# Batch overview barplots (trimming / mapping / kraken) from the overview
# csv written by QCumber.py.
# Usage: Rscript --vanilla barplot.R <overview.csv> <output_dir>
library(ggplot2)
args = commandArgs(trailingOnly=TRUE)

summary<- as.data.frame(read.csv(args[1]))

# Reads surviving trimming, per sample.
ggplot(summary, aes(x=Sample, y = Trimmed )) +
  geom_bar(stat = "identity", fill="#67a9cf") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
  ggtitle("Number of reads after trimming") +
  xlab("Sample")+
  ylab("Reads [#]")
ggsave(paste(args[2], "/trimming.png", sep=""))

# Reads mapped to the reference, per sample.
ggplot(summary, aes(x=Sample, y = Mapped )) +
  geom_bar(stat = "identity", fill="#67a9cf") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
  ggtitle("Number of reads mapped to reference") +
  xlab("Sample")+
  ylab("Reads [#]")
ggsave(paste(args[2], "/mapping.png", sep=""))

# Percentage of reads classified by Kraken, per sample.
# BUGFIX: title typo "Precentage" -> "Percentage".
ggplot(summary, aes(x=Sample, y = Classified )) +
  geom_bar(stat = "identity", fill="#67a9cf") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
  ggtitle("Percentage of reads classified by Kraken") +
  xlab("Sample")+
  ylab("Reads [%]")
ggsave(paste(args[2], "/kraken.png", sep=""))
diff --git a/batch_report.html b/batch_report.html
index af91d19..8c8c574 100755
--- a/batch_report.html
+++ b/batch_report.html
@@ -3,20 +3,26 @@
<head>
<meta charset="utf-8">
<title>Batch Report</title>
- <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.12.0/jquery.min.js"></script>
- <script src="https://ajax.googleapis.com/ajax/libs/angularjs/1.4.5/angular.min.js"></script>
- <script src="http://d3js.org/d3.v3.min.js"></script>
+ <script src="lib/jquery.min.js"></script>
+ <script src="lib/angular.min.js"></script>
+ <script src="lib/d3.v3.min.js"></script>
<!-- Latest compiled and minified CSS -->
- <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css">
+ <link rel="stylesheet" href="lib/bootstrap.min.css">
<!-- Optional theme -->
- <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap-theme.min.css">
+ <link rel="stylesheet" href="lib/bootstrap-theme.min.css">
<!-- Latest compiled and minified JavaScript -->
- <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"></script>
+ <script src="lib/bootstrap.min.js"></script>
<style>
.row_hidden {
display: none;
}
+ .panel {
+ background-color:#F7F7F7;
+ }
+ .panel-heading{
+ cursor:pointer;
+ }
.show_row {
display: show;
}
@@ -43,8 +49,9 @@
box-shadow: 0 1px 3px rgba(0, 0, 0, .25);
}
- #sidebar {
- float: both;
+ .sidebar {
+
+ overflow-y: auto;
position: fixed;
height: 100%;
top: 50px;
@@ -53,17 +60,22 @@
box-shadow: 0 2px 10px 0 rgba(0, 0, 0, 0.12);
background: #f7f7f7;
}
-
- #header {
- background: #336AA2;
- position: fixed;
- width: 100%;
- height: 50px;
- top: 0;
- z-index: 1000;
- box-shadow: 0 2px 4px 0 rgba(0, 0, 0, 0.16), 0 2px 10px 0 rgba(0, 0, 0, 0.12);
- }
-
+ .navbar-default{
+ background-image: linear-gradient(to bottom, #1D95B6 0px, #0C6A89 100%);
+ }
+ .navbar-default .navbar-nav > .active > a, .navbar-default .navbar-nav > .open > a{
+ background-image: linear-gradient(to bottom, rgb(32, 162, 198) 0px, rgb(21, 117, 149) 100%);
+
+ }
+ .navbar-default .navbar-brand {
+ color: white;
+ }
+ h4{
+ color: #0A6780;
+ }
+ h5{
+ color: #0D7793;
+ }
.node {
cursor: pointer;
}
@@ -83,47 +95,21 @@
stroke: #ccc;
stroke-width: 1.5px;
}
+ .table{
+ margin-bottom: auto;}
+
</style>
<script>
- $(window).load(function () {
- $("#main").css({left: $('#sidebar').width()});
- });
-
- $(document).ready(function () {
-
- loadGallery(true, 'a.thumbnail');
+ //$(window).load(function () {
+ // $("#main").css({left: $('#sidebar').width()});
+ //});
- function loadGallery(setIDs, setClickAttr) {
- var current_image,
- selector,
- counter = 0;
-
- function updateGallery(selector) {
- var $sel = selector;
- current_image = $sel.data('image-id');
- $('#image-gallery-caption').text($sel.data('caption'));
- $('#image-gallery-title').text($sel.data('title'));
- $('#image-gallery-image').attr('src', $sel.data('image'));
- disableButtons(counter, $sel.data('image-id'));
- }
-
- if (setIDs == true) {
- $('[data-image-id]').each(function () {
- counter++;
- $(this).attr('data-image-id', counter);
- });
- }
- $(setClickAttr).on('click', function () {
- updateGallery($(this));
- });
- }
- });
//Start d3 tree
function d3_kraken(root, id) {
var margin = {top: 20, right: 120, bottom: 20, left: 120},
width = $(document).width(),//1600 - margin.right - margin.left,
- height =$(document).height();
+ height = $(document).height();
var i = 0,
duration = 750//,root
@@ -157,6 +143,7 @@
d.children = null;
}
}
+
function uncollapse(d) {
if (d._children) {
d.children = d._children;
@@ -173,23 +160,23 @@
d3.select(self.frameElement).style("height", "800px");
function update(source) {
-var levelWidth = [1];
- var childCount = function(level, n) {
+ var levelWidth = [1];
+ var childCount = function (level, n) {
- if (n.children && n.children.length > 0) {
- if (levelWidth.length <= level + 1) levelWidth.push(0);
+ if (n.children && n.children.length > 0) {
+ if (levelWidth.length <= level + 1) levelWidth.push(0);
- levelWidth[level + 1] += n.children.length;
- n.children.forEach(function(d) {
- childCount(level + 1, d);
- });
- }
- };
- childCount(0, root);
+ levelWidth[level + 1] += n.children.length;
+ n.children.forEach(function (d) {
+ childCount(level + 1, d);
+ });
+ }
+ };
+ childCount(0, root);
- var newHeight = d3.max(levelWidth) * 100; // 25 pixels per line
- tree = tree.size([newHeight, width]);
+ var newHeight = d3.max(levelWidth) * 100; // 25 pixels per line
+ tree = tree.size([newHeight, width]);
// Compute the new tree layout.
var nodes = tree.nodes(root).reverse(),
links = tree.links(nodes);
@@ -212,8 +199,10 @@ var levelWidth = [1];
return "translate(" + source.y0 + "," + source.x0 + ")";
})
.on("click", click)
-.attr("data-toggle","tooltip")
- .attr("title", function(d) { return d.name +"\n" + d.perc + "%";})
+ .attr("data-toggle", "tooltip")
+ .attr("title", function (d) {
+ return d.name + "\n" + d.perc + "%";
+ })
;
nodeEnter.append("circle")
@@ -313,9 +302,9 @@ var levelWidth = [1];
} else {
d.children = d._children;
d._children = null;
- if ( d.name == "root"){
- d.children.forEach(uncollapse);
- }
+ if (d.name == "root") {
+ d.children.forEach(uncollapse);
+ }
}
update(d);
}
@@ -323,38 +312,64 @@ var levelWidth = [1];
;
// end d3 tree
var myApp = angular.module('BatchReport', []);
- myApp.controller('SummaryTable', ['$scope', function ($scope) {
+ myApp.controller('SummaryTable',function ($scope, $http) {
// create a projects Object
$scope.projects = {};
// pretend data we just got back from the server
// this is an ARRAY of OBJECTS
$scope.information = {};
- $scope.information.commandline = @= commandline =@;
+ //$scope.information.commandline= {"minlen": 50, "index": null, "blobplot": false, "output": "", "nomapping": true, "r1": ["/home/lieuv/testdaten/Subsampled_Testdata/HCV/subsampled/16-204_S3_L001_R1_001.fastq", "/home/lieuv/testdaten/Subsampled_Testdata/HCV/subsampled/16-204_S3_L002_R1_001.fastq"], "reference": "", "threads": 4, "forMapping": false, "palindrome": 30, "technology": "Illumina", "adapter": "NexteraPE-PE", "r2": ["/home/lieuv/testdaten/Subsampled_Testdata/HCV/sub [...]
- $scope.projects.samples = @= json_samples =@;
$scope.show_summary = {};
$scope.show_img = {};
$scope.show_raw_data = true;
- $scope.show_kraken = "";
- $scope.kraken = @= kraken =@;
+ $scope.sequencer_information = [];
+ $scope.show_kraken = "";
+ $scope.kraken = {};
+
+ $http.get('src/summary.json')
+ .success(function(res){
+ $scope.information.commandline= res.commandline;
+ $scope.information.versions= res.versions;
+ $scope.projects.samples = res.summary;
+ $scope.kraken = res.kraken;
+ $.each($scope.kraken, function (key, value) {
+ d3_kraken(value, key)
+ });
+ });
+
+
+ $http.get('src/sav.json')
+ .success(function(res){
+ $scope.sequencer_information = res;
+ });
+
+
$.each($scope.kraken, function (key, value) {
+
d3_kraken(value, key)
});
$scope.$watch("show_kraken", function (newVal, oldVal) {
- d3.select("#kraken_" + newVal).attr("class", "show_row");
+ d3.select("#kraken_" + newVal).attr("class", "show_row");
d3.select("#kraken_" + oldVal).attr("class", "row_hidden");
});
$scope.collapse = true;
+ $scope.isArray = angular.isArray;
+ $scope.isString = angular.isString;
$scope.$watch("collapse", function (newVal, oldVal) {
d3.select("#kraken_" + newVal).attr("class", "show_row");
d3.select("#kraken_" + oldVal).attr("class", "row_hidden");
});
+ $scope.selected = {title:"", img:"", caption:""};
+ }
- }]);
+ )
+
+ ;
myApp.filter('imgFilter', function () {
return function (input, filter) {
@@ -393,92 +408,247 @@ var levelWidth = [1];
<div ng-app="BatchReport">
<!-- head Quality Control-->
- <div class="row" id="header">
+ <nav class="navbar navbar-default navbar-fixed-top" >
+ <div class="container-fluid">
+ <div class="navbar-header">
+ <a class="navbar-brand" href="#">Quality Control</a>
+ </div>
+ <ul class="nav navbar-nav">
- <div class="col-md-3"></div>
- <div class="col-md-3">
- <a href="#summary_plots" style="font-size: x-large;color: aliceblue;" data-toggle="tab">Quality Control</a>
- </div>
- <div class="col-md-1">
- <a href="#summary" style="color: aliceblue;">Summary Table</a>
- </div>
- <div class="col-md-1">
- <a href="#fastqc" style="color: aliceblue;" data-toggle="tab">FastQC</a>
- </div>
- <div class="col-md-1">
- <a href="#kraken_report" style="color: aliceblue;" data-toggle="tab">Kraken</a>
- </div>
- <div class="col-md-1">
- <a href="#information" style="color: aliceblue;" data-toggle="tab">Information</a>
- </div>
+ <li><a href = "#sequencer_information" style="color: aliceblue;" data-toggle="tab">Sequencer Information</a></li>
+ <li class="active" ><a href="#summary" style="color: aliceblue;" data-toggle="tab">Summary Table</a></li>
+ <li> <a href="#fastqc" style="color: aliceblue;" data-toggle="tab">FastQC</a></li>
+ <li> <a href="#kraken_report" style="color: aliceblue;" data-toggle="tab">Kraken</a></li>
+ <li> <a href="#information" style="color: aliceblue;" data-toggle="tab">Information</a></li>
+ </ul>
+ </div>
- </div>
+
+ </nav>
<!-- END head Quality Control-->
<!-- Summary Table Controller-->
<div ng-controller="SummaryTable">
<div class="row" style="margin-top: 50px">
- <!--Sidebar-->
- <div class="col-md-2" id="sidebar">
- <h4 style="color: #336AA2">Filter by ...</h4>
- <br/>
- <h5 style="color: #336AA2">... dataset</h5>
- <p>
- <input type="checkbox" ng-model="show_raw_data"/> show raw data
- </p>
- <br/>
- <!--Filter by image name-->
- <h5 style="color: #336AA2">..FastQC Images</h5>
- <p ng-repeat="(type,bla) in projects.samples[0].images[0].raw[0]">
- <input type="checkbox" ng-model="show_img[bla.name]" ng-init="show_img[bla.name]=true"/>
- {{bla.name}}
- </p>
-
- <!-- END Filter by image name-->
- <!--Filter by trimmed /untrimmed-->
-
- <!-- END Filter by trimmed /untrimmed-->
- </div>
- <!-- END Sidebar-->
-
- <div class="col-md-10" id="main" style="float: both;padding-left: 30px;">
- <!-- Summary Table -->
- <h4 id="summary" style="color:#336AA2">Summary</h4>
- <table class="table table-striped">
- <thead style="font-weight:600;color:#336AA2">
- <tr>
- <td ng-repeat="(headline,bla) in projects.samples[0]" ng-if="headline!='images'">{{headline}}
- </td>
- </tr>
- </thead>
- <tr ng-repeat="sample in projects.samples | orderBy:'setname'">
- <td ng-repeat="(key,value) in sample " ng-if="key!='images'">
- <input type="checkbox" ng-model="show_summary[value]" ng-if="key=='setname'"
- ng-init="show_summary[value]=true"/> {{value}}
- </td>
-
- </tr>
- </table>
- <!-- END Summary Table -->
+
+ <div class="col-md-12" id="main">
+
<!-- TAB-CONTENT-->
<div class="tab-content">
- <!-- FASTQC Images-->
- <div class="tab-pane fade " id="kraken_report">
+ <div class="tab-pane fade" id="sequencer_information">
+ <!--Sidebar-->
+ <div class="col-md-2 sidebar" >
+ </br>
+ <ul class="nav">
+ <li><a href = "#sequencer_information_table"><h5>Sequencer Information</h5></a></li>
+ <li>
+ <a href = "#sequencer_information_plots" ><h5>Sequencer Plots</h5></a></li>
+
+ </ul>
+ </div>
+ <!-- END Sidebar-->
+
+
+ <div class="col-md-10" style = "padding-left:17%;">
+ <h4 id= "sequencer_information_table">Sequencer Information</h4>
+ <table class="table table-striped">
+ <tr ng-repeat="(attr, value) in sequencer_information.tables">
+ <td style="max-width:80px;">{{attr}}</td>
+ <td ng-if = "isArray(value)==false && isString(value)==true">{{value}}</td>
+ <td ng-if = "isArray(value)==true " >
+ <table class="table table-sm" style="background-color:inherit; width : auto;" >
+ <tr>
+ <td ng-repeat = "(headline, value) in value[0]">{{headline}}</td>
+ </tr>
+ <tr ng-repeat="nested in value"><td ng-repeat = "(headline, value) in nested">{{value}}</td></tr>
+ </table></td>
+ <td ng-if = "isArray(value)==false && isString(value)==false" >
+ <table class ="table table-sm" style="background-color:inherit;width:auto;">
+ <tr ng-repeat = "(headline, value) in value">
+ <td >{{headline}}</td>
+ <td ng-if ="isArray(value)==false"> {{value}}</td>
+ <td ng-repeat = "val in value" ng-if ="isArray(value)==true"> {{val}}</td>
+ </tr>
+
+ </table></td>
+
+ </tr>
+ </table>
+
+ <h4 id= "sequencer_information_plots">Sequencer Plots</h4>
+ <a ng-click="selected.title='Data by Cycle - Intensity'; selected.caption='This plot shows the intensity by color of the 90% percentile of the data for each cycle.'; selected.img='src/img/intensity.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img width="400px" height="300px" ng-src="src/img/intensity.png">
+ </a>
+ <a ng-click="selected.title='Data by Cycle - FWHM'; selected.caption='The average full width of clusters at half maximum (in pixels).'; selected.img='src/img/FWHM.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img width="400px" height="300px" ng-src="src/img/FWHM.png">
+ </a>
+ <a ng-click="selected.title='Data by Cycle - % Base'; selected.caption='The percentage of clusters for which the selected base has been called.'; selected.img='src/img/base_perc.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img width="400px" height="300px" ng-src="src/img/base_perc.png">
+ </a>
+ <a ng-click="selected.title='Data by Lane - Phasing'; selected.caption='The percentage of molecules in a cluster for which sequencing falls behind (phasing) the current cycle within a read. The graph is split out per read.'; selected.img='src/img/phasing.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img width="400px" height="300px" ng-src="src/img/phasing.png">
+ </a>
+ <a ng-click="selected.title='Data by Lane - Prephasing'; selected.caption='The percentage of molecules in a cluster for which sequencing jumps ahead (prephasing) of the current cycle within a read. The graph is split out per read.';
+ selected.img='src/img/prephasing.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img width="400px" height="300px" ng-src="src/img/prephasing.png">
+ </a>
+
+
+ <a ng-click="selected.caption='The density of clusters for each tile (in thousand per mm2)'; selected.title='Data by Lane - Density'; selected.img='src/img/density.png'"
+ data-image-id="" data-toggle="modal" data-target="#image-gallery">
+ <img width="400px" height="300px" ng-src="src/img/density.png">
+ </a>
+ <a ng-click="selected.title='QScore Distribution'; selected.caption='The Q-score distribution shows the number of reads by quality score. The quality score is cumulative for the current cycle and previous cycles, and only
+ reads that pass the quality filter are included. The Q-score is based on the Phred scale.'; selected.img='src/img/qscore_distr.png'"
+ data-image-id="" data-toggle="modal" data-target="#image-gallery">
+ <img width="400px" height="300px" ng-src="src/img/qscore_distr.png">
+ </a>
+ <a ng-click="selected.title='QScore Heatmap'; selected.caption='The Q-score heat map shows the Q-score by cycle for all lanes.'; selected.img='src/img/qscore_heatmap.png'"
+ data-image-id="" data-toggle="modal" data-target="#image-gallery">
+ <img width="500px" height="300px" ng-src="src/img/qscore_heatmap.png">
+ </a>
+
+
+ </div>
+
+
+ </div>
+ <!-- END Sequencer Information content -->
+ <div class="tab-pane fade in active" id="summary">
+ <!-- Summary Table -->
+ <h4>Summary</h4>
+ <table class="table table-striped">
+ <thead style="font-weight:600;color:#0D7793">
+ <tr>
+ <td ng-repeat="(headline,bla) in projects.samples[0]" ng-if="headline!='images'">
+ {{headline}}
+ </td>
+ </tr>
+ </thead>
+ <tr ng-repeat="sample in projects.samples | orderBy:'setname'">
+ <td ng-repeat="(key,value) in sample " ng-if="key!='images'">
+ <input type="checkbox" ng-model="show_summary[value]" ng-if="key=='setname'"
+ ng-init="show_summary[value]=true"/> {{value}}
+ </td>
+
+ </tr>
+ </table>
+ <!-- END Summary Table -->
+ <div class="col-md-4">
+
+ <a ng-click="selected.title='Per sequence quality scores'; selected.caption=''; selected.img='src/img/Per_sequence_quality_scores.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img height="500px" ng-src="src/img/Per_sequence_quality_scores.png">
+ </a>
+ </div>
+ <div class="col-md-4">
+
+ <a ng-click="selected.title='Per sequence GC content'; selected.caption=''; selected.img='src/img/Per_sequence_GC_content.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img height="500px" ng-src="src/img/Per_sequence_GC_content.png">
+ </a>
+ </div>
+ <div class="col-md-4">
+
+ <a ng-click="selected.title='Sequence length distribution'; selected.caption=''; selected.img='src/img/Sequence_Length_Distribution.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img height="500px" ng-src="src/img/Sequence_Length_Distribution.png">
+ </a>
+ </div>
+ </br>
+ <div class="col-md-4">
+
+ <a ng-click="selected.title='Number of reads remained after trimming'; selected.caption=''; selected.img='src/img/trimming.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img height="500px" ng-src="src/img/trimming.png">
+ </a>
+ </div>
+ <div class="col-md-4">
+
+ <a ng-click="selected.title='Number of reads mapped to reference'; selected.caption=''; selected.img='src/img/mapping.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img height="500px" ng-src="src/img/mapping.png">
+ </a>
+ </div>
+
+ <div class="col-md-4">
+
+ <a ng-click="selected.title='Percentage of reads classified by Kraken'; selected.caption=''; selected.img='src/img/kraken.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img height="500px" ng-src="src/img/kraken.png">
+ </a>
+ </div>
- Show Kraken report of : <select id="select_kraken" ng-model="show_kraken">
- <option value="" disabled selected hidden>Choose sample...</option>
- <option ng-repeat="(name, value) in kraken" value="{{name}}">{{name}}</option>
- </select>
- <table id="kraken"></table>
- </div>
- <div class="tab-pane fade in active" id="summary_plots">
</div>
- <div class="tab-pane fade in active" id="fastqc">
- <div style="width: 100%; " class="row">
- <div style="display: table-row;" class="col-md-10">
+
+ <div class="tab-pane fade" id="fastqc">
+ <div style="width: 100%; " class="row">
+ <!--Sidebar-->
+ <div class="col-md-2 sidebar" >
+ <h4>Filter by ...</h4>
+ <br/>
+ <div class="panel panel-default" >
+ <div class="panel-heading">
+ <h5 class="panel-title" data-toggle="collapse" data-target="#show_sample">... sample</h5>
+
+ </div>
+ <div id = "show_sample" >
+ <p ng-repeat="(sample, value) in show_summary">
+ <input type="checkbox" ng-model="show_summary[sample]" ng-init="show_summary[sample] = value" /> {{sample}}
+ </p>
+ </div>
+</div>
+ </tr>
+ <div class="panel panel-default" >
+ <div class="panel-heading">
+ <h5 class="panel-title" data-toggle="collapse" data-target="#show_raw">... dataset</h5>
+
+ </div>
+ <div id = "show_raw" class = "collapse" >
+ <p>
+ <input type="checkbox" ng-model="show_raw_data"/> show raw data
+ </p>
+ </div>
+</div>
+ <!--Filter by image name-->
+
+ <div class="panel panel-default" >
+ <div class="panel-heading">
+ <h5 class="panel-title" data-toggle="collapse" data-target="#show_fastqc">... FastQC Images</h5>
+ </div>
+ <div class = "collapse" id="show_fastqc">
+ <p ng-repeat="(type,bla) in projects.samples[0].images[0].raw[0]">
+ <input type="checkbox" ng-model="show_img[bla.name]" ng-init="show_img[bla.name]=true"/>
+ {{bla.name}}
+ </p>
+</div></div>
+ <!-- END Filter by image name-->
+ <!--Filter by trimmed /untrimmed-->
+
+ <!-- END Filter by trimmed /untrimmed-->
+
+ </div>
+ <!-- END Sidebar-->
+ <div style="display: table-row;padding-left:17%;" class="col-md-10">
<div class="sample_box"
ng-repeat="sample in projects.samples | orderBy:'setname'| typeFilter: show_summary ">
@@ -490,7 +660,9 @@ var levelWidth = [1];
<div style="display:inline-block">
<a class="thumbnail"
ng-repeat="img in sample_img.raw[0]| imgFilter: show_img" href="#"
- data-image-id="" data-toggle="modal" data-title="Raw {{sample.setname}} R1"
+ ng-click="selected.caption=img.name; selected.title='Raw '+sample.setname +' R1'; selected.img=img.file"
+ data-image-id="" data-toggle="modal"
+ data-title="Raw {{sample.setname}} R1"
data-caption="{{img.name}}" data-image="{{img.file}}"
data-target="#image-gallery">
<img width="200px" height="150px" ng-src="{{img.file}}"
@@ -501,7 +673,9 @@ var levelWidth = [1];
<div style="display:inline-block">
<a class="thumbnail"
ng-repeat="img in sample_img.raw[1]| imgFilter: show_img" href="#"
- data-image-id="" data-toggle="modal" data-title="Raw {{sample.setname}} R2"
+ ng-click="selected.caption=img.name; selected.title='Raw ' + sample.setname + ' R2'; selected.img=img.file"
+ data-image-id="" data-toggle="modal"
+ data-title="Raw {{sample.setname}} R2"
data-caption="{{img.name}}" data-image="{{img.file}}"
data-target="#image-gallery">
<img width="200px" height="150px" ng-src="{{img.file}}"
@@ -514,7 +688,9 @@ var levelWidth = [1];
<div style="display:inline-block">
<a class="thumbnail"
ng-repeat="img in sample_img.trimmed[0]| imgFilter: show_img" href="#"
- data-image-id="" data-toggle="modal" data-title="Trimmed {{sample.setname}} R1"
+ ng-click="selected.caption=img.name; selected.title='Trimmed ' + sample.setname + ' R1'; selected.img=img.file"
+ data-image-id="" data-toggle="modal"
+ data-title="Trimmed {{sample.setname}} R1"
data-caption="{{img.name}}" data-image="{{img.file}}"
data-target="#image-gallery">
<img width="200px" height="150px" ng-src="{{img.file}}"
@@ -525,7 +701,9 @@ var levelWidth = [1];
<div style="display:inline-block">
<a class="thumbnail"
ng-repeat="img in sample_img.trimmed[1]| imgFilter: show_img" href="#"
- data-image-id="" data-toggle="modal" data-title="Trimmed {{sample.setname}} R2"
+ ng-click="selected.caption=img.name; selected.title='Trimmed ' + sample.setname + ' R2'; selected.img=img.file"
+ data-image-id="" data-toggle="modal"
+ data-title="Trimmed {{sample.setname}} R2"
data-caption="{{img.name}}" data-image="{{img.file}}"
data-target="#image-gallery">
<img width="200px" height="150px" ng-src="{{img.file}}"
@@ -538,48 +716,70 @@ var levelWidth = [1];
</div>
</div>
<!-- END FastQC -->
+
+ <!-- Kraken Report -->
+ <div class="tab-pane fade " id="kraken_report">
+
+ Show Kraken report of : <select id="select_kraken" ng-model="show_kraken">
+ <option value="" disabled selected hidden>Choose sample...</option>
+ <option ng-repeat="(name, value) in kraken" value="{{name}}">{{name}}</option>
+ </select>
+ <table id="kraken"></table>
+ </div>
+ <!-- END Kraken Report -->
+
+
<div class="tab-pane fade" id="information">
<h4>Commandline</h4>
- <table class="table">
+ <table class="table table-striped">
<tr ng-repeat="(attr, value) in information.commandline" ng-if="value !=''">
<td>{{attr}}</td>
<td>{{value}}</td>
</tr>
</table>
+ <h4>Version Control</h4>
+
+ <table class="table table-striped">
+ <tr ng-repeat="(attr, value) in information.versions" ng-if="value !=''">
+ <td>{{attr}}</td>
+ <td>{{value}}</td>
+ </tr>
+ </table>
</div>
<!-- END tab content-->
</div>
</div>
</div>
- </div>
- <!-- END div row-->
- <!-- Trigger the modal with a button -->
-
- <!-- Modal -->
+ <!-- Modal -->
<div class="modal fade" id="image-gallery" tabindex="-1" role="dialog" aria-labelledby="myModalLabel"
aria-hidden="true">
- <div class="modal-dialog">
+ <div class="modal-dialog modal-lg">
<div class="modal-content">
<div class="modal-header">
<button type="button" class="close" data-dismiss="modal" aria-label="Close"><span
aria-hidden="true">×</span></button>
- <div class="col-md-8 text-justify" id="image-gallery-title">
+ <div class="col-md-8 text-justify" id="image-gallery-title" > <h4>{{selected.title}}</h4>
</div>
</div>
- <div class="modal-body">
- <img id="image-gallery-image" class="img-responsive" src="">
+ <div class="modal-body modal-lg">
+ <img id="image-gallery-image" style="height:100%;width:100%;" class="img-responsive" src="{{selected.img}}">
</div>
<div class="modal-footer">
- <div class="col-md-8 text-justify" id="image-gallery-caption">
+ <div class="col-md-12 text-justify" id="image-gallery-caption">{{selected.caption}}
</div>
</div>
</div>
</div>
<!-- END Modal -->
</div>
+ <!-- END div row-->
+ <!-- Trigger the modal with a button -->
+
+
+ </div>
<!-- END Summary Table Controller-->
</div>
@@ -588,4 +788,4 @@ var levelWidth = [1];
</body>
-</html>
+</html>
\ No newline at end of file
diff --git a/boxplot.R b/boxplot.R
new file mode 100755
index 0000000..c347713
--- /dev/null
+++ b/boxplot.R
@@ -0,0 +1,23 @@
+#!/usr/bin/env Rscript
+library(ggplot2)
+args = commandArgs(trailingOnly=TRUE)
+mytable<- read.csv(args[1])
+mytable<- mytable[which(mytable$Count>0),]
+if(!any(is.na(mytable$Read))){
+ ggplot(mytable, aes(fill=Read,group=interaction(Sample, Trimmed, Read,Lane),x=interaction(Sample, Lane), y=Value, weight=Count)) +
+ geom_boxplot(outlier.size = 0.5) +theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
+ facet_wrap(~ Trimmed)+
+ scale_fill_manual(values=c("#ef8a62","#67a9cf","#D55E00", "#999999", "#56B4E9", "#F0E442", "#0072B2", "#CC79A7")) +
+ ggtitle(args[3]) +
+ xlab(args[4])+
+ ylab(args[5])
+}else{
+ ggplot(mytable, aes(fill=Sample,group=Sample,x=Sample, y=Value, weight=Count)) +
+ geom_boxplot(outlier.size = 0.5) +theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5),legend.position="none") +
+ facet_wrap(~ Trimmed)+
+ scale_fill_manual(values=c("#ef8a62","#67a9cf","#D55E00", "#999999", "#56B4E9", "#F0E442", "#0072B2", "#CC79A7")) +
+ ggtitle(args[3]) +
+ xlab(args[4])+
+ ylab(args[5])
+}
+ggsave(args[2])
\ No newline at end of file
diff --git a/classes.py b/classes.py
index fb181fd..f8fb79c 100755
--- a/classes.py
+++ b/classes.py
@@ -1,4 +1,4 @@
-import runQCPipeline
+import QCumber
import datetime
import numpy as np
import getpass
@@ -23,24 +23,22 @@ def get_tool_path(name, section="PATH"):
class Pipeline:
'''Get all tool versions '''
-
- name = "Quality Control"
- version = runQCPipeline.__version__
- user = getpass.getuser()
- python_version = toLatex(sys.version)
-
def __init__(self):
- self.fastqc_version = toLatex(subprocess.check_output(join(get_tool_path("fastqc"), "fastqc") + " --version", shell = True).decode("utf-8"))
+
+ self.name = "Quality Control"
+ self.version = QCumber.__version__
+ self.user = getpass.getuser()
+ self.python_version = toLatex(sys.version)
+ self.fastqc_version = toLatex(subprocess.check_output(join(get_tool_path("fastqc"), "fastqc") + " --version", shell = True, stderr=subprocess.PIPE).decode("utf-8"))
try:
- self.trimmo_version = subprocess.check_output(join(get_tool_path("trimmomatic"), "TrimmomaticSE") + " -version", shell=True).decode("utf-8")
+ self.trimmo_version = subprocess.check_output(join(get_tool_path("trimmomatic"), "TrimmomaticSE") + " -version", shell=True, stderr=subprocess.PIPE).decode("utf-8")
except:
- config = configparser.ConfigParser()
- config.read(join(dirname(__file__), "config.txt"))
- if "VERSION" in config:
- self.trimmo_version = config["VERSION"]["trimmomatic"]
- else:
- self.trimmo_version = "0.32"
+ try:
+ from debpackageinfo import get_upstream_version
+ self.trimmo_version = get_upstream_version("trimmomatic")
+ except:
+ self.trimmo_version = "< 0.32"
bowtie_v = subprocess.check_output(join(get_tool_path("bowtie2"), "bowtie2") + " --version", shell=True).decode("utf-8")
self.bowtie_version = re.match(".*\)", toLatex(bowtie_v, True)).group()
self.kraken_version =toLatex(subprocess.check_output( join(get_tool_path('kraken'), "kraken") + " --version", shell = True).decode("utf-8"))
@@ -53,21 +51,39 @@ class Pipeline:
self.machine = toLatex(system.machine)
-class Sample:
+class Sample(object):
"""Sample with reference to ReadSets, MappingRes, KrakenResr"""
- def __init__(self, name, path, reference, threads, technology):
+ def __init__(self, name, path, reference, threads, techn):
+ global mainResultsPath
+ global num_threads
+ global technology
+ num_threads = threads
+ mainResultsPath = path
+ technology = techn
+
self.start = datetime.datetime.strftime(datetime.datetime.now(),"%d.%m.%Y, %H:%M:%S")
self.stop = None
+
self.name = name
- self.technology = technology
+ self.technology = techn
+ self.threads = threads
+ self.phred = "phred33"
self.readSets = []
- #self.trimmed_readSets =[]
+
+ self.mainResultsPath = path
self.mappingRes = None
self.krakenRes = None
self.reference = reference
- self.mainResultsPath = path
- self.threads = threads
+ def get_all_qc_plots(self):
+ qc_dict = OrderedDict([("setname", self.name),("raw",[]), ("trimmed",[])])
+ for rs in self.readSets:
+ qc_dict["raw"].append(rs.r1.qcRes.get_plots())
+ qc_dict["trimmed"].append(rs.trimRes.readset.r1.qcRes.get_plots())
+ if (rs.r2 is not None):
+ qc_dict["raw"].append(rs.r2.qcRes.get_plots())
+ qc_dict["trimmed"].append(rs.trimRes.readset.r2.qcRes.get_plots())
+ return qc_dict
def total_reads(self):
total = 0
@@ -97,12 +113,12 @@ class Sample:
def run_Kraken(self, db):
r1, r2 = self.get_all_reads()
- self.krakenRes = KrakenResult(r1,r2,db, self.mainResultsPath, self.threads, splitext(self.readSets[0].r1.filename)[-1], self.name)
+ self.krakenRes = KrakenResult(r1,r2,db, self.name)
return self
def run_Bowtie2(self, bowtie_index, save_mapping ):
r1, r2 = self.get_all_reads()
- self.mappingRes = MappingResult(self.reference, r1, r2, self.readSets[0].r1.phred, bowtie_index)
+ self.mappingRes = MappingResult(self.reference, r1, r2, bowtie_index)
self.mappingRes = self.mappingRes.run_bowtie(self.threads, save_mapping)
return self.mappingRes
@@ -120,7 +136,8 @@ class Sample:
class ReadSet:
''' Readset contains of r1 and r2'''
- def __init__(self,outputDir, r1 ,r2=None):
+ def __init__(self, r1 ,r2=None):
+
self.r2 = None
if type(r1) is str:
self.r1 = FastQFile(r1) # FastQFile
@@ -133,7 +150,7 @@ class ReadSet:
self.numberOfReads = 0
self.numberofTrimmedReads = 0
self.trimRes = None
- self.outputDir = outputDir
+ #self.outputDir = outputDir
self.paired = True
if len( basename(self.r1.filename).split("_")[:-3]) > 4:
self.setname = "_".join(basename(self.r1.filename).split("_")[:-3])
@@ -142,55 +159,53 @@ class ReadSet:
if(r2 is None):
self.paired = False
- def run_FastQC(self, threads):
- self.r1 = self.r1.run_FastQC(threads, self.outputDir)
+ def run_FastQC(self, output):
+ self.r1 = self.r1.run_FastQC(output)
if(self.r2 is not None):
- self.r2 = self.r2.run_FastQC(threads, self.outputDir)
+ self.r2 = self.r2.run_FastQC(output)
return self
- def run_Trimmomatic(self, adapter, threads, palindrome, outputDir, technology, minlen, trimOption, betterTrimming):
-
- self.trimRes = TrimResult(adapter, palindrome,self.r1,self.r2, threads,outputDir, technology, minlen, trimOption, betterTrimming )
+ def run_Trimmomatic(self, adapter, palindrome, minlen, trimOption, betterTrimming):
+ self.trimRes = TrimResult(self, adapter, palindrome, minlen, trimOption, betterTrimming )
return self
def get_all_qc(self):
- qc_results = [ (self.r1.qcRes, self.trimRes.r1.qcRes) ]
+ qc_results = [ (self.r1.qcRes, self.trimRes.readset.r1.qcRes) ]
if (self.r2 is not None):
- qc_results.append((self.r2.qcRes, self.trimRes.r2.qcRes) )
+ qc_results.append((self.r2.qcRes, self.trimRes.readset.r2.qcRes) )
return qc_results
-
def get_all_qc_dict(self):
- qc_dict =OrderedDict( {"raw":[],"trimmed":[]} )
-
+ '''qc_dict =OrderedDict( {"raw":[],"trimmed":[]} )
qc_dict["raw"] = [[x.__dict__ for x in self.r1.qcRes.results_to_base64()]]
qc_dict["trimmed"] = [[x.__dict__ for x in self.trimRes.readset.r1.qcRes.results_to_base64()]]
-
if (self.r2 is not None):
qc_dict["raw"].append([x.__dict__ for x in self.r2.qcRes.results_to_base64()])
qc_dict["trimmed"].append([x.__dict__ for x in self.trimRes.readset.r2.qcRes.results_to_base64()])
-
+ return qc_dict'''
+ qc_dict = OrderedDict({"raw": [], "trimmed": []})
+ qc_dict["raw"] = [[x.__dict__ for x in self.r1.qcRes.results_for_report()]]
+ qc_dict["trimmed"] = [[x.__dict__ for x in self.trimRes.readset.r1.qcRes.results_for_report()]]
+ if (self.r2 is not None):
+ qc_dict["raw"].append([x.__dict__ for x in self.r2.qcRes.results_for_report()])
+ qc_dict["trimmed"].append([x.__dict__ for x in self.trimRes.readset.r2.qcRes.results_for_report()])
return qc_dict
class FastQFile:
- def __init__(self, absFilename):
+ def __init__(self, absFilename, concat_files = None):
self.filename = absFilename
self.qcRes = None
self.log = ""
self.phred="phred33"
self.concat_files = None
- def run_FastQC(self, threads, outputDir):
-
- outputDir = join(outputDir, "FastQC")
- makedirs(outputDir, exist_ok= True)
- process = subprocess.Popen([join(get_tool_path('fastqc'), "fastqc") , self.filename , "-o" , outputDir, "-t", str(threads), "--extract"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
-
+ def run_FastQC(self, outputDir):
+ process = subprocess.Popen([join(get_tool_path('fastqc'), "fastqc") , self.filename , "-o" , outputDir, "-t", str(num_threads), "--extract"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
for line in iter(process.stderr.readline, b''):
line = line.decode("utf-8")
line = line.replace("\n", "")
- print(line, end="\r")
+ print(line)
for line in iter(process.stdout.readline, b''):
line = line.decode("utf-8")
if line.startswith("Approx"):
@@ -202,7 +217,7 @@ class FastQFile:
self.phred = pattern.group("phred")
print("")
process.communicate()
- self.qcRes = QCResult(join(outputDir, getFilenameWithoutExtension(basename(self.filename), level = 5) + "_fastqc"))
+ self.qcRes = QCResult(join(outputDir, getFilenameWithoutExtension(basename(self.filename)) + "_fastqc"), join(mainResultsPath, "Report", "src"))
return self
def get_filename(self):
return toLatex(basename(self.filename))
@@ -211,19 +226,21 @@ class FastQFile:
###########
class QCResult:
- def __init__(self, resultsPath):
+ def __init__(self, resultsPath, summary_output):
self.path = resultsPath
- self.results = self.get_results(resultsPath)
+ self.results = self.get_results()
+ #write summary csv for boxplots
+ write_fastqc_summary(resultsPath, summary_output)
- def get_results(self, resultsPath):
+ def get_results(self):
summary_list = []
no_plot=["Basic Statistics", "Overrepresented sequences"]
- with open(join(resultsPath, "summary.txt"), "r") as summary:
+ with open(join(self.path, "summary.txt"), "r") as summary:
reader = csv.reader(summary, delimiter = "\t")
for row in reader:
if (row[1] not in no_plot):
try:
- plot = Plot(row[0], row[1], resultsPath)
+ plot = Plot(row[0], row[1], self.path)
if exists(plot.file):
summary_list.append(plot)
else:
@@ -234,12 +251,52 @@ class QCResult:
print(row[1], "does not exist")
summary_list.append(Plot("NotExisting", row[1], "NotExisting"))
return summary_list
+
+ def get_plots(self):
+ print("extracting data ", self.path)
+ store = False
+ data = {"name": basename(self.path).replace("_fastqc",""),
+ "Sequence_Length_Distribution": [],
+ "Per_sequence_GC_content": [],
+ "Per_base_sequence_quality": []}
+
+ with open(join(self.path, "fastqc_data.txt"), "r") as fastqcdatafile:
+ box_values = []
+ for line in iter(fastqcdatafile):
+ if line.startswith("#"):
+ pass
+ elif line.startswith(">>END_MODULE"):
+ if store==True:
+ data[key] = get_distribution(box_values, key)
+ store = False
+ key = None
+ box_values = []
+ elif line.startswith(">>"):
+ key = line.split("\t")[0]
+ key=key.replace(">>","")
+ key = key.replace(" ","_")
+ if key in data.keys():
+ store = True
+ else:
+ if store:
+ box_values.append(line)
+ return data
+
def results_to_base64(self):
converted_plot = []
for plot in self.results:
- with open(plot.file, "rb") as imgfile:
- imgstring = base64.b64encode(imgfile.read())
- plot.file ='data:image/png;base64,' + imgstring.decode("utf-8")
+ if not plot.file.startswith("NotExisting"):
+ with open(plot.file, "rb") as imgfile:
+ imgstring = base64.b64encode(imgfile.read())
+ plot.file ='data:image/png;base64,' + imgstring.decode("utf-8")
+ converted_plot.append(plot)
+ return converted_plot
+
+ def results_for_report(self):
+ converted_plot = []
+ for plot in self.results:
+ if not plot.file.startswith("NotExisting"):
+ plot.file =plot.file.replace(mainResultsPath, "..")
converted_plot.append(plot)
return converted_plot
@@ -272,8 +329,11 @@ class TrimParameters:
leading = str(default_parameter["Trimmomatic"]["leading"]) #"3" # remove leading low quality below 3
trailing = str(default_parameter["Trimmomatic"]["trailing"]) #"3" # remove trailing quality below 3
adapter_path = get_tool_path('adapter')
+ def __init__(self):
+ #self.palindrome = palindrome
+ self.extraTrimOptions = ""
- def make_callstring(self, adapter, palindrome, technology, minlen, trimOption, betterTrimmingOption ):
+ def make_callstring(self, adapter,palindrome, minlen, trimOption, betterTrimmingOption ):
call = []
if technology =="Illumina":
call.append("ILLUMINACLIP:" + join(self.adapter_path, adapter+ ".fa") + ":" + self.illuminaClip_seedMismatch + ":" + str(palindrome) + ":" + self.illuminaClip_simpleClip)
@@ -284,104 +344,94 @@ class TrimParameters:
class TrimResult:
- parameters = TrimParameters()
-
- def __init__(self, adapter, palindrome,r1,r2, threads, outputDir, technology, minlen, trimOption, betterTrimming):
- self.readset = ReadSet(outputDir, r1,r2)
- self.adapter = adapter
- self.technology = technology
- # pathnames
- self.outputDir = outputDir
+ def __init__(self, readset, adapter, palindrome, minlen, trimOption, betterTrimming):
+ self.parameters = TrimParameters()
+ self.readset = readset
self.r1_paired = None
self.r2_paired = None
self.r1_unpaired = None
self.r2_unpaired = None
+
self.logs = ""
+
+ #Results
self.shorter_fragments_percentage = 0
- self.threads = threads
self.input_number = 0
self.nTrimmed = 0
self.pTrimmed = 0
- self.extraTrimOptions = ""
- self.run(palindrome, outputDir, minlen, trimOption, betterTrimming)
- self.blobplot = ""
+ self.adapter = adapter
+ self.run( palindrome, minlen, trimOption, betterTrimming)
- def make_callstring(self, palindrome, path, technology, minlen, trimOption, save = False, betterTrimmingOption =""):
- if self.readset.paired:
+ def make_callstring(self, readset, palindrome, minlen, trimOption, save = False, betterTrimmingOption =""):
+ if readset.paired:
if save:
- self.r1_paired =join(path, "r1_P_" + getFilenameWithoutExtension(self.readset.r1.filename, level = 2,getBase = True) + ".fastq")
- self.r1_unpaired = join(path, "r1_UP_" + getFilenameWithoutExtension(self.readset.r1.filename, level=2, getBase = True)+ ".fastq")
- self.r2_paired =join(path, "r2_P_" + getFilenameWithoutExtension(self.readset.r2.filename, level=2,getBase = True)+ ".fastq")
- self.r2_unpaired =join(path, "r2_UP_" + getFilenameWithoutExtension(self.readset.r2.filename,level=2, getBase = True)+ ".fastq")
+ self.r1_paired =join(mainResultsPath, "Trimmed", "r1_P_" + getFilenameWithoutExtension(readset.r1.filename, getBase = True) + ".fastq")
+ self.r1_unpaired = join(mainResultsPath, "Trimmed", "r1_UP_" + getFilenameWithoutExtension(readset.r1.filename, getBase = True)+ ".fastq")
+ self.r2_paired =join(mainResultsPath, "Trimmed", "r2_P_" + getFilenameWithoutExtension(readset.r2.filename, getBase = True)+ ".fastq")
+ self.r2_unpaired =join(mainResultsPath, "Trimmed", "r2_UP_" + getFilenameWithoutExtension(readset.r2.filename, getBase = True)+ ".fastq")
output = " ".join([self.r1_paired, self.r1_unpaired, self.r2_paired, self.r2_unpaired])
else:
output = "/dev/null /dev/null /dev/null /dev/null"
callProcess = [join(get_tool_path("trimmomatic"), "TrimmomaticPE "),
- "-threads", str(self.threads),
- "-" + self.readset.r1.phred,
- self.readset.r1.filename, # input 1
- self.readset.r2.filename, # input 2
+ "-threads", str(num_threads),
+ "-" + readset.r1.phred,
+ readset.r1.filename, # input 1
+ readset.r2.filename, # input 2
output,
- self.parameters.make_callstring(self.adapter, palindrome, technology, minlen, trimOption, betterTrimmingOption),
+ self.parameters.make_callstring(self.adapter,palindrome, minlen, trimOption, betterTrimmingOption)
]
else:
callProcess = [join(get_tool_path("trimmomatic"),"TrimmomaticSE "),
- "-threads", str(self.threads),
- "-" + self.readset.r1.phred,
- self.readset.r1.filename, # input 1
- join(path, getFilenameWithoutExtension(self.readset.r1.filename,level=2, getBase = True)+ ".fastq"),#output
- self.parameters.make_callstring(self.adapter, palindrome, technology, minlen, trimOption, betterTrimmingOption),
+ "-threads", str(num_threads),
+ "-" + readset.r1.phred,
+ readset.r1.filename, # input 1
+ join(mainResultsPath, "Trimmed", getFilenameWithoutExtension(readset.r1.filename, getBase = True)+ ".fastq"),#output
+ self.parameters.make_callstring(self.adapter, palindrome, minlen, trimOption, betterTrimmingOption)
]
- self.r1_unpaired = join(path,getFilenameWithoutExtension(self.readset.r1.filename,level=2, getBase = True)+ ".fastq")
+ self.r1_unpaired = join(mainResultsPath, "Trimmed", getFilenameWithoutExtension(readset.r1.filename, getBase = True)+ ".fastq")
return callProcess
- def check_quality(self, path, trim_perc):
- #makedirs(join(path, "temp"), exist_ok=True)
- #path= join(path, "temp")
- if self.readset.paired:
+ def check_quality(self,readset, trim_perc):
+ path = join(mainResultsPath, "Trimmed", "temp")
+ if readset.paired:
try:
- r1_name = join_reads([self.r1_paired, self.r1_unpaired], path, basename(self.readset.r1.filename))
+ r1_name = join_reads([self.r1_paired, self.r1_unpaired], path, basename(readset.r1.filename))
print("Run FastQC to optimize trimming.")
- process = subprocess.Popen( [join(get_tool_path('fastqc'), "fastqc"), r1_name,"--nogroup", "--extract", "-o", path,"--threads", str(self.threads)], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ process = subprocess.Popen( [join(get_tool_path('fastqc'), "fastqc"), r1_name,"--nogroup", "--extract", "-o", path,"--threads", str(num_threads)], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
process.communicate()
- trim_bool, headcrop, crop = optimize_trimming(join(path, getFilenameWithoutExtension(basename(r1_name), level=5) + "_fastqc"),trim_perc)
+ trim_bool, headcrop, crop = optimize_trimming(join(path, getFilenameWithoutExtension(basename(r1_name)) + "_fastqc"), trim_perc)
#if r1 was ok, go on and check r2
print("Check 'per sequence base content' of R2.")
- r2_name = join_reads([self.r2_paired, self.r2_unpaired], path, basename(self.readset.r2.filename))
+ r2_name = join_reads([self.r2_paired, self.r2_unpaired], path, basename(readset.r2.filename))
process = subprocess.Popen(
- [join(get_tool_path('fastqc'), "fastqc"), r2_name,"--nogroup", "--extract", "-o",path, "--threads", str(self.threads)],
+ [join(get_tool_path('fastqc'), "fastqc"), r2_name,"--nogroup", "--extract", "-o",path, "--threads", str(num_threads)],
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- for line in iter(process.stderr.readline, b''):
- print(line)
- for line in iter(process.stdout.readline, b''):
- print(line)
process.communicate()
- trim_bool, r2_headcrop, r2_crop = optimize_trimming(join(path, getFilenameWithoutExtension(basename(r2_name), level=5) + "_fastqc"), trim_perc)
+ trim_bool, r2_headcrop, r2_crop = optimize_trimming(join(path, getFilenameWithoutExtension(basename(r2_name)) + "_fastqc"), trim_perc)
return trim_bool, max(headcrop, r2_headcrop), min(crop, r2_crop)
- #if headcrop >= 0 or crop > 0:
finally:
shutil.rmtree(path)
else:
- process = subprocess.Popen([join(get_tool_path('fastqc'), "fastqc"), self.r1_unpaired,"--nogroup", "--extract", "-o", join(path,"temp"), "--threads", str(self.threads)],
+ process = subprocess.Popen([join(get_tool_path('fastqc'), "fastqc"), self.r1_unpaired,"--nogroup", "--extract", "-o", path, "--threads", str(num_threads)],
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
for line in iter(process.stdout.readline, b''):
- print(line, end="\r")
+ print(line, end='\r')
process.communicate()
- trim_bool, headcrop, crop = optimize_trimming(join(path,"temp", getFilenameWithoutExtension(basename(self.r1_unpaired), level=5) + "_fastqc"), trim_perc)
+ trim_bool, headcrop, crop = optimize_trimming(join(path,getFilenameWithoutExtension(basename(self.r1_unpaired)) + "_fastqc"), trim_perc)
return trim_bool, headcrop, crop
- def run(self, palindrome, path, minlen, trimOption, betterTrimming):
-
+ def run(self, palindrome, minlen, trimOption, betterTrimming):
# Report Trimmomatic Output
# Run palindrome parameter which should be kept at last
+
survived = {'30':0, '1000':0}
- call = self.make_callstring(palindrome, path, self.technology,minlen, trimOption, save=True)
+ call = self.make_callstring( self.readset, palindrome, minlen, trimOption, save=True)
process = subprocess.Popen(" ".join(call), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
survived1, total, nTrimmedReads, pTrimmedReads, log = trimmomatic_report(process,call[0], True)
process.communicate()
@@ -389,13 +439,12 @@ class TrimResult:
### Optimize trimming
if betterTrimming !="":
print("Improve trimming parameter")
- makedirs(join(path,"temp"), exist_ok=True)
- print(join(path, "temp"), betterTrimming )
- trim_bool, final_headcrop, final_crop = self.check_quality(join(path, "temp"), betterTrimming)
+ makedirs(join(mainResultsPath, "Trimmed","temp"), exist_ok=True)
+ trim_bool, final_headcrop, final_crop = self.check_quality(self.readset, betterTrimming)
if trim_bool:
- self.extraTrimOptions = "HEADCROP:"+str(final_headcrop) + " CROP:"+str(final_crop)
- call = self.make_callstring(palindrome, path, self.technology, minlen, trimOption, save=True, betterTrimmingOption = self.extraTrimOptions)
+ self.parameters.extraTrimOptions = "HEADCROP:"+str(final_headcrop) + " CROP:"+str(final_crop)
+ call = self.make_callstring( self.readset, palindrome, minlen, trimOption, save=True, betterTrimmingOption = self.parameters.extraTrimOptions)
process = subprocess.Popen(" ".join(call), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
survived1, total, nTrimmedReads, pTrimmedReads, log = trimmomatic_report(process, call[0], True)
process.communicate()
@@ -406,13 +455,15 @@ class TrimResult:
self.logs += log
############################
+
survived[str(palindrome)] = survived1
if self.readset.r2 is not None:
if palindrome==30:
palindrome=1000
else:
+
palindrome=30
- process = subprocess.Popen(" ".join(self.make_callstring(palindrome, path, self.technology, minlen, trimOption, save=False, betterTrimmingOption = self.extraTrimOptions) ),
+ process = subprocess.Popen(" ".join(self.make_callstring( self.readset, palindrome, minlen, trimOption, save=False, betterTrimmingOption = self.parameters.extraTrimOptions) ),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE, shell=True)
survived2, total,nTrimmedReads, pTrimmedReads, temp = trimmomatic_report(process, call[0],False)
@@ -422,14 +473,14 @@ class TrimResult:
return self
- def run_FastQC(self, outputDir, tempdir):
+ def run_FastQC(self, tempdir):
if self.readset.paired:
#tempdir = tempfile.mkdtemp()
r1_name = join_reads([self.r1_paired, self.r1_unpaired], tempdir, basename(self.readset.r1.filename))
r2_name = join_reads([self.r2_paired, self.r2_unpaired], tempdir, basename(self.readset.r2.filename))
try:
- self.readset = ReadSet(outputDir, r1_name, r2_name)
- self.readset.run_FastQC(self.threads)
+ self.readset = ReadSet( r1_name, r2_name)
+ self.readset.run_FastQC(join(mainResultsPath, "Trimmed/FastQC"))
except:
print(sys.exc_info())
sys.exit()
@@ -439,34 +490,29 @@ class TrimResult:
self.readset.r1.filename = " ".join([self.r1_paired, self.r1_unpaired])
self.readset.r2.filename = " ".join([self.r2_paired, self.r2_unpaired])
else:
- self.readset = ReadSet(outputDir, self.r1_unpaired)
- self.readset.run_FastQC(self.threads)
+ self.readset = ReadSet( self.r1_unpaired)
+ self.readset.run_FastQC(join(mainResultsPath, "Trimmed"))
return self
- def run_blobplot(self, db):
- all_inputs = [x for x in [self.r1_paired, self.r1_unpaired, self.r2_paired, self.r2_unpaired] if x is not None]
- blobplot = BlobResult()
- self.blobplot = blobplot.run(all_inputs, self.outputDir, self.threads, self.readset.setname.split(".")[0], db)
- return self
- def print_param(self, minlen, trimOption, pali):
- return self.parameters.make_callstring(self.adapter, pali, self.technology, minlen, trimOption, self.extraTrimOptions)
+
+ def print_param(self, palindrome, minlen, trimOption):
+ return self.parameters.make_callstring(self.adapter, palindrome, minlen, trimOption, self.parameters.extraTrimOptions)
class MappingParameter:
def __init__(self, mapping):
pass
class MappingResult:
- def __init__(self, reference, r1_name, r2_name, phred, bowtie_index):
+ def __init__(self, reference, r1_name, r2_name, bowtie_index):
self.numberOfAlignedReads = 0
self.percentAlignedReads= 0
self.index = self.build_index(reference, bowtie_index)
self.log = ""
self.r1_name = r1_name
self.r2_name = r2_name
- self.phred = phred
def build_index(self, reference, index):
- index_name = join(index, getFilenameWithoutExtension(basename(reference), level=2) +"_bt2") # reference name "bowtie2 build index_name"
+ index_name = join(index, getFilenameWithoutExtension(basename(reference)) +"_bt2") # reference name "bowtie2 build index_name"
if not isdir(index):
return index
process = subprocess.Popen([join(get_tool_path("bowtie2"),"bowtie2-build"),reference, index_name ], stderr=subprocess.PIPE, stdout = subprocess.PIPE)
@@ -481,10 +527,14 @@ class MappingResult:
def run_bowtie(self, threads, save_mapping):
print("Run Bowtie2")
- call = [join(get_tool_path("bowtie2"), "bowtie2"), "-x", self.index, "--no-unal --threads ", str(threads)]
- call.extend(["-U", re.sub(r"(?<!\\)\s",",",self.r1_name)])
- if self.r2_name is not None:
- call.extend(['-U',re.sub(r"(?<!\\)\s",",",self.r2_name)])
+ call = [join(get_tool_path("bowtie2"), "bowtie2"), "-x", self.index, "--no-unal --threads ", str(num_threads)]
+
+ #if self.r2_name is not None:
+ # call.extend(["-1", re.sub(r"(?<!\\)\s", ",", self.r1_name)])
+ # call.extend(['-2',re.sub(r"(?<!\\)\s",",",self.r2_name)])
+ #else:
+ # call.extend(["-U", re.sub(r"(?<!\\)\s", ",", self.r1_name)])
+ call.extend(["-U", re.sub(r"(?<!\\)\s", ",", self.r1_name + " " + self.r2_name)])
call.extend(["-S", save_mapping, "2>&1"])
#print("Call ", " ".join(call))
process = subprocess.Popen(" ".join(call), stderr=subprocess.PIPE, stdout= subprocess.PIPE, shell=True)
@@ -513,44 +563,43 @@ class MappingResult:
class KrakenResult:
- def __init__(self, r1,r2, db, outputDir, threads, fileext, project_name):
+ def __init__(self, r1,r2, db, name):
+ self.name = name
self.report_name = None
self.report = ""
self.pClassified = 0
self.nClassified = 0
self.log = ""
- self.run_kraken(r1,r2, db, outputDir, threads, fileext, project_name)
+ self.run_kraken(r1,r2, db)
- def make_callstring(self, r1,r2, db, outputDir, threads, fileext, project_name):
+ def make_callstring(self, r1,r2, db):
#input_type = ""
#if fileext.endswith("")
call = [join(get_tool_path('kraken') , "kraken") , "--db", db, r1]
if r2 is not None:
call.append( r2)# + " --paired")
call.append("--preload")
- call.append("--threads " + str(threads))
- self.report_name = join(outputDir, project_name + ".krakenreport")
+ call.append("--threads " + str(num_threads))
+ self.report_name = join(mainResultsPath, self.name + ".krakenreport")
if exists(self.report_name):
i = 2
- new_name = join(outputDir, project_name + "_" +str(i) + ".krakenreport")
+ new_name = join(mainResultsPath, self.name + "_" +str(i) + ".krakenreport")
while exists(new_name):
i += 1
- new_name = join(outputDir, project_name + "_" + str(i) + ".krakenreport")
+ new_name = join(mainResultsPath, self.name + "_" + str(i) + ".krakenreport")
self.report_name=new_name
call.append("--output " + self.report_name)
return " ".join(call)
- def run_kraken(self, r1,r2, db, outputDir, threads, fileext, project_name):
- #print(self.make_callstring(r1,r2, db, outputDir, threads, fileext, project_name))
- process = subprocess.Popen(self.make_callstring(r1,r2, db, outputDir, threads, fileext, project_name), shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
- self.log += "\\textcolor{gray}{Using Parameter " + " ".join(["\path{"+ x + "}" for x in self.make_callstring(r1,r2, db, outputDir, threads, fileext, project_name).split(" ")]) + "}\\\\"
+ def run_kraken(self, r1,r2, db):
+ process = subprocess.Popen(self.make_callstring(r1,r2, db), shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+ self.log += "\\textcolor{gray}{Using Parameter " + " ".join(["\path{"+ x + "}" for x in self.make_callstring(r1,r2, db).split(" ")]) + "}\\\\"
for line in iter(process.stderr.readline, b''):
line = line.decode("utf-8")
print(line)
if not (line.startswith("Loading database") or re.search("Processed \d+", line)):
- #self.output += line + "\n"
self.log += toLatex(line) +"\n"
pattern = re.match("\s+(?P<nClassified>\d+) sequences classified \((?P<pClassified>\d+.\d+)%\)", line)
if pattern:
@@ -562,7 +611,6 @@ class KrakenResult:
print(line)
if not ( re.search("Processed \d+", line) or line.startswith("Loading database") ):
self.output += line + "\n"
-
process.communicate()
print("Convert Kraken-Report..")
@@ -572,110 +620,6 @@ class KrakenResult:
def convert_report(self, db):
process = subprocess.check_output([join(get_tool_path('kraken') ,'kraken-report') , "--db", db, self.report_name])
self.report = process.decode("utf-8")
- #converted_kraken = open("kraken_output.csv","w")
- #converted_kraken.write(self.report)
- #converted_kraken.close()
return self
-class AssemblyParameter:
- method = "velvet"
- kmer = "35"
- input_type = "-fmtAuto -short"
-
-class BlastParameter:
- method="blastn"
- task = "megablast"
- evalue= "1e-5"
- max_target_seqs = "1"
- outfmt="'6 qseqid staxids bitscore std sscinames sskingdoms stitle' "
- extraParams = "-culling_limit 5"
-
- def __init__(self):
- self.db = join(get_tool_path("blast_db","DEFAULT"), "nt")
-
- def print(self):
- return " ".join([self.method, "-task", self.task, "-evalue", self.evalue, "-max_target_seqs", self.max_target_seqs, "-outfmt", self.outfmt, self.extraParams , "-db", self.db])
-
-class BlobParameter:
- def __init__(self, taxonomy_path):
- self.db = ""
- self.nodes = join(taxonomy_path, "taxonomy/nodes.dmp")
- self.names = join(taxonomy_path, "taxonomy/names.dmp")
-
- def print(self):
- if self.db !="":
- return "--db " + self.db
- else:
- return " ".join(["--nodes", self.nodes, "--names", self.names])
-
-class BlobResult:
- assParams = AssemblyParameter()
- blastParams = BlastParameter()
-
- def run(self, input, output, threads, setname, db):
- print("Run blobtools...")
- self.blobParams = BlobParameter(db)
- contigs=""
- #ASSEMBLY
- print("...first assemble")
- if self.assParams.method == "velvet":
- concat_input = " -short ".join(input)
- full_output = join(output, "Velvet",setname)
- makedirs(full_output, exist_ok=True)
- #print(" ".join(["velveth", full_output, self.assParams.kmer, self.assParams.input_type, concat_input]))
-
- process = subprocess.Popen(" ".join(["velveth", full_output, self.assParams.kmer, self.assParams.input_type, concat_input]), shell= True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
- '''for line in iter(process.stderr.readline, b''):
- print(line)
- for line in iter(process.stdout.readline, b''):
- print(line)'''
- process.communicate()
- print(["velvetg", full_output])
- process = subprocess.Popen(" ".join(["velvetg", full_output]), shell=True, stderr=subprocess.PIPE,
- stdout=subprocess.PIPE)
- '''for line in iter(process.stderr.readline, b''):
- print(line)
- for line in iter(process.stdout.readline, b''):
- print(line)'''
- process.communicate()
-
- contigs = join(full_output, "contigs.fa")
-
- # blast search
- blast_result = join(output, setname + "." + self.blastParams.evalue + ".megablast")
- print("...blastn ")
-
- process = subprocess.Popen(" ".join([self.blastParams.print(), "-query", contigs, "-out",blast_result, "-num_threads", str(threads)]),shell = True,stderr = subprocess.PIPE, stdout = subprocess.PIPE)
- for line in iter(process.stderr.readline, b''):
- print(line)
- for line in iter(process.stdout.readline, b''):
- print(line)
- process.communicate()
-
- # blobtools
- img = None
- blob_output= join(output, setname)
- print("Run blobtools")
- process = subprocess.Popen(" ".join([join(get_tool_path("blobtools"),"blobtools"), "create", self.blobParams.print(), "-y", self.assParams.method, "-t", blast_result, "-i" , contigs, "-o", blob_output]),shell = True,
- stderr = subprocess.PIPE,
- stdout = subprocess.PIPE)
- for line in iter(process.stderr.readline, b''):
- print(line)
- for line in iter(process.stdout.readline, b''):
- print(line)
- process.communicate()
- process = subprocess.Popen(" ".join([join(get_tool_path("blobtools"),"blobtools"), "plot", "-i",blob_output + ".BlobDB.json"]),shell = True,
- stderr = subprocess.PIPE,
- stdout = subprocess.PIPE)
- for line in iter(process.stdout.readline, b''):
- line = line.decode()
- if line.startswith("[STATUS]"):
- if "Plotting" in line:
- pattern = re.match("\[STATUS\]\t: Plotting (?P<path>.*)",line)
- img = pattern.group("path")
- print(line)
-
- process.communicate()
-
- return img
diff --git a/combineFastQC.py b/combineFastQC.py
deleted file mode 100755
index b0ca7d3..0000000
--- a/combineFastQC.py
+++ /dev/null
@@ -1,431 +0,0 @@
- # !/usr/bin/env python # -*- coding: utf-8 -*-
-# created by Stephan Fuchs (FG13, RKI), 2015
-
-'''
-Combine FastQC Results
-Input: Project/Sample Folder which was created by runQCPipeline (folder contains QCResults - <FastQC_Result>)
-Output: html file
-'''
-
-# config
-version = '0.2.9'
-logsep = '=====================================\n'
-
-import os
-import sys
-import time
-import shutil
-import base64
-import tempfile
-import matplotlib
-
-matplotlib.use("Agg") #in case no display is set
-import matplotlib.pyplot as plt
-import argparse
-import numpy as np
-
-
-
-#advanced config - HTML output
-htmlheader = '<!doctype html>' \
- '<html>' \
- '<head>' \
- '<meta charset="utf-8">' \
- '<title>Combined FastQC Report</title>' \
- '<script language="javascript">top.DSfirstAccess = 1442945125;top.DSmaxTimeout=3600;top.DStimediff=1442945348 - parseInt(""+(new Date()).getTime()/1000);</script><SCRIPT language="javascript" id="dsshim" src="/dana-cached/js/shimdata.cgi"></SCRIPT><SCRIPT language="javascript" id="dsfunc" src="/dana-cached/js/oth.js?a4e535923255f21d1261a51909daf29f"></SCRIPT><SCRIPT language="javascript" id="dstimeout" src="/dana-cached/js/sessiontimeout.js?a4e535923255f21d1261a51909daf29f"></SCRIPT> [...]
- '//<![CDATA[' \
- 'var dsnodocwrites = 0 ; var DanaCookie="MstrPgLd1=1; MstrPgLd2=1; UserContext=mBoHOGHwbUOz5yuRe-kSp4w1h6L4yNIICPjPhC-9ifhLTYXFL0EmDebAMytIx2kDia-CCTHmi8Y.; tzid=W. Europe Standard Time"; var DSHost="webmail.rki.de"; var DSDanaInfoStr=""; var DSLang="de"; var DSObfuscateHostname=0;var DSTBSettings=17425;var DSTBLU=\'/dana/home/starter.cgi?startpageonly=1\';;danaSetDSHost();' \
- '//]]>' \
- '</SCRIPT></head>' \
- '<body>'
-
-htmlfooter = '</table>\n</body>\n<SCRIPT language="javascript" id="dstb-id">dstb()</SCRIPT></html>\n'
-
-#advanced config - fastqc img extraction
-order = ['sld', 'sq', 'sgc', 'bq', 'bnc', 'kmer', 'dup']
-imgfiles = {'dup': 'duplication_levels.png',
- 'kmer': 'kmer_profiles.png',
- 'bgc': 'per_base_gc_content.png',
- 'bnc': 'per_base_n_content.png',
- 'bq': 'per_base_quality.png',
- 'bsc': 'per_base_sequence_content.png',
- 'sgc': 'per_sequence_gc_content.png',
- 'sq': 'per_sequence_quality.png',
- 'sld': 'sequence_length_distribution.png',
- "ptsq": "per_tile_quality.png",
- "ac": "adapter_content.png"
- }
-#advanced config - fastqc data extraction
-modules = {'sld': '>>Sequence Length Distribution\t', 'sq': '>>Per sequence quality scores\t',
- 'sgc': '>>Per sequence GC content\t'}
-all_modules = {'bq': ">>Per base sequence quality",
- 'bsc': ">>Per base sequence content",
- 'bgc': ">>Per base GC content",
- 'bnc': ">>Per base N content",
- 'sld': ">>Sequence Length Distribution",
- 'dup': ">>Sequence Duplication Levels",
- 'kmer': ">>Kmer Content",
- 'sq': '>>Per sequence quality scores',
- 'sgc': '>>Per sequence GC content',
- 'ptsq': '>>Per tile sequence quality',
- 'ac':'>>Adapter Content'
- }
-codes = {'comment': '#', 'modulestart': '>>', 'moduleend': '>>END_MODULE'}
-
-#advanced config - boxplot design
-
-#processing command line arguments
-'''parser = argparse.ArgumentParser(prog="COMBINE_FASTQC",
- description='performs FastQC for multiple fastq files and combines results in a single html file.')
-parser.add_argument('file', metavar="FILE(S)", help="input BAM file", type=argparse.FileType('r'), nargs='+')
-parser.add_argument('-t', metavar='INT', help="number of threads/CPUs used for FastQC", type=int, action="store",
- default=4)
-parser.add_argument('--version', action='version', version='%(prog)s ' + version)
-
-args = parser.parse_args()
-'''
-
-
-#functions
-def createCSV(dataset, lines, key, path):
- '''creates CSV used for boxplotting'''
- output = []
- for line in lines:
- line = line.strip()
- fields = line.split('\t')
-
- #Sequence Length Distribution
- if key == "sld":
- l = fields[0].split('-')
- try:
- mean = (int(l[0]) + int(l[1])) / 2
- except:
- mean = int(l[0])
- output.extend([str(mean)] * int(float(fields[1])))
-
- #sequence quality or sequence GC content
- if key == "sq" or key == 'sgc':
- v = fields[0] #quality or gc [%]
- output.extend([str(v)] * int(float(fields[1])))
-
- #duplicates
- if key == 'dup':
- c = fields[0].replace('>', '') #count of duplicates
- c = c.replace('k', '000')
- c = c.replace('+', '')
- output.extend([str(c)] * int(float(fields[1]) * 100))
- csvhandle = open(path + '/' + key + ".csv", 'a')
- csvhandle.write(dataset.replace(',', '_') + "," + ",".join(output) + '\n')
- csvhandle.close()
-
-
-def color(line):
- line = line.strip()
- if line.endswith("pass"):
- return "#3CBC3C"
- elif line.endswith("warn"):
- return "orange"
- elif line.endswith("fail"):
- return "red"
- else:
- return "black"
-
-
-def get_key(dict, line):
- for key, value in dict.items():
- if line.startswith(value):
- return key
- return None
-
-
-def barplot(overview, width, height, tmpdir, type, xlabel, ylabel, title):
- bplotfont = {'weight': 'bold', 'size': 5}
- plt.rc('font', **bplotfont)
- fig = plt.figure(1, figsize=(width, height))
- ax = fig.add_subplot(111)
- index = np.arange(overview[1].shape[0])
- opacity = 0.4
- bar_width = 0.35
- bplot = plt.bar(index, overview[1][type], bar_width,
- alpha=opacity,
- color='#385d8a') #, label='Aligned Reads')
- plt.xlabel(xlabel)
- plt.ylabel(ylabel)
- plt.title(title)
- plt.xticks(index + (bar_width / 2), [x.decode("utf-8") for x in overview[1]['name']], rotation=90)
- # plt.ylim((0,100))
-
- for rect, label in zip(bplot, overview[1][type]):
- ax.text(rect.get_x() + rect.get_width() / 2, rect.get_height() , str(label), ha='center', va='bottom')
-
- fig.savefig(os.path.join(tmpdir ,type + ".png"), bbox_inches='tight', dpi=200)
- fig.clf()
-
-
-def combineFastQC(project_folder, overview=None, tmp = "tmp"):
-
- err = 1
- while err > 0:
- timecode = time.strftime("%Y-%m-%d_%H-%M-%S")
- html_fname = "FASTQC_COMBINE_" + timecode + ".html"
- if os.path.isfile(html_fname):
- err += 1
- time.sleep(1)
- else:
- err = 0
- if err == 30:
- print("ERROR: giving up to find available output file name")
- sys.exit(1)
- #working
- try:
- tmpdir = tempfile.mkdtemp(tmp)
- os.makedirs(tmpdir, exist_ok=True)
- chunkfiles = [tmpdir + '/' + html_fname + '1', tmpdir + '/' + html_fname + '2']
- outhandle = open(chunkfiles[1], "w")
-
- print("Summarize fastqc ", project_folder)
- fastqc_dirs = []
- #sample_name=[]
- for sample in os.listdir(project_folder):
- if os.path.isdir(os.path.join(project_folder, sample)):
- if sample == "QCResults":
- sample = ""
- path = os.path.join(project_folder, sample)
- if "QCResults" in os.listdir(path):
- path = os.path.join(path, "QCResults")
-
- if "FastQC" in os.listdir(path):
- path = os.path.join(path, "FastQC")
-
- print("Found path " + path)
- if os.path.exists(path):
- fastqc_folder = sorted( [x for x in os.listdir(path) if x.endswith("_fastqc")])
-
- if len(fastqc_folder) != 0:
- path_trimmed = os.path.join(path, "..", "Trimmed", "FastQC")
- if os.path.exists(path_trimmed):
- for fastqc in fastqc_folder:
- fastqc_dirs.append(os.path.join(path, fastqc))
- fastqc_dirs.append(os.path.join(path_trimmed, fastqc))
- else:
- fastqc_dirs = fastqc_folder
- fastqc_folder = None
-
- i = 0
- outhandle.write('<table border = "1"> <tr>')
-
- for fastqcdir in fastqc_dirs:
- i += 1
- print("extracting data ", fastqcdir)
- store = False
- data = []
- fastqcdatafile = open(os.path.join(fastqcdir, "fastqc_data.txt"), "r")
- frame_color = {}
- for line in iter(fastqcdatafile):
- if line.startswith(codes['comment']):
- pass #print("comment")
- #continue
- elif line.startswith(codes['moduleend']):
- if len(data) > 0:
- name1 = os.path.basename(fastqcdir)
- if "/Trimmed/" in fastqcdir:
- name1 = "Trimmed " + name1
- createCSV(name1, data[2:], key, tmpdir)
- data = []
- store = False
-
- elif line.startswith(codes['modulestart']):
- key = get_key(all_modules, line)
- if key in modules.keys():
- store = True
- if key is not None:
- frame_color[key] = color(line)
- if store:
- data.extend([line])
- fastqcdatafile.close()
-
- #image processing
- name = os.path.basename(fastqcdir)
- if "/Trimmed/" in fastqcdir:
- name = "Trimmed " + os.path.basename(fastqcdir)
- outhandle.write('<tr>\n<td><b>' + name + "</b></td>\n")
- for imgkey in order:
- try:
- with open(os.path.join(fastqcdir , "Images" , imgfiles[imgkey]), "rb") as imgfile:
- imgstring = base64.b64encode(imgfile.read())
- outhandle.write('<td><img style="border: 5px solid ' + frame_color[imgkey] + ';" alt="Embedded Image" src="data:image/png;base64,' + imgstring.decode(
- "utf-8") + '" /></td>\n')
- except:
- outhandle.write('<td></td>\n')
- outhandle.write('</tr>\n')
- if "/Trimmed/" in fastqcdir:
- outhandle.write('</tr><tr></tr><tr></tr><tr></tr><tr></tr><tr></tr><tr></tr>')
-
- outhandle.write(htmlfooter)
- outhandle.close()
-
- bplotfont = {'weight': 'bold', 'size': 5}
- plt.rc('font', **bplotfont)
-
- #generate boxplots
- print ("generating comparative boxplots...")
- xclasses = range(1, len(fastqc_dirs) + 1)
- width = 4.5
- height = 3.5
- if (len(fastqc_dirs) / 3) > width:
- width = len(fastqc_dirs) / 3
- height = len(fastqc_dirs) / 4
- #max width
- if width > 12:
- width = 12
- height = 7
- bplotfont = {'weight': 'bold', 'size': 3}
- plt.rc('font', **bplotfont)
-
- xlabels = []
- for key in order:
- if key in modules:
- data = []
-
- plottitle = modules[key].replace(">", '')
- plottitle = plottitle.replace("\t", '')
- print("Plot ", plottitle )
-
- #csvhandle = open(tmpdir + '/' + key + ".csv", 'r')
- with open(tmpdir + '/' + key + ".csv", 'r') as csvfile:
- csvhandle = csv.reader(csvfile, delimiter=',', quotechar='|')
- for row in csvhandle:
- xlabels.append(row[0])
- data.append([float(x) for x in row[1:]])
-
- fig = plt.figure(1, figsize=(width, height))
- ax = fig.add_subplot(111)
- #bp = ax.boxplot(data, notch=True, patch_artist=True)
-
- # set props
- '''whisker = {'color':'#b4b4b4', 'linestyle':"-", 'linewidth':0.75}
- caps = {'color':'#b4b4b4', 'linewidth':1}
- flier = {"marker":'o', "lw":0, "color":'#e6e6e6', "markersize":0.5}
- box = {"color":'#385d8a', "linewidth":0.75}'''
- bp = ax.boxplot(data)#, capsprops = caps , flierprops = flier, boxprops =box)
-
- for box in bp['boxes']:
- #outline color
- box.set(color='#385d8a', linewidth=0.75)
-
- for whisker in bp['whiskers']:
- whisker.set(color='#b4b4b4', linestyle="-", linewidth=0.75)
-
- for cap in bp['caps']:
- cap.set(color='#b4b4b4', linewidth=1)
-
- for median in bp['medians']:
- median.set(color='#c0504d', linewidth=1)
-
- for flier in bp['fliers']:
- flier.set(marker='o', lw=0, color='#e6e6e6', markersize=0.5)
-
- plt.title(plottitle)
- plt.xticks(xclasses, xlabels, rotation=90)
- fig.savefig(tmpdir + '/' + key + ".png", bbox_inches='tight', dpi=200)
- fig.clf()
-
- if overview is not None:
- try:
- if len([x for x in overview[1]['mapping'] if x != 0.0]) != 0:
- barplot(overview, width, height, tmpdir, 'mapping', 'Samples', 'Mapped reads [%]',
- 'Percentage of reads aligned to ' + overview[0])
- except:
- print("Couldnt plot mapping results")
- try:
- barplot(overview, width, height, tmpdir, 'trimming', 'Samples', 'Reads after trimming [%]',
- 'Percentage of reads after trimming')
- except:
- print("Couldnt plot trimming results")
- try:
- if len([x for x in overview[1]['shorter_reads'] if x != 0.0]) != 0:
- barplot(overview, width, height, tmpdir, 'shorter_reads', 'Samples', 'Shorter fragments [%]',
- 'Percentage of fragments shorter than read length')
- except:
- print("Couldnt plot shorter fragments")
- try:
- if len([x for x in overview[1]['kraken'] if x != 0.0]) != 0:
- barplot(overview, width, height, tmpdir, 'kraken', 'Samples', 'Classified reads [%]',
- 'Percentage of classified reads')
- except:
- print("Couldnt plot Kraken results")
- #save boxplots to html
- outhandle = open(chunkfiles[0], "w")
- outhandle.write(htmlheader)
- #for line in iter(outhandle):
- # if line.strip() == "<boxplotline \>":
- outhandle.write("<tr>\n<td><b>summary</b></td>\n")
- for key in order:
- if key not in modules:
- outhandle.write("<td>-</td>")
- else:
- imgfile = open(tmpdir + '/' + key + ".png", "rb")
- imgstring = base64.b64encode(imgfile.read())
- imgfile.close()
- outhandle.write('<td><img alt="Embedded Image" src="data:image/png;base64,' + imgstring.decode(
- "utf-8") + '" /></td>\n')
- outhandle.write('</tr>\n')
-
- outhandle.write("<tr>\n<td></td>\n")
- if overview is not None:
- for result in ['mapping', 'trimming', 'shorter_reads', 'kraken']:
- try:
- imgfile = open(os.path.join(tmpdir, result + ".png"), "rb")
- imgstring = base64.b64encode(imgfile.read())
- imgfile.close()
- outhandle.write('<td><img alt="Embedded Image" src="data:image/png;base64,' + imgstring.decode(
- "utf-8") + '" /></td>\n')
- except:
- pass
- outhandle.write('</tr>\n')
- outhandle.close()
-
-
- #join sub-htm-files
- print("Create result file: " + os.path.join(project_folder, html_fname))
-
- htmlhandle = open(os.path.join(project_folder, html_fname), 'w')
- for chunkfile in chunkfiles:
- chunkhandle = open(chunkfile, 'r')
- for line in iter(chunkhandle):
- htmlhandle.write(line)
- chunkhandle.close()
- htmlhandle.close()
- finally:
- #cleaning
- shutil.rmtree(tmpdir)
-
-
-import csv
-
-if __name__ == "__main__":
- start =time.time()
- print(start)
- parser = argparse.ArgumentParser()
- parser.add_argument('-folder', dest='folder', help="Sample folder", required=True)
- parser.add_argument('-csv', dest='csv',
- help="csv file.Format: Sample name | Mapping Result | Trimming Result | Kraken Result | Shorter Fragments ",
- required=False)
- parser.add_argument('-reference', dest="reference", help="Name of reference", required=False)
- arguments = vars(parser.parse_args())
-
- if arguments["csv"] and arguments["reference"]:
-
- overview = []
- with open(arguments["csv"], newline='') as csvfile:
- infosheet = csv.reader(csvfile, delimiter=';', quotechar='|')
- for row in infosheet:
- overview.append(tuple(row))
- print(row)
- combineFastQC(arguments["folder"], (arguments["reference"],np.array(overview, dtype = [('name', '|S100'), ('mapping', float), ('trimming', float), ('kraken', float), ('shorter_reads', float)])))
-
- elif (arguments["csv"] or arguments["reference"]):
- sys.exit("csv and reference required")
- else:
- combineFastQC(arguments["folder"])
- print(time.time() - start)
diff --git a/config.txt b/config.txt
index 39ba760..0e269df 100755
--- a/config.txt
+++ b/config.txt
@@ -1,14 +1,10 @@
[DEFAULT]
-kraken_db =/home/andruscha/programs/kraken/minikraken_20141208/
-blast_db = /data/BLAST/nt/
+kraken_db =/home/andruscha/software/kraken/minikraken_20141208/
[PATH]
-kraken = /home/andruscha/programs/kraken/kraken_build/
+kraken =
adapter = /data/GS/tools/Trimmomatic-0.32/adapters/
fastqc = /home/lieuv/tools/FastQC/
trimmomatic =
bowtie2 =
-blobtools = /home/lieuv/tools/blobtools
-[VERSION]
-trimmomatic = 0.32
diff --git a/helper.py b/helper.py
index e75cd69..b1c906f 100755
--- a/helper.py
+++ b/helper.py
@@ -1,9 +1,15 @@
__author__ = 'LieuV'
from os.path import basename, splitext, join, isfile, dirname, abspath, exists
-from os import makedirs
+from os import makedirs, listdir, remove
import subprocess
-import re, sys, numpy
+import re, sys, numpy, shutil
+import matplotlib
+matplotlib.use("Agg") # in case no display is set
+import matplotlib.pyplot as plt
+import xml.etree.ElementTree as ET
+from collections import OrderedDict
+import json
def getDir(args, getAbs = False):
if isfile(args[0]):
@@ -14,21 +20,19 @@ def getDir(args, getAbs = False):
makedirs(path, exist_ok=True)
return path
-def getFilenameWithoutExtension(string, level = 1, getBase = False):
+def getFilenameWithoutExtension(string, getBase = False):
''' In case of double extension '''
if getBase:
string = basename(string)
string = splitext(string)[0]
i = 0
while splitext(string)[-1] in [".gz", ".gzip", ".zip", ".bz", ".fasta", ".fastq", ".bam"]:
- #while i < level:
- #i +=1
string = splitext(string)[0]
return string
def bam_to_fastq(bamfile, output):
print("Convert bam to fastq...", bamfile)
- outfile = join(output, getFilenameWithoutExtension(bamfile, level = 2, getBase= True) +".fastq")
+ outfile = join(output, getFilenameWithoutExtension(bamfile, getBase= True) +".fastq")
subprocess.Popen("bamtools convert -format fastq -in " + bamfile + " -out " + outfile, shell = True)
return outfile
@@ -47,15 +51,14 @@ def join_reads(read_list, outputDir, outputName):
return None
elif len(read_list) ==1:
return read_list[0]
- outfile = join(outputDir, getFilenameWithoutExtension(outputName, True) +".gz")
- #if not exists(outfile):
- if read_list[0].endswith(".gz"):
-
- print("Decompress and concatenate files. This may take a while." )
- process = subprocess.call("zcat " + " ".join(read_list) + "|gzip --fast > " + outfile, shell=True)
- else:
- print("Decompress and concatenate files. This may take a while.")
- process = subprocess.call("gzip --fast -c " + " ".join(read_list) + " > " + outfile, shell=True)
+ outfile = join(outputDir, getFilenameWithoutExtension(outputName) )
+ outfile += splitext(read_list[0])[-1]
+ #if read_list[0].endswith(".gz"):
+ print("Concatenate files. This may take a while." )
+ process = subprocess.call("cat " + " ".join(read_list) + " > " + outfile, shell=True)
+ #else:
+ # print("Decompress and concatenate files. This may take a while.")
+ # process = subprocess.call("gzip --fast -c " + " ".join(read_list) + " > " + outfile, shell=True)
if process != 0:
sys.exit("Error with gzip")
return outfile
@@ -201,4 +204,209 @@ def str_to_json(report_str):
pre_depth = curr_depth
while len(closings) > 0:
json_file += closings.pop() # end node
- return json_file
\ No newline at end of file
+ return json_file
+
+def delete_files(path, extended="", pattern="_fastqc"):
+ for f in listdir(join(path, extended)):
+ filename =join(path,extended,f)
+ if f.endswith(pattern):
+ if isfile(filename):
+ remove(filename)
+ else:
+ shutil.rmtree(filename)
+
+
+def get_distribution(lines, key):
+    '''Expand FastQC distribution lines into a flat list of values (one entry per count) for boxplotting.'''
+ output = []
+ for line in lines:
+ line = line.strip()
+ fields = line.split('\t')
+ if key == "Sequence_Length_Distribution":
+ l = fields[0].split('-')
+ try:
+ mean = (int(l[0]) + int(l[1])) / 2
+ except:
+ mean = int(l[0])
+ output.extend([str(mean)] * int(float(fields[1])))
+
+ # sequence quality or sequence GC content
+ if key == 'Per_sequence_GC_content' or key == "Per_sequence_quality_scores":
+ v = fields[0] # quality or gc [%]
+ output.extend(int(v) * [int(float(fields[1]))])
+ return output
+
+def createCSV(dataset, lines, key, path, trimmed):
+ '''creates CSV used for boxplotting'''
+ #output = []
+ splitted_ds=dataset.split("_")
+
+ csvhandle = open(path + '/' + key + ".csv", 'a')
+ for line in lines:
+ line = line.strip()
+ fields = line.split('\t')
+ #Sequence Length Distribution
+ if key == "Sequence_Length_Distribution":
+ l = fields[0].split('-')
+ try:
+ value = (int(l[0]) + int(l[1])) / 2
+ except:
+ value = int(l[0])
+ count = int(float(fields[1]))
+
+ #sequence quality or sequence GC content
+ if key == 'Per_sequence_quality_scores' or key == 'Per_sequence_GC_content':
+ value = fields[0] #quality or gc [%]
+ count = int(float(fields[1]))
+ try:
+ csvhandle.write(",".join(["_".join(splitted_ds[:-4]), splitted_ds[-3], splitted_ds[-4],trimmed, str(value), str(count)])+"\n")
+ except:
+ csvhandle.write(",".join([dataset.replace("_fastqc",""), "NA", "NA" , trimmed, str(value), str(count)]) + "\n")
+ csvhandle.close()
+
+def write_fastqc_summary(fastqcdir, output):
+ store = False
+ data = []
+ fastqcdatafile = open(join(fastqcdir,"fastqc_data.txt"), "r")
+ for line in iter(fastqcdatafile):
+ if line.startswith("#"):
+ pass # print("comment")
+ # continue
+ elif line.startswith('>>END_MODULE'):
+ if len(data) > 0:
+ name1 = basename(fastqcdir)
+ if "Trimmed" in fastqcdir:
+ # name1 = "Trimmed " + name1
+ trimmed = "Trimmed"
+ else:
+ trimmed = "Raw"
+ createCSV(name1, data[2:], key, output, trimmed)
+ data = []
+ store = False
+ elif line.startswith('>>'):
+ key = line.split("\t")[0]
+ key = key.replace(">>","")
+ key = key.replace(" ","_")
+ if key in ['Sequence_Length_Distribution', 'Per_sequence_quality_scores','Per_sequence_GC_content']:
+ store = True
+ if store:
+ data.extend([line])
+ fastqcdatafile.close()
+
+def write_SAV(folder, output):
+ print("Plot SAV images...")
+ process = subprocess.Popen(" ".join(["Rscript --vanilla ", join(dirname(__file__), "sav.R"),
+ folder, join(output,"img")]),
+ stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
+ for line in iter(process.stderr.readline, b''):
+ print(line)
+ tree = ET.parse(join(folder,'RunInfo.xml'))
+ root = tree.getroot()
+ runinfo = OrderedDict()
+
+ for child in root.iter("Run"):
+ for grand in child.getchildren():
+
+ if len(grand.attrib) != 0:
+ runinfo[grand.tag] = grand.attrib
+
+ if len(grand.getchildren()) !=0:
+ for grandgrand in grand:
+ if len(grandgrand.attrib) != 0:
+ try:
+ runinfo[grand.tag].append(grandgrand.attrib)
+ except:
+ runinfo[grand.tag] = [grandgrand.attrib]
+
+ if grandgrand.text is not None:
+ if grandgrand.text.strip() != "":
+ if grand.tag not in runinfo.keys():
+ runinfo[grand.tag]={}
+ try:
+ runinfo[grand.tag][grandgrand.tag].append(grandgrand.text)
+ except:
+ runinfo[grand.tag][grandgrand.tag] = [grandgrand.text]
+
+ if grand.text is not None:
+ if grand.text.strip()!="":
+ if len(grand.attrib) != 0:
+ runinfo[grand.tag][grand.tag] = grand.text
+ else:
+ runinfo[grand.tag] = grand.text
+
+ runparameter_xml = ET.parse(join(folder, 'RunParameters.xml'))
+ runparameters_unique = ["ExperimentName", "FPGAVersion", "CPLDVersion", "MCSVersion","RTAVersion", "CameraFirmware", "CameraDriver", "ClusteringChoice" ,"RapidRunChemistry", "RunMode", "ApplicationName", "ApplicationVersion"]
+ runparameters_path = ["Setup/ReagentKits/Sbs/SbsReagentKit/ID","Setup/ReagentKits/Index/ReagentKit/ID","Setup/Sbs","Setup/Rehyb", "PR2BottleRFIDTag/PartNumber","FlowcellRFIDTag/PartNumber","ReagentKitRFIDTag/PartNumber"]
+ root = runparameter_xml.getroot()
+ runparameter = OrderedDict()
+ for label in runparameters_unique:
+ for i in root.iter(label):
+ runparameter[label] = i.text
+ #print(label, ": ", i.text)
+
+ for label_path in runparameters_path:
+ label_path = label_path.split("/")
+ element = root.find(label_path[0])
+ for label in label_path[1:]:
+ if element is None:
+ break
+ element = element.find(label)
+ if element is not None:
+ runparameter["/".join(label_path)] = element.text
+ #print(label_path, element.text)
+ else:
+ print(label_path, " does not exist.")
+ runparameter.update(runinfo)
+ json.dump({"tables":runparameter}, open(join(output, "sav.json"),"w"))
+
+'''def boxplot(data, title,output):
+ bplotfont = {"weight": "bold", "size": 8}
+ plt.rc("font", **bplotfont)
+ xclasses = range(1, len(data) + 1)
+ width = 4.5
+ height = 3.5
+ if (len(data) / 3) > width:
+ width = len(data) / 3
+ height = len(data) / 4
+ # max width
+ if width > 12:
+ width = 12
+ height = 7
+ bplotfont = {"weight": "bold", "size": 8}
+ plt.rc("font", **bplotfont)
+
+ fig = plt.figure(1, figsize=(width, height))
+ ax = fig.add_subplot(111)
+ bp = ax.boxplot(data, notch=True, patch_artist=True)
+
+ colors = ["cyan", "lightblue", "lightgreen", "tan", "pink"]
+ for patch, color in zip(bp["boxes"], colors):
+ patch.set_facecolor(color)
+
+ # set props
+ #whisker = {"color":"#b4b4b4", "linestyle":"-", "linewidth":0.75}
+ #caps = {"color":"#b4b4b4", "linewidth":1}
+ #flier = {"marker":"o", "lw":0, "color":"#e6e6e6", "markersize":0.5}
+ #box = {"color":"#385d8a", "linewidth":0.75}"""
+ #bp = ax.boxplot(data)# , capsprops = caps , flierprops = flier, boxprops =box)
+
+ for box in bp["boxes"]:
+ # outline color
+ box.set(color="#385d8a", linewidth=0.75)
+
+ for whisker in bp["whiskers"]:
+ whisker.set(color="#b4b4b4", linestyle="-", linewidth=0.75)
+
+ for cap in bp["caps"]:
+ cap.set(color="#b4b4b4", linewidth=1)
+
+ for median in bp["medians"]:
+ median.set(color="#c0504d", linewidth=1)
+
+ for flier in bp["fliers"]:
+ flier.set(marker="o", lw=0, color="#e6e6e6", markersize=0.5)"""
+ plt.title(title)
+ plt.xticks(xclasses, xlabels, rotation=90)
+ fig.savefig(output, bbox_inches="tight", dpi=200)
+ fig.clf()
+'''
diff --git a/lib/box.js b/lib/box.js
new file mode 100755
index 0000000..471bb8e
--- /dev/null
+++ b/lib/box.js
@@ -0,0 +1,320 @@
+(function() {
+
+// Inspired by http://informationandvisualization.de/blog/box-plot
+d3.box = function() {
+ var width = 1,
+ height = 1,
+ duration = 0,
+ domain = null,
+ value = Number,
+ whiskers = boxWhiskers,
+ quartiles = boxQuartiles,
+ showLabels = true, // whether or not to show text labels
+ numBars = 4,
+ curBar = 1,
+ tickFormat = null;
+
+ // For each small multiple…
+ function box(g) {
+ g.each(function(data, i) {
+ //d = d.map(value).sort(d3.ascending);
+ //var boxIndex = data[0];
+ //var boxIndex = 1;
+ var d = data[1].sort(d3.ascending);
+
+ // console.log(boxIndex);
+ //console.log(d);
+
+ var g = d3.select(this),
+ n = d.length,
+ min = d[0],
+ max = d[n - 1];
+
+ // Compute quartiles. Must return exactly 3 elements.
+ var quartileData = d.quartiles = quartiles(d);
+
+ // Compute whiskers. Must return exactly 2 elements, or null.
+ var whiskerIndices = whiskers && whiskers.call(this, d, i),
+ whiskerData = whiskerIndices && whiskerIndices.map(function(i) { return d[i]; });
+
+ // Compute outliers. If no whiskers are specified, all data are "outliers".
+ // We compute the outliers as indices, so that we can join across transitions!
+ var outlierIndices = whiskerIndices
+ ? d3.range(0, whiskerIndices[0]).concat(d3.range(whiskerIndices[1] + 1, n))
+ : d3.range(n);
+
+ // Compute the new x-scale.
+ var x1 = d3.scale.linear()
+ .domain(domain && domain.call(this, d, i) || [min, max])
+ .range([height, 0]);
+
+ // Retrieve the old x-scale, if this is an update.
+ var x0 = this.__chart__ || d3.scale.linear()
+ .domain([0, Infinity])
+ // .domain([0, max])
+ .range(x1.range());
+
+ // Stash the new scale.
+ this.__chart__ = x1;
+
+ // Note: the box, median, and box tick elements are fixed in number,
+ // so we only have to handle enter and update. In contrast, the outliers
+ // and other elements are variable, so we need to exit them! Variable
+ // elements also fade in and out.
+
+ // Update center line: the vertical line spanning the whiskers.
+ var center = g.selectAll("line.center")
+ .data(whiskerData ? [whiskerData] : []);
+
+ //vertical line
+ center.enter().insert("line", "rect")
+ .attr("class", "center")
+ .attr("x1", width / 2)
+ .attr("y1", function(d) { return x0(d[0]); })
+ .attr("x2", width / 2)
+ .attr("y2", function(d) { return x0(d[1]); })
+ .style("opacity", 1e-6)
+ .transition()
+ .duration(duration)
+ .style("opacity", 1)
+ .attr("y1", function(d) { return x1(d[0]); })
+ .attr("y2", function(d) { return x1(d[1]); });
+
+ center.transition()
+ .duration(duration)
+ .style("opacity", 1)
+ .attr("y1", function(d) { return x1(d[0]); })
+ .attr("y2", function(d) { return x1(d[1]); });
+
+ center.exit().transition()
+ .duration(duration)
+ .style("opacity", 1e-6)
+ .attr("y1", function(d) { return x1(d[0]); })
+ .attr("y2", function(d) { return x1(d[1]); })
+ .remove();
+
+ // Update innerquartile box.
+ var box = g.selectAll("rect.box")
+ .data([quartileData]);
+
+ box.enter().append("rect")
+ .attr("class", "box")
+ .attr("x", 0)
+ .attr("y", function(d) { return x0(d[2]); })
+ .attr("width", width)
+ .attr("height", function(d) { return x0(d[0]) - x0(d[2]); })
+ .transition()
+ .duration(duration)
+ .attr("y", function(d) { return x1(d[2]); })
+ .attr("height", function(d) { return x1(d[0]) - x1(d[2]); });
+
+ box.transition()
+ .duration(duration)
+ .attr("y", function(d) { return x1(d[2]); })
+ .attr("height", function(d) { return x1(d[0]) - x1(d[2]); });
+
+ // Update median line.
+ var medianLine = g.selectAll("line.median")
+ .data([quartileData[1]]);
+
+ medianLine.enter().append("line")
+ .attr("class", "median")
+ .attr("x1", 0)
+ .attr("y1", x0)
+ .attr("x2", width)
+ .attr("y2", x0)
+ .transition()
+ .duration(duration)
+ .attr("y1", x1)
+ .attr("y2", x1);
+
+ medianLine.transition()
+ .duration(duration)
+ .attr("y1", x1)
+ .attr("y2", x1);
+
+ // Update whiskers.
+ var whisker = g.selectAll("line.whisker")
+ .data(whiskerData || []);
+
+ whisker.enter().insert("line", "circle, text")
+ .attr("class", "whisker")
+ .attr("x1", 0)
+ .attr("y1", x0)
+ .attr("x2", 0 + width)
+ .attr("y2", x0)
+ .style("opacity", 1e-6)
+ .transition()
+ .duration(duration)
+ .attr("y1", x1)
+ .attr("y2", x1)
+ .style("opacity", 1);
+
+ whisker.transition()
+ .duration(duration)
+ .attr("y1", x1)
+ .attr("y2", x1)
+ .style("opacity", 1);
+
+ whisker.exit().transition()
+ .duration(duration)
+ .attr("y1", x1)
+ .attr("y2", x1)
+ .style("opacity", 1e-6)
+ .remove();
+
+ // Update outliers.
+ var outlier = g.selectAll("circle.outlier")
+ .data(outlierIndices, Number);
+
+ outlier.enter().insert("circle", "text")
+ .attr("class", "outlier")
+ .attr("r", 5)
+ .attr("cx", width / 2)
+ .attr("cy", function(i) { return x0(d[i]); })
+ .style("opacity", 1e-6)
+ .transition()
+ .duration(duration)
+ .attr("cy", function(i) { return x1(d[i]); })
+ .style("opacity", 1);
+
+ outlier.transition()
+ .duration(duration)
+ .attr("cy", function(i) { return x1(d[i]); })
+ .style("opacity", 1);
+
+ outlier.exit().transition()
+ .duration(duration)
+ .attr("cy", function(i) { return x1(d[i]); })
+ .style("opacity", 1e-6)
+ .remove();
+
+ // Compute the tick format.
+ var format = tickFormat || x1.tickFormat(8);
+
+ // Update box ticks.
+ var boxTick = g.selectAll("text.box")
+ .data(quartileData);
+ if(showLabels == true) {
+ boxTick.enter().append("text")
+ .attr("class", "box")
+ .attr("dy", ".3em")
+ .attr("dx", function(d, i) { return i & 1 ? 6 : -6 })
+ .attr("x", function(d, i) { return i & 1 ? + width : 0 })
+ .attr("y", x0)
+ .attr("text-anchor", function(d, i) { return i & 1 ? "start" : "end"; })
+ .text(format)
+ .transition()
+ .duration(duration)
+ .attr("y", x1);
+ }
+
+ boxTick.transition()
+ .duration(duration)
+ .text(format)
+ .attr("y", x1);
+
+ // Update whisker ticks. These are handled separately from the box
+  // ticks because they may or may not exist, and we don't want
+ // to join box ticks pre-transition with whisker ticks post-.
+ var whiskerTick = g.selectAll("text.whisker")
+ .data(whiskerData || []);
+ if(showLabels == true) {
+ whiskerTick.enter().append("text")
+ .attr("class", "whisker")
+ .attr("dy", ".3em")
+ .attr("dx", 6)
+ .attr("x", width)
+ .attr("y", x0)
+ .text(format)
+ .style("opacity", 1e-6)
+ .transition()
+ .duration(duration)
+ .attr("y", x1)
+ .style("opacity", 1);
+ }
+ whiskerTick.transition()
+ .duration(duration)
+ .text(format)
+ .attr("y", x1)
+ .style("opacity", 1);
+
+ whiskerTick.exit().transition()
+ .duration(duration)
+ .attr("y", x1)
+ .style("opacity", 1e-6)
+ .remove();
+ });
+ d3.timer.flush();
+ }
+
+ box.width = function(x) {
+ if (!arguments.length) return width;
+ width = x;
+ return box;
+ };
+
+ box.height = function(x) {
+ if (!arguments.length) return height;
+ height = x;
+ return box;
+ };
+
+ box.tickFormat = function(x) {
+ if (!arguments.length) return tickFormat;
+ tickFormat = x;
+ return box;
+ };
+
+ box.duration = function(x) {
+ if (!arguments.length) return duration;
+ duration = x;
+ return box;
+ };
+
+ box.domain = function(x) {
+ if (!arguments.length) return domain;
+ domain = x == null ? x : d3.functor(x);
+ return box;
+ };
+
+ box.value = function(x) {
+ if (!arguments.length) return value;
+ value = x;
+ return box;
+ };
+
+ box.whiskers = function(x) {
+ if (!arguments.length) return whiskers;
+ whiskers = x;
+ return box;
+ };
+
+ box.showLabels = function(x) {
+ if (!arguments.length) return showLabels;
+ showLabels = x;
+ return box;
+ };
+
+ box.quartiles = function(x) {
+ if (!arguments.length) return quartiles;
+ quartiles = x;
+ return box;
+ };
+
+ return box;
+};
+
+function boxWhiskers(d) {
+ return [0, d.length - 1];
+}
+
+function boxQuartiles(d) {
+ return [
+ d3.quantile(d, .25),
+ d3.quantile(d, .5),
+ d3.quantile(d, .75)
+ ];
+}
+
+})();
\ No newline at end of file
diff --git a/lib/qcstyle.css b/lib/qcstyle.css
new file mode 100755
index 0000000..4bad164
--- /dev/null
+++ b/lib/qcstyle.css
@@ -0,0 +1,84 @@
+ .row_hidden {
+ display: none;
+ }
+ .panel {
+ background-color:#F7F7F7;
+
+ }
+ .panel-heading{
+ cursor:pointer;
+ }
+ .show_row {
+ display: show;
+ }
+
+ p.inbox_text {
+ text-align: center;
+ color: grey;
+ margin: 0 0 0px;
+ }
+
+ .thumbnail {
+ margin-bottom: 5px;
+ border: 0px solid #000;
+ height: 150px;
+ width: 200px;
+ }
+
+ .sample_box {
+ display: table-cell;
+ margin: 10px;
+ padding: 10px;
+ color: lightgrey;
+ border-radius: 5px;
+ box-shadow: 0 1px 3px rgba(0, 0, 0, .25);
+ }
+
+ .sidebar {
+ float: both;
+ overflow-y: auto;
+ position: fixed;
+ height: 100%;
+ top: 50px;
+ z-index: 999;
+ padding-right: 0px;
+ box-shadow: 0 2px 10px 0 rgba(0, 0, 0, 0.12);
+ background: #f7f7f7;
+ }
+ .navbar-default{
+ background-image: linear-gradient(to bottom, #1D95B6 0px, #0C6A89 100%);
+ }
+ .navbar-default .navbar-nav > .active > a, .navbar-default .navbar-nav > .open > a{
+ background-image: linear-gradient(to bottom, rgb(32, 162, 198) 0px, rgb(21, 117, 149) 100%);
+
+ }
+ .navbar-default .navbar-brand {
+ color: white;
+ }
+ h4{
+ color: #0A6780;
+ }
+ h5{
+ color: #0D7793;
+ }
+ .node {
+ cursor: pointer;
+ }
+
+ .node circle {
+ fill: #fff;
+ stroke: steelblue;
+ stroke-width: 1.5px;
+ }
+
+ .node text {
+ font: 10px sans-serif;
+ }
+
+ .link {
+ fill: none;
+ stroke: #ccc;
+ stroke-width: 1.5px;
+ }
+ .table{
+ margin-bottom: auto;}
diff --git a/parameter.txt b/parameter.txt
index cc97f05..f3b5b6c 100755
--- a/parameter.txt
+++ b/parameter.txt
@@ -20,7 +20,7 @@ trimOption = SLIDINGWINDOW:4:20
trimBetter_threshold = 0.15
[forAssembly.Illumina]
-trimBetter_threshold = 0.15
+trimBetter_threshold = 0.1
trimOption = SLIDINGWINDOW:4:25
[forAssembly.IonTorrent]
@@ -39,4 +39,9 @@ trimOption = SLIDINGWINDOW:4:15
Illumina Universal Adapter = TruSeq2
[Fileextension]
-fastq = .fastq.gz, .fastq, .fq, .fq.gz
\ No newline at end of file
+fastq = .fastq.gz, .fastq, .fq, .fq.gz, .bam
+
+[Pattern]
+R1 = _R1_
+R2 = _R2_
+lane = _L\d{3}_
\ No newline at end of file
diff --git a/readme.md b/readme.md
new file mode 100755
index 0000000..3dd3605
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,119 @@
+------------
+Introduction
+------------
+
+QCumber is a tool for quality control and exploration of NGS data. The workflow is as follows:
+
+* optional: extract information from Sequence Analysis Viewer
+* Quality control with FastQC
+* Trim Reads with Trimmomatic
+* optional: run FastQC and retrim if necessary
+* Quality control of trimmed reads with FastQC
+* optional: Map reads against reference using bowtie2
+* optional: Classify reads with Kraken
+
+------------
+Dependencies
+------------
+
+This tool was implemented in Python and needs Python 3.4 or higher. For plotting and SAV extraction, R (3.0.2) is required. Furthermore, FastQC (>v0.10.1), bowtie2 (>2.2.3) and Kraken (0.10.5) are required.
+
+Further packages via apt-get install:
+* Python3-pip
+* libfreetype6-dev
+* r-cran-quantreg
+* r-bioc-savr
+
+Packages via pip3 install:
+* Jinja2
+* matplotlib
+
+R packages:
+* ggplot2
+* savR
+
+
+To change tool or adapter path, change config.txt.
+
+-----
+Usage
+-----
+```
+python3 QCumber.py -i <input> -technology <Illumina/IonTorrent> <option(s)>
+```
+Input parameter:
+
+ -i, -input sample folder/file. If Illumina folder, files has to match pattern <Sample name>_<lane>_<R1/R2>_<number>.
+ Eg. Sample_12_345_R1_001.fastq. Otherwise use -1,-2
+ -1 , -2 alternatively to -i: filename. Must not match Illumina names.
+ -technology sequencing technology (Illumina/IonTorrent)
+
+Options:
+
+ -output output folder, default: input folder
+ -reference reference file
+ -adapter adapter sequence (TruSeq2-PE, TruSeq2-SE, TruSeq3-PE, TruSeq3-SE, TruSeq3-PE-2, NexteraPE-PE). Required for Illumina.
+  -sav           Sequence Analysis Viewer folder. Requires Interop folder, RunInfo.xml and RunParameters.xml
+ -threads threads. Default:4
+ -palindrome palindrome parameter used in Trimmomatic (use 30 or 1000 for further analysis). Default: 30
+ -db Kraken database
+ -trimOption Override standard trimming option. E.g. MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>.
+ default: SLIDINGWINDOW:4:15
+ -trimBetter Optimize trimming parameter using 'Per sequence base content' from fastqc
+ -trimBetter_threshold Threshold for 'Per sequence base content' fluctuation. Default:0.15
+  -forAssembly   Trim parameters are optimized for assemblies (trims more aggressively).
+  -forMapping    Trim parameters are optimized for mapping (allows more errors).
+ -minlen Minlen parameter for Trimmomatic. Default:50
+ -index Bowtie2 index if available
+ -save_mapping Save sam files
+ -nokraken skip Kraken
+ -nomapping skip mapping
+
+ -version Get version
+
+Output:
+
+<Sample/Output Folder>
+|-- QCResult
+ |-- Report
+ |-- PDF report per sample
+ |-- HTML report for entire project
+ |-- src
+ |-- img
+ |-- Summary images
+ |-- FastQC
+ |-- <output folder(s) from FastQC>
+ |-- Trimmed
+ |-- <trimmed reads>
+ |-- FastQC
+ |-- <output folder(s) from FastQC>
+
+-------------------
+Program Description
+-------------------
+
+This project consists of 6 files:
+
+QCumber.py main script for running complete pipeline
+classes.py script containing classes
+helper.py small helper functions
+report.tex Template for sample reports
+config.txt configuration for Kraken and Trimmomatic
+boxplot.R boxplots of fastqc output for batch report
+parameter.txt		default parameters
+config.txt tool location
+
+
+-------
+Example
+-------
+
+1. Simple usage for Illumina:
+```
+python3 QCumber.py -1 sample_R1.fastq -2 sample_R2.fastq -technology Illumina -adapter NexteraPE-PE -r myReference.fasta
+```
+
+2. Entering a project:
+```
+python3 QCumber.py -input myProjectFolder/ -technology IonTorrent -r myReference.fasta
+```
diff --git a/readme.txt b/readme.txt
deleted file mode 100755
index f4943c3..0000000
--- a/readme.txt
+++ /dev/null
@@ -1,111 +0,0 @@
-------------
-Introduction
-------------
-
-QCPipeline is a tool for quality control. The workflow is as followed:
-
- 1. Quality control with FastQC
- 2. Trim Reads with Trimmomatic
- 3. Quality control of trimmed reads with FastQC
- 4. Map reads against reference using bowtie2
- 5. Classify reads with Kraken
-
-------------
-Dependencies
-------------
-
-This tool was implemented in python and needs Python 3.4 or higher. Furhermore, FastQC (>v0.10.1), bowtie2 (> 2.2.3) and Kraken (0.10.5) are required.
-
-Further packages via apt-get install:
-* Python3-pip
-* libfreetype6-dev
-
-Packages via pip3 install:
-* Jinja2
-* matplotlib
-
-
-To change tool or adapter path, change config.txt.
-
------
-Usage
------
-```
-python3.4 runQCPipeline.py -i <input> -technology <Illumina/IonTorrent> <option(s)>
-```
-Input parameter:
-
- -i, -input sample folder/file. If Illumina folder, files has to match pattern <Sample name>_<lane>_<R1/R2>_<number>. Eg. Sample_12_345_R1_001.fastq. Otherwise use -1,-2
- -1 , -2 alternatively to -i: filename. Must not match Illumina names.
- -technology sequencing technology (Illumina/IonTorrent)
-
-Options:
-
- -o , -output output folder, default: input folder
- -r reference file
- -a adapter sequence (TruSeq2-PE, TruSeq2-SE, TruSeq3-PE, TruSeq3-SE, TruSeq3-PE-2, NexteraPE-PE). Required for Illumina.
- -threads threads. Default:4
- -p palindrome parameter used in Trimmomatic (use 30 or 1000 for further analysis). Default: 30
- -db Kraken database
- -b input is a project folder. A project folder contains furher sample folder(s)
- -trimOption Use maxinfo or slidinginfo for Trimmomatic.MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>.
- default: SLIDINGWINDOW:4:15
- -minlen Minlen parameter for Trimmomatic. Default:50
- -nokraken skip Kraken
- -nomapping skip mapping
-
- -version Get version
-
-Output:
-
-<Sample/Output Folder>
-|-- QCResult
- |-- summary_<sample name>.pdf (summarizes all FastQC, Trimming and Mapping Results)
- |-- FastQC
- |-- <output folder(s) from FastQC>
- |-- Trimmed
- |-- <trimmed reads>
- |-- FastQC
- |-- <output folder(s) from FastQC>
-[-- if project is entered (-b): FASTQC_COMBINE_<datetime>.html (summarizes all (plotted) results in oen html file) ]
-
--------------------
-Program Description
--------------------
-
-This project consists of 6 files:
-
-runQCPipeline.py main script for running complete pipeline
-classes.py script containing classes
-helper.py small helper functions
-report.tex Template for sample reports
-config.txt configuration for Kraken and Trimmomatic
-
-combineFastQC.py Written by Stefan Fuchs. It combines FastQC, mapping, trimming, classification results in a html file.
- This file can be called without running runQCPipeline.
-
- Usage: python3.4 combineFastQC.py -f project_folder [-csv csvtable]
-
- csv is used to plot mapping, trimming and classification results if the user does not want to run the complete QCPipeline.
- Format: sample name | mapping result | trimming result | Kraken result | % of fragments shorter than read length
-
- The project folder must have the output structure (QCResults).
-
--------
-Example
--------
-
-1. Simple usage for Illumina:
-```
-python3.4 runQCPipeline.py -input mySampleFolder/ -technology Illumina -adapter NexteraPE-PE -r myReference.fasta
-```
-> Output files will be in SampleFolder/QcResults
-
-
-2. Entering a project:
-```
-python3.4 runQCPipeline.py -input myProjectFolder/ -technology IonTorrent -r myReference.fasta -b
-```
-> Output files will be in myProjectFolder/SampleFolder/QcResults and an overall summary will be in myProjectFolder.
-
-
diff --git a/runQCPipeline.py b/runQCPipeline.py
deleted file mode 100755
index e794596..0000000
--- a/runQCPipeline.py
+++ /dev/null
@@ -1,484 +0,0 @@
-#!/usr/bin/env python3.4
-# -*- coding: utf-8 -*-
-__author__ = 'LieuV'
-__version__ = "1.1.2"
-
-from classes import *
-from helper import *
-import os
-from itertools import groupby
-from jinja2 import Environment,FileSystemLoader
-import shutil
-from combineFastQC import combineFastQC
-import configparser
-from collections import OrderedDict
-import json
-
-#dependencies
-try:
- import argparse
-except ImportError:
- print('The "argparse" module is not available. Use Python >3.2.')
- sys.exit(1)
-
-
-
-file_extensions = ["fastq.gz",".fastq", ".fq","fq.gz"]
-
-def get_read(values,argMap, read = "_R1_"):
- read_list = list(filter(lambda x: read in x, values))
-
- if(len(read_list)==0):
- return None
- elif (len(read_list) == 1):
- read = FastQFile(os.path.join(argMap["input"], os.path.basename(read_list[0])))
- else:
- concat_reads = join_reads(read_list, argMap["tmp"], "concat_" + "_".join(os.path.basename(read_list[0]).split("_")[:-1]) + os.path.splitext(read_list[0])[-1] )
- read = FastQFile(concat_reads)
- read.concat_files = [toLatex(os.path.basename(x)) for x in read_list]
- return read
-
-
-def get_illumina_reads( files, argMap, output):
- ## Add readsets to sample
- readsets = []
- # concat same files
- files = sorted(files)
- if(len(files)==1):
- return [ReadSet(output,files[0])]
- if argMap["r1"]:
- if argMap["r2"]:
- return[ReadSet(output, FastQFile(argMap["r1"]), FastQFile(argMap["r2"]) )]
- else:
- return [ReadSet(output, FastQFile(argMap["r1"]))]
-
- for p, values in groupby(files, key=lambda x:x.split("_")[:-2]):
- val = list(values)
- r1 = get_read(val,argMap, "_R1_")
- r2 = get_read(val,argMap, read = "_R2_")
- readsets.append(ReadSet(output, r1,r2))
- return readsets
-
-def run_analysis (readset, argMap, output):
- print("Run FastQC...")
- readset.run_FastQC(argMap["threads"])
- trimmed_output = os.path.join(output, "Trimmed")
- os.makedirs(trimmed_output, exist_ok=True)
- if argMap["trimBetter"]:
- trimming_perc = argMap["trimBetter_threshold"]
- else:
- trimming_perc = ""
- readset.run_Trimmomatic(argMap["adapter"], argMap["threads"], argMap["palindrome"], trimmed_output, argMap["technology"], argMap["minlen"], argMap["trimOption"], trimming_perc)
- readset.trimRes = readset.trimRes.run_FastQC(trimmed_output, argMap["tmp"])
- if argMap["blobplot"]:
- readset.trimRes.run_blobplot(argMap["db"])
- return readset
-
-
-def check_input(argMap):
- if argMap["r1"]:
- if argMap["r2"]:
- return [argMap["r1"], argMap["r2"]]
- else:
- return [argMap["r1"]]
-
- if argMap["technology"]=="Illumina":
- if os.path.isdir(argMap["input"]):
- files = os.listdir(argMap["input"])
- files = [os.path.join(argMap["input"], x) for x in files]
- files = [x for x in files if os.path.isfile(x) & any([ext in x for ext in file_extensions])]
- if len(files)==0:
- sys.exit(argMap["input"] + " does not contain fastq files.")
- return files
- else:
- return [argMap["input"]]
- elif argMap["technology"]=="IonTorrent":
-
- if os.path.isfile(argMap["input"]):
-
- if argMap["input"].endswith(".bam"):
- return argMap["input"]
- else:
- sys.exit("IonTorrent PGM input has to be a bam-file. Otherwise use -b. Use --help for more information.")
- else:
- sys.exit("Incorrect file input")
- elif argMap["technology"] == "Illumina-MiSeq":
- if os.path.isfile(argMap["input"]):
- if any([argMap["input"].endswith(x) for x in file_extensions]):
- return argMap["input"]
- else:
- sys.exit("Not supported file extension.")
- else:
- sys.exit("Illumina MiSeq input has to be a fastq-file. Otherwise use -b for a batch run. Use --help for more information.")
- else:
- sys.exit("Incorrect file input")
- return None
-
-def fillClasses(argMap, temp_bowtie_path = "", tmp = ""):
- #tempdir = tempfile.mkdtemp(dir = argMap["tmp"])
- try:
- if tmp!="":
- argMap["tmp"] = tmp
- if argMap["r1"]:
- argMap["input"] = argMap["r1"]
- argMap["input"] = os.path.abspath(argMap["input"])
- if argMap["output"] == "":
- output = getDir([os.curdir , "QCResults"], True)#getDir([argMap["input"] , "QCResults"], True)
- else:
- output = getDir([argMap["output"], "QCResults"], True)
-
- sample = Sample(getFilenameWithoutExtension(argMap["input"], level = 2, getBase=True), output, argMap["reference"],argMap["threads"], argMap["technology"])
- if argMap["technology"].startswith("Illumina"):
- files = check_input(argMap)
- if len(files) == 0:
- return None
- readsets = get_illumina_reads( files, argMap, output)
-
- #os.makedirs(tempdir, exist_ok=True)
- for rs in readsets:
- readset = run_analysis(rs, argMap, output)
- sample = sample.add_readSet(readset)
- if not argMap["nomapping"]:
- if argMap["save_mapping"]:
- sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, join(argMap["output"], "QCResults", sample.name +".bam"))
- else:
- sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, "/dev/null")
- if not argMap["nokraken"]:
- print("Run Kraken.")
- sample = sample.run_Kraken(argMap["db"])
-
- elif argMap["technology"] == "IonTorrent":
- argMap["input"] = check_input(argMap)
- print("IonTorrent", argMap["input"])
- fastqfile = bam_to_fastq(argMap["input"], argMap["tmp"])
- readset = ReadSet(output, fastqfile)
- readset = run_analysis(readset,argMap, output)
- sample.add_readSet(readset)
- if not argMap["nomapping"]:
- if argMap["save_mapping"]:
- sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, join(argMap["output"], "QCResults", sample.name +".bam"))
- else:
- sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, "/dev/null")
- if not argMap["nokraken"]:
- print("Run Kraken.")
- sample = sample.run_Kraken(argMap["db"])
-
- sample.stop = datetime.datetime.strftime(datetime.datetime.now(),"%d.%m.%Y, %H:%M:%S")
- finally:
- shutil.rmtree(argMap["tmp"])
- return sample
-
-def writeReport(sample, argMap):
- report_name = os.path.join(sample.mainResultsPath,
- "summary_" + sample.name + "_" + datetime.datetime.strftime(datetime.datetime.now(), "%d-%m-%y_%H-%M"))
- print("Writing latex " ,report_name)
- env = Environment()
- env.loader = FileSystemLoader(os.path.dirname(__file__))
-
- template = env.get_template("report.tex")
- pdf_latex = template.render(sample=sample, pipeline=Pipeline(), trim_param = sample.readSets[0].trimRes.print_param(argMap["minlen"], argMap["trimOption"], argMap["palindrome"]))
-
- latex = open(report_name + ".tex", "w")
- latex.write(pdf_latex)
- latex.close()
-
- process = subprocess.Popen(["pdflatex", "-interaction=nonstopmode", "-output-directory=" + sample.mainResultsPath, report_name + ".tex"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
- for line in iter(process.stdout.readline, b''):
- pass#print(line)
- for line in iter(process.stderr.readline, b''):
- print(line)
-
- process.communicate()
-
- for ext in (".tex",".aux", ".log", ".toc", ".lof", ".lot", ".synctex.gz"):
- try:
- os.remove(report_name + ext)
- except OSError:
- pass
-
def check_project(argMap):
    """Validate the batch input folder and return the per-sample paths.

    IonTorrent projects must be a folder of .bam files; Illumina projects
    must be a folder of per-sample sub-folders.  Exits the program with an
    explanatory message when the layout does not match.
    """
    technology = argMap["technology"]
    root = argMap["input"]
    if technology == "IonTorrent":
        if not os.path.isdir(root):
            sys.exit("Enter a valid PGM folder, which contains bam-files.")
        bam_files = [os.path.join(root, entry)
                     for entry in os.listdir(root)
                     if entry.endswith(".bam")]
        if not bam_files:
            sys.exit("Folder does not contain bam-files")
        return sorted(bam_files)
    elif technology == "Illumina":
        if not os.path.isdir(root):
            sys.exit("Illumina projects has to contain subfolders with fastq-files.")
        entries = [os.path.join(root, entry) for entry in os.listdir(root)]
        sample_dirs = [path for path in entries if os.path.isdir(path)]
        if not sample_dirs:
            sys.exit("Enter a valid folder, which contain sample folders")
        return sorted(sample_dirs)
-
-
-
def main(arguments):
    """Entry point: validate arguments, run the QC pipeline, write reports.

    `arguments` is the vars() dict produced by argparse in __main__.  Unset
    trimming options are filled in from parameter.txt, the input layout is
    validated, then either a whole project (batch mode, -b) or a single
    sample is processed.  The temporary working directory is removed in all
    cases, even when the run aborts.

    Bug fix: in single-sample mode `sample.krakenRes.report` was dereferenced
    unconditionally, which crashed with AttributeError when -nokraken was
    set (krakenRes is None); the report is now only collected when Kraken
    actually ran.  Bare `except:` clauses were narrowed to `except Exception:`
    so Ctrl-C is no longer swallowed.
    """
    parameter = configparser.ConfigParser()
    parameter.read(join(dirname(__file__), "parameter.txt"))

    # Default temp folder lives below the output folder unless -tmp was given.
    if not arguments["tmp"]:
        arguments["tmp"] = join(arguments["output"], "qc_tmp")
    else:
        if os.path.exists(arguments["tmp"]):
            arguments["tmp"] = join(arguments["tmp"], "qc_tmp")

    # Illumina and Illumina-MiSeq share one parameter-file section.
    if arguments["technology"].startswith("Illumina"):
        technology = "Illumina"
    else:
        technology = "IonTorrent"

    # Fill unset trimming options from the preset matching the use case;
    # anything still unset falls back to the generic Trimmomatic defaults.
    if arguments["forAssembly"]:
        if not arguments["trimBetter_threshold"]:
            arguments["trimBetter_threshold"] = parameter["forAssembly." + technology]["trimBetter_threshold"]
        if not arguments["trimOption"]:
            arguments["trimOption"] = parameter["forAssembly." + technology]["trimOption"]
    elif arguments["forMapping"]:
        if not arguments["trimBetter_threshold"]:
            arguments["trimBetter_threshold"] = parameter["forMapping." + technology]["trimBetter_threshold"]
        if not arguments["trimOption"]:
            arguments["trimOption"] = parameter["forMapping." + technology]["trimOption"]

    if not arguments["trimOption"]:
        arguments["trimOption"] = parameter["Trimmomatic"]["trimOption"]
    if not arguments["trimBetter_threshold"]:
        arguments["trimBetter_threshold"] = parameter["Trimmomatic"]["trimBetter_threshold"]

    # check input: trimOption must look like OPTION:<int>:<int>
    pattern = re.match("(?P<option>\w+):(?P<value1>\d+):(?P<value2>\d+)", arguments["trimOption"])
    if (arguments["trimOption"] != ""):
        if not ((pattern.group("option") == "SLIDINGWINDOW") or (pattern.group("option") == "MAXINFO")):
            sys.exit("Check trimOption. Only 'SLIDINGWINDOW' or 'MAXINFO' allowed")
        try:
            int(pattern.group("value1"))
            int(pattern.group("value2"))
        except ValueError:
            sys.exit("Check trimOptions.")

    if arguments['technology'] == "Illumina":
        if not arguments['adapter']:
            sys.exit("Illumina projects requires an adapter file")

    if arguments["nomapping"]:
        arguments["reference"] = ""
    else:
        if not arguments["reference"]:
            sys.exit("Mapping needs a reference.")
        else:
            if not os.path.exists(arguments["reference"]):
                sys.exit("Reference does not exist.")

    if not arguments["nokraken"]:
        if not os.path.exists(arguments["db"]):
            sys.exit(arguments["db"] + " does not exist. Enter a valid database for kraken")
        else:
            if "database.kdb" not in os.listdir(arguments["db"]):
                sys.exit("database " + arguments["db"] + " does not contain necessary file database.kdb")

    if not arguments["input"]:
        if arguments["r1"]:
            print(os.path.abspath(arguments["r1"]), os.path.exists(arguments["r1"]))
            if not os.path.exists(arguments["r1"]):
                sys.exit(arguments["r1"] + " does not exist.")
            # IonTorrent takes a single bam, routed through "input".
            if arguments["technology"] == "IonTorrent":
                arguments["input"] = arguments["r1"]
                arguments["r1"] = ""
        else:
            sys.exit("Input file required. Use option -input or -1 / -2")

        if arguments["r2"]:
            if not os.path.exists(arguments["r2"]):
                sys.exit(arguments["r2"] + " does not exist.")

    os.makedirs(arguments["tmp"], exist_ok=True)
    if arguments["index"]:
        mapping_index_path = arguments["index"]
    else:
        mapping_index_path = join(arguments["tmp"], "bowtie2_index")
        os.makedirs(mapping_index_path, exist_ok=True)

    # RUN
    try:
        jsonfile_dict = []
        kraken_reports = OrderedDict()
        if arguments["b"]:
            print("A project was entered")
            i = 1
            project = []
            overview = []

            if arguments["technology"] == "Illumina-MiSeq":
                # Group the fastq files of one sample: everything before the
                # last three "_"-separated fields (<lane>_<R1|R2>_<number>)
                # identifies the sample.
                myFiles = []
                input = sorted(os.listdir(arguments["input"]), key=lambda x: x.split("_")[:-3])
                input = [os.path.join(arguments["input"], x) for x in input
                         if os.path.isfile(os.path.join(arguments["input"], x))
                         and any([x.endswith(ext) for ext in file_extensions])]
                for p, values in groupby(input, key=lambda x: x.split("_")[:-3]):
                    myFiles.append(sorted(list(values)))
                print("Found " + str(len(myFiles)) + " samples.")
                for subset in myFiles:
                    print("Analyse " + str(i) + "/" + str(len(myFiles)))
                    tmp = join(arguments["tmp"], "Sample_" + str(i))
                    os.makedirs(tmp, exist_ok=True)

                    arguments["r1"] = subset[0]
                    if len(subset) > 1:
                        arguments["r2"] = subset[1]
                    sample = fillClasses(arguments, mapping_index_path, tmp)
                    if sample is not None:
                        project.append(sample)
                        try:
                            writeReport(sample, arguments)
                        except Exception:
                            print("Couldnt write pdf.")
                        try:
                            if sample.mappingRes is None:
                                mapping = 0
                            else:
                                mapping = sample.mappingRes.percentAlignedReads
                            if sample.krakenRes is None:
                                kraken = 0
                            else:
                                kraken = sample.krakenRes.pClassified
                            jsonfile_dict.append(
                                OrderedDict([("setname", sample.name), ("Map to reference [%]", mapping),
                                             ("Reads after trimming", sample.pTrimmed()),
                                             ("Classified (Kraken)", kraken),
                                             ("Shorter fragments", sample.get_mean_trimRes()),
                                             ("images", [x.get_all_qc_dict() for x in sample.readSets])]))
                            overview.append((sample.name, mapping, sample.pTrimmed(), kraken, sample.get_mean_trimRes()))
                        except Exception:
                            print("Error in appending summary of all samples.")
                    sample = None
                    i += 1
            else:
                sample_list = check_project(arguments)
                print("Found " + str(len(sample_list)) + " samples.")
                for sub in sample_list:
                    print("Analyse " + str(i) + "/" + str(len(sample_list)))
                    tmp = join(arguments["tmp"], "Sample_" + str(i))
                    os.makedirs(tmp, exist_ok=True)

                    arguments["input"] = sub
                    sample = fillClasses(arguments, mapping_index_path, tmp)
                    if sample is not None:
                        project.append(sample)
                        try:
                            writeReport(sample, arguments)
                        except Exception:
                            print("Couldnt write pdf.")
                        try:
                            if sample.mappingRes is None:
                                mapping = 0
                            else:
                                mapping = sample.mappingRes.percentAlignedReads
                            if sample.krakenRes is None:
                                kraken = 0
                            else:
                                kraken = sample.krakenRes.pClassified
                            jsonfile_dict.append(
                                OrderedDict([("setname", sample.name), ("Map to reference [%]", mapping),
                                             ("Reads after trimming", sample.pTrimmed()),
                                             ("Classified (Kraken)", kraken),
                                             ("Shorter fragments", sample.get_mean_trimRes()),
                                             ("Trim Parameter", sample.readSets[0].trimRes.print_param(arguments["minlen"], arguments["trimOption"], arguments["palindrome"])),
                                             ("images", [x.get_all_qc_dict() for x in sample.readSets])]))
                            overview.append((sample.name, mapping, sample.pTrimmed(), kraken, sample.get_mean_trimRes()))
                        except Exception:
                            print("Error in appending summary of all samples.")
                    sample = None
                    i += 1
            try:
                combineFastQC(join(arguments["output"], "QCResults"),
                              overview=(getFilenameWithoutExtension(arguments["reference"], level=1, getBase=True),
                                        np.array(overview, dtype=[('name', '|S100'), ('mapping', float), ('trimming', float),
                                                                  ('kraken', float), ('shorter_reads', float)])),
                              tmp=arguments["tmp"])
            except Exception:
                print("Couldnt produce html.")

        else:  # no batch
            if check_input(arguments):
                sample = fillClasses(arguments, mapping_index_path)
                try:
                    writeReport(sample, arguments)
                except Exception:
                    print("Couldnt write pdf")
                if sample.mappingRes is None:
                    mapping = 0
                else:
                    mapping = sample.mappingRes.percentAlignedReads
                if sample.krakenRes is None:
                    kraken = 0
                else:
                    kraken = sample.krakenRes.pClassified
                    # Only collect the Kraken report when Kraken actually ran;
                    # dereferencing sample.krakenRes.report unconditionally
                    # raised AttributeError under -nokraken.
                    kraken_reports[sample.name] = json.loads(str_to_json(sample.krakenRes.report))
                jsonfile_dict.append(
                    OrderedDict([("setname", sample.name), ("Map to reference [%]", mapping),
                                 ("Reads after trimming", sample.pTrimmed()),
                                 ("Classified (Kraken)", kraken),
                                 ("Shorter fragments", sample.get_mean_trimRes()),
                                 ("Trim Parameter", sample.readSets[0].trimRes.print_param(arguments["minlen"], arguments["trimOption"], arguments["palindrome"])),
                                 ("images", [x.get_all_qc_dict() for x in sample.readSets])
                                 ]))

        try:
            # Non-default Jinja2 delimiters so the template's JavaScript
            # braces do not clash with Jinja's defaults.
            env = Environment(block_start_string='@@', block_end_string='@@',
                              variable_start_string='@=', variable_end_string='=@')
            env.loader = FileSystemLoader(os.path.dirname(__file__))
            template = env.get_template("batch_report.html")
            batch_report = template.render(json_samples=json.dumps(jsonfile_dict),
                                           commandline=json.dumps(arguments),
                                           kraken=json.dumps(kraken_reports))
            with open(join(arguments["output"], "QCResults", "batch_report.html"), "w") as batch_html:
                batch_html.write(batch_report)
        except Exception:
            print("Couldnt produce html file.")
    finally:
        # Always drop the temp working directory, even on failure.
        shutil.rmtree(arguments["tmp"])
-
if __name__ == "__main__":
    # Read the optional site configuration; it may pre-define the Kraken
    # database path so users do not have to pass -db on every run.
    config = configparser.ConfigParser()
    config.read(join(dirname(__file__), "config.txt"))
    kraken_db = ""
    if "DEFAULT" in config:
        if "kraken_db" in config["DEFAULT"].keys():
            kraken_db = config["DEFAULT"]["kraken_db"]

    parser = argparse.ArgumentParser()
    # Input selection: either a sample folder (-input) or explicit read files
    # (-1 / -2). The two styles are reconciled and validated in main().
    parser.add_argument('-input', dest='input', help = "input sample folder. Illumina filenames have to end with _<lane>_<R1|R2>_number, e.g. Sample_12_345_R1_001.fastq", required=False)
    parser.add_argument('-1', dest='r1', help = "input file. Illumina filename must not match <project>_<lane>_<R1|R2>_<number> name pattern", required=False)
    parser.add_argument('-2', dest='r2', help = "input file", required=False)

    parser.add_argument('-output', dest='output', default="")
    parser.add_argument('-technology', dest='technology',choices=['Illumina',"IonTorrent", "Illumina-MiSeq"] ,required = True)
    parser.add_argument('-threads', dest='threads', default = 4, type = int)
    # Trimmomatic adapter set; required for Illumina runs (checked in main()).
    parser.add_argument('-adapter', dest = 'adapter', choices = ['TruSeq2-PE', 'TruSeq2-SE' , 'TruSeq3-PE' , 'TruSeq3-SE' , 'TruSeq3-PE-2', 'NexteraPE-PE' ])
    # Mapping (Bowtie2) and classification (Kraken) options.
    parser.add_argument('-reference', dest='reference', required = False, help = "Map reads against reference")
    parser.add_argument('-index', dest='index', required = False, help = "Mapping index")
    parser.add_argument('-save_mapping', action = "store_true")
    parser.add_argument('-db', dest='db', help='Kraken database', required = False, default= kraken_db) #"/home/andruscha/programs/kraken/minikraken_20141208/"
    # Trimming parameters; unset values fall back to parameter.txt presets.
    parser.add_argument('-palindrome', dest='palindrome', help= 'Use palindrome parameter 30 or 1000 for further analysis. Default:30', default=30, type = int)
    parser.add_argument('-minlen', dest='minlen',help='Minlen parameter for Trimmomatic. Default:50', default=50, type=int)
    parser.add_argument('-trimOption', dest="trimOption", help='Use maxinfo or slidinginfo for Trimmomatic.MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>. default: SLIDINGWINDOW:4:15', type= str)
    parser.add_argument('-version', action='version', version='%(prog)s v' + __version__)
    parser.add_argument('-b', action = "store_true", help = "Batch analysis. Run QC on a project.")
    parser.add_argument('-nokraken', action = "store_true")
    parser.add_argument('-nomapping', action = "store_true")
    parser.add_argument('-trimBetter', action = "store_true")
    parser.add_argument('-trimBetter_threshold', dest='trimBetter_threshold', help = "Default setting for Illumina: 0.15 and for IonTorrent: 0.25.", required=False, type=float)
    parser.add_argument('-blobplot', action = "store_true")
    parser.add_argument('-forAssembly', action = "store_true", help = "Trim parameter are optimized for assemblies (trim more aggressive).")
    parser.add_argument('-forMapping', action = "store_true", help = "Trim parameter are optimized for mapping (allow more errors).")
    parser.add_argument('-tmp',dest="tmp", required = False, help= "Define Temp folder. Default: output folder.")

    # main() consumes a plain dict of the parsed options.
    arguments = vars(parser.parse_args())
    main(arguments)
\ No newline at end of file
diff --git a/sav.R b/sav.R
new file mode 100755
index 0000000..06a33b5
--- /dev/null
+++ b/sav.R
@@ -0,0 +1,150 @@
#!/usr/bin/env Rscript
# Render Illumina SAV (Sequence Analysis Viewer) style QC plots as png files.
#
# Usage: sav.R <run folder containing InterOp data> <output folder>
#
# Install hints:
#   source("http://bioconductor.org/biocLite.R")
#   biocLite("savR")
#
# NOTE: the mailing-list archive mangled the R slot operator "@" into " at "
# (e.g. "project at reads"); this version restores valid R syntax.
library(savR)
library(reshape2)
# ggplot()/ggsave() are used throughout; load ggplot2 explicitly instead of
# relying on savR attaching it as a side effect.
library(ggplot2)

args <- commandArgs(trailingOnly = TRUE)
project <- savR(args[1])

################
##  Indexing  ##
################

# total / pass-filter reads of lane 1
total_reads <- clusters(project, 1L)
pf_reads <- pfClusters(project, 1L)

################
##   Plots    ##
################

##
# Data By Cycle
##

extraction <- extractionMetrics(project)

# Data By Cycle, FWHM / All Lanes / Both surfaces / All Bases
reshaped_extraction <- melt(extraction, measure.vars = c("FWHM_A", "FWHM_C", "FWHM_T", "FWHM_G"))
FWHM <- aggregate(reshaped_extraction$value,
                  by = list(reshaped_extraction$cycle, reshaped_extraction$variable),
                  FUN = mean)
colnames(FWHM) <- c("Cycles", "FWHM", "Value")
FWHM$FWHM <- sub("FWHM_", "", FWHM$FWHM)
ggplot(data = FWHM) +
  geom_line(aes(x = Cycles, y = Value, color = FWHM)) +
  ggtitle("Data by Cycle - FWHM") +
  xlab("Cycle") +
  ylab("All bases FWHM")
ggsave(paste(args[2], "/FWHM.png", sep = ""))

# Data By Cycle, Intensity / All Lanes / Both surfaces / All Bases
reshaped_extraction <- melt(extraction, measure.vars = c("int_A", "int_C", "int_T", "int_G"))
intensity <- aggregate(reshaped_extraction$value,
                       by = list(reshaped_extraction$cycle, reshaped_extraction$variable),
                       FUN = mean)
colnames(intensity) <- c("Cycles", "Intensity", "Value")
intensity$Intensity <- sub("int_", "", intensity$Intensity)
ggplot(data = intensity) +
  geom_line(aes(x = Cycles, y = Value, color = Intensity)) +
  ggtitle("Data By Cycle - Intensity") +
  xlab("Cycle") + ylab("All bases intensity")
ggsave(paste(args[2], "/intensity.png", sep = ""))

# Data By Cycle, %Base / All Lanes / Both surfaces / All Bases
corr <- correctedIntensities(project)
# Columns 14:17 hold the per-base call counts; convert to percentages per cycle.
corr[, seq(14, 17)] <- round(corr[, seq(14, 17)] / apply(corr[, seq(14, 17)], 1, sum) * 100, 2)
corr <- melt(corr, measure.vars = c("num_A", "num_C", "num_T", "num_G"))
corr <- aggregate(corr$value, by = list(corr$cycle, corr$variable), FUN = mean)
colnames(corr) <- c("Cycle", "Base", "Perc_Base")
corr$Base <- sub("num_", "", corr$Base)
ggplot(corr) +
  geom_line(aes(x = Cycle, y = Perc_Base, color = Base)) +
  ylab("All Bases % Base") +
  ggtitle("Data by Cycle - % Base")
ggsave(paste(args[2], "/base_perc.png", sep = ""))

##
# Data By Lane
##

tiles <- tileMetrics(project)

# Cluster density, both surfaces. Codes 100/101 are raw / pass-filter
# cluster density (see savR tileMetrics documentation).
dens <- tiles[which(tiles$code == 100 | tiles$code == 101), ]
dens[which(dens$code == 100), ]$code <- "Raw Clusters"
dens[which(dens$code == 101), ]$code <- "PF Clusters"
dens$value <- dens$value / 1000
ggplot(data = dens, aes(x = lane, y = value, fill = code)) +
  geom_boxplot() +
  ggtitle("Data By Lane - Cluster Density") +
  xlab("Lane") + ylab("Cluster Density (K/mm2)")
ggsave(paste(args[2], "/density.png", sep = ""))

# Phasing, both surfaces, all bases. Codes 200, 202, ... are phasing per read.
phasing_code <- seq(200, (200 + (length(project@reads) - 1) * 2), 2)
phasing <- tiles[which(tiles$code %in% phasing_code), ]
for (i in phasing_code) {
  cat(paste("Read ", ((i - 200) / 2) + 1))
  phasing[which(phasing$code == i), ]$code = paste("Read ", ((i - 200) / 2) + 1)
}
ggplot(data = phasing[which(phasing$value > 0), ], aes(x = lane, y = value * 100, fill = code)) +
  geom_boxplot() +
  ggtitle("Data By Lane - Phasing") +
  xlab("Lane") +
  ylab("% Phasing") +
  scale_x_continuous(breaks = unique(phasing$lane))
ggsave(paste(args[2], "/phasing.png", sep = ""))

# Pre-phasing, both surfaces, all bases. Codes 201, 203, ... per read.
prephasing_code <- seq(201, (201 + (length(project@reads) - 1) * 2), 2)
prephasing <- tiles[which(tiles$code %in% prephasing_code), ]
for (i in prephasing_code) {
  prephasing[which(prephasing$code == i), ]$code = paste("Read ", ((i - 201) / 2) + 1)
}
ggplot(data = prephasing[which(prephasing$value > 0), ], aes(x = lane, y = value * 100, fill = code)) +
  geom_boxplot() +
  ggtitle("Data By Lane - Prephasing") +
  xlab("Lane") +
  ylab("% Prephasing") +
  scale_x_continuous(breaks = unique(prephasing$lane))
ggsave(paste(args[2], "/prephasing.png", sep = ""))

##
# QScore Heatmap
##
png(paste(args[2], "/qscore_heatmap.png", sep = ""), height = 1025, width = 2571, res = 200)
qualityHeatmap(project, lane = seq(1, project@layout@lanecount), read = c(1, 2)) +
  theme(axis.title.y = element_blank())
dev.off()

##
# QScore distribution
##
qualy <- qualityMetrics(project)
qualy <- data.frame(apply(qualy, 2, as.numeric))
qualy <- melt(qualy, measure.vars = colnames(qualy)[4:ncol(qualy)])

qualy <- aggregate(qualy$value, by = list(qualy$variable), FUN = sum)
colnames(qualy) <- c("QScore", "Total")
qualy$Total <- qualy$Total / 1000000
qualy$QScore <- as.numeric(qualy$QScore)
ggplot(qualy, aes(x = QScore, y = Total)) +
  geom_bar(stat = "identity", aes(fill = QScore >= 30)) +
  ylab("Total (million)") +
  geom_vline(aes(xintercept = 30), linetype = "dashed") +
  geom_text(aes(x = 35, y = max(Total) - max(Total) * 0.1,
                label = (paste("QScore >=30 \n",
                               round(sum(qualy[which(qualy$QScore >= 30), ]$Total) / 1000, 2),
                               "G \n",
                               round(sum(qualy[which(qualy$QScore >= 30), ]$Total) / sum(qualy$Total) * 100, 2),
                               "%")
                         ))) +
  ggtitle("QScore Distribution") +
  theme(legend.position = "none")
ggsave(paste(args[2], "/qscore_distr.png", sep = ""))
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/qcumber.git
More information about the debian-med-commit
mailing list