[med-svn] [qcumber] 03/07: New upstream version 1.0.5+dfsg
Andreas Tille
tille at debian.org
Wed Feb 15 09:48:53 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository qcumber.
commit 0585a8a7dd87edeca3a34ce4ad6cec4e85aceee6
Author: Andreas Tille <tille at debian.org>
Date: Wed Feb 15 08:37:57 2017 +0100
New upstream version 1.0.5+dfsg
---
.gitignore | 2 +-
QCumber.py | 1029 +++++++++++++++++-----------------
barplot.R | 57 +-
batch_report.html | 1580 ++++++++++++++++++++++++++---------------------------
boxplot.R | 50 +-
classes.py | 1256 +++++++++++++++++++++---------------------
config.txt | 20 +-
helper.py | 786 +++++++++++++-------------
lib/box.js | 638 ++++++++++-----------
lib/qcstyle.css | 168 +++---
license.txt | 165 ++++++
parameter.txt | 92 ++--
readme.md | 18 +
report.tex | 366 ++++++-------
sav.R | 298 +++++-----
15 files changed, 3343 insertions(+), 3182 deletions(-)
diff --git a/.gitignore b/.gitignore
index 4e330dc..e2b60ab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,4 @@
.idea
.svn
__pycache__
-config.txt
+
diff --git a/QCumber.py b/QCumber.py
index 303edfe..c56f2a7 100755
--- a/QCumber.py
+++ b/QCumber.py
@@ -1,511 +1,518 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-__author__ = 'LieuV'
-__version__ = "1.0.1"
-
-from classes import *
-from helper import *
-import os
-from itertools import groupby
-from jinja2 import Environment,FileSystemLoader
-import shutil
-import configparser
-from collections import OrderedDict
-import json
-
-#dependencies
-try:
- import argparse
-except ImportError:
- print('The "argparse" module is not available. Use Python >3.2.')
- sys.exit(1)
-
-
-parameter = configparser.ConfigParser()
-parameter.read(join(dirname(__file__), "parameter.txt"))
-
-file_extensions = parameter["Fileextension"]["fastq"]
-r1_pattern = parameter["Pattern"]["R1"]
-r2_pattern = parameter["Pattern"]["R2"]
-lane_pattern = parameter["Pattern"]["lane"]
-
-def get_illumina_reads( output, tmp):
- ## Add readsets to sample
- readsets = []
- # split by lanes
- if isinstance(arguments["r1"], list):
- for lane in sorted(set([re.search(lane_pattern, x).group() for x in arguments["r1"]])):
- # concat same files
- r1_reads = [x for x in arguments["r1"] if lane in x]
- readname = re.sub(r1_pattern + ".*", "", os.path.basename(r1_reads[0]))
- if len(arguments["r1"]) != 1:
- r1 = FastQFile(join_reads(r1_reads, tmp, readname + "_R1"), [toLatex(os.path.basename(x)) for x in r1_reads] )
- else:
- r1 = FastQFile(r1_reads[0])
- if arguments["r2"]:
- r2_reads = [x for x in arguments["r2"] if lane in x]
-
- if len(r2_reads) != 1:
- r2 = FastQFile(join_reads(r2_reads, tmp, readname + "_R2"),[toLatex(os.path.basename(x)) for x in r2_reads] )
- else:
- r2 = FastQFile(r2_reads[0])
- #return [ReadSet( r1, r2)]
- readsets.append(ReadSet(r1,r2))
- else:
- readsets.append(ReadSet(r1))
- else:
- r1 = FastQFile(arguments["r1"])
- if arguments["r2"]:
- r2 = FastQFile(arguments["r2"])
- return [ReadSet(r1,r2)]
- else:
- return [ReadSet(r1)]
-
-
- return readsets
-
-def run_analysis (readset, arguments, output, tmp):
- print("Run FastQC...")
- readset.run_FastQC(join(output, "FastQC"))
- if arguments["trimBetter"]:
- trimming_perc = arguments["trimBetter_threshold"]
- else:
- trimming_perc = ""
- readset.run_Trimmomatic(arguments["adapter"], arguments["palindrome"],arguments["minlen"], arguments["trimOption"], trimming_perc)
- readset.trimRes = readset.trimRes.run_FastQC( tmp)
- return readset
-
-
-def check_input(arguments):
- if arguments["r1"]:
- if arguments["r2"]:
- return [arguments["r1"], arguments["r2"]]
- else:
- return [arguments["r1"]]
-
- if arguments["technology"]=="Illumina":
- if os.path.isdir(arguments["input"]):
- files = os.listdir(arguments["input"])
- files = [os.path.join(arguments["input"], x) for x in files]
- files = [x for x in files if os.path.isfile(x) & any([ext in x for ext in file_extensions])]
- if len(files)==0:
- sys.exit(arguments["input"] + " does not contain fastq files.")
- return files
- else:
- return [arguments["input"]]
- elif arguments["technology"]=="IonTorrent":
-
- if os.path.isfile(arguments["input"]):
-
- if arguments["input"].endswith(".bam"):
- return arguments["input"]
- else:
- sys.exit("IonTorrent PGM input has to be a bam-file. Otherwise use -b. Use --help for more information.")
- else:
- sys.exit("Incorrect file input")
- #elif arguments["technology"] == "Illumina-MiSeq":
- # if os.path.isfile(arguments["input"]):
- # if any([arguments["input"].endswith(x) for x in file_extensions]):
- # return arguments["input"]
- # else:
- # sys.exit("Not supported file extension.")
- # else:
- # sys.exit("Illumina MiSeq input has to be a fastq-file. Otherwise use -b for a batch run. Use --help for more information.")
- else:
- sys.exit("Incorrect file input")
- return None
-
-def getSetname():
- try:
- if arguments["technology"] == "Illumina":
-
- if isinstance(arguments["r1"], list):
- return "_".join(basename(arguments["r1"][0]).split("_")[:-3])
- else:
- return getFilenameWithoutExtension(arguments["r1"], True)
- else:
- return getFilenameWithoutExtension(arguments["r1"], getBase=True)
- except:
- return getFilenameWithoutExtension(arguments["r1"], getBase=True)
-
-def fillClasses(temp_bowtie_path = "", tmp=''):
- try:
- output = getDir([arguments["output"], "QCResults"], True)
-
- sample = Sample(getSetname(), output, arguments["reference"],arguments["threads"], arguments["technology"])
- if arguments["technology"].startswith("Illumina"):
- readsets = get_illumina_reads( output, tmp)
- elif arguments["technology"] == "IonTorrent":
- print("IonTorrent", arguments["r1"])
- fastqfile = bam_to_fastq(arguments["r1"],tmp)
- readsets = [ReadSet( fastqfile)]
-
- #os.makedirs(tempdir, exist_ok=True)
- for rs in readsets:
- readset = run_analysis(rs, arguments, output, tmp)
- sample = sample.add_readSet(readset)
- if not arguments["nomapping"]:
- if arguments["save_mapping"]:
- sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, join(arguments["output"], "QCResults", sample.name +".bam"))
- else:
- sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, "/dev/null")
- if not arguments["nokraken"]:
- print("Run Kraken.")
- sample = sample.run_Kraken(arguments["db"])
-
- sample.stop = datetime.datetime.strftime(datetime.datetime.now(),"%d.%m.%Y, %H:%M:%S")
- finally:
- shutil.rmtree(tmp)
- return sample
-
-def writeReport(sample, arguments):
- report_name = os.path.join(sample.mainResultsPath, "Report",
- "summary_" + sample.name + "_" + datetime.datetime.strftime(datetime.datetime.now(), "%d-%m-%y_%H-%M"))
- print("Writing latex " ,report_name)
- env = Environment()
- env.loader = FileSystemLoader(os.path.dirname(__file__))
-
- template = env.get_template("report.tex")
- pdf_latex = template.render(sample=sample, pipeline=Pipeline(), trim_param = sample.readSets[0].trimRes.print_param(arguments["palindrome"],arguments["minlen"], arguments["trimOption"]))
-
- latex = open(report_name + ".tex", "w")
- latex.write(pdf_latex)
- latex.close()
-
- process = subprocess.Popen(["pdflatex", "-interaction=nonstopmode", "-output-directory=" + join(sample.mainResultsPath, "Report"), report_name + ".tex"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
- for line in iter(process.stdout.readline, b''):
- pass #print(line)
- for line in iter(process.stderr.readline, b''):
- print(line)
-
- process.communicate()
-
- for ext in (".tex",".aux", ".log", ".toc", ".lof", ".lot", ".synctex.gz"):
- try:
- os.remove(report_name + ext)
- except OSError:
- pass
-
-def main(arguments):
-
- tmp = join(arguments["output"],"QCResults" ,"qc_tmp")
- makedirs(tmp, exist_ok=True)
- makedirs(join(arguments["output"], "QCResults"), exist_ok=True)
- makedirs(join(arguments["output"], "QCResults", "FastQC"), exist_ok=True)
- makedirs(join(arguments["output"], "QCResults", "Trimmed"), exist_ok=True)
- makedirs(join(arguments["output"], "QCResults", "Trimmed", "FastQC"), exist_ok=True)
- makedirs(join(arguments["output"], "QCResults", "Report"), exist_ok=True)
- makedirs(join(arguments["output"], "QCResults", "Report", "src"), exist_ok=True)
- makedirs(join(arguments["output"], "QCResults", "Report", "src", "img"), exist_ok=True)
-
- # create new boxplot csv files
- for csv in ["Per_sequence_quality_scores.csv", "Per_sequence_GC_content.csv", "Sequence_Length_Distribution.csv"]:
- new_csv = open(join(arguments["output"], "QCResults", "Report", "src", csv), "w")
- new_csv.write("Sample,Read,Lane,Trimmed,Value,Count\n")
- new_csv.close()
- ##
- # SAV
- try:
- write_SAV(arguments["sav"], join(arguments["output"], "QCResults", "Report", "src"))
- except:
- print("Couldnt write SAV.")
- ##
-
- if not os.path.exists(join(arguments["output"], "QCResults", "Report", "lib" )):
- shutil.copytree(join(os.path.dirname(__file__),"lib"),join(arguments["output"], "QCResults", "Report", "lib" ))
- if arguments["technology"].startswith("Illumina"):
- technology = "Illumina"
- else:
- technology = "IonTorrent"
-
- if arguments["forAssembly"]:
- if not arguments["trimBetter_threshold"]:
- arguments["trimBetter_threshold"] = parameter["forAssembly." + technology]["trimBetter_threshold"]
- if not arguments["trimOption"]:
- arguments["trimOption"] = parameter["forAssembly." + technology]["trimOption"]
-
- elif arguments["forMapping"]:
- if not arguments["trimBetter_threshold"]:
- arguments["trimBetter_threshold"] = parameter["forMapping." + technology]["trimBetter_threshold"]
- if not arguments["trimOption"]:
- arguments["trimOption"] = parameter["forMapping." + technology]["trimOption"]
-
- if not arguments["trimOption"]:
- arguments["trimOption"] = parameter["Trimmomatic"]["trimOption"]
- if not arguments["trimBetter_threshold"]:
- arguments["trimBetter_threshold"] = parameter["Trimmomatic"]["trimBetter_threshold"]
-
- # check input
- pattern = re.match("(?P<option>\w+):(?P<value1>\d+):(?P<value2>\d+)", arguments["trimOption"])
- if(arguments["trimOption"]!=""):
- if not ((pattern.group("option") =="SLIDINGWINDOW") or (pattern.group("option") =="MAXINFO")):
- sys.exit("Check trimOption. Only 'SLIDINGWINDOW' or 'MAXINFO' allowed")
- try:
- int(pattern.group("value1"))
- int(pattern.group("value2"))
- except:
- sys.exit("Check trimOptions.")
-
- if arguments['technology'] == "Illumina":
- if not arguments['adapter']:
- sys.exit("Illumina projects requires an adapter file")
- if arguments["nomapping"]:
- #print("no mapping")
- arguments["reference"] = ""
- else:
- if not arguments["reference"]:
- sys.exit("Mapping needs a reference.")
- else:
- if not os.path.exists(arguments["reference"]):
- sys.exit("Reference does not exist.")
-
- if not arguments["nokraken"]:
- if not os.path.exists(arguments["db"]):
- sys.exit(arguments["db"] + " does not exist. Enter a valid database for kraken")
- else:
- if "database.kdb" not in os.listdir(arguments["db"]):
- sys.exit("database "+arguments["db"] +" does not contain necessary file database.kdb")
- if not arguments["input"]:
- if arguments["r1"]:
- print(os.path.abspath(arguments["r1"]),os.path.exists(arguments["r1"]))
- if not os.path.exists(arguments["r1"]):
- sys.exit(arguments["r1"] + " does not exist.")
- #if arguments["technology"]=="IonTorrent":
- # arguments["input"] = arguments["r1"]
- # arguments["r1"] = ""
- else:
- sys.exit("Input file required. Use option -input or -1 / -2")
-
- if arguments["r2"]:
- if not os.path.exists(arguments["r2"]):
- sys.exit(arguments["r2"] + " does not exist.")
-
- if arguments["index"]:
- mapping_index_path = arguments["index"]
- else:
- mapping_index_path = join(tmp, "bowtie2_index")
- os.makedirs(mapping_index_path, exist_ok=True)
- # RUN
- try:
- batch_json = json.dump({"commandline":arguments, "summary":[],"kraken":{}, "versions":Pipeline().__dict__}, open(join(arguments["output"], "QCResults/Report/src", "summary.json"),"w"))
- jsonfile_dict = []
- kraken_reports = OrderedDict()
- #boxplot_data = []
- i = 1
- project = []
- tmp_overview = open(join(tmp, "overview.csv"), "w")
- tmp_overview.write("Sample,Trimmed,Mapped,Classified\n")
- myFiles = []
-
- #filter samples and put it into for loop
- # Get folder and filter by last three pattern splitted by "_"
- if arguments["input"]:
- input = [ join(arguments["input"], x) for x in sorted(os.listdir(arguments["input"]))]
- #### check input format ####
- # Case 1: Folder contains fastq files
- if len([x for x in input if isfile(x) and any([x.endswith(ext) for ext in file_extensions]) ])>0:
- input= [x for x in input if isfile(x) and any([x.endswith(ext) for ext in file_extensions]) ]
- if len(input)!=0:
- if arguments["technology"].startswith("Illumina"):
- for setname, files in groupby(input, key=lambda x: "_".join(x.split("_")[:-3])):
- #for setname, files in groupby(files, key=lambda x: "_".join(x.split("_")[:-2])):
- temp_file = sorted(list(files))
- r1_reads = [x for x in temp_file if r1_pattern in x]
- r2_reads = [x for x in temp_file if r2_pattern in x]
- if len(r1_reads) != 0:
- if len(r2_reads) != 0:
- myFiles.append([r1_reads, r2_reads])
- else:
- myFiles.append(r1_reads)
- else:
- # IonTorrent
- myFiles = input
- # Case 2: Folder contains subfolder for each sample
- else:
- input = [x for x in input if os.path.isdir(x)]
-
- if len(input) > 0:
- for sample in input:
- files = sorted([join(sample,x) for x in listdir(sample) if any([x.endswith(ext) for ext in file_extensions])])
- if arguments["technology"].startswith("Illumina"):# and len(files) > 1:
- for setname, files in groupby(files, key=lambda x: "_".join(x.split("_")[:-3])):
- temp_file = sorted(list(files))
- r1_reads = [x for x in temp_file if r1_pattern in x]
- r2_reads = [x for x in temp_file if r2_pattern in x]
- if len(r1_reads)!=0:
- if len(r2_reads)!=0:
- myFiles.append([r1_reads, r2_reads])
- else:
- myFiles.append(r1_reads)
- else:
- myFiles.append(sorted(list(files)))
- files = None
- if len(myFiles) == 0:
- sys.exit("Enter a valid folder, which contain sample folders")
- else:
- if arguments["r1"]:
- if not os.path.exists(arguments["r1"]):
- sys.exit(arguments["r1"] + " does not exist.")
-
- if arguments["r2"]:
- myFiles.append([arguments["r1"], arguments["r2"]])
- else:
- myFiles.append([arguments["r1"]])
-
- print("Found " + str(len(myFiles)) + " sample(s).")
-
- for subset in myFiles:
- print("Analyse " + str(i) + "/" + str(len(myFiles)))
-
- sample_tmp = join(tmp, "Sample_" + str(i))
- os.makedirs(sample_tmp, exist_ok=True)
-
- if isinstance(subset, str):
- arguments["r1"] = subset
- else:
- arguments["r1"] = subset[0]
- if len(subset)>1:
- arguments["r2"] = subset[1]
-
- sample = fillClasses( mapping_index_path, sample_tmp )
- if sample is not None:
- project.append(sample)
- try:
- writeReport(sample, arguments)
- except:
- print("Couldnt write pdf.")
- try:
- batch_json = json.load(open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "r"), object_pairs_hook=OrderedDict)
- if sample.mappingRes is None:
- pMapping = 0
- nMapping = 0
- else:
- pMapping = sample.mappingRes.percentAlignedReads
- nMapping = sample.mappingRes.numberOfAlignedReads
- if sample.krakenRes is None:
- kraken = 0
- kraken_reports ={}
- else:
- kraken = sample.krakenRes.pClassified
- batch_json["kraken"][sample.name] = json.loads(str_to_json(sample.krakenRes.report))
- #kraken_reports[sample.name] = json.loads(str_to_json(sample.krakenRes.report))
- tmp_overview.write(",".join([sample.name, str(sample.nTrimmed()), str(nMapping), str(kraken)]) +"\n")
- batch_json["summary"].append(
- #jsonfile_dict.append(
- OrderedDict([("setname", sample.name),
- ("Map to reference [#]", nMapping),
- ("Map to reference [%]", pMapping),
- ("Reads after trimming [#]", sample.nTrimmed()),
- ("Reads after trimming [%]", sample.pTrimmed()),
- ("Classified (Kraken)", kraken),
- ("Shorter fragments", sample.get_mean_trimRes()),
- ("Trim Parameter", sample.readSets[0].trimRes.print_param(arguments["palindrome"],arguments["minlen"],arguments["trimOption"])),
- ("images", [x.get_all_qc_dict() for x in sample.readSets])]))
-
- json.dump(batch_json, open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "w"))
-
- #boxplot_data.append(sample.get_all_qc_plots())
- except:
- print("Couldnt produce HTML report.")
- sample = None
- i+=1
-
- try:
- #json.dump({"summary": jsonfile_dict, "commandline":arguments, "kraken":kraken_reports},open(join(arguments["output"], "QCResults/Report/src", "summary.json"),"w"))
-
- # plot boxplots
- boxplots = [{"file":"Per_sequence_quality_scores.csv",
- "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_quality_scores.png"),
- "title": "Per sequence quality scores",
- "ylab": "Mean Sequence Quality (Phred Score)",
- "xlab": "Sample" },
- {"file": "Sequence_Length_Distribution.csv",
- "output": join(arguments["output"], "QCResults/Report/src/img", "Sequence_Length_Distribution.png"),
- "title": "Sequence Length Distribution",
- "ylab": "Sequence Length (bp)",
- "xlab": "Sample"},
- {"file": "Per_sequence_GC_content.csv",
- "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_GC_content.png"),
- "title": "Per sequence GC content",
- "ylab": "Mean GC content (%)",
- "xlab": "Sample"} ]
- for plot in boxplots:
- process = subprocess.Popen(" ".join(["Rscript --vanilla ",join(os.path.dirname(__file__) ,"boxplot.R"),
- join(arguments["output"],"QCResults", "Report", "src", plot["file"]), plot["output"], '"'+ plot["title"]+'"', '"'+plot["xlab"]+'"','"'+ plot["ylab"]+'"']),
- stderr=subprocess.PIPE, stdout= subprocess.PIPE, shell =True)
- for line in iter(process.stderr.readline, b''):
- print(line)
- process.communicate()
- #plot barplots
- tmp_overview.close()
- process = subprocess.Popen(" ".join(["Rscript --vanilla ", join(os.path.dirname(__file__), "barplot.R"),join(tmp, "overview.csv"),
- join(arguments["output"], "QCResults/Report/src/img")]),stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
- for line in iter(process.stderr.readline, b''):
- print(line)
- process.communicate()
- shutil.copy(join(os.path.dirname(__file__), "batch_report.html"), join(arguments["output"], "QCResults", "Report", "batch_report.html"))
-
- summary_latex_name = join(arguments["output"], "QCResults", "Report", "summary.latex")
- with open(summary_latex_name, "w") as summary_latex:
- summary_latex.write("\\rowcolors{2}{gray!10}{}\n\\begin{table}[H]\\begin{tabular}{lcc}\n")
- summary_latex.write(
- "\\rowcolor{tableBlue} \\textbf{Sample} & \\textbf{Map to reference [\#]} & \\textbf{Map to reference[\%]} & \\textbf{Reads after trimming [\#]} & \\textbf{Reads after trimming [\%]} & \\textbf{Classified (Kraken)} & \\textbf{Shorter fragments [\%]} \\ \n")
-
- for i in range(0, len(jsonfile_dict)):
- summary_latex.write(
- str(batch_json["summary"][i]["setname"]) + " & " +
- str(batch_json["summary"][i]["Map to reference [#]"]) + " & " +
- str(batch_json["summary"][i]["Map to reference [%]"]) + " & " +
- str(batch_json["summary"][i]["Reads after trimming [#]"]) + " & " +
- str(batch_json["summary"][i]["Reads after trimming [%]"]) + " & " +
- str(batch_json["summary"][i]["Classified (Kraken)"]) + " & " +
- str(batch_json["summary"][i]["Shorter fragments"]) + "\\\\\n"
- )
- summary_latex.write("\\end{tabular}\n")
- summary_latex.write("\\caption{Statistics on reads after trimming. }")
- summary_latex.close()
- except:
-
- print("Couldnt produce html file.")
- finally:
-
- shutil.rmtree(tmp)
- delete_files(join(arguments["output"],"QCResults"), "FastQC", "_fastqc.html")
- delete_files(join(arguments["output"],"QCResults"), "Trimmed/FastQC", "_fastqc.html")
-
-if __name__ == "__main__":
- config = configparser.ConfigParser()
- config.read(join(dirname(__file__), "config.txt"))
- kraken_db = ""
- if "DEFAULT" in config:
- if "kraken_db" in config["DEFAULT"].keys():
- kraken_db = config["DEFAULT"]["kraken_db"]
-
- parser = argparse.ArgumentParser()
- parser.add_argument( '-input', dest='input', help = "input sample folder. Illumina filenames have to end with _<lane>_<R1|R2>_number, e.g. Sample_12_345_R1_001.fastq", required=False)
- parser.add_argument('-1' , dest='r1', help = "input file. Illumina filename must not match <project>_<lane>_<R1|R2>_<number> name pattern", required=False)
- parser.add_argument( '-2', dest='r2', help = "input file", required=False)
-
- parser.add_argument('-output', dest='output', default="")
- parser.add_argument('-technology', dest='technology',choices=['Illumina',"IonTorrent"] ,required = True)
- parser.add_argument('-threads', dest='threads', default = 4, type = int)
- parser.add_argument('-adapter', dest = 'adapter', choices = ['TruSeq2-PE', 'TruSeq2-SE' , 'TruSeq3-PE' , 'TruSeq3-SE' , 'TruSeq3-PE-2', 'NexteraPE-PE' ])
- parser.add_argument('-reference', dest='reference', required = False, help = "Map reads against reference")
- parser.add_argument('-index', dest='index', required = False, help = "Bowtie2 index if available.")
- parser.add_argument('-sav', dest='sav', required=False,help="Illumina folder for SAV. Requires RunInfo.xml, RunParamter.xml and Interop folder.")
-
- parser.add_argument('-save_mapping', action = "store_true")
- parser.add_argument('-db', dest='db', help='Kraken database', required = False, default= kraken_db) #"/home/andruscha/programs/kraken/minikraken_20141208/"
- parser.add_argument('-palindrome', dest='palindrome', help= 'Use palindrome parameter 30 or 1000 for further analysis. Default:30', default=30, type = int)
- parser.add_argument('-minlen', dest='minlen',help='Minlen parameter for Trimmomatic. Default:50', default=50, type=int)
- parser.add_argument('-trimOption', dest="trimOption", help='Use maxinfo or slidinginfo for Trimmomatic.MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>. default: SLIDINGWINDOW:4:15', type= str)
- parser.add_argument('-version', action='version', version='%(prog)s v' + __version__)
- parser.add_argument('-nokraken', action = "store_true")
- parser.add_argument('-nomapping', action = "store_true")
- parser.add_argument('-trimBetter', action = "store_true", help= "Optimize trimming parameter using 'Per sequence base content' from fastqc. Not recommended for amplicons.")
- parser.add_argument('-trimBetter_threshold', dest='trimBetter_threshold', help = "Set -trimBetter to use this option.Default setting for Illumina: 0.15 and for IonTorrent: 0.25.", required=False, type=float)
- parser.add_argument('-forAssembly', action = "store_true", help = "Set -trimBetter to use this option.Trim parameter are optimized for assemblies (trim more aggressive).")
- parser.add_argument('-forMapping', action = "store_true", help = "Set -trimBetter to use this option.Trim parameter are optimized for mapping (allow more errors).")
-
- arguments = vars(parser.parse_args())
- main(arguments)
+#!/usr/bin/env python3
+__author__ = 'LieuV'
+__version__ = "1.0.5"
+
+from classes import *
+from helper import *
+import os
+from itertools import groupby
+from jinja2 import Environment,FileSystemLoader
+import shutil
+import configparser
+from collections import OrderedDict
+import json
+import csv
+
+#dependencies
+try:
+ import argparse
+except ImportError:
+ print('The "argparse" module is not available. Use Python >3.2.')
+ sys.exit(1)
+
+
+parameter = configparser.ConfigParser()
+parameter.read(join(dirname(__file__), "parameter.txt"))
+
+file_extensions = parameter["Fileextension"]["fastq"]
+r1_pattern = parameter["Pattern"]["R1"]
+r2_pattern = parameter["Pattern"]["R2"]
+lane_pattern = parameter["Pattern"]["lane"]
+
+def get_illumina_reads( output, tmp):
+ ## Add readsets to sample
+ readsets = []
+ # split by lanes
+ if isinstance(arguments["r1"], list):
+ for lane in sorted(set([re.search(lane_pattern, x).group() for x in arguments["r1"]])):
+ # concat same files
+ r1_reads = [x for x in arguments["r1"] if lane in x]
+ readname = re.sub(r1_pattern + ".*", "", os.path.basename(r1_reads[0]))
+ if len(arguments["r1"]) != 1:
+ r1 = FastQFile(join_reads(r1_reads, tmp, readname + "_R1"), [toLatex(os.path.basename(x)) for x in r1_reads] )
+ else:
+ r1 = FastQFile(r1_reads[0])
+ if arguments["r2"]:
+ r2_reads = [x for x in arguments["r2"] if lane in x]
+
+ if len(r2_reads) != 1:
+ r2 = FastQFile(join_reads(r2_reads, tmp, readname + "_R2"),[toLatex(os.path.basename(x)) for x in r2_reads] )
+ else:
+ r2 = FastQFile(r2_reads[0])
+ readsets.append(ReadSet(r1,r2))
+ else:
+ readsets.append(ReadSet(r1))
+ else:
+ r1 = FastQFile(arguments["r1"])
+ if arguments["r2"]:
+ r2 = FastQFile(arguments["r2"])
+ return [ReadSet(r1,r2)]
+ else:
+ return [ReadSet(r1)]
+
+
+ return readsets
+
+def run_analysis (readset, arguments, output, tmp):
+ print("Run FastQC...")
+ readset.run_FastQC(join(output, "FastQC"))
+ if arguments["trimBetter"]:
+ trimming_perc = arguments["trimBetter_threshold"]
+ else:
+ trimming_perc = ""
+ readset.run_Trimmomatic(arguments["adapter"], arguments["palindrome"],arguments["minlen"], arguments["trimOption"], trimming_perc, arguments["gz"])
+ readset.trimRes = readset.trimRes.run_FastQC( tmp)
+ return readset
+
+
+def check_input(arguments):
+ if arguments["r1"]:
+ if arguments["r2"]:
+ return [arguments["r1"], arguments["r2"]]
+ else:
+ return [arguments["r1"]]
+
+ if arguments["technology"]=="Illumina":
+ if os.path.isdir(arguments["input"]):
+ files = os.listdir(arguments["input"])
+ files = [os.path.join(arguments["input"], x) for x in files]
+ files = [x for x in files if os.path.isfile(x) & any([ext in x for ext in file_extensions])]
+ if len(files)==0:
+ sys.exit(arguments["input"] + " does not contain fastq files.")
+ return files
+ else:
+ return [arguments["input"]]
+ elif arguments["technology"]=="IonTorrent":
+
+ if os.path.isfile(arguments["input"]):
+
+ if arguments["input"].endswith(".bam"):
+ return arguments["input"]
+ else:
+ sys.exit("IonTorrent PGM input has to be a bam-file. Otherwise use -b. Use --help for more information.")
+ else:
+ sys.exit("Incorrect file input")
+ else:
+ sys.exit("Incorrect file input")
+ return None
+
+def getSetname():
+ try:
+ print("Renmae", arguments["rename"], arguments["r1"])
+ if arguments["rename"]:
+ new_name = [arguments["rename"][x] for x in arguments["rename"].keys() if basename(arguments["r1"][0]).startswith(x)]
+
+ if len(new_name)>1:
+ print("Name mapping ambiguous. Found: ", new_name)
+ return getFilenameWithoutExtension(arguments["r1"][0], True)
+ elif len(new_name)==0:
+ return getFilenameWithoutExtension(arguments["r1"][0], True)
+ else:
+ print("Renamed to " ,new_name)
+ return new_name[0]
+
+ if arguments["technology"] == "Illumina":
+
+ if isinstance(arguments["r1"], list):
+ return "_".join(basename(arguments["r1"][0]).split("_")[:-3])
+ else:
+ return getFilenameWithoutExtension(arguments["r1"], True)
+ else:
+ return getFilenameWithoutExtension(arguments["r1"], getBase=True)
+ except:
+ if isinstance(arguments["r1"], list):
+ return getFilenameWithoutExtension(arguments["r1"][0], getBase=True)
+ else:
+ return getFilenameWithoutExtension(arguments["r1"], getBase=True)
+
+def fillClasses(temp_bowtie_path = "", tmp=''):
+ try:
+ output = getDir([arguments["output"], "QCResults"], True)
+
+ sample = Sample(getSetname(), output, arguments["reference"],arguments["threads"], arguments["technology"])
+ if arguments["technology"].startswith("Illumina"):
+ readsets = get_illumina_reads( output, tmp)
+ elif arguments["technology"] == "IonTorrent":
+ print("IonTorrent", arguments["r1"])
+ fastqfile = bam_to_fastq(arguments["r1"],tmp)
+ readsets = [ReadSet( fastqfile)]
+
+ for rs in readsets:
+ readset = run_analysis(rs, arguments, output, tmp)
+ sample = sample.add_readSet(readset)
+ if not arguments["nomapping"]:
+ if arguments["save_mapping"]:
+ sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, join(arguments["output"], "QCResults", sample.name +".bam"))
+ else:
+ sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, "/dev/null")
+ if not arguments["nokraken"]:
+ print("Run Kraken.")
+ sample = sample.run_Kraken(arguments["db"])
+
+ sample.stop = datetime.datetime.strftime(datetime.datetime.now(),"%d.%m.%Y, %H:%M:%S")
+ finally:
+ shutil.rmtree(tmp)
+ return sample
+
+def writeReport(sample, arguments):
+ report_name = os.path.join(sample.mainResultsPath, "Report",
+ "summary_" + sample.name + "_" + datetime.datetime.strftime(datetime.datetime.now(), "%d-%m-%y_%H-%M"))
+ print("Writing latex " ,report_name)
+ env = Environment()
+ env.loader = FileSystemLoader(os.path.dirname(__file__))
+
+ template = env.get_template("report.tex")
+ pdf_latex = template.render(sample=sample, pipeline=Pipeline(), trim_param = sample.readSets[0].trimRes.print_param(arguments["palindrome"],arguments["minlen"], arguments["trimOption"]))
+
+ latex = open(report_name + ".tex", "w")
+ latex.write(pdf_latex)
+ latex.close()
+
+ process = subprocess.Popen(["pdflatex", "-interaction=nonstopmode", "-output-directory=" + join(sample.mainResultsPath, "Report"), report_name + ".tex"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
+ for line in iter(process.stdout.readline, b''):
+ pass #print(line)
+ for line in iter(process.stderr.readline, b''):
+ print(line)
+
+ process.communicate()
+
+ for ext in (".tex",".aux", ".log", ".toc", ".lof", ".lot", ".synctex.gz"):
+ try:
+ os.remove(report_name + ext)
+ except OSError:
+ pass
+
+def main(arguments):
+
+ tmp = join(arguments["output"],"QCResults" ,"qc_tmp")
+ makedirs(tmp, exist_ok=True)
+ makedirs(join(arguments["output"], "QCResults"), exist_ok=True)
+ makedirs(join(arguments["output"], "QCResults", "FastQC"), exist_ok=True)
+ makedirs(join(arguments["output"], "QCResults", "Trimmed"), exist_ok=True)
+ makedirs(join(arguments["output"], "QCResults", "Trimmed", "FastQC"), exist_ok=True)
+ makedirs(join(arguments["output"], "QCResults", "Report"), exist_ok=True)
+ makedirs(join(arguments["output"], "QCResults", "Report", "src"), exist_ok=True)
+ makedirs(join(arguments["output"], "QCResults", "Report", "src", "img"), exist_ok=True)
+
+ # rename sample names
+ if arguments["rename"]:
+ rename_dict = {}
+ with open(arguments["rename"], "r") as csvfile:
+ for line in csvfile:
+ row = line.split("\t")
+ rename_dict[row[0]] = row[1].strip()
+ arguments["rename"] = rename_dict
+ # create new boxplot csv files
+ for csv in ["Per_sequence_quality_scores.csv", "Per_sequence_GC_content.csv", "Sequence_Length_Distribution.csv"]:
+ new_csv = open(join(arguments["output"], "QCResults", "Report", "src", csv), "w")
+ new_csv.write("Sample,Read,Lane,Trimmed,Value,Count\n")
+ new_csv.close()
+ ##
+ # SAV
+ try:
+ if arguments["sav"]:
+ write_SAV(arguments["sav"], join(arguments["output"], "QCResults", "Report", "src"))
+ except:
+ print("Couldnt write SAV.")
+ ##
+
+ if not os.path.exists(join(arguments["output"], "QCResults", "Report", "lib" )):
+ shutil.copytree(join(os.path.dirname(__file__),"lib"),join(arguments["output"], "QCResults", "Report", "lib" ))
+ if arguments["technology"].startswith("Illumina"):
+ technology = "Illumina"
+ else:
+ technology = "IonTorrent"
+
+ if arguments["forAssembly"]:
+ if not arguments["trimBetter_threshold"]:
+ arguments["trimBetter_threshold"] = parameter["forAssembly." + technology]["trimBetter_threshold"]
+ if not arguments["trimOption"]:
+ arguments["trimOption"] = parameter["forAssembly." + technology]["trimOption"]
+
+ elif arguments["forMapping"]:
+ if not arguments["trimBetter_threshold"]:
+ arguments["trimBetter_threshold"] = parameter["forMapping." + technology]["trimBetter_threshold"]
+ if not arguments["trimOption"]:
+ arguments["trimOption"] = parameter["forMapping." + technology]["trimOption"]
+
+ if not arguments["trimOption"]:
+ arguments["trimOption"] = parameter["Trimmomatic"]["trimOption"]
+ if not arguments["trimBetter_threshold"]:
+ arguments["trimBetter_threshold"] = parameter["Trimmomatic"]["trimBetter_threshold"]
+
+ # check input
+ pattern = re.match("(?P<option>\w+):(?P<value1>\d+):(?P<value2>\d+)", arguments["trimOption"])
+ if(arguments["trimOption"]!=""):
+ if not ((pattern.group("option") =="SLIDINGWINDOW") or (pattern.group("option") =="MAXINFO")):
+ sys.exit("Check trimOption. Only 'SLIDINGWINDOW' or 'MAXINFO' allowed")
+ try:
+ int(pattern.group("value1"))
+ int(pattern.group("value2"))
+ except:
+ sys.exit("Check trimOptions.")
+
+ if arguments['technology'] == "Illumina":
+ if not arguments['adapter']:
+ sys.exit("Illumina projects requires an adapter file")
+ if arguments["nomapping"]:
+ arguments["reference"] = ""
+ else:
+ if not arguments["reference"]:
+ sys.exit("Mapping needs a reference.")
+ else:
+ if not os.path.exists(arguments["reference"]):
+ sys.exit("Reference does not exist.")
+
+ if not arguments["nokraken"]:
+ if not os.path.exists(arguments["db"]):
+ sys.exit(arguments["db"] + " does not exist. Enter a valid database for kraken")
+ else:
+ if "database.kdb" not in os.listdir(arguments["db"]):
+ sys.exit("database "+arguments["db"] +" does not contain necessary file database.kdb")
+ if not arguments["input"]:
+ if arguments["r1"]:
+ print(os.path.abspath(arguments["r1"]),os.path.exists(arguments["r1"]))
+ if not os.path.exists(arguments["r1"]):
+ sys.exit(arguments["r1"] + " does not exist.")
+ else:
+ sys.exit("Input file required. Use option -input or -1 / -2")
+
+ if arguments["r2"]:
+ if not os.path.exists(arguments["r2"]):
+ sys.exit(arguments["r2"] + " does not exist.")
+
+ if arguments["index"]:
+ mapping_index_path = arguments["index"]
+ else:
+ mapping_index_path = join(tmp, "bowtie2_index")
+ os.makedirs(mapping_index_path, exist_ok=True)
+ # RUN
+ try:
+ batch_json = json.dump({"commandline":arguments, "summary":[],"kraken":{}, "versions":Pipeline().__dict__}, open(join(arguments["output"], "QCResults/Report/src", "summary.json"),"w"))
+ i = 1
+ project = []
+ tmp_overview = open(join(tmp, "overview.csv"), "w")
+ tmp_overview.write("Sample,Trimmed,Mapped,Classified\n")
+ myFiles = []
+
+ #filter samples and put it into for loop
+ # Get folder and filter by last three pattern splitted by "_"
+ if arguments["input"]:
+ input = [ join(arguments["input"], x) for x in sorted(os.listdir(arguments["input"]))]
+ #### check input format ####
+ # Case 1: Folder contains fastq files
+ if len([x for x in input if isfile(x) and any([x.endswith(ext) for ext in file_extensions]) ])>0:
+ input= [x for x in input if isfile(x) and any([x.endswith(ext) for ext in file_extensions]) ]
+ if len(input)!=0:
+ if arguments["technology"].startswith("Illumina"):
+ for setname, files in groupby(input, key=lambda x: "_".join(x.split("_")[:-3])):
+ #for setname, files in groupby(files, key=lambda x: "_".join(x.split("_")[:-2])):
+ temp_file = sorted(list(files))
+ r1_reads = [x for x in temp_file if r1_pattern in x]
+ r2_reads = [x for x in temp_file if r2_pattern in x]
+ if len(r1_reads) != 0:
+ if len(r2_reads) != 0:
+ myFiles.append([r1_reads, r2_reads])
+ else:
+ myFiles.append(r1_reads)
+ else:
+ # IonTorrent
+ myFiles = input
+ # Case 2: Folder contains subfolder for each sample
+ else:
+ input = [x for x in input if os.path.isdir(x)]
+
+ if len(input) > 0:
+ for sample in input:
+ files = sorted([join(sample,x) for x in listdir(sample) if any([x.endswith(ext) for ext in file_extensions])])
+ if arguments["technology"].startswith("Illumina"):# and len(files) > 1:
+ for setname, files in groupby(files, key=lambda x: "_".join(x.split("_")[:-3])):
+ temp_file = sorted(list(files))
+ r1_reads = [x for x in temp_file if r1_pattern in x]
+ r2_reads = [x for x in temp_file if r2_pattern in x]
+ if len(r1_reads)!=0:
+ if len(r2_reads)!=0:
+ myFiles.append([r1_reads, r2_reads])
+ else:
+ myFiles.append(r1_reads)
+ else:
+ myFiles.append(sorted(list(files)))
+ files = None
+ if len(myFiles) == 0:
+ sys.exit("Enter a valid folder, which contain sample folders")
+ else:
+ if arguments["r1"]:
+ if not os.path.exists(arguments["r1"]):
+ sys.exit(arguments["r1"] + " does not exist.")
+
+ if arguments["r2"]:
+ myFiles.append([arguments["r1"], arguments["r2"]])
+ else:
+ myFiles.append([arguments["r1"]])
+
+ print("Found " + str(len(myFiles)) + " sample(s).")
+
+ for subset in myFiles:
+ print("Analyse " + str(i) + "/" + str(len(myFiles)))
+
+ sample_tmp = join(tmp, "Sample_" + str(i))
+ os.makedirs(sample_tmp, exist_ok=True)
+
+ if isinstance(subset, str):
+ arguments["r1"] = subset
+ else:
+ arguments["r1"] = subset[0]
+ if len(subset)>1:
+ arguments["r2"] = subset[1]
+
+ sample = fillClasses( mapping_index_path, sample_tmp )
+ if sample is not None:
+ project.append(sample)
+ try:
+ writeReport(sample, arguments)
+ except:
+ print("Couldnt write pdf.")
+ try:
+ batch_json = json.load(open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "r"), object_pairs_hook=OrderedDict)
+ if sample.mappingRes is None:
+ pMapping = 0
+ nMapping = 0
+ else:
+ pMapping = sample.mappingRes.percentAlignedReads
+ nMapping = sample.mappingRes.numberOfAlignedReads
+ if sample.krakenRes is None:
+ kraken = 0
+ kraken_reports ={}
+ else:
+ kraken = sample.krakenRes.pClassified
+ batch_json["kraken"][sample.name] = json.loads(str_to_json(sample.krakenRes.report))
+ tmp_overview.write(",".join([sample.name, str(sample.nTrimmed()), str(nMapping), str(kraken)]) +"\n")
+ batch_json["summary"].append(
+ OrderedDict([("setname", sample.name),
+ ("Total reads [#]", sample.total_reads()),
+ ("Reads after trimming [#]", sample.nTrimmed()),
+ ("Reads after trimming [%]", sample.pTrimmed()),
+ ("Map to reference [#]", nMapping),
+ ("Map to reference [%]", pMapping),
+ ("Classified (Kraken)", kraken),
+ ("Shorter fragments", sample.get_mean_trimRes()),
+ ("Trim Parameter", sample.readSets[0].trimRes.print_param(arguments["palindrome"],arguments["minlen"],arguments["trimOption"])),
+ ("images", [x.get_all_qc_dict() for x in sample.readSets])]))
+
+ json.dump(batch_json, open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "w"))
+ except:
+ print("Couldnt produce HTML report.")
+ sample = None
+ i+=1
+
+ try:
+ # plot boxplots
+ boxplots = [{"file":"Per_sequence_quality_scores.csv",
+ "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_quality_scores.png"),
+ "title": "Per sequence quality scores",
+ "ylab": "Mean Sequence Quality (Phred Score)",
+ "xlab": "Sample" },
+ {"file": "Sequence_Length_Distribution.csv",
+ "output": join(arguments["output"], "QCResults/Report/src/img", "Sequence_Length_Distribution.png"),
+ "title": "Sequence Length Distribution",
+ "ylab": "Sequence Length (bp)",
+ "xlab": "Sample"},
+ {"file": "Per_sequence_GC_content.csv",
+ "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_GC_content.png"),
+ "title": "Per sequence GC content",
+ "ylab": "Mean GC content (%)",
+ "xlab": "Sample"} ]
+ for plot in boxplots:
+ process = subprocess.Popen(" ".join(["Rscript --vanilla ",join(os.path.dirname(__file__) ,"boxplot.R"),
+ join(arguments["output"],"QCResults", "Report", "src", plot["file"]), plot["output"], '"'+ plot["title"]+'"', '"'+plot["xlab"]+'"','"'+ plot["ylab"]+'"']),
+ stderr=subprocess.PIPE, stdout= subprocess.PIPE, shell =True)
+ for line in iter(process.stderr.readline, b''):
+ print(line)
+ process.communicate()
+ #plot barplots
+ tmp_overview.close()
+ process = subprocess.Popen(" ".join(["Rscript --vanilla ", join(os.path.dirname(__file__), "barplot.R"),join(tmp, "overview.csv"),
+ join(arguments["output"], "QCResults/Report/src/img")]),stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
+ for line in iter(process.stderr.readline, b''):
+ print(line)
+ process.communicate()
+ shutil.copy(join(os.path.dirname(__file__), "batch_report.html"), join(arguments["output"], "QCResults", "Report", "batch_report.html"))
+ except:
+ print("Couldnt plot boxplots.")
+ try:
+ summary_latex_name = join(arguments["output"], "QCResults", "Report", "summary.tex")
+ with open(summary_latex_name, "w") as summary_latex:
+ summary_latex.write("\\rowcolors{2}{gray!10}{}\n\\begin{table}[H]\\begin{tabular}{lcc}\n")
+ summary_latex.write(
+ "\\rowcolor{tableBlue} \\textbf{Sample} & \\textbf{Map to reference [\#]} & \\textbf{Map to reference[\%]} & \\textbf{Reads after trimming [\#]} & \\textbf{Reads after trimming [\%]} & \\textbf{Classified (Kraken)} & \\textbf{Shorter fragments [\%]} \\ \n")
+
+ for i in range(0, len(batch_json["summary"])):
+ summary_latex.write(
+ str(batch_json["summary"][i]["setname"]) + " & " +
+ str(batch_json["summary"][i]["Map to reference [#]"]) + " & " +
+ str(batch_json["summary"][i]["Map to reference [%]"]) + " & " +
+ str(batch_json["summary"][i]["Reads after trimming [#]"]) + " & " +
+ str(batch_json["summary"][i]["Reads after trimming [%]"]) + " & " +
+ str(batch_json["summary"][i]["Classified (Kraken)"]) + " & " +
+ str(batch_json["summary"][i]["Shorter fragments"]) + "\\\\\n"
+ )
+ summary_latex.write("\\end{tabular}\n")
+ summary_latex.write("\\caption{Statistics on reads after trimming. }")
+ summary_latex.close()
+ except:
+
+ print("Couldnt write summary.tex file.")
+ finally:
+
+ shutil.rmtree(tmp)
+ delete_files(join(arguments["output"],"QCResults"), "FastQC", "_fastqc.html")
+ delete_files(join(arguments["output"],"QCResults"), "Trimmed/FastQC", "_fastqc.html")
+
+if __name__ == "__main__":
+ config = configparser.ConfigParser()
+ config.read(join(dirname(__file__), "config.txt"))
+ kraken_db = ""
+ if "DEFAULT" in config:
+ if "kraken_db" in config["DEFAULT"].keys():
+ kraken_db = config["DEFAULT"]["kraken_db"]
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument( '-input', dest='input', help = "input sample folder. Illumina filenames have to end with _<lane>_<R1|R2>_number, e.g. Sample_12_345_R1_001.fastq", required=False)
+ parser.add_argument('-1' , dest='r1', help = "input file. Illumina filename must not match <project>_<lane>_<R1|R2>_<number> name pattern", required=False)
+ parser.add_argument( '-2', dest='r2', help = "input file", required=False)
+
+ parser.add_argument('-output', dest='output', default="")
+ parser.add_argument('-technology', dest='technology',choices=['Illumina',"IonTorrent"] ,required = True)
+ parser.add_argument('-threads', dest='threads', default = 4, type = int, help= "Number of threads. Default: %(default)s")
+ parser.add_argument('-adapter', dest = 'adapter', choices = ['TruSeq2-PE', 'TruSeq2-SE' , 'TruSeq3-PE' , 'TruSeq3-SE' , 'TruSeq3-PE-2', 'NexteraPE-PE' ])
+ parser.add_argument('-reference', dest='reference', required = False, help = "Map reads against reference")
+ parser.add_argument('-index', dest='index', required = False, help = "Bowtie2 index if available.")
+ parser.add_argument('-sav', dest='sav', required=False,help="Illumina folder for SAV. Requires RunInfo.xml, RunParamter.xml and Interop folder.")
+
+ parser.add_argument('-save_mapping', action = "store_true")
+ parser.add_argument('-db', dest='db', help='Kraken database', required = False, default= kraken_db)
+ parser.add_argument('-palindrome', dest='palindrome', default=30, help= 'Use palindrome parameter 30 or 1000 for further analysis. Default: %(default)s', type = int)
+ parser.add_argument('-minlen', default=50,dest='minlen',help='Minlen parameter for Trimmomatic. Default: %(default)s', type=int)
+ parser.add_argument('-trimOption', dest="trimOption", help='Use maxinfo or slidinginfo for Trimmomatic.MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>. default: SLIDINGWINDOW:4:15', type= str)
+ parser.add_argument('-version', action='version', version='%(prog)s v' + __version__)
+ parser.add_argument('-nokraken', action = "store_true")
+ parser.add_argument('-nomapping', action = "store_true")
+ parser.add_argument('-gz', action = "store_true")
+ parser.add_argument('-trimBetter', action = "store_true", help= "Optimize trimming parameter using 'Per sequence base content' from fastqc. Not recommended for amplicons.")
+ parser.add_argument('-trimBetter_threshold', dest='trimBetter_threshold', help = "Set -trimBetter to use this option.Default setting for Illumina: 0.15 and for IonTorrent: 0.25.", required=False, type=float)
+ parser.add_argument('-forAssembly', action = "store_true", help = "Set -trimBetter to use this option.Trim parameter are optimized for assemblies (trim more aggressive).")
+ parser.add_argument('-forMapping', action = "store_true", help = "Set -trimBetter to use this option.Trim parameter are optimized for mapping (allow more errors).")
+ parser.add_argument('-rename', dest="rename", required=False, help="TSV File with two columns: <old sample name> <new sample name>")
+
+ arguments = vars(parser.parse_args())
+ main(arguments)
diff --git a/barplot.R b/barplot.R
index bcf3b94..350342c 100755
--- a/barplot.R
+++ b/barplot.R
@@ -1,28 +1,29 @@
-library(ggplot2)
-args = commandArgs(trailingOnly=TRUE)
-
-summary<- as.data.frame(read.csv(args[1]))
-
-ggplot(summary, aes(x=Sample, y = Trimmed )) +
- geom_bar(stat = "identity", fill="#67a9cf") +
- theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
- ggtitle("Number of reads after trimming") +
- xlab("Sample")+
- ylab("Reads [#]")
-ggsave(paste(args[2], "/trimming.png", sep=""))
-
-ggplot(summary, aes(x=Sample, y = Mapped )) +
- geom_bar(stat = "identity", fill="#67a9cf") +
- theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
- ggtitle("Number of reads mapped to reference") +
- xlab("Sample")+
- ylab("Reads [#]")
-ggsave(paste(args[2], "/mapping.png", sep=""))
-
-ggplot(summary, aes(x=Sample, y = Classified )) +
- geom_bar(stat = "identity", fill="#67a9cf") +
- theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
- ggtitle("Precentage of reads classified by Kraken") +
- xlab("Sample")+
- ylab("Reads [%]")
-ggsave(paste(args[2], "/kraken.png", sep=""))
+options(warn=-1)
+library(ggplot2)
+args = commandArgs(trailingOnly=TRUE)
+
+summary<- as.data.frame(read.csv(args[1]))
+
+ggplot(summary, aes(x=Sample, y = Trimmed )) +
+ geom_bar(stat = "identity", fill="#67a9cf") +
+ theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
+ ggtitle("Number of reads after trimming") +
+ xlab("Sample")+
+ ylab("Reads [#]")
+ggsave(paste(args[2], "/trimming.png", sep=""))
+
+ggplot(summary, aes(x=Sample, y = Mapped )) +
+ geom_bar(stat = "identity", fill="#67a9cf") +
+ theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
+ ggtitle("Number of reads mapped to reference") +
+ xlab("Sample")+
+ ylab("Reads [#]")
+ggsave(paste(args[2], "/mapping.png", sep=""))
+
+ggplot(summary, aes(x=Sample, y = Classified )) +
+ geom_bar(stat = "identity", fill="#67a9cf") +
+ theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
+ ggtitle("Precentage of reads classified by Kraken") +
+ xlab("Sample")+
+ ylab("Reads [%]")
+ggsave(paste(args[2], "/kraken.png", sep=""))
diff --git a/batch_report.html b/batch_report.html
index 8c8c574..23fc0f4 100755
--- a/batch_report.html
+++ b/batch_report.html
@@ -1,791 +1,791 @@
-<!doctype html>
-<html>
-<head>
- <meta charset="utf-8">
- <title>Batch Report</title>
- <script src="lib/jquery.min.js"></script>
- <script src="lib/angular.min.js"></script>
- <script src="lib/d3.v3.min.js"></script>
- <!-- Latest compiled and minified CSS -->
- <link rel="stylesheet" href="lib/bootstrap.min.css">
- <!-- Optional theme -->
- <link rel="stylesheet" href="lib/bootstrap-theme.min.css">
- <!-- Latest compiled and minified JavaScript -->
- <script src="lib/bootstrap.min.js"></script>
- <style>
- .row_hidden {
- display: none;
- }
- .panel {
- background-color:#F7F7F7;
-
- }
- .panel-heading{
- cursor:pointer;
- }
- .show_row {
- display: show;
- }
-
- p.inbox_text {
- text-align: center;
- color: grey;
- margin: 0 0 0px;
- }
-
- .thumbnail {
- margin-bottom: 5px;
- border: 0px solid #000;
- height: 150px;
- width: 200px;
- }
-
- .sample_box {
- display: table-cell;
- margin: 10px;
- padding: 10px;
- color: lightgrey;
- border-radius: 5px;
- box-shadow: 0 1px 3px rgba(0, 0, 0, .25);
- }
-
- .sidebar {
-
- overflow-y: auto;
- position: fixed;
- height: 100%;
- top: 50px;
- z-index: 999;
- padding-right: 0px;
- box-shadow: 0 2px 10px 0 rgba(0, 0, 0, 0.12);
- background: #f7f7f7;
- }
- .navbar-default{
- background-image: linear-gradient(to bottom, #1D95B6 0px, #0C6A89 100%);
- }
- .navbar-default .navbar-nav > .active > a, .navbar-default .navbar-nav > .open > a{
- background-image: linear-gradient(to bottom, rgb(32, 162, 198) 0px, rgb(21, 117, 149) 100%);
-
- }
- .navbar-default .navbar-brand {
- color: white;
- }
- h4{
- color: #0A6780;
- }
- h5{
- color: #0D7793;
- }
- .node {
- cursor: pointer;
- }
-
- .node circle {
- fill: #fff;
- stroke: steelblue;
- stroke-width: 1.5px;
- }
-
- .node text {
- font: 10px sans-serif;
- }
-
- .link {
- fill: none;
- stroke: #ccc;
- stroke-width: 1.5px;
- }
- .table{
- margin-bottom: auto;}
-
- </style>
- <script>
- //$(window).load(function () {
- // $("#main").css({left: $('#sidebar').width()});
- //});
-
-
- //Start d3 tree
- function d3_kraken(root, id) {
- var margin = {top: 20, right: 120, bottom: 20, left: 120},
- width = $(document).width(),//1600 - margin.right - margin.left,
- height = $(document).height();
-
- var i = 0,
- duration = 750//,root
- ;
-
- var tree = d3.layout.tree()
- .size([height, width]);
-
- var diagonal = d3.svg.diagonal()
- .projection(function (d) {
- return [d.y, d.x];
- });
-
-
- var svg = d3.select("#kraken").append("tr")
- .attr("id", "kraken_" + id)
- .attr("class", "row_hidden")
- .append("svg")
- .attr("width", width + margin.right + margin.left)
- .attr("height", height + margin.top + margin.bottom)
- .append("g")
- .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
-
- root.x0 = height / 2;
- root.y0 = 0;
-
- function collapse(d) {
- if (d.children) {
- d._children = d.children;
- d._children.forEach(collapse);
- d.children = null;
- }
- }
-
- function uncollapse(d) {
- if (d._children) {
- d.children = d._children;
- d._children.forEach(uncollapse);
- d._children = null;
- }
-
- }
-
- root.children.forEach(collapse);
- update(root);
-
-
- d3.select(self.frameElement).style("height", "800px");
-
- function update(source) {
- var levelWidth = [1];
- var childCount = function (level, n) {
-
- if (n.children && n.children.length > 0) {
- if (levelWidth.length <= level + 1) levelWidth.push(0);
-
- levelWidth[level + 1] += n.children.length;
- n.children.forEach(function (d) {
- childCount(level + 1, d);
- });
- }
- };
- childCount(0, root);
-
-
- var newHeight = d3.max(levelWidth) * 100; // 25 pixels per line
- tree = tree.size([newHeight, width]);
- // Compute the new tree layout.
- var nodes = tree.nodes(root).reverse(),
- links = tree.links(nodes);
-
- // Normalize for fixed-depth.
- nodes.forEach(function (d) {
- d.y = d.depth * 180;
- });
-
- // Update the nodes…
- var node = svg.selectAll("g.node")
- .data(nodes, function (d) {
- return d.id || (d.id = ++i);
- });
-
- // Enter any new nodes at the parent's previous position.
- var nodeEnter = node.enter().append("g")
- .attr("class", "node")
- .attr("transform", function (d) {
- return "translate(" + source.y0 + "," + source.x0 + ")";
- })
- .on("click", click)
- .attr("data-toggle", "tooltip")
- .attr("title", function (d) {
- return d.name + "\n" + d.perc + "%";
- })
- ;
-
- nodeEnter.append("circle")
- .attr("r", function (d) {
- if (d.name != "unclassified") return d.perc + 1e-06; else 0.0;
- })
- .style("fill", function (d) {
- return d._children ? "lightsteelblue" : "#fff";
- })
-
-
- nodeEnter.append("text")
- .attr("x", function (d) {
- return d.children || d._children ? -10 : 10;
- })
- .attr("dy", ".35em")
- .attr("text-anchor", function (d) {
- return d.children || d._children ? "end" : "start";
- })
- .text(function (d) {
- return d.name + ", " + d.perc;
- })
- .style("fill-opacity", 1e-6);
-
- // Transition nodes to their new position.
- var nodeUpdate = node.transition()
- .duration(duration)
- .attr("transform", function (d) {
- return "translate(" + d.y + "," + d.x + ")";
- });
-
- nodeUpdate.select("circle")
- .attr("r", function (d) {
- if (d.name != "unclassified") return d.perc + 4.5; else return 0.0;
- })
- .style("fill", function (d) {
- return d._children ? "lightsteelblue" : "#fff";
- });
-
- nodeUpdate.select("text")
- .style("fill-opacity", 1);
-
- // Transition exiting nodes to the parent's new position.
- var nodeExit = node.exit().transition()
- .duration(duration)
- .attr("transform", function (d) {
- return "translate(" + source.y + "," + source.x + ")";
- })
- .remove();
-
- nodeExit.select("circle")
- .attr("r", 1e-6);
-
- nodeExit.select("text")
- .style("fill-opacity", 1e-6);
-
- // Update the links…
- var link = svg.selectAll("path.link")
- .data(links, function (d) {
- return d.target.id;
- });
-
- // Enter any new links at the parent's previous position.
- link.enter().insert("path", "g")
- .attr("class", "link")
- .attr("d", function (d) {
- var o = {x: source.x0, y: source.y0};
- return diagonal({source: o, target: o});
- });
-
- // Transition links to their new position.
- link.transition()
- .duration(duration)
- .attr("d", diagonal);
-
- // Transition exiting nodes to the parent's new position.
- link.exit().transition()
- .duration(duration)
- .attr("d", function (d) {
- var o = {x: source.x, y: source.y};
- return diagonal({source: o, target: o});
- })
- .remove();
-
- // Stash the old positions for transition.
- nodes.forEach(function (d) {
- d.x0 = d.x;
- d.y0 = d.y;
- });
- }
-
-// Toggle children on click.
- function click(d) {
- if (d.children) {
- d._children = d.children;
- d.children = null;
- } else {
- d.children = d._children;
- d._children = null;
- if (d.name == "root") {
- d.children.forEach(uncollapse);
- }
- }
- update(d);
- }
- }
- ;
- // end d3 tree
- var myApp = angular.module('BatchReport', []);
- myApp.controller('SummaryTable',function ($scope, $http) {
- // create a projects Object
- $scope.projects = {};
- // pretend data we just got back from the server
- // this is an ARRAY of OBJECTS
- $scope.information = {};
- //$scope.information.commandline= {"minlen": 50, "index": null, "blobplot": false, "output": "", "nomapping": true, "r1": ["/home/lieuv/testdaten/Subsampled_Testdata/HCV/subsampled/16-204_S3_L001_R1_001.fastq", "/home/lieuv/testdaten/Subsampled_Testdata/HCV/subsampled/16-204_S3_L002_R1_001.fastq"], "reference": "", "threads": 4, "forMapping": false, "palindrome": 30, "technology": "Illumina", "adapter": "NexteraPE-PE", "r2": ["/home/lieuv/testdaten/Subsampled_Testdata/HCV/sub [...]
-
- $scope.show_summary = {};
- $scope.show_img = {};
- $scope.show_raw_data = true;
-
- $scope.sequencer_information = [];
- $scope.show_kraken = "";
- $scope.kraken = {};
-
- $http.get('src/summary.json')
- .success(function(res){
- $scope.information.commandline= res.commandline;
- $scope.information.versions= res.versions;
- $scope.projects.samples = res.summary;
- $scope.kraken = res.kraken;
- $.each($scope.kraken, function (key, value) {
- d3_kraken(value, key)
- });
- });
-
-
- $http.get('src/sav.json')
- .success(function(res){
- $scope.sequencer_information = res;
- });
-
-
- $.each($scope.kraken, function (key, value) {
-
- d3_kraken(value, key)
- });
- $scope.$watch("show_kraken", function (newVal, oldVal) {
- d3.select("#kraken_" + newVal).attr("class", "show_row");
- d3.select("#kraken_" + oldVal).attr("class", "row_hidden");
-
- });
- $scope.collapse = true;
- $scope.isArray = angular.isArray;
- $scope.isString = angular.isString;
- $scope.$watch("collapse", function (newVal, oldVal) {
- d3.select("#kraken_" + newVal).attr("class", "show_row");
- d3.select("#kraken_" + oldVal).attr("class", "row_hidden");
-
- });
- $scope.selected = {title:"", img:"", caption:""};
-
- }
-
- )
-
- ;
-
- myApp.filter('imgFilter', function () {
- return function (input, filter) {
- var result = [];
- angular.forEach(input, function (input) {
- angular.forEach(filter, function (isfiltered, name) {
- if (isfiltered && name === input.name) {
- result.push(input);
- }
- });
- });
- return result;
- };
- });
- myApp.filter('typeFilter', function () {
- return function (input, filter) {
- var result = [];
- angular.forEach(input, function (input) {
- angular.forEach(filter, function (isfiltered, setname) {
-
- if (isfiltered && setname === input.setname) {
- result.push(input);
-
- }
- });
- });
- return result;
- };
- });
-
-
- </script>
-
-</head>
-<body>
-<div ng-app="BatchReport">
-
- <!-- head Quality Control-->
- <nav class="navbar navbar-default navbar-fixed-top" >
- <div class="container-fluid">
- <div class="navbar-header">
- <a class="navbar-brand" href="#">Quality Control</a>
- </div>
- <ul class="nav navbar-nav">
-
- <li><a href = "#sequencer_information" style="color: aliceblue;" data-toggle="tab">Sequencer Information</a></li>
- <li class="active" ><a href="#summary" style="color: aliceblue;" data-toggle="tab">Summary Table</a></li>
- <li> <a href="#fastqc" style="color: aliceblue;" data-toggle="tab">FastQC</a></li>
- <li> <a href="#kraken_report" style="color: aliceblue;" data-toggle="tab">Kraken</a></li>
- <li> <a href="#information" style="color: aliceblue;" data-toggle="tab">Information</a></li>
- </ul>
- </div>
-
-
- </nav>
- <!-- END head Quality Control-->
-
- <!-- Summary Table Controller-->
- <div ng-controller="SummaryTable">
- <div class="row" style="margin-top: 50px">
-
-
- <div class="col-md-12" id="main">
-
-
- <!-- TAB-CONTENT-->
- <div class="tab-content">
- <div class="tab-pane fade" id="sequencer_information">
- <!--Sidebar-->
- <div class="col-md-2 sidebar" >
- </br>
- <ul class="nav">
- <li><a href = "#sequencer_information_table"><h5>Sequencer Information</h5></a></li>
- <li>
- <a href = "#sequencer_information_plots" ><h5>Sequencer Plots</h5></a></li>
-
- </ul>
- </div>
- <!-- END Sidebar-->
-
-
- <div class="col-md-10" style = "padding-left:17%;">
- <h4 id= "sequencer_information_table">Sequencer Information</h4>
- <table class="table table-striped">
- <tr ng-repeat="(attr, value) in sequencer_information.tables">
- <td style="max-width:80px;">{{attr}}</td>
- <td ng-if = "isArray(value)==false && isString(value)==true">{{value}}</td>
- <td ng-if = "isArray(value)==true " >
- <table class="table table-sm" style="background-color:inherit; width : auto;" >
- <tr>
- <td ng-repeat = "(headline, value) in value[0]">{{headline}}</td>
- </tr>
- <tr ng-repeat="nested in value"><td ng-repeat = "(headline, value) in nested">{{value}}</td></tr>
- </table></td>
- <td ng-if = "isArray(value)==false && isString(value)==false" >
- <table class ="table table-sm" style="background-color:inherit;width:auto;">
- <tr ng-repeat = "(headline, value) in value">
- <td >{{headline}}</td>
- <td ng-if ="isArray(value)==false"> {{value}}</td>
- <td ng-repeat = "val in value" ng-if ="isArray(value)==true"> {{val}}</td>
- </tr>
-
- </table></td>
-
- </tr>
- </table>
-
- <h4 id= "sequencer_information_plots">Sequencer Plots</h4>
- <a ng-click="selected.title='Data by Cycle - Intensity'; selected.caption='This plot shows the intensity by color of the 90% percentile of the data for each cycle.'; selected.img='src/img/intensity.png'"
- data-image-id="" data-toggle="modal"
- data-target="#image-gallery">
- <img width="400px" height="300px" ng-src="src/img/intensity.png">
- </a>
- <a ng-click="selected.title='Data by Cycle - FWHM'; selected.caption='The average full width of clusters at half maximum (in pixels).'; selected.img='src/img/FWHM.png'"
- data-image-id="" data-toggle="modal"
- data-target="#image-gallery">
- <img width="400px" height="300px" ng-src="src/img/FWHM.png">
- </a>
- <a ng-click="selected.title='Data by Cycle - % Base'; selected.caption='The percentage of clusters for which the selected base has been called.'; selected.img='src/img/base_perc.png'"
- data-image-id="" data-toggle="modal"
- data-target="#image-gallery">
- <img width="400px" height="300px" ng-src="src/img/base_perc.png">
- </a>
- <a ng-click="selected.title='Data by Lane - Phasing'; selected.caption='Thepercentage of molecules in a cluster for which sequencing falls behind (phasing) the current cycle within a read The graph is split out per read.'; selected.img='src/img/phasing.png'"
- data-image-id="" data-toggle="modal"
- data-target="#image-gallery">
- <img width="400px" height="300px" ng-src="src/img/phasing.png">
- </a>
- <a ng-click="selected.title='Data by Lane - Prephasing'; selected.caption='The percentage of molecules in a cluster for which sequencing falls behind (phasing) the current cycle within a read The graph is split out per read.';
- selected.img='src/img/prephasing.png'"
- data-image-id="" data-toggle="modal"
- data-target="#image-gallery">
- <img width="400px" height="300px" ng-src="src/img/prephasing.png">
- </a>
-
-
- <a ng-click="selected.caption='The density of clusters for each tile (in thousand per mm2)'; selected.title='Data by Lane - Density'; selected.img='src/img/density.png'"
- data-image-id="" data-toggle="modal" data-target="#image-gallery">
- <img width="400px" height="300px" ng-src="src/img/density.png">
- </a>
- <a ng-click="selected.title='QScore Distribution'; selected.caption='The Q-score distribution shows the number of reads by quality score. The quality score os cumulative for current cycle and previous cycles, and only
- reads that pass the quality filter are included. The Q-score is based on the Phred scale.'; selected.img='src/img/qscore_distr.png'"
- data-image-id="" data-toggle="modal" data-target="#image-gallery">
- <img width="400px" height="300px" ng-src="src/img/qscore_distr.png">
- </a>
- <a ng-click="selected.title='QScore Distribution'; selected.caption='The Q-score heat map shows the Q-score by cycle for all lanes.'; selected.img='src/img/qscore_heatmap.png'"
- data-image-id="" data-toggle="modal" data-target="#image-gallery">
- <img width="500px" height="300px" ng-src="src/img/qscore_heatmap.png">
- </a>
-
-
- </div>
-
-
- </div>
- <!-- END Sequencer Information content -->
- <div class="tab-pane fade in active" id="summary">
- <!-- Summary Table -->
- <h4>Summary</h4>
- <table class="table table-striped">
- <thead style="font-weight:600;color:#0D7793">
- <tr>
- <td ng-repeat="(headline,bla) in projects.samples[0]" ng-if="headline!='images'">
- {{headline}}
- </td>
- </tr>
- </thead>
- <tr ng-repeat="sample in projects.samples | orderBy:'setname'">
- <td ng-repeat="(key,value) in sample " ng-if="key!='images'">
- <input type="checkbox" ng-model="show_summary[value]" ng-if="key=='setname'"
- ng-init="show_summary[value]=true"/> {{value}}
- </td>
-
- </tr>
- </table>
- <!-- END Summary Table -->
- <div class="col-md-4">
-
- <a ng-click="selected.title='Per sequence quality scores'; selected.caption=''; selected.img='src/img/Per_sequence_quality_scores.png'"
- data-image-id="" data-toggle="modal"
- data-target="#image-gallery">
- <img height="500px" ng-src="src/img/Per_sequence_quality_scores.png">
- </a>
- </div>
- <div class="col-md-4">
-
- <a ng-click="selected.title='Per sequence GC content'; selected.caption=''; selected.img='src/img/Per_sequence_GC_content.png'"
- data-image-id="" data-toggle="modal"
- data-target="#image-gallery">
- <img height="500px" ng-src="src/img/Per_sequence_GC_content.png">
- </a>
- </div>
- <div class="col-md-4">
-
- <a ng-click="selected.title='Sequence length distribution'; selected.caption=''; selected.img='src/img/Sequence_Length_Distribution.png'"
- data-image-id="" data-toggle="modal"
- data-target="#image-gallery">
- <img height="500px" ng-src="src/img/Sequence_Length_Distribution.png">
- </a>
- </div>
- </br>
- <div class="col-md-4">
-
- <a ng-click="selected.title='Number of reads remained after trimming'; selected.caption=''; selected.img='src/img/trimming.png'"
- data-image-id="" data-toggle="modal"
- data-target="#image-gallery">
- <img height="500px" ng-src="src/img/trimming.png">
- </a>
- </div>
- <div class="col-md-4">
-
- <a ng-click="selected.title='Number of reads mapped to reference'; selected.caption=''; selected.img='src/img/mapping.png'"
- data-image-id="" data-toggle="modal"
- data-target="#image-gallery">
- <img height="500px" ng-src="src/img/mapping.png">
- </a>
- </div>
-
- <div class="col-md-4">
-
- <a ng-click="selected.title='Percentage of reads classified by Kraken'; selected.caption=''; selected.img='src/img/kraken.png'"
- data-image-id="" data-toggle="modal"
- data-target="#image-gallery">
- <img height="500px" ng-src="src/img/kraken.png">
- </a>
- </div>
-
- </div>
-
-
- <div class="tab-pane fade" id="fastqc">
- <div style="width: 100%; " class="row">
- <!--Sidebar-->
- <div class="col-md-2 sidebar" >
- <h4>Filter by ...</h4>
- <br/>
- <div class="panel panel-default" >
- <div class="panel-heading">
- <h5 class="panel-title" data-toggle="collapse" data-target="#show_sample">... sample</h5>
-
- </div>
- <div id = "show_sample" >
- <p ng-repeat="(sample, value) in show_summary">
- <input type="checkbox" ng-model="show_summary[sample]" ng-init="show_summary[sample] = value" /> {{sample}}
- </p>
- </div>
-</div>
- </tr>
- <div class="panel panel-default" >
- <div class="panel-heading">
- <h5 class="panel-title" data-toggle="collapse" data-target="#show_raw">... dataset</h5>
-
- </div>
- <div id = "show_raw" class = "collapse" >
- <p>
- <input type="checkbox" ng-model="show_raw_data"/> show raw data
- </p>
- </div>
-</div>
- <!--Filter by image name-->
-
- <div class="panel panel-default" >
- <div class="panel-heading">
- <h5 class="panel-title" data-toggle="collapse" data-target="#show_fastqc">... FastQC Images</h5>
- </div>
- <div class = "collapse" id="show_fastqc">
- <p ng-repeat="(type,bla) in projects.samples[0].images[0].raw[0]">
- <input type="checkbox" ng-model="show_img[bla.name]" ng-init="show_img[bla.name]=true"/>
- {{bla.name}}
- </p>
-</div></div>
- <!-- END Filter by image name-->
- <!--Filter by trimmed /untrimmed-->
-
- <!-- END Filter by trimmed /untrimmed-->
-
- </div>
- <!-- END Sidebar-->
- <div style="display: table-row;padding-left:17%;" class="col-md-10">
-
- <div class="sample_box"
- ng-repeat="sample in projects.samples | orderBy:'setname'| typeFilter: show_summary ">
- <p class="inbox_text">{{sample.setname}}</p>
- <div ng-repeat="sample_img in sample.images " style="display:table-cell;"
- ng-if="show_raw_data">
- <p class="inbox_text">Raw data</p>
- <p class="inbox_text" ng-if="sample_img.raw.length==2">R1</p>
- <div style="display:inline-block">
- <a class="thumbnail"
- ng-repeat="img in sample_img.raw[0]| imgFilter: show_img" href="#"
- ng-click="selected.caption=img.name; selected.title='Raw '+sample.setname +' R1'; selected.img=img.file"
- data-image-id="" data-toggle="modal"
- data-title="Raw {{sample.setname}} R1"
- data-caption="{{img.name}}" data-image="{{img.file}}"
- data-target="#image-gallery">
- <img width="200px" height="150px" ng-src="{{img.file}}"
- style="border-radius:3px;border:solid 2px {{img.color}};display: block;">
- </a>
- </div>
- <p class="inbox_text" ng-if="sample_img.raw.length==2">R2</p>
- <div style="display:inline-block">
- <a class="thumbnail"
- ng-repeat="img in sample_img.raw[1]| imgFilter: show_img" href="#"
- ng-click="selected.caption=img.name; selected.title='Raw ' + sample.setname + ' R2'; selected.img=img.file"
- data-image-id="" data-toggle="modal"
- data-title="Raw {{sample.setname}} R2"
- data-caption="{{img.name}}" data-image="{{img.file}}"
- data-target="#image-gallery">
- <img width="200px" height="150px" ng-src="{{img.file}}"
- style="border-radius:3px;border:solid 2px {{img.color}};display: block;">
- </a></div>
- </div>
- <div ng-repeat="sample_img in sample.images " style="display:table-cell;">
- <p class="inbox_text">Trimmed data</p>
- <p class="inbox_text" ng-if="sample_img.raw.length==2">R1</p>
- <div style="display:inline-block">
- <a class="thumbnail"
- ng-repeat="img in sample_img.trimmed[0]| imgFilter: show_img" href="#"
- ng-click="selected.caption=img.name; selected.title='Trimmed ' + sample.setname + ' R1'; selected.img=img.file"
- data-image-id="" data-toggle="modal"
- data-title="Trimmed {{sample.setname}} R1"
- data-caption="{{img.name}}" data-image="{{img.file}}"
- data-target="#image-gallery">
- <img width="200px" height="150px" ng-src="{{img.file}}"
- style="border-radius:3px;border:solid 2px {{img.color}};display: block;">
- </a>
- </div>
- <p class="inbox_text" ng-if="sample_img.raw.length ==2">R2</p>
- <div style="display:inline-block">
- <a class="thumbnail"
- ng-repeat="img in sample_img.trimmed[1]| imgFilter: show_img" href="#"
- ng-click="selected.caption=img.name; selected.title='Trimmed ' + sample.setname + ' R2'; selected.img=img.file"
- data-image-id="" data-toggle="modal"
- data-title="Trimmed {{sample.setname}} R2"
- data-caption="{{img.name}}" data-image="{{img.file}}"
- data-target="#image-gallery">
- <img width="200px" height="150px" ng-src="{{img.file}}"
- style="border-radius:3px;border:solid 2px {{img.color}};display: block;">
- </a>
- </div>
- </div>
- </div>
- </div>
- </div>
- </div>
- <!-- END FastQC -->
-
- <!-- Kraken Report -->
- <div class="tab-pane fade " id="kraken_report">
-
- Show Kraken report of : <select id="select_kraken" ng-model="show_kraken">
- <option value="" disabled selected hidden>Choose sample...</option>
- <option ng-repeat="(name, value) in kraken" value="{{name}}">{{name}}</option>
- </select>
- <table id="kraken"></table>
- </div>
- <!-- END Kraken Report -->
-
-
- <div class="tab-pane fade" id="information">
- <h4>Commandline</h4>
-
- <table class="table table-striped">
- <tr ng-repeat="(attr, value) in information.commandline" ng-if="value !=''">
- <td>{{attr}}</td>
- <td>{{value}}</td>
- </tr>
- </table>
- <h4>Version Control</h4>
-
- <table class="table table-striped">
- <tr ng-repeat="(attr, value) in information.versions" ng-if="value !=''">
- <td>{{attr}}</td>
- <td>{{value}}</td>
- </tr>
- </table>
- </div>
- <!-- END tab content-->
- </div>
-
- </div>
- </div>
- <!-- Modal -->
- <div class="modal fade" id="image-gallery" tabindex="-1" role="dialog" aria-labelledby="myModalLabel"
- aria-hidden="true">
- <div class="modal-dialog modal-lg">
- <div class="modal-content">
- <div class="modal-header">
- <button type="button" class="close" data-dismiss="modal" aria-label="Close"><span
- aria-hidden="true">×</span></button>
- <div class="col-md-8 text-justify" id="image-gallery-title" > <h4>{{selected.title}}</h4>
- </div>
-
- </div>
- <div class="modal-body modal-lg">
- <img id="image-gallery-image" style="height:100%;width:100%;" class="img-responsive" src="{{selected.img}}">
- </div>
- <div class="modal-footer">
- <div class="col-md-12 text-justify" id="image-gallery-caption">{{selected.caption}}
- </div>
- </div>
- </div>
- </div>
- <!-- END Modal -->
- </div>
- <!-- END div row-->
- <!-- Trigger the modal with a button -->
-
-
- </div>
- <!-- END Summary Table Controller-->
-
-</div>
-
-</div>
-
-
-</body>
+<!doctype html>
+<html>
+<head>
+ <meta charset="utf-8">
+ <title>Batch Report</title>
+ <script src="lib/jquery.min.js"></script>
+ <script src="lib/angular.min.js"></script>
+ <script src="lib/d3.v3.min.js"></script>
+ <!-- Latest compiled and minified CSS -->
+ <link rel="stylesheet" href="lib/bootstrap.min.css">
+ <!-- Optional theme -->
+ <link rel="stylesheet" href="lib/bootstrap-theme.min.css">
+ <!-- Latest compiled and minified JavaScript -->
+ <script src="lib/bootstrap.min.js"></script>
+ <style>
+ .row_hidden {
+ display: none;
+ }
+ .panel {
+ background-color:#F7F7F7;
+
+ }
+ .panel-heading{
+ cursor:pointer;
+ }
+ .show_row {
+ display: table-row;
+ }
+
+ p.inbox_text {
+ text-align: center;
+ color: grey;
+ margin: 0 0 0px;
+ }
+
+ .thumbnail {
+ margin-bottom: 5px;
+ border: 0px solid #000;
+ height: 150px;
+ width: 200px;
+ }
+
+ .sample_box {
+ display: table-cell;
+ margin: 10px;
+ padding: 10px;
+ color: lightgrey;
+ border-radius: 5px;
+ box-shadow: 0 1px 3px rgba(0, 0, 0, .25);
+ }
+
+ .sidebar {
+
+ overflow-y: auto;
+ position: fixed;
+ height: 100%;
+ top: 50px;
+ z-index: 999;
+ padding-right: 0px;
+ box-shadow: 0 2px 10px 0 rgba(0, 0, 0, 0.12);
+ background: #f7f7f7;
+ }
+ .navbar-default{
+ background-image: linear-gradient(to bottom, #1D95B6 0px, #0C6A89 100%);
+ }
+ .navbar-default .navbar-nav > .active > a, .navbar-default .navbar-nav > .open > a{
+ background-image: linear-gradient(to bottom, rgb(32, 162, 198) 0px, rgb(21, 117, 149) 100%);
+
+ }
+ .navbar-default .navbar-brand {
+ color: white;
+ }
+ h4{
+ color: #0A6780;
+ }
+ h5{
+ color: #0D7793;
+ }
+ .node {
+ cursor: pointer;
+ }
+
+ .node circle {
+ fill: #fff;
+ stroke: steelblue;
+ stroke-width: 1.5px;
+ }
+
+ .node text {
+ font: 10px sans-serif;
+ }
+
+ .link {
+ fill: none;
+ stroke: #ccc;
+ stroke-width: 1.5px;
+ }
+ .table{
+ margin-bottom: auto;}
+
+ </style>
+ <script>
+ //$(window).load(function () {
+ // $("#main").css({left: $('#sidebar').width()});
+ //});
+
+
+ //Start d3 tree
+ function d3_kraken(root, id) {
+ var margin = {top: 20, right: 120, bottom: 20, left: 120},
+ width = $(document).width(),//1600 - margin.right - margin.left,
+ height = $(document).height();
+
+ var i = 0,
+ duration = 750//,root
+ ;
+
+ var tree = d3.layout.tree()
+ .size([height, width]);
+
+ var diagonal = d3.svg.diagonal()
+ .projection(function (d) {
+ return [d.y, d.x];
+ });
+
+
+ var svg = d3.select("#kraken").append("tr")
+ .attr("id", "kraken_" + id)
+ .attr("class", "row_hidden")
+ .append("svg")
+ .attr("width", width + margin.right + margin.left)
+ .attr("height", height + margin.top + margin.bottom)
+ .append("g")
+ .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
+
+ root.x0 = height / 2;
+ root.y0 = 0;
+
+ function collapse(d) {
+ if (d.children) {
+ d._children = d.children;
+ d._children.forEach(collapse);
+ d.children = null;
+ }
+ }
+
+ function uncollapse(d) {
+ if (d._children) {
+ d.children = d._children;
+ d._children.forEach(uncollapse);
+ d._children = null;
+ }
+
+ }
+
+ root.children.forEach(collapse);
+ update(root);
+
+
+ d3.select(self.frameElement).style("height", "800px");
+
+ function update(source) {
+ var levelWidth = [1];
+ var childCount = function (level, n) {
+
+ if (n.children && n.children.length > 0) {
+ if (levelWidth.length <= level + 1) levelWidth.push(0);
+
+ levelWidth[level + 1] += n.children.length;
+ n.children.forEach(function (d) {
+ childCount(level + 1, d);
+ });
+ }
+ };
+ childCount(0, root);
+
+
+ var newHeight = d3.max(levelWidth) * 100; // 25 pixels per line
+ tree = tree.size([newHeight, width]);
+ // Compute the new tree layout.
+ var nodes = tree.nodes(root).reverse(),
+ links = tree.links(nodes);
+
+ // Normalize for fixed-depth.
+ nodes.forEach(function (d) {
+ d.y = d.depth * 180;
+ });
+
+ // Update the nodes…
+ var node = svg.selectAll("g.node")
+ .data(nodes, function (d) {
+ return d.id || (d.id = ++i);
+ });
+
+ // Enter any new nodes at the parent's previous position.
+ var nodeEnter = node.enter().append("g")
+ .attr("class", "node")
+ .attr("transform", function (d) {
+ return "translate(" + source.y0 + "," + source.x0 + ")";
+ })
+ .on("click", click)
+ .attr("data-toggle", "tooltip")
+ .attr("title", function (d) {
+ return d.name + "\n" + d.perc + "%";
+ })
+ ;
+
+ nodeEnter.append("circle")
+ .attr("r", function (d) {
+ if (d.name != "unclassified") return d.perc + 1e-06; else return 0.0;
+ })
+ .style("fill", function (d) {
+ return d._children ? "lightsteelblue" : "#fff";
+ })
+
+
+ nodeEnter.append("text")
+ .attr("x", function (d) {
+ return d.children || d._children ? -10 : 10;
+ })
+ .attr("dy", ".35em")
+ .attr("text-anchor", function (d) {
+ return d.children || d._children ? "end" : "start";
+ })
+ .text(function (d) {
+ return d.name + ", " + d.perc;
+ })
+ .style("fill-opacity", 1e-6);
+
+ // Transition nodes to their new position.
+ var nodeUpdate = node.transition()
+ .duration(duration)
+ .attr("transform", function (d) {
+ return "translate(" + d.y + "," + d.x + ")";
+ });
+
+ nodeUpdate.select("circle")
+ .attr("r", function (d) {
+ if (d.name != "unclassified") return d.perc + 4.5; else return 0.0;
+ })
+ .style("fill", function (d) {
+ return d._children ? "lightsteelblue" : "#fff";
+ });
+
+ nodeUpdate.select("text")
+ .style("fill-opacity", 1);
+
+ // Transition exiting nodes to the parent's new position.
+ var nodeExit = node.exit().transition()
+ .duration(duration)
+ .attr("transform", function (d) {
+ return "translate(" + source.y + "," + source.x + ")";
+ })
+ .remove();
+
+ nodeExit.select("circle")
+ .attr("r", 1e-6);
+
+ nodeExit.select("text")
+ .style("fill-opacity", 1e-6);
+
+ // Update the links…
+ var link = svg.selectAll("path.link")
+ .data(links, function (d) {
+ return d.target.id;
+ });
+
+ // Enter any new links at the parent's previous position.
+ link.enter().insert("path", "g")
+ .attr("class", "link")
+ .attr("d", function (d) {
+ var o = {x: source.x0, y: source.y0};
+ return diagonal({source: o, target: o});
+ });
+
+ // Transition links to their new position.
+ link.transition()
+ .duration(duration)
+ .attr("d", diagonal);
+
+ // Transition exiting nodes to the parent's new position.
+ link.exit().transition()
+ .duration(duration)
+ .attr("d", function (d) {
+ var o = {x: source.x, y: source.y};
+ return diagonal({source: o, target: o});
+ })
+ .remove();
+
+ // Stash the old positions for transition.
+ nodes.forEach(function (d) {
+ d.x0 = d.x;
+ d.y0 = d.y;
+ });
+ }
+
+// Toggle children on click.
+ function click(d) {
+ if (d.children) {
+ d._children = d.children;
+ d.children = null;
+ } else {
+ d.children = d._children;
+ d._children = null;
+ if (d.name == "root") {
+ d.children.forEach(uncollapse);
+ }
+ }
+ update(d);
+ }
+ }
+ ;
+ // end d3 tree
+ var myApp = angular.module('BatchReport', []);
+ myApp.controller('SummaryTable',function ($scope, $http) {
+ // create a projects Object
+ $scope.projects = {};
+ // pretend data we just got back from the server
+ // this is an ARRAY of OBJECTS
+ $scope.information = {};
+ //$scope.information.commandline= {"minlen": 50, "index": null, "blobplot": false, "output": "", "nomapping": true, "r1": ["/home/lieuv/testdaten/Subsampled_Testdata/HCV/subsampled/16-204_S3_L001_R1_001.fastq", "/home/lieuv/testdaten/Subsampled_Testdata/HCV/subsampled/16-204_S3_L002_R1_001.fastq"], "reference": "", "threads": 4, "forMapping": false, "palindrome": 30, "technology": "Illumina", "adapter": "NexteraPE-PE", "r2": ["/home/lieuv/testdaten/Subsampled_Testdata/HCV/sub [...]
+
+ $scope.show_summary = {};
+ $scope.show_img = {};
+ $scope.show_raw_data = true;
+
+ $scope.sequencer_information = [];
+ $scope.show_kraken = "";
+ $scope.kraken = {};
+
+ $http.get('src/summary.json')
+ .success(function(res){
+ $scope.information.commandline= res.commandline;
+ $scope.information.versions= res.versions;
+ $scope.projects.samples = res.summary;
+ $scope.kraken = res.kraken;
+ $.each($scope.kraken, function (key, value) {
+ d3_kraken(value, key)
+ });
+ });
+
+
+ $http.get('src/sav.json')
+ .success(function(res){
+ $scope.sequencer_information = res;
+ });
+
+
+ $.each($scope.kraken, function (key, value) {
+
+ d3_kraken(value, key)
+ });
+ $scope.$watch("show_kraken", function (newVal, oldVal) {
+ d3.select("#kraken_" + newVal).attr("class", "show_row");
+ d3.select("#kraken_" + oldVal).attr("class", "row_hidden");
+
+ });
+ $scope.collapse = true;
+ $scope.isArray = angular.isArray;
+ $scope.isString = angular.isString;
+ $scope.$watch("collapse", function (newVal, oldVal) {
+ d3.select("#kraken_" + newVal).attr("class", "show_row");
+ d3.select("#kraken_" + oldVal).attr("class", "row_hidden");
+
+ });
+ $scope.selected = {title:"", img:"", caption:""};
+
+ }
+
+ )
+
+ ;
+
+ myApp.filter('imgFilter', function () {
+ return function (input, filter) {
+ var result = [];
+ angular.forEach(input, function (input) {
+ angular.forEach(filter, function (isfiltered, name) {
+ if (isfiltered && name === input.name) {
+ result.push(input);
+ }
+ });
+ });
+ return result;
+ };
+ });
+ myApp.filter('typeFilter', function () {
+ return function (input, filter) {
+ var result = [];
+ angular.forEach(input, function (input) {
+ angular.forEach(filter, function (isfiltered, setname) {
+
+ if (isfiltered && setname === input.setname) {
+ result.push(input);
+
+ }
+ });
+ });
+ return result;
+ };
+ });
+
+
+ </script>
+
+</head>
+<body>
+<div ng-app="BatchReport">
+
+ <!-- head Quality Control-->
+ <nav class="navbar navbar-default navbar-fixed-top" >
+ <div class="container-fluid">
+ <div class="navbar-header">
+ <a class="navbar-brand" href="#">Quality Control</a>
+ </div>
+ <ul class="nav navbar-nav">
+
+ <li><a href = "#sequencer_information" style="color: aliceblue;" data-toggle="tab">Sequencer Information</a></li>
+ <li class="active" ><a href="#summary" style="color: aliceblue;" data-toggle="tab">Summary Table</a></li>
+ <li> <a href="#fastqc" style="color: aliceblue;" data-toggle="tab">FastQC</a></li>
+ <li> <a href="#kraken_report" style="color: aliceblue;" data-toggle="tab">Kraken</a></li>
+ <li> <a href="#information" style="color: aliceblue;" data-toggle="tab">Information</a></li>
+ </ul>
+ </div>
+
+
+ </nav>
+ <!-- END head Quality Control-->
+
+ <!-- Summary Table Controller-->
+ <div ng-controller="SummaryTable">
+ <div class="row" style="margin-top: 50px">
+
+
+ <div class="col-md-12" id="main">
+
+
+ <!-- TAB-CONTENT-->
+ <div class="tab-content">
+ <div class="tab-pane fade" id="sequencer_information">
+ <!--Sidebar-->
+ <div class="col-md-2 sidebar" >
+ <br/>
+ <ul class="nav">
+ <li><a href = "#sequencer_information_table"><h5>Sequencer Information</h5></a></li>
+ <li>
+ <a href = "#sequencer_information_plots" ><h5>Sequencer Plots</h5></a></li>
+
+ </ul>
+ </div>
+ <!-- END Sidebar-->
+
+
+ <div class="col-md-10" style = "padding-left:17%;">
+ <h4 id= "sequencer_information_table">Sequencer Information</h4>
+ <table class="table table-striped">
+ <tr ng-repeat="(attr, value) in sequencer_information.tables">
+ <td style="max-width:80px;">{{attr}}</td>
+ <td ng-if = "isArray(value)==false && isString(value)==true">{{value}}</td>
+ <td ng-if = "isArray(value)==true " >
+ <table class="table table-sm" style="background-color:inherit; width : auto;" >
+ <tr>
+ <td ng-repeat = "(headline, value) in value[0]">{{headline}}</td>
+ </tr>
+ <tr ng-repeat="nested in value"><td ng-repeat = "(headline, value) in nested">{{value}}</td></tr>
+ </table></td>
+ <td ng-if = "isArray(value)==false && isString(value)==false" >
+ <table class ="table table-sm" style="background-color:inherit;width:auto;">
+ <tr ng-repeat = "(headline, value) in value">
+ <td >{{headline}}</td>
+ <td ng-if ="isArray(value)==false"> {{value}}</td>
+ <td ng-repeat = "val in value" ng-if ="isArray(value)==true"> {{val}}</td>
+ </tr>
+
+ </table></td>
+
+ </tr>
+ </table>
+
+ <h4 id= "sequencer_information_plots">Sequencer Plots</h4>
+ <a ng-click="selected.title='Data by Cycle - Intensity'; selected.caption='This plot shows the intensity by color of the 90% percentile of the data for each cycle.'; selected.img='src/img/intensity.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img width="400px" height="300px" ng-src="src/img/intensity.png">
+ </a>
+ <a ng-click="selected.title='Data by Cycle - FWHM'; selected.caption='The average full width of clusters at half maximum (in pixels).'; selected.img='src/img/FWHM.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img width="400px" height="300px" ng-src="src/img/FWHM.png">
+ </a>
+ <a ng-click="selected.title='Data by Cycle - % Base'; selected.caption='The percentage of clusters for which the selected base has been called.'; selected.img='src/img/base_perc.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img width="400px" height="300px" ng-src="src/img/base_perc.png">
+ </a>
+ <a ng-click="selected.title='Data by Lane - Phasing'; selected.caption='The percentage of molecules in a cluster for which sequencing falls behind (phasing) the current cycle within a read. The graph is split out per read.'; selected.img='src/img/phasing.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img width="400px" height="300px" ng-src="src/img/phasing.png">
+ </a>
+ <a ng-click="selected.title='Data by Lane - Prephasing'; selected.caption='The percentage of molecules in a cluster for which sequencing jumps ahead (prephasing) of the current cycle within a read. The graph is split out per read.';
+ selected.img='src/img/prephasing.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img width="400px" height="300px" ng-src="src/img/prephasing.png">
+ </a>
+
+
+ <a ng-click="selected.caption='The density of clusters for each tile (in thousand per mm2)'; selected.title='Data by Lane - Density'; selected.img='src/img/density.png'"
+ data-image-id="" data-toggle="modal" data-target="#image-gallery">
+ <img width="400px" height="300px" ng-src="src/img/density.png">
+ </a>
+ <a ng-click="selected.title='QScore Distribution'; selected.caption='The Q-score distribution shows the number of reads by quality score. The quality score is cumulative for the current cycle and previous cycles, and only
+ reads that pass the quality filter are included. The Q-score is based on the Phred scale.'; selected.img='src/img/qscore_distr.png'"
+ data-image-id="" data-toggle="modal" data-target="#image-gallery">
+ <img width="400px" height="300px" ng-src="src/img/qscore_distr.png">
+ </a>
+ <a ng-click="selected.title='QScore Heatmap'; selected.caption='The Q-score heat map shows the Q-score by cycle for all lanes.'; selected.img='src/img/qscore_heatmap.png'"
+ data-image-id="" data-toggle="modal" data-target="#image-gallery">
+ <img width="500px" height="300px" ng-src="src/img/qscore_heatmap.png">
+ </a>
+
+
+ </div>
+
+
+ </div>
+ <!-- END Sequencer Information content -->
+ <div class="tab-pane fade in active" id="summary">
+ <!-- Summary Table -->
+ <h4>Summary</h4>
+ <table class="table table-striped">
+ <thead style="font-weight:600;color:#0D7793">
+ <tr>
+ <td ng-repeat="(headline,bla) in projects.samples[0]" ng-if="headline!='images'">
+ {{headline}}
+ </td>
+ </tr>
+ </thead>
+ <tr ng-repeat="sample in projects.samples | orderBy:'setname'">
+ <td ng-repeat="(key,value) in sample " ng-if="key!='images'">
+ <input type="checkbox" ng-model="show_summary[value]" ng-if="key=='setname'"
+ ng-init="show_summary[value]=true"/> {{value}}
+ </td>
+
+ </tr>
+ </table>
+ <!-- END Summary Table -->
+ <div class="col-md-4">
+
+ <a ng-click="selected.title='Per sequence quality scores'; selected.caption=''; selected.img='src/img/Per_sequence_quality_scores.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img height="500px" ng-src="src/img/Per_sequence_quality_scores.png">
+ </a>
+ </div>
+ <div class="col-md-4">
+
+ <a ng-click="selected.title='Per sequence GC content'; selected.caption=''; selected.img='src/img/Per_sequence_GC_content.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img height="500px" ng-src="src/img/Per_sequence_GC_content.png">
+ </a>
+ </div>
+ <div class="col-md-4">
+
+ <a ng-click="selected.title='Sequence length distribution'; selected.caption=''; selected.img='src/img/Sequence_Length_Distribution.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img height="500px" ng-src="src/img/Sequence_Length_Distribution.png">
+ </a>
+ </div>
+ <br/>
+ <div class="col-md-4">
+
+ <a ng-click="selected.title='Number of reads remained after trimming'; selected.caption=''; selected.img='src/img/trimming.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img height="500px" ng-src="src/img/trimming.png">
+ </a>
+ </div>
+ <div class="col-md-4">
+
+ <a ng-click="selected.title='Number of reads mapped to reference'; selected.caption=''; selected.img='src/img/mapping.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img height="500px" ng-src="src/img/mapping.png">
+ </a>
+ </div>
+
+ <div class="col-md-4">
+
+ <a ng-click="selected.title='Percentage of reads classified by Kraken'; selected.caption=''; selected.img='src/img/kraken.png'"
+ data-image-id="" data-toggle="modal"
+ data-target="#image-gallery">
+ <img height="500px" ng-src="src/img/kraken.png">
+ </a>
+ </div>
+
+ </div>
+
+
+ <div class="tab-pane fade" id="fastqc">
+ <div style="width: 100%; " class="row">
+ <!--Sidebar-->
+ <div class="col-md-2 sidebar" >
+ <h4>Filter by ...</h4>
+ <br/>
+ <div class="panel panel-default" >
+ <div class="panel-heading">
+ <h5 class="panel-title" data-toggle="collapse" data-target="#show_sample">... sample</h5>
+
+ </div>
+ <div id = "show_sample" >
+ <p ng-repeat="(sample, value) in show_summary">
+ <input type="checkbox" ng-model="show_summary[sample]" ng-init="show_summary[sample] = value" /> {{sample}}
+ </p>
+ </div>
+</div>
+ </tr>
+ <div class="panel panel-default" >
+ <div class="panel-heading">
+ <h5 class="panel-title" data-toggle="collapse" data-target="#show_raw">... dataset</h5>
+
+ </div>
+ <div id = "show_raw" class = "collapse" >
+ <p>
+ <input type="checkbox" ng-model="show_raw_data"/> show raw data
+ </p>
+ </div>
+</div>
+ <!--Filter by image name-->
+
+ <div class="panel panel-default" >
+ <div class="panel-heading">
+ <h5 class="panel-title" data-toggle="collapse" data-target="#show_fastqc">... FastQC Images</h5>
+ </div>
+ <div class = "collapse" id="show_fastqc">
+ <p ng-repeat="(type,bla) in projects.samples[0].images[0].raw[0]">
+ <input type="checkbox" ng-model="show_img[bla.name]" ng-init="show_img[bla.name]=true"/>
+ {{bla.name}}
+ </p>
+</div></div>
+ <!-- END Filter by image name-->
+ <!--Filter by trimmed /untrimmed-->
+
+ <!-- END Filter by trimmed /untrimmed-->
+
+ </div>
+ <!-- END Sidebar-->
+ <div style="display: table-row;padding-left:17%;" class="col-md-10">
+
+ <div class="sample_box"
+ ng-repeat="sample in projects.samples | orderBy:'setname'| typeFilter: show_summary ">
+ <p class="inbox_text">{{sample.setname}}</p>
+ <div ng-repeat="sample_img in sample.images " style="display:table-cell;"
+ ng-if="show_raw_data">
+ <p class="inbox_text">Raw data</p>
+ <p class="inbox_text" ng-if="sample_img.raw.length==2">R1</p>
+ <div style="display:inline-block">
+ <a class="thumbnail"
+ ng-repeat="img in sample_img.raw[0]| imgFilter: show_img" href="#"
+ ng-click="selected.caption=img.name; selected.title='Raw '+sample.setname +' R1'; selected.img=img.file"
+ data-image-id="" data-toggle="modal"
+ data-title="Raw {{sample.setname}} R1"
+ data-caption="{{img.name}}" data-image="{{img.file}}"
+ data-target="#image-gallery">
+ <img width="200px" height="150px" ng-src="{{img.file}}"
+ style="border-radius:3px;border:solid 2px {{img.color}};display: block;">
+ </a>
+ </div>
+ <p class="inbox_text" ng-if="sample_img.raw.length==2">R2</p>
+ <div style="display:inline-block">
+ <a class="thumbnail"
+ ng-repeat="img in sample_img.raw[1]| imgFilter: show_img" href="#"
+ ng-click="selected.caption=img.name; selected.title='Raw ' + sample.setname + ' R2'; selected.img=img.file"
+ data-image-id="" data-toggle="modal"
+ data-title="Raw {{sample.setname}} R2"
+ data-caption="{{img.name}}" data-image="{{img.file}}"
+ data-target="#image-gallery">
+ <img width="200px" height="150px" ng-src="{{img.file}}"
+ style="border-radius:3px;border:solid 2px {{img.color}};display: block;">
+ </a></div>
+ </div>
+ <div ng-repeat="sample_img in sample.images " style="display:table-cell;">
+ <p class="inbox_text">Trimmed data</p>
+ <p class="inbox_text" ng-if="sample_img.raw.length==2">R1</p>
+ <div style="display:inline-block">
+ <a class="thumbnail"
+ ng-repeat="img in sample_img.trimmed[0]| imgFilter: show_img" href="#"
+ ng-click="selected.caption=img.name; selected.title='Trimmed ' + sample.setname + ' R1'; selected.img=img.file"
+ data-image-id="" data-toggle="modal"
+ data-title="Trimmed {{sample.setname}} R1"
+ data-caption="{{img.name}}" data-image="{{img.file}}"
+ data-target="#image-gallery">
+ <img width="200px" height="150px" ng-src="{{img.file}}"
+ style="border-radius:3px;border:solid 2px {{img.color}};display: block;">
+ </a>
+ </div>
+ <p class="inbox_text" ng-if="sample_img.raw.length ==2">R2</p>
+ <div style="display:inline-block">
+ <a class="thumbnail"
+ ng-repeat="img in sample_img.trimmed[1]| imgFilter: show_img" href="#"
+ ng-click="selected.caption=img.name; selected.title='Trimmed ' + sample.setname + ' R2'; selected.img=img.file"
+ data-image-id="" data-toggle="modal"
+ data-title="Trimmed {{sample.setname}} R2"
+ data-caption="{{img.name}}" data-image="{{img.file}}"
+ data-target="#image-gallery">
+ <img width="200px" height="150px" ng-src="{{img.file}}"
+ style="border-radius:3px;border:solid 2px {{img.color}};display: block;">
+ </a>
+ </div>
+ </div>
+ </div>
+ </div>
+ </div>
+ </div>
+ <!-- END FastQC -->
+
+ <!-- Kraken Report -->
+ <div class="tab-pane fade " id="kraken_report">
+
+ Show Kraken report of : <select id="select_kraken" ng-model="show_kraken">
+ <option value="" disabled selected hidden>Choose sample...</option>
+ <option ng-repeat="(name, value) in kraken" value="{{name}}">{{name}}</option>
+ </select>
+ <table id="kraken"></table>
+ </div>
+ <!-- END Kraken Report -->
+
+
+ <div class="tab-pane fade" id="information">
+ <h4>Commandline</h4>
+
+ <table class="table table-striped">
+ <tr ng-repeat="(attr, value) in information.commandline" ng-if="value !=''">
+ <td>{{attr}}</td>
+ <td>{{value}}</td>
+ </tr>
+ </table>
+ <h4>Version Control</h4>
+
+ <table class="table table-striped">
+ <tr ng-repeat="(attr, value) in information.versions" ng-if="value !=''">
+ <td>{{attr}}</td>
+ <td>{{value}}</td>
+ </tr>
+ </table>
+ </div>
+ <!-- END tab content-->
+ </div>
+
+ </div>
+ </div>
+ <!-- Modal -->
+ <div class="modal fade" id="image-gallery" tabindex="-1" role="dialog" aria-labelledby="myModalLabel"
+ aria-hidden="true">
+ <div class="modal-dialog modal-lg">
+ <div class="modal-content">
+ <div class="modal-header">
+ <button type="button" class="close" data-dismiss="modal" aria-label="Close"><span
+ aria-hidden="true">×</span></button>
+ <div class="col-md-8 text-justify" id="image-gallery-title" > <h4>{{selected.title}}</h4>
+ </div>
+
+ </div>
+ <div class="modal-body modal-lg">
+ <img id="image-gallery-image" style="height:100%;width:100%;" class="img-responsive" src="{{selected.img}}">
+ </div>
+ <div class="modal-footer">
+ <div class="col-md-12 text-justify" id="image-gallery-caption">{{selected.caption}}
+ </div>
+ </div>
+ </div>
+ </div>
+ <!-- END Modal -->
+ </div>
+ <!-- END div row-->
+ <!-- Trigger the modal with a button -->
+
+
+ </div>
+ <!-- END Summary Table Controller-->
+
+</div>
+
+</div>
+
+
+</body>
</html>
\ No newline at end of file
diff --git a/boxplot.R b/boxplot.R
index c347713..40ff94a 100755
--- a/boxplot.R
+++ b/boxplot.R
@@ -1,23 +1,27 @@
-#!/usr/bin/env Rscript
-library(ggplot2)
-args = commandArgs(trailingOnly=TRUE)
-mytable<- read.csv(args[1])
-mytable<- mytable[which(mytable$Count>0),]
-if(!any(is.na(mytable$Read))){
- ggplot(mytable, aes(fill=Read,group=interaction(Sample, Trimmed, Read,Lane),x=interaction(Sample, Lane), y=Value, weight=Count)) +
- geom_boxplot(outlier.size = 0.5) +theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
- facet_wrap(~ Trimmed)+
- scale_fill_manual(values=c("#ef8a62","#67a9cf","#D55E00", "#999999", "#56B4E9", "#F0E442", "#0072B2", "#CC79A7")) +
- ggtitle(args[3]) +
- xlab(args[4])+
- ylab(args[5])
-}else{
- ggplot(mytable, aes(fill=Sample,group=Sample,x=Sample, y=Value, weight=Count)) +
- geom_boxplot(outlier.size = 0.5) +theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5),legend.position="none") +
- facet_wrap(~ Trimmed)+
- scale_fill_manual(values=c("#ef8a62","#67a9cf","#D55E00", "#999999", "#56B4E9", "#F0E442", "#0072B2", "#CC79A7")) +
- ggtitle(args[3]) +
- xlab(args[4])+
- ylab(args[5])
-}
-ggsave(args[2])
\ No newline at end of file
+#!/usr/bin/env Rscript
+options(warn=-1)
+library(ggplot2)
+args = commandArgs(trailingOnly=TRUE)
+mytable<- read.csv(args[1])
+mytable<- mytable[which(mytable$Count>0),]
+
+
+if(!any(is.na(mytable$Read))){
+ gp<- ggplot(mytable, aes(fill=Read,group=interaction(Sample, Trimmed, Read,Lane),x=interaction(Sample, Lane), y=Value, weight=Count))
+}else{
+ gp<- ggplot(mytable, aes(fill=Sample,group=Sample,x=Sample, y=Value, weight=Count))
+}
+
+gp<- gp+ geom_boxplot(outlier.size = 0.5) +theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5),legend.position="none") +
+ scale_fill_manual(values=c("#ef8a62","#67a9cf","#D55E00", "#999999", "#56B4E9", "#F0E442", "#0072B2", "#CC79A7")) +
+ ggtitle(args[3]) +
+ xlab(args[4])+
+ ylab(args[5])
+if(length(unique(mytable$Sample))>15){
+ gp<-gp + facet_wrap(~ Trimmed, ncol=1)
+ height=20
+}else{
+ gp<-gp + facet_wrap(~ Trimmed)
+ height=10
+}
+ggsave(args[2],plot=gp, unit="cm")
\ No newline at end of file
diff --git a/classes.py b/classes.py
index f8fb79c..bbfb42f 100755
--- a/classes.py
+++ b/classes.py
@@ -1,625 +1,631 @@
-import QCumber
-import datetime
-import numpy as np
-import getpass
-import csv
-from helper import *
-import re
-from os.path import basename, splitext, join, exists, dirname, isdir
-from os import uname, listdir, remove
-import configparser
-import shutil
-from collections import OrderedDict
-import base64
-
-default_parameter = configparser.ConfigParser()
-default_parameter.read(join(dirname(__file__), "parameter.txt"))
-
-def get_tool_path(name, section="PATH"):
- config = configparser.ConfigParser()
- config.read(join(dirname(__file__), "config.txt"))
- path_dict = config[section]
- return path_dict[name]
-
-class Pipeline:
- '''Get all tool versions '''
- def __init__(self):
-
- self.name = "Quality Control"
- self.version = QCumber.__version__
- self.user = getpass.getuser()
- self.python_version = toLatex(sys.version)
- self.fastqc_version = toLatex(subprocess.check_output(join(get_tool_path("fastqc"), "fastqc") + " --version", shell = True, stderr=subprocess.PIPE).decode("utf-8"))
-
- try:
- self.trimmo_version = subprocess.check_output(join(get_tool_path("trimmomatic"), "TrimmomaticSE") + " -version", shell=True, stderr=subprocess.PIPE).decode("utf-8")
- except:
- try:
- from debpackageinfo import get_upstream_version
- self.trimmo_version = get_upstream_version("trimmomatic")
- except:
- self.trimmo_version = "< 0.32"
- bowtie_v = subprocess.check_output(join(get_tool_path("bowtie2"), "bowtie2") + " --version", shell=True).decode("utf-8")
- self.bowtie_version = re.match(".*\)", toLatex(bowtie_v, True)).group()
- self.kraken_version =toLatex(subprocess.check_output( join(get_tool_path('kraken'), "kraken") + " --version", shell = True).decode("utf-8"))
-
- system = uname()
- self.system = toLatex(system.sysname)
- self.server = toLatex(system.nodename)
- self.release = toLatex(system.release)
- self.operating_version = toLatex(system.version)
- self.machine = toLatex(system.machine)
-
-
-class Sample(object):
- """Sample with reference to ReadSets, MappingRes, KrakenResr"""
-
- def __init__(self, name, path, reference, threads, techn):
- global mainResultsPath
- global num_threads
- global technology
- num_threads = threads
- mainResultsPath = path
- technology = techn
-
- self.start = datetime.datetime.strftime(datetime.datetime.now(),"%d.%m.%Y, %H:%M:%S")
- self.stop = None
-
- self.name = name
- self.technology = techn
- self.threads = threads
- self.phred = "phred33"
- self.readSets = []
-
- self.mainResultsPath = path
- self.mappingRes = None
- self.krakenRes = None
- self.reference = reference
- def get_all_qc_plots(self):
- qc_dict = OrderedDict([("setname", self.name),("raw",[]), ("trimmed",[])])
- for rs in self.readSets:
- qc_dict["raw"].append(rs.r1.qcRes.get_plots())
- qc_dict["trimmed"].append(rs.trimRes.readset.r1.qcRes.get_plots())
- if (rs.r2 is not None):
- qc_dict["raw"].append(rs.r2.qcRes.get_plots())
- qc_dict["trimmed"].append(rs.trimRes.readset.r2.qcRes.get_plots())
- return qc_dict
-
- def total_reads(self):
- total = 0
- for rs in self.readSets:
- if rs.trimRes:
- total += rs.trimRes.total
- return total
-
- def pTrimmed(self):
- perc = []
- for rs in self.readSets:
- if rs.trimRes:
- perc.append(rs.trimRes.pTrimmed)
- if len(perc) == 0 :
- return 0
- return round(np.mean(perc), 2)
- def nTrimmed(self):
- total = 0
- for rs in self.readSets:
- if rs.trimRes:
- total += rs.trimRes.nTrimmed
- return total
-
- def add_readSet(self, readSet):
- self.readSets.append(readSet)
- return self
-
- def run_Kraken(self, db):
- r1, r2 = self.get_all_reads()
- self.krakenRes = KrakenResult(r1,r2,db, self.name)
- return self
-
- def run_Bowtie2(self, bowtie_index, save_mapping ):
- r1, r2 = self.get_all_reads()
- self.mappingRes = MappingResult(self.reference, r1, r2, bowtie_index)
- self.mappingRes = self.mappingRes.run_bowtie(self.threads, save_mapping)
- return self.mappingRes
-
- def get_all_reads(self, trimmed = True):
- if trimmed:
- r1 = " ".join([x.trimRes.readset.r1.filename for x in self.readSets])#join_reads([x.trimRes.readset.r1.filename for x in self.readSets], self.mainResultsPath, self.name + "_R1")
- r2 = " ".join([x.trimRes.readset.r2.filename for x in self.readSets if x.trimRes.readset.r2 is not None])#join_reads([x.trimRes.readset.r2.filename for x in self.readSets if x.trimRes.readset.r2 is not None], self.mainResultsPath, self.name + "_R2")
- else:
- r1 = " ".join([x.r1.filename for x in self.readSets])#join_reads([x.r1.filename for x in self.readSets], self.mainResultsPath, self.name + "_R1")
- r2 = " ".join([x.r2.filename for x in self.readSets if x.r2 is not None]) #join_reads([x.r2.filename for x in self.readSets if x.r2 is not None], self.mainResultsPath, self.name + "_R2") #readnames = {'r1' : r1}
- return r1,r2
-
- def get_mean_trimRes(self):
- return round(np.mean([x.trimRes.shorter_fragments_percentage for x in self.readSets]),2)
-
-class ReadSet:
- ''' Readset contains of r1 and r2'''
- def __init__(self, r1 ,r2=None):
-
- self.r2 = None
- if type(r1) is str:
- self.r1 = FastQFile(r1) # FastQFile
- if r2 is not None:
- self.r2 = FastQFile(r2) # FastQFile
- else:
- self.r1 = r1
- if r2 is not None:
- self.r2 = r2
- self.numberOfReads = 0
- self.numberofTrimmedReads = 0
- self.trimRes = None
- #self.outputDir = outputDir
- self.paired = True
- if len( basename(self.r1.filename).split("_")[:-3]) > 4:
- self.setname = "_".join(basename(self.r1.filename).split("_")[:-3])
- else:
- self.setname = basename(self.r1.filename)
- if(r2 is None):
- self.paired = False
-
- def run_FastQC(self, output):
- self.r1 = self.r1.run_FastQC(output)
- if(self.r2 is not None):
- self.r2 = self.r2.run_FastQC(output)
- return self
-
- def run_Trimmomatic(self, adapter, palindrome, minlen, trimOption, betterTrimming):
- self.trimRes = TrimResult(self, adapter, palindrome, minlen, trimOption, betterTrimming )
- return self
-
- def get_all_qc(self):
- qc_results = [ (self.r1.qcRes, self.trimRes.readset.r1.qcRes) ]
- if (self.r2 is not None):
- qc_results.append((self.r2.qcRes, self.trimRes.readset.r2.qcRes) )
- return qc_results
-
- def get_all_qc_dict(self):
- '''qc_dict =OrderedDict( {"raw":[],"trimmed":[]} )
- qc_dict["raw"] = [[x.__dict__ for x in self.r1.qcRes.results_to_base64()]]
- qc_dict["trimmed"] = [[x.__dict__ for x in self.trimRes.readset.r1.qcRes.results_to_base64()]]
- if (self.r2 is not None):
- qc_dict["raw"].append([x.__dict__ for x in self.r2.qcRes.results_to_base64()])
- qc_dict["trimmed"].append([x.__dict__ for x in self.trimRes.readset.r2.qcRes.results_to_base64()])
- return qc_dict'''
- qc_dict = OrderedDict({"raw": [], "trimmed": []})
- qc_dict["raw"] = [[x.__dict__ for x in self.r1.qcRes.results_for_report()]]
- qc_dict["trimmed"] = [[x.__dict__ for x in self.trimRes.readset.r1.qcRes.results_for_report()]]
- if (self.r2 is not None):
- qc_dict["raw"].append([x.__dict__ for x in self.r2.qcRes.results_for_report()])
- qc_dict["trimmed"].append([x.__dict__ for x in self.trimRes.readset.r2.qcRes.results_for_report()])
- return qc_dict
-
-
-class FastQFile:
- def __init__(self, absFilename, concat_files = None):
- self.filename = absFilename
- self.qcRes = None
- self.log = ""
- self.phred="phred33"
- self.concat_files = None
-
- def run_FastQC(self, outputDir):
- process = subprocess.Popen([join(get_tool_path('fastqc'), "fastqc") , self.filename , "-o" , outputDir, "-t", str(num_threads), "--extract"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
- for line in iter(process.stderr.readline, b''):
- line = line.decode("utf-8")
- line = line.replace("\n", "")
- print(line)
- for line in iter(process.stdout.readline, b''):
- line = line.decode("utf-8")
- if line.startswith("Approx"):
- print(line, end="\r")
- else:
- print(line, end="\r")
- pattern = re.match("Quality encoding detected as (?P<phred>\w+\d{2})", line)
- if pattern:
- self.phred = pattern.group("phred")
- print("")
- process.communicate()
- self.qcRes = QCResult(join(outputDir, getFilenameWithoutExtension(basename(self.filename)) + "_fastqc"), join(mainResultsPath, "Report", "src"))
- return self
- def get_filename(self):
- return toLatex(basename(self.filename))
-###########
-## Result classes
-###########
-
-class QCResult:
- def __init__(self, resultsPath, summary_output):
- self.path = resultsPath
- self.results = self.get_results()
- #write summary csv for boxplots
- write_fastqc_summary(resultsPath, summary_output)
-
- def get_results(self):
- summary_list = []
- no_plot=["Basic Statistics", "Overrepresented sequences"]
- with open(join(self.path, "summary.txt"), "r") as summary:
- reader = csv.reader(summary, delimiter = "\t")
- for row in reader:
- if (row[1] not in no_plot):
- try:
- plot = Plot(row[0], row[1], self.path)
- if exists(plot.file):
- summary_list.append(plot)
- else:
- print("No plot file ", plot.file, row[1])
-
- summary_list.append(Plot("NotExisting", row[1], "NotExisting"))
- except:
- print(row[1], "does not exist")
- summary_list.append(Plot("NotExisting", row[1], "NotExisting"))
- return summary_list
-
- def get_plots(self):
- print("extracting data ", self.path)
- store = False
- data = {"name": basename(self.path).replace("_fastc",""),
- "Sequence_Length_Distribution": [],
- "Per_sequence_GC_content": [],
- "Per_base_sequence_quality": []}
-
- with open(join(self.path, "fastqc_data.txt"), "r") as fastqcdatafile:
- box_values = []
- for line in iter(fastqcdatafile):
- if line.startswith("#"):
- pass
- elif line.startswith(">>END_MODULE"):
- if store==True:
- data[key] = get_distribution(box_values, key)
- store = False
- key = None
- box_values = []
- elif line.startswith(">>"):
- key = line.split("\t")[0]
- key=key.replace(">>","")
- key = key.replace(" ","_")
- if key in data.keys():
- store = True
- else:
- if store:
- box_values.append(line)
- return data
-
- def results_to_base64(self):
- converted_plot = []
- for plot in self.results:
- if not plot.file.startswith("NotExisting"):
- with open(plot.file, "rb") as imgfile:
- imgstring = base64.b64encode(imgfile.read())
- plot.file ='data:image/png;base64,' + imgstring.decode("utf-8")
- converted_plot.append(plot)
- return converted_plot
-
- def results_for_report(self):
- converted_plot = []
- for plot in self.results:
- if not plot.file.startswith("NotExisting"):
- plot.file =plot.file.replace(mainResultsPath, "..")
- converted_plot.append(plot)
- return converted_plot
-
-class Plot:
- def __init__(self, value, name, resultsPath):
- self.color = self.get_color(value)
- self.name = name
- self.file = self.name_to_file(resultsPath)
-
- def get_color(self, value):
- if(value == "PASS"):
- return "green"
- elif(value =="FAIL"):
- return "red"
- #elif (value =="WARN"):
- # return "orange"
- else:
- return "orange"#"grey"
-
- def name_to_file(self,resultsPath):
- if self.name in default_parameter["FastQC"].keys():
- return join(resultsPath, "Images", default_parameter["FastQC"][self.name])
- else:
- return ""
-
-
-class TrimParameters:
- illuminaClip_seedMismatch = str(default_parameter["Trimmomatic"]["illuminaClip_seedMismatch"]) #"2" #specifies the maximum mismatch count which will still allow a full match to be performed
- illuminaClip_simpleClip = str(default_parameter["Trimmomatic"]["illuminaClip_simpleClip"]) # "10" #specifies how accurate the match between any adapter etc. sequence must be against a read
- leading = str(default_parameter["Trimmomatic"]["leading"]) #"3" # remove leading low quality below 3
- trailing = str(default_parameter["Trimmomatic"]["trailing"]) #"3" # remove trailing quality below 3
- adapter_path = get_tool_path('adapter')
- def __init__(self):
- #self.palindrome = palindrome
- self.extraTrimOptions = ""
-
- def make_callstring(self, adapter,palindrome, minlen, trimOption, betterTrimmingOption ):
- call = []
- if technology =="Illumina":
- call.append("ILLUMINACLIP:" + join(self.adapter_path, adapter+ ".fa") + ":" + self.illuminaClip_seedMismatch + ":" + str(palindrome) + ":" + self.illuminaClip_simpleClip)
- call.append(trimOption)
- call.append(betterTrimmingOption)
- call.append("MINLEN:" + str(minlen))
- return " ".join(call)
-
-
-class TrimResult:
- def __init__(self, readset, adapter, palindrome, minlen, trimOption, betterTrimming):
- self.parameters = TrimParameters()
- self.readset = readset
- self.r1_paired = None
- self.r2_paired = None
- self.r1_unpaired = None
- self.r2_unpaired = None
-
- self.logs = ""
-
- #Results
- self.shorter_fragments_percentage = 0
- self.input_number = 0
- self.nTrimmed = 0
- self.pTrimmed = 0
- self.adapter = adapter
- self.run( palindrome, minlen, trimOption, betterTrimming)
-
- def make_callstring(self, readset, palindrome, minlen, trimOption, save = False, betterTrimmingOption =""):
- if readset.paired:
- if save:
- self.r1_paired =join(mainResultsPath, "Trimmed", "r1_P_" + getFilenameWithoutExtension(readset.r1.filename, getBase = True) + ".fastq")
- self.r1_unpaired = join(mainResultsPath, "Trimmed", "r1_UP_" + getFilenameWithoutExtension(readset.r1.filename, getBase = True)+ ".fastq")
- self.r2_paired =join(mainResultsPath, "Trimmed", "r2_P_" + getFilenameWithoutExtension(readset.r2.filename, getBase = True)+ ".fastq")
- self.r2_unpaired =join(mainResultsPath, "Trimmed", "r2_UP_" + getFilenameWithoutExtension(readset.r2.filename, getBase = True)+ ".fastq")
- output = " ".join([self.r1_paired, self.r1_unpaired, self.r2_paired, self.r2_unpaired])
- else:
- output = "/dev/null /dev/null /dev/null /dev/null"
- callProcess = [join(get_tool_path("trimmomatic"), "TrimmomaticPE "),
- "-threads", str(num_threads),
- "-" + readset.r1.phred,
- readset.r1.filename, # input 1
- readset.r2.filename, # input 2
- output,
- self.parameters.make_callstring(self.adapter,palindrome, minlen, trimOption, betterTrimmingOption)
- ]
- else:
- callProcess = [join(get_tool_path("trimmomatic"),"TrimmomaticSE "),
- "-threads", str(num_threads),
- "-" + readset.r1.phred,
- readset.r1.filename, # input 1
- join(mainResultsPath, "Trimmed", getFilenameWithoutExtension(readset.r1.filename, getBase = True)+ ".fastq"),#output
- self.parameters.make_callstring(self.adapter, palindrome, minlen, trimOption, betterTrimmingOption)
- ]
- self.r1_unpaired = join(mainResultsPath, "Trimmed", getFilenameWithoutExtension(readset.r1.filename, getBase = True)+ ".fastq")
- return callProcess
-
- def check_quality(self,readset, trim_perc):
- path = join(mainResultsPath, "Trimmed", "temp")
- if readset.paired:
- try:
- r1_name = join_reads([self.r1_paired, self.r1_unpaired], path, basename(readset.r1.filename))
-
- print("Run FastQC to optimize trimming.")
- process = subprocess.Popen( [join(get_tool_path('fastqc'), "fastqc"), r1_name,"--nogroup", "--extract", "-o", path,"--threads", str(num_threads)], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- process.communicate()
- trim_bool, headcrop, crop = optimize_trimming(join(path, getFilenameWithoutExtension(basename(r1_name)) + "_fastqc"), trim_perc)
-
- #if r1 was ok, go on and check r2
- print("Check 'per sequence base content' of R2.")
-
- r2_name = join_reads([self.r2_paired, self.r2_unpaired], path, basename(readset.r2.filename))
- process = subprocess.Popen(
- [join(get_tool_path('fastqc'), "fastqc"), r2_name,"--nogroup", "--extract", "-o",path, "--threads", str(num_threads)],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- process.communicate()
- trim_bool, r2_headcrop, r2_crop = optimize_trimming(join(path, getFilenameWithoutExtension(basename(r2_name)) + "_fastqc"), trim_perc)
-
- return trim_bool, max(headcrop, r2_headcrop), min(crop, r2_crop)
- finally:
- shutil.rmtree(path)
- else:
- process = subprocess.Popen([join(get_tool_path('fastqc'), "fastqc"), self.r1_unpaired,"--nogroup", "--extract", "-o", path, "--threads", str(num_threads)],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- for line in iter(process.stdout.readline, b''):
- print(line, end='\r')
- process.communicate()
- trim_bool, headcrop, crop = optimize_trimming(join(path,getFilenameWithoutExtension(basename(self.r1_unpaired)) + "_fastqc"), trim_perc)
-
- return trim_bool, headcrop, crop
-
- def run(self, palindrome, minlen, trimOption, betterTrimming):
- # Report Trimmomatic Output
- # Run palindrome parameter which should be kept at last
-
- survived = {'30':0, '1000':0}
-
- call = self.make_callstring( self.readset, palindrome, minlen, trimOption, save=True)
- process = subprocess.Popen(" ".join(call), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
- survived1, total, nTrimmedReads, pTrimmedReads, log = trimmomatic_report(process,call[0], True)
- process.communicate()
-
- ### Optimize trimming
- if betterTrimming !="":
- print("Improve trimming parameter")
- makedirs(join(mainResultsPath, "Trimmed","temp"), exist_ok=True)
- trim_bool, final_headcrop, final_crop = self.check_quality(self.readset, betterTrimming)
-
- if trim_bool:
- self.parameters.extraTrimOptions = "HEADCROP:"+str(final_headcrop) + " CROP:"+str(final_crop)
- call = self.make_callstring( self.readset, palindrome, minlen, trimOption, save=True, betterTrimmingOption = self.parameters.extraTrimOptions)
- process = subprocess.Popen(" ".join(call), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
- survived1, total, nTrimmedReads, pTrimmedReads, log = trimmomatic_report(process, call[0], True)
- process.communicate()
-
- self.total = total
- self.nTrimmed = nTrimmedReads
- self.pTrimmed = pTrimmedReads
- self.logs += log
-
- ############################
-
- survived[str(palindrome)] = survived1
- if self.readset.r2 is not None:
- if palindrome==30:
- palindrome=1000
- else:
-
- palindrome=30
- process = subprocess.Popen(" ".join(self.make_callstring( self.readset, palindrome, minlen, trimOption, save=False, betterTrimmingOption = self.parameters.extraTrimOptions) ),
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE, shell=True)
- survived2, total,nTrimmedReads, pTrimmedReads, temp = trimmomatic_report(process, call[0],False)
- process.communicate()
- survived[str(palindrome)] = survived2
- self.shorter_fragments_percentage = round(survived["1000"] - survived["30"], 2)
-
- return self
-
- def run_FastQC(self, tempdir):
- if self.readset.paired:
- #tempdir = tempfile.mkdtemp()
- r1_name = join_reads([self.r1_paired, self.r1_unpaired], tempdir, basename(self.readset.r1.filename))
- r2_name = join_reads([self.r2_paired, self.r2_unpaired], tempdir, basename(self.readset.r2.filename))
- try:
- self.readset = ReadSet( r1_name, r2_name)
- self.readset.run_FastQC(join(mainResultsPath, "Trimmed/FastQC"))
- except:
- print(sys.exc_info())
- sys.exit()
- finally:
- remove(r1_name)
- remove(r2_name)
- self.readset.r1.filename = " ".join([self.r1_paired, self.r1_unpaired])
- self.readset.r2.filename = " ".join([self.r2_paired, self.r2_unpaired])
- else:
- self.readset = ReadSet( self.r1_unpaired)
- self.readset.run_FastQC(join(mainResultsPath, "Trimmed"))
- return self
-
-
- def print_param(self, palindrome, minlen, trimOption):
- return self.parameters.make_callstring(self.adapter, palindrome, minlen, trimOption, self.parameters.extraTrimOptions)
-
-class MappingParameter:
- def __init__(self, mapping):
- pass
-
-class MappingResult:
- def __init__(self, reference, r1_name, r2_name, bowtie_index):
- self.numberOfAlignedReads = 0
- self.percentAlignedReads= 0
- self.index = self.build_index(reference, bowtie_index)
- self.log = ""
- self.r1_name = r1_name
- self.r2_name = r2_name
-
- def build_index(self, reference, index):
- index_name = join(index, getFilenameWithoutExtension(basename(reference)) +"_bt2") # reference name "bowtie2 build index_name"
- if not isdir(index):
- return index
- process = subprocess.Popen([join(get_tool_path("bowtie2"),"bowtie2-build"),reference, index_name ], stderr=subprocess.PIPE, stdout = subprocess.PIPE)
-
- for line in iter(process.stderr.readline, b''):
- line = line.decode("utf-8")
- print(line)
- process.communicate()
- print("Bowtie2-build. Done.")
- return index_name
-
-
- def run_bowtie(self, threads, save_mapping):
- print("Run Bowtie2")
- call = [join(get_tool_path("bowtie2"), "bowtie2"), "-x", self.index, "--no-unal --threads ", str(num_threads)]
-
- #if self.r2_name is not None:
- # call.extend(["-1", re.sub(r"(?<!\\)\s", ",", self.r1_name)])
- # call.extend(['-2',re.sub(r"(?<!\\)\s",",",self.r2_name)])
- #else:
- # call.extend(["-U", re.sub(r"(?<!\\)\s", ",", self.r1_name)])
- call.extend(["-U", re.sub(r"(?<!\\)\s", ",", self.r1_name + " " + self.r2_name)])
- call.extend(["-S", save_mapping, "2>&1"])
- #print("Call ", " ".join(call))
- process = subprocess.Popen(" ".join(call), stderr=subprocess.PIPE, stdout= subprocess.PIPE, shell=True)
-
- for line in iter(process.stdout.readline, b''):
- line = line.decode("utf-8")
- print(line)
- self.log += toLatex(line) + "\n"
- pattern1 = re.match(".* (?P<aligned_exact>\d+) \((?P<percent_aligned_exact>\d+.\d+)%\) aligned exactly 1 time", line)
- pattern2 = re.match(".* (?P<aligned_more_than_one>\d+) \((?P<percent_aligned_more_than_one>\d+.\d+)%\) aligned >1 times", line)
- if pattern1:
- self.numberOfAlignedReads += int(pattern1.group("aligned_exact"))
- self.percentAlignedReads += float(pattern1.group("percent_aligned_exact").replace(",","."))
- if pattern2:
- self.numberOfAlignedReads += int(pattern2.group("aligned_more_than_one"))
- self.percentAlignedReads += float(pattern2.group("percent_aligned_more_than_one"))
- self.percentAlignedReads = round(self.percentAlignedReads,2)
-
- for line in iter(process.stderr.readline, b''):
- line = line.decode("utf-8")
- self.log += "\\textcolor{red}{" + toLatex(line) + "}\n"
- print(line)
- process.communicate()
- print("Bowtie2 done.")
- return self
-
-
-class KrakenResult:
- def __init__(self, r1,r2, db, name):
- self.name = name
- self.report_name = None
- self.report = ""
- self.pClassified = 0
- self.nClassified = 0
- self.log = ""
- self.run_kraken(r1,r2, db)
-
- def make_callstring(self, r1,r2, db):
- #input_type = ""
- #if fileext.endswith("")
- call = [join(get_tool_path('kraken') , "kraken") , "--db", db, r1]
- if r2 is not None:
- call.append( r2)# + " --paired")
- call.append("--preload")
- call.append("--threads " + str(num_threads))
- self.report_name = join(mainResultsPath, self.name + ".krakenreport")
- if exists(self.report_name):
- i = 2
- new_name = join(mainResultsPath, self.name + "_" +str(i) + ".krakenreport")
- while exists(new_name):
- i += 1
- new_name = join(mainResultsPath, self.name + "_" + str(i) + ".krakenreport")
- self.report_name=new_name
-
- call.append("--output " + self.report_name)
- return " ".join(call)
-
- def run_kraken(self, r1,r2, db):
- process = subprocess.Popen(self.make_callstring(r1,r2, db), shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
- self.log += "\\textcolor{gray}{Using Parameter " + " ".join(["\path{"+ x + "}" for x in self.make_callstring(r1,r2, db).split(" ")]) + "}\\\\"
-
- for line in iter(process.stderr.readline, b''):
- line = line.decode("utf-8")
- print(line)
- if not (line.startswith("Loading database") or re.search("Processed \d+", line)):
- self.log += toLatex(line) +"\n"
- pattern = re.match("\s+(?P<nClassified>\d+) sequences classified \((?P<pClassified>\d+.\d+)%\)", line)
- if pattern:
- self.nClassified = pattern.group("nClassified")
- self.pClassified = pattern.group("pClassified")
-
- for line in iter(process.stdout.readline, b''):
- line = line.decode("utf-8")
- print(line)
- if not ( re.search("Processed \d+", line) or line.startswith("Loading database") ):
- self.output += line + "\n"
- process.communicate()
-
- print("Convert Kraken-Report..")
- self.convert_report(db)
- return self
-
- def convert_report(self, db):
- process = subprocess.check_output([join(get_tool_path('kraken') ,'kraken-report') , "--db", db, self.report_name])
- self.report = process.decode("utf-8")
- return self
-
-
+import QCumber
+import datetime
+import numpy as np
+import getpass
+import csv
+from helper import *
+import re
+from os.path import basename, splitext, join, exists, dirname, isdir
+from os import uname, listdir, remove
+import configparser
+import shutil
+from collections import OrderedDict
+import base64
+
+default_parameter = configparser.ConfigParser()
+default_parameter.read(join(dirname(__file__), "parameter.txt"))
+
+def get_tool_path(name, section="PATH"):
+ config = configparser.ConfigParser()
+ config.read(join(dirname(__file__), "config.txt"))
+ path_dict = config[section]
+ return path_dict[name]
+
+class Pipeline:
+ '''Get all tool versions '''
+ def __init__(self):
+
+ self.name = "Quality Control"
+ self.version = QCumber.__version__
+ self.user = getpass.getuser()
+ self.python_version = toLatex(sys.version)
+ self.fastqc_version = toLatex(subprocess.check_output(join(get_tool_path("fastqc"), "fastqc") + " --version", shell = True, stderr=subprocess.PIPE).decode("utf-8"))
+
+ try:
+ self.trimmo_version = subprocess.check_output(join(get_tool_path("trimmomatic"), "TrimmomaticSE") + " -version", shell=True, stderr=subprocess.PIPE).decode("utf-8")
+ except:
+ try:
+ from debpackageinfo import get_upstream_version
+ self.trimmo_version = get_upstream_version("trimmomatic")
+ #self.trimmo_version = re.search("Trimmomatic-(?P<version>\d.\d+)",get_tool_path("trimmomatic")).group("version")
+ except:
+ self.trimmo_version = "< 0.32"
+ bowtie_v = subprocess.check_output(join(get_tool_path("bowtie2"), "bowtie2") + " --version", shell=True).decode("utf-8")
+ self.bowtie_version = re.match(".*\)", toLatex(bowtie_v, True)).group()
+ self.kraken_version =toLatex(subprocess.check_output( join(get_tool_path('kraken'), "kraken") + " --version", shell = True).decode("utf-8"))
+
+ system = uname()
+ self.system = toLatex(system.sysname)
+ self.server = toLatex(system.nodename)
+ self.release = toLatex(system.release)
+ self.operating_version = toLatex(system.version)
+ self.machine = toLatex(system.machine)
+
+
+class Sample(object):
+ """Sample with reference to ReadSets, MappingRes, KrakenResr"""
+
+ def __init__(self, name, path, reference, threads, techn):
+ global mainResultsPath
+ global num_threads
+ global technology
+ num_threads = threads
+ mainResultsPath = path
+ technology = techn
+
+ self.start = datetime.datetime.strftime(datetime.datetime.now(),"%d.%m.%Y, %H:%M:%S")
+ self.stop = None
+
+ self.name = name
+ self.technology = techn
+ self.threads = threads
+ self.phred = "phred33"
+ self.readSets = []
+
+ self.mainResultsPath = path
+ self.mappingRes = None
+ self.krakenRes = None
+ self.reference = reference
+ def get_all_qc_plots(self):
+ qc_dict = OrderedDict([("setname", self.name),("raw",[]), ("trimmed",[])])
+ for rs in self.readSets:
+ qc_dict["raw"].append(rs.r1.qcRes.get_plots())
+ qc_dict["trimmed"].append(rs.trimRes.readset.r1.qcRes.get_plots())
+ if (rs.r2 is not None):
+ qc_dict["raw"].append(rs.r2.qcRes.get_plots())
+ qc_dict["trimmed"].append(rs.trimRes.readset.r2.qcRes.get_plots())
+ return qc_dict
+
+ def total_reads(self):
+ total = 0
+ for rs in self.readSets:
+ if rs.trimRes:
+ total += rs.trimRes.total
+ return total
+
+ def pTrimmed(self):
+ perc = []
+ for rs in self.readSets:
+ if rs.trimRes:
+ perc.append(rs.trimRes.pTrimmed)
+ if len(perc) == 0 :
+ return 0
+ return round(np.mean(perc), 2)
+ def nTrimmed(self):
+ total = 0
+ for rs in self.readSets:
+ if rs.trimRes:
+ total += rs.trimRes.nTrimmed
+ return total
+
+ def add_readSet(self, readSet):
+ self.readSets.append(readSet)
+ return self
+
+ def run_Kraken(self, db):
+ r1, r2 = self.get_all_reads()
+ self.krakenRes = KrakenResult(r1,r2,db, self.name)
+ return self
+
+ def run_Bowtie2(self, bowtie_index, save_mapping ):
+ r1, r2 = self.get_all_reads()
+ self.mappingRes = MappingResult(self.reference, r1, r2, bowtie_index)
+ self.mappingRes = self.mappingRes.run_bowtie(self.threads, save_mapping)
+ return self.mappingRes
+
+ def get_all_reads(self, trimmed = True):
+ if trimmed:
+ r1 = " ".join([x.trimRes.readset.r1.filename for x in self.readSets])#join_reads([x.trimRes.readset.r1.filename for x in self.readSets], self.mainResultsPath, self.name + "_R1")
+ r2 = " ".join([x.trimRes.readset.r2.filename for x in self.readSets if x.trimRes.readset.r2 is not None])#join_reads([x.trimRes.readset.r2.filename for x in self.readSets if x.trimRes.readset.r2 is not None], self.mainResultsPath, self.name + "_R2")
+ else:
+ r1 = " ".join([x.r1.filename for x in self.readSets])#join_reads([x.r1.filename for x in self.readSets], self.mainResultsPath, self.name + "_R1")
+ r2 = " ".join([x.r2.filename for x in self.readSets if x.r2 is not None]) #join_reads([x.r2.filename for x in self.readSets if x.r2 is not None], self.mainResultsPath, self.name + "_R2") #readnames = {'r1' : r1}
+ return r1,r2
+
+ def get_mean_trimRes(self):
+ return round(np.mean([x.trimRes.shorter_fragments_percentage for x in self.readSets]),2)
+
+class ReadSet:
+ ''' Readset contains of r1 and r2'''
+ def __init__(self, r1 ,r2=None):
+
+ self.r2 = None
+ if type(r1) is str:
+ self.r1 = FastQFile(r1) # FastQFile
+ if r2 is not None:
+ self.r2 = FastQFile(r2) # FastQFile
+ else:
+ self.r1 = r1
+ if r2 is not None:
+ self.r2 = r2
+ self.numberOfReads = 0
+ self.numberofTrimmedReads = 0
+ self.trimRes = None
+ #self.outputDir = outputDir
+ self.paired = True
+ if len( basename(self.r1.filename).split("_")[:-3]) > 4:
+ self.setname = "_".join(basename(self.r1.filename).split("_")[:-3])
+ else:
+ self.setname = basename(self.r1.filename)
+ if(r2 is None):
+ self.paired = False
+
+ def run_FastQC(self, output):
+ self.r1 = self.r1.run_FastQC(output)
+ if(self.r2 is not None):
+ self.r2 = self.r2.run_FastQC(output)
+ return self
+
+ def run_Trimmomatic(self, adapter, palindrome, minlen, trimOption, betterTrimming,gz):
+ self.trimRes = TrimResult(self, adapter, palindrome, minlen, trimOption, betterTrimming,gz )
+ return self
+
+ def get_all_qc(self):
+ qc_results = [ (self.r1.qcRes, self.trimRes.readset.r1.qcRes) ]
+ if (self.r2 is not None):
+ qc_results.append((self.r2.qcRes, self.trimRes.readset.r2.qcRes) )
+ return qc_results
+
+ def get_all_qc_dict(self):
+ '''qc_dict =OrderedDict( {"raw":[],"trimmed":[]} )
+ qc_dict["raw"] = [[x.__dict__ for x in self.r1.qcRes.results_to_base64()]]
+ qc_dict["trimmed"] = [[x.__dict__ for x in self.trimRes.readset.r1.qcRes.results_to_base64()]]
+ if (self.r2 is not None):
+ qc_dict["raw"].append([x.__dict__ for x in self.r2.qcRes.results_to_base64()])
+ qc_dict["trimmed"].append([x.__dict__ for x in self.trimRes.readset.r2.qcRes.results_to_base64()])
+ return qc_dict'''
+ qc_dict = OrderedDict({"raw": [], "trimmed": []})
+ qc_dict["raw"] = [[x.__dict__ for x in self.r1.qcRes.results_for_report()]]
+ qc_dict["trimmed"] = [[x.__dict__ for x in self.trimRes.readset.r1.qcRes.results_for_report()]]
+ if (self.r2 is not None):
+ qc_dict["raw"].append([x.__dict__ for x in self.r2.qcRes.results_for_report()])
+ qc_dict["trimmed"].append([x.__dict__ for x in self.trimRes.readset.r2.qcRes.results_for_report()])
+ return qc_dict
+
+
+class FastQFile:
+ def __init__(self, absFilename, concat_files = None):
+ self.filename = absFilename
+ self.qcRes = None
+ self.log = ""
+ self.phred="phred33"
+ self.concat_files = None
+
+ def run_FastQC(self, outputDir):
+ print("Run fastqc in fastqfile ", " ".join([join(get_tool_path('fastqc'), "fastqc") , self.filename , "-o" , outputDir, "-t", str(num_threads), "--extract"]))
+ process = subprocess.Popen([join(get_tool_path('fastqc'), "fastqc") , self.filename , "-o" , outputDir, "-t", str(num_threads), "--extract"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
+ for line in iter(process.stderr.readline, b''):
+ line = line.decode("utf-8")
+ line = line.replace("\n", "")
+ print(line, end="\r")
+ for line in iter(process.stdout.readline, b''):
+ line = line.decode("utf-8")
+ if line.startswith("Approx"):
+ print(line, end="\r")
+ else:
+ print(line, end="\r")
+ pattern = re.match("Quality encoding detected as (?P<phred>\w+\d{2})", line)
+ if pattern:
+ self.phred = pattern.group("phred")
+ print("")
+ process.communicate()
+ self.qcRes = QCResult(join(outputDir, getFilenameWithoutExtension(basename(self.filename)) + "_fastqc"), join(mainResultsPath, "Report", "src"))
+ return self
+ def get_filename(self):
+ return toLatex(basename(self.filename))
+###########
+## Result classes
+###########
+
+class QCResult:
+ def __init__(self, resultsPath, summary_output):
+ self.path = resultsPath
+ self.results = self.get_results()
+ #write summary csv for boxplots
+ write_fastqc_summary(resultsPath, summary_output)
+
+ def get_results(self):
+ summary_list = []
+ no_plot=["Basic Statistics", "Overrepresented sequences"]
+ with open(join(self.path, "summary.txt"), "r") as summary:
+ reader = csv.reader(summary, delimiter = "\t")
+ for row in reader:
+ if (row[1] not in no_plot):
+ try:
+ plot = Plot(row[0], row[1], self.path)
+ if exists(plot.file):
+ summary_list.append(plot)
+ else:
+ print("No plot file ", plot.file, row[1])
+
+ summary_list.append(Plot("NotExisting", row[1], "NotExisting"))
+ except:
+ print(row[1], "does not exist")
+ summary_list.append(Plot("NotExisting", row[1], "NotExisting"))
+ return summary_list
+
+ def get_plots(self):
+ print("extracting data ", self.path)
+ store = False
+            data = {"name": basename(self.path).replace("_fastqc",""),
+ "Sequence_Length_Distribution": [],
+ "Per_sequence_GC_content": [],
+ "Per_base_sequence_quality": []}
+
+ with open(join(self.path, "fastqc_data.txt"), "r") as fastqcdatafile:
+ box_values = []
+ for line in iter(fastqcdatafile):
+ if line.startswith("#"):
+ pass
+ elif line.startswith(">>END_MODULE"):
+ if store==True:
+ data[key] = get_distribution(box_values, key)
+ store = False
+ key = None
+ box_values = []
+ elif line.startswith(">>"):
+ key = line.split("\t")[0]
+ key=key.replace(">>","")
+ key = key.replace(" ","_")
+ if key in data.keys():
+ store = True
+ else:
+ if store:
+ box_values.append(line)
+ return data
+
+ def results_to_base64(self):
+ converted_plot = []
+ for plot in self.results:
+ if not plot.file.startswith("NotExisting"):
+ with open(plot.file, "rb") as imgfile:
+ imgstring = base64.b64encode(imgfile.read())
+ plot.file ='data:image/png;base64,' + imgstring.decode("utf-8")
+ converted_plot.append(plot)
+ return converted_plot
+
+ def results_for_report(self):
+ converted_plot = []
+ for plot in self.results:
+ if not plot.file.startswith("NotExisting"):
+ plot.file =plot.file.replace(mainResultsPath, "..")
+ converted_plot.append(plot)
+ return converted_plot
+
+class Plot:
+ def __init__(self, value, name, resultsPath):
+ self.color = self.get_color(value)
+ self.name = name
+ self.file = self.name_to_file(resultsPath)
+
+ def get_color(self, value):
+ if(value == "PASS"):
+ return "green"
+ elif(value =="FAIL"):
+ return "red"
+ #elif (value =="WARN"):
+ # return "orange"
+ else:
+ return "orange"#"grey"
+
+ def name_to_file(self,resultsPath):
+ if self.name in default_parameter["FastQC"].keys():
+ return join(resultsPath, "Images", default_parameter["FastQC"][self.name])
+ else:
+ return ""
+
+
+class TrimParameters:
+ illuminaClip_seedMismatch = str(default_parameter["Trimmomatic"]["illuminaClip_seedMismatch"]) #"2" #specifies the maximum mismatch count which will still allow a full match to be performed
+ illuminaClip_simpleClip = str(default_parameter["Trimmomatic"]["illuminaClip_simpleClip"]) # "10" #specifies how accurate the match between any adapter etc. sequence must be against a read
+ leading = str(default_parameter["Trimmomatic"]["leading"]) #"3" # remove leading low quality below 3
+ trailing = str(default_parameter["Trimmomatic"]["trailing"]) #"3" # remove trailing quality below 3
+ adapter_path = get_tool_path('adapter')
+ def __init__(self):
+ #self.palindrome = palindrome
+ self.extraTrimOptions = ""
+
+ def make_callstring(self, adapter,palindrome, minlen, trimOption, betterTrimmingOption ):
+ call = []
+ if technology =="Illumina":
+ call.append("ILLUMINACLIP:" + join(self.adapter_path, adapter+ ".fa") + ":" + self.illuminaClip_seedMismatch + ":" + str(palindrome) + ":" + self.illuminaClip_simpleClip)
+ call.append(trimOption)
+ call.append(betterTrimmingOption)
+ call.append("MINLEN:" + str(minlen))
+ return " ".join(call)
+
+
+class TrimResult:
+ def __init__(self, readset, adapter, palindrome, minlen, trimOption, betterTrimming,gz):
+ self.parameters = TrimParameters()
+ self.readset = readset
+ self.r1_paired = None
+ self.r2_paired = None
+ self.r1_unpaired = None
+ self.r2_unpaired = None
+
+ self.logs = ""
+
+ #Results
+ self.shorter_fragments_percentage = 0
+ self.input_number = 0
+ self.nTrimmed = 0
+ self.pTrimmed = 0
+ self.adapter = adapter
+ self.run( palindrome, minlen, trimOption, betterTrimming, gz)
+
+ def make_callstring(self, readset, palindrome, minlen, trimOption, save = False, betterTrimmingOption ="", gz=True):
+ if gz:
+ extension= ".gz"
+ else:
+ extension =""
+ if readset.paired:
+ if save:
+ self.r1_paired =join(mainResultsPath, "Trimmed", "r1_P_" + getFilenameWithoutExtension(readset.r1.filename, getBase = True) + ".fastq" + extension)
+ self.r1_unpaired = join(mainResultsPath, "Trimmed", "r1_UP_" + getFilenameWithoutExtension(readset.r1.filename, getBase = True)+ ".fastq"+ extension)
+ self.r2_paired =join(mainResultsPath, "Trimmed", "r2_P_" + getFilenameWithoutExtension(readset.r2.filename, getBase = True)+ ".fastq"+ extension)
+ self.r2_unpaired =join(mainResultsPath, "Trimmed", "r2_UP_" + getFilenameWithoutExtension(readset.r2.filename, getBase = True)+ ".fastq"+ extension)
+ output = " ".join([self.r1_paired, self.r1_unpaired, self.r2_paired, self.r2_unpaired])
+ else:
+ output = "/dev/null /dev/null /dev/null /dev/null"
+ callProcess = [join(get_tool_path("trimmomatic"), "TrimmomaticPE "),
+ "-threads", str(num_threads),
+ "-" + readset.r1.phred,
+ readset.r1.filename, # input 1
+ readset.r2.filename, # input 2
+ output,
+ self.parameters.make_callstring(self.adapter,palindrome, minlen, trimOption, betterTrimmingOption)
+ ]
+ else:
+ callProcess = [join(get_tool_path("trimmomatic"),"TrimmomaticSE "),
+ "-threads", str(num_threads),
+ "-" + readset.r1.phred,
+ readset.r1.filename, # input 1
+                           join(mainResultsPath, "Trimmed", getFilenameWithoutExtension(readset.r1.filename, getBase = True)+ ".fastq"+ extension),#output must match self.r1_unpaired below
+ self.parameters.make_callstring(self.adapter, palindrome, minlen, trimOption, betterTrimmingOption)
+ ]
+ self.r1_unpaired = join(mainResultsPath, "Trimmed", getFilenameWithoutExtension(readset.r1.filename, getBase = True)+ ".fastq"+ extension)
+ return callProcess
+
+ def check_quality(self,readset, trim_perc):
+ path = join(mainResultsPath, "Trimmed", "temp")
+ if readset.paired:
+ try:
+ r1_name = join_reads([self.r1_paired, self.r1_unpaired], path, basename(readset.r1.filename))
+
+ print("Run FastQC to optimize trimming.")
+ process = subprocess.Popen( [join(get_tool_path('fastqc'), "fastqc"), r1_name,"--nogroup", "--extract", "-o", path,"--threads", str(num_threads)], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ process.communicate()
+ trim_bool, headcrop, crop = optimize_trimming(join(path, getFilenameWithoutExtension(basename(r1_name)) + "_fastqc"), trim_perc)
+
+ #if r1 was ok, go on and check r2
+ print("Check 'per sequence base content' of R2.")
+
+ r2_name = join_reads([self.r2_paired, self.r2_unpaired], path, basename(readset.r2.filename))
+ process = subprocess.Popen(
+ [join(get_tool_path('fastqc'), "fastqc"), r2_name,"--nogroup", "--extract", "-o",path, "--threads", str(num_threads)],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ process.communicate()
+ r2_trim_bool, r2_headcrop, r2_crop = optimize_trimming(join(path, getFilenameWithoutExtension(basename(r2_name)) + "_fastqc"), trim_perc)
+
+ return (trim_bool or r2_trim_bool), max(headcrop, r2_headcrop), min(crop, r2_crop)
+ finally:
+ shutil.rmtree(path)
+ else:
+ process = subprocess.Popen([join(get_tool_path('fastqc'), "fastqc"), self.r1_unpaired,"--nogroup", "--extract", "-o", path, "--threads", str(num_threads)],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ for line in iter(process.stdout.readline, b''):
+ print(line, end='\r')
+ process.communicate()
+ trim_bool, headcrop, crop = optimize_trimming(join(path,getFilenameWithoutExtension(basename(self.r1_unpaired)) + "_fastqc"), trim_perc)
+
+ return trim_bool, headcrop, crop
+
+ def run(self, palindrome, minlen, trimOption, betterTrimming, gz):
+ # Report Trimmomatic Output
+ # Run palindrome parameter which should be kept at last
+
+ survived = {'30':0, '1000':0}
+
+ call = self.make_callstring( self.readset, palindrome, minlen, trimOption, save=True, gz=gz)
+ process = subprocess.Popen(" ".join(call), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
+ survived1, total, nTrimmedReads, pTrimmedReads, log = trimmomatic_report(process,call[0], True)
+ process.communicate()
+
+ ### Optimize trimming
+ if betterTrimming !="":
+ print("Improve trimming parameter")
+ makedirs(join(mainResultsPath, "Trimmed","temp"), exist_ok=True)
+ trim_bool, final_headcrop, final_crop = self.check_quality(self.readset, betterTrimming)
+
+ if trim_bool:
+ self.parameters.extraTrimOptions = "HEADCROP:"+str(final_headcrop) + " CROP:"+str(final_crop)
+ call = self.make_callstring( self.readset, palindrome, minlen, trimOption, save=True, betterTrimmingOption = self.parameters.extraTrimOptions, gz=gz)
+ process = subprocess.Popen(" ".join(call), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
+ survived1, total, nTrimmedReads, pTrimmedReads, log = trimmomatic_report(process, call[0], True)
+ process.communicate()
+
+ self.total = total
+ self.nTrimmed = nTrimmedReads
+ self.pTrimmed = pTrimmedReads
+ self.logs += log
+
+ ############################
+
+ survived[str(palindrome)] = survived1
+ if self.readset.r2 is not None:
+ if palindrome==30:
+ palindrome=1000
+ else:
+
+ palindrome=30
+ process = subprocess.Popen(" ".join(self.make_callstring( self.readset, palindrome, minlen, trimOption, save=False, betterTrimmingOption = self.parameters.extraTrimOptions, gz=gz) ),
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE, shell=True)
+ survived2, total,nTrimmedReads, pTrimmedReads, temp = trimmomatic_report(process, call[0],False)
+ process.communicate()
+ survived[str(palindrome)] = survived2
+ self.shorter_fragments_percentage = round(survived["1000"] - survived["30"], 2)
+
+ return self
+
+ def run_FastQC(self, tempdir):
+ if self.readset.paired:
+ #tempdir = tempfile.mkdtemp()
+ r1_name = join_reads([self.r1_paired, self.r1_unpaired], tempdir, basename(self.readset.r1.filename))
+ r2_name = join_reads([self.r2_paired, self.r2_unpaired], tempdir, basename(self.readset.r2.filename))
+ try:
+ self.readset = ReadSet( r1_name, r2_name)
+ self.readset.run_FastQC(join(mainResultsPath, "Trimmed/FastQC"))
+ except:
+ print(sys.exc_info())
+ sys.exit()
+ finally:
+ #remove(r1_name)
+ #remove(r2_name)
+ self.readset.r1.filename = " ".join([self.r1_paired, self.r1_unpaired])
+ self.readset.r2.filename = " ".join([self.r2_paired, self.r2_unpaired])
+ else:
+ self.readset = ReadSet( self.r1_unpaired)
+ self.readset.run_FastQC(join(mainResultsPath, "Trimmed"))
+ return self
+
+
+ def print_param(self, palindrome, minlen, trimOption):
+ return self.parameters.make_callstring(self.adapter, palindrome, minlen, trimOption, self.parameters.extraTrimOptions)
+
+class MappingParameter:
+ def __init__(self, mapping):
+ pass
+
+class MappingResult:
+ def __init__(self, reference, r1_name, r2_name, bowtie_index):
+ self.numberOfAlignedReads = 0
+ self.percentAlignedReads= 0
+ self.index = self.build_index(reference, bowtie_index)
+ self.log = ""
+ self.r1_name = r1_name
+ self.r2_name = r2_name
+
+ def build_index(self, reference, index):
+ index_name = join(index, getFilenameWithoutExtension(basename(reference)) +"_bt2") # reference name "bowtie2 build index_name"
+ if not isdir(index):
+ return index
+ process = subprocess.Popen([join(get_tool_path("bowtie2"),"bowtie2-build"),reference, index_name ], stderr=subprocess.PIPE, stdout = subprocess.PIPE)
+
+ for line in iter(process.stderr.readline, b''):
+ line = line.decode("utf-8")
+ print(line)
+ process.communicate()
+ print("Bowtie2-build. Done.")
+ return index_name
+
+
+ def run_bowtie(self, threads, save_mapping):
+ print("Run Bowtie2")
+ call = [join(get_tool_path("bowtie2"), "bowtie2"), "-x", self.index, "--no-unal --threads ", str(num_threads)]
+
+ #if self.r2_name is not None:
+ # call.extend(["-1", re.sub(r"(?<!\\)\s", ",", self.r1_name)])
+ # call.extend(['-2',re.sub(r"(?<!\\)\s",",",self.r2_name)])
+ #else:
+ # call.extend(["-U", re.sub(r"(?<!\\)\s", ",", self.r1_name)])
+        call.extend(["-U", re.sub(r"(?<!\\)\s", ",", " ".join(x for x in (self.r1_name, self.r2_name) if x))])  # r2_name may be None for single-end data
+ call.extend(["-S", save_mapping, "2>&1"])
+ #print("Call ", " ".join(call))
+ process = subprocess.Popen(" ".join(call), stderr=subprocess.PIPE, stdout= subprocess.PIPE, shell=True)
+
+ for line in iter(process.stdout.readline, b''):
+ line = line.decode("utf-8")
+ print(line)
+ self.log += toLatex(line) + "\n"
+ pattern1 = re.match(".* (?P<aligned_exact>\d+) \((?P<percent_aligned_exact>\d+.\d+)%\) aligned exactly 1 time", line)
+ pattern2 = re.match(".* (?P<aligned_more_than_one>\d+) \((?P<percent_aligned_more_than_one>\d+.\d+)%\) aligned >1 times", line)
+ if pattern1:
+ self.numberOfAlignedReads += int(pattern1.group("aligned_exact"))
+ self.percentAlignedReads += float(pattern1.group("percent_aligned_exact").replace(",","."))
+ if pattern2:
+ self.numberOfAlignedReads += int(pattern2.group("aligned_more_than_one"))
+ self.percentAlignedReads += float(pattern2.group("percent_aligned_more_than_one"))
+ self.percentAlignedReads = round(self.percentAlignedReads,2)
+
+ for line in iter(process.stderr.readline, b''):
+ line = line.decode("utf-8")
+ self.log += "\\textcolor{red}{" + toLatex(line) + "}\n"
+ print(line)
+ process.communicate()
+ print("Bowtie2 done.")
+ return self
+
+
+class KrakenResult:
+ def __init__(self, r1,r2, db, name):
+ self.name = name
+ self.report_name = None
+ self.report = ""
+ self.pClassified = 0
+ self.nClassified = 0
+        self.log = ""; self.output = ""  # output is appended to in run_kraken; was missing (AttributeError)
+ self.run_kraken(r1,r2, db)
+
+ def make_callstring(self, r1,r2, db):
+ #input_type = ""
+ #if fileext.endswith("")
+ call = [join(get_tool_path('kraken') , "kraken") , "--db", db, r1]
+ if r2 is not None:
+ call.append( r2)# + " --paired")
+ call.append("--preload")
+ call.append("--threads " + str(num_threads))
+ self.report_name = join(mainResultsPath, self.name + ".krakenreport")
+ if exists(self.report_name):
+ i = 2
+ new_name = join(mainResultsPath, self.name + "_" +str(i) + ".krakenreport")
+ while exists(new_name):
+ i += 1
+ new_name = join(mainResultsPath, self.name + "_" + str(i) + ".krakenreport")
+ self.report_name=new_name
+
+ call.append("--output " + self.report_name)
+ return " ".join(call)
+
+ def run_kraken(self, r1,r2, db):
+ process = subprocess.Popen(self.make_callstring(r1,r2, db), shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+ self.log += "\\textcolor{gray}{Using Parameter " + " ".join(["\path{"+ x + "}" for x in self.make_callstring(r1,r2, db).split(" ")]) + "}\\\\"
+
+ for line in iter(process.stderr.readline, b''):
+ line = line.decode("utf-8")
+ print(line)
+ if not (line.startswith("Loading database") or re.search("Processed \d+", line)):
+ self.log += toLatex(line) +"\n"
+ pattern = re.match("\s+(?P<nClassified>\d+) sequences classified \((?P<pClassified>\d+.\d+)%\)", line)
+ if pattern:
+ self.nClassified = pattern.group("nClassified")
+ self.pClassified = pattern.group("pClassified")
+
+ for line in iter(process.stdout.readline, b''):
+ line = line.decode("utf-8")
+ print(line)
+ if not ( re.search("Processed \d+", line) or line.startswith("Loading database") ):
+ self.output += line + "\n"
+ process.communicate()
+
+ print("Convert Kraken-Report..")
+ self.convert_report(db)
+ return self
+
+ def convert_report(self, db):
+ process = subprocess.check_output([join(get_tool_path('kraken') ,'kraken-report') , "--db", db, self.report_name])
+ self.report = process.decode("utf-8")
+ return self
+
+
diff --git a/config.txt b/config.txt
index 0e269df..124f887 100755
--- a/config.txt
+++ b/config.txt
@@ -1,10 +1,10 @@
-[DEFAULT]
-kraken_db =/home/andruscha/software/kraken/minikraken_20141208/
-
-[PATH]
-kraken =
-adapter = /data/GS/tools/Trimmomatic-0.32/adapters/
-fastqc = /home/lieuv/tools/FastQC/
-trimmomatic =
-bowtie2 =
-
+[DEFAULT]
+kraken_db =/opt/common/pipelines/databases/minikraken_20141208/
+
+[PATH]
+kraken =
+adapter = /data/GS/tools/Trimmomatic-0.32/adapters/
+fastqc = /opt/common/pipelines/tools/FastQC_0.11.5/
+trimmomatic =
+bowtie2 =
+
diff --git a/helper.py b/helper.py
index b1c906f..d7fb524 100755
--- a/helper.py
+++ b/helper.py
@@ -1,412 +1,374 @@
-__author__ = 'LieuV'
-
-from os.path import basename, splitext, join, isfile, dirname, abspath, exists
-from os import makedirs, listdir, remove
-import subprocess
-import re, sys, numpy, shutil
-import matplotlib
-matplotlib.use("Agg") # in case no display is set
-import matplotlib.pyplot as plt
-import xml.etree.ElementTree as ET
-from collections import OrderedDict
-import json
-
-def getDir(args, getAbs = False):
- if isfile(args[0]):
- args[0]= dirname(args[0])
- path = join(*args)
- if(getAbs):
- path = abspath(path)
- makedirs(path, exist_ok=True)
- return path
-
-def getFilenameWithoutExtension(string, getBase = False):
- ''' In case of double extension '''
- if getBase:
- string = basename(string)
- string = splitext(string)[0]
- i = 0
- while splitext(string)[-1] in [".gz", ".gzip", ".zip", ".bz", ".fasta", ".fastq", ".bam"]:
- string = splitext(string)[0]
- return string
-
-def bam_to_fastq(bamfile, output):
- print("Convert bam to fastq...", bamfile)
- outfile = join(output, getFilenameWithoutExtension(bamfile, getBase= True) +".fastq")
- subprocess.Popen("bamtools convert -format fastq -in " + bamfile + " -out " + outfile, shell = True)
- return outfile
-
-def toLatex(text, newline = False):
- text = text.replace("#","\#")
- text = text.replace("_", "\_")
- text = text.replace("\t", " ")
- text = text.replace("%", "\%")
-
- if newline:
- text = text.replace("\n", " ")
- return text
-
-def join_reads(read_list, outputDir, outputName):
- if len(read_list) ==0:
- return None
- elif len(read_list) ==1:
- return read_list[0]
- outfile = join(outputDir, getFilenameWithoutExtension(outputName) )
- outfile += splitext(read_list[0])[-1]
- #if read_list[0].endswith(".gz"):
- print("Concatenate files. This may take a while." )
- process = subprocess.call("cat " + " ".join(read_list) + " > " + outfile, shell=True)
- #else:
- # print("Decompress and concatenate files. This may take a while.")
- # process = subprocess.call("gzip --fast -c " + " ".join(read_list) + " > " + outfile, shell=True)
- if process != 0:
- sys.exit("Error with gzip")
- return outfile
-
-def trimmomatic_report(process, type, write = True):
- logs =""
- survived = 0
- total = 0
- pTrimmedReads = 0
- nTrimmedReads = 0
- for line in iter(process.stdout.readline, b''):
- print(line.decode("utf-8"))
- for line in iter(process.stderr.readline, b''):
- line = line.decode("utf-8")
- print(line)
- if not (line.startswith("Using Long Clipping Sequence") or line.startswith("Trimmomatic")):
- if write:
- logs += toLatex(line) + "\n"
- if re.match("Input Read", line):
- if re.search("TrimmomaticPE", type):
- pattern = re.match("Input Read Pairs:\s+(?P<total>\d+)\s+"
- "Both Surviving:\s+(?P<nSurvived>\d+) \((?P<pSurvived>\d+.\d+)%\)\s+"
- "Forward Only Surviving:\s+(?P<forward>\d+) \(\d+.\d+%\)\s+"
- "Reverse Only Surviving:\s+(?P<reverse>\d+) \(\d+.\d+%\).*", line)
-
- survived = float(pattern.group("pSurvived").replace(",", "."))
- total = int(pattern.group("total")) * 2
- nTrimmedReads = int(pattern.group("nSurvived"))*2 + int(pattern.group("forward")) + int(pattern.group("reverse"))
- pTrimmedReads = round(100 * nTrimmedReads/total, 2)
- else:
- pattern = re.match(".*Surviving: (?P<nSurvived>\d+) \((?P<survived>\d+.\d+)%\)", line)
- pattern2 = re.match("Input Reads: (?P<total>\d+)", line)
-
- survived = float(pattern.group("survived").replace(",","."))
- total = int(pattern2.group("total"))
- nTrimmedReads = int(pattern.group("nSurvived"))
- pTrimmedReads = survived
-
- return survived, total, nTrimmedReads, pTrimmedReads, logs
-
-def perc_slope(a,b,perc):
- if abs(numpy.diff([a,b]))>(b*float(perc)):
- return True
- return False
-
-def optimize_trimming(fastqc_folder,perc=0.1):
- mytable=None
- filename = join(fastqc_folder, "fastqc_data.txt")
- with open(filename, "rb") as fastqcdata:
- table = []
- ifwrite = False
- for line in fastqcdata.readlines():
- line = line.decode()
- line = line.replace("\n", "")
- if line.startswith(">>END_MODULE") and ifwrite:
- try:
- dtype = {'names': table[0], 'formats': ['|S15', float, float, float, float]}
- mytable = numpy.asarray([tuple(x) for x in table[1:]], dtype=dtype)
- break
- except:
- pass
- elif line.startswith(">>Per base sequence content"):
- ifwrite = True
- temp = line.split("\t")
-
- if temp[1].lower() == "pass":
- return False, 0,0#break
- elif ifwrite:
- table.append(line.split("\t"))
-
- headcrop = 0
- tailcrop = len(mytable["A"])
- trim_bool = False
- column = numpy.ma.array(mytable)
- for i in range(-4, int(round(len(mytable["A"]) / 3, 0)), 1):
- for nucl in ["A", "C", "G", "T"]:
- column[nucl].mask[max(i, 0):i + 5] = True
- if headcrop >0:
- column[nucl].mask[:headcrop] = True
- if tailcrop < len(mytable["A"]):
- column[nucl].mask[tailcrop:] = True
-
- # check heacrop
- if (perc_slope(numpy.mean(mytable[nucl][max(i, 0):i + 5]), numpy.mean(column[nucl]), perc=perc)) & (headcrop < (i + 5)):
- headcrop = i + 5
- trim_bool = True
- elif headcrop < i:
- column[nucl].mask[max(i, 0):i + 5] = False
-
- # now crop from the end
- column[nucl].mask[-(i + 5):(min(len(mytable[nucl]), len(mytable[nucl]) - i))] = True
- if (perc_slope(numpy.mean(mytable[nucl][-(i + 6): (min(len(mytable[nucl]) - 1, len(mytable[nucl]) - 1 - i))]),numpy.mean(column[nucl]), perc=perc)) & (tailcrop > len(mytable[nucl]) - (i + 5)):
- # if perc_slope(numpy.var(mytable[nucl][-(i+6): (min(len(mytable["A"])-1, len(mytable[nucl])-1-i))]), numpy.mean(numpy.var(column)), perc=perc):
- tailcrop = len(mytable[nucl]) - (i + 5)
- trim_bool = True
- else:
- column[nucl].mask[-(i + 5): (min(len(mytable["A"]) - 1, len(mytable[nucl]) - 1 - i))] = False
-
- return trim_bool, headcrop, tailcrop-headcrop
-
-def str_to_json(report_str):
- closings = []
- json_file = ""
- report_str = report_str.split("\n")
- pre_depth = -1
- for row in report_str:
- nNode = 0
- if row != "":
- row = row.split("\t")
- if row[0].strip() != "0.00" and row[5] != "unclassified":
- temp = len(row[5])
- name = row[5].strip(" ")
- curr_depth = int((temp - len(name)) / 2)
-
- if pre_depth < curr_depth and curr_depth > 0:
- for i in range(curr_depth - pre_depth):
- if json_file.endswith('"children":[ '):
- if json_file.endswith("}") or json_file.endswith("]"):
- json_file += ","
- json_file += '{"children":[ '
- closings.append("]")
- closings.append("}")
- else:
- json_file += ',"children":[ '
- closings.append("]")
- elif pre_depth > curr_depth:
- while pre_depth >= curr_depth:
- close_with = closings.pop()
- json_file += close_with # end node
- if close_with == "]":
- close_with = closings.pop()
- json_file += close_with # end node
- pre_depth -= 1
- elif pre_depth != -1:
- json_file += closings.pop() + ','
- if json_file.endswith("}")or json_file.endswith("]"):
- json_file+=","
- json_file += '{"name":"%s",' % name
- json_file += '"perc":%s,' % row[0]
- json_file += '"taxa":"%s"' % row[3]
- closings.append("}")
- nNode += 1
- pre_depth = curr_depth
- while len(closings) > 0:
- json_file += closings.pop() # end node
- return json_file
-
-def delete_files(path, extended="", pattern="_fastqc"):
- for f in listdir(join(path, extended)):
- filename =join(path,extended,f)
- if f.endswith(pattern):
- if isfile(filename):
- remove(filename)
- else:
- shutil.rmtree(filename)
-
-
-def get_distribution(lines, key):
- '''creates CSV used for boxplotting'''
- output = []
- for line in lines:
- line = line.strip()
- fields = line.split('\t')
- if key == "Sequence_Length_Distribution":
- l = fields[0].split('-')
- try:
- mean = (int(l[0]) + int(l[1])) / 2
- except:
- mean = int(l[0])
- output.extend([str(mean)] * int(float(fields[1])))
-
- # sequence quality or sequence GC content
- if key == 'Per_sequence_GC_content' or key == "Per_sequence_quality_scores":
- v = fields[0] # quality or gc [%]
- output.extend(int(v) * [int(float(fields[1]))])
- return output
-
-def createCSV(dataset, lines, key, path, trimmed):
- '''creates CSV used for boxplotting'''
- #output = []
- splitted_ds=dataset.split("_")
-
- csvhandle = open(path + '/' + key + ".csv", 'a')
- for line in lines:
- line = line.strip()
- fields = line.split('\t')
- #Sequence Length Distribution
- if key == "Sequence_Length_Distribution":
- l = fields[0].split('-')
- try:
- value = (int(l[0]) + int(l[1])) / 2
- except:
- value = int(l[0])
- count = int(float(fields[1]))
-
- #sequence quality or sequence GC content
- if key == 'Per_sequence_quality_scores' or key == 'Per_sequence_GC_content':
- value = fields[0] #quality or gc [%]
- count = int(float(fields[1]))
- try:
- csvhandle.write(",".join(["_".join(splitted_ds[:-4]), splitted_ds[-3], splitted_ds[-4],trimmed, str(value), str(count)])+"\n")
- except:
- csvhandle.write(",".join([dataset.replace("_fastqc",""), "NA", "NA" , trimmed, str(value), str(count)]) + "\n")
- csvhandle.close()
-
-def write_fastqc_summary(fastqcdir, output):
- store = False
- data = []
- fastqcdatafile = open(join(fastqcdir,"fastqc_data.txt"), "r")
- for line in iter(fastqcdatafile):
- if line.startswith("#"):
- pass # print("comment")
- # continue
- elif line.startswith('>>END_MODULE'):
- if len(data) > 0:
- name1 = basename(fastqcdir)
- if "Trimmed" in fastqcdir:
- # name1 = "Trimmed " + name1
- trimmed = "Trimmed"
- else:
- trimmed = "Raw"
- createCSV(name1, data[2:], key, output, trimmed)
- data = []
- store = False
- elif line.startswith('>>'):
- key = line.split("\t")[0]
- key = key.replace(">>","")
- key = key.replace(" ","_")
- if key in ['Sequence_Length_Distribution', 'Per_sequence_quality_scores','Per_sequence_GC_content']:
- store = True
- if store:
- data.extend([line])
- fastqcdatafile.close()
-
-def write_SAV(folder, output):
- print("Plot SAV images...")
- process = subprocess.Popen(" ".join(["Rscript --vanilla ", join(dirname(__file__), "sav.R"),
- folder, join(output,"img")]),
- stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
- for line in iter(process.stderr.readline, b''):
- print(line)
- tree = ET.parse(join(folder,'RunInfo.xml'))
- root = tree.getroot()
- runinfo = OrderedDict()
-
- for child in root.iter("Run"):
- for grand in child.getchildren():
-
- if len(grand.attrib) != 0:
- runinfo[grand.tag] = grand.attrib
-
- if len(grand.getchildren()) !=0:
- for grandgrand in grand:
- if len(grandgrand.attrib) != 0:
- try:
- runinfo[grand.tag].append(grandgrand.attrib)
- except:
- runinfo[grand.tag] = [grandgrand.attrib]
-
- if grandgrand.text is not None:
- if grandgrand.text.strip() != "":
- if grand.tag not in runinfo.keys():
- runinfo[grand.tag]={}
- try:
- runinfo[grand.tag][grandgrand.tag].append(grandgrand.text)
- except:
- runinfo[grand.tag][grandgrand.tag] = [grandgrand.text]
-
- if grand.text is not None:
- if grand.text.strip()!="":
- if len(grand.attrib) != 0:
- runinfo[grand.tag][grand.tag] = grand.text
- else:
- runinfo[grand.tag] = grand.text
-
- runparameter_xml = ET.parse(join(folder, 'RunParameters.xml'))
- runparameters_unique = ["ExperimentName", "FPGAVersion", "CPLDVersion", "MCSVersion","RTAVersion", "CameraFirmware", "CameraDriver", "ClusteringChoice" ,"RapidRunChemistry", "RunMode", "ApplicationName", "ApplicationVersion"]
- runparameters_path = ["Setup/ReagentKits/Sbs/SbsReagentKit/ID","Setup/ReagentKits/Index/ReagentKit/ID","Setup/Sbs","Setup/Rehyb", "PR2BottleRFIDTag/PartNumber","FlowcellRFIDTag/PartNumber","ReagentKitRFIDTag/PartNumber"]
- root = runparameter_xml.getroot()
- runparameter = OrderedDict()
- for label in runparameters_unique:
- for i in root.iter(label):
- runparameter[label] = i.text
- #print(label, ": ", i.text)
-
- for label_path in runparameters_path:
- label_path = label_path.split("/")
- element = root.find(label_path[0])
- for label in label_path[1:]:
- if element is None:
- break
- element = element.find(label)
- if element is not None:
- runparameter["/".join(label_path)] = element.text
- #print(label_path, element.text)
- else:
- print(label_path, " does not exist.")
- runparameter.update(runinfo)
- json.dump({"tables":runparameter}, open(join(output, "sav.json"),"w"))
-
-'''def boxplot(data, title,output):
- bplotfont = {"weight": "bold", "size": 8}
- plt.rc("font", **bplotfont)
- xclasses = range(1, len(data) + 1)
- width = 4.5
- height = 3.5
- if (len(data) / 3) > width:
- width = len(data) / 3
- height = len(data) / 4
- # max width
- if width > 12:
- width = 12
- height = 7
- bplotfont = {"weight": "bold", "size": 8}
- plt.rc("font", **bplotfont)
-
- fig = plt.figure(1, figsize=(width, height))
- ax = fig.add_subplot(111)
- bp = ax.boxplot(data, notch=True, patch_artist=True)
-
- colors = ["cyan", "lightblue", "lightgreen", "tan", "pink"]
- for patch, color in zip(bp["boxes"], colors):
- patch.set_facecolor(color)
-
- # set props
- #whisker = {"color":"#b4b4b4", "linestyle":"-", "linewidth":0.75}
- #caps = {"color":"#b4b4b4", "linewidth":1}
- #flier = {"marker":"o", "lw":0, "color":"#e6e6e6", "markersize":0.5}
- #box = {"color":"#385d8a", "linewidth":0.75}"""
- #bp = ax.boxplot(data)# , capsprops = caps , flierprops = flier, boxprops =box)
-
- for box in bp["boxes"]:
- # outline color
- box.set(color="#385d8a", linewidth=0.75)
-
- for whisker in bp["whiskers"]:
- whisker.set(color="#b4b4b4", linestyle="-", linewidth=0.75)
-
- for cap in bp["caps"]:
- cap.set(color="#b4b4b4", linewidth=1)
-
- for median in bp["medians"]:
- median.set(color="#c0504d", linewidth=1)
-
- for flier in bp["fliers"]:
- flier.set(marker="o", lw=0, color="#e6e6e6", markersize=0.5)"""
- plt.title(title)
- plt.xticks(xclasses, xlabels, rotation=90)
- fig.savefig(output, bbox_inches="tight", dpi=200)
- fig.clf()
-'''
+__author__ = 'LieuV'
+
+from os.path import basename, splitext, join, isfile, dirname, abspath, exists
+from os import makedirs, listdir, remove
+import subprocess
+import re, sys, numpy, shutil
+import xml.etree.ElementTree as ET
+from collections import OrderedDict
+import json
+
+def getDir(args, getAbs = False):
+ if isfile(args[0]):
+ args[0]= dirname(args[0])
+ path = join(*args)
+ if(getAbs):
+ path = abspath(path)
+ makedirs(path, exist_ok=True)
+ return path
+
+def getFilenameWithoutExtension(string, getBase = False):
+ ''' In case of double extension '''
+ if getBase:
+ string = basename(string)
+ string = splitext(string)[0]
+ i = 0
+ while splitext(string)[-1] in [".gz", ".gzip", ".zip", ".bz", ".fasta", ".fastq", ".bam"]:
+ string = splitext(string)[0]
+ return string
+
+def bam_to_fastq(bamfile, output):
+ print("Convert bam to fastq...", bamfile)
+ outfile = join(output, getFilenameWithoutExtension(bamfile, getBase= True) +".fastq")
+ subprocess.Popen("bamtools convert -format fastq -in " + bamfile + " -out " + outfile, shell = True)
+ return outfile
+
+def toLatex(text, newline = False):
+ text = text.replace("#","\#")
+ text = text.replace("_", "\_")
+ text = text.replace("\t", " ")
+ text = text.replace("%", "\%")
+
+ if newline:
+ text = text.replace("\n", " ")
+ return text
+
+def join_reads(read_list, outputDir, outputName):
+ if len(read_list) ==0:
+ return None
+ elif len(read_list) ==1:
+ return read_list[0]
+ outfile = join(outputDir, getFilenameWithoutExtension(outputName) )
+ outfile += splitext(read_list[0])[-1]
+ #if read_list[0].endswith(".gz"):
+ print("Concatenate files. This may take a while." ," ".join(read_list) + " > " + outfile)
+ process = subprocess.call("cat " + " ".join(read_list) + " > " + outfile, shell=True)
+ #else:
+ # print("Decompress and concatenate files. This may take a while.")
+ # process = subprocess.call("gzip --fast -c " + " ".join(read_list) + " > " + outfile, shell=True)
+ if process != 0:
+ sys.exit("Error with gzip")
+ return outfile
+
+def trimmomatic_report(process, type, write = True):
+ logs =""
+ survived = 0
+ total = 0
+ pTrimmedReads = 0
+ nTrimmedReads = 0
+ for line in iter(process.stdout.readline, b''):
+ print(line.decode("utf-8"))
+ for line in iter(process.stderr.readline, b''):
+ line = line.decode("utf-8")
+ print(line)
+ if not (line.startswith("Using Long Clipping Sequence") or line.startswith("Trimmomatic")):
+ if write:
+ logs += toLatex(line) + "\n"
+ if re.match("Input Read", line):
+ if re.search("TrimmomaticPE", type):
+ pattern = re.match("Input Read Pairs:\s+(?P<total>\d+)\s+"
+ "Both Surviving:\s+(?P<nSurvived>\d+) \((?P<pSurvived>\d+.\d+)%\)\s+"
+ "Forward Only Surviving:\s+(?P<forward>\d+) \(\d+.\d+%\)\s+"
+ "Reverse Only Surviving:\s+(?P<reverse>\d+) \(\d+.\d+%\).*", line)
+
+ survived = float(pattern.group("pSurvived").replace(",", "."))
+ total = int(pattern.group("total")) * 2
+ nTrimmedReads = int(pattern.group("nSurvived"))*2 + int(pattern.group("forward")) + int(pattern.group("reverse"))
+ pTrimmedReads = round(100 * nTrimmedReads/total, 2)
+ else:
+ pattern = re.match(".*Surviving: (?P<nSurvived>\d+) \((?P<survived>\d+.\d+)%\)", line)
+ pattern2 = re.match("Input Reads: (?P<total>\d+)", line)
+
+ survived = float(pattern.group("survived").replace(",","."))
+ total = int(pattern2.group("total"))
+ nTrimmedReads = int(pattern.group("nSurvived"))
+ pTrimmedReads = survived
+
+ return survived, total, nTrimmedReads, pTrimmedReads, logs
+
+def perc_slope(a,b,perc):
+ if abs(numpy.diff([a,b]))>(b*float(perc)):
+ return True
+ return False
+
+def optimize_trimming(fastqc_folder,perc=0.1):
+ mytable=None
+ filename = join(fastqc_folder, "fastqc_data.txt")
+ with open(filename, "rb") as fastqcdata:
+ table = []
+ ifwrite = False
+ for line in fastqcdata.readlines():
+ line = line.decode()
+ line = line.replace("\n", "")
+ if line.startswith(">>END_MODULE") and ifwrite:
+ try:
+ dtype = {'names': table[0], 'formats': ['|S15', float, float, float, float]}
+ mytable = numpy.asarray([tuple(x) for x in table[1:]], dtype=dtype)
+ break
+ except:
+ pass
+ elif line.startswith("Sequence length"):
+ seq_length = int(line.split("\t")[-1].split("-")[-1])
+ elif line.startswith(">>Per base sequence content"):
+ ifwrite = True
+ temp = line.split("\t")
+
+ if temp[1].lower() == "pass":
+ return False, 0, seq_length #break
+ elif ifwrite:
+ table.append(line.split("\t"))
+
+ headcrop = 0
+ tailcrop = len(mytable["A"])
+ trim_bool = False
+ column = numpy.ma.array(mytable)
+ for i in range(-4, int(round(len(mytable["A"]) / 3, 0)), 1):
+ for nucl in ["A", "C", "G", "T"]:
+ column[nucl].mask[max(i, 0):i + 5] = True
+ if headcrop >0:
+ column[nucl].mask[:headcrop] = True
+ if tailcrop < len(mytable["A"]):
+ column[nucl].mask[tailcrop:] = True
+
+            # check headcrop
+ if (perc_slope(numpy.mean(mytable[nucl][max(i, 0):i + 5]), numpy.mean(column[nucl]), perc=perc)) & (headcrop < (i + 5)):
+ headcrop = i + 5
+ trim_bool = True
+ elif headcrop < i:
+ column[nucl].mask[max(i, 0):i + 5] = False
+
+ # now crop from the end
+ column[nucl].mask[-(i + 5):(min(len(mytable[nucl]), len(mytable[nucl]) - i))] = True
+ if (perc_slope(numpy.mean(mytable[nucl][-(i + 6): (min(len(mytable[nucl]) - 1, len(mytable[nucl]) - 1 - i))]),numpy.mean(column[nucl]), perc=perc)) & (tailcrop > len(mytable[nucl]) - (i + 5)):
+ # if perc_slope(numpy.var(mytable[nucl][-(i+6): (min(len(mytable["A"])-1, len(mytable[nucl])-1-i))]), numpy.mean(numpy.var(column)), perc=perc):
+ print("TAILCROP", tailcrop)
+ tailcrop = len(mytable[nucl]) - (i + 5)
+ trim_bool = True
+ else:
+ column[nucl].mask[-(i + 5): (min(len(mytable["A"]) - 1, len(mytable[nucl]) - 1 - i))] = False
+
+ return trim_bool, headcrop, tailcrop-headcrop
+
+def str_to_json(report_str):
+ closings = []
+ json_file = ""
+ report_str = report_str.split("\n")
+ pre_depth = -1
+ for row in report_str:
+ nNode = 0
+ if row != "":
+ row = row.split("\t")
+ if row[0].strip() != "0.00" and row[5] != "unclassified":
+ temp = len(row[5])
+ name = row[5].strip(" ")
+ curr_depth = int((temp - len(name)) / 2)
+
+ if pre_depth < curr_depth and curr_depth > 0:
+ for i in range(curr_depth - pre_depth):
+ if json_file.endswith('"children":[ '):
+ if json_file.endswith("}") or json_file.endswith("]"):
+ json_file += ","
+ json_file += '{"children":[ '
+ closings.append("]")
+ closings.append("}")
+ else:
+ json_file += ',"children":[ '
+ closings.append("]")
+ elif pre_depth > curr_depth:
+ while pre_depth >= curr_depth:
+ close_with = closings.pop()
+ json_file += close_with # end node
+ if close_with == "]":
+ close_with = closings.pop()
+ json_file += close_with # end node
+ pre_depth -= 1
+ elif pre_depth != -1:
+ json_file += closings.pop() + ','
+ if json_file.endswith("}")or json_file.endswith("]"):
+ json_file+=","
+ json_file += '{"name":"%s",' % name
+ json_file += '"perc":%s,' % row[0]
+ json_file += '"taxa":"%s"' % row[3]
+ closings.append("}")
+ nNode += 1
+ pre_depth = curr_depth
+ while len(closings) > 0:
+ json_file += closings.pop() # end node
+ return json_file
+
+def delete_files(path, extended="", pattern="_fastqc"):
+ for f in listdir(join(path, extended)):
+ filename =join(path,extended,f)
+ if f.endswith(pattern):
+ if isfile(filename):
+ remove(filename)
+ else:
+ shutil.rmtree(filename)
+
+
+def get_distribution(lines, key):
+ '''creates CSV used for boxplotting'''
+ output = []
+ for line in lines:
+ line = line.strip()
+ fields = line.split('\t')
+ if key == "Sequence_Length_Distribution":
+ l = fields[0].split('-')
+ try:
+ mean = (int(l[0]) + int(l[1])) / 2
+ except:
+ mean = int(l[0])
+ output.extend([str(mean)] * int(float(fields[1])))
+
+ # sequence quality or sequence GC content
+ if key == 'Per_sequence_GC_content' or key == "Per_sequence_quality_scores":
+ v = fields[0] # quality or gc [%]
+ output.extend(int(v) * [int(float(fields[1]))])
+ return output
+
+def createCSV(dataset, lines, key, path, trimmed):
+ '''creates CSV used for boxplotting'''
+ #output = []
+ splitted_ds=dataset.split("_")
+
+ csvhandle = open(path + '/' + key + ".csv", 'a')
+ for line in lines:
+ line = line.strip()
+ fields = line.split('\t')
+ #Sequence Length Distribution
+ if key == "Sequence_Length_Distribution":
+ l = fields[0].split('-')
+ try:
+ value = (int(l[0]) + int(l[1])) / 2
+ except:
+ value = int(l[0])
+ count = int(float(fields[1]))
+
+ #sequence quality or sequence GC content
+ if key == 'Per_sequence_quality_scores' or key == 'Per_sequence_GC_content':
+ value = fields[0] #quality or gc [%]
+ count = int(float(fields[1]))
+ try:
+ if splitted_ds[-2] == "R1" or splitted_ds[-2] == "R2":
+ # print(splitted_ds)
+ csvhandle.write(",".join(
+ ["_".join(splitted_ds[:-3]), splitted_ds[-2], splitted_ds[-3], trimmed, str(value),
+ str(count)]) + "\n")
+ else:
+ csvhandle.write(",".join(
+ ["_".join(splitted_ds[:-4]), splitted_ds[-3], splitted_ds[-4], trimmed, str(value),
+ str(count)]) + "\n")
+ except:
+ csvhandle.write(",".join([dataset.replace("_fastqc",""), "NA", "NA" , trimmed, str(value), str(count)]) + "\n")
+ csvhandle.close()
+
+def write_fastqc_summary(fastqcdir, output):
+ store = False
+ data = []
+ fastqcdatafile = open(join(fastqcdir,"fastqc_data.txt"), "r")
+ for line in iter(fastqcdatafile):
+ if line.startswith("#"):
+ pass # print("comment")
+ # continue
+ elif line.startswith('>>END_MODULE'):
+ if len(data) > 0:
+ name1 = basename(fastqcdir)
+ if "Trimmed" in fastqcdir:
+ # name1 = "Trimmed " + name1
+ trimmed = "Trimmed"
+ else:
+ trimmed = "Raw"
+ createCSV(name1, data[2:], key, output, trimmed)
+ data = []
+ store = False
+ elif line.startswith('>>'):
+ key = line.split("\t")[0]
+ key = key.replace(">>","")
+ key = key.replace(" ","_")
+ if key in ['Sequence_Length_Distribution', 'Per_sequence_quality_scores','Per_sequence_GC_content']:
+ store = True
+ if store:
+ data.extend([line])
+ fastqcdatafile.close()
+
+def write_SAV(folder, output):
+ print("Plot SAV images...")
+ process = subprocess.Popen(" ".join(["Rscript --vanilla ", join(dirname(__file__), "sav.R"),
+ folder, join(output,"img")]),
+ stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
+ for line in iter(process.stderr.readline, b''):
+ print(line)
+ tree = ET.parse(join(folder,'RunInfo.xml'))
+ root = tree.getroot()
+ runinfo = OrderedDict()
+
+ for child in root.iter("Run"):
+ for grand in child.getchildren():
+
+ if len(grand.attrib) != 0:
+ runinfo[grand.tag] = grand.attrib
+
+ if len(grand.getchildren()) !=0:
+ for grandgrand in grand:
+ if len(grandgrand.attrib) != 0:
+ try:
+ runinfo[grand.tag].append(grandgrand.attrib)
+ except:
+ runinfo[grand.tag] = [grandgrand.attrib]
+
+ if grandgrand.text is not None:
+ if grandgrand.text.strip() != "":
+ if grand.tag not in runinfo.keys():
+ runinfo[grand.tag]={}
+ try:
+ runinfo[grand.tag][grandgrand.tag].append(grandgrand.text)
+ except:
+ runinfo[grand.tag][grandgrand.tag] = [grandgrand.text]
+
+ if grand.text is not None:
+ if grand.text.strip()!="":
+ if len(grand.attrib) != 0:
+ runinfo[grand.tag][grand.tag] = grand.text
+ else:
+ runinfo[grand.tag] = grand.text
+
+ if exists(join(folder, 'RunParameters.xml')):
+ runparameter_xml = ET.parse(join(folder, 'RunParameters.xml'))
+ elif exists(join(folder, 'runParameters.xml')):
+ runparameter_xml = ET.parse(join(folder, 'runParameters.xml'))
+ else:
+ sys.exit("No RunParameter.xml found.")
+
+ runparameters_unique = ["ExperimentName", "FPGAVersion", "CPLDVersion", "MCSVersion","RTAVersion", "CameraFirmware", "CameraDriver", "ClusteringChoice" ,"RapidRunChemistry", "RunMode", "ApplicationName", "ApplicationVersion"]
+ runparameters_path = ["Setup/ReagentKits/Sbs/SbsReagentKit/ID","Setup/ReagentKits/Index/ReagentKit/ID","Setup/Sbs","Setup/Rehyb", "PR2BottleRFIDTag/PartNumber","FlowcellRFIDTag/PartNumber","ReagentKitRFIDTag/PartNumber"]
+ root = runparameter_xml.getroot()
+ runparameter = OrderedDict()
+ for label in runparameters_unique:
+ for i in root.iter(label):
+ runparameter[label] = i.text
+ #print(label, ": ", i.text)
+
+ for label_path in runparameters_path:
+ label_path = label_path.split("/")
+ element = root.find(label_path[0])
+ for label in label_path[1:]:
+ if element is None:
+ break
+ element = element.find(label)
+ if element is not None:
+ runparameter["/".join(label_path)] = element.text
+ #print(label_path, element.text)
+ else:
+ print(label_path, " does not exist.")
+ runparameter.update(runinfo)
+ json.dump({"tables":runparameter}, open(join(output, "sav.json"),"w"))
\ No newline at end of file
diff --git a/lib/box.js b/lib/box.js
index 471bb8e..812fa94 100755
--- a/lib/box.js
+++ b/lib/box.js
@@ -1,320 +1,320 @@
-(function() {
-
-// Inspired by http://informationandvisualization.de/blog/box-plot
-d3.box = function() {
- var width = 1,
- height = 1,
- duration = 0,
- domain = null,
- value = Number,
- whiskers = boxWhiskers,
- quartiles = boxQuartiles,
- showLabels = true, // whether or not to show text labels
- numBars = 4,
- curBar = 1,
- tickFormat = null;
-
- // For each small multiple…
- function box(g) {
- g.each(function(data, i) {
- //d = d.map(value).sort(d3.ascending);
- //var boxIndex = data[0];
- //var boxIndex = 1;
- var d = data[1].sort(d3.ascending);
-
- // console.log(boxIndex);
- //console.log(d);
-
- var g = d3.select(this),
- n = d.length,
- min = d[0],
- max = d[n - 1];
-
- // Compute quartiles. Must return exactly 3 elements.
- var quartileData = d.quartiles = quartiles(d);
-
- // Compute whiskers. Must return exactly 2 elements, or null.
- var whiskerIndices = whiskers && whiskers.call(this, d, i),
- whiskerData = whiskerIndices && whiskerIndices.map(function(i) { return d[i]; });
-
- // Compute outliers. If no whiskers are specified, all data are "outliers".
- // We compute the outliers as indices, so that we can join across transitions!
- var outlierIndices = whiskerIndices
- ? d3.range(0, whiskerIndices[0]).concat(d3.range(whiskerIndices[1] + 1, n))
- : d3.range(n);
-
- // Compute the new x-scale.
- var x1 = d3.scale.linear()
- .domain(domain && domain.call(this, d, i) || [min, max])
- .range([height, 0]);
-
- // Retrieve the old x-scale, if this is an update.
- var x0 = this.__chart__ || d3.scale.linear()
- .domain([0, Infinity])
- // .domain([0, max])
- .range(x1.range());
-
- // Stash the new scale.
- this.__chart__ = x1;
-
- // Note: the box, median, and box tick elements are fixed in number,
- // so we only have to handle enter and update. In contrast, the outliers
- // and other elements are variable, so we need to exit them! Variable
- // elements also fade in and out.
-
- // Update center line: the vertical line spanning the whiskers.
- var center = g.selectAll("line.center")
- .data(whiskerData ? [whiskerData] : []);
-
- //vertical line
- center.enter().insert("line", "rect")
- .attr("class", "center")
- .attr("x1", width / 2)
- .attr("y1", function(d) { return x0(d[0]); })
- .attr("x2", width / 2)
- .attr("y2", function(d) { return x0(d[1]); })
- .style("opacity", 1e-6)
- .transition()
- .duration(duration)
- .style("opacity", 1)
- .attr("y1", function(d) { return x1(d[0]); })
- .attr("y2", function(d) { return x1(d[1]); });
-
- center.transition()
- .duration(duration)
- .style("opacity", 1)
- .attr("y1", function(d) { return x1(d[0]); })
- .attr("y2", function(d) { return x1(d[1]); });
-
- center.exit().transition()
- .duration(duration)
- .style("opacity", 1e-6)
- .attr("y1", function(d) { return x1(d[0]); })
- .attr("y2", function(d) { return x1(d[1]); })
- .remove();
-
- // Update innerquartile box.
- var box = g.selectAll("rect.box")
- .data([quartileData]);
-
- box.enter().append("rect")
- .attr("class", "box")
- .attr("x", 0)
- .attr("y", function(d) { return x0(d[2]); })
- .attr("width", width)
- .attr("height", function(d) { return x0(d[0]) - x0(d[2]); })
- .transition()
- .duration(duration)
- .attr("y", function(d) { return x1(d[2]); })
- .attr("height", function(d) { return x1(d[0]) - x1(d[2]); });
-
- box.transition()
- .duration(duration)
- .attr("y", function(d) { return x1(d[2]); })
- .attr("height", function(d) { return x1(d[0]) - x1(d[2]); });
-
- // Update median line.
- var medianLine = g.selectAll("line.median")
- .data([quartileData[1]]);
-
- medianLine.enter().append("line")
- .attr("class", "median")
- .attr("x1", 0)
- .attr("y1", x0)
- .attr("x2", width)
- .attr("y2", x0)
- .transition()
- .duration(duration)
- .attr("y1", x1)
- .attr("y2", x1);
-
- medianLine.transition()
- .duration(duration)
- .attr("y1", x1)
- .attr("y2", x1);
-
- // Update whiskers.
- var whisker = g.selectAll("line.whisker")
- .data(whiskerData || []);
-
- whisker.enter().insert("line", "circle, text")
- .attr("class", "whisker")
- .attr("x1", 0)
- .attr("y1", x0)
- .attr("x2", 0 + width)
- .attr("y2", x0)
- .style("opacity", 1e-6)
- .transition()
- .duration(duration)
- .attr("y1", x1)
- .attr("y2", x1)
- .style("opacity", 1);
-
- whisker.transition()
- .duration(duration)
- .attr("y1", x1)
- .attr("y2", x1)
- .style("opacity", 1);
-
- whisker.exit().transition()
- .duration(duration)
- .attr("y1", x1)
- .attr("y2", x1)
- .style("opacity", 1e-6)
- .remove();
-
- // Update outliers.
- var outlier = g.selectAll("circle.outlier")
- .data(outlierIndices, Number);
-
- outlier.enter().insert("circle", "text")
- .attr("class", "outlier")
- .attr("r", 5)
- .attr("cx", width / 2)
- .attr("cy", function(i) { return x0(d[i]); })
- .style("opacity", 1e-6)
- .transition()
- .duration(duration)
- .attr("cy", function(i) { return x1(d[i]); })
- .style("opacity", 1);
-
- outlier.transition()
- .duration(duration)
- .attr("cy", function(i) { return x1(d[i]); })
- .style("opacity", 1);
-
- outlier.exit().transition()
- .duration(duration)
- .attr("cy", function(i) { return x1(d[i]); })
- .style("opacity", 1e-6)
- .remove();
-
- // Compute the tick format.
- var format = tickFormat || x1.tickFormat(8);
-
- // Update box ticks.
- var boxTick = g.selectAll("text.box")
- .data(quartileData);
- if(showLabels == true) {
- boxTick.enter().append("text")
- .attr("class", "box")
- .attr("dy", ".3em")
- .attr("dx", function(d, i) { return i & 1 ? 6 : -6 })
- .attr("x", function(d, i) { return i & 1 ? + width : 0 })
- .attr("y", x0)
- .attr("text-anchor", function(d, i) { return i & 1 ? "start" : "end"; })
- .text(format)
- .transition()
- .duration(duration)
- .attr("y", x1);
- }
-
- boxTick.transition()
- .duration(duration)
- .text(format)
- .attr("y", x1);
-
- // Update whisker ticks. These are handled separately from the box
- // ticks because they may or may not exist, and we want don't want
- // to join box ticks pre-transition with whisker ticks post-.
- var whiskerTick = g.selectAll("text.whisker")
- .data(whiskerData || []);
- if(showLabels == true) {
- whiskerTick.enter().append("text")
- .attr("class", "whisker")
- .attr("dy", ".3em")
- .attr("dx", 6)
- .attr("x", width)
- .attr("y", x0)
- .text(format)
- .style("opacity", 1e-6)
- .transition()
- .duration(duration)
- .attr("y", x1)
- .style("opacity", 1);
- }
- whiskerTick.transition()
- .duration(duration)
- .text(format)
- .attr("y", x1)
- .style("opacity", 1);
-
- whiskerTick.exit().transition()
- .duration(duration)
- .attr("y", x1)
- .style("opacity", 1e-6)
- .remove();
- });
- d3.timer.flush();
- }
-
- box.width = function(x) {
- if (!arguments.length) return width;
- width = x;
- return box;
- };
-
- box.height = function(x) {
- if (!arguments.length) return height;
- height = x;
- return box;
- };
-
- box.tickFormat = function(x) {
- if (!arguments.length) return tickFormat;
- tickFormat = x;
- return box;
- };
-
- box.duration = function(x) {
- if (!arguments.length) return duration;
- duration = x;
- return box;
- };
-
- box.domain = function(x) {
- if (!arguments.length) return domain;
- domain = x == null ? x : d3.functor(x);
- return box;
- };
-
- box.value = function(x) {
- if (!arguments.length) return value;
- value = x;
- return box;
- };
-
- box.whiskers = function(x) {
- if (!arguments.length) return whiskers;
- whiskers = x;
- return box;
- };
-
- box.showLabels = function(x) {
- if (!arguments.length) return showLabels;
- showLabels = x;
- return box;
- };
-
- box.quartiles = function(x) {
- if (!arguments.length) return quartiles;
- quartiles = x;
- return box;
- };
-
- return box;
-};
-
-function boxWhiskers(d) {
- return [0, d.length - 1];
-}
-
-function boxQuartiles(d) {
- return [
- d3.quantile(d, .25),
- d3.quantile(d, .5),
- d3.quantile(d, .75)
- ];
-}
-
+(function() {
+
+// Inspired by http://informationandvisualization.de/blog/box-plot
+d3.box = function() {
+ var width = 1,
+ height = 1,
+ duration = 0,
+ domain = null,
+ value = Number,
+ whiskers = boxWhiskers,
+ quartiles = boxQuartiles,
+ showLabels = true, // whether or not to show text labels
+ numBars = 4,
+ curBar = 1,
+ tickFormat = null;
+
+ // For each small multiple…
+ function box(g) {
+ g.each(function(data, i) {
+ //d = d.map(value).sort(d3.ascending);
+ //var boxIndex = data[0];
+ //var boxIndex = 1;
+ var d = data[1].sort(d3.ascending);
+
+ // console.log(boxIndex);
+ //console.log(d);
+
+ var g = d3.select(this),
+ n = d.length,
+ min = d[0],
+ max = d[n - 1];
+
+ // Compute quartiles. Must return exactly 3 elements.
+ var quartileData = d.quartiles = quartiles(d);
+
+ // Compute whiskers. Must return exactly 2 elements, or null.
+ var whiskerIndices = whiskers && whiskers.call(this, d, i),
+ whiskerData = whiskerIndices && whiskerIndices.map(function(i) { return d[i]; });
+
+ // Compute outliers. If no whiskers are specified, all data are "outliers".
+ // We compute the outliers as indices, so that we can join across transitions!
+ var outlierIndices = whiskerIndices
+ ? d3.range(0, whiskerIndices[0]).concat(d3.range(whiskerIndices[1] + 1, n))
+ : d3.range(n);
+
+ // Compute the new x-scale.
+ var x1 = d3.scale.linear()
+ .domain(domain && domain.call(this, d, i) || [min, max])
+ .range([height, 0]);
+
+ // Retrieve the old x-scale, if this is an update.
+ var x0 = this.__chart__ || d3.scale.linear()
+ .domain([0, Infinity])
+ // .domain([0, max])
+ .range(x1.range());
+
+ // Stash the new scale.
+ this.__chart__ = x1;
+
+ // Note: the box, median, and box tick elements are fixed in number,
+ // so we only have to handle enter and update. In contrast, the outliers
+ // and other elements are variable, so we need to exit them! Variable
+ // elements also fade in and out.
+
+ // Update center line: the vertical line spanning the whiskers.
+ var center = g.selectAll("line.center")
+ .data(whiskerData ? [whiskerData] : []);
+
+ //vertical line
+ center.enter().insert("line", "rect")
+ .attr("class", "center")
+ .attr("x1", width / 2)
+ .attr("y1", function(d) { return x0(d[0]); })
+ .attr("x2", width / 2)
+ .attr("y2", function(d) { return x0(d[1]); })
+ .style("opacity", 1e-6)
+ .transition()
+ .duration(duration)
+ .style("opacity", 1)
+ .attr("y1", function(d) { return x1(d[0]); })
+ .attr("y2", function(d) { return x1(d[1]); });
+
+ center.transition()
+ .duration(duration)
+ .style("opacity", 1)
+ .attr("y1", function(d) { return x1(d[0]); })
+ .attr("y2", function(d) { return x1(d[1]); });
+
+ center.exit().transition()
+ .duration(duration)
+ .style("opacity", 1e-6)
+ .attr("y1", function(d) { return x1(d[0]); })
+ .attr("y2", function(d) { return x1(d[1]); })
+ .remove();
+
+      // Update interquartile box.
+ var box = g.selectAll("rect.box")
+ .data([quartileData]);
+
+ box.enter().append("rect")
+ .attr("class", "box")
+ .attr("x", 0)
+ .attr("y", function(d) { return x0(d[2]); })
+ .attr("width", width)
+ .attr("height", function(d) { return x0(d[0]) - x0(d[2]); })
+ .transition()
+ .duration(duration)
+ .attr("y", function(d) { return x1(d[2]); })
+ .attr("height", function(d) { return x1(d[0]) - x1(d[2]); });
+
+ box.transition()
+ .duration(duration)
+ .attr("y", function(d) { return x1(d[2]); })
+ .attr("height", function(d) { return x1(d[0]) - x1(d[2]); });
+
+ // Update median line.
+ var medianLine = g.selectAll("line.median")
+ .data([quartileData[1]]);
+
+ medianLine.enter().append("line")
+ .attr("class", "median")
+ .attr("x1", 0)
+ .attr("y1", x0)
+ .attr("x2", width)
+ .attr("y2", x0)
+ .transition()
+ .duration(duration)
+ .attr("y1", x1)
+ .attr("y2", x1);
+
+ medianLine.transition()
+ .duration(duration)
+ .attr("y1", x1)
+ .attr("y2", x1);
+
+ // Update whiskers.
+ var whisker = g.selectAll("line.whisker")
+ .data(whiskerData || []);
+
+ whisker.enter().insert("line", "circle, text")
+ .attr("class", "whisker")
+ .attr("x1", 0)
+ .attr("y1", x0)
+ .attr("x2", 0 + width)
+ .attr("y2", x0)
+ .style("opacity", 1e-6)
+ .transition()
+ .duration(duration)
+ .attr("y1", x1)
+ .attr("y2", x1)
+ .style("opacity", 1);
+
+ whisker.transition()
+ .duration(duration)
+ .attr("y1", x1)
+ .attr("y2", x1)
+ .style("opacity", 1);
+
+ whisker.exit().transition()
+ .duration(duration)
+ .attr("y1", x1)
+ .attr("y2", x1)
+ .style("opacity", 1e-6)
+ .remove();
+
+ // Update outliers.
+ var outlier = g.selectAll("circle.outlier")
+ .data(outlierIndices, Number);
+
+ outlier.enter().insert("circle", "text")
+ .attr("class", "outlier")
+ .attr("r", 5)
+ .attr("cx", width / 2)
+ .attr("cy", function(i) { return x0(d[i]); })
+ .style("opacity", 1e-6)
+ .transition()
+ .duration(duration)
+ .attr("cy", function(i) { return x1(d[i]); })
+ .style("opacity", 1);
+
+ outlier.transition()
+ .duration(duration)
+ .attr("cy", function(i) { return x1(d[i]); })
+ .style("opacity", 1);
+
+ outlier.exit().transition()
+ .duration(duration)
+ .attr("cy", function(i) { return x1(d[i]); })
+ .style("opacity", 1e-6)
+ .remove();
+
+ // Compute the tick format.
+ var format = tickFormat || x1.tickFormat(8);
+
+ // Update box ticks.
+ var boxTick = g.selectAll("text.box")
+ .data(quartileData);
+ if(showLabels == true) {
+ boxTick.enter().append("text")
+ .attr("class", "box")
+ .attr("dy", ".3em")
+ .attr("dx", function(d, i) { return i & 1 ? 6 : -6 })
+ .attr("x", function(d, i) { return i & 1 ? + width : 0 })
+ .attr("y", x0)
+ .attr("text-anchor", function(d, i) { return i & 1 ? "start" : "end"; })
+ .text(format)
+ .transition()
+ .duration(duration)
+ .attr("y", x1);
+ }
+
+ boxTick.transition()
+ .duration(duration)
+ .text(format)
+ .attr("y", x1);
+
+ // Update whisker ticks. These are handled separately from the box
+      // ticks because they may or may not exist, and we don't want
+ // to join box ticks pre-transition with whisker ticks post-.
+ var whiskerTick = g.selectAll("text.whisker")
+ .data(whiskerData || []);
+ if(showLabels == true) {
+ whiskerTick.enter().append("text")
+ .attr("class", "whisker")
+ .attr("dy", ".3em")
+ .attr("dx", 6)
+ .attr("x", width)
+ .attr("y", x0)
+ .text(format)
+ .style("opacity", 1e-6)
+ .transition()
+ .duration(duration)
+ .attr("y", x1)
+ .style("opacity", 1);
+ }
+ whiskerTick.transition()
+ .duration(duration)
+ .text(format)
+ .attr("y", x1)
+ .style("opacity", 1);
+
+ whiskerTick.exit().transition()
+ .duration(duration)
+ .attr("y", x1)
+ .style("opacity", 1e-6)
+ .remove();
+ });
+ d3.timer.flush();
+ }
+
+ box.width = function(x) {
+ if (!arguments.length) return width;
+ width = x;
+ return box;
+ };
+
+ box.height = function(x) {
+ if (!arguments.length) return height;
+ height = x;
+ return box;
+ };
+
+ box.tickFormat = function(x) {
+ if (!arguments.length) return tickFormat;
+ tickFormat = x;
+ return box;
+ };
+
+ box.duration = function(x) {
+ if (!arguments.length) return duration;
+ duration = x;
+ return box;
+ };
+
+ box.domain = function(x) {
+ if (!arguments.length) return domain;
+ domain = x == null ? x : d3.functor(x);
+ return box;
+ };
+
+ box.value = function(x) {
+ if (!arguments.length) return value;
+ value = x;
+ return box;
+ };
+
+ box.whiskers = function(x) {
+ if (!arguments.length) return whiskers;
+ whiskers = x;
+ return box;
+ };
+
+ box.showLabels = function(x) {
+ if (!arguments.length) return showLabels;
+ showLabels = x;
+ return box;
+ };
+
+ box.quartiles = function(x) {
+ if (!arguments.length) return quartiles;
+ quartiles = x;
+ return box;
+ };
+
+ return box;
+};
+
+function boxWhiskers(d) {
+ return [0, d.length - 1];
+}
+
+function boxQuartiles(d) {
+ return [
+ d3.quantile(d, .25),
+ d3.quantile(d, .5),
+ d3.quantile(d, .75)
+ ];
+}
+
})();
\ No newline at end of file
diff --git a/lib/qcstyle.css b/lib/qcstyle.css
index 4bad164..08c806e 100755
--- a/lib/qcstyle.css
+++ b/lib/qcstyle.css
@@ -1,84 +1,84 @@
- .row_hidden {
- display: none;
- }
- .panel {
- background-color:#F7F7F7;
-
- }
- .panel-heading{
- cursor:pointer;
- }
- .show_row {
- display: show;
- }
-
- p.inbox_text {
- text-align: center;
- color: grey;
- margin: 0 0 0px;
- }
-
- .thumbnail {
- margin-bottom: 5px;
- border: 0px solid #000;
- height: 150px;
- width: 200px;
- }
-
- .sample_box {
- display: table-cell;
- margin: 10px;
- padding: 10px;
- color: lightgrey;
- border-radius: 5px;
- box-shadow: 0 1px 3px rgba(0, 0, 0, .25);
- }
-
- .sidebar {
- float: both;
- overflow-y: auto;
- position: fixed;
- height: 100%;
- top: 50px;
- z-index: 999;
- padding-right: 0px;
- box-shadow: 0 2px 10px 0 rgba(0, 0, 0, 0.12);
- background: #f7f7f7;
- }
- .navbar-default{
- background-image: linear-gradient(to bottom, #1D95B6 0px, #0C6A89 100%);
- }
- .navbar-default .navbar-nav > .active > a, .navbar-default .navbar-nav > .open > a{
- background-image: linear-gradient(to bottom, rgb(32, 162, 198) 0px, rgb(21, 117, 149) 100%);
-
- }
- .navbar-default .navbar-brand {
- color: white;
- }
- h4{
- color: #0A6780;
- }
- h5{
- color: #0D7793;
- }
- .node {
- cursor: pointer;
- }
-
- .node circle {
- fill: #fff;
- stroke: steelblue;
- stroke-width: 1.5px;
- }
-
- .node text {
- font: 10px sans-serif;
- }
-
- .link {
- fill: none;
- stroke: #ccc;
- stroke-width: 1.5px;
- }
- .table{
- margin-bottom: auto;}
+ .row_hidden {
+ display: none;
+ }
+ .panel {
+ background-color:#F7F7F7;
+
+ }
+ .panel-heading{
+ cursor:pointer;
+ }
+ .show_row {
+ display: show;
+ }
+
+ p.inbox_text {
+ text-align: center;
+ color: grey;
+ margin: 0 0 0px;
+ }
+
+ .thumbnail {
+ margin-bottom: 5px;
+ border: 0px solid #000;
+ height: 150px;
+ width: 200px;
+ }
+
+ .sample_box {
+ display: table-cell;
+ margin: 10px;
+ padding: 10px;
+ color: lightgrey;
+ border-radius: 5px;
+ box-shadow: 0 1px 3px rgba(0, 0, 0, .25);
+ }
+
+ .sidebar {
+ float: both;
+ overflow-y: auto;
+ position: fixed;
+ height: 100%;
+ top: 50px;
+ z-index: 999;
+ padding-right: 0px;
+ box-shadow: 0 2px 10px 0 rgba(0, 0, 0, 0.12);
+ background: #f7f7f7;
+ }
+ .navbar-default{
+ background-image: linear-gradient(to bottom, #1D95B6 0px, #0C6A89 100%);
+ }
+ .navbar-default .navbar-nav > .active > a, .navbar-default .navbar-nav > .open > a{
+ background-image: linear-gradient(to bottom, rgb(32, 162, 198) 0px, rgb(21, 117, 149) 100%);
+
+ }
+ .navbar-default .navbar-brand {
+ color: white;
+ }
+ h4{
+ color: #0A6780;
+ }
+ h5{
+ color: #0D7793;
+ }
+ .node {
+ cursor: pointer;
+ }
+
+ .node circle {
+ fill: #fff;
+ stroke: steelblue;
+ stroke-width: 1.5px;
+ }
+
+ .node text {
+ font: 10px sans-serif;
+ }
+
+ .link {
+ fill: none;
+ stroke: #ccc;
+ stroke-width: 1.5px;
+ }
+ .table{
+ margin-bottom: auto;}
diff --git a/license.txt b/license.txt
new file mode 100755
index 0000000..b14ca0a
--- /dev/null
+++ b/license.txt
@@ -0,0 +1,165 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+ This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+ 0. Additional Definitions.
+
+ As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+ "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+ An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+ A "Combined Work" is a work produced by combining or linking an
+Application with the Library. The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+ The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+ The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+ 1. Exception to Section 3 of the GNU GPL.
+
+ You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+ 2. Conveying Modified Versions.
+
+ If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+ a) under this License, provided that you make a good faith effort to
+ ensure that, in the event an Application does not supply the
+ function or data, the facility still operates, and performs
+ whatever part of its purpose remains meaningful, or
+
+ b) under the GNU GPL, with none of the additional permissions of
+ this License applicable to that copy.
+
+ 3. Object Code Incorporating Material from Library Header Files.
+
+ The object code form of an Application may incorporate material from
+a header file that is part of the Library. You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+ a) Give prominent notice with each copy of the object code that the
+ Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the object code with a copy of the GNU GPL and this license
+ document.
+
+ 4. Combined Works.
+
+ You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+ a) Give prominent notice with each copy of the Combined Work that
+ the Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
+ document.
+
+ c) For a Combined Work that displays copyright notices during
+ execution, include the copyright notice for the Library among
+ these notices, as well as a reference directing the user to the
+ copies of the GNU GPL and this license document.
+
+ d) Do one of the following:
+
+ 0) Convey the Minimal Corresponding Source under the terms of this
+ License, and the Corresponding Application Code in a form
+ suitable for, and under terms that permit, the user to
+ recombine or relink the Application with a modified version of
+ the Linked Version to produce a modified Combined Work, in the
+ manner specified by section 6 of the GNU GPL for conveying
+ Corresponding Source.
+
+ 1) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (a) uses at run time
+ a copy of the Library already present on the user's computer
+ system, and (b) will operate properly with a modified version
+ of the Library that is interface-compatible with the Linked
+ Version.
+
+ e) Provide Installation Information, but only if you would otherwise
+ be required to provide such information under section 6 of the
+ GNU GPL, and only to the extent that such information is
+ necessary to install and execute a modified version of the
+ Combined Work produced by recombining or relinking the
+ Application with a modified version of the Linked Version. (If
+ you use option 4d0, the Installation Information must accompany
+ the Minimal Corresponding Source and Corresponding Application
+ Code. If you use option 4d1, you must provide the Installation
+ Information in the manner specified by section 6 of the GNU GPL
+ for conveying Corresponding Source.)
+
+ 5. Combined Libraries.
+
+ You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+ a) Accompany the combined library with a copy of the same work based
+ on the Library, uncombined with any other library facilities,
+ conveyed under the terms of this License.
+
+ b) Give prominent notice with the combined library that part of it
+ is a work based on the Library, and explaining where to find the
+ accompanying uncombined form of the same work.
+
+ 6. Revised Versions of the GNU Lesser General Public License.
+
+ The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+ If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
diff --git a/parameter.txt b/parameter.txt
index f3b5b6c..14aeca2 100755
--- a/parameter.txt
+++ b/parameter.txt
@@ -1,47 +1,47 @@
-[FastQC]
-Per base sequence quality = per_base_quality.png
-Per sequence quality scores = per_sequence_quality.png
-Per base sequence content = per_base_sequence_content.png
-Per base GC content = per_base_gc_content.png
-Per sequence GC content = per_sequence_gc_content.png
-Per base N content = per_base_n_content.png
-Sequence Length Distribution= sequence_length_distribution.png
-Sequence Duplication Levels = duplication_levels.png
-Kmer Content = kmer_profiles.png
-Per tile sequence quality = per_tile_quality.png
-Adapter Content = adapter_content.png
-
-[Trimmomatic]
-illuminaClip_seedMismatch = 2
-illuminaClip_simpleClip = 10
-leading = 3
-trailing = 3
-trimOption = SLIDINGWINDOW:4:20
-trimBetter_threshold = 0.15
-
-[forAssembly.Illumina]
-trimBetter_threshold = 0.1
-trimOption = SLIDINGWINDOW:4:25
-
-[forAssembly.IonTorrent]
-trimBetter_threshold = 0.2
-trimOption = SLIDINGWINDOW:4:15
-
-[forMapping.Illumina]
-trimBetter_threshold = 0.15
-trimOption = SLIDINGWINDOW:4:15
-
-[forMapping.IonTorrent]
-trimBetter_threshold = 0.25
-trimOption = SLIDINGWINDOW:4:15
-
-[Adapter]
-Illumina Universal Adapter = TruSeq2
-
-[Fileextension]
-fastq = .fastq.gz, .fastq, .fq, .fq.gz, .bam
-
-[Pattern]
-R1 = _R1_
-R2 = _R2_
+[FastQC]
+Per base sequence quality = per_base_quality.png
+Per sequence quality scores = per_sequence_quality.png
+Per base sequence content = per_base_sequence_content.png
+Per base GC content = per_base_gc_content.png
+Per sequence GC content = per_sequence_gc_content.png
+Per base N content = per_base_n_content.png
+Sequence Length Distribution= sequence_length_distribution.png
+Sequence Duplication Levels = duplication_levels.png
+Kmer Content = kmer_profiles.png
+Per tile sequence quality = per_tile_quality.png
+Adapter Content = adapter_content.png
+
+[Trimmomatic]
+illuminaClip_seedMismatch = 2
+illuminaClip_simpleClip = 10
+leading = 3
+trailing = 3
+trimOption = SLIDINGWINDOW:4:20
+trimBetter_threshold = 0.15
+
+[forAssembly.Illumina]
+trimBetter_threshold = 0.1
+trimOption = SLIDINGWINDOW:4:25
+
+[forAssembly.IonTorrent]
+trimBetter_threshold = 0.2
+trimOption = SLIDINGWINDOW:4:15
+
+[forMapping.Illumina]
+trimBetter_threshold = 0.15
+trimOption = SLIDINGWINDOW:4:15
+
+[forMapping.IonTorrent]
+trimBetter_threshold = 0.25
+trimOption = SLIDINGWINDOW:4:15
+
+[Adapter]
+Illumina Universal Adapter = TruSeq2
+
+[Fileextension]
+fastq = .fastq.gz, .fastq, .fq, .fq.gz, .bam
+
+[Pattern]
+R1 = _R1_
+R2 = _R2_
lane = _L\d{3}_
\ No newline at end of file
diff --git a/readme.md b/readme.md
index 3dd3605..59bb665 100755
--- a/readme.md
+++ b/readme.md
@@ -117,3 +117,21 @@ python3 QCumber.py -1 sample_R1.fastq -2 sample_R2.fastq -technology Illumina -a
```
python3 QCumber.py -input myProjectFolder/ -technology IonTorrent -r myReference.fasta
```
+
+-------
+License
+-------
+Copyright (C) 2017 Vivi Hue-Trang Lieu
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License, version 3
+as published by the Free Software Foundation.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this program. If not, see
+<http://www.gnu.org/licenses/>.
\ No newline at end of file
diff --git a/report.tex b/report.tex
index ea7f2b7..e687aa0 100755
--- a/report.tex
+++ b/report.tex
@@ -1,184 +1,184 @@
-\documentclass[a4paper]{article}
-\usepackage[document]{ragged2e}
-\usepackage{python}
-\usepackage[english]{babel}
-\usepackage{graphicx}
-\usepackage[utf8]{inputenc}
-\usepackage{xcolor}
-\usepackage[T1]{fontenc}
-\renewcommand*\familydefault{\ttdefault} %% Only if the base font of the document is to be typewriter style
-\usepackage{subfigure}
-\usepackage{geometry}
-\usepackage{tcolorbox}
-\usepackage{adjustbox}
-\usepackage{setspace}
-\usepackage{float}
-\usepackage{url}
-\usepackage{grffile}
-
-\geometry{a4paper,left=25mm,right=20mm,
- top=12mm,
- bottom=20mm}
-
-\setlength\parindent{0pt}
-
-\tolerance=1
-\emergencystretch=\maxdimen
-\hyphenpenalty=1000
-\hbadness=1000
-
-\begin{document}
-
-{\bf {\LARGE{ {{pipeline.name}} } Version {{pipeline.version}} } }\\
-\line(1,0){ \textwidth }
-
-\begin{tabular}{p{0.25\textwidth} p{0.75\textwidth}}
-
-Executor: & {{pipeline.user}} \\
-Dataset: & {\bfseries{\verb|{{sample.name}}|} }\\
-Started: & {{sample.start}}\\
-Finished: & {{sample.stop}}\\
- & \\
-Operating System & \\
-Server: & {{pipeline.server}}\\
-System: & {{pipeline.system}}\\
-Release: & {{pipeline.release}}\\
-Version: & {{pipeline.operating_version}}\\
-Machine: & {{pipeline.machine}}\\
- & \\
- Tool versions & \\
-Python: & {{pipeline.python_version}}\\
-FastQC: & {{pipeline.fastqc_version}}\\
-Trimmomatic: & {{pipeline.trimmo_version}}\\
-{%if sample.mappingRes !=None%}Bowtie2: & {{pipeline.bowtie_version}}\\{%endif%}
-{%if sample.krakenRes != None%}Kraken: & {{pipeline.kraken_version}}\\{%endif%}
-\end{tabular}\\
-
-%----------------- Workflow -------------------%
-\line(1,0){ \textwidth } \\
-Processed reads: \\
-{%for read in sample.readSets %}
-{{read.r1.get_filename()}}
-{% if read.r2 %}
-{{read.r2.get_filename()}}
-{%endif%}
-{%endfor%}
-
-%-------------------- Summary -------------------%
-\begin{tcolorbox}
-{\large{Summary} } \\
-{{sample.nTrimmed()}} of {{sample.total_reads()}} ({{sample.pTrimmed()}}\%) reads remained after trimming \\
-{{sample.get_mean_trimRes()}} \% of fragments were shorter than read length\\
-
-{%if sample.mappingRes !=None%}{{sample.mappingRes.numberOfAlignedReads}} ({{sample.mappingRes.percentAlignedReads}}\%) of reads aligned to \path{ {{sample.reference}} }\\ {%endif%}
-{%if sample.krakenRes !=None%}{{sample.krakenRes.nClassified}} sequences classified ({{sample.krakenRes.pClassified}}\%) {%endif%}
-
-\end{tcolorbox}
-
-\line(1,0){\textwidth} \\
-\vspace{5mm}
-
-%-------------------- FASTQC Results -------------------%
-
-{\Large{FastQC (Pre | Post Trimming) } } \\
-\vspace{5mm}
-
-{%for read in sample.readSets %}
-
-{\underline{Readname: {{read.r1.get_filename()}} } }\\
-{%if read.r1.concat_files%}
-\vspace{5mm}
-Concatenated files:\\
-\begin{itemize}
-{%for file in read.r1.concat_files%}
-\item {{file}}
-{%endfor%}
-\end{itemize}
-{%endif%}
-{{read.r1.log}} \\
-
-Trimming Log: \\
-\textcolor{gray}{Using parameter: {{trim_param}} }\\
-{{read.trimRes.logs}} \\
-
-{% if read.trimRes.blobplot != "" %}
- \begin{figure}[H]
- \centering
- {\includegraphics[width=0.8 \textwidth]{/{{read.trimRes.blobplot}}} }
- \caption{Blobplot}
- \end{figure}
-{% endif %}
-%
-{% for i in range(read.r1.qcRes.results|length) %}
- \begin{figure}[H]
- \centering
- \begin{adjustbox}{minipage=0.4\textwidth-2\fboxrule, cframe = {{read.r1.qcRes.results[i].color}} 2}%
- \begin{subfigure}%
- {\includegraphics[width=\textwidth]{/{{read.r1.qcRes.results[i].file}}} }
- \end{subfigure}
- \end{adjustbox}\qquad
- \begin{adjustbox}{minipage=0.4\textwidth-2\fboxrule, cframe = {{read.trimRes.readset.r1.qcRes.results[i].color}} 2}%
- \begin{subfigure}%
- {\includegraphics[width=\textwidth]{/{{read.trimRes.readset.r1.qcRes.results[i].file}}} }
- \end{subfigure}
- \end{adjustbox}
- \caption{ {{read.trimRes.readset.r1.qcRes.results[i].name}}}
- \end{figure}
-{% endfor %}
-
-{% if read.r2 %}
-{\underline{Readname: {{read.r2.get_filename()}} } } \\
-{%if read.r2.concat_files%}
-Concatenated files: \\
-\begin{itemize}
-{%for file in read.r2.concat_files%}
-\item {{file}}
-{%endfor%}
-\end{itemize}
-{%endif%}
-{{read.r2.log}}
-{% for i in range(read.r2.qcRes.results|length) %}
- \begin{figure}[H]
- \centering
- \begin{adjustbox}{minipage=0.4\textwidth -2\fboxrule, cframe = {{read.r2.qcRes.results[i].color}} 2}%
- \begin{subfigure}
- {\includegraphics[width=\textwidth]{/{{read.r2.qcRes.results[i].file}}} }
- \end{subfigure}
- \end{adjustbox}\qquad
- \begin{adjustbox}{minipage=0.4\textwidth -2\fboxrule, cframe = {{read.trimRes.readset.r2.qcRes.results[i].color}} 2}%
- \begin{subfigure}%
- {\includegraphics[width=\textwidth]{/{{read.trimRes.readset.r2.qcRes.results[i].file}}} }
- \end{subfigure}
- \end{adjustbox}
- \caption{ {{read.trimRes.readset.r2.qcRes.results[i].name}}}
- \end{figure}
-{% endfor %}
-{% endif %}
-
-{% endfor %}\\
-
-%-------------------- Bowtie Results -------------------%
-{%if sample.mappingRes != None%}
-\line(1,0){\textwidth}
-\vspace{5mm}
-
-{\Large{Bowtie2} } - Map against \path{ {{sample.reference}} } \\
-{{sample.mappingRes.log}}
-{%endif%}
-%-------------------- Kraken Results -------------------%
-{%if sample.krakenRes != None%}
-\line(1,0){\textwidth} \\
-\vspace{5mm}
-
-{\Large{Kraken} } \\
-
-{{sample.krakenRes.log}}
-
-
-\begin{singlespace}
-\begin{verbatim}
-{{sample.krakenRes.report}}
-\end{verbatim}
-\end{singlespace}
-{%endif%}
+\documentclass[a4paper]{article}
+\usepackage[document]{ragged2e}
+\usepackage{python}
+\usepackage[english]{babel}
+\usepackage{graphicx}
+\usepackage[utf8]{inputenc}
+\usepackage{xcolor}
+\usepackage[T1]{fontenc}
+\renewcommand*\familydefault{\ttdefault} %% Only if the base font of the document is to be typewriter style
+\usepackage{subfigure}
+\usepackage{geometry}
+\usepackage{tcolorbox}
+\usepackage{adjustbox}
+\usepackage{setspace}
+\usepackage{float}
+\usepackage{url}
+\usepackage{grffile}
+
+\geometry{a4paper,left=25mm,right=20mm,
+ top=12mm,
+ bottom=20mm}
+
+\setlength\parindent{0pt}
+
+\tolerance=1
+\emergencystretch=\maxdimen
+\hyphenpenalty=1000
+\hbadness=1000
+
+\begin{document}
+
+{\bf {\LARGE{ {{pipeline.name}} } Version {{pipeline.version}} } }\\
+\line(1,0){ \textwidth }
+
+\begin{tabular}{p{0.25\textwidth} p{0.75\textwidth}}
+
+Executor: & {{pipeline.user}} \\
+Dataset: & {\bfseries{\verb|{{sample.name}}|} }\\
+Started: & {{sample.start}}\\
+Finished: & {{sample.stop}}\\
+ & \\
+Operating System & \\
+Server: & {{pipeline.server}}\\
+System: & {{pipeline.system}}\\
+Release: & {{pipeline.release}}\\
+Version: & {{pipeline.operating_version}}\\
+Machine: & {{pipeline.machine}}\\
+ & \\
+ Tool versions & \\
+Python: & {{pipeline.python_version}}\\
+FastQC: & {{pipeline.fastqc_version}}\\
+Trimmomatic: & {{pipeline.trimmo_version}}\\
+{%if sample.mappingRes !=None%}Bowtie2: & {{pipeline.bowtie_version}}\\{%endif%}
+{%if sample.krakenRes != None%}Kraken: & {{pipeline.kraken_version}}\\{%endif%}
+\end{tabular}\\
+
+%----------------- Workflow -------------------%
+\line(1,0){ \textwidth } \\
+Processed reads: \\
+{%for read in sample.readSets %}
+{{read.r1.get_filename()}}
+{% if read.r2 %}
+{{read.r2.get_filename()}}
+{%endif%}
+{%endfor%}
+
+%-------------------- Summary -------------------%
+\begin{tcolorbox}
+{\large{Summary} } \\
+{{sample.nTrimmed()}} of {{sample.total_reads()}} ({{sample.pTrimmed()}}\%) reads remained after trimming \\
+{{sample.get_mean_trimRes()}} \% of fragments were shorter than read length\\
+
+{%if sample.mappingRes !=None%}{{sample.mappingRes.numberOfAlignedReads}} ({{sample.mappingRes.percentAlignedReads}}\%) of reads aligned to \path{ {{sample.reference}} }\\ {%endif%}
+{%if sample.krakenRes !=None%}{{sample.krakenRes.nClassified}} sequences classified ({{sample.krakenRes.pClassified}}\%) {%endif%}
+
+\end{tcolorbox}
+
+\line(1,0){\textwidth} \\
+\vspace{5mm}
+
+%-------------------- FASTQC Results -------------------%
+
+{\Large{FastQC (Pre | Post Trimming) } } \\
+\vspace{5mm}
+
+{%for read in sample.readSets %}
+
+{\underline{Readname: {{read.r1.get_filename()}} } }\\
+{%if read.r1.concat_files%}
+\vspace{5mm}
+Concatenated files:\\
+\begin{itemize}
+{%for file in read.r1.concat_files%}
+\item {{file}}
+{%endfor%}
+\end{itemize}
+{%endif%}
+{{read.r1.log}} \\
+
+Trimming Log: \\
+\textcolor{gray}{Using parameter: {{trim_param}} }\\
+{{read.trimRes.logs}} \\
+
+{% if read.trimRes.blobplot != "" %}
+ \begin{figure}[H]
+ \centering
+ {\includegraphics[width=0.8 \textwidth]{/{{read.trimRes.blobplot}}} }
+ \caption{Blobplot}
+ \end{figure}
+{% endif %}
+%
+{% for i in range(read.r1.qcRes.results|length) %}
+ \begin{figure}[H]
+ \centering
+ \begin{adjustbox}{minipage=0.4\textwidth-2\fboxrule, cframe = {{read.r1.qcRes.results[i].color}} 2}%
+ \begin{subfigure}%
+ {\includegraphics[width=\textwidth]{/{{read.r1.qcRes.results[i].file}}} }
+ \end{subfigure}
+ \end{adjustbox}\qquad
+ \begin{adjustbox}{minipage=0.4\textwidth-2\fboxrule, cframe = {{read.trimRes.readset.r1.qcRes.results[i].color}} 2}%
+ \begin{subfigure}%
+ {\includegraphics[width=\textwidth]{/{{read.trimRes.readset.r1.qcRes.results[i].file}}} }
+ \end{subfigure}
+ \end{adjustbox}
+ \caption{ {{read.trimRes.readset.r1.qcRes.results[i].name}}}
+ \end{figure}
+{% endfor %}
+
+{% if read.r2 %}
+{\underline{Readname: {{read.r2.get_filename()}} } } \\
+{%if read.r2.concat_files%}
+Concatenated files: \\
+\begin{itemize}
+{%for file in read.r2.concat_files%}
+\item {{file}}
+{%endfor%}
+\end{itemize}
+{%endif%}
+{{read.r2.log}}
+{% for i in range(read.r2.qcRes.results|length) %}
+ \begin{figure}[H]
+ \centering
+ \begin{adjustbox}{minipage=0.4\textwidth -2\fboxrule, cframe = {{read.r2.qcRes.results[i].color}} 2}%
+ \begin{subfigure}
+ {\includegraphics[width=\textwidth]{/{{read.r2.qcRes.results[i].file}}} }
+ \end{subfigure}
+ \end{adjustbox}\qquad
+ \begin{adjustbox}{minipage=0.4\textwidth -2\fboxrule, cframe = {{read.trimRes.readset.r2.qcRes.results[i].color}} 2}%
+ \begin{subfigure}%
+ {\includegraphics[width=\textwidth]{/{{read.trimRes.readset.r2.qcRes.results[i].file}}} }
+ \end{subfigure}
+ \end{adjustbox}
+ \caption{ {{read.trimRes.readset.r2.qcRes.results[i].name}}}
+ \end{figure}
+{% endfor %}
+{% endif %}
+
+{% endfor %}\\
+
+%-------------------- Bowtie Results -------------------%
+{%if sample.mappingRes != None%}
+\line(1,0){\textwidth}
+\vspace{5mm}
+
+{\Large{Bowtie2} } - Map against \path{ {{sample.reference}} } \\
+{{sample.mappingRes.log}}
+{%endif%}
+%-------------------- Kraken Results -------------------%
+{%if sample.krakenRes != None%}
+\line(1,0){\textwidth} \\
+\vspace{5mm}
+
+{\Large{Kraken} } \\
+
+{{sample.krakenRes.log}}
+
+
+\begin{singlespace}
+\begin{verbatim}
+{{sample.krakenRes.report}}
+\end{verbatim}
+\end{singlespace}
+{%endif%}
\end{document}
\ No newline at end of file
diff --git a/sav.R b/sav.R
index 06a33b5..445e93f 100755
--- a/sav.R
+++ b/sav.R
@@ -1,150 +1,148 @@
-#!/usr/bin/env Rscript
-#source("http://bioconductor.org/biocLite.R")
-#biocLite("savR")
-#install.packages("Cairo")
-#library(Cairo)
-library(savR)
-library(reshape2)
-
-args = commandArgs(trailingOnly=TRUE)
-#args <- c("Z://testdaten/ProjectXY/SAV", "Z://testsav")
-project <- savR(args[1])
-
-################
-## Indexing ##
-################
-
-#total reads
-total_reads<- clusters(project, 1L)
-pf_reads<- pfClusters(project, 1L)
-
-
-################
-## Plots ##
-################
-
-##
-# Data By Cycle
-##
-
-extraction<- extractionMetrics((project))
-
-# Data By Cycle, FWHM/All Lanes / Both surfaces / All Bases
-reshaped_extraction <- melt(extraction, measure.vars= c("FWHM_A","FWHM_C", "FWHM_T","FWHM_G"))
-FWHM<- (aggregate(reshaped_extraction$value, by=list(reshaped_extraction$cycle, reshaped_extraction$variable), FUN=mean))
-colnames(FWHM) <- c("Cycles","FWHM", "Value")
-FWHM$FWHM<- sub("FWHM_","",FWHM$FWHM)
-ggplot(data=FWHM )+
- geom_line( aes(x=Cycles , y =Value, color=FWHM)) +
- ggtitle("Data by Cycle - FWHM") +
- xlab("Cycle") +
- ylab("All bases FWHM")
-ggsave(paste(args[2], "/FWHM.png", sep=""))
-
-# Data By Cycle,Intensity /All Lanes / Both surfaces / All Bases
-reshaped_extraction <- melt(extraction, measure.vars= c("int_A","int_C", "int_T","int_G"))
-intensity<- (aggregate(reshaped_extraction$value, by=list(reshaped_extraction$cycle, reshaped_extraction$variable), FUN=mean))
-colnames(intensity) <- c("Cycles","Intensity", "Value")
-intensity$Intensity<- sub("int_","", intensity$Intensity)
-ggplot(data=intensity )+
- geom_line( aes(x=Cycles , y =Value, color=Intensity))+
- ggtitle("Data By Cycle - Intensity")+
- xlab("Cycle")+ylab("All bases intensity")
-ggsave(paste(args[2], "/intensity.png", sep=""))
-# Data By Cycle, %Base /All Lanes / Both surfaces / All Bases
-#
-corr<- correctedIntensities(project)
-
-
-corr[,seq(14,17)]<-round(corr[,seq(14,17)] / apply(corr[,seq(14,17)], 1, sum) *100,2)
-corr<- melt(corr, measure.vars= c("num_A","num_C", "num_T","num_G"))
-corr<-(aggregate(corr$value, by=list(corr$cycle, corr$variable), FUN=mean))
-colnames(corr)<- c("Cycle", "Base", "Perc_Base")
-corr$Base<- sub("num_","", corr$Base)
-ggplot(corr) +
- geom_line(aes(x=Cycle, y= Perc_Base, color=Base)) +
- ylab("All Bases % Base") +
- ggtitle("Data by Cycle - % Base")
-ggsave(paste(args[2], "/base_perc.png" , sep =""))
-
-##
-# Data By Lane
-##
-
-tiles<- tileMetrics(project)
-# Density, Both Surfaces
-#pfBoxplot(project) # Generate a boxplot of the numbers of clusters and the number of Illumina pass-filter clusters per tile and lane
-dens <-(tiles[which(tiles$code==100 | tiles$code==101 ),])
-dens[which(dens$code==100),]$code <- "Raw Clusters"
-dens[which(dens$code==101),]$code<- "PF Clusters"
-dens$value <- dens$value/1000
-ggplot(data = dens , aes(x=lane, y=value, fill=code))+
- geom_boxplot() +
- ggtitle("Data By Lane - Cluster Density") +
- xlab("Lane")+ylab("Cluster Density (K/mm2)")
-ggsave(paste(args[2], "/density.png", sep=""))
-
-# Phasing, Both Surfaces, All Bases
-phasing_code <- seq(200, (200 + (length(project@reads)-1)*2),2)
-phasing <-(tiles[which(tiles$code %in% phasing_code) ,])
-for(i in phasing_code){
- cat(paste("Read ",((i-200)/2)+1))
- phasing[which(phasing$code==i),]$code = paste("Read ",((i-200)/2)+1)
-}
-ggplot(data = phasing[which(phasing$value>0),] , aes(x=lane, y=value*100, fill=code))+
- geom_boxplot() +
- ggtitle("Data By Lane - Phasing")+
- xlab("Lane")+
- ylab("% Phasing")+
- scale_x_continuous(breaks = unique(phasing$lane))
-ggsave(paste(args[2], "/phasing.png", sep=""))
-
-
-# Pre-Phasing, Both Surfaces, All Bases
-prephasing_code <- seq(201, (201 + (length(project@reads)-1)*2),2)
-prephasing <-(tiles[which(tiles$code %in% prephasing_code) ,])
-for(i in prephasing_code){
-
- prephasing[which(prephasing$code==i),]$code = paste("Read ",((i-201)/2)+1)
-}
-ggplot(data = prephasing[which(prephasing$value>0),] , aes(x=lane, y=value*100, fill=code))+
- geom_boxplot() +
- ggtitle("Data By Lane - Prephasing")+
- xlab("Lane")+
- ylab("% Prephasing") +
- scale_x_continuous(breaks = unique(prephasing$lane))
-ggsave(paste(args[2], "/prephasing.png", sep=""))
-
-
-
-##
-# QScore Heatmap
-##
-png(paste(args[2], "/qscore_heatmap.png", sep=""), height=1025, width = 2571, res = 200)
-qualityHeatmap(project, lane=seq(1,project@layout@lanecount) ,read=c(1,2))+ theme(axis.title.y = element_blank())
-dev.off()
-
-qualy<- qualityMetrics(project)
-qualy<- data.frame(apply(qualy, 2, as.numeric))
-qualy<- melt(qualy, measure.vars= colnames(qualy)[4:ncol(qualy)])
-
-qualy<- aggregate(qualy$value, by=list(qualy$variable), FUN=sum)
-colnames(qualy)<- c("QScore","Total")
-qualy$Total <- qualy$Total/1000000
-qualy$QScore <- as.numeric(qualy$QScore)
-ggplot(qualy, aes(x=QScore, y = Total )) +
- geom_bar(stat="identity", aes(fill=QScore>=30)) +
- ylab("Total (million)") +
- geom_vline(aes(xintercept=30), linetype="dashed") +
- geom_text(aes(x=35, y=max(Total)-max(Total)*0.1 ,label=(paste("QScore >=30 \n",
- round(sum(qualy[which(qualy$QScore>=30),]$Total)/1000,2),
- "G \n",
- round(sum(qualy[which(qualy$QScore>=30),]$Total)/ sum(qualy$Total)*100,2),
- "%")
- ))) +
- ggtitle("QScore Distribution") +
- theme(legend.position="none")
-ggsave(paste(args[2], "/qscore_distr.png", sep=""))
-
-
-
+#!/usr/bin/env Rscript
+#source("http://bioconductor.org/biocLite.R")
+#biocLite("savR")
+options(warn=-1)
+library(savR)
+library(reshape2)
+
+args = commandArgs(trailingOnly=TRUE)
+project <- savR(args[1])
+
+################
+## Indexing ##
+################
+
+#total reads
+total_reads<- clusters(project, 1L)
+pf_reads<- pfClusters(project, 1L)
+
+
+################
+## Plots ##
+################
+
+##
+# Data By Cycle
+##
+
+extraction<- extractionMetrics((project))
+
+# Data By Cycle, FWHM/All Lanes / Both surfaces / All Bases
+reshaped_extraction <- melt(extraction, measure.vars= c("FWHM_A","FWHM_C", "FWHM_T","FWHM_G"))
+FWHM<- (aggregate(reshaped_extraction$value, by=list(reshaped_extraction$cycle, reshaped_extraction$variable), FUN=mean))
+colnames(FWHM) <- c("Cycles","FWHM", "Value")
+FWHM$FWHM<- sub("FWHM_","",FWHM$FWHM)
+ggplot(data=FWHM )+
+ geom_line( aes(x=Cycles , y =Value, color=FWHM)) +
+ ggtitle("Data by Cycle - FWHM") +
+ xlab("Cycle") +
+ ylab("All bases FWHM")
+ggsave(paste(args[2], "/FWHM.png", sep=""))
+
+# Data By Cycle,Intensity /All Lanes / Both surfaces / All Bases
+reshaped_extraction <- melt(extraction, measure.vars= c("int_A","int_C", "int_T","int_G"))
+intensity<- (aggregate(reshaped_extraction$value, by=list(reshaped_extraction$cycle, reshaped_extraction$variable), FUN=mean))
+colnames(intensity) <- c("Cycles","Intensity", "Value")
+intensity$Intensity<- sub("int_","", intensity$Intensity)
+ggplot(data=intensity )+
+ geom_line( aes(x=Cycles , y =Value, color=Intensity))+
+ ggtitle("Data By Cycle - Intensity")+
+ xlab("Cycle")+ylab("All bases intensity")
+ggsave(paste(args[2], "/intensity.png", sep=""))
+# Data By Cycle, %Base /All Lanes / Both surfaces / All Bases
+#
+corr<- correctedIntensities(project)
+
+
+corr[,seq(14,17)]<-round(corr[,seq(14,17)] / apply(corr[,seq(14,17)], 1, sum) *100,2)
+corr<- melt(corr, measure.vars= c("num_A","num_C", "num_T","num_G"))
+corr<-(aggregate(corr$value, by=list(corr$cycle, corr$variable), FUN=mean))
+colnames(corr)<- c("Cycle", "Base", "Perc_Base")
+corr$Base<- sub("num_","", corr$Base)
+ggplot(corr) +
+ geom_line(aes(x=Cycle, y= Perc_Base, color=Base)) +
+ ylab("All Bases % Base") +
+ ggtitle("Data by Cycle - % Base")
+ggsave(paste(args[2], "/base_perc.png" , sep =""))
+
+##
+# Data By Lane
+##
+
+tiles<- tileMetrics(project)
+# Density, Both Surfaces
+#pfBoxplot(project) # Generate a boxplot of the numbers of clusters and the number of Illumina pass-filter clusters per tile and lane
+dens <-(tiles[which(tiles$code==100 | tiles$code==101 ),])
+dens[which(dens$code==100),]$code <- "Raw Clusters"
+dens[which(dens$code==101),]$code<- "PF Clusters"
+dens$value <- dens$value/1000
+ggplot(data = dens , aes(x=lane, y=value, fill=code))+
+ geom_boxplot() +
+ ggtitle("Data By Lane - Cluster Density") +
+ xlab("Lane")+ylab("Cluster Density (K/mm2)")
+ggsave(paste(args[2], "/density.png", sep=""))
+
+# Phasing, Both Surfaces, All Bases
+phasing_code <- seq(200, (200 + (length(project@reads)-1)*2),2)
+phasing <-(tiles[which(tiles$code %in% phasing_code) ,])
+for(i in phasing_code){
+ cat(paste("Read ",((i-200)/2)+1))
+ phasing[which(phasing$code==i),]$code = paste("Read ",((i-200)/2)+1)
+}
+ggplot(data = phasing[which(phasing$value>0),] , aes(x=lane, y=value*100, fill=code))+
+ geom_boxplot() +
+ ggtitle("Data By Lane - Phasing")+
+ xlab("Lane")+
+ ylab("% Phasing")+
+ scale_x_continuous(breaks = unique(phasing$lane))
+ggsave(paste(args[2], "/phasing.png", sep=""))
+
+
+# Pre-Phasing, Both Surfaces, All Bases
+prephasing_code <- seq(201, (201 + (length(project@reads)-1)*2),2)
+prephasing <-(tiles[which(tiles$code %in% prephasing_code) ,])
+for(i in prephasing_code){
+
+ prephasing[which(prephasing$code==i),]$code = paste("Read ",((i-201)/2)+1)
+}
+ggplot(data = prephasing[which(prephasing$value>0),] , aes(x=lane, y=value*100, fill=code))+
+ geom_boxplot() +
+ ggtitle("Data By Lane - Prephasing")+
+ xlab("Lane")+
+ ylab("% Prephasing") +
+ scale_x_continuous(breaks = unique(prephasing$lane))
+ggsave(paste(args[2], "/prephasing.png", sep=""))
+
+
+
+##
+# QScore Heatmap
+##
+png(paste(args[2], "/qscore_heatmap.png", sep=""), height=1025, width = 2571, res = 200)
+qualityHeatmap(project, lane=seq(1,project@layout@lanecount) ,read=c(1,2))+ theme(axis.title.y = element_blank())
+dev.off()
+
+qualy<- qualityMetrics(project)
+qualy<- data.frame(apply(qualy, 2, as.numeric))
+qualy<- melt(qualy, measure.vars= colnames(qualy)[4:ncol(qualy)])
+
+qualy<- aggregate(qualy$value, by=list(qualy$variable), FUN=sum)
+colnames(qualy)<- c("QScore","Total")
+qualy$Total <- qualy$Total/1000000
+qualy$QScore <- as.numeric(qualy$QScore)
+ggplot(qualy, aes(x=QScore, y = Total )) +
+ geom_bar(stat="identity", aes(fill=QScore>=30)) +
+ ylab("Total (million)") +
+ geom_vline(aes(xintercept=30), linetype="dashed") +
+ geom_text(aes(x=35, y=max(Total)-max(Total)*0.1 ,label=(paste("QScore >=30 \n",
+ round(sum(qualy[which(qualy$QScore>=30),]$Total)/1000,2),
+ "G \n",
+ round(sum(qualy[which(qualy$QScore>=30),]$Total)/ sum(qualy$Total)*100,2),
+ "%")
+ ))) +
+ ggtitle("QScore Distribution") +
+ theme(legend.position="none")
+ggsave(paste(args[2], "/qscore_distr.png", sep=""))
+
+
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/qcumber.git
More information about the debian-med-commit
mailing list