[med-svn] [qcumber] 14/15: New upstream version 1.0.0+dfsg

Andreas Tille tille at debian.org
Wed Nov 23 08:21:09 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository qcumber.

commit 1df4686ff044b06133621fbf15cf67843e849458
Author: Andreas Tille <tille at debian.org>
Date:   Wed Nov 23 09:19:08 2016 +0100

    New upstream version 1.0.0+dfsg
---
 .gitignore        |   5 +
 QCumber.py        | 508 +++++++++++++++++++++++++++++++++++++++++++++++++++
 barplot.R         |  28 +++
 batch_report.html | 530 +++++++++++++++++++++++++++++++++++++-----------------
 boxplot.R         |  23 +++
 classes.py        | 440 ++++++++++++++++++++-------------------------
 combineFastQC.py  | 431 --------------------------------------------
 config.txt        |   8 +-
 helper.py         | 240 +++++++++++++++++++++++--
 lib/box.js        | 320 +++++++++++++++++++++++++++++++++
 lib/qcstyle.css   |  84 +++++++++
 parameter.txt     |   9 +-
 readme.md         | 119 ++++++++++++
 readme.txt        | 111 ------------
 runQCPipeline.py  | 484 -------------------------------------------------
 sav.R             | 150 ++++++++++++++++
 16 files changed, 2027 insertions(+), 1463 deletions(-)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4e330dc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+.Rhistory
+.idea
+.svn
+__pycache__
+config.txt
diff --git a/QCumber.py b/QCumber.py
new file mode 100755
index 0000000..c21b0eb
--- /dev/null
+++ b/QCumber.py
@@ -0,0 +1,508 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+__author__ = 'LieuV'
+__version__ = "1.0.0"
+
+from classes import *
+from helper import *
+import os
+import re
+import sys
+from itertools import groupby
+from jinja2 import Environment,FileSystemLoader
+import shutil
+import configparser
+from collections import OrderedDict
+import json
+
+#dependencies
+try:
+	import argparse
+except ImportError:
+	print('The "argparse" module is not available. Use Python 3.2 or later.')
+	sys.exit(1)
+
+
+parameter = configparser.ConfigParser()
+parameter.read(join(dirname(__file__), "parameter.txt"))
+
+file_extensions = parameter["Fileextension"]["fastq"]
+r1_pattern = parameter["Pattern"]["R1"]
+r2_pattern = parameter["Pattern"]["R2"]
+lane_pattern = parameter["Pattern"]["lane"]
+
+def get_illumina_reads(output, tmp):
+	## Add readsets to sample
+	readsets = []
+	# split by lanes
+	if isinstance(arguments["r1"], list):
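+		# Several files per read direction were given: group them by the lane tag from parameter.txt and merge files of the same lane.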
+		for lane in sorted(set([re.search(lane_pattern, x).group() for x in arguments["r1"]])):
+			# concat same files
+			r1_reads = [x for x in arguments["r1"] if lane in x]
+			readname = re.sub(r1_pattern + ".*", "", os.path.basename(r1_reads[0]))
+			if len(r1_reads) != 1:
+				r1 = FastQFile(join_reads(r1_reads, tmp, readname + "_R1"), [toLatex(os.path.basename(x)) for x in r1_reads]  )
+			else:
+				r1 = FastQFile(r1_reads[0])
+			if arguments["r2"]:
+				r2_reads = [x for x in arguments["r2"] if lane in x]
+
+				if len(r2_reads) != 1:
+					r2 = FastQFile(join_reads(r2_reads, tmp, readname + "_R2"),[toLatex(os.path.basename(x)) for x in r2_reads] )
+				else:
+					r2 = FastQFile(r2_reads[0])
+				#return [ReadSet( r1, r2)]
+				readsets.append(ReadSet(r1,r2))
+			else:
+				readsets.append(ReadSet(r1))
+	else:
+		r1 = FastQFile(arguments["r1"])
+		if arguments["r2"]:
+			r2 = FastQFile(arguments["r2"])
+			return [ReadSet(r1,r2)]
+		else:
+			return [ReadSet(r1)]
+
+
+	return readsets
+
+def run_analysis(readset, arguments, output, tmp):
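+	# FastQC on the raw reads, then Trimmomatic (threshold taken from -trimBetter if requested), then FastQC again on the trimmed reads.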
+	print("Run FastQC...")
+	readset.run_FastQC(join(output, "FastQC"))
+	if arguments["trimBetter"]:
+		trimming_perc = arguments["trimBetter_threshold"]
+	else:
+		trimming_perc = ""
+	readset.run_Trimmomatic(arguments["adapter"], arguments["palindrome"],arguments["minlen"], arguments["trimOption"], trimming_perc)
+	readset.trimRes = readset.trimRes.run_FastQC( tmp)
+	return readset
+
+
+def check_input(arguments):
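+	# Resolve the input files: explicit -1/-2 take precedence; otherwise -input is scanned (fastq folder for Illumina, single BAM for IonTorrent).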
+	if arguments["r1"]:
+		if arguments["r2"]:
+			return [arguments["r1"], arguments["r2"]]
+		else:
+			return [arguments["r1"]]
+
+	if arguments["technology"]=="Illumina":
+		if os.path.isdir(arguments["input"]):
+			files = os.listdir(arguments["input"])
+			files = [os.path.join(arguments["input"], x) for x in files]
+			files = [x for x in files if os.path.isfile(x) and any([ext in x for ext in file_extensions])]
+			if len(files)==0:
+				sys.exit(arguments["input"] + " does not contain fastq files.")
+			return files
+		else:
+			return [arguments["input"]]
+	elif arguments["technology"]=="IonTorrent":
+
+		if os.path.isfile(arguments["input"]):
+
+			if arguments["input"].endswith(".bam"):
+				return arguments["input"]
+			else:
+				sys.exit("IonTorrent PGM input has to be a BAM file. Use --help for more information.")
+		else:
+			sys.exit("Incorrect file input")
+	#elif arguments["technology"] == "Illumina-MiSeq":
+	#	if os.path.isfile(arguments["input"]):
+	#		if any([arguments["input"].endswith(x) for x in file_extensions]):
+	#			return arguments["input"]
+	#		else:
+	#			sys.exit("Not supported file extension.")
+	#	else:
+	#		sys.exit("Illumina MiSeq input has to be a fastq-file. Otherwise use -b for a batch run. Use --help for more information.")
+	else:
+		sys.exit("Incorrect file input")
+	return None
+
+def getSetname():
+	try:
+		if arguments["technology"] == "Illumina":
+
+			if isinstance(arguments["r1"], list):
+				return "_".join(basename(arguments["r1"][0]).split("_")[:-3])
+			else:
+				return getFilenameWithoutExtension(arguments["r1"], True)
+		else:
+			return getFilenameWithoutExtension(arguments["r1"], getBase=True)
+	except:
+		return getFilenameWithoutExtension(arguments["r1"], getBase=True)
+
+def fillClasses(temp_bowtie_path="", tmp=""):
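+	# Build the Sample object and run the per-readset analysis plus, unless disabled, Bowtie2 mapping and Kraken classification.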
+	try:
+		output = getDir([arguments["output"], "QCResults"], True)
+
+		sample = Sample(getSetname(), output, arguments["reference"],arguments["threads"], arguments["technology"])
+		if arguments["technology"].startswith("Illumina"):
+			readsets = get_illumina_reads( output, tmp)
+		elif arguments["technology"] == "IonTorrent":
+			print("IonTorrent", arguments["r1"])
+			fastqfile = bam_to_fastq(arguments["r1"],tmp)
+			readsets = [ReadSet( fastqfile)]
+
+		#os.makedirs(tempdir, exist_ok=True)
+		for rs in readsets:
+			readset = run_analysis(rs, arguments,  output, tmp)
+			sample = sample.add_readSet(readset)
+		if not arguments["nomapping"]:
+			if arguments["save_mapping"]:
+				sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, join(arguments["output"], "QCResults", sample.name +".bam"))
+			else:
+				sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, "/dev/null")
+		if not arguments["nokraken"]:
+			print("Run Kraken.")
+			sample = sample.run_Kraken(arguments["db"])
+
+		sample.stop = datetime.datetime.strftime(datetime.datetime.now(),"%d.%m.%Y, %H:%M:%S")
+	finally:
+		shutil.rmtree(tmp)
+	return sample
+
+def writeReport(sample, arguments):
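+	# Render report.tex with Jinja2, compile it with pdflatex and remove the LaTeX auxiliary files afterwards.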
+	report_name = os.path.join(sample.mainResultsPath, "Report",
+		 "summary_" + sample.name + "_" + datetime.datetime.strftime(datetime.datetime.now(), "%d-%m-%y_%H-%M"))
+	print("Writing LaTeX report:", report_name)
+	env = Environment()
+	env.loader = FileSystemLoader(os.path.dirname(__file__))
+
+	template = env.get_template("report.tex")
+	pdf_latex = template.render(sample=sample, pipeline=Pipeline(), trim_param = sample.readSets[0].trimRes.print_param(arguments["palindrome"],arguments["minlen"], arguments["trimOption"]))
+
+	latex = open(report_name + ".tex", "w")
+	latex.write(pdf_latex)
+	latex.close()
+
+	process = subprocess.Popen(["pdflatex", "-interaction=nonstopmode", "-output-directory=" + join(sample.mainResultsPath, "Report"), report_name + ".tex"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
+	for line in iter(process.stdout.readline, b''):
+		pass #print(line)
+	for line in iter(process.stderr.readline, b''):
+		print(line)
+
+	process.communicate()
+
+	for ext in (".tex",".aux", ".log", ".toc", ".lof", ".lot", ".synctex.gz"):
+		try:
+			os.remove(report_name + ext)
+		except OSError:
+			pass
+
+def main(arguments):
+
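+	# All results go to <output>/QCResults; qc_tmp holds intermediate files and is removed in the finally block.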
+	tmp = join(arguments["output"],"QCResults" ,"qc_tmp")
+	makedirs(tmp, exist_ok=True)
+	makedirs(join(arguments["output"], "QCResults"), exist_ok=True)
+	makedirs(join(arguments["output"], "QCResults", "FastQC"), exist_ok=True)
+	makedirs(join(arguments["output"], "QCResults", "Trimmed"), exist_ok=True)
+	makedirs(join(arguments["output"], "QCResults", "Trimmed", "FastQC"), exist_ok=True)
+	makedirs(join(arguments["output"], "QCResults", "Report"), exist_ok=True)
+	makedirs(join(arguments["output"], "QCResults", "Report", "src"), exist_ok=True)
+	makedirs(join(arguments["output"], "QCResults", "Report", "src", "img"), exist_ok=True)
+
+	# create new boxplot csv files
+	for csv in ["Per_sequence_quality_scores.csv", "Per_sequence_GC_content.csv", "Sequence_Length_Distribution.csv"]:
+		new_csv = open(join(arguments["output"], "QCResults", "Report", "src", csv), "w")
+		new_csv.write("Sample,Read,Lane,Trimmed,Value,Count\n")
+		new_csv.close()
+	##
+	# SAV
+	try:
+		write_SAV(arguments["sav"], join(arguments["output"], "QCResults", "Report", "src"))
+	except Exception:
+		print("Couldn't write SAV.")
+	##
+
+	if not os.path.exists(join(arguments["output"], "QCResults", "Report", "lib" )):
+		shutil.copytree(join(os.path.dirname(__file__),"lib"),join(arguments["output"], "QCResults", "Report", "lib" ))
+	if arguments["technology"].startswith("Illumina"):
+		technology = "Illumina"
+	else:
+		technology = "IonTorrent"
+
+	if arguments["forAssembly"]:
+		if not arguments["trimBetter_threshold"]:
+			arguments["trimBetter_threshold"] = parameter["forAssembly." + technology]["trimBetter_threshold"]
+		if not arguments["trimOption"]:
+			arguments["trimOption"] = parameter["forAssembly." + technology]["trimOption"]
+
+	elif arguments["forMapping"]:
+		if not arguments["trimBetter_threshold"]:
+			arguments["trimBetter_threshold"] = parameter["forMapping." + technology]["trimBetter_threshold"]
+		if not arguments["trimOption"]:
+			arguments["trimOption"] = parameter["forMapping." + technology]["trimOption"]
+
+	if not arguments["trimOption"]:
+		arguments["trimOption"] = parameter["Trimmomatic"]["trimOption"]
+	if not arguments["trimBetter_threshold"]:
+		arguments["trimBetter_threshold"] = parameter["Trimmomatic"]["trimBetter_threshold"]
+
+	# check input
+	if arguments["trimOption"] != "":
+		pattern = re.match(r"(?P<option>\w+):(?P<value1>\d+):(?P<value2>\d+)", arguments["trimOption"])
+		if pattern is None or pattern.group("option") not in ("SLIDINGWINDOW", "MAXINFO"):
+			sys.exit("Check trimOption. Only 'SLIDINGWINDOW' or 'MAXINFO' are allowed.")
+		try:
+			int(pattern.group("value1"))
+			int(pattern.group("value2"))
+		except ValueError:
+			sys.exit("Check trimOption: both values have to be integers.")
+
+	if arguments['technology'] == "Illumina":
+		if not arguments['adapter']:
+			sys.exit("Illumina projects require an adapter file.")
+	if arguments["nomapping"]:
+		#print("no mapping")
+		arguments["reference"] = ""
+	else:
+		if not arguments["reference"]:
+			sys.exit("Mapping needs a reference.")
+		else:
+			if not os.path.exists(arguments["reference"]):
+				sys.exit("Reference does not exist.")
+
+	if not arguments["nokraken"]:
+		if not os.path.exists(arguments["db"]):
+			sys.exit(arguments["db"] + " does not exist. Enter a valid Kraken database.")
+		else:
+			if "database.kdb" not in os.listdir(arguments["db"]):
+				sys.exit("Database " + arguments["db"] + " does not contain the necessary file database.kdb.")
+	if not arguments["input"]:
+		if arguments["r1"]:
+			print(os.path.abspath(arguments["r1"]),os.path.exists(arguments["r1"]))
+			if not os.path.exists(arguments["r1"]):
+				sys.exit(arguments["r1"] + " does not exist.")
+			#if arguments["technology"]=="IonTorrent":
+			#	arguments["input"] = arguments["r1"]
+			#	arguments["r1"] = ""
+		else:
+			sys.exit("Input file required. Use option -input or -1 / -2")
+
+		if arguments["r2"]:
+			if not os.path.exists(arguments["r2"]):
+				sys.exit(arguments["r2"] + " does not exist.")
+
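+	# Use a pre-built Bowtie2 index if -index was given; otherwise create an index directory inside the temporary folder.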
+	if arguments["index"]:
+		mapping_index_path = arguments["index"]
+	else:
+		mapping_index_path = join(tmp, "bowtie2_index")
+		os.makedirs(mapping_index_path, exist_ok=True)
+	# RUN
+	try:
+		batch_json = {"commandline": arguments, "summary": [], "kraken": {}, "versions": Pipeline().__dict__}
+		json.dump(batch_json, open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "w"))
+		jsonfile_dict = []
+		kraken_reports = OrderedDict()
+		#boxplot_data = []
+		i = 1
+		project = []
+		tmp_overview = open(join(tmp, "overview.csv"), "w")
+		tmp_overview.write("Sample,Trimmed,Mapped,Classified\n")
+		myFiles = []
+
+		# filter samples and feed them into the loop below
+		# Get the folder content and group it by the last three fields split by "_"
+		if arguments["input"]:
+			input = [ join(arguments["input"], x) for x in sorted(os.listdir(arguments["input"]))]
+			#### check input format ####
+			# Case 1: Folder contains fastq files
+			if len([x for x in input if isfile(x) and any([x.endswith(ext) for ext in file_extensions]) ])>0:
+				input= [x for x in input if isfile(x) and any([x.endswith(ext) for ext in file_extensions]) ]
+				if arguments["technology"].startswith("Illumina"):
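+					# Each group becomes one sample; within a group, R1 and R2 files are told apart by the patterns from parameter.txt.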
+					for setname, files in groupby(input, key=lambda x: "_".join(x.split("_")[:-3])):
+						#for setname, files in groupby(files, key=lambda x: "_".join(x.split("_")[:-2])):
+						temp_file =  sorted(list(files))
+						r1_reads = [x for x in  temp_file if r1_pattern in x]
+						r2_reads = [x for x in temp_file if r2_pattern in x]
+						myFiles.append([r1_reads, r2_reads])
+				else:
+					# IonTorrent
+					#for setname, files in groupby(input, key=lambda x: "_".join(x.split("_")[:-2])):
+					#	temp_file = sorted(list(files))
+					# TODO Changed
+					myFiles = input
+					#r1_reads = [x for x in input if r1_pattern in x]
+					#r2_reads = [x for x in input if r2_pattern in x]
+					#myFiles.append([r1_reads, r2_reads])
+
+			# Case 2: Folder contains subfolder for each sample
+			else:
+				input = [x for x in input if os.path.isdir(x)]
+
+				if len(input) > 0:
+					for sample in input:
+						files = sorted([join(sample,x) for x in listdir(sample) if any([x.endswith(ext) for ext in file_extensions])])
+						if arguments["technology"].startswith("Illumina"):# and len(files) > 1:
+							for setname, files in groupby(files, key=lambda x: "_".join(x.split("_")[:-3])):
+								temp_file = sorted(list(files))
+								r1_reads = [x for x in temp_file if r1_pattern in x]
+								r2_reads = [x for x in temp_file if r2_pattern in x]
+								myFiles.append([r1_reads, r2_reads])
+						else:
+							myFiles.append(sorted(list(files)))
+					files = None
+			if len(myFiles) == 0:
+				sys.exit("Enter a valid folder that contains sample folders or fastq files.")
+		else:
+			if arguments["r1"]:
+				if not os.path.exists(arguments["r1"]):
+					sys.exit(arguments["r1"] + " does not exist.")
+
+			if arguments["r2"]:
+				myFiles.append([arguments["r1"], arguments["r2"]])
+			else:
+				myFiles.append([arguments["r1"]])
+
+		print("Found " + str(len(myFiles)) + " sample(s).")
+		for subset in myFiles:
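+			# subset is either a single file path (IonTorrent) or a list holding the R1 files and, if present, the R2 files.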
+			print("Analyse " + str(i) + "/" + str(len(myFiles)))
+
+			sample_tmp = join(tmp, "Sample_" + str(i))
+			os.makedirs(sample_tmp, exist_ok=True)
+
+			if isinstance(subset, str):
+				arguments["r1"] = subset
+			else:
+				arguments["r1"] = subset[0]
+				if len(subset)>1:
+					arguments["r2"] = subset[1]
+
+			sample = fillClasses( mapping_index_path, sample_tmp )
+			if sample is not None:
+				project.append(sample)
+				try:
+					writeReport(sample, arguments)
+				except Exception:
+					print("Couldn't write PDF report.")
+				try:
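+					# Reload summary.json, append this sample's mapping/trimming/Kraken metrics and write it back after each sample.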
+					batch_json = json.load(open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "r"), object_pairs_hook=OrderedDict)
+					if sample.mappingRes is None:
+						pMapping = 0
+						nMapping = 0
+					else:
+						pMapping = sample.mappingRes.percentAlignedReads
+						nMapping = sample.mappingRes.numberOfAlignedReads
+					if sample.krakenRes is None:
+						kraken = 0
+						kraken_reports ={}
+					else:
+						kraken = sample.krakenRes.pClassified
+						batch_json["kraken"][sample.name] = json.loads(str_to_json(sample.krakenRes.report))
+						#kraken_reports[sample.name] = json.loads(str_to_json(sample.krakenRes.report))
+					tmp_overview.write(",".join([sample.name, str(sample.nTrimmed()), str(nMapping), str(kraken)]) +"\n")
+					batch_json["summary"].append(
+					#jsonfile_dict.append(
+							OrderedDict([("setname", sample.name),
+										 ("Map to reference [#]", nMapping),
+										 ("Map to reference [%]", pMapping),
+										 ("Reads after trimming [#]", sample.nTrimmed()),
+										 ("Reads after trimming [%]", sample.pTrimmed()),
+										 ("Classified (Kraken)", kraken),
+										 ("Shorter fragments", sample.get_mean_trimRes()),
+										 ("Trim Parameter", sample.readSets[0].trimRes.print_param(arguments["palindrome"],arguments["minlen"],arguments["trimOption"])),
+										 ("images", [x.get_all_qc_dict() for x in sample.readSets])]))
+
+					json.dump(batch_json, open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "w"))
+
+					#boxplot_data.append(sample.get_all_qc_plots())
+				except Exception:
+					print("Couldn't produce HTML report.")
+			sample = None
+			i+=1
+
+		try:
+			#json.dump({"summary": jsonfile_dict, "commandline":arguments, "kraken":kraken_reports},open(join(arguments["output"], "QCResults/Report/src", "summary.json"),"w"))
+
+			# plot boxplots
+			boxplots = [{"file":"Per_sequence_quality_scores.csv",
+						 "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_quality_scores.png"),
+						 "title": "Per sequence quality scores",
+						 "ylab": "Mean Sequence Quality (Phred Score)",
+						"xlab": "Sample" },
+						{"file": "Sequence_Length_Distribution.csv",
+						 "output": join(arguments["output"], "QCResults/Report/src/img", "Sequence_Length_Distribution.png"),
+						 "title": "Sequence Length Distribution",
+						 "ylab": "Sequence Length (bp)",
+						 "xlab": "Sample"},
+						{"file": "Per_sequence_GC_content.csv",
+						 "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_GC_content.png"),
+						 "title": "Per sequence GC content",
+						 "ylab": "Mean GC content (%)",
+						 "xlab": "Sample"} ]
+			for plot in boxplots:
+				process = subprocess.Popen(" ".join(["Rscript --vanilla ",join(os.path.dirname(__file__) ,"boxplot.R"),
+						join(arguments["output"],"QCResults", "Report", "src", plot["file"]), plot["output"], '"'+ plot["title"]+'"', '"'+plot["xlab"]+'"','"'+ plot["ylab"]+'"']),
+						stderr=subprocess.PIPE, stdout= subprocess.PIPE, shell =True)
+				for line in iter(process.stderr.readline, b''):
+					print(line)
+				process.communicate()
+			#plot barplots
+			tmp_overview.close()
+			process = subprocess.Popen(" ".join(["Rscript --vanilla ", join(os.path.dirname(__file__), "barplot.R"),join(tmp, "overview.csv"),
+												 join(arguments["output"], "QCResults/Report/src/img")]),stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
+			for line in iter(process.stderr.readline, b''):
+				print(line)
+			process.communicate()
+			shutil.copy(join(os.path.dirname(__file__), "batch_report.html"), join(arguments["output"], "QCResults", "Report", "batch_report.html"))
+
+			summary_latex_name = join(arguments["output"], "QCResults", "Report", "summary.latex")
+			with open(summary_latex_name, "w") as summary_latex:
+				summary_latex.write("\\rowcolors{2}{gray!10}{}\n\\begin{table}[H]\\begin{tabular}{lcccccc}\n")
+				summary_latex.write(
+					"\\rowcolor{tableBlue} \\textbf{Sample} & \\textbf{Map to reference [\#]} & \\textbf{Map to reference [\%]} & \\textbf{Reads after trimming [\#]} & \\textbf{Reads after trimming [\%]} & \\textbf{Classified (Kraken)} & \\textbf{Shorter fragments [\%]} \\\\\n")
+
+				for i in range(len(batch_json["summary"])):
+					summary_latex.write(
+						str(batch_json["summary"][i]["setname"]) + " & " +
+						str(batch_json["summary"][i]["Map to reference [#]"]) + " & " +
+						str(batch_json["summary"][i]["Map to reference [%]"]) + " & " +
+						str(batch_json["summary"][i]["Reads after trimming [#]"]) + " & " +
+						str(batch_json["summary"][i]["Reads after trimming [%]"]) + " & " +
+						str(batch_json["summary"][i]["Classified (Kraken)"]) + " & " +
+						str(batch_json["summary"][i]["Shorter fragments"]) + "\\\\\n"
+					)
+				summary_latex.write("\\end{tabular}\n")
+				summary_latex.write("\\caption{Statistics on reads after trimming.}\n")
+				summary_latex.write("\\end{table}\n")
+		except Exception:
+			print("Couldn't produce the HTML report.")
+	finally:
+
+		shutil.rmtree(tmp)
+		delete_files(join(arguments["output"],"QCResults"), "FastQC", "_fastqc.html")
+		delete_files(join(arguments["output"],"QCResults"), "Trimmed/FastQC", "_fastqc.html")
+
+if __name__ == "__main__":
+	config = configparser.ConfigParser()
+	config.read(join(dirname(__file__), "config.txt"))
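+	# config.txt may provide a default Kraken database; the -db command line option overrides it.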
+	kraken_db = ""
+	if "DEFAULT" in config:
+		if "kraken_db" in config["DEFAULT"].keys():
+			kraken_db = config["DEFAULT"]["kraken_db"]
+
+	parser = argparse.ArgumentParser()
+	parser.add_argument('-input', dest='input', help = "Input sample folder. Illumina filenames have to end with _<lane>_<R1|R2>_<number>, e.g. Sample_12_345_R1_001.fastq", required=False)
+	parser.add_argument('-1', dest='r1', help = "Input file (R1). The Illumina filename does not have to match the <project>_<lane>_<R1|R2>_<number> name pattern", required=False)
+	parser.add_argument('-2', dest='r2', help = "Input file (R2)", required=False)
+
+	parser.add_argument('-output', dest='output', default="")
+	parser.add_argument('-technology', dest='technology',choices=['Illumina',"IonTorrent"] ,required = True)
+	parser.add_argument('-threads', dest='threads', default = 4, type = int)
+	parser.add_argument('-adapter', dest = 'adapter', choices = ['TruSeq2-PE', 'TruSeq2-SE' , 'TruSeq3-PE' , 'TruSeq3-SE' , 'TruSeq3-PE-2', 'NexteraPE-PE' ])
+	parser.add_argument('-reference', dest='reference', required = False, help = "Map reads against reference")
+	parser.add_argument('-index', dest='index', required = False, help = "Mapping index")
+	parser.add_argument('-sav', dest='sav', required=False, help="Illumina run folder for SAV. Requires RunInfo.xml, RunParameters.xml and the InterOp folder.")
+
+	parser.add_argument('-save_mapping', action = "store_true")
+	parser.add_argument('-db', dest='db', help='Kraken database', required = False, default= kraken_db) #"/home/andruscha/programs/kraken/minikraken_20141208/"
+	parser.add_argument('-palindrome', dest='palindrome', help='Palindrome parameter used for adapter clipping (30 or 1000). Default: 30', default=30, type=int)
+	parser.add_argument('-minlen', dest='minlen', help='MINLEN parameter for Trimmomatic. Default: 50', default=50, type=int)
+	parser.add_argument('-trimOption', dest="trimOption", help='Use MAXINFO or SLIDINGWINDOW for Trimmomatic: MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>. Default: SLIDINGWINDOW:4:15', type=str)
+	parser.add_argument('-version', action='version', version='%(prog)s v' + __version__)
+	parser.add_argument('-nokraken', action = "store_true")
+	parser.add_argument('-nomapping', action = "store_true")
+	parser.add_argument('-trimBetter', action = "store_true")
+	parser.add_argument('-trimBetter_threshold', dest='trimBetter_threshold', help = "Default setting for Illumina: 0.15 and for IonTorrent: 0.25.", required=False, type=float)
+	parser.add_argument('-forAssembly', action = "store_true", help = "Trim parameters are optimized for assembly (trims more aggressively).")
+	parser.add_argument('-forMapping', action = "store_true", help = "Trim parameters are optimized for mapping (allows more errors).")
+
+	arguments = vars(parser.parse_args())
+	main(arguments)
diff --git a/barplot.R b/barplot.R
new file mode 100755
index 0000000..bcf3b94
--- /dev/null
+++ b/barplot.R
@@ -0,0 +1,28 @@
+library(ggplot2)
+args = commandArgs(trailingOnly=TRUE)
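+# args: [1] overview.csv written by QCumber.py, [2] output directory for the barplot PNGs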
+
+summary<- as.data.frame(read.csv(args[1]))
+
+ggplot(summary, aes(x=Sample, y = Trimmed )) +
+  geom_bar(stat = "identity",  fill="#67a9cf") +
+  theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
+  ggtitle("Number of reads after trimming") +
+  xlab("Sample")+
+  ylab("Reads [#]")
+ggsave(paste(args[2], "/trimming.png", sep=""))
+
+ggplot(summary, aes(x=Sample, y = Mapped )) +
+  geom_bar(stat = "identity",  fill="#67a9cf") +
+  theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
+  ggtitle("Number of reads mapped to reference") +
+  xlab("Sample")+
+  ylab("Reads [#]")
+ggsave(paste(args[2], "/mapping.png", sep=""))
+
+ggplot(summary, aes(x=Sample, y = Classified )) +
+  geom_bar(stat = "identity",  fill="#67a9cf") +
+  theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
+  ggtitle("Percentage of reads classified by Kraken") +
+  xlab("Sample")+
+  ylab("Reads [%]")
+ggsave(paste(args[2], "/kraken.png", sep=""))
diff --git a/batch_report.html b/batch_report.html
index af91d19..8c8c574 100755
--- a/batch_report.html
+++ b/batch_report.html
@@ -3,20 +3,26 @@
 <head>
     <meta charset="utf-8">
     <title>Batch Report</title>
-    <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.12.0/jquery.min.js"></script>
-    <script src="https://ajax.googleapis.com/ajax/libs/angularjs/1.4.5/angular.min.js"></script>
-    <script src="http://d3js.org/d3.v3.min.js"></script>
+    <script src="lib/jquery.min.js"></script>
+    <script src="lib/angular.min.js"></script>
+    <script src="lib/d3.v3.min.js"></script>
     <!-- Latest compiled and minified CSS -->
-    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css">
+    <link rel="stylesheet" href="lib/bootstrap.min.css">
     <!-- Optional theme -->
-    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap-theme.min.css">
+    <link rel="stylesheet" href="lib/bootstrap-theme.min.css">
     <!-- Latest compiled and minified JavaScript -->
-    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"></script>
+    <script src="lib/bootstrap.min.js"></script>
     <style>
         .row_hidden {
             display: none;
         }
+		.panel {
+		background-color:#F7F7F7;
 
+		}
+		.panel-heading{
+		cursor:pointer;
+		}
         .show_row {
             display: show;
         }
@@ -43,8 +49,9 @@
             box-shadow: 0 1px 3px rgba(0, 0, 0, .25);
         }
 
-        #sidebar {
-            float: both;
+        .sidebar {
+
+			overflow-y: auto;
             position: fixed;
             height: 100%;
             top: 50px;
@@ -53,17 +60,22 @@
             box-shadow: 0 2px 10px 0 rgba(0, 0, 0, 0.12);
             background: #f7f7f7;
         }
-
-        #header {
-            background: #336AA2;
-            position: fixed;
-            width: 100%;
-            height: 50px;
-            top: 0;
-            z-index: 1000;
-            box-shadow: 0 2px 4px 0 rgba(0, 0, 0, 0.16), 0 2px 10px 0 rgba(0, 0, 0, 0.12);
-        }
-
+		.navbar-default{
+		background-image: linear-gradient(to bottom, #1D95B6 0px, #0C6A89 100%);
+		}
+	.navbar-default .navbar-nav > .active > a, .navbar-default .navbar-nav > .open > a{
+		background-image: linear-gradient(to bottom, rgb(32, 162, 198) 0px, rgb(21, 117, 149) 100%);
+
+	}
+	.navbar-default .navbar-brand {
+	color: white;
+	}
+        h4{
+		color: #0A6780;
+		}
+        h5{
+		color: #0D7793;
+		}
         .node {
             cursor: pointer;
         }
@@ -83,47 +95,21 @@
             stroke: #ccc;
             stroke-width: 1.5px;
         }
+		.table{
+		margin-bottom: auto;}
+
     </style>
     <script>
-        $(window).load(function () {
-            $("#main").css({left: $('#sidebar').width()});
-        });
-
-        $(document).ready(function () {
-
-            loadGallery(true, 'a.thumbnail');
+        //$(window).load(function () {
+        //    $("#main").css({left: $('#sidebar').width()});
+        //});
 
-            function loadGallery(setIDs, setClickAttr) {
-                var current_image,
-                        selector,
-                        counter = 0;
-
-                function updateGallery(selector) {
-                    var $sel = selector;
-                    current_image = $sel.data('image-id');
-                    $('#image-gallery-caption').text($sel.data('caption'));
-                    $('#image-gallery-title').text($sel.data('title'));
-                    $('#image-gallery-image').attr('src', $sel.data('image'));
-                    disableButtons(counter, $sel.data('image-id'));
-                }
-
-                if (setIDs == true) {
-                    $('[data-image-id]').each(function () {
-                        counter++;
-                        $(this).attr('data-image-id', counter);
-                    });
-                }
-                $(setClickAttr).on('click', function () {
-                    updateGallery($(this));
-                });
-            }
-        });
 
         //Start d3 tree
         function d3_kraken(root, id) {
             var margin = {top: 20, right: 120, bottom: 20, left: 120},
                     width = $(document).width(),//1600 - margin.right - margin.left,
-					height =$(document).height();
+                    height = $(document).height();
 
             var i = 0,
                     duration = 750//,root
@@ -157,6 +143,7 @@
                     d.children = null;
                 }
             }
+
             function uncollapse(d) {
                 if (d._children) {
                     d.children = d._children;
@@ -173,23 +160,23 @@
             d3.select(self.frameElement).style("height", "800px");
 
             function update(source) {
-var levelWidth = [1];
-        var childCount = function(level, n) {
+                var levelWidth = [1];
+                var childCount = function (level, n) {
 
-            if (n.children && n.children.length > 0) {
-                if (levelWidth.length <= level + 1) levelWidth.push(0);
+                    if (n.children && n.children.length > 0) {
+                        if (levelWidth.length <= level + 1) levelWidth.push(0);
 
-                levelWidth[level + 1] += n.children.length;
-                n.children.forEach(function(d) {
-                    childCount(level + 1, d);
-                });
-            }
-        };
-        childCount(0, root);
+                        levelWidth[level + 1] += n.children.length;
+                        n.children.forEach(function (d) {
+                            childCount(level + 1, d);
+                        });
+                    }
+                };
+                childCount(0, root);
 
 
-        var newHeight = d3.max(levelWidth) * 100; // 25 pixels per line
-        tree = tree.size([newHeight, width]);
+                var newHeight = d3.max(levelWidth) * 100; // 25 pixels per line
+                tree = tree.size([newHeight, width]);
                 // Compute the new tree layout.
                 var nodes = tree.nodes(root).reverse(),
                         links = tree.links(nodes);
@@ -212,8 +199,10 @@ var levelWidth = [1];
                             return "translate(" + source.y0 + "," + source.x0 + ")";
                         })
                         .on("click", click)
-.attr("data-toggle","tooltip")
-                        .attr("title", function(d) { return d.name +"\n" + d.perc + "%";})
+                        .attr("data-toggle", "tooltip")
+                        .attr("title", function (d) {
+                            return d.name + "\n" + d.perc + "%";
+                        })
                         ;
 
                 nodeEnter.append("circle")
@@ -313,9 +302,9 @@ var levelWidth = [1];
                 } else {
                     d.children = d._children;
                     d._children = null;
-                    if ( d.name == "root"){
-                         d.children.forEach(uncollapse);
-                     }
+                    if (d.name == "root") {
+                        d.children.forEach(uncollapse);
+                    }
                 }
                 update(d);
             }
@@ -323,38 +312,64 @@ var levelWidth = [1];
         ;
         // end d3 tree
         var myApp = angular.module('BatchReport', []);
-        myApp.controller('SummaryTable', ['$scope', function ($scope) {
+        myApp.controller('SummaryTable',function ($scope, $http) {
             // create a projects Object
             $scope.projects = {};
             // pretend data we just got back from the server
             // this is an ARRAY of OBJECTS
             $scope.information = {};
-            $scope.information.commandline = @= commandline =@;
+            //$scope.information.commandline= {"minlen": 50, "index": null, "blobplot": false, "output": "", "nomapping": true, "r1": ["/home/lieuv/testdaten/Subsampled_Testdata/HCV/subsampled/16-204_S3_L001_R1_001.fastq", "/home/lieuv/testdaten/Subsampled_Testdata/HCV/subsampled/16-204_S3_L002_R1_001.fastq"], "reference": "", "threads": 4, "forMapping": false, "palindrome": 30, "technology": "Illumina", "adapter": "NexteraPE-PE", "r2": ["/home/lieuv/testdaten/Subsampled_Testdata/HCV/sub [...]
 
-            $scope.projects.samples = @= json_samples =@;
             $scope.show_summary = {};
             $scope.show_img = {};
             $scope.show_raw_data = true;
 
-            $scope.show_kraken = "";
-            $scope.kraken = @= kraken =@;
+            $scope.sequencer_information = [];
+			$scope.show_kraken = "";
+			$scope.kraken = {};
+
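+			// Load the summary written by QCumber.py and draw one Kraken tree per sample.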
+			$http.get('src/summary.json')
+		   .success(function(res){
+				$scope.information.commandline= res.commandline;
+				$scope.information.versions= res.versions;
+				$scope.projects.samples = res.summary;
+				$scope.kraken = res.kraken;
+				 $.each($scope.kraken, function (key, value) {
+			           d3_kraken(value, key)
+            });
+			});
+
+
+			$http.get('src/sav.json')
+		   .success(function(res){
+				$scope.sequencer_information = res;
+			});
+
+
             $.each($scope.kraken, function (key, value) {
+
                 d3_kraken(value, key)
             });
             $scope.$watch("show_kraken", function (newVal, oldVal) {
-                d3.select("#kraken_" + newVal).attr("class", "show_row");
+			    d3.select("#kraken_" + newVal).attr("class", "show_row");
                 d3.select("#kraken_" + oldVal).attr("class", "row_hidden");
 
             });
             $scope.collapse = true;
+			$scope.isArray = angular.isArray;
+			$scope.isString = angular.isString;
             $scope.$watch("collapse", function (newVal, oldVal) {
                 d3.select("#kraken_" + newVal).attr("class", "show_row");
                 d3.select("#kraken_" + oldVal).attr("class", "row_hidden");
 
             });
+			$scope.selected = {title:"", img:"", caption:""};
 
+        }
 
-        }]);
+		);
 
         myApp.filter('imgFilter', function () {
             return function (input, filter) {
@@ -393,92 +408,247 @@ var levelWidth = [1];
 <div ng-app="BatchReport">
 
     <!-- head Quality Control-->
-    <div class="row" id="header">
+    <nav class="navbar navbar-default navbar-fixed-top" >
+ <div class="container-fluid">
+    <div class="navbar-header">
+      <a class="navbar-brand" href="#">Quality Control</a>
+    </div>
+    <ul class="nav navbar-nav">
 
-        <div class="col-md-3"></div>
-        <div class="col-md-3">
-            <a href="#summary_plots" style="font-size: x-large;color: aliceblue;" data-toggle="tab">Quality Control</a>
-        </div>
-        <div class="col-md-1">
-            <a href="#summary" style="color: aliceblue;">Summary Table</a>
-        </div>
-        <div class="col-md-1">
-            <a href="#fastqc" style="color: aliceblue;" data-toggle="tab">FastQC</a>
-        </div>
-        <div class="col-md-1">
-            <a href="#kraken_report" style="color: aliceblue;" data-toggle="tab">Kraken</a>
-        </div>
-        <div class="col-md-1">
-            <a href="#information" style="color: aliceblue;" data-toggle="tab">Information</a>
-        </div>
+      <li><a href = "#sequencer_information" style="color: aliceblue;" data-toggle="tab">Sequencer Information</a></li>
+      <li class="active" ><a href="#summary" style="color: aliceblue;" data-toggle="tab">Summary Table</a></li>
+      <li> <a href="#fastqc" style="color: aliceblue;" data-toggle="tab">FastQC</a></li>
+      <li>  <a href="#kraken_report" style="color: aliceblue;" data-toggle="tab">Kraken</a></li>
+      <li>  <a href="#information" style="color: aliceblue;" data-toggle="tab">Information</a></li>
+    </ul>
+  </div>
 
-    </div>
+
+    </nav>
     <!-- END head Quality Control-->
 
     <!-- Summary Table Controller-->
     <div ng-controller="SummaryTable">
         <div class="row" style="margin-top: 50px">
-            <!--Sidebar-->
-            <div class="col-md-2" id="sidebar">
-                <h4 style="color: #336AA2">Filter by ...</h4>
-                <br/>
-                <h5 style="color: #336AA2">... dataset</h5>
-                <p>
-                    <input type="checkbox" ng-model="show_raw_data"/> show raw data
-                </p>
-                <br/>
-                <!--Filter by image name-->
-                <h5 style="color: #336AA2">..FastQC Images</h5>
-                <p ng-repeat="(type,bla) in projects.samples[0].images[0].raw[0]">
-                    <input type="checkbox" ng-model="show_img[bla.name]" ng-init="show_img[bla.name]=true"/>
-                    {{bla.name}}
-                </p>
-
-                <!-- END Filter by image name-->
-                <!--Filter by trimmed /untrimmed-->
-
-                <!-- END Filter by trimmed /untrimmed-->
 
-            </div>
-            <!-- END Sidebar-->
-
-            <div class="col-md-10" id="main" style="float: both;padding-left: 30px;">
-                <!-- Summary Table -->
-                <h4 id="summary" style="color:#336AA2">Summary</h4>
-                <table class="table table-striped">
-                    <thead style="font-weight:600;color:#336AA2">
-                    <tr>
-                        <td ng-repeat="(headline,bla) in projects.samples[0]" ng-if="headline!='images'">{{headline}}
-                        </td>
-                    </tr>
-                    </thead>
-                    <tr ng-repeat="sample in projects.samples | orderBy:'setname'">
-                        <td ng-repeat="(key,value) in sample " ng-if="key!='images'">
-                            <input type="checkbox" ng-model="show_summary[value]" ng-if="key=='setname'"
-                                   ng-init="show_summary[value]=true"/> {{value}}
-                        </td>
-
-                    </tr>
-                </table>
-                <!-- END Summary Table -->
+
+            <div class="col-md-12" id="main">
+
 
                 <!-- TAB-CONTENT-->
                 <div class="tab-content">
-                    <!-- FASTQC Images-->
-                    <div class="tab-pane fade " id="kraken_report">
+				<div class="tab-pane fade" id="sequencer_information">
+				 <!--Sidebar-->
+					<div class="col-md-2 sidebar" >
+					</br>
+					<ul class="nav">
+						  <li><a href = "#sequencer_information_table"><h5>Sequencer Information</h5></a></li>
+						  <li>
+						  <a href = "#sequencer_information_plots" ><h5>Sequencer Plots</h5></a></li>
+
+					</ul>
+					</div>
+					<!-- END Sidebar-->
+
+
+				<div class="col-md-10" style = "padding-left:17%;">
+                    <h4 id= "sequencer_information_table">Sequencer Information</h4>
+				<table class="table table-striped">
+							<tr ng-repeat="(attr, value) in sequencer_information.tables">
+								<td style="max-width:80px;">{{attr}}</td>
+								<td ng-if = "isArray(value)==false && isString(value)==true">{{value}}</td>
+								<td ng-if = "isArray(value)==true  " >
+									<table class="table table-sm"  style="background-color:inherit; width : auto;" >
+									<tr>
+									<td ng-repeat = "(headline, value) in value[0]">{{headline}}</td>
+									</tr>
+									<tr ng-repeat="nested in value"><td ng-repeat = "(headline, value) in nested">{{value}}</td></tr>
+								</table></td>
+								<td ng-if = "isArray(value)==false && isString(value)==false" >
+									<table class ="table table-sm" style="background-color:inherit;width:auto;">
+									<tr ng-repeat = "(headline, value) in value">
+									<td >{{headline}}</td>
+									<td ng-if ="isArray(value)==false"> {{value}}</td>
+									<td ng-repeat = "val in value" ng-if ="isArray(value)==true"> {{val}}</td>
+									</tr>
+
+								</table></td>
+
+							</tr>
+						</table>
+
+						<h4 id= "sequencer_information_plots">Sequencer Plots</h4>
+						<a ng-click="selected.title='Data by Cycle - Intensity'; selected.caption='This plot shows the intensity by color of the 90th percentile of the data for each cycle.'; selected.img='src/img/intensity.png'"
+							   data-image-id="" data-toggle="modal"
+							   data-target="#image-gallery">
+								<img width="400px" height="300px" ng-src="src/img/intensity.png">
+                        </a>
+						<a ng-click="selected.title='Data by Cycle - FWHM'; selected.caption='The average full width of clusters at half maximum (in pixels).'; selected.img='src/img/FWHM.png'"
+							   data-image-id="" data-toggle="modal"
+							   data-target="#image-gallery">
+								<img width="400px" height="300px" ng-src="src/img/FWHM.png">
+                        </a>
+						<a ng-click="selected.title='Data by Cycle - % Base'; selected.caption='The percentage of clusters for which the selected base has been called.'; selected.img='src/img/base_perc.png'"
+							   data-image-id="" data-toggle="modal"
+							   data-target="#image-gallery">
+								<img width="400px" height="300px" ng-src="src/img/base_perc.png">
+                        </a>
+						<a ng-click="selected.title='Data by Lane - Phasing'; selected.caption='The percentage of molecules in a cluster for which sequencing falls behind (phasing) the current cycle within a read. The graph is split out per read.'; selected.img='src/img/phasing.png'"
+							   data-image-id="" data-toggle="modal"
+							   data-target="#image-gallery">
+								<img width="400px" height="300px" ng-src="src/img/phasing.png">
+                        </a>
+						<a ng-click="selected.title='Data by Lane - Prephasing'; selected.caption='The percentage of molecules in a cluster for which sequencing jumps ahead (prephasing) of the current cycle within a read. The graph is split out per read.';
+							   selected.img='src/img/prephasing.png'"
+							   data-image-id="" data-toggle="modal"
+							   data-target="#image-gallery">
+								<img width="400px" height="300px" ng-src="src/img/prephasing.png">
+                        </a>
+
+
+						 <a ng-click="selected.caption='The density of clusters for each tile (in thousand per mm2)'; selected.title='Data by Lane - Density'; selected.img='src/img/density.png'"
+							   data-image-id="" data-toggle="modal" data-target="#image-gallery">
+								<img width="400px" height="300px" ng-src="src/img/density.png">
+                        </a>
+						<a ng-click="selected.title='QScore Distribution'; selected.caption='The Q-score distribution shows the number of reads by quality score. The quality score is cumulative for the current cycle and previous cycles, and only
+							   reads that pass the quality filter are included. The Q-score is based on the Phred scale.'; selected.img='src/img/qscore_distr.png'"
+							   data-image-id="" data-toggle="modal" data-target="#image-gallery">
+								<img width="400px" height="300px" ng-src="src/img/qscore_distr.png">
+                        </a>
+						 <a ng-click="selected.title='QScore Heatmap'; selected.caption='The Q-score heat map shows the Q-score by cycle for all lanes.'; selected.img='src/img/qscore_heatmap.png'"
+							   data-image-id="" data-toggle="modal" data-target="#image-gallery">
+								<img width="500px" height="300px" ng-src="src/img/qscore_heatmap.png">
+                        </a>
+
+
+						</div>
+
+
+					</div>
+					<!-- END Sequencer Information content -->
+                    <div class="tab-pane fade in active" id="summary">
+                        <!-- Summary Table -->
+                        <h4>Summary</h4>
+                        <table class="table table-striped">
+                            <thead style="font-weight:600;color:#0D7793">
+                            <tr>
+                                <td ng-repeat="(headline,bla) in projects.samples[0]" ng-if="headline!='images'">
+                                    {{headline}}
+                                </td>
+                            </tr>
+                            </thead>
+                            <tr ng-repeat="sample in projects.samples | orderBy:'setname'">
+                                <td ng-repeat="(key,value) in sample " ng-if="key!='images'">
+                                    <input type="checkbox" ng-model="show_summary[value]" ng-if="key=='setname'"
+                                           ng-init="show_summary[value]=true"/> {{value}}
+                                </td>
+
+                            </tr>
+                        </table>
+                        <!-- END Summary Table -->
+						<div class="col-md-4">
+
+						<a ng-click="selected.title='Per sequence quality scores'; selected.caption=''; selected.img='src/img/Per_sequence_quality_scores.png'"
+							   data-image-id="" data-toggle="modal"
+							   data-target="#image-gallery">
+								<img height="500px" ng-src="src/img/Per_sequence_quality_scores.png">
+                        </a>
+						</div>
+						<div class="col-md-4">
+
+                        <a ng-click="selected.title='Per sequence GC content'; selected.caption=''; selected.img='src/img/Per_sequence_GC_content.png'"
+							   data-image-id="" data-toggle="modal"
+							   data-target="#image-gallery">
+								<img height="500px" ng-src="src/img/Per_sequence_GC_content.png">
+                        </a>
+						</div>
+						<div class="col-md-4">
+
+                        <a ng-click="selected.title='Sequence length distribution'; selected.caption=''; selected.img='src/img/Sequence_Length_Distribution.png'"
+							   data-image-id="" data-toggle="modal"
+							   data-target="#image-gallery">
+								<img height="500px" ng-src="src/img/Sequence_Length_Distribution.png">
+                        </a>
+						</div>
+                        </br>
+ 						<div class="col-md-4">
+
+                        <a ng-click="selected.title='Number of reads remaining after trimming'; selected.caption=''; selected.img='src/img/trimming.png'"
+							   data-image-id="" data-toggle="modal"
+							   data-target="#image-gallery">
+								<img height="500px" ng-src="src/img/trimming.png">
+                        </a>
+						</div>
+                        <div class="col-md-4">
+
+                        <a ng-click="selected.title='Number of reads mapped to reference'; selected.caption=''; selected.img='src/img/mapping.png'"
+							   data-image-id="" data-toggle="modal"
+							   data-target="#image-gallery">
+								<img height="500px" ng-src="src/img/mapping.png">
+                        </a>
+						</div>
+
+                        <div class="col-md-4">
+
+                        <a ng-click="selected.title='Percentage of reads classified by Kraken'; selected.caption=''; selected.img='src/img/kraken.png'"
+							   data-image-id="" data-toggle="modal"
+							   data-target="#image-gallery">
+								<img height="500px" ng-src="src/img/kraken.png">
+                        </a>
+						</div>
 
-                        Show Kraken report of : <select id="select_kraken" ng-model="show_kraken">
-                        <option value="" disabled selected hidden>Choose sample...</option>
-                        <option ng-repeat="(name, value) in kraken" value="{{name}}">{{name}}</option>
-                    </select>
-                        <table id="kraken"></table>
-                    </div>
-                    <div class="tab-pane fade in active" id="summary_plots">
                     </div>
-                    <div class="tab-pane fade in active" id="fastqc">
-                        <div style="width: 100%; " class="row">
 
-                            <div style="display: table-row;" class="col-md-10">
+
+                    <div class="tab-pane fade" id="fastqc">
+                        <div style="width: 100%; " class="row">
+					 <!--Sidebar-->
+								<div class="col-md-2 sidebar" >
+									<h4>Filter by ...</h4>
+									<br/>
+									<div class="panel panel-default" >
+									<div class="panel-heading">
+									<h5 class="panel-title" data-toggle="collapse" data-target="#show_sample">... sample</h5>
+
+									</div>
+									<div id = "show_sample" >
+									<p  ng-repeat="(sample, value) in show_summary">
+										<input type="checkbox" ng-model="show_summary[sample]" ng-init="show_summary[sample] = value" /> {{sample}}
+									</p>
+									</div>
+</div>
+                            </tr>
+																<div class="panel panel-default" >
+									<div class="panel-heading">
+									<h5 class="panel-title" data-toggle="collapse" data-target="#show_raw">... dataset</h5>
+
+									</div>
+									<div id = "show_raw" class = "collapse"  >
+									<p>
+										<input type="checkbox" ng-model="show_raw_data"/> show raw data
+									</p>
+									</div>
+</div>
+									<!--Filter by image name-->
+
+									<div class="panel panel-default" >
+									<div class="panel-heading">
+									<h5 class="panel-title" data-toggle="collapse" data-target="#show_fastqc">... FastQC Images</h5>
+				</div>
+				<div class = "collapse" id="show_fastqc">
+									<p ng-repeat="(type,bla) in projects.samples[0].images[0].raw[0]">
+										<input type="checkbox" ng-model="show_img[bla.name]" ng-init="show_img[bla.name]=true"/>
+										{{bla.name}}
+									</p>
+</div></div>
+									<!-- END Filter by image name-->
+									<!--Filter by trimmed /untrimmed-->
+
+									<!-- END Filter by trimmed /untrimmed-->
+
+								</div>
+					<!-- END Sidebar-->
+                            <div style="display: table-row;padding-left:17%;" class="col-md-10">
 
                                 <div class="sample_box"
                                      ng-repeat="sample in projects.samples | orderBy:'setname'| typeFilter: show_summary ">
@@ -490,7 +660,9 @@ var levelWidth = [1];
                                         <div style="display:inline-block">
                                             <a class="thumbnail"
                                                ng-repeat="img in sample_img.raw[0]| imgFilter: show_img" href="#"
-                                               data-image-id="" data-toggle="modal" data-title="Raw {{sample.setname}} R1"
+											   ng-click="selected.caption=img.name; selected.title='Raw '+sample.setname +' R1'; selected.img=img.file"
+                                               data-image-id="" data-toggle="modal"
+                                               data-title="Raw {{sample.setname}} R1"
                                                data-caption="{{img.name}}" data-image="{{img.file}}"
                                                data-target="#image-gallery">
                                                 <img width="200px" height="150px" ng-src="{{img.file}}"
@@ -501,7 +673,9 @@ var levelWidth = [1];
                                         <div style="display:inline-block">
                                             <a class="thumbnail"
                                                ng-repeat="img in sample_img.raw[1]| imgFilter: show_img" href="#"
-                                               data-image-id="" data-toggle="modal" data-title="Raw {{sample.setname}} R2"
+											    ng-click="selected.caption=img.name; selected.title='Raw ' +  sample.setname + ' R2'; selected.img=img.file"
+                                               data-image-id="" data-toggle="modal"
+                                               data-title="Raw {{sample.setname}} R2"
                                                data-caption="{{img.name}}" data-image="{{img.file}}"
                                                data-target="#image-gallery">
                                                 <img width="200px" height="150px" ng-src="{{img.file}}"
@@ -514,7 +688,9 @@ var levelWidth = [1];
                                         <div style="display:inline-block">
                                             <a class="thumbnail"
                                                ng-repeat="img in sample_img.trimmed[0]| imgFilter: show_img" href="#"
-                                               data-image-id="" data-toggle="modal" data-title="Trimmed {{sample.setname}} R1"
+											    ng-click="selected.caption=img.name;  selected.title='Trimmed ' +  sample.setname + ' R1'; selected.img=img.file"
+                                               data-image-id="" data-toggle="modal"
+                                               data-title="Trimmed {{sample.setname}} R1"
                                                data-caption="{{img.name}}" data-image="{{img.file}}"
                                                data-target="#image-gallery">
                                                 <img width="200px" height="150px" ng-src="{{img.file}}"
@@ -525,7 +701,9 @@ var levelWidth = [1];
                                         <div style="display:inline-block">
                                             <a class="thumbnail"
                                                ng-repeat="img in sample_img.trimmed[1]| imgFilter: show_img" href="#"
-                                               data-image-id="" data-toggle="modal" data-title="Trimmed {{sample.setname}} R2"
+											    ng-click="selected.caption=img.name; selected.title='Trimmed ' +  sample.setname + ' R2'; selected.img=img.file"
+                                               data-image-id="" data-toggle="modal"
+                                               data-title="Trimmed {{sample.setname}} R2"
                                                data-caption="{{img.name}}" data-image="{{img.file}}"
                                                data-target="#image-gallery">
                                                 <img width="200px" height="150px" ng-src="{{img.file}}"
@@ -538,48 +716,70 @@ var levelWidth = [1];
                         </div>
                     </div>
                     <!-- END FastQC -->
+
+					   <!-- Kraken Report -->
+                    <div class="tab-pane fade " id="kraken_report">
+
+                        Show Kraken report of : <select id="select_kraken" ng-model="show_kraken">
+                        <option value="" disabled selected hidden>Choose sample...</option>
+                        <option ng-repeat="(name, value) in kraken" value="{{name}}">{{name}}</option>
+                    </select>
+                        <table id="kraken"></table>
+                    </div>
+					<!-- END Kraken Report -->
+
+
                     <div class="tab-pane fade" id="information">
                         <h4>Commandline</h4>
 
-                        <table class="table">
+                        <table class="table table-striped">
                             <tr ng-repeat="(attr, value) in information.commandline" ng-if="value !=''">
                                 <td>{{attr}}</td>
                                 <td>{{value}}</td>
                             </tr>
                         </table>
+						<h4>Version Control</h4>
+
+                        <table class="table table-striped">
+                            <tr ng-repeat="(attr, value) in information.versions" ng-if="value !=''">
+                                <td>{{attr}}</td>
+                                <td>{{value}}</td>
+                            </tr>
+                        </table>
                     </div>
                     <!-- END tab content-->
                 </div>
 
             </div>
         </div>
-    </div>
-    <!-- END div row-->
-    <!-- Trigger the modal with a button -->
-
-    <!-- Modal -->
+		 <!-- Modal -->
     <div class="modal fade" id="image-gallery" tabindex="-1" role="dialog" aria-labelledby="myModalLabel"
          aria-hidden="true">
-        <div class="modal-dialog">
+        <div class="modal-dialog modal-lg">
             <div class="modal-content">
                 <div class="modal-header">
                     <button type="button" class="close" data-dismiss="modal" aria-label="Close"><span
                             aria-hidden="true">×</span></button>
-                    <div class="col-md-8 text-justify" id="image-gallery-title">
+                    <div class="col-md-8 text-justify" id="image-gallery-title" > <h4>{{selected.title}}</h4>
                     </div>
 
                 </div>
-                <div class="modal-body">
-                    <img id="image-gallery-image" class="img-responsive" src="">
+                <div class="modal-body modal-lg">
+                    <img id="image-gallery-image" style="height:100%;width:100%;" class="img-responsive" src="{{selected.img}}">
                 </div>
                 <div class="modal-footer">
-                    <div class="col-md-8 text-justify" id="image-gallery-caption">
+                    <div class="col-md-12 text-justify" id="image-gallery-caption">{{selected.caption}}
                     </div>
                 </div>
             </div>
         </div>
         <!-- END Modal -->
     </div>
+    <!-- END div row-->
+    <!-- Trigger the modal with a button -->
+
+
+    </div>
     <!-- END Summary Table Controller-->
 
 </div>
@@ -588,4 +788,4 @@ var levelWidth = [1];
 
 
 </body>
-</html>
+</html>
\ No newline at end of file
diff --git a/boxplot.R b/boxplot.R
new file mode 100755
index 0000000..c347713
--- /dev/null
+++ b/boxplot.R
@@ -0,0 +1,23 @@
+#!/usr/bin/env Rscript
+library(ggplot2)
+args = commandArgs(trailingOnly=TRUE)
+mytable<- read.csv(args[1])
+mytable<- mytable[which(mytable$Count>0),]
+if(!any(is.na(mytable$Read))){
+  ggplot(mytable, aes(fill=Read,group=interaction(Sample, Trimmed, Read,Lane),x=interaction(Sample, Lane), y=Value, weight=Count)) +  
+    geom_boxplot(outlier.size = 0.5) +theme(axis.text.x=element_text(angle=90, hjust=1,  vjust = 0.5)) + 
+    facet_wrap(~ Trimmed)+ 
+    scale_fill_manual(values=c("#ef8a62","#67a9cf","#D55E00",  "#999999",  "#56B4E9", "#F0E442", "#0072B2", "#CC79A7")) + 
+    ggtitle(args[3]) + 
+    xlab(args[4])+
+    ylab(args[5])
+}else{
+  ggplot(mytable, aes(fill=Sample,group=Sample,x=Sample, y=Value, weight=Count)) +  
+    geom_boxplot(outlier.size = 0.5) +theme(axis.text.x=element_text(angle=90, hjust=1,  vjust = 0.5),legend.position="none") + 
+    facet_wrap(~ Trimmed)+ 
+    scale_fill_manual(values=c("#ef8a62","#67a9cf","#D55E00",  "#999999",  "#56B4E9", "#F0E442", "#0072B2", "#CC79A7")) + 
+    ggtitle(args[3]) + 
+    xlab(args[4])+
+    ylab(args[5])
+}
+ggsave(args[2])
\ No newline at end of file
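
For orientation, boxplot.R takes five positional arguments: the per-module summary CSV, the output image path (handed to ggsave), a plot title, and the x- and y-axis labels; the CSV has to provide the Sample, Lane, Read, Trimmed, Value and Count columns the script references. A minimal invocation sketch in Python, in the spirit of how the pipeline shells out to its other R scripts (file names below are placeholders, not paths from this commit):

    import subprocess
    from os.path import dirname, join

    # Hypothetical call to boxplot.R with its five positional arguments.
    subprocess.call([
        "Rscript", "--vanilla", join(dirname(__file__), "boxplot.R"),
        "Per_sequence_GC_content.csv",           # args[1]: summary CSV
        "Per_sequence_GC_content_boxplot.png",   # args[2]: output image for ggsave
        "Per sequence GC content",               # args[3]: plot title
        "Sample",                                # args[4]: x-axis label
        "GC content [%]",                        # args[5]: y-axis label
    ])
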
diff --git a/classes.py b/classes.py
index fb181fd..f8fb79c 100755
--- a/classes.py
+++ b/classes.py
@@ -1,4 +1,4 @@
-import runQCPipeline
+import QCumber
 import datetime
 import numpy as np
 import getpass
@@ -23,24 +23,22 @@ def get_tool_path(name, section="PATH"):
 
 class Pipeline:
 	'''Get all tool versions '''
-
-	name = "Quality Control"
-	version = runQCPipeline.__version__
-	user = getpass.getuser()
-	python_version = toLatex(sys.version)
-
 	def __init__(self):
-		self.fastqc_version = toLatex(subprocess.check_output(join(get_tool_path("fastqc"), "fastqc") + " --version", shell = True).decode("utf-8"))
+
+		self.name = "Quality Control"
+		self.version = QCumber.__version__
+		self.user = getpass.getuser()
+		self.python_version = toLatex(sys.version)
+		self.fastqc_version = toLatex(subprocess.check_output(join(get_tool_path("fastqc"), "fastqc") + " --version", shell = True, stderr=subprocess.PIPE).decode("utf-8"))
 
 		try:
-			self.trimmo_version = subprocess.check_output(join(get_tool_path("trimmomatic"), "TrimmomaticSE") + " -version", shell=True).decode("utf-8")
+			self.trimmo_version = subprocess.check_output(join(get_tool_path("trimmomatic"), "TrimmomaticSE") + " -version", shell=True, stderr=subprocess.PIPE).decode("utf-8")
 		except:
-			config = configparser.ConfigParser()
-			config.read(join(dirname(__file__), "config.txt"))
-			if "VERSION" in config:
-				self.trimmo_version = config["VERSION"]["trimmomatic"]
-			else:
-				self.trimmo_version = "0.32"
+			try:
+				from debpackageinfo import get_upstream_version
+				self.trimmo_version = get_upstream_version("trimmomatic")
+			except:
+				self.trimmo_version = "< 0.32"
 		bowtie_v = subprocess.check_output(join(get_tool_path("bowtie2"), "bowtie2") + " --version", shell=True).decode("utf-8")
 		self.bowtie_version = re.match(".*\)", toLatex(bowtie_v, True)).group()
 		self.kraken_version =toLatex(subprocess.check_output( join(get_tool_path('kraken'), "kraken") + " --version", shell = True).decode("utf-8"))
@@ -53,21 +51,39 @@ class Pipeline:
 		self.machine = toLatex(system.machine)
 
 
-class Sample:
+class Sample(object):
 	"""Sample with reference to ReadSets, MappingRes, KrakenResr"""
 
-	def __init__(self, name, path, reference, threads, technology):
+	def __init__(self, name, path, reference, threads, techn):
+		global mainResultsPath
+		global num_threads
+		global technology
+		num_threads = threads
+		mainResultsPath = path
+		technology = techn
+
 		self.start = datetime.datetime.strftime(datetime.datetime.now(),"%d.%m.%Y, %H:%M:%S")
 		self.stop = None
+
 		self.name = name
-		self.technology = technology
+		self.technology = techn
+		self.threads = threads
+		self.phred = "phred33"
 		self.readSets = []
-		#self.trimmed_readSets =[]
+
+		self.mainResultsPath = path
 		self.mappingRes = None
 		self.krakenRes = None
 		self.reference = reference
-		self.mainResultsPath = path
-		self.threads = threads
+	def get_all_qc_plots(self):
+		qc_dict = OrderedDict([("setname", self.name),("raw",[]), ("trimmed",[])])
+		for rs in self.readSets:
+			qc_dict["raw"].append(rs.r1.qcRes.get_plots())
+			qc_dict["trimmed"].append(rs.trimRes.readset.r1.qcRes.get_plots())
+			if (rs.r2 is not None):
+				qc_dict["raw"].append(rs.r2.qcRes.get_plots())
+				qc_dict["trimmed"].append(rs.trimRes.readset.r2.qcRes.get_plots())
+		return qc_dict
 
 	def total_reads(self):
 		total = 0
@@ -97,12 +113,12 @@ class Sample:
 
 	def run_Kraken(self, db):
 		r1, r2 = self.get_all_reads()
-		self.krakenRes = KrakenResult(r1,r2,db, self.mainResultsPath, self.threads, splitext(self.readSets[0].r1.filename)[-1], self.name)
+		self.krakenRes = KrakenResult(r1,r2,db, self.name)
 		return self
 
 	def run_Bowtie2(self, bowtie_index, save_mapping ):
 		r1, r2 = self.get_all_reads()
-		self.mappingRes = MappingResult(self.reference, r1, r2, self.readSets[0].r1.phred, bowtie_index)
+		self.mappingRes = MappingResult(self.reference, r1, r2, bowtie_index)
 		self.mappingRes = self.mappingRes.run_bowtie(self.threads, save_mapping)
 		return self.mappingRes
 
@@ -120,7 +136,8 @@ class Sample:
 
 class ReadSet:
 	''' A ReadSet consists of r1 and an optional r2'''
-	def __init__(self,outputDir,  r1 ,r2=None):
+	def __init__(self, r1 ,r2=None):
+
 		self.r2 = None
 		if type(r1) is str:
 			self.r1 = FastQFile(r1) # FastQFile
@@ -133,7 +150,7 @@ class ReadSet:
 		self.numberOfReads = 0
 		self.numberofTrimmedReads = 0
 		self.trimRes = None
-		self.outputDir = outputDir
+		#self.outputDir = outputDir
 		self.paired = True
 		if len( basename(self.r1.filename).split("_")[:-3]) > 4:
 			self.setname = "_".join(basename(self.r1.filename).split("_")[:-3])
@@ -142,55 +159,53 @@ class ReadSet:
 		if(r2 is None):
 			self.paired = False
 
-	def run_FastQC(self, threads):
-		self.r1 = self.r1.run_FastQC(threads, self.outputDir)
+	def run_FastQC(self, output):
+		self.r1 = self.r1.run_FastQC(output)
 		if(self.r2 is not None):
-			self.r2 = self.r2.run_FastQC(threads, self.outputDir)
+			self.r2 = self.r2.run_FastQC(output)
 		return self
 
-	def run_Trimmomatic(self, adapter, threads, palindrome, outputDir, technology, minlen, trimOption, betterTrimming):
-
-		self.trimRes = TrimResult(adapter, palindrome,self.r1,self.r2, threads,outputDir, technology, minlen, trimOption, betterTrimming )
+	def run_Trimmomatic(self, adapter, palindrome, minlen, trimOption, betterTrimming):
+		self.trimRes = TrimResult(self, adapter, palindrome, minlen, trimOption, betterTrimming )
 		return self
 
 	def get_all_qc(self):
-		qc_results = [ (self.r1.qcRes, self.trimRes.r1.qcRes) ]
+		qc_results = [ (self.r1.qcRes, self.trimRes.readset.r1.qcRes) ]
 		if (self.r2 is not None):
-			qc_results.append((self.r2.qcRes, self.trimRes.r2.qcRes) )
+			qc_results.append((self.r2.qcRes, self.trimRes.readset.r2.qcRes) )
 		return qc_results
 
-
 	def get_all_qc_dict(self):
-		qc_dict =OrderedDict( {"raw":[],"trimmed":[]} )
-
+		'''qc_dict =OrderedDict( {"raw":[],"trimmed":[]} )
 		qc_dict["raw"] = [[x.__dict__ for x in self.r1.qcRes.results_to_base64()]]
 		qc_dict["trimmed"] = [[x.__dict__ for x in self.trimRes.readset.r1.qcRes.results_to_base64()]]
-
 		if (self.r2 is not None):
 			qc_dict["raw"].append([x.__dict__ for x in self.r2.qcRes.results_to_base64()])
 			qc_dict["trimmed"].append([x.__dict__ for x in self.trimRes.readset.r2.qcRes.results_to_base64()])
-
+		return qc_dict'''
+		qc_dict = OrderedDict({"raw": [], "trimmed": []})
+		qc_dict["raw"] = [[x.__dict__ for x in self.r1.qcRes.results_for_report()]]
+		qc_dict["trimmed"] = [[x.__dict__ for x in self.trimRes.readset.r1.qcRes.results_for_report()]]
+		if (self.r2 is not None):
+			qc_dict["raw"].append([x.__dict__ for x in self.r2.qcRes.results_for_report()])
+			qc_dict["trimmed"].append([x.__dict__ for x in self.trimRes.readset.r2.qcRes.results_for_report()])
 		return qc_dict
 
 
 class FastQFile:
-	def __init__(self, absFilename):
+	def __init__(self, absFilename, concat_files = None):
 		self.filename = absFilename
 		self.qcRes = None
 		self.log = ""
 		self.phred="phred33"
 		self.concat_files = None
 
-	def run_FastQC(self, threads, outputDir):
-
-		outputDir = join(outputDir, "FastQC")
-		makedirs(outputDir, exist_ok= True)
-		process = subprocess.Popen([join(get_tool_path('fastqc'), "fastqc") , self.filename , "-o" , outputDir, "-t", str(threads), "--extract"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
-
+	def run_FastQC(self, outputDir):
+		process = subprocess.Popen([join(get_tool_path('fastqc'), "fastqc") , self.filename , "-o" , outputDir, "-t", str(num_threads), "--extract"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
 		for line in iter(process.stderr.readline, b''):
 			line = line.decode("utf-8")
 			line = line.replace("\n", "")
-			print(line, end="\r")
+			print(line)
 		for line in iter(process.stdout.readline, b''):
 			line = line.decode("utf-8")
 			if line.startswith("Approx"):
@@ -202,7 +217,7 @@ class FastQFile:
 				self.phred = pattern.group("phred")
 		print("")
 		process.communicate()
-		self.qcRes = QCResult(join(outputDir, getFilenameWithoutExtension(basename(self.filename), level = 5) + "_fastqc"))
+		self.qcRes = QCResult(join(outputDir, getFilenameWithoutExtension(basename(self.filename)) + "_fastqc"), join(mainResultsPath, "Report", "src"))
 		return self
 	def get_filename(self):
 		return toLatex(basename(self.filename))
@@ -211,19 +226,21 @@ class FastQFile:
 ###########
 
 class QCResult:
-	def __init__(self, resultsPath):
+	def __init__(self, resultsPath, summary_output):
 		self.path = resultsPath
-		self.results = self.get_results(resultsPath)
+		self.results = self.get_results()
+		#write summary csv for boxplots
+		write_fastqc_summary(resultsPath, summary_output)
 
-	def get_results(self, resultsPath):
+	def get_results(self):
 		summary_list = []
 		no_plot=["Basic Statistics", "Overrepresented sequences"]
-		with open(join(resultsPath, "summary.txt"), "r") as summary:
+		with open(join(self.path, "summary.txt"), "r") as summary:
 			reader = csv.reader(summary, delimiter = "\t")
 			for row in reader:
 				if (row[1] not in no_plot):
 					try:
-						plot = Plot(row[0], row[1], resultsPath)
+						plot = Plot(row[0], row[1], self.path)
 						if exists(plot.file):
 							summary_list.append(plot)
 						else:
@@ -234,12 +251,52 @@ class QCResult:
 						print(row[1], "does not exist")
 						summary_list.append(Plot("NotExisting", row[1], "NotExisting"))
 		return summary_list
+
+	def get_plots(self):
+		print("extracting data ", self.path)
+		store = False
+				data = {"name": basename(self.path).replace("_fastqc",""),
+				"Sequence_Length_Distribution": [],
+				"Per_sequence_GC_content": [],
+				"Per_base_sequence_quality": []}
+
+		with open(join(self.path, "fastqc_data.txt"), "r") as fastqcdatafile:
+			box_values = []
+			for line in iter(fastqcdatafile):
+				if line.startswith("#"):
+					pass
+				elif line.startswith(">>END_MODULE"):
+					if store==True:
+						data[key] = get_distribution(box_values, key)
+					store = False
+					key = None
+					box_values = []
+				elif line.startswith(">>"):
+					key = line.split("\t")[0]
+					key=key.replace(">>","")
+					key = key.replace(" ","_")
+					if key in data.keys():
+						store = True
+				else:
+					if store:
+						box_values.append(line)
+		return data
+
 	def results_to_base64(self):
 		converted_plot = []
 		for plot in self.results:
-			with open(plot.file, "rb") as imgfile:
-				imgstring = base64.b64encode(imgfile.read())
-				plot.file ='data:image/png;base64,' + imgstring.decode("utf-8")
+			if not plot.file.startswith("NotExisting"):
+				with open(plot.file, "rb") as imgfile:
+					imgstring = base64.b64encode(imgfile.read())
+					plot.file ='data:image/png;base64,' + imgstring.decode("utf-8")
+					converted_plot.append(plot)
+		return converted_plot
+
+	def results_for_report(self):
+		converted_plot = []
+		for plot in self.results:
+			if not plot.file.startswith("NotExisting"):
+				plot.file =plot.file.replace(mainResultsPath, "..")
 				converted_plot.append(plot)
 		return converted_plot
 
@@ -272,8 +329,11 @@ class TrimParameters:
 	leading = str(default_parameter["Trimmomatic"]["leading"]) #"3" # remove leading low quality below 3
 	trailing = str(default_parameter["Trimmomatic"]["trailing"]) #"3" # remove trailing quality below 3
 	adapter_path = get_tool_path('adapter')
+	def __init__(self):
+		#self.palindrome = palindrome
+		self.extraTrimOptions = ""
 
-	def make_callstring(self, adapter, palindrome, technology, minlen, trimOption, betterTrimmingOption ):
+	def make_callstring(self, adapter,palindrome, minlen, trimOption, betterTrimmingOption ):
 		call = []
 		if technology =="Illumina":
 				call.append("ILLUMINACLIP:" + join(self.adapter_path, adapter+ ".fa") + ":" + self.illuminaClip_seedMismatch + ":" + str(palindrome) + ":" + self.illuminaClip_simpleClip)
@@ -284,104 +344,94 @@ class TrimParameters:
 
 
 class TrimResult:
-	parameters = TrimParameters()
-
-	def __init__(self, adapter, palindrome,r1,r2, threads, outputDir, technology, minlen, trimOption, betterTrimming):
-		self.readset = ReadSet(outputDir, r1,r2)
-		self.adapter = adapter
-		self.technology = technology
-		# pathnames
-		self.outputDir = outputDir
+	def __init__(self,  readset, adapter, palindrome, minlen, trimOption, betterTrimming):
+		self.parameters = TrimParameters()
+		self.readset = readset
 		self.r1_paired = None
 		self.r2_paired = None
 		self.r1_unpaired = None
 		self.r2_unpaired = None
+
 		self.logs = ""
+
+		#Results
 		self.shorter_fragments_percentage = 0
-		self.threads = threads
 		self.input_number = 0
 		self.nTrimmed = 0
 		self.pTrimmed = 0
-		self.extraTrimOptions = ""
-		self.run(palindrome, outputDir, minlen, trimOption, betterTrimming)
-		self.blobplot = ""
+		self.adapter = adapter
+		self.run(  palindrome, minlen, trimOption, betterTrimming)
 
-	def make_callstring(self, palindrome, path, technology, minlen, trimOption, save = False, betterTrimmingOption =""):
-		if self.readset.paired:
+	def make_callstring(self, readset, palindrome,  minlen, trimOption, save = False, betterTrimmingOption =""):
+		if readset.paired:
 			if save:
-				self.r1_paired =join(path, "r1_P_" + getFilenameWithoutExtension(self.readset.r1.filename, level = 2,getBase =  True) + ".fastq")
-				self.r1_unpaired = join(path, "r1_UP_" + getFilenameWithoutExtension(self.readset.r1.filename, level=2, getBase = True)+ ".fastq")
-				self.r2_paired =join(path, "r2_P_" + getFilenameWithoutExtension(self.readset.r2.filename, level=2,getBase = True)+ ".fastq")
-				self.r2_unpaired =join(path, "r2_UP_" + getFilenameWithoutExtension(self.readset.r2.filename,level=2, getBase = True)+ ".fastq")
+				self.r1_paired =join(mainResultsPath, "Trimmed", "r1_P_" + getFilenameWithoutExtension(readset.r1.filename,  getBase =  True) + ".fastq")
+				self.r1_unpaired = join(mainResultsPath, "Trimmed", "r1_UP_" + getFilenameWithoutExtension(readset.r1.filename, getBase = True)+ ".fastq")
+				self.r2_paired =join(mainResultsPath, "Trimmed", "r2_P_" + getFilenameWithoutExtension(readset.r2.filename, getBase = True)+ ".fastq")
+				self.r2_unpaired =join(mainResultsPath, "Trimmed", "r2_UP_" + getFilenameWithoutExtension(readset.r2.filename, getBase = True)+ ".fastq")
 				output = " ".join([self.r1_paired, self.r1_unpaired, self.r2_paired, self.r2_unpaired])
 			else:
 				output = "/dev/null /dev/null /dev/null /dev/null"
 			callProcess = [join(get_tool_path("trimmomatic"), "TrimmomaticPE "),
-						   "-threads", str(self.threads),
-						   "-" + self.readset.r1.phred,
-						   self.readset.r1.filename,  # input 1
-						   self.readset.r2.filename,  # input 2
+						   "-threads", str(num_threads),
+						   "-" + readset.r1.phred,
+						   readset.r1.filename,  # input 1
+						   readset.r2.filename,  # input 2
 						   output,
-						   self.parameters.make_callstring(self.adapter, palindrome, technology, minlen, trimOption, betterTrimmingOption),
+						   self.parameters.make_callstring(self.adapter,palindrome, minlen, trimOption, betterTrimmingOption)
 						]
 		else:
 			callProcess = [join(get_tool_path("trimmomatic"),"TrimmomaticSE "),
-						   "-threads", str(self.threads),
-						   "-" + self.readset.r1.phred,
-						   self.readset.r1.filename,  # input 1
-						   join(path, getFilenameWithoutExtension(self.readset.r1.filename,level=2, getBase = True)+ ".fastq"),#output
-						   self.parameters.make_callstring(self.adapter, palindrome, technology, minlen, trimOption, betterTrimmingOption),
+						   "-threads", str(num_threads),
+						   "-" + readset.r1.phred,
+						   readset.r1.filename,  # input 1
+						   join(mainResultsPath, "Trimmed", getFilenameWithoutExtension(readset.r1.filename, getBase = True)+ ".fastq"),#output
+						   self.parameters.make_callstring(self.adapter,  palindrome, minlen, trimOption, betterTrimmingOption)
 						  ]
-			self.r1_unpaired = join(path,getFilenameWithoutExtension(self.readset.r1.filename,level=2, getBase = True)+ ".fastq")
+			self.r1_unpaired = join(mainResultsPath, "Trimmed", getFilenameWithoutExtension(readset.r1.filename, getBase = True)+ ".fastq")
 		return callProcess
 
-	def check_quality(self, path, trim_perc):
-		#makedirs(join(path, "temp"), exist_ok=True)
-		#path= join(path, "temp")
-		if self.readset.paired:
+	def check_quality(self,readset, trim_perc):
+		path = join(mainResultsPath, "Trimmed", "temp")
+		if readset.paired:
 			try:
-				r1_name = join_reads([self.r1_paired, self.r1_unpaired], path, basename(self.readset.r1.filename))
+				r1_name = join_reads([self.r1_paired, self.r1_unpaired], path, basename(readset.r1.filename))
 
 				print("Run FastQC to optimize trimming.")
-				process = subprocess.Popen( [join(get_tool_path('fastqc'), "fastqc"), r1_name,"--nogroup", "--extract", "-o", path,"--threads", str(self.threads)], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+				process = subprocess.Popen( [join(get_tool_path('fastqc'), "fastqc"), r1_name,"--nogroup", "--extract", "-o", path,"--threads", str(num_threads)], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 				process.communicate()
-				trim_bool, headcrop, crop = optimize_trimming(join(path, getFilenameWithoutExtension(basename(r1_name), level=5) + "_fastqc"),trim_perc)
+				trim_bool, headcrop, crop = optimize_trimming(join(path, getFilenameWithoutExtension(basename(r1_name)) + "_fastqc"), trim_perc)
 
 				#if r1 was ok, go on and check r2
 				print("Check 'per sequence base content' of R2.")
 
-				r2_name = join_reads([self.r2_paired, self.r2_unpaired], path, basename(self.readset.r2.filename))
+				r2_name = join_reads([self.r2_paired, self.r2_unpaired], path, basename(readset.r2.filename))
 				process = subprocess.Popen(
-					[join(get_tool_path('fastqc'), "fastqc"), r2_name,"--nogroup", "--extract", "-o",path, "--threads", str(self.threads)],
+					[join(get_tool_path('fastqc'), "fastqc"), r2_name,"--nogroup", "--extract", "-o",path, "--threads", str(num_threads)],
 					stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-				for line in iter(process.stderr.readline, b''):
-					print(line)
-				for line in iter(process.stdout.readline, b''):
-					print(line)
 				process.communicate()
-				trim_bool, r2_headcrop, r2_crop = optimize_trimming(join(path, getFilenameWithoutExtension(basename(r2_name), level=5) + "_fastqc"), trim_perc)
+				trim_bool, r2_headcrop, r2_crop = optimize_trimming(join(path, getFilenameWithoutExtension(basename(r2_name)) + "_fastqc"), trim_perc)
 
 				return trim_bool, max(headcrop, r2_headcrop), min(crop, r2_crop)
-				#if headcrop >= 0 or crop > 0:
 			finally:
 				shutil.rmtree(path)
 		else:
-			process = subprocess.Popen([join(get_tool_path('fastqc'), "fastqc"), self.r1_unpaired,"--nogroup", "--extract", "-o", join(path,"temp"), "--threads", str(self.threads)],
+			process = subprocess.Popen([join(get_tool_path('fastqc'), "fastqc"), self.r1_unpaired,"--nogroup", "--extract", "-o", path, "--threads", str(num_threads)],
 									   stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 			for line in iter(process.stdout.readline, b''):
-				print(line, end="\r")
+				print(line, end='\r')
 			process.communicate()
-			trim_bool, headcrop, crop = optimize_trimming(join(path,"temp", getFilenameWithoutExtension(basename(self.r1_unpaired), level=5) + "_fastqc"), trim_perc)
+			trim_bool, headcrop, crop = optimize_trimming(join(path,getFilenameWithoutExtension(basename(self.r1_unpaired)) + "_fastqc"), trim_perc)
 
 		return trim_bool, headcrop, crop
 
-	def run(self, palindrome, path, minlen, trimOption, betterTrimming):
-
+	def run(self,  palindrome, minlen, trimOption, betterTrimming):
 		# Report Trimmomatic Output
 		# Run first with the palindrome value whose output is kept; the alternative value is run afterwards only for comparison
+
 		survived = {'30':0, '1000':0}
 
-		call = self.make_callstring(palindrome, path, self.technology,minlen, trimOption, save=True)
+		call = self.make_callstring( self.readset, palindrome, minlen, trimOption, save=True)
 		process = subprocess.Popen(" ".join(call), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
 		survived1, total, nTrimmedReads, pTrimmedReads, log = trimmomatic_report(process,call[0], True)
 		process.communicate()
@@ -389,13 +439,12 @@ class TrimResult:
 		### Optimize trimming
 		if betterTrimming !="":
 			print("Improve trimming parameter")
-			makedirs(join(path,"temp"), exist_ok=True)
-			print(join(path, "temp"), betterTrimming )
-			trim_bool, final_headcrop, final_crop = self.check_quality(join(path, "temp"), betterTrimming)
+			makedirs(join(mainResultsPath, "Trimmed","temp"), exist_ok=True)
+			trim_bool, final_headcrop, final_crop = self.check_quality(self.readset, betterTrimming)
 
 			if trim_bool:
-				self.extraTrimOptions = "HEADCROP:"+str(final_headcrop) + " CROP:"+str(final_crop)
-				call = self.make_callstring(palindrome, path, self.technology, minlen, trimOption, save=True, betterTrimmingOption = self.extraTrimOptions)
+				self.parameters.extraTrimOptions = "HEADCROP:"+str(final_headcrop) + " CROP:"+str(final_crop)
+				call = self.make_callstring( self.readset, palindrome, minlen, trimOption, save=True, betterTrimmingOption = self.parameters.extraTrimOptions)
 				process = subprocess.Popen(" ".join(call), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
 				survived1, total, nTrimmedReads, pTrimmedReads, log = trimmomatic_report(process, call[0], True)
 				process.communicate()
@@ -406,13 +455,15 @@ class TrimResult:
 		self.logs += log
 
 		############################
+
 		survived[str(palindrome)] = survived1
 		if self.readset.r2 is not None:
 			if palindrome==30:
 				palindrome=1000
 			else:
+
 				palindrome=30
-			process = subprocess.Popen(" ".join(self.make_callstring(palindrome, path, self.technology, minlen, trimOption, save=False, betterTrimmingOption = self.extraTrimOptions)  ),
+			process = subprocess.Popen(" ".join(self.make_callstring( self.readset, palindrome, minlen, trimOption, save=False, betterTrimmingOption = self.parameters.extraTrimOptions)  ),
 							stdout=subprocess.PIPE,
 							stderr=subprocess.PIPE, shell=True)
 			survived2, total,nTrimmedReads, pTrimmedReads, temp = trimmomatic_report(process, call[0],False)
@@ -422,14 +473,14 @@ class TrimResult:
 
 		return self
 
-	def run_FastQC(self, outputDir, tempdir):
+	def run_FastQC(self,  tempdir):
 		if self.readset.paired:
 			#tempdir = tempfile.mkdtemp()
 			r1_name = join_reads([self.r1_paired, self.r1_unpaired], tempdir, basename(self.readset.r1.filename))
 			r2_name = join_reads([self.r2_paired, self.r2_unpaired], tempdir, basename(self.readset.r2.filename))
 			try:
-				self.readset = ReadSet(outputDir, r1_name, r2_name)
-				self.readset.run_FastQC(self.threads)
+				self.readset = ReadSet( r1_name, r2_name)
+				self.readset.run_FastQC(join(mainResultsPath, "Trimmed/FastQC"))
 			except:
 				print(sys.exc_info())
 				sys.exit()
@@ -439,34 +490,29 @@ class TrimResult:
 				self.readset.r1.filename = " ".join([self.r1_paired, self.r1_unpaired])
 				self.readset.r2.filename = " ".join([self.r2_paired, self.r2_unpaired])
 		else:
-			self.readset = ReadSet(outputDir, self.r1_unpaired)
-			self.readset.run_FastQC(self.threads)
+			self.readset = ReadSet( self.r1_unpaired)
+			self.readset.run_FastQC(join(mainResultsPath, "Trimmed"))
 		return self
 
-	def run_blobplot(self, db):
-		all_inputs = [x for x in [self.r1_paired, self.r1_unpaired, self.r2_paired, self.r2_unpaired] if x is not None]
-		blobplot = BlobResult()
-		self.blobplot = blobplot.run(all_inputs, self.outputDir, self.threads, self.readset.setname.split(".")[0], db)
-		return self
-	def print_param(self, minlen, trimOption, pali):
-		return self.parameters.make_callstring(self.adapter, pali, self.technology, minlen, trimOption, self.extraTrimOptions)
+
+	def print_param(self, palindrome, minlen, trimOption):
+		return self.parameters.make_callstring(self.adapter, palindrome, minlen, trimOption, self.parameters.extraTrimOptions)
 
 class MappingParameter:
 	def __init__(self, mapping):
 		pass
 
 class MappingResult:
-	def __init__(self, reference, r1_name, r2_name, phred, bowtie_index):
+	def __init__(self, reference, r1_name, r2_name, bowtie_index):
 		self.numberOfAlignedReads = 0
 		self.percentAlignedReads= 0
 		self.index = self.build_index(reference, bowtie_index)
 		self.log = ""
 		self.r1_name = r1_name
 		self.r2_name = r2_name
-		self.phred = phred
 
 	def build_index(self, reference, index):
-		index_name = join(index, getFilenameWithoutExtension(basename(reference), level=2) +"_bt2")  # reference name "bowtie2 build index_name"
+		index_name = join(index, getFilenameWithoutExtension(basename(reference)) +"_bt2")  # reference name "bowtie2 build index_name"
 		if not isdir(index):
 			return index
 		process = subprocess.Popen([join(get_tool_path("bowtie2"),"bowtie2-build"),reference,  index_name ], stderr=subprocess.PIPE, stdout = subprocess.PIPE)
@@ -481,10 +527,14 @@ class MappingResult:
 
 	def run_bowtie(self, threads, save_mapping):
 		print("Run Bowtie2")
-		call = [join(get_tool_path("bowtie2"), "bowtie2"), "-x", self.index, "--no-unal --threads ", str(threads)]
-		call.extend(["-U", re.sub(r"(?<!\\)\s",",",self.r1_name)])
-		if self.r2_name is not None:
-			call.extend(['-U',re.sub(r"(?<!\\)\s",",",self.r2_name)])
+		call = [join(get_tool_path("bowtie2"), "bowtie2"), "-x", self.index, "--no-unal --threads ", str(num_threads)]
+
+		#if self.r2_name is not None:
+		#	call.extend(["-1", re.sub(r"(?<!\\)\s", ",", self.r1_name)])
+		#	call.extend(['-2',re.sub(r"(?<!\\)\s",",",self.r2_name)])
+		#else:
+		#	call.extend(["-U", re.sub(r"(?<!\\)\s", ",", self.r1_name)])
+		call.extend(["-U", re.sub(r"(?<!\\)\s", ",", self.r1_name + " " +  self.r2_name)])
 		call.extend(["-S", save_mapping, "2>&1"])
 		#print("Call  ", " ".join(call))
 		process = subprocess.Popen(" ".join(call), stderr=subprocess.PIPE, stdout= subprocess.PIPE, shell=True)
@@ -513,44 +563,43 @@ class MappingResult:
 
 
 class KrakenResult:
-	def __init__(self, r1,r2, db, outputDir, threads, fileext, project_name):
+	def __init__(self, r1,r2, db, name):
+		self.name = name
 		self.report_name = None
 		self.report = ""
 		self.pClassified = 0
 		self.nClassified = 0
 		self.log = ""
-		self.run_kraken(r1,r2, db, outputDir, threads, fileext, project_name)
+		self.run_kraken(r1,r2, db)
 
-	def make_callstring(self, r1,r2, db, outputDir, threads, fileext, project_name):
+	def make_callstring(self, r1,r2, db):
 		#input_type = ""
 		#if fileext.endswith("")
 		call = [join(get_tool_path('kraken') , "kraken") , "--db", db, r1]
 		if r2 is not None:
 			call.append( r2)# + " --paired")
 		call.append("--preload")
-		call.append("--threads " + str(threads))
-		self.report_name = join(outputDir, project_name + ".krakenreport")
+		call.append("--threads " + str(num_threads))
+		self.report_name = join(mainResultsPath, self.name + ".krakenreport")
 		if exists(self.report_name):
 			i = 2
-			new_name = join(outputDir, project_name + "_" +str(i) + ".krakenreport")
+			new_name = join(mainResultsPath, self.name + "_" +str(i) + ".krakenreport")
 			while exists(new_name):
 				i += 1
-				new_name = join(outputDir, project_name + "_" + str(i) + ".krakenreport")
+				new_name = join(mainResultsPath, self.name + "_" + str(i) + ".krakenreport")
 			self.report_name=new_name
 
 		call.append("--output " + self.report_name)
 		return " ".join(call)
 
-	def run_kraken(self, r1,r2, db, outputDir, threads, fileext, project_name):
-		#print(self.make_callstring(r1,r2, db, outputDir, threads, fileext, project_name))
-		process = subprocess.Popen(self.make_callstring(r1,r2, db, outputDir, threads, fileext, project_name), shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
-		self.log += "\\textcolor{gray}{Using Parameter " + " ".join(["\path{"+ x + "}" for x in self.make_callstring(r1,r2, db, outputDir, threads, fileext, project_name).split(" ")]) + "}\\\\"
+	def run_kraken(self, r1,r2, db):
+		process = subprocess.Popen(self.make_callstring(r1,r2, db), shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+		self.log += "\\textcolor{gray}{Using Parameter " + " ".join(["\path{"+ x + "}" for x in self.make_callstring(r1,r2, db).split(" ")]) + "}\\\\"
 
 		for line in iter(process.stderr.readline, b''):
 			line = line.decode("utf-8")
 			print(line)
 			if not (line.startswith("Loading database") or re.search("Processed \d+", line)):
-				#self.output += line + "\n"
 				self.log += toLatex(line) +"\n"
 				pattern = re.match("\s+(?P<nClassified>\d+) sequences classified \((?P<pClassified>\d+.\d+)%\)", line)
 				if pattern:
@@ -562,7 +611,6 @@ class KrakenResult:
 			print(line)
 			if not ( re.search("Processed  \d+", line) or line.startswith("Loading database") ):
 				self.output += line + "\n"
-
 		process.communicate()
 
 		print("Convert Kraken-Report..")
@@ -572,110 +620,6 @@ class KrakenResult:
 	def convert_report(self, db):
 		process = subprocess.check_output([join(get_tool_path('kraken') ,'kraken-report') , "--db", db, self.report_name])
 		self.report = process.decode("utf-8")
-		#converted_kraken = open("kraken_output.csv","w")
-		#converted_kraken.write(self.report)
-		#converted_kraken.close()
 		return self
 
 
-class AssemblyParameter:
-	method = "velvet"
-	kmer = "35"
-	input_type = "-fmtAuto -short"
-
-class BlastParameter:
-	method="blastn"
-	task = "megablast"
-	evalue= "1e-5"
-	max_target_seqs = "1"
-	outfmt="'6 qseqid staxids bitscore std sscinames sskingdoms stitle'	"
-	extraParams = "-culling_limit 5"
-
-	def __init__(self):
-		self.db = join(get_tool_path("blast_db","DEFAULT"), "nt")
-
-	def print(self):
-		return " ".join([self.method, "-task", self.task, "-evalue", self.evalue, "-max_target_seqs", self.max_target_seqs, "-outfmt", self.outfmt, self.extraParams , "-db", self.db])
-
-class BlobParameter:
-	def __init__(self, taxonomy_path):
-		self.db = ""
-		self.nodes = join(taxonomy_path, "taxonomy/nodes.dmp")
-		self.names = join(taxonomy_path, "taxonomy/names.dmp")
-
-	def print(self):
-		if self.db !="":
-			return "--db " + self.db
-		else:
-			return " ".join(["--nodes", self.nodes, "--names", self.names])
-
-class BlobResult:
-	assParams = AssemblyParameter()
-	blastParams = BlastParameter()
-
-	def run(self, input, output, threads, setname, db):
-		print("Run blobtools...")
-		self.blobParams = BlobParameter(db)
-		contigs=""
-		#ASSEMBLY
-		print("...first assemble")
-		if self.assParams.method == "velvet":
-			concat_input = " -short ".join(input)
-			full_output = join(output, "Velvet",setname)
-			makedirs(full_output, exist_ok=True)
-			#print(" ".join(["velveth", full_output, self.assParams.kmer, self.assParams.input_type, concat_input]))
-
-			process = subprocess.Popen(" ".join(["velveth", full_output, self.assParams.kmer, self.assParams.input_type, concat_input]), shell= True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
-			'''for line in iter(process.stderr.readline, b''):
-				print(line)
-			for line in iter(process.stdout.readline, b''):
-				print(line)'''
-			process.communicate()
-			print(["velvetg", full_output])
-			process = subprocess.Popen(" ".join(["velvetg", full_output]), shell=True, stderr=subprocess.PIPE,
-									   stdout=subprocess.PIPE)
-			'''for line in iter(process.stderr.readline, b''):
-				print(line)
-			for line in iter(process.stdout.readline, b''):
-				print(line)'''
-			process.communicate()
-
-			contigs = join(full_output, "contigs.fa")
-
-		# blast search
-		blast_result = join(output, setname + "." + self.blastParams.evalue + ".megablast")
-		print("...blastn ")
-
-		process = subprocess.Popen(" ".join([self.blastParams.print(), "-query", contigs, "-out",blast_result, "-num_threads", str(threads)]),shell = True,stderr = subprocess.PIPE, stdout = subprocess.PIPE)
-		for line in iter(process.stderr.readline, b''):
-			print(line)
-		for line in iter(process.stdout.readline, b''):
-			print(line)
-		process.communicate()
-
-		# blobtools
-		img = None
-		blob_output= join(output, setname)
-		print("Run blobtools")
-		process = subprocess.Popen(" ".join([join(get_tool_path("blobtools"),"blobtools"), "create", self.blobParams.print(), "-y", self.assParams.method, "-t", blast_result, "-i" , contigs, "-o", blob_output]),shell = True,
-						  stderr = subprocess.PIPE,
-						  stdout = subprocess.PIPE)
-		for line in iter(process.stderr.readline, b''):
-			print(line)
-		for line in iter(process.stdout.readline, b''):
-			print(line)
-		process.communicate()
-		process = subprocess.Popen(" ".join([join(get_tool_path("blobtools"),"blobtools"), "plot", "-i",blob_output + ".BlobDB.json"]),shell = True,
-						  stderr = subprocess.PIPE,
-						  stdout = subprocess.PIPE)
-		for line in iter(process.stdout.readline, b''):
-			line = line.decode()
-			if line.startswith("[STATUS]"):
-				if "Plotting" in line:
-					pattern = re.match("\[STATUS\]\t: Plotting (?P<path>.*)",line)
-					img = pattern.group("path")
-			print(line)
-
-		process.communicate()
-
-		return img
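
Taken together, the classes.py changes move the per-run settings (results path, thread count, technology) into module-level globals set by the Sample constructor, so the per-step methods take fewer arguments. A rough sketch of the resulting call sequence under the new signatures, assuming classes.py is importable outside the QCumber.py entry point and the external tools are installed; sample name, paths, adapter and database values are made up:

    from classes import Sample, ReadSet

    # Constructing a Sample sets the module-level mainResultsPath, num_threads and technology.
    sample = Sample("SampleA", "/path/to/QCResults", reference="/path/to/reference.fasta",
                    threads=4, techn="Illumina")

    rs = ReadSet("SampleA_S1_L001_R1_001.fastq.gz",
                 "SampleA_S1_L001_R2_001.fastq.gz")
    rs.run_FastQC("/path/to/QCResults/FastQC")        # output directory (must exist), no thread argument
    rs.run_Trimmomatic(adapter="NexteraPE-PE", palindrome=30,
                       minlen=50, trimOption="SLIDINGWINDOW:4:15", betterTrimming="")
    sample.readSets.append(rs)

    sample.run_Kraken(db="/path/to/minikraken_db")    # paths and thread count now come from the globals
    sample.run_Bowtie2(bowtie_index="/tmp/bt2_index", save_mapping="/dev/null")
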
diff --git a/combineFastQC.py b/combineFastQC.py
deleted file mode 100755
index b0ca7d3..0000000
--- a/combineFastQC.py
+++ /dev/null
@@ -1,431 +0,0 @@
-  # !/usr/bin/env python  # -*- coding: utf-8 -*-
-# created by Stephan Fuchs (FG13, RKI), 2015
-
-'''
-Combine FastQC Results
-Input: Project/Sample Folder which was created by runQCPipeline (folder contains QCResults - <FastQC_Result>)
-Output: html file
-'''
-
-# config
-version = '0.2.9'
-logsep = '=====================================\n'
-
-import os
-import sys
-import time
-import shutil
-import base64
-import tempfile
-import matplotlib
-
-matplotlib.use("Agg")  #in case no display is set
-import matplotlib.pyplot as plt
-import argparse
-import numpy as np
-
-
-
-#advanced config - HTML output
-htmlheader = '<!doctype html>' \
-			 '<html>' \
-			 '<head>' \
-			 '<meta charset="utf-8">' \
-			 '<title>Combined FastQC Report</title>' \
-			 '<script language="javascript">top.DSfirstAccess = 1442945125;top.DSmaxTimeout=3600;top.DStimediff=1442945348 - parseInt(""+(new Date()).getTime()/1000);</script><SCRIPT language="javascript" id="dsshim" src="/dana-cached/js/shimdata.cgi"></SCRIPT><SCRIPT language="javascript" id="dsfunc" src="/dana-cached/js/oth.js?a4e535923255f21d1261a51909daf29f"></SCRIPT><SCRIPT language="javascript" id="dstimeout" src="/dana-cached/js/sessiontimeout.js?a4e535923255f21d1261a51909daf29f"></SCRIPT> [...]
-			 '//<![CDATA[' \
-			 'var dsnodocwrites = 0 ; var DanaCookie="MstrPgLd1=1; MstrPgLd2=1; UserContext=mBoHOGHwbUOz5yuRe-kSp4w1h6L4yNIICPjPhC-9ifhLTYXFL0EmDebAMytIx2kDia-CCTHmi8Y.; tzid=W. Europe Standard Time"; var DSHost="webmail.rki.de"; var DSDanaInfoStr=""; var DSLang="de"; var DSObfuscateHostname=0;var DSTBSettings=17425;var DSTBLU=\'/dana/home/starter.cgi?startpageonly=1\';;danaSetDSHost();' \
-			 '//]]>' \
-			 '</SCRIPT></head>' \
-			 '<body>'
-
-htmlfooter = '</table>\n</body>\n<SCRIPT language="javascript" id="dstb-id">dstb()</SCRIPT></html>\n'
-
-#advanced config - fastqc img extraction
-order = ['sld', 'sq', 'sgc', 'bq', 'bnc', 'kmer', 'dup']
-imgfiles = {'dup': 'duplication_levels.png',
-			'kmer': 'kmer_profiles.png',
-			'bgc': 'per_base_gc_content.png',
-			'bnc': 'per_base_n_content.png',
-			'bq': 'per_base_quality.png',
-			'bsc': 'per_base_sequence_content.png',
-			'sgc': 'per_sequence_gc_content.png',
-			'sq': 'per_sequence_quality.png',
-			'sld': 'sequence_length_distribution.png',
-			"ptsq": "per_tile_quality.png",
-			"ac": "adapter_content.png"
-			}
-#advanced config - fastqc data extraction
-modules = {'sld': '>>Sequence Length Distribution\t', 'sq': '>>Per sequence quality scores\t',
-		   'sgc': '>>Per sequence GC content\t'}
-all_modules = {'bq': ">>Per base sequence quality",
-			   'bsc': ">>Per base sequence content",
-			   'bgc': ">>Per base GC content",
-			   'bnc': ">>Per base N content",
-			   'sld': ">>Sequence Length Distribution",
-			   'dup': ">>Sequence Duplication Levels",
-			   'kmer': ">>Kmer Content",
-			   'sq': '>>Per sequence quality scores',
-			   'sgc': '>>Per sequence GC content',
-			   'ptsq': '>>Per tile sequence quality',
-			   'ac':'>>Adapter Content'
-			   }
-codes = {'comment': '#', 'modulestart': '>>', 'moduleend': '>>END_MODULE'}
-
-#advanced config - boxplot design
-
-#processing command line arguments
-'''parser = argparse.ArgumentParser(prog="COMBINE_FASTQC",
-								 description='performs FastQC for multiple fastq files and combines results in a single html file.')
-parser.add_argument('file', metavar="FILE(S)", help="input BAM file", type=argparse.FileType('r'), nargs='+')
-parser.add_argument('-t', metavar='INT', help="number of threads/CPUs used for FastQC", type=int, action="store",
-					default=4)
-parser.add_argument('--version', action='version', version='%(prog)s ' + version)
-
-args = parser.parse_args()
-'''
-
-
-#functions
-def createCSV(dataset, lines, key, path):
-	'''creates CSV used for boxplotting'''
-	output = []
-	for line in lines:
-		line = line.strip()
-		fields = line.split('\t')
-
-		#Sequence Length Distribution
-		if key == "sld":
-			l = fields[0].split('-')
-			try:
-				mean = (int(l[0]) + int(l[1])) / 2
-			except:
-				mean = int(l[0])
-			output.extend([str(mean)] * int(float(fields[1])))
-
-		#sequence quality or sequence GC content
-		if key == "sq" or key == 'sgc':
-			v = fields[0]  #quality or gc [%]
-			output.extend([str(v)] * int(float(fields[1])))
-
-		#duplicates
-		if key == 'dup':
-			c = fields[0].replace('>', '')  #count of duplicates
-			c = c.replace('k', '000')
-			c = c.replace('+', '')
-			output.extend([str(c)] * int(float(fields[1]) * 100))
-	csvhandle = open(path + '/' + key + ".csv", 'a')
-	csvhandle.write(dataset.replace(',', '_') + "," + ",".join(output) + '\n')
-	csvhandle.close()
-
-
-def color(line):
-	line = line.strip()
-	if line.endswith("pass"):
-		return "#3CBC3C"
-	elif line.endswith("warn"):
-		return "orange"
-	elif line.endswith("fail"):
-		return "red"
-	else:
-		return "black"
-
-
-def get_key(dict, line):
-	for key, value in dict.items():
-		if line.startswith(value):
-			return key
-	return None
-
-
-def barplot(overview, width, height, tmpdir, type, xlabel, ylabel, title):
-	bplotfont = {'weight': 'bold', 'size': 5}
-	plt.rc('font', **bplotfont)
-	fig = plt.figure(1, figsize=(width, height))
-	ax = fig.add_subplot(111)
-	index = np.arange(overview[1].shape[0])
-	opacity = 0.4
-	bar_width = 0.35
-	bplot = plt.bar(index, overview[1][type], bar_width,
-					alpha=opacity,
-					color='#385d8a')  #, label='Aligned Reads')
-	plt.xlabel(xlabel)
-	plt.ylabel(ylabel)
-	plt.title(title)
-	plt.xticks(index + (bar_width / 2), [x.decode("utf-8") for x in overview[1]['name']], rotation=90)
-	# plt.ylim((0,100))
-
-	for rect, label in zip(bplot, overview[1][type]):
-		ax.text(rect.get_x() + rect.get_width() / 2, rect.get_height() , str(label), ha='center', va='bottom')
-
-	fig.savefig(os.path.join(tmpdir ,type + ".png"), bbox_inches='tight', dpi=200)
-	fig.clf()
-
-
-def combineFastQC(project_folder, overview=None, tmp = "tmp"):
-
-	err = 1
-	while err > 0:
-		timecode = time.strftime("%Y-%m-%d_%H-%M-%S")
-		html_fname = "FASTQC_COMBINE_" + timecode + ".html"
-		if os.path.isfile(html_fname):
-			err += 1
-			time.sleep(1)
-		else:
-			err = 0
-		if err == 30:
-			print("ERROR: giving up to find available output file name")
-			sys.exit(1)
-	#working
-	try:
-		tmpdir = tempfile.mkdtemp(tmp)
-		os.makedirs(tmpdir, exist_ok=True)
-		chunkfiles = [tmpdir + '/' + html_fname + '1', tmpdir + '/' + html_fname + '2']
-		outhandle = open(chunkfiles[1], "w")
-
-		print("Summarize fastqc ", project_folder)
-		fastqc_dirs = []
-		#sample_name=[]
-		for sample in os.listdir(project_folder):
-			if os.path.isdir(os.path.join(project_folder, sample)):
-				if sample == "QCResults":
-					sample = ""
-				path = os.path.join(project_folder, sample)
-				if "QCResults" in os.listdir(path):
-					path = os.path.join(path, "QCResults")
-
-				if "FastQC" in os.listdir(path):
-					path = os.path.join(path, "FastQC")
-
-				print("Found path " + path)
-				if os.path.exists(path):
-					fastqc_folder = sorted( [x for x in os.listdir(path) if x.endswith("_fastqc")])
-
-					if len(fastqc_folder) != 0:
-						path_trimmed = os.path.join(path, "..", "Trimmed", "FastQC")
-						if os.path.exists(path_trimmed):
-							for fastqc in fastqc_folder:
-								fastqc_dirs.append(os.path.join(path, fastqc))
-								fastqc_dirs.append(os.path.join(path_trimmed, fastqc))
-					else:
-						fastqc_dirs = fastqc_folder
-						fastqc_folder = None
-
-		i = 0
-		outhandle.write('<table border = "1"> <tr>')
-
-		for fastqcdir in fastqc_dirs:
-			i += 1
-			print("extracting data ", fastqcdir)
-			store = False
-			data = []
-			fastqcdatafile = open(os.path.join(fastqcdir, "fastqc_data.txt"), "r")
-			frame_color = {}
-			for line in iter(fastqcdatafile):
-				if line.startswith(codes['comment']):
-					pass  #print("comment")
-				#continue
-				elif line.startswith(codes['moduleend']):
-					if len(data) > 0:
-						name1 = os.path.basename(fastqcdir)
-						if "/Trimmed/" in fastqcdir:
-							name1 = "Trimmed " + name1
-						createCSV(name1, data[2:], key, tmpdir)
-						data = []
-						store = False
-
-				elif line.startswith(codes['modulestart']):
-					key = get_key(all_modules, line)
-					if key in modules.keys():
-						store = True
-					if key is not None:
-						frame_color[key] = color(line)
-				if store:
-					data.extend([line])
-			fastqcdatafile.close()
-
-			#image processing
-			name = os.path.basename(fastqcdir)
-			if "/Trimmed/" in fastqcdir:
-				name = "Trimmed " + os.path.basename(fastqcdir)
-			outhandle.write('<tr>\n<td><b>' + name + "</b></td>\n")
-			for imgkey in order:
-				try:
-					with open(os.path.join(fastqcdir , "Images" , imgfiles[imgkey]), "rb") as imgfile:
-						imgstring = base64.b64encode(imgfile.read())
-						outhandle.write('<td><img style="border: 5px solid ' + frame_color[imgkey] + ';" alt="Embedded Image" src="data:image/png;base64,' + imgstring.decode(
-							"utf-8") + '" /></td>\n')
-				except:
-					outhandle.write('<td></td>\n')
-			outhandle.write('</tr>\n')
-			if "/Trimmed/" in fastqcdir:
-				outhandle.write('</tr><tr></tr><tr></tr><tr></tr><tr></tr><tr></tr><tr></tr>')
-
-		outhandle.write(htmlfooter)
-		outhandle.close()
-
-		bplotfont = {'weight': 'bold', 'size': 5}
-		plt.rc('font', **bplotfont)
-
-		#generate boxplots
-		print ("generating comparative boxplots...")
-		xclasses = range(1, len(fastqc_dirs) + 1)
-		width = 4.5
-		height = 3.5
-		if (len(fastqc_dirs) / 3) > width:
-			width = len(fastqc_dirs) / 3
-			height = len(fastqc_dirs) / 4
-			#max width
-			if width > 12:
-				width = 12
-				height = 7
-				bplotfont = {'weight': 'bold', 'size': 3}
-				plt.rc('font', **bplotfont)
-
-		xlabels = []
-		for key in order:
-			if key in modules:
-				data = []
-
-				plottitle = modules[key].replace(">", '')
-				plottitle = plottitle.replace("\t", '')
-				print("Plot ", plottitle )
-
-				#csvhandle = open(tmpdir + '/' + key + ".csv", 'r')
-				with open(tmpdir + '/' + key + ".csv", 'r') as csvfile:
-					csvhandle = csv.reader(csvfile, delimiter=',', quotechar='|')
-					for row in csvhandle:
-						xlabels.append(row[0])
-						data.append([float(x) for x in row[1:]])
-
-				fig = plt.figure(1, figsize=(width, height))
-				ax = fig.add_subplot(111)
-				#bp = ax.boxplot(data, notch=True, patch_artist=True)
-
-				# set props
-				'''whisker = {'color':'#b4b4b4', 'linestyle':"-", 'linewidth':0.75}
-				caps = {'color':'#b4b4b4', 'linewidth':1}
-				flier = {"marker":'o', "lw":0, "color":'#e6e6e6', "markersize":0.5}
-				box = {"color":'#385d8a', "linewidth":0.75}'''
-				bp = ax.boxplot(data)#,  capsprops = caps , flierprops = flier, boxprops =box)
-
-				for box in bp['boxes']:
-					#outline color
-					box.set(color='#385d8a', linewidth=0.75)
-
-				for whisker in bp['whiskers']:
-					whisker.set(color='#b4b4b4', linestyle="-", linewidth=0.75)
-
-				for cap in bp['caps']:
-					cap.set(color='#b4b4b4', linewidth=1)
-
-				for median in bp['medians']:
-					median.set(color='#c0504d', linewidth=1)
-
-				for flier in bp['fliers']:
-					flier.set(marker='o', lw=0, color='#e6e6e6', markersize=0.5)
-
-				plt.title(plottitle)
-				plt.xticks(xclasses, xlabels, rotation=90)
-				fig.savefig(tmpdir + '/' + key + ".png", bbox_inches='tight', dpi=200)
-				fig.clf()
-
-		if overview is not None:
-			try:
-				if len([x for x in overview[1]['mapping'] if x != 0.0]) != 0:
-					barplot(overview, width, height, tmpdir, 'mapping', 'Samples', 'Mapped reads [%]',
-						'Percentage of reads aligned to ' + overview[0])
-			except:
-				print("Couldnt plot mapping results")
-			try:
-				barplot(overview, width, height, tmpdir, 'trimming', 'Samples', 'Reads after trimming [%]',
-						'Percentage of reads after trimming')
-			except:
-				print("Couldnt plot trimming results")
-			try:
-				if len([x for x in overview[1]['shorter_reads'] if x != 0.0]) != 0:
-					barplot(overview, width, height, tmpdir, 'shorter_reads', 'Samples', 'Shorter fragments [%]',
-							'Percentage of fragments shorter than read length')
-			except:
-				print("Couldnt plot shorter fragments")
-			try:
-				if len([x for x in overview[1]['kraken'] if x != 0.0]) != 0:
-					barplot(overview, width, height, tmpdir, 'kraken', 'Samples', 'Classified reads [%]',
-						'Percentage of classified reads')
-			except:
-				print("Couldnt plot Kraken results")
-		#save boxplots to html
-		outhandle = open(chunkfiles[0], "w")
-		outhandle.write(htmlheader)
-		#for line in iter(outhandle):
-		#    if line.strip() == "<boxplotline \>":
-		outhandle.write("<tr>\n<td><b>summary</b></td>\n")
-		for key in order:
-			if key not in modules:
-				outhandle.write("<td>-</td>")
-			else:
-				imgfile = open(tmpdir + '/' + key + ".png", "rb")
-				imgstring = base64.b64encode(imgfile.read())
-				imgfile.close()
-				outhandle.write('<td><img alt="Embedded Image" src="data:image/png;base64,' + imgstring.decode(
-					"utf-8") + '" /></td>\n')
-		outhandle.write('</tr>\n')
-
-		outhandle.write("<tr>\n<td></td>\n")
-		if overview is not None:
-			for result in ['mapping', 'trimming', 'shorter_reads', 'kraken']:
-				try:
-					imgfile = open(os.path.join(tmpdir, result + ".png"), "rb")
-					imgstring = base64.b64encode(imgfile.read())
-					imgfile.close()
-					outhandle.write('<td><img alt="Embedded Image" src="data:image/png;base64,' + imgstring.decode(
-						"utf-8") + '" /></td>\n')
-				except:
-					pass
-			outhandle.write('</tr>\n')
-			outhandle.close()
-
-
-		#join sub-htm-files
-		print("Create result file:  " + os.path.join(project_folder, html_fname))
-
-		htmlhandle = open(os.path.join(project_folder, html_fname), 'w')
-		for chunkfile in chunkfiles:
-			chunkhandle = open(chunkfile, 'r')
-			for line in iter(chunkhandle):
-				htmlhandle.write(line)
-			chunkhandle.close()
-		htmlhandle.close()
-	finally:
-		#cleaning
-		shutil.rmtree(tmpdir)
-
-
-import csv
-
-if __name__ == "__main__":
-	start  =time.time()
-	print(start)
-	parser = argparse.ArgumentParser()
-	parser.add_argument('-folder', dest='folder', help="Sample folder", required=True)
-	parser.add_argument('-csv', dest='csv',
-						help="csv file.Format: Sample name | Mapping Result | Trimming Result | Kraken Result | Shorter Fragments ",
-						required=False)
-	parser.add_argument('-reference', dest="reference", help="Name of reference", required=False)
-	arguments = vars(parser.parse_args())
-
-	if arguments["csv"] and arguments["reference"]:
-
-		overview = []
-		with open(arguments["csv"], newline='') as csvfile:
-			infosheet = csv.reader(csvfile, delimiter=';', quotechar='|')
-			for row in infosheet:
-				overview.append(tuple(row))
-				print(row)
-		combineFastQC(arguments["folder"], (arguments["reference"],np.array(overview, dtype = [('name', '|S100'), ('mapping', float), ('trimming', float), ('kraken', float), ('shorter_reads', float)])))
-
-	elif (arguments["csv"] or arguments["reference"]):
-		sys.exit("csv and reference required")
-	else:
-		combineFastQC(arguments["folder"])
-	print(time.time() - start)
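
The comparative CSV and boxplot duties of the deleted combineFastQC.py are taken over by helper.py (createCSV / write_fastqc_summary, further down in this diff) together with boxplot.R and the reworked batch_report.html; each QCResult now appends its FastQC module data to per-module CSVs as it is created. A minimal sketch of that entry point (paths below are hypothetical):

    from helper import write_fastqc_summary

    # Appends rows to Sequence_Length_Distribution.csv, Per_sequence_quality_scores.csv
    # and Per_sequence_GC_content.csv for one extracted FastQC result directory.
    write_fastqc_summary(
        "/path/to/QCResults/FastQC/SampleA_S1_L001_R1_001_fastqc",  # FastQC output folder
        "/path/to/QCResults/Report/src")                            # directory receiving the CSVs
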
diff --git a/config.txt b/config.txt
index 39ba760..0e269df 100755
--- a/config.txt
+++ b/config.txt
@@ -1,14 +1,10 @@
 [DEFAULT]
-kraken_db =/home/andruscha/programs/kraken/minikraken_20141208/ 
-blast_db = /data/BLAST/nt/
+kraken_db =/home/andruscha/software/kraken/minikraken_20141208/
 
 [PATH]
-kraken = /home/andruscha/programs/kraken/kraken_build/
+kraken =
 adapter = /data/GS/tools/Trimmomatic-0.32/adapters/
 fastqc = /home/lieuv/tools/FastQC/
 trimmomatic =
 bowtie2 =
-blobtools = /home/lieuv/tools/blobtools
 
-[VERSION]
-trimmomatic = 0.32
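
config.txt now keeps only the Kraken database location and optional tool directories; empty values leave tool lookup to get_tool_path in classes.py (whether that falls back to $PATH is not shown in this hunk). A minimal sketch of reading it, matching the configparser usage elsewhere in the code:

    import configparser
    from os.path import dirname, join

    config = configparser.ConfigParser()
    config.read(join(dirname(__file__), "config.txt"))

    kraken_db = config["DEFAULT"]["kraken_db"]     # minikraken database directory
    fastqc_dir = config["PATH"]["fastqc"]          # may be left empty (see get_tool_path in classes.py)
    adapter_dir = config["PATH"]["adapter"]        # Trimmomatic adapter FASTA directory
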
diff --git a/helper.py b/helper.py
index e75cd69..b1c906f 100755
--- a/helper.py
+++ b/helper.py
@@ -1,9 +1,15 @@
 __author__ = 'LieuV'
 
 from os.path import basename, splitext, join, isfile, dirname, abspath, exists
-from os import makedirs
+from os import makedirs, listdir, remove
 import subprocess
-import re, sys, numpy
+import re, sys, numpy, shutil
+import matplotlib
+matplotlib.use("Agg")  # in case no display is set
+import matplotlib.pyplot as plt
+import xml.etree.ElementTree as ET
+from collections import OrderedDict
+import json
 
 def getDir(args, getAbs = False):
 	if isfile(args[0]):
@@ -14,21 +20,19 @@ def getDir(args, getAbs = False):
 	makedirs(path, exist_ok=True)
 	return path
 
-def getFilenameWithoutExtension(string, level = 1, getBase = False):
+def getFilenameWithoutExtension(string, getBase = False):
 	'''Strip the file extension, handling double extensions such as .fastq.gz'''
 	if getBase:
 		string = basename(string)
 	string = splitext(string)[0]
 	i = 0
 	while splitext(string)[-1] in [".gz", ".gzip", ".zip", ".bz", ".fasta", ".fastq", ".bam"]:
-		#while i < level:
-		#i +=1
 		string = splitext(string)[0]
 	return string
 
 def bam_to_fastq(bamfile, output):
 	print("Convert bam to fastq...", bamfile)
-	outfile = join(output, getFilenameWithoutExtension(bamfile, level = 2, getBase= True)  +".fastq")
+	outfile = join(output, getFilenameWithoutExtension(bamfile, getBase= True)  +".fastq")
 	subprocess.Popen("bamtools convert -format fastq -in " + bamfile + " -out " + outfile, shell = True)
 	return outfile
 
@@ -47,15 +51,14 @@ def join_reads(read_list, outputDir, outputName):
 		return None
 	elif len(read_list) ==1:
 		return read_list[0]
-	outfile = join(outputDir, getFilenameWithoutExtension(outputName, True) +".gz")
-	#if not exists(outfile):
-	if read_list[0].endswith(".gz"):
-
-		print("Decompress and concatenate files. This may take a while." )
-		process = subprocess.call("zcat " + " ".join(read_list) + "|gzip --fast > " + outfile, shell=True)
-	else:
-		print("Decompress and concatenate files. This may take a while.")
-		process = subprocess.call("gzip --fast -c " + " ".join(read_list) + " > " + outfile, shell=True)
+	outfile = join(outputDir, getFilenameWithoutExtension(outputName) )
+	outfile += splitext(read_list[0])[-1]
+	#if read_list[0].endswith(".gz"):
+	print("Concatenating files. This may take a while.")
+	process = subprocess.call("cat " + " ".join(read_list) + " > " + outfile, shell=True)
+	#else:
+	#	print("Decompress and concatenate files. This may take a while.")
+	#	process = subprocess.call("gzip --fast -c " + " ".join(read_list) + " > " + outfile, shell=True)
 	if process != 0:
 		sys.exit("Error with gzip")
 	return outfile
@@ -201,4 +204,209 @@ def str_to_json(report_str):
 				pre_depth = curr_depth
 	while len(closings) > 0:
 		json_file += closings.pop()  # end node
-	return json_file
\ No newline at end of file
+	return json_file
+
+def delete_files(path, extended="", pattern="_fastqc"):
+	for f in listdir(join(path, extended)):
+		filename =join(path,extended,f)
+		if f.endswith(pattern):
+			if isfile(filename):
+				remove(filename)
+			else:
+				shutil.rmtree(filename)
+
+
+def get_distribution(lines, key):
+	'''Expand FastQC histogram rows into a flat list of values for box plots'''
+	output = []
+	for line in lines:
+		line = line.strip()
+		fields = line.split('\t')
+		if key == "Sequence_Length_Distribution":
+			l = fields[0].split('-')
+			try:
+				mean = (int(l[0]) + int(l[1])) / 2
+			except:
+				mean = int(l[0])
+			output.extend([str(mean)] * int(float(fields[1])))
+
+			# sequence quality or sequence GC content
+		if key == 'Per_sequence_GC_content' or key == "Per_sequence_quality_scores":
+			v = fields[0]  # quality or gc [%]
+			output.extend([str(v)] * int(float(fields[1])))
+	return output
+
+def createCSV(dataset, lines, key, path, trimmed):
+	'''creates CSV used for boxplotting'''
+	#output = []
+	splitted_ds=dataset.split("_")
+
+	csvhandle = open(path + '/' + key + ".csv", 'a')
+	for line in lines:
+		line = line.strip()
+		fields = line.split('\t')
+		#Sequence Length Distribution
+		if key ==  "Sequence_Length_Distribution":
+			l = fields[0].split('-')
+			try:
+				value = (int(l[0]) + int(l[1])) / 2
+			except:
+				value = int(l[0])
+			count = int(float(fields[1]))
+
+		#sequence quality or sequence GC content
+		if key == 'Per_sequence_quality_scores' or key == 'Per_sequence_GC_content':
+			value = fields[0]  #quality or gc [%]
+			count = int(float(fields[1]))
+		try:
+			csvhandle.write(",".join(["_".join(splitted_ds[:-4]), splitted_ds[-3], splitted_ds[-4],trimmed, str(value), str(count)])+"\n")
+		except:
+			csvhandle.write(",".join([dataset.replace("_fastqc",""), "NA", "NA" , trimmed, str(value), str(count)]) + "\n")
+	csvhandle.close()
+
+def write_fastqc_summary(fastqcdir, output):
+	store = False
+	data = []
+	fastqcdatafile = open(join(fastqcdir,"fastqc_data.txt"), "r")
+	for line in iter(fastqcdatafile):
+		if line.startswith("#"):
+			pass  # print("comment")
+		# continue
+		elif line.startswith('>>END_MODULE'):
+			if len(data) > 0:
+				name1 = basename(fastqcdir)
+				if "Trimmed" in fastqcdir:
+					# name1 = "Trimmed " + name1
+					trimmed = "Trimmed"
+				else:
+					trimmed = "Raw"
+				createCSV(name1, data[2:], key,  output, trimmed)
+				data = []
+				store = False
+		elif line.startswith('>>'):
+			key = line.split("\t")[0]
+			key = key.replace(">>","")
+			key = key.replace(" ","_")
+			if key in ['Sequence_Length_Distribution', 'Per_sequence_quality_scores','Per_sequence_GC_content']:
+				store = True
+		if store:
+			data.extend([line])
+	fastqcdatafile.close()
+
+def write_SAV(folder, output):
+	print("Plot SAV images...")
+	process = subprocess.Popen(" ".join(["Rscript --vanilla ", join(dirname(__file__), "sav.R"),
+										folder, join(output,"img")]),
+							  stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
+	for line in iter(process.stderr.readline, b''):
+		print(line)
+	tree = ET.parse(join(folder,'RunInfo.xml'))
+	root = tree.getroot()
+	runinfo = OrderedDict()
+
+	for child in root.iter("Run"):
+		for grand in child.getchildren():
+
+			if len(grand.attrib) != 0:
+				runinfo[grand.tag] = grand.attrib
+
+			if len(grand.getchildren()) !=0:
+				for grandgrand in grand:
+					if len(grandgrand.attrib) != 0:
+						try:
+							runinfo[grand.tag].append(grandgrand.attrib)
+						except:
+							runinfo[grand.tag] = [grandgrand.attrib]
+
+					if grandgrand.text is not None:
+						if grandgrand.text.strip() != "":
+							if grand.tag not in runinfo.keys():
+								runinfo[grand.tag]={}
+							try:
+								runinfo[grand.tag][grandgrand.tag].append(grandgrand.text)
+							except:
+								runinfo[grand.tag][grandgrand.tag] = [grandgrand.text]
+
+			if grand.text is not None:
+				if grand.text.strip()!="":
+					if len(grand.attrib) != 0:
+						runinfo[grand.tag][grand.tag] = grand.text
+					else:
+						runinfo[grand.tag] = grand.text
+
+	runparameter_xml = ET.parse(join(folder, 'RunParameters.xml'))
+	runparameters_unique = ["ExperimentName", "FPGAVersion", "CPLDVersion", "MCSVersion","RTAVersion", "CameraFirmware", "CameraDriver", "ClusteringChoice" ,"RapidRunChemistry", "RunMode", "ApplicationName", "ApplicationVersion"]
+	runparameters_path = ["Setup/ReagentKits/Sbs/SbsReagentKit/ID","Setup/ReagentKits/Index/ReagentKit/ID","Setup/Sbs","Setup/Rehyb", "PR2BottleRFIDTag/PartNumber","FlowcellRFIDTag/PartNumber","ReagentKitRFIDTag/PartNumber"]
+	root = runparameter_xml.getroot()
+	runparameter = OrderedDict()
+	for label in runparameters_unique:
+		for i in root.iter(label):
+			runparameter[label] =  i.text
+			#print(label, ": ", i.text)
+
+	for label_path in runparameters_path:
+		label_path = label_path.split("/")
+		element = root.find(label_path[0])
+		for label in label_path[1:]:
+			if element is None:
+				break
+			element = element.find(label)
+		if element is not None:
+			runparameter["/".join(label_path)] = element.text
+			#print(label_path, element.text)
+		else:
+			print(label_path, " does not exist.")
+	runparameter.update(runinfo)
+	json.dump({"tables":runparameter}, open(join(output, "sav.json"),"w"))
+
+'''def boxplot(data, title,output):
+	bplotfont = {"weight": "bold", "size": 8}
+	plt.rc("font", **bplotfont)
+	xclasses = range(1, len(data) + 1)
+	width = 4.5
+	height = 3.5
+	if (len(data) / 3) > width:
+		width = len(data) / 3
+		height = len(data) / 4
+		# max width
+		if width > 12:
+			width = 12
+			height = 7
+			bplotfont = {"weight": "bold", "size": 8}
+			plt.rc("font", **bplotfont)
+
+	fig = plt.figure(1, figsize=(width, height))
+	ax = fig.add_subplot(111)
+	bp = ax.boxplot(data, notch=True, patch_artist=True)
+
+	colors = ["cyan", "lightblue", "lightgreen", "tan", "pink"]
+	for patch, color in zip(bp["boxes"], colors):
+		patch.set_facecolor(color)
+
+		# set props
+	#whisker = {"color":"#b4b4b4", "linestyle":"-", "linewidth":0.75}
+	#caps = {"color":"#b4b4b4", "linewidth":1}
+	#flier = {"marker":"o", "lw":0, "color":"#e6e6e6", "markersize":0.5}
+	#box = {"color":"#385d8a", "linewidth":0.75}"""
+	#bp = ax.boxplot(data)# ,  capsprops = caps , flierprops = flier, boxprops =box)
+
+	for box in bp["boxes"]:
+		# outline color
+		box.set(color="#385d8a", linewidth=0.75)
+
+	for whisker in bp["whiskers"]:
+		whisker.set(color="#b4b4b4", linestyle="-", linewidth=0.75)
+
+	for cap in bp["caps"]:
+		cap.set(color="#b4b4b4", linewidth=1)
+
+	for median in bp["medians"]:
+		median.set(color="#c0504d", linewidth=1)
+
+	for flier in bp["fliers"]:
+		flier.set(marker="o", lw=0, color="#e6e6e6", markersize=0.5)"""
+	plt.title(title)
+	plt.xticks(xclasses, xlabels, rotation=90)
+	fig.savefig(output, bbox_inches="tight", dpi=200)
+	fig.clf()
+'''
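
`get_distribution()` and `createCSV()` above both expand FastQC module rows of the form `value<TAB>count`, so that each read contributes one data point to the batch boxplots. A minimal sketch of that expansion, using hypothetical rows from a 'Per sequence GC content' module:

```
# Illustrative sketch with hypothetical fastqc_data.txt rows: each row is
# "<GC percent>\t<number of reads>", and every read contributes one value.
rows = ["10\t2.0", "50\t3.0"]

values = []
for row in rows:
    gc, count = row.strip().split("\t")
    values.extend([int(gc)] * int(float(count)))  # repeat the GC% once per read

print(values)  # [10, 10, 50, 50, 50]
```
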
diff --git a/lib/box.js b/lib/box.js
new file mode 100755
index 0000000..471bb8e
--- /dev/null
+++ b/lib/box.js
@@ -0,0 +1,320 @@
+(function() {
+
+// Inspired by http://informationandvisualization.de/blog/box-plot
+d3.box = function() {
+  var width = 1,
+      height = 1,
+      duration = 0,
+      domain = null,
+      value = Number,
+      whiskers = boxWhiskers,
+      quartiles = boxQuartiles,
+	  showLabels = true, // whether or not to show text labels
+	  numBars = 4,
+	  curBar = 1,
+      tickFormat = null;
+
+  // For each small multiple…
+  function box(g) {
+    g.each(function(data, i) {
+      //d = d.map(value).sort(d3.ascending);
+	  //var boxIndex = data[0];
+	  //var boxIndex = 1;
+	  var d = data[1].sort(d3.ascending);
+	  
+	 // console.log(boxIndex); 
+	  //console.log(d); 
+	  
+      var g = d3.select(this),
+          n = d.length,
+          min = d[0],
+          max = d[n - 1];
+
+      // Compute quartiles. Must return exactly 3 elements.
+      var quartileData = d.quartiles = quartiles(d);
+
+      // Compute whiskers. Must return exactly 2 elements, or null.
+      var whiskerIndices = whiskers && whiskers.call(this, d, i),
+          whiskerData = whiskerIndices && whiskerIndices.map(function(i) { return d[i]; });
+
+      // Compute outliers. If no whiskers are specified, all data are "outliers".
+      // We compute the outliers as indices, so that we can join across transitions!
+      var outlierIndices = whiskerIndices
+          ? d3.range(0, whiskerIndices[0]).concat(d3.range(whiskerIndices[1] + 1, n))
+          : d3.range(n);
+
+      // Compute the new x-scale.
+      var x1 = d3.scale.linear()
+          .domain(domain && domain.call(this, d, i) || [min, max])
+          .range([height, 0]);
+
+      // Retrieve the old x-scale, if this is an update.
+      var x0 = this.__chart__ || d3.scale.linear()
+          .domain([0, Infinity])
+		 // .domain([0, max])
+          .range(x1.range());
+
+      // Stash the new scale.
+      this.__chart__ = x1;
+
+      // Note: the box, median, and box tick elements are fixed in number,
+      // so we only have to handle enter and update. In contrast, the outliers
+      // and other elements are variable, so we need to exit them! Variable
+      // elements also fade in and out.
+
+      // Update center line: the vertical line spanning the whiskers.
+      var center = g.selectAll("line.center")
+          .data(whiskerData ? [whiskerData] : []);
+
+	 //vertical line
+      center.enter().insert("line", "rect")
+          .attr("class", "center")
+          .attr("x1", width / 2)
+          .attr("y1", function(d) { return x0(d[0]); })
+          .attr("x2", width / 2)
+          .attr("y2", function(d) { return x0(d[1]); })
+          .style("opacity", 1e-6)
+        .transition()
+          .duration(duration)
+          .style("opacity", 1)
+          .attr("y1", function(d) { return x1(d[0]); })
+          .attr("y2", function(d) { return x1(d[1]); });
+
+      center.transition()
+          .duration(duration)
+          .style("opacity", 1)
+          .attr("y1", function(d) { return x1(d[0]); })
+          .attr("y2", function(d) { return x1(d[1]); });
+
+      center.exit().transition()
+          .duration(duration)
+          .style("opacity", 1e-6)
+          .attr("y1", function(d) { return x1(d[0]); })
+          .attr("y2", function(d) { return x1(d[1]); })
+          .remove();
+
+      // Update innerquartile box.
+      var box = g.selectAll("rect.box")
+          .data([quartileData]);
+
+      box.enter().append("rect")
+          .attr("class", "box")
+          .attr("x", 0)
+          .attr("y", function(d) { return x0(d[2]); })
+          .attr("width", width)
+          .attr("height", function(d) { return x0(d[0]) - x0(d[2]); })
+        .transition()
+          .duration(duration)
+          .attr("y", function(d) { return x1(d[2]); })
+          .attr("height", function(d) { return x1(d[0]) - x1(d[2]); });
+
+      box.transition()
+          .duration(duration)
+          .attr("y", function(d) { return x1(d[2]); })
+          .attr("height", function(d) { return x1(d[0]) - x1(d[2]); });
+
+      // Update median line.
+      var medianLine = g.selectAll("line.median")
+          .data([quartileData[1]]);
+
+      medianLine.enter().append("line")
+          .attr("class", "median")
+          .attr("x1", 0)
+          .attr("y1", x0)
+          .attr("x2", width)
+          .attr("y2", x0)
+        .transition()
+          .duration(duration)
+          .attr("y1", x1)
+          .attr("y2", x1);
+
+      medianLine.transition()
+          .duration(duration)
+          .attr("y1", x1)
+          .attr("y2", x1);
+
+      // Update whiskers.
+      var whisker = g.selectAll("line.whisker")
+          .data(whiskerData || []);
+
+      whisker.enter().insert("line", "circle, text")
+          .attr("class", "whisker")
+          .attr("x1", 0)
+          .attr("y1", x0)
+          .attr("x2", 0 + width)
+          .attr("y2", x0)
+          .style("opacity", 1e-6)
+        .transition()
+          .duration(duration)
+          .attr("y1", x1)
+          .attr("y2", x1)
+          .style("opacity", 1);
+
+      whisker.transition()
+          .duration(duration)
+          .attr("y1", x1)
+          .attr("y2", x1)
+          .style("opacity", 1);
+
+      whisker.exit().transition()
+          .duration(duration)
+          .attr("y1", x1)
+          .attr("y2", x1)
+          .style("opacity", 1e-6)
+          .remove();
+
+      // Update outliers.
+      var outlier = g.selectAll("circle.outlier")
+          .data(outlierIndices, Number);
+
+      outlier.enter().insert("circle", "text")
+          .attr("class", "outlier")
+          .attr("r", 5)
+          .attr("cx", width / 2)
+          .attr("cy", function(i) { return x0(d[i]); })
+          .style("opacity", 1e-6)
+        .transition()
+          .duration(duration)
+          .attr("cy", function(i) { return x1(d[i]); })
+          .style("opacity", 1);
+
+      outlier.transition()
+          .duration(duration)
+          .attr("cy", function(i) { return x1(d[i]); })
+          .style("opacity", 1);
+
+      outlier.exit().transition()
+          .duration(duration)
+          .attr("cy", function(i) { return x1(d[i]); })
+          .style("opacity", 1e-6)
+          .remove();
+
+      // Compute the tick format.
+      var format = tickFormat || x1.tickFormat(8);
+
+      // Update box ticks.
+      var boxTick = g.selectAll("text.box")
+          .data(quartileData);
+	 if(showLabels == true) {
+      boxTick.enter().append("text")
+          .attr("class", "box")
+          .attr("dy", ".3em")
+          .attr("dx", function(d, i) { return i & 1 ? 6 : -6 })
+          .attr("x", function(d, i) { return i & 1 ?  + width : 0 })
+          .attr("y", x0)
+          .attr("text-anchor", function(d, i) { return i & 1 ? "start" : "end"; })
+          .text(format)
+        .transition()
+          .duration(duration)
+          .attr("y", x1);
+	}
+		 
+      boxTick.transition()
+          .duration(duration)
+          .text(format)
+          .attr("y", x1);
+
+      // Update whisker ticks. These are handled separately from the box
+      // ticks because they may or may not exist, and we don't want
+      // to join box ticks pre-transition with whisker ticks post-transition.
+      var whiskerTick = g.selectAll("text.whisker")
+          .data(whiskerData || []);
+	if(showLabels == true) {
+      whiskerTick.enter().append("text")
+          .attr("class", "whisker")
+          .attr("dy", ".3em")
+          .attr("dx", 6)
+          .attr("x", width)
+          .attr("y", x0)
+          .text(format)
+          .style("opacity", 1e-6)
+        .transition()
+          .duration(duration)
+          .attr("y", x1)
+          .style("opacity", 1);
+	}
+      whiskerTick.transition()
+          .duration(duration)
+          .text(format)
+          .attr("y", x1)
+          .style("opacity", 1);
+
+      whiskerTick.exit().transition()
+          .duration(duration)
+          .attr("y", x1)
+          .style("opacity", 1e-6)
+          .remove();
+    });
+    d3.timer.flush();
+  }
+
+  box.width = function(x) {
+    if (!arguments.length) return width;
+    width = x;
+    return box;
+  };
+
+  box.height = function(x) {
+    if (!arguments.length) return height;
+    height = x;
+    return box;
+  };
+
+  box.tickFormat = function(x) {
+    if (!arguments.length) return tickFormat;
+    tickFormat = x;
+    return box;
+  };
+
+  box.duration = function(x) {
+    if (!arguments.length) return duration;
+    duration = x;
+    return box;
+  };
+
+  box.domain = function(x) {
+    if (!arguments.length) return domain;
+    domain = x == null ? x : d3.functor(x);
+    return box;
+  };
+
+  box.value = function(x) {
+    if (!arguments.length) return value;
+    value = x;
+    return box;
+  };
+
+  box.whiskers = function(x) {
+    if (!arguments.length) return whiskers;
+    whiskers = x;
+    return box;
+  };
+  
+  box.showLabels = function(x) {
+    if (!arguments.length) return showLabels;
+    showLabels = x;
+    return box;
+  };
+
+  box.quartiles = function(x) {
+    if (!arguments.length) return quartiles;
+    quartiles = x;
+    return box;
+  };
+
+  return box;
+};
+
+function boxWhiskers(d) {
+  return [0, d.length - 1];
+}
+
+function boxQuartiles(d) {
+  return [
+    d3.quantile(d, .25),
+    d3.quantile(d, .5),
+    d3.quantile(d, .75)
+  ];
+}
+
+})();
\ No newline at end of file
diff --git a/lib/qcstyle.css b/lib/qcstyle.css
new file mode 100755
index 0000000..4bad164
--- /dev/null
+++ b/lib/qcstyle.css
@@ -0,0 +1,84 @@
+       .row_hidden {
+            display: none;
+        }
+		.panel {
+		background-color:#F7F7F7;
+
+		}
+		.panel-heading{
+		cursor:pointer;
+		}
+        .show_row {
+            display: block;  /* "show" is not a valid display value */
+        }
+
+        p.inbox_text {
+            text-align: center;
+            color: grey;
+            margin: 0 0 0px;
+        } 
+
+        .thumbnail {
+            margin-bottom: 5px;
+            border: 0px solid #000;
+            height: 150px;
+            width: 200px;
+        }
+
+        .sample_box {
+            display: table-cell;
+            margin: 10px;
+            padding: 10px;
+            color: lightgrey;
+            border-radius: 5px;
+            box-shadow: 0 1px 3px rgba(0, 0, 0, .25);
+        }
+
+        .sidebar {
+            float: none;  /* "both" is not a valid float value */
+			overflow-y: auto;
+            position: fixed;
+            height: 100%;
+            top: 50px;
+            z-index: 999;
+            padding-right: 0px;
+            box-shadow: 0 2px 10px 0 rgba(0, 0, 0, 0.12);
+            background: #f7f7f7;
+        }
+		.navbar-default{
+		background-image: linear-gradient(to bottom, #1D95B6 0px, #0C6A89 100%);
+		}
+	.navbar-default .navbar-nav > .active > a, .navbar-default .navbar-nav > .open > a{
+		background-image: linear-gradient(to bottom, rgb(32, 162, 198) 0px, rgb(21, 117, 149) 100%);
+
+	}
+	.navbar-default .navbar-brand {
+	color: white;
+	}
+        h4{
+		color: #0A6780;
+		}
+        h5{
+		color: #0D7793;
+		}
+        .node {
+            cursor: pointer;
+        }
+
+        .node circle {
+            fill: #fff;
+            stroke: steelblue;
+            stroke-width: 1.5px;
+        }
+
+        .node text {
+            font: 10px sans-serif;
+        }
+
+        .link {
+            fill: none;
+            stroke: #ccc;
+            stroke-width: 1.5px;
+        }
+		.table{
+		margin-bottom: auto;}
diff --git a/parameter.txt b/parameter.txt
index cc97f05..f3b5b6c 100755
--- a/parameter.txt
+++ b/parameter.txt
@@ -20,7 +20,7 @@ trimOption = SLIDINGWINDOW:4:20
 trimBetter_threshold = 0.15
 
 [forAssembly.Illumina]
-trimBetter_threshold = 0.15
+trimBetter_threshold = 0.1
 trimOption = SLIDINGWINDOW:4:25
 
 [forAssembly.IonTorrent]
@@ -39,4 +39,9 @@ trimOption = SLIDINGWINDOW:4:15
 Illumina Universal Adapter = TruSeq2
 
 [Fileextension]
-fastq = .fastq.gz, .fastq, .fq, .fq.gz
\ No newline at end of file
+fastq = .fastq.gz, .fastq, .fq, .fq.gz, .bam
+
+[Pattern]
+R1 = _R1_
+R2 = _R2_
+lane = _L\d{3}_
\ No newline at end of file
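
The new `[Pattern]` section defines the regular expressions used to recognise the R1/R2 token and the lane token in Illumina file names. A short sketch of how such patterns can be used to group files by lane (the file names and the grouping logic are illustrative only, not QCumber's actual implementation):

```
# Illustrative sketch: group Illumina FASTQ files by lane using the [Pattern]
# regexes from parameter.txt. File names below are hypothetical.
import re
from collections import defaultdict

lane_pattern = r"_L\d{3}_"   # [Pattern] lane
r1_pattern = "_R1_"          # [Pattern] R1

files = [
    "Sample1_S1_L001_R1_001.fastq.gz",
    "Sample1_S1_L001_R2_001.fastq.gz",
    "Sample1_S1_L002_R1_001.fastq.gz",
]

by_lane = defaultdict(list)
for f in files:
    match = re.search(lane_pattern, f)
    by_lane[match.group() if match else "no_lane"].append(f)

for lane, reads in sorted(by_lane.items()):
    print(lane, "R1:", [f for f in reads if r1_pattern in f])
```
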
diff --git a/readme.md b/readme.md
new file mode 100755
index 0000000..3dd3605
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,119 @@
+------------
+Introduction 
+------------
+
+QCumber is a tool for quality control and exploration of NGS data. The workflow is as follows:
+
+* optional: extract information from Sequence Analysis Viewer
+* Quality control with FastQC
+* Trim Reads with Trimmomatic 
+* optional: run FastQC and retrim if necessary
+* Quality control of trimmed reads with FastQC
+* optional: Map reads against reference using bowtie2
+* optional: Classify reads with Kraken
+
+------------
+Dependencies
+------------
+
+This tool is implemented in Python and requires Python 3.4 or higher. R (3.0.2) is required for plotting and SAV extraction. Furthermore, FastQC (>v0.10.1), bowtie2 (>2.2.3) and Kraken (0.10.5) are required.
+
+Further packages via apt-get install:
+* Python3-pip
+* libfreetype6-dev
+* r-cran-quantreg
+* r-bioc-savr
+
+Packages via pip3 install:
+* Jinja2 
+* matplotlib
+
+R packages:
+* ggplot2
+* savR
+
+
+To change tool or adapter path, change config.txt.
+
+-----
+Usage
+-----
+```
+python3 QCumber.py -i <input> -technology <Illumina/IonTorrent> <option(s)>
+```
+Input parameter:
+
+	-i, -input		sample folder/file. For an Illumina folder, files have to match the pattern <Sample name>_<lane>_<R1/R2>_<number>,
+					e.g. Sample_12_345_R1_001.fastq. Otherwise use -1/-2.
+	-1, -2          alternative to -i: filename(s). Must not match the Illumina name pattern.
+	-technology		sequencing technology (Illumina/IonTorrent)
+
+Options:
+
+	-output		            output folder, default: input folder
+	-reference              reference file
+	-adapter                adapter sequence (TruSeq2-PE, TruSeq2-SE, TruSeq3-PE, TruSeq3-SE, TruSeq3-PE-2, NexteraPE-PE). Required for Illumina.
+	-sav					Sequence Analysis Viewer folder. Requires the InterOp folder, RunInfo.xml and RunParameters.xml
+	-threads                number of threads. Default: 4
+	-palindrome				palindrome parameter used in Trimmomatic (use 30 or 1000 for further analysis). Default: 30
+	-db                     Kraken database
+	-trimOption             Override the default trimming option, e.g. MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>.
+                            Default: SLIDINGWINDOW:4:15
+	-trimBetter				Optimize trimming parameters using 'Per base sequence content' from FastQC
+	-trimBetter_threshold	Threshold for 'Per base sequence content' fluctuation. Default: 0.15
+	-forAssembly			Trim parameters are optimized for assembly (more aggressive trimming).
+	-forMapping				Trim parameters are optimized for mapping (allow more errors).
+	-minlen                 Minlen parameter for Trimmomatic. Default:50
+	-index					Bowtie2 index if available
+	-save_mapping           Save sam files
+	-nokraken				skip Kraken
+	-nomapping				skip mapping
+	
+	-version                Get version
+
+Output:
+
+<Sample/Output Folder>
+|-- QCResult
+  |-- Report
+	  |-- PDF report per sample
+	  |-- HTML report for entire project
+	  |-- src
+	      |-- img
+	           |-- Summary images
+  |-- FastQC
+      |-- <output folder(s) from FastQC>
+  |-- Trimmed
+      |-- <trimmed reads>
+      |-- FastQC
+          |-- <output folder(s) from FastQC>
+
+-------------------
+Program Description
+-------------------
+
+This project consists of the following files:
+
+QCumber.py		main script for running the complete pipeline
+classes.py		script containing classes
+helper.py		small helper functions
+report.tex		template for sample reports
+boxplot.R		boxplots of FastQC output for the batch report
+parameter.txt	default parameters
+config.txt		tool locations and configuration for Kraken and Trimmomatic
+
+
+-------
+Example
+-------
+
+1. Simple usage for Illumina:
+```
+python3 QCumber.py -1 sample_R1.fastq -2 sample_R2.fastq -technology Illumina -adapter NexteraPE-PE -r myReference.fasta
+```
+
+2. Entering a project:
+```
+python3 QCumber.py -input myProjectFolder/ -technology IonTorrent -r myReference.fasta
+```
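
The readme above points to config.txt for tool and adapter locations. A minimal sketch of reading it with `configparser`; the `kraken_db` key under `[DEFAULT]` is used elsewhere in this commit, and any further keys depend on the config.txt actually shipped:

```
# Illustrative sketch: read config.txt next to the QCumber scripts with
# configparser. Only "kraken_db" is known from this commit; other keys are
# whatever the shipped config.txt defines.
import configparser
from os.path import dirname, join

config = configparser.ConfigParser()
config.read(join(dirname(__file__), "config.txt"))

kraken_db = config["DEFAULT"].get("kraken_db", "")
print("Kraken DB:", kraken_db or "<not configured>")
```
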
diff --git a/readme.txt b/readme.txt
deleted file mode 100755
index f4943c3..0000000
--- a/readme.txt
+++ /dev/null
@@ -1,111 +0,0 @@
-------------
-Introduction 
-------------
-
-QCPipeline is a tool for quality control. The workflow is as followed:
-
-	1. Quality control with FastQC
-	2. Trim Reads with Trimmomatic
-	3. Quality control of trimmed reads with FastQC
-	4. Map reads against reference using bowtie2
-	5. Classify reads with Kraken
-
-------------
-Dependencies
-------------
-
-This tool was implemented in python and needs Python 3.4 or higher. Furhermore, FastQC (>v0.10.1), bowtie2 (> 2.2.3) and Kraken (0.10.5) are required.
-
-Further packages via apt-get install:
-* Python3-pip
-* libfreetype6-dev
-
-Packages via pip3 install:
-* Jinja2 
-* matplotlib
-
-
-To change tool or adapter path, change config.txt.
-
------
-Usage
------
-```
-python3.4 runQCPipeline.py -i <input> -technology <Illumina/IonTorrent> <option(s)>
-```
-Input parameter:
-
-	-i, -input		sample folder/file. If Illumina folder, files has to match pattern <Sample name>_<lane>_<R1/R2>_<number>. Eg. Sample_12_345_R1_001.fastq. Otherwise use -1,-2
-	-1 , -2         alternatively to -i: filename. Must not match Illumina names.
-	-technology		sequencing technology (Illumina/IonTorrent)
-
-Options:
-
-	-o , -output            output folder, default: input folder
-	-r 			            reference file
-	-a                      adapter sequence (TruSeq2-PE, TruSeq2-SE, TruSeq3-PE, TruSeq3-SE, TruSeq3-PE-2, NexteraPE-PE). Required for Illumina.
-	-threads                threads. Default:4
-	-p                      palindrome parameter used in Trimmomatic (use 30 or 1000 for further analysis). Default: 30
-	-db                     Kraken database
-	-b                      input is a project folder. A project folder contains furher sample folder(s)
-	-trimOption             Use maxinfo or slidinginfo for Trimmomatic.MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>. 
-                                default: SLIDINGWINDOW:4:15
-	-minlen                 Minlen parameter for Trimmomatic. Default:50
-	-nokraken				skip Kraken
-	-nomapping				skip mapping
-	
-	-version                Get version
-
-Output:
-
-<Sample/Output Folder>
-|-- QCResult
-  |-- summary_<sample name>.pdf (summarizes all FastQC, Trimming and Mapping Results)
-  |-- FastQC
-      |-- <output folder(s) from FastQC>
-  |-- Trimmed
-      |-- <trimmed reads>
-      |-- FastQC
-          |-- <output folder(s) from FastQC>
-[-- if project is entered (-b): FASTQC_COMBINE_<datetime>.html (summarizes all (plotted) results in oen html file) ]
-
--------------------
-Program Description
--------------------
-
-This project consists of 6 files:
-
-runQCPipeline.py	main script for running complete pipeline
-classes.py		script containing classes
-helper.py		small helper functions
-report.tex		Template for sample reports
-config.txt      configuration for Kraken and Trimmomatic
-
-combineFastQC.py	Written by Stefan Fuchs. It combines FastQC, mapping, trimming, classification results in a html file.
-			This file can be called without running runQCPipeline. 
-
-			Usage: python3.4 combineFastQC.py -f project_folder [-csv csvtable]
-
-			csv is used to plot mapping, trimming and classification results if the user does not want to run the complete QCPipeline.
-			Format: sample name | mapping result | trimming result | Kraken result | % of fragments shorter than read length 
-
-			The project folder must have the output structure (QCResults).
-
--------
-Example
--------
-
-1. Simple usage for Illumina:
-```
-python3.4 runQCPipeline.py -input mySampleFolder/ -technology Illumina -adapter NexteraPE-PE -r myReference.fasta
-```
-> Output files will be in SampleFolder/QcResults
-
-
-2. Entering a project:
-```
-python3.4 runQCPipeline.py -input myProjectFolder/ -technology IonTorrent -r myReference.fasta -b
-```
-> Output files will be in myProjectFolder/SampleFolder/QcResults and an overall summary will be in myProjectFolder. 
-
-
diff --git a/runQCPipeline.py b/runQCPipeline.py
deleted file mode 100755
index e794596..0000000
--- a/runQCPipeline.py
+++ /dev/null
@@ -1,484 +0,0 @@
-#!/usr/bin/env python3.4
-# -*- coding: utf-8 -*-
-__author__ = 'LieuV'
-__version__ = "1.1.2"
-
-from classes import *
-from helper import *
-import os
-from itertools import groupby
-from jinja2 import Environment,FileSystemLoader
-import shutil
-from combineFastQC import combineFastQC
-import configparser
-from collections import OrderedDict
-import json
-
-#dependencies
-try:
-	import argparse
-except ImportError:
-	print('The "argparse" module is not available. Use Python >3.2.')
-	sys.exit(1)
-
-
-
-file_extensions = ["fastq.gz",".fastq", ".fq","fq.gz"]
-
-def get_read(values,argMap, read = "_R1_"):
-	read_list = list(filter(lambda x: read in x, values))
-
-	if(len(read_list)==0):
-		return None
-	elif (len(read_list) == 1):
-		read = FastQFile(os.path.join(argMap["input"], os.path.basename(read_list[0])))
-	else:
-		concat_reads = join_reads(read_list, argMap["tmp"], "concat_" + "_".join(os.path.basename(read_list[0]).split("_")[:-1]) + os.path.splitext(read_list[0])[-1] )
-		read = FastQFile(concat_reads)
-		read.concat_files = [toLatex(os.path.basename(x)) for x in read_list]
-	return read
-
-
-def get_illumina_reads( files, argMap, output):
-	## Add readsets to sample
-	readsets = []
-	# concat same files
-	files = sorted(files)
-	if(len(files)==1):
-		return [ReadSet(output,files[0])]
-	if argMap["r1"]:
-		if argMap["r2"]:
-			return[ReadSet(output, FastQFile(argMap["r1"]), FastQFile(argMap["r2"]) )]
-		else:
-			return [ReadSet(output, FastQFile(argMap["r1"]))]
-
-	for p, values in groupby(files, key=lambda x:x.split("_")[:-2]):
-		val = list(values)
-		r1 = get_read(val,argMap,  "_R1_")
-		r2 = get_read(val,argMap, read = "_R2_")
-		readsets.append(ReadSet(output, r1,r2))
-	return readsets
-
-def run_analysis (readset, argMap, output):
-	print("Run FastQC...")
-	readset.run_FastQC(argMap["threads"])
-	trimmed_output = os.path.join(output, "Trimmed")
-	os.makedirs(trimmed_output, exist_ok=True)
-	if argMap["trimBetter"]:
-		trimming_perc = argMap["trimBetter_threshold"]
-	else:
-		trimming_perc = ""
-	readset.run_Trimmomatic(argMap["adapter"], argMap["threads"], argMap["palindrome"], trimmed_output, argMap["technology"], argMap["minlen"], argMap["trimOption"], trimming_perc)
-	readset.trimRes = readset.trimRes.run_FastQC(trimmed_output, argMap["tmp"])
-	if argMap["blobplot"]:
-		readset.trimRes.run_blobplot(argMap["db"])
-	return readset
-
-
-def check_input(argMap):
-	if argMap["r1"]:
-		if argMap["r2"]:
-			return [argMap["r1"], argMap["r2"]]
-		else:
-			return [argMap["r1"]]
-
-	if argMap["technology"]=="Illumina":
-		if os.path.isdir(argMap["input"]):
-			files = os.listdir(argMap["input"])
-			files = [os.path.join(argMap["input"], x) for x in files]
-			files = [x for x in files if os.path.isfile(x) & any([ext in x for ext in file_extensions])]
-			if len(files)==0:
-				sys.exit(argMap["input"] + " does not contain fastq files.")
-			return files
-		else:
-			return [argMap["input"]]
-	elif argMap["technology"]=="IonTorrent":
-
-		if os.path.isfile(argMap["input"]):
-
-			if argMap["input"].endswith(".bam"):
-				return argMap["input"]
-			else:
-				sys.exit("IonTorrent PGM input has to be a bam-file. Otherwise use -b. Use --help for more information.")
-		else:
-			sys.exit("Incorrect file input")
-	elif argMap["technology"] == "Illumina-MiSeq":
-		if os.path.isfile(argMap["input"]):
-			if any([argMap["input"].endswith(x) for x in file_extensions]):
-				return argMap["input"]
-			else:
-				sys.exit("Not supported file extension.")
-		else:
-			sys.exit("Illumina MiSeq input has to be a fastq-file. Otherwise use -b for a batch run. Use --help for more information.")
-	else:
-		sys.exit("Incorrect file input")
-	return None
-
-def fillClasses(argMap, temp_bowtie_path = "", tmp = ""):
-	#tempdir = tempfile.mkdtemp(dir = argMap["tmp"])
-	try:
-		if tmp!="":
-			argMap["tmp"] = tmp
-		if argMap["r1"]:
-			argMap["input"] = argMap["r1"]
-		argMap["input"] = os.path.abspath(argMap["input"])
-		if argMap["output"] == "":
-			output = getDir([os.curdir , "QCResults"], True)#getDir([argMap["input"] , "QCResults"], True)
-		else:
-			output = getDir([argMap["output"], "QCResults"], True)
-
-		sample = Sample(getFilenameWithoutExtension(argMap["input"], level = 2, getBase=True), output, argMap["reference"],argMap["threads"], argMap["technology"])
-		if argMap["technology"].startswith("Illumina"):
-			files = check_input(argMap)
-			if len(files) == 0:
-				return None
-			readsets = get_illumina_reads( files, argMap, output)
-
-			#os.makedirs(tempdir, exist_ok=True)
-			for rs in readsets:
-				readset = run_analysis(rs, argMap,  output)
-				sample = sample.add_readSet(readset)
-			if not argMap["nomapping"]:
-				if argMap["save_mapping"]:
-					sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, join(argMap["output"], "QCResults", sample.name +".bam"))
-				else:
-					sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, "/dev/null")
-			if not argMap["nokraken"]:
-				print("Run Kraken.")
-				sample = sample.run_Kraken(argMap["db"])
-
-		elif argMap["technology"] == "IonTorrent":
-			argMap["input"] = check_input(argMap)
-			print("IonTorrent", argMap["input"])
-			fastqfile = bam_to_fastq(argMap["input"], argMap["tmp"])
-			readset = ReadSet(output, fastqfile)
-			readset = run_analysis(readset,argMap, output)
-			sample.add_readSet(readset)
-			if not argMap["nomapping"]:
-				if argMap["save_mapping"]:
-					sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, join(argMap["output"], "QCResults", sample.name +".bam"))
-				else:
-					sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, "/dev/null")
-			if not argMap["nokraken"]:
-				print("Run Kraken.")
-				sample = sample.run_Kraken(argMap["db"])
-
-		sample.stop = datetime.datetime.strftime(datetime.datetime.now(),"%d.%m.%Y, %H:%M:%S")
-	finally:
-		shutil.rmtree(argMap["tmp"])
-	return sample
-
-def writeReport(sample, argMap):
-	report_name = os.path.join(sample.mainResultsPath,
-		 "summary_" + sample.name + "_" + datetime.datetime.strftime(datetime.datetime.now(), "%d-%m-%y_%H-%M"))
-	print("Writing latex " ,report_name)
-	env = Environment()
-	env.loader = FileSystemLoader(os.path.dirname(__file__))
-
-	template = env.get_template("report.tex")
-	pdf_latex = template.render(sample=sample, pipeline=Pipeline(), trim_param = sample.readSets[0].trimRes.print_param(argMap["minlen"], argMap["trimOption"], argMap["palindrome"]))
-
-	latex = open(report_name + ".tex", "w")
-	latex.write(pdf_latex)
-	latex.close()
-
-	process = subprocess.Popen(["pdflatex", "-interaction=nonstopmode", "-output-directory=" + sample.mainResultsPath, report_name + ".tex"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
-	for line in iter(process.stdout.readline, b''):
-		pass#print(line)
-	for line in iter(process.stderr.readline, b''):
-		print(line)
-
-	process.communicate()
-
-	for ext in (".tex",".aux", ".log", ".toc", ".lof", ".lot", ".synctex.gz"):
-		try:
-			os.remove(report_name + ext)
-		except OSError:
-			pass
-
-def check_project(argMap):
-	if argMap["technology"] =="IonTorrent":
-		if os.path.isdir(argMap["input"]):
-			files = os.listdir(argMap["input"])
-			files = [os.path.join(argMap["input"], x) for x in files if x.endswith(".bam")]
-			if len(files) == 0:
-				sys.exit("Folder does not contain bam-files")
-			return sorted(files)
-		else:
-			sys.exit("Enter a valid PGM folder, which contains bam-files.")
-	elif argMap["technology"] =="Illumina":
-		if os.path.isdir(argMap["input"]):
-			files = os.listdir(argMap["input"])
-			files = [os.path.join(argMap["input"], x) for x in files]
-			files = [x for x in files if os.path.isdir(x)]
-			if len(files) == 0:
-				sys.exit("Enter a valid folder, which contain sample folders")
-			return sorted(files)
-	else:
-		sys.exit("Illumina projects has to contain subfolders with fastq-files.")
-
-
-
-def main(arguments):
-	parameter = configparser.ConfigParser()
-	parameter.read(join(dirname(__file__), "parameter.txt"))
-
-	if not arguments["tmp"]:
-		arguments["tmp"] = join(arguments["output"], "qc_tmp")
-	else:
-		if os.path.exists(arguments["tmp"]):
-			arguments["tmp"] = join(arguments["tmp"], "qc_tmp")
-
-	if arguments["technology"].startswith("Illumina"):
-		technology = "Illumina"
-	else:
-		technology = "IonTorrent"
-
-	if arguments["forAssembly"]:
-		if not arguments["trimBetter_threshold"]:
-			arguments["trimBetter_threshold"] = parameter["forAssembly." + technology]["trimBetter_threshold"]
-		if not arguments["trimOption"]:
-			arguments["trimOption"] = parameter["forAssembly." + technology]["trimOption"]
-
-	elif arguments["forMapping"]:
-		if not arguments["trimBetter_threshold"]:
-			arguments["trimBetter_threshold"] = parameter["forMapping." + technology]["trimBetter_threshold"]
-		if not arguments["trimOption"]:
-			arguments["trimOption"] = parameter["forMapping." + technology]["trimOption"]
-
-	if not arguments["trimOption"]:
-		arguments["trimOption"] = parameter["Trimmomatic"]["trimOption"]
-	if not arguments["trimBetter_threshold"]:
-		arguments["trimBetter_threshold"] = parameter["Trimmomatic"]["trimBetter_threshold"]
-
-	# check input
-	pattern = re.match("(?P<option>\w+):(?P<value1>\d+):(?P<value2>\d+)", arguments["trimOption"])
-	if(arguments["trimOption"]!=""):
-		if not ((pattern.group("option") =="SLIDINGWINDOW") or (pattern.group("option") =="MAXINFO")):
-			sys.exit("Check trimOption. Only 'SLIDINGWINDOW' or 'MAXINFO' allowed")
-		try:
-			int(pattern.group("value1"))
-			int(pattern.group("value2"))
-		except:
-			sys.exit("Check trimOptions.")
-
-	if arguments['technology'] == "Illumina":
-		if not arguments['adapter']:
-			sys.exit("Illumina projects requires an adapter file")
-	if arguments["nomapping"]:
-		#print("no mapping")
-		arguments["reference"] = ""
-	else:
-		if not arguments["reference"]:
-			sys.exit("Mapping needs a reference.")
-		else:
-			if not os.path.exists(arguments["reference"]):
-				sys.exit("Reference does not exist.")
-
-	if not arguments["nokraken"]:
-		if not os.path.exists(arguments["db"]):
-			sys.exit(arguments["db"] + " does not exist. Enter a valid database for kraken")
-		else:
-			if "database.kdb" not in os.listdir(arguments["db"]):
-				sys.exit("database "+arguments["db"] +" does not contain necessary file database.kdb")
-	if not arguments["input"]:
-		if arguments["r1"]:
-			print(os.path.abspath(arguments["r1"]),os.path.exists(arguments["r1"]))
-			if not os.path.exists(arguments["r1"]):
-				sys.exit(arguments["r1"] + " does not exist.")
-			if arguments["technology"]=="IonTorrent":
-				arguments["input"] = arguments["r1"]
-				arguments["r1"] = ""
-		else:
-			sys.exit("Input file required. Use option -input or -1 / -2")
-
-		if arguments["r2"]:
-			if not os.path.exists(arguments["r2"]):
-				sys.exit(arguments["r2"] + " does not exist.")
-
-
-	os.makedirs(arguments["tmp"], exist_ok=True)
-	if arguments["index"]:
-		mapping_index_path = arguments["index"]
-	else:
-		mapping_index_path = join(arguments["tmp"], "bowtie2_index")
-		os.makedirs(mapping_index_path, exist_ok=True)
-	# RUN
-	try:
-		jsonfile_dict = []
-		kraken_reports =OrderedDict()
-		if arguments["b"]:
-				print("A project was entered")
-				i=1
-				project = []
-				overview = []
-
-				if arguments["technology"]=="Illumina-MiSeq":
-
-					myFiles=[]
-					input = sorted(os.listdir(arguments["input"]), key=lambda x: x.split("_")[:-3])
-					#print(input)
-					input = [os.path.join(arguments["input"], x) for x in input if os.path.isfile(os.path.join(arguments["input"], x))and any([x.endswith(ext) for ext in file_extensions])]
-
-					for p, values in groupby(input, key=lambda x: x.split("_")[:-3]):
-						myFiles.append(sorted(list(values)))
-					print("Found " + str(len(myFiles)) + " samples.")
-					for subset in myFiles:
-						print("Analyse " + str(i) + "/" + str(len(myFiles)))
-						tmp = join(arguments["tmp"], "Sample_" + str(i))
-						os.makedirs(tmp, exist_ok=True)
-
-						arguments["r1"] = subset[0]
-						if len(subset)>1:
-							arguments["r2"] = subset[1]
-						sample = fillClasses(arguments, mapping_index_path,tmp )
-						if sample is not None:
-							project.append(sample)
-							try:
-								writeReport(sample, arguments)
-							except:
-								print("Couldnt write pdf.")
-							try:
-								if sample.mappingRes is None:
-									mapping = 0
-								else:
-									mapping = sample.mappingRes.percentAlignedReads
-								if sample.krakenRes is None:
-									kraken = 0
-								else:
-									kraken = sample.krakenRes.pClassified
-								jsonfile_dict.append(
-									OrderedDict([("setname", sample.name), ("Map to reference [%]", mapping),
-												 ("Reads after trimming", sample.pTrimmed()),
-												 ("Classified (Kraken)", kraken),
-												 ("Shorter fragments", sample.get_mean_trimRes()),
-												 ("images", [x.get_all_qc_dict() for x in sample.readSets])]))
-								overview.append((sample.name, mapping, sample.pTrimmed(), kraken, sample.get_mean_trimRes()))
-							except:
-								print("Error in appending summary of all samples.")
-							sample = None
-							i+=1
-
-				else:
-					sample_list = check_project(arguments)
-					print("Found " + str(len(sample_list)) + " samples.")
-					for sub in sample_list:
-						print("Analyse " + str(i) + "/" + str(len(sample_list)))
-						tmp = join(arguments["tmp"], "Sample_" + str(i))
-						os.makedirs(tmp, exist_ok=True)
-
-						arguments["input"] = sub
-						sample = fillClasses(arguments, mapping_index_path, tmp)
-						if sample is not None:
-							project.append(sample)
-							try:
-								writeReport(sample, arguments)
-							except:
-								print("Couldnt write pdf.")
-							try:
-								if sample.mappingRes is None:
-									mapping = 0
-								else:
-									mapping = sample.mappingRes.percentAlignedReads
-								if sample.krakenRes is None:
-									kraken = 0
-								else:
-									kraken = sample.krakenRes.pClassified
-								jsonfile_dict.append(
-									OrderedDict([("setname", sample.name), ("Map to reference [%]", mapping),
-												 ("Reads after trimming", sample.pTrimmed()),
-												 ("Classified (Kraken)", kraken),
-												 ("Shorter fragments", sample.get_mean_trimRes()),
-												 ("Trim Parameter", sample.readSets[0].trimRes.print_param(arguments["minlen"],arguments["trimOption"],arguments["palindrome"])),
-												 ("images", [x.get_all_qc_dict() for x in sample.readSets])]))
-								overview.append((sample.name, mapping, sample.pTrimmed(), kraken , sample.get_mean_trimRes()))
-							except:
-								print("Error in appending summary of all samples.")
-
-							sample = None
-							i+=1
-				try:
-					combineFastQC(join(arguments["output"],"QCResults"), overview= ( getFilenameWithoutExtension(arguments["reference"], level=1, getBase = True), np.array(overview, dtype = [('name', '|S100'), ('mapping', float), ('trimming', float), ('kraken', float), ('shorter_reads', float)])), tmp = arguments["tmp"])
-				except:
-					print("Couldnt produce html.")
-
-		else: # no batch
-			if check_input(arguments):
-				sample = fillClasses(arguments, mapping_index_path)
-				try:
-					writeReport(sample, arguments)
-				except:
-					print("Couldnt write pdf")
-				if sample.mappingRes is None:
-					mapping = 0
-				else:
-					mapping = sample.mappingRes.percentAlignedReads
-				if sample.krakenRes is None:
-					kraken = 0
-				else:
-					kraken = sample.krakenRes.pClassified
-					kraken_reports[sample.name] = json.loads(str_to_json(sample.krakenRes.report))
-				jsonfile_dict.append(
-					OrderedDict([("setname", sample.name), ("Map to reference [%]", mapping),
-								 ("Reads after trimming", sample.pTrimmed()),
-								 ("Classified (Kraken)", kraken),
-								 ("Shorter fragments", sample.get_mean_trimRes()),
-								 ("Trim Parameter", sample.readSets[0].trimRes.print_param(arguments["minlen"], arguments["trimOption"], arguments["palindrome"])),
-								 ("images", [x.get_all_qc_dict() for x in sample.readSets])
-								 ]))
-
-		try:
-			env = Environment(block_start_string='@@', block_end_string='@@', variable_start_string='@=',
-							  variable_end_string='=@')
-			env.loader = FileSystemLoader(os.path.dirname(__file__))
-			template = env.get_template("batch_report.html")
-			batch_report = template.render(json_samples=json.dumps(jsonfile_dict),
-										   commandline=json.dumps(arguments), kraken = json.dumps(kraken_reports))
-			batch_html = open(join(arguments["output"], "QCResults", "batch_report.html"), "w")
-			batch_html.write(batch_report)
-			batch_html.close()
-		except:
-			print("Couldnt produce html file.")
-	finally:
-
-		#if not arguments["index"]:
-		#	print(mapping_index_path)
-		shutil.rmtree(arguments["tmp"])
-
-if __name__ == "__main__":
-	config = configparser.ConfigParser()
-	config.read(join(dirname(__file__), "config.txt"))
-	kraken_db = ""
-	if "DEFAULT" in config:
-		if "kraken_db" in config["DEFAULT"].keys():
-			kraken_db = config["DEFAULT"]["kraken_db"]
-
-	parser = argparse.ArgumentParser()
-	parser.add_argument('-input', dest='input', help = "input sample folder. Illumina filenames have to end with _<lane>_<R1|R2>_number, e.g. Sample_12_345_R1_001.fastq", required=False)
-	parser.add_argument('-1', dest='r1', help = "input file. Illumina filename must not match <project>_<lane>_<R1|R2>_<number> name pattern", required=False)
-	parser.add_argument('-2', dest='r2', help = "input file", required=False)
-
-	parser.add_argument('-output', dest='output', default="")
-	parser.add_argument('-technology', dest='technology',choices=['Illumina',"IonTorrent", "Illumina-MiSeq"] ,required = True)
-	parser.add_argument('-threads', dest='threads', default = 4, type = int)
-	parser.add_argument('-adapter', dest = 'adapter', choices = ['TruSeq2-PE', 'TruSeq2-SE' , 'TruSeq3-PE' , 'TruSeq3-SE' , 'TruSeq3-PE-2', 'NexteraPE-PE' ])
-	parser.add_argument('-reference', dest='reference', required = False, help = "Map reads against reference")
-	parser.add_argument('-index', dest='index', required = False, help = "Mapping index")
-	parser.add_argument('-save_mapping', action = "store_true")
-	parser.add_argument('-db', dest='db', help='Kraken database', required = False, default= kraken_db) #"/home/andruscha/programs/kraken/minikraken_20141208/"
-	parser.add_argument('-palindrome', dest='palindrome', help= 'Use palindrome parameter 30 or 1000 for further analysis. Default:30', default=30, type = int)
-	parser.add_argument('-minlen', dest='minlen',help='Minlen parameter for Trimmomatic. Default:50', default=50, type=int)
-	parser.add_argument('-trimOption', dest="trimOption", help='Use maxinfo or slidinginfo for Trimmomatic.MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>. default: SLIDINGWINDOW:4:15', type= str)
-	parser.add_argument('-version', action='version', version='%(prog)s v' + __version__)
-	parser.add_argument('-b', action = "store_true", help = "Batch analysis. Run QC on a project.")
-	parser.add_argument('-nokraken', action = "store_true")
-	parser.add_argument('-nomapping', action = "store_true")
-	parser.add_argument('-trimBetter', action = "store_true")
-	parser.add_argument('-trimBetter_threshold', dest='trimBetter_threshold', help = "Default setting for Illumina: 0.15 and for IonTorrent: 0.25.", required=False, type=float)
-	parser.add_argument('-blobplot', action = "store_true")
-	parser.add_argument('-forAssembly', action = "store_true", help = "Trim parameter are optimized for assemblies (trim more aggressive).")
-	parser.add_argument('-forMapping', action = "store_true", help = "Trim parameter are optimized for mapping (allow more errors).")
-	parser.add_argument('-tmp',dest="tmp", required = False, help= "Define Temp folder. Default: output folder.")
-
-	arguments = vars(parser.parse_args())
-	main(arguments)
\ No newline at end of file
diff --git a/sav.R b/sav.R
new file mode 100755
index 0000000..06a33b5
--- /dev/null
+++ b/sav.R
@@ -0,0 +1,150 @@
+#!/usr/bin/env Rscript
+#source("http://bioconductor.org/biocLite.R")
+#biocLite("savR")
+#install.packages("Cairo")
+#library(Cairo)
+library(savR)
+library(reshape2)
+library(ggplot2)   # needed for the ggplot()/ggsave() calls below
+
+args = commandArgs(trailingOnly=TRUE)
+#args <- c("Z://testdaten/ProjectXY/SAV", "Z://testsav")
+project <- savR(args[1])
+
+################
+##  Indexing  ##
+################
+
+#total reads
+total_reads<- clusters(project, 1L)
+pf_reads<- pfClusters(project, 1L)
+
+
+################
+##    Plots   ##
+################
+
+##
+# Data By Cycle
+##
+
+extraction<- extractionMetrics((project))
+
+# Data By Cycle, FWHM/All Lanes / Both surfaces / All Bases
+reshaped_extraction <- melt(extraction, measure.vars= c("FWHM_A","FWHM_C", "FWHM_T","FWHM_G"))
+FWHM<- (aggregate(reshaped_extraction$value, by=list(reshaped_extraction$cycle, reshaped_extraction$variable), FUN=mean))
+colnames(FWHM) <- c("Cycles","FWHM", "Value")
+FWHM$FWHM<- sub("FWHM_","",FWHM$FWHM)
+ggplot(data=FWHM )+
+  geom_line( aes(x=Cycles , y =Value, color=FWHM)) +
+  ggtitle("Data by Cycle - FWHM") +
+  xlab("Cycle") +
+  ylab("All bases FWHM")
+ggsave(paste(args[2], "/FWHM.png", sep=""))
+
+# Data By Cycle,Intensity /All Lanes / Both surfaces / All Bases
+reshaped_extraction <- melt(extraction, measure.vars= c("int_A","int_C", "int_T","int_G"))
+intensity<- (aggregate(reshaped_extraction$value, by=list(reshaped_extraction$cycle, reshaped_extraction$variable), FUN=mean))
+colnames(intensity) <- c("Cycles","Intensity", "Value")
+intensity$Intensity<- sub("int_","", intensity$Intensity)
+ggplot(data=intensity )+
+  geom_line( aes(x=Cycles , y =Value, color=Intensity))+
+  ggtitle("Data By Cycle - Intensity")+
+  xlab("Cycle")+ylab("All bases intensity")
+ggsave(paste(args[2], "/intensity.png", sep=""))
+# Data By Cycle, %Base /All Lanes / Both surfaces / All Bases
+#
+corr<- correctedIntensities(project)
+
+
+corr[,seq(14,17)]<-round(corr[,seq(14,17)] / apply(corr[,seq(14,17)], 1, sum) *100,2)
+corr<- melt(corr, measure.vars= c("num_A","num_C", "num_T","num_G"))
+corr<-(aggregate(corr$value, by=list(corr$cycle, corr$variable), FUN=mean))
+colnames(corr)<- c("Cycle", "Base", "Perc_Base")
+corr$Base<- sub("num_","", corr$Base)
+ggplot(corr) +
+  geom_line(aes(x=Cycle, y= Perc_Base, color=Base)) +
+  ylab("All Bases % Base") +
+  ggtitle("Data by Cycle - % Base")
+ggsave(paste(args[2], "/base_perc.png" , sep =""))
+
+##
+# Data By Lane
+##
+
+tiles<- tileMetrics(project)
+# Density, Both Surfaces
+#pfBoxplot(project) # Generate a boxplot of the numbers of clusters and the number of Illumina pass-filter clusters per tile and lane
+dens <-(tiles[which(tiles$code==100 | tiles$code==101 ),])
+dens[which(dens$code==100),]$code <- "Raw Clusters"
+dens[which(dens$code==101),]$code<- "PF Clusters"
+dens$value <- dens$value/1000
+ggplot(data = dens , aes(x=lane, y=value, fill=code))+
+  geom_boxplot() +
+  ggtitle("Data By Lane - Cluster Density") +
+  xlab("Lane")+ylab("Cluster Density (K/mm2)")
+ggsave(paste(args[2], "/density.png", sep=""))
+
+# Phasing, Both Surfaces, All Bases
+phasing_code <- seq(200, (200 + (length(project@reads)-1)*2),2)
+phasing <-(tiles[which(tiles$code %in% phasing_code) ,])
+for(i in phasing_code){
+  cat(paste("Read ",((i-200)/2)+1))
+  phasing[which(phasing$code==i),]$code = paste("Read ",((i-200)/2)+1)
+}
+ggplot(data = phasing[which(phasing$value>0),] , aes(x=lane, y=value*100, fill=code))+
+  geom_boxplot() +
+  ggtitle("Data By Lane - Phasing")+
+  xlab("Lane")+
+  ylab("% Phasing")+
+  scale_x_continuous(breaks = unique(phasing$lane))
+ggsave(paste(args[2], "/phasing.png", sep=""))
+
+
+# Pre-Phasing, Both Surfaces, All Bases
+prephasing_code <- seq(201, (201 + (length(project@reads)-1)*2),2)
+prephasing <-(tiles[which(tiles$code %in% prephasing_code) ,])
+for(i in prephasing_code){
+
+  prephasing[which(prephasing$code==i),]$code = paste("Read ",((i-201)/2)+1)
+}
+ggplot(data = prephasing[which(prephasing$value>0),] , aes(x=lane, y=value*100, fill=code))+
+  geom_boxplot() +
+  ggtitle("Data By Lane - Prephasing")+
+  xlab("Lane")+
+  ylab("% Prephasing") +
+  scale_x_continuous(breaks = unique(prephasing$lane))
+ggsave(paste(args[2], "/prephasing.png", sep=""))
+
+
+
+##
+# QScore Heatmap
+##
+png(paste(args[2], "/qscore_heatmap.png", sep=""), height=1025, width = 2571, res = 200)
+qualityHeatmap(project, lane=seq(1,project@layout@lanecount), read=c(1,2)) + theme(axis.title.y = element_blank())
+dev.off()
+
+qualy<- qualityMetrics(project)
+qualy<- data.frame(apply(qualy, 2, as.numeric))
+qualy<- melt(qualy, measure.vars= colnames(qualy)[4:ncol(qualy)])
+
+qualy<- aggregate(qualy$value, by=list(qualy$variable), FUN=sum)
+colnames(qualy)<- c("QScore","Total")
+qualy$Total <- qualy$Total/1000000
+qualy$QScore <- as.numeric(qualy$QScore)
+ggplot(qualy, aes(x=QScore, y = Total )) +
+  geom_bar(stat="identity", aes(fill=QScore>=30)) +
+  ylab("Total (million)") +
+  geom_vline(aes(xintercept=30), linetype="dashed") +
+  geom_text(aes(x=35, y=max(Total)-max(Total)*0.1 ,label=(paste("QScore >=30 \n",
+                                                 round(sum(qualy[which(qualy$QScore>=30),]$Total)/1000,2),
+                                                 "G \n",
+                                                 round(sum(qualy[which(qualy$QScore>=30),]$Total)/ sum(qualy$Total)*100,2),
+                                                 "%")
+                                           ))) +
+  ggtitle("QScore Distribution") +
+  theme(legend.position="none")
+ggsave(paste(args[2], "/qscore_distr.png", sep=""))
+
+
+
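
sav.R expects two positional arguments: the Sequence Analysis Viewer run folder and the directory the PNG plots are written to; `write_SAV()` in helper.py calls it via `Rscript --vanilla`. A minimal standalone invocation sketch with placeholder paths:

```
# Illustrative sketch: call sav.R the way write_SAV() does, with the SAV run
# folder and an image output directory as positional arguments. Paths are
# placeholders.
import subprocess

sav_folder = "/path/to/run/SAV"       # contains InterOp/ and RunInfo.xml
img_dir = "/path/to/QCResults/img"    # destination for the PNG plots

subprocess.run(["Rscript", "--vanilla", "sav.R", sav_folder, img_dir], check=True)
```
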

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/qcumber.git


