[med-svn] [qcumber] 02/06: New upstream version 1.0.9+dfsg

Andreas Tille tille at debian.org
Mon Mar 13 14:17:11 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository qcumber.

commit 966a33aad7fbeb70a28f2f08489c8662e01ad4a0
Author: Andreas Tille <tille at debian.org>
Date:   Mon Mar 13 14:13:56 2017 +0100

    New upstream version 1.0.9+dfsg
---
 QCumber.py        | 600 ++++++++++++++++++++++++++++--------------------------
 barplot.R         |  56 +++--
 batch_report.html |  10 +-
 classes.py        |  96 +++------
 config.txt        |   6 +-
 helper.py         |  11 +-
 parameter.txt     |  13 +-
 report.tex        |  14 +-
 8 files changed, 410 insertions(+), 396 deletions(-)

diff --git a/QCumber.py b/QCumber.py
index c56f2a7..c7eccde 100755
--- a/QCumber.py
+++ b/QCumber.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 __author__ = 'LieuV'
-__version__ = "1.0.5"
+__version__ = "1.0.9"
 
 from classes import *
 from helper import *
@@ -11,7 +11,6 @@ import shutil
 import configparser
 from collections import OrderedDict
 import json
-import csv
 
 #dependencies
 try:
@@ -20,95 +19,102 @@ except ImportError:
 	print('The "argparse" module is not available. Use Python >3.2.')
 	sys.exit(1)
 
+#parameter = configparser.ConfigParser(strict=False, interpolation=None)
+#parameter.read(join(dirname(__file__), "parameter.txt"))
 
-parameter = configparser.ConfigParser()
-parameter.read(join(dirname(__file__), "parameter.txt"))
+#r1_pattern = parameter["Pattern"]["R1"]
+#r2_pattern = parameter["Pattern"]["R2"]
+#sep_pattern = parameter["Pattern"]["sep"]
+#lane_pattern = parameter["Pattern"]["lane"]
+global parameter
+global r1_pattern
+global r2_pattern
+global sep_pattern
+global lane_pattern
 
-file_extensions = parameter["Fileextension"]["fastq"]
-r1_pattern = parameter["Pattern"]["R1"]
-r2_pattern = parameter["Pattern"]["R2"]
-lane_pattern = parameter["Pattern"]["lane"]
-
-def get_illumina_reads(  output, tmp):
-	## Add readsets to sample
+def get_illumina_reads(tmp):
 	readsets = []
+
 	# split by lanes
-	if isinstance(arguments["r1"], list):
-		for lane in sorted(set([re.search(lane_pattern, x).group() for x in arguments["r1"]])):
-			# concat same files
-			r1_reads = [x for x in arguments["r1"] if lane in x]
-			readname = re.sub(r1_pattern + ".*", "", os.path.basename(r1_reads[0]))
-			if len(arguments["r1"]) != 1:
-				r1 = FastQFile(join_reads(r1_reads, tmp, readname + "_R1"), [toLatex(os.path.basename(x)) for x in r1_reads]  )
-			else:
-				r1 = FastQFile(r1_reads[0])
-			if arguments["r2"]:
-				r2_reads = [x for x in arguments["r2"] if lane in x]
+	for lane in sorted(set([re.search(lane_pattern, x).group() for x in arguments["r1"]])):
+		# concat same lanes
+		r1_reads = [x for x in arguments["r1"] if lane in x]
+		readname = re.sub(r1_pattern + ".*", "", os.path.basename(r1_reads[0]))
+		if len(arguments["r1"]) != 1:
+			r1 = FastQFile(join_reads(r1_reads, tmp, readname + "_R1"), [toLatex(os.path.basename(x)) for x in r1_reads]  )
+		else:
+			r1 = FastQFile(r1_reads[0])
+		if arguments["r2"]:
+			r2_reads = [x for x in arguments["r2"] if lane in x]
 
-				if len(r2_reads) != 1:
-					r2 = FastQFile(join_reads(r2_reads, tmp, readname + "_R2"),[toLatex(os.path.basename(x)) for x in r2_reads] )
-				else:
-					r2 = FastQFile(r2_reads[0])
-				readsets.append(ReadSet(r1,r2))
+			if len(r2_reads) != 1:
+				r2 = FastQFile(join_reads(r2_reads, tmp, readname + "_R2"),[toLatex(os.path.basename(x)) for x in r2_reads] )
 			else:
-				readsets.append(ReadSet(r1))
-	else:
-		r1 = FastQFile(arguments["r1"])
-		if arguments["r2"]:
-			r2 = FastQFile(arguments["r2"])
-			return [ReadSet(r1,r2)]
+				r2 = FastQFile(r2_reads[0])
+			readsets.append(ReadSet(r1,r2))
 		else:
-			return [ReadSet(r1)]
-
-
+			readsets.append(ReadSet(r1))
 	return readsets
 
-def run_analysis (readset, arguments, output, tmp):
-	print("Run FastQC...")
-	readset.run_FastQC(join(output, "FastQC"))
-	if arguments["trimBetter"]:
-		trimming_perc = arguments["trimBetter_threshold"]
-	else:
-		trimming_perc = ""
-	readset.run_Trimmomatic(arguments["adapter"], arguments["palindrome"],arguments["minlen"], arguments["trimOption"], trimming_perc, arguments["gz"])
-	readset.trimRes = readset.trimRes.run_FastQC( tmp)
-	return readset
-
+def get_input():
 
-def check_input(arguments):
 	if arguments["r1"]:
 		if arguments["r2"]:
 			return [arguments["r1"], arguments["r2"]]
 		else:
 			return [arguments["r1"]]
 
-	if arguments["technology"]=="Illumina":
-		if os.path.isdir(arguments["input"]):
-			files = os.listdir(arguments["input"])
-			files = [os.path.join(arguments["input"], x) for x in files]
-			files = [x for x in files if os.path.isfile(x) & any([ext in x for ext in file_extensions])]
-			if len(files)==0:
-				sys.exit(arguments["input"] + " does not contain fastq files.")
-			return files
+	all_files = []
+	bam_ext = [x.strip(" ") for x in parameter["Fileextension"]["bam"].split(",")]
+	fastq_ext = [x.strip(" ") for x in parameter["Fileextension"]["fastq"].split(",")]
+	if arguments["technology"]:
+		if arguments["technology"]=="IonTorrent":
+			fastq_ext = []
 		else:
-			return [arguments["input"]]
-	elif arguments["technology"]=="IonTorrent":
+			bam_ext = []
 
-		if os.path.isfile(arguments["input"]):
+	for root, dirs, files in os.walk(arguments["input"]):
+		for file in files:
+			if any([file.endswith(ext) for ext in fastq_ext + bam_ext ]):
+				all_files.append(join(root,file))
 
-			if arguments["input"].endswith(".bam"):
-				return arguments["input"]
-			else:
-				sys.exit("IonTorrent PGM input has to be a bam-file. Otherwise use -b. Use --help for more information.")
+	if not arguments["technology"]:
+		if len([x for x in all_files if any([ext in x for ext in bam_ext])]) == len(all_files):
+			arguments["technology"] = "IonTorrent"
 		else:
-			sys.exit("Incorrect file input")
-	else:
-		sys.exit("Incorrect file input")
-	return None
+			arguments["technology"] = "Illumina"
+	#if arguments["technology"]:
+	#	if arguments["technology"]=="Illumina":
+	#		# fastq only
+	#		all_files = [x for x in all_files if any([ext in x for ext in fastq_ext])]
+	#	elif arguments["technology"]=="IonTorrent":
+	#		# bam only
+	#		all_files = [x for x in all_files if any([ext in x for ext in bam_ext])]
+
+	if (len(all_files) == 0):
+		sys.exit(arguments["input"] + " does not contain fastq or bam files.")
+
+	# find read pairs
+	all_files = sorted(list(all_files))
+	paired_reads_pattern =sep_pattern.join([lane_pattern , "(" + r1_pattern + "|" + r2_pattern + ")","\d{3}"])
+	if all([re.search(paired_reads_pattern, x) for x in all_files]):
+		paired_reads = []
+		for setname, files in groupby(all_files, key=lambda x: "_".join(x.split("_")[:-3])):
+			temp = sorted(list(files))
+			r1_reads = [x for x in temp if r1_pattern in x]
+			r2_reads = [x for x in temp if r2_pattern in x]
+			if len(r1_reads) != 0:
+				if len(r2_reads) != 0:
+					paired_reads.append([r1_reads, r2_reads])
+				else:
+					paired_reads.append(r1_reads)
+		return paired_reads
+	else: # treat each file as sample
+		return [[x] for x in all_files ]
+
 
 def getSetname():
 	try:
-		print("Renmae", arguments["rename"], arguments["r1"])
 		if arguments["rename"]:
 			new_name = [arguments["rename"][x] for x in arguments["rename"].keys() if basename(arguments["r1"][0]).startswith(x)]
 
@@ -120,51 +126,57 @@ def getSetname():
 			else:
 				print("Renamed to " ,new_name)
 				return new_name[0]
-
-		if arguments["technology"] == "Illumina":
-
-			if isinstance(arguments["r1"], list):
-				return "_".join(basename(arguments["r1"][0]).split("_")[:-3])
-			else:
-				return getFilenameWithoutExtension(arguments["r1"], True)
-		else:
-			return getFilenameWithoutExtension(arguments["r1"], getBase=True)
 	except:
+		print("Couldnt rename sample files.")
+	try:
+		paired_reads_pattern = sep_pattern.join([sep_pattern + lane_pattern, "(" + r1_pattern + "|" + r2_pattern + ")", "\d{3}.*"])
 		if isinstance(arguments["r1"], list):
-			return getFilenameWithoutExtension(arguments["r1"][0], getBase=True)
+			return re.sub(paired_reads_pattern, "" ,basename(arguments["r1"][0]))
 		else:
-			return getFilenameWithoutExtension(arguments["r1"], getBase=True)
+			return re.sub(paired_reads_pattern,"",basename(arguments["r1"]))
+	except:
+		print("Problems getting samplenames")
+		return arguments["r1"]
 
-def fillClasses(temp_bowtie_path = "", tmp=''):
+def runAnalyses(temp_bowtie_path, tmp):
 	try:
 		output = getDir([arguments["output"], "QCResults"], True)
-
 		sample = Sample(getSetname(), output, arguments["reference"],arguments["threads"], arguments["technology"])
+
 		if arguments["technology"].startswith("Illumina"):
-			readsets = get_illumina_reads( output, tmp)
+			readsets = get_illumina_reads(tmp)
 		elif arguments["technology"] == "IonTorrent":
 			print("IonTorrent", arguments["r1"])
 			fastqfile = bam_to_fastq(arguments["r1"],tmp)
 			readsets = [ReadSet( fastqfile)]
 
 		for rs in readsets:
-			readset = run_analysis(rs, arguments,  output, tmp)
-			sample = sample.add_readSet(readset)
+			print("Run FastQC...")
+			rs.run_FastQC(join(output, "FastQC"))
+			if not arguments["notrimming"]:
+				if arguments["trimBetter"]:
+					trimming_perc = arguments["trimBetter_threshold"]
+				else:
+					trimming_perc = ""
+				rs.run_Trimmomatic(arguments["adapter"], arguments["palindrome"], arguments["minlen"],
+										arguments["trimOption"], trimming_perc, arguments["gz"])
+				rs.trimRes = rs.trimRes.run_FastQC(tmp)
+			sample = sample.add_readSet(rs)
 		if not arguments["nomapping"]:
 			if arguments["save_mapping"]:
-				sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, join(arguments["output"], "QCResults", sample.name +".bam"))
+				sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, join(arguments["output"], "QCResults", sample.name +".bam"), not arguments["notrimming"])
 			else:
-				sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, "/dev/null")
+				sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, "/dev/null", not arguments["notrimming"])
 		if not arguments["nokraken"]:
 			print("Run Kraken.")
-			sample = sample.run_Kraken(arguments["db"])
+			sample = sample.run_Kraken(arguments["db"], not arguments["notrimming"])
 
 		sample.stop = datetime.datetime.strftime(datetime.datetime.now(),"%d.%m.%Y, %H:%M:%S")
 	finally:
 		shutil.rmtree(tmp)
 	return sample
 
-def writeReport(sample, arguments):
+def writeReport(sample):
 	report_name = os.path.join(sample.mainResultsPath, "Report",
 		 "summary_" + sample.name + "_" + datetime.datetime.strftime(datetime.datetime.now(), "%d-%m-%y_%H-%M"))
 	print("Writing latex " ,report_name)
@@ -172,15 +184,17 @@ def writeReport(sample, arguments):
 	env.loader = FileSystemLoader(os.path.dirname(__file__))
 
 	template = env.get_template("report.tex")
-	pdf_latex = template.render(sample=sample, pipeline=Pipeline(), trim_param = sample.readSets[0].trimRes.print_param(arguments["palindrome"],arguments["minlen"], arguments["trimOption"]))
+	try:
+		trim_param= sample.readSets[0].trimRes.print_param(arguments["palindrome"], arguments["minlen"], arguments["trimOption"])
+	except:
+		trim_param = "None"
+	pdf_latex = template.render(sample=sample, pipeline=Pipeline(), trim_param = trim_param )
 
 	latex = open(report_name + ".tex", "w")
 	latex.write(pdf_latex)
 	latex.close()
 
 	process = subprocess.Popen(["pdflatex", "-interaction=nonstopmode", "-output-directory=" + join(sample.mainResultsPath, "Report"), report_name + ".tex"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
-	for line in iter(process.stdout.readline, b''):
-		pass #print(line)
 	for line in iter(process.stderr.readline, b''):
 		print(line)
 
@@ -191,6 +205,140 @@ def writeReport(sample, arguments):
 			os.remove(report_name + ext)
 		except OSError:
 			pass
+def check_input_validity():
+	#
+	# Check options for Trimmomatic
+	#pattern = re.match("(?P<option>\w+):(?P<value1>\d+):(?P<value2>\d+)", arguments["trimOption"])
+	#if (arguments["trimOption"] != ""):
+	#	if not ((pattern.group("option") == "SLIDINGWINDOW") or (pattern.group("option") == "MAXINFO")):
+	#		sys.exit("Check trimOption. Only 'SLIDINGWINDOW' or 'MAXINFO' allowed")
+	#	try:
+	#		int(pattern.group("value1"))
+	#		int(pattern.group("value2"))
+	#	except:
+	#		sys.exit("Check trimOptions.")
+
+	#
+	# Check if adapter is required
+	if  not (arguments['notrimming'] or arguments['adapter'] or arguments["technology"] == "IonTorrent") :
+		sys.exit("Illumina projects requires an adapter file")
+
+	#
+	# Check if reference is required and valid
+	if arguments["nomapping"]:
+		arguments["reference"] = ""
+	else:
+		if not arguments["reference"]:
+			sys.exit("Mapping needs a reference.")
+		else:
+			if not os.path.exists(arguments["reference"]):
+				sys.exit("Reference does not exist.")
+
+	#
+	#Check validity of Kraken DB
+	if not arguments["nokraken"]:
+		if not os.path.exists(arguments["db"]):
+			sys.exit(arguments["db"] + " does not exist. Enter a valid database for kraken")
+		else:
+			if "database.kdb" not in os.listdir(arguments["db"]):
+				sys.exit("database " + arguments["db"] + " does not contain necessary file database.kdb")
+	#
+	# Check input
+	if arguments["input"]:
+		if not os.path.exists(arguments["input"]):
+			sys.exit(arguments["input"] + " does not exist.")
+	else:
+		if not os.path.exists(arguments["r1"]):
+			sys.exit(arguments["r1"] + " does not exist.Input file required. Use option -input or -1 / -2")
+
+		if arguments["r2"]:
+			if not os.path.exists(arguments["r2"]):
+				sys.exit(arguments["r2"] + " does not exist.")
+
+def get_defaults():
+	if arguments["forAssembly"]:
+		if not arguments["trimBetter_threshold"]:
+			arguments["trimBetter_threshold"] = parameter["forAssembly." + arguments['technology']][
+				"trimBetter_threshold"]
+		if not arguments["trimOption"]:
+			arguments["trimOption"] = parameter["forAssembly." + arguments['technology']]["trimOption"]
+
+	elif arguments["forMapping"]:
+		if not arguments["trimBetter_threshold"]:
+			arguments["trimBetter_threshold"] = parameter["forMapping." + arguments['technology']][
+				"trimBetter_threshold"]
+		if not arguments["trimOption"]:
+			arguments["trimOption"] = parameter["forMapping." + arguments['technology']]["trimOption"]
+
+	if not arguments["trimOption"]:
+		arguments["trimOption"] = parameter["Trimmomatic"]["trimOption"]
+	if not arguments["trimBetter_threshold"]:
+		arguments["trimBetter_threshold"] = parameter["Trimmomatic"]["trimBetter_threshold"]
+
+
+def plot():
+	try:
+		#
+		# Plot BOXPLOTS
+		boxplots = [{"file": "Per_sequence_quality_scores.csv",
+		             "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_quality_scores.png"),
+		             "title": "Per sequence quality scores",
+		             "ylab": "Mean Sequence Quality (Phred Score)",
+		             "xlab": "Sample"},
+		            {"file": "Sequence_Length_Distribution.csv",
+		             "output": join(arguments["output"], "QCResults/Report/src/img", "Sequence_Length_Distribution.png"),
+		             "title": "Sequence Length Distribution",
+		             "ylab": "Sequence Length (bp)",
+		             "xlab": "Sample"},
+		            {"file": "Per_sequence_GC_content.csv",
+		             "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_GC_content.png"),
+		             "title": "Per sequence GC content",
+		             "ylab": "Mean GC content (%)",
+		             "xlab": "Sample"}]
+		for plot in boxplots:
+			process = subprocess.Popen(" ".join(["Rscript --vanilla ", join(os.path.dirname(__file__), "boxplot.R"),
+			                                     join(arguments["output"], "QCResults", "Report", "src", plot["file"]),
+			                                     plot["output"], '"' + plot["title"] + '"', '"' + plot["xlab"] + '"',
+			                                     '"' + plot["ylab"] + '"']),
+			                           stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
+			#for line in iter(process.stderr.readline, b''):
+			#	print(line)
+			process.communicate()
+
+		#
+		# Plot BARPLOTS
+		process = subprocess.Popen(
+			" ".join(["Rscript --vanilla ", join(os.path.dirname(__file__), "barplot.R"), join(arguments["output"], "QCResults/Report/src", "summary.json"),
+			          join(arguments["output"], "QCResults/Report/src/img")]), stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
+		process.communicate()
+	except:
+		print("Couldnt plot summary")
+
+
+def writeSummaryTex(batch_json):
+	try:
+		summary_latex_name = join(arguments["output"], "QCResults", "Report", "summary.tex")
+		with open(summary_latex_name, "w") as summary_latex:
+			summary_latex.write("\\rowcolors{2}{gray!10}{}\n\\begin{table}[H]\\begin{tabular}{lcc}\n")
+			summary_latex.write(
+				"\\rowcolor{tableBlue} \\textbf{Sample} &  \\textbf{Reads after trimming [\#]} & \\textbf{Reads after trimming [\%]} &\\textbf{Map to reference [\#]} & \\textbf{Map to reference[\%]} & \\textbf{Classified (Kraken)} & \\textbf{Shorter fragments [\%]} \\ \n")
+
+			for i in range(0, len(batch_json["summary"])):
+				summary_latex.write(
+					str(batch_json["summary"][i]["setname"]) + " & " +
+					str(batch_json["summary"][i]["Reads after trimming [#]"]) + " & " +
+					str(batch_json["summary"][i]["Reads after trimming [%]"]) + " & " +
+					str(batch_json["summary"][i]["Map to reference [#]"]) + " & " +
+					str(batch_json["summary"][i]["Map to reference [%]"]) + " & " +
+					str(batch_json["summary"][i]["Classified (Kraken)"]) + " & " +
+					str(batch_json["summary"][i]["Shorter fragments"]) + "\\\\\n"
+				)
+			summary_latex.write("\\end{tabular}\n")
+			summary_latex.write("\\caption{Statistics on reads after trimming. }")
+			summary_latex.close()
+	except:
+
+		print("Couldnt write summary.tex file.")
 
 def main(arguments):
 
@@ -204,7 +352,12 @@ def main(arguments):
 	makedirs(join(arguments["output"], "QCResults", "Report", "src"), exist_ok=True)
 	makedirs(join(arguments["output"], "QCResults", "Report", "src", "img"), exist_ok=True)
 
+	if not os.path.exists(join(arguments["output"], "QCResults", "Report", "lib" )):
+		shutil.copytree(join(os.path.dirname(__file__),"lib"),join(arguments["output"], "QCResults", "Report", "lib" ))
+
+	####
 	# rename sample names
+	####
 	if arguments["rename"]:
 		rename_dict = {}
 		with open(arguments["rename"], "r") as csvfile:
@@ -212,159 +365,58 @@ def main(arguments):
 				row = line.split("\t")
 				rename_dict[row[0]] = row[1].strip()
 		arguments["rename"] = rename_dict
+
+	###
 	# create new boxplot csv files
+	###
 	for csv in ["Per_sequence_quality_scores.csv", "Per_sequence_GC_content.csv", "Sequence_Length_Distribution.csv"]:
 		new_csv = open(join(arguments["output"], "QCResults", "Report", "src", csv), "w")
 		new_csv.write("Sample,Read,Lane,Trimmed,Value,Count\n")
 		new_csv.close()
-	##
-	# SAV
+
+	###
+	# Write SAV
+	###
 	try:
 		if arguments["sav"]:
 			write_SAV(arguments["sav"], join(arguments["output"], "QCResults", "Report", "src"))
 	except:
 		print("Couldnt write SAV.")
-	##
 
-	if not os.path.exists(join(arguments["output"], "QCResults", "Report", "lib" )):
-		shutil.copytree(join(os.path.dirname(__file__),"lib"),join(arguments["output"], "QCResults", "Report", "lib" ))
-	if arguments["technology"].startswith("Illumina"):
-		technology = "Illumina"
-	else:
-		technology = "IonTorrent"
 
-	if arguments["forAssembly"]:
-		if not arguments["trimBetter_threshold"]:
-			arguments["trimBetter_threshold"] = parameter["forAssembly." + technology]["trimBetter_threshold"]
-		if not arguments["trimOption"]:
-			arguments["trimOption"] = parameter["forAssembly." + technology]["trimOption"]
-
-	elif arguments["forMapping"]:
-		if not arguments["trimBetter_threshold"]:
-			arguments["trimBetter_threshold"] = parameter["forMapping." + technology]["trimBetter_threshold"]
-		if not arguments["trimOption"]:
-			arguments["trimOption"] = parameter["forMapping." + technology]["trimOption"]
-
-	if not arguments["trimOption"]:
-		arguments["trimOption"] = parameter["Trimmomatic"]["trimOption"]
-	if not arguments["trimBetter_threshold"]:
-		arguments["trimBetter_threshold"] = parameter["Trimmomatic"]["trimBetter_threshold"]
-
-	# check input
-	pattern = re.match("(?P<option>\w+):(?P<value1>\d+):(?P<value2>\d+)", arguments["trimOption"])
-	if(arguments["trimOption"]!=""):
-		if not ((pattern.group("option") =="SLIDINGWINDOW") or (pattern.group("option") =="MAXINFO")):
-			sys.exit("Check trimOption. Only 'SLIDINGWINDOW' or 'MAXINFO' allowed")
-		try:
-			int(pattern.group("value1"))
-			int(pattern.group("value2"))
-		except:
-			sys.exit("Check trimOptions.")
-
-	if arguments['technology'] == "Illumina":
-		if not arguments['adapter']:
-			sys.exit("Illumina projects requires an adapter file")
-	if arguments["nomapping"]:
-		arguments["reference"] = ""
-	else:
-		if not arguments["reference"]:
-			sys.exit("Mapping needs a reference.")
-		else:
-			if not os.path.exists(arguments["reference"]):
-				sys.exit("Reference does not exist.")
-
-	if not arguments["nokraken"]:
-		if not os.path.exists(arguments["db"]):
-			sys.exit(arguments["db"] + " does not exist. Enter a valid database for kraken")
-		else:
-			if "database.kdb" not in os.listdir(arguments["db"]):
-				sys.exit("database "+arguments["db"] +" does not contain necessary file database.kdb")
-	if not arguments["input"]:
-		if arguments["r1"]:
-			print(os.path.abspath(arguments["r1"]),os.path.exists(arguments["r1"]))
-			if not os.path.exists(arguments["r1"]):
-				sys.exit(arguments["r1"] + " does not exist.")
-		else:
-			sys.exit("Input file required. Use option -input or -1 / -2")
-
-		if arguments["r2"]:
-			if not os.path.exists(arguments["r2"]):
-				sys.exit(arguments["r2"] + " does not exist.")
+	#
+	# Check input validity
+	check_input_validity()
 
 	if arguments["index"]:
 		mapping_index_path = arguments["index"]
 	else:
 		mapping_index_path = join(tmp, "bowtie2_index")
 		os.makedirs(mapping_index_path, exist_ok=True)
-	# RUN
-	try:
-		batch_json = json.dump({"commandline":arguments, "summary":[],"kraken":{}, "versions":Pipeline().__dict__}, open(join(arguments["output"], "QCResults/Report/src", "summary.json"),"w"))
-		i = 1
-		project = []
-		tmp_overview = open(join(tmp, "overview.csv"), "w")
-		tmp_overview.write("Sample,Trimmed,Mapped,Classified\n")
-		myFiles = []
-
-		#filter samples and put it into for loop
-		# Get folder and filter by last three pattern splitted by "_"
-		if arguments["input"]:
-			input = [ join(arguments["input"], x) for x in sorted(os.listdir(arguments["input"]))]
-			#### check input format ####
-			# Case 1: Folder contains fastq files
-			if len([x for x in input if isfile(x) and any([x.endswith(ext) for ext in file_extensions]) ])>0:
-				input= [x for x in input if isfile(x) and any([x.endswith(ext) for ext in file_extensions]) ]
-				if len(input)!=0:
-					if arguments["technology"].startswith("Illumina"):
-						for setname, files in groupby(input, key=lambda x: "_".join(x.split("_")[:-3])):
-							#for setname, files in groupby(files, key=lambda x: "_".join(x.split("_")[:-2])):
-							temp_file =  sorted(list(files))
-							r1_reads = [x for x in  temp_file if r1_pattern in x]
-							r2_reads = [x for x in temp_file if r2_pattern in x]
-							if len(r1_reads) != 0:
-								if len(r2_reads) != 0:
-									myFiles.append([r1_reads, r2_reads])
-								else:
-									myFiles.append(r1_reads)
-					else:
-						# IonTorrent
-						myFiles = input
-			# Case 2: Folder contains subfolder for each sample
-			else:
-				input = [x for x in input if os.path.isdir(x)]
-
-				if len(input) > 0:
-					for sample in input:
-						files = sorted([join(sample,x) for x in listdir(sample) if any([x.endswith(ext) for ext in file_extensions])])
-						if arguments["technology"].startswith("Illumina"):# and len(files) > 1:
-							for setname, files in groupby(files, key=lambda x: "_".join(x.split("_")[:-3])):
-								temp_file = sorted(list(files))
-								r1_reads = [x for x in temp_file if r1_pattern in x]
-								r2_reads = [x for x in temp_file if r2_pattern in x]
-								if len(r1_reads)!=0:
-									if len(r2_reads)!=0:
-										myFiles.append([r1_reads, r2_reads])
-									else:
-										myFiles.append(r1_reads)
-						else:
-							myFiles.append(sorted(list(files)))
-					files = None
-			if len(myFiles) == 0:
-					sys.exit("Enter a valid folder, which contain sample folders")
-		else:
-			if arguments["r1"]:
-				if not os.path.exists(arguments["r1"]):
-					sys.exit(arguments["r1"] + " does not exist.")
 
-			if arguments["r2"]:
-				myFiles.append([arguments["r1"], arguments["r2"]])
-			else:
-				myFiles.append([arguments["r1"]])
+	myFiles = get_input()
+	print("Found " + str(len(myFiles)) + " sample(s).")
 
-		print("Found " + str(len(myFiles)) + " sample(s).")
+	#
+	# Get parameter defaults
+	get_defaults()
 
+	batch_json = json.dump({
+			"commandline": arguments,
+			"summary": [],
+			"kraken": {},
+			"versions": Pipeline().__dict__},
+	    open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "w"))
+
+	output = join(arguments["output"], "QCResults")  # getDir([arguments["output"], "QCResults"], True)
+
+	i = 1
+	#######
+	# Run for each sample
+	#######
+	try:
 		for subset in myFiles:
 			print("Analyse " + str(i) + "/" + str(len(myFiles)))
-
 			sample_tmp = join(tmp, "Sample_" + str(i))
 			os.makedirs(sample_tmp, exist_ok=True)
 
@@ -375,11 +427,17 @@ def main(arguments):
 				if len(subset)>1:
 					arguments["r2"] = subset[1]
 
-			sample = fillClasses( mapping_index_path, sample_tmp )
+			####
+			# Run Analyses
+			####
+			sample = runAnalyses( mapping_index_path, sample_tmp )
+
+			####
+			# Write output
+			####
 			if sample is not None:
-				project.append(sample)
 				try:
-					writeReport(sample, arguments)
+					writeReport(sample)
 				except:
 					print("Couldnt write pdf.")
 				try:
@@ -396,88 +454,43 @@ def main(arguments):
 					else:
 						kraken = sample.krakenRes.pClassified
 						batch_json["kraken"][sample.name] = json.loads(str_to_json(sample.krakenRes.report))
-					tmp_overview.write(",".join([sample.name, str(sample.nTrimmed()), str(nMapping), str(kraken)]) +"\n")
+					#tmp_overview.write(",".join([sample.name, str(sample.nTrimmed()), str(nMapping), str(kraken)]) +"\n")
+					try:
+						trim_param = sample.readSets[0].trimRes.print_param(arguments["palindrome"], arguments["minlen"],
+															   arguments["trimOption"])
+					except:
+						trim_param="None"
 					batch_json["summary"].append(
 							OrderedDict([("setname", sample.name),
-							             ("Total reads [#]", sample.total_reads()),
-							             ("Reads after trimming [#]", sample.nTrimmed()),
-							             ("Reads after trimming [%]", sample.pTrimmed()),
+										 ("Total reads [#]", sample.total_reads()),
+										 ("Reads after trimming [#]", sample.nTrimmed()),
+										 ("Reads after trimming [%]", sample.pTrimmed()),
 										 ("Map to reference [#]", nMapping),
 										 ("Map to reference [%]", pMapping),
 										 ("Classified (Kraken)", kraken),
 										 ("Shorter fragments", sample.get_mean_trimRes()),
-										 ("Trim Parameter", sample.readSets[0].trimRes.print_param(arguments["palindrome"],arguments["minlen"],arguments["trimOption"])),
+										 ("Trim Parameter",trim_param),
 										 ("images", [x.get_all_qc_dict() for x in sample.readSets])]))
 
 					json.dump(batch_json, open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "w"))
 				except:
 					print("Couldnt produce HTML report.")
+
 			sample = None
+			arguments["r1"]=None
+			arguments["r2"]=None
 			i+=1
+		# plot boxplots and barplots
+		plot()
+		writeSummaryTex(batch_json)
+		shutil.copy(join(os.path.dirname(__file__), "batch_report.html"), join(arguments["output"], "QCResults", "Report", "batch_report.html"))
 
-		try:
-			# plot boxplots
-			boxplots = [{"file":"Per_sequence_quality_scores.csv",
-						 "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_quality_scores.png"),
-						 "title": "Per sequence quality scores",
-						 "ylab": "Mean Sequence Quality (Phred Score)",
-						"xlab": "Sample" },
-						{"file": "Sequence_Length_Distribution.csv",
-						 "output": join(arguments["output"], "QCResults/Report/src/img", "Sequence_Length_Distribution.png"),
-						 "title": "Sequence Length Distribution",
-						 "ylab": "Sequence Length (bp)",
-						 "xlab": "Sample"},
-						{"file": "Per_sequence_GC_content.csv",
-						 "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_GC_content.png"),
-						 "title": "Per sequence GC content",
-						 "ylab": "Mean GC content (%)",
-						 "xlab": "Sample"} ]
-			for plot in boxplots:
-				process = subprocess.Popen(" ".join(["Rscript --vanilla ",join(os.path.dirname(__file__) ,"boxplot.R"),
-						join(arguments["output"],"QCResults", "Report", "src", plot["file"]), plot["output"], '"'+ plot["title"]+'"', '"'+plot["xlab"]+'"','"'+ plot["ylab"]+'"']),
-						stderr=subprocess.PIPE, stdout= subprocess.PIPE, shell =True)
-				for line in iter(process.stderr.readline, b''):
-					print(line)
-				process.communicate()
-			#plot barplots
-			tmp_overview.close()
-			process = subprocess.Popen(" ".join(["Rscript --vanilla ", join(os.path.dirname(__file__), "barplot.R"),join(tmp, "overview.csv"),
-												 join(arguments["output"], "QCResults/Report/src/img")]),stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
-			for line in iter(process.stderr.readline, b''):
-				print(line)
-			process.communicate()
-			shutil.copy(join(os.path.dirname(__file__), "batch_report.html"), join(arguments["output"], "QCResults", "Report", "batch_report.html"))
-		except:
-			print("Couldnt plot boxplots.")
-		try:
-			summary_latex_name = join(arguments["output"], "QCResults", "Report", "summary.tex")
-			with open(summary_latex_name, "w") as summary_latex:
-				summary_latex.write("\\rowcolors{2}{gray!10}{}\n\\begin{table}[H]\\begin{tabular}{lcc}\n")
-				summary_latex.write(
-					"\\rowcolor{tableBlue} \\textbf{Sample} & \\textbf{Map to reference [\#]} & \\textbf{Map to reference[\%]} & \\textbf{Reads after trimming [\#]} & \\textbf{Reads after trimming [\%]} & \\textbf{Classified (Kraken)} & \\textbf{Shorter fragments [\%]} \\ \n")
-
-				for i in range(0, len(batch_json["summary"])):
-					summary_latex.write(
-						str(batch_json["summary"][i]["setname"]) + " & " +
-						str(batch_json["summary"][i]["Map to reference [#]"]) + " & " +
-						str(batch_json["summary"][i]["Map to reference [%]"]) + " & " +
-						str(batch_json["summary"][i]["Reads after trimming [#]"]) + " & " +
-						str(batch_json["summary"][i]["Reads after trimming [%]"]) + " & " +
-						str(batch_json["summary"][i]["Classified (Kraken)"]) + " & " +
-						str(batch_json["summary"][i]["Shorter fragments"]) + "\\\\\n"
-					)
-				summary_latex.write("\\end{tabular}\n")
-				summary_latex.write("\\caption{Statistics on reads after trimming. }")
-				summary_latex.close()
-		except:
-
-			print("Couldnt write summary.tex file.")
 	finally:
 
 		shutil.rmtree(tmp)
 		delete_files(join(arguments["output"],"QCResults"), "FastQC", "_fastqc.html")
 		delete_files(join(arguments["output"],"QCResults"), "Trimmed/FastQC", "_fastqc.html")
-
+ 
 if __name__ == "__main__":
 	config = configparser.ConfigParser()
 	config.read(join(dirname(__file__), "config.txt"))
@@ -492,7 +505,7 @@ if __name__ == "__main__":
 	parser.add_argument( '-2', dest='r2', help = "input file", required=False)
 
 	parser.add_argument('-output', dest='output', default="")
-	parser.add_argument('-technology', dest='technology',choices=['Illumina',"IonTorrent"] ,required = True)
+	parser.add_argument('-technology', dest='technology',choices=['Illumina',"IonTorrent"] ,required=False , help = "If not set, automatically determine technology and search for fastq and bam files. Set technology to IonTorrent if all files are bam-files, else set technology to Illumina.")
 	parser.add_argument('-threads', dest='threads', default = 4, type = int, help= "Number of threads. Default:  %(default)s")
 	parser.add_argument('-adapter', dest = 'adapter', choices = ['TruSeq2-PE', 'TruSeq2-SE' , 'TruSeq3-PE' , 'TruSeq3-SE' , 'TruSeq3-PE-2', 'NexteraPE-PE' ])
 	parser.add_argument('-reference', dest='reference', required = False, help = "Map reads against reference")
@@ -503,16 +516,25 @@ if __name__ == "__main__":
 	parser.add_argument('-db', dest='db', help='Kraken database', required = False, default= kraken_db)
 	parser.add_argument('-palindrome', dest='palindrome', default=30, help= 'Use palindrome parameter 30 or 1000 for further analysis. Default: %(default)s', type = int)
 	parser.add_argument('-minlen', default=50,dest='minlen',help='Minlen parameter for Trimmomatic. Default: %(default)s',  type=int)
-	parser.add_argument('-trimOption', dest="trimOption", help='Use maxinfo or slidinginfo for Trimmomatic.MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>. default: SLIDINGWINDOW:4:15', type= str)
+	parser.add_argument('-trimOption', dest="trimOption", help='Use maxinfo or slidinginfo for Trimmomatic.MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>.', type= str)
 	parser.add_argument('-version', action='version', version='%(prog)s v' + __version__)
 	parser.add_argument('-nokraken', action = "store_true")
 	parser.add_argument('-nomapping', action = "store_true")
+	parser.add_argument('-notrimming', action = "store_true")
 	parser.add_argument('-gz', action = "store_true")
 	parser.add_argument('-trimBetter', action = "store_true", help= "Optimize trimming parameter using 'Per sequence base content' from fastqc. Not recommended for amplicons.")
 	parser.add_argument('-trimBetter_threshold', dest='trimBetter_threshold', help = "Set -trimBetter to use this option.Default setting for Illumina: 0.15 and for IonTorrent: 0.25.", required=False, type=float)
 	parser.add_argument('-forAssembly', action = "store_true", help = "Set -trimBetter to use this option.Trim parameter are optimized for assemblies (trim more aggressive).")
 	parser.add_argument('-forMapping', action = "store_true", help = "Set -trimBetter to use this option.Trim parameter are optimized for mapping (allow more errors).")
 	parser.add_argument('-rename', dest="rename", required=False, help="TSV File with two columns: <old sample name> <new sample name>")
+	parser.add_argument('-parameters', dest = 'parameters', default = join(dirname(__file__), "parameter.txt"))
 
 	arguments = vars(parser.parse_args())
+	parameter = configparser.ConfigParser(strict=False, interpolation=None)
+	parameter.read(arguments['parameters'])
+
+	r1_pattern = parameter["Pattern"]["R1"]
+	r2_pattern = parameter["Pattern"]["R2"]
+	sep_pattern = parameter["Pattern"]["sep"]
+	lane_pattern = parameter["Pattern"]["lane"]
 	main(arguments)
diff --git a/barplot.R b/barplot.R
index 350342c..93dfd20 100755
--- a/barplot.R
+++ b/barplot.R
@@ -1,29 +1,45 @@
 options(warn=-1)
-library(ggplot2)
+require(jsonlite)
+require(ggplot2)
+
 args = commandArgs(trailingOnly=TRUE)
 
-summary<- as.data.frame(read.csv(args[1]))
+convert2filename<- function(string, ext=".png"){
+  string<- gsub("\\[%\\]", "percentage", string)
+  string<- gsub("\\[#\\]", "number", string)
+  string<- gsub(" ", "_", string)
+  return(paste(string, ext, sep=""))
+}
 
-ggplot(summary, aes(x=Sample, y = Trimmed )) +
-  geom_bar(stat = "identity",  fill="#67a9cf") +
-  theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
-  ggtitle("Number of reads after trimming") +
-  xlab("Sample")+
-  ylab("Reads [#]")
-ggsave(paste(args[2], "/trimming.png", sep=""))
+summary_json<- jsonlite::fromJSON(args[1])$summary
+tablenames<- names(summary_json)[!names(summary_json) %in% c("images", "Trim Parameter")]
 
-ggplot(summary, aes(x=Sample, y = Mapped )) +
-  geom_bar(stat = "identity",  fill="#67a9cf") +
-  theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
-  ggtitle("Number of reads mapped to reference") +
+summary_json<- summary_json[tablenames]
+#summary<- as.data.frame(read.csv(args[1]))
+for( i in tablenames[2:length(tablenames)]){
+
+ggplot(summary_json, aes(x=summary_json[,"setname"], y = summary_json[,i]), environment = environment()) +
+  geom_bar(stat = "identity",   fill="#67a9cf") +
+  theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5), legend.position = "none") +
+  ggtitle(i) +
   xlab("Sample")+
-  ylab("Reads [#]")
-ggsave(paste(args[2], "/mapping.png", sep=""))
+  ylab(i)
+  ggsave(paste(args[2], convert2filename(i), sep="/"))
 
-ggplot(summary, aes(x=Sample, y = Classified )) +
-  geom_bar(stat = "identity",  fill="#67a9cf") +
+}
+temp_json <- data.frame(rbind(cbind( setname = summary_json$setname, type = "Total reads [#]", value= summary_json$`Total reads [#]`),
+                              cbind( setname = summary_json$setname, type = "Reads after trimming [#]", value= summary_json$`Reads after trimming [#]`)
+                   ))
+temp_json$value <- as.numeric(as.character(temp_json$value))
+
+ggplot(temp_json, aes(x= ))
+
+ggplot(temp_json, aes(x=setname, y = value, by=type, fill=type))+
+  geom_bar(stat="identity",position = "identity", alpha=  0.9) +
   theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
-  ggtitle("Precentage of reads classified by Kraken") +
+  ggtitle("Number of Reads") +
   xlab("Sample")+
-  ylab("Reads [%]")
-ggsave(paste(args[2], "/kraken.png", sep=""))
+  ylab("Number of Reads")
+ggsave(paste(args[2], "number_of_reads.png", sep="/"))
+
+
diff --git a/batch_report.html b/batch_report.html
index 23fc0f4..104a42b 100755
--- a/batch_report.html
+++ b/batch_report.html
@@ -573,18 +573,18 @@
                         </br>
  						<div class="col-md-4">
 
-                        <a ng-click="selected.title='Number of reads remained after trimming'; selected.caption=''; selected.img='src/img/trimming.png'"
+                        <a ng-click="selected.title='Number of reads remained after trimming'; selected.caption=''; selected.img='src/img/Reads_after_trimming_number.png'"
 							   data-image-id="" data-toggle="modal"
 							   data-target="#image-gallery">
-								<img height="500px" ng-src="src/img/trimming.png">
+								<img height="500px" ng-src="src/img/Reads_after_trimming_number.png">
                         </a>
 						</div>
                         <div class="col-md-4">
 
-                        <a ng-click="selected.title='Number of reads mapped to reference'; selected.caption=''; selected.img='src/img/mapping.png'"
+                        <a ng-click="selected.title='Number of reads mapped to reference'; selected.caption=''; selected.img='src/img/Map_to_reference_number.png'"
 							   data-image-id="" data-toggle="modal"
 							   data-target="#image-gallery">
-								<img height="500px" ng-src="src/img/mapping.png">
+								<img height="500px" ng-src="src/img/Map_to_reference_number.png">
                         </a>
 						</div>
 
@@ -593,7 +593,7 @@
                         <a ng-click="selected.title='Percentage of reads classified by Kraken'; selected.caption=''; selected.img='src/img/kraken.png'"
 							   data-image-id="" data-toggle="modal"
 							   data-target="#image-gallery">
-								<img height="500px" ng-src="src/img/kraken.png">
+								<img height="500px" ng-src="src/img/Classified_(Kraken).png">
                         </a>
 						</div>
 
diff --git a/classes.py b/classes.py
index bbfb42f..3066dde 100755
--- a/classes.py
+++ b/classes.py
@@ -79,18 +79,17 @@ class Sample(object):
 	def get_all_qc_plots(self):
 		qc_dict = OrderedDict([("setname", self.name),("raw",[]), ("trimmed",[])])
 		for rs in self.readSets:
-			qc_dict["raw"].append(rs.r1.qcRes.get_plots())
-			qc_dict["trimmed"].append(rs.trimRes.readset.r1.qcRes.get_plots())
+			qc_dict["raw"].append(rs.r1.qcRes.plots)
+			qc_dict["trimmed"].append(rs.trimRes.readset.r1.qcRes.plots)
 			if (rs.r2 is not None):
-				qc_dict["raw"].append(rs.r2.qcRes.get_plots())
-				qc_dict["trimmed"].append(rs.trimRes.readset.r2.qcRes.get_plots())
+				qc_dict["raw"].append(rs.r2.qcRes.plots)
+				qc_dict["trimmed"].append(rs.trimRes.readset.r2.qcRes.plots)
 		return qc_dict
 
 	def total_reads(self):
 		total = 0
 		for rs in self.readSets:
-			if rs.trimRes:
-				total += rs.trimRes.total
+			total += rs.numberOfReads
 		return total
 
 	def pTrimmed(self):
@@ -112,15 +111,15 @@ class Sample(object):
 		self.readSets.append(readSet)
 		return self
 
-	def run_Kraken(self, db):
-		r1, r2 = self.get_all_reads()
+	def run_Kraken(self, db, trimmed):
+		r1, r2 = self.get_all_reads(trimmed)
 		self.krakenRes = KrakenResult(r1,r2,db, self.name)
 		return self
 
-	def run_Bowtie2(self, bowtie_index, save_mapping ):
-		r1, r2 = self.get_all_reads()
+	def run_Bowtie2(self, bowtie_index, save_mapping, trimmed ):
+		r1, r2 = self.get_all_reads(trimmed)
 		self.mappingRes = MappingResult(self.reference, r1, r2, bowtie_index)
-		self.mappingRes = self.mappingRes.run_bowtie(self.threads, save_mapping)
+		self.mappingRes = self.mappingRes.run_bowtie(save_mapping)
 		return self.mappingRes
 
 	def get_all_reads(self, trimmed = True):
@@ -133,7 +132,10 @@ class Sample(object):
 		return r1,r2
 
 	def get_mean_trimRes(self):
-		return round(np.mean([x.trimRes.shorter_fragments_percentage for x in self.readSets]),2)
+		try:
+			return round(np.mean([x.trimRes.shorter_fragments_percentage for x in self.readSets]),2)
+		except:
+			return None
 
 class ReadSet:
 	''' Readset contains of r1 and r2'''
@@ -151,7 +153,6 @@ class ReadSet:
 		self.numberOfReads = 0
 		self.numberofTrimmedReads = 0
 		self.trimRes = None
-		#self.outputDir = outputDir
 		self.paired = True
 		if len( basename(self.r1.filename).split("_")[:-3]) > 4:
 			self.setname = "_".join(basename(self.r1.filename).split("_")[:-3])
@@ -162,8 +163,10 @@ class ReadSet:
 
 	def run_FastQC(self, output):
 		self.r1 = self.r1.run_FastQC(output)
+		self.numberOfReads += self.r1.qcRes.total_reads
 		if(self.r2 is not None):
 			self.r2 = self.r2.run_FastQC(output)
+			self.numberOfReads += self.r1.qcRes.total_reads
 		return self
 
 	def run_Trimmomatic(self, adapter, palindrome, minlen, trimOption, betterTrimming,gz):
@@ -177,19 +180,14 @@ class ReadSet:
 		return qc_results
 
 	def get_all_qc_dict(self):
-		'''qc_dict =OrderedDict( {"raw":[],"trimmed":[]} )
-		qc_dict["raw"] = [[x.__dict__ for x in self.r1.qcRes.results_to_base64()]]
-		qc_dict["trimmed"] = [[x.__dict__ for x in self.trimRes.readset.r1.qcRes.results_to_base64()]]
-		if (self.r2 is not None):
-			qc_dict["raw"].append([x.__dict__ for x in self.r2.qcRes.results_to_base64()])
-			qc_dict["trimmed"].append([x.__dict__ for x in self.trimRes.readset.r2.qcRes.results_to_base64()])
-		return qc_dict'''
 		qc_dict = OrderedDict({"raw": [], "trimmed": []})
 		qc_dict["raw"] = [[x.__dict__ for x in self.r1.qcRes.results_for_report()]]
-		qc_dict["trimmed"] = [[x.__dict__ for x in self.trimRes.readset.r1.qcRes.results_for_report()]]
+		if self.trimRes:
+			qc_dict["trimmed"] = [[x.__dict__ for x in self.trimRes.readset.r1.qcRes.results_for_report()]]
 		if (self.r2 is not None):
 			qc_dict["raw"].append([x.__dict__ for x in self.r2.qcRes.results_for_report()])
-			qc_dict["trimmed"].append([x.__dict__ for x in self.trimRes.readset.r2.qcRes.results_for_report()])
+			if self.trimRes:
+				qc_dict["trimmed"].append([x.__dict__ for x in self.trimRes.readset.r2.qcRes.results_for_report()])
 		return qc_dict
 
 
@@ -202,7 +200,6 @@ class FastQFile:
 		self.concat_files = None
 
 	def run_FastQC(self, outputDir):
-		print("Run fastqc in fastqfile ", " ".join([join(get_tool_path('fastqc'), "fastqc") , self.filename , "-o" , outputDir, "-t", str(num_threads), "--extract"]))
 		process = subprocess.Popen([join(get_tool_path('fastqc'), "fastqc") , self.filename , "-o" , outputDir, "-t", str(num_threads), "--extract"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
 		for line in iter(process.stderr.readline, b''):
 			line = line.decode("utf-8")
@@ -217,7 +214,6 @@ class FastQFile:
 			pattern = re.match("Quality encoding detected as (?P<phred>\w+\d{2})", line)
 			if pattern:
 				self.phred = pattern.group("phred")
-		print("")
 		process.communicate()
 		self.qcRes = QCResult(join(outputDir, getFilenameWithoutExtension(basename(self.filename)) + "_fastqc"), join(mainResultsPath, "Report", "src"))
 		return self
@@ -231,8 +227,8 @@ class QCResult:
 	def __init__(self, resultsPath, summary_output):
 		self.path = resultsPath
 		self.results = self.get_results()
-		#write summary csv for boxplots
-		write_fastqc_summary(resultsPath, summary_output)
+		#write summary csv for boxplots and assign total number of reads
+		self.total_reads = write_fastqc_summary(resultsPath, summary_output)
 
 	def get_results(self):
 		summary_list = []
@@ -254,36 +250,6 @@ class QCResult:
 						summary_list.append(Plot("NotExisting", row[1], "NotExisting"))
 		return summary_list
 
-	def get_plots(self):
-		print("extracting data ", self.path)
-		store = False
-		data = {"name": basename(self.path).replace("_fastc",""),
-				"Sequence_Length_Distribution": [],
-				"Per_sequence_GC_content": [],
-				"Per_base_sequence_quality": []}
-
-		with open(join(self.path, "fastqc_data.txt"), "r") as fastqcdatafile:
-			box_values = []
-			for line in iter(fastqcdatafile):
-				if line.startswith("#"):
-					pass
-				elif line.startswith(">>END_MODULE"):
-					if store==True:
-						data[key] = get_distribution(box_values, key)
-					store = False
-					key = None
-					box_values = []
-				elif line.startswith(">>"):
-					key = line.split("\t")[0]
-					key=key.replace(">>","")
-					key = key.replace(" ","_")
-					if key in data.keys():
-						store = True
-				else:
-					if store:
-						box_values.append(line)
-		return data
-
 	def results_to_base64(self):
 		converted_plot = []
 		for plot in self.results:
@@ -357,6 +323,7 @@ class TrimResult:
 		self.logs = ""
 
 		#Results
+		self.total=0
 		self.shorter_fragments_percentage = 0
 		self.input_number = 0
 		self.nTrimmed = 0
@@ -454,6 +421,8 @@ class TrimResult:
 				process = subprocess.Popen(" ".join(call), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
 				survived1, total, nTrimmedReads, pTrimmedReads, log = trimmomatic_report(process, call[0], True)
 				process.communicate()
+		if survived1==0:
+			sys.exit("No reads remained after trimming. Check fastqc results before trimming in QCResults/Fastqc. Please rerun without -trimBetter or check -trimOptions and minlen again.")
 
 		self.total = total
 		self.nTrimmed = nTrimmedReads
@@ -521,17 +490,18 @@ class MappingResult:
 		index_name = join(index, getFilenameWithoutExtension(basename(reference)) +"_bt2")  # reference name "bowtie2 build index_name"
 		if not isdir(index):
 			return index
-		process = subprocess.Popen([join(get_tool_path("bowtie2"),"bowtie2-build"),reference,  index_name ], stderr=subprocess.PIPE, stdout = subprocess.PIPE)
+		if not exists(index_name + ".1.bt2"):
+			process = subprocess.Popen([join(get_tool_path("bowtie2"),"bowtie2-build"),reference,  index_name ], stderr=subprocess.PIPE, stdout = subprocess.PIPE)
 
-		for line in iter(process.stderr.readline, b''):
-			line = line.decode("utf-8")
-			print(line)
-		process.communicate()
-		print("Bowtie2-build. Done.")
+			for line in iter(process.stderr.readline, b''):
+				line = line.decode("utf-8")
+				print(line)
+			process.communicate()
+			print("Bowtie2-build. Done.")
 		return index_name
 
 
-	def run_bowtie(self, threads, save_mapping):
+	def run_bowtie(self, save_mapping):
 		print("Run Bowtie2")
 		call = [join(get_tool_path("bowtie2"), "bowtie2"), "-x", self.index, "--no-unal --threads ", str(num_threads)]
 
diff --git a/config.txt b/config.txt
index 124f887..258a35a 100755
--- a/config.txt
+++ b/config.txt
@@ -1,10 +1,10 @@
 [DEFAULT]
-kraken_db =/opt/common/pipelines/databases/minikraken_20141208/
+kraken_db = /opt/common/pipelines/databases/minikraken_20141208/
 
 [PATH]
 kraken =
-adapter = /data/GS/tools/Trimmomatic-0.32/adapters/
+adapter = /opt/common/pipelines/tools/Trimmomatic-0.36/adapters/
 fastqc = /opt/common/pipelines/tools/FastQC_0.11.5/
-trimmomatic =
+trimmomatic = /opt/common/pipelines/tools/Trimmomatic-0.36/
 bowtie2 =
 
diff --git a/helper.py b/helper.py
index d7fb524..c849e5a 100755
--- a/helper.py
+++ b/helper.py
@@ -151,7 +151,7 @@ def optimize_trimming(fastqc_folder,perc=0.1):
 			column[nucl].mask[-(i + 5):(min(len(mytable[nucl]), len(mytable[nucl]) - i))] = True
 			if (perc_slope(numpy.mean(mytable[nucl][-(i + 6): (min(len(mytable[nucl]) - 1, len(mytable[nucl]) - 1 - i))]),numpy.mean(column[nucl]), perc=perc)) & (tailcrop > len(mytable[nucl]) - (i + 5)):
 				# if perc_slope(numpy.var(mytable[nucl][-(i+6): (min(len(mytable["A"])-1, len(mytable[nucl])-1-i))]), numpy.mean(numpy.var(column)), perc=perc):
-				print("TAILCROP", tailcrop)
+				#print("TAILCROP", tailcrop)
 				tailcrop = len(mytable[nucl]) - (i + 5)
 				trim_bool = True
 			else:
@@ -215,9 +215,9 @@ def delete_files(path, extended="", pattern="_fastqc"):
 			else:
 				shutil.rmtree(filename)
 
-
+'''
 def get_distribution(lines, key):
-	'''creates CSV used for boxplotting'''
+	#creates CSV used for boxplotting
 	output = []
 	for line in lines:
 		line = line.strip()
@@ -235,7 +235,7 @@ def get_distribution(lines, key):
 			v = fields[0]  # quality or gc [%]
 			output.extend(int(v) * [int(float(fields[1]))])
 	return output
-
+'''
 def createCSV(dataset, lines, key, path, trimmed):
 	'''creates CSV used for boxplotting'''
 	#output = []
@@ -280,6 +280,8 @@ def write_fastqc_summary(fastqcdir, output):
 		if line.startswith("#"):
 			pass  # print("comment")
 		# continue
+		elif line.startswith("Total Sequences"):
+			total_seq = int(line.split("\t")[-1])
 		elif line.startswith('>>END_MODULE'):
 			if len(data) > 0:
 				name1 = basename(fastqcdir)
@@ -300,6 +302,7 @@ def write_fastqc_summary(fastqcdir, output):
 		if store:
 			data.extend([line])
 	fastqcdatafile.close()
+	return total_seq
 
 def write_SAV(folder, output):
 	print("Plot SAV images...")
diff --git a/parameter.txt b/parameter.txt
index 14aeca2..ea2902e 100755
--- a/parameter.txt
+++ b/parameter.txt
@@ -35,13 +35,12 @@ trimOption = SLIDINGWINDOW:4:15
 trimBetter_threshold = 0.25
 trimOption = SLIDINGWINDOW:4:15
 
-[Adapter]
-Illumina Universal Adapter = TruSeq2
-
 [Fileextension]
-fastq = .fastq.gz, .fastq, .fq, .fq.gz, .bam
+fastq = .fastq.gz, .fastq, .fq, .fq.gz
+bam = .bam
 
 [Pattern]
-R1 = _R1_
-R2 = _R2_
-lane = _L\d{3}_
\ No newline at end of file
+sep=_
+R1 = R1
+R2 = R2
+lane = L\d{3}
\ No newline at end of file
diff --git a/report.tex b/report.tex
index e687aa0..65558cc 100755
--- a/report.tex
+++ b/report.tex
@@ -67,9 +67,11 @@ Processed reads: \\
 %-------------------- Summary -------------------%
 \begin{tcolorbox}
 {\large{Summary} } \\
-{{sample.nTrimmed()}} of {{sample.total_reads()}} ({{sample.pTrimmed()}}\%) reads remained after trimming \\
+{% if sample.nTrimmed() != 0 %} {{sample.nTrimmed()}} of {{sample.total_reads()}} ({{sample.pTrimmed()}}\%) reads remained after trimming \\
 {{sample.get_mean_trimRes()}} \% of fragments were shorter than read length\\
-
+{% else %}
+No trimming was performed \\
+{% endif %}
 {%if sample.mappingRes !=None%}{{sample.mappingRes.numberOfAlignedReads}} ({{sample.mappingRes.percentAlignedReads}}\%) of reads aligned to \path{ {{sample.reference}} }\\ {%endif%}
 {%if sample.krakenRes !=None%}{{sample.krakenRes.nClassified}} sequences classified ({{sample.krakenRes.pClassified}}\%) {%endif%}
 
@@ -117,12 +119,13 @@ Trimming Log: \\
            {\includegraphics[width=\textwidth]{/{{read.r1.qcRes.results[i].file}}} }
         \end{subfigure}
         \end{adjustbox}\qquad
-        \begin{adjustbox}{minipage=0.4\textwidth-2\fboxrule, cframe = {{read.trimRes.readset.r1.qcRes.results[i].color}} 2}%
-        \begin{subfigure}%
+        {% if sample.nTrimmed() != 0 %} \begin{adjustbox}{minipage=0.4\textwidth-2\fboxrule, cframe = {{read.trimRes.readset.r1.qcRes.results[i].color}} 2}%
+            \begin{subfigure}%
            {\includegraphics[width=\textwidth]{/{{read.trimRes.readset.r1.qcRes.results[i].file}}} }
         \end{subfigure}
         \end{adjustbox}
         \caption{ {{read.trimRes.readset.r1.qcRes.results[i].name}}}
+        {% endif %}
     \end{figure}
 {% endfor %}
 
@@ -145,12 +148,13 @@ Concatenated files: \\
            {\includegraphics[width=\textwidth]{/{{read.r2.qcRes.results[i].file}}} }
         \end{subfigure}
         \end{adjustbox}\qquad
-        \begin{adjustbox}{minipage=0.4\textwidth -2\fboxrule, cframe = {{read.trimRes.readset.r2.qcRes.results[i].color}} 2}%
+        {% if sample.nTrimmed() != 0 %} \begin{adjustbox}{minipage=0.4\textwidth -2\fboxrule, cframe = {{read.trimRes.readset.r2.qcRes.results[i].color}} 2}%
         \begin{subfigure}%
             {\includegraphics[width=\textwidth]{/{{read.trimRes.readset.r2.qcRes.results[i].file}}} }
         \end{subfigure}
         \end{adjustbox}
         \caption{ {{read.trimRes.readset.r2.qcRes.results[i].name}}}
+        {% endif %}
     \end{figure}
 {% endfor %}
 {% endif %}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/qcumber.git



More information about the debian-med-commit mailing list