[med-svn] [qcumber] 01/05: New upstream version 1.0.14+dfsg

Thu Apr 20 08:12:23 UTC 2017

This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository qcumber.

commit f256679c229082b397698f7aa771aa9adff17379
Author: Andreas Tille <tille at debian.org>
Date:   Thu Apr 20 09:00:39 2017 +0200

    New upstream version 1.0.14+dfsg
---
 QCumber.py | 64 ++++++++++++++++++++++++------------------------
 classes.py |  2 +-
 readme.md  | 82 +++++++++++++++++++++++++++++++++-----------------------------
 report.tex | 21 +++++-----------
 4 files changed, 83 insertions(+), 86 deletions(-)

diff --git a/QCumber.py b/QCumber.py
index ab279cb..9aa1319 100755
--- a/QCumber.py
+++ b/QCumber.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 __author__ = 'LieuV'
-__version__ = "1.0.12"
+__version__ = "1.0.14"
 
 from classes import *
 from helper import *
@@ -37,14 +37,14 @@ def get_illumina_reads(tmp):
 	if not all([re.search(lane_pattern, x) for x in arguments["r1"]]):
 		readname = re.sub(r1_pattern + ".*", "", os.path.basename(arguments["r1"][0]))
 		if len(arguments["r1"]) != 1:
-			r1 = FastQFile(join_reads(arguments["r1"], tmp, readname + "_R1"),
-			               [toLatex(os.path.basename(x)) for x in arguments["r1"]])
+			r1 = FastQFile(join_reads(arguments["r1"], tmp, readname + "R1"),
+						   [toLatex(os.path.basename(x)) for x in arguments["r1"]])
 		else:
 			r1 = FastQFile(arguments["r1"][0])
 		if arguments["r2"]:
 			if len(arguments["r2"]) != 1:
-				r2 = FastQFile(join_reads(arguments["r2"], tmp, readname + "_R2"),
-				               [toLatex(os.path.basename(x)) for x in arguments["r2"]])
+				r2 = FastQFile(join_reads(arguments["r2"], tmp, readname + "R2"),
+							   [toLatex(os.path.basename(x)) for x in arguments["r2"]])
 			else:
 				r2 = FastQFile(arguments["r2"][0])
 			readsets.append(ReadSet(r1, r2))
@@ -58,14 +58,14 @@ def get_illumina_reads(tmp):
 			r1_reads = [x for x in arguments["r1"] if lane in x]
 			readname = re.sub(r1_pattern + ".*", "", os.path.basename(r1_reads[0]))
 			if len(arguments["r1"]) != 1:
-				r1 = FastQFile(join_reads(r1_reads, tmp, readname + "_R1"), [toLatex(os.path.basename(x)) for x in r1_reads]  )
+				r1 = FastQFile(join_reads(r1_reads, tmp, readname + "R1"), [toLatex(os.path.basename(x)) for x in r1_reads]  )
 			else:
 				r1 = FastQFile(r1_reads[0])
 			if arguments["r2"]:
 				r2_reads = [x for x in arguments["r2"] if lane in x]
 
 				if len(r2_reads) != 1:
-					r2 = FastQFile(join_reads(r2_reads, tmp, readname + "_R2"),[toLatex(os.path.basename(x)) for x in r2_reads] )
+					r2 = FastQFile(join_reads(r2_reads, tmp, readname + "R2"),[toLatex(os.path.basename(x)) for x in r2_reads] )
 				else:
 					r2 = FastQFile(r2_reads[0])
 				readsets.append(ReadSet(r1,r2))
@@ -187,7 +187,7 @@ def runAnalyses(temp_bowtie_path, tmp):
 			sample = sample.add_readSet(rs)
 		if not arguments["nomapping"]:
 			if arguments["save_mapping"]:
-				sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, join(arguments["output"], "QCResults", sample.name +".bam"), not arguments["notrimming"])
+				sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, join(arguments["output"], "QCResults", sample.name +".sam"), not arguments["notrimming"])
 			else:
 				sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, "/dev/null", not arguments["notrimming"])
 		if not arguments["nokraken"]:
@@ -217,11 +217,11 @@ def writeReport(sample):
 	latex.write(pdf_latex)
 	latex.close()
 
-	process = subprocess.Popen(["pdflatex", "-interaction=nonstopmode", "-output-directory=" + join(sample.mainResultsPath, "Report"), report_name + ".tex"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
+	process = subprocess.Popen(" ".join(["pdflatex", "-interaction=nonstopmode", "-output-directory=" + join(sample.mainResultsPath, "Report"), report_name + ".tex"]),shell=True, stdout = subprocess.DEVNULL, stderr = subprocess.PIPE)
 	for line in iter(process.stderr.readline, b''):
 		print(line)
-
 	process.communicate()
+	#os.system(" ".join(["pdflatex", "-interaction=nonstopmode", "-output-directory=" + join(sample.mainResultsPath, "Report"), report_name + ".tex > " + join(sample.mainResultsPath, "Report", "latex.log 2&")]))
 
 	for ext in (".tex",".aux", ".log", ".toc", ".lof", ".lot", ".synctex.gz"):
 		try:
@@ -318,26 +318,26 @@ def plot():
 		#
 		# Plot BOXPLOTS
 		boxplots = [{"file": "Per_sequence_quality_scores.csv",
-		             "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_quality_scores.png"),
-		             "title": "Per sequence quality scores",
-		             "ylab": "Mean Sequence Quality (Phred Score)",
-		             "xlab": "Sample"},
-		            {"file": "Sequence_Length_Distribution.csv",
-		             "output": join(arguments["output"], "QCResults/Report/src/img", "Sequence_Length_Distribution.png"),
-		             "title": "Sequence Length Distribution",
-		             "ylab": "Sequence Length (bp)",
-		             "xlab": "Sample"},
-		            {"file": "Per_sequence_GC_content.csv",
-		             "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_GC_content.png"),
-		             "title": "Per sequence GC content",
-		             "ylab": "Mean GC content (%)",
-		             "xlab": "Sample"}]
+					 "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_quality_scores.png"),
+					 "title": "Per sequence quality scores",
+					 "ylab": "Mean Sequence Quality (Phred Score)",
+					 "xlab": "Sample"},
+					{"file": "Sequence_Length_Distribution.csv",
+					 "output": join(arguments["output"], "QCResults/Report/src/img", "Sequence_Length_Distribution.png"),
+					 "title": "Sequence Length Distribution",
+					 "ylab": "Sequence Length (bp)",
+					 "xlab": "Sample"},
+					{"file": "Per_sequence_GC_content.csv",
+					 "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_GC_content.png"),
+					 "title": "Per sequence GC content",
+					 "ylab": "Mean GC content (%)",
+					 "xlab": "Sample"}]
 		for plot in boxplots:
 			process = subprocess.Popen(" ".join(["Rscript --vanilla ", join(os.path.dirname(__file__), "boxplot.R"),
-			                                     join(arguments["output"], "QCResults", "Report", "src", plot["file"]),
-			                                     plot["output"], '"' + plot["title"] + '"', '"' + plot["xlab"] + '"',
-			                                     '"' + plot["ylab"] + '"']),
-			                           stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
+												 join(arguments["output"], "QCResults", "Report", "src", plot["file"]),
+												 plot["output"], '"' + plot["title"] + '"', '"' + plot["xlab"] + '"',
+												 '"' + plot["ylab"] + '"']),
+									   stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
 			#for line in iter(process.stderr.readline, b''):
 			#	print(line)
 			process.communicate()
@@ -346,7 +346,7 @@ def plot():
 		# Plot BARPLOTS
 		process = subprocess.Popen(
 			" ".join(["Rscript --vanilla ", join(os.path.dirname(__file__), "barplot.R"), join(arguments["output"], "QCResults/Report/src", "summary.json"),
-			          join(arguments["output"], "QCResults/Report/src/img")]), stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
+					  join(arguments["output"], "QCResults/Report/src/img")]), stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
 		process.communicate()
 	except:
 		print("Couldnt plot summary")
@@ -443,7 +443,7 @@ def main(arguments):
 			"summary": [],
 			"kraken": {},
 			"versions": Pipeline().__dict__},
-	    open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "w"))
+		open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "w"))
 
 	output = join(arguments["output"], "QCResults")  # getDir([arguments["output"], "QCResults"], True)
 
@@ -537,8 +537,8 @@ if __name__ == "__main__":
 			kraken_db = config["DEFAULT"]["kraken_db"]
 
 	parser = argparse.ArgumentParser()
-	parser.add_argument( '-input', dest='input', help = "input sample folder. Illumina filenames have to end with _<lane>_<R1|R2>_number, e.g. Sample_12_345_R1_001.fastq", required=False)
-	parser.add_argument('-1' , dest='r1', help = "input file. Illumina filename must not match <project>_<lane>_<R1|R2>_<number> name pattern", required=False)
+	parser.add_argument( '-input', dest='input', help = "input sample folder. Illumina filenames should end with _<lane>_<R1|R2>_number, e.g. Sample_12_345_R1_001.fastq, to find the right paired set.", required=False)
+	parser.add_argument('-1' , dest='r1', help = "input file", required=False)
 	parser.add_argument( '-2', dest='r2', help = "input file", required=False)
 
 	parser.add_argument('-output', dest='output', default="")
diff --git a/classes.py b/classes.py
index 3066dde..2cc296c 100755
--- a/classes.py
+++ b/classes.py
@@ -195,7 +195,7 @@ class FastQFile:
 	def __init__(self, absFilename, concat_files = None):
 		self.filename = absFilename
 		self.qcRes = None
-		self.log = ""
+		#self.log = ""
 		self.phred="phred33"
 		self.concat_files = None
 
diff --git a/readme.md b/readme.md
index 59bb665..da25aab 100755
--- a/readme.md
+++ b/readme.md
@@ -2,15 +2,15 @@
 Introduction 
 ------------
 
-QCumber is a tool for quality control and exploration of NGS data. The workflow is as follows:
+QCumber is a tool for quality control and exploration of NGS data. All steps can be skipped if required. The workflow is as follows:
 
-* optional: extract information from Sequence Analysis Viewer
+* extract information from Sequence Analysis Viewer
 * Quality control with FastQC
-* Trim Reads with Trimmomatic 
-* optional: run FastQC and retrim if necessary
+* Trim Reads with Trimmomatic
+* run FastQC and retrim if necessary
 * Quality control of trimmed reads with FastQC
-* optional: Map reads against reference using bowtie2
-* optional: Classify reads with Kraken
+* Map reads against reference using bowtie2
+* Classify reads with Kraken
 
 ------------
 Dependencies
@@ -31,7 +31,7 @@ Packages via pip3 install:
 R packages:
 * ggplot2
 * savR
-
+* jsonlite
 
 To change tool or adapter path, change config.txt.
 
@@ -46,47 +46,52 @@ Input parameter:
 	-i, -input		sample folder/file. If Illumina folder, files has to match pattern <Sample name>_<lane>_<R1/R2>_<number>. 
 					Eg. Sample_12_345_R1_001.fastq. Otherwise use -1,-2
 	-1 , -2         alternatively to -i: filename. Must not match Illumina names.
-	-technology		sequencing technology (Illumina/IonTorrent)
+    -adapter        adapter sequence (TruSeq2-PE, TruSeq2-SE, TruSeq3-PE, TruSeq3-SE, TruSeq3-PE-2, NexteraPE-PE). Required for Illumina.
 
 Options:
-
+    -technology     		sequencing technology (Illumina/IonTorrent). Use Illumina if files are fastq
 	-output		            output folder, default: input folder
 	-reference              reference file
-	-adapter                adapter sequence (TruSeq2-PE, TruSeq2-SE, TruSeq3-PE, TruSeq3-SE, TruSeq3-PE-2, NexteraPE-PE). Required for Illumina.
+	-threads                number of threads
+
 	-sav 					Sequence Analysis Viewer folder. Requires Interop folder, RunInfo.xml and RunParameter.xml
-	-threads                threads. Default:4
-	-palindrome				palindrome parameter used in Trimmomatic (use 30 or 1000 for further analysis). Default: 30
-	-db                     Kraken database
-	-trimOption             Override standard trimming option. E.g. MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>. 
+	-rename                 Rename sample names in report. TSV File with two columns: <old sample name> <new sample name>
+	-parameters             Use your own standard parameters.
+	-trimOption             Override standard trimming option. E.g. MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>.
                             default: SLIDINGWINDOW:4:15
 	-trimBetter				Optimize trimming parameter using 'Per sequence base content' from fastqc
 	-trimBetter_threshold	Threshold for 'Per sequence base content' fluctuation. Default:0.15
 	-forAssembly			Trim parameter are optimized for assemblies (trim more aggressive).
 	-forMapping				Trim parameter are optimized for mapping(allow more errors).
 	-minlen                 Minlen parameter for Trimmomatic. Default:50
+	-palindrome				palindrome parameter used in Trimmomatic (use 30 or 1000 for further analysis). Default: 30
+    -gz                     Output trimmed files as .gz
+
+	-db                     Kraken database
+	-nokraken				skip Kraken
 	-index					Bowtie2 index if available
 	-save_mapping           Save sam files
-	-nokraken				skip Kraken
 	-nomapping				skip mapping
-	
-	-version                Get version
+    -notrimming             skip trimming
+
+    -version                Get version
 
 Output:
 
 <Sample/Output Folder>
-|-- QCResult
-  |-- Report
-	  |-- PDF report per sample
-	  |-- HTML report for entire project
-	  |-- src
-	      |-- img
-	           |-- Summary images
-  |-- FastQC
-      |-- <output folder(s) from FastQC>
-  |-- Trimmed
-      |-- <trimmed reads>
-      |-- FastQC
-          |-- <output folder(s) from FastQC>
+* QCResult
+    * Report
+        - PDF report per sample
+        - HTML report for entire project
+        * src
+            * img
+                - Summary images
+    * FastQC
+        - <output folder(s) from FastQC>
+    * Trimmed
+        - <trimmed reads>
+        * FastQC
+            - <output folder(s) from FastQC>
 
 -------------------
 Program Description
@@ -94,14 +99,15 @@ Program Description
 
 This project consists of 6 files:
 
-QCumber.py		main script for running complete pipeline
-classes.py		script containing classes
-helper.py		small helper functions
-report.tex		Template for sample reports
-config.txt      configuration for Kraken and Trimmomatic
-boxplot.R		boxplots of fastqc output for batch report 
-paramter.txt	default parameter
-config.txt		tool location
+* QCumber.py		main script for running complete pipeline
+* classes.py		script containing classes
+* helper.py		    small helper functions
+* report.tex		Template for sample reports
+* batch_report.html Template for batch report
+* config.txt        path to tools and adapter file
+* boxplot.R		    boxplots of fastqc output for batch report
+* barplot.R         barplots of read statistics
+* parameter.txt	    default parameters for trimming, pattern for Illumina file names, etc.
 
 
 -------
diff --git a/report.tex b/report.tex
index 65558cc..c0059e4 100755
--- a/report.tex
+++ b/report.tex
@@ -30,7 +30,7 @@
 \begin{document}
 
 {\bf {\LARGE{ {{pipeline.name}} } Version {{pipeline.version}}   } }\\
-\line(1,0){ \textwidth }
+\line(1,0){\textwidth} 
 
 \begin{tabular}{p{0.25\textwidth} p{0.75\textwidth}}
 
@@ -55,7 +55,7 @@ Trimmomatic: & {{pipeline.trimmo_version}}\\
 \end{tabular}\\
 
 %----------------- Workflow -------------------%
-\line(1,0){ \textwidth } \\
+\line(1,0){\textwidth} \\
 Processed reads: \\
 {%for read in sample.readSets %}
 {{read.r1.get_filename()}}
@@ -77,7 +77,7 @@ No trimming was performed \\
 
 \end{tcolorbox}
 
-\line(1,0){\textwidth} \\
+\line(1,0){\textwidth}  \\
 \vspace{5mm}
 
 %-------------------- FASTQC Results -------------------%
@@ -97,20 +97,11 @@ Concatenated files:\\
 {%endfor%}
 \end{itemize}
 {%endif%}
-{{read.r1.log}} \\
 
 Trimming Log: \\
 \textcolor{gray}{Using parameter:  {{trim_param}} }\\
 {{read.trimRes.logs}} \\
 
-{% if read.trimRes.blobplot != "" %}
-     \begin{figure}[H]
-     \centering
-    {\includegraphics[width=0.8 \textwidth]{/{{read.trimRes.blobplot}}} }
-    \caption{Blobplot}
-     \end{figure}
-{% endif %}
-%
 {% for i in range(read.r1.qcRes.results|length) %}
     \begin{figure}[H]
         \centering
@@ -139,7 +130,7 @@ Concatenated files: \\
 {%endfor%}
 \end{itemize}
 {%endif%}
-{{read.r2.log}}
+
 {% for i in range(read.r2.qcRes.results|length) %}
     \begin{figure}[H]
         \centering
@@ -163,7 +154,7 @@ Concatenated files: \\
 
 %-------------------- Bowtie Results -------------------%
 {%if sample.mappingRes != None%}
-\line(1,0){\textwidth}
+\line(1,0){\textwidth} 
 \vspace{5mm}
 
 {\Large{Bowtie2} } - Map against \path{ {{sample.reference}} } \\
@@ -171,7 +162,7 @@ Concatenated files: \\
 {%endif%}
 %-------------------- Kraken Results -------------------%
 {%if sample.krakenRes != None%}
-\line(1,0){\textwidth} \\
+\line(1,0){\textwidth} 
 \vspace{5mm}
 
 {\Large{Kraken} } \\

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/qcumber.git