[med-svn] [qcumber] 01/05: New upstream version 1.0.14+dfsg
Andreas Tille
tille at debian.org
Thu Apr 20 08:12:23 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository qcumber.
commit f256679c229082b397698f7aa771aa9adff17379
Author: Andreas Tille <tille at debian.org>
Date: Thu Apr 20 09:00:39 2017 +0200
New upstream version 1.0.14+dfsg
---
QCumber.py | 64 ++++++++++++++++++++++++------------------------
classes.py | 2 +-
readme.md | 82 +++++++++++++++++++++++++++++++++-----------------------------
report.tex | 21 +++++-----------
4 files changed, 83 insertions(+), 86 deletions(-)
diff --git a/QCumber.py b/QCumber.py
index ab279cb..9aa1319 100755
--- a/QCumber.py
+++ b/QCumber.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
__author__ = 'LieuV'
-__version__ = "1.0.12"
+__version__ = "1.0.14"
from classes import *
from helper import *
@@ -37,14 +37,14 @@ def get_illumina_reads(tmp):
if not all([re.search(lane_pattern, x) for x in arguments["r1"]]):
readname = re.sub(r1_pattern + ".*", "", os.path.basename(arguments["r1"][0]))
if len(arguments["r1"]) != 1:
- r1 = FastQFile(join_reads(arguments["r1"], tmp, readname + "_R1"),
- [toLatex(os.path.basename(x)) for x in arguments["r1"]])
+ r1 = FastQFile(join_reads(arguments["r1"], tmp, readname + "R1"),
+ [toLatex(os.path.basename(x)) for x in arguments["r1"]])
else:
r1 = FastQFile(arguments["r1"][0])
if arguments["r2"]:
if len(arguments["r2"]) != 1:
- r2 = FastQFile(join_reads(arguments["r2"], tmp, readname + "_R2"),
- [toLatex(os.path.basename(x)) for x in arguments["r2"]])
+ r2 = FastQFile(join_reads(arguments["r2"], tmp, readname + "R2"),
+ [toLatex(os.path.basename(x)) for x in arguments["r2"]])
else:
r2 = FastQFile(arguments["r2"][0])
readsets.append(ReadSet(r1, r2))
@@ -58,14 +58,14 @@ def get_illumina_reads(tmp):
r1_reads = [x for x in arguments["r1"] if lane in x]
readname = re.sub(r1_pattern + ".*", "", os.path.basename(r1_reads[0]))
if len(arguments["r1"]) != 1:
- r1 = FastQFile(join_reads(r1_reads, tmp, readname + "_R1"), [toLatex(os.path.basename(x)) for x in r1_reads] )
+ r1 = FastQFile(join_reads(r1_reads, tmp, readname + "R1"), [toLatex(os.path.basename(x)) for x in r1_reads] )
else:
r1 = FastQFile(r1_reads[0])
if arguments["r2"]:
r2_reads = [x for x in arguments["r2"] if lane in x]
if len(r2_reads) != 1:
- r2 = FastQFile(join_reads(r2_reads, tmp, readname + "_R2"),[toLatex(os.path.basename(x)) for x in r2_reads] )
+ r2 = FastQFile(join_reads(r2_reads, tmp, readname + "R2"),[toLatex(os.path.basename(x)) for x in r2_reads] )
else:
r2 = FastQFile(r2_reads[0])
readsets.append(ReadSet(r1,r2))
@@ -187,7 +187,7 @@ def runAnalyses(temp_bowtie_path, tmp):
sample = sample.add_readSet(rs)
if not arguments["nomapping"]:
if arguments["save_mapping"]:
- sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, join(arguments["output"], "QCResults", sample.name +".bam"), not arguments["notrimming"])
+ sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, join(arguments["output"], "QCResults", sample.name +".sam"), not arguments["notrimming"])
else:
sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, "/dev/null", not arguments["notrimming"])
if not arguments["nokraken"]:
@@ -217,11 +217,11 @@ def writeReport(sample):
latex.write(pdf_latex)
latex.close()
- process = subprocess.Popen(["pdflatex", "-interaction=nonstopmode", "-output-directory=" + join(sample.mainResultsPath, "Report"), report_name + ".tex"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
+ process = subprocess.Popen(" ".join(["pdflatex", "-interaction=nonstopmode", "-output-directory=" + join(sample.mainResultsPath, "Report"), report_name + ".tex"]),shell=True, stdout = subprocess.DEVNULL, stderr = subprocess.PIPE)
for line in iter(process.stderr.readline, b''):
print(line)
-
process.communicate()
+ #os.system(" ".join(["pdflatex", "-interaction=nonstopmode", "-output-directory=" + join(sample.mainResultsPath, "Report"), report_name + ".tex > " + join(sample.mainResultsPath, "Report", "latex.log 2&")]))
for ext in (".tex",".aux", ".log", ".toc", ".lof", ".lot", ".synctex.gz"):
try:
@@ -318,26 +318,26 @@ def plot():
#
# Plot BOXPLOTS
boxplots = [{"file": "Per_sequence_quality_scores.csv",
- "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_quality_scores.png"),
- "title": "Per sequence quality scores",
- "ylab": "Mean Sequence Quality (Phred Score)",
- "xlab": "Sample"},
- {"file": "Sequence_Length_Distribution.csv",
- "output": join(arguments["output"], "QCResults/Report/src/img", "Sequence_Length_Distribution.png"),
- "title": "Sequence Length Distribution",
- "ylab": "Sequence Length (bp)",
- "xlab": "Sample"},
- {"file": "Per_sequence_GC_content.csv",
- "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_GC_content.png"),
- "title": "Per sequence GC content",
- "ylab": "Mean GC content (%)",
- "xlab": "Sample"}]
+ "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_quality_scores.png"),
+ "title": "Per sequence quality scores",
+ "ylab": "Mean Sequence Quality (Phred Score)",
+ "xlab": "Sample"},
+ {"file": "Sequence_Length_Distribution.csv",
+ "output": join(arguments["output"], "QCResults/Report/src/img", "Sequence_Length_Distribution.png"),
+ "title": "Sequence Length Distribution",
+ "ylab": "Sequence Length (bp)",
+ "xlab": "Sample"},
+ {"file": "Per_sequence_GC_content.csv",
+ "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_GC_content.png"),
+ "title": "Per sequence GC content",
+ "ylab": "Mean GC content (%)",
+ "xlab": "Sample"}]
for plot in boxplots:
process = subprocess.Popen(" ".join(["Rscript --vanilla ", join(os.path.dirname(__file__), "boxplot.R"),
- join(arguments["output"], "QCResults", "Report", "src", plot["file"]),
- plot["output"], '"' + plot["title"] + '"', '"' + plot["xlab"] + '"',
- '"' + plot["ylab"] + '"']),
- stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
+ join(arguments["output"], "QCResults", "Report", "src", plot["file"]),
+ plot["output"], '"' + plot["title"] + '"', '"' + plot["xlab"] + '"',
+ '"' + plot["ylab"] + '"']),
+ stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
#for line in iter(process.stderr.readline, b''):
# print(line)
process.communicate()
@@ -346,7 +346,7 @@ def plot():
# Plot BARPLOTS
process = subprocess.Popen(
" ".join(["Rscript --vanilla ", join(os.path.dirname(__file__), "barplot.R"), join(arguments["output"], "QCResults/Report/src", "summary.json"),
- join(arguments["output"], "QCResults/Report/src/img")]), stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
+ join(arguments["output"], "QCResults/Report/src/img")]), stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
process.communicate()
except:
print("Couldnt plot summary")
@@ -443,7 +443,7 @@ def main(arguments):
"summary": [],
"kraken": {},
"versions": Pipeline().__dict__},
- open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "w"))
+ open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "w"))
output = join(arguments["output"], "QCResults") # getDir([arguments["output"], "QCResults"], True)
@@ -537,8 +537,8 @@ if __name__ == "__main__":
kraken_db = config["DEFAULT"]["kraken_db"]
parser = argparse.ArgumentParser()
- parser.add_argument( '-input', dest='input', help = "input sample folder. Illumina filenames have to end with _<lane>_<R1|R2>_number, e.g. Sample_12_345_R1_001.fastq", required=False)
- parser.add_argument('-1' , dest='r1', help = "input file. Illumina filename must not match <project>_<lane>_<R1|R2>_<number> name pattern", required=False)
+ parser.add_argument( '-input', dest='input', help = "input sample folder. Illumina filenames should end with _<lane>_<R1|R2>_number, e.g. Sample_12_345_R1_001.fastq, to find the right paired set.", required=False)
+ parser.add_argument('-1' , dest='r1', help = "input file", required=False)
parser.add_argument( '-2', dest='r2', help = "input file", required=False)
parser.add_argument('-output', dest='output', default="")
diff --git a/classes.py b/classes.py
index 3066dde..2cc296c 100755
--- a/classes.py
+++ b/classes.py
@@ -195,7 +195,7 @@ class FastQFile:
def __init__(self, absFilename, concat_files = None):
self.filename = absFilename
self.qcRes = None
- self.log = ""
+ #self.log = ""
self.phred="phred33"
self.concat_files = None
diff --git a/readme.md b/readme.md
index 59bb665..da25aab 100755
--- a/readme.md
+++ b/readme.md
@@ -2,15 +2,15 @@
Introduction
------------
-QCumber is a tool for quality control and exploration of NGS data. The workflow is as follows:
+QCumber is a tool for quality control and exploration of NGS data. All steps can be skipped if required. The workflow is as follows:
-* optional: extract information from Sequence Analysis Viewer
+* extract information from Sequence Analysis Viewer
* Quality control with FastQC
-* Trim Reads with Trimmomatic
-* optional: run FastQC and retrim if necessary
+* Trim Reads with Trimmomatic
+* run FastQC and retrim if necessary
* Quality control of trimmed reads with FastQC
-* optional: Map reads against reference using bowtie2
-* optional: Classify reads with Kraken
+* Map reads against reference using bowtie2
+* Classify reads with Kraken
------------
Dependencies
@@ -31,7 +31,7 @@ Packages via pip3 install:
R packages:
* ggplot2
* savR
-
+* jsonlite
To change tool or adapter path, change config.txt.
@@ -46,47 +46,52 @@ Input parameter:
-i, -input sample folder/file. If Illumina folder, files have to match pattern <Sample name>_<lane>_<R1/R2>_<number>.
Eg. Sample_12_345_R1_001.fastq. Otherwise use -1,-2
-1 , -2 alternatively to -i: filename. Must not match Illumina names.
- -technology sequencing technology (Illumina/IonTorrent)
+ -adapter adapter sequence (TruSeq2-PE, TruSeq2-SE, TruSeq3-PE, TruSeq3-SE, TruSeq3-PE-2, NexteraPE-PE). Required for Illumina.
Options:
-
+ -technology sequencing technology (Illumina/IonTorrent). Use Illumina if files are fastq
-output output folder, default: input folder
-reference reference file
- -adapter adapter sequence (TruSeq2-PE, TruSeq2-SE, TruSeq3-PE, TruSeq3-SE, TruSeq3-PE-2, NexteraPE-PE). Required for Illumina.
+ -threads number of threads
+
-sav Sequence Analysis Viewer folder. Requires Interop folder, RunInfo.xml and RunParameter.xml
- -threads threads. Default:4
- -palindrome palindrome parameter used in Trimmomatic (use 30 or 1000 for further analysis). Default: 30
- -db Kraken database
- -trimOption Override standard trimming option. E.g. MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>.
+ -rename Rename sample names in report. TSV File with two columns: <old sample name> <new sample name>
+ -parameters Use own standard parameter.
+ -trimOption Override standard trimming option. E.g. MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>.
default: SLIDINGWINDOW:4:15
-trimBetter Optimize trimming parameters using 'Per sequence base content' from FastQC
-trimBetter_threshold Threshold for 'Per sequence base content' fluctuation. Default:0.15
-forAssembly Trim parameters are optimized for assemblies (trim more aggressively).
-forMapping Trim parameters are optimized for mapping (allow more errors).
-minlen Minlen parameter for Trimmomatic. Default:50
+ -palindrome palindrome parameter used in Trimmomatic (use 30 or 1000 for further analysis). Default: 30
+ -gz Output trimmed files as .gz
+
+ -db Kraken database
+ -nokraken skip Kraken
-index Bowtie2 index if available
-save_mapping Save sam files
- -nokraken skip Kraken
-nomapping skip mapping
-
- -version Get version
+ -notrimming skip trimming
+
+ -version Get version
Output:
<Sample/Output Folder>
-|-- QCResult
- |-- Report
- |-- PDF report per sample
- |-- HTML report for entire project
- |-- src
- |-- img
- |-- Summary images
- |-- FastQC
- |-- <output folder(s) from FastQC>
- |-- Trimmed
- |-- <trimmed reads>
- |-- FastQC
- |-- <output folder(s) from FastQC>
+* QCResult
+ * Report
+ - PDF report per sample
+ - HTML report for entire project
+ * src
+ * img
+ - Summary images
+ * FastQC
+ - <output folder(s) from FastQC>
+ * Trimmed
+ - <trimmed reads>
+ * FastQC
+ - <output folder(s) from FastQC>
-------------------
Program Description
@@ -94,14 +99,15 @@ Program Description
This project consists of the following files:
-QCumber.py main script for running complete pipeline
-classes.py script containing classes
-helper.py small helper functions
-report.tex Template for sample reports
-config.txt configuration for Kraken and Trimmomatic
-boxplot.R boxplots of fastqc output for batch report
-paramter.txt default parameter
-config.txt tool location
+* QCumber.py main script for running complete pipeline
+* classes.py script containing classes
+* helper.py small helper functions
+* report.tex Template for sample reports
+* batch_report.html Template for batch report
+* config.txt path to tools and adapter file
+* boxplot.R boxplots of fastqc output for batch report
+* barplot.R barplots of read statistics
+* parameter.txt default parameters for trimming, filename pattern for Illumina names, etc.
-------
diff --git a/report.tex b/report.tex
index 65558cc..c0059e4 100755
--- a/report.tex
+++ b/report.tex
@@ -30,7 +30,7 @@
\begin{document}
{\bf {\LARGE{ {{pipeline.name}} } Version {{pipeline.version}} } }\\
-\line(1,0){ \textwidth }
+\line(1,0){\textwidth}
\begin{tabular}{p{0.25\textwidth} p{0.75\textwidth}}
@@ -55,7 +55,7 @@ Trimmomatic: & {{pipeline.trimmo_version}}\\
\end{tabular}\\
%----------------- Workflow -------------------%
-\line(1,0){ \textwidth } \\
+\line(1,0){\textwidth} \\
Processed reads: \\
{%for read in sample.readSets %}
{{read.r1.get_filename()}}
@@ -77,7 +77,7 @@ No trimming was performed \\
\end{tcolorbox}
-\line(1,0){\textwidth} \\
+\line(1,0){\textwidth} \\
\vspace{5mm}
%-------------------- FASTQC Results -------------------%
@@ -97,20 +97,11 @@ Concatenated files:\\
{%endfor%}
\end{itemize}
{%endif%}
-{{read.r1.log}} \\
Trimming Log: \\
\textcolor{gray}{Using parameter: {{trim_param}} }\\
{{read.trimRes.logs}} \\
-{% if read.trimRes.blobplot != "" %}
- \begin{figure}[H]
- \centering
- {\includegraphics[width=0.8 \textwidth]{/{{read.trimRes.blobplot}}} }
- \caption{Blobplot}
- \end{figure}
-{% endif %}
-%
{% for i in range(read.r1.qcRes.results|length) %}
\begin{figure}[H]
\centering
@@ -139,7 +130,7 @@ Concatenated files: \\
{%endfor%}
\end{itemize}
{%endif%}
-{{read.r2.log}}
+
{% for i in range(read.r2.qcRes.results|length) %}
\begin{figure}[H]
\centering
@@ -163,7 +154,7 @@ Concatenated files: \\
%-------------------- Bowtie Results -------------------%
{%if sample.mappingRes != None%}
-\line(1,0){\textwidth}
+\line(1,0){\textwidth}
\vspace{5mm}
{\Large{Bowtie2} } - Map against \path{ {{sample.reference}} } \\
@@ -171,7 +162,7 @@ Concatenated files: \\
{%endif%}
%-------------------- Kraken Results -------------------%
{%if sample.krakenRes != None%}
-\line(1,0){\textwidth} \\
+\line(1,0){\textwidth}
\vspace{5mm}
{\Large{Kraken} } \\
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/qcumber.git
More information about the debian-med-commit
mailing list