[med-svn] [snakemake] 01/03: New upstream version 3.9.0+dfsg
Kevin Murray
daube-guest at moszumanska.debian.org
Tue Dec 6 22:06:07 UTC 2016
This is an automated email from the git hooks/post-receive script.
daube-guest pushed a commit to branch master
in repository snakemake.
commit a51fb0f3aefa32b6410d26f1effca4c87555773e
Author: Kevin Murray <kdmfoss at gmail.com>
Date: Wed Dec 7 09:00:44 2016 +1100
New upstream version 3.9.0+dfsg
---
CHANGELOG.md | 15 ++
biglogo.pdf | Bin 0 -> 3806 bytes
biglogo.png | Bin 0 -> 17334 bytes
biglogo.svg | 141 +++++++++++
bitbucket-pipelines.yml | 1 -
environment.yml | 4 +
setup.py | 12 +-
snakemake/__init__.py | 47 +++-
snakemake/common.py | 10 +
snakemake/conda.py | 50 ++++
snakemake/dag.py | 22 +-
snakemake/exceptions.py | 8 +
snakemake/executors.py | 131 ++++++----
snakemake/futures.py | 36 ---
snakemake/io.py | 98 ++++++--
snakemake/jobs.py | 56 ++++-
snakemake/logging.py | 53 ++--
snakemake/parser.py | 110 ++++----
snakemake/persistence.py | 20 +-
snakemake/remote/HTTP.py | 14 +-
snakemake/remote/__init__.py | 4 +-
snakemake/report.py | 46 ++--
snakemake/rules.py | 53 +++-
snakemake/scheduler.py | 18 +-
snakemake/script.py | 160 +++++++-----
snakemake/shell.py | 8 +-
snakemake/version.py | 4 +-
snakemake/workflow.py | 45 +++-
snakemake/wrapper.py | 38 ++-
tests/test01/Snakefile | 2 +-
tests/test05/Snakefile | 4 +
tests/test09/Snakefile | 3 +
tests/test13/Snakefile | 4 +
tests/test14/Snakefile.nonstandard | 4 +
tests/test_ancient/Snakefile | 28 +++
tests/test_ancient/expected-results/A | 0
tests/test_ancient/expected-results/B | 0
tests/test_ancient/expected-results/C | 1 +
tests/test_ancient/expected-results/D | 1 +
tests/test_conda/Snakefile | 11 +
tests/test_conda/expected-results/test0.out | 279 +++++++++++++++++++++
tests/test_conda/expected-results/test1.out | 279 +++++++++++++++++++++
tests/test_conda/expected-results/test2.out | 279 +++++++++++++++++++++
tests/test_conda/test-env.yaml | 4 +
tests/test_delete_output/Snakefile | 52 +++-
tests/test_delete_output/nosuchfile | 0
tests/test_get_log_both/test.out | 2 -
tests/test_get_log_complex/test.out | 2 -
tests/test_get_log_stderr/test.out | 2 -
tests/test_get_log_stdout/test.out | 2 -
tests/test_issue381/Snakefile | 21 ++
tests/test_issue381/a.in | 0
tests/test_issue381/expected-results/b.out | 0
tests/test_persistent_dict/Snakefile | 44 ++--
tests/test_rule_defined_in_for_loop/Snakefile | 12 +
.../expected-results/iteration-01.txt | 0
.../expected-results/iteration-02.txt | 0
.../test_rule_defined_in_for_loop/iteration-01.txt | 0
tests/test_wrapper/Snakefile | 7 +
tests/test_wrapper/expected-results/test.vcf.gz | Bin 0 -> 438 bytes
tests/test_wrapper/test.vcf | 15 ++
tests/tests.py | 44 +++-
62 files changed, 1915 insertions(+), 391 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 639b25c..5accd50 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,20 @@
# Change Log
+## [3.9.0] - 2016-11-15
+### Added
+- Ability to define isolated conda software environments (YAML) per rule. Environment will be deployed by Snakemake upon workflow execution.
+- Command line argument --wrapper-prefix in order to overwrite the default URL for looking up wrapper scripts.
+### Changed
+- --summary now displays the log files corresponding to each output file.
+- Fixed hangups when using run directive and a large number of jobs
+- Fixed pickling errors with anonymous rules and run directive.
+- Various small bug fixes
+
+## [3.8.2] - 2016-09-23
+### Changed
+- Add missing import in rules.py.
+- Use threading only in cluster jobs.
+
## [3.8.1] - 2016-09-14
### Changed
- Snakemake now warns when using relative paths starting with "./".
diff --git a/biglogo.pdf b/biglogo.pdf
new file mode 100644
index 0000000..572fea5
Binary files /dev/null and b/biglogo.pdf differ
diff --git a/biglogo.png b/biglogo.png
new file mode 100644
index 0000000..c45e72f
Binary files /dev/null and b/biglogo.png differ
diff --git a/biglogo.svg b/biglogo.svg
new file mode 100644
index 0000000..99f03a8
--- /dev/null
+++ b/biglogo.svg
@@ -0,0 +1,141 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="74.290588mm"
+ height="9.8288574mm"
+ viewBox="0 0 263.23437 34.82666"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.91 r13725"
+ sodipodi:docname="biglogo.svg">
+ <defs
+ id="defs4" />
+ <sodipodi:namedview
+ id="base"
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1.0"
+ inkscape:pageopacity="0.0"
+ inkscape:pageshadow="2"
+ inkscape:zoom="1.979899"
+ inkscape:cx="173.26643"
+ inkscape:cy="24.495162"
+ inkscape:document-units="px"
+ inkscape:current-layer="text3336"
+ showgrid="false"
+ fit-margin-top="0"
+ fit-margin-left="0"
+ fit-margin-right="0"
+ fit-margin-bottom="0"
+ inkscape:window-width="1600"
+ inkscape:window-height="845"
+ inkscape:window-x="0"
+ inkscape:window-y="27"
+ inkscape:window-maximized="1" />
+ <metadata
+ id="metadata7">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <g
+ inkscape:label="Layer 1"
+ inkscape:groupmode="layer"
+ id="layer1"
+ transform="translate(-40.109165,-138.17274)">
+ <g
+ style="font-style:normal;font-weight:normal;font-size:16.25px;line-height:125%;font-family:sans-serif;letter-spacing:1px;word-spacing:0px;fill:#4d4d4d;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ id="text3336">
+ <g
+ id="g3379">
+ <path
+ id="path3361"
+ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:45px;font-family:sans-serif;-inkscape-font-specification:sans-serif;fill:#4d4d4d"
+ d="m 61.224888,140.63368 0,4.32862 q -2.526856,-1.2085 -4.768067,-1.80176 -2.24121,-0.59326 -4.328613,-0.59326 -3.625488,0 -5.603027,1.40625 -1.955567,1.40625 -1.955567,3.99902 0,2.17529 1.296387,3.2959 1.31836,1.09863 4.96582,1.77978 l 2.680665,0.54932 q 4.96582,0.94482 7.316894,3.33984 2.373047,2.37305 2.373047,6.37207 0,4.76807 -3.208008,7.22901 -3.186035,2.46093 -9.360351,2.46093 -2.329102,0 -4.965821,-0.52734 -2.614746,-0.52734 -5.427246,-1.56006 l 0,-4.57031 q 2.702637,1 [...]
+ inkscape:connector-curvature="0" />
+ <path
+ id="path3363"
+ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:45px;font-family:sans-serif;-inkscape-font-specification:sans-serif;fill:#4d4d4d"
+ d="m 91.404575,157.50868 0,14.85352 -4.042968,0 0,-14.72168 q 0,-3.49365 -1.362305,-5.22949 -1.362305,-1.73584 -4.086914,-1.73584 -3.273926,0 -5.163574,2.0874 -1.889649,2.0874 -1.889649,5.69092 l 0,13.90869 -4.064941,0 0,-24.60938 4.064941,0 0,3.82325 q 1.450196,-2.21924 3.405762,-3.31788 1.977539,-1.09863 4.54834,-1.09863 4.240722,0 6.416015,2.63672 2.175293,2.61475 2.175293,7.7124 z"
+ inkscape:connector-curvature="0" />
+ <path
+ id="path3365"
+ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:45px;font-family:sans-serif;-inkscape-font-specification:sans-serif;fill:#4d4d4d"
+ d="m 111.69657,159.99159 q -4.8999,0 -6.78955,1.12061 -1.88965,1.1206 -1.88965,3.82324 0,2.15332 1.40625,3.42773 1.42822,1.25245 3.86719,1.25245 3.36181,0 5.3833,-2.37305 2.04345,-2.39502 2.04345,-6.3501 l 0,-0.90088 -4.02099,0 z m 8.06396,-1.66992 0,14.04053 -4.04297,0 0,-3.73535 q -1.38427,2.24121 -3.4497,3.31787 -2.06543,1.05468 -5.05371,1.05468 -3.7793,0 -6.02051,-2.10937 -2.21924,-2.13135 -2.21924,-5.69092 0,-4.15283 2.76855,-6.26221 2.79053,-2.10937 8.30567,-2.10937 l 5. [...]
+ inkscape:connector-curvature="0" />
+ <path
+ id="path3367"
+ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:45px;font-family:sans-serif;-inkscape-font-specification:sans-serif;fill:#4d4d4d"
+ d="m 128.95633,138.17274 4.06494,0 0,20.19288 12.06299,-10.6128 5.16358,0 -13.05176,11.51367 13.60107,13.09571 -5.27343,0 -12.50245,-12.01905 0,12.01905 -4.06494,0 0,-34.18946 z"
+ inkscape:connector-curvature="0" />
+ <path
+ id="path3369"
+ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:45px;font-family:sans-serif;-inkscape-font-specification:sans-serif;fill:#4d4d4d"
+ d="m 175.68143,159.04677 0,1.97754 -18.58887,0 q 0.26368,4.1748 2.50489,6.37207 2.26318,2.17529 6.28418,2.17529 2.3291,0 4.50439,-0.57129 2.19727,-0.57129 4.35059,-1.71387 l 0,3.82325 q -2.1753,0.92285 -4.46045,1.40625 -2.28516,0.48339 -4.63623,0.48339 -5.88867,0 -9.33838,-3.42773 -3.42774,-3.42773 -3.42774,-9.27246 0,-6.04248 3.25196,-9.58008 3.27392,-3.55957 8.81103,-3.55957 4.96582,0 7.84424,3.20801 2.90039,3.18603 2.90039,8.6792 z m -4.04297,-1.18653 q -0.0439,-3.31787 -1. [...]
+ inkscape:connector-curvature="0" />
+ <path
+ id="path3371"
+ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:45px;font-family:sans-serif;-inkscape-font-specification:sans-serif;fill:#4d4d4d"
+ d="m 202.47733,152.47694 q 1.51611,-2.72461 3.62549,-4.02099 2.10937,-1.29639 4.96582,-1.29639 3.84521,0 5.93262,2.70264 2.0874,2.68066 2.0874,7.64648 l 0,14.85352 -4.06494,0 0,-14.72168 q 0,-3.5376 -1.25245,-5.25147 -1.25244,-1.71386 -3.82324,-1.71386 -3.14209,0 -4.96582,2.0874 -1.82373,2.0874 -1.82373,5.69092 l 0,13.90869 -4.06494,0 0,-14.72168 q 0,-3.55957 -1.25244,-5.25147 -1.25244,-1.71386 -3.86719,-1.71386 -3.09814,0 -4.92187,2.10937 -1.82373,2.0874 -1.82373,5.66895 l 0, [...]
+ inkscape:connector-curvature="0" />
+ <path
+ id="path3373"
+ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:45px;font-family:sans-serif;-inkscape-font-specification:sans-serif;fill:#4d4d4d"
+ d="m 239.35868,159.99159 q -4.89991,0 -6.78955,1.12061 -1.88965,1.1206 -1.88965,3.82324 0,2.15332 1.40625,3.42773 1.42822,1.25245 3.86719,1.25245 3.36181,0 5.3833,-2.37305 2.04345,-2.39502 2.04345,-6.3501 l 0,-0.90088 -4.02099,0 z m 8.06396,-1.66992 0,14.04053 -4.04297,0 0,-3.73535 q -1.38427,2.24121 -3.4497,3.31787 -2.06543,1.05468 -5.05371,1.05468 -3.7793,0 -6.02051,-2.10937 -2.21924,-2.13135 -2.21924,-5.69092 0,-4.15283 2.76855,-6.26221 2.79053,-2.10937 8.30567,-2.10937 l 5 [...]
+ inkscape:connector-curvature="0" />
+ <path
+ id="path3375"
+ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:45px;font-family:sans-serif;-inkscape-font-specification:sans-serif;fill:#4d4d4d"
+ d="m 256.61844,138.17274 4.06494,0 0,20.19288 12.06299,-10.6128 5.16358,0 -13.05176,11.51367 13.60107,13.09571 -5.27343,0 -12.50245,-12.01905 0,12.01905 -4.06494,0 0,-34.18946 z"
+ inkscape:connector-curvature="0" />
+ <path
+ id="path3377"
+ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:45px;font-family:sans-serif;-inkscape-font-specification:sans-serif;fill:#4d4d4d"
+ d="m 303.34354,159.04677 0,1.97754 -18.58887,0 q 0.26367,4.1748 2.50489,6.37207 2.26318,2.17529 6.28418,2.17529 2.3291,0 4.50439,-0.57129 2.19727,-0.57129 4.35059,-1.71387 l 0,3.82325 q -2.1753,0.92285 -4.46045,1.40625 -2.28516,0.48339 -4.63623,0.48339 -5.88868,0 -9.33838,-3.42773 -3.42774,-3.42773 -3.42774,-9.27246 0,-6.04248 3.25196,-9.58008 3.27392,-3.55957 8.81103,-3.55957 4.96582,0 7.84424,3.20801 2.90039,3.18603 2.90039,8.6792 z m -4.04297,-1.18653 q -0.0439,-3.31787 -1. [...]
+ inkscape:connector-curvature="0" />
+ </g>
+ </g>
+ <g
+ id="g3354"
+ transform="matrix(0.05703454,-0.32943954,0.32943954,0.05703454,24.362893,174.16199)">
+ <ellipse
+ id="path3761"
+ style="fill:#535353;fill-opacity:1;stroke-width:10;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none"
+ cx="113.13708"
+ cy="87.056862"
+ rx="11.111678"
+ ry="17.172592" />
+ <g
+ style="stroke:#535353;stroke-opacity:1"
+ transform="matrix(0.99202526,-0.12603922,0.12603922,0.99202526,-100.21414,-26.322478)"
+ id="g3788">
+ <path
+ style="fill:none;stroke:#535353;stroke-width:3;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+ d="m 195.96959,151.30611 c 0,0 -1.47462,16.22927 -7.78807,18.5863"
+ id="path3765"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ sodipodi:nodetypes="cc"
+ inkscape:connector-curvature="0"
+ id="path3786"
+ d="m 195.80643,151.56319 c 0,0 -3.70358,15.86969 1.54841,20.09246"
+ style="fill:none;stroke:#535353;stroke-width:3;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
+ </g>
+ </g>
+ </g>
+</svg>
diff --git a/bitbucket-pipelines.yml b/bitbucket-pipelines.yml
index 50041c7..409eb37 100644
--- a/bitbucket-pipelines.yml
+++ b/bitbucket-pipelines.yml
@@ -3,7 +3,6 @@ pipelines:
default:
- step:
script:
- - conda update -y conda conda-env
- conda env create --file environment.yml --name snakemake
- source activate snakemake
- python setup.py nosetests
diff --git a/environment.yml b/environment.yml
index 0db7017..de19541 100644
--- a/environment.yml
+++ b/environment.yml
@@ -1,6 +1,8 @@
channels:
- bioconda
- r
+ - anaconda
+ - conda-forge
dependencies:
- rpy2 >=0.7.6
- boto
@@ -15,3 +17,5 @@ dependencies:
- dropbox
- numpy
- appdirs
+ - snakemake
+ - pytools
diff --git a/setup.py b/setup.py
index 22cda87..132630e 100644
--- a/setup.py
+++ b/setup.py
@@ -1,3 +1,5 @@
+# -*- coding: UTF-8 -*-
+
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2015, Johannes Köster"
__email__ = "koester at jimmy.harvard.edu"
@@ -8,6 +10,10 @@ from setuptools.command.test import test as TestCommand
import sys
+# load version info
+exec(open("snakemake/version.py").read())
+
+
if sys.version_info < (3, 3):
print("At least Python 3.3 is required.\n", file=sys.stderr)
exit(1)
@@ -21,10 +27,6 @@ except ImportError:
exit(1)
-# load version info
-exec(open("snakemake/version.py").read())
-
-
class NoseTestCommand(TestCommand):
user_options = [
('test-suite=', 's', "Test to run (e.g. test_shadow)")
@@ -61,7 +63,7 @@ setup(
},
package_data={'': ['*.css', '*.sh', '*.html']},
install_requires=['wrapt',],
- tests_require=['pytools', 'rpy2', 'httpretty==0.8.10', 'docutils', 'nose>=1.3', 'boto>=2.38.0', 'filechunkio>=1.6',
+ tests_require=['pytools', 'rpy2', 'httpretty==0.8.10', 'docutils', 'nose>=1.3', 'boto>=2.38.0', 'filechunkio>=1.6',
'moto>=0.4.14', 'ftputil>=3.2', 'pysftp>=0.2.8', 'requests>=2.8.1', 'dropbox>=5.2', 'pyyaml'],
test_suite='all',
cmdclass={'test': NoseTestCommand},
diff --git a/snakemake/__init__.py b/snakemake/__init__.py
index 13f7a3c..e04f2e4 100644
--- a/snakemake/__init__.py
+++ b/snakemake/__init__.py
@@ -23,6 +23,7 @@ from snakemake.version import __version__
from snakemake.io import load_configfile
from snakemake.shell import shell
from snakemake.utils import update_config, available_cpu_count
+from snakemake.common import Mode
def snakemake(snakefile,
listrules=False,
@@ -95,7 +96,10 @@ def snakemake(snakefile,
keep_logger=False,
max_jobs_per_second=None,
verbose=False,
- force_use_threads=False):
+ force_use_threads=False,
+ use_conda=False,
+ mode=Mode.default,
+ wrapper_prefix=None):
"""Run snakemake on a given snakefile.
This function provides access to the whole snakemake functionality. It is not thread-safe.
@@ -164,10 +168,13 @@ def snakemake(snakefile,
greediness (float): set the greediness of scheduling. This value between 0 and 1 determines how careful jobs are selected for execution. The default value (0.5 if prioritytargets are used, 1.0 else) provides the best speed and still acceptable scheduling quality.
overwrite_shellcmd (str): a shell command that shall be executed instead of those given in the workflow. This is for debugging purposes only.
updated_files(list): a list that will be filled with the files that are updated or created during the workflow execution
- verbose(bool): show additional debug output (default False)
+ verbose (bool): show additional debug output (default False)
log_handler (function): redirect snakemake output to this custom log handler, a function that takes a log message dictionary (see below) as its only argument (default None). The log message dictionary for the log handler has to following entries:
max_jobs_per_second: maximal number of cluster/drmaa jobs per second, None to impose no limit (default None)
- force_use_threads: whether to force use of threads over processes. helpful if shared memory is full or unavailable (default False)
+ force_use_threads: whether to force use of threads over processes. helpful if shared memory is full or unavailable (default False)
+ use_conda (bool): create conda environments for each job (defined with conda directive of rules)
+ mode (snakemake.common.Mode): Execution mode
+ wrapper_prefix (str): Prefix for wrapper script URLs (default None)
:level:
the log level ("info", "error", "debug", "progress", "job_info")
@@ -244,10 +251,11 @@ def snakemake(snakefile,
printreason=printreason,
printshellcmds=printshellcmds,
nocolor=nocolor,
- stdout=dryrun,
+ stdout=dryrun and not (printdag or printd3dag or printrulegraph),
debug=verbose,
timestamp=timestamp,
- use_threads=use_threads)
+ use_threads=use_threads,
+ mode=mode)
if greediness is None:
greediness = 0.5 if prioritytargets else 1.0
@@ -294,7 +302,10 @@ def snakemake(snakefile,
overwrite_configfile=configfile,
overwrite_clusterconfig=cluster_config,
config_args=config_args,
- debug=debug)
+ debug=debug,
+ use_conda=use_conda,
+ mode=mode,
+ wrapper_prefix=wrapper_prefix)
success = True
try:
workflow.include(snakefile,
@@ -888,12 +899,31 @@ def get_argument_parser():
"Profile Snakemake and write the output to FILE. This requires yappi "
"to be installed.")
parser.add_argument(
+ "--mode",
+ choices=[Mode.default, Mode.subprocess, Mode.cluster],
+ default=Mode.default,
+ type=int,
+ help="Set execution mode of Snakemake (internal use only)."
+ )
+ parser.add_argument(
"--bash-completion",
action="store_true",
help="Output code to register bash completion for snakemake. Put the "
"following in your .bashrc (including the accents): "
"`snakemake --bash-completion` or issue it in an open terminal "
"session.")
+ parser.add_argument(
+ "--use-conda",
+ action="store_true",
+ help="If defined in the rule, create job specific conda environments. "
+ "If this flag is not set, the conda directive is ignored.")
+ parser.add_argument(
+ "--wrapper-prefix",
+ default="https://bitbucket.org/snakemake/snakemake-wrappers/raw/",
+ help="Prefix for URL created from wrapper directive (default: "
+ "https://bitbucket.org/snakemake/snakemake-wrappers/raw/). Set this to "
+ "a different URL to use your fork or a local clone of the repository."
+ )
parser.add_argument("--version", "-v",
action="version",
version=__version__)
@@ -1035,7 +1065,10 @@ def main():
keep_shadow=args.keep_shadow,
allowed_rules=args.allowed_rules,
max_jobs_per_second=args.max_jobs_per_second,
- force_use_threads=args.force_use_threads)
+ force_use_threads=args.force_use_threads,
+ use_conda=args.use_conda,
+ mode=args.mode,
+ wrapper_prefix=args.wrapper_prefix)
if args.profile:
with open(args.profile, "w") as out:
diff --git a/snakemake/common.py b/snakemake/common.py
index 916866c..4c74a37 100644
--- a/snakemake/common.py
+++ b/snakemake/common.py
@@ -1 +1,11 @@
DYNAMIC_FILL = "__snakemake_dynamic__"
+
+
+class Mode:
+ """
+ Enum for execution mode of Snakemake.
+ This handles the behavior of e.g. the logger.
+ """
+ default = 0
+ subprocess = 1
+ cluster = 2
diff --git a/snakemake/conda.py b/snakemake/conda.py
new file mode 100644
index 0000000..9b16c0f
--- /dev/null
+++ b/snakemake/conda.py
@@ -0,0 +1,50 @@
+import os
+import subprocess
+import tempfile
+from urllib.request import urlopen
+import hashlib
+import shutil
+
+from snakemake.exceptions import CreateCondaEnvironmentException
+from snakemake.logging import logger
+
+
+def create_env(job):
+ """ Create conda enviroment for the given job. """
+ if shutil.which("conda") is None:
+ raise CreateCondaEnvironmentException("The 'conda' command is not available in $PATH.")
+
+ md5hash = hashlib.md5()
+ env_file = job.conda_env_file
+ if os.path.exists(env_file):
+ with open(env_file, 'rb') as f:
+ md5hash.update(f.read())
+ else:
+ content = urlopen(env_file).read()
+ md5hash.update(content)
+ with tempfile.NamedTemporaryFile(delete=False) as tmp:
+ tmp.write(content)
+ env_file = tmp.name
+
+ env_path = os.path.join(job.rule.workflow.persistence.conda_env_path, md5hash.hexdigest())
+ if not os.path.exists(env_path):
+ logger.info("Creating conda environment for {}...".format(job.conda_env_file))
+ try:
+ out = subprocess.check_output(["conda", "env", "create",
+ "--file", env_file,
+ "--prefix", env_path],
+ stderr=subprocess.STDOUT)
+ logger.debug(out)
+ logger.info("Environment for {} created.".format(job.conda_env_file))
+ except subprocess.CalledProcessError as e:
+ # remove potential partially installed environment
+ shutil.rmtree(env_path, ignore_errors=True)
+ raise CreateCondaEnvironmentException(
+ "Could not create conda environment from {}:\n".format(job.conda_env_file) +
+ e.output.decode())
+
+ if env_file != job.conda_env_file:
+ # temporary file was created
+ os.remove(env_file)
+
+ return env_path
diff --git a/snakemake/dag.py b/snakemake/dag.py
index 8b4f1d1..322afa3 100644
--- a/snakemake/dag.py
+++ b/snakemake/dag.py
@@ -335,12 +335,9 @@ class DAG:
def unneeded_files():
for job_, files in self.dependencies[job].items():
- for f in job_.temp_output & files:
- if not needed(job_, f):
- yield f
- for f in filterfalse(partial(needed, job), job.temp_output):
- if not f in self.targetfiles:
- yield f
+ yield from filterfalse(partial(needed, job_), job_.temp_output & files)
+ if job not in self.targetjobs:
+ yield from filterfalse(partial(needed, job), job.temp_output)
for f in unneeded_files():
logger.info("Removing temporary output file {}.".format(f))
@@ -892,6 +889,8 @@ class DAG:
return new_wildcards
def rule2job(self, targetrule):
+ if targetrule.has_wildcards():
+ raise WorkflowError("Target rules may not contain wildcards. Please specify concrete files or a rule without wildcards.")
return Job(targetrule, self)
def file2jobs(self, targetfile):
@@ -1015,9 +1014,9 @@ class DAG:
def summary(self, detailed=False):
if detailed:
- yield "output_file\tdate\trule\tversion\tinput_file(s)\tshellcmd\tstatus\tplan"
+ yield "output_file\tdate\trule\tversion\tlog-file(s)\tinput-file(s)\tshellcmd\tstatus\tplan"
else:
- yield "output_file\tdate\trule\tversion\tstatus\tplan"
+ yield "output_file\tdate\trule\tversion\tlog-file(s)\tstatus\tplan"
for job in self.jobs:
output = job.rule.output if self.dynamic(
@@ -1033,6 +1032,9 @@ class DAG:
pending = "update pending" if self.reason(job) else "no update"
+ log = self.workflow.persistence.log(f)
+ log = "-" if log is None else ",".join(log)
+
input = self.workflow.persistence.input(f)
input = "-" if input is None else ",".join(input)
@@ -1055,10 +1057,10 @@ class DAG:
elif self.workflow.persistence.params_changed(job, file=f):
status = "params changed"
if detailed:
- yield "\t".join((f, date, rule, version, input, shellcmd,
+ yield "\t".join((f, date, rule, version, log, input, shellcmd,
status, pending))
else:
- yield "\t".join((f, date, rule, version, status, pending))
+ yield "\t".join((f, date, rule, version, log, status, pending))
def d3dag(self, max_jobs=10000):
def node(job):
diff --git a/snakemake/exceptions.py b/snakemake/exceptions.py
index be14fd0..62e7a98 100644
--- a/snakemake/exceptions.py
+++ b/snakemake/exceptions.py
@@ -333,3 +333,11 @@ class CreateRuleException(RuleException):
class TerminatedException(Exception):
pass
+
+
+class CreateCondaEnvironmentException(Exception):
+ pass
+
+
+class SpawnedJobError(Exception):
+ pass
diff --git a/snakemake/executors.py b/snakemake/executors.py
index 93d320d..e1c96fa 100644
--- a/snakemake/executors.py
+++ b/snakemake/executors.py
@@ -30,8 +30,8 @@ from snakemake.utils import format, Unformattable
from snakemake.io import get_wildcard_names, Wildcards
from snakemake.exceptions import print_exception, get_exception_origin
from snakemake.exceptions import format_error, RuleException, log_verbose_traceback
-from snakemake.exceptions import ClusterJobException, ProtectedOutputException, WorkflowError, ImproperShadowException
-from snakemake.futures import ProcessPoolExecutor
+from snakemake.exceptions import ClusterJobException, ProtectedOutputException, WorkflowError, ImproperShadowException, SpawnedJobError
+from snakemake.common import Mode
class AbstractExecutor:
@@ -160,6 +160,30 @@ class RealExecutor(AbstractExecutor):
"directory {}".format(e,
self.workflow.persistence.path))
+ def format_job_pattern(self, pattern, job=None, **kwargs):
+ overwrite_workdir = []
+ if self.workflow.overwrite_workdir:
+ overwrite_workdir.extend(("--directory", self.workflow.overwrite_workdir))
+
+ overwrite_config = []
+ if self.workflow.overwrite_configfile:
+ overwrite_config.extend(("--configfile", self.workflow.overwrite_configfile))
+ if self.workflow.config_args:
+ overwrite_config.append("--config")
+ overwrite_config.extend(self.workflow.config_args)
+
+ target = job.output if job.output else job.rule.name
+
+ return format(pattern,
+ job=job,
+ overwrite_workdir=overwrite_workdir,
+ overwrite_config=overwrite_config,
+ workflow=self.workflow,
+ cores=self.cores,
+ benchmark_repeats=self.benchmark_repeats,
+ target=target,
+ **kwargs)
+
class TouchExecutor(RealExecutor):
def run(self, job,
@@ -192,7 +216,7 @@ class CPUExecutor(RealExecutor):
printreason=False,
quiet=False,
printshellcmds=False,
- threads=False,
+ use_threads=False,
latency_wait=3,
benchmark_repeats=1):
super().__init__(workflow, dag,
@@ -202,35 +226,54 @@ class CPUExecutor(RealExecutor):
latency_wait=latency_wait,
benchmark_repeats=benchmark_repeats)
- self.pool = (concurrent.futures.ThreadPoolExecutor(max_workers=workers)
- if threads else ProcessPoolExecutor(max_workers=workers))
- self.threadpool = concurrent.futures.ThreadPoolExecutor(max_workers=workers)
+ self.exec_job = '\\\n'.join((
+ 'cd {workflow.workdir_init} && ',
+ '{workflow.snakemakepath} {target} --snakefile {workflow.snakefile} ',
+ '--force -j{cores} --keep-target-files --keep-shadow --keep-remote ',
+ '--benchmark-repeats {benchmark_repeats} ',
+ '--force-use-threads --wrapper-prefix {workflow.wrapper_prefix} ',
+ '{overwrite_workdir} {overwrite_config} ',
+ '--notemp --quiet --no-hooks --nolock --mode {} '.format(Mode.subprocess)))
+ self.use_threads = use_threads
+ self.cores = workers
+ self.pool = concurrent.futures.ThreadPoolExecutor(max_workers=workers)
def run(self, job,
callback=None,
submit_callback=None,
error_callback=None):
- if (job.rule.shadow_depth and
- type(self) == concurrent.futures.ThreadPoolExecutor):
- raise ImproperShadowException(job.rule)
- job.prepare()
super()._run(job)
- benchmark = None
- if job.benchmark is not None:
- benchmark = str(job.benchmark)
-
- pool = self.pool if job.shellcmd is None or job.is_shadow else self.threadpool
- future = pool.submit(
- run_wrapper, job.rule.run_func, job.input.plainstrings(),
- job.output.plainstrings(), job.params, job.wildcards, job.threads,
- job.resources, job.log.plainstrings(), job.rule.version, benchmark,
- self.benchmark_repeats, self.workflow.linemaps, self.workflow.debug,
- shadow_dir=job.shadow_dir)
+ if self.use_threads or (not job.is_shadow and (job.is_shell or job.is_norun or job.is_script or job.is_wrapper)):
+ job.prepare()
+ job.create_conda_env()
+
+ benchmark = None
+ if job.benchmark is not None:
+ benchmark = str(job.benchmark)
+ future = self.pool.submit(
+ run_wrapper, job.rule.run_func, job.input.plainstrings(),
+ job.output.plainstrings(), job.params, job.wildcards, job.threads,
+ job.resources, job.log.plainstrings(), job.rule.version, benchmark,
+ self.benchmark_repeats, job.conda_env, self.workflow.linemaps,
+ self.workflow.debug, shadow_dir=job.shadow_dir)
+ else:
+ # run directive jobs are spawned into subprocesses
+ future = self.pool.submit(self.spawn_job, job)
future.add_done_callback(partial(self._callback, job, callback,
error_callback))
+ def spawn_job(self, job):
+ exec_job = self.exec_job
+ if not job.rule.is_branched:
+ exec_job += " --allowed-rules {}".format(job.rule)
+ cmd = self.format_job_pattern(exec_job, job=job)
+ try:
+ subprocess.check_call(cmd, shell=True)
+ except subprocess.CalledProcessError:
+ raise SpawnedJobError()
+
def shutdown(self):
self.pool.shutdown()
@@ -248,6 +291,11 @@ class CPUExecutor(RealExecutor):
job.cleanup()
self.workflow.persistence.cleanup(job)
# no error callback, just silently ignore the interrupt as the main scheduler is also killed
+ except SpawnedJobError:
+ # don't print error message, this is done by the spawned subprocess
+ job.cleanup()
+ self.workflow.persistence.cleanup(job)
+ error_callback(job)
except (Exception, BaseException) as ex:
self.print_job_error(job)
print_exception(ex, self.workflow.linemaps)
@@ -297,12 +345,13 @@ class ClusterExecutor(RealExecutor):
self.exec_job = '\\\n'.join((
'cd {workflow.workdir_init} && ',
- '{workflow.snakemakepath} --snakefile {workflow.snakefile} ',
+ '{workflow.snakemakepath} {target} --snakefile {workflow.snakefile} ',
'--force -j{cores} --keep-target-files --keep-shadow --keep-remote ',
'--wait-for-files {wait_for_files} --latency-wait {latency_wait} ',
'--benchmark-repeats {benchmark_repeats} ',
+ '--force-use-threads --wrapper-prefix {workflow.wrapper_prefix} ',
'{overwrite_workdir} {overwrite_config} --nocolor ',
- '--notemp --quiet --no-hooks --nolock {target}'))
+ '--notemp --quiet --no-hooks --nolock'))
if printshellcmds:
self.exec_job += " --printshellcmds "
@@ -371,34 +420,22 @@ class ClusterExecutor(RealExecutor):
cluster=self.cluster_wildcards(job)))
def spawn_jobscript(self, job, jobscript, **kwargs):
- overwrite_workdir = []
- if self.workflow.overwrite_workdir:
- overwrite_workdir.extend(("--directory", self.workflow.overwrite_workdir))
- overwrite_config = []
- if self.workflow.overwrite_configfile:
- overwrite_config.extend(("--configfile", self.workflow.overwrite_configfile))
- if self.workflow.config_args:
- overwrite_config.append("--config")
- overwrite_config.extend(self.workflow.config_args)
-
- target = job.output if job.output else job.rule.name
wait_for_files = list(job.local_input) + [self.tmpdir]
if job.shadow_dir:
wait_for_files.append(job.shadow_dir)
- format_p = partial(format,
+ if job.conda_env:
+ wait_for_files.append(job.conda_env)
+
+ format_p = partial(self.format_job_pattern,
job=job,
- overwrite_workdir=overwrite_workdir,
- overwrite_config=overwrite_config,
- workflow=self.workflow,
- cores=self.cores,
- properties=json.dumps(job.properties(cluster=self.cluster_params(job))),
+ properties=json.dumps(job.properties(
+ cluster=self.cluster_params(job))),
latency_wait=self.latency_wait,
- benchmark_repeats=self.benchmark_repeats,
- target=target,
wait_for_files=wait_for_files,
**kwargs)
try:
- exec_job = format_p(self.exec_job, _quote_all=True)
+ exec_job = self.exec_job
+ exec_job = format_p(exec_job, _quote_all=True)
with open(jobscript, "w") as f:
print(format_p(self.jobscript, exec_job=exec_job), file=f)
except KeyError as e:
@@ -659,10 +696,12 @@ class DRMAAExecutor(ClusterExecutor):
def cancel(self):
from drmaa.const import JobControlAction
+ from drmaa.errors import InvalidJobException, InternalException
for jobid in self.submitted:
try:
self.session.control(jobid, JobControlAction.TERMINATE)
- except drmaa.errors.InvalidJobException:
+ except (InvalidJobException, InternalException):
+ #This is common - logging a warning would probably confuse the user.
pass
self.shutdown()
@@ -760,7 +799,7 @@ def change_working_directory(directory=None):
def run_wrapper(run, input, output, params, wildcards, threads, resources, log,
- version, benchmark, benchmark_repeats, linemaps, debug=False,
+ version, benchmark, benchmark_repeats, conda_env, linemaps, debug=False,
shadow_dir=None):
"""
Wrapper around the run method that handles exceptions and benchmarking.
@@ -785,7 +824,7 @@ def run_wrapper(run, input, output, params, wildcards, threads, resources, log,
# execute the actual run method.
with change_working_directory(shadow_dir):
run(input, output, params, wildcards, threads, resources, log,
- version)
+ version, conda_env)
w = time.time() - w
wallclock.append(w)
diff --git a/snakemake/futures.py b/snakemake/futures.py
deleted file mode 100644
index aa05b48..0000000
--- a/snakemake/futures.py
+++ /dev/null
@@ -1,36 +0,0 @@
-__author__ = "Johannes Köster"
-__copyright__ = "Copyright 2015, Johannes Köster"
-__email__ = "koester at jimmy.harvard.edu"
-__license__ = "MIT"
-
-import sys
-import os
-import multiprocessing
-import concurrent.futures
-from concurrent.futures.process import _ResultItem, _process_worker
-
-
-def _graceful_process_worker(call_queue, result_queue):
- """Override the default _process_worker from concurrent.futures.
- We ensure here that KeyboardInterrupts lead to silent failures.
- """
- try:
- _process_worker(call_queue, result_queue)
- except KeyboardInterrupt:
- # let the process silently fail in case of a keyboard interrupt
- raise SystemExit()
-
-
-class ProcessPoolExecutor(concurrent.futures.ProcessPoolExecutor):
- """Override the default ProcessPoolExecutor to gracefully handle KeyboardInterrupts."""
-
- def _adjust_process_count(self):
- for _ in range(len(self._processes), self._max_workers):
- p = multiprocessing.Process(
- target=_graceful_process_worker,
- args=(self._call_queue, self._result_queue))
- p.start()
- if sys.version_info < (3, 3):
- self._processes.add(p)
- else:
- self._processes[p.pid] = p
diff --git a/snakemake/io.py b/snakemake/io.py
index 5a3fe17..d60a055 100644
--- a/snakemake/io.py
+++ b/snakemake/io.py
@@ -8,14 +8,18 @@ import shutil
import re
import stat
import time
+import datetime
import json
import copy
import functools
+import subprocess as sp
from itertools import product, chain
from collections import Iterable, namedtuple
from snakemake.exceptions import MissingOutputException, WorkflowError, WildcardError, RemoteFileException
from snakemake.logging import logger
from inspect import isfunction, ismethod
+from copy import deepcopy
+
from snakemake.common import DYNAMIC_FILL
@@ -30,21 +34,31 @@ def lutime(f, times):
#target of a link.
if os.utime in os.supports_follow_symlinks:
#...utime is well behaved
- return os.utime(f, times, follow_symlinks=False)
+ os.utime(f, times, follow_symlinks=False)
elif not os.path.islink(f):
#...symlinks not an issue here
- return os.utime(f, times)
+ os.utime(f, times)
else:
+ try:
+ # try the system command
+ if times:
+ fmt_time = lambda sec: datetime.fromtimestamp(sec).strftime("%Y%m%d%H%M.%S")
+ atime, mtime = times
+ sp.check_call(["touch", "-h", f, "-a", "-t", fmt_time(atime)])
+ sp.check_call(["touch", "-h", f, "-m", "-t", fmt_time(mtime)])
+ else:
+ sp.check_call(["touch", "-h", f])
+ except sp.CalledProcessError:
+ pass
#...problem system. Do nothing.
- logger.warning("Unable to set utime on symlink {}. Your Python build does not support it.".format(f))
+ logger.warning("Unable to set utime on symlink {}. Your Python build does not support it.".format(f))
return None
def lchmod(f, mode):
- return os.chmod(
- f,
- mode,
- follow_symlinks=os.chmod not in os.supports_follow_symlinks)
+ os.chmod(f,
+ mode,
+ follow_symlinks=os.chmod not in os.supports_follow_symlinks)
def IOFile(file, rule=None):
@@ -88,6 +102,10 @@ class _IOFile(str):
def is_remote(self):
return is_flagged(self._file, "remote_object")
+ @property
+ def is_ancient(self):
+ return is_flagged(self._file, "ancient")
+
def update_remote_filepath(self):
# if the file string is different in the iofile, update the remote object
# (as in the case of wildcard expansion)
@@ -115,10 +133,17 @@ class _IOFile(str):
def check(self):
if self._file.startswith("./"):
- logger.warning("File path {} starts with './'. This is redundant "
+ logger.warning("Relative file path '{}' starts with './'. This is redundant "
"and strongly discouraged. It can also lead to "
"inconsistent results of the file-matching approach "
- "used by Snakemake.".format(self._file))
+ "used by Snakemake. You can simply omit the './' "
+ "for relative file paths.".format(self._file))
+ if self._file.startswith(" "):
+ logger.warning("File path '{}' starts with whitespace. This is likely unintended.")
+ if self._file.endswith(" "):
+ logger.warning("File path '{}' ends with whitespace. This is likely unintended.")
+ if "\n" in self._file:
+ logger.warning("File path '{}' contains line break. This is likely unintended.")
@property
@_refer_to_remote
@@ -145,7 +170,7 @@ class _IOFile(str):
@property
def mtime_local(self):
# do not follow symlinks for modification time
- return int(lstat(self.file).st_mtime)
+ return lstat(self.file).st_mtime
@property
def flags(self):
@@ -172,7 +197,9 @@ class _IOFile(str):
def is_newer(self, time):
""" Returns true of the file is newer than time, or if it is
a symlink that points to a file newer than time. """
- if self.is_remote:
+ if self.is_ancient:
+ return False
+ elif self.is_remote:
#If file is remote but provider does not override the implementation this
#is the best we can do.
return self.mtime > time
@@ -355,21 +382,25 @@ def contains_wildcard_constraints(pattern):
def remove(file, remove_non_empty_dir=False):
- if os.path.exists(file):
- if os.path.isdir(file):
- if remove_non_empty_dir:
- shutil.rmtree(file)
- else:
- try:
- os.removedirs(file)
- except OSError as e:
- # skip non empty directories
- if e.errno == 39:
- logger.info("Skipped removing empty directory {}".format(e.filename))
- else:
- logger.warning(str(e))
+ if os.path.isdir(file) and not os.path.islink(file):
+ if remove_non_empty_dir:
+ shutil.rmtree(file)
else:
+ try:
+ os.removedirs(file)
+ except OSError as e:
+ # skip non empty directories
+ if e.errno == 39:
+ logger.info("Skipped removing non-empty directory {}".format(e.filename))
+ else:
+ logger.warning(str(e))
+ #Remember that dangling symlinks fail the os.path.exists() test, but
+ #we definitely still want to zap them. try/except is the safest way.
+ else:
+ try:
os.remove(file)
+ except FileNotFoundError:
+ pass
def regex(filepattern):
@@ -458,6 +489,15 @@ def get_flag_value(value, flag_type):
else:
return None
+def ancient(value):
+ """
+ A flag for an input file that shall be considered ancient; i.e. its timestamp shall have no effect on which jobs to run.
+ """
+ if is_flagged(value, "remote"):
+ raise SyntaxError(
+ "Ancient and remote flags are mutually exclusive.")
+ return flag(value, "ancient")
+
def temp(value):
"""
@@ -575,7 +615,7 @@ def glob_wildcards(pattern, files=None):
pattern = re.compile(regex(pattern))
if files is None:
- files = ((os.path.join(dirpath, f) if dirpath != "." else f)
+ files = (os.path.normpath(os.path.join(dirpath, f))
for dirpath, dirnames, filenames in os.walk(dirname)
for f in chain(filenames, dirnames))
@@ -616,7 +656,13 @@ def update_wildcard_constraints(pattern,
return match.group(0)
examined_names = set()
- return re.sub(_wildcard_regex, replace_constraint, pattern)
+ updated = re.sub(_wildcard_regex, replace_constraint, pattern)
+
+ # inherit flags
+ if isinstance(pattern, AnnotatedString):
+ updated = AnnotatedString(updated)
+ updated.flags = deepcopy(pattern.flags)
+ return updated
diff --git a/snakemake/jobs.py b/snakemake/jobs.py
index 99ea315..2486ede 100644
--- a/snakemake/jobs.py
+++ b/snakemake/jobs.py
@@ -7,6 +7,8 @@ import os
import sys
import base64
import tempfile
+import subprocess
+import json
from collections import defaultdict
from itertools import chain
@@ -15,10 +17,11 @@ from operator import attrgetter
from snakemake.io import IOFile, Wildcards, Resources, _IOFile, is_flagged, contains_wildcard
from snakemake.utils import format, listfiles
-from snakemake.exceptions import RuleException, ProtectedOutputException
-from snakemake.exceptions import UnexpectedOutputException
+from snakemake.exceptions import RuleException, ProtectedOutputException, WorkflowError
+from snakemake.exceptions import UnexpectedOutputException, CreateCondaEnvironmentException
from snakemake.logging import logger
from snakemake.common import DYNAMIC_FILL
+from snakemake import conda, wrapper
def jobfiles(jobs, type):
@@ -45,6 +48,8 @@ class Job:
self._log = None
self._benchmark = None
self._resources = None
+ self._conda_env_file = None
+ self._conda_env = None
self.shadow_dir = None
self._inputsize = None
@@ -114,6 +119,32 @@ class Job:
return self._resources
@property
+ def conda_env_file(self):
+ if not self.rule.workflow.use_conda:
+ # if use_conda is False, ignore conda_env_file definition
+ return None
+
+ if self._conda_env_file is None:
+ self._conda_env_file = self.rule.expand_conda_env(self.wildcards_dict)
+ return self._conda_env_file
+
+ @property
+ def conda_env(self):
+ if self.conda_env_file:
+ if self._conda_env is None:
+ raise ValueError("create_conda_env() must be called before calling conda_env")
+ return self._conda_env
+ return None
+
+ def create_conda_env(self):
+ """Create conda environment if specified."""
+ if self.conda_env_file:
+ try:
+ self._conda_env = conda.create_env(self)
+ except CreateCondaEnvironmentException as e:
+ raise WorkflowError(e, rule=self.rule)
+
+ @property
def is_shadow(self):
return self.rule.shadow_depth is not None
@@ -163,6 +194,22 @@ class Job:
rule=self.rule)
@property
+ def is_shell(self):
+ return self.rule.shellcmd is not None
+
+ @property
+ def is_norun(self):
+ return self.rule.norun
+
+ @property
+ def is_script(self):
+ return self.rule.script is not None
+
+ @property
+ def is_wrapper(self):
+ return self.rule.wrapper is not None
+
+ @property
def expanded_output(self):
""" Iterate over output files while dynamic output is expanded. """
for f, f_ in zip(self.output, self.rule.output):
@@ -396,6 +443,8 @@ class Job:
"present when the DAG was created:\n{}".format(
self.rule, unexpected_output))
+ self.remove_existing_output()
+
for f, f_ in zip(self.output, self.rule.output):
f.prepare()
@@ -407,8 +456,6 @@ class Job:
if self.benchmark:
self.benchmark.prepare()
- self.remove_existing_output()
-
if not self.is_shadow:
return
# Create shadow directory structure
@@ -442,7 +489,6 @@ class Job:
link = os.path.join(self.shadow_dir, relative_source)
os.symlink(source, link)
-
def close_remote(self):
for f in (self.input + self.output):
if f.is_remote:
diff --git a/snakemake/logging.py b/snakemake/logging.py
index 3c0830b..46788da 100644
--- a/snakemake/logging.py
+++ b/snakemake/logging.py
@@ -9,16 +9,16 @@ import time
import sys
import os
import json
-import multiprocessing
import threading
import tempfile
from functools import partial
from snakemake.common import DYNAMIC_FILL
+from snakemake.common import Mode
class ColorizingStreamHandler(_logging.StreamHandler):
-
+
BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)
RESET_SEQ = "\033[0m"
@@ -33,21 +33,19 @@ class ColorizingStreamHandler(_logging.StreamHandler):
'ERROR': RED
}
- def __init__(self, nocolor=False, stream=sys.stderr, timestamp=False, use_threads=False):
+ def __init__(self, nocolor=False, stream=sys.stderr, timestamp=False, use_threads=False, mode=Mode.default):
super().__init__(stream=stream)
- if not use_threads:
- self._output_lock = multiprocessing.Lock()
- else:
- self._output_lock = threading.Lock()
+ self._output_lock = threading.Lock()
- self.nocolor = nocolor or not self.can_color_tty
+ self.nocolor = nocolor or not self.can_color_tty(mode)
self.timestamp = timestamp
- @property
- def can_color_tty(self):
+ def can_color_tty(self, mode):
if 'TERM' in os.environ and os.environ['TERM'] == 'dumb':
return False
+ if mode == Mode.subprocess:
+ return True
return self.is_tty and not platform.system() == 'Windows'
@property
@@ -104,6 +102,7 @@ class Logger:
self.logfile_handler.close()
os.close(self.logfile_fd)
os.remove(self.logfile)
+ self.log_handler = [self.text_handler]
def get_logfile(self):
if self.logfile is not None:
@@ -177,7 +176,7 @@ class Logger:
def format_item(item, omit=None, valueformat=str):
value = msg[item]
if value != omit:
- return "\t{}: {}".format(item, valueformat(value))
+ return " {}: {}".format(item, valueformat(value))
yield "{}rule {}:".format("local" if msg["local"] else "",
msg["name"])
@@ -196,7 +195,7 @@ class Logger:
wildcards = format_wildcards(msg["wildcards"])
if wildcards:
- yield "\twildcards: " + wildcards
+ yield " wildcards: " + wildcards
for item, omit in zip("priority threads".split(), [0, 1]):
fmt = format_item(item, omit=omit)
@@ -205,10 +204,10 @@ class Logger:
resources = format_resources(msg["resources"])
if resources:
- yield "\tresources: " + resources
+ yield " resources: " + resources
level = msg["level"]
- if level == "info":
+ if level == "info" and not self.quiet:
self.logger.warning(msg["msg"])
if level == "warning":
self.logger.warning(msg["msg"])
@@ -216,9 +215,9 @@ class Logger:
self.logger.error(msg["msg"])
elif level == "debug":
self.logger.debug(msg["msg"])
- elif level == "resources_info":
+ elif level == "resources_info" and not self.quiet:
self.logger.warning(msg["msg"])
- elif level == "run_info":
+ elif level == "run_info" and not self.quiet:
self.logger.warning(msg["msg"])
elif level == "progress" and not self.quiet:
done = msg["done"]
@@ -227,12 +226,14 @@ class Logger:
percent_fmt = ("{:.2%}" if p < 0.01 else "{:.0%}").format(p)
self.logger.info("{} of {} steps ({}) done".format(
done, total, percent_fmt))
- elif level == "job_info":
- if not self.quiet:
- if msg["msg"] is not None:
- self.logger.info(msg["msg"])
- else:
- self.logger.info("\n".join(job_info(msg)))
+ elif level == "job_info" and not self.quiet:
+ if msg["msg"] is not None:
+ self.logger.info(msg["msg"])
+ if self.printreason:
+ self.logger.info("Reason: {}".format(msg["reason"]))
+ else:
+ self.logger.info("\n".join(job_info(msg)))
+ self.logger.info("")
elif level == "shellcmd":
if self.printshellcmds:
self.logger.warning(msg["msg"])
@@ -242,7 +243,7 @@ class Logger:
elif level == "rule_info":
self.logger.info(msg["name"])
if msg["docstring"]:
- self.logger.info("\t" + msg["docstring"])
+ self.logger.info(" " + msg["docstring"])
elif level == "d3dag":
print(json.dumps({"nodes": msg["nodes"], "links": msg["edges"]}))
@@ -272,7 +273,8 @@ def setup_logger(handler=None,
stdout=False,
debug=False,
timestamp=False,
- use_threads=False):
+ use_threads=False,
+ mode=Mode.default):
logger.setup()
if handler is not None:
# custom log handler
@@ -283,7 +285,8 @@ def setup_logger(handler=None,
nocolor=nocolor,
stream=sys.stdout if stdout else sys.stderr,
timestamp=timestamp,
- use_threads=use_threads)
+ use_threads=use_threads,
+ mode=mode)
logger.set_stream_handler(stream_handler)
logger.set_level(_logging.DEBUG if debug else _logging.INFO)
diff --git a/snakemake/parser.py b/snakemake/parser.py
index 3179236..7bb8c53 100644
--- a/snakemake/parser.py
+++ b/snakemake/parser.py
@@ -398,13 +398,16 @@ class Benchmark(RuleKeywordState):
pass
+class Conda(RuleKeywordState):
+ pass
+
+
class WildcardConstraints(RuleKeywordState):
@property
def keyword(self):
return "wildcard_constraints"
-
class Run(RuleKeywordState):
def __init__(self, snakefile, rulename,
base_indent=0,
@@ -420,7 +423,8 @@ class Run(RuleKeywordState):
yield "@workflow.run"
yield "\n"
yield ("def __rule_{rulename}(input, output, params, wildcards, threads, "
- "resources, log, version):".format(rulename=self.rulename if self.rulename is not None else self.snakefile.rulecount))
+ "resources, log, version, conda_env):".format(
+ rulename=self.rulename if self.rulename is not None else self.snakefile.rulecount))
def end(self):
yield ""
@@ -430,9 +434,11 @@ class Run(RuleKeywordState):
) or is_eof(token)
-class Shell(Run):
+class AbstractCmd(Run):
- overwrite_shellcmd = None
+ overwrite_cmd = None
+ start_func = None
+ end_func = None
def __init__(self, snakefile, rulename,
base_indent=0,
@@ -442,16 +448,21 @@ class Shell(Run):
base_indent=base_indent,
dedent=dedent,
root=root)
- self.shellcmd = list()
+ self.cmd = list()
self.token = None
- if self.overwrite_shellcmd is not None:
+ if self.overwrite_cmd is not None:
self.block_content = self.overwrite_block_content
def is_block_end(self, token):
return (self.line and self.indent <= 0) or is_eof(token)
def start(self):
- yield "@workflow.shellcmd("
+ if self.start_func is not None:
+ yield self.start_func
+ yield "("
+
+ def args(self):
+ yield from []
def end(self):
# the end is detected. So we can savely reset the indent to zero here
@@ -463,8 +474,10 @@ class Shell(Run):
yield t
yield "\n"
yield INDENT * (self.effective_indent + 1)
- yield "shell("
- yield "\n".join(self.shellcmd)
+ yield self.end_func
+ yield "("
+ yield "\n".join(self.cmd)
+ yield from self.args()
yield "\n"
yield ")"
for t in super().end():
@@ -474,79 +487,47 @@ class Shell(Run):
if self.token is None:
# no block after shell keyword
self.error(
- "Shell command must be given as string after the shell keyword.",
+ "Command must be given as string after the shell keyword.",
token)
for t in self.end():
yield t, self.token
def block_content(self, token):
self.token = token
- self.shellcmd.append(token.string)
+ self.cmd.append(token.string)
yield token.string, token
def overwrite_block_content(self, token):
if self.token is None:
self.token = token
- shellcmd = '"{}"'.format(self.overwrite_shellcmd)
- self.shellcmd.append(shellcmd)
- yield shellcmd, token
+ cmd = '"{}"'.format(self.overwrite_cmd)
+ self.cmd.append(cmd)
+ yield cmd, token
-class Script(Run):
- def __init__(self, snakefile, rulename,
- base_indent=0,
- dedent=0,
- root=True):
- super().__init__(snakefile, rulename,
- base_indent=base_indent,
- dedent=dedent,
- root=root)
- self.path = list()
- self.token = None
+class Shell(AbstractCmd):
+ start_func = "@workflow.shellcmd"
+ end_func = "shell"
- def is_block_end(self, token):
- return (self.line and self.indent <= 0) or is_eof(token)
- def start(self):
- for t in super().start():
- yield t
- yield "\n"
- yield INDENT * (self.effective_indent + 1)
- yield "script("
- yield '"{}"'.format(
- os.path.abspath(os.path.dirname(self.snakefile.path)))
- yield ", "
+class Script(AbstractCmd):
+ start_func = "@workflow.script"
+ end_func = "script"
- def end(self):
- # the end is detected. So we can savely reset the indent to zero here
- self.indent = 0
- yield ", input, output, params, wildcards, threads, resources, log, config"
- yield ")"
- for t in super().end():
- yield t
-
- def decorate_end(self, token):
- if self.token is None:
- # no block after script keyword
- self.error(
- "Script path must be given as string after the script keyword.",
- token)
- for t in self.end():
- yield t, self.token
-
- def block_content(self, token):
- self.token = token
- self.path.append(token.string)
- yield token.string, token
+ def args(self):
+ # basedir
+ yield ', "{}"'.format(
+ os.path.abspath(os.path.dirname(self.snakefile.path)))
+ # other args
+ yield ", input, output, params, wildcards, threads, resources, log, config, conda_env"
class Wrapper(Script):
- def start(self):
- for t in super(Script, self).start():
- yield t
- yield "\n"
- yield INDENT * (self.effective_indent + 1)
- yield 'wrapper('
+ start_func = "@workflow.wrapper"
+ end_func = "wrapper"
+
+ def args(self):
+ yield ", input, output, params, wildcards, threads, resources, log, config, conda_env, workflow.wrapper_prefix"
class Rule(GlobalKeywordState):
@@ -560,6 +541,7 @@ class Rule(GlobalKeywordState):
log=Log,
message=Message,
benchmark=Benchmark,
+ conda=Conda,
wildcard_constraints=WildcardConstraints,
shadow=Shadow,
run=Run,
@@ -734,7 +716,7 @@ def format_tokens(tokens):
def parse(path, overwrite_shellcmd=None, rulecount=0):
- Shell.overwrite_shellcmd = overwrite_shellcmd
+ Shell.overwrite_cmd = overwrite_shellcmd
with Snakefile(path, rulecount=rulecount) as snakefile:
automaton = Python(snakefile)
linemap = dict()
diff --git a/snakemake/persistence.py b/snakemake/persistence.py
index 0bc77e6..b395731 100644
--- a/snakemake/persistence.py
+++ b/snakemake/persistence.py
@@ -34,13 +34,15 @@ class Persistence:
self._code_path = os.path.join(self.path, "code_tracking")
self._rule_path = os.path.join(self.path, "rule_tracking")
self._input_path = os.path.join(self.path, "input_tracking")
+ self._log_path = os.path.join(self.path, "log_tracking")
self._params_path = os.path.join(self.path, "params_tracking")
self._shellcmd_path = os.path.join(self.path, "shellcmd_tracking")
self.shadow_path = os.path.join(self.path, "shadow")
+ self.conda_env_path = os.path.join(self.path, "conda")
for d in (self._incomplete_path, self._version_path, self._code_path,
- self._rule_path, self._input_path, self._params_path,
- self._shellcmd_path, self.shadow_path):
+ self._rule_path, self._input_path, self._log_path, self._params_path,
+ self._shellcmd_path, self.shadow_path, self.conda_env_path):
if not os.path.exists(d):
os.mkdir(d)
@@ -110,6 +112,7 @@ class Persistence:
self._delete_record(self._code_path, path)
self._delete_record(self._rule_path, path)
self._delete_record(self._input_path, path)
+ self._delete_record(self._log_path, path)
self._delete_record(self._params_path, path)
self._delete_record(self._shellcmd_path, path)
@@ -127,6 +130,7 @@ class Persistence:
job.rule.version) if job.rule.version is not None else None
code = self._code(job.rule)
input = self._input(job)
+ log = self._log(job)
params = self._params(job)
shellcmd = self._shellcmd(job)
for f in job.expanded_output:
@@ -135,6 +139,7 @@ class Persistence:
self._record(self._code_path, code, f, bin=True)
self._record(self._rule_path, job.rule.name, f)
self._record(self._input_path, input, f)
+ self._record(self._log_path, log, f)
self._record(self._params_path, params, f)
self._record(self._shellcmd_path, shellcmd, f)
@@ -145,6 +150,7 @@ class Persistence:
self._delete_record(self._code_path, f)
self._delete_record(self._rule_path, f)
self._delete_record(self._input_path, f)
+ self._delete_record(self._log_path, f)
self._delete_record(self._params_path, f)
self._delete_record(self._shellcmd_path, f)
@@ -165,6 +171,12 @@ class Persistence:
return files.split("\n")
return None
+ def log(self, path):
+ files = self._read_record(self._log_path, path)
+ if files is not None:
+ return files.split("\n")
+ return None
+
def shellcmd(self, path):
return self._read_record(self._shellcmd_path, path)
@@ -216,6 +228,10 @@ class Persistence:
return "\n".join(sorted(job.input))
@lru_cache()
+ def _log(self, job):
+ return "\n".join(sorted(job.log))
+
+ @lru_cache()
def _params(self, job):
return "\n".join(sorted(map(repr, job.params)))
diff --git a/snakemake/remote/HTTP.py b/snakemake/remote/HTTP.py
index af035ee..5d1bae5 100644
--- a/snakemake/remote/HTTP.py
+++ b/snakemake/remote/HTTP.py
@@ -17,7 +17,7 @@ try:
# third-party modules
import requests
except ImportError as e:
- raise WorkflowError("The Python 3 package 'requests' " +
+ raise WorkflowError("The Python 3 package 'requests' " +
"must be installed to use HTTP(S) remote() file functionality. %s" % e.msg)
class RemoteProvider(AbstractRemoteProvider):
@@ -33,11 +33,11 @@ class RemoteObject(DomainObject):
self.insecure = insecure
self.additional_request_string = additional_request_string
-
+
# === Implementations of abstract class members ===
@contextmanager #makes this a context manager. after 'yield' is __exit__()
- def httpr(self, verb="GET", stream=False):
+ def httpr(self, verb="GET", stream=False):
# if args have been provided to remote(), use them over those given to RemoteProvider()
args_to_use = self.provider.args
if len(self.args):
@@ -95,11 +95,11 @@ class RemoteObject(DomainObject):
def mtime(self):
if self.exists():
with self.httpr(verb="HEAD") as httpr:
-
+
file_mtime = self.get_header_item(httpr, "last-modified", default=0)
modified_tuple = email.utils.parsedate_tz(file_mtime)
- epochTime = int(email.utils.mktime_tz(modified_tuple))
+ epochTime = email.utils.mktime_tz(modified_tuple)
return epochTime
else:
@@ -121,9 +121,9 @@ class RemoteObject(DomainObject):
# if the destination path does not exist
if make_dest_dirs:
os.makedirs(os.path.dirname(self.local_path), exist_ok=True)
-
+
with open(self.local_path, 'wb') as f:
- for chunk in httpr.iter_content(chunk_size=1024):
+ for chunk in httpr.iter_content(chunk_size=1024):
if chunk: # filter out keep-alives
f.write(chunk)
else:
diff --git a/snakemake/remote/__init__.py b/snakemake/remote/__init__.py
index 0f689a8..fa47b44 100644
--- a/snakemake/remote/__init__.py
+++ b/snakemake/remote/__init__.py
@@ -26,6 +26,9 @@ class StaticRemoteObjectProxy(ObjectProxy):
def mtime(self):
return float("-inf")
+ def is_newer(self, time):
+ return False
+
def __copy__(self):
copied_wrapped = copy.copy(self.__wrapped__)
return type(self)(copied_wrapped)
@@ -57,7 +60,6 @@ class AbstractRemoteProvider:
remote_object = provider.RemoteObject(*args, keep_local=keep_local, provider=provider.RemoteProvider(*self.args, **self.kwargs), **kwargs)
if static:
remote_object = StaticRemoteObjectProxy(remote_object)
-
return snakemake.io.flag(
value,
"remote_object",
diff --git a/snakemake/report.py b/snakemake/report.py
index 3fd9b2b..1b8c15e 100644
--- a/snakemake/report.py
+++ b/snakemake/report.py
@@ -97,28 +97,30 @@ def report(text, path,
text = format(textwrap.dedent(text), stepout=3)
- attachments = [textwrap.dedent("""
- .. container::
- :name: attachments
-
- """)]
- for name, _files in sorted(files.items()):
- if not isinstance(_files, list):
- _files = [_files]
- links = []
- for file in _files:
- data = data_uri(file)
- links.append(':raw-html:`<a href="{data}" download="{filename}" draggable="true">{filename}</a>`'.format(
- data=data, filename=os.path.basename(file)))
- links = "\n\n ".join(links)
- attachments.append('''
- .. container::
- :name: {name}
-
- {name}:
- {links}
- '''.format(name=name,
- links=links))
+ attachments = []
+ if files:
+ attachments = [textwrap.dedent("""
+ .. container::
+ :name: attachments
+
+ """)]
+ for name, _files in sorted(files.items()):
+ if not isinstance(_files, list):
+ _files = [_files]
+ links = []
+ for file in _files:
+ data = data_uri(file)
+ links.append(':raw-html:`<a href="{data}" download="{filename}" draggable="true">{filename}</a>`'.format(
+ data=data, filename=os.path.basename(file)))
+ links = "\n\n ".join(links)
+ attachments.append('''
+ .. container::
+ :name: {name}
+
+ {name}:
+ {links}
+ '''.format(name=name,
+ links=links))
text = definitions + text + "\n\n" + "\n\n".join(attachments) + metadata
diff --git a/snakemake/rules.py b/snakemake/rules.py
index 060fef1..b6e2a69 100644
--- a/snakemake/rules.py
+++ b/snakemake/rules.py
@@ -15,6 +15,7 @@ from snakemake.io import expand, InputFiles, OutputFiles, Wildcards, Params, Log
from snakemake.io import apply_wildcards, is_flagged, not_iterable
from snakemake.exceptions import RuleException, IOFileException, WildcardError, InputFunctionException, WorkflowError
from snakemake.logging import logger
+from snakemake.common import Mode
class Rule:
@@ -45,16 +46,19 @@ class Rule:
self.shadow_depth = None
self.resources = dict(_cores=1, _nodes=1)
self.priority = 0
- self.version = None
+ self._version = None
self._log = Log()
self._benchmark = None
+ self._conda_env = None
self.wildcard_names = set()
self.lineno = lineno
self.snakefile = snakefile
self.run_func = None
self.shellcmd = None
self.script = None
+ self.wrapper = None
self.norun = False
+ self.is_branched = False
elif len(args) == 1:
other = args[0]
self.name = other.name
@@ -78,13 +82,16 @@ class Rule:
self.version = other.version
self._log = other._log
self._benchmark = other._benchmark
+ self._conda_env = other._conda_env
self.wildcard_names = set(other.wildcard_names)
self.lineno = other.lineno
self.snakefile = other.snakefile
self.run_func = other.run_func
self.shellcmd = other.shellcmd
self.script = other.script
+ self.wrapper = other.wrapper
self.norun = other.norun
+ self.is_branched = True
def dynamic_branch(self, wildcards, input=True):
def get_io(rule):
@@ -156,6 +163,7 @@ class Rule:
branch._params = branch.expand_params(non_dynamic_wildcards, branch._input, branch.resources)
branch._log = branch.expand_log(non_dynamic_wildcards)
branch._benchmark = branch.expand_benchmark(non_dynamic_wildcards)
+ branch._conda_env = branch.expand_conda_env(non_dynamic_wildcards)
return branch, non_dynamic_wildcards
return branch
@@ -166,6 +174,16 @@ class Rule:
return bool(self.wildcard_names)
@property
+ def version(self):
+ return self._version
+
+ @version.setter
+ def version(self, version):
+ if isinstance(version, str) and "\n" in version:
+ raise WorkflowError("Version string may not contain line breaks.", rule=self)
+ self._version = version
+
+ @property
def benchmark(self):
return self._benchmark
@@ -174,6 +192,15 @@ class Rule:
self._benchmark = IOFile(benchmark, rule=self)
@property
+ def conda_env(self):
+ return self._conda_env
+
+ @conda_env.setter
+ def conda_env(self, conda_env):
+ self._conda_env = IOFile(conda_env, rule=self)
+
+
+ @property
def input(self):
return self._input
@@ -238,9 +265,10 @@ class Rule:
inoutput = self.output if output else self.input
if isinstance(item, str):
# add the rule to the dependencies
- if isinstance(item, _IOFile):
+ if isinstance(item, _IOFile) and item.rule:
self.dependencies[item] = item.rule
if output:
+ rule = self
if self.wildcard_constraints or self.workflow._wildcard_constraints:
try:
item = update_wildcard_constraints(
@@ -252,10 +280,12 @@ class Rule:
snakefile=self.snakefile,
lineno=self.lineno)
else:
- if contains_wildcard_constraints(item):
+ rule = None
+ if contains_wildcard_constraints(item) and self.workflow.mode != Mode.subprocess:
logger.warning(
"wildcard constraints in inputs are ignored")
- _item = IOFile(item, rule=self)
+ # record rule only if this is an output file
+ _item = IOFile(item, rule=rule)
if is_flagged(item, "temp"):
if output:
self.temp_output.add(_item)
@@ -528,6 +558,21 @@ class Rule:
resources = Resources(fromdict=resources)
return resources
+ def expand_conda_env(self, wildcards):
+ try:
+ conda_env = self.conda_env.apply_wildcards(
+ wildcards) if self.conda_env else None
+ except WildcardError as e:
+ raise WorkflowError(
+ "Wildcards in conda environment file cannot be "
+ "determined from output files:",
+ str(e), rule=self)
+
+ if conda_env is not None:
+ conda_env.check()
+
+ return conda_env
+
def is_producer(self, requested_output):
"""
Returns True if this rule is a producer of the requested output.
diff --git a/snakemake/scheduler.py b/snakemake/scheduler.py
index 246b605..0b29ed7 100644
--- a/snakemake/scheduler.py
+++ b/snakemake/scheduler.py
@@ -5,7 +5,6 @@ __license__ = "MIT"
import os, signal
import threading
-import multiprocessing
import operator
from functools import partial
from collections import defaultdict
@@ -35,7 +34,6 @@ class JobScheduler:
cluster_sync=None,
drmaa=None,
jobname=None,
- immediate_submit=False,
quiet=False,
printreason=False,
printshellcmds=False,
@@ -61,15 +59,9 @@ class JobScheduler:
self.resources = dict(self.workflow.global_resources)
- # we should use threads on a cluster, because shared memory /dev/shm may be full
- # which prevents the multiprocessing.Lock() semaphore from being created
use_threads = force_use_threads or (os.name != "posix") or cluster or cluster_sync or drmaa
- if not use_threads:
- self._open_jobs = multiprocessing.Event()
- self._lock = multiprocessing.Lock()
- else:
- self._open_jobs = threading.Event()
- self._lock = threading.Lock()
+ self._open_jobs = threading.Event()
+ self._lock = threading.Lock()
self._errors = False
self._finished = False
@@ -100,7 +92,7 @@ class JobScheduler:
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
- threads=use_threads,
+ use_threads=use_threads,
latency_wait=latency_wait,
benchmark_repeats=benchmark_repeats)
self.run = self.run_cluster_or_local
@@ -118,7 +110,7 @@ class JobScheduler:
latency_wait=latency_wait,
benchmark_repeats=benchmark_repeats,
max_jobs_per_second=max_jobs_per_second)
- if immediate_submit:
+ if workflow.immediate_submit:
self.job_reward = self.dryrun_job_reward
self._submit_callback = partial(self._proceed,
update_dynamic=False,
@@ -146,7 +138,7 @@ class JobScheduler:
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
- threads=use_threads,
+ use_threads=use_threads,
latency_wait=latency_wait,
benchmark_repeats=benchmark_repeats, )
self._open_jobs.set()
diff --git a/snakemake/script.py b/snakemake/script.py
index 3ff3d05..e4d2974 100644
--- a/snakemake/script.py
+++ b/snakemake/script.py
@@ -5,14 +5,28 @@ __license__ = "MIT"
import inspect
import os
+import tempfile
+import textwrap
+import sys
+import pickle
import traceback
+import subprocess
import collections
+import re
from urllib.request import urlopen
from urllib.error import URLError
from snakemake.utils import format
from snakemake.logging import logger
from snakemake.exceptions import WorkflowError
+from snakemake.shell import shell
+from snakemake.version import MIN_PY_VERSION
+
+
+PY_VER_RE = re.compile(r"Python (?P<ver_min>\d+\.\d+)")  # raw string; no trailing ':' required, so a bare "Python X.Y.Z" banner matches too
+# TODO use this to find the right place for inserting the preamble
+PY_PREAMBLE_RE = re.compile(r"from( )+__future__( )+import.*?(?P<end>[;\n])")
+
class REncoder:
"""Encoding Pyton data structures into R."""
@@ -21,15 +35,15 @@ class REncoder:
def encode_value(cls, value):
if isinstance(value, str):
return repr(value)
- if isinstance(value, collections.Iterable):
- # convert all iterables to vectors
- return cls.encode_list(value)
elif isinstance(value, dict):
return cls.encode_dict(value)
elif isinstance(value, bool):
return "TRUE" if value else "FALSE"
elif isinstance(value, int) or isinstance(value, float):
return str(value)
+ elif isinstance(value, collections.Iterable):
+ # convert all iterables to vectors
+ return cls.encode_list(value)
else:
# Try to convert from numpy if numpy is present
try:
@@ -38,8 +52,8 @@ class REncoder:
return str(value)
except ImportError:
pass
- raise ValueError(
- "Unsupported value for conversion into R: {}".format(value))
+ raise ValueError(
+ "Unsupported value for conversion into R: {}".format(value))
@classmethod
def encode_list(cls, l):
@@ -60,10 +74,10 @@ class REncoder:
@classmethod
def encode_namedlist(cls, namedlist):
- positional = cls.encode_list(namedlist)
+ positional = ", ".join(map(cls.encode_value, namedlist))
named = cls.encode_items(namedlist.items())
source = "list("
- if positional != "c()":
+ if positional:
source += positional
if named:
source += ", " + named
@@ -133,8 +147,8 @@ class Snakemake:
return lookup[(stdout, stderr, append)].format(self.log)
-def script(basedir, path, input, output, params, wildcards, threads, resources,
- log, config):
+def script(path, basedir, input, output, params, wildcards, threads, resources,
+ log, config, conda_env):
"""
Load a script from the given basedir + path and execute it.
Supports Python 3 and R.
@@ -150,59 +164,89 @@ def script(basedir, path, input, output, params, wildcards, threads, resources,
try:
with urlopen(path) as source:
if path.endswith(".py"):
- try:
- exec(compile(source.read().decode(), path, "exec"), {
- "snakemake": Snakemake(input, output, params, wildcards,
- threads, resources, log, config)
- })
- except (Exception, BaseException) as ex:
- raise WorkflowError("".join(traceback.format_exception(type(ex), ex, ex.__traceback__)))
+ snakemake = Snakemake(input, output, params, wildcards,
+ threads, resources, log, config)
+ snakemake = pickle.dumps(snakemake)
+ # obtain search path for current snakemake module
+ # the module is needed for unpickling in the script
+ searchpath = os.path.dirname(os.path.dirname(__file__))
+ preamble = textwrap.dedent("""
+ ######## Snakemake header ########
+ import sys; sys.path.insert(0, "{}"); import pickle; snakemake = pickle.loads({})
+ ######## Original script #########
+ """).format(searchpath, snakemake)
elif path.endswith(".R"):
- try:
- import rpy2.robjects as robjects
- except ImportError:
- raise ValueError(
- "Python 3 package rpy2 needs to be installed to use the R function.")
- with urlopen(path) as source:
- preamble = """
- Snakemake <- setClass(
- "Snakemake",
- slots = c(
- input = "list",
- output = "list",
- params = "list",
- wildcards = "list",
- threads = "numeric",
- log = "list",
- resources = "list",
- config = "list"
- )
- )
- snakemake <- Snakemake(
- input = {},
- output = {},
- params = {},
- wildcards = {},
- threads = {},
- log = {},
- resources = {},
- config = {}
+ preamble = textwrap.dedent("""
+ ######## Snakemake header ########
+ library(methods)
+ Snakemake <- setClass(
+ "Snakemake",
+ slots = c(
+ input = "list",
+ output = "list",
+ params = "list",
+ wildcards = "list",
+ threads = "numeric",
+ log = "list",
+ resources = "list",
+ config = "list"
)
- """.format(REncoder.encode_namedlist(input),
- REncoder.encode_namedlist(output),
- REncoder.encode_namedlist(params),
- REncoder.encode_namedlist(wildcards), threads,
- REncoder.encode_namedlist(log),
- REncoder.encode_namedlist({
- name: value
- for name, value in resources.items()
- if name != "_cores" and name != "_nodes"
- }), REncoder.encode_dict(config))
- logger.debug(preamble)
- source = preamble + source.read().decode()
- robjects.r(source)
+ )
+ snakemake <- Snakemake(
+ input = {},
+ output = {},
+ params = {},
+ wildcards = {},
+ threads = {},
+ log = {},
+ resources = {},
+ config = {}
+ )
+ ######## Original script #########
+ """).format(REncoder.encode_namedlist(input),
+ REncoder.encode_namedlist(output),
+ REncoder.encode_namedlist(params),
+ REncoder.encode_namedlist(wildcards), threads,
+ REncoder.encode_namedlist(log),
+ REncoder.encode_namedlist({
+ name: value
+ for name, value in resources.items()
+ if name != "_cores" and name != "_nodes"
+ }), REncoder.encode_dict(config))
else:
raise ValueError(
"Unsupported script: Expecting either Python (.py) or R (.R) script.")
+
+ dir = ".snakemake/scripts"
+ os.makedirs(dir, exist_ok=True)
+ with tempfile.NamedTemporaryFile(
+ suffix="." + os.path.basename(path),
+ prefix="",
+ dir=dir,
+ delete=False) as f:
+ f.write(preamble.encode())
+ f.write(source.read())
+ if path.endswith(".py"):
+ py_exec = sys.executable
+ if conda_env is not None:
+ py = os.path.join(conda_env, "bin", "python")
+ if os.path.exists(py):
+ out = subprocess.check_output([py, "--version"])
+ ver = tuple(map(int, PY_VER_RE.match(out).group("ver_min").split(".")))
+ if ver >= MIN_PY_VERSION:
+ # Python version is new enough, make use of environment
+ # to execute script
+ py_exec = "python"
+ else:
+ logger.info("Conda environment defines Python "
+ "version < {}.{}. Using Python of the "
+ "master process to execute "
+ "script.".format(MIN_PY_VERSION))
+ # use the same Python as the running process or the one from the environment
+ shell("{py_exec} {f.name}")
+ elif path.endswith(".R"):
+ shell("Rscript {f.name}")
+ os.remove(f.name)
+
except URLError as e:
raise WorkflowError(e)
diff --git a/snakemake/shell.py b/snakemake/shell.py
index a7f569d..7ca00b7 100644
--- a/snakemake/shell.py
+++ b/snakemake/shell.py
@@ -7,6 +7,7 @@ import _io
import sys
import os
import subprocess as sp
+import inspect
from snakemake.utils import format
from snakemake.logging import logger
@@ -46,6 +47,7 @@ class shell:
if "stepout" in kwargs:
raise KeyError("Argument stepout is not allowed in shell command.")
cmd = format(cmd, *args, stepout=2, **kwargs)
+ context = inspect.currentframe().f_back.f_locals
logger.shellcmd(cmd)
@@ -53,7 +55,11 @@ class shell:
close_fds = sys.platform != 'win32'
- proc = sp.Popen("{} {} {}".format(
+ conda_env = context.get("conda_env", None)
+ env_prefix = "" if conda_env is None else "source activate {};".format(conda_env)
+
+ proc = sp.Popen("{} {} {} {}".format(
+ env_prefix,
cls._process_prefix,
cmd.rstrip(),
cls._process_suffix),
diff --git a/snakemake/version.py b/snakemake/version.py
index 2ae7a96..c490c1b 100644
--- a/snakemake/version.py
+++ b/snakemake/version.py
@@ -1 +1,3 @@
-__version__ = "3.8.2"
+__version__ = "3.9.0"
+
+MIN_PY_VERSION = (3, 3)
diff --git a/snakemake/workflow.py b/snakemake/workflow.py
index 8d78ceb..92c6962 100644
--- a/snakemake/workflow.py
+++ b/snakemake/workflow.py
@@ -23,11 +23,13 @@ from snakemake.dag import DAG
from snakemake.scheduler import JobScheduler
from snakemake.parser import parse
import snakemake.io
-from snakemake.io import protected, temp, temporary, expand, dynamic, glob_wildcards, flag, not_iterable, touch
+from snakemake.io import protected, temp, temporary, ancient, expand, dynamic, glob_wildcards, flag, not_iterable, touch
from snakemake.persistence import Persistence
from snakemake.utils import update_config
from snakemake.script import script
from snakemake.wrapper import wrapper
+import snakemake.wrapper
+from snakemake.common import Mode
class Workflow:
def __init__(self,
@@ -40,7 +42,10 @@ class Workflow:
overwrite_configfile=None,
overwrite_clusterconfig=dict(),
config_args=None,
- debug=False):
+ debug=False,
+ use_conda=False,
+ mode=Mode.default,
+ wrapper_prefix=None):
"""
Create the controller.
"""
@@ -68,12 +73,16 @@ class Workflow:
self.overwrite_configfile = overwrite_configfile
self.overwrite_clusterconfig = overwrite_clusterconfig
self.config_args = config_args
+ self.immediate_submit = None
self._onsuccess = lambda log: None
self._onerror = lambda log: None
self._onstart = lambda log: None
self._wildcard_constraints = dict()
self.debug = debug
self._rulecount = 0
+ self.use_conda = use_conda
+ self.mode = mode
+ self.wrapper_prefix = wrapper_prefix
global config
config = dict()
@@ -222,6 +231,7 @@ class Workflow:
self.global_resources = dict() if resources is None else resources
self.global_resources["_cores"] = cores
self.global_resources["_nodes"] = nodes
+ self.immediate_submit = immediate_submit
def rules(items):
return map(self._rules.__getitem__, filter(self.is_rule, items))
@@ -429,7 +439,6 @@ class Workflow:
cluster_sync=cluster_sync,
jobname=jobname,
max_jobs_per_second=max_jobs_per_second,
- immediate_submit=immediate_submit,
quiet=quiet,
keepgoing=keepgoing,
drmaa=drmaa,
@@ -618,10 +627,16 @@ class Workflow:
rule.message = ruleinfo.message
if ruleinfo.benchmark:
rule.benchmark = ruleinfo.benchmark
+ if ruleinfo.wrapper:
+ rule.conda_env = snakemake.wrapper.get_conda_env(ruleinfo.wrapper)
+ if ruleinfo.conda_env:
+ rule.conda_env = ruleinfo.conda_env
rule.norun = ruleinfo.norun
rule.docstring = ruleinfo.docstring
rule.run_func = ruleinfo.func
rule.shellcmd = ruleinfo.shellcmd
+ rule.script = ruleinfo.script
+ rule.wrapper = ruleinfo.wrapper
ruleinfo.func.__name__ = "__{}".format(name)
self.globals[ruleinfo.func.__name__] = ruleinfo.func
setattr(rules, name, rule)
@@ -678,6 +693,13 @@ class Workflow:
return decorate
+ def conda(self, conda_env):
+ def decorate(ruleinfo):
+ ruleinfo.conda_env = conda_env
+ return ruleinfo
+
+ return decorate
+
def threads(self, threads):
def decorate(ruleinfo):
ruleinfo.threads = threads
@@ -727,6 +749,20 @@ class Workflow:
return decorate
+ def script(self, script):
+ def decorate(ruleinfo):
+ ruleinfo.script = script
+ return ruleinfo
+
+ return decorate
+
+ def wrapper(self, wrapper):
+ def decorate(ruleinfo):
+ ruleinfo.wrapper = wrapper
+ return ruleinfo
+
+ return decorate
+
def norun(self):
def decorate(ruleinfo):
ruleinfo.norun = True
@@ -752,6 +788,7 @@ class RuleInfo:
self.params = None
self.message = None
self.benchmark = None
+ self.conda_env = None
self.wildcard_constraints = None
self.threads = None
self.shadow_depth = None
@@ -760,6 +797,8 @@ class RuleInfo:
self.version = None
self.log = None
self.docstring = None
+ self.script = None
+ self.wrapper = None
class Subworkflow:
diff --git a/snakemake/wrapper.py b/snakemake/wrapper.py
index e6cc2ec..f3aeb39 100644
--- a/snakemake/wrapper.py
+++ b/snakemake/wrapper.py
@@ -5,18 +5,42 @@ __license__ = "MIT"
import os
+import posixpath
from snakemake.script import script
-def wrapper(path, input, output, params, wildcards, threads, resources, log, config):
+def is_script(path):
+ return path.endswith("wrapper.py") or path.endswith("wrapper.R")
+
+
+def get_path(path, prefix=None):
+ if not (path.startswith("http") or path.startswith("file:")):
+ if prefix is None:
+ prefix = "https://bitbucket.org/snakemake/snakemake-wrappers/raw/"
+ path = prefix + path
+ return path
+
+
+def get_script(path, prefix=None):  # resolve a wrapper spec to the URL/path of its wrapper script
+    path = get_path(path, prefix=prefix)  # forward prefix; it was accepted but silently dropped before
+    if not is_script(path):
+        path += "/wrapper.py"
+    return path
+
+
+def get_conda_env(path, prefix=None):  # locate environment.yaml next to the wrapper script; prefix is optional, as in get_script
+    path = get_path(path, prefix=prefix)
+    if is_script(path):
+        # URLs and posixpaths share the same separator. Hence use posixpath here.
+        path = posixpath.dirname(path)
+    return path + "/environment.yaml"
+
+
+def wrapper(path, input, output, params, wildcards, threads, resources, log, config, conda_env, prefix):
"""
Load a wrapper from https://bitbucket.org/snakemake/snakemake-wrappers under
the given path + wrapper.py and execute it.
"""
- # TODO handle requirements.txt
- if not (path.startswith("http") or path.startswith("file:")):
- path = os.path.join("https://bitbucket.org/snakemake/snakemake-wrappers/raw", path)
- if not (path.endswith("wrapper.py") or path.endswith("wrapper.R")):
- path = os.path.join(path, "wrapper.py")
- script("", path, input, output, params, wildcards, threads, resources, log, config)
+ path = get_script(path, prefix=prefix)
+ script(path, "", input, output, params, wildcards, threads, resources, log, config, conda_env)
diff --git a/tests/test01/Snakefile b/tests/test01/Snakefile
index e9defea..6264373 100644
--- a/tests/test01/Snakefile
+++ b/tests/test01/Snakefile
@@ -50,7 +50,7 @@ rule rule1:
rule rule2:
input: testin
output: 'test.inter'
-# message: 'Copying {input[0]} to {output[0]}'
+ message: 'Copying {input[0]} to {output[0]}'
shell:
'''
cp {input[0]} {output[0]}
diff --git a/tests/test05/Snakefile b/tests/test05/Snakefile
index 33d8a4c..1abbd7e 100644
--- a/tests/test05/Snakefile
+++ b/tests/test05/Snakefile
@@ -1,3 +1,7 @@
+from snakemake import shell
+
+shell.executable("bash")
+
chromosomes = [1,2,3]
#shell('rm test.*.inter 2> /dev/null | true')
diff --git a/tests/test09/Snakefile b/tests/test09/Snakefile
index acacf6a..6f528f5 100644
--- a/tests/test09/Snakefile
+++ b/tests/test09/Snakefile
@@ -1,3 +1,6 @@
+from snakemake import shell
+
+shell.executable("bash")
def fail(input, output):
shell("false && cp {input} {output}")
diff --git a/tests/test13/Snakefile b/tests/test13/Snakefile
index f878f6d..9bc6577 100644
--- a/tests/test13/Snakefile
+++ b/tests/test13/Snakefile
@@ -1,3 +1,7 @@
+from snakemake import shell
+
+shell.executable("bash")
+
rule all:
input: 'test.algo1-p7-improved.out'
diff --git a/tests/test14/Snakefile.nonstandard b/tests/test14/Snakefile.nonstandard
index 0eb2605..7ff2cb0 100644
--- a/tests/test14/Snakefile.nonstandard
+++ b/tests/test14/Snakefile.nonstandard
@@ -1,3 +1,7 @@
+from snakemake import shell
+
+shell.executable("bash")
+
chromosomes = [1,2,3,4,5]
diff --git a/tests/test_ancient/Snakefile b/tests/test_ancient/Snakefile
new file mode 100644
index 0000000..404ce76
--- /dev/null
+++ b/tests/test_ancient/Snakefile
@@ -0,0 +1,28 @@
+shell("touch C;sleep 1;touch B;sleep 1;touch A;touch D")
+
+#Will not be executed even though A is newer
+rule a:
+ input:
+ ancient("A")
+ output:
+ "B"
+ shell:
+ "echo \"B recreated\" > {output}"
+
+#Will be executed because B is newer
+rule b:
+ input:
+ "B"
+ output:
+ "C"
+ shell:
+ "echo \"C recreated\" > {output}"
+
+#Will be executed because C was updated in rule b
+rule c:
+ input:
+ ancient("C")
+ output:
+ "D"
+ shell:
+ "echo \"D recreated\" > {output}"
diff --git a/tests/test_ancient/expected-results/A b/tests/test_ancient/expected-results/A
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_ancient/expected-results/B b/tests/test_ancient/expected-results/B
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_ancient/expected-results/C b/tests/test_ancient/expected-results/C
new file mode 100644
index 0000000..ed167ef
--- /dev/null
+++ b/tests/test_ancient/expected-results/C
@@ -0,0 +1 @@
+C recreated
diff --git a/tests/test_ancient/expected-results/D b/tests/test_ancient/expected-results/D
new file mode 100644
index 0000000..b686dc9
--- /dev/null
+++ b/tests/test_ancient/expected-results/D
@@ -0,0 +1 @@
+D recreated
diff --git a/tests/test_conda/Snakefile b/tests/test_conda/Snakefile
new file mode 100644
index 0000000..bd72012
--- /dev/null
+++ b/tests/test_conda/Snakefile
@@ -0,0 +1,11 @@
+rule all:
+ input:
+ expand("test{i}.out", i=range(3))
+
+rule a:
+ output:
+ "test{i}.out"
+ conda:
+ "test-env.yaml"
+ shell:
+ "snakemake --help > {output}"
diff --git a/tests/test_conda/expected-results/test0.out b/tests/test_conda/expected-results/test0.out
new file mode 100644
index 0000000..c47a427
--- /dev/null
+++ b/tests/test_conda/expected-results/test0.out
@@ -0,0 +1,279 @@
+usage: snakemake [-h] [--snakefile FILE] [--gui [PORT]] [--cores [N]]
+ [--local-cores N] [--resources [NAME=INT [NAME=INT ...]]]
+ [--config [KEY=VALUE [KEY=VALUE ...]]] [--configfile FILE]
+ [--list] [--list-target-rules] [--directory DIR] [--dryrun]
+ [--printshellcmds] [--dag] [--force-use-threads]
+ [--rulegraph] [--d3dag] [--summary] [--detailed-summary]
+ [--touch] [--keep-going] [--force] [--forceall]
+ [--forcerun [TARGET [TARGET ...]]]
+ [--prioritize TARGET [TARGET ...]]
+ [--until TARGET [TARGET ...]]
+ [--omit-from TARGET [TARGET ...]] [--allow-ambiguity]
+ [--cluster CMD | --cluster-sync CMD | --drmaa [ARGS]]
+ [--cluster-config FILE] [--immediate-submit]
+ [--jobscript SCRIPT] [--jobname NAME] [--reason]
+ [--stats FILE] [--nocolor] [--quiet] [--nolock] [--unlock]
+ [--cleanup-metadata FILE [FILE ...]] [--rerun-incomplete]
+ [--ignore-incomplete] [--list-version-changes]
+ [--list-code-changes] [--list-input-changes]
+ [--list-params-changes] [--latency-wait SECONDS]
+ [--wait-for-files [FILE [FILE ...]]] [--benchmark-repeats N]
+ [--notemp] [--keep-remote] [--keep-target-files]
+ [--keep-shadow]
+ [--allowed-rules ALLOWED_RULES [ALLOWED_RULES ...]]
+ [--max-jobs-per-second MAX_JOBS_PER_SECOND] [--timestamp]
+ [--greediness GREEDINESS] [--no-hooks] [--print-compilation]
+ [--overwrite-shellcmd OVERWRITE_SHELLCMD] [--verbose]
+ [--debug] [--profile FILE] [--bash-completion] [--version]
+ [target [target ...]]
+
+Snakemake is a Python based language and execution environment for GNU Make-
+like workflows.
+
+positional arguments:
+ target Targets to build. May be rules or files.
+
+optional arguments:
+ -h, --help show this help message and exit
+ --snakefile FILE, -s FILE
+ The workflow definition in a snakefile.
+ --gui [PORT] Serve an HTML based user interface to the given port
+ (default: 8000). If possible, a browser window is
+ opened.
+ --cores [N], --jobs [N], -j [N]
+ Use at most N cores in parallel (default: 1). If N is
+ omitted, the limit is set to the number of available
+ cores.
+ --local-cores N In cluster mode, use at most N cores of the host
+ machine in parallel (default: number of CPU cores of
+ the host). The cores are used to execute local rules.
+ This option is ignored when not in cluster mode.
+ --resources [NAME=INT [NAME=INT ...]], --res [NAME=INT [NAME=INT ...]]
+ Define additional resources that shall constrain the
+ scheduling analogously to threads (see above). A
+ resource is defined as a name and an integer value.
+ E.g. --resources gpu=1. Rules can use resources by
+ defining the resource keyword, e.g. resources: gpu=1.
+ If now two rules require 1 of the resource 'gpu' they
+ won't be run in parallel by the scheduler.
+ --config [KEY=VALUE [KEY=VALUE ...]], -C [KEY=VALUE [KEY=VALUE ...]]
+ Set or overwrite values in the workflow config object.
+ The workflow config object is accessible as variable
+ config inside the workflow. Default values can be set
+ by providing a JSON file (see Documentation).
+ --configfile FILE Specify or overwrite the config file of the workflow
+ (see the docs). Values specified in JSON or YAML
+ format are available in the global config dictionary
+ inside the workflow.
+ --list, -l Show availiable rules in given Snakefile.
+ --list-target-rules, --lt
+ Show available target rules in given Snakefile.
+ --directory DIR, -d DIR
+ Specify working directory (relative paths in the
+ snakefile will use this as their origin).
+ --dryrun, -n Do not execute anything.
+ --printshellcmds, -p Print out the shell commands that will be executed.
+ --dag Do not execute anything and print the directed acyclic
+ graph of jobs in the dot language. Recommended use on
+ Unix systems: snakemake --dag | dot | display
+ --force-use-threads Force threads rather than processes. Helpful if shared
+ memory (/dev/shm) is full or unavailable.
+ --rulegraph Do not execute anything and print the dependency graph
+ of rules in the dot language. This will be less
+ crowded than above DAG of jobs, but also show less
+ information. Note that each rule is displayed once,
+ hence the displayed graph will be cyclic if a rule
+ appears in several steps of the workflow. Use this if
+ above option leads to a DAG that is too large.
+ Recommended use on Unix systems: snakemake --rulegraph
+ | dot | display
+ --d3dag Print the DAG in D3.js compatible JSON format.
+ --summary, -S Print a summary of all files created by the workflow.
+ The has the following columns: filename, modification
+ time, rule version, status, plan. Thereby rule version
+ contains the versionthe file was created with (see the
+ version keyword of rules), and status denotes whether
+ the file is missing, its input files are newer or if
+ version or implementation of the rule changed since
+ file creation. Finally the last column denotes whether
+ the file will be updated or created during the next
+ workflow execution.
+ --detailed-summary, -D
+ Print a summary of all files created by the workflow.
+ The has the following columns: filename, modification
+ time, rule version, input file(s), shell command,
+ status, plan. Thereby rule version contains the
+ versionthe file was created with (see the version
+ keyword of rules), and status denotes whether the file
+ is missing, its input files are newer or if version or
+ implementation of the rule changed since file
+ creation. The input file and shell command columns are
+ selfexplanatory. Finally the last column denotes
+ whether the file will be updated or created during the
+ next workflow execution.
+ --touch, -t Touch output files (mark them up to date without
+ really changing them) instead of running their
+ commands. This is used to pretend that the rules were
+ executed, in order to fool future invocations of
+ snakemake. Fails if a file does not yet exist.
+ --keep-going, -k Go on with independent jobs if a job fails.
+ --force, -f Force the execution of the selected target or the
+ first rule regardless of already created output.
+ --forceall, -F Force the execution of the selected (or the first)
+ rule and all rules it is dependent on regardless of
+ already created output.
+ --forcerun [TARGET [TARGET ...]], -R [TARGET [TARGET ...]]
+ Force the re-execution or creation of the given rules
+ or files. Use this option if you changed a rule and
+ want to have all its output in your workflow updated.
+ --prioritize TARGET [TARGET ...], -P TARGET [TARGET ...]
+ Tell the scheduler to assign creation of given targets
+ (and all their dependencies) highest priority.
+ (EXPERIMENTAL)
+ --until TARGET [TARGET ...], -U TARGET [TARGET ...]
+ Runs the pipeline until it reaches the specified rules
+ or files. Only runs jobs that are dependencies of the
+ specified rule or files, does not run sibling DAGs.
+ --omit-from TARGET [TARGET ...], -O TARGET [TARGET ...]
+ Prevent the execution or creation of the given rules
+ or files as well as any rules or files that are
+ downstream of these targets in the DAG. Also runs jobs
+ in sibling DAGs that are independent of the rules or
+ files specified here.
+ --allow-ambiguity, -a
+ Don't check for ambiguous rules and simply use the
+ first if several can produce the same file. This
+ allows the user to prioritize rules by their order in
+ the snakefile.
+ --cluster CMD, -c CMD
+ Execute snakemake rules with the given submit command,
+ e.g. qsub. Snakemake compiles jobs into scripts that
+ are submitted to the cluster with the given command,
+ once all input files for a particular job are present.
+ The submit command can be decorated to make it aware
+ of certain job properties (input, output, params,
+ wildcards, log, threads and dependencies (see the
+ argument below)), e.g.: $ snakemake --cluster 'qsub
+ -pe threaded {threads}'.
+ --cluster-sync CMD cluster submission command will block, returning the
+ remote exitstatus upon remote termination (for
+ example, this should be usedif the cluster command is
+ 'qsub -sync y' (SGE)
+ --drmaa [ARGS] Execute snakemake on a cluster accessed via DRMAA,
+ Snakemake compiles jobs into scripts that are
+ submitted to the cluster with the given command, once
+ all input files for a particular job are present. ARGS
+ can be used to specify options of the underlying
+ cluster system, thereby using the job properties
+ input, output, params, wildcards, log, threads and
+ dependencies, e.g.: --drmaa ' -pe threaded {threads}'.
+ Note that ARGS must be given in quotes and with a
+ leading whitespace.
+ --cluster-config FILE, -u FILE
+ A JSON or YAML file that defines the wildcards used in
+ 'cluster'for specific rules, instead of having them
+ specified in the Snakefile. For example, for rule
+ 'job' you may define: { 'job' : { 'time' : '24:00:00'
+ } } to specify the time for rule 'job'. You can
+ specify more than one file. The configuration files
+ are merged with later values overriding earlier ones.
+ --immediate-submit, --is
+ Immediately submit all jobs to the cluster instead of
+ waiting for present input files. This will fail,
+ unless you make the cluster aware of job dependencies,
+ e.g. via: $ snakemake --cluster 'sbatch --dependency
+ {dependencies}. Assuming that your submit script (here
+ sbatch) outputs the generated job id to the first
+ stdout line, {dependencies} will be filled with space
+ separated job ids this job depends on.
+ --jobscript SCRIPT, --js SCRIPT
+ Provide a custom job script for submission to the
+ cluster. The default script resides as 'jobscript.sh'
+ in the installation directory.
+ --jobname NAME, --jn NAME
+ Provide a custom name for the jobscript that is
+ submitted to the cluster (see --cluster). NAME is
+ "snakejob.{rulename}.{jobid}.sh" per default. The
+ wildcard {jobid} has to be present in the name.
+ --reason, -r Print the reason for each executed rule.
+ --stats FILE Write stats about Snakefile execution in JSON format
+ to the given file.
+ --nocolor Do not use a colored output.
+ --quiet, -q Do not output any progress or rule information.
+ --nolock Do not lock the working directory
+ --unlock Remove a lock on the working directory.
+ --cleanup-metadata FILE [FILE ...], --cm FILE [FILE ...]
+ Cleanup the metadata of given files. That means that
+ snakemake removes any tracked version info, and any
+ marks that files are incomplete.
+ --rerun-incomplete, --ri
+ Re-run all jobs the output of which is recognized as
+ incomplete.
+ --ignore-incomplete, --ii
+ Do not check for incomplete output files.
+ --list-version-changes, --lv
+ List all output files that have been created with a
+ different version (as determined by the version
+ keyword).
+ --list-code-changes, --lc
+ List all output files for which the rule body (run or
+ shell) have changed in the Snakefile.
+ --list-input-changes, --li
+ List all output files for which the defined input
+ files have changed in the Snakefile (e.g. new input
+ files were added in the rule definition or files were
+ renamed). For listing input file modification in the
+ filesystem, use --summary.
+ --list-params-changes, --lp
+ List all output files for which the defined params
+ have changed in the Snakefile.
+ --latency-wait SECONDS, --output-wait SECONDS, -w SECONDS
+ Wait given seconds if an output file of a job is not
+ present after the job finished. This helps if your
+ filesystem suffers from latency (default 5).
+ --wait-for-files [FILE [FILE ...]]
+ Wait --latency-wait seconds for these files to be
+ present before executing the workflow. This option is
+ used internally to handle filesystem latency in
+ cluster environments.
+ --benchmark-repeats N
+ Repeat a job N times if marked for benchmarking
+ (default 1).
+ --notemp, --nt Ignore temp() declarations. This is useful when
+ running only a part of the workflow, since temp()
+ would lead to deletion of probably needed files by
+ other parts of the workflow.
+ --keep-remote Keep local copies of remote input files.
+ --keep-target-files Do not adjust the paths of given target files relative
+ to the working directory.
+ --keep-shadow Do not delete the shadow directory on snakemake
+ startup.
+ --allowed-rules ALLOWED_RULES [ALLOWED_RULES ...]
+ Only use given rules. If omitted, all rules in
+ Snakefile are used.
+ --max-jobs-per-second MAX_JOBS_PER_SECOND
+ Maximal number of cluster/drmaa jobs per second,
+ default is no limit
+ --timestamp, -T Add a timestamp to all logging output
+ --greediness GREEDINESS
+ Set the greediness of scheduling. This value between 0
+ and 1 determines how careful jobs are selected for
+ execution. The default value (1.0) provides the best
+ speed and still acceptable scheduling quality.
+ --no-hooks Do not invoke onstart, onsuccess or onerror hooks
+ after execution.
+ --print-compilation Print the python representation of the workflow.
+ --overwrite-shellcmd OVERWRITE_SHELLCMD
+ Provide a shell command that shall be executed instead
+ of those given in the workflow. This is for debugging
+ purposes only.
+ --verbose Print debugging output.
+ --debug Allow to debug rules with e.g. PDB. This flag allows
+ to set breakpoints in run blocks.
+ --profile FILE Profile Snakemake and write the output to FILE. This
+ requires yappi to be installed.
+ --bash-completion Output code to register bash completion for snakemake.
+ Put the following in your .bashrc (including the
+ accents): `snakemake --bash-completion` or issue it in
+ an open terminal session.
+ --version, -v show program's version number and exit
diff --git a/tests/test_conda/expected-results/test1.out b/tests/test_conda/expected-results/test1.out
new file mode 100644
index 0000000..c47a427
--- /dev/null
+++ b/tests/test_conda/expected-results/test1.out
@@ -0,0 +1,279 @@
+usage: snakemake [-h] [--snakefile FILE] [--gui [PORT]] [--cores [N]]
+ [--local-cores N] [--resources [NAME=INT [NAME=INT ...]]]
+ [--config [KEY=VALUE [KEY=VALUE ...]]] [--configfile FILE]
+ [--list] [--list-target-rules] [--directory DIR] [--dryrun]
+ [--printshellcmds] [--dag] [--force-use-threads]
+ [--rulegraph] [--d3dag] [--summary] [--detailed-summary]
+ [--touch] [--keep-going] [--force] [--forceall]
+ [--forcerun [TARGET [TARGET ...]]]
+ [--prioritize TARGET [TARGET ...]]
+ [--until TARGET [TARGET ...]]
+ [--omit-from TARGET [TARGET ...]] [--allow-ambiguity]
+ [--cluster CMD | --cluster-sync CMD | --drmaa [ARGS]]
+ [--cluster-config FILE] [--immediate-submit]
+ [--jobscript SCRIPT] [--jobname NAME] [--reason]
+ [--stats FILE] [--nocolor] [--quiet] [--nolock] [--unlock]
+ [--cleanup-metadata FILE [FILE ...]] [--rerun-incomplete]
+ [--ignore-incomplete] [--list-version-changes]
+ [--list-code-changes] [--list-input-changes]
+ [--list-params-changes] [--latency-wait SECONDS]
+ [--wait-for-files [FILE [FILE ...]]] [--benchmark-repeats N]
+ [--notemp] [--keep-remote] [--keep-target-files]
+ [--keep-shadow]
+ [--allowed-rules ALLOWED_RULES [ALLOWED_RULES ...]]
+ [--max-jobs-per-second MAX_JOBS_PER_SECOND] [--timestamp]
+ [--greediness GREEDINESS] [--no-hooks] [--print-compilation]
+ [--overwrite-shellcmd OVERWRITE_SHELLCMD] [--verbose]
+ [--debug] [--profile FILE] [--bash-completion] [--version]
+ [target [target ...]]
+
+Snakemake is a Python based language and execution environment for GNU Make-
+like workflows.
+
+positional arguments:
+ target Targets to build. May be rules or files.
+
+optional arguments:
+ -h, --help show this help message and exit
+ --snakefile FILE, -s FILE
+ The workflow definition in a snakefile.
+ --gui [PORT] Serve an HTML based user interface to the given port
+ (default: 8000). If possible, a browser window is
+ opened.
+ --cores [N], --jobs [N], -j [N]
+ Use at most N cores in parallel (default: 1). If N is
+ omitted, the limit is set to the number of available
+ cores.
+ --local-cores N In cluster mode, use at most N cores of the host
+ machine in parallel (default: number of CPU cores of
+ the host). The cores are used to execute local rules.
+ This option is ignored when not in cluster mode.
+ --resources [NAME=INT [NAME=INT ...]], --res [NAME=INT [NAME=INT ...]]
+ Define additional resources that shall constrain the
+ scheduling analogously to threads (see above). A
+ resource is defined as a name and an integer value.
+ E.g. --resources gpu=1. Rules can use resources by
+ defining the resource keyword, e.g. resources: gpu=1.
+ If now two rules require 1 of the resource 'gpu' they
+ won't be run in parallel by the scheduler.
+ --config [KEY=VALUE [KEY=VALUE ...]], -C [KEY=VALUE [KEY=VALUE ...]]
+ Set or overwrite values in the workflow config object.
+ The workflow config object is accessible as variable
+ config inside the workflow. Default values can be set
+ by providing a JSON file (see Documentation).
+ --configfile FILE Specify or overwrite the config file of the workflow
+ (see the docs). Values specified in JSON or YAML
+ format are available in the global config dictionary
+ inside the workflow.
+ --list, -l Show availiable rules in given Snakefile.
+ --list-target-rules, --lt
+ Show available target rules in given Snakefile.
+ --directory DIR, -d DIR
+ Specify working directory (relative paths in the
+ snakefile will use this as their origin).
+ --dryrun, -n Do not execute anything.
+ --printshellcmds, -p Print out the shell commands that will be executed.
+ --dag Do not execute anything and print the directed acyclic
+ graph of jobs in the dot language. Recommended use on
+ Unix systems: snakemake --dag | dot | display
+ --force-use-threads Force threads rather than processes. Helpful if shared
+ memory (/dev/shm) is full or unavailable.
+ --rulegraph Do not execute anything and print the dependency graph
+ of rules in the dot language. This will be less
+ crowded than above DAG of jobs, but also show less
+ information. Note that each rule is displayed once,
+ hence the displayed graph will be cyclic if a rule
+ appears in several steps of the workflow. Use this if
+ above option leads to a DAG that is too large.
+ Recommended use on Unix systems: snakemake --rulegraph
+ | dot | display
+ --d3dag Print the DAG in D3.js compatible JSON format.
+ --summary, -S Print a summary of all files created by the workflow.
+ The has the following columns: filename, modification
+ time, rule version, status, plan. Thereby rule version
+ contains the versionthe file was created with (see the
+ version keyword of rules), and status denotes whether
+ the file is missing, its input files are newer or if
+ version or implementation of the rule changed since
+ file creation. Finally the last column denotes whether
+ the file will be updated or created during the next
+ workflow execution.
+ --detailed-summary, -D
+ Print a summary of all files created by the workflow.
+ The has the following columns: filename, modification
+ time, rule version, input file(s), shell command,
+ status, plan. Thereby rule version contains the
+ versionthe file was created with (see the version
+ keyword of rules), and status denotes whether the file
+ is missing, its input files are newer or if version or
+ implementation of the rule changed since file
+ creation. The input file and shell command columns are
+ selfexplanatory. Finally the last column denotes
+ whether the file will be updated or created during the
+ next workflow execution.
+ --touch, -t Touch output files (mark them up to date without
+ really changing them) instead of running their
+ commands. This is used to pretend that the rules were
+ executed, in order to fool future invocations of
+ snakemake. Fails if a file does not yet exist.
+ --keep-going, -k Go on with independent jobs if a job fails.
+ --force, -f Force the execution of the selected target or the
+ first rule regardless of already created output.
+ --forceall, -F Force the execution of the selected (or the first)
+ rule and all rules it is dependent on regardless of
+ already created output.
+ --forcerun [TARGET [TARGET ...]], -R [TARGET [TARGET ...]]
+ Force the re-execution or creation of the given rules
+ or files. Use this option if you changed a rule and
+ want to have all its output in your workflow updated.
+ --prioritize TARGET [TARGET ...], -P TARGET [TARGET ...]
+ Tell the scheduler to assign creation of given targets
+ (and all their dependencies) highest priority.
+ (EXPERIMENTAL)
+ --until TARGET [TARGET ...], -U TARGET [TARGET ...]
+ Runs the pipeline until it reaches the specified rules
+ or files. Only runs jobs that are dependencies of the
+ specified rule or files, does not run sibling DAGs.
+ --omit-from TARGET [TARGET ...], -O TARGET [TARGET ...]
+ Prevent the execution or creation of the given rules
+ or files as well as any rules or files that are
+ downstream of these targets in the DAG. Also runs jobs
+ in sibling DAGs that are independent of the rules or
+ files specified here.
+ --allow-ambiguity, -a
+ Don't check for ambiguous rules and simply use the
+ first if several can produce the same file. This
+ allows the user to prioritize rules by their order in
+ the snakefile.
+ --cluster CMD, -c CMD
+ Execute snakemake rules with the given submit command,
+ e.g. qsub. Snakemake compiles jobs into scripts that
+ are submitted to the cluster with the given command,
+ once all input files for a particular job are present.
+ The submit command can be decorated to make it aware
+ of certain job properties (input, output, params,
+ wildcards, log, threads and dependencies (see the
+ argument below)), e.g.: $ snakemake --cluster 'qsub
+ -pe threaded {threads}'.
+ --cluster-sync CMD cluster submission command will block, returning the
+ remote exitstatus upon remote termination (for
+ example, this should be usedif the cluster command is
+ 'qsub -sync y' (SGE)
+ --drmaa [ARGS] Execute snakemake on a cluster accessed via DRMAA,
+ Snakemake compiles jobs into scripts that are
+ submitted to the cluster with the given command, once
+ all input files for a particular job are present. ARGS
+ can be used to specify options of the underlying
+ cluster system, thereby using the job properties
+ input, output, params, wildcards, log, threads and
+ dependencies, e.g.: --drmaa ' -pe threaded {threads}'.
+ Note that ARGS must be given in quotes and with a
+ leading whitespace.
+ --cluster-config FILE, -u FILE
+ A JSON or YAML file that defines the wildcards used in
+ 'cluster'for specific rules, instead of having them
+ specified in the Snakefile. For example, for rule
+ 'job' you may define: { 'job' : { 'time' : '24:00:00'
+ } } to specify the time for rule 'job'. You can
+ specify more than one file. The configuration files
+ are merged with later values overriding earlier ones.
+ --immediate-submit, --is
+ Immediately submit all jobs to the cluster instead of
+ waiting for present input files. This will fail,
+ unless you make the cluster aware of job dependencies,
+ e.g. via: $ snakemake --cluster 'sbatch --dependency
+ {dependencies}. Assuming that your submit script (here
+ sbatch) outputs the generated job id to the first
+ stdout line, {dependencies} will be filled with space
+ separated job ids this job depends on.
+ --jobscript SCRIPT, --js SCRIPT
+ Provide a custom job script for submission to the
+ cluster. The default script resides as 'jobscript.sh'
+ in the installation directory.
+ --jobname NAME, --jn NAME
+ Provide a custom name for the jobscript that is
+ submitted to the cluster (see --cluster). NAME is
+ "snakejob.{rulename}.{jobid}.sh" per default. The
+ wildcard {jobid} has to be present in the name.
+ --reason, -r Print the reason for each executed rule.
+ --stats FILE Write stats about Snakefile execution in JSON format
+ to the given file.
+ --nocolor Do not use a colored output.
+ --quiet, -q Do not output any progress or rule information.
+ --nolock Do not lock the working directory
+ --unlock Remove a lock on the working directory.
+ --cleanup-metadata FILE [FILE ...], --cm FILE [FILE ...]
+ Cleanup the metadata of given files. That means that
+ snakemake removes any tracked version info, and any
+ marks that files are incomplete.
+ --rerun-incomplete, --ri
+ Re-run all jobs the output of which is recognized as
+ incomplete.
+ --ignore-incomplete, --ii
+ Do not check for incomplete output files.
+ --list-version-changes, --lv
+ List all output files that have been created with a
+ different version (as determined by the version
+ keyword).
+ --list-code-changes, --lc
+ List all output files for which the rule body (run or
+ shell) have changed in the Snakefile.
+ --list-input-changes, --li
+ List all output files for which the defined input
+ files have changed in the Snakefile (e.g. new input
+ files were added in the rule definition or files were
+ renamed). For listing input file modification in the
+ filesystem, use --summary.
+ --list-params-changes, --lp
+ List all output files for which the defined params
+ have changed in the Snakefile.
+ --latency-wait SECONDS, --output-wait SECONDS, -w SECONDS
+ Wait given seconds if an output file of a job is not
+ present after the job finished. This helps if your
+ filesystem suffers from latency (default 5).
+ --wait-for-files [FILE [FILE ...]]
+ Wait --latency-wait seconds for these files to be
+ present before executing the workflow. This option is
+ used internally to handle filesystem latency in
+ cluster environments.
+ --benchmark-repeats N
+ Repeat a job N times if marked for benchmarking
+ (default 1).
+ --notemp, --nt Ignore temp() declarations. This is useful when
+ running only a part of the workflow, since temp()
+ would lead to deletion of probably needed files by
+ other parts of the workflow.
+ --keep-remote Keep local copies of remote input files.
+ --keep-target-files Do not adjust the paths of given target files relative
+ to the working directory.
+ --keep-shadow Do not delete the shadow directory on snakemake
+ startup.
+ --allowed-rules ALLOWED_RULES [ALLOWED_RULES ...]
+ Only use given rules. If omitted, all rules in
+ Snakefile are used.
+ --max-jobs-per-second MAX_JOBS_PER_SECOND
+ Maximal number of cluster/drmaa jobs per second,
+ default is no limit
+ --timestamp, -T Add a timestamp to all logging output
+ --greediness GREEDINESS
+ Set the greediness of scheduling. This value between 0
+ and 1 determines how careful jobs are selected for
+ execution. The default value (1.0) provides the best
+ speed and still acceptable scheduling quality.
+ --no-hooks Do not invoke onstart, onsuccess or onerror hooks
+ after execution.
+ --print-compilation Print the python representation of the workflow.
+ --overwrite-shellcmd OVERWRITE_SHELLCMD
+ Provide a shell command that shall be executed instead
+ of those given in the workflow. This is for debugging
+ purposes only.
+ --verbose Print debugging output.
+ --debug Allow to debug rules with e.g. PDB. This flag allows
+ to set breakpoints in run blocks.
+ --profile FILE Profile Snakemake and write the output to FILE. This
+ requires yappi to be installed.
+ --bash-completion Output code to register bash completion for snakemake.
+ Put the following in your .bashrc (including the
+ accents): `snakemake --bash-completion` or issue it in
+ an open terminal session.
+ --version, -v show program's version number and exit
diff --git a/tests/test_conda/expected-results/test2.out b/tests/test_conda/expected-results/test2.out
new file mode 100644
index 0000000..c47a427
--- /dev/null
+++ b/tests/test_conda/expected-results/test2.out
@@ -0,0 +1,279 @@
+usage: snakemake [-h] [--snakefile FILE] [--gui [PORT]] [--cores [N]]
+ [--local-cores N] [--resources [NAME=INT [NAME=INT ...]]]
+ [--config [KEY=VALUE [KEY=VALUE ...]]] [--configfile FILE]
+ [--list] [--list-target-rules] [--directory DIR] [--dryrun]
+ [--printshellcmds] [--dag] [--force-use-threads]
+ [--rulegraph] [--d3dag] [--summary] [--detailed-summary]
+ [--touch] [--keep-going] [--force] [--forceall]
+ [--forcerun [TARGET [TARGET ...]]]
+ [--prioritize TARGET [TARGET ...]]
+ [--until TARGET [TARGET ...]]
+ [--omit-from TARGET [TARGET ...]] [--allow-ambiguity]
+ [--cluster CMD | --cluster-sync CMD | --drmaa [ARGS]]
+ [--cluster-config FILE] [--immediate-submit]
+ [--jobscript SCRIPT] [--jobname NAME] [--reason]
+ [--stats FILE] [--nocolor] [--quiet] [--nolock] [--unlock]
+ [--cleanup-metadata FILE [FILE ...]] [--rerun-incomplete]
+ [--ignore-incomplete] [--list-version-changes]
+ [--list-code-changes] [--list-input-changes]
+ [--list-params-changes] [--latency-wait SECONDS]
+ [--wait-for-files [FILE [FILE ...]]] [--benchmark-repeats N]
+ [--notemp] [--keep-remote] [--keep-target-files]
+ [--keep-shadow]
+ [--allowed-rules ALLOWED_RULES [ALLOWED_RULES ...]]
+ [--max-jobs-per-second MAX_JOBS_PER_SECOND] [--timestamp]
+ [--greediness GREEDINESS] [--no-hooks] [--print-compilation]
+ [--overwrite-shellcmd OVERWRITE_SHELLCMD] [--verbose]
+ [--debug] [--profile FILE] [--bash-completion] [--version]
+ [target [target ...]]
+
+Snakemake is a Python based language and execution environment for GNU Make-
+like workflows.
+
+positional arguments:
+ target Targets to build. May be rules or files.
+
+optional arguments:
+ -h, --help show this help message and exit
+ --snakefile FILE, -s FILE
+ The workflow definition in a snakefile.
+ --gui [PORT] Serve an HTML based user interface to the given port
+ (default: 8000). If possible, a browser window is
+ opened.
+ --cores [N], --jobs [N], -j [N]
+ Use at most N cores in parallel (default: 1). If N is
+ omitted, the limit is set to the number of available
+ cores.
+ --local-cores N In cluster mode, use at most N cores of the host
+ machine in parallel (default: number of CPU cores of
+ the host). The cores are used to execute local rules.
+ This option is ignored when not in cluster mode.
+ --resources [NAME=INT [NAME=INT ...]], --res [NAME=INT [NAME=INT ...]]
+ Define additional resources that shall constrain the
+ scheduling analogously to threads (see above). A
+ resource is defined as a name and an integer value.
+ E.g. --resources gpu=1. Rules can use resources by
+ defining the resource keyword, e.g. resources: gpu=1.
+ If now two rules require 1 of the resource 'gpu' they
+ won't be run in parallel by the scheduler.
+ --config [KEY=VALUE [KEY=VALUE ...]], -C [KEY=VALUE [KEY=VALUE ...]]
+ Set or overwrite values in the workflow config object.
+ The workflow config object is accessible as variable
+ config inside the workflow. Default values can be set
+ by providing a JSON file (see Documentation).
+ --configfile FILE Specify or overwrite the config file of the workflow
+ (see the docs). Values specified in JSON or YAML
+ format are available in the global config dictionary
+ inside the workflow.
+ --list, -l Show availiable rules in given Snakefile.
+ --list-target-rules, --lt
+ Show available target rules in given Snakefile.
+ --directory DIR, -d DIR
+ Specify working directory (relative paths in the
+ snakefile will use this as their origin).
+ --dryrun, -n Do not execute anything.
+ --printshellcmds, -p Print out the shell commands that will be executed.
+ --dag Do not execute anything and print the directed acyclic
+ graph of jobs in the dot language. Recommended use on
+ Unix systems: snakemake --dag | dot | display
+ --force-use-threads Force threads rather than processes. Helpful if shared
+ memory (/dev/shm) is full or unavailable.
+ --rulegraph Do not execute anything and print the dependency graph
+ of rules in the dot language. This will be less
+ crowded than above DAG of jobs, but also show less
+ information. Note that each rule is displayed once,
+ hence the displayed graph will be cyclic if a rule
+ appears in several steps of the workflow. Use this if
+ above option leads to a DAG that is too large.
+ Recommended use on Unix systems: snakemake --rulegraph
+ | dot | display
+ --d3dag Print the DAG in D3.js compatible JSON format.
+ --summary, -S Print a summary of all files created by the workflow.
+ The has the following columns: filename, modification
+ time, rule version, status, plan. Thereby rule version
+ contains the versionthe file was created with (see the
+ version keyword of rules), and status denotes whether
+ the file is missing, its input files are newer or if
+ version or implementation of the rule changed since
+ file creation. Finally the last column denotes whether
+ the file will be updated or created during the next
+ workflow execution.
+ --detailed-summary, -D
+ Print a summary of all files created by the workflow.
+ The has the following columns: filename, modification
+ time, rule version, input file(s), shell command,
+ status, plan. Thereby rule version contains the
+ versionthe file was created with (see the version
+ keyword of rules), and status denotes whether the file
+ is missing, its input files are newer or if version or
+ implementation of the rule changed since file
+ creation. The input file and shell command columns are
+ selfexplanatory. Finally the last column denotes
+ whether the file will be updated or created during the
+ next workflow execution.
+ --touch, -t Touch output files (mark them up to date without
+ really changing them) instead of running their
+ commands. This is used to pretend that the rules were
+ executed, in order to fool future invocations of
+ snakemake. Fails if a file does not yet exist.
+ --keep-going, -k Go on with independent jobs if a job fails.
+ --force, -f Force the execution of the selected target or the
+ first rule regardless of already created output.
+ --forceall, -F Force the execution of the selected (or the first)
+ rule and all rules it is dependent on regardless of
+ already created output.
+ --forcerun [TARGET [TARGET ...]], -R [TARGET [TARGET ...]]
+ Force the re-execution or creation of the given rules
+ or files. Use this option if you changed a rule and
+ want to have all its output in your workflow updated.
+ --prioritize TARGET [TARGET ...], -P TARGET [TARGET ...]
+ Tell the scheduler to assign creation of given targets
+ (and all their dependencies) highest priority.
+ (EXPERIMENTAL)
+ --until TARGET [TARGET ...], -U TARGET [TARGET ...]
+ Runs the pipeline until it reaches the specified rules
+ or files. Only runs jobs that are dependencies of the
+ specified rule or files, does not run sibling DAGs.
+ --omit-from TARGET [TARGET ...], -O TARGET [TARGET ...]
+ Prevent the execution or creation of the given rules
+ or files as well as any rules or files that are
+ downstream of these targets in the DAG. Also runs jobs
+ in sibling DAGs that are independent of the rules or
+ files specified here.
+ --allow-ambiguity, -a
+ Don't check for ambiguous rules and simply use the
+ first if several can produce the same file. This
+ allows the user to prioritize rules by their order in
+ the snakefile.
+ --cluster CMD, -c CMD
+ Execute snakemake rules with the given submit command,
+ e.g. qsub. Snakemake compiles jobs into scripts that
+ are submitted to the cluster with the given command,
+ once all input files for a particular job are present.
+ The submit command can be decorated to make it aware
+ of certain job properties (input, output, params,
+ wildcards, log, threads and dependencies (see the
+ argument below)), e.g.: $ snakemake --cluster 'qsub
+ -pe threaded {threads}'.
+ --cluster-sync CMD cluster submission command will block, returning the
+ remote exitstatus upon remote termination (for
+ example, this should be usedif the cluster command is
+ 'qsub -sync y' (SGE)
+ --drmaa [ARGS] Execute snakemake on a cluster accessed via DRMAA,
+ Snakemake compiles jobs into scripts that are
+ submitted to the cluster with the given command, once
+ all input files for a particular job are present. ARGS
+ can be used to specify options of the underlying
+ cluster system, thereby using the job properties
+ input, output, params, wildcards, log, threads and
+ dependencies, e.g.: --drmaa ' -pe threaded {threads}'.
+ Note that ARGS must be given in quotes and with a
+ leading whitespace.
+ --cluster-config FILE, -u FILE
+ A JSON or YAML file that defines the wildcards used in
+ 'cluster'for specific rules, instead of having them
+ specified in the Snakefile. For example, for rule
+ 'job' you may define: { 'job' : { 'time' : '24:00:00'
+ } } to specify the time for rule 'job'. You can
+ specify more than one file. The configuration files
+ are merged with later values overriding earlier ones.
+ --immediate-submit, --is
+ Immediately submit all jobs to the cluster instead of
+ waiting for present input files. This will fail,
+ unless you make the cluster aware of job dependencies,
+ e.g. via: $ snakemake --cluster 'sbatch --dependency
+ {dependencies}. Assuming that your submit script (here
+ sbatch) outputs the generated job id to the first
+ stdout line, {dependencies} will be filled with space
+ separated job ids this job depends on.
+ --jobscript SCRIPT, --js SCRIPT
+ Provide a custom job script for submission to the
+ cluster. The default script resides as 'jobscript.sh'
+ in the installation directory.
+ --jobname NAME, --jn NAME
+ Provide a custom name for the jobscript that is
+ submitted to the cluster (see --cluster). NAME is
+ "snakejob.{rulename}.{jobid}.sh" per default. The
+ wildcard {jobid} has to be present in the name.
+ --reason, -r Print the reason for each executed rule.
+ --stats FILE Write stats about Snakefile execution in JSON format
+ to the given file.
+ --nocolor Do not use a colored output.
+ --quiet, -q Do not output any progress or rule information.
+ --nolock Do not lock the working directory
+ --unlock Remove a lock on the working directory.
+ --cleanup-metadata FILE [FILE ...], --cm FILE [FILE ...]
+ Cleanup the metadata of given files. That means that
+ snakemake removes any tracked version info, and any
+ marks that files are incomplete.
+ --rerun-incomplete, --ri
+ Re-run all jobs the output of which is recognized as
+ incomplete.
+ --ignore-incomplete, --ii
+ Do not check for incomplete output files.
+ --list-version-changes, --lv
+ List all output files that have been created with a
+ different version (as determined by the version
+ keyword).
+ --list-code-changes, --lc
+ List all output files for which the rule body (run or
+ shell) have changed in the Snakefile.
+ --list-input-changes, --li
+ List all output files for which the defined input
+ files have changed in the Snakefile (e.g. new input
+ files were added in the rule definition or files were
+ renamed). For listing input file modification in the
+ filesystem, use --summary.
+ --list-params-changes, --lp
+ List all output files for which the defined params
+ have changed in the Snakefile.
+ --latency-wait SECONDS, --output-wait SECONDS, -w SECONDS
+ Wait given seconds if an output file of a job is not
+ present after the job finished. This helps if your
+ filesystem suffers from latency (default 5).
+ --wait-for-files [FILE [FILE ...]]
+ Wait --latency-wait seconds for these files to be
+ present before executing the workflow. This option is
+ used internally to handle filesystem latency in
+ cluster environments.
+ --benchmark-repeats N
+ Repeat a job N times if marked for benchmarking
+ (default 1).
+ --notemp, --nt Ignore temp() declarations. This is useful when
+ running only a part of the workflow, since temp()
+ would lead to deletion of probably needed files by
+ other parts of the workflow.
+ --keep-remote Keep local copies of remote input files.
+ --keep-target-files Do not adjust the paths of given target files relative
+ to the working directory.
+ --keep-shadow Do not delete the shadow directory on snakemake
+ startup.
+ --allowed-rules ALLOWED_RULES [ALLOWED_RULES ...]
+ Only use given rules. If omitted, all rules in
+ Snakefile are used.
+ --max-jobs-per-second MAX_JOBS_PER_SECOND
+ Maximal number of cluster/drmaa jobs per second,
+ default is no limit
+ --timestamp, -T Add a timestamp to all logging output
+ --greediness GREEDINESS
+ Set the greediness of scheduling. This value between 0
+ and 1 determines how careful jobs are selected for
+ execution. The default value (1.0) provides the best
+ speed and still acceptable scheduling quality.
+ --no-hooks Do not invoke onstart, onsuccess or onerror hooks
+ after execution.
+ --print-compilation Print the python representation of the workflow.
+ --overwrite-shellcmd OVERWRITE_SHELLCMD
+ Provide a shell command that shall be executed instead
+ of those given in the workflow. This is for debugging
+ purposes only.
+ --verbose Print debugging output.
+ --debug Allow to debug rules with e.g. PDB. This flag allows
+ to set breakpoints in run blocks.
+ --profile FILE Profile Snakemake and write the output to FILE. This
+ requires yappi to be installed.
+ --bash-completion Output code to register bash completion for snakemake.
+ Put the following in your .bashrc (including the
+ accents): `snakemake --bash-completion` or issue it in
+ an open terminal session.
+ --version, -v show program's version number and exit
diff --git a/tests/test_conda/test-env.yaml b/tests/test_conda/test-env.yaml
new file mode 100644
index 0000000..d5af59b
--- /dev/null
+++ b/tests/test_conda/test-env.yaml
@@ -0,0 +1,4 @@
+channels:
+ - bioconda
+dependencies:
+ - snakemake ==3.8.2
diff --git a/tests/test_delete_output/Snakefile b/tests/test_delete_output/Snakefile
index b805f43..fb65479 100644
--- a/tests/test_delete_output/Snakefile
+++ b/tests/test_delete_output/Snakefile
@@ -1,23 +1,59 @@
-# Having decided to address bug #300, I need a test.
+# See bug #300. This tests that output files really are cleaned up
+# before running a rule, and touched afterwards.
#
# The output should be deleted before the job starts.
-# The output should be deleted on the the head node for cluster jobs.
-# The path for the output should be created (as it already is)
+# (The output should be deleted on the head node for cluster jobs.)
+# The path to the output should be created
# The output should be touch'd on the head node to always be new.
#
-# This test should work without any cluster. Do I need a special test for
-# the cluster case? Not sure.
+# Additionally this should work for directories, symlinks and symlinks
+# to directories.
+#
+# TODO - consider adding a cluster-based test for point 2 above.
-# Setup - touch a fake input file and a fake output file.
+# Setup - touch a mock input file and an out-of-date output file.
shell("touch -t 201604010000 output.file")
shell("touch input.file")
+# An empty directory
+shell("mkdir -p output.dir ; touch -ch -t 201604010000 output.dir")
+# A dangling symlink
+shell("ln -fs nosuchfile output.link ; touch -ch -t 201604010000 output.link")
+# A symlink to an empty directory
+shell("mkdir -p an_empty_dir; ln -fs an_empty_dir output.dirlink ; touch -ch -t 201604010000 an_empty_dir output.dirlink")
+
+
rule main:
- input: "output.file"
+ input: "output.file", "output.dir", "output.link", "output.dirlink"
rule make_the_file:
output: "output.file", "foo/output.foo.file"
input: "input.file"
# Rule fails if any output.file is already present
- shell: "test ! -e output.file ; test -d foo ; test ! -e foo/* ; touch -t 201604010000 output.file ; touch foo/output.foo.file"
+ run:
+ shell("test ! -e output.file")
+ shell("test -d foo")
+ shell("test ! -e foo/*")
+ shell("touch -t 201604010000 output.file")
+ shell("touch foo/output.foo.file")
+
+rule make_the_dir:
+ output: "output.dir"
+ input: "input.file"
+ #mkdir fails if the dir is already present
+ run:
+ shell("mkdir output.dir")
+ shell("touch output.dir/foo")
+
+rule make_the_links:
+ output: "output.link", "output.dirlink"
+ input: "input.file"
+ # Both links should be gone, but an_empty_dir should not have been removed
+ # as it's not a direct target of the rule.
+ run:
+ shell("touch arealfile")
+ shell("ln -s arealfile output.link")
+ shell("test -d an_empty_dir")
+ shell("mkdir empty_dir2")
+ shell("ln -s empty_dir2 output.dirlink")
diff --git a/tests/test_delete_output/nosuchfile b/tests/test_delete_output/nosuchfile
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_get_log_both/test.out b/tests/test_get_log_both/test.out
deleted file mode 100644
index 3bd1f0e..0000000
--- a/tests/test_get_log_both/test.out
+++ /dev/null
@@ -1,2 +0,0 @@
-foo
-bar
diff --git a/tests/test_get_log_complex/test.out b/tests/test_get_log_complex/test.out
deleted file mode 100644
index 3bd1f0e..0000000
--- a/tests/test_get_log_complex/test.out
+++ /dev/null
@@ -1,2 +0,0 @@
-foo
-bar
diff --git a/tests/test_get_log_stderr/test.out b/tests/test_get_log_stderr/test.out
deleted file mode 100644
index 3bd1f0e..0000000
--- a/tests/test_get_log_stderr/test.out
+++ /dev/null
@@ -1,2 +0,0 @@
-foo
-bar
diff --git a/tests/test_get_log_stdout/test.out b/tests/test_get_log_stdout/test.out
deleted file mode 100644
index 3bd1f0e..0000000
--- a/tests/test_get_log_stdout/test.out
+++ /dev/null
@@ -1,2 +0,0 @@
-foo
-bar
diff --git a/tests/test_issue381/Snakefile b/tests/test_issue381/Snakefile
new file mode 100644
index 0000000..ec02547
--- /dev/null
+++ b/tests/test_issue381/Snakefile
@@ -0,0 +1,21 @@
+
+rule all:
+ input:
+ "b.out"
+
+rule a:
+ input:
+ "a.in"
+ output:
+ "a.out"
+ shell:
+ "touch {output}"
+
+
+rule b:
+ input:
+ rules.a.input
+ output:
+ "b.out"
+ shell:
+ "touch {output}"
diff --git a/tests/test_issue381/a.in b/tests/test_issue381/a.in
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_issue381/expected-results/b.out b/tests/test_issue381/expected-results/b.out
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_persistent_dict/Snakefile b/tests/test_persistent_dict/Snakefile
index 6360a42..1d77ecb 100644
--- a/tests/test_persistent_dict/Snakefile
+++ b/tests/test_persistent_dict/Snakefile
@@ -1,33 +1,27 @@
-try:
- from pytools.persistent_dict import PersistentDict
-
+from pytools.persistent_dict import PersistentDict
- storage = PersistentDict("mystorage")
- storage.store("var1", 100)
+storage = PersistentDict("mystorage")
- rule all:
- input: expand("test.{i}.out", i=range(3))
+storage.store("var1", 100)
+rule all:
+ input: expand("test.{i}.out", i=range(3))
- rule:
- input: "test.in"
- output: "test.{i}.out"
- run:
- assert storage.fetch("var1") == 100
- with open(output[0], "w") as out:
- v = storage.fetch("var2")
- assert v == 1
- print(v, file=out)
+rule:
+ input: "test.in"
+ output: "test.{i}.out"
+ run:
+ assert storage.fetch("var1") == 100
+ with open(output[0], "w") as out:
+ v = storage.fetch("var2")
+ assert v == 1
+ print(v, file=out)
- rule:
- output: temp("test.in") # mark output as temp, since var1 has to be stored in each run
- run:
- storage.store("var2", 1)
- shell("touch {output}")
-
-except ImportError:
- # do not run the test if pytools is not installed
- pass
+rule:
+ output: temp("test.in") # mark output as temp, since var1 has to be stored in each run
+ run:
+ storage.store("var2", 1)
+ shell("touch {output}")
diff --git a/tests/test_rule_defined_in_for_loop/Snakefile b/tests/test_rule_defined_in_for_loop/Snakefile
new file mode 100644
index 0000000..ccdcf6f
--- /dev/null
+++ b/tests/test_rule_defined_in_for_loop/Snakefile
@@ -0,0 +1,12 @@
+rule all:
+ input: 'iteration-02.txt'
+
+
+for i in range(2, 4):
+ rule:
+ output:
+ fasta='iteration-{nr:02d}.txt'.format(nr=i)
+ input:
+ fasta='iteration-{nr:02d}.txt'.format(nr=i-1)
+ run:
+ shell("cp -p {input.fasta} {output.fasta}")
diff --git a/tests/test_rule_defined_in_for_loop/expected-results/iteration-01.txt b/tests/test_rule_defined_in_for_loop/expected-results/iteration-01.txt
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_rule_defined_in_for_loop/expected-results/iteration-02.txt b/tests/test_rule_defined_in_for_loop/expected-results/iteration-02.txt
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_rule_defined_in_for_loop/iteration-01.txt b/tests/test_rule_defined_in_for_loop/iteration-01.txt
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_wrapper/Snakefile b/tests/test_wrapper/Snakefile
new file mode 100644
index 0000000..7f82bd3
--- /dev/null
+++ b/tests/test_wrapper/Snakefile
@@ -0,0 +1,7 @@
+rule compress_vcf:
+ input:
+ "test.vcf"
+ output:
+ "test.vcf.gz"
+ wrapper:
+ "0.9.0/bio/vcf/compress"
diff --git a/tests/test_wrapper/expected-results/test.vcf.gz b/tests/test_wrapper/expected-results/test.vcf.gz
new file mode 100644
index 0000000..5a3abc6
Binary files /dev/null and b/tests/test_wrapper/expected-results/test.vcf.gz differ
diff --git a/tests/test_wrapper/test.vcf b/tests/test_wrapper/test.vcf
new file mode 100644
index 0000000..f1f91cf
--- /dev/null
+++ b/tests/test_wrapper/test.vcf
@@ -0,0 +1,15 @@
+##fileformat=VCFv4.1
+##INFO=<ID=S1,Number=1,Type=String,Description="Single INFO string">
+##INFO=<ID=N1,Number=1,Type=Integer,Description="Single INFO integer">
+##INFO=<ID=F1,Number=1,Type=Float,Description="Single INFO float">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=FS1,Number=1,Type=String,Description="Single FORMAT string">
+##FORMAT=<ID=FN1,Number=1,Type=Integer,Description="Single FORMAT integer">
+##FORMAT=<ID=FF1,Number=1,Type=Float,Description="Single FORMAT float">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT one two
+19 3111939 rs1234 A AG . PASS S1=string1;N1=1;F1=1.0 GT:FS1:FN1 ./1:LongString1:1 1/1:ss1:2
+19 3113255 rs2345 G GC . PASS S1=string2;N1=2;F1=2.0 GT:FS1:FN1 1|1:LongString2:1 1/1:ss2:2
+19 3113259 rs2345 G GC . PASS S1=string3;N1=3;F1=3.0 GT:FS1:FN1 0/1:.:1 1/1:ss3:2
+19 3113262 rs2345 G GC . PASS S1=string4;N1=4;F1=4.0 GT:FS1:FN1 0|1:LongString4:1 1/1:.:2
+19 3113268 rs2345 G GC . PASS S1=string5;N1=5;F1=5.0 GT:FS1:FN1 1|.:evenlength:1 1/1:veenlength:2
+19 3113272 rs2345 G GC . PASS S1=string6;N1=6;F1=6.0 GT:FS1:FN1 1/1:ss6:1 1/1:longstring6:2
diff --git a/tests/tests.py b/tests/tests.py
index b873051..c0965ca 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -10,7 +10,7 @@ from subprocess import call
from tempfile import mkdtemp
import hashlib
import urllib
-from shutil import rmtree
+from shutil import rmtree, which
from shlex import quote
from snakemake import snakemake
@@ -59,7 +59,7 @@ def run(path,
assert os.path.exists(snakefile)
assert os.path.exists(results_dir) and os.path.isdir(
results_dir), '{} does not exist'.format(results_dir)
- tmpdir = mkdtemp()
+ tmpdir = mkdtemp(prefix=".test", dir=os.path.abspath("."))
try:
config = {}
if subpath is not None:
@@ -164,6 +164,8 @@ def test14():
def test15():
run(dpath("test15"))
+def test_ancient():
+ run(dpath("test_ancient"), targets=['D'])
def test_report():
run(dpath("test_report"), check_md5=False)
@@ -217,7 +219,11 @@ def test_ruledeps():
def test_persistent_dict():
- run(dpath("test_persistent_dict"))
+ try:
+ import pytools
+ run(dpath("test_persistent_dict"))
+ except ImportError:
+ pass
def test_url_include():
@@ -326,7 +332,7 @@ def test_nonstr_params():
def test_delete_output():
- run(dpath("test_delete_output"))
+ run(dpath("test_delete_output"), cores=1)
def test_input_generator():
@@ -340,7 +346,26 @@ def test_symlink_time_handling():
def test_issue328():
- run(dpath("test_issue328"), forcerun=["split"])
+ try:
+ import pytools
+ run(dpath("test_issue328"), forcerun=["split"])
+ except ImportError:
+ # skip test if import fails
+ pass
+
+
+def test_conda():
+ if conda_available():
+ run(dpath("test_conda"), use_conda=True)
+
+
+def test_wrapper():
+ if conda_available():
+ run(dpath("test_wrapper"), use_conda=True)
+
+
+def conda_available():
+ return which("conda")
def test_get_log_none():
@@ -392,6 +417,15 @@ def test_format_params():
run(dpath("test_format_params"), check_md5=True)
+def test_rule_defined_in_for_loop():
+ # issue 257
+ run(dpath("test_rule_defined_in_for_loop"))
+
+
+def test_issue381():
+ run(dpath("test_issue381"))
+
+
if __name__ == '__main__':
import nose
nose.run(defaultTest=__name__)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/snakemake.git
More information about the debian-med-commit mailing list