[med-svn] [snakemake] 01/03: Imported Upstream version 3.5.4+dfsg

Kevin Murray daube-guest at moszumanska.debian.org
Thu Dec 17 10:18:39 UTC 2015


This is an automated email from the git hooks/post-receive script.

daube-guest pushed a commit to branch master
in repository snakemake.

commit 300fdf5bbc739603a2165e79fb50b6d18cfee26d
Author: Kevin Murray <spam at kdmurray.id.au>
Date:   Thu Dec 17 21:08:02 2015 +1100

    Imported Upstream version 3.5.4+dfsg
---
 README                                             |   7 +-
 docker/Dockerfile                                  |   7 +
 docs/index.rst                                     |   2 +-
 misc/vim/README.md                                 |  13 +
 misc/{ => vim/syntax}/snakemake.vim                |   0
 setup.py                                           |  22 +-
 snakemake-tutorial.html                            | 605 +++++++++++++++++++++
 snakemake/__init__.py                              |  52 +-
 snakemake/dag.py                                   |  90 ++-
 snakemake/decorators.py                            |  17 +
 snakemake/exceptions.py                            |  36 +-
 snakemake/executors.py                             |  65 ++-
 snakemake/io.py                                    | 155 +++++-
 snakemake/jobs.py                                  | 210 ++++++-
 snakemake/output_index.py                          |   5 +-
 snakemake/parser.py                                |  82 ++-
 snakemake/persistence.py                           |  10 +-
 snakemake/remote/FTP.py                            | 126 +++++
 snakemake/remote/GS.py                             |  21 +
 snakemake/remote/HTTP.py                           | 143 +++++
 snakemake/remote/S3.py                             | 443 +++++++++++++++
 snakemake/remote/SFTP.py                           | 112 ++++
 snakemake/remote/__init__.py                       | 159 ++++++
 snakemake/remote/dropbox.py                        | 110 ++++
 snakemake/rules.py                                 |  86 ++-
 snakemake/scheduler.py                             | 116 ++--
 snakemake/script.py                                | 139 +++++
 snakemake/shell.py                                 |  13 +-
 snakemake/utils.py                                 |  25 +-
 snakemake/version.py                               |   2 +-
 snakemake/workflow.py                              |  37 +-
 tests/test_empty_include/Snakefile                 |   1 +
 .../test_empty_include/expected-results/.gitignore |   0
 tests/test_empty_include/include.rules             |   0
 tests/test_nonstr_params/Snakefile                 |   8 +
 tests/test_nonstr_params/expected-results/test.out |   0
 tests/test_remote/S3Mocked.py                      | 125 +++++
 tests/test_remote/Snakefile                        |  86 +++
 tests/test_remote/__init__.py                      |   0
 tests/test_remote/expected-results/out.txt         |   4 +
 tests/test_remote/test.txt                         |   4 +
 tests/test_script/Snakefile                        |  14 +
 tests/test_script/expected-results/test.in         |   1 +
 tests/test_script/expected-results/test.out        |   1 +
 tests/test_script/scripts/test.R                   |   8 +
 tests/test_script/scripts/test.py                  |   2 +
 tests/test_shadow/Snakefile                        |  26 +
 tests/test_shadow/expected-results/simple_full.out |   2 +
 .../expected-results/simple_shallow.out            |   2 +
 tests/test_shadow/test.in                          |   1 +
 tests/test_url_include/Snakefile                   |   2 +-
 tests/tests.py                                     |  34 +-
 52 files changed, 2977 insertions(+), 254 deletions(-)

diff --git a/README b/README
index 5166c44..6fea197 100644
--- a/README
+++ b/README
@@ -1,8 +1,7 @@
 Snakemake -- a pythonic workflow system
 
-Build systems like GNU Make are frequently used to create complicated workflows, 
-e.g. in bioinformatics. This project aims to reduce the complexity of creating 
-workflows by providing a clean and modern domain specific specification language (DSL) 
-in python style, together with a fast and comfortable execution environment.
+Snakemake is a workflow management system that aims to reduce the complexity of creating workflows by providing a fast and comfortable execution environment, together with a clean and modern specification language in python style. Snakemake workflows are essentially Python scripts extended by declarative code to define rules. Rules describe how to create output files from input files.
+
+Homepage: http://snakemake.bitbucket.org
 
 Copyright (c) 2015 Johannes Köster <johannes.koester at tu-dortmund.de> (see LICENSE)
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..9792c7a
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,7 @@
+# a docker image based on Ubuntu with snakemake installed
+FROM ubuntu:14.04
+MAINTAINER Johannes Köster <johannes.koester at tu-dortmund.de>
+RUN apt-get -qq update
+RUN apt-get install -qqy python3-setuptools python3-docutils python3-flask
+RUN easy_install3 snakemake
+ENTRYPOINT ["snakemake"]
diff --git a/docs/index.rst b/docs/index.rst
index de2b778..4871837 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -10,7 +10,7 @@ Build systems like GNU Make are frequently used to create complicated workflows,
 
 Apart from being a command line tool, Snakemake can also be called from within other Python code and hence serve as a framework for organizing workflows within your software. These pages describe the public parts of the Snakemake API.
 
-For the user documentation and general information, see https://bitbucket.org/johanneskoester/snakemake.
+For the user documentation and general information, see http://snakemake.bitbucket.org.
 
 Contents:
 
diff --git a/misc/vim/README.md b/misc/vim/README.md
new file mode 100644
index 0000000..ae798fb
--- /dev/null
+++ b/misc/vim/README.md
@@ -0,0 +1,13 @@
+A vim syntax highlighting definition for Snakemake.
+You can copy the `snakemake.vim` file to `$HOME/.vim/syntax` directory and add
+
+    au BufNewFile,BufRead Snakefile set syntax=snakemake
+    au BufNewFile,BufRead *.rules set syntax=snakemake
+    au BufNewFile,BufRead *.snakefile set syntax=snakemake
+    au BufNewFile,BufRead *.snake set syntax=snakemake
+
+to your `$HOME/.vimrc` file. Highlighting can be forced in a vim session with `:set syntax=snakemake`.
+
+To install via Vundle use:
+
+    Plugin 'https://bitbucket.org/johanneskoester/snakemake.git', {'rtp': 'misc/vim/'}
diff --git a/misc/snakemake.vim b/misc/vim/syntax/snakemake.vim
similarity index 100%
rename from misc/snakemake.vim
rename to misc/vim/syntax/snakemake.vim
diff --git a/setup.py b/setup.py
index dfea1dd..de6757a 100644
--- a/setup.py
+++ b/setup.py
@@ -26,11 +26,6 @@ exec(open("snakemake/version.py").read())
 
 
 class NoseTestCommand(TestCommand):
-    def finalize_options(self):
-        TestCommand.finalize_options(self)
-        self.test_args = []
-        self.test_suite = True
-
     def run_tests(self):
         # Run nose ensuring that argv simulates running nosetests directly
         import nose
@@ -43,22 +38,23 @@ setup(
     author='Johannes Köster',
     author_email='johannes.koester at tu-dortmund.de',
     description=
-    'Build systems like GNU Make are frequently used to create complicated '
-    'workflows, e.g. in bioinformatics. This project aims to reduce the '
-    'complexity of creating workflows by providing a clean and modern domain '
-    'specific language (DSL) in python style, together with a fast and '
-    'comfortable execution environment.',
+    'Snakemake is a workflow management system that aims to reduce the complexity '
+    'of creating workflows by providing a fast and comfortable execution environment, '
+    'together with a clean and modern specification language in python style. '
+    'Snakemake workflows are essentially Python scripts extended by declarative '
+    'code to define rules. Rules describe how to create output files from input files.',
     zip_safe=False,
     license='MIT',
-    url='https://bitbucket.org/johanneskoester/snakemake',
-    packages=['snakemake'],
+    url='http://snakemake.bitbucket.org',
+    packages=['snakemake', 'snakemake.remote'],
     entry_points={
         "console_scripts":
         ["snakemake = snakemake:main",
          "snakemake-bash-completion = snakemake:bash_completion"]
     },
     package_data={'': ['*.css', '*.sh', '*.html']},
-    tests_require=['nose>=1.3'],
+    tests_require=['rpy2', 'docutils', 'nose>=1.3', 'boto>=2.38.0', 'filechunkio>=1.6', 
+                     'moto>=0.4.14', 'ftputil>=3.2', 'pysftp>=0.2.8', 'requests>=2.8.1', 'dropbox>=3.38'],
     cmdclass={'test': NoseTestCommand},
     classifiers=
     ["Development Status :: 5 - Production/Stable", "Environment :: Console",
diff --git a/snakemake-tutorial.html b/snakemake-tutorial.html
new file mode 100644
index 0000000..c712169
--- /dev/null
+++ b/snakemake-tutorial.html
@@ -0,0 +1,605 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+  <meta name="generator" content="pandoc">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
+  <title>Snakemake Tutorial</title>
+  <style type="text/css">code{white-space: pre;}</style>
+  <!--[if lt IE 9]>
+    <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+  <![endif]-->
+  <style type="text/css">
+table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
+  margin: 0; padding: 0; vertical-align: baseline; border: none; }
+table.sourceCode { width: 100%; line-height: 100%; }
+td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
+td.sourceCode { padding-left: 5px; }
+code > span.kw { color: #007020; font-weight: bold; }
+code > span.dt { color: #902000; }
+code > span.dv { color: #40a070; }
+code > span.bn { color: #40a070; }
+code > span.fl { color: #40a070; }
+code > span.ch { color: #4070a0; }
+code > span.st { color: #4070a0; }
+code > span.co { color: #60a0b0; font-style: italic; }
+code > span.ot { color: #007020; }
+code > span.al { color: #ff0000; font-weight: bold; }
+code > span.fu { color: #06287e; }
+code > span.er { color: #ff0000; font-weight: bold; }
+  </style>
+  <link href="data:text/css,%2F%2A%21%0A%20%2A%20Bootstrap%20v3%2E3%2E5%20%28http%3A%2F%2Fgetbootstrap%2Ecom%29%0A%20%2A%20Copyright%202011%2D2015%20Twitter%2C%20Inc%2E%0A%20%2A%20Licensed%20under%20MIT%20%28https%3A%2F%2Fgithub%2Ecom%2Ftwbs%2Fbootstrap%2Fblob%2Fmaster%2FLICENSE%29%0A%20%2A%2F%0A%2F%2A%21%20normalize%2Ecss%20v3%2E0%2E3%20%7C%20MIT%20License%20%7C%20github%2Ecom%2Fnecolas%2Fnormalize%2Ecss%20%2A%2F%0Ahtml%20%7B%0A%20%20font%2Dfamily%3A%20sans%2Dserif%3B%0A%20%20%2Dwebkit% [...]
+  <link href="data:text/css,body%20%7B%0A%20%20%20%20max%2Dwidth%3A%201000px%3B%0A%20%20%20%20margin%2Dleft%3A%20auto%3B%0A%20%20%20%20margin%2Dright%3A%20auto%3B%0A%7D%0A%0Abody%20p%20%7B%0A%20%20%20%20text%2Dalign%3A%20justify%3B%0A%7D%0A%0A%40media%20print%20%7B%0A%20%20%20%20%23TOC%20%7B%0A%20%20%20%20%20%20%20%20display%3A%20none%3B%0A%20%20%20%20%7D%0A%7D%0A" rel="stylesheet">
+</head>
+<body>
+<header>
+<h1 class="title">Snakemake Tutorial</h1>
+</header>
+<p>This tutorial introduces the text-based workflow system <a href="http://snakemake.bitbucket.org">Snakemake</a>. Snakemake follows the <a href="https://www.gnu.org/software/make">GNU Make</a> paradigm: workflows are defined in terms of rules that define how to create output files from input files. Dependencies between the rules are determined automatically, creating a DAG (directed acyclic graph) of jobs that can be automatically parallelized.</p>
+<p>Snakemake sets itself apart from existing text-based workflow systems in the following way. Hooking into the Python interpreter, Snakemake offers a definition language that is an extension of <a href="http://www.python.org">Python</a> with syntax to define rules and workflow-specific properties. This makes it possible to combine the flexibility of a plain scripting language with a pythonic workflow definition. The Python language is known to be concise yet readable and can appear almost like pse [...]
+<p>While the examples presented here come from bioinformatics, Snakemake is a general-purpose workflow management system suitable for any discipline.</p>
+<p><strong>Note:</strong> To get the most up-to-date version of this tutorial and further information on Snakemake, please visit the <a href="http://snakemake.bitbucket.org">Snakemake homepage</a>. Also have a look at the corresponding <a href="http://slides.com/johanneskoester/deck-1">slides</a>.</p>
+<hr />
+<h1 id="setup">Setup</h1>
+<p>To go through this tutorial, you need the following software installed:</p>
+<ul>
+<li><a href="http://www.python.org">Python</a> ≥3.3</li>
+<li><a href="http://snakemake.bitbucket.org">Snakemake</a> 3.4.2</li>
+<li><a href="http://bio-bwa.sourceforge.net">BWA</a> 0.7.12</li>
+<li><a href="http://www.htslib.org">SAMtools</a> 1.2</li>
+<li><a href="http://www.htslib.org">BCFtools</a> 1.2</li>
+<li><a href="http://www.graphviz.org">Graphviz</a> 2.38.0</li>
+<li><a href="http://pyyaml.org">PyYAML</a> 3.11</li>
+<li><a href="http://docutils.sourceforge.net">Docutils</a> 0.12</li>
+</ul>
+<p>The easiest way to set up these prerequisites is to use the <a href="http://conda.pydata.org/miniconda.html">Miniconda</a> Python 3 distribution. The tutorial assumes that you are using either Linux or MacOS X. Both Snakemake and Miniconda also work under Windows, but the Windows shell is too different for us to provide generic examples.</p>
+<h2 id="setup-a-linux-vm-with-vagrant-under-windows">Setup a Linux VM with Vagrant under Windows</h2>
+<p>If you already use Linux or MacOS X, go on with <strong>Step 1</strong>. If you use Windows, you can set up a Linux virtual machine (VM) with <a href="https://www.vagrantup.com">Vagrant</a>. First, install Vagrant following the installation instructions in the <a href="https://docs.vagrantup.com">Vagrant Documentation</a>. Then, create a new directory that you want to share with your Linux VM, e.g., a folder <code>vagrant-linux</code> somewhere. Open a command line prompt, [...]
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">vagrant</span> init hashicorp/precise64
+<span class="kw">vagrant</span> up</code></pre>
+<p>If you decide to use a 32-bit image, you will need to download the 32-bit version of Miniconda in the next step. The contents of the <code>vagrant-linux</code> folder will be shared with the virtual machine that is set up by vagrant. You can log into the virtual machine via</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">vagrant</span> ssh</code></pre>
+<p>If this command tells you to install an SSH client, you can follow the instructions in this <a href="http://blog.osteel.me/posts/2015/01/25/how-to-use-vagrant-on-windows.html">Blogpost</a>. Now, you can follow the steps of our tutorial from within your Linux VM.</p>
+<h2 id="step-1-installing-miniconda-3">Step 1: Installing Miniconda 3</h2>
+<p>First, please <strong>open a terminal</strong> or make sure you are logged into your Vagrant Linux VM. Assuming that you have a 64-bit system, on Linux, download and install Miniconda 3 with</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">wget</span> https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
+<span class="kw">bash</span> Miniconda3-latest-Linux-x86_64.sh</code></pre>
+<p>On MacOS X, download and install with</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">wget</span> https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
+<span class="kw">bash</span> Miniconda3-latest-MacOSX-x86_64.sh</code></pre>
+<p>For a 32-bit system, URLs and file names are analogous but without the <code>_64</code>. When you are asked the question</p>
+<pre class="sourceCode "><code>Do you wish the installer to prepend the Miniconda3 install location to PATH ...? [yes|no]</code></pre>
+<p>answer with <strong>yes</strong>. Along with a minimal Python 3 environment, Miniconda contains the package manager <a href="http://conda.pydata.org">Conda</a>. After opening a <strong>new terminal</strong>, you can use the new <code>conda</code> command to install software packages and create isolated environments to, e.g., use different versions of the same package. We will later use <a href="http://conda.pydata.org">Conda</a> to create an isolated environment with all required softw [...]
+<h2 id="step-2-preparing-a-working-directory">Step 2: Preparing a working directory</h2>
+<p>First, <strong>create a new directory</strong> <code>snakemake-tutorial</code> at a reasonable place and <strong>change into that directory</strong> in your terminal. If you use a Vagrant Linux VM from Windows as described above, create the directory under <code>/vagrant/</code>, so that the contents are shared with your host system (you can then edit all files from within Windows with an editor that supports Unix line breaks). In this directory, we will later create an example workfl [...]
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">wget</span> https://bitbucket.org/johanneskoester/snakemake/downloads/snakemake-tutorial-data.tar.gz
+<span class="kw">tar</span> -xf snakemake-tutorial-data.tar.gz</code></pre>
+<p>This will create a <code>data</code> folder and a <code>requirements.txt</code> file in the working directory.</p>
+<h2 id="step-3-creating-an-environment-with-the-required-software">Step 3: Creating an environment with the required software</h2>
+<p>The <code>requirements.txt</code> file can be used to install all required software into an isolated conda environment with the name <code>snakemake-tutorial</code> via</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">conda</span> create -n snakemake-tutorial -c bioconda --file requirements.txt</code></pre>
+<p>Note that the arguments after the <code>-c</code> flags define software channels that should be used in addition to the main <code>conda</code> repository. Here, we use the <a href="https://bioconda.github.io">Bioconda</a> channel, which contains a growing collection of bioinformatics software packaged for Conda.</p>
+<h2 id="step-4-activating-the-environment">Step 4: Activating the environment</h2>
+<p>To activate the <code>snakemake-tutorial</code> environment, execute</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">source</span> activate snakemake-tutorial</code></pre>
+<p>Now you can use the installed tools. Execute</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> --help</code></pre>
+<p>to test this and get information about the command-line interface of Snakemake. To exit the environment, you can execute</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">source</span> deactivate</code></pre>
+<p>but <strong>don’t do that now</strong>, since we finally want to start working with Snakemake :-).</p>
+<hr />
+<h1 id="basics-an-example-workflow">Basics: An example workflow</h1>
+<p>Please make sure that you have <strong>activated</strong> the environment we created before, and that you have an open terminal in the working directory you have created.</p>
+<p><strong>A Snakemake workflow is defined by specifying rules in a Snakefile</strong>. <strong>Rules decompose the workflow into small steps</strong> (e.g., the application of a single tool) by specifying how to create sets of <strong>output files</strong> from sets of <strong>input files</strong>. Snakemake automatically <strong>determines the dependencies</strong> between the rules by matching file names.</p>
+<p>The Snakemake language extends the Python language, adding syntactic structures for rule definition and additional controls. All added syntactic structures begin with a keyword followed by a code block that is either on the same line or indented over multiple lines. The resulting syntax resembles that of native Python constructs.</p>
+<p>In the following, we will introduce the Snakemake syntax by creating an example workflow. The workflow will map sequencing reads to a reference genome and call variants on the mapped reads.</p>
+<h2 id="step-1-mapping-reads">Step 1: Mapping reads</h2>
+<p>Our first Snakemake rule maps reads of a given sample to a given reference genome. In the working directory, <strong>create a new file</strong> called <code>Snakefile</code> with an editor of your choice. We recommend the <a href="https://atom.io">Atom</a> editor, since it provides out-of-the-box syntax highlighting for Snakemake. In the Snakefile, define the following rule:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">rule</span> bwa_map:
+    <span class="kw">input</span>:
+        <span class="st">"data/genome.fa"</span>,
+        <span class="st">"data/samples/A.fastq"</span>
+    <span class="kw">output</span>:
+        <span class="st">"mapped_reads/A.bam"</span>
+    <span class="kw">shell</span>:
+        <span class="st">"bwa mem {input} | samtools view -Sb - > {output}"</span></code></pre>
+<p>A Snakemake rule has a name (here <code>bwa_map</code>) and a number of directives, here <code>input</code>, <code>output</code> and <code>shell</code>. The <code>input</code> and <code>output</code> directives are followed by lists of files that are expected to be used or created by the rule. In the simplest case, these are just explicit Python strings. The <code>shell</code> directive is followed by a Python string containing the shell command to execute. In the shell command string [...]
+<p>When a workflow is executed, Snakemake tries to generate given <strong>target</strong> files. Target files can be specified via the command line. By executing</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> -np mapped_reads/A.bam</code></pre>
+<p>in the working directory containing the Snakefile, we tell Snakemake to generate the target file <code>mapped_reads/A.bam</code>. Since we used the <code>-n</code> (or <code>--dryrun</code>) flag, Snakemake will only show the execution plan instead of actually performing the steps. The <code>-p</code> flag instructs Snakemake to also print the resulting shell command for illustration. To generate the target files, <strong>Snakemake applies the rules given in the Snakefile in a top-down wa [...]
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> mapped_reads/A.bam</code></pre>
+<p>Note that, after completion of the above command, Snakemake will not try to create <code>mapped_reads/A.bam</code> again, because it is already present in the file system. Snakemake <strong>only re-runs jobs if one of the input files is newer than one of the output files or one of the input files will be updated by another job</strong>.</p>
+<h2 id="step-2-generalizing-the-read-mapping-rule">Step 2: Generalizing the read mapping rule</h2>
+<p>Obviously, the rule will only work for a single sample with reads in the file <code>data/samples/A.fastq</code>. However, Snakemake allows you to <strong>generalize rules by using named wildcards</strong>. Simply replace the <code>A</code> in the second input file and in the output file with the wildcard <code>{sample}</code>, leading to</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">rule</span> bwa_map:
+    <span class="kw">input</span>:
+        <span class="st">"data/genome.fa"</span>,
+        <span class="st">"data/samples/{sample}.fastq"</span>
+    <span class="kw">output</span>:
+        <span class="st">"mapped_reads/{sample}.bam"</span>
+    <span class="kw">shell</span>:
+        <span class="st">"bwa mem {input} | samtools view -Sb - > {output}"</span></code></pre>
+<p>When Snakemake determines that this rule can be applied to generate a target file by replacing the wildcard <code>{sample}</code> in the output file with an appropriate value, it will propagate that value to all occurrences of <code>{sample}</code> in the input files and thereby determine the necessary input for the resulting job. Note that you can have multiple wildcards in your file paths; however, to avoid conflicts with other jobs of the same rule, <strong>all output files</strong> [...]
+<p>When executing</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> -np mapped_reads/B.bam</code></pre>
+<p>Snakemake will determine that the rule <code>bwa_map</code> can be applied to generate the target file by replacing the wildcard <code>{sample}</code> with the value <code>B</code>. In the output of the dry-run, you will see how the wildcard value is propagated to the input files and all filenames in the shell command. You can also <strong>specify multiple targets</strong>, e.g.:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> -np mapped_reads/A.bam mapped_reads/B.bam</code></pre>
+<p>Some <a href="http://www.tldp.org/LDP/Bash-Beginners-Guide/html">Bash</a> magic can make this particularly handy. For example, you can alternatively compose the multiple targets in a single pass via</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> -np mapped_reads/<span class="dt">{A,B}</span>.bam</code></pre>
+<p>Note that this is not a special Snakemake syntax. Bash is just expanding the given path into two, one for each element of the set <code>{A,B}</code>.</p>
+<p>In both cases, you will see that Snakemake only proposes to create the output file <code>mapped_reads/B.bam</code>. This is because you already executed the workflow before (see the previous step) and no input file is newer than the output file <code>mapped_reads/A.bam</code>. You can update the file modification date of the input file <code>data/samples/A.fastq</code> via</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">touch</span> data/samples/A.fastq</code></pre>
+<p>and see how Snakemake wants to re-run the job to create the file <code>mapped_reads/A.bam</code> by executing</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> -np mapped_reads/A.bam mapped_reads/B.bam</code></pre>
+<h2 id="step-3-sorting-read-alignments">Step 3: Sorting read alignments</h2>
+<p>For later steps, we need the read alignments in the BAM files to be sorted. This can be achieved with the <code>samtools</code> command. We add the following rule beneath the <code>bwa_map</code> rule:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">rule</span> samtools_sort:
+    <span class="kw">input</span>:
+        <span class="st">"mapped_reads/{sample}.bam"</span>
+    <span class="kw">output</span>:
+        <span class="st">"sorted_reads/{sample}.bam"</span>
+    <span class="kw">shell</span>:
+        <span class="st">"samtools sort -T sorted_reads/{wildcards.sample} "</span>
+        <span class="st">"-O bam {input} > {output}"</span></code></pre>
+<p>This rule will take the input file from the <code>mapped_reads</code> directory and store a sorted version in the <code>sorted_reads</code> directory. Note that Snakemake <strong>automatically creates missing directories</strong> before jobs are executed. For sorting, <code>samtools</code> requires a prefix specified with the flag <code>-T</code>. Here, we need the value of the wildcard <code>sample</code>. Snakemake allows accessing wildcards in the shell command via the <code>wildca [...]
+<p>When issuing</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> -np sorted_reads/B.bam</code></pre>
+<p>you will see how Snakemake wants to run first the rule <code>bwa_map</code> and then the rule <code>samtools_sort</code> to create the desired target file: as mentioned before, the dependencies are resolved automatically by matching file names.</p>
+<h2 id="step-4-indexing-read-alignments-and-visualizing-the-dag-of-jobs">Step 4: Indexing read alignments and visualizing the DAG of jobs</h2>
+<p>Next, we need to index the sorted read alignments for random access. This can be done with the following rule:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">rule</span> samtools_index:
+    <span class="kw">input</span>:
+        <span class="st">"sorted_reads/{sample}.bam"</span>
+    <span class="kw">output</span>:
+        <span class="st">"sorted_reads/{sample}.bam.bai"</span>
+    <span class="kw">shell</span>:
+        <span class="st">"samtools index {input}"</span></code></pre>
+<p>With three steps already in place, it is a good time to take a closer look at the resulting DAG of jobs. By executing</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> --dag sorted_reads/<span class="dt">{A,B}</span>.bam.bai <span class="kw">|</span> <span class="kw">dot</span> -Tsvg <span class="kw">></span> dag.svg</code></pre>
+<p>we create a <strong>visualization of the DAG</strong> using the <code>dot</code> command provided by <a href="http://www.graphviz.org">Graphviz</a>. For the given target files, Snakemake specifies the DAG in the dot language and pipes it into the <code>dot</code> command, which renders the definition into SVG format. The rendered DAG is piped into the file <code>dag.svg</code> and will look similar to this:</p>
+<p><img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAARgAAAD7CAYAAACxKYsUAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO2de3hU1b33v3vul8wkmckkE3KBJEACEiC0KiBqwVp41VrxxSIWRe2plSqHaH16sU+19unt1Bul7ymc1hao9iitFmyxgmiKylWucg0QAuROMpPLTCZzn/X+MZPNhIRkT8iePZP5fZ6Hh5m99lrrt2b/8t3rvjjGGANBEIQIyKQ2gCCI0QsJDEEQokECQxCEaCikNqAXl78Fzd0HYXefQpe3Dr6gE4GQW2qzREPGKaGU6WBQ5yFTXYIcfQXMukngwEltWlLDwGDvOYVLrsPo8J6D09sAf8iNEPNLbZpoKGRaqOQGpKvHwqwtQ27aF6BXWqU2CwDASd3J29pzDGfaN8HWcwpAavc365QWFGcsQHHGfMi4hNH+pCDEA [...]
+<p>The DAG contains a node for each job and edges representing the dependencies. Jobs that don’t need to be run because their output is up-to-date are dashed. For rules with wildcards, the value of the wildcard for the particular job is displayed in the job node.</p>
+<h3 id="exercise">Exercise</h3>
+<ul>
+<li>Run parts of the workflow using different targets. Recreate the DAG and see how different rules become dashed because their output is present and up-to-date.</li>
+</ul>
+<h2 id="step-5-calling-genomic-variants">Step 5: Calling genomic variants</h2>
+<p>The next step in our workflow will aggregate the aligned reads from all samples and jointly call genomic variants on them. Snakemake provides a <strong>helper function for collecting input files</strong>. With</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">expand</span>(<span class="st">"sorted_reads/{sample}.bam"</span>, sample=SAMPLES)</code></pre>
+<p>we obtain a list of files where the given pattern <code>"sorted_reads/{sample}.bam"</code> was formatted with the values in the given list of samples <code>SAMPLES</code>, i.e.</p>
+<pre class="sourceCode bash"><code class="sourceCode bash">[<span class="st">"sorted_reads/A.bam"</span>, <span class="st">"sorted_reads/B.bam"</span>]</code></pre>
+<p>The function is particularly useful when the pattern contains multiple wildcards. For example,</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">expand</span>(<span class="st">"sorted_reads/{sample}.{replicate}.bam"</span>, sample=SAMPLES, replicate=[0, 1])</code></pre>
+<p>would create the product of all elements of <code>SAMPLES</code> and the list <code>[0, 1]</code>, yielding</p>
+<pre class="sourceCode bash"><code class="sourceCode bash">[<span class="st">"sorted_reads/A.0.bam"</span>, <span class="st">"sorted_reads/A.1.bam"</span>, <span class="st">"sorted_reads/B.0.bam"</span>, <span class="st">"sorted_reads/B.1.bam"</span>]</code></pre>
+<p>For more information, see the <a href="https://bitbucket.org/snakemake/snakemake/wiki/Documentation">Documentation</a>. Here, we use only the simple case of <code>expand</code>. We first let Snakemake know which samples we want to consider. Remember that Snakemake works top-down; it does not automatically infer this from, e.g., the fastq files in the data folder. Also remember that Snakefiles are in principle Python code enhanced by some declarative statements to define workflows. Hence, w [...]
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">SAMPLES</span> = [<span class="st">"A"</span>, <span class="st">"B"</span>]</code></pre>
+<p>Later, we will learn about more sophisticated ways like <strong>config files</strong>. Now, we can add the following rule to our Snakefile:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">rule</span> bcftools_call:
+    <span class="kw">input</span>:
+        <span class="ot">fa=</span><span class="st">"data/genome.fa"</span>,
+        <span class="ot">bam=</span>expand<span class="ot">(</span><span class="st">"sorted_reads/{sample}.bam"</span>, sample<span class="ot">=</span>SAMPLES<span class="ot">)</span>,
+        <span class="ot">bai=</span>expand<span class="ot">(</span><span class="st">"sorted_reads/{sample}.bam.bai"</span>, sample<span class="ot">=</span>SAMPLES<span class="ot">)</span>
+    <span class="kw">output</span>:
+        <span class="st">"calls/all.vcf"</span>
+    <span class="kw">shell</span>:
+        <span class="st">"samtools mpileup -g -f {input.fa} {input.bam} | "</span>
+        <span class="st">"bcftools call -mv - > {output}"</span></code></pre>
+<p>With multiple input or output files, it is sometimes handy to refer to them separately in the shell command. This can be done by <strong>specifying names for input or output files</strong> (here, e.g., <code>fa=...</code>). The files can then be referred to in the shell command via, e.g., <code>{input.fa}</code>. For <strong>long shell commands</strong> like this one, it is advisable to <strong>split the string over multiple indented lines</strong>. Python will automatically merge it into o [...]
+<h3 id="exercise-1">Exercise</h3>
+<ul>
+<li>Obtain the updated DAG of jobs for the target file <code>calls/all.vcf</code>; it should look like this:</li>
+</ul>
+<p><img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAWkAAAFbCAYAAAAXyX/oAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nOyde3xb1ZXvf0dPS/JDliz5FTuO7djOO3ZCeM5tKZMmpVAKDYWkhFD6aQdKU1KGOy3tnQ7t7WuglJA70zAtLaGFUqBtQgeGUCBNKZCX8zRxbMdx/JYtWbYlWdZb+/4haSPFji3ZRzpH0v5+PvlE1pHOWedon9/Ze+211uYIIQQMBoPBECOvSIS2gMFgMBiXh4k0g8FgiBgm0gwGgyFiZEIcdDIYxCmnE6edTvR4PLD4/ZgMBJCpznEOgFoqhUEmQ5VSiZUaDVZrNFBL2DNSbAz5fDg+MYFzLhd6PR44AgG4gkGhzUoaco6DWiJBuVKJGqUSjRoNlqjV4IQ2jEHhUjlxaPX78UerFe/b7fBk+XylkuNwbX4+PqfXQ [...]
+<h2 id="step-6-writing-a-report">Step 6: Writing a report</h2>
+<p>Although Snakemake workflows are already self-documenting to a certain degree, it is often useful to summarize the obtained results and performed steps in a comprehensive <strong>report</strong>. With Snakemake, such reports can be composed easily with the built-in <code>report</code> function. It is best practice to create reports in a separate rule that takes all desired results as input files and provides a <strong>single HTML file as output</strong>.</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">rule</span> report:
+    <span class="kw">input</span>:
+        <span class="st">"calls/all.vcf"</span>
+    <span class="kw">output</span>:
+        <span class="st">"report.html"</span>
+    <span class="kw">run</span>:
+        <span class="kw">from</span> snakemake.utils import report
+        <span class="kw">with</span> open(input[0]) <span class="kw">as</span> vcf:
+            <span class="kw">n_calls</span> = sum(1 for l in vcf if not l.startswith(<span class="st">"#"</span>))
+
+        <span class="kw">report</span>(<span class="st">"""</span>
+<span class="st">        An example variant calling workflow</span>
+<span class="st">        ===================================</span>
+
+<span class="st">        Reads were mapped to the Yeast</span>
+<span class="st">        reference genome and variants were called jointly with</span>
+<span class="st">        SAMtools/BCFtools.</span>
+
+<span class="st">        This resulted in {n_calls} variants (see Table T1_).</span>
+<span class="st">        """</span>, output[0], T1=input[0])</code></pre>
+<p>First, we notice that this rule does not entail a shell command. Instead, we use the <code>run</code> directive, which is followed by plain Python code. Similar to the shell case, we have access to <code>input</code> and <code>output</code> files, which we can handle as plain Python objects (no braces notation here).</p>
+<p>We go through the <code>run</code> block line by line. First, we import the <code>report</code> function from <code>snakemake.utils</code>. Second, we open the VCF file by accessing it via its index in the input files (i.e. <code>input[0]</code>), and count the number of non-header lines (which is equivalent to the number of variant calls). Third, we create the report using the <code>report</code> function. The function takes a string that contains <a href="http://docutils.sourceforge [...]
+<p>With many result files, it is sometimes handy to define their names already in the list of input files and unpack these into keyword arguments as follows:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">report</span>(<span class="st">"""..."""</span>, output[0], **input)</code></pre>
+<p>Further, you can add metadata in the form of any string that will be displayed in the footer of the report, e.g.</p>
+<pre class="sourceCode bash"><code class="sourceCode bash">report<span class="kw">(</span><span class="st">"""..."""</span>, output[0], metadata=<span class="st">"Author: Johannes Köster (koester at jimmy.harvard.edu)"</span>, <span class="kw">**input)</span></code></pre>
+<h2 id="step-7-adding-a-target-rule">Step 7: Adding a target rule</h2>
+<p>So far, we always executed the workflow by specifying a target file at the command line. Apart from filenames, Snakemake <strong>also accepts rule names as targets</strong> if the rule referred to does not have wildcards. Hence, it is possible to write target rules collecting particular subsets of the desired results or all results. Moreover, if no target is given at the command line, Snakemake will define the <strong>first rule</strong> of the Snakefile as the target. Hence, it is best  [...]
+<p>Here, this means that we add a rule</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">rule</span> all:
+    <span class="kw">input</span>:
+        <span class="st">"report.html"</span></code></pre>
+<p>to the top of our workflow. When executing Snakemake with</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> -n</code></pre>
+<p>the execution plan for creating the file <code>report.html</code>, which contains and summarizes all our results, will be shown. Note that, apart from Snakemake considering the first rule of the workflow as the default target, <strong>the order in which rules appear in the Snakefile is arbitrary and does not influence the DAG of jobs</strong>.</p>
+<h3 id="exercise-2">Exercise</h3>
+<ul>
+<li>Create the DAG of jobs for the complete workflow.</li>
+<li>Execute the complete workflow and have a look at the resulting <code>report.html</code> in your browser.</li>
+<li>Snakemake provides handy flags for forcing re-execution of parts of the workflow. Have a look at the command line help with <code>snakemake --help</code> and search for the flag <code>--forcerun</code>. Then, use this flag to re-execute the rule <code>samtools_sort</code> and see what happens.</li>
+<li>With <code>--reason</code> it is possible to display the execution reason for each job. Try this flag together with a dry-run and the <code>--forcerun</code> flag to understand the decisions of Snakemake.</li>
+</ul>
+<h2 id="summary">Summary</h2>
+<p>In total, the resulting workflow looks like this:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">SAMPLES</span> = [<span class="st">"A"</span>, <span class="st">"B"</span>]
+
+
+<span class="kw">rule</span> all:
+    <span class="kw">input</span>:
+        <span class="st">"report.html"</span>
+
+
+<span class="kw">rule</span> bwa_map:
+    <span class="kw">input</span>:
+        <span class="st">"data/genome.fa"</span>,
+        <span class="st">"data/samples/{sample}.fastq"</span>
+    <span class="kw">output</span>:
+        <span class="st">"mapped_reads/{sample}.bam"</span>
+    <span class="kw">shell</span>:
+        <span class="st">"bwa mem {input} | samtools view -Sb - > {output}"</span>
+
+
+<span class="kw">rule</span> samtools_sort:
+    <span class="kw">input</span>:
+        <span class="st">"mapped_reads/{sample}.bam"</span>
+    <span class="kw">output</span>:
+        <span class="st">"sorted_reads/{sample}.bam"</span>
+    <span class="kw">shell</span>:
+        <span class="st">"samtools sort -T sorted_reads/{wildcards.sample} "</span>
+        <span class="st">"-O bam {input} > {output}"</span>
+
+
+<span class="kw">rule</span> samtools_index:
+    <span class="kw">input</span>:
+        <span class="st">"sorted_reads/{sample}.bam"</span>
+    <span class="kw">output</span>:
+        <span class="st">"sorted_reads/{sample}.bam.bai"</span>
+    <span class="kw">shell</span>:
+        <span class="st">"samtools index {input}"</span>
+
+
+<span class="kw">rule</span> bcftools_call:
+    <span class="kw">input</span>:
+        <span class="ot">fa=</span><span class="st">"data/genome.fa"</span>,
+        <span class="ot">bam=</span>expand<span class="ot">(</span><span class="st">"sorted_reads/{sample}.bam"</span>, sample<span class="ot">=</span>SAMPLES<span class="ot">)</span>,
+        <span class="ot">bai=</span>expand<span class="ot">(</span><span class="st">"sorted_reads/{sample}.bam.bai"</span>, sample<span class="ot">=</span>SAMPLES<span class="ot">)</span>
+    <span class="kw">output</span>:
+        <span class="st">"calls/all.vcf"</span>
+    <span class="kw">shell</span>:
+        <span class="st">"samtools mpileup -g -f {input.fa} {input.bam} | "</span>
+        <span class="st">"bcftools call -mv - > {output}"</span>
+
+
+<span class="kw">rule</span> report:
+    <span class="kw">input</span>:
+        <span class="st">"calls/all.vcf"</span>
+    <span class="kw">output</span>:
+        <span class="st">"report.html"</span>
+    <span class="kw">run</span>:
+        <span class="kw">from</span> snakemake.utils import report
+        <span class="kw">with</span> open(input[0]) <span class="kw">as</span> vcf:
+            <span class="kw">n_calls</span> = sum(1 for l in vcf if not l.startswith(<span class="st">"#"</span>))
+
+        <span class="kw">report</span>(<span class="st">"""</span>
+<span class="st">        An example variant calling workflow</span>
+<span class="st">        ===================================</span>
+
+<span class="st">        Reads were mapped to the Yeast</span>
+<span class="st">        reference genome and variants were called jointly with</span>
+<span class="st">        SAMtools/BCFtools.</span>
+
+<span class="st">        This resulted in {n_calls} variants (see Table T1_).</span>
+<span class="st">        """</span>, output[0], T1=input[0])</code></pre>
+<hr />
+<h1 id="advanced-decorating-the-example-workflow">Advanced: Decorating the example workflow</h1>
+<p>Now that the basic concepts of Snakemake have been illustrated, we can introduce advanced topics.</p>
+<h2 id="step-1-specifying-the-number-of-used-threads">Step 1: Specifying the number of used threads</h2>
+<p>For some tools, it is advisable to use more than one thread in order to speed up the computation. <strong>Snakemake can be made aware of the threads a rule needs</strong> with the <code>threads</code> directive. In our example workflow, it makes sense to use multiple threads for the rule <code>bwa_map</code>:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">rule</span> bwa_map:
+    <span class="kw">input</span>:
+        <span class="st">"data/genome.fa"</span>,
+        <span class="st">"data/samples/{sample}.fastq"</span>
+    <span class="kw">output</span>:
+        <span class="st">"mapped_reads/{sample}.bam"</span>
+    <span class="kw">threads</span>: 8
+    <span class="kw">shell</span>:
+        <span class="st">"bwa mem -t {threads} {input} | samtools view -Sb - > {output}"</span></code></pre>
+<p>The number of threads can be propagated to the shell command with the familiar braces notation (i.e. <code>{threads}</code>). If no <code>threads</code> directive is given, a rule is assumed to need 1 thread.</p>
+<p>When a workflow is executed, <strong>the number of threads the jobs need is considered by the Snakemake scheduler</strong>. In particular, the scheduler ensures that the sum of the threads of all running jobs does not exceed a given number of available CPU cores. This number can be given with the <code>--cores</code> command line argument (by default, Snakemake uses only 1 CPU core). For example</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> --cores 10</code></pre>
+<p>would execute the workflow with 10 cores. Since the rule <code>bwa_map</code> needs 8 threads, only one job of the rule can run at a time, and the Snakemake scheduler will try to saturate the remaining cores with other jobs like, e.g., <code>samtools_sort</code>. The threads directive in a rule is interpreted as a maximum: when <strong>fewer cores than threads</strong> are provided, the number of threads a rule uses will be <strong>reduced to the number of given cores</strong>.</p>
+<p>Apart from the very common thread resource, Snakemake provides a <code>resources</code> directive that can be used to <strong>specify arbitrary resources</strong>, e.g., memory usage or auxiliary computing devices like GPUs. Similar to threads, these can be considered by the scheduler when an available amount of that resource is given with the command line argument <code>--resources</code>. Details can be found in the Snakemake <a href="https://bitbucket.org/snakemake/snakemake/wiki/D [...]
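+<p>As a minimal sketch (the resource name <code>mem_mb</code> is arbitrary and chosen here only for illustration), a rule can declare an expected resource usage, and a global limit can then be passed on the command line:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash">rule bwa_map:
+    input:
+        "data/genome.fa",
+        "data/samples/{sample}.fastq"
+    output:
+        "mapped_reads/{sample}.bam"
+    threads: 8
+    resources:
+        mem_mb=1000  # declared resource; only constrains scheduling when --resources is given
+    shell:
+        "bwa mem -t {threads} {input} | samtools view -Sb - > {output}"</code></pre>
+<p>Running, e.g., <code>snakemake --cores 10 --resources mem_mb=4000</code> would then keep the sum of <code>mem_mb</code> over all concurrently running jobs at or below 4000.</p>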
+<h3 id="exercise-3">Exercise</h3>
+<ul>
+<li>With the flag <code>--forceall</code> you can enforce a complete re-execution of the workflow. Combine this flag with different values for <code>--cores</code> and examine how the scheduler selects jobs to run in parallel.</li>
+</ul>
+<h2 id="step-2-config-files">Step 2: Config files</h2>
+<p>So far, we specified the samples to consider in a Python list within the Snakefile. However, often you want your workflow to be customizable, so that it can be easily adapted to new data. For this purpose, Snakemake provides a config file mechanism. Config files can be written in <a href="http://json.org">JSON</a> or <a href="http://yaml.org">YAML</a>, and loaded with the <code>configfile</code> directive. In our example workflow, we add the line</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">configfile</span>: <span class="st">"config.yaml"</span></code></pre>
+<p>to the top of the Snakefile. Snakemake will load the config file and store its contents into a globally available dictionary named <code>config</code>. In our case, it makes sense to specify the samples in <code>config.yaml</code> as</p>
+<pre class="sourceCode yaml"><code class="sourceCode yaml"><span class="fu">samples:</span>
+    <span class="fu">A:</span> data/samples/A.fastq
+    <span class="fu">B:</span> data/samples/B.fastq</code></pre>
+<p>Now, we can remove the statement defining <code>SAMPLES</code> from the Snakefile and change the rule <code>bcftools_call</code> to</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">rule</span> bcftools_call:
+    <span class="kw">input</span>:
+        <span class="ot">fa=</span><span class="st">"data/genome.fa"</span>,
+        <span class="ot">bam=</span>expand<span class="ot">(</span><span class="st">"sorted_reads/{sample}.bam"</span>, sample<span class="ot">=</span>config<span class="ot">[</span><span class="st">"samples"</span><span class="ot">])</span>,
+        <span class="ot">bai=</span>expand<span class="ot">(</span><span class="st">"sorted_reads/{sample}.bam.bai"</span>, sample<span class="ot">=</span>config<span class="ot">[</span><span class="st">"samples"</span><span class="ot">])</span>
+    <span class="kw">output</span>:
+        <span class="st">"calls/all.vcf"</span>
+    <span class="kw">shell</span>:
+        <span class="st">"samtools mpileup -g -f {input.fa} {input.bam} | "</span>
+        <span class="st">"bcftools call -mv - > {output}"</span></code></pre>
+<h2 id="step-3-input-functions">Step 3: Input functions</h2>
+<p>Since we have stored the paths to the FASTQ files in the config file, we can also generalize the rule <code>bwa_map</code> to use these paths. This case is different from the rule <code>bcftools_call</code> we modified above. To understand this, it is important to know that Snakemake workflows are executed in three phases.</p>
+<ul>
+<li>In the <strong>initialization</strong> phase, the workflow is parsed and all rules are instantiated.</li>
+<li>In the <strong>DAG</strong> phase, the DAG of jobs is built by filling wildcards and matching input files to output files.</li>
+<li>In the <strong>scheduling</strong> phase, the DAG of jobs is executed.</li>
+</ul>
+<p>The expand functions in the list of input files of the rule <code>bcftools_call</code> are executed during the initialization phase. In this phase, we don’t know about jobs, wildcard values and rule dependencies. Hence, we cannot determine the FASTQ paths for rule <code>bwa_map</code> from the config file in this phase, because we don’t even know which jobs will be generated from that rule. Instead, we need to defer the determination of input files to the DAG phase. This can be achiev [...]
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">rule</span> bwa_map:
+    <span class="kw">input</span>:
+        <span class="st">"data/genome.fa"</span>,
+        <span class="kw">lambda</span> wildcards: config[<span class="st">"samples"</span>][wildcards.sample]
+    <span class="kw">output</span>:
+        <span class="st">"mapped_reads/{sample}.bam"</span>
+    <span class="kw">threads</span>: 8
+    <span class="kw">shell</span>:
+        <span class="st">"bwa mem -t {threads} {input} | samtools view -Sb - > {output}"</span></code></pre>
+<p>Here, we use an anonymous function, also called a <strong>lambda expression</strong>. Any normal function would work as well (see the sketch below). Input functions take a <code>wildcards</code> object as their <strong>single argument</strong>, which allows access to the wildcard values via attributes (here <code>wildcards.sample</code>). They <strong>return a string or a list of strings</strong>, which are interpreted as paths to input files (here, we return the path that is stored for the sample in the config file). [...]
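+<p>To make that concrete, here is a sketch of the same rule using a named input function instead of the lambda expression (the function name <code>fastq_for_sample</code> is arbitrary):</p>
+<pre class="sourceCode bash"><code class="sourceCode bash">def fastq_for_sample(wildcards):
+    # executed in the DAG phase, once the wildcard values are known
+    return config["samples"][wildcards.sample]
+
+rule bwa_map:
+    input:
+        "data/genome.fa",
+        fastq_for_sample
+    output:
+        "mapped_reads/{sample}.bam"
+    threads: 8
+    shell:
+        "bwa mem -t {threads} {input} | samtools view -Sb - > {output}"</code></pre>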
+<h3 id="exercise-4">Exercise</h3>
+<ul>
+<li>In the <code>data/samples</code> folder, there is an additional sample <code>C.fastq</code>. Add that sample to the config file and see how Snakemake wants to recompute the part of the workflow belonging to the new sample.</li>
+</ul>
+<h2 id="step-4-rule-parameters">Step 4: Rule parameters</h2>
+<p>Sometimes, shell commands are not only composed of input and output files and some static flags. In particular, it can happen that additional parameters need to be set depending on the wildcard values of the job. For this, Snakemake allows you to <strong>define arbitrary parameters</strong> for rules with the <code>params</code> directive. In our workflow, it is reasonable to annotate aligned reads with so-called read groups, which contain metadata like the sample name. We modify the rule  [...]
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">rule</span> bwa_map:
+    <span class="kw">input</span>:
+        <span class="st">"data/genome.fa"</span>,
+        <span class="kw">lambda</span> wildcards: config[<span class="st">"samples"</span>][wildcards.sample]
+    <span class="kw">output</span>:
+        <span class="st">"mapped_reads/{sample}.bam"</span>
+    <span class="kw">params</span>:
+        <span class="ot">rg=</span><span class="st">"@RG\tID:{sample}\tSM:{sample}"</span>
+    <span class="kw">threads</span>: 8
+    <span class="kw">shell</span>:
+        <span class="st">"bwa mem -R '{params.rg}' -t {threads} {input} | samtools view -Sb - > {output}"</span></code></pre>
+<p>Similar to input and output files, <code>params</code> can be accessed from the shell command. Moreover, the <code>params</code> directive can also take functions, as in Step 3, to defer initialization to the DAG phase, as sketched below.</p>
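+<p>As a small sketch of the latter, the read group string could equally be built by a function that receives the <code>wildcards</code> object, which is handy when a parameter needs more logic than plain wildcard substitution:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash">rule bwa_map:
+    input:
+        "data/genome.fa",
+        lambda wildcards: config["samples"][wildcards.sample]
+    output:
+        "mapped_reads/{sample}.bam"
+    params:
+        # evaluated in the DAG phase; yields the same string as the static version above
+        rg=lambda wildcards: "@RG\tID:{0}\tSM:{0}".format(wildcards.sample)
+    threads: 8
+    shell:
+        "bwa mem -R '{params.rg}' -t {threads} {input} | samtools view -Sb - > {output}"</code></pre>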
+<h3 id="exercise-5">Exercise</h3>
+<ul>
+<li>Variant calling can consider a lot of parameters. A particularly important one is the prior mutation rate (1e-3 by default). It is set via the flag <code>-P</code> of the <code>bcftools call</code> command. Consider making this flag configurable by adding a new key to the config file and using the <code>params</code> directive in the rule <code>bcftools_call</code> to propagate it to the shell command.</li>
+</ul>
+<h2 id="step-5-logging">Step 5: Logging</h2>
+<p>When executing a large workflow, it is usually desirable to store the output of each job persistently in files instead of just printing it to the terminal. For this purpose, Snakemake allows you to <strong>specify log files</strong> for rules. Log files are defined via the <code>log</code> directive and handled similarly to output files, but they are not subject to rule matching and are not cleaned up when a job fails. We modify our rule <code>bwa_map</code> as follows:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">rule</span> bwa_map:
+    <span class="kw">input</span>:
+        <span class="st">"data/genome.fa"</span>,
+        <span class="kw">lambda</span> wildcards: config[<span class="st">"samples"</span>][wildcards.sample]
+    <span class="kw">output</span>:
+        <span class="st">"mapped_reads/{sample}.bam"</span>
+    <span class="kw">params</span>:
+        <span class="ot">rg=</span><span class="st">"@RG\tID:{sample}\tSM:{sample}"</span>
+    <span class="kw">log</span>:
+        <span class="st">"logs/bwa_map/{sample}.log"</span>
+    <span class="kw">threads</span>: 8
+    <span class="kw">shell</span>:
+        <span class="st">"(bwa mem -R '{params.rg}' -t {threads} {input} | "</span>
+        <span class="st">"samtools view -Sb - > {output}) 2> {log}"</span></code></pre>
+<p>The shell command is modified to collect the STDERR output of both <code>bwa</code> and <code>samtools</code> and pipe it into the file referred to by <code>{log}</code>. It is best practice to store all log files in a <code>logs</code> subdirectory, prefixed by the rule or tool name. Log files must contain exactly the same wildcards as the output files to avoid clashes.</p>
+<h3 id="exercise-6">Exercise</h3>
+<ul>
+<li>Add a log directive to the <code>bcftools_call</code> rule as well.</li>
+<li>Time to re-run the whole workflow (remember the command line flags to force re-execution). See how log files are created for variant calling and read mapping.</li>
+<li>The ability to track the provenance of each generated result is an important step towards reproducible analyses. Apart from the <code>report</code> functionality discussed before, Snakemake can summarize various provenance information for all output files of the workflow. The flag <code>--summary</code> prints a table associating each output file with the rule used to generate it, the creation date, and optionally the version of the tool used to create it. Further, the tab [...]
+</ul>
+<h2 id="step-6-temporary-and-protected-files">Step 6: Temporary and protected files</h2>
+<p>In our workflow, we create two BAM files for each sample, namely the output of the rules <code>bwa_map</code> and <code>samtools_sort</code>. When not dealing with examples, the underlying data is usually huge. Hence, the resulting BAM files need a lot of disk space and their creation takes some time. Snakemake allows you to <strong>mark output files as temporary</strong>, such that they are deleted once every consuming job has been executed, in order to save disk space. We use this mecha [...]
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">rule</span> bwa_map:
+    <span class="kw">input</span>:
+        <span class="st">"data/genome.fa"</span>,
+        <span class="kw">lambda</span> wildcards: config[<span class="st">"samples"</span>][wildcards.sample]
+    <span class="kw">output</span>:
+        <span class="kw">temp</span>(<span class="st">"mapped_reads/{sample}.bam"</span>)
+    <span class="kw">params</span>:
+        <span class="ot">rg=</span><span class="st">"@RG\tID:{sample}\tSM:{sample}"</span>
+    <span class="kw">log</span>:
+        <span class="st">"logs/bwa_map/{sample}.log"</span>
+    <span class="kw">threads</span>: 8
+    <span class="kw">shell</span>:
+        <span class="st">"(bwa mem -R '{params.rg}' -t {threads} {input} | "</span>
+        <span class="st">"samtools view -Sb - > {output}) 2> {log}"</span></code></pre>
+<p>This results in the deletion of the BAM file once the corresponding <code>samtools_sort</code> job has been executed. Since the creation of BAM files via read mapping and sorting is computationally expensive, it is reasonable to <strong>protect</strong> the final BAM file <strong>from accidental deletion or modification</strong>. We modify the rule <code>samtools_sort</code> by marking its output file as <code>protected</code>:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">rule</span> samtools_sort:
+    <span class="kw">input</span>:
+        <span class="st">"mapped_reads/{sample}.bam"</span>
+    <span class="kw">output</span>:
+        <span class="kw">protected</span>(<span class="st">"sorted_reads/{sample}.bam"</span>)
+    <span class="kw">shell</span>:
+        <span class="st">"samtools sort -T sorted_reads/{wildcards.sample} "</span>
+        <span class="st">"-O bam {input} > {output}"</span></code></pre>
+<p>After execution of the job, Snakemake will write-protect the output file in the filesystem, so that it can’t be overwritten or deleted accidentally.</p>
+<h3 id="exercise-7">Exercise</h3>
+<ul>
+<li>Re-execute the whole workflow and observe how Snakemake handles the temporary and protected files (a sketch of the commands follows this list).</li>
+<li>Run Snakemake with the target <code>mapped_reads/A.bam</code>. Although the file is marked as temporary, you will see that Snakemake does not delete it because it is specified as a target file.</li>
+<li>Try to re-execute the whole workflow with the dry-run option. You will see that it fails (as intended) because Snakemake cannot overwrite the protected output files.</li>
+</ul>
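+<p>A sketch of the corresponding invocations:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash">snakemake --forceall          # re-run everything; temp files are deleted after use
+snakemake mapped_reads/A.bam  # temp-flagged, but kept because it is a requested target
+snakemake -n --forceall       # dry run; complains about the protected outputs</code></pre>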
+<h2 id="summary-1">Summary</h2>
+<p>The final version of our workflow looks like this:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">configfile</span>: <span class="st">"config.yaml"</span>
+
+
+<span class="kw">rule</span> all:
+    <span class="kw">input</span>:
+        <span class="st">"report.html"</span>
+
+
+<span class="kw">rule</span> bwa_map:
+    <span class="kw">input</span>:
+        <span class="st">"data/genome.fa"</span>,
+        <span class="kw">lambda</span> wildcards: config[<span class="st">"samples"</span>][wildcards.sample]
+    <span class="kw">output</span>:
+        <span class="kw">temp</span>(<span class="st">"mapped_reads/{sample}.bam"</span>)
+    <span class="kw">params</span>:
+        <span class="ot">rg=</span><span class="st">"@RG\tID:{sample}\tSM:{sample}"</span>
+    <span class="kw">log</span>:
+        <span class="st">"logs/bwa_map/{sample}.log"</span>
+    <span class="kw">threads</span>: 8
+    <span class="kw">shell</span>:
+        <span class="st">"(bwa mem -R '{params.rg}' -t {threads} {input} | "</span>
+        <span class="st">"samtools view -Sb - > {output}) 2> {log}"</span>
+
+
+<span class="kw">rule</span> samtools_sort:
+    <span class="kw">input</span>:
+        <span class="st">"mapped_reads/{sample}.bam"</span>
+    <span class="kw">output</span>:
+        <span class="kw">protected</span>(<span class="st">"sorted_reads/{sample}.bam"</span>)
+    <span class="kw">shell</span>:
+        <span class="st">"samtools sort -T sorted_reads/{wildcards.sample} "</span>
+        <span class="st">"-O bam {input} > {output}"</span>
+
+
+<span class="kw">rule</span> samtools_index:
+    <span class="kw">input</span>:
+        <span class="st">"sorted_reads/{sample}.bam"</span>
+    <span class="kw">output</span>:
+        <span class="st">"sorted_reads/{sample}.bam.bai"</span>
+    <span class="kw">shell</span>:
+        <span class="st">"samtools index {input}"</span>
+
+
+<span class="kw">rule</span> bcftools_call:
+    <span class="kw">input</span>:
+        <span class="ot">fa=</span><span class="st">"data/genome.fa"</span>,
+        <span class="ot">bam=</span>expand<span class="ot">(</span><span class="st">"sorted_reads/{sample}.bam"</span>, sample<span class="ot">=</span>config<span class="ot">[</span><span class="st">"samples"</span><span class="ot">])</span>,
+        <span class="ot">bai=</span>expand<span class="ot">(</span><span class="st">"sorted_reads/{sample}.bam.bai"</span>, sample<span class="ot">=</span>config<span class="ot">[</span><span class="st">"samples"</span><span class="ot">])</span>
+    <span class="kw">output</span>:
+        <span class="st">"calls/all.vcf"</span>
+    <span class="kw">shell</span>:
+        <span class="st">"samtools mpileup -g -f {input.fa} {input.bam} | "</span>
+        <span class="st">"bcftools call -mv - > {output}"</span>
+
+
+<span class="kw">rule</span> report:
+    <span class="kw">input</span>:
+        <span class="st">"calls/all.vcf"</span>
+    <span class="kw">output</span>:
+        <span class="st">"report.html"</span>
+    <span class="kw">run</span>:
+        <span class="kw">from</span> snakemake.utils import report
+        <span class="kw">with</span> open(input[0]) <span class="kw">as</span> vcf:
+            <span class="kw">n_calls</span> = sum(1 for l in vcf if not l.startswith(<span class="st">"#"</span>))
+
+        <span class="kw">report</span>(<span class="st">"""</span>
+<span class="st">        An example variant calling workflow</span>
+<span class="st">        ===================================</span>
+
+<span class="st">        Reads were mapped to the Yeast</span>
+<span class="st">        reference genome and variants were called jointly with</span>
+<span class="st">        SAMtools/BCFtools.</span>
+
+<span class="st">        This resulted in {n_calls} variants (see Table T1_).</span>
+<span class="st">        """</span>, output[0], T1=input[0])</code></pre>
+<hr />
+<h1 id="additional-features">Additional features</h1>
+<p>In the following, we introduce some features that are beyond the scope of the above example workflow. For details and even more features, see the <a href="https://bitbucket.org/snakemake/snakemake/wiki/Documentation">Documentation</a>, the <a href="https://bitbucket.org/snakemake/snakemake/wiki/FAQ">FAQ</a> and the command line help (<code>snakemake --help</code>).</p>
+<h2 id="benchmarking">Benchmarking</h2>
+<p>With the <code>benchmark</code> directive, Snakemake can be instructed to <strong>measure the wall clock time of a job</strong>. We activate benchmarking for the rule <code>bwa_map</code>:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">rule</span> bwa_map:
+    <span class="kw">input</span>:
+        <span class="st">"data/genome.fa"</span>,
+        <span class="kw">lambda</span> wildcards: config[<span class="st">"samples"</span>][wildcards.sample]
+    <span class="kw">output</span>:
+        <span class="kw">temp</span>(<span class="st">"mapped_reads/{sample}.bam"</span>)
+    <span class="kw">params</span>:
+        <span class="ot">rg=</span><span class="st">"@RG\tID:{sample}\tSM:{sample}"</span>
+    <span class="kw">log</span>:
+        <span class="st">"logs/bwa_map/{sample}.log"</span>
+    <span class="kw">benchmark</span>:
+        <span class="st">"benchmarks/{sample}.bwa.benchmark.txt"</span>
+    <span class="kw">threads</span>: 8
+    <span class="kw">shell</span>:
+        <span class="st">"(bwa mem -R '{params.rg}' -t {threads} {input} | "</span>
+        <span class="st">"samtools view -Sb - > {output}) 2> {log}"</span></code></pre>
+<p>The <code>benchmark</code> directive takes a string that points to the file where benchmarking results shall be stored. Similar to output files, the path can contain wildcards (they must be the same wildcards as in the output files). When a job derived from the rule is executed, Snakemake will measure the wall clock time and store it in the file in tab-delimited format. With the command line flag <code>--benchmark-repeats</code>, Snakemake can be instructed to perform repetitive measure [...]
+<p>We can include the benchmark results into our report:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">rule</span> report:
+    <span class="kw">input</span>:
+        <span class="ot">T1=</span><span class="st">"calls/all.vcf"</span>,
+        <span class="ot">T2=</span>expand<span class="ot">(</span><span class="st">"benchmarks/{sample}.bwa.benchmark.txt"</span>, sample<span class="ot">=</span>config<span class="ot">[</span><span class="st">"samples"</span><span class="ot">])</span>
+    <span class="kw">output</span>:
+        <span class="st">"report.html"</span>
+    <span class="kw">run</span>:
+        <span class="kw">from</span> snakemake.utils import report
+        <span class="kw">with</span> open(input[0]) <span class="kw">as</span> vcf:
+            <span class="kw">n_calls</span> = sum(1 for l in vcf if not l.startswith(<span class="st">"#"</span>))
+
+        <span class="kw">report</span>(<span class="st">"""</span>
+<span class="st">        An example variant calling workflow</span>
+<span class="st">        ===================================</span>
+
+<span class="st">        Reads were mapped to the Yeast</span>
+<span class="st">        reference genome and variants were called jointly with</span>
+<span class="st">        SAMtools/BCFtools.</span>
+
+<span class="st">        This resulted in {n_calls} variants (see Table T1_).</span>
+<span class="st">        Benchmark results for BWA can be found in the tables T2_.</span>
+<span class="st">        """</span>, output[0], **input)</code></pre>
+<p>We use the <code>expand</code> function to collect the benchmark files for all samples. Here, we directly provide names for the input files. In particular, we can also name the whole list of benchmark files returned by the <code>expand</code> function as <code>T2</code>. When invoking the <code>report</code> function, we just unpack <code>input</code> into keyword arguments (resulting in <code>T1</code> and <code>T2</code>). In the text, we refer with <code>T2_</code> to the list of b [...]
+<h3 id="exercise-8">Exercise</h3>
+<ul>
+<li>Re-execute the workflow and benchmark <code>bwa_map</code> with 3 repeats (see the sketch after this list). Open the report and see how the list of benchmark files is presented in the HTML report.</li>
+</ul>
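+<p>A minimal sketch of the invocation (using <code>--forceall</code> since the workflow has already run):</p>
+<pre class="sourceCode bash"><code class="sourceCode bash">snakemake --forceall --benchmark-repeats 3</code></pre>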
+<h2 id="modularization">Modularization</h2>
+<p>In order to re-use building blocks or simply to structure large workflows, it is sometimes reasonable to <strong>split a workflow into modules</strong>. For this, Snakemake provides the <code>include</code> directive to include another Snakefile into the current one, e.g.:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">include</span>: <span class="st">"path/to/other.snakefile"</span></code></pre>
+<p>Alternatively, Snakemake allows you to <strong>define sub-workflows</strong>. A sub-workflow refers to a working directory with a complete Snakemake workflow. Output files of that sub-workflow can be used in the current Snakefile. Upon execution, Snakemake ensures that the output files of the sub-workflow are up-to-date before executing the current workflow. This mechanism is particularly useful when you want to extend a previous analysis without modifying it. For details about sub-workfl [...]
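+<p>A minimal sketch of the sub-workflow syntax, assuming a hypothetical workflow living in <code>../other_workflow</code>:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash">subworkflow other_workflow:
+    workdir: "../other_workflow"
+
+rule use_subworkflow_output:
+    input:
+        # the sub-workflow name acts as a function that resolves paths
+        # relative to the sub-workflow's working directory
+        other_workflow("results/some_output.txt")
+    output:
+        "copied/some_output.txt"
+    shell:
+        "cp {input} {output}"</code></pre>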
+<h3 id="exercise-9">Exercise</h3>
+<ul>
+<li>Put the read mapping related rules into a separate Snakefile and use the <code>include</code> directive to make them available in our example workflow again.</li>
+</ul>
+<h2 id="using-custom-scripts">Using custom scripts</h2>
+<p>With Snakemake, there are two strategies to use custom scripts in your workflow.</p>
+<h3 id="method-1-use-the-run-directive">Method 1: Use the run directive</h3>
+<p>With the <code>run</code> directive, Snakemake already provides a way to execute plain Python code in a rule. Apart from Python, this can also be used to execute <a href="https://www.r-project.org">R</a> code by using the Python package <a href="http://rpy.sourceforge.net">rpy2</a>. Further, Snakemake provides a function <code>snakemake.utils.R</code> that allows passing a string that is interpreted as R code. The code can thereby refer to, e.g., input and output files or parameters o [...]
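+<p>A minimal sketch of <code>snakemake.utils.R</code> inside a <code>run</code> directive (requires rpy2 and R; the rule and file names are hypothetical):</p>
+<pre class="sourceCode bash"><code class="sourceCode bash">rule plot_hist:
+    input:
+        "calls/all.vcf"
+    output:
+        "plots/hist.pdf"
+    run:
+        from snakemake.utils import R
+        # the string is formatted like a shell command, so {output}
+        # below refers to this job's output file
+        R("""
+        pdf("{output}")
+        hist(rnorm(100))
+        dev.off()
+        """)</code></pre>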
+<h3 id="method-2-create-separate-scripts">Method 2: Create separate scripts</h3>
+<p>For larger tasks, it is a good idea to <strong>create separate scripts</strong> with command line interfaces, which are then invoked from Snakemake via the <code>shell</code> directive. This also allows the scripts to be re-used in other analyses. In the case of R, we propose to use <a href="https://stat.ethz.ch/R-manual/R-devel/library/utils/html/Rscript.html">Rscript</a> for invocation, as sketched below.</p>
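+<p>A sketch of this second strategy (the R script path is hypothetical):</p>
+<pre class="sourceCode bash"><code class="sourceCode bash">rule plot_quals:
+    input:
+        "calls/all.vcf"
+    output:
+        "plots/quals.pdf"
+    shell:
+        "Rscript scripts/plot-quals.R {input} {output}"</code></pre>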
+<h2 id="cluster-execution">Cluster execution</h2>
+<p>By default, Snakemake executes jobs on the local machine it is invoked on. Alternatively, it can execute jobs in <strong>distributed environments, e.g., compute clusters or batch systems</strong>. If the nodes share a common file system, Snakemake supports three alternative execution modes.</p>
+<p>In cluster environments, compute jobs are usually submitted as shell scripts via commands like <code>qsub</code>. Snakemake provides a <strong>generic mode</strong> to execute on such clusters. By invoking Snakemake with</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> --cluster qsub --jobs 100</code></pre>
+<p>each job will be compiled into a shell script that is submitted with the given command (here <code>qsub</code>). The <code>--jobs</code> flag limits the number of concurrently submitted jobs to 100. This basic mode assumes that the submission command returns immediately after submitting the job. Some clusters allow running the submission command in <strong>synchronous mode</strong>, such that it waits until the job has been executed. In such cases, we can invoke, e.g.,</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> --cluster-sync <span class="st">"qsub -sync yes"</span> --jobs 100</code></pre>
+<p>The specified submission command can also be <strong>decorated with additional parameters taken from the submitted job</strong>. For example, the number of threads a job uses can be accessed in braces, similarly to the formatting of shell commands, e.g.</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> --cluster <span class="st">"qsub -pe threaded {threads}"</span> --jobs 100</code></pre>
+<p>Alternatively, Snakemake can use the Distributed Resource Management Application API (<a href="http://www.drmaa.org">DRMAA</a>). This API provides a common interface to control various resource management systems. The <strong>DRMAA support</strong> can be activated by invoking Snakemake as follows:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> --drmaa --jobs 100</code></pre>
+<p>If available, <strong>DRMAA is preferable to the generic cluster modes</strong> because it provides better control and error handling. To support additional cluster-specific parametrization, a Snakefile can be complemented by a <a href="https://bitbucket.org/snakemake/snakemake/wiki/Documentation#markdown-header-cluster-configuration">cluster configuration</a>.</p>
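+<p>As a sketch (the file content and queue names are hypothetical), a cluster configuration provides per-rule submission parameters that the submission command can access via the <code>cluster</code> object:</p>
+<pre class="sourceCode bash"><code class="sourceCode bash"># cluster.json:
+# {
+#     "__default__": {"queue": "short"},
+#     "bwa_map":     {"queue": "long"}
+# }
+snakemake --cluster-config cluster.json --cluster "qsub -q {cluster.queue}" --jobs 100</code></pre>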
+<h2 id="constraining-wildcards">Constraining wildcards</h2>
+<p>Snakemake uses regular expressions to match output files to input files and determine dependencies between the jobs. Sometimes it is useful to constrain the values a wildcard can have. This can be achieved by adding a regular expression that describes the set of allowed wildcard values. For example, the wildcard <code>sample</code> in the output file <code>"sorted_reads/{sample}.bam"</code> can be constrained to only allow alphanumeric sample names as <code>"sorted_read [...]
+<ul>
+<li>It can help to avoid ambiguous rules, i.e. two or more rules that can be applied to generate the same output file. Other ways of handling ambiguous rules are described in the <a href="https://bitbucket.org/snakemake/snakemake/wiki/Documentation">Documentation</a>.</li>
+<li>It can help to guide the regular expression based matching so that wildcards are assigned to the right parts of a file name. Consider the output file <code>{dataset}.{group}.txt</code> and assume that the target file is <code>A.1.normal.txt</code>. It is not clear whether <code>dataset=&quot;A.1&quot;</code> and <code>group=&quot;normal&quot;</code> or <code>dataset=&quot;A&quot;</code> and <code>group=&quot;1.normal&quot;</code> is the right assignment. Here, constraining the dataset [...]
+</ul>
+<p>When dealing with ambiguous rules, it is best practice to first try to solve the ambiguity by using a proper file structure, for example, by separating the output files of different steps in different directories.</p>
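+<p>A brief sketch of a constrained wildcard, using the <code>samtools_sort</code> rule from above (the regular expression follows the wildcard name after a comma):</p>
+<pre class="sourceCode bash"><code class="sourceCode bash">rule samtools_sort:
+    input:
+        "mapped_reads/{sample}.bam"
+    output:
+        "sorted_reads/{sample,[A-Za-z0-9]+}.bam"
+    shell:
+        "samtools sort -T sorted_reads/{wildcards.sample} "
+        "-O bam {input} > {output}"</code></pre>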
+</body>
+</html>
diff --git a/snakemake/__init__.py b/snakemake/__init__.py
index b7225f4..566e443 100644
--- a/snakemake/__init__.py
+++ b/snakemake/__init__.py
@@ -81,6 +81,7 @@ def snakemake(snakefile,
               notemp=False,
               nodeps=False,
               keep_target_files=False,
+              keep_shadow=False,
               allowed_rules=None,
               jobscript=None,
               timestamp=False,
@@ -151,6 +152,7 @@ def snakemake(snakefile,
         notemp (bool):              ignore temp file flags, e.g. do not delete output files marked as temp after use (default False)
         nodeps (bool):              ignore dependencies (default False)
         keep_target_files (bool):   Do not adjust the paths of given target files relative to the working directory.
+        keep_shadow (bool):         Do not delete the shadow directory on snakemake startup.
         allowed_rules (set):        Restrict allowed rules to the given set. If None or empty, all rules are used.
         jobscript (str):            path to a custom shell script template for cluster jobs (default None)
         timestamp (bool):           print time stamps in front of any output (default False)
@@ -336,7 +338,8 @@ def snakemake(snakefile,
                                        overwrite_shellcmd=overwrite_shellcmd,
                                        config=config,
                                        config_args=config_args,
-                                       keep_logger=True)
+                                       keep_logger=True,
+                                       keep_shadow=True)
                 success = workflow.execute(
                     targets=targets,
                     dryrun=dryrun,
@@ -380,6 +383,7 @@ def snakemake(snakefile,
                     notemp=notemp,
                     nodeps=nodeps,
                     keep_target_files=keep_target_files,
+                    keep_shadow=keep_shadow,
                     cleanup_metadata=cleanup_metadata,
                     subsnakemake=subsnakemake,
                     updated_files=updated_files,
@@ -446,7 +450,9 @@ def parse_config(args):
             for parser in parsers:
                 try:
                     v = parser(val)
-                    break
+                    # avoid accidental interpretation as function
+                    if not callable(v):
+                        break
                 except:
                     pass
             assert v is not None
@@ -788,6 +794,11 @@ def get_argument_parser():
         help=
         "Do not adjust the paths of given target files relative to the working directory.")
     parser.add_argument(
+        "--keep-shadow",
+        action="store_true",
+        help=
+        "Do not delete the shadow directory on snakemake startup.")
+    parser.add_argument(
         "--allowed-rules",
         nargs="+",
         help=
@@ -847,7 +858,8 @@ def main():
     args = parser.parse_args()
 
     if args.bash_completion:
-        print("complete -C snakemake-bash-completion snakemake")
+        cmd = b"complete -o bashdefault -C snakemake-bash-completion snakemake"
+        sys.stdout.buffer.write(cmd)
         sys.exit(0)
 
     snakemakepath = sys.argv[0]
@@ -970,6 +982,7 @@ def main():
                             benchmark_repeats=args.benchmark_repeats,
                             wait_for_files=args.wait_for_files,
                             keep_target_files=args.keep_target_files,
+                            keep_shadow=args.keep_shadow,
                             allowed_rules=args.allowed_rules)
 
     if args.profile:
@@ -987,30 +1000,31 @@ def bash_completion(snakefile="Snakefile"):
             "Calculate bash completion for snakemake. This tool shall not be invoked by hand.")
         sys.exit(1)
 
+    def print_candidates(candidates):
+        if candidates:
+            candidates = sorted(set(candidates))
+            # Use bytes to avoid '^M' under Windows.
+            sys.stdout.buffer.write(b'\n'.join(s.encode() for s in candidates))
+
     prefix = sys.argv[2]
 
     if prefix.startswith("-"):
-        opts = [action.option_strings[0]
-                for action in get_argument_parser()._actions
-                if action.option_strings and
-                action.option_strings[0].startswith(prefix)]
-        print(*opts, sep="\n")
+        print_candidates(action.option_strings[0]
+                         for action in get_argument_parser()._actions
+                         if action.option_strings and
+                         action.option_strings[0].startswith(prefix))
     else:
         files = glob.glob("{}*".format(prefix))
         if files:
-            print(*files, sep="\n")
+            print_candidates(files)
         elif os.path.exists(snakefile):
             workflow = Workflow(snakefile=snakefile, snakemakepath="snakemake")
             workflow.include(snakefile)
 
-            workflow_files = sorted(set(file
-                                        for file in workflow.concrete_files
-                                        if file.startswith(prefix)))
-            if workflow_files:
-                print(*workflow_files, sep="\n")
-
-            rules = [rule.name for rule in workflow.rules
-                     if rule.name.startswith(prefix)]
-            if rules:
-                print(*rules, sep="\n")
+            print_candidates([file
+                              for file in workflow.concrete_files
+                              if file.startswith(prefix)] +
+                             [rule.name
+                              for rule in workflow.rules
+                              if rule.name.startswith(prefix)])
     sys.exit(0)
diff --git a/snakemake/dag.py b/snakemake/dag.py
index f1ead14..a767bf1 100644
--- a/snakemake/dag.py
+++ b/snakemake/dag.py
@@ -3,20 +3,24 @@ __copyright__ = "Copyright 2015, Johannes Köster"
 __email__ = "koester at jimmy.harvard.edu"
 __license__ = "MIT"
 
+import os
+import shutil
 import textwrap
 import time
 from collections import defaultdict, Counter
 from itertools import chain, combinations, filterfalse, product, groupby
 from functools import partial, lru_cache
+from inspect import isfunction, ismethod
 from operator import itemgetter, attrgetter
 
-from snakemake.io import IOFile, _IOFile, PeriodicityDetector, wait_for_files
+from snakemake.io import IOFile, _IOFile, PeriodicityDetector, wait_for_files, is_flagged, contains_wildcard
 from snakemake.jobs import Job, Reason
 from snakemake.exceptions import RuleException, MissingInputException
 from snakemake.exceptions import MissingRuleException, AmbiguousRuleException
 from snakemake.exceptions import CyclicGraphException, MissingOutputException
 from snakemake.exceptions import IncompleteFilesException
 from snakemake.exceptions import PeriodicWildcardError
+from snakemake.exceptions import RemoteFileException
 from snakemake.exceptions import UnexpectedOutputException, InputFunctionException
 from snakemake.logging import logger
 from snakemake.output_index import OutputIndex
@@ -44,6 +48,7 @@ class DAG:
         self._needrun = set()
         self._priority = dict()
         self._downstream_size = dict()
+        self._temp_input_count = dict()
         self._reason = defaultdict(Reason)
         self._finished = set()
         self._dynamic = set()
@@ -160,14 +165,8 @@ class DAG:
     def downstream_size(self, job):
         return self._downstream_size[job]
 
-    def _job_values(self, jobs, values):
-        return [values[job] for job in jobs]
-
-    def priorities(self, jobs):
-        return self._job_values(jobs, self._priority)
-
-    def downstream_sizes(self, jobs):
-        return self._job_values(jobs, self._downstream_size)
+    def temp_input_count(self, job):
+        return self._temp_input_count[job]
 
     def noneedrun_finished(self, job):
         """
@@ -222,7 +221,7 @@ class DAG:
     def check_output(self, job, wait=3):
         """ Raise exception if output files of job are missing. """
         try:
-            wait_for_files(job.expanded_output, latency_wait=wait)
+            wait_for_files(job.expanded_shadowed_output, latency_wait=wait)
         except IOError as e:
             raise MissingOutputException(str(e), rule=job.rule)
 
@@ -238,6 +237,20 @@ class DAG:
                         ", ".join(job.expanded_output)),
                     rule=job.rule)
 
+    def unshadow_output(self, job):
+        """ Move files from shadow directory to real output paths. """
+        if not job.shadow_dir or not job.expanded_output:
+            return
+        cwd = os.getcwd()
+        for real_output in job.expanded_output:
+            shadow_output = os.path.join(job.shadow_dir, real_output)
+            if os.path.realpath(shadow_output) == os.path.realpath(real_output):
+                continue
+            logger.info("Moving shadow output {} to destination {}".format(
+                shadow_output, real_output))
+            shutil.move(shadow_output, real_output)
+        shutil.rmtree(job.shadow_dir)
+
     def check_periodic_wildcards(self, job):
         """ Raise an exception if a wildcard of the given job appears to be periodic,
         indicating a cyclic dependency. """
@@ -266,6 +279,12 @@ class DAG:
                 logger.info("Touching output file {}.".format(f))
                 f.touch_or_create()
 
+    def temp_input(self, job):
+        for job_, files in self.dependencies[job].items():
+            for f in filter(job_.temp_output.__contains__, files):
+                yield f
+
+
     def handle_temp(self, job):
         """ Remove temp files if they are no longer needed. """
         if self.notemp:
@@ -288,6 +307,52 @@ class DAG:
             logger.info("Removing temporary output file {}.".format(f))
             f.remove()
 
+    def handle_remote(self, job):
+        """ Remove local files if they are no longer needed, and upload to S3. """
+
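+        # a file is still needed if some other unfinished job that must run consumes it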
+        needed = lambda job_, f: any(
+            f in files for j, files in self.depending[job_].items()
+            if not self.finished(j) and self.needrun(j) and j != job)
+
+        def unneeded_files():
+            putative = lambda f: f.is_remote and not f.protected and not f.should_keep_local
+            generated_input = set()
+            for job_, files in self.dependencies[job].items():
+                generated_input |= files
+                for f in filter(putative, files):
+                    if not needed(job_, f):
+                        yield f
+            for f in filter(putative, job.output):
+                if not needed(job, f) and f not in self.targetfiles:
+                    for f_ in job.expand_dynamic(f):
+                        yield f_
+            for f in filter(putative, job.input):
+                # TODO what about remote inputs that are used by multiple jobs?
+                if f not in generated_input:
+                    yield f
+
+        for f in job.expanded_output:
+            if f.is_remote and not f.exists_remote:
+                f.upload_to_remote()
+                remote_mtime = f.mtime
+                # immediately force local mtime to match remote,
+                # since conversions from S3 headers are not 100% reliable
+                # without this, newness comparisons may fail down the line
+                f.touch(times=(remote_mtime, remote_mtime))
+
+                if not f.exists_remote:
+                    raise RemoteFileException(
+                        "The file upload was attempted, but it does not "
+                        "exist on remote. Check that your credentials have "
+                        "read AND write permissions.")
+
+        for f in unneeded_files():
+            logger.info("Removing local output file: {}".format(f))
+            f.remove()
+
+        job.rmdir_empty_remote_dirs()
+
+
     def jobid(self, job):
         if job not in self._jobid:
             self._jobid[job] = len(self._jobid)
@@ -480,11 +545,16 @@ class DAG:
                 1 for _ in self.bfs(self.depending, job,
                                     stop=self.noneedrun_finished)) - 1
 
+    def update_temp_input_count(self):
+        for job in self.needrun_jobs:
+            self._temp_input_count[job] = sum(1 for _ in self.temp_input(job))
+
     def postprocess(self):
         self.update_needrun()
         self.update_priority()
         self.update_ready()
         self.update_downstream_size()
+        self.update_temp_input_count()
 
     def _ready(self, job):
         return self._finished.issuperset(
diff --git a/snakemake/decorators.py b/snakemake/decorators.py
new file mode 100644
index 0000000..5050336
--- /dev/null
+++ b/snakemake/decorators.py
@@ -0,0 +1,17 @@
+__author__ = "Christopher Tomkins-Tinch"
+__copyright__ = "Copyright 2015, Christopher Tomkins-Tinch"
+__email__ = "tomkinsc at broadinstitute.org"
+__license__ = "MIT"
+
+import functools
+import inspect
+
+def dec_all_methods(decorator, prefix='test_'):
+
+    def dec_class(cls):
+        for name, m in inspect.getmembers(cls, inspect.isfunction):
+            if prefix is None or name.startswith(prefix):
+                setattr(cls, name, decorator(m))
+        return cls
+
+    return dec_class
diff --git a/snakemake/exceptions.py b/snakemake/exceptions.py
index 0c547b3..2e564c3 100644
--- a/snakemake/exceptions.py
+++ b/snakemake/exceptions.py
@@ -55,6 +55,11 @@ def format_traceback(tb, linemaps):
             yield '  File "{}", line {}, in {}'.format(file, lineno, function)
 
 
+def log_verbose_traceback(ex):
+    tb = "Full " + "".join(traceback.format_exception(type(ex), ex, ex.__traceback__))
+    logger.debug(tb)
+
+
 def print_exception(ex, linemaps):
     """
     Print an error message for a given exception.
@@ -64,8 +69,7 @@ def print_exception(ex, linemaps):
     linemaps -- a dict of a dict that maps for each snakefile
         the compiled lines to source code lines in the snakefile.
     """
-    tb = "Full " + "".join(traceback.format_exception(type(ex), ex, ex.__traceback__))
-    logger.debug(tb)
+    log_verbose_traceback(ex)
     if isinstance(ex, SyntaxError) or isinstance(ex, IndentationError):
         logger.error(format_error(ex, ex.lineno,
                                   linemaps=linemaps,
@@ -220,6 +224,11 @@ class UnexpectedOutputException(IOException):
                          lineno=lineno,
                          snakefile=snakefile)
 
+class ImproperShadowException(RuleException):
+    def __init__(self, rule, lineno=None, snakefile=None):
+        super().__init__("Rule cannot shadow if using ThreadPoolExecutor",
+                         rule=rule, lineno=lineno, snakefile=snakefile)
+
 
 class AmbiguousRuleException(RuleException):
     def __init__(self, filename, job_a, job_b, lineno=None, snakefile=None):
@@ -282,6 +291,29 @@ class IOFileException(RuleException):
     def __init__(self, msg, lineno=None, snakefile=None):
         super().__init__(msg, lineno=lineno, snakefile=snakefile)
 
+class RemoteFileException(RuleException):
+    def __init__(self, msg, lineno=None, snakefile=None):
+        super().__init__(msg, lineno=lineno, snakefile=snakefile)
+
+class HTTPFileException(RuleException):
+    def __init__(self, msg, lineno=None, snakefile=None):
+        super().__init__(msg, lineno=lineno, snakefile=snakefile)
+
+class FTPFileException(RuleException):
+    def __init__(self, msg, lineno=None, snakefile=None):
+        super().__init__(msg, lineno=lineno, snakefile=snakefile)
+
+class S3FileException(RuleException):
+    def __init__(self, msg, lineno=None, snakefile=None):
+        super().__init__(msg, lineno=lineno, snakefile=snakefile)
+
+class SFTPFileException(RuleException):
+    def __init__(self, msg, lineno=None, snakefile=None):
+        super().__init__(msg, lineno=lineno, snakefile=snakefile)
+
+class DropboxFileException(RuleException):
+    def __init__(self, msg, lineno=None, snakefile=None):
+        super().__init__(msg, lineno=lineno, snakefile=snakefile)
 
 class ClusterJobException(RuleException):
     def __init__(self, job, jobid, jobscript):
diff --git a/snakemake/executors.py b/snakemake/executors.py
index e3ce9c5..7bb145e 100644
--- a/snakemake/executors.py
+++ b/snakemake/executors.py
@@ -6,6 +6,7 @@ __license__ = "MIT"
 
 import os
 import sys
+import contextlib
 import time
 import datetime
 import json
@@ -29,8 +30,8 @@ from snakemake.stats import Stats
 from snakemake.utils import format, Unformattable
 from snakemake.io import get_wildcard_names, Wildcards
 from snakemake.exceptions import print_exception, get_exception_origin
-from snakemake.exceptions import format_error, RuleException
-from snakemake.exceptions import ClusterJobException, ProtectedOutputException, WorkflowError
+from snakemake.exceptions import format_error, RuleException, log_verbose_traceback
+from snakemake.exceptions import ClusterJobException, ProtectedOutputException, WorkflowError, ImproperShadowException
 from snakemake.futures import ProcessPoolExecutor
 
 
@@ -109,6 +110,8 @@ class AbstractExecutor:
     def finish_job(self, job):
         self.dag.handle_touch(job)
         self.dag.check_output(job, wait=self.latency_wait)
+        self.dag.unshadow_output(job)
+        self.dag.handle_remote(job)
         self.dag.handle_protected(job)
         self.dag.handle_temp(job)
 
@@ -209,6 +212,9 @@ class CPUExecutor(RealExecutor):
             callback=None,
             submit_callback=None,
             error_callback=None):
+        if (job.rule.shadow_depth and
+            type(self) == concurrent.futures.ThreadPoolExecutor):
+            raise ImproperShadowException(job.rule)
         job.prepare()
         super()._run(job)
 
@@ -216,12 +222,13 @@ class CPUExecutor(RealExecutor):
         if job.benchmark is not None:
             benchmark = str(job.benchmark)
 
-        pool = self.threadpool if job.shellcmd is not None else self.pool
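+        # shadow jobs must run in a separate process, since they change the
+        # working directory, which would leak across threads in the thread pool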
+        pool = self.pool if job.shellcmd is None or job.is_shadow else self.threadpool
         future = pool.submit(
             run_wrapper, job.rule.run_func, job.input.plainstrings(),
             job.output.plainstrings(), job.params, job.wildcards, job.threads,
             job.resources, job.log.plainstrings(), job.rule.version, benchmark,
-            self.benchmark_repeats, self.workflow.linemaps, self.workflow.debug)
+            self.benchmark_repeats, self.workflow.linemaps, self.workflow.debug,
+            shadow_dir=job.shadow_dir)
 
         future.add_done_callback(partial(self._callback, job, callback,
                                          error_callback))
@@ -262,7 +269,8 @@ class ClusterExecutor(RealExecutor):
                  printshellcmds=False,
                  latency_wait=3,
                  benchmark_repeats=1,
-                 cluster_config=None):
+                 cluster_config=None, local_input=None):
+        local_input = local_input or []
         super().__init__(workflow, dag,
                          printreason=printreason,
                          quiet=quiet,
@@ -290,8 +298,8 @@ class ClusterExecutor(RealExecutor):
         self.exec_job = (
             'cd {workflow.workdir_init} && '
             '{workflow.snakemakepath} --snakefile {workflow.snakefile} '
-            '--force -j{cores} --keep-target-files '
-            '--wait-for-files {job.input} --latency-wait {latency_wait} '
+            '--force -j{cores} --keep-target-files --keep-shadow '
+            '--wait-for-files {local_input} --latency-wait {latency_wait} '
             '--benchmark-repeats {benchmark_repeats} '
             '{overwrite_workdir} {overwrite_config} --nocolor '
             '--notemp --quiet --no-hooks --nolock {target}')
@@ -360,6 +368,7 @@ class ClusterExecutor(RealExecutor):
                 " ".join(self.workflow.config_args))
 
         target = job.output if job.output else job.rule.name
+        local_input = " ".join(job.local_input)
         format = partial(str.format,
                          job=job,
                          overwrite_workdir=overwrite_workdir,
@@ -369,7 +378,7 @@ class ClusterExecutor(RealExecutor):
                          properties=job.json(),
                          latency_wait=self.latency_wait,
                          benchmark_repeats=self.benchmark_repeats,
-                         target=target, **kwargs)
+                         target=target, local_input=local_input, **kwargs)
         try:
             exec_job = format(self.exec_job)
             with open(jobscript, "w") as f:
@@ -692,19 +701,35 @@ class DRMAAExecutor(ClusterExecutor):
             time.sleep(1)
 
 
+ at contextlib.contextmanager
+def change_working_directory(directory=None):
+    """ Change working directory in execution context if provided. """
+    if directory:
+        try:
+            saved_directory = os.getcwd()
+            logger.info("Changing to shadow directory: {}".format(directory))
+            os.chdir(directory)
+            yield
+        finally:
+            os.chdir(saved_directory)
+    else:
+        yield
+
+
 def run_wrapper(run, input, output, params, wildcards, threads, resources, log,
-                version, benchmark, benchmark_repeats, linemaps, debug=False):
+                version, benchmark, benchmark_repeats, linemaps, debug=False,
+                shadow_dir=None):
     """
-    Wrapper around the run method that handles directory creation and
-    output file deletion on error.
+    Wrapper around the run method that handles exceptions and benchmarking.
 
     Arguments
-    run       -- the run method
-    input     -- list of input files
-    output    -- list of output files
-    wildcards -- so far processed wildcards
-    threads   -- usable threads
-    log       -- list of log files
+    run        -- the run method
+    input      -- list of input files
+    output     -- list of output files
+    wildcards  -- so far processed wildcards
+    threads    -- usable threads
+    log        -- list of log files
+    shadow_dir -- optional shadow directory root
     """
     if os.name == "posix" and debug:
         sys.stdin = open('/dev/stdin')
@@ -715,8 +740,9 @@ def run_wrapper(run, input, output, params, wildcards, threads, resources, log,
         for i in range(runs):
             w = time.time()
             # execute the actual run method.
-            run(input, output, params, wildcards, threads, resources, log,
-                version)
+            with change_working_directory(shadow_dir):
+                run(input, output, params, wildcards, threads, resources, log,
+                    version)
             w = time.time() - w
             wallclock.append(w)
 
@@ -724,6 +750,7 @@ def run_wrapper(run, input, output, params, wildcards, threads, resources, log,
         # re-raise the keyboard interrupt in order to record an error in the scheduler but ignore it
         raise e
     except (Exception, BaseException) as ex:
+        log_verbose_traceback(ex)
         # this ensures that exception can be re-raised in the parent thread
         lineno, file = get_exception_origin(ex, linemaps)
         raise RuleException(format_error(ex, lineno,
diff --git a/snakemake/io.py b/snakemake/io.py
index 0ba9cbd..0e7999b 100644
--- a/snakemake/io.py
+++ b/snakemake/io.py
@@ -8,11 +8,12 @@ import re
 import stat
 import time
 import json
+import functools
 from itertools import product, chain
 from collections import Iterable, namedtuple
-from snakemake.exceptions import MissingOutputException, WorkflowError, WildcardError
+from snakemake.exceptions import MissingOutputException, WorkflowError, WildcardError, RemoteFileException
 from snakemake.logging import logger
-
+from inspect import isfunction, ismethod
 
 def lstat(f):
     return os.stat(f, follow_symlinks=os.stat not in os.supports_follow_symlinks)
@@ -41,13 +42,48 @@ class _IOFile(str):
 
     def __new__(cls, file):
         obj = str.__new__(cls, file)
-        obj._is_function = type(file).__name__ == "function"
+        obj._is_function = isfunction(file) or ismethod(file)
         obj._file = file
         obj.rule = None
         obj._regex = None
+
         return obj
 
+    def _refer_to_remote(func):
+        """
+            A decorator so that if the file is remote and has a version
+            of the same file-related function, call that version instead.
+        """
+        @functools.wraps(func)
+        def wrapper(self, *args, **kwargs):
+            if self.is_remote:
+                self.update_remote_filepath()
+                if hasattr(self.remote_object, func.__name__):
+                    return getattr(self.remote_object, func.__name__)(*args, **kwargs)
+            return func(self, *args, **kwargs)
+        return wrapper
+
+    @property
+    def is_remote(self):
+        return is_flagged(self._file, "remote_object")
+
+    def update_remote_filepath(self):
+        # if the file string is different in the iofile, update the remote object
+        # (as in the case of wildcard expansion)
+        if get_flag_value(self._file, "remote_object").file != self._file:
+            get_flag_value(self._file, "remote_object")._iofile = self
+
+    @property
+    def should_keep_local(self):
+        return get_flag_value(self._file, "remote_object").keep_local
+
     @property
+    def remote_object(self):
+        self.update_remote_filepath()
+        return get_flag_value(self._file, "remote_object")
+
+    @property
+    @_refer_to_remote
     def file(self):
         if not self._is_function:
             return self._file
@@ -56,32 +92,69 @@ class _IOFile(str):
                              "may not be used directly.")
 
     @property
+    @_refer_to_remote
     def exists(self):
+        return self.exists_local
+
+    @property
+    def exists_local(self):
         return os.path.exists(self.file)
 
     @property
-    def protected(self):
-        return self.exists and not os.access(self.file, os.W_OK)
+    def exists_remote(self):
+        return (self.is_remote and self.remote_object.exists())
 
     @property
+    def protected(self):
+        return self.exists_local and not os.access(self.file, os.W_OK)
+    
+    @property
+    @_refer_to_remote
     def mtime(self):
+        return self.mtime_local
+
+    @property
+    def mtime_local(self):
         # do not follow symlinks for modification time
-        return lstat(self.file).st_mtime
+        return int(lstat(self.file).st_mtime)
 
     @property
+    def flags(self):
+        return getattr(self._file, "flags", {})
+
+    @property
+    @_refer_to_remote
     def size(self):
+        return self.size_local
+
+    @property
+    def size_local(self):
         # follow symlinks but throw error if invalid
         self.check_broken_symlink()
         return os.path.getsize(self.file)
 
     def check_broken_symlink(self):
         """ Raise WorkflowError if file is a broken symlink. """
-        if not self.exists and lstat(self.file):
+        if not self.exists_local and lstat(self.file):
             raise WorkflowError("File {} seems to be a broken symlink.".format(self.file))
 
     def is_newer(self, time):
         return self.mtime > time
 
+    def download_from_remote(self):
+        if self.is_remote and self.remote_object.exists():
+            logger.info("Downloading from remote: {}".format(self.file))
+            self.remote_object.download()
+        else:
+            raise RemoteFileException("The file to be downloaded does not seem to exist remotely.")
+ 
+    def upload_to_remote(self):
+        if self.is_remote and not self.remote_object.exists():
+            logger.info("Uploading to remote: {}".format(self.file))
+            self.remote_object.upload()
+        else:
+            raise RemoteFileException("The file to be uploaded does not seem to exist remotely.")
+
     def prepare(self):
         path_until_wildcard = re.split(self.dynamic_fill, self.file)[0]
         dir = os.path.dirname(path_until_wildcard)
@@ -108,9 +181,10 @@ class _IOFile(str):
     def remove(self):
         remove(self.file)
 
-    def touch(self):
+    def touch(self, times=None):
+        """ times must be 2-tuple: (atime, mtime) """
         try:
-            lutime(self.file, None)
+            lutime(self.file, times)
         except OSError as e:
             if e.errno == 2:
                 raise MissingOutputException(
@@ -136,11 +210,18 @@ class _IOFile(str):
         if self._is_function:
             f = self._file(Namedlist(fromdict=wildcards))
 
-        return IOFile(apply_wildcards(f, wildcards,
+        # this bit ensures flags are transferred over to files after
+        # wildcards are applied
+
+        file_with_wildcards_applied = IOFile(apply_wildcards(f, wildcards,
                                       fill_missing=fill_missing,
                                       fail_dynamic=fail_dynamic,
                                       dynamic_fill=self.dynamic_fill),
-                      rule=self.rule)
+                                      rule=self.rule)
+
+        file_with_wildcards_applied.clone_flags(self)
+
+        return file_with_wildcards_applied
 
     def get_wildcard_names(self):
         return get_wildcard_names(self.file)
@@ -166,6 +247,17 @@ class _IOFile(str):
     def format_dynamic(self):
         return self.replace(self.dynamic_fill, "{*}")
 
+    def clone_flags(self, other):
+        if isinstance(self._file, str):
+            self._file = AnnotatedString(self._file)
+        if isinstance(other._file, AnnotatedString):
+            self._file.flags = getattr(other._file, "flags", {})
+
+    def set_flags(self, flags):
+        if isinstance(self._file, str):
+            self._file = AnnotatedString(self._file)
+        self._file.flags = flags
+
     def __eq__(self, other):
         f = other._file if isinstance(other, _IOFile) else other
         return self._file == f
@@ -286,9 +378,17 @@ def flag(value, flag_type, flag_value=True):
 
 def is_flagged(value, flag):
     if isinstance(value, AnnotatedString):
-        return flag in value.flags
+        return flag in value.flags and value.flags[flag]
+    if isinstance(value, _IOFile):
+        return flag in value.flags and value.flags[flag]
     return False
 
+def get_flag_value(value, flag_type):
+    if isinstance(value, AnnotatedString):
+        if flag_type in value.flags:
+            return value.flags[flag_type]
+        else:
+            return None
 
 def temp(value):
     """
@@ -297,6 +397,9 @@ def temp(value):
     if is_flagged(value, "protected"):
         raise SyntaxError(
             "Protected and temporary flags are mutually exclusive.")
+    if is_flagged(value, "remote"):
+        raise SyntaxError(
+            "Remote and temporary flags are mutually exclusive.")
     return flag(value, "temp")
 
 
@@ -310,6 +413,9 @@ def protected(value):
     if is_flagged(value, "temp"):
         raise SyntaxError(
             "Protected and temporary flags are mutually exclusive.")
+    if is_flagged(value, "remote"):
+        raise SyntaxError(
+            "Remote and protected flags are mutually exclusive.")
     return flag(value, "protected")
 
 
@@ -318,7 +424,7 @@ def dynamic(value):
     A flag for a file that shall be dynamic, i.e. the multiplicity
     (and wildcard values) will be expanded after a certain
     rule has been run """
-    annotated = flag(value, "dynamic")
+    annotated = flag(value, "dynamic", True)
     tocheck = [annotated] if not_iterable(annotated) else annotated
     for file in tocheck:
         matches = list(_wildcard_regex.finditer(file))
@@ -334,7 +440,6 @@ def dynamic(value):
 def touch(value):
     return flag(value, "touch")
 
-
 def expand(*args, **wildcards):
     """
     Expand wildcards in given filepatterns.
@@ -382,7 +487,7 @@ def limit(pattern, **wildcards):
     })
 
 
-def glob_wildcards(pattern):
+def glob_wildcards(pattern, files=None):
     """
     Glob the values of the wildcards by matching the given pattern to the filesystem.
     Returns a named tuple with a list of values for each wildcard.
@@ -400,16 +505,18 @@ def glob_wildcards(pattern):
     wildcards = Wildcards(*[list() for name in names])
 
     pattern = re.compile(regex(pattern))
-    for dirpath, dirnames, filenames in os.walk(dirname):
-        for f in chain(filenames, dirnames):
-            if dirpath != ".":
-                f = os.path.join(dirpath, f)
-            match = re.match(pattern, f)
-            if match:
-                for name, value in match.groupdict().items():
-                    getattr(wildcards, name).append(value)
-    return wildcards
 
+    if files is None:
+        files = ((os.path.join(dirpath, f) if dirpath != "." else f) 
+                    for dirpath, dirnames, filenames in os.walk(dirname) 
+                    for f in chain(filenames, dirnames))
+
+    for f in files:
+        match = re.match(pattern, f)
+        if match:
+            for name, value in match.groupdict().items():
+                getattr(wildcards, name).append(value)
+    return wildcards
 
 # TODO rewrite Namedlist!
 class Namedlist(list):
diff --git a/snakemake/jobs.py b/snakemake/jobs.py
index fdba8b5..7c548aa 100644
--- a/snakemake/jobs.py
+++ b/snakemake/jobs.py
@@ -7,13 +7,14 @@ import os
 import sys
 import base64
 import json
+import tempfile
 
 from collections import defaultdict
 from itertools import chain
 from functools import partial
 from operator import attrgetter
 
-from snakemake.io import IOFile, Wildcards, Resources, _IOFile
+from snakemake.io import IOFile, Wildcards, Resources, _IOFile, is_flagged, contains_wildcard
 from snakemake.utils import format, listfiles
 from snakemake.exceptions import RuleException, ProtectedOutputException
 from snakemake.exceptions import UnexpectedOutputException
@@ -47,6 +48,7 @@ class Job:
         }
         self.threads = self.resources_dict["_cores"]
         self.resources = Resources(fromdict=self.resources_dict)
+        self.shadow_dir = None
         self._inputsize = None
 
         self.dynamic_output, self.dynamic_input = set(), set()
@@ -75,6 +77,10 @@ class Job:
                 self._hash ^= o.__hash__()
 
     @property
+    def is_shadow(self):
+        return self.rule.shadow_depth is not None
+
+    @property
     def priority(self):
         return self.dag.priority(self)
 
@@ -124,27 +130,33 @@ class Job:
         """ Iterate over output files while dynamic output is expanded. """
         for f, f_ in zip(self.output, self.rule.output):
             if f in self.dynamic_output:
-                expansion = self.expand_dynamic(
-                    f_,
-                    restriction=self.wildcards,
-                    omit_value=_IOFile.dynamic_fill)
+                expansion = self.expand_dynamic(f_)
                 if not expansion:
                     yield f_
                 for f, _ in expansion:
-                    yield IOFile(f, self.rule)
+                    file_to_yield = IOFile(f, self.rule)
+
+                    file_to_yield.clone_flags(f_)
+
+                    yield file_to_yield
             else:
                 yield f
 
     @property
+    def expanded_shadowed_output(self):
+        """ Get the paths of output files, resolving shadow directory. """
+        if not self.shadow_dir:
+            return self.expanded_output
+        for f in self.expanded_output:
+            yield os.path.join(self.shadow_dir, f)
+
+    @property
     def dynamic_wildcards(self):
         """ Return all wildcard values determined from dynamic output. """
         combinations = set()
         for f, f_ in zip(self.output, self.rule.output):
             if f in self.dynamic_output:
-                for f, w in self.expand_dynamic(
-                    f_,
-                    restriction=self.wildcards,
-                    omit_value=_IOFile.dynamic_fill):
+                for f, w in self.expand_dynamic(f_):
                     combinations.add(tuple(w.items()))
         wildcards = defaultdict(list)
         for combination in combinations:
@@ -159,6 +171,34 @@ class Job:
         return set(f for f in self.input
                    if not f.exists and not f in self.subworkflow_input)
 
+
+    @property
+    def existing_remote_input(self):
+        files = set()
+
+        for f in self.input:
+            if f.is_remote:
+                if f.exists_remote:
+                    files.add(f)
+        return files
+
+    @property
+    def existing_remote_output(self):
+        files = set()
+
+        for f in self.remote_output:
+            if f.exists_remote:
+                files.add(f)
+        return files
+
+    @property
+    def missing_remote_input(self):
+        return self.remote_input - self.existing_remote_input
+
+    @property
+    def missing_remote_output(self):
+        return self.remote_output - self.existing_remote_output
+
     @property
     def output_mintime(self):
         """ Return oldest output file. """
@@ -188,16 +228,92 @@ class Job:
         for f, f_ in zip(self.output, self.rule.output):
             if requested is None or f in requested:
                 if f in self.dynamic_output:
-                    if not self.expand_dynamic(
-                        f_,
-                        restriction=self.wildcards,
-                        omit_value=_IOFile.dynamic_fill):
+                    if not self.expand_dynamic(f_):
                         files.add("{} (dynamic)".format(f_))
                 elif not f.exists:
                     files.add(f)
         return files
 
     @property
+    def local_input(self):
+        for f in self.input:
+            if not f.is_remote:
+                yield f
+
+    @property
+    def local_output(self):
+        for f in self.output:
+            if not f.is_remote:
+                yield f
+
+    @property
+    def remote_input(self):
+        for f in self.input:
+            if f.is_remote:
+                yield f
+
+    @property
+    def remote_output(self):
+        for f in self.output:
+            if f.is_remote:
+                yield f
+
+    @property
+    def remote_input_newer_than_local(self):
+        files = set()
+        for f in self.remote_input:
+            if (f.exists_remote and f.exists_local) and (f.mtime > f.mtime_local):
+                files.add(f)
+        return files
+
+    @property
+    def remote_input_older_than_local(self):
+        files = set()
+        for f in self.remote_input:
+            if (f.exists_remote and f.exists_local) and (f.mtime < f.mtime_local):
+                files.add(f)
+        return files
+
+    @property
+    def remote_output_newer_than_local(self):
+        files = set()
+        for f in self.remote_output:
+            if (f.exists_remote and f.exists_local) and (f.mtime > f.mtime_local):
+                files.add(f)
+        return files
+
+    @property
+    def remote_output_older_than_local(self):
+        files = set()
+        for f in self.remote_output:
+            if (f.exists_remote and f.exists_local) and (f.mtime < f.mtime_local):
+                files.add(f)
+        return files
+
+    def transfer_updated_files(self):
+        for f in self.remote_output_older_than_local | self.remote_input_older_than_local:
+            f.upload_to_remote()
+
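+        # conversely, a remote copy *newer* than the local one is pulled down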
+        for f in self.remote_output_newer_than_local | self.remote_input_newer_than_local:
+            f.download_from_remote()
+
+    @property
+    def files_to_download(self):
+        to_download = set()
+
+        for f in self.input:
+            if f.is_remote:
+                if not f.exists_local and f.exists_remote:
+                    to_download.add(f)
+
+        return to_download | self.remote_input_newer_than_local
+
+    @property
+    def files_to_upload(self):
+        return self.missing_remote_input | self.remote_input_older_than_local
+
+    @property
     def existing_output(self):
         return filter(lambda f: f.exists, self.expanded_output)
 
@@ -211,6 +327,7 @@ class Job:
         Prepare execution of job.
         This includes creation of directories and deletion of previously
         created dynamic files.
+        Creates a shadow directory for the job if specified.
         """
 
         self.check_protected_output()
@@ -224,21 +341,57 @@ class Job:
                     self.rule, unexpected_output))
 
         if self.dynamic_output:
-            for f, _ in chain(*map(partial(self.expand_dynamic,
-                                           restriction=self.wildcards,
-                                           omit_value=_IOFile.dynamic_fill),
+            for f, _ in chain(*map(self.expand_dynamic,
                                    self.rule.dynamic_output)):
                 os.remove(f)
         for f, f_ in zip(self.output, self.rule.output):
             f.prepare()
+
+        for f in self.files_to_download:
+            f.download_from_remote()
+
         for f in self.log:
             f.prepare()
         if self.benchmark:
             self.benchmark.prepare()
 
+        if not self.is_shadow:
+            return
+        # Create shadow directory structure
+        self.shadow_dir = tempfile.mkdtemp(
+            dir=self.rule.workflow.persistence.shadow_path)
+        cwd = os.getcwd()
+        # Shallow simply symlink everything in the working directory.
+        if self.rule.shadow_depth == "shallow":
+            for source in os.listdir(cwd):
+                link = os.path.join(self.shadow_dir, source)
+                os.symlink(os.path.abspath(source), link)
+        elif self.rule.shadow_depth == "full":
+            snakemake_dir = os.path.join(cwd, ".snakemake")
+            for dirpath, dirnames, filenames in os.walk(cwd):
+                # Must exclude .snakemake and its children to avoid infinite
+                # loop of symlinks.
+                if os.path.commonprefix([snakemake_dir, dirpath]) == snakemake_dir:
+                    continue
+                for dirname in dirnames:
+                    if dirname == ".snakemake":
+                        continue
+                    relative_source = os.path.relpath(os.path.join(dirpath, dirname))
+                    shadow = os.path.join(self.shadow_dir, relative_source)
+                    os.mkdir(shadow)
+
+                for filename in filenames:
+                    source = os.path.join(dirpath, filename)
+                    relative_source = os.path.relpath(source)
+                    link = os.path.join(self.shadow_dir, relative_source)
+                    os.symlink(source, link)
+
     def cleanup(self):
         """ Cleanup output files. """
         to_remove = [f for f in self.expanded_output if f.exists]
+
+        to_remove.extend([f for f in self.remote_input if f.exists])
+        to_remove.extend([f for f in self.remote_output if f.exists_local])
         if to_remove:
             logger.info("Removing output files of failed job {}"
                         " since they might be corrupted:\n{}".format(
@@ -246,6 +399,22 @@ class Job:
             for f in to_remove:
                 f.remove()
 
+            self.rmdir_empty_remote_dirs()
+
+    @property
+    def empty_remote_dirs(self):
+        for f in (set(self.output) | set(self.input)):
+            if f.is_remote:
+                if os.path.exists(os.path.dirname(f)) and not os.listdir(os.path.dirname(f)):
+                    yield os.path.dirname(f)
+
+    def rmdir_empty_remote_dirs(self):
+        for d in self.empty_remote_dirs:
+            try:
+                os.removedirs(d)
+            except OSError:
+                pass  # it's ok if we can't remove the leaf
+
     def format_wildcards(self, string, **variables):
         """ Format a string with variables from the job. """
         _variables = dict()
@@ -306,12 +475,11 @@ class Job:
     def __hash__(self):
         return self._hash
 
-    @staticmethod
-    def expand_dynamic(pattern, restriction=None, omit_value=None):
+    def expand_dynamic(self, pattern):
         """ Expand dynamic files. """
         return list(listfiles(pattern,
-                              restriction=restriction,
-                              omit_value=omit_value))
+                              restriction=self.wildcards,
+                              omit_value=_IOFile.dynamic_fill))
 
 
 class Reason:
diff --git a/snakemake/output_index.py b/snakemake/output_index.py
index dbcfd95..2c9456f 100644
--- a/snakemake/output_index.py
+++ b/snakemake/output_index.py
@@ -16,7 +16,7 @@ class Node:
         self.children = defaultdict(Node)
 
     def __repr__(self):
-        return "({}) -> {}".format(self.rules, dict(self.children))
+        return "({}) -> {}".format(list(map(str, self.rules)), dict(self.children))
 
 
 class OutputIndex:
@@ -27,7 +27,8 @@ class OutputIndex:
             output = list(rule.output)
             if rule.benchmark:
                 output.append(rule.benchmark)
-            for constant_prefix in sorted(map(_IOFile.constant_prefix, output)):
+            for constant_prefix in sorted(map(_IOFile.constant_prefix,
+                                              output)):
                 self.add_output(rule, constant_prefix)
 
     def add_output(self, rule, constant_prefix):
diff --git a/snakemake/parser.py b/snakemake/parser.py
index 831a2f7..e1eccf9 100644
--- a/snakemake/parser.py
+++ b/snakemake/parser.py
@@ -358,6 +358,10 @@ class Threads(RuleKeywordState):
     pass
 
 
+class Shadow(RuleKeywordState):
+    pass
+
+
 class Resources(RuleKeywordState):
     pass
 
@@ -396,14 +400,15 @@ class Run(RuleKeywordState):
     def start(self):
         yield "@workflow.run"
         yield "\n"
-        yield ("def __{rulename}(input, output, params, wildcards, threads, "
-               "resources, log, version):".format(rulename=self.rulename))
+        yield ("def __rule_{rulename}(input, output, params, wildcards, threads, "
+               "resources, log, version):".format(rulename=self.rulename if self.rulename is not None else self.snakefile.rulecount))
 
     def end(self):
         yield ""
 
     def is_block_end(self, token):
-        return (self.line and self.was_indented and self.indent <= 0) or is_eof(token)
+        return ((self.line and self.was_indented and self.indent <= 0)
+                or is_eof(token))
 
 
 class Shell(Run):
@@ -467,6 +472,54 @@ class Shell(Run):
             yield shellcmd, token
 
 
+class Script(Run):
+    def __init__(self, snakefile, rulename,
+                 base_indent=0,
+                 dedent=0,
+                 root=True):
+        super().__init__(snakefile, rulename,
+                         base_indent=base_indent,
+                         dedent=dedent,
+                         root=root)
+        self.path = list()
+        self.token = None
+
+    def is_block_end(self, token):
+        return (self.line and self.indent <= 0) or is_eof(token)
+
+    def start(self):
+        for t in super().start():
+            yield t
+        yield "\n"
+        yield INDENT * (self.effective_indent + 1)
+        yield "script("
+        yield '"{}"'.format(
+            os.path.abspath(os.path.dirname(self.snakefile.path)))
+        yield ", "
+
+    def end(self):
+        # the end of the block has been detected, so we can safely reset the indent to zero here
+        self.indent = 0
+        yield ", input, output, params, wildcards, threads, resources, log, config"
+        yield ")"
+        for t in super().end():
+            yield t
+
+    def decorate_end(self, token):
+        if self.token is None:
+            # no block after script keyword
+            self.error(
+                "Script path must be given as string after the script keyword.",
+                token)
+        for t in self.end():
+            yield t, self.token
+
+    def block_content(self, token):
+        self.token = token
+        self.path.append(token.string)
+        yield token.string, token
+
+
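+# Illustrative sketch (hypothetical rule name; paths mirror tests/test_script):
+# with the Script state above, a rule can point at an external script instead
+# of a run/shell block:
+#
+#   rule myrule:
+#       input: "test.in"
+#       output: "test.out"
+#       script: "scripts/test.py"
+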
 class Rule(GlobalKeywordState):
     subautomata = dict(input=Input,
                        output=Output,
@@ -478,8 +531,10 @@ class Rule(GlobalKeywordState):
                        log=Log,
                        message=Message,
                        benchmark=Benchmark,
+                       shadow=Shadow,
                        run=Run,
-                       shell=Shell)
+                       shell=Shell,
+                       script=Script)
 
     def __init__(self, snakefile, base_indent=0, dedent=0, root=True):
         super().__init__(snakefile,
@@ -487,8 +542,8 @@ class Rule(GlobalKeywordState):
                          dedent=dedent,
                          root=root)
         self.state = self.name
-        self.rulename = None
         self.lineno = None
+        self.rulename = None
         self.run = False
         self.snakefile.rulecount += 1
 
@@ -527,7 +582,7 @@ class Rule(GlobalKeywordState):
     def block_content(self, token):
         if is_name(token):
             try:
-                if token.string == "run" or token.string == "shell":
+                if token.string == "run" or token.string == "shell" or token.string == "script":
                     if self.run:
                         raise self.error(
                             "Multiple run or shell keywords in rule {}.".format(
@@ -604,7 +659,7 @@ class Python(TokenAutomaton):
 
 
 class Snakefile:
-    def __init__(self, path):
+    def __init__(self, path, rulecount=0):
         self.path = path
         try:
             self.file = open(self.path, encoding="utf-8")
@@ -616,7 +671,7 @@ class Snakefile:
                 raise WorkflowError("Failed to open {}.".format(path))
 
         self.tokens = tokenize.generate_tokens(self.file.readline)
-        self.rulecount = 0
+        self.rulecount = rulecount
         self.lines = 0
 
     def __next__(self):
@@ -641,9 +696,9 @@ def format_tokens(tokens):
         t_ = t
 
 
-def parse(path, overwrite_shellcmd=None):
+def parse(path, overwrite_shellcmd=None, rulecount=0):
     Shell.overwrite_shellcmd = overwrite_shellcmd
-    with Snakefile(path) as snakefile:
+    with Snakefile(path, rulecount=rulecount) as snakefile:
         automaton = Python(snakefile)
         linemap = dict()
         compilation = list()
@@ -654,6 +709,7 @@ def parse(path, overwrite_shellcmd=None):
             snakefile.lines += t.count("\n")
             compilation.append(t)
         compilation = "".join(format_tokens(compilation))
-        last = max(linemap)
-        linemap[last + 1] = linemap[last]
-        return compilation, linemap
+        if linemap:
+            last = max(linemap)
+            linemap[last + 1] = linemap[last]
+        return compilation, linemap, snakefile.rulecount
diff --git a/snakemake/persistence.py b/snakemake/persistence.py
index eba80c0..b33b519 100644
--- a/snakemake/persistence.py
+++ b/snakemake/persistence.py
@@ -36,10 +36,11 @@ class Persistence:
         self._input_path = os.path.join(self.path, "input_tracking")
         self._params_path = os.path.join(self.path, "params_tracking")
         self._shellcmd_path = os.path.join(self.path, "shellcmd_tracking")
+        self.shadow_path = os.path.join(self.path, "shadow")
 
         for d in (self._incomplete_path, self._version_path, self._code_path,
                   self._rule_path, self._input_path, self._params_path,
-                  self._shellcmd_path):
+                  self._shellcmd_path, self.shadow_path):
             if not os.path.exists(d):
                 os.mkdir(d)
 
@@ -110,6 +111,11 @@ class Persistence:
         self._delete_record(self._params_path, path)
         self._delete_record(self._shellcmd_path, path)
 
+    def cleanup_shadow(self):
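+        # remove shadow directories left over from previous runs and
+        # recreate an empty shadow root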
+        if os.path.exists(self.shadow_path):
+            shutil.rmtree(self.shadow_path)
+            os.mkdir(self.shadow_path)
+
     def started(self, job):
         for f in job.output:
             self._record(self._incomplete_path, "", f)
@@ -209,7 +215,7 @@ class Persistence:
 
     @lru_cache()
     def _params(self, job):
-        return "\n".join(sorted(job.params))
+        return "\n".join(sorted(map(repr, job.params)))
 
     @lru_cache()
     def _output(self, job):
diff --git a/snakemake/remote/FTP.py b/snakemake/remote/FTP.py
new file mode 100644
index 0000000..8ce6153
--- /dev/null
+++ b/snakemake/remote/FTP.py
@@ -0,0 +1,126 @@
+__author__ = "Christopher Tomkins-Tinch"
+__copyright__ = "Copyright 2015, Christopher Tomkins-Tinch"
+__email__ = "tomkinsc at broadinstitute.org"
+__license__ = "MIT"
+
+import os, re, ftplib
+from itertools import product, chain
+from contextlib import contextmanager
+
+# module-specific
+from snakemake.remote import AbstractRemoteProvider, DomainObject
+from snakemake.exceptions import FTPFileException, WorkflowError
+import snakemake.io
+
+try:
+    # third-party modules
+    import ftputil
+    import ftputil.session
+except ImportError as e:
+    raise WorkflowError("The Python 3 package 'ftputil' " + 
+        "must be installed to use SFTP remote() file functionality. %s" % e.msg)
+
+class RemoteProvider(AbstractRemoteProvider):
+    def __init__(self, *args, **kwargs):
+        super(RemoteProvider, self).__init__(*args, **kwargs)
+
+class RemoteObject(DomainObject):
+    """ This is a class to interact with an FTP server.
+    """
+
+    def __init__(self, *args, keep_local=False, provider=None, encrypt_data_channel=False, **kwargs):
+        super(RemoteObject, self).__init__(*args, keep_local=keep_local, provider=provider, **kwargs)
+
+        self.encrypt_data_channel = encrypt_data_channel
+        
+    # === Implementations of abstract class members ===
+
+    @contextmanager  # code after 'yield' runs as the __exit__() phase
+    def ftpc(self):
+        # if args have been provided to remote(), use them over those given to RemoteProvider()
+        args_to_use = self.provider.args
+        if len(self.args):
+            args_to_use = self.args
+
+        # use kwargs passed in to remote() to override those given to the RemoteProvider()
+        # default to the host and port given as part of the file, falling back to one specified
+        # as a kwarg to remote() or the RemoteProvider (overriding the latter with the former if both)
+        kwargs_to_use = {}
+        kwargs_to_use["host"] = self.host
+        kwargs_to_use["username"] = None
+        kwargs_to_use["password"] = None
+        kwargs_to_use["port"] = int(self.port) if self.port else 21
+        kwargs_to_use["encrypt_data_channel"] = self.encrypt_data_channel
+
+        for k,v in self.provider.kwargs.items():
+            kwargs_to_use[k] = v
+        for k,v in self.kwargs.items():
+            kwargs_to_use[k] = v
+
+        ftp_base_class = ftplib.FTP_TLS if kwargs_to_use["encrypt_data_channel"] else ftplib.FTP
+
+        ftp_session_factory = ftputil.session.session_factory(
+            base_class=ftp_base_class,
+            port=kwargs_to_use["port"],
+            encrypt_data_channel=kwargs_to_use["encrypt_data_channel"],
+            debug_level=None)
+
+        conn = ftputil.FTPHost(kwargs_to_use["host"], kwargs_to_use["username"],
+                               kwargs_to_use["password"], session_factory=ftp_session_factory)
+        try:
+            yield conn
+        finally:
+            conn.close()
+
+    def exists(self):
+        if self._matched_address:
+            with self.ftpc() as ftpc:
+                return ftpc.path.exists(self.remote_path)
+        else:
+            raise FTPFileException("The file cannot be parsed as an FTP path in form 'host:port/abs/path/to/file': %s" % self.file())
+
+    def mtime(self):
+        if self.exists():
+            with self.ftpc() as ftpc:
+                ftpc.synchronize_times()
+                return ftpc.path.getmtime(self.remote_path)
+        else:
+            raise FTPFileException("The file does not seem to exist remotely: %s" % self.file())
+
+    def size(self):
+        if self.exists():
+            with self.ftpc() as ftpc:
+                return ftpc.path.getsize(self.remote_path)
+        else:
+            return self._iofile.size_local
+
+    def download(self, make_dest_dirs=True):
+        with self.ftpc() as ftpc:
+            if self.exists():
+                # if the destination path does not exist
+                if make_dest_dirs:
+                    os.makedirs(os.path.dirname(self.local_path), exist_ok=True)
+                ftpc.synchronize_times()
+                ftpc.download(source=self.remote_path, target=self.local_path)
+            else:
+                raise FTPFileException("The file does not seem to exist remotely: %s" % self.file())
+
+    def upload(self):
+        with self.ftpc() as ftpc:
+            ftpc.synchronize_times()
+            ftpc.upload(source=self.local_path, target=self.remote_path)
+
+    @property
+    def list(self):
+        file_list = []
+
+        first_wildcard = self._iofile.constant_prefix()
+        dirname = first_wildcard.replace(self.path_prefix, "")
+
+        with self.ftpc() as ftpc:
+            file_list = [(os.path.join(dirpath, f) if dirpath != "." else f)
+                         for dirpath, dirnames, filenames in ftpc.walk(dirname)
+                         for f in chain(filenames, dirnames)]
+            file_list = [p[1:] if p.startswith("/") else p for p in file_list]
+
+        return file_list
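+
+# Illustrative usage in a Snakefile (host, credentials and path are
+# placeholders):
+#
+#   from snakemake.remote.FTP import RemoteProvider as FTPRemoteProvider
+#   FTP = FTPRemoteProvider(username="user", password="pass")
+#
+#   rule fetch:
+#       input: FTP.remote("example.com/path/to/file.txt")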
diff --git a/snakemake/remote/GS.py b/snakemake/remote/GS.py
new file mode 100644
index 0000000..d7febbe
--- /dev/null
+++ b/snakemake/remote/GS.py
@@ -0,0 +1,21 @@
+__author__ = "Christopher Tomkins-Tinch"
+__copyright__ = "Copyright 2015, Christopher Tomkins-Tinch"
+__email__ = "tomkinsc at broadinstitute.org"
+__license__ = "MIT"
+
+# module-specific
+from snakemake.remote.S3 import RemoteObject, RemoteProvider as S3RemoteProvider
+from snakemake.exceptions import WorkflowError
+
+try:
+    # third-party modules
+    import boto
+    from boto.s3.key import Key
+    from filechunkio import FileChunkIO
+except ImportError as e:
+    raise WorkflowError("The Python 3 packages 'boto' and 'filechunkio' " + 
+        "need to be installed to use S3 remote() file functionality. %s" % e.msg)
+
+class RemoteProvider(S3RemoteProvider):
+    def __init__(self, *args, **kwargs):
+        kwargs["host"] = "storage.googleapis.com"
+        super(RemoteProvider, self).__init__(*args, **kwargs)
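+
+# Illustrative usage (credentials and bucket/key are placeholders; GS uses the
+# S3-compatible interoperability API via the host set above):
+#
+#   from snakemake.remote.GS import RemoteProvider as GSRemoteProvider
+#   GS = GSRemoteProvider(access_key_id="...", secret_access_key="...")
+#
+#   rule fetch:
+#       input: GS.remote("mybucket/myfile.txt")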
diff --git a/snakemake/remote/HTTP.py b/snakemake/remote/HTTP.py
new file mode 100644
index 0000000..b529763
--- /dev/null
+++ b/snakemake/remote/HTTP.py
@@ -0,0 +1,143 @@
+__author__ = "Christopher Tomkins-Tinch"
+__copyright__ = "Copyright 2015, Christopher Tomkins-Tinch"
+__email__ = "tomkinsc at broadinstitute.org"
+__license__ = "MIT"
+
+import os, re, http.client
+import email.utils
+from contextlib import contextmanager
+
+# module-specific
+from snakemake.remote import AbstractRemoteProvider, DomainObject
+from snakemake.exceptions import HTTPFileException, WorkflowError
+import snakemake.io
+
+try:
+    # third-party modules
+    import requests
+except ImportError as e:
+    raise WorkflowError("The Python 3 package 'requests' " + 
+        "must be installed to use HTTP(S) remote() file functionality. %s" % e.msg)
+
+class RemoteProvider(AbstractRemoteProvider):
+    def __init__(self, *args, **kwargs):
+        super(RemoteProvider, self).__init__(*args, **kwargs)
+
+class RemoteObject(DomainObject):
+    """ This is a class to interact with an HTTP server.
+    """
+
+    def __init__(self, *args, keep_local=False, provider=None, insecure=False, additional_request_string="", **kwargs):
+        super(RemoteObject, self).__init__(*args, keep_local=keep_local, provider=provider, **kwargs)
+
+        self.insecure = insecure
+        self.additional_request_string = additional_request_string
+        
+    # === Implementations of abstract class members ===
+
+    @contextmanager  # code after 'yield' runs as the __exit__() phase
+    def httpr(self, verb="GET", stream=False):
+        # if args have been provided to remote(), use them over those given to RemoteProvider()
+        args_to_use = self.provider.args
+        if len(self.args):
+            args_to_use = self.args
+
+        # use kwargs passed in to remote() to override those given to the RemoteProvider()
+        # default to the host and port given as part of the file, falling back to one specified
+        # as a kwarg to remote() or the RemoteProvider (overriding the latter with the former if both)
+        kwargs_to_use = {}
+        kwargs_to_use["username"] = None
+        kwargs_to_use["password"] = None
+        kwargs_to_use["auth"] = None
+
+        for k,v in self.provider.kwargs.items():
+            kwargs_to_use[k] = v
+        for k,v in self.kwargs.items():
+            kwargs_to_use[k] = v
+
+        if kwargs_to_use["username"] and kwargs_to_use["password"]:
+            kwargs_to_use["auth"] = ('user', 'pass')
+        else:
+            del kwargs_to_use["username"]
+            del kwargs_to_use["password"]
+
+        url = self._iofile._file + self.additional_request_string
+        # default to HTTPS
+        if not self.insecure:
+            protocol = "https://"
+        else:
+            protocol = "http://"
+        url = protocol + url
+
+        if verb.upper() == "GET":
+            r = requests.get(url, *args_to_use, stream=stream, **kwargs_to_use)
+        if verb.upper() == "HEAD":
+            r = requests.head(url, *args_to_use, **kwargs_to_use)
+
+        yield r
+        r.close()
+
+    def exists(self):
+        if self._matched_address:
+            with self.httpr(verb="HEAD") as httpr:
+                return httpr.status_code == requests.codes.ok
+        else:
+            raise HTTPFileException("The file cannot be parsed as an HTTP path in form 'host:port/abs/path/to/file': %s" % self.file())
+
+    def mtime(self):
+        if self.exists():
+            with self.httpr(verb="HEAD") as httpr:
+                file_mtime = self.get_header_item(httpr, "last-modified", default=0)
+                modified_tuple = email.utils.parsedate_tz(file_mtime)
+                epochTime = int(email.utils.mktime_tz(modified_tuple))
+                return epochTime
+        else:
+            raise HTTPFileException("The file does not seem to exist remotely: %s" % self.file())
+
+    def size(self):
+        if self.exists():
+            with self.httpr(verb="HEAD") as httpr:
+
+                content_size = int(self.get_header_item(httpr, "content-size", default=0))
+
+                return content_size
+        else:
+            return self._iofile.size_local
+
+    def download(self, make_dest_dirs=True):
+        with self.httpr(stream=True) as httpr:
+            if self.exists():
+                # if make_dest_dirs is set, create the destination directory as needed
+                if make_dest_dirs:
+                    os.makedirs(os.path.dirname(self.local_path), exist_ok=True)
+
+                with open(self.local_path, 'wb') as f:
+                    for chunk in httpr.iter_content(chunk_size=1024):
+                        if chunk:  # filter out keep-alives
+                            f.write(chunk)
+            else:
+                raise HTTPFileException("The file does not seem to exist remotely: %s" % self.file())
+
+    def upload(self):
+        raise HTTPFileException("Upload is not permitted for the HTTP remote provider. Is an output set to HTTP.remote()?")
+
+    def get_header_item(self, httpr, header_name, default):
+        """
+            Since HTTP header capitalization may differ, this returns
+            a header value regardless of case
+        """
+
+        header_value = default
+        for k,v in httpr.headers.items():
+            if k.lower() == header_name:
+                header_value = v
+        return header_value
+
+    @property
+    def list(self):
+        raise HTTPFileException("The HTTP Remote Provider does not currently support list-based operations like glob_wildcards().")
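+
+# Illustrative usage (URL is a placeholder; the provider itself prepends
+# "https://", or "http://" when insecure=True):
+#
+#   from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider
+#   HTTP = HTTPRemoteProvider()
+#
+#   rule fetch:
+#       input: HTTP.remote("example.com/path/to/file.txt")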
diff --git a/snakemake/remote/S3.py b/snakemake/remote/S3.py
new file mode 100644
index 0000000..29442d5
--- /dev/null
+++ b/snakemake/remote/S3.py
@@ -0,0 +1,443 @@
+__author__ = "Christopher Tomkins-Tinch"
+__copyright__ = "Copyright 2015, Christopher Tomkins-Tinch"
+__email__ = "tomkinsc at broadinstitute.org"
+__license__ = "MIT"
+
+# built-ins
+import os, re, sys
+import math
+import time
+import email.utils
+from time import mktime
+import datetime
+import functools
+import concurrent.futures
+
+# module-specific
+from snakemake.remote import AbstractRemoteObject, AbstractRemoteProvider
+from snakemake.exceptions import MissingOutputException, WorkflowError, WildcardError, RemoteFileException, S3FileException
+import snakemake.io 
+
+try:
+    # third-party modules
+    import boto
+    from boto.s3.key import Key
+    from filechunkio import FileChunkIO
+except ImportError as e:
+    raise WorkflowError("The Python 3 packages 'boto' and 'filechunkio' " + 
+        "need to be installed to use S3 remote() file functionality. %s" % e.msg)
+
+class RemoteProvider(AbstractRemoteProvider):
+    def __init__(self, *args, **kwargs):
+        super(RemoteProvider, self).__init__(*args, **kwargs)
+
+        self._s3c = S3Helper(*args, **kwargs)
+    
+    def remote_interface(self):
+        return self._s3c
+
+class RemoteObject(AbstractRemoteObject):
+    """ This is a class to interact with the AWS S3 object store.
+    """
+
+    def __init__(self, *args, keep_local=False, provider=None, **kwargs):
+        super(RemoteObject, self).__init__(*args, keep_local=keep_local, provider=provider, **kwargs)
+
+        if provider:
+            self._s3c = provider.remote_interface()
+        else:
+            self._s3c = S3Helper(*args, **kwargs)
+
+    # === Implementations of abstract class members ===
+
+    def exists(self):
+        if self._matched_s3_path:
+            return self._s3c.exists_in_bucket(self.s3_bucket, self.s3_key)
+        else:
+            raise S3FileException("The file cannot be parsed as an s3 path in form 'bucket/key': %s" % self.file())
+
+    def mtime(self):
+        if self.exists():
+            return self._s3c.key_last_modified(self.s3_bucket, self.s3_key)
+        else:
+            raise S3FileException("The file does not seem to exist remotely: %s" % self.file())
+
+    def size(self):
+        if self.exists():
+            return self._s3c.key_size(self.s3_bucket, self.s3_key)
+        else:
+            return self._iofile.size_local
+
+    def download(self):
+        self._s3c.download_from_s3(self.s3_bucket, self.s3_key, self.file())
+
+    def upload(self):
+        if self.size() > 10 * 1024 * 1024: # S3 complains if multipart uploads are <10MB
+            self._s3c.upload_to_s3_multipart(self.s3_bucket, self.file(), self.s3_key)
+        else:
+            self._s3c.upload_to_s3(self.s3_bucket, self.file(), self.s3_key)
+
+    @property
+    def list(self):
+        return [k.name for k in self._s3c.list_keys(self.s3_bucket)]
+
+    # === Related methods ===
+
+    @property
+    def _matched_s3_path(self):
+        return re.search("(?P<bucket>[^/]*)/(?P<key>.*)", self.file())
+
+    @property
+    def s3_bucket(self):
+        if len(self._matched_s3_path.groups()) == 2:
+            return self._matched_s3_path.group("bucket")
+        return None
+
+    @property
+    def name(self):
+        return self.s3_key
+
+    @property
+    def s3_key(self):
+        if len(self._matched_s3_path.groups()) == 2:
+            return self._matched_s3_path.group("key")
+
+    def s3_create_stub(self):
+        if self._matched_s3_path:
+            if not self.exists():
+                self._s3c.download_from_s3(self.s3_bucket, self.s3_key, self.file(), create_stub_only=True)
+        else:
+            raise S3FileException("The file to be downloaded cannot be parsed as an s3 path in form 'bucket/key': %s" %
+                                  self.file())
+
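+# Illustrative usage (bucket and key are placeholders; credentials are read
+# from AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY or passed as kwargs, see
+# S3Helper below):
+#
+#   from snakemake.remote.S3 import RemoteProvider as S3RemoteProvider
+#   S3 = S3RemoteProvider()
+#
+#   rule fetch:
+#       input: S3.remote("mybucket/myfile.txt")
+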
+class S3Helper(object):
+
+    def __init__(self, *args, **kwargs):
+        # as per boto, expects the environment variables to be set:
+        # AWS_ACCESS_KEY_ID
+        # AWS_SECRET_ACCESS_KEY
+        # Otherwise these values need to be passed in as kwargs
+
+        # allow key_id and secret to be specified with aws_, gs_, or no prefix. 
+        # Standardize to the aws_ prefix expected by boto.
+        if "gs_access_key_id" in kwargs:
+            kwargs["aws_access_key_id"] = kwargs.pop("gs_access_key_id")
+        if "gs_secret_access_key" in kwargs:
+            kwargs["aws_secret_access_key"] = kwargs.pop("gs_secret_access_key")
+        if "access_key_id" in kwargs:
+            kwargs["aws_access_key_id"] = kwargs.pop("access_key_id")
+        if "secret_access_key" in kwargs:
+            kwargs["aws_secret_access_key"] = kwargs.pop("secret_access_key")
+        
+        self.conn = boto.connect_s3(*args, **kwargs)
+
+    def upload_to_s3(
+            self,
+            bucket_name,
+            file_path,
+            key=None,
+            use_relative_path_for_key=True,
+            relative_start_dir=None,
+            replace=False,
+            reduced_redundancy=False,
+            headers=None):
+        """ Upload a file to S3
+
+            This function uploads a file to an AWS S3 bucket.
+
+            Args:
+                bucket_name: the name of the S3 bucket to use (bucket name only, not ARN)
+                file_path: The path to the file to upload.
+                key: The key to set for the file on S3. If not specified, this will default to the
+                    name of the file.
+                use_relative_path_for_key: If set to True (default), and key is None, the S3 key will include slashes
+                    representing the path of the file relative to the CWD. If False only the
+                    file basename will be used for the key.
+                relative_start_dir: The start dir to use for use_relative_path_for_key. No effect if key is set.
+                replace: If True a file with the same key will be replaced with the one being written
+                reduced_redundancy: Sets the file to AWS reduced redundancy storage.
+                headers: additional headers to pass to AWS
+
+            Returns: The key of the file on S3 if written, None otherwise
+        """
+        file_path = os.path.realpath(os.path.expanduser(file_path))
+
+        assert bucket_name, "bucket_name must be specified"
+        assert os.path.exists(file_path), "The file path specified does not exist: %s" % file_path
+        assert os.path.isfile(file_path), "The file path specified does not appear to be a file: %s" % file_path
+
+        try:
+            b = self.conn.get_bucket(bucket_name)
+        except Exception:
+            b = self.conn.create_bucket(bucket_name)
+
+        k = Key(b)
+
+        if key:
+            k.key = key
+        else:
+            if use_relative_path_for_key:
+                if relative_start_dir:
+                    path_key = os.path.relpath(file_path, relative_start_dir)
+                else:
+                    path_key = os.path.relpath(file_path)
+            else:
+                path_key = os.path.basename(file_path)
+            k.key = path_key
+        try:
+            bytes_written = k.set_contents_from_filename(
+                file_path,
+                replace=replace,
+                reduced_redundancy=reduced_redundancy,
+                headers=headers)
+            if bytes_written:
+                return k.key
+            else:
+                return None
+        except Exception:
+            return None
+
+    def download_from_s3(
+            self,
+            bucket_name,
+            key,
+            destination_path=None,
+            expandKeyIntoDirs=True,
+            make_dest_dirs=True,
+            headers=None, create_stub_only=False):
+        """ Download a file from s3
+
+            This function downloads an object from a specified AWS S3 bucket.
+
+            Args:
+                bucket_name: the name of the S3 bucket to use (bucket name only, not ARN)
+                key: the key of the object to download from the bucket
+                destination_path: If specified, the file will be saved to this path, otherwise cwd.
+                expandKeyIntoDirs: Since S3 keys can include slashes, if this is True (default)
+                    then S3 keys with slashes are expanded into directories on the receiving end.
+                    If it is False, the key is passed to os.path.basename() to get the substring
+                    following the last slash.
+                make_dest_dirs: If this is True (default) and the destination path includes directories
+                    that do not exist, they will be created.
+                headers: Additional headers to pass to AWS
+
+            Returns:
+                The destination path of the downloaded file on the receiving end, or None if the destination_path
+                could not be downloaded
+        """
+        assert bucket_name, "bucket_name must be specified"
+        assert key, "Key must be specified"
+
+        b = self.conn.get_bucket(bucket_name)
+        k = Key(b)
+
+        if destination_path:
+            destination_path = os.path.realpath(os.path.expanduser(destination_path))
+        else:
+            if expandKeyIntoDirs:
+                destination_path = os.path.join(os.getcwd(), key)
+            else:
+                destination_path = os.path.join(os.getcwd(), os.path.basename(key))
+
+        # if the destination path does not exist
+        if make_dest_dirs:
+            os.makedirs(os.path.dirname(destination_path), exist_ok=True)
+
+        k.key = key if key else os.path.basename(destination_path)
+
+        try:
+            if not create_stub_only:
+                k.get_contents_to_filename(destination_path, headers=headers)
+            else:
+                # just create an empty file with the right timestamps
+                with open(destination_path, 'wb') as fp:
+                    modified_tuple = email.utils.parsedate_tz(k.last_modified)
+                    modified_stamp = int(email.utils.mktime_tz(modified_tuple))
+                    os.utime(fp.name, (modified_stamp, modified_stamp))
+            return destination_path
+        except Exception:
+            return None
+
+    def _upload_part(self, bucket_name, multipart_id, part_num, source_path, offset, bytes_to_write, number_of_retries=5):
+
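+        # each part retries itself recursively, decrementing retries_remaining
+        # until the upload succeeds or retries are exhausted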
+        def _upload(retries_remaining=number_of_retries):
+            try:
+                b = self.conn.get_bucket(bucket_name)
+                for mp in b.get_all_multipart_uploads():
+                    if mp.id == multipart_id:
+                        with FileChunkIO(source_path, 'r', offset=offset, bytes=bytes_to_write) as fp:
+                            mp.upload_part_from_file(fp=fp, part_num=part_num)
+                        break
+            except Exception as e:
+                if retries_remaining:
+                    _upload(retries_remaining=retries_remaining - 1)
+                else:
+                    raise e
+
+        _upload()
+
+    def upload_to_s3_multipart(
+            self,
+            bucket_name,
+            file_path,
+            key=None,
+            use_relative_path_for_key=True,
+            relative_start_dir=None,
+            replace=False,
+            reduced_redundancy=False,
+            headers=None,
+            parallel_processes=4):
+        """ Upload a file to S3
+
+            This function uploads a file to an AWS S3 bucket.
+
+            Args:
+                bucket_name: the name of the S3 bucket to use (bucket name only, not ARN)
+                file_path: The path to the file to upload.
+                key: The key to set for the file on S3. If not specified, this will default to the
+                    name of the file.
+                use_relative_path_for_key: If set to True (default), and key is None, the S3 key will include slashes
+                    representing the path of the file relative to the CWD. If False only the
+                    file basename will be used for the key.
+                relative_start_dir: The start dir to use for use_relative_path_for_key. No effect if key is set.
+                replace: If True a file with the same key will be replaced with the one being written
+                reduced_redundancy: Sets the file to AWS reduced redundancy storage.
+                headers: additional headers to pass to AWS
+                parallel_processes: Number of concurrent uploads
+
+            Returns: The key of the file on S3 if written, None otherwise
+        """
+        file_path = os.path.realpath(os.path.expanduser(file_path))
+
+        assert bucket_name, "bucket_name must be specified"
+        assert os.path.exists(file_path), "The file path specified does not exist: %s" % file_path
+        assert os.path.isfile(file_path), "The file path specified does not appear to be a file: %s" % file_path
+
+        try:
+            b = self.conn.get_bucket(bucket_name)
+        except:
+            b = self.conn.create_bucket(bucket_name)
+
+        path_key = None
+        if key:
+            path_key = key
+        else:
+            if use_relative_path_for_key:
+                if relative_start_dir:
+                    path_key = os.path.relpath(file_path, relative_start_dir)
+                else:
+                    path_key = os.path.relpath(file_path)
+            else:
+                path_key = os.path.basename(file_path)
+
+        mp = b.initiate_multipart_upload(path_key, headers=headers)
+
+        source_size = os.stat(file_path).st_size
+
+        bytes_per_chunk = 52428800  # 50MB = 50 * 1024 * 1024
+        chunk_count = int(math.ceil(source_size / float(bytes_per_chunk)))
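+        # ceil() so the final, possibly smaller remainder still gets its own part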
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=parallel_processes) as executor:
+            for i in range(chunk_count):
+                offset = i * bytes_per_chunk
+                remaining_bytes = source_size - offset
+                bytes_to_write = min([bytes_per_chunk, remaining_bytes])
+                part_num = i + 1
+                executor.submit(functools.partial(self._upload_part, bucket_name, mp.id, part_num, file_path, offset, bytes_to_write))
+
+        if len(mp.get_all_parts()) == chunk_count:
+            mp.complete_upload()
+            try:
+                key = b.get_key(path_key)
+                return key.key
+            except:
+                return None
+        else:
+            mp.cancel_upload()
+            return None
+
+    def delete_from_bucket(self, bucket_name, key, headers=None):
+        """ Delete a file from s3
+
+            This function deletes an object from a specified AWS S3 bucket.
+
+            Args:
+                bucket_name: the name of the S3 bucket to use (bucket name only, not ARN)
+                key: the key of the object to delete from the bucket
+                headers: Additional headers to pass to AWS
+
+            Returns:
+                The name of the object deleted
+        """
+        assert bucket_name, "bucket_name must be specified"
+        assert key, "Key must be specified"
+
+        b = self.conn.get_bucket(bucket_name)
+        k = Key(b)
+        k.key = key
+        ret = k.delete(headers=headers)
+        return ret.name
+
+    def exists_in_bucket(self, bucket_name, key, headers=None):
+        """ Returns whether the key exists in the bucket
+
+            Args:
+                bucket_name: the name of the S3 bucket to use (bucket name only, not ARN)
+                key: the key of the object to check in the bucket
+                headers: Additional headers to pass to AWS
+
+            Returns:
+                True | False
+        """
+        assert bucket_name, "bucket_name must be specified"
+        assert key, "Key must be specified"
+
+        b = self.conn.get_bucket(bucket_name)
+        k = Key(b)
+        k.key = key
+        return k.exists(headers=headers)
+
+    def key_size(self, bucket_name, key, headers=None):
+        """ Returns the size of a key based on a HEAD request
+
+            Args:
+                bucket_name: the name of the S3 bucket to use (bucket name only, not ARN)
+                key: the key of the object to inspect in the bucket
+                headers: Additional headers to pass to AWS
+
+            Returns:
+                Size in bytes
+        """
+        assert bucket_name, "bucket_name must be specified"
+        assert key, "Key must be specified"
+
+        b = self.conn.get_bucket(bucket_name)
+        k = b.lookup(key)
+
+        return k.size
+
+    def key_last_modified(self, bucket_name, key, headers=None):
+        """ Returns a timestamp of a key based on a HEAD request
+
+            Args:
+                bucket_name: the name of the S3 bucket to use (bucket name only, not ARN)
+                key: the key of the object to inspect in the bucket
+                headers: Additional headers to pass to AWS
+
+            Returns:
+                timestamp
+        """
+        assert bucket_name, "bucket_name must be specified"
+        assert key, "Key must be specified"
+
+        b = self.conn.get_bucket(bucket_name)
+        k = b.lookup(key)
+
+        # email.utils parsing of timestamp mirrors boto whereas
+        # time.strptime() can have TZ issues due to DST
+        modified_tuple = email.utils.parsedate_tz(k.last_modified)
+        epochTime = int(email.utils.mktime_tz(modified_tuple))
+
+        return epochTime
+
+    def list_keys(self, bucket_name):
+        return self.conn.get_bucket(bucket_name).list()
diff --git a/snakemake/remote/SFTP.py b/snakemake/remote/SFTP.py
new file mode 100644
index 0000000..d16b354
--- /dev/null
+++ b/snakemake/remote/SFTP.py
@@ -0,0 +1,112 @@
+__author__ = "Christopher Tomkins-Tinch"
+__copyright__ = "Copyright 2015, Christopher Tomkins-Tinch"
+__email__ = "tomkinsc at broadinstitute.org"
+__license__ = "MIT"
+
+import os, re
+from contextlib import contextmanager
+
+# module-specific
+from snakemake.remote import AbstractRemoteProvider, DomainObject
+from snakemake.exceptions import SFTPFileException, WorkflowError
+import snakemake.io
+
+try:
+    # third-party modules
+    import pysftp
+except ImportError as e:
+    raise WorkflowError("The Python 3 package 'pysftp' " + 
+        "must be installed to use SFTP remote() file functionality. %s" % e.msg)
+
+
+class RemoteProvider(AbstractRemoteProvider):
+    def __init__(self, *args, **kwargs):
+        super(RemoteProvider, self).__init__(*args, **kwargs)
+
+class RemoteObject(DomainObject):
+    """ This is a class to interact with an SFTP server.
+    """
+
+    def __init__(self, *args, keep_local=False, provider=None, **kwargs):
+        super(RemoteObject, self).__init__(*args, keep_local=keep_local, provider=provider, **kwargs)
+        
+    # === Implementations of abstract class members ===
+
+    @contextmanager  # code after 'yield' runs as the __exit__() phase
+    def sftpc(self):
+        # if args have been provided to remote(), use them over those given to RemoteProvider()
+        args_to_use = self.provider.args
+        if len(self.args):
+            args_to_use = self.args
+
+        # use kwargs passed in to remote() to override those given to the RemoteProvider()
+        # default to the host and port given as part of the file, falling back to one specified
+        # as a kwarg to remote() or the RemoteProvider (overriding the latter with the former if both)
+        kwargs_to_use = {}
+        kwargs_to_use["host"] = self.host
+        kwargs_to_use["port"] = int(self.port) if self.port else 22
+        for k,v in self.provider.kwargs.items():
+            kwargs_to_use[k] = v
+        for k,v in self.kwargs.items():
+            kwargs_to_use[k] = v
+
+        conn = pysftp.Connection(*args_to_use, **kwargs_to_use)
+        try:
+            yield conn
+        finally:
+            conn.close()
+
+    def exists(self):
+        if self._matched_address:
+            with self.sftpc() as sftpc:
+                return sftpc.exists(self.remote_path)
+        else:
+            raise SFTPFileException("The file cannot be parsed as an SFTP path in form 'host:port/path/to/file': %s" % self.file())
+
+    def mtime(self):
+        if self.exists():
+            with self.sftpc() as sftpc:
+                attr = sftpc.stat(self.remote_path)
+                return int(attr.st_mtime)
+        else:
+            raise SFTPFileException("The file does not seem to exist remotely: %s" % self.file())
+
+    def size(self):
+        if self.exists():
+            with self.sftpc() as sftpc:
+                attr = sftpc.stat(self.remote_path)
+                return int(attr.st_size)
+        else:
+            return self._iofile.size_local
+
+    def download(self, make_dest_dirs=True):
+        with self.sftpc() as sftpc:
+            if self.exists():
+                # if the destination path does not exist
+                if make_dest_dirs:
+                    os.makedirs(os.path.dirname(self.local_path), exist_ok=True)
+
+                sftpc.get(remotepath=self.remote_path, localpath=self.local_path, preserve_mtime=True)
+            else:
+                raise SFTPFileException("The file does not seem to exist remotely: %s" % self.file())
+
+    def upload(self):
+        with self.sftpc() as sftpc:
+            sftpc.put(localpath=self.local_path, remotepath=self.remote_path, confirm=True, preserve_mtime=True)
+
+    @property
+    def list(self):
+        file_list = []
+
+        first_wildcard = self._iofile.constant_prefix()
+        dirname = first_wildcard.replace(self.path_prefix, "")
+
+        with self.sftpc() as sftpc:
+            def _append_item(file_path):
+                file_path = file_path.lstrip("/")
+                file_list.append(file_path)
+
+            sftpc.walktree(dirname, fcallback=_append_item, dcallback=_append_item, ucallback=_append_item)
+
+        return file_list
diff --git a/snakemake/remote/__init__.py b/snakemake/remote/__init__.py
new file mode 100644
index 0000000..8667584
--- /dev/null
+++ b/snakemake/remote/__init__.py
@@ -0,0 +1,159 @@
+__author__ = "Christopher Tomkins-Tinch"
+__copyright__ = "Copyright 2015, Christopher Tomkins-Tinch"
+__email__ = "tomkinsc at broadinstitute.org"
+__license__ = "MIT"
+
+# built-ins
+import os, sys, re
+from abc import ABCMeta, abstractmethod
+
+# module-specific
+import snakemake.io
+from snakemake.exceptions import RemoteFileException
+
+class AbstractRemoteProvider:
+    """ This is an abstract class to be used to derive remote provider classes. These might be used to hold common credentials,
+        and are then passed to RemoteObjects.
+    """
+    __metaclass__ = ABCMeta
+
+    def __init__(self, *args, **kwargs):
+        self.args = args
+        self.kwargs = kwargs
+
+    def remote(self, value, *args, keep_local=False, **kwargs):
+        if snakemake.io.is_flagged(value, "temp"):
+            raise SyntaxError(
+                "Remote and temporary flags are mutually exclusive.")
+        if snakemake.io.is_flagged(value, "protected"):
+            raise SyntaxError(
+                "Remote and protected flags are mutually exclusive.")
+
+        provider = sys.modules[self.__module__]  # get module of derived class
+        remote_object = provider.RemoteObject(
+            *args, keep_local=keep_local,
+            provider=provider.RemoteProvider(*self.args, **self.kwargs),
+            **kwargs)
+
+        return snakemake.io.flag(value, "remote_object", remote_object)
+
+    def glob_wildcards(self, pattern, *args, **kwargs):
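+        # build a throwaway remote object from the pattern so its key listing
+        # can be matched against the wildcard pattern below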
+        args = self.args if not args else args
+        kwargs = self.kwargs if not kwargs else kwargs
+
+        referenceObj = snakemake.io.IOFile(self.remote(pattern, *args, **kwargs))
+
+        pattern = "./"+ referenceObj.remote_object.name
+        pattern = os.path.normpath(pattern)
+
+        key_list = list(referenceObj.remote_object.list)
+
+        return snakemake.io.glob_wildcards(pattern, files=key_list)
+
+    @abstractmethod
+    def remote_interface(self):
+        pass
+
+class AbstractRemoteObject:
+    """ This is an abstract class to be used to derive remote object classes for 
+        different cloud storage providers. For example, there could be classes for interacting with 
+        Amazon AWS S3 and Google Cloud Storage, both derived from this common base class.
+    """
+    __metaclass__ = ABCMeta
+
+    def __init__(self, *args, keep_local=False, provider=None, **kwargs):
+        # self._iofile must be set before the remote object can be used, in io.py or elsewhere
+        self._iofile = None
+        self.args = args
+        self.kwargs = kwargs
+
+        self.keep_local = keep_local
+        self.provider = provider
+
+    @property
+    def _file(self):
+        return self._iofile._file
+    
+    def file(self):
+        return self._file
+
+    @abstractmethod
+    def exists(self):
+        pass
+
+    @abstractmethod
+    def mtime(self):
+        pass
+
+    @abstractmethod
+    def size(self):
+        pass
+
+    @abstractmethod
+    def download(self, *args, **kwargs):
+        pass
+
+    @abstractmethod
+    def upload(self, *args, **kwargs):
+        pass
+
+    @abstractmethod
+    def list(self, *args, **kwargs):
+        pass
+
+    @abstractmethod
+    def name(self, *args, **kwargs):
+        pass
+
+    @abstractmethod
+    def remote(self, value, keep_local=False):
+        pass
+
+class DomainObject(AbstractRemoteObject):
+    """This is a mixin related to parsing components
+        out of a location path specified as
+        (host|IP):port/remote/location
+    """
+    def __init__(self, *args, **kwargs):
+        super(DomainObject, self).__init__(*args, **kwargs)
+
+    @property
+    def _matched_address(self):
+        return re.search("^(?P<host>[A-Za-z0-9\-\.]+)(?:\:(?P<port>[0-9]+))?(?P<path_remainder>.*)$", self._iofile._file)
+
+    @property
+    def name(self):
+        return self.path_remainder
+    
+    @property
+    def protocol(self):
+        if self._matched_address:
+            return self._matched_address.group("protocol")
+
+    @property
+    def host(self):
+        if self._matched_address:
+            return self._matched_address.group("host")
+
+    @property
+    def port(self):
+        return self._matched_address.group("port")
+    
+    @property
+    def path_prefix(self):
+        # this is the domain and port, however specified before the path remainder
+        return self._iofile._file[:self._iofile._file.index(self.path_remainder)]
+    
+    @property
+    def path_remainder(self):
+        if self._matched_address:
+            return self._matched_address.group("path_remainder")
+
+    @property
+    def local_path(self):
+        return self._iofile._file
+
+    @property
+    def remote_path(self):
+        return self.path_remainder
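+
+# Illustrative parse with the address regex above (values are examples):
+#   "example.com:2222/path/to/file" ->
+#       host "example.com", port "2222", path_remainder "/path/to/file"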
diff --git a/snakemake/remote/dropbox.py b/snakemake/remote/dropbox.py
new file mode 100644
index 0000000..258b6a5
--- /dev/null
+++ b/snakemake/remote/dropbox.py
@@ -0,0 +1,110 @@
+__author__ = "Christopher Tomkins-Tinch"
+__copyright__ = "Copyright 2015, Christopher Tomkins-Tinch"
+__email__ = "tomkinsc at broadinstitute.org"
+__license__ = "MIT"
+
+import os, re
+from contextlib import contextmanager
+
+# module-specific
+from snakemake.remote import AbstractRemoteProvider, AbstractRemoteObject
+from snakemake.exceptions import DropboxFileException, WorkflowError
+import snakemake.io 
+
+try:
+    # third-party modules
+    import dropbox # The official Dropbox API library
+except ImportError as e:
+    raise WorkflowError("The Python 3 package 'dropbox' " + 
+        "must be installed to use Dropbox remote() file functionality. %s" % e.msg)
+
+
+class RemoteProvider(AbstractRemoteProvider):
+    def __init__(self, *args, **kwargs):
+        super(RemoteProvider, self).__init__(*args, **kwargs)
+
+        self._dropboxc = dropbox.Dropbox(*args, **kwargs)
+        try:
+            self._dropboxc.users_get_current_account()
+        except dropbox.exceptions.AuthError as err:
+            raise DropboxFileException("ERROR: Invalid Dropbox OAuth access token; try re-generating an access token from the app console on the web.")
+
+    def remote_interface(self):
+        return self._dropboxc
+
+class RemoteObject(AbstractRemoteObject):
+    """ This is a class to interact with the AWS S3 object store.
+    """
+
+    def __init__(self, *args, keep_local=False, provider=None, **kwargs):
+        super(RemoteObject, self).__init__(*args, keep_local=keep_local, provider=provider, **kwargs)
+
+        if provider:
+            self._dropboxc = provider.remote_interface()
+        else:
+            self._dropboxc = dropbox.Dropbox(*args, **kwargs)
+            try:
+                self._dropboxc.users_get_current_account()
+            except dropbox.exceptions.AuthError as err:
+                raise DropboxFileException("ERROR: Invalid Dropbox OAuth access token; try re-generating an access token from the app console on the web.")
+        
+    # === Implementations of abstract class members ===
+  
+    def exists(self):
+        try:
+            self._dropboxc.files_get_metadata(self.remote_file())
+            return True
+        except dropbox.exceptions.ApiError:
+            return False
+
+    def mtime(self):
+        if self.exists():
+            metadata = self._dropboxc.files_get_metadata(self.remote_file())
+            epochTime = metadata.server_modified.timestamp()
+            return epochTime
+        else:
+            raise DropboxFileException("The file does not seem to exist remotely: %s" % self.remote_file())
+
+    def size(self):
+        if self.exists():
+            metadata = self._dropboxc.files_get_metadata(self.remote_file())
+            return int(metadata.size)
+        else:
+            return self._iofile.size_local
+
+    def download(self, make_dest_dirs=True):
+        if self.exists():
+            # if the destination path does not exist, make it
+            if make_dest_dirs:
+                os.makedirs(os.path.dirname(self.file()), exist_ok=True)
+
+            self._dropboxc.files_download_to_file(self.file(), self.remote_file())
+        else:
+            raise DropboxFileException("The file does not seem to exist remotely: %s" % self.remote_file())
+
+    def upload(self, mode=dropbox.files.WriteMode('overwrite')):
+        with open(self.file(),'rb') as f:
+            self._dropboxc.files_upload(f, self.remote_file(), mode=mode)
+
+    def remote_file(self):
+        return "/"+self.file() if not self.file().startswith("/") else self.file()
+
+    @property
+    def name(self):
+        return self.file()
+
+    @property
+    def list(self):
+        file_list = []
+
+        first_wildcard = self._iofile.constant_prefix()
+        dirname = "/" + first_wildcard if not first_wildcard.startswith("/") else first_wildcard
+
+        while '//' in dirname:
+            dirname = dirname.replace('//', '/')
+        dirname = dirname.rstrip('/')
+
+        for item in self._dropboxc.files_list_folder(dirname, recursive=True).entries:
+            file_list.append(os.path.join(os.path.dirname(item.path_lower), item.name).lstrip("/"))
+
+        return file_list
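+
+# Illustrative usage (token and path are placeholders):
+#
+#   from snakemake.remote.dropbox import RemoteProvider as DropboxRemoteProvider
+#   DBox = DropboxRemoteProvider(oauth2_access_token="mytoken")
+#
+#   rule fetch:
+#       input: DBox.remote("path/to/file.txt")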
diff --git a/snakemake/rules.py b/snakemake/rules.py
index 3608167..cdd0d00 100644
--- a/snakemake/rules.py
+++ b/snakemake/rules.py
@@ -8,9 +8,9 @@ import re
 import sys
 import inspect
 import sre_constants
-from collections import defaultdict
+from collections import defaultdict, Iterable
 
-from snakemake.io import IOFile, _IOFile, protected, temp, dynamic, Namedlist
+from snakemake.io import IOFile, _IOFile, protected, temp, dynamic, Namedlist, AnnotatedString
 from snakemake.io import expand, InputFiles, OutputFiles, Wildcards, Params, Log
 from snakemake.io import apply_wildcards, is_flagged, not_iterable
 from snakemake.exceptions import RuleException, IOFileException, WildcardError, InputFunctionException
@@ -40,6 +40,7 @@ class Rule:
             self.protected_output = set()
             self.touch_output = set()
             self.subworkflow_input = dict()
+            self.shadow_depth = None
             self.resources = dict(_cores=1, _nodes=1)
             self.priority = 0
             self.version = None
@@ -50,6 +51,7 @@ class Rule:
             self.snakefile = snakefile
             self.run_func = None
             self.shellcmd = None
+            self.script = None
             self.norun = False
         elif len(args) == 1:
             other = args[0]
@@ -67,6 +69,7 @@ class Rule:
             self.protected_output = set(other.protected_output)
             self.touch_output = set(other.touch_output)
             self.subworkflow_input = dict(other.subworkflow_input)
+            self.shadow_depth = other.shadow_depth
             self.resources = other.resources
             self.priority = other.priority
             self.version = other.version
@@ -77,6 +80,7 @@ class Rule:
             self.snakefile = other.snakefile
             self.run_func = other.run_func
             self.shellcmd = other.shellcmd
+            self.script = other.script
             self.norun = other.norun
 
     def dynamic_branch(self, wildcards, input=True):
@@ -85,6 +89,22 @@ class Rule:
                 rule.output, rule.dynamic_output
             )
 
+        def partially_expand(f, wildcards):
+            """Expand the wildcards in f from the ones present in wildcards
+
+            This is done by replacing all wildcard delimiters by `{{` or `}}`
+            that are not in `wildcards.keys()`.
+            """
+            # perform the partial expansion from f's string representation
+            s = str(f).replace('{', '{{').replace('}', '}}')
+            for key in wildcards.keys():
+                s = s.replace('{{{{{}}}}}'.format(key),
+                              '{{{}}}'.format(key))
+            # build result
+            anno_s = AnnotatedString(s)
+            anno_s.flags = f.flags
+            return IOFile(anno_s, f.rule)
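+        # Worked example (illustrative values): with f = "{sample}.{chunk}.txt"
+        # and wildcards = {"sample": "a"}, escaping yields
+        # "{{sample}}.{{chunk}}.txt"; restoring "{sample}" gives
+        # "{sample}.{{chunk}}.txt", which expand() below turns into
+        # "a.{chunk}.txt", leaving the dynamic wildcard intact.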
+
         io, dynamic_io = get_io(self)
 
         branch = Rule(self)
@@ -93,9 +113,15 @@ class Rule:
         expansion = defaultdict(list)
         for i, f in enumerate(io):
             if f in dynamic_io:
+                f = partially_expand(f, wildcards)
                 try:
                     for e in reversed(expand(f, zip, **wildcards)):
-                        expansion[i].append(IOFile(e, rule=branch))
+                        # need to clone the flags so intermediate
+                        # dynamic remote file paths are expanded and
+                        # removed appropriately
+                        ioFile = IOFile(e, rule=branch)
+                        ioFile.clone_flags(f)
+                        expansion[i].append(ioFile)
                 except KeyError:
                     return None
 
@@ -269,7 +295,7 @@ class Rule:
             self._set_params_item(item, name=name)
 
     def _set_params_item(self, item, name=None):
-        if isinstance(item, str) or callable(item):
+        if not_iterable(item) or callable(item):
             self.params.append(item)
             if name:
                 self.params.add_name(name)
@@ -324,38 +350,44 @@ class Rule:
                                          fill_missing=f in self.dynamic_input,
                                          fail_dynamic=self.dynamic_output)
 
+        def concretize_param(p, wildcards):
+            if isinstance(p, str):
+                return apply_wildcards(p, wildcards)
+            return p
+
+        def check_input_function(f):
+            if (not_iterable(f) and not isinstance(f, str)) or not all(isinstance(f_, str) for f_ in f):
+                raise RuleException(
+                    "Input function did not return str or list of str.",
+                    rule=self)
+
+        def check_param_function(f):
+            pass
+
         def _apply_wildcards(newitems, olditems, wildcards, wildcards_obj,
                              concretize=apply_wildcards,
+                             check_function_return=check_input_function,
                              ruleio=None):
             for name, item in olditems.allitems():
                 start = len(newitems)
                 is_iterable = True
+
                 if callable(item):
                     try:
                         item = item(wildcards_obj)
                     except (Exception, BaseException) as e:
                         raise InputFunctionException(e, rule=self)
-                    if not_iterable(item):
-                        item = [item]
-                        is_iterable = False
-                    for item_ in item:
-                        if not isinstance(item_, str):
-                            raise RuleException(
-                                "Input function did not return str or list of str.",
-                                rule=self)
-                        concrete = concretize(item_, wildcards)
-                        newitems.append(concrete)
-                        if ruleio is not None:
-                            ruleio[concrete] = item_
-                else:
-                    if not_iterable(item):
-                        item = [item]
-                        is_iterable = False
-                    for item_ in item:
-                        concrete = concretize(item_, wildcards)
-                        newitems.append(concrete)
-                        if ruleio is not None:
-                            ruleio[concrete] = item_
+                    check_function_return(item)
+
+                if not_iterable(item):
+                    item = [item]
+                    is_iterable = False
+                for item_ in item:
+                    concrete = concretize(item_, wildcards)
+                    newitems.append(concrete)
+                    if ruleio is not None:
+                        ruleio[concrete] = item_
+
                 if name:
                     newitems.set_name(
                         name, start,
@@ -382,7 +414,9 @@ class Rule:
                              ruleio=ruleio)
 
             params = Params()
-            _apply_wildcards(params, self.params, wildcards, wildcards_obj)
+            _apply_wildcards(params, self.params, wildcards, wildcards_obj,
+                             concretize=concretize_param,
+                             check_function_return=check_param_function)
 
             output = OutputFiles(o.apply_wildcards(wildcards)
                                  for o in self.output)
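
For illustration: with the changes above, params entries may be arbitrary
non-iterable values (or callables), while input functions must still return
a str or a list of str. A sketch using illustrative names:

    def my_input(wildcards):
        # must return a str or a list of str
        return ["data/{}.txt".format(wildcards.sample)]

    rule example:
        input: my_input
        params: cutoff=0.05, verbose=True  # non-string params now allowed
        output: "results/{sample}.out"
        shell: "process --cutoff {params.cutoff} {input} > {output}"
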
diff --git a/snakemake/scheduler.py b/snakemake/scheduler.py
index a3258b8..ca7a1ce 100644
--- a/snakemake/scheduler.py
+++ b/snakemake/scheduler.py
@@ -55,10 +55,7 @@ class JobScheduler:
         self.running = set()
         self.failed = set()
         self.finished_jobs = 0
-        self.greediness = greediness
-        self.select_by_rule = False
-        if not self.select_by_rule:
-            self.greediness = 1
+        self.greediness = 1
 
         self.resources = dict(self.workflow.global_resources)
 
@@ -84,7 +81,6 @@ class JobScheduler:
                                             quiet=quiet,
                                             printshellcmds=printshellcmds,
                                             latency_wait=latency_wait)
-            self.rule_reward = self.dryrun_rule_reward
             self.job_reward = self.dryrun_job_reward
         elif touch:
             self._executor = TouchExecutor(workflow, dag,
@@ -93,8 +89,7 @@ class JobScheduler:
                                            printshellcmds=printshellcmds,
                                            latency_wait=latency_wait)
         elif cluster or cluster_sync or (drmaa is not None):
-            workers = min(sum(1 for _ in dag.local_needrun_jobs),
-                          local_cores)
+            workers = min(max(1, sum(1 for _ in dag.local_needrun_jobs)), local_cores)
             self._local_executor = CPUExecutor(
                 workflow, dag, workers,
                 printreason=printreason,
@@ -118,7 +113,6 @@ class JobScheduler:
                     latency_wait=latency_wait,
                     benchmark_repeats=benchmark_repeats, )
                 if immediate_submit:
-                    self.rule_reward = self.dryrun_rule_reward
                     self.job_reward = self.dryrun_job_reward
                     self._submit_callback = partial(self._proceed,
                                                     update_dynamic=False,
@@ -140,7 +134,7 @@ class JobScheduler:
             # calculate how many parallel workers the executor shall spawn
             # each job has at least one thread, hence we need to have
             # the minimum of given cores and number of jobs
-            workers = min(cores, len(dag))
+            workers = min(cores, max(1, len(dag)))
             self._executor = CPUExecutor(workflow, dag, workers,
                                          printreason=printreason,
                                          quiet=quiet,
@@ -176,22 +170,30 @@ class JobScheduler:
                 while not self._open_jobs.wait(1):
                     pass
 
+                # obtain needrun and running jobs in a thread-safe way
+                with self._lock:
+                    needrun = list(self.open_jobs)
+                    running = list(self.running)
+                # free the event
                 self._open_jobs.clear()
+
+                # handle errors
                 if not self.keepgoing and self._errors:
                     logger.info("Will exit after finishing "
                                 "currently running jobs.")
-                    if not self.running:
+                    if not running:
                         self._executor.shutdown()
                         logger.error(_ERROR_MSG_FINAL)
                         return False
                     continue
-                if not any(self.open_jobs) and not self.running:
+                # normal shutdown because all jobs have been finished
+                if not needrun and not running:
                     self._executor.shutdown()
                     if self._errors:
                         logger.error(_ERROR_MSG_FINAL)
                     return not self._errors
 
-                needrun = list(self.open_jobs)
+                # continue if no new job needs to be executed
                 if not needrun:
                     continue
 
@@ -200,18 +202,24 @@ class JobScheduler:
                 logger.debug("Ready jobs ({}):\n\t".format(len(needrun)) +
                              "\n\t".join(map(str, needrun)))
 
+                # select jobs by solving knapsack problem
                 run = self.job_selector(needrun)
                 logger.debug("Selected jobs ({}):\n\t".format(len(run)) +
                              "\n\t".join(map(str, run)))
-                self.running.update(run)
+                # update running jobs
+                with self._lock:
+                    self.running.update(run)
                 logger.debug(
                     "Resources after job selection: {}".format(self.resources))
+                # actually run jobs
                 for job in run:
                     self.run(job)
         except (KeyboardInterrupt, SystemExit):
             logger.info("Terminating processes on user request.")
             self._executor.cancel()
-            for job in self.running:
+            with self._lock:
+                running = list(self.running)
+            for job in running:
                 job.cleanup()
             return False
 
@@ -283,46 +291,16 @@ Problem", Akcay, Li, Xu, Annals of Operations Research, 2012
             jobs (list):    list of jobs
         """
         with self._lock:
-            if self.select_by_rule:
-                # solve over the rules instead of jobs (much less, but might miss the best solution)
-                # each rule is an item with as many copies as jobs
-                _jobs = defaultdict(list)
-                for job in jobs:
-                    _jobs[job.rule].append(job)
-
-                jobs = _jobs
-
-                # sort the jobs by priority
-                for _jobs in jobs.values():
-                    _jobs.sort(key=self.dag.priority, reverse=True)
-                rules = list(jobs)
-
-                # Step 1: initialization
-                n = len(rules)
-                x = [0] * n  # selected jobs of each rule
-                E = set(range(n))  # rules free to select
-                u = [len(jobs[rule]) for rule in rules]  # number of jobs left
-                a = list(map(self.rule_weight,
-                             rules))  # resource usage of rules
-                c = list(map(partial(self.rule_reward,
-                                     jobs=jobs),
-                             rules))  # matrix of cumulative rewards over jobs
-
-                def calc_reward():
-                    return [([(crit[x_j + y_j] - crit[x_j]) for crit in c_j] if
-                             j in E else [0] * len(c_j))
-                            for j, (c_j, y_j, x_j) in enumerate(zip(c, y, x))]
-            else:
-                # each job is an item with one copy (0-1 MDKP)
-                n = len(jobs)
-                x = [0] * n  # selected jobs
-                E = set(range(n))  # jobs still free to select
-                u = [1] * n
-                a = list(map(self.job_weight, jobs))  # resource usage of jobs
-                c = list(map(self.job_reward, jobs))  # job rewards
+            # each job is an item with one copy (0-1 MDKP)
+            n = len(jobs)
+            x = [0] * n  # selected jobs
+            E = set(range(n))  # jobs still free to select
+            u = [1] * n
+            a = list(map(self.job_weight, jobs))  # resource usage of jobs
+            c = list(map(self.job_reward, jobs))  # job rewards
 
-                def calc_reward():
-                    return [c_j * y_j for c_j, y_j in zip(c, y)]
+            def calc_reward():
+                return [c_j * y_j for c_j, y_j in zip(c, y)]
 
             b = [self.resources[name]
                  for name in self.workflow.global_resources
@@ -354,12 +332,7 @@ Problem", Akcay, Li, Xu, Annals of Operations Research, 2012
                 if not E:
                     break
 
-            if self.select_by_rule:
-                # Solution is the list of jobs that was selected from the selected rules
-                solution = list(chain(*[jobs[rules[j]][:x_]
-                                        for j, x_ in enumerate(x)]))
-            else:
-                solution = [job for job, sel in zip(jobs, x) if sel]
+            solution = [job for job, sel in zip(jobs, x) if sel]
             # update resources
             for name, b_i in zip(self.workflow.global_resources, b):
                 self.resources[name] = b_i
@@ -373,38 +346,17 @@ Problem", Akcay, Li, Xu, Annals of Operations Research, 2012
         return [self.calc_resource(name, res.get(name, 0))
                 for name in self.workflow.global_resources]
 
-    def rule_reward(self, rule, jobs=None):
-        jobs = jobs[rule]
-        return (self.priority_reward(jobs), self.downstream_reward(jobs),
-                cumsum([job.inputsize for job in jobs]))
-
-    def dryrun_rule_reward(self, rule, jobs=None):
-        jobs = jobs[rule]
-        return (self.priority_reward(jobs), self.downstream_reward(jobs),
-                [0] * (len(jobs) + 1))
-
-    def priority_reward(self, jobs):
-        return cumsum(self.dag.priorities(jobs))
-
-    def downstream_reward(self, jobs):
-        return cumsum(self.dag.downstream_sizes(jobs))
-
-    def thread_reward(self, jobs):
-        """ Thread-based reward for jobs. Using this maximizes core
-        saturation, but does not lead to faster computation in general."""
-        return cumsum([job.threads for job in jobs])
-
     def job_weight(self, job):
         res = job.resources_dict
         return [self.calc_resource(name, res.get(name, 0))
                 for name in self.workflow.global_resources]
 
     def job_reward(self, job):
-        return (self.dag.priority(job), self.dag.downstream_size(job),
+        return (self.dag.priority(job), self.dag.temp_input_count(job), self.dag.downstream_size(job),
                 job.inputsize)
 
     def dryrun_job_reward(self, job):
-        return (self.dag.priority(job), self.dag.downstream_size(job))
+        return (self.dag.priority(job), self.dag.temp_input_count(job), self.dag.downstream_size(job))
 
     def progress(self):
         """ Display the progress. """
diff --git a/snakemake/script.py b/snakemake/script.py
new file mode 100644
index 0000000..faedbe1
--- /dev/null
+++ b/snakemake/script.py
@@ -0,0 +1,139 @@
+__author__ = "Johannes Köster"
+__copyright__ = "Copyright 2015, Johannes Köster"
+__email__ = "koester at jimmy.harvard.edu"
+__license__ = "MIT"
+
+import inspect
+import os
+import traceback
+
+from snakemake.utils import format
+from snakemake.logging import logger
+from snakemake.exceptions import WorkflowError
+
+class REncoder:
+    """Encoding Pyton data structures into R."""
+
+    @classmethod
+    def encode_value(cls, value):
+        if isinstance(value, list):
+            return cls.encode_list(value)
+        elif isinstance(value, dict):
+            return cls.encode_dict(value)
+        elif isinstance(value, str):
+            return repr(value)
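+        # note: the bool branch below must come before the int/float branch,
+        # since bool is a subclass of int in Python.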
+        elif isinstance(value, bool):
+            return "TRUE" if value else "FALSE"
+        elif isinstance(value, int) or isinstance(value, float):
+            return str(value)
+        else:
+            raise ValueError(
+                "Unsupported value for conversion into R: {}".format(value))
+
+    @classmethod
+    def encode_list(cls, values):
+        return "c({})".format(", ".join(map(cls.encode_value, values)))
+
+    @classmethod
+    def encode_items(cls, items):
+        def encode_item(item):
+            name, value = item
+            return '"{}" = {}'.format(name, cls.encode_value(value))
+
+        return ", ".join(map(encode_item, items))
+
+    @classmethod
+    def encode_dict(cls, d):
+        d = "list({})".format(cls.encode_items(d.items()))
+        return d
+
+    @classmethod
+    def encode_namedlist(cls, namedlist):
+        positional = cls.encode_list(namedlist)
+        named = cls.encode_items(namedlist.items())
+        source = "list("
+        if positional != "c()":
+            source += positional
+        if named:
+            source += ", " + named
+        source += ")"
+        return source
+
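+# For illustration (argument values chosen arbitrarily):
+#   REncoder.encode_value(True)          -> 'TRUE'
+#   REncoder.encode_dict({"a": [1, 2]})  -> 'list("a" = c(1, 2))'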
+
+class Snakemake:
+    def __init__(self, input, output, params, wildcards, threads, resources,
+                 log, config):
+        self.input = input
+        self.output = output
+        self.params = params
+        self.wildcards = wildcards
+        self.threads = threads
+        self.resources = resources
+        self.log = log
+        self.config = config
+
+
+def script(basedir, path, input, output, params, wildcards, threads, resources,
+           log, config):
+    """
+    Load a script from the given basedir + path and execute it.
+    Supports Python 3 and R.
+    """
+    path = format(os.path.join(basedir, path), stepout=1)
+
+    if path.endswith(".py"):
+        with open(path) as source:
+            try:
+                exec(compile(source.read(), path, "exec"), {
+                    "snakemake": Snakemake(input, output, params, wildcards,
+                                           threads, resources, log, config)
+                })
+            except (Exception, BaseException) as ex:
+                raise WorkflowError("".join(traceback.format_exception(type(ex), ex, ex.__traceback__)))
+    elif path.endswith(".R"):
+        try:
+            import rpy2.robjects as robjects
+        except ImportError:
+            raise ValueError(
+                "Python 3 package rpy2 needs to be installed to run R scripts.")
+        with open(path) as source:
+            preamble = """
+            Snakemake <- setClass(
+                "Snakemake",
+                slots = c(
+                    input = "list",
+                    output = "list",
+                    params = "list",
+                    wildcards = "list",
+                    threads = "numeric",
+                    log = "list",
+                    resources = "list",
+                    config = "list"
+                )
+            )
+            snakemake <- Snakemake(
+                input = {},
+                output = {},
+                params = {},
+                wildcards = {},
+                threads = {},
+                log = {},
+                resources = {},
+                config = {}
+            )
+            """.format(REncoder.encode_namedlist(input),
+                       REncoder.encode_namedlist(output),
+                       REncoder.encode_namedlist(params),
+                       REncoder.encode_namedlist(wildcards), threads,
+                       REncoder.encode_namedlist(log),
+                       REncoder.encode_namedlist({
+                           name: value
+                           for name, value in resources.items()
+                           if name != "_cores" and name != "_nodes"
+                       }), REncoder.encode_dict(config))
+            logger.debug(preamble)
+            source = preamble + source.read()
+            robjects.r(source)
+    else:
+        raise ValueError(
+            "Unsupported script: Expecting either Python (.py) or R (.R) script.")
diff --git a/snakemake/shell.py b/snakemake/shell.py
index 83e73c5..bf59350 100644
--- a/snakemake/shell.py
+++ b/snakemake/shell.py
@@ -23,17 +23,22 @@ if not isinstance(sys.stdout, _io.TextIOWrapper):
 class shell:
     _process_args = {}
     _process_prefix = ""
+    _process_suffix = ""
 
     @classmethod
     def executable(cls, cmd):
         if os.path.split(cmd)[-1] == "bash":
-            cls._process_prefix = "set -o pipefail; "
+            cls._process_prefix = "set -e -o pipefail; "
         cls._process_args["executable"] = cmd
 
     @classmethod
     def prefix(cls, prefix):
         cls._process_prefix = format(prefix, stepout=2)
 
+    @classmethod
+    def suffix(cls, suffix):
+        cls._process_suffix = format(suffix, stepout=2)
+
     def __new__(cls, cmd, *args,
                 async=False,
                 iterable=False,
@@ -47,7 +52,11 @@ class shell:
         stdout = sp.PIPE if iterable or async or read else STDOUT
 
         close_fds = sys.platform != 'win32'
-        proc = sp.Popen(cls._process_prefix + cmd,
+
+        proc = sp.Popen("{} {} {}".format(
+                            cls._process_prefix,
+                            cmd,
+                            cls._process_suffix),
                         bufsize=-1,
                         shell=True,
                         stdout=stdout,
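
For illustration, the prefix/suffix hooks wrap every command (an
illustrative sketch; the log file name is a placeholder):

    from snakemake.shell import shell
    shell.prefix("set -euo pipefail; ")
    shell.suffix(" 2>> errors.log")
    shell("echo hello")
    # executes: set -euo pipefail;  echo hello  2>> errors.log
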
diff --git a/snakemake/utils.py b/snakemake/utils.py
index 6730d95..fbfa9ed 100644
--- a/snakemake/utils.py
+++ b/snakemake/utils.py
@@ -73,8 +73,7 @@ def makedirs(dirnames):
     if isinstance(dirnames, str):
         dirnames = [dirnames]
     for dirname in dirnames:
-        if not os.path.exists(dirname):
-            os.makedirs(dirname)
+        os.makedirs(dirname, exist_ok=True)
 
 
 def report(text, path,
@@ -144,7 +143,7 @@ def R(code):
     try:
         import rpy2.robjects as robjects
     except ImportError:
-        raise WorkflowError(
+        raise ValueError(
             "Python 3 package rpy2 needs to be installed to use the R function.")
     robjects.r(format(textwrap.dedent(code), stepout=2))
 
@@ -234,11 +233,29 @@ def update_config(config, overwrite_config):
       overwrite_config (dict): dictionary whose items will overwrite those in config
 
     """
+
     def _update(d, u):
         for (key, value) in u.items():
             if (isinstance(value, Mapping)):
-                d[key]= _update(d.get(key, {}), value)
+                d[key] = _update(d.get(key, {}), value)
             else:
                 d[key] = value
         return d
+
     _update(config, overwrite_config)
+
+
+def set_temporary_output(*rules):
+    """Set the output of rules to temporary"""
+    for rule in rules:
+        logger.debug(
+            "setting output of rule '{rule}' to temporary".format(rule=rule))
+        rule.temp_output = set(rule.output)
+
+
+def set_protected_output(*rules):
+    """Set the output of rules to protected"""
+    for rule in rules:
+        logger.debug(
+            "setting output of rule '{rule}' to protected".format(rule=rule))
+        rule.protected_output = set(rule.output)
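
For illustration, update_config merges nested mappings recursively instead
of replacing them wholesale (values are illustrative):

    from snakemake.utils import update_config

    config = {"a": {"x": 1, "y": 2}, "b": 3}
    update_config(config, {"a": {"y": 20}, "c": 4})
    # config is now {"a": {"x": 1, "y": 20}, "b": 3, "c": 4}
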
diff --git a/snakemake/version.py b/snakemake/version.py
index 46aa803..7a82992 100644
--- a/snakemake/version.py
+++ b/snakemake/version.py
@@ -1 +1 @@
-__version__ = "3.4.2"
+__version__ = "3.5.4"
diff --git a/snakemake/workflow.py b/snakemake/workflow.py
index b035bc3..a222fc0 100644
--- a/snakemake/workflow.py
+++ b/snakemake/workflow.py
@@ -26,7 +26,7 @@ import snakemake.io
 from snakemake.io import protected, temp, temporary, expand, dynamic, glob_wildcards, flag, not_iterable, touch
 from snakemake.persistence import Persistence
 from snakemake.utils import update_config
-
+from snakemake.script import script
 
 class Workflow:
     def __init__(self,
@@ -68,6 +68,7 @@ class Workflow:
         self._onsuccess = lambda log: None
         self._onerror = lambda log: None
         self.debug = debug
+        self._rulecount = 0
 
         global config
         config = dict()
@@ -200,6 +201,7 @@ class Workflow:
                 subsnakemake=None,
                 updated_files=None,
                 keep_target_files=False,
+                keep_shadow=False,
                 allowed_rules=None,
                 greediness=1.0,
                 no_hooks=False):
@@ -386,6 +388,9 @@ class Workflow:
                 print(*items, sep="\n")
             return True
 
+        if not keep_shadow:
+            self.persistence.cleanup_shadow()
+
         scheduler = JobScheduler(self, dag, cores,
                                  local_cores=local_cores,
                                  dryrun=dryrun,
@@ -456,7 +461,9 @@ class Workflow:
             if not os.path.isabs(snakefile) and self.included_stack:
                 current_path = os.path.dirname(self.included_stack[-1])
                 snakefile = os.path.join(current_path, snakefile)
-            snakefile = os.path.abspath(snakefile)
+            # Could still be a URL if a relative include was used
+            if not urllib.parse.urlparse(snakefile).scheme:
+                snakefile = os.path.abspath(snakefile)
         # else it could be an url.
         # at least we don't want to modify the path for clarity.
 
@@ -471,8 +478,10 @@ class Workflow:
         workflow = self
 
         first_rule = self.first_rule
-        code, linemap = parse(snakefile,
-                              overwrite_shellcmd=self.overwrite_shellcmd)
+        code, linemap, rulecount = parse(snakefile,
+                                         overwrite_shellcmd=self.overwrite_shellcmd,
+                                         rulecount=self._rulecount)
+        self._rulecount = rulecount
 
         if print_compilation:
             print(code)
@@ -495,8 +504,7 @@ class Workflow:
 
     def workdir(self, workdir):
         if self.overwrite_workdir is None:
-            if not os.path.exists(workdir):
-                os.makedirs(workdir)
+            os.makedirs(workdir, exist_ok=True)
             self._workdir = workdir
             os.chdir(workdir)
 
@@ -534,6 +542,15 @@ class Workflow:
                     raise RuleException("Threads value has to be an integer.",
                                         rule=rule)
                 rule.resources["_cores"] = ruleinfo.threads
+            if ruleinfo.shadow_depth:
+                if ruleinfo.shadow_depth not in (True, "shallow", "full"):
+                    raise RuleException(
+                        "Shadow must either be 'shallow', 'full', "
+                        "or True (equivalent to 'full')", rule=rule)
+                if ruleinfo.shadow_depth is True:
+                    rule.shadow_depth = 'full'
+                else:
+                    rule.shadow_depth = ruleinfo.shadow_depth
             if ruleinfo.resources:
                 args, resources = ruleinfo.resources
                 if args:
@@ -618,6 +635,13 @@ class Workflow:
 
         return decorate
 
+    def shadow(self, shadow_depth):
+        def decorate(ruleinfo):
+            ruleinfo.shadow_depth = shadow_depth
+            return ruleinfo
+
+        return decorate
+
     def resources(self, *args, **resources):
         def decorate(ruleinfo):
             ruleinfo.resources = (args, resources)
@@ -679,6 +703,7 @@ class RuleInfo:
         self.message = None
         self.benchmark = None
         self.threads = None
+        self.shadow_depth = None
         self.resources = None
         self.priority = None
         self.version = None
diff --git a/tests/test_empty_include/Snakefile b/tests/test_empty_include/Snakefile
new file mode 100644
index 0000000..6930b16
--- /dev/null
+++ b/tests/test_empty_include/Snakefile
@@ -0,0 +1 @@
+include: "include.rules"
diff --git a/tests/test_empty_include/expected-results/.gitignore b/tests/test_empty_include/expected-results/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_empty_include/include.rules b/tests/test_empty_include/include.rules
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_nonstr_params/Snakefile b/tests/test_nonstr_params/Snakefile
new file mode 100644
index 0000000..3bb81b7
--- /dev/null
+++ b/tests/test_nonstr_params/Snakefile
@@ -0,0 +1,8 @@
+rule:
+    output:
+        "test.out"
+    params:
+        test=True
+    run:
+        assert params.test is True
+        shell("touch {output}")
diff --git a/tests/test_nonstr_params/expected-results/test.out b/tests/test_nonstr_params/expected-results/test.out
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_remote/S3Mocked.py b/tests/test_remote/S3Mocked.py
new file mode 100644
index 0000000..241869d
--- /dev/null
+++ b/tests/test_remote/S3Mocked.py
@@ -0,0 +1,125 @@
+__author__ = "Christopher Tomkins-Tinch"
+__copyright__ = "Copyright 2015, Christopher Tomkins-Tinch"
+__email__ = "tomkinsc at broadinstitute.org"
+__license__ = "MIT"
+
+# built-ins
+import os, sys
+from contextlib import contextmanager
+import pickle
+import time
+import threading
+import functools
+
+# intra-module
+from snakemake.remote.S3 import RemoteObject as S3RemoteObject, RemoteProvider as S3RemoteProvider
+from snakemake.remote.S3 import S3Helper
+from snakemake.decorators import dec_all_methods
+from snakemake.exceptions import WorkflowError
+
+try:
+    # third-party
+    import boto
+    from moto import mock_s3
+    import filechunkio
+except ImportError as e:
+    raise WorkflowError("The Python 3 packages 'moto', boto' and 'filechunkio' " + 
+        "need to be installed to use S3Mocked remote() file functionality. %s" % e.msg)
+
+def noop():
+    pass
+
+def pickled_moto_wrapper(func):
+    """
+        This is a class decorator that in turn decorates all methods within
+        a class to mock out boto calls with moto-simulated ones.
+        Since the moto backends are not presistent across calls by default, 
+        the wrapper also pickles the bucket state after each function call,
+        and restores it before execution. This way uploaded files are available
+        for follow-on tasks. Since snakemake may execute with multiple threads
+        it also waits for the pickled bucket state file to be available before
+        loading it in. This is a hackey alternative to using proper locks,
+        but works ok in practice.
+    """
+    def wrapper_func(self, *args, **kwargs):
+        moto_context_file = "motoState.p"
+
+        moto_context = mock_s3()
+        moto_context.start()
+
+        moto_context.backends["global"].reset = noop
+
+        # load moto buckets from pickle
+        if os.path.isfile(moto_context_file) and os.path.getsize(moto_context_file) > 0:
+            with file_lock(moto_context_file):
+                with open(moto_context_file, "rb") as f:
+                    moto_context.backends["global"].buckets = pickle.load(f)
+
+        mocked_function = moto_context(func)
+        retval = mocked_function(self, *args, **kwargs)
+
+        with file_lock(moto_context_file):
+            with open(moto_context_file, "wb") as f:
+                pickle.dump(moto_context.backends["global"].buckets, f)
+
+        moto_context.stop()
+
+        return retval
+    functools.update_wrapper(wrapper_func, func)
+    wrapper_func.__wrapped__ = func
+    return wrapper_func
+
+@dec_all_methods(pickled_moto_wrapper, prefix=None)
+class RemoteProvider(S3RemoteProvider):
+    def __init__(self, *args, **kwargs):
+        super(RemoteProvider, self).__init__(*args, **kwargs)
+
+@dec_all_methods(pickled_moto_wrapper, prefix=None)
+class RemoteObject(S3RemoteObject):
+    """ 
+        This is a derivative of the S3 remote provider that mocks
+        out boto-based S3 calls using the "moto" Python package.
+        Only the initializer is different; it "uploads" the input 
+        test file to the moto-simulated bucket at the start.
+    """
+
+    def __init__(self, *args, keep_local=False, provider=None, **kwargs):
+        super(RemoteObject, self).__init__(*args, keep_local=keep_local, provider=provider, **kwargs)
+
+        bucket_name = 'test-remote-bucket'
+        test_file = "test.txt"
+
+        conn = boto.connect_s3()
+        if bucket_name not in [b.name for b in conn.get_all_buckets()]:
+            conn.create_bucket(bucket_name)
+
+        # "Upload" files that should be in S3 before tests...
+        s3c = S3Helper()
+        if not s3c.exists_in_bucket(bucket_name, test_file):
+            s3c.upload_to_s3(bucket_name, test_file)
+
+
+# ====== Helpers =====
+
+def touch(fname, mode=0o666, dir_fd=None, **kwargs):
+    # create lock file faster
+    # https://stackoverflow.com/a/1160227
+    flags = os.O_CREAT | os.O_APPEND
+    with os.fdopen(os.open(fname, flags=flags, mode=mode, dir_fd=dir_fd)) as f:
+        os.utime(f.fileno() if os.utime in os.supports_fd else fname,
+            dir_fd=None if os.supports_fd else dir_fd, **kwargs)
+
+ at contextmanager
+def file_lock(filepath):
+    lock_file = filepath + ".lock"
+
+    while os.path.isfile(lock_file):
+        time.sleep(2)
+
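+    # note: waiting for the lock file and then creating it is not atomic,
+    # so two waiters could both proceed; as the wrapper docstring notes,
+    # this is a pragmatic hack rather than a proper lock.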
+    touch(lock_file)
+
+    try:
+        yield
+    finally:
+        if os.path.isfile(lock_file):
+            os.remove(lock_file)
diff --git a/tests/test_remote/Snakefile b/tests/test_remote/Snakefile
new file mode 100644
index 0000000..d24ef7d
--- /dev/null
+++ b/tests/test_remote/Snakefile
@@ -0,0 +1,86 @@
+#import re, os, sys
+
+from S3Mocked import RemoteProvider as S3RemoteProvider
+
+S3 = S3RemoteProvider()
+
+# remote dynamic file test
+# This makes use of a special provider that mocks up S3 using the moto
+# library so that boto calls hit local "buckets"
+rule all:
+    input:
+        # only keeping the file so we can copy it out to the cwd
+        S3.remote("test-remote-bucket/out.txt", keep_local=True)
+    run:
+        shell("mv test-remote-bucket/out.txt ./")
+
+rule split:
+    input: S3.remote('test-remote-bucket/test.txt')
+    output: S3.remote(dynamic('test-remote-bucket/prefix{split_id}.txt'))
+    run:
+        shell('split -l 2 {input} test-remote-bucket/prefix')
+        for f in os.listdir(os.getcwd()+"/test-remote-bucket"):
+            if re.search('prefix[a-z][a-z]', f):
+                os.rename("test-remote-bucket/"+f, "test-remote-bucket/"+f + '.txt')
+
+rule cut:
+    input: S3.remote('test-remote-bucket/prefix{split_id,[a-z][a-z]}.txt')
+    output: 
+        S3.remote('test-remote-bucket/{split_id}_cut.txt')
+    shell: 'cut -f 1,2 {input} > {output}'
+
+rule merge:
+    input: 
+        S3.remote(dynamic('test-remote-bucket/{split_id}_cut.txt'))
+    output: 
+        S3.remote('test-remote-bucket/out.txt'),
+    run: 
+        shell('echo {input}; cat {input} > {output}')
+
+# after we finish, we need to remove the pickle storing
+# the local moto "buckets" so we are starting fresh
+# next time this test is run. This file is created by
+# the moto wrapper defined in S3Mocked.py
+onsuccess:
+    shell("rm ./motoState.p")
+
+onerror:
+    shell("rm ./motoState.p")
+
+# or if you prefer to not instantiate a RemoteProvider object, and rely on the module
+# import S3Mocked as S3Mocked
+# # remote dynamic file test
+# # This makes use of a special provider that mocks up S3 using the moto
+# # library so that boto calls hit local "buckets"
+# rule all:
+#     input:
+#         # only keeping the file so we can copy it out to the cwd
+#         remote("test-remote-bucket/out.txt", keep_local=True, provider=S3Mocked, additional_kwargs={})
+#     run:
+#         shell("mv test-remote-bucket/out.txt ./")
+
+# rule split:
+#     input: remote('test-remote-bucket/test.txt', keep_local=False, provider=S3Mocked, additional_kwargs={})
+#     output: remote(dynamic('test-remote-bucket/prefix{split_id}.txt'), provider=S3Mocked, additional_kwargs={})
+#     run:
+#         shell('split -l 2 {input} test-remote-bucket/prefix')
+#         for f in os.listdir(os.getcwd()+"/test-remote-bucket"):
+#             if re.search('prefix[a-z][a-z]', f):
+#                 os.rename("test-remote-bucket/"+f, "test-remote-bucket/"+f + '.txt')
+
+# rule cut:
+#     input: remote('test-remote-bucket/prefix{split_id,[a-z][a-z]}.txt', provider=S3Mocked, additional_kwargs={})
+#     output: 
+#         remote('test-remote-bucket/{split_id}_cut.txt', provider=S3Mocked, additional_kwargs={})
+#     shell: 'cut -f 1,2 {input} > {output}'
+
+# rule merge:
+#     input: 
+#         remote(dynamic('test-remote-bucket/{split_id}_cut.txt'), provider=S3Mocked, additional_kwargs={})
+#     output: 
+#         remote('test-remote-bucket/out.txt', provider=S3Mocked, additional_kwargs={}),
+#     run: 
+#         shell('echo {input}; cat {input} > {output}')
+
+
+
diff --git a/tests/test_remote/__init__.py b/tests/test_remote/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_remote/expected-results/out.txt b/tests/test_remote/expected-results/out.txt
new file mode 100644
index 0000000..818b3c5
--- /dev/null
+++ b/tests/test_remote/expected-results/out.txt
@@ -0,0 +1,4 @@
+0       1       2
+0       1       2
+0       1       2
+0       1       2
diff --git a/tests/test_remote/test.txt b/tests/test_remote/test.txt
new file mode 100644
index 0000000..818b3c5
--- /dev/null
+++ b/tests/test_remote/test.txt
@@ -0,0 +1,4 @@
+0       1       2
+0       1       2
+0       1       2
+0       1       2
diff --git a/tests/test_script/Snakefile b/tests/test_script/Snakefile
new file mode 100644
index 0000000..8486300
--- /dev/null
+++ b/tests/test_script/Snakefile
@@ -0,0 +1,14 @@
+
+rule:
+    input:
+        "test.in"
+    output:
+        txt="test.out"
+    script:
+        "scripts/test.R"
+
+rule:
+    output:
+        "test.in"
+    script:
+        "scripts/test.py"
diff --git a/tests/test_script/expected-results/test.in b/tests/test_script/expected-results/test.in
new file mode 100644
index 0000000..b85905e
--- /dev/null
+++ b/tests/test_script/expected-results/test.in
@@ -0,0 +1 @@
+1 2 3
diff --git a/tests/test_script/expected-results/test.out b/tests/test_script/expected-results/test.out
new file mode 100644
index 0000000..b85905e
--- /dev/null
+++ b/tests/test_script/expected-results/test.out
@@ -0,0 +1 @@
+1 2 3
diff --git a/tests/test_script/scripts/test.R b/tests/test_script/scripts/test.R
new file mode 100644
index 0000000..1b146b0
--- /dev/null
+++ b/tests/test_script/scripts/test.R
@@ -0,0 +1,8 @@
+print(snakemake@wildcards)
+print(snakemake@threads)
+print(snakemake@log)
+print(snakemake@config)
+print(snakemake@params)
+
+values <- scan(snakemake@input[[1]])
+write(values, file = snakemake@output[["txt"]])
diff --git a/tests/test_script/scripts/test.py b/tests/test_script/scripts/test.py
new file mode 100644
index 0000000..f4df5e7
--- /dev/null
+++ b/tests/test_script/scripts/test.py
@@ -0,0 +1,2 @@
+with open(snakemake.output[0], "w") as out:
+    print(1, 2, 3, file=out)
diff --git a/tests/test_shadow/Snakefile b/tests/test_shadow/Snakefile
new file mode 100644
index 0000000..425f4a0
--- /dev/null
+++ b/tests/test_shadow/Snakefile
@@ -0,0 +1,26 @@
+rule all:
+    input: ["simple_shallow.out", "simple_full.out"]
+
+rule shallow:
+    input: "test.in"
+    output: "simple_shallow.out"
+    shadow: "shallow"
+    shell:
+        """
+        echo 1 > junk.out
+        cat {input} >> {output}
+        echo simple_shallow >> {output}
+        test ! -f more_junk.out
+        """
+
+rule full:
+    input: "test.in"
+    output: "simple_full.out"
+    shadow: "full"
+    shell:
+        """
+        echo 1 > more_junk.out
+        cat {input} > {output}
+        echo simple_full >> {output}
+        test ! -f junk.out
+        """
diff --git a/tests/test_shadow/expected-results/simple_full.out b/tests/test_shadow/expected-results/simple_full.out
new file mode 100644
index 0000000..2bbc727
--- /dev/null
+++ b/tests/test_shadow/expected-results/simple_full.out
@@ -0,0 +1,2 @@
+in
+simple_full
diff --git a/tests/test_shadow/expected-results/simple_shallow.out b/tests/test_shadow/expected-results/simple_shallow.out
new file mode 100644
index 0000000..3db70a5
--- /dev/null
+++ b/tests/test_shadow/expected-results/simple_shallow.out
@@ -0,0 +1,2 @@
+in
+simple_shallow
diff --git a/tests/test_shadow/test.in b/tests/test_shadow/test.in
new file mode 100644
index 0000000..4935e88
--- /dev/null
+++ b/tests/test_shadow/test.in
@@ -0,0 +1 @@
+in
diff --git a/tests/test_url_include/Snakefile b/tests/test_url_include/Snakefile
index cb72233..b2d23f1 100644
--- a/tests/test_url_include/Snakefile
+++ b/tests/test_url_include/Snakefile
@@ -1,6 +1,6 @@
 
 
-include: "https://bitbucket.org/johanneskoester/snakemake/raw/master/tests/test05/Snakefile"
+include: "https://bitbucket.org/snakemake/snakemake/raw/master/tests/test05/Snakefile"
 
 rule:
 	input: "test.predictions"
diff --git a/tests/tests.py b/tests/tests.py
index 37dd180..011ddc4 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -42,7 +42,7 @@ def run(path,
         needs_connection=False,
         snakefile="Snakefile",
         subpath=None,
-        check_md5=True, **params):
+        check_md5=True, cores=3, **params):
     """
     Test the Snakefile in path.
     There must be a Snakefile in the path and a subdirectory named
@@ -76,7 +76,7 @@ def run(path,
         call('cp `find {} -maxdepth 1 -type f` {}'.format(path, tmpdir),
              shell=True)
         success = snakemake(snakefile,
-                            cores=3,
+                            cores=cores,
                             workdir=tmpdir,
                             stats="stats.txt",
                             snakemakepath=SCRIPTPATH,
@@ -266,15 +266,45 @@ def test_yaml_config():
     run(dpath("test_yaml_config"))
 
 
+def test_remote():
+    try:
+        import moto
+        import boto
+        import filechunkio
+
+        # only run the remote file test if the dependencies
+        # are installed, otherwise do nothing
+        run(dpath("test_remote"), cores=1)
+    except ImportError:
+        pass
+
+
 def test_cluster_sync():
     run(dpath("test14"),
         snakefile="Snakefile.nonstandard",
         cluster_sync="./qsub")
 
+
 def test_symlink_temp():
     run(dpath("test_symlink_temp"), shouldfail=True)
 
 
+def test_empty_include():
+    run(dpath("test_empty_include"))
+
+
+def test_script():
+    run(dpath("test_script"))
+
+
+def test_shadow():
+    run(dpath("test_shadow"))
+
+
+def test_nonstr_params():
+    run(dpath("test_nonstr_params"))
+
+
 if __name__ == '__main__':
     import nose
     nose.run(defaultTest=__name__)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/snakemake.git


