[med-svn] [python-pyflow] 01/02: New upstream version 1.1.13
Andreas Tille
tille at debian.org
Tue Nov 15 15:01:27 UTC 2016
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository python-pyflow.
commit 7e7a7d12449e83e7512a9165a5a9f3904721a4a5
Author: Andreas Tille <tille at debian.org>
Date: Tue Nov 15 15:59:38 2016 +0100
New upstream version 1.1.13
---
.appveyor.yml | 15 +
.gitattributes | 1 +
.gitignore | 3 +
.travis.yml | 44 +
README.md | 64 +
pyflow/COPYRIGHT.txt | 28 +
pyflow/README.md | 189 +
pyflow/demo/README.txt | 33 +
pyflow/demo/bclToBwaBam/README.txt | 27 +
pyflow/demo/bclToBwaBam/bwaworkflow.py | 676 ++++
pyflow/demo/bclToBwaBam/configBclToBwaBam.py | 397 ++
pyflow/demo/bclToBwaBam/configBclToBwaBam.py.ini | 5 +
pyflow/demo/bclToBwaBam/example_configuration.bash | 18 +
pyflow/demo/cwdDemo/cwdDemo.py | 85 +
pyflow/demo/envDemo/envDemo.py | 96 +
pyflow/demo/helloWorld/README.txt | 3 +
pyflow/demo/helloWorld/helloWorld.py | 77 +
pyflow/demo/makeDemo/.hidden | 7 +
pyflow/demo/makeDemo/makeDemo.py | 87 +
pyflow/demo/memoryDemo/memoryDemo.py | 83 +
pyflow/demo/mutexDemo/mutexDemo.py | 89 +
pyflow/demo/mutexDemo/testtasks/sleeper.bash | 13 +
pyflow/demo/retryDemo/retryDemo.py | 90 +
pyflow/demo/runOptionsDemo/getDemoRunOptions.py | 133 +
pyflow/demo/runOptionsDemo/runOptionsDemo.py | 109 +
pyflow/demo/runOptionsDemo/testtasks/sleeper.bash | 13 +
pyflow/demo/runOptionsDemo/testtasks/yeller.bash | 16 +
pyflow/demo/simpleDemo/simpleDemo.py | 177 +
pyflow/demo/simpleDemo/testtasks/runner.bash | 17 +
pyflow/demo/simpleDemo/testtasks/runner.c | 16 +
pyflow/demo/simpleDemo/testtasks/sleeper.bash | 13 +
pyflow/demo/simpleDemo/testtasks/yeller.bash | 16 +
pyflow/demo/subWorkflow/subWorkflow.py | 115 +
pyflow/demo/subWorkflow/testtasks/runner.bash | 17 +
pyflow/demo/subWorkflow/testtasks/runner.c | 16 +
pyflow/demo/subWorkflow/testtasks/sleeper.bash | 13 +
pyflow/demo/subWorkflow/testtasks/yeller.bash | 16 +
pyflow/demo/successMsgDemo/successMsgDemo.py | 81 +
pyflow/doc/ChangeLog.txt | 202 +
pyflow/doc/README.txt | 4 +
pyflow/doc/client_api/README | 12 +
.../make_WorkflowRunner_API_html_doc.bash | 6 +
.../make_WorkflowRunner_API_simple_doc.py | 13 +
pyflow/doc/developer/README | 1 +
.../developer/make_pyflow_developer_html_doc.bash | 6 +
pyflow/setup.py | 11 +
pyflow/src/__init__.py | 1 +
pyflow/src/pyflow.py | 4175 ++++++++++++++++++++
pyflow/src/pyflowConfig.py | 213 +
pyflow/src/pyflowTaskWrapper.py | 338 ++
scratch/README.txt | 18 +
scratch/delete_trailing_wspace.bash | 33 +
scratch/make_release_tarball.bash | 65 +
scratch/notes/design.notes | 74 +
scratch/notes/todo | 53 +
scratch/pybox/email_test.py | 29 +
scratch/pybox/hijack.py | 25 +
scratch/pybox/inspect.py | 7 +
scratch/pybox/memTest.py | 46 +
scratch/test/README.md | 30 +
scratch/test/pyflow_basic_feature_runner.py | 116 +
scratch/test/pyflow_unit_tests.py | 430 ++
scratch/test/test_pyflow.py | 63 +
scratch/test/test_release_tarball.bash | 50 +
scratch/test/testtasks/runner.bash | 14 +
scratch/test/testtasks/runner.c | 16 +
scratch/test/testtasks/sleeper.bash | 13 +
scratch/test/testtasks/slow_yeller.py | 24 +
scratch/test/testtasks/yeller.bash | 16 +
69 files changed, 9002 insertions(+)
diff --git a/.appveyor.yml b/.appveyor.yml
new file mode 100644
index 0000000..007af0e
--- /dev/null
+++ b/.appveyor.yml
@@ -0,0 +1,15 @@
+
+install:
+ # Check the python version:
+ - "python.exe --version"
+
+build: false # Not a C# project
+
+test_script:
+ # Run the project tests
+ - "python.exe scratch/test/test_pyflow.py"
+
+notifications:
+ - provider: Email
+ to:
+ - csaunders at illumina.com
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..3d3fd16
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+pyflow/README.txt export-subst
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8ede833
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+*~
+pyflow.data
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..bec5286
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,44 @@
+language: python
+
+# Note PYVER drives hack to use python 2.4, this is
+# actually pretty ugly on travis -- process is:
+# 1) install python2.4 from deadsnakes ppa
+# 2) shove 2.4 in /usr/bin/python
+# 3) set PATH back to /usr/bin
+#
+# This removes the system python link which is probably not
+# smart, but the test works so leaving it for now.
+#
+matrix:
+ include:
+ - os: linux
+ sudo: required
+ python: "2.7"
+ - os: linux
+ sudo: required
+ python: "2.7"
+ env: PYVER="2.4"
+
+before_install:
+ - date -u
+ - uname -a
+ - lsb_release -a
+ - if [ "$PYVER" == "2.4" ]; then sudo add-apt-repository -y ppa:fkrull/deadsnakes && sudo apt-get update -qq; fi
+
+install:
+ - if [ "$PYVER" == "2.4" ]; then sudo apt-get install python2.4 -y && python2.4 -V; fi
+ - if [ "$PYVER" == "2.4" ]; then sudo rm -f /usr/bin/python && sudo ln -s /usr/bin/python2.4 /usr/bin/python; fi
+ - if [ "$PYVER" == "2.4" ]; then export PATH=/usr/bin:$PATH; fi
+ - python -V
+
+script:
+ - cd scratch/test && bash ./test_release_tarball.bash -nosge
+
+branches:
+ only:
+ - master
+
+notifications:
+ email:
+ recipients:
+ - csaunders at illumina.com
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..33102f7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,64 @@
+pyFlow - a lightweight parallel task engine
+===========================================
+
+[![Build Status] [tcistatus]] [tcihome]
+[![Build status] [acistatus]] [acihome]
+
+
+pyFlow is a tool to manage tasks in the context of a task dependency
+graph. It has some similarities to make. pyFlow is not a program – it
+is a python module, and workflows are defined using pyFlow by writing
+regular python code with the pyFlow API.
+
+For more information, please see the [pyFlow website] [site].
+
+[site]:http://illumina.github.io/pyflow/
+
+[tcistatus]:https://travis-ci.org/Illumina/pyflow.svg?branch=master
+[tcihome]:https://travis-ci.org/Illumina/pyflow
+
+[acistatus]:https://ci.appveyor.com/api/projects/status/fkovw5ife59ae48t/branch/master?svg=true
+[acihome]:https://ci.appveyor.com/project/ctsa/pyflow/branch/master
+
+
+License
+-------
+
+pyFlow source code is provided under the [BSD 2-Clause License](pyflow/COPYRIGHT.txt).
+
+
+Releases
+--------
+
+Recent release tarballs can be found on the github release list here:
+
+https://github.com/Illumina/pyflow/releases
+
+To create a release tarball corresponding to any other version, run:
+
+ git clone git://github.com/Illumina/pyflow.git pyflow
+ cd pyflow
+ git checkout ${VERSION}
+ ./scratch/make_release_tarball.bash
+ # tarball is "./pyflow-${VERSION}.tar.gz"
+
+Note this README is at the root of the pyflow development repository
+and is not part of the python source release.
+
+
+Contents
+--------
+
+For the development repository (this directory), the sub-directories are:
+
+pyflow/
+
+Contains all pyflow code intended for distribution, plus demo code and
+documentation.
+
+scratch/
+
+This directory contains support scripts for tests/cleanup/release
+tarballing, etc.
+
diff --git a/pyflow/COPYRIGHT.txt b/pyflow/COPYRIGHT.txt
new file mode 100644
index 0000000..984089a
--- /dev/null
+++ b/pyflow/COPYRIGHT.txt
@@ -0,0 +1,28 @@
+pyFlow - a lightweight parallel task engine
+
+Copyright (c) 2012-2015 Illumina, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the
+ distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/pyflow/README.md b/pyflow/README.md
new file mode 100644
index 0000000..4ea7759
--- /dev/null
+++ b/pyflow/README.md
@@ -0,0 +1,189 @@
+
+pyFlow - a lightweight parallel task engine
+===========================================
+
+Chris Saunders (csaunders at illumina.com)
+Version: ${VERSION}
+
+
+SUMMARY:
+--------
+
+pyFlow manages running tasks in the context of a task dependency
+graph. It has some similarities to make. pyFlow is not a program -- it
+is a python module, and workflows are defined using pyFlow by writing
+regular python code with the pyFlow API.
+
+FEATURES:
+---------
+
+- Define workflows as python code
+- Run workflows on localhost or sge
+- Continue workflows which have partially completed
+- Task resource management: Specify number of threads and memory
+ required for each task (see the sketch after this feature list).
+- Recursive workflow specification: take any existing pyFlow object and
+ use it as a task in another pyFlow.
+- Dynamic workflow specification: define a wait on a task specification rather
+ than just on the tasks themselves, so that tasks can be defined based on the
+ results of upstream tasks (note: recursive workflows are an even better way to do this)
+- Detects and reports all failed tasks with consistent workflow-level logging.
+- Task-level logging: All task stderr is logged and decorated,
+ eg. [time][host][workflow_run][taskid]
+- Task timing: Task wrapper function provides wall time for every task
+- Task priority: Tasks which are simultaneously eligible to run can be assigned
+relative priorities to be run or queued first.
+- Task mutex sets: define sets of tasks which access an exclusive resource
+- Email notification on job completion/error/exception
+- Provide ongoing task summary report at specified intervals
+- Output task graph in dot format
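+
+The sketch below is illustrative only (it is not part of the demo code): it
+shows how per-task thread counts and dependencies are declared from inside a
+workflow() method, as mentioned in the task resource management bullet above.
+The memMb parameter name and the two wrapped commands are assumptions here;
+nCores and dependencies appear throughout the demo scripts.
+
+```
+from pyflow import WorkflowRunner
+
+class ResourceDemoWorkflow(WorkflowRunner) :
+
+    def workflow(self) :
+        # request 4 threads (and, by assumption, ~4 GB via memMb) for alignment:
+        alignTask = self.addTask("align", "./run_aligner.bash",
+                                 nCores=4, memMb=4096)
+
+        # the sort task is launched only after the alignment task succeeds:
+        self.addTask("sort", "./run_sort.bash", dependencies=alignTask)
+```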
+
+LICENSE:
+--------
+
+pyFlow source code is provided under the [BSD 2-Clause License](COPYRIGHT.txt).
+
+INSTALL:
+--------
+
+pyFlow can be installed and used on python versions in the 2.4 to
+2.7 series.
+
+The pyflow module can be installed using standard python distutils
+installation. To do so, unpack the tarball and use the setup script
+as follows:
+
+```
+tar -xzf pyflow-X.Y.Z.tar.gz
+cd pyflow-X.Y.Z
+python setup.py build install
+```
+
+If installation is not convenient, you can simply add the pyflow
+src/ directory to the system search path. For instance:
+
+usepyflow.py:
+```
+import sys
+sys.path.append("/path/to/pyflow/src")
+
+from pyflow import WorkflowRunner
+```
+
+
+
+WRITING WORKFLOWS:
+------------------
+
+Briefly, pyFlow workflows are written by creating a new class which
+inherits from pyflow.WorkflowRunner. This class then defines its
+workflow by overloading the WorkflowRunner.workflow()
+method. Workflows are run by instantiating a workflow class and
+calling the WorkflowRunner.run() method.
+
+A very simple demonstration of the minimal workflow setup and run
+described above is available in the directory: `${pyflowDir}/demo/helloWorld/`
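+
+As a rough sketch of that minimal setup (the class name, task label and echo
+command here are illustrative rather than copied from the demo):
+
+```
+import sys
+sys.path.append("/path/to/pyflow/src")
+
+from pyflow import WorkflowRunner
+
+class HelloWorkflow(WorkflowRunner) :
+
+    # a workflow is defined by overloading WorkflowRunner.workflow():
+    def workflow(self) :
+        # addTask(label, command) schedules a single command-line task:
+        self.addTask("sayHello", "echo 'Hello pyFlow!'")
+
+# instantiate the workflow, then run all tasks on the local machine:
+wflow = HelloWorkflow()
+retval = wflow.run(mode="local")
+sys.exit(retval)
+```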
+
+Several other demonstration workflows are available:
+`${pyflowDir}/demo/simpleDemo` – a basic feature sandbox
+`${pyflowDir}/demo/subWorkflow` – shows how recursive workflow invocation works
+
+The developer documentation for the pyflow API can be generated by running
+`${pyflowDir}/doc/getApiDoc.py` or `python ${pyflowDir}/src/pydoc.py`
+
+An advanced proof-of-concept demonstration of bclToBam conversion
+is also available in `${pyflowDir}/demo/bclToBwaBam`
+
+
+
+USING WORKFLOWS:
+----------------
+
+When running a pyFlow workflow, all logs and state information are
+written into a single "pyflow.data" directory. The root of this
+directory is specified in the workflow.run() call.
+
+### Logging:
+
+pyFlow creates a primary workflow-level log, and 2 log files to
+capture all task stdout and stderr, respectively.
+
+Workflow-level log information is copied to both stderr and
+pyflow.data/logs/pyflow_log.txt. All workflow log messages are
+prefixed with "[time] [hosname] [workflow_run] [component] ". Where:
+
+- 'time' is UTC in ISO 8601 format.
+- 'workflow_run' is an id that's weakly unique for each run of the workflow. It
+is composed of (1) the run() PID and (2) the number of times run() has been called on
+the workflow by the same process. These two values are joined by an underscore (for example, '12045_2').
+- 'component' - the name of the pyflow thread; the primary threads are
+ 'WorkflowManager', which runs the workflow() method, and 'TaskManager', which
+ polls the task graph and launches jobs.
+
+In the task logs, only the stderr stream is decorated. The prefix in
+this case is: "[time] [hostname] [workflow_run] [taskname] ". The
+'taskname" is usually the label provided for each task in its
+addTask() call. All tasks are launched by a task wrapping function,
+and any messages from the taskWrapper (as opposed to the task command
+itself) will use an extended taskname:
+"pyflowTaskWrapper:${tasklabel}". One example where the task wrapper
+writes to the log is to report the total runtime for its task.
+
+All logging is append only -- pyFlow does not overwrite logs even over
+multiple runs. The workflow_run id can be used to select out the
+information from a specific run if restarting/continuing a run
+multiple times.
+
+### State:
+
+pyFlow continues jobs by marking their status in a file, *not* by
+looking for the presence of file targets. This is a major difference
+from make and must be kept in mind when restarting interrupted
+workflows.
+
+The runstate of each task is in
+pyflow.data/state/pyflow_tasks_runstate.txt, and the description of each
+task is in pyflow.data/state/pyflow_tasks_info.txt. At the beginning of
+each run, any existing task files are backed up in
+pyflow.data/state/backup.
+
+### Other:
+
+#### Email notification:
+
+When running a workflow with one or more email addresses given in the
+mailTo argument, pyflow will attempt to send a notification describing the
+outcome of the run under any circumstance short of host hardware failure.
+The email should result from one of three outcomes: (1) successful run completion,
+(2) the first unrecoverable task failure, with a description of the error, or
+(3) an unhandled software exception. Mail comes by default from
+"pyflow-bot at YOURDOMAIN" (configurable). Note that (1) you may
+have to change the email address from the automatically detected domain
+to receive emails, and (2) you may need to check your junk-mail
+filter to receive notifications. It is best to configure one of the demo
+scripts to email you on a new machine to test out any issues before starting
+a production run.
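+
+For illustration (using the HelloWorkflow sketch above and a placeholder
+address), the notification recipients are passed to run() through the mailTo
+argument, which accepts one or more addresses:
+
+```
+wflow = HelloWorkflow()
+retval = wflow.run(mode="local", mailTo=["user@example.com"])
+```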
+
+#### Graph output:
+
+pyFlow provides a script which can be used to produce a graph of the current
+task dependencies, where each node is colored by the task status. The graph
+generation script is automatically created for each run in the pyflow state
+directory here:
+
+pyflow.data/state/make_pyflow_task_graph.py
+
+This script can be run without arguments to produce the current task graph in
+dot format based on the data files in the pyflow.data/state/ directory.
+
+#### Site configuration:
+
+The file ${pyflowDir}/src/pyflowConfig.py contains any pyflow variables or
+functions which would be likely to need configuration at a new site. This
+currently includes:
+
+- from: email address from pyflow
+- default memory per task
+- default memory available per thread in localhost mode
+- qsub arguments given in response to a resource request.
+
diff --git a/pyflow/demo/README.txt b/pyflow/demo/README.txt
new file mode 100644
index 0000000..5db7f7e
--- /dev/null
+++ b/pyflow/demo/README.txt
@@ -0,0 +1,33 @@
+
+This directory contains small demonstration workflows for various
+pyflow features. If you are new to pyflow, a recommended order to
+become familiar with its features is:
+
+1. helloWorld
+This demonstrates a minimum single-task pyflow workflow.
+
+2. simpleDemo
+This workflow demonstrates a number of commonly used pyflow features
+by setting up a number of tasks and showing different ways to specify
+task resource requirements and dependencies.
+
+3. subWorkflow
+This workflow demonstrates the more advanced workflow recursion feature.
+
+4. runOptionsDemo
+This workflow demonstrates one possible way the pyflow API runtime
+options could be translated to user command-line arguments if building
+a command-line utility.
+
+5. bclToBwaBam
+This workflow demonstrates a much larger 'real-world' script which
+performs bcl to fastq conversion from multiple flowcells, alignment
+with BWA and translation of the BWA output to a single sorted and
+indexed BAM file. It has numerous dependencies required to actually
+run -- its primary purpose here is to provide an example of how a
+larger-scale pyflow workflow might look.
+
+
+Most of the remaining workflows demonstrate/test the use of specific
+pyflow features.
+
diff --git a/pyflow/demo/bclToBwaBam/README.txt b/pyflow/demo/bclToBwaBam/README.txt
new file mode 100644
index 0000000..d034b84
--- /dev/null
+++ b/pyflow/demo/bclToBwaBam/README.txt
@@ -0,0 +1,27 @@
+
+This demo shows the use of pyflow on a production-scale problem.
+
+The "configBclToBwaBam.py" script here will take one or more bcl
+basecalls directories, run them through CASAVA 1.8 bcl conversion and
+align/sort/merge/markdup each sample into a single BAM file. A list of
+sample names may be given to restrict the analysis post bcl
+conversion.
+
+Help for the configuration script is available by typing
+"./configBclToBwaBam.py -h". To run, the script requires at minimum a
+bcl basecalls directory and a BWA index genome fasta file.
+
+This directory contains a configuration file
+"configBclToBwaBam.py.ini" which contains paths for bwa, samtools,
+Picard and CASAVA. You may need to change these to reflect the
+installed locations at your site before running.
+
+If on the sd-isilon, the file "example_configuration.bash" will call
+"configBclToBwaBam.py" with a pointer to a subsampled bcl directory to
+quickly demonstrate the use of this script on real data.
+
+Note that once all arguments are provided and the configuration script
+completes, a run script will be generated in the output directory
+which can be used to actually execute the workflow, allowing for
+local/sge and total job limit specification.
+
diff --git a/pyflow/demo/bclToBwaBam/bwaworkflow.py b/pyflow/demo/bclToBwaBam/bwaworkflow.py
new file mode 100644
index 0000000..42bff5e
--- /dev/null
+++ b/pyflow/demo/bclToBwaBam/bwaworkflow.py
@@ -0,0 +1,676 @@
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+"""
+bwaworkflow -- a pyflow demonstration module
+
+This is a quick-and-dirty BCL to BWA BAM workflow to demonstrate
+how pyflow could be used on a production-scale problem.
+
+__author__ = "Christopher Saunders"
+"""
+
+
+import os.path
+import sys
+
+# In production, pyflow can either be installed, or we can distribute
+# workflow to external users with pyflow in the same directory/fixed
+# relative directory or a configured directory macro-ed in by cmake,
+# etc
+#
+# For now we add the module path by hand:
+#
+scriptDir = os.path.abspath(os.path.dirname(__file__))
+sys.path.append(scriptDir + "/../../src")
+
+from pyflow import WorkflowRunner
+
+
+#
+# utility methods:
+#
+
+def ensureDir(d):
+ """
+ make directory if it doesn't already exist, raise exception if something else is in the way:
+ """
+ if os.path.exists(d):
+ if not os.path.isdir(d) :
+ raise Exception("Can't create directory: %s" % (d))
+ else :
+ os.makedirs(d)
+
+
+def skipJoin(sep, a, b) :
+ if a == "" : return b
+ elif b == "" : return a
+ return a + sep + b
+
+
+def preJoin(a, b) :
+ return skipJoin('_', a, b)
+
+
+#
+# All of these "flow" functions take a set of task dependencies as
+# input and report a set of tasks on output, and thus are designed to
+# be plugged together to create workflows which are initiated in
+# the WorkflowRunner.workflow() method.
+#
+# Note that this style is not a design constraint of pyflow, it was
+# just one natural way to write the bwa workflow, and demonstrates an
+# extensible model wherein various flow functions could be stored in
+# external modules and combined as required.
+#
+# Note that these flow functions are written to lookup dependencies
+# from the WorkflowRunner class, so they are really class
+# methods. Although they could also lookup data from the derived BWA
+# class, they don't -- this allows them to be reused by other
+# WorkflowRunner classes.
+#
+
+
+
+def casava18BclToFastqFlow(self, taskPrefix="", dependencies=set()) :
+ """
+ CASAVA 1.8 bcl to fastq conversion
+
+ This assumes the bclBasecallsDir is generated in a CASAVA 1.8
+ compatible format, and uses CASAVA 1.8 to convert to fastq
+
+ This demonstrates pyflow's makefile handling option, where
+ you specify a makefile directory instead of a regular command, and
+ pyflow runs make/qmake according to the run mode.
+
+ params:
+ casavaDir
+ bclBasecallsDir
+ flowcellFastqDir
+ bclTilePattern
+ bclToFastqMaxCores
+ """
+
+ # configure bcl2fastq makefile:
+ configBclToFastqCmd = "perl %s/bin/configureBclToFastq.pl" % (self.params.casavaDir)
+ configBclToFastqCmd += " --input-dir=%s" % self.params.bclBasecallsDir
+ configBclToFastqCmd += " --output-dir=%s" % self.params.flowcellFastqDir
+ configBclToFastqCmd += " --force" # always a good idea for CASAVA
+ configBclToFastqCmd += " --ignore-missing-bcl"
+ configBclToFastqCmd += " --ignore-missing-stats"
+ configBclToFastqCmd += " --ignore-missing-control"
+ if self.params.bclTilePattern != None :
+ configBclToFastqCmd += " --tiles=%s" % (self.params.bclTilePattern)
+
+ # run configuration:
+ configLabel = self.addTask(preJoin(taskPrefix, "configBclToFastq"), configBclToFastqCmd, isForceLocal=True, dependencies=dependencies)
+
+ # for the bcl to fastq step, we use another workflow manager, so
+ # we just run it as one huge task and handle the mode ourselves:
+ nCores = self.getNCores()
+ mode = self.getRunMode()
+
+ maxCores = self.params.bclToFastqMaxCores
+ if (nCores == "unlimited") or (nCores > maxCores) :
+ nCores = maxCores
+
+ # run the fastq conversion:
+ bclToFastqLabel = self.addTask(preJoin(taskPrefix, "bclToFastq"),
+ self.params.flowcellFastqDir,
+ nCores=nCores,
+ dependencies=configLabel,
+ isCommandMakePath=True)
+
+ return set([bclToFastqLabel])
+
+
+
+class FastqPairToBwaBamFlow(WorkflowRunner) :
+ """
+ Given a read1 and read2 pair of fastq files, create an aligned and
+ sorted bamFile. Input fastq files are deleted unless isKeepFastq is set.
+ """
+
+ def __init__(self, params, suggestedAlignThreadCount=2) :
+ """
+ suggestedAlignThreadCount -- Number of threads to use in bwa aln
+ step. The workflow will lower this if
+ it exceeds the total number of cores
+ available in the run, or if it
+ exceeds alnMaxCores
+
+ params:
+ fastq1File
+ fastq2File
+ bamFile
+ alnMaxCores
+ bwaBin
+ genomeFasta
+ samtoolsBin
+ samtoolsSortMemPerCore
+ isKeepFastq
+ """
+ self.params = params
+ self.suggestedAlignThreadCount = suggestedAlignThreadCount
+
+
+ def workflow(self) :
+ bamDir = os.path.dirname(self.params.bamFile)
+ ensureDir(bamDir)
+
+ (bamPrefix, bamExt) = os.path.splitext(self.params.bamFile)
+
+ # must end in ".bam" for samtools
+ if bamExt != ".bam" :
+ raise Exception("bamFile argument must end in '.bam'. bamFile is: %s" % (bamFile))
+ if bamPrefix == "" :
+ raise Exception("bamFile argument must have a prefix before the '.bam' extension.")
+
+ # assuming many fastq pairs are running, good total throughput given cluster nodes with 2G of ram each
+ # should be achieved by giving the align processes 2 threads each:
+
+ # grab total cores to make sure we don't exceed it:
+ totalCores = self.getNCores()
+
+ #
+ # setup aln step:
+ #
+
+ # set alnCores
+ alnCores = int(self.suggestedAlignThreadCount)
+ if (totalCores != "unlimited") and (alnCores > totalCores) :
+ alnCores = int(totalCores)
+ if (alnCores > self.params.alnMaxCores) :
+ alnCores = int(self.params.alnMaxCores)
+
+ bwaBaseCmd = "%s aln -t %i %s" % (self.params.bwaBin, alnCores, self.params.genomeFasta)
+
+ peDependencies = set()
+
+ def getReadLabel(i) : return "Read%iBwaAlign" % (i)
+ def getReadSaiFile(i) : return "%s.read%i.sai" % (self.params.bamFile, i)
+ def getReadFastqFile(i) : return (self.params.fastq1File, self.params.fastq2File)[i - 1]
+
+ for read in (1, 2) :
+ readAlnCmd = "%s %s >| %s" % (bwaBaseCmd, getReadFastqFile(read), getReadSaiFile(read))
+ peDependencies.add(self.addTask(getReadLabel(read), readAlnCmd, nCores=alnCores))
+
+ #
+ # setup sampe step:
+ #
+
+ # with all the pipes, the sampe step probably needs about 2 cores; this lets sort use more mem too:
+ peCores = 2
+ if (totalCores != "unlimited") and (peCores > totalCores) :
+ peCores = int(totalCores)
+
+ peCmd = "%s sampe %s %s %s %s %s" % (self.params.bwaBin, self.params.genomeFasta,
+ getReadSaiFile(1), getReadSaiFile(2),
+ getReadFastqFile(1), getReadFastqFile(2))
+
+ peCmd += " | %s view -uS -" % (self.params.samtoolsBin)
+
+ # For a real pipeline, we'd probably prefer Picard sort, but I don't want to add another
+ # dependency to the trial workflow:
+ #
+ peCmd += " | %s sort -m %i - %s" % (self.params.samtoolsBin,
+ self.params.samtoolsSortMemPerCore, # *peCores, need to leave memory for bwa...
+ bamPrefix)
+
+ peTaskLabel = self.addTask("BwaSamPESort", peCmd, nCores=peCores, dependencies=peDependencies)
+
+ # delete sai files:
+ rmCmd = "rm -f"
+ for read in (1, 2) :
+ rmCmd += " %s" % (getReadSaiFile(read))
+ self.addTask("RmSai", rmCmd, dependencies=peTaskLabel, isForceLocal=True)
+
+
+ # optionally delete input fastqs:
+ if not self.params.isKeepFastq :
+ fastqRmCmd = "rm -f"
+ for read in (1, 2) :
+ fastqRmCmd += " %s" % (getReadFastqFile(read))
+ self.addTask("RmFastq", fastqRmCmd, dependencies=peTaskLabel, isForceLocal=True)
+
+
+
+
+class FileDigger(object) :
+ """
+ Digs into a well-defined directory structure with prefixed
+ folder names to extract all files associated with
+ combinations of directory names.
+
+ This is written primarily to go through the CASAVA 1.8 output
+ structure.
+
+ #casava 1.8 fastq example:
+ fqDigger=FileDigger('.fastq.gz',['Project_','Sample_'])
+ """
+
+ def __init__(self, targetExtension, prefixList) :
+ self.targetExtension = targetExtension
+ self.prefixList = prefixList
+
+ def getNextFile(self, dir, depth=0, ans=tuple()) :
+ """
+ generator of a tuple: (flowcell,project,sample,bamfile)
+ given a multi-flowcell directory
+ """
+ if depth < len(self.prefixList) :
+ for d in os.listdir(dir) :
+ nextDir = os.path.join(dir, d)
+ if not os.path.isdir(nextDir) : continue
+ if not d.startswith(self.prefixList[depth]) : continue
+ value = d[len(self.prefixList[depth]):]
+ for val in self.getNextFile(nextDir, depth + 1, ans + tuple([value])) :
+ yield val
+ else:
+ for f in os.listdir(dir) :
+ file = os.path.join(dir, f)
+ if not os.path.isfile(file) : continue
+ if not f.endswith(self.targetExtension) : continue
+ yield ans + tuple([file])
+
+
+
+def flowcellDirFastqToBwaBamFlow(self, taskPrefix="", dependencies=set()) :
+ """
+ Takes as input 'flowcellFastqDir' pointing to the CASAVA 1.8 flowcell
+ project/sample fastq directory structure. For each project/sample,
+ the fastqs are aligned using BWA, sorted and merged into a single
+ BAM file. The bam output is placed in a parallel project/sample
+ directory structure below 'flowcellBamDir'
+
+ params:
+ samtoolsBin
+ flowcellFastqDir
+ flowcellBamDir
+
+ calls:
+ FastqPairToBwaBamFlow
+ supplies:
+ bamFile
+ fastq1File
+ fastq2File
+ """
+
+ #
+ # 1. separate fastqs into matching pairs:
+ #
+ fqs = {}
+ fqDigger = FileDigger(".fastq.gz", ["Project_", "Sample_"])
+ for (project, sample, fqPath) in fqDigger.getNextFile(self.params.flowcellFastqDir) :
+ if (self.params.sampleNameList != None) and \
+ (len(self.params.sampleNameList) != 0) and \
+ (sample not in self.params.sampleNameList) : continue
+
+ fqFile = os.path.basename(fqPath)
+ w = (fqFile.split(".")[0]).split("_")
+ if len(w) != 5 :
+ raise Exception("Unexpected fastq filename format: '%s'" % (fqPath))
+
+ (sample2, index, lane, read, num) = w
+ if sample != sample2 :
+ raise Exception("Fastq name sample disagrees with directory sample: '%s;" % (fqPath))
+
+ key = (project, sample, index, lane, num)
+ if key not in fqs : fqs[key] = [None, None]
+
+ readNo = int(read[1])
+ if fqs[key][readNo - 1] != None :
+ raise Exception("Unresolvable repeated fastq file pattern in sample: '%s'" % (fqPath))
+ fqs[key][readNo - 1] = fqPath
+
+
+ ensureDir(self.params.flowcellBamDir)
+
+ #
+ # 2. run all fastq pairs through BWA:
+ #
+ nextWait = set()
+
+ for key in fqs.keys() :
+ (project, sample, index, lane, num) = key
+ sampleBamDir = os.path.join(self.params.flowcellBamDir, "Project_" + project, "Sample_" + sample)
+ ensureDir(sampleBamDir)
+ keytag = "_".join(key)
+ self.params.bamFile = os.path.join(sampleBamDir, keytag + ".bam")
+ self.params.fastq1File = fqs[key][0]
+ self.params.fastq2File = fqs[key][1]
+ nextWait.add(self.addWorkflowTask(preJoin(taskPrefix, keytag), FastqPairToBwaBamFlow(self.params), dependencies=dependencies))
+
+ return nextWait
+
+
+
+class FlowcellDirFastqToBwaBamFlow(WorkflowRunner) :
+ """
+ Takes as input 'flowcellFastqDir' pointing to the CASAVA 1.8 flowcell
+ project/sample fastq directory structure. For each project/sample,
+ the fastqs are aligned using BWA, sorted and merged into a single
+ BAM file. The bam output is placed in a parallel project/sample
+ directory structure below 'flowcellBamDir'
+
+ params:
+ flowcellFastqDir
+ flowcellBamDir
+ """
+
+ def __init__(self, params) :
+ self.params = params
+
+ def workflow(self) :
+ flowcellDirFastqToBwaBamFlow(self)
+
+
+
+
+
+# use a really boring flowcell label everywhere right now:
+def getFlowcellLabel(self, i) :
+ return "Flowcell_FC%i" % (i)
+
+
+
+
+def casava18BclToBamListFlow(self, taskPrefix="", dependencies=set()) :
+ """
+ Runs bcl conversion and alignment on multiple flowcells for a subset of samples.
+ Writes BAM files to parallel fastq Project/Sample directory structure. Does not
+ merge individual BAMs. Deletes fastqs on alignment when option is set to do so.
+
+ params:
+ allFlowcellDir
+ bclBasecallsDirList
+ bclTilePatternList
+
+ calls:
+ casava18BclToFastqFlow
+ supplies:
+ bclBasecallsDir
+ flowcellFastqDir
+ FlowcellDirFastqToBwaBamFlow
+ supplies:
+ flowcellFastqDir
+ flowcellBamDir
+
+ """
+
+ ensureDir(self.params.allFlowcellDir)
+
+ # first bcl->fastq->bwa bam for requested samples in all flowcells:
+ nextWait = set()
+ for i, self.params.bclBasecallsDir in enumerate(self.params.bclBasecallsDirList) :
+ flowcellLabel = getFlowcellLabel(self, i)
+ flowcellDir = os.path.join(self.params.allFlowcellDir, flowcellLabel)
+
+ ensureDir(flowcellDir)
+
+ self.params.flowcellFastqDir = os.path.join(flowcellDir, "fastq")
+ self.params.flowcellBamDir = os.path.join(flowcellDir, "bam")
+ if self.params.bclTilePatternList == None :
+ self.params.bclTilePattern = None
+ else :
+ self.params.bclTilePattern = self.params.bclTilePatternList[i]
+
+ fastqFinal = casava18BclToFastqFlow(self, taskPrefix=flowcellLabel)
+
+ label = preJoin(taskPrefix, "_".join((flowcellLabel, "FastqToBwaBam")))
+ nextWait.add(self.addWorkflowTask(label, FlowcellDirFastqToBwaBamFlow(self.params), dependencies=fastqFinal))
+
+ return nextWait
+
+
+
+
+def mergeBamListFlow(self, taskPrefix="", dependencies=set()) :
+ """
+ Take a list of sorted bam files from the same sample, merge them together,
+ and delete input bams, final output to mergeBamName
+
+ params:
+ mergeBamList
+ mergeBamName
+ samtoolsBin
+ """
+
+ for bamFile in self.params.mergeBamList :
+ if not os.path.isfile(bamFile) :
+ raise Exception("Can't find bam file: '%s'" % (bamFile))
+
+ mergeTasks = set()
+ mergeLabel = preJoin(taskPrefix, "merge")
+ if len(self.params.mergeBamList) > 1 :
+ mergeCmd = "%s merge -f %s %s" % (self.params.samtoolsBin, self.params.mergeBamName, " ".join(self.params.mergeBamList))
+ mergeTasks.add(self.addTask(mergeLabel, mergeCmd, dependencies=dependencies, isTaskStable=False))
+
+ rmCmd = "rm -f"
+ for bamFile in self.params.mergeBamList :
+ rmCmd += " %s" % (bamFile)
+
+ self.addTask(preJoin(taskPrefix, "rmBam"), rmCmd, dependencies=mergeLabel, isForceLocal=True)
+ elif len(self.params.mergeBamList) == 1 :
+ mvCmd = "mv %s %s" % (self.params.mergeBamList[0], self.params.mergeBamName)
+ # *must* have same taskLabel as merge command for continuation
+ # to work correctly because of the potential for partial
+ # deletion of the input bam files:
+ mergeTasks.add(self.addTask(mergeLabel, mvCmd, dependencies=dependencies, isForceLocal=True, isTaskStable=False))
+
+ return mergeTasks
+
+
+
+def flowcellBamListMergeFlow(self, taskPrefix="", dependencies=set()) :
+ """
+ given a root flowcell directory and list of samples, merge sample
+ bams across flowcells and dedup.
+
+ ?? Will we be in a situation where sample has more than one library
+ -- this affects the dedup order & logic ??
+
+ params:
+ allFlowcellDir
+ mergedDir
+ sampleNameList
+ picardDir
+
+ calls:
+ mergeBamListFlow
+ supplies:
+ mergeBamList
+ mergeBamName
+ """
+
+ #
+ # 1) get a list of bams associated with each project/sample combination:
+ #
+
+ # TODO: what if there's an NFS delay updating all the bams while
+ # we're reading them out here? make this process more robust -- we
+ # should know how many BAMs we're expecting, in a way that's
+ # robust to interruption/restart
+ #
+ bams = {}
+ bamDigger = FileDigger(".bam", ["Flowcell_", "bam", "Project_", "Sample_"])
+ for (flowcell, nothing, project, sample, bamFile) in bamDigger.getNextFile(self.params.allFlowcellDir) :
+ if (self.params.sampleNameList != None) and \
+ (len(self.params.sampleNameList) != 0) and \
+ (sample not in self.params.sampleNameList) : continue
+ key = (project, sample)
+ if key not in bams : bams[key] = []
+ bams[key].append(bamFile)
+
+ mergedBamExt = ".merged.bam"
+ markDupBamExt = ".markdup.bam"
+
+ #
+ # 2) merge and delete smaller bams:
+ #
+ mergedBams = {}
+
+ mergedBamDir = os.path.join(self.params.mergedDir, "bam")
+ sampleTasks = {}
+ if len(bams) : # skip this section if smaller bams have already been deleted
+ ensureDir(mergedBamDir)
+
+ for key in bams.keys() :
+ (project, sample) = key
+ mergedSampleDir = os.path.join(mergedBamDir, "Project_" + project, "Sample_" + sample)
+ ensureDir(mergedSampleDir)
+ self.params.mergeBamList = bams[key]
+ self.params.mergeBamName = os.path.join(mergedSampleDir, sample + mergedBamExt)
+ mergedBams[key] = self.params.mergeBamName
+ outTaskPrefix = preJoin(taskPrefix, "_".join(key))
+ sampleTasks[key] = mergeBamListFlow(self, outTaskPrefix, dependencies)
+
+ if not os.path.isdir(mergedBamDir) : return
+
+
+ #
+ # 3) mark dup:
+ #
+
+ # mergedBams contains all bams from the current run, we also add any from a
+ # previous interrupted run:
+ mergedBamDigger = FileDigger(mergedBamExt, ["Project_", "Sample_"])
+ for (project, sample, bamFile) in mergedBamDigger.getNextFile(mergedBamDir) :
+ key = (project, sample)
+ if key in mergedBams :
+ assert (mergedBams[key] == bamFile)
+ else :
+ mergedBams[key] = bamFile
+
+ nextWait = set()
+ totalCores = self.getNCores()
+
+ for sampleKey in mergedBams.keys() :
+ markDupDep = set()
+ if sampleKey in sampleTasks : markDupDep = sampleTasks[sampleKey]
+
+ fullName = "_".join(sampleKey)
+
+ markDupBamFile = mergedBams[sampleKey][:-(len(mergedBamExt))] + markDupBamExt
+ markDupMetricsFile = markDupBamFile[:-(len(".bam"))] + ".metrics.txt"
+ markDupTmpDir = markDupBamFile + ".tmpdir"
+
+ # for now, solve the memory problem with lots of threads:
+ nCores = 4
+ if (totalCores != "unlimited") and (totalCores < nCores) :
+ nCores = totalCores
+ gigs = 2 * nCores
+ javaOpts = "-Xmx%ig" % (gigs)
+ markDupFiles = "INPUT=%s OUTPUT=%s METRICS_FILE=%s" % (mergedBams[sampleKey], markDupBamFile, markDupMetricsFile)
+ markDupOpts = "REMOVE_DUPLICATES=false ASSUME_SORTED=true VALIDATION_STRINGENCY=SILENT CREATE_INDEX=true TMP_DIR=%s" % (markDupTmpDir)
+ markDupJar = os.path.join(self.params.picardDir, "MarkDuplicates.jar")
+ markDupCmd = "java %s -jar %s %s %s" % (javaOpts, markDupJar, markDupFiles, markDupOpts)
+ markDupTask = self.addTask(preJoin(taskPrefix, fullName + "_dupmark"), markDupCmd, dependencies=markDupDep)
+
+ # link index filename to something samtools can understand:
+ #
+ markDupPicardBaiFile = markDupBamFile[:-(len(".bam"))] + ".bai"
+ markDupSamtoolsBaiFile = markDupBamFile + ".bai"
+ indexLinkCmd = "ln %s %s" % (markDupPicardBaiFile, markDupSamtoolsBaiFile)
+ indexLinkTask = self.addTask(preJoin(taskPrefix, fullName + "_indexLink"), indexLinkCmd, dependencies=markDupTask, isForceLocal=True)
+
+ nextWait.add(indexLinkTask)
+
+ # delete TmpDir:
+ #
+ rmMarkDupTmpCmd = "rm -rf %s" % (markDupTmpDir)
+ self.addTask(preJoin(taskPrefix, fullName + "_rmMarkDupTmp"), rmMarkDupTmpCmd, dependencies=markDupTask, isForceLocal=True)
+
+ # now remove the original file:
+ #
+ rmCmd = "rm -f %s" % (mergedBams[sampleKey])
+ self.addTask(preJoin(taskPrefix, fullName + "_rmMerge"), rmCmd, dependencies=markDupTask, isForceLocal=True)
+
+ return nextWait
+
+
+
+
+class FlowcellBamListMergeFlow(WorkflowRunner) :
+
+ def __init__(self, params) :
+ self.params = params
+
+ def workflow(self) :
+ flowcellBamListMergeFlow(self)
+
+
+
+class BWAWorkflow(WorkflowRunner) :
+ """
+ pyflow BCL to BAM BWA workflow
+ """
+
+ def __init__(self, params) :
+ self.params = params
+
+ # make sure working directory is setup:
+ self.params.outputDir = os.path.abspath(self.params.outputDir)
+ ensureDir(self.params.outputDir)
+
+ self.params.allFlowcellDir = os.path.join(self.params.outputDir, "flowcell_results")
+ self.params.mergedDir = os.path.join(self.params.outputDir, "merged_results")
+
+ # Verify/manipulate various input options:
+ #
+ # this is mostly repeated in the config script now... get this minimized with auto verification:
+ #
+ self.params.bclBasecallsDirList = map(os.path.abspath, self.params.bclBasecallsDirList)
+ for dir in self.params.bclBasecallsDirList :
+ if not os.path.isdir(dir) :
+ raise Exception("Input BCL basecalls directory not found: '%s'" % (dir))
+
+ self.params.samtoolsSortMemPerCore = int(self.params.samtoolsSortMemPerCore)
+ minSortMem = 1000000
+ if self.params.samtoolsSortMemPerCore < minSortMem :
+ raise Exception("samtoolsSortMemPerCore must be an integer greater than minSortMem")
+
+ if self.params.genomeFasta == None:
+ raise Exception("No bwa genome file defined.")
+ else:
+ if not os.path.isfile(self.params.genomeFasta) :
+ raise Exception("Can't find bwa genome file '%s'" % (self.params.genomeFasta))
+
+
+ def workflow(self) :
+
+ alignTasks = casava18BclToBamListFlow(self)
+ mergeTask = self.addWorkflowTask("mergeBams", FlowcellBamListMergeFlow(self.params), dependencies=alignTasks)
+
+
+
diff --git a/pyflow/demo/bclToBwaBam/configBclToBwaBam.py b/pyflow/demo/bclToBwaBam/configBclToBwaBam.py
new file mode 100755
index 0000000..19833b4
--- /dev/null
+++ b/pyflow/demo/bclToBwaBam/configBclToBwaBam.py
@@ -0,0 +1,397 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+"""
+This demonstrates a run of a prototype BCL to BWA BAM workflow
+created as a production-scale proof of concept for pyflow.
+
+The bwa workflow is written into the BWAWorkflow object. See
+bwaworkflow.py for implementation details of this class.
+
+Finally, make sure configuration settings in BWAWorkflowConfig
+are appropriate before running.
+"""
+
+import os, sys
+
+scriptDir = os.path.abspath(os.path.dirname(__file__))
+scriptName = os.path.basename(__file__)
+
+
+runScript1 = """#!/usr/bin/env python
+# BWAWorkflow run script auto-generated by command: %s
+
+import os.path, sys
+
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+sys.path.append('%s')
+from bwaworkflow import BWAWorkflow
+
+class WorkflowOptions(object) :
+""" % (" ".join(sys.argv), scriptDir)
+
+runScript2 = """
+def get_run_options() :
+ from optparse import OptionParser
+ import textwrap
+
+ epilog=\"""Note this script can be re-run to continue the workflow run in case of interuption.
+Also note that dryRun option has limited utility when task definition depends on upstream task results,
+in which case the dry run will not cover the full 'live' run task set)\"""
+
+ # no epilog in py 2.4! hack-in the feature instead:
+ class MyOptionParser(OptionParser) :
+ def __init__(self, *args, **kwargs):
+ self.myepilog = None
+ try:
+ self.myepilog = kwargs.pop('epilog')
+ except KeyError:
+ pass
+ OptionParser.__init__(self,*args, **kwargs)
+
+ def print_help(self,*args,**kwargs) :
+ OptionParser.print_help(self,*args, **kwargs)
+ if self.myepilog != None :
+ sys.stdout.write("%s\\n" % (textwrap.fill(self.myepilog)))
+
+ parser = MyOptionParser(epilog=epilog)
+
+
+ parser.add_option("-m", "--mode", type="string",dest="mode",
+ help="select run mode (local|sge)")
+ parser.add_option("-j", "--jobs", type="string",dest="jobs",
+ help="number of jobs (default: 1 for local mode, 'unlimited' for sge mode)")
+ parser.add_option("-e","--mailTo", type="string",dest="mailTo",action="append",
+ help="send email notification of job completion status to this address (may be provided multiple times for more than one email address)")
+ parser.add_option("-d","--dryRun", dest="isDryRUn",action="store_true",
+ help="dryRun workflow code without actually running command-tasks")
+
+
+ (options,args) = parser.parse_args()
+
+ if len(args) :
+ parser.print_help()
+ sys.exit(2)
+
+ if options.mode == None :
+ parser.print_help()
+ sys.exit(2)
+ elif options.mode not in ["local","sge"] :
+ parser.error("Invalid mode. Available modes are: local, sge")
+
+ if options.jobs == None :
+ if options.mode == "sge" :
+ options.jobs == "unlimited"
+ else :
+ options.jobs == "1"
+ elif (options.jobs != "unlimited") and (int(options.jobs) <= 0) :
+ parser.error("Jobs must be 'unlimited' or an integer greater than 1")
+
+ return options
+
+runOptions=get_run_options()
+flowOptions=WorkflowOptions()
+flowOptions.outputDir=scriptDir
+wflow = BWAWorkflow(flowOptions)
+retval=wflow.run(mode=runOptions.mode,
+ nCores=runOptions.jobs,
+ dataDirRoot=scriptDir,
+ mailTo=runOptions.mailTo,
+ isContinue="Auto",
+ isForceContinue=True,
+ isDryRun=runOptions.isDryRUn)
+sys.exit(retval)
+"""
+
+
+
+
+def checkArg(x, label, checkfunc) :
+ if x != None:
+ x = os.path.abspath(x)
+ if not checkfunc(x) :
+ raise Exception("Can't find %s: '%s'" % (label, x))
+ return x
+
+def checkDirArg(dir, label) :
+ return checkArg(dir, label, os.path.isdir)
+
+def checkFileArg(file, label) :
+ return checkArg(file, label, os.path.isfile)
+
+
+
+def get_option_parser(defaults, configFileName, isAllHelp=False) :
+ from optparse import OptionGroup, OptionParser, SUPPRESS_HELP
+ import textwrap
+
+ description = """This script configures a bcl to BWA alignmed BAM workflow.
+Given a bcl basecalls directory the workflow will create fastq's using CASAVA's
+bcl to fastq converter, then align each fastq using bwa, and finally consolidate
+the output into a single BAM file for for each Project/Sample combination.
+
+The configuration process will produce a workflow run script, which can be used to
+execute the workflow on a single node or through sge with a specific job limit.
+"""
+
+ epilog = """Default parameters will always be read from the file '%s' if it exists.
+This file is searched for in the current working directory first -- if
+it is not found then the directory containing this script is searched as well.
+The current set of default parameters may be written to this file using the --writeConfig switch,
+which takes all current defaults and arguments, writes these to the
+configuration file and exits without setting up a workflow run script as usual.
+""" % (configFileName)
+
+ # no epilog in py 2.4! hack-in the feature instead:
+ class MyOptionParser(OptionParser) :
+ def __init__(self, *args, **kwargs):
+ self.myepilog = None
+ try:
+ self.myepilog = kwargs.pop('epilog')
+ except KeyError:
+ pass
+ OptionParser.__init__(self, *args, **kwargs)
+
+ def print_help(self, *args, **kwargs) :
+ OptionParser.print_help(self, *args, **kwargs)
+ if self.myepilog != None :
+ sys.stdout.write("%s\n" % (textwrap.fill(self.myepilog)))
+
+
+ parser = MyOptionParser(description=description, epilog=epilog)
+
+ parser.set_defaults(**defaults)
+
+ parser.add_option("--allHelp", action="store_true", dest="isAllHelp",
+ help="show all extended/hidden options")
+
+ group = OptionGroup(parser, "Workflow options")
+ group.add_option("--bclBasecallsDir", type="string", dest="bclBasecallsDirList", metavar="DIR", action="append",
+ help="BCL basecalls directory. Call this option multiple times to specify multiple bcl directories, samples with the same name will be combined over all flowcells after alignmnet. [required] (default: %default)")
+ group.add_option("--bclTilePattern", type="string", dest="bclTilePatternList", metavar="PATTERN", action="append",
+ help="BCL converter tiles expression used to select a subsset of tiles (eg. 's_1') call this option either once for each basecalls dir or not at all (default: %default)")
+ group.add_option("--genomeFasta", type="string", dest="genomeFasta",
+ help="Genome fasta file which includes BWA index in the same directory [required] (default: %default)")
+ group.add_option("--outputDir", type="string", dest="outputDir",
+ help="BCL basecalls directory [required] (default: %default)")
+ group.add_option("--sampleName", type="string", dest="sampleNameList", metavar="sampleName", action="append",
+ help="Restrict analysis to given sampleName. This option can be provided more than once for multiple sample names. If no names are provided all samples are analyzed (default: %default)")
+ parser.add_option_group(group)
+
+ secgroup = OptionGroup(parser, "Extended options",
+ "These options are not likely to be reset after initial configuration in a new site, they will not be printed here if a default exists from the configuration file or otherwise, unless --allHelp is specified")
+
+ # used to access isAnyHelp from the maybeHelp function
+ class Hack : isAnyHelp = False
+
+ def maybeDefHelp(key, msg) :
+ if isAllHelp or (key not in defaults) :
+ Hack.isAnyHelp = True
+ return msg
+ return SUPPRESS_HELP
+
+ secgroup.add_option("--casavaDir", type="string", dest="casavaDir",
+ help=maybeDefHelp("casavaDir", "casava 1.8.2+ installation directory [required] (default: %default)"))
+ secgroup.add_option("--bwaBin", type="string", dest="bwaBin",
+ help=maybeDefHelp("bwaBin", "bwa binary [required] (default: %default)"))
+ secgroup.add_option("--samtoolsBin", type="string", dest="samtoolsBin",
+ help=maybeDefHelp("samtoolsBin", "samtools binary [required] (default: %default)"))
+ secgroup.add_option("--picardDir", type="string", dest="picardDir",
+ help=maybeDefHelp("picardDir", "casava 1.8.2+ installation directory [required] (default: %default)"))
+ if not Hack.isAnyHelp:
+ secgroup.description = "hidden"
+ parser.add_option_group(secgroup)
+
+ def maybeHelp(key, msg) :
+ if isAllHelp : return msg
+ return SUPPRESS_HELP
+
+ configgroup = OptionGroup(parser, "Config options")
+ configgroup.add_option("--writeConfig", action="store_true", dest="isWriteConfig",
+ help=maybeHelp("writeConfig", "Write new default configuration file based on current defaults and agruments. Defaults written to: '%s'" % (configFileName)))
+ if not isAllHelp :
+ configgroup.description = "hidden"
+ parser.add_option_group(configgroup)
+
+ return parser
+
+
+
+def get_run_options() :
+ from ConfigParser import SafeConfigParser
+
+ configFileName = scriptName + ".ini"
+ if not os.path.isfile(configFileName) :
+ configPath = os.path.join(scriptDir, configFileName)
+ else :
+ configPath = os.path.join('.', configFileName)
+
+ configSectionName = scriptName
+
+ config = SafeConfigParser()
+ config.optionxform = str
+ config.read(configPath)
+
+ configOptions = {}
+ if config.has_section(configSectionName) :
+ for (k, v) in config.items(configSectionName) :
+ if v == "" : continue
+ configOptions[k] = v
+
+ defaults = { 'outputDir' : './results',
+ 'bclToFastqMaxCores' : 12,
+ 'samtoolsSortMemPerCore' : 1000000000, # samtools sort uses about 2x what you tell it to...
+ 'alnMaxCores' : 8, # presumably bwa aln will become increasingly inefficient per core, so we don't want to let this go forever...
+ 'isKeepFastq' : True, # important to keep these during testing, but not for production
+ }
+
+ defaults.update(configOptions)
+
+ parser = get_option_parser(defaults, configFileName)
+ (options, args) = parser.parse_args()
+
+ if options.isAllHelp :
+ parser = get_option_parser(defaults, configFileName, True)
+ parser.print_help()
+ sys.exit(2)
+
+ if len(args) : # or (len(sys.argv) == 1):
+ parser.print_help()
+ sys.exit(2)
+
+ # sanitize arguments before writing defaults, check for missing arguments after:
+ #
+ def checkListRepeats(list, itemLabel) :
+ if list == None : return
+ if len(set(list)) != len(list) :
+ parser.error("Repeated %s entries" % (itemLabel))
+
+ if options.bclBasecallsDirList != None :
+ for i, bclDir in enumerate(options.bclBasecallsDirList) :
+ options.bclBasecallsDirList[i] = checkDirArg(bclDir, "bcl basecalls directory")
+ # tmp for testing:
+ # checkListRepeats(options.bclBasecallsDirList,"bcl basecalls directory")
+ if (options.bclTilePatternList != None) and \
+ (len(options.bclBasecallsDirList) != len(options.bclTilePatternList)) :
+ parser.error("Unexpected number of bclTilPattern entries")
+ checkListRepeats(options.sampleNameList, "sample name")
+
+ options.casavaDir = checkDirArg(options.casavaDir, "casava directory")
+
+ options.genomeFasta = checkFileArg(options.genomeFasta, "genome fasta file")
+ options.bwaBin = checkFileArg(options.bwaBin, "bwa binary")
+ options.samtoolsBin = checkFileArg(options.samtoolsBin, "samtools binary")
+
+ if options.isWriteConfig == True :
+ if not config.has_section(configSectionName) :
+ config.add_section(configSectionName)
+ for k, v in vars(options).iteritems() :
+ if k == "isWriteConfig" : continue
+ if v == None : v = ""
+ config.set(configSectionName, k, str(v))
+ configfp = open(configFileName, "w")
+ config.write(configfp)
+ configfp.close()
+ sys.exit(0)
+
+ def noArgOrError(msg) :
+ if len(sys.argv) <= 1 :
+ parser.print_help()
+ sys.exit(2)
+ else :
+ parser.error(msg)
+
+ def assertOption(arg, label) :
+ if arg == None:
+ noArgOrError("No %s specified" % (label))
+
+ def assertList(list, itemLabel) :
+ if (list == None) or (len(list) == 0) :
+ noArgOrError("List containing %s (s) is empty or missing" % (itemLabel))
+ else :
+ for item in list :
+ assertOption(item, itemLabel)
+
+ assertList(options.bclBasecallsDirList, "bcl basecalls directory")
+ assertList(options.sampleNameList, "sample name")
+ assertOption(options.genomeFasta, "genome fasta file")
+ assertOption(options.outputDir, "output directory")
+ assertOption(options.casavaDir, "casava directory")
+ assertOption(options.picardDir, "picard directory")
+ assertOption(options.bwaBin, "bwa binary")
+ assertOption(options.samtoolsBin, "samtools binary")
+
+ return options
+
+
+
+from bwaworkflow import BWAWorkflow, ensureDir
+
+
+def main() :
+
+ options = get_run_options()
+
+ # instantiate workflow object to trigger parameter validation only
+ #
+ wflow = BWAWorkflow(options)
+
+ # generate runscript:
+ #
+ scriptFile = os.path.join(options.outputDir, "runWorkflow.py")
+ ensureDir(options.outputDir)
+
+ sfp = open(scriptFile, "w")
+ sfp.write(runScript1)
+ # there must be a nicer way to reverse eval() an object -- maybe human readable pickle is what we want here?
+ for k, v in vars(options).iteritems() :
+ if isinstance(v, basestring) :
+ sfp.write(" %s = '%s'\n" % (k, v))
+ else:
+ sfp.write(" %s = %s\n" % (k, v))
+ sfp.write("\n")
+ sfp.write(runScript2)
+ sfp.close()
+ os.chmod(scriptFile, 0755)
+
+ notefp = sys.stdout
+ notefp.write("""
+Successfully created workflow run script. To execute the workflow, run the following script and set appropriate options:
+
+%s
+""" % (scriptFile))
+
+
+if __name__ == "__main__" :
+ main()
+
diff --git a/pyflow/demo/bclToBwaBam/configBclToBwaBam.py.ini b/pyflow/demo/bclToBwaBam/configBclToBwaBam.py.ini
new file mode 100644
index 0000000..1c86828
--- /dev/null
+++ b/pyflow/demo/bclToBwaBam/configBclToBwaBam.py.ini
@@ -0,0 +1,5 @@
+[configBclToBwaBam.py]
+bwaBin = /home/csaunders/opt/x86_64-linux/bwa/bwa
+samtoolsBin = /illumina/thirdparty/samtools/samtools-0.1.14/samtools
+casavaDir = /illumina/software/casava/CASAVA-1.8.2
+picardDir = /home/csaunders/opt/noarch/picard-tools
diff --git a/pyflow/demo/bclToBwaBam/example_configuration.bash b/pyflow/demo/bclToBwaBam/example_configuration.bash
new file mode 100755
index 0000000..a2468d6
--- /dev/null
+++ b/pyflow/demo/bclToBwaBam/example_configuration.bash
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+set -o xtrace
+
+#
+# executes the configure script for a small bcl directory -- note that
+# the tile mask is required for this bcl directory because it has been
+# extensively subsampled for testing purposes
+#
+
+./configBclToBwaBam.py \
+--bclBasecallsDir /home/csaunders/proj/bwa_workflow_hashout/create_small_lane/small_lane/111119_SN192_0307_BD0FNCACXX_Genentech/Data/Intensities/BaseCalls \
+--bclTilePattern "s_8_[02468][0-9][0-9]1" \
+--bclBasecallsDir /home/csaunders/proj/bwa_workflow_hashout/create_small_lane/small_lane/111119_SN192_0307_BD0FNCACXX_Genentech/Data/Intensities/BaseCalls \
+--bclTilePattern "s_8_[13579][0-9][0-9]1" \
+--genomeFasta /illumina/scratch/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa \
+--sampleName "lane8"
+
diff --git a/pyflow/demo/cwdDemo/cwdDemo.py b/pyflow/demo/cwdDemo/cwdDemo.py
new file mode 100755
index 0000000..9b6eda3
--- /dev/null
+++ b/pyflow/demo/cwdDemo/cwdDemo.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+#
+# demonstrate/test addTask() cwd option
+#
+
+import os.path
+import sys
+
+# add module path by hand
+#
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+sys.path.append(scriptDir+"/../../src")
+
+from pyflow import WorkflowRunner
+
+
+# all pyflow workflows are written into classes derived from
+# pyflow.WorkflowRunner:
+#
+class CwdWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the
+ # WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ # get cwd and its parent for the addTask cwd test
+ #
+ cwd=os.getcwd()
+ parentdir=os.path.abspath(os.path.join(cwd,".."))
+
+ self.flowLog("testing pyflow cwd: '%s' parentdir: '%s'" % (cwd,parentdir))
+
+ # task will fail unless pwd == parentdir:
+ #
+ # test both absolute and relative cwd arguments:
+ #
+ self.addTask("testAbsCwd","[ $(pwd) == '%s' ]; exit $?" % (parentdir),cwd=parentdir)
+ self.addTask("testRelCwd","[ $(pwd) == '%s' ]; exit $?" % (parentdir),cwd="..")
+
+
+
+# Instantiate the workflow
+#
+wflow = CwdWorkflow()
+
+# Run the workflow:
+#
+retval=wflow.run(mode="local")
+
+sys.exit(retval)
+
diff --git a/pyflow/demo/envDemo/envDemo.py b/pyflow/demo/envDemo/envDemo.py
new file mode 100755
index 0000000..e72608b
--- /dev/null
+++ b/pyflow/demo/envDemo/envDemo.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+#
+# demonstrate/test addTask() env option
+#
+
+import os.path
+import sys
+
+# add module path by hand
+#
+scriptDir = os.path.abspath(os.path.dirname(__file__))
+sys.path.append(scriptDir + "/../../src")
+
+from pyflow import WorkflowRunner
+
+
+# all pyflow workflows are written into classes derived from
+# pyflow.WorkflowRunner:
+#
+class EnvWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the
+ # WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+ # run a task with the parent env:
+ #
+ home = os.environ["HOME"]
+ self.addTask("testDefEnv", "[ $HOME == '%s' ]; exit $?" % (home))
+
+ # create a minimal test environment
+ #
+ new_path = "/bin"
+ min_env = { "PATH" : new_path }
+ self.addTask("testMinEnv", "[ $PATH == '%s' ]; exit $?" % (new_path), env=min_env)
+
+ # augment parent env with additional settings:
+ #
+ augmented_env = os.environ.copy()
+ augmented_env["FOO"] = "BAZ"
+ self.addTask("testAugmentedEnv", "[ $FOO == 'BAZ' ]; exit $?", env=augmented_env)
+
+        # test funny characters that have been shown to cause trouble on some sge installations
+ funky_env = {}
+ funky_env["PATH"] = "/bin"
+ funky_env["_"] = "| %s %F \n"
+ # in this case we just want the job to run at all:
+ self.addTask("testFunkyEnv", "echo 'foo'; exit $?", env=funky_env)
+
+ assert("FOO" not in os.environ)
+
+
+
+# Instantiate the workflow
+#
+wflow = EnvWorkflow()
+
+# Run the workflow:
+#
+retval = wflow.run(mode="local")
+
+sys.exit(retval)
+
diff --git a/pyflow/demo/helloWorld/README.txt b/pyflow/demo/helloWorld/README.txt
new file mode 100644
index 0000000..ea6117b
--- /dev/null
+++ b/pyflow/demo/helloWorld/README.txt
@@ -0,0 +1,3 @@
+The following demo shows a very simple pyFlow composed of only a
+single task -- a command which echoes a simple message. You can run
+this workflow by typing "python ./helloWorld.py"
diff --git a/pyflow/demo/helloWorld/helloWorld.py b/pyflow/demo/helloWorld/helloWorld.py
new file mode 100755
index 0000000..bede07e
--- /dev/null
+++ b/pyflow/demo/helloWorld/helloWorld.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+#
+# This demo shows possibly the simplest pyflow we can create --
+# a single 'hello world' task. After experimenting with this file
+# please see the 'simpleDemo' for coverage of a few more pyflow features
+#
+
+import os.path
+import sys
+
+# add module path
+#
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+sys.path.append(os.path.abspath(os.path.join(scriptDir,os.pardir,os.pardir,"src")))
+
+from pyflow import WorkflowRunner
+
+
+# all pyflow workflows are written into classes derived from pyflow.WorkflowRunner:
+#
+class HelloWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ #
+ # The output for this task will be written to the file helloWorld.out.txt
+ #
+ self.addTask("easy_task1", "echo 'Hello World!' > helloWorld.out.txt")
+
+
+
+# Instantiate the workflow
+#
+wflow = HelloWorkflow()
+
+# Run the workflow:
+#
+retval = wflow.run()
+
+# done!
+sys.exit(retval)
+
diff --git a/pyflow/demo/makeDemo/.hidden b/pyflow/demo/makeDemo/.hidden
new file mode 100644
index 0000000..af44150
--- /dev/null
+++ b/pyflow/demo/makeDemo/.hidden
@@ -0,0 +1,7 @@
+
+.PHONY: A B
+A: B
+ @echo "Made it!"
+
+B:
+ sleep 5
diff --git a/pyflow/demo/makeDemo/makeDemo.py b/pyflow/demo/makeDemo/makeDemo.py
new file mode 100755
index 0000000..d3af256
--- /dev/null
+++ b/pyflow/demo/makeDemo/makeDemo.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+import os.path
+import sys
+
+# add module path by hand
+#
+scriptDir = os.path.abspath(os.path.dirname(__file__))
+sys.path.append(scriptDir + "/../../src")
+
+from pyflow import WorkflowRunner
+
+
+# all pyflow workflows are written into classes derived from
+# pyflow.WorkflowRunner:
+#
+class MakeWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the
+ # WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ # This command 'configures' a makefile
+ #
+ self.addTask("task1", "cd %s; cp .hidden Makefile" % scriptDir)
+
+        # Sometimes you get to deal with make. The task below
+        # demonstrates a make command which starts when the above task
+        # completes. Make tasks are specified as directories which
+        # contain a makefile. This task points to the directory of
+        # this demo script, which has a Makefile once task1
+        # completes.
+ # pyflow will switch the task command between make and qmake
+ # depending on run type.
+ #
+ self.addTask("make_task", scriptDir, isCommandMakePath=True, nCores=2, dependencies="task1")
+
+ # This command 'unconfigures' the makefile
+ #
+ self.addTask("task2", "rm -f %s/Makefile" % scriptDir, dependencies="make_task")
+
+
+# Instantiate the workflow
+#
+# parameters are passed into the workflow via its constructor:
+#
+wflow = MakeWorkflow()
+
+# Run the workflow:
+#
+retval = wflow.run(mode="local", nCores=8)
+
+sys.exit(retval)
+
diff --git a/pyflow/demo/memoryDemo/memoryDemo.py b/pyflow/demo/memoryDemo/memoryDemo.py
new file mode 100755
index 0000000..eec236c
--- /dev/null
+++ b/pyflow/demo/memoryDemo/memoryDemo.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+#
+# This is a very simple demo/test of pyFlow's new (@ v0.4) memory
+# resource feature.
+#
+
+import os.path
+import sys
+
+# add module path by hand
+#
+sys.path.append(os.path.abspath(os.path.dirname(__file__)) + "/../../src")
+
+
+from pyflow import WorkflowRunner
+
+
+# all pyflow workflows are written into classes derived from pyflow.WorkflowRunner:
+#
+class MemTestWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ # Each task has a default memory request of 2048 megabytes
+ # but this is site-configurable in pyflowConfig.py, so we
+ # specify it for every task here
+ #
+ # This works correctly if task 4 is the only task run in
+ # parallel with one of the other 3 tasks.
+ #
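+        # (The run() call below allows memMb=2049 for the whole run, so no
+        # two of the 2048 MB tasks can ever run concurrently (2048 + 2048 >
+        # 2049), while task4 can share the run with any one of them, since
+        # 2048 + 1 <= 2049.)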
+ self.addTask("task1", "echo 'Hello World!'", memMb=2048)
+ self.addTask("task2", "echo 'Hello World!'", memMb=2048)
+ self.addTask("task3", "echo 'Hello World!'", memMb=2048)
+ self.addTask("task4", "echo 'Hello World!'", memMb=1)
+
+
+
+# Instantiate the workflow
+#
+wflow = MemTestWorkflow()
+
+# Run the workflow:
+#
+retval = wflow.run(nCores=8, memMb=2049)
+
+# done!
+sys.exit(retval)
+
diff --git a/pyflow/demo/mutexDemo/mutexDemo.py b/pyflow/demo/mutexDemo/mutexDemo.py
new file mode 100755
index 0000000..ee4bdd8
--- /dev/null
+++ b/pyflow/demo/mutexDemo/mutexDemo.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+import os.path
+import sys
+
+# add module path by hand
+#
+scriptDir = os.path.abspath(os.path.dirname(__file__))
+sys.path.append(scriptDir + "/../../src")
+
+from pyflow import WorkflowRunner
+
+
+#
+# very simple task scripts called by the demo:
+#
+testJobDir = os.path.join(scriptDir, "testtasks")
+
+sleepjob = os.path.join(testJobDir, "sleeper.bash") # sleeps
+
+
+
+# all pyflow workflows are written into classes derived from
+# pyflow.WorkflowRunner:
+#
+class MutexWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the
+ # WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ # create an array of mutex restricted tasks which can only run
+ # once at a time:
+ for i in range(8) :
+ self.addTask("mutex_task_" + str(i), sleepjob + " 1", mutex="test")
+
+ # and add an array of 'normal' tasks for comparison:
+ for i in range(16) :
+ self.addTask("normal_task_" + str(i), sleepjob + " 1")
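+
+        # (With the nCores=6 limit used in main() below, no more than one
+        # mutex task should ever be running at a time, since they all share
+        # the "test" mutex id, while the remaining cores stay free for the
+        # normal tasks.)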
+
+
+
+
+def main() :
+ # Instantiate the workflow
+ wflow = MutexWorkflow()
+
+    # Run the workflow:
+ retval = wflow.run(mode="local", nCores=6)
+
+ sys.exit(retval)
+
+
+
+if __name__ == "__main__" :
+ main()
diff --git a/pyflow/demo/mutexDemo/testtasks/sleeper.bash b/pyflow/demo/mutexDemo/testtasks/sleeper.bash
new file mode 100755
index 0000000..8c77fb2
--- /dev/null
+++ b/pyflow/demo/mutexDemo/testtasks/sleeper.bash
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting sleep
+sleep $arg
+echo pid: $pid arg: $arg ending sleep
+
diff --git a/pyflow/demo/retryDemo/retryDemo.py b/pyflow/demo/retryDemo/retryDemo.py
new file mode 100755
index 0000000..33eeb8b
--- /dev/null
+++ b/pyflow/demo/retryDemo/retryDemo.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+#
+# This is a very simple demo/test of pyFlow's task retry feature.
+#
+
+import os.path
+import sys
+
+# add module path by hand
+#
+sys.path.append(os.path.abspath(os.path.dirname(__file__)) + "/../../src")
+
+from pyflow import WorkflowRunner
+
+
+# all pyflow workflows are written into classes derived from pyflow.WorkflowRunner:
+#
+class RetryWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+        # this task behaves correctly: it retries the job 4 times before
+        # failing; there is no automated way to confirm success right now.
+ #
+ self.flowLog("****** NOTE: This demo is supposed to fail ******")
+ self.addTask("retry_task_success", "exit 0", retryMax=8, retryWait=2, retryWindow=0, retryMode="all")
+ self.addTask("retry_task_fail", "exit 1", retryMax=3, retryWait=2, retryWindow=0, retryMode="all")
+
+
+
+# Instantiate the workflow
+#
+wflow = RetryWorkflow()
+
+# Run the workflow:
+#
+retval = wflow.run()
+
+if retval == 0 :
+ raise Exception("Example workflow is expected to fail, but did not.")
+else :
+ sys.stderr.write("INFO: Demo workflow failed as expected.\n\n")
+
+
+# Run the workflow again to demonstrate that global settings are overridden by task retry settings:
+#
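+# (Both tasks above carry their own retryMax settings, and task-level retry
+# parameters take precedence over the run-level retryMax=0 given here, so the
+# failing task is still retried and the workflow still fails as expected.)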
+retval = wflow.run(retryMax=0)
+
+if retval == 0 :
+ raise Exception("Example workflow is expected to fail, but did not.")
+else :
+ sys.stderr.write("INFO: Demo workflow failed as expected.\n\n")
+
+
diff --git a/pyflow/demo/runOptionsDemo/getDemoRunOptions.py b/pyflow/demo/runOptionsDemo/getDemoRunOptions.py
new file mode 100644
index 0000000..813bc47
--- /dev/null
+++ b/pyflow/demo/runOptionsDemo/getDemoRunOptions.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+import os.path
+import sys
+
+pyflowDir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))
+sys.path.append(pyflowDir)
+
+from optparse import OptionParser, SUPPRESS_HELP
+from pyflow import WorkflowRunner
+
+from pyflow import isLocalSmtp
+
+
+localDefaultCores = WorkflowRunner.runModeDefaultCores('local')
+sgeDefaultCores = WorkflowRunner.runModeDefaultCores('sge')
+
+
+
+def getDemoRunOptions() :
+ """
+    This routine is shared by the demo programs to demonstrate how to expose pyflow's runtime options as command-line options. It is not intended to be a demo program itself.
+ """
+
+ parser = OptionParser()
+
+
+ parser.add_option("-m", "--mode", type="string", dest="mode",
+ help="Select run mode {local,sge} (required)")
+ parser.add_option("-q", "--queue", type="string", dest="queue",
+ help="Specify sge queue name. Argument ignored if mode is not sge")
+ parser.add_option("-j", "--jobs", type="string", dest="jobs",
+ help="Number of jobs, must be an integer or 'unlimited' (default: %s for local mode, %s for sge mode)" % (localDefaultCores, sgeDefaultCores))
+ parser.add_option("-g", "--memGb", type="string", dest="memGb",
+ help="Gigabytes of memory available to run workflow -- only meaningful in local mode, must be an integer or 'unlimited' (default: 2*jobs for local mode, 'unlimited' for sge mode)")
+ parser.add_option("-r", "--resume", dest="isResume", action="store_true", default=False,
+                      help="Resume a workflow from the point of interruption. This flag has no effect on a new workflow run.")
+
+ isEmail = isLocalSmtp()
+ emailHelp=SUPPRESS_HELP
+ if isEmail:
+ emailHelp="Send email notification of job completion status to this address (may be provided multiple times for more than one email address)"
+
+ parser.add_option("-e", "--mailTo", type="string", dest="mailTo", action="append",
+ help=emailHelp)
+
+
+ (options, args) = parser.parse_args()
+
+ if not isEmail :
+ options.mailTo = None
+
+ if len(args) :
+ parser.print_help()
+ sys.exit(2)
+
+ if options.mode is None :
+ parser.print_help()
+ sys.stderr.write("\n\nERROR: must specify run mode\n\n")
+ sys.exit(2)
+ elif options.mode not in ["local", "sge"] :
+ parser.error("Invalid mode. Available modes are: local, sge")
+
+ if options.jobs is None :
+ if options.mode == "sge" :
+ options.jobs = sgeDefaultCores
+ else :
+ options.jobs = localDefaultCores
+ if options.jobs != "unlimited" :
+ options.jobs = int(options.jobs)
+ if options.jobs <= 0 :
+            parser.error("Jobs must be 'unlimited' or a positive integer")
+
+ # note that the user sees gigs, but we set megs
+ if options.memGb is None :
+ if options.mode == "sge" :
+ options.memMb = "unlimited"
+ else :
+ if options.jobs == "unlimited" :
+ options.memMb = "unlimited"
+ else :
+ options.memMb = 2 * 1024 * options.jobs
+ elif options.memGb != "unlimited" :
+ options.memGb = int(options.memGb)
+ if options.memGb <= 0 :
+            parser.error("memGb must be 'unlimited' or a positive integer")
+ options.memMb = 1024 * options.memGb
+ else :
+ options.memMb = options.memGb
+
+ options.schedulerArgList = []
+ if options.queue is not None :
+ options.schedulerArgList = ["-q", options.queue]
+
+ return options
+
+
+
+if __name__ == "__main__" :
+ help(getDemoRunOptions)
+
diff --git a/pyflow/demo/runOptionsDemo/runOptionsDemo.py b/pyflow/demo/runOptionsDemo/runOptionsDemo.py
new file mode 100755
index 0000000..01d5a44
--- /dev/null
+++ b/pyflow/demo/runOptionsDemo/runOptionsDemo.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+import os.path
+import sys
+
+# add module paths
+#
+filePath = os.path.dirname(__file__)
+pyflowPath = os.path.abspath(os.path.join(filePath, "../../src"))
+sys.path.append(pyflowPath)
+
+from pyflow import WorkflowRunner
+from getDemoRunOptions import getDemoRunOptions
+
+
+#
+# very simple task scripts called by the demo:
+#
+testJobDir = os.path.join(filePath, "testtasks")
+
+sleepjob = os.path.join(testJobDir, "sleeper.bash") # sleeps
+yelljob = os.path.join(testJobDir, "yeller.bash") # generates some i/o
+runjob = os.path.join(testJobDir, "runner.bash") # runs at 100% cpu
+
+
+
+# all pyflow workflows are written into classes derived from
+# pyflow.WorkflowRunner:
+#
+class TestWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the
+ # WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ # A simple command task with no dependencies, labeled 'task1'.
+ #
+ cmd = "%s 1" % (yelljob)
+ self.addTask("task1", cmd)
+
+ # Another task which runs the same command, this time the
+ # command is provided as an argument list. An argument list
+ # can be useful when a command has many arguments or
+ # complicated quoting issues:
+ #
+ cmd = [yelljob, "1"]
+ self.addTask("task2", cmd)
+
+ # This task will always run on the local machine, no matter
+ # what the run mode is. The force local option is useful for
+ # non-cpu intensive jobs which are taking care of minor
+ # workflow overhead (moving/touching files, etc)
+ #
+ self.addTask("task3a", sleepjob + " 10", isForceLocal=True)
+
+
+# get runtime options
+#
+runOptions = getDemoRunOptions()
+
+# Instantiate the workflow
+#
+wflow = TestWorkflow()
+
+# Run the workflow with runtime options specified on the command-line:
+#
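+# (Note: the "isResume and 'Auto' or False" expression below is the old-style
+# conditional idiom; it evaluates to "Auto" when --resume was given and to
+# False otherwise.)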
+retval = wflow.run(mode=runOptions.mode,
+ nCores=runOptions.jobs,
+ memMb=runOptions.memMb,
+ mailTo=runOptions.mailTo,
+ isContinue=(runOptions.isResume and "Auto" or False),
+ isForceContinue=True,
+ schedulerArgList=runOptions.schedulerArgList)
+
+sys.exit(retval)
+
diff --git a/pyflow/demo/runOptionsDemo/testtasks/sleeper.bash b/pyflow/demo/runOptionsDemo/testtasks/sleeper.bash
new file mode 100755
index 0000000..8c77fb2
--- /dev/null
+++ b/pyflow/demo/runOptionsDemo/testtasks/sleeper.bash
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting sleep
+sleep $arg
+echo pid: $pid arg: $arg ending sleep
+
diff --git a/pyflow/demo/runOptionsDemo/testtasks/yeller.bash b/pyflow/demo/runOptionsDemo/testtasks/yeller.bash
new file mode 100755
index 0000000..87bb49c
--- /dev/null
+++ b/pyflow/demo/runOptionsDemo/testtasks/yeller.bash
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting yell
+for i in {1..100}; do
+ echo "Yeller $pid yellin $i stdout"
+ echo "Yeller $pid yellin $i stderr" 1>&2
+done
+echo pid: $pid arg: $arg ending yell
+
diff --git a/pyflow/demo/simpleDemo/simpleDemo.py b/pyflow/demo/simpleDemo/simpleDemo.py
new file mode 100755
index 0000000..e3e530d
--- /dev/null
+++ b/pyflow/demo/simpleDemo/simpleDemo.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+import os.path
+import sys
+
+# add module path by hand
+#
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+sys.path.append(scriptDir+"/../../src")
+
+from pyflow import WorkflowRunner
+
+
+#
+# very simple task scripts called by the demo:
+#
+testJobDir=os.path.join(scriptDir,"testtasks")
+
+sleepjob=os.path.join(testJobDir,"sleeper.bash") # sleeps
+yelljob=os.path.join(testJobDir,"yeller.bash") # generates some i/o
+runjob=os.path.join(testJobDir,"runner.bash") # runs at 100% cpu
+
+
+
+# all pyflow workflows are written into classes derived from
+# pyflow.WorkflowRunner:
+#
+class SimpleWorkflow(WorkflowRunner) :
+
+ # WorkflowRunner objects can create regular constructors to hold
+ # run parameters or other state information:
+ #
+ def __init__(self,params) :
+ self.params=params
+
+
+ # a workflow is defined by overloading the
+ # WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ # A simple command task with no dependencies, labeled 'task1'.
+ #
+ cmd="%s 1" % (yelljob)
+ self.addTask("task1",cmd)
+
+ # Another task which runs the same command, this time the
+ # command is provided as an argument list. An argument list
+ # can be useful when a command has many arguments or
+ # complicated quoting issues:
+ #
+ cmd=[yelljob,"1"]
+ self.addTask("task2",cmd)
+
+ # This task will always run on the local machine, no matter
+ # what the run mode is. The force local option is useful for
+ # non-cpu intensive jobs which are taking care of minor
+ # workflow overhead (moving/touching files, etc)
+ #
+ self.addTask("task3a",sleepjob+" 10",isForceLocal=True)
+
+ # This job is requesting 2 threads:
+ #
+ self.addTask("task3b",runjob+" 10",nCores=2)
+
+ # This job is requesting 2 threads and 3 gigs of ram:
+ #
+ self.addTask("task3c",runjob+" 10",nCores=2,memMb=3*1024)
+
+
+ # addTask and addWorkflowTask always return their task labels
+ # as a simple convenience. taskName is set to "task4" now.
+ #
+ taskName=self.addTask("task4",sleepjob+" 1")
+
+ # an example task dependency:
+ #
+ # pyflow stores dependencies in set() objects, but you can
+ # provide a list,tuple,set or single string as the argument to
+ # dependencies:
+ #
+ # all the task5* tasks below specify "task4" as their
+ # dependency:
+ #
+ self.addTask("task5a",yelljob+" 2",dependencies=taskName)
+ self.addTask("task5b",yelljob+" 2",dependencies="task4")
+ self.addTask("task5c",yelljob+" 2",dependencies=["task4"])
+ self.addTask("task5d",yelljob+" 2",dependencies=[taskName])
+
+ # this time we launch a number of sleep tasks based on the
+ # workflow parameters:
+ #
+ # we store all tasks in sleepTasks -- which we use to make
+ # other tasks wait for this entire set of jobs to complete:
+ #
+ sleepTasks=set()
+ for i in range(self.params["numSleepTasks"]) :
+ taskName="sleep_task%i" % (i)
+ sleepTasks.add(taskName)
+ self.addTask(taskName,sleepjob+" 1",dependencies="task5a")
+
+ ## note the three lines above could have been written in a
+ ## more compact single-line format:
+ ##
+ #sleepTasks.add(self.addTask("sleep_task%i" % (i),sleepjob+" 1",dependencies="task5a"))
+
+ # this job cannot start until all tasks in the above loop complete:
+ self.addTask("task6",runjob+" 2",nCores=3,dependencies=sleepTasks)
+
+ # This task is supposed to fail, uncomment to see error reporting:
+ #
+ #self.addTask("task7",sleepjob)
+
+ # Note that no command is provided to this task. It will not
+ # be distributed locally or to sge, but does provide a
+ # convenient label for a set of tasks that other processes
+ # depend on. There is no special "checkpoint-task" type in
+ # pyflow -- but any task can function like one per this
+ # example:
+ #
+ self.addTask("checkpoint_task",dependencies=["task1","task6","task5a"])
+
+ # The final task depends on the above checkpoint:
+ #
+ self.addTask("task8",yelljob+" 2",dependencies="checkpoint_task")
+
+
+
+# simulated workflow parameters
+#
+myRunParams={"numSleepTasks" : 15}
+
+
+# Instantiate the workflow
+#
+# parameters are passed into the workflow via its constructor:
+#
+wflow = SimpleWorkflow(myRunParams)
+
+# Run the workflow:
+#
+retval=wflow.run(mode="local",nCores=8)
+
+sys.exit(retval)
+
diff --git a/pyflow/demo/simpleDemo/testtasks/runner.bash b/pyflow/demo/simpleDemo/testtasks/runner.bash
new file mode 100755
index 0000000..df97eb1
--- /dev/null
+++ b/pyflow/demo/simpleDemo/testtasks/runner.bash
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+thisdir=$(dirname $0)
+
+cd $thisdir
+
+if ! [ -e ./runner ]; then
+    # turning on -O2 is too variable across different platforms, so leave off:
+ #
+ # the move and sleep steps here help to make sure that we don't get a "text file busy"
+ # error on the ./runner call below:
+ #
+ gcc ./runner.c -lm -o runner.tmp && mv runner.tmp runner && sleep 1
+fi
+
+./runner $1
+
diff --git a/pyflow/demo/simpleDemo/testtasks/runner.c b/pyflow/demo/simpleDemo/testtasks/runner.c
new file mode 100644
index 0000000..5fad9c8
--- /dev/null
+++ b/pyflow/demo/simpleDemo/testtasks/runner.c
@@ -0,0 +1,16 @@
+#include "math.h"
+#include "assert.h"
+
+int main(int argc, char**argv) {
+assert(argc==2);
+int mult=atoi(argv[1]);
+int i,j;
+double a=0;
+long total=50000000;
+for(j=0;j<mult;++j) {
+for(i=0;i<total;++i) {
+ a+=i;a=sqrt(a);
+}
+}
+return 0;
+}
diff --git a/pyflow/demo/simpleDemo/testtasks/sleeper.bash b/pyflow/demo/simpleDemo/testtasks/sleeper.bash
new file mode 100755
index 0000000..8c77fb2
--- /dev/null
+++ b/pyflow/demo/simpleDemo/testtasks/sleeper.bash
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting sleep
+sleep $arg
+echo pid: $pid arg: $arg ending sleep
+
diff --git a/pyflow/demo/simpleDemo/testtasks/yeller.bash b/pyflow/demo/simpleDemo/testtasks/yeller.bash
new file mode 100755
index 0000000..87bb49c
--- /dev/null
+++ b/pyflow/demo/simpleDemo/testtasks/yeller.bash
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting yell
+for i in {1..100}; do
+ echo "Yeller $pid yellin $i stdout"
+ echo "Yeller $pid yellin $i stderr" 1>&2
+done
+echo pid: $pid arg: $arg ending yell
+
diff --git a/pyflow/demo/subWorkflow/subWorkflow.py b/pyflow/demo/subWorkflow/subWorkflow.py
new file mode 100755
index 0000000..ebabc3f
--- /dev/null
+++ b/pyflow/demo/subWorkflow/subWorkflow.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+import os.path
+import sys
+
+# add module path by hand
+#
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+sys.path.append(scriptDir+"/../../src")
+
+from pyflow import WorkflowRunner
+
+
+#
+# very simple task scripts called by the demo:
+#
+testJobDir=os.path.join(scriptDir,"testtasks")
+
+sleepjob=os.path.join(testJobDir,"sleeper.bash") # sleeps
+yelljob=os.path.join(testJobDir,"yeller.bash") # generates some i/o
+runjob=os.path.join(testJobDir,"runner.bash") # runs at 100% cpu
+
+
+
+# all pyflow workflows are written into classes derived from pyflow.WorkflowRunner:
+#
+# this workflow is a simple example of a workflow we can either run directly,
+# or run as a task within another workflow:
+#
+class SubWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+ # this workflow executes a simple dependency diamond:
+ self.addTask("task1",yelljob+" 1")
+ self.addTask("task2a",yelljob+" 1",dependencies="task1")
+ self.addTask("task2b",yelljob+" 1",dependencies="task1")
+ self.addTask("task3",yelljob+" 1",dependencies=("task2a","task2b"))
+
+
+#
+# This workflow will use SubWorkflow as a task:
+#
+class SimpleWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ # it's fine to repeat task names in two workflows, even if you're sub-tasking one from the other
+ self.addTask("task1",yelljob+" 1")
+ self.addTask("task2",runjob+" 3")
+
+ # instantiate a new workflow and run it as soon as task1 and task2 complete
+ wflow=SubWorkflow()
+ self.addWorkflowTask("subwf_task3",wflow,dependencies=("task1","task2"))
+
+ # this job will not run until the workflow-task completes. This means that all of the
+ # tasks that SubWorkflow launches will need to complete successfully beforehand:
+ #
+ self.addTask("task4",sleepjob+" 1",dependencies="subwf_task3")
+
+
+# Instantiate our workflow
+#
+wflow = SimpleWorkflow()
+
+# Run the workflow:
+#
+retval=wflow.run(mode="local",nCores=8)
+
+
+# If we want to run the SubWorkflow as a regular workflow, that can be done as well:
+#
+
+#wflow2 = SubWorkflow()
+#retval2=wflow2.run()
+
+
+sys.exit(retval)
+
+
diff --git a/pyflow/demo/subWorkflow/testtasks/runner.bash b/pyflow/demo/subWorkflow/testtasks/runner.bash
new file mode 100755
index 0000000..df97eb1
--- /dev/null
+++ b/pyflow/demo/subWorkflow/testtasks/runner.bash
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+thisdir=$(dirname $0)
+
+cd $thisdir
+
+if ! [ -e ./runner ]; then
+    # turning on -O2 is too variable across different platforms, so leave off:
+ #
+ # the move and sleep steps here help to make sure that we don't get a "text file busy"
+ # error on the ./runner call below:
+ #
+ gcc ./runner.c -lm -o runner.tmp && mv runner.tmp runner && sleep 1
+fi
+
+./runner $1
+
diff --git a/pyflow/demo/subWorkflow/testtasks/runner.c b/pyflow/demo/subWorkflow/testtasks/runner.c
new file mode 100644
index 0000000..5fad9c8
--- /dev/null
+++ b/pyflow/demo/subWorkflow/testtasks/runner.c
@@ -0,0 +1,16 @@
+#include "math.h"
+#include "assert.h"
+
+int main(int argc, char**argv) {
+assert(argc==2);
+int mult=atoi(argv[1]);
+int i,j;
+double a=0;
+long total=50000000;
+for(j=0;j<mult;++j) {
+for(i=0;i<total;++i) {
+ a+=i;a=sqrt(a);
+}
+}
+return 0;
+}
diff --git a/pyflow/demo/subWorkflow/testtasks/sleeper.bash b/pyflow/demo/subWorkflow/testtasks/sleeper.bash
new file mode 100755
index 0000000..8c77fb2
--- /dev/null
+++ b/pyflow/demo/subWorkflow/testtasks/sleeper.bash
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting sleep
+sleep $arg
+echo pid: $pid arg: $arg ending sleep
+
diff --git a/pyflow/demo/subWorkflow/testtasks/yeller.bash b/pyflow/demo/subWorkflow/testtasks/yeller.bash
new file mode 100755
index 0000000..87bb49c
--- /dev/null
+++ b/pyflow/demo/subWorkflow/testtasks/yeller.bash
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting yell
+for i in {1..100}; do
+ echo "Yeller $pid yellin $i stdout"
+ echo "Yeller $pid yellin $i stderr" 1>&2
+done
+echo pid: $pid arg: $arg ending yell
+
diff --git a/pyflow/demo/successMsgDemo/successMsgDemo.py b/pyflow/demo/successMsgDemo/successMsgDemo.py
new file mode 100755
index 0000000..1e53d9e
--- /dev/null
+++ b/pyflow/demo/successMsgDemo/successMsgDemo.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+import os.path
+import sys
+
+# add module path by hand
+#
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+sys.path.append(scriptDir+"/../../src")
+
+from pyflow import WorkflowRunner
+
+
+#
+# test and demonstrate the use of a custom success message
+# at the end of a workflow
+#
+
+
+
+# all pyflow workflows are written into classes derived from
+# pyflow.WorkflowRunner:
+#
+class SuccessWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the
+ # WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+        # provide a minimal task
+ self.addTask("task1","touch success\! && exit 0")
+
+
+
+
+# Instantiate the workflow
+#
+wflow = SuccessWorkflow()
+
+# Run the workflow:
+#
+cwd=os.getcwd()
+successMsg = "SuccessWorkflow has successfully succeeded!\n"
+successMsg += "\tPlease find your token of successful succeeding here: '%s'\n" % (cwd)
+retval=wflow.run(mode="local",nCores=8,successMsg=successMsg,mailTo="csaunders@illumina.com")
+
+sys.exit(retval)
+
diff --git a/pyflow/doc/ChangeLog.txt b/pyflow/doc/ChangeLog.txt
new file mode 100644
index 0000000..bc0bcab
--- /dev/null
+++ b/pyflow/doc/ChangeLog.txt
@@ -0,0 +1,202 @@
+v1.1.13 20160414
+* fix rare issue with sets of dependent checkpoint tasks
+* fix for travis CI script from Dominic Jodoin
+v1.1.12 20151203
+* lengthen signal file delay tolerance to 4 mins
+* [#14] Filter environment variables to remove bash functions. This
+eliminates complications between shellshock bash update and SGE.
+v1.1.11 20151125
+* Improve SGE robustness
+v1.1.10 20150927
+* Remove old custom cluster SGE logic and standardize on h_vmem
+v1.1.9 20150923
+* Add windows CI script to run cross-platform tests on master
+* Add new cross-platform test script and adjust all unit tests
+to run on windows
+* Improve error reporting for missing signal file case
+v1.1.8 20150918
+* Improve windows shell compatibility
+* [#10] If an error occurs creating the task visualization script,
+issue warning and allow workflow to continue
+v1.1.7 20150806
+* [#9] improve robustness to filesystem delays for task
+wrapper parameter file
+* [#9] improve error log specificity when anomalous task
+wrapper output occurs in localhost run mode
+v1.1.6 20150713
+* Fix multithread conflict introduced by [#5] fix
+v1.1.5 20150710
+* Changed to BSD 2-Clause license
+* [#5] fix in-workflow check for nested workflows
+v1.1.4 20150527
+* added check for local SMTP service before sending email notification
+* added extra check against workflow methods running outside of pyflow runtime
+v1.1.3 20141028
+* fix master node memory spike for SGE jobs identified by Lilian Janin
+* added Windows OS patches from Tobias Mann
+v1.1.2 20131026
+* added python distutils setup.py provided by Ryan Kelley
+v1.1.1 20130716
+* fix issue with new startFromTasks feature when used with subWorkflows
+v1.1.0 20130715
+* add new features to allow only part of workflow to be run and/or continued
+v1.0.1 20130710
+* Fix O(exp) scaling problems in DAG handling methods
+v1.0.0 20130507
+* relicenced to Illumina Open Source Software License v1
+v0.6.26 20130304
+* fix bug in forceContinue'd workflow runs
+v0.6.25 20130221
+* Add optional warning and error log which contains all logged warning or error messages.
+* allow specification of a custom notification message on successful workflow completion
+* allow any task to specify its own retry parameters, overriding the
+run-level parameters of the same name
+* add retryMode to allow task retry to be applied to local mode
+v0.6.24 20121128
+* accelerate localhost task sweep so that short-running task workflows can
+execute more quickly
+* create new mutex option to addTask, this allows a set of tasks to share a mutex
+id, causing no more than one in the group to be executed at the same time
+v0.6.23 20121018
+* change cwd parameter to not require directory to exist
+* fix version number search so that a non-installed version does not require git to be installed
+v0.6.22 20121002
+* fix custom environment option to be more robust in various sge contexts
+v0.6.21 20120925
+* add option to supply custom environment variables to any task
+* fix error message when invalid runmode is given
+v0.6.20 20120920
+* increase # of retries and timeout length for qstat call
+v0.6.19 20120914
+* check for and allow OSError on fsync call.
+v0.6.18 201207
+* ignore available memory limit in non-local run modes
+* detect if multiple active pyflow jobs are attempting to use the same data
+directory
+v0.6.17 20120622
+* minor cleanups: add python version to reports and python 2.7.2 warning to
+logs
+v0.6.16 20120529
+* issue error when task specifies itself as a dependency
+* fix issue which could cause pyflow to hang when using python 2.7.2 in sge
+mode, hang can still occur in local mode. python 2.7.2 should be avoided.
+v0.6.15 20120525
+* Improved developer logging: dump stack for all threads in python 2.5+ during the update interval
+* Additional sge command error logging
+* automate pyflow version setting and add this to workflow logs
+* improved logging scalability for 10000+ task workflows
+* improved API documentation
+v0.6.14.1 20120518
+* remove typo bug
+v0.6.14 20120507
+* Add timeouts to sge qsub and qstat commands in case these hang (observed at
+low frequency on sd clusters)
+* Write SGE job_number to log for every qsub-ed job
+* Write the name of the longest queued and longest running tasks in the status
+update report.
+* Add new demo demonstrating commandline settings for workflow run arguments
+v0.6.13 20120503
+* Fix regression in sge make jobs introduced with queue option
+v0.6.12 20120429
+* Add cwd argument to addTask to change wd before task execution
+v0.6.11 20120424
+* Remove sub-workflows from status update report
+* Dump full qsub arg list for each sge job to temporary logs in case of sge anomaly
+* Log sge job number in case of anomalous state at end of sge job
+* taskWrapper logs hostname as soon as possible in case of error
+* More reliable (but slower) flush used for log writes
+* Add option to provide a list of arguments to qsub/qmake (to specify queue
+most likely)
+* Add option to turn off logging to stderr.
+v0.6.10 20120419
+* Provide a 'heartbeat' task status update to the log at a specified
+interval.
+v0.6.9
+* Improve robustness against NFS update delay for task wrapper file
+* Include more sge error details in Notification email
+v0.6.8 20120319
+* Better handling on terminal hang-up: capture and ignore SIGHUP and
+handle failed writes to stderr. You should still use nohup where needed,
+but if you don't, then proper logging and notification will continue.
+* flush log writes
+v0.6.7 20120316
+* add tail of task stderr stream to notifications when tasks fail
+* apply ctrl-C task shutdown to SIGTERM as well
+v0.6.6 20120315
+* include configuration for uscp-prd cluster
+* Passive graph creation
+* Reduce thread stack size in later versions of python
+* More robust ctrl-C shutdown behavior (master workflow on own thread)
+v0.6.5 20120308
+* bagPipes fix
+v0.6.4 20120307
+* Consume filehandles more efficiently when running a very large number of
+local jobs. Reset SGE default max jobs to something reasonable (128). Allow
+logging to continue even after filehandles are exhausted in case it manages
+to happen.
+v0.6.3 20120305
+* Switch sge mode from using qsub sync to a more scalable qsub-and-poll scheme
+This immediately removes the 99 job sge limit, and provides the infrastructure
+for queueing or running timeout on tasks.
+v0.6.2
+* allow commands to be submitted as argument lists (in addition to shell
+strings, as before). Argument lists allow longer commands and obviate a
+variety of quoting issues.
+* Change site configuration to an object scheme which simplifies site
+customization.
+* change qmake tasks from parallel env to dynamic allocation mode
+* allow qmake jobs to retry without an expiration window
+* improved reporting of failed make jobs
+* fixed minor issue with make path cleanup
+v0.6.1 20120228
+* primarily a bagPipes release
+* fixed isForceLocal task bug introduced in 0.6
+* fixed make task bug introduced in 0.6
+v0.6 20120227
+* primarily a bagPipes release
+* added task priority option
+
+v0.5.5 20120224
+* more robust hostname lookup combines ip query and alias list
+* fix runner.bash demo failures on some machines
+* fix pyflowTaskWrapper stderr caching
+v0.5.4 20120224
+* fix configuration for non-sge hosts
+v0.5.2 20120222
+* chuk pthread fix for pyflow tasks
+v0.5.1 20120221
+* Added automatic chuk sge configuration to allow bagPipes to complete
+in the uk.
+* Various fixes from uk testing: (1) full hostname is correctly found in
+the uk now (2) the default email configuration now comes from
+"pyflow-bot@"YOUR_DOMAIN_NAME. This is required to correctly
+get mail sent from a uk box.
+v0.5 20120220
+* Cutting version of pyFlow to sync with first bagPipes prototype
+* add max SGE jobs to configuration parameters -- default set to 96
+* Fix sub-workflows to shutdown properly after task shutdown
+* More robust handling of commands with quoting and special characters
+* Non-breaking API change: isTaskComplete lets you query whether a task
+is in the workflow and completed -- useful for sane interrupt/resume behavior
+* Non-breaking API change: limitNCores(n) and limitMemMb(n) can be used now to
+reduce your resource request to the maximum available for this run.
+
+v0.4 20120216
+* Added memory to the task resource tracking
+* Created pyflowConfig file which contains site specific code --
+moved resource to qsub argument translation functions into this
+config file
+* Non-breaking API change: Added isCmdStable option to addTask to specify that a command
+can change on workflow resume.
+* Non-breaking API change: all add*() methods return task label
+
+v0.3 20120213
+* Full support for recursive WorkflowRunner task specification -- provide any
+other WorkflowRunner instance as a task in a workflow() definition.
+* Report Exceptions in TaskRunner objects as task errors
+* Report list of failed tasks even during a waitForTasks() holding loop.
+
+v0.2 20120207
+First versioned release. Major addition is a complete command-line interface
+for the bcl converter demo
+
diff --git a/pyflow/doc/README.txt b/pyflow/doc/README.txt
new file mode 100644
index 0000000..7812fff
--- /dev/null
+++ b/pyflow/doc/README.txt
@@ -0,0 +1,4 @@
+client_api/ -> contains documentation on the pyflow API which you can use to create your own workflow scripts
+
+developer/ -> contains documentation that's only useful if you'd like to change or add features to pyflow itself
+
diff --git a/pyflow/doc/client_api/README b/pyflow/doc/client_api/README
new file mode 100644
index 0000000..503650b
--- /dev/null
+++ b/pyflow/doc/client_api/README
@@ -0,0 +1,12 @@
+Pre-generated client API documentation is here:
+
+WorkflowRunner_API_html_doc
+
+Full API documentation can be created as an html tree using the script:
+'make_WorkflowRunner_API_html_doc.bash'
+
+Note this requires the program 'epydoc'.
+
+A simpler ascii documentation page can be generated in pydoc by running:
+'make_WorkflowRunner_API_simple_doc.py'
+
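+A minimal usage sketch of the client API (an editor's illustration, loosely
+based on the helloWorld demo -- see pyflow/demo for complete, tested examples,
+and the generated API documentation for the exact run() options):
+
+    import sys
+    from pyflow import WorkflowRunner
+
+    class HelloWorkflow(WorkflowRunner) :
+        def workflow(self) :
+            self.addTask("sayHello", "echo 'Hello pyflow!'")
+
+    wflow = HelloWorkflow()
+    sys.exit(wflow.run(mode="local"))
+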
diff --git a/pyflow/doc/client_api/make_WorkflowRunner_API_html_doc.bash b/pyflow/doc/client_api/make_WorkflowRunner_API_html_doc.bash
new file mode 100755
index 0000000..4067ac8
--- /dev/null
+++ b/pyflow/doc/client_api/make_WorkflowRunner_API_html_doc.bash
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+thisdir=$(dirname $0)
+
+PYTHONPATH=$thisdir/../../src epydoc pyflow.WorkflowRunner --no-private -o WorkflowRunner_API_html_doc
+
diff --git a/pyflow/doc/client_api/make_WorkflowRunner_API_simple_doc.py b/pyflow/doc/client_api/make_WorkflowRunner_API_simple_doc.py
new file mode 100755
index 0000000..ed75429
--- /dev/null
+++ b/pyflow/doc/client_api/make_WorkflowRunner_API_simple_doc.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+
+import os.path
+import sys
+
+sys.path.append(os.path.abspath(os.path.dirname(__file__)) + "/../../src")
+
+import pyflow
+
+# Document the public functions of pyflow's only public class:
+#
+help(pyflow.WorkflowRunner)
+
diff --git a/pyflow/doc/developer/README b/pyflow/doc/developer/README
new file mode 100644
index 0000000..76ebfb8
--- /dev/null
+++ b/pyflow/doc/developer/README
@@ -0,0 +1 @@
+This documentation is intended for anyone interested in changing pyflow itself. For documentation on the API to *use* pyflow, please see pyflow/doc/client_api and demo programs in pyflow/demo
diff --git a/pyflow/doc/developer/make_pyflow_developer_html_doc.bash b/pyflow/doc/developer/make_pyflow_developer_html_doc.bash
new file mode 100755
index 0000000..ac453e1
--- /dev/null
+++ b/pyflow/doc/developer/make_pyflow_developer_html_doc.bash
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+thisdir=$(dirname $0)
+
+epydoc $thisdir/../../src/*.py -o pyflow_developer_html_doc -v --graph all
+
diff --git a/pyflow/setup.py b/pyflow/setup.py
new file mode 100644
index 0000000..1345197
--- /dev/null
+++ b/pyflow/setup.py
@@ -0,0 +1,11 @@
+from distutils.core import setup
+
+setup(
+ name='pyFlow',
+ version='${VERSION}',
+ description='A lightweight parallel task engine',
+ author='Chris Saunders',
+ author_email='csaunders at illumina.com',
+ packages=['pyflow'],
+ package_dir={'pyflow': 'src'}
+)
diff --git a/pyflow/src/__init__.py b/pyflow/src/__init__.py
new file mode 100644
index 0000000..b69891e
--- /dev/null
+++ b/pyflow/src/__init__.py
@@ -0,0 +1 @@
+from pyflow import *
diff --git a/pyflow/src/pyflow.py b/pyflow/src/pyflow.py
new file mode 100644
index 0000000..0ed516d
--- /dev/null
+++ b/pyflow/src/pyflow.py
@@ -0,0 +1,4175 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+"""
+pyflow -- a lightweight parallel task engine
+"""
+
+__author__ = 'Christopher Saunders'
+
+
+import copy
+import datetime
+import os
+import re
+import shutil
+import subprocess
+import sys
+import threading
+import time
+import traceback
+
+from pyflowConfig import siteConfig
+
+
+moduleDir = os.path.abspath(os.path.dirname(__file__))
+
+
+# minimum python version
+#
+pyver = sys.version_info
+if pyver[0] != 2 or (pyver[0] == 2 and pyver[1] < 4) :
+ raise Exception("pyflow module has only been tested for python versions [2.4,3.0)")
+
+# problem python versions:
+#
+# Internal interpreter deadlock issue in python 2.7.2:
+# http://bugs.python.org/issue13817
+# ...is so bad that pyflow can partially, but not completely, work around it -- so an exception is raised for this case.
+if pyver[0] == 2 and pyver[1] == 7 and pyver[2] == 2 :
+ raise Exception("Python interpreter errors in python 2.7.2 may cause a pyflow workflow hang or crash. Please use a different python version.")
+
+
+# The line below is a workaround for a python 2.4/2.5 bug in
+# the subprocess module.
+#
+# Bug is described here: http://bugs.python.org/issue1731717
+# Workaround is described here: http://bugs.python.org/issue1236
+#
+subprocess._cleanup = lambda: None
+
+
+# In python 2.5 or greater, we can lower the per-thread stack size to
+# improve memory consumption when a very large number of jobs are
+# run. Below it is lowered to 256Kb (compare to linux default of
+# 8Mb).
+#
+try:
+ threading.stack_size(256 * 1024)
+except AttributeError:
+ # Assuming this means python version < 2.5
+ pass
+
+
+class GlobalSync :
+ """
+ Control total memory usage in non-local run modes by
+ limiting the number of simultaneous subprocess calls
+
+ Note that in practice this only controls the total number
+ of qsub/qstat calls in SGE mode
+ """
+ maxSubprocess = 2
+ subprocessControl = threading.Semaphore(maxSubprocess)
+
+
+
+def getPythonVersion() :
+ python_version = sys.version_info
+ return ".".join([str(i) for i in python_version])
+
+pythonVersion = getPythonVersion()
+
+
+# Get pyflow version number
+#
+
+def getPyflowVersion() :
+ # this will be automatically macro-ed in for pyflow releases:
+ pyflowAutoVersion = None
+
+ # Get version number in regular release code:
+ if pyflowAutoVersion is not None : return pyflowAutoVersion
+
+ # Get version number during dev:
+ try :
+ proc = subprocess.Popen(["git", "describe"], stdout=subprocess.PIPE, stderr=open(os.devnull, "w"), cwd=moduleDir, shell=False)
+ (stdout, _stderr) = proc.communicate()
+ retval = proc.wait()
+ stdoutList = stdout.split("\n")[:-1]
+ if (retval == 0) and (len(stdoutList) == 1) : return stdoutList[0]
+ except OSError:
+ # no git installed
+ pass
+
+ return "unknown"
+
+
+__version__ = getPyflowVersion()
+
+
+# portability functions:
+#
+
+def _isWindows() :
+ import platform
+ return (platform.system().find("Windows") > -1)
+
+class GlobalConstants :
+ isWindows=_isWindows()
+
+
+def isWindows() :
+ return GlobalConstants.isWindows
+
+
+
+
+def forceRename(src,dst) :
+ """
+ dst is only overwritten in a single atomic operation on *nix.
+ On windows, we can't have an atomic rename, but we can recreate the behavior otherwise.
+ """
+ if isWindows() :
+ if os.path.exists(dst) :
+ os.remove(dst)
+
+ maxTrials=5
+ for trial in range(maxTrials) :
+ try :
+ os.rename(src,dst)
+ return
+ except OSError :
+ if (trial+1) >= maxTrials : raise
+ time.sleep(5)
+
+
+
+def cleanEnv() :
+ """
+ clear bash functions out of the env
+
+ without this change the shellshock security update causes pyflow SGE jobs to
+ fail with the behavior of current (201512) versions of SGE qsub
+ """
+
+ ekeys = os.environ.keys()
+ for key in ekeys :
+ if key.endswith("()") :
+ del os.environ[key]
+
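+# Editor's note (illustration, not part of the upstream source): on systems
+# patched for shellshock, an exported bash function can appear in the
+# environment under a key such as "BASH_FUNC_foo()", which is what the
+# trailing-"()" test above removes, e.g.:
+#
+#   os.environ["BASH_FUNC_foo()"] = "() { echo hi; }"
+#   cleanEnv()
+#   assert "BASH_FUNC_foo()" not in os.environ
+#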
+
+# utility values and functions:
+#
+
+def ensureDir(d):
+ """
+ make directory if it doesn't already exist, raise exception if
+ something else is in the way:
+ """
+ if os.path.exists(d):
+ if not os.path.isdir(d) :
+ raise Exception("Can't create directory: %s" % (d))
+ else :
+ os.makedirs(d)
+
+
+#
+# time functions -- note there's an additional copy in the pyflow wrapper script:
+#
+# all times in pyflow are utc (never local) and printed to iso8601
+#
+def timeStampToTimeStr(ts) :
+ """
+ converts time.time() output to timenow() string
+ """
+ return datetime.datetime.utcfromtimestamp(ts).isoformat()
+
+def timeStrNow():
+ return timeStampToTimeStr(time.time())
+
+def timeStrToTimeStamp(ts):
+ import calendar
+ d = datetime.datetime(*map(int, re.split(r'[^\d]', ts)[:-1]))
+ return calendar.timegm(d.timetuple())
+
+
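+# Editor's illustration of the conversions above (UTC, ISO 8601):
+#
+#   timeStampToTimeStr(0.5)                            # -> '1970-01-01T00:00:00.500000'
+#   timeStrToTimeStamp('1970-01-01T00:00:00.500000')   # -> 0
+#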
+
+def isInt(x) :
+ return isinstance(x, (int, long))
+
+def isString(x):
+ return isinstance(x, basestring)
+
+
+def isIterable(x):
+ return (getattr(x, '__iter__', False) != False)
+
+
+def lister(x):
+ """
+ Convert input into a list, whether it's already iterable or
+ not. Make an exception for individual strings to be returned
+ as a list of one string, instead of being chopped into letters.
+ Also, convert None type to empty list:
+ """
+ # special handling in case a single string is given:
+ if x is None : return []
+ if (isString(x) or (not isIterable(x))) : return [x]
+ return list(x)
+
+
+
+def setzer(x) :
+ """
+ convert user input into a set, handling the pathological case
+ that you have been handed a single string, and you don't want
+ a set of letters:
+ """
+ return set(lister(x))
+
+
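+# Editor's illustration of the normalization helpers above:
+#
+#   lister(None)         # -> []
+#   lister("foo")        # -> ["foo"]   (a string is kept whole, not split)
+#   lister(("a", "b"))   # -> ["a", "b"]
+#   setzer("foo")        # -> set(["foo"])
+#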
+
+class LogState :
+ """
+ A simple logging enum
+ """
+ INFO = 1
+ WARNING = 2
+ ERROR = 3
+
+ @classmethod
+ def toString(cls,logState) :
+ if logState == cls.INFO : return "INFO"
+ if logState == cls.WARNING : return "WARNING"
+ if logState == cls.ERROR : return "ERROR"
+
+ raise Exception("Unknown log state: " + str(logState))
+
+
+# allow fsync to be globally turned off
+class LogGlobals :
+ isFsync = True
+
+
+def hardFlush(ofp):
+ ofp.flush()
+ if ofp.isatty() : return
+ # fsync call has been reported to consistently fail in some contexts (rsh?)
+ # so allow OSError
+ if not LogGlobals.isFsync : return
+ try :
+ os.fsync(ofp.fileno())
+ except OSError:
+ LogGlobals.isFsync = False
+
+
+
+def log(ofpList, msgList, linePrefix=None):
+ """
+ General logging function.
+
+ @param ofpList: A container of file objects to write to
+
+ @param msgList: A container of (or a single) multi-line log message
+ string. Final newlines are not required
+
+ @param linePrefix: A prefix to add before every line. This will come
+ *after* the log function's own '[time] [hostname]'
+ prefix.
+
+ @return: Returns a list of booleans, one per file object in ofpList,
+ indicating the success of writing to each file object
+ """
+ msgList = lister(msgList)
+ ofpList = setzer(ofpList)
+ retval = [True] * len(ofpList)
+ for msg in msgList :
+ # strip final trailing newline if it exists:
+ if (len(msg) > 0) and (msg[-1] == "\n") : msg = msg[:-1]
+ linePrefixOut = "[%s] [%s]" % (timeStrNow(), siteConfig.getHostName())
+ if linePrefix is not None : linePrefixOut += " " + linePrefix
+ # split message into prefixable lines:
+ for i, ofp in enumerate(ofpList):
+ # skip io streams which have failed before:
+ if not retval[i] : continue
+ try :
+ for line in msg.split("\n") :
+ ofp.write("%s %s\n" % (linePrefixOut, line))
+ hardFlush(ofp)
+ except IOError:
+ retval[i] = False
+ return retval
+
+
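+# Editor's illustration (hypothetical usage): write one prefixed message to
+# two streams at once; each line is emitted as
+# "[<utc time>] [<hostname>] [demo] workflow starting":
+#
+#   ok = log([sys.stdout, sys.stderr], "workflow starting", linePrefix="[demo]")
+#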
+
+def getThreadName():
+ return threading.currentThread().getName()
+
+def isMainThread() :
+ return (getThreadName() == "MainThread")
+
+
+class StrFileObject(object) :
+ """
+ fakes a filehandle for library functions which write to a stream,
+ and captures output in a string
+ """
+ def __init__(self) :
+ self.str = ""
+
+ def write(self, string) :
+ self.str += string
+
+ def __str__(self) :
+ return self.str
+
+
+def getTracebackStr() :
+ return traceback.format_exc()
+
+
+def getExceptionMsg() :
+
+ msg = ("Unhandled Exception in %s\n" % (getThreadName())) + getTracebackStr()
+ if msg[-1] == "\n" : msg = msg[:-1]
+ return msg.split("\n")
+
+
+def cmdline() :
+ return " ".join(sys.argv)
+
+
+
+def msgListToMsg(msgList):
+ """
+ convert string or list of strings into a single string message
+ """
+ msg = ""
+ isFirst=True
+ for chunk in lister(msgList) :
+ if isFirst :
+ isFirst = False
+ else :
+ msg += "\n"
+ if ((len(chunk)>0) and (chunk[-1] == '\n')) :
+ chunk = chunk[:-1]
+ msg += chunk
+
+ return msg
+
+
+
+emailRegex = re.compile(r"(?:^|\s)[-a-z0-9_.]+@(?:[-a-z0-9]+\.)+[a-z]{2,6}(?:\s|$)", re.IGNORECASE)
+
+def verifyEmailAddy(x) :
+ return (emailRegex.match(x) is not None)
+
+
+def isLocalSmtp() :
+ """
+ return true if a local smtp server is available
+ """
+ import smtplib
+ try :
+ s = smtplib.SMTP('localhost')
+ except :
+ return False
+ return True
+
+
+def sendEmail(mailTo, mailFrom, subject, msgList) :
+ import smtplib
+ # this is the way to import MIMEText in py 2.4:
+ from email.MIMEText import MIMEText
+
+ # format message list into a single string:
+ msg = msgListToMsg(msgList)
+
+ mailTo = setzer(mailTo)
+
+ msg = MIMEText(msg)
+ msg["Subject"] = subject
+ msg["From"] = mailFrom
+ msg["To"] = ", ".join(mailTo)
+
+ s = smtplib.SMTP('localhost')
+ s.sendmail(mailFrom, list(mailTo), msg.as_string())
+ s.quit()
+
+
+def boolToStr(b) :
+ return str(int(b))
+
+
+def argToBool(x) :
+ """
+ convert argument of unknown type to a bool:
+ """
+ class FalseStrings :
+ val = ("", "0", "false", "f", "no", "n", "off")
+
+ if isinstance(x, basestring) :
+ return (x.lower() not in FalseStrings.val)
+ return bool(x)
+
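+# Editor's illustration:
+#
+#   argToBool("no")     # -> False  (member of FalseStrings.val)
+#   argToBool("True")   # -> True
+#   argToBool(0)        # -> False
+#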
+
+def hashObjectValue(obj) :
+ """
+ This function hashes objects values -- the hash will be the
+ same for two objects containing the same methods and data, so
+ it corresponds to 'A==B' and *not* 'A is B'.
+ """
+ import pickle
+ import hashlib
+ return hashlib.md5(pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)).hexdigest()
+
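+# Editor's illustration: equal values hash identically, regardless of identity:
+#
+#   hashObjectValue({"a": 1}) == hashObjectValue({"a": 1})   # -> True
+#   hashObjectValue({"a": 1}) == hashObjectValue({"b": 2})   # -> False
+#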
+
+namespaceSep = "+"
+
+
+def namespaceJoin(a, b) :
+ """
+ join two strings with a separator only if a exists
+ """
+ if a == "" : return b
+ elif b == "" : return a
+ return a + namespaceSep + b
+
+
+def namespaceLabel(namespace) :
+ """
+ provide a consistent naming scheme to users for embedded workflows
+ """
+ if namespace == "" :
+ return "master workflow"
+ else :
+ return "sub-workflow '%s'" % (namespace)
+
+
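+# Editor's illustration of the namespace helpers above:
+#
+#   namespaceJoin("", "indexBam")        # -> "indexBam"
+#   namespaceJoin("align", "indexBam")   # -> "align+indexBam"
+#   namespaceLabel("")                   # -> "master workflow"
+#   namespaceLabel("align")              # -> "sub-workflow 'align'"
+#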
+
+class ExpWaiter(object) :
+ """
+ Convenience object to setup exponentially increasing wait/polling times
+ """
+ def __init__(self, startSec, factor, maxSec, event = None) :
+ """
+ optionally allow an event to interrupt wait cycle
+ """
+ assert (startSec > 0.)
+ assert (factor > 1.)
+ assert (maxSec >= startSec)
+ self.startSec = startSec
+ self.factor = factor
+ self.maxSec = maxSec
+ self.event = event
+
+ self.sec = self.startSec
+ self.isMax = False
+
+ def reset(self) :
+ self.sec = self.startSec
+
+ def wait(self) :
+ if self.event is None :
+ time.sleep(self.sec)
+ else :
+ self.event.wait(self.sec)
+ if self.isMax : return
+ self.sec = min(self.sec * self.factor, self.maxSec)
+ self.isMax = (self.sec == self.maxSec)
+ assert self.sec <= self.maxSec
+
+
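+# Editor's illustration (isDone() is a hypothetical predicate): poll with
+# exponentially increasing sleeps from 5s up to a 60s ceiling, matching the
+# parameters used for SGE qstat polling later in this module:
+#
+#   waiter = ExpWaiter(5, 1.7, 60)
+#   while not isDone() :
+#       waiter.wait()
+#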
+
+def lockMethod(f):
+ """
+ method decorator acquires/releases object's lock
+ """
+
+ def wrapped(self, *args, **kw):
+ if not hasattr(self,"lock") :
+ self.lock = threading.RLock()
+
+ self.lock.acquire()
+ try:
+ return f(self, *args, **kw)
+ finally:
+ self.lock.release()
+ return wrapped
+
+
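+# Editor's illustration: decorating a method with @lockMethod serializes calls
+# through the owning object's re-entrant lock (created on first use):
+#
+#   class Counter(object) :
+#       def __init__(self) :
+#           self.n = 0
+#       @lockMethod
+#       def increment(self) :
+#           self.n += 1
+#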
+
+class Bunch:
+ """
+ generic struct with named argument constructor
+ """
+ def __init__(self, **kwds):
+ self.__dict__.update(kwds)
+
+
+
+def stackDump(dumpfp):
+ """
+ adapted from haridsv @ stackoverflow:
+ """
+
+ athreads = threading.enumerate()
+ tnames = [(th.getName()) for th in athreads]
+
+ frames = None
+ try:
+ frames = sys._current_frames()
+ except AttributeError:
+ # python version < 2.5
+ pass
+
+ id2name = {}
+ try:
+ id2name = dict([(th.ident, th.getName()) for th in athreads])
+ except AttributeError :
+ # python version < 2.6
+ pass
+
+ if (frames is None) or (len(tnames) > 50) :
+ dumpfp.write("ActiveThreadCount: %i\n" % (len(tnames)))
+ dumpfp.write("KnownActiveThreadNames:\n")
+ for name in tnames : dumpfp.write(" %s\n" % (name))
+ dumpfp.write("\n")
+ return
+
+ dumpfp.write("ActiveThreadCount: %i\n" % (len(frames)))
+ dumpfp.write("KnownActiveThreadNames:\n")
+ for name in tnames : dumpfp.write(" %s\n" % (name))
+ dumpfp.write("\n")
+
+ for tid, stack in frames.items():
+ dumpfp.write("Thread: %d %s\n" % (tid, id2name.get(tid, "NAME_UNKNOWN")))
+ for filename, lineno, name, line in traceback.extract_stack(stack):
+ dumpfp.write('File: "%s", line %d, in %s\n' % (filename, lineno, name))
+ if line is not None:
+ dumpfp.write(" %s\n" % (line.strip()))
+ dumpfp.write("\n")
+ dumpfp.write("\n")
+
+
+
+
+#######################################################################
+#
+# these functions are written out to a utility script which allows users
+# to make a dot graph from their current state directory output. We
+# keep it in pyflow as working code so that pyflow can call sections of it.
+#
+
+def taskStateHeader() :
+ return "#taskLabel\ttaskNamespace\trunState\terrorCode\trunStateUpdateTime\n"
+
+
+def taskStateParser(stateFile) :
+ class Constants :
+ nStateCols = 5
+
+ for line in open(stateFile) :
+ if len(line) and line[0] == "#" : continue
+ line = line.strip()
+ w = line.split("\t")
+ if len(w) != Constants.nStateCols :
+ raise Exception("Unexpected format in taskStateFile: '%s' line: '%s'" % (stateFile, line))
+ yield [x.strip() for x in w]
+
+
+def taskInfoHeader() :
+ return "#%s\n" % ("\t".join(("taskLabel", "taskNamespace", "taskType", "nCores", "memMb", "priority", "isForceLocal", "dependencies", "cwd", "command")))
+
+
+def taskInfoParser(infoFile) :
+ class Constants :
+ nInfoCols = 10
+
+ for line in open(infoFile) :
+ if len(line) and line[0] == "#" : continue
+ line = line.lstrip()
+ w = line.split("\t", (Constants.nInfoCols - 1))
+ if len(w) != Constants.nInfoCols :
+ raise Exception("Unexpected format in taskInfoFile: '%s' line: '%s'" % (infoFile, line))
+ yield [x.strip() for x in w]
+
+
+def getTaskInfoDepSet(s) :
+ # reconstruct dependencies allowing for extraneous whitespace in the file:
+ s = s.strip()
+ if s == "" : return []
+ return set([d.strip() for d in s.split(",")])
+
+
+
+class TaskNodeConstants(object) :
+
+ validRunstates = ("complete", "running", "queued", "waiting", "error")
+
+
+
+class DotConfig(object) :
+ """
+ A static container of configuration data for dot graph output
+ """
+
+ runstateDotColor = {"waiting" : "grey",
+ "running" : "green",
+ "queued" : "yellow",
+ "error" : "red",
+ "complete" : "blue" }
+
+ runstateDotStyle = {"waiting" : "dashed",
+ "running" : None,
+ "queued" : None,
+ "error" : "bold",
+ "complete" : None }
+
+ @staticmethod
+ def getRunstateDotAttrib(runstate) :
+ color = DotConfig.runstateDotColor[runstate]
+ style = DotConfig.runstateDotStyle[runstate]
+ attrib = ""
+ if color is not None : attrib += " color=%s" % (color)
+ if style is not None : attrib += " style=%s" % (style)
+ return attrib
+
+ @staticmethod
+ def getTypeDotAttrib(nodeType) :
+ attrib = ""
+ if nodeType == "workflow" :
+ attrib += " shape=rect style=rounded"
+ return attrib
+
+ @staticmethod
+ def getDotLegend() :
+ string = '{ rank = source; Legend [shape=none, margin=0, label=<\n'
+ string += '<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0" CELLPADDING="4">\n'
+ string += '<TR><TD COLSPAN="2">Legend</TD></TR>\n'
+ for state in TaskNodeConstants.validRunstates :
+ color = DotConfig.runstateDotColor[state]
+ string += '<TR> <TD>%s</TD> <TD BGCOLOR="%s"></TD> </TR>\n' % (state, color)
+ string += '</TABLE>>];}\n'
+ return string
+
+
+
+def writeDotGraph(taskInfoFile, taskStateFile, workflowClassName) :
+ """
+ write out the current graph state in dot format
+ """
+
+ addOrder = []
+ taskInfo = {}
+ headNodes = set()
+ tailNodes = set()
+
+ # read info file:
+ for (label, namespace, ptype, _nCores, _memMb, _priority, _isForceLocal, depStr, _cwdStr, _command) in taskInfoParser(taskInfoFile) :
+ tid = (namespace, label)
+ addOrder.append(tid)
+ taskInfo[tid] = Bunch(ptype=ptype,
+ parentLabels=getTaskInfoDepSet(depStr))
+ if len(taskInfo[tid].parentLabels) == 0 : headNodes.add(tid)
+ tailNodes.add(tid)
+ for plabel in taskInfo[tid].parentLabels :
+ ptid = (namespace, plabel)
+ if ptid in tailNodes : tailNodes.remove(ptid)
+
+ for (label, namespace, runState, _errorCode, _time) in taskStateParser(taskStateFile) :
+ tid = (namespace, label)
+ taskInfo[tid].runState = runState
+
+ dotFp = sys.stdout
+ dotFp.write("// Task graph from pyflow object '%s'\n" % (workflowClassName))
+ dotFp.write("// Process command: '%s'\n" % (cmdline()))
+ dotFp.write("// Process working dir: '%s'\n" % (os.getcwd()))
+ dotFp.write("// Graph capture time: %s\n" % (timeStrNow()))
+ dotFp.write("\n")
+ dotFp.write("digraph %s {\n" % (workflowClassName + "Graph"))
+ dotFp.write("\tcompound=true;\nrankdir=LR;\nnode[fontsize=10];\n")
+ labelToSym = {}
+ namespaceGraph = {}
+ for (i, (namespace, label)) in enumerate(addOrder) :
+ tid = (namespace, label)
+ if namespace not in namespaceGraph :
+ namespaceGraph[namespace] = ""
+ sym = "n%i" % i
+ labelToSym[tid] = sym
+ attrib1 = DotConfig.getRunstateDotAttrib(taskInfo[tid].runState)
+ attrib2 = DotConfig.getTypeDotAttrib(taskInfo[tid].ptype)
+ namespaceGraph[namespace] += "\t\t%s [label=\"%s\"%s%s];\n" % (sym, label, attrib1, attrib2)
+
+ for (namespace, label) in addOrder :
+ tid = (namespace, label)
+ sym = labelToSym[tid]
+ for plabel in taskInfo[tid].parentLabels :
+ ptid = (namespace, plabel)
+ namespaceGraph[namespace] += ("\t\t%s -> %s;\n" % (labelToSym[ptid], sym))
+
+ for (i, ns) in enumerate(namespaceGraph.keys()) :
+ isNs = ((ns is not None) and (ns != ""))
+ dotFp.write("\tsubgraph cluster_sg%i {\n" % (i))
+ if isNs :
+ dotFp.write("\t\tlabel = \"%s\";\n" % (ns))
+ else :
+ dotFp.write("\t\tlabel = \"%s\";\n" % (workflowClassName))
+ dotFp.write(namespaceGraph[ns])
+ dotFp.write("\t\tbegin%i [label=\"begin\" shape=diamond];\n" % (i))
+ dotFp.write("\t\tend%i [label=\"end\" shape=diamond];\n" % (i))
+ for (namespace, label) in headNodes :
+ if namespace != ns : continue
+ sym = labelToSym[(namespace, label)]
+ dotFp.write("\t\tbegin%i -> %s;\n" % (i, sym))
+ for (namespace, label) in tailNodes :
+ if namespace != ns : continue
+ sym = labelToSym[(namespace, label)]
+ dotFp.write("\t\t%s -> end%i;\n" % (sym, i))
+ dotFp.write("\t}\n")
+ if ns in labelToSym :
+ dotFp.write("\t%s -> begin%i [style=dotted];\n" % (labelToSym[ns], i))
+ # in LR orientation this will make the graph look messy:
+ # dotFp.write("\tend%i -> %s [style=invis];\n" % (i,labelToSym[ns]))
+
+ dotFp.write(DotConfig.getDotLegend())
+ dotFp.write("}\n")
+ hardFlush(dotFp)
+
+
+
+def writeDotScript(taskDotScriptFile,
+ taskInfoFileName, taskStateFileName,
+ workflowClassName) :
+ """
+ write dot task graph creation script
+ """
+ import inspect
+
+ dsfp = os.fdopen(os.open(taskDotScriptFile, os.O_WRONLY | os.O_CREAT, 0755), 'w')
+
+ dsfp.write("""#!/usr/bin/env python
+#
+# This is a script to create a dot graph from pyflow state files.
+# Usage: $script >| task_graph.dot
+#
+# Note that script assumes the default pyflow state files are in the script directory.
+#
+# This file was autogenerated by process: '%s'
+# ...from working directory: '%s'
+#
+
+import datetime,os,sys,time
+
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+""" % (cmdline(), os.getcwd()))
+
+ for dobj in (timeStampToTimeStr, timeStrNow, cmdline, Bunch, LogGlobals, hardFlush, TaskNodeConstants, DotConfig, taskStateParser, taskInfoParser, getTaskInfoDepSet, writeDotGraph) :
+ dsfp.write("\n\n")
+ dsfp.write(inspect.getsource(dobj))
+
+ dsfp.write("""
+
+if __name__ == '__main__' :
+ writeDotGraph(os.path.join(scriptDir,'%s'),os.path.join(scriptDir,'%s'),'%s')
+
+""" % (taskInfoFileName, taskStateFileName, workflowClassName))
+
+
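+# Editor's note (illustration; the script file name below is hypothetical --
+# pyflow passes the real name in via taskDotScriptFile): the generated script
+# writes dot format to stdout, which standard graphviz tooling can render:
+#
+#   python make_pyflow_task_graph.py > task_graph.dot
+#   dot -Tpng task_graph.dot -o task_graph.png
+#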
+
+################################################################
+#
+# workflowRunner Helper Classes:
+#
+#
+
+
+class Command(object) :
+ """
+ Commands can be presented as strings or argument lists (or none)
+ """
+
+ def __init__(self, cmd, cwd, env=None) :
+ # 1: sanitize/error-check cmd
+ if ((cmd is None) or
+ (cmd == "") or
+ (isIterable(cmd) and len(cmd) == 0)) :
+ self.cmd = None
+ self.type = "none"
+ elif isString(cmd) :
+ self.cmd = Command.cleanStr(cmd)
+ self.type = "str"
+ elif isIterable(cmd) :
+ self.cmd = []
+ for i, s in enumerate(cmd):
+ if not (isString(s) or isInt(s)):
+ raise Exception("Argument: '%s' from position %i in argument list command is not a string or integer. Full command: '%s'" %
+ (str(s), (i + 1), " ".join([str(s) for s in cmd])))
+ self.cmd.append(Command.cleanStr(s))
+ self.type = "list"
+ else :
+ raise Exception("Invalid task command: '%s'" % (str(cmd)))
+
+ # 2: sanitize cwd
+ self.cwd = ""
+ if cwd is not None and cwd != "" :
+ self.cwd = os.path.abspath(cwd)
+ if os.path.exists(self.cwd) and not os.path.isdir(self.cwd) :
+ raise Exception("Cwd argument is not a directory: '%s', provided for command '%s'" % (cwd, str(cmd)))
+
+ # copy env:
+ self.env = env
+
+ def __repr__(self) :
+ if self.cmd is None : return ""
+ if self.type == "str" : return self.cmd
+ return " ".join(self.cmd)
+
+ @staticmethod
+ def cleanStr(s) :
+ if isInt(s) : s = str(s)
+ if "\n" in s : raise Exception("Task command/argument contains newline characters: '%s'" % (s))
+ return s.strip()
+
+
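+# Editor's illustration of the two accepted command forms (an empty cwd means
+# "inherit the caller's working directory"; the file names are made up):
+#
+#   Command("cat in.txt | sort > out.txt", cwd="")        # .type == "str"
+#   Command(["sort", "-o", "out.txt", "in.txt"], "/tmp")  # .type == "list"
+#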
+
+class StoppableThread(threading.Thread):
+ """
+ Thread class with a stop() method. The thread itself has to check
+ regularly for the stopped() condition.
+
+ Note that this is a very new thread base class for pyflow, and most
+ threads do not (yet) check their stopped status.
+
+ """
+
+ _stopAll = threading.Event()
+
+ def __init__(self, *args, **kw):
+ threading.Thread.__init__(self, *args, **kw)
+ self._stop = threading.Event()
+
+ def stop(self):
+ "thread specific stop method, may be overridden to add async thread-specific kill behavior"
+ self._stop.set()
+
+ @staticmethod
+ def stopAll():
+ "quick global stop signal for threads that happen to poll stopped() very soon after event"
+ StoppableThread._stopAll.set()
+
+ def stopped(self):
+ return (StoppableThread._stopAll.isSet() or self._stop.isSet())
+
+
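+# Editor's illustration: a cooperative StoppableThread subclass polls
+# self.stopped() in its run loop and exits promptly once stop() or
+# StoppableThread.stopAll() has been called:
+#
+#   class Poller(StoppableThread) :
+#       def run(self) :
+#           while not self.stopped() :
+#               time.sleep(1)
+#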
+
+def getSGEJobsDefault() :
+ if ((siteConfig.maxSGEJobs is not None) and
+ (siteConfig.maxSGEJobs != "") and
+ (siteConfig.maxSGEJobs != "unlimited")) :
+ return int(siteConfig.maxSGEJobs)
+ return "unlimited"
+
+
+
+class ModeInfo(object) :
+ """
+ Stores default values associated with each runmode: local,sge,...
+ """
+ def __init__(self, defaultCores, defaultMemMbPerCore, defaultIsRetry) :
+ self.defaultCores = defaultCores
+ self.defaultMemMbPerCore = defaultMemMbPerCore
+ self.defaultIsRetry = defaultIsRetry
+
+
+
+class RunMode(object):
+
+ data = { "local" : ModeInfo(defaultCores=1,
+ defaultMemMbPerCore=siteConfig.defaultHostMemMbPerCore,
+ defaultIsRetry=False),
+ "sge" : ModeInfo(defaultCores=getSGEJobsDefault(),
+ defaultMemMbPerCore="unlimited",
+ defaultIsRetry=True) }
+
+
+
+class RetryParam(object) :
+ """
+ parameters pertaining to task retry behavior
+ """
+ allowed_modes = [ "nonlocal" , "all" ]
+
+ def __init__(self, run_mode, retry_max, wait, window, retry_mode) :
+ if retry_mode not in self.allowed_modes :
+ raise Exception("Invalid retry mode parameter '%s'. Accepted retry modes are {%s}." \
+ % (retry_mode, ",".join(self.allowed_modes)))
+
+ self._retry_max = retry_max
+ self.wait = wait
+ self.window = window
+ self._retry_mode = retry_mode
+ self._run_mode = run_mode
+
+ self._finalize()
+ self.validate()
+
+
+ def _finalize(self) :
+ """
+ decide whether to turn retry off based on retry and run modes:
+ """
+ if (self._retry_mode == "nonlocal") and \
+ (not RunMode.data[self._run_mode].defaultIsRetry) :
+ self.max = 0
+ else :
+ self.max = int(self._retry_max)
+
+
+ def validate(self):
+ """
+ check that the public parameters are valid
+ """
+ def nonNegParamCheck(val, valLabel) :
+ if val < 0 : raise Exception("Parameter %s must be non-negative" % valLabel)
+
+ nonNegParamCheck(self.max, "retryMax")
+ nonNegParamCheck(self.wait, "retryWait")
+ nonNegParamCheck(self.window, "retryWindow")
+
+
+ def getTaskCopy(self,retry_max, wait, window, retry_mode):
+ """
+ return a deepcopy of the class customized for each individual task for
+ any retry parameters which are not None
+ """
+ taskself = copy.deepcopy(self)
+
+ if retry_max is not None:
+ taskself._retry_max = retry_max
+ if wait is not None:
+ taskself.wait = wait
+ if window is not None:
+ taskself.window = window
+ if retry_mode is not None :
+ taskself._retry_mode = retry_mode
+
+ taskself._finalize()
+ taskself.validate()
+ return taskself
+
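+# Editor's illustration: with retry_mode="nonlocal", retries are switched off
+# for run modes whose ModeInfo.defaultIsRetry is False (local) and honored
+# otherwise (sge):
+#
+#   RetryParam("local", retry_max=3, wait=10, window=0, retry_mode="nonlocal").max   # -> 0
+#   RetryParam("sge", retry_max=3, wait=10, window=0, retry_mode="nonlocal").max     # -> 3
+#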
+
+class RunningTaskStatus(object) :
+ """
+ simple object allowing remote task threads to communicate their
+ status back to the TaskManager
+ """
+ def __init__(self,isFinishedEvent) :
+ self.isFinishedEvent = isFinishedEvent
+ self.isComplete = threading.Event()
+ self.errorCode = 0
+
+ # errorMessage is filled in by sub-workflow
+ # and command-line tasks.
+ #
+ # Sub-workflows use this to convey whether they have
+ # failed (1) because of failures of their own tasks or (2)
+ # because of an exception in the sub-workflow code, in which
+ # case the exception message and stacktrace are provided.
+ #
+ # command tasks use this to report the stderr tail of a failing
+ # task
+ #
+ self.errorMessage = ""
+
+ # only used by sub-workflows to indicate that all tasks have been specified
+ self.isSpecificationComplete = threading.Event()
+
+
+class BaseTaskRunner(StoppableThread) :
+ """
+ Each individual command-task or sub workflow task
+ is run on its own thread using a class inherited from
+ BaseTaskRunner
+ """
+
+ def __init__(self, runStatus, taskStr, sharedFlowLog, setRunstate) :
+ StoppableThread.__init__(self)
+ self.setDaemon(True)
+ self.taskStr = taskStr
+ self.setName("TaskRunner-Thread-%s" % (taskStr))
+ self.runStatus = runStatus
+ self._sharedFlowLog = sharedFlowLog
+ self.lock = threading.RLock()
+
+ # allows taskRunner to update between queued and running status:
+ self._setRunstate = setRunstate
+
+ # this is moved into the ctor now, so that a race condition that would double-launch a task
+ # is now not possible (however unlikely it was before):
+ self.setInitialRunstate()
+
+
+ def run(self) :
+ """
+ BaseTaskRunner's run() method ensures that we can
+ capture exceptions which might occur in this thread.
+ Do not override this method -- instead define the core
+ logic for the task run operation in '_run()'
+
+ Note that for sub-workflow tasks we're interpreting raw
+ client python code on this thread, so exceptions are
+ *very likely* here -- this is not a corner case.
+ """
+ retval = 1
+ retmsg = ""
+ try:
+ (retval, retmsg) = self._run()
+ except WorkflowRunner._AbortWorkflowException :
+ # This indicates an intended workflow interruption.
+ # send a retval of 1 but not an error message
+ pass
+ except:
+ retmsg = getExceptionMsg()
+ self.runStatus.errorCode = retval
+ self.runStatus.errorMessage = retmsg
+ # this indicates that this specific task has finished:
+ self.runStatus.isComplete.set()
+ # this indicates that *any* task has just finished, so
+ # taskmanager can stop polling and immediately sweep
+ self.runStatus.isFinishedEvent.set()
+ return retval
+
+ def setRunstate(self, *args, **kw) :
+ if self._setRunstate is None : return
+ self._setRunstate(*args, **kw)
+
+ def setInitialRunstate(self) :
+ self.setRunstate("running")
+
+ def flowLog(self, msg, logState) :
+ linePrefixOut = "[TaskRunner:%s]" % (self.taskStr)
+ self._sharedFlowLog(msg, linePrefix=linePrefixOut, logState=logState)
+
+ def infoLog(self, msg) :
+ self.flowLog(msg, logState=LogState.INFO)
+
+ def warningLog(self, msg) :
+ self.flowLog(msg, logState=LogState.WARNING)
+
+ def errorLog(self, msg) :
+ self.flowLog(msg, logState=LogState.ERROR)
+
+
+
+class WorkflowTaskRunner(BaseTaskRunner) :
+ """
+ Manages a sub-workflow task
+ """
+
+ def __init__(self, runStatus, taskStr, workflow, sharedFlowLog, setRunstate) :
+ BaseTaskRunner.__init__(self, runStatus, taskStr, sharedFlowLog, setRunstate)
+ self.workflow = workflow
+
+ def _run(self) :
+ namespace = self.workflow._getNamespace()
+ nsLabel = namespaceLabel(namespace)
+ self.infoLog("Starting task specification for %s" % (nsLabel))
+ self.workflow._setRunning(True)
+ self.workflow.workflow()
+ self.workflow._setRunning(False)
+ self.runStatus.isSpecificationComplete.set()
+ self.infoLog("Finished task specification for %s, waiting for task completion" % (nsLabel))
+ retval = self.workflow._waitForTasksCore(namespace, isVerbose=False)
+ retmsg = ""
+ return (retval, retmsg)
+
+
+class CommandTaskRunner(BaseTaskRunner) :
+ """
+ Parent to local and SGE TaskRunner specializations for command tasks
+ """
+
+ taskWrapper = os.path.join(moduleDir, "pyflowTaskWrapper.py")
+
+ def __init__(self, runStatus, runid, taskStr, cmd, nCores, memMb, retry, isDryRun,
+ outFile, errFile, tmpDir, schedulerArgList,
+ sharedFlowLog, setRunstate) :
+ """
+ @param outFile: stdout file
+ @param errFile: stderr file
+ @param tmpDir: location to write files containing output from
+ the task wrapper script (and not the wrapped task)
+ """
+ import pickle
+
+ BaseTaskRunner.__init__(self, runStatus, taskStr, sharedFlowLog, setRunstate)
+
+ self.cmd = cmd
+ self.nCores = nCores
+ self.memMb = memMb
+ self.retry = retry
+ self.isDryRun = isDryRun
+ self.outFile = outFile
+ self.errFile = errFile
+ self.tmpDir = tmpDir
+ self.schedulerArgList = schedulerArgList
+ if not os.path.isfile(self.taskWrapper) :
+ raise Exception("Can't find task wrapper script: %s" % self.taskWrapper)
+
+ ensureDir(self.tmpDir)
+ self.wrapFile = os.path.join(self.tmpDir, "pyflowTaskWrapper.signal.txt")
+
+ # setup all the data to be passed to the taskWrapper and put this in argFile:
+ taskInfo = { 'nCores' : nCores,
+ 'outFile' : outFile, 'errFile' : errFile,
+ 'cwd' : cmd.cwd, 'env' : cmd.env,
+ 'cmd' : cmd.cmd, 'isShellCmd' : (cmd.type == "str") }
+
+ argFile = os.path.join(self.tmpDir, "taskWrapperParameters.pickle")
+ pickle.dump(taskInfo, open(argFile, "w"))
+
+ self.wrapperCmd = [self.taskWrapper, runid, taskStr, argFile]
+
+
+
+ def _run(self) :
+ """
+ Outer loop of _run() handles task retry behavior:
+ """
+ startTime = time.time()
+ retries = 0
+ retInfo = Bunch(retval=1, taskExitMsg="", isAllowRetry=False)
+
+ while not self.stopped() :
+ if retries :
+ self.infoLog("Retrying task: '%s'. Total prior task failures: %i" % (self.taskStr, retries))
+
+ if self.isDryRun :
+ self.infoLog("Dryrunning task: '%s' task arg list: [%s]" % (self.taskStr, ",".join(['"%s"' % (s) for s in self.getFullCmd()])))
+ retInfo.retval = 0
+ else :
+ self.runOnce(retInfo)
+
+ if retInfo.retval == 0 : break
+ if retries >= self.retry.max : break
+ elapsed = (time.time() - startTime)
+ if (self.retry.window > 0) and \
+ (elapsed >= self.retry.window) : break
+ if self.stopped() : break
+ if not retInfo.isAllowRetry : break
+ retries += 1
+ self.warningLog("Task: '%s' failed but qualifies for retry. Total task failures (including this one): %i. Task command: '%s'" % (self.taskStr, retries, str(self.cmd)))
+ retInfo = Bunch(retval=1, taskExitMsg="", isAllowRetry=False)
+ time.sleep(self.retry.wait)
+
+ return (retInfo.retval, retInfo.taskExitMsg)
+
+
+ def getExitMsg(self) :
+ """
+ Attempt to extract an exit message from a failed command task; do not
+ complain about any errors in the task signal file in this case.
+ """
+ msgSize = None
+ wrapFp = open(self.wrapFile)
+ for line in wrapFp:
+ w = line.strip().split()
+ if (len(w) < 6) or (w[4] != "[wrapperSignal]") :
+ break
+ if w[5] == "taskStderrTail" :
+ if (len(w) == 7) : msgSize = int(w[6])
+ break
+
+ taskExitMsg = ""
+ if msgSize is not None :
+ i = 0
+ for line in wrapFp:
+ if i >= msgSize : break
+ taskExitMsg += line
+ i += 1
+ wrapFp.close()
+ return taskExitMsg
+
+
+ def getWrapFileResult(self) :
+ """
+ When the task is theoretically done, go and read the task wrapper to
+ see the actual task exit code. This is required because:
+
+ 1) On SGE or similar: We have no other way to get the exit code
+
+ 2) On all systems, we can distinguish between a conventional task error
+ and other problems, such as (a) linux OOM killer (b) exception in the
+ task wrapper itself (c) filesystem failures.
+ """
+
+ def checkWrapFileExit(result) :
+ """
+ set result.isError=True only when the file format is wrong; a missing or
+ incomplete file is not considered an error, so the function must not flag
+ an error in that case.
+ """
+
+ if not os.path.isfile(self.wrapFile) : return
+
+ for line in open(self.wrapFile) :
+ # an incomplete line indicates that the file is still being written:
+ if len(line) == 0 or line[-1] != '\n' : return
+
+ w = line.strip().split()
+
+ if len(w) < 6 :
+ result.isError = True
+ return
+ if (w[4] != "[wrapperSignal]") :
+ result.isError = True
+ return
+ if w[5] == "taskExitCode" :
+ if (len(w) == 7) :
+ result.taskExitCode = int(w[6])
+ return
+
+ retryCount = 8
+ retryDelaySec = 30
+
+ wrapResult = Bunch(taskExitCode=None, isError=False)
+
+ totalDelaySec = 0
+ for trialIndex in range(retryCount) :
+ # if the problem occurs at 0 seconds don't bother with a warning, but
+ # if we've gone through a full retry cycle, then the filesystem delay is
+ # getting unusual and should be a warning:
+ if trialIndex > 1 :
+ msg = "No complete signal file found after %i seconds, retrying after delay. Signal file path: '%s'" % (totalDelaySec,self.wrapFile)
+ self.flowLog(msg, logState=LogState.WARNING)
+
+ if trialIndex != 0 :
+ time.sleep(retryDelaySec)
+ totalDelaySec += retryDelaySec
+
+ checkWrapFileExit(wrapResult)
+ if wrapResult.isError : break
+ if wrapResult.taskExitCode is not None : break
+
+ return wrapResult
+
+
+ def getWrapperErrorMsg(self) :
+ if os.path.isfile(self.wrapFile) :
+ stderrList = open(self.wrapFile).readlines()
+ taskExitMsg = ["Anomalous task wrapper stderr output. Wrapper signal file: '%s'" % (self.wrapFile),
+ "Logging %i line(s) of task wrapper log output below:" % (len(stderrList))]
+ linePrefix = "[taskWrapper-stderr]"
+ taskExitMsg.extend([linePrefix + " " + line for line in stderrList])
+ else :
+ taskExitMsg = ["Anomalous task wrapper condition: Wrapper signal file is missing: '%s'" % (self.wrapFile)]
+
+ return taskExitMsg
+
+
+
+class LocalTaskRunner(CommandTaskRunner) :
+
+ def getFullCmd(self) :
+ return [sys.executable] + self.wrapperCmd
+
+ def runOnce(self, retInfo) :
+ # sys.stderr.write("starting subprocess call. task '%s' cmd '%s'" % (self.taskStr,self.cmd))
+ # sys.stderr.write("full cmd: "+" ".join(self.getFullCmd()) + "\n")
+ wrapFp = open(self.wrapFile, "w")
+ proc = subprocess.Popen(self.getFullCmd(), stdout=wrapFp, stderr=subprocess.STDOUT, shell=False, bufsize=1)
+ self.infoLog("Task initiated on local node")
+ retInfo.retval = proc.wait()
+ wrapFp.close()
+
+ wrapResult = self.getWrapFileResult()
+
+ if (wrapResult.taskExitCode is None) or (wrapResult.taskExitCode != retInfo.retval):
+ retInfo.taskExitMsg = self.getWrapperErrorMsg()
+ retInfo.retval = 1
+ return retInfo
+ elif retInfo.retval != 0 :
+ retInfo.taskExitMsg = self.getExitMsg()
+
+ retInfo.isAllowRetry = True
+
+ # success! (taskWrapper, but maybe not for the task...)
+ return retInfo
+
+
+
+class QCaller(threading.Thread) :
+ """
+ Calls to both qsub and qstat go through this run() method so that we
+ can time them out:
+ """
+
+ def __init__(self, cmd, infoLog) :
+ threading.Thread.__init__(self)
+ self.setDaemon(True)
+ self.setName("QCaller-Timeout-Thread")
+ self.lock = threading.RLock()
+ self.cmd = cmd
+ self.infoLog = infoLog
+ self.results = Bunch(isComplete=False, retval=1, outList=[])
+ self.proc = None
+ self.is_kill_attempt = False
+
+ def run(self) :
+ # Note: Moved Popen() call outside of the mutex and
+ # stopped using proc.communicate() here after
+ # observing python interpreter bug:
+ # http://bugs.python.org/issue13817
+ #
+ # The interpreter deadlock for this issue has been
+ # observed to block the Popen() call below when using
+ # python 2.7.2:
+ #
+ # Oct 2014 - also wrapped this call with a semaphore because
+ # of the high memory usage associated with each qsub/qstat
+ # subprocess. This was causing pyflow jobs to become unstable
+ # as they would spontaneously exceed the maximum allowed master
+ # process memory.
+ #
+ GlobalSync.subprocessControl.acquire()
+ try :
+ tmp_proc = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False)
+ self.lock.acquire()
+ try:
+ self.proc = tmp_proc
+ # handle the case where Popen was taking its good sweet time and a killProc() was sent in the meantime:
+ if self.is_kill_attempt: self.killProc()
+ finally:
+ self.lock.release()
+
+ if self.is_kill_attempt: return
+
+ for line in self.proc.stdout :
+ self.results.outList.append(line)
+ self.results.retval = self.proc.wait()
+ finally:
+ GlobalSync.subprocessControl.release()
+ self.results.isComplete = True
+
+ @lockMethod
+ def killProc(self) :
+ import signal
+
+ self.is_kill_attempt = True
+
+ if self.proc is None : return
+
+ try:
+ os.kill(self.proc.pid , signal.SIGTERM)
+ self.infoLog("Sent SIGTERM to sge command process id: %i" % (self.proc.pid))
+ except OSError :
+ # process ended before we could kill it (hopefully rare, but possible race condition artifact)
+ pass
+
+
+
+class SGETaskRunner(CommandTaskRunner) :
+
+ def getFullCmd(self):
+ # qsub options:
+ #
+ qsubCmd = ["qsub",
+ "-V", # import environment variables from shell
+ "-cwd", # use current working directory
+ "-S", sys.executable, # The taskwrapper script is python
+ "-o", self.wrapFile,
+ "-e", self.wrapFile]
+
+ qsubCmd.extend(self.schedulerArgList)
+ qsubCmd.extend(siteConfig.qsubResourceArg(self.nCores, self.memMb))
+ qsubCmd.extend(self.wrapperCmd)
+
+ return tuple(qsubCmd)
+
+
+ def setInitialRunstate(self) :
+ self.setRunstate("queued")
+
+
+ @lockMethod
+ def setNewJobId(self, jobId) :
+ """
+ if stopped here, this is the case where a ctrl-c was entered while the qsub
+ command was being submitted, so we must kill the job here:
+ """
+ self.jobId = jobId
+ if self.stopped(): self._killJob()
+
+
+ def runOnce(self, retInfo) :
+
+ def qcallWithTimeouts(cmd, maxQcallAttempt=1) :
+ maxQcallWait = 180
+ qcall = None
+ for i in range(maxQcallAttempt) :
+ qcall = QCaller(cmd,self.infoLog)
+ qcall.start()
+ qcall.join(maxQcallWait)
+ if not qcall.isAlive() : break
+ self.infoLog("Trial %i of sge command has timed out. Killing process for cmd '%s'" % ((i + 1), cmd))
+ qcall.killProc()
+ self.infoLog("Finished attempting to kill sge command")
+
+ return qcall.results
+
+ # 1) call qsub, check for errors and retrieve taskId:
+ #
+ if os.path.isfile(self.wrapFile): os.remove(self.wrapFile)
+
+ # write extra info, just in case we need it for post-mortem debug:
+ qsubFile = os.path.join(os.path.dirname(self.wrapFile), "qsub.args.txt")
+ if os.path.isfile(qsubFile): os.remove(qsubFile)
+ qsubfp = open(qsubFile, "w")
+ for arg in self.getFullCmd() :
+ qsubfp.write(arg + "\n")
+ qsubfp.close()
+
+ results = qcallWithTimeouts(self.getFullCmd())
+
+ isQsubError = False
+ self.jobId = None
+ if len(results.outList) != 1 :
+ isQsubError = True
+ else :
+ w = results.outList[0].split()
+ if (len(w) > 3) and (w[0] == "Your") and (w[1] == "job") :
+ self.setNewJobId(int(w[2]))
+ else :
+ isQsubError = True
+
+ if not results.isComplete :
+ self._killJob() # just in case...
+ retInfo.taskExitMsg = ["Job submission failure -- qsub command timed-out"]
+ return retInfo
+
+ if isQsubError or (self.jobId is None):
+ retInfo.taskExitMsg = ["Unexpected qsub output. Logging %i line(s) of qsub output below:" % (len(results.outList)) ]
+ retInfo.taskExitMsg.extend([ "[qsub-out] " + line for line in results.outList ])
+ return retInfo
+
+ if results.retval != 0 :
+ retInfo.retval = results.retval
+ retInfo.taskExitMsg = ["Job submission failure -- qsub returned exit code: %i" % (retInfo.retval)]
+ return retInfo
+
+ # No qsub errors detected and an sge job_number is acquired -- success!
+ self.infoLog("Task submitted to sge queue with job_number: %i" % (self.jobId))
+
+
+ # 2) poll jobId until sge indicates it's not running or queued:
+ #
+ queueStatus = Bunch(isQueued=True, runStartTimeStamp=None)
+
+ def checkWrapFileRunStart(result) :
+ """
+ check wrapper file for a line indicating that it has transitioned from queued to
+ running state. Allow for NFS delay or incomplete file
+ """
+ if not os.path.isfile(self.wrapFile) : return
+ for line in open(self.wrapFile) :
+ w = line.strip().split()
+ if (len(w) < 6) or (w[4] != "[wrapperSignal]") :
+ # this could be incomplete flush to the signal file, so
+ # don't treat it as error:
+ return
+ if w[5] == "taskStart" :
+ result.runStartTimeStamp = timeStrToTimeStamp(w[0].strip('[]'))
+ result.isQueued = False
+ return
+
+
+ # exponential polling times -- make small jobs responsive but give sge a break on long runs...
+ ewaiter = ExpWaiter(5, 1.7, 60)
+
+ pollCmd = ("/bin/bash", "--noprofile", "-o", "pipefail", "-c", "qstat -j %i | awk '/^error reason/'" % (self.jobId))
+ while not self.stopped():
+ results = qcallWithTimeouts(pollCmd, 6)
+ isQstatError = False
+ if results.retval != 0:
+ if ((len(results.outList) == 2) and
+ (results.outList[0].strip() == "Following jobs do not exist:") and
+ (int(results.outList[1]) == self.jobId)) :
+ break
+ else :
+ isQstatError = True
+ else :
+ if (len(results.outList) != 0) :
+ isQstatError = True
+
+ if isQstatError :
+ if not results.isComplete :
+ retInfo.taskExitMsg = ["The qstat command for sge job_number %i has timed out for all attempted retries" % (self.jobId)]
+ self._killJob()
+ else :
+ retInfo.taskExitMsg = ["Unexpected qstat output or task has entered sge error state. Sge job_number: %i" % (self.jobId)]
+ retInfo.taskExitMsg.extend(["Logging %i line(s) of qstat output below:" % (len(results.outList)) ])
+ retInfo.taskExitMsg.extend([ "[qstat-out] " + line for line in results.outList ])
+ # self._killJob() # leave the job there so the user can better diagnose whatever unexpected pattern has occurred
+ return retInfo
+
+ # also check to see if job has transitioned from queued to running state:
+ if queueStatus.isQueued :
+ checkWrapFileRunStart(queueStatus)
+ if not queueStatus.isQueued :
+ self.setRunstate("running", queueStatus.runStartTimeStamp)
+
+ ewaiter.wait()
+
+ if self.stopped() :
+ # self._killJob() # no need, job should already have been killed at the stop() call...
+ return retInfo
+
+ lastJobId = self.jobId
+
+ # if we've correctly communicated with SGE, then its role is done here.
+ # if a job kill is required for any of the error states above, it needs to be
+ # added before this point:
+ self.jobId = None
+
+ wrapResult = self.getWrapFileResult()
+
+ if wrapResult.taskExitCode is None :
+ retInfo.taskExitMsg = ["Sge job_number: '%s'" % (lastJobId)]
+ retInfo.taskExitMsg.extend(self.getWrapperErrorMsg())
+ retInfo.retval = 1
+ return retInfo
+ elif wrapResult.taskExitCode != 0 :
+ retInfo.taskExitMsg = self.getExitMsg()
+
+ retInfo.retval = wrapResult.taskExitCode
+ retInfo.isAllowRetry = True
+
+ # success! (for sge & taskWrapper, but maybe not for the task...)
+ return retInfo
+
+
+ @lockMethod
+ def _killJob(self) :
+ """
+ (possibly) asynchronous job kill
+ """
+ try : isKilled = self.isKilled
+ except AttributeError: isKilled = False
+ if isKilled: return
+
+ try : jobId = self.jobId
+ except AttributeError: jobId = None
+ if jobId is None: return
+ killCmd = ["qdel", "%i" % (int(jobId))]
+ # don't wait for or check exit code of kill cmd... just give it one try
+ # because we want cleanup to go as quickly as possible
+ subprocess.Popen(killCmd, shell=False)
+ self.isKilled = True
+
+
+ @lockMethod
+ def stop(self) :
+ """
+ overload the thread stop function to also
+ qdel any running sge tasks.
+ """
+ CommandTaskRunner.stop(self)
+ self._killJob()
+
+
+
+class TaskManager(StoppableThread) :
+ """
+ This class runs on a separate thread from workflowRunner,
+ launching jobs based on the current state of the TaskDAG
+ """
+
+ def __init__(self, cdata, tdag) :
+ """
+ @param cdata: data from WorkflowRunner instance which will be
+ constant during the lifetime of the TaskManager,
+ should be safe to lookup w/o locking
+ @param tdag: task graph
+ """
+ StoppableThread.__init__(self)
+ # parameter copy:
+ self._cdata = cdata
+ self.tdag = tdag
+ # thread settings:
+ self.setDaemon(True)
+ self.setName("TaskManager-Thread")
+ # lock is used for function (harvest), which is checked by
+ # the WorkflowRunner under (literally) exceptional circumstances only
+ self.lock = threading.RLock()
+ # rm configuration:
+ self.freeCores = self._cdata.param.nCores
+ self.freeMemMb = self._cdata.param.memMb
+ self.runningTasks = {}
+
+ # This is used to track 'pyflow mutexes' -- for each key only a single
+ # task can run at once. Key is set to True if mutex is occupied.
+ self.taskMutexState = {}
+
+
+
+ def run(self) :
+ """
+ TaskManager runs so long as there are outstanding jobs
+ """
+
+ try:
+ cleanEnv()
+ while not self._isTerm() :
+ # update status of running jobs
+ self.tdag.isFinishedEvent.clear()
+ self.harvestTasks()
+ # try to launch jobs:
+ if self.stopped() : continue
+ self._startTasks()
+ self.tdag.isFinishedEvent.wait(5)
+ except:
+ msg = getExceptionMsg()
+ self._flowLog(msg,logState=LogState.ERROR)
+ self._cdata.emailNotification(msg, self._flowLog)
+ self._cdata.setTaskManagerException()
+
+
+ def _getCommandTaskRunner(self, task) :
+ """
+ assist launch of a command-task
+ """
+
+ # shortcuts:
+ payload = task.payload
+ param = self._cdata.param
+
+ if payload.cmd.cmd is None :
+ # Note these should have been marked off by the TaskManager already:
+ raise Exception("Attempting to launch checkpoint task: %s" % (task.fullLabel()))
+
+ isForcedLocal = ((param.mode != "local") and (payload.isForceLocal))
+
+ # mark task resources as occupied:
+ if not isForcedLocal :
+ if self.freeCores != "unlimited" :
+ if (self.freeCores < payload.nCores) :
+ raise Exception("Not enough free cores to launch task")
+ self.freeCores -= payload.nCores
+
+ if self.freeMemMb != "unlimited" :
+ if (self.freeMemMb < payload.memMb) :
+ raise Exception("Not enough free memory to launch task")
+ self.freeMemMb -= payload.memMb
+
+ if payload.mutex is not None :
+ self.taskMutexState[payload.mutex] = True
+
+ TaskRunner = None
+ if param.mode == "local" or payload.isForceLocal or payload.isCmdMakePath :
+ TaskRunner = LocalTaskRunner
+ elif param.mode == "sge" :
+ TaskRunner = SGETaskRunner
+ else :
+ raise Exception("Can't support mode: '%s'" % (param.mode))
+
+ #
+ # TODO: find less hacky way to handle make tasks:
+ #
+ taskRetry = payload.retry
+
+ if payload.isCmdMakePath :
+ taskRetry = copy.deepcopy(payload.retry)
+ taskRetry.window = 0
+
+ if param.mode == "local" or payload.isForceLocal :
+ launchCmdList = ["make", "-j", str(payload.nCores)]
+ elif param.mode == "sge" :
+ launchCmdList = siteConfig.getSgeMakePrefix(payload.nCores, payload.memMb, param.schedulerArgList)
+ else :
+ raise Exception("Can't support mode: '%s'" % (param.mode))
+
+ launchCmdList.extend(["-C", payload.cmd.cmd])
+ payload.launchCmd = Command(launchCmdList, payload.cmd.cwd, payload.cmd.env)
+
+ #
+ # each commandTaskRunner requires a unique tmp dir to write
+ # wrapper signals to. TaskRunner will create this directory -- it does not bother to destroy it right now:
+ #
+
+ # split the task id into two parts to keep from adding too many files to one directory:
+ tmpDirId1 = "%03i" % ((int(task.id) / 1000))
+ tmpDirId2 = "%03i" % ((int(task.id) % 1000))
+ taskRunnerTmpDir = os.path.join(self._cdata.wrapperLogDir, tmpDirId1, tmpDirId2)
+
+ return TaskRunner(task.runStatus, self._cdata.getRunid(),
+ task.fullLabel(), payload.launchCmd,
+ payload.nCores, payload.memMb,
+ taskRetry, param.isDryRun,
+ self._cdata.taskStdoutFile,
+ self._cdata.taskStderrFile,
+ taskRunnerTmpDir,
+ param.schedulerArgList,
+ self._cdata.flowLog,
+ task.setRunstate)
+
+
+ def _getWorkflowTaskRunner(self, task) :
+ """
+ assist launch of a workflow-task
+ """
+ return WorkflowTaskRunner(task.runStatus, task.fullLabel(), task.payload.workflow,
+ self._cdata.flowLog, task.setRunstate)
+
+
+ def _launchTask(self, task) :
+ """
+ launch a specific task
+ """
+
+ if task.payload.type() == "command" :
+ trun = self._getCommandTaskRunner(task)
+ elif task.payload.type() == "workflow" :
+ trun = self._getWorkflowTaskRunner(task)
+ else :
+ assert 0
+
+ self._infoLog("Launching %s: '%s' from %s" % (task.payload.desc(), task.fullLabel(), namespaceLabel(task.namespace)))
+ trun.start()
+ self.runningTasks[task] = trun
+
+
+ @lockMethod
+ def _startTasks(self) :
+ """
+ determine what tasks, if any, can be started
+
+ Note that the lock is here to protect self.runningTasks
+ """
+ # trace through DAG, completing any empty-command checkpoints
+ # found with all dependencies completed:
+ (ready, completed) = self.tdag.getReadyTasks()
+ for node in completed:
+ if self.stopped() : return
+ self._infoLog("Completed %s: '%s' launched from %s" % (node.payload.desc(), node.fullLabel(), namespaceLabel(node.namespace)))
+
+ # launch all workflows first, then command tasks as resources
+ # allow:
+ ready_workflows = [r for r in ready if r.payload.type() == "workflow"]
+ for task in ready_workflows :
+ if self.stopped() : return
+ self._launchTask(task)
+
+ # task submission could be shutdown, eg. in response to a task
+ # error:
+ if (not self._cdata.isTaskSubmissionActive()) : return
+
+ isNonLocal = (self._cdata.param.mode != "local")
+
+ # start command task launch:
+ ready_commands = [r for r in ready if r.payload.type() == "command"]
+ ready_commands.sort(key=lambda t: (t.payload.priority, t.payload.nCores), reverse=True)
+ for task in ready_commands :
+ if self.stopped() : return
+
+ # In a non-local run mode, "isForceLocal" tasks are not subject to
+ # global core and memory restrictions:
+ isForcedLocal = (isNonLocal and task.payload.isForceLocal)
+ if not isForcedLocal :
+ if ((self.freeCores != "unlimited") and (task.payload.nCores > self.freeCores)) : continue
+ if ((self.freeMemMb != "unlimited") and (task.payload.memMb > self.freeMemMb)) : continue
+
+ # all command tasks must obey separate mutex restrictions:
+ if ((task.payload.mutex is not None) and
+ (task.payload.mutex in self.taskMutexState) and
+ (self.taskMutexState[task.payload.mutex])) : continue
+
+ self._launchTask(task)
+
+
+
+ @lockMethod
+ def harvestTasks(self) :
+ """
+ Check the set of running tasks to see if they've completed and update
+ Node status accordingly:
+ """
+ notrunning = set()
+ for task in self.runningTasks.keys() :
+ if self.stopped() : break
+ trun = self.runningTasks[task]
+ if not task.runStatus.isComplete.isSet() :
+ if trun.isAlive() : continue
+ # if not complete and thread is dead then we don't know what happened, very bad!:
+ task.errorstate = 1
+ task.errorMessage = "Thread: '%s', has stopped without a traceable cause" % (trun.getName())
+ else :
+ task.errorstate = task.runStatus.errorCode
+ task.errorMessage = task.runStatus.errorMessage
+
+ if task.errorstate == 0 :
+ task.setRunstate("complete")
+ else:
+ task.setRunstate("error")
+
+ notrunning.add(task)
+
+ if not task.isError() :
+ self._infoLog("Completed %s: '%s' launched from %s" % (task.payload.desc(), task.fullLabel(), namespaceLabel(task.namespace)))
+ else:
+ msg = task.getTaskErrorMsg()
+
+ if self._cdata.isTaskSubmissionActive() :
+ # if this is the first error in the workflow, then
+ # we elaborate a bit on the workflow's response to
+ # the error. We also send any email-notifications
+ # for the first error only:
+ msg.extend(["Shutting down task submission. Waiting for remaining tasks to complete."])
+
+ self._errorLog(msg)
+ if self._cdata.isTaskSubmissionActive() :
+ self._cdata.emailNotification(msg, self._flowLog)
+
+ # Be sure to send notifications *before* setting error
+ # bits, because the WorkflowRunner may decide to
+ # immediately shutdown all tasks and pyflow threads on
+ # the first error:
+ self._cdata.setTaskError(task)
+
+ # shortcut:
+ param = self._cdata.param
+
+ # recover task resources:
+ for task in notrunning :
+ if task.payload.type() == "command" :
+ isForcedLocal = ((param.mode != "local") and (task.payload.isForceLocal))
+ if not isForcedLocal :
+ if self.freeCores != "unlimited" :
+ self.freeCores += task.payload.nCores
+ if self.freeMemMb != "unlimited" :
+ self.freeMemMb += task.payload.memMb
+
+ if task.payload.mutex is not None :
+ self.taskMutexState[task.payload.mutex] = False
+
+ for task in notrunning:
+ del self.runningTasks[task]
+
+
+ @lockMethod
+ def stop(self) :
+ StoppableThread.stop(self)
+ for trun in self.runningTasks.values() :
+ trun.stop()
+
+
+ @lockMethod
+ def _areTasksDead(self) :
+ for trun in self.runningTasks.values() :
+ if trun.isAlive(): return False
+ return True
+
+
+ def _isTerm(self) :
+ # check for explicit thread stop request (presumably from the workflowManager):
+ # if this happens we exit the polling loop
+ #
+ if self.stopped() :
+ while True :
+ if self._areTasksDead() : return True
+ time.sleep(1)
+
+ # check for "regular" termination conditions:
+ if (not self._cdata.isTaskSubmissionActive()) :
+ return (len(self.runningTasks) == 0)
+ else :
+ if self.tdag.isRunComplete() :
+ if (len(self.runningTasks) != 0) :
+ raise Exception("Inconsistent TaskManager state: workflow appears complete but there are still running tasks")
+ return True
+ elif self.tdag.isRunExhausted() :
+ return True
+ else :
+ return False
+
+
+ def _flowLog(self, msg, logState) :
+ linePrefixOut = "[TaskManager]"
+ # if linePrefix is not None : linePrefixOut+=" "+linePrefix
+ self._cdata.flowLog(msg, linePrefix=linePrefixOut, logState=logState)
+
+
+ def _infoLog(self, msg) :
+ self._flowLog(msg, logState=LogState.INFO)
+
+ def _errorLog(self, msg) :
+ self._flowLog(msg, logState=LogState.ERROR)
+
+
+
+
+# payloads are used to manage the different
+# possible actions attributed to task nodes:
+#
+class CmdPayload(object) :
+ def __init__(self, fullLabel, cmd, nCores, memMb, priority,
+ isForceLocal, isCmdMakePath=False, isTaskStable=True,
+ mutex=None, retry=None) :
+ self.cmd = cmd
+ self.nCores = nCores
+ self.memMb = memMb
+ self.priority = priority
+ self.isForceLocal = isForceLocal
+ self.isCmdMakePath = isCmdMakePath
+ self.isTaskStable = isTaskStable
+ self.mutex = mutex
+ self.retry = retry
+
+ # launch command includes make/qmake wrapper for Make path commands:
+ self.launchCmd = cmd
+
+ if (cmd.cmd is None) and ((nCores != 0) or (memMb != 0)) :
+ raise Exception("Null tasks should not have resource requirements. task: '%s'" % (fullLabel))
+
+ def type(self) :
+ return "command"
+
+ def desc(self) :
+ return "command task"
+
+
+class WorkflowPayload(object) :
+ def __init__(self, workflow) :
+ self.workflow = workflow
+ self.isTaskStable = True
+
+ def type(self) :
+ return "workflow"
+
+ def name(self) :
+ if self.workflow is None :
+ return "None"
+ else :
+ return self.workflow._whoami()
+
+ def desc(self) :
+ return "sub-workflow task"
+
+
+
+class TaskNode(object) :
+ """
+ Represents an individual task in the task graph
+ """
+
+ def __init__(self, tdag, lock, init_id, namespace, label, payload, isContinued, isFinishedEvent) :
+ self.tdag = tdag
+ self.lock = lock
+ self.id = init_id
+ self.namespace = namespace
+ self.label = label
+ self.payload = payload
+ self.isContinued = isContinued
+
+ # if true, do not execute this task or honor it as a dependency for child tasks
+ self.isIgnoreThis = False
+
+ # if true, set the ignore state for all children of this task to true
+ self.isIgnoreChildren = False
+
+ # if true, this task and its dependents will be automatically marked as completed (until
+ # a startFromTasks node is found)
+ self.isAutoCompleted = False
+
+ # task is reset to waiting runstate in a continued run
+ self.isReset = False
+
+ self.parents = set()
+ self.children = set()
+ self.runstateUpdateTimeStamp = time.time()
+ if self.isContinued:
+ self.runstate = "complete"
+ else:
+ self.runstate = "waiting"
+ self.errorstate = 0
+
+ # errorMessage is used by sub-workflow tasks, but not by command tasks:
+ self.errorMessage = ""
+
+ # This is a link to the live status object updated by TaskRunner:
+ self.runStatus = RunningTaskStatus(isFinishedEvent)
+
+ def __str__(self) :
+ msg = "TASK id: %s state: %s error: %i" % (self.fullLabel(), self.runstate, self.errorstate)
+ return msg
+
+ def fullLabel(self) :
+ return namespaceJoin(self.namespace, self.label)
+
+ @lockMethod
+ def isDone(self) :
+ "task has gone as far as it can"
+ return ((self.runstate == "error") or (self.runstate == "complete"))
+
+ @lockMethod
+ def isError(self) :
+ "true if an error occurred in this node"
+ return ((self.errorstate != 0) or (self.runstate == "error"))
+
+ @lockMethod
+ def isComplete(self) :
+ "task completed without error"
+ return ((self.errorstate == 0) and (self.runstate == "complete"))
+
+ @lockMethod
+ def isReady(self) :
+ "task is ready to be run"
+ retval = ((self.runstate == "waiting") and (self.errorstate == 0) and (not self.isIgnoreThis))
+ if retval :
+ for p in self.parents :
+ if p.isIgnoreThis : continue
+ if not p.isComplete() :
+ retval = False
+ break
+ return retval
+
+
+ def _isDeadWalker(self, searched) :
+ "recursive helper function for isDead()"
+
+ # the fact that you're still searching means that it must have returned False last time:
+ if self in searched : return False
+ searched.add(self)
+
+ if self.isError() : return True
+ if self.isComplete() : return False
+ for p in self.parents :
+ if p._isDeadWalker(searched) : return True
+ return False
+
+ @lockMethod
+ def isDead(self) :
+ """
+ If true, there's no longer a point to waiting for this task,
+ because it either has an error or there is an error in an
+ upstream dependency
+ """
+
+ # searched is used to restrict the complexity of this
+ # operation on large graphs:
+ searched = set()
+ return self._isDeadWalker(searched)
+
+ @lockMethod
+ def setRunstate(self, runstate, updateTimeStamp=None) :
+ """
+ updateTimeStamp is only supplied in the case where the state
+ transition time is interestingly different than the function
+ call time. This can happen when the state update comes from
+ a polling function with a long poll interval.
+ """
+ if runstate not in TaskNodeConstants.validRunstates :
+ raise Exception("Can't set TaskNode runstate to %s" % (runstate))
+
+ if updateTimeStamp is None :
+ self.runstateUpdateTimeStamp = time.time()
+ else :
+ self.runstateUpdateTimeStamp = updateTimeStamp
+ self.runstate = runstate
+ self.tdag.writeTaskStatus()
+
+ #def getParents(self) :
+ # return self.parents
+
+ #def getChildren(self) :
+ # return self.children
+
+ @lockMethod
+ def getTaskErrorMsg(self) :
+ """
+ generate consistent task error message from task state
+ """
+
+ if not self.isError() : return []
+
+ msg = "Failed to complete %s: '%s' launched from %s" % (self.payload.desc(), self.fullLabel(), namespaceLabel(self.namespace))
+ if self.payload.type() == "command" :
+ msg += ", error code: %s, command: '%s'" % (str(self.errorstate), str(self.payload.launchCmd))
+ elif self.payload.type() == "workflow" :
+ msg += ", failed sub-workflow classname: '%s'" % (self.payload.name())
+ else :
+ assert 0
+
+ msg = lister(msg)
+
+ if self.errorMessage != "" :
+ msg2 = ["Error Message:"]
+ msg2.extend(lister(self.errorMessage))
+ linePrefix = "[%s] " % (self.fullLabel())
+ for i in range(len(msg2)) :
+ msg2[i] = linePrefix + msg2[i]
+ msg.extend(msg2)
+
+ return msg
+
+
+
+class TaskDAG(object) :
+ """
+ Holds all tasks and their dependencies.
+
+ Also responsible for task state persistence/continuation across
+ interrupted runs. Object is accessed by both the workflow and
+ taskrunner threads, so it needs to be thread-safe.
+ """
+
+ def __init__(self, isContinue, isForceContinue, isDryRun,
+ taskInfoFile, taskStateFile, workflowClassName,
+ startFromTasks, ignoreTasksAfter, resetTasks,
+ flowLog) :
+ """
+ No other object gets to access the taskStateFile, file locks
+ are not required (but thread locks are)
+ """
+ self.isContinue = isContinue
+ self.isForceContinue = isForceContinue
+ self.isDryRun = isDryRun
+ self.taskInfoFile = taskInfoFile
+ self.taskStateFile = taskStateFile
+ self.workflowClassName = workflowClassName
+ self.startFromTasks = startFromTasks
+ self.ignoreTasksAfter = ignoreTasksAfter
+ self.resetTasks = resetTasks
+ self.flowLog = flowLog
+
+ # unique id for each task in each run -- not persistent across continued runs:
+ self.taskId = 0
+
+ # it will be easier for people to read the task status file if
+ # the tasks are in approximately the same order as they were
+ # added by the workflow:
+ self.addOrder = []
+ self.labelMap = {}
+ self.headNodes = set()
+ self.tailNodes = set()
+ self.lock = threading.RLock()
+
+ # this event can be used to optionally accelerate the task cycle
+ # when running in modes where tasks can set this event on completion
+ # (ie. local mode but not sge). If this isn't set, the normal polling
+ # cycle applies
+ self.isFinishedEvent = threading.Event()
+
+ @lockMethod
+ def isTaskPresent(self, namespace, label) :
+ return ((namespace, label) in self.labelMap)
+
+ @lockMethod
+ def getTask(self, namespace, label) :
+ if (namespace, label) in self.labelMap :
+ return self.labelMap[(namespace, label)]
+ return None
+
+ @lockMethod
+ def getHeadNodes(self) :
+ "all tasks with no parents"
+ return list(self.headNodes)
+
+ @lockMethod
+ def getTailNodes(self) :
+ "all tasks with no (runnable) children"
+ return list(self.tailNodes)
+
+ @lockMethod
+ def getAllNodes(self, namespace="") :
+ "get all nodes in this namespace"
+ retval = []
+ for (taskNamespace, taskLabel) in self.addOrder :
+ if namespace != taskNamespace : continue
+ node=self.labelMap[(taskNamespace, taskLabel)]
+ if node.isIgnoreThis : continue
+ retval.append(node)
+ return retval
+
+ def _isRunExhaustedNode(self, node, searched) :
+
+ # the fact that you're still searching means that it must have returned true last time:
+ if node in searched : return True
+ searched.add(node)
+
+ if not node.isIgnoreThis :
+ if not node.isDone() :
+ return False
+ if node.isComplete() :
+ for c in node.children :
+ if not self._isRunExhaustedNode(c, searched) :
+ return False
+ return True
+
+ @lockMethod
+ def isRunExhausted(self) :
+ """
+ Returns true if the run is as complete as possible due to errors
+ """
+
+ # searched is used to restrict the complexity of this
+ # operation on large graphs:
+ searched = set()
+ for node in self.getHeadNodes() :
+ if not self._isRunExhaustedNode(node,searched) :
+ return False
+ return True
+
+
+ @lockMethod
+ def isRunComplete(self) :
+ "returns true if run is complete and error free"
+ for node in self.labelMap.values():
+ if node.isIgnoreThis : continue
+ if not node.isComplete() :
+ return False
+ return True
+
+
+ def _getReadyTasksFromNode(self, node, ready, searched) :
+ "helper function for getReadyTasks"
+
+ if node.isIgnoreThis : return
+
+ if node in searched : return
+ searched.add(node)
+
+ if node.isReady() :
+ ready.add(node)
+ else:
+ if not node.isComplete() :
+ for c in node.parents :
+ self._getReadyTasksFromNode(c, ready, searched)
+
+
+ @lockMethod
+ def getReadyTasks(self) :
+ """
+ Go through DAG from the tail nodes and find all tasks which
+ have all prerequisites completed:
+ """
+
+ completed = self.markCheckPointsComplete()
+ ready = set()
+ # searched is used to restrict the complexity of this
+ # operation on large graphs:
+ searched = set()
+ for node in self.getTailNodes() :
+ self._getReadyTasksFromNode(node, ready, searched)
+ return (list(ready), list(completed))
+
+
+ def _markCheckPointsCompleteFromNode(self, node, completed, searched) :
+ "helper function for markCheckPointsComplete"
+
+ if node.isIgnoreThis : return
+
+ if node in searched : return
+ searched.add(node)
+
+ if node.isComplete() : return
+
+ for c in node.parents :
+ self._markCheckPointsCompleteFromNode(c, completed, searched)
+
+ if (node.payload.type() == "command") and (node.payload.cmd.cmd is None) and (node.isReady()) :
+ node.setRunstate("complete")
+ completed.add(node)
+
+
+ @lockMethod
+ def markCheckPointsComplete(self) :
+ """
+ traverse from tail nodes up, marking any checkpoint tasks
+ (task.cmd=None) that are ready as complete; return the set
+ of newly completed tasks:
+ """
+ completed = set()
+ # searched is used to restrict the complexity of this
+ # operation on large graphs:
+ searched = set()
+ for node in self.getTailNodes() :
+ self._markCheckPointsCompleteFromNode(node, completed, searched)
+ return completed
+
+
+ @lockMethod
+ def addTask(self, namespace, label, payload, dependencies, isContinued=False) :
+ """
+ add new task to the DAG
+
+ isContinued indicates the task is being read from state history during a continuation run
+ """
+ # internal data structures use these separately, but for logging we
+ # create one string:
+ fullLabel = namespaceJoin(namespace, label)
+
+ # first check to see if task exists in DAG already, this is not allowed unless
+ # we are continuing a previous run, in which case it's allowed once:
+ if not isContinued and self.isTaskPresent(namespace, label):
+ if self.isContinue and self.labelMap[(namespace, label)].isContinued:
+ # confirm that task is a match, flip off the isContinued flag and return:
+ task = self.labelMap[(namespace, label)]
+ parentLabels = set([p.label for p in task.parents])
+ excPrefix = "Task: '%s' does not match previous definition defined in '%s'." % (fullLabel, self.taskInfoFile)
+ if task.payload.type() != payload.type() :
+ msg = excPrefix + " New/old payload type: '%s'/'%s'" % (payload.type(), task.payload.type())
+ raise Exception(msg)
+ if payload.isTaskStable :
+ if (payload.type() == "command") and (str(task.payload.cmd) != str(payload.cmd)) :
+ msg = excPrefix + " New/old command: '%s'/'%s'" % (str(payload.cmd), str(task.payload.cmd))
+ if self.isForceContinue : self.flowLog(msg,logState=LogState.WARNING)
+ else : raise Exception(msg)
+ if (parentLabels != set(dependencies)) :
+ msg = excPrefix + " New/old dependencies: '%s'/'%s'" % (",".join(dependencies), ",".join(parentLabels))
+ if self.isForceContinue : self.flowLog(msg,logState=LogState.WARNING)
+ else : raise Exception(msg)
+ if payload.type() == "command" :
+ task.payload.cmd = payload.cmd
+ task.payload.isCmdMakePath = payload.isCmdMakePath
+ task.isContinued = False
+ return
+ else:
+ raise Exception("Task: '%s' is already in TaskDAG" % (fullLabel))
+
+ task = TaskNode(self, self.lock, self.taskId, namespace, label, payload, isContinued, self.isFinishedEvent)
+
+ self.taskId += 1
+
+ self.addOrder.append((namespace, label))
+ self.labelMap[(namespace, label)] = task
+
+ for d in dependencies :
+ parent = self.getTask(namespace, d)
+ if parent is task :
+ raise Exception("Task: '%s' cannot specify its own task label as a dependency" % (fullLabel))
+ if parent is None :
+ raise Exception("Dependency: '%s' for task: '%s' does not exist in TaskDAG" % (namespaceJoin(namespace, d), fullLabel))
+ task.parents.add(parent)
+ parent.children.add(task)
+
+
+ if isContinued :
+ isReset=False
+ if label in self.resetTasks :
+ isReset=True
+ else :
+ for p in task.parents :
+ if p.isReset :
+ isReset = True
+ break
+ if isReset :
+ task.setRunstate("waiting")
+ task.isReset=True
+
+ if not isContinued:
+ self.writeTaskInfo(task)
+ self.writeTaskStatus()
+
+ # determine if this is an ignoreTasksAfter node
+ if label in self.ignoreTasksAfter :
+ task.isIgnoreChildren = True
+
+ # determine if this is an ignoreTasksAfter descendent
+ for p in task.parents :
+ if p.isIgnoreChildren :
+ task.isIgnoreThis = True
+ task.isIgnoreChildren = True
+ break
+
+ # update headNodes
+ if len(task.parents) == 0 :
+ self.headNodes.add(task)
+
+ # update isAutoCompleted:
+ if (self.startFromTasks and
+ (label not in self.startFromTasks)) :
+ task.isAutoCompleted = True
+ for p in task.parents :
+ if not p.isAutoCompleted :
+ task.isAutoCompleted = False
+ break
+
+ # in case of no-parents, also check sub-workflow node
+ if task.isAutoCompleted and (len(task.parents) == 0) and (namespace != ""):
+ wval=namespace.rsplit(namespaceSep,1)
+ if len(wval) == 2 :
+ (workflowNamespace,workflowLabel)=wval
+ else :
+ workflowNamespace=""
+ workflowLabel=wval[0]
+ workflowParent = self.labelMap[(workflowNamespace, workflowLabel)]
+ if not workflowParent.isAutoCompleted :
+ task.isAutoCompleted = False
+
+ if task.isAutoCompleted :
+ task.setRunstate("complete")
+
+ # update tailNodes:
+ if not task.isIgnoreThis :
+ self.tailNodes.add(task)
+ for p in task.parents :
+ if p in self.tailNodes :
+ self.tailNodes.remove(p)
+
+ # check dependency runState consistency:
+ if task.isDone() :
+ for p in task.parents :
+ if p.isIgnoreThis : continue
+ if p.isComplete() : continue
+ raise Exception("Task: '%s' has invalid continuation state. Task dependencies are incomplete")
+
+
+
+ @lockMethod
+ def writeTaskStatus(self) :
+ """
+ (atomic on *nix) update of the runstate and errorstate for all tasks
+ """
+ # don't write task status during dry runs:
+ if self.isDryRun : return
+
+ tmpFile = self.taskStateFile + ".update.incomplete"
+ tmpFp = open(tmpFile, "w")
+ tmpFp.write(taskStateHeader())
+ for (namespace, label) in self.addOrder :
+ node = self.labelMap[(namespace, label)]
+ runstateUpdateTimeStr = timeStampToTimeStr(node.runstateUpdateTimeStamp)
+ tmpFp.write("%s\t%s\t%s\t%i\t%s\n" % (label, namespace, node.runstate, node.errorstate, runstateUpdateTimeStr))
+ tmpFp.close()
+
+ forceRename(tmpFile, self.taskStateFile)
+
+
+ @lockMethod
+ def getTaskStatus(self) :
+ """
+ Enumerate status of command tasks (but look at sub-workflows to determine if specification is complete)
+ """
+
+ val = Bunch(waiting=0, queued=0, running=0, complete=0, error=0, isAllSpecComplete=True,
+ longestQueueSec=0, longestRunSec=0, longestQueueName="", longestRunName="")
+
+ currentSec = time.time()
+ for (namespace, label) in self.addOrder :
+ node = self.labelMap[(namespace, label)]
+ # special check just for workflow tasks:
+ if node.payload.type() == "workflow" :
+ if not node.runStatus.isSpecificationComplete.isSet() :
+ val.isAllSpecComplete = False
+
+ # the rest of this enumeration is for command tasks only:
+ continue
+
+ taskTime = int(currentSec - node.runstateUpdateTimeStamp)
+
+ if node.runstate == "waiting" :
+ val.waiting += 1
+ elif node.runstate == "queued" :
+ val.queued += 1
+ if val.longestQueueSec < taskTime :
+ val.longestQueueSec = taskTime
+ val.longestQueueName = node.fullLabel()
+ elif node.runstate == "running" :
+ val.running += 1
+ if val.longestRunSec < taskTime :
+ val.longestRunSec = taskTime
+ val.longestRunName = node.fullLabel()
+ elif node.runstate == "complete" :
+ val.complete += 1
+ elif node.runstate == "error" :
+ val.error += 1
+
+ return val
+
+
+ @lockMethod
+ def writeTaskInfo(self, task) :
+ """
+ appends a description of the new task to the taskInfo file
+ """
+ depstring = ""
+ if len(task.parents) :
+ depstring = ",".join([p.label for p in task.parents])
+
+ cmdstring = ""
+ nCores = "0"
+ memMb = "0"
+ priority = "0"
+ isForceLocal = "0"
+ payload = task.payload
+ cwdstring = ""
+ if payload.type() == "command" :
+ cmdstring = str(payload.cmd)
+ nCores = str(payload.nCores)
+ memMb = str(payload.memMb)
+ priority = str(payload.priority)
+ isForceLocal = boolToStr(payload.isForceLocal)
+ cwdstring = payload.cmd.cwd
+ elif payload.type() == "workflow" :
+ cmdstring = payload.name()
+ else :
+ assert 0
+ taskline = "\t".join((task.label, task.namespace, payload.type(),
+ nCores, memMb, priority,
+ isForceLocal, depstring, cwdstring, cmdstring))
+ fp = open(self.taskInfoFile, "a")
+ fp.write(taskline + "\n")
+ fp.close()
+
+
+
+# workflowRunner:
+#
+
+
+# special exception used for the case where pyflow data dir is already in use:
+#
+class DataDirException(Exception) :
+ def __init__(self, msg) :
+ Exception.__init__(self)
+ self.msg = msg
+
+
+
+class WorkflowRunnerThreadSharedData(object) :
+ """
+ All data used by the WorkflowRunner which will be constant over
+ the lifetime of a TaskManager instance. All of the information in
+ this class will be accessed by both threads without locking.
+ """
+
+ def __init__(self) :
+ self.lock = threading.RLock()
+ self.pid = os.getpid()
+ self.runcount = 0
+ self.cwd = os.path.abspath(os.getcwd())
+
+ self.markFile = None
+
+ # we potentially have to log before the logfile is set up (eg
+ # an exception is thrown reading run parameters), so provide
+ # an explicit notification that there's no log file:
+ self.flowLogFp = None
+
+ self.warningLogFp = None
+ self.errorLogFp = None
+
+ self.resetRun()
+
+ # two elements required to implement a nohup-like behavior:
+ self.isHangUp = threading.Event()
+ self._isStderrAlive = True
+
+
+ @staticmethod
+ def _validateFixParam(param):
+ """
+ validate and refine raw run() parameters for use by workflow
+ """
+
+ param.mailTo = setzer(param.mailTo)
+ param.schedulerArgList = lister(param.schedulerArgList)
+ if param.successMsg is not None :
+ if not isString(param.successMsg) :
+ raise Exception("successMsg argument to WorkflowRunner.run() is not a string")
+
+ # create combined task retry settings manager:
+ param.retry=RetryParam(param.mode,
+ param.retryMax,
+ param.retryWait,
+ param.retryWindow,
+ param.retryMode)
+
+ # setup resource parameters
+ if param.nCores is None :
+ param.nCores = RunMode.data[param.mode].defaultCores
+
+ # ignore total available memory settings in non-local modes:
+ if param.mode != "local" :
+ param.memMb = "unlimited"
+
+ if param.mode == "sge" :
+ if siteConfig.maxSGEJobs != "unlimited" :
+ if ((param.nCores == "unlimited") or
+ (int(param.nCores) > int(siteConfig.maxSGEJobs))) :
+ param.nCores = int(siteConfig.maxSGEJobs)
+
+ if param.nCores != "unlimited" :
+ param.nCores = int(param.nCores)
+ if param.nCores < 1 :
+ raise Exception("Invalid run mode nCores argument: %s. Value must be 'unlimited' or an integer no less than 1" % (param.nCores))
+
+ if param.memMb is None :
+ if param.nCores == "unlimited" :
+ param.memMb = "unlimited"
+ mpc = RunMode.data[param.mode].defaultMemMbPerCore
+ if mpc == "unlimited" :
+ param.memMb = "unlimited"
+ else :
+ param.memMb = mpc * param.nCores
+ elif param.memMb != "unlimited" :
+ param.memMb = int(param.memMb)
+ if param.memMb < 1 :
+ raise Exception("Invalid run mode memMb argument: %s. Value must be 'unlimited' or an integer no less than 1" % (param.memMb))
+
+ # verify/normalize input settings:
+ if param.mode not in RunMode.data.keys() :
+ raise Exception("Invalid mode argument '%s'. Accepted modes are {%s}." \
+ % (param.mode, ",".join(RunMode.data.keys())))
+
+ if param.mode == "sge" :
+ # TODO not-portable to windows (but is this a moot point -- all of sge mode is non-portable, no?):
+ def checkSgeProg(prog) :
+ proc = subprocess.Popen(("which", prog), stdout=open(os.devnull, "w"), shell=False)
+ retval = proc.wait()
+ if retval != 0 : raise Exception("Run mode is sge, but no %s in path" % (prog))
+ checkSgeProg("qsub")
+ checkSgeProg("qstat")
+
+
+ stateDir = os.path.join(param.dataDir, "state")
+ if param.isContinue == "Auto" :
+ param.isContinue = os.path.exists(stateDir)
+
+ if param.isContinue :
+ if not os.path.exists(stateDir) :
+ raise Exception("Cannot continue run without providing a pyflow dataDir containing previous state.: '%s'" % (stateDir))
+
+ for email in param.mailTo :
+ if not verifyEmailAddy(email):
+ raise Exception("Invalid email address: '%s'" % (email))
+
+
+
+ def _setCustomLogs(self) :
+ if (self.warningLogFp is None) and (self.param.warningLogFile is not None) :
+ self.warningLogFp = open(self.param.warningLogFile,"w")
+
+ if (self.errorLogFp is None) and (self.param.errorLogFile is not None) :
+ self.errorLogFp = open(self.param.errorLogFile,"w")
+
+
+
+ def setupNewRun(self, param) :
+ self.param = param
+
+ # setup log file-handle first, then run the rest of parameter validation:
+ # (hold this file open so that we can still log if pyflow runs out of filehandles)
+ self.param.dataDir = os.path.abspath(self.param.dataDir)
+ self.param.dataDir = os.path.join(self.param.dataDir, "pyflow.data")
+ logDir = os.path.join(self.param.dataDir, "logs")
+ ensureDir(logDir)
+ self.flowLogFile = os.path.join(logDir, "pyflow_log.txt")
+ self.flowLogFp = open(self.flowLogFile, "a")
+
+ # run remaining validation
+ self._validateFixParam(self.param)
+
+ # initial per-run data
+ self.taskErrors = set() # this set actually contains every task that failed -- tasks contain all of their own error info
+ self.isTaskManagerException = False
+
+ # create data directory if it does not exist
+ ensureDir(self.param.dataDir)
+
+ # check whether a process already exists:
+ self.markFile = os.path.join(self.param.dataDir, "active_pyflow_process.txt")
+ if os.path.exists(self.markFile) :
+ # Non-conventional logging situation -- another pyflow process is possibly using this same data directory, so we want
+ # to log to stderr (even if the user has set isQuiet) and not interfere with the other process's log
+ self.flowLogFp = None
+ self.param.isQuiet = False
+ msg = [ "Can't initialize pyflow run because the data directory appears to be in use by another process.",
+ "\tData directory: '%s'" % (self.param.dataDir),
+ "\tIt is possible that a previous process was abruptly interrupted and did not clean up properly. To determine if this is",
+ "\tthe case, please refer to the file '%s'" % (self.markFile),
+ "\tIf this file refers to a non-running process, delete the file and relaunch pyflow,",
+ "\totherwise, specify a new data directory. At the API-level this can be done with the dataDirRoot option." ]
+ self.markFile = None # this keeps pyflow from deleting this file, as it normally would on exit
+ raise DataDirException(msg)
+ else :
+ mfp = open(self.markFile, "w")
+ msg = """
+This file provides details of the pyflow instance currently using this data directory.
+During normal pyflow run termination (due to job completion, error, SIGINT, etc...),
+this file should be deleted. If this file is present it should mean either:
+(1) the data directory is still in use by a running workflow
+(2) a sudden job failure occurred that prevented normal run termination
+
+The associated pyflow job details are as follows:
+"""
+ mfp.write(msg + "\n")
+ for line in self.getInfoMsg() :
+ mfp.write(line + "\n")
+ mfp.write("\n")
+ mfp.close()
+
+ stateDir = os.path.join(self.param.dataDir, "state")
+ ensureDir(stateDir)
+
+ # setup other instance data:
+ self.runcount += 1
+
+ # initialize directories
+ self.wrapperLogDir = os.path.join(logDir, "tmp", "taskWrapperLogs")
+ ensureDir(self.wrapperLogDir)
+ stackDumpLogDir = os.path.join(logDir, "tmp", "stackDumpLog")
+ ensureDir(stackDumpLogDir)
+
+ # initialize filenames:
+ taskStateFileName = "pyflow_tasks_runstate.txt"
+ taskInfoFileName = "pyflow_tasks_info.txt"
+
+ self.taskStdoutFile = os.path.join(logDir, "pyflow_tasks_stdout_log.txt")
+ self.taskStderrFile = os.path.join(logDir, "pyflow_tasks_stderr_log.txt")
+ self.taskStateFile = os.path.join(stateDir, taskStateFileName)
+ self.taskInfoFile = os.path.join(stateDir, taskInfoFileName)
+ self.taskDotScriptFile = os.path.join(stateDir, "make_pyflow_task_graph.py")
+
+ self.stackDumpLogFile = os.path.join(stackDumpLogDir, "pyflow_stack_dump.txt")
+
+ # empty file:
+ if not self.param.isContinue:
+ fp = open(self.taskInfoFile, "w")
+ fp.write(taskInfoHeader())
+ fp.close()
+
+ self._setCustomLogs()
+
+ # finally write dot task graph creation script:
+ #
+ # this could fail because of script permission settings, but it is not critical for
+ # workflow completion, so we settle for a warning
+ try :
+ writeDotScript(self.taskDotScriptFile, taskInfoFileName, taskStateFileName, self.param.workflowClassName)
+ except OSError:
+ msg = ["Failed to write task graph visualization script to %s" % (self.taskDotScriptFile)]
+ self.flowLog(msg,logState=LogState.WARNING)
+
+
+ def resetRun(self) :
+ """
+ Anything that needs to be cleaned up at the end of a run
+
+ Right now this just makes sure we don't log to the previous run's log file
+ """
+ self.flowLogFile = None
+ self.param = None
+ if self.flowLogFp is not None :
+ self.flowLogFp.close()
+ self.flowLogFp = None
+
+ if self.warningLogFp is not None :
+ self.warningLogFp.close()
+ self.warningLogFp = None
+
+ if self.errorLogFp is not None :
+ self.errorLogFp.close()
+ self.errorLogFp = None
+
+ if self.markFile is not None :
+ if os.path.exists(self.markFile) : os.unlink(self.markFile)
+
+ self.markFile = None
+
+ def getRunid(self) :
+ return "%s_%s" % (self.pid, self.runcount)
+
+ @lockMethod
+ def setTaskError(self, task) :
+ self.taskErrors.add(task)
+
+ @lockMethod
+ def isTaskError(self) :
+ return (len(self.taskErrors) != 0)
+
+ def isTaskSubmissionActive(self) :
+ """
+ wait() pollers need to know if task submission has been
+ shut down to implement sane behavior.
+ """
+ return (not self.isTaskError())
+
+ @lockMethod
+ def setTaskManagerException(self) :
+ self.isTaskManagerException = True
+
+ @lockMethod
+ def flowLog(self, msg, linePrefix=None, logState = LogState.INFO) :
+ linePrefixOut = "[%s]" % (self.getRunid())
+ if linePrefix is not None :
+ linePrefixOut += " " + linePrefix
+
+ if (logState == LogState.ERROR) or (logState == LogState.WARNING) :
+ linePrefixOut += " [" + LogState.toString(logState) + "]"
+
+ ofpList = []
+ isAddStderr = (self._isStderrAlive and ((self.flowLogFp is None) or (self.param is None) or (not self.param.isQuiet)))
+ if isAddStderr:
+ ofpList.append(sys.stderr)
+ if self.flowLogFp is not None :
+ ofpList.append(self.flowLogFp)
+
+ # make a last ditch effort to open the special error logs if these are not available already:
+ try :
+ self._setCustomLogs()
+ except :
+ pass
+
+ if (self.warningLogFp is not None) and (logState == LogState.WARNING) :
+ ofpList.append(self.warningLogFp)
+ if (self.errorLogFp is not None) and (logState == LogState.ERROR) :
+ ofpList.append(self.errorLogFp)
+
+ if len(ofpList) == 0 : return
+ retval = log(ofpList, msg, linePrefixOut)
+
+ # check if stderr stream failed. If so, turn it off for the remainder of run (assume terminal hup):
+ if isAddStderr and (not retval[0]) :
+ if self.isHangUp.isSet() :
+ self._isStderrAlive = False
+
+
+ def getInfoMsg(self) :
+ """
+ return a string array with general stats about this run
+ """
+
+ msg = [ "%s\t%s" % ("pyFlowClientWorkflowClass:", self.param.workflowClassName),
+ "%s\t%s" % ("pyFlowVersion:", __version__),
+ "%s\t%s" % ("pythonVersion:", pythonVersion),
+ "%s\t%s" % ("Runid:", self.getRunid()),
+ "%s\t%s UTC" % ("RunStartTime:", self.param.logRunStartTime),
+ "%s\t%s UTC" % ("NotificationTime:", timeStrNow()),
+ "%s\t%s" % ("HostName:", siteConfig.getHostName()),
+ "%s\t%s" % ("WorkingDir:", self.cwd),
+ "%s\t%s" % ("DataDir:", self.param.dataDir),
+ "%s\t'%s'" % ("ProcessCmdLine:", cmdline()) ]
+ return msg
+
+
+ def emailNotification(self, msgList, emailErrorLog=None) :
+ #
+ # email address might not be set up yet:
+ #
+ # if errorLog is specified, then an email send exception will
+ # be handled and logged, otherwise the exception will be re-raised
+ # down to the caller.
+ #
+
+ if self.param is None : return
+ if len(self.param.mailTo) == 0 : return
+
+ if not isLocalSmtp() :
+ if emailErrorLog :
+ msg = ["email notification failed, no local smtp server"]
+ emailErrorLog(msg,logState=LogState.WARNING)
+ return
+
+ mailTo = sorted(list(self.param.mailTo))
+ subject = "pyflow notification from %s run: %s" % (self.param.workflowClassName, self.getRunid())
+ msg = msgListToMsg(msgList)
+ fullMsgList = ["Message:",
+ '"""',
+ msg,
+ '"""']
+ fullMsgList.extend(self.getInfoMsg())
+
+ import smtplib
+ try:
+ sendEmail(mailTo, siteConfig.mailFrom, subject, fullMsgList)
+ except smtplib.SMTPException :
+ if emailErrorLog is None : raise
+ msg = ["email notification failed"]
+ eMsg = lister(getExceptionMsg())
+ msg.extend(eMsg)
+ emailErrorLog(msg,logState=LogState.WARNING)
+
+
+
+class WorkflowRunner(object) :
+ """
+ This object is designed to be inherited by a class in
+ client code. This inheriting class can override the
+ L{workflow()<WorkflowRunner.workflow>} method to define the
+ tasks that need to be run and their dependencies.
+
+ The inheriting class defining a workflow can be executed in
+ client code by calling the WorkflowRunner.run() method.
+ This method provides various run options such as whether
+ to run locally or on sge.
+ """
+
+
+ _maxWorkflowRecursion = 30
+ """
+ This limit protects against a runaway forkbomb in case a
+ workflow task recursively adds itself w/o termination:
+ """
+
+
+ def run(self,
+ mode="local",
+ dataDirRoot=".",
+ isContinue=False,
+ isForceContinue=False,
+ nCores=None,
+ memMb=None,
+ isDryRun=False,
+ retryMax=2,
+ retryWait=90,
+ retryWindow=360,
+ retryMode="nonlocal",
+ mailTo=None,
+ updateInterval=60,
+ schedulerArgList=None,
+ isQuiet=False,
+ warningLogFile=None,
+ errorLogFile=None,
+ successMsg=None,
+ startFromTasks=None,
+ ignoreTasksAfter=None,
+ resetTasks=None) :
+ """
+ Call this method to execute the workflow() method overridden
+ in a child class and specify the resources available for the
+ workflow to run.
+
+ Task retry behavior: Retry attempts will be made per the
+ arguments below for distributed workflow runs (eg. sge run
+ mode). Note this means that retries will be attempted for
+ tasks with an 'isForceLocal' setting during distributed runs.
+
+ Task error behavior: When a task error occurs the task
+ manager stops submitting new tasks and allows all currently
+ running tasks to complete. Note that in this case 'task error'
+ means that the task could not be completed after exhausting
+ attempted retries.
+
+ Workflow exception behavior: Any exceptions thrown from the
+ python code of classes derived from WorkflowRunner will be
+ logged and trigger notification (e.g. email). The exception
+ will not come down to the client's stack. In sub-workflows the
+ exception is handled exactly like a task error (ie. task
+ submission is shut down and remaining tasks are allowed to
+ complete). An exception in the master workflow will lead to
+ workflow termination without waiting for currently running
+ tasks to finish.
+
+ @return: 0 if all tasks completed successfully and 1 otherwise
+
+ @param mode: Workflow run mode. Current options are (local|sge)
+
+ @param dataDirRoot: All workflow data is written to
+ {dataDirRoot}/pyflow.data/ These include
+ workflow/task logs, persistent task state data,
+ and summary run info. Two workflows cannot
+ simultaneously use the same dataDir.
+
+ @param isContinue: If True, continue workflow from a previous
+ incomplete run based on the workflow data
+ files. You must use the same dataDirRoot as a
+ previous run for this to work. Set to 'Auto' to
+ have the run continue only if the previous
+ dataDir exists. (default: False)
+
+ @param isForceContinue: Only used if isContinue is not False. Normally
+ when a run is continued, the commands of
+ completed tasks are checked to ensure they
+ match. When isForceContinue is true,
+ failing this check is reduced from an error
+ to a warning
+
+ @param nCores: Total number of cores available, or 'unlimited', sge
+ is currently configured for a maximum job count of
+ %s, any value higher than this in sge mode will be
+ reduced to the maximum. (default: 1 for local mode,
+ %s for sge mode)
+
+ @param memMb: Total memory available (in megabytes), or 'unlimited',
+ Note that this value will be ignored in non-local modes
+ (such as sge), because in this case total memory available
+ is expected to be known by the scheduler for each node in its
+ cluster. (default: %i*nCores for local mode, 'unlimited'
+ for sge mode)
+
+ @param isDryRun: List the commands to be executed without running
+ them. Note that recursive and dynamic workflows
+ will potentially have to account for the fact that
+ expected files will be missing -- here 'recursive
+ workflow' refers to any workflow which uses the
+ addWorkflowTask() method, and 'dynamic workflow'
+ refers to any workflow which uses the
+ waitForTasks() method. These types of workflows
+ can query this status with the isDryRun() method to make
+ accommodations. (default: False)
+
+ @param retryMax: Maximum number of task retries
+
+ @param retryWait: Delay (in seconds) before resubmitting task
+
+ @param retryWindow: Maximum time (in seconds) after the first task
+ submission in which retries are allowed. A value of
+ zero or less puts no limit on the time when retries
+ will be attempted. Retries are always allowed (up to
+ retryMax times) for failed make jobs.
+
+ @param retryMode: Modes are 'nonlocal' and 'all'. For 'nonlocal'
+ retries are not attempted in local run mode. For 'all'
+ retries are attempted for any run mode. The default mode
+ is 'nonlocal'.
+
+ @param mailTo: An email address or container of email addresses. Notification
+ will be sent to each email address when
+ either (1) the run successfully completes (2) the
+ first task error occurs or (3) an unhandled
+ exception is raised. The intention is to send one
+ status message per run() indicating either success
+ or the reason for failure. This should occur for all
+ cases except a host hardware/power failure. Note
+ that mail comes from '%s' (configurable),
+ which may be classified as junk-mail by your system.
+
+ @param updateInterval: How often (in minutes) should pyflow log a
+ status update message summarizing the run
+ status. Set this to zero or less to turn
+ the update off.
+
+ @param schedulerArgList: A list of arguments can be specified to be
+ passed on to an external scheduler when non-local
+ modes are used (e.g. in sge mode you could pass
+ schedulerArgList=['-q','work.q'] to put the whole
+ pyflow job into the sge work.q queue)
+
+ @param isQuiet: Don't write any logging output to stderr (but still write
+ log to pyflow_log.txt)
+
+ @param warningLogFile: Replicate all warning messages to the specified file. Warning
+ messages will still appear in the standard logs, this
+ file will contain a subset of the log messages pertaining to
+ warnings only.
+
+ @param errorLogFile: Replicate all error messages to the specified file. Error
+ messages will still appear in the standard logs, this
+ file will contain a subset of the log messages pertaining to
+ errors only. It should be empty for a successful run.
+
+ @param successMsg: Provide a string containing a custom message which
+ will be prepended to pyflow's standard success
+ notification. This message will appear in the log
+ and any configured notifications (e.g. email). The
+ message may contain linebreaks.
+
+ @param startFromTasks: A task label or container of task labels. Any tasks which
+ are not in this set or descendants of this set will be marked as
+ completed.
+ @type startFromTasks: A single string, or set, tuple or list of strings
+
+ @param ignoreTasksAfter: A task label or container of task labels. All descendants
+ of these task labels will be ignored.
+ @type ignoreTasksAfter: A single string, or set, tuple or list of strings
+
+ @param resetTasks: A task label or container of task labels. These tasks and all
+ of their descendants will be reset to the "waiting" state to be re-run.
+ Note this option will only affect a workflow which has been continued
+ from a previous run. This will not override any nodes altered by the
+ startFromTasks setting in the case that both options are used together.
+ @type resetTasks: A single string, or set, tuple or list of strings
+ """
+
+ # Setup pyflow signal handlers:
+ #
+ inHandlers = Bunch(isSet=False)
+
+ class SigTermException(Exception) : pass
+
+ def sigtermHandler(_signum, _frame) :
+ raise SigTermException
+
+ def sighupHandler(_signum, _frame) :
+ self._warningLog("pyflow recieved hangup signal. pyflow will continue, but this signal may still interrupt running tasks.")
+ # tell cdata to turn off any tty writes:
+ self._cdata().isHangUp.set()
+
+ def set_pyflow_sig_handlers() :
+ import signal
+ if not inHandlers.isSet :
+ inHandlers.sigterm = signal.getsignal(signal.SIGTERM)
+ if not isWindows() :
+ inHandlers.sighup = signal.getsignal(signal.SIGHUP)
+ inHandlers.isSet = True
+ try:
+ signal.signal(signal.SIGTERM, sigtermHandler)
+ if not isWindows() :
+ signal.signal(signal.SIGHUP, sighupHandler)
+ except ValueError:
+ if isMainThread() :
+ raise
+ else :
+ self._warningLog("pyflow has not been initialized on main thread, all custom signal handling disabled")
+
+
+ def unset_pyflow_sig_handlers() :
+ import signal
+ if not inHandlers.isSet : return
+ try :
+ signal.signal(signal.SIGTERM, inHandlers.sigterm)
+ if not isWindows() :
+ signal.signal(signal.SIGHUP, inHandlers.sighup)
+ except ValueError:
+ if isMainThread() :
+ raise
+ else:
+ pass
+
+
+ # if return value is somehow not set after this then something bad happened, so init to 1:
+ retval = 1
+ try:
+ set_pyflow_sig_handlers()
+
+ def exceptionMessaging(prefixMsg=None) :
+ msg = lister(prefixMsg)
+ eMsg = lister(getExceptionMsg())
+ msg.extend(eMsg)
+ self._notify(msg,logState=LogState.ERROR)
+
+ try:
+ self.runStartTimeStamp = time.time()
+ self.updateInterval = int(updateInterval)
+ # a container to haul all the run() options around in:
+ param = Bunch(mode=mode,
+ dataDir=dataDirRoot,
+ isContinue=isContinue,
+ isForceContinue=isForceContinue,
+ nCores=nCores,
+ memMb=memMb,
+ isDryRun=isDryRun,
+ retryMax=retryMax,
+ retryWait=retryWait,
+ retryWindow=retryWindow,
+ retryMode=retryMode,
+ mailTo=mailTo,
+ logRunStartTime=timeStampToTimeStr(self.runStartTimeStamp),
+ workflowClassName=self._whoami(),
+ schedulerArgList=schedulerArgList,
+ isQuiet=isQuiet,
+ warningLogFile=warningLogFile,
+ errorLogFile=errorLogFile,
+ successMsg=successMsg,
+ startFromTasks=setzer(startFromTasks),
+ ignoreTasksAfter=setzer(ignoreTasksAfter),
+ resetTasks=setzer(resetTasks))
+ retval = self._runWorkflow(param)
+
+ except SigTermException:
+ msg = "Received termination signal, shutting down running tasks..."
+ self._killWorkflow(msg)
+ except KeyboardInterrupt:
+ msg = "Keyboard Interrupt, shutting down running tasks..."
+ self._killWorkflow(msg)
+ except DataDirException, e:
+ self._notify(e.msg,logState=LogState.ERROR)
+ except:
+ exceptionMessaging()
+ raise
+
+ finally:
+ # last set: disconnect the workflow log:
+ self._cdata().resetRun()
+ unset_pyflow_sig_handlers()
+
+ return retval
+
+
+ # configurable elements of docstring
+ run.__doc__ = run.__doc__ % (siteConfig.maxSGEJobs,
+ RunMode.data["sge"].defaultCores,
+ siteConfig.defaultHostMemMbPerCore,
+ siteConfig.mailFrom)
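+
+ # A usage sketch (hypothetical client code, not part of this module):
+ # the typical pattern is to subclass WorkflowRunner, override workflow(),
+ # and call run(). The class name, task labels, commands and file names
+ # below are illustrative assumptions only.
+ #
+ #   import sys
+ #   from pyflow import WorkflowRunner
+ #
+ #   class ExampleWorkflow(WorkflowRunner) :
+ #       def workflow(self) :
+ #           self.addTask("sortData", "sort input.txt > sorted.txt")
+ #           self.addTask("compressData", "gzip -f sorted.txt",
+ #                        dependencies="sortData")
+ #
+ #   if __name__ == "__main__" :
+ #       sys.exit(ExampleWorkflow().run(mode="local", nCores=2, memMb=2048))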
+
+
+
+ # protected methods which can be called within the workflow method:
+
+ def addTask(self, label, command=None, cwd=None, env=None, nCores=1,
+ memMb=siteConfig.defaultTaskMemMb,
+ dependencies=None, priority=0,
+ isForceLocal=False, isCommandMakePath=False, isTaskStable=True,
+ mutex=None,
+ retryMax=None, retryWait=None, retryWindow=None, retryMode=None) :
+ """
+ Add task to workflow, including resource requirements and
+ specification of dependencies. Dependency tasks must already
+ exist in the workflow.
+
+ @return: The 'label' argument is returned without modification.
+
+
+ @param label: A string used to identify each task. The label must
+ be composed of only ascii letters, digits,
+ underscores and dashes (ie. /[A-Za-z0-9_-]+/). The
+ label must also be unique within the workflow, and
+ non-empty.
+
+ @param command: The task command. Commands can be: (1) a shell
+ string (2) an iterable container of strings (argument
+ list) (3) None. In all cases strings must not contain
+ newline characters. A single string is typically used
+ for commands that require shell features (such as
+ pipes), an argument list can be used for any other
+ commands, this is often a useful way to simplify
+ quoting issues or to submit extremely long
+ commands. The default command (None), can be used to
+ create a 'checkpoint', ie. a task which does not run
+ anything, but provides a label associated with the
+ completion of a set of dependencies.
+
+ @param cwd: Specify current working directory to use for
+ command execution. Note that if submitting the
+ command as an argument list (as opposed to a shell
+ string) the executable (arg[0]) is searched for
+ before changing the working directory, so you cannot
+ specify the executable relative to the cwd
+ setting. If submitting a shell string command this
+ restriction does not apply.
+
+ @param env: A map of environment variables for this task, for
+ example 'env={"PATH": "/usr/bin"}'. When env is set
+ to None (the default) the environment of the pyflow
+ client process is used.
+
+ @param nCores: Number of cpu threads required
+
+ @param memMb: Amount of memory required (in megabytes)
+
+ @param dependencies: A task label or container of task labels specifying all dependent
+ tasks. Dependent tasks must already exist in
+ the workflow.
+ @type dependencies: A single string, or set, tuple or list of strings
+
+
+ @param priority: Among all tasks which are eligible to run at
+ the same time, launch tasks with higher priority
+ first. This value can be set in the range [-100,100]. Note
+ that this will strongly control the order of task
+ launch on a local run, but will only control task
+ submission order to a secondary scheduler (like
+ sge). All jobs with the same priority are already
+ submitted in order from highest to lowest nCores
+ requested, so there is no need to set priorities to
+ replicate this behavior. The taskManager can start
+ executing tasks as soon as each addTask() method is
+ called, so lower-priority tasks may be launched first
+ if they are specified first in the workflow.
+
+ @param isForceLocal: Force this task to run locally when a
+ distributed task mode is used. This can be used to
+ launch very small jobs outside of the sge queue. Note
+ that 'isForceLocal' jobs launched during a non-local
+ task mode are not subject to resource management, so
+ it is important that these represent small
+ jobs. Tasks which delete, move or touch a small
+ number of files are ideal for this setting.
+
+ @param isCommandMakePath: If true, command is assumed to be a
+ path containing a makefile. It will be run using
+ make/qmake according to the run's mode and the task's
+ isForceLocal setting
+
+ @param isTaskStable: If false, indicates that the task command
+ and/or dependencies may change if the run is
+ interrupted and restarted. A command marked as
+ unstable will not be checked to make sure it matches
+ its previous definition during run continuation.
+ Unstable examples: command contains a date/time, or
+ lists a set of files which are deleted at some point
+ in the workflow, etc.
+
+ @param mutex: Provide an optional id associated with a pyflow
+ task mutex. For all tasks with the same mutex id, no more
+ than one will be run at once. Id name must follow task id
+ restrictions. Mutex ids are global across all recursively
+ invoked workflows.
+ Example use case: This feature has been added as a simpler
+ alternative to file locking, to ensure sequential, but not
+ ordered, access to a file.
+
+ @param retryMax: The number of times this task will be retried
+ after failing. If defined, this overrides the workflow
+ retryMax value.
+
+ @param retryWait: The number of seconds to wait before relaunching
+ a failed task. If defined, this overrides the workflow
+ retryWait value.
+
+ @param retryWindow: The number of seconds after job submission in
+ which retries will be attempted for non-make jobs. A value of
+ zero or less causes retries to be attempted anytime after
+ job submission. If defined, this overrides the workflow
+ retryWindow value.
+
+ @param retryMode: Modes are 'nonlocal' and 'all'. For 'nonlocal'
+ retries are not attempted in local run mode. For 'all'
+ retries are attempted for any run mode. If defined, this overrides
+ the workflow retryMode value.
+ """
+
+ self._requireInWorkflow()
+
+ #### Canceled plans to add deferred dependencies:
+ # # deferredDependencies -- A container of labels specifying dependent
+ # # tasks which have not yet been added to the
+ # # workflow. In this case the added task will
+ # # wait for the dependency to be defined *and*
+ # # complete. Avoid these in favor of regular
+ # # dependencies if possible.
+
+ # sanitize bools:
+ isForceLocal = argToBool(isForceLocal)
+ isCommandMakePath = argToBool(isCommandMakePath)
+
+ # sanitize ints:
+ nCores = int(nCores)
+ memMb = int(memMb)
+ priority = int(priority)
+ if (priority > 100) or (priority < -100) :
+ raise Exception("priority must be an integer in the range [-100,100]")
+
+ # sanity check label:
+ WorkflowRunner._checkTaskLabel(label)
+
+ fullLabel = namespaceJoin(self._getNamespace(), label)
+
+ # verify/sanitize command:
+ cmd = Command(command, cwd, env)
+
+ # deal with command/resource relationship:
+ if cmd.cmd is None :
+ nCores = 0
+ memMb = 0
+ else:
+ if nCores <= 0 :
+ raise Exception("Unexpected core requirement for task: '%s' nCores: %i" % (fullLabel, nCores))
+ if memMb <= 0:
+ raise Exception("Unexpected memory requirement for task: '%s' memory: %i (megabytes)" % (fullLabel, memMb))
+
+
+ if (self._cdata().param.nCores != "unlimited") and (nCores > self._cdata().param.nCores) :
+ raise Exception("Task core requirement exceeds full available resources")
+
+ if (self._cdata().param.memMb != "unlimited") and (memMb > self._cdata().param.memMb) :
+ raise Exception("Task memory requirement exceeds full available resources")
+
+ # check that make path commands point to a directory:
+ #
+ if isCommandMakePath :
+ if cmd.type != "str" :
+ raise Exception("isCommandMakePath is set, but no path is provided in task: '%s'" % (fullLabel))
+ cmd.cmd = os.path.abspath(cmd.cmd)
+
+ # sanitize mutex option
+ if mutex is not None :
+ WorkflowRunner._checkTaskLabel(mutex)
+
+ task_retry = self._cdata().param.retry.getTaskCopy(retryMax, retryWait, retryWindow, retryMode)
+
+ # private _addTaskCore gets hijacked in recursive workflow submission:
+ #
+ payload = CmdPayload(fullLabel, cmd, nCores, memMb, priority, isForceLocal, isCommandMakePath, isTaskStable, mutex, task_retry)
+ self._addTaskCore(self._getNamespace(), label, payload, dependencies)
+ return label
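+
+ # A few addTask() call patterns matching the docstring above. Labels,
+ # commands, file names and the mutex id are hypothetical, and the calls
+ # are assumed to be made from within a workflow() method.
+ #
+ #   # shell-string command with explicit resource requirements:
+ #   self.addTask("compress", "gzip -c big.txt > big.txt.gz",
+ #                nCores=1, memMb=2048)
+ #
+ #   # argument-list command depending on the task above:
+ #   self.addTask("checksum", ["md5sum", "big.txt.gz"],
+ #                dependencies="compress")
+ #
+ #   # checkpoint task (command=None) labeling completion of a task group:
+ #   self.addTask("stageDone", dependencies=["compress", "checksum"])
+ #
+ #   # tasks sharing a mutex id are never run concurrently:
+ #   self.addTask("log1", "cat a.txt >> summary.txt", mutex="summaryLock")
+ #   self.addTask("log2", "cat b.txt >> summary.txt", mutex="summaryLock")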
+
+
+
+
+ def addWorkflowTask(self, label, workflowRunnerInstance, dependencies=None) :
+ """
+ Add another WorkflowRunner instance as a task to this
+ workflow. The added Workflow's workflow() method will be
+ called once the dependencies specified in this call have
+ completed. Once started, all of the submitted workflow's
+ method calls (like addTask) will be placed into the enclosing
+ workflow instance and bound by the run parameters of the
+ enclosing workflow.
+
+ This task will be marked complete once the submitted workflow's
+ workflow() method has finished, and any tasks it initiated have
+ completed.
+
+ Note that all workflow tasks will have their own tasks namespaced
+ with the workflow task label. This namespace is recursive in the
+ case that you add workflow tasks which add their own workflow
+ tasks, etc.
+
+ Note that the submitted workflow instance will be deep copied
+ before being altered in any way.
+
+ @return: The 'label' argument is returned without modification.
+
+ @param label: A string used to identify each task. The label must
+ be composed of only ascii letters, digits,
+ underscores and dashes (ie. /[A-Za-z0-9_-]+/). The
+ label must also be unique within the workflow, and
+ non-empty.
+
+ @param workflowRunnerInstance: A L{WorkflowRunner} instance.
+
+ @param dependencies: A label string or container of labels specifying all dependent
+ tasks. Dependent tasks must already exist in
+ the workflow.
+ @type dependencies: A single string, or set, tuple or list of strings
+ """
+
+ self._requireInWorkflow()
+
+ # sanity check label:
+ WorkflowRunner._checkTaskLabel(label)
+
+ import inspect
+
+ # copy and 'hijack' the workflow:
+ workflowCopy = copy.deepcopy(workflowRunnerInstance)
+
+ # hijack! -- take all public methods at the WorkflowRunner level
+ # (except workflow()), and insert the self copy:
+ publicExclude = ["workflow", "addTask", "addWorkflowTask", "waitForTasks"]
+ for (n, _v) in inspect.getmembers(WorkflowRunner, predicate=inspect.ismethod) :
+ if n[0] == "_" : continue # skip private/special methods
+ if n in publicExclude : continue
+ setattr(workflowCopy, n, getattr(self, n))
+
+ privateInclude = ["_cdata", "_addTaskCore", "_waitForTasksCore", "_isTaskCompleteCore","_setRunning","_getRunning"]
+ for n in privateInclude :
+ setattr(workflowCopy, n, getattr(self, n))
+
+ # final step: disable the run() function to be extra safe...
+ workflowCopy.run = None
+
+ # set the task namespace:
+ workflowCopy._appendNamespace(self._getNamespaceList())
+ workflowCopy._appendNamespace(label)
+
+ # add workflow task to the task-dag, and launch a new taskrunner thread
+ # if one isn't already running:
+ payload = WorkflowPayload(workflowCopy)
+ self._addTaskCore(self._getNamespace(), label, payload, dependencies)
+ return label
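+
+ # A sub-workflow composition sketch (class names and labels are
+ # hypothetical): the added instance's workflow() method runs once its
+ # dependencies complete, and its tasks are namespaced under the label.
+ #
+ #   prepWf = PrepWorkflow(inputFile)   # another WorkflowRunner subclass
+ #   self.addTask("makeDirs", "mkdir -p results")
+ #   self.addWorkflowTask("prep", prepWf, dependencies="makeDirs")
+ #   self.addTask("summarize", "touch results/done.txt", dependencies="prep")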
+
+
+ def waitForTasks(self, labels=None) :
+ """
+ Wait for a list of tasks to complete.
+
+ @return: In case of an error in a task being waited for, or in
+ one of these task's dependencies, the function returns 1.
+ Else return 0.
+
+ @param labels: Container of task labels to wait for. If an empty container is
+ given or no list is provided then wait for all
+ outstanding tasks to complete.
+ @type labels: A single string, or set, tuple or list of strings
+ """
+
+ self._requireInWorkflow()
+
+ return self._waitForTasksCore(self._getNamespace(), labels)
+
+
+ def isTaskComplete(self, taskLabel) :
+ """
+ Query if a specific task is in the workflow and completed without error.
+
+ This can assist workflows with providing
+ stable interrupt/resume behavior.
+
+ @param taskLabel: A task string
+
+ @return: Completion status of task
+ """
+
+ return self._isTaskCompleteCore(self._getNamespace(), taskLabel)
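+
+ # A dynamic-workflow sketch using waitForTasks() and isTaskComplete()
+ # (labels, file names and the follow-on command are hypothetical):
+ # waiting inside workflow() lets the code inspect task output before
+ # deciding which tasks to add next.
+ #
+ #   self.addTask("countLines", "wc -l < input.txt > count.txt")
+ #   if self.waitForTasks("countLines") == 0 :
+ #       assert self.isTaskComplete("countLines")
+ #       if not self.isDryRun() :
+ #           lineCount = int(open("count.txt").read())
+ #           if lineCount > 1000000 :
+ #               self.addTask("bigJob", "./processBig.bash input.txt", nCores=8)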
+
+
+ def getRunMode(self) :
+ """
+ Get the current run mode
+
+ This can be used to access the current run mode from
+ within the workflow function. Although the runmode should
+ be transparent to client code, this is occasionally needed
+ to hack workarounds.
+
+ @return: Current run mode
+ """
+
+ self._requireInWorkflow()
+
+ return self._cdata().param.mode
+
+
+ def getNCores(self) :
+ """
+ Get the current run core limit
+
+ This function can be used to access the current run's core
+ limit from within the workflow function. This can be useful
+ to eg. limit the number of cores requested by a single task.
+
+ @return: Total cores available to this workflow run
+ @rtype: Integer value or 'unlimited'
+ """
+
+ self._requireInWorkflow()
+
+ return self._cdata().param.nCores
+
+
+ def limitNCores(self, nCores) :
+ """
+ Takes a task nCores argument and reduces it to
+ the maximum value allowed for the current run.
+
+ @param nCores: Proposed core requirement
+
+ @return: Min(nCores,Total cores available to this workflow run)
+ """
+
+ self._requireInWorkflow()
+
+ nCores = int(nCores)
+ runNCores = self._cdata().param.nCores
+ if runNCores == "unlimited" : return nCores
+ return min(nCores, runNCores)
+
+
+ def getMemMb(self) :
+ """
+ Get the current run's total memory limit (in megabytes)
+
+ @return: Memory limit in megabytes
+ @rtype: Integer value or 'unlimited'
+ """
+
+ self._requireInWorkflow()
+
+ return self._cdata().param.memMb
+
+
+ def limitMemMb(self, memMb) :
+ """
+ Takes a task memMb argument and reduces it to
+ the maximum value allowed for the current run.
+
+ @param memMb: Proposed task memory requirement in megabytes
+
+ @return: Min(memMb,Total memory available to this workflow run)
+ """
+
+ self._requireInWorkflow()
+
+ memMb = int(memMb)
+ runMemMb = self._cdata().param.memMb
+ if runMemMb == "unlimited" : return memMb
+ return min(memMb, runMemMb)
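+
+ # A resource-query sketch (task label, command and sizes are hypothetical):
+ # clamp a task's request to the current run's limits before submission.
+ #
+ #   useCores = self.limitNCores(8)
+ #   self.addTask("bigSort",
+ #                "sort --parallel=%i huge.txt > sorted.txt" % (useCores),
+ #                nCores=useCores, memMb=self.limitMemMb(16 * 1024))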
+
+
+ def isDryRun(self) :
+ """
+ Get isDryRun flag value.
+
+ When the dryrun flag is set, no commands are actually run. Querying
+ this flag allows dynamic workflows to correct for dry run behaviors,
+ such as tasks which do not produce expected files.
+
+ @return: DryRun status flag
+ """
+
+ self._requireInWorkflow()
+
+ return self._cdata().param.isDryRun
+
+
+ @staticmethod
+ def runModeDefaultCores(mode) :
+ """
+ Get the default core limit for run mode (local,sge,..)
+
+ @param mode: run mode, as specified in L{the run() method<WorkflowRunner.run>}
+
+ @return: Default maximum number of cores for mode
+
+ @rtype: Either 'unlimited', or a string
+ representation of the integer limit
+ """
+
+ return str(RunMode.data[mode].defaultCores)
+
+
+ def flowLog(self, msg, logState = LogState.INFO) :
+ """
+ Send a message to the WorkflowRunner's log.
+
+ @param msg: Log message
+ @type msg: A string or an array of strings. String arrays will be separated by newlines in the log.
+ @param logState: Message severity, defaults to INFO.
+ @type logState: A value in pyflow.LogState.{INFO,WARNING,ERROR}
+ """
+
+ self._requireInWorkflow()
+
+ linePrefixOut = "[%s]" % (self._cdata().param.workflowClassName)
+ self._cdata().flowLog(msg, linePrefix=linePrefixOut, logState=logState)
+
+
+ # Protected methods for client derived-class override:
+
+ def workflow(self) :
+ """
+ Workflow definition provided by the derived class
+
+ This method should be overridden in the class derived from
+ L{WorkflowRunner} to specify the actual workflow logic. Client
+ code should not call this method directly.
+ """
+ pass
+
+
+ # private methods:
+
+ # special workflowRunner Exception used to terminate workflow() function
+ # if a ctrl-c is issued
+ class _AbortWorkflowException(Exception) :
+ pass
+
+
+ def _flowLog(self, msg, logState) :
+ linePrefixOut = "[WorkflowRunner]"
+ self._cdata().flowLog(msg, linePrefix=linePrefixOut, logState=logState)
+
+ def _infoLog(self, msg) :
+ self._flowLog(msg,logState=LogState.INFO)
+
+ def _warningLog(self, msg) :
+ self._flowLog(msg,logState=LogState.WARNING)
+
+ def _errorLog(self, msg) :
+ self._flowLog(msg,logState=LogState.ERROR)
+
+ def _whoami(self) :
+ # returns name of *derived* class
+ return self.__class__.__name__
+
+
+ def _getNamespaceList(self) :
+ try:
+ return self._namespaceList
+ except AttributeError:
+ self._namespaceList = []
+ return self._namespaceList
+
+ def _getNamespace(self) :
+ return namespaceSep.join(self._getNamespaceList())
+
+ def _appendNamespace(self, names) :
+ names = lister(names)
+ for name in names :
+ # check against runaway recursion:
+ if len(self._getNamespaceList()) >= WorkflowRunner._maxWorkflowRecursion :
+ raise Exception("Recursive workflow invocation depth exceeds maximum allowed depth of %i" % (WorkflowRunner._maxWorkflowRecursion))
+ WorkflowRunner._checkTaskLabel(name)
+ self._getNamespaceList().append(name)
+
+
+ # flag used to request the termination of all task submission:
+ #
+ _allStop = threading.Event()
+
+ @staticmethod
+ def _stopAllWorkflows() :
+ # request all workflows stop task submission:
+ WorkflowRunner._allStop.set()
+
+ @staticmethod
+ def _isWorkflowStopped() :
+ # check whether a global signal has been given to stop all workflow submission
+ # this should only be true when a ctrl-C or similar event has occurred.
+ return WorkflowRunner._allStop.isSet()
+
+ def _addTaskCore(self, namespace, label, payload, dependencies) :
+ # private core taskAdd routine for hijacking
+ # fromWorkflow is the workflow instance used to launch the task
+ #
+
+ # add workflow task to the task-dag, and launch a new taskrunner thread
+ # if one isn't already running:
+ if self._isWorkflowStopped() :
+ raise WorkflowRunner._AbortWorkflowException
+
+ self._infoLog("Adding %s '%s' to %s" % (payload.desc(), namespaceJoin(namespace, label), namespaceLabel(namespace)))
+
+ # add task to the task-dag, and launch a new taskrunner thread
+ # if one isn't already running:
+ dependencies = setzer(dependencies)
+ self._tdag.addTask(namespace, label, payload, dependencies)
+ self._startTaskManager()
+
+
+ def _getWaitStatus(self, namespace, labels, status) :
+ # update and return two values:
+ # (1) isAllTaskDone -- are all tasks done (ie. in error or complete state)
+ # (2) retval -- this is set to one if any tasks have errors
+ #
+
+ def updateStatusFromTask(task, status) :
+ if not task.isDone() :
+ status.isAllTaskDone = False
+ elif not task.isComplete() :
+ status.retval = 1
+ if status.retval == 0 and (not self._cdata().isTaskSubmissionActive()) :
+ status.retval = 1
+ if status.retval == 0 and task.isDead() :
+ status.retval = 1
+
+
+ if len(labels) == 0 :
+ if namespace == "" :
+ if self._tdag.isRunExhausted() or (not self._tman.isAlive()) :
+ if not self._tdag.isRunComplete() :
+ status.retval = 1
+ else:
+ status.isAllTaskDone = False
+ else :
+ for task in self._tdag.getAllNodes(namespace) :
+ updateStatusFromTask(task, status)
+ else :
+ for l in labels :
+ if not self._tdag.isTaskPresent(namespace, l) :
+ raise Exception("Task: '%s' is not in taskDAG" % (namespaceJoin(namespace, l)))
+ task = self._tdag.getTask(namespace, l)
+ updateStatusFromTask(task, status)
+
+
+ def _waitForTasksCore(self, namespace, labels=None, isVerbose=True) :
+ labels = setzer(labels)
+ if isVerbose :
+ msg = "Pausing %s until completion of" % (namespaceLabel(namespace))
+ if len(labels) == 0 :
+ self._infoLog(msg + " its current tasks")
+ else:
+ self._infoLog(msg + " task(s): %s" % (",".join([namespaceJoin(namespace, l) for l in labels])))
+
+ class WaitStatus:
+ def __init__(self) :
+ self.isAllTaskDone = True
+ self.retval = 0
+
+ ewaiter = ExpWaiter(1, 1.7, 15)
+ while True :
+ if self._isWorkflowStopped() :
+ raise WorkflowRunner._AbortWorkflowException
+ status = WaitStatus()
+ self._getWaitStatus(namespace, labels, status)
+ if status.isAllTaskDone or (status.retval != 0) : break
+ ewaiter.wait()
+
+ if isVerbose :
+ msg = "Resuming %s" % (namespaceLabel(namespace))
+ self._infoLog(msg)
+ return status.retval
+
+
+ def _isTaskCompleteCore(self, namespace, taskLabel) :
+
+ if not self._tdag.isTaskPresent(namespace, taskLabel) :
+ return False
+ task = self._tdag.getTask(namespace, taskLabel)
+ return task.isComplete()
+
+
+ @staticmethod
+ def _checkTaskLabel(label) :
+ # sanity check label:
+ if not isinstance(label, basestring) :
+ raise Exception ("Task label is not a string")
+ if label == "" :
+ raise Exception ("Task label is empty")
+ if not re.match("^[A-Za-z0-9_-]+$", label) :
+ raise Exception ("Task label is invalid due to disallowed characters. Label: '%s'" % (label))
+
+
+ def _startTaskManager(self) :
+ # start a new task manager if one isn't already running:
+ #
+ if (self._tman is not None) and (self._tman.isAlive()) : return
+ if not self._cdata().isTaskManagerException :
+ self._tman = TaskManager(self._cdata(), self._tdag)
+ self._tman.start()
+
+
+ def _notify(self, msg, logState) :
+ # msg is printed to log AND sent to any email or other requested
+ # notification systems:
+ self._flowLog(msg,logState)
+ self._cdata().emailNotification(msg, self._flowLog)
+
+
+ def _killWorkflow(self, errorMsg) :
+ self._notify(errorMsg,logState=LogState.ERROR)
+ self._shutdownAll(timeoutSec=10)
+ sys.exit(1)
+
+
+ def _shutdownAll(self, timeoutSec) :
+ # Try to shut down the task manager, all command-tasks,
+ # and all sub-workflow tasks.
+ #
+ if (self._tman is None) or (not self._tman.isAlive()) : return
+ StoppableThread.stopAll()
+ self._stopAllWorkflows()
+ self._tman.stop()
+ for _ in range(timeoutSec) :
+ time.sleep(1)
+ if not self._tman.isAlive() :
+ self._infoLog("Task shutdown complete")
+ return
+ self._infoLog("Task shutdown timed out")
+
+
+ def _cdata(self) :
+ # We're doing this convoluted setup only to avoid having a
+ # ctor for ease of use by the client. See what pyFlow goes
+ # through for you, client code??
+ #
+ try:
+ return self._constantData
+ except AttributeError:
+ self._constantData = WorkflowRunnerThreadSharedData()
+ return self._constantData
+
+
+ # TODO: Better definition of the status thread shutdown at the end of a pyflow run to
+ # prevent race conditions -- ie. what happens if the status update is running while
+ # pyflow is shutting down? Every method called by the status updater should be
+ # safety-checked with respect to this issue.
+ #
+ def _runUpdate(self, runStatus) :
+ while True :
+ time.sleep(self.updateInterval * 60)
+
+ status = self._tdag.getTaskStatus()
+ isSpecComplete = (runStatus.isSpecificationComplete.isSet() and status.isAllSpecComplete)
+ report = []
+ report.append("===== " + self._whoami() + " StatusUpdate =====")
+ report.append("Workflow specification is complete?: %s" % (str(isSpecComplete)))
+ report.append("Task status (waiting/queued/running/complete/error): %i/%i/%i/%i/%i"
+ % (status.waiting, status.queued, status.running, status.complete, status.error))
+ report.append("Longest ongoing queued task time (hrs): %.4f" % (status.longestQueueSec / 3600.))
+ report.append("Longest ongoing queued task name: '%s'" % (status.longestQueueName))
+ report.append("Longest ongoing running task time (hrs): %.4f" % (status.longestRunSec / 3600.))
+ report.append("Longest ongoing running task name: '%s'" % (status.longestRunName))
+
+ report = [ "[StatusUpdate] " + line for line in report ]
+ self._infoLog(report)
+
+ # Update interval is also an appropriate interval to dump a stack-trace of all active
+ # threads. This is a useful post-mortem in the event of a large class of hang/deadlock
+ # errors:
+ #
+ stackDumpFp = open(self._cdata().stackDumpLogFile, "a")
+
+ # create one fully decorated line in the stack dump file as a prefix to the report:
+ linePrefixOut = "[%s] [StackDump]" % (self._cdata().getRunid())
+ ofpList = [stackDumpFp]
+ log(ofpList, "Initiating stack dump for all threads", linePrefixOut)
+
+ stackDump(stackDumpFp)
+ hardFlush(stackDumpFp)
+ stackDumpFp.close()
+
+
+
+ def _runWorkflow(self, param) :
+ #
+ # Primary workflow logic when nothing goes wrong:
+ #
+ self._setupWorkflow(param)
+ self._initMessage()
+
+ runStatus = RunningTaskStatus(self._tdag.isFinishedEvent)
+
+ # start status update reporter:
+ #
+ # TODO: stop this thread at end of run
+ #
+ if(self.updateInterval > 0) :
+ hb = threading.Thread(target=WorkflowRunner._runUpdate, args=(self, runStatus))
+ hb.setDaemon(True)
+ hb.setName("StatusUpdate-Thread")
+ hb.start()
+
+ # run workflow() function on a separate thread, using exactly
+ # the same method we use for sub-workflows:
+ #
+ # TODO: move the master workflow further into the code path used by sub-workflows,
+ # so that we aren't replicating polling and error handling code in this function:
+ #
+ trun = WorkflowTaskRunner(runStatus, "masterWorkflow", self, self._cdata().flowLog, None)
+ trun.start()
+ # can't join() because that blocks SIGINT
+ ewaiter = ExpWaiter(1, 1.7, 15,runStatus.isComplete)
+ while True :
+ if not trun.isAlive() : break
+ ewaiter.wait()
+
+ if not runStatus.isComplete.isSet() :
+ # if not complete then we don't know what happened, very bad!:
+ runStatus.errorCode = 1
+ runStatus.errorMessage = "Thread: '%s', has stopped without a traceable cause" % (trun.getName())
+
+ return self._evalWorkflow(runStatus)
+
+
+ def _setupWorkflow(self, param) :
+ cdata = self._cdata()
+
+ # setup instance user parameters:
+ cdata.setupNewRun(param)
+
+ # setup other instance data:
+ self._tdag = TaskDAG(cdata.param.isContinue, cdata.param.isForceContinue, cdata.param.isDryRun,
+ cdata.taskInfoFile, cdata.taskStateFile, cdata.param.workflowClassName,
+ cdata.param.startFromTasks, cdata.param.ignoreTasksAfter, cdata.param.resetTasks,
+ self._flowLog)
+ self._tman = None
+
+ def backupFile(inputFile) :
+ """
+ backup old state files if they exist
+ """
+ if not os.path.isfile(inputFile) : return
+ fileDir = os.path.dirname(inputFile)
+ fileName = os.path.basename(inputFile)
+ backupDir = os.path.join(fileDir, "backup")
+ ensureDir(backupDir)
+ backupFileName = fileName + ".backup_before_starting_run_%s.txt" % (cdata.getRunid())
+ backupFile = os.path.join(backupDir, backupFileName)
+ shutil.copyfile(inputFile, backupFile)
+
+ backupFile(cdata.taskStateFile)
+ backupFile(cdata.taskInfoFile)
+
+ if cdata.param.isContinue :
+ self._setupContinuedWorkflow()
+
+
+
+ def _createContinuedStateFile(self) :
+ #
+ # create continued version of task state file
+ #
+
+ cdata = self._cdata()
+ if not os.path.isfile(cdata.taskStateFile) : return set()
+
+ tmpFile = cdata.taskStateFile + ".update.incomplete"
+ tmpfp = open(tmpFile, "w")
+ tmpfp.write(taskStateHeader())
+ complete = set()
+ for words in taskStateParser(cdata.taskStateFile) :
+ (runState, errorCode) = words[2:4]
+ if (runState != "complete") or (int(errorCode) != 0) : continue
+ tmpfp.write("\t".join(words) + "\n")
+ (label, namespace) = words[0:2]
+ complete.add(namespaceJoin(namespace, label))
+
+ tmpfp.close()
+ forceRename(tmpFile, cdata.taskStateFile)
+ return complete
+
+
+ def _createContinuedInfoFile(self, complete) :
+ #
+ # create continued version of task info file
+ #
+
+ cdata = self._cdata()
+ if not os.path.isfile(cdata.taskInfoFile) : return
+
+ tmpFile = cdata.taskInfoFile + ".update.incomplete"
+ tmpfp = open(tmpFile, "w")
+ tmpfp.write(taskInfoHeader())
+ for words in taskInfoParser(cdata.taskInfoFile) :
+ (label, namespace, ptype, nCores, memMb, priority, isForceLocal, depStr, cwdStr, command) = words
+ fullLabel = namespaceJoin(namespace, label)
+ if fullLabel not in complete : continue
+ tmpfp.write("\t".join(words) + "\n")
+ if ptype == "command" :
+ if command == "" : command = None
+ payload = CmdPayload(fullLabel, Command(command, cwdStr), int(nCores), int(memMb), int(priority), argToBool(isForceLocal))
+ elif ptype == "workflow" :
+ payload = WorkflowPayload(None)
+ else : assert 0
+
+ self._tdag.addTask(namespace, label, payload, getTaskInfoDepSet(depStr), isContinued=True)
+
+ tmpfp.close()
+ forceRename(tmpFile, cdata.taskInfoFile)
+
+
+
+ def _setupContinuedWorkflow(self) :
+ # reduce both state files to completed states only.
+ complete = self._createContinuedStateFile()
+ self._createContinuedInfoFile(complete)
+
+
+
+ def _initMessage(self) :
+ param = self._cdata().param # shortcut
+ msg = ["Initiating pyFlow run"]
+ msg.append("pyFlowClientWorkflowClass: %s" % (param.workflowClassName))
+ msg.append("pyFlowVersion: %s" % (__version__))
+ msg.append("pythonVersion: %s" % (pythonVersion))
+ msg.append("WorkingDir: '%s'" % (self._cdata().cwd))
+ msg.append("ProcessCmdLine: '%s'" % (cmdline()))
+
+ parammsg = ["mode: %s" % (param.mode),
+ "nCores: %s" % (str(param.nCores)),
+ "memMb: %s" % (str(param.memMb)),
+ "dataDir: %s" % (str(param.dataDir)),
+ "isDryRun: %s" % (str(param.isDryRun)),
+ "isContinue: %s" % (str(param.isContinue)),
+ "isForceContinue: %s" % (str(param.isForceContinue)),
+ "mailTo: '%s'" % (",".join(param.mailTo))]
+ for i in range(len(parammsg)):
+ parammsg[i] = "[RunParameters] " + parammsg[i]
+ msg += parammsg
+ self._infoLog(msg)
+
+
+
+ def _getTaskErrorsSummaryMsg(self, isForceTaskHarvest=False) :
+ # isForceHarvest means we try to force an update of the shared
+ # taskError information in case this thread is ahead of the
+ # task manager.
+ if isForceTaskHarvest :
+ if (self._tman is not None) and (self._tman.isAlive()) :
+ self._tman.harvestTasks()
+
+ if not self._cdata().isTaskError() : return []
+ # this case has already been emailed in the TaskManager @ first error occurrence:
+ msg = ["Worklow terminated due to the following task errors:"]
+ for task in self._cdata().taskErrors :
+ msg.extend(task.getTaskErrorMsg())
+ return msg
+
+
+ def _evalWorkflow(self, masterRunStatus) :
+
+ isError = False
+ if self._cdata().isTaskError() :
+ msg = self._getTaskErrorsSummaryMsg()
+ self._errorLog(msg)
+ isError = True
+
+ if masterRunStatus.errorCode != 0 :
+ eMsg = lister(masterRunStatus.errorMessage)
+ if (len(eMsg) > 1) or (len(eMsg) == 1 and eMsg[0] != "") :
+ msg = ["Failed to complete master workflow, error code: %s" % (str(masterRunStatus.errorCode))]
+ msg.append("errorMessage:")
+ msg.extend(eMsg)
+ self._notify(msg,logState=LogState.ERROR)
+ isError = True
+
+ if self._cdata().isTaskManagerException :
+ # this case has already been emailed in the TaskManager:
+ self._errorLog("Workflow terminated due to unhandled exception in TaskManager")
+ isError = True
+
+ if (not isError) and (not self._tdag.isRunComplete()) :
+ msg = "Workflow terminated with unknown error condition"
+ self._notify(msg,logState=LogState.ERROR)
+ isError = True
+
+ if isError: return 1
+
+ elapsed = int(time.time() - self.runStartTimeStamp)
+ msg = []
+ if self._cdata().param.successMsg is not None :
+ msg.extend([self._cdata().param.successMsg,""])
+ msg.extend(["Workflow successfully completed all tasks",
+ "Elapsed time for full workflow: %s sec" % (elapsed)])
+ self._notify(msg,logState=LogState.INFO)
+ return 0
+
+
+ def _requireInWorkflow(self) :
+ """
+ check that the calling method is being called as part of a pyflow workflow() method only
+ """
+ if not self._getRunning():
+ raise Exception("Method must be a (call stack) descendant of WorkflowRunner workflow() method (via run() method)")
+
+
+ def _initRunning(self):
+ try :
+ assert(self._isRunning >= 0)
+ except AttributeError :
+ self._isRunning = 0
+
+ @lockMethod
+ def _setRunning(self, isRunning) :
+ self._initRunning()
+ if isRunning :
+ self._isRunning += 1
+ else :
+ self._isRunning -= 1
+
+ @lockMethod
+ def _getRunning(self) :
+ self._initRunning()
+ return (self._isRunning > 0)
+
+
+
+if __name__ == "__main__" :
+ help(WorkflowRunner)
+
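The file above closes out the WorkflowRunner public interface (addTask/addWorkflowTask plus the query and logging helpers documented earlier in this file). The following is a minimal usage sketch, not part of the commit: the sys.path location, task labels, and echo commands are illustrative assumptions; only the method names and arguments shown in the source above come from pyflow itself.

    import os.path
    import sys

    # hypothetical checkout location; point this at the local pyflow/src directory
    sys.path.append("/path/to/pyflow/src")

    from pyflow import WorkflowRunner

    class ExampleWorkflow(WorkflowRunner) :

        def workflow(self) :
            # run-level queries are only legal inside workflow():
            self.flowLog("running in mode: %s" % (self.getRunMode()))

            # clamp per-task requests to what this run actually allows:
            taskCores = self.limitNCores(4)
            taskMemMb = self.limitMemMb(4096)

            self.addTask("task1", "echo hello", nCores=taskCores, memMb=taskMemMb)

            # block the workflow thread until task1 finishes, then branch on its status:
            if (self.waitForTasks("task1") == 0) and self.isTaskComplete("task1") :
                self.addTask("task2", "echo world", dependencies="task1")

    if __name__ == "__main__" :
        sys.exit(ExampleWorkflow().run("local", nCores=2, memMb=2048))
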
diff --git a/pyflow/src/pyflowConfig.py b/pyflow/src/pyflowConfig.py
new file mode 100644
index 0000000..2e5b16d
--- /dev/null
+++ b/pyflow/src/pyflowConfig.py
@@ -0,0 +1,213 @@
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+"""
+pyflowConfig
+
+This file defines a class instance 'siteConfig' containing pyflow components
+which are the most likely to need site-specific configuration.
+"""
+
+import os
+
+
+# this is the only object pyflow needs to import, it
+# is defined at the end of this module:
+#
+siteConfig = None
+
+
+# depending on network setup getfqdn() can be slow, so cache calls to this function here:
+#
+def _getHostName() :
+ import socket
+ return socket.getfqdn()
+
+cachedHostName = None
+
+
+def getHostName() :
+ global cachedHostName
+ if cachedHostName is None :
+ cachedHostName = _getHostName()
+ return cachedHostName
+
+
+def getDomainName() :
+ hn = getHostName().split(".")
+ if len(hn) > 1 : hn = hn[1:]
+ return ".".join(hn)
+
+
+
+class DefaultSiteConfig(object) :
+ """
+ Default configuration settings are designed to work with as
+ many sites as technically feasible
+ """
+
+ # All email notifications come from the following e-mail address
+ #
+ mailFrom = "pyflow-bot@" + getDomainName()
+
+ # Default memory (in megabytes) requested by each command task:
+ #
+ defaultTaskMemMb = 2048
+
+ # In local run mode, this is the default memory per thread that we
+ # assume is available:
+ #
+ defaultHostMemMbPerCore = 2048
+
+ # maximum number of jobs which can be submitted to sge at once:
+ #
+ # allowed values are "unlimited" or None for unlimited jobs, or
+ # a positive integer describing the max number of jobs
+ #
+ maxSGEJobs = 128
+
+ # both getHostName and getDomainName are used in the
+ # siteConfig factory, so these are not designed to be
+ # overridden at present:
+ getHostName = staticmethod(getHostName)
+ getDomainName = staticmethod(getDomainName)
+
+ @classmethod
+ def qsubResourceArg(cls, nCores, memMb) :
+ """
+ When a task is launched using qsub in sge mode, it will call this
+ function to specify the requested number of threads and megabytes
+ of memory. The returned argument list will be appended to the qsub
+ arguments.
+
+ nCores -- number of threads requested
+ memMb -- memory requested (in megabytes)
+ """
+ nCores = int(nCores)
+ memMb = int(memMb)
+ return cls._qsubResourceArgConfig(nCores, memMb)
+
+ @classmethod
+ def _qsubResourceArgConfig(cls, nCores, memMb) :
+ """
+ The default function is designed for maximum
+ portability -- it just provides more memory
+ via more threads.
+ """
+
+ # this is the memory we assume is available per
+ # thread on the cluster:
+ #
+ class Constants(object) : megsPerCore = 4096
+
+ memCores = 1 + ((memMb - 1) / Constants.megsPerCore)
+
+ qsubCores = max(nCores, memCores)
+
+ if qsubCores <= 1 : return []
+ return ["-pe", "threaded", str(qsubCores)]
+
+
+ @classmethod
+ def getSgeMakePrefix(cls, nCores, memMb, schedulerArgList) :
+ """
+ This prefix will be added to ' -C directory', and run from
+ a local process to handle sge make jobs.
+
+ Note that memMb hasn't been well defined for make jobs yet,
+ is it the per task memory limit? The first application to
+ actually make use of this will have to set up the convention;
+ it is ignored right now...
+ """
+ nCores = int(nCores)
+ memMb = int(memMb)
+
+ retval = ["qmake",
+ "-V",
+ "-now", "n",
+ "-cwd",
+ "-N", "pyflowMakeTask"]
+
+ # user arguments to run() (usually a queue specification):
+ retval.extend(schedulerArgList)
+
+ #### use qmake parallel environment:
+ # retval.extend(["-pe","make",str(nCores),"--"])
+
+ #### ...OR use 'dynamic' sge make environment:
+ retval.extend(["--", "-j", str(nCores)])
+
+ return retval
+
+
+
+def getEnvVar(key) :
+ if key in os.environ : return os.environ[key]
+ return None
+
+
+
+class hvmemSGEConfig(DefaultSiteConfig) :
+ """
+ This config assumes 'h_vmem' is defined on the SGE instance
+
+ """
+
+ @classmethod
+ def _qsubResourceArgConfig(cls, nCores, memMb) :
+
+ retval = []
+
+ # specify memory requirements
+ memGb = 1 + ((memMb - 1) / 1024)
+ reqArg = "h_vmem=%iG" % (memGb)
+ retval.extend(["-l", reqArg])
+
+ if nCores > 1 :
+ retval.extend(["-pe", "threaded", str(nCores)])
+
+ return retval
+
+
+
+#
+# final step is the selection of this run's siteConfig object:
+#
+
+def siteConfigFactory() :
+ # TODO: add an import time test to determine if h_vmem is valid
+ return hvmemSGEConfig
+
+
+siteConfig = siteConfigFactory()
+
+
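A hedged sketch of how a site might supply its own qsub resource mapping by subclassing the DefaultSiteConfig defined above. The 'mem_free' resource and 'smp' parallel environment are assumptions about a hypothetical SGE installation, and wiring the class in by editing siteConfigFactory() is likewise an assumption about local deployment rather than an interface this module defines.

    from pyflowConfig import DefaultSiteConfig

    class MemFreeSGEConfig(DefaultSiteConfig) :
        """
        Example config for an SGE instance exposing a 'mem_free' resource
        and an 'smp' parallel environment (both hypothetical).
        """

        @classmethod
        def _qsubResourceArgConfig(cls, nCores, memMb) :
            retval = []

            # request memory per slot so the slot total covers memMb:
            memMbPerCore = 1 + ((memMb - 1) / max(1, nCores))
            retval.extend(["-l", "mem_free=%iM" % (memMbPerCore)])

            if nCores > 1 :
                retval.extend(["-pe", "smp", str(nCores)])

            return retval

    # a local deployment could then return MemFreeSGEConfig from siteConfigFactory()
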
diff --git a/pyflow/src/pyflowTaskWrapper.py b/pyflow/src/pyflowTaskWrapper.py
new file mode 100644
index 0000000..2387818
--- /dev/null
+++ b/pyflow/src/pyflowTaskWrapper.py
@@ -0,0 +1,338 @@
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+"""
+This script wraps workflow tasks for execution on local or remote
+hosts. It is responsible for adding log decorations to task's stderr
+output (which is diverted to a file), and writing task state transition
+and error information to the wrapper's stderr, which becomes the
+task's 'signal' file from pyflow's perspective. The signal file is
+used to determine task exit status, total runtime, and queue->run
+state transition when pyflow is run in SGE mode.
+"""
+
+import datetime
+import os
+import subprocess
+import sys
+import time
+
+
+scriptName = "pyflowTaskWrapper.py"
+
+
+def getTracebackStr() :
+ import traceback
+ return traceback.format_exc()
+
+
+def getExceptionMsg() :
+ return ("[ERROR] Unhandled Exception in pyflowTaskWrapper\n" + getTracebackStr())
+
+
+def timeStampToTimeStr(ts) :
+ """
+ converts timeStamp (time.time()) output to timeStr
+ """
+ return datetime.datetime.utcfromtimestamp(ts).isoformat()
+
+def timeStrNow():
+ return timeStampToTimeStr(time.time())
+
+def hardFlush(ofp):
+ ofp.flush()
+ if ofp.isatty() : return
+ os.fsync(ofp.fileno())
+
+def isWindows() :
+ import platform
+ return (platform.system().find("Windows") > -1)
+
+class SimpleFifo(object) :
+ """
+ Store up to the last N objects; not thread safe.
+ Note that extraction does not follow a traditional FIFO interface
+ """
+
+ def __init__(self, size) :
+ self._size = int(size)
+ assert (self._size > 0)
+ self._data = [None] * self._size
+ self._head = 0
+ self._occup = 0
+ self._counter = 0
+
+
+ def count(self) :
+ """
+ Get the total number of adds for this fifo
+ """
+ return self._counter
+
+
+ def add(self, obj) :
+ """
+ add obj to fifo, and return obj for convenience
+ """
+ self._data[self._head] = obj
+ self._counter += 1
+ if self._occup < self._size : self._occup += 1
+ self._head += 1
+ if self._head == self._size : self._head = 0
+ assert (self._head < self._size)
+ return obj
+
+
+ def get(self) :
+ """
+ return an array of the fifo contents
+ """
+ retval = []
+ current = (self._head + self._size) - self._occup
+ for _ in range(self._occup) :
+ while current >= self._size : current -= self._size
+ retval.append(self._data[current])
+ current += 1
+ return retval
+
+
+
+class StringBling(object) :
+ def __init__(self, runid, taskStr) :
+ def getHostName() :
+ import socket
+ # return socket.gethostbyaddr(socket.gethostname())[0]
+ return socket.getfqdn()
+
+ self.runid = runid
+ self.taskStr = taskStr
+ self.hostname = getHostName()
+
+ def _writeMsg(self, ofp, msg, taskStr, writeFilter=lambda x: x) :
+ """
+ log a possibly multi-line message with decoration:
+ """
+ prefix = "[%s] [%s] [%s] [%s] " % (timeStrNow(), self.hostname, self.runid, taskStr)
+ if msg[-1] == "\n" : msg = msg[:-1]
+ for line in msg.split("\n") :
+ ofp.write(writeFilter(prefix + line + "\n"))
+ hardFlush(ofp)
+
+
+ def transfer(self, inos, outos, writeFilter=lambda x: x):
+ """
+ This function is used to decorate the stderr stream from the launched task itself
+ """
+ #
+ # write line-read loop this way to workaround python bug:
+ # http://bugs.python.org/issue3907
+ #
+ while True:
+ line = inos.readline()
+ if not line: break
+ self._writeMsg(outos, line, self.taskStr, writeFilter)
+
+ def wrapperLog(self, log_os, msg) :
+ """
+ Used by the wrapper to decorate each msg line with a prefix. The decoration
+ is similar to that for the task's own stderr, but we prefix the task with
+ 'pyflowTaskWrapper' to differentiate the source.
+ """
+ self._writeMsg(log_os, msg, "pyflowTaskWrapper:" + self.taskStr)
+
+
+
+def getParams(paramsFile) :
+ import pickle
+
+ paramhash = pickle.load(open(paramsFile))
+ class Params : pass
+ params = Params()
+ for (k, v) in paramhash.items() : setattr(params, k, v)
+ return params
+
+
+
+def main():
+
+ usage = """
+
+Usage: %s runid taskid parameter_pickle_file
+
+The parameter pickle file contains all of the task parameters required by the wrapper
+
+""" % (scriptName)
+
+ def badUsage(msg=None) :
+ sys.stderr.write(usage)
+ if msg is not None :
+ sys.stderr.write(msg)
+ exitval = 1
+ else:
+ exitval = 2
+ hardFlush(sys.stderr)
+ sys.exit(exitval)
+
+ def checkExpectArgCount(expectArgCount) :
+ if len(sys.argv) == expectArgCount : return
+ badUsage("Incorrect argument count, expected: %i observed: %i\n" % (expectArgCount, len(sys.argv)))
+
+
+ runid = "unknown"
+ taskStr = "unknown"
+
+ if len(sys.argv) > 2 :
+ runid = sys.argv[1]
+ taskStr = sys.argv[2]
+
+ bling = StringBling(runid, taskStr)
+
+ # send a signal for wrapper start as early as possible to help ensure hostname is logged
+ pffp = sys.stderr
+ bling.wrapperLog(pffp, "[wrapperSignal] wrapperStart")
+
+ checkExpectArgCount(4)
+
+ picklefile = sys.argv[3]
+
+ # try multiple times to read the argument file in case of NFS delay:
+ #
+ retryDelaySec = 30
+ maxTrials = 3
+ for _ in range(maxTrials) :
+ if os.path.exists(picklefile) : break
+ time.sleep(retryDelaySec)
+
+ if not os.path.exists(picklefile) :
+ badUsage("First argument does not exist: " + picklefile)
+
+ if not os.path.isfile(picklefile) :
+ badUsage("First argument is not a file: " + picklefile)
+
+ # add another multi-trial loop on the pickle load operation --
+ # on some filesystems the file can appear to exist but not
+ # be fully instantiated yet:
+ #
+ for t in range(maxTrials) :
+ try :
+ params = getParams(picklefile)
+ except :
+ if (t+1) == maxTrials :
+ raise
+ time.sleep(retryDelaySec)
+ continue
+ break
+
+ if params.cmd is None :
+ badUsage("Invalid TaskWrapper input: task command set to NONE")
+
+ if params.cwd == "" or params.cwd == "None" :
+ params.cwd = None
+
+ toutFp = open(params.outFile, "a")
+ terrFp = open(params.errFile, "a")
+
+ # always keep last N lines of task stderr:
+ fifo = SimpleFifo(20)
+
+ isWin=isWindows()
+
+ # Present shell as arg list with Popen(shell=False), so that
+ # we minimize quoting/escaping issues for 'cmd' itself:
+ #
+ fullcmd = []
+ if (not isWin) and params.isShellCmd :
+ # TODO shell selection should be configurable somewhere:
+ shell = ["/bin/bash", "--noprofile", "-o", "pipefail"]
+ fullcmd = shell + ["-c", params.cmd]
+ else :
+ fullcmd = params.cmd
+
+ retval = 1
+
+ isShell=isWin
+
+ try:
+ startTime = time.time()
+ bling.wrapperLog(pffp, "[wrapperSignal] taskStart")
+ # turn off buffering so that stderr is updated correctly and its timestamps
+ # are more accurate:
+ # TODO: is there a way to do this for stderr only?
+ proc = subprocess.Popen(fullcmd, stdout=toutFp, stderr=subprocess.PIPE, shell=isShell, bufsize=1, cwd=params.cwd, env=params.env)
+ bling.transfer(proc.stderr, terrFp, fifo.add)
+ retval = proc.wait()
+
+ elapsed = (time.time() - startTime)
+
+ # communication back to pyflow:
+ bling.wrapperLog(pffp, "[wrapperSignal] taskExitCode %i" % (retval))
+
+ # communication to human-readable log:
+ msg = "Task: '%s' exit code: '%i'" % (taskStr, retval)
+ bling.wrapperLog(terrFp, msg)
+
+ if retval == 0 :
+ # communication back to pyflow:
+ bling.wrapperLog(pffp, "[wrapperSignal] taskElapsedSec %i" % (int(elapsed)))
+
+ # communication to human-readable log:
+ msg = "Task: '%s' complete." % (taskStr)
+ msg += " elapsedSec: %i" % (int(elapsed))
+ msg += " elapsedCoreSec: %i" % (int(elapsed * params.nCores))
+ msg += "\n"
+ bling.wrapperLog(terrFp, msg)
+ else :
+ # communication back to pyflow:
+ tailMsg = fifo.get()
+ bling.wrapperLog(pffp, "[wrapperSignal] taskStderrTail %i" % (1 + len(tailMsg)))
+ pffp.write("Last %i stderr lines from task (of %i total lines):\n" % (len(tailMsg), fifo.count()))
+ for line in tailMsg :
+ pffp.write(line)
+ hardFlush(pffp)
+
+
+ except KeyboardInterrupt:
+ msg = "[ERROR] Keyboard Interupt, shutting down task."
+ bling.wrapperLog(terrFp, msg)
+ sys.exit(1)
+ except:
+ msg = getExceptionMsg()
+ bling.wrapperLog(terrFp, msg)
+ raise
+
+ sys.exit(retval)
+
+
+
+if __name__ == "__main__" :
+ main()
+
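A hedged sketch of driving the wrapper by hand. pyflow normally writes the parameter pickle itself when it launches a task; the key set below is inferred from the params.* attributes read in main() above and may be incomplete, and the runid, task label, and command values are made up.

    import os
    import pickle
    import subprocess

    # parameter set inferred from the attributes main() reads off the params object:
    params = {
        "cmd"        : "echo hello from the wrapper",
        "cwd"        : None,               # run in the current directory
        "outFile"    : "task.stdout.txt",  # task stdout is appended here
        "errFile"    : "task.stderr.txt",  # decorated task stderr is appended here
        "isShellCmd" : True,               # run via bash -c on *nix
        "nCores"     : 1,
        "env"        : dict(os.environ),
    }

    with open("params.pickle", "wb") as fp :
        pickle.dump(params, fp, protocol=0)   # text protocol matches pickle.load(open(f))

    # the wrapper reports state transitions ("[wrapperSignal] ...") on its own stderr;
    # assumes the cwd is pyflow/src, otherwise adjust the script path:
    subprocess.call(["python", "pyflowTaskWrapper.py", "run001", "demoTask", "params.pickle"])
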
diff --git a/scratch/README.txt b/scratch/README.txt
new file mode 100644
index 0000000..d6a10b6
--- /dev/null
+++ b/scratch/README.txt
@@ -0,0 +1,18 @@
+
+
+notes/
+
+random design/development notes
+
+
+pybox/
+
+test code snippets
+
+
+
+test/
+
+unit and integration test scripts
+
+
diff --git a/scratch/delete_trailing_wspace.bash b/scratch/delete_trailing_wspace.bash
new file mode 100755
index 0000000..47cf8da
--- /dev/null
+++ b/scratch/delete_trailing_wspace.bash
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+
+#
+# clean trailing w/s from pyflow source
+#
+# pretty hacky script... probably best to avoid running this if you have a lot of uncommitted changes
+#
+
+set -o nounset
+
+scriptdir=$(cd $(dirname $0); pwd -P)
+
+
+get_source() {
+ find $scriptdir/../pyflow -type f \
+ -name "*.bash" -or \
+ -name "*.py"
+}
+
+tempfile=$(mktemp)
+
+for f in $(get_source); do
+ echo "checking: $f"
+ cat $f |\
+ sed 's/[ ]*$//' >|\
+ $tempfile
+
+ if ! diff $tempfile $f > /dev/null; then
+ mv -f $tempfile $f
+ else
+ rm -f $tempfile
+ fi
+done
diff --git a/scratch/make_release_tarball.bash b/scratch/make_release_tarball.bash
new file mode 100755
index 0000000..5e36955
--- /dev/null
+++ b/scratch/make_release_tarball.bash
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+
+#
+# this script makes the pyflow release tarball assuming it's being called in the git repo
+# already checked out to the targeted version
+#
+# the tarball is written to the caller's cwd
+#
+
+set -o nounset
+set -o xtrace
+set -o errexit
+
+pname_root=""
+if [ $# -gt 1 ]; then
+ echo "usage: $0 [rootname]"
+ exit 2
+elif [ $# == 1 ]; then
+ pname_root=$1
+fi
+
+
+get_abs_path() {
+ (cd $1; pwd -P)
+}
+
+
+script_dir=$(get_abs_path $(dirname $0))
+outdir=$(pwd)
+echo $outdir
+
+cd $script_dir
+echo $script_dir
+gitversion=$(git describe | sed s/^v//)
+
+if [ "$pname_root" == "" ]; then
+ pname_root=pyflow-$gitversion
+fi
+
+pname=$outdir/$pname_root
+
+cd ..
+
+# use archive instead of copy so that we clean up any tmp files in the working directory:
+git archive --prefix=$pname_root/ HEAD:pyflow/ | tar -x -C $outdir
+
+# make version number substitutions:
+cat pyflow/src/pyflow.py |\
+sed "s/pyflowAutoVersion = None/pyflowAutoVersion = \"$gitversion\"/" >|\
+$pname/src/pyflow.py
+
+cat pyflow/README.md |\
+sed "s/\${VERSION}/$gitversion/" >|\
+$pname/README.md
+
+cat pyflow/setup.py |\
+sed "s/\${VERSION}/$gitversion/" >|\
+$pname/setup.py
+
+chmod +x $pname/src/pyflow.py
+
+cd $outdir
+tar -cz $pname_root -f $pname.tar.gz
+rm -rf $pname
+
diff --git a/scratch/notes/design.notes b/scratch/notes/design.notes
new file mode 100644
index 0000000..c7d38a1
--- /dev/null
+++ b/scratch/notes/design.notes
@@ -0,0 +1,74 @@
+1.Get example task file and launch command.
+
+
+launch cmd:
+"""
+/illumina/software/casava/CASAVA-1.8.2/bin/taskServer.pl --tasksFile=/illumina/builds/lox/Saturn/Saturn1_BB0065ACXX_builds/temp_build/tasks.21_09_49_26_01_12.txt --host=ukch-dev-lndt01 --mode=sge
+
+/illumina/software/casava/CASAVA-1.8.2/bin/taskServer.pl --tasksFile=/illumina/builds/lox/Saturn/Saturn1_BB0065ACXX_builds/temp_build/tasks.21_09_49_26_01_12.txt --host=localhost --jobsLimit=1 --mode=local
+"""
+
+
+new task specification file:
+xml
+contains tasks and edges
+no special checkpoints anymore, these are just tasks without commands
+a separate "status" file associates a state with each task
+
+
+OR: new task specification script:
+perl
+too much change at once
+
+dynamic_task_manager:
+w=WorkflowClass(config)
+w.run(filename)
+s.init(mode="local|sge",
+ ncores=X|inf,
+ workflow_file_prefix,
+ is_continue=0|1)
+s.add_task(label,description,command);
+s.add_dependency(label,label2,is_optional);
+s.close()
+
+
+dynamic task manager:
+workflow_dir is used to write the stdout and stderr log, as well as the status file
+prefix/runid.stderr.log
+prefix/runid.stdout.log
+prefix/runid.taskstatus.txt
+prefix/taskstatus.txt
+prefix/workflow_run_history.txt
+
+
+s.add_task(label,command,n_cores,[task_dep_list])
+s.add_task(label,command,n_cores,[task_dep_list])
+
+
+Error policy:
+Stop launching new jobs. Record total number of errors and write this on final log line.
+write_to_file: dir/tasks.error.txt
+
+Logs (all append only):
+# all messages from the workflow engine itself:
+dir/logs/workflow_log.txt
+# all messages from task, including the task wrapper:
+dir/logs/tasks_stderr_log.txt
+dir/logs/tasks_stdout_log.txt
+
+persistence data:
+# record of all data supplied in each add-task call:
+dir/tasks.info.txt (unique append-only)
+dir/task.history.txt
+
+convenience:
+dir/tasks.corehourtime.txt (append-only)
+dir/tasks.dot (refresh at complete workflow specification)
+
+
+if isContinue:
+1) read in state files and reconstruct data structures from these for complete tasks only, set a new isContinued bit, which persists until the new workflow confirms it with an addTask(). An isContinued task cannot be run, but this doesn't matter since these are complete tasks only.
+Complete tasks must match their original descriptions, but all other tasks can change
+2) use these to verify and reassign runstate for completed tasks only
+
+
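For reference against these early notes, a rough, hedged mapping of the planned add_task()/checkpoint interface onto the API that actually shipped in pyflow/src/pyflow.py (labels and commands below are made up):

    from pyflow import WorkflowRunner

    class NotesSketchWorkflow(WorkflowRunner) :
        def workflow(self) :
            # s.add_task(label, command, n_cores, [task_dep_list]) became addTask():
            self.addTask("align", "echo align", nCores=4)

            # a 'checkpoint' is simply a task added without a command:
            self.addTask("align_checkpoint", dependencies=["align"])

            self.addTask("report", "echo report", dependencies=["align_checkpoint"])

    # s.init()/s.close() collapsed into a single run() call:
    # NotesSketchWorkflow().run("local", nCores=4)
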
diff --git a/scratch/notes/todo b/scratch/notes/todo
new file mode 100644
index 0000000..59492b8
--- /dev/null
+++ b/scratch/notes/todo
@@ -0,0 +1,53 @@
+pyflow:
+switch to standard python logging module
+summarize total warnings and errors at end of run -- have a per run log folder to store this stuff
+ability to change global defaults at the start of a script.
+add timeouts
+add a callable task, to allow delayed execution of pure python (how to prevent user from doing intense computation on this task?)
+allow file task dependencies
+specify maxcpuCores -- communicate back through an env variable
+better run_num bump
+allow tasks to specify a set of output files, to make nfs tracking better
+allow filenames as task ids
+
+bagPipes:
+check/warning for default sample sheet case
+Java mem grower? -- scan for "java.lang.OutOfMemoryError" and add more memory?
+
+****Add logging for events expected to be rare: any qstat timeout/retry
+****Good system to detect two pyflow jobs trying to use the same pyflow.data directory
+****Get git describe into pyflow automatically
+****version number management
+****Pyflow use two layers of tmp folders
+****Provide last ?10? lines of task stderr on notification when a task fails
+****turn on thread mem reduction
+****Don't write graph on update -- instead provide a script to generate this.
+****setup taskWrapper->parent communication via stderr
+****add something more 'psubmake-ish' for the makefile handling
+****mode change should not interfere with continuation (make/qmake)
+****send cmd as arg list in addition to string
+****resource management for memory (on local at least)
+****specify a mapping function between resources and SGE qsub phrases -- allow this to be overridden for different SGE configurations.
+****add subworkflows as tasks (this goes in a subworkflow namespace)
+****waitForTask() right now blocks the specification of all other non-dependent tasks. Think of a scheme to get around this -- threads in the workflow function?
+****color graph
+****write dot file (on command?, automatically at end of workflow specification?)
+****add run parameters to log
+****add public log function
+****add exception notification email
+****make error notification email more robust
+****email events: onComplete, onFirstTaskError
+****create dryrun mode, include runMode() in interface
+****start working on persistence
+****start developing total task time methods
+****add task retry
+****rename file and git to 'pyflow'
+****add forceLocal flag to enable qmake/make runs
+****start working on SGE
+****put task stdout/stderr somewhere
+****write most workflow exceptions to workflow_log
+****check task names for spaces
+****should jobs be killed on ctrl-c?
+****start developing error handle/report polices
+****check that subprocess is correct for intense tasks
+****fix task4 bug
diff --git a/scratch/pybox/email_test.py b/scratch/pybox/email_test.py
new file mode 100644
index 0000000..19efdcb
--- /dev/null
+++ b/scratch/pybox/email_test.py
@@ -0,0 +1,29 @@
+
+import smtplib
+from email.MIMEText import MIMEText
+
+def getHostName() :
+ import socket
+ #return socket.gethostbyaddr(socket.gethostname())[0]
+ return socket.getfqdn()
+
+def getDomainName() :
+ "maybe this isn't the technical term -- this is just the hostname - the host"
+ hn=getHostName().split(".")
+ if len(hn)>1 : hn=hn[1:]
+ return ".".join(hn)
+
+
+me = "pyflow-bot@"+getDomainName()
+to = "csaunders at illumina.com"
+
+msg=MIMEText("foo foo")
+msg["Subject"] = "pyFlow: job: XXX complete"
+msg["From"] = me
+msg["To"] = to
+
+msg.as_string()
+
+s=smtplib.SMTP('localhost')
+s.sendmail(me,to,msg.as_string())
+s.quit()
diff --git a/scratch/pybox/hijack.py b/scratch/pybox/hijack.py
new file mode 100644
index 0000000..bacc9fc
--- /dev/null
+++ b/scratch/pybox/hijack.py
@@ -0,0 +1,25 @@
+
+
+class A :
+ def __init__(self) :
+ self.x = 1
+
+ def inc(self) :
+ self.x += 1
+
+
+a = A()
+b = A()
+
+a.inc()
+b.inc()
+
+
+# hijack:
+b.inc = a.inc
+
+b.inc()
+
+print "a", a.x
+print "b", b.x
+
diff --git a/scratch/pybox/inspect.py b/scratch/pybox/inspect.py
new file mode 100644
index 0000000..2bc960c
--- /dev/null
+++ b/scratch/pybox/inspect.py
@@ -0,0 +1,7 @@
+
+def f(x) :
+ return x + 2
+
+import inspect
+print inspect.getsource(f)
+
diff --git a/scratch/pybox/memTest.py b/scratch/pybox/memTest.py
new file mode 100755
index 0000000..9836374
--- /dev/null
+++ b/scratch/pybox/memTest.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+#
+# This test exercises pyflow memory limits with a handful of trivial
+# 'hello world' tasks run under a constrained run-level memMb setting.
+# See the 'simpleDemo' for coverage of a few more pyflow features
+#
+
+import os.path
+import sys
+
+# add module path by hand
+#
+sys.path.append(os.path.abspath(os.path.dirname(__file__))+"/../pyflow/src")
+
+
+from pyflow import WorkflowRunner
+
+
+# all pyflow workflows are written into classes derived from pyflow.WorkflowRunner:
+#
+class MemTestWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ # The following tasks probe the run-level and per-task memory limits:
+ self.addTask("easy_task1","echo 'Hello World!'")
+ self.addTask("easy_task2","echo 'Hello World!'")
+ self.addTask("easy_task3","echo 'Hello World!'")
+ self.addTask("easy_task4","echo 'Hello World!'",memMb=1)
+
+
+
+# Instantiate the workflow
+#
+wflow = MemTestWorkflow()
+
+# Run the workflow:
+#
+retval=wflow.run(nCores=8,memMb=2049)
+
+# done!
+sys.exit(retval)
+
diff --git a/scratch/test/README.md b/scratch/test/README.md
new file mode 100644
index 0000000..956bb51
--- /dev/null
+++ b/scratch/test/README.md
@@ -0,0 +1,30 @@
+
+## pyflow test scripts
+
+### Global test scripts
+
+The new global test script maintained for *nix and windows is:
+
+ test_pyflow.py
+
+
+The previous global test script written for *nix only is:
+
+ test_release_tarball.bash
+
+
+...this currently contains more tests, and will still be the test target for
+travis until windows support is complete.
+
+
+### Component test scripts
+
+* pyflow_unit_tests.py - all pyflow unit tests
+
+* pyflow_basic_feature_runner.py - runs a number of pyflow operations for
+ local or sge modes
+
+* demos - Running through the various small demo scripts and making sure they
+ complete without error is used to round out the full test process. Most demo
+ scripts are linux-only at this point.
+
diff --git a/scratch/test/pyflow_basic_feature_runner.py b/scratch/test/pyflow_basic_feature_runner.py
new file mode 100755
index 0000000..ecbb32e
--- /dev/null
+++ b/scratch/test/pyflow_basic_feature_runner.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+"""
+This script exercises a broad range of pyflow features. It does not
+verify that every operation ran correctly; it will only pick up a
+basic crash or hang.
+"""
+
+import os.path
+import sys
+
+# bad example of how to add the path:
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+
+
+# setup PYTHONPATH instead...
+
+#sys.path.append(scriptDir+"/../pyflow/src")
+
+from pyflow import WorkflowRunner
+
+
+testJobDir=os.path.join(scriptDir,"testtasks")
+
+sleepjob=os.path.join(testJobDir,"sleeper.bash")
+yelljob=os.path.join(testJobDir,"yeller.bash")
+runjob=os.path.join(testJobDir,"runner.bash")
+
+class SubSubWorkflow(WorkflowRunner) :
+
+ def workflow(self) :
+ self.addTask("easy_task1",yelljob+" 1")
+ self.addTask("easy_task2",runjob+" 2",nCores=3,dependencies=["easy_task1"])
+ self.waitForTasks("easy_task2")
+ self.addTask("easy_task3",runjob+" 2",nCores=3,dependencies=["easy_task2"])
+ # intentional fail:
+ #self.addTask("easy_task3b",runjob,dependencies=["easy_task2"])
+
+
+class SubWorkflow(WorkflowRunner) :
+
+ def workflow(self) :
+ self.addTask("easy_task1",yelljob+" 1")
+ self.addTask("easy_task2",runjob+" 2",nCores=3,dependencies=["easy_task1"])
+ self.addTask("easy_task3",runjob+" 2",nCores=3,dependencies=["easy_task2"])
+ wflow=SubSubWorkflow()
+ self.addWorkflowTask("subsubwf_task1",wflow,dependencies="easy_task1")
+
+
+
+class TestWorkflow(WorkflowRunner) :
+
+ def workflow(self) :
+
+ job=sleepjob+" 1"
+
+ self.addTask("easy_task1",yelljob+" 1")
+ waitTask=self.addTask("easy_task3",runjob+" 10",nCores=2,memMb=1024,isForceLocal=True)
+ self.flowLog("My message")
+
+ swflow=SubWorkflow()
+
+ self.addWorkflowTask("subwf_task1",swflow,dependencies=waitTask)
+ self.addWorkflowTask("subwf_task2",swflow,dependencies=waitTask)
+
+ self.addTask("easy_task4",runjob+" 2",nCores=3,dependencies=["subwf_task1","subwf_task2"])
+ self.addTask("easy_task5",job,nCores=1)
+
+ # and stop here
+ self.waitForTasks()
+
+ self.flowLog("ITC1: "+str(self.isTaskComplete("easy_task1")))
+ self.flowLog("ITC6: "+str(self.isTaskComplete("easy_task6")))
+
+ self.addTask("easy_task6",job)
+ #self.addTask("easy_task2",sleepjob)
+ self.addTask("checkpoint_task",dependencies=["easy_task1","easy_task6","easy_task4"])
+ self.addTask("dep_task",sleepjob+" 4",dependencies=["checkpoint_task"])
+
+
+
+def getRunOptions() :
+
+ from optparse import OptionParser
+
+ defaults = { "mode" : "local" }
+
+ parser = OptionParser()
+ parser.set_defaults(**defaults)
+
+ parser.add_option("-m", "--mode", type="string", dest="mode",
+ help="Select run mode {local,sge} (default: %default)")
+
+ (options, args) = parser.parse_args()
+
+ if len(args) :
+ parser.print_help()
+ sys.exit(2)
+
+ if options.mode not in ["sge","local"] :
+ parser.print_help()
+ sys.exit(2)
+
+ return options
+
+
+
+def main() :
+ options = getRunOptions()
+ wflow = TestWorkflow()
+ retval=wflow.run(options.mode,nCores=8,memMb=8*1024,isContinue=False)
+ sys.exit(retval)
+
+
+
+if __name__ == "__main__" :
+ main()
diff --git a/scratch/test/pyflow_unit_tests.py b/scratch/test/pyflow_unit_tests.py
new file mode 100755
index 0000000..2eaf18c
--- /dev/null
+++ b/scratch/test/pyflow_unit_tests.py
@@ -0,0 +1,430 @@
+#!/usr/bin/env python
+
+import unittest
+import os
+import sys
+
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+
+def pyflow_lib_dir() :
+ return os.path.abspath(os.path.join(scriptDir,os.pardir,os.pardir,"pyflow","src"))
+
+try :
+ # if pyflow is in PYTHONPATH already then use the specified copy:
+ from pyflow import isWindows,WorkflowRunner
+except :
+ # otherwise use the relative path within this repo:
+ sys.path.append(pyflow_lib_dir())
+ from pyflow import isWindows,WorkflowRunner
+
+
+def getRmCmd() :
+ if isWindows():
+ return ["del","/f"]
+ else:
+ return ["rm","-f"]
+
+
+def getSleepCmd() :
+ if isWindows():
+ return ["timeout"]
+ else:
+ return ["sleep"]
+
+
+def getCatCmd() :
+ if isWindows():
+ return ["type"]
+ else:
+ return ["cat"]
+
+
+def getCmdString(cmdList) :
+ return " ".join(cmdList)
+
+
+
+class NullWorkflow(WorkflowRunner) :
+ pass
+
+
+
+class TestWorkflowRunner(unittest.TestCase) :
+
+ def __init__(self, *args, **kw) :
+ unittest.TestCase.__init__(self, *args, **kw)
+ self.testPath="testDataRoot"
+
+ def setUp(self) :
+ self.clearTestPath()
+
+ def tearDown(self) :
+ self.clearTestPath()
+
+ def clearTestPath(self) :
+ import shutil
+ if os.path.isdir(self.testPath) :
+ shutil.rmtree(self.testPath)
+
+
+ def test_createDataDir(self) :
+ w=NullWorkflow()
+ w.run("local",self.testPath,isQuiet=True)
+ self.assertTrue(os.path.isdir(self.testPath))
+
+
+ def test_badMode(self) :
+ w=NullWorkflow()
+ try:
+ w.run("foomode",self.testPath,isQuiet=True)
+ self.fail("Didn't raise Exception")
+ except KeyError:
+ self.assertTrue(sys.exc_info()[1].args[0].find("foomode") != -1)
+
+
+ def test_errorLogPositive(self) :
+ """
+ Test that errors are written to separate log when requested
+ """
+ os.mkdir(self.testPath)
+ logFile=os.path.join(self.testPath,"error.log")
+ w=NullWorkflow()
+ try:
+ w.run("foomode",self.testPath,errorLogFile=logFile,isQuiet=True)
+ self.fail("Didn't raise Exception")
+ except KeyError:
+ self.assertTrue(sys.exc_info()[1].args[0].find("foomode") != -1)
+ self.assertTrue((os.path.getsize(logFile) > 0))
+
+
+ def test_errorLogNegative(self) :
+ """
+ Test that no errors are written to separate error log when none occur
+ """
+ os.mkdir(self.testPath)
+ logFile=os.path.join(self.testPath,"error.log")
+ w=NullWorkflow()
+ w.run("local",self.testPath,errorLogFile=logFile,isQuiet=True)
+ self.assertTrue((os.path.getsize(logFile) == 0))
+
+
+ def test_dataDirCollision(self) :
+ """
+ Test that when two pyflow jobs are launched with the same dataDir, the second will fail.
+ """
+ import threading,time
+
+ class StallWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ self2.addTask("sleeper",getSleepCmd()+["5"])
+
+ class runner(threading.Thread) :
+ def __init__(self2) :
+ threading.Thread.__init__(self2)
+ self2.retval1=1
+
+ def run(self2) :
+ w=StallWorkflow()
+ self2.retval1=w.run("local",self.testPath,isQuiet=True)
+
+ w2=StallWorkflow()
+
+ r1=runner()
+ r1.start()
+ time.sleep(1)
+ retval2=w2.run("local",self.testPath,isQuiet=True)
+ self.assertTrue(retval2==1)
+ r1.join()
+ self.assertTrue(r1.retval1==0)
+
+
+ def test_forceContinue(self) :
+ class TestWorkflow(WorkflowRunner) :
+ color="red"
+
+ def setColor(self2,color) :
+ self2.color=color
+
+ def workflow(self2) :
+ self2.addTask("A","echo "+self2.color)
+
+ w=TestWorkflow()
+ retval=w.run("local",self.testPath,isQuiet=True)
+ self.assertTrue(retval==0)
+ retval=w.run("local",self.testPath,isContinue=True,isQuiet=True)
+ self.assertTrue(retval==0)
+ w.setColor("green")
+ retval=w.run("local",self.testPath,isContinue=True,isQuiet=True)
+ self.assertTrue(retval==1)
+ retval=w.run("local",self.testPath,isContinue=True,isForceContinue=True,isQuiet=True)
+ self.assertTrue(retval==0)
+
+
+ def test_badContinue(self) :
+ w=NullWorkflow()
+ try:
+ w.run("local",self.testPath,isContinue=True,isQuiet=True)
+ self.fail("Didn't raise Exception")
+ except Exception:
+ self.assertTrue(sys.exc_info()[1].args[0].find("Cannot continue run") != -1)
+
+
+ def test_goodContinue(self) :
+ w=NullWorkflow()
+ retval1=w.run("local",self.testPath,isQuiet=True)
+ retval2=w.run("local",self.testPath,isContinue=True,isQuiet=True)
+ self.assertTrue((retval1==0) and (retval2==0))
+
+
+ def test_autoContinue(self) :
+ w=NullWorkflow()
+ retval1=w.run("local",self.testPath,isContinue="Auto",isQuiet=True)
+ retval2=w.run("local",self.testPath,isContinue="Auto",isQuiet=True)
+ self.assertTrue((retval1==0) and (retval2==0))
+
+
+ def test_simpleDependency(self) :
+ "make sure B waits for A"
+ class TestWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ filePath=os.path.join(self.testPath,"tmp.txt")
+ self2.addTask("A","echo foo > " +filePath)
+ self2.addTask("B",getCmdString(getCatCmd()) + " " + filePath + " && " + getCmdString(getRmCmd())+ " " + filePath,dependencies="A")
+
+ w=TestWorkflow()
+ self.assertTrue((0==w.run("local",self.testPath,isQuiet=True)))
+
+
+ def test_waitDependency(self) :
+ "make sure waitForTasks waits for A on the workflow thread"
+ class TestWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ filePath=os.path.join(self.testPath,"tmp.txt")
+ if os.path.isfile(filePath) : os.remove(filePath)
+ self2.addTask("A",getCmdString(getSleepCmd()) + " 5 && echo foo > %s" % (filePath))
+ self2.waitForTasks("A")
+ assert(os.path.isfile(filePath))
+ self2.addTask("B",getCmdString(getCatCmd()) + " " + filePath +" && " + getCmdString(getRmCmd())+ " " + filePath)
+
+ w=TestWorkflow()
+ self.assertTrue(0==w.run("local",self.testPath,isQuiet=True))
+
+
+ def test_flowLog(self) :
+ "make sure flowLog doesn't throw -- but this does not check if the log is updated"
+ class TestWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ self2.flowLog("My Message")
+
+ w=TestWorkflow()
+ self.assertTrue(0==w.run("local",self.testPath,isQuiet=True))
+
+
+ def test_deadSibling(self) :
+ """
+ Tests that when a task error occurs in one sub-workflow, its
+ sibling workflows exit correctly (instead of hanging forever).
+ This test covers an error case found early in the library's history.
+ """
+ class SubWorkflow1(WorkflowRunner) :
+ "this one fails"
+ def workflow(self2) :
+ self2.addTask("A",getSleepCmd()+["5"])
+ self2.addTask("B","boogyman!",dependencies="A")
+
+ class SubWorkflow2(WorkflowRunner) :
+ "this one doesn't fail"
+ def workflow(self2) :
+ self2.addTask("A",getSleepCmd()+["5"])
+ self2.addTask("B",getSleepCmd()+["5"],dependencies="A")
+
+ class MasterWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ wflow1=SubWorkflow1()
+ wflow2=SubWorkflow2()
+ self2.addWorkflowTask("wf1",wflow1)
+ self2.addWorkflowTask("wf2",wflow2)
+
+ w=MasterWorkflow()
+ self.assertTrue(1==w.run("local",self.testPath,nCores=2,isQuiet=True))
+
+
+ def test_selfDependency1(self) :
+ """
+ """
+ class SelfWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ self2.addTask("A",getSleepCmd()+["5"],dependencies="A")
+
+ w=SelfWorkflow()
+ self.assertTrue(1==w.run("local",self.testPath,isQuiet=True))
+
+
+ def test_expGraphScaling(self) :
+ """
+ This tests that pyflow does not scale poorly with highly connected subgraphs.
+
+ When the error occurs, it locks the primary thread, so we put the test workflow
+ on its own thread so that we can time it and issue an error.
+
+ Issue reported by R Kelley and A Halpern
+ """
+
+ import threading
+
+ class ScalingWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ tasks = set()
+ for idx in xrange(60) :
+ sidx = str(idx)
+ tasks.add(self2.addTask("task_" + sidx, "echo " + sidx, dependencies = tasks))
+ self2.waitForTasks("task_50")
+ tasks.add(self2.addTask("task_1000", "echo 1000", dependencies = tasks))
+
+ class runner(threading.Thread) :
+ def __init__(self2) :
+ threading.Thread.__init__(self2)
+ self2.setDaemon(True)
+
+ def run(self2) :
+ w=ScalingWorkflow()
+ w.run("local",self.testPath,isQuiet=True)
+
+ r1=runner()
+ r1.start()
+ r1.join(30)
+ self.assertTrue(not r1.isAlive())
+
+ def test_startFromTasks(self) :
+ """
+ run() option to ignore all tasks before a specified task node
+ """
+ filePath=os.path.join(self.testPath,"tmp.txt")
+
+ class SelfWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ self2.addTask("A","echo foo > "+filePath)
+ self2.addTask("B",getSleepCmd()+["1"],dependencies="A")
+ self2.addTask("C",getSleepCmd()+["1"],dependencies=("A","B"))
+
+ w=SelfWorkflow()
+ self.assertTrue(0==w.run("local",self.testPath,isQuiet=True,startFromTasks="B"))
+ self.assertTrue(not os.path.exists(filePath))
+
+
+ def test_startFromTasksSubWflow(self) :
+ """
+ run() option to ignore all tasks before a specified task node
+ """
+ filePath=os.path.join(self.testPath,"tmp.txt")
+
+ class SubWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ self2.addTask("D","echo foo > "+filePath)
+
+ class SelfWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ self2.addTask("A",getSleepCmd()+["1"])
+ self2.addWorkflowTask("B",SubWorkflow(),dependencies="A")
+ self2.addTask("C",getSleepCmd()+["1"],dependencies=("A","B"))
+
+ w=SelfWorkflow()
+ self.assertTrue(0==w.run("local",self.testPath,isQuiet=True,startFromTasks="B"))
+ self.assertTrue(os.path.exists(filePath))
+
+
+ def test_startFromTasksSubWflow2(self) :
+ """
+ run() option to ignore all tasks before a specified task node
+ """
+ filePath=os.path.join(self.testPath,"tmp.txt")
+
+        class SubWorkflow(WorkflowRunner) :
+            def workflow(self2) :
+                self2.addTask("D","echo foo > "+filePath)
+
+        class SelfWorkflow(WorkflowRunner) :
+            def workflow(self2) :
+                self2.addTask("A",getSleepCmd()+["1"])
+                self2.addWorkflowTask("B",SubWorkflow(),dependencies="A")
+                self2.addTask("C",getSleepCmd()+["1"],dependencies=("A","B"))
+
+        w=SelfWorkflow()
+        self.assertTrue(0==w.run("local",self.testPath,isQuiet=True,startFromTasks="C"))
+        self.assertTrue(not os.path.exists(filePath))
+
+
+    def test_ignoreTasksAfter(self) :
+        """
+        run() option to ignore all tasks below a specified task node
+        """
+        class SelfWorkflow(WorkflowRunner) :
+            def workflow(self2) :
+                self2.addTask("A",getSleepCmd()+["1"])
+                self2.addTask("B",getSleepCmd()+["1"],dependencies="A")
+                self2.addTask("C",getSleepCmd()+["1"],dependencies=("A","B"))
+
+        w=SelfWorkflow()
+        self.assertTrue(0==w.run("local",self.testPath,isQuiet=True,ignoreTasksAfter="B"))
+        self.assertTrue(not w.isTaskComplete("C"))
+
+    def test_addTaskOutsideWorkflow(self) :
+        """
+        test that calling addTask() outside of a workflow() method
+        raises an exception
+        """
+
+        class SelfWorkflow(WorkflowRunner) :
+            def __init__(self2) :
+                self2.addTask("A",getSleepCmd()+["1"])
+
+        # self.fail() sits outside the try block so that the bare except below
+        # cannot swallow the AssertionError it raises
+        try :
+            w=SelfWorkflow()
+        except :
+            return
+        self.fail("Didn't raise Exception")
+
+    def test_runModeInSubWorkflow(self) :
+        """
+        test that calling getRunMode() in a sub-workflow's workflow() method
+        does not raise an exception (github issue #5)
+        """
+
+        class SubWorkflow(WorkflowRunner) :
+            def workflow(self2) :
+                if self2.getRunMode() == "local" :
+                    self2.addTask("D",getSleepCmd()+["1"])
+
+        class SelfWorkflow(WorkflowRunner) :
+            def workflow(self2) :
+                self2.addTask("A",getSleepCmd()+["1"])
+                self2.addWorkflowTask("B",SubWorkflow(),dependencies="A")
+                self2.addTask("C",getSleepCmd()+["1"],dependencies=("A","B"))
+
+        try :
+            w=SelfWorkflow()
+            self.assertTrue(0==w.run("local",self.testPath,isQuiet=True))
+        except :
+            self.fail("Should not raise Exception")
+
+    def test_CheckpointChain(self) :
+        """
+        Test that checkpoint tasks are handled correctly even
+        when multiple checkpoints have a parent-child relationship
+        """
+
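+        # addTask() with no command creates a checkpoint task; checkpoint "C" here
+        # has the checkpoints "A" and "B" as its parents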
+        class SelfWorkflow(WorkflowRunner) :
+            def workflow(self2) :
+                self2.addTask("A")
+                self2.addTask("B")
+                self2.addTask("C",dependencies=["A","B"])
+
+        try :
+            w=SelfWorkflow()
+            self.assertTrue(0==w.run("local",self.testPath,isQuiet=True))
+        except :
+            self.fail("Should not raise Exception")
+
+if __name__ == '__main__' :
+    unittest.main()
+
diff --git a/scratch/test/test_pyflow.py b/scratch/test/test_pyflow.py
new file mode 100755
index 0000000..a4c4a84
--- /dev/null
+++ b/scratch/test/test_pyflow.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+#
+"""
+automation friendly cross-platform tests for pyflow
+"""
+
+import os
+import sys
+
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+
+
+def getOptions() :
+
+    from optparse import OptionParser
+
+    usage = "usage: %prog [options]"
+    parser = OptionParser(usage=usage,description="Run all pyflow tests")
+
+    parser.add_option("--nosge",dest="isSkipSge", action="store_true",
+                      help="skip SGE testing")
+
+    (options,args) = parser.parse_args()
+
+    if len(args) != 0 :
+        parser.print_help()
+        sys.exit(2)
+
+    return (options,args)
+
+
+def main() :
+    import subprocess
+
+    (options,args) = getOptions()
+
+    pyflowRootDir=os.path.abspath(os.path.join(scriptDir,os.pardir,os.pardir))
+    pyflowDir=os.path.join(pyflowRootDir,"pyflow")
+
+    utScriptPath=os.path.join(scriptDir,"pyflow_unit_tests.py")
+
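+    # "-E" makes the child python interpreters ignore PYTHON* environment
+    # variables, so the subprocesses below run with a clean environment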
+    if True :
+        # shell out to run the unit tests for now -- TODO: can we just import this instead?
+        utCmd=[sys.executable,"-E",utScriptPath,"-v"]
+        proc = subprocess.Popen(utCmd)
+        proc.wait()
+        if proc.returncode != 0 :
+            raise Exception("Pyflow unit test run failed")
+
+    # run through demos (only the helloWorld demo works on Windows)
+    if True :
+        demoDir=os.path.join(pyflowDir,"demo")
+        for demoName in ["helloWorld"] :
+            demoScriptPath=os.path.join(demoDir,demoName,demoName+".py")
+            demoCmd=[sys.executable,"-E",demoScriptPath]
+            proc = subprocess.Popen(demoCmd)
+            proc.wait()
+            if proc.returncode != 0 :
+                raise Exception("Pyflow demo failed: '%s'" % (demoScriptPath))
+
+
+main()
+
diff --git a/scratch/test/test_release_tarball.bash b/scratch/test/test_release_tarball.bash
new file mode 100755
index 0000000..afdd3ae
--- /dev/null
+++ b/scratch/test/test_release_tarball.bash
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+#
+# this script brings everything together for an automated build/test system
+#
+
+set -o errexit
+set -o nounset
+set -o xtrace
+
+if [ $# -gt 1 ]; then
+    echo "usage: $0 [ -nosge ]" >&2
+    exit 2
+fi
+
+is_sge=1
+if [ $# -ge 1 ] && [ "$1" == "-nosge" ]; then
+    is_sge=0
+fi
+
+
+
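+# build a release tarball from the working tree, unpack it, and run all tests
+# against the unpacked copy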
+thisdir=$(dirname $0)
+
+cd $thisdir/..
+testname=TESTBALL
+bash ./make_release_tarball.bash $testname
+tar -xzf $testname.tar.gz
+
+testdir=$(pwd)/$testname
+
+# run through tests:
+PYTHONPATH=$testdir/src test/pyflow_unit_tests.py -v
+
+# run this a few times just in case we can shake out any subtle/rare race conditions:
+for f in $(seq 5); do
+    PYTHONPATH=$testdir/src test/pyflow_basic_feature_runner.py --mode local
+done
+
+if [ $is_sge == 1 ]; then
+    PYTHONPATH=$testdir/src test/pyflow_basic_feature_runner.py --mode sge
+fi
+
+# run through demos:
+for f in cwdDemo envDemo helloWorld makeDemo memoryDemo mutexDemo simpleDemo subWorkflow; do
+    cd $testdir/demo/$f
+    python $f.py
+    python pyflow.data/state/make_pyflow_task_graph.py >| test.dot
+done
+
+
diff --git a/scratch/test/testtasks/runner.bash b/scratch/test/testtasks/runner.bash
new file mode 100755
index 0000000..0bf66ec
--- /dev/null
+++ b/scratch/test/testtasks/runner.bash
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+
+thisdir=$(dirname $0)
+
+cd $thisdir
+
+if ! [ -e ./runner ]; then
+    # turning on -O2 is too variable across different platforms, so leave off:
+    gcc ./runner.c -lm -o runner.tmp && mv runner.tmp runner
+fi
+
+./runner $1
+
+
diff --git a/scratch/test/testtasks/runner.c b/scratch/test/testtasks/runner.c
new file mode 100644
index 0000000..5fad9c8
--- /dev/null
+++ b/scratch/test/testtasks/runner.c
@@ -0,0 +1,16 @@
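+/* CPU-burning test task: repeatedly accumulates and square-roots a running
+ * sum; total work scales with the single integer argument */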
+#include "math.h"
+#include "assert.h"
+
+int main(int argc, char**argv) {
+assert(argc==2);
+int mult=atoi(argv[1]);
+int i,j;
+double a=0;
+long total=50000000;
+for(j=0;j<mult;++j) {
+for(i=0;i<total;++i) {
+ a+=i;a=sqrt(a);
+}
+}
+return 0;
+}
diff --git a/scratch/test/testtasks/sleeper.bash b/scratch/test/testtasks/sleeper.bash
new file mode 100755
index 0000000..d901a93
--- /dev/null
+++ b/scratch/test/testtasks/sleeper.bash
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting sleep
+sleep $arg
+echo pid: $pid arg: $arg ending sleep
+
diff --git a/scratch/test/testtasks/slow_yeller.py b/scratch/test/testtasks/slow_yeller.py
new file mode 100755
index 0000000..80c5aae
--- /dev/null
+++ b/scratch/test/testtasks/slow_yeller.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+
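+# test task which slowly writes interleaved, timestamped lines to stdout and stderr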
+import os,sys,time
+import datetime
+
+if len(sys.argv) != 2 :
+    print "usage: %s arg" % (sys.argv[0])
+    sys.exit(1)
+
+arg=sys.argv[1]
+
+pid=os.getpid()
+
+sys.stdout.write("pid: %s arg: %s starting yell\n" % (str(pid),arg))
+
+for i in xrange(100):
+    td=datetime.datetime.utcnow().isoformat()
+    msg="Yeller %s yellin %i" % (str(pid),i)
+    sys.stdout.write(msg+" stdout "+td+"\n")
+    sys.stderr.write(msg+" stderr "+td+"\n")
+    time.sleep(1)
+
+sys.stdout.write("pid: %s arg: %s ending yell\n" % (str(pid),arg))
+
diff --git a/scratch/test/testtasks/yeller.bash b/scratch/test/testtasks/yeller.bash
new file mode 100755
index 0000000..cdd4845
--- /dev/null
+++ b/scratch/test/testtasks/yeller.bash
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting yell
+for i in {1..100}; do
+    echo "Yeller $pid yellin $i stdout"
+    echo "Yeller $pid yellin $i stderr" 1>&2
+done
+echo pid: $pid arg: $arg ending yell
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-pyflow.git