[med-svn] [python-pyflow] 01/02: New upstream version 1.1.13
Andreas Tille
tille at debian.org
Tue Nov 15 15:01:27 UTC 2016
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository python-pyflow.
commit 7e7a7d12449e83e7512a9165a5a9f3904721a4a5
Author: Andreas Tille <tille at debian.org>
Date: Tue Nov 15 15:59:38 2016 +0100
New upstream version 1.1.13
---
.appveyor.yml | 15 +
.gitattributes | 1 +
.gitignore | 3 +
.travis.yml | 44 +
README.md | 64 +
pyflow/COPYRIGHT.txt | 28 +
pyflow/README.md | 189 +
pyflow/demo/README.txt | 33 +
pyflow/demo/bclToBwaBam/README.txt | 27 +
pyflow/demo/bclToBwaBam/bwaworkflow.py | 676 ++++
pyflow/demo/bclToBwaBam/configBclToBwaBam.py | 397 ++
pyflow/demo/bclToBwaBam/configBclToBwaBam.py.ini | 5 +
pyflow/demo/bclToBwaBam/example_configuration.bash | 18 +
pyflow/demo/cwdDemo/cwdDemo.py | 85 +
pyflow/demo/envDemo/envDemo.py | 96 +
pyflow/demo/helloWorld/README.txt | 3 +
pyflow/demo/helloWorld/helloWorld.py | 77 +
pyflow/demo/makeDemo/.hidden | 7 +
pyflow/demo/makeDemo/makeDemo.py | 87 +
pyflow/demo/memoryDemo/memoryDemo.py | 83 +
pyflow/demo/mutexDemo/mutexDemo.py | 89 +
pyflow/demo/mutexDemo/testtasks/sleeper.bash | 13 +
pyflow/demo/retryDemo/retryDemo.py | 90 +
pyflow/demo/runOptionsDemo/getDemoRunOptions.py | 133 +
pyflow/demo/runOptionsDemo/runOptionsDemo.py | 109 +
pyflow/demo/runOptionsDemo/testtasks/sleeper.bash | 13 +
pyflow/demo/runOptionsDemo/testtasks/yeller.bash | 16 +
pyflow/demo/simpleDemo/simpleDemo.py | 177 +
pyflow/demo/simpleDemo/testtasks/runner.bash | 17 +
pyflow/demo/simpleDemo/testtasks/runner.c | 16 +
pyflow/demo/simpleDemo/testtasks/sleeper.bash | 13 +
pyflow/demo/simpleDemo/testtasks/yeller.bash | 16 +
pyflow/demo/subWorkflow/subWorkflow.py | 115 +
pyflow/demo/subWorkflow/testtasks/runner.bash | 17 +
pyflow/demo/subWorkflow/testtasks/runner.c | 16 +
pyflow/demo/subWorkflow/testtasks/sleeper.bash | 13 +
pyflow/demo/subWorkflow/testtasks/yeller.bash | 16 +
pyflow/demo/successMsgDemo/successMsgDemo.py | 81 +
pyflow/doc/ChangeLog.txt | 202 +
pyflow/doc/README.txt | 4 +
pyflow/doc/client_api/README | 12 +
.../make_WorkflowRunner_API_html_doc.bash | 6 +
.../make_WorkflowRunner_API_simple_doc.py | 13 +
pyflow/doc/developer/README | 1 +
.../developer/make_pyflow_developer_html_doc.bash | 6 +
pyflow/setup.py | 11 +
pyflow/src/__init__.py | 1 +
pyflow/src/pyflow.py | 4175 ++++++++++++++++++++
pyflow/src/pyflowConfig.py | 213 +
pyflow/src/pyflowTaskWrapper.py | 338 ++
scratch/README.txt | 18 +
scratch/delete_trailing_wspace.bash | 33 +
scratch/make_release_tarball.bash | 65 +
scratch/notes/design.notes | 74 +
scratch/notes/todo | 53 +
scratch/pybox/email_test.py | 29 +
scratch/pybox/hijack.py | 25 +
scratch/pybox/inspect.py | 7 +
scratch/pybox/memTest.py | 46 +
scratch/test/README.md | 30 +
scratch/test/pyflow_basic_feature_runner.py | 116 +
scratch/test/pyflow_unit_tests.py | 430 ++
scratch/test/test_pyflow.py | 63 +
scratch/test/test_release_tarball.bash | 50 +
scratch/test/testtasks/runner.bash | 14 +
scratch/test/testtasks/runner.c | 16 +
scratch/test/testtasks/sleeper.bash | 13 +
scratch/test/testtasks/slow_yeller.py | 24 +
scratch/test/testtasks/yeller.bash | 16 +
69 files changed, 9002 insertions(+)
diff --git a/.appveyor.yml b/.appveyor.yml
new file mode 100644
index 0000000..007af0e
--- /dev/null
+++ b/.appveyor.yml
@@ -0,0 +1,15 @@
+
+install:
+ # Check the python version:
+ - "python.exe --version"
+
+build: false # Not a C# project
+
+test_script:
+ # Run the project tests
+ - "python.exe scratch/test/test_pyflow.py"
+
+notifications:
+ - provider: Email
+ to:
+ - csaunders at illumina.com
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..3d3fd16
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+pyflow/README.txt export-subst
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8ede833
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+*~
+pyflow.data
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..bec5286
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,44 @@
+language: python
+
+# Note PYVER drives hack to use python 2.4, this is
+# actually pretty ugly on travis -- process is:
+# 1) install python2.4 from deadsnakes ppa
+# 2) shove 2.4 in /usr/bin/python
+# 3) set PATH back to /usr/bin
+#
+# This removes the system python link which is probably not
+# smart, but the test works so leaving it for now.
+#
+matrix:
+ include:
+ - os: linux
+ sudo: required
+ python: "2.7"
+ - os: linux
+ sudo: required
+ python: "2.7"
+ env: PYVER="2.4"
+
+before_install:
+ - date -u
+ - uname -a
+ - lsb_release -a
+ - if [ "$PYVER" == "2.4" ]; then sudo add-apt-repository -y ppa:fkrull/deadsnakes && sudo apt-get update -qq; fi
+
+install:
+ - if [ "$PYVER" == "2.4" ]; then sudo apt-get install python2.4 -y && python2.4 -V; fi
+ - if [ "$PYVER" == "2.4" ]; then sudo rm -f /usr/bin/python && sudo ln -s /usr/bin/python2.4 /usr/bin/python; fi
+ - if [ "$PYVER" == "2.4" ]; then export PATH=/usr/bin:$PATH; fi
+ - python -V
+
+script:
+ - cd scratch/test && bash ./test_release_tarball.bash -nosge
+
+branches:
+ only:
+ - master
+
+notifications:
+ email:
+ recipients:
+ - csaunders at illumina.com
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..33102f7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,64 @@
+pyFlow - a lightweight parallel task engine
+===========================================
+
+[![Build Status] [tcistatus]] [tcihome]
+[![Build status] [acistatus]] [acihome]
+
+
+pyFlow is a tool to manage tasks in the context of a task dependency
+graph. It has some similarities to make. pyFlow is not a program – it
+is a python module, and workflows are defined using pyFlow by writing
+regular python code with the pyFlow API.
+
+For more information, please see the [pyFlow website] [site].
+
+[site]:http://illumina.github.io/pyflow/
+
+[tcistatus]:https://travis-ci.org/Illumina/pyflow.svg?branch=master
+[tcihome]:https://travis-ci.org/Illumina/pyflow
+
+[acistatus]:https://ci.appveyor.com/api/projects/status/fkovw5ife59ae48t/branch/master?svg=true
+[acihome]:https://ci.appveyor.com/project/ctsa/pyflow/branch/master
+
+
+License
+-------
+
+pyFlow source code is provided under the [BSD 2-Clause License](pyflow/COPYRIGHT.txt).
+
+
+Releases
+--------
+
+Recent release tarballs can be found on the github release list here:
+
+https://github.com/Illumina/pyflow/releases
+
+To create a release tarball corresponding to any other version, run:
+
+ git clone git://github.com/Illumina/pyflow.git pyflow
+ cd pyflow
+ git checkout ${VERSION}
+ ./scratch/make_release_tarball.bash
+ # tarball is "./pyflow-${VERSION}.tar.gz"
+
+Note this README is at the root of the pyflow development repository
+and is not part of the python source release.
+
+
+Contents
+--------
+
+For the development repository (this directory), the sub-directories are:
+
+pyflow/
+
+Contains all pyflow code intended for distribution, plus demo code and
+documentation.
+
+scratch/
+
+This directory contains support scripts for tests/cleanup/release
+tarballing, etc.
+
diff --git a/pyflow/COPYRIGHT.txt b/pyflow/COPYRIGHT.txt
new file mode 100644
index 0000000..984089a
--- /dev/null
+++ b/pyflow/COPYRIGHT.txt
@@ -0,0 +1,28 @@
+pyFlow - a lightweight parallel task engine
+
+Copyright (c) 2012-2015 Illumina, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the
+ distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/pyflow/README.md b/pyflow/README.md
new file mode 100644
index 0000000..4ea7759
--- /dev/null
+++ b/pyflow/README.md
@@ -0,0 +1,189 @@
+
+pyFlow - a lightweight parallel task engine
+===========================================
+
+Chris Saunders (csaunders at illumina.com)
+Version: ${VERSION}
+
+
+SUMMARY:
+--------
+
+pyFlow manages running tasks in the context of a task dependency
+graph. It has some similarities to make. pyFlow is not a program -- it
+is a python module, and workflows are defined using pyFlow by writing
+regular python code with the pyFlow API.
+
+FEATURES:
+---------
+
+- Define workflows as python code
+- Run workflows on localhost or sge
+- Continue workflows which have partially completed
+- Task resource management: Specify number of threads and memory
+ required for each task (see the sketch after this feature list).
+- Recursive workflow specification: take any existing pyFlow object and
+ use it as a task in another pyFlow.
+- Dynamic workflow specification: define a wait on a task specification rather
+ than just on the tasks themselves, so that tasks can be defined based on the
+ results of upstream tasks (note: recursive workflows are an even better way to do this)
+- Detects and reports all failed tasks with consistent workflow-level logging.
+- Task-level logging: All task stderr is logged and decorated,
+ eg. [time][host][workflow_run][taskid]
+- Task timing: Task wrapper function provides wall time for every task
+- Task priority: Tasks which are simultaneously eligible to run can be assigned
+relative priorities to be run or queued first.
+- Task mutex sets: define sets of tasks which access an exclusive resource
+- Email notification on job completion/error/exception
+- Provide ongoing task summary report at specified intervals
+- Output task graph in dot format
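+
+The sketch below is illustrative only (it is not part of the demo code): it
+shows how per-task thread counts and dependencies are declared from inside a
+workflow() method, as mentioned in the task resource management bullet above.
+The memMb parameter name and the two wrapped commands are assumptions here;
+nCores and dependencies appear throughout the demo scripts.
+
+```
+from pyflow import WorkflowRunner
+
+class ResourceDemoWorkflow(WorkflowRunner) :
+
+    def workflow(self) :
+        # request 4 threads (and, by assumption, ~4 GB via memMb) for alignment:
+        alignTask = self.addTask("align", "./run_aligner.bash",
+                                 nCores=4, memMb=4096)
+
+        # the sort task is launched only after the alignment task succeeds:
+        self.addTask("sort", "./run_sort.bash", dependencies=alignTask)
+```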
+
+LICENSE:
+--------
+
+pyFlow source code is provided under the [BSD 2-Clause License](COPYRIGHT.txt).
+
+INSTALL:
+--------
+
+pyFlow can be installed and used on python versions in the 2.4 to
+2.7 series.
+
+The pyflow module can be installed using standard python distutils
+installation. To do so, unpack the tarball and use the setup script
+as follows:
+
+```
+tar -xzf pyflow-X.Y.Z.tar.gz
+cd pyflow-X.Y.Z
+python setup.py build install
+```
+
+If installation is not convenient, you can simply add the pyflow
+src/ directory to the system search path. For instance:
+
+usepyflow.py:
+```
+import sys
+sys.path.append("/path/to/pyflow/src")
+
+from pyflow import WorkflowRunner
+```
+
+
+
+WRITING WORKFLOWS:
+------------------
+
+Briefly, pyFlow workflows are written by creating a new class which
+inherits from pyflow.WorkflowRunner. This class then defines its
+workflow by overloading the WorkflowRunner.workflow()
+method. Workflows are run by instantiating a workflow class and
+calling the WorkflowRunner.run() method.
+
+A very simple demonstration of the minimal workflow setup and run
+described above is available in the directory: `${pyflowDir}/demo/helloWorld/`
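+
+As a rough sketch of that minimal setup (the class name, task label and echo
+command here are illustrative rather than copied from the demo):
+
+```
+import sys
+sys.path.append("/path/to/pyflow/src")
+
+from pyflow import WorkflowRunner
+
+class HelloWorkflow(WorkflowRunner) :
+
+    # a workflow is defined by overloading WorkflowRunner.workflow():
+    def workflow(self) :
+        # addTask(label, command) schedules a single command-line task:
+        self.addTask("sayHello", "echo 'Hello pyFlow!'")
+
+# instantiate the workflow, then run all tasks on the local machine:
+wflow = HelloWorkflow()
+retval = wflow.run(mode="local")
+sys.exit(retval)
+```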
+
+Several other demonstration workflows are available:
+`${pyflowDir}/demo/simpleDemo` – a basic feature sandbox
+`${pyflowDir}/demo/subWorkflow` – shows how recursive workflow invocation works
+
+The developer documentation for the pyflow API can be generated by running
+`${pyflowDir}/doc/getApiDoc.py` or `python ${pyflowDir}/src/pydoc.py`
+
+An advanced proof-of-concept demonstration of bclToBam conversion
+is also available in `${pyflowDir}/demo/bclToBwaBam`
+
+
+
+USING WORKFLOWS:
+----------------
+
+When running a pyFlow workflow, all logs and state information are
+written into a single "pyflow.data" directory. The root of this
+directory is specified in the workflow.run() call.
+
+### Logging:
+
+pyFlow creates a primary workflow-level log, and 2 log files to
+capture all task stdout and stderr, respectively.
+
+Workflow-level log information is copied to both stderr and
+pyflow.data/logs/pyflow_log.txt. All workflow log messages are
+prefixed with "[time] [hosname] [workflow_run] [component] ". Where:
+
+- 'time' is UTC in ISO 8601 format.
+- 'workflow_run' is an id that's weakly unique for each run of the workflow. It
+is composed of (1) the run() PID and (2) the number of times run() has been called on
+the workflow by the same process. These two values are joined by an underscore (for example, '12045_2').
+- 'component' - the name of the pyflow thread; the primary threads are
+ 'WorkflowManager', which runs the workflow() method, and 'TaskManager', which
+ polls the task graph and launches jobs.
+
+In the task logs, only the stderr stream is decorated. The prefix in
+this case is: "[time] [hostname] [workflow_run] [taskname] ". The
+'taskname" is usually the label provided for each task in its
+addTask() call. All tasks are launched by a task wrapping function,
+and any messages from the taskWrapper (as opposed to the task command
+itself) will use an extended taskname:
+"pyflowTaskWrapper:${tasklabel}". One example where the task wrapper
+writes to the log is to report the total runtime for its task.
+
+All logging is append only -- pyFlow does not overwrite logs even over
+multiple runs. The workflow_run id can be used to select out the
+information from a specific run if restarting/continuing a run
+multiple times.
+
+### State:
+
+pyFlow continues jobs by marking their status in a file, *not* by
+looking for the presence of file targets. This is a major difference
+from make and must be kept in mind when restarting interrupted
+workflows.
+
+The runstate of each task is in
+pyflow.data/state/pyflow_tasks_runstate.txt, and the description of each
+task is in pyflow.data/state/pyflow_tasks_info.txt. At the beginning of
+each run, any existing task files are backed up in
+pyflow.data/state/backup.
+
+### Other:
+
+#### Email notification:
+
+When running a workflow with one or more email addresses given in the
+mailTo argument, pyflow will attempt to send a notification describing the
+outcome of the run under any circumstance short of host hardware failure.
+The email should result from one of three outcomes: (1) successful run completion,
+(2) the first unrecoverable task failure, with a description of the error, or
+(3) an unhandled software exception. Mail comes by default from
+"pyflow-bot at YOURDOMAIN" (configurable). Note that (1) you may
+have to change the email address from the automatically detected domain
+to receive emails, and (2) you may need to check your junk-mail
+filter to receive notifications. It is best to configure one of the demo
+scripts to email you on a new machine to test out any issues before starting
+a production run.
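+
+For illustration (using the HelloWorkflow sketch above and a placeholder
+address), the notification recipients are passed to run() through the mailTo
+argument, which accepts one or more addresses:
+
+```
+wflow = HelloWorkflow()
+retval = wflow.run(mode="local", mailTo=["user@example.com"])
+```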
+
+#### Graph output:
+
+pyFlow provides a script which can be used to produce a graph of the current
+task dependencies, where each node is colored by the task status. The graph
+generation script is automatically created for each run in the pyflow state
+directory here:
+
+pyflow.data/state/make_pyflow_task_graph.py
+
+This script can be run without arguments to produce the current task graph in
+dot format based on the data files in the pyflow.data/state/ directory.
+
+#### Site configuration:
+
+The file ${pyflowDir}/src/pyflowConfig.py contains any pyflow variables or
+functions which would be likely to need configuration at a new site. This
+currently includes:
+
+- from: email address from pyflow
+- default memory per task
+- default memory available per thread in localhost mode
+- qsub arguments given in response to a resource request.
+
diff --git a/pyflow/demo/README.txt b/pyflow/demo/README.txt
new file mode 100644
index 0000000..5db7f7e
--- /dev/null
+++ b/pyflow/demo/README.txt
@@ -0,0 +1,33 @@
+
+This directory contains small demonstration workflows for various
+pyflow features. If you are new to pyflow, a recommended order to
+become familiar with its features is:
+
+1. helloWorld
+This demonstrates a minimum single-task pyflow workflow.
+
+2. simpleDemo
+This workflow demonstrates a number of commonly used pyflow features
+by setting up a number of tasks and showing different ways to specify
+task resource requirements and dependencies.
+
+3. subWorkflow
+This workflow demonstrates the more advanced workflow recursion feature.
+
+4. runOptionsDemo
+This workflow demonstrates one possible way the pyflow API runtime
+options could be translated to user command-line arguments if building
+a command-line utility.
+
+5. bclToBwaBam
+This workflow demonstrates a much larger 'real-world' script which
+performs bcl to fastq conversion from multiple flowcells, alignment
+with BWA and translation of the BWA output to a single sorted and
+indexed BAM file. It has numerous dependencies required to actually
+run -- its primary purpose here is to provide an example of how a
+larger-scale pyflow workflow might look.
+
+
+Most of the remaining workflows demonstrate/test the use of specific
+pyflow features.
+
diff --git a/pyflow/demo/bclToBwaBam/README.txt b/pyflow/demo/bclToBwaBam/README.txt
new file mode 100644
index 0000000..d034b84
--- /dev/null
+++ b/pyflow/demo/bclToBwaBam/README.txt
@@ -0,0 +1,27 @@
+
+This demo shows the use of pyflow on a production-scale problem.
+
+The "configBclToBwaBam.py" script here will take one or more bcl
+basecalls directories, run them through CASAVA 1.8 bcl conversion and
+align/sort/merge/markdup each sample into a single BAM file. A list of
+sample names may be given to restrict the analysis post bcl
+conversion.
+
+Help for the configuration script is available by typing
+"./configBclToBwaBam.py -h". To run, the script requires at minimum a
+bcl basecalls directory and a BWA index genome fasta file.
+
+This directory contains a configuration file
+"configBclToBwaBam.py.ini" which contains paths for bwa, samtools,
+Picard and CASAVA. You may need to change these to reflect the
+installed locations at your site before running.
+
+If on the sd-isilon, the file "example_configuration.bash" will call
+"configBclToBwaBam.py" with a pointer to a subsampled bcl directory to
+quickly demonstrate the use of this script on real data.
+
+Note that once all arguments are provided and the configuration script
+completes, a run script will be generated in the output directory
+which can be used to actually execute the workflow, allowing for
+local/sge and total job limit specification.
+
diff --git a/pyflow/demo/bclToBwaBam/bwaworkflow.py b/pyflow/demo/bclToBwaBam/bwaworkflow.py
new file mode 100644
index 0000000..42bff5e
--- /dev/null
+++ b/pyflow/demo/bclToBwaBam/bwaworkflow.py
@@ -0,0 +1,676 @@
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+"""
+bwaworkflow -- a pyflow demonstration module
+
+This is a quick-and-dirty BCL to BWA BAM workflow to demonstrate
+how pyflow could be used on a production-scale problem.
+
+__author__ = "Christopher Saunders"
+"""
+
+
+import os.path
+import sys
+
+# In production, pyflow can either be installed, or we can distribute
+# workflow to external users with pyflow in the same directory/fixed
+# relative directory or a configured directory macro-ed in by cmake,
+# etc
+#
+# For now we add the module path by hand:
+#
+scriptDir = os.path.abspath(os.path.dirname(__file__))
+sys.path.append(scriptDir + "/../../src")
+
+from pyflow import WorkflowRunner
+
+
+#
+# utility methods:
+#
+
+def ensureDir(d):
+ """
+ make directory if it doesn't already exist, raise exception if something else is in the way:
+ """
+ if os.path.exists(d):
+ if not os.path.isdir(d) :
+ raise Exception("Can't create directory: %s" % (d))
+ else :
+ os.makedirs(d)
+
+
+def skipJoin(sep, a, b) :
+ if a == "" : return b
+ elif b == "" : return a
+ return a + sep + b
+
+
+def preJoin(a, b) :
+ return skipJoin('_', a, b)
+
+
+#
+# All of these "flow" functions take a set of task dependencies as
+# input and report a set of tasks on output, and thus are designed to
+# be plugged together to create workflows which are initiated in
+# the WorkflowRunner.workflow() method.
+#
+# Note that this style is not a design constraint of pyflow, it was
+# just one natural way to write the bwa workflow, and demonstrates an
+# extensible model wherein various flow functions could be stored in
+# external modules and combined as required.
+#
+# Note that these flow functions are written to lookup dependencies
+# from the WorkflowRunner class, so they are really class
+# methods. Although they could also lookup data from the derived BWA
+# class, they don't -- this allows them to be reused by other
+# WorkflowRunner classes.
+#
+
+
+
+def casava18BclToFastqFlow(self, taskPrefix="", dependencies=set()) :
+ """
+ CASAVA 1.8 bcl to fastq conversion
+
+ This assumes the bclBasecallsDir is generated in a CASAVA 1.8
+ compatible format, and uses CASAVA 1.8 to convert to fastq
+
+ This demonstrates pyflow's makefile handling option, where
+ you specify a makefile directory instead of a regular command, and
+ pyflow runs make/qmake according to the run mode.
+
+ params:
+ casavaDir
+ bclBasecallsDir
+ flowcellFastqDir
+ bclTilePattern
+ bclToFastqMaxCores
+ """
+
+ # configure bcl2fastq makefile:
+ configBclToFastqCmd = "perl %s/bin/configureBclToFastq.pl" % (self.params.casavaDir)
+ configBclToFastqCmd += " --input-dir=%s" % self.params.bclBasecallsDir
+ configBclToFastqCmd += " --output-dir=%s" % self.params.flowcellFastqDir
+ configBclToFastqCmd += " --force" # always a good idea for CASAVA
+ configBclToFastqCmd += " --ignore-missing-bcl"
+ configBclToFastqCmd += " --ignore-missing-stats"
+ configBclToFastqCmd += " --ignore-missing-control"
+ if self.params.bclTilePattern != None :
+ configBclToFastqCmd += " --tiles=%s" % (self.params.bclTilePattern)
+
+ # run configuration:
+ configLabel = self.addTask(preJoin(taskPrefix, "configBclToFastq"), configBclToFastqCmd, isForceLocal=True, dependencies=dependencies)
+
+ # for the bcl to fastq step, we use another workflow manager, so
+ # we just run it as one huge task and handle the mode ourselves:
+ nCores = self.getNCores()
+ mode = self.getRunMode()
+
+ maxCores = self.params.bclToFastqMaxCores
+ if (nCores == "unlimited") or (nCores > maxCores) :
+ nCores = maxCores
+
+ # run the fastq conversion:
+ bclToFastqLabel = self.addTask(preJoin(taskPrefix, "bclToFastq"),
+ self.params.flowcellFastqDir,
+ nCores=nCores,
+ dependencies=configLabel,
+ isCommandMakePath=True)
+
+ return set([bclToFastqLabel])
+
+
+
+class FastqPairToBwaBamFlow(WorkflowRunner) :
+ """
+ Given a read1 and read2 pair of fastq files, create an aligned and
+ sorted bamFile. Input fastq files are deleted unless isKeepFastq is set.
+ """
+
+ def __init__(self, params, suggestedAlignThreadCount=2) :
+ """
+ suggestedAlignThreadCount -- Number of threads to use in bwa aln
+ step. The workflow will lower this if
+ it exceeds the total number of cores
+ available in the run, or if it
+ exceeds alnMaxCores
+
+ params:
+ fastq1File
+ fastq2File
+ bamFile
+ alnMaxCores
+ bwaBin
+ genomeFasta
+ samtoolsBin
+ samtoolsSortMemPerCore
+ isKeepFastq
+ """
+ self.params = params
+ self.suggestedAlignThreadCount = suggestedAlignThreadCount
+
+
+ def workflow(self) :
+ bamDir = os.path.dirname(self.params.bamFile)
+ ensureDir(bamDir)
+
+ (bamPrefix, bamExt) = os.path.splitext(self.params.bamFile)
+
+ # must end in ".bam" for samtools
+ if bamExt != ".bam" :
+ raise Exception("bamFile argument must end in '.bam'. bamFile is: %s" % (bamFile))
+ if bamPrefix == "" :
+ raise Exception("bamFile argument must have a prefix before the '.bam' extension.")
+
+ # assuming many fastq pairs are running, good total throughput given cluster nodes with 2G of ram each
+ # should be achieved by giving the align processes 2 threads each:
+
+ # grab total cores to make sure we don't exceed it:
+ totalCores = self.getNCores()
+
+ #
+ # setup aln step:
+ #
+
+ # set alnCores
+ alnCores = int(self.suggestedAlignThreadCount)
+ if (totalCores != "unlimited") and (alnCores > totalCores) :
+ alnCores = int(totalCores)
+ if (alnCores > self.params.alnMaxCores) :
+ alnCores = int(self.params.alnMaxCores)
+
+ bwaBaseCmd = "%s aln -t %i %s" % (self.params.bwaBin, alnCores, self.params.genomeFasta)
+
+ peDependencies = set()
+
+ def getReadLabel(i) : return "Read%iBwaAlign" % (i)
+ def getReadSaiFile(i) : return "%s.read%i.sai" % (self.params.bamFile, i)
+ def getReadFastqFile(i) : return (self.params.fastq1File, self.params.fastq2File)[i - 1]
+
+ for read in (1, 2) :
+ readAlnCmd = "%s %s >| %s" % (bwaBaseCmd, getReadFastqFile(read), getReadSaiFile(read))
+ peDependencies.add(self.addTask(getReadLabel(read), readAlnCmd, nCores=alnCores))
+
+ #
+ # setup sampe step:
+ #
+
+ # with all the pipes, the sampe step probably needs about 2 cores; this lets sort use more mem too:
+ peCores = 2
+ if (totalCores != "unlimited") and (peCores > totalCores) :
+ peCores = int(totalCores)
+
+ peCmd = "%s sampe %s %s %s %s %s" % (self.params.bwaBin, self.params.genomeFasta,
+ getReadSaiFile(1), getReadSaiFile(2),
+ getReadFastqFile(1), getReadFastqFile(2))
+
+ peCmd += " | %s view -uS -" % (self.params.samtoolsBin)
+
+ # For a real pipeline, we'd probably prefer Picard sort, but I don't want to add another
+ # dependency to the trial workflow:
+ #
+ peCmd += " | %s sort -m %i - %s" % (self.params.samtoolsBin,
+ self.params.samtoolsSortMemPerCore, # *peCores, need to leave memory for bwa...
+ bamPrefix)
+
+ peTaskLabel = self.addTask("BwaSamPESort", peCmd, nCores=peCores, dependencies=peDependencies)
+
+ # delete sai files:
+ rmCmd = "rm -f"
+ for read in (1, 2) :
+ rmCmd += " %s" % (getReadSaiFile(read))
+ self.addTask("RmSai", rmCmd, dependencies=peTaskLabel, isForceLocal=True)
+
+
+ # optionally delete input fastqs:
+ if not self.params.isKeepFastq :
+ fastqRmCmd = "rm -f"
+ for read in (1, 2) :
+ fastqRmCmd += " %s" % (getReadFastqFile(read))
+ self.addTask("RmFastq", fastqRmCmd, dependencies=peTaskLabel, isForceLocal=True)
+
+
+
+
+class FileDigger(object) :
+ """
+ Digs into a well-defined directory structure with prefixed
+ folder names to extract all files associated with
+ combinations of directory names.
+
+ This is written primarily to go through the CASAVA 1.8 output
+ structure.
+
+ #casava 1.8 fastq example:
+ fqDigger=FileDigger('.fastq.gz',['Project_','Sample_'])
+ """
+
+ def __init__(self, targetExtension, prefixList) :
+ self.targetExtension = targetExtension
+ self.prefixList = prefixList
+
+ def getNextFile(self, dir, depth=0, ans=tuple()) :
+ """
+ generator of a tuple: (flowcell,project,sample,bamfile)
+ given a multi-flowcell directory
+ """
+ if depth < len(self.prefixList) :
+ for d in os.listdir(dir) :
+ nextDir = os.path.join(dir, d)
+ if not os.path.isdir(nextDir) : continue
+ if not d.startswith(self.prefixList[depth]) : continue
+ value = d[len(self.prefixList[depth]):]
+ for val in self.getNextFile(nextDir, depth + 1, ans + tuple([value])) :
+ yield val
+ else:
+ for f in os.listdir(dir) :
+ file = os.path.join(dir, f)
+ if not os.path.isfile(file) : continue
+ if not f.endswith(self.targetExtension) : continue
+ yield ans + tuple([file])
+
+
+
+def flowcellDirFastqToBwaBamFlow(self, taskPrefix="", dependencies=set()) :
+ """
+ Takes as input 'flowcellFastqDir' pointing to the CASAVA 1.8 flowcell
+ project/sample fastq directory structure. For each project/sample,
+ the fastqs are aligned using BWA, sorted and merged into a single
+ BAM file. The bam output is placed in a parallel project/sample
+ directory structure below 'flowcellBamDir'
+
+ params:
+ samtoolsBin
+ flowcellFastqDir
+ flowcellBamDir
+
+ calls:
+ FastqPairToBwaBamFlow
+ supplies:
+ bamFile
+ fastq1File
+ fastq2File
+ """
+
+ #
+ # 1. separate fastqs into matching pairs:
+ #
+ fqs = {}
+ fqDigger = FileDigger(".fastq.gz", ["Project_", "Sample_"])
+ for (project, sample, fqPath) in fqDigger.getNextFile(self.params.flowcellFastqDir) :
+ if (self.params.sampleNameList != None) and \
+ (len(self.params.sampleNameList) != 0) and \
+ (sample not in self.params.sampleNameList) : continue
+
+ fqFile = os.path.basename(fqPath)
+ w = (fqFile.split(".")[0]).split("_")
+ if len(w) != 5 :
+ raise Exception("Unexpected fastq filename format: '%s'" % (fqPath))
+
+ (sample2, index, lane, read, num) = w
+ if sample != sample2 :
+ raise Exception("Fastq name sample disagrees with directory sample: '%s;" % (fqPath))
+
+ key = (project, sample, index, lane, num)
+ if key not in fqs : fqs[key] = [None, None]
+
+ readNo = int(read[1])
+ if fqs[key][readNo - 1] != None :
+ raise Exception("Unresolvable repeated fastq file pattern in sample: '%s'" % (fqPath))
+ fqs[key][readNo - 1] = fqPath
+
+
+ ensureDir(self.params.flowcellBamDir)
+
+ #
+ # 2. run all fastq pairs through BWA:
+ #
+ nextWait = set()
+
+ for key in fqs.keys() :
+ (project, sample, index, lane, num) = key
+ sampleBamDir = os.path.join(self.params.flowcellBamDir, "Project_" + project, "Sample_" + sample)
+ ensureDir(sampleBamDir)
+ keytag = "_".join(key)
+ self.params.bamFile = os.path.join(sampleBamDir, keytag + ".bam")
+ self.params.fastq1File = fqs[key][0]
+ self.params.fastq2File = fqs[key][1]
+ nextWait.add(self.addWorkflowTask(preJoin(taskPrefix, keytag), FastqPairToBwaBamFlow(self.params), dependencies=dependencies))
+
+ return nextWait
+
+
+
+class FlowcellDirFastqToBwaBamFlow(WorkflowRunner) :
+ """
+ Takes as input 'flowcellFastqDir' pointing to the CASAVA 1.8 flowcell
+ project/sample fastq directory structure. For each project/sample,
+ the fastqs are aligned using BWA, sorted and merged into a single
+ BAM file. The bam output is placed in a parallel project/sample
+ directory structure below 'flowcellBamDir'
+
+ params:
+ flowcellFastqDir
+ flowcellBamDir
+ """
+
+ def __init__(self, params) :
+ self.params = params
+
+ def workflow(self) :
+ flowcellDirFastqToBwaBamFlow(self)
+
+
+
+
+
+# use a really boring flowcell label everywhere right now:
+def getFlowcellLabel(self, i) :
+ return "Flowcell_FC%i" % (i)
+
+
+
+
+def casava18BclToBamListFlow(self, taskPrefix="", dependencies=set()) :
+ """
+ Runs bcl conversion and alignment on multiple flowcells for a subset of samples.
+ Writes BAM files to parallel fastq Project/Sample directory structure. Does not
+ merge individual BAMs. Deletes fastqs on alignment when option is set to do so.
+
+ params:
+ allFlowcellDir
+ bclBasecallsDirList
+ bclTilePatternList
+
+ calls:
+ casava18BclToFastqFlow
+ supplies:
+ bclBasecallsDir
+ flowcellFastqDir
+ FlowcellDirFastqToBwaBamFlow
+ supplies:
+ flowcellFastqDir
+ flowcellBamDir
+
+ """
+
+ ensureDir(self.params.allFlowcellDir)
+
+ # first bcl->fastq->bwa bam for requested samples in all flowcells:
+ nextWait = set()
+ for i, self.params.bclBasecallsDir in enumerate(self.params.bclBasecallsDirList) :
+ flowcellLabel = getFlowcellLabel(self, i)
+ flowcellDir = os.path.join(self.params.allFlowcellDir, flowcellLabel)
+
+ ensureDir(flowcellDir)
+
+ self.params.flowcellFastqDir = os.path.join(flowcellDir, "fastq")
+ self.params.flowcellBamDir = os.path.join(flowcellDir, "bam")
+ if self.params.bclTilePatternList == None :
+ self.params.bclTilePattern = None
+ else :
+ self.params.bclTilePattern = self.params.bclTilePatternList[i]
+
+ fastqFinal = casava18BclToFastqFlow(self, taskPrefix=flowcellLabel)
+
+ label = preJoin(taskPrefix, "_".join((flowcellLabel, "FastqToBwaBam")))
+ nextWait.add(self.addWorkflowTask(label, FlowcellDirFastqToBwaBamFlow(self.params), dependencies=fastqFinal))
+
+ return nextWait
+
+
+
+
+def mergeBamListFlow(self, taskPrefix="", dependencies=set()) :
+ """
+ Take a list of sorted bam files from the same sample, merge them together,
+ and delete input bams, final output to mergeBamName
+
+ params:
+ mergeBamList
+ mergeBamName
+ samtoolsBin
+ """
+
+ for bamFile in self.params.mergeBamList :
+ if not os.path.isfile(bamFile) :
+ raise Exception("Can't find bam file: '%s'" % (bamFile))
+
+ mergeTasks = set()
+ mergeLabel = preJoin(taskPrefix, "merge")
+ if len(self.params.mergeBamList) > 1 :
+ mergeCmd = "%s merge -f %s %s" % (self.params.samtoolsBin, self.params.mergeBamName, " ".join(self.params.mergeBamList))
+ mergeTasks.add(self.addTask(mergeLabel, mergeCmd, dependencies=dependencies, isTaskStable=False))
+
+ rmCmd = "rm -f"
+ for bamFile in self.params.mergeBamList :
+ rmCmd += " %s" % (bamFile)
+
+ self.addTask(preJoin(taskPrefix, "rmBam"), rmCmd, dependencies=mergeLabel, isForceLocal=True)
+ elif len(self.params.mergeBamList) == 1 :
+ mvCmd = "mv %s %s" % (self.params.mergeBamList[0], self.params.mergeBamName)
+ # *must* have same taskLabel as merge command for continuation
+ # to work correctly because of the potential for partial
+ # deletion of the input bam files:
+ mergeTasks.add(self.addTask(mergeLabel, mvCmd, dependencies=dependencies, isForceLocal=True, isTaskStable=False))
+
+ return mergeTasks
+
+
+
+def flowcellBamListMergeFlow(self, taskPrefix="", dependencies=set()) :
+ """
+ given a root flowcell directory and list of samples, merge sample
+ bams across flowcells and dedup.
+
+ ?? Will we be in a situation where sample has more than one library
+ -- this affects the dedup order & logic ??
+
+ params:
+ allFlowcellDir
+ mergedDir
+ sampleNameList
+ picardDir
+
+ calls:
+ mergeBamListFlow
+ supplies:
+ mergeBamList
+ mergeBamName
+ """
+
+ #
+ # 1) get a list of bams associated with each project/sample combination:
+ #
+
+ # TODO: what if there's an NFS delay updating all the bams while
+ # we're reading them out here? make this process more robust -- we
+ # should know how many BAMs we're expecting, in a way that's
+ # robust to interruption/restart
+ #
+ bams = {}
+ bamDigger = FileDigger(".bam", ["Flowcell_", "bam", "Project_", "Sample_"])
+ for (flowcell, nothing, project, sample, bamFile) in bamDigger.getNextFile(self.params.allFlowcellDir) :
+ if (self.params.sampleNameList != None) and \
+ (len(self.params.sampleNameList) != 0) and \
+ (sample not in self.params.sampleNameList) : continue
+ key = (project, sample)
+ if key not in bams : bams[key] = []
+ bams[key].append(bamFile)
+
+ mergedBamExt = ".merged.bam"
+ markDupBamExt = ".markdup.bam"
+
+ #
+ # 2) merge and delete smaller bams:
+ #
+ mergedBams = {}
+
+ mergedBamDir = os.path.join(self.params.mergedDir, "bam")
+ sampleTasks = {}
+ if len(bams) : # skip this section if smaller bams have already been deleted
+ ensureDir(mergedBamDir)
+
+ for key in bams.keys() :
+ (project, sample) = key
+ mergedSampleDir = os.path.join(mergedBamDir, "Project_" + project, "Sample_" + sample)
+ ensureDir(mergedSampleDir)
+ self.params.mergeBamList = bams[key]
+ self.params.mergeBamName = os.path.join(mergedSampleDir, sample + mergedBamExt)
+ mergedBams[key] = self.params.mergeBamName
+ outTaskPrefix = preJoin(taskPrefix, "_".join(key))
+ sampleTasks[key] = mergeBamListFlow(self, outTaskPrefix, dependencies)
+
+ if not os.path.isdir(mergedBamDir) : return
+
+
+ #
+ # 3) mark dup:
+ #
+
+ # mergedBams contains all bams from the current run, we also add any from a
+ # previous interrupted run:
+ mergedBamDigger = FileDigger(mergedBamExt, ["Project_", "Sample_"])
+ for (project, sample, bamFile) in mergedBamDigger.getNextFile(mergedBamDir) :
+ key = (project, sample)
+ if key in mergedBams :
+ assert (mergedBams[key] == bamFile)
+ else :
+ mergedBams[key] = bamFile
+
+ nextWait = set()
+ totalCores = self.getNCores()
+
+ for sampleKey in mergedBams.keys() :
+ markDupDep = set()
+ if sampleKey in sampleTasks : markDupDep = sampleTasks[sampleKey]
+
+ fullName = "_".join(sampleKey)
+
+ markDupBamFile = mergedBams[sampleKey][:-(len(mergedBamExt))] + markDupBamExt
+ markDupMetricsFile = markDupBamFile[:-(len(".bam"))] + ".metrics.txt"
+ markDupTmpDir = markDupBamFile + ".tmpdir"
+
+ # for now, solve the memory problem with lots of threads:
+ nCores = 4
+ if (totalCores != "unlimited") and (totalCores < nCores) :
+ nCores = totalCores
+ gigs = 2 * nCores
+ javaOpts = "-Xmx%ig" % (gigs)
+ markDupFiles = "INPUT=%s OUTPUT=%s METRICS_FILE=%s" % (mergedBams[sampleKey], markDupBamFile, markDupMetricsFile)
+ markDupOpts = "REMOVE_DUPLICATES=false ASSUME_SORTED=true VALIDATION_STRINGENCY=SILENT CREATE_INDEX=true TMP_DIR=%s" % (markDupTmpDir)
+ markDupJar = os.path.join(self.params.picardDir, "MarkDuplicates.jar")
+ markDupCmd = "java %s -jar %s %s %s" % (javaOpts, markDupJar, markDupFiles, markDupOpts)
+ markDupTask = self.addTask(preJoin(taskPrefix, fullName + "_dupmark"), markDupCmd, dependencies=markDupDep)
+
+ # link index filename to something samtools can understand:
+ #
+ markDupPicardBaiFile = markDupBamFile[:-(len(".bam"))] + ".bai"
+ markDupSamtoolsBaiFile = markDupBamFile + ".bai"
+ indexLinkCmd = "ln %s %s" % (markDupPicardBaiFile, markDupSamtoolsBaiFile)
+ indexLinkTask = self.addTask(preJoin(taskPrefix, fullName + "_indexLink"), indexLinkCmd, dependencies=markDupTask, isForceLocal=True)
+
+ nextWait.add(indexLinkTask)
+
+ # delete TmpDir:
+ #
+ rmMarkDupTmpCmd = "rm -rf %s" % (markDupTmpDir)
+ self.addTask(preJoin(taskPrefix, fullName + "_rmMarkDupTmp"), rmMarkDupTmpCmd, dependencies=markDupTask, isForceLocal=True)
+
+ # now remove the original file:
+ #
+ rmCmd = "rm -f %s" % (mergedBams[sampleKey])
+ self.addTask(preJoin(taskPrefix, fullName + "_rmMerge"), rmCmd, dependencies=markDupTask, isForceLocal=True)
+
+ return nextWait
+
+
+
+
+class FlowcellBamListMergeFlow(WorkflowRunner) :
+
+ def __init__(self, params) :
+ self.params = params
+
+ def workflow(self) :
+ flowcellBamListMergeFlow(self)
+
+
+
+class BWAWorkflow(WorkflowRunner) :
+ """
+ pyflow BCL to BAM BWA workflow
+ """
+
+ def __init__(self, params) :
+ self.params = params
+
+ # make sure working directory is setup:
+ self.params.outputDir = os.path.abspath(self.params.outputDir)
+ ensureDir(self.params.outputDir)
+
+ self.params.allFlowcellDir = os.path.join(self.params.outputDir, "flowcell_results")
+ self.params.mergedDir = os.path.join(self.params.outputDir, "merged_results")
+
+ # Verify/manipulate various input options:
+ #
+ # this is mostly repeated in the config script now... get this minimized with auto verification:
+ #
+ self.params.bclBasecallsDirList = map(os.path.abspath, self.params.bclBasecallsDirList)
+ for dir in self.params.bclBasecallsDirList :
+ if not os.path.isdir(dir) :
+ raise Exception("Input BCL basecalls directory not found: '%s'" % (dir))
+
+ self.params.samtoolsSortMemPerCore = int(self.params.samtoolsSortMemPerCore)
+ minSortMem = 1000000
+ if self.params.samtoolsSortMemPerCore < minSortMem :
+ raise Exception("samtoolsSortMemPerCore must be an integer greater than minSortMem")
+
+ if self.params.genomeFasta == None:
+ raise Exception("No bwa genome file defined.")
+ else:
+ if not os.path.isfile(self.params.genomeFasta) :
+ raise Exception("Can't find bwa genome file '%s'" % (self.params.genomeFasta))
+
+
+ def workflow(self) :
+
+ alignTasks = casava18BclToBamListFlow(self)
+ mergeTask = self.addWorkflowTask("mergeBams", FlowcellBamListMergeFlow(self.params), dependencies=alignTasks)
+
+
+
diff --git a/pyflow/demo/bclToBwaBam/configBclToBwaBam.py b/pyflow/demo/bclToBwaBam/configBclToBwaBam.py
new file mode 100755
index 0000000..19833b4
--- /dev/null
+++ b/pyflow/demo/bclToBwaBam/configBclToBwaBam.py
@@ -0,0 +1,397 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+"""
+This demonstrates a run of a prototype BCL to BWA BAM workflow
+created as a production-scale proof of concept for pyflow.
+
+The bwa workflow is written into the BWAWorkflow object. See
+bwaworkflow.py for implementation details of this class.
+
+Finally, make sure configuration settings in BWAWorkflowConfig
+are appropriate before running.
+"""
+
+import os, sys
+
+scriptDir = os.path.abspath(os.path.dirname(__file__))
+scriptName = os.path.basename(__file__)
+
+
+runScript1 = """#!/usr/bin/env python
+# BWAWorkflow run script auto-generated by command: %s
+
+import os.path, sys
+
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+sys.path.append('%s')
+from bwaworkflow import BWAWorkflow
+
+class WorkflowOptions(object) :
+""" % (" ".join(sys.argv), scriptDir)
+
+runScript2 = """
+def get_run_options() :
+ from optparse import OptionParser
+ import textwrap
+
+ epilog=\"""Note this script can be re-run to continue the workflow run in case of interuption.
+Also note that dryRun option has limited utility when task definition depends on upstream task results,
+in which case the dry run will not cover the full 'live' run task set)\"""
+
+ # no epilog in py 2.4! hack-in the feature instead:
+ class MyOptionParser(OptionParser) :
+ def __init__(self, *args, **kwargs):
+ self.myepilog = None
+ try:
+ self.myepilog = kwargs.pop('epilog')
+ except KeyError:
+ pass
+ OptionParser.__init__(self,*args, **kwargs)
+
+ def print_help(self,*args,**kwargs) :
+ OptionParser.print_help(self,*args, **kwargs)
+ if self.myepilog != None :
+ sys.stdout.write("%s\\n" % (textwrap.fill(self.myepilog)))
+
+ parser = MyOptionParser(epilog=epilog)
+
+
+ parser.add_option("-m", "--mode", type="string",dest="mode",
+ help="select run mode (local|sge)")
+ parser.add_option("-j", "--jobs", type="string",dest="jobs",
+ help="number of jobs (default: 1 for local mode, 'unlimited' for sge mode)")
+ parser.add_option("-e","--mailTo", type="string",dest="mailTo",action="append",
+ help="send email notification of job completion status to this address (may be provided multiple times for more than one email address)")
+ parser.add_option("-d","--dryRun", dest="isDryRUn",action="store_true",
+ help="dryRun workflow code without actually running command-tasks")
+
+
+ (options,args) = parser.parse_args()
+
+ if len(args) :
+ parser.print_help()
+ sys.exit(2)
+
+ if options.mode == None :
+ parser.print_help()
+ sys.exit(2)
+ elif options.mode not in ["local","sge"] :
+ parser.error("Invalid mode. Available modes are: local, sge")
+
+ if options.jobs == None :
+ if options.mode == "sge" :
+ options.jobs == "unlimited"
+ else :
+ options.jobs == "1"
+ elif (options.jobs != "unlimited") and (int(options.jobs) <= 0) :
+ parser.error("Jobs must be 'unlimited' or an integer greater than 1")
+
+ return options
+
+runOptions=get_run_options()
+flowOptions=WorkflowOptions()
+flowOptions.outputDir=scriptDir
+wflow = BWAWorkflow(flowOptions)
+retval=wflow.run(mode=runOptions.mode,
+ nCores=runOptions.jobs,
+ dataDirRoot=scriptDir,
+ mailTo=runOptions.mailTo,
+ isContinue="Auto",
+ isForceContinue=True,
+ isDryRun=runOptions.isDryRUn)
+sys.exit(retval)
+"""
+
+
+
+
+def checkArg(x, label, checkfunc) :
+ if x != None:
+ x = os.path.abspath(x)
+ if not checkfunc(x) :
+ raise Exception("Can't find %s: '%s'" % (label, x))
+ return x
+
+def checkDirArg(dir, label) :
+ return checkArg(dir, label, os.path.isdir)
+
+def checkFileArg(file, label) :
+ return checkArg(file, label, os.path.isfile)
+
+
+
+def get_option_parser(defaults, configFileName, isAllHelp=False) :
+ from optparse import OptionGroup, OptionParser, SUPPRESS_HELP
+ import textwrap
+
+ description = """This script configures a bcl to BWA alignmed BAM workflow.
+Given a bcl basecalls directory the workflow will create fastq's using CASAVA's
+bcl to fastq converter, then align each fastq using bwa, and finally consolidate
+the output into a single BAM file for for each Project/Sample combination.
+
+The configuration process will produce a workflow run script, which can be used to
+execute the workflow on a single node or through sge with a specific job limit.
+"""
+
+ epilog = """Default parameters will always be read from the file '%s' if it exists.
+This file is searched for in the current working directory first -- if
+it is not found then the directory containing this script is searched as well.
+The current set of default parameters may be written to this file using the --writeConfig switch,
+which takes all current defaults and arguments, writes these to the
+configuration file and exits without setting up a workflow run script as usual.
+""" % (configFileName)
+
+ # no epilog in py 2.4! hack-in the feature instead:
+ class MyOptionParser(OptionParser) :
+ def __init__(self, *args, **kwargs):
+ self.myepilog = None
+ try:
+ self.myepilog = kwargs.pop('epilog')
+ except KeyError:
+ pass
+ OptionParser.__init__(self, *args, **kwargs)
+
+ def print_help(self, *args, **kwargs) :
+ OptionParser.print_help(self, *args, **kwargs)
+ if self.myepilog != None :
+ sys.stdout.write("%s\n" % (textwrap.fill(self.myepilog)))
+
+
+ parser = MyOptionParser(description=description, epilog=epilog)
+
+ parser.set_defaults(**defaults)
+
+ parser.add_option("--allHelp", action="store_true", dest="isAllHelp",
+ help="show all extended/hidden options")
+
+ group = OptionGroup(parser, "Workflow options")
+ group.add_option("--bclBasecallsDir", type="string", dest="bclBasecallsDirList", metavar="DIR", action="append",
+ help="BCL basecalls directory. Call this option multiple times to specify multiple bcl directories, samples with the same name will be combined over all flowcells after alignmnet. [required] (default: %default)")
+ group.add_option("--bclTilePattern", type="string", dest="bclTilePatternList", metavar="PATTERN", action="append",
+ help="BCL converter tiles expression used to select a subsset of tiles (eg. 's_1') call this option either once for each basecalls dir or not at all (default: %default)")
+ group.add_option("--genomeFasta", type="string", dest="genomeFasta",
+ help="Genome fasta file which includes BWA index in the same directory [required] (default: %default)")
+ group.add_option("--outputDir", type="string", dest="outputDir",
+ help="BCL basecalls directory [required] (default: %default)")
+ group.add_option("--sampleName", type="string", dest="sampleNameList", metavar="sampleName", action="append",
+ help="Restrict analysis to given sampleName. This option can be provided more than once for multiple sample names. If no names are provided all samples are analyzed (default: %default)")
+ parser.add_option_group(group)
+
+ secgroup = OptionGroup(parser, "Extended options",
+ "These options are not likely to be reset after initial configuration in a new site, they will not be printed here if a default exists from the configuration file or otherwise, unless --allHelp is specified")
+
+ # used to access isAnyHelp from the maybeHelp function
+ class Hack : isAnyHelp = False
+
+ def maybeDefHelp(key, msg) :
+ if isAllHelp or (key not in defaults) :
+ Hack.isAnyHelp = True
+ return msg
+ return SUPPRESS_HELP
+
+ secgroup.add_option("--casavaDir", type="string", dest="casavaDir",
+ help=maybeDefHelp("casavaDir", "casava 1.8.2+ installation directory [required] (default: %default)"))
+ secgroup.add_option("--bwaBin", type="string", dest="bwaBin",
+ help=maybeDefHelp("bwaBin", "bwa binary [required] (default: %default)"))
+ secgroup.add_option("--samtoolsBin", type="string", dest="samtoolsBin",
+ help=maybeDefHelp("samtoolsBin", "samtools binary [required] (default: %default)"))
+ secgroup.add_option("--picardDir", type="string", dest="picardDir",
+ help=maybeDefHelp("picardDir", "casava 1.8.2+ installation directory [required] (default: %default)"))
+ if not Hack.isAnyHelp:
+ secgroup.description = "hidden"
+ parser.add_option_group(secgroup)
+
+ def maybeHelp(key, msg) :
+ if isAllHelp : return msg
+ return SUPPRESS_HELP
+
+ configgroup = OptionGroup(parser, "Config options")
+ configgroup.add_option("--writeConfig", action="store_true", dest="isWriteConfig",
+ help=maybeHelp("writeConfig", "Write new default configuration file based on current defaults and agruments. Defaults written to: '%s'" % (configFileName)))
+ if not isAllHelp :
+ configgroup.description = "hidden"
+ parser.add_option_group(configgroup)
+
+ return parser
+
+
+
+def get_run_options() :
+ from ConfigParser import SafeConfigParser
+
+ configFileName = scriptName + ".ini"
+ if not os.path.isfile(configFileName) :
+ configPath = os.path.join(scriptDir, configFileName)
+ else :
+ configPath = os.path.join('.', configFileName)
+
+ configSectionName = scriptName
+
+ config = SafeConfigParser()
+ config.optionxform = str
+ config.read(configPath)
+
+ configOptions = {}
+ if config.has_section(configSectionName) :
+ for (k, v) in config.items(configSectionName) :
+ if v == "" : continue
+ configOptions[k] = v
+
+ defaults = { 'outputDir' : './results',
+ 'bclToFastqMaxCores' : 12,
+ 'samtoolsSortMemPerCore' : 1000000000, # samtools sort uses about 2x what you tell it to...
+ 'alnMaxCores' : 8, # presumably bwa aln will become increasingly inefficient per core, so we don't want to let this go forever...
+ 'isKeepFastq' : True, # important to keep these during testing, but not for production
+ }
+
+ defaults.update(configOptions)
+
+ parser = get_option_parser(defaults, configFileName)
+ (options, args) = parser.parse_args()
+
+ if options.isAllHelp :
+ parser = get_option_parser(defaults, configFileName, True)
+ parser.print_help()
+ sys.exit(2)
+
+ if len(args) : # or (len(sys.argv) == 1):
+ parser.print_help()
+ sys.exit(2)
+
+ # sanitize arguments before writing defaults, check for missing arguments after:
+ #
+ def checkListRepeats(list, itemLabel) :
+ if list == None : return
+ if len(set(list)) != len(list) :
+ parser.error("Repeated %s entries" % (itemLabel))
+
+ if options.bclBasecallsDirList != None :
+ for i, bclDir in enumerate(options.bclBasecallsDirList) :
+ options.bclBasecallsDirList[i] = checkDirArg(bclDir, "bcl basecalls directory")
+ # tmp for testing:
+ # checkListRepeats(options.bclBasecallsDirList,"bcl basecalls directory")
+ if (options.bclTilePatternList != None) and \
+ (len(options.bclBasecallsDirList) != len(options.bclTilePatternList)) :
+ parser.error("Unexpected number of bclTilPattern entries")
+ checkListRepeats(options.sampleNameList, "sample name")
+
+ options.casavaDir = checkDirArg(options.casavaDir, "casava directory")
+
+ options.genomeFasta = checkFileArg(options.genomeFasta, "genome fasta file")
+ options.bwaBin = checkFileArg(options.bwaBin, "bwa binary")
+ options.samtoolsBin = checkFileArg(options.samtoolsBin, "samtools binary")
+
+ if options.isWriteConfig == True :
+ if not config.has_section(configSectionName) :
+ config.add_section(configSectionName)
+ for k, v in vars(options).iteritems() :
+ if k == "isWriteConfig" : continue
+ if v == None : v = ""
+ config.set(configSectionName, k, str(v))
+ configfp = open(configFileName, "w")
+ config.write(configfp)
+ configfp.close()
+ sys.exit(0)
+
+ def noArgOrError(msg) :
+ if len(sys.argv) <= 1 :
+ parser.print_help()
+ sys.exit(2)
+ else :
+ parser.error(msg)
+
+ def assertOption(arg, label) :
+ if arg == None:
+ noArgOrError("No %s specified" % (label))
+
+ def assertList(list, itemLabel) :
+ if (list == None) or (len(list) == 0) :
+ noArgOrError("List containing %s (s) is empty or missing" % (itemLabel))
+ else :
+ for item in list :
+ assertOption(item, itemLabel)
+
+ assertList(options.bclBasecallsDirList, "bcl basecalls directory")
+ assertList(options.sampleNameList, "sample name")
+ assertOption(options.genomeFasta, "genome fasta file")
+ assertOption(options.outputDir, "output directory")
+ assertOption(options.casavaDir, "casava directory")
+ assertOption(options.picardDir, "picard directory")
+ assertOption(options.bwaBin, "bwa binary")
+ assertOption(options.samtoolsBin, "samtools binary")
+
+ return options
+
+
+
+from bwaworkflow import BWAWorkflow, ensureDir
+
+
+def main() :
+
+ options = get_run_options()
+
+ # instantiate workflow object to trigger parameter validation only
+ #
+ wflow = BWAWorkflow(options)
+
+ # generate runscript:
+ #
+ scriptFile = os.path.join(options.outputDir, "runWorkflow.py")
+ ensureDir(options.outputDir)
+
+ sfp = open(scriptFile, "w")
+ sfp.write(runScript1)
+ # there must be a nicer way to reverse eval() an object -- maybe human readable pickle is what we want here?
+ for k, v in vars(options).iteritems() :
+ if isinstance(v, basestring) :
+ sfp.write(" %s = '%s'\n" % (k, v))
+ else:
+ sfp.write(" %s = %s\n" % (k, v))
+ sfp.write("\n")
+ sfp.write(runScript2)
+ sfp.close()
+ os.chmod(scriptFile, 0755)
+
+ notefp = sys.stdout
+ notefp.write("""
+Successfully created workflow run script. To execute the workflow, run the following script and set appropriate options:
+
+%s
+""" % (scriptFile))
+
+
+if __name__ == "__main__" :
+ main()
+
diff --git a/pyflow/demo/bclToBwaBam/configBclToBwaBam.py.ini b/pyflow/demo/bclToBwaBam/configBclToBwaBam.py.ini
new file mode 100644
index 0000000..1c86828
--- /dev/null
+++ b/pyflow/demo/bclToBwaBam/configBclToBwaBam.py.ini
@@ -0,0 +1,5 @@
+[configBclToBwaBam.py]
+bwaBin = /home/csaunders/opt/x86_64-linux/bwa/bwa
+samtoolsBin = /illumina/thirdparty/samtools/samtools-0.1.14/samtools
+casavaDir = /illumina/software/casava/CASAVA-1.8.2
+picardDir = /home/csaunders/opt/noarch/picard-tools
diff --git a/pyflow/demo/bclToBwaBam/example_configuration.bash b/pyflow/demo/bclToBwaBam/example_configuration.bash
new file mode 100755
index 0000000..a2468d6
--- /dev/null
+++ b/pyflow/demo/bclToBwaBam/example_configuration.bash
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+set -o xtrace
+
+#
+# executes the configure script for a small bcl directory -- note that
+# the tile mask is required for this bcl directory because it has been
+# extensively subsampled for testing purposes
+#
+
+./configBclToBwaBam.py \
+--bclBasecallsDir /home/csaunders/proj/bwa_workflow_hashout/create_small_lane/small_lane/111119_SN192_0307_BD0FNCACXX_Genentech/Data/Intensities/BaseCalls \
+--bclTilePattern "s_8_[02468][0-9][0-9]1" \
+--bclBasecallsDir /home/csaunders/proj/bwa_workflow_hashout/create_small_lane/small_lane/111119_SN192_0307_BD0FNCACXX_Genentech/Data/Intensities/BaseCalls \
+--bclTilePattern "s_8_[13579][0-9][0-9]1" \
+--genomeFasta /illumina/scratch/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa \
+--sampleName "lane8"
+
diff --git a/pyflow/demo/cwdDemo/cwdDemo.py b/pyflow/demo/cwdDemo/cwdDemo.py
new file mode 100755
index 0000000..9b6eda3
--- /dev/null
+++ b/pyflow/demo/cwdDemo/cwdDemo.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+#
+# demonstrate/test addTask() cwd option
+#
+
+import os.path
+import sys
+
+# add module path by hand
+#
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+sys.path.append(scriptDir+"/../../src")
+
+from pyflow import WorkflowRunner
+
+
+# all pyflow workflows are written into classes derived from
+# pyflow.WorkflowRunner:
+#
+class CwdWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the
+ # WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ # get cwd and its parent for the addTask cwd test
+ #
+ cwd=os.getcwd()
+ parentdir=os.path.abspath(os.path.join(cwd,".."))
+
+ self.flowLog("testing pyflow cwd: '%s' parentdir: '%s'" % (cwd,parentdir))
+
+ # task will fail unless pwd == parentdir:
+ #
+ # test both absolute and relative cwd arguments:
+ #
+ self.addTask("testAbsCwd","[ $(pwd) == '%s' ]; exit $?" % (parentdir),cwd=parentdir)
+ self.addTask("testRelCwd","[ $(pwd) == '%s' ]; exit $?" % (parentdir),cwd="..")
+
+
+
+# Instantiate the workflow
+#
+wflow = CwdWorkflow()
+
+# Run the workflow:
+#
+retval=wflow.run(mode="local")
+
+sys.exit(retval)
+
diff --git a/pyflow/demo/envDemo/envDemo.py b/pyflow/demo/envDemo/envDemo.py
new file mode 100755
index 0000000..e72608b
--- /dev/null
+++ b/pyflow/demo/envDemo/envDemo.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+#
+# demonstrate/test addTask() env option
+#
+
+import os.path
+import sys
+
+# add module path by hand
+#
+scriptDir = os.path.abspath(os.path.dirname(__file__))
+sys.path.append(scriptDir + "/../../src")
+
+from pyflow import WorkflowRunner
+
+
+# all pyflow workflows are written into classes derived from
+# pyflow.WorkflowRunner:
+#
+class EnvWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the
+ # WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+ # run a task with the parent env:
+ #
+ home = os.environ["HOME"]
+ self.addTask("testDefEnv", "[ $HOME == '%s' ]; exit $?" % (home))
+
+ # create a minimal test environment
+ #
+ new_path = "/bin"
+ min_env = { "PATH" : new_path }
+ self.addTask("testMinEnv", "[ $PATH == '%s' ]; exit $?" % (new_path), env=min_env)
+
+ # augment parent env with additional settings:
+ #
+ augmented_env = os.environ.copy()
+ augmented_env["FOO"] = "BAZ"
+ self.addTask("testAugmentedEnv", "[ $FOO == 'BAZ' ]; exit $?", env=augmented_env)
+
+        # test funny characters that have been shown to cause trouble on some sge installations
+ funky_env = {}
+ funky_env["PATH"] = "/bin"
+ funky_env["_"] = "| %s %F \n"
+ # in this case we just want the job to run at all:
+ self.addTask("testFunkyEnv", "echo 'foo'; exit $?", env=funky_env)
+
+ assert("FOO" not in os.environ)
+
+
+
+# Instantiate the workflow
+#
+wflow = EnvWorkflow()
+
+# Run the workflow:
+#
+retval = wflow.run(mode="local")
+
+sys.exit(retval)
+
diff --git a/pyflow/demo/helloWorld/README.txt b/pyflow/demo/helloWorld/README.txt
new file mode 100644
index 0000000..ea6117b
--- /dev/null
+++ b/pyflow/demo/helloWorld/README.txt
@@ -0,0 +1,3 @@
+The following demo shows a very simple pyFlow composed of only a
+single task -- a command which echoes a simple message. You can run
+this workflow by typing "python ./helloWorld.py"
diff --git a/pyflow/demo/helloWorld/helloWorld.py b/pyflow/demo/helloWorld/helloWorld.py
new file mode 100755
index 0000000..bede07e
--- /dev/null
+++ b/pyflow/demo/helloWorld/helloWorld.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+#
+# This demo shows possibly the simplest pyflow we can create --
+# a single 'hello world' task. After experimenting with this file
+# please see the 'simpleDemo' for coverage of a few more pyflow features
+#
+
+import os.path
+import sys
+
+# add module path
+#
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+sys.path.append(os.path.abspath(os.path.join(scriptDir,os.pardir,os.pardir,"src")))
+
+from pyflow import WorkflowRunner
+
+
+# all pyflow workflows are written into classes derived from pyflow.WorkflowRunner:
+#
+class HelloWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ #
+ # The output for this task will be written to the file helloWorld.out.txt
+ #
+ self.addTask("easy_task1", "echo 'Hello World!' > helloWorld.out.txt")
+
+
+
+# Instantiate the workflow
+#
+wflow = HelloWorkflow()
+
+# Run the workflow:
+#
+retval = wflow.run()
+
+# done!
+sys.exit(retval)
+
diff --git a/pyflow/demo/makeDemo/.hidden b/pyflow/demo/makeDemo/.hidden
new file mode 100644
index 0000000..af44150
--- /dev/null
+++ b/pyflow/demo/makeDemo/.hidden
@@ -0,0 +1,7 @@
+
+.PHONY: A B
+A: B
+ @echo "Made it!"
+
+B:
+ sleep 5
diff --git a/pyflow/demo/makeDemo/makeDemo.py b/pyflow/demo/makeDemo/makeDemo.py
new file mode 100755
index 0000000..d3af256
--- /dev/null
+++ b/pyflow/demo/makeDemo/makeDemo.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+import os.path
+import sys
+
+# add module path by hand
+#
+scriptDir = os.path.abspath(os.path.dirname(__file__))
+sys.path.append(scriptDir + "/../../src")
+
+from pyflow import WorkflowRunner
+
+
+# all pyflow workflows are written into classes derived from
+# pyflow.WorkflowRunner:
+#
+class MakeWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the
+ # WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ # This command 'configures' a makefile
+ #
+ self.addTask("task1", "cd %s; cp .hidden Makefile" % scriptDir)
+
+        # Sometimes you get to deal with make. The task below
+        # demonstrates a make command which starts when the above task
+        # completes. Make tasks are specified as directories which
+        # contain a makefile. This task points to the directory of
+        # this demo script, which has a Makefile once task1
+        # completes.
+ # pyflow will switch the task command between make and qmake
+ # depending on run type.
+ #
+ self.addTask("make_task", scriptDir, isCommandMakePath=True, nCores=2, dependencies="task1")
+
+ # This command 'unconfigures' the makefile
+ #
+ self.addTask("task2", "rm -f %s/Makefile" % scriptDir, dependencies="make_task")
+
+
+# Instantiate the workflow
+#
+# parameters are passed into the workflow via its constructor:
+#
+wflow = MakeWorkflow()
+
+# Run the workflow:
+#
+retval = wflow.run(mode="local", nCores=8)
+
+sys.exit(retval)
+
diff --git a/pyflow/demo/memoryDemo/memoryDemo.py b/pyflow/demo/memoryDemo/memoryDemo.py
new file mode 100755
index 0000000..eec236c
--- /dev/null
+++ b/pyflow/demo/memoryDemo/memoryDemo.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+#
+# This is a very simple demo/test of pyFlow's new (@ v0.4) memory
+# resource feature.
+#
+
+import os.path
+import sys
+
+# add module path by hand
+#
+sys.path.append(os.path.abspath(os.path.dirname(__file__)) + "/../../src")
+
+
+from pyflow import WorkflowRunner
+
+
+# all pyflow workflows are written into classes derived from pyflow.WorkflowRunner:
+#
+class MemTestWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ # Each task has a default memory request of 2048 megabytes
+ # but this is site-configurable in pyflowConfig.py, so we
+ # specify it for every task here
+ #
+ # This works correctly if task 4 is the only task run in
+ # parallel with one of the other 3 tasks.
+ #
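+        # (The run() call below allows memMb=2049 for the whole run, so no
+        # two of the 2048 MB tasks can ever run concurrently (2048 + 2048 >
+        # 2049), while task4 can share the run with any one of them, since
+        # 2048 + 1 <= 2049.)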
+ self.addTask("task1", "echo 'Hello World!'", memMb=2048)
+ self.addTask("task2", "echo 'Hello World!'", memMb=2048)
+ self.addTask("task3", "echo 'Hello World!'", memMb=2048)
+ self.addTask("task4", "echo 'Hello World!'", memMb=1)
+
+
+
+# Instantiate the workflow
+#
+wflow = MemTestWorkflow()
+
+# Run the workflow:
+#
+retval = wflow.run(nCores=8, memMb=2049)
+
+# done!
+sys.exit(retval)
+
diff --git a/pyflow/demo/mutexDemo/mutexDemo.py b/pyflow/demo/mutexDemo/mutexDemo.py
new file mode 100755
index 0000000..ee4bdd8
--- /dev/null
+++ b/pyflow/demo/mutexDemo/mutexDemo.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+import os.path
+import sys
+
+# add module path by hand
+#
+scriptDir = os.path.abspath(os.path.dirname(__file__))
+sys.path.append(scriptDir + "/../../src")
+
+from pyflow import WorkflowRunner
+
+
+#
+# very simple task scripts called by the demo:
+#
+testJobDir = os.path.join(scriptDir, "testtasks")
+
+sleepjob = os.path.join(testJobDir, "sleeper.bash") # sleeps
+
+
+
+# all pyflow workflows are written into classes derived from
+# pyflow.WorkflowRunner:
+#
+class MutexWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the
+ # WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ # create an array of mutex restricted tasks which can only run
+ # once at a time:
+ for i in range(8) :
+ self.addTask("mutex_task_" + str(i), sleepjob + " 1", mutex="test")
+
+ # and add an array of 'normal' tasks for comparison:
+ for i in range(16) :
+ self.addTask("normal_task_" + str(i), sleepjob + " 1")
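+
+        # (With the nCores=6 limit used in main() below, no more than one
+        # mutex task should ever be running at a time, since they all share
+        # the "test" mutex id, while the remaining cores stay free for the
+        # normal tasks.)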
+
+
+
+
+def main() :
+ # Instantiate the workflow
+ wflow = MutexWorkflow()
+
+    # Run the workflow:
+ retval = wflow.run(mode="local", nCores=6)
+
+ sys.exit(retval)
+
+
+
+if __name__ == "__main__" :
+ main()
diff --git a/pyflow/demo/mutexDemo/testtasks/sleeper.bash b/pyflow/demo/mutexDemo/testtasks/sleeper.bash
new file mode 100755
index 0000000..8c77fb2
--- /dev/null
+++ b/pyflow/demo/mutexDemo/testtasks/sleeper.bash
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting sleep
+sleep $arg
+echo pid: $pid arg: $arg ending sleep
+
diff --git a/pyflow/demo/retryDemo/retryDemo.py b/pyflow/demo/retryDemo/retryDemo.py
new file mode 100755
index 0000000..33eeb8b
--- /dev/null
+++ b/pyflow/demo/retryDemo/retryDemo.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+#
+# This is a very simple demo/test of pyFlow's task retry feature.
+#
+
+import os.path
+import sys
+
+# add module path by hand
+#
+sys.path.append(os.path.abspath(os.path.dirname(__file__)) + "/../../src")
+
+from pyflow import WorkflowRunner
+
+
+# all pyflow workflows are written into classes derived from pyflow.WorkflowRunner:
+#
+class RetryWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+        # this task behaves correctly: it retries the job 4 times before
+        # failing; there is no automated way to confirm success right now.
+ #
+ self.flowLog("****** NOTE: This demo is supposed to fail ******")
+ self.addTask("retry_task_success", "exit 0", retryMax=8, retryWait=2, retryWindow=0, retryMode="all")
+ self.addTask("retry_task_fail", "exit 1", retryMax=3, retryWait=2, retryWindow=0, retryMode="all")
+
+
+
+# Instantiate the workflow
+#
+wflow = RetryWorkflow()
+
+# Run the workflow:
+#
+retval = wflow.run()
+
+if retval == 0 :
+ raise Exception("Example workflow is expected to fail, but did not.")
+else :
+ sys.stderr.write("INFO: Demo workflow failed as expected.\n\n")
+
+
+# Run the workflow again to demonstrate that global settings are overridden by task retry settings:
+#
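+# (Both tasks above carry their own retryMax settings, and task-level retry
+# parameters take precedence over the run-level retryMax=0 given here, so the
+# failing task is still retried and the workflow still fails as expected.)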
+retval = wflow.run(retryMax=0)
+
+if retval == 0 :
+ raise Exception("Example workflow is expected to fail, but did not.")
+else :
+ sys.stderr.write("INFO: Demo workflow failed as expected.\n\n")
+
+
diff --git a/pyflow/demo/runOptionsDemo/getDemoRunOptions.py b/pyflow/demo/runOptionsDemo/getDemoRunOptions.py
new file mode 100644
index 0000000..813bc47
--- /dev/null
+++ b/pyflow/demo/runOptionsDemo/getDemoRunOptions.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+import os.path
+import sys
+
+pyflowDir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))
+sys.path.append(pyflowDir)
+
+from optparse import OptionParser, SUPPRESS_HELP
+from pyflow import WorkflowRunner
+
+from pyflow import isLocalSmtp
+
+
+localDefaultCores = WorkflowRunner.runModeDefaultCores('local')
+sgeDefaultCores = WorkflowRunner.runModeDefaultCores('sge')
+
+
+
+def getDemoRunOptions() :
+ """
+    This routine is shared by the demo programs to demonstrate how to expose pyflow's runtime options as command-line options. It is not intended to be a demo program itself.
+ """
+
+ parser = OptionParser()
+
+
+ parser.add_option("-m", "--mode", type="string", dest="mode",
+ help="Select run mode {local,sge} (required)")
+ parser.add_option("-q", "--queue", type="string", dest="queue",
+ help="Specify sge queue name. Argument ignored if mode is not sge")
+ parser.add_option("-j", "--jobs", type="string", dest="jobs",
+ help="Number of jobs, must be an integer or 'unlimited' (default: %s for local mode, %s for sge mode)" % (localDefaultCores, sgeDefaultCores))
+ parser.add_option("-g", "--memGb", type="string", dest="memGb",
+ help="Gigabytes of memory available to run workflow -- only meaningful in local mode, must be an integer or 'unlimited' (default: 2*jobs for local mode, 'unlimited' for sge mode)")
+ parser.add_option("-r", "--resume", dest="isResume", action="store_true", default=False,
+                      help="Resume a workflow from the point of interruption. This flag has no effect on a new workflow run.")
+
+ isEmail = isLocalSmtp()
+ emailHelp=SUPPRESS_HELP
+ if isEmail:
+ emailHelp="Send email notification of job completion status to this address (may be provided multiple times for more than one email address)"
+
+ parser.add_option("-e", "--mailTo", type="string", dest="mailTo", action="append",
+ help=emailHelp)
+
+
+ (options, args) = parser.parse_args()
+
+ if not isEmail :
+ options.mailTo = None
+
+ if len(args) :
+ parser.print_help()
+ sys.exit(2)
+
+ if options.mode is None :
+ parser.print_help()
+ sys.stderr.write("\n\nERROR: must specify run mode\n\n")
+ sys.exit(2)
+ elif options.mode not in ["local", "sge"] :
+ parser.error("Invalid mode. Available modes are: local, sge")
+
+ if options.jobs is None :
+ if options.mode == "sge" :
+ options.jobs = sgeDefaultCores
+ else :
+ options.jobs = localDefaultCores
+ if options.jobs != "unlimited" :
+ options.jobs = int(options.jobs)
+ if options.jobs <= 0 :
+            parser.error("Jobs must be 'unlimited' or a positive integer")
+
+ # note that the user sees gigs, but we set megs
+ if options.memGb is None :
+ if options.mode == "sge" :
+ options.memMb = "unlimited"
+ else :
+ if options.jobs == "unlimited" :
+ options.memMb = "unlimited"
+ else :
+ options.memMb = 2 * 1024 * options.jobs
+ elif options.memGb != "unlimited" :
+ options.memGb = int(options.memGb)
+ if options.memGb <= 0 :
+            parser.error("memGb must be 'unlimited' or a positive integer")
+ options.memMb = 1024 * options.memGb
+ else :
+ options.memMb = options.memGb
+
+ options.schedulerArgList = []
+ if options.queue is not None :
+ options.schedulerArgList = ["-q", options.queue]
+
+ return options
+
+
+
+if __name__ == "__main__" :
+ help(getDemoRunOptions)
+
diff --git a/pyflow/demo/runOptionsDemo/runOptionsDemo.py b/pyflow/demo/runOptionsDemo/runOptionsDemo.py
new file mode 100755
index 0000000..01d5a44
--- /dev/null
+++ b/pyflow/demo/runOptionsDemo/runOptionsDemo.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+import os.path
+import sys
+
+# add module paths
+#
+filePath = os.path.dirname(__file__)
+pyflowPath = os.path.abspath(os.path.join(filePath, "../../src"))
+sys.path.append(pyflowPath)
+
+from pyflow import WorkflowRunner
+from getDemoRunOptions import getDemoRunOptions
+
+
+#
+# very simple task scripts called by the demo:
+#
+testJobDir = os.path.join(filePath, "testtasks")
+
+sleepjob = os.path.join(testJobDir, "sleeper.bash") # sleeps
+yelljob = os.path.join(testJobDir, "yeller.bash") # generates some i/o
+runjob = os.path.join(testJobDir, "runner.bash") # runs at 100% cpu
+
+
+
+# all pyflow workflows are written into classes derived from
+# pyflow.WorkflowRunner:
+#
+class TestWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the
+ # WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ # A simple command task with no dependencies, labeled 'task1'.
+ #
+ cmd = "%s 1" % (yelljob)
+ self.addTask("task1", cmd)
+
+ # Another task which runs the same command, this time the
+ # command is provided as an argument list. An argument list
+ # can be useful when a command has many arguments or
+ # complicated quoting issues:
+ #
+ cmd = [yelljob, "1"]
+ self.addTask("task2", cmd)
+
+ # This task will always run on the local machine, no matter
+ # what the run mode is. The force local option is useful for
+ # non-cpu intensive jobs which are taking care of minor
+ # workflow overhead (moving/touching files, etc)
+ #
+ self.addTask("task3a", sleepjob + " 10", isForceLocal=True)
+
+
+# get runtime options
+#
+runOptions = getDemoRunOptions()
+
+# Instantiate the workflow
+#
+wflow = TestWorkflow()
+
+# Run the workflow with runtime options specified on the command-line:
+#
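+# (Note: the "isResume and 'Auto' or False" expression below is the old-style
+# conditional idiom; it evaluates to "Auto" when --resume was given and to
+# False otherwise.)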
+retval = wflow.run(mode=runOptions.mode,
+ nCores=runOptions.jobs,
+ memMb=runOptions.memMb,
+ mailTo=runOptions.mailTo,
+ isContinue=(runOptions.isResume and "Auto" or False),
+ isForceContinue=True,
+ schedulerArgList=runOptions.schedulerArgList)
+
+sys.exit(retval)
+
diff --git a/pyflow/demo/runOptionsDemo/testtasks/sleeper.bash b/pyflow/demo/runOptionsDemo/testtasks/sleeper.bash
new file mode 100755
index 0000000..8c77fb2
--- /dev/null
+++ b/pyflow/demo/runOptionsDemo/testtasks/sleeper.bash
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting sleep
+sleep $arg
+echo pid: $pid arg: $arg ending sleep
+
diff --git a/pyflow/demo/runOptionsDemo/testtasks/yeller.bash b/pyflow/demo/runOptionsDemo/testtasks/yeller.bash
new file mode 100755
index 0000000..87bb49c
--- /dev/null
+++ b/pyflow/demo/runOptionsDemo/testtasks/yeller.bash
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting yell
+for i in {1..100}; do
+ echo "Yeller $pid yellin $i stdout"
+ echo "Yeller $pid yellin $i stderr" 1>&2
+done
+echo pid: $pid arg: $arg ending yell
+
diff --git a/pyflow/demo/simpleDemo/simpleDemo.py b/pyflow/demo/simpleDemo/simpleDemo.py
new file mode 100755
index 0000000..e3e530d
--- /dev/null
+++ b/pyflow/demo/simpleDemo/simpleDemo.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+import os.path
+import sys
+
+# add module path by hand
+#
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+sys.path.append(scriptDir+"/../../src")
+
+from pyflow import WorkflowRunner
+
+
+#
+# very simple task scripts called by the demo:
+#
+testJobDir=os.path.join(scriptDir,"testtasks")
+
+sleepjob=os.path.join(testJobDir,"sleeper.bash") # sleeps
+yelljob=os.path.join(testJobDir,"yeller.bash") # generates some i/o
+runjob=os.path.join(testJobDir,"runner.bash") # runs at 100% cpu
+
+
+
+# all pyflow workflows are written into classes derived from
+# pyflow.WorkflowRunner:
+#
+class SimpleWorkflow(WorkflowRunner) :
+
+ # WorkflowRunner objects can create regular constructors to hold
+ # run parameters or other state information:
+ #
+ def __init__(self,params) :
+ self.params=params
+
+
+ # a workflow is defined by overloading the
+ # WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ # A simple command task with no dependencies, labeled 'task1'.
+ #
+ cmd="%s 1" % (yelljob)
+ self.addTask("task1",cmd)
+
+ # Another task which runs the same command, this time the
+ # command is provided as an argument list. An argument list
+ # can be useful when a command has many arguments or
+ # complicated quoting issues:
+ #
+ cmd=[yelljob,"1"]
+ self.addTask("task2",cmd)
+
+ # This task will always run on the local machine, no matter
+ # what the run mode is. The force local option is useful for
+ # non-cpu intensive jobs which are taking care of minor
+ # workflow overhead (moving/touching files, etc)
+ #
+ self.addTask("task3a",sleepjob+" 10",isForceLocal=True)
+
+ # This job is requesting 2 threads:
+ #
+ self.addTask("task3b",runjob+" 10",nCores=2)
+
+ # This job is requesting 2 threads and 3 gigs of ram:
+ #
+ self.addTask("task3c",runjob+" 10",nCores=2,memMb=3*1024)
+
+
+ # addTask and addWorkflowTask always return their task labels
+ # as a simple convenience. taskName is set to "task4" now.
+ #
+ taskName=self.addTask("task4",sleepjob+" 1")
+
+ # an example task dependency:
+ #
+ # pyflow stores dependencies in set() objects, but you can
+ # provide a list,tuple,set or single string as the argument to
+ # dependencies:
+ #
+ # all the task5* tasks below specify "task4" as their
+ # dependency:
+ #
+ self.addTask("task5a",yelljob+" 2",dependencies=taskName)
+ self.addTask("task5b",yelljob+" 2",dependencies="task4")
+ self.addTask("task5c",yelljob+" 2",dependencies=["task4"])
+ self.addTask("task5d",yelljob+" 2",dependencies=[taskName])
+
+ # this time we launch a number of sleep tasks based on the
+ # workflow parameters:
+ #
+ # we store all tasks in sleepTasks -- which we use to make
+ # other tasks wait for this entire set of jobs to complete:
+ #
+ sleepTasks=set()
+ for i in range(self.params["numSleepTasks"]) :
+ taskName="sleep_task%i" % (i)
+ sleepTasks.add(taskName)
+ self.addTask(taskName,sleepjob+" 1",dependencies="task5a")
+
+ ## note the three lines above could have been written in a
+ ## more compact single-line format:
+ ##
+ #sleepTasks.add(self.addTask("sleep_task%i" % (i),sleepjob+" 1",dependencies="task5a"))
+
+ # this job cannot start until all tasks in the above loop complete:
+ self.addTask("task6",runjob+" 2",nCores=3,dependencies=sleepTasks)
+
+ # This task is supposed to fail, uncomment to see error reporting:
+ #
+ #self.addTask("task7",sleepjob)
+
+ # Note that no command is provided to this task. It will not
+ # be distributed locally or to sge, but does provide a
+ # convenient label for a set of tasks that other processes
+ # depend on. There is no special "checkpoint-task" type in
+ # pyflow -- but any task can function like one per this
+ # example:
+ #
+ self.addTask("checkpoint_task",dependencies=["task1","task6","task5a"])
+
+ # The final task depends on the above checkpoint:
+ #
+ self.addTask("task8",yelljob+" 2",dependencies="checkpoint_task")
+
+
+
+# simulated workflow parameters
+#
+myRunParams={"numSleepTasks" : 15}
+
+
+# Instantiate the workflow
+#
+# parameters are passed into the workflow via its constructor:
+#
+wflow = SimpleWorkflow(myRunParams)
+
+# Run the workflow:
+#
+retval=wflow.run(mode="local",nCores=8)
+
+sys.exit(retval)
+
diff --git a/pyflow/demo/simpleDemo/testtasks/runner.bash b/pyflow/demo/simpleDemo/testtasks/runner.bash
new file mode 100755
index 0000000..df97eb1
--- /dev/null
+++ b/pyflow/demo/simpleDemo/testtasks/runner.bash
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+thisdir=$(dirname $0)
+
+cd $thisdir
+
+if ! [ -e ./runner ]; then
+    # turning on -O2 is too variable across different platforms, so leave off:
+ #
+ # the move and sleep steps here help to make sure that we don't get a "text file busy"
+ # error on the ./runner call below:
+ #
+ gcc ./runner.c -lm -o runner.tmp && mv runner.tmp runner && sleep 1
+fi
+
+./runner $1
+
diff --git a/pyflow/demo/simpleDemo/testtasks/runner.c b/pyflow/demo/simpleDemo/testtasks/runner.c
new file mode 100644
index 0000000..5fad9c8
--- /dev/null
+++ b/pyflow/demo/simpleDemo/testtasks/runner.c
@@ -0,0 +1,16 @@
+#include "math.h"
+#include "assert.h"
+
+int main(int argc, char**argv) {
+assert(argc==2);
+int mult=atoi(argv[1]);
+int i,j;
+double a=0;
+long total=50000000;
+for(j=0;j<mult;++j) {
+for(i=0;i<total;++i) {
+ a+=i;a=sqrt(a);
+}
+}
+return 0;
+}
diff --git a/pyflow/demo/simpleDemo/testtasks/sleeper.bash b/pyflow/demo/simpleDemo/testtasks/sleeper.bash
new file mode 100755
index 0000000..8c77fb2
--- /dev/null
+++ b/pyflow/demo/simpleDemo/testtasks/sleeper.bash
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting sleep
+sleep $arg
+echo pid: $pid arg: $arg ending sleep
+
diff --git a/pyflow/demo/simpleDemo/testtasks/yeller.bash b/pyflow/demo/simpleDemo/testtasks/yeller.bash
new file mode 100755
index 0000000..87bb49c
--- /dev/null
+++ b/pyflow/demo/simpleDemo/testtasks/yeller.bash
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting yell
+for i in {1..100}; do
+ echo "Yeller $pid yellin $i stdout"
+ echo "Yeller $pid yellin $i stderr" 1>&2
+done
+echo pid: $pid arg: $arg ending yell
+
diff --git a/pyflow/demo/subWorkflow/subWorkflow.py b/pyflow/demo/subWorkflow/subWorkflow.py
new file mode 100755
index 0000000..ebabc3f
--- /dev/null
+++ b/pyflow/demo/subWorkflow/subWorkflow.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+import os.path
+import sys
+
+# add module path by hand
+#
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+sys.path.append(scriptDir+"/../../src")
+
+from pyflow import WorkflowRunner
+
+
+#
+# very simple task scripts called by the demo:
+#
+testJobDir=os.path.join(scriptDir,"testtasks")
+
+sleepjob=os.path.join(testJobDir,"sleeper.bash") # sleeps
+yelljob=os.path.join(testJobDir,"yeller.bash") # generates some i/o
+runjob=os.path.join(testJobDir,"runner.bash") # runs at 100% cpu
+
+
+
+# all pyflow workflows are written into classes derived from pyflow.WorkflowRunner:
+#
+# this workflow is a simple example of a workflow we can either run directly,
+# or run as a task within another workflow:
+#
+class SubWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+ # this workflow executes a simple dependency diamond:
+ self.addTask("task1",yelljob+" 1")
+ self.addTask("task2a",yelljob+" 1",dependencies="task1")
+ self.addTask("task2b",yelljob+" 1",dependencies="task1")
+ self.addTask("task3",yelljob+" 1",dependencies=("task2a","task2b"))
+
+
+#
+# This workflow will use SubWorkflow as a task:
+#
+class SimpleWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ # it's fine to repeat task names in two workflows, even if you're sub-tasking one from the other
+ self.addTask("task1",yelljob+" 1")
+ self.addTask("task2",runjob+" 3")
+
+ # instantiate a new workflow and run it as soon as task1 and task2 complete
+ wflow=SubWorkflow()
+ self.addWorkflowTask("subwf_task3",wflow,dependencies=("task1","task2"))
+
+ # this job will not run until the workflow-task completes. This means that all of the
+ # tasks that SubWorkflow launches will need to complete successfully beforehand:
+ #
+ self.addTask("task4",sleepjob+" 1",dependencies="subwf_task3")
+
+
+# Instantiate our workflow
+#
+wflow = SimpleWorkflow()
+
+# Run the workflow:
+#
+retval=wflow.run(mode="local",nCores=8)
+
+
+# If we want to run the SubWorkflow as a regular workflow, that can be done as well:
+#
+
+#wflow2 = SubWorkflow()
+#retval2=wflow2.run()
+
+
+sys.exit(retval)
+
+
diff --git a/pyflow/demo/subWorkflow/testtasks/runner.bash b/pyflow/demo/subWorkflow/testtasks/runner.bash
new file mode 100755
index 0000000..df97eb1
--- /dev/null
+++ b/pyflow/demo/subWorkflow/testtasks/runner.bash
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+thisdir=$(dirname $0)
+
+cd $thisdir
+
+if ! [ -e ./runner ]; then
+    # turning on -O2 is too variable across different platforms, so leave off:
+ #
+ # the move and sleep steps here help to make sure that we don't get a "text file busy"
+ # error on the ./runner call below:
+ #
+ gcc ./runner.c -lm -o runner.tmp && mv runner.tmp runner && sleep 1
+fi
+
+./runner $1
+
diff --git a/pyflow/demo/subWorkflow/testtasks/runner.c b/pyflow/demo/subWorkflow/testtasks/runner.c
new file mode 100644
index 0000000..5fad9c8
--- /dev/null
+++ b/pyflow/demo/subWorkflow/testtasks/runner.c
@@ -0,0 +1,16 @@
+#include "math.h"
+#include "assert.h"
+
+int main(int argc, char**argv) {
+assert(argc==2);
+int mult=atoi(argv[1]);
+int i,j;
+double a=0;
+long total=50000000;
+for(j=0;j<mult;++j) {
+for(i=0;i<total;++i) {
+ a+=i;a=sqrt(a);
+}
+}
+return 0;
+}
diff --git a/pyflow/demo/subWorkflow/testtasks/sleeper.bash b/pyflow/demo/subWorkflow/testtasks/sleeper.bash
new file mode 100755
index 0000000..8c77fb2
--- /dev/null
+++ b/pyflow/demo/subWorkflow/testtasks/sleeper.bash
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting sleep
+sleep $arg
+echo pid: $pid arg: $arg ending sleep
+
diff --git a/pyflow/demo/subWorkflow/testtasks/yeller.bash b/pyflow/demo/subWorkflow/testtasks/yeller.bash
new file mode 100755
index 0000000..87bb49c
--- /dev/null
+++ b/pyflow/demo/subWorkflow/testtasks/yeller.bash
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting yell
+for i in {1..100}; do
+ echo "Yeller $pid yellin $i stdout"
+ echo "Yeller $pid yellin $i stderr" 1>&2
+done
+echo pid: $pid arg: $arg ending yell
+
diff --git a/pyflow/demo/successMsgDemo/successMsgDemo.py b/pyflow/demo/successMsgDemo/successMsgDemo.py
new file mode 100755
index 0000000..1e53d9e
--- /dev/null
+++ b/pyflow/demo/successMsgDemo/successMsgDemo.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+import os.path
+import sys
+
+# add module path by hand
+#
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+sys.path.append(scriptDir+"/../../src")
+
+from pyflow import WorkflowRunner
+
+
+#
+# test and demonstrate the use of a custom success message
+# at the end of a workflow
+#
+
+
+
+# all pyflow workflows are written into classes derived from
+# pyflow.WorkflowRunner:
+#
+class SuccessWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the
+ # WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+        # provide a minimal task
+ self.addTask("task1","touch success\! && exit 0")
+
+
+
+
+# Instantiate the workflow
+#
+wflow = SuccessWorkflow()
+
+# Run the workflow:
+#
+cwd=os.getcwd()
+successMsg = "SuccessWorkflow has successfully succeeded!\n"
+successMsg += "\tPlease find your token of successful succeeding here: '%s'\n" % (cwd)
+retval=wflow.run(mode="local",nCores=8,successMsg=successMsg,mailTo="csaunders@illumina.com")
+
+sys.exit(retval)
+
diff --git a/pyflow/doc/ChangeLog.txt b/pyflow/doc/ChangeLog.txt
new file mode 100644
index 0000000..bc0bcab
--- /dev/null
+++ b/pyflow/doc/ChangeLog.txt
@@ -0,0 +1,202 @@
+v1.1.13 20160414
+* fix rare issue with sets of dependent checkpoint tasks
+* fix for travis CI script from Dominic Jodoin
+v1.1.12 20151203
+* lengthen signal file delay tolerance to 4 mins
+* [#14] Filter environment variables to remove bash functions. This
+eliminates complications between shellshock bash update and SGE.
+v1.1.11 20151125
+* Improve SGE robustness
+v1.1.10 20150927
+* Remove old custom cluster SGE logic and standardize on h_vmem
+v1.1.9 20150923
+* Add windows CI script to run cross-platform tests on master
+* Add new cross-platform test script and adjust all unit tests
+to run on windows
+* Improve error reporting for missing signal file case
+v1.1.8 20150918
+* Improve windows shell compatibility
+* [#10] If an error occurs creating the task visualization script,
+issue warning and allow workflow to continue
+v1.1.7 20150806
+* [#9] improve robustness to filesystem delays for task
+wrapper parameter file
+* [#9] improve error log specificity when anomalous task
+wrapper output occurs in localhost run mode
+v1.1.6 20150713
+* Fix multithread conflict introduced by [#5] fix
+v1.1.5 20150710
+* Changed to BSD 2-Clause license
+* [#5] fix in-workflow check for nested workflows
+v1.1.4 20150527
+* added check for local SMTP service before sending email notification
+* added extra check against workflow methods running outside of pyflow runtime
+v1.1.3 20141028
+* fix master node memory spike for SGE jobs identified by Lilian Janin
+* added Windows OS patches from Tobias Mann
+v1.1.2 20131026
+* added python distutils setup.py provided by Ryan Kelley
+v1.1.1 20130716
+* fix issue with new startFromTasks feature when used with subWorkflows
+v1.1.0 20130715
+* add new features to allow only part of workflow to be run and/or continued
+v1.0.1 20130710
+* Fix O(exp) scaling problems in DAG handling methods
+v1.0.0 20130507
+* relicenced to Illumina Open Source Software License v1
+v0.6.26 20130304
+* fix bug in forceContinue'd workflow runs
+v0.6.25 20130221
+* Add optional warning and error log which contains all logged warning or error messages.
+* allow specification of a custom notification message on successful workflow completion
+* allow any task to specify its own retry parameters, overriding the
+run-level parameters of the same name
+* add retryMode to allow task retry to be applied to local mode
+v0.6.24 20121128
+* accelerate localhost task sweep so that short-running task workflows can
+execute more quickly
+* create new mutex option to addTask, this allows a set of tasks to share a mutex
+id, causing no more than one in the group to be executed at the same time
+v0.6.23 20121018
+* change cwd parameter to not require directory to exist
+* fix version number search so that a non-installed version does not require git to be installed
+v0.6.22 20121002
+* fix custom environment option to be more robust in various sge contexts
+v0.6.21 20120925
+* add option to supply custom environment variables to any task
+* fix error message when invalid runmode is given
+v0.6.20 20120920
+* increase # of retries and timeout length for qstat call
+v0.6.19 20120914
+* check for and allow OSError on fsync call.
+v0.6.18 201207
+* ignore available memory limit in non-local run modes
+* detect if multiple active pyflow jobs are attempting to use the same data
+directory
+v0.6.17 20120622
+* minor cleanups: add python version to reports and python 2.7.2 warning to
+logs
+v0.6.16 20120529
+* issue error when task specifies itself as a dependency
+* fix issue which could cause pyflow to hang when using python 2.7.2 in sge
+mode, hang can still occur in local mode. python 2.7.2 should be avoided.
+v0.6.15 20120525
+* Improved developer logging: dump stack for all threads in python 2.5+ during the update interval
+* Additional sge command error logging
+* automate pyflow version setting and add this to workflow logs
+* improved logging scalability for 10000+ task workflows
+* improved API documentation
+v0.6.14.1 20120518
+* remove typo bug
+v0.6.14 20120507
+* Add timeouts to sge qsub and qstat commands in case these hang (observed at
+low frequency on sd clusters)
+* Write SGE job_number to log for every qsub-ed job
+* Write the name of the longest queued and longest running tasks in the status
+update report.
+* Add new demo demonstrating commandline settings for workflow run arguments
+v0.6.13 20120503
+* Fix regression in sge make jobs introduced with queue option
+v0.6.12 20120429
+* Add cwd argument to addTask to change wd before task execution
+v0.6.11 20120424
+* Remove sub-workflows from status update report
+* Dump full qsub arg list for each sge job to temporary logs in case of sge anomaly
+* Log sge job number in case of anomalous state at end of sge job
+* taskWrapper logs hostname as soon as possible in case of error
+* More reliable (but slower) flush used for log writes
+* Add option to provide a list of arguments to qsub/qmake (to specify queue
+most likely)
+* Add option to turn off logging to stderr.
+v0.6.10 20120419
+* Provide a 'heartbeat' task status update to the log at a specified
+interval.
+v0.6.9
+* Improve robustness against NFS update delay for task wrapper file
+* Include more sge error details in Notification email
+v0.6.8 20120319
+* Better handling on terminal hang-up: capture and ignore SIGHUP and
+handle failed writes to stderr. You should still use nohup where needed,
+but if you don't, then proper logging and notification will continue.
+* flush log writes
+v0.6.7 20120316
+* add tail of task stderr stream to notifications when tasks fail
+* apply ctrl-C task shutdown to SIGTERM as well
+v0.6.6 20120315
+* include configuration for uscp-prd cluster
+* Passive graph creation
+* Reduce thread stack size in later versions of python
+* More robust ctrl-C shutdown behavior (master workflow on own thread)
+v0.6.5 20120308
+* bagPipes fix
+v0.6.4 20120307
+* Consume filehandles more efficiently when running a very large number of
+local jobs. Reset SGE default max jobs to something reasonable (128). Allow
+logging to continue even after filehandles are exhausted in case it manages
+to happen.
+v0.6.3 20120305
+* Switch sge mode from using qsub sync to a more scalable qsub-and-poll scheme
+This immediately removes the 99 job sge limit, and provides the infrastructure
+for queueing or running timeout on tasks.
+v0.6.2
+* allow commands to be submitted as argument lists (in addition to shell
+strings, as before). Argument lists allow longer commands and obviate a
+variety of quoting issues.
+* Change site configuration to an object scheme which simplifies site
+customization.
+* change qmake tasks from parallel env to dynamic allocation mode
+* allow qmake jobs to retry without an expiration window
+* improved reporting of failed make jobs
+* fixed minor issue with make path cleanup
+v0.6.1 20120228
+* primarily a bagPipes release
+* fixed isForceLocal task bug introduced in 0.6
+* fixed make task bug introduced in 0.6
+v0.6 20120227
+* primarily a bagPipes release
+* added task priority option
+
+v0.5.5 20120224
+* more robust hostname lookup combines ip query and alias list
+* fix runner.bash demo failures on some machines
+* fix pyflowTaskWrapper stderr caching
+v0.5.4 20120224
+* fix configuration for non-sge hosts
+v0.5.2 20120222
+* chuk pthread fix for pyflow tasks
+v0.5.1 20120221
+* Added automatic chuk sge configuration to allow bagPipes to complete
+in the uk.
+* Various fixes from uk testing: (1) full hostname is correctly found in
+the uk now (2) the default email configuration now comes from
+"pyflow-bot@"YOUR_DOMAIN_NAME. This is required to correctly
+get mail sent from a uk box.
+v0.5 20120220
+* Cutting version of pyFlow to sync with first bagPipes prototype
+* add max SGE jobs to configuration parameters -- default set to 96
+* Fix sub-workflows to shutdown properly after task shutdown
+* More robust handling of commands with quoting and special characters
+* Non-breaking API change: isTaskComplete lets you query whether a task
+is in the workflow and completed -- useful for sane interrupt/resume behavior
+* Non-breaking API change: limitNCores(n) and limitMemMb(n) can be used now to
+reduce your resource request to the maximum available for this run.
+
+v0.4 20120216
+* Added memory to the task resource tracking
+* Created pyflowConfig file which contains site specific code --
+moved resource to qsub argument translation functions into this
+config file
+* Non-breaking API change: Added isCmdStable option to addTask to specify that a command
+can change on workflow resume.
+* Non-breaking API change: all add*() methods return task label
+
+v0.3 20120213
+* Full support for recursive WorkflowRunner task specification -- provide any
+other WorkflowRunner instance as a task in a workflow() definition.
+* Report Exceptions in TaskRunner objects as task errors
+* Report list of failed tasks even during a waitForTasks() holding loop.
+
+v0.2 20120207
+First versioned release. Major addition is a complete command-line interface
+for the bcl converter demo
+
diff --git a/pyflow/doc/README.txt b/pyflow/doc/README.txt
new file mode 100644
index 0000000..7812fff
--- /dev/null
+++ b/pyflow/doc/README.txt
@@ -0,0 +1,4 @@
+client_api/ -> contains documentation on the pyflow API which you can use to create your own workflow scripts
+
+developer/ -> contains documentation that's only useful if you'd like to change or add features to pyflow itself
+
diff --git a/pyflow/doc/client_api/README b/pyflow/doc/client_api/README
new file mode 100644
index 0000000..503650b
--- /dev/null
+++ b/pyflow/doc/client_api/README
@@ -0,0 +1,12 @@
+Pre-generated client API documentation is here:
+
+WorkflowRunner_API_html_doc
+
+Full API documentation can be created as an html tree using the script:
+'make_WorkflowRunner_API_html_doc.bash'
+
+Note this requires the program 'epydoc'.
+
+A simpler ascii documentation page can be generated in pydoc by running:
+'make_WorkflowRunner_API_simple_doc.py'
+
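+A minimal usage sketch of the client API (an editor's illustration, loosely
+based on the helloWorld demo -- see pyflow/demo for complete, tested examples,
+and the generated API documentation for the exact run() options):
+
+    import sys
+    from pyflow import WorkflowRunner
+
+    class HelloWorkflow(WorkflowRunner) :
+        def workflow(self) :
+            self.addTask("sayHello", "echo 'Hello pyflow!'")
+
+    wflow = HelloWorkflow()
+    sys.exit(wflow.run(mode="local"))
+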
diff --git a/pyflow/doc/client_api/make_WorkflowRunner_API_html_doc.bash b/pyflow/doc/client_api/make_WorkflowRunner_API_html_doc.bash
new file mode 100755
index 0000000..4067ac8
--- /dev/null
+++ b/pyflow/doc/client_api/make_WorkflowRunner_API_html_doc.bash
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+thisdir=$(dirname $0)
+
+PYTHONPATH=$thisdir/../../src epydoc pyflow.WorkflowRunner --no-private -o WorkflowRunner_API_html_doc
+
diff --git a/pyflow/doc/client_api/make_WorkflowRunner_API_simple_doc.py b/pyflow/doc/client_api/make_WorkflowRunner_API_simple_doc.py
new file mode 100755
index 0000000..ed75429
--- /dev/null
+++ b/pyflow/doc/client_api/make_WorkflowRunner_API_simple_doc.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+
+import os.path
+import sys
+
+sys.path.append(os.path.abspath(os.path.dirname(__file__)) + "/../../src")
+
+import pyflow
+
+# Document the public functions of pyflow's only public class:
+#
+help(pyflow.WorkflowRunner)
+
diff --git a/pyflow/doc/developer/README b/pyflow/doc/developer/README
new file mode 100644
index 0000000..76ebfb8
--- /dev/null
+++ b/pyflow/doc/developer/README
@@ -0,0 +1 @@
+This documentation is intended for anyone interested in changing pyflow itself. For documentation on the API to *use* pyflow, please see pyflow/doc/client_api and demo programs in pyflow/demo
diff --git a/pyflow/doc/developer/make_pyflow_developer_html_doc.bash b/pyflow/doc/developer/make_pyflow_developer_html_doc.bash
new file mode 100755
index 0000000..ac453e1
--- /dev/null
+++ b/pyflow/doc/developer/make_pyflow_developer_html_doc.bash
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+thisdir=$(dirname $0)
+
+epydoc $thisdir/../../src/*.py -o pyflow_developer_html_doc -v --graph all
+
diff --git a/pyflow/setup.py b/pyflow/setup.py
new file mode 100644
index 0000000..1345197
--- /dev/null
+++ b/pyflow/setup.py
@@ -0,0 +1,11 @@
+from distutils.core import setup
+
+setup(
+ name='pyFlow',
+ version='${VERSION}',
+ description='A lightweight parallel task engine',
+ author='Chris Saunders',
+ author_email='csaunders at illumina.com',
+ packages=['pyflow'],
+ package_dir={'pyflow': 'src'}
+)
diff --git a/pyflow/src/__init__.py b/pyflow/src/__init__.py
new file mode 100644
index 0000000..b69891e
--- /dev/null
+++ b/pyflow/src/__init__.py
@@ -0,0 +1 @@
+from pyflow import *
diff --git a/pyflow/src/pyflow.py b/pyflow/src/pyflow.py
new file mode 100644
index 0000000..0ed516d
--- /dev/null
+++ b/pyflow/src/pyflow.py
@@ -0,0 +1,4175 @@
+#!/usr/bin/env python
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+"""
+pyflow -- a lightweight parallel task engine
+"""
+
+__author__ = 'Christopher Saunders'
+
+
+import copy
+import datetime
+import os
+import re
+import shutil
+import subprocess
+import sys
+import threading
+import time
+import traceback
+
+from pyflowConfig import siteConfig
+
+
+moduleDir = os.path.abspath(os.path.dirname(__file__))
+
+
+# minimum python version
+#
+pyver = sys.version_info
+if pyver[0] != 2 or (pyver[0] == 2 and pyver[1] < 4) :
+ raise Exception("pyflow module has only been tested for python versions [2.4,3.0)")
+
+# problem python versions:
+#
+# Internal interpreter deadlock issue in python 2.7.2:
+# http://bugs.python.org/issue13817
+# ...is so bad that pyflow can partially, but not completely, work around it -- so an exception is raised for this case.
+if pyver[0] == 2 and pyver[1] == 7 and pyver[2] == 2 :
+ raise Exception("Python interpreter errors in python 2.7.2 may cause a pyflow workflow hang or crash. Please use a different python version.")
+
+
+# The line below is a workaround for a python 2.4/2.5 bug in
+# the subprocess module.
+#
+# Bug is described here: http://bugs.python.org/issue1731717
+# Workaround is described here: http://bugs.python.org/issue1236
+#
+subprocess._cleanup = lambda: None
+
+
+# In python 2.5 or greater, we can lower the per-thread stack size to
+# improve memory consumption when a very large number of jobs are
+# run. Below it is lowered to 256Kb (compare to linux default of
+# 8Mb).
+#
+try:
+ threading.stack_size(256 * 1024)
+except AttributeError:
+ # Assuming this means python version < 2.5
+ pass
+
+
+class GlobalSync :
+ """
+ Control total memory usage in non-local run modes by
+ limiting the number of simultaneous subprocess calls
+
+ Note that in practice this only controls the total number
+ of qsub/qstat calls in SGE mode
+ """
+ maxSubprocess = 2
+ subprocessControl = threading.Semaphore(maxSubprocess)
+
+
+
+def getPythonVersion() :
+ python_version = sys.version_info
+ return ".".join([str(i) for i in python_version])
+
+pythonVersion = getPythonVersion()
+
+
+# Get pyflow version number
+#
+
+def getPyflowVersion() :
+ # this will be automatically macro-ed in for pyflow releases:
+ pyflowAutoVersion = None
+
+ # Get version number in regular release code:
+ if pyflowAutoVersion is not None : return pyflowAutoVersion
+
+ # Get version number during dev:
+ try :
+ proc = subprocess.Popen(["git", "describe"], stdout=subprocess.PIPE, stderr=open(os.devnull, "w"), cwd=moduleDir, shell=False)
+ (stdout, _stderr) = proc.communicate()
+ retval = proc.wait()
+ stdoutList = stdout.split("\n")[:-1]
+ if (retval == 0) and (len(stdoutList) == 1) : return stdoutList[0]
+ except OSError:
+ # no git installed
+ pass
+
+ return "unknown"
+
+
+__version__ = getPyflowVersion()
+
+
+# portability functions:
+#
+
+def _isWindows() :
+ import platform
+ return (platform.system().find("Windows") > -1)
+
+class GlobalConstants :
+ isWindows=_isWindows()
+
+
+def isWindows() :
+ return GlobalConstants.isWindows
+
+
+
+
+def forceRename(src,dst) :
+ """
+ dst is only overwritten in a single atomic operation on *nix.
+ On windows, we can't have an atomic rename, but we can recreate the behavior otherwise.
+ """
+ if isWindows() :
+ if os.path.exists(dst) :
+ os.remove(dst)
+
+ maxTrials=5
+ for trial in range(maxTrials) :
+ try :
+ os.rename(src,dst)
+ return
+ except OSError :
+ if (trial+1) >= maxTrials : raise
+ time.sleep(5)
+
+
+
+def cleanEnv() :
+ """
+ clear bash functions out of the env
+
+ without this change the shellshock security update causes pyflow SGE jobs to
+ fail with the behavior of current (201512) versions of SGE qsub
+ """
+
+ ekeys = os.environ.keys()
+ for key in ekeys :
+ if key.endswith("()") :
+ del os.environ[key]
+
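+# Editor's note (illustration, not part of the upstream source): on systems
+# patched for shellshock, an exported bash function can appear in the
+# environment under a key such as "BASH_FUNC_foo()", which is what the
+# trailing-"()" test above removes, e.g.:
+#
+#   os.environ["BASH_FUNC_foo()"] = "() { echo hi; }"
+#   cleanEnv()
+#   assert "BASH_FUNC_foo()" not in os.environ
+#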
+
+# utility values and functions:
+#
+
+def ensureDir(d):
+ """
+ make directory if it doesn't already exist, raise exception if
+ something else is in the way:
+ """
+ if os.path.exists(d):
+ if not os.path.isdir(d) :
+ raise Exception("Can't create directory: %s" % (d))
+ else :
+ os.makedirs(d)
+
+
+#
+# time functions -- note there's an additional copy in the pyflow wrapper script:
+#
+# all times in pyflow are utc (never local) and printed to iso8601
+#
+def timeStampToTimeStr(ts) :
+ """
+ converts time.time() output to timenow() string
+ """
+ return datetime.datetime.utcfromtimestamp(ts).isoformat()
+
+def timeStrNow():
+ return timeStampToTimeStr(time.time())
+
+def timeStrToTimeStamp(ts):
+ import calendar
+ d = datetime.datetime(*map(int, re.split(r'[^\d]', ts)[:-1]))
+ return calendar.timegm(d.timetuple())
+
+
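+# Editor's illustration of the conversions above (UTC, ISO 8601):
+#
+#   timeStampToTimeStr(0.5)                            # -> '1970-01-01T00:00:00.500000'
+#   timeStrToTimeStamp('1970-01-01T00:00:00.500000')   # -> 0
+#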
+
+def isInt(x) :
+ return isinstance(x, (int, long))
+
+def isString(x):
+ return isinstance(x, basestring)
+
+
+def isIterable(x):
+ return (getattr(x, '__iter__', False) != False)
+
+
+def lister(x):
+ """
+ Convert input into a list, whether it's already iterable or
+ not. Make an exception for individual strings to be returned
+ as a list of one string, instead of being chopped into letters.
+ Also, convert None type to empty list:
+ """
+ # special handling in case a single string is given:
+ if x is None : return []
+ if (isString(x) or (not isIterable(x))) : return [x]
+ return list(x)
+
+
+
+def setzer(x) :
+ """
+ convert user input into a set, handling the pathological case
+ that you have been handed a single string, and you don't want
+ a set of letters:
+ """
+ return set(lister(x))
+
+
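+# Editor's illustration of the normalization helpers above:
+#
+#   lister(None)         # -> []
+#   lister("foo")        # -> ["foo"]   (a string is kept whole, not split)
+#   lister(("a", "b"))   # -> ["a", "b"]
+#   setzer("foo")        # -> set(["foo"])
+#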
+
+class LogState :
+ """
+ A simple logging enum
+ """
+ INFO = 1
+ WARNING = 2
+ ERROR = 3
+
+ @classmethod
+ def toString(cls,logState) :
+ if logState == cls.INFO : return "INFO"
+ if logState == cls.WARNING : return "WARNING"
+ if logState == cls.ERROR : return "ERROR"
+
+ raise Exception("Unknown log state: " + str(logState))
+
+
+# allow fsync to be globally turned off
+class LogGlobals :
+ isFsync = True
+
+
+def hardFlush(ofp):
+ ofp.flush()
+ if ofp.isatty() : return
+ # fsync call has been reported to consistently fail in some contexts (rsh?)
+ # so allow OSError
+ if not LogGlobals.isFsync : return
+ try :
+ os.fsync(ofp.fileno())
+ except OSError:
+ LogGlobals.isFsync = False
+
+
+
+def log(ofpList, msgList, linePrefix=None):
+ """
+ General logging function.
+
+ @param ofpList: A container of file objects to write to
+
+ @param msgList: A container of (or a single) multi-line log message
+ string. Final newlines are not required
+
+ @param linePrefix: A prefix to add before every line. This will come
+ *after* the log function's own '[time] [hostname]'
+ prefix.
+
+ @return: Returns a list of booleans, one per file object in ofpList,
+ indicating the success of writing to each file object
+ """
+ msgList = lister(msgList)
+ ofpList = setzer(ofpList)
+ retval = [True] * len(ofpList)
+ for msg in msgList :
+ # strip final trailing newline if it exists:
+ if (len(msg) > 0) and (msg[-1] == "\n") : msg = msg[:-1]
+ linePrefixOut = "[%s] [%s]" % (timeStrNow(), siteConfig.getHostName())
+ if linePrefix is not None : linePrefixOut += " " + linePrefix
+ # split message into prefixable lines:
+ for i, ofp in enumerate(ofpList):
+ # skip io streams which have failed before:
+ if not retval[i] : continue
+ try :
+ for line in msg.split("\n") :
+ ofp.write("%s %s\n" % (linePrefixOut, line))
+ hardFlush(ofp)
+ except IOError:
+ retval[i] = False
+ return retval
+
+
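+# Editor's illustration (hypothetical usage): write one prefixed message to
+# two streams at once; each line is emitted as
+# "[<utc time>] [<hostname>] [demo] workflow starting":
+#
+#   ok = log([sys.stdout, sys.stderr], "workflow starting", linePrefix="[demo]")
+#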
+
+def getThreadName():
+ return threading.currentThread().getName()
+
+def isMainThread() :
+ return (getThreadName() == "MainThread")
+
+
+class StrFileObject(object) :
+ """
+ fakes a filehandle for library functions which write to a stream,
+ and captures output in a string
+ """
+ def __init__(self) :
+ self.str = ""
+
+ def write(self, string) :
+ self.str += string
+
+ def __str__(self) :
+ return self.str
+
+
+def getTracebackStr() :
+ return traceback.format_exc()
+
+
+def getExceptionMsg() :
+
+ msg = ("Unhandled Exception in %s\n" % (getThreadName())) + getTracebackStr()
+ if msg[-1] == "\n" : msg = msg[:-1]
+ return msg.split("\n")
+
+
+def cmdline() :
+ return " ".join(sys.argv)
+
+
+
+def msgListToMsg(msgList):
+ """
+ convert string or list of strings into a single string message
+ """
+ msg = ""
+ isFirst=True
+ for chunk in lister(msgList) :
+ if isFirst :
+ isFirst = False
+ else :
+ msg += "\n"
+ if ((len(chunk)>0) and (chunk[-1] == '\n')) :
+ chunk = chunk[:-1]
+ msg += chunk
+
+ return msg
+
+
+
+emailRegex = re.compile(r"(?:^|\s)[-a-z0-9_.]+@(?:[-a-z0-9]+\.)+[a-z]{2,6}(?:\s|$)", re.IGNORECASE)
+
+def verifyEmailAddy(x) :
+ return (emailRegex.match(x) is not None)
+
+
+def isLocalSmtp() :
+ """
+ return true if a local smtp server is available
+ """
+ import smtplib
+ try :
+ s = smtplib.SMTP('localhost')
+ except :
+ return False
+ return True
+
+
+def sendEmail(mailTo, mailFrom, subject, msgList) :
+ import smtplib
+ # this is the way to import MIMEText in py 2.4:
+ from email.MIMEText import MIMEText
+
+ # format message list into a single string:
+ msg = msgListToMsg(msgList)
+
+ mailTo = setzer(mailTo)
+
+ msg = MIMEText(msg)
+ msg["Subject"] = subject
+ msg["From"] = mailFrom
+ msg["To"] = ", ".join(mailTo)
+
+ s = smtplib.SMTP('localhost')
+ s.sendmail(mailFrom, list(mailTo), msg.as_string())
+ s.quit()
+
+
+def boolToStr(b) :
+ return str(int(b))
+
+
+def argToBool(x) :
+ """
+ convert argument of unknown type to a bool:
+ """
+ class FalseStrings :
+ val = ("", "0", "false", "f", "no", "n", "off")
+
+ if isinstance(x, basestring) :
+ return (x.lower() not in FalseStrings.val)
+ return bool(x)
+
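+# Editor's illustration:
+#
+#   argToBool("no")     # -> False  (member of FalseStrings.val)
+#   argToBool("True")   # -> True
+#   argToBool(0)        # -> False
+#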
+
+def hashObjectValue(obj) :
+ """
+ This function hashes objects values -- the hash will be the
+ same for two objects containing the same methods and data, so
+ it corresponds to 'A==B' and *not* 'A is B'.
+ """
+ import pickle
+ import hashlib
+ return hashlib.md5(pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)).hexdigest()
+
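+# Editor's illustration: equal values hash identically, regardless of identity:
+#
+#   hashObjectValue({"a": 1}) == hashObjectValue({"a": 1})   # -> True
+#   hashObjectValue({"a": 1}) == hashObjectValue({"b": 2})   # -> False
+#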
+
+namespaceSep = "+"
+
+
+def namespaceJoin(a, b) :
+ """
+ join two strings with a separator only if a exists
+ """
+ if a == "" : return b
+ elif b == "" : return a
+ return a + namespaceSep + b
+
+
+def namespaceLabel(namespace) :
+ """
+ provide a consistent naming scheme to users for embedded workflows
+ """
+ if namespace == "" :
+ return "master workflow"
+ else :
+ return "sub-workflow '%s'" % (namespace)
+
+
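+# Editor's illustration of the namespace helpers above:
+#
+#   namespaceJoin("", "indexBam")        # -> "indexBam"
+#   namespaceJoin("align", "indexBam")   # -> "align+indexBam"
+#   namespaceLabel("")                   # -> "master workflow"
+#   namespaceLabel("align")              # -> "sub-workflow 'align'"
+#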
+
+class ExpWaiter(object) :
+ """
+ Convenience object to setup exponentially increasing wait/polling times
+ """
+ def __init__(self, startSec, factor, maxSec, event = None) :
+ """
+ optionally allow an event to interrupt wait cycle
+ """
+ assert (startSec > 0.)
+ assert (factor > 1.)
+ assert (maxSec >= startSec)
+ self.startSec = startSec
+ self.factor = factor
+ self.maxSec = maxSec
+ self.event = event
+
+ self.sec = self.startSec
+ self.isMax = False
+
+ def reset(self) :
+ self.sec = self.startSec
+
+ def wait(self) :
+ if self.event is None :
+ time.sleep(self.sec)
+ else :
+ self.event.wait(self.sec)
+ if self.isMax : return
+ self.sec = min(self.sec * self.factor, self.maxSec)
+ self.isMax = (self.sec == self.maxSec)
+ assert self.sec <= self.maxSec
+
+
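+# Editor's illustration (isDone() is a hypothetical predicate): poll with
+# exponentially increasing sleeps from 5s up to a 60s ceiling, matching the
+# parameters used for SGE qstat polling later in this module:
+#
+#   waiter = ExpWaiter(5, 1.7, 60)
+#   while not isDone() :
+#       waiter.wait()
+#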
+
+def lockMethod(f):
+ """
+ method decorator acquires/releases object's lock
+ """
+
+ def wrapped(self, *args, **kw):
+ if not hasattr(self,"lock") :
+ self.lock = threading.RLock()
+
+ self.lock.acquire()
+ try:
+ return f(self, *args, **kw)
+ finally:
+ self.lock.release()
+ return wrapped
+
+
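+# Editor's illustration: decorating a method with @lockMethod serializes calls
+# through the owning object's re-entrant lock (created on first use):
+#
+#   class Counter(object) :
+#       def __init__(self) :
+#           self.n = 0
+#       @lockMethod
+#       def increment(self) :
+#           self.n += 1
+#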
+
+class Bunch:
+ """
+ generic struct with named argument constructor
+ """
+ def __init__(self, **kwds):
+ self.__dict__.update(kwds)
+
+
+
+def stackDump(dumpfp):
+ """
+ adapted from haridsv @ stackoverflow:
+ """
+
+ athreads = threading.enumerate()
+ tnames = [(th.getName()) for th in athreads]
+
+ frames = None
+ try:
+ frames = sys._current_frames()
+ except AttributeError:
+ # python version < 2.5
+ pass
+
+ id2name = {}
+ try:
+ id2name = dict([(th.ident, th.getName()) for th in athreads])
+ except AttributeError :
+ # python version < 2.6
+ pass
+
+ if (frames is None) or (len(tnames) > 50) :
+ dumpfp.write("ActiveThreadCount: %i\n" % (len(tnames)))
+ dumpfp.write("KnownActiveThreadNames:\n")
+ for name in tnames : dumpfp.write(" %s\n" % (name))
+ dumpfp.write("\n")
+ return
+
+ dumpfp.write("ActiveThreadCount: %i\n" % (len(frames)))
+ dumpfp.write("KnownActiveThreadNames:\n")
+ for name in tnames : dumpfp.write(" %s\n" % (name))
+ dumpfp.write("\n")
+
+ for tid, stack in frames.items():
+ dumpfp.write("Thread: %d %s\n" % (tid, id2name.get(tid, "NAME_UNKNOWN")))
+ for filename, lineno, name, line in traceback.extract_stack(stack):
+ dumpfp.write('File: "%s", line %d, in %s\n' % (filename, lineno, name))
+ if line is not None:
+ dumpfp.write(" %s\n" % (line.strip()))
+ dumpfp.write("\n")
+ dumpfp.write("\n")
+
+
+
+
+#######################################################################
+#
+# these functions are written out to a utility script which allows users
+# to make a dot graph from their current state directory output. We
+# keep it in pyflow as working code so that pyflow can call sections of it.
+#
+
+def taskStateHeader() :
+ return "#taskLabel\ttaskNamespace\trunState\terrorCode\trunStateUpdateTime\n"
+
+
+def taskStateParser(stateFile) :
+ class Constants :
+ nStateCols = 5
+
+ for line in open(stateFile) :
+ if len(line) and line[0] == "#" : continue
+ line = line.strip()
+ w = line.split("\t")
+ if len(w) != Constants.nStateCols :
+ raise Exception("Unexpected format in taskStateFile: '%s' line: '%s'" % (stateFile, line))
+ yield [x.strip() for x in w]
+
+
+def taskInfoHeader() :
+ return "#%s\n" % ("\t".join(("taskLabel", "taskNamespace", "taskType", "nCores", "memMb", "priority", "isForceLocal", "dependencies", "cwd", "command")))
+
+
+def taskInfoParser(infoFile) :
+ class Constants :
+ nInfoCols = 10
+
+ for line in open(infoFile) :
+ if len(line) and line[0] == "#" : continue
+ line = line.lstrip()
+ w = line.split("\t", (Constants.nInfoCols - 1))
+ if len(w) != Constants.nInfoCols :
+ raise Exception("Unexpected format in taskInfoFile: '%s' line: '%s'" % (infoFile, line))
+ yield [x.strip() for x in w]
+
+
+def getTaskInfoDepSet(s) :
+ # reconstruct dependencies allowing for extraneous whitespace in the file:
+ s = s.strip()
+ if s == "" : return []
+ return set([d.strip() for d in s.split(",")])
+
+
+
+class TaskNodeConstants(object) :
+
+ validRunstates = ("complete", "running", "queued", "waiting", "error")
+
+
+
+class DotConfig(object) :
+ """
+ A static container of configuration data for dot graph output
+ """
+
+ runstateDotColor = {"waiting" : "grey",
+ "running" : "green",
+ "queued" : "yellow",
+ "error" : "red",
+ "complete" : "blue" }
+
+ runstateDotStyle = {"waiting" : "dashed",
+ "running" : None,
+ "queued" : None,
+ "error" : "bold",
+ "complete" : None }
+
+ @staticmethod
+ def getRunstateDotAttrib(runstate) :
+ color = DotConfig.runstateDotColor[runstate]
+ style = DotConfig.runstateDotStyle[runstate]
+ attrib = ""
+ if color is not None : attrib += " color=%s" % (color)
+ if style is not None : attrib += " style=%s" % (style)
+ return attrib
+
+ @staticmethod
+ def getTypeDotAttrib(nodeType) :
+ attrib = ""
+ if nodeType == "workflow" :
+ attrib += " shape=rect style=rounded"
+ return attrib
+
+ @staticmethod
+ def getDotLegend() :
+ string = '{ rank = source; Legend [shape=none, margin=0, label=<\n'
+ string += '<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0" CELLPADDING="4">\n'
+ string += '<TR><TD COLSPAN="2">Legend</TD></TR>\n'
+ for state in TaskNodeConstants.validRunstates :
+ color = DotConfig.runstateDotColor[state]
+ string += '<TR> <TD>%s</TD> <TD BGCOLOR="%s"></TD> </TR>\n' % (state, color)
+ string += '</TABLE>>];}\n'
+ return string
+
+
+
+def writeDotGraph(taskInfoFile, taskStateFile, workflowClassName) :
+ """
+ write out the current graph state in dot format
+ """
+
+ addOrder = []
+ taskInfo = {}
+ headNodes = set()
+ tailNodes = set()
+
+ # read info file:
+ for (label, namespace, ptype, _nCores, _memMb, _priority, _isForceLocal, depStr, _cwdStr, _command) in taskInfoParser(taskInfoFile) :
+ tid = (namespace, label)
+ addOrder.append(tid)
+ taskInfo[tid] = Bunch(ptype=ptype,
+ parentLabels=getTaskInfoDepSet(depStr))
+ if len(taskInfo[tid].parentLabels) == 0 : headNodes.add(tid)
+ tailNodes.add(tid)
+ for plabel in taskInfo[tid].parentLabels :
+ ptid = (namespace, plabel)
+ if ptid in tailNodes : tailNodes.remove(ptid)
+
+ for (label, namespace, runState, _errorCode, _time) in taskStateParser(taskStateFile) :
+ tid = (namespace, label)
+ taskInfo[tid].runState = runState
+
+ dotFp = sys.stdout
+ dotFp.write("// Task graph from pyflow object '%s'\n" % (workflowClassName))
+ dotFp.write("// Process command: '%s'\n" % (cmdline()))
+ dotFp.write("// Process working dir: '%s'\n" % (os.getcwd()))
+ dotFp.write("// Graph capture time: %s\n" % (timeStrNow()))
+ dotFp.write("\n")
+ dotFp.write("digraph %s {\n" % (workflowClassName + "Graph"))
+ dotFp.write("\tcompound=true;\nrankdir=LR;\nnode[fontsize=10];\n")
+ labelToSym = {}
+ namespaceGraph = {}
+ for (i, (namespace, label)) in enumerate(addOrder) :
+ tid = (namespace, label)
+ if namespace not in namespaceGraph :
+ namespaceGraph[namespace] = ""
+ sym = "n%i" % i
+ labelToSym[tid] = sym
+ attrib1 = DotConfig.getRunstateDotAttrib(taskInfo[tid].runState)
+ attrib2 = DotConfig.getTypeDotAttrib(taskInfo[tid].ptype)
+ namespaceGraph[namespace] += "\t\t%s [label=\"%s\"%s%s];\n" % (sym, label, attrib1, attrib2)
+
+ for (namespace, label) in addOrder :
+ tid = (namespace, label)
+ sym = labelToSym[tid]
+ for plabel in taskInfo[tid].parentLabels :
+ ptid = (namespace, plabel)
+ namespaceGraph[namespace] += ("\t\t%s -> %s;\n" % (labelToSym[ptid], sym))
+
+ for (i, ns) in enumerate(namespaceGraph.keys()) :
+ isNs = ((ns is not None) and (ns != ""))
+ dotFp.write("\tsubgraph cluster_sg%i {\n" % (i))
+ if isNs :
+ dotFp.write("\t\tlabel = \"%s\";\n" % (ns))
+ else :
+ dotFp.write("\t\tlabel = \"%s\";\n" % (workflowClassName))
+ dotFp.write(namespaceGraph[ns])
+ dotFp.write("\t\tbegin%i [label=\"begin\" shape=diamond];\n" % (i))
+ dotFp.write("\t\tend%i [label=\"end\" shape=diamond];\n" % (i))
+ for (namespace, label) in headNodes :
+ if namespace != ns : continue
+ sym = labelToSym[(namespace, label)]
+ dotFp.write("\t\tbegin%i -> %s;\n" % (i, sym))
+ for (namespace, label) in tailNodes :
+ if namespace != ns : continue
+ sym = labelToSym[(namespace, label)]
+ dotFp.write("\t\t%s -> end%i;\n" % (sym, i))
+ dotFp.write("\t}\n")
+ if ns in labelToSym :
+ dotFp.write("\t%s -> begin%i [style=dotted];\n" % (labelToSym[ns], i))
+ # in LR orientation this will make the graph look messy:
+ # dotFp.write("\tend%i -> %s [style=invis];\n" % (i,labelToSym[ns]))
+
+ dotFp.write(DotConfig.getDotLegend())
+ dotFp.write("}\n")
+ hardFlush(dotFp)
+
+
+
+def writeDotScript(taskDotScriptFile,
+ taskInfoFileName, taskStateFileName,
+ workflowClassName) :
+ """
+ write dot task graph creation script
+ """
+ import inspect
+
+ dsfp = os.fdopen(os.open(taskDotScriptFile, os.O_WRONLY | os.O_CREAT, 0755), 'w')
+
+ dsfp.write("""#!/usr/bin/env python
+#
+# This is a script to create a dot graph from pyflow state files.
+# Usage: $script >| task_graph.dot
+#
+# Note that script assumes the default pyflow state files are in the script directory.
+#
+# This file was autogenerated by process: '%s'
+# ...from working directory: '%s'
+#
+
+import datetime,os,sys,time
+
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+""" % (cmdline(), os.getcwd()))
+
+ for dobj in (timeStampToTimeStr, timeStrNow, cmdline, Bunch, LogGlobals, hardFlush, TaskNodeConstants, DotConfig, taskStateParser, taskInfoParser, getTaskInfoDepSet, writeDotGraph) :
+ dsfp.write("\n\n")
+ dsfp.write(inspect.getsource(dobj))
+
+ dsfp.write("""
+
+if __name__ == '__main__' :
+ writeDotGraph(os.path.join(scriptDir,'%s'),os.path.join(scriptDir,'%s'),'%s')
+
+""" % (taskInfoFileName, taskStateFileName, workflowClassName))
+
+
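+# Editor's note (illustration; the script file name below is hypothetical --
+# pyflow passes the real name in via taskDotScriptFile): the generated script
+# writes dot format to stdout, which standard graphviz tooling can render:
+#
+#   python make_pyflow_task_graph.py > task_graph.dot
+#   dot -Tpng task_graph.dot -o task_graph.png
+#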
+
+################################################################
+#
+# workflowRunner Helper Classes:
+#
+#
+
+
+class Command(object) :
+ """
+ Commands can be presented as strings or argument lists (or none)
+ """
+
+ def __init__(self, cmd, cwd, env=None) :
+ # 1: sanitize/error-check cmd
+ if ((cmd is None) or
+ (cmd == "") or
+ (isIterable(cmd) and len(cmd) == 0)) :
+ self.cmd = None
+ self.type = "none"
+ elif isString(cmd) :
+ self.cmd = Command.cleanStr(cmd)
+ self.type = "str"
+ elif isIterable(cmd) :
+ self.cmd = []
+ for i, s in enumerate(cmd):
+ if not (isString(s) or isInt(s)):
+ raise Exception("Argument: '%s' from position %i in argument list command is not a string or integer. Full command: '%s'" %
+ (str(s), (i + 1), " ".join([str(s) for s in cmd])))
+ self.cmd.append(Command.cleanStr(s))
+ self.type = "list"
+ else :
+ raise Exception("Invalid task command: '%s'" % (str(cmd)))
+
+ # 2: sanitize cwd
+ self.cwd = ""
+ if cwd is not None and cwd != "" :
+ self.cwd = os.path.abspath(cwd)
+ if os.path.exists(self.cwd) and not os.path.isdir(self.cwd) :
+ raise Exception("Cwd argument is not a directory: '%s', provided for command '%s'" % (cwd, str(cmd)))
+
+ # copy env:
+ self.env = env
+
+ def __repr__(self) :
+ if self.cmd is None : return ""
+ if self.type == "str" : return self.cmd
+ return " ".join(self.cmd)
+
+ @staticmethod
+ def cleanStr(s) :
+ if isInt(s) : s = str(s)
+ if "\n" in s : raise Exception("Task command/argument contains newline characters: '%s'" % (s))
+ return s.strip()
+
+
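+# Editor's illustration of the two accepted command forms (an empty cwd means
+# "inherit the caller's working directory"; the file names are made up):
+#
+#   Command("cat in.txt | sort > out.txt", cwd="")        # .type == "str"
+#   Command(["sort", "-o", "out.txt", "in.txt"], "/tmp")  # .type == "list"
+#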
+
+class StoppableThread(threading.Thread):
+ """
+ Thread class with a stop() method. The thread itself has to check
+ regularly for the stopped() condition.
+
+ Note that this is a very new thread base class for pyflow, and most
+ threads do not (yet) check their stopped status.
+
+ """
+
+ _stopAll = threading.Event()
+
+ def __init__(self, *args, **kw):
+ threading.Thread.__init__(self, *args, **kw)
+ self._stop = threading.Event()
+
+ def stop(self):
+ "thread specific stop method, may be overridden to add async thread-specific kill behavior"
+ self._stop.set()
+
+ @staticmethod
+ def stopAll():
+ "quick global stop signal for threads that happen to poll stopped() very soon after event"
+ StoppableThread._stopAll.set()
+
+ def stopped(self):
+ return (StoppableThread._stopAll.isSet() or self._stop.isSet())
+
+
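+# Editor's illustration: a cooperative StoppableThread subclass polls
+# self.stopped() in its run loop and exits promptly once stop() or
+# StoppableThread.stopAll() has been called:
+#
+#   class Poller(StoppableThread) :
+#       def run(self) :
+#           while not self.stopped() :
+#               time.sleep(1)
+#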
+
+def getSGEJobsDefault() :
+ if ((siteConfig.maxSGEJobs is not None) and
+ (siteConfig.maxSGEJobs != "") and
+ (siteConfig.maxSGEJobs != "unlimited")) :
+ return int(siteConfig.maxSGEJobs)
+ return "unlimited"
+
+
+
+class ModeInfo(object) :
+ """
+ Stores default values associated with each runmode: local,sge,...
+ """
+ def __init__(self, defaultCores, defaultMemMbPerCore, defaultIsRetry) :
+ self.defaultCores = defaultCores
+ self.defaultMemMbPerCore = defaultMemMbPerCore
+ self.defaultIsRetry = defaultIsRetry
+
+
+
+class RunMode(object):
+
+ data = { "local" : ModeInfo(defaultCores=1,
+ defaultMemMbPerCore=siteConfig.defaultHostMemMbPerCore,
+ defaultIsRetry=False),
+ "sge" : ModeInfo(defaultCores=getSGEJobsDefault(),
+ defaultMemMbPerCore="unlimited",
+ defaultIsRetry=True) }
+
+
+
+class RetryParam(object) :
+ """
+ parameters pertaining to task retry behavior
+ """
+ allowed_modes = [ "nonlocal" , "all" ]
+
+ def __init__(self, run_mode, retry_max, wait, window, retry_mode) :
+ if retry_mode not in self.allowed_modes :
+ raise Exception("Invalid retry mode parameter '%s'. Accepted retry modes are {%s}." \
+ % (retry_mode, ",".join(self.allowed_modes)))
+
+ self._retry_max = retry_max
+ self.wait = wait
+ self.window = window
+ self._retry_mode = retry_mode
+ self._run_mode = run_mode
+
+ self._finalize()
+ self.validate()
+
+
+ def _finalize(self) :
+ """
+ decide whether to turn retry off based on retry and run modes:
+ """
+ if (self._retry_mode == "nonlocal") and \
+ (not RunMode.data[self._run_mode].defaultIsRetry) :
+ self.max = 0
+ else :
+ self.max = int(self._retry_max)
+
+
+ def validate(self):
+ """
+ check that the public parameters are valid
+ """
+ def nonNegParamCheck(val, valLabel) :
+ if val < 0 : raise Exception("Parameter %s must be non-negative" % valLabel)
+
+ nonNegParamCheck(self.max, "retryMax")
+ nonNegParamCheck(self.wait, "retryWait")
+ nonNegParamCheck(self.window, "retryWindow")
+
+
+ def getTaskCopy(self,retry_max, wait, window, retry_mode):
+ """
+ return a deepcopy of the class customized for each individual task for
+ any retry parameters which are not None
+ """
+ taskself = copy.deepcopy(self)
+
+ if retry_max is not None:
+ taskself._retry_max = retry_max
+ if wait is not None:
+ taskself.wait = wait
+ if window is not None:
+ taskself.window = window
+ if retry_mode is not None :
+ taskself._retry_mode = retry_mode
+
+ taskself._finalize()
+ taskself.validate()
+ return taskself
+
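+# Editor's illustration: with retry_mode="nonlocal", retries are switched off
+# for run modes whose ModeInfo.defaultIsRetry is False (local) and honored
+# otherwise (sge):
+#
+#   RetryParam("local", retry_max=3, wait=10, window=0, retry_mode="nonlocal").max   # -> 0
+#   RetryParam("sge", retry_max=3, wait=10, window=0, retry_mode="nonlocal").max     # -> 3
+#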
+
+class RunningTaskStatus(object) :
+ """
+ simple object allowing remote task threads to communicate their
+ status back to the TaskManager
+ """
+ def __init__(self,isFinishedEvent) :
+ self.isFinishedEvent = isFinishedEvent
+ self.isComplete = threading.Event()
+ self.errorCode = 0
+
+ # errorMessage is filled in by sub-workflow
+ # and command-line tasks.
+ #
+ # Sub-workflows use this to convey whether they have
+ # failed (1) because of failures of their own tasks or (2)
+ # because of an exception in the sub-workflow code, in which
+ # case the exception message and stacktrace are provided.
+ #
+ # command tasks use this to report the stderr tail of a failing
+ # task
+ #
+ self.errorMessage = ""
+
+ # only used by sub-workflows to indicate that all tasks have been specified
+ self.isSpecificationComplete = threading.Event()
+
+
+class BaseTaskRunner(StoppableThread) :
+ """
+ Each individual command-task or sub workflow task
+ is run on its own thread using a class inherited from
+ BaseTaskRunner
+ """
+
+ def __init__(self, runStatus, taskStr, sharedFlowLog, setRunstate) :
+ StoppableThread.__init__(self)
+ self.setDaemon(True)
+ self.taskStr = taskStr
+ self.setName("TaskRunner-Thread-%s" % (taskStr))
+ self.runStatus = runStatus
+ self._sharedFlowLog = sharedFlowLog
+ self.lock = threading.RLock()
+
+ # allows taskRunner to update between queued and running status:
+ self._setRunstate = setRunstate
+
+ # this is moved into the ctor now, so that a race condition that would double-launch a task
+ # is now not possible (however unlikely it was before):
+ self.setInitialRunstate()
+
+
+ def run(self) :
+ """
+ BaseTaskRunner's run() method ensures that we can
+ capture exceptions which might occur in this thread.
+ Do not override this method -- instead define the core
+ logic for the task run operation in '_run()'
+
+ Note that for sub-workflow tasks we're interpreting raw
+ client python code on this thread, so exceptions are
+ *very likely* here -- this is not a corner case.
+ """
+ retval = 1
+ retmsg = ""
+ try:
+ (retval, retmsg) = self._run()
+ except WorkflowRunner._AbortWorkflowException :
+ # This indicates an intended workflow interruption.
+ # send a retval of 1 but not an error message
+ pass
+ except:
+ retmsg = getExceptionMsg()
+ self.runStatus.errorCode = retval
+ self.runStatus.errorMessage = retmsg
+ # this indicates that this specific task has finished:
+ self.runStatus.isComplete.set()
+ # this indicates that *any* task has just finished, so
+ # taskmanager can stop polling and immediately sweep
+ self.runStatus.isFinishedEvent.set()
+ return retval
+
+ def setRunstate(self, *args, **kw) :
+ if self._setRunstate is None : return
+ self._setRunstate(*args, **kw)
+
+ def setInitialRunstate(self) :
+ self.setRunstate("running")
+
+ def flowLog(self, msg, logState) :
+ linePrefixOut = "[TaskRunner:%s]" % (self.taskStr)
+ self._sharedFlowLog(msg, linePrefix=linePrefixOut, logState=logState)
+
+ def infoLog(self, msg) :
+ self.flowLog(msg, logState=LogState.INFO)
+
+ def warningLog(self, msg) :
+ self.flowLog(msg, logState=LogState.WARNING)
+
+ def errorLog(self, msg) :
+ self.flowLog(msg, logState=LogState.ERROR)
+
+
+
+class WorkflowTaskRunner(BaseTaskRunner) :
+ """
+ Manages a sub-workflow task
+ """
+
+ def __init__(self, runStatus, taskStr, workflow, sharedFlowLog, setRunstate) :
+ BaseTaskRunner.__init__(self, runStatus, taskStr, sharedFlowLog, setRunstate)
+ self.workflow = workflow
+
+ def _run(self) :
+ namespace = self.workflow._getNamespace()
+ nsLabel = namespaceLabel(namespace)
+ self.infoLog("Starting task specification for %s" % (nsLabel))
+ self.workflow._setRunning(True)
+ self.workflow.workflow()
+ self.workflow._setRunning(False)
+ self.runStatus.isSpecificationComplete.set()
+ self.infoLog("Finished task specification for %s, waiting for task completion" % (nsLabel))
+ retval = self.workflow._waitForTasksCore(namespace, isVerbose=False)
+ retmsg = ""
+ return (retval, retmsg)
+
+
+class CommandTaskRunner(BaseTaskRunner) :
+ """
+ Parent to local and SGE TaskRunner specializations for command tasks
+ """
+
+ taskWrapper = os.path.join(moduleDir, "pyflowTaskWrapper.py")
+
+ def __init__(self, runStatus, runid, taskStr, cmd, nCores, memMb, retry, isDryRun,
+ outFile, errFile, tmpDir, schedulerArgList,
+ sharedFlowLog, setRunstate) :
+ """
+ @param outFile: stdout file
+ @param errFile: stderr file
+ @param tmpDir: location to write files containing output from
+ the task wrapper script (and not the wrapped task)
+ """
+ import pickle
+
+ BaseTaskRunner.__init__(self, runStatus, taskStr, sharedFlowLog, setRunstate)
+
+ self.cmd = cmd
+ self.nCores = nCores
+ self.memMb = memMb
+ self.retry = retry
+ self.isDryRun = isDryRun
+ self.outFile = outFile
+ self.errFile = errFile
+ self.tmpDir = tmpDir
+ self.schedulerArgList = schedulerArgList
+ if not os.path.isfile(self.taskWrapper) :
+ raise Exception("Can't find task wrapper script: %s" % self.taskWrapper)
+
+ ensureDir(self.tmpDir)
+ self.wrapFile = os.path.join(self.tmpDir, "pyflowTaskWrapper.signal.txt")
+
+ # setup all the data to be passed to the taskWrapper and put this in argFile:
+ taskInfo = { 'nCores' : nCores,
+ 'outFile' : outFile, 'errFile' : errFile,
+ 'cwd' : cmd.cwd, 'env' : cmd.env,
+ 'cmd' : cmd.cmd, 'isShellCmd' : (cmd.type == "str") }
+
+ argFile = os.path.join(self.tmpDir, "taskWrapperParameters.pickle")
+ pickle.dump(taskInfo, open(argFile, "w"))
+
+ self.wrapperCmd = [self.taskWrapper, runid, taskStr, argFile]
+
+
+
+ def _run(self) :
+ """
+ Outer loop of _run() handles task retry behavior:
+ """
+ startTime = time.time()
+ retries = 0
+ retInfo = Bunch(retval=1, taskExitMsg="", isAllowRetry=False)
+
+ while not self.stopped() :
+ if retries :
+ self.infoLog("Retrying task: '%s'. Total prior task failures: %i" % (self.taskStr, retries))
+
+ if self.isDryRun :
+ self.infoLog("Dryrunning task: '%s' task arg list: [%s]" % (self.taskStr, ",".join(['"%s"' % (s) for s in self.getFullCmd()])))
+ retInfo.retval = 0
+ else :
+ self.runOnce(retInfo)
+
+ if retInfo.retval == 0 : break
+ if retries >= self.retry.max : break
+ elapsed = (time.time() - startTime)
+ if (self.retry.window > 0) and \
+ (elapsed >= self.retry.window) : break
+ if self.stopped() : break
+ if not retInfo.isAllowRetry : break
+ retries += 1
+ self.warningLog("Task: '%s' failed but qualifies for retry. Total task failures (including this one): %i. Task command: '%s'" % (self.taskStr, retries, str(self.cmd)))
+ retInfo = Bunch(retval=1, taskExitMsg="", isAllowRetry=False)
+ time.sleep(self.retry.wait)
+
+ return (retInfo.retval, retInfo.taskExitMsg)
+
+
+ def getExitMsg(self) :
+ """
+ Attempt to extract an exit message from a failed command task; do not
+ complain about any errors in the task signal file in this case.
+ """
+ msgSize = None
+ wrapFp = open(self.wrapFile)
+ for line in wrapFp:
+ w = line.strip().split()
+ if (len(w) < 6) or (w[4] != "[wrapperSignal]") :
+ break
+ if w[5] == "taskStderrTail" :
+ if (len(w) == 7) : msgSize = int(w[6])
+ break
+
+ taskExitMsg = ""
+ if msgSize is not None :
+ i = 0
+ for line in wrapFp:
+ if i >= msgSize : break
+ taskExitMsg += line
+ i += 1
+ wrapFp.close()
+ return taskExitMsg
+
+
+ def getWrapFileResult(self) :
+ """
+ When the task is theoretically done, go and read the task wrapper to
+ see the actual task exit code. This is required because:
+
+ 1) On SGE or similar: We have no other way to get the exit code
+
+ 2) On all systems, we can distinguish between a conventional task error
+ and other problems, such as (a) linux OOM killer (b) exception in the
+ task wrapper itself (c) filesystem failures.
+ """
+
+ def checkWrapFileExit(result) :
+ """
+ set result.isError=True only when the file format is wrong; a missing or
+ incomplete file is not considered an error, so the function must not flag
+ an error in that case.
+ """
+
+ if not os.path.isfile(self.wrapFile) : return
+
+ for line in open(self.wrapFile) :
+ # an incomplete line indicates that the file is still being written:
+ if len(line) == 0 or line[-1] != '\n' : return
+
+ w = line.strip().split()
+
+ if len(w) < 6 :
+ result.isError = True
+ return
+ if (w[4] != "[wrapperSignal]") :
+ result.isError = True
+ return
+ if w[5] == "taskExitCode" :
+ if (len(w) == 7) :
+ result.taskExitCode = int(w[6])
+ return
+
+ retryCount = 8
+ retryDelaySec = 30
+
+ wrapResult = Bunch(taskExitCode=None, isError=False)
+
+ totalDelaySec = 0
+ for trialIndex in range(retryCount) :
+ # if the problem occurs at 0 seconds don't bother with a warning, but
+ # if we've gone through a full retry cycle, then the filesystem delay is
+ # getting unusual and should be a warning:
+ if trialIndex > 1 :
+ msg = "No complete signal file found after %i seconds, retrying after delay. Signal file path: '%s'" % (totalDelaySec,self.wrapFile)
+ self.flowLog(msg, logState=LogState.WARNING)
+
+ if trialIndex != 0 :
+ time.sleep(retryDelaySec)
+ totalDelaySec += retryDelaySec
+
+ checkWrapFileExit(wrapResult)
+ if wrapResult.isError : break
+ if wrapResult.taskExitCode is not None : break
+
+ return wrapResult
+
+
+ def getWrapperErrorMsg(self) :
+ if os.path.isfile(self.wrapFile) :
+ stderrList = open(self.wrapFile).readlines()
+ taskExitMsg = ["Anomalous task wrapper stderr output. Wrapper signal file: '%s'" % (self.wrapFile),
+ "Logging %i line(s) of task wrapper log output below:" % (len(stderrList))]
+ linePrefix = "[taskWrapper-stderr]"
+ taskExitMsg.extend([linePrefix + " " + line for line in stderrList])
+ else :
+ taskExitMsg = ["Anomalous task wrapper condition: Wrapper signal file is missing: '%s'" % (self.wrapFile)]
+
+ return taskExitMsg
+
+
+
+class LocalTaskRunner(CommandTaskRunner) :
+
+ def getFullCmd(self) :
+ return [sys.executable] + self.wrapperCmd
+
+ def runOnce(self, retInfo) :
+ # sys.stderr.write("starting subprocess call. task '%s' cmd '%s'" % (self.taskStr,self.cmd))
+ # sys.stderr.write("full cmd: "+" ".join(self.getFullCmd()) + "\n")
+ wrapFp = open(self.wrapFile, "w")
+ proc = subprocess.Popen(self.getFullCmd(), stdout=wrapFp, stderr=subprocess.STDOUT, shell=False, bufsize=1)
+ self.infoLog("Task initiated on local node")
+ retInfo.retval = proc.wait()
+ wrapFp.close()
+
+ wrapResult = self.getWrapFileResult()
+
+ if (wrapResult.taskExitCode is None) or (wrapResult.taskExitCode != retInfo.retval):
+ retInfo.taskExitMsg = self.getWrapperErrorMsg()
+ retInfo.retval = 1
+ return retInfo
+ elif retInfo.retval != 0 :
+ retInfo.taskExitMsg = self.getExitMsg()
+
+ retInfo.isAllowRetry = True
+
+ # success! (taskWrapper, but maybe not for the task...)
+ return retInfo
+
+
+
+class QCaller(threading.Thread) :
+ """
+ Calls to both qsub and qstat go through this run() method so that we
+ can time them out:
+ """
+
+ def __init__(self, cmd, infoLog) :
+ threading.Thread.__init__(self)
+ self.setDaemon(True)
+ self.setName("QCaller-Timeout-Thread")
+ self.lock = threading.RLock()
+ self.cmd = cmd
+ self.infoLog = infoLog
+ self.results = Bunch(isComplete=False, retval=1, outList=[])
+ self.proc = None
+ self.is_kill_attempt = False
+
+ def run(self) :
+ # Note: Moved Popen() call outside of the mutex and
+ # stopped using proc.communicate() here after
+ # observing python interpreter bug:
+ # http://bugs.python.org/issue13817
+ #
+ # The interpreter deadlock for this issue has been
+ # observed to block the Popen() call below when using
+ # python 2.7.2:
+ #
+ # Oct 2014 - also wrapped this call with a semaphore because
+ # of the high memory usage associated with each qsub/qstat
+ # subprocess. This was causing pyflow jobs to become unstable
+ # as they would spontaneously exceed the maximum allowed master
+ # process memory.
+ #
+ GlobalSync.subprocessControl.acquire()
+ try :
+ tmp_proc = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False)
+ self.lock.acquire()
+ try:
+ self.proc = tmp_proc
+ # handle the case where Popen was taking its good sweet time and a killProc() was sent in the meantime:
+ if self.is_kill_attempt: self.killProc()
+ finally:
+ self.lock.release()
+
+ if self.is_kill_attempt: return
+
+ for line in self.proc.stdout :
+ self.results.outList.append(line)
+ self.results.retval = self.proc.wait()
+ finally:
+ GlobalSync.subprocessControl.release()
+ self.results.isComplete = True
+
+ @lockMethod
+ def killProc(self) :
+ import signal
+
+ self.is_kill_attempt = True
+
+ if self.proc is None : return
+
+ try:
+ os.kill(self.proc.pid , signal.SIGTERM)
+ self.infoLog("Sent SIGTERM to sge command process id: %i" % (self.proc.pid))
+ except OSError :
+ # process ended before we could kill it (hopefully rare, but possible race condition artifact)
+ pass
+
+
+
+class SGETaskRunner(CommandTaskRunner) :
+
+ def getFullCmd(self):
+ # qsub options:
+ #
+ qsubCmd = ["qsub",
+ "-V", # import environment variables from shell
+ "-cwd", # use current working directory
+ "-S", sys.executable, # The taskwrapper script is python
+ "-o", self.wrapFile,
+ "-e", self.wrapFile]
+
+ qsubCmd.extend(self.schedulerArgList)
+ qsubCmd.extend(siteConfig.qsubResourceArg(self.nCores, self.memMb))
+ qsubCmd.extend(self.wrapperCmd)
+
+ return tuple(qsubCmd)
+
+
+ def setInitialRunstate(self) :
+ self.setRunstate("queued")
+
+
+ @lockMethod
+ def setNewJobId(self, jobId) :
+ """
+ if stopped here, this is the case where a ctrl-c was entered while the qsub
+ command was being submitted, so we must kill the job here:
+ """
+ self.jobId = jobId
+ if self.stopped(): self._killJob()
+
+
+ def runOnce(self, retInfo) :
+
+ def qcallWithTimeouts(cmd, maxQcallAttempt=1) :
+ maxQcallWait = 180
+ qcall = None
+ for i in range(maxQcallAttempt) :
+ qcall = QCaller(cmd,self.infoLog)
+ qcall.start()
+ qcall.join(maxQcallWait)
+ if not qcall.isAlive() : break
+ self.infoLog("Trial %i of sge command has timed out. Killing process for cmd '%s'" % ((i + 1), cmd))
+ qcall.killProc()
+ self.infoLog("Finished attempting to kill sge command")
+
+ return qcall.results
+
+ # 1) call qsub, check for errors and retrieve taskId:
+ #
+ if os.path.isfile(self.wrapFile): os.remove(self.wrapFile)
+
+ # write extra info, just in case we need it for post-mortem debug:
+ qsubFile = os.path.join(os.path.dirname(self.wrapFile), "qsub.args.txt")
+ if os.path.isfile(qsubFile): os.remove(qsubFile)
+ qsubfp = open(qsubFile, "w")
+ for arg in self.getFullCmd() :
+ qsubfp.write(arg + "\n")
+ qsubfp.close()
+
+ results = qcallWithTimeouts(self.getFullCmd())
+
+ isQsubError = False
+ self.jobId = None
+ if len(results.outList) != 1 :
+ isQsubError = True
+ else :
+ w = results.outList[0].split()
+ if (len(w) > 3) and (w[0] == "Your") and (w[1] == "job") :
+ self.setNewJobId(int(w[2]))
+ else :
+ isQsubError = True
+
+ if not results.isComplete :
+ self._killJob() # just in case...
+ retInfo.taskExitMsg = ["Job submission failure -- qsub command timed-out"]
+ return retInfo
+
+ if isQsubError or (self.jobId is None):
+ retInfo.taskExitMsg = ["Unexpected qsub output. Logging %i line(s) of qsub output below:" % (len(results.outList)) ]
+ retInfo.taskExitMsg.extend([ "[qsub-out] " + line for line in results.outList ])
+ return retInfo
+
+ if results.retval != 0 :
+ retInfo.retval = results.retval
+ retInfo.taskExitMsg = ["Job submission failure -- qsub returned exit code: %i" % (retInfo.retval)]
+ return retInfo
+
+ # No qsub errors detected and an sge job_number is acquired -- success!
+ self.infoLog("Task submitted to sge queue with job_number: %i" % (self.jobId))
+
+
+ # 2) poll jobId until sge indicates it's not running or queued:
+ #
+ queueStatus = Bunch(isQueued=True, runStartTimeStamp=None)
+
+ def checkWrapFileRunStart(result) :
+ """
+ check wrapper file for a line indicating that it has transitioned from queued to
+ running state. Allow for NFS delay or incomplete file
+ """
+ if not os.path.isfile(self.wrapFile) : return
+ for line in open(self.wrapFile) :
+ w = line.strip().split()
+ if (len(w) < 6) or (w[4] != "[wrapperSignal]") :
+ # this could be incomplete flush to the signal file, so
+ # don't treat it as error:
+ return
+ if w[5] == "taskStart" :
+ result.runStartTimeStamp = timeStrToTimeStamp(w[0].strip('[]'))
+ result.isQueued = False
+ return
+
+
+ # exponential polling times -- make small jobs responsive but give sge a break on long runs...
+ ewaiter = ExpWaiter(5, 1.7, 60)
+
+ pollCmd = ("/bin/bash", "--noprofile", "-o", "pipefail", "-c", "qstat -j %i | awk '/^error reason/'" % (self.jobId))
+ while not self.stopped():
+ results = qcallWithTimeouts(pollCmd, 6)
+ isQstatError = False
+ if results.retval != 0:
+ if ((len(results.outList) == 2) and
+ (results.outList[0].strip() == "Following jobs do not exist:") and
+ (int(results.outList[1]) == self.jobId)) :
+ break
+ else :
+ isQstatError = True
+ else :
+ if (len(results.outList) != 0) :
+ isQstatError = True
+
+ if isQstatError :
+ if not results.isComplete :
+ retInfo.taskExitMsg = ["The qstat command for sge job_number %i has timed out for all attempted retries" % (self.jobId)]
+ self._killJob()
+ else :
+ retInfo.taskExitMsg = ["Unexpected qstat output or task has entered sge error state. Sge job_number: %i" % (self.jobId)]
+ retInfo.taskExitMsg.extend(["Logging %i line(s) of qstat output below:" % (len(results.outList)) ])
+ retInfo.taskExitMsg.extend([ "[qstat-out] " + line for line in results.outList ])
+ # self._killJob() # leave the job there so the user can better diagnose whatever unexpected pattern has occurred
+ return retInfo
+
+ # also check to see if job has transitioned from queued to running state:
+ if queueStatus.isQueued :
+ checkWrapFileRunStart(queueStatus)
+ if not queueStatus.isQueued :
+ self.setRunstate("running", queueStatus.runStartTimeStamp)
+
+ ewaiter.wait()
+
+ if self.stopped() :
+ # self._killJob() # no need, job should already have been killed at the stop() call...
+ return retInfo
+
+ lastJobId = self.jobId
+
+ # if we've correctly communicated with SGE, then its role is done here.
+ # if a job kill is required for any of the error states above, it needs to be
+ # added before this point:
+ self.jobId = None
+
+ wrapResult = self.getWrapFileResult()
+
+ if wrapResult.taskExitCode is None :
+ retInfo.taskExitMsg = ["Sge job_number: '%s'" % (lastJobId)]
+ retInfo.taskExitMsg.extend(self.getWrapperErrorMsg())
+ retInfo.retval = 1
+ return retInfo
+ elif wrapResult.taskExitCode != 0 :
+ retInfo.taskExitMsg = self.getExitMsg()
+
+ retInfo.retval = wrapResult.taskExitCode
+ retInfo.isAllowRetry = True
+
+ # success! (for sge & taskWrapper, but maybe not for the task...)
+ return retInfo
+
+
+ @lockMethod
+ def _killJob(self) :
+ """
+ (possibly) asynchronous job kill
+ """
+ try : isKilled = self.isKilled
+ except AttributeError: isKilled = False
+ if isKilled: return
+
+ try : jobId = self.jobId
+ except AttributeError: jobId = None
+ if jobId is None: return
+ killCmd = ["qdel", "%i" % (int(jobId))]
+ # don't wait for or check exit code of kill cmd... just give it one try
+ # because we want cleanup to go as quickly as possible
+ subprocess.Popen(killCmd, shell=False)
+ self.isKilled = True
+
+
+ @lockMethod
+ def stop(self) :
+ """
+ overload the thread stop function to also
+ qdel any running sge tasks.
+ """
+ CommandTaskRunner.stop(self)
+ self._killJob()
+
+
+
+class TaskManager(StoppableThread) :
+ """
+ This class runs on a separate thread from workflowRunner,
+ launching jobs based on the current state of the TaskDAG
+ """
+
+ def __init__(self, cdata, tdag) :
+ """
+ @param cdata: data from WorkflowRunner instance which will be
+ constant during the lifetime of the TaskManager,
+ should be safe to lookup w/o locking
+ @param tdag: task graph
+ """
+ StoppableThread.__init__(self)
+ # parameter copy:
+ self._cdata = cdata
+ self.tdag = tdag
+ # thread settings:
+ self.setDaemon(True)
+ self.setName("TaskManager-Thread")
+ # lock is used for function (harvest), which is checked by
+ # the WorkflowRunner under (literally) exceptional circumstances only
+ self.lock = threading.RLock()
+ # rm configuration:
+ self.freeCores = self._cdata.param.nCores
+ self.freeMemMb = self._cdata.param.memMb
+ self.runningTasks = {}
+
+ # This is used to track 'pyflow mutexes' -- for each key only a single
+ # task can run at once. Key is set to True if mutex is occupied.
+ self.taskMutexState = {}
+
+
+
+ def run(self) :
+ """
+ TaskManager runs so long as there are outstanding jobs
+ """
+
+ try:
+ cleanEnv()
+ while not self._isTerm() :
+ # update status of running jobs
+ self.tdag.isFinishedEvent.clear()
+ self.harvestTasks()
+ # try to launch jobs:
+ if self.stopped() : continue
+ self._startTasks()
+ self.tdag.isFinishedEvent.wait(5)
+ except:
+ msg = getExceptionMsg()
+ self._flowLog(msg,logState=LogState.ERROR)
+ self._cdata.emailNotification(msg, self._flowLog)
+ self._cdata.setTaskManagerException()
+
+
+ def _getCommandTaskRunner(self, task) :
+ """
+ assist launch of a command-task
+ """
+
+ # shortcuts:
+ payload = task.payload
+ param = self._cdata.param
+
+ if payload.cmd.cmd is None :
+ # Note these should have been marked off by the TaskManager already:
+ raise Exception("Attempting to launch checkpoint task: %s" % (task.fullLabel()))
+
+ isForcedLocal = ((param.mode != "local") and (payload.isForceLocal))
+
+ # mark task resources as occupied:
+ if not isForcedLocal :
+ if self.freeCores != "unlimited" :
+ if (self.freeCores < payload.nCores) :
+ raise Exception("Not enough free cores to launch task")
+ self.freeCores -= payload.nCores
+
+ if self.freeMemMb != "unlimited" :
+ if (self.freeMemMb < payload.memMb) :
+ raise Exception("Not enough free memory to launch task")
+ self.freeMemMb -= payload.memMb
+
+ if payload.mutex is not None :
+ self.taskMutexState[payload.mutex] = True
+
+ TaskRunner = None
+ if param.mode == "local" or payload.isForceLocal or payload.isCmdMakePath :
+ TaskRunner = LocalTaskRunner
+ elif param.mode == "sge" :
+ TaskRunner = SGETaskRunner
+ else :
+ raise Exception("Can't support mode: '%s'" % (param.mode))
+
+ #
+ # TODO: find less hacky way to handle make tasks:
+ #
+ taskRetry = payload.retry
+
+ if payload.isCmdMakePath :
+ taskRetry = copy.deepcopy(payload.retry)
+ taskRetry.window = 0
+
+ if param.mode == "local" or payload.isForceLocal :
+ launchCmdList = ["make", "-j", str(payload.nCores)]
+ elif param.mode == "sge" :
+ launchCmdList = siteConfig.getSgeMakePrefix(payload.nCores, payload.memMb, param.schedulerArgList)
+ else :
+ raise Exception("Can't support mode: '%s'" % (param.mode))
+
+ launchCmdList.extend(["-C", payload.cmd.cmd])
+ payload.launchCmd = Command(launchCmdList, payload.cmd.cwd, payload.cmd.env)
+
+ #
+ # each commandTaskRunner requires a unique tmp dir to write
+ # wrapper signals to. TaskRunner will create this directory -- it does not bother to destroy it right now:
+ #
+
+ # split the task id into two parts to keep from adding too many files to one directory:
+ tmpDirId1 = "%03i" % ((int(task.id) / 1000))
+ tmpDirId2 = "%03i" % ((int(task.id) % 1000))
+ taskRunnerTmpDir = os.path.join(self._cdata.wrapperLogDir, tmpDirId1, tmpDirId2)
+
+ return TaskRunner(task.runStatus, self._cdata.getRunid(),
+ task.fullLabel(), payload.launchCmd,
+ payload.nCores, payload.memMb,
+ taskRetry, param.isDryRun,
+ self._cdata.taskStdoutFile,
+ self._cdata.taskStderrFile,
+ taskRunnerTmpDir,
+ param.schedulerArgList,
+ self._cdata.flowLog,
+ task.setRunstate)
+
+
+ def _getWorkflowTaskRunner(self, task) :
+ """
+ assist launch of a workflow-task
+ """
+ return WorkflowTaskRunner(task.runStatus, task.fullLabel(), task.payload.workflow,
+ self._cdata.flowLog, task.setRunstate)
+
+
+ def _launchTask(self, task) :
+ """
+ launch a specific task
+ """
+
+ if task.payload.type() == "command" :
+ trun = self._getCommandTaskRunner(task)
+ elif task.payload.type() == "workflow" :
+ trun = self._getWorkflowTaskRunner(task)
+ else :
+ assert 0
+
+ self._infoLog("Launching %s: '%s' from %s" % (task.payload.desc(), task.fullLabel(), namespaceLabel(task.namespace)))
+ trun.start()
+ self.runningTasks[task] = trun
+
+
+ @lockMethod
+ def _startTasks(self) :
+ """
+ determine what tasks, if any, can be started
+
+ Note that the lock is here to protect self.runningTasks
+ """
+ # trace through DAG, completing any empty-command checkpoints
+ # found with all dependencies completed:
+ (ready, completed) = self.tdag.getReadyTasks()
+ for node in completed:
+ if self.stopped() : return
+ self._infoLog("Completed %s: '%s' launched from %s" % (node.payload.desc(), node.fullLabel(), namespaceLabel(node.namespace)))
+
+ # launch all workflows first, then command tasks as resources
+ # allow:
+ ready_workflows = [r for r in ready if r.payload.type() == "workflow"]
+ for task in ready_workflows :
+ if self.stopped() : return
+ self._launchTask(task)
+
+ # task submission could be shutdown, eg. in response to a task
+ # error:
+ if (not self._cdata.isTaskSubmissionActive()) : return
+
+ isNonLocal = (self._cdata.param.mode != "local")
+
+ # start command task launch:
+ ready_commands = [r for r in ready if r.payload.type() == "command"]
+ ready_commands.sort(key=lambda t: (t.payload.priority, t.payload.nCores), reverse=True)
+ for task in ready_commands :
+ if self.stopped() : return
+
+ # In a non-local run mode, "isForceLocal" tasks are not subject to
+ # global core and memory restrictions:
+ isForcedLocal = (isNonLocal and task.payload.isForceLocal)
+ if not isForcedLocal :
+ if ((self.freeCores != "unlimited") and (task.payload.nCores > self.freeCores)) : continue
+ if ((self.freeMemMb != "unlimited") and (task.payload.memMb > self.freeMemMb)) : continue
+
+ # all command tasks must obey separate mutex restrictions:
+ if ((task.payload.mutex is not None) and
+ (task.payload.mutex in self.taskMutexState) and
+ (self.taskMutexState[task.payload.mutex])) : continue
+
+ self._launchTask(task)
+
+
+
+ @lockMethod
+ def harvestTasks(self) :
+ """
+ Check the set of running tasks to see if they've completed and update
+ Node status accordingly:
+ """
+ notrunning = set()
+ for task in self.runningTasks.keys() :
+ if self.stopped() : break
+ trun = self.runningTasks[task]
+ if not task.runStatus.isComplete.isSet() :
+ if trun.isAlive() : continue
+ # if not complete and thread is dead then we don't know what happened, very bad!:
+ task.errorstate = 1
+ task.errorMessage = "Thread: '%s', has stopped without a traceable cause" % (trun.getName())
+ else :
+ task.errorstate = task.runStatus.errorCode
+ task.errorMessage = task.runStatus.errorMessage
+
+ if task.errorstate == 0 :
+ task.setRunstate("complete")
+ else:
+ task.setRunstate("error")
+
+ notrunning.add(task)
+
+ if not task.isError() :
+ self._infoLog("Completed %s: '%s' launched from %s" % (task.payload.desc(), task.fullLabel(), namespaceLabel(task.namespace)))
+ else:
+ msg = task.getTaskErrorMsg()
+
+ if self._cdata.isTaskSubmissionActive() :
+ # if this is the first error in the workflow, then
+ # we elaborate a bit on the workflow's response to
+ # the error. We also send any email-notifications
+ # for the first error only:
+ msg.extend(["Shutting down task submission. Waiting for remaining tasks to complete."])
+
+ self._errorLog(msg)
+ if self._cdata.isTaskSubmissionActive() :
+ self._cdata.emailNotification(msg, self._flowLog)
+
+ # Be sure to send notifications *before* setting error
+ # bits, because the WorkflowRunner may decide to
+ # immediately shutdown all tasks and pyflow threads on
+ # the first error:
+ self._cdata.setTaskError(task)
+
+ # shortcut:
+ param = self._cdata.param
+
+ # recover task resources:
+ for task in notrunning :
+ if task.payload.type() == "command" :
+ isForcedLocal = ((param.mode != "local") and (task.payload.isForceLocal))
+ if not isForcedLocal :
+ if self.freeCores != "unlimited" :
+ self.freeCores += task.payload.nCores
+ if self.freeMemMb != "unlimited" :
+ self.freeMemMb += task.payload.memMb
+
+ if task.payload.mutex is not None :
+ self.taskMutexState[task.payload.mutex] = False
+
+ for task in notrunning:
+ del self.runningTasks[task]
+
+
+ @lockMethod
+ def stop(self) :
+ StoppableThread.stop(self)
+ for trun in self.runningTasks.values() :
+ trun.stop()
+
+
+ @lockMethod
+ def _areTasksDead(self) :
+ for trun in self.runningTasks.values() :
+ if trun.isAlive(): return False
+ return True
+
+
+ def _isTerm(self) :
+ # check for explicit thread stop request (presumably from the workflowManager):
+ # if this happens we exit the polling loop
+ #
+ if self.stopped() :
+ while True :
+ if self._areTasksDead() : return True
+ time.sleep(1)
+
+ # check for "regular" termination conditions:
+ if (not self._cdata.isTaskSubmissionActive()) :
+ return (len(self.runningTasks) == 0)
+ else :
+ if self.tdag.isRunComplete() :
+ if (len(self.runningTasks) != 0) :
+ raise Exception("Inconsistent TaskManager state: workflow appears complete but there are still running tasks")
+ return True
+ elif self.tdag.isRunExhausted() :
+ return True
+ else :
+ return False
+
+
+ def _flowLog(self, msg, logState) :
+ linePrefixOut = "[TaskManager]"
+ # if linePrefix is not None : linePrefixOut+=" "+linePrefix
+ self._cdata.flowLog(msg, linePrefix=linePrefixOut, logState=logState)
+
+
+ def _infoLog(self, msg) :
+ self._flowLog(msg, logState=LogState.INFO)
+
+ def _errorLog(self, msg) :
+ self._flowLog(msg, logState=LogState.ERROR)
+
+
+
+
+# payloads are used to manage the different
+# possible actions attributed to task nodes:
+#
+class CmdPayload(object) :
+ def __init__(self, fullLabel, cmd, nCores, memMb, priority,
+ isForceLocal, isCmdMakePath=False, isTaskStable=True,
+ mutex=None, retry=None) :
+ self.cmd = cmd
+ self.nCores = nCores
+ self.memMb = memMb
+ self.priority = priority
+ self.isForceLocal = isForceLocal
+ self.isCmdMakePath = isCmdMakePath
+ self.isTaskStable = isTaskStable
+ self.mutex = mutex
+ self.retry = retry
+
+ # launch command includes make/qmake wrapper for Make path commands:
+ self.launchCmd = cmd
+
+ if (cmd.cmd is None) and ((nCores != 0) or (memMb != 0)) :
+ raise Exception("Null tasks should not have resource requirements. task: '%s'" % (fullLabel))
+
+ def type(self) :
+ return "command"
+
+ def desc(self) :
+ return "command task"
+
+
+class WorkflowPayload(object) :
+ def __init__(self, workflow) :
+ self.workflow = workflow
+ self.isTaskStable = True
+
+ def type(self) :
+ return "workflow"
+
+ def name(self) :
+ if self.workflow is None :
+ return "None"
+ else :
+ return self.workflow._whoami()
+
+ def desc(self) :
+ return "sub-workflow task"
+
+
+
+class TaskNode(object) :
+ """
+ Represents an individual task in the task graph
+ """
+
+ def __init__(self, tdag, lock, init_id, namespace, label, payload, isContinued, isFinishedEvent) :
+ self.tdag = tdag
+ self.lock = lock
+ self.id = init_id
+ self.namespace = namespace
+ self.label = label
+ self.payload = payload
+ self.isContinued = isContinued
+
+ # if true, do not execute this task or honor it as a dependency for child tasks
+ self.isIgnoreThis = False
+
+ # if true, set the ignore state for all children of this task to true
+ self.isIgnoreChildren = False
+
+ # if true, this task and its dependents will be automatically marked as completed (until
+ # a startFromTasks node is found)
+ self.isAutoCompleted = False
+
+ # task is reset to waiting runstate in a continued run
+ self.isReset = False
+
+ self.parents = set()
+ self.children = set()
+ self.runstateUpdateTimeStamp = time.time()
+ if self.isContinued:
+ self.runstate = "complete"
+ else:
+ self.runstate = "waiting"
+ self.errorstate = 0
+
+ # errorMessage is used by sub-workflow tasks, but not by command tasks:
+ self.errorMessage = ""
+
+ # This is a link to the live status object updated by TaskRunner:
+ self.runStatus = RunningTaskStatus(isFinishedEvent)
+
+ def __str__(self) :
+ msg = "TASK id: %s state: %s error: %i" % (self.fullLabel(), self.runstate, self.errorstate)
+ return msg
+
+ def fullLabel(self) :
+ return namespaceJoin(self.namespace, self.label)
+
+ @lockMethod
+ def isDone(self) :
+ "task has gone as far as it can"
+ return ((self.runstate == "error") or (self.runstate == "complete"))
+
+ @lockMethod
+ def isError(self) :
+ "true if an error occurred in this node"
+ return ((self.errorstate != 0) or (self.runstate == "error"))
+
+ @lockMethod
+ def isComplete(self) :
+ "task completed without error"
+ return ((self.errorstate == 0) and (self.runstate == "complete"))
+
+ @lockMethod
+ def isReady(self) :
+ "task is ready to be run"
+ retval = ((self.runstate == "waiting") and (self.errorstate == 0) and (not self.isIgnoreThis))
+ if retval :
+ for p in self.parents :
+ if p.isIgnoreThis : continue
+ if not p.isComplete() :
+ retval = False
+ break
+ return retval
+
+
+ def _isDeadWalker(self, searched) :
+ "recursive helper function for isDead()"
+
+ # the fact that you're still searching means that it must have returned False last time:
+ if self in searched : return False
+ searched.add(self)
+
+ if self.isError() : return True
+ if self.isComplete() : return False
+ for p in self.parents :
+ if p._isDeadWalker(searched) : return True
+ return False
+
+ @lockMethod
+ def isDead(self) :
+ """
+ If true, there's no longer a point to waiting for this task,
+ because it either has an error or there is an error in an
+ upstream dependency
+ """
+
+ # searched is used to restrict the complexity of this
+ # operation on large graphs:
+ searched = set()
+ return self._isDeadWalker(searched)
+
+ @lockMethod
+ def setRunstate(self, runstate, updateTimeStamp=None) :
+ """
+ updateTimeStamp is only supplied in the case where the state
+ transition time is interestingly different than the function
+ call time. This can happen when the state update comes from
+ a polling function with a long poll interval.
+ """
+ if runstate not in TaskNodeConstants.validRunstates :
+ raise Exception("Can't set TaskNode runstate to %s" % (runstate))
+
+ if updateTimeStamp is None :
+ self.runstateUpdateTimeStamp = time.time()
+ else :
+ self.runstateUpdateTimeStamp = updateTimeStamp
+ self.runstate = runstate
+ self.tdag.writeTaskStatus()
+
+ #def getParents(self) :
+ # return self.parents
+
+ #def getChildren(self) :
+ # return self.children
+
+ @lockMethod
+ def getTaskErrorMsg(self) :
+ """
+ generate consistent task error message from task state
+ """
+
+ if not self.isError() : return []
+
+ msg = "Failed to complete %s: '%s' launched from %s" % (self.payload.desc(), self.fullLabel(), namespaceLabel(self.namespace))
+ if self.payload.type() == "command" :
+ msg += ", error code: %s, command: '%s'" % (str(self.errorstate), str(self.payload.launchCmd))
+ elif self.payload.type() == "workflow" :
+ msg += ", failed sub-workflow classname: '%s'" % (self.payload.name())
+ else :
+ assert 0
+
+ msg = lister(msg)
+
+ if self.errorMessage != "" :
+ msg2 = ["Error Message:"]
+ msg2.extend(lister(self.errorMessage))
+ linePrefix = "[%s] " % (self.fullLabel())
+ for i in range(len(msg2)) :
+ msg2[i] = linePrefix + msg2[i]
+ msg.extend(msg2)
+
+ return msg
+
+
+
+class TaskDAG(object) :
+ """
+ Holds all tasks and their dependencies.
+
+ Also responsible for task state persistence/continuation across
+ interrupted runs. Object is accessed by both the workflow and
+ taskrunner threads, so it needs to be thread-safe.
+ """
+
+ def __init__(self, isContinue, isForceContinue, isDryRun,
+ taskInfoFile, taskStateFile, workflowClassName,
+ startFromTasks, ignoreTasksAfter, resetTasks,
+ flowLog) :
+ """
+ No other object gets to access the taskStateFile, file locks
+ are not required (but thread locks are)
+ """
+ self.isContinue = isContinue
+ self.isForceContinue = isForceContinue
+ self.isDryRun = isDryRun
+ self.taskInfoFile = taskInfoFile
+ self.taskStateFile = taskStateFile
+ self.workflowClassName = workflowClassName
+ self.startFromTasks = startFromTasks
+ self.ignoreTasksAfter = ignoreTasksAfter
+ self.resetTasks = resetTasks
+ self.flowLog = flowLog
+
+ # unique id for each task in each run -- not persistent across continued runs:
+ self.taskId = 0
+
+ # it will be easier for people to read the task status file if
+ # the tasks are in approximately the same order as they were
+ # added by the workflow:
+ self.addOrder = []
+ self.labelMap = {}
+ self.headNodes = set()
+ self.tailNodes = set()
+ self.lock = threading.RLock()
+
+ # this event can be used to optionally accelerate the task cycle
+ # when running in modes where tasks can set this event on completion
+ # (ie. local mode but not sge). If this isn't set, the normal polling
+ # cycle applies
+ self.isFinishedEvent = threading.Event()
+
+ @lockMethod
+ def isTaskPresent(self, namespace, label) :
+ return ((namespace, label) in self.labelMap)
+
+ @lockMethod
+ def getTask(self, namespace, label) :
+ if (namespace, label) in self.labelMap :
+ return self.labelMap[(namespace, label)]
+ return None
+
+ @lockMethod
+ def getHeadNodes(self) :
+ "all tasks with no parents"
+ return list(self.headNodes)
+
+ @lockMethod
+ def getTailNodes(self) :
+ "all tasks with no (runnable) children"
+ return list(self.tailNodes)
+
+ @lockMethod
+ def getAllNodes(self, namespace="") :
+ "get all nodes in this namespace"
+ retval = []
+ for (taskNamespace, taskLabel) in self.addOrder :
+ if namespace != taskNamespace : continue
+ node=self.labelMap[(taskNamespace, taskLabel)]
+ if node.isIgnoreThis : continue
+ retval.append(node)
+ return retval
+
+ def _isRunExhaustedNode(self, node, searched) :
+
+ # the fact that you're still searching means that it must have returned true last time:
+ if node in searched : return True
+ searched.add(node)
+
+ if not node.isIgnoreThis :
+ if not node.isDone() :
+ return False
+ if node.isComplete() :
+ for c in node.children :
+ if not self._isRunExhaustedNode(c, searched) :
+ return False
+ return True
+
+ @lockMethod
+ def isRunExhausted(self) :
+ """
+ Returns true if the run is as complete as possible due to errors
+ """
+
+ # searched is used to restrict the complexity of this
+ # operation on large graphs:
+ searched = set()
+ for node in self.getHeadNodes() :
+ if not self._isRunExhaustedNode(node,searched) :
+ return False
+ return True
+
+
+ @lockMethod
+ def isRunComplete(self) :
+ "returns true if run is complete and error free"
+ for node in self.labelMap.values():
+ if node.isIgnoreThis : continue
+ if not node.isComplete() :
+ return False
+ return True
+
+
+ def _getReadyTasksFromNode(self, node, ready, searched) :
+ "helper function for getReadyTasks"
+
+ if node.isIgnoreThis : return
+
+ if node in searched : return
+ searched.add(node)
+
+ if node.isReady() :
+ ready.add(node)
+ else:
+ if not node.isComplete() :
+ for c in node.parents :
+ self._getReadyTasksFromNode(c, ready, searched)
+
+
+ @lockMethod
+ def getReadyTasks(self) :
+ """
+ Go through DAG from the tail nodes and find all tasks which
+ have all prerequisites completed:
+ """
+
+ completed = self.markCheckPointsComplete()
+ ready = set()
+ # searched is used to restrict the complexity of this
+ # operation on large graphs:
+ searched = set()
+ for node in self.getTailNodes() :
+ self._getReadyTasksFromNode(node, ready, searched)
+ return (list(ready), list(completed))
+
+
+ def _markCheckPointsCompleteFromNode(self, node, completed, searched) :
+ "helper function for markCheckPointsComplete"
+
+ if node.isIgnoreThis : return
+
+ if node in searched : return
+ searched.add(node)
+
+ if node.isComplete() : return
+
+ for c in node.parents :
+ self._markCheckPointsCompleteFromNode(c, completed, searched)
+
+ if (node.payload.type() == "command") and (node.payload.cmd.cmd is None) and (node.isReady()) :
+ node.setRunstate("complete")
+ completed.add(node)
+
+
+ @lockMethod
+ def markCheckPointsComplete(self) :
+ """
+ traverse from tail nodes up, marking any checkpoint tasks
+ (task.cmd=None) that are ready as complete; return the set
+ of newly completed tasks:
+ """
+ completed = set()
+ # searched is used to restrict the complexity of this
+ # operation on large graphs:
+ searched = set()
+ for node in self.getTailNodes() :
+ self._markCheckPointsCompleteFromNode(node, completed, searched)
+ return completed
+
+
+ @lockMethod
+ def addTask(self, namespace, label, payload, dependencies, isContinued=False) :
+ """
+ add new task to the DAG
+
+ isContinued indicates the task is being read from state history during a continuation run
+ """
+ # internal data structures use these separately, but for logging we
+ # create one string:
+ fullLabel = namespaceJoin(namespace, label)
+
+ # first check to see if task exists in DAG already, this is not allowed unless
+ # we are continuing a previous run, in which case it's allowed once:
+ if not isContinued and self.isTaskPresent(namespace, label):
+ if self.isContinue and self.labelMap[(namespace, label)].isContinued:
+ # confirm that task is a match, flip off the isContinued flag and return:
+ task = self.labelMap[(namespace, label)]
+ parentLabels = set([p.label for p in task.parents])
+ excPrefix = "Task: '%s' does not match previous definition defined in '%s'." % (fullLabel, self.taskInfoFile)
+ if task.payload.type() != payload.type() :
+ msg = excPrefix + " New/old payload type: '%s'/'%s'" % (payload.type(), task.payload.type())
+ raise Exception(msg)
+ if payload.isTaskStable :
+ if (payload.type() == "command") and (str(task.payload.cmd) != str(payload.cmd)) :
+ msg = excPrefix + " New/old command: '%s'/'%s'" % (str(payload.cmd), str(task.payload.cmd))
+ if self.isForceContinue : self.flowLog(msg,logState=LogState.WARNING)
+ else : raise Exception(msg)
+ if (parentLabels != set(dependencies)) :
+ msg = excPrefix + " New/old dependencies: '%s'/'%s'" % (",".join(dependencies), ",".join(parentLabels))
+ if self.isForceContinue : self.flowLog(msg,logState=LogState.WARNING)
+ else : raise Exception(msg)
+ if payload.type() == "command" :
+ task.payload.cmd = payload.cmd
+ task.payload.isCmdMakePath = payload.isCmdMakePath
+ task.isContinued = False
+ return
+ else:
+ raise Exception("Task: '%s' is already in TaskDAG" % (fullLabel))
+
+ task = TaskNode(self, self.lock, self.taskId, namespace, label, payload, isContinued, self.isFinishedEvent)
+
+ self.taskId += 1
+
+ self.addOrder.append((namespace, label))
+ self.labelMap[(namespace, label)] = task
+
+ for d in dependencies :
+ parent = self.getTask(namespace, d)
+ if parent is task :
+ raise Exception("Task: '%s' cannot specify its own task label as a dependency" % (fullLabel))
+ if parent is None :
+ raise Exception("Dependency: '%s' for task: '%s' does not exist in TaskDAG" % (namespaceJoin(namespace, d), fullLabel))
+ task.parents.add(parent)
+ parent.children.add(task)
+
+
+ if isContinued :
+ isReset=False
+ if label in self.resetTasks :
+ isReset=True
+ else :
+ for p in task.parents :
+ if p.isReset :
+ isReset = True
+ break
+ if isReset :
+ task.setRunstate("waiting")
+ task.isReset=True
+
+ if not isContinued:
+ self.writeTaskInfo(task)
+ self.writeTaskStatus()
+
+ # determine if this is an ignoreTasksAfter node
+ if label in self.ignoreTasksAfter :
+ task.isIgnoreChildren = True
+
+ # determine if this is an ignoreTasksAfter descendent
+ for p in task.parents :
+ if p.isIgnoreChildren :
+ task.isIgnoreThis = True
+ task.isIgnoreChildren = True
+ break
+
+ # update headNodes
+ if len(task.parents) == 0 :
+ self.headNodes.add(task)
+
+ # update isAutoCompleted:
+ if (self.startFromTasks and
+ (label not in self.startFromTasks)) :
+ task.isAutoCompleted = True
+ for p in task.parents :
+ if not p.isAutoCompleted :
+ task.isAutoCompleted = False
+ break
+
+ # in case of no-parents, also check sub-workflow node
+ if task.isAutoCompleted and (len(task.parents) == 0) and (namespace != ""):
+ wval=namespace.rsplit(namespaceSep,1)
+ if len(wval) == 2 :
+ (workflowNamespace,workflowLabel)=wval
+ else :
+ workflowNamespace=""
+ workflowLabel=wval[0]
+ workflowParent = self.labelMap[(workflowNamespace, workflowLabel)]
+ if not workflowParent.isAutoCompleted :
+ task.isAutoCompleted = False
+
+ if task.isAutoCompleted :
+ task.setRunstate("complete")
+
+ # update tailNodes:
+ if not task.isIgnoreThis :
+ self.tailNodes.add(task)
+ for p in task.parents :
+ if p in self.tailNodes :
+ self.tailNodes.remove(p)
+
+ # check dependency runState consistency:
+ if task.isDone() :
+ for p in task.parents :
+ if p.isIgnoreThis : continue
+ if p.isComplete() : continue
+ raise Exception("Task: '%s' has invalid continuation state. Task dependencies are incomplete")
+
+
+
+ @lockMethod
+ def writeTaskStatus(self) :
+ """
+ (atomic on *nix) update of the runstate and errorstate for all tasks
+ """
+ # don't write task status during dry runs:
+ if self.isDryRun : return
+
+ tmpFile = self.taskStateFile + ".update.incomplete"
+ tmpFp = open(tmpFile, "w")
+ tmpFp.write(taskStateHeader())
+ for (namespace, label) in self.addOrder :
+ node = self.labelMap[(namespace, label)]
+ runstateUpdateTimeStr = timeStampToTimeStr(node.runstateUpdateTimeStamp)
+ tmpFp.write("%s\t%s\t%s\t%i\t%s\n" % (label, namespace, node.runstate, node.errorstate, runstateUpdateTimeStr))
+ tmpFp.close()
+
+ forceRename(tmpFile, self.taskStateFile)
+
+
+ @lockMethod
+ def getTaskStatus(self) :
+ """
+ Enumerate status of command tasks (but look at sub-workflows to determine if specification is complete)
+ """
+
+ val = Bunch(waiting=0, queued=0, running=0, complete=0, error=0, isAllSpecComplete=True,
+ longestQueueSec=0, longestRunSec=0, longestQueueName="", longestRunName="")
+
+ currentSec = time.time()
+ for (namespace, label) in self.addOrder :
+ node = self.labelMap[(namespace, label)]
+ # special check just for workflow tasks:
+ if node.payload.type() == "workflow" :
+ if not node.runStatus.isSpecificationComplete.isSet() :
+ val.isAllSpecComplete = False
+
+ # the rest of this enumeration is for command tasks only:
+ continue
+
+ taskTime = int(currentSec - node.runstateUpdateTimeStamp)
+
+ if node.runstate == "waiting" :
+ val.waiting += 1
+ elif node.runstate == "queued" :
+ val.queued += 1
+ if val.longestQueueSec < taskTime :
+ val.longestQueueSec = taskTime
+ val.longestQueueName = node.fullLabel()
+ elif node.runstate == "running" :
+ val.running += 1
+ if val.longestRunSec < taskTime :
+ val.longestRunSec = taskTime
+ val.longestRunName = node.fullLabel()
+ elif node.runstate == "complete" :
+ val.complete += 1
+ elif node.runstate == "error" :
+ val.error += 1
+
+ return val
+
+
+ @lockMethod
+ def writeTaskInfo(self, task) :
+ """
+ appends a description of the new task to the taskInfo file
+ """
+ depstring = ""
+ if len(task.parents) :
+ depstring = ",".join([p.label for p in task.parents])
+
+ cmdstring = ""
+ nCores = "0"
+ memMb = "0"
+ priority = "0"
+ isForceLocal = "0"
+ payload = task.payload
+ cwdstring = ""
+ if payload.type() == "command" :
+ cmdstring = str(payload.cmd)
+ nCores = str(payload.nCores)
+ memMb = str(payload.memMb)
+ priority = str(payload.priority)
+ isForceLocal = boolToStr(payload.isForceLocal)
+ cwdstring = payload.cmd.cwd
+ elif payload.type() == "workflow" :
+ cmdstring = payload.name()
+ else :
+ assert 0
+ taskline = "\t".join((task.label, task.namespace, payload.type(),
+ nCores, memMb, priority,
+ isForceLocal, depstring, cwdstring, cmdstring))
+ fp = open(self.taskInfoFile, "a")
+ fp.write(taskline + "\n")
+ fp.close()
+
+
+
+# workflowRunner:
+#
+
+
+# special exception used for the case where pyflow data dir is already in use:
+#
+class DataDirException(Exception) :
+ def __init__(self, msg) :
+ Exception.__init__(self)
+ self.msg = msg
+
+
+
+class WorkflowRunnerThreadSharedData(object) :
+ """
+ All data used by the WorkflowRunner which will be constant over
+ the lifetime of a TaskManager instance. All of the information in
+ this class will be accessed by both threads without locking.
+ """
+
+ def __init__(self) :
+ self.lock = threading.RLock()
+ self.pid = os.getpid()
+ self.runcount = 0
+ self.cwd = os.path.abspath(os.getcwd())
+
+ self.markFile = None
+
+ # we potentially have to log before the logfile is set up (eg
+ # an exception is thrown reading run parameters), so provide
+ # an explicit notification that there's no log file:
+ self.flowLogFp = None
+
+ self.warningLogFp = None
+ self.errorLogFp = None
+
+ self.resetRun()
+
+ # two elements required to implement a nohup-like behavior:
+ self.isHangUp = threading.Event()
+ self._isStderrAlive = True
+
+
+ @staticmethod
+ def _validateFixParam(param):
+ """
+ validate and refine raw run() parameters for use by workflow
+ """
+
+ param.mailTo = setzer(param.mailTo)
+ param.schedulerArgList = lister(param.schedulerArgList)
+ if param.successMsg is not None :
+ if not isString(param.successMsg) :
+ raise Exception("successMsg argument to WorkflowRunner.run() is not a string")
+
+ # create combined task retry settings manager:
+ param.retry=RetryParam(param.mode,
+ param.retryMax,
+ param.retryWait,
+ param.retryWindow,
+ param.retryMode)
+
+ # setup resource parameters
+ if param.nCores is None :
+ param.nCores = RunMode.data[param.mode].defaultCores
+
+ # ignore total available memory settings in non-local modes:
+ if param.mode != "local" :
+ param.memMb = "unlimited"
+
+ if param.mode == "sge" :
+ if siteConfig.maxSGEJobs != "unlimited" :
+ if ((param.nCores == "unlimited") or
+ (int(param.nCores) > int(siteConfig.maxSGEJobs))) :
+ param.nCores = int(siteConfig.maxSGEJobs)
+
+ if param.nCores != "unlimited" :
+ param.nCores = int(param.nCores)
+ if param.nCores < 1 :
+ raise Exception("Invalid run mode nCores argument: %s. Value must be 'unlimited' or an integer no less than 1" % (param.nCores))
+
+ if param.memMb is None :
+ if param.nCores == "unlimited" :
+ param.memMb = "unlimited"
+ mpc = RunMode.data[param.mode].defaultMemMbPerCore
+ if mpc == "unlimited" :
+ param.memMb = "unlimited"
+ else :
+ param.memMb = mpc * param.nCores
+ elif param.memMb != "unlimited" :
+ param.memMb = int(param.memMb)
+ if param.memMb < 1 :
+ raise Exception("Invalid run mode memMb argument: %s. Value must be 'unlimited' or an integer no less than 1" % (param.memMb))
+
+ # verify/normalize input settings:
+ if param.mode not in RunMode.data.keys() :
+ raise Exception("Invalid mode argument '%s'. Accepted modes are {%s}." \
+ % (param.mode, ",".join(RunMode.data.keys())))
+
+ if param.mode == "sge" :
+ # TODO not-portable to windows (but is this a moot point -- all of sge mode is non-portable, no?):
+ def checkSgeProg(prog) :
+ proc = subprocess.Popen(("which", prog), stdout=open(os.devnull, "w"), shell=False)
+ retval = proc.wait()
+ if retval != 0 : raise Exception("Run mode is sge, but no %s in path" % (prog))
+ checkSgeProg("qsub")
+ checkSgeProg("qstat")
+
+
+ stateDir = os.path.join(param.dataDir, "state")
+ if param.isContinue == "Auto" :
+ param.isContinue = os.path.exists(stateDir)
+
+ if param.isContinue :
+ if not os.path.exists(stateDir) :
+ raise Exception("Cannot continue run without providing a pyflow dataDir containing previous state.: '%s'" % (stateDir))
+
+ for email in param.mailTo :
+ if not verifyEmailAddy(email):
+ raise Exception("Invalid email address: '%s'" % (email))
+
+
+
+ def _setCustomLogs(self) :
+ if (self.warningLogFp is None) and (self.param.warningLogFile is not None) :
+ self.warningLogFp = open(self.param.warningLogFile,"w")
+
+ if (self.errorLogFp is None) and (self.param.errorLogFile is not None) :
+ self.errorLogFp = open(self.param.errorLogFile,"w")
+
+
+
+ def setupNewRun(self, param) :
+ self.param = param
+
+ # setup log file-handle first, then run the rest of parameter validation:
+ # (hold this file open so that we can still log if pyflow runs out of filehandles)
+ self.param.dataDir = os.path.abspath(self.param.dataDir)
+ self.param.dataDir = os.path.join(self.param.dataDir, "pyflow.data")
+ logDir = os.path.join(self.param.dataDir, "logs")
+ ensureDir(logDir)
+ self.flowLogFile = os.path.join(logDir, "pyflow_log.txt")
+ self.flowLogFp = open(self.flowLogFile, "a")
+
+ # run remaining validation
+ self._validateFixParam(self.param)
+
+ # initial per-run data
+ self.taskErrors = set() # this set actually contains every task that failed -- tasks contain all of their own error info
+ self.isTaskManagerException = False
+
+ # create data directory if it does not exist
+ ensureDir(self.param.dataDir)
+
+ # check whether a process already exists:
+ self.markFile = os.path.join(self.param.dataDir, "active_pyflow_process.txt")
+ if os.path.exists(self.markFile) :
+ # Non-conventional logging situation -- another pyflow process is possibly using this same data directory, so we want
+ # to log to stderr (even if the user has set isQuiet) and not interfere with the other process's log
+ self.flowLogFp = None
+ self.param.isQuiet = False
+ msg = [ "Can't initialize pyflow run because the data directory appears to be in use by another process.",
+ "\tData directory: '%s'" % (self.param.dataDir),
+ "\tIt is possible that a previous process was abruptly interrupted and did not clean up properly. To determine if this is",
+ "\tthe case, please refer to the file '%s'" % (self.markFile),
+ "\tIf this file refers to a non-running process, delete the file and relaunch pyflow,",
+ "\totherwise, specify a new data directory. At the API-level this can be done with the dataDirRoot option." ]
+ self.markFile = None # this keeps pyflow from deleting this file, as it normally would on exit
+ raise DataDirException(msg)
+ else :
+ mfp = open(self.markFile, "w")
+ msg = """
+This file provides details of the pyflow instance currently using this data directory.
+During normal pyflow run termination (due to job completion, error, SIGINT, etc...),
+this file should be deleted. If this file is present it should mean either:
+(1) the data directory is still in use by a running workflow
+(2) a sudden job failure occurred that prevented normal run termination
+
+The associated pyflow job details are as follows:
+"""
+ mfp.write(msg + "\n")
+ for line in self.getInfoMsg() :
+ mfp.write(line + "\n")
+ mfp.write("\n")
+ mfp.close()
+
+ stateDir = os.path.join(self.param.dataDir, "state")
+ ensureDir(stateDir)
+
+ # setup other instance data:
+ self.runcount += 1
+
+ # initialize directories
+ self.wrapperLogDir = os.path.join(logDir, "tmp", "taskWrapperLogs")
+ ensureDir(self.wrapperLogDir)
+ stackDumpLogDir = os.path.join(logDir, "tmp", "stackDumpLog")
+ ensureDir(stackDumpLogDir)
+
+ # initialize filenames:
+ taskStateFileName = "pyflow_tasks_runstate.txt"
+ taskInfoFileName = "pyflow_tasks_info.txt"
+
+ self.taskStdoutFile = os.path.join(logDir, "pyflow_tasks_stdout_log.txt")
+ self.taskStderrFile = os.path.join(logDir, "pyflow_tasks_stderr_log.txt")
+ self.taskStateFile = os.path.join(stateDir, taskStateFileName)
+ self.taskInfoFile = os.path.join(stateDir, taskInfoFileName)
+ self.taskDotScriptFile = os.path.join(stateDir, "make_pyflow_task_graph.py")
+
+ self.stackDumpLogFile = os.path.join(stackDumpLogDir, "pyflow_stack_dump.txt")
+
+ # empty file:
+ if not self.param.isContinue:
+ fp = open(self.taskInfoFile, "w")
+ fp.write(taskInfoHeader())
+ fp.close()
+
+ self._setCustomLogs()
+
+ # finally write dot task graph creation script:
+ #
+ # this could fail because of script permission settings, but it is not critical for
+ # workflow completion, so we settle for a warning
+ try :
+ writeDotScript(self.taskDotScriptFile, taskInfoFileName, taskStateFileName, self.param.workflowClassName)
+ except OSError:
+ msg = ["Failed to write task graph visualization script to %s" % (self.taskDotScriptFile)]
+ self.flowLog(msg,logState=LogState.WARNING)
+
+
+ def resetRun(self) :
+ """
+ Anything that needs to be cleaned up at the end of a run
+
+ Right now this just makes sure we don't log to the previous run's log file
+ """
+ self.flowLogFile = None
+ self.param = None
+ if self.flowLogFp is not None :
+ self.flowLogFp.close()
+ self.flowLogFp = None
+
+ if self.warningLogFp is not None :
+ self.warningLogFp.close()
+ self.warningLogFp = None
+
+ if self.errorLogFp is not None :
+ self.errorLogFp.close()
+ self.errorLogFp = None
+
+ if self.markFile is not None :
+ if os.path.exists(self.markFile) : os.unlink(self.markFile)
+
+ self.markFile = None
+
+ def getRunid(self) :
+ return "%s_%s" % (self.pid, self.runcount)
+
+ @lockMethod
+ def setTaskError(self, task) :
+ self.taskErrors.add(task)
+
+ @lockMethod
+ def isTaskError(self) :
+ return (len(self.taskErrors) != 0)
+
+ def isTaskSubmissionActive(self) :
+ """
+ wait() pollers need to know if task submission has been
+ shut down to implement sane behavior.
+ """
+ return (not self.isTaskError())
+
+ @lockMethod
+ def setTaskManagerException(self) :
+ self.isTaskManagerException = True
+
+ @lockMethod
+ def flowLog(self, msg, linePrefix=None, logState = LogState.INFO) :
+ linePrefixOut = "[%s]" % (self.getRunid())
+ if linePrefix is not None :
+ linePrefixOut += " " + linePrefix
+
+ if (logState == LogState.ERROR) or (logState == LogState.WARNING) :
+ linePrefixOut += " [" + LogState.toString(logState) + "]"
+
+ ofpList = []
+ isAddStderr = (self._isStderrAlive and ((self.flowLogFp is None) or (self.param is None) or (not self.param.isQuiet)))
+ if isAddStderr:
+ ofpList.append(sys.stderr)
+ if self.flowLogFp is not None :
+ ofpList.append(self.flowLogFp)
+
+ # make a last ditch effort to open the special error logs if these are not available already:
+ try :
+ self._setCustomLogs()
+ except :
+ pass
+
+ if (self.warningLogFp is not None) and (logState == LogState.WARNING) :
+ ofpList.append(self.warningLogFp)
+ if (self.errorLogFp is not None) and (logState == LogState.ERROR) :
+ ofpList.append(self.errorLogFp)
+
+ if len(ofpList) == 0 : return
+ retval = log(ofpList, msg, linePrefixOut)
+
+ # check if stderr stream failed. If so, turn it off for the remainder of run (assume terminal hup):
+ if isAddStderr and (not retval[0]) :
+ if self.isHangUp.isSet() :
+ self._isStderrAlive = False
+
+
+ def getInfoMsg(self) :
+ """
+ return a string array with general stats about this run
+ """
+
+ msg = [ "%s\t%s" % ("pyFlowClientWorkflowClass:", self.param.workflowClassName),
+ "%s\t%s" % ("pyFlowVersion:", __version__),
+ "%s\t%s" % ("pythonVersion:", pythonVersion),
+ "%s\t%s" % ("Runid:", self.getRunid()),
+ "%s\t%s UTC" % ("RunStartTime:", self.param.logRunStartTime),
+ "%s\t%s UTC" % ("NotificationTime:", timeStrNow()),
+ "%s\t%s" % ("HostName:", siteConfig.getHostName()),
+ "%s\t%s" % ("WorkingDir:", self.cwd),
+ "%s\t%s" % ("DataDir:", self.param.dataDir),
+ "%s\t'%s'" % ("ProcessCmdLine:", cmdline()) ]
+ return msg
+
+
+ def emailNotification(self, msgList, emailErrorLog=None) :
+ #
+ # email address might not be set up yet:
+ #
+ # if errorLog is specified, then an email send exception will
+ # be handled and logged, otherwise the exception will be re-raised
+ # down to the caller.
+ #
+
+ if self.param is None : return
+ if len(self.param.mailTo) == 0 : return
+
+ if not isLocalSmtp() :
+ if emailErrorLog :
+ msg = ["email notification failed, no local smtp server"]
+ emailErrorLog(msg,logState=LogState.WARNING)
+ return
+
+ mailTo = sorted(list(self.param.mailTo))
+ subject = "pyflow notification from %s run: %s" % (self.param.workflowClassName, self.getRunid())
+ msg = msgListToMsg(msgList)
+ fullMsgList = ["Message:",
+ '"""',
+ msg,
+ '"""']
+ fullMsgList.extend(self.getInfoMsg())
+
+ import smtplib
+ try:
+ sendEmail(mailTo, siteConfig.mailFrom, subject, fullMsgList)
+ except smtplib.SMTPException :
+ if emailErrorLog is None : raise
+ msg = ["email notification failed"]
+ eMsg = lister(getExceptionMsg())
+ msg.extend(eMsg)
+ emailErrorLog(msg,logState=LogState.WARNING)
+
+
+
+class WorkflowRunner(object) :
+ """
+ This object is designed to be inherited by a class in
+ client code. This inheriting class can override the
+ L{workflow()<WorkflowRunner.workflow>} method to define the
+ tasks that need to be run and their dependencies.
+
+ The inheriting class defining a workflow can be executed in
+ client code by calling the WorkflowRunner.run() method.
+ This method provides various run options such as whether
+ to run locally or on sge.
+ """
+
+
+ _maxWorkflowRecursion = 30
+ """
+ This limit protects against a runaway forkbomb in case a
+ workflow task recursively adds itself w/o termination:
+ """
+
+
+ def run(self,
+ mode="local",
+ dataDirRoot=".",
+ isContinue=False,
+ isForceContinue=False,
+ nCores=None,
+ memMb=None,
+ isDryRun=False,
+ retryMax=2,
+ retryWait=90,
+ retryWindow=360,
+ retryMode="nonlocal",
+ mailTo=None,
+ updateInterval=60,
+ schedulerArgList=None,
+ isQuiet=False,
+ warningLogFile=None,
+ errorLogFile=None,
+ successMsg=None,
+ startFromTasks=None,
+ ignoreTasksAfter=None,
+ resetTasks=None) :
+ """
+ Call this method to execute the workflow() method overridden
+ in a child class and specify the resources available for the
+ workflow to run.
+
+ Task retry behavior: Retry attempts will be made per the
+ arguments below for distributed workflow runs (eg. sge run
+ mode). Note this means that retries will be attempted for
+ tasks with an 'isForceLocal' setting during distributed runs.
+
+ Task error behavior: When a task error occurs the task
+ manager stops submitting new tasks and allows all currently
+ running tasks to complete. Note that in this case 'task error'
+ means that the task could not be completed after exhausting
+ attempted retries.
+
+ Workflow exception behavior: Any exceptions thrown from the
+ python code of classes derived from WorkflowRunner will be
+ logged and trigger notification (e.g. email). The exception
+ will not come down to the client's stack. In sub-workflows the
+ exception is handled exactly like a task error (ie. task
+ submission is shut down and remaining tasks are allowed to
+ complete). An exception in the master workflow will lead to
+ workflow termination without waiting for currently running
+ tasks to finish.
+
+ @return: 0 if all tasks completed successfully and 1 otherwise
+
+ @param mode: Workflow run mode. Current options are (local|sge)
+
+ @param dataDirRoot: All workflow data is written to
+ {dataDirRoot}/pyflow.data/ These include
+ workflow/task logs, persistent task state data,
+ and summary run info. Two workflows cannot
+ simultaneously use the same dataDir.
+
+ @param isContinue: If True, continue workflow from a previous
+ incomplete run based on the workflow data
+ files. You must use the same dataDirRoot as a
+ previous run for this to work. Set to 'Auto' to
+ have the run continue only if the previous
+ dataDir exists. (default: False)
+
+ @param isForceContinue: Only used if isContinue is not False. Normally
+ when a run is continued, the commands of
+ completed tasks are checked to ensure they
+ match. When isForceContinue is true,
+ failing this check is reduced from an error
+ to a warning
+
+ @param nCores: Total number of cores available, or 'unlimited', sge
+ is currently configured for a maximum job count of
+ %s, any value higher than this in sge mode will be
+ reduced to the maximum. (default: 1 for local mode,
+ %s for sge mode)
+
+ @param memMb: Total memory available (in megabytes), or 'unlimited',
+ Note that this value will be ignored in non-local modes
+ (such as sge), because in this case total memory available
+ is expected to be known by the scheduler for each node in its
+ cluster. (default: %i*nCores for local mode, 'unlimited'
+ for sge mode)
+
+ @param isDryRun: List the commands to be executed without running
+ them. Note that recursive and dynamic workflows
+ will potentially have to account for the fact that
+ expected files will be missing -- here 'recursive
+ workflow' refers to any workflow which uses the
+ addWorkflowTask() method, and 'dynamic workflow'
+ refers to any workflow which uses the
+ waitForTasks() method. These types of workflows
+ can query this status with the isDryRun() method to make
+ accommodations. (default: False)
+
+ @param retryMax: Maximum number of task retries
+
+ @param retryWait: Delay (in seconds) before resubmitting task
+
+ @param retryWindow: Maximum time (in seconds) after the first task
+ submission in which retries are allowed. A value of
+ zero or less puts no limit on the time when retries
+ will be attempted. Retries are always allowed (up to
+ retryMax times) for failed make jobs.
+
+ @param retryMode: Modes are 'nonlocal' and 'all'. For 'nonlocal'
+ retries are not attempted in local run mode. For 'all'
+ retries are attempted for any run mode. The default mode
+ is 'nonlocal'.
+
+ @param mailTo: An email address or container of email addresses. Notification
+ will be sent to each email address when
+ either (1) the run successfully completes (2) the
+ first task error occurs or (3) an unhandled
+ exception is raised. The intention is to send one
+ status message per run() indicating either success
+ or the reason for failure. This should occur for all
+ cases except a host hardware/power failure. Note
+ that mail comes from '%s' (configurable),
+ which may be classified as junk-mail by your system.
+
+ @param updateInterval: How often (in minutes) should pyflow log a
+ status update message summarizing the run
+ status. Set this to zero or less to turn
+ the update off.
+
+ @param schedulerArgList: A list of arguments can be specified to be
+ passed on to an external scheduler when non-local
+ modes are used (e.g. in sge mode you could pass
+ schedulerArgList=['-q','work.q'] to put the whole
+ pyflow job into the sge work.q queue)
+
+ @param isQuiet: Don't write any logging output to stderr (but still write
+ log to pyflow_log.txt)
+
+ @param warningLogFile: Replicate all warning messages to the specified file. Warning
+ messages will still appear in the standard logs, this
+ file will contain a subset of the log messages pertaining to
+ warnings only.
+
+ @param errorLogFile: Replicate all error messages to the specified file. Error
+ messages will still appear in the standard logs, this
+ file will contain a subset of the log messages pertaining to
+ errors only. It should be empty for a successful run.
+
+ @param successMsg: Provide a string containing a custom message which
+ will be prepended to pyflow's standard success
+ notification. This message will appear in the log
+ and any configured notifications (e.g. email). The
+ message may contain linebreaks.
+
+ @param startFromTasks: A task label or container of task labels. Any tasks which
+ are not in this set or descendants of this set will be marked as
+ completed.
+ @type startFromTasks: A single string, or set, tuple or list of strings
+
+ @param ignoreTasksAfter: A task label or container of task labels. All descendants
+ of these task labels will be ignored.
+ @type ignoreTasksAfter: A single string, or set, tuple or list of strings
+
+ @param resetTasks: A task label or container of task labels. These tasks and all
+ of their descendants will be reset to the "waiting" state to be re-run.
+ Note this option will only affect a workflow which has been continued
+ from a previous run. This will not override any nodes altered by the
+ startFromTasks setting in the case that both options are used together.
+ @type resetTasks: A single string, or set, tuple or list of strings
+ """
+
+ # Setup pyflow signal handlers:
+ #
+ inHandlers = Bunch(isSet=False)
+
+ class SigTermException(Exception) : pass
+
+ def sigtermHandler(_signum, _frame) :
+ raise SigTermException
+
+ def sighupHandler(_signum, _frame) :
+ self._warningLog("pyflow recieved hangup signal. pyflow will continue, but this signal may still interrupt running tasks.")
+ # tell cdata to turn off any tty writes:
+ self._cdata().isHangUp.set()
+
+ def set_pyflow_sig_handlers() :
+ import signal
+ if not inHandlers.isSet :
+ inHandlers.sigterm = signal.getsignal(signal.SIGTERM)
+ if not isWindows() :
+ inHandlers.sighup = signal.getsignal(signal.SIGHUP)
+ inHandlers.isSet = True
+ try:
+ signal.signal(signal.SIGTERM, sigtermHandler)
+ if not isWindows() :
+ signal.signal(signal.SIGHUP, sighupHandler)
+ except ValueError:
+ if isMainThread() :
+ raise
+ else :
+ self._warningLog("pyflow has not been initialized on main thread, all custom signal handling disabled")
+
+
+ def unset_pyflow_sig_handlers() :
+ import signal
+ if not inHandlers.isSet : return
+ try :
+ signal.signal(signal.SIGTERM, inHandlers.sigterm)
+ if not isWindows() :
+ signal.signal(signal.SIGHUP, inHandlers.sighup)
+ except ValueError:
+ if isMainThread() :
+ raise
+ else:
+ pass
+
+
+ # if return value is somehow not set after this then something bad happened, so init to 1:
+ retval = 1
+ try:
+ set_pyflow_sig_handlers()
+
+ def exceptionMessaging(prefixMsg=None) :
+ msg = lister(prefixMsg)
+ eMsg = lister(getExceptionMsg())
+ msg.extend(eMsg)
+ self._notify(msg,logState=LogState.ERROR)
+
+ try:
+ self.runStartTimeStamp = time.time()
+ self.updateInterval = int(updateInterval)
+ # a container to haul all the run() options around in:
+ param = Bunch(mode=mode,
+ dataDir=dataDirRoot,
+ isContinue=isContinue,
+ isForceContinue=isForceContinue,
+ nCores=nCores,
+ memMb=memMb,
+ isDryRun=isDryRun,
+ retryMax=retryMax,
+ retryWait=retryWait,
+ retryWindow=retryWindow,
+ retryMode=retryMode,
+ mailTo=mailTo,
+ logRunStartTime=timeStampToTimeStr(self.runStartTimeStamp),
+ workflowClassName=self._whoami(),
+ schedulerArgList=schedulerArgList,
+ isQuiet=isQuiet,
+ warningLogFile=warningLogFile,
+ errorLogFile=errorLogFile,
+ successMsg=successMsg,
+ startFromTasks=setzer(startFromTasks),
+ ignoreTasksAfter=setzer(ignoreTasksAfter),
+ resetTasks=setzer(resetTasks))
+ retval = self._runWorkflow(param)
+
+ except SigTermException:
+ msg = "Received termination signal, shutting down running tasks..."
+ self._killWorkflow(msg)
+ except KeyboardInterrupt:
+ msg = "Keyboard Interrupt, shutting down running tasks..."
+ self._killWorkflow(msg)
+ except DataDirException, e:
+ self._notify(e.msg,logState=LogState.ERROR)
+ except:
+ exceptionMessaging()
+ raise
+
+ finally:
+ # last set: disconnect the workflow log:
+ self._cdata().resetRun()
+ unset_pyflow_sig_handlers()
+
+ return retval
+
+
+ # configurable elements of docstring
+ run.__doc__ = run.__doc__ % (siteConfig.maxSGEJobs,
+ RunMode.data["sge"].defaultCores,
+ siteConfig.defaultHostMemMbPerCore,
+ siteConfig.mailFrom)
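+
+ # A usage sketch (hypothetical client code, not part of this module):
+ # the typical pattern is to subclass WorkflowRunner, override workflow(),
+ # and call run(). The class name, task labels, commands and file names
+ # below are illustrative assumptions only.
+ #
+ #   import sys
+ #   from pyflow import WorkflowRunner
+ #
+ #   class ExampleWorkflow(WorkflowRunner) :
+ #       def workflow(self) :
+ #           self.addTask("sortData", "sort input.txt > sorted.txt")
+ #           self.addTask("compressData", "gzip -f sorted.txt",
+ #                        dependencies="sortData")
+ #
+ #   if __name__ == "__main__" :
+ #       sys.exit(ExampleWorkflow().run(mode="local", nCores=2, memMb=2048))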
+
+
+
+ # protected methods which can be called within the workflow method:
+
+ def addTask(self, label, command=None, cwd=None, env=None, nCores=1,
+ memMb=siteConfig.defaultTaskMemMb,
+ dependencies=None, priority=0,
+ isForceLocal=False, isCommandMakePath=False, isTaskStable=True,
+ mutex=None,
+ retryMax=None, retryWait=None, retryWindow=None, retryMode=None) :
+ """
+ Add task to workflow, including resource requirements and
+ specification of dependencies. Dependency tasks must already
+ exist in the workflow.
+
+ @return: The 'label' argument is returned without modification.
+
+
+ @param label: A string used to identify each task. The label must
+ be composed of only ascii letters, digits,
+ underscores and dashes (ie. /[A-Za-z0-9_-]+/). The
+ label must also be unique within the workflow, and
+ non-empty.
+
+ @param command: The task command. Commands can be: (1) a shell
+ string (2) an iterable container of strings (argument
+ list) (3) None. In all cases strings must not contain
+ newline characters. A single string is typically used
+ for commands that require shell features (such as
+ pipes), an argument list can be used for any other
+ commands, this is often a useful way to simplify
+ quoting issues or to submit extremely long
+ commands. The default command (None), can be used to
+ create a 'checkpoint', ie. a task which does not run
+ anything, but provides a label associated with the
+ completion of a set of dependencies.
+
+ @param cwd: Specify current working directory to use for
+ command execution. Note that if submitting the
+ command as an argument list (as opposed to a shell
+ string) the executable (arg[0]) is searched for
+ before changing the working directory, so you cannot
+ specify the executable relative to the cwd
+ setting. If submitting a shell string command this
+ restriction does not apply.
+
+ @param env: A map of environment variables for this task, for
+ example 'env={"PATH": "/usr/bin"}'. When env is set
+ to None (the default) the environment of the pyflow
+ client process is used.
+
+ @param nCores: Number of cpu threads required
+
+ @param memMb: Amount of memory required (in megabytes)
+
+ @param dependencies: A task label or container of task labels specifying all dependent
+ tasks. Dependent tasks must already exist in
+ the workflow.
+ @type dependencies: A single string, or set, tuple or list of strings
+
+
+ @param priority: Among all tasks which are eligible to run at
+ the same time, launch tasks with higher priority
+ first. This value can be set in the range [-100,100]. Note
+ that this will strongly control the order of task
+ launch on a local run, but will only control task
+ submission order to a secondary scheduler (like
+ sge). All jobs with the same priority are already
+ submitted in order from highest to lowest nCores
+ requested, so there is no need to set priorities to
+ replicate this behavior. The taskManager can start
+ executing tasks as soon as each addTask() method is
+ called, so lower-priority tasks may be launched first
+ if they are specified first in the workflow.
+
+ @param isForceLocal: Force this task to run locally when a
+ distributed task mode is used. This can be used to
+ launch very small jobs outside of the sge queue. Note
+ that 'isForceLocal' jobs launched during a non-local
+ task mode are not subject to resource management, so
+ it is important that these represent small
+ jobs. Tasks which delete, move or touch a small
+ number of files are ideal for this setting.
+
+ @param isCommandMakePath: If true, command is assumed to be a
+ path containing a makefile. It will be run using
+ make/qmake according to the run's mode and the task's
+ isForceLocal setting
+
+ @param isTaskStable: If false, indicates that the task command
+ and/or dependencies may change if the run is
+ interrupted and restarted. A command marked as
+ unstable will not be checked to make sure it matches
+ its previous definition during run continuation.
+ Unstable examples: command contains a date/time, or
+ lists a set of files which are deleted at some point
+ in the workflow, etc.
+
+ @param mutex: Provide an optional id associated with a pyflow
+ task mutex. For all tasks with the same mutex id, no more
+ than one will be run at once. Id name must follow task id
+ restrictions. Mutex ids are global across all recursively
+ invoked workflows.
+ Example use case: This feature has been added as a simpler
+ alternative to file locking, to ensure sequential, but not
+ ordered, access to a file.
+
+ @param retryMax: The number of times this task will be retried
+ after failing. If defined, this overrides the workflow
+ retryMax value.
+
+ @param retryWait: The number of seconds to wait before relaunching
+ a failed task. If defined, this overrides the workflow
+ retryWait value.
+
+ @param retryWindow: The number of seconds after job submission in
+ which retries will be attempted for non-make jobs. A value of
+ zero or less causes retries to be attempted anytime after
+ job submission. If defined, this overrides the workflow
+ retryWindow value.
+
+ @param retryMode: Modes are 'nonlocal' and 'all'. For 'nonlocal'
+ retries are not attempted in local run mode. For 'all'
+ retries are attempted for any run mode. If defined, this overrides
+ the workflow retryMode value.
+ """
+
+ self._requireInWorkflow()
+
+ #### Canceled plans to add deferred dependencies:
+ # # deferredDependencies -- A container of labels specifying dependent
+ # # tasks which have not yet been added to the
+ # # workflow. In this case the added task will
+ # # wait for the dependency to be defined *and*
+ # # complete. Avoid these in favor of regular
+ # # dependencies if possible.
+
+ # sanitize bools:
+ isForceLocal = argToBool(isForceLocal)
+ isCommandMakePath = argToBool(isCommandMakePath)
+
+ # sanitize ints:
+ nCores = int(nCores)
+ memMb = int(memMb)
+ priority = int(priority)
+ if (priority > 100) or (priority < -100) :
+ raise Exception("priority must be an integer in the range [-100,100]")
+
+ # sanity check label:
+ WorkflowRunner._checkTaskLabel(label)
+
+ fullLabel = namespaceJoin(self._getNamespace(), label)
+
+ # verify/sanitize command:
+ cmd = Command(command, cwd, env)
+
+ # deal with command/resource relationship:
+ if cmd.cmd is None :
+ nCores = 0
+ memMb = 0
+ else:
+ if nCores <= 0 :
+ raise Exception("Unexpected core requirement for task: '%s' nCores: %i" % (fullLabel, nCores))
+ if memMb <= 0:
+ raise Exception("Unexpected memory requirement for task: '%s' memory: %i (megabytes)" % (fullLabel, memMb))
+
+
+ if (self._cdata().param.nCores != "unlimited") and (nCores > self._cdata().param.nCores) :
+ raise Exception("Task core requirement exceeds full available resources")
+
+ if (self._cdata().param.memMb != "unlimited") and (memMb > self._cdata().param.memMb) :
+ raise Exception("Task memory requirement exceeds full available resources")
+
+ # check that make path commands point to a directory:
+ #
+ if isCommandMakePath :
+ if cmd.type != "str" :
+ raise Exception("isCommandMakePath is set, but no path is provided in task: '%s'" % (fullLabel))
+ cmd.cmd = os.path.abspath(cmd.cmd)
+
+ # sanitize mutex option
+ if mutex is not None :
+ WorkflowRunner._checkTaskLabel(mutex)
+
+ task_retry = self._cdata().param.retry.getTaskCopy(retryMax, retryWait, retryWindow, retryMode)
+
+ # private _addTaskCore gets hijacked in recursive workflow submission:
+ #
+ payload = CmdPayload(fullLabel, cmd, nCores, memMb, priority, isForceLocal, isCommandMakePath, isTaskStable, mutex, task_retry)
+ self._addTaskCore(self._getNamespace(), label, payload, dependencies)
+ return label
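+
+ # A few addTask() call patterns matching the docstring above. Labels,
+ # commands, file names and the mutex id are hypothetical, and the calls
+ # are assumed to be made from within a workflow() method.
+ #
+ #   # shell-string command with explicit resource requirements:
+ #   self.addTask("compress", "gzip -c big.txt > big.txt.gz",
+ #                nCores=1, memMb=2048)
+ #
+ #   # argument-list command depending on the task above:
+ #   self.addTask("checksum", ["md5sum", "big.txt.gz"],
+ #                dependencies="compress")
+ #
+ #   # checkpoint task (command=None) labeling completion of a task group:
+ #   self.addTask("stageDone", dependencies=["compress", "checksum"])
+ #
+ #   # tasks sharing a mutex id are never run concurrently:
+ #   self.addTask("log1", "cat a.txt >> summary.txt", mutex="summaryLock")
+ #   self.addTask("log2", "cat b.txt >> summary.txt", mutex="summaryLock")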
+
+
+
+
+ def addWorkflowTask(self, label, workflowRunnerInstance, dependencies=None) :
+ """
+ Add another WorkflowRunner instance as a task to this
+ workflow. The added Workflow's workflow() method will be
+ called once the dependencies specified in this call have
+ completed. Once started, all of the submitted workflow's
+ method calls (like addTask) will be placed into the enclosing
+ workflow instance and bound by the run parameters of the
+ enclosing workflow.
+
+ This task will be marked complete once the submitted workflow's
+ workflow() method has finished, and any tasks it initiated have
+ completed.
+
+ Note that all workflow tasks will have their own tasks namespaced
+ with the workflow task label. This namespace is recursive in the
+ case that you add workflow tasks which add their own workflow
+ tasks, etc.
+
+ Note that the submitted workflow instance will be deep copied
+ before being altered in any way.
+
+ @return: The 'label' argument is returned without modification.
+
+ @param label: A string used to identify each task. The label must
+ be composed of only ascii letters, digits,
+ underscores and dashes (ie. /[A-Za-z0-9_-]+/). The
+ label must also be unique within the workflow, and
+ non-empty.
+
+ @param workflowRunnerInstance: A L{WorkflowRunner} instance.
+
+ @param dependencies: A label string or container of labels specifying all dependent
+ tasks. Dependent tasks must already exist in
+ the workflow.
+ @type dependencies: A single string, or set, tuple or list of strings
+ """
+
+ self._requireInWorkflow()
+
+ # sanity check label:
+ WorkflowRunner._checkTaskLabel(label)
+
+ import inspect
+
+ # copy and 'hijack' the workflow:
+ workflowCopy = copy.deepcopy(workflowRunnerInstance)
+
+ # hijack! -- take all public methods at the WorkflowRunner level
+ # (except workflow()), and insert the self copy:
+ publicExclude = ["workflow", "addTask", "addWorkflowTask", "waitForTasks"]
+ for (n, _v) in inspect.getmembers(WorkflowRunner, predicate=inspect.ismethod) :
+ if n[0] == "_" : continue # skip private/special methods
+ if n in publicExclude : continue
+ setattr(workflowCopy, n, getattr(self, n))
+
+ privateInclude = ["_cdata", "_addTaskCore", "_waitForTasksCore", "_isTaskCompleteCore","_setRunning","_getRunning"]
+ for n in privateInclude :
+ setattr(workflowCopy, n, getattr(self, n))
+
+ # final step: disable the run() function to be extra safe...
+ workflowCopy.run = None
+
+ # set the task namespace:
+ workflowCopy._appendNamespace(self._getNamespaceList())
+ workflowCopy._appendNamespace(label)
+
+ # add workflow task to the task-dag, and launch a new taskrunner thread
+ # if one isn't already running:
+ payload = WorkflowPayload(workflowCopy)
+ self._addTaskCore(self._getNamespace(), label, payload, dependencies)
+ return label
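+
+ # A sub-workflow composition sketch (class names and labels are
+ # hypothetical): the added instance's workflow() method runs once its
+ # dependencies complete, and its tasks are namespaced under the label.
+ #
+ #   prepWf = PrepWorkflow(inputFile)   # another WorkflowRunner subclass
+ #   self.addTask("makeDirs", "mkdir -p results")
+ #   self.addWorkflowTask("prep", prepWf, dependencies="makeDirs")
+ #   self.addTask("summarize", "touch results/done.txt", dependencies="prep")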
+
+
+ def waitForTasks(self, labels=None) :
+ """
+ Wait for a list of tasks to complete.
+
+ @return: In case of an error in a task being waited for, or in
+ one of these task's dependencies, the function returns 1.
+ Else return 0.
+
+ @param labels: Container of task labels to wait for. If an empty container is
+ given or no list is provided then wait for all
+ outstanding tasks to complete.
+ @type labels: A single string, or set, tuple or list of strings
+ """
+
+ self._requireInWorkflow()
+
+ return self._waitForTasksCore(self._getNamespace(), labels)
+
+
+ def isTaskComplete(self, taskLabel) :
+ """
+ Query if a specific task is in the workflow and completed without error.
+
+ This can assist workflows with providing
+ stable interrupt/resume behavior.
+
+ @param taskLabel: A task string
+
+ @return: Completion status of task
+ """
+
+ return self._isTaskCompleteCore(self._getNamespace(), taskLabel)
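+
+ # A dynamic-workflow sketch using waitForTasks() and isTaskComplete()
+ # (labels, file names and the follow-on command are hypothetical):
+ # waiting inside workflow() lets the code inspect task output before
+ # deciding which tasks to add next.
+ #
+ #   self.addTask("countLines", "wc -l < input.txt > count.txt")
+ #   if self.waitForTasks("countLines") == 0 :
+ #       assert self.isTaskComplete("countLines")
+ #       if not self.isDryRun() :
+ #           lineCount = int(open("count.txt").read())
+ #           if lineCount > 1000000 :
+ #               self.addTask("bigJob", "./processBig.bash input.txt", nCores=8)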
+
+
+ def getRunMode(self) :
+ """
+ Get the current run mode
+
+ This can be used to access the current run mode from
+ within the workflow function. Although the runmode should
+ be transparent to client code, this is occasionally needed
+ to hack workarounds.
+
+ @return: Current run mode
+ """
+
+ self._requireInWorkflow()
+
+ return self._cdata().param.mode
+
+
+ def getNCores(self) :
+ """
+ Get the current run core limit
+
+ This function can be used to access the current run's core
+ limit from within the workflow function. This can be useful
+ to eg. limit the number of cores requested by a single task.
+
+ @return: Total cores available to this workflow run
+ @rtype: Integer value or 'unlimited'
+ """
+
+ self._requireInWorkflow()
+
+ return self._cdata().param.nCores
+
+
+ def limitNCores(self, nCores) :
+ """
+ Takes a task nCores argument and reduces it to
+ the maximum value allowed for the current run.
+
+ @param nCores: Proposed core requirement
+
+ @return: Min(nCores,Total cores available to this workflow run)
+ """
+
+ self._requireInWorkflow()
+
+ nCores = int(nCores)
+ runNCores = self._cdata().param.nCores
+ if runNCores == "unlimited" : return nCores
+ return min(nCores, runNCores)
+
+
+ def getMemMb(self) :
+ """
+ Get the current run's total memory limit (in megabytes)
+
+ @return: Memory limit in megabytes
+ @rtype: Integer value or 'unlimited'
+ """
+
+ self._requireInWorkflow()
+
+ return self._cdata().param.memMb
+
+
+ def limitMemMb(self, memMb) :
+ """
+ Takes a task memMb argument and reduces it to
+ the maximum value allowed for the current run.
+
+ @param memMb: Proposed task memory requirement in megabytes
+
+ @return: Min(memMb,Total memory available to this workflow run)
+ """
+
+ self._requireInWorkflow()
+
+ memMb = int(memMb)
+ runMemMb = self._cdata().param.memMb
+ if runMemMb == "unlimited" : return memMb
+ return min(memMb, runMemMb)
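+
+ # A resource-query sketch (task label, command and sizes are hypothetical):
+ # clamp a task's request to the current run's limits before submission.
+ #
+ #   useCores = self.limitNCores(8)
+ #   self.addTask("bigSort",
+ #                "sort --parallel=%i huge.txt > sorted.txt" % (useCores),
+ #                nCores=useCores, memMb=self.limitMemMb(16 * 1024))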
+
+
+ def isDryRun(self) :
+ """
+ Get isDryRun flag value.
+
+ When the dryrun flag is set, no commands are actually run. Querying
+ this flag allows dynamic workflows to correct for dry run behaviors,
+ such as tasks which do not produce expected files.
+
+ @return: DryRun status flag
+ """
+
+ self._requireInWorkflow()
+
+ return self._cdata().param.isDryRun
+
+
+ @staticmethod
+ def runModeDefaultCores(mode) :
+ """
+ Get the default core limit for run mode (local,sge,..)
+
+ @param mode: run mode, as specified in L{the run() method<WorkflowRunner.run>}
+
+ @return: Default maximum number of cores for mode
+
+ @rtype: Either 'unlimited', or a string
+ representation of the integer limit
+ """
+
+ return str(RunMode.data[mode].defaultCores)
+
+
+ def flowLog(self, msg, logState = LogState.INFO) :
+ """
+ Send a message to the WorkflowRunner's log.
+
+ @param msg: Log message
+ @type msg: A string or an array of strings. String arrays will be separated by newlines in the log.
+ @param logState: Message severity, defaults to INFO.
+ @type logState: A value in pyflow.LogState.{INFO,WARNING,ERROR}
+ """
+
+ self._requireInWorkflow()
+
+ linePrefixOut = "[%s]" % (self._cdata().param.workflowClassName)
+ self._cdata().flowLog(msg, linePrefix=linePrefixOut, logState=logState)
+
+
+ # Protected methods for client derived-class override:
+
+ def workflow(self) :
+ """
+ Workflow definition provided by the derived class
+
+ This method should be overridden in the class derived from
+ L{WorkflowRunner} to specify the actual workflow logic. Client
+ code should not call this method directly.
+ """
+ pass
+
+
+ # private methods:
+
+ # special workflowRunner Exception used to terminate workflow() function
+ # if a ctrl-c is issued
+ class _AbortWorkflowException(Exception) :
+ pass
+
+
+ def _flowLog(self, msg, logState) :
+ linePrefixOut = "[WorkflowRunner]"
+ self._cdata().flowLog(msg, linePrefix=linePrefixOut, logState=logState)
+
+ def _infoLog(self, msg) :
+ self._flowLog(msg,logState=LogState.INFO)
+
+ def _warningLog(self, msg) :
+ self._flowLog(msg,logState=LogState.WARNING)
+
+ def _errorLog(self, msg) :
+ self._flowLog(msg,logState=LogState.ERROR)
+
+ def _whoami(self) :
+ # returns name of *derived* class
+ return self.__class__.__name__
+
+
+ def _getNamespaceList(self) :
+ try:
+ return self._namespaceList
+ except AttributeError:
+ self._namespaceList = []
+ return self._namespaceList
+
+ def _getNamespace(self) :
+ return namespaceSep.join(self._getNamespaceList())
+
+ def _appendNamespace(self, names) :
+ names = lister(names)
+ for name in names :
+ # check against runaway recursion:
+ if len(self._getNamespaceList()) >= WorkflowRunner._maxWorkflowRecursion :
+ raise Exception("Recursive workflow invocation depth exceeds maximum allowed depth of %i" % (WorkflowRunner._maxWorkflowRecursion))
+ WorkflowRunner._checkTaskLabel(name)
+ self._getNamespaceList().append(name)
+
+
+ # flag used to request the termination of all task submission:
+ #
+ _allStop = threading.Event()
+
+ @staticmethod
+ def _stopAllWorkflows() :
+ # request all workflows stop task submission:
+ WorkflowRunner._allStop.set()
+
+ @staticmethod
+ def _isWorkflowStopped() :
+ # check whether a global signal has been given to stop all workflow submission
+ # this should only be true when a ctrl-C or similar event has occurred.
+ return WorkflowRunner._allStop.isSet()
+
+ def _addTaskCore(self, namespace, label, payload, dependencies) :
+ # private core taskAdd routine for hijacking
+ # fromWorkflow is the workflow instance used to launch the task
+ #
+
+ # add workflow task to the task-dag, and launch a new taskrunner thread
+ # if one isn't already running:
+ if self._isWorkflowStopped() :
+ raise WorkflowRunner._AbortWorkflowException
+
+ self._infoLog("Adding %s '%s' to %s" % (payload.desc(), namespaceJoin(namespace, label), namespaceLabel(namespace)))
+
+ # add task to the task-dag, and launch a new taskrunner thread
+ # if one isn't already running:
+ dependencies = setzer(dependencies)
+ self._tdag.addTask(namespace, label, payload, dependencies)
+ self._startTaskManager()
+
+
+ def _getWaitStatus(self, namespace, labels, status) :
+ # update and return two values:
+ # (1) isAllTaskDone -- are all tasks done (ie. in error or complete state)
+ # (2) retval -- this is set to one if any tasks have errors
+ #
+
+ def updateStatusFromTask(task, status) :
+ if not task.isDone() :
+ status.isAllTaskDone = False
+ elif not task.isComplete() :
+ status.retval = 1
+ if status.retval == 0 and (not self._cdata().isTaskSubmissionActive()) :
+ status.retval = 1
+ if status.retval == 0 and task.isDead() :
+ status.retval = 1
+
+
+ if len(labels) == 0 :
+ if namespace == "" :
+ if self._tdag.isRunExhausted() or (not self._tman.isAlive()) :
+ if not self._tdag.isRunComplete() :
+ status.retval = 1
+ else:
+ status.isAllTaskDone = False
+ else :
+ for task in self._tdag.getAllNodes(namespace) :
+ updateStatusFromTask(task, status)
+ else :
+ for l in labels :
+ if not self._tdag.isTaskPresent(namespace, l) :
+ raise Exception("Task: '%s' is not in taskDAG" % (namespaceJoin(namespace, l)))
+ task = self._tdag.getTask(namespace, l)
+ updateStatusFromTask(task, status)
+
+
+ def _waitForTasksCore(self, namespace, labels=None, isVerbose=True) :
+ labels = setzer(labels)
+ if isVerbose :
+ msg = "Pausing %s until completion of" % (namespaceLabel(namespace))
+ if len(labels) == 0 :
+ self._infoLog(msg + " its current tasks")
+ else:
+ self._infoLog(msg + " task(s): %s" % (",".join([namespaceJoin(namespace, l) for l in labels])))
+
+ class WaitStatus:
+ def __init__(self) :
+ self.isAllTaskDone = True
+ self.retval = 0
+
+ ewaiter = ExpWaiter(1, 1.7, 15)
+ while True :
+ if self._isWorkflowStopped() :
+ raise WorkflowRunner._AbortWorkflowException
+ status = WaitStatus()
+ self._getWaitStatus(namespace, labels, status)
+ if status.isAllTaskDone or (status.retval != 0) : break
+ ewaiter.wait()
+
+ if isVerbose :
+ msg = "Resuming %s" % (namespaceLabel(namespace))
+ self._infoLog(msg)
+ return status.retval
+
+
+ def _isTaskCompleteCore(self, namespace, taskLabel) :
+
+ if not self._tdag.isTaskPresent(namespace, taskLabel) :
+ return False
+ task = self._tdag.getTask(namespace, taskLabel)
+ return task.isComplete()
+
+
+ @staticmethod
+ def _checkTaskLabel(label) :
+ # sanity check label:
+ if not isinstance(label, basestring) :
+ raise Exception ("Task label is not a string")
+ if label == "" :
+ raise Exception ("Task label is empty")
+ if not re.match("^[A-Za-z0-9_-]+$", label) :
+ raise Exception ("Task label is invalid due to disallowed characters. Label: '%s'" % (label))
+
+
+ def _startTaskManager(self) :
+ # start a new task manager if one isn't already running:
+ #
+ if (self._tman is not None) and (self._tman.isAlive()) : return
+ if not self._cdata().isTaskManagerException :
+ self._tman = TaskManager(self._cdata(), self._tdag)
+ self._tman.start()
+
+
+ def _notify(self, msg, logState) :
+ # msg is printed to log AND sent to any email or other requested
+ # notification systems:
+ self._flowLog(msg,logState)
+ self._cdata().emailNotification(msg, self._flowLog)
+
+
+ def _killWorkflow(self, errorMsg) :
+ self._notify(errorMsg,logState=LogState.ERROR)
+ self._shutdownAll(timeoutSec=10)
+ sys.exit(1)
+
+
+ def _shutdownAll(self, timeoutSec) :
+ # Try to shut down the task manager, all command-tasks,
+ # and all sub-workflow tasks.
+ #
+ if (self._tman is None) or (not self._tman.isAlive()) : return
+ StoppableThread.stopAll()
+ self._stopAllWorkflows()
+ self._tman.stop()
+ for _ in range(timeoutSec) :
+ time.sleep(1)
+ if not self._tman.isAlive() :
+ self._infoLog("Task shutdown complete")
+ return
+ self._infoLog("Task shutdown timed out")
+
+
+ def _cdata(self) :
+ # We're doing this convoluted setup only to avoid having a
+ # ctor for ease of use by the client. See what pyFlow goes
+ # through for you, client code??
+ #
+ try:
+ return self._constantData
+ except AttributeError:
+ self._constantData = WorkflowRunnerThreadSharedData()
+ return self._constantData
+
+
+ # TODO: Better definition of the status thread shutdown at the end of a pyflow run to
+ # prevent race conditions -- ie. what happens if the status update is running while
+ # pyflow is shutting down? Every method called by the status updater should be
+ # safety-checked with respect to this issue.
+ #
+ def _runUpdate(self, runStatus) :
+ while True :
+ time.sleep(self.updateInterval * 60)
+
+ status = self._tdag.getTaskStatus()
+ isSpecComplete = (runStatus.isSpecificationComplete.isSet() and status.isAllSpecComplete)
+ report = []
+ report.append("===== " + self._whoami() + " StatusUpdate =====")
+ report.append("Workflow specification is complete?: %s" % (str(isSpecComplete)))
+ report.append("Task status (waiting/queued/running/complete/error): %i/%i/%i/%i/%i"
+ % (status.waiting, status.queued, status.running, status.complete, status.error))
+ report.append("Longest ongoing queued task time (hrs): %.4f" % (status.longestQueueSec / 3600.))
+ report.append("Longest ongoing queued task name: '%s'" % (status.longestQueueName))
+ report.append("Longest ongoing running task time (hrs): %.4f" % (status.longestRunSec / 3600.))
+ report.append("Longest ongoing running task name: '%s'" % (status.longestRunName))
+
+ report = [ "[StatusUpdate] " + line for line in report ]
+ self._infoLog(report)
+
+ # Update interval is also an appropriate interval to dump a stack-trace of all active
+ # threads. This is a useful post-mortem in the event of a large class of hang/deadlock
+ # errors:
+ #
+ stackDumpFp = open(self._cdata().stackDumpLogFile, "a")
+
+ # create one fully decorated line in the stack dump file as a prefix to the report:
+ linePrefixOut = "[%s] [StackDump]" % (self._cdata().getRunid())
+ ofpList = [stackDumpFp]
+ log(ofpList, "Initiating stack dump for all threads", linePrefixOut)
+
+ stackDump(stackDumpFp)
+ hardFlush(stackDumpFp)
+ stackDumpFp.close()
+
+
+
+ def _runWorkflow(self, param) :
+ #
+ # Primary workflow logic when nothing goes wrong:
+ #
+ self._setupWorkflow(param)
+ self._initMessage()
+
+ runStatus = RunningTaskStatus(self._tdag.isFinishedEvent)
+
+ # start status update reporter:
+ #
+ # TODO: stop this thread at end of run
+ #
+ if(self.updateInterval > 0) :
+ hb = threading.Thread(target=WorkflowRunner._runUpdate, args=(self, runStatus))
+ hb.setDaemon(True)
+ hb.setName("StatusUpdate-Thread")
+ hb.start()
+
+ # run workflow() function on a separate thread, using exactly
+ # the same method we use for sub-workflows:
+ #
+ # TODO: move the master workflow further into the code path used by sub-workflows,
+ # so that we aren't replicating polling and error handling code in this function:
+ #
+ trun = WorkflowTaskRunner(runStatus, "masterWorkflow", self, self._cdata().flowLog, None)
+ trun.start()
+ # can't join() because that blocks SIGINT
+ ewaiter = ExpWaiter(1, 1.7, 15,runStatus.isComplete)
+ while True :
+ if not trun.isAlive() : break
+ ewaiter.wait()
+
+ if not runStatus.isComplete.isSet() :
+ # if not complete then we don't know what happened, very bad!:
+ runStatus.errorCode = 1
+ runStatus.errorMessage = "Thread: '%s', has stopped without a traceable cause" % (trun.getName())
+
+ return self._evalWorkflow(runStatus)
+
+
+ def _setupWorkflow(self, param) :
+ cdata = self._cdata()
+
+ # setup instance user parameters:
+ cdata.setupNewRun(param)
+
+ # setup other instance data:
+ self._tdag = TaskDAG(cdata.param.isContinue, cdata.param.isForceContinue, cdata.param.isDryRun,
+ cdata.taskInfoFile, cdata.taskStateFile, cdata.param.workflowClassName,
+ cdata.param.startFromTasks, cdata.param.ignoreTasksAfter, cdata.param.resetTasks,
+ self._flowLog)
+ self._tman = None
+
+ def backupFile(inputFile) :
+ """
+ backup old state files if they exist
+ """
+ if not os.path.isfile(inputFile) : return
+ fileDir = os.path.dirname(inputFile)
+ fileName = os.path.basename(inputFile)
+ backupDir = os.path.join(fileDir, "backup")
+ ensureDir(backupDir)
+ backupFileName = fileName + ".backup_before_starting_run_%s.txt" % (cdata.getRunid())
+ backupFile = os.path.join(backupDir, backupFileName)
+ shutil.copyfile(inputFile, backupFile)
+
+ backupFile(cdata.taskStateFile)
+ backupFile(cdata.taskInfoFile)
+
+ if cdata.param.isContinue :
+ self._setupContinuedWorkflow()
+
+
+
+ def _createContinuedStateFile(self) :
+ #
+ # create continued version of task state file
+ #
+
+ cdata = self._cdata()
+ if not os.path.isfile(cdata.taskStateFile) : return set()
+
+ tmpFile = cdata.taskStateFile + ".update.incomplete"
+ tmpfp = open(tmpFile, "w")
+ tmpfp.write(taskStateHeader())
+ complete = set()
+ for words in taskStateParser(cdata.taskStateFile) :
+ (runState, errorCode) = words[2:4]
+ if (runState != "complete") or (int(errorCode) != 0) : continue
+ tmpfp.write("\t".join(words) + "\n")
+ (label, namespace) = words[0:2]
+ complete.add(namespaceJoin(namespace, label))
+
+ tmpfp.close()
+ forceRename(tmpFile, cdata.taskStateFile)
+ return complete
+
+
+ def _createContinuedInfoFile(self, complete) :
+ #
+ # create continued version of task info file
+ #
+
+ cdata = self._cdata()
+ if not os.path.isfile(cdata.taskInfoFile) : return
+
+ tmpFile = cdata.taskInfoFile + ".update.incomplete"
+ tmpfp = open(tmpFile, "w")
+ tmpfp.write(taskInfoHeader())
+ for words in taskInfoParser(cdata.taskInfoFile) :
+ (label, namespace, ptype, nCores, memMb, priority, isForceLocal, depStr, cwdStr, command) = words
+ fullLabel = namespaceJoin(namespace, label)
+ if fullLabel not in complete : continue
+ tmpfp.write("\t".join(words) + "\n")
+ if ptype == "command" :
+ if command == "" : command = None
+ payload = CmdPayload(fullLabel, Command(command, cwdStr), int(nCores), int(memMb), int(priority), argToBool(isForceLocal))
+ elif ptype == "workflow" :
+ payload = WorkflowPayload(None)
+ else : assert 0
+
+ self._tdag.addTask(namespace, label, payload, getTaskInfoDepSet(depStr), isContinued=True)
+
+ tmpfp.close()
+ forceRename(tmpFile, cdata.taskInfoFile)
+
+
+
+ def _setupContinuedWorkflow(self) :
+ # reduce both state files to completed states only.
+ complete = self._createContinuedStateFile()
+ self._createContinuedInfoFile(complete)
+
+
+
+ def _initMessage(self) :
+ param = self._cdata().param # shortcut
+ msg = ["Initiating pyFlow run"]
+ msg.append("pyFlowClientWorkflowClass: %s" % (param.workflowClassName))
+ msg.append("pyFlowVersion: %s" % (__version__))
+ msg.append("pythonVersion: %s" % (pythonVersion))
+ msg.append("WorkingDir: '%s'" % (self._cdata().cwd))
+ msg.append("ProcessCmdLine: '%s'" % (cmdline()))
+
+ parammsg = ["mode: %s" % (param.mode),
+ "nCores: %s" % (str(param.nCores)),
+ "memMb: %s" % (str(param.memMb)),
+ "dataDir: %s" % (str(param.dataDir)),
+ "isDryRun: %s" % (str(param.isDryRun)),
+ "isContinue: %s" % (str(param.isContinue)),
+ "isForceContinue: %s" % (str(param.isForceContinue)),
+ "mailTo: '%s'" % (",".join(param.mailTo))]
+ for i in range(len(parammsg)):
+ parammsg[i] = "[RunParameters] " + parammsg[i]
+ msg += parammsg
+ self._infoLog(msg)
+
+
+
+ def _getTaskErrorsSummaryMsg(self, isForceTaskHarvest=False) :
+ # isForceHarvest means we try to force an update of the shared
+ # taskError information in case this thread is ahead of the
+ # task manager.
+ if isForceTaskHarvest :
+ if (self._tman is not None) and (self._tman.isAlive()) :
+ self._tman.harvestTasks()
+
+ if not self._cdata().isTaskError() : return []
+ # this case has already been emailed in the TaskManager @ first error occurrence:
+ msg = ["Worklow terminated due to the following task errors:"]
+ for task in self._cdata().taskErrors :
+ msg.extend(task.getTaskErrorMsg())
+ return msg
+
+
+ def _evalWorkflow(self, masterRunStatus) :
+
+ isError = False
+ if self._cdata().isTaskError() :
+ msg = self._getTaskErrorsSummaryMsg()
+ self._errorLog(msg)
+ isError = True
+
+ if masterRunStatus.errorCode != 0 :
+ eMsg = lister(masterRunStatus.errorMessage)
+ if (len(eMsg) > 1) or (len(eMsg) == 1 and eMsg[0] != "") :
+ msg = ["Failed to complete master workflow, error code: %s" % (str(masterRunStatus.errorCode))]
+ msg.append("errorMessage:")
+ msg.extend(eMsg)
+ self._notify(msg,logState=LogState.ERROR)
+ isError = True
+
+ if self._cdata().isTaskManagerException :
+ # this case has already been emailed in the TaskManager:
+ self._errorLog("Workflow terminated due to unhandled exception in TaskManager")
+ isError = True
+
+ if (not isError) and (not self._tdag.isRunComplete()) :
+ msg = "Workflow terminated with unknown error condition"
+ self._notify(msg,logState=LogState.ERROR)
+ isError = True
+
+ if isError: return 1
+
+ elapsed = int(time.time() - self.runStartTimeStamp)
+ msg = []
+ if self._cdata().param.successMsg is not None :
+ msg.extend([self._cdata().param.successMsg,""])
+ msg.extend(["Workflow successfully completed all tasks",
+ "Elapsed time for full workflow: %s sec" % (elapsed)])
+ self._notify(msg,logState=LogState.INFO)
+ return 0
+
+
+ def _requireInWorkflow(self) :
+ """
+ check that the calling method is being called as part of a pyflow workflow() method only
+ """
+ if not self._getRunning():
+ raise Exception("Method must be a (call stack) descendant of WorkflowRunner workflow() method (via run() method)")
+
+
+ def _initRunning(self):
+ try :
+ assert(self._isRunning >= 0)
+ except AttributeError :
+ self._isRunning = 0
+
+ @lockMethod
+ def _setRunning(self, isRunning) :
+ self._initRunning()
+ if isRunning :
+ self._isRunning += 1
+ else :
+ self._isRunning -= 1
+
+ @lockMethod
+ def _getRunning(self) :
+ self._initRunning()
+ return (self._isRunning > 0)
+
+
+
+if __name__ == "__main__" :
+ help(WorkflowRunner)
+
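The file above closes out the WorkflowRunner public interface (addTask/addWorkflowTask plus the query and logging helpers documented earlier in this file). The following is a minimal usage sketch, not part of the commit: the sys.path location, task labels, and echo commands are illustrative assumptions; only the method names and arguments shown in the source above come from pyflow itself.

    import os.path
    import sys

    # hypothetical checkout location; point this at the local pyflow/src directory
    sys.path.append("/path/to/pyflow/src")

    from pyflow import WorkflowRunner

    class ExampleWorkflow(WorkflowRunner) :

        def workflow(self) :
            # run-level queries are only legal inside workflow():
            self.flowLog("running in mode: %s" % (self.getRunMode()))

            # clamp per-task requests to what this run actually allows:
            taskCores = self.limitNCores(4)
            taskMemMb = self.limitMemMb(4096)

            self.addTask("task1", "echo hello", nCores=taskCores, memMb=taskMemMb)

            # block the workflow thread until task1 finishes, then branch on its status:
            if (self.waitForTasks("task1") == 0) and self.isTaskComplete("task1") :
                self.addTask("task2", "echo world", dependencies="task1")

    if __name__ == "__main__" :
        sys.exit(ExampleWorkflow().run("local", nCores=2, memMb=2048))
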
diff --git a/pyflow/src/pyflowConfig.py b/pyflow/src/pyflowConfig.py
new file mode 100644
index 0000000..2e5b16d
--- /dev/null
+++ b/pyflow/src/pyflowConfig.py
@@ -0,0 +1,213 @@
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+"""
+pyflowConfig
+
+This file defines a class instance 'siteConfig' containing pyflow components
+which are the most likely to need site-specific configuration.
+"""
+
+import os
+
+
+# this is the only object pyflow needs to import, it
+# is defined at the end of this module:
+#
+siteConfig = None
+
+
+# depending on network setup getfqdn() can be slow, so cache calls to this function here:
+#
+def _getHostName() :
+ import socket
+ return socket.getfqdn()
+
+cachedHostName = None
+
+
+def getHostName() :
+ global cachedHostName
+ if cachedHostName is None :
+ cachedHostName = _getHostName()
+ return cachedHostName
+
+
+def getDomainName() :
+ hn = getHostName().split(".")
+ if len(hn) > 1 : hn = hn[1:]
+ return ".".join(hn)
+
+
+
+class DefaultSiteConfig(object) :
+ """
+ Default configuration settings are designed to work with as
+ many sites as technically feasible
+ """
+
+ # All email notifications come from the following e-mail address
+ #
+ mailFrom = "pyflow-bot@" + getDomainName()
+
+ # Default memory (in megabytes) requested by each command task:
+ #
+ defaultTaskMemMb = 2048
+
+ # In local run mode, this is the default memory per thread that we
+ # assume is available:
+ #
+ defaultHostMemMbPerCore = 2048
+
+ # maximum number of jobs which can be submitted to sge at once:
+ #
+ # allowed values are "unlimited" or None for unlimited jobs, or
+ # a positive integer describing the max number of jobs
+ #
+ maxSGEJobs = 128
+
+ # both getHostName and getDomainName are used in the
+ # siteConfig factory, so these are not designed to be
+ # overridden at present:
+ getHostName = staticmethod(getHostName)
+ getDomainName = staticmethod(getDomainName)
+
+ @classmethod
+ def qsubResourceArg(cls, nCores, memMb) :
+ """
+ When a task is launched using qsub in sge mode, it will call this
+ function to specify the requested number of threads and megabytes
+ of memory. The returned argument list will be appended to the qsub
+ arguments.
+
+ nCores -- number of threads requested
+ memMb -- memory requested (in megabytes)
+ """
+ nCores = int(nCores)
+ memMb = int(memMb)
+ return cls._qsubResourceArgConfig(nCores, memMb)
+
+ @classmethod
+ def _qsubResourceArgConfig(cls, nCores, memMb) :
+ """
+ The default function is designed for maximum
+ portability -- it just provides more memory
+ via more threads.
+ """
+
+ # this is the memory we assume is available per
+ # thread on the cluster:
+ #
+ class Constants(object) : megsPerCore = 4096
+
+ memCores = 1 + ((memMb - 1) / Constants.megsPerCore)
+
+ qsubCores = max(nCores, memCores)
+
+ if qsubCores <= 1 : return []
+ return ["-pe", "threaded", str(qsubCores)]
+
+
+ @classmethod
+ def getSgeMakePrefix(cls, nCores, memMb, schedulerArgList) :
+ """
+ This prefix will be added to ' -C directory', and run from
+ a local process to handle sge make jobs.
+
+ Note that memMb hasn't been well defined for make jobs yet,
+ is it the per task memory limit? The first application to
+ actually make use of this will have to set up the convention;
+ it is ignored right now...
+ """
+ nCores = int(nCores)
+ memMb = int(memMb)
+
+ retval = ["qmake",
+ "-V",
+ "-now", "n",
+ "-cwd",
+ "-N", "pyflowMakeTask"]
+
+ # user arguments to run() (usually a queue specification):
+ retval.extend(schedulerArgList)
+
+ #### use qmake parallel environment:
+ # retval.extend(["-pe","make",str(nCores),"--"])
+
+ #### ...OR use 'dynamic' sge make environment:
+ retval.extend(["--", "-j", str(nCores)])
+
+ return retval
+
+
+
+def getEnvVar(key) :
+ if key in os.environ : return os.environ[key]
+ return None
+
+
+
+class hvmemSGEConfig(DefaultSiteConfig) :
+ """
+ This config assumes 'h_vmem' is defined on the SGE instance
+
+ """
+
+ @classmethod
+ def _qsubResourceArgConfig(cls, nCores, memMb) :
+
+ retval = []
+
+ # specify memory requirements
+ memGb = 1 + ((memMb - 1) / 1024)
+ reqArg = "h_vmem=%iG" % (memGb)
+ retval.extend(["-l", reqArg])
+
+ if nCores > 1 :
+ retval.extend(["-pe", "threaded", str(nCores)])
+
+ return retval
+
+
+
+#
+# final step is the selection of this run's siteConfig object:
+#
+
+def siteConfigFactory() :
+ # TODO: add an import time test to determine if h_vmem is valid
+ return hvmemSGEConfig
+
+
+siteConfig = siteConfigFactory()
+
+
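A hedged sketch of how a site might supply its own qsub resource mapping by subclassing the DefaultSiteConfig defined above. The 'mem_free' resource and 'smp' parallel environment are assumptions about a hypothetical SGE installation, and wiring the class in by editing siteConfigFactory() is likewise an assumption about local deployment rather than an interface this module defines.

    from pyflowConfig import DefaultSiteConfig

    class MemFreeSGEConfig(DefaultSiteConfig) :
        """
        Example config for an SGE instance exposing a 'mem_free' resource
        and an 'smp' parallel environment (both hypothetical).
        """

        @classmethod
        def _qsubResourceArgConfig(cls, nCores, memMb) :
            retval = []

            # request memory per slot so the slot total covers memMb:
            memMbPerCore = 1 + ((memMb - 1) / max(1, nCores))
            retval.extend(["-l", "mem_free=%iM" % (memMbPerCore)])

            if nCores > 1 :
                retval.extend(["-pe", "smp", str(nCores)])

            return retval

    # a local deployment could then return MemFreeSGEConfig from siteConfigFactory()
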
diff --git a/pyflow/src/pyflowTaskWrapper.py b/pyflow/src/pyflowTaskWrapper.py
new file mode 100644
index 0000000..2387818
--- /dev/null
+++ b/pyflow/src/pyflowTaskWrapper.py
@@ -0,0 +1,338 @@
+#
+# pyFlow - a lightweight parallel task engine
+#
+# Copyright (c) 2012-2015 Illumina, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+"""
+This script wraps workflow tasks for execution on local or remote
+hosts. It is responsible for adding log decorations to task's stderr
+output (which is diverted to a file), and writing task state transition
+and error information to the wrapper's stderr, which becomes the
+task's 'signal' file from pyflow's perspective. The signal file is
+used to determine task exit status, total runtime, and queue->run
+state transition when pyflow is run in SGE mode.
+"""
+
+import datetime
+import os
+import subprocess
+import sys
+import time
+
+
+scriptName = "pyflowTaskWrapper.py"
+
+
+def getTracebackStr() :
+ import traceback
+ return traceback.format_exc()
+
+
+def getExceptionMsg() :
+ return ("[ERROR] Unhandled Exception in pyflowTaskWrapper\n" + getTracebackStr())
+
+
+def timeStampToTimeStr(ts) :
+ """
+ converts timeStamp (time.time()) output to timeStr
+ """
+ return datetime.datetime.utcfromtimestamp(ts).isoformat()
+
+def timeStrNow():
+ return timeStampToTimeStr(time.time())
+
+def hardFlush(ofp):
+ ofp.flush()
+ if ofp.isatty() : return
+ os.fsync(ofp.fileno())
+
+def isWindows() :
+ import platform
+ return (platform.system().find("Windows") > -1)
+
+class SimpleFifo(object) :
+ """
+ Store up to the last N objects; not thread safe.
+ Note that extraction does not follow a traditional FIFO interface
+ """
+
+ def __init__(self, size) :
+ self._size = int(size)
+ assert (self._size > 0)
+ self._data = [None] * self._size
+ self._head = 0
+ self._occup = 0
+ self._counter = 0
+
+
+ def count(self) :
+ """
+ Get the total number of adds for this fifo
+ """
+ return self._counter
+
+
+ def add(self, obj) :
+ """
+ add obj to fifo, and return obj for convenience
+ """
+ self._data[self._head] = obj
+ self._counter += 1
+ if self._occup < self._size : self._occup += 1
+ self._head += 1
+ if self._head == self._size : self._head = 0
+ assert (self._head < self._size)
+ return obj
+
+
+ def get(self) :
+ """
+ return an array of the fifo contents
+ """
+ retval = []
+ current = (self._head + self._size) - self._occup
+ for _ in range(self._occup) :
+ while current >= self._size : current -= self._size
+ retval.append(self._data[current])
+ current += 1
+ return retval
+
+
+
+class StringBling(object) :
+ def __init__(self, runid, taskStr) :
+ def getHostName() :
+ import socket
+ # return socket.gethostbyaddr(socket.gethostname())[0]
+ return socket.getfqdn()
+
+ self.runid = runid
+ self.taskStr = taskStr
+ self.hostname = getHostName()
+
+ def _writeMsg(self, ofp, msg, taskStr, writeFilter=lambda x: x) :
+ """
+ log a possibly multi-line message with decoration:
+ """
+ prefix = "[%s] [%s] [%s] [%s] " % (timeStrNow(), self.hostname, self.runid, taskStr)
+ if msg[-1] == "\n" : msg = msg[:-1]
+ for line in msg.split("\n") :
+ ofp.write(writeFilter(prefix + line + "\n"))
+ hardFlush(ofp)
+
+
+ def transfer(self, inos, outos, writeFilter=lambda x: x):
+ """
+ This function is used to decorate the stderr stream from the launched task itself
+ """
+ #
+ # write line-read loop this way to workaround python bug:
+ # http://bugs.python.org/issue3907
+ #
+ while True:
+ line = inos.readline()
+ if not line: break
+ self._writeMsg(outos, line, self.taskStr, writeFilter)
+
+ def wrapperLog(self, log_os, msg) :
+ """
+ Used by the wrapper to decorate each msg line with a prefix. The decoration
+ is similar to that for the task's own stderr, but we prefix the task with
+ 'pyflowTaskWrapper' to differentiate the source.
+ """
+ self._writeMsg(log_os, msg, "pyflowTaskWrapper:" + self.taskStr)
+
+
+
+def getParams(paramsFile) :
+ import pickle
+
+ paramhash = pickle.load(open(paramsFile))
+ class Params : pass
+ params = Params()
+ for (k, v) in paramhash.items() : setattr(params, k, v)
+ return params
+
+
+
+def main():
+
+ usage = """
+
+Usage: %s runid taskid parameter_pickle_file
+
+The parameter pickle file contains all of the task parameters required by the wrapper
+
+""" % (scriptName)
+
+ def badUsage(msg=None) :
+ sys.stderr.write(usage)
+ if msg is not None :
+ sys.stderr.write(msg)
+ exitval = 1
+ else:
+ exitval = 2
+ hardFlush(sys.stderr)
+ sys.exit(exitval)
+
+ def checkExpectArgCount(expectArgCount) :
+ if len(sys.argv) == expectArgCount : return
+ badUsage("Incorrect argument count, expected: %i observed: %i\n" % (expectArgCount, len(sys.argv)))
+
+
+ runid = "unknown"
+ taskStr = "unknown"
+
+ if len(sys.argv) > 2 :
+ runid = sys.argv[1]
+ taskStr = sys.argv[2]
+
+ bling = StringBling(runid, taskStr)
+
+ # send a signal for wrapper start as early as possible to help ensure hostname is logged
+ pffp = sys.stderr
+ bling.wrapperLog(pffp, "[wrapperSignal] wrapperStart")
+
+ checkExpectArgCount(4)
+
+ picklefile = sys.argv[3]
+
+ # try multiple times to read the argument file in case of NFS delay:
+ #
+ retryDelaySec = 30
+ maxTrials = 3
+ for _ in range(maxTrials) :
+ if os.path.exists(picklefile) : break
+ time.sleep(retryDelaySec)
+
+ if not os.path.exists(picklefile) :
+ badUsage("First argument does not exist: " + picklefile)
+
+ if not os.path.isfile(picklefile) :
+ badUsage("First argument is not a file: " + picklefile)
+
+ # add another multi-trial loop on the pickle load operation --
+ # on some filesystems the file can appear to exist but not
+ # be fully instantiated yet:
+ #
+ for t in range(maxTrials) :
+ try :
+ params = getParams(picklefile)
+ except :
+ if (t+1) == maxTrials :
+ raise
+ time.sleep(retryDelaySec)
+ continue
+ break
+
+ if params.cmd is None :
+ badUsage("Invalid TaskWrapper input: task command set to NONE")
+
+ if params.cwd == "" or params.cwd == "None" :
+ params.cwd = None
+
+ toutFp = open(params.outFile, "a")
+ terrFp = open(params.errFile, "a")
+
+ # always keep last N lines of task stderr:
+ fifo = SimpleFifo(20)
+
+ isWin=isWindows()
+
+ # Present shell as arg list with Popen(shell=False), so that
+ # we minimize quoting/escaping issues for 'cmd' itself:
+ #
+ fullcmd = []
+ if (not isWin) and params.isShellCmd :
+ # TODO shell selection should be configurable somewhere:
+ shell = ["/bin/bash", "--noprofile", "-o", "pipefail"]
+ fullcmd = shell + ["-c", params.cmd]
+ else :
+ fullcmd = params.cmd
+
+ retval = 1
+
+ isShell=isWin
+
+ try:
+ startTime = time.time()
+ bling.wrapperLog(pffp, "[wrapperSignal] taskStart")
+ # turn off buffering so that stderr is updated correctly and its timestamps
+ # are more accurate:
+ # TODO: is there a way to do this for stderr only?
+ proc = subprocess.Popen(fullcmd, stdout=toutFp, stderr=subprocess.PIPE, shell=isShell, bufsize=1, cwd=params.cwd, env=params.env)
+ bling.transfer(proc.stderr, terrFp, fifo.add)
+ retval = proc.wait()
+
+ elapsed = (time.time() - startTime)
+
+ # communication back to pyflow:
+ bling.wrapperLog(pffp, "[wrapperSignal] taskExitCode %i" % (retval))
+
+ # communication to human-readable log:
+ msg = "Task: '%s' exit code: '%i'" % (taskStr, retval)
+ bling.wrapperLog(terrFp, msg)
+
+ if retval == 0 :
+ # communication back to pyflow:
+ bling.wrapperLog(pffp, "[wrapperSignal] taskElapsedSec %i" % (int(elapsed)))
+
+ # communication to human-readable log:
+ msg = "Task: '%s' complete." % (taskStr)
+ msg += " elapsedSec: %i" % (int(elapsed))
+ msg += " elapsedCoreSec: %i" % (int(elapsed * params.nCores))
+ msg += "\n"
+ bling.wrapperLog(terrFp, msg)
+ else :
+ # communication back to pyflow:
+ tailMsg = fifo.get()
+ bling.wrapperLog(pffp, "[wrapperSignal] taskStderrTail %i" % (1 + len(tailMsg)))
+ pffp.write("Last %i stderr lines from task (of %i total lines):\n" % (len(tailMsg), fifo.count()))
+ for line in tailMsg :
+ pffp.write(line)
+ hardFlush(pffp)
+
+
+ except KeyboardInterrupt:
+ msg = "[ERROR] Keyboard Interupt, shutting down task."
+ bling.wrapperLog(terrFp, msg)
+ sys.exit(1)
+ except:
+ msg = getExceptionMsg()
+ bling.wrapperLog(terrFp, msg)
+ raise
+
+ sys.exit(retval)
+
+
+
+if __name__ == "__main__" :
+ main()
+
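A hedged sketch of driving the wrapper by hand. pyflow normally writes the parameter pickle itself when it launches a task; the key set below is inferred from the params.* attributes read in main() above and may be incomplete, and the runid, task label, and command values are made up.

    import os
    import pickle
    import subprocess

    # parameter set inferred from the attributes main() reads off the params object:
    params = {
        "cmd"        : "echo hello from the wrapper",
        "cwd"        : None,               # run in the current directory
        "outFile"    : "task.stdout.txt",  # task stdout is appended here
        "errFile"    : "task.stderr.txt",  # decorated task stderr is appended here
        "isShellCmd" : True,               # run via bash -c on *nix
        "nCores"     : 1,
        "env"        : dict(os.environ),
    }

    with open("params.pickle", "wb") as fp :
        pickle.dump(params, fp, protocol=0)   # text protocol matches pickle.load(open(f))

    # the wrapper reports state transitions ("[wrapperSignal] ...") on its own stderr;
    # assumes the cwd is pyflow/src, otherwise adjust the script path:
    subprocess.call(["python", "pyflowTaskWrapper.py", "run001", "demoTask", "params.pickle"])
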
diff --git a/scratch/README.txt b/scratch/README.txt
new file mode 100644
index 0000000..d6a10b6
--- /dev/null
+++ b/scratch/README.txt
@@ -0,0 +1,18 @@
+
+
+notes/
+
+random design/development notes
+
+
+pybox/
+
+test code snippets
+
+
+
+test/
+
+unit and integration test scripts
+
+
diff --git a/scratch/delete_trailing_wspace.bash b/scratch/delete_trailing_wspace.bash
new file mode 100755
index 0000000..47cf8da
--- /dev/null
+++ b/scratch/delete_trailing_wspace.bash
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+
+#
+# clean trailing w/s from pyflow source
+#
+# pretty hacky script... probably best to avoid running this if you have a lot of uncommitted changes
+#
+
+set -o nounset
+
+scriptdir=$(cd $(dirname $0); pwd -P)
+
+
+get_source() {
+ find $scriptdir/../pyflow -type f \
+ -name "*.bash" -or \
+ -name "*.py"
+}
+
+tempfile=$(mktemp)
+
+for f in $(get_source); do
+ echo "checking: $f"
+ cat $f |\
+ sed 's/[ ]*$//' >|\
+ $tempfile
+
+ if ! diff $tempfile $f > /dev/null; then
+ mv -f $tempfile $f
+ else
+ rm -f $tempfile
+ fi
+done
diff --git a/scratch/make_release_tarball.bash b/scratch/make_release_tarball.bash
new file mode 100755
index 0000000..5e36955
--- /dev/null
+++ b/scratch/make_release_tarball.bash
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+
+#
+# this script makes the pyflow release tarball assuming it's being called in the git repo
+# already checked out to the targeted version
+#
+# the tarball is written to the caller's cwd
+#
+
+set -o nounset
+set -o xtrace
+set -o errexit
+
+pname_root=""
+if [ $# -gt 1 ]; then
+ echo "usage: $0 [rootname]"
+ exit 2
+elif [ $# == 1 ]; then
+ pname_root=$1
+fi
+
+
+get_abs_path() {
+ (cd $1; pwd -P)
+}
+
+
+script_dir=$(get_abs_path $(dirname $0))
+outdir=$(pwd)
+echo $outdir
+
+cd $script_dir
+echo $script_dir
+gitversion=$(git describe | sed s/^v//)
+
+if [ "$pname_root" == "" ]; then
+ pname_root=pyflow-$gitversion
+fi
+
+pname=$outdir/$pname_root
+
+cd ..
+
+# use archive instead of copy so that we clean up any tmp files in the working directory:
+git archive --prefix=$pname_root/ HEAD:pyflow/ | tar -x -C $outdir
+
+# make version number substitutions:
+cat pyflow/src/pyflow.py |\
+sed "s/pyflowAutoVersion = None/pyflowAutoVersion = \"$gitversion\"/" >|\
+$pname/src/pyflow.py
+
+cat pyflow/README.md |\
+sed "s/\${VERSION}/$gitversion/" >|\
+$pname/README.md
+
+cat pyflow/setup.py |\
+sed "s/\${VERSION}/$gitversion/" >|\
+$pname/setup.py
+
+chmod +x $pname/src/pyflow.py
+
+cd $outdir
+tar -cz $pname_root -f $pname.tar.gz
+rm -rf $pname
+
diff --git a/scratch/notes/design.notes b/scratch/notes/design.notes
new file mode 100644
index 0000000..c7d38a1
--- /dev/null
+++ b/scratch/notes/design.notes
@@ -0,0 +1,74 @@
+1.Get example task file and launch command.
+
+
+launch cmd:
+"""
+/illumina/software/casava/CASAVA-1.8.2/bin/taskServer.pl --tasksFile=/illumina/builds/lox/Saturn/Saturn1_BB0065ACXX_builds/temp_build/tasks.21_09_49_26_01_12.txt --host=ukch-dev-lndt01 --mode=sge
+
+/illumina/software/casava/CASAVA-1.8.2/bin/taskServer.pl --tasksFile=/illumina/builds/lox/Saturn/Saturn1_BB0065ACXX_builds/temp_build/tasks.21_09_49_26_01_12.txt --host=localhost --jobsLimit=1 --mode=local
+"""
+
+
+new task specification file:
+xml
+contains tasks and edges
+no special checkpoints anymore, these are just tasks without commands
+a separate "status" file associates a state with each task
+
+
+OR: new task specification script:
+perl
+too much change at once
+
+dynamic_task_manager:
+w=WorkflowClass(config)
+w.run(filename)
+s.init(mode="local|sge",
+ ncores=X|inf,
+ workflow_file_prefix,
+ is_continue=0|1)
+s.add_task(label,description,command);
+s.add_dependency(label,label2,is_optional);
+s.close()
+
+
+dynamic task manager:
+workflow_dir is used to write the stdout and stderr log, as well as the status file
+prefix/runid.stderr.log
+prefix/runid.stdout.log
+prefix/runid.taskstatus.txt
+prefix/taskstatus.txt
+prefix/workflow_run_history.txt
+
+
+s.add_task(label,command,n_cores,[task_dep_list])
+s.add_task(label,command,n_cores,[task_dep_list])
+
+
+Error policy:
+Stop launching new jobs. Record total number of errors and write this on final log line.
+write_to_file: dir/tasks.error.txt
+
+Logs (all append only):
+# all messages from the workflow engine itself:
+dir/logs/workflow_log.txt
+# all messages from task, including the task wrapper:
+dir/logs/tasks_stderr_log.txt
+dir/logs/tasks_stdout_log.txt
+
+persistence data:
+# record of all data supplied in each add-task call:
+dir/tasks.info.txt (unique append-only)
+dir/task.history.txt
+
+convenience:
+dir/tasks.corehourtime.txt (append-only)
+dir/tasks.dot (refresh at complete workflow specification)
+
+
+if isContinue:
+1) read in state files and reconstruct data structures from these for complete tasks only, set a new isContinued bit, which persists until the new workflow confirms it with an addTask(). An isContinued task cannot be run, but this doesn't matter since these are complete tasks only.
+Complete tasks must match their original descriptions, but all other tasks can change
+2) use these to verify and reassign runstate for completed tasks only
+
+
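For reference against these early notes, a rough, hedged mapping of the planned add_task()/checkpoint interface onto the API that actually shipped in pyflow/src/pyflow.py (labels and commands below are made up):

    from pyflow import WorkflowRunner

    class NotesSketchWorkflow(WorkflowRunner) :
        def workflow(self) :
            # s.add_task(label, command, n_cores, [task_dep_list]) became addTask():
            self.addTask("align", "echo align", nCores=4)

            # a 'checkpoint' is simply a task added without a command:
            self.addTask("align_checkpoint", dependencies=["align"])

            self.addTask("report", "echo report", dependencies=["align_checkpoint"])

    # s.init()/s.close() collapsed into a single run() call:
    # NotesSketchWorkflow().run("local", nCores=4)
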
diff --git a/scratch/notes/todo b/scratch/notes/todo
new file mode 100644
index 0000000..59492b8
--- /dev/null
+++ b/scratch/notes/todo
@@ -0,0 +1,53 @@
+pyflow:
+switch to standard python logging module
+summarize total warnings and errors at end of run -- have a per run log folder to store this stuff
+ability to change global defaults at the start of a script.
+add timeouts
+add a callable task, to allow delayed execution of pure python (how to prevent user from doing intense computation on this task?)
+allow file task dependencies
+specify maxcpuCores -- communicate back through an env variable
+better run_num bump
+allow tasks to specify a set of output files, to make nfs tracking better
+allow filenames as task ids
+
+bagPipes:
+check/warning for default sample sheet case
+Java mem grower? -- scan for "java.lang.OutOfMemoryError" and add more memory?
+
+****Add logging for events expected to be rare: any qstat timeout/retry
+****Good system to detect two pyflow jobs trying to use the same pyflow.data directory
+****Get git describe into pyflow automatically
+****version number management
+****Pyflow use two layers of tmp folders
+****Provide last ?10? lines of task stderr on notification when a task fails
+****turn on thread mem reduction
+****Don't write graph on update -- instead provide a script to generate this.
+****setup taskWrapper->parent communication via stderr
+****add something more 'psubmake-ish' for the makefile handling
+****mode change should not interfere with continuation (make/qmake)
+****send cmd as arg list in addition to string
+****resource management for memory (on local at least)
+****specify a mapping function between resources and SGE qsub phrases -- allow this to be overridden for different SGE configurations.
+****add subworkflows as tasks (this goes in a subworkflow namespace)
+****waitForTask() right now blocks the specification of all other non-dependent tasks. Think of a scheme to get around this -- threads in the workflow function?
+****color graph
+****write dot file (on command?, automatically at end of workflow specification?)
+****add run parameters to log
+****add public log function
+****add exception notification email
+****make error notification email more robust
+****email events: onComplete, onFirstTaskError
+****create dryrun mode, include runMode() in interface
+****start working on persistence
+****start developing total task time methods
+****add task retry
+****rename file and git to 'pyflow'
+****add forceLocal flag to enable qmake/make runs
+****start working on SGE
+****put task stdout/stderr somewhere
+****write most workflow exceptions to workflow_log
+****check task names for spaces
+****should jobs be killed on ctrl-c?
+****start developing error handle/report polices
+****check that subprocess is correct for intense tasks
+****fix task4 bug
diff --git a/scratch/pybox/email_test.py b/scratch/pybox/email_test.py
new file mode 100644
index 0000000..19efdcb
--- /dev/null
+++ b/scratch/pybox/email_test.py
@@ -0,0 +1,29 @@
+
+import smtplib
+from email.MIMEText import MIMEText
+
+def getHostName() :
+ import socket
+ #return socket.gethostbyaddr(socket.gethostname())[0]
+ return socket.getfqdn()
+
+def getDomainName() :
+ "maybe this isn't the technical term -- this is just the hostname - the host"
+ hn=getHostName().split(".")
+ if len(hn)>1 : hn=hn[1:]
+ return ".".join(hn)
+
+
+me = "pyflow-bot@"+getDomainName()
+to = "csaunders at illumina.com"
+
+msg=MIMEText("foo foo")
+msg["Subject"] = "pyFlow: job: XXX complete"
+msg["From"] = me
+msg["To"] = to
+
+msg.as_string()
+
+s=smtplib.SMTP('localhost')
+s.sendmail(me,to,msg.as_string())
+s.quit()
diff --git a/scratch/pybox/hijack.py b/scratch/pybox/hijack.py
new file mode 100644
index 0000000..bacc9fc
--- /dev/null
+++ b/scratch/pybox/hijack.py
@@ -0,0 +1,25 @@
+
+
+class A :
+ def __init__(self) :
+ self.x = 1
+
+ def inc(self) :
+ self.x += 1
+
+
+a = A()
+b = A()
+
+a.inc()
+b.inc()
+
+
+# hijack:
+b.inc = a.inc
+
+b.inc()
+
+print "a", a.x
+print "b", b.x
+
diff --git a/scratch/pybox/inspect.py b/scratch/pybox/inspect.py
new file mode 100644
index 0000000..2bc960c
--- /dev/null
+++ b/scratch/pybox/inspect.py
@@ -0,0 +1,7 @@
+
+def f(x) :
+ return x + 2
+
+import inspect
+print inspect.getsource(f)
+
diff --git a/scratch/pybox/memTest.py b/scratch/pybox/memTest.py
new file mode 100755
index 0000000..9836374
--- /dev/null
+++ b/scratch/pybox/memTest.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+#
+# This test exercises pyflow memory limits with a handful of trivial
+# 'hello world' tasks run under a constrained run-level memMb setting.
+# See the 'simpleDemo' for coverage of a few more pyflow features
+#
+
+import os.path
+import sys
+
+# add module path by hand
+#
+sys.path.append(os.path.abspath(os.path.dirname(__file__))+"/../pyflow/src")
+
+
+from pyflow import WorkflowRunner
+
+
+# all pyflow workflows are written into classes derived from pyflow.WorkflowRunner:
+#
+class MemTestWorkflow(WorkflowRunner) :
+
+ # a workflow is defined by overloading the WorkflowRunner.workflow() method:
+ #
+ def workflow(self) :
+
+ # The following tasks probe the run-level and per-task memory limits:
+ self.addTask("easy_task1","echo 'Hello World!'")
+ self.addTask("easy_task2","echo 'Hello World!'")
+ self.addTask("easy_task3","echo 'Hello World!'")
+ self.addTask("easy_task4","echo 'Hello World!'",memMb=1)
+
+
+
+# Instantiate the workflow
+#
+wflow = MemTestWorkflow()
+
+# Run the workflow:
+#
+retval=wflow.run(nCores=8,memMb=2049)
+
+# done!
+sys.exit(retval)
+
diff --git a/scratch/test/README.md b/scratch/test/README.md
new file mode 100644
index 0000000..956bb51
--- /dev/null
+++ b/scratch/test/README.md
@@ -0,0 +1,30 @@
+
+## pyflow test scripts
+
+### Global test scripts
+
+The new global test script maintained for *nix and windows is:
+
+ test_pyflow.py
+
+
+The previous global test script written for *nix only is:
+
+ test_release_tarball.bash
+
+
+...this currently contains more tests, and will still be the test target for
+travis until windows support is complete.
+
+
+### Component test scripts
+
+* pyflow_unit_tests.py - all pyflow unit tests
+
+* pyflow_basic_feature_runner.py - runs a number of pyflow operations for
+ local or sge modes
+
+* demos - Running through the various small demo scripts and making sure they
+ complete without error is used to round out the full test process. Most demo
+ scripts are linux-only at this point.
+
diff --git a/scratch/test/pyflow_basic_feature_runner.py b/scratch/test/pyflow_basic_feature_runner.py
new file mode 100755
index 0000000..ecbb32e
--- /dev/null
+++ b/scratch/test/pyflow_basic_feature_runner.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+"""
+This script exercises a broad range of pyflow features. It does not
+verify that every operation ran correctly; it will only pick up a
+basic crash or hang.
+"""
+
+import os.path
+import sys
+
+# bad example of how to add the path:
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+
+
+# setup PYTHONPATH instead...
+
+#sys.path.append(scriptDir+"/../pyflow/src")
+
+from pyflow import WorkflowRunner
+
+
+testJobDir=os.path.join(scriptDir,"testtasks")
+
+sleepjob=os.path.join(testJobDir,"sleeper.bash")
+yelljob=os.path.join(testJobDir,"yeller.bash")
+runjob=os.path.join(testJobDir,"runner.bash")
+
+class SubSubWorkflow(WorkflowRunner) :
+
+ def workflow(self) :
+ self.addTask("easy_task1",yelljob+" 1")
+ self.addTask("easy_task2",runjob+" 2",nCores=3,dependencies=["easy_task1"])
+ self.waitForTasks("easy_task2")
+ self.addTask("easy_task3",runjob+" 2",nCores=3,dependencies=["easy_task2"])
+ # intentional fail:
+ #self.addTask("easy_task3b",runjob,dependencies=["easy_task2"])
+
+
+class SubWorkflow(WorkflowRunner) :
+
+ def workflow(self) :
+ self.addTask("easy_task1",yelljob+" 1")
+ self.addTask("easy_task2",runjob+" 2",nCores=3,dependencies=["easy_task1"])
+ self.addTask("easy_task3",runjob+" 2",nCores=3,dependencies=["easy_task2"])
+ wflow=SubSubWorkflow()
+ self.addWorkflowTask("subsubwf_task1",wflow,dependencies="easy_task1")
+
+
+
+class TestWorkflow(WorkflowRunner) :
+
+ def workflow(self) :
+
+ job=sleepjob+" 1"
+
+ self.addTask("easy_task1",yelljob+" 1")
+ waitTask=self.addTask("easy_task3",runjob+" 10",nCores=2,memMb=1024,isForceLocal=True)
+ self.flowLog("My message")
+
+ swflow=SubWorkflow()
+
+ self.addWorkflowTask("subwf_task1",swflow,dependencies=waitTask)
+ self.addWorkflowTask("subwf_task2",swflow,dependencies=waitTask)
+
+ self.addTask("easy_task4",runjob+" 2",nCores=3,dependencies=["subwf_task1","subwf_task2"])
+ self.addTask("easy_task5",job,nCores=1)
+
+ # and stop here
+ self.waitForTasks()
+
+ self.flowLog("ITC1: "+str(self.isTaskComplete("easy_task1")))
+ self.flowLog("ITC6: "+str(self.isTaskComplete("easy_task6")))
+
+ self.addTask("easy_task6",job)
+ #self.addTask("easy_task2",sleepjob)
+ self.addTask("checkpoint_task",dependencies=["easy_task1","easy_task6","easy_task4"])
+ self.addTask("dep_task",sleepjob+" 4",dependencies=["checkpoint_task"])
+
+
+
+def getRunOptions() :
+
+ from optparse import OptionParser
+
+ defaults = { "mode" : "local" }
+
+ parser = OptionParser()
+ parser.set_defaults(**defaults)
+
+ parser.add_option("-m", "--mode", type="string", dest="mode",
+ help="Select run mode {local,sge} (default: %default)")
+
+ (options, args) = parser.parse_args()
+
+ if len(args) :
+ parser.print_help()
+ sys.exit(2)
+
+ if options.mode not in ["sge","local"] :
+ parser.print_help()
+ sys.exit(2)
+
+ return options
+
+
+
+def main() :
+ options = getRunOptions()
+ wflow = TestWorkflow()
+ retval=wflow.run(options.mode,nCores=8,memMb=8*1024,isContinue=False)
+ sys.exit(retval)
+
+
+
+if __name__ == "__main__" :
+ main()
diff --git a/scratch/test/pyflow_unit_tests.py b/scratch/test/pyflow_unit_tests.py
new file mode 100755
index 0000000..2eaf18c
--- /dev/null
+++ b/scratch/test/pyflow_unit_tests.py
@@ -0,0 +1,430 @@
+#!/usr/bin/env python
+
+import unittest
+import os
+import sys
+
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+
+def pyflow_lib_dir() :
+ return os.path.abspath(os.path.join(scriptDir,os.pardir,os.pardir,"pyflow","src"))
+
+try :
+ # if pyflow is in PYTHONPATH already then use the specified copy:
+ from pyflow import isWindows,WorkflowRunner
+except :
+ # otherwise use the relative path within this repo:
+ sys.path.append(pyflow_lib_dir())
+ from pyflow import isWindows,WorkflowRunner
+
+
+def getRmCmd() :
+ if isWindows():
+ return ["del","/f"]
+ else:
+ return ["rm","-f"]
+
+
+def getSleepCmd() :
+ if isWindows():
+ return ["timeout"]
+ else:
+ return ["sleep"]
+
+
+def getCatCmd() :
+ if isWindows():
+ return ["type"]
+ else:
+ return ["cat"]
+
+
+def getCmdString(cmdList) :
+ return " ".join(cmdList)
+
+
+
+class NullWorkflow(WorkflowRunner) :
+ pass
+
+
+
+class TestWorkflowRunner(unittest.TestCase) :
+
+ def __init__(self, *args, **kw) :
+ unittest.TestCase.__init__(self, *args, **kw)
+ self.testPath="testDataRoot"
+
+ def setUp(self) :
+ self.clearTestPath()
+
+ def tearDown(self) :
+ self.clearTestPath()
+
+ def clearTestPath(self) :
+ import shutil
+ if os.path.isdir(self.testPath) :
+ shutil.rmtree(self.testPath)
+
+
+ def test_createDataDir(self) :
+ w=NullWorkflow()
+ w.run("local",self.testPath,isQuiet=True)
+ self.assertTrue(os.path.isdir(self.testPath))
+
+
+ def test_badMode(self) :
+ w=NullWorkflow()
+ try:
+ w.run("foomode",self.testPath,isQuiet=True)
+ self.fail("Didn't raise Exception")
+ except KeyError:
+ self.assertTrue(sys.exc_info()[1].args[0].find("foomode") != -1)
+
+
+ def test_errorLogPositive(self) :
+ """
+ Test that errors are written to separate log when requested
+ """
+ os.mkdir(self.testPath)
+ logFile=os.path.join(self.testPath,"error.log")
+ w=NullWorkflow()
+ try:
+ w.run("foomode",self.testPath,errorLogFile=logFile,isQuiet=True)
+ self.fail("Didn't raise Exception")
+ except KeyError:
+ self.assertTrue(sys.exc_info()[1].args[0].find("foomode") != -1)
+ self.assertTrue((os.path.getsize(logFile) > 0))
+
+
+ def test_errorLogNegative(self) :
+ """
+ Test that no errors are written to separate error log when none occur
+ """
+ os.mkdir(self.testPath)
+ logFile=os.path.join(self.testPath,"error.log")
+ w=NullWorkflow()
+ w.run("local",self.testPath,errorLogFile=logFile,isQuiet=True)
+ self.assertTrue((os.path.getsize(logFile) == 0))
+
+
+ def test_dataDirCollision(self) :
+ """
+ Test that when two pyflow jobs are launched with the same dataDir, the second will fail.
+ """
+ import threading,time
+
+ class StallWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ self2.addTask("sleeper",getSleepCmd()+["5"])
+
+ class runner(threading.Thread) :
+ def __init__(self2) :
+ threading.Thread.__init__(self2)
+ self2.retval1=1
+
+ def run(self2) :
+ w=StallWorkflow()
+ self2.retval1=w.run("local",self.testPath,isQuiet=True)
+
+ w2=StallWorkflow()
+
+ r1=runner()
+ r1.start()
+ time.sleep(1)
+ retval2=w2.run("local",self.testPath,isQuiet=True)
+ self.assertTrue(retval2==1)
+ r1.join()
+ self.assertTrue(r1.retval1==0)
+
+
+ def test_forceContinue(self) :
+ class TestWorkflow(WorkflowRunner) :
+ color="red"
+
+ def setColor(self2,color) :
+ self2.color=color
+
+ def workflow(self2) :
+ self2.addTask("A","echo "+self2.color)
+
+ w=TestWorkflow()
+ retval=w.run("local",self.testPath,isQuiet=True)
+ self.assertTrue(retval==0)
+ retval=w.run("local",self.testPath,isContinue=True,isQuiet=True)
+ self.assertTrue(retval==0)
+ w.setColor("green")
+ retval=w.run("local",self.testPath,isContinue=True,isQuiet=True)
+ self.assertTrue(retval==1)
+ retval=w.run("local",self.testPath,isContinue=True,isForceContinue=True,isQuiet=True)
+ self.assertTrue(retval==0)
+
+
+ def test_badContinue(self) :
+ w=NullWorkflow()
+ try:
+ w.run("local",self.testPath,isContinue=True,isQuiet=True)
+ self.fail("Didn't raise Exception")
+ except Exception:
+ self.assertTrue(sys.exc_info()[1].args[0].find("Cannot continue run") != -1)
+
+
+ def test_goodContinue(self) :
+ w=NullWorkflow()
+ retval1=w.run("local",self.testPath,isQuiet=True)
+ retval2=w.run("local",self.testPath,isContinue=True,isQuiet=True)
+ self.assertTrue((retval1==0) and (retval2==0))
+
+
+ def test_autoContinue(self) :
+ w=NullWorkflow()
+ retval1=w.run("local",self.testPath,isContinue="Auto",isQuiet=True)
+ retval2=w.run("local",self.testPath,isContinue="Auto",isQuiet=True)
+ self.assertTrue((retval1==0) and (retval2==0))
+
+
+ def test_simpleDependency(self) :
+ "make sure B waits for A"
+ class TestWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ filePath=os.path.join(self.testPath,"tmp.txt")
+ self2.addTask("A","echo foo > " +filePath)
+ self2.addTask("B",getCmdString(getCatCmd()) + " " + filePath + " && " + getCmdString(getRmCmd())+ " " + filePath,dependencies="A")
+
+ w=TestWorkflow()
+ self.assertTrue((0==w.run("local",self.testPath,isQuiet=True)))
+
+
+ def test_waitDependency(self) :
+ "make sure waitForTasks waits for A on the workflow thread"
+ class TestWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ filePath=os.path.join(self.testPath,"tmp.txt")
+ if os.path.isfile(filePath) : os.remove(filePath)
+ self2.addTask("A",getCmdString(getSleepCmd()) + " 5 && echo foo > %s" % (filePath))
+ self2.waitForTasks("A")
+ assert(os.path.isfile(filePath))
+ self2.addTask("B",getCmdString(getCatCmd()) + " " + filePath +" && " + getCmdString(getRmCmd())+ " " + filePath)
+
+ w=TestWorkflow()
+ self.assertTrue(0==w.run("local",self.testPath,isQuiet=True))
+
+
+ def test_flowLog(self) :
+ "make sure flowLog doesn't throw -- but this does not check if the log is updated"
+ class TestWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ self2.flowLog("My Message")
+
+ w=TestWorkflow()
+ self.assertTrue(0==w.run("local",self.testPath,isQuiet=True))
+
+
+ def test_deadSibling(self) :
+ """
+ Tests that when a task error occurs in one sub-workflow, its
+ sibling workflows exit correctly (instead of hanging forever).
+ This test covers an error case found early in the library's history.
+ """
+ class SubWorkflow1(WorkflowRunner) :
+ "this one fails"
+ def workflow(self2) :
+ self2.addTask("A",getSleepCmd()+["5"])
+ self2.addTask("B","boogyman!",dependencies="A")
+
+ class SubWorkflow2(WorkflowRunner) :
+ "this one doesn't fail"
+ def workflow(self2) :
+ self2.addTask("A",getSleepCmd()+["5"])
+ self2.addTask("B",getSleepCmd()+["5"],dependencies="A")
+
+ class MasterWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ wflow1=SubWorkflow1()
+ wflow2=SubWorkflow2()
+ self2.addWorkflowTask("wf1",wflow1)
+ self2.addWorkflowTask("wf2",wflow2)
+
+ w=MasterWorkflow()
+ self.assertTrue(1==w.run("local",self.testPath,nCores=2,isQuiet=True))
+
+
+ def test_selfDependency1(self) :
+ """
+ """
+ class SelfWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ self2.addTask("A",getSleepCmd()+["5"],dependencies="A")
+
+ w=SelfWorkflow()
+ self.assertTrue(1==w.run("local",self.testPath,isQuiet=True))
+
+
+ def test_expGraphScaling(self) :
+ """
+ This tests that pyflow does not scale poorly with highly connected subgraphs.
+
+ When the error occurs, it locks the primary thread, so we put the test workflow
+ on its own thread so that we can time it and issue an error.
+
+ Issue reported by R Kelley and A Halpern
+ """
+
+ import threading
+
+ class ScalingWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ tasks = set()
+ for idx in xrange(60) :
+ sidx = str(idx)
+ tasks.add(self2.addTask("task_" + sidx, "echo " + sidx, dependencies = tasks))
+ self2.waitForTasks("task_50")
+ tasks.add(self2.addTask("task_1000", "echo 1000", dependencies = tasks))
+
+ class runner(threading.Thread) :
+ def __init__(self2) :
+ threading.Thread.__init__(self2)
+ self2.setDaemon(True)
+
+ def run(self2) :
+ w=ScalingWorkflow()
+ w.run("local",self.testPath,isQuiet=True)
+
+ r1=runner()
+ r1.start()
+ r1.join(30)
+ self.assertTrue(not r1.isAlive())
+
+ def test_startFromTasks(self) :
+ """
+ run() option to ignore all tasks before a specified task node
+ """
+ filePath=os.path.join(self.testPath,"tmp.txt")
+
+ class SelfWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ self2.addTask("A","echo foo > "+filePath)
+ self2.addTask("B",getSleepCmd()+["1"],dependencies="A")
+ self2.addTask("C",getSleepCmd()+["1"],dependencies=("A","B"))
+
+ w=SelfWorkflow()
+ self.assertTrue(0==w.run("local",self.testPath,isQuiet=True,startFromTasks="B"))
+ self.assertTrue(not os.path.exists(filePath))
+
+
+ def test_startFromTasksSubWflow(self) :
+ """
+ run() option to ignore all tasks before a specified task node
+ """
+ filePath=os.path.join(self.testPath,"tmp.txt")
+
+ class SubWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ self2.addTask("D","echo foo > "+filePath)
+
+ class SelfWorkflow(WorkflowRunner) :
+ def workflow(self2) :
+ self2.addTask("A",getSleepCmd()+["1"])
+ self2.addWorkflowTask("B",SubWorkflow(),dependencies="A")
+ self2.addTask("C",getSleepCmd()+["1"],dependencies=("A","B"))
+
+ w=SelfWorkflow()
+ self.assertTrue(0==w.run("local",self.testPath,isQuiet=True,startFromTasks="B"))
+ self.assertTrue(os.path.exists(filePath))
+
+
+ def test_startFromTasksSubWflow2(self) :
+ """
+ run() option to ignore all tasks before a specified task node
+ """
+ filePath=os.path.join(self.testPath,"tmp.txt")
+
+        class SubWorkflow(WorkflowRunner) :
+            def workflow(self2) :
+                self2.addTask("D","echo foo > "+filePath)
+
+        class SelfWorkflow(WorkflowRunner) :
+            def workflow(self2) :
+                self2.addTask("A",getSleepCmd()+["1"])
+                self2.addWorkflowTask("B",SubWorkflow(),dependencies="A")
+                self2.addTask("C",getSleepCmd()+["1"],dependencies=("A","B"))
+
+        w=SelfWorkflow()
+        self.assertTrue(0==w.run("local",self.testPath,isQuiet=True,startFromTasks="C"))
+        self.assertTrue(not os.path.exists(filePath))
+
+
+    def test_ignoreTasksAfter(self) :
+        """
+        run() option to ignore all tasks below a specified task node
+        """
+        class SelfWorkflow(WorkflowRunner) :
+            def workflow(self2) :
+                self2.addTask("A",getSleepCmd()+["1"])
+                self2.addTask("B",getSleepCmd()+["1"],dependencies="A")
+                self2.addTask("C",getSleepCmd()+["1"],dependencies=("A","B"))
+
+        w=SelfWorkflow()
+        self.assertTrue(0==w.run("local",self.testPath,isQuiet=True,ignoreTasksAfter="B"))
+        self.assertTrue(not w.isTaskComplete("C"))
+
+    def test_addTaskOutsideWorkflow(self) :
+        """
+        test that calling addTask() outside of a workflow() method
+        raises an exception
+        """
+
+        class SelfWorkflow(WorkflowRunner) :
+            def __init__(self2) :
+                self2.addTask("A",getSleepCmd()+["1"])
+
+        # self.fail() sits outside the try block so that the bare except below
+        # cannot swallow the AssertionError it raises
+        try :
+            w=SelfWorkflow()
+        except :
+            return
+        self.fail("Didn't raise Exception")
+
+    def test_runModeInSubWorkflow(self) :
+        """
+        test that calling getRunMode() in a sub-workflow's workflow() method
+        does not raise an exception (github issue #5)
+        """
+
+        class SubWorkflow(WorkflowRunner) :
+            def workflow(self2) :
+                if self2.getRunMode() == "local" :
+                    self2.addTask("D",getSleepCmd()+["1"])
+
+        class SelfWorkflow(WorkflowRunner) :
+            def workflow(self2) :
+                self2.addTask("A",getSleepCmd()+["1"])
+                self2.addWorkflowTask("B",SubWorkflow(),dependencies="A")
+                self2.addTask("C",getSleepCmd()+["1"],dependencies=("A","B"))
+
+        try :
+            w=SelfWorkflow()
+            self.assertTrue(0==w.run("local",self.testPath,isQuiet=True))
+        except :
+            self.fail("Should not raise Exception")
+
+    def test_CheckpointChain(self) :
+        """
+        Test that checkpoint tasks are handled correctly even
+        when multiple checkpoints have a parent-child relationship
+        """
+
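+        # addTask() with no command creates a checkpoint task; checkpoint "C" here
+        # has the checkpoints "A" and "B" as its parents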
+        class SelfWorkflow(WorkflowRunner) :
+            def workflow(self2) :
+                self2.addTask("A")
+                self2.addTask("B")
+                self2.addTask("C",dependencies=["A","B"])
+
+        try :
+            w=SelfWorkflow()
+            self.assertTrue(0==w.run("local",self.testPath,isQuiet=True))
+        except :
+            self.fail("Should not raise Exception")
+
+if __name__ == '__main__' :
+    unittest.main()
+
diff --git a/scratch/test/test_pyflow.py b/scratch/test/test_pyflow.py
new file mode 100755
index 0000000..a4c4a84
--- /dev/null
+++ b/scratch/test/test_pyflow.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+#
+"""
+automation friendly cross-platform tests for pyflow
+"""
+
+import os
+import sys
+
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+
+
+def getOptions() :
+
+    from optparse import OptionParser
+
+    usage = "usage: %prog [options]"
+    parser = OptionParser(usage=usage,description="Run all pyflow tests")
+
+    parser.add_option("--nosge",dest="isSkipSge", action="store_true",
+                      help="skip SGE testing")
+
+    (options,args) = parser.parse_args()
+
+    if len(args) != 0 :
+        parser.print_help()
+        sys.exit(2)
+
+    return (options,args)
+
+
+def main() :
+    import subprocess
+
+    (options,args) = getOptions()
+
+    pyflowRootDir=os.path.abspath(os.path.join(scriptDir,os.pardir,os.pardir))
+    pyflowDir=os.path.join(pyflowRootDir,"pyflow")
+
+    utScriptPath=os.path.join(scriptDir,"pyflow_unit_tests.py")
+
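+    # "-E" makes the child python interpreters ignore PYTHON* environment
+    # variables, so the subprocesses below run with a clean environment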
+    if True :
+        # shell out to run the unit tests for now -- TODO: can we just import this instead?
+        utCmd=[sys.executable,"-E",utScriptPath,"-v"]
+        proc = subprocess.Popen(utCmd)
+        proc.wait()
+        if proc.returncode != 0 :
+            raise Exception("Pyflow unit test run failed")
+
+    # run through demos (only the helloWorld demo works on Windows)
+    if True :
+        demoDir=os.path.join(pyflowDir,"demo")
+        for demoName in ["helloWorld"] :
+            demoScriptPath=os.path.join(demoDir,demoName,demoName+".py")
+            demoCmd=[sys.executable,"-E",demoScriptPath]
+            proc = subprocess.Popen(demoCmd)
+            proc.wait()
+            if proc.returncode != 0 :
+                raise Exception("Pyflow demo failed: '%s'" % (demoScriptPath))
+
+
+main()
+
diff --git a/scratch/test/test_release_tarball.bash b/scratch/test/test_release_tarball.bash
new file mode 100755
index 0000000..afdd3ae
--- /dev/null
+++ b/scratch/test/test_release_tarball.bash
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+#
+# this script brings everything together for an automated build/test system
+#
+
+set -o errexit
+set -o nounset
+set -o xtrace
+
+if [ $# -gt 1 ]; then
+    echo "usage: $0 [ -nosge ]" >&2
+    exit 2
+fi
+
+is_sge=1
+if [ $# -ge 1 ] && [ "$1" == "-nosge" ]; then
+    is_sge=0
+fi
+
+
+
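+# build a release tarball from the working tree, unpack it, and run all tests
+# against the unpacked copy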
+thisdir=$(dirname $0)
+
+cd $thisdir/..
+testname=TESTBALL
+bash ./make_release_tarball.bash $testname
+tar -xzf $testname.tar.gz
+
+testdir=$(pwd)/$testname
+
+# run through tests:
+PYTHONPATH=$testdir/src test/pyflow_unit_tests.py -v
+
+# run this a few times just in case we can shake out any subtle/rare race conditions:
+for f in $(seq 5); do
+    PYTHONPATH=$testdir/src test/pyflow_basic_feature_runner.py --mode local
+done
+
+if [ $is_sge == 1 ]; then
+    PYTHONPATH=$testdir/src test/pyflow_basic_feature_runner.py --mode sge
+fi
+
+# run through demos:
+for f in cwdDemo envDemo helloWorld makeDemo memoryDemo mutexDemo simpleDemo subWorkflow; do
+    cd $testdir/demo/$f
+    python $f.py
+    python pyflow.data/state/make_pyflow_task_graph.py >| test.dot
+done
+
+
diff --git a/scratch/test/testtasks/runner.bash b/scratch/test/testtasks/runner.bash
new file mode 100755
index 0000000..0bf66ec
--- /dev/null
+++ b/scratch/test/testtasks/runner.bash
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+
+thisdir=$(dirname $0)
+
+cd $thisdir
+
+if ! [ -e ./runner ]; then
+    # turning on -O2 is too variable across different platforms, so leave off:
+    gcc ./runner.c -lm -o runner.tmp && mv runner.tmp runner
+fi
+
+./runner $1
+
+
diff --git a/scratch/test/testtasks/runner.c b/scratch/test/testtasks/runner.c
new file mode 100644
index 0000000..5fad9c8
--- /dev/null
+++ b/scratch/test/testtasks/runner.c
@@ -0,0 +1,16 @@
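+/* CPU-burning test task: repeatedly accumulates and square-roots a running
+ * sum; total work scales with the single integer argument */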
+#include "math.h"
+#include "assert.h"
+
+int main(int argc, char**argv) {
+assert(argc==2);
+int mult=atoi(argv[1]);
+int i,j;
+double a=0;
+long total=50000000;
+for(j=0;j<mult;++j) {
+for(i=0;i<total;++i) {
+ a+=i;a=sqrt(a);
+}
+}
+return 0;
+}
diff --git a/scratch/test/testtasks/sleeper.bash b/scratch/test/testtasks/sleeper.bash
new file mode 100755
index 0000000..d901a93
--- /dev/null
+++ b/scratch/test/testtasks/sleeper.bash
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting sleep
+sleep $arg
+echo pid: $pid arg: $arg ending sleep
+
diff --git a/scratch/test/testtasks/slow_yeller.py b/scratch/test/testtasks/slow_yeller.py
new file mode 100755
index 0000000..80c5aae
--- /dev/null
+++ b/scratch/test/testtasks/slow_yeller.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+
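+# test task which slowly writes interleaved, timestamped lines to stdout and stderr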
+import os,sys,time
+import datetime
+
+if len(sys.argv) != 2 :
+    print "usage: %s arg" % (sys.argv[0])
+    sys.exit(1)
+
+arg=sys.argv[1]
+
+pid=os.getpid()
+
+sys.stdout.write("pid: %s arg: %s starting yell\n" % (str(pid),arg))
+
+for i in xrange(100):
+    td=datetime.datetime.utcnow().isoformat()
+    msg="Yeller %s yellin %i" % (str(pid),i)
+    sys.stdout.write(msg+" stdout "+td+"\n")
+    sys.stderr.write(msg+" stderr "+td+"\n")
+    time.sleep(1)
+
+sys.stdout.write("pid: %s arg: %s ending yell\n" % (str(pid),arg))
+
diff --git a/scratch/test/testtasks/yeller.bash b/scratch/test/testtasks/yeller.bash
new file mode 100755
index 0000000..cdd4845
--- /dev/null
+++ b/scratch/test/testtasks/yeller.bash
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+if [ $# != 1 ]; then
+ echo "usage $0 arg"
+ exit 1
+fi
+arg=$1
+
+pid=$$
+echo pid: $pid arg: $arg starting yell
+for i in {1..100}; do
+    echo "Yeller $pid yellin $i stdout"
+    echo "Yeller $pid yellin $i stderr" 1>&2
+done
+echo pid: $pid arg: $arg ending yell
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-pyflow.git