[med-svn] [python-pbcommand] 01/06: Imported Upstream version 0.2.16
Afif Elghraoui
afif-guest at moszumanska.debian.org
Mon Oct 26 07:50:07 UTC 2015
This is an automated email from the git hooks/post-receive script.
afif-guest pushed a commit to branch master
in repository python-pbcommand.
commit f2831433e900d2bbe56eaff59df43b6ee0b40a51
Author: Afif Elghraoui <afif at ghraoui.name>
Date: Sun Oct 25 17:46:53 2015 -0700
Imported Upstream version 0.2.16
---
.gitignore | 69 ++
.travis.yml | 9 +
Dockerfile | 9 +
LICENSES.txt | 32 +
MANIFEST.in | 2 +
Makefile | 42 ++
README.md | 215 ++++++
REQUIREMENTS.txt | 3 +
REQUIREMENTS_TEST.txt | 2 +
circle.yml | 13 +
docs/Makefile | 192 ++++++
docs/example_01.py | 34 +
docs/source/commandline_interface.rst | 142 ++++
docs/source/conf.py | 289 ++++++++
docs/source/index.rst | 24 +
log_nose.cfg | 32 +
pbcommand/__init__.py | 11 +
pbcommand/cli/__init__.py | 6 +
pbcommand/cli/core.py | 205 ++++++
pbcommand/cli/examples/__init__.py | 0
pbcommand/cli/examples/dev_app.py | 108 +++
pbcommand/cli/examples/dev_gather_fasta_app.py | 116 ++++
pbcommand/cli/examples/dev_mixed_app.py | 122 ++++
pbcommand/cli/examples/dev_quick_hello_world.py | 59 ++
pbcommand/cli/examples/dev_scatter_fasta_app.py | 162 +++++
pbcommand/cli/examples/dev_simple_app.py | 93 +++
pbcommand/cli/examples/dev_txt_app.py | 80 +++
pbcommand/cli/quick.py | 269 ++++++++
pbcommand/common_options.py | 80 +++
pbcommand/engine/__init__.py | 1 +
pbcommand/engine/runner.py | 77 +++
pbcommand/interactive_resolver.py | 73 ++
pbcommand/models/__init__.py | 6 +
pbcommand/models/common.py | 446 ++++++++++++
pbcommand/models/parser.py | 561 ++++++++++++++++
pbcommand/models/report.py | 748 +++++++++++++++++++++
pbcommand/models/tool_contract.py | 366 ++++++++++
pbcommand/pb_io/__init__.py | 8 +
pbcommand/pb_io/common.py | 45 ++
pbcommand/pb_io/report.py | 119 ++++
pbcommand/pb_io/tool_contract_io.py | 349 ++++++++++
pbcommand/resolver.py | 210 ++++++
pbcommand/schemas/__init__.py | 36 +
pbcommand/schemas/pbreport.avsc | 166 +++++
pbcommand/schemas/resolved_tool_contract.avsc | 80 +++
pbcommand/schemas/tool_contract.avsc | 165 +++++
pbcommand/testkit/__init__.py | 1 +
pbcommand/testkit/base_utils.py | 26 +
pbcommand/testkit/core.py | 162 +++++
pbcommand/utils.py | 159 +++++
pbcommand/validators.py | 114 ++++
setup.cfg | 2 +
setup.py | 56 ++
tests/__init__.py | 14 +
tests/base_utils.py | 14 +
.../dev_example_dev_txt_app_tool_contract.json | 65 ++
tests/data/dev_example_resolved_tool_contract.json | 22 +
tests/data/dev_example_tool_contract.json | 65 ++
tests/data/dev_gather_fasta_app_tool_contract.json | 37 +
.../data/dev_scatter_fasta_app_tool_contract.json | 65 ++
.../example-reports/filter_reports_adapters.json | 53 ++
tests/data/example-reports/laa_report1.json | 51 ++
tests/data/example-reports/laa_report2.json | 51 ++
tests/data/example-reports/overview.json | 19 +
tests/data/example.fasta | 2 +
tests/data/example.txt | 10 +
...ommand.tasks.dev_fastq2fasta_tool_contract.json | 37 +
...mmand.tasks.dev_qhello_world_tool_contract.json | 61 ++
...nd.tasks.dev_txt_custom_outs_tool_contract.json | 44 ++
...bcommand.tasks.dev_txt_hello_tool_contract.json | 44 ++
tests/data/resolved_contract_01.json | 23 +
tests/data/resolved_tool_contract_dev_app.json | 23 +
tests/test_common_cmdline_core.py | 38 ++
tests/test_e2e_example_apps.py | 49 ++
tests/test_engine_runner.py | 31 +
tests/test_load_resolved_tool_contract.py | 58 ++
tests/test_models_common.py | 18 +
tests/test_models_report.py | 72 ++
tests/test_parsers.py | 95 +++
tests/test_pb_io_common.py | 33 +
tests/test_pb_io_report.py | 54 ++
tests/test_pb_io_tool_contract.py | 51 ++
tests/test_resolver.py | 46 ++
tests/test_schema_validation.py | 59 ++
tests/test_utils.py | 53 ++
tox.ini | 12 +
86 files changed, 7765 insertions(+)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9509cce
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,69 @@
+# Created by .ignore support plugin (hsz.mobi)
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Custom logger for nose
+reports_unittests.log
+.DS_Store
+# ipython notebooks
+notebooks
+
+
+java-classes
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..6007333
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,9 @@
+language: python
+python:
+ - "2.7"
+# command to install dependencies
+install:
+ - "pip install -r REQUIREMENTS.txt"
+ - "pip install ."
+# command to run tests
+script: nosetests -s --verbose --logging-config log_nose.cfg tests/test_*.py
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..c4f6098
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,9 @@
+# Install pbcommand and run a simple dev example
+FROM mpkocher/docker-pacbiobase
+MAINTAINER Michael Kocher
+
+# Copy the code to container
+COPY ./ /tmp/pbcommand
+
+# Install
+RUN pip install -r /tmp/C/REQUIREMENTS.txt && pip install /tmp/pbcommand
diff --git a/LICENSES.txt b/LICENSES.txt
new file mode 100644
index 0000000..184fab3
--- /dev/null
+++ b/LICENSES.txt
@@ -0,0 +1,32 @@
+Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+* Neither the name of Pacific Biosciences nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..06dfa68
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,2 @@
+include *.txt *.md
+recursive-include examples *.txt *.py
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..399490b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,42 @@
+.PHONY: all clean install dev-install test doc
+SHELL = /bin/bash -e
+
+all: install
+
+install:
+ @which pip > /dev/null
+ @pip freeze|grep 'pbcommand=='>/dev/null \
+ && pip uninstall -y pbcommand \
+ || echo -n ''
+ @pip install ./
+
+clean:
+ rm -rf build/;\
+ find . -name "*.egg-info" | xargs rm -rf;\
+ find . -name "*.pyc" | xargs rm -f;\
+ find . -name "*.err" | xargs rm -f;\
+ find . -name "*.log" | xargs rm -f;\
+ rm -rf dist;\
+ rm -rf docs/_build
+
+test:
+ tox
+
+doc:
+ cd docs && make html
+
+build-tool-contracts:
+ python -m pbcommand.cli.examples.dev_app --emit-tool-contract > ./tests/data/dev_example_tool_contract.json
+ python -m pbcommand.cli.examples.dev_txt_app --emit-tool-contract > ./tests/data/dev_example_dev_txt_app_tool_contract.json
+ python -m pbcommand.cli.examples.dev_gather_fasta_app --emit-tool-contract > ./tests/data/dev_gather_fasta_app_tool_contract.json
+ python -m pbcommand.cli.examples.dev_scatter_fasta_app --emit-tool-contract > ./tests/data/dev_scatter_fasta_app_tool_contract.json
+ python -m pbcommand.cli.examples.dev_quick_hello_world emit-tool-contracts -o ./tests/data
+
+run-pep8:
+ find pbcommand -name "*.py" -exec pep8 --ignore=E501,E265,E731,E402 {} \;
+
+run-auto-pep8:
+ find pbcommand -name "*.py" -exec autopep8 -i --ignore=E501,E265,E731,E402 {} \;
+
+build-java-classes:
+ avro-tools compile schema pbcommand/schemas java-classes/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c370780
--- /dev/null
+++ b/README.md
@@ -0,0 +1,215 @@
+pbcommand High Level Overview
+=============================
+
+[Full Docs](http://pbcommand.readthedocs.org/en/latest/)
+
+Note the APIs are still in flux. WIP.
+
+[![Circle CI](https://circleci.com/gh/PacificBiosciences/pbcommand.svg?style=svg)](https://circleci.com/gh/PacificBiosciences/pbcommand)
+
+PacBio library for common utils, models, and tools to interface with pbsmrtpipe workflow engine.
+
+To integrate with the pbsmrtpipe workflow engine you must be able to generate a **Tool Contract** and to run from a **Resolved Tool Contract**.
+
+A **Tool Contract** contains the metadata of the exe, such as the file types of inputs, outputs and options.
+
+Example [Tool Contract Json](https://github.com/PacificBiosciences/pbcommand/blob/master/tests/data/dev_example_dev_txt_app_tool_contract.json) (and [Avro Schema](https://github.com/PacificBiosciences/pbcommand/blob/master/pbcommand/schemas/tool_contract.avsc))
+
+Example [Resolved Tool Contract Json](https://github.com/PacificBiosciences/pbcommand/blob/master/tests/data/resolved_tool_contract_dev_app.json) (and [Avro Schema](https://github.com/PacificBiosciences/pbcommand/blob/master/pbcommand/schemas/resolved_tool_contract.avsc))
+
+There are two principal use cases: first, wrapping/calling python functions that have been defined in external python packages or scripts; second, creating a CLI tool that supports emitting tool contracts, running resolved tool contracts, and a complete argparse-style CLI.
+
+Example from **pbcommand.cli.examples**
+
+```python
+
+import sys
+import logging
+
+from pbcommand.models import FileTypes
+from pbcommand.cli import registry_builder, registry_runner
+
+log = logging.getLogger(__name__)
+
+registry = registry_builder("pbcommand", "python -m pbcommand.cli.examples.dev_quick_hello_world ")
+
+
+def _example_main(input_files, output_files, **kwargs):
+ log.info("Running example main with {i} {o} kw:{k}".format(i=input_files,
+ o=output_files, k=kwargs))
+ # write mock output files, otherwise the End-to-End test will fail
+ xs = output_files if isinstance(output_files, (list, tuple)) else [output_files]
+ for x in xs:
+ with open(x, 'w') as writer:
+ writer.write("Mock data\n")
+ return 0
+
+
+@registry("dev_qhello_world", "0.2.1", FileTypes.FASTA, FileTypes.FASTA, nproc=1, options=dict(alpha=1234))
+def run_rtc(rtc):
+ return _example_main(rtc.task.input_files[0], rtc.task.output_files[0], nproc=rtc.task.nproc)
+
+
+@registry("dev_fastq2fasta", "0.1.0", FileTypes.FASTQ, FileTypes.FASTA)
+def run_rtc(rtc):
+ return _example_main(rtc.task.input_files[0], rtc.task.output_files[0])
+
+
+if __name__ == '__main__':
+ sys.exit(registry_runner(registry, sys.argv[1:]))
+
+```
+
+A driver is the commandline interface that the workflow engine will call.
+
+The driver will be called with "${exe} /path/to/resolved_tool_contract.json"
+
+The tool contracts can be emitted to a directory and used in [pbsmrtpipe](https://github.com/PacificBiosciences/pbsmrtpipe).
+
+```bash
+$> python -m pbcommand.cli.examples.dev_quick_hello_world emit-tool-contracts -o /path/to/my-tool-contracts
+```
+
+
+Creating a Full Commandline Tool with TC/RTC and argparse support
+-----------------------------------------------------------------
+
+Three Steps
+- define Parser
+- add running from argparse and running from Resolved ToolContract funcs to call your main
+- add call to driver
+
+Import or define your main function.
+
+```python
+def run_my_main(fasta_in, fasta_out, min_length):
+ # do stuff. Main should return an int exit code
+ return 0
+```
+
+Define a function that will add inputs, outputs and options to your parser.
+
+```python
+from pbcommand.models import FileTypes
+
+def add_args_and_options(p):
+ # FileType, label, name, description
+ p.add_input_file_type(FileTypes.FASTA, "fasta_in", "Fasta File", "PacBio Spec'ed fasta file")
+ # File Type, label, name, description, default file name
+ p.add_output_file_type(FileTypes.FASTA, "fasta_out", "Filtered Fasta file", "Filtered Fasta file", "filter.fasta")
+ # Option id, label, default value, name, description
+ # for the argparse, the read-length will be translated to --read-length and (accessible via args.read_length)
+ p.add_int("pbcommand.task_options.dev_read_length", "read-length", 25, "Length filter", "Min Sequence Length filter")
+ return p
+```
+
+Define Parser
+
+```python
+from pbcommand.models import TaskTypes, ResourceTypes, SymbolTypes
+def get_contract_parser():
+ # Number of processors to use, can also be SymbolTypes.MAX_NPROC
+ nproc = 1
+ # Log file, tmp dir, tmp file. See ResourceTypes in models, ResourceTypes.TMP_DIR
+ resource_types = ()
+ # Commandline exe to call "{exe}" /path/to/resolved-tool-contract.json
+ driver_exe = "python -m pbcommand.cli.examples.dev_app --resolved-tool-contract "
+ desc = "Dev app for Testing that supports emitting tool contracts"
+ task_type = TaskTypes.LOCAL
+ # TaskTypes.DISTRIBUTED if you want your task to be submitted to the cluster manager (e.g., SGE) if
+ # one is provided to the workflow engine.
+ p = get_pbparser(TOOL_ID, __version__, desc, driver_exe, task_type, nproc, resource_types)
+ add_args_and_options(p)
+ return p
+
+```
+
+
+Define a Wrapping layer to call your main from both the tool contract and raw argparse IO layer
+
+```python
+def _args_runner(args):
+ # this is the args from parser.parse_args()
+ # the properties of args are defined as "labels" in the add_args_and_options func.
+ return run_my_main(args.fasta_in, args.fasta_out, args.read_length)
+
+def _resolved_tool_contract_runner(resolved_tool_contract):
+ rtc = resolved_tool_contract
+ # all options are referenced by globally namespaced id. This allows tools to use other tools options
+ # e.g., pbalign to use blasr defined options.
+ return run_my_main(rtc.inputs[0], rtc.outputs[0], rtc.options["pbcommand.task_options.dev_read_length"])
+```
+
+
+
+
+Add running layer
+
+```python
+import sys
+import logging
+from pbcommand.utils import setup_log
+from pbcommand.cli import pbparser_runner
+
+log = logging.getLogger(__name__)
+
+def main(argv=sys.argv):
+ # New interface that supports running resolved tool contracts
+ log.info("Starting {f} version {v} pbcommand example dev app".format(f=__file__, v=__version__))
+ p = get_contract_parser()
+ return pbparser_runner(argv[1:],
+ p,
+ _args_runner, # argparse runner func
+ _resolved_tool_contract_runner, # tool contract runner func
+ log, # log instance
+ setup_log # setup log func
+ )
+if __name__ == '__main__':
+ sys.exit(main())
+```
+
+Now you can run your tool via the argparse standard interface as well as emitting a **Tool Contract** to stdout from the commandline interface.
+
+```sh
+> python -m 'pbcommand.cli.examples.dev_app' --emit-tool-contract
+```
+
+And you can run the tool from a **Resolved Tool Contract**
+
+```sh
+> python -m pbcommand.cli.examples.dev_app --resolved-tool-contract /path/to/resolved_contract.json
+```
+
+See the dev apps in ["pbcommand.cli.examples"](https://github.com/PacificBiosciences/pbcommand/blob/master/pbcommand/cli/examples/dev_app.py) for a complete application (They require pbcore to be installed).
+
+In addition to TC/RTC support, there's a complete argparse support for the task options. An example of **help** is shown below.
+
+```sh
+(pbcommand_test)pbcommand $> python -m 'pbcommand.cli.examples.dev_app' --help
+usage: dev_app.py [-h] [-v] [--versions] [--emit-tool-contract]
+ [--resolved-tool-contract RESOLVED_TOOL_CONTRACT]
+ [--log-level LOG_LEVEL] [--debug]
+ [--read-length READ_LENGTH]
+ fasta_in fasta_out
+
+Dev app for Testing that supports emitting tool contracts
+
+positional arguments:
+ fasta_in PacBio Spec'ed fasta file
+ fasta_out Filtered Fasta file
+
+optional arguments:
+ -h, --help show this help message and exit
+ -v, --version show program's version number and exit
+ --versions Show versions of individual components (default: None)
+ --emit-tool-contract Emit Tool Contract to stdout (default: False)
+ --resolved-tool-contract RESOLVED_TOOL_CONTRACT
+ Run Tool directly from a PacBio Resolved tool contract
+ (default: None)
+ --log-level LOG_LEVEL
+ Set log level (default: 10)
+ --debug Debug to stdout (default: False)
+ --read-length READ_LENGTH
+ Min Sequence Length filter (default: 25)
+```
+
diff --git a/REQUIREMENTS.txt b/REQUIREMENTS.txt
new file mode 100644
index 0000000..ad11173
--- /dev/null
+++ b/REQUIREMENTS.txt
@@ -0,0 +1,3 @@
+xmlbuilder
+jsonschema
+avro
diff --git a/REQUIREMENTS_TEST.txt b/REQUIREMENTS_TEST.txt
new file mode 100644
index 0000000..17b0395
--- /dev/null
+++ b/REQUIREMENTS_TEST.txt
@@ -0,0 +1,2 @@
+nose
+tox
\ No newline at end of file
diff --git a/circle.yml b/circle.yml
new file mode 100644
index 0000000..3fab715
--- /dev/null
+++ b/circle.yml
@@ -0,0 +1,13 @@
+machine:
+ python:
+ version: 2.7.9
+
+dependencies:
+ pre:
+ - pip install -r REQUIREMENTS.txt
+ - pip install nose
+
+test:
+ override:
+ - mkdir -p $CIRCLE_TEST_REPORTS
+ - nosetests -s --verbose --with-xunit --xunit-file=$CIRCLE_TEST_REPORTS/nosetests.xml --logging-config log_nose.cfg tests/test_*.py
\ No newline at end of file
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..b63344f
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,192 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+PAPER =
+BUILDDIR = build
+
+# User-friendly check for sphinx-build
+ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
+$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
+endif
+
+# Internal variables.
+PAPEROPT_a4 = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
+
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext
+
+help:
+ @echo "Please use \`make <target>' where <target> is one of"
+ @echo " html to make standalone HTML files"
+ @echo " dirhtml to make HTML files named index.html in directories"
+ @echo " singlehtml to make a single large HTML file"
+ @echo " pickle to make pickle files"
+ @echo " json to make JSON files"
+ @echo " htmlhelp to make HTML files and a HTML help project"
+ @echo " qthelp to make HTML files and a qthelp project"
+ @echo " applehelp to make an Apple Help Book"
+ @echo " devhelp to make HTML files and a Devhelp project"
+ @echo " epub to make an epub"
+ @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+ @echo " latexpdf to make LaTeX files and run them through pdflatex"
+ @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
+ @echo " text to make text files"
+ @echo " man to make manual pages"
+ @echo " texinfo to make Texinfo files"
+ @echo " info to make Texinfo files and run them through makeinfo"
+ @echo " gettext to make PO message catalogs"
+ @echo " changes to make an overview of all changed/added/deprecated items"
+ @echo " xml to make Docutils-native XML files"
+ @echo " pseudoxml to make pseudoxml-XML files for display purposes"
+ @echo " linkcheck to check all external links for integrity"
+ @echo " doctest to run all doctests embedded in the documentation (if enabled)"
+ @echo " coverage to run coverage check of the documentation (if enabled)"
+
+clean:
+ rm -rf $(BUILDDIR)/*
+
+html:
+ $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+ $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+ $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+ @echo
+ @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+ $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+ @echo
+ @echo "Build finished; now you can process the pickle files."
+
+json:
+ $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+ @echo
+ @echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+ $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+ @echo
+ @echo "Build finished; now you can run HTML Help Workshop with the" \
+ ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+ $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+ @echo
+ @echo "Build finished; now you can run "qcollectiongenerator" with the" \
+ ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+ @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pbcommand.qhcp"
+ @echo "To view the help file:"
+ @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pbcommand.qhc"
+
+applehelp:
+ $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
+ @echo
+ @echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
+ @echo "N.B. You won't be able to view it unless you put it in" \
+ "~/Library/Documentation/Help or install it in your application" \
+ "bundle."
+
+devhelp:
+ $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+ @echo
+ @echo "Build finished."
+ @echo "To view the help file:"
+ @echo "# mkdir -p $$HOME/.local/share/devhelp/pbcommand"
+ @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pbcommand"
+ @echo "# devhelp"
+
+epub:
+ $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+ @echo
+ @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo
+ @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+ @echo "Run \`make' in that directory to run these through (pdf)latex" \
+ "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo "Running LaTeX files through pdflatex..."
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf
+ @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+latexpdfja:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo "Running LaTeX files through platex and dvipdfmx..."
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
+ @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+ $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+ @echo
+ @echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+ $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+ @echo
+ @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo
+ @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+ @echo "Run \`make' in that directory to run these through makeinfo" \
+ "(use \`make info' here to do that automatically)."
+
+info:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo "Running Texinfo files through makeinfo..."
+ make -C $(BUILDDIR)/texinfo info
+ @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+ $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+ @echo
+ @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+ $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+ @echo
+ @echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+ $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+ @echo
+ @echo "Link check complete; look for any errors in the above output " \
+ "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+ $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+ @echo "Testing of doctests in the sources finished, look at the " \
+ "results in $(BUILDDIR)/doctest/output.txt."
+
+coverage:
+ $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
+ @echo "Testing of coverage in the sources finished, look at the " \
+ "results in $(BUILDDIR)/coverage/python.txt."
+
+xml:
+ $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
+ @echo
+ @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
+
+pseudoxml:
+ $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
+ @echo
+ @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
diff --git a/docs/example_01.py b/docs/example_01.py
new file mode 100644
index 0000000..946aae7
--- /dev/null
+++ b/docs/example_01.py
@@ -0,0 +1,34 @@
+import pprint
+
+from pbcommand.models.common import (FileTypes, ResourceTypes, SymbolTypes, TaskTypes)
+from pbcommand.models.parser import get_pbparser
+from pbcommand.models.tool_contract import ToolDriver
+
+
+def _example_options(p):
+ p.add_input_file_type(FileTypes.BAM, "ubam", "Unaligned BAM", "A General description of BAM")
+ p.add_input_file_type(FileTypes.DS_REF, "ref", "Reference", "Reference Dataset XML")
+ p.add_int("mytool.task_options.myoption", "myopt", 7, "My Option", "My Option which does this and that")
+ p.add_str("mytool.task_options.myopt2", "mylabel", "Stuff", "My Option name", "My Option2 description")
+ p.add_output_file_type(FileTypes.REPORT, "rpt", "Json Report", "Mapping Stats Report Task", "mapping-stats.report.json")
+ return p
+
+
+def example_01():
+ driver = ToolDriver("my-exe --config")
+ resource_types = (ResourceTypes.TMP_DIR, ResourceTypes.LOG_FILE)
+ p = get_pbparser("pbcommand.tools.example", "0.1.2", "My Description", driver, TaskTypes.DISTRIBUTED, SymbolTypes.MAX_NPROC, resource_types)
+ return _example_options(p)
+
+
+def example_02():
+ p = example_01()
+
+ print "Generated Manifest"
+ pprint.pprint(p.parsers[1].to_tool_contract())
+
+ # ipython will dump out here. with non-zero exitcode. blah...
+ print "Running Argparse --help"
+ p.parsers[0].parser.parse_args(["--help"])
+
+ return p
diff --git a/docs/source/commandline_interface.rst b/docs/source/commandline_interface.rst
new file mode 100644
index 0000000..b36feaf
--- /dev/null
+++ b/docs/source/commandline_interface.rst
@@ -0,0 +1,142 @@
+Common Commandline Interface
+============================
+
+
+Motivation And High Level Example
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- Provide a common interface for executables to expose options
+- Provide a common interface for executables to be called
+- Provide a common interface for exposing metadata of tool, such as memory usage, cpu usage, required temp files
+
+Benefits
+~~~~~~~~
+
+- A consistent concrete common interface for shelling out to executables
+- task options have a consistent model for validation
+- task versioning is supported
+- A principled model for wrapping tools. For example, pbalign would "inherit" blasr options and extend, or wrap them.
+- Once a manifest has been defined and registered to pbsmrtpipe, the task/manifest can be referenced in pipelines with no additional work
+
+
+Terms
+~~~~~
+
+- 'Tool Contract' is a single file that exposes the exe interface. It
+ contains metadata about the task, such as input and output file
+ types, nproc.
+- 'Resolved Tool Contract' is a single file that contains the resolved values in the manifest
+- 'Driver' is the general interface for calling a commandline exe. This can be called from the commandline or directly as an API call (via any language which supports the manifest interface).
+
+Hello World Example
+~~~~~~~~~~~~~~~~~~~
+
+Tool Contract file for 'my-exe'
+
+
+.. literalinclude:: ../../tests/data/dev_example_tool_contract.json
+ :language: javascript
+
+
+Details of Tool Contract
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+- Tool Contract id which can be referenced globally (e.g., within a pipeline template)
+- Input File types have file type id, id that can be referenced within the driver, and a brief description
+- Output File types have a file type id and a default output file name
+- number of processors is defined by $nproc. "\$" prefixed values are symbols that have well defined semantic meaning
+- Temp files and Log files are defined using "$" symbols and can have multiple items
+- the exe options are exposed via jsonschema standard. Each option has an id and maps to a single schema definition. Each option must have a default value.
+- the exe section of the "driver" is the commandline interface that will be called as a positional argument (e.g., "my-exe resolved-manifest.json")
+- task type describes if the task should be submitted to the cluster resources
+
+
+Note. A single driver can reference many manifests. For example "pbreports" would have a single driver exe. From the "task_manifest_id", the driver would dispatch to the correct function call
+
+Programmatically defining a Parser to Emit a Tool Contract
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+pbcommand provides an API to create a tool contract and an argparse instance from a single interface. This facilitates a single point of defining options and keeps the standard commandline entry point and the tool contract in sync. This also allows your tool to emit the tool contract to stdout using "--emit-tool-contract" and to be run from a **Resolved Tool Contract** using the "--resolved-tool-contract /path/to/resolved-tool-contract.json" commandline argument.
+
+Complete App shown below.
+
+
+.. literalinclude:: ../../pbcommand/cli/examples/dev_app.py
+ :language: python
+
+.. note:: Options must be prefixed with {pbcommand}.task_options.{option_id} format.
+
+Details of Resolved Tool Contract
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- input, outputs file types are resolved to file paths
+- nproc and other resources are resolved
+
+
+.. literalinclude:: ../../tests/data/dev_example_resolved_tool_contract.json
+ :language: javascript
+
+
+Library usage
+~~~~~~~~~~~~~
+
+(language API example)
+
+
+Example of using a manifest in a tool, such as a mapping stats report.
+
+.. code-block:: python
+
+ from pbcommand.pb_io import load_tool_contract_from
+ # your application was called via "pbreports resolved-manifest.json"
+ p = "/path/to/resolved-tool-contract.json"
+ # load resolved manifest from
+ rtc = load_tool_contract_from(p)
+
+ # general call to mapping stats report main
+ # mapping_stats_main("/path/to/align.dataset.xml", "/path/to/reference.dataset.xml", "/path/to/output.json", my_option=1235)
+ exit_code = mapping_stats_main(rtc.input_files[0], rtc.input_files[1], rtc.output_files[0], rtc.opts["pbreports.task_options.my_option"])
+
+
+Example of resolving the Tool Contract
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The resolver must have assigned values for max nproc, root temp dir,
+output dir. The output dir can be used to assign the output paths of
+the output files.
+
+.. code-block:: python
+
+ # simple python example, the scala or C++ API would be similar
+
+ from pbcommand.pb_io import load_tool_contract_from
+ from pbcommand.cli import resolve_tool_contract
+
+ # load tool contract that is registered to your python package
+ tool_contract = load_tool_contract_from("/path/to/tool-contract.json")
+ tool_contract = ToolContractRegistry.get("pbsmrtpipe.tasks.dev_static_task")
+
+ max_nproc = 3
+ tmp_dir = "/tmp/my-tmp"
+ output_dir = os.getcwd()
+
+ input_files = ("/path/to/file.csv", "/path/to/dataset.subreads.xml")
+ options = {"pbsmrtpipe.task_options.my_option": 1234}
+
+ # create instance of Resolved Tool Contract
+ rtc = resolve_tool_contract(tool_contract, input_files, output_dir, tmp_dir, max_nproc, options)
+
+ # TODO. Not implemented yet
+ # The driver will run the tool, validate output files exist and
+ # cleanup any temp files/resources.
+ result = run_tool_contract_driver(rtc, cleanup=False)
+
+ print result.exit_code
+ print result.error_message
+ print result.host_name
+ print result.run_time
+
+ # sugar to persist results
+ result.write_json("output-results.json")
+
+
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..621fbdc
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,289 @@
+# -*- coding: utf-8 -*-
+#
+# pbcommand documentation build configuration file, created by
+# sphinx-quickstart on Mon Jul 6 12:53:15 2015.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys
+import os
+import shlex
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.insert(0, os.path.abspath('.'))
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+ 'sphinx.ext.autodoc',
+ 'sphinx.ext.doctest',
+ 'sphinx.ext.coverage',
+ 'sphinx.ext.viewcode',
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+# source_suffix = ['.rst', '.md']
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'pbcommand'
+copyright = u'2015, Michael Kocher'
+author = u'Michael Kocher'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '0.1.0'
+# The full version, including alpha/beta/rc tags.
+release = '0.1.0'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = []
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built documents.
+#keep_warnings = False
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+html_theme = 'alabaster'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+
+# The name for this set of Sphinx documents. If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar. Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Add any extra paths that contain custom files (such as robots.txt or
+# .htaccess) here, relative to this directory. These files are copied
+# directly to the root of the documentation.
+#html_extra_path = []
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_domain_indices = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it. The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Language to be used for generating the HTML full-text search index.
+# Sphinx supports the following languages:
+# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
+# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
+#html_search_language = 'en'
+
+# A dictionary with options for the search language support, empty by default.
+# Now only 'ja' uses this config value
+#html_search_options = {'type': 'default'}
+
+# The name of a javascript file (relative to the configuration directory) that
+# implements a search results scorer. If empty, the default will be used.
+#html_search_scorer = 'scorer.js'
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'pbcommanddoc'
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+ # The paper size ('letterpaper' or 'a4paper').
+ #'papersize': 'letterpaper',
+
+ # The font size ('10pt', '11pt' or '12pt').
+ #'pointsize': '10pt',
+
+ # Additional stuff for the LaTeX preamble.
+ #'preamble': '',
+
+ # Latex figure (float) alignment
+ #'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+# author, documentclass [howto, manual, or own class]).
+latex_documents = [
+ (master_doc, 'pbcommand.tex', u'pbcommand Documentation',
+ u'Michael Kocher', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+ (master_doc, 'pbcommand', u'pbcommand Documentation',
+ [author], 1)
+]
+
+# If true, show URL addresses after external links.
+#man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+# dir menu entry, description, category)
+texinfo_documents = [
+ (master_doc, 'pbcommand', u'pbcommand Documentation',
+ author, 'pbcommand', 'One line description of project.',
+ 'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#texinfo_appendices = []
+
+# If false, no module index is generated.
+#texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#texinfo_no_detailmenu = False
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..029e6bc
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,24 @@
+.. pbcommand documentation master file, created by
+ sphinx-quickstart on Mon Jul 6 12:53:15 2015.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+Welcome to pbcommand's documentation!
+=====================================
+
+Contents:
+
+.. toctree::
+ :maxdepth: 2
+
+ commandline_interface
+
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
diff --git a/log_nose.cfg b/log_nose.cfg
new file mode 100644
index 0000000..1a43cf9
--- /dev/null
+++ b/log_nose.cfg
@@ -0,0 +1,32 @@
+[loggers]
+keys=root,log01
+
+[logger_root]
+#level=DEBUG
+level=NOTSET
+handlers=hand01
+
+[logger_log01]
+level=NOTSET
+handlers=hand01
+propagate=1
+qualname=""
+
+[handlers]
+keys=hand01
+
+[filters]
+
+[formatters]
+keys=form01
+
+[handler_hand01]
+class=FileHandler
+level=DEBUG
+formatter=form01
+args=('reports_unittests.log', 'w')
+
+[formatter_form01]
+format=[%(levelname)s] %(asctime)-15s [%(name)s %(funcName)s %(lineno)d] %(message)s
+datefmt=
+class=logging.Formatter
diff --git a/pbcommand/__init__.py b/pbcommand/__init__.py
new file mode 100644
index 0000000..eaaed79
--- /dev/null
+++ b/pbcommand/__init__.py
@@ -0,0 +1,11 @@
+VERSION = (0, 2, 16)
+
+
+def get_version():
+ """Return the version as a string, e.g. "0.2.16".
+
+ This uses a major.minor.tiny to be compatible with semver spec.
+
+ .. note:: This should be improved to be compliant with PEP 386.
+ """
+ return ".".join([str(i) for i in VERSION])
diff --git a/pbcommand/cli/__init__.py b/pbcommand/cli/__init__.py
new file mode 100644
index 0000000..2795c2d
--- /dev/null
+++ b/pbcommand/cli/__init__.py
@@ -0,0 +1,6 @@
+from .core import (pacbio_args_runner,
+ pacbio_args_or_contract_runner,
+ pbparser_runner,
+ get_default_argparser)
+
+from .quick import (registry_runner, registry_builder)
\ No newline at end of file
diff --git a/pbcommand/cli/core.py b/pbcommand/cli/core.py
new file mode 100644
index 0000000..7a05bc1
--- /dev/null
+++ b/pbcommand/cli/core.py
@@ -0,0 +1,205 @@
+""" New Commandline interface that supports ResolvedToolContracts and emitting ToolContracts
+
+There are three use cases.
+
+- running from an argparse instance
+- running from a Resolved Tool Contract (RTC)
+- emitting a ToolContract (TC)
+
+Going to do this in a few steps.
+
+- de-serializing of RTC (I believe this should be done via avro, not a new random JSON file. With avro, the java, c++, classes can be generated. Python can load the RTC via a structure dict that has a well defined schema)
+- get loading and running of RTC from commandline to call main func in a report.
+- generate/emit TC from a common commandline parser interface that builds the TC and the standard argparse instance
+
+
+"""
+import argparse
+import json
+import logging
+import time
+import traceback
+import sys
+
+from pbcommand.common_options import (RESOLVED_TOOL_CONTRACT_OPTION,
+ EMIT_TOOL_CONTRACT_OPTION)
+from pbcommand.models import PbParser
+from pbcommand.pb_io.tool_contract_io import load_resolved_tool_contract_from
+
+
+def get_default_argparser(version, description):
+ """
+ Everyone MUST use this to create an instance of an argparse parser.
+
+ :param version:
+ :param description:
+ :return:
+ :rtype: ArgumentParser
+ """
+ p = argparse.ArgumentParser(version=version,
+ description=description,
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ return p
+
+
+def _pacbio_main_runner(alog, setup_log_func, func, *args, **kwargs):
+ """
+ Runs a general func and logs results. The return type is expected to be an (int) return code.
+
+ :param alog: a log instance
+
+ :param func: a cli exe func, must return an int exit code. func(args) => Int, where args is parsed from p.parse_args()
+
+ :param args: parsed args from parser
+
+ :return: Exit code of callable func
+ :rtype: int
+ """
+
+ started_at = time.time()
+
+ pargs = args[0]
+ level = logging.INFO
+ # Assuming that the log_level might not be an added option.
+ if hasattr(pargs, 'log_level'):
+ level = logging.getLevelName(pargs.log_level)
+
+ log_options = dict(level=level)
+ # The Setup log func must adhere to the pbcommand.utils.setup_log func
+ # signature
+ setup_log_func(alog, **log_options)
+
+ try:
+ # The code in func should catch any exceptions. The try/except
+ # here is a fail-safe to make sure the program doesn't crash
+ # and to make sure the exit code is logged.
+ return_code = func(*args, **kwargs)
+ run_time = time.time() - started_at
+ except Exception as e:
+ run_time = time.time() - started_at
+ alog.error(e, exc_info=True)
+ traceback.print_exc(sys.stderr)
+
+ # We should have a standard map of exit codes to Int
+ if isinstance(e, IOError):
+ return_code = 1
+ else:
+ return_code = 2
+
+ _d = dict(r=return_code, s=run_time)
+ alog.info("exiting with return code {r} in {s:.2f} sec.".format(**_d))
+ return return_code
+
+
+def _get_resolved_tool_contract_from_argv(argv):
+ """
+ Extract the resolved tool contract path from the raw argv
+
+ There are two cases
+
+ --resolved-tool-contract=/path/to/tool_contract.json
+ --resolved-tool-contract /path/to/tool_contract.json
+
+ :param argv:
+ :rtype: str
+ :raises: ValueError
+ :return: Path to Manifest
+ """
+ # this is a lackluster implementation. FIXME.
+
+ m_str = RESOLVED_TOOL_CONTRACT_OPTION
+
+ error = ValueError("Unable to extract resolved tool contract from commandline args {a}. Expecting {m}=/path/to/file.json".format(a=argv, m=m_str))
+ tool_contract_path = None
+ nargv = len(argv)
+
+ # Provided the --resolved-tool-contract /path/to/tool_contract_path.json
+ if m_str in argv:
+ for i, a in enumerate(argv):
+ # print i, nargv, a
+ if a.startswith(m_str):
+ if (i + 1) <= nargv:
+ tool_contract_path = argv[i + 1]
+ break
+ else:
+ raise error
+
+ # Provided the --resolved-tool-contract=/path/to/tool_contract_path.json
+ m_str_eq = m_str + "="
+ for i in argv:
+ if i.startswith(m_str_eq):
+ tool_contract_path = i.split(m_str_eq)[-1]
+ break
+
+ if tool_contract_path is None:
+ raise error
+
+ return tool_contract_path
+
+
+def pacbio_args_runner(argv, parser, args_runner_func, alog, setup_log_func):
+ # For tools that haven't yet implemented the ToolContract API
+ args = parser.parse_args(argv)
+ return _pacbio_main_runner(alog, setup_log_func, args_runner_func, args)
+
+
+def pacbio_args_or_contract_runner(argv,
+ parser,
+ args_runner_func,
+ contract_tool_runner_func,
+ alog, setup_log_func):
+ """
+ For tools that understand resolved_tool_contracts, but can't emit
+ tool contracts (they may have been written by hand)
+
+ :param parser: argparse Parser
+ :type parser: ArgumentParser
+
+ :param args_runner_func: func(args) => int signature
+
+ :param contract_tool_runner_func: func(tool_contract_instance) should be
+ the signature
+
+ :param alog: a python log instance
+ :param setup_log_func: func(log_instance) => void signature
+ :return: int return code
+ :rtype: int
+ """
+
+ # circumvent the argparse parsing by inspecting the raw argv, then manually
+ # parse out the resolved_tool_contract path. Not awesome, but the only way to skip the
+ # parser.parse_args(args) machinery
+ if any(a.startswith(RESOLVED_TOOL_CONTRACT_OPTION) for a in argv):
+ print "Attempting to Load resolved tool contract from {a}".format(a=argv)
+ # FIXME need to catch the exception if raised here before the _pacbio_main_runner is called
+ resolved_tool_contract_path = _get_resolved_tool_contract_from_argv(argv)
+ resolved_tool_contract = load_resolved_tool_contract_from(resolved_tool_contract_path)
+ r = _pacbio_main_runner(alog, setup_log_func, contract_tool_runner_func, resolved_tool_contract)
+ # alog.info("Completed running resolved contract. {c}".format(c=resolved_tool_contract))
+ return r
+ else:
+ # tool was called with the standard commandline invocation
+ return pacbio_args_runner(argv, parser, args_runner_func, alog,
+ setup_log_func)
+
+
+def pbparser_runner(argv,
+ parser,
+ args_runner_func,
+ contract_runner_func,
+ alog,
+ setup_log_func):
+ """Run a Contract or emit a contract to stdout."""
+ if not isinstance(parser, PbParser):
+ raise TypeError("Only supports PbParser.")
+
+ arg_parser = parser.arg_parser.parser
+ # extract the contract
+ tool_contract = parser.to_contract()
+
+ if EMIT_TOOL_CONTRACT_OPTION in argv:
+ # print tool_contract
+ x = json.dumps(tool_contract.to_dict(), indent=4)
+ print x
+ else:
+ return pacbio_args_or_contract_runner(argv, arg_parser, args_runner_func, contract_runner_func, alog, setup_log_func)
diff --git a/pbcommand/cli/examples/__init__.py b/pbcommand/cli/examples/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pbcommand/cli/examples/dev_app.py b/pbcommand/cli/examples/dev_app.py
new file mode 100755
index 0000000..06e97f2
--- /dev/null
+++ b/pbcommand/cli/examples/dev_app.py
@@ -0,0 +1,108 @@
+"""Simple CLI dev app for testing Emitting Tool Contracts and Running from Resolved Tool Contracts"""
+
+import logging
+import sys
+
+from pbcommand.utils import setup_log
+from pbcommand.cli import pbparser_runner
+from pbcommand.models import TaskTypes, FileTypes, get_pbparser, ResourceTypes
+
+
+# This has the same functionality as the dev_simple_app
+from .dev_simple_app import run_main
+
+log = logging.getLogger(__name__)
+
+__version__ = '0.2.1'
+
+# Used for the tool contract id. Must have the form {namespace}.tasks.{name}
+# to prevent namespace collisions. For python tools, the namespace should be
+# the python package name.
+TOOL_ID = "pbcommand.tasks.dev_app"
+
+
+def add_args_and_options(p):
+ """
+ Add input, output files and options to parser.
+
+ :type p: PbParser
+ :return: PbParser
+ """
+ # FileType, label, name, description
+ p.add_input_file_type(FileTypes.FASTA, "fasta_in", "Fasta File", "PacBio Spec'ed fasta file")
+ # File Type, label, name, description, default file name
+ p.add_output_file_type(FileTypes.FASTA, "fasta_out", "Filtered Fasta file", "Filtered Fasta file", "filter.fasta")
+ # Option id, label, default value, name, description
+ # for argparse, the read-length label will be translated to --read-length (accessible via args.read_length)
+ p.add_int("pbcommand.task_options.dev_read_length", "read-length", 25, "Length filter", "Min Sequence Length filter")
+ return p
+
+
+def get_contract_parser():
+ """
+ Central point of programmatically defining a Parser.
+ :rtype: PbParser
+ :return: PbParser
+ """
+ # Commandline exe to call "{exe}" /path/to/resolved-tool-contract.json
+
+ driver_exe = "python -m pbcommand.cli.example.dev_app --resolved-tool-contract "
+ desc = "Dev app for Testing that supports emitting tool contracts"
+ subcomponents = [("my_subcomponent", "1.2.3")]
+
+ resource_types = (ResourceTypes.TMP_FILE,
+ ResourceTypes.TMP_FILE,
+ ResourceTypes.TMP_DIR)
+
+ p = get_pbparser(TOOL_ID,
+ __version__,
+ "Example Dev App",
+ desc,
+ driver_exe,
+ is_distributed=False,
+ resource_types=resource_types,
+ subcomponents=subcomponents)
+
+ add_args_and_options(p)
+ return p
+
+
+def args_runner(args):
+ """Entry point from argparse"""
+ log.debug("raw args {a}".format(a=args))
+ return run_main(args.fasta_in, args.fasta_out, args.read_length)
+
+
+def resolved_tool_contract_runner(resolved_tool_contract):
+ """Run from the resolved contract
+
+ :param resolved_tool_contract:
+ :type resolved_tool_contract: ResolvedToolContract
+ """
+
+ in_file = resolved_tool_contract.task.input_files[0]
+ out_file = resolved_tool_contract.task.output_files[0]
+ min_read_length = resolved_tool_contract.task.options["pbcommand.task_options.dev_read_length"]
+ r = run_main(in_file, out_file, min_read_length)
+ return r
+
+
+def main(argv=sys.argv):
+ log.info("Starting {f} version {v} pbcommand example dev app".format(f=__file__, v=__version__))
+ # PbParser instance, this has both the argparse instance and the tool contract
+ # instance.
+ mp = get_contract_parser()
+ # To Access the argparse instance
+ # mp.arg_parser.parser
+ # The Tool Contract parser
+ # mp.tool_contract_parser.parser
+ return pbparser_runner(argv[1:],
+ mp,
+ args_runner,
+ resolved_tool_contract_runner,
+ log,
+ setup_log)
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/pbcommand/cli/examples/dev_gather_fasta_app.py b/pbcommand/cli/examples/dev_gather_fasta_app.py
new file mode 100644
index 0000000..43b2f8c
--- /dev/null
+++ b/pbcommand/cli/examples/dev_gather_fasta_app.py
@@ -0,0 +1,116 @@
+"""Example of Gather TC to gather several $chunk.fasta_id in chunk.json file.
+
+
+There's a bit of code here that is copied from pbsmrtpipe.tools.chunk_utils.
+Martin will eventually refactor this into pbcore.
+"""
+import logging
+import sys
+import warnings
+
+import functools
+
+from pbcommand.cli import pbparser_runner
+from pbcommand.models import get_gather_pbparser, FileTypes
+from pbcommand.pb_io import load_pipeline_chunks_from_json
+from pbcommand.utils import setup_log
+
+from .dev_scatter_fasta_app import Constants
+
+log = logging.getLogger(__name__)
+
+TOOL_ID = "pbcommand.tasks.dev_gather_fasta"
+__version__ = '0.1.0'
+
+
+try:
+ from pbcore.io import FastaWriter, FastaReader
+except ImportError:
+ warnings.warn("Example apps require pbcore. Install from https://github.com/PacificBiosciences/pbcore")
+
+
+def __gather_fastx(fastx_reader, fastx_writer, fastx_files, output_file):
+ # this will work for any Pbcore Reader, Writer classes
+ n = 0
+ with fastx_writer(output_file) as writer:
+ for fastx_file in fastx_files:
+ with fastx_reader(fastx_file) as reader:
+ for record in reader:
+ n += 1
+ writer.writeRecord(record)
+
+ log.info("Completed gathering {n} files (with {x} records) to {f}".format(n=len(fastx_files), f=output_file, x=n))
+ return 0
+
+gather_fasta = functools.partial(__gather_fastx, FastaReader, FastaWriter)
+
+
+def _get_datum_from_chunks_by_chunk_key(chunks, chunk_key):
+ datum = []
+ for chunk in chunks:
+ if chunk_key in chunk.chunk_keys:
+ value = chunk.chunk_d[chunk_key]
+ datum.append(value)
+ else:
+ raise KeyError("Unable to find chunk key '{i}' in {p}".format(i=chunk_key, p=chunk))
+
+ return datum
+
+
+def __args_gather_runner(func, chunk_json, output_file, chunk_key):
+ chunks = load_pipeline_chunks_from_json(chunk_json)
+
+ # Allow looseness
+ if not chunk_key.startswith('$chunk.'):
+ chunk_key = '$chunk.' + chunk_key
+ log.warn("Prepending chunk key with '$chunk.' to '{c}'".format(c=chunk_key))
+ else:
+ chunk_key = chunk_key
+
+ fastx_files = _get_datum_from_chunks_by_chunk_key(chunks, chunk_key)
+ _ = func(fastx_files, output_file)
+ return 0
+
+
+def run_main(chunked_json, output_fasta, chunk_key):
+ """Gather the fasta files listed in a Chunk.json file into one fasta file
+
+ The files to gather are looked up in each chunk by chunk_key.
+ """
+ return __args_gather_runner(gather_fasta, chunked_json, output_fasta, chunk_key)
+
+
+def get_parser():
+
+ driver = "python -m pbcommand.cli.examples.dev_scatter_fasta_app --resolved-tool-contract "
+ desc = "Gather a fasta resources in a Chunk.json file"
+ # chunk keys that will be written to the file
+ chunk_key = "$chunk.fasta_id"
+ p = get_gather_pbparser(TOOL_ID, __version__, "Fasta Chunk Gather",
+ desc, driver, is_distributed=False)
+ p.add_input_file_type(FileTypes.CHUNK, "chunk_json", "Chunk JSON", "Chunked Fasta JSON Out")
+ p.add_output_file_type(FileTypes.FASTA, "output", "Chunk JSON", "Output Fasta", "gathered.fasta")
+ return p
+
+
+def args_runner(args):
+ return run_main(args.chunk_json, args.output, Constants.FA_CHUNK_KEY)
+
+
+def rtc_runner(rtc):
+ return run_main(rtc.task.input_files[0],
+ rtc.task.output_files[0],
+ Constants.FA_CHUNK_KEY)
+
+
+def main(argv=sys.argv):
+ return pbparser_runner(argv[1:],
+ get_parser(),
+ args_runner,
+ rtc_runner,
+ log,
+ setup_log)
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/pbcommand/cli/examples/dev_mixed_app.py b/pbcommand/cli/examples/dev_mixed_app.py
new file mode 100644
index 0000000..fbfcc4d
--- /dev/null
+++ b/pbcommand/cli/examples/dev_mixed_app.py
@@ -0,0 +1,122 @@
+"""Example to show how to expose a subset of functionality to tool contract,
+while exposing all the options via argparse commandline interface
+
+In this example, the tool contract has
+
+ins = (csv,)
+outs = (report, )
+options = alpha
+
+In the "full" argparse layer, there is also an optional hdf5 output file and beta. (Option[Int] is the scala-style notation)
+
+ins = (csv, )
+outs = (report, Option[h5])
+options = alpha, beta
+"""
+import sys
+import logging
+
+from pbcommand.models import TaskTypes, FileTypes, get_pbparser
+from pbcommand.cli import pbparser_runner
+from pbcommand.utils import setup_log
+
+log = logging.getLogger(__name__)
+
+TOOL_ID = "pbcommand.tasks.dev_mixed_app"
+__version__ = "0.2.0"
+
+
+def _get_contract_parser():
+ """
+ Central point of programmatically defining a Parser.
+ :rtype: PbParser
+ :return: PbParser
+ """
+ # Number of processors to use
+ nproc = 2
+ # Commandline exe to call "{exe}" /path/to/resolved-tool-contract.json
+ driver_exe = "python -m pbcommand.cli.example.dev_app --resolved-tool-contract "
+ desc = "Dev app for Testing that supports emitting tool contracts"
+ p = get_pbparser(TOOL_ID, __version__, "DevApp", desc, driver_exe,
+ is_distributed=False, nproc=nproc)
+ return p
+
+
+def add_rtc_options(p):
+ """
+ Add all ins/outs and options that will be in both the tool contract and the argparse layer
+
+ :param p:
+ :type p: pbcommand.models.PbParser
+ :return:
+ """
+ p.add_input_file_type(FileTypes.CSV, "csv", "Input CSV", "Input csv description")
+ p.add_output_file_type(FileTypes.REPORT, "rpt", "Output Report", "Output PacBio Report JSON", "example.report.json")
+ p.add_int("pbcommand.task_options.alpha", "alpha", 25, "Alpha", "Alpha description")
+ return p
+
+
+def add_argparse_only(p):
+ """
+ Standard argparse layer
+
+ :param p:
+ :type p: argparse.ArgumentParser
+ :return:
+ """
+ p.add_argument("--output-h5", type=str, help="Optional output H5 file.")
+ p.add_argument("--beta", type=int, default=1234, help="Example option")
+ return p
+
+
+def get_contract_parser():
+ p = _get_contract_parser()
+ # minimal ins/outs + options exposed at the contract level
+ add_rtc_options(p)
+ # add all options to the raw argparse instance
+ add_argparse_only(p.arg_parser.parser)
+ return p
+
+
+def _fake_main(csv, report_json, alpha=1, beta=1234, output_h5=None):
+ _d = dict(c=csv, r=report_json, a=alpha, b=beta, h=output_h5)
+ log.info("Running main with {c} {r} alpha={a} beta={b} h5={h}".format(**_d))
+ return 0
+
+
+def args_runner(args):
+ """Standard python args access point"""
+
+ csv = args.csv
+ report_json = args.rpt
+ output_h5 = args.output_h5
+ return _fake_main(csv, report_json, alpha=args.alpha, beta=args.beta, output_h5=output_h5)
+
+
+def resolved_tool_contract_runner(rtc):
+ """
+
+ :param rtc:
+ :type rtc: pbcommand.models.tool_contract.ResolvedToolContract
+ :return:
+ """
+ csv = rtc.task.input_files[0]
+ rpt = rtc.task.output_files[0]
+ alpha = rtc.task.options["pbcommand.task_options.alpha"]
+ return _fake_main(csv, rpt, alpha=alpha)
+
+
+def main(argv=sys.argv):
+ # New interface that supports running resolved tool contracts
+ log.info("Starting {f} version {v} pbcommand example dev app".format(f=__file__, v=__version__))
+
+ p = get_contract_parser()
+ return pbparser_runner(argv[1:], p,
+ args_runner,
+ resolved_tool_contract_runner,
+ log,
+ setup_log)
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/pbcommand/cli/examples/dev_quick_hello_world.py b/pbcommand/cli/examples/dev_quick_hello_world.py
new file mode 100644
index 0000000..7ffa50b
--- /dev/null
+++ b/pbcommand/cli/examples/dev_quick_hello_world.py
@@ -0,0 +1,59 @@
+import sys
+import logging
+
+from pbcommand.models import FileTypes, OutputFileType
+from pbcommand.cli import registry_builder, registry_runner
+
+log = logging.getLogger(__name__)
+
+registry = registry_builder("pbcommand", "python -m pbcommand.cli.examples.dev_quick_hello_world ")
+
+
+def _example_main(input_files, output_files, **kwargs):
+ log.info("Running example main with {i} {o} kw:{k}".format(i=input_files,
+ o=output_files, k=kwargs))
+ # write mock output files, otherwise the End-to-End test will fail
+ xs = output_files if isinstance(output_files, (list, tuple)) else [output_files]
+ for x in xs:
+ with open(x, 'w') as writer:
+ writer.write("Mock data\n")
+ return 0
+
+
+@registry("dev_qhello_world", "0.2.1", FileTypes.FASTA, FileTypes.FASTA, nproc=1, options=dict(alpha=1234))
+def run_rtc(rtc):
+ return _example_main(rtc.task.input_files[0], rtc.task.output_files[0], nproc=rtc.task.nproc)
+
+
+@registry("dev_fastq2fasta", "0.1.0", FileTypes.FASTQ, FileTypes.FASTA)
+def run_rtc(rtc):
+ return _example_main(rtc.task.input_files[0], rtc.task.output_files[0])
+
+
+@registry("dev_txt_hello", "0.1.0", FileTypes.TXT, (FileTypes.TXT, FileTypes.TXT), nproc=3, is_distributed=False)
+def run_rtc(rtc):
+ return _example_main(rtc.task.input_files, rtc.task.output_files)
+
+
+def _to_output(i, file_type):
+ default_name = "_".join([file_type.file_type_id, file_type.base_name + "_" + str(i) + "." + file_type.ext])
+ label = "label_" + file_type.file_type_id
+ desc = "File {f}".format(f=file_type)
+ return OutputFileType(file_type.file_type_id, label, repr(file_type), desc, default_name)
+
+
+def _to_outputs(file_types):
+ return [_to_output(i, ft) for i, ft in enumerate(file_types)]
+
+
+@registry("dev_txt_custom_outs", "0.1.0", FileTypes.TXT, _to_outputs((FileTypes.TXT, FileTypes.TXT)))
+def run_rtc(rtc):
+ """Test for using OutputFileTypes as outputs
+
+ Output types can be specified as FileType, or OutputFileType instances
+ """
+ return _example_main(rtc.task.input_files, rtc.task.output_files)
+
+
+if __name__ == '__main__':
+ sys.exit(registry_runner(registry, sys.argv[1:]))
diff --git a/pbcommand/cli/examples/dev_scatter_fasta_app.py b/pbcommand/cli/examples/dev_scatter_fasta_app.py
new file mode 100644
index 0000000..0f9d02c
--- /dev/null
+++ b/pbcommand/cli/examples/dev_scatter_fasta_app.py
@@ -0,0 +1,162 @@
+"""Example of Generating a Chunk.json file that 'scatters' a fasta file
+
+
+In the example, the input fasta file is split into chunked fasta files that
+are written to the directory of the output Chunk.json file.
+
+Each chunk stores the path to its fasta file under the $chunk.fasta_id key.
+
+There's a bit of code here that is copied from pbsmrtpipe.tools.chunk_utils.
+
+Martin will eventually refactor this into pbcore.
+"""
+import os
+import logging
+import sys
+import warnings
+import math
+import datetime
+
+from pbcommand.cli import pbparser_runner
+from pbcommand.models import get_scatter_pbparser, FileTypes, PipelineChunk
+from pbcommand.pb_io import write_pipeline_chunks
+from pbcommand.utils import setup_log
+
+log = logging.getLogger(__name__)
+
+TOOL_ID = "pbcommand.tasks.dev_scatter_fasta"
+__version__ = '0.1.0'
+
+
+try:
+ from pbcore.io import FastaWriter, FastaReader
+except ImportError:
+ warnings.warn("Example apps require pbcore. Install from https://github.com/PacificBiosciences/pbcore")
+
+
+class Constants(object):
+ NCHUNKS_OPT = "pbcommand.task_options.dev_scatter_fa_nchunks"
+ FA_CHUNK_KEY = "$chunk.fasta_id"
+
+
+def __get_nrecords_from_reader(reader):
+ n = 0
+ for _ in reader:
+ n += 1
+ return n
+
+
+def write_fasta_records(fastax_writer_klass, records, file_name):
+
+ n = 0
+ with fastax_writer_klass(file_name) as w:
+ for record in records:
+ w.writeRecord(record)
+ n += 1
+
+ log.debug("Completed writing {n} fasta records".format(n=n))
+
+
+def __to_chunked_fastx_files(fastx_reader_klass, fastax_writer_klass, chunk_key, fastx_path, max_total_nchunks, dir_name, base_name, ext):
+ """Convert a Fasta/Fastq file to a chunked list of files"""
+
+ # grab the number of records so we can chunk it
+ with fastx_reader_klass(fastx_path) as f:
+ nrecords = __get_nrecords_from_reader(f)
+
+ max_total_nchunks = min(nrecords, max_total_nchunks)
+
+ n = int(math.ceil(float(nrecords)) / max_total_nchunks)
+
+ nchunks = 0
+ with fastx_reader_klass(fastx_path) as r:
+ it = iter(r)
+ for i in xrange(max_total_nchunks):
+ records = []
+
+ chunk_id = "_".join([base_name, str(nchunks)])
+ chunk_name = ".".join([chunk_id, ext])
+ nchunks += 1
+ fasta_chunk_path = os.path.join(dir_name, chunk_name)
+
+ if i != max_total_nchunks:
+ for _ in xrange(n):
+ records.append(it.next())
+ else:
+ for x in it:
+ records.append(x)
+
+ write_fasta_records(fastax_writer_klass, records, fasta_chunk_path)
+ total_bases = sum(len(r.sequence) for r in records)
+ d = dict(total_bases=total_bases, nrecords=len(records))
+ d[chunk_key] = os.path.abspath(fasta_chunk_path)
+ c = PipelineChunk(chunk_id, **d)
+ yield c
+
+
+def to_chunked_fasta_files(fasta_path, max_total_nchunks, dir_name, chunk_key, base_name, ext):
+ return __to_chunked_fastx_files(FastaReader, FastaWriter, chunk_key, fasta_path, max_total_nchunks, dir_name, base_name, ext)
+
+
+def write_chunks_to_json(chunks, chunk_file):
+ log.debug("Wrote {n} chunks to {f}.".format(n=len(chunks), f=chunk_file))
+ write_pipeline_chunks(chunks, chunk_file, "Chunks written at {d}".format(d=datetime.datetime.now()))
+ return 0
+
+
+def _write_fasta_chunks_to_file(to_chunk_fastx_file_func, chunk_file, fastx_path, max_total_chunks, dir_name, chunk_key, chunk_base_name, chunk_ext):
+ chunks = list(to_chunk_fastx_file_func(fastx_path, max_total_chunks, dir_name, chunk_key, chunk_base_name, chunk_ext))
+ write_chunks_to_json(chunks, chunk_file)
+ return 0
+
+
+def write_fasta_chunks_to_file(chunk_file, fasta_path, max_total_chunks, dir_name, chunk_key, chunk_base_name, chunk_ext):
+ return _write_fasta_chunks_to_file(to_chunked_fasta_files, chunk_file, fasta_path, max_total_chunks, dir_name, chunk_key, chunk_base_name, chunk_ext)
+
+
+def run_main(fasta_file, chunk_output_json, chunk_key, max_nchunks, nchunks=None, chunk_base_name="fasta"):
+ """Create a Chunk.json file with nchunks <= max_nchunks
+
+ NOTE: nchunks is accepted but not used; only max_nchunks limits the chunk count.
+ """
+ output_dir = os.path.dirname(chunk_output_json)
+ return write_fasta_chunks_to_file(chunk_output_json, fasta_file, max_nchunks, output_dir, chunk_key, chunk_base_name, "fasta")
+
+
+def get_parser():
+
+ driver = "python -m pbcommand.cli.examples.dev_scatter_fasta_app --resolved-tool-contract "
+ desc = "Scatter a single fasta file to create chunk.json file"
+ # chunk keys that will be written to the file
+ chunk_keys = ("$chunk.fasta_id", )
+ p = get_scatter_pbparser(TOOL_ID, __version__, "Fasta Scatter",
+ desc, driver, chunk_keys, is_distributed=False)
+ p.add_input_file_type(FileTypes.FASTA, "fasta_in", "Fasta In", "Fasta file to scatter")
+ p.add_output_file_type(FileTypes.CHUNK, "cjson", "Chunk JSON", "Scattered/Chunked Fasta Chunk.json", "fasta.chunks.json")
+ p.add_int("pbcommand.task_options.dev_scatter_fa_nchunks", "nchunks", 10, "Number of chunks",
+ "Suggested number of chunks. May be overridden by $max_nchunks")
+ return p
+
+
+def args_runner(args):
+ return run_main(args.fasta_in, args.cjson, Constants.FA_CHUNK_KEY, args.nchunks)
+
+
+def rtc_runner(rtc):
+ return run_main(rtc.task.input_files[0],
+ rtc.task.output_files[0],
+ Constants.FA_CHUNK_KEY,
+ rtc.task.options[Constants.NCHUNKS_OPT])
+
+
+def main(argv=sys.argv):
+ return pbparser_runner(argv[1:],
+ get_parser(),
+ args_runner,
+ rtc_runner,
+ log,
+ setup_log)
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/pbcommand/cli/examples/dev_simple_app.py b/pbcommand/cli/examples/dev_simple_app.py
new file mode 100755
index 0000000..2e77527
--- /dev/null
+++ b/pbcommand/cli/examples/dev_simple_app.py
@@ -0,0 +1,93 @@
+"""Simple CLI dev app for testing
+
+This app is a 'simple' app in that it cannot emit tool-contracts, but it
+can run resolved tool contracts via --resolved-tool-contract
+"""
+
+import logging
+import sys
+import warnings
+
+from pbcommand.utils import setup_log
+from pbcommand.validators import validate_file
+from pbcommand.models import ResolvedToolContract
+from pbcommand.common_options import add_resolved_tool_contract_option
+from pbcommand.cli import pacbio_args_or_contract_runner, get_default_argparser
+
+log = logging.getLogger(__name__)
+
+__version__ = '0.1.1'
+
+TOOL_ID = "pbcommand.tasks.dev_app_simple"
+
+try:
+ from pbcore.io import FastaWriter, FastaReader
+except ImportError:
+ warnings.warn("Example apps require pbcore. Install from https://github.com/PacificBiosciences/pbcore")
+
+
+def get_parser():
+ p = get_default_argparser(__version__, __doc__)
+ p.add_argument("fasta_in", type=validate_file, help="Input Fasta")
+ p.add_argument("fasta_out", type=str, help="Output Fasta")
+ p.add_argument('--read-length', type=int, default=25, help="Min Sequence length to filter")
+ add_resolved_tool_contract_option(p)
+ # this parser cannot emit a tool contract, but can run from a resolved
+ # contract via --resolved-tool-contract /path/to/resolved-tool-contract.json
+ return p
+
+
+def run_main(input_file, output_file, min_sequence_length):
+ """
+ Main function entry point to your application (this should be imported
+ from your library code)
+
+ :rtype: int
+ """
+ _d = dict(i=input_file, a=min_sequence_length, o=output_file)
+ msg = "Running dev_app task. with input:{i} output:{o} and min-length={a}".format(**_d)
+ log.info(msg)
+ with FastaWriter(output_file) as w:
+ with FastaReader(input_file) as r:
+ for record in r:
+ if len(record.sequence) > min_sequence_length:
+ w.writeRecord(record)
+ log.debug("completed running main.")
+ return 0
+
+
+def args_runner(args):
+ """Entry point from argparse"""
+ log.debug("raw args {a}".format(a=args))
+ return run_main(args.fasta_in, args.fasta_out, args.read_length)
+
+
+def resolved_tool_contract_runner(resolved_tool_contract):
+ """Run from the resolved contract
+
+ :param resolved_tool_contract:
+ :type resolved_tool_contract: ResolvedToolContract
+ """
+
+ in_file = resolved_tool_contract.task.input_files[0]
+ out_file = resolved_tool_contract.task.output_files[0]
+ alpha = 9
+ r = run_main(in_file, out_file, alpha)
+ log.info("Completed running resolved contract. {c}".format(c=resolved_tool_contract))
+ return r
+
+
+def main(argv=sys.argv):
+ # New interface that supports running resolved tool contracts
+ log.info("Starting {f} version {v} pbcommand example dev app".format(f=__file__, v=__version__))
+
+ p = get_parser()
+ return pacbio_args_or_contract_runner(argv[1:], p,
+ args_runner,
+ resolved_tool_contract_runner,
+ log,
+ setup_log)
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/pbcommand/cli/examples/dev_txt_app.py b/pbcommand/cli/examples/dev_txt_app.py
new file mode 100644
index 0000000..a40c7f8
--- /dev/null
+++ b/pbcommand/cli/examples/dev_txt_app.py
@@ -0,0 +1,80 @@
+"""Demonstration Example app
+
+Primarily used for end-to-end testing.
+emit tool contract -> Resolve -> resolved tool contract -> run.
+"""
+
+import logging
+import sys
+
+from pbcommand.utils import setup_log
+from pbcommand.cli import pbparser_runner
+from pbcommand.models import TaskTypes, FileTypes, get_pbparser, ResourceTypes
+
+TOOL_ID = "pbcommand.tasks.dev_txt_app"
+VERSION = "0.1.0"
+
+log = logging.getLogger(__name__)
+
+
+def get_parser():
+ driver_exe = "python -m pbcommand.cli.examples.dev_app --resolved-tool-contract "
+ desc = "Dev app for Testing that supports emitting tool contracts"
+ # Can specify libs or other dependencies as (name, version) pairs
+ subcomponents = [("pbcommand", VERSION),
+ ("my_component", "0.1.0"),
+ ("my_component_id", "1.2.3")]
+ # ResourceTypes.*
+ resource_types = (ResourceTypes.TMP_FILE,
+ ResourceTypes.TMP_FILE,
+ ResourceTypes.TMP_DIR)
+
+ # Create an instance of a Pacbio Parser
+ p = get_pbparser(TOOL_ID, VERSION, "Txt App", desc, driver_exe,
+ is_distributed=False, resource_types=resource_types,
+ subcomponents=subcomponents)
+
+ # Add Input Files types
+ p.add_input_file_type(FileTypes.TXT, "txt_in", "Txt file", "Generic Text File")
+ # Add output files types
+ p.add_output_file_type(FileTypes.TXT, "txt_out", "Txt outfile", "Generic Output Txt file", "output.txt")
+ p.add_int("pbcommand.task_options.dev_max_nlines", "max_nlines", 10, "Max Lines", "Max Number of lines to Copy")
+ return p
+
+
+def run_main(input_txt, output_txt, max_nlines):
+ n = 0
+ with open(input_txt, 'r') as r:
+ with open(output_txt, 'w') as w:
+ w.write("# Output Txt file")
+ for line in r:
+ if n >= max_nlines:
+ break
+ w.write(line + "\n")
+ n += 1
+
+ log.info("Completed writing {n} lines".format(n=n))
+ return 0
+
+
+def args_runner(args):
+ return run_main(args.txt_in, args.txt_out, args.max_nlines)
+
+
+def rtc_runner(rtc):
+ return run_main(rtc.task.input_files[0],
+ rtc.task.output_files[0],
+ rtc.task.options["pbcommand.task_options.dev_max_nlines"])
+
+
+def main(argv=sys.argv):
+ return pbparser_runner(argv[1:],
+ get_parser(),
+ args_runner,
+ rtc_runner,
+ log,
+ setup_log)
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/pbcommand/cli/quick.py b/pbcommand/cli/quick.py
new file mode 100644
index 0000000..c147a82
--- /dev/null
+++ b/pbcommand/cli/quick.py
@@ -0,0 +1,269 @@
+import json
+import logging
+import os
+import sys
+from pbcommand.cli import get_default_argparser
+
+from pbcommand.models import (FileTypes, ToolContractTask, ToolContract,
+ InputFileType, OutputFileType, FileType)
+from pbcommand.models.parser import (to_option_schema, JsonSchemaTypes)
+from pbcommand.models.tool_contract import ToolDriver
+from pbcommand.pb_io import (load_resolved_tool_contract_from,
+ write_tool_contract)
+from pbcommand.utils import setup_log
+
+log = logging.getLogger(__name__)
+
+__all__ = ['registry_builder', 'registry_runner', 'Registry']
+
+
+class Constants(object):
+ RTC_DRIVER = 'run-rtc'
+
+
+def _example_main(*args, **kwargs):
+ log.info("Running example main with {a} kw:{k}".format(a=args, k=kwargs))
+ return 0
+
+
+def _file_type_to_input_file_type(file_type, index):
+ fid = "_".join([file_type.file_type_id, str(index)])
+ return InputFileType(file_type.file_type_id,
+ "Label " + fid,
+ repr(file_type),
+ "description for {f}".format(f=fid))
+
+
+def _file_type_to_output_file_type(file_type, index):
+ fid = "_".join([file_type.file_type_id, str(index)])
+ return OutputFileType(file_type.file_type_id,
+ "Label " + fid,
+ repr(file_type),
+ "description for {f}".format(f=file_type),
+ file_type.default_name)
+
+
+def __convert_to_option(jtype, namespace, key, value):
+ opt_id = ".".join([namespace, 'task_options', key])
+ name = "Option {n}".format(n=key)
+ desc = "Option {n} description".format(n=key)
+ opt = to_option_schema(opt_id, jtype, name, desc, value)
+ return opt
+
+
+def _convert_to_option(namespace, key, value):
+ if isinstance(value, basestring):
+ opt = __convert_to_option(JsonSchemaTypes.STR, namespace, key, value)
+ elif isinstance(value, bool):
+ opt = __convert_to_option(JsonSchemaTypes.BOOL, namespace, key, value)
+ elif isinstance(value, int):
+ opt = __convert_to_option(JsonSchemaTypes.INT, namespace, key, value)
+ elif isinstance(value, float):
+ opt = __convert_to_option(JsonSchemaTypes.NUM, namespace, key, value)
+ else:
+ raise TypeError("Unsupported option {k} type. {t} ".format(k=key, t=type(value)))
+
+ return opt
+
+
+def _to_list(x):
+ if isinstance(x, (list, tuple)):
+ return x
+ else:
+ return [x]
+
+
+def _transform_output_ftype(x, i):
+ if isinstance(x, FileType):
+ return _file_type_to_output_file_type(x, i)
+ elif isinstance(x, OutputFileType):
+ return x
+ else:
+ raise TypeError("Unsupported type {t} value {x}".format(x=x, t=type(x)))
+
+
+class Registry(object):
+ def __init__(self, tool_namespace, driver_base):
+ self.namespace = tool_namespace
+ self.driver_base = driver_base
+ # id -> func(rtc)
+ self.rtc_runners = {}
+
+ def __repr__(self):
+ _d = dict(k=self.__class__.__name__, n=self.namespace,
+ d=self.driver_base, t=len(self.rtc_runners))
+ return "<{k} {n} {d} tool-contracts:{t} >".format(**_d)
+
+ def __call__(self, tool_id, version, input_types, output_types, options=None, nproc=1, is_distributed=True):
+ def _w(func):
+ """
+
+ Task Options are provided as 'naked' non-namespaced values and
+ are automatically type detected and converted to a PacBioOption
+
+ """
+ # support list or a single value
+ itypes = _to_list(input_types)
+ otypes = _to_list(output_types)
+
+ global_id = ".".join([self.namespace, 'tasks', tool_id])
+ name = "Tool {n}".format(n=tool_id)
+ desc = "Quick tool {n} {g}".format(n=tool_id, g=global_id)
+
+ input_file_types = [_file_type_to_input_file_type(ft, i) for i, ft in enumerate(itypes)]
+ output_file_types = [_transform_output_ftype(ft, i) for i, ft in enumerate(otypes)]
+
+ if options is None:
+ tool_options = []
+ else:
+ tool_options = [_convert_to_option(self.namespace, key, value) for key, value in options.iteritems()]
+
+ resource_types = []
+ task = ToolContractTask(global_id, name, desc, version, is_distributed,
+ input_file_types, output_file_types, tool_options, nproc, resource_types)
+ # trailing space is for 'my-tool run-rtc ' /path/to/rtc.json
+ driver_exe = " ".join([self.driver_base, Constants.RTC_DRIVER, " "])
+ driver = ToolDriver(driver_exe, )
+ tc = ToolContract(task, driver)
+ self.rtc_runners[tc] = func
+ return _w
+
+ def to_summary(self):
+ xs = []
+ x = xs.append
+ x("Registry namespace:{n} driverbase:{d}".format(n=self.namespace, d=self.driver_base))
+ for tc, func in self.rtc_runners.iteritems():
+ x(str(tc))
+
+ return "\n".join(xs)
+
+
+def registry_builder(tool_namespace, driver_base):
+ r = Registry(tool_namespace, driver_base)
+ return r
+
+
+def _subparser_builder(subparser, name, description, options_func, exe_func):
+ p = subparser.add_parser(name, help=description)
+ options_func(p)
+ # I strongly dislike this.
+ p.set_defaults(func=exe_func)
+ return p
+
+
+def _add_run_rtc_options(p):
+ p.add_argument('rtc_path', type=str, help="Path to resolved tool contract")
+ return p
+
+
+def _add_emit_all_tcs_options(p):
+ p.add_argument('-o', '--output_dir', type=str, default=os.getcwd(),
+ help='Emit all Tool Contracts to output directory')
+ return p
+
+
+def _add_emit_tc_options(p):
+ p.add_argument('tc_id', type=str, help="Tool Contract Id")
+ return p
+
+
+def __args_summary_runner(registry):
+ def _w(args):
+ log.info("Registry {r}".format(r=registry))
+ log.info("\n" + registry.to_summary())
+ print registry.to_summary()
+ return 0
+ return _w
+
+
+def __args_rtc_runner(registry):
+ def _w(args):
+ # FIXME.
+ setup_log(log, level=logging.DEBUG)
+
+ log.info("Registry {r}".format(r=registry))
+ log.info("loading RTC from {i}".format(i=args.rtc_path))
+ rtc = load_resolved_tool_contract_from(args.rtc_path)
+ id_funcs = {t.task.task_id:func for t, func in registry.rtc_runners.iteritems()}
+ func = id_funcs.get(rtc.task.task_id, None)
+ if func is None:
+ sys.stderr.write("ERROR. Unknown tool contract id {x}".format(x=rtc.task.task_id))
+ return -1
+ else:
+ exit_code = func(rtc)
+ log.info("Completed running {r} exitcode {e}".format(r=rtc, e=exit_code))
+ return _w
+
+
+def __args_emit_tc_runner(registry):
+ def _w(args):
+ log.info("Registry {r}".format(r=registry))
+ tc_id = args.tc_id
+ log.info("Emitting TC from {i}".format(i=tc_id))
+ id_tc = {t.task.task_id:t for t in registry.rtc_runners.keys()}
+ log.info(id_tc)
+ tc = id_tc.get(tc_id, None)
+ if tc is None:
+ sys.stderr.write("ERROR. Unable to find tool-contract id {i}".format(i=tc_id))
+ return -1
+ else:
+ print json.dumps(tc.to_dict(), sort_keys=True, indent=4)
+ return 0
+ return _w
+
+
+def __args_emit_all_tcs_runner(registry):
+ def _w(args):
+ log.info("Registry {r}".format(r=registry))
+ log.info(registry.to_summary())
+ log.info("Emitting TCs to {i}".format(i=args.output_dir))
+ tcs = registry.rtc_runners.keys()
+ for tc in tcs:
+ output_file = os.path.join(args.output_dir, tc.task.task_id + "_tool_contract.json")
+ write_tool_contract(tc, output_file)
+ return 0
+ return _w
+
+
+def _to_registry_parser(version, description):
+ def _f(registry):
+ p = get_default_argparser(version, description)
+ sp = p.add_subparsers(help='Commands')
+
+ args_summary_runner = __args_summary_runner(registry)
+ args_rtc_runner = __args_rtc_runner(registry)
+ args_tc_emit = __args_emit_tc_runner(registry)
+ args_tcs_emit = __args_emit_all_tcs_runner(registry)
+
+ _subparser_builder(sp, Constants.RTC_DRIVER, "Run Resolved Tool contract", _add_run_rtc_options, args_rtc_runner)
+ _subparser_builder(sp, 'emit-tool-contracts', "Emit all Tool contracts to output-dir", _add_emit_all_tcs_options, args_tcs_emit)
+ _subparser_builder(sp, 'emit-tool-contract', "Emit a single tool contract by id", _add_emit_tc_options, args_tc_emit)
+ _subparser_builder(sp, 'summary', "Summary of Tool Contracts", lambda x: x, args_summary_runner)
+ return p
+ return _f
+
+
+def registry_runner(registry, argv):
+ """Runs a registry
+
+ 1. Manually build an argparser that has the sub-commands below
+
+ For running:
+
+ my-tool run-rtc /path/to/resolved-tool-contract.json
+
+ For emitting:
+
+ my-tool emit-tool-contracts -o /path/to/output-dir
+ my-tool emit-tool-contract global_id
+
+ :type registry: Registry
+ """
+ log.info("Running registry {r} with args {a}".format(r=registry, a=argv))
+ f = _to_registry_parser('0.1.0', "Multi-quick-tool-runner for {r}".format(r=registry.namespace))
+ p = f(registry)
+ args = p.parse_args(argv)
+ # need to disable this because some subparsers are emitting to stdout
+ # setup_log(log, level=logging.DEBUG)
+ return_code = args.func(args)
+ return return_code
diff --git a/pbcommand/common_options.py b/pbcommand/common_options.py
new file mode 100644
index 0000000..01192d8
--- /dev/null
+++ b/pbcommand/common_options.py
@@ -0,0 +1,80 @@
+"""Common options and utils that can be used in commandline utils"""
+import argparse
+import logging
+import sys
+
+from pbcommand.utils import compose
+
+RESOLVED_TOOL_CONTRACT_OPTION = "--resolved-tool-contract"
+EMIT_TOOL_CONTRACT_OPTION = "--emit-tool-contract"
+
+
+def add_debug_option(p):
+ p.add_argument('--debug', action="store_true", default=False, help="Debug to stdout")
+ return p
+
+
+def add_log_level_option(p):
+ p.add_argument('--log-level', choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'),
+ default='INFO', help="Set log level")
+ return p
+
+
+def add_resolved_tool_contract_option(p):
+ p.add_argument(RESOLVED_TOOL_CONTRACT_OPTION, type=str,
+ help="Run Tool directly from a PacBio Resolved tool contract")
+ return p
+
+
+def add_emit_tool_contract_option(p):
+ p.add_argument(EMIT_TOOL_CONTRACT_OPTION, action="store_true",
+ default=False,
+ help="Emit Tool Contract to stdout")
+ return p
+
+
+def add_base_options(p):
+ funcs = [add_debug_option,
+ add_log_level_option]
+ fs = compose(*funcs)
+ return fs(p)
+
+
+def add_base_options_with_emit_tool_contract(p):
+ funcs = [add_base_options,
+ add_resolved_tool_contract_option,
+ add_emit_tool_contract_option]
+ fs = compose(*funcs)
+ return fs(p)
+
+
+def _to_print_message_action(msg):
+
+ class PrintMessageAction(argparse.Action):
+
+ """Print message and exit"""
+
+ def __call__(self, parser, namespace, values, option_string=None):
+ sys.stdout.write(msg + "\n")
+ sys.exit(0)
+
+ return PrintMessageAction
+
+
+def add_subcomponent_versions_option(p, subcomponents):
+ """Add subcomponents to a subparser to provide more information
+ about the tool's dependencies.
+
+ Subcomponents must be provided as a list of tuples (component, version)
+ """
+ max_length = max(len(x) for x, _ in subcomponents)
+ pad = 2
+ msg = "\n" .join([" : ".join([x.rjust(max_length + pad), y]) for x, y in subcomponents])
+
+ action = _to_print_message_action(msg)
+ p.add_argument("--versions",
+ nargs=0,
+ help="Show versions of individual components",
+ action=action)
+
+ return p
diff --git a/pbcommand/engine/__init__.py b/pbcommand/engine/__init__.py
new file mode 100644
index 0000000..899a721
--- /dev/null
+++ b/pbcommand/engine/__init__.py
@@ -0,0 +1 @@
+from .runner import run_cmd, ExtCmdResult
diff --git a/pbcommand/engine/runner.py b/pbcommand/engine/runner.py
new file mode 100644
index 0000000..4af52a4
--- /dev/null
+++ b/pbcommand/engine/runner.py
@@ -0,0 +1,77 @@
+"""Utils for Running an external process"""
+
+import logging
+import tempfile
+import shlex
+import platform
+import subprocess
+import time
+from collections import namedtuple
+
+log = logging.getLogger(__name__)
+
+ExtCmdResult = namedtuple("ExtCmdResult", "exit_code cmd run_time")
+
+
+def run_cmd(cmd, stdout_fh, stderr_fh, shell=True, time_out=None):
+ """Run external command
+
+
+ :param cmd: External command
+ :param time_out: Timeout in seconds.
+ :type time_out: None | int
+
+ :return: ExtCmdResult
+
+ This could all be bundled into a context manager
+
+ with RunCommand('/path/stdout', '/path/to/stderr') as r:
+ r.exe("echo 'exe1'")
+ r.exe("echo 'exe2'")
+ result = r.get_result() # close the file handles
+ """
+ # Clarify with Dave
+ # add simple usecase with no file handles, get stderr back as str
+ # stdout, stderr. In general, stdout can be large
+ # add env={}
+ # sleeptime scaling
+
+ started_at = time.time()
+ # Most of the current pacbio shell commands aren't shlex-able
+ if not shell:
+ cmd = shlex.split(cmd)
+
+ hostname = platform.node()
+ log.debug("calling cmd '{c}' on {h}".format(c=cmd, h=hostname))
+ process = subprocess.Popen(cmd, stderr=stderr_fh, stdout=stdout_fh, shell=shell)
+
+ # This needs a better dynamic model
+ max_sleep_time = 5
+ sleep_time = 0.1
+ dt = 0.1
+
+ process.poll()
+ while process.returncode is None:
+ process.poll()
+ time.sleep(sleep_time)
+ run_time = time.time() - started_at
+ if time_out is not None:
+ if run_time > time_out:
+ log.warn("Exceeded TIMEOUT of {t}. Killing cmd '{c}'".format(t=time_out, c=cmd))
+ try:
+ # ask for forgiveness model
+ process.kill()
+ except OSError:
+ # already been killed
+ pass
+ if sleep_time < max_sleep_time:
+ sleep_time += dt
+
+ run_time = time.time() - started_at
+
+ run_time = run_time
+ returncode = process.returncode
+ log.debug("returncode is {r} in {s:.2f} sec.".format(r=process.returncode,
+ s=run_time))
+
+ return ExtCmdResult(returncode, cmd, run_time)
diff --git a/pbcommand/interactive_resolver.py b/pbcommand/interactive_resolver.py
new file mode 100644
index 0000000..10b7d0b
--- /dev/null
+++ b/pbcommand/interactive_resolver.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+import os
+import sys
+import warnings
+
+from pbcommand.cli import get_default_argparser
+from pbcommand.models import SymbolTypes
+from pbcommand.pb_io import (load_tool_contract_from,
+ write_resolved_tool_contract)
+
+from pbcommand.resolver import resolve_tool_contract
+
+try:
+ from prompt_toolkit.filters import Always
+ from prompt_toolkit.shortcuts import get_input
+except ImportError:
+ sys.stderr.write("interactive resolver requires 'prompt_toolkit' (pip install prompt_toolkit)\n")
+ raise
+
+
+def run_main(tc):
+ """:type tc: ToolContract"""
+ print "Loaded tc {c}".format(c=tc)
+
+ if tc.task.nproc == SymbolTypes.MAX_NPROC:
+ nproc = get_input('Enter max nproc: ')
+ else:
+ # not quite right
+ nproc = 1
+
+ output_dir = get_input('Output Directory: ', enable_system_bindings=Always())
+ output_dir = os.path.abspath(output_dir)
+
+ input_files = []
+ for i, input_type in enumerate(tc.task.input_file_types):
+ in_path = get_input(" {i} file {p} path :".format(i=i, p=input_type))
+ if not os.path.exists(in_path):
+ warnings.warn("Unable to find {p}".format(p=in_path))
+ input_files.append(in_path)
+
+ tool_options = {}
+ rtc = resolve_tool_contract(tc, input_files, output_dir, '/tmp', int(nproc), tool_options)
+ print rtc
+
+ file_name = tc.task.task_id + "_resolved_tool_contract.json"
+ rtc_path = os.path.join(output_dir, file_name)
+ print "writing RTC to {f}".format(f=rtc_path)
+ write_resolved_tool_contract(rtc, rtc_path)
+
+ return rtc
+
+
+def _run_main(args):
+ return run_main(load_tool_contract_from(args.tc_path))
+
+
+def get_parser():
+ p = get_default_argparser("0.1.0", "Interactive tool for resolving Tool Contracts")
+ p.add_argument("tc_path", type=str, help='Path to Tool Contract')
+ p.set_defaults(func=_run_main)
+ return p
+
+
+def main(argv=sys.argv):
+ p = get_parser()
+ args = p.parse_args(argv[1:])
+ args.func(args)
+ return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/pbcommand/models/__init__.py b/pbcommand/models/__init__.py
new file mode 100644
index 0000000..4936173
--- /dev/null
+++ b/pbcommand/models/__init__.py
@@ -0,0 +1,6 @@
+from .common import (FileType, FileTypes, TaskTypes, ResourceTypes, SymbolTypes,
+ PipelineChunk, DataStoreFile, DataStore)
+from .tool_contract import *
+from .parser import (get_pbparser,
+ get_gather_pbparser,
+ get_scatter_pbparser, PbParser)
diff --git a/pbcommand/models/common.py b/pbcommand/models/common.py
new file mode 100644
index 0000000..30bb753
--- /dev/null
+++ b/pbcommand/models/common.py
@@ -0,0 +1,446 @@
+"""Core models used in the ToolContract and Resolved ToolContract
+
+
+Large parts of this are pulled from pbsmrtpipe.
+
+Author: Michael Kocher
+"""
+import json
+import logging
+import os
+import re
+import warnings
+import functools
+import datetime
+
+log = logging.getLogger(__name__)
+
+REGISTERED_FILE_TYPES = {}
+
+
+class PacBioNamespaces(object):
+ # File Types
+ # PBSMRTPIPE_FILE_PREFIX = 'pbsmrtpipe.files'
+ # NEW File Type Identifier style Prefix
+ NEW_PBSMRTPIPE_FILE_PREFIX = "PacBio.FileTypes"
+ # New DataSet Identifier Prefix
+ DATASET_FILE_PREFIX = "PacBio.DataSet"
+
+ PB_INDEX = "PacBio.Index"
+
+ # Task Ids
+ PBSMRTPIPE_TASK_PREFIX = 'pbsmrtpipe.tasks'
+
+ PB_TASK_TYPES = 'pbsmrtpipe.task_types'
+
+ # Task Options
+ PBSMRTPIPE_TASK_OPTS_PREFIX = 'pbsmrtpipe.task_options'
+ # Workflow Level Options
+ PBSMRTPIPE_OPTS_PREFIX = 'pbsmrtpipe.options'
+ # Constants
+ PBSMRTPIPE_CONSTANTS_PREFIX = 'pbsmrtpipe.constants'
+ # Pipelines
+ PBSMRTPIPE_PIPELINES = "pbsmrtpipe.pipelines"
+
+
+def __to_type(prefix, name):
+ return ".".join([prefix, name])
+
+to_constant_ns = functools.partial(__to_type, PacBioNamespaces.PBSMRTPIPE_CONSTANTS_PREFIX)
+to_file_ns = functools.partial(__to_type, PacBioNamespaces.NEW_PBSMRTPIPE_FILE_PREFIX)
+to_ds_ns = functools.partial(__to_type, PacBioNamespaces.DATASET_FILE_PREFIX)
+to_task_option_ns = functools.partial(__to_type, PacBioNamespaces.PBSMRTPIPE_TASK_OPTS_PREFIX)
+to_task_ns = functools.partial(__to_type, PacBioNamespaces.PBSMRTPIPE_TASK_PREFIX)
+to_task_types_ns = functools.partial(__to_type, PacBioNamespaces.PB_TASK_TYPES)
+to_workflow_option_ns = functools.partial(__to_type, PacBioNamespaces.PBSMRTPIPE_OPTS_PREFIX)
+to_pipeline_ns = functools.partial(__to_type, PacBioNamespaces.PBSMRTPIPE_PIPELINES)
+to_index_ns = functools.partial(__to_type, PacBioNamespaces.PB_INDEX)
+
+
+class TaskTypes(object):
+ # This is really TC types
+
+ STANDARD = to_task_types_ns("standard")
+ SCATTERED = to_task_types_ns("scattered")
+ GATHERED = to_task_types_ns("gathered")
+
+
+class SymbolTypes(object):
+
+ """*Symbols* that are understood during resolving, such as max number of
+ processors, Max Chunks"""
+ MAX_NPROC = '$max_nproc'
+ MAX_NCHUNKS = '$max_nchunks'
+ TASK_TYPE = '$task_type'
+ RESOLVED_OPTS = '$ropts'
+ SCHEMA_OPTS = '$opts_schema'
+ OPTS = '$opts'
+ NCHUNKS = '$nchunks'
+ NPROC = '$nproc'
+
+
+class ResourceTypes(object):
+
+ """Resources such as tmp dirs and files, log files"""
+ TMP_DIR = '$tmpdir'
+ TMP_FILE = '$tmpfile'
+ LOG_FILE = '$logfile'
+ # tasks can write output to this directory
+ OUTPUT_DIR = '$outputdir'
+ # Not sure this is a good idea
+ #TASK_DIR = '$taskdir'
+
+ @classmethod
+ def ALL(cls):
+ return cls.TMP_DIR, cls.TMP_FILE, cls.LOG_FILE, cls.OUTPUT_DIR
+
+ @classmethod
+ def is_tmp_resource(cls, name):
+ return name in (cls.TMP_FILE, cls.TMP_DIR)
+
+ @classmethod
+ def is_valid(cls, attr_name):
+ return attr_name in cls.ALL()
+
+
+class _RegisteredFileType(type):
+
+ def __init__(cls, name, bases, dct):
+ super(_RegisteredFileType, cls).__init__(name, bases, dct)
+
+ def __call__(cls, *args, **kwargs):
+ if len(args) != 4:
+ log.error(args)
+ raise ValueError("Incorrect initialization for {c}".format(c=cls.__name__))
+
+ file_type_id, base_name, file_ext, mime_type = args
+ file_type = REGISTERED_FILE_TYPES.get(file_type_id, None)
+
+ if file_type is None:
+ file_type = super(_RegisteredFileType, cls).__call__(*args)
+ #log.debug("Registering file type '{i}'".format(i=file_type_id))
+ REGISTERED_FILE_TYPES[file_type_id] = file_type
+ else:
+ # print warning if base name, ext, mime type aren't the same
+ attrs_names = [('base_name', base_name),
+ ('ext', file_ext),
+ ('mime_type', mime_type)]
+
+ for attrs_name, value in attrs_names:
+ v = getattr(file_type, attrs_name)
+ if v != value:
+ _msg = "Attempting to register a file with a different '{x}' -> {v} (expected {y})".format(x=attrs_name, v=v, y=value)
+ log.warn(_msg)
+ warnings.warn(_msg)
+
+ return file_type
+
+
+class FileType(object):
+ __metaclass__ = _RegisteredFileType
+
+ def __init__(self, file_type_id, base_name, ext, mime_type):
+ self.file_type_id = file_type_id
+ self.base_name = base_name
+ self.ext = ext
+ self.mime_type = mime_type
+
+ if file_type_id not in REGISTERED_FILE_TYPES:
+ REGISTERED_FILE_TYPES[file_type_id] = self
+
+ @property
+ def default_name(self):
+ return ".".join([self.base_name, self.ext])
+
+ def __eq__(self, other):
+ if isinstance(other, self.__class__):
+ if self.file_type_id == other.file_type_id:
+ if self.base_name == other.base_name:
+ if self.ext == other.ext:
+ return True
+ return False
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+ def __repr__(self):
+ _d = dict(k=self.__class__.__name__,
+ i=self.file_type_id,
+ n=self.default_name)
+ return "<{k} id={i} name={n} >".format(**_d)
+
+
+class MimeTypes(object):
+ JSON = 'application/json'
+ TXT = 'text/plain'
+ CSV = 'text/csv'
+ XML = 'application/xml'
+ BINARY = 'application/octet-stream'
+ PICKLE = 'application/python-pickle'
+
+
+class FileTypes(object):
+
+ """Registry of all PacBio Files types
+
+ This needs to be cleaned up and solidified. The old pre-SA3 file types need to be deleted.
+
+ """
+
+ # generic Txt file
+ TXT = FileType(to_file_ns('txt'), 'file', 'txt', MimeTypes.TXT)
+ # Generic Log file
+ LOG = FileType(to_file_ns('log'), 'file', 'log', MimeTypes.TXT)
+
+ # THIS NEEDS TO BE CONSISTENT with scala code. When the datastore
+ # is written to disk the file type id's might be translated to
+ # the DataSet style file type ids.
+ REPORT = FileType(to_file_ns('JsonReport'), "report", "json", MimeTypes.JSON)
+
+ # this will go away soon in favor of using a more type based model to
+ # distinguish between scatter and gather file types
+ CHUNK = FileType(to_file_ns("CHUNK"), "chunk", "json", MimeTypes.JSON)
+ GCHUNK = FileType(to_file_ns("GCHUNK"), 'gather_chunk', "json", MimeTypes.JSON)
+ SCHUNK = FileType(to_file_ns("SCHUNK"), "scatter_chunk", "json", MimeTypes.JSON)
+
+ FASTA = FileType(to_file_ns('Fasta'), "file", "fasta", MimeTypes.TXT)
+ FASTQ = FileType(to_file_ns('Fastq'), "file", "fastq", MimeTypes.TXT)
+
+ # Not sure this should be a special File Type?
+ INPUT_XML = FileType(to_file_ns('input_xml'), "input", "xml", MimeTypes.XML)
+ FOFN = FileType(to_file_ns("generic_fofn"), "generic", "fofn", MimeTypes.TXT)
+ MOVIE_FOFN = FileType(to_file_ns('movie_fofn'), "movie", "fofn", MimeTypes.TXT)
+ RGN_FOFN = FileType(to_file_ns('rgn_fofn'), "region", "fofn", MimeTypes.TXT)
+
+ RS_MOVIE_XML = FileType(to_file_ns("rs_movie_metadata"), "file", "rs_movie.metadata.xml", MimeTypes.XML)
+ REF_ENTRY_XML = FileType(to_file_ns('reference_info_xml'), "reference.info.xml", "xml", MimeTypes.XML)
+
+ ALIGNMENT_CMP_H5 = FileType(to_file_ns('alignment_cmp_h5'), "alignments", "cmp.h5", MimeTypes.BINARY)
+ # I am not sure this should be a first class file
+ BLASR_M4 = FileType(to_file_ns('blasr_file'), 'blasr', 'm4', MimeTypes.TXT)
+ BAM = FileType(to_file_ns('bam'), "alignments", "bam", MimeTypes.BINARY)
+ BAMBAI = FileType(to_file_ns('bam_bai'), "alignments", "bam.bai", MimeTypes.BINARY)
+
+ BED = FileType(to_file_ns('bed'), "file", "bed", MimeTypes.TXT)
+ SAM = FileType(to_file_ns('sam'), "alignments", "sam", MimeTypes.BINARY)
+ VCF = FileType(to_file_ns('vcf'), "file", "vcf", MimeTypes.TXT)
+ GFF = FileType(to_file_ns('gff'), "file", "gff", MimeTypes.TXT)
+ CSV = FileType(to_file_ns('csv'), "file", "csv", MimeTypes.CSV)
+ XML = FileType(to_file_ns('xml'), "file", "xml", 'application/xml')
+ # Generic Json File
+ JSON = FileType(to_file_ns("json"), "file", "json", MimeTypes.JSON)
+ # Generic H5 File
+ H5 = FileType(to_file_ns("h5"), "file", "h5", MimeTypes.BINARY)
+ # Generic Python pickle XXX EVIL
+ PICKLE = FileType(to_file_ns("pickle"), "file", "pickle", MimeTypes.PICKLE)
+
+ # ******************* NEW SA3 File Types ********************
+ # DataSet Types. The default file names should have well-defined agreed
+ # upon format. See what Dave did for the bam files.
+ # https://github.com/PacificBiosciences/PacBioFileFormats
+ DS_SUBREADS_H5 = FileType(to_ds_ns("HdfSubreadSet"), "file", "hdfsubreadset.xml", MimeTypes.XML)
+ DS_SUBREADS = FileType(to_ds_ns("SubreadSet"), "file", "subreadset.xml", MimeTypes.XML)
+ DS_CCS = FileType(to_ds_ns("ConsensusReadSet"), "file", "consensusreadset.xml", MimeTypes.XML)
+ DS_REF = FileType(to_ds_ns("ReferenceSet"), "file", "referenceset.xml", MimeTypes.XML)
+ DS_ALIGN = FileType(to_ds_ns("AlignmentSet"), "file", "alignmentset.xml", MimeTypes.XML)
+ DS_CONTIG = FileType(to_ds_ns("ContigSet"), "file", "contigset.xml", MimeTypes.XML)
+ DS_BARCODE = FileType(to_ds_ns("BarcodeSet"), "file", "barcodeset.xml", MimeTypes.XML)
+ DS_ALIGN_CCS = FileType(to_ds_ns("ConsensusAlignmentSet"), "file",
+ "consensusalignmentset.xml", MimeTypes.XML)
+
+ # Index Files
+ I_SAM = FileType(to_index_ns("SamIndex"), "file", "sam.index", MimeTypes.BINARY)
+ I_SAW = FileType(to_index_ns("SaWriterIndex"), "file", "sa", MimeTypes.BINARY)
+
+ # PacBio Defined Formats
+ FASTA_BC = FileType("PacBio.BarcodeFile.BarcodeFastaFile", "file", "barcode.fasta", MimeTypes.TXT)
+ # No ':' or '"' in the id
+ FASTA_REF = FileType("PacBio.ReferenceFile.ReferenceFastaFile", "file", "pbreference.fasta", MimeTypes.TXT)
+
+ # FIXME. Add Bax/Bam Formats here. This should replace the existing pre-SA3 formats.
+ BAM_ALN = FileType("PacBio.AlignmentFile.AlignmentBamFile", "file", "alignment.bam", MimeTypes.BINARY)
+ BAM_SUB = FileType("PacBio.SubreadFile.SubreadBamFile", "file", "subread.bam", MimeTypes.BINARY)
+ BAM_CCS = FileType("PacBio.ConsensusReadFile.ConsensusReadBamFile", "file", "ccs.bam", MimeTypes.BINARY)
+
+ BAX = FileType("PacBio.SubreadFile.BaxFile", "file", "bax.h5", MimeTypes.BINARY)
+
+ # THIS IS EXPERIMENTAL, for internal analysis. DO NOT use
+ COND = FileType(to_file_ns("COND"), "file", "conditions.json", MimeTypes.JSON)
+
+ @staticmethod
+ def is_valid_id(file_type_id):
+ return file_type_id in REGISTERED_FILE_TYPES
+
+ @staticmethod
+ def ALL():
+ return REGISTERED_FILE_TYPES
+
+
+class DataStoreFile(object):
+
+ def __init__(self, uuid, file_id, type_id, path):
+ # adding this for consistency. In the scala code, the unique id must be
+ # a uuid format
+ self.uuid = uuid
+ # this must be globally unique. This is used to provide context to where
+ # the file originated from (i.e., the tool author)
+ self.file_id = file_id
+ # Consistent with a value in FileTypes
+ self.file_type_id = type_id
+ self.path = path
+ self.file_size = os.path.getsize(path)
+ self.created_at = datetime.datetime.fromtimestamp(os.path.getctime(path))
+ self.modified_at = datetime.datetime.fromtimestamp(os.path.getmtime(path))
+
+ def __repr__(self):
+ _d = dict(k=self.__class__.__name__,
+ i=self.file_id,
+ t=self.file_type_id,
+ p=os.path.basename(self.path))
+ return "<{k} {i} type:{t} filename:{p} >".format(**_d)
+
+ def to_dict(self):
+ return dict(sourceId=self.file_id,
+ uniqueId=str(self.uuid),
+ fileTypeId=self.file_type_id,
+ path=self.path,
+ fileSize=self.file_size,
+ createdAt=_datetime_to_string(self.created_at),
+ modifiedAt=_datetime_to_string(self.modified_at))
+
+ @staticmethod
+ def from_dict(d):
+ # FIXME. This isn't quite right.
+ to_a = lambda x: x.encode('ascii', 'ignore')
+ to_k = lambda x: to_a(d[x])
+ return DataStoreFile(to_k('uniqueId'), to_k('sourceId'), to_k('fileTypeId'), to_k('path'))
+
+
+def _datetime_to_string(dt):
+ return dt.strftime('%Y-%m-%dT%H:%M:%S')
+
+
+class DataStore(object):
+ version = "0.2.2"
+
+ def __init__(self, ds_files, created_at=None):
+ """
+
+ :type ds_files: list[DataStoreFile]
+ """
+ self.files = {f.uuid: f for f in ds_files}
+ self.created_at = datetime.datetime.now() if created_at is None else created_at
+ self.updated_at = datetime.datetime.now()
+
+ def __repr__(self):
+ _d = dict(n=len(self.files), k=self.__class__.__name__)
+ return "<{k} nfiles={n} >".format(**_d)
+
+ def add(self, ds_file):
+ if isinstance(ds_file, DataStoreFile):
+ self.files[ds_file.uuid] = ds_file
+ self.updated_at = datetime.datetime.now()
+ else:
+ raise TypeError("DataStoreFile expected. Got type {t} for {d}".format(t=type(ds_file), d=ds_file))
+
+ def to_dict(self):
+ fs = [f.to_dict() for i, f in self.files.iteritems()]
+ _d = dict(version=self.version,
+ createdAt=_datetime_to_string(self.created_at),
+ updatedAt=_datetime_to_string(self.updated_at), files=fs)
+ return _d
+
+ def _write_json(self, file_name, permission):
+ with open(file_name, permission) as f:
+ s = json.dumps(self.to_dict(), indent=4, sort_keys=True)
+ f.write(s)
+
+ def write_json(self, file_name):
+ # if the file exists, should it raise?
+ self._write_json(file_name, 'w')
+
+ def write_update_json(self, file_name):
+ """Overwrite Datastore with current state"""
+ self._write_json(file_name, 'w+')
+
+ @staticmethod
+ def load_from_json(path):
+ with open(path, 'r') as reader:
+ d = json.loads(reader.read())
+
+ ds_files = [DataStoreFile.from_dict(x) for x in d['files']]
+ return DataStore(ds_files)
+
+
+def _is_chunk_key(k):
+ return k.startswith(PipelineChunk.CHUNK_KEY_PREFIX)
+
+
+class MalformedChunkKeyError(ValueError):
+
+ """Chunk Key does NOT adhere to the spec"""
+ pass
+
+
+class PipelineChunk(object):
+
+ CHUNK_KEY_PREFIX = "$chunk."
+ RX_CHUNK_KEY = re.compile(r'^\$chunk\.([A-z0-9_]*)')
+
+ def __init__(self, chunk_id, **kwargs):
+ """
+
+ kwargs is a key-value store. keys that begin "$chunk." are considered
+ to be semantically understood by workflow and can be "routed" to
+ chunked task inputs.
+
+ Keys that don't begin with "$chunk." are considered metadata.
+
+
+ :param chunk_id: Chunk id
+ :type chunk_id: str
+
+ """
+ if self.RX_CHUNK_KEY.match(chunk_id) is not None:
+ raise MalformedChunkKeyError("'{c}' expected {p}".format(c=chunk_id, p=self.RX_CHUNK_KEY.pattern))
+
+ self.chunk_id = chunk_id
+ # loose key-value pair
+ self._datum = kwargs
+
+ def __repr__(self):
+ _d = dict(k=self.__class__.__name__, i=self.chunk_id, c=",".join(self.chunk_keys))
+ return "<{k} id='{i}' chunk keys={c} >".format(**_d)
+
+ def set_chunk_key(self, chunk_key, value):
+ """Overwrite or add a chunk_key => value to the Chunk datum
+
+ the chunk-key can be provided with or without the '$chunk.' prefix
+ """
+ if not chunk_key.startswith(PipelineChunk.CHUNK_KEY_PREFIX):
+ chunk_key = PipelineChunk.CHUNK_KEY_PREFIX + chunk_key
+ self._datum[chunk_key] = value
+
+ def set_metadata_key(self, metadata_key, value):
+ """Set chunk metadata key => value
+
+ metadata key must NOT begin with $chunk. format
+ """
+ if metadata_key.startswith(PipelineChunk.CHUNK_KEY_PREFIX):
+ raise ValueError("Cannot set chunk-key values. {i}".format(i=metadata_key))
+ self._datum[metadata_key] = value
+
+ @property
+ def chunk_d(self):
+ return {k: v for k, v in self._datum.iteritems() if _is_chunk_key(k)}
+
+ @property
+ def chunk_keys(self):
+ return self.chunk_d.keys()
+
+ @property
+ def chunk_metadata(self):
+ return {k: v for k, v in self._datum.iteritems() if not _is_chunk_key(k)}
+
+ def to_dict(self):
+ return {'chunk_id': self.chunk_id, 'chunk': self._datum}
diff --git a/pbcommand/models/parser.py b/pbcommand/models/parser.py
new file mode 100644
index 0000000..3da3eba
--- /dev/null
+++ b/pbcommand/models/parser.py
@@ -0,0 +1,561 @@
+""" Commandline Parser for Tools. Supports Tool Contracts
+
+# Author: Michael Kocher
+"""
+import abc
+import os
+import logging
+import argparse
+import functools
+import re
+
+ # there's a problem with functools32 and jsonschema. This import raises an
+ # import error.
+#import jsonschema
+
+from pbcommand.common_options import (add_base_options_with_emit_tool_contract,
+ add_subcomponent_versions_option)
+from pbcommand.models import SymbolTypes
+from .tool_contract import (ToolDriver,
+ InputFileType, OutputFileType,
+ ToolContract, ToolContractTask,
+ ScatterToolContractTask, GatherToolContractTask)
+
+log = logging.getLogger(__name__)
+
+__version__ = "0.1.1"
+
+__all__ = ["PbParser",
+ "PyParser",
+ "ToolContractParser",
+ "get_pbparser",
+ "get_scatter_pbparser",
+ "get_gather_pbparser"]
+
+ RX_TASK_ID = re.compile(r'^([A-Za-z0-9_]*)\.tasks\.([A-Za-z0-9_]*)$')  # not A-z: that range also spans [ \ ] ^ `
+ RX_TASK_OPTION_ID = re.compile(r'^([A-Za-z0-9_]*)\.task_options\.([A-Za-z0-9_\.]*)')
+
+
+def _to_file_type(format_):
+ return "pacbio.file_types.{x}".format(x=format_)
+
+
+class JsonSchemaTypes(object):
+ # array is a native type, but not supported
+ BOOL = "boolean"
+ INT = "integer"
+ NUM = "number"
+ STR = "string"
+ NULL = "null"
+ OBJ = "object"
+
+ # Optional values e.g., Option[String]
+ OPT_BOOL = [BOOL, NULL]
+ OPT_INT = [INT, NULL]
+ OPT_STR = [STR, NULL]
+ OPT_NUM = [NUM, NULL]
+
+
+def _validate_file(label, path):
+ if os.path.exists(path):
+ return os.path.abspath(path)
+ else:
+ raise IOError("Unable to find '{x}' file '{p}'".format(x=label, p=path))
+
+
+def _validate_option_or_cast(dtype, dvalue):
+ if isinstance(dvalue, dtype):
+ return dvalue
+ else:
+ # XXX this is almost always going to be the case...
+ if isinstance(dvalue, basestring):
+ try:
+ return dtype(dvalue)
+ except ValueError as e:
+ pass
+ raise TypeError("Invalid option type: '{a}' provided, '{e}' "
+ "expected".format(a=dvalue, e=dtype))
+
+
+def _validate_option(dtype, dvalue):
+ if isinstance(dvalue, dtype):
+ return dvalue
+ else:
+ raise TypeError("Invalid option type: '{a}' provided, '{e}' "
+ "expected".format(a=dvalue, e=dtype))
+
+
+def _validate_id(prog, idtype, tid):
+ if prog.match(tid):
+ return tid
+ else:
+ raise ValueError("Invalid format {t}: '{i}' {p}".format(t=idtype, i=tid, p=repr(prog.pattern)))
+
+_validate_task_id = functools.partial(_validate_id, RX_TASK_ID, 'task id')
+_validate_task_option_id = functools.partial(_validate_id, RX_TASK_OPTION_ID,
+ 'task option id')
+
+
+def to_opt_id(namespace, s):
+ return ".".join([namespace, "options", s])
+
+
+def validate_value(schema, v):
+ import jsonschema
+ return jsonschema.validate(v, schema)
+
+
+def is_valid(schema, v):
+ """Returns True if the value validates against the schema, False otherwise"""
+ import jsonschema
+ try:
+ validate_value(schema, v)
+ return True
+ except jsonschema.ValidationError:
+ pass
+ return False
+
+
+ def validate_schema(f):
+     """Decorator that validates the returned jsonschema against Draft 4 of the spec"""
+     def w(*args, **kwargs):
+         schema = f(*args, **kwargs)
+         import jsonschema
+         jsonschema.Draft4Validator.check_schema(schema)  # raises SchemaError; the constructor alone checks nothing
+         return schema
+     return w
+
+
+def to_option_schema(option_id, dtype_or_dtypes, display_name, description, default_value):
+ """
+ Simple util factory method
+ :param dtype_or_dtypes: single data type or list of data types
+ :param option_id: globally unique task option id. Must begin with
+ 'pbsmrtpipe.task_options.'
+ :param display_name: display name of task options
+ :param description: Short description of the task options
+ :param default_value: default value of the task option
+ """
+ # annoying that you can't specify a tuple
+ if isinstance(dtype_or_dtypes, tuple):
+ dtype_or_dtypes = list(dtype_or_dtypes)
+
+ _validate_task_option_id(option_id)
+
+ # Steps toward moving away from JSON schema as the format, but reuse
+ # the jsonschema defined types. Only non-union types are supported.
+ pbd = {"option_id": option_id,
+ "type": dtype_or_dtypes,
+ "default": default_value,
+ "name": display_name,
+ "description": description}
+
+ d = {'$schema': "http://json-schema.org/draft-04/schema#",
+ 'type': 'object',
+ 'title': "JSON Schema for {o}".format(o=option_id),
+ 'properties': {option_id: {'description': description,
+ 'title': display_name,
+ 'type': dtype_or_dtypes},
+ },
+ "pb_option": pbd
+ }
+
+ d['required'] = [option_id]
+ d['properties'][option_id]['default'] = default_value
+ return d
+
+
+class PbParserBase(object):
+
+ __metaclass__ = abc.ABCMeta
+
+ def __init__(self, tool_id, version, name, description):
+ self.tool_id = _validate_task_id(tool_id)
+ self.version = version
+ self.description = description
+ self.name = name
+
+ def __repr__(self):
+ _d = dict(k=self.__class__.__name__, i=self.tool_id, v=self.version)
+ return "<{k} id:{i} {v} >".format(**_d)
+
+ @abc.abstractmethod
+ def add_input_file_type(self, file_type, file_id, name, description):
+ """
+ Add a mandatory input file parameter. On the Python argparse side,
+ this will be a positional argument.
+
+ :param file_type: file type ID from pbcommand.models.common, e.g.
+ FileTypes.DS_REF
+ :param file_id: parameter name, mainly used on argparse side
+ :param name: plain-English name
+ :param description: help string
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def add_output_file_type(self, file_type, file_id, name, description, default_name):
+ """
+ Add a mandatory output file parameter. On the Python argparse side,
+ this will be a positional argument.
+
+ :param file_type: file type ID from pbcommand.models.common, e.g.
+ FileTypes.DS_REF
+ :param file_id: parameter name, mainly used on argparse side
+ :param name: plain-English name
+ :param description: help string
+ :param default_name: tuple of form (base_name, extension) specifying
+ the default output file name
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def add_int(self, option_id, option_str, default, name, description):
+ """
+ Add an optional integer keyword argument (e.g. "--n=10" or "--n 10" on
+ the command line).
+
+ :param option_id: fully-qualified option name used in tool contract
+ layer, of form "pbcommand.task_options.my_option"
+ :param option_str: shorter parameter name, mainly used in Python
+ argparse layer, but *without* leading dashes
+ :param default: default value (must be an actual integer, not None)
+ :param name: plain-English name
+ :param description: help string
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def add_float(self, option_id, option_str, default, name, description):
+ """
+ Add an optional float keyword argument (e.g. "--n=10" or "--n 10" on
+ the command line).
+
+ :param option_id: fully-qualified option name used in tool contract
+ layer, of form "pbcommand.task_options.my_option"
+ :param option_str: shorter parameter name, mainly used in Python
+ argparse layer, but *without* leading dashes
+ :param default: default value (must be an actual number, not None)
+ :param name: plain-English name
+ :param description: help string
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def add_str(self, option_id, option_str, default, name, description):
+ """
+ Add a generic keyword argument whose type is a string.
+
+ :param option_id: fully-qualified option name used in tool contract
+ layer, of form "pbcommand.task_options.my_option"
+ :param option_str: shorter parameter name, mainly used in Python
+ argparse layer, but *without* leading dashes
+ :param default: default value (can be blank, but not None)
+ :param name: plain-English name
+ :param description: help string
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def add_boolean(self, option_id, option_str, default, name, description):
+ """
+ Add a boolean option.
+
+ :param option_id: fully-qualified option name used in tool contract
+ layer, of form "pbcommand.task_options.my_option"
+ :param option_str: shorter parameter name, mainly used in Python
+ argparse layer, but *without* leading dashes
+ :param default: default boolean value of this option, i.e. the value
+ used when the argument is **not** supplied; on the argparse layer,
+ default=True is equivalent to action="store_false"
+ :param name: plain-English name
+ :param description: help string
+ """
+ raise NotImplementedError
+
+_validate_argparse_int = functools.partial(_validate_option_or_cast, int)
+_validate_argparse_float = functools.partial(_validate_option_or_cast, float)
+_validate_argparse_bool = functools.partial(_validate_option_or_cast, bool)
+_validate_argparse_str = functools.partial(_validate_option_or_cast, str)
+
+
+class PyParser(PbParserBase):
+ """PbParser backend that supports argparse"""
+
+ def __init__(self, tool_id, version, name, description, subcomponents=()):
+ super(PyParser, self).__init__(tool_id, version, name, description)
+ self.parser = argparse.ArgumentParser(version=version,
+ description=description,
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+ add_help=True)
+ if subcomponents:
+ add_subcomponent_versions_option(self.parser, subcomponents)
+
+ def add_input_file_type(self, file_type, file_id, name, description):
+ # this will propagate up the label to the exception
+ vfunc = functools.partial(_validate_file, file_id)
+ self.parser.add_argument(file_id, type=vfunc, help=description)
+
+ def add_output_file_type(self, file_type, file_id, name, description, default_name):
+ self.parser.add_argument(file_id, type=str, help=description)
+
+ def add_int(self, option_id, option_str, default, name, description):
+ # FIXME Need to better define and validate option_str
+ opt = "--" + option_str
+ self.parser.add_argument(opt, type=_validate_argparse_int,
+ help=description,
+ default=_validate_argparse_int(default))
+
+ def add_float(self, option_id, option_str, default, name, description):
+ if isinstance(default, int):
+ default = float(default)
+ opt = "--" + option_str
+ self.parser.add_argument(opt, type=_validate_argparse_float,
+ help=description,
+ default=_validate_argparse_float(default))
+
+ def add_str(self, option_id, option_str, default, name, description):
+ # Fixme
+ opt = "--" + option_str
+ self.parser.add_argument(opt, type=_validate_argparse_str,
+ help=description,
+ default=_validate_argparse_str(default))
+
+ def add_boolean(self, option_id, option_str, default, name, description):
+ """
+ Note, the default value is set by NOT setting the option.
+
+ Example, if you have option_str of --my-option with a default value of True,
+ if --my-option is NOT provided, the value is True, if the --my-option
+ is provided, then the value is false.
+
+ """
+ d = {True: "store_true", False: "store_false"}
+ opt = '--' + option_str
+ self.parser.add_argument(opt, action=d[_validate_argparse_bool(not default)],
+ help=description)
+
+
+class ToolContractParser(PbParserBase):
+ """Parser to support Emitting and running ToolContracts"""
+
+ def __init__(self, tool_id, version, name, description, task_type, driver, nproc_symbol,
+ resource_types):
+ """Keeps the required elements for creating an instance of a
+ ToolContract"""
+ super(ToolContractParser, self).__init__(tool_id, version, name, description)
+ self.input_types = []
+ self.output_types = []
+ self.options = []
+ self.driver = driver
+ self.name = name
+ self.nproc_symbol = nproc_symbol
+ self.resource_types = resource_types
+ self.task_type = task_type
+
+ def add_input_file_type(self, file_type, file_id, name, description):
+ x = InputFileType(file_type.file_type_id, file_id, name, description)
+ self.input_types.append(x)
+
+ def add_output_file_type(self, file_type, file_id, name, description, default_name):
+ x = OutputFileType(file_type.file_type_id, file_id, name, description, default_name)
+ self.output_types.append(x)
+
+ def add_int(self, option_id, option_str, default, name, description):
+ self.options.append(to_option_schema(option_id,
+ JsonSchemaTypes.INT, name, description,
+ _validate_option(int, default)))
+
+ def add_float(self, option_id, option_str, default, name, description):
+ if isinstance(default, int):
+ default = float(default)
+ self.options.append(to_option_schema(option_id,
+ JsonSchemaTypes.NUM, name, description,
+ _validate_option(float, default)))
+
+ def add_str(self, option_id, option_str, default, name, description):
+ self.options.append(to_option_schema(option_id,
+ JsonSchemaTypes.STR, name, description,
+ _validate_option(str, default)))
+
+ def add_boolean(self, option_id, option_str, default, name, description):
+ self.options.append(to_option_schema(option_id,
+ JsonSchemaTypes.BOOL, name, description,
+ _validate_option(bool, default)))
+
+ def to_tool_contract(self):
+ # Not a well formed tool contract: rejected only when there are neither
+ # inputs nor outputs (having just one of the two is accepted here)
+ if not self.input_types and not self.output_types:
+ raise ValueError("Malformed tool contract inputs")
+
+ task = ToolContractTask(self.tool_id,
+ self.name,
+ self.description,
+ self.version,
+ self.task_type,
+ self.input_types,
+ self.output_types,
+ self.options,
+ self.nproc_symbol,
+ self.resource_types)
+ tc = ToolContract(task, self.driver)
+ # this should just return TC, not tc.to_dict()
+ return tc
+
+
+class ScatterToolContractParser(ToolContractParser):
+ def __init__(self, tool_id, version, name, description, task_type, driver, nproc_symbol,
+ resource_types, chunk_keys, nchunks):
+ super(ScatterToolContractParser, self).__init__(tool_id, version, name, description, task_type, driver,
+ nproc_symbol, resource_types)
+ self.chunk_keys = chunk_keys
+ self.nchunks = nchunks
+
+ def to_tool_contract(self):
+ task = ScatterToolContractTask(self.tool_id,
+ self.name,
+ self.description,
+ self.version,
+ self.task_type,
+ self.input_types,
+ self.output_types,
+ self.options,
+ self.nproc_symbol,
+ self.resource_types,
+ self.chunk_keys,
+ self.nchunks)
+ tc = ToolContract(task, self.driver)
+ return tc
+
+
+class GatherToolContractParser(ToolContractParser):
+
+ def to_tool_contract(self):
+ task = GatherToolContractTask(self.tool_id,
+ self.name,
+ self.description,
+ self.version,
+ self.task_type,
+ self.input_types,
+ self.output_types,
+ self.options,
+ self.nproc_symbol,
+ self.resource_types)
+ tc = ToolContract(task, self.driver)
+ return tc
+
+
+class PbParser(PbParserBase):
+ """
+ Wrapper class for managing separate tool contract and argument parsers
+ (stored as tool_contract_parser and arg_parser attributes respectively).
+ """
+
+ def __init__(self, tool_contract_parser, arg_parser, *parsers):
+ """
+
+ :param tool_contract_parser:
+ :type tool_contract_parser: ToolContractParser
+ :param arg_parser:
+ :type arg_parser: PyParser
+ :param parsers:
+ :return:
+ """
+
+ # Tool Contract Parser
+ self.tool_contract_parser = tool_contract_parser
+
+ # python wrapper parser.
+ self.arg_parser = arg_parser
+ # add options, so it will show up via --help
+ add_base_options_with_emit_tool_contract(self.arg_parser.parser)
+
+ # a list of other parsers that adhere to the PbParserBase interface
+ # can be used.
+ self.other_parsers = parsers
+
+ # for now assume parsers have the same version, id, ...
+ tool_id = tool_contract_parser.tool_id
+ version = tool_contract_parser.version
+ name = tool_contract_parser.name
+ description = tool_contract_parser.description
+
+ super(PbParser, self).__init__(tool_id, version, name, description)
+
+ @property
+ def parsers(self):
+ return [self.tool_contract_parser, self.arg_parser]
+
+ def _dispatch(self, f_name, args, kwds):
+ for parser in self.parsers:
+ f = getattr(parser, f_name)
+ f(*args, **kwds)
+
+ def add_input_file_type(self, file_type, file_id, name, description):
+ args = file_type, file_id, name, description
+ self._dispatch("add_input_file_type", args, {})
+
+ def add_output_file_type(self, file_type, file_id, name, description, default_name):
+ args = file_type, file_id, name, description, default_name
+ self._dispatch("add_output_file_type", args, {})
+
+ def add_int(self, option_id, option_str, default, name, description):
+ args = option_id, option_str, default, name, description
+ self._dispatch("add_int", args, {})
+
+ def add_float(self, option_id, option_str, default, name, description):
+ args = option_id, option_str, default, name, description
+ self._dispatch("add_float", args, {})
+
+ def add_str(self, option_id, option_str, default, name, description):
+ args = option_id, option_str, default, name, description
+ self._dispatch("add_str", args, {})
+
+ def add_boolean(self, option_id, option_str, default, name, description):
+ args = option_id, option_str, default, name, description
+ self._dispatch("add_boolean", args, {})
+
+ def to_contract(self):
+ return self.tool_contract_parser.to_tool_contract()
+
+
+def _factory(tool_id, version, name, description, subcomponents):
+ def _f(tc_parser):
+ arg_parser = PyParser(tool_id, version, name, description, subcomponents=subcomponents)
+ return PbParser(tc_parser, arg_parser)
+ return _f
+
+
+def get_pbparser(tool_id, version, name, description, driver_exe, is_distributed=True, nproc=1,
+ resource_types=(), subcomponents=(), serialization='json'):
+ """
+ Central point of creating a Tool contract that can emit and run tool
+ contracts.
+
+ :returns: PbParser object
+ """
+ driver = ToolDriver(driver_exe, serialization=serialization)
+ tc_parser = ToolContractParser(tool_id, version, name, description, is_distributed, driver,
+ nproc, resource_types)
+ return _factory(tool_id, version, name, description, subcomponents)(tc_parser)
+
+
+def get_scatter_pbparser(tool_id, version, name, description, driver_exe, chunk_keys,
+ is_distributed=True, nproc=1, nchunks=SymbolTypes.MAX_NCHUNKS, resource_types=(),
+ subcomponents=(), serialization='json'):
+ """Create a Scatter Tool"""
+ driver = ToolDriver(driver_exe, serialization=serialization)
+ tc_parser = ScatterToolContractParser(tool_id, version, name, description, is_distributed,
+ driver, nproc, resource_types, chunk_keys,
+ nchunks)
+ return _factory(tool_id, version, name, description, subcomponents)(tc_parser)
+
+
+def get_gather_pbparser(tool_id, version, name, description, driver_exe,
+ is_distributed=True, nproc=1, resource_types=(), subcomponents=(), serialization='json'):
+ """Create a Gather tool"""
+ driver = ToolDriver(driver_exe, serialization=serialization)
+ tc_parser = GatherToolContractParser(tool_id, version, name, description,
+ is_distributed, driver, nproc, resource_types)
+ return _factory(tool_id, version, name, description, subcomponents)(tc_parser)
diff --git a/pbcommand/models/report.py b/pbcommand/models/report.py
new file mode 100644
index 0000000..8239554
--- /dev/null
+++ b/pbcommand/models/report.py
@@ -0,0 +1,748 @@
+"""Common PacBio Report model
+
+
+Author: Johann Miller and Michael Kocher
+"""
+
+from collections import defaultdict
+import warnings
+import abc
+import logging
+import json
+import os
+import re
+from pprint import pformat
+
+ # numpy is optional and only used for serialization. It is imported by the
+ # guarded try/except below (which sets _HAS_NUMPY); an unconditional import
+ # here would make this module unusable when numpy is not installed.
+
+log = logging.getLogger(__name__)
+
+__all__ = ['PbReportError',
+ 'Attribute',
+ 'Report',
+ 'Plot',
+ 'PlotGroup',
+ 'Column',
+ 'Table']
+
+import pbcommand
+
+_HAS_NUMPY = False
+
+try:
+ import numpy as np
+ _HAS_NUMPY = True
+except ImportError:
+ pass
+
+
+ def _get_decoder():
+     """
+     Return the JSON encoder class used to serialize reports.
+
+     There's a bit of nonsense here to support the existing pbreports python
+     package.
+
+     numpy is only used for Reports that have Table columns that are numpy
+     arrays. This really should have been strictly defined in the original API
+     to only support native python lists. Similarly with numpy scalars in
+     Report Attributes.
+
+     :return: None when numpy is not available, otherwise a json.JSONEncoder
+         subclass that converts numpy floating scalars to float, numpy
+         integer scalars to int and 1-D arrays to a list of floats
+     """
+     if not _HAS_NUMPY:
+         return None
+
+     class NumpyJsonEncoder(json.JSONEncoder):
+
+         def default(self, obj):
+             # np.floating / np.integer are the public names of the abstract
+             # scalar types; np.core.numerictypes is a private module path
+             if isinstance(obj, np.floating):
+                 return float(obj)
+             if isinstance(obj, np.integer):
+                 return int(obj)
+             if isinstance(obj, np.ndarray) and obj.ndim == 1:
+                 return [float(x) for x in obj]
+             # Let the base class default method raise the TypeError
+             return json.JSONEncoder.default(self, obj)
+
+     return NumpyJsonEncoder
+
+
+ def _to_json_with_decoder(d):
+ """
+ Serialize `d` to a JSON string with sorted keys and an indent of 4.
+
+ The encoder class returned by `_get_decoder` is used when there is one,
+ otherwise the default json encoder is used.
+ """
+ decoder_or_none = _get_decoder()
+ if decoder_or_none is None:
+ return json.dumps(d, sort_keys=True, indent=4)
+ else:
+ return json.dumps(d, cls=decoder_or_none, sort_keys=True, indent=4)
+
+
+ class PbReportError(Exception):
+ """Raised by the report model for invalid ids, duplicate ids and missing required values."""
+ pass
+
+
+ class BaseReportElement(object):
+     """
+     Abstract base class of every report element.
+
+     It validates and stores the element id, tracks the ids of the children
+     that were added (see `is_unique`) and implements the generic conversion
+     to a dict (see `to_dict`). Subclasses declare what gets serialized via
+     `_get_attrs_simple` and `_get_attrs_complex_list`.
+     """
+     __metaclass__ = abc.ABCMeta
+
+     def __init__(self, id_):
+         """
+         :param id_: (str) element id; only lowercase letters, digits and
+             underscores are accepted
+         :raises PbReportError: if the id is not a string or contains other
+             characters
+         """
+         if not isinstance(id_, basestring):
+             raise PbReportError("Type error. id '{i}' cannot be {t}.".format(i=id_, t=type(id_)))
+
+         if not re.match('^[a-z0-9_]+$', id_):
+             # the pattern only admits lowercase letters, so say so
+             msg = "id '{i}' for {x} must contain only lowercase alphanumeric or underscore characters".format(x=self.__class__.__name__, i=id_)
+             log.error(msg)
+             raise PbReportError(msg)
+
+         self._id = id_
+         # ids of the child elements registered through is_unique()
+         self._ids = set([])
+
+     def is_unique(self, id_):
+         """
+         Register the id of a child element.
+
+         Raise an error if a BaseReportElement with this id has already
+         been added.
+
+         :param id_: (str) id of child BaseReportElement
+         :raises PbReportError: if the id was registered before
+         """
+         if id_ in self._ids:
+             # used for plots, columns, attributes, plot groups and tables,
+             # so the message must not claim the child is a plot
+             msg = "an element with id '{i}' has already been added to {t}.".format(i=id_, t=str(type(self)))
+             log.error(msg)
+             raise PbReportError(msg)
+         self._ids.add(id_)
+
+     @property
+     def id(self):
+         return self._id
+
+     @abc.abstractmethod
+     def _get_attrs_simple(self):
+         """
+         Return a list of attributes names where each
+         attribute returns a simple type like a string, int, or float.
+         The 'id' attribute should NOT be included.
+         Example [ 'title' ]
+         """
+         raise NotImplementedError
+
+     @abc.abstractmethod
+     def _get_attrs_complex_list(self):
+         """
+         Return a list of attributes names where each
+         attribute returns a list of BaseReportElement objects which
+         implement to_dict()
+         """
+         raise NotImplementedError
+
+     def to_dict(self, id_parts=None):
+         """
+         Return a dict-view of this object.
+         Recursively descend in to collections of BaseReportElement instances,
+         calling to_dict on each.
+
+         Additionally, prepends the id with a '.'-delimited string of
+         parent id's
+
+         :param id_parts: (list of string) Parent id's, as a function of depth
+             within the object graph. The list is not modified.
+         :returns: (dict) the simple attributes, the fully qualified 'id' and
+             one list of child dicts per complex attribute
+         """
+         # work on a private copy so the caller's list is never modified
+         if id_parts is None:
+             id_parts = [self.id]
+         else:
+             id_parts = list(id_parts) + [self.id]
+
+         d = {a: getattr(self, a) for a in self._get_attrs_simple()}
+         d['id'] = '.'.join([str(v) for v in id_parts])
+
+         for ca in self._get_attrs_complex_list():
+             # every child gets its own copy of the parent id chain so that
+             # the id of one child never leaks into its siblings (bug 23799)
+             d[ca] = [i.to_dict(list(id_parts)) for i in getattr(self, ca)]
+
+         return d
+
+
+ class Attribute(BaseReportElement):
+
+ """
+ An attribute always has an id and a value. A name is optional.
+ """
+
+ def __init__(self, id_, value, name=None):
+ """
+ :param id_: (str) Unique id for attribute (Not None, or Empty)
+ :param value: (str, float) Numeric values should be float values. Formatting is performed during the report rendering
+ :param name: (str, None) optional display name. Can be changed in portal display rules
+ :raises PbReportError: if value is None
+ """
+ BaseReportElement.__init__(self, id_)
+ if value is None:
+ raise PbReportError("value cannot be None. {n} given.".format(n=value))
+ self._value = value
+ self._name = name
+
+ @property
+ def value(self):
+ return self._value
+
+ @property
+ def name(self):
+ return self._name
+
+ def _get_attrs_simple(self):
+ # 'value' and 'name' are serialized next to the id by to_dict()
+ return ['value', 'name']
+
+ def _get_attrs_complex_list(self):
+ # an attribute has no child elements
+ return []
+
+ # NOTE(review): __eq__ is defined without a matching __hash__ in this class
+ def __eq__(self, other):
+ # equal only to another Attribute with the same name, value and id
+ if isinstance(other, Attribute):
+ if self.name == other.name and self.value == other.value and self.id == other.id:
+ return True
+ return False
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+ def __repr__(self):
+ _d = dict(k=self.__class__.__name__,
+ i=self.id,
+ v=self.value,
+ n=self.name)
+ return "<{k} id:{i} value:{v} name:{n} >".format(**_d)
+
+
+ class PlotGroup(BaseReportElement):
+
+ """
+ A plotGroup is a container of plots.
+ """
+
+ def __init__(self, id_, title=None, legend=None, thumbnail=None, plots=()):
+ """
+ :param id_: (str) id of plotgroup. Not None or Empty
+ :param title: (str, None) Title of the plotGroup, displayed to user.
+ :param legend: (str, None) Path to legend image, if applicable
+ :param thumbnail: (str, None) Path to thumbnail image, if applicable
+ :param plots: (list of Plot instances)
+ """
+ BaseReportElement.__init__(self, id_)
+ self._title = title
+ self._legend = legend
+ self._thumbnail = thumbnail
+ self._plots = []
+ # go through add_plot so every plot is type checked and its id registered
+ if plots:
+ for plot in plots:
+ self.add_plot(plot)
+
+ @property
+ def title(self):
+ return self._title
+
+ @property
+ def legend(self):
+ return self._legend
+
+ @property
+ def thumbnail(self):
+ return self._thumbnail
+
+ @property
+ def plots(self):
+ return self._plots
+
+ @property
+ def nplots(self):
+ return len(self.plots)
+
+ def __repr__(self):
+ _d = dict(k=self.__class__.__name__,
+ i=self.id,
+ t=self.title,
+ n=self.nplots)
+ return "<{k} id:{i} title:{t} nplots:{n} >".format(**_d)
+
+ def _get_attrs_simple(self):
+ return ['title', 'legend', 'thumbnail']
+
+ def _get_attrs_complex_list(self):
+ return ['plots']
+
+ def add_plot(self, plot):
+ """
+ Add a plot to the plotGroup
+
+ :raises TypeError: if plot is not a Plot instance
+ :raises PbReportError: if a plot with the same id was already added
+ """
+ if not isinstance(plot, Plot):
+ raise TypeError("Unable to add plot. Got type {x} expect Plot".format(x=type(plot)))
+ BaseReportElement.is_unique(self, plot.id)
+ self._plots.append(plot)
+
+ def to_dict(self, id_parts=None):
+ # plain delegation to the base class implementation
+ return BaseReportElement.to_dict(self, id_parts=id_parts)
+
+
+ def _validate_not_abs_path(path):
+ """Raise ValueError if `path` is absolute according to os.path.isabs."""
+ if os.path.isabs(path):
+ raise ValueError("paths must be relative. Got {i}".format(i=path))
+
+
+ class Plot(BaseReportElement):
+
+ """
+ A plot contains a path to image file.
+ """
+
+ def __init__(self, id_, image, caption=None, thumbnail=None):
+ """
+ :param id_: (str, not None, or empty) Unique id for plot.
+ :param image: (str) Required - not None - path to image
+ :param caption: (str, None) Plot caption displayed to user under plot.
+ :param thumbnail: (str, None) thumbnail path
+
+ Paths must be given as relative
+
+ :raises PbReportError: if image is None
+ :raises ValueError: if image or thumbnail is an absolute path
+ """
+ BaseReportElement.__init__(self, id_)
+
+ if image is None:
+ raise PbReportError('image cannot be None')
+ _validate_not_abs_path(image)
+
+ self._image = image
+ self._caption = caption
+ # the thumbnail is optional, so it is only validated when given
+ if thumbnail is not None:
+ _validate_not_abs_path(thumbnail)
+
+ self._thumbnail = thumbnail
+
+ def __repr__(self):
+ _d = dict(k=self.__class__.__name__,
+ i=self.id,
+ p=self.image)
+ return "<{k} {i} {p} >".format(**_d)
+
+ @property
+ def image(self):
+ return self._image
+
+ @property
+ def thumbnail(self):
+ return self._thumbnail
+
+ @property
+ def caption(self):
+ return self._caption
+
+ def _get_attrs_simple(self):
+ # NOTE(review): 'thumbnail' is not listed, so to_dict() does not emit it
+ # for a Plot - confirm whether that is intended
+ return ['image', 'caption']
+
+ def _get_attrs_complex_list(self):
+ # a plot has no child elements
+ return []
+
+
+ class Table(BaseReportElement):
+
+     """
+     A table consists of an id, title, and list of columns.
+     """
+
+     def __init__(self, id_, title=None, columns=()):
+         """
+         :param id_: (str), Unique id for table in report.
+         :param title: (str, None)
+         :param columns: (list of column instances)
+         """
+         BaseReportElement.__init__(self, id_)
+         self._title = title
+         self._columns = []
+         # go through add_column so every column is type checked and its id
+         # registered
+         if columns:
+             for column in columns:
+                 self.add_column(column)
+
+     def __repr__(self):
+         _d = dict(k=self.__class__.__name__,
+                   i=self.id,
+                   t=self.title,
+                   n=self.ncolumns)
+         return "<{k} {i} title:{t} ncolumns:{n} >".format(**_d)
+
+     def __str__(self):
+         """
+         Return a plain text rendering of the table: the table id, a header
+         line and one line per row. A column that is shorter than the longest
+         one shows "No Value" in its missing cells.
+         """
+         pad = 2
+         columns = self.columns
+         # max() raises ValueError on an empty sequence, so a table without
+         # columns has to be handled explicitly
+         max_values = max(len(column.values) for column in columns) if columns else 0
+
+         headers = []
+         widths = []
+         for c in columns:
+             # header defaults to None in Column; render it as an empty cell
+             text = "" if c.header is None else c.header
+             n = len(text)
+             if c.values:
+                 n = max(max(len(str(v)) for v in c.values), n)
+             headers.append(text)
+             widths.append(n + pad)
+
+         header = "".join([h.ljust(w) for h, w in zip(headers, widths)])
+
+         outs = list()
+         outs.append("")
+         outs.append("Table id:{i}".format(i=self.id))
+         outs.append("-" * len(header))
+         outs.append(header)
+         outs.append("-" * len(header))
+
+         for i in range(max_values):
+             out = []
+             for column, width in zip(columns, widths):
+                 try:
+                     out.append(str(column.values[i]).ljust(width))
+                 except IndexError as e:
+                     # this column has fewer values than the longest column
+                     log.warning(e)
+                     out.append("No Value ")
+
+             outs.append(" ".join(out))
+
+         return "\n".join(outs)
+
+     @property
+     def id(self):
+         return self._id
+
+     @property
+     def title(self):
+         return self._title
+
+     @property
+     def ncolumns(self):
+         return len(self.columns)
+
+     @property
+     def columns(self):
+         return self._columns
+
+     def _get_attrs_simple(self):
+         return ['title']
+
+     def _get_attrs_complex_list(self):
+         return ['columns']
+
+     def add_column(self, column):
+         """
+         Add a column to the table
+
+         :param column: (Column instance)
+         :raises TypeError: if column is not a Column instance
+         :raises PbReportError: if a column with the same id was already added
+         """
+         if not isinstance(column, Column):
+             raise TypeError("Got type {x}. Expected Column type.".format(x=type(column)))
+
+         BaseReportElement.is_unique(self, column.id)
+         self._columns.append(column)
+
+     def append_data(self, column_index, item):
+         """
+         This should be deprecated in favor of `add_data_by_column_id`.
+
+         Append datum to a column by column index
+
+         :param column_index: (int) Index into internal column list
+         :param item: (float, str) data item.
+         :raises IndexError: if column_index is not smaller than the number of
+             columns
+         """
+         if column_index < len(self._columns):
+             self._columns[column_index].values.append(item)
+         else:
+             raise IndexError("Unable to find index {i} in columns.".format(i=column_index))
+
+     def add_data_by_column_id(self, column_id, value):
+         """Add a value to column.
+
+         :param column_id: (str) Column id
+         :param value: (float, str, int)
+         :raises KeyError: if the table has no column with that id
+         """
+         if column_id in [c.id for c in self.columns]:
+             # _columns should really be a dict
+             for column in self.columns:
+                 if column_id == column.id:
+                     column.values.append(value)
+         else:
+             raise KeyError("Unable to find Column with id '{i}' to assign value {v}".format(i=column_id, v=value))
+
+     @staticmethod
+     def merge(tables):
+         """
+         Merge tables that share the same id, title and column ids into one
+         new Table whose columns are the `Column.merge` of the inputs.
+
+         :param tables: (list of Table instances) must not be empty
+         :returns: (Table) columns are ordered like those of tables[0]
+         """
+         table_id = tables[0].id
+         table_title = tables[0].title
+         column_ids = sorted([col.id for col in tables[0].columns])
+
+         # column id -> the column with that id from every table
+         col_collisions = {col_id: [] for col_id in column_ids}
+         for table in tables:
+             assert table.id == table_id
+             assert table.title == table_title
+             assert sorted([col.id for col in table.columns]) == column_ids
+             for col in table.columns:
+                 col_collisions[col.id].append(col)
+         columns = {}
+         for col_id, cols in col_collisions.items():
+             assert len(cols) == len(tables)
+             columns[col_id] = Column.merge(cols)
+         # order by table[0]'s column order:
+         columns = [columns[col.id] for col in tables[0].columns]
+         return Table(table_id, table_title, columns=columns)
+
+
+ class Column(BaseReportElement):
+
+ """
+ A column consists of an id, header, and list of values.
+ """
+
+ def __init__(self, id_, header=None, values=()):
+ """
+ :param id_: (str)
+ :param header: (str, None) Header of Column.
+ :param values: iterable of values; copied into a new list
+ """
+ BaseReportElement.__init__(self, id_)
+ # redundant: BaseReportElement.__init__ already stored the id in _id
+ self._id = id_
+ self._header = header
+ self._values = list(values)
+
+ def __repr__(self):
+ _d = dict(k=self.__class__.__name__,
+ i=self.id,
+ h=self.header,
+ n=self.nvalues)
+ return "<{k} id:{i} header:{h} nvalues:{n} >".format(**_d)
+
+ @property
+ def id(self):
+ return self._id
+
+ @property
+ def header(self):
+ return self._header
+
+ @property
+ def nvalues(self):
+ return len(self.values)
+
+ @property
+ def values(self):
+ return self._values
+
+ def _get_attrs_simple(self):
+ return ['header', 'values']
+
+ def _get_attrs_complex_list(self):
+ # a column has no child elements
+ return []
+
+ @staticmethod
+ def merge(columns):
+ """
+ Concatenate the values of columns that share the same id and header
+ into a new Column. `columns` must not be empty.
+ """
+ column_id = columns[0].id
+ column_header = columns[0].header
+ values = []
+ for col in columns:
+ assert col.id == column_id
+ assert col.header == column_header
+ values.extend(col.values)
+ return Column(column_id, column_header, values=values)
+
+
+ class Report(BaseReportElement):
+
+     """
+     A report is a container for attributes, plotGroups, and tables.
+     It can be serialized to json.
+     """
+
+     def __init__(self, id_, tables=(), attributes=(), plotgroups=(), dataset_uuids=()):
+         """
+         :param id_: (str) Should be a string that identifies the report, like 'adapter'.
+         :param tables: (list of table instances)
+         :param attributes: (list of attribute instances)
+         :param plotgroups: (list of plot group instances)
+         :param dataset_uuids: list[string] DataSet uuids of files used to generate the report
+         """
+         BaseReportElement.__init__(self, id_)
+         self._attributes = []
+         self._plotgroups = []
+         self._tables = []
+         # go through the add_* methods so every child id is registered
+         if tables:
+             for table in tables:
+                 self.add_table(table)
+         if attributes:
+             for attr in attributes:
+                 self.add_attribute(attr)
+         if plotgroups:
+             for plotgroup in plotgroups:
+                 self.add_plotgroup(plotgroup)
+
+         # DataSet uuids of the files used to generate the report
+         self._dataset_uuids = dataset_uuids
+
+     def add_attribute(self, attribute):
+         """Add an attribute to the report
+
+         :param attribute: (Attribute instance)
+         :raises TypeError: if attribute is not an Attribute instance
+         :raises PbReportError: if the id was already added to this report
+         """
+         if not isinstance(attribute, Attribute):
+             # the exception has to be raised, not merely constructed
+             raise TypeError("Got type {x}. Expected Attribute type.".format(x=type(attribute)))
+
+         BaseReportElement.is_unique(self, attribute.id)
+         self._attributes.append(attribute)
+
+     def add_plotgroup(self, plotgroup):
+         """
+         Add a plotgroup to the report
+
+         :param plotgroup: (PlotGroup instance)
+         :raises TypeError: if plotgroup is not a PlotGroup instance
+         :raises PbReportError: if the id was already added to this report
+         """
+         if not isinstance(plotgroup, PlotGroup):
+             raise TypeError("Got type {x}. Expected PlotGroup type.".format(x=type(plotgroup)))
+
+         BaseReportElement.is_unique(self, plotgroup.id)
+         self._plotgroups.append(plotgroup)
+
+     def add_table(self, table):
+         """
+         Add a table to the report
+
+         :raises PbReportError: if the id was already added to this report
+         """
+         BaseReportElement.is_unique(self, table.id)
+         self._tables.append(table)
+
+     def __repr__(self):
+         _d = dict(k=self.__class__.__name__,
+                   i=self.id,
+                   a=len(self.attributes),
+                   p=len(self.plotGroups),
+                   t=len(self.tables))
+         return "<{k} id:{i} nattributes:{a} nplot_groups:{p} ntables:{t} >".format(**_d)
+
+     @property
+     def attributes(self):
+         return self._attributes
+
+     @property
+     def plotGroups(self):
+         return self._plotgroups
+
+     @property
+     def tables(self):
+         return self._tables
+
+     def _get_attrs_simple(self):
+         return []
+
+     def _get_attrs_complex_list(self):
+         return ['attributes', 'plotGroups', 'tables']
+
+     def get_attribute_by_id(self, id_):
+         """Get an attribute by id. The id should NOT contain the root report id
+
+         :returns: (None, Attribute)
+
+         Example:
+             report.get_attribute_by_id('nmovies')
+
+         *NOT*
+             report.get_attribute_by_id('overview.nmovies')
+         """
+         for attr in self.attributes:
+             if attr.id == id_:
+                 return attr
+
+         return None
+
+     def to_dict(self, id_parts=None):
+         """
+         Return the dict-view of the report: the generic element dict plus
+         '_version', '_changelist' and the de-duplicated 'dataset_uuids'.
+         """
+         version = pbcommand.get_version()
+
+         d = BaseReportElement.to_dict(self, id_parts=id_parts)
+         d['_version'] = version
+         d['_changelist'] = "UNKNOWN"
+         d['dataset_uuids'] = list(set(self._dataset_uuids))
+         return d
+
+     def to_json(self):
+         """Return a json string of the report"""
+         try:
+             s = _to_json_with_decoder(self.to_dict())
+         except TypeError as e:
+             # log the offending structure, then let the error propagate
+             msg = "Unable to serialize report due to {e} \n".format(e=e)
+             log.error(msg)
+             log.error("Object: " + pformat(self.to_dict()))
+             raise
+
+         return s
+
+     def write_json(self, file_name):
+         """
+         Serialize the report to a json file.
+
+         :param file_name: (str) Path to write output json file to.
+         """
+         with open(file_name, 'w') as f:
+             f.write(self.to_json())
+         log.info("Wrote report {r}".format(r=file_name))
+
+     @staticmethod
+     def from_simple_dict(report_id, raw_d, namespace):
+         """
+         Generate a Report with populated attributes, starting from a flat
+         dictionary (without namespace).
+
+         Every key becomes an Attribute whose id is the namespace joined with
+         the lowercased key by '_'. Entries with a None value are skipped with
+         a warning.
+         """
+         attributes = []
+         for k, v in raw_d.items():
+             ns = "_".join([namespace, k.lower()])
+             # Attribute rejects None values
+             if v is not None:
+                 a = Attribute(ns, v, name=k)
+                 attributes.append(a)
+             else:
+                 warnings.warn("skipping null entry {k}->{v}".format(k=k, v=v))
+         return Report(report_id, attributes=attributes)
+
+     @staticmethod
+     def merge(reports):
+         """
+         Merge reports that share the same id into a new Report.
+
+         Attribute values are summed per attribute id, tables with the same
+         id are combined with `Table.merge`, and an extra 'chunk_metrics'
+         table lists the individual attribute values of every report.
+         Plot groups are not carried over.
+
+         :param reports: (list of Report instances) must not be empty
+         """
+         report_id = reports[0].id
+
+         def _merge_attributes_d(attributes_list):
+             # attribute id -> list of the values found in every report
+             attrs = defaultdict(list)
+             for ax in attributes_list:
+                 for a in ax:
+                     attrs[a.id].append(a.value)
+             return attrs
+
+         def _attributes_to_table(attributes_list, table_id, title):
+             attrs = _merge_attributes_d(attributes_list)
+             columns = [Column(k.lower(), header=k, values=values)
+                        for k, values in attrs.items()]
+             table = Table(table_id, title=title, columns=columns)
+             return table
+
+         def _sum_attributes(attributes_list):
+             d = _merge_attributes_d(attributes_list)
+             return [Attribute(k, sum(values), name=k)
+                     for k, values in d.items()]
+
+         def _merge_tables(tables):
+             """Pass through singletons, Table.merge dupes"""
+             id_collisions = defaultdict(list)
+             merged = []
+             for tab in tables:
+                 id_collisions[tab.id].append(tab)
+             for tabs in id_collisions.values():
+                 if len(tabs) == 1:
+                     merged.append(tabs[0])
+                 else:
+                     merged.append(Table.merge(tabs))
+             return merged
+
+         attr_list = []
+         table_list = []
+         for report in reports:
+             assert report.id == report_id
+             attr_list.append(report.attributes)
+             table_list.extend(report.tables)
+         table = _attributes_to_table(attr_list, 'chunk_metrics',
+                                      "Chunk Metrics")
+         tables = _merge_tables(table_list)
+         tables.append(table)
+         merged_attributes = _sum_attributes(attr_list)
+         return Report(report_id, attributes=merged_attributes, tables=tables)
diff --git a/pbcommand/models/tool_contract.py b/pbcommand/models/tool_contract.py
new file mode 100644
index 0000000..53b2559
--- /dev/null
+++ b/pbcommand/models/tool_contract.py
@@ -0,0 +1,366 @@
+"""Common models for Tool Contract and Resolved Tool Contract
+
+
+Author: Michael Kocher
+"""
+import abc
+
+import pbcommand
+from pbcommand.models import TaskTypes, ResourceTypes
+
+__version__ = pbcommand.get_version()
+
+
+ class MalformedToolContractError(ValueError):
+ """Raised when a tool contract fails validation (see validate_tool_contract)."""
+ pass
+
+
+ class MalformedResolvedToolContractError(ValueError):
+ # NOTE(review): not raised anywhere in this module; presumably meant for
+ # code that validates resolved tool contracts - confirm
+ pass
+
+
+ def _validate_type(value, type_or_types):
+ """Return True if `value` is an instance of the type (or tuple of types)."""
+ return isinstance(value, type_or_types)
+
+
+ def _validate_or_raise(value, type_or_types):
+ """
+ Return `value` unchanged if it is an instance of `type_or_types`.
+
+ :raises TypeError: otherwise
+ """
+ if not _validate_type(value, type_or_types):
+ _d = dict(x=value, t=type(value), s=type_or_types)
+ raise TypeError("Unsupported type for {x} {t}. Expected types {s}".format(**_d))
+ return value
+
+
+ def _is_empty_list(alist):
+ """Return True if `alist` has length 0."""
+ return len(alist) == 0
+
+
+ def __validate_ioputs(msg, alist):
+ """
+ Return True if `alist` is not empty.
+
+ :raises MalformedToolContractError: with `msg` if `alist` is empty
+ """
+ if _is_empty_list(alist):
+ raise MalformedToolContractError(msg)
+ return True
+
+
+ def validate_tool_contract(tc):
+ """:type tc: ToolContract
+
+ Check that the task of the tool contract declares at least one input file
+ type and at least one output file type, and return the tool contract.
+
+ Expand this out.
+
+ :raises MalformedToolContractError: if either list is empty
+ """
+ __validate_ioputs("Inputs must have at least 1 input.", tc.task.input_file_types)
+ __validate_ioputs("Outputs must have at least 1 output", tc.task.output_file_types)
+ return tc
+
+
+ class _IOFileType(object):
+ """Abstract base for the input/output file type entries of a tool contract."""
+ __metaclass__ = abc.ABCMeta
+
+ def __init__(self, file_type_id, label, display_name, description):
+ self.file_type_id = file_type_id
+ # serialized under the key 'id' by the subclasses
+ self.label = label
+ # serialized under the key 'title' by the subclasses
+ self.display_name = display_name
+ # short description
+ self.description = description
+
+ def __repr__(self):
+ # 'n' is prepared but not used by the format string below
+ _d = dict(i=self.label,
+ n=self.display_name,
+ f=self.file_type_id,
+ k=self.__class__.__name__)
+ return "<{k} {f} {i} >".format(**_d)
+
+ @abc.abstractmethod
+ def to_dict(self):
+ raise NotImplementedError
+
+
+ class InputFileType(_IOFileType):
+ """Input file type entry of a tool contract."""
+
+ def to_dict(self):
+ # label is emitted as 'id' and display_name as 'title'
+ return dict(file_type_id=self.file_type_id,
+ id=self.label,
+ title=self.display_name,
+ description=self.description)
+
+
+ class OutputFileType(_IOFileType):
+ """Output file type entry of a tool contract; adds a default file name."""
+
+ def __init__(self, file_type_id, label, display_name, description, default_name):
+ super(OutputFileType, self).__init__(file_type_id, label, display_name, description)
+ # Default name of the output file. Should be specified as (base, ext)
+ # but "base.ext" is also supported. This should go away
+ self.default_name = default_name
+
+ def to_dict(self):
+ # same keys as InputFileType.to_dict plus 'default_name'
+ return dict(file_type_id=self.file_type_id,
+ id=self.label,
+ title=self.display_name,
+ description=self.description,
+ default_name=self.default_name)
+
+
+ class ToolContractResolvedResource(object):
+ """A resolved resource: a resource type id together with a path."""
+ def __init__(self, resource_type_id, path):
+ # only resource types listed by ResourceTypes.ALL() are accepted
+ assert resource_type_id in ResourceTypes.ALL()
+ self.type_id = resource_type_id
+ self.path = path
+
+ def __repr__(self):
+ _d = dict(k=self.__class__.__name__,
+ i=self.type_id, p=self.path)
+ return "<{k} {i} path:{p} >".format(**_d)
+
+ @staticmethod
+ def from_d(d):
+ # inverse of to_dict(): reads the 'resource_type' and 'path' keys
+ return ToolContractResolvedResource(d['resource_type'], d['path'])
+
+ def to_dict(self):
+ return dict(resource_type=self.type_id, path=self.path)
+
+
+ class ToolDriver(object):
+ """Describes the driver of a tool: executable, environment and serialization format."""
+
+ def __init__(self, driver_exe, env=None, serialization='json'):
+ """
+
+ :param driver_exe: Path to the driver
+ :param env: path to env to be sourced before it's run?
+ (stored as given; an empty dict is used when None)
+ :param serialization: 'avro' or 'json'
+ :return:
+ """
+ self.driver_exe = driver_exe
+ self.env = {} if env is None else env
+ # 'avro' or 'json'
+ self.serialization = serialization
+
+ def __repr__(self):
+ _d = dict(k=self.__class__.__name__, e=self.driver_exe)
+ return "<{k} driver:{e} >".format(**_d)
+
+ def to_dict(self):
+ # driver_exe is emitted under the key 'exe'
+ return dict(exe=self.driver_exe, env=self.env, serialization=self.serialization)
+
+
+ class ToolContractTask(object):
+
+ # task type written to the 'task_type' key by to_dict(); overridden by
+ # the scatter and gather subclasses
+ TASK_TYPE_ID = TaskTypes.STANDARD
+
+ def __init__(self, task_id, name, description, version, is_distributed, input_types, output_types, tool_options, nproc, resources):
+ """
+ Core metadata for a commandline task
+
+ :param task_id: Global id to reference your tool in a pipeline
+ :type task_id: str
+ :param name: Display name of your
+ :param description: Short description of your tool
+ :param version: semantic style versioning
+ :param is_distributed: If the task will be run locally or not
+ :type is_distributed: bool
+ :param input_types: list[FileType]; each item must provide to_dict()
+ :param output_types: each item must provide to_dict()
+ :param tool_options: emitted as 'schema_options'; a falsy value is emitted as []
+ :param nproc:
+ :param resources: emitted as 'resource_types'
+ :return:
+ """
+ self.task_id = task_id
+ self.name = name
+ self.description = description
+ self.version = version
+ self.is_distributed = is_distributed
+ self.input_file_types = input_types
+ self.output_file_types = output_types
+ # This needs to be list
+ # self.options = _validate_or_raise(tool_options, (list, tuple))
+ self.options = tool_options
+ self.nproc = nproc
+ # List of ResourceTypes
+ self.resources = resources
+
+ def __repr__(self):
+ # 't' is prepared but not used by the format string below
+ _d = dict(k=self.__class__.__name__, i=self.task_id, t=self.is_distributed, n=self.name)
+ return "<{k} id:{i} {n} >".format(**_d)
+
+ def to_dict(self):
+ # this is a little hack to get around some sloppyness in the datamodel
+ opts = self.options if self.options else []
+
+ # note that self.version is not part of this dict; ToolContract.to_dict
+ # emits it at the top level
+ _t = dict(tool_contract_id=self.task_id,
+ input_types=[i.to_dict() for i in self.input_file_types],
+ output_types=[i.to_dict() for i in self.output_file_types],
+ task_type=self.TASK_TYPE_ID,
+ is_distributed=self.is_distributed,
+ name=self.name,
+ description=self.description,
+ schema_options=opts,
+ nproc=self.nproc,
+ resource_types=self.resources,
+ _comment="Created by v{v}".format(v=__version__))
+ return _t
+
+
+ class ScatterToolContractTask(ToolContractTask):
+
+ TASK_TYPE_ID = TaskTypes.SCATTERED
+
+ def __init__(self, task_id, name, description, version, is_distributed,
+ input_types, output_types, tool_options, nproc, resources, chunk_keys, max_nchunks):
+ """Scatter tasks have a special output signature of [FileTypes.CHUNK]
+
+ The chunk keys are the expected to be written to the chunk.json file
+ """
+ super(ScatterToolContractTask, self).__init__(task_id, name, description, version, is_distributed,
+ input_types, output_types, tool_options, nproc, resources)
+ self.chunk_keys = chunk_keys
+ # int or $max_chunks symbol
+ self.max_nchunks = max_nchunks
+
+ def to_dict(self):
+ # base dict plus 'chunk_keys' and 'nchunks' (the latter holds max_nchunks)
+ s = super(ScatterToolContractTask, self).to_dict()
+ s['chunk_keys'] = self.chunk_keys
+ s['nchunks'] = self.max_nchunks
+ return s
+
+
+ class GatherToolContractTask(ToolContractTask):
+ """Gather tasks have special input type [FileTypes.CHUNK]"""
+ # only the task type differs from ToolContractTask
+ TASK_TYPE_ID = TaskTypes.GATHERED
+ # not completely sure how to handle chunk-keys.
+
+
+ class ToolContract(object):
+ """Pairs a tool contract task with the driver used to run it."""
+
+ def __init__(self, task, driver):
+ """
+
+ :type task: ToolContractTask | ScatterToolContractTask | GatherToolContractTask
+ :type driver: ToolDriver
+
+ :param task:
+ :param driver:
+ :return:
+ """
+ self.task = task
+ self.driver = driver
+
+ def __repr__(self):
+ # 't' is prepared but not used by the format string below
+ _d = dict(k=self.__class__.__name__, i=self.task.task_id, t=self.task.is_distributed)
+ return "<{k} id:{i} >".format(**_d)
+
+ def to_dict(self):
+ # raises MalformedToolContractError if the task has no inputs or outputs
+ validate_tool_contract(self)
+ _t = self.task.to_dict()
+
+ _d = dict(version=self.task.version,
+ tool_contract_id=self.task.task_id,
+ driver=self.driver.to_dict(),
+ tool_contract=_t)
+ return _d
+
+
+ def _get_resource_by_type(rt, resources):
+ """Return the list of resources whose `type_id` equals `rt`, in order."""
+ xs = []
+ for r in resources:
+ if r.type_id == rt:
+ xs.append(r)
+ return xs
+
+
+ class ResolvedToolContractTask(object):
+ # The interface is the same, but the types are "resolved" and have a
+ # different
+ # structure
+ TASK_TYPE_ID = TaskTypes.STANDARD
+
+ def __init__(self, task_id, is_distributed, input_files, output_files,
+ options, nproc, resources):
+ self.task_id = task_id
+ self.is_distributed = is_distributed
+ self.input_files = input_files
+ self.output_files = output_files
+ self.options = options
+ self.nproc = nproc
+ # items must expose `type_id` and `to_dict()` (see the properties below
+ # and to_dict)
+ self.resources = resources
+
+ @property
+ def tmpdir_resources(self):
+ # resources whose type_id is ResourceTypes.TMP_DIR
+ return _get_resource_by_type(ResourceTypes.TMP_DIR, self.resources)
+
+ @property
+ def tmpfile_resources(self):
+ # resources whose type_id is ResourceTypes.TMP_FILE
+ return _get_resource_by_type(ResourceTypes.TMP_FILE, self.resources)
+
+ def __repr__(self):
+ # 't' is prepared but not used by the format string below
+ _d = dict(k=self.__class__.__name__, i=self.task_id,
+ t=self.is_distributed)
+ return "<{k} id:{i} >".format(**_d)
+
+ def to_dict(self):
+ comment = "Created by pbcommand v{v}".format(v=pbcommand.get_version())
+
+ tc = dict(input_files=self.input_files,
+ output_files=self.output_files,
+ task_type=self.TASK_TYPE_ID,
+ is_distributed=self.is_distributed,
+ tool_contract_id=self.task_id,
+ nproc=self.nproc,
+ resources=[r.to_dict() for r in self.resources],
+ options=self.options,
+ _comment=comment)
+ return tc
+
+
+ class ResolvedScatteredToolContractTask(ResolvedToolContractTask):
+ TASK_TYPE_ID = TaskTypes.SCATTERED
+
+ def __init__(self, task_id, is_distributed, input_files, output_files, options, nproc, resources, max_nchunks, chunk_keys):
+ super(ResolvedScatteredToolContractTask, self).__init__(task_id, is_distributed, input_files, output_files, options, nproc, resources)
+ self.max_nchunks = max_nchunks
+ # these can be used to verified the output chunk.json
+ # after the task has been run
+ self.chunk_keys = chunk_keys
+
+ def to_dict(self):
+ # base dict plus 'max_nchunks' and 'chunk_keys'
+ d = super(ResolvedScatteredToolContractTask, self).to_dict()
+ d['max_nchunks'] = self.max_nchunks
+ d['chunk_keys'] = self.chunk_keys
+ return d
+
+
+ class ResolvedGatherToolContractTask(ResolvedToolContractTask):
+ TASK_TYPE_ID = TaskTypes.GATHERED
+
+ def __init__(self, task_id, is_distributed, input_files, output_files, options, nproc, resources, chunk_key):
+ """
+ The chunk key is used in the pluck specific chunk values from
+ PipelineChunks. This makes gather tasks (i.e., GffGather) generalized.
+ """
+ super(ResolvedGatherToolContractTask, self).__init__(task_id, is_distributed, input_files, output_files, options, nproc, resources)
+ self.chunk_key = chunk_key
+
+ def to_dict(self):
+ # base dict plus 'chunk_key'
+ d = super(ResolvedGatherToolContractTask, self).to_dict()
+ d['chunk_key'] = self.chunk_key
+ return d
+
+
+ class ResolvedToolContract(object):
+ """Pairs a resolved tool contract task with the driver used to run it."""
+
+ def __init__(self, task, driver):
+ """
+
+ :type task: ResolvedToolContractTask |
+ ResolvedScatteredToolContractTask | ResolvedGatherToolContractTask
+ :type driver: ToolDriver
+
+ :param task:
+ :param driver:
+ :return:
+ """
+ self.task = task
+ self.driver = driver
+
+ def __repr__(self):
+ # 't' is prepared but not used by the format string below
+ _d = dict(k=self.__class__.__name__, i=self.task.task_id, t=self.task.is_distributed)
+ return "<{k} id:{i} >".format(**_d)
+
+ def to_dict(self):
+ # unlike ToolContract.to_dict, no validation is performed here
+ return dict(resolved_tool_contract=self.task.to_dict(),
+ driver=self.driver.to_dict())
diff --git a/pbcommand/pb_io/__init__.py b/pbcommand/pb_io/__init__.py
new file mode 100644
index 0000000..e75af62
--- /dev/null
+++ b/pbcommand/pb_io/__init__.py
@@ -0,0 +1,8 @@
+from .report import load_report_from_json
+from .tool_contract_io import (load_tool_contract_from,
+ load_resolved_tool_contract_from,
+ write_resolved_tool_contract,
+ write_tool_contract,
+ write_resolved_tool_contract_avro,
+ write_tool_contract_avro)
+from .common import load_pipeline_chunks_from_json, write_pipeline_chunks
diff --git a/pbcommand/pb_io/common.py b/pbcommand/pb_io/common.py
new file mode 100644
index 0000000..9459c22
--- /dev/null
+++ b/pbcommand/pb_io/common.py
@@ -0,0 +1,45 @@
+import logging
+import json
+import sys
+
+from pbcommand.models import PipelineChunk
+
+log = logging.getLogger(__name__)
+
+
+ def write_pipeline_chunks(chunks, output_json_file, comment):
+ """
+ Write the chunks to a JSON file.
+
+ The file holds 'nchunks', a fixed '_version' of "0.1.0" and the list of
+ `to_dict()` results under 'chunks'.
+
+ :param chunks: sized collection of objects providing to_dict()
+ :param output_json_file: path of the file to write
+ :param comment: stored under '_comment' unless it is None
+ """
+
+ _d = dict(nchunks=len(chunks), _version="0.1.0",
+ chunks=[c.to_dict() for c in chunks])
+
+ if comment is not None:
+ _d['_comment'] = comment
+
+ with open(output_json_file, 'w') as f:
+ f.write(json.dumps(_d, indent=4))
+
+ log.debug("Write {n} chunks to {o}".format(n=len(chunks), o=output_json_file))
+
+
+ def load_pipeline_chunks_from_json(path):
+ """Returns a list of Pipeline Chunks
+
+ Every entry of the 'chunks' list in the file must have a 'chunk_id' and a
+ 'chunk' dict; the latter is passed to PipelineChunk as keyword arguments.
+
+ Any error is reported on stderr and then re-raised unchanged.
+
+ :rtype: list[PipelineChunk]
+ """
+
+ try:
+ with open(path, 'r') as f:
+ d = json.loads(f.read())
+
+ chunks = []
+ for cs in d['chunks']:
+ chunk_id = cs['chunk_id']
+ chunk_datum = cs['chunk']
+ c = PipelineChunk(chunk_id, **chunk_datum)
+ chunks.append(c)
+ return chunks
+ except Exception:
+ # name the offending file before propagating the original error
+ msg = "Unable to load pipeline chunks from {f}".format(f=path)
+ sys.stderr.write(msg + "\n")
+ raise
diff --git a/pbcommand/pb_io/report.py b/pbcommand/pb_io/report.py
new file mode 100644
index 0000000..dad7523
--- /dev/null
+++ b/pbcommand/pb_io/report.py
@@ -0,0 +1,119 @@
+"""Loading a report from JSON
+
+This manual marshalling/de-marshalling is not awesome.
+"""
+import json
+import logging
+
+from pbcommand.models.report import (Report, Plot, PlotGroup, Attribute,
+ Table, Column)
+
+SUPPORTED_VERSIONS = ('2.1', '2.2', '2.3')
+_DEFAULT_VERSION = '2.1' # before the version was officially added
+
+log = logging.getLogger(__name__)
+
+__all__ = ["load_report_from_json"]
+
+
+def _to_id(s):
+ if '.' in s:
+ return s.split('.')[-1]
+ else:
+ return s
+
+
+def _to_plot(d):
+ id_ = _to_id(d['id'])
+ caption = d.get('caption', None)
+ image = d['image']
+ thumbnail = d.get('thumbnail', None)
+ p = Plot(id_, image, caption=caption, thumbnail=thumbnail)
+ return p
+
+
+def _to_plot_group(d):
+ id_ = _to_id(d['id'])
+ legend = d.get('legend', None)
+ thumbnail = d.get('thumbnail', None)
+ # is this optional?
+ title = d.get('title', None)
+
+ if 'plots' in d:
+ plots = [_to_plot(pd) for pd in d['plots']]
+ else:
+ plots = []
+
+ return PlotGroup(id_, title=title, legend=legend, plots=plots,
+ thumbnail=thumbnail)
+
+
+def _to_attribute(d):
+ id_ = _to_id(d['id'])
+ name = d.get('name', None)
+ # this can't be none
+ value = d['value']
+ return Attribute(id_, value, name=name)
+
+
+def _to_column(d):
+ id_ = _to_id(d['id'])
+ header = d.get('header', None)
+ values = d.get('values', [])
+ return Column(id_, header=header, values=values)
+
+
+def _to_table(d):
+ id_ = _to_id(d['id'])
+ title = d.get('title', None)
+
+ columns = []
+ for column_d in d.get('columns', []):
+ c = _to_column(column_d)
+ columns.append(c)
+
+ # assert that all the columns have the same number of values
+ nvalues = {len(c.values) for c in columns}
+ assert len(nvalues) == 1
+
+ return Table(id_, title=title, columns=columns)
+
+
+def dict_to_report(dct):
+ if '_version' in dct:
+ version = dct['_version']
+ if version not in SUPPORTED_VERSIONS:
+ # should this raise an exception?
+ log.warn("{v} is an unsupported version. Supported versions {vs}".format(v=version, vs=SUPPORTED_VERSIONS))
+
+ report_id = dct['id']
+
+ plot_groups = []
+ if 'plotGroups' in dct:
+ pg = dct['plotGroups']
+ if pg:
+ plot_groups = [_to_plot_group(d) for d in pg]
+
+ attributes = []
+ for r_attr in dct.get('attributes', []):
+ attr = _to_attribute(r_attr)
+ attributes.append(attr)
+
+ tables = []
+ for table_d in dct.get('tables', []):
+ t = _to_table(table_d)
+ tables.append(t)
+
+ report = Report(report_id, plotgroups=plot_groups, tables=tables,
+ attributes=attributes)
+
+ return report
+
+
+def load_report_from_json(json_file):
+ """Convert a report json file to Report instance."""
+
+ with open(json_file, 'r') as f:
+ d = json.loads(f.read())
+ r = dict_to_report(d)
+ return r
diff --git a/pbcommand/pb_io/tool_contract_io.py b/pbcommand/pb_io/tool_contract_io.py
new file mode 100644
index 0000000..dd31fc6
--- /dev/null
+++ b/pbcommand/pb_io/tool_contract_io.py
@@ -0,0 +1,349 @@
+"""IO Layer for creating models from files"""
+import json
+import logging
+from avro.datafile import DataFileWriter
+from avro.io import DatumWriter
+
+import pbcommand
+
+from pbcommand.schemas import RTC_SCHEMA, TC_SCHEMA
+from pbcommand.models import (TaskTypes,
+ GatherToolContractTask,
+ ScatterToolContractTask,
+ MalformedToolContractError,
+ MalformedResolvedToolContractError,
+ validate_tool_contract)
+
+from pbcommand.models.tool_contract import (ToolDriver,
+ ToolContractTask,
+ ToolContract,
+ ResolvedToolContractTask,
+ ResolvedToolContract,
+ InputFileType,
+ OutputFileType,
+ ResolvedScatteredToolContractTask,
+ ResolvedGatherToolContractTask,
+ ToolContractResolvedResource)
+
+log = logging.getLogger(__name__)
+
+__all__ = ['load_resolved_tool_contract_from',
+ 'load_tool_contract_from',
+ 'write_tool_contract',
+ 'write_resolved_tool_contract']
+
+
+class Constants(object):
+ """Key names looked up in tool contract / resolved tool contract dicts by this module."""
+ # Top-level key holding the tool contract id
+ TOOL_ID = "tool_contract_id"
+ # Section holding the (unresolved) tool contract task
+ TOOL = "tool_contract"
+ # Task type key; its value selects the loader to dispatch to
+ TOOL_TYPE = "task_type"
+ IS_DIST = 'is_distributed'
+
+ # Serialization Format
+ SERIALIZATION = 'serialization'
+
+ # Scatter TC, mirrors the nproc key in the JSON
+ NCHUNKS = "nchunks"
+
+ # Section holding the resolved tool contract task
+ RTOOL = "resolved_tool_contract"
+ # Used in Scattering/Chunking tasks to
+ # produce chunks with specific $chunk_keys
+ CHUNK_KEYS = "chunk_keys"
+ MAX_NCHUNKS = 'max_nchunks'
+
+ # Used in Gather Tasks
+ GATHER_CHUNK_KEY = 'chunk_key'
+
+
+def load_or_raise(ex_type):
+ def loader_wrap(func):
+ def _wrapper(path):
+ msg = "Failed to load {p}".format(p=path)
+ try:
+ return func(path)
+ except Exception as e:
+ msg = msg + " {e} {m}".format(m=e.message, e=e)
+ log.error(msg, exc_info=True)
+ raise ex_type(msg)
+ return _wrapper
+ return loader_wrap
+
+
+def __driver_from_d(d):
+ driver_exe = d['driver']['exe']
+ driver_env = d['driver'].get('env', {})
+ serialization = d['driver'].get(Constants.SERIALIZATION, 'json')
+ return ToolDriver(driver_exe, env=driver_env, serialization=serialization)
+
+
+def __core_resolved_tool_contract_task_from_d(d):
+ def _to_a(x):
+ return x.encode('ascii', 'ignore')
+
+ def _get(attr_name):
+ return d[Constants.RTOOL][attr_name]
+
+ def _get_ascii(x_):
+ return _to_a(_get(x_))
+
+ tool_contract_id = _get_ascii(Constants.TOOL_ID)
+ tool_type = _get_ascii(Constants.TOOL_TYPE)
+ is_distributed = _get(Constants.IS_DIST)
+ # list of strings
+ input_files = [_to_a(x) for x in _get("input_files")]
+ # list of strings
+ output_files = [_to_a(x) for x in _get("output_files")]
+
+ tool_options = _get("options")
+ # int
+ nproc = _get("nproc")
+
+ resource_types = [ToolContractResolvedResource.from_d(dx) for dx in _get("resources")]
+
+ return tool_contract_id, is_distributed, input_files, output_files, tool_options, nproc, resource_types
+
+
+def __to_rtc_from_d(d):
+ def _wrapper(task):
+ driver = __driver_from_d(d)
+ rtc = ResolvedToolContract(task, driver)
+ return rtc
+ return _wrapper
+
+
+def _standard_resolved_tool_contract_from_d(d):
+ """Load a 'Standard' CLI task type"""
+
+ tool_contract_id, is_distributed, input_files, output_files, tool_options, nproc, resource_types = __core_resolved_tool_contract_task_from_d(d)
+
+ task = ResolvedToolContractTask(tool_contract_id, is_distributed,
+ input_files, output_files,
+ tool_options, nproc, resource_types)
+ return __to_rtc_from_d(d)(task)
+
+
+def _scatter_resolved_tool_contract_from_d(d):
+ """Load a Gathered Tool Contract """
+ tool_contract_id, is_distributed, input_files, output_files, tool_options, nproc, resource_types = __core_resolved_tool_contract_task_from_d(d)
+ max_nchunks = d[Constants.RTOOL][Constants.MAX_NCHUNKS]
+ chunk_keys = d[Constants.RTOOL][Constants.CHUNK_KEYS]
+ task = ResolvedScatteredToolContractTask(tool_contract_id, is_distributed, input_files, output_files, tool_options, nproc, resource_types, max_nchunks, chunk_keys)
+
+ return __to_rtc_from_d(d)(task)
+
+
+def _gather_resolved_tool_contract_from_d(d):
+ tool_contract_id, is_distributed, input_files, output_files, tool_options, nproc, resource_types = __core_resolved_tool_contract_task_from_d(d)
+
+ chunk_key = d[Constants.RTOOL][Constants.GATHER_CHUNK_KEY]
+ task = ResolvedGatherToolContractTask(tool_contract_id, is_distributed,
+ input_files, output_files,
+ tool_options, nproc, resource_types, chunk_key)
+ return __to_rtc_from_d(d)(task)
+
+
+def resolved_tool_contract_from_d(d):
+ """Convert a dict to Resolved Tool Contract"""
+
+ def _to_a(x):
+ return x.encode('ascii', 'ignore')
+
+ def _get(attr_name):
+ return d[Constants.RTOOL][attr_name]
+
+ def _get_ascii(x_):
+ return _to_a(_get(x_))
+
+ tool_type = _get_ascii(Constants.TOOL_TYPE)
+
+ dispatch_funcs = {TaskTypes.STANDARD: _standard_resolved_tool_contract_from_d,
+ TaskTypes.GATHERED: _gather_resolved_tool_contract_from_d,
+ TaskTypes.SCATTERED: _scatter_resolved_tool_contract_from_d}
+
+ if tool_type in dispatch_funcs:
+ return dispatch_funcs[tool_type](d)
+ else:
+ raise ValueError("Unsupported task type '{x}' Supported task types {t}".format(x=tool_type, t=dispatch_funcs.keys()))
+
+
+def json_path_or_d(value):
+ if isinstance(value, dict):
+ return value
+ elif isinstance(value, basestring):
+ with open(value, 'r') as f:
+ d = json.loads(f.read())
+ return d
+ else:
+ raise ValueError("Unsupported value. Expected dict, or string")
+
+
+def _json_path_or_d(func):
+ def _wrapper(value):
+ return func(json_path_or_d(value))
+ return _wrapper
+
+
+ at load_or_raise(MalformedResolvedToolContractError)
+ at _json_path_or_d
+def load_resolved_tool_contract_from(path_or_d):
+ return resolved_tool_contract_from_d(path_or_d)
+
+
+ at _json_path_or_d
+def __core_tool_contract_task_from(d):
+ def _to_a(x):
+ return x.encode('ascii', 'ignore')
+
+ def _get(x_):
+ if x_ not in d[Constants.TOOL]:
+ raise MalformedToolContractError("Unable to find key '{x}'".format(x=x_))
+ return d[Constants.TOOL][x_]
+
+ def _get_or(x_, default):
+ return d[Constants.TOOL].get(x_, default)
+
+ def _get_ascii(x_):
+ return _to_a(_get(x_))
+
+ def _get_ascii_or(x_, default):
+ return _to_a(_get_or(x_, default))
+
+ def _to_in_ft(fd):
+ fx = lambda s: _to_a(fd[s])
+ return InputFileType(fx("file_type_id"), fx("id"), fx("title"), fx("description"))
+
+ def _to_out_ft(fd):
+ fx = lambda s: _to_a(fd[s])
+ return OutputFileType(fx("file_type_id"), fx("id"), fx("title"), fx("description"), fx("default_name"))
+
+ task_id = _to_a(d[Constants.TOOL_ID])
+ display_name = _get_ascii("name")
+ version = _to_a(d["version"])
+ default_desc = "PacBio Tool {n}".format(n=display_name)
+ description = _get_ascii_or("description", default_desc)
+ is_distributed = _get(Constants.IS_DIST)
+
+ input_types = [_to_in_ft(x) for x in _get("input_types")]
+ output_types = [_to_out_ft(x) for x in _get("output_types")]
+ tool_options = _get("schema_options")
+ nproc = _get("nproc")
+ resource_types = _get("resource_types")
+ return task_id, display_name, description, version, is_distributed, input_types, output_types, tool_options, nproc, resource_types
+
+
+def __to_tc_from_d(d):
+ def _wrapper(task):
+ driver = __driver_from_d(d)
+ tc = ToolContract(task, driver)
+ return tc
+ return _wrapper
+
+
+ at _json_path_or_d
+def _standard_tool_contract_from(path_or_d):
+ task_id, display_name, description, version, is_distributed, input_types, output_types, tool_options, nproc, resource_types = __core_tool_contract_task_from(path_or_d)
+ task = ToolContractTask(task_id, display_name, description, version,
+ is_distributed,
+ input_types,
+ output_types,
+ tool_options, nproc, resource_types)
+ return __to_tc_from_d(path_or_d)(task)
+
+
+ at _json_path_or_d
+def _scattered_tool_contract_from(path_or_d):
+ task_id, display_name, description, version, is_distributed, input_types, output_types, tool_options, nproc, resource_types = __core_tool_contract_task_from(path_or_d)
+
+ chunk_keys = path_or_d[Constants.TOOL][Constants.CHUNK_KEYS]
+ # int, or SymbolTypes.MAX_NCHUNKS
+ nchunks = path_or_d[Constants.TOOL][Constants.NCHUNKS]
+ task = ScatterToolContractTask(task_id, display_name, description, version,
+ is_distributed,
+ input_types,
+ output_types,
+ tool_options, nproc, resource_types, chunk_keys, nchunks)
+ return __to_tc_from_d(path_or_d)(task)
+
+
+ at _json_path_or_d
+def _gather_tool_contract_from(path_or_d):
+ task_id, display_name, description, version, is_distributed, input_types, output_types, tool_options, nproc, resource_types = __core_tool_contract_task_from(path_or_d)
+ task = GatherToolContractTask(task_id, display_name, description, version,
+ is_distributed,
+ input_types,
+ output_types,
+ tool_options, nproc, resource_types)
+ return __to_tc_from_d(path_or_d)(task)
+
+
+ at _json_path_or_d
+def tool_contract_from_d(d):
+ """Load tool contract from dict"""
+
+ task_type = d[Constants.TOOL][Constants.TOOL_TYPE]
+
+ dispatch_funcs = {TaskTypes.SCATTERED: _scattered_tool_contract_from,
+ TaskTypes.GATHERED: _gather_tool_contract_from,
+ TaskTypes.STANDARD: _standard_tool_contract_from}
+
+ if task_type in dispatch_funcs:
+ tc = dispatch_funcs[task_type](d)
+ return validate_tool_contract(tc)
+ else:
+ raise ValueError("Unsupported task type {x}".format(x=task_type))
+
+
+ at load_or_raise(MalformedToolContractError)
+ at _json_path_or_d
+def load_tool_contract_from(path_or_d):
+ return tool_contract_from_d(path_or_d)
+
+
+def _write_json(s, output_file):
+ with open(output_file, 'w') as f:
+ f.write(json.dumps(s, indent=4, sort_keys=True))
+ return s
+
+
+def write_tool_contract(tool_contract, output_json_file):
+ """
+ Write a Tool Contract
+
+ :type tool_contract: ToolContract
+ :param output_json_file:
+ :return:
+ """
+ return _write_json(tool_contract.to_dict(), output_json_file)
+
+
+def write_resolved_tool_contract(rtc, output_json_file):
+ """
+
+ :param rtc:
+ :type rtc: ResolvedToolContract
+ :param output_json_file:
+ :return:
+ """
+ d = rtc.to_dict()
+ return _write_json(d, output_json_file)
+
+
+def _write_records_to_avro(schema, _d_or_ds, output_file):
+ # FIXME. There's only one record being written here,
+ # why does this not support a single item
+ if isinstance(_d_or_ds, dict):
+ _d_or_ds = [_d_or_ds]
+ with open(output_file, 'w') as outs:
+ with DataFileWriter(outs, DatumWriter(), schema) as writer:
+ for record in _d_or_ds:
+ writer.append(record)
+ log.debug("Write avro file to {p}".format(p=output_file))
+ return _d_or_ds
+
+
+def write_tool_contract_avro(tc, avro_output):
+ return _write_records_to_avro(TC_SCHEMA, tc.to_dict(), avro_output)
+
+
+def write_resolved_tool_contract_avro(rtc, avro_output):
+ return _write_records_to_avro(RTC_SCHEMA, rtc.to_dict(), avro_output)
diff --git a/pbcommand/resolver.py b/pbcommand/resolver.py
new file mode 100644
index 0000000..008f395
--- /dev/null
+++ b/pbcommand/resolver.py
@@ -0,0 +1,210 @@
+"""Driver for creating a Resolved Tool Contract from a Tool Contract"""
+from collections import defaultdict
+
+import logging
+import os
+import uuid
+
+from pbcommand.models.common import (SymbolTypes, REGISTERED_FILE_TYPES,
+ ResourceTypes)
+from pbcommand.models.tool_contract import (ResolvedToolContract,
+ ToolContract,
+ ResolvedToolContractTask,
+ ResolvedScatteredToolContractTask,
+ ResolvedGatherToolContractTask,
+ ToolContractResolvedResource)
+
+log = logging.getLogger(__name__)
+
+
+class ToolContractError(BaseException):
+ pass
+
+
+def __resolve_int_or_symbol(symbol_type, symbol_or_int, max_value):
+ if isinstance(symbol_or_int, int):
+ return min(symbol_or_int, max_value)
+ elif symbol_or_int == symbol_type:
+ return max_value
+ else:
+ raise TypeError("unsupported type for {s} '{t}".format(t=symbol_or_int,
+ s=symbol_type))
+
+
+def _resolve_nproc(nproc_int_or_symbol, max_nproc):
+ return __resolve_int_or_symbol(SymbolTypes.MAX_NPROC, nproc_int_or_symbol, max_nproc)
+
+
+def _resolve_max_nchunks(nchunks_or_symbol, max_nchunks):
+ return __resolve_int_or_symbol(SymbolTypes.MAX_NCHUNKS, nchunks_or_symbol, max_nchunks)
+
+
+def _resolve_options(tool_contract, tool_options):
+ resolved_options = {}
+
+ # These probably exist somewhere else, feel free to replace:
+ type_map = {'integer': int,
+ 'object': object,
+ 'boolean': bool,
+ 'number': (int, float),
+ 'string': basestring}
+
+ # Get and Validate resolved value.
+ # TODO. None support should be removed.
+ for option in tool_contract.task.options:
+ for optid in option['required']:
+ exp_type = option['properties'][optid]['type']
+ value = tool_options.get(optid, option['properties'][optid]['default'])
+
+ if not isinstance(value, type_map[exp_type]):
+ raise ToolContractError("Incompatible option types. Supplied "
+ "{i}. Expected {t}".format(
+ i=type(value),
+ t=exp_type))
+ resolved_options[optid] = value
+
+ return resolved_options
+
+
+def _resolve_output_file(registry_d, file_type, output_file_type, root_output_dir):
+ """
+ Resolved the Output File Type
+
+ :type file_type: pbcommand.models.FileType
+ :type output_file_type: pbcommand.models.OutputFileType
+ :return: Resolved output file name
+ """
+ def _get_fname(base, ext):
+ idx = base, ext
+ count = registry_d[idx]
+ xs = "" if count == 0 else "-" + str(count)
+ registry_d[idx] += 1
+ name = "".join([base, xs, ".", ext])
+ return os.path.join(root_output_dir, name)
+
+ # FIXME. THIS NEED TO BE FUNDAMENTALLY FIXED and updated to use the spec
+ # in the avro schema.
+ if isinstance(output_file_type.default_name, basestring):
+ a, b = os.path.splitext(output_file_type.default_name)
+ return _get_fname(a, b.replace('.', ''))
+ elif isinstance(output_file_type.default_name, (list, tuple)):
+ base, ext = output_file_type.default_name
+ return _get_fname(base, ext)
+ else:
+ return _get_fname(file_type.base_name, file_type.ext)
+
+
+def _resolve_resource_types(resources, output_dir, root_tmp_dir):
+ resolved_resources = []
+
+ def _add(rt_id, p):
+ r = ToolContractResolvedResource(rt_id, p)
+ resolved_resources.append(r)
+ return r
+
+ def _to_p(x):
+ return os.path.join(root_tmp_dir, x)
+
+ def _to_r(prefix, suffix=None):
+ u = uuid.uuid4()
+ name = "{x}-{u}".format(u=u, x=prefix)
+ if suffix is not None:
+ name += suffix
+ return _to_p(name)
+
+ # The names are not optimal, this would require more config
+ for resource in resources:
+ if resource == ResourceTypes.TMP_DIR:
+ path = _to_r("pb-tmp")
+ _add(resource, path)
+ elif resource == ResourceTypes.TMP_FILE:
+ _add(resource, _to_r("pb-tmp", "-file"))
+ elif resource == ResourceTypes.LOG_FILE:
+ u = uuid.uuid4()
+ name = "{x}-{u}-log".format(u=u, x="pb-tmp")
+ path = os.path.join(output_dir, name)
+ _add(resource, path)
+ else:
+ raise ValueError("Unsupported Resource Type {x}".format(x=resource))
+
+ return resolved_resources
+
+
+def _resolve_output_files(output_file_types, root_output_dir):
+ # store the files as {(base, ext): count}
+ _outs_registry = defaultdict(lambda : 0)
+ return [_resolve_output_file(_outs_registry, REGISTERED_FILE_TYPES[f.file_type_id], f, root_output_dir) for f in output_file_types]
+
+
+def _resolve_core(tool_contract, input_files, root_output_dir, max_nproc, tool_options, tmp_dir=None):
+
+ if len(input_files) != len(tool_contract.task.input_file_types):
+ _d = dict(i=input_files, t=tool_contract.task.input_file_types)
+ raise ToolContractError("Incompatible input types. Supplied {i}. Expected file types {t}".format(**_d))
+
+ output_files = _resolve_output_files(tool_contract.task.output_file_types, root_output_dir)
+
+ resolved_options = _resolve_options(tool_contract, tool_options)
+
+ nproc = _resolve_nproc(tool_contract.task.nproc, max_nproc)
+
+ resolved_resources = _resolve_resource_types(tool_contract.task.resources, root_output_dir, tmp_dir)
+
+ return output_files, resolved_options, nproc, resolved_resources
+
+
+def resolve_tool_contract(tool_contract, input_files, root_output_dir, root_tmp_dir, max_nproc, tool_options):
+ """
+ Convert a ToolContract into a Resolved Tool Contract.
+
+
+ :param tool_contract: Tool Contract interface
+ :param input_files: List of input files (must be consistent with the tool contract input file list (types are not enforced)
+
+ :param max_nproc: Max number of processors
+ :param tool_options: dict of overridden options
+
+ :type input_files: list[String]
+ :type max_nproc: int
+
+ :type tool_contract: ToolContract
+ :type tool_options: dict
+
+ :rtype: ResolvedToolContract
+ :return: A Resolved tool contract
+ """
+ output_files, resolved_options, nproc, resources = _resolve_core(tool_contract, input_files, root_output_dir, max_nproc, tool_options, root_tmp_dir)
+ task = ResolvedToolContractTask(tool_contract.task.task_id,
+ tool_contract.task.is_distributed,
+ input_files,
+ output_files,
+ resolved_options,
+ nproc,
+ resources)
+
+ return ResolvedToolContract(task, tool_contract.driver)
+
+
+def resolve_scatter_tool_contract(tool_contract, input_files, root_output_dir, root_tmp_dir, max_nproc, tool_options, max_nchunks, chunk_keys):
+ output_files, resolved_options, nproc, resources = _resolve_core(tool_contract, input_files, root_output_dir, max_nproc, tool_options, tmp_dir=root_tmp_dir)
+ resolved_max_chunks = _resolve_max_nchunks(tool_contract.task.max_nchunks, max_nchunks)
+ task = ResolvedScatteredToolContractTask(tool_contract.task.task_id,
+ tool_contract.task.is_distributed,
+ input_files,
+ output_files,
+ resolved_options,
+ nproc,
+ resources, resolved_max_chunks, chunk_keys)
+ return ResolvedToolContract(task, tool_contract.driver)
+
+
+def resolve_gather_tool_contract(tool_contract, input_files, root_output_dir, root_tmp_dir, max_nproc, tool_options, chunk_key):
+ output_files, resolved_options, nproc, resources = _resolve_core(tool_contract, input_files, root_output_dir, max_nproc, tool_options, tmp_dir=root_tmp_dir)
+ task = ResolvedGatherToolContractTask(tool_contract.task.task_id,
+ tool_contract.task.is_distributed,
+ input_files,
+ output_files,
+ resolved_options,
+ nproc,
+ resources, chunk_key)
+ return ResolvedToolContract(task, tool_contract.driver)
diff --git a/pbcommand/schemas/__init__.py b/pbcommand/schemas/__init__.py
new file mode 100644
index 0000000..5f238df
--- /dev/null
+++ b/pbcommand/schemas/__init__.py
@@ -0,0 +1,36 @@
+import os
+
+import functools
+
+import avro.schema
+from avro.io import validate
+
+SCHEMA_REGISTRY = {}
+
+__all__ = ['validate_pbreport',
+ 'validate_tc',
+ 'validate_rtc',
+ 'SCHEMA_REGISTRY']
+
+
+def _load_schema(idx, name):
+
+ d = os.path.dirname(__file__)
+ schema_path = os.path.join(d, name)
+ with open(schema_path, 'r') as f:
+ schema = avro.schema.parse(f.read())
+ SCHEMA_REGISTRY[idx] = schema
+ return schema
+
+RTC_SCHEMA = _load_schema("resolved_tool_contract", "resolved_tool_contract.avsc")
+PBREPORT_SCHEMA = _load_schema("pbreport", "pbreport.avsc")
+TC_SCHEMA = _load_schema("tool_contract", "tool_contract.avsc")
+
+
+def _validate(schema, d):
+ """Validate a python dict against a avro schema"""
+ return validate(schema, d)
+
+validate_rtc = functools.partial(_validate, RTC_SCHEMA)
+validate_pbreport = functools.partial(_validate, PBREPORT_SCHEMA)
+validate_tc = functools.partial(_validate, TC_SCHEMA)
\ No newline at end of file
diff --git a/pbcommand/schemas/pbreport.avsc b/pbcommand/schemas/pbreport.avsc
new file mode 100644
index 0000000..3fb37e1
--- /dev/null
+++ b/pbcommand/schemas/pbreport.avsc
@@ -0,0 +1,166 @@
+{
+ "namespace": "com.pacbio.common.models.reports",
+ "type": "record",
+ "name": "Report",
+ "fields": [
+ {
+ "name": "id",
+ "type": "string",
+ "desc": "Pbreports style id, must only have [A-z][0-9]_"
+ },
+ {
+ "name": "attributes",
+ "type": {
+ "type": "array",
+ "items": {
+ "type": "record",
+ "name": "ReportAttribute",
+ "fields": [
+ {
+ "name": "id",
+ "type": "string"
+ },
+ {
+ "name": "name",
+ "type": "string"
+ },
+ {
+ "name": "value",
+ "type": [
+ "string",
+ "int",
+ "float"
+ ]
+ }
+ ]
+ }
+ }
+ },
+ {
+ "name": "plotGroups",
+ "type": {
+ "type": "array",
+ "items": {
+ "type": "record",
+ "name": "PlotGroup",
+ "fields": [
+ {
+ "name": "id",
+ "type": "string"
+ },
+ {
+ "name": "title",
+ "type": "string"
+ },
+ {
+ "name": "legend",
+ "type": [
+ "string",
+ "null"
+ ],
+ "desc": "Not clear what the usecase is of this"
+ },
+ {
+ "name": "thumbnail",
+ "type": [
+ "string",
+ "null"
+ ]
+ },
+ {
+ "name": "plots",
+ "type": {
+ "type": "array",
+ "items": {
+ "type": "record",
+ "name": "ReportPlot",
+ "fields": [
+ {
+ "name": "id",
+ "type": "string",
+ "desc": "Plot Id"
+ },
+ {
+ "name": "image",
+ "type": "string",
+ "desc": "Relative Path to Image"
+ },
+ {
+ "name": "caption",
+ "desc": "Caption of the Plot",
+ "type": [
+ "string",
+ "null"
+ ]
+ },
+ {
+ "name": "thumbnail",
+ "desc": "Relative path to thumbnail of the Plot",
+ "type": [
+ "string",
+ "null"
+ ]
+ }
+ ]
+ }
+ }
+ }
+ ]
+ }
+ }
+ },
+ {
+ "name": "tables",
+ "type": {
+ "type": "array",
+ "items": {
+ "type": "record",
+ "name": "ReportTable",
+ "fields": [
+ {
+ "name": "id",
+ "type": "string"
+ },
+ {
+ "name": "title",
+ "type": "string",
+ "desc": "Title of the Table"
+ },
+ {
+ "name": "columns",
+ "desc": "List of Columns",
+ "type": {
+ "type": "array",
+ "items": {
+ "type": "record",
+ "name": "ReportTableColumn",
+ "fields": [
+ {
+ "name": "id",
+ "type": "string"
+ },
+ {
+ "name": "header",
+ "type": "string"
+ },
+ {
+ "name": "value",
+ "desc": "Column values. Attention to mixed-types attempting to represent 'NA'",
+ "type": {
+ "type": "array",
+ "items": [
+ "int",
+ "float"
+ ]
+ }
+ }
+ ]
+ }
+ }
+ }
+ ]
+ }
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/pbcommand/schemas/resolved_tool_contract.avsc b/pbcommand/schemas/resolved_tool_contract.avsc
new file mode 100644
index 0000000..92dc8ab
--- /dev/null
+++ b/pbcommand/schemas/resolved_tool_contract.avsc
@@ -0,0 +1,80 @@
+{
+ "namespace": "com.pacbio.common.models.contracts",
+ "type": "record",
+ "name": "ResolvedToolContract",
+ "fields": [
+ {
+ "name": "resolved_tool_contract",
+ "type": {
+ "type": "record",
+ "name": "ResolvedToolContractTask",
+ "fields": [
+ {
+ "name": "input_files",
+ "type": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ {
+ "name": "output_files",
+ "type": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ {
+ "doc": "Resolved Task Options",
+ "name": "options",
+ "type": {
+ "type": "map",
+ "values": ["long", "boolean", "string", "int"]
+ }
+ },
+ {
+ "name": "nproc",
+ "type": "int"
+ },
+ {
+ "name": "is_distributed",
+ "type": "boolean"
+ },
+ {
+ "name": "task_type",
+ "type": "string"
+ },
+ {
+ "name": "tool_contract_id",
+ "type": "string"
+ },
+ {
+ "name": "resources",
+ "type": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ ]
+ }
+ },
+ {
+ "name": "driver",
+ "type": {
+ "type": "record",
+ "name": "Driver",
+ "fields": [
+ {
+ "name": "exe",
+ "type": "string"
+ }
+ ]
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/pbcommand/schemas/tool_contract.avsc b/pbcommand/schemas/tool_contract.avsc
new file mode 100644
index 0000000..9f4a7f2
--- /dev/null
+++ b/pbcommand/schemas/tool_contract.avsc
@@ -0,0 +1,165 @@
+{
+ "namespace": "com.pacbio.common.models.contracts",
+ "type": "record",
+ "name": "ToolContract",
+ "fields": [
+ {
+ "name": "tool_contract",
+ "type": {
+ "type": "record",
+ "name": "ToolContractTask",
+ "fields": [
+ {
+ "name": "input_types",
+ "type": {
+ "type": "array",
+ "items": {
+ "type": "record",
+ "name": "ToolInputFile",
+ "fields": [
+ {
+ "name": "id",
+ "type": "string"
+ },
+ {
+ "doc": "PacBio File Type identifier",
+ "name": "file_type_id",
+ "type": "string"
+ },
+ {
+ "doc": "Display Name",
+ "name": "title",
+ "type": "string"
+ },
+ {
+ "name": "description",
+ "type": "string"
+ }
+ ]
+ }
+ }
+ },
+ {
+ "name": "output_types",
+ "type": {
+ "type": "array",
+ "items": {
+ "type": "record",
+ "name": "ToolOutputFile",
+ "fields": [
+ {
+ "name": "id",
+ "type": "string"
+ },
+ {
+ "doc": "PacBio FileType identifier",
+ "name": "file_type_id",
+ "type": "string"
+ },
+ {
+ "doc": "Display Name",
+ "name": "title",
+ "type": "string"
+ },
+ {
+ "doc": "Default file name",
+ "name": "default_name",
+ "type": "string"
+ },
+ {
+ "name": "description",
+ "type": "string"
+ }
+ ]
+ }
+ }
+ },
+ {
+ "name": "schema_options",
+ "type": {
+ "type": "array",
+ "items": {
+ "type": "record",
+ "name": "PacBioOptions",
+ "fields": [
+ {
+ "name": "pb_option",
+ "type": {
+ "type": "record",
+ "name": "pb_option",
+ "fields": [
+ {
+ "name": "default",
+ "type": [
+ "int",
+ "string",
+ "boolean",
+ "float"
+ ]
+ },
+ {
+ "name": "option_id",
+ "type": "string"
+ },
+ {
+ "name": "name",
+ "type": "string"
+ },
+ {
+ "name": "description",
+ "type": "string"
+ }
+ ]
+ }
+ }
+ ]
+ }
+ }
+ },
+ {
+ "doc": "Number of processors to use",
+ "name": "nproc",
+ "type": "int"
+ },
+ {
+ "doc": "Globally unique Tool Contract identifier",
+ "name": "tool_contract_id",
+ "type": "string"
+ },
+ {
+ "doc": "Task class type, Standard, Scatter, Gather",
+ "name": "task_type",
+ "type": "string"
+ },
+ {
+ "doc": "Determine if the task will be submitted to the cluster resources",
+ "name": "is_distributed",
+ "type": "boolean"
+ },
+ {
+ "name": "resource_types",
+ "type": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ ]
+ }
+ },
+ {
+ "name": "driver",
+ "type": {
+ "type": "record",
+ "name": "ToolDriver",
+ "fields": [
+ {
+ "name": "exe",
+ "type": "string"
+ }
+ ]
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/pbcommand/testkit/__init__.py b/pbcommand/testkit/__init__.py
new file mode 100644
index 0000000..7d3d53c
--- /dev/null
+++ b/pbcommand/testkit/__init__.py
@@ -0,0 +1 @@
+from .core import PbTestApp
diff --git a/pbcommand/testkit/base_utils.py b/pbcommand/testkit/base_utils.py
new file mode 100755
index 0000000..b56ac63
--- /dev/null
+++ b/pbcommand/testkit/base_utils.py
@@ -0,0 +1,26 @@
+import os
+import tempfile
+
+HAS_PBCORE = False
+
+try:
+ import pbcore
+ HAS_PBCORE = True
+except ImportError:
+ HAS_PBCORE = False
+
+
+def pbcore_skip_msg(msg=None):
+ msg = "" if msg is None else msg
+ return "" if HAS_PBCORE else "pbcore is not installed. {m}".format(m=msg)
+
+
+def get_temp_file(suffix, dir_):
+ t = tempfile.NamedTemporaryFile(suffix=suffix, delete=False, dir=dir_)
+ t.close()
+ return t.name
+
+
+def get_temp_dir(suffix=""):
+ """This will make subdir in the root tmp dir"""
+ return tempfile.mkdtemp(dir=None, suffix=suffix)
diff --git a/pbcommand/testkit/core.py b/pbcommand/testkit/core.py
new file mode 100644
index 0000000..7b75c79
--- /dev/null
+++ b/pbcommand/testkit/core.py
@@ -0,0 +1,162 @@
+import os
+import unittest
+import logging
+import subprocess
+
+from .base_utils import (HAS_PBCORE,
+ pbcore_skip_msg,
+ get_temp_file,
+ get_temp_dir)
+
+from pbcommand.resolver import (resolve_tool_contract,
+ resolve_gather_tool_contract,
+ resolve_scatter_tool_contract)
+from pbcommand.models import ResolvedToolContract, PipelineChunk
+from pbcommand.pb_io import (load_tool_contract_from,
+ load_resolved_tool_contract_from)
+
+from pbcommand.pb_io.tool_contract_io import write_resolved_tool_contract
+
+log = logging.getLogger(__name__)
+
+
+class PbTestApp(unittest.TestCase):
+
+    """Generic harness for running tool contracts end-to-end.
+
+    Subclass it and set the class attributes below. ``test_run_e2e`` then
+    emits the tool contract, resolves it, runs the resolved contract and
+    checks that every resolved output file exists.
+    """
+
+    # If the base command is defined, DRIVER_EMIT and DRIVER_RESOLVE can be
+    # guessed automatically (see setUpClass)
+    DRIVER_BASE = None
+    # Complete command that writes a tool contract to stdout
+    DRIVER_EMIT = ""
+    # Command that runs the tool from a resolved tool contract JSON file
+    DRIVER_RESOLVE = ""
+
+    # Set to True when the tool requires pbcore
+    REQUIRES_PBCORE = False
+
+    # Input files that will be passed to the resolver.
+    # To get example files use get_data_file("example.txt")
+    INPUT_FILES = []
+
+    # Arguments passed to the resolver
+    MAX_NPROC = 1
+    TASK_OPTIONS = {}
+
+    # These will be checked against the resolved tool contract values
+    RESOLVED_TASK_OPTIONS = {}
+    RESOLVED_NPROC = 1
+
+    @classmethod
+    def setUpClass(cls):
+        """Derive DRIVER_EMIT / DRIVER_RESOLVE from DRIVER_BASE when unset."""
+        if cls.DRIVER_BASE is not None:
+            if cls.DRIVER_EMIT == "":
+                cls.DRIVER_EMIT = cls.DRIVER_BASE + " --emit-tool-contract "
+            if cls.DRIVER_RESOLVE == "":
+                cls.DRIVER_RESOLVE = cls.DRIVER_BASE + " --resolved-tool-contract "
+
+    def _test_outputs_exists(self, rtc):
+        """Assert that every output file of the resolved contract exists.
+
+        :type rtc: pbcommand.models.ResolvedToolContract
+        """
+        log.debug("validating output file existence from {r}".format(r=rtc))
+        log.debug("Resolved Output files from {t}".format(t=rtc.task.task_id))
+        log.debug(rtc.task.output_files)
+        for i, output_file in enumerate(rtc.task.output_files):
+            msg = "Unable to find {i}-th output file {p}".format(i=i, p=output_file)
+            self.assertTrue(os.path.exists(output_file), msg)
+
+    def _to_rtc(self, tc, output_dir, tmp_dir):
+        """Resolve tc into a resolved tool contract.
+
+        Subclasses handle the polymorphism by overriding this method.
+        """
+        return resolve_tool_contract(tc, self.INPUT_FILES, output_dir, tmp_dir, self.MAX_NPROC, self.TASK_OPTIONS)
+
+    def test_run_e2e(self):
+        """Emit, resolve and run the tool contract, then verify its outputs."""
+        # hack to skip running the base Test class (which is the nose default behavior)
+        if self.__class__.__name__ in ('PbTestApp', 'PbTestScatterApp', 'PbTestGatherApp'):
+            return
+
+        if self.REQUIRES_PBCORE and not HAS_PBCORE:
+            # report a real skip rather than a silent pass
+            self.skipTest(pbcore_skip_msg("Skipping running e2e for {d}".format(d=self.DRIVER_EMIT)))
+
+        output_dir = get_temp_dir(suffix="rtc-test")
+        tmp_dir = get_temp_dir(suffix="rtc-temp")
+
+        log.debug("Driver {e}".format(e=self.DRIVER_EMIT))
+        log.debug("input files {i}".format(i=self.INPUT_FILES))
+        log.debug("running in {p}".format(p=output_dir))
+
+        output_tc = get_temp_file("-{n}-tool_contract.json".format(n=self.__class__.__name__), output_dir)
+        emit_tc_exe = "{e} > {o}".format(e=self.DRIVER_EMIT, o=output_tc)
+        # the command relies on shell redirection, so it is passed as a string
+        rcode = subprocess.call(emit_tc_exe, shell=True)
+
+        self.assertEqual(rcode, 0, "Emitting tool contract failed for '{e}'".format(e=emit_tc_exe))
+
+        # sanity marshalling-unmarshalling
+        log.debug("Loading tool-contract from {p}".format(p=output_tc))
+        tc = load_tool_contract_from(output_tc)
+
+        log.info("Resolving tool contract to RTC")
+
+        rtc = self._to_rtc(tc, output_dir, tmp_dir)
+
+        output_json_rtc = get_temp_file("resolved_tool_contract.json", output_dir)
+        write_resolved_tool_contract(rtc, output_json_rtc)
+
+        # sanity: the written RTC must load back
+        loaded_rtc = load_resolved_tool_contract_from(output_json_rtc)
+        self.assertIsInstance(loaded_rtc, ResolvedToolContract)
+
+        # Test resolved options if specified.
+        # .items() works on both Python 2 and 3 (iteritems is Python 2 only)
+        for opt, resolved_value in self.RESOLVED_TASK_OPTIONS.items():
+            self.assertTrue(opt in rtc.task.options, "Resolved option {x} not in RTC options.".format(x=opt))
+            emsg = "Resolved option {o} are not equal. Expected {a}, got {b}".format(o=opt, b=rtc.task.options[opt], a=resolved_value)
+            if isinstance(resolved_value, float):
+                # floats are compared approximately instead of being skipped
+                self.assertAlmostEqual(rtc.task.options[opt], resolved_value, msg=emsg)
+            else:
+                self.assertEqual(rtc.task.options[opt], resolved_value, emsg)
+
+        # Resolved NPROC
+        self.assertEqual(rtc.task.nproc, self.RESOLVED_NPROC)
+
+        log.info("running resolved contract {r}".format(r=output_json_rtc))
+
+        exe = "{d} {p}".format(p=output_json_rtc, d=self.DRIVER_RESOLVE)
+        log.info("Running exe '{e}'".format(e=exe))
+        rcode = subprocess.call(exe, shell=True)
+        self.assertEqual(rcode, 0, "Running from resolved tool contract failed from {e}".format(e=exe))
+        log.info("Successfully completed running e2e for {d}".format(d=self.DRIVER_EMIT))
+
+        self._test_outputs_exists(rtc)
+
+        self.run_after(rtc, output_dir)
+
+    def run_after(self, rtc, output_dir):
+        """
+        Optional additional test code, e.g. to verify that the job produced
+        the expected outputs. This is run automatically by test_run_e2e, but
+        does nothing unless overridden in a subclass.
+        """
+        pass
+
+
+class PbTestScatterApp(PbTestApp):
+    """End-to-end harness for scatter (chunking) tool contracts.
+
+    Subclasses customise MAX_NCHUNKS, RESOLVED_MAX_NCHUNKS and CHUNK_KEYS.
+    """
+    MAX_NCHUNKS = 12
+    RESOLVED_MAX_NCHUNKS = 12
+    CHUNK_KEYS = ()
+
+    def _to_rtc(self, tc, output_dir, tmp_dir):
+        # same arguments as the standard resolver plus the chunking settings
+        resolver_args = (tc, self.INPUT_FILES, output_dir, tmp_dir,
+                         self.MAX_NPROC, self.TASK_OPTIONS,
+                         self.MAX_NCHUNKS, self.CHUNK_KEYS)
+        return resolve_scatter_tool_contract(*resolver_args)
+
+
+class PbTestGatherApp(PbTestApp):
+    """End-to-end harness for gather tool contracts.
+
+    Set CHUNK_KEY in a subclass to change the key handed to the resolver.
+    """
+    CHUNK_KEY = PipelineChunk.CHUNK_KEY_PREFIX + 'fasta_id'
+
+    def _to_rtc(self, tc, output_dir, tmp_dir):
+        # same arguments as the standard resolver plus the chunk key
+        resolver_args = (tc, self.INPUT_FILES, output_dir, tmp_dir,
+                         self.MAX_NPROC, self.TASK_OPTIONS, self.CHUNK_KEY)
+        return resolve_gather_tool_contract(*resolver_args)
\ No newline at end of file
diff --git a/pbcommand/utils.py b/pbcommand/utils.py
new file mode 100644
index 0000000..87f6fa8
--- /dev/null
+++ b/pbcommand/utils.py
@@ -0,0 +1,159 @@
+"""Utils for common funcs, such as setting up a log, composing functions."""
+import functools
+import os
+import sys
+import logging
+import argparse
+import traceback
+import time
+import types
+
+log = logging.getLogger(__name__)
+
+
+def setup_log(alog, level=logging.INFO, file_name=None, log_filter=None,
+              str_formatter='[%(levelname)s] %(asctime)-15sZ [%(name)s %(funcName)s %(lineno)d] %(message)s'):
+    """Attach one configured handler to alog and return alog.
+
+    The logger itself is set to DEBUG; `level` is applied to the new
+    handler, so the handler decides what is actually emitted.
+
+    :param alog: a log instance
+    :param level: (int) minimum level emitted by the new handler
+    :param file_name: (str, None) log file path; None sends output to stdout
+    :param log_filter: (LogFilter, None) optional filter added to the handler
+    :param str_formatter: (str) log formatting str
+    :return: alog
+    """
+    # set on the Formatter class itself: timestamps are rendered via gmtime
+    logging.Formatter.converter = time.gmtime
+
+    alog.setLevel(logging.DEBUG)
+
+    if file_name is not None:
+        sink = logging.FileHandler(file_name)
+    else:
+        sink = logging.StreamHandler(sys.stdout)
+
+    sink.setFormatter(logging.Formatter(str_formatter))
+    sink.setLevel(level)
+    if log_filter:
+        sink.addFilter(log_filter)
+
+    alog.addHandler(sink)
+    return alog
+
+
+def log_traceback(alog, ex, ex_traceback):
+    """
+    Write a formatted python traceback to the log at error level
+
+    :param alog: log instance that receives the traceback text
+    :param ex: python Exception instance
+    :param ex_traceback: exception traceback
+
+
+    Example Usage (assuming you have a log instance in your scope)
+
+    try:
+        1 / 0
+    except Exception as e:
+        msg = "{i} failed validation. {e}".format(i=item, e=e)
+        log.error(msg)
+        _, _, ex_traceback = sys.exc_info()
+        log_traceback(log, e, ex_traceback)
+
+    """
+    formatted = traceback.format_exception(ex.__class__, ex, ex_traceback)
+    # the formatted lines already end with newlines, so join them directly
+    alog.error(''.join(formatted))
+
+
+def _simple_validate_type(atype, instance):
+    """Return instance unchanged when it is an instance of atype.
+
+    :param atype: type (or tuple of types) accepted by isinstance
+    :param instance: value to check
+    :raises TypeError: if instance is not an instance of atype
+    """
+    if not isinstance(instance, atype):
+        _d = dict(t=atype, x=type(instance), v=instance)
+        raise TypeError("Expected type {t}. Got type {x} for {v}".format(**_d))
+    return instance
+
+_is_argparser_instance = functools.partial(_simple_validate_type, argparse.ArgumentParser)
+
+
+def is_argparser_instance(func):
+    """Decorator that requires the first positional arg to be an ArgumentParser.
+
+    :param func: function whose first positional argument is validated
+    :raises TypeError: (at call time) if the first positional argument is not
+                       an argparse.ArgumentParser instance
+    """
+    # functools.wraps must be called with the wrapped function; used bare it
+    # replaces `wrapper` with a partial and `func` is never invoked.
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        _is_argparser_instance(args[0])
+        return func(*args, **kwargs)
+    return wrapper
+
+
+def compose(*funcs):
+    """
+    Functional composition of a non-empty list of single-argument callables
+
+    [f, g, h] will be f(g(h(x)))
+
+    fx = compose(f, g, h)
+
+    or
+
+    fx = compose(*[f, g, h])
+
+    :param funcs: one or more callables (functions, methods, partials,
+                  classes or objects defining __call__)
+    :raises ValueError: if no callable is given
+    :raises TypeError: if any item is not callable
+    :return: a function of one argument applying funcs right-to-left
+    """
+    if not funcs:
+        raise ValueError("Compose only supports non-empty lists")
+    for func in funcs:
+        # callable() accepts everything the old explicit type list did, plus
+        # classes and callable objects that were rejected before
+        if not callable(func):
+            raise TypeError("Only Function types are supported")
+
+    def compose_two(f, g):
+        def c(x):
+            return f(g(x))
+        return c
+    return functools.reduce(compose_two, funcs)
+
+
+def which(exe_str):
+    """Search the directories listed in PATH for the executable exe_str.
+
+    :param exe_str: (str) name of the executable to look for
+    :return: (str, None) full path of the first match that is a regular,
+             executable file; None if PATH is undefined or nothing matches
+    """
+    paths = os.environ.get('PATH', None)
+
+    if paths is None:
+        msg = "PATH env var is not defined."
+        log.error(msg)
+        return None
+
+    # os.pathsep is ':' on POSIX and ';' on Windows
+    for path in paths.split(os.pathsep):
+        exe_path = os.path.join(path, exe_str)
+        # a directory or a non-executable file of the same name is not a hit
+        if os.path.isfile(exe_path) and os.access(exe_path, os.X_OK):
+            return exe_path
+
+    return None
+
+
+
+class Singleton(type):
+
+    """
+    General purpose singleton metaclass
+
+    The first call to a class using this metaclass builds the instance.
+    Every later call returns that same instance and ignores its arguments.
+
+    Usage:
+
+    >>> class MyClass(object):
+    ...     __metaclass__ = Singleton
+    ...     def __init__(self):
+    ...         self.name = 'name'
+
+    """
+
+    def __init__(cls, name, bases, dct):
+        super(Singleton, cls).__init__(name, bases, dct)
+        # set on every class built by this metaclass, so each has its own slot
+        cls.instance = None
+
+    def __call__(cls, *args, **kw):
+        if cls.instance is None:
+            # forward keyword arguments too; they used to be silently dropped
+            cls.instance = super(Singleton, cls).__call__(*args, **kw)
+        return cls.instance
diff --git a/pbcommand/validators.py b/pbcommand/validators.py
new file mode 100644
index 0000000..8ac41f8
--- /dev/null
+++ b/pbcommand/validators.py
@@ -0,0 +1,114 @@
+import os
+import logging
+import functools
+import subprocess
+
+
+log = logging.getLogger(__name__)
+
+
+def trigger_nfs_refresh(ff):
+    """
+    Central place for all NFS hackery
+
+    Return whether a file or a dir ff exists or not.
+    Call ls instead of python os.path.exists to eliminate NFS errors.
+
+    The open/stat/listdir attempts below are deliberate best-effort pokes
+    meant to help trigger an NFS refresh; their failures are ignored.
+
+    :param ff: (str) path of a file or directory
+    :rtype: bool
+
+    # Yuan Li and various people contributed.
+    """
+    # try to trigger refresh for File case
+    try:
+        with open(ff, 'r'):
+            pass
+    except Exception:
+        pass
+
+    # try to trigger refresh for Directory case
+    try:
+        os.stat(ff)
+        os.listdir(ff)
+    except Exception:
+        pass
+
+    # Call externally. The command is an argument list (no shell involved) and
+    # subprocess.call returns the exit code instead of raising on failure.
+    try:
+        with open(os.devnull, 'w') as devnull:
+            rcode = subprocess.call(["ls", "--", ff], stdout=devnull, stderr=devnull)
+    except OSError:
+        # `ls` could not be started; fall back to the in-process check
+        return os.path.exists(ff)
+
+    return rcode == 0
+
+
+def _trigger_nfs_refresh_and_ignore(ff):
+    """Run the NFS refresh for ff, discard its result and return ff.
+
+    :rtype: str
+    """
+    trigger_nfs_refresh(ff)
+    return ff
+
+
+def _validate_resource(func, resource):
+    """Return the absolute path of resource if func accepts it.
+
+    :param func: predicate called with the path (e.g. os.path.isfile)
+    :param resource: (str) path of the file or directory
+    :raises IOError: if func(resource) is false
+    """
+    # Attempt to trigger an NFS metadata refresh before checking
+    trigger_nfs_refresh(resource)
+
+    if not func(resource):
+        raise IOError("Unable to find {f}".format(f=resource))
+    return os.path.abspath(resource)
+
+
+# ready-made validators; each takes a path and returns its absolute path
+validate_file = functools.partial(_validate_resource, os.path.isfile)
+validate_dir = functools.partial(_validate_resource, os.path.isdir)
+validate_output_dir = functools.partial(_validate_resource, os.path.isdir)
+
+
+def validate_report(report_file_name):
+    """
+    Return report_file_name if it is a bare file name
+
+    :raises ValueError: if the name contains path separators
+    """
+    bare_name = os.path.basename(report_file_name)
+    if bare_name != report_file_name:
+        raise ValueError("Path separators are not allowed: {r}".format(r=report_file_name))
+    return report_file_name
+
+
+def validate_fofn(fofn):
+    """Check that the FOFN and every file listed in it can be found.
+
+    :param fofn: (str) Path to File of file names.
+    :raises: IOError if any file is not found.
+    :return: (str) abspath of the input fofn
+    """
+    trigger_nfs_refresh(fofn)
+
+    if not os.path.isfile(fofn):
+        raise IOError("Unable to find {f}".format(f=fofn))
+
+    abs_fofn = os.path.abspath(fofn)
+    # fofn_to_files raises IOError when a listed file is missing
+    file_names = fofn_to_files(abs_fofn)
+    log.debug("Found {n} files in FOFN {f}.".format(n=len(file_names), f=fofn))
+    return abs_fofn
+
+
+def fofn_to_files(fofn):
+    """Util func to convert a bas/bax fofn file to a list of bas/bax files.
+
+    Entries are returned in the order they appear in the FOFN, duplicates
+    are dropped and blank lines are ignored.
+
+    :param fofn: (str) path to the File of file names
+    :raises IOError: if the FOFN or any file listed in it is not found
+    :return: (list of str) file names read from the FOFN
+    """
+
+    trigger_nfs_refresh(fofn)
+
+    if not os.path.exists(fofn):
+        raise IOError("Unable to find FOFN {f}".format(f=fofn))
+
+    bas_files = []
+    seen = set()
+    with open(fofn, 'r') as f:
+        for line in f:
+            name = line.strip()
+            # a blank line is not a file name; skip it rather than fail on ''
+            if name and name not in seen:
+                seen.add(name)
+                bas_files.append(name)
+
+    for bas_file in bas_files:
+        if not os.path.isfile(bas_file):
+            # try one more time to find the file by
+            # performing an NFS refresh
+            if not trigger_nfs_refresh(bas_file):
+                raise IOError("Unable to find bas/bax file '{f}'".format(f=bas_file))
+
+    return bas_files
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..224a779
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,2 @@
+[metadata]
+description-file = README.md
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..c3032e9
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,56 @@
+import os
+
+try:
+ from setuptools import setup, find_packages
+except ImportError:
+ from distutils.core import setup
+
+version = __import__('pbcommand').get_version()
+
+_REQUIREMENTS_FILE = 'REQUIREMENTS.txt'
+_REQUIREMENTS_TEST_FILE = "REQUIREMENTS_TEST.txt"
+_README = 'README.md'
+
+
+def _get_description():
+    """Return the text of the README that sits next to this file."""
+    with open(_get_local_file(_README)) as f:
+        return f.read()
+
+
+def _get_local_file(file_name):
+    """Return the path of file_name inside this file's directory."""
+    return os.path.join(os.path.dirname(__file__), file_name)
+
+
+def _get_requirements(file_name):
+    """Read a requirements file into a list of requirement strings.
+
+    Surrounding whitespace (including the trailing newline) is stripped.
+    Blank lines and lines whose first non-blank character is '#' are skipped.
+    """
+    with open(file_name, 'r') as f:
+        stripped = (line.strip() for line in f)
+        return [line for line in stripped if line and not line.startswith("#")]
+
+
+def _get_local_requirements(file_name):
+    """Read the requirements file file_name from this file's directory."""
+    return _get_requirements(_get_local_file(file_name))
+
+
+# Package metadata. Requirements and the long description are read from files
+# located next to this script.
+# NOTE(review): find_packages is imported only in the setuptools branch of the
+# try/except at the top of this file; with the distutils fallback the
+# packages= line below presumably fails with a NameError -- confirm.
+setup(
+ name='pbcommand',
+ version=version,
+ license='BSD',
+ author='mpkocher',
+ author_email='mkocher at pacificbiosciences.com',
+ url="https://github.com/PacificBiosciences/pbcommand",
+ download_url='https://github.com/PacificBiosciences/pbcommand/tarball/{v}'.format(v=version),
+ description='Library and Tools for interfacing to PacBio pbsmrtpipe workflow engine.',
+ install_requires=_get_local_requirements(_REQUIREMENTS_FILE),
+ tests_require=_get_local_requirements(_REQUIREMENTS_TEST_FILE),
+ long_description=_get_description(),
+ keywords='workflow pacbio'.split(),
+ packages=find_packages(),
+ package_data={"pbcommand": ["schemas/*.avsc"]},
+ zip_safe=False,
+ extras_require={"pbcore": ["pbcore", "ipython", "autopep8"],
+ "interactive": ['prompt_toolkit']},
+ classifiers=['Development Status :: 4 - Beta',
+ 'Environment :: Console',
+ 'Topic :: Software Development :: Bug Tracking']
+)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..a279baa
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# vim:fileencoding=utf_8
+
+"""Add doc string
+
+"""
+__author__ = 'M. Kocher'
+__copyright__ = ""
+__credits__ = ['M. Kocher']
+__license__ = 'MIT License'
+__maintainer__ = 'M. Kocher'
+__email__ = 'Michael.Kocher at me.com'
+__version__ = '0.1'
diff --git a/tests/base_utils.py b/tests/base_utils.py
new file mode 100755
index 0000000..e36b0c1
--- /dev/null
+++ b/tests/base_utils.py
@@ -0,0 +1,14 @@
+
+import os
+
+from pbcommand.testkit.base_utils import *
+
+# Absolute path of the "data" directory that sits next to this module
+DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data'))
+
+
+def get_data_file(path):
+    """Return the path of a fixture located directly in DATA_DIR."""
+    data_path = os.path.join(DATA_DIR, path)
+    return data_path
+
+
+def get_data_file_from_subdir(subdir, path):
+    """Return the path of a fixture located in DATA_DIR/subdir."""
+    return os.path.join(get_data_file(subdir), path)
diff --git a/tests/data/dev_example_dev_txt_app_tool_contract.json b/tests/data/dev_example_dev_txt_app_tool_contract.json
new file mode 100644
index 0000000..0565b70
--- /dev/null
+++ b/tests/data/dev_example_dev_txt_app_tool_contract.json
@@ -0,0 +1,65 @@
+{
+ "version": "0.1.0",
+ "driver": {
+ "serialization": "json",
+ "exe": "python -m pbcommand.cli.examples.dev_app --resolved-tool-contract ",
+ "env": {}
+ },
+ "tool_contract_id": "pbcommand.tasks.dev_txt_app",
+ "tool_contract": {
+ "task_type": "pbsmrtpipe.task_types.standard",
+ "resource_types": [
+ "$tmpfile",
+ "$tmpfile",
+ "$tmpdir"
+ ],
+ "description": "Dev app for Testing that supports emitting tool contracts",
+ "schema_options": [
+ {
+ "pb_option": {
+ "default": 10,
+ "type": "integer",
+ "option_id": "pbcommand.task_options.dev_max_nlines",
+ "name": "Max Lines",
+ "description": "Max Number of lines to Copy"
+ },
+ "title": "JSON Schema for pbcommand.task_options.dev_max_nlines",
+ "required": [
+ "pbcommand.task_options.dev_max_nlines"
+ ],
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "type": "object",
+ "properties": {
+ "pbcommand.task_options.dev_max_nlines": {
+ "default": 10,
+ "type": "integer",
+ "description": "Max Number of lines to Copy",
+ "title": "Max Lines"
+ }
+ }
+ }
+ ],
+ "output_types": [
+ {
+ "title": "Txt outfile",
+ "description": "Generic Output Txt file",
+ "default_name": "output.txt",
+ "id": "txt_out",
+ "file_type_id": "PacBio.FileTypes.txt"
+ }
+ ],
+ "_comment": "Created by v0.2.14",
+ "name": "Txt App",
+ "input_types": [
+ {
+ "description": "Generic Text File",
+ "title": "Txt file",
+ "id": "txt_in",
+ "file_type_id": "PacBio.FileTypes.txt"
+ }
+ ],
+ "nproc": 1,
+ "is_distributed": false,
+ "tool_contract_id": "pbcommand.tasks.dev_txt_app"
+ }
+}
diff --git a/tests/data/dev_example_resolved_tool_contract.json b/tests/data/dev_example_resolved_tool_contract.json
new file mode 100644
index 0000000..fce2fe2
--- /dev/null
+++ b/tests/data/dev_example_resolved_tool_contract.json
@@ -0,0 +1,22 @@
+{
+ "driver": {
+ "env": {},
+ "exe": "python -m pbcommand.cli.example.dev_app --resolved-tool-contract "
+ },
+ "resolved_tool_contract": {
+ "input_files": [
+ "/tmp/tmpVgzvudfasta"
+ ],
+ "nproc": 1,
+ "options": {
+ "pbcommand.task_options.dev_read_length": 27
+ },
+ "output_files": [
+ "/tmp/file.fasta"
+ ],
+ "resources": [],
+ "is_distributed": false,
+ "task_type": "pbsmrtpipe.task_types.standard",
+ "tool_contract_id": "pbcommand.tools.dev_app"
+ }
+}
diff --git a/tests/data/dev_example_tool_contract.json b/tests/data/dev_example_tool_contract.json
new file mode 100644
index 0000000..c320ee6
--- /dev/null
+++ b/tests/data/dev_example_tool_contract.json
@@ -0,0 +1,65 @@
+{
+ "version": "0.2.1",
+ "driver": {
+ "serialization": "json",
+ "exe": "python -m pbcommand.cli.example.dev_app --resolved-tool-contract ",
+ "env": {}
+ },
+ "tool_contract_id": "pbcommand.tasks.dev_app",
+ "tool_contract": {
+ "task_type": "pbsmrtpipe.task_types.standard",
+ "resource_types": [
+ "$tmpfile",
+ "$tmpfile",
+ "$tmpdir"
+ ],
+ "description": "Dev app for Testing that supports emitting tool contracts",
+ "schema_options": [
+ {
+ "pb_option": {
+ "default": 25,
+ "type": "integer",
+ "option_id": "pbcommand.task_options.dev_read_length",
+ "name": "Length filter",
+ "description": "Min Sequence Length filter"
+ },
+ "title": "JSON Schema for pbcommand.task_options.dev_read_length",
+ "required": [
+ "pbcommand.task_options.dev_read_length"
+ ],
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "type": "object",
+ "properties": {
+ "pbcommand.task_options.dev_read_length": {
+ "default": 25,
+ "type": "integer",
+ "description": "Min Sequence Length filter",
+ "title": "Length filter"
+ }
+ }
+ }
+ ],
+ "output_types": [
+ {
+ "title": "Filtered Fasta file",
+ "description": "Filtered Fasta file",
+ "default_name": "filter.fasta",
+ "id": "fasta_out",
+ "file_type_id": "PacBio.FileTypes.Fasta"
+ }
+ ],
+ "_comment": "Created by v0.2.14",
+ "name": "Example Dev App",
+ "input_types": [
+ {
+ "description": "PacBio Spec'ed fasta file",
+ "title": "Fasta File",
+ "id": "fasta_in",
+ "file_type_id": "PacBio.FileTypes.Fasta"
+ }
+ ],
+ "nproc": 1,
+ "is_distributed": false,
+ "tool_contract_id": "pbcommand.tasks.dev_app"
+ }
+}
diff --git a/tests/data/dev_gather_fasta_app_tool_contract.json b/tests/data/dev_gather_fasta_app_tool_contract.json
new file mode 100644
index 0000000..9095ece
--- /dev/null
+++ b/tests/data/dev_gather_fasta_app_tool_contract.json
@@ -0,0 +1,37 @@
+{
+ "version": "0.1.0",
+ "driver": {
+ "serialization": "json",
+ "exe": "python -m pbcommand.cli.examples.dev_scatter_fasta_app --resolved-tool-contract ",
+ "env": {}
+ },
+ "tool_contract_id": "pbcommand.tasks.dev_gather_fasta",
+ "tool_contract": {
+ "task_type": "pbsmrtpipe.task_types.gathered",
+ "resource_types": [],
+ "description": "Gather a fasta resources in a Chunk.json file",
+ "schema_options": [],
+ "output_types": [
+ {
+ "title": "Chunk JSON",
+ "description": "Output Fasta",
+ "default_name": "gathered.fasta",
+ "id": "output",
+ "file_type_id": "PacBio.FileTypes.Fasta"
+ }
+ ],
+ "_comment": "Created by v0.2.14",
+ "name": "Fasta Chunk Gather",
+ "input_types": [
+ {
+ "description": "Chunked Fasta JSON Out",
+ "title": "Chunk JSON",
+ "id": "chunk_json",
+ "file_type_id": "PacBio.FileTypes.CHUNK"
+ }
+ ],
+ "nproc": 1,
+ "is_distributed": false,
+ "tool_contract_id": "pbcommand.tasks.dev_gather_fasta"
+ }
+}
diff --git a/tests/data/dev_scatter_fasta_app_tool_contract.json b/tests/data/dev_scatter_fasta_app_tool_contract.json
new file mode 100644
index 0000000..0b7b179
--- /dev/null
+++ b/tests/data/dev_scatter_fasta_app_tool_contract.json
@@ -0,0 +1,65 @@
+{
+ "version": "0.1.0",
+ "driver": {
+ "serialization": "json",
+ "exe": "python -m pbcommand.cli.examples.dev_scatter_fasta_app --resolved-tool-contract ",
+ "env": {}
+ },
+ "tool_contract_id": "pbcommand.tasks.dev_scatter_fasta",
+ "tool_contract": {
+ "task_type": "pbsmrtpipe.task_types.scattered",
+ "resource_types": [],
+ "description": "Scatter a single fasta file to create chunk.json file",
+ "schema_options": [
+ {
+ "pb_option": {
+ "default": 10,
+ "type": "integer",
+ "option_id": "pbcommand.task_options.dev_scatter_fa_nchunks",
+ "name": "Number of chunks",
+ "description": "Suggested number of chunks. May be overridden by $max_nchunks"
+ },
+ "title": "JSON Schema for pbcommand.task_options.dev_scatter_fa_nchunks",
+ "required": [
+ "pbcommand.task_options.dev_scatter_fa_nchunks"
+ ],
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "type": "object",
+ "properties": {
+ "pbcommand.task_options.dev_scatter_fa_nchunks": {
+ "default": 10,
+ "type": "integer",
+ "description": "Suggested number of chunks. May be overridden by $max_nchunks",
+ "title": "Number of chunks"
+ }
+ }
+ }
+ ],
+ "output_types": [
+ {
+ "title": "Chunk JSON",
+ "description": "Scattered/Chunked Fasta Chunk.json",
+ "default_name": "fasta.chunks.json",
+ "id": "cjson",
+ "file_type_id": "PacBio.FileTypes.CHUNK"
+ }
+ ],
+ "_comment": "Created by v0.2.14",
+ "nchunks": "$max_nchunks",
+ "name": "Fasta Scatter",
+ "input_types": [
+ {
+ "description": "Fasta file to scatter",
+ "title": "Fasta In",
+ "id": "fasta_in",
+ "file_type_id": "PacBio.FileTypes.Fasta"
+ }
+ ],
+ "chunk_keys": [
+ "$chunk.fasta_id"
+ ],
+ "nproc": 1,
+ "is_distributed": false,
+ "tool_contract_id": "pbcommand.tasks.dev_scatter_fasta"
+ }
+}
diff --git a/tests/data/example-reports/filter_reports_adapters.json b/tests/data/example-reports/filter_reports_adapters.json
new file mode 100644
index 0000000..604525d
--- /dev/null
+++ b/tests/data/example-reports/filter_reports_adapters.json
@@ -0,0 +1,53 @@
+{
+ "tables": [],
+ "_version": "2.1",
+ "_changelist": 127707,
+ "attributes": [
+ {
+ "name": "Adapter Dimers",
+ "value": 0.0014104560030870359,
+ "id": "adapter.adapter_dimers"
+ },
+ {
+ "name": "Short Inserts",
+ "value": 0.000252817585458997,
+ "id": "adapter.short_inserts"
+ },
+ {
+ "name": "Medium Inserts",
+ "value": 0.0010911074740861974,
+ "id": "adapter.medium_inserts"
+ },
+ {
+ "name": "Adapter Dimers",
+ "value": 0.0033818058843422386,
+ "id": "adapter.hq_adapter_dimers"
+ },
+ {
+ "name": "Short Inserts",
+ "value": 0.00013527223537368956,
+ "id": "adapter.hq_short_inserts"
+ },
+ {
+ "name": "Medium Inserts",
+ "value": 0.002198173824822455,
+ "id": "adapter.hq_medium_inserts"
+ }
+ ],
+ "id": "adapter",
+ "plotGroups": [
+ {
+ "id": "adapter.observed_insert_length_distribution",
+ "thumbnail": "adapter_observed_insert_length_distribution_thumb.png",
+ "plots": [
+ {
+ "caption": null,
+ "image": "adapter_observed_insert_length_distribution.png",
+ "id": "adapter.observed_insert_length_distribution.plot1"
+ }
+ ],
+ "legend": null,
+ "title": "Observed Insert Length Distribution"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/tests/data/example-reports/laa_report1.json b/tests/data/example-reports/laa_report1.json
new file mode 100644
index 0000000..c60032f
--- /dev/null
+++ b/tests/data/example-reports/laa_report1.json
@@ -0,0 +1,51 @@
+{
+ "_changelist": "UNKNOWN",
+ "_version": "0.2.14",
+ "attributes": [],
+ "dataset_uuids": [],
+ "id": "pblaa_tasks_laa",
+ "plotGroups": [],
+ "tables": [
+ {
+ "columns": [
+ {
+ "header": "BarcodeName",
+ "id": "pblaa_tasks_laa.pblaa_result_table.barcodename",
+ "values": ["Barcode1", "Barcode2"]
+ },
+ {
+ "header": "FastaName",
+ "id": "pblaa_tasks_laa.pblaa_result_table.fastaname",
+ "values": ["BarcodeFasta1", "BarcodeFasta2"]
+ },
+ {
+ "header": "CoarseCluster",
+ "id": "pblaa_tasks_laa.pblaa_result_table.coarsecluster",
+ "values": [1, 2]
+ },
+ {
+ "header": "Phase",
+ "id": "pblaa_tasks_laa.pblaa_result_table.phase",
+ "values": [1, 2]
+ },
+ {
+ "header": "TotalCoverage",
+ "id": "pblaa_tasks_laa.pblaa_result_table.totalcoverage",
+ "values": [1, 2]
+ },
+ {
+ "header": "SequenceLength",
+ "id": "pblaa_tasks_laa.pblaa_result_table.sequencelength",
+ "values": [1, 2]
+ },
+ {
+ "header": "PredictedAccuracy",
+ "id": "pblaa_tasks_laa.pblaa_result_table.predictedaccuracy",
+ "values": [1, 2]
+ }
+ ],
+ "id": "pblaa_tasks_laa.pblaa_result_table",
+ "title": "Pblaa Results By Barcode"
+ }
+ ]
+}
diff --git a/tests/data/example-reports/laa_report2.json b/tests/data/example-reports/laa_report2.json
new file mode 100644
index 0000000..f5ad3d2
--- /dev/null
+++ b/tests/data/example-reports/laa_report2.json
@@ -0,0 +1,51 @@
+{
+ "_changelist": "UNKNOWN",
+ "_version": "0.2.14",
+ "attributes": [],
+ "dataset_uuids": [],
+ "id": "pblaa_tasks_laa",
+ "plotGroups": [],
+ "tables": [
+ {
+ "columns": [
+ {
+ "header": "BarcodeName",
+ "id": "pblaa_tasks_laa.pblaa_result_table.barcodename",
+ "values": ["Barcode4", "Barcode3"]
+ },
+ {
+ "header": "FastaName",
+ "id": "pblaa_tasks_laa.pblaa_result_table.fastaname",
+ "values": ["BarcodeFasta4", "BarcodeFasta3"]
+ },
+ {
+ "header": "CoarseCluster",
+ "id": "pblaa_tasks_laa.pblaa_result_table.coarsecluster",
+ "values": [4, 3]
+ },
+ {
+ "header": "Phase",
+ "id": "pblaa_tasks_laa.pblaa_result_table.phase",
+ "values": [4, 3]
+ },
+ {
+ "header": "TotalCoverage",
+ "id": "pblaa_tasks_laa.pblaa_result_table.totalcoverage",
+ "values": [4, 3]
+ },
+ {
+ "header": "SequenceLength",
+ "id": "pblaa_tasks_laa.pblaa_result_table.sequencelength",
+ "values": [4, 3]
+ },
+ {
+ "header": "PredictedAccuracy",
+ "id": "pblaa_tasks_laa.pblaa_result_table.predictedaccuracy",
+ "values": [4, 3]
+ }
+ ],
+ "id": "pblaa_tasks_laa.pblaa_result_table",
+ "title": "Pblaa Results By Barcode"
+ }
+ ]
+}
diff --git a/tests/data/example-reports/overview.json b/tests/data/example-reports/overview.json
new file mode 100644
index 0000000..5fd9b14
--- /dev/null
+++ b/tests/data/example-reports/overview.json
@@ -0,0 +1,19 @@
+{
+ "tables": [],
+ "_version": "2.1",
+ "_changelist": 127707,
+ "attributes": [
+ {
+ "name": "SMRT Cells",
+ "value": 1,
+ "id": "overview.ncells"
+ },
+ {
+ "name": "Movies",
+ "value": 1,
+ "id": "overview.nmovies"
+ }
+ ],
+ "id": "overview",
+ "plotGroups": []
+}
\ No newline at end of file
diff --git a/tests/data/example.fasta b/tests/data/example.fasta
new file mode 100644
index 0000000..f54b14e
--- /dev/null
+++ b/tests/data/example.fasta
@@ -0,0 +1,2 @@
+>record_48
+AACTTTCGGACCCGTGGTAGGATTGTGGGAGAATACTGTTGATGTTTTCAC
\ No newline at end of file
diff --git a/tests/data/example.txt b/tests/data/example.txt
new file mode 100644
index 0000000..a5c46f7
--- /dev/null
+++ b/tests/data/example.txt
@@ -0,0 +1,10 @@
+This is a line
+This is a line
+This is a line
+This is a line
+This is a line
+This is a line
+This is a line
+This is a line
+This is a line
+This is the last line
\ No newline at end of file
diff --git a/tests/data/pbcommand.tasks.dev_fastq2fasta_tool_contract.json b/tests/data/pbcommand.tasks.dev_fastq2fasta_tool_contract.json
new file mode 100644
index 0000000..b3d8630
--- /dev/null
+++ b/tests/data/pbcommand.tasks.dev_fastq2fasta_tool_contract.json
@@ -0,0 +1,37 @@
+{
+ "driver": {
+ "env": {},
+ "exe": "python -m pbcommand.cli.examples.dev_quick_hello_world run-rtc ",
+ "serialization": "json"
+ },
+ "tool_contract": {
+ "_comment": "Created by v0.2.14",
+ "description": "Quick tool dev_fastq2fasta pbcommand.tasks.dev_fastq2fasta",
+ "input_types": [
+ {
+ "description": "description for PacBio.FileTypes.Fastq_0",
+ "file_type_id": "PacBio.FileTypes.Fastq",
+ "id": "Label PacBio.FileTypes.Fastq_0",
+ "title": "<FileType id=PacBio.FileTypes.Fastq name=file.fastq >"
+ }
+ ],
+ "is_distributed": true,
+ "name": "Tool dev_fastq2fasta",
+ "nproc": 1,
+ "output_types": [
+ {
+ "default_name": "file.fasta",
+ "description": "description for <FileType id=PacBio.FileTypes.Fasta name=file.fasta >",
+ "file_type_id": "PacBio.FileTypes.Fasta",
+ "id": "Label PacBio.FileTypes.Fasta_0",
+ "title": "<FileType id=PacBio.FileTypes.Fasta name=file.fasta >"
+ }
+ ],
+ "resource_types": [],
+ "schema_options": [],
+ "task_type": "pbsmrtpipe.task_types.standard",
+ "tool_contract_id": "pbcommand.tasks.dev_fastq2fasta"
+ },
+ "tool_contract_id": "pbcommand.tasks.dev_fastq2fasta",
+ "version": "0.1.0"
+}
\ No newline at end of file
diff --git a/tests/data/pbcommand.tasks.dev_qhello_world_tool_contract.json b/tests/data/pbcommand.tasks.dev_qhello_world_tool_contract.json
new file mode 100644
index 0000000..bdf026e
--- /dev/null
+++ b/tests/data/pbcommand.tasks.dev_qhello_world_tool_contract.json
@@ -0,0 +1,61 @@
+{
+ "driver": {
+ "env": {},
+ "exe": "python -m pbcommand.cli.examples.dev_quick_hello_world run-rtc ",
+ "serialization": "json"
+ },
+ "tool_contract": {
+ "_comment": "Created by v0.2.14",
+ "description": "Quick tool dev_qhello_world pbcommand.tasks.dev_qhello_world",
+ "input_types": [
+ {
+ "description": "description for PacBio.FileTypes.Fasta_0",
+ "file_type_id": "PacBio.FileTypes.Fasta",
+ "id": "Label PacBio.FileTypes.Fasta_0",
+ "title": "<FileType id=PacBio.FileTypes.Fasta name=file.fasta >"
+ }
+ ],
+ "is_distributed": true,
+ "name": "Tool dev_qhello_world",
+ "nproc": 1,
+ "output_types": [
+ {
+ "default_name": "file.fasta",
+ "description": "description for <FileType id=PacBio.FileTypes.Fasta name=file.fasta >",
+ "file_type_id": "PacBio.FileTypes.Fasta",
+ "id": "Label PacBio.FileTypes.Fasta_0",
+ "title": "<FileType id=PacBio.FileTypes.Fasta name=file.fasta >"
+ }
+ ],
+ "resource_types": [],
+ "schema_options": [
+ {
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "pb_option": {
+ "default": 1234,
+ "description": "Option alpha description",
+ "name": "Option alpha",
+ "option_id": "pbcommand.task_options.alpha",
+ "type": "integer"
+ },
+ "properties": {
+ "pbcommand.task_options.alpha": {
+ "default": 1234,
+ "description": "Option alpha description",
+ "title": "Option alpha",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "pbcommand.task_options.alpha"
+ ],
+ "title": "JSON Schema for pbcommand.task_options.alpha",
+ "type": "object"
+ }
+ ],
+ "task_type": "pbsmrtpipe.task_types.standard",
+ "tool_contract_id": "pbcommand.tasks.dev_qhello_world"
+ },
+ "tool_contract_id": "pbcommand.tasks.dev_qhello_world",
+ "version": "0.2.1"
+}
\ No newline at end of file
diff --git a/tests/data/pbcommand.tasks.dev_txt_custom_outs_tool_contract.json b/tests/data/pbcommand.tasks.dev_txt_custom_outs_tool_contract.json
new file mode 100644
index 0000000..1e1148d
--- /dev/null
+++ b/tests/data/pbcommand.tasks.dev_txt_custom_outs_tool_contract.json
@@ -0,0 +1,44 @@
+{
+ "driver": {
+ "env": {},
+ "exe": "python -m pbcommand.cli.examples.dev_quick_hello_world run-rtc ",
+ "serialization": "json"
+ },
+ "tool_contract": {
+ "_comment": "Created by v0.2.14",
+ "description": "Quick tool dev_txt_custom_outs pbcommand.tasks.dev_txt_custom_outs",
+ "input_types": [
+ {
+ "description": "description for PacBio.FileTypes.txt_0",
+ "file_type_id": "PacBio.FileTypes.txt",
+ "id": "Label PacBio.FileTypes.txt_0",
+ "title": "<FileType id=PacBio.FileTypes.txt name=file.txt >"
+ }
+ ],
+ "is_distributed": true,
+ "name": "Tool dev_txt_custom_outs",
+ "nproc": 1,
+ "output_types": [
+ {
+ "default_name": "PacBio.FileTypes.txt_file_0.txt",
+ "description": "File <FileType id=PacBio.FileTypes.txt name=file.txt >",
+ "file_type_id": "PacBio.FileTypes.txt",
+ "id": "label_PacBio.FileTypes.txt",
+ "title": "<FileType id=PacBio.FileTypes.txt name=file.txt >"
+ },
+ {
+ "default_name": "PacBio.FileTypes.txt_file_1.txt",
+ "description": "File <FileType id=PacBio.FileTypes.txt name=file.txt >",
+ "file_type_id": "PacBio.FileTypes.txt",
+ "id": "label_PacBio.FileTypes.txt",
+ "title": "<FileType id=PacBio.FileTypes.txt name=file.txt >"
+ }
+ ],
+ "resource_types": [],
+ "schema_options": [],
+ "task_type": "pbsmrtpipe.task_types.standard",
+ "tool_contract_id": "pbcommand.tasks.dev_txt_custom_outs"
+ },
+ "tool_contract_id": "pbcommand.tasks.dev_txt_custom_outs",
+ "version": "0.1.0"
+}
\ No newline at end of file
diff --git a/tests/data/pbcommand.tasks.dev_txt_hello_tool_contract.json b/tests/data/pbcommand.tasks.dev_txt_hello_tool_contract.json
new file mode 100644
index 0000000..f704adb
--- /dev/null
+++ b/tests/data/pbcommand.tasks.dev_txt_hello_tool_contract.json
@@ -0,0 +1,44 @@
+{
+ "driver": {
+ "env": {},
+ "exe": "python -m pbcommand.cli.examples.dev_quick_hello_world run-rtc ",
+ "serialization": "json"
+ },
+ "tool_contract": {
+ "_comment": "Created by v0.2.14",
+ "description": "Quick tool dev_txt_hello pbcommand.tasks.dev_txt_hello",
+ "input_types": [
+ {
+ "description": "description for PacBio.FileTypes.txt_0",
+ "file_type_id": "PacBio.FileTypes.txt",
+ "id": "Label PacBio.FileTypes.txt_0",
+ "title": "<FileType id=PacBio.FileTypes.txt name=file.txt >"
+ }
+ ],
+ "is_distributed": false,
+ "name": "Tool dev_txt_hello",
+ "nproc": 3,
+ "output_types": [
+ {
+ "default_name": "file.txt",
+ "description": "description for <FileType id=PacBio.FileTypes.txt name=file.txt >",
+ "file_type_id": "PacBio.FileTypes.txt",
+ "id": "Label PacBio.FileTypes.txt_0",
+ "title": "<FileType id=PacBio.FileTypes.txt name=file.txt >"
+ },
+ {
+ "default_name": "file.txt",
+ "description": "description for <FileType id=PacBio.FileTypes.txt name=file.txt >",
+ "file_type_id": "PacBio.FileTypes.txt",
+ "id": "Label PacBio.FileTypes.txt_1",
+ "title": "<FileType id=PacBio.FileTypes.txt name=file.txt >"
+ }
+ ],
+ "resource_types": [],
+ "schema_options": [],
+ "task_type": "pbsmrtpipe.task_types.standard",
+ "tool_contract_id": "pbcommand.tasks.dev_txt_hello"
+ },
+ "tool_contract_id": "pbcommand.tasks.dev_txt_hello",
+ "version": "0.1.0"
+}
\ No newline at end of file
diff --git a/tests/data/resolved_contract_01.json b/tests/data/resolved_contract_01.json
new file mode 100644
index 0000000..8f52fc9
--- /dev/null
+++ b/tests/data/resolved_contract_01.json
@@ -0,0 +1,23 @@
+{
+ "driver": {
+ "env": {},
+ "exe": "python -m pbcommand.cli.examples.dev_app --resolved-tool-contract "
+ },
+ "resolved_tool_contract": {
+ "_comment": "Created by pbcommand v0.2.3",
+ "input_files": [
+ "/Users/mkocher/gh_projects/pbcommand/tests/data/example.txt"
+ ],
+ "is_distributed": false,
+ "nproc": 1,
+ "options": {
+ "pbcommand.task_options.dev_max_nlines": 27
+ },
+ "output_files": [
+ "/var/folders/xk/_785bh115wj4m6_sy8g5wsx00000gn/T/tmp3fWNGvrtc-test/output.txt"
+ ],
+ "resources": [],
+ "task_type": "pbsmrtpipe.task_types.standard",
+ "tool_contract_id": "pbcommand.tasks.dev_txt_app"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/resolved_tool_contract_dev_app.json b/tests/data/resolved_tool_contract_dev_app.json
new file mode 100644
index 0000000..8f52fc9
--- /dev/null
+++ b/tests/data/resolved_tool_contract_dev_app.json
@@ -0,0 +1,23 @@
+{
+ "driver": {
+ "env": {},
+ "exe": "python -m pbcommand.cli.examples.dev_app --resolved-tool-contract "
+ },
+ "resolved_tool_contract": {
+ "_comment": "Created by pbcommand v0.2.3",
+ "input_files": [
+ "/Users/mkocher/gh_projects/pbcommand/tests/data/example.txt"
+ ],
+ "is_distributed": false,
+ "nproc": 1,
+ "options": {
+ "pbcommand.task_options.dev_max_nlines": 27
+ },
+ "output_files": [
+ "/var/folders/xk/_785bh115wj4m6_sy8g5wsx00000gn/T/tmp3fWNGvrtc-test/output.txt"
+ ],
+ "resources": [],
+ "task_type": "pbsmrtpipe.task_types.standard",
+ "tool_contract_id": "pbcommand.tasks.dev_txt_app"
+ }
+}
\ No newline at end of file
diff --git a/tests/test_common_cmdline_core.py b/tests/test_common_cmdline_core.py
new file mode 100644
index 0000000..102958d
--- /dev/null
+++ b/tests/test_common_cmdline_core.py
@@ -0,0 +1,38 @@
+import unittest
+import logging
+import shlex
+
+import pbcommand.common_options as CU
+from pbcommand.cli.core import pacbio_args_runner
+from pbcommand.cli import get_default_argparser
+from pbcommand.utils import setup_log
+
+log = logging.getLogger(__name__)
+
+
+def args_runner(*args, **kwargs):
+    log.info("Running args: {a}".format(a=args))  # keyword args are accepted but not logged
+    return 0  # stub runner: always reports exit code 0
+
+
+def _example_parser():
+    parser = get_default_argparser("1.0.0", "Example Mock Parser")
+    parser = CU.add_debug_option(parser)  # rebind to whatever add_debug_option returns
+    parser.add_argument('example_file', type=str, help="No testing of existence")
+    return parser
+
+
+def _example_main(cmdline_args):
+    """Split a command-line string, run it via pacbio_args_runner, return its result."""
+    parser = _example_parser()
+    arg_list = shlex.split(cmdline_args)
+    exit_code = pacbio_args_runner(arg_list, parser, args_runner, log, setup_log)
+    return exit_code
+
+
+class SimpleTest(unittest.TestCase):
+
+    def test_01(self):
+        cmdline = "--debug /path/to/my_fake_file.txt"  # args_runner only logs, so the path need not exist
+        exit_code = _example_main(cmdline)
+        self.assertEqual(exit_code, 0)
diff --git a/tests/test_e2e_example_apps.py b/tests/test_e2e_example_apps.py
new file mode 100644
index 0000000..b9d624c
--- /dev/null
+++ b/tests/test_e2e_example_apps.py
@@ -0,0 +1,49 @@
+import logging
+
+from base_utils import get_data_file
+import pbcommand.testkit
+
+log = logging.getLogger(__name__)
+
+
+class TestRunDevApp(pbcommand.testkit.PbTestApp):  # PbTestApp settings for the dev_app example driver
+ DRIVER_BASE = "python -m pbcommand.cli.examples.dev_app "
+ REQUIRES_PBCORE = True
+ INPUT_FILES = [get_data_file("example.fasta")]
+ TASK_OPTIONS = {"pbcommand.task_options.dev_read_length": 27}
+
+
+class TestTxtDevApp(pbcommand.testkit.PbTestApp):  # PbTestApp settings for the dev_txt_app example driver
+ DRIVER_BASE = "python -m pbcommand.cli.examples.dev_txt_app "
+ # XXX only DRIVER_BASE is set here (using default args), so the emit/resolve drivers are automatic
+ REQUIRES_PBCORE = False
+ INPUT_FILES = [get_data_file("example.txt")]
+ TASK_OPTIONS = {"pbcommand.task_options.dev_max_nlines": 27}
+ RESOLVED_TASK_OPTIONS = {"pbcommand.task_options.dev_max_nlines": 27}
+
+
+class TestQuickDevHelloWorld(pbcommand.testkit.PbTestApp):
+ """Runs the dev_qhello_world task of the dev_quick_hello_world example."""
+ DRIVER_EMIT = "python -m pbcommand.cli.examples.dev_quick_hello_world emit-tool-contract pbcommand.tasks.dev_qhello_world "
+ DRIVER_RESOLVE = "python -m pbcommand.cli.examples.dev_quick_hello_world run-rtc "
+
+ REQUIRES_PBCORE = False
+ INPUT_FILES = [get_data_file("example.txt")]
+
+
+class TestQuickTxt(pbcommand.testkit.PbTestApp):
+ """Runs the dev_txt_hello task of the dev_quick_hello_world example."""
+ DRIVER_EMIT = "python -m pbcommand.cli.examples.dev_quick_hello_world emit-tool-contract pbcommand.tasks.dev_txt_hello "
+ DRIVER_RESOLVE = "python -m pbcommand.cli.examples.dev_quick_hello_world run-rtc "
+
+ REQUIRES_PBCORE = False
+ INPUT_FILES = [get_data_file("example.txt")]
+
+
+class TestQuickCustomTxtCustomOuts(pbcommand.testkit.PbTestApp):
+ """Runs the dev_txt_custom_outs task of the dev_quick_hello_world example."""
+ DRIVER_EMIT = "python -m pbcommand.cli.examples.dev_quick_hello_world emit-tool-contract pbcommand.tasks.dev_txt_custom_outs "
+ DRIVER_RESOLVE = "python -m pbcommand.cli.examples.dev_quick_hello_world run-rtc "
+
+ REQUIRES_PBCORE = False
+ INPUT_FILES = [get_data_file("example.txt")]
diff --git a/tests/test_engine_runner.py b/tests/test_engine_runner.py
new file mode 100644
index 0000000..13e3a14
--- /dev/null
+++ b/tests/test_engine_runner.py
@@ -0,0 +1,31 @@
+import logging
+import unittest
+
+from pbcommand.engine import run_cmd
+
+from .base_utils import get_temp_file, get_temp_dir
+
+log = logging.getLogger(__name__)
+
+
+class RunnerSmokeTest(unittest.TestCase):
+
+    def test_simple_run_cmd(self):
+        d = get_temp_dir("simple-cmd")
+        txt_in = get_temp_file(".txt", d)
+        txt_out = get_temp_file(".txt", d)  # was "*.txt": the '*' landed unquoted in the '>' target of exe
+        exe = "cat {i} > {o}".format(i=txt_in, o=txt_out)
+
+        # this could all be bundled into a context manager
+        # with RunCommand('/path/stdout', '/path/to/stderr') as r:
+        #     r.exe("echo 'exe1'")
+        #     r.exe("echo 'exe2'")
+        #     result = r.get_result() # close the file handles
+        stdout = get_temp_file("-stdout", d)
+        stderr = get_temp_file("-stderr", d)
+        with open(stdout, 'w') as fo:
+            with open(stderr, 'w') as fe:
+                result = run_cmd(exe, fo, fe)
+
+        emgs = "Command {e} failed".format(e=exe)
+        self.assertEqual(result.exit_code, 0, emgs)
diff --git a/tests/test_load_resolved_tool_contract.py b/tests/test_load_resolved_tool_contract.py
new file mode 100644
index 0000000..4453fd0
--- /dev/null
+++ b/tests/test_load_resolved_tool_contract.py
@@ -0,0 +1,58 @@
+import pprint
+import tempfile
+import unittest
+import logging
+import os.path
+
+from base_utils import get_data_file
+from pbcommand.resolver import resolve_tool_contract
+from pbcommand.pb_io.tool_contract_io import (load_resolved_tool_contract_from,
+ load_tool_contract_from)
+
+log = logging.getLogger(__name__)
+
+
+class _TestUtil(unittest.TestCase):
+    FILE_NAME = "resolved_contract_01"  # NOTE(review): no ".json" suffix, unlike the data files -- confirm
+
+    def _to_object(self, path):
+        log.debug("Loading from {p}".format(p=path))
+        return load_tool_contract_from(path)
+
+    def test_sanity(self):
+        contract_path = get_data_file(self.FILE_NAME)
+        contract = self._to_object(contract_path)
+        self.assertIsNotNone(contract)
+
+
+class TestLoadResolvedContract(unittest.TestCase):
+
+    def test_01(self):
+        rtc_path = get_data_file("dev_example_resolved_tool_contract.json")
+        resolved = load_resolved_tool_contract_from(rtc_path)
+        log.info(resolved)
+        self.assertIsNotNone(resolved)  # smoke test: the loader must return an object
+
+
+class TestResolveContract(unittest.TestCase):
+
+    def test_01(self):
+        """Resolve the dev_txt_app tool contract and check its output and temp resources."""
+        name = "dev_example_dev_txt_app_tool_contract.json"
+        p = get_data_file(name)
+        tc = load_tool_contract_from(p)
+        input_files = ["/tmp/file.txt"]
+        root_output_dir = "/tmp"
+        root_tmp_dir = root_output_dir
+        max_nproc = 2
+        tool_options = {}
+        rtc = resolve_tool_contract(tc, input_files, root_output_dir, root_tmp_dir, max_nproc, tool_options)
+        log.info(pprint.pformat(rtc))
+        self.assertIsNotNone(rtc)
+        self.assertEqual(os.path.basename(rtc.task.output_files[0]),
+                         "output.txt")
+        # Validate Resolved Resource Types
+        log.debug("Resources {t}".format(t=rtc.task.resources))
+        self.assertEqual(len(rtc.task.tmpdir_resources), 1)
+        self.assertEqual(len(rtc.task.tmpfile_resources), 2)
+        # NOTE: the rtc.task.tmp_file assertion was already disabled in the original test
diff --git a/tests/test_models_common.py b/tests/test_models_common.py
new file mode 100644
index 0000000..4507e8b
--- /dev/null
+++ b/tests/test_models_common.py
@@ -0,0 +1,18 @@
+import unittest
+import logging
+
+from pbcommand.models import FileTypes
+
+log = logging.getLogger(__name__)
+
+
+class TestLoadFileTypes(unittest.TestCase):
+
+    def test_file_types(self):
+        # smoke test: FileTypes.DS_ALIGN must resolve to a non-None object
+        align_type = FileTypes.DS_ALIGN
+        self.assertIsNotNone(align_type)
+
+    def test_is_valid(self):
+        align_type = FileTypes.DS_ALIGN
+        self.assertTrue(FileTypes.is_valid_id(align_type.file_type_id))
diff --git a/tests/test_models_report.py b/tests/test_models_report.py
new file mode 100644
index 0000000..58980df
--- /dev/null
+++ b/tests/test_models_report.py
@@ -0,0 +1,72 @@
+
+import unittest
+import json
+import logging
+
+from pbcommand.models.report import Report
+from pbcommand.pb_io import load_report_from_json
+
+_SERIALIZED_JSON_DIR = 'example-reports'
+
+from base_utils import get_data_file_from_subdir
+
+log = logging.getLogger(__name__)
+
+def _to_report(name):  # name: report JSON file name, e.g. 'laa_report1.json'
+    report_path = get_data_file_from_subdir(_SERIALIZED_JSON_DIR, name)
+    log.info("loading json report from {f}".format(f=report_path))
+    report = load_report_from_json(report_path)
+    return report
+
+class TestReportModel(unittest.TestCase):
+
+    def test_from_simple_dict(self):
+        report = Report.from_simple_dict("pbcommand_test", {"n_reads": 50},
+                                         "pbcommand")
+        serialized = json.loads(report.to_json())
+        self.assertEqual(serialized['attributes'], [
+            {
+                "id": "pbcommand_test.pbcommand_n_reads",
+                "name": "n_reads",
+                "value": 50
+            },
+        ])
+
+    def test_merge(self):
+        merged = Report.merge([
+            Report.from_simple_dict("pbcommand_test",
+                                    {"n_reads": 50, "n_zmws": 10},
+                                    "pbcommand"),
+            Report.from_simple_dict("pbcommand_test",
+                                    {"n_reads": 250, "n_zmws": 50},
+                                    "pbcommand")])
+        values_by_id = {a.id: a.value for a in merged.attributes}
+        self.assertEqual(values_by_id['pbcommand_n_reads'], 300)  # 50 + 250
+        self.assertEqual(values_by_id['pbcommand_n_zmws'], 60)  # 10 + 50
+
+    def test_merge_tables(self):
+        names = ['laa_report1.json', 'laa_report2.json']
+        merged = Report.merge([_to_report(names[0]), _to_report(names[1])])
+        table = merged.tables[0]
+        self.assertEqual(len(table.columns), 7)
+        self.assertEqual(
+            [column.header for column in table.columns],
+            ['BarcodeName', 'FastaName', 'CoarseCluster', 'Phase',
+             'TotalCoverage', 'SequenceLength', 'PredictedAccuracy'])
+        for column in table.columns:
+            self.assertEqual(len(column.values), 4)
+            if column.header == 'BarcodeName':
+                self.assertEqual(
+                    column.values,
+                    ['Barcode1', 'Barcode2', 'Barcode4', 'Barcode3'])
+            elif column.header == 'FastaName':
+                self.assertEqual(
+                    column.values,
+                    ['BarcodeFasta1', 'BarcodeFasta2', 'BarcodeFasta4',
+                     'BarcodeFasta3'])
+            else:
+                self.assertEqual(column.values, [1, 2, 4, 3])
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_parsers.py b/tests/test_parsers.py
new file mode 100644
index 0000000..f954616
--- /dev/null
+++ b/tests/test_parsers.py
@@ -0,0 +1,95 @@
+import unittest
+
+from pbcommand.models import TaskTypes, FileTypes, get_pbparser
+
+
+class TestParsers(unittest.TestCase):
+    def test_input_output_files(self):
+        parser = get_pbparser(
+            "pbcommand.tasks.test_parsers",
+            "0.1",
+            "Display Name",
+            "Tool Description ",
+            "pbcommand-driver-cmd",
+            is_distributed=False,
+            nproc=1,
+            resource_types=())
+
+        parser.add_input_file_type(
+            file_type=FileTypes.FASTA,
+            file_id="fasta",
+            name="Fasta file",
+            description="Fasta file description")
+
+        parser.add_input_file_type(FileTypes.JSON,
+                                   "json",
+                                   "JSON file",
+                                   "JSON file description")
+
+        parser.add_output_file_type(
+            file_type=FileTypes.GFF,
+            file_id="gff",
+            name="GFF file",
+            description="GFF file description",
+            default_name="annotations.gff")
+        contract = parser.to_contract()
+        contract_dict = contract.to_dict()
+        inputs = contract_dict['tool_contract']['input_types']
+        self.assertEqual(inputs, [
+            {
+                'description': 'Fasta file description',
+                'title': 'Fasta file',
+                'id': 'fasta',
+                'file_type_id': 'PacBio.FileTypes.Fasta'
+            },
+            {
+                'description': 'JSON file description',
+                'title': 'JSON file',
+                'id': 'json',
+                'file_type_id': 'PacBio.FileTypes.json'
+            }
+        ])
+        outputs = contract_dict['tool_contract']['output_types']
+        self.assertEqual(outputs, [
+            {
+                'title': 'GFF file',
+                'description': 'GFF file description',
+                'default_name': 'annotations.gff',
+                'id': 'gff',
+                'file_type_id': 'PacBio.FileTypes.gff'
+            }
+        ])
+
+    def test_misc_parser_types(self):
+        parser = get_pbparser(
+            "pbcommand.tasks.test_parsers",
+            "0.1.0",
+            "Tool Name",
+            "Tool Descripion",
+            "pbcommand-driver-exe ")
+
+        parser.add_int("pbcommand.task_options.n", "n", default=0, name="N",
+                       description="Integer option")
+
+        parser.add_float("pbcommand.task_options.f", "f", default=0, name="F",
+                         description="Float option")
+
+        # XXX the 'default' passed below is not the value the flag takes when
+        # omitted as such - it only signals that action=store_true is used
+        parser.add_boolean("pbcommand.task_options.loud", "loud", default=False,
+                           name="Verbose", description="Boolean option")
+
+        parse = parser.arg_parser.parser.parse_args
+
+        parsed = parse(["--n", "250", "--f", "1.2345", "--loud"])
+        self.assertEqual(parsed.n, 250)
+        self.assertEqual(parsed.f, 1.2345)
+        self.assertTrue(parsed.loud)
+
+        parsed_empty = parse([])
+        self.assertFalse(parsed_empty.loud)
+
+ # TODO we should add a lot more tests for parser behavior
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_pb_io_common.py b/tests/test_pb_io_common.py
new file mode 100644
index 0000000..734ee50
--- /dev/null
+++ b/tests/test_pb_io_common.py
@@ -0,0 +1,33 @@
+import unittest
+import logging
+from pbcommand.testkit.base_utils import get_temp_dir
+
+log = logging.getLogger(__name__)
+
+from pbcommand.models import PipelineChunk
+from pbcommand.pb_io import load_pipeline_chunks_from_json, write_pipeline_chunks
+
+from base_utils import get_temp_file
+
+
+class TestWriteChunk(unittest.TestCase):
+
+    def test_write_chunks(self):
+        """Write five PipelineChunk objects to a JSON file and check that five load back."""
+        def f(i):
+            return {"{c}movie_fofn_id".format(c=PipelineChunk.CHUNK_KEY_PREFIX): "/path/to_movie-{i}.fofn".format(i=i),
+                    "{c}region_fofn_id".format(c=PipelineChunk.CHUNK_KEY_PREFIX): "/path/rgn_{i}.fofn".format(i=i)}
+
+        to_i = lambda i: "chunk-id-{i}".format(i=i)
+        to_p = lambda i: PipelineChunk(to_i(i), **f(i))
+
+        nchunks = 5
+        pipeline_chunks = [to_p(i) for i in range(nchunks)]  # range works on Python 2 and 3; xrange is 2-only
+        log.debug(pipeline_chunks)
+        tmp_dir = get_temp_dir("pipeline-chunks")
+        tmp_name = get_temp_file("_chunk.json", tmp_dir)
+
+        write_pipeline_chunks(pipeline_chunks, tmp_name, "Example chunk file")
+
+        pchunks = load_pipeline_chunks_from_json(tmp_name)
+        self.assertEqual(len(pchunks), nchunks)
\ No newline at end of file
diff --git a/tests/test_pb_io_report.py b/tests/test_pb_io_report.py
new file mode 100644
index 0000000..8583b08
--- /dev/null
+++ b/tests/test_pb_io_report.py
@@ -0,0 +1,54 @@
+import os
+import logging
+import unittest
+import json
+from pprint import pformat
+
+from pbcommand.pb_io import load_report_from_json
+
+_SERIALIZED_JSON_DIR = 'example-reports'
+
+from base_utils import get_data_file_from_subdir
+
+log = logging.getLogger(__name__)
+
+
+def _to_report(name):  # name: report JSON file name, e.g. 'overview.json'
+    json_path = get_data_file_from_subdir(_SERIALIZED_JSON_DIR, name)
+    log.info("loading json report from {f}".format(f=json_path))
+    loaded = load_report_from_json(json_path)
+    return loaded
+
+
+class TestSerializationOverviewReport(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        name = 'overview.json'
+        cls.report = _to_report(name)  # loaded once and shared by every test in the class
+
+    def test_id(self):
+        self.assertEqual(self.report.id, "overview")
+
+    def test_attributes(self):
+        self.assertEqual(len(self.report.attributes), 2)  # was assertTrue(len(...), 2): the 2 was only the msg
+
+
+class TestSerializationAdapterReport(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        report_name = 'filter_reports_adapters.json'
+        cls.report = _to_report(report_name)  # loaded once and shared by every test in the class
+
+    def test_id(self):
+        self.assertEqual(self.report.id, 'adapter')
+
+    def test_attributes(self):
+        self.assertEqual(len(self.report.attributes), 6)
+
+    def test_plotgroups(self):
+        self.assertEqual(len(self.report.plotGroups), 1)
+
+    def test_plots(self):
+        self.assertEqual(len(self.report.plotGroups[0].plots), 1)
diff --git a/tests/test_pb_io_tool_contract.py b/tests/test_pb_io_tool_contract.py
new file mode 100644
index 0000000..7c50590
--- /dev/null
+++ b/tests/test_pb_io_tool_contract.py
@@ -0,0 +1,51 @@
+import os
+import unittest
+import logging
+
+from base_utils import get_data_file, HAS_PBCORE, pbcore_skip_msg, get_temp_file, get_temp_dir
+
+from pbcommand.models import (ToolContract,
+ ResolvedToolContract,
+ MalformedToolContractError)
+
+from pbcommand.pb_io.tool_contract_io import (load_tool_contract_from,
+ load_resolved_tool_contract_from,
+ write_resolved_tool_contract_avro)
+
+import pbcommand.cli.examples.dev_app
+
+log = logging.getLogger(__name__)
+
+
+class TestLoadToolContract(unittest.TestCase):
+
+    def test_01(self):
+        contract_name = "dev_example_tool_contract.json"
+        contract_path = get_data_file(contract_name)
+        contract = load_tool_contract_from(contract_path)
+        self.assertIsInstance(contract, ToolContract)  # the loader must build a ToolContract
+
+
+class TestMalformedToolContract(unittest.TestCase):
+
+    def test_tc_no_inputs(self):
+        contract_name = "dev_example_tool_contract.json"
+        contract_path = get_data_file(contract_name)
+        contract = load_tool_contract_from(contract_path)
+        contract.task.input_file_types = []  # strip the inputs so serialization is expected to fail
+
+        def _serialize():
+            return contract.to_dict()
+
+        self.assertRaises(MalformedToolContractError, _serialize)
+
+
+class TestWriteResolvedToolContractAvro(unittest.TestCase):
+    def test_01(self):
+        rtc_name = "resolved_tool_contract_dev_app.json"
+        resolved = load_resolved_tool_contract_from(get_data_file(rtc_name))
+        self.assertIsInstance(resolved, ResolvedToolContract)
+
+        out_dir = get_temp_dir("rtc-app")
+        avro_path = get_temp_file("-resolved-tool-contract.avro", out_dir)
+        write_resolved_tool_contract_avro(resolved, avro_path)  # smoke test: passes if nothing raises
diff --git a/tests/test_resolver.py b/tests/test_resolver.py
new file mode 100644
index 0000000..f48e293
--- /dev/null
+++ b/tests/test_resolver.py
@@ -0,0 +1,46 @@
+import logging
+import unittest
+
+from base_utils import get_data_file, get_temp_dir
+from pbcommand.models import ResolvedToolContract, ResolvedScatteredToolContractTask, ResolvedGatherToolContractTask
+
+from pbcommand.pb_io import load_tool_contract_from
+from pbcommand.resolver import resolve_scatter_tool_contract, resolve_gather_tool_contract
+
+log = logging.getLogger(__name__)
+
+
+class TestScatterResolver(unittest.TestCase):
+    FILE_NAME = "dev_scatter_fasta_app_tool_contract.json"
+    MAX_NCHUNKS = 7
+    MAX_NPROC = 9
+    INPUT_FILES = ['/tmp/file.fasta']
+    CHUNK_KEYS = ('$chunk.fasta_id',)  # trailing comma: without it this is a plain str, not a tuple
+
+    TOOL_OPTIONS = {}
+
+    def test_sanity(self):
+        d = get_temp_dir("resolved-tool-contract")
+        tc = load_tool_contract_from(get_data_file(self.FILE_NAME))
+        rtc = resolve_scatter_tool_contract(tc, self.INPUT_FILES, d, d, self.MAX_NPROC, self.TOOL_OPTIONS, self.MAX_NCHUNKS, self.CHUNK_KEYS)
+        self.assertIsInstance(rtc, ResolvedToolContract)
+        self.assertIsInstance(rtc.task, ResolvedScatteredToolContractTask)
+        self.assertEqual(rtc.task.max_nchunks, self.MAX_NCHUNKS)
+
+
+class TestGatherResolver(unittest.TestCase):
+    FILE_NAME = "dev_gather_fasta_app_tool_contract.json"
+    MAX_NCHUNKS = 7
+    MAX_NPROC = 9
+    INPUT_FILES = ['/tmp/file.fasta.chunk.json']
+    CHUNK_KEY = '$chunk.filter_fasta_id'
+
+    TOOL_OPTIONS = {}
+
+    def test_sanity(self):
+        work_dir = get_temp_dir("resolved-tool-contract")  # passed twice to the resolver below
+        contract = load_tool_contract_from(get_data_file(self.FILE_NAME))
+        resolved = resolve_gather_tool_contract(contract, self.INPUT_FILES, work_dir, work_dir, self.MAX_NPROC, self.TOOL_OPTIONS, self.CHUNK_KEY)
+        self.assertIsInstance(resolved, ResolvedToolContract)
+        self.assertIsInstance(resolved.task, ResolvedGatherToolContractTask)
+        self.assertEqual(resolved.task.chunk_key, self.CHUNK_KEY)
diff --git a/tests/test_schema_validation.py b/tests/test_schema_validation.py
new file mode 100644
index 0000000..e24ffa1
--- /dev/null
+++ b/tests/test_schema_validation.py
@@ -0,0 +1,59 @@
+import json
+import os
+import logging
+import unittest
+from pbcommand.models import ToolContract, ResolvedToolContract
+
+from pbcommand.pb_io import (load_tool_contract_from,
+ load_resolved_tool_contract_from)
+from pbcommand.schemas import validate_rtc, validate_tc
+
+from base_utils import DATA_DIR
+
+log = logging.getLogger(__name__)
+
+
+def _to_json(path):
+    with open(path, 'r') as handle:
+        parsed = json.load(handle)  # same result as json.loads(handle.read())
+    return parsed
+
+
+def _filter_rtc(path):  # True when the name ends with 'resolved_tool_contract.json'
+ return path.endswith('resolved_tool_contract.json')
+
+
+def _filter_tc(path):  # tool contracts only: resolved contracts share the suffix, so exclude them
+    return not path.endswith('resolved_tool_contract.json') and path.endswith('tool_contract.json')
+
+
+def _get_all_from(root_dir, filter_func):
+    accepted = (entry for entry in os.listdir(root_dir) if filter_func(entry))  # filter sees bare names
+    for entry in accepted:
+        yield os.path.join(root_dir, entry)
+
+
+def _to_assertion(path, schema_validate_func):  # returns a closure that takes the TestCase as `self`
+    def test_is_validate(self):
+        payload = _to_json(path)
+        log.debug(payload)
+        valid = schema_validate_func(payload)
+        log.info(" is-valid? {i} {p}".format(i=valid, p=path))
+        self.assertTrue(valid, "{p} is not valid with the avro schema".format(p=path))
+    return test_is_validate
+
+
+class ValidateResolvedToolContracts(unittest.TestCase):
+    def test_validate_resolved_tool_contracts(self):
+        for rtc_path in _get_all_from(DATA_DIR, _filter_rtc):
+            check = _to_assertion(rtc_path, validate_rtc)
+            check(self)  # schema validation first, then a full load of the same file
+            self.assertIsInstance(load_resolved_tool_contract_from(rtc_path), ResolvedToolContract)
+
+
+class ValidateToolContracts(unittest.TestCase):
+    def test_validate_tool_contracts(self):
+        for tc_path in _get_all_from(DATA_DIR, _filter_tc):
+            check = _to_assertion(tc_path, validate_tc)
+            check(self)  # schema validation first, then a full load of the same file
+            self.assertIsInstance(load_tool_contract_from(tc_path), ToolContract)
\ No newline at end of file
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 0000000..b70ef4f
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,53 @@
+import functools
+import unittest
+from pbcommand.utils import Singleton, compose
+
+
+class TestSingleton(unittest.TestCase):
+
+    def test_basic(self):
+        class Lithium(object):
+            __metaclass__ = Singleton
+
+            def __init__(self):
+                self.name = 'Lithium'
+                self.number = 3
+
+        first = Lithium()
+        second = Lithium()
+        self.assertEqual(id(first), id(second))  # both calls must hand back the same object
+
+
+class TestCompose(unittest.TestCase):
+    def test_simple(self):
+        f = lambda x: x * 2
+        g = lambda y: y + 2
+        # expected 18 == f(g(7)) == (7 + 2) * 2
+        h = compose(f, g)
+        value = h(7)
+        self.assertEqual(value, 18)
+
+    def test_no_args_list(self):
+        """compose() with no functions is expected to raise ValueError."""
+        def _f():
+            return compose()
+
+        self.assertRaises(ValueError, _f)
+
+    def test_empty_list(self):
+        def _f():
+            return compose([])
+
+        self.assertRaises(TypeError, _f)
+
+    def test_partial(self):
+        """Compose two functools.partial adders: 5 + 2 + 5 == 12."""
+        def add(a, b):
+            return a + b
+
+        add_five = functools.partial(add, 5)
+        add_two = functools.partial(add, 2)
+
+        f = compose(add_five, add_two)
+        value = f(5)
+        self.assertEqual(value, 12)
\ No newline at end of file
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..9f98f34
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,12 @@
+# Tox (http://tox.testrun.org/) is a tool for running tests
+# in multiple virtualenvs. This configuration file will run the
+# test suite on all supported python versions. To use it, "pip install tox"
+# and then run "tox" from this directory.
+
+[tox]
+envlist = py27
+
+[testenv]
+commands = nosetests -s --verbose --logging-config log_nose.cfg
+deps =
+ nose
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-pbcommand.git
More information about the debian-med-commit
mailing list