[med-svn] [python-pbcommand] 01/10: Imported Upstream version 0.3.22
Afif Elghraoui
afif at moszumanska.debian.org
Sun Apr 24 03:11:50 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository python-pbcommand.
commit 3ad18d1d06048b30953be0f781e39395107337aa
Author: Afif Elghraoui <afif at ghraoui.name>
Date: Sat Apr 23 19:27:17 2016 -0700
Imported Upstream version 0.3.22
---
Makefile | 4 +-
README.md | 10 +-
REQUIREMENTS.txt | 2 +
REQUIREMENTS_TEST.txt | 10 +-
docs/source/conf.py | 9 +-
docs/source/index.rst | 9 +-
docs/source/services.rst | 9 +
pbcommand/__init__.py | 2 +-
pbcommand/cli/__init__.py | 4 +-
pbcommand/cli/core.py | 197 ++++---
pbcommand/cli/examples/dev_app.py | 4 +-
pbcommand/cli/examples/dev_gather_fasta_app.py | 2 +-
pbcommand/cli/examples/dev_mixed_app.py | 4 +-
pbcommand/cli/examples/dev_quick_hello_world.py | 13 +-
pbcommand/cli/examples/dev_scatter_fasta_app.py | 2 +-
pbcommand/cli/examples/dev_txt_app.py | 4 +-
pbcommand/cli/examples/template_simple.py | 49 ++
pbcommand/cli/quick.py | 119 +++--
pbcommand/cli/utils.py | 139 +++++
pbcommand/common_options.py | 90 +++-
pbcommand/interactive_resolver.py | 2 +-
pbcommand/models/__init__.py | 4 +-
pbcommand/models/common.py | 102 +++-
pbcommand/models/parser.py | 39 +-
pbcommand/models/report.py | 49 +-
pbcommand/models/tool_contract.py | 24 +-
pbcommand/pb_io/report.py | 35 +-
pbcommand/pb_io/tool_contract_io.py | 21 +-
pbcommand/resolver.py | 34 +-
pbcommand/schemas/__init__.py | 2 +-
pbcommand/schemas/resolved_tool_contract.avsc | 6 +-
pbcommand/services/__init__.py | 4 +
pbcommand/services/cli.py | 505 ++++++++++++++++++
pbcommand/services/models.py | 163 ++++++
pbcommand/services/service_access_layer.py | 585 +++++++++++++++++++++
pbcommand/services/utils.py | 114 ++++
pbcommand/testkit/core.py | 25 +-
pbcommand/utils.py | 345 +++++++++++-
pbcommand/validators.py | 76 +--
setup.py | 3 +-
.../dev_example_dev_txt_app_tool_contract.json | 2 +-
tests/data/dev_example_resolved_tool_contract.json | 3 +-
tests/data/dev_example_tool_contract.json | 2 +-
tests/data/dev_gather_fasta_app_tool_contract.json | 2 +-
.../data/dev_scatter_fasta_app_tool_contract.json | 2 +-
tests/data/example-reports/overview.json | 4 +-
...ommand.tasks.dev_fastq2fasta_tool_contract.json | 55 +-
...mmand.tasks.dev_qhello_world_tool_contract.json | 4 +-
...nd.tasks.dev_txt_custom_outs_tool_contract.json | 6 +-
...bcommand.tasks.dev_txt_hello_tool_contract.json | 6 +-
tests/data/resolved_contract_01.json | 5 +-
tests/data/resolved_tool_contract_dev_app.json | 5 +-
tests/test_common_cmdline_core.py | 2 +-
tests/test_e2e_example_apps.py | 4 +
tests/test_load_resolved_tool_contract.py | 2 +-
tests/test_models_report.py | 206 +++++++-
tests/test_models_report_attribute.py | 53 ++
tests/test_models_report_column.py | 76 +++
tests/test_models_report_plot.py | 43 ++
tests/test_models_report_plotgroup.py | 58 ++
tests/test_models_report_table.py | 121 +++++
tests/test_parsers.py | 20 +-
tests/test_pb_io_report.py | 3 +
tests/test_resolver.py | 6 +-
tests/test_utils.py | 44 +-
tox.ini | 2 +
66 files changed, 3189 insertions(+), 367 deletions(-)
diff --git a/Makefile b/Makefile
index 3acdc74..fc0100d 100644
--- a/Makefile
+++ b/Makefile
@@ -33,10 +33,10 @@ build-tool-contracts:
python -m pbcommand.cli.examples.dev_quick_hello_world emit-tool-contracts -o ./tests/data
run-pep8:
- find pbcommand -name "*.py" -exec pep8 --ignore=E501,E265,E731,E402 {} \;
+ find pbcommand -name "*.py" -exec pep8 --ignore=E501,E265,E731,E402,W292 {} \;
run-auto-pep8:
- find pbcommand -name "*.py" -exec autopep8 -i --ignore=E501,E265,E731,E402 {} \;
+ find pbcommand -name "*.py" -exec autopep8 -i --ignore=E501,E265,E731,E402,W292 {} \;
build-java-classes:
avro-tools compile schema pbcommand/schemas java-classes/
diff --git a/README.md b/README.md
index 2dca82e..6da4870 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,15 @@
pbcommand High Level Overview
=============================
-[Full Docs](http://pbcommand.readthedocs.org/en/latest/)
+co-owners:
+
+[mpkocher](https://github.com/mpkocher)
+
+[natechols](https://github.com/natechols)
-Note the APIs are still in flux. WIP.
+PacBio Officially Supported Library. Note the APIs are still in flux and not yet 1.0.0.
+
+[Full Docs](http://pbcommand.readthedocs.org/en/latest/)
[![Circle CI](https://circleci.com/gh/PacificBiosciences/pbcommand.svg?style=svg)](https://circleci.com/gh/PacificBiosciences/pbcommand)
diff --git a/REQUIREMENTS.txt b/REQUIREMENTS.txt
index ad11173..5368e16 100644
--- a/REQUIREMENTS.txt
+++ b/REQUIREMENTS.txt
@@ -1,3 +1,5 @@
xmlbuilder
jsonschema
avro
+requests
+iso8601
\ No newline at end of file
diff --git a/REQUIREMENTS_TEST.txt b/REQUIREMENTS_TEST.txt
index 7f3ccb2..55da931 100644
--- a/REQUIREMENTS_TEST.txt
+++ b/REQUIREMENTS_TEST.txt
@@ -1,3 +1,11 @@
nose
tox
-numpy # this is required for pbreports serialization
\ No newline at end of file
+numpy # this is required for pbreports serialization
+# Putting these here for RTD
+sphinx-argparse
+sphinx-bootstrap-theme
+xmlbuilder
+jsonschema
+avro
+requests
+iso8601
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 621fbdc..57cb70b 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -15,6 +15,7 @@
import sys
import os
import shlex
+import sphinx_bootstrap_theme
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
@@ -36,6 +37,9 @@ extensions = [
'sphinx.ext.viewcode',
]
+# For argparse
+extensions += ['sphinxarg.ext']
+
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
@@ -113,7 +117,10 @@ todo_include_todos = False
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
-html_theme = 'alabaster'
+# html_theme = 'alabaster'
+# Activate the bootstrap theme.
+html_theme = 'bootstrap'
+html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 029e6bc..f8de83e 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -3,8 +3,8 @@
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
-Welcome to pbcommand's documentation!
-=====================================
+pbcommand
+=========
Contents:
@@ -12,11 +12,10 @@ Contents:
:maxdepth: 2
commandline_interface
+ services
-
-Indices and tables
-==================
+Indices and tables:
* :ref:`genindex`
* :ref:`modindex`
diff --git a/docs/source/services.rst b/docs/source/services.rst
new file mode 100644
index 0000000..7032765
--- /dev/null
+++ b/docs/source/services.rst
@@ -0,0 +1,9 @@
+
+SMRT Service commandline interface
+==================================
+
+
+.. argparse::
+ :module: pbcommand.services.cli
+ :func: get_parser
+ :prog: pbservice
\ No newline at end of file
diff --git a/pbcommand/__init__.py b/pbcommand/__init__.py
index e61fceb..73486c9 100644
--- a/pbcommand/__init__.py
+++ b/pbcommand/__init__.py
@@ -1,4 +1,4 @@
-VERSION = (0, 2, 17)
+VERSION = (0, 3, 22)
def get_version():
diff --git a/pbcommand/cli/__init__.py b/pbcommand/cli/__init__.py
index 2795c2d..f56bc6b 100644
--- a/pbcommand/cli/__init__.py
+++ b/pbcommand/cli/__init__.py
@@ -1,6 +1,6 @@
from .core import (pacbio_args_runner,
pacbio_args_or_contract_runner,
pbparser_runner,
- get_default_argparser)
+ get_default_argparser, get_default_argparser_with_base_opts)
-from .quick import (registry_runner, registry_builder)
\ No newline at end of file
+from .quick import (registry_runner, registry_builder, QuickOpt)
diff --git a/pbcommand/cli/core.py b/pbcommand/cli/core.py
index 7a05bc1..d6c5a24 100644
--- a/pbcommand/cli/core.py
+++ b/pbcommand/cli/core.py
@@ -19,30 +19,73 @@ import json
import logging
import time
import traceback
+import shutil
+import os
import sys
+import pbcommand
+
+from pbcommand.models import PbParser, ResourceTypes
from pbcommand.common_options import (RESOLVED_TOOL_CONTRACT_OPTION,
- EMIT_TOOL_CONTRACT_OPTION)
-from pbcommand.models import PbParser
+ EMIT_TOOL_CONTRACT_OPTION,
+ add_resolved_tool_contract_option,
+ add_base_options)
+from pbcommand.utils import get_parsed_args_log_level
from pbcommand.pb_io.tool_contract_io import load_resolved_tool_contract_from
+def _add_version(p, version):
+ p.version = version
+ p.add_argument('--version',
+ action="version",
+ help="show program's version number and exit")
+ return p
+
+
def get_default_argparser(version, description):
"""
- Everyone MUST use this to create an instance on a argparser python parser.
+ Everyone should use this to create an instance of an argparse parser.
+
- :param version:
- :param description:
+ *This should be updated to include the required base options*
+
+ :param version: Version of your tool
+ :param description: Description of your tool
:return:
:rtype: ArgumentParser
"""
- p = argparse.ArgumentParser(version=version,
- description=description,
+ p = argparse.ArgumentParser(description=description,
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
- return p
+ # Explicitly adding here to have only --version (not -v)
+ return _add_version(p, version)
+
+
+def get_default_argparser_with_base_opts(version, description, default_level="INFO"):
+ """Return a parser with the default log related options
+
+ If you don't want the default log output to go to stdout, then set the
+ default log level to "ERROR". This will essentially suppress all output
+ to stdout; the default behavior will then only emit to stderr, which is
+ essentially a '--quiet' default mode.
+
+ my-tool --my-opt=1234 file_in.txt
+
+ To override the default behavior and get a chattier stdout:
+
+ my-tool --my-opt=1234 --log-level=INFO file_in.txt
+
+ Or write the log to an explicit file and leave stdout untouched:
+
+ my-tool --my-opt=1234 --log-level=DEBUG --log-file=file.log file_in.txt
-def _pacbio_main_runner(alog, setup_log_func, func, *args, **kwargs):
+ """
+ return add_base_options(get_default_argparser(version, description), default_level=default_level)
+
+
+def _pacbio_main_runner(alog, setup_log_func, exe_main_func, *args, **kwargs):
"""
Runs a general func and logs results. The return type is expected to be an (int) return code.
@@ -52,6 +95,8 @@ def _pacbio_main_runner(alog, setup_log_func, func, *args, **kwargs):
:param args: parsed args from parser
+ :param setup_log_func: F(alog, level=value, file_name=value, formatter=value) or None
+
:return: Exit code of callable func
:rtype: int
"""
@@ -59,26 +104,43 @@ def _pacbio_main_runner(alog, setup_log_func, func, *args, **kwargs):
started_at = time.time()
pargs = args[0]
+ # default logging level
level = logging.INFO
- # Assuming that the log_level might not be an added option.
- if hasattr(pargs, 'log_level'):
- level = logging.getLevelName(pargs.log_level)
- log_options = dict(level=level)
+ if 'level' in kwargs:
+ level = kwargs.pop('level')
+ else:
+ level = get_parsed_args_log_level(pargs)
+
+ # None will default to stdout
+ log_file = getattr(pargs, 'log_file', None)
+
+ # Currently, only writing to stdout is supported. More customization would
+ # require more commandline options in the base parser (e.g., --log-file, --log-formatter)
+ log_options = dict(level=level, file_name=log_file)
+
# The Setup log func must adhere to the pbcommand.utils.setup_log func
# signature
- setup_log_func(alog, **log_options)
+ # FIXME. This should use the more concrete F(file_name_or_name, level, formatter)
+ # signature of setup_logger
+ if setup_log_func is not None and alog is not None:
+ setup_log_func(alog, **log_options)
+ alog.info("Using pbcommand v{v}".format(v=pbcommand.get_version()))
+ alog.info("completed setting up logger with {f}".format(f=setup_log_func))
+ alog.info("log opts {d}".format(d=log_options))
try:
# the code in func should catch any exceptions. The try/catch
# here is a fail safe to make sure the program doesn't fail
# and makes sure the exit code is logged.
- return_code = func(*args, **kwargs)
+ return_code = exe_main_func(*args, **kwargs)
run_time = time.time() - started_at
except Exception as e:
run_time = time.time() - started_at
- alog.error(e, exc_info=True)
- traceback.print_exc(sys.stderr)
+ if alog is not None:
+ alog.error(e, exc_info=True)
+ else:
+ traceback.print_exc(sys.stderr)
# We should have a standard map of exit codes to Int
if isinstance(e, IOError):
@@ -87,60 +149,33 @@ def _pacbio_main_runner(alog, setup_log_func, func, *args, **kwargs):
return_code = 2
_d = dict(r=return_code, s=run_time)
- alog.info("exiting with return code {r} in {s:.2f} sec.".format(**_d))
+ if alog is not None:
+ alog.info("exiting with return code {r} in {s:.2f} sec.".format(**_d))
return return_code
-def _get_resolved_tool_contract_from_argv(argv):
- """
- Extract the resolved tool contract path from the raw argv
-
- There are two cases
-
- --resolved-tool-contract-path=/path/to/tool_contract.json
- --resolved-tool-contract-path /path/to/tool_contract.json
-
- :param argv:
- :rtype: str
- :raises: ValueError
- :return: Path to Manifest
- """
- # this is a lackluster implementation. FIXME.
-
- m_str = RESOLVED_TOOL_CONTRACT_OPTION
-
- error = ValueError("Unable to extract resolved tool contract from commandline args {a}. Expecting {m}=/path/to/file.json".format(a=argv, m=m_str))
- tool_contract_path = None
- nargv = len(argv)
-
- # Provided the --resolved-tool-contract /path/to/tool_contract_path.json
- if m_str in argv:
- for i, a in enumerate(argv):
- # print i, nargv, a
- if a.startswith(m_str):
- if (i + 1) <= nargv:
- tool_contract_path = argv[i + 1]
- break
- else:
- raise error
+def pacbio_args_runner(argv, parser, args_runner_func, alog, setup_log_func):
+ # For tools that haven't yet implemented the ToolContract API
+ args = parser.parse_args(argv)
+ return _pacbio_main_runner(alog, setup_log_func, args_runner_func, args)
- # Provided the --resolved-tool-contract=/path/to/tool_contract_path.json
- m_str_eq = m_str + "="
- for i in argv:
- if i.startswith(m_str_eq):
- tool_contract_path = i.split(m_str_eq)[-1]
- break
- if tool_contract_path is None:
- raise error
+class TemporaryResourcesManager(object):
+ """Context manager for creating and destroying temporary resources"""
- return tool_contract_path
+ def __init__(self, rtc):
+ self.resolved_tool_contract = rtc
+ def __enter__(self):
+ for resource in self.resolved_tool_contract.task.resources:
+ if resource.type_id == ResourceTypes.TMP_DIR:
+ os.makedirs(resource.path)
-def pacbio_args_runner(argv, parser, args_runner_func, alog, setup_log_func):
- # For tools that haven't yet implemented the ToolContract API
- args = parser.parse_args(argv)
- return _pacbio_main_runner(alog, setup_log_func, args_runner_func, args)
+ def __exit__(self, type, value, traceback):
+ for resource in self.resolved_tool_contract.task.resources:
+ if resource.type_id == ResourceTypes.TMP_DIR:
+ if os.path.exists(resource.path):
+ shutil.rmtree(resource.path)
def pacbio_args_or_contract_runner(argv,
@@ -165,18 +200,34 @@ def pacbio_args_or_contract_runner(argv,
:return: int return code
:rtype: int
"""
+ def _log_not_none(msg):
+ if alog is not None:
+ alog.info(msg)
- # circumvent the argparse parsing by inspecting the raw argv, then manually
- # parse out the resolved_tool_contract path. Not awesome, but the only way to skip the
- # parser.parse_args(args) machinery
+ # circumvent the argparse parsing by inspecting the raw argv, then create
+ # a temporary parser with limited arguments to process the special case of
+ # --resolved-tool-contract (while still respecting verbosity flags).
if any(a.startswith(RESOLVED_TOOL_CONTRACT_OPTION) for a in argv):
- print "Attempting to Load resolved tool contract from {a}".format(a=argv)
- # FIXME need to catch the exception if raised here before the _pacbio_main_runner is called
- resolved_tool_contract_path = _get_resolved_tool_contract_from_argv(argv)
- resolved_tool_contract = load_resolved_tool_contract_from(resolved_tool_contract_path)
- r = _pacbio_main_runner(alog, setup_log_func, contract_tool_runner_func, resolved_tool_contract)
- # alog.info("Completed running resolved contract. {c}".format(c=resolved_tool_contract))
- return r
+ p_tmp = get_default_argparser(version=parser.version,
+ description=parser.description)
+ add_resolved_tool_contract_option(add_base_options(p_tmp,
+ default_level="NOTSET"))
+ args_tmp = p_tmp.parse_args(argv)
+ resolved_tool_contract = load_resolved_tool_contract_from(
+ args_tmp.resolved_tool_contract)
+ _log_not_none("Successfully loaded resolved tool contract from {a}".format(a=argv))
+ # XXX if one of the logging flags was specified, that takes precedence,
+ # otherwise use the log level in the resolved tool contract. note that
+ # this takes advantage of the fact that argparse allows us to use
+ # NOTSET as the default level even though it's not one of the choices.
+ log_level = get_parsed_args_log_level(args_tmp,
+ default_level=logging.NOTSET)
+ if log_level == logging.NOTSET:
+ log_level = resolved_tool_contract.task.log_level
+ with TemporaryResourcesManager(resolved_tool_contract) as tmp_mgr:
+ r = _pacbio_main_runner(alog, setup_log_func, contract_tool_runner_func, resolved_tool_contract, level=log_level)
+ _log_not_none("Completed running resolved contract. {c}".format(c=resolved_tool_contract))
+ return r
else:
# tool was called with the standard commandline invocation
return pacbio_args_runner(argv, parser, args_runner_func, alog,
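
A note on the TemporaryResourcesManager introduced above: on entry it creates every TMP_DIR resource declared in the resolved tool contract, and on exit it removes them again. A minimal sketch of that behavior, assuming pbcommand 0.3.22 is installed; the namedtuple stand-ins and the /tmp path below exist only for illustration and mimic the .task.resources/.type_id/.path attributes the context manager actually reads:

    # Hypothetical stand-ins for a resolved tool contract (illustration only)
    from collections import namedtuple

    from pbcommand.models import ResourceTypes
    from pbcommand.cli.core import TemporaryResourcesManager

    Resource = namedtuple("Resource", "type_id path")
    Task = namedtuple("Task", "resources")
    Rtc = namedtuple("Rtc", "task")

    rtc = Rtc(Task([Resource(ResourceTypes.TMP_DIR, "/tmp/example-task-tmp")]))

    with TemporaryResourcesManager(rtc):
        # the tmp dir exists inside this block and is removed on exit
        pass
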
diff --git a/pbcommand/cli/examples/dev_app.py b/pbcommand/cli/examples/dev_app.py
index 06e97f2..56d3c63 100755
--- a/pbcommand/cli/examples/dev_app.py
+++ b/pbcommand/cli/examples/dev_app.py
@@ -5,7 +5,7 @@ import sys
from pbcommand.utils import setup_log
from pbcommand.cli import pbparser_runner
-from pbcommand.models import TaskTypes, FileTypes, get_pbparser, ResourceTypes
+from pbcommand.models import FileTypes, get_pbparser, ResourceTypes
# This has the same functionality as the dev_simple_app
@@ -31,7 +31,7 @@ def add_args_and_options(p):
# FileType, label, name, description
p.add_input_file_type(FileTypes.FASTA, "fasta_in", "Fasta File", "PacBio Spec'ed fasta file")
# File Type, label, name, description, default file name
- p.add_output_file_type(FileTypes.FASTA, "fasta_out", "Filtered Fasta file", "Filtered Fasta file", "filter.fasta")
+ p.add_output_file_type(FileTypes.FASTA, "fasta_out", "Filtered Fasta file", "Filtered Fasta file", "filter")
# Option id, label, default value, name, description
# for the argparse, the read-length will be translated to --read-length and (accessible via args.read_length)
p.add_int("pbcommand.task_options.dev_read_length", "read-length", 25, "Length filter", "Min Sequence Length filter")
diff --git a/pbcommand/cli/examples/dev_gather_fasta_app.py b/pbcommand/cli/examples/dev_gather_fasta_app.py
index 43b2f8c..5c15a8b 100644
--- a/pbcommand/cli/examples/dev_gather_fasta_app.py
+++ b/pbcommand/cli/examples/dev_gather_fasta_app.py
@@ -89,7 +89,7 @@ def get_parser():
p = get_gather_pbparser(TOOL_ID, __version__, "Fasta Chunk Gather",
desc, driver, is_distributed=False)
p.add_input_file_type(FileTypes.CHUNK, "chunk_json", "Chunk JSON", "Chunked Fasta JSON Out")
- p.add_output_file_type(FileTypes.FASTA, "output", "Chunk JSON", "Output Fasta", "gathered.fasta")
+ p.add_output_file_type(FileTypes.FASTA, "output", "Chunk JSON", "Output Fasta", "gathered")
return p
diff --git a/pbcommand/cli/examples/dev_mixed_app.py b/pbcommand/cli/examples/dev_mixed_app.py
index fbfcc4d..5078d03 100644
--- a/pbcommand/cli/examples/dev_mixed_app.py
+++ b/pbcommand/cli/examples/dev_mixed_app.py
@@ -16,7 +16,7 @@ options = alpha, beta
import sys
import logging
-from pbcommand.models import TaskTypes, FileTypes, get_pbparser
+from pbcommand.models import FileTypes, get_pbparser
from pbcommand.cli import pbparser_runner
from pbcommand.utils import setup_log
@@ -51,7 +51,7 @@ def add_rtc_options(p):
:return:
"""
p.add_input_file_type(FileTypes.CSV, "csv", "Input CSV", "Input csv description")
- p.add_output_file_type(FileTypes.REPORT, "rpt", "Output Report", "Output PacBio Report JSON", "example.report.json")
+ p.add_output_file_type(FileTypes.REPORT, "rpt", "Output Report", "Output PacBio Report JSON", "example.report")
p.add_int("pbcommand.task_options.alpha", "alpha", 25, "Alpha", "Alpha description")
return p
diff --git a/pbcommand/cli/examples/dev_quick_hello_world.py b/pbcommand/cli/examples/dev_quick_hello_world.py
index 7ffa50b..e0bf8a5 100644
--- a/pbcommand/cli/examples/dev_quick_hello_world.py
+++ b/pbcommand/cli/examples/dev_quick_hello_world.py
@@ -2,7 +2,7 @@ import sys
import logging
from pbcommand.models import FileTypes, OutputFileType
-from pbcommand.cli import registry_builder, registry_runner
+from pbcommand.cli import registry_builder, registry_runner, QuickOpt
log = logging.getLogger(__name__)
@@ -22,10 +22,12 @@ def _example_main(input_files, output_files, **kwargs):
@registry("dev_qhello_world", "0.2.1", FileTypes.FASTA, FileTypes.FASTA, nproc=1, options=dict(alpha=1234))
def run_rtc(rtc):
+ log.debug("Dev Quick Hello World Example. Fasta -> Fasta with option alpha=1234")
return _example_main(rtc.task.input_files[0], rtc.task.output_files[0], nproc=rtc.task.nproc)
-@registry("dev_fastq2fasta", "0.1.0", FileTypes.FASTQ, FileTypes.FASTA)
+@registry("dev_fastq2fasta", "0.1.0", FileTypes.FASTQ, FileTypes.FASTA,
+ options=dict(beta=QuickOpt(1234.0, "Beta Name", "Beta Description"), gamma=True))
def run_rtc(rtc):
return _example_main(rtc.task.input_files[0], rtc.task.output_files[0])
@@ -36,7 +38,7 @@ def run_rtc(rtc):
def _to_output(i, file_type):
- default_name = "_".join([file_type.file_type_id, file_type.base_name + "_" + str(i) + "." + file_type.ext])
+ default_name = "_".join([file_type.file_type_id, file_type.base_name + "_" + str(i)])
label = "label_" + file_type.file_type_id
desc = "File {f}".format(f=file_type)
return OutputFileType(file_type.file_type_id, label, repr(file_type), desc, default_name)
@@ -56,4 +58,7 @@ def run_rtc(rtc):
if __name__ == '__main__':
- sys.exit(registry_runner(registry, sys.argv[1:]))
+ default_log_level = logging.DEBUG
+ sys.exit(registry_runner(registry,
+ sys.argv[1:],
+ default_log_level=default_log_level))
diff --git a/pbcommand/cli/examples/dev_scatter_fasta_app.py b/pbcommand/cli/examples/dev_scatter_fasta_app.py
index 0f9d02c..7479956 100644
--- a/pbcommand/cli/examples/dev_scatter_fasta_app.py
+++ b/pbcommand/cli/examples/dev_scatter_fasta_app.py
@@ -132,7 +132,7 @@ def get_parser():
p = get_scatter_pbparser(TOOL_ID, __version__, "Fasta Scatter",
desc, driver, chunk_keys, is_distributed=False)
p.add_input_file_type(FileTypes.FASTA, "fasta_in", "Fasta In", "Fasta file to scatter")
- p.add_output_file_type(FileTypes.CHUNK, "cjson", "Chunk JSON", "Scattered/Chunked Fasta Chunk.json", "fasta.chunks.json")
+ p.add_output_file_type(FileTypes.CHUNK, "cjson", "Chunk JSON", "Scattered/Chunked Fasta Chunk.json", "fasta.chunks")
p.add_int("pbcommand.task_options.dev_scatter_fa_nchunks", "nchunks", 10, "Number of chunks",
"Suggested number of chunks. May be overridden by $max_nchunks")
return p
diff --git a/pbcommand/cli/examples/dev_txt_app.py b/pbcommand/cli/examples/dev_txt_app.py
index a40c7f8..2cda5d6 100644
--- a/pbcommand/cli/examples/dev_txt_app.py
+++ b/pbcommand/cli/examples/dev_txt_app.py
@@ -9,7 +9,7 @@ import sys
from pbcommand.utils import setup_log
from pbcommand.cli import pbparser_runner
-from pbcommand.models import TaskTypes, FileTypes, get_pbparser, ResourceTypes
+from pbcommand.models import FileTypes, get_pbparser, ResourceTypes
TOOL_ID = "pbcommand.tasks.dev_txt_app"
VERSION = "0.1.0"
@@ -37,7 +37,7 @@ def get_parser():
# Add Input Files types
p.add_input_file_type(FileTypes.TXT, "txt_in", "Txt file", "Generic Text File")
# Add output files types
- p.add_output_file_type(FileTypes.TXT, "txt_out", "Txt outfile", "Generic Output Txt file", "output.txt")
+ p.add_output_file_type(FileTypes.TXT, "txt_out", "Txt outfile", "Generic Output Txt file", "output")
p.add_int("pbcommand.task_options.dev_max_nlines", "max_nlines", 10, "Max Lines", "Max Number of lines to Copy")
return p
diff --git a/pbcommand/cli/examples/template_simple.py b/pbcommand/cli/examples/template_simple.py
new file mode 100644
index 0000000..136f45e
--- /dev/null
+++ b/pbcommand/cli/examples/template_simple.py
@@ -0,0 +1,49 @@
+"""Simple Example Template for creating a CLI tool"""
+import os
+import sys
+import logging
+
+from pbcommand.validators import validate_file
+from pbcommand.utils import setup_log
+from pbcommand.cli import get_default_argparser_with_base_opts, pacbio_args_runner
+
+log = logging.getLogger(__name__)
+
+__version__ = "0.1.0"
+# __author__ = "Add-your-name"
+
+
+def get_parser():
+ """Define Parser. Use the helper methods in validators to validate input"""
+ p = get_default_argparser_with_base_opts(__version__, __doc__)
+ p.add_argument('path_to_file', type=validate_file, help="Path to File")
+ return p
+
+
+def run_main(path, value=8):
+ """
+ Main function that should be called. Typically this is imported from your
+ library code.
+
+ This should NOT reference args.*
+ """
+ log.info("Running path {p} with value {v}".format(p=path, v=value))
+ log.info("Found path? {t} {p}".format(p=path, t=os.path.exists(path)))
+ return 0
+
+
+def args_runner(args):
+ log.info("Raw args {a}".format(a=args))
+ return run_main(args.path_to_file, value=100)
+
+
+def main(argv):
+ return pacbio_args_runner(argv[1:],
+ get_parser(),
+ args_runner,
+ log,
+ setup_log_func=setup_log)
+
+
+if __name__ == '__main__':
+ sys.exit(main(sys.argv))
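
The new template above wires get_default_argparser_with_base_opts into pacbio_args_runner, so the base logging options come for free. A quick way to exercise it programmatically (a sketch; the input path is a placeholder and must exist, since validate_file checks it):

    # equivalent to: template_simple.py --log-level=DEBUG /tmp/input.txt
    from pbcommand.cli.examples.template_simple import main

    rc = main(["template_simple.py", "--log-level=DEBUG", "/tmp/input.txt"])
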
diff --git a/pbcommand/cli/quick.py b/pbcommand/cli/quick.py
index 7cb0b4d..52ae47d 100644
--- a/pbcommand/cli/quick.py
+++ b/pbcommand/cli/quick.py
@@ -1,16 +1,23 @@
+import argparse
import json
import logging
import os
import sys
-from pbcommand.cli import get_default_argparser
-from pbcommand.models import (FileTypes, ToolContractTask, ToolContract,
+from collections import namedtuple
+import time
+
+import pbcommand
+from .core import get_default_argparser_with_base_opts
+from pbcommand.common_options import add_base_options, add_common_options
+
+from pbcommand.models import (ToolContractTask, ToolContract,
InputFileType, OutputFileType, FileType)
from pbcommand.models.parser import (to_option_schema, JsonSchemaTypes)
from pbcommand.models.tool_contract import ToolDriver
from pbcommand.pb_io import (load_resolved_tool_contract_from,
write_tool_contract)
-from pbcommand.utils import setup_log
+from pbcommand.utils import setup_log, setup_logger, get_parsed_args_log_level
log = logging.getLogger(__name__)
@@ -21,6 +28,9 @@ class Constants(object):
RTC_DRIVER = 'run-rtc'
+QuickOpt = namedtuple("QuickOpt", "value name description")
+
+
def _example_main(*args, **kwargs):
log.info("Running example main with {a} kw:{k}".format(a=args, k=kwargs))
return 0
@@ -43,29 +53,40 @@ def _file_type_to_output_file_type(file_type, index):
file_type.default_name)
-def __convert_to_option(jtype, namespace, key, value):
+def __convert_to_option(jtype, namespace, key, value, name=None, description=None):
+ """Convert to Option dict
+
+ This really should have been a concrete type, at least a namedtuple
+ """
opt_id = ".".join([namespace, 'task_options', key])
- name = "Option {n}".format(n=key)
- desc = "Option {n} description".format(n=key)
+ name = "Option {n}".format(n=key) if name is None else name
+ desc = "Option {n} description".format(n=key) if description is None else description
opt = to_option_schema(opt_id, jtype, name, desc, value)
return opt
-def _convert_to_option(namespace, key, value):
+def _convert_to_option(namespace, key, value, name=None, description=None):
if isinstance(value, basestring):
- opt = __convert_to_option(JsonSchemaTypes.STR, namespace, key, value)
+ opt = __convert_to_option(JsonSchemaTypes.STR, namespace, key, value, name=name, description=description)
elif isinstance(value, bool):
- opt = __convert_to_option(JsonSchemaTypes.BOOL, namespace, key, value)
+ opt = __convert_to_option(JsonSchemaTypes.BOOL, namespace, key, value, name=name, description=description)
elif isinstance(value, int):
- opt = __convert_to_option(JsonSchemaTypes.INT, namespace, key, value)
+ opt = __convert_to_option(JsonSchemaTypes.INT, namespace, key, value, name=name, description=description)
elif isinstance(value, float):
- opt = __convert_to_option(JsonSchemaTypes.NUM, namespace, key, value)
+ opt = __convert_to_option(JsonSchemaTypes.NUM, namespace, key, value, name=name, description=description)
else:
raise TypeError("Unsupported option {k} type. {t} ".format(k=key, t=type(value)))
return opt
+def _convert_quick_option(namespace, key, quick_opt):
+ """:type quick_opt: QuickOpt"""
+ return _convert_to_option(namespace, key, quick_opt.value,
+ name=quick_opt.name,
+ description=quick_opt.description)
+
+
def _to_list(x):
if isinstance(x, (list, tuple)):
return x
@@ -82,7 +103,16 @@ def _transform_output_ftype(x, i):
raise TypeError("Unsupported type {t} value {x}".format(x=x, t=type(x)))
+def _convert_to_raw_option(namespace, key, value_or_quick_opt):
+ if isinstance(value_or_quick_opt, QuickOpt):
+ return _convert_quick_option(namespace, key, value_or_quick_opt)
+ else:
+ # 'raw' opt was provided with a primitive type
+ return _convert_to_option(namespace, key, value_or_quick_opt)
+
+
class Registry(object):
+
def __init__(self, tool_namespace, driver_base):
self.namespace = tool_namespace
self.driver_base = driver_base
@@ -116,7 +146,7 @@ class Registry(object):
if options is None:
tool_options = []
else:
- tool_options = [_convert_to_option(self.namespace, key, value) for key, value in options.iteritems()]
+ tool_options = [_convert_to_raw_option(self.namespace, key, value) for key, value in options.iteritems()]
resource_types = []
task = ToolContractTask(global_id, name, desc, version, is_distributed,
@@ -144,16 +174,20 @@ def registry_builder(tool_namespace, driver_base):
def _subparser_builder(subparser, name, description, options_func, exe_func):
- p = subparser.add_parser(name, help=description)
+ p = subparser.add_parser(name, help=description,
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
options_func(p)
# I strongly dislike this.
p.set_defaults(func=exe_func)
return p
-def _add_run_rtc_options(p):
- p.add_argument('rtc_path', type=str, help="Path to resolved tool contract")
- return p
+def _add_run_rtc_options(default_log_level=logging.INFO):
+ def _wrapper(p):
+ add_common_options(p, default_level=default_log_level)
+ p.add_argument('rtc_path', type=str, help="Path to resolved tool contract")
+ return p
+ return _wrapper
def _add_emit_all_tcs_options(p):
@@ -176,22 +210,42 @@ def __args_summary_runner(registry):
return _w
-def __args_rtc_runner(registry):
+def __args_rtc_runner(registry, default_log_level):
def _w(args):
- # FIXME.
- setup_log(log, level=logging.DEBUG)
+ started_at = time.time()
+
+ def run_time():
+ return time.time() - started_at
+
+ def exit_msg(rcode_):
+ return "Completed running {r} exitcode {e} in {t:.2f} sec.".format(r=rtc, e=rcode_, t=run_time())
+
+ level = get_parsed_args_log_level(args)
+ setup_logger(None, level=level)
+
+ log.info("Loading pbcommand {v}".format(v=pbcommand.get_version()))
log.info("Registry {r}".format(r=registry))
+ log.info("Setting log-level to {d}".format(d=level))
+ log.debug("args {a}".format(a=args))
log.info("loading RTC from {i}".format(i=args.rtc_path))
rtc = load_resolved_tool_contract_from(args.rtc_path)
- id_funcs = {t.task.task_id:func for t, func in registry.rtc_runners.iteritems()}
+ id_funcs = {t.task.task_id: func for t, func in registry.rtc_runners.iteritems()}
func = id_funcs.get(rtc.task.task_id, None)
if func is None:
- sys.stderr.write("ERROR. Unknown tool contract id {x}".format(x=rtc.task.task_id))
- return -1
+ rcode = 1
+ log.error("Unknown tool contract id '{x}' Registered TC ids {i}".format(x=rtc.task.task_id, i=id_funcs.keys()))
+ log.error(exit_msg(rcode))
+ return rcode
else:
+ log.info("Running id:{i} Resolved Tool Contract {r}".format(r=rtc, i=rtc.task.task_id))
+ log.info("Runner func {f}".format(f=func))
exit_code = func(rtc)
- log.info("Completed running {r} exitcode {e}".format(r=rtc, e=exit_code))
+ if exit_code == 0:
+ log.info(exit_msg(exit_code))
+ else:
+ log.error(exit_msg(exit_code))
+
return exit_code
return _w
@@ -201,7 +255,7 @@ def __args_emit_tc_runner(registry):
log.info("Registry {r}".format(r=registry))
tc_id = args.tc_id
log.info("Emitting TC from {i}".format(i=tc_id))
- id_tc = {t.task.task_id:t for t in registry.rtc_runners.keys()}
+ id_tc = {t.task.task_id: t for t in registry.rtc_runners.keys()}
log.info(id_tc)
tc = id_tc.get(tc_id, None)
if tc is None:
@@ -226,17 +280,17 @@ def __args_emit_all_tcs_runner(registry):
return _w
-def _to_registry_parser(version, description):
+def _to_registry_parser(version, description, default_log_level):
def _f(registry):
- p = get_default_argparser(version, description)
+ p = get_default_argparser_with_base_opts(version, description)
sp = p.add_subparsers(help='Commands')
args_summary_runner = __args_summary_runner(registry)
- args_rtc_runner = __args_rtc_runner(registry)
+ args_rtc_runner = __args_rtc_runner(registry, default_log_level)
args_tc_emit = __args_emit_tc_runner(registry)
args_tcs_emit = __args_emit_all_tcs_runner(registry)
- _subparser_builder(sp, Constants.RTC_DRIVER, "Run Resolved Tool contract", _add_run_rtc_options, args_rtc_runner)
+ _subparser_builder(sp, Constants.RTC_DRIVER, "Run Resolved Tool contract", _add_run_rtc_options(default_log_level), args_rtc_runner)
_subparser_builder(sp, 'emit-tool-contracts', "Emit all Tool contracts to output-dir", _add_emit_all_tcs_options, args_tcs_emit)
_subparser_builder(sp, 'emit-tool-contract', "Emit a single tool contract by id", _add_emit_tc_options, args_tc_emit)
_subparser_builder(sp, 'summary', "Summary of Tool Contracts", lambda x: x, args_summary_runner)
@@ -244,7 +298,7 @@ def _to_registry_parser(version, description):
return _f
-def registry_runner(registry, argv):
+def registry_runner(registry, argv, default_log_level=logging.INFO):
"""Runs a registry
1. Manually build an argparser that has
@@ -260,11 +314,10 @@ def registry_runner(registry, argv):
:type registry: Registry
"""
- log.info("Running registry {r} with args {a}".format(r=registry, a=argv))
- f = _to_registry_parser('0.1.0', "Multi-quick-tool-runner for {r}".format(r=registry.namespace))
+ f = _to_registry_parser('0.1.1', "Multi-quick-tool-runner for {r}".format(r=registry.namespace), default_log_level)
p = f(registry)
args = p.parse_args(argv)
- # need to disable this because some subparsers are emitting to stdout
- # setup_log(log, level=logging.DEBUG)
+ # The logger needs to be set up only in specific subparsers. Some commands
+ # use stdout for output rather than for logging
return_code = args.func(args)
return return_code
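
For orientation, the quick-tool API reworked above is driven by a Registry: registry_builder returns a decorator that registers resolved-tool-contract runners, QuickOpt (new here) attaches a display name and description to a task option, and registry_runner builds the run-rtc/emit-tool-contract(s)/summary subcommands. A condensed sketch along the lines of the bundled dev_quick_hello_world example; the task id, option name and driver string are illustrative only:

    import logging
    import sys

    from pbcommand.models import FileTypes
    from pbcommand.cli import registry_builder, registry_runner, QuickOpt

    log = logging.getLogger(__name__)

    registry = registry_builder("pbcommand", "python -m my_pkg.my_quick_tool ")


    @registry("dev_fasta_copy", "0.1.0", FileTypes.FASTA, FileTypes.FASTA, nproc=1,
              options=dict(alpha=QuickOpt(1234.0, "Alpha Name", "Alpha Description")))
    def run_rtc(rtc):
        # rtc is the loaded resolved tool contract; do the real work here
        log.info("in: %s out: %s", rtc.task.input_files[0], rtc.task.output_files[0])
        return 0


    if __name__ == '__main__':
        sys.exit(registry_runner(registry, sys.argv[1:],
                                 default_log_level=logging.INFO))
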
diff --git a/pbcommand/cli/utils.py b/pbcommand/cli/utils.py
new file mode 100755
index 0000000..2d707ac
--- /dev/null
+++ b/pbcommand/cli/utils.py
@@ -0,0 +1,139 @@
+
+"""
+Additional utilities for running command-line apps - most of these do not apply
+to tool-contract-driven programs. (Ported from pbsmrtpipe)
+"""
+
+import traceback
+import argparse
+import platform
+import logging
+import time
+import os
+
+from pbcommand.validators import validate_file, validate_fofn
+from pbcommand.utils import setup_log
+
+log = logging.getLogger(__name__)
+
+
+def subparser_builder(subparser, subparser_id, description, options_func, exe_func):
+ """
+ Util to add subparser options
+
+ :param subparser:
+ :param subparser_id:
+ :param description:
+ :param options_func: Function that will add args and options to Parser instance F(subparser) -> None
+ :param exe_func: Function to run F(args) -> Int
+ :return:
+ """
+ p = subparser.add_parser(subparser_id, help=description)
+ options_func(p)
+ p.set_defaults(func=exe_func)
+ return p
+
+
+def add_debug_option(p):
+ p.add_argument('--debug', action='store_true',
+ help="Send logging info to stdout.")
+ return p
+
+
+def _validate_output_dir_or_get_default(value):
+ if value is None:
+ return os.getcwd()
+ else:
+ if os.path.exists(value):
+ return os.path.abspath(value)
+ else:
+ os.mkdir(value)
+ return os.path.abspath(value)
+
+
+def add_output_dir_option(p):
+ p.add_argument('-o', '--output-dir', type=_validate_output_dir_or_get_default, default=os.getcwd(), help="Output directory.")
+ return p
+
+
+def _add_input_file(args_label, type_, help_):
+ def _wrapper(p):
+ p.add_argument(args_label, type=type_, help=help_)
+ return p
+ return _wrapper
+
+
+add_fasta_output = _add_input_file("fasta_out", str, "Path to output Fasta File")
+add_fasta_input = _add_input_file("fasta_in", validate_file, "Path to Input FASTA File")
+
+add_fastq_output = _add_input_file("fastq_out", str, "Path to output Fastq File")
+add_fastq_input = _add_input_file("fastq_in", validate_file, "Path to Input FASTQ File")
+
+add_fofn_input = _add_input_file("fofn_in", validate_fofn, "Path to Input FOFN (File of file names) File")
+add_fofn_output = _add_input_file("fofn_out", str, "Path to output FOFN.")
+
+add_report_output = _add_input_file("json_report", str, "Path to PacBio JSON Report")
+
+add_subread_input = _add_input_file("subread_ds", validate_file, "Path to PacBio Subread DataSet XML")
+
+add_ds_reference_input = _add_input_file("reference_ds", validate_file, "Path to PacBio ReferenceSet DataSet XML")
+
+
+def args_executer(args):
+ """
+
+
+ :rtype int
+ """
+ try:
+ return_code = args.func(args)
+ except Exception as e:
+ log.error(e, exc_info=True)
+ import sys
+ traceback.print_exc(sys.stderr)
+ if isinstance(e, IOError):
+ return_code = 1
+ else:
+ return_code = 2
+
+ return return_code
+
+
+def main_runner(argv, parser, exe_runner_func, setup_log_func, alog):
+ """
+ Fundamental interface to commandline applications
+ """
+ started_at = time.time()
+ args = parser.parse_args(argv)
+ # log.debug(args)
+
+ # setup log
+ _have_log_setup = False
+ if hasattr(args, 'quiet') and args.quiet:
+ setup_log_func(alog, level=logging.ERROR)
+ elif hasattr(args, 'verbosity') and args.verbosity > 0:
+ if args.verbosity >= 2:
+ setup_log_func(alog, level=logging.DEBUG)
+ else:
+ setup_log_func(alog, level=logging.INFO)
+ elif hasattr(args, 'debug') and args.debug:
+ setup_log_func(alog, level=logging.DEBUG)
+ else:
+ alog.addHandler(logging.NullHandler())
+
+ log.debug(args)
+ alog.info("Starting tool version {v}".format(v=parser.version))
+ rcode = exe_runner_func(args)
+
+ run_time = time.time() - started_at
+ _d = dict(r=rcode, s=run_time)
+ alog.info("exiting with return code {r} in {s:.2f} sec.".format(**_d))
+ return rcode
+
+
+def main_runner_default(argv, parser, alog):
+ # FIXME. This still has the old set_defaults(func=func) and
+ # has the assumption that --debug has been assigned as an option
+ # This is used for all the subparsers
+ setup_log_func = setup_log
+ return main_runner(argv, parser, args_executer, setup_log_func, alog)
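
The module above is the pbsmrtpipe-style multi-subcommand pattern: register each subcommand with subparser_builder (which wires args.func), then let main_runner_default handle logging setup, dispatch through args_executer and the exit-code bookkeeping. A rough sketch of how a tool might wire it together; the tool, the 'hello' subcommand and its options are made up for illustration:

    import argparse
    import logging
    import sys

    from pbcommand.cli.utils import (subparser_builder, add_debug_option,
                                     add_output_dir_option, main_runner_default)

    log = logging.getLogger(__name__)
    __version__ = "0.1.0"


    def _hello_options(p):
        # options for the hypothetical 'hello' subcommand
        add_debug_option(p)
        add_output_dir_option(p)
        return p


    def _run_hello(args):
        log.info("writing greeting under %s", args.output_dir)
        return 0


    def get_parser():
        p = argparse.ArgumentParser(description="Example multi-subcommand tool")
        p.version = __version__  # main_runner logs parser.version at startup
        sp = p.add_subparsers(help="Subcommands")
        subparser_builder(sp, "hello", "Say hello", _hello_options, _run_hello)
        return p


    if __name__ == '__main__':
        sys.exit(main_runner_default(sys.argv[1:], get_parser(), log))
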
diff --git a/pbcommand/common_options.py b/pbcommand/common_options.py
index 01192d8..6e4dd46 100644
--- a/pbcommand/common_options.py
+++ b/pbcommand/common_options.py
@@ -1,22 +1,54 @@
"""Common options and utils that can me used in commandline utils"""
-import argparse
+
import logging
+import argparse
import sys
-from pbcommand.utils import compose
RESOLVED_TOOL_CONTRACT_OPTION = "--resolved-tool-contract"
EMIT_TOOL_CONTRACT_OPTION = "--emit-tool-contract"
def add_debug_option(p):
- p.add_argument('--debug', action="store_true", default=False, help="Debug to stdout")
+ p.add_argument("--pdb", action="store_true", default=False,
+ help="Enable Python debugger")
return p
-def add_log_level_option(p):
+def add_log_debug_option(p):
+ """This requires the log-level option"""
+ p.add_argument('--debug', action="store_true", default=False, help="Alias for setting log level to DEBUG")
+ return p
+
+
+def add_log_quiet_option(p):
+ """This requires the log-level option"""
+ p.add_argument('--quiet', action="store_true", default=False, help="Alias for setting log level to CRITICAL to suppress output.")
+ return p
+
+
+def add_log_verbose_option(p):
+ p.add_argument(
+ "-v",
+ "--verbose",
+ dest="verbosity",
+ action="count",
+ help="Set the verbosity level.")
+ return p
+
+
+def add_log_level_option(p, default_level='INFO'):
+ """Add logging level with a default value"""
+ if isinstance(default_level, int):
+ default_level = logging.getLevelName(default_level)
p.add_argument('--log-level', choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'),
- default='INFO', help="Set log level")
+ default=default_level, help="Set log level")
+ return p
+
+
+def add_log_file_option(p):
+ p.add_argument('--log-file', default=None, type=str,
+ help="Write the log to file. Default(None) will write to stdout.")
return p
@@ -33,19 +65,45 @@ def add_emit_tool_contract_option(p):
return p
-def add_base_options(p):
- funcs = [add_debug_option,
- add_log_level_option]
- fs = compose(*funcs)
- return fs(p)
+def add_base_options(p, default_level='INFO'):
+ """Add the core logging options to the parser and set the default log level
+
+ If you don't want the default log output to go to stdout, then set the
+ default log level to "ERROR". This will essentially suppress all output
+ to stdout.
+
+ The default behavior will then only emit to stderr, which is essentially
+ a '--quiet' default mode.
+
+ my-tool --my-opt=1234 file_in.txt
+
+ To override the default behavior:
+
+ my-tool --my-opt=1234 --log-level=INFO file_in.txt
+
+ Or write the log to an explicit file:
+
+ my-tool --my-opt=1234 --log-level=DEBUG --log-file=file.log file_in.txt
+
+ """
+ # This should automatically be added by get_default_argparser
+ add_log_file_option(p)
+ p_log = p.add_mutually_exclusive_group()
+ add_log_verbose_option(add_log_quiet_option(add_log_debug_option(
+ add_log_level_option(p_log, default_level=default_level))))
+ return p
+
+
+def add_common_options(p, default_level='INFO'):
+ """
+ New model for 3.1 release. This should replace add_base_options
+ """
+ return add_log_quiet_option(add_log_debug_option(add_log_level_option(add_log_file_option(p), default_level=default_level)))
-def add_base_options_with_emit_tool_contract(p):
- funcs = [add_base_options,
- add_resolved_tool_contract_option,
- add_emit_tool_contract_option]
- fs = compose(*funcs)
- return fs(p)
+def add_base_options_with_emit_tool_contract(p, default_level='INFO'):
+ # can't use compose here because of circular imports via parser
+ return add_base_options(add_resolved_tool_contract_option(add_emit_tool_contract_option(p)), default_level=default_level)
def _to_print_message_action(msg):
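
The reworked helpers above attach the logging options to any argparse parser: add_base_options adds --log-file plus a mutually exclusive --log-level/--debug/--quiet/--verbose group, while add_common_options is the slimmer 3.1-style variant without the verbose counter. A small sketch of using them directly on standalone parsers (assumes pbcommand 0.3.22):

    import argparse

    from pbcommand.common_options import add_base_options, add_common_options

    # --log-file plus the mutually exclusive --log-level/--debug/--quiet/--verbose group
    p = add_base_options(argparse.ArgumentParser(description="demo"), default_level="ERROR")
    args = p.parse_args(["--log-level", "DEBUG"])
    print args.log_level, args.log_file   # DEBUG None

    # slimmer variant: --log-file, --log-level, --debug, --quiet
    p2 = add_common_options(argparse.ArgumentParser(description="demo"))
    print p2.parse_args([]).log_level     # INFO
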
diff --git a/pbcommand/interactive_resolver.py b/pbcommand/interactive_resolver.py
index 10b7d0b..4c6a076 100644
--- a/pbcommand/interactive_resolver.py
+++ b/pbcommand/interactive_resolver.py
@@ -40,7 +40,7 @@ def run_main(tc):
input_files.append(in_path)
tool_options = {}
- rtc = resolve_tool_contract(tc, input_files, output_dir, '/tmp', int(nproc), tool_options)
+ rtc = resolve_tool_contract(tc, input_files, output_dir, '/tmp', int(nproc), tool_options, is_distributable=False)
print rtc
file_name = tc.task.task_id + "_resolved_tool_contract.json"
diff --git a/pbcommand/models/__init__.py b/pbcommand/models/__init__.py
index 4936173..55bcc28 100644
--- a/pbcommand/models/__init__.py
+++ b/pbcommand/models/__init__.py
@@ -1,4 +1,6 @@
-from .common import (FileType, FileTypes, TaskTypes, ResourceTypes, SymbolTypes,
+from .common import (FileType, FileTypes,
+ DataSetFileType, DataSetMetaData,
+ TaskTypes, ResourceTypes, SymbolTypes,
PipelineChunk, DataStoreFile, DataStore)
from .tool_contract import *
from .parser import (get_pbparser,
diff --git a/pbcommand/models/common.py b/pbcommand/models/common.py
index 2d6c7cd..c21901e 100644
--- a/pbcommand/models/common.py
+++ b/pbcommand/models/common.py
@@ -12,11 +12,15 @@ import re
import warnings
import functools
import datetime
+from collections import namedtuple
log = logging.getLogger(__name__)
REGISTERED_FILE_TYPES = {}
+# Lightweight DataSet metadata. Use pbcore for full DataSet functionality
+DataSetMetaData = namedtuple("DataSetMetaData", 'uuid metatype')
+
class PacBioNamespaces(object):
# File Types
@@ -150,7 +154,7 @@ class FileType(object):
@property
def default_name(self):
- return ".".join([self.base_name, self.ext])
+ return self.base_name # ".".join([self.base_name, self.ext])
def __eq__(self, other):
if isinstance(other, self.__class__):
@@ -170,6 +174,11 @@ class FileType(object):
return "<{k} id={i} name={n} >".format(**_d)
+class DataSetFileType(FileType):
+ """File types that are a DataSet Type"""
+ pass
+
+
class MimeTypes(object):
JSON = 'application/json'
TXT = 'text/plain'
@@ -191,6 +200,8 @@ class FileTypes(object):
TXT = FileType(to_file_ns('txt'), 'file', 'txt', MimeTypes.TXT)
# Generic Log file
LOG = FileType(to_file_ns('log'), 'file', 'log', MimeTypes.TXT)
+ # Config file
+ CFG = FileType(to_file_ns('cfg'), 'config', 'cfg', MimeTypes.TXT)
# THIS NEEDS TO BE CONSISTENT with scala code. When the datastore
# is written to disk the file type id's might be translated to
@@ -238,32 +249,54 @@ class FileTypes(object):
# DataSet Types. The default file names should have well-defined agreed
# upon format. See what Dave did for the bam files.
# https://github.com/PacificBiosciences/PacBioFileFormats
- DS_SUBREADS_H5 = FileType(to_ds_ns("HdfSubreadSet"), "file", "hdfsubreadset.xml", MimeTypes.XML)
- DS_SUBREADS = FileType(to_ds_ns("SubreadSet"), "file", "subreadset.xml", MimeTypes.XML)
- DS_CCS = FileType(to_ds_ns("ConsensusReadSet"), "file", "consensusreadset.xml", MimeTypes.XML)
- DS_REF = FileType(to_ds_ns("ReferenceSet"), "file", "referenceset.xml", MimeTypes.XML)
- DS_ALIGN = FileType(to_ds_ns("AlignmentSet"), "file", "alignmentset.xml", MimeTypes.XML)
- DS_CONTIG = FileType(to_ds_ns("ContigSet"), "file", "contigset.xml", MimeTypes.XML)
- DS_BARCODE = FileType(to_ds_ns("BarcodeSet"), "file", "barcodeset.xml", MimeTypes.XML)
- DS_ALIGN_CCS = FileType(to_ds_ns("ConsensusAlignmentSet"), "file",
- "consensusalignmentset.xml", MimeTypes.XML)
-
- # Index Files
+ DS_SUBREADS_H5 = DataSetFileType(to_ds_ns("HdfSubreadSet"), "file", "hdfsubreadset.xml", MimeTypes.XML)
+ DS_SUBREADS = DataSetFileType(to_ds_ns("SubreadSet"), "file", "subreadset.xml", MimeTypes.XML)
+ DS_CCS = DataSetFileType(to_ds_ns("ConsensusReadSet"), "file", "consensusreadset.xml", MimeTypes.XML)
+ DS_REF = DataSetFileType(to_ds_ns("ReferenceSet"), "file", "referenceset.xml", MimeTypes.XML)
+ DS_ALIGN = DataSetFileType(to_ds_ns("AlignmentSet"), "file", "alignmentset.xml", MimeTypes.XML)
+ DS_CONTIG = DataSetFileType(to_ds_ns("ContigSet"), "file", "contigset.xml", MimeTypes.XML)
+ DS_BARCODE = DataSetFileType(to_ds_ns("BarcodeSet"), "file", "barcodeset.xml", MimeTypes.XML)
+ DS_ALIGN_CCS = DataSetFileType(to_ds_ns("ConsensusAlignmentSet"), "file",
+ "consensusalignmentset.xml", MimeTypes.XML)
+
+ # PacBio Defined Formats
+ # **** Index Files
+
+ # ReferenceSet specific
I_SAM = FileType(to_index_ns("SamIndex"), "file", "sam.index", MimeTypes.BINARY)
I_SAW = FileType(to_index_ns("SaWriterIndex"), "file", "sa", MimeTypes.BINARY)
- # PacBio Defined Formats
+ # SMRT View specific files
+ I_INDEXER = FileType(to_index_ns("Indexer"), "file", "fasta.index", MimeTypes.TXT)
+ I_FCI = FileType(to_index_ns("FastaContigIndex"), "file", "fasta.contig.index", MimeTypes.TXT)
+
+ # PacBio BAM pbi
+ I_PBI = FileType(to_index_ns("PacBioIndex"), "file", "pbi", MimeTypes.BINARY)
+ # This is duplicated from the old pre-DS era models. see BAMBAI
+ I_BAI = FileType(to_index_ns("BamIndex"), "file", "bam.bai", MimeTypes.BINARY)
+
+ # Fasta type files
FASTA_BC = FileType("PacBio.BarcodeFile.BarcodeFastaFile", "file", "barcode.fasta", MimeTypes.TXT)
# No ':' or '"' in the id
FASTA_REF = FileType("PacBio.ReferenceFile.ReferenceFastaFile", "file", "pbreference.fasta", MimeTypes.TXT)
+ CONTIG_FA = FileType("PacBio.ContigFile.ContigFastaFile", "file", "contig.fasta", MimeTypes.TXT)
- # FIXME. Add Bax/Bam Formats here. This should replace the exiting pre-SA3 formats.
+ # BAM dialects
BAM_ALN = FileType("PacBio.AlignmentFile.AlignmentBamFile", "file", "alignment.bam", MimeTypes.BINARY)
BAM_SUB = FileType("PacBio.SubreadFile.SubreadBamFile", "file", "subread.bam", MimeTypes.BINARY)
BAM_CCS = FileType("PacBio.ConsensusReadFile.ConsensusReadBamFile", "file", "ccs.bam", MimeTypes.BINARY)
+ BAM_CCS_ALN = FileType("PacBio.AlignmentFile.ConsensusAlignmentBamFile", "file", "ccs_align.bam", MimeTypes.BINARY)
+ # MK TODO. Add remaining SubreadSet file types, Scraps, HqRegion, etc.
+ BAZ = FileType("PacBio.ReadFile.BazFile", "file", "baz", MimeTypes.BINARY)
+ TRC = FileType("PacBio.ReadFile.TraceFile", "file", "trc", MimeTypes.BINARY)
+ PLS = FileType("PacBio.ReadFile.PulseFile", "file", "pls", MimeTypes.BINARY)
+ # RS era
BAX = FileType("PacBio.SubreadFile.BaxFile", "file", "bax.h5", MimeTypes.BINARY)
+ # sts.xml
+ STS_XML = FileType("PacBio.SubreadFile.ChipStatsFile", "file", "sts.xml", MimeTypes.XML)
+
# THIS IS EXPERIMENT for internal analysis. DO NOT use
COND = FileType(to_file_ns("COND"), "file", "conditions.json", MimeTypes.JSON)
@@ -272,13 +305,25 @@ class FileTypes(object):
return file_type_id in REGISTERED_FILE_TYPES
@staticmethod
+ def ALL_DATASET_TYPES():
+ return {i: f for i, f in REGISTERED_FILE_TYPES.iteritems() if isinstance(f, DataSetFileType)}
+
+ @staticmethod
def ALL():
return REGISTERED_FILE_TYPES
+def _get_timestamp_or_now(path, func):
+ if os.path.exists(path):
+ return func(path)
+ else:
+ return datetime.datetime.now()
+
+
class DataStoreFile(object):
- def __init__(self, uuid, source_id, type_id, path, is_chunked=False):
+ def __init__(self, uuid, source_id, type_id, path, is_chunked=False,
+ name="", description=""):
# adding this for consistency. In the scala code, the unique id must be
# a uuid format
self.uuid = uuid
@@ -288,11 +333,14 @@ class DataStoreFile(object):
# Consistent with a value in FileTypes
self.file_type_id = type_id
self.path = path
- self.file_size = os.path.getsize(path)
- self.created_at = datetime.datetime.fromtimestamp(os.path.getctime(path))
- self.modified_at = datetime.datetime.fromtimestamp(os.path.getmtime(path))
+ # FIXME(mkocher)(2016-2-23): This is probably not the best model
+ self.file_size = os.path.getsize(path) if os.path.exists(path) else 0
+ self.created_at = _get_timestamp_or_now(path, lambda px: datetime.datetime.fromtimestamp(os.path.getctime(px)))
+ self.modified_at = _get_timestamp_or_now(path, lambda px: datetime.datetime.fromtimestamp(os.path.getmtime(px)))
# Was the file produced by Chunked task
self.is_chunked = is_chunked
+ self.name = name
+ self.description = description
def __repr__(self):
_d = dict(k=self.__class__.__name__,
@@ -309,7 +357,9 @@ class DataStoreFile(object):
fileSize=self.file_size,
createdAt=_datetime_to_string(self.created_at),
modifiedAt=_datetime_to_string(self.modified_at),
- isChunked=self.is_chunked)
+ isChunked=self.is_chunked,
+ name=self.name,
+ description=self.description)
@staticmethod
def from_dict(d):
@@ -320,7 +370,10 @@ class DataStoreFile(object):
return DataStoreFile(to_k('uniqueId'),
to_k('sourceId'),
to_k('fileTypeId'),
- to_k('path'), is_chunked=is_chunked)
+ to_k('path'),
+ is_chunked=is_chunked,
+ name=to_a(d.get("name", "")),
+ description=to_a(d.get("description", "")))
def _datetime_to_string(dt):
@@ -371,12 +424,15 @@ class DataStore(object):
self._write_json(file_name, 'w+')
@staticmethod
+ def load_from_d(d):
+ ds_files = [DataStoreFile.from_dict(x) for x in d['files']]
+ return DataStore(ds_files)
+
+ @staticmethod
def load_from_json(path):
with open(path, 'r') as reader:
d = json.loads(reader.read())
-
- ds_files = [DataStoreFile.from_dict(x) for x in d['files']]
- return DataStore(ds_files)
+ return DataStore.load_from_d(d)
def _is_chunk_key(k):
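
Two usage notes on the datastore changes above: DataStoreFile now tolerates a not-yet-existing path (size 0 and "now" timestamps instead of raising) and carries optional name/description fields, and DataStore.load_from_d builds a DataStore from an already-parsed dict. A quick sketch; the uuid, source id and path are placeholders:

    import uuid

    from pbcommand.models import FileTypes, DataStoreFile, DataStore

    f = DataStoreFile(str(uuid.uuid4()),
                      "pbcommand.tasks.dev_example-out-0",
                      FileTypes.LOG.file_type_id,
                      "/tmp/example.log",          # need not exist yet
                      name="Example Log",
                      description="Log file produced by the task")

    ds = DataStore([f])
    # reading an existing datastore JSON picks up the new fields when present:
    # ds2 = DataStore.load_from_json("/path/to/datastore.json")
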
diff --git a/pbcommand/models/parser.py b/pbcommand/models/parser.py
index 3da3eba..ceec823 100644
--- a/pbcommand/models/parser.py
+++ b/pbcommand/models/parser.py
@@ -9,13 +9,11 @@ import argparse
import functools
import re
-# there's a problem with functools32 and jsonschema. This import raise an
-# import error.
-#import jsonschema
+import jsonschema
+from .common import SymbolTypes
from pbcommand.common_options import (add_base_options_with_emit_tool_contract,
add_subcomponent_versions_option)
-from pbcommand.models import SymbolTypes
from .tool_contract import (ToolDriver,
InputFileType, OutputFileType,
ToolContract, ToolContractTask,
@@ -101,13 +99,11 @@ def to_opt_id(namespace, s):
def validate_value(schema, v):
- import jsonschema
return jsonschema.validate(v, schema)
def is_valid(schema, v):
"""Returns a bool if the schema is valid"""
- import jsonschema
try:
validate_value(schema, v)
return True
@@ -120,7 +116,6 @@ def validate_schema(f):
"""Deco for validate the returned jsonschema against Draft 4 of the spec"""
def w(*args, **kwargs):
schema = f(*args, **kwargs)
- import jsonschema
_ = jsonschema.Draft4Validator(schema)
return schema
return w
@@ -284,10 +279,15 @@ class PyParser(PbParserBase):
def __init__(self, tool_id, version, name, description, subcomponents=()):
super(PyParser, self).__init__(tool_id, version, name, description)
- self.parser = argparse.ArgumentParser(version=version,
+ self.parser = argparse.ArgumentParser(#version=version,
description=description,
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
add_help=True)
+ self.parser.version = version
+ self.parser.add_argument('--version',
+ action="version",
+ help="show program's version number and exit")
+
if subcomponents:
add_subcomponent_versions_option(self.parser, subcomponents)
@@ -405,6 +405,7 @@ class ToolContractParser(PbParserBase):
class ScatterToolContractParser(ToolContractParser):
+
def __init__(self, tool_id, version, name, description, task_type, driver, nproc_symbol,
resource_types, chunk_keys, nchunks):
super(ScatterToolContractParser, self).__init__(tool_id, version, name, description, task_type, driver,
@@ -452,7 +453,7 @@ class PbParser(PbParserBase):
(stored as tool_contract_parser and arg_parser attributes respectively).
"""
- def __init__(self, tool_contract_parser, arg_parser, *parsers):
+ def __init__(self, tool_contract_parser, arg_parser, *parsers, **kwds):
"""
:param tool_contract_parser:
@@ -469,7 +470,8 @@ class PbParser(PbParserBase):
# python wrapper parser.
self.arg_parser = arg_parser
# add options, so it will show up via --help
- add_base_options_with_emit_tool_contract(self.arg_parser.parser)
+ add_base_options_with_emit_tool_contract(self.arg_parser.parser,
+ default_level=kwds.get("default_level", "INFO"))
# a list of other parsers that adhere to the PbParserBase interface
# can be used.
@@ -520,15 +522,16 @@ class PbParser(PbParserBase):
return self.tool_contract_parser.to_tool_contract()
-def _factory(tool_id, version, name, description, subcomponents):
+def _factory(tool_id, version, name, description, subcomponents, default_level):
def _f(tc_parser):
arg_parser = PyParser(tool_id, version, name, description, subcomponents=subcomponents)
- return PbParser(tc_parser, arg_parser)
+ return PbParser(tc_parser, arg_parser, default_level=default_level)
return _f
def get_pbparser(tool_id, version, name, description, driver_exe, is_distributed=True, nproc=1,
- resource_types=(), subcomponents=(), serialization='json'):
+ resource_types=(), subcomponents=(), serialization='json',
+ default_level="INFO"):
"""
Central point of creating a Tool contract that can emit and run tool
contracts.
@@ -538,24 +541,24 @@ def get_pbparser(tool_id, version, name, description, driver_exe, is_distributed
driver = ToolDriver(driver_exe, serialization=serialization)
tc_parser = ToolContractParser(tool_id, version, name, description, is_distributed, driver,
nproc, resource_types)
- return _factory(tool_id, version, name, description, subcomponents)(tc_parser)
+ return _factory(tool_id, version, name, description, subcomponents, default_level)(tc_parser)
def get_scatter_pbparser(tool_id, version, name, description, driver_exe, chunk_keys,
is_distributed=True, nproc=1, nchunks=SymbolTypes.MAX_NCHUNKS, resource_types=(),
- subcomponents=(), serialization='json'):
+ subcomponents=(), serialization='json', default_level="INFO"):
"""Create a Scatter Tool"""
driver = ToolDriver(driver_exe, serialization=serialization)
tc_parser = ScatterToolContractParser(tool_id, version, name, description, is_distributed,
driver, nproc, resource_types, chunk_keys,
nchunks)
- return _factory(tool_id, version, name, description, subcomponents)(tc_parser)
+ return _factory(tool_id, version, name, description, subcomponents, default_level)(tc_parser)
def get_gather_pbparser(tool_id, version, name, description, driver_exe,
- is_distributed=True, nproc=1, resource_types=(), subcomponents=(), serialization='json'):
+ is_distributed=True, nproc=1, resource_types=(), subcomponents=(), serialization='json', default_level="INFO"):
"""Create a Gather tool"""
driver = ToolDriver(driver_exe, serialization=serialization)
tc_parser = GatherToolContractParser(tool_id, version, name, description,
is_distributed, driver, nproc, resource_types)
- return _factory(tool_id, version, name, description, subcomponents)(tc_parser)
+ return _factory(tool_id, version, name, description, subcomponents, default_level)(tc_parser)
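
For orientation, a minimal sketch of how the new default_level keyword threads through get_pbparser. The tool id, driver string, and file-type registrations below are illustrative placeholders, not values from this patch; note the output default name is given without its extension, since the resolver now appends it.

    from pbcommand.models import FileTypes
    from pbcommand.models.parser import get_pbparser

    def example_parser():
        # "example.tasks.dev_tool" and the driver string are hypothetical values
        p = get_pbparser("example.tasks.dev_tool", "0.1.0", "Dev Tool",
                         "Example tool description",
                         "python -m example.tool --resolved-tool-contract ",
                         is_distributed=False,
                         nproc=1,
                         default_level="DEBUG")
        # register I/O via the PbParserBase helpers; the default output name
        # has no extension, the resolver appends file_type.ext
        p.add_input_file_type(FileTypes.FASTA, "fasta_in", "Fasta", "Input Fasta file")
        p.add_output_file_type(FileTypes.TXT, "txt_out", "Txt", "Output Txt file", "output")
        return p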
diff --git a/pbcommand/models/report.py b/pbcommand/models/report.py
index 7e910a3..5c9105c 100644
--- a/pbcommand/models/report.py
+++ b/pbcommand/models/report.py
@@ -4,7 +4,7 @@
Author: Johann Miller and Michael Kocher
"""
-from collections import defaultdict
+from collections import defaultdict, OrderedDict
import warnings
import abc
import logging
@@ -85,7 +85,7 @@ class BaseReportElement(object):
raise PbReportError("Type error. id '{i}' cannot be {t}.".format(i=id_, t=type(id_)))
if not re.match('^[a-z0-9_]+$', id_):
- msg = "id '{i}' for {x} must contain only alphanumeric or underscore characters".format(x=self.__class__.__name__, i=id_)
+ msg = "id '{i}' for {x} must contain only lower-case alphanumeric or underscore characters".format(x=self.__class__.__name__, i=id_)
log.error(msg)
raise PbReportError(msg)
@@ -560,9 +560,10 @@ class Report(BaseReportElement):
It can be serialized to json.
"""
- def __init__(self, id_, tables=(), attributes=(), plotgroups=(), dataset_uuids=()):
+ def __init__(self, id_, title=None, tables=(), attributes=(), plotgroups=(), dataset_uuids=()):
"""
:param id_: (str) Should be a string that identifies the report, like 'adapter'.
+ :param title: Display name of the report. Defaults to "Report {id}" if None (added in 0.3.9)
:param tables: (list of table instances)
:param attributes: (list of attribute instances)
:param plotgroups: (list of plot group instances)
@@ -572,6 +573,8 @@ class Report(BaseReportElement):
self._attributes = []
self._plotgroups = []
self._tables = []
+ self.title = "Report {i}".format(i=self.id) if title is None else title
+
if tables:
for table in tables:
self.add_table(table)
@@ -615,10 +618,11 @@ class Report(BaseReportElement):
def __repr__(self):
_d = dict(k=self.__class__.__name__,
i=self.id,
+ n=self.title,
a=len(self.attributes),
p=len(self.plotGroups),
t=len(self.tables))
- return "<{k} id:{i} nattributes:{a} nplot_groups:{p} ntables:{t} >".format(**_d)
+ return "<{k} id:{i} title:{n} nattributes:{a} nplot_groups:{p} ntables:{t} >".format(**_d)
@property
def attributes(self):
@@ -659,6 +663,7 @@ class Report(BaseReportElement):
version = pbcommand.get_version()
d = BaseReportElement.to_dict(self, id_parts=id_parts)
+ d['title'] = self.title
d['_version'] = version
d['_changelist'] = "UNKNOWN"
d['dataset_uuids'] = list(set(self._dataset_uuids))
@@ -706,22 +711,41 @@ class Report(BaseReportElement):
@staticmethod
def merge(reports):
report_id = reports[0].id
+
def _merge_attributes_d(attributes_list):
- attrs = defaultdict(lambda : [])
+ attrs = OrderedDict()
for ax in attributes_list:
for a in ax:
- attrs[a.id].append(a.value)
+ if a.id in attrs:
+ attrs[a.id].append(a.value)
+ else:
+ attrs[a.id] = [a.value]
return attrs
+
+ def _merge_attributes_names(attributes_list):
+ names = {}
+ for ax in attributes_list:
+ for a in ax:
+ if a.id in names:
+ assert names[a.id] == a.name
+ else:
+ names[a.id] = a.name
+ return names
+
def _attributes_to_table(attributes_list, table_id, title):
attrs = _merge_attributes_d(attributes_list)
- columns = [ Column(k.lower(), header=k, values=values)
- for k, values in attrs.iteritems() ]
+ labels = _merge_attributes_names(attributes_list)
+ columns = [Column(k.lower(), header=labels[k], values=values)
+ for k, values in attrs.iteritems()]
table = Table(table_id, title=title, columns=columns)
return table
+
def _sum_attributes(attributes_list):
d = _merge_attributes_d(attributes_list)
- return [ Attribute(k, sum(values), name=k)
- for k, values in d.iteritems() ]
+ labels = _merge_attributes_names(attributes_list)
+ return [Attribute(k, sum(values), name=labels[k])
+ for k, values in d.iteritems()]
+
def _merge_tables(tables):
"""Pass through singletons, Table.merge dupes"""
id_collisions = defaultdict(list)
@@ -736,13 +760,16 @@ class Report(BaseReportElement):
return merged
attr_list = []
table_list = []
+ dataset_uuids = set()
for report in reports:
assert report.id == report_id
attr_list.append(report.attributes)
table_list.extend(report.tables)
+ dataset_uuids.update(set(report._dataset_uuids))
table = _attributes_to_table(attr_list, 'chunk_metrics',
"Chunk Metrics")
tables = _merge_tables(table_list)
tables.append(table)
merged_attributes = _sum_attributes(attr_list)
- return Report(report_id, attributes=merged_attributes, tables=tables)
+ return Report(report_id, attributes=merged_attributes, tables=tables,
+ dataset_uuids=sorted(list(dataset_uuids)))
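
A small sketch of the merge behaviour above: attribute values with the same id are summed, display names are preserved via the new label lookup, a "chunk_metrics" table is added, and dataset UUIDs are carried through. The ids and values are made up.

    from pbcommand.models.report import Attribute, Report

    r1 = Report("chunk_report",
                attributes=[Attribute("n_reads", 10, name="Number of Reads")],
                dataset_uuids=("uuid-1",))
    r2 = Report("chunk_report",
                attributes=[Attribute("n_reads", 32, name="Number of Reads")],
                dataset_uuids=("uuid-2",))

    merged = Report.merge([r1, r2])
    # merged has one "n_reads" attribute with value 42 and name "Number of Reads",
    # a "chunk_metrics" table, and carries both dataset UUIDs
    print(merged)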
diff --git a/pbcommand/models/tool_contract.py b/pbcommand/models/tool_contract.py
index 53b2559..4929ebd 100644
--- a/pbcommand/models/tool_contract.py
+++ b/pbcommand/models/tool_contract.py
@@ -6,7 +6,8 @@ Author: Michael Kocher
import abc
import pbcommand
-from pbcommand.models import TaskTypes, ResourceTypes
+
+from .common import TaskTypes, ResourceTypes, REGISTERED_FILE_TYPES
__version__ = pbcommand.get_version()
@@ -47,6 +48,12 @@ def validate_tool_contract(tc):
"""
__validate_ioputs("Inputs must have at least 1 input.", tc.task.input_file_types)
__validate_ioputs("Outputs must have at least 1 output", tc.task.output_file_types)
+ for oft in tc.task.output_file_types:
+ file_type = REGISTERED_FILE_TYPES[oft.file_type_id]
+ if oft.default_name.endswith(file_type.ext):
+ raise ValueError(
+ "File {i} default name already has extension: {n}".format(
+ i=oft.label, n=oft.default_name))
return tc
@@ -98,6 +105,7 @@ class OutputFileType(_IOFileType):
class ToolContractResolvedResource(object):
+
def __init__(self, resource_type_id, path):
assert resource_type_id in ResourceTypes.ALL()
self.type_id = resource_type_id
@@ -270,7 +278,7 @@ class ResolvedToolContractTask(object):
TASK_TYPE_ID = TaskTypes.STANDARD
def __init__(self, task_id, is_distributed, input_files, output_files,
- options, nproc, resources):
+ options, nproc, resources, log_level="INFO"):
self.task_id = task_id
self.is_distributed = is_distributed
self.input_files = input_files
@@ -278,6 +286,7 @@ class ResolvedToolContractTask(object):
self.options = options
self.nproc = nproc
self.resources = resources
+ self.log_level = log_level
@property
def tmpdir_resources(self):
@@ -303,15 +312,16 @@ class ResolvedToolContractTask(object):
nproc=self.nproc,
resources=[r.to_dict() for r in self.resources],
options=self.options,
- _comment=comment)
+ _comment=comment,
+ log_level=self.log_level)
return tc
class ResolvedScatteredToolContractTask(ResolvedToolContractTask):
TASK_TYPE_ID = TaskTypes.SCATTERED
- def __init__(self, task_id, is_distributed, input_files, output_files, options, nproc, resources, max_nchunks, chunk_keys):
- super(ResolvedScatteredToolContractTask, self).__init__(task_id, is_distributed, input_files, output_files, options, nproc, resources)
+ def __init__(self, task_id, is_distributed, input_files, output_files, options, nproc, resources, max_nchunks, chunk_keys, log_level="INFO"):
+ super(ResolvedScatteredToolContractTask, self).__init__(task_id, is_distributed, input_files, output_files, options, nproc, resources, log_level)
self.max_nchunks = max_nchunks
# these can be used to verify the output chunk.json
# after the task has been run
@@ -327,12 +337,12 @@ class ResolvedScatteredToolContractTask(ResolvedToolContractTask):
class ResolvedGatherToolContractTask(ResolvedToolContractTask):
TASK_TYPE_ID = TaskTypes.GATHERED
- def __init__(self, task_id, is_distributed, input_files, output_files, options, nproc, resources, chunk_key):
+ def __init__(self, task_id, is_distributed, input_files, output_files, options, nproc, resources, chunk_key, log_level="INFO"):
"""
The chunk key is used to pluck specific chunk values from
PipelineChunks. This makes gather tasks (e.g., GffGather) generalized.
"""
- super(ResolvedGatherToolContractTask, self).__init__(task_id, is_distributed, input_files, output_files, options, nproc, resources)
+ super(ResolvedGatherToolContractTask, self).__init__(task_id, is_distributed, input_files, output_files, options, nproc, resources, log_level)
self.chunk_key = chunk_key
def to_dict(self):
diff --git a/pbcommand/pb_io/report.py b/pbcommand/pb_io/report.py
index dad7523..3037232 100644
--- a/pbcommand/pb_io/report.py
+++ b/pbcommand/pb_io/report.py
@@ -80,14 +80,21 @@ def _to_table(d):
def dict_to_report(dct):
- if '_version' in dct:
- version = dct['_version']
- if version not in SUPPORTED_VERSIONS:
- # should this raise an exception?
- log.warn("{v} is an unsupported version. Supported versions {vs}".format(v=version, vs=SUPPORTED_VERSIONS))
+ # MK. We'll revisit this at some point.
+ # if '_version' in dct:
+ # version = dct['_version']
+ # if version not in SUPPORTED_VERSIONS:
+ # # should this raise an exception?
+ # log.warn("{v} is an unsupported version. Supported versions {vs}".format(v=version, vs=SUPPORTED_VERSIONS))
report_id = dct['id']
+ # Legacy Reports (pre-0.3.9) will not have the title key
+ if 'title' in dct:
+ title = dct['title']
+ else:
+ title = "Report {i}".format(i=report_id)
+
plot_groups = []
if 'plotGroups' in dct:
pg = dct['plotGroups']
@@ -104,8 +111,12 @@ def dict_to_report(dct):
t = _to_table(table_d)
tables.append(t)
- report = Report(report_id, plotgroups=plot_groups, tables=tables,
- attributes=attributes)
+ report = Report(report_id,
+ title=title,
+ plotgroups=plot_groups,
+ tables=tables,
+ attributes=attributes,
+ dataset_uuids=dct.get('dataset_uuids', ()))
return report
@@ -117,3 +128,13 @@ def load_report_from_json(json_file):
d = json.loads(f.read())
r = dict_to_report(d)
return r
+
+
+def _to_report(nfiles, attribute_id, report_id):
+ # this should have version of the bax/bas files, chemistry
+ attributes = [Attribute(attribute_id, nfiles)]
+ return Report(report_id, attributes=attributes)
+
+
+def fofn_to_report(nfofns):
+ return _to_report(nfofns, "nfofns", "fofn_report")
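
To illustrate the title fallback in dict_to_report, a minimal report dict without a 'title' key picks up the generated default. This assumes the loader only needs the keys shown; the id is a placeholder.

    from pbcommand.pb_io.report import dict_to_report

    d = {"id": "mapping_stats", "attributes": [], "tables": [], "plotGroups": []}
    report = dict_to_report(d)
    # reports written before 0.3.9 have no 'title', so the loader falls back to
    # "Report mapping_stats"; 'dataset_uuids' defaults to an empty tuple
    assert report.title == "Report mapping_stats"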
diff --git a/pbcommand/pb_io/tool_contract_io.py b/pbcommand/pb_io/tool_contract_io.py
index dd31fc6..aa497df 100644
--- a/pbcommand/pb_io/tool_contract_io.py
+++ b/pbcommand/pb_io/tool_contract_io.py
@@ -97,10 +97,11 @@ def __core_resolved_tool_contract_task_from_d(d):
tool_options = _get("options")
# int
nproc = _get("nproc")
+ log_level = _get("log_level")
resource_types = [ToolContractResolvedResource.from_d(dx) for dx in _get("resources")]
- return tool_contract_id, is_distributed, input_files, output_files, tool_options, nproc, resource_types
+ return tool_contract_id, is_distributed, input_files, output_files, tool_options, nproc, resource_types, log_level
def __to_rtc_from_d(d):
@@ -114,31 +115,33 @@ def __to_rtc_from_d(d):
def _standard_resolved_tool_contract_from_d(d):
"""Load a 'Standard' CLI task type"""
- tool_contract_id, is_distributed, input_files, output_files, tool_options, nproc, resource_types = __core_resolved_tool_contract_task_from_d(d)
+ tool_contract_id, is_distributed, input_files, output_files, tool_options, nproc, resource_types, log_level = __core_resolved_tool_contract_task_from_d(d)
task = ResolvedToolContractTask(tool_contract_id, is_distributed,
input_files, output_files,
- tool_options, nproc, resource_types)
+ tool_options, nproc, resource_types,
+ log_level)
return __to_rtc_from_d(d)(task)
def _scatter_resolved_tool_contract_from_d(d):
"""Load a Gathered Tool Contract """
- tool_contract_id, is_distributed, input_files, output_files, tool_options, nproc, resource_types = __core_resolved_tool_contract_task_from_d(d)
+ tool_contract_id, is_distributed, input_files, output_files, tool_options, nproc, resource_types, log_level = __core_resolved_tool_contract_task_from_d(d)
max_nchunks = d[Constants.RTOOL][Constants.MAX_NCHUNKS]
chunk_keys = d[Constants.RTOOL][Constants.CHUNK_KEYS]
- task = ResolvedScatteredToolContractTask(tool_contract_id, is_distributed, input_files, output_files, tool_options, nproc, resource_types, max_nchunks, chunk_keys)
+ task = ResolvedScatteredToolContractTask(tool_contract_id, is_distributed, input_files, output_files, tool_options, nproc, resource_types, max_nchunks, chunk_keys, log_level=log_level)
return __to_rtc_from_d(d)(task)
def _gather_resolved_tool_contract_from_d(d):
- tool_contract_id, is_distributed, input_files, output_files, tool_options, nproc, resource_types = __core_resolved_tool_contract_task_from_d(d)
+ tool_contract_id, is_distributed, input_files, output_files, tool_options, nproc, resource_types, log_level = __core_resolved_tool_contract_task_from_d(d)
chunk_key = d[Constants.RTOOL][Constants.GATHER_CHUNK_KEY]
task = ResolvedGatherToolContractTask(tool_contract_id, is_distributed,
- input_files, output_files,
- tool_options, nproc, resource_types, chunk_key)
+ input_files, output_files,
+ tool_options, nproc, resource_types,
+ chunk_key, log_level=log_level)
return __to_rtc_from_d(d)(task)
@@ -241,7 +244,7 @@ def __to_tc_from_d(d):
@_json_path_or_d
def _standard_tool_contract_from(path_or_d):
- task_id, display_name, description, version, is_distributed, input_types, output_types, tool_options, nproc, resource_types = __core_tool_contract_task_from(path_or_d)
+ task_id, display_name, description, version, is_distributed, input_types, output_types, tool_options, nproc, resource_types = __core_tool_contract_task_from(path_or_d)
task = ToolContractTask(task_id, display_name, description, version,
is_distributed,
input_types,
diff --git a/pbcommand/resolver.py b/pbcommand/resolver.py
index 008f395..3cb899b 100644
--- a/pbcommand/resolver.py
+++ b/pbcommand/resolver.py
@@ -84,14 +84,7 @@ def _resolve_output_file(registry_d, file_type, output_file_type, root_output_di
# FIXME. THIS NEEDS TO BE FUNDAMENTALLY FIXED and updated to use the spec
# in the avro schema.
- if isinstance(output_file_type.default_name, basestring):
- a, b = os.path.splitext(output_file_type.default_name)
- return _get_fname(a, b.replace('.', ''))
- elif isinstance(output_file_type.default_name, (list, tuple)):
- base, ext = output_file_type.default_name
- return _get_fname(base, ext)
- else:
- return _get_fname(file_type.base_name, file_type.ext)
+ return _get_fname(output_file_type.default_name, file_type.ext)
def _resolve_resource_types(resources, output_dir, root_tmp_dir):
@@ -132,7 +125,7 @@ def _resolve_resource_types(resources, output_dir, root_tmp_dir):
def _resolve_output_files(output_file_types, root_output_dir):
# store the files as {(base, ext): count}
- _outs_registry = defaultdict(lambda : 0)
+ _outs_registry = defaultdict(lambda: 0)
return [_resolve_output_file(_outs_registry, REGISTERED_FILE_TYPES[f.file_type_id], f, root_output_dir) for f in output_file_types]
@@ -153,7 +146,7 @@ def _resolve_core(tool_contract, input_files, root_output_dir, max_nproc, tool_o
return output_files, resolved_options, nproc, resolved_resources
-def resolve_tool_contract(tool_contract, input_files, root_output_dir, root_tmp_dir, max_nproc, tool_options):
+def resolve_tool_contract(tool_contract, input_files, root_output_dir, root_tmp_dir, max_nproc, tool_options, is_distributable, log_level="INFO"):
"""
Convert a ToolContract into a Resolved Tool Contract.
@@ -174,37 +167,42 @@ def resolve_tool_contract(tool_contract, input_files, root_output_dir, root_tmp_
:return: A Resolved tool contract
"""
output_files, resolved_options, nproc, resources = _resolve_core(tool_contract, input_files, root_output_dir, max_nproc, tool_options, root_tmp_dir)
+ is_distributed = False
+ if is_distributable and tool_contract.task.is_distributed:
+ is_distributed = True
task = ResolvedToolContractTask(tool_contract.task.task_id,
- tool_contract.task.is_distributed,
+ is_distributed,
input_files,
output_files,
resolved_options,
nproc,
- resources)
+ resources,
+ log_level=log_level)
return ResolvedToolContract(task, tool_contract.driver)
-def resolve_scatter_tool_contract(tool_contract, input_files, root_output_dir, root_tmp_dir, max_nproc, tool_options, max_nchunks, chunk_keys):
+def resolve_scatter_tool_contract(tool_contract, input_files, root_output_dir, root_tmp_dir, max_nproc, tool_options, max_nchunks, chunk_keys, is_distributable, log_level="INFO"):
output_files, resolved_options, nproc, resources = _resolve_core(tool_contract, input_files, root_output_dir, max_nproc, tool_options, tmp_dir=root_tmp_dir)
resolved_max_chunks = _resolve_max_nchunks(tool_contract.task.max_nchunks, max_nchunks)
task = ResolvedScatteredToolContractTask(tool_contract.task.task_id,
- tool_contract.task.is_distributed,
+ tool_contract.task.is_distributed and is_distributable,
input_files,
output_files,
resolved_options,
nproc,
- resources, resolved_max_chunks, chunk_keys)
+ resources, resolved_max_chunks, chunk_keys, log_level=log_level)
return ResolvedToolContract(task, tool_contract.driver)
-def resolve_gather_tool_contract(tool_contract, input_files, root_output_dir, root_tmp_dir, max_nproc, tool_options, chunk_key):
+def resolve_gather_tool_contract(tool_contract, input_files, root_output_dir, root_tmp_dir, max_nproc, tool_options, chunk_key, is_distributable, log_level="INFO"):
output_files, resolved_options, nproc, resources = _resolve_core(tool_contract, input_files, root_output_dir, max_nproc, tool_options, tmp_dir=root_tmp_dir)
task = ResolvedGatherToolContractTask(tool_contract.task.task_id,
- tool_contract.task.is_distributed,
+ tool_contract.task.is_distributed and is_distributable,
input_files,
output_files,
resolved_options,
nproc,
- resources, chunk_key)
+ resources, chunk_key,
+ log_level=log_level)
return ResolvedToolContract(task, tool_contract.driver)
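
A hedged sketch of calling the updated resolver signature. The tool contract path, input file, and directories are placeholders, and load_tool_contract_from is assumed to be the loader exposed by pbcommand.pb_io.

    from pbcommand.pb_io import load_tool_contract_from
    from pbcommand.resolver import resolve_tool_contract

    tc = load_tool_contract_from("/path/to/tool_contract.json")
    rtc = resolve_tool_contract(tc,
                                ["/path/to/input.fasta"],
                                "/tmp/job-output",       # root output dir
                                "/tmp",                  # root tmp dir
                                max_nproc=8,
                                tool_options={},
                                is_distributable=False,  # force local execution
                                log_level="DEBUG")
    print("is_distributed={d} log_level={l}".format(d=rtc.task.is_distributed,
                                                    l=rtc.task.log_level))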
diff --git a/pbcommand/schemas/__init__.py b/pbcommand/schemas/__init__.py
index 5f238df..46f9046 100644
--- a/pbcommand/schemas/__init__.py
+++ b/pbcommand/schemas/__init__.py
@@ -33,4 +33,4 @@ def _validate(schema, d):
validate_rtc = functools.partial(_validate, RTC_SCHEMA)
validate_pbreport = functools.partial(_validate, PBREPORT_SCHEMA)
-validate_tc = functools.partial(_validate, TC_SCHEMA)
\ No newline at end of file
+validate_tc = functools.partial(_validate, TC_SCHEMA)
diff --git a/pbcommand/schemas/resolved_tool_contract.avsc b/pbcommand/schemas/resolved_tool_contract.avsc
index 92dc8ab..9d3c47f 100644
--- a/pbcommand/schemas/resolved_tool_contract.avsc
+++ b/pbcommand/schemas/resolved_tool_contract.avsc
@@ -52,6 +52,10 @@
"type": "string"
},
{
+ "name": "log_level",
+ "type": "string"
+ },
+ {
"name": "resources",
"type": {
"type": "array",
@@ -77,4 +81,4 @@
}
}
]
-}
\ No newline at end of file
+}
diff --git a/pbcommand/services/__init__.py b/pbcommand/services/__init__.py
new file mode 100644
index 0000000..4a64fca
--- /dev/null
+++ b/pbcommand/services/__init__.py
@@ -0,0 +1,4 @@
+from .service_access_layer import ServiceAccessLayer
+from .models import (JobExeError, JobResult, LogLevels,
+ ServiceResourceTypes, JobTypes, JobStates,
+ ServiceJob, ServiceEntryPoint)
diff --git a/pbcommand/services/cli.py b/pbcommand/services/cli.py
new file mode 100644
index 0000000..e70aa31
--- /dev/null
+++ b/pbcommand/services/cli.py
@@ -0,0 +1,505 @@
+"""CLI for interacting with the PacBio Services
+
+0.1.0 Version, Import/Convert datasets
+
+pbservice import-dataset # dir or XML file
+pbservice import-rs-movie # dir or XML file (Requires 'movie-to-dataset' exe)
+pbservice import-ref-info # dir or XML file (Requires 'reference-to-dataset' exe)
+pbservice import-fasta /path/to/file.fasta --name my-name --organism my-org --ploidy haploid
+
+0.2.0 Version, Jobs Support, leveraging
+
+pbservice run-analysis path/to/file.json
+pbservice run-merge-dataset path/to/file.json
+
+
+"""
+import argparse
+import json
+
+import os
+import pprint
+import sys
+import logging
+import functools
+import time
+import tempfile
+import traceback
+import uuid
+from requests import RequestException
+
+from pbcommand.cli import get_default_argparser_with_base_opts
+from pbcommand.models import FileTypes
+from pbcommand.services import (ServiceAccessLayer,
+ ServiceEntryPoint,
+ JobExeError)
+from pbcommand.validators import validate_file, validate_or
+from pbcommand.common_options import add_common_options
+from pbcommand.utils import (is_dataset,
+ walker, setup_log, compose, setup_logger,
+ get_parsed_args_log_level)
+
+from .utils import to_ascii
+
+__version__ = "0.2.0"
+
+log = logging.getLogger(__name__)
+log.addHandler(logging.NullHandler()) # suppress warning message
+
+
+_LOG_FORMAT = '[%(levelname)s] %(asctime)-15s %(message)s'
+
+
+class Constants(object):
+ FASTA_TO_REFERENCE = "fasta-to-reference"
+ RS_MOVIE_TO_DS = "movie-metadata-to-dataset"
+
+ # Currently only small-ish files are supported, users should
+ # use fasta-to-reference offline and import the reference set
+ MAX_FASTA_FILE_MB = 100
+
+
+def _is_xml(path):
+ return path.endswith(".xml")
+
+
+def validate_xml_file_or_dir(path):
+ px = os.path.abspath(os.path.expanduser(path))
+ if os.path.isdir(px):
+ return px
+ elif os.path.isfile(px) and _is_xml(px):
+ return px
+ else:
+ raise argparse.ArgumentTypeError("Expected dir or file '{p}'".format(p=path))
+
+
+def _get_size_mb(path):
+ return os.stat(path).st_size / 1024.0 / 1024.0
+
+
+def validate_file_and_size(max_size_mb):
+ def _wrapper(path):
+ p = validate_file(path)
+ sx = _get_size_mb(path)
+ if sx > max_size_mb:
+ raise argparse.ArgumentTypeError("Fasta file is too large {s:.2f} MB > {m:.2f} MB. Create a ReferenceSet using {e}, then import using `pbservice import-dataset /path/to/referenceset.xml` ".format(e=Constants.FASTA_TO_REFERENCE, s=sx, m=Constants.MAX_FASTA_FILE_MB))
+ else:
+ return p
+ return _wrapper
+
+
+validate_max_fasta_file_size = validate_file_and_size(Constants.MAX_FASTA_FILE_MB)
+
+
+def add_block_option(p):
+ p.add_argument('--block', action='store_true', default=False,
+ help="Block during importing process")
+ return p
+
+
+def add_sal_options(p):
+ p.add_argument('--host', type=str,
+ default="http://localhost", help="Server host")
+ p.add_argument('--port', type=int, default=8070, help="Server Port")
+ return p
+
+
+def add_base_and_sal_options(p):
+ fx = [add_common_options, add_sal_options]
+ f = compose(*fx)
+ return f(p)
+
+
+def add_xml_or_dir_option(p):
+ p.add_argument('xml_or_dir', type=validate_xml_file_or_dir, help="Directory or XML file.")
+ return p
+
+
+def add_sal_and_xml_dir_options(p):
+ fx = [add_common_options,
+ add_sal_options,
+ add_xml_or_dir_option]
+ f = compose(*fx)
+ return f(p)
+
+
+def get_sal_and_status(host, port):
+ """Get Sal or Raise if status isn't successful"""
+ try:
+ sal = ServiceAccessLayer(host, port)
+ sal.get_status()
+ return sal
+ except RequestException as e:
+ log.error("Failed to connect to {h}:{p}".format(h=host, p=port))
+ raise
+
+
+def run_file_or_dir(file_func, dir_func, xml_or_dir):
+ if os.path.isdir(xml_or_dir):
+ return dir_func(xml_or_dir)
+ elif os.path.isfile(xml_or_dir):
+ return file_func(xml_or_dir)
+ else:
+ raise ValueError("Unsupported value {x}".format(x=xml_or_dir))
+
+
+def is_xml_dataset(path):
+ if _is_xml(path):
+ if is_dataset(path):
+ return True
+ return False
+
+
+def dataset_walker(root_dir):
+ filter_func = is_xml_dataset
+ return walker(root_dir, filter_func)
+
+
+def import_local_dataset(sal, path):
+ """:type sal: ServiceAccessLayer"""
+ # XXX basic validation of external resources
+ try:
+ from pbcore.io import openDataSet, ReadSet, HdfSubreadSet
+ except ImportError:
+ log.warn("Can't import pbcore, skipping dataset sanity check")
+ else:
+ ds = openDataSet(path, strict=True)
+ if isinstance(ds, ReadSet) and not isinstance(ds, HdfSubreadSet):
+ log.info("checking BAM file integrity")
+ for rr in ds.resourceReaders():
+ try:
+ last_record = rr[-1]
+ except Exception as e:
+ log.exception("Import failed because the underlying "+
+ "data appear to be corrupted. Run "+
+ "'pbvalidate' on the dataset for more "+
+ "thorough checking.")
+ return 1
+ # this will raise if the import wasn't successful
+ _ = sal.run_import_local_dataset(path)
+ log.info("Successfully import dataset from {f}".format(f=path))
+ return 0
+
+
+def import_datasets(sal, root_dir):
+ # FIXME. Need to add a flag to keep importing even if an import fails
+ rcodes = []
+ for path in dataset_walker(root_dir):
+ try:
+ import_local_dataset(sal, path)
+ rcodes.append(0)
+ except Exception as e:
+ log.error("Failed to import dataset {e}".format(e=e))
+ rcodes.append(1)
+
+ state = all(v == 0 for v in rcodes)
+ return 0 if state else 1
+
+
+def run_import_local_datasets(host, port, xml_or_dir):
+ sal = ServiceAccessLayer(host, port)
+ file_func = functools.partial(import_local_dataset, sal)
+ dir_func = functools.partial(import_datasets, sal)
+ return run_file_or_dir(file_func, dir_func, xml_or_dir)
+
+
+def args_runner_import_datasets(args):
+ return run_import_local_datasets(args.host, args.port, args.xml_or_dir)
+
+
+def add_import_fasta_opts(p):
+ px = p.add_argument
+ px('fasta_path', type=validate_max_fasta_file_size, help="Path to Fasta File")
+ px('--name', required=True, type=str, help="Name of ReferenceSet")
+ px('--organism', required=True, type=str, help="Organism")
+ px('--ploidy', required=True, type=str, help="Ploidy")
+ add_block_option(p)
+ add_sal_options(p)
+ add_common_options(p)
+ return p
+
+
+def run_import_fasta(host, port, fasta_path, name, organism, ploidy, block=False):
+ sal = ServiceAccessLayer(host, port)
+ log.info("importing ({s:.2f} MB) {f} ".format(s=_get_size_mb(fasta_path), f=fasta_path))
+ if block is True:
+ result = sal.run_import_fasta(fasta_path, name, organism, ploidy)
+ log.info("Successfully imported {f}".format(f=fasta_path))
+ log.info("result {r}".format(r=result))
+ else:
+ sal.import_fasta(fasta_path, name, organism, ploidy)
+
+ return 0
+
+
+def args_run_import_fasta(args):
+ log.debug(args)
+ return run_import_fasta(args.host, args.port, args.fasta_path,
+ args.name, args.organism, args.ploidy, block=args.block)
+
+
+def load_analysis_job_json(d):
+ """Translate a dict to args for scenario runner inputs"""
+ job_name = to_ascii(d['name'])
+ pipeline_template_id = to_ascii(d["pipelineId"])
+ service_epoints = [ServiceEntryPoint.from_d(x) for x in d['entryPoints']]
+ return job_name, pipeline_template_id, service_epoints
+
+
+def _validate_analysis_job_json(path):
+ px = validate_file(path)
+ with open(px, 'r') as f:
+ d = json.loads(f.read())
+
+ try:
+ load_analysis_job_json(d)
+ return px
+ except (KeyError, TypeError, ValueError) as e:
+ raise argparse.ArgumentTypeError("Invalid analysis.json format for '{p}' {e}".format(p=px, e=repr(e)))
+
+
+def add_run_analysis_job_opts(p):
+ p.add_argument('json_path', type=_validate_analysis_job_json, help="Path to analysis.json file")
+ add_sal_options(p)
+ add_common_options(p)
+ add_block_option(p)
+ return p
+
+
+def run_analysis_job(sal, job_name, pipeline_id, service_entry_points, block=False, time_out=None, task_options=()):
+ """Run analysis (pbsmrtpipe) job
+
+ :rtype: ServiceJob
+ """
+ if time_out is None:
+ time_out = sal.JOB_DEFAULT_TIMEOUT
+ status = sal.get_status()
+ log.info("Status {x}".format(x=status['message']))
+
+ resolved_service_entry_points = []
+ for service_entry_point in service_entry_points:
+ # Always lookup/resolve the dataset by looking up the id
+ ds = sal.get_dataset_by_uuid(service_entry_point.resource)
+ if ds is None:
+ raise ValueError("Failed to find DataSet with id {r} {s}".format(s=service_entry_point, r=service_entry_point.resource))
+
+ dataset_id = ds['id']
+ ep = ServiceEntryPoint(service_entry_point.entry_id, service_entry_point.dataset_type, dataset_id)
+ log.debug("Resolved dataset {e}".format(e=ep))
+ resolved_service_entry_points.append(ep)
+
+ if block:
+ job_result = sal.run_by_pipeline_template_id(job_name, pipeline_id, resolved_service_entry_points, time_out=time_out, task_options=task_options)
+ job_id = job_result.job.id
+ # service job
+ result = sal.get_analysis_job_by_id(job_id)
+ if not result.was_successful():
+ raise JobExeError("Job {i} failed".format(i=job_id))
+ else:
+ # service job or error
+ result = sal.create_by_pipeline_template_id(job_name, pipeline_id, resolved_service_entry_points)
+
+ log.info("Result {r}".format(r=result))
+ return result
+
+
+def args_run_analysis_job(args):
+ log.debug(args)
+ with open(args.json_path, 'r') as f:
+ d = json.loads(f.read())
+
+ log.debug("Loaded \n" + pprint.pformat(d))
+ job_name, pipeline_id, service_entry_points = load_analysis_job_json(d)
+
+ sal = ServiceAccessLayer(args.host, args.port)
+ # this should raise if there's a failure
+ result = run_analysis_job(sal, job_name, pipeline_id, service_entry_points, block=args.block)
+ return 0
+
+
+def args_emit_analysis_template(args):
+ ep1 = ServiceEntryPoint("eid_ref_dataset", FileTypes.DS_REF.file_type_id, 1)
+ ep1_d = ep1.to_d()
+ ep1_d['_comment'] = "datasetId can be provided as the DataSet UUID or Int. The entryId(s) can be obtained by running 'pbsmrtpipe show-pipeline-templates {PIPELINE-ID}'"
+ d = dict(name="Job name",
+ pipelineId="pbsmrtpipe.pipelines.dev_diagnostic",
+ entryPoints=[ep1_d],
+ taskOptions=[],
+ workflowOptions=[])
+
+ sx = json.dumps(d, sort_keys=True, indent=4)
+ print sx
+
+ return 0
+
+
+def args_get_sal_summary(args):
+
+ host = args.host
+ port = args.port
+
+ sal = ServiceAccessLayer(host, port)
+
+ print sal.to_summary()
+
+ return 0
+
+
+def add_get_job_options(p):
+ add_base_and_sal_options(p)
+ p.add_argument("job_id", type=int, help="Job id")
+ return p
+
+
+def run_get_job_summary(host, port, job_id):
+ sal = get_sal_and_status(host, port)
+ job = sal.get_job_by_id(job_id)
+
+ if job is None:
+ log.error("Unable to find job {i} from {u}".format(i=job_id, u=sal.uri))
+ else:
+ print job
+
+ return 0
+
+
+def args_get_job_summary(args):
+ return run_get_job_summary(args.host, args.port, args.job_id)
+
+validate_int_or_uuid = validate_or(int, uuid.UUID, "Expected Int or UUID")
+
+
+def add_get_dataset_options(p):
+ add_base_and_sal_options(p)
+ p.add_argument('id_or_uuid', type=validate_int_or_uuid, help="DataSet Id or UUID")
+ return p
+
+
+def run_get_dataset_summary(host, port, dataset_id_or_uuid):
+
+ sal = get_sal_and_status(host, port)
+
+ ds = sal.get_dataset_by_uuid(dataset_id_or_uuid)
+
+ if ds is None:
+ log.info("Unable to find DataSet '{i}' on {u}".format(i=dataset_id_or_uuid, u=sal.uri))
+ else:
+ print ds
+
+ return 0
+
+
+def args_run_dataset_summary(args):
+ return run_get_dataset_summary(args.host, args.port, args.id_or_uuid)
+
+
+def subparser_builder(subparser, subparser_id, description, options_func, exe_func):
+ """
+ Util to add subparser options
+
+ :param subparser:
+ :param subparser_id:
+ :param description:
+ :param options_func: Function that will add args and options to Parser instance F(subparser) -> None
+ :param exe_func: Function to run F(args) -> Int
+ :return:
+ """
+ p = subparser.add_parser(subparser_id, help=description,
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ options_func(p)
+ p.set_defaults(func=exe_func)
+ return p
+
+
+def get_parser():
+ desc = "Tool to import datasets, convert/import fasta file and run analysis jobs"
+ p = get_default_argparser_with_base_opts(__version__, desc)
+
+ sp = p.add_subparsers(help='commands')
+
+ def builder(subparser_id, description, options_func, exe_func):
+ subparser_builder(sp, subparser_id, description, options_func, exe_func)
+
+ status_desc = "Get System Status, DataSet and Job Summary"
+ builder('status', status_desc, add_base_and_sal_options, args_get_sal_summary)
+
+ local_desc = " The file location must be accessible from the host where the Services are running (often on a shared file system)"
+ ds_desc = "Import Local DataSet XML." + local_desc
+ builder('import-dataset', ds_desc, add_sal_and_xml_dir_options, args_runner_import_datasets)
+
+ fasta_desc = "Import Fasta (and convert to ReferenceSet)." + local_desc
+ builder("import-fasta", fasta_desc, add_import_fasta_opts, args_run_import_fasta)
+
+ run_analysis_desc = "Run Secondary Analysis Pipeline using an analysis.json"
+ builder("run-analysis", run_analysis_desc, add_run_analysis_job_opts, args_run_analysis_job)
+
+ emit_analysis_json_desc = "Emit an analysis.json Template to stdout that can be run using 'run-analysis'"
+ builder("emit-analysis-template", emit_analysis_json_desc, add_common_options, args_emit_analysis_template)
+
+ # Get Summary Job by Id
+ job_summary_desc = "Get Job Summary by Job Id"
+ builder('get-job', job_summary_desc, add_get_job_options, args_get_job_summary)
+
+ ds_summary_desc = "Get DataSet Summary by DataSet Id or UUID"
+ builder('get-dataset', ds_summary_desc, add_get_dataset_options, args_run_dataset_summary)
+
+ return p
+
+
+def args_executer(args):
+ """
+ This is pulled from pbsmrtpipe, which uses the goofy func=my_runner_func,
+ which will be called using args.func(args)
+
+ :rtype: int
+ """
+ try:
+
+ return_code = args.func(args)
+ except Exception as e:
+ if isinstance(e, RequestException):
+ # make this terse so there's not a useless stacktrace
+ emsg = "Failed to connect to SmrtServer {e}".format(e=repr(e.__class__.__name__))
+ log.error(emsg)
+ return_code = 3
+ elif isinstance(e, IOError):
+ log.error(e, exc_info=True)
+ traceback.print_exc(sys.stderr)
+ return_code = 1
+ else:
+ log.error(e, exc_info=True)
+ traceback.print_exc(sys.stderr)
+ return_code = 2
+
+ return return_code
+
+
+def main_runner(argv, parser, exe_runner_func,
+ level=logging.DEBUG, str_formatter=_LOG_FORMAT):
+ """
+ Fundamental interface to commandline applications
+ """
+ started_at = time.time()
+ args = parser.parse_args(argv)
+
+ level = get_parsed_args_log_level(args, default_level=logging.DEBUG)
+ console_or_file = args.log_file
+ setup_logger(console_or_file, level, formatter=str_formatter)
+
+ log.debug(args)
+ log.info("Starting tool version {v}".format(v=parser.version))
+
+ rcode = exe_runner_func(args)
+
+ run_time = time.time() - started_at
+ _d = dict(r=rcode, s=run_time)
+ log.info("exiting with return code {r} in {s:.2f} sec.".format(**_d))
+ return rcode
+
+
+def main(argv=None):
+
+ argv_ = sys.argv if argv is None else argv
+ parser = get_parser()
+
+ return main_runner(argv_[1:], parser, args_executer)
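
For reference, the run-analysis path above can also be driven programmatically. This sketch mirrors the values printed by emit-analysis-template; the host, port, and dataset id are placeholders.

    from pbcommand.models import FileTypes
    from pbcommand.services import ServiceAccessLayer, ServiceEntryPoint
    from pbcommand.services.cli import run_analysis_job

    sal = ServiceAccessLayer("smrt-server-host", 8070)
    ep = ServiceEntryPoint("eid_ref_dataset", FileTypes.DS_REF.file_type_id, 1)

    # block=True waits for the pbsmrtpipe job to reach a terminal state and
    # raises JobExeError if it did not succeed
    result = run_analysis_job(sal, "Example Job",
                              "pbsmrtpipe.pipelines.dev_diagnostic",
                              [ep], block=True)
    print(result)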
diff --git a/pbcommand/services/models.py b/pbcommand/services/models.py
new file mode 100644
index 0000000..300e0ad
--- /dev/null
+++ b/pbcommand/services/models.py
@@ -0,0 +1,163 @@
+"""Services Specific Data Models"""
+from collections import namedtuple
+import uuid
+
+import iso8601
+
+from requests.exceptions import RequestException
+
+
+def to_ascii(s):
+ return s.encode('ascii', 'ignore')
+
+
+# These are mirrored from the BaseSMRTServer
+class LogLevels(object):
+ TRACE = "TRACE"
+ DEBUG = "DEBUG"
+ INFO = "INFO"
+ NOTICE = "NOTICE"
+ WARN = "WARN"
+ ERROR = "ERROR"
+ CRITICAL = "CRITICAL"
+ FATAL = "FATAL"
+
+ ALL = (TRACE, DEBUG, INFO, NOTICE, WARN, ERROR, CRITICAL, FATAL)
+
+ @classmethod
+ def is_valid(cls, level):
+ return level in cls.ALL
+
+
+SERVICE_LOGGER_RESOURCE_ID = "pbsmrtpipe"
+
+LogResource = namedtuple("LogResource", "id name description")
+LogMessage = namedtuple("LogMessage", "sourceId level message")
+
+PbsmrtpipeLogResource = LogResource(SERVICE_LOGGER_RESOURCE_ID, "Pbsmrtpipe",
+ "Secondary Analysis Pbsmrtpipe Job logger")
+
+
+class ServiceJob(namedtuple("ServiceJob", 'id uuid name state path job_type created_at')):
+
+ @staticmethod
+ def from_d(d):
+ def sx(x):
+ return d[x]
+
+ def se(x):
+ return sx(x).encode('ascii', 'ignore')
+
+ def to_t(x):
+ return iso8601.parse_date(se(x))
+
+ return ServiceJob(sx('id'), sx('uuid'), se('name'), se('state'),
+ se('path'), se('jobTypeId'), to_t('createdAt'))
+
+ def was_successful(self):
+ return self.state == JobStates.SUCCESSFUL
+
+
+class JobExeError(ValueError):
+ """Service Job failed to complete successfully"""
+ pass
+
+
+class SmrtServerConnectionError(RequestException):
+ """This is blunt to catch all status related errors"""
+ pass
+
+
+class SMRTServiceBaseError(Exception):
+ """Fundamental Error datastructure in SMRT Server"""
+
+ def __init__(self, http_code, error_type, message, **kwargs):
+ self.http_code = http_code
+ self.error_type = error_type
+ self.msg = message
+ message = "Http code={h} msg={m} type={t}".format(h=http_code, m=message, t=error_type)
+ super(Exception, self).__init__(message)
+
+ @staticmethod
+ def from_d(d):
+ return SMRTServiceBaseError(d['httpCode'], d['errorType'], d['message'])
+
+
+# "Job" is the raw output from the jobs/1234
+JobResult = namedtuple("JobResult", "job run_time errors")
+
+
+def _to_resource_id(x):
+ if isinstance(x, int):
+ return x
+ try:
+ _ = uuid.UUID(x)
+ return x
+ except ValueError as e:
+ raise ValueError("Resource id '{x}' must be given as int or uuid".format(x=x))
+
+
+class ServiceEntryPoint(object):
+ """Entry Points to initialize Pipelines"""
+
+ def __init__(self, entry_id, dataset_type, path_or_uri):
+ self.entry_id = entry_id
+ self.dataset_type = dataset_type
+ # only int is supported for now; UUID or path to an XML dataset will be added
+ self._resource = path_or_uri
+
+ @property
+ def resource(self):
+ return self._resource
+
+ def __repr__(self):
+ return "<{k} {e} {d} {r} >".format(k=self.__class__.__name__, e=self.entry_id, r=self._resource, d=self.dataset_type)
+
+ @staticmethod
+ def from_d(d):
+ i = _to_resource_id(d['datasetId'])
+ return ServiceEntryPoint(to_ascii(d['entryId']), to_ascii(d['fileTypeId']), i)
+
+ def to_d(self):
+ return dict(entryId=self.entry_id,
+ fileTypeId=self.dataset_type,
+ datasetId=self.resource)
+
+
+class JobEntryPoint(namedtuple("JobEntryPoint", "job_id dataset_uuid dataset_metatype")):
+ """ Returned from the Services /job/1234/entry-points """
+ @staticmethod
+ def from_d(d):
+ return JobEntryPoint(d['jobId'], d['datasetUUID'], d['datasetType'])
+
+
+class JobStates(object):
+ RUNNING = "RUNNING"
+ CREATED = "CREATED"
+ FAILED = "FAILED"
+ SUCCESSFUL = "SUCCESSFUL"
+
+ ALL = (RUNNING, CREATED, FAILED)
+
+ # Terminal states
+ ALL_COMPLETED = (FAILED, SUCCESSFUL)
+
+
+class JobTypes(object):
+ IMPORT_DS = "import-dataset"
+ IMPORT_DSTORE = "import-datastore"
+ MERGE_DS = "merge-datasets"
+ PB_PIPE = "pbsmrtpipe"
+ MOCK_PB_PIPE = "mock-pbsmrtpipe"
+ CONVERT_FASTA = 'convert-fasta-reference'
+
+ @classmethod
+ def ALL(cls):
+ return (cls.IMPORT_DS, cls.IMPORT_DSTORE, cls.MERGE_DS,
+ cls.PB_PIPE, cls.MOCK_PB_PIPE, cls.CONVERT_FASTA)
+
+
+class ServiceResourceTypes(object):
+ REPORTS = "reports"
+ DATASTORE = "datastore"
+ ENTRY_POINTS = "entry-points"
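
As a quick illustration of the ServiceEntryPoint round trip used by the CLI; the entry id, dataset metatype string, and dataset id below are illustrative values only.

    from pbcommand.services import ServiceEntryPoint

    d = {"entryId": u"eid_subread",
         "fileTypeId": u"PacBio.DataSet.SubreadSet",
         "datasetId": 42}
    ep = ServiceEntryPoint.from_d(d)
    # datasetId may be an int or a UUID string; anything else raises ValueError
    print(ep)        # <ServiceEntryPoint eid_subread PacBio.DataSet.SubreadSet 42 >
    print(ep.to_d())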
diff --git a/pbcommand/services/service_access_layer.py b/pbcommand/services/service_access_layer.py
new file mode 100644
index 0000000..e8e59fb
--- /dev/null
+++ b/pbcommand/services/service_access_layer.py
@@ -0,0 +1,585 @@
+"""Utils for Updating state/progress and results to WebServices
+
+
+"""
+import json
+import logging
+import pprint
+import time
+
+import requests
+from requests import RequestException
+
+from pbcommand.models import (FileTypes,
+ DataSetFileType,
+ DataStore,
+ DataStoreFile)
+from pbcommand.utils import get_dataset_metadata
+
+from .models import (SMRTServiceBaseError,
+ JobResult, JobStates, JobExeError, JobTypes,
+ LogLevels, ServiceEntryPoint,
+ ServiceResourceTypes, ServiceJob, JobEntryPoint)
+
+from .utils import to_ascii, to_sal_summary
+
+log = logging.getLogger(__name__)
+#log.addHandler(logging.NullHandler()) # to prevent the annoying 'No handlers .. ' msg
+
+
+class Constants(object):
+ HEADERS = {'Content-type': 'application/json'}
+
+
+def _post_requests(headers):
+ def wrapper(url, d_):
+ data = json.dumps(d_)
+ return requests.post(url, data=data, headers=headers)
+
+ return wrapper
+
+
+def _get_requests(headers):
+ def wrapper(url):
+ return requests.get(url, headers=headers)
+
+ return wrapper
+
+# These are exposed publicly as a utility, but shouldn't be used in any API
+# call. The _process_* functions are the entry points for API calls to make
+# sure errors are handled correctly.
+rqpost = _post_requests(Constants.HEADERS)
+rqget = _get_requests(Constants.HEADERS)
+
+
+def _parse_base_service_error(response):
+ """:type response: requests.Response
+
+ Don't trust the services. Try to parse the response into the SMRT Server Error
+ datastructure (even if a 200 is returned)
+ """
+ if response.ok:
+ try:
+ d = response.json()
+ emsg = SMRTServiceBaseError.from_d(d)
+ raise emsg
+ except (KeyError, TypeError):
+ # couldn't parse response -> error,
+ # so everything is fine
+ return response
+ else:
+ return response
+
+
+def _process_rget(total_url):
+ """Process get request and return JSON response. Raise if not successful"""
+ r = rqget(total_url)
+ _parse_base_service_error(r)
+ if not r.ok:
+ log.error("Failed ({s}) GET to {x}".format(x=total_url, s=r.status_code))
+ r.raise_for_status()
+ j = r.json()
+ return j
+
+
+def _process_rget_with_transform(func):
+ """Post process the JSON result (if successful) with F(json_d) -> T"""
+ def wrapper(total_url):
+ j = _process_rget(total_url)
+ return func(j)
+ return wrapper
+
+
+def _process_rget_with_jobs_transform(total_url):
+ # defining an internal method, because this is used in several places
+ jobs_d = _process_rget(total_url)
+ return [ServiceJob.from_d(job_d) for job_d in jobs_d]
+
+
+def _process_rget_or_none(func):
+ """
+ Apply the transform func to the output of the GET request if it was successful, else return None
+
+ This is intended to be used for looking up Results by Id where a 404
+ may be returned.
+ """
+ def wrapper(total_url):
+ try:
+ return _process_rget_with_transform(func)(total_url)
+ except (RequestException, SMRTServiceBaseError):
+ # FIXME
+ # this should be a tighter exception case
+ # only look for 404
+ return None
+
+ return wrapper
+
+
+def _process_rget_with_job_transform_or_none(total_url):
+ return _process_rget_or_none(ServiceJob.from_d)(total_url)
+
+
+def _process_rpost(total_url, payload_d):
+ r = rqpost(total_url, payload_d)
+ _parse_base_service_error(r)
+ # FIXME This should be strict to only return a 201
+ if r.status_code not in (200, 201):
+ log.error("Failed ({s} to call {u}".format(u=total_url, s=r.status_code))
+ log.error("payload")
+ log.error("\n" + pprint.pformat(payload_d))
+ r.raise_for_status()
+ j = r.json()
+ return j
+
+
+def _process_rpost_with_transform(func):
+ def wrapper(total_url, payload_d):
+ j = _process_rpost(total_url, payload_d)
+ return func(j)
+ return wrapper
+
+
+def _to_url(base, ext):
+ return "".join([base, ext])
+
+
+def _null_func(x):
+ # Pass-through func
+ return x
+
+
+def _import_dataset_by_type(dataset_type_or_id):
+
+ if isinstance(dataset_type_or_id, DataSetFileType):
+ ds_type_id = dataset_type_or_id.file_type_id
+ else:
+ ds_type_id = dataset_type_or_id
+
+ def wrapper(total_url, path):
+ _d = dict(datasetType=ds_type_id, path=path)
+ return _process_rpost_with_transform(ServiceJob.from_d)(total_url, _d)
+
+ return wrapper
+
+
+def _get_job_by_id_or_raise(sal, job_id, error_klass, error_message_extras=None):
+ job = sal.get_job_by_id(job_id)
+
+ if job is None:
+ details = "" if error_messge_extras is None else error_messge_extras
+ base_msg = "Failed to find job {i}".format(i=job_id)
+ emsg = " ".join([base_msg, details])
+ raise error_klass(emsg)
+
+ return job
+
+
+def _block_for_job_to_complete(sal, job_id, time_out=600, sleep_time=2):
+ """
+ Waits for job to complete
+
+ :param sal: ServiceAccessLayer
+ :param job_id: Job Id
+ :param time_out: Total runtime before aborting
+ :param sleep_time: polling interval (in sec)
+
+ :rtype: JobResult
+ :raises: KeyError if job is not initially found, or JobExeError
+ if the job fails during the polling process or times out
+ """
+
+ time.sleep(sleep_time)
+ job = _get_job_by_id_or_raise(sal, job_id, KeyError)
+
+ log.debug("time_out = {t}".format(t=time_out))
+
+ error_msg = ""
+ job_result = JobResult(job, 0, error_msg)
+ started_at = time.time()
+
+ # number of polling steps
+ i = 0
+ while True:
+ run_time = time.time() - started_at
+
+ if job.state in JobStates.ALL_COMPLETED:
+ break
+
+ i += 1
+ time.sleep(sleep_time)
+
+ msg = "Running pipeline {n} state: {s} runtime:{r:.2f} sec {i} iteration".format(n=job.name, s=job.state, r=run_time, i=i)
+ log.debug(msg)
+ # making the exceptions different to distinguish between an initial
+ # error and a "polling" error. Adding some msg details
+ job = _get_job_by_id_or_raise(sal, job_id, JobExeError, error_message_extras=msg)
+
+ # FIXME, there's currently not a good way to get errors for jobs
+ job_result = JobResult(job, run_time, "")
+ if time_out is not None:
+ if run_time > time_out:
+ raise JobExeError("Exceeded runtime {r} of {t}. {m}".format(r=run_time, t=time_out, m=msg))
+
+ return job_result
+
+# Make this consistent somehow. Maybe define 'shortname' in the core model?
+# Martin is doing this for the XML file names
+DATASET_METATYPES_TO_ENDPOINTS = {
+ FileTypes.DS_SUBREADS_H5: "hdfsubreads",
+ FileTypes.DS_SUBREADS: "subreads",
+ FileTypes.DS_ALIGN: "alignments",
+ FileTypes.DS_REF: "references",
+ FileTypes.DS_BARCODE: "barcodes",
+ FileTypes.DS_CCS: "ccsreads",
+ FileTypes.DS_CONTIG: "contigs",
+ FileTypes.DS_ALIGN_CCS: "css-alignments"}
+
+
+def _get_endpoint_or_raise(ds_type):
+ if ds_type in DATASET_METATYPES_TO_ENDPOINTS:
+ return DATASET_METATYPES_TO_ENDPOINTS[ds_type]
+ raise KeyError("Unsupported datasettype {t}. Supported values {v}".format(t=ds_type, v=DATASET_METATYPES_TO_ENDPOINTS.keys()))
+
+
+def _job_id_or_error(job_or_error, custom_err_msg=None):
+ """
+ Extract job id from job creation service (by type)
+ or Raise exception from an EngineJob response
+
+ :raises: JobExeError
+ """
+ if isinstance(job_or_error, ServiceJob):
+ return job_or_error.id
+ else:
+ emsg = job_or_error.get('message', "Unknown")
+ if custom_err_msg is not None:
+ emsg += " {f}".format(f=custom_err_msg)
+ raise JobExeError("Failed to create job. {e}. Raw Response {x}".format(e=emsg, x=job_or_error))
+
+
+def _to_host(h):
+ prefix = "http://"
+ return h if h.startswith(prefix) else prefix + h
+
+
+def _to_ds_file(d):
+ # is_chunked isn't exposed at the service level
+ return DataStoreFile(d['uuid'], d['sourceId'], d['fileTypeId'], d['path'], is_chunked=False, name=d.get("name", ""), description=d.get("description", ""))
+
+
+def _to_datastore(dx):
+ # Friction to get around service endpoint not returning a list of files
+ ds_files = [_to_ds_file(d) for d in dx]
+ return DataStore(ds_files)
+
+
+def _to_entry_points(d):
+ return [JobEntryPoint.from_d(i) for i in d]
+
+
+class ServiceAccessLayer(object):
+ """General Access Layer for interfacing with the job types on Secondary SMRT Server"""
+
+ ROOT_JM = "/secondary-analysis/job-manager"
+ ROOT_JOBS = ROOT_JM + "/jobs"
+ ROOT_DS = "/secondary-analysis/datasets"
+ ROOT_PT = '/secondary-analysis/resolved-pipeline-templates'
+
+ # in sec when blocking to run a job
+ JOB_DEFAULT_TIMEOUT = 60 * 30
+
+ def __init__(self, base_url, port, debug=False, sleep_time=2):
+ self.base_url = _to_host(base_url)
+ self.port = port
+ # This will display verbose details with respect to the failed request
+ self.debug = debug
+ self._sleep_time = sleep_time
+
+ @property
+ def uri(self):
+ return "{b}:{u}".format(b=self.base_url, u=self.port)
+
+ def _to_url(self, rest):
+ return _to_url(self.uri, rest)
+
+ def __repr__(self):
+ return "<{k} {u} >".format(k=self.__class__.__name__, u=self.uri)
+
+ def to_summary(self):
+ return to_sal_summary(self)
+
+ def get_status(self):
+ """Get status of the server"""
+ return _process_rget(_to_url(self.uri, "/status"))
+
+ def get_job_by_type_and_id(self, job_type, job_id):
+ return _process_rget_with_job_transform_or_none(_to_url(self.uri, "{p}/{t}/{i}".format(i=job_id, t=job_type, p=ServiceAccessLayer.ROOT_JOBS)))
+
+ def get_job_by_id(self, job_id):
+ """Get a Job by int id"""
+ # FIXME. Make this an internal method. It's ambiguous which job type you're asking for
+ return _process_rget_with_job_transform_or_none(_to_url(self.uri, "{r}/{i}".format(i=job_id, r=ServiceAccessLayer.ROOT_JOBS)))
+
+ def _get_job_resource_type(self, job_type, job_id, resource_type_id):
+ # grab the datastore or the reports
+ _d = dict(t=job_type, i=job_id, r=resource_type_id, p=ServiceAccessLayer.ROOT_JOBS)
+ return _process_rget_with_job_transform_or_none(_to_url(self.uri, "{p}/{t}/{i}/{r}".format(**_d)))
+
+ def _get_job_resource_type_with_transform(self, job_type, job_id, resource_type_id, transform_func):
+ _d = dict(t=job_type, i=job_id, r=resource_type_id, p=ServiceAccessLayer.ROOT_JOBS)
+ return _process_rget_or_none(transform_func)(_to_url(self.uri, "{p}/{t}/{i}/{r}".format(**_d)))
+
+ def _get_jobs_by_job_type(self, job_type):
+ return _process_rget_with_jobs_transform(_to_url(self.uri, "{p}/{t}".format(t=job_type, p=ServiceAccessLayer.ROOT_JOBS)))
+
+ def get_analysis_jobs(self):
+ return self._get_jobs_by_job_type(JobTypes.PB_PIPE)
+
+ def get_import_dataset_jobs(self):
+ return self._get_jobs_by_job_type(JobTypes.IMPORT_DS)
+
+ def get_merge_dataset_jobs(self):
+ return self._get_jobs_by_job_type(JobTypes.MERGE_DS)
+
+ def get_fasta_convert_jobs(self):
+ return self._get_jobs_by_job_type(JobTypes.CONVERT_FASTA)
+
+ def get_analysis_job_by_id(self, job_id):
+ """Get an Analysis job by id or UUID or return None
+
+ :rtype: ServiceJob
+ """
+ return self.get_job_by_type_and_id(JobTypes.PB_PIPE, job_id)
+
+ def get_analysis_job_datastore(self, job_id):
+ """Get DataStore output from (pbsmrtpipe) analysis job"""
+ # this doesn't work the list is sli
+ return self._get_job_resource_type_with_transform(JobTypes.PB_PIPE, job_id, ServiceResourceTypes.DATASTORE, _to_datastore)
+
+ def get_analysis_job_reports(self, job_id):
+ """Get Reports output from (pbsmrtpipe) analysis job"""
+ return self._get_job_resource_type_with_transform(JobTypes.PB_PIPE, job_id, ServiceResourceTypes.REPORTS, lambda x: x)
+
+ def get_analysis_job_report_details(self, job_id, report_uuid):
+ _d = dict(t=JobTypes.PB_PIPE, i=job_id, r=ServiceResourceTypes.REPORTS, p=ServiceAccessLayer.ROOT_JOBS, u=report_uuid)
+ return _process_rget_or_none(lambda x: x)(_to_url(self.uri, "{p}/{t}/{i}/{r}/{u}".format(**_d)))
+
+ def get_analysis_job_entry_points(self, job_id):
+ return self._get_job_resource_type_with_transform(JobTypes.PB_PIPE, job_id, ServiceResourceTypes.ENTRY_POINTS, _to_entry_points)
+
+ def get_import_dataset_job_datastore(self, job_id):
+ """Get a List of Service DataStore files from an import DataSet job"""
+ return self._get_job_resource_type(JobTypes.IMPORT_DS, job_id, ServiceResourceTypes.DATASTORE)
+
+ def get_merge_dataset_job_datastore(self, job_id):
+ return self._get_job_resource_type(JobTypes.MERGE_DS, job_id, ServiceResourceTypes.DATASTORE)
+
+ def _import_dataset(self, dataset_type, path):
+ # This returns a job resource
+ url = self._to_url("{p}/{x}".format(x=JobTypes.IMPORT_DS, p=ServiceAccessLayer.ROOT_JOBS))
+ return _import_dataset_by_type(dataset_type)(url, path)
+
+ def run_import_dataset_by_type(self, dataset_type, path_to_xml):
+ job_or_error = self._import_dataset(dataset_type, path_to_xml)
+ custom_err_msg = "Import {d} {p}".format(p=path_to_xml, d=dataset_type)
+ job_id = _job_id_or_error(job_or_error, custom_err_msg=custom_err_msg)
+ return _block_for_job_to_complete(self, job_id, sleep_time=self._sleep_time)
+
+ def _run_import_and_block(self, func, path, time_out=None):
+ # func will be self.import_dataset_X
+ job_or_error = func(path)
+ custom_err_msg = "Import {p}".format(p=path)
+ job_id = _job_id_or_error(job_or_error, custom_err_msg=custom_err_msg)
+ return _block_for_job_to_complete(self, job_id, time_out=time_out,
+ sleep_time=self._sleep_time)
+
+ def import_dataset_subread(self, path):
+ return self._import_dataset(FileTypes.DS_SUBREADS, path)
+
+ def run_import_dataset_subread(self, path, time_out=10):
+ return self._run_import_and_block(self.import_dataset_subread, path, time_out=time_out)
+
+ def import_dataset_hdfsubread(self, path):
+ return self._import_dataset(FileTypes.DS_SUBREADS_H5, path)
+
+ def run_import_dataset_hdfsubread(self, path, time_out=10):
+ return self._run_import_and_block(self.import_dataset_hdfsubread, path, time_out=time_out)
+
+ def import_dataset_reference(self, path):
+ return self._import_dataset(FileTypes.DS_REF, path)
+
+ def run_import_dataset_reference(self, path, time_out=10):
+ return self._run_import_and_block(self.import_dataset_reference, path, time_out=time_out)
+
+ def run_import_local_dataset(self, path):
+ """Import a file from FS that is local to where the services are running
+
+ Returns a JobResult instance
+
+ :rtype: JobResult
+ """
+ dataset_meta_type = get_dataset_metadata(path)
+ def _verify_dataset_in_list():
+ file_type = FileTypes.ALL()[dataset_meta_type.metatype]
+ ds_endpoint = _get_endpoint_or_raise(file_type)
+ datasets = self._get_datasets_by_type(ds_endpoint)
+ uuids = {ds['uuid'] for ds in datasets}
+ if dataset_meta_type.uuid not in uuids:
+ raise JobExeError(("Dataset {u} was imported but does not "+
+ "appear in the dataset list; this may "+
+ "indicate XML schema errors.").format(
+ u=dataset_meta_type.uuid))
+ result = self.get_dataset_by_uuid(dataset_meta_type.uuid)
+ if result is None:
+ log.info("Importing dataset {p}".format(p=path))
+ job_result = self.run_import_dataset_by_type(dataset_meta_type.metatype, path)
+ log.info("Confirming database update")
+ # validation 1: attempt to retrieve dataset info
+ result_new = self.get_dataset_by_uuid(dataset_meta_type.uuid)
+ if result_new is None:
+ raise JobExeError(("Dataset {u} was imported but could "+
+ "not be retrieved; this may indicate "+
+ "XML schema errors.").format(
+ u=dataset_meta_type.uuid))
+ # validation 2: make sure it shows up in the listing
+ _verify_dataset_in_list()
+ return job_result
+ else:
+ log.info("{f} already imported. Skipping importing. {r}".format(r=result, f=dataset_meta_type.metatype))
+ _verify_dataset_in_list()
+ # need to clean this up
+ return JobResult(self.get_job_by_id(result['jobId']), 0, "")
+
+ def get_dataset_by_uuid(self, int_or_uuid):
+ """The recommend model is to look up DataSet type by explicit MetaType
+
+ Returns None if the dataset was not found
+ """
+ return _process_rget_or_none(_null_func)(_to_url(self.uri, "{p}/{i}".format(i=int_or_uuid, p=ServiceAccessLayer.ROOT_DS)))
+
+ def get_dataset_by_id(self, dataset_type, int_or_uuid):
+ """Get a Dataset using the DataSetMetaType and (int|uuid) of the dataset"""
+ ds_endpoint = _get_endpoint_or_raise(dataset_type)
+ return _process_rget(_to_url(self.uri, "{p}/{t}/{i}".format(t=ds_endpoint, i=int_or_uuid, p=ServiceAccessLayer.ROOT_DS)))
+
+ def _get_datasets_by_type(self, dstype):
+ return _process_rget(_to_url(self.uri, "{p}/{i}".format(i=dstype, p=ServiceAccessLayer.ROOT_DS)))
+
+ def get_subreadset_by_id(self, int_or_uuid):
+ return self.get_dataset_by_id(FileTypes.DS_SUBREADS, int_or_uuid)
+
+ def get_subreadsets(self):
+ return self._get_datasets_by_type("subreads")
+
+ def get_hdfsubreadset_by_id(self, int_or_uuid):
+ return self.get_dataset_by_id(FileTypes.DS_SUBREADS_H5, int_or_uuid)
+
+ def get_hdfsubreadsets(self):
+ return self._get_datasets_by_type("hdfsubreads")
+
+ def get_referenceset_by_id(self, int_or_uuid):
+ return self.get_dataset_by_id(FileTypes.DS_REF, int_or_uuid)
+
+ def get_referencesets(self):
+ return self._get_datasets_by_type("references")
+
+ def get_alignmentset_by_id(self, int_or_uuid):
+ return self.get_dataset_by_id(FileTypes.DS_ALIGN, int_or_uuid)
+
+ def get_ccsreadset_by_id(self, int_or_uuid):
+ return self.get_dataset_by_id(FileTypes.DS_CCS, int_or_uuid)
+
+ def get_ccsreadsets(self):
+ return self._get_datasets_by_type("ccsreads")
+
+ def get_alignmentsets(self):
+ return self._get_datasets_by_type("alignments")
+
+ def import_fasta(self, fasta_path, name, organism, ploidy):
+ """Convert fasta file to a ReferenceSet and Import. Returns a Job """
+ d = dict(path=fasta_path,
+ name=name,
+ organism=organism,
+ ploidy=ploidy)
+ return _process_rpost_with_transform(ServiceJob.from_d)(self._to_url("{p}/{t}".format(p=ServiceAccessLayer.ROOT_JOBS, t=JobTypes.CONVERT_FASTA)), d)
+
+ def run_import_fasta(self, fasta_path, name, organism, ploidy, time_out=JOB_DEFAULT_TIMEOUT):
+ """Import a Reference into a Block"""""
+ job_or_error = self.import_fasta(fasta_path, name, organism, ploidy)
+ _d = dict(f=fasta_path, n=name, o=organism, p=ploidy)
+ custom_err_msg = "Fasta-convert path:{f} name:{n} organism:{o} ploidy:{p}".format(**_d)
+ job_id = _job_id_or_error(job_or_error, custom_err_msg=custom_err_msg)
+ return _block_for_job_to_complete(self, job_id, time_out=time_out,
+ sleep_time=self._sleep_time)
+
+ def create_logger_resource(self, idx, name, description):
+ _d = dict(id=idx, name=name, description=description)
+ return _process_rpost(_to_url(self.uri, "/smrt-base/loggers"), _d)
+
+ def log_progress_update(self, job_type_id, job_id, message, level, source_id):
+ """This is the generic job logging mechanism"""
+ _d = dict(message=message, level=level, sourceId=source_id)
+ return _process_rpost(_to_url(self.uri, "{p}/{t}/{i}/log".format(t=job_type_id, i=job_id, p=ServiceAccessLayer.ROOT_JOBS)), _d)
+
+ def get_pipeline_template_by_id(self, pipeline_template_id):
+ return _process_rget(_to_url(self.uri, "{p}/{i}".format(i=pipeline_template_id, p=ServiceAccessLayer.ROOT_PT)))
+
+ def create_by_pipeline_template_id(self, name, pipeline_template_id, epoints, task_options=()):
+ """Creates and runs a pbsmrtpipe pipeline by pipeline template id"""
+ # sanity checking to see if pipeline is valid
+ _ = self.get_pipeline_template_by_id(pipeline_template_id)
+
+ seps = [dict(entryId=e.entry_id, fileTypeId=e.dataset_type, datasetId=e.resource) for e in epoints]
+
+ def _to_o(opt_id, opt_value, option_type_id):
+ return dict(optionId=opt_id, value=opt_value, optionTypeId=option_type_id)
+
+ task_options = list(task_options)
+ # FIXME. Need to define this in the scenario IO layer.
+ # workflow_options = [_to_o("woption_01", "value_01")]
+ workflow_options = []
+ d = dict(name=name, pipelineId=pipeline_template_id, entryPoints=seps, taskOptions=task_options, workflowOptions=workflow_options)
+ raw_d = _process_rpost(_to_url(self.uri, "{r}/{p}".format(p=JobTypes.PB_PIPE, r=ServiceAccessLayer.ROOT_JOBS)), d)
+ return ServiceJob.from_d(raw_d)
+
+ def run_by_pipeline_template_id(self, name, pipeline_template_id, epoints, task_options=(), time_out=JOB_DEFAULT_TIMEOUT):
+ """Run a pbsmrtpipe pipeline by template id and block until it completes or times out"""
+
+ job_or_error = self.create_by_pipeline_template_id(name, pipeline_template_id, epoints, task_options=task_options)
+
+ _d = dict(name=name, p=pipeline_template_id, eps=epoints)
+ custom_err_msg = "Job {n} args: {a}".format(n=name, a=_d)
+
+ job_id = _job_id_or_error(job_or_error, custom_err_msg=custom_err_msg)
+ return _block_for_job_to_complete(self, job_id, time_out=time_out,
+ sleep_time=self._sleep_time)
+
+
+def log_pbsmrtpipe_progress(total_url, message, level, source_id, ignore_errors=True):
+ """Log the status of a pbsmrtpipe job to SMRT Server"""
+
+ # Need to clarify the model here. Trying to pass the most minimal
+ # data necessary to pbsmrtpipe.
+ _d = dict(message=message, level=level, sourceId=source_id)
+ if ignore_errors:
+ try:
+ return _process_rpost(total_url, _d)
+ except Exception as e:
+ log.warn("Failed Request to {u} data: {d}. {e}".format(u=total_url, d=_d, e=e))
+ else:
+ return _process_rpost(total_url, _d)
+
+
+def add_datastore_file(total_url, datastore_file, ignore_errors=True):
+ """Add a DataStore file to SMRT Server
+
+ :type datastore_file: DataStoreFile
+ """
+ _d = datastore_file.to_dict()
+ if ignore_errors:
+ try:
+ return _process_rpost(total_url, _d)
+ except Exception as e:
+ log.warn("Failed Request to {u} data: {d}. {e}".format(u=total_url, d=_d, e=e))
+ else:
+ return _process_rpost(total_url, _d)
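
For orientation, a minimal usage sketch of the service client added above. The import path and the ServiceAccessLayer(host, port) constructor are assumptions; the method calls are as defined in this file.

    from pbcommand.services.service_access_layer import ServiceAccessLayer  # assumed import path

    sal = ServiceAccessLayer("smrt-server-host", 8070)  # assumed (host, port) constructor
    # list ReferenceSets already known to the server
    for ds in sal.get_referencesets():
        print ds['uuid']
    # convert a FASTA to a ReferenceSet and block until the import job completes
    job_result = sal.run_import_fasta("/path/to/genome.fasta", "my-genome",
                                      "E. coli", "haploid")
    # submit a pbsmrtpipe analysis job and block until it finishes (epoints would be
    # service entry-point models carrying entry_id/dataset_type/resource fields)
    # job_result = sal.run_by_pipeline_template_id("my-job", "pipeline-template-id", epoints)
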
diff --git a/pbcommand/services/utils.py b/pbcommand/services/utils.py
new file mode 100644
index 0000000..33a8f11
--- /dev/null
+++ b/pbcommand/services/utils.py
@@ -0,0 +1,114 @@
+# This is not public. Might want to move this into service_access_layer
+from collections import defaultdict
+
+from .models import ServiceJob, JobStates, JobTypes
+
+
+def to_ascii(s):
+ # This is not awesome
+ return s.encode('ascii', 'ignore')
+
+
+def _jobs_by_state_gen(sal, job_states):
+ """:type sal: ServiceAccessLayer"""
+
+ states = job_states if isinstance(job_states, (tuple, list)) else [job_states]
+
+ jobs = sal.get_analysis_jobs()
+ for job in jobs:
+ sjob = ServiceJob.from_d(job)
+ if sjob.state in states:
+ yield sjob
+
+
+def get_failed_jobs(sal):
+ return sorted(_jobs_by_state_gen(sal, JobStates.FAILED), key=lambda x: x.created_at, reverse=True)
+
+
+def jobs_summary(jobs):
+ """dict(state) -> count (int) """
+ states_counts = defaultdict(lambda: 0)
+ if jobs:
+ for job in jobs:
+ states_counts[job.state] += 1
+
+ return states_counts
+
+
+def to_jobs_summary(jobs, header=None):
+ """Return a string summarizing job counts by state"""
+ header = "Jobs" if header is None else header
+
+ # Make it easier to handle Option[Seq[Job]]
+ xjobs = [] if jobs is None else jobs
+
+ outs = []
+ x = outs.append
+ states_counts = jobs_summary(xjobs)
+ x("{h} {n}".format(n=len(xjobs), h=header))
+ for state, c in states_counts.iteritems():
+ x("State {s} {c}".format(c=c, s=state))
+
+ return "\n".join(outs)
+
+
+def to_all_job_types_summary(sal, sep="*****"):
+
+ # only use a subset of the job types
+
+ funcs = [(JobTypes.IMPORT_DS, sal.get_import_dataset_jobs),
+ (JobTypes.MERGE_DS, sal.get_merge_dataset_jobs),
+ (JobTypes.CONVERT_FASTA, sal.get_fasta_convert_jobs),
+ (JobTypes.PB_PIPE, sal.get_analysis_jobs)]
+
+ outs = []
+ x = outs.append
+ x("All Job types Summary")
+ x(sep)
+ for name, func in funcs:
+ out = to_jobs_summary(func(), header="{n} Jobs".format(n=name))
+ x(out)
+ x(sep)
+
+ return "\n".join(outs)
+
+
+def to_all_datasets_summary(sal, sep="****"):
+
+ ds_types = [("SubreadSets", sal.get_subreadsets),
+ ("HdfSubreadSets", sal.get_hdfsubreadsets),
+ ("ReferenceSets", sal.get_referencesets),
+ ("AlignmentSets", sal.get_alignmentsets),
+ #("ConsensusSets", sal.get_ccsreadsets)
+ ]
+
+ outs = []
+ x = outs.append
+ x("Dataset Summary")
+ x(sep)
+ for name, func in ds_types:
+ d = func()
+ ndatasets = len(d)
+ x("{n} {d}".format(n=name, d=ndatasets))
+
+ return "\n".join(outs)
+
+
+def to_sal_summary(sal):
+ """:type sal: ServiceAccessLayer"""
+
+ status = sal.get_status()
+ outs = []
+
+ x = outs.append
+
+ sep = "-" * 10
+
+ x(repr(sal))
+ x("Status {s}".format(s=status['message']))
+ x(sep)
+ x(to_all_datasets_summary(sal, sep=sep))
+ x(sep)
+ x(to_all_job_types_summary(sal, sep=sep))
+
+ return "\n".join(outs)
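
A short sketch of how these summary helpers compose (sal constructed as in the previous sketch; note the module is explicitly marked as non-public above):

    from pbcommand.services.utils import to_sal_summary, get_failed_jobs, to_jobs_summary

    print to_sal_summary(sal)      # server status, dataset counts and per-job-type summaries
    failed = get_failed_jobs(sal)  # ServiceJob instances, newest first
    print to_jobs_summary(failed, header="Failed Jobs")
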
diff --git a/pbcommand/testkit/core.py b/pbcommand/testkit/core.py
index 7b75c79..c2ccc2b 100644
--- a/pbcommand/testkit/core.py
+++ b/pbcommand/testkit/core.py
@@ -1,6 +1,8 @@
+
import os
import unittest
import logging
+import tempfile
import subprocess
from .base_utils import (HAS_PBCORE,
@@ -46,6 +48,8 @@ class PbTestApp(unittest.TestCase):
# These will be checked against the resolved tool contract values
RESOLVED_TASK_OPTIONS = {}
RESOLVED_NPROC = 1
+ IS_DISTRIBUTED = False
+ RESOLVED_IS_DISTRIBUTED = False
@classmethod
def setUpClass(cls):
@@ -60,13 +64,19 @@ class PbTestApp(unittest.TestCase):
log.debug("validating output file existence from {r}".format(r=rtc))
log.debug("Resolved Output files from {t}".format(t=rtc.task.task_id))
log.debug(rtc.task.output_files)
+
+ # the output files should all have unique paths, otherwise the resolver
+ # has failed
+ emsg = "Non-unique outputs. {o}".format(o=rtc.task.output_files)
+ self.assertEquals(len(rtc.task.output_files), len(set(rtc.task.output_files)), emsg)
+
for i, output_file in enumerate(rtc.task.output_files):
msg = "Unable to find {i}-th output file {p}".format(i=i, p=output_file)
self.assertTrue(os.path.exists(output_file), msg)
def _to_rtc(self, tc, output_dir, tmp_dir):
# handled the polymorphism in subclasses by overriding
- return resolve_tool_contract(tc, self.INPUT_FILES, output_dir, tmp_dir, self.MAX_NPROC, self.TASK_OPTIONS)
+ return resolve_tool_contract(tc, self.INPUT_FILES, output_dir, tmp_dir, self.MAX_NPROC, self.TASK_OPTIONS, self.IS_DISTRIBUTED)
def test_run_e2e(self):
# hack to skip running the base Test class (which is the nose default behavior)
@@ -88,7 +98,6 @@ class PbTestApp(unittest.TestCase):
output_tc = get_temp_file("-{n}-tool_contract.json".format(n=self.__class__.__name__), output_dir)
emit_tc_exe = "{e} > {o}".format(e=self.DRIVER_EMIT, o=output_tc)
rcode = subprocess.call([emit_tc_exe], shell=True)
-
self.assertEquals(rcode, 0, "Emitting tool contract failed for '{e}'".format(e=emit_tc_exe))
# sanity marshalling-unmarshalling
@@ -116,17 +125,19 @@ class PbTestApp(unittest.TestCase):
# Resolved NPROC
self.assertEquals(rtc.task.nproc, self.RESOLVED_NPROC)
+ self.assertEquals(rtc.task.is_distributed, self.RESOLVED_IS_DISTRIBUTED)
log.info("running resolved contract {r}".format(r=output_json_rtc))
exe = "{d} {p}".format(p=output_json_rtc, d=self.DRIVER_RESOLVE)
log.info("Running exe '{e}'".format(e=exe))
- rcode = subprocess.call([exe], shell=True)
- self.assertEqual(rcode, 0, "Running from resolved tool contract failed from {e}".format(e=exe))
+ with tempfile.TemporaryFile() as stdout:
+ rcode = subprocess.call([exe], shell=True,
+ stdout=stdout)
+ self.assertEquals(rcode, 0, "Running from resolved tool contract failed from {x}".format(x=exe))
log.info("Successfully completed running e2e for {d}".format(d=self.DRIVER_EMIT))
self._test_outputs_exists(rtc)
-
self.run_after(rtc, output_dir)
def run_after(self, rtc, output_dir):
@@ -148,7 +159,7 @@ class PbTestScatterApp(PbTestApp):
CHUNK_KEYS = ()
def _to_rtc(self, tc, output_dir, tmp_dir):
- return resolve_scatter_tool_contract(tc, self.INPUT_FILES, output_dir, tmp_dir, self.MAX_NPROC, self.TASK_OPTIONS, self.MAX_NCHUNKS, self.CHUNK_KEYS)
+ return resolve_scatter_tool_contract(tc, self.INPUT_FILES, output_dir, tmp_dir, self.MAX_NPROC, self.TASK_OPTIONS, self.MAX_NCHUNKS, self.CHUNK_KEYS, self.IS_DISTRIBUTED)
class PbTestGatherApp(PbTestApp):
@@ -159,4 +170,4 @@ class PbTestGatherApp(PbTestApp):
CHUNK_KEY = PipelineChunk.CHUNK_KEY_PREFIX + 'fasta_id'
def _to_rtc(self, tc, output_dir, tmp_dir):
- return resolve_gather_tool_contract(tc, self.INPUT_FILES, output_dir, tmp_dir, self.MAX_NPROC, self.TASK_OPTIONS, self.CHUNK_KEY)
\ No newline at end of file
+ return resolve_gather_tool_contract(tc, self.INPUT_FILES, output_dir, tmp_dir, self.MAX_NPROC, self.TASK_OPTIONS, self.CHUNK_KEY, self.IS_DISTRIBUTED)
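
For context, a hypothetical PbTestApp subclass exercising the new IS_DISTRIBUTED/RESOLVED_IS_DISTRIBUTED hooks; the driver commands and input path are placeholders, not part of pbcommand:

    import pbcommand.testkit

    class TestMyDevApp(pbcommand.testkit.PbTestApp):
        DRIVER_EMIT = "python -m myplugin.my_app --emit-tool-contract"         # placeholder driver
        DRIVER_RESOLVE = "python -m myplugin.my_app --resolved-tool-contract"  # placeholder driver
        REQUIRES_PBCORE = False
        INPUT_FILES = ["/path/to/example.txt"]
        MAX_NPROC = 1
        TASK_OPTIONS = {}
        RESOLVED_TASK_OPTIONS = {}
        RESOLVED_NPROC = 1
        IS_DISTRIBUTED = True
        RESOLVED_IS_DISTRIBUTED = True
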
diff --git a/pbcommand/utils.py b/pbcommand/utils.py
index 87f6fa8..e236f54 100644
--- a/pbcommand/utils.py
+++ b/pbcommand/utils.py
@@ -1,43 +1,224 @@
"""Utils for common funcs, such as setting up a log, composing functions."""
import functools
import os
-import sys
import logging
+import logging.config
import argparse
+import pprint
import traceback
import time
import types
+import subprocess
+from contextlib import contextmanager
+import xml.etree.ElementTree as ET
+
+from pbcommand.models import FileTypes, DataSetMetaData
log = logging.getLogger(__name__)
+log.addHandler(logging.NullHandler()) # suppress the annoying no handlers msg
+
+
+class Constants(object):
+ LOG_FMT_ONLY_MSG = '%(message)s'
+ LOG_FMT_ERR = '%(message)s'
+ LOG_FMT_LVL = '[%(levelname)s] %(message)s'
+ LOG_FMT_MIN = '[%(asctime)-15sZ] %(message)s'
+ LOG_FMT_SIMPLE = '[%(levelname)s] %(asctime)-15sZ %(message)s'
+ LOG_FMT_STD = '[%(levelname)s] %(asctime)-15sZ [%(name)s] %(message)s'
+ LOG_FMT_FULL = '[%(levelname)s] %(asctime)-15sZ [%(name)s %(funcName)s %(lineno)d] %(message)s'
+
+
+class ExternalCommandNotFoundError(Exception):
+ """External command is not found in Path"""
+ pass
+
+
+def _handler_stream_d(stream, level_str, formatter_id):
+ d = {'level': level_str,
+ 'class': "logging.StreamHandler",
+ 'formatter': formatter_id,
+ 'stream': stream}
+ return d
+
+_handler_stdout_stream_d = functools.partial(_handler_stream_d, "ext://sys.stdout")
+_handler_stderr_stream_d = functools.partial(_handler_stream_d, "ext://sys.stderr")
+
+
+def _handler_file(level_str, path, formatter_id):
+ d = {'class': 'logging.FileHandler',
+ 'level': level_str,
+ 'formatter': formatter_id,
+ 'filename': path}
+ return d
+
+
+def _get_default_logging_config_dict(level, file_name_or_none, formatter):
+ """
+ Set up a logger to either a file or the console. If file_name_or_none is
+ None, the logger is set up to write to stdout.
+
+ :note: a console (stderr) handler for errors is always added
+
+ Returns a dict configuration of the logger.
+ """
+
+ level_str = logging.getLevelName(level)
+
+ formatter_id = 'custom_logger_fmt'
+ console_handler_id = "console_handler"
+
+ error_fmt_id = "error_fmt_id"
+ error_handler_id = "error_handler"
+ error_handler_d = _handler_stderr_stream_d(logging.ERROR, error_fmt_id)
+
+ if file_name_or_none is None:
+ handler_d = _handler_stdout_stream_d(level_str, formatter_id)
+ else:
+ handler_d = _handler_file(level_str, file_name_or_none, formatter_id)
+
+ formatters_d = {fid: {'format': fx} for fid, fx in [(formatter_id, formatter), (error_fmt_id, Constants.LOG_FMT_ERR)]}
+
+ handlers_d = {console_handler_id: handler_d,
+ error_handler_id: error_handler_d}
+
+ loggers_d = {"custom": {'handlers': [console_handler_id]},
+ "stderr": {'handlers': [error_handler_id]}}
+
+ d = {
+ 'version': 1,
+ 'disable_existing_loggers': False, # this fixes the problem
+ 'formatters': formatters_d,
+ 'handlers': handlers_d,
+ 'loggers': loggers_d,
+ 'root': {'handlers': [error_handler_id, console_handler_id], 'level': logging.NOTSET}
+ }
+
+ #print pprint.pformat(d)
+ return d
+
+
+def _get_console_and_file_logging_config_dict(console_level, console_formatter, path, path_level, path_formatter):
+ """
+ Get logging configuration that is both for console and a file.
+
+ :note: A stderr logger handler is also added.
+
+ """
+
+ def _to_handler_d(handlers_, level):
+ return {"handlers": handlers_, "level": level, "propagate": True}
+
+ console_handler_id = "console_handler"
+ console_fmt_id = "console_fmt"
+ console_handler_d = _handler_stdout_stream_d(console_level, console_fmt_id)
+
+ stderr_handler_id = "stderr_handler"
+ error_fmt_id = "error_fmt"
+ stderr_handler_d = _handler_stderr_stream_d(logging.ERROR, error_fmt_id)
+
+ file_handler_id = "file_handler"
+ file_fmt_id = "file_fmt"
+ file_handler_d = _handler_file(path_level, path, file_fmt_id)
+
+ formatters = {console_fmt_id: {"format": console_formatter},
+ file_fmt_id: {"format": path_formatter},
+ error_fmt_id: {"format": Constants.LOG_FMT_ERR}
+ }
+
+ handlers = {console_handler_id: console_handler_d,
+ file_handler_id: file_handler_d,
+ stderr_handler_id: stderr_handler_d}
+
+ loggers = {"console": _to_handler_d([console_handler_id], console_level),
+ "custom_file": _to_handler_d([file_handler_id], path_level),
+ "stderr_err": _to_handler_d([stderr_handler_id], logging.ERROR)
+ }
+
+ d = {'version': 1,
+ 'disable_existing_loggers': False, # this fixes the problem
+ 'formatters': formatters,
+ 'handlers': handlers,
+ 'loggers': loggers,
+ 'root': {'handlers': handlers.keys(), 'level': logging.DEBUG}
+ }
+
+ #print pprint.pformat(d)
+ return d
+
+
+def _setup_logging_config_d(d):
+ logging.config.dictConfig(d)
+ logging.Formatter.converter = time.gmtime
+ return d
+
+
+def setup_logger(file_name_or_none, level, formatter=Constants.LOG_FMT_FULL):
+ """
+
+ :param file_name_or_none: Path to log file, None will default to stdout
+ :param level: logging level (e.g. logging.INFO)
+ :param formatter: Log Formatting string
+ """
+ d = _get_default_logging_config_dict(level, file_name_or_none, formatter)
+ return _setup_logging_config_d(d)
+
+
+def setup_console_and_file_logger(stdout_level, stdout_formatter, path, path_level, path_formatter):
+ d = _get_console_and_file_logging_config_dict(stdout_level, stdout_formatter, path, path_level, path_formatter)
+ return _setup_logging_config_d(d)
-def setup_log(alog, level=logging.INFO, file_name=None, log_filter=None,
- str_formatter='[%(levelname)s] %(asctime)-15sZ [%(name)s %(funcName)s %(lineno)d] %(message)s'):
+def setup_log(alog,
+ level=logging.INFO,
+ file_name=None,
+ log_filter=None,
+ str_formatter=Constants.LOG_FMT_FULL):
"""Core Util to setup log handler
+ THIS NEEDS TO BE DEPRECATED
+
:param alog: a log instance
:param level: (int) Level of logging debug
:param file_name: (str, None) if None, stdout is used, str write to file
:param log_filter: (LogFilter, None)
:param str_formatter: (str) log formatting str
"""
- logging.Formatter.converter = time.gmtime
+ setup_logger(file_name, level, formatter=str_formatter)
- alog.setLevel(logging.DEBUG)
- if file_name is None:
- handler = logging.StreamHandler(sys.stdout)
- else:
- handler = logging.FileHandler(file_name)
- formatter = logging.Formatter(str_formatter)
- handler.setFormatter(formatter)
- handler.setLevel(level)
- if log_filter:
- handler.addFilter(log_filter)
- alog.addHandler(handler)
+ # FIXME. Keeping the interface, but the specific log instance isn't used,
+ # the python logging setup mutates global state
+ if log_filter is not None:
+ alog.warn("log_filter kw is no longer supported")
return alog
+def get_parsed_args_log_level(pargs, default_level=logging.INFO):
+ """
+ Utility for handling logging setup flexibly in a variety of use cases,
+ assuming standard command-line arguments.
+
+ :param pargs: argparse namespace or equivalent
+ :param default_level: logging level to use if the parsed arguments do not
+ specify one
+ """
+ level = default_level
+ if isinstance(level, basestring):
+ level = logging.getLevelName(level)
+ if hasattr(pargs, 'verbosity') and pargs.verbosity > 0:
+ if pargs.verbosity >= 2:
+ level = logging.DEBUG
+ else:
+ level = logging.INFO
+ elif hasattr(pargs, 'debug') and pargs.debug:
+ level = logging.DEBUG
+ elif hasattr(pargs, 'quiet') and pargs.quiet:
+ level = logging.ERROR
+ elif hasattr(pargs, 'log_level'):
+ level = logging.getLevelName(pargs.log_level)
+ return level
+
+
def log_traceback(alog, ex, ex_traceback):
"""
Log a python traceback in the log file
@@ -116,23 +297,31 @@ def which(exe_str):
If path is found, the full path is returned. Else it returns None.
"""
paths = os.environ.get('PATH', None)
- state = None
+ resolved_exe = None
if paths is None:
# log warning
msg = "PATH env var is not defined."
log.error(msg)
- return state
+ return resolved_exe
for path in paths.split(":"):
exe_path = os.path.join(path, exe_str)
# print exe_path
if os.path.exists(exe_path):
- state = exe_path
+ resolved_exe = exe_path
break
- return state
+ # log.debug("Resolved cmd {e} to {x}".format(e=exe_str, x=resolved_exe))
+ return resolved_exe
+
+def which_or_raise(cmd):
+ resolved_cmd = which(cmd)
+ if resolved_cmd is None:
+ raise ExternalCommandNotFoundError("Unable to find required cmd '{c}'".format(c=cmd))
+ else:
+ return resolved_cmd
class Singleton(type):
@@ -157,3 +346,121 @@ class Singleton(type):
if cls.instance is None:
cls.instance = super(Singleton, cls).__call__(*args)
return cls.instance
+
+
+def nfs_exists_check(ff):
+ """
+ Central place for all NFS hackery
+
+ Return whether a file or a dir ff exists or not.
+ Call listdir() instead of os.path.exists() to eliminate NFS errors.
+
+ Added try/catch black hole exception cases to help trigger an NFS refresh
+
+ :rtype bool:
+ """
+ try:
+ # All we really need is opendir(), but listdir() is usually fast.
+ os.listdir(os.path.dirname(os.path.realpath(ff)))
+ # But is it a file or a directory? We do not know until it actually exists.
+ if os.path.exists(ff):
+ return True
+ # Might be a directory, so refresh itself too.
+ # Not sure this is necessary, since we already ran this on parent,
+ # but it cannot hurt.
+ os.listdir(os.path.realpath(ff))
+ if os.path.exists(ff):
+ return True
+ except OSError:
+ pass
+
+ # The rest is probably unnecessary, but it cannot hurt.
+
+ # try to trigger refresh for File case
+ try:
+ f = open(ff, 'r')
+ f.close()
+ except Exception:
+ pass
+
+ # try to trigger refresh for Directory case
+ try:
+ _ = os.stat(ff)
+ _ = os.listdir(ff)
+ except Exception:
+ pass
+
+ # Call externally
+ # this is taken from Yuan
+ cmd = "ls %s" % ff
+ rcode = 1
+ try:
+ p = subprocess.Popen([cmd], shell=True)
+ rcode = p.wait()
+ except Exception:
+ pass
+
+ return rcode == 0
+
+
+def nfs_refresh(path, ntimes=3, sleep_time=1.0):
+ while True:
+ if nfs_exists_check(path):
+ return True
+ ntimes -= 1
+ if ntimes <= 0:
+ break
+ time.sleep(sleep_time)
+ log.warn("NFS refresh failed; unable to resolve {p}".format(p=path))
+ return False
+
+
+@contextmanager
+def ignored(*exceptions):
+ try:
+ yield
+ except exceptions:
+ pass
+
+
+def get_dataset_metadata(path):
+ """
+ Returns DataSetMeta data or raises ValueError, KeyError
+
+ :param path:
+ :return:
+ """
+ f = ET.parse(path).getroot().attrib
+ mt = f['MetaType']
+ uuid = f['UniqueId']
+ if mt in FileTypes.ALL_DATASET_TYPES().keys():
+ return DataSetMetaData(uuid, mt)
+ else:
+ raise ValueError("Unsupported dataset type '{t}'".format(t=mt))
+
+
+def get_dataset_metadata_or_none(path):
+ """
+ Returns DataSetMeta data, else None
+
+ :param path:
+ :return:
+ """
+ try:
+ return get_dataset_metadata(path)
+ except Exception:
+ return None
+
+
+def is_dataset(path):
+ """peek into the XML to get the MetaType"""
+ return get_dataset_metadata_or_none(path) is not None
+
+
+def walker(root_dir, file_filter_func):
+ """Yield paths under root_dir for which file_filter_func(path) is True"""
+ for root, dnames, fnames in os.walk(root_dir):
+ for fname in fnames:
+ path = os.path.join(root, fname)
+ if file_filter_func(path):
+ yield path
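
A minimal sketch of wiring the new logging helpers into a command-line tool; the argparse parser here is a bare stand-in and is assumed to carry the standard pbcommand log options (--debug/--quiet/--log-level):

    import argparse
    import logging

    from pbcommand.utils import (setup_logger, get_parsed_args_log_level,
                                 which_or_raise, Constants)

    parser = argparse.ArgumentParser()  # assumed to be decorated with the common log options
    pargs = parser.parse_args()
    level = get_parsed_args_log_level(pargs, default_level=logging.INFO)
    setup_logger("/tmp/my_tool.log", level, formatter=Constants.LOG_FMT_STD)
    # resolve an external dependency up front; raises ExternalCommandNotFoundError if missing
    exe = which_or_raise("samtools")
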
diff --git a/pbcommand/validators.py b/pbcommand/validators.py
index 8ac41f8..3d595bd 100644
--- a/pbcommand/validators.py
+++ b/pbcommand/validators.py
@@ -1,64 +1,24 @@
import os
import logging
import functools
-import subprocess
-
+from pbcommand.utils import nfs_exists_check
log = logging.getLogger(__name__)
def trigger_nfs_refresh(ff):
- """
- Central place for all NFS hackery
-
- Return whether a file or a dir ff exists or not.
- Call ls instead of python os.path.exists to eliminate NFS errors.
-
- Added try/catch black hole exception cases to help trigger an NFS refresh
-
- :rtype bool:
-
- # Yuan Li and various people contributed.
- """
- # try to trigger refresh for File case
- try:
- f = open(ff, 'r')
- f.close()
- except Exception:
- pass
-
- # try to trigger refresh for Directory case
- try:
- _ = os.stat(ff)
- _ = os.listdir(ff)
- except Exception:
- pass
-
- # Call externally
- cmd = "ls %s" % ff
- _, rcode, _ = subprocess.check_call(cmd)
-
- return rcode == 0
-
-
-def _trigger_nfs_refresh_and_ignore(ff):
- """
-
- :rtype str
- """
- _ = trigger_nfs_refresh(ff)
- return ff
+ # keeping this for backward compatibility
+ return nfs_exists_check(ff)
def _validate_resource(func, resource):
"""Validate the existence of a file/dir"""
- # Attempt to trigger an NFS metadata refresh
- _ = trigger_nfs_refresh(resource)
+ _ = nfs_exists_check(resource)
if func(resource):
return os.path.abspath(resource)
else:
- raise IOError("Unable to find {f}".format(f=resource))
+ raise IOError("Unable to find '{f}'".format(f=resource))
validate_file = functools.partial(_validate_resource, os.path.isfile)
@@ -66,6 +26,26 @@ validate_dir = functools.partial(_validate_resource, os.path.isdir)
validate_output_dir = functools.partial(_validate_resource, os.path.isdir)
+def validate_or(f1, f2, error_msg):
+ """
+ Apply Valid functions f1, then f2 (if failure occurs)
+
+ :param error_msg: Default message to print
+ """
+ @functools.wraps(f1)
+ def wrapper(path):
+ try:
+ return f1(path)
+ except Exception:
+ try:
+ return f2(path)
+ except Exception as e:
+ log.error("{m} {p}\n{e}".format(m=error_msg, p=path, e=repr(e)))
+ raise
+
+ return wrapper
+
+
def validate_report(report_file_name):
"""
Raise ValueError if report contains path seps
@@ -82,7 +62,7 @@ def validate_fofn(fofn):
:raises: IOError if any file is not found.
:return: (str) abspath of the input fofn
"""
- _ = trigger_nfs_refresh(fofn)
+ _ = nfs_exists_check(fofn)
if os.path.isfile(fofn):
file_names = fofn_to_files(os.path.abspath(fofn))
@@ -95,7 +75,7 @@ def validate_fofn(fofn):
def fofn_to_files(fofn):
"""Util func to convert a bas/bax fofn file to a list of bas/bax files."""
- _ = trigger_nfs_refresh(fofn)
+ _ = nfs_exists_check(fofn)
if os.path.exists(fofn):
with open(fofn, 'r') as f:
@@ -105,7 +85,7 @@ def fofn_to_files(fofn):
if not os.path.isfile(bas_file):
# try one more time to find the file by
# performing an NFS refresh
- found = trigger_nfs_refresh(bas_file)
+ found = nfs_exists_check(bas_file)
if not found:
raise IOError("Unable to find bas/bax file '{f}'".format(f=bas_file))
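
A small usage sketch of the new validate_or combinator, composing the existing file/dir validators:

    from pbcommand.validators import validate_file, validate_dir, validate_or

    validate_file_or_dir = validate_or(validate_file, validate_dir,
                                       "Expected an existing file or directory")
    # returns the abspath if either validator succeeds; logs the message and re-raises otherwise
    path = validate_file_or_dir("/path/to/resource")
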
diff --git a/setup.py b/setup.py
index c3032e9..8a91df2 100644
--- a/setup.py
+++ b/setup.py
@@ -36,7 +36,7 @@ setup(
name='pbcommand',
version=version,
license='BSD',
- author='mpkocher',
+ author='mpkocher natechols',
author_email='mkocher@pacificbiosciences.com',
url="https://github.com/PacificBiosciences/pbcommand",
download_url='https://github.com/PacificBiosciences/pbcommand/tarball/{v}'.format(v=version),
@@ -48,6 +48,7 @@ setup(
packages=find_packages(),
package_data={"pbcommand": ["schemas/*.avsc"]},
zip_safe=False,
+ entry_points={'console_scripts': ['pbservice = pbcommand.services.cli:main']},
extras_require={"pbcore": ["pbcore", "ipython", "autopep8"],
"interactive": ['prompt_toolkit']},
classifiers=['Development Status :: 4 - Beta',
diff --git a/tests/data/dev_example_dev_txt_app_tool_contract.json b/tests/data/dev_example_dev_txt_app_tool_contract.json
index 0565b70..6620f43 100644
--- a/tests/data/dev_example_dev_txt_app_tool_contract.json
+++ b/tests/data/dev_example_dev_txt_app_tool_contract.json
@@ -43,7 +43,7 @@
{
"title": "Txt outfile",
"description": "Generic Output Txt file",
- "default_name": "output.txt",
+ "default_name": "output",
"id": "txt_out",
"file_type_id": "PacBio.FileTypes.txt"
}
diff --git a/tests/data/dev_example_resolved_tool_contract.json b/tests/data/dev_example_resolved_tool_contract.json
index fce2fe2..03a3a61 100644
--- a/tests/data/dev_example_resolved_tool_contract.json
+++ b/tests/data/dev_example_resolved_tool_contract.json
@@ -17,6 +17,7 @@
"resources": [],
"is_distributed": false,
"task_type": "pbsmrtpipe.task_types.standard",
- "tool_contract_id": "pbcommand.tools.dev_app"
+ "tool_contract_id": "pbcommand.tools.dev_app",
+ "log_level": "INFO"
}
}
diff --git a/tests/data/dev_example_tool_contract.json b/tests/data/dev_example_tool_contract.json
index c320ee6..e151daf 100644
--- a/tests/data/dev_example_tool_contract.json
+++ b/tests/data/dev_example_tool_contract.json
@@ -43,7 +43,7 @@
{
"title": "Filtered Fasta file",
"description": "Filtered Fasta file",
- "default_name": "filter.fasta",
+ "default_name": "filter",
"id": "fasta_out",
"file_type_id": "PacBio.FileTypes.Fasta"
}
diff --git a/tests/data/dev_gather_fasta_app_tool_contract.json b/tests/data/dev_gather_fasta_app_tool_contract.json
index 9095ece..e532bec 100644
--- a/tests/data/dev_gather_fasta_app_tool_contract.json
+++ b/tests/data/dev_gather_fasta_app_tool_contract.json
@@ -15,7 +15,7 @@
{
"title": "Chunk JSON",
"description": "Output Fasta",
- "default_name": "gathered.fasta",
+ "default_name": "gathered",
"id": "output",
"file_type_id": "PacBio.FileTypes.Fasta"
}
diff --git a/tests/data/dev_scatter_fasta_app_tool_contract.json b/tests/data/dev_scatter_fasta_app_tool_contract.json
index 0b7b179..c21571c 100644
--- a/tests/data/dev_scatter_fasta_app_tool_contract.json
+++ b/tests/data/dev_scatter_fasta_app_tool_contract.json
@@ -39,7 +39,7 @@
{
"title": "Chunk JSON",
"description": "Scattered/Chunked Fasta Chunk.json",
- "default_name": "fasta.chunks.json",
+ "default_name": "fasta.chunks",
"id": "cjson",
"file_type_id": "PacBio.FileTypes.CHUNK"
}
diff --git a/tests/data/example-reports/overview.json b/tests/data/example-reports/overview.json
index 5fd9b14..c003db8 100644
--- a/tests/data/example-reports/overview.json
+++ b/tests/data/example-reports/overview.json
@@ -1,6 +1,7 @@
{
"tables": [],
- "_version": "2.1",
+ "_comment": "Manually updated by MK for 0.3.9",
+ "_version": "0.3.9",
"_changelist": 127707,
"attributes": [
{
@@ -15,5 +16,6 @@
}
],
"id": "overview",
+ "title": "Overview Report",
"plotGroups": []
}
\ No newline at end of file
diff --git a/tests/data/pbcommand.tasks.dev_fastq2fasta_tool_contract.json b/tests/data/pbcommand.tasks.dev_fastq2fasta_tool_contract.json
index b3d8630..3e9ba28 100644
--- a/tests/data/pbcommand.tasks.dev_fastq2fasta_tool_contract.json
+++ b/tests/data/pbcommand.tasks.dev_fastq2fasta_tool_contract.json
@@ -5,7 +5,7 @@
"serialization": "json"
},
"tool_contract": {
- "_comment": "Created by v0.2.14",
+ "_comment": "Created by v0.3.5",
"description": "Quick tool dev_fastq2fasta pbcommand.tasks.dev_fastq2fasta",
"input_types": [
{
@@ -20,7 +20,7 @@
"nproc": 1,
"output_types": [
{
- "default_name": "file.fasta",
+ "default_name": "file",
"description": "description for <FileType id=PacBio.FileTypes.Fasta name=file.fasta >",
"file_type_id": "PacBio.FileTypes.Fasta",
"id": "Label PacBio.FileTypes.Fasta_0",
@@ -28,10 +28,57 @@
}
],
"resource_types": [],
- "schema_options": [],
+ "schema_options": [
+ {
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "pb_option": {
+ "default": 1234.0,
+ "description": "Beta Description",
+ "name": "Beta Name",
+ "option_id": "pbcommand.task_options.beta",
+ "type": "number"
+ },
+ "properties": {
+ "pbcommand.task_options.beta": {
+ "default": 1234.0,
+ "description": "Beta Description",
+ "title": "Beta Name",
+ "type": "number"
+ }
+ },
+ "required": [
+ "pbcommand.task_options.beta"
+ ],
+ "title": "JSON Schema for pbcommand.task_options.beta",
+ "type": "object"
+ },
+ {
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "pb_option": {
+ "default": true,
+ "description": "Option gamma description",
+ "name": "Option gamma",
+ "option_id": "pbcommand.task_options.gamma",
+ "type": "boolean"
+ },
+ "properties": {
+ "pbcommand.task_options.gamma": {
+ "default": true,
+ "description": "Option gamma description",
+ "title": "Option gamma",
+ "type": "boolean"
+ }
+ },
+ "required": [
+ "pbcommand.task_options.gamma"
+ ],
+ "title": "JSON Schema for pbcommand.task_options.gamma",
+ "type": "object"
+ }
+ ],
"task_type": "pbsmrtpipe.task_types.standard",
"tool_contract_id": "pbcommand.tasks.dev_fastq2fasta"
},
"tool_contract_id": "pbcommand.tasks.dev_fastq2fasta",
"version": "0.1.0"
-}
\ No newline at end of file
+}
diff --git a/tests/data/pbcommand.tasks.dev_qhello_world_tool_contract.json b/tests/data/pbcommand.tasks.dev_qhello_world_tool_contract.json
index bdf026e..68edff9 100644
--- a/tests/data/pbcommand.tasks.dev_qhello_world_tool_contract.json
+++ b/tests/data/pbcommand.tasks.dev_qhello_world_tool_contract.json
@@ -20,7 +20,7 @@
"nproc": 1,
"output_types": [
{
- "default_name": "file.fasta",
+ "default_name": "file",
"description": "description for <FileType id=PacBio.FileTypes.Fasta name=file.fasta >",
"file_type_id": "PacBio.FileTypes.Fasta",
"id": "Label PacBio.FileTypes.Fasta_0",
@@ -58,4 +58,4 @@
},
"tool_contract_id": "pbcommand.tasks.dev_qhello_world",
"version": "0.2.1"
-}
\ No newline at end of file
+}
diff --git a/tests/data/pbcommand.tasks.dev_txt_custom_outs_tool_contract.json b/tests/data/pbcommand.tasks.dev_txt_custom_outs_tool_contract.json
index 1e1148d..add091c 100644
--- a/tests/data/pbcommand.tasks.dev_txt_custom_outs_tool_contract.json
+++ b/tests/data/pbcommand.tasks.dev_txt_custom_outs_tool_contract.json
@@ -20,14 +20,14 @@
"nproc": 1,
"output_types": [
{
- "default_name": "PacBio.FileTypes.txt_file_0.txt",
+ "default_name": "PacBio.FileTypes.txt_file_0",
"description": "File <FileType id=PacBio.FileTypes.txt name=file.txt >",
"file_type_id": "PacBio.FileTypes.txt",
"id": "label_PacBio.FileTypes.txt",
"title": "<FileType id=PacBio.FileTypes.txt name=file.txt >"
},
{
- "default_name": "PacBio.FileTypes.txt_file_1.txt",
+ "default_name": "PacBio.FileTypes.txt_file_1",
"description": "File <FileType id=PacBio.FileTypes.txt name=file.txt >",
"file_type_id": "PacBio.FileTypes.txt",
"id": "label_PacBio.FileTypes.txt",
@@ -41,4 +41,4 @@
},
"tool_contract_id": "pbcommand.tasks.dev_txt_custom_outs",
"version": "0.1.0"
-}
\ No newline at end of file
+}
diff --git a/tests/data/pbcommand.tasks.dev_txt_hello_tool_contract.json b/tests/data/pbcommand.tasks.dev_txt_hello_tool_contract.json
index f704adb..6e94ae7 100644
--- a/tests/data/pbcommand.tasks.dev_txt_hello_tool_contract.json
+++ b/tests/data/pbcommand.tasks.dev_txt_hello_tool_contract.json
@@ -20,14 +20,14 @@
"nproc": 3,
"output_types": [
{
- "default_name": "file.txt",
+ "default_name": "file",
"description": "description for <FileType id=PacBio.FileTypes.txt name=file.txt >",
"file_type_id": "PacBio.FileTypes.txt",
"id": "Label PacBio.FileTypes.txt_0",
"title": "<FileType id=PacBio.FileTypes.txt name=file.txt >"
},
{
- "default_name": "file.txt",
+ "default_name": "file",
"description": "description for <FileType id=PacBio.FileTypes.txt name=file.txt >",
"file_type_id": "PacBio.FileTypes.txt",
"id": "Label PacBio.FileTypes.txt_1",
@@ -41,4 +41,4 @@
},
"tool_contract_id": "pbcommand.tasks.dev_txt_hello",
"version": "0.1.0"
-}
\ No newline at end of file
+}
diff --git a/tests/data/resolved_contract_01.json b/tests/data/resolved_contract_01.json
index 8f52fc9..9bf0e5d 100644
--- a/tests/data/resolved_contract_01.json
+++ b/tests/data/resolved_contract_01.json
@@ -18,6 +18,7 @@
],
"resources": [],
"task_type": "pbsmrtpipe.task_types.standard",
- "tool_contract_id": "pbcommand.tasks.dev_txt_app"
+ "tool_contract_id": "pbcommand.tasks.dev_txt_app",
+ "log_level": "INFO"
}
-}
\ No newline at end of file
+}
diff --git a/tests/data/resolved_tool_contract_dev_app.json b/tests/data/resolved_tool_contract_dev_app.json
index 8f52fc9..9bf0e5d 100644
--- a/tests/data/resolved_tool_contract_dev_app.json
+++ b/tests/data/resolved_tool_contract_dev_app.json
@@ -18,6 +18,7 @@
],
"resources": [],
"task_type": "pbsmrtpipe.task_types.standard",
- "tool_contract_id": "pbcommand.tasks.dev_txt_app"
+ "tool_contract_id": "pbcommand.tasks.dev_txt_app",
+ "log_level": "INFO"
}
-}
\ No newline at end of file
+}
diff --git a/tests/test_common_cmdline_core.py b/tests/test_common_cmdline_core.py
index 102958d..2ccdbd1 100644
--- a/tests/test_common_cmdline_core.py
+++ b/tests/test_common_cmdline_core.py
@@ -17,7 +17,7 @@ def args_runner(*args, **kwargs):
def _example_parser():
p = get_default_argparser("1.0.0", "Example Mock Parser")
- p = CU.add_debug_option(p)
+ p = CU.add_log_debug_option(p)
p.add_argument('example_file', type=str, help="No testing of existence")
return p
diff --git a/tests/test_e2e_example_apps.py b/tests/test_e2e_example_apps.py
index b9d624c..ac86eeb 100644
--- a/tests/test_e2e_example_apps.py
+++ b/tests/test_e2e_example_apps.py
@@ -29,6 +29,8 @@ class TestQuickDevHelloWorld(pbcommand.testkit.PbTestApp):
REQUIRES_PBCORE = False
INPUT_FILES = [get_data_file("example.txt")]
+ IS_DISTRIBUTED = True
+ RESOLVED_IS_DISTRIBUTED = True
class TestQuickTxt(pbcommand.testkit.PbTestApp):
@@ -38,6 +40,8 @@ class TestQuickTxt(pbcommand.testkit.PbTestApp):
REQUIRES_PBCORE = False
INPUT_FILES = [get_data_file("example.txt")]
+ IS_DISTRIBUTED = True
+ RESOLVED_IS_DISTRIBUTED = False # XXX is_distributed=False in task TC!
class TestQuickCustomTxtCustomOuts(pbcommand.testkit.PbTestApp):
diff --git a/tests/test_load_resolved_tool_contract.py b/tests/test_load_resolved_tool_contract.py
index 4453fd0..84af431 100644
--- a/tests/test_load_resolved_tool_contract.py
+++ b/tests/test_load_resolved_tool_contract.py
@@ -46,7 +46,7 @@ class TestResolveContract(unittest.TestCase):
tmp_file = tempfile.NamedTemporaryFile().name
max_nproc = 2
tool_options = {}
- rtc = resolve_tool_contract(tc, input_files, root_output_dir, root_tmp_dir, max_nproc, tool_options)
+ rtc = resolve_tool_contract(tc, input_files, root_output_dir, root_tmp_dir, max_nproc, tool_options, False)
log.info(pprint.pformat(rtc))
self.assertIsNotNone(rtc)
self.assertEqual(os.path.basename(rtc.task.output_files[0]),
diff --git a/tests/test_models_report.py b/tests/test_models_report.py
index 58980df..b4b6dfb 100644
--- a/tests/test_models_report.py
+++ b/tests/test_models_report.py
@@ -1,10 +1,12 @@
-
-import unittest
import json
import logging
+from pprint import pformat
+import re
+import unittest
-from pbcommand.models.report import Report
from pbcommand.pb_io import load_report_from_json
+from pbcommand.models.report import (Report, Attribute, PlotGroup, Plot, Table,
+ Column, PbReportError)
_SERIALIZED_JSON_DIR = 'example-reports'
@@ -32,17 +34,191 @@ class TestReportModel(unittest.TestCase):
},
])
+ def test_report_null_ns(self):
+ """Can't create a report without a namespace."""
+ with self.assertRaises(PbReportError):
+ r = Report(None)
+
+ def test_report_empty_ns(self):
+ """Can't create a report with an empty namespace."""
+ with self.assertRaises(PbReportError):
+ r = Report("")
+
+ def test_duplicate_ids(self):
+ """Can't add elements with the same id."""
+ with self.assertRaises(PbReportError):
+ r = Report('redfang')
+ r.add_attribute(Attribute('a', 'b'))
+ r.add_attribute(Attribute('a', 'c'))
+
+ def test_illegal_id(self):
+ """Ids must be alphanumeric with underscores"""
+ with self.assertRaises(PbReportError):
+ r = Report('redfang')
+ r.add_attribute(Attribute('a b', 'b'))
+ r.add_attribute(Attribute('a', 'c'))
+
+ def test_empty_id(self):
+ with self.assertRaises(PbReportError):
+ r = Report('')
+
+ def test_uppercase_id(self):
+ with self.assertRaises(PbReportError):
+ r = Report('A')
+
+ def test_to_dict(self):
+ """
+ The id of report sub elements is prepended with the id of the parent
+ element when to_dict is called.
+ """
+ r = Report('redfang')
+ a = Attribute('a', 'b')
+ a2 = Attribute('a2', 'b2')
+ r.add_attribute(a)
+ r.add_attribute(a2)
+
+ pg = PlotGroup('pgid')
+ pg.add_plot(Plot('pid', 'anImg'))
+ pg.add_plot(Plot('pid2', 'anImg2'))
+ r.add_plotgroup(pg)
+
+ t = Table('tabid')
+ t.add_column(Column('c1'))
+ r.add_table(t)
+
+ d = r.to_dict()
+
+ log.debug("\n" + pformat(d))
+
+ self.assertEqual('redfang', d['id'])
+ self.assertEqual('redfang.a', d['attributes'][0]['id'])
+ self.assertEqual('redfang.a2', d['attributes'][1]['id'])
+ self.assertEqual('redfang.pgid', d['plotGroups'][0]['id'])
+ self.assertEqual('redfang.pgid.pid', d['plotGroups'][0]['plots'][0]['id'])
+ self.assertEqual('redfang.pgid.pid2', d['plotGroups'][0]['plots'][1]['id'])
+
+ self.assertEqual('redfang.tabid', d['tables'][0]['id'])
+ self.assertEqual('redfang.tabid.c1', d['tables'][0]['columns'][0]['id'])
+
+ def test_version_and_changelist(self):
+ r = Report('example')
+ d = r.to_dict()
+ log.info("\n" + pformat(d))
+ self.assertTrue('_version' in d)
+ self.assertTrue('_changelist' in d)
+
+ # Not used anymore. All version information is encoded in _version;
+ # that should be sufficient.
+ # self.assertTrue(isinstance(d['_changelist'], int))
+ rx = re.compile(r'[0-9]*\.[0-9]*')
+ self.assertIsNotNone(rx.search(d['_version']))
+
+ def test_to_dict_multi(self):
+ """
+ Multiple complex elements.
+ The id of report sub elements is prepended with the id of the parent
+ element when to_dict is called.
+ """
+ r = Report('redfang')
+ a = Attribute('a', 'b')
+ a2 = Attribute('a2', 'b2')
+ r.add_attribute(a)
+ r.add_attribute(a2)
+
+ pg = PlotGroup('pgid')
+ pg.add_plot(Plot('pid', 'anImg'))
+ pg.add_plot(Plot('pid2', 'anImg2'))
+ r.add_plotgroup(pg)
+
+ pg = PlotGroup('pgid2')
+ pg.add_plot(Plot('pid2', 'anImg2'))
+ pg.add_plot(Plot('pid22', 'anImg22'))
+ r.add_plotgroup(pg)
+
+ t = Table('tabid')
+ t.add_column(Column('c1'))
+ r.add_table(t)
+
+ t = Table('tabid2')
+ t.add_column(Column('c2'))
+ r.add_table(t)
+
+ d = r.to_dict()
+
+ log.debug(str(d))
+
+ self.assertEqual('redfang', d['id'])
+ self.assertEqual('redfang.a', d['attributes'][0]['id'])
+ self.assertEqual('redfang.a2', d['attributes'][1]['id'])
+
+ self.assertEqual('redfang.pgid', d['plotGroups'][0]['id'])
+ self.assertEqual('redfang.pgid.pid', d['plotGroups'][0]['plots'][0]['id'])
+ self.assertEqual('redfang.pgid.pid2', d['plotGroups'][0]['plots'][1]['id'])
+
+ self.assertEqual('redfang.pgid2', d['plotGroups'][1]['id'])
+ self.assertEqual('redfang.pgid2.pid2', d['plotGroups'][1]['plots'][0]['id'])
+ self.assertEqual('redfang.pgid2.pid22', d['plotGroups'][1]['plots'][1]['id'])
+
+ self.assertEqual('redfang.tabid', d['tables'][0]['id'])
+ self.assertEqual('redfang.tabid.c1', d['tables'][0]['columns'][0]['id'])
+
+ self.assertEqual('redfang.tabid2', d['tables'][1]['id'])
+ self.assertEqual('redfang.tabid2.c2', d['tables'][1]['columns'][0]['id'])
+
+ log.info(repr(r))
+ self.assertIsNotNone(repr(r))
+
+ def test_get_attribute_by_id(self):
+ a = Attribute('a', 'b')
+ a2 = Attribute('b', 'b2')
+ attributes = [a, a2]
+ r = Report('redfang', attributes=attributes)
+
+ a1 = r.get_attribute_by_id('a')
+
+ self.assertEqual(a, a1)
+
+ def test_get_attribute_by_id_with_bad_id(self):
+ a1 = Attribute('a', 'b')
+ a2 = Attribute('b', 'b2')
+ attributes = [a1, a2]
+ report = Report('redfang', attributes=attributes)
+
+ a = report.get_attribute_by_id('a')
+ self.assertEqual(a.value, 'b')
+
+ bad_a = report.get_attribute_by_id('id_that_does_not_exist')
+ self.assertIsNone(bad_a)
+
def test_merge(self):
- r = Report.merge([
- Report.from_simple_dict("pbcommand_test",
- {"n_reads": 50, "n_zmws": 10},
- "pbcommand"),
- Report.from_simple_dict("pbcommand_test",
- {"n_reads": 250, "n_zmws": 50},
- "pbcommand")])
- attr = {a.id: a.value for a in r.attributes}
- self.assertEqual(attr['pbcommand_n_reads'], 300)
- self.assertEqual(attr['pbcommand_n_zmws'], 60)
+ EXPECTED_VALUES = {
+ "n_reads": 300,
+ "n_zmws": 60,
+ }
+ NAMES = {
+ "n_reads": "Number of reads",
+ "n_zmws": "Number of ZMWs"
+ }
+ chunks = [
+ Report("pbcommand_test",
+ attributes=[
+ Attribute(id_="n_reads", value=50, name="Number of reads"),
+ Attribute(id_="n_zmws", value=10, name="Number of ZMWs")],
+ dataset_uuids=["12345"]),
+ Report("pbcommand_test",
+ attributes=[
+ Attribute(id_="n_reads", value=250, name="Number of reads"),
+ Attribute(id_="n_zmws", value=50, name="Number of ZMWs")]),
+ ]
+ r = Report.merge(chunks)
+ self.assertEqual([a.id for a in r.attributes], ["n_reads", "n_zmws"])
+ self.assertEqual(r._dataset_uuids, ["12345"])
+ for attr in r.attributes:
+ self.assertEqual(attr.value, EXPECTED_VALUES[attr.id])
+ self.assertEqual(attr.name, NAMES[attr.id])
+ for table in r.tables:
+ for column in table.columns:
+ self.assertEqual(column.header, NAMES[column.id])
def test_merge_tables(self):
names = ['laa_report1.json', 'laa_report2.json']
@@ -66,7 +242,3 @@ class TestReportModel(unittest.TestCase):
'BarcodeFasta3'])
else:
self.assertEqual(col.values, [1, 2, 4, 3])
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/tests/test_models_report_attribute.py b/tests/test_models_report_attribute.py
new file mode 100644
index 0000000..bb5cd0c
--- /dev/null
+++ b/tests/test_models_report_attribute.py
@@ -0,0 +1,53 @@
+
+import unittest
+import logging
+
+from pbcommand.models.report import Attribute, PbReportError
+
+log = logging.getLogger(__name__)
+
+class TestAttribute(unittest.TestCase):
+
+ def test_attribute_null_id(self):
+ """Can't create an attribute without an id."""
+ def _test():
+ a = Attribute(None, 1)
+
+ self.assertRaises(PbReportError, _test)
+
+ def test_attribute_null_value(self):
+ """Can't create an attribute without a value."""
+ def _test():
+ a = Attribute('bob', None)
+
+ self.assertRaises(PbReportError, _test)
+
+ def test_attribute_int_id(self):
+ """Test exception of handling Attribute with int ids"""
+ def _test():
+ a = Attribute(1, 12345)
+
+ self.assertRaises(PbReportError, _test)
+
+ def test_to_dict(self):
+ """
+ Test attribute to_dict function
+ """
+ a = Attribute('bob', 123, "Bob is the name")
+ d = a.to_dict()
+ self.assertEquals('bob', d['id'])
+ self.assertEquals(123, d['value'])
+ self.assertEquals('Bob is the name', d['name'])
+
+ def test_eq(self):
+ a = Attribute('a', 1234, "My Attribute")
+ b = Attribute('b', 1234, "My B Attribute")
+ c = Attribute('a', 1234, "My Attribute")
+ self.assertTrue(a == c)
+ self.assertTrue(a != b)
+ self.assertTrue(b != c)
+
+ def test_repr(self):
+ a = Attribute('a', 1234, "My Attribute")
+ log.info(repr(a))
+ self.assertIsNotNone(repr(a))
diff --git a/tests/test_models_report_column.py b/tests/test_models_report_column.py
new file mode 100644
index 0000000..f9a6c43
--- /dev/null
+++ b/tests/test_models_report_column.py
@@ -0,0 +1,76 @@
+import logging
+import unittest
+
+from pbcommand.models.report import Column, PbReportError
+
+log = logging.getLogger(__name__)
+
+
+class TestColumn(unittest.TestCase):
+
+ def test_column(self):
+ """Test: Can't create a Column without an id."""
+ def none_col():
+ c = Column(None)
+
+ self.assertRaises(PbReportError, none_col)
+
+ def test_repr(self):
+ c = Column('my_column', header="My Column", values=list(xrange(5)))
+ self.assertIsNotNone(repr(c))
+
+# def test_plotgroup_add_duplicate_plot(self):
+# '''
+# Test: Can't add plots with duplicate ids
+# '''
+# try:
+# log.info( TestPlotGroup.test_plotgroup_add_duplicate_plot.__doc__ )
+# pg = PlotGroup('foo')
+# pg.add_plot(Plot('id', 'i1'))
+#
+# try:
+# pg.add_plot( Plot('id', 'i2') )
+# self.fail( 'Cannot add plot with same id' )
+# except PbReportError:
+# pass
+# except:
+# log.error(traceback.format_exc())
+# raise
+#
+#
+#
+# def test_plotgroup_id_prepend(self):
+# '''
+# Test: PlotGroup id gets prepended to plot.id when plot is added
+# '''
+# try:
+# log.info( TestPlotGroup.test_plotgroup_id_prepend.__doc__ )
+# pg = PlotGroup('foo')
+# pg.add_plot( Plot('id', 'i1') )
+# self.assertEqual( 'foo.id', pg.plots[0].id )
+# except:
+# log.error(traceback.format_exc())
+# raise
+#
+#
+# def test_to_dict(self):
+# '''
+# Test plotGroup to_dict function
+# '''
+# try:
+# log.info( TestPlotGroup.test_to_dict.__doc__ )
+# a = PlotGroup(123, 'foo title', 'foo legend', 'foo thumbnail' )
+# a.add_plot( Plot('id', 'i1') )
+#
+# d = a.to_dict()
+# self.assertEquals( 123, d['id'] )
+# self.assertEquals( 'foo title', d['title'] )
+# self.assertEquals( 'foo legend', d['legend'] )
+# self.assertEquals( 'foo thumbnail', d['thumbnail'] )
+# self.assertEquals( 1, len(d['plots']) )
+# except:
+# log.error(traceback.format_exc())
+# raise
+#
+#
+#
diff --git a/tests/test_models_report_plot.py b/tests/test_models_report_plot.py
new file mode 100644
index 0000000..b892b7f
--- /dev/null
+++ b/tests/test_models_report_plot.py
@@ -0,0 +1,43 @@
+import logging
+import unittest
+from pprint import pformat
+
+from pbcommand.models.report import Plot, PbReportError
+
+log = logging.getLogger(__name__)
+
+
+class TestPlot(unittest.TestCase):
+
+ def test_plot_null_id(self):
+ """Can't create a plot without an id."""
+ with self.assertRaises(PbReportError):
+ p = Plot(None, 'foo')
+
+ def test_plot_null_image(self):
+ """Can't create a plot without an image."""
+ def _test():
+ p = Plot('123', None)
+ self.assertRaises(PbReportError, _test)
+
+ def test_to_dict(self):
+ """Test plot to dictionary method"""
+ a = Plot('123', 'foo', caption='foo is the caption')
+ d = a.to_dict()
+ self.assertEquals('123', d['id'])
+ self.assertEquals('foo', d['image'])
+ self.assertEquals('foo is the caption', d['caption'])
+ log.info(pformat(d, indent=4))
+ log.info(repr(a))
+ self.assertIsNotNone(repr(a))
+
+ def test_init_with_thumbnail(self):
+ """Initialize with a thumbnail"""
+ image = "my_image.png"
+ thumbnail = "my_image_thumb.png"
+ p = Plot('plot_1', image, thumbnail=thumbnail, caption="Awesome image")
+
+ self.assertEqual(p.thumbnail, thumbnail)
+ log.info(pformat(p.to_dict()))
+ self.assertTrue(isinstance(p.to_dict(), dict))
+
diff --git a/tests/test_models_report_plotgroup.py b/tests/test_models_report_plotgroup.py
new file mode 100644
index 0000000..1ee791c
--- /dev/null
+++ b/tests/test_models_report_plotgroup.py
@@ -0,0 +1,58 @@
+
+import unittest
+import logging
+from pprint import pformat
+
+from pbcommand.models.report import PlotGroup, Plot, PbReportError
+
+log = logging.getLogger(__name__)
+
+
+class TestPlotGroup(unittest.TestCase):
+
+ def test_init(self):
+ """Test constructor with kwargs"""
+ plot = Plot('a_plot', 'path/to/image.png', caption="My Image")
+ p = PlotGroup('my_pg', plots=[plot])
+ self.assertIsNotNone(p)
+
+ def test_plotgroup_null_id(self):
+ """Can't create a PlotGroup without an id."""
+ def _test():
+ p = PlotGroup(None)
+
+ self.assertRaises(PbReportError, _test)
+
+ def test_plotgroup_add_duplicate_plot(self):
+ """Can't add plots with duplicate ids."""
+ def _test():
+ pg = PlotGroup('foo')
+ pg.add_plot(Plot('id', 'i1'))
+ pg.add_plot(Plot('id', 'i2'))
+
+ self.assertRaises(PbReportError, _test)
+
+ def test_to_dict(self):
+ """Test plotGroup to_dict function."""
+ a = PlotGroup('123', title='foo title', legend='foo legend',
+ thumbnail='foo thumbnail')
+ a.add_plot(Plot('id', 'i1', caption='a caption'))
+
+ d = a.to_dict()
+ log.debug(pformat(d))
+
+ self.assertEquals('123', d['id'])
+ self.assertEquals('foo title', d['title'])
+ self.assertEquals('foo legend', d['legend'])
+ self.assertEquals('foo thumbnail', d['thumbnail'])
+ self.assertEquals(1, len(d['plots']))
+ log.info(a)
+ self.assertIsNotNone(repr(a))
+
+ def test_adding_incorrect_type(self):
+ """Validate type when adding Plots."""
+ def _test():
+ plots = ['Not a plot instance', 'Another bad plot.']
+ p = PlotGroup('my_plotgroup', plots=plots)
+
+ self.assertRaises(TypeError, _test)
diff --git a/tests/test_models_report_table.py b/tests/test_models_report_table.py
new file mode 100644
index 0000000..0f8714a
--- /dev/null
+++ b/tests/test_models_report_table.py
@@ -0,0 +1,121 @@
+import logging
+import unittest
+
+from pbcommand.models.report import Table, Column, PbReportError
+
+log = logging.getLogger(__name__)
+
+
+class TestEmptyTable(unittest.TestCase):
+
+ """Basic Smoke tests"""
+
+ def setUp(self):
+ self.columns = [Column('one', header="One"),
+ Column('two', header="Two"),
+ Column('three', header="Three")]
+
+ self.table = Table('my_table', columns=self.columns)
+
+ def test_str(self):
+ """Smoke test for conversion to str"""
+ log.info(str(self.table))
+ self.assertIsNotNone(str(self.table))
+
+ def test_columns(self):
+ """Test Columns"""
+ self.assertEqual(len(self.table.columns), 3)
+
+ def test_column_values(self):
+ """Basic check for column values"""
+ for column in self.table.columns:
+ self.assertEqual(len(column.values), 0)
+
+ def test_to_dict(self):
+ """Conversion to dictionary"""
+ self.assertTrue(isinstance(self.table.to_dict(), dict))
+ log.info(self.table.to_dict())
+
+
+class TestBasicTable(unittest.TestCase):
+
+ """Basic Smoke tests"""
+
+ def setUp(self):
+ self.columns = [Column('one', header="One"),
+ Column('two', header="Two"),
+ Column('three', header="Three")]
+ self.table = Table('my_table_with_values', columns=self.columns)
+ datum = {'one': list(xrange(3)), 'two': list('abc'),
+ 'three': 'file1 file2 file3'.split()}
+ for k, values in datum.iteritems():
+ for value in values:
+ self.table.add_data_by_column_id(k, value)
+
+ def test_str(self):
+ """Smoke test for conversion to str"""
+ log.info(str(self.table))
+ self.assertIsNotNone(str(self.table))
+
+ def test_columns(self):
+ """Test Columns"""
+ self.assertEqual(len(self.table.columns), 3)
+
+ def test_column_values(self):
+ """Basic check for column values"""
+ for column in self.table.columns:
+ self.assertEqual(len(column.values), 3)
+
+ def test_to_dict(self):
+ """Conversion to dictionary"""
+ self.assertTrue(isinstance(self.table.to_dict(), dict))
+ log.info(self.table.to_dict())
+
+
+class TestTable(unittest.TestCase):
+
+ def test_table(self):
+ """Can't create a Table without an id."""
+ def none_table():
+ t = Table(None)
+ self.assertRaises(PbReportError, none_table)
+
+ def test_add_column(self):
+ """Cannot add column with duplicate id."""
+ cs = [Column('1'), Column('2')]
+ t = Table('foo', columns=cs)
+
+ def add_dupe():
+ t.add_column(Column('2'))
+
+ self.assertSequenceEqual(cs, t.columns)
+
+ self.assertRaises(PbReportError, add_dupe)
+
+ def test_append_data(self):
+ """Append data to columns by index."""
+
+ cs = [Column('1'), Column('2')]
+ t = Table('foo', columns=cs)
+
+ t.append_data(0, 'whatev')
+ t.append_data(0, 'huh')
+ t.append_data(1, 'ernie')
+ t.append_data(1, 'bert')
+
+ self.assertSequenceEqual(['whatev', 'huh'], t.columns[0].values)
+ self.assertSequenceEqual(['ernie', 'bert'], t.columns[1].values)
+
+ def test_add_data_by_column_id(self):
+ """Added data values by column identifier."""
+
+ columns = [Column('one'), Column('two')]
+ table = Table('mytable', columns=columns)
+
+ datum = {'one': 12.0, 'two': 1234.0}
+
+ for k, v in datum.iteritems():
+ table.add_data_by_column_id(k, v)
+
+ self.assertTrue(12.0 in table.columns[0].values)
+ self.assertTrue(1234.0 in table.columns[1].values)
diff --git a/tests/test_parsers.py b/tests/test_parsers.py
index f954616..45bb8a1 100644
--- a/tests/test_parsers.py
+++ b/tests/test_parsers.py
@@ -31,7 +31,7 @@ class TestParsers(unittest.TestCase):
file_id="gff",
name="GFF file",
description="GFF file description",
- default_name="annotations.gff")
+ default_name="annotations")
tc_contract = p.to_contract()
d = tc_contract.to_dict()
inputs = d['tool_contract']['input_types']
@@ -54,7 +54,7 @@ class TestParsers(unittest.TestCase):
{
'title': 'GFF file',
'description': 'GFF file description',
- 'default_name': 'annotations.gff',
+ 'default_name': 'annotations',
'id': 'gff',
'file_type_id': 'PacBio.FileTypes.gff'
}
@@ -89,6 +89,22 @@ class TestParsers(unittest.TestCase):
opts2 = pa([])
self.assertFalse(opts2.loud)
+ def test_catch_output_file_extension(self):
+ p = get_pbparser(
+ "pbcommand.tasks.test_parsers",
+ "0.1.0",
+ "Tool Name",
+ "Tool Description",
+ "pbcommand-driver-exe ")
+ p.add_output_file_type(
+ file_type=FileTypes.GFF,
+ file_id="gff",
+ name="GFF file",
+ description="GFF file description",
+ default_name="annotations.gff")
+ tc = p.to_contract()
+ self.assertRaises(ValueError, tc.to_dict)
+
# TODO we should add a lot more tests for parser behavior
if __name__ == "__main__":
diff --git a/tests/test_pb_io_report.py b/tests/test_pb_io_report.py
index 8583b08..be37fa7 100644
--- a/tests/test_pb_io_report.py
+++ b/tests/test_pb_io_report.py
@@ -30,6 +30,9 @@ class TestSerializationOverviewReport(unittest.TestCase):
def test_id(self):
self.assertEqual(self.report.id, "overview")
+ def test_title(self):
+ self.assertEqual(self.report.title, "Overview Report")
+
def test_attributes(self):
self.assertTrue(len(self.report.attributes), 2)
diff --git a/tests/test_resolver.py b/tests/test_resolver.py
index f48e293..90343b2 100644
--- a/tests/test_resolver.py
+++ b/tests/test_resolver.py
@@ -22,10 +22,11 @@ class TestScatterResolver(unittest.TestCase):
def test_sanity(self):
d = get_temp_dir("resolved-tool-contract")
tc = load_tool_contract_from(get_data_file(self.FILE_NAME))
- rtc = resolve_scatter_tool_contract(tc, self.INPUT_FILES, d, d, self.MAX_NPROC, self.TOOL_OPTIONS, self.MAX_NCHUNKS, self.CHUNK_KEYS)
+ rtc = resolve_scatter_tool_contract(tc, self.INPUT_FILES, d, d, self.MAX_NPROC, self.TOOL_OPTIONS, self.MAX_NCHUNKS, self.CHUNK_KEYS, False)
self.assertIsInstance(rtc, ResolvedToolContract)
self.assertIsInstance(rtc.task, ResolvedScatteredToolContractTask)
self.assertEqual(rtc.task.max_nchunks, 7)
+ self.assertEqual(rtc.task.is_distributed, False)
class TestGatherResolver(unittest.TestCase):
@@ -40,7 +41,8 @@ class TestGatherResolver(unittest.TestCase):
def test_sanity(self):
d = get_temp_dir("resolved-tool-contract")
tc = load_tool_contract_from(get_data_file(self.FILE_NAME))
- rtc = resolve_gather_tool_contract(tc, self.INPUT_FILES, d, d, self.MAX_NPROC, self.TOOL_OPTIONS, self.CHUNK_KEY)
+ rtc = resolve_gather_tool_contract(tc, self.INPUT_FILES, d, d, self.MAX_NPROC, self.TOOL_OPTIONS, self.CHUNK_KEY, False)
self.assertIsInstance(rtc, ResolvedToolContract)
self.assertIsInstance(rtc.task, ResolvedGatherToolContractTask)
self.assertEqual(rtc.task.chunk_key, self.CHUNK_KEY)
+ self.assertEqual(rtc.task.is_distributed, False)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index b70ef4f..856a581 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,6 +1,9 @@
import functools
import unittest
-from pbcommand.utils import Singleton, compose
+import argparse
+import logging
+
+from pbcommand.utils import Singleton, compose, get_parsed_args_log_level
class TestSingleton(unittest.TestCase):
@@ -50,4 +53,41 @@ class TestCompose(unittest.TestCase):
f = compose(add_five, add_two)
value = f(5)
- self.assertEquals(value, 12)
\ No newline at end of file
+ self.assertEquals(value, 12)
+
+
+class TestLogging(unittest.TestCase):
+
+ def test_get_parsed_args_log_level(self):
+ # XXX more of an integration test, sorry - we need to ensure that
+ # these functions work in combination with get_parsed_args_log_level
+ from pbcommand.common_options import (
+ add_log_debug_option, add_log_quiet_option, add_log_verbose_option,
+ add_log_level_option)
+ def _get_argparser(level="INFO"):
+ p = argparse.ArgumentParser()
+ p.add_argument("--version", action="store_true")
+ add_log_level_option(add_log_debug_option(add_log_quiet_option(
+ add_log_verbose_option(p))), default_level=level)
+ return p
+ p = _get_argparser().parse_args([])
+ l = get_parsed_args_log_level(p)
+ self.assertEqual(l, logging.INFO)
+ p = _get_argparser().parse_args(["--quiet"])
+ l = get_parsed_args_log_level(p)
+ self.assertEqual(l, logging.ERROR)
+ p = _get_argparser().parse_args(["--debug"])
+ l = get_parsed_args_log_level(p)
+ self.assertEqual(l, logging.DEBUG)
+ p = _get_argparser("ERROR").parse_args(["--verbose"])
+ l = get_parsed_args_log_level(p)
+ self.assertEqual(l, logging.INFO)
+ p = _get_argparser("DEBUG").parse_args(["--log-level=WARNING"])
+ l = get_parsed_args_log_level(p)
+ self.assertEqual(l, logging.WARNING)
+ p = _get_argparser("NOTSET").parse_args([])
+ l = get_parsed_args_log_level(p)
+ self.assertEqual(l, logging.NOTSET)
+ p = _get_argparser(logging.NOTSET).parse_args([])
+ l = get_parsed_args_log_level(p)
+ self.assertEqual(l, logging.NOTSET)
diff --git a/tox.ini b/tox.ini
index 9f98f34..8cc4324 100644
--- a/tox.ini
+++ b/tox.ini
@@ -3,10 +3,12 @@
# test suite on all supported python versions. To use it, "pip install tox"
# and then run "tox" from this directory.
+# Adding numpy for not great reasons.
[tox]
envlist = py27
[testenv]
commands = nosetests -s --verbose --logging-config log_nose.cfg
deps =
+ numpy
nose
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-pbcommand.git