[med-svn] [cwltool] 01/01: Imported Upstream version 1.0.20160203221531
Michael Crusoe
misterc-guest at moszumanska.debian.org
Sat Feb 13 00:33:20 UTC 2016
This is an automated email from the git hooks/post-receive script.
misterc-guest pushed a commit to annotated tag upstream/1.0.20160203221531
in repository cwltool.
commit 2f2fe3ebfadf79edfef734254d442ac86c65b627
Author: Michael R. Crusoe <crusoe at ucdavis.edu>
Date: Fri Feb 5 02:16:23 2016 -0800
Imported Upstream version 1.0.20160203221531
---
PKG-INFO | 68 +
README.rst | 57 +
cwltool.egg-info/PKG-INFO | 68 +
cwltool.egg-info/SOURCES.txt | 62 +
cwltool.egg-info/dependency_links.txt | 1 +
cwltool.egg-info/entry_points.txt | 4 +
cwltool.egg-info/requires.txt | 6 +
cwltool.egg-info/top_level.txt | 1 +
cwltool.egg-info/zip-safe | 1 +
cwltool/__init__.py | 1 +
cwltool/__main__.py | 4 +
cwltool/aslist.py | 5 +
cwltool/builder.py | 164 ++
cwltool/cwlrdf.py | 80 +
cwltool/cwltest.py | 178 ++
cwltool/docker.py | 101 +
cwltool/docker_uid.py | 112 ++
cwltool/draft2tool.py | 289 +++
cwltool/errors.py | 2 +
cwltool/expression.py | 137 ++
cwltool/factory.py | 25 +
cwltool/flatten.py | 20 +
cwltool/job.py | 212 +++
cwltool/main.py | 516 ++++++
cwltool/pathmapper.py | 81 +
cwltool/process.py | 372 ++++
cwltool/sandboxjs.py | 145 ++
cwltool/schemas/draft-2/cwl-avro.yml | 1929 ++++++++++++++++++++
.../schemas/draft-3/CommandLineTool-standalone.yml | 2 +
cwltool/schemas/draft-3/CommandLineTool.yml | 637 +++++++
cwltool/schemas/draft-3/CommonWorkflowLanguage.yml | 11 +
cwltool/schemas/draft-3/Process.yml | 549 ++++++
cwltool/schemas/draft-3/README.md | 21 +
cwltool/schemas/draft-3/UserGuide.yml | 4 +
cwltool/schemas/draft-3/Workflow.yml | 473 +++++
cwltool/schemas/draft-3/concepts.md | 378 ++++
cwltool/schemas/draft-3/contrib.md | 12 +
cwltool/schemas/draft-3/index.yml | 6 +
cwltool/schemas/draft-3/intro.md | 21 +
cwltool/schemas/draft-3/invocation.md | 145 ++
.../salad/schema_salad/metaschema/field_name.yml | 46 +
.../schema_salad/metaschema/field_name_proc.yml | 8 +
.../schema_salad/metaschema/field_name_schema.yml | 14 +
.../schema_salad/metaschema/field_name_src.yml | 8 +
.../salad/schema_salad/metaschema/ident_res.yml | 53 +
.../schema_salad/metaschema/ident_res_proc.yml | 20 +
.../schema_salad/metaschema/ident_res_schema.yml | 14 +
.../schema_salad/metaschema/ident_res_src.yml | 20 +
.../schema_salad/metaschema/import_include.md | 112 ++
.../salad/schema_salad/metaschema/link_res.yml | 55 +
.../schema_salad/metaschema/link_res_proc.yml | 21 +
.../schema_salad/metaschema/link_res_schema.yml | 16 +
.../salad/schema_salad/metaschema/link_res_src.yml | 21 +
.../salad/schema_salad/metaschema/metaschema.yml | 437 +++++
.../draft-3/salad/schema_salad/metaschema/salad.md | 256 +++
.../salad/schema_salad/metaschema/vocab_res.yml | 35 +
.../schema_salad/metaschema/vocab_res_proc.yml | 15 +
.../schema_salad/metaschema/vocab_res_schema.yml | 21 +
.../schema_salad/metaschema/vocab_res_src.yml | 15 +
cwltool/schemas/draft-3/userguide-intro.md | 9 +
cwltool/update.py | 313 ++++
cwltool/workflow.py | 587 ++++++
setup.cfg | 5 +
setup.py | 48 +
64 files changed, 9049 insertions(+)
diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..aaeefe9
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,68 @@
+Metadata-Version: 1.1
+Name: cwltool
+Version: 1.0.20160203221531
+Summary: Common workflow language reference implementation
+Home-page: https://github.com/common-workflow-language/common-workflow-language
+Author: Common workflow language working group
+Author-email: common-workflow-language at googlegroups.com
+License: Apache 2.0
+Download-URL: https://github.com/common-workflow-language/common-workflow-language
+Description: ==================================================================
+ Common workflow language tool description reference implementation
+ ==================================================================
+
+ This is the reference implementation of the Common Workflow Language. It is
+ intended to be feature complete and provide comprehensive validation of CWL
+ files as well as provide other tools related to working with CWL.
+
+ This is written and tested for Python 2.7.
+
+ The reference implementation consists of two packages. The "cwltool" package
+ is the primary Python module containing the reference implementation in the
+ "cwltool" module and console executable by the same name.
+
+ The "cwl-runner" package is optional and provides an additional entry point
+ under the alias "cwl-runner", which is the implementation-agnostic name for the
+ default CWL interpreter installed on a host.
+
+ Install
+ -------
+
+ Installing the official package from PyPI (this will install the "cwltool" package as well)::
+
+ pip install cwl-runner
+
+ Or from source::
+
+ git clone https://github.com/common-workflow-language/cwltool.git
+ cd cwltool && python setup.py install
+ cd cwl-runner && python setup.py install
+
+ Run on the command line
+ -----------------------
+
+ Simple command::
+
+ cwl-runner [tool] [job]
+
+ Import as a module
+ ------------------
+
+ Add::
+
+ import cwltool
+
+ to your script.
+
+ Use with boot2docker
+ --------------------
+ boot2docker runs Docker inside a virtual machine, and it only mounts ``/Users``
+ into it. The default behavior of CWL is to create temporary directories under
+ e.g. ``/var``, which is not accessible to Docker containers.
+
+ To run CWL successfully with boot2docker you need to set the ``--tmpdir-prefix``
+ and ``--tmp-outdir-prefix`` to somewhere under ``/Users``::
+
+ $ cwl-runner --tmp-outdir-prefix=/Users/username/project --tmpdir-prefix=/Users/username/project wc-tool.cwl wc-job.json
+
+Platform: UNKNOWN
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..b811f73
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,57 @@
+==================================================================
+Common workflow language tool description reference implementation
+==================================================================
+
+This is the reference implementation of the Common Workflow Language. It is
+intended to be feature complete and provide comprehensive validation of CWL
+files as well as provide other tools related to working with CWL.
+
+This is written and tested for Python 2.7.
+
+The reference implementation consists of two packages. The "cwltool" package
+is the primary Python module containing the reference implementation in the
+"cwltool" module and console executable by the same name.
+
+The "cwl-runner" package is optional and provides an additional entry point
+under the alias "cwl-runner", which is the implementation-agnostic name for the
+default CWL interpreter installed on a host.
+
+Install
+-------
+
+Installing the official package from PyPI (this will install the "cwltool" package as well)::
+
+ pip install cwl-runner
+
+Or from source::
+
+ git clone https://github.com/common-workflow-language/cwltool.git
+ cd cwltool && python setup.py install
+ cd cwl-runner && python setup.py install
+
+Run on the command line
+-----------------------
+
+Simple command::
+
+ cwl-runner [tool] [job]
+
+Import as a module
+------------------
+
+Add::
+
+ import cwltool
+
+to your script.
+
+Use with boot2docker
+--------------------
+boot2docker runs Docker inside a virtual machine, and it only mounts ``/Users``
+into it. The default behavior of CWL is to create temporary directories under
+e.g. ``/var``, which is not accessible to Docker containers.
+
+To run CWL successfully with boot2docker you need to set the ``--tmpdir-prefix``
+and ``--tmp-outdir-prefix`` to somewhere under ``/Users``::
+
+ $ cwl-runner --tmp-outdir-prefix=/Users/username/project --tmpdir-prefix=/Users/username/project wc-tool.cwl wc-job.json
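
A note on the "Import as a module" section above: the ``cwltool.factory`` module added by this commit (see ``cwltool/factory.py`` further down) is the simplest programmatic entry point. A minimal sketch, assuming cwltool and its dependencies are installed and that a tool description ``echo.cwl`` with an input named ``inp`` exists (both names are hypothetical)::

    import cwltool.factory

    # Defaults to workflow.defaultMakeTool and main.single_job_executor.
    fac = cwltool.factory.Factory()
    echo = fac.make("echo.cwl")          # hypothetical tool description
    # Keyword arguments become the job order handed to the executor; with the
    # default executor the return value is the collected output object.
    result = echo(inp="hello world")
    print(result)
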
diff --git a/cwltool.egg-info/PKG-INFO b/cwltool.egg-info/PKG-INFO
new file mode 100644
index 0000000..aaeefe9
--- /dev/null
+++ b/cwltool.egg-info/PKG-INFO
@@ -0,0 +1,68 @@
+Metadata-Version: 1.1
+Name: cwltool
+Version: 1.0.20160203221531
+Summary: Common workflow language reference implementation
+Home-page: https://github.com/common-workflow-language/common-workflow-language
+Author: Common workflow language working group
+Author-email: common-workflow-language at googlegroups.com
+License: Apache 2.0
+Download-URL: https://github.com/common-workflow-language/common-workflow-language
+Description: ==================================================================
+ Common workflow language tool description reference implementation
+ ==================================================================
+
+ This is the reference implementation of the Common Workflow Language. It is
+ intended to be feature complete and provide comprehensive validation of CWL
+ files as well as provide other tools related to working with CWL.
+
+ This is written and tested for Python 2.7.
+
+ The reference implementation consists of two packages. The "cwltool" package
+ is the primary Python module containing the reference implementation in the
+ "cwltool" module and console executable by the same name.
+
+ The "cwl-runner" package is optional and provides an additional entry point
+ under the alias "cwl-runner", which is the implementation-agnostic name for the
+ default CWL interpreter installed on a host.
+
+ Install
+ -------
+
+ Installing the official package from PyPI (this will install the "cwltool" package as well)::
+
+ pip install cwl-runner
+
+ Or from source::
+
+ git clone https://github.com/common-workflow-language/cwltool.git
+ cd cwltool && python setup.py install
+ cd cwl-runner && python setup.py install
+
+ Run on the command line
+ -----------------------
+
+ Simple command::
+
+ cwl-runner [tool] [job]
+
+ Import as a module
+ ------------------
+
+ Add::
+
+ import cwltool
+
+ to your script.
+
+ Use with boot2docker
+ --------------------
+ boot2docker runs Docker inside a virtual machine, and it only mounts ``/Users``
+ into it. The default behavior of CWL is to create temporary directories under
+ e.g. ``/var``, which is not accessible to Docker containers.
+
+ To run CWL successfully with boot2docker you need to set the ``--tmpdir-prefix``
+ and ``--tmp-outdir-prefix`` to somewhere under ``/Users``::
+
+ $ cwl-runner --tmp-outdir-prefix=/Users/username/project --tmpdir-prefix=/Users/username/project wc-tool.cwl wc-job.json
+
+Platform: UNKNOWN
diff --git a/cwltool.egg-info/SOURCES.txt b/cwltool.egg-info/SOURCES.txt
new file mode 100644
index 0000000..b542e5f
--- /dev/null
+++ b/cwltool.egg-info/SOURCES.txt
@@ -0,0 +1,62 @@
+README.rst
+setup.py
+cwltool/__init__.py
+cwltool/__main__.py
+cwltool/aslist.py
+cwltool/builder.py
+cwltool/cwlrdf.py
+cwltool/cwltest.py
+cwltool/docker.py
+cwltool/docker_uid.py
+cwltool/draft2tool.py
+cwltool/errors.py
+cwltool/expression.py
+cwltool/factory.py
+cwltool/flatten.py
+cwltool/job.py
+cwltool/main.py
+cwltool/pathmapper.py
+cwltool/process.py
+cwltool/sandboxjs.py
+cwltool/update.py
+cwltool/workflow.py
+cwltool.egg-info/PKG-INFO
+cwltool.egg-info/SOURCES.txt
+cwltool.egg-info/dependency_links.txt
+cwltool.egg-info/entry_points.txt
+cwltool.egg-info/requires.txt
+cwltool.egg-info/top_level.txt
+cwltool.egg-info/zip-safe
+cwltool/schemas/draft-2/cwl-avro.yml
+cwltool/schemas/draft-3/CommandLineTool-standalone.yml
+cwltool/schemas/draft-3/CommandLineTool.yml
+cwltool/schemas/draft-3/CommonWorkflowLanguage.yml
+cwltool/schemas/draft-3/Process.yml
+cwltool/schemas/draft-3/README.md
+cwltool/schemas/draft-3/UserGuide.yml
+cwltool/schemas/draft-3/Workflow.yml
+cwltool/schemas/draft-3/concepts.md
+cwltool/schemas/draft-3/contrib.md
+cwltool/schemas/draft-3/index.yml
+cwltool/schemas/draft-3/intro.md
+cwltool/schemas/draft-3/invocation.md
+cwltool/schemas/draft-3/userguide-intro.md
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/field_name.yml
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/field_name_proc.yml
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/field_name_schema.yml
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/field_name_src.yml
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/ident_res.yml
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/ident_res_proc.yml
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/ident_res_schema.yml
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/ident_res_src.yml
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/import_include.md
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/link_res.yml
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/link_res_proc.yml
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/link_res_schema.yml
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/link_res_src.yml
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/metaschema.yml
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/salad.md
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/vocab_res.yml
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/vocab_res_proc.yml
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/vocab_res_schema.yml
+cwltool/schemas/draft-3/salad/schema_salad/metaschema/vocab_res_src.yml
\ No newline at end of file
diff --git a/cwltool.egg-info/dependency_links.txt b/cwltool.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/cwltool.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/cwltool.egg-info/entry_points.txt b/cwltool.egg-info/entry_points.txt
new file mode 100644
index 0000000..1664a6f
--- /dev/null
+++ b/cwltool.egg-info/entry_points.txt
@@ -0,0 +1,4 @@
+[console_scripts]
+cwltest = cwltool.cwltest:main
+cwltool = cwltool.main:main
+
diff --git a/cwltool.egg-info/requires.txt b/cwltool.egg-info/requires.txt
new file mode 100644
index 0000000..fc82bc5
--- /dev/null
+++ b/cwltool.egg-info/requires.txt
@@ -0,0 +1,6 @@
+requests
+PyYAML
+rdflib >= 4.2.0
+rdflib-jsonld >= 0.3.0
+shellescape
+schema_salad == 1.6.20160202222448
diff --git a/cwltool.egg-info/top_level.txt b/cwltool.egg-info/top_level.txt
new file mode 100644
index 0000000..f5c7cc1
--- /dev/null
+++ b/cwltool.egg-info/top_level.txt
@@ -0,0 +1 @@
+cwltool
diff --git a/cwltool.egg-info/zip-safe b/cwltool.egg-info/zip-safe
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/cwltool.egg-info/zip-safe
@@ -0,0 +1 @@
+
diff --git a/cwltool/__init__.py b/cwltool/__init__.py
new file mode 100644
index 0000000..70d587a
--- /dev/null
+++ b/cwltool/__init__.py
@@ -0,0 +1 @@
+__author__ = 'peter.amstutz at curoverse.com'
diff --git a/cwltool/__main__.py b/cwltool/__main__.py
new file mode 100644
index 0000000..ae4ff8a
--- /dev/null
+++ b/cwltool/__main__.py
@@ -0,0 +1,4 @@
+import main
+import sys
+
+sys.exit(main.main())
diff --git a/cwltool/aslist.py b/cwltool/aslist.py
new file mode 100644
index 0000000..f34a048
--- /dev/null
+++ b/cwltool/aslist.py
@@ -0,0 +1,5 @@
+def aslist(l):
+ if isinstance(l, list):
+ return l
+ else:
+ return [l]
diff --git a/cwltool/builder.py b/cwltool/builder.py
new file mode 100644
index 0000000..c6b3463
--- /dev/null
+++ b/cwltool/builder.py
@@ -0,0 +1,164 @@
+import copy
+from aslist import aslist
+import expression
+import avro
+import schema_salad.validate as validate
+
+CONTENT_LIMIT = 64 * 1024
+
+def substitute(value, replace):
+ if replace[0] == "^":
+ return substitute(value[0:value.rindex('.')], replace[1:])
+ else:
+ return value + replace
+
+class Builder(object):
+
+ def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]):
+ bindings = []
+ binding = None
+ if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
+ binding = copy.copy(schema["inputBinding"])
+
+ if "position" in binding:
+ binding["position"] = aslist(lead_pos) + aslist(binding["position"]) + aslist(tail_pos)
+ else:
+ binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)
+
+ if "valueFrom" in binding:
+ binding["do_eval"] = binding["valueFrom"]
+ binding["valueFrom"] = datum
+
+ # Handle union types
+ if isinstance(schema["type"], list):
+ for t in schema["type"]:
+ if isinstance(t, basestring) and self.names.has_name(t, ""):
+ avsc = self.names.get_name(t, "")
+ elif isinstance(t, dict) and "name" in t and self.names.has_name(t["name"], ""):
+ avsc = self.names.get_name(t["name"], "")
+ else:
+ avsc = avro.schema.make_avsc_object(t, self.names)
+ if validate.validate(avsc, datum):
+ schema = copy.deepcopy(schema)
+ schema["type"] = t
+ return self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos)
+ raise validate.ValidationException("'%s' is not a valid union %s" % (datum, schema["type"]))
+ elif isinstance(schema["type"], dict):
+ st = copy.deepcopy(schema["type"])
+ if binding and "inputBinding" not in st and "itemSeparator" not in binding and st["type"] in ("array", "map"):
+ st["inputBinding"] = {}
+ bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos))
+ else:
+ if schema["type"] in self.schemaDefs:
+ schema = self.schemaDefs[schema["type"]]
+
+ if schema["type"] == "record":
+ for f in schema["fields"]:
+ if f["name"] in datum:
+ bindings.extend(self.bind_input(f, datum[f["name"]], lead_pos=lead_pos, tail_pos=f["name"]))
+ else:
+ datum[f["name"]] = f.get("default")
+
+ if schema["type"] == "map":
+ for n, item in datum.items():
+ b2 = None
+ if binding:
+ b2 = copy.deepcopy(binding)
+ b2["valueFrom"] = [n, item]
+ bindings.extend(self.bind_input({"type": schema["values"], "inputBinding": b2},
+ item, lead_pos=n, tail_pos=tail_pos))
+ binding = None
+
+ if schema["type"] == "array":
+ for n, item in enumerate(datum):
+ b2 = None
+ if binding:
+ b2 = copy.deepcopy(binding)
+ b2["valueFrom"] = item
+ bindings.extend(self.bind_input({"type": schema["items"], "inputBinding": b2},
+ item, lead_pos=n, tail_pos=tail_pos))
+ binding = None
+
+ if schema["type"] == "File":
+ self.files.append(datum)
+ if binding and binding.get("loadContents"):
+ with self.fs_access.open(datum["path"], "rb") as f:
+ datum["contents"] = f.read(CONTENT_LIMIT)
+
+ if "secondaryFiles" in schema:
+ if "secondaryFiles" not in datum:
+ datum["secondaryFiles"] = []
+ for sf in aslist(schema["secondaryFiles"]):
+ if isinstance(sf, dict) or "$(" in sf or "${" in sf:
+ sfpath = self.do_eval(sf, context=datum)
+ if isinstance(sfpath, basestring):
+ sfpath = {"path": sfpath, "class": "File"}
+ else:
+ sfpath = {"path": substitute(datum["path"], sf), "class": "File"}
+ if isinstance(sfpath, list):
+ datum["secondaryFiles"].extend(sfpath)
+ else:
+ datum["secondaryFiles"].append(sfpath)
+ for sf in datum.get("secondaryFiles", []):
+ self.files.append(sf)
+
+ # Position to front of the sort key
+ if binding:
+ for bi in bindings:
+ bi["position"] = binding["position"] + bi["position"]
+ bindings.append(binding)
+
+ return bindings
+
+ def tostr(self, value):
+ if isinstance(value, dict) and value.get("class") == "File":
+ if "path" not in value:
+ raise WorkflowException("File object must have \"path\": %s" % (value))
+ return value["path"]
+ else:
+ return str(value)
+
+ def generate_arg(self, binding):
+ value = binding["valueFrom"]
+ if "do_eval" in binding:
+ value = self.do_eval(binding["do_eval"], context=value)
+
+ prefix = binding.get("prefix")
+ sep = binding.get("separate", True)
+
+ l = []
+ if isinstance(value, list):
+ if binding.get("itemSeparator"):
+ l = [binding["itemSeparator"].join([self.tostr(v) for v in value])]
+ elif binding.get("do_eval"):
+ value = [v["path"] if isinstance(v, dict) and v.get("class") == "File" else v for v in value]
+ return ([prefix] if prefix else []) + value
+ elif prefix:
+ return [prefix]
+ else:
+ return []
+ elif isinstance(value, dict) and value.get("class") == "File":
+ l = [value]
+ elif isinstance(value, dict):
+ return [prefix] if prefix else []
+ elif value is True and prefix:
+ return [prefix]
+ elif value is False or value is None:
+ return []
+ else:
+ l = [value]
+
+ args = []
+ for j in l:
+ if sep:
+ args.extend([prefix, self.tostr(j)])
+ else:
+ args.append(prefix + self.tostr(j))
+
+ return [a for a in args if a is not None]
+
+ def do_eval(self, ex, context=None, pull_image=True):
+ return expression.do_eval(ex, self.job, self.requirements,
+ self.outdir, self.tmpdir,
+ self.resources,
+ context=context, pull_image=pull_image)
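
The ``substitute`` helper at the top of ``builder.py`` encodes the ``secondaryFiles`` suffix rule used later in ``bind_input``: a plain suffix is appended to the primary path, and each leading ``^`` first strips one extension. A short sketch of that behavior (file names are made up)::

    from cwltool.builder import substitute

    print(substitute("reads.bam", ".bai"))     # "reads.bam.bai"  -- suffix appended
    print(substitute("reads.bam", "^.bai"))    # "reads.bai"      -- one extension stripped
    print(substitute("a.tar.gz", "^^.lst"))    # "a.lst"          -- two extensions stripped
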
diff --git a/cwltool/cwlrdf.py b/cwltool/cwlrdf.py
new file mode 100644
index 0000000..02b42bf
--- /dev/null
+++ b/cwltool/cwlrdf.py
@@ -0,0 +1,80 @@
+import json
+from rdflib import Graph, plugin
+from rdflib.serializer import Serializer
+
+def printrdf(workflow, wf, ctx, sr):
+ wf["@context"] = ctx
+ g = Graph().parse(data=json.dumps(wf), format='json-ld', location=workflow)
+ print(g.serialize(format=sr))
+
+def printdot(workflow, wf, ctx, sr):
+ wf["@context"] = ctx
+ g = Graph().parse(data=json.dumps(wf), format='json-ld', location=workflow)
+
+ print "digraph {"
+
+ #g.namespace_manager.qname(predicate)
+
+ def lastpart(uri):
+ uri = str(uri)
+ if "/" in uri:
+ return uri[uri.rindex("/")+1:]
+ else:
+ return uri
+
+ qres = g.query(
+ """SELECT ?step ?run
+ WHERE {
+ ?step cwl:run ?run .
+ }""")
+
+ for step, run in qres:
+ print '"%s" [label="%s"]' % (lastpart(step), "%s (%s)" % (lastpart(step), lastpart(run)))
+
+ qres = g.query(
+ """SELECT ?step ?inp ?source
+ WHERE {
+ ?wf cwl:steps ?step .
+ ?step cwl:inputs ?inp .
+ ?inp cwl:source ?source .
+ }""")
+
+ for step, inp, source in qres:
+ print '"%s" [shape=box]' % (lastpart(inp))
+ print '"%s" -> "%s" [label="%s"]' % (lastpart(source), lastpart(inp), "")
+ print '"%s" -> "%s" [label="%s"]' % (lastpart(inp), lastpart(step), "")
+
+ qres = g.query(
+ """SELECT ?step ?out
+ WHERE {
+ ?wf cwl:steps ?step .
+ ?step cwl:outputs ?out .
+ }""")
+
+ for step, out in qres:
+ print '"%s" [shape=box]' % (lastpart(out))
+ print '"%s" -> "%s" [label="%s"]' % (lastpart(step), lastpart(out), "")
+
+ qres = g.query(
+ """SELECT ?out ?source
+ WHERE {
+ ?wf cwl:outputs ?out .
+ ?out cwl:source ?source .
+ }""")
+
+ for out, source in qres:
+ print '"%s" [shape=octagon]' % (lastpart(out))
+ print '"%s" -> "%s" [label="%s"]' % (lastpart(source), lastpart(out), "")
+
+ qres = g.query(
+ """SELECT ?inp
+ WHERE {
+ ?wf rdf:type cwl:Workflow .
+ ?wf cwl:inputs ?inp .
+ }""")
+
+ for (inp,) in qres:
+ print '"%s" [shape=octagon]' % (lastpart(inp))
+
+
+ print "}"
diff --git a/cwltool/cwltest.py b/cwltool/cwltest.py
new file mode 100755
index 0000000..4fa10c4
--- /dev/null
+++ b/cwltool/cwltest.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+import shutil
+import tempfile
+import yaml
+import pipes
+import logging
+
+_logger = logging.getLogger("cwltool")
+_logger.addHandler(logging.StreamHandler())
+_logger.setLevel(logging.INFO)
+
+UNSUPPORTED_FEATURE = 33
+
+class CompareFail(Exception):
+ pass
+
+def compare(a, b):
+ try:
+ if isinstance(a, dict):
+ if a.get("class") == "File":
+ if not b["path"].endswith("/" + a["path"]):
+ raise CompareFail("%s does not end with %s" %(b["path"], a["path"]))
+ # ignore empty collections
+ b = {k: v for k, v in b.iteritems()
+ if not isinstance(v, (list, dict)) or len(v) > 0}
+ if len(a) != len(b):
+ raise CompareFail("expected %s\ngot %s" % (json.dumps(a, indent=4, sort_keys=True), json.dumps(b, indent=4, sort_keys=True)))
+ for c in a:
+ if a.get("class") != "File" or c != "path":
+ if c not in b:
+ raise CompareFail("%s not in %s" % (c, b))
+ if not compare(a[c], b[c]):
+ return False
+ return True
+ elif isinstance(a, list):
+ if len(a) != len(b):
+ raise CompareFail("expected %s\ngot %s" % (json.dumps(a, indent=4, sort_keys=True), json.dumps(b, indent=4, sort_keys=True)))
+ for c in xrange(0, len(a)):
+ if not compare(a[c], b[c]):
+ return False
+ return True
+ else:
+ if a != b:
+ raise CompareFail("%s != %s" % (a, b))
+ else:
+ return True
+ except Exception as e:
+ raise CompareFail(str(e))
+
+def run_test(args, i, t):
+ out = {}
+ outdir = None
+ try:
+ if "output" in t:
+ test_command = [args.tool]
+ # Add prefixes if running on MacOSX so that boot2docker writes to /Users
+ if 'darwin' in sys.platform:
+ outdir = tempfile.mkdtemp(prefix=os.path.abspath(os.path.curdir))
+ test_command.extend(["--tmp-outdir-prefix={}".format(outdir), "--tmpdir-prefix={}".format(outdir)])
+ else:
+ outdir = tempfile.mkdtemp()
+ test_command.extend(["--outdir={}".format(outdir),
+ "--quiet",
+ t["tool"],
+ t["job"]])
+ outstr = subprocess.check_output(test_command)
+ out = {"output": json.loads(outstr)}
+ else:
+ test_command = [args.tool,
+ "--conformance-test",
+ "--basedir=" + args.basedir,
+ "--no-container",
+ "--quiet",
+ t["tool"],
+ t["job"]]
+
+ outstr = subprocess.check_output(test_command)
+ out = yaml.load(outstr)
+ except ValueError as v:
+ _logger.error(v)
+ _logger.error(outstr)
+ except subprocess.CalledProcessError as err:
+ if err.returncode == UNSUPPORTED_FEATURE:
+ return UNSUPPORTED_FEATURE
+ else:
+ _logger.error("""Test failed: %s""", " ".join([pipes.quote(tc) for tc in test_command]))
+ _logger.error(t.get("doc"))
+ _logger.error("Returned non-zero")
+ return 1
+ except yaml.scanner.ScannerError as e:
+ _logger.error("""Test failed: %s""", " ".join([pipes.quote(tc) for tc in test_command]))
+ _logger.error(outstr)
+ _logger.error("Parse error %s", str(e))
+
+ pwd = os.path.abspath(os.path.dirname(t["job"]))
+ # t["args"] = map(lambda x: x.replace("$PWD", pwd), t["args"])
+ # if "stdin" in t:
+ # t["stdin"] = t["stdin"].replace("$PWD", pwd)
+
+ failed = False
+ if "output" in t:
+ checkkeys = ["output"]
+ else:
+ checkkeys = ["args", "stdin", "stdout", "createfiles"]
+
+ for key in checkkeys:
+ try:
+ compare(t.get(key), out.get(key))
+ except CompareFail as ex:
+ _logger.warn("""Test failed: %s""", " ".join([pipes.quote(tc) for tc in test_command]))
+ _logger.warn(t.get("doc"))
+ _logger.warn("%s expected %s\n got %s", key,
+ json.dumps(t.get(key), indent=4, sort_keys=True),
+ json.dumps(out.get(key), indent=4, sort_keys=True))
+ _logger.warn("Compare failure %s", ex)
+ failed = True
+
+ if outdir:
+ shutil.rmtree(outdir, True)
+
+ if failed:
+ return 1
+ else:
+ return 0
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--test", type=str, help="YAML file describing test cases", required=True)
+ parser.add_argument("--basedir", type=str, help="Basedir to use for tests", default=".")
+ parser.add_argument("-n", type=int, default=None, help="Run a specific test")
+ parser.add_argument("--tool", type=str, default="cwl-runner",
+ help="CWL runner executable to use (default 'cwl-runner')")
+ args = parser.parse_args()
+
+ if not args.test:
+ parser.print_help()
+ return 1
+
+ with open(args.test) as f:
+ tests = yaml.load(f)
+
+ failures = 0
+ unsupported = 0
+
+ if args.n is not None:
+ sys.stderr.write("\rTest [%i/%i] " % (args.n, len(tests)))
+ rt = run_test(args, args.n-1, tests[args.n-1])
+ if rt == 1:
+ failures += 1
+ elif rt == UNSUPPORTED_FEATURE:
+ unsupported += 1
+ else:
+ for i, t in enumerate(tests):
+ sys.stderr.write("\rTest [%i/%i] " % (i+1, len(tests)))
+ sys.stderr.flush()
+ rt = run_test(args, i, t)
+ if rt == 1:
+ failures += 1
+ elif rt == UNSUPPORTED_FEATURE:
+ unsupported += 1
+
+ if failures == 0 and unsupported == 0:
+ _logger.info("All tests passed")
+ return 0
+ else:
+ _logger.warn("%i failures, %i unsupported features", failures, unsupported)
+ return 1
+
+
+if __name__ == "__main__":
+ sys.exit(main())
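
For reference, ``main()`` above loads the ``--test`` file with ``yaml.load`` and passes each entry to ``run_test``, which reads ``tool``, ``job``, ``doc`` and either ``output`` (full-run mode) or the conformance keys ``args``, ``stdin``, ``stdout`` and ``createfiles``. A sketch of the equivalent in-memory structure, reusing the ``wc-tool.cwl``/``wc-job.json`` names from the README; the expected values are illustrative::

    tests = [
        {
            "doc": "Run wc and compare the collected output object",
            "tool": "wc-tool.cwl",
            "job": "wc-job.json",
            # Expected output object, keyed by output id; for File objects the
            # expected "path" only needs to match the end of the actual path.
            "output": {"output": {"class": "File", "path": "output.txt"}},
        },
        {
            "doc": "Conformance mode: only check the generated command line",
            "tool": "wc-tool.cwl",
            "job": "wc-job.json",
            "args": ["wc", "whale.txt"],       # illustrative expected argv
        },
    ]
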
diff --git a/cwltool/docker.py b/cwltool/docker.py
new file mode 100644
index 0000000..e9a56f4
--- /dev/null
+++ b/cwltool/docker.py
@@ -0,0 +1,101 @@
+import subprocess
+import logging
+import sys
+import requests
+import os
+import process
+import re
+import tempfile
+
+_logger = logging.getLogger("cwltool")
+
+def get_image(dockerRequirement, pull_image, dry_run=False):
+ found = False
+
+ if "dockerImageId" not in dockerRequirement and "dockerPull" in dockerRequirement:
+ dockerRequirement["dockerImageId"] = dockerRequirement["dockerPull"]
+
+ for ln in subprocess.check_output(["docker", "images", "--no-trunc", "--all"]).splitlines():
+ try:
+ m = re.match(r"^([^ ]+)\s+([^ ]+)\s+([^ ]+)", ln)
+ sp = dockerRequirement["dockerImageId"].split(":")
+ if len(sp) == 1:
+ sp.append("latest")
+ # check for repository:tag match or image id match
+ if ((sp[0] == m.group(1) and sp[1] == m.group(2)) or dockerRequirement["dockerImageId"] == m.group(3)):
+ found = True
+ break
+ except ValueError:
+ pass
+
+ if not found and pull_image:
+ if "dockerPull" in dockerRequirement:
+ cmd = ["docker", "pull", dockerRequirement["dockerPull"]]
+ _logger.info(str(cmd))
+ if not dry_run:
+ subprocess.check_call(cmd, stdout=sys.stderr)
+ found = True
+ elif "dockerFile" in dockerRequirement:
+ dockerfile_dir = tempfile.mkdtemp()
+ with open(os.path.join(dockerfile_dir, "Dockerfile"), "w") as df:
+ df.write(dockerRequirement["dockerFile"])
+ cmd = ["docker", "build", "--tag=%s" % dockerRequirement["dockerImageId"], dockerfile_dir]
+ _logger.info(str(cmd))
+ if not dry_run:
+ subprocess.check_call(cmd, stdout=sys.stderr)
+ found = True
+ elif "dockerLoad" in dockerRequirement:
+ cmd = ["docker", "load"]
+ _logger.info(str(cmd))
+ if not dry_run:
+ if os.path.exists(dockerRequirement["dockerLoad"]):
+ _logger.info("Loading docker image from %s", dockerRequirement["dockerLoad"])
+ with open(dockerRequirement["dockerLoad"], "rb") as f:
+ loadproc = subprocess.Popen(cmd, stdin=f, stdout=sys.stderr)
+ else:
+ loadproc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=sys.stderr)
+ _logger.info("Sending GET request to %s", dockerRequirement["dockerLoad"])
+ req = requests.get(dockerRequirement["dockerLoad"], stream=True)
+ n = 0
+ for chunk in req.iter_content(1024*1024):
+ n += len(chunk)
+ _logger.info("\r%i bytes" % (n))
+ loadproc.stdin.write(chunk)
+ loadproc.stdin.close()
+ rcode = loadproc.wait()
+ if rcode != 0:
+ raise process.WorkflowException("Docker load returned non-zero exit status %i" % (rcode))
+ found = True
+ elif "dockerImport" in dockerRequirement:
+ cmd = ["docker", "import", dockerRequirement["dockerImport"], dockerRequirement["dockerImageId"]]
+ _logger.info(str(cmd))
+ if not dry_run:
+ subprocess.check_call(cmd, stdout=sys.stderr)
+ found = True
+
+ return found
+
+
+def get_from_requirements(r, req, pull_image, dry_run=False):
+ if r:
+ errmsg = None
+ try:
+ subprocess.check_output(["docker", "version"])
+ except subprocess.CalledProcessError as e:
+ errmsg = "Cannot communicate with docker daemon: " + str(e)
+ except OSError as e:
+ errmsg = "'docker' executable not found: " + str(e)
+
+ if errmsg:
+ if req:
+ raise process.WorkflowException(errmsg)
+ else:
+ return None
+
+ if get_image(r, pull_image, dry_run):
+ return r["dockerImageId"]
+ else:
+ if req:
+ raise process.WorkflowException("Docker image %s not found" % r["dockerImageId"])
+
+ return None
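
``get_from_requirements`` above is the entry point the job runner uses: given a ``DockerRequirement``-style dict it checks that the docker client is reachable, makes sure the image is available via ``get_image`` (pull, build from ``dockerFile``, load, or import), and returns the image id. A minimal sketch; the image tag is only an example::

    import cwltool.docker as docker

    docker_req = {"dockerPull": "debian:8"}    # dockerImageId defaults to dockerPull

    # req=True turns a missing daemon or image into a WorkflowException;
    # with req=False the function returns None instead.
    img_id = docker.get_from_requirements(docker_req, req=True, pull_image=True)
    print(img_id)                              # "debian:8" once the image is present
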
diff --git a/cwltool/docker_uid.py b/cwltool/docker_uid.py
new file mode 100644
index 0000000..e8fbbe2
--- /dev/null
+++ b/cwltool/docker_uid.py
@@ -0,0 +1,112 @@
+import subprocess
+
+
+def docker_vm_uid():
+ """
+ Returns the UID of the default docker user inside the VM
+
+ When a host is using boot2docker or docker-machine to run docker with
+ boot2docker.iso (As on Mac OS X), the UID that mounts the shared filesystem
+ inside the VirtualBox VM is likely different than the user's UID on the host.
+ :return: The numeric UID (as a string) of the docker account inside
+ the boot2docker VM
+ """
+ if boot2docker_running():
+ return boot2docker_uid()
+ elif docker_machine_running():
+ return docker_machine_uid()
+ else:
+ return None
+
+
+def check_output_and_strip(cmd):
+ """
+ Passes a command list to subprocess.check_output, returning None
+ if an expected exception is raised
+ :param cmd: The command to execute
+ :return: Stripped string output of the command, or None if error
+ """
+ try:
+ result = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
+ return result.strip()
+ except (OSError, subprocess.CalledProcessError, TypeError, AttributeError):
+ # OSError is raised if command doesn't exist
+ # CalledProcessError is raised if command returns nonzero
+ # AttributeError is raised if result cannot be strip()ped
+ return None
+
+
+def docker_machine_name():
+ """
+ Get the machine name of the active docker-machine machine
+ :return: Name of the active machine or None if error
+ """
+ return check_output_and_strip(['docker-machine', 'active'])
+
+
+def cmd_output_matches(check_cmd, expected_status):
+ """
+ Runs a command and compares output to expected
+ :param check_cmd: Command list to execute
+ :param expected_status: Expected output, e.g. "Running" or "poweroff"
+ :return: Boolean value, indicating whether or not command result matched
+ """
+ if check_output_and_strip(check_cmd) == expected_status:
+ return True
+ else:
+ return False
+
+
+def boot2docker_running():
+ """
+ Checks if boot2docker CLI reports that boot2docker vm is running
+ :return: True if vm is running, False otherwise
+ """
+ return cmd_output_matches(['boot2docker', 'status'], 'running')
+
+
+def docker_machine_running():
+ """
+ Asks docker-machine for active machine and checks if its VM is running
+ :return: True if vm is running, False otherwise
+ """
+ machine_name = docker_machine_name()
+ return cmd_output_matches(['docker-machine', 'status', machine_name], 'Running')
+
+
+def cmd_output_to_int(cmd):
+ """
+ Runs the provided command and returns the integer value of the result
+ :param cmd: The command to run
+ :return: Integer value of result, or None if an error occurred
+ """
+ result = check_output_and_strip(cmd) # may return None
+ if result is not None:
+ try:
+ result = int(result)
+ except ValueError:
+ # ValueError is raised if int conversion fails
+ result = None
+ return result
+
+
+def boot2docker_uid():
+ """
+ Gets the UID of the docker user inside a running boot2docker vm
+ :return: the UID, or None if error (e.g. boot2docker not present or stopped)
+ """
+ return cmd_output_to_int(['boot2docker', 'ssh', 'id', '-u'])
+
+
+def docker_machine_uid():
+ """
+ Asks docker-machine for active machine and gets the UID of the docker user
+ inside the vm
+ :return: the UID, or None if error (e.g. docker-machine not present or stopped)
+ """
+ machine_name = docker_machine_name()
+ return cmd_output_to_int(['docker-machine', 'ssh', machine_name, "id -u"])
+
+
+if __name__ == '__main__':
+ print docker_vm_uid()
diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py
new file mode 100644
index 0000000..f5e50d3
--- /dev/null
+++ b/cwltool/draft2tool.py
@@ -0,0 +1,289 @@
+import avro.schema
+import json
+import copy
+from flatten import flatten
+import functools
+import os
+from pathmapper import PathMapper, DockerPathMapper
+from job import CommandLineJob
+import yaml
+import glob
+import logging
+import hashlib
+import random
+from process import Process, shortname, uniquename
+from errors import WorkflowException
+import schema_salad.validate as validate
+from aslist import aslist
+import expression
+import re
+import urlparse
+import tempfile
+from builder import CONTENT_LIMIT, substitute
+import shellescape
+import errno
+
+_logger = logging.getLogger("cwltool")
+
+class ExpressionTool(Process):
+ def __init__(self, toolpath_object, **kwargs):
+ super(ExpressionTool, self).__init__(toolpath_object, **kwargs)
+
+ class ExpressionJob(object):
+ def run(self, **kwargs):
+ try:
+ self.output_callback(self.builder.do_eval(self.script), "success")
+ except Exception as e:
+ _logger.warn("Failed to evaluate expression:\n%s", e, exc_info=(e if kwargs.get('debug') else False))
+ self.output_callback({}, "permanentFail")
+
+ def job(self, joborder, input_basedir, output_callback, **kwargs):
+ builder = self._init_job(joborder, input_basedir, **kwargs)
+
+ j = ExpressionTool.ExpressionJob()
+ j.builder = builder
+ j.script = self.tool["expression"]
+ j.output_callback = output_callback
+ j.requirements = self.requirements
+ j.hints = self.hints
+ j.outdir = None
+ j.tmpdir = None
+
+ yield j
+
+
+class CommandLineTool(Process):
+ def __init__(self, toolpath_object, **kwargs):
+ super(CommandLineTool, self).__init__(toolpath_object, **kwargs)
+
+ def makeJobRunner(self):
+ return CommandLineJob()
+
+ def makePathMapper(self, reffiles, input_basedir, **kwargs):
+ dockerReq, _ = self.get_requirement("DockerRequirement")
+ try:
+ if dockerReq and kwargs.get("use_container"):
+ return DockerPathMapper(reffiles, input_basedir)
+ else:
+ return PathMapper(reffiles, input_basedir)
+ except OSError as e:
+ if e.errno == errno.ENOENT:
+ raise WorkflowException("Missing input file %s" % e)
+
+ def job(self, joborder, input_basedir, output_callback, **kwargs):
+ builder = self._init_job(joborder, input_basedir, **kwargs)
+
+ if self.tool["baseCommand"]:
+ for n, b in enumerate(aslist(self.tool["baseCommand"])):
+ builder.bindings.append({
+ "position": [-1000000, n],
+ "valueFrom": b
+ })
+
+ if self.tool.get("arguments"):
+ for i, a in enumerate(self.tool["arguments"]):
+ if isinstance(a, dict):
+ a = copy.copy(a)
+ if a.get("position"):
+ a["position"] = [a["position"], i]
+ else:
+ a["position"] = [0, i]
+ a["do_eval"] = a["valueFrom"]
+ a["valueFrom"] = None
+ builder.bindings.append(a)
+ else:
+ builder.bindings.append({
+ "position": [0, i],
+ "valueFrom": a
+ })
+
+ builder.bindings.sort(key=lambda a: a["position"])
+
+ reffiles = set((f["path"] for f in builder.files))
+
+ j = self.makeJobRunner()
+ j.joborder = builder.job
+ j.stdin = None
+ j.stdout = None
+ j.successCodes = self.tool.get("successCodes")
+ j.temporaryFailCodes = self.tool.get("temporaryFailCodes")
+ j.permanentFailCodes = self.tool.get("permanentFailCodes")
+ j.requirements = self.requirements
+ j.hints = self.hints
+ j.name = uniquename(kwargs.get("name", str(id(j))))
+
+ _logger.debug("[job %s] initializing from %s%s",
+ j.name,
+ self.tool.get("id", ""),
+ " as part of %s" % kwargs["part_of"] if "part_of" in kwargs else "")
+ _logger.debug("[job %s] %s", j.name, json.dumps(joborder, indent=4))
+
+
+ builder.pathmapper = None
+
+ if self.tool.get("stdin"):
+ j.stdin = builder.do_eval(self.tool["stdin"])
+ if isinstance(j.stdin, dict) and "ref" in j.stdin:
+ j.stdin = builder.job[j.stdin["ref"][1:]]["path"]
+ reffiles.add(j.stdin)
+
+ if self.tool.get("stdout"):
+ j.stdout = builder.do_eval(self.tool["stdout"])
+ if os.path.isabs(j.stdout) or ".." in j.stdout:
+ raise validate.ValidationException("stdout must be a relative path")
+
+ builder.pathmapper = self.makePathMapper(reffiles, input_basedir, **kwargs)
+ builder.requirements = j.requirements
+
+ for f in builder.files:
+ f["path"] = builder.pathmapper.mapper(f["path"])[1]
+
+ _logger.debug("[job %s] command line bindings is %s", j.name, json.dumps(builder.bindings, indent=4))
+ _logger.debug("[job %s] path mappings is %s", j.name, json.dumps({p: builder.pathmapper.mapper(p) for p in builder.pathmapper.files()}, indent=4))
+
+ dockerReq, _ = self.get_requirement("DockerRequirement")
+ if dockerReq and kwargs.get("use_container"):
+ out_prefix = kwargs.get("tmp_outdir_prefix")
+ j.outdir = kwargs.get("outdir") or tempfile.mkdtemp(prefix=out_prefix)
+ tmpdir_prefix = kwargs.get('tmpdir_prefix')
+ j.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp(prefix=tmpdir_prefix)
+ else:
+ j.outdir = builder.outdir
+ j.tmpdir = builder.tmpdir
+
+ createFiles, _ = self.get_requirement("CreateFileRequirement")
+ j.generatefiles = {}
+ if createFiles:
+ for t in createFiles["fileDef"]:
+ j.generatefiles[builder.do_eval(t["filename"])] = copy.deepcopy(builder.do_eval(t["fileContent"]))
+
+ j.environment = {}
+ evr, _ = self.get_requirement("EnvVarRequirement")
+ if evr:
+ for t in evr["envDef"]:
+ j.environment[t["envName"]] = builder.do_eval(t["envValue"])
+
+ shellcmd, _ = self.get_requirement("ShellCommandRequirement")
+ if shellcmd:
+ cmd = []
+ for b in builder.bindings:
+ arg = builder.generate_arg(b)
+ if b.get("shellQuote", True):
+ arg = [shellescape.quote(a) for a in aslist(arg)]
+ cmd.extend(aslist(arg))
+ j.command_line = ["/bin/sh", "-c", " ".join(cmd)]
+ else:
+ j.command_line = flatten(map(builder.generate_arg, builder.bindings))
+
+ j.pathmapper = builder.pathmapper
+ j.collect_outputs = functools.partial(self.collect_output_ports, self.tool["outputs"], builder)
+ j.output_callback = output_callback
+
+ yield j
+
+ def collect_output_ports(self, ports, builder, outdir):
+ try:
+ custom_output = os.path.join(outdir, "cwl.output.json")
+ if builder.fs_access.exists(custom_output):
+ outputdoc = yaml.load(builder.fs_access.open(custom_output, "r"))
+ validate.validate_ex(self.names.get_name("outputs_record_schema", ""), outputdoc)
+ return outputdoc
+
+ ret = {}
+
+ for port in ports:
+ fragment = shortname(port["id"])
+ ret[fragment] = self.collect_output(port, builder, outdir)
+ validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
+ return ret if ret is not None else {}
+ except validate.ValidationException as e:
+ raise WorkflowException("Error validating output record, " + str(e) + "\n in " + json.dumps(ret, indent=4))
+
+ def collect_output(self, schema, builder, outdir):
+ r = None
+ if "outputBinding" in schema:
+ binding = schema["outputBinding"]
+ globpatterns = []
+ if "glob" in binding:
+ r = []
+ for gb in aslist(binding["glob"]):
+ try:
+ gb = builder.do_eval(gb)
+ globpatterns.append(gb)
+ if gb:
+ r.extend([{"path": g, "class": "File"} for g in builder.fs_access.glob(os.path.join(outdir, gb))])
+ except (OSError, IOError) as e:
+ _logger.warn(str(e))
+ for files in r:
+ checksum = hashlib.sha1()
+ with builder.fs_access.open(files["path"], "rb") as f:
+ contents = f.read(CONTENT_LIMIT)
+ if binding.get("loadContents"):
+ files["contents"] = contents
+ filesize = 0
+ while contents != "":
+ checksum.update(contents)
+ filesize += len(contents)
+ contents = f.read(1024*1024)
+ files["checksum"] = "sha1$%s" % checksum.hexdigest()
+ files["size"] = filesize
+ if "format" in schema:
+ files["format"] = builder.do_eval(schema["format"], context=files)
+
+ optional = False
+ singlefile = False
+ if isinstance(schema["type"], list):
+ if "null" in schema["type"]:
+ optional = True
+ if "File" in schema["type"]:
+ singlefile = True
+ elif schema["type"] == "File":
+ singlefile = True
+
+ if "outputEval" in binding:
+ r = builder.do_eval(binding["outputEval"], context=r)
+ if singlefile:
+ # Handle single file outputs not wrapped in a list
+ if r is not None and not isinstance(r, (list, tuple)):
+ r = [r]
+ if optional and r is None:
+ pass
+ elif (r is None or len(r) != 1 or not isinstance(r[0], dict) or "path" not in r[0]):
+ raise WorkflowException("Expression must return a file object for %s." % schema["id"])
+
+ if singlefile:
+ if not r and not optional:
+ raise WorkflowException("Did not find output file with glob pattern: '{}'".format(globpatterns))
+ elif not r and optional:
+ pass
+ elif isinstance(r, list):
+ if len(r) > 1:
+ raise WorkflowException("Multiple matches for output item that is a single file.")
+ else:
+ r = r[0]
+
+ if "secondaryFiles" in schema:
+ for primary in aslist(r):
+ if isinstance(primary, dict):
+ primary["secondaryFiles"] = []
+ for sf in aslist(schema["secondaryFiles"]):
+ if isinstance(sf, dict) or "$(" in sf or "${" in sf:
+ sfpath = builder.do_eval(sf, context=r)
+ if isinstance(sfpath, basestring):
+ sfpath = {"path": sfpath, "class": "File"}
+ else:
+ sfpath = {"path": substitute(primary["path"], sf), "class": "File"}
+
+ for sfitem in aslist(sfpath):
+ if builder.fs_access.exists(sfitem["path"]):
+ primary["secondaryFiles"].append(sfitem)
+
+ if not r and optional:
+ r = None
+
+ if not r and isinstance(schema["type"], dict) and schema["type"]["type"] == "record":
+ r = {}
+ for f in schema["type"]["fields"]:
+ r[shortname(f["name"])] = self.collect_output(f, builder, outdir)
+
+ return r
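
``collect_output`` above is driven by the shape of each output port: ``outputBinding.glob`` is evaluated and globbed under the output directory, ``loadContents`` keeps the first 64 KiB in ``contents``, ``outputEval`` can post-process the match list, and ``secondaryFiles`` are resolved with the same ``substitute`` rule as on inputs. For orientation, a port of roughly this shape (shown as the parsed Python dict, with illustrative values) is what it consumes::

    output_port = {
        "id": "#wc-tool.cwl/output",           # illustrative port id
        "type": "File",
        "outputBinding": {
            "glob": "output.txt",              # evaluated with do_eval, then globbed
            "loadContents": False,
        },
        "secondaryFiles": [".idx"],            # plain suffix handled via substitute()
    }
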
diff --git a/cwltool/errors.py b/cwltool/errors.py
new file mode 100644
index 0000000..59203d8
--- /dev/null
+++ b/cwltool/errors.py
@@ -0,0 +1,2 @@
+class WorkflowException(Exception):
+ pass
diff --git a/cwltool/expression.py b/cwltool/expression.py
new file mode 100644
index 0000000..03714fe
--- /dev/null
+++ b/cwltool/expression.py
@@ -0,0 +1,137 @@
+import docker
+import subprocess
+import json
+from aslist import aslist
+import logging
+import os
+from errors import WorkflowException
+import process
+import yaml
+import schema_salad.validate as validate
+import schema_salad.ref_resolver
+import sandboxjs
+import re
+
+_logger = logging.getLogger("cwltool")
+
+def jshead(engineConfig, rootvars):
+ return "\n".join(engineConfig + ["var %s = %s;" % (k, json.dumps(v)) for k, v in rootvars.items()])
+
+def exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image):
+ if ex["engine"] == "https://w3id.org/cwl/cwl#JsonPointer":
+ try:
+ obj = {"job": jobinput, "context": context, "outdir": outdir, "tmpdir": tmpdir}
+ return schema_salad.ref_resolver.resolve_json_pointer(obj, ex["script"])
+ except ValueError as v:
+ raise WorkflowException("%s in %s" % (v, obj))
+
+ if ex["engine"] == "https://w3id.org/cwl/cwl#JavascriptEngine":
+ engineConfig = []
+ for r in reversed(requirements):
+ if r["class"] == "ExpressionEngineRequirement" and r["id"] == "https://w3id.org/cwl/cwl#JavascriptEngine":
+ engineConfig = r.get("engineConfig", [])
+ break
+ return sandboxjs.execjs(ex["script"], jshead(engineConfig, jobinput, context, tmpdir, outdir))
+
+ for r in reversed(requirements):
+ if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
+ runtime = []
+
+ class DR(object):
+ pass
+ dr = DR()
+ dr.requirements = r.get("requirements", [])
+ dr.hints = r.get("hints", [])
+
+ (docker_req, docker_is_req) = process.get_feature(dr, "DockerRequirement")
+ img_id = None
+ if docker_req:
+ img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
+ if img_id:
+ runtime = ["docker", "run", "-i", "--rm", img_id]
+
+ inp = {
+ "script": ex["script"],
+ "engineConfig": r.get("engineConfig", []),
+ "job": jobinput,
+ "context": context,
+ "outdir": outdir,
+ "tmpdir": tmpdir,
+ }
+
+ _logger.debug("Invoking expression engine %s with %s",
+ runtime + aslist(r["engineCommand"]),
+ json.dumps(inp, indent=4))
+
+ sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
+ shell=False,
+ close_fds=True,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE)
+
+ (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
+ if sp.returncode != 0:
+ raise WorkflowException("Expression engine returned non-zero exit code on evaluation of\n%s" % json.dumps(inp, indent=4))
+
+ return json.loads(stdoutdata)
+
+ raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
+
+seg_symbol = r"""\w+"""
+seg_single = r"""\['([^']|\\')+'\]"""
+seg_double = r"""\["([^"]|\\")+"\]"""
+seg_index = r"""\[[0-9]+\]"""
+segments = r"(\.%s|%s|%s|%s)" % (seg_symbol, seg_single, seg_double, seg_index)
+segment_re = re.compile(segments, flags=re.UNICODE)
+param_re = re.compile(r"\$\((%s)%s*\)" % (seg_symbol, segments), flags=re.UNICODE)
+
+def next_seg(remain, obj):
+ if remain:
+ m = segment_re.match(remain)
+ if m.group(0)[0] == '.':
+ return next_seg(remain[m.end(0):], obj[m.group(0)[1:]])
+ elif m.group(0)[1] in ("'", '"'):
+ key = m.group(0)[2:-2].replace("\\'", "'").replace('\\"', '"')
+ return next_seg(remain[m.end(0):], obj[key])
+ else:
+ key = m.group(0)[1:-1]
+ return next_seg(remain[m.end(0):], obj[int(key)])
+ else:
+ return obj
+
+def param_interpolate(ex, obj, strip=True):
+ m = param_re.search(ex)
+ if m:
+ leaf = next_seg(m.group(0)[m.end(1) - m.start(0):-1], obj[m.group(1)])
+ if strip and len(ex.strip()) == len(m.group(0)):
+ return leaf
+ else:
+ leaf = json.dumps(leaf, sort_keys=True)
+ if leaf[0] == '"':
+ leaf = leaf[1:-1]
+ return ex[0:m.start(0)] + leaf + param_interpolate(ex[m.end(0):], obj, False)
+ else:
+ if "$(" in ex or "${" in ex:
+ _logger.warn("Warning possible workflow bug: found '$(' or '${' in '%s' but did not match valid parameter reference and InlineJavascriptRequirement not specified.", ex)
+ return ex
+
+
+def do_eval(ex, jobinput, requirements, outdir, tmpdir, resources, context=None, pull_image=True):
+ runtime = resources.copy()
+ runtime["tmpdir"] = tmpdir
+ runtime["outdir"] = outdir
+
+ rootvars = {
+ "inputs": jobinput,
+ "self": context,
+ "runtime": runtime
+ }
+
+ if isinstance(ex, dict) and "engine" in ex and "script" in ex:
+ return exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image)
+ if isinstance(ex, basestring):
+ for r in requirements:
+ if r["class"] == "InlineJavascriptRequirement":
+ return sandboxjs.interpolate(ex, jshead(r.get("expressionLib", []), rootvars))
+ return param_interpolate(ex, rootvars)
+ return ex
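
The regular expressions above define CWL parameter references: a root symbol followed by ``.field``, ``['field']``, ``["field"]`` or ``[index]`` segments, wrapped in ``$(...)``. ``param_interpolate`` resolves those segments against the root vars built in ``do_eval`` (``inputs``, ``self``, ``runtime``). A small sketch of that path; the input values are invented::

    from cwltool.expression import param_interpolate

    rootvars = {
        "inputs": {"sample": {"path": "/data/reads.bam", "size": 1024}},
        "runtime": {"outdir": "/tmp/out"},
    }

    # A reference that is the whole expression returns the leaf value itself...
    print(param_interpolate("$(inputs.sample.size)", rootvars))    # 1024
    # ...while references embedded in a string are substituted as text.
    print(param_interpolate("$(inputs.sample.path) in $(runtime.outdir)", rootvars))
    # "/data/reads.bam in /tmp/out"
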
diff --git a/cwltool/factory.py b/cwltool/factory.py
new file mode 100644
index 0000000..53d96e0
--- /dev/null
+++ b/cwltool/factory.py
@@ -0,0 +1,25 @@
+import main
+import workflow
+import os
+
+class Callable(object):
+ def __init__(self, t, factory):
+ self.t = t
+ self.factory = factory
+
+ def __call__(self, **kwargs):
+ return self.factory.executor(self.t, kwargs, os.getcwd(), None, **self.factory.execkwargs)
+
+class Factory(object):
+ def __init__(self, makeTool=workflow.defaultMakeTool,
+ executor=main.single_job_executor,
+ **execkwargs):
+ self.makeTool = makeTool
+ self.executor = executor
+ self.execkwargs = execkwargs
+
+ def make(self, cwl, frag=None, debug=False):
+ l = main.load_tool(cwl, False, True, self.makeTool, debug, urifrag=frag)
+ if type(l) == int:
+ raise Exception("Error loading tool")
+ return Callable(l, self)
diff --git a/cwltool/flatten.py b/cwltool/flatten.py
new file mode 100644
index 0000000..54e918a
--- /dev/null
+++ b/cwltool/flatten.py
@@ -0,0 +1,20 @@
+# http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html
+def flatten(l, ltypes=(list, tuple)):
+ if l is None:
+ return []
+ if not isinstance(l, ltypes):
+ return [l]
+
+ ltype = type(l)
+ l = list(l)
+ i = 0
+ while i < len(l):
+ while isinstance(l[i], ltypes):
+ if not l[i]:
+ l.pop(i)
+ i -= 1
+ break
+ else:
+ l[i:i + 1] = l[i]
+ i += 1
+ return ltype(l)
diff --git a/cwltool/job.py b/cwltool/job.py
new file mode 100644
index 0000000..6321745
--- /dev/null
+++ b/cwltool/job.py
@@ -0,0 +1,212 @@
+import subprocess
+import os
+import tempfile
+import glob
+import json
+import yaml
+import logging
+import sys
+import requests
+import docker
+from process import get_feature, empty_subtree
+from errors import WorkflowException
+import shutil
+import stat
+import re
+import shellescape
+from docker_uid import docker_vm_uid
+
+_logger = logging.getLogger("cwltool")
+
+needs_shell_quoting_re = re.compile(r"""(^$|[\s|&;()<>\'"$@])""")
+
+def deref_links(outputs):
+ if isinstance(outputs, dict):
+ if outputs.get("class") == "File":
+ st = os.lstat(outputs["path"])
+ if stat.S_ISLNK(st.st_mode):
+ outputs["path"] = os.readlink(outputs["path"])
+ else:
+ for v in outputs.values():
+ deref_links(v)
+ if isinstance(outputs, list):
+ for v in outputs:
+ deref_links(v)
+
+class CommandLineJob(object):
+ def run(self, dry_run=False, pull_image=True, rm_container=True, rm_tmpdir=True, move_outputs=True, **kwargs):
+ if not os.path.exists(self.outdir):
+ os.makedirs(self.outdir)
+
+ #with open(os.path.join(outdir, "cwl.input.json"), "w") as fp:
+ # json.dump(self.joborder, fp)
+
+ runtime = []
+ env = {"TMPDIR": self.tmpdir}
+
+ (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
+
+ for f in self.pathmapper.files():
+ if not os.path.exists(self.pathmapper.mapper(f)[0]):
+ raise WorkflowException("Required input file %s not found" % self.pathmapper.mapper(f)[0])
+
+ img_id = None
+ if docker_req and kwargs.get("use_container") is not False:
+ env = os.environ
+ img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
+
+ if docker_is_req and img_id is None:
+ raise WorkflowException("Docker is required for running this tool.")
+
+ if img_id:
+ runtime = ["docker", "run", "-i"]
+ for src in self.pathmapper.files():
+ vol = self.pathmapper.mapper(src)
+ runtime.append("--volume=%s:%s:ro" % vol)
+ runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.outdir), "/var/spool/cwl"))
+ runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.tmpdir), "/tmp"))
+ runtime.append("--workdir=%s" % ("/var/spool/cwl"))
+ runtime.append("--read-only=true")
+ runtime.append("--net=none")
+ euid = docker_vm_uid() or os.geteuid()
+ runtime.append("--user=%s" % (euid))
+
+ if rm_container:
+ runtime.append("--rm")
+
+ runtime.append("--env=TMPDIR=/tmp")
+
+ for t,v in self.environment.items():
+ runtime.append("--env=%s=%s" % (t, v))
+
+ runtime.append(img_id)
+ else:
+ env = self.environment
+ if not os.path.exists(self.tmpdir):
+ os.makedirs(self.tmpdir)
+ env["TMPDIR"] = self.tmpdir
+ vars_to_preserve = kwargs.get("preserve_environment")
+ if vars_to_preserve is not None:
+ for key, value in os.environ.items():
+ if key in vars_to_preserve and key not in env:
+ env[key] = value
+
+ stdin = None
+ stdout = None
+
+ scr, _ = get_feature(self, "ShellCommandRequirement")
+
+ if scr:
+ shouldquote = lambda x: False
+ else:
+ shouldquote = needs_shell_quoting_re.search
+
+ _logger.info("[job %s] %s$ %s%s%s",
+ self.name,
+ self.outdir,
+ " ".join([shellescape.quote(str(arg)) if shouldquote(str(arg)) else str(arg) for arg in (runtime + self.command_line)]),
+ ' < %s' % (self.stdin) if self.stdin else '',
+ ' > %s' % os.path.join(self.outdir, self.stdout) if self.stdout else '')
+
+ if dry_run:
+ return (self.outdir, {})
+
+ outputs = {}
+
+ try:
+ for t in self.generatefiles:
+ if isinstance(self.generatefiles[t], dict):
+ src = self.generatefiles[t]["path"]
+ dst = os.path.join(self.outdir, t)
+ if os.path.dirname(self.pathmapper.reversemap(src)[1]) != self.outdir:
+ _logger.debug("symlinking %s to %s", dst, src)
+ os.symlink(src, dst)
+ else:
+ with open(os.path.join(self.outdir, t), "w") as f:
+ f.write(self.generatefiles[t])
+
+ if self.stdin:
+ stdin = open(self.pathmapper.mapper(self.stdin)[0], "rb")
+ else:
+ stdin = subprocess.PIPE
+
+ if self.stdout:
+ absout = os.path.join(self.outdir, self.stdout)
+ dn = os.path.dirname(absout)
+ if dn and not os.path.exists(dn):
+ os.makedirs(dn)
+ stdout = open(absout, "wb")
+ else:
+ stdout = sys.stderr
+
+ sp = subprocess.Popen([str(x) for x in runtime + self.command_line],
+ shell=False,
+ close_fds=True,
+ stdin=stdin,
+ stdout=stdout,
+ env=env,
+ cwd=self.outdir)
+
+ if stdin == subprocess.PIPE:
+ sp.stdin.close()
+
+ rcode = sp.wait()
+
+ if stdin != subprocess.PIPE:
+ stdin.close()
+
+ if stdout is not sys.stderr:
+ stdout.close()
+
+ if self.successCodes and rcode in self.successCodes:
+ processStatus = "success"
+ elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
+ processStatus = "temporaryFail"
+ elif self.permanentFailCodes and rcode in self.permanentFailCodes:
+ processStatus = "permanentFail"
+ elif rcode == 0:
+ processStatus = "success"
+ else:
+ processStatus = "permanentFail"
+
+ for t in self.generatefiles:
+ if isinstance(self.generatefiles[t], dict):
+ src = self.generatefiles[t]["path"]
+ dst = os.path.join(self.outdir, t)
+ if os.path.dirname(self.pathmapper.reversemap(src)[1]) != self.outdir:
+ os.remove(dst)
+ os.symlink(self.pathmapper.reversemap(src)[1], dst)
+
+ outputs = self.collect_outputs(self.outdir)
+
+ except OSError as e:
+ if e.errno == 2:
+ if runtime:
+ _logger.error("'%s' not found", runtime[0])
+ else:
+ _logger.error("'%s' not found", self.command_line[0])
+ else:
+ _logger.exception("Exception while running job")
+ processStatus = "permanentFail"
+ except WorkflowException as e:
+ _logger.error("Error while running job: %s" % e)
+ processStatus = "permanentFail"
+ except Exception as e:
+ _logger.exception("Exception while running job")
+ processStatus = "permanentFail"
+
+ if processStatus != "success":
+ _logger.warn("[job %s] completed %s", self.name, processStatus)
+ else:
+ _logger.debug("[job %s] completed %s", self.name, processStatus)
+ _logger.debug("[job %s] %s", self.name, json.dumps(outputs, indent=4))
+
+ self.output_callback(outputs, processStatus)
+
+ if rm_tmpdir:
+ _logger.debug("[job %s] Removing temporary directory %s", self.name, self.tmpdir)
+ shutil.rmtree(self.tmpdir, True)
+
+ if move_outputs and empty_subtree(self.outdir):
+ _logger.debug("[job %s] Removing empty output directory %s", self.name, self.outdir)
+ shutil.rmtree(self.outdir, True)
diff --git a/cwltool/main.py b/cwltool/main.py
new file mode 100755
index 0000000..72d3a45
--- /dev/null
+++ b/cwltool/main.py
@@ -0,0 +1,516 @@
+#!/usr/bin/env python
+
+import draft2tool
+import argparse
+from schema_salad.ref_resolver import Loader
+import json
+import os
+import sys
+import logging
+import workflow
+import schema_salad.validate as validate
+import tempfile
+import schema_salad.jsonld_context
+import schema_salad.makedoc
+import yaml
+import urlparse
+import process
+import job
+from cwlrdf import printrdf, printdot
+import pkg_resources # part of setuptools
+import update
+from process import shortname
+import rdflib
+
+_logger = logging.getLogger("cwltool")
+
+defaultStreamHandler = logging.StreamHandler()
+_logger.addHandler(defaultStreamHandler)
+_logger.setLevel(logging.INFO)
+
+def arg_parser():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--conformance-test", action="store_true")
+ parser.add_argument("--basedir", type=str)
+ parser.add_argument("--outdir", type=str, default=os.path.abspath('.'),
+ help="Output directory, default current directory")
+
+ parser.add_argument("--no-container", action="store_false", default=True,
+ help="Do not execute jobs in a Docker container, even when specified by the CommandLineTool",
+ dest="use_container")
+
+ parser.add_argument("--preserve-environment", type=str, nargs='+',
+ help="Preserve specified environment variables when running CommandLineTools",
+ metavar=("VAR1","VAR2"),
+ default=("PATH",),
+ dest="preserve_environment")
+
+ exgroup = parser.add_mutually_exclusive_group()
+ exgroup.add_argument("--rm-container", action="store_true", default=True,
+ help="Delete Docker container used by jobs after they exit (default)",
+ dest="rm_container")
+
+ exgroup.add_argument("--leave-container", action="store_false",
+ default=True, help="Do not delete Docker container used by jobs after they exit",
+ dest="rm_container")
+
+ parser.add_argument("--tmpdir-prefix", type=str,
+ help="Path prefix for temporary directories",
+ default="tmp")
+
+ parser.add_argument("--tmp-outdir-prefix", type=str,
+ help="Path prefix for intermediate output directories",
+ default="tmp")
+
+ exgroup = parser.add_mutually_exclusive_group()
+ exgroup.add_argument("--rm-tmpdir", action="store_true", default=True,
+ help="Delete intermediate temporary directories (default)",
+ dest="rm_tmpdir")
+
+ exgroup.add_argument("--leave-tmpdir", action="store_false",
+ default=True, help="Do not delete intermediate temporary directories",
+ dest="rm_tmpdir")
+
+ exgroup = parser.add_mutually_exclusive_group()
+ exgroup.add_argument("--move-outputs", action="store_true", default=True,
+ help="Move output files to the workflow output directory and delete intermediate output directories (default).",
+ dest="move_outputs")
+
+ exgroup.add_argument("--leave-outputs", action="store_false", default=True,
+ help="Leave output files in intermediate output directories.",
+ dest="move_outputs")
+
+ exgroup = parser.add_mutually_exclusive_group()
+ exgroup.add_argument("--enable-pull", default=True, action="store_true",
+ help="Try to pull Docker images", dest="enable_pull")
+
+ exgroup.add_argument("--disable-pull", default=True, action="store_false",
+ help="Do not try to pull Docker images", dest="enable_pull")
+
+ parser.add_argument("--dry-run", action="store_true",
+ help="Load and validate but do not execute")
+
+ parser.add_argument("--rdf-serializer",
+ help="Output RDF serialization format used by --print-rdf (one of turtle (default), n3, nt, xml)",
+ default="turtle")
+
+ exgroup = parser.add_mutually_exclusive_group()
+ exgroup.add_argument("--print-rdf", action="store_true",
+ help="Print corresponding RDF graph for workflow and exit")
+ exgroup.add_argument("--print-dot", action="store_true", help="Print workflow visualization in graphviz format and exit")
+ exgroup.add_argument("--print-pre", action="store_true", help="Print CWL document after preprocessing.")
+ exgroup.add_argument("--version", action="store_true", help="Print version and exit")
+ exgroup.add_argument("--update", action="store_true", help="Update to latest CWL version, print and exit")
+
+ exgroup = parser.add_mutually_exclusive_group()
+ exgroup.add_argument("--strict", action="store_true", help="Strict validation (unrecognized or out of place fields are error)",
+ default=True, dest="strict")
+ exgroup.add_argument("--non-strict", action="store_false", help="Lenient validation (ignore unrecognized fields)",
+ default=True, dest="strict")
+
+ exgroup = parser.add_mutually_exclusive_group()
+ exgroup.add_argument("--verbose", action="store_true", help="Default logging")
+ exgroup.add_argument("--quiet", action="store_true", help="Only print warnings and errors.")
+ exgroup.add_argument("--debug", action="store_true", help="Print even more logging")
+
+ parser.add_argument("--tool-help", action="store_true", help="Print command line help for tool")
+
+ parser.add_argument("workflow", type=str, nargs="?", default=None)
+ parser.add_argument("job_order", nargs=argparse.REMAINDER)
+
+ return parser
+
+def single_job_executor(t, job_order, input_basedir, args, **kwargs):
+ final_output = []
+ final_status = []
+
+ def output_callback(out, processStatus):
+ final_status.append(processStatus)
+ if processStatus == "success":
+ _logger.info("Final process status is %s", processStatus)
+ else:
+ _logger.warn("Final process status is %s", processStatus)
+ final_output.append(out)
+
+ if kwargs.get("outdir"):
+ pass
+ elif kwargs.get("dry_run"):
+ kwargs["outdir"] = "/tmp"
+ else:
+ kwargs["outdir"] = tempfile.mkdtemp()
+
+ jobiter = t.job(job_order,
+ input_basedir,
+ output_callback,
+ **kwargs)
+
+ if kwargs.get("conformance_test"):
+ job = jobiter.next()
+ a = {"args": job.command_line}
+ if job.stdin:
+ a["stdin"] = job.pathmapper.mapper(job.stdin)[1]
+ if job.stdout:
+ a["stdout"] = job.stdout
+ if job.generatefiles:
+ a["createfiles"] = job.generatefiles
+ return a
+ else:
+ try:
+ for r in jobiter:
+ if r:
+ r.run(**kwargs)
+ else:
+ raise workflow.WorkflowException("Workflow cannot make any more progress.")
+ except workflow.WorkflowException:
+ raise
+ except Exception as e:
+ _logger.exception("Got workflow error")
+ raise workflow.WorkflowException("%s" % e, )
+
+ if final_status[0] != "success":
+ raise workflow.WorkflowException("Process status is %s" % (final_status))
+
+ return final_output[0]
+
+class FileAction(argparse.Action):
+ def __init__(self, option_strings, dest, nargs=None, **kwargs):
+ if nargs is not None:
+ raise ValueError("nargs not allowed")
+ super(FileAction, self).__init__(option_strings, dest, **kwargs)
+ def __call__(self, parser, namespace, values, option_string=None):
+ setattr(namespace, self.dest, {"class": "File", "path": values})
+
+class FileAppendAction(argparse.Action):
+ def __init__(self, option_strings, dest, nargs=None, **kwargs):
+ if nargs is not None:
+ raise ValueError("nargs not allowed")
+ super(FileAppendAction, self).__init__(option_strings, dest, **kwargs)
+ def __call__(self, parser, namespace, values, option_string=None):
+ g = getattr(namespace, self.dest)
+ if not g:
+ g = []
+ setattr(namespace, self.dest, g)
+ g.append({"class": "File", "path": values})
+
+def generate_parser(toolparser, tool, namemap):
+ toolparser.add_argument("job_order", nargs="?", help="Job input json file")
+ namemap["job_order"] = "job_order"
+
+ for inp in tool.tool["inputs"]:
+ name = shortname(inp["id"])
+ if len(name) == 1:
+ flag = "-"
+ else:
+ flag = "--"
+
+ namemap[name.replace("-", "_")] = name
+
+ inptype = inp["type"]
+
+ required = True
+ if isinstance(inptype, list):
+ if inptype[0] == "null":
+ required = False
+ if len(inptype) == 2:
+ inptype = inptype[1]
+ else:
+ _logger.debug("Can't make command line argument from %s", inptype)
+ return None
+
+ help = inp.get("description", "").replace("%", "%%")
+ kwargs = {}
+
+ if inptype == "File":
+ kwargs["action"] = FileAction
+ elif isinstance(inptype, dict) and inptype["type"] == "array":
+ if inptype["items"] == "File":
+ kwargs["action"] = FileAppendAction
+ else:
+ kwargs["action"] = "append"
+
+ if inptype == "string":
+ kwargs["type"] = str
+ elif inptype == "int":
+ kwargs["type"] = int
+ elif inptype == "float":
+ kwargs["type"] = float
+ elif inptype == "boolean":
+ kwargs["action"] = "store_true"
+
+ if "default" in inp:
+ kwargs["default"] = inp["default"]
+ required = False
+
+ if "type" not in kwargs and "action" not in kwargs:
+ _logger.debug("Can't make command line argument from %s", inptype)
+ return None
+
+ toolparser.add_argument(flag + name, required=required, help=help, **kwargs)
+
+ return toolparser
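+
+# Illustrative example (hypothetical input, not taken from a real tool): an
+# input with id "#main/threads", type "int" and no default yields an argparse
+# option roughly equivalent to:
+#
+#     toolparser.add_argument("--threads", required=True, type=int, help="")
+#
+# Single-character input names get a single-dash flag (e.g. "-t") instead.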
+
+
+def load_tool(argsworkflow, updateonly, strict, makeTool, debug,
+ print_pre=False,
+ print_rdf=False,
+ print_dot=False,
+ rdf_serializer=None,
+ urifrag=None):
+ (document_loader, avsc_names, schema_metadata) = process.get_schema()
+
+ if isinstance(avsc_names, Exception):
+ raise avsc_names
+
+ jobobj = None
+ if isinstance(argsworkflow, basestring):
+ split = urlparse.urlsplit(argsworkflow)
+ if split.scheme:
+ uri = argsworkflow
+ else:
+ uri = "file://" + os.path.abspath(argsworkflow)
+ fileuri, urifrag = urlparse.urldefrag(uri)
+ workflowobj = document_loader.fetch(fileuri)
+ if isinstance(workflowobj, list):
+ # bare list without a version must be treated as draft-2
+ workflowobj = {"cwlVersion": "https://w3id.org/cwl/cwl#draft-2",
+ "id": fileuri,
+ "@graph": workflowobj}
+
+ if "cwl:tool" in workflowobj:
+ jobobj = workflowobj
+ workflowobj = document_loader.fetch(urlparse.urljoin(uri, workflowobj["cwl:tool"]))
+
+ workflowobj = update.update(workflowobj, document_loader, fileuri)
+ document_loader.idx.clear()
+
+ if updateonly:
+ print json.dumps(workflowobj, indent=4)
+ return 0
+ elif isinstance(argsworkflow, dict):
+ workflowobj = argsworkflow
+ uri = urifrag
+ else:
+ raise schema_salad.validate.ValidationException("Must be URI or dict")
+
+ try:
+ processobj, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, workflowobj, strict)
+ except (schema_salad.validate.ValidationException, RuntimeError) as e:
+ _logger.error("Tool definition failed validation:\n%s", e, exc_info=(e if debug else False))
+ return 1
+
+ if print_pre:
+ print json.dumps(processobj, indent=4)
+ return 0
+
+ if print_rdf:
+ printrdf(argsworkflow, processobj, document_loader.ctx, rdf_serializer)
+ return 0
+
+ if print_dot:
+ printdot(argsworkflow, processobj, document_loader.ctx, rdf_serializer)
+ return 0
+
+ if urifrag:
+ processobj, _ = document_loader.resolve_ref(uri)
+ elif isinstance(processobj, list):
+ _logger.error("Tool file contains graph of multiple objects, must specify one of #%s",
+ ", #".join(urlparse.urldefrag(i["id"])[1]
+ for i in processobj if "id" in i))
+ return 1
+
+ try:
+ t = makeTool(processobj, strict=strict, makeTool=makeTool, loader=document_loader, avsc_names=avsc_names)
+ except (schema_salad.validate.ValidationException) as e:
+ _logger.error("Tool definition failed validation:\n%s", e, exc_info=(e if debug else False))
+ return 1
+ except (RuntimeError, workflow.WorkflowException) as e:
+ _logger.error("Tool definition failed initialization:\n%s", e, exc_info=(e if debug else False))
+ return 1
+
+ if jobobj:
+ for inp in t.tool["inputs"]:
+ if shortname(inp["id"]) in jobobj:
+ inp["default"] = jobobj[shortname(inp["id"])]
+
+ if metadata:
+ t.metadata = metadata
+ else:
+ t.metadata = {"$namespaces": t.tool.get("$namespaces", {}), "$schemas": t.tool.get("$schemas", [])}
+
+ return t
+
+def load_job_order(args, t, parser):
+
+ job_order_object = None
+
+ if args.conformance_test:
+ loader = Loader({})
+ else:
+ jobloaderctx = {"path": {"@type": "@id"}, "format": {"@type": "@id"}}
+ jobloaderctx.update(t.metadata.get("$namespaces", {}))
+ loader = Loader(jobloaderctx)
+
+ if len(args.job_order) == 1 and args.job_order[0][0] != "-":
+ job_order_file = args.job_order[0]
+ elif len(args.job_order) == 1 and args.job_order[0] == "-":
+ job_order_object = yaml.load(sys.stdin)
+ job_order_object, _ = loader.resolve_all(job_order_object, "")
+ else:
+ job_order_file = None
+
+ if job_order_object:
+ input_basedir = args.basedir if args.basedir else os.getcwd()
+ elif job_order_file:
+ input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(job_order_file))
+ try:
+ job_order_object, _ = loader.resolve_ref(job_order_file)
+ except Exception as e:
+ _logger.error(e, exc_info=(e if args.debug else False))
+ return 1
+ toolparser = None
+ else:
+ input_basedir = args.basedir if args.basedir else os.getcwd()
+ namemap = {}
+ toolparser = generate_parser(argparse.ArgumentParser(prog=args.workflow), t, namemap)
+ if toolparser:
+ if args.tool_help:
+ toolparser.print_help()
+ return 0
+ cmd_line = vars(toolparser.parse_args(args.job_order))
+
+ if cmd_line["job_order"]:
+ try:
+ input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(cmd_line["job_order"]))
+ job_order_object, _ = loader.resolve_ref(cmd_line["job_order"])
+ except Exception as e:
+ _logger.error(e, exc_info=(e if args.debug else False))
+ return 1
+ else:
+ job_order_object = {}
+
+ job_order_object.update({namemap[k]: v for k,v in cmd_line.items()})
+
+ _logger.debug("Parsed job order from command line: %s", job_order_object)
+ else:
+ job_order_object = None
+
+ for inp in t.tool["inputs"]:
+ if "default" in inp and (not job_order_object or shortname(inp["id"]) not in job_order_object):
+ if not job_order_object:
+ job_order_object = {}
+ job_order_object[shortname(inp["id"])] = inp["default"]
+
+ if not job_order_object and len(t.tool["inputs"]) > 0:
+ parser.print_help()
+ if toolparser:
+ print "\nOptions for %s " % args.workflow
+ toolparser.print_help()
+ _logger.error("")
+ _logger.error("Input object required")
+ return 1
+
+ return (job_order_object, input_basedir)
+
+
+def main(args=None,
+ executor=single_job_executor,
+ makeTool=workflow.defaultMakeTool,
+ selectResources=None,
+ parser=None,
+ stdin=sys.stdin,
+ stdout=sys.stdout,
+ stderr=sys.stderr):
+
+ _logger.removeHandler(defaultStreamHandler)
+ _logger.addHandler(logging.StreamHandler(stderr))
+
+ if args is None:
+ args = sys.argv[1:]
+
+ if parser is None:
+ parser = arg_parser()
+
+ args = parser.parse_args(args)
+
+ if args.quiet:
+ _logger.setLevel(logging.WARN)
+ if args.debug:
+ _logger.setLevel(logging.DEBUG)
+
+ pkg = pkg_resources.require("cwltool")
+ if pkg:
+ if args.version:
+ print "%s %s" % (sys.argv[0], pkg[0].version)
+ return 0
+ else:
+ _logger.info("%s %s", sys.argv[0], pkg[0].version)
+
+ if not args.workflow:
+ parser.print_help()
+ _logger.error("")
+ _logger.error("CWL document required")
+ return 1
+
+ try:
+ t = load_tool(args.workflow, args.update, args.strict, makeTool, args.debug,
+ print_pre=args.print_pre,
+ print_rdf=args.print_rdf,
+ print_dot=args.print_dot,
+ rdf_serializer=args.rdf_serializer)
+ except Exception as e:
+ _logger.error("I'm sorry, I couldn't load this CWL file.\n%s", e, exc_info=(e if args.debug else False))
+ return 1
+
+ if type(t) == int:
+ return t
+
+ if args.tmp_outdir_prefix != 'tmp':
+ # Use user defined temp directory (if it exists)
+ args.tmp_outdir_prefix = os.path.abspath(args.tmp_outdir_prefix)
+ if not os.path.exists(args.tmp_outdir_prefix):
+ _logger.error("Intermediate output directory prefix doesn't exist, reverting to default")
+ return 1
+
+ if args.tmpdir_prefix != 'tmp':
+ # Use user defined prefix (if the folder exists)
+ args.tmpdir_prefix = os.path.abspath(args.tmpdir_prefix)
+ if not os.path.exists(args.tmpdir_prefix):
+ _logger.error("Temporary directory prefix doesn't exist.")
+ return 1
+
+ job_order_object = load_job_order(args, t, parser)
+
+ if type(job_order_object) == int:
+ return job_order_object
+
+ try:
+ out = executor(t, job_order_object[0],
+ job_order_object[1], args,
+ conformance_test=args.conformance_test,
+ dry_run=args.dry_run,
+ outdir=args.outdir,
+ tmp_outdir_prefix=args.tmp_outdir_prefix,
+ use_container=args.use_container,
+ preserve_environment=args.preserve_environment,
+ pull_image=args.enable_pull,
+ rm_container=args.rm_container,
+ tmpdir_prefix=args.tmpdir_prefix,
+ rm_tmpdir=args.rm_tmpdir,
+ makeTool=makeTool,
+ move_outputs=args.move_outputs,
+ select_resources=selectResources
+ )
+ # This is the workflow output, it needs to be written
+ stdout.write(json.dumps(out, indent=4))
+ stdout.flush()
+ except (validate.ValidationException) as e:
+ _logger.error("Input object failed validation:\n%s", e, exc_info=(e if args.debug else False))
+ return 1
+ except workflow.WorkflowException as e:
+ _logger.error("Workflow error:\n %s", e, exc_info=(e if args.debug else False))
+ return 1
+
+ return 0
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py
new file mode 100644
index 0000000..f7aab43
--- /dev/null
+++ b/cwltool/pathmapper.py
@@ -0,0 +1,81 @@
+import os
+import random
+import logging
+import stat
+
+_logger = logging.getLogger("cwltool")
+
+def abspath(src, basedir):
+ if src.startswith("file://"):
+ ab = src[7:]
+ else:
+ ab = src if os.path.isabs(src) else os.path.join(basedir, src)
+ return ab
+
+class PathMapper(object):
+ """Mapping of files from relative path provided in the file to a tuple of
+ (absolute local path, absolute container path)"""
+
+ def __init__(self, referenced_files, basedir):
+ self._pathmap = {}
+ for src in referenced_files:
+ ab = abspath(src, basedir)
+ self._pathmap[src] = (ab, ab)
+
+ def mapper(self, src):
+ return self._pathmap[src]
+
+ def files(self):
+ return self._pathmap.keys()
+
+ def reversemap(self, target):
+ for k,v in self._pathmap.items():
+ if v[1] == target:
+ return (k, v[0])
+
+class DockerPathMapper(PathMapper):
+ def __init__(self, referenced_files, basedir):
+ self._pathmap = {}
+ self.dirs = {}
+ for src in referenced_files:
+ ab = abspath(src, basedir)
+ dir, fn = os.path.split(ab)
+
+ subdir = False
+ for d in self.dirs:
+ if dir.startswith(d):
+ subdir = True
+ break
+
+ if not subdir:
+ for d in list(self.dirs):
+ if d.startswith(dir):
+ # 'dir' is a parent of 'd'
+ del self.dirs[d]
+ self.dirs[dir] = True
+
+ prefix = "job" + str(random.randint(1, 1000000000)) + "_"
+
+ names = set()
+ for d in self.dirs:
+ name = os.path.join("/var/lib/cwl", prefix + os.path.basename(d))
+ i = 1
+ while name in names:
+ i += 1
+ name = os.path.join("/var/lib/cwl", prefix + os.path.basename(d) + str(i))
+ names.add(name)
+ self.dirs[d] = name
+
+ for src in referenced_files:
+ ab = abspath(src, basedir)
+
+ deref = ab
+ st = os.lstat(deref)
+ while stat.S_ISLNK(st.st_mode):
+ rl = os.readlink(deref)
+ deref = rl if os.path.isabs(rl) else os.path.join(os.path.dirname(deref), rl)
+ st = os.lstat(deref)
+
+ for d in self.dirs:
+ if ab.startswith(d):
+ self._pathmap[src] = (deref, os.path.join(self.dirs[d], ab[len(d)+1:]))
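+
+# Illustrative example: with basedir "/home/me" and a referenced file
+# "data/input.txt", the resulting entry pairs the (symlink-dereferenced) host
+# path with a container path under /var/lib/cwl, e.g.
+#     ("/home/me/data/input.txt", "/var/lib/cwl/job<random>_data/input.txt")
+# so the same file is reachable from inside the Docker container.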
diff --git a/cwltool/process.py b/cwltool/process.py
new file mode 100644
index 0000000..a74b452
--- /dev/null
+++ b/cwltool/process.py
@@ -0,0 +1,372 @@
+import avro.schema
+import os
+import json
+import schema_salad.validate as validate
+import copy
+import yaml
+import logging
+import pprint
+from aslist import aslist
+import schema_salad.schema
+import urlparse
+from pkg_resources import resource_stream
+import stat
+from builder import Builder
+import tempfile
+import glob
+from errors import WorkflowException
+from pathmapper import abspath
+
+from rdflib import URIRef
+from rdflib.namespace import RDFS, OWL
+
+import errno
+
+_logger = logging.getLogger("cwltool")
+
+supportedProcessRequirements = ["DockerRequirement",
+ "ExpressionEngineRequirement",
+ "SchemaDefRequirement",
+ "EnvVarRequirement",
+ "CreateFileRequirement",
+ "ScatterFeatureRequirement",
+ "SubworkflowFeatureRequirement",
+ "MultipleInputFeatureRequirement",
+ "InlineJavascriptRequirement",
+ "ShellCommandRequirement",
+ "StepInputExpressionRequirement"]
+
+cwl_files = ("Workflow.yml",
+ "CommandLineTool.yml",
+ "CommonWorkflowLanguage.yml",
+ "Process.yml",
+ "concepts.md",
+ "contrib.md",
+ "intro.md",
+ "invocation.md")
+
+salad_files = ('metaschema.yml',
+ 'salad.md',
+ 'field_name.yml',
+ 'import_include.md',
+ 'link_res.yml',
+ 'ident_res.yml',
+ 'vocab_res.yml',
+ 'field_name_schema.yml',
+ 'field_name_src.yml',
+ 'field_name_proc.yml',
+ 'ident_res_schema.yml',
+ 'ident_res_src.yml',
+ 'ident_res_proc.yml',
+ 'link_res_schema.yml',
+ 'link_res_src.yml',
+ 'link_res_proc.yml',
+ 'vocab_res_schema.yml',
+ 'vocab_res_src.yml',
+ 'vocab_res_proc.yml')
+
+def get_schema():
+ cache = {}
+ for f in cwl_files:
+ rs = resource_stream(__name__, 'schemas/draft-3/' + f)
+ cache["https://w3id.org/cwl/" + f] = rs.read()
+ rs.close()
+
+ for f in salad_files:
+ rs = resource_stream(__name__, 'schemas/draft-3/salad/schema_salad/metaschema/' + f)
+ cache["https://w3id.org/cwl/salad/schema_salad/metaschema/" + f] = rs.read()
+ rs.close()
+
+ return schema_salad.schema.load_schema("https://w3id.org/cwl/CommonWorkflowLanguage.yml", cache=cache)
+
+def get_feature(self, feature):
+ for t in reversed(self.requirements):
+ if t["class"] == feature:
+ return (t, True)
+ for t in reversed(self.hints):
+ if t["class"] == feature:
+ return (t, False)
+ return (None, None)
+
+def shortname(inputid):
+ d = urlparse.urlparse(inputid)
+ if d.fragment:
+ return d.fragment.split("/")[-1].split(".")[-1]
+ else:
+ return d.path.split("/")[-1]
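+
+# Illustrative examples:
+#     shortname("file:///home/me/tool.cwl#main/reverse_sort")  -> "reverse_sort"
+#     shortname("http://example.com/workflows/count-lines")    -> "count-lines"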
+
+class StdFsAccess(object):
+ def __init__(self, basedir):
+ self.basedir = basedir
+
+ def _abs(self, p):
+ return abspath(p, self.basedir)
+
+ def glob(self, pattern):
+ return glob.glob(self._abs(pattern))
+
+ def open(self, fn, mode):
+ return open(self._abs(fn), mode)
+
+ def exists(self, fn):
+ return os.path.exists(self._abs(fn))
+
+def checkRequirements(rec, supportedProcessRequirements):
+ if isinstance(rec, dict):
+ if "requirements" in rec:
+ for r in rec["requirements"]:
+ if r["class"] not in supportedProcessRequirements:
+ raise Exception("Unsupported requirement %s" % r["class"])
+ if "scatter" in rec:
+ if isinstance(rec["scatter"], list) and rec["scatter"] > 1:
+ raise Exception("Unsupported complex scatter type '%s'" % rec.get("scatterMethod"))
+ for d in rec:
+ checkRequirements(rec[d], supportedProcessRequirements)
+ if isinstance(rec, list):
+ for d in rec:
+ checkRequirements(d, supportedProcessRequirements)
+
+def adjustFiles(rec, op):
+ """Apply a mapping function to each File path in the object `rec`."""
+
+ if isinstance(rec, dict):
+ if rec.get("class") == "File":
+ rec["path"] = op(rec["path"])
+ for d in rec:
+ adjustFiles(rec[d], op)
+ if isinstance(rec, list):
+ for d in rec:
+ adjustFiles(d, op)
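+
+# Illustrative example: prefix every File path in a job object with a base
+# directory.
+#     job = {"input": {"class": "File", "path": "whale.txt"}}
+#     adjustFiles(job, lambda p: os.path.join("/data", p))
+#     # job["input"]["path"] is now "/data/whale.txt"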
+
+def formatSubclassOf(fmt, cls, ontology, visited):
+ """Determine if `fmt` is a subclass of `cls`."""
+
+ if URIRef(fmt) == URIRef(cls):
+ return True
+
+ if ontology is None:
+ return False
+
+ if fmt in visited:
+ return
+
+ visited.add(fmt)
+
+ fmt = URIRef(fmt)
+
+ for s,p,o in ontology.triples( (fmt, RDFS.subClassOf, None) ):
+ # Find parent classes of `fmt` and search upward
+ if formatSubclassOf(o, cls, ontology, visited):
+ return True
+
+ for s,p,o in ontology.triples( (fmt, OWL.equivalentClass, None) ):
+ # Find equivalent classes of `fmt` and search horizontally
+ if formatSubclassOf(o, cls, ontology, visited):
+ return True
+
+ for s,p,o in ontology.triples( (None, OWL.equivalentClass, fmt) ):
+ # Find equivalent classes of `fmt` and search horizontally
+ if formatSubclassOf(s, cls, ontology, visited):
+ return True
+
+ return False
+
+def checkFormat(actualFile, inputFormats, requirements, ontology):
+ for af in aslist(actualFile):
+ if "format" not in af:
+ raise validate.ValidationException("Missing required 'format' for File %s" % af)
+ for inpf in aslist(inputFormats):
+ if af["format"] == inpf or formatSubclassOf(af["format"], inpf, ontology, set()):
+ return
+ raise validate.ValidationException("Incompatible file format %s required format(s) %s" % (af["format"], inputFormats))
+
+class Process(object):
+ def __init__(self, toolpath_object, **kwargs):
+ (_, self.names, _) = get_schema()
+ self.tool = toolpath_object
+ self.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
+ self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])
+ if "loader" in kwargs:
+ self.formatgraph = kwargs["loader"].graph
+
+ self.validate_hints(self.tool.get("hints", []), strict=kwargs.get("strict"))
+
+ self.schemaDefs = {}
+
+ sd, _ = self.get_requirement("SchemaDefRequirement")
+
+ if sd:
+ sdtypes = sd["types"]
+ av = schema_salad.schema.make_valid_avro(sdtypes, {t["name"]: t for t in sdtypes}, set())
+ for i in av:
+ self.schemaDefs[i["name"]] = i
+ avro.schema.make_avsc_object(av, self.names)
+
+ # Build record schema from inputs
+ self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []}
+ self.outputs_record_schema = {"name": "outputs_record_schema", "type": "record", "fields": []}
+
+ for key in ("inputs", "outputs"):
+ for i in self.tool[key]:
+ c = copy.copy(i)
+ doc_url, _ = urlparse.urldefrag(c['id'])
+ c["name"] = shortname(c["id"])
+ del c["id"]
+
+ if "type" not in c:
+ raise validate.ValidationException("Missing `type` in parameter `%s`" % c["name"])
+
+ if "default" in c and "null" not in aslist(c["type"]):
+ c["type"] = ["null"] + aslist(c["type"])
+
+ if key == "inputs":
+ self.inputs_record_schema["fields"].append(c)
+ elif key == "outputs":
+ self.outputs_record_schema["fields"].append(c)
+
+ try:
+ self.inputs_record_schema = schema_salad.schema.make_valid_avro(self.inputs_record_schema, {}, set())
+ avro.schema.make_avsc_object(self.inputs_record_schema, self.names)
+ except avro.schema.SchemaParseException as e:
+ raise validate.ValidationException("Got error `%s` while prcoessing inputs of %s:\n%s" % (str(e), self.tool["id"], json.dumps(self.inputs_record_schema, indent=4)))
+
+ try:
+ self.outputs_record_schema = schema_salad.schema.make_valid_avro(self.outputs_record_schema, {}, set())
+ avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
+ except avro.schema.SchemaParseException as e:
+ raise validate.ValidationException("Got error `%s` while prcoessing outputs of %s:\n%s" % (str(e), self.tool["id"], json.dumps(self.outputs_record_schema, indent=4)))
+
+
+ def _init_job(self, joborder, input_basedir, **kwargs):
+ builder = Builder()
+ builder.job = copy.deepcopy(joborder)
+
+ for i in self.tool["inputs"]:
+ d = shortname(i["id"])
+ if d not in builder.job and "default" in i:
+ builder.job[d] = i["default"]
+
+ for r in self.requirements:
+ if r["class"] not in supportedProcessRequirements:
+ raise WorkflowException("Unsupported process requirement %s" % (r["class"]))
+
+ # Validate job order
+ try:
+ validate.validate_ex(self.names.get_name("input_record_schema", ""), builder.job)
+ except validate.ValidationException as e:
+ raise WorkflowException("Error validating input record, " + str(e))
+
+ builder.files = []
+ builder.bindings = []
+ builder.schemaDefs = self.schemaDefs
+ builder.names = self.names
+ builder.requirements = self.requirements
+ builder.resources = {}
+
+ dockerReq, _ = self.get_requirement("DockerRequirement")
+ if dockerReq and kwargs.get("use_container"):
+ builder.outdir = kwargs.get("docker_outdir") or "/var/spool/cwl"
+ builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp"
+ else:
+ builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
+ builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()
+
+ builder.fs_access = kwargs.get("fs_access") or StdFsAccess(input_basedir)
+
+ if self.formatgraph:
+ for i in self.tool["inputs"]:
+ d = shortname(i["id"])
+ if d in builder.job and i.get("format"):
+ checkFormat(builder.job[d], builder.do_eval(i["format"]), self.requirements, self.formatgraph)
+
+ builder.bindings.extend(builder.bind_input(self.inputs_record_schema, builder.job))
+
+ builder.resources = self.evalResources(builder, kwargs)
+
+ return builder
+
+ def evalResources(self, builder, kwargs):
+ resourceReq, _ = self.get_requirement("ResourceRequirement")
+ if resourceReq is None:
+ resourceReq = {}
+ request = {
+ "coresMin": 1,
+ "coresMax": 1,
+ "ramMin": 1024,
+ "ramMax": 1024,
+ "tmpdirMin": 1024,
+ "tmpdirMax": 1024,
+ "outdirMin": 1024,
+ "outdirMax": 1024
+ }
+ for a in ("cores", "ram", "tmpdir", "outdir"):
+ mn = None
+ mx = None
+ if resourceReq.get(a+"Min"):
+ mn = builder.do_eval(resourceReq[a+"Min"])
+ if resourceReq.get(a+"Max"):
+ mx = builder.do_eval(resourceReq[a+"Max"])
+ if mn is None:
+ mn = mx
+ elif mx is None:
+ mx = mn
+
+ if mn:
+ request[a+"Min"] = mn
+ request[a+"Max"] = mx
+
+ if kwargs.get("select_resources"):
+ return kwargs["select_resources"](request)
+ else:
+ return {
+ "cores": request["coresMin"],
+ "ram": request["ramMin"],
+ "tmpdirSize": request["tmpdirMin"],
+ "outdirSize": request["outdirMin"],
+ }
+
+ def validate_hints(self, hints, strict):
+ for r in hints:
+ try:
+ if self.names.get_name(r["class"], "") is not None:
+ validate.validate_ex(self.names.get_name(r["class"], ""), r, strict=strict)
+ else:
+ _logger.info(validate.ValidationException("Unknown hint %s" % (r["class"])))
+ except validate.ValidationException as v:
+ raise validate.ValidationException("Validating hint `%s`: %s" % (r["class"], str(v)))
+
+ def get_requirement(self, feature):
+ return get_feature(self, feature)
+
+def empty_subtree(dirpath):
+ # Test if a directory tree contains any files (does not count empty
+ # subdirectories)
+ for d in os.listdir(dirpath):
+ d = os.path.join(dirpath, d)
+ try:
+ if stat.S_ISDIR(os.stat(d).st_mode):
+ if empty_subtree(d) is False:
+ return False
+ else:
+ return False
+ except OSError as e:
+ if e.errno == errno.ENOENT:
+ pass
+ else:
+ raise
+ return True
+
+_names = set()
+def uniquename(stem):
+ c = 1
+ u = stem
+ while u in _names:
+ c += 1
+ u = "%s_%s" % (stem, c)
+ _names.add(u)
+ return u
diff --git a/cwltool/sandboxjs.py b/cwltool/sandboxjs.py
new file mode 100644
index 0000000..9708e0a
--- /dev/null
+++ b/cwltool/sandboxjs.py
@@ -0,0 +1,145 @@
+import subprocess
+import json
+import threading
+
+class JavascriptException(Exception):
+ pass
+
+def execjs(js, jslib):
+ try:
+ nodejs = subprocess.Popen(["nodejs"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ except OSError as e:
+ if e.errno == 2:
+ nodejs = subprocess.Popen(["docker", "run",
+ "--attach=STDIN", "--attach=STDOUT", "--attach=STDERR",
+ "--interactive",
+ "--rm",
+ "commonworkflowlanguage/nodejs-engine", "nodejs"],
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ else:
+ raise
+
+ fn = "\"use strict\";%s\n(function()%s)()" % (jslib, js if isinstance(js, basestring) and len(js) > 1 and js[0] == '{' else ("{return (%s);}" % js))
+ script = "console.log(JSON.stringify(require(\"vm\").runInNewContext(%s, {})));\n" % json.dumps(fn)
+
+ def term():
+ try:
+ nodejs.terminate()
+ except OSError:
+ pass
+
+ # Time out after 5 seconds
+ tm = threading.Timer(5, term)
+ tm.start()
+
+ stdoutdata, stderrdata = nodejs.communicate(script)
+ tm.cancel()
+
+ if nodejs.returncode != 0:
+ raise JavascriptException("Returncode was: %s\nscript was: %s\nstdout was: '%s'\nstderr was: '%s'\n" % (nodejs.returncode, script, stdoutdata, stderrdata))
+ else:
+ try:
+ return json.loads(stdoutdata)
+ except ValueError:
+ raise JavascriptException("Returncode was: %s\nscript was: %s\nstdout was: '%s'\nstderr was: '%s'\n" % (nodejs.returncode, script, stdoutdata, stderrdata))
+
+class SubstitutionError(Exception):
+ pass
+
+def scanner(scan):
+ DEFAULT = 0
+ DOLLAR = 1
+ PAREN = 2
+ BRACE = 3
+ SINGLE_QUOTE = 4
+ DOUBLE_QUOTE = 5
+ BACKSLASH = 6
+
+ i = 0
+ stack = [DEFAULT]
+ start = 0
+ while i < len(scan):
+ state = stack[-1]
+ c = scan[i]
+
+ if state == DEFAULT:
+ if c == '$':
+ stack.append(DOLLAR)
+ elif c == '\\':
+ stack.append(BACKSLASH)
+ elif state == BACKSLASH:
+ stack.pop()
+ if stack[-1] == DEFAULT:
+ return [i-1, i+1]
+ elif state == DOLLAR:
+ if c == '(':
+ start = i-1
+ stack.append(PAREN)
+ elif c == '{':
+ start = i-1
+ stack.append(BRACE)
+ elif state == PAREN:
+ if c == '(':
+ stack.append(PAREN)
+ elif c == ')':
+ stack.pop()
+ if stack[-1] == DOLLAR:
+ return [start, i+1]
+ elif c == "'":
+ stack.append(SINGLE_QUOTE)
+ elif c == '"':
+ stack.append(DOUBLE_QUOTE)
+ elif state == BRACE:
+ if c == '{':
+ stack.append(BRACE)
+ elif c == '}':
+ stack.pop()
+ if stack[-1] == DOLLAR:
+ return [start, i+1]
+ elif c == "'":
+ stack.append(SINGLE_QUOTE)
+ elif c == '"':
+ stack.append(DOUBLE_QUOTE)
+ elif state == SINGLE_QUOTE:
+ if c == "'":
+ stack.pop()
+ elif c == '\\':
+ stack.append(BACKSLASH)
+ elif state == DOUBLE_QUOTE:
+ if c == '"':
+ stack.pop()
+ elif c == '\\':
+ stack.append(BACKSLASH)
+ i += 1
+
+ if len(stack) > 1:
+ raise SubstitutionError("Substitution error, unfinished block starting at position {}: {}".format(start, scan[start:]))
+ else:
+ return None
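+
+# Illustrative example: scanner() returns the [start, end) offsets of the first
+# parameter reference or escape it finds, or None when nothing is left to scan.
+#     scanner("prefix $(inputs.threads) suffix")   -> [7, 24]
+#     "prefix $(inputs.threads) suffix"[7:24]      -> "$(inputs.threads)"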
+
+
+def interpolate(scan, jslib):
+ scan = scan.strip()
+ parts = []
+ w = scanner(scan)
+ while w:
+ parts.append(scan[0:w[0]])
+
+ if scan[w[0]] == '$':
+ e = execjs(scan[w[0]+1:w[1]], jslib)
+ if w[0] == 0 and w[1] == len(scan):
+ return e
+ leaf = json.dumps(e, sort_keys=True)
+ if leaf[0] == '"':
+ leaf = leaf[1:-1]
+ parts.append(leaf)
+ elif scan[w[0]] == '\\':
+ e = scan[w[1]-1]
+ parts.append(e)
+
+ scan = scan[w[1]:]
+ w = scanner(scan)
+ parts.append(scan)
+ return ''.join(parts)
diff --git a/cwltool/schemas/draft-2/cwl-avro.yml b/cwltool/schemas/draft-2/cwl-avro.yml
new file mode 100644
index 0000000..5f69a6b
--- /dev/null
+++ b/cwltool/schemas/draft-2/cwl-avro.yml
@@ -0,0 +1,1929 @@
+- name: "Common Workflow Language, Draft 2"
+ type: doc
+ doc: |
+ 7 July 2015
+
+ This version:
+ * https://w3id.org/cwl/draft-2/
+
+ Current version:
+ * https://w3id.org/cwl/
+
+ Authors:
+
+ * Peter Amstutz <peter.amstutz at curoverse.com>, Curoverse
+ * Nebojša Tijanić <nebojsa.tijanic at sbgenomics.com>, Seven Bridges Genomics
+
+ Contributors:
+
+ * Luka Stojanovic <luka.stojanovic at sbgenomics.com>, Seven Bridges Genomics
+ * John Chilton <jmchilton at gmail.com>, Galaxy Project, Pennsylvania State University
+ * Michael R. Crusoe <mcrusoe at msu.edu>, Michigan State University
+ * Hervé Ménager <herve.menager at gmail.com>, Institut Pasteur
+ * Maxim Mikheev <mikhmv at biodatomics.com>, BioDatomics
+ * Stian Soiland-Reyes <soiland-reyes at cs.manchester.ac.uk>, University of Manchester
+
+ # Abstract
+
+ A Workflow is an analysis task represented by a directed graph describing a
+ sequence of operations that transform an input data set to output. This
+ specification defines the Common Workflow Language (CWL), a vendor-neutral
+ standard for representing workflows and concrete process steps intended to
+ be portable across a variety of computing platforms.
+
+ # Status of This Document
+
+ This document is the product of the [Common Workflow Language working
+ group](https://groups.google.com/forum/#!forum/common-workflow-language). The
+ latest version of this document is available in the "specification" directory at
+
+ https://github.com/common-workflow-language/common-workflow-language
+
+ The products of the CWL working group (including this document) are made available
+ under the terms of the Apache License, version 2.0.
+
+ # Introduction
+
+ The Common Workflow Language (CWL) working group is an informal, multi-vendor
+ working group consisting of various organizations and individuals that have an
+ interest in portability of data analysis workflows. The goal is to create
+ specifications like this one that enable data scientists to describe analysis
+ tools and workflows that are powerful, easy to use, portable, and support
+ reproducibility.
+
+ ## Introduction to draft 2
+
+ This specification represents the second milestone of the CWL group. Since
+ draft-1, this draft introduces a number of major changes and additions:
+
+ * Use of Avro schema (instead of JSON-schema) and JSON-LD for data modeling.
+ * Significant refactoring of the Command Line Tool description.
+ * Data and execution model for Workflows.
+ * Extension mechanism through "hints" and "requirements".
+
+ ## Purpose
+
+ CWL is designed to express workflows for data-intensive science, such as
+ Bioinformatics, Chemistry, Physics, and Astronomy. This specification is
+ intended to define a data and execution model for Workflows and Command Line
+ Tools that can be implemented on top of a variety of computing platforms,
+ ranging from an individual workstation to cluster, grid, cloud, and high
+ performance computing systems.
+
+ ## References to Other Specifications
+
+ * [JSON](http://json.org)
+ * [JSON-LD](http://json-ld.org)
+ * [JSON Pointer](https://tools.ietf.org/html/draft-ietf-appsawg-json-pointer-04)
+ * [YAML](http://yaml.org)
+ * [Avro](https://avro.apache.org/docs/current/spec.html)
+ * [Uniform Resource Identifier (URI): Generic Syntax](https://tools.ietf.org/html/rfc3986)
+ * [UTF-8](https://www.ietf.org/rfc/rfc2279.txt)
+ * [Portable Operating System Interface (POSIX.1-2008)](http://pubs.opengroup.org/onlinepubs/9699919799/)
+ * [Resource Description Framework (RDF)](http://www.w3.org/RDF/)
+
+ ## Scope
+
+ This document describes the CWL syntax, execution, and object model. It
+ is not intended to document a specific implementation of CWL, however it may
+ serve as a reference for the behavior of conforming implementations.
+
+ ## Terminology
+
+ The terminology used to describe CWL documents is defined in the
+ Concepts section of the specification. The terms defined in the
+ following list are used in building those definitions and in describing the
+ actions of a CWL implementation:
+
+ **may**: Conforming CWL documents and CWL implementations are permitted but
+ not required to behave as described.
+
+ **must**: Conforming CWL documents and CWL implementations are required to behave
+ as described; otherwise they are in error.
+
+ **error**: A violation of the rules of this specification; results are
+ undefined. Conforming implementations may detect and report an error and may
+ recover from it.
+
+ **fatal error**: A violation of the rules of this specification; results are
+ undefined. Conforming implementations must not continue to execute the current
+ process and may report an error.
+
+ **at user option**: Conforming software may or must (depending on the modal verb in
+ the sentence) behave as described; if it does, it must provide users a means to
+ enable or disable the behavior described.
+
+ # Data model
+
+ ## Data concepts
+
+ An **object** is a data structure equivalent to the "object" type in JSON,
+ consisting of an unordered set of name/value pairs (referred to here as
+ **fields**) and where the name is a string and the value is a string, number,
+ boolean, array, or object.
+
+ A **document** is a file containing a serialized object, or an array of objects.
+
+ A **process** is a basic unit of computation which accepts input data,
+ performs some computation, and produces output data.
+
+ An **input object** is an object describing the inputs to an invocation of a process.
+
+ An **output object** is an object describing the output of an invocation of a process.
+
+ An **input schema** describes the valid format (required fields, data types)
+ for an input object.
+
+ An **output schema** describes the valid format for an output object.
+
+ **Metadata** is information about workflows, tools, or input items that is
+ not used directly in the computation.
+
+ ## Syntax
+
+ Documents containing CWL objects are serialized and loaded using YAML
+ syntax and UTF-8 text encoding. A conforming implementation must accept
+ all valid YAML documents.
+
+ The CWL schema is defined using Avro Linked Data (avro-ld). Avro-ld is an
+ extension of the Apache Avro schema language to support additional
+ annotations mapping Avro fields to RDF predicates via JSON-LD.
+
+ A CWL document may be validated by transforming the avro-ld schema to a
+ base Apache Avro schema.
+
+ An implementation may interpret a CWL document as
+ [JSON-LD](http://json-ld.org) and convert a CWL document to a [Resource
+ Description Framework (RDF)](http://www.w3.org/RDF/) using the
+ CWL [JSON-LD Context](https://w3id.org/cwl/draft-2/context) (extracted from the avro-ld schema).
+ The CWL [RDFS schema](https://w3id.org/cwl/draft-2/cwl.ttl) defines the classes and properties used by
+ CWL as JSON-LD.
+
+ The latest draft-2 schema is defined here:
+ https://github.com/common-workflow-language/common-workflow-language/blob/master/schemas/draft-2/cwl-avro.yml
+
+
+
+ ## Identifiers
+
+ If an object contains an `id` field, that is used to uniquely identify the
+ object in that document. The value of the `id` field must be unique over the
+ entire document. The format of the `id` field is that of a [relative fragment
+ identifier](https://tools.ietf.org/html/rfc3986#section-3.5), and must start
+ with a hash `#` character.
+
+ An implementation may choose to only honor references to object types for
+ which the `id` field is explicitly listed in this specification.
+
+ When loading a CWL document, an implementation may resolve relative
+ identifiers to absolute URI references. For example, "my_tool.cwl" located
+ in the directory "/home/example/work/" may be transformed to
+ "file:///home/example/work/my_tool.cwl" and a relative fragment reference
+ "#input" in this file may be transformed to
+ "file:///home/example/work/my_tool.cwl#input".
+
+ ## Document preprocessing
+
+ An implementation must resolve `import` directives. An `import` directive
+ is an object consisting of the field `import` specifying a URI. The URI
+ referenced by `import` must be loaded as a CWL document (including
+ recursive preprocessing) and then the `import` object is implicitly
+ replaced by the external resource. URIs may include document fragments
+ referring to objects identified by their `id` field, in which case the `import`
+ directive is replaced by only the fragment object.
+
+ An implementation must resolve `include` directives. An `include`
+ directive is an object consisting of the field `include` specifying a URI.
+ The URI referenced by `include` must be loaded as a UTF-8 encoded text
+ document and the `include` directive is implicitly replaced by a string with
+ the contents of the document. Because the loaded resource is unparsed,
+ URIs used with `include` must not include fragments.
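+
+ As a non-normative sketch, `import` resolution can be implemented as a
+ recursive replacement pass over the loaded document (`fetch_document` below is
+ a hypothetical loader; fragment selection and `include`, which substitutes raw
+ text, are omitted):
+
+ ```
+ import urlparse
+
+ def resolve_imports(obj, base_uri, fetch_document):
+     # Replace {"import": <uri>} objects with the referenced (and recursively
+     # preprocessed) resource.
+     if isinstance(obj, dict):
+         if obj.keys() == ["import"]:
+             uri = urlparse.urljoin(base_uri, obj["import"])
+             return resolve_imports(fetch_document(uri), uri, fetch_document)
+         return dict((k, resolve_imports(v, base_uri, fetch_document))
+                     for k, v in obj.items())
+     if isinstance(obj, list):
+         return [resolve_imports(i, base_uri, fetch_document) for i in obj]
+     return obj
+ ```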
+
+ ## Extensions and Metadata
+
+ Implementation extensions not required for correct
+ execution (for example, fields related to GUI rendering) may
+ be stored in [process hints](#requirements_and_hints).
+
+ Input metadata (for example, a lab sample identifier) may be explicitly
+ represented within a workflow using input parameters which are propagated
+ to output. Future versions of this specification may define additional
+ facilities for working with input/output metadata.
+
+ Fields for tool and workflow metadata (for example, authorship for use in
+ citations) are not defined in this specification. Future versions of this
+ specification may define such fields.
+
+ # Execution model
+
+ ## Execution concepts
+
+ A **parameter** is a named symbolic input or output of a process, with an
+ associated datatype or schema. During execution, values are assigned to
+ parameters to make the input object or output object used for concrete
+ process invocation.
+
+ A **command line tool** is a process characterized by the execution of a
+ standalone, non-interactive program which is invoked on some input,
+ produces output, and then terminates.
+
+ A **workflow** is a process characterized by multiple subprocess steps,
+ where step outputs are connected to the inputs of other downstream steps to
+ form a directed graph, and independent steps may run concurrently.
+
+ A **runtime environment** is the actual hardware and software environment when
+ executing a command line tool. It includes, but is not limited to, the
+ hardware architecture, hardware resources, operating system, software runtime
+ (if applicable, such as the Python interpreter or the JVM), libraries, modules,
+ packages, utilities, and data files required to run the tool.
+
+ A **workflow platform** is a specific hardware and software implementation
+ capable of interpreting a CWL document and executing the processes specified by
+ the document. The responsibilities of the workflow platform may include
+ scheduling process invocation, setting up the necessary runtime environment,
+ making input data available, invoking the tool process, and collecting output.
+
+ It is intended that the workflow platform has broad leeway outside of this
+ specification to optimize use of computing resources and enforce policies
+ not covered by this specifcation. Some areas that are currently out of
+ scope for CWL specification but may be handled by a specific workflow
+ platform include:
+
+ * Data security and permissions.
+ * Scheduling tool invocations on remote cluster or cloud compute nodes.
+ * Using virtual machines or operating system containers to manage the runtime
+ (except as described in [DockerRequirement](#dockerrequirement)).
+ * Using remote or distributed file systems to manage input and output files.
+ * Translating or rewriting file paths.
+ * Determining if a process has previously been executed, skipping it and
+ reusing previous results.
+ * Pausing and resuming of processes or workflows.
+
+ Conforming CWL processes must not assume anything about the runtime
+ environment or workflow platform unless explicitly declared through the use
+ of [process requirements](#processrequirement).
+
+ ## Generic execution process
+
+ The generic execution sequence of a CWL process (including both workflows
+ and concrete process implementations) is as follows.
+
+ 1. Load and validate CWL document, yielding a process object.
+ 2. Load input object.
+ 3. Validate the input object against the `inputs` schema for the process.
+ 4. Validate that process requirements are met.
+ 5. Perform any further setup required by the specific process type.
+ 6. Execute the process.
+ 7. Capture results of process execution into the output object.
+ 8. Validate the output object against the `outputs` schema for the process.
+ 9. Report the output object to the process caller.
+
+ ## Requirements and hints
+
+ A **[process requirement](#processrequirement)** modifies the semantics or runtime
+ environment of a process. If an implementation cannot satisfy all
+ requirements, or a requirement is listed which is not recognized by the
+ implementation, it is a fatal error and the implementation must not attempt
+ to run the process, unless overridden at user option.
+
+ A **hint** is similar to a requirement, however it is not an error if an
+ implementation cannot satisfy all hints. The implementation may report a
+ warning if a hint cannot be satisfied.
+
+ Requirements are inherited. A requirement specified in a Workflow applies
+ to all workflow steps; a requirement specified on a workflow step will
+ apply to the process implementation.
+
+ If the same process requirement appears at different levels of the
+ workflow, the most specific instance of the requirement is used, that is,
+ an entry in `requirements` on a process implementation such as
+ CommandLineTool will take precedence over an entry in `requirements`
+ specified in a workflow step, and an entry in `requirements` on a workflow
+ step takes precedence over the workflow. Entries in `hints` are resolved
+ the same way.
+
+ Requirements override hints. If a process implementation provides a
+ process requirement in `hints` which is also provided in `requirements` by
+ an enclosing workflow or workflow step, the enclosing `requirements` takes
+ precedence.
+
+ Process requirements are the primary mechanism for specifying extensions to
+ the CWL core specification.
+
+ ## Expressions
+
+ An expression is a fragment of executable code which is evaluated by the
+ workflow platform to affect the inputs, outputs, or behavior of a process.
+ In the generic execution sequence, expressions may be evaluated during step
+ 5 (process setup), step 6 (execute process), and/or step 7 (capture
+ output). Expressions are distinct from regular processes in that they are
+ intended to modify the behavior of the workflow itself rather than perform
+ the primary work of the workflow.
+
+ An implementation must provide the predefined `cwl:JsonPointer` expression
+ engine. This expression engine specifies a [JSON
+ Pointer](https://tools.ietf.org/html/draft-ietf-appsawg-json-pointer-04)
+ into an expression input object consisting of the `job` and `context`
+ fields described below.
+
+ An expression engine defined with
+ [ExpressionEngineRequirement](#expressionenginerequirement) is a command
+ line program that follows the protocol below (a minimal sketch appears after the list):
+
+ * On standard input, receive a JSON object with the following fields:
+
+ - **engineConfig**: A list of strings from the `engineConfig` field.
+ Null if `engineConfig` is not specified.
+
+ - **job**: The input object of the current Process (context dependent).
+
+ - **context**: The specific value being transformed (context dependent). May
+ be null.
+
+ - **script**: The code fragment to evaluate.
+
+ - **outdir**: When used in the context of a CommandLineTool, this is
+ the designated output directory that will be used when executing the
+ tool. Null if not applicable.
+
+ - **tmpdir**: When used in the context of a CommandLineTool, this is
+ the designated temporary directory that will be used when executing
+ the tool. Null if not applicable.
+
+ * On standard output, print a single JSON value (string, number, array, object,
+ boolean, or null) for the return value.
+
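+ As a non-normative illustration, a minimal engine speaking this protocol and
+ treating `script` as a JSON Pointer into the `job` and `context` fields (the
+ behaviour of the predefined `cwl:JsonPointer` engine) might look like:
+
+ ```
+ import json
+ import sys
+
+ def json_pointer(doc, pointer):
+     # Follow an RFC 6901-style pointer such as "/job/threads" or "/context/0".
+     for part in (pointer.lstrip("/").split("/") if pointer else []):
+         part = part.replace("~1", "/").replace("~0", "~")
+         doc = doc[int(part)] if isinstance(doc, list) else doc[part]
+     return doc
+
+ request = json.load(sys.stdin)   # engineConfig, job, context, script, outdir, tmpdir
+ result = json_pointer({"job": request["job"], "context": request["context"]},
+                       request["script"])
+ sys.stdout.write(json.dumps(result))
+ ```
+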
+ Expressions must be evaluated in an isolated context (a "sandbox") which
+ permits no side effects to leak outside the context, and permit no outside
+ data to leak into the context.
+
+ Implementations may apply limits, such as process isolation, timeouts, and
+ operating system containers/jails to minimize the security risks associated
+ with running untrusted code.
+
+ The order in which expressions are evaluated within a process or workflow
+ is undefined.
+
+ ## Workflow graph
+
+ A workflow describes a set of **steps** and the **dependencies** between
+ those processes. When a process produces output that will be consumed by a
+ second process, the first process is a dependency of the second process.
+ When there is a dependency, the workflow engine must execute the dependency
+ process and wait for it to successfully produce output before executing the
+ dependent process. If two processes are defined in the workflow graph that
+ are not directly or indirectly dependent, these processes are
+ **independent**, and may execute in any order or execute concurrently. A
+ workflow is complete when all steps have been executed.
+
+ ## Success and failure
+
+ A completed process must result in one of `success`, `temporaryFailure` or
+ `permanentFailure` states. An implementation may choose to retry a process
+ execution which resulted in `temporaryFailure`. An implementation may
+ choose to either continue running other steps of a workflow, or terminate
+ immediately upon `permanentFailure`.
+
+ * If any step of a workflow execution results in `permanentFailure`, then the
+ workflow status is `permanentFailure`.
+
+ * If one or more steps result in `temporaryFailure` and all other steps
+ complete `success` or are not executed, then the workflow status is
+ `temporaryFailure`.
+
+ * If all workflow steps are executed and complete with `success`, then the workflow
+ status is `success`.
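+
+ Expressed as a small, non-normative helper, these aggregation rules amount to:
+
+ ```
+ def workflow_status(step_states):
+     # step_states: the "success" / "temporaryFailure" / "permanentFailure"
+     # result of every step that was executed.
+     states = set(step_states)
+     if "permanentFailure" in states:
+         return "permanentFailure"
+     if "temporaryFailure" in states:
+         return "temporaryFailure"
+     return "success"
+ ```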
+
+ ## Executing CWL documents as scripts
+
+ By convention, a CWL document may begin with `#!/usr/bin/env cwl-runner`
+ and be marked as executable (the POSIX "+x" permission bits) to enable it
+ to be executed directly. A workflow platform may support this mode of
+ operation; if so, it must provide `cwl-runner` as an alias for the
+ platform's CWL implementation.
+
+ # Sample CWL workflow
+
+ revtool.cwl:
+ ```
+ #!/usr/bin/env cwl-runner
+ #
+ # Simplest example command line program wrapper for the Unix tool "rev".
+ #
+ class: CommandLineTool
+ description: "Reverse each line using the `rev` command"
+
+ # The "inputs" array defines the structure of the input object that describes
+ # the inputs to the underlying program. Here, there is one input field
+ # defined that will be called "input" and will contain a "File" object.
+ #
+ # The input binding indicates that the input value should be turned into a
+ # command line argument. In this example inputBinding is an empty object,
+ # which indicates that the file name should be added to the command line at
+ # a default location.
+ inputs:
+ - id: "#input"
+ type: File
+ inputBinding: {}
+
+ # The "outputs" array defines the structure of the output object that
+ # describes the outputs of the underlying program. Here, there is one
+ # output field defined that will be called "output", must be a "File" type,
+ # and after the program executes, the output value will be the file
+ # output.txt in the designated output directory.
+ outputs:
+ - id: "#output"
+ type: File
+ outputBinding:
+ glob: output.txt
+
+ # The actual program to execute.
+ baseCommand: rev
+
+ # Specify that the standard output stream must be redirected to a file called
+ # output.txt in the designated output directory.
+ stdout: output.txt
+ ```
+
+ sorttool.cwl:
+ ```
+ #!/usr/bin/env cwl-runner
+ #
+ # Example command line program wrapper for the Unix tool "sort"
+ # demonstrating command line flags.
+ class: CommandLineTool
+ description: "Sort lines using the `sort` command"
+
+ # This example is similar to the previous one, with an additional input
+ # parameter called "reverse". It is a boolean parameter, which is
+ # interpreted as a command line flag. The value of "prefix" is used as the
+ # flag to put on the command line if "reverse" is true. If "reverse" is
+ # false, no flag is added.
+ #
+ # This example also introduces the "position" field. This indicates the
+ # sorting order of items on the command line. Lower numbers are placed
+ # before higher numbers. Here, the "--reverse" flag (if present) will be
+ # added to the command line before the input file path.
+ inputs:
+ - id: "#reverse"
+ type: boolean
+ inputBinding:
+ position: 1
+ prefix: "--reverse"
+ - id: "#input"
+ type: File
+ inputBinding:
+ position: 2
+
+ outputs:
+ - id: "#output"
+ type: File
+ outputBinding:
+ glob: output.txt
+
+ baseCommand: sort
+ stdout: output.txt
+ ```
+
+ revsort.cwl:
+ ```
+ #!/usr/bin/env cwl-runner
+ #
+ # This is a two-step workflow which uses "revtool" and "sorttool" defined above.
+ #
+ class: Workflow
+ description: "Reverse the lines in a document, then sort those lines."
+
+ # Requirements specify prerequisites and extensions to the workflow.
+ # In this example, DockerRequirement specifies a default Docker container
+ # in which the command line tools will execute.
+ requirements:
+ - class: DockerRequirement
+ dockerPull: debian:8
+
+ # The inputs array defines the structure of the input object that describes
+ # the inputs to the workflow.
+ #
+ # The "reverse_sort" input parameter demonstrates the "default" field. If the
+ # field "reverse_sort" is not provided in the input object, the default value will
+ # be used.
+ inputs:
+ - id: "#input"
+ type: File
+ description: "The input file to be processed."
+ - id: "#reverse_sort"
+ type: boolean
+ default: true
+ description: "If true, reverse (descending) sort"
+
+ # The "outputs" array defines the structure of the output object that describes
+ # the outputs of the workflow.
+ #
+ # Each output field must be connected to the output of one of the workflow
+ # steps using the "connect" field. Here, the parameter "#output" of the
+ # workflow comes from the "#sorted" output of the "sort" step.
+ outputs:
+ - id: "#output"
+ type: File
+ source: "#sorted.output"
+ description: "The output with the lines reversed and sorted."
+
+ # The "steps" array lists the executable steps that make up the workflow.
+ # The tool to execute each step is listed in the "run" field.
+ #
+ # In the first step, the "inputs" field of the step connects the upstream
+ # parameter "#input" of the workflow to the input parameter of the tool
+ # "revtool.cwl#input".
+ #
+ # In the second step, the "inputs" field of the step connects the output
+ # parameter "#reversed" from the first step to the input parameter of the
+ # tool "sorttool.cwl#input".
+ steps:
+ - inputs:
+ - { id: "#rev.input", source: "#input" }
+ outputs:
+ - { id: "#rev.output" }
+ run: { import: revtool.cwl }
+
+ - inputs:
+ - { id: "#sorted.input", source: "#rev.output" }
+ - { id: "#sorted.reverse", source: "#reverse_sort" }
+ outputs:
+ - { id: "#sorted.output" }
+ run: { import: sorttool.cwl }
+ ```
+
+ Sample input object:
+ ```
+ {
+ "input": {
+ "class": "File",
+ "path": "whale.txt"
+ }
+ }
+ ```
+
+ Sample output object:
+ ```
+ {
+ "output": {
+ "path": "/tmp/tmpdeI_p_/output.txt",
+ "size": 1111,
+ "class": "File",
+ "checksum": "sha1$b9214658cc453331b62c2282b772a5c063dbd284"
+ }
+ }
+ ```
+
+ jsonldPrefixes: {
+ "cwl": "https://w3id.org/cwl/cwl#",
+ "avro": "https://w3id.org/cwl/avro#",
+ "dct": "http://purl.org/dc/terms/",
+ "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+ "rdfs": "http://www.w3.org/2000/01/rdf-schema#"
+ }
+ jsonldVocab: cwl
+
+
+- name: Reference
+ type: doc
+ doc: This section specifies the core object types that make up a CWL document.
+
+
+- name: Datatype
+ type: enum
+ docAfter: ProcessRequirement
+ symbols:
+ - "null"
+ - boolean
+ - int
+ - long
+ - float
+ - double
+ - bytes
+ - string
+ - record
+ - enum
+ - array
+ - map
+ - File
+ - Any
+ jsonldPrefix: avro
+ jsonldPredicate:
+ - symbol: File
+ predicate: "cwl:File"
+ - symbol: Any
+ predicate: "cwl:Any"
+ doc: |
+ CWL data types are based on Avro schema declarations. Refer to the [Avro
+ schema declaration
+ documentation](https://avro.apache.org/docs/current/spec.html#schemas) for
+ detailed information. In addition, CWL defines [`File`](#file)
+ as a special record type.
+
+ ## Primitive types
+
+ * **null**: no value
+ * **boolean**: a binary value
+ * **int**: 32-bit signed integer
+ * **long**: 64-bit signed integer
+ * **float**: single precision (32-bit) IEEE 754 floating-point number
+ * **double**: double precision (64-bit) IEEE 754 floating-point number
+ * **bytes**: sequence of uninterpreted 8-bit unsigned bytes
+ * **string**: Unicode character sequence
+
+ ## Complex types
+
+ * **record**: An object with one or more fields defined by name and type
+ * **enum**: A value from a finite set of symbolic values
+ * **array**: An ordered sequence of values
+ * **map**: An unordered collection of key/value pairs
+
+ ## File type
+
+ See [File](#file) below.
+
+ ## Any type
+
+ See [Any](#any) below.
+
+- name: File
+ type: record
+ docParent: Datatype
+ doc: |
+ Represents a file (or group of files if `secondaryFiles` is specified) that
+ must be accessible by tools using the standard POSIX file system call API, such as
+ open(2) and read(2).
+ fields:
+ - name: "class"
+ type:
+ type: enum
+ name: "File_class"
+ symbols:
+ - File
+ jsonldPredicate:
+ "@id": "@type"
+ "@type": "@vocab"
+ doc: Must be `File` to indicate this object describes a file.
+ - name: "path"
+ type: "string"
+ doc: The path to the file.
+ - name: "checksum"
+ type: ["null", "string"]
+ doc: |
+ Optional hash code for validating file integrity. Currently must be in the form
+ "sha1$ + hexadecimal string" using the SHA-1 algorithm.
+ - name: "size"
+ type: ["null", "long"]
+ doc: Optional file size.
+ - name: "secondaryFiles"
+ type:
+ - "null"
+ - type: array
+ items: File
+ doc: |
+ A list of additional files that are associated with the primary file
+ and must be transferred alongside the primary file. Examples include
+ indexes of the primary file, or external references which must be
+ included when loading the primary document. A file object listed in
+ `secondaryFiles` may itself include `secondaryFiles` for which the same
+ rules apply.
+
+
+- name: Any
+ type: enum
+ docParent: Datatype
+ symbols: ["Any"]
+ doc: |
+ The **Any** type validates for any non-null value.
+
+
+- name: Schema
+ type: record
+ doc: "A schema defines a parameter type."
+ docParent: Parameter
+ fields:
+ - name: type
+ doc: "The data type of this parameter."
+ type:
+ - "Datatype"
+ - "Schema"
+ - "string"
+ - type: "array"
+ items: [ "Datatype", "Schema", "string" ]
+ jsonldPredicate:
+ "@id": "avro:type"
+ "@type": "@vocab"
+ - name: fields
+ type:
+ - "null"
+ - type: "array"
+ items: "Schema"
+ jsonldPredicate:
+ "@id": "avro:fields"
+ "@container": "@list"
+ doc: "When `type` is `record`, defines the fields of the record."
+ - name: "symbols"
+ type:
+ - "null"
+ - type: "array"
+ items: "string"
+ jsonldPredicate:
+ "@id": "avro:symbols"
+ "@container": "@list"
+ doc: "When `type` is `enum`, defines the set of valid symbols."
+ - name: items
+ type:
+ - "null"
+ - "Datatype"
+ - "Schema"
+ - "string"
+ - type: "array"
+ items: [ "Datatype", "Schema", "string" ]
+ jsonldPredicate:
+ "@id": "avro:items"
+ "@container": "@list"
+ doc: "When `type` is `array`, defines the type of the array elements."
+ - name: "values"
+ type:
+ - "null"
+ - "Datatype"
+ - "Schema"
+ - "string"
+ - type: "array"
+ items: [ "Datatype", "Schema", "string" ]
+ jsonldPredicate:
+ "@id": "avro:values"
+ "@container": "@list"
+ doc: "When `type` is `map`, defines the value type for the key/value pairs."
+
+
+- name: Parameter
+ type: record
+ docParent: Process
+ abstract: true
+ doc: |
+ Define an input or output parameter to a process.
+
+ fields:
+ - name: type
+ type:
+ - "null"
+ - Datatype
+ - Schema
+ - string
+ - type: array
+ items:
+ - Datatype
+ - Schema
+ - string
+ jsonldPredicate:
+ "@id": "avro:type"
+ "@type": "@vocab"
+ doc: |
+ Specify valid types of data that may be assigned to this parameter.
+ - name: label
+ type:
+ - "null"
+ - string
+ jsonldPredicate: "rdfs:label"
+ doc: "A short, human-readable label of this parameter object."
+ - name: description
+ type:
+ - "null"
+ - string
+ jsonldPredicate: "rdfs:comment"
+ doc: "A long, human-readable description of this parameter object."
+ - name: streamable
+ type: ["null", "boolean"]
+ doc: |
+ Currently only applies if `type` is `File`. A value of `true`
+ indicates that the file is read or written sequentially without
+ seeking. An implementation may use this flag to indicate whether it is
+ valid to stream file contents using a named pipe. Default: `false`.
+ - name: default
+ type: ["null", Any]
+ doc: |
+ The default value for this parameter if not provided in the input
+ object.
+
+
+- name: JsonPointer
+ type: enum
+ docParent: Expression
+ symbols:
+ - "JsonPointer"
+ jsonldPrefix: "cwl"
+
+
+- type: record
+ name: Expression
+ docAfter: ExpressionTool
+ doc: |
+ Define an expression that will be evaluated and used to modify the behavior
+ of a tool or workflow. See [Expressions](#expressions) for more
+ information about expressions
+ and [ExpressionEngineRequirement](#expressionenginerequirement) for
+ information on how to define an expression engine.
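+
+ As an illustrative, non-normative sketch, an expression using the built-in
+ JSON pointer engine might look like the following (the pointer path shown
+ is hypothetical):
+
+ ```
+ engine: "cwl:JsonPointer"
+ script: "job/input/path"
+ ```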
+ fields:
+ - name: engine
+ type:
+ - JsonPointer
+ - string
+ doc: |
+ Either `cwl:JsonPointer` or a reference to an
+ ExpressionEngineRequirement defining which engine to use.
+ jsonldPredicate:
+ "@id": "cwl:engine"
+ "@type": "@id"
+ - name: script
+ type: string
+ doc: "The code to be executed by the expression engine."
+
+
+- name: Binding
+ type: record
+ docParent: Parameter
+ fields:
+ - name: loadContents
+ type:
+ - "null"
+ - boolean
+ doc: |
+ Only applies when `type` is `File`. Read up to the first 64 KiB of text from the file and place it in the
+ "contents" field of the file object for manipulation by expressions.
+ - name: secondaryFiles
+ type:
+ - "null"
+ - "string"
+ - Expression
+ - type: "array"
+ items: ["string", "Expression"]
+ doc: |
+ Only applies when `type` is `File`. Describes files that must be
+ included alongside the primary file.
+
+ If the value is an expression, the context of the expression is the input
+ or output File parameter to which this binding applies.
+
+ If the value is a string, it specifies that the following pattern
+ should be applied to the primary file:
+
+ 1. If string begins with one or more caret `^` characters, for each
+ caret, remove the last file extension from the path (the last
+ period `.` and all following characters). If there are no file
+ extensions, the path is unchanged.
+ 2. Append the remainder of the string to the end of the file path.
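+
+ For example (illustrative only): applied to a primary file
+ `reads.fastq.gz`, the pattern `".idx"` yields `reads.fastq.gz.idx`, while
+ the pattern `"^^.idx"` removes two extensions (`.gz`, then `.fastq`)
+ before appending, yielding `reads.idx`.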
+
+
+- name: InputSchema
+ type: record
+ extends: Schema
+ docParent: InputParameter
+ specialize: {Schema: InputSchema}
+ fields:
+ - name: inputBinding
+ type: [ "null", "Binding" ]
+ doc: |
+ Describes how to handle a value in the input object and convert it
+ into a concrete form for execution, such as command line parameters.
+
+
+- name: OutputSchema
+ type: record
+ extends: Schema
+ docParent: OutputParameter
+ specialize: {Schema: OutputSchema}
+
+- name: InputParameter
+ type: record
+ extends: Parameter
+ docAfter: Parameter
+ specialize: {Schema: InputSchema}
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: "The unique identifier for this parameter object."
+ - name: "inputBinding"
+ type: [ "null", "Binding" ]
+ doc: |
+ Describes how to handle the inputs of a process and convert them
+ into a concrete form for execution, such as command line parameters.
+
+- name: OutputParameter
+ type: record
+ extends: Parameter
+ docAfter: Parameter
+ specialize: {Schema: OutputSchema}
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: "The unique identifier for this parameter object."
+
+
+- type: record
+ name: "FileDef"
+ docParent: CreateFileRequirement
+ doc: |
+ Define a file that must be placed in the designated output directory
+ prior to executing the command line tool. May be the result of executing
+ an expression, such as building a configuration file from a template.
+ fields:
+ - name: "filename"
+ type: ["string", "Expression"]
+ doc: "The name of the file to create in the output directory."
+ - name: "fileContent"
+ type: ["string", "Expression"]
+ doc: |
+ If the value is a string literal or an expression which evaluates to a
+ string, a new file must be created with the string as the file contents.
+
+ If the value is an expression that evaluates to a File object, this
+ indicates the referenced file should be added to the designated output
+ directory prior to executing the tool.
+
+ Files added in this way may be read-only, and may be provided
+ by bind mounts or file system links to avoid
+ unnecessary copying of the input file.
+
+
+- type: record
+ name: EnvironmentDef
+ docParent: EnvVarRequirement
+ doc: |
+ Define an environment variable that will be set in the runtime environment
+ by the workflow platform when executing the command line tool. May be the
+ result of executing an expression, such as getting a parameter from input.
+ fields:
+ - name: "envName"
+ type: "string"
+ doc: The environment variable name
+ - name: "envValue"
+ type: ["string", "Expression"]
+ doc: The environment variable value
+
+
+- type: record
+ name: SchemaDef
+ extends: Schema
+ docParent: SchemaDefRequirement
+ fields:
+ - name: name
+ type: string
+ doc: "The type name being defined."
+
+
+- type: record
+ name: ProcessRequirement
+ docAfter: ExpressionTool
+ abstract: true
+ doc: |
+ A process requirement declares a prerequisite that may or must be fulfilled
+ before executing a process. See [`Process.hints`](#process) and
+ [`Process.requirements`](#process).
+
+ Process requirements are the primary mechanism for specifying extensions to
+ the CWL core specification.
+
+ fields:
+ - name: "class"
+ type: "string"
+ doc: "The specific requirement type."
+ jsonldPredicate:
+ "@id": "@type"
+ "@type": "@vocab"
+
+
+- type: record
+ name: Process
+ abstract: true
+ docAfter: ProcessRequirement
+ doc: |
+
+ The base executable type in CWL is the `Process` object defined by the
+ document. Note that the `Process` object is abstract and cannot be
+ directly executed.
+
+ fields:
+ - name: id
+ type: ["null", string]
+ jsonldPredicate: "@id"
+ doc: "The unique identifier for this process object."
+ - name: inputs
+ type:
+ type: array
+ items: InputParameter
+ doc: |
+ Defines the input parameters of the process. The process is ready to
+ run when all required input parameters are associated with concrete
+ values. Input parameters include a schema for each parameter which is
+ used to validate the input object. It may also be used to build a user
+ interface for constructing the input object.
+ - name: outputs
+ type:
+ type: array
+ items: OutputParameter
+ doc: |
+ Defines the parameters representing the output of the process. May be
+ used to generate and/or validate the output object.
+ - name: requirements
+ type:
+ - "null"
+ - type: array
+ items: ProcessRequirement
+ doc: >
+ Declares requirements that apply to either the runtime environment or the
+ workflow engine that must be met in order to execute this process. If
+ an implementation cannot satisfy all requirements, or a requirement is
+ listed which is not recognized by the implementation, it is a fatal
+ error and the implementation must not attempt to run the process,
+ unless overridden at user option.
+ - name: hints
+ type:
+ - "null"
+ - type: array
+ items: Any
+ doc: >
+ Declares hints applying to either the runtime environment or the
+ workflow engine that may be helpful in executing this process. It is
+ not an error if an implementation cannot satisfy all hints, however
+ the implementation may report a warning.
+ - name: label
+ type:
+ - "null"
+ - string
+ jsonldPredicate: "rdfs:label"
+ doc: "A short, human-readable label of this process object."
+ - name: description
+ type:
+ - "null"
+ - string
+ jsonldPredicate: "rdfs:comment"
+ doc: "A long, human-readable description of this process object."
+
+- type: record
+ name: CommandLineBinding
+ extends: Binding
+ docParent: CommandInputParameter
+ doc: |
+
+ When listed under `inputBinding` in the input schema, the term
+ "value" refers to the corresponding value in the input object. For
+ binding objects listed in `CommandLineTool.arguments`, the term "value"
+ refers to the effective value after evaluating `valueFrom`.
+
+ The binding behavior when building the command line depends on the data
+ type of the value. If there is a mismatch between the type described by
+ the input schema and the effective value, such as resulting from an
+ expression evaluation, an implementation must use the data type of the
+ effective value.
+
+ - **string**: Add `prefix` and the string to the command line.
+
+ - **number**: Add `prefix` and the decimal representation to the command line.
+
+ - **boolean**: If true, add `prefix` to the command line. If false, add
+ nothing.
+
+ - **File**: Add `prefix` and the value of
+ [`File.path`](#file) to the command line.
+
+ - **array**: If `itemSeparator` is specified, join the array into a single
+ string with `itemSeparator` separating the items, then add `prefix` and
+ the joined string. Otherwise first add `prefix`, then recursively process
+ individual elements.
+
+ - **object**: Add `prefix` only, and recursively add object fields for
+ which `inputBinding` is specified.
+
+ - **null**: Add nothing.
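+
+ As an illustrative, non-normative sketch, assume a hypothetical input
+ object `{"reverse": true, "names": ["a", "b"]}` where `reverse` has
+ `prefix: "--reverse"` and `names` has `prefix: "-n"` and
+ `itemSeparator: ","`. The resulting command line fragment would be:
+
+ ```
+ --reverse -n a,b
+ ```
+
+ The exact argument splitting also depends on the `separate` field
+ described below.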
+
+ fields:
+ - name: "position"
+ type: ["null", "int"]
+ doc: "The sorting key. Default position is 0."
+ - name: "prefix"
+ type: [ "null", "string"]
+ doc: "Command line prefix to add before the value."
+ - name: "separate"
+ type: ["null", boolean]
+ doc: |
+ If true (default), then the prefix and value must be added as separate
+ command line arguments; if false, prefix and value must be concatenated
+ into a single command line argument.
+ - name: "itemSeparator"
+ type: ["null", "string"]
+ doc: |
+ Join the array elements into a single string with the elements
+ separated by `itemSeparator`.
+ - name: "valueFrom"
+ type:
+ - "null"
+ - "string"
+ - "Expression"
+ doc: |
+ If `valueFrom` is a constant string value, use this as the value and
+ apply the binding rules above.
+
+ If `valueFrom` is an expression, evaluate the expression to yield the
+ actual value to use to build the command line and apply the binding
+ rules above. If the inputBinding is associated with an input
+ parameter, the "context" of the expression will be the value of the
+ input parameter.
+
+ When a binding is part of the `CommandLineTool.arguments` field,
+ the `valueFrom` field is required.
+
+
+- type: record
+ name: CommandOutputBinding
+ extends: Binding
+ docParent: CommandOutputParameter
+ doc: |
+ Describes how to generate an output parameter based on the files produced
+ by a CommandLineTool.
+
+ The output parameter is generated by applying these operations in
+ the following order:
+
+ - glob
+ - loadContents
+ - outputEval
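+
+ A minimal, non-normative sketch, matching the `sorttool.cwl` example from
+ the user guide above:
+
+ ```
+ outputBinding:
+   glob: output.txt
+ ```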
+ fields:
+ - name: glob
+ type:
+ - "null"
+ - string
+ - Expression
+ - type: array
+ items: string
+ doc: |
+ Find files relative to the output directory, using POSIX glob(3)
+ pathname matching. If provided an array, find files that match any
+ pattern in the array. If provided an expression, the expression must
+ return a string or an array of strings, which will then be evaluated as
+ one or more glob patterns. Only files which actually exist will be
+ matched and returned.
+ - name: outputEval
+ type:
+ - "null"
+ - Expression
+ doc: |
+ Evaluate an expression to generate the output value. If `glob` was
+ specified, the script `context` will be an array containing any files that were
+ matched. Additionally, if `loadContents` is `true`, the File objects
+ will include up to the first 64 KiB of file contents in the `contents` field.
+
+
+- type: record
+ name: CommandInputSchema
+ extends: InputSchema
+ docParent: CommandInputParameter
+ specialize:
+ InputSchema: CommandInputSchema
+ Binding: CommandLineBinding
+
+
+- type: record
+ name: CommandOutputSchema
+ extends: OutputSchema
+ docParent: CommandOutputParameter
+ specialize:
+ OutputSchema: CommandOutputSchema
+ fields:
+ - name: "outputBinding"
+ type: [ "null", CommandOutputBinding ]
+ doc: |
+ Describes how to handle the concrete outputs of a process step (such as
+ files created by a program) and describe them in the process output
+ parameter.
+
+
+- type: record
+ name: CommandInputParameter
+ extends: InputParameter
+ docParent: CommandLineTool
+ doc: An input parameter for a CommandLineTool.
+ specialize:
+ InputSchema: CommandInputSchema
+ Binding: CommandLineBinding
+
+
+- type: record
+ name: CommandOutputParameter
+ extends: OutputParameter
+ docParent: CommandLineTool
+ doc: An output parameter for a CommandLineTool.
+ specialize:
+ OutputSchema: CommandOutputSchema
+ fields:
+ - name: "outputBinding"
+ type: [ "null", CommandOutputBinding ]
+ doc: |
+ Describes how to handle the concrete outputs of a process step (such as
+ files created by a program) and describe them in the process output
+ parameter.
+
+
+- type: record
+ name: CommandLineTool
+ extends: Process
+ docAfter: Workflow
+ specialize:
+ InputParameter: CommandInputParameter
+ OutputParameter: CommandOutputParameter
+ doc: |
+
+ A CommandLineTool process is a process implementation for executing a
+ non-interactive application in a POSIX environment. To accommodate
+ the enormous variety in syntax and semantics for input, runtime
+ environment, invocation, and output of arbitrary programs, CommandLineTool
+ uses an "input binding" that describes how to translate input
+ parameters to an actual program invocation, and an "output binding" that
+ describes how to generate output parameters from program output.
+
+ # Input binding
+
+ The tool command line is built by applying command line bindings to the
+ input object. Bindings are listed either as part of an [input
+ parameter](#commandinputparameter) using the `inputBinding` field, or
+ separately using the `arguments` field of the CommandLineTool.
+
+ The algorithm to build the command line is as follows. In this algorithm,
+ the sort key is a list consisting of one or more numeric or string
+ elements. Strings are sorted lexicographically based on UTF-8 encoding.
+
+ 1. Collect `CommandLineBinding` objects from `arguments`. Assign a sorting
+ key `[position, i]` where `position` is
+ [`CommandLineBinding.position`](#commandlinebinding) and `i`
+ is the index in the `arguments` list.
+
+ 2. Collect `CommandLineBinding` objects from the `inputs` schema and
+ associate them with values from the input object. Where the input type
+ is a record, array, or map, recursively walk the schema and input object,
+ collecting nested `CommandLineBinding` objects and associating them with
+ values from the input object.
+
+ 3. Create a sorting key by taking the value of the `position` field at
+ each level leading to each leaf binding object. If `position` is not
+ specified, it is not added to the sorting key. For bindings on arrays
+ and maps, the sorting key must include the array index or map key
+ following the position. If and only if two bindings have the same sort
+ key, the tie must be broken using the ordering of the field or parameter
+ name immediately containing the leaf binding.
+
+ 4. Sort elements using the assigned sorting keys. Numeric entries sort
+ before strings.
+
+ 5. In the sorted order, apply the rules defined in
+ [`CommandLineBinding`](#commandlinebinding) to convert bindings to actual
+ command line elements.
+
+ 6. Insert elements from `baseCommand` at the beginning of the command
+ line.
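+
+ As a non-normative worked example, consider the `sorttool.cwl` tool from
+ the user guide above, with `reverse` set to true and `input` referring to
+ a file `input.txt` (file name illustrative). The `--reverse` flag
+ receives sort key `[1]`, the input file receives sort key `[2]`, and
+ `baseCommand` is inserted at the front, yielding:
+
+ ```
+ sort --reverse input.txt
+ ```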
+
+ # Runtime environment
+
+ All files listed in the input object must be made available in the runtime
+ environment. The implementation may use a shared or distributed file
+ system or transfer files via explicit download. Implementations may choose
+ not to provide access to files not explicitly specified by the input object
+ or process requirements.
+
+ Output files produced by tool execution must be written to the **designated
+ output directory**.
+
+ The initial current working directory when executing the tool must be the
+ designated output directory.
+
+ When executing the tool, the child process must not inherit environment
+ variables from the parent process. The tool must execute in a new, empty
+ environment, containing only environment variables defined by
+ [EnvVarRequirement](#envvarrequirement), the default environment of the
+ Docker container specified in [DockerRequirement](#dockerrequirement) (if
+ applicable), and `TMPDIR`.
+
+ The `TMPDIR` environment variable must be set in the runtime environment to
+ the **designated temporary directory**. Any files written to the
+ designated temporary directory may be deleted by the workflow platform when
+ the tool invocation is complete.
+
+ An implementation may forbid the tool from writing to any location in the
+ runtime environment file system other than the designated temporary
+ directory and designated output directory. An implementation may provide
+ read-only input files, and disallow in-place update of input files.
+
+ The standard input stream and standard output stream may be redirected as
+ described in the `stdin` and `stdout` fields.
+
+ ## Extensions
+
+ [DockerRequirement](#dockerrequirement),
+ [CreateFileRequirement](#createfilerequirement), and
+ [EnvVarRequirement](#envvarrequirement) are available as standard
+ extensions to core command line tool semantics for defining the runtime
+ environment.
+
+ # Execution
+
+ Once the command line is built and the runtime environment is created, the
+ actual tool is executed.
+
+ The standard error stream and standard output stream (unless redirected by
+ setting `stdout`) may be captured by platform logging facilities for
+ storage and reporting.
+
+ Tools may be multithreaded or spawn child processes; however, when the
+ parent process exits, the tool is considered finished regardless of whether
+ any detached child processes are still running. Tools must not require any
+ kind of console, GUI, or web based user interaction in order to start and
+ run to completion.
+
+ The exit code of the process indicates if the process completed
+ successfully. By convention, an exit code of zero is treated as success
+ and non-zero exit codes are treated as failure. This may be customized by
+ providing the fields `successCodes`, `temporaryFailCodes`, and
+ `permanentFailCodes`. An implementation may choose to default unspecified
+ non-zero exit codes to either `temporaryFailure` or `permanentFailure`.
+
+ # Output binding
+
+ If the output directory contains a file named "cwl.output.json", that file
+ must be loaded and used as the output object. Otherwise, the output object
+ must be generated by walking the parameters listed in `outputs` and
+ applying output bindings to the tool output. Output bindings are
+ associated with output parameters using the `outputBinding` field. See
+ [`CommandOutputBinding`](#commandoutputbinding) for details.
+
+ fields:
+ - name: "class"
+ jsonldPredicate:
+ "@id": "@type"
+ "@type": "@vocab"
+ type: string
+ - name: baseCommand
+ doc: |
+ Specifies the program to execute. If the value is an array, the first
+ element is the program to execute, and subsequent elements are placed
+ at the beginning of the command line, prior to any command line
+ bindings. If the program includes a path separator character it must
+ be an absolute path, otherwise it is an error. If the program does not
+ include a path separator, search the `$PATH` variable in the runtime
+ environment of the workflow runner to find the absolute path of the
+ executable.
+ type:
+ - string
+ - type: array
+ items: string
+ jsonldPredicate:
+ "@id": "cwl:baseCommand"
+ "@container": "@list"
+ - name: arguments
+ doc: |
+ Command line bindings which are not directly associated with input parameters.
+ type:
+ - "null"
+ - type: array
+ items: [string, CommandLineBinding]
+ jsonldPredicate:
+ "@id": "cwl:arguments"
+ "@container": "@list"
+ - name: stdin
+ type: ["null", string, Expression]
+ doc: |
+ A path to a file whose contents must be piped into the command's
+ standard input stream.
+ - name: stdout
+ type: ["null", string, Expression]
+ doc: |
+ Capture the command's standard output stream to a file written to
+ the designated output directory.
+
+ If `stdout` is a string, it specifies the file name to use.
+
+ If `stdout` is an expression, the expression is evaluated and must
+ return a string with the file name to use to capture stdout. If the
+ return value is not a string, or the resulting path contains illegal
+ characters (such as the path separator `/`) it is an error.
+ - name: successCodes
+ type:
+ - "null"
+ - type: array
+ items: int
+ doc: |
+ Exit codes that indicate the process completed successfully.
+
+ - name: temporaryFailCodes
+ type:
+ - "null"
+ - type: array
+ items: int
+ doc: |
+ Exit codes that indicate the process failed due to a possibly
+ temporary condition, where executing the process with the same
+ runtime environment and inputs may produce different results.
+
+ - name: permanentFailCodes
+ type:
+ - "null"
+ - type: array
+ items: int
+ doc:
+ Exit codes that indicate the process failed due to a permanent logic
+ error, where executing the process with the same runtime environment and
+ same inputs is expected to always fail.
+
+- type: record
+ name: ExpressionTool
+ extends: Process
+ docAfter: CommandLineTool
+ doc: |
+ Execute an expression as a process step.
+ fields:
+ - name: "class"
+ jsonldPredicate:
+ "@id": "@type"
+ "@type": "@vocab"
+ type: string
+ - name: expression
+ type: Expression
+ doc: |
+ The expression to execute. The expression must return a JSON object which
+ matches the output parameters of the ExpressionTool.
+
+
+- name: LinkMergeMethod
+ type: enum
+ docParent: WorkflowStepInput
+ doc: The input link merge method, described in [WorkflowStepInput](#workflowstepinput).
+ symbols:
+ - merge_nested
+ - merge_flattened
+
+
+- name: WorkflowOutputParameter
+ type: record
+ extends: OutputParameter
+ docParent: Workflow
+ doc: |
+ Describe an output parameter of a workflow. The parameter must be
+ connected to one or more parameters defined in the workflow that will
+ provide the value of the output parameter.
+ fields:
+ - name: source
+ doc: |
+ Specifies one or more workflow parameters that will provide this output
+ value.
+ jsonldPredicate:
+ "@id": "cwl:source"
+ "@type": "@id"
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ - name: linkMerge
+ type: ["null", LinkMergeMethod]
+ doc: |
+ The method to use to merge multiple inbound links into a single array.
+ If not specified, the default method is "merge_nested".
+
+
+- type: record
+ name: WorkflowStepInput
+ docParent: WorkflowStep
+ doc: |
+ The input of a workflow step connects an upstream parameter (from the
+ workflow inputs, or the outputs of other workflows steps) with the input
+ parameters of the underlying process.
+
+ ## Input object
+
+ A WorkflowStepInput object must contain an `id` field in the form
+ `#fieldname` or `#stepname.fieldname`. When the `id` field contains a
+ period `.` the field name consists of the characters following the final
+ period. This defines a field of the workflow step input object with the
+ value of the `source` parameter(s).
+
+ ## Merging
+
+ If the sink parameter is an array, or named in a [workflow
+ scatter](#workflowstep) operation, there may be multiple inbound data links
+ listed in the `source` field. The values from the input links are merged
+ depending on the method specified in the `linkMerge` field. If not
+ specified, the default method is "merge_nested".
+
+ * **merge_nested**
+
+ The input must be an array consisting of exactly one entry for each
+ input link. If "merge_nested" is specified with a single link, the value
+ from the link must be wrapped in a single-item list.
+
+ * **merge_flattened**
+
+ 1. The source and sink parameters must be compatible types, or the source
+ type must be compatible with single element from the "items" type of
+ the destination array parameter.
+ 2. Source parameters which are arrays are concatenated.
+ Source parameters which are single element types are appended as
+ single elements.
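+
+ As a non-normative illustration, suppose two inbound links provide the
+ values `[1, 2]` and `3`:
+
+ * **merge_nested** produces `[[1, 2], 3]` (one entry per link).
+
+ * **merge_flattened** produces `[1, 2, 3]` (arrays concatenated, single
+ elements appended).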
+
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: "A unique identifier for this workflow input parameter."
+ - name: source
+ doc: |
+ Specifies one or more workflow parameters that will provide input to
+ the underlying process parameter.
+ jsonldPredicate:
+ "@id": "cwl:source"
+ "@type": "@id"
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ - name: linkMerge
+ type: ["null", LinkMergeMethod]
+ doc: |
+ The method to use to merge multiple inbound links into a single array.
+ If not specified, the default method is "merge_nested".
+ - name: default
+ type: ["null", Any]
+ doc: |
+ The default value for this parameter if there is no `source`
+ field.
+
+
+- type: record
+ name: WorkflowStepOutput
+ docParent: WorkflowStep
+ doc: |
+ Associate an output parameter of the underlying process with a workflow
+ parameter. The workflow parameter (given in the `id` field) may be used
+ as a `source` to connect with input parameters of other workflow steps, or
+ with an output parameter of the process.
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: |
+ A unique identifier for this workflow output parameter. This is the
+ identifier to use in the `source` field of `WorkflowStepInput` to
+ connect the output value to downstream parameters.
+
+
+- name: ScatterMethod
+ type: enum
+ docParent: WorkflowStep
+ doc: The scatter method, as described in [workflow step scatter](#workflowstep).
+ symbols:
+ - dotproduct
+ - nested_crossproduct
+ - flat_crossproduct
+
+
+- name: WorkflowStep
+ type: record
+ docParent: Workflow
+ doc: |
+ A workflow step is an executable element of a workflow. It specifies the
+ underlying process implementation (such as `CommandLineTool`) in the `run`
+ field and connects the input and output parameters of the underlying
+ process to workflow parameters.
+
+ # Scatter/gather
+
+ To use scatter/gather,
+ [ScatterFeatureRequirement](#scatterfeaturerequirement) must be specified
+ in the workflow or workflow step requirements.
+
+ A "scatter" operation specifies that the associated workflow step or
+ subworkflow should execute separately over a list of input elements. Each
+ job making up a scatter operation is independent and may be executed
+ concurrently.
+
+ The `scatter` field specifies one or more input parameters which will be
+ scattered. An input parameter may be listed more than once. The declared
+ type of each input parameter is implicitly wrapped in an array for each
+ time it appears in the `scatter` field. As a result, upstream parameters
+ which are connected to scattered parameters may be arrays.
+
+ All output parameter types are also implicitly wrapped in arrays. Each job
+ in the scatter results in an entry in the output array.
+
+ If `scatter` declares more than one input parameter, `scatterMethod`
+ describes how to decompose the input into a discrete set of jobs.
+
+ * **dotproduct** specifies that each of the input arrays are aligned and one
+ element taken from each array to construct each job. It is an error
+ if the input arrays are not all the same length.
+
+ * **nested_crossproduct** specifies the Cartesian product of the inputs,
+ producing a job for every combination of the scattered inputs. The
+ output must be nested arrays for each level of scattering, in the
+ order that the input arrays are listed in the `scatter` field.
+
+ * **flat_crossproduct** specifies the Cartesian product of the inputs,
+ producing a job for every combination of the scattered inputs. The
+ output arrays must be flattened to a single level, but otherwise listed in the
+ order that the input arrays are listed in the `scatter` field.
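+
+ As a non-normative illustration, scattering over two parameters whose
+ input arrays are `[a, b]` and `[x, y]`:
+
+ * **dotproduct** produces two jobs, `(a, x)` and `(b, y)`.
+
+ * **nested_crossproduct** produces four jobs, with each output parameter
+ nested as `[[out(a, x), out(a, y)], [out(b, x), out(b, y)]]`.
+
+ * **flat_crossproduct** produces the same four jobs, with each output
+ parameter flattened to `[out(a, x), out(a, y), out(b, x), out(b, y)]`.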
+
+ # Subworkflows
+
+ To specify a nested workflow as part of a workflow step,
+ [SubworkflowFeatureRequirement](#subworkflowfeaturerequirement) must be
+ specified in the workflow or workflow step requirements.
+
+ fields:
+ - name: id
+ type: ["null", string]
+ jsonldPredicate: "@id"
+ doc: "The unique identifier for this workflow step."
+ - name: inputs
+ type:
+ type: array
+ items: WorkflowStepInput
+ doc: |
+ Defines the input parameters of the workflow step. The process is ready to
+ run when all required input parameters are associated with concrete
+ values. Input parameters include a schema for each parameter which is
+ used to validate the input object. It may also be used to build a user
+ interface for constructing the input object.
+ - name: outputs
+ type:
+ type: array
+ items: WorkflowStepOutput
+ doc: |
+ Defines the parameters representing the output of the process. May be
+ used to generate and/or validate the output object.
+ - name: requirements
+ type:
+ - "null"
+ - type: array
+ items: ProcessRequirement
+ doc: >
+ Declares requirements that apply to either the runtime environment or the
+ workflow engine that must be met in order to execute this workflow step. If
+ an implementation cannot satisfy all requirements, or a requirement is
+ listed which is not recognized by the implementation, it is a fatal
+ error and the implementation must not attempt to run the process,
+ unless overridden at user option.
+ - name: hints
+ type:
+ - "null"
+ - type: array
+ items: Any
+ doc: >
+ Declares hints applying to either the runtime environment or the
+ workflow engine that may be helpful in executing this workflow step. It is
+ not an error if an implementation cannot satisfy all hints, however
+ the implementation may report a warning.
+ - name: label
+ type:
+ - "null"
+ - string
+ jsonldPredicate: "rdfs:label"
+ doc: "A short, human-readable label of this process object."
+ - name: description
+ type:
+ - "null"
+ - string
+ jsonldPredicate: "rdfs:comment"
+ doc: "A long, human-readable description of this process object."
+ - name: run
+ type: Process
+ doc: |
+ Specifies the process to run.
+ - name: scatter
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ jsonldPredicate:
+ "@id": "cwl:scatter"
+ "@type": "@id"
+ "@container": "@list"
+ - name: scatterMethod
+ doc: |
+ Required if `scatter` is an array of more than one element.
+ type:
+ - "null"
+ - ScatterMethod
+ jsonldPredicate:
+ "@id": "cwl:scatterMethod"
+ "@type": "@vocab"
+
+
+- name: Workflow
+ type: record
+ docParent: Reference
+ extends: Process
+ specialize:
+ OutputParameter: WorkflowOutputParameter
+ doc: |
+ A workflow is a process consisting of one or more `steps`. Each
+ step has input and output parameters defined by the `inputs` and `outputs`
+ fields. A workflow executes as described in [execution model](#workflow_graph).
+
+ # Dependencies
+
+ Dependencies between parameters are expressed using the `source` field on
+ [workflow step input parameters](#workflowstepinput) and [workflow output
+ parameters](#workflowoutputparameter).
+
+ The `source` field expresses the dependency of one parameter on another
+ such that when a value is associated with the parameter specified by
+ `source`, that value is propagated to the destination parameter. When all
+ data links inbound to a given step are fulfilled, the step is ready to
+ execute.
+
+ # Extensions
+
+ [ScatterFeatureRequirement](#scatterfeaturerequirement) and
+ [SubworkflowFeatureRequirement](#subworkflowfeaturerequirement) are
+ available as standard extensions to core workflow semantics.
+
+ fields:
+ - name: "class"
+ jsonldPredicate:
+ "@id": "@type"
+ "@type": "@vocab"
+ type: string
+ - name: steps
+ doc: |
+ The individual steps that make up the workflow. Each step is executed when all of its
+ input data links are fulfilled. An implementation may choose to execute
+ the steps in a different order than listed and/or execute steps
+ concurrently, provided that dependencies between steps are met.
+ type:
+ - type: array
+ items: WorkflowStep
+
+
+- type: record
+ name: DockerRequirement
+ extends: ProcessRequirement
+ doc: |
+ Indicates that a workflow component should be run in a
+ [Docker](http://docker.com) container, and specifies how to fetch or build
+ the image.
+
+ If a CommandLineTool lists `DockerRequirement` under
+ `hints`, it may be run in the specified Docker container; if listed under
+ `requirements`, it must be.
+
+ The platform must first acquire or install the correct Docker image as
+ specified by `dockerPull`, `dockerLoad` or `dockerFile`.
+
+ The platform must execute the tool in the container using `docker run` with
+ the appropriate Docker image and tool command line.
+
+ The workflow platform may provide input files and the designated output
+ directory through the use of volume bind mounts. The platform may rewrite
+ file paths in the input object to correspond to the Docker bind mounted
+ locations.
+
+ When running a tool contained in Docker, the workflow platform must not
+ assume anything about the contents of the Docker container, such as the
+ presence or absence of specific software, except to assume that the
+ generated command line represents a valid command within the runtime
+ environment of the container.
+
+ ## Interaction with other requirements
+
+ If [EnvVarRequirement](#envvarrequirement) is specified alongside a
+ DockerRequirement, the environment variables must be provided to Docker
+ using `--env` or `--env-file` and interact with the container's preexisting
+ environment as defined by Docker.
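+
+ A minimal, non-normative example, using the same image as the workflow in
+ the user guide above:
+
+ ```
+ requirements:
+   - class: DockerRequirement
+     dockerPull: debian:8
+ ```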
+
+ fields:
+ - name: dockerPull
+ type: ["null", "string"]
+ doc: "Specify a Docker image to retrieve using `docker pull`."
+ - name: "dockerLoad"
+ type: ["null", "string"]
+ doc: "Specify a HTTP URL from which to download a Docker image using `docker load`."
+ - name: dockerFile
+ type: ["null", "string"]
+ doc: "Supply the contents of a Dockerfile which will be built using `docker build`."
+ - name: dockerImageId
+ type: ["null", "string"]
+ doc: |
+ The image id that will be used for `docker run`. May be a
+ human-readable image name or the image identifier hash. May be skipped
+ if `dockerPull` is specified, in which case the `dockerPull` image id
+ must be used.
+ - name: dockerOutputDirectory
+ type: ["null", "string"]
+ doc: |
+ Set the designated output directory to a specific location inside the
+ Docker container.
+
+
+- type: record
+ name: SubworkflowFeatureRequirement
+ extends: ProcessRequirement
+ doc: |
+ Indicates that the workflow platform must support nested workflows in
+ the `run` field of [WorkflowStep](#workflowstep).
+
+
+- name: CreateFileRequirement
+ type: record
+ extends: ProcessRequirement
+ doc: |
+ Define a list of files that must be created by the workflow
+ platform in the designated output directory prior to executing the command
+ line tool. See `FileDef` for details.
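+
+ A minimal, non-normative sketch; the file name and contents are
+ hypothetical:
+
+ ```
+ requirements:
+   - class: CreateFileRequirement
+     fileDef:
+       - filename: settings.conf
+         fileContent: "verbose=true\n"
+ ```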
+ fields:
+ - name: fileDef
+ type:
+ type: "array"
+ items: "FileDef"
+ doc: The list of files.
+
+
+- name: EnvVarRequirement
+ type: record
+ extends: ProcessRequirement
+ doc: |
+ Define a list of environment variables which will be set in the
+ execution environment of the tool. See `EnvironmentDef` for details.
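+
+ A minimal, non-normative sketch; the variable shown is hypothetical:
+
+ ```
+ requirements:
+   - class: EnvVarRequirement
+     envDef:
+       - envName: LC_ALL
+         envValue: C
+ ```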
+ fields:
+ - name: envDef
+ type:
+ type: "array"
+ items: "EnvironmentDef"
+ doc: The list of environment variables.
+
+
+- name: ScatterFeatureRequirement
+ type: record
+ extends: ProcessRequirement
+ doc: |
+ Indicates that the workflow platform must support the `scatter` and
+ `scatterMethod` fields of [WorkflowStep](#workflowstep).
+
+
+- name: SchemaDefRequirement
+ type: record
+ extends: ProcessRequirement
+ doc: |
+ This requirement declares an
+ array of type definitions which must be used when interpreting the `inputs` and
+ `outputs` fields. When a symbolic type is encountered that is not in
+ [`Datatype`](#datatype), the implementation must check if
+ the type is defined in `types` and use that definition. If the type is not
+ found in `types`, it is an error. The entries in `types` must be
+ processed in the order listed such that later schema definitions may refer to
+ earlier schema definitions.
+ fields:
+ - name: types
+ type:
+ type: array
+ items: SchemaDef
+ doc: The list of type definitions.
+
+
+- type: record
+ name: ExpressionEngineRequirement
+ extends: ProcessRequirement
+ doc: |
+ Define an expression engine, as described in [Expressions](#expressions).
+
+ fields:
+ - name: id
+ type: string
+ doc: "Used to identify the expression engine in the `engine` field of Expressions."
+ jsonldPredicate: "@id"
+ - name: requirements
+ type:
+ - "null"
+ - type: array
+ items: ProcessRequirement
+ doc: |
+ Requirements to run this expression engine, such as DockerRequirement
+ for specifying a container to run the engine.
+ - name: engineCommand
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ doc: "The command line to invoke the expression engine."
+ - name: engineConfig
+ type:
+ - "null"
+ - type: array
+ items: string
+ doc: |
+ Additional configuration or code fragments that will also be passed to
+ the expression engine. The semantics of this field are defined by the
+ underlying expression engine. Intended for uses such as providing
+ function definitions that will be called from CWL expressions.
diff --git a/cwltool/schemas/draft-3/CommandLineTool-standalone.yml b/cwltool/schemas/draft-3/CommandLineTool-standalone.yml
new file mode 100644
index 0000000..10dbffa
--- /dev/null
+++ b/cwltool/schemas/draft-3/CommandLineTool-standalone.yml
@@ -0,0 +1,2 @@
+- $import: Process.yml
+- $import: CommandLineTool.yml
\ No newline at end of file
diff --git a/cwltool/schemas/draft-3/CommandLineTool.yml b/cwltool/schemas/draft-3/CommandLineTool.yml
new file mode 100644
index 0000000..159ddae
--- /dev/null
+++ b/cwltool/schemas/draft-3/CommandLineTool.yml
@@ -0,0 +1,637 @@
+$base: "https://w3id.org/cwl/cwl#"
+
+$namespaces:
+ cwl: "https://w3id.org/cwl/cwl#"
+
+$graph:
+
+- name: CommandLineToolDoc
+ type: documentation
+ doc:
+ - |
+ # Common Workflow Language (CWL) Command Line Tool Description, draft 3
+
+ This version:
+ * https://w3id.org/cwl/draft-3/
+
+ Current version:
+ * https://w3id.org/cwl/
+ - "\n\n"
+ - {$include: contrib.md}
+ - "\n\n"
+ - |
+ # Abstract
+
+ A Command Line Tool is a non-interactive executable program that reads
+ some input, performs a computation, and terminates after producing some
+ output. Command line programs are a flexible unit of code sharing and
+ reuse; unfortunately, the syntax and input/output semantics among command
+ line programs are extremely heterogeneous. A common layer for describing
+ the syntax and semantics of programs can reduce this incidental
+ complexity by providing a consistent way to connect programs together.
+ This specification defines the Common Workflow Language (CWL) Command
+ Line Tool Description, a vendor-neutral standard for describing the
+ syntax and input/output semantics of command line programs.
+
+ - {$include: intro.md}
+
+ - |
+ ## Introduction to draft 3
+
+ This specification represents the third milestone of the CWL group. Since
+ draft-2, this draft introduces the following major changes and additions:
+
+ * Greatly simplified naming within a document with scoped identifiers, as
+ described in the [Schema Salad specification](SchemaSalad.html).
+ * The draft-2 concept of pluggable expression engines has been replaced
+ by a [streamlined expression syntax](#Parameter_references)
+ and standardization on [Javascript](#Expressions).
+ * [File](#File) objects can now include a `format` field to indicate
+ the file type.
+ * The addition of [ShellCommandRequirement](#ShellCommandRequirement).
+ * The addition of [ResourceRequirement](#ResourceRequirement).
+ * The separation of CommandLineTool and Workflow components into
+ separate specifications.
+
+ ## Purpose
+
+ Standalone programs are a flexible and interoperable form of code reuse.
+ Unlike monolithic applications, applications and analysis workflows which
+ are composed of multiple separate programs can be written in multiple
+ languages and execute concurrently on multiple hosts. However, POSIX
+ does not dictate computer-readable grammar or semantics for program input
+ and output, resulting in extremely heterogeneous command line grammar and
+ input/output semantics among programs. This is a particular problem in
+ distributed computing (multi-node compute clusters) and virtualized
+ environments (such as Docker containers) where it is often necessary to
+ provision resources such as input files before executing the program.
+
+ Often this gap is filled by hard-coding program invocation and
+ implicitly assuming requirements will be met, or abstracting program
+ invocation with wrapper scripts or descriptor documents. Unfortunately,
+ where these approaches are application- or platform-specific, they create a
+ significant barrier to reproducibility and portability, as methods
+ developed for one platform must be manually ported to be used on new
+ platforms. Similarly, this creates redundant work, as wrappers for popular
+ tools must be rewritten for each application or platform in use.
+
+ The Common Workflow Language Command Line Tool Description is designed to
+ provide a common standard description of grammar and semantics for
+ invoking programs used in data-intensive fields such as Bioinformatics,
+ Chemistry, Physics, Astronomy, and Statistics. This specification
+ defines a precise data and execution model for Command Line Tools that
+ can be implemented on a variety of computing platforms, ranging from a
+ single workstation to cluster, grid, cloud, and high performance
+ computing platforms.
+
+ - {$include: concepts.md}
+ - {$include: invocation.md}
+
+- type: record
+ name: FileDef
+ doc: |
+ Define a file that must be placed in the designated output directory
+ prior to executing the command line tool. May be the result of executing
+ an expression, such as building a configuration file from a template.
+ fields:
+ - name: "filename"
+ type: ["string", "#Expression"]
+ doc: "The name of the file to create in the output directory."
+ - name: "fileContent"
+ type: ["string", "#Expression"]
+ doc: |
+ If the value is a string literal or an expression which evaluates to a
+ string, a new file must be created with the string as the file contents.
+
+ If the value is an expression that evaluates to a File object, this
+ indicates the referenced file should be added to the designated output
+ directory prior to executing the tool.
+
+ Files added in this way may be read-only, and may be provided
+ by bind mounts or file system links to avoid
+ unnecessary copying of the input file.
+
+
+- type: record
+ name: EnvironmentDef
+ doc: |
+ Define an environment variable that will be set in the runtime environment
+ by the workflow platform when executing the command line tool. May be the
+ result of executing an expression, such as getting a parameter from input.
+ fields:
+ - name: "envName"
+ type: "string"
+ doc: The environment variable name
+ - name: "envValue"
+ type: ["string", "#Expression"]
+ doc: The environment variable value
+
+- type: record
+ name: CommandLineBinding
+ extends: "#InputBinding"
+ doc: |
+
+ When listed under `inputBinding` in the input schema, the term
+ "value" refers to the corresponding value in the input object. For
+ binding objects listed in `CommandLineTool.arguments`, the term "value"
+ refers to the effective value after evaluating `valueFrom`.
+
+ The binding behavior when building the command line depends on the data
+ type of the value. If there is a mismatch between the type described by
+ the input schema and the effective value, such as resulting from an
+ expression evaluation, an implementation must use the data type of the
+ effective value.
+
+ - **string**: Add `prefix` and the string to the command line.
+
+ - **number**: Add `prefix` and the decimal representation to the command line.
+
+ - **boolean**: If true, add `prefix` to the command line. If false, add
+ nothing.
+
+ - **File**: Add `prefix` and the value of
+ [`File.path`](#File) to the command line.
+
+ - **array**: If `itemSeparator` is specified, join the array into a single
+ string with `itemSeparator` separating the items, then add `prefix` and
+ the joined string. Otherwise first add `prefix`, then recursively process
+ individual elements.
+
+ - **object**: Add `prefix` only, and recursively add object fields for
+ which `inputBinding` is specified.
+
+ - **null**: Add nothing.
+
+ fields:
+ - name: "position"
+ type: ["null", "int"]
+ doc: "The sorting key. Default position is 0."
+ - name: "prefix"
+ type: [ "null", "string"]
+ doc: "Command line prefix to add before the value."
+ - name: "separate"
+ type: ["null", boolean]
+ doc: |
+ If true (default), then the prefix and value must be added as separate
+ command line arguments; if false, prefix and value must be concatenated
+ into a single command line argument.
+ - name: "itemSeparator"
+ type: ["null", "string"]
+ doc: |
+ Join the array elements into a single string with the elements
+ separated by `itemSeparator`.
+ - name: "valueFrom"
+ type:
+ - "null"
+ - "string"
+ - "#Expression"
+ jsonldPredicate: "cwl:valueFrom"
+ doc: |
+ If `valueFrom` is a constant string value, use this as the value and
+ apply the binding rules above.
+
+ If `valueFrom` is an expression, evaluate the expression to yield the
+ actual value to use to build the command line and apply the binding
+ rules above. If the inputBinding is associated with an input
+ parameter, the value of `self` in the expression will be the value of the
+ input parameter.
+
+ When a binding is part of the `CommandLineTool.arguments` field,
+ the `valueFrom` field is required.
+ - name: shellQuote
+ type: ["null", boolean]
+ doc: |
+ If `ShellCommandRequirement` is in the requirements for the current command,
+ this controls whether the value is quoted on the command line (default is true).
+ Use `shellQuote: false` to inject metacharacters for operations such as pipes.
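+
+ A non-normative sketch (the command fragment is illustrative): with
+ `ShellCommandRequirement` listed in `requirements`, an unquoted shell
+ fragment can be injected through `arguments`:
+
+ ```
+ requirements:
+   - class: ShellCommandRequirement
+ arguments:
+   - valueFrom: "ls *.txt | wc -l"
+     shellQuote: false
+ ```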
+
+- type: record
+ name: CommandOutputBinding
+ extends: "#OutputBinding"
+ doc: |
+ Describes how to generate an output parameter based on the files produced
+ by a CommandLineTool.
+
+ The output parameter is generated by applying these operations in
+ the following order:
+
+ - glob
+ - loadContents
+ - outputEval
+ fields:
+ - name: glob
+ type:
+ - "null"
+ - string
+ - "#Expression"
+ - type: array
+ items: string
+ doc: |
+ Find files relative to the output directory, using POSIX glob(3)
+ pathname matching. If provided an array, find files that match any
+ pattern in the array. If provided an expression, the expression must
+ return a string or an array of strings, which will then be evaluated as
+ one or more glob patterns. Must only match and return files which
+ actually exist.
+ - name: loadContents
+ type:
+ - "null"
+ - boolean
+ jsonldPredicate: "cwl:loadContents"
+ doc: |
+ For each file matched in `glob`, read up to
+ the first 64 KiB of text from the file and place it in the `contents`
+ field of the file object for manipulation by `outputEval`.
+ - name: outputEval
+ type:
+ - "null"
+ - string
+ - "#Expression"
+ doc: |
+ Evaluate an expression to generate the output value. If `glob` was
+ specified, the value of `self` must be an array containing file objects
+ that were matched. If no files were matched, `self` must be a zero
+ length array; if a single file was matched, the value of `self` is an
+ array of a single element. Additionally, if `loadContents` is `true`,
+ the File objects must include up to the first 64 KiB of file contents
+ in the `contents` field.
+
+
+- name: CommandInputRecordField
+ type: record
+ extends: "#InputRecordField"
+ specialize:
+ - specializeFrom: "#InputRecordSchema"
+ specializeTo: "#CommandInputRecordSchema"
+ - specializeFrom: "#InputEnumSchema"
+ specializeTo: "#CommandInputEnumSchema"
+ - specializeFrom: "#InputArraySchema"
+ specializeTo: "#CommandInputArraySchema"
+ - specializeFrom: "#InputBinding"
+ specializeTo: "#CommandLineBinding"
+
+
+- name: CommandInputRecordSchema
+ type: record
+ extends: "#InputRecordSchema"
+ specialize:
+ - specializeFrom: "#InputRecordField"
+ specializeTo: "#CommandInputRecordField"
+
+
+- name: CommandInputEnumSchema
+ type: record
+ extends: "#InputEnumSchema"
+ specialize:
+ - specializeFrom: "#InputBinding"
+ specializeTo: "#CommandLineBinding"
+
+
+- name: CommandInputArraySchema
+ type: record
+ extends: "#InputArraySchema"
+ specialize:
+ - specializeFrom: "#InputRecordSchema"
+ specializeTo: "#CommandInputRecordSchema"
+ - specializeFrom: "#InputEnumSchema"
+ specializeTo: "#CommandInputEnumSchema"
+ - specializeFrom: "#InputArraySchema"
+ specializeTo: "#CommandInputArraySchema"
+ - specializeFrom: "#InputBinding"
+ specializeTo: "#CommandLineBinding"
+
+
+- name: CommandOutputRecordField
+ type: record
+ extends: "#OutputRecordField"
+ specialize:
+ - specializeFrom: "#OutputRecordSchema"
+ specializeTo: "#CommandOutputRecordSchema"
+ - specializeFrom: "#OutputEnumSchema"
+ specializeTo: "#CommandOutputEnumSchema"
+ - specializeFrom: "#OutputArraySchema"
+ specializeTo: "#CommandOutputArraySchema"
+ - specializeFrom: "#OutputBinding"
+ specializeTo: "#CommandOutputBinding"
+
+
+- name: CommandOutputRecordSchema
+ type: record
+ extends: "#OutputRecordSchema"
+ specialize:
+ - specializeFrom: "#OutputRecordField"
+ specializeTo: "#CommandOutputRecordField"
+
+
+- name: CommandOutputEnumSchema
+ type: record
+ extends: "#OutputEnumSchema"
+ specialize:
+ - specializeFrom: "#OutputRecordSchema"
+ specializeTo: "#CommandOutputRecordSchema"
+ - specializeFrom: "#OutputEnumSchema"
+ specializeTo: "#CommandOutputEnumSchema"
+ - specializeFrom: "#OutputArraySchema"
+ specializeTo: "#CommandOutputArraySchema"
+ - specializeFrom: "#OutputBinding"
+ specializeTo: "#CommandOutputBinding"
+
+
+- name: CommandOutputArraySchema
+ type: record
+ extends: "#OutputArraySchema"
+ specialize:
+ - specializeFrom: "#OutputRecordSchema"
+ specializeTo: "#CommandOutputRecordSchema"
+ - specializeFrom: "#OutputEnumSchema"
+ specializeTo: "#CommandOutputEnumSchema"
+ - specializeFrom: "#OutputArraySchema"
+ specializeTo: "#CommandOutputArraySchema"
+ - specializeFrom: "#OutputBinding"
+ specializeTo: "#CommandOutputBinding"
+
+
+- type: record
+ name: CommandInputParameter
+ extends: "#InputParameter"
+ doc: An input parameter for a CommandLineTool.
+ specialize:
+ - specializeFrom: "#InputRecordSchema"
+ specializeTo: "#CommandInputRecordSchema"
+ - specializeFrom: "#InputEnumSchema"
+ specializeTo: "#CommandInputEnumSchema"
+ - specializeFrom: "#InputArraySchema"
+ specializeTo: "#CommandInputArraySchema"
+ - specializeFrom: "#InputBinding"
+ specializeTo: "#CommandLineBinding"
+
+
+- type: record
+ name: CommandOutputParameter
+ extends: "#OutputParameter"
+ doc: An output parameter for a CommandLineTool.
+ specialize:
+ - specializeFrom: "#OutputRecordSchema"
+ specializeTo: "#CommandOutputRecordSchema"
+ - specializeFrom: "#OutputEnumSchema"
+ specializeTo: "#CommandOutputEnumSchema"
+ - specializeFrom: "#OutputArraySchema"
+ specializeTo: "#CommandOutputArraySchema"
+ - specializeFrom: "#OutputBinding"
+ specializeTo: "#CommandOutputBinding"
+
+
+- type: record
+ name: CommandLineTool
+ extends: "#Process"
+ documentRoot: true
+ specialize:
+ - specializeFrom: "#InputParameter"
+ specializeTo: "#CommandInputParameter"
+ - specializeFrom: "#OutputParameter"
+ specializeTo: "#CommandOutputParameter"
+ doc: |
+ This defines the schema of the CWL Command Line Tool Description document.
+
+ fields:
+ - name: "class"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ type: string
+ - name: baseCommand
+ doc: |
+ Specifies the program to execute. If the value is an array, the first
+ element is the program to execute, and subsequent elements are placed
+ at the beginning of the command line, prior to any command line
+ bindings. If the program includes a path separator character it must
+ be an absolute path, otherwise it is an error. If the program does not
+ include a path separator, search the `$PATH` variable in the runtime
+ environment of the workflow runner find the absolute path of the
+ executable.
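+
+ For example, a minimal sketch (the tool, input id, and binding below are
+ illustrative, not part of this specification):
+
+ ```
+ class: CommandLineTool
+ baseCommand: [tar, xf]
+ inputs:
+   - id: tarfile
+     type: File
+     inputBinding:
+       position: 1
+ outputs: []
+ ```
+
+ Here `tar` is the program to execute, `xf` is placed at the beginning of
+ the command line, and the bound `tarfile` value is appended after it.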
+ type:
+ - string
+ - type: array
+ items: string
+ jsonldPredicate:
+ "_id": "cwl:baseCommand"
+ "_container": "@list"
+ - name: arguments
+ doc: |
+ Command line bindings which are not directly associated with input parameters.
+ type:
+ - "null"
+ - type: array
+ items: [string, "#CommandLineBinding"]
+ jsonldPredicate:
+ "_id": "cwl:arguments"
+ "_container": "@list"
+ - name: stdin
+ type: ["null", string, "#Expression"]
+ doc: |
+ A path to a file whose contents must be piped into the command's
+ standard input stream.
+ - name: stdout
+ type: ["null", string, "#Expression"]
+ doc: |
+ Capture the command's standard output stream to a file written to
+ the designated output directory.
+
+ If `stdout` is a string, it specifies the file name to use.
+
+ If `stdout` is an expression, the expression is evaluated and must
+ return a string with the file name to use to capture stdout. If the
+ return value is not a string, or the resulting path contains illegal
+ characters (such as the path separator `/`), it is an error.
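+
+ For example, a sketch that captures standard output into a fixed file name
+ (the output id and glob below are illustrative):
+
+ ```
+ stdout: output.txt
+ outputs:
+   - id: report
+     type: File
+     outputBinding:
+       glob: output.txt
+ ```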
+ - name: successCodes
+ type:
+ - "null"
+ - type: array
+ items: int
+ doc: |
+ Exit codes that indicate the process completed successfully.
+
+ - name: temporaryFailCodes
+ type:
+ - "null"
+ - type: array
+ items: int
+ doc: |
+ Exit codes that indicate the process failed due to a possibly
+ temporary condition, where executing the process with the same
+ runtime environment and inputs may produce different results.
+
+ - name: permanentFailCodes
+ type:
+ - "null"
+ - type: array
+ items: int
+ doc:
+ Exit codes that indicate the process failed due to a permanent logic
+ error, where executing the process with the same runtime environment and
+ same inputs is expected to always fail.
+
+
+- type: record
+ name: DockerRequirement
+ extends: "#ProcessRequirement"
+ doc: |
+ Indicates that a workflow component should be run in a
+ [Docker](http://docker.com) container, and specifies how to fetch or build
+ the image.
+
+ If a CommandLineTool lists `DockerRequirement` under
+ `hints` or `requirements`, it may (or must) be run in the specified Docker
+ container.
+
+ The platform must first acquire or install the correct Docker image as
+ specified by `dockerPull`, `dockerImport`, `dockerLoad` or `dockerFile`.
+
+ The platform must execute the tool in the container using `docker run` with
+ the appropriate Docker image and tool command line.
+
+ The workflow platform may provide input files and the designated output
+ directory through the use of volume bind mounts. The platform may rewrite
+ file paths in the input object to correspond to the Docker bind mounted
+ locations.
+
+ When running a tool contained in Docker, the workflow platform must not
+ assume anything about the contents of the Docker container, such as the
+ presence or absence of specific software, except to assume that the
+ generated command line represents a valid command within the runtime
+ environment of the container.
+
+ ## Interaction with other requirements
+
+ If [EnvVarRequirement](#EnvVarRequirement) is specified alongside a
+ DockerRequirement, the environment variables must be provided to Docker
+ using `--env` or `--env-file` and interact with the container's preexisting
+ environment as defined by Docker.
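+
+ For example, a tool might request an image as a hint (a sketch; the image
+ name is illustrative):
+
+ ```
+ hints:
+   - class: DockerRequirement
+     dockerPull: debian:8
+ ```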
+
+ fields:
+ - name: dockerPull
+ type: ["null", "string"]
+ doc: "Specify a Docker image to retrieve using `docker pull`."
+ - name: "dockerLoad"
+ type: ["null", "string"]
+ doc: "Specify a HTTP URL from which to download a Docker image using `docker load`."
+ - name: dockerFile
+ type: ["null", "string"]
+ doc: "Supply the contents of a Dockerfile which will be built using `docker build`."
+ - name: dockerImport
+ type: ["null", "string"]
+ doc: "Provide HTTP URL to download and gunzip a Docker images using `docker import."
+ - name: dockerImageId
+ type: ["null", "string"]
+ doc: |
+ The image id that will be used for `docker run`. May be a
+ human-readable image name or the image identifier hash. May be skipped
+ if `dockerPull` is specified, in which case the `dockerPull` image id
+ must be used.
+ - name: dockerOutputDirectory
+ type: ["null", "string"]
+ doc: |
+ Set the designated output directory to a specific location inside the
+ Docker container.
+
+
+
+- name: CreateFileRequirement
+ type: record
+ extends: "#ProcessRequirement"
+ doc: |
+ Define a list of files that must be created by the workflow
+ platform in the designated output directory prior to executing the command
+ line tool. See `FileDef` for details.
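+
+ For example, a sketch that writes a small configuration file into the
+ output directory before the tool runs (file name and contents are
+ illustrative; see `FileDef` for the field definitions assumed here):
+
+ ```
+ requirements:
+   - class: CreateFileRequirement
+     fileDef:
+       - filename: settings.conf
+         fileContent: |
+           threads=4
+ ```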
+ fields:
+ - name: fileDef
+ type:
+ type: "array"
+ items: "#FileDef"
+ doc: The list of files.
+
+
+- name: EnvVarRequirement
+ type: record
+ extends: "#ProcessRequirement"
+ doc: |
+ Define a list of environment variables which will be set in the
+ execution environment of the tool. See `EnvironmentDef` for details.
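+
+ For example (a sketch; the variable name and value are illustrative, and
+ `EnvironmentDef` is assumed to provide the `envName`/`envValue` fields):
+
+ ```
+ requirements:
+   - class: EnvVarRequirement
+     envDef:
+       - envName: OMP_NUM_THREADS
+         envValue: "4"
+ ```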
+ fields:
+ - name: envDef
+ type:
+ type: "array"
+ items: "#EnvironmentDef"
+ doc: The list of environment variables.
+
+
+- type: record
+ name: ShellCommandRequirement
+ extends: "#ProcessRequirement"
+ doc: |
+ Modify the behavior of CommandLineTool to generate a single string
+ containing a shell command line. Each item in the argument list must be
+ joined into a string separated by single spaces and quoted to prevent
+ interpretation by the shell, unless `CommandLineBinding` for that argument
+ contains `shellQuote: false`. If `shellQuote: false` is specified, the
+ argument is joined into the command string without quoting, which allows
+ the use of shell metacharacters such as `|` for pipes.
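+
+ For example, a fragment (illustrative; it assumes an `archive` File input
+ with a position 1 binding declared elsewhere in the tool) that pipes
+ decompressed output through `wc`:
+
+ ```
+ requirements:
+   - class: ShellCommandRequirement
+ baseCommand: zcat
+ arguments:
+   - position: 2
+     valueFrom: "| wc -l"
+     shellQuote: false
+ ```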
+
+
+- type: record
+ name: ResourceRequirement
+ extends: "#ProcessRequirement"
+ doc: |
+ Specify basic hardware resource requirements.
+
+ "min" is the minimum amount of a resource that must be reserved to schedule
+ a job. If "min" cannot be satisfied, the job should not be run.
+
+ "max" is the maximum amount of a resource that the job shall be permitted
+ to use. If a node has sufficient resources, multiple jobs may be scheduled
+ on a single node provided each job's "max" resource requirements are
+ met. If a job attempts to exceed its "max" resource allocation, an
+ implementation may deny additional resources, which may result in job
+ failure.
+
+ If "min" is specified but "max" is not, then "max" == "min"
+ If "max" is specified by "min" is not, then "min" == "max".
+
+ It is an error if max < min.
+
+ It is an error if the value of any of these fields is negative.
+
+ If neither "min" nor "max" is specified for a resource, an implementation may provide a default.
+
+ fields:
+ - name: coresMin
+ type: ["null", long, string, "#Expression"]
+ doc: Minimum reserved number of CPU cores
+
+ - name: coresMax
+ type: ["null", int, string, "#Expression"]
+ doc: Maximum reserved number of CPU cores
+
+ - name: ramMin
+ type: ["null", long, string, "#Expression"]
+ doc: Minimum reserved RAM in mebibytes (2**20)
+
+ - name: ramMax
+ type: ["null", long, string, "#Expression"]
+ doc: Maximum reserved RAM in mebibytes (2**20)
+
+ - name: tmpdirMin
+ type: ["null", long, string, "#Expression"]
+ doc: Minimum reserved filesystem based storage for the designated temporary directory, in mebibytes (2**20)
+
+ - name: tmpdirMax
+ type: ["null", long, string, "#Expression"]
+ doc: Maximum reserved filesystem based storage for the designated temporary directory, in mebibytes (2**20)
+
+ - name: outdirMin
+ type: ["null", long, string, "#Expression"]
+ doc: Minimum reserved filesystem based storage for the designated output directory, in mebibytes (2**20)
+
+ - name: outdirMax
+ type: ["null", long, string, "#Expression"]
+ doc: Maximum reserved filesystem based storage for the designated output directory, in mebibytes (2**20)
diff --git a/cwltool/schemas/draft-3/CommonWorkflowLanguage.yml b/cwltool/schemas/draft-3/CommonWorkflowLanguage.yml
new file mode 100644
index 0000000..73921e8
--- /dev/null
+++ b/cwltool/schemas/draft-3/CommonWorkflowLanguage.yml
@@ -0,0 +1,11 @@
+$base: "https://w3id.org/cwl/cwl#"
+
+$namespaces:
+ cwl: "https://w3id.org/cwl/cwl#"
+ sld: "https://w3id.org/cwl/salad#"
+
+$graph:
+
+- $import: Process.yml
+- $import: CommandLineTool.yml
+- $import: Workflow.yml
diff --git a/cwltool/schemas/draft-3/Process.yml b/cwltool/schemas/draft-3/Process.yml
new file mode 100644
index 0000000..a080616
--- /dev/null
+++ b/cwltool/schemas/draft-3/Process.yml
@@ -0,0 +1,549 @@
+$base: "https://w3id.org/cwl/cwl#"
+
+$namespaces:
+ cwl: "https://w3id.org/cwl/cwl#"
+ sld: "https://w3id.org/cwl/salad#"
+
+$graph:
+
+- name: "Common Workflow Language, Draft 3"
+ type: documentation
+ doc: {$include: concepts.md}
+
+- $import: "salad/schema_salad/metaschema/metaschema.yml"
+
+- name: BaseTypesDoc
+ type: documentation
+ doc: |
+ ## Base types
+ docChild:
+ - "#CWLType"
+ - "#Process"
+
+- type: enum
+ name: CWLVersions
+ doc: "Version symbols for published CWL document versions."
+ symbols:
+ - draft-3.dev1
+ - draft-3.dev2
+ - draft-3.dev3
+ - draft-3.dev4
+ - draft-3.dev5
+ - draft-3
+
+- name: CWLType
+ type: enum
+ extends: "sld:PrimitiveType"
+ symbols:
+ - File
+ doc:
+ - "Extends primitive types with the concept of a file as a first class type."
+ - "File: A File object"
+
+- name: File
+ type: record
+ docParent: "#CWLType"
+ doc: |
+ Represents a file (or group of files if `secondaryFiles` is specified) that
+ must be accessible by tools using standard POSIX file system call API such as
+ open(2) and read(2).
+ fields:
+ - name: class
+ type:
+ type: enum
+ name: File_class
+ symbols:
+ - cwl:File
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ doc: Must be `File` to indicate this object describes a file.
+ - name: path
+ type: string
+ doc: The path to the file.
+ jsonldPredicate:
+ "_type": "@id"
+ - name: checksum
+ type: ["null", string]
+ doc: |
+ Optional hash code for validating file integrity. Currently must be in the form
+ "sha1$ + hexidecimal string" using the SHA-1 algorithm.
+ - name: size
+ type: ["null", long]
+ doc: Optional file size.
+ - name: "secondaryFiles"
+ type:
+ - "null"
+ - type: array
+ items: "#File"
+ jsonldPredicate: "cwl:secondaryFiles"
+ doc: |
+ A list of additional files that are associated with the primary file
+ and must be transferred alongside the primary file. Examples include
+ indexes of the primary file, or external references which must be
+ included when loading the primary document. A file object listed in
+ `secondaryFiles` may itself include `secondaryFiles` for which the same
+ rules apply.
+ - name: format
+ type: ["null", string]
+ jsonldPredicate:
+ _id: cwl:format
+ _type: "@id"
+ identity: true
+ doc: |
+ The format of the file. This must be a URI of a concept node that
+ represents the file format, preferably defined within an ontology.
+ If no ontology is available, file formats may be tested by exact match.
+
+ Reasoning about format compatibility must be done by checking that an
+ input file format is the same as, `owl:equivalentClass` to, or
+ `rdfs:subClassOf` the format required by the input parameter.
+ `owl:equivalentClass` is transitive with `rdfs:subClassOf`, e.g. if
+ `<B> owl:equivalentClass <C>` and `<B> rdfs:subClassOf <A>` then infer
+ `<C> rdfs:subClassOf <A>`.
+
+ File format ontologies may be provided in the "$schema" metadata at the
+ root of the document. If no ontologies are specified in `$schema`, the
+ runtime may perform exact file format matches.
+
+
+- name: SchemaBase
+ type: record
+ abstract: true
+ fields:
+ - name: secondaryFiles
+ type:
+ - "null"
+ - "string"
+ - "#Expression"
+ - type: "array"
+ items: ["string", "#Expression"]
+ jsonldPredicate: "cwl:secondaryFiles"
+ doc: |
+ Only valid when `type: File` or is an array of `items: File`.
+
+ Describes files that must be included alongside the primary file(s).
+
+ If the value is an expression, the value of `self` in the expression
+ must be the primary input or output File to which this binding applies.
+
+ If the value is a string, it specifies that the following pattern
+ should be applied to the primary file:
+
+ 1. If string begins with one or more caret `^` characters, for each
+ caret, remove the last file extension from the path (the last
+ period `.` and all following characters). If there are no file
+ extensions, the path is unchanged.
+ 2. Append the remainder of the string to the end of the file path.
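+
+ For example (an illustrative sketch), given a primary file `reads.bam`,
+ the pattern `".bai"` resolves to `reads.bam.bai`, while `"^.bai"` first
+ strips the `.bam` extension and resolves to `reads.bai`:
+
+ ```
+ secondaryFiles:
+   - ".bai"
+   - "^.bai"
+ ```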
+
+ - name: format
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ - "#Expression"
+ jsonldPredicate:
+ _id: cwl:format
+ _type: "@id"
+ identity: true
+ doc: |
+ Only valid when `type: File` or is an array of `items: File`.
+
+ For input parameters, this must be one or more URIs of concept nodes
+ that represent file formats which are allowed as input to this
+ parameter, preferably defined within an ontology. If no ontology is
+ available, file formats may be tested by exact match.
+
+ For output parameters, this is the file format that will be assigned to
+ the output parameter.
+
+ - name: streamable
+ type: ["null", "boolean"]
+ doc: |
+ Only valid when `type: File` or is an array of `items: File`.
+
+ A value of `true` indicates that the file is read or written
+ sequentially without seeking. An implementation may use this flag to
+ indicate whether it is valid to stream file contents using a named
+ pipe. Default: `false`.
+
+
+- name: Parameter
+ type: record
+ extends: "#SchemaBase"
+ abstract: true
+ doc: |
+ Define an input or output parameter to a process.
+
+ fields:
+ - name: type
+ type:
+ - "null"
+ - "#CWLType"
+ - "sld:RecordSchema"
+ - "sld:EnumSchema"
+ - "sld:ArraySchema"
+ - string
+ - type: array
+ items:
+ - "#CWLType"
+ - "sld:RecordSchema"
+ - "sld:EnumSchema"
+ - "sld:ArraySchema"
+ - string
+ jsonldPredicate:
+ "_id": "sld:type"
+ "_type": "@vocab"
+ doc: |
+ Specify valid types of data that may be assigned to this parameter.
+
+ - name: label
+ type:
+ - "null"
+ - string
+ jsonldPredicate: "rdfs:label"
+ doc: "A short, human-readable label of this parameter object."
+
+ - name: description
+ type:
+ - "null"
+ - string
+ jsonldPredicate: "rdfs:comment"
+ doc: "A long, human-readable description of this parameter object."
+
+
+- type: enum
+ name: Expression
+ doc: |
+ Not a real type. Indicates that a field must allow runtime parameter
+ references. If [InlineJavascriptRequirement](#InlineJavascriptRequirement)
+ is declared and supported by the platform, the field must also allow
+ Javascript expressions.
+ symbols:
+ - cwl:ExpressionPlaceholder
+
+
+- name: InputBinding
+ type: record
+ abstract: true
+ fields:
+ - name: loadContents
+ type:
+ - "null"
+ - boolean
+ jsonldPredicate: "cwl:loadContents"
+ doc: |
+ Only valid when `type: File` or is an array of `items: File`.
+
+ Read up to the first 64 KiB of text from the file and place it in the
+ "contents" field of the file object for use by expressions.
+
+
+- name: OutputBinding
+ type: record
+ abstract: true
+
+
+- name: InputSchema
+ extends: "#SchemaBase"
+ type: record
+ abstract: true
+
+
+- name: OutputSchema
+ extends: "#SchemaBase"
+ type: record
+ abstract: true
+
+
+- name: InputRecordField
+ type: record
+ extends: "sld:RecordField"
+ specialize:
+ - specializeFrom: "sld:RecordSchema"
+ specializeTo: "#InputRecordSchema"
+ - specializeFrom: "sld:EnumSchema"
+ specializeTo: "#InputEnumSchema"
+ - specializeFrom: "sld:ArraySchema"
+ specializeTo: "#InputArraySchema"
+ fields:
+ - name: inputBinding
+ type: [ "null", "#InputBinding" ]
+ jsonldPredicate: "cwl:inputBinding"
+
+
+- name: InputRecordSchema
+ type: record
+ extends: ["sld:RecordSchema", "#InputSchema"]
+ specialize:
+ - specializeFrom: "sld:RecordField"
+ specializeTo: "#InputRecordField"
+
+
+- name: InputEnumSchema
+ type: record
+ extends: ["sld:EnumSchema", "#InputSchema"]
+ fields:
+ - name: inputBinding
+ type: [ "null", "#InputBinding" ]
+ jsonldPredicate: "cwl:inputBinding"
+
+
+- name: InputArraySchema
+ type: record
+ extends: ["sld:ArraySchema", "#InputSchema"]
+ specialize:
+ - specializeFrom: "sld:RecordSchema"
+ specializeTo: "#InputRecordSchema"
+ - specializeFrom: "sld:EnumSchema"
+ specializeTo: "#InputEnumSchema"
+ - specializeFrom: "sld:ArraySchema"
+ specializeTo: "#InputArraySchema"
+ fields:
+ - name: inputBinding
+ type: [ "null", "#InputBinding" ]
+ jsonldPredicate: "cwl:inputBinding"
+
+
+- name: OutputRecordField
+ type: record
+ extends: "sld:RecordField"
+ specialize:
+ - specializeFrom: "sld:RecordSchema"
+ specializeTo: "#OutputRecordSchema"
+ - specializeFrom: "sld:EnumSchema"
+ specializeTo: "#OutputEnumSchema"
+ - specializeFrom: "sld:ArraySchema"
+ specializeTo: "#OutputArraySchema"
+ fields:
+ - name: outputBinding
+ type: [ "null", "#OutputBinding" ]
+ jsonldPredicate: "cwl:outputBinding"
+
+
+- name: OutputRecordSchema
+ type: record
+ extends: ["sld:RecordSchema", "#OutputSchema"]
+ docParent: "#OutputParameter"
+ specialize:
+ - specializeFrom: "sld:RecordField"
+ specializeTo: "#OutputRecordField"
+
+
+- name: OutputEnumSchema
+ type: record
+ extends: ["sld:EnumSchema", "#OutputSchema"]
+ docParent: "#OutputParameter"
+ fields:
+ - name: outputBinding
+ type: [ "null", "#OutputBinding" ]
+ jsonldPredicate: "cwl:outputBinding"
+
+- name: OutputArraySchema
+ type: record
+ extends: ["sld:ArraySchema", "#OutputSchema"]
+ docParent: "#OutputParameter"
+ specialize:
+ - specializeFrom: "sld:RecordSchema"
+ specializeTo: "#OutputRecordSchema"
+ - specializeFrom: "sld:EnumSchema"
+ specializeTo: "#OutputEnumSchema"
+ - specializeFrom: "sld:ArraySchema"
+ specializeTo: "#OutputArraySchema"
+ fields:
+ - name: outputBinding
+ type: [ "null", "#OutputBinding" ]
+ jsonldPredicate: "cwl:outputBinding"
+
+
+- name: InputParameter
+ type: record
+ extends: "#Parameter"
+ specialize:
+ - specializeFrom: "sld:RecordSchema"
+ specializeTo: "#InputRecordSchema"
+ - specializeFrom: "sld:EnumSchema"
+ specializeTo: "#InputEnumSchema"
+ - specializeFrom: "sld:ArraySchema"
+ specializeTo: "#InputArraySchema"
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: "The unique identifier for this parameter object."
+
+ - name: "inputBinding"
+ type: [ "null", "#InputBinding" ]
+ jsonldPredicate: "cwl:inputBinding"
+ doc: |
+ Describes how to handle the inputs of a process and convert them
+ into a concrete form for execution, such as command line parameters.
+
+ - name: default
+ type: ["null", "Any"]
+ jsonldPredicate: "cwl:default"
+ doc: |
+ The default value for this parameter if not provided in the input
+ object.
+
+
+- name: OutputParameter
+ type: record
+ extends: "#Parameter"
+ specialize:
+ - specializeFrom: "sld:RecordSchema"
+ specializeTo: "#OutputRecordSchema"
+ - specializeFrom: "sld:EnumSchema"
+ specializeTo: "#OutputEnumSchema"
+ - specializeFrom: "sld:ArraySchema"
+ specializeTo: "#OutputArraySchema"
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: "The unique identifier for this parameter object."
+ - name: "outputBinding"
+ type: [ "null", "#OutputBinding" ]
+ jsonldPredicate: "cwl:outputBinding"
+ doc: |
+ Describes how to handle the outputs of a process.
+
+
+- type: record
+ name: ProcessRequirement
+ abstract: true
+ doc: |
+ A process requirement declares a prerequisite that may or must be fulfilled
+ before executing a process. See [`Process.hints`](#process) and
+ [`Process.requirements`](#process).
+
+ Process requirements are the primary mechanism for specifying extensions to
+ the CWL core specification.
+
+ fields:
+ - name: "class"
+ type: "string"
+ doc: "The specific requirement type."
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+
+
+- type: record
+ name: Process
+ abstract: true
+ doc: |
+
+ The base executable type in CWL is the `Process` object defined by the
+ document. Note that the `Process` object is abstract and cannot be
+ directly executed.
+
+ fields:
+ - name: id
+ type: ["null", string]
+ jsonldPredicate: "@id"
+ doc: "The unique identifier for this process object."
+ - name: inputs
+ type:
+ type: array
+ items: "#InputParameter"
+ jsonldPredicate: "cwl:inputs"
+ doc: |
+ Defines the input parameters of the process. The process is ready to
+ run when all required input parameters are associated with concrete
+ values. Input parameters include a schema for each parameter which is
+ used to validate the input object. It may also be used to build a user
+ interface for constructing the input object.
+ - name: outputs
+ type:
+ type: array
+ items: "#OutputParameter"
+ jsonldPredicate: "cwl:outputs"
+ doc: |
+ Defines the parameters representing the output of the process. May be
+ used to generate and/or validate the output object.
+ - name: requirements
+ type:
+ - "null"
+ - type: array
+ items: "#ProcessRequirement"
+ jsonldPredicate: "cwl:requirements"
+ doc: |
+ Declares requirements that apply to either the runtime environment or the
+ workflow engine that must be met in order to execute this process. If
+ an implementation cannot satisfy all requirements, or a requirement is
+ listed which is not recognized by the implementation, it is a fatal
+ error and the implementation must not attempt to run the process,
+ unless overridden at user option.
+ - name: hints
+ type:
+ - "null"
+ - type: array
+ items: Any
+ jsonldPredicate: "cwl:hints"
+ doc: |
+ Declares hints applying to either the runtime environment or the
+ workflow engine that may be helpful in executing this process. It is
+ not an error if an implementation cannot satisfy all hints, however
+ the implementation may report a warning.
+ jsonldPredicate:
+ _id: cwl:hints
+ noLinkCheck: true
+ - name: label
+ type:
+ - "null"
+ - string
+ jsonldPredicate: "rdfs:label"
+ doc: "A short, human-readable label of this process object."
+ - name: description
+ type:
+ - "null"
+ - string
+ jsonldPredicate: "rdfs:comment"
+ doc: "A long, human-readable description of this process object."
+ - name: cwlVersion
+ type:
+ - "null"
+ - string
+ doc: "CWL document version"
+ jsonldPredicate:
+ "_id": "cwl:cwlVersion"
+ "_type": "@id"
+
+- name: InlineJavascriptRequirement
+ type: record
+ extends: "#ProcessRequirement"
+ doc: |
+ Indicates that the workflow platform must support inline Javascript expressions.
+ If this requirement is not present, the workflow platform must not perform expression
+ interpolation.
+ fields:
+ - name: expressionLib
+ type:
+ - "null"
+ - type: array
+ items: string
+ doc: |
+ Additional code fragments that will also be inserted
+ before executing the expression code. Allows for function definitions that may
+ be called from CWL expressions.
+
+
+- name: SchemaDefRequirement
+ type: record
+ extends: "#ProcessRequirement"
+ doc: |
+ This field consists of an array of type definitions which must be used when
+ interpreting the `inputs` and `outputs` fields. When a `type` field
+ contains a URI, the implementation must check if the type is defined in
+ `schemaDefs` and use that definition. If the type is not found in
+ `schemaDefs`, it is an error. The entries in `schemaDefs` must be
+ processed in the order listed such that later schema definitions may refer
+ to earlier schema definitions.
+ fields:
+ - name: types
+ type:
+ type: array
+ items: "#InputSchema"
+ doc: The list of type definitions.
diff --git a/cwltool/schemas/draft-3/README.md b/cwltool/schemas/draft-3/README.md
new file mode 100644
index 0000000..142b728
--- /dev/null
+++ b/cwltool/schemas/draft-3/README.md
@@ -0,0 +1,21 @@
+# Common Workflow Language Specifications, draft-3
+
+The CWL specifications are divided up into several documents.
+
+<!--
+The [User Guide](UserGuide.html) provides a gentle introduction to writing CWL
+command line tools and workflows.
+-->
+
+The [Command Line Tool Description Specification](CommandLineTool.html)
+specifies the document schema and execution semantics for wrapping and
+executing command line tools.
+
+The [Workflow Description Specification](Workflow.html) specifies the document
+schema and execution semantics for composing workflows from components such as
+command line tools and other workflows.
+
+The
+[Semantic Annotations for Linked Avro Data (SALAD) Specification](SchemaSalad.html)
+specifies the preprocessing steps that must be applied when loading CWL
+documents and the schema language used to write the above specifications.
diff --git a/cwltool/schemas/draft-3/UserGuide.yml b/cwltool/schemas/draft-3/UserGuide.yml
new file mode 100644
index 0000000..6bd8e70
--- /dev/null
+++ b/cwltool/schemas/draft-3/UserGuide.yml
@@ -0,0 +1,4 @@
+- name: userguide
+ type: documentation
+ doc:
+ - $include: userguide-intro.md
diff --git a/cwltool/schemas/draft-3/Workflow.yml b/cwltool/schemas/draft-3/Workflow.yml
new file mode 100644
index 0000000..066a66e
--- /dev/null
+++ b/cwltool/schemas/draft-3/Workflow.yml
@@ -0,0 +1,473 @@
+$base: "https://w3id.org/cwl/cwl#"
+
+$namespaces:
+ cwl: "https://w3id.org/cwl/cwl#"
+
+$graph:
+
+- name: "WorkflowDoc"
+ type: documentation
+ doc:
+ - |
+ # Common Workflow Language (CWL) Workflow Description, draft 3
+
+ This version:
+ * https://w3id.org/cwl/draft-3/
+
+ Current version:
+ * https://w3id.org/cwl/
+ - "\n\n"
+ - {$include: contrib.md}
+ - "\n\n"
+ - |
+ # Abstract
+
+ A Workflow is an analysis task represented by a directed graph describing
+ a sequence of operations that transform an input data set to output.
+ This specification defines the Common Workflow Language (CWL) Workflow
+ description, a vendor-neutral standard for representing workflows
+ intended to be portable across a variety of computing platforms.
+
+ - {$include: intro.md}
+
+ - |
+
+ ## Introduction to draft 3
+
+ This specification represents the third milestone of the CWL group. Since
+ draft-2, this draft introduces the following changes and additions:
+
+ * Greatly simplified naming within a document with scoped identifiers, as
+ described in the [Schema Salad specification](SchemaSalad.html).
+ * The draft-2 concept of pluggable expression engines has been replaced
+ by a [streamlined expression syntax](#Parameter_references)
+ and standardization on [Javascript](#Expressions).
+ * [File](#File) objects can now include a `format` field to indicate
+ the file type.
+ * The addition of [MultipleInputFeatureRequirement](#MultipleInputFeatureRequirement).
+ * The addition of [StepInputExpressionRequirement](#StepInputExpressionRequirement).
+ * The separation of Workflow and CommandLineTool components into
+ separate specifications.
+
+ ## Purpose
+
+ The Common Workflow Language (CWL) Workflow Description expresses
+ workflows for data-intensive science, such as Bioinformatics, Chemistry,
+ Physics, and Astronomy. This specification is intended to define a data
+ and execution model for Workflows that can be implemented on top of a
+ variety of computing platforms, ranging from an individual workstation to
+ cluster, grid, cloud, and high performance computing systems.
+
+ - {$include: concepts.md}
+
+
+- type: record
+ name: ExpressionTool
+ extends: "#Process"
+ documentRoot: true
+ doc: |
+ Execute an expression as a process step.
+ fields:
+ - name: "class"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ type: string
+ - name: expression
+ type: [string, "#Expression"]
+ doc: |
+ The expression to execute. The expression must return a JSON object which
+ matches the output parameters of the ExpressionTool.
+
+- name: LinkMergeMethod
+ type: enum
+ docParent: "#WorkflowStepInput"
+ doc: The input link merge method, described in [WorkflowStepInput](#WorkflowStepInput).
+ symbols:
+ - merge_nested
+ - merge_flattened
+
+
+- name: WorkflowOutputParameter
+ type: record
+ extends: ["#OutputParameter", "#Sink"]
+ docParent: "#Workflow"
+ doc: |
+ Describe an output parameter of a workflow. The parameter must be
+ connected to one or more parameters defined in the workflow that will
+ provide the value of the output parameter.
+
+
+- name: Sink
+ type: record
+ abstract: true
+ fields:
+ - name: source
+ doc: |
+ Specifies one or more workflow parameters that will provide input to
+ the underlying process parameter.
+ jsonldPredicate:
+ "_id": "cwl:source"
+ "_type": "@id"
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ - name: linkMerge
+ type: ["null", "#LinkMergeMethod"]
+ doc: |
+ The method to use to merge multiple inbound links into a single array.
+ If not specified, the default method is "merge_nested".
+
+
+- type: record
+ name: WorkflowStepInput
+ extends: "#Sink"
+ docParent: "#WorkflowStep"
+ doc: |
+ The input of a workflow step connects an upstream parameter (from the
+ workflow inputs, or the outputs of other workflows steps) with the input
+ parameters of the underlying process.
+
+ ## Input object
+
+ A WorkflowStepInput object must contain an `id` field in the form
+ `#fieldname` or `#stepname.fieldname`. When the `id` field contains a
+ period `.` the field name consists of the characters following the final
+ period. This defines a field of the workflow step input object with the
+ value of the `source` parameter(s).
+
+ ## Merging
+
+ To merge multiple inbound data links,
+ [MultipleInputFeatureRequirement](#MultipleInputFeatureRequirement) must be specified
+ in the workflow or workflow step requirements.
+
+ If the sink parameter is an array, or named in a [workflow
+ scatter](#WorkflowStep) operation, there may be multiple inbound data links
+ listed in the `source` field. The values from the input links are merged
+ depending on the method specified in the `linkMerge` field. If not
+ specified, the default method is "merge_nested".
+
+ * **merge_nested**
+
+ The input must be an array consisting of exactly one entry for each
+ input link. If "merge_nested" is specified with a single link, the value
+ from the link must be wrapped in a single-item list.
+
+ * **merge_flattened**
+
+ 1. The source and sink parameters must be compatible types, or the source
+ type must be compatible with single element from the "items" type of
+ the destination array parameter.
+ 2. Source parameters which are arrays are concatenated.
+ Source parameters which are single element types are appended as
+ single elements.
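+
+ For example, a sketch of a step input that flattens the outputs of two
+ upstream steps into one array (all identifiers are illustrative):
+
+ ```
+ inputs:
+   - id: "#combine.readsets"
+     source: ["#align1.reads", "#align2.reads"]
+     linkMerge: merge_flattened
+ ```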
+
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: "A unique identifier for this workflow input parameter."
+ - name: default
+ type: ["null", Any]
+ doc: |
+ The default value for this parameter if there is no `source`
+ field.
+ jsonldPredicate: "cwl:default"
+ - name: valueFrom
+ type:
+ - "null"
+ - "string"
+ - "#Expression"
+ jsonldPredicate: "cwl:valueFrom"
+ doc: |
+ To use valueFrom, [StepInputExpressionRequirement](#StepInputExpressionRequirement) must
+ be specified in the workflow or workflow step requirements.
+
+ If `valueFrom` is a constant string value, use this as the value for
+ this input parameter.
+
+ If `valueFrom` is a parameter reference or expression, it must be
+ evaluated to yield the actual value to be assigned to the input field.
+
+ The `self` value in the parameter reference or expression must be
+ the value of the parameter(s) specified in the `source` field, or
+ null if there is no `source` field.
+
+ The value of `inputs` in the parameter reference or expression is the
+ input object to the workflow step after assigning the `source` values,
+ but before evaluating any step with `valueFrom`. The order of
+ evaluating `valueFrom` among step input parameters is undefined.
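+
+ For example, a sketch that derives a file name from the upstream value
+ (identifiers are illustrative, and StepInputExpressionRequirement must be
+ declared):
+
+ ```
+ inputs:
+   - id: "#rename.output_name"
+     source: "#sample_id"
+     valueFrom: $(self).sorted.bam
+ ```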
+
+
+- type: record
+ name: WorkflowStepOutput
+ docParent: "#WorkflowStep"
+ doc: |
+ Associate an output parameter of the underlying process with a workflow
+ parameter. The workflow parameter (given in the `id` field) may be used
+ as a `source` to connect with input parameters of other workflow steps, or
+ with an output parameter of the process.
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: |
+ A unique identifier for this workflow output parameter. This is the
+ identifier to use in the `source` field of `WorkflowStepInput` to
+ connect the output value to downstream parameters.
+
+
+- name: ScatterMethod
+ type: enum
+ docParent: "#WorkflowStep"
+ doc: The scatter method, as described in [workflow step scatter](#WorkflowStep).
+ symbols:
+ - dotproduct
+ - nested_crossproduct
+ - flat_crossproduct
+
+
+- name: WorkflowStep
+ type: record
+ docParent: "#Workflow"
+ doc: |
+ A workflow step is an executable element of a workflow. It specifies the
+ underlying process implementation (such as `CommandLineTool`) in the `run`
+ field and connects the input and output parameters of the underlying
+ process to workflow parameters.
+
+ # Scatter/gather
+
+ To use scatter/gather,
+ [ScatterFeatureRequirement](#ScatterFeatureRequirement) must be specified
+ in the workflow or workflow step requirements.
+
+ A "scatter" operation specifies that the associated workflow step or
+ subworkflow should execute separately over a list of input elements. Each
+ job making up a scatter operation is independent and may be executed
+ concurrently.
+
+ The `scatter` field specifies one or more input parameters which will be
+ scattered. An input parameter may be listed more than once. The declared
+ type of each input parameter is implicitly wrapped in an array for each
+ time it appears in the `scatter` field. As a result, upstream parameters
+ which are connected to scattered parameters may be arrays.
+
+ All output parameter types are also implicitly wrapped in arrays. Each job
+ in the scatter results in an entry in the output array.
+
+ If `scatter` declares more than one input parameter, `scatterMethod`
+ describes how to decompose the input into a discrete set of jobs.
+
+ * **dotproduct** specifies that the input arrays are aligned and one
+ element is taken from each array to construct each job. It is an error
+ if all input arrays are not the same length.
+
+ * **nested_crossproduct** specifies the Cartesian product of the inputs,
+ producing a job for every combination of the scattered inputs. The
+ output must be nested arrays for each level of scattering, in the
+ order that the input arrays are listed in the `scatter` field.
+
+ * **flat_crossproduct** specifies the Cartesian product of the inputs,
+ producing a job for every combination of the scattered inputs. The
+ output arrays must be flattened to a single level, but otherwise listed in the
+ order that the input arrays are listed in the `scatter` field.
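+
+ For example, a sketch of a step scattered over two aligned input arrays
+ (identifiers and the `align.cwl` reference are illustrative):
+
+ ```
+ steps:
+   - id: "#align"
+     run: align.cwl
+     scatter: ["#align.reads", "#align.reference"]
+     scatterMethod: dotproduct
+     inputs:
+       - id: "#align.reads"
+         source: "#readsets"
+       - id: "#align.reference"
+         source: "#references"
+     outputs:
+       - id: "#align.bam"
+ ```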
+
+ # Subworkflows
+
+ To specify a nested workflow as part of a workflow step,
+ [SubworkflowFeatureRequirement](#SubworkflowFeatureRequirement) must be
+ specified in the workflow or workflow step requirements.
+
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: "The unique identifier for this workflow step."
+ - name: inputs
+ type:
+ type: array
+ items: "#WorkflowStepInput"
+ jsonldPredicate: "cwl:inputs"
+ doc: |
+ Defines the input parameters of the workflow step. The process is ready to
+ run when all required input parameters are associated with concrete
+ values. Input parameters include a schema for each parameter which is
+ used to validate the input object. It may also be used to build a user
+ interface for constructing the input object.
+ - name: outputs
+ type:
+ type: array
+ items: "#WorkflowStepOutput"
+ jsonldPredicate: "cwl:outputs"
+ doc: |
+ Defines the parameters representing the output of the process. May be
+ used to generate and/or validate the output object.
+ - name: requirements
+ type:
+ - "null"
+ - type: array
+ items: "#ProcessRequirement"
+ jsonldPredicate: "cwl:requirements"
+ doc: |
+ Declares requirements that apply to either the runtime environment or the
+ workflow engine that must be met in order to execute this workflow step. If
+ an implementation cannot satisfy all requirements, or a requirement is
+ listed which is not recognized by the implementation, it is a fatal
+ error and the implementation must not attempt to run the process,
+ unless overridden at user option.
+ - name: hints
+ type:
+ - "null"
+ - type: array
+ items: "Any"
+ jsonldPredicate: "cwl:hints"
+ doc: |
+ Declares hints applying to either the runtime environment or the
+ workflow engine that may be helpful in executing this workflow step. It is
+ not an error if an implementation cannot satisfy all hints, however
+ the implementation may report a warning.
+ jsonldPredicate:
+ _id: cwl:hints
+ noLinkCheck: true
+ - name: label
+ type:
+ - "null"
+ - string
+ jsonldPredicate: "rdfs:label"
+ doc: "A short, human-readable label of this process object."
+ - name: description
+ type:
+ - "null"
+ - string
+ jsonldPredicate: "rdfs:comment"
+ doc: "A long, human-readable description of this process object."
+ - name: run
+ type: [string, "#Process"]
+ jsonldPredicate:
+ "_id": "cwl:run"
+ "_type": "@id"
+ doc: |
+ Specifies the process to run.
+ - name: scatter
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ jsonldPredicate:
+ "_id": "cwl:scatter"
+ "_type": "@id"
+ "_container": "@list"
+ - name: scatterMethod
+ doc: |
+ Required if `scatter` is an array of more than one element.
+ type:
+ - "null"
+ - "#ScatterMethod"
+ jsonldPredicate:
+ "_id": "cwl:scatterMethod"
+ "_type": "@vocab"
+
+
+- name: Workflow
+ type: record
+ extends: "#Process"
+ documentRoot: true
+ specialize:
+ specializeFrom: "#OutputParameter"
+ specializeTo: "#WorkflowOutputParameter"
+ doc: |
+ A workflow describes a set of **steps** and the **dependencies** between
+ those steps. When a process produces output that will be consumed by a
+ second process, the first process is a dependency of the second process.
+
+ When there is a dependency, the workflow engine must execute the preceding
+ process and wait for it to successfully produce output before executing the
+ dependent process. If two processes are defined in the workflow graph that
+ are not directly or indirectly dependent, these processes are
+ **independent**, and may execute in any order or execute concurrently. A
+ workflow is complete when all steps have been executed.
+
+ Dependencies between parameters are expressed using the `source` field on
+ [workflow step input parameters](#WorkflowStepInput) and [workflow output
+ parameters](#WorkflowOutputParameter).
+
+ The `source` field expresses the dependency of one parameter on another
+ such that when a value is associated with the parameter specified by
+ `source`, that value is propagated to the destination parameter. When all
+ data links inbound to a given step are fulfilled, the step is ready to
+ execute.
+
+ ## Workflow success and failure
+
+ A completed process must result in one of `success`, `temporaryFailure` or
+ `permanentFailure` states. An implementation may choose to retry a process
+ execution which resulted in `temporaryFailure`. An implementation may
+ choose to either continue running other steps of a workflow, or terminate
+ immediately upon `permanentFailure`.
+
+ * If any step of a workflow execution results in `permanentFailure`, then the
+ workflow status is `permanentFailure`.
+
+ * If one or more steps result in `temporaryFailure` and all other steps
+ complete `success` or are not executed, then the workflow status is
+ `temporaryFailure`.
+
+ * If all workflow steps are executed and complete with `success`, then the workflow
+ status is `success`.
+
+ # Extensions
+
+ [ScatterFeatureRequirement](#ScatterFeatureRequirement) and
+ [SubworkflowFeatureRequirement](#SubworkflowFeatureRequirement) are
+ available as standard extensions to core workflow semantics.
+
+ fields:
+ - name: "class"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ type: string
+ - name: steps
+ doc: |
+ The individual steps that make up the workflow. Each step is executed when all of its
+ input data links are fulfilled. An implementation may choose to execute
+ the steps in a different order than listed and/or execute steps
+ concurrently, provided that dependencies between steps are met.
+ type:
+ - type: array
+ items: "#WorkflowStep"
+
+
+
+- type: record
+ name: SubworkflowFeatureRequirement
+ extends: "#ProcessRequirement"
+ doc: |
+ Indicates that the workflow platform must support nested workflows in
+ the `run` field of [WorkflowStep](#WorkflowStep).
+
+- name: ScatterFeatureRequirement
+ type: record
+ extends: "#ProcessRequirement"
+ doc: |
+ Indicates that the workflow platform must support the `scatter` and
+ `scatterMethod` fields of [WorkflowStep](#WorkflowStep).
+
+- name: MultipleInputFeatureRequirement
+ type: record
+ extends: "#ProcessRequirement"
+ doc: |
+ Indicates that the workflow platform must support multiple inbound data links
+ listed in the `source` field of [WorkflowStepInput](#WorkflowStepInput).
+
+- type: record
+ name: StepInputExpressionRequirement
+ extends: "#ProcessRequirement"
+ doc: |
+ Indicates that the workflow platform must support the `valueFrom` field
+ of [WorkflowStepInput](#WorkflowStepInput).
\ No newline at end of file
diff --git a/cwltool/schemas/draft-3/concepts.md b/cwltool/schemas/draft-3/concepts.md
new file mode 100644
index 0000000..024735d
--- /dev/null
+++ b/cwltool/schemas/draft-3/concepts.md
@@ -0,0 +1,378 @@
+## References to Other Specifications
+
+**Javascript Object Notation (JSON)**: http://json.org
+
+**JSON Linked Data (JSON-LD)**: http://json-ld.org
+
+**YAML**: http://yaml.org
+
+**Avro**: https://avro.apache.org/docs/current/spec.html
+
+**Uniform Resource Identifier (URI) Generic Syntax**: https://tools.ietf.org/html/rfc3986
+
+**Portable Operating System Interface (POSIX.1-2008)**: http://pubs.opengroup.org/onlinepubs/9699919799/
+
+**Resource Description Framework (RDF)**: http://www.w3.org/RDF/
+
+## Scope
+
+This document describes CWL syntax, execution, and object model. It
+is not intended to document a CWL-specific implementation; however, it may
+serve as a reference for the behavior of conforming implementations.
+
+## Terminology
+
+The terminology used to describe CWL documents is defined in the
+Concepts section of the specification. The terms defined in the
+following list are used in building those definitions and in describing the
+actions of a CWL implementation:
+
+**may**: Conforming CWL documents and CWL implementations are permitted but
+not required to behave as described.
+
+**must**: Conforming CWL documents and CWL implementations are required to behave
+as described; otherwise they are in error.
+
+**error**: A violation of the rules of this specification; results are
+undefined. Conforming implementations may detect and report an error and may
+recover from it.
+
+**fatal error**: A violation of the rules of this specification; results are
+undefined. Conforming implementations must not continue to execute the current
+process and may report an error.
+
+**at user option**: Conforming software may or must (depending on the modal verb in
+the sentence) behave as described; if it does, it must provide users a means to
+enable or disable the behavior described.
+
+**deprecated**: Conforming software may implement a behavior for backwards
+compatibility. Portable CWL documents should not rely on deprecated behavior.
+Behavior marked as deprecated may be removed entirely from future revisions of
+the CWL specification.
+
+# Data model
+
+## Data concepts
+
+An **object** is a data structure equivalent to the "object" type in JSON,
+consisting of an unordered set of name/value pairs (referred to here as
+**fields**) and where the name is a string and the value is a string, number,
+boolean, array, or object.
+
+A **document** is a file containing a serialized object, or an array of objects.
+
+A **process** is a basic unit of computation which accepts input data,
+performs some computation, and produces output data.
+
+An **input object** is an object describing the inputs to an invocation of a process.
+
+An **output object** is an object describing the output of an invocation of a process.
+
+An **input schema** describes the valid format (required fields, data types)
+for an input object.
+
+An **output schema** describes the valid format for an output object.
+
+**Metadata** is information about workflows, tools, or input items that is
+not used directly in the computation.
+
+## Syntax
+
+CWL documents must consist of an object or array of objects represented using
+JSON or YAML syntax. Upon loading, a CWL implementation must apply the
+preprocessing steps described in the
+[Semantic Annotations for Linked Avro Data (SALAD) Specification](SchemaSalad.html).
+An implementation may formally validate the structure of a CWL document using
+SALAD schemas located at
+https://github.com/common-workflow-language/common-workflow-language/tree/master/draft-3
+
+## Identifiers
+
+If an object contains an `id` field, that is used to uniquely identify the
+object in that document. The value of the `id` field must be unique over the
+entire document. Identifiers may be resolved relative to the document
+base and/or other identifiers following the rules described in the
+[Schema Salad specification](SchemaSalad.html#Identifier_resolution).
+
+An implementation may choose to only honor references to object types for
+which the `id` field is explicitly listed in this specification.
+
+## Document preprocessing
+
+An implementation must resolve [$import](SchemaSalad.html#Import) and
+[$include](SchemaSalad.html#Import) directives as described in the
+[Schema Salad specification](SchemaSalad.html).
+
+## Extensions and Metadata
+
+Input metadata (for example, a lab sample identifier) may be represented within
+a tool or workflow using input parameters which are explicitly propagated to
+output. Future versions of this specification may define additional facilities
+for working with input/output metadata.
+
+Implementation extensions not required for correct execution (for example,
+fields related to GUI presentation) and metadata about the tool or workflow
+itself (for example, authorship for use in citations) may be provided as
+additional fields on any object. Such extension fields must use a namespace
+prefix listed in the `$namespaces` section of the document as described in the
+[Schema Salad specification](SchemaSalad.html#Explicit_context).
+
+Implementation extensions which modify execution semantics must be [listed in
+the `requirements` field](#Requirements_and_hints).
+
+# Execution model
+
+## Execution concepts
+
+A **parameter** is a named symbolic input or output of a process, with an
+associated datatype or schema. During execution, values are assigned to
+parameters to make the input object or output object used for concrete
+process invocation.
+
+A **command line tool** is a process characterized by the execution of a
+standalone, non-interactive program which is invoked on some input,
+produces output, and then terminates.
+
+A **workflow** is a process characterized by multiple subprocess steps,
+where step outputs are connected to the inputs of other downstream steps to
+form a directed graph, and independent steps may run concurrently.
+
+A **runtime environment** is the actual hardware and software environment when
+executing a command line tool. It includes, but is not limited to, the
+hardware architecture, hardware resources, operating system, software runtime
+(if applicable, such as the Python interpreter or the JVM), libraries, modules,
+packages, utilities, and data files required to run the tool.
+
+A **workflow platform** is a specific hardware and software implementation
+capable of interpreting CWL documents and executing the processes specified by
+the document. The responsibilities of the workflow platform may include
+scheduling process invocation, setting up the necessary runtime environment,
+making input data available, invoking the tool process, and collecting output.
+
+A workflow platform may choose to only implement the Command Line Tool
+Description part of the CWL specification.
+
+It is intended that the workflow platform has broad leeway outside of this
+specification to optimize use of computing resources and enforce policies
+not covered by this specification. Some areas that are currently out of
+scope for the CWL specification but may be handled by a specific workflow
+platform include:
+
+* Data security and permissions.
+* Scheduling tool invocations on remote cluster or cloud compute nodes.
+* Using virtual machines or operating system containers to manage the runtime
+(except as described in [DockerRequirement](CommandLineTool.html#DockerRequirement)).
+* Using remote or distributed file systems to manage input and output files.
+* Transforming file paths.
+* Determining if a process has previously been executed, skipping it and
+reusing previous results.
+* Pausing, resuming or checkpointing processes or workflows.
+
+Conforming CWL processes must not assume anything about the runtime
+environment or workflow platform unless explicitly declared through the use
+of [process requirements](#Requirements_and_hints).
+
+## Generic execution process
+
+The generic execution sequence of a CWL process (including workflows and
+command line tools) is as follows.
+
+1. Load, process and validate a CWL document, yielding a process object.
+2. Load input object.
+3. Validate the input object against the `inputs` schema for the process.
+4. Validate that process requirements are met.
+5. Perform any further setup required by the specific process type.
+6. Execute the process.
+7. Capture results of process execution into the output object.
+8. Validate the output object against the `outputs` schema for the process.
+9. Report the output object to the process caller.
+
+## Requirements and hints
+
+A **process requirement** modifies the semantics or runtime
+environment of a process. If an implementation cannot satisfy all
+requirements, or a requirement is listed which is not recognized by the
+implementation, it is a fatal error and the implementation must not attempt
+to run the process, unless overridden at user option.
+
+A **hint** is similar to a requirement, however it is not an error if an
+implementation cannot satisfy all hints. The implementation may report a
+warning if a hint cannot be satisfied.
+
+Requirements are inherited. A requirement specified in a Workflow applies
+to all workflow steps; a requirement specified on a workflow step will
+apply to the process implementation.
+
+If the same process requirement appears at different levels of the
+workflow, the most specific instance of the requirement is used, that is,
+an entry in `requirements` on a process implementation such as
+CommandLineTool will take precedence over an entry in `requirements`
+specified in a workflow step, and an entry in `requirements` on a workflow
+step takes precedence over the workflow. Entries in `hints` are resolved
+the same way.
+
+Requirements override hints. If a process implementation provides a
+process requirement in `hints` which is also provided in `requirements` by
+an enclosing workflow or workflow step, the enclosing `requirements` takes
+precedence.
+
+## Parameter references
+
+Parameter references are denoted by the syntax `$(...)` and may be used in any
+field permitting the pseudo-type `Expression`, as specified by this document.
+Conforming implementations must support parameter references. Parameter
+references use the following subset of
+[Javascript/ECMAScript 5.1](http://www.ecma-international.org/ecma-262/5.1/)
+syntax.
+
+In the following BNF grammar, character classes and grammar rules are denoted
+in '{}', '-' denotes exclusion from a character class, '(())' denotes grouping,
+'|' denotes alternates, trailing '*' denotes zero or more repeats, '+' denotes
+one or more repeats, and all other characters are literal values.
+
+<p>
+<table class="table">
+<tr><td>symbol:: </td><td>{Unicode alphanumeric}+</td></tr>
+<tr><td>singleq:: </td><td>[' (( {character - '} | \' ))* ']</td></tr>
+<tr><td>doubleq:: </td><td>[" (( {character - "} | \" ))* "]</td></tr>
+<tr><td>index:: </td><td>[ {decimal digit}+ ]</td></tr>
+<tr><td>segment:: </td><td>. {symbol} | {singleq} | {doubleq} | {index}</td></tr>
+<tr><td>parameter::</td><td>$( {symbol} {segment}*)</td></tr>
+</table>
+</p>
+
+Use the following algorithm to resolve a parameter reference:
+
+ 1. Match the leading symbol as key
+ 2. Look up the key in the parameter context (described below) to get the current value.
+ It is an error if the key is not found in the parameter context.
+ 3. If there are no subsequent segments, terminate and return current value
+ 4. Else, match the next segment
+ 5. Extract the symbol, string, or index from the segment as key
+ 6. Look up the key in current value and assign as new current value. If
+ the key is a symbol or string, the current value must be an object.
+ If the key is an index, the current value must be an array or string.
+ It is an error if the key does not match the required type, or the key is not found or out
+ of range.
+ 7. Repeat steps 3-6
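+
+For example (an illustrative sketch), given the input object
+
+```
+sample:
+  class: File
+  path: /data/reads.bam
+  size: 123456
+```
+
+the reference `$(inputs.sample.path)` resolves to `/data/reads.bam`, and
+`$(inputs.sample["size"])` resolves to `123456`.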
+
+The root namespace is the parameter context. The following parameters must
+be provided:
+
+ * `inputs`: The input object to the current Process.
+ * `self`: A context-specific value. The contextual values for 'self' are
+ documented for specific fields elsewhere in this specification. If
+ a contextual value of 'self' is not documented for a field, it
+ must be 'null'.
+ * `runtime`: An object containing configuration details. Specific to the
+ process type. An implementation may provide
+ opaque strings for any or all fields of `runtime`. These must be
+ filled in by the platform after processing the Tool but before actual
+ execution. Parameter references and expressions may only use the
+ literal string value of the field and must not perform computation on
+ the contents.
+
+If the value of a field has no leading or trailing non-whitespace
+characters around a parameter reference, the effective value of the field
+becomes the value of the referenced parameter, preserving the return type.
+
+If the value of a field has non-whitespace leading or trailing characters
+around a parameter reference, it is subject to string interpolation. The
+effective value of the field is a string containing the leading characters;
+followed by the string value of the parameter reference; followed by the
+trailing characters. The string value of the parameter reference is its
+textual JSON representation with the following rules:
+
+ * Leading and trailing quotes are stripped from strings
+ * Object entries are sorted by key
+
+Multiple parameter references may appear in a single field. This case
+must be treated as string interpolation. After interpolating the first
+parameter reference, interpolation must be recursively applied to the
+trailing characters to yield the final string value.
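+
+For example (illustrative), given `inputs.prefix` with the string value
+`sample1`, the field values on the left yield the effective values on the
+right:
+
+```
+$(inputs.prefix)               =>  sample1
+$(inputs.prefix).sorted.bam    =>  sample1.sorted.bam
+```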
+
+## Expressions
+
+An expression is a fragment of [Javascript/ECMAScript
+5.1](http://www.ecma-international.org/ecma-262/5.1/) code which is
+evaluated by the workflow platform to affect the inputs, outputs, or
+behavior of a process. In the generic execution sequence, expressions may
+be evaluated during step 5 (process setup), step 6 (execute process),
+and/or step 7 (capture output). Expressions are distinct from regular
+processes in that they are intended to modify the behavior of the workflow
+itself rather than perform the primary work of the workflow.
+
+To declare the use of expressions, the document must include the process
+requirement `InlineJavascriptRequirement`. Expressions may be used in any
+field permitting the pseudo-type `Expression`, as specified by this
+document.
+
+Expressions are denoted by the syntax `$(...)` or `${...}`. A code
+fragment wrapped in the `$(...)` syntax must be evaluated as an
+[ECMAScript expression](http://www.ecma-international.org/ecma-262/5.1/#sec-11). A
+code fragment wrapped in the `${...}` syntax must be evaluated as an
+[ECMAScript function body](http://www.ecma-international.org/ecma-262/5.1/#sec-13)
+for an anonymous, zero-argument function. Expressions must return a valid JSON
+data type: one of null, string, number, boolean, array, object.
+Implementations must permit any syntactically valid Javascript and, when
+scanning for expressions, account for nesting of parentheses or braces
+and for strings that may contain parentheses or braces.
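+
+For example (illustrative), both of the following evaluate to the base name
+of a hypothetical `sample` File input:
+
+```
+$(inputs.sample.path.split('/').pop())
+
+${
+  var parts = inputs.sample.path.split('/');
+  return parts[parts.length - 1];
+}
+```
+
+The first form is evaluated as an ECMAScript expression; the second as the
+body of an anonymous, zero-argument function.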
+
+The runtime must include any code defined in the ["expressionLib" field of
+InlineJavascriptRequirement](#InlineJavascriptRequirement) prior to
+executing the actual expression.
+
+Before executing the expression, the runtime must initialize as global
+variables the fields of the parameter context described above.
+
+The effective value of the field after expression evaluation follows the
+same rules as parameter references discussed above. Multiple expressions
+may appear in a single field.
+
+Expressions must be evaluated in an isolated context (a "sandbox") which
+permits no side effects to leak outside the context. Expressions also must
+be evaluated in [Javascript strict mode](http://www.ecma-international.org/ecma-262/5.1/#sec-4.2.2).
+
+The order in which expressions are evaluated is undefined except where
+otherwise noted in this document.
+
+An implementation may choose to implement parameter references by
+evaluating as a Javascript expression. The results of evaluating
+parameter references must be identical whether implemented by Javascript
+evaluation or some other means.
+
+Implementations may apply other limits, such as process isolation, timeouts,
+and operating system containers/jails to minimize the security risks associated
+with running untrusted code embedded in a CWL document.
+
+## Success and failure
+
+A completed process must result in one of `success`, `temporaryFailure` or
+`permanentFailure` states. An implementation may choose to retry a process
+execution which resulted in `temporaryFailure`. An implementation may
+choose to either continue running other steps of a workflow, or terminate
+immediately upon `permanentFailure`.
+
+* If any step of a workflow execution results in `permanentFailure`, then the
+workflow status is `permanentFailure`.
+
+* If one or more steps result in `temporaryFailure` and all other steps
+complete with `success` or are not executed, then the workflow status is
+`temporaryFailure`.
+
+* If all workflow steps are executed and complete with `success`, then the workflow
+status is `success`.
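+
+A minimal, non-normative sketch of these three rules, given the set of
+states reached by the steps that were actually executed:
+
+```
+def workflow_status(step_states):
+    # step_states: states of all executed steps; steps that were not
+    # executed are simply absent from the collection.
+    if "permanentFailure" in step_states:
+        return "permanentFailure"
+    if "temporaryFailure" in step_states:
+        return "temporaryFailure"
+    return "success"
+```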
+
+## Executing CWL documents as scripts
+
+By convention, a CWL document may begin with `#!/usr/bin/env cwl-runner`
+and be marked as executable (the POSIX "+x" permission bits) to enable it
+to be executed directly. A workflow platform may support this mode of
+operation; if so, it must provide `cwl-runner` as an alias for the
+platform's CWL implementation.
+
+A CWL input object document may similarly begin with `#!/usr/bin/env
+cwl-runner` and be marked as executable. In this case, the input object
+must include the field `cwl:tool` supplying a URI to the default CWL
+document that should be executed using the fields of the input object as
+input parameters.
diff --git a/cwltool/schemas/draft-3/contrib.md b/cwltool/schemas/draft-3/contrib.md
new file mode 100644
index 0000000..cce32d1
--- /dev/null
+++ b/cwltool/schemas/draft-3/contrib.md
@@ -0,0 +1,12 @@
+Authors:
+
+* Peter Amstutz <peter.amstutz at curoverse.com>, Arvados Project, Curoverse
+* Nebojša Tijanić <nebojsa.tijanic at sbgenomics.com>, Seven Bridges Genomics
+
+Contributors:
+
+* John Chilton <jmchilton at gmail.com>, Galaxy Project, Pennsylvania State University
+* Michael R. Crusoe <crusoe at ucdavis.edu>, University of California, Davis
+* Hervé Ménager <herve.menager at gmail.com>, Institut Pasteur
+* Stian Soiland-Reyes [soiland-reyes at cs.manchester.ac.uk](mailto:soiland-reyes at cs.manchester.ac.uk), University of Manchester
+* Luka Stojanovic <luka.stojanovic at sbgenomics.com>, Seven Bridges Genomics
diff --git a/cwltool/schemas/draft-3/index.yml b/cwltool/schemas/draft-3/index.yml
new file mode 100644
index 0000000..9f22f39
--- /dev/null
+++ b/cwltool/schemas/draft-3/index.yml
@@ -0,0 +1,6 @@
+# Common Workflow Language draft-3 specifications
+
+The CWL draft-3 specification consists of the following documents:
+
+* Command Line Tool description specification
+* Workflow description specification
diff --git a/cwltool/schemas/draft-3/intro.md b/cwltool/schemas/draft-3/intro.md
new file mode 100644
index 0000000..5ab669a
--- /dev/null
+++ b/cwltool/schemas/draft-3/intro.md
@@ -0,0 +1,21 @@
+# Status of This Document
+
+This document is the product of the [Common Workflow Language working
+group](https://groups.google.com/forum/#!forum/common-workflow-language). The
+latest version of this document is available in the "draft-3" directory at
+
+https://github.com/common-workflow-language/common-workflow-language
+
+The products of the CWL working group (including this document) are made available
+under the terms of the Apache License, version 2.0.
+
+<!--ToC-->
+
+# Introduction
+
+The Common Workflow Language (CWL) working group is an informal, multi-vendor
+working group consisting of various organizations and individuals that have an
+interest in portability of data analysis workflows. The goal is to create
+specifications like this one that enable data scientists to describe analysis
+tools and workflows that are powerful, easy to use, portable, and support
+reproducibility.
diff --git a/cwltool/schemas/draft-3/invocation.md b/cwltool/schemas/draft-3/invocation.md
new file mode 100644
index 0000000..1342711
--- /dev/null
+++ b/cwltool/schemas/draft-3/invocation.md
@@ -0,0 +1,145 @@
+# Running a Command
+
+To accommodate the enormous variety in syntax and semantics for input, runtime
+environment, invocation, and output of arbitrary programs, a CommandLineTool
+defines an "input binding" that describes how to translate abstract input
+parameters to a concrete program invocation, and an "output binding" that
+describes how to generate output parameters from program output.
+
+## Input binding
+
+The tool command line is built by applying command line bindings to the
+input object. Bindings are listed either as part of an [input
+parameter](#CommandInputParameter) using the `inputBinding` field, or
+separately using the `arguments` field of the CommandLineTool.
+
+The algorithm to build the command line is as follows. In this algorithm,
+the sort key is a list consisting of one or more numeric or string
+elements. Strings are sorted lexicographically based on UTF-8 encoding.
+
+ 1. Collect `CommandLineBinding` objects from `arguments`. Assign a sorting
+ key `[position, i]` where `position` is
+ [`CommandLineBinding.position`](#CommandLineBinding) and `i`
+ is the index in the `arguments` list.
+
+ 2. Collect `CommandLineBinding` objects from the `inputs` schema and
+ associate them with values from the input object. Where the input type
+ is a record, array, or map, recursively walk the schema and input object,
+ collecting nested `CommandLineBinding` objects and associating them with
+ values from the input object.
+
+ 3. Create a sorting key by taking the value of the `position` field at
+ each level leading to each leaf binding object. If `position` is not
+ specified, it is not added to the sorting key. For bindings on arrays
+ and maps, the sorting key must include the array index or map key
+ following the position. If and only if two bindings have the same sort
+ key, the tie must be broken using the ordering of the field or parameter
+ name immediately containing the leaf binding.
+
+ 4. Sort elements using the assigned sorting keys. Numeric entries sort
+ before strings.
+
+ 5. In the sorted order, apply the rules defined in
+ [`CommandLineBinding`](#CommandLineBinding) to convert bindings to actual
+ command line elements.
+
+ 6. Insert elements from `baseCommand` at the beginning of the command
+ line.
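+
+A non-normative Python sketch of step 4, assuming each collected binding
+carries a `sortkey` list (a hypothetical field name) built in steps 1-3:
+
+```
+def key_element(e):
+    # Tag each sort key element so that numeric entries order before strings.
+    return (0, e) if isinstance(e, (int, float)) else (1, e)
+
+def sort_bindings(bindings):
+    # bindings: list of dicts, each with a "sortkey" list of numbers/strings,
+    # e.g. sort_bindings([{"sortkey": ["x"]}, {"sortkey": [0, 1]}]) puts the
+    # numeric key first.
+    return sorted(bindings, key=lambda b: [key_element(e) for e in b["sortkey"]])
+```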
+
+## Runtime environment
+
+All files listed in the input object must be made available in the runtime
+environment. The implementation may use a shared or distributed file
+system or transfer files via explicit download. Implementations may choose
+not to provide access to files not explicitly specified in the input object
+or process requirements.
+
+Output files produced by tool execution must be written to the **designated
+output directory**. The initial current working directory when executing
+the tool must be the designated output directory.
+
+Files may also be written to the **designated temporary directory**. This
+directory must be isolated and not shared with other processes. Any files
+written to the designated temporary directory may be automatically deleted by
+the workflow platform immediately after the tool terminates.
+
+For compatibility, files may be written to the **system temporary directory**
+which must be located at `/tmp`. Because the system temporary directory may be
+shared with other processes on the system, files placed in the system temporary
+directory are not guaranteed to be deleted automatically. Correct tools must
+clean up temporary files written to the system temporary directory. A tool
+must not use the system temporary directory as a backchannel communication with
+other tools. It is valid for the system temporary directory to be the same as
+the designated temporary directory.
+
+The tool must execute in a new, empty environment
+with only the environment variables described below; the child process must
+not inherit environment variables from the parent process except as
+specified or at user option.
+
+ * `HOME` must be set to the designated output directory.
+ * `TMPDIR` must be set to the designated temporary directory.
+ * `PATH` may be inherited from the parent process, except when run in a
+ container that provides its own `PATH`.
+ * Variables defined by [EnvVarRequirement](#EnvVarRequirement)
+ * The default environment of the container, such as when using
+ [DockerRequirement](#DockerRequirement)
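+
+A non-normative sketch of how such an environment might be assembled; the
+precedence between container defaults and `EnvVarRequirement` variables is
+an assumption of this sketch, not a requirement of this specification:
+
+```
+def build_environment(outdir, tmpdir, env_req_vars, container_env, inherited_path=None):
+    env = dict(container_env)      # defaults of the container, if any
+    env.update(env_req_vars)       # variables from EnvVarRequirement
+    env["HOME"] = outdir           # designated output directory
+    env["TMPDIR"] = tmpdir         # designated temporary directory
+    if inherited_path is not None:
+        env["PATH"] = inherited_path
+    return env
+```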
+
+An implementation may forbid the tool from writing to any location in the
+runtime environment file system other than the designated temporary directory,
+system temporary directory, and designated output directory. An implementation
+may provide read-only input files, and disallow in-place update of input files.
+The designated temporary directory and designated output directory may reside
+on different mount points on different file systems.
+
+The `runtime` section available in [parameter references](#Parameter_references)
+and [expressions](#Expressions) contains the following fields. As noted
+earlier, an implementation may perform deferred resolution of runtime fields by providing
+opaque strings for any or all of the following fields; parameter references
+and expressions may only use the literal string value of the field and must
+not perform computation on the contents.
+
+ * `runtime.outdir`: an absolute path to the designated output directory
+ * `runtime.tmpdir`: an absolute path to the designated temporary directory
+ * `runtime.cores`: number of CPU cores reserved for the tool process
+ * `runtime.ram`: amount of RAM in mebibytes (2**20) reserved for the tool process
+ * `runtime.outdirSize`: reserved storage space available in the designated output directory
+ * `runtime.tmpdirSize`: reserved storage space available in the designated temporary directory
+
+See [ResourceRequirement](#ResourceRequirement) for details on how to
+describe the hardware resources required by a tool.
+
+The standard input stream and standard output stream may be redirected as
+described in the `stdin` and `stdout` fields.
+
+## Execution
+
+Once the command line is built and the runtime environment is created, the
+actual tool is executed.
+
+The standard error stream and standard output stream (unless redirected by
+setting `stdout`) may be captured by platform logging facilities for
+storage and reporting.
+
+Tools may be multithreaded or spawn child processes; however, when the
+parent process exits, the tool is considered finished regardless of whether
+any detached child processes are still running. Tools must not require any
+kind of console, GUI, or web based user interaction in order to start and
+run to completion.
+
+The exit code of the process indicates if the process completed
+successfully. By convention, an exit code of zero is treated as success
+and non-zero exit codes are treated as failure. This may be customized by
+providing the fields `successCodes`, `temporaryFailCodes`, and
+`permanentFailCodes`. An implementation may choose to default unspecified
+non-zero exit codes to either `temporaryFailure` or `permanentFailure`.
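+
+A non-normative sketch of that mapping (the default for unlisted non-zero
+codes is an implementation choice; this sketch picks `permanentFailure`):
+
+```
+def process_status(exit_code, tool):
+    # tool: the CommandLineTool document, possibly carrying the optional
+    # successCodes / temporaryFailCodes / permanentFailCodes fields.
+    if exit_code in tool.get("successCodes", []):
+        return "success"
+    if exit_code in tool.get("temporaryFailCodes", []):
+        return "temporaryFailure"
+    if exit_code in tool.get("permanentFailCodes", []):
+        return "permanentFailure"
+    return "success" if exit_code == 0 else "permanentFailure"
+```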
+
+## Output binding
+
+If the output directory contains a file named "cwl.output.json", that file
+must be loaded and used as the output object. Otherwise, the output object
+must be generated by walking the parameters listed in `outputs` and
+applying output bindings to the tool output. Output bindings are
+associated with output parameters using the `outputBinding` field. See
+[`CommandOutputBinding`](#CommandOutputBinding) for details.
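+
+A non-normative sketch of this selection logic, where
+`apply_output_binding` is a hypothetical helper that evaluates a single
+`outputBinding` against the tool output:
+
+```
+import json
+import os
+
+def collect_outputs(outdir, tool, apply_output_binding):
+    custom = os.path.join(outdir, "cwl.output.json")
+    if os.path.exists(custom):
+        # The file is loaded and used verbatim as the output object.
+        with open(custom) as f:
+            return json.load(f)
+    out = {}
+    for param in tool.get("outputs", []):
+        if "outputBinding" in param:
+            out[param["id"]] = apply_output_binding(param["outputBinding"], outdir)
+    return out
+```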
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/field_name.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/field_name.yml
new file mode 100644
index 0000000..44e95a2
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/field_name.yml
@@ -0,0 +1,46 @@
+- |
+ ## Field name resolution
+
+ The document schema declares the vocabulary of known field names. During
+ preprocessing traversal, field names in the document which are not part of
+ the schema vocabulary must be resolved to absolute URIs. Under "strict"
+ validation, it is an error for a document to include fields which are not
+ part of the vocabulary and not resolvable to absolute URIs. Field names
+ which are not part of the vocabulary are resolved using the following
+ rules:
+
+ * If a field name URI begins with a namespace prefix declared in the
+ document context (`@context`) followed by a colon `:`, the prefix and
+ colon must be replaced by the namespace declared in `@context`.
+
+ * If there is a vocabulary term which maps to the URI of a resolved
+ field, the field name must be replaced with the vocabulary term.
+
+ * If a field name URI is an absolute URI consisting of a scheme and path
+ and is not part of the vocabulary, no processing occurs.
+
+ Field name resolution is not relative. It must not be affected by the
+ base URI.
+
+ ### Field name resolution example
+
+ Given the following schema:
+
+ ```
+- $include: field_name_schema.yml
+- |
+ ```
+
+ Process the following example:
+
+ ```
+- $include: field_name_src.yml
+- |
+ ```
+
+ This becomes:
+
+ ```
+- $include: field_name_proc.yml
+- |
+ ```
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/field_name_proc.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/field_name_proc.yml
new file mode 100644
index 0000000..a53ef4b
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/field_name_proc.yml
@@ -0,0 +1,8 @@
+ {
+ "base": "one",
+ "form": {
+ "base": "two",
+ "http://example.com/three": "three",
+ },
+ "http://example.com/acid#four": "four"
+ }
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/field_name_schema.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/field_name_schema.yml
new file mode 100644
index 0000000..5089c4b
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/field_name_schema.yml
@@ -0,0 +1,14 @@
+{
+ "$namespaces": {
+ "acid": "http://example.com/acid#"
+ },
+ "$graph": [{
+ "name": "ExampleType",
+ "type": "record",
+ "fields": [{
+ "name": "base",
+ "type": "string",
+ "jsonldPredicate": "http://example.com/base"
+ }]
+ }]
+}
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/field_name_src.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/field_name_src.yml
new file mode 100644
index 0000000..1ed79b9
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/field_name_src.yml
@@ -0,0 +1,8 @@
+ {
+ "base": "one",
+ "form": {
+ "http://example.com/base": "two",
+ "http://example.com/three": "three",
+ },
+ "acid:four": "four"
+ }
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/ident_res.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/ident_res.yml
new file mode 100644
index 0000000..45f4efb
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/ident_res.yml
@@ -0,0 +1,53 @@
+- |
+ ## Identifier resolution
+
+ The schema may designate one or more fields as identifier fields to identify
+ specific objects. Processing must resolve relative identifiers to absolute
+ identifiers using the following rules:
+
+ * If an identifier URI is prefixed with `#` it is a URI relative
+ fragment identifier. It is resolved relative to the base URI by setting
+ or replacing the fragment portion of the base URI.
+
+ * If an identifier URI does not contain a scheme and is not prefixed with `#` it
+ is a parent relative fragment identifier. It is resolved relative to the
+ base URI by the following rule: if the base URI does not contain a
+ document fragment, set the fragment portion of the base URI. If the base
+ URI does contain a document fragment, append a slash `/` followed by the
+ identifier field to the fragment portion of the base URI.
+
+ * If an identifier URI begins with a namespace prefix declared in
+ `$namespaces` followed by a colon `:`, the prefix and colon must be
+ replaced by the namespace declared in `$namespaces`.
+
+ * If an identifier URI is an absolute URI consisting of a scheme and path,
+ no processing occurs.
+
+ When preprocessing visits a node containing an identifier, that identifier
+ must be used as the base URI to process child nodes.
+
+ It is an error for more than one object in a document to have the same
+ absolute URI.
+
+ ### Identifier resolution example
+
+ Given the following schema:
+
+ ```
+- $include: ident_res_schema.yml
+- |
+ ```
+
+ Process the following example:
+
+ ```
+- $include: ident_res_src.yml
+- |
+ ```
+
+ This becomes:
+
+ ```
+- $include: ident_res_proc.yml
+- |
+ ```
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/ident_res_proc.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/ident_res_proc.yml
new file mode 100644
index 0000000..24d3ea8
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/ident_res_proc.yml
@@ -0,0 +1,20 @@
+{
+ "id": "http://example.com/base",
+ "form": {
+ "id": "http://example.com/base#one",
+ "things": [
+ {
+ "id": "http://example.com/base#one/two"
+ },
+ {
+ "id": "http://example.com/base#three"
+ },
+ {
+ "id": "http://example.com/four#five",
+ },
+ {
+ "id": "http://example.com/acid#six",
+ }
+ ]
+ }
+}
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/ident_res_schema.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/ident_res_schema.yml
new file mode 100644
index 0000000..8a7bb04
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/ident_res_schema.yml
@@ -0,0 +1,14 @@
+{
+ "$namespaces": {
+ "acid": "http://example.com/acid#"
+ },
+ "$graph": [{
+ "name": "ExampleType",
+ "type": "record",
+ "fields": [{
+ "name": "id",
+ "type": "string",
+ "jsonldPredicate": "@id"
+ }]
+ }]
+}
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/ident_res_src.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/ident_res_src.yml
new file mode 100644
index 0000000..bbbd96e
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/ident_res_src.yml
@@ -0,0 +1,20 @@
+ {
+ "id": "http://example.com/base",
+ "form": {
+ "id": "one",
+ "things": [
+ {
+ "id": "two"
+ },
+ {
+ "id": "#three",
+ },
+ {
+ "id": "four#five",
+ },
+ {
+ "id": "acid:six",
+ }
+ ]
+ }
+ }
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/import_include.md b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/import_include.md
new file mode 100644
index 0000000..0ad06bf
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/import_include.md
@@ -0,0 +1,112 @@
+## Import
+
+During preprocessing traversal, an implementation must resolve `$import`
+directives. An `$import` directive is an object consisting of exactly one
+field `$import` specifying a resource by URI string. It is an error if there
+are additional fields in the `$import` object; such additional fields must
+be ignored.
+
+The URI string must be resolved to an absolute URI using the link
+resolution rules described previously. Implementations must support
+loading from `file`, `http` and `https` resources. The URI referenced by
+`$import` must be loaded and recursively preprocessed as a Salad document.
+The external imported document does not inherit the context of the
+importing document, and the default base URI for processing the imported
+document must be the URI used to retrieve the imported document. If the
+`$import` URI includes a document fragment, the fragment must be excluded
+from the base URI used to preprocess the imported document.
+
+Once loaded and processed, the `$import` node is replaced in the document
+structure by the object or array yielded from the import operation.
+
+URIs may reference document fragments which refer to a specific object in
+the target document. This indicates that the `$import` node must be
+replaced by only the object with the appropriate fragment identifier.
+
+It is a fatal error if an import directive refers to an external resource
+or resource fragment which does not exist or is not accessible.
+
+### Import example
+
+import.yml:
+```
+{
+ "hello": "world"
+}
+
+```
+
+parent.yml:
+```
+{
+ "form": {
+ "bar": {
+ "$import": "import.yml"
+ }
+ }
+}
+
+```
+
+This becomes:
+
+```
+{
+ "form": {
+ "bar": {
+ "hello": "world"
+ }
+ }
+}
+```
+
+## Include
+
+During preprocessing traversal, an implementation must resolve `$include`
+directives. An `$include` directive is an object consisting of exactly one
+field `$include` specifying a URI string. It is an error if there are
+additional fields in the `$include` object; such additional fields must be
+ignored.
+
+The URI string must be resolved to an absolute URI using the link
+resolution rules described previously. The URI referenced by `$include` must
+be loaded as text data. Implementations must support loading from
+`file`, `http` and `https` resources. Implementations may transcode the
+character encoding of the text data to match that of the parent document,
+but must not interpret or parse the text document in any other way.
+
+Once loaded, the `$include` node is replaced in the document structure by a
+string containing the text data loaded from the resource.
+
+It is a fatal error if an include directive refers to an external resource
+which does not exist or is not accessible.
+
+### Include example
+
+parent.yml:
+```
+{
+ "form": {
+ "bar": {
+ "$include": "include.txt"
+ }
+ }
+}
+
+```
+
+include.txt:
+```
+hello world
+
+```
+
+This becomes:
+
+```
+{
+ "form": {
+ "bar": "hello world"
+ }
+}
+```
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/link_res.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/link_res.yml
new file mode 100644
index 0000000..9346f8a
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/link_res.yml
@@ -0,0 +1,55 @@
+- |
+ ## Link resolution
+
+ The schema may designate one or more fields as link fields which reference
+ other objects. Processing must resolve links to absolute URIs using the
+ following rules:
+
+ * If a reference URI is prefixed with `#` it is a relative
+ fragment identifier. It is resolved relative to the base URI by setting
+ or replacing the fragment portion of the base URI.
+
+ * If a reference URI does not contain a scheme and is not prefixed with `#`
+ it is a path relative reference. If the reference URI contains `#` in any
+ position other than the first character, the reference URI must be divided
+ into a path portion and a fragment portion split on the first instance of
+ `#`. The path portion is resolved relative to the base URI by the following
+ rule: if the path portion of the base URI ends in a slash `/`, append the
+ path portion of the reference URI to the path portion of the base URI. If
+ the path portion of the base URI does not end in a slash, replace the final
+ path segment with the path portion of the reference URI. Replace the
+ fragment portion of the base URI with the fragment portion of the reference
+ URI.
+
+ * If a reference URI begins with a namespace prefix declared in `$namespaces`
+ followed by a colon `:`, the prefix and colon must be replaced by the
+ namespace declared in `$namespaces`.
+
+ * If a reference URI is an absolute URI consisting of a scheme and path,
+ no processing occurs.
+
+ Link resolution must not affect the base URI used to resolve identifiers
+ and other links.
+
+ ### Link resolution example
+
+ Given the following schema:
+
+ ```
+- $include: link_res_schema.yml
+- |
+ ```
+
+ Process the following example:
+
+ ```
+- $include: link_res_src.yml
+- |
+ ```
+
+ This becomes:
+
+ ```
+- $include: link_res_proc.yml
+- |
+ ```
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/link_res_proc.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/link_res_proc.yml
new file mode 100644
index 0000000..03e539d
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/link_res_proc.yml
@@ -0,0 +1,21 @@
+{
+ "$base": "http://example.com/base",
+ "link": "http://example.com/base/zero",
+ "form": {
+ "link": "http://example.com/one",
+ "things": [
+ {
+ "link": "http://example.com/two"
+ },
+ {
+ "link": "http://example.com/base#three"
+ },
+ {
+ "link": "http://example.com/four#five",
+ },
+ {
+ "link": "http://example.com/acid#six",
+ }
+ ]
+ }
+}
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/link_res_schema.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/link_res_schema.yml
new file mode 100644
index 0000000..76420d3
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/link_res_schema.yml
@@ -0,0 +1,16 @@
+{
+ "$namespaces": {
+ "acid": "http://example.com/acid#"
+ },
+ "$graph": [{
+ "name": "ExampleType",
+ "type": "record",
+ "fields": [{
+ "name": "link",
+ "type": "string",
+ "jsonldPredicate": {
+ "_type": "@id"
+ }
+ }]
+ }]
+}
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/link_res_src.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/link_res_src.yml
new file mode 100644
index 0000000..23f7a29
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/link_res_src.yml
@@ -0,0 +1,21 @@
+{
+ "$base": "http://example.com/base",
+ "link": "http://example.com/base/zero",
+ "form": {
+ "link": "one",
+ "things": [
+ {
+ "link": "two"
+ },
+ {
+ "link": "#three",
+ },
+ {
+ "link": "four#five",
+ },
+ {
+ "link": "acid:six",
+ }
+ ]
+ }
+}
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/metaschema.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/metaschema.yml
new file mode 100644
index 0000000..6e90775
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/metaschema.yml
@@ -0,0 +1,437 @@
+$base: "https://w3id.org/cwl/salad#"
+
+$namespaces:
+ sld: "https://w3id.org/cwl/salad#"
+ dct: "http://purl.org/dc/terms/"
+ rdf: "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ rdfs: "http://www.w3.org/2000/01/rdf-schema#"
+ xsd: "http://www.w3.org/2001/XMLSchema#"
+
+$graph:
+
+- name: "Semantic_Annotations_for_Linked_Avro_Data"
+ type: documentation
+ doc:
+ - $include: salad.md
+ - $import: field_name.yml
+ - $import: ident_res.yml
+ - $import: link_res.yml
+ - $import: vocab_res.yml
+ - $include: import_include.md
+
+- name: "Link_Validation"
+ type: documentation
+ doc: |
+ # Link validation
+
+ Once a document has been preprocessed, an implementation may validate
+ links. The link validation traversal may visit fields which the schema
+ designates as link fields and check that each URI references an existing
+ object in the current document, an imported document, file system, or
+ network resource. Failure to validate links may be a fatal error. Link
+ validation behavior for individual fields may be modified by `identity` and
+ `noLinkCheck` in the `jsonldPredicate` section of the field schema.
+
+
+- name: "Schema_validation"
+ type: documentation
+ doc: ""
+
+
+# - name: "JSON_LD_Context"
+# type: documentation
+# doc: |
+# # Generating JSON-LD Context
+
+# How to generate the json-ld context...
+
+
+- name: PrimitiveType
+ type: enum
+ symbols:
+ - "sld:null"
+ - "xsd:boolean"
+ - "xsd:int"
+ - "xsd:long"
+ - "xsd:float"
+ - "xsd:double"
+ - "xsd:string"
+ doc:
+ - |
+ Salad data types are based on Avro schema declarations. Refer to the
+ [Avro schema declaration documentation](https://avro.apache.org/docs/current/spec.html#schemas) for
+ detailed information.
+ - "null: no value"
+ - "boolean: a binary value"
+ - "int: 32-bit signed integer"
+ - "long: 64-bit signed integer"
+ - "float: single precision (32-bit) IEEE 754 floating-point number"
+ - "double: double precision (64-bit) IEEE 754 floating-point number"
+ - "string: Unicode character sequence"
+
+
+- name: "Any"
+ type: enum
+ symbols: ["#Any"]
+ doc: |
+ The **Any** type validates for any non-null value.
+
+
+- name: JsonldPredicate
+ type: record
+ doc: |
+ Attached to a record field to define how the parent record field is handled for
+ URI resolution and JSON-LD context generation.
+ fields:
+ - name: _id
+ type: ["null", string]
+ jsonldPredicate:
+ _id: sld:_id
+ _type: "@id"
+ identity: true
+ doc: |
+ The predicate URI that this field corresponds to.
+ Corresponds to JSON-LD `@id` directive.
+ - name: _type
+ type: ["null", string]
+ doc: |
+ The context type hint, corresponds to JSON-LD `@type` directive.
+
+ * If the value of this field is `@id` and `identity` is false or
+ unspecified, the parent field must be resolved using the link
+ resolution rules. If `identity` is true, the parent field must be
+ resolved using the identifier expansion rules.
+
+ * If the value of this field is `@vocab`, the parent field must be
+ resolved using the vocabulary resolution rules.
+
+ - name: _container
+ type: ["null", string]
+ doc: |
+ Structure hint, corresponds to JSON-LD `@container` directive.
+ - name: identity
+ type: ["null", boolean]
+ doc: |
+ If true and `_type` is `@id` this indicates that the parent field must
+ be resolved according to identity resolution rules instead of link
+ resolution rules. In addition, the field value is considered an
+ assertion that the linked value exists; absence of an object in the loaded document
+ with the URI is not an error.
+ - name: noLinkCheck
+ type: ["null", boolean]
+ doc: |
+ If true, this indicates that link validation traversal must stop at
+ this field. This field (if it is a URI) or any fields under it (if it
+ is an object or array) are not subject to link checking.
+
+
+- name: SpecializeDef
+ type: record
+ fields:
+ - name: specializeFrom
+ type: string
+ doc: "The data type to be replaced"
+ jsonldPredicate:
+ _id: "sld:specializeFrom"
+ _type: "@id"
+
+ - name: specializeTo
+ type: string
+ doc: "The new data type to replace with"
+ jsonldPredicate:
+ _id: "sld:specializeTo"
+ _type: "@id"
+
+
+- name: NamedType
+ type: record
+ abstract: true
+ fields:
+ - name: name
+ type: string
+ jsonldPredicate: "@id"
+ doc: "The identifier for this type"
+
+
+- name: DocType
+ type: record
+ abstract: true
+ fields:
+ - name: doc
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ doc: "A documentation string for this type, or an array of strings which should be concatenated."
+ jsonldPredicate: "sld:doc"
+
+ - name: docParent
+ type: ["null", string]
+ doc: |
+ Hint to indicate that during documentation generation, documentation
+ for this type should appear in a subsection under `docParent`.
+ jsonldPredicate:
+ _id: "sld:docParent"
+ _type: "@id"
+
+ - name: docChild
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ doc: |
+ Hint to indicate that during documentation generation, documentation
+ for `docChild` should appear in a subsection under this type.
+ jsonldPredicate:
+ _id: "sld:docChild"
+ _type: "@id"
+
+ - name: docAfter
+ type: ["null", string]
+ doc: |
+ Hint to indicate that during documentation generation, documentation
+ for this type should appear after the `docAfter` section at the same
+ level.
+ jsonldPredicate:
+ _id: "sld:docAfter"
+ _type: "@id"
+
+
+- name: SchemaDefinedType
+ type: record
+ extends: "#DocType"
+ doc: |
+ Abstract base for schema-defined types.
+ abstract: true
+ fields:
+ - name: jsonldPredicate
+ type:
+ - "null"
+ - string
+ - "#JsonldPredicate"
+ doc: |
+ Annotate this type with linked data context.
+ jsonldPredicate: "sld:jsonldPredicate"
+
+ - name: documentRoot
+ type: ["null", boolean]
+ doc: |
+ If true, indicates that the type is valid at the document root. At
+ least one type in a schema must be tagged with `documentRoot: true`.
+
+
+- name: RecordField
+ type: record
+ doc: "A field of a record."
+ fields:
+ - name: name
+ type: string
+ jsonldPredicate: "@id"
+ doc: |
+ The name of the field
+
+ - name: doc
+ type: ["null", string]
+ doc: |
+ A documentation string for this field
+ jsonldPredicate: "sld:doc"
+
+ - name: type
+ type:
+ - "#PrimitiveType"
+ - "#RecordSchema"
+ - "#EnumSchema"
+ - "#ArraySchema"
+ - string
+ - type: array
+ items:
+ - "#PrimitiveType"
+ - "#RecordSchema"
+ - "#EnumSchema"
+ - "#ArraySchema"
+ - string
+ jsonldPredicate:
+ _id: "sld:type"
+ _type: "@vocab"
+ doc: |
+ The field type
+
+
+- name: SaladRecordField
+ type: record
+ extends: "#RecordField"
+ doc: "A field of a record."
+ fields:
+ - name: jsonldPredicate
+ type:
+ - "null"
+ - string
+ - "#JsonldPredicate"
+ doc: |
+ Annotate this type with linked data context.
+ jsonldPredicate: "sld:jsonldPredicate"
+
+- name: RecordSchema
+ type: record
+ fields:
+ - name: type
+ doc: "Must be `record`"
+ type:
+ name: Record_symbol
+ type: enum
+ symbols:
+ - "sld:record"
+ jsonldPredicate:
+ _id: "sld:type"
+ _type: "@vocab"
+
+ - name: "fields"
+ type:
+ - "null"
+ - type: "array"
+ items: "#RecordField"
+
+ jsonldPredicate: "sld:fields"
+ doc: "Defines the fields of the record."
+
+
+- name: SaladRecordSchema
+ type: record
+ extends: ["#NamedType", "#RecordSchema", "#SchemaDefinedType"]
+ documentRoot: true
+ specialize:
+ specializeFrom: "#RecordField"
+ specializeTo: "#SaladRecordField"
+ fields:
+ - name: abstract
+ type: ["null", boolean]
+ doc: |
+ If true, this record is abstract and may be used as a base for other
+ records, but is not valid on its own.
+
+ - name: extends
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ jsonldPredicate:
+ _id: "sld:extends"
+ _type: "@id"
+ doc: |
+ Indicates that this record inherits fields from one or more base records.
+
+ - name: specialize
+ type:
+ - "null"
+ - "#SpecializeDef"
+ - type: array
+ items: "#SpecializeDef"
+ doc: |
+ Only applies if `extends` is declared. Apply type specialization using the
+ base record as a template. For each field inherited from the base
+ record, replace any instance of the type `specializeFrom` with
+ `specializeTo`.
+
+
+- name: EnumSchema
+ type: record
+ doc: |
+ Define an enumerated type.
+ fields:
+ - name: type
+ doc: "Must be `enum`"
+ type:
+ name: Enum_symbol
+ type: enum
+ symbols:
+ - "sld:enum"
+ jsonldPredicate:
+ _id: "sld:type"
+ _type: "@vocab"
+
+ - name: "symbols"
+ type:
+ - type: "array"
+ items: "string"
+ jsonldPredicate:
+ _id: "sld:symbols"
+ _type: "@id"
+ identity: true
+ doc: "Defines the set of valid symbols."
+
+
+- name: SaladEnumSchema
+ type: record
+ extends: ["#EnumSchema", "#SchemaDefinedType"]
+ documentRoot: true
+ doc: |
+ Define an enumerated type.
+ fields:
+ - name: extends
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ jsonldPredicate:
+ _id: "sld:extends"
+ _type: "@id"
+ doc: |
+ Indicates that this enum inherits symbols from a base enum.
+
+
+- name: ArraySchema
+ type: record
+ fields:
+ - name: type
+ doc: "Must be `array`"
+ type:
+ name: Array_symbol
+ type: enum
+ symbols:
+ - "sld:array"
+ jsonldPredicate:
+ _id: "sld:type"
+ _type: "@vocab"
+
+ - name: items
+ type:
+ - "#PrimitiveType"
+ - "#RecordSchema"
+ - "#EnumSchema"
+ - "#ArraySchema"
+ - string
+ - type: array
+ items:
+ - "#PrimitiveType"
+ - "#RecordSchema"
+ - "#EnumSchema"
+ - "#ArraySchema"
+ - string
+ jsonldPredicate:
+ _id: "sld:items"
+ _type: "@vocab"
+ doc: "Defines the type of the array elements."
+
+
+- name: Documentation
+ type: record
+ extends: ["#NamedType", "#DocType"]
+ documentRoot: true
+ doc: |
+ A documentation section. This type exists to facilitate self-documenting
+ schemas but has no role in formal validation.
+ fields:
+ - name: type
+ doc: "Must be `documentation`"
+ type:
+ name: Documentation_symbol
+ type: enum
+ symbols:
+ - "sld:documentation"
+ jsonldPredicate:
+ _id: "sld:type"
+ _type: "@vocab"
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/salad.md b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/salad.md
new file mode 100644
index 0000000..6dd3e6a
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/salad.md
@@ -0,0 +1,256 @@
+# Semantic Annotations for Linked Avro Data (SALAD)
+
+Author:
+
+* Peter Amstutz <peter.amstutz at curoverse.com>, Curoverse
+
+Contributors:
+
+* The developers of Apache Avro
+* The developers of JSON-LD
+* Nebojša Tijanić <nebojsa.tijanic at sbgenomics.com>, Seven Bridges Genomics
+
+# Abstract
+
+Salad is a schema language for describing structured linked data documents
+in JSON or YAML documents. A Salad schema provides rules for
+preprocessing, structural validation, and link checking for documents
+described by a Salad schema. Salad builds on JSON-LD and the Apache Avro
+data serialization system, and extends Avro with features for rich data
+modeling such as inheritance, template specialization, object identifiers,
+and object references. Salad was developed to provide a bridge between the
+record oriented data modeling supported by Apache Avro and the Semantic
+Web.
+
+# Status of This Document
+
+This document is the product of the [Common Workflow Language working
+group](https://groups.google.com/forum/#!forum/common-workflow-language). The
+latest version of this document is available in the "schema_salad" directory at
+
+https://github.com/common-workflow-language/schema_salad
+
+The products of the CWL working group (including this document) are made available
+under the terms of the Apache License, version 2.0.
+
+<!--ToC-->
+
+# Introduction
+
+The JSON data model is an extremely popular way to represent structured
+data. It is attractive because of its relative simplicity and is a
+natural fit with the standard types of many programming languages.
+However, this simplicity means that basic JSON lacks expressive features
+useful for working with complex data structures and document formats, such
+as schemas, object references, and namespaces.
+
+JSON-LD is a W3C standard providing a way to describe how to interpret a
+JSON document as Linked Data by means of a "context". JSON-LD provides a
+powerful solution for representing object references and namespaces in JSON
+based on standard web URIs, but is not itself a schema language. Without a
+schema providing a well defined structure, it is difficult to process an
+arbitrary JSON-LD document as idiomatic JSON because there are many ways to
+express the same data that are logically equivalent but structurally
+distinct.
+
+Several schema languages exist for describing and validating JSON data,
+such as the Apache Avro data serialization system; however, none understand
+linked data. As a result, to fully take advantage of JSON-LD to build the
+next generation of linked data applications, one must maintain separate
+JSON schema, JSON-LD context, RDF schema, and human documentation, despite
+significant overlap of content and obvious need for these documents to stay
+synchronized.
+
+Schema Salad is designed to address this gap. It provides a schema
+language and processing rules for describing structured JSON content
+permitting URI resolution and strict document validation. The schema
+language supports linked data through annotations that describe the linked
+data interpretation of the content, enables generation of JSON-LD context
+and RDF schema, and production of RDF triples by applying the JSON-LD
+context. The schema language also provides for robust support of inline
+documentation.
+
+## Introduction to draft 1
+
+This is the first version of Schema Salad. It is developed concurrently
+with draft 3 of the Common Workflow Language for use in specifying the
+Common Workflow Language; however, Schema Salad is intended to be useful to
+a broader audience.
+
+## References to Other Specifications
+
+**Javascript Object Notation (JSON)**: http://json.org
+
+**JSON Linked Data (JSON-LD)**: http://json-ld.org
+
+**YAML**: http://yaml.org
+
+**Avro**: https://avro.apache.org/docs/current/spec.html
+
+**Uniform Resource Identifier (URI) Generic Syntax**: https://tools.ietf.org/html/rfc3986
+
+**Resource Description Framework (RDF)**: http://www.w3.org/RDF/
+
+**UTF-8**: https://www.ietf.org/rfc/rfc2279.txt
+
+## Scope
+
+This document describes the syntax, data model, algorithms, and schema
+language for working with Salad documents. It is not intended to document
+a specific implementation of Salad, however it may serve as a reference for
+the behavior of conforming implementations.
+
+## Terminology
+
+The terminology used to describe Salad documents is defined in the Concepts
+section of the specification. The terms defined in the following list are
+used in building those definitions and in describing the actions of a
+Salad implementation:
+
+**may**: Conforming Salad documents and Salad implementations are permitted but
+not required to be interpreted as described.
+
+**must**: Conforming Salad documents and Salad implementations are required
+to be interpreted as described; otherwise they are in error.
+
+**error**: A violation of the rules of this specification; results are
+undefined. Conforming implementations may detect and report an error and may
+recover from it.
+
+**fatal error**: A violation of the rules of this specification; results
+are undefined. Conforming implementations must not continue to process the
+document and may report an error.
+
+**at user option**: Conforming software may or must (depending on the modal verb in
+the sentence) behave as described; if it does, it must provide users a means to
+enable or disable the behavior described.
+
+# Document model
+
+## Data concepts
+
+An **object** is a data structure equivalent to the "object" type in JSON,
+consisting of an unordered set of name/value pairs (referred to here as
+**fields**) and where the name is a string and the value is a string, number,
+boolean, array, or object.
+
+A **document** is a file containing a serialized object, or an array of
+objects.
+
+A **document type** is a class of files that share a common structure and
+semantics.
+
+A **document schema** is a formal description of the grammar of a document type.
+
+A **base URI** is a context-dependent URI used to resolve relative references.
+
+An **identifier** is a URI that designates a single document or single
+object within a document.
+
+A **vocabulary** is the set of symbolic field names and enumerated symbols defined
+by a document schema, where each term maps to an absolute URI.
+
+## Syntax
+
+Conforming Salad documents are serialized and loaded using YAML syntax and
+UTF-8 text encoding. Salad documents are written using the JSON-compatible
+subset of YAML. Features of YAML such as headers and type tags that are
+not found in the standard JSON data model must not be used in conforming
+Salad documents. It is a fatal error if the document is not valid YAML.
+
+A Salad document must consist only of either a single root object or an
+array of objects.
+
+## Document context
+
+### Implied context
+
+The implicit context consists of the vocabulary defined by the schema and
+the base URI. By default, the base URI must be the URI that was used to
+load the document. It may be overridden by an explicit context.
+
+### Explicit context
+
+If a document consists of a root object, this object may contain the
+fields `$base`, `$namespaces`, `$schemas`, and `$graph`:
+
+ * `$base`: Must be a string. Set the base URI for the document used to
+ resolve relative references.
+
+ * `$namespaces`: Must be an object with strings as values. The keys of
+ the object are namespace prefixes used in the document; the values of
+ the object are the prefix expansions.
+
+ * `$schemas`: Must be an array of strings. This field may list URI
+ references to documents in RDF-XML format which will be queried for RDF
+ schema data. The subjects and predicates described by the RDF schema
+ may provide additional semantic context for the document, and may be
+ used for validation of prefixed extension fields found in the document.
+
+Other directives beginning with `$` must be ignored.
+
+## Document graph
+
+If a document consists of a single root object, this object may contain the
+field `$graph`. This field must be an array of objects. If present, this
+field holds the primary content of the document. A document that consists
+of an array of objects at the root is an implicit graph.
+
+## Document metadata
+
+If a document consists of a single root object, metadata about the
+document, such as authorship, may be declared in the root object.
+
+## Document schema
+
+Document preprocessing, link validation and schema validation require a
+document schema. A schema may consist of:
+
+ * At least one record definition object which defines valid fields that
+ make up a record type. Record field definitions include the valid types
+ that may be assigned to each field and annotations to indicate fields
+ that represent identifiers and links, described below in "Semantic
+ Annotations".
+
+ * Any number of enumerated type objects which define a finite set of symbols
+ that are valid values of the type.
+
+ * Any number of documentation objects which allow in-line documentation of the schema.
+
+The schema for defining a salad schema (the metaschema) is described in
+detail in "Schema validation".
+
+### Record field annotations
+
+In a document schema, record field definitions may include the field
+`jsonldPredicate`, which may be either a string or object. Implementations
+must preprocess fields according to the following rules:
+
+ * If the value of `jsonldPredicate` is `@id`, the field is an identifier
+ field.
+
+ * If the value of `jsonldPredicate` is an object, and that
+ object contains the field `_type` with the value `@id`, the field is a
+ link field.
+
+ * If the value of `jsonldPredicate` is an object, and that
+ object contains the field `_type` with the value `@vocab`, the field is a
+ vocabulary field, which is a subtype of link field.
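+
+A non-normative sketch of this classification:
+
+```
+def field_kind(jsonld_predicate):
+    # jsonld_predicate: the value of a record field's `jsonldPredicate`,
+    # either a string or an object (or None if absent).
+    if jsonld_predicate == "@id":
+        return "identifier"
+    if isinstance(jsonld_predicate, dict):
+        if jsonld_predicate.get("_type") == "@id":
+            return "link"
+        if jsonld_predicate.get("_type") == "@vocab":
+            return "vocabulary"   # a subtype of link field
+    return "plain"
+```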
+
+## Document traversal
+
+To perform document preprocessing, link validation and schema
+validation, the document must be traversed starting from the fields or
+array items of the root object or array and recursively visiting each child
+item which contains objects or arrays.
+
+# Document preprocessing
+
+After processing the explicit context (if any), document preprocessing
+begins. Starting from the document root, object field values or array
+items which contain objects or arrays are recursively traversed
+depth-first. For each visited object, field names, identifier fields, link
+fields, vocabulary fields, and `$import` and `$include` directives must be
+processed as described in this section. The order of traversal of child
+nodes within a parent node is undefined.
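+
+A non-normative sketch of this traversal; the per-node `process` callback
+is assumed to perform the field name, identifier, link, and vocabulary
+resolution and the `$import`/`$include` handling described in this
+specification:
+
+```
+def traverse(node, process, base_uri):
+    # Depth-first, recursive walk over objects and arrays.
+    if isinstance(node, dict):
+        node = process(node, base_uri)
+        for key, value in node.items():
+            if isinstance(value, (dict, list)):
+                node[key] = traverse(value, process, base_uri)
+        return node
+    if isinstance(node, list):
+        return [traverse(item, process, base_uri) for item in node]
+    return node
+```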
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/vocab_res.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/vocab_res.yml
new file mode 100644
index 0000000..4555f5b
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/vocab_res.yml
@@ -0,0 +1,35 @@
+- |
+ ## Vocabulary resolution
+
+ The schema may designate one or more vocabulary fields which use terms
+ defined in the vocabulary. Processing must resolve vocabulary fields to
+ either vocabulary terms or absolute URIs by first applying the link
+ resolution rules defined above, then applying the following additional
+ rule:
+
+ * If a reference URI is a vocabulary field, and there is a vocabulary
+ term which maps to the resolved URI, the reference must be replaced with
+ the vocabulary term.
+
+ ### Vocabulary resolution example
+
+ Given the following schema:
+
+ ```
+- $include: vocab_res_schema.yml
+- |
+ ```
+
+ Process the following example:
+
+ ```
+- $include: vocab_res_src.yml
+- |
+ ```
+
+ This becomes:
+
+ ```
+- $include: vocab_res_proc.yml
+- |
+ ```
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/vocab_res_proc.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/vocab_res_proc.yml
new file mode 100644
index 0000000..d13ab15
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/vocab_res_proc.yml
@@ -0,0 +1,15 @@
+ {
+ "form": {
+ "things": [
+ {
+ "voc": "red",
+ },
+ {
+ "voc": "red",
+ },
+ {
+ "voc": "http://example.com/acid#blue",
+ }
+ ]
+ }
+ }
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/vocab_res_schema.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/vocab_res_schema.yml
new file mode 100644
index 0000000..92b271e
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/vocab_res_schema.yml
@@ -0,0 +1,21 @@
+{
+ "$namespaces": {
+ "acid": "http://example.com/acid#"
+ },
+ "$graph": [{
+ "name": "Colors",
+ "type": "enum",
+ "symbols": ["acid:red"]
+ },
+ {
+ "name": "ExampleType",
+ "type": "record",
+ "fields": [{
+ "name": "voc",
+ "type": "string",
+ "jsonldPredicate": {
+ "_type": "@vocab"
+ }
+ }]
+ }]
+}
diff --git a/cwltool/schemas/draft-3/salad/schema_salad/metaschema/vocab_res_src.yml b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/vocab_res_src.yml
new file mode 100644
index 0000000..82954f1
--- /dev/null
+++ b/cwltool/schemas/draft-3/salad/schema_salad/metaschema/vocab_res_src.yml
@@ -0,0 +1,15 @@
+ {
+ "form": {
+ "things": [
+ {
+ "voc": "red",
+ },
+ {
+ "voc": "http://example.com/acid#red",
+ },
+ {
+ "voc": "http://example.com/acid#blue",
+ }
+ ]
+ }
+ }
diff --git a/cwltool/schemas/draft-3/userguide-intro.md b/cwltool/schemas/draft-3/userguide-intro.md
new file mode 100644
index 0000000..bf60a25
--- /dev/null
+++ b/cwltool/schemas/draft-3/userguide-intro.md
@@ -0,0 +1,9 @@
+# A Gentle Introduction to the Common Workflow Language
+
+Hello!
+
+This guide will introduce you to writing tool wrappers and workflows using the
+Common Workflow Language (CWL). This guide describes the current stable
+specification, draft 3.
+
+## What is CWL?
diff --git a/cwltool/update.py b/cwltool/update.py
new file mode 100644
index 0000000..ed7b8dc
--- /dev/null
+++ b/cwltool/update.py
@@ -0,0 +1,313 @@
+import sys
+import urlparse
+import json
+import re
+from aslist import aslist
+
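+# findId: recursively search a dict/list document structure for the object
+# whose "id" field equals the fragment frg; returns None if not found.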
+def findId(doc, frg):
+ if isinstance(doc, dict):
+ if "id" in doc and doc["id"] == frg:
+ return doc
+ else:
+ for d in doc:
+ f = findId(doc[d], frg)
+ if f:
+ return f
+ if isinstance(doc, list):
+ for d in doc:
+ f = findId(d, frg)
+ if f:
+ return f
+ return None
+
+def fixType(doc):
+ if isinstance(doc, list):
+ return [fixType(f) for f in doc]
+
+ if isinstance(doc, basestring):
+ if doc not in ("null", "boolean", "int", "long", "float", "double", "string", "File", "record", "enum", "array", "Any") and "#" not in doc:
+ return "#" + doc
+ return doc
+
+def _draft2toDraft3dev1(doc, loader, baseuri):
+ try:
+ if isinstance(doc, dict):
+ if "import" in doc:
+ imp = urlparse.urljoin(baseuri, doc["import"])
+ r = loader.fetch(imp)
+ if isinstance(r, list):
+ r = {"@graph": r}
+ r["id"] = imp
+ _, frag = urlparse.urldefrag(imp)
+ if frag:
+ frag = "#" + frag
+ r = findId(r, frag)
+ return _draft2toDraft3dev1(r, loader, imp)
+
+ if "include" in doc:
+ return loader.fetch_text(urlparse.urljoin(baseuri, doc["include"]))
+
+ for t in ("type", "items"):
+ if t in doc:
+ doc[t] = fixType(doc[t])
+
+ if "steps" in doc:
+ if not isinstance(doc["steps"], list):
+ raise Exception("Value of 'steps' must be a list")
+ for i, s in enumerate(doc["steps"]):
+ if "id" not in s:
+ s["id"] = "step%i" % i
+ for inp in s.get("inputs", []):
+ if isinstance(inp.get("source"), list):
+ if "requirements" not in doc:
+ doc["requirements"] = []
+ doc["requirements"].append({"class": "MultipleInputFeatureRequirement"})
+
+
+ for a in doc:
+ doc[a] = _draft2toDraft3dev1(doc[a], loader, baseuri)
+
+ if isinstance(doc, list):
+ return [_draft2toDraft3dev1(a, loader, baseuri) for a in doc]
+
+ return doc
+ except Exception as e:
+ err = json.dumps(doc, indent=4)
+ if "id" in doc:
+ err = doc["id"]
+ elif "name" in doc:
+ err = doc["name"]
+ import traceback
+ raise Exception("Error updating '%s'\n %s\n%s" % (err, e, traceback.format_exc(e)))
+
+def draft2toDraft3dev1(doc, loader, baseuri):
+ return (_draft2toDraft3dev1(doc, loader, baseuri), "https://w3id.org/cwl/cwl#draft-3.dev1")
+
+digits = re.compile("\d+")
+
+def updateScript(sc):
+ sc = sc.replace("$job", "inputs")
+ sc = sc.replace("$tmpdir", "runtime.tmpdir")
+ sc = sc.replace("$outdir", "runtime.outdir")
+ sc = sc.replace("$self", "self")
+ return sc
+
+def _updateDev2Script(ent):
+ if isinstance(ent, dict) and "engine" in ent:
+ if ent["engine"] == "cwl:JsonPointer":
+ sp = ent["script"].split("/")
+ if sp[0] in ("tmpdir", "outdir"):
+ return "$(runtime.%s)" % sp[0]
+ else:
+ if not sp[0]:
+ sp.pop(0)
+ front = sp.pop(0)
+ sp = [str(i) if digits.match(i) else "'"+i+"'"
+ for i in sp]
+ if front == "job":
+ return "$(inputs[%s])" % ']['.join(sp)
+ elif front == "context":
+ return "$(self[%s])" % ']['.join(sp)
+ else:
+ sc = updateScript(ent["script"])
+ if sc[0] == "{":
+ return "$" + sc
+ else:
+ return "$(%s)" % sc
+ else:
+ return ent
+
+def _draftDraft3dev1toDev2(doc, loader, baseuri):
+ doc = _updateDev2Script(doc)
+ if isinstance(doc, basestring):
+ return doc
+
+ # Convert expressions
+ if isinstance(doc, dict):
+ if "@import" in doc:
+ r, _ = loader.resolve_ref(doc["@import"], base_url=baseuri)
+ return _draftDraft3dev1toDev2(r, loader, r["id"])
+
+ for a in doc:
+ doc[a] = _draftDraft3dev1toDev2(doc[a], loader, baseuri)
+
+ if "class" in doc and (doc["class"] in ("CommandLineTool", "Workflow", "ExpressionTool")):
+ added = False
+ if "requirements" in doc:
+ for r in doc["requirements"]:
+ if r["class"] == "ExpressionEngineRequirement":
+ if "engineConfig" in r:
+ doc["requirements"].append({
+ "class":"InlineJavascriptRequirement",
+ "expressionLib": [updateScript(sc) for sc in aslist(r["engineConfig"])]
+ })
+ added = True
+ doc["requirements"] = [rq for rq in doc["requirements"] if rq["class"] != "ExpressionEngineRequirement"]
+ break
+ else:
+ doc["requirements"] = []
+ if not added:
+ doc["requirements"].append({"class":"InlineJavascriptRequirement"})
+
+ elif isinstance(doc, list):
+ return [_draftDraft3dev1toDev2(a, loader, baseuri) for a in doc]
+
+ return doc
+
+def draftDraft3dev1toDev2(doc, loader, baseuri):
+ return (_draftDraft3dev1toDev2(doc, loader, baseuri), "https://w3id.org/cwl/cwl#draft-3.dev2")
+
+def _draftDraft3dev2toDev3(doc, loader, baseuri):
+ try:
+ if isinstance(doc, dict):
+ if "@import" in doc:
+ if doc["@import"][0] == "#":
+ return doc["@import"]
+ else:
+ imp = urlparse.urljoin(baseuri, doc["@import"])
+ r = loader.fetch(imp)
+ if isinstance(r, list):
+ r = {"@graph": r}
+ r["id"] = imp
+ _, frag = urlparse.urldefrag(imp)
+ if frag:
+ frag = "#" + frag
+ r = findId(r, frag)
+ return _draftDraft3dev2toDev3(r, loader, imp)
+
+ if "@include" in doc:
+ return loader.fetch_text(urlparse.urljoin(baseuri, doc["@include"]))
+
+ for a in doc:
+ doc[a] = _draftDraft3dev2toDev3(doc[a], loader, baseuri)
+
+ if isinstance(doc, list):
+ return [_draftDraft3dev2toDev3(a, loader, baseuri) for a in doc]
+
+ return doc
+ except Exception as e:
+ err = json.dumps(doc, indent=4)
+ if "id" in doc:
+ err = doc["id"]
+ elif "name" in doc:
+ err = doc["name"]
+ import traceback
+ raise Exception("Error updating '%s'\n %s\n%s" % (err, e, traceback.format_exc(e)))
+
+def draftDraft3dev2toDev3(doc, loader, baseuri):
+ return (_draftDraft3dev2toDev3(doc, loader, baseuri), "https://w3id.org/cwl/cwl#draft-3.dev3")
+
+
+def traverseImport(doc, loader, baseuri, func):
+ if "$import" in doc:
+ if doc["$import"][0] == "#":
+ return doc["$import"]
+ else:
+ imp = urlparse.urljoin(baseuri, doc["$import"])
+ r = loader.fetch(imp)
+ if isinstance(r, list):
+ r = {"$graph": r}
+ r["id"] = imp
+ _, frag = urlparse.urldefrag(imp)
+ if frag:
+ frag = "#" + frag
+ r = findId(r, frag)
+ return func(r, loader, imp)
+
+def _draftDraft3dev3toDev4(doc, loader, baseuri):
+ try:
+ if isinstance(doc, dict):
+ r = traverseImport(doc, loader, baseuri, _draftDraft3dev3toDev4)
+ if r is not None:
+ return r
+
+ if "@graph" in doc:
+ doc["$graph"] = doc["@graph"]
+ del doc["@graph"]
+
+ for a in doc:
+ doc[a] = _draftDraft3dev3toDev4(doc[a], loader, baseuri)
+
+ if isinstance(doc, list):
+ return [_draftDraft3dev3toDev4(a, loader, baseuri) for a in doc]
+
+ return doc
+ except Exception as e:
+ err = json.dumps(doc, indent=4)
+ if "id" in doc:
+ err = doc["id"]
+ elif "name" in doc:
+ err = doc["name"]
+ import traceback
+ raise Exception("Error updating '%s'\n %s\n%s" % (err, e, traceback.format_exc(e)))
+
+
+def draftDraft3dev3toDev4(doc, loader, baseuri):
+ return (_draftDraft3dev3toDev4(doc, loader, baseuri), "https://w3id.org/cwl/cwl#draft-3.dev4")
+
+def _draftDraft3dev4toDev5(doc, loader, baseuri):
+ try:
+ if isinstance(doc, dict):
+ r = traverseImport(doc, loader, baseuri, _draftDraft3dev4toDev5)
+ if r is not None:
+ return r
+
+ for b in ("inputBinding", "outputBinding"):
+ if b in doc and "secondaryFiles" in doc[b]:
+ doc["secondaryFiles"] = doc[b]["secondaryFiles"]
+ del doc[b]["secondaryFiles"]
+
+ for a in doc:
+ doc[a] = _draftDraft3dev4toDev5(doc[a], loader, baseuri)
+
+ if isinstance(doc, list):
+ return [_draftDraft3dev4toDev5(a, loader, baseuri) for a in doc]
+
+ return doc
+ except Exception as e:
+ err = json.dumps(doc, indent=4)
+ if "id" in doc:
+ err = doc["id"]
+ elif "name" in doc:
+ err = doc["name"]
+ import traceback
+ raise Exception("Error updating '%s'\n %s\n%s" % (err, e, traceback.format_exc(e)))
+
+
+def draftDraft3dev4toDev5(doc, loader, baseuri):
+ return (_draftDraft3dev4toDev5(doc, loader, baseuri), "https://w3id.org/cwl/cwl#draft-3.dev5")
+
+def draftDraft3dev5toFinal(doc, loader, baseuri):
+ return (doc, "https://w3id.org/cwl/cwl#draft-3")
+
+
+def update(doc, loader, baseuri):
+ updates = {
+ "https://w3id.org/cwl/cwl#draft-2": draft2toDraft3dev1,
+ "https://w3id.org/cwl/cwl#draft-3.dev1": draftDraft3dev1toDev2,
+ "https://w3id.org/cwl/cwl#draft-3.dev2": draftDraft3dev2toDev3,
+ "https://w3id.org/cwl/cwl#draft-3.dev3": draftDraft3dev3toDev4,
+ "https://w3id.org/cwl/cwl#draft-3.dev4": draftDraft3dev4toDev5,
+ "https://w3id.org/cwl/cwl#draft-3.dev5": draftDraft3dev5toFinal,
+ "https://w3id.org/cwl/cwl#draft-3": None
+ }
+
+ def identity(doc, loader, baseuri):
+ v = doc.get("cwlVersion")
+ if v:
+ return (doc, loader.expand_url(v, ""))
+ else:
+ return (doc, "https://w3id.org/cwl/cwl#draft-2")
+
+ nextupdate = identity
+
+ while nextupdate:
+ (doc, version) = nextupdate(doc, loader, baseuri)
+ if version in updates:
+ nextupdate = updates[version]
+ else:
+ raise Exception("Unrecognized version %s" % version)
+
+ doc["cwlVersion"] = version
+
+ return doc
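Editor's note: the update() dispatcher above upgrades a document one draft at a time: identity() reports the document's current version, the matching converter runs and returns the next version string, and the loop stops when it reaches an entry mapped to None. A minimal standalone sketch of that chaining pattern, with stub converters and made-up version strings standing in for the real draft-to-draft functions:

    # Sketch of the chain-of-updaters pattern used by update() above.
    # The converters and version strings below are illustrative stand-ins.
    def bump_to_v2(doc):
        return (dict(doc, touched_by_v2=True), "v2")

    def bump_to_v3(doc):
        return (dict(doc, touched_by_v3=True), "v3")

    updaters = {"v1": bump_to_v2, "v2": bump_to_v3, "v3": None}

    def run_chain(doc, version="v1"):
        nxt = updaters[version]
        while nxt:
            doc, version = nxt(doc)
            nxt = updaters[version]
        doc["cwlVersion"] = version
        return doc

    print run_chain({})
    # the result carries both bump flags and cwlVersion == "v3"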
diff --git a/cwltool/workflow.py b/cwltool/workflow.py
new file mode 100644
index 0000000..84fb584
--- /dev/null
+++ b/cwltool/workflow.py
@@ -0,0 +1,587 @@
+import job
+import draft2tool
+from aslist import aslist
+from process import Process, get_feature, empty_subtree, shortname, uniquename
+from errors import WorkflowException
+import copy
+import logging
+import random
+import os
+from collections import namedtuple
+import pprint
+import functools
+import schema_salad.validate as validate
+import urlparse
+import pprint
+import tempfile
+import shutil
+import json
+import schema_salad
+import expression
+
+_logger = logging.getLogger("cwltool")
+
+WorkflowStateItem = namedtuple('WorkflowStateItem', ['parameter', 'value'])
+
+def defaultMakeTool(toolpath_object, **kwargs):
+ if not isinstance(toolpath_object, dict):
+ raise WorkflowException("Not a dict: `%s`" % toolpath_object)
+ if "class" in toolpath_object:
+ if toolpath_object["class"] == "CommandLineTool":
+ return draft2tool.CommandLineTool(toolpath_object, **kwargs)
+ elif toolpath_object["class"] == "ExpressionTool":
+ return draft2tool.ExpressionTool(toolpath_object, **kwargs)
+ elif toolpath_object["class"] == "Workflow":
+ return Workflow(toolpath_object, **kwargs)
+
+ raise WorkflowException("Missing or invalid 'class' field in %s, expecting one of: CommandLineTool, ExpressionTool, Workflow" % toolpath_object["id"])
+
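Editor's note: defaultMakeTool is the factory the rest of cwltool passes around (as the makeTool keyword argument) to turn a parsed CWL document into the right Process subclass, keyed on the "class" field. Building a real CommandLineTool also needs the loader/schema keyword arguments prepared in main.py, so here is a self-contained sketch of just the dispatch pattern, with stand-in classes that are illustrative only:

    # Stand-in dispatch mirroring defaultMakeTool's selection on "class".
    class StubCommandLineTool(object):
        def __init__(self, tool):
            self.tool = tool

    STUB_CLASSES = {"CommandLineTool": StubCommandLineTool}

    def make_tool_sketch(doc):
        cls = STUB_CLASSES.get(doc.get("class"))
        if cls is None:
            raise Exception("Missing or invalid 'class' field in %s" % doc.get("id"))
        return cls(doc)

    t = make_tool_sketch({"id": "#echo", "class": "CommandLineTool"})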
+def findfiles(wo, fn=None):
+ if fn is None:
+ fn = []
+ if isinstance(wo, dict):
+ if wo.get("class") == "File":
+ fn.append(wo)
+ findfiles(wo.get("secondaryFiles", None), fn)
+ else:
+ for w in wo.values():
+ findfiles(w, fn)
+ elif isinstance(wo, list):
+ for w in wo:
+ findfiles(w, fn)
+ return fn
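Editor's note: findfiles does a blind recursive walk over an output object and collects every dict whose "class" is File, following secondaryFiles as well. A small usage sketch, assuming the cwltool package in this diff is importable; the paths are made up:

    from cwltool.workflow import findfiles

    outputs = {
        "aligned": {"class": "File", "path": "/tmp/out.bam",
                    "secondaryFiles": [{"class": "File", "path": "/tmp/out.bam.bai"}]},
        "logs": [{"class": "File", "path": "/tmp/run.log"}],
    }
    paths = [f["path"] for f in findfiles(outputs)]
    # contains /tmp/out.bam, /tmp/out.bam.bai and /tmp/run.log (dict traversal order varies)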
+
+
+def match_types(sinktype, src, iid, inputobj, linkMerge, valueFrom):
+ if isinstance(sinktype, list):
+ # Sink is union type
+ for st in sinktype:
+ if match_types(st, src, iid, inputobj, linkMerge, valueFrom):
+ return True
+ elif isinstance(src.parameter["type"], list):
+ # Source is union type
+ # Check that every source type is compatible with the sink.
+ for st in src.parameter["type"]:
+ srccopy = copy.deepcopy(src)
+ srccopy.parameter["type"] = st
+ if not match_types(st, srccopy, iid, inputobj, linkMerge, valueFrom):
+ return False
+ return True
+ elif linkMerge:
+ if iid not in inputobj:
+ inputobj[iid] = []
+ if linkMerge == "merge_nested":
+ inputobj[iid].append(src.value)
+ elif linkMerge == "merge_flattened":
+ if isinstance(src.value, list):
+ inputobj[iid].extend(src.value)
+ else:
+ inputobj[iid].append(src.value)
+ else:
+ raise WorkflowException("Unrecognized linkMerge enum '%s'" % linkMerge)
+ return True
+ elif valueFrom is not None or are_same_type(src.parameter["type"], sinktype) or sinktype == "Any":
+ # simply assign the value from state to input
+ inputobj[iid] = copy.deepcopy(src.value)
+ return True
+ return False
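Editor's note: match_types is where the linkMerge semantics live: merge_flattened extends the sink list with the source value (concatenating lists), while merge_nested appends the source value as a single element. A hedged sketch using simplified parameter shapes (real ids and types carry more structure), again assuming the package is importable:

    from cwltool.workflow import WorkflowStateItem, match_types

    src = WorkflowStateItem({"type": {"type": "array", "items": "File"}},
                            [{"class": "File", "path": "/tmp/a"},
                             {"class": "File", "path": "/tmp/b"}])
    inputobj = {}
    match_types("Any", src, "flat", inputobj, "merge_flattened", None)
    match_types("Any", src, "nested", inputobj, "merge_nested", None)
    # inputobj["flat"] holds the two Files in one flat list;
    # inputobj["nested"] holds the whole source list as a single element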
+
+def are_same_type(one, two):
+ """Check for identical type specifications, ignoring extra keys like inputBinding.
+ """
+ if isinstance(one, dict) and isinstance(two, dict):
+ if one["type"] == "array" and two["type"] == "array":
+ return are_same_type(one["items"], two["items"])
+ elif one["type"] == two["type"]:
+ return True
+ else:
+ return False
+ else:
+ return one == two
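Editor's note: are_same_type compares only the type skeleton, recursing through array items and ignoring everything else. For instance:

    from cwltool.workflow import are_same_type

    file_array = {"type": "array", "items": "File"}
    are_same_type(file_array, {"type": "array", "items": "File"})    # True
    are_same_type(file_array, {"type": "array", "items": "string"})  # False
    are_same_type("File", "File")                                    # True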
+
+
+def object_from_state(state, parms, frag_only, supportsMultipleInput):
+ inputobj = {}
+ for inp in parms:
+ iid = inp["id"]
+ if frag_only:
+ iid = shortname(iid)
+ if "source" in inp:
+ if isinstance(inp["source"], list) and not supportsMultipleInput:
+ raise WorkflowException("Workflow contains multiple inbound links to a single parameter but MultipleInputFeatureRequirement is not declared.")
+ connections = aslist(inp["source"])
+ for src in connections:
+ if src in state and state[src] is not None:
+ if not match_types(inp["type"], state[src], iid, inputobj,
+ inp.get("linkMerge", ("merge_nested" if len(connections) > 1 else None)),
+ valueFrom=inp.get("valueFrom")):
+ raise WorkflowException("Type mismatch between source '%s' (%s) and sink '%s' (%s)" % (src, state[src].parameter["type"], inp["id"], inp["type"]))
+ elif src not in state:
+ raise WorkflowException("Connect source '%s' on parameter '%s' does not exist" % (src, inp["id"]))
+ else:
+ return None
+ elif "default" in inp:
+ inputobj[iid] = inp["default"]
+ elif "valueFrom" in inp:
+ inputobj[iid] = None
+ else:
+ raise WorkflowException("Value for %s not specified" % (inp["id"]))
+ return inputobj
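Editor's note: object_from_state builds a step's (or the whole workflow's) input object by resolving each parameter's "source" against the accumulated state, falling back to "default", and returning None when a required upstream value has not arrived yet. A hedged sketch with simplified identifiers (real ids are full URIs, and frag_only controls whether they get shortened):

    from cwltool.workflow import WorkflowStateItem, object_from_state

    state = {"rev_out": WorkflowStateItem({"type": "File"},
                                          {"class": "File", "path": "/tmp/rev.txt"})}
    parms = [{"id": "sort_in", "type": "File", "source": "rev_out"}]
    object_from_state(state, parms, False, False)
    # -> {"sort_in": {"class": "File", "path": "/tmp/rev.txt"}}

    state["rev_out"] = None                         # upstream step not finished yet
    object_from_state(state, parms, False, False)   # -> None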
+
+
+class WorkflowJobStep(object):
+ def __init__(self, step):
+ self.step = step
+ self.tool = step.tool
+ self.id = step.id
+ self.submitted = False
+ self.completed = False
+ self.name = uniquename("step %s" % shortname(self.id))
+
+ def job(self, joborder, basedir, output_callback, **kwargs):
+ kwargs["part_of"] = self.name
+ kwargs["name"] = shortname(self.id)
+ for j in self.step.job(joborder, basedir, output_callback, **kwargs):
+ yield j
+
+class WorkflowJob(object):
+ def __init__(self, workflow, **kwargs):
+ self.workflow = workflow
+ self.tool = workflow.tool
+ self.steps = [WorkflowJobStep(s) for s in workflow.steps]
+ self.id = workflow.tool["id"]
+ if "outdir" in kwargs:
+ self.outdir = kwargs["outdir"]
+ elif "tmp_outdir_prefix" in kwargs:
+ self.outdir = tempfile.mkdtemp(prefix=kwargs["tmp_outdir_prefix"])
+ else:
+ # tmp_outdir_prefix defaults to tmp, so this is unlikely to be used
+ self.outdir = tempfile.mkdtemp()
+
+ self.name = uniquename(kwargs.get("name", shortname(self.workflow.tool["id"])))
+
+ _logger.debug("[workflow %s] initialized from %s", self.name, self.tool["id"])
+
+ def receive_output(self, step, outputparms, jobout, processStatus):
+ _logger.debug("[workflow %s] step %s completed", self.name, id(step))
+ for i in outputparms:
+ if "id" in i:
+ if i["id"] in jobout:
+ self.state[i["id"]] = WorkflowStateItem(i, jobout[i["id"]])
+ else:
+ _logger.error("Output is missing expected field %s" % i["id"])
+ processStatus = "permanentFail"
+
+ if processStatus != "success":
+ if self.processStatus != "permanentFail":
+ self.processStatus = processStatus
+
+ if processStatus == "success":
+ _logger.info("Workflow step %s completion status is %s", step.id, processStatus)
+ else:
+ _logger.warn("Workflow step %s completion status is %s", step.id, processStatus)
+
+ step.completed = True
+
+ def try_make_job(self, step, basedir, **kwargs):
+ inputparms = step.tool["inputs"]
+ outputparms = step.tool["outputs"]
+
+ supportsMultipleInput = bool(self.workflow.get_requirement("MultipleInputFeatureRequirement")[0])
+
+ try:
+ inputobj = object_from_state(self.state, inputparms, False, supportsMultipleInput)
+ if inputobj is None:
+ _logger.debug("[workflow %s] job step %s not ready", self.name, step.id)
+ return
+
+ _logger.debug("[step %s] starting job step %s of workflow %s", id(step), step.id, id(self))
+
+ if step.submitted:
+ return
+
+ callback = functools.partial(self.receive_output, step, outputparms)
+
+ valueFrom = {i["id"]: i["valueFrom"] for i in step.tool["inputs"] if "valueFrom" in i}
+
+ if len(valueFrom) > 0 and not bool(self.workflow.get_requirement("StepInputExpressionRequirement")[0]):
+ raise WorkflowException("Workflow step contains valueFrom but StepInputExpressionRequirement not in requirements")
+
+ vfinputs = {shortname(k): v for k,v in inputobj.iteritems()}
+ def valueFromFunc(k, v):
+ if k in valueFrom:
+ return expression.do_eval(valueFrom[k], vfinputs, self.workflow.requirements,
+ None, None, {}, context=v)
+ else:
+ return v
+
+ if "scatter" in step.tool:
+ scatter = aslist(step.tool["scatter"])
+ method = step.tool.get("scatterMethod")
+ if method is None and len(scatter) != 1:
+ raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")
+ if "valueFrom" not in kwargs:
+ kwargs["valueFrom"] = valueFromFunc
+ if method == "dotproduct" or method is None:
+ jobs = dotproduct_scatter(step, inputobj, basedir, scatter,
+ callback, **kwargs)
+ elif method == "nested_crossproduct":
+ jobs = nested_crossproduct_scatter(step, inputobj,
+ basedir, scatter, callback, **kwargs)
+ elif method == "flat_crossproduct":
+ jobs = flat_crossproduct_scatter(step, inputobj, basedir,
+ scatter, callback, 0, **kwargs)
+ else:
+ _logger.debug("[workflow %s] Job is input %s", self.name, json.dumps(inputobj, indent=4))
+ inputobj = {k: valueFromFunc(k, v) for k,v in inputobj.items()}
+ _logger.debug("[workflow %s] Evaluated job input to %s", self.name, json.dumps(inputobj, indent=4))
+ jobs = step.job(inputobj, basedir, callback, **kwargs)
+
+ step.submitted = True
+
+ for j in jobs:
+ yield j
+ except WorkflowException:
+ raise
+ except Exception as e:
+ _logger.exception("Unhandled exception")
+ self.processStatus = "permanentFail"
+ step.completed = True
+
+ def run(self, **kwargs):
+ _logger.debug("[workflow %s] starting", self.name)
+
+ def job(self, joborder, basedir, output_callback, move_outputs=True, **kwargs):
+ self.state = {}
+ self.processStatus = "success"
+
+ if "outdir" in kwargs:
+ del kwargs["outdir"]
+
+ for i in self.tool["inputs"]:
+ iid = shortname(i["id"])
+ if iid in joborder:
+ self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(joborder[iid]))
+ elif "default" in i:
+ self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(i["default"]))
+ else:
+ raise WorkflowException("Input '%s' not in input object and does not have a default value." % (i["id"]))
+
+ for s in self.steps:
+ for out in s.tool["outputs"]:
+ self.state[out["id"]] = None
+
+ output_dirs = set()
+
+ completed = 0
+ while completed < len(self.steps) and self.processStatus == "success":
+ made_progress = False
+ completed = 0
+ for step in self.steps:
+ if step.completed:
+ completed += 1
+ else:
+ for newjob in self.try_make_job(step, basedir, **kwargs):
+ if newjob:
+ made_progress = True
+ if newjob.outdir:
+ output_dirs.add(newjob.outdir)
+ yield newjob
+ if not made_progress and completed < len(self.steps):
+ yield None
+
+ supportsMultipleInput = bool(self.workflow.get_requirement("MultipleInputFeatureRequirement")[0])
+
+ wo = object_from_state(self.state, self.tool["outputs"], True, supportsMultipleInput)
+
+ if wo is None:
+ raise WorkflowException("Output for workflow not available")
+
+ if move_outputs:
+ targets = set()
+ conflicts = set()
+
+ outfiles = findfiles(wo)
+
+ for f in outfiles:
+ for a in output_dirs:
+ if f["path"].startswith(a):
+ src = f["path"]
+ dst = os.path.join(self.outdir, src[len(a)+1:])
+ if dst in targets:
+ conflicts.add(dst)
+ else:
+ targets.add(dst)
+
+ for f in outfiles:
+ for a in output_dirs:
+ if f["path"].startswith(a):
+ src = f["path"]
+ dst = os.path.join(self.outdir, src[len(a)+1:])
+ if dst in conflicts:
+ sp = os.path.splitext(dst)
+ dst = "%s-%s%s" % (sp[0], str(random.randint(1, 1000000000)), sp[1])
+ dirname = os.path.dirname(dst)
+ if not os.path.exists(dirname):
+ os.makedirs(dirname)
+ _logger.debug("[workflow %s] Moving '%s' to '%s'", self.name, src, dst)
+ shutil.move(src, dst)
+ f["path"] = dst
+
+ for a in output_dirs:
+ if os.path.exists(a) and empty_subtree(a):
+ if kwargs.get("rm_tmpdir", True):
+ _logger.debug("[workflow %s] Removing intermediate output directory %s", self.name, a)
+ shutil.rmtree(a, True)
+
+ _logger.info("[workflow %s] outdir is %s", self.name, self.outdir)
+
+ output_callback(wo, self.processStatus)
+
+
+class Workflow(Process):
+ def __init__(self, toolpath_object, **kwargs):
+ super(Workflow, self).__init__(toolpath_object, **kwargs)
+
+ kwargs["requirements"] = self.requirements
+ kwargs["hints"] = self.hints
+
+ makeTool = kwargs.get("makeTool")
+ self.steps = [WorkflowStep(step, n, **kwargs) for n,step in enumerate(self.tool.get("steps", []))]
+ random.shuffle(self.steps)
+
+ # TODO: statically validate data links instead of doing it at runtime.
+
+ def job(self, joborder, basedir, output_callback, **kwargs):
+ builder = self._init_job(joborder, basedir, **kwargs)
+ wj = WorkflowJob(self, **kwargs)
+ yield wj
+
+ kwargs["part_of"] = "workflow %s" % wj.name
+
+ for w in wj.job(builder.job, basedir, output_callback, **kwargs):
+ yield w
+
+
+class WorkflowStep(Process):
+ def __init__(self, toolpath_object, pos, **kwargs):
+ if "id" in toolpath_object:
+ self.id = toolpath_object["id"]
+ else:
+ self.id = "#step" + str(pos)
+
+ try:
+ makeTool = kwargs.get("makeTool")
+ runobj = None
+ if isinstance(toolpath_object["run"], basestring):
+ runobj, _ = schema_salad.schema.load_and_validate(kwargs["loader"],
+ kwargs["avsc_names"],
+ toolpath_object["run"],
+ True)
+ else:
+ runobj = toolpath_object["run"]
+ self.embedded_tool = makeTool(runobj, **kwargs)
+ except validate.ValidationException as v:
+ raise WorkflowException("Tool definition %s failed validation:\n%s" % (toolpath_object["run"], validate.indent(str(v))))
+
+ for field in ("inputs", "outputs"):
+ for i in toolpath_object[field]:
+ inputid = i["id"]
+ p = shortname(inputid)
+ found = False
+ for a in self.embedded_tool.tool[field]:
+ frag = shortname(a["id"])
+ if frag == p:
+ i.update(a)
+ found = True
+ if not found:
+ i["type"] = "Any"
+ #raise WorkflowException("Parameter '%s' of %s in workflow step %s does not correspond to parameter in %s" % (p, field, self.id, self.embedded_tool.tool.get("id")))
+ i["id"] = inputid
+
+ super(WorkflowStep, self).__init__(toolpath_object, **kwargs)
+
+ if self.embedded_tool.tool["class"] == "Workflow":
+ (feature, _) = self.get_requirement("SubworkflowFeatureRequirement")
+ if not feature:
+ raise WorkflowException("Workflow contains embedded workflow but SubworkflowFeatureRequirement not in requirements")
+
+ if "scatter" in self.tool:
+ (feature, _) = self.get_requirement("ScatterFeatureRequirement")
+ if not feature:
+ raise WorkflowException("Workflow contains scatter but ScatterFeatureRequirement not in requirements")
+
+ inputparms = copy.deepcopy(self.tool["inputs"])
+ outputparms = copy.deepcopy(self.tool["outputs"])
+ scatter = aslist(self.tool["scatter"])
+
+ method = self.tool.get("scatterMethod")
+ if method is None and len(scatter) != 1:
+ raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")
+
+ inp_map = {i["id"]: i for i in inputparms}
+ for s in scatter:
+ if s not in inp_map:
+ raise WorkflowException("Invalid Scatter parameter '%s'" % s)
+
+ inp_map[s]["type"] = {"type": "array", "items": inp_map[s]["type"]}
+
+ if self.tool.get("scatterMethod") == "nested_crossproduct":
+ nesting = len(scatter)
+ else:
+ nesting = 1
+
+ for r in xrange(0, nesting):
+ for i in outputparms:
+ i["type"] = {"type": "array", "items": i["type"]}
+ self.tool["inputs"] = inputparms
+ self.tool["outputs"] = outputparms
+
+ def receive_output(self, output_callback, jobout, processStatus):
+ #_logger.debug("WorkflowStep output from run is %s", jobout)
+ output = {}
+ for i in self.tool["outputs"]:
+ field = shortname(i["id"])
+ if field in jobout:
+ output[i["id"]] = jobout[field]
+ else:
+ processStatus = "permanentFail"
+ output_callback(output, processStatus)
+
+ def job(self, joborder, basedir, output_callback, **kwargs):
+ for i in self.tool["inputs"]:
+ p = i["id"]
+ field = shortname(p)
+ joborder[field] = joborder[i["id"]]
+ del joborder[i["id"]]
+
+ kwargs["requirements"] = kwargs.get("requirements", []) + self.tool.get("requirements", [])
+ kwargs["hints"] = kwargs.get("hints", []) + self.tool.get("hints", [])
+
+ try:
+ for t in self.embedded_tool.job(joborder, basedir,
+ functools.partial(self.receive_output, output_callback),
+ **kwargs):
+ yield t
+ except WorkflowException:
+ raise
+ except Exception as e:
+ _logger.exception("Unexpected exception")
+ raise WorkflowException(str(e))
+
+
+class ReceiveScatterOutput(object):
+ def __init__(self, output_callback, dest):
+ self.dest = dest
+ self.completed = 0
+ self.processStatus = "success"
+ self.total = None
+ self.output_callback = output_callback
+
+ def receive_scatter_output(self, index, jobout, processStatus):
+ for k,v in jobout.items():
+ self.dest[k][index] = v
+
+ if processStatus != "success":
+ if self.processStatus != "permanentFail":
+ self.processStatus = processStatus
+
+ self.completed += 1
+
+ if self.completed == self.total:
+ self.output_callback(self.dest, self.processStatus)
+
+ def setTotal(self, total):
+ self.total = total
+ if self.completed == self.total:
+ self.output_callback(self.dest, self.processStatus)
+
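Editor's note: ReceiveScatterOutput is the small accumulator behind all three scatter strategies: each scattered job reports into its slot by index, and the wrapped callback fires once the completed count reaches the total announced via setTotal(). A quick sketch:

    from cwltool.workflow import ReceiveScatterOutput

    collected = {}
    def done(dest, status):
        collected["out"], collected["status"] = dest["out"], status

    rc = ReceiveScatterOutput(done, {"out": [None, None]})
    rc.receive_scatter_output(1, {"out": "b"}, "success")
    rc.receive_scatter_output(0, {"out": "a"}, "success")
    rc.setTotal(2)
    # collected == {"out": ["a", "b"], "status": "success"}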
+
+def dotproduct_scatter(process, joborder, basedir, scatter_keys, output_callback, **kwargs):
+ l = None
+ for s in scatter_keys:
+ if l is None:
+ l = len(joborder[s])
+ elif l != len(joborder[s]):
+ raise WorkflowException("Length of input arrays must be equal when performing dotproduct scatter.")
+
+ output = {}
+ for i in process.tool["outputs"]:
+ output[i["id"]] = [None] * l
+
+ rc = ReceiveScatterOutput(output_callback, output)
+
+ for n in range(0, l):
+ jo = copy.copy(joborder)
+ for s in scatter_keys:
+ jo[s] = kwargs["valueFrom"](s, joborder[s][n])
+
+ for j in process.job(jo, basedir, functools.partial(rc.receive_scatter_output, n), **kwargs):
+ yield j
+
+ rc.setTotal(l)
+
+
+def nested_crossproduct_scatter(process, joborder, basedir, scatter_keys, output_callback, **kwargs):
+ scatter_key = scatter_keys[0]
+ l = len(joborder[scatter_key])
+ output = {}
+ for i in process.tool["outputs"]:
+ output[i["id"]] = [None] * l
+
+ rc = ReceiveScatterOutput(output_callback, output)
+
+ for n in range(0, l):
+ jo = copy.copy(joborder)
+ jo[scatter_key] = kwargs["valueFrom"](scatter_key, joborder[scatter_key][n])
+
+ if len(scatter_keys) == 1:
+ for j in process.job(jo, basedir, functools.partial(rc.receive_scatter_output, n), **kwargs):
+ yield j
+ else:
+ for j in nested_crossproduct_scatter(process, jo, basedir, scatter_keys[1:], functools.partial(rc.receive_scatter_output, n), **kwargs):
+ yield j
+
+ rc.setTotal(l)
+
+
+def crossproduct_size(joborder, scatter_keys):
+ scatter_key = scatter_keys[0]
+ if len(scatter_keys) == 1:
+ sum = len(joborder[scatter_key])
+ else:
+ sum = 0
+ for n in range(0, len(joborder[scatter_key])):
+ jo = copy.copy(joborder)
+ jo[scatter_key] = joborder[scatter_key][n]
+ sum += crossproduct_size(jo, scatter_keys[1:])
+ return sum
+
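Editor's note: crossproduct_size simply multiplies the scattered input lengths so flat_crossproduct_scatter can pre-size its output arrays; scattering over a 3-element and a 2-element input yields 3 * 2 = 6 slots:

    from cwltool.workflow import crossproduct_size

    crossproduct_size({"x": [1, 2, 3], "y": ["a", "b"]}, ["x", "y"])   # -> 6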
+def flat_crossproduct_scatter(process, joborder, basedir, scatter_keys, output_callback, startindex, **kwargs):
+ scatter_key = scatter_keys[0]
+ l = len(joborder[scatter_key])
+
+ if startindex == 0 and not isinstance(output_callback, ReceiveScatterOutput):
+ output = {}
+ for i in process.tool["outputs"]:
+ output[i["id"]] = [None] * crossproduct_size(joborder, scatter_keys)
+ rc = ReceiveScatterOutput(output_callback, output)
+ else:
+ rc = output_callback
+
+ put = startindex
+ for n in range(0, l):
+ jo = copy.copy(joborder)
+ jo[scatter_key] = kwargs["valueFrom"](scatter_key, joborder[scatter_key][n])
+
+ if len(scatter_keys) == 1:
+ for j in process.job(jo, basedir, functools.partial(rc.receive_scatter_output, put), **kwargs):
+ yield j
+ put += 1
+ else:
+ for j in flat_crossproduct_scatter(process, jo, basedir, scatter_keys[1:], rc, put, **kwargs):
+ if j:
+ put += 1
+ yield j
+
+ rc.setTotal(put)
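Editor's note: every job() method in this module is a generator that yields either a runnable job object or None (meaning a step is still waiting on upstream outputs); the caller drives it in a loop and runs whatever comes out. A self-contained sketch of that consumer pattern, modelled on how cwltool's executor uses these generators; the fake job and generator below are illustrative only:

    # Stand-in generator mimicking the shape of WorkflowJob.job()'s output.
    class FakeJob(object):
        def __init__(self, name):
            self.name = name
            self.outdir = None
        def run(self):
            print "running", self.name

    def fake_workflow_job():
        yield FakeJob("step1")
        yield None                 # nothing ready yet; a real driver would wait here
        yield FakeJob("step2")

    for runnable in fake_workflow_job():
        if runnable:
            runnable.run()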
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..9d8b109
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,5 @@
+[egg_info]
+tag_build = .20160203221531
+tag_date = 0
+tag_svn_revision = 0
+
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..dd5e8fe
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import setuptools.command.egg_info as egg_info_cmd
+import shutil
+
+from setuptools import setup, find_packages
+
+SETUP_DIR = os.path.dirname(__file__)
+README = os.path.join(SETUP_DIR, 'README.rst')
+
+try:
+ import gittaggers
+ tagger = gittaggers.EggInfoFromGit
+except ImportError:
+ tagger = egg_info_cmd.egg_info
+
+setup(name='cwltool',
+ version='1.0',
+ description='Common workflow language reference implementation',
+ long_description=open(README).read(),
+ author='Common workflow language working group',
+ author_email='common-workflow-language at googlegroups.com',
+ url="https://github.com/common-workflow-language/common-workflow-language",
+ download_url="https://github.com/common-workflow-language/common-workflow-language",
+ license='Apache 2.0',
+ packages=["cwltool"],
+ package_data={'cwltool': ['schemas/draft-3/*.yml',
+ 'schemas/draft-3/*.md',
+ 'schemas/draft-3/salad/schema_salad/metaschema/*.yml',
+ 'schemas/draft-3/salad/schema_salad/metaschema/*.md']},
+ install_requires=[
+ 'requests',
+ 'PyYAML',
+ 'rdflib >= 4.2.0',
+ 'rdflib-jsonld >= 0.3.0',
+ 'shellescape',
+ 'schema_salad == 1.6.20160202222448'
+ ],
+ test_suite='tests',
+ tests_require=[],
+ entry_points={
+ 'console_scripts': [ "cwltool=cwltool.main:main", "cwltest=cwltool.cwltest:main" ]
+ },
+ zip_safe=True,
+ cmdclass={'egg_info': tagger},
+)
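Editor's note: the entry_points block wires the installed cwltool and cwltest console scripts to existing module functions (and cwltool/__main__.py in this import provides python -m cwltool as well). A hedged sketch of reaching the same entry point programmatically, assuming main() accepts an explicit argument list as its first parameter (it otherwise falls back to sys.argv[1:]); the flag shown is illustrative:

    import sys
    from cwltool.main import main

    # Roughly equivalent to running the installed "cwltool" console script
    # with one flag; the argument list here is an assumption for illustration.
    sys.exit(main(["--version"]))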