[med-svn] [python-schema-salad] 06/09: New upstream version 2.2.20170111180227
Michael Crusoe
misterc-guest at moszumanska.debian.org
Fri Jan 13 05:29:30 UTC 2017
This is an automated email from the git hooks/post-receive script.
misterc-guest pushed a commit to branch master
in repository python-schema-salad.
commit f5d6df0b4999d4e01d61ca3e20275c56b92b00cd
Author: Michael R. Crusoe <michael.crusoe at gmail.com>
Date: Thu Jan 12 00:16:40 2017 -0800
New upstream version 2.2.20170111180227
---
MANIFEST.in | 5 +-
PKG-INFO | 6 +-
README.rst | 4 +-
schema_salad.egg-info/PKG-INFO | 6 +-
schema_salad.egg-info/SOURCES.txt | 31 +-
schema_salad.egg-info/pbr.json | 2 +-
schema_salad.egg-info/requires.txt | 3 +-
schema_salad/add_dictlist.py | 1 +
schema_salad/aslist.py | 1 +
schema_salad/flatten.py | 2 +
schema_salad/jsonld_context.py | 42 +-
schema_salad/main.py | 43 +-
schema_salad/makedoc.py | 26 +-
schema_salad/ref_resolver.py | 433 ++++++----
schema_salad/schema.py | 136 +++-
schema_salad/sourceline.py | 165 ++++
schema_salad/tests/.coverage | 1 +
schema_salad/tests/frag.yml | 4 +
schema_salad/tests/test_errors.py | 31 +
schema_salad/tests/test_errors.py~ | 1 +
schema_salad/tests/test_examples.py | 92 ++-
schema_salad/tests/test_fetch.py~ | 13 +
schema_salad/tests/test_schema/CommandLineTool.yml | 894 +++++++++++++++++++++
.../tests/test_schema/CommonWorkflowLanguage.yml | 11 +
schema_salad/tests/test_schema/Process.yml | 743 +++++++++++++++++
schema_salad/tests/test_schema/Workflow.yml | 582 ++++++++++++++
schema_salad/tests/test_schema/concepts.md | 1 +
schema_salad/tests/test_schema/contrib.md | 1 +
schema_salad/tests/test_schema/intro.md | 1 +
schema_salad/tests/test_schema/invocation.md | 1 +
schema_salad/tests/test_schema/metaschema_base.yml | 164 ++++
schema_salad/tests/test_schema/test1.cwl | 1 +
schema_salad/tests/test_schema/test10.cwl | 10 +
schema_salad/tests/test_schema/test11.cwl | 10 +
schema_salad/tests/test_schema/test2.cwl | 1 +
schema_salad/tests/test_schema/test3.cwl | 6 +
schema_salad/tests/test_schema/test4.cwl | 6 +
schema_salad/tests/test_schema/test5.cwl | 6 +
schema_salad/tests/test_schema/test6.cwl | 5 +
schema_salad/tests/test_schema/test7.cwl | 10 +
schema_salad/tests/test_schema/test8.cwl | 10 +
schema_salad/tests/test_schema/test9.cwl | 10 +
schema_salad/tests/test_validate.pyx | 71 ++
schema_salad/tests/test_validate.py~ | 70 ++
schema_salad/tests/util.py | 13 +
schema_salad/validate.py | 147 ++--
setup.cfg | 2 +-
setup.py | 5 +-
48 files changed, 3483 insertions(+), 346 deletions(-)
diff --git a/MANIFEST.in b/MANIFEST.in
index bf8066c..abcfe2a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,7 @@
include gittaggers.py Makefile
-include schema_salad/tests/*.py schema_salad/tests/*.yml schema_salad/tests/*.owl
+include schema_salad/tests/*
+include schema_salad/tests/test_schema/*.md
+include schema_salad/tests/test_schema/*.yml
+include schema_salad/tests/test_schema/*.cwl
include schema_salad/metaschema/*
global-exclude *.pyc
diff --git a/PKG-INFO b/PKG-INFO
index ac8aacc..d3f87c0 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: schema-salad
-Version: 1.21.20161215163938
+Version: 2.2.20170111180227
Summary: Schema Annotations for Linked Avro Data (SALAD)
Home-page: https://github.com/common-workflow-language/common-workflow-language
Author: Common workflow language working group
@@ -79,8 +79,8 @@ Description: Schema Salad
.. _JSON-LD: http://json-ld.org
.. _Avro: http://avro.apache.org
.. _metaschema: https://github.com/common-workflow-language/schema_salad/blob/master/schema_salad/metaschema/metaschema.yml
- .. _specification: http://www.commonwl.org/draft-3/SchemaSalad.html
- .. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/draft-3/CommandLineTool.yml
+ .. _specification: http://www.commonwl.org/v1.0/SchemaSalad.html
+ .. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/v1.0/CommandLineTool.yml
.. _RDF: https://www.w3.org/RDF/
Platform: UNKNOWN
diff --git a/README.rst b/README.rst
index 11d1b06..e66427c 100644
--- a/README.rst
+++ b/README.rst
@@ -70,6 +70,6 @@ provides for robust support of inline documentation.
.. _JSON-LD: http://json-ld.org
.. _Avro: http://avro.apache.org
.. _metaschema: https://github.com/common-workflow-language/schema_salad/blob/master/schema_salad/metaschema/metaschema.yml
-.. _specification: http://www.commonwl.org/draft-3/SchemaSalad.html
-.. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/draft-3/CommandLineTool.yml
+.. _specification: http://www.commonwl.org/v1.0/SchemaSalad.html
+.. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/v1.0/CommandLineTool.yml
.. _RDF: https://www.w3.org/RDF/
diff --git a/schema_salad.egg-info/PKG-INFO b/schema_salad.egg-info/PKG-INFO
index ac8aacc..d3f87c0 100644
--- a/schema_salad.egg-info/PKG-INFO
+++ b/schema_salad.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: schema-salad
-Version: 1.21.20161215163938
+Version: 2.2.20170111180227
Summary: Schema Annotations for Linked Avro Data (SALAD)
Home-page: https://github.com/common-workflow-language/common-workflow-language
Author: Common workflow language working group
@@ -79,8 +79,8 @@ Description: Schema Salad
.. _JSON-LD: http://json-ld.org
.. _Avro: http://avro.apache.org
.. _metaschema: https://github.com/common-workflow-language/schema_salad/blob/master/schema_salad/metaschema/metaschema.yml
- .. _specification: http://www.commonwl.org/draft-3/SchemaSalad.html
- .. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/draft-3/CommandLineTool.yml
+ .. _specification: http://www.commonwl.org/v1.0/SchemaSalad.html
+ .. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/v1.0/CommandLineTool.yml
.. _RDF: https://www.w3.org/RDF/
Platform: UNKNOWN
diff --git a/schema_salad.egg-info/SOURCES.txt b/schema_salad.egg-info/SOURCES.txt
index 78c842e..73a6c34 100644
--- a/schema_salad.egg-info/SOURCES.txt
+++ b/schema_salad.egg-info/SOURCES.txt
@@ -14,6 +14,7 @@ schema_salad/main.py
schema_salad/makedoc.py
schema_salad/ref_resolver.py
schema_salad/schema.py
+schema_salad/sourceline.py
schema_salad/validate.py
schema_salad.egg-info/PKG-INFO
schema_salad.egg-info/SOURCES.txt
@@ -45,9 +46,37 @@ schema_salad/metaschema/vocab_res.yml
schema_salad/metaschema/vocab_res_proc.yml
schema_salad/metaschema/vocab_res_schema.yml
schema_salad/metaschema/vocab_res_src.yml
+schema_salad/tests/.coverage
schema_salad/tests/EDAM.owl
schema_salad/tests/Process.yml
schema_salad/tests/__init__.py
+schema_salad/tests/frag.yml
schema_salad/tests/mixin.yml
+schema_salad/tests/test_errors.py
+schema_salad/tests/test_errors.py~
schema_salad/tests/test_examples.py
-schema_salad/tests/test_fetch.py
\ No newline at end of file
+schema_salad/tests/test_fetch.py
+schema_salad/tests/test_fetch.py~
+schema_salad/tests/test_validate.pyx
+schema_salad/tests/test_validate.py~
+schema_salad/tests/util.py
+schema_salad/tests/test_schema/CommandLineTool.yml
+schema_salad/tests/test_schema/CommonWorkflowLanguage.yml
+schema_salad/tests/test_schema/Process.yml
+schema_salad/tests/test_schema/Workflow.yml
+schema_salad/tests/test_schema/concepts.md
+schema_salad/tests/test_schema/contrib.md
+schema_salad/tests/test_schema/intro.md
+schema_salad/tests/test_schema/invocation.md
+schema_salad/tests/test_schema/metaschema_base.yml
+schema_salad/tests/test_schema/test1.cwl
+schema_salad/tests/test_schema/test10.cwl
+schema_salad/tests/test_schema/test11.cwl
+schema_salad/tests/test_schema/test2.cwl
+schema_salad/tests/test_schema/test3.cwl
+schema_salad/tests/test_schema/test4.cwl
+schema_salad/tests/test_schema/test5.cwl
+schema_salad/tests/test_schema/test6.cwl
+schema_salad/tests/test_schema/test7.cwl
+schema_salad/tests/test_schema/test8.cwl
+schema_salad/tests/test_schema/test9.cwl
\ No newline at end of file
diff --git a/schema_salad.egg-info/pbr.json b/schema_salad.egg-info/pbr.json
index b645198..f3b1371 100644
--- a/schema_salad.egg-info/pbr.json
+++ b/schema_salad.egg-info/pbr.json
@@ -1 +1 @@
-{"is_release": false, "git_version": "2b328bc"}
\ No newline at end of file
+{"is_release": false, "git_version": "a5bbb36"}
\ No newline at end of file
diff --git a/schema_salad.egg-info/requires.txt b/schema_salad.egg-info/requires.txt
index 756fee8..a27d4da 100644
--- a/schema_salad.egg-info/requires.txt
+++ b/schema_salad.egg-info/requires.txt
@@ -1,9 +1,8 @@
setuptools
requests >= 1.0
-ruamel.yaml >= 0.12.4, < 0.12.5
+ruamel.yaml >= 0.12.4
rdflib >= 4.2.0, < 4.3.0
rdflib-jsonld >= 0.3.0, < 0.5.0
-html5lib >= 0.90, <= 0.9999999
mistune >= 0.7.3, < 0.8
typing >= 3.5.2, < 3.6
CacheControl >= 0.11.7, < 0.12
diff --git a/schema_salad/add_dictlist.py b/schema_salad/add_dictlist.py
index 53bd4d4..711f580 100644
--- a/schema_salad/add_dictlist.py
+++ b/schema_salad/add_dictlist.py
@@ -1,6 +1,7 @@
import sys
from typing import Any, Dict
+
def add_dictlist(di, key, val): # type: (Dict, Any, Any) -> None
if key not in di:
di[key] = []
diff --git a/schema_salad/aslist.py b/schema_salad/aslist.py
index 0332a2b..27602ab 100644
--- a/schema_salad/aslist.py
+++ b/schema_salad/aslist.py
@@ -1,6 +1,7 @@
import sys
from typing import Any, List
+
def aslist(l): # type: (Any) -> List
"""Convenience function to wrap single items and lists, and return lists unchanged."""
diff --git a/schema_salad/flatten.py b/schema_salad/flatten.py
index 90c93d2..a417b34 100644
--- a/schema_salad/flatten.py
+++ b/schema_salad/flatten.py
@@ -2,6 +2,8 @@ import sys
from typing import Any, Tuple
# http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html
+
+
def flatten(l, ltypes=(list, tuple)):
# type: (Any, Any) -> Any
if l is None:
diff --git a/schema_salad/jsonld_context.py b/schema_salad/jsonld_context.py
index d4d203f..7141b07 100755
--- a/schema_salad/jsonld_context.py
+++ b/schema_salad/jsonld_context.py
@@ -20,13 +20,19 @@ import urlparse
import logging
from .aslist import aslist
from typing import Any, cast, Dict, Iterable, Tuple, Union
-from .ref_resolver import Loader
+from .ref_resolver import Loader, ContextType
_logger = logging.getLogger("salad")
-def pred(datatype, field, name, context, defaultBase, namespaces):
- # type: (Dict[str, Union[Dict, str]], Dict, str, Loader.ContextType, str, Dict[str, rdflib.namespace.Namespace]) -> Union[Dict, str]
+def pred(datatype, # type: Dict[str, Union[Dict, str]]
+ field, # type: Dict
+ name, # type: str
+ context, # type: ContextType
+ defaultBase, # type: str
+ namespaces # type: Dict[str, rdflib.namespace.Namespace]
+ ):
+ # type: (...) -> Union[Dict, str]
split = urlparse.urlsplit(name)
vee = None # type: Union[str, unicode]
@@ -84,8 +90,14 @@ def pred(datatype, field, name, context, defaultBase, namespaces):
return ret
-def process_type(t, g, context, defaultBase, namespaces, defaultPrefix):
- # type: (Dict[str, Any], Graph, Loader.ContextType, str, Dict[str, rdflib.namespace.Namespace], str) -> None
+def process_type(t, # type: Dict[str, Any]
+ g, # type: Graph
+ context, # type: ContextType
+ defaultBase, # type: str
+ namespaces, # type: Dict[str, rdflib.namespace.Namespace]
+ defaultPrefix # type: str
+ ):
+ # type: (...) -> None
if t["type"] == "record":
recordname = t["name"]
@@ -154,8 +166,8 @@ def process_type(t, g, context, defaultBase, namespaces, defaultPrefix):
def salad_to_jsonld_context(j, schema_ctx):
- # type: (Iterable, Dict[str, Any]) -> Tuple[Loader.ContextType, Graph]
- context = {} # type: Loader.ContextType
+ # type: (Iterable, Dict[str, Any]) -> Tuple[ContextType, Graph]
+ context = {} # type: ContextType
namespaces = {}
g = Graph()
defaultPrefix = ""
@@ -178,8 +190,11 @@ def salad_to_jsonld_context(j, schema_ctx):
return (context, g)
-def fix_jsonld_ids(obj, ids):
- # type: (Union[Dict[unicode, Any], List[Dict[unicode, Any]]], List[unicode]) -> None
+
+def fix_jsonld_ids(obj, # type: Union[Dict[unicode, Any], List[Dict[unicode, Any]]]
+ ids # type: List[unicode]
+ ):
+ # type: (...) -> None
if isinstance(obj, dict):
for i in ids:
if i in obj:
@@ -190,8 +205,13 @@ def fix_jsonld_ids(obj, ids):
for entry in obj:
fix_jsonld_ids(entry, ids)
-def makerdf(workflow, wf, ctx, graph=None):
- # type: (Union[str, unicode], Union[List[Dict[unicode, Any]], Dict[unicode, Any]], Loader.ContextType, Graph) -> Graph
+
+def makerdf(workflow, # type: Union[str, unicode]
+ wf, # type: Union[List[Dict[unicode, Any]], Dict[unicode, Any]]
+ ctx, # type: ContextType
+ graph=None # type: Graph
+ ):
+ # type: (...) -> Graph
prefixes = {}
idfields = []
for k, v in ctx.iteritems():
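The jsonld_context.py hunks above rewrite long single-line type comments
into the per-argument comment style that PEP 484 defines for Python 2
code. A minimal sketch of the pattern, using a hypothetical function:

    from typing import Dict, Union

    def pred(datatype,  # type: Dict[str, Union[Dict, str]]
             name       # type: str
             ):
        # type: (...) -> Union[Dict, str]
        return {name: datatype}

mypy combines the per-argument comments with the trailing
"# type: (...) -> ..." line, so each parameter's annotation sits next to
its name instead of in one unwieldy comment.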
diff --git a/schema_salad/main.py b/schema_salad/main.py
index 1896e8f..f51184b 100644
--- a/schema_salad/main.py
+++ b/schema_salad/main.py
@@ -3,28 +3,36 @@ import argparse
import logging
import sys
import traceback
-import pkg_resources # part of setuptools
-from . import schema
-from . import jsonld_context
-from . import makedoc
import json
-from rdflib import Graph, plugin
-from rdflib.serializer import Serializer
import os
import urlparse
-from .ref_resolver import Loader
-from . import validate
+import pkg_resources # part of setuptools
+
from typing import Any, Dict, List, Union
+from rdflib import Graph, plugin
+from rdflib.serializer import Serializer
+
+from . import schema
+from . import jsonld_context
+from . import makedoc
+from . import validate
+from .sourceline import strip_dup_lineno
+from .ref_resolver import Loader
+
_logger = logging.getLogger("salad")
from rdflib.plugin import register, Parser
register('json-ld', Parser, 'rdflib_jsonld.parser', 'JsonLDParser')
-def printrdf(workflow, wf, ctx, sr):
- # type: (str, Union[List[Dict[unicode, Any]], Dict[unicode, Any]], Dict[unicode, Any], str) -> None
+def printrdf(workflow, # type: str
+ wf, # type: Union[List[Dict[unicode, Any]], Dict[unicode, Any]]
+ ctx, # type: Dict[unicode, Any]
+ sr # type: str
+ ):
+ # type: (...) -> None
g = jsonld_context.makerdf(workflow, wf, ctx)
print(g.serialize(format=sr))
@@ -104,10 +112,14 @@ def main(argsl=None): # type: (List[str]) -> int
schema_raw_doc, schema_uri)
except (validate.ValidationException) as e:
_logger.error("Schema `%s` failed link checking:\n%s",
- args.schema, e, exc_info=(True if args.debug else False))
+ args.schema, e, exc_info=(True if args.debug else False))
_logger.debug("Index is %s", metaschema_loader.idx.keys())
_logger.debug("Vocabulary is %s", metaschema_loader.vocab.keys())
return 1
+ except (RuntimeError) as e:
+ _logger.error("Schema `%s` read error:\n%s",
+ args.schema, e, exc_info=(True if args.debug else False))
+ return 1
# Optionally print the schema after ref resolution
if not args.document and args.print_pre:
@@ -121,7 +133,8 @@ def main(argsl=None): # type: (List[str]) -> int
# Validate the schema document against the metaschema
try:
schema.validate_doc(metaschema_names, schema_doc,
- metaschema_loader, args.strict)
+ metaschema_loader, args.strict,
+ source=schema_metadata["name"])
except validate.ValidationException as e:
_logger.error("While validating schema `%s`:\n%s" %
(args.schema, str(e)))
@@ -149,8 +162,8 @@ def main(argsl=None): # type: (List[str]) -> int
if isinstance(avsc_names, Exception):
_logger.error("Schema `%s` error:\n%s", args.schema,
- avsc_names, exc_info=((type(avsc_names), avsc_names,
- None) if args.debug else None))
+ avsc_names, exc_info=((type(avsc_names), avsc_names,
+ None) if args.debug else None))
if args.print_avro:
print(json.dumps(avsc_obj, indent=4))
return 1
@@ -188,7 +201,7 @@ def main(argsl=None): # type: (List[str]) -> int
document, doc_metadata = document_loader.resolve_ref(uri)
except (validate.ValidationException, RuntimeError) as e:
_logger.error("Document `%s` failed validation:\n%s",
- args.document, e, exc_info=args.debug)
+ args.document, strip_dup_lineno(unicode(e)), exc_info=args.debug)
return 1
# Optionally print the document after ref resolution
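main.py now routes document errors through strip_dup_lineno from the new
sourceline module (diffed below), which blanks a file:line:column prefix
that repeats on consecutive lines. A hedged illustration with a made-up
message; the exact output spacing is inferred from the sourceline.py code:

    from schema_salad.sourceline import strip_dup_lineno

    msg = (u"doc.yml:3:5: Object `foo` is not valid because\n"
           u"doc.yml:3:5:   missing required field `bar`")
    print strip_dup_lineno(msg)
    # doc.yml:3:5: Object `foo` is not valid because
    #                missing required field `bar`

Only the first line keeps the position prefix; repeats are replaced with
matching whitespace so multi-line errors stay readable.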
diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py
index 91be285..0325ad8 100644
--- a/schema_salad/makedoc.py
+++ b/schema_salad/makedoc.py
@@ -220,8 +220,13 @@ class RenderType(object):
("docAfter" not in f))):
self.render_type(f, 1)
- def typefmt(self, tp, redirects, nbsp=False, jsonldPredicate=None):
- # type: (Any, Dict[str, str], bool, Dict[str, str]) -> Union[str, unicode]
+ def typefmt(self,
+ tp, # type: Any
+ redirects, # type: Dict[str, str]
+ nbsp=False, # type: bool
+ jsonldPredicate=None # type: Dict[str, str]
+ ):
+ # type: (...) -> Union[str, unicode]
global primitiveType
if isinstance(tp, list):
if nbsp and len(tp) <= 3:
@@ -230,16 +235,20 @@ class RenderType(object):
return " | ".join([self.typefmt(n, redirects) for n in tp])
if isinstance(tp, dict):
if tp["type"] == "https://w3id.org/cwl/salad#array":
- ar = "array<%s>" % (self.typefmt(tp["items"], redirects, nbsp=True))
+ ar = "array<%s>" % (self.typefmt(
+ tp["items"], redirects, nbsp=True))
if jsonldPredicate and "mapSubject" in jsonldPredicate:
if "mapPredicate" in jsonldPredicate:
ar += " | map<%s.%s, %s.%s>" % (self.typefmt(tp["items"], redirects),
- jsonldPredicate["mapSubject"],
- self.typefmt(tp["items"], redirects),
- jsonldPredicate["mapPredicate"])
+ jsonldPredicate[
+ "mapSubject"],
+ self.typefmt(
+ tp["items"], redirects),
+ jsonldPredicate["mapPredicate"])
ar += " | map<%s.%s, %s>" % (self.typefmt(tp["items"], redirects),
- jsonldPredicate["mapSubject"],
- self.typefmt(tp["items"], redirects))
+ jsonldPredicate[
+ "mapSubject"],
+ self.typefmt(tp["items"], redirects))
return ar
if tp["type"] in ("https://w3id.org/cwl/salad#record", "https://w3id.org/cwl/salad#enum"):
frg = schema.avro_name(tp["name"])
@@ -414,6 +423,7 @@ def avrold_doc(j, outdoc, renderlist, redirects, brand, brandlink):
<html>
<head>
<meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css">
""")
diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py
index e37e840..ed25f0e 100644
--- a/schema_salad/ref_resolver.py
+++ b/schema_salad/ref_resolver.py
@@ -4,36 +4,57 @@ import json
import hashlib
import logging
import collections
+import urllib
import urlparse
import re
import copy
-import pprint
+import urllib
from StringIO import StringIO
from . import validate
from .aslist import aslist
from .flatten import flatten
+from .sourceline import SourceLine, add_lc_filename, relname
import requests
from cachecontrol.wrapper import CacheControl
from cachecontrol.caches import FileCache
import ruamel.yaml as yaml
-
-try:
- from ruamel.yaml import CSafeLoader as SafeLoader
-except ImportError:
- from ruamel.yaml import SafeLoader # type: ignore
+from ruamel.yaml.comments import CommentedSeq, CommentedMap
import rdflib
+from rdflib import Graph
from rdflib.namespace import RDF, RDFS, OWL
from rdflib.plugins.parsers.notation3 import BadSyntax
import xml.sax
from typing import (Any, AnyStr, Callable, cast, Dict, List, Iterable, Tuple,
- TypeVar, Union)
+ TypeVar, Union)
_logger = logging.getLogger("salad")
-
-class NormDict(dict):
+ContextType = Dict[unicode, Union[Dict, unicode, Iterable[unicode]]]
+DocumentType = TypeVar('DocumentType', CommentedSeq, CommentedMap)
+DocumentOrStrType = TypeVar(
+ 'DocumentOrStrType', CommentedSeq, CommentedMap, unicode)
+
+def file_uri(path): # type: (str) -> str
+ if path.startswith("file://"):
+ return path
+ pathsp = path.split("#", 2)
+ frag = "#" + urllib.quote(str(pathsp[1])) if len(pathsp) == 2 else ""
+ urlpath = urllib.pathname2url(str(pathsp[0]))
+ if urlpath.startswith("//"):
+ return "file:%s%s" % (urlpath, frag)
+ else:
+ return "file://%s%s" % (urlpath, frag)
+
+def uri_file_path(url): # type: (str) -> str
+ split = urlparse.urlsplit(url)
+ if split.scheme == "file":
+ return urllib.url2pathname(str(split.path)) + ("#" + urllib.unquote(str(split.fragment)) if split.fragment else "")
+ else:
+ raise ValueError("Not a file URI")
+
+class NormDict(CommentedMap):
def __init__(self, normalize=unicode): # type: (type) -> None
super(NormDict, self).__init__()
@@ -105,7 +126,7 @@ class DefaultFetcher(Fetcher):
return resp.text
elif scheme == 'file':
try:
- with open(path) as fp:
+ with open(urllib.url2pathname(str(path))) as fp:
read = fp.read()
if hasattr(read, "decode"):
return read.decode("utf-8")
@@ -134,7 +155,7 @@ class DefaultFetcher(Fetcher):
return False
return True
elif scheme == 'file':
- return os.path.exists(path)
+ return os.path.exists(urllib.url2pathname(str(path)))
else:
raise ValueError('Unsupported scheme in url: %s' % url)
@@ -142,15 +163,11 @@ class DefaultFetcher(Fetcher):
return urlparse.urljoin(base_url, url)
class Loader(object):
-
- ContextType = Dict[unicode, Union[Dict, unicode, Iterable[unicode]]]
- DocumentType = Union[List, Dict[unicode, Any]]
-
def __init__(self,
ctx, # type: ContextType
schemagraph=None, # type: rdflib.graph.Graph
foreign_properties=None, # type: Set[unicode]
- idx=None, # type: Dict[unicode, Union[dict, list, unicode]]
+ idx=None, # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode]]
cache=None, # type: Dict[unicode, Any]
session=None, # type: requests.sessions.Session
fetcher_constructor=None # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
@@ -158,22 +175,26 @@ class Loader(object):
# type: (...) -> None
normalize = lambda url: urlparse.urlsplit(url).geturl()
+ self.idx = None # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode]]
if idx is not None:
self.idx = idx
else:
self.idx = NormDict(normalize)
- self.ctx = {} # type: Loader.ContextType
+ self.ctx = {} # type: ContextType
+ self.graph = None # type: Graph
if schemagraph is not None:
self.graph = schemagraph
else:
self.graph = rdflib.graph.Graph()
+ self.foreign_properties = None # type: Set[unicode]
if foreign_properties is not None:
self.foreign_properties = foreign_properties
else:
self.foreign_properties = set()
+ self.cache = None # type: Dict[unicode, Any]
if cache is not None:
self.cache = cache
else:
@@ -194,23 +215,29 @@ class Loader(object):
self.fetch_text = self.fetcher.fetch_text
self.check_exists = self.fetcher.check_exists
- self.url_fields = None # type: Set[unicode]
- self.scoped_ref_fields = None # type: Dict[unicode, int]
- self.vocab_fields = None # type: Set[unicode]
- self.identifiers = None # type: Set[unicode]
- self.identity_links = None # type: Set[unicode]
- self.standalone = None # type: Set[unicode]
- self.nolinkcheck = None # type: Set[unicode]
- self.vocab = {} # type: Dict[unicode, unicode]
- self.rvocab = {} # type: Dict[unicode, unicode]
- self.idmap = None # type: Dict[unicode, Any]
- self.mapPredicate = None # type: Dict[unicode, unicode]
- self.type_dsl_fields = None # type: Set[unicode]
+ self.url_fields = None # type: Set[unicode]
+ self.scoped_ref_fields = None # type: Dict[unicode, int]
+ self.vocab_fields = None # type: Set[unicode]
+ self.identifiers = None # type: Set[unicode]
+ self.identity_links = None # type: Set[unicode]
+ self.standalone = None # type: Set[unicode]
+ self.nolinkcheck = None # type: Set[unicode]
+ self.vocab = {} # type: Dict[unicode, unicode]
+ self.rvocab = {} # type: Dict[unicode, unicode]
+ self.idmap = None # type: Dict[unicode, Any]
+ self.mapPredicate = None # type: Dict[unicode, unicode]
+ self.type_dsl_fields = None # type: Set[unicode]
self.add_context(ctx)
- def expand_url(self, url, base_url, scoped_id=False, vocab_term=False, scoped_ref=None):
- # type: (unicode, unicode, bool, bool, int) -> unicode
+ def expand_url(self,
+ url, # type: unicode
+ base_url, # type: unicode
+ scoped_id=False, # type: bool
+ vocab_term=False, # type: bool
+ scoped_ref=None # type: int
+ ):
+ # type: (...) -> unicode
if url in (u"@id", u"@type"):
return url
@@ -294,7 +321,7 @@ class Loader(object):
self.idx[unicode(s)] = None
def add_context(self, newcontext, baseuri=""):
- # type: (Loader.ContextType, unicode) -> None
+ # type: (ContextType, unicode) -> None
if self.vocab:
raise validate.ValidationException(
"Refreshing context that already has stuff in it")
@@ -356,33 +383,48 @@ class Loader(object):
_logger.debug("vocab_fields is %s", self.vocab_fields)
_logger.debug("vocab is %s", self.vocab)
- def resolve_ref(self, ref, base_url=None, checklinks=True):
- # type: (Union[Dict[unicode, Any], unicode], unicode, bool) -> Tuple[Union[List, Dict[unicode, Any], unicode], Dict[unicode, Any]]
- base_url = base_url or u'file://%s/' % os.path.abspath('.')
+ def resolve_ref(self,
+ ref, # type: Union[CommentedMap, CommentedSeq, unicode]
+ base_url=None, # type: unicode
+ checklinks=True # type: bool
+ ):
+ # type: (...) -> Tuple[Union[CommentedMap, CommentedSeq, unicode], Dict[unicode, Any]]
- obj = None # type: Dict[unicode, Any]
+ obj = None # type: CommentedMap
+ resolved_obj = None # type: Union[CommentedMap, CommentedSeq, unicode]
inc = False
- mixin = None
+ mixin = None # type: Dict[unicode, Any]
+ if not base_url:
+ base_url = file_uri(os.getcwd()) + "/"
+
+ if isinstance(ref, (str, unicode)) and os.sep == "\\":
+ # Convert Windows path separator in ref
+ ref = ref.replace("\\", "/")
+
+ sl = SourceLine(obj, None, ValueError)
# If `ref` is a dict, look for special directives.
- if isinstance(ref, dict):
+ if isinstance(ref, CommentedMap):
obj = ref
- if u"$import" in obj:
+ if "$import" in obj:
+ sl = SourceLine(obj, "$import", RuntimeError)
if len(obj) == 1:
ref = obj[u"$import"]
obj = None
else:
- raise ValueError(
- u"'$import' must be the only field in %s" % (str(obj)))
- elif u"$include" in obj:
+ raise sl.makeError(
+ u"'$import' must be the only field in %s" % (unicode(obj)))
+ elif "$include" in obj:
+ sl = SourceLine(obj, "$include", RuntimeError)
if len(obj) == 1:
ref = obj[u"$include"]
inc = True
obj = None
else:
- raise ValueError(
- u"'$include' must be the only field in %s" % (str(obj)))
- elif u"$mixin" in obj:
+ raise sl.makeError(
+ u"'$include' must be the only field in %s" % (unicode(obj)))
+ elif "$mixin" in obj:
+ sl = SourceLine(obj, "$mixin", RuntimeError)
ref = obj[u"$mixin"]
mixin = obj
obj = None
@@ -393,37 +435,38 @@ class Loader(object):
ref = obj[identifier]
break
if not ref:
- raise ValueError(
- u"Object `%s` does not have identifier field in %s" % (obj, self.identifiers))
+ raise sl.makeError(
+ u"Object `%s` does not have identifier field in %s" % (relname(obj), self.identifiers))
if not isinstance(ref, (str, unicode)):
- raise ValueError(u"Must be string: `%s`" % str(ref))
+ raise ValueError(u"Expected CommentedMap or string, got %s: `%s`" % (type(ref), unicode(ref)))
url = self.expand_url(ref, base_url, scoped_id=(obj is not None))
-
# Has this reference been loaded already?
if url in self.idx and (not mixin):
return self.idx[url], {}
- # "$include" directive means load raw text
- if inc:
- return self.fetch_text(url), {}
+ sl.raise_type = RuntimeError
+ with sl:
+ # "$include" directive means load raw text
+ if inc:
+ return self.fetch_text(url), {}
- doc = None
- if obj:
- for identifier in self.identifiers:
- obj[identifier] = url
- doc_url = url
- else:
- # Load structured document
- doc_url, frg = urlparse.urldefrag(url)
- if doc_url in self.idx and (not mixin):
- # If the base document is in the index, it was already loaded,
- # so if we didn't find the reference earlier then it must not
- # exist.
- raise validate.ValidationException(
- u"Reference `#%s` not found in file `%s`." % (frg, doc_url))
- doc = self.fetch(doc_url, inject_ids=(not mixin))
+ doc = None
+ if obj:
+ for identifier in self.identifiers:
+ obj[identifier] = url
+ doc_url = url
+ else:
+ # Load structured document
+ doc_url, frg = urlparse.urldefrag(url)
+ if doc_url in self.idx and (not mixin):
+ # If the base document is in the index, it was already loaded,
+ # so if we didn't find the reference earlier then it must not
+ # exist.
+ raise validate.ValidationException(
+ u"Reference `#%s` not found in file `%s`." % (frg, doc_url))
+ doc = self.fetch(doc_url, inject_ids=(not mixin))
# Recursively expand urls and resolve directives
if mixin:
@@ -443,10 +486,11 @@ class Loader(object):
if url in self.idx:
resolved_obj = self.idx[url]
else:
- raise RuntimeError("Reference `%s` is not in the index. "
- "Index contains:\n %s" % (url, "\n ".join(self.idx)))
+ raise RuntimeError(
+ "Reference `%s` is not in the index. Index contains:\n %s"
+ % (url, "\n ".join(self.idx)))
- if isinstance(resolved_obj, (dict)):
+ if isinstance(resolved_obj, CommentedMap):
if u"$graph" in resolved_obj:
metadata = _copy_dict_without_key(resolved_obj, u"$graph")
return resolved_obj[u"$graph"], metadata
@@ -455,9 +499,11 @@ class Loader(object):
else:
return resolved_obj, metadata
-
- def _resolve_idmap(self, document, loader):
- # type: (Dict[unicode, Union[Dict[unicode, Dict[unicode, unicode]], List[Dict[unicode, Any]]]], Loader) -> None
+ def _resolve_idmap(self,
+ document, # type: CommentedMap
+ loader # type: Loader
+ ):
+ # type: (...) -> None
# Convert fields with mapSubject into lists
# use mapPredicate if the mapped value isn't a dict.
for idmapField in loader.idmap:
@@ -466,27 +512,46 @@ class Loader(object):
if (isinstance(idmapFieldValue, dict)
and "$import" not in idmapFieldValue
and "$include" not in idmapFieldValue):
- ls = []
+ ls = CommentedSeq()
for k in sorted(idmapFieldValue.keys()):
val = idmapFieldValue[k]
- v = None # type: Dict[unicode, Any]
- if not isinstance(val, dict):
+ v = None # type: CommentedMap
+ if not isinstance(val, CommentedMap):
if idmapField in loader.mapPredicate:
- v = {loader.mapPredicate[idmapField]: val}
+ v = CommentedMap(
+ ((loader.mapPredicate[idmapField], val),))
+ v.lc.add_kv_line_col(
+ loader.mapPredicate[idmapField],
+ document[idmapField].lc.data[k])
+ v.lc.filename = document.lc.filename
else:
raise validate.ValidationException(
"mapSubject '%s' value '%s' is not a dict"
"and does not have a mapPredicate", k, v)
else:
v = val
+
v[loader.idmap[idmapField]] = k
+ v.lc.add_kv_line_col(loader.idmap[idmapField],
+ document[idmapField].lc.data[k])
+ v.lc.filename = document.lc.filename
+
+ ls.lc.add_kv_line_col(
+ len(ls), document[idmapField].lc.data[k])
+
+ ls.lc.filename = document.lc.filename
ls.append(v)
+
document[idmapField] = ls
typeDSLregex = re.compile(ur"^([^[?]+)(\[\])?(\?)?$")
- def _type_dsl(self, t):
- # type: (Union[unicode, Dict, List]) -> Union[unicode, Dict[unicode, unicode], List[Union[unicode, Dict[unicode, unicode]]]]
+ def _type_dsl(self,
+ t, # type: Union[unicode, Dict, List]
+ lc,
+ filename):
+ # type: (...) -> Union[unicode, Dict[unicode, unicode], List[Union[unicode, Dict[unicode, unicode]]]]
+
if not isinstance(t, (str, unicode)):
return t
@@ -496,34 +561,59 @@ class Loader(object):
first = m.group(1)
second = third = None
if m.group(2):
- second = {u"type": u"array",
- u"items": first}
+ second = CommentedMap((("type", "array"),
+ ("items", first)))
+ second.lc.add_kv_line_col("type", lc)
+ second.lc.add_kv_line_col("items", lc)
+ second.lc.filename = filename
if m.group(3):
- third = [u"null", second or first]
+ third = CommentedSeq([u"null", second or first])
+ third.lc.add_kv_line_col(0, lc)
+ third.lc.add_kv_line_col(1, lc)
+ third.lc.filename = filename
return third or second or first
- def _resolve_type_dsl(self, document, loader):
- # type: (Dict[unicode, Union[unicode, Dict[unicode, unicode], List]], Loader) -> None
+ def _resolve_type_dsl(self,
+ document, # type: CommentedMap
+ loader # type: Loader
+ ):
+ # type: (...) -> None
for d in loader.type_dsl_fields:
if d in document:
- datum = document[d]
+ datum2 = datum = document[d]
if isinstance(datum, (str, unicode)):
- document[d] = self._type_dsl(datum)
- elif isinstance(datum, list):
- document[d] = [self._type_dsl(t) for t in datum]
- datum2 = document[d]
- if isinstance(datum2, list):
- document[d] = flatten(datum2)
+ datum2 = self._type_dsl(datum, document.lc.data[
+ d], document.lc.filename)
+ elif isinstance(datum, CommentedSeq):
+ datum2 = CommentedSeq()
+ for n, t in enumerate(datum):
+ datum2.lc.add_kv_line_col(
+ len(datum2), datum.lc.data[n])
+ datum2.append(self._type_dsl(
+ t, datum.lc.data[n], document.lc.filename))
+ if isinstance(datum2, CommentedSeq):
+ datum3 = CommentedSeq()
seen = [] # type: List[unicode]
- uniq = []
- for item in document[d]:
- if item not in seen:
- uniq.append(item)
- seen.append(item)
- document[d] = uniq
+ for i, item in enumerate(datum2):
+ if isinstance(item, CommentedSeq):
+ for j, v in enumerate(item):
+ if v not in seen:
+ datum3.lc.add_kv_line_col(
+ len(datum3), item.lc.data[j])
+ datum3.append(v)
+ seen.append(v)
+ else:
+ if item not in seen:
+ datum3.lc.add_kv_line_col(
+ len(datum3), datum2.lc.data[i])
+ datum3.append(item)
+ seen.append(item)
+ document[d] = datum3
+ else:
+ document[d] = datum2
def _resolve_identifier(self, document, loader, base_url):
- # type: (Dict[unicode, unicode], Loader, unicode) -> unicode
+ # type: (CommentedMap, Loader, unicode) -> unicode
# Expand identifier field (usually 'id') to resolve scope
for identifer in loader.identifiers:
if identifer in document:
@@ -564,8 +654,12 @@ class Loader(object):
document[d2] = document[d]
del document[d]
- def _resolve_uris(self, document, loader, base_url):
- # type: (Dict[unicode, Union[unicode, List[unicode]]], Loader, unicode) -> None
+ def _resolve_uris(self,
+ document, # type: Dict[unicode, Union[unicode, List[unicode]]]
+ loader, # type: Loader
+ base_url # type: unicode
+ ):
+ # type: (...) -> None
# Resolve remaining URLs based on document base
for d in loader.url_fields:
if d in document:
@@ -576,35 +670,43 @@ class Loader(object):
vocab_term=(d in loader.vocab_fields),
scoped_ref=self.scoped_ref_fields.get(d))
elif isinstance(datum, list):
- document[d] = [
- loader.expand_url(
- url, base_url, scoped_id=False,
- vocab_term=(d in loader.vocab_fields),
- scoped_ref=self.scoped_ref_fields.get(d))
- if isinstance(url, (str, unicode))
- else url for url in datum]
-
-
- def resolve_all(self, document, base_url, file_base=None, checklinks=True):
- # type: (DocumentType, unicode, unicode, bool) -> Tuple[Union[List, Dict[unicode, Any], unicode], Dict[unicode, Any]]
+ for i, url in enumerate(datum):
+ if isinstance(url, (str, unicode)):
+ datum[i] = loader.expand_url(
+ url, base_url, scoped_id=False,
+ vocab_term=(d in loader.vocab_fields),
+ scoped_ref=self.scoped_ref_fields.get(d))
+
+
+ def resolve_all(self,
+ document, # type: Union[CommentedMap, CommentedSeq]
+ base_url, # type: unicode
+ file_base=None, # type: unicode
+ checklinks=True # type: bool
+ ):
+ # type: (...) -> Tuple[Union[CommentedMap, CommentedSeq, unicode], Dict[unicode, Any]]
loader = self
- metadata = {} # type: Dict[unicode, Any]
+ metadata = CommentedMap() # type: CommentedMap
if file_base is None:
file_base = base_url
- if isinstance(document, dict):
+ if isinstance(document, CommentedMap):
# Handle $import and $include
if (u'$import' in document or u'$include' in document):
- return self.resolve_ref(document, base_url=file_base, checklinks=checklinks)
+ return self.resolve_ref(
+ document, base_url=file_base, checklinks=checklinks)
elif u'$mixin' in document:
- return self.resolve_ref(document, base_url=base_url, checklinks=checklinks)
- elif isinstance(document, list):
+ return self.resolve_ref(
+ document, base_url=base_url, checklinks=checklinks)
+ elif isinstance(document, CommentedSeq):
pass
+ elif isinstance(document, (list, dict)):
+ raise Exception("Expected CommentedMap or CommentedSeq, got %s: `%s`" % (type(document), document))
else:
return (document, metadata)
newctx = None # type: Loader
- if isinstance(document, dict):
+ if isinstance(document, CommentedMap):
# Handle $base, $profile, $namespaces, $schemas and $graph
if u"$base" in document:
base_url = document[u"$base"]
@@ -633,8 +735,9 @@ class Loader(object):
if u"$graph" in document:
metadata = _copy_dict_without_key(document, u"$graph")
document = document[u"$graph"]
- resolved_metadata = loader.resolve_all(metadata, base_url,
- file_base=file_base, checklinks=False)[0]
+ resolved_metadata = loader.resolve_all(
+ metadata, base_url, file_base=file_base,
+ checklinks=False)[0]
if isinstance(resolved_metadata, dict):
metadata = resolved_metadata
else:
@@ -642,7 +745,7 @@ class Loader(object):
"Validation error, metadata must be dict: %s"
% (resolved_metadata))
- if isinstance(document, dict):
+ if isinstance(document, CommentedMap):
self._normalize_fields(document, loader)
self._resolve_idmap(document, loader)
self._resolve_type_dsl(document, loader)
@@ -657,19 +760,26 @@ class Loader(object):
except validate.ValidationException as v:
_logger.warn("loader is %s", id(loader), exc_info=True)
raise validate.ValidationException("(%s) (%s) Validation error in field %s:\n%s" % (
- id(loader), file_base, key, validate.indent(str(v))))
+ id(loader), file_base, key, validate.indent(unicode(v))))
- elif isinstance(document, list):
+ elif isinstance(document, CommentedSeq):
i = 0
try:
while i < len(document):
val = document[i]
- if isinstance(val, dict) and (u"$import" in val or u"$mixin" in val):
- l, _ = loader.resolve_ref(val, base_url=file_base, checklinks=False)
- if isinstance(l, list): # never true?
+ if isinstance(val, CommentedMap) and (u"$import" in val or u"$mixin" in val):
+ l, _ = loader.resolve_ref(
+ val, base_url=file_base, checklinks=False)
+ if isinstance(l, CommentedSeq):
+ lc = document.lc.data[i]
del document[i]
- for item in aslist(l):
- document.insert(i, item)
+ llen = len(l)
+ for j in range(len(document) + llen, i + llen, -1):
+ document.lc.data[
+ j - 1] = document.lc.data[j - llen]
+ for item in l:
+ document.insert(i, item) # type: ignore
+ document.lc.data[i] = lc
i += 1
else:
document[i] = l
@@ -681,7 +791,7 @@ class Loader(object):
except validate.ValidationException as v:
_logger.warn("failed", exc_info=True)
raise validate.ValidationException("(%s) (%s) Validation error in position %i:\n%s" % (
- id(loader), file_base, i, validate.indent(str(v))))
+ id(loader), file_base, i, validate.indent(unicode(v))))
for identifer in loader.identity_links:
if identifer in metadata:
@@ -691,7 +801,7 @@ class Loader(object):
loader.idx[metadata[identifer]] = document
if checklinks:
- document = self.validate_links(document, u"")
+ self.validate_links(document, u"")
return document, metadata
@@ -704,11 +814,12 @@ class Loader(object):
textIO = StringIO(text.decode('utf-8'))
else:
textIO = StringIO(text)
- textIO.name = url # type: ignore
- result = yaml.load(textIO, Loader=SafeLoader)
+ textIO.name = url # type: ignore
+ result = yaml.round_trip_load(textIO) # type: ignore
+ add_lc_filename(result, url)
except yaml.parser.ParserError as e:
raise validate.ValidationException("Syntax error %s" % (e))
- if isinstance(result, dict) and inject_ids and self.identifiers:
+ if isinstance(result, CommentedMap) and inject_ids and self.identifiers:
for identifier in self.identifiers:
if identifier not in result:
result[identifier] = url
@@ -718,7 +829,7 @@ class Loader(object):
return result
- FieldType = TypeVar('FieldType', unicode, List[unicode], Dict[unicode, Any])
+ FieldType = TypeVar('FieldType', unicode, CommentedSeq, CommentedMap)
def validate_scoped(self, field, link, docid):
# type: (unicode, unicode, unicode) -> unicode
@@ -742,7 +853,7 @@ class Loader(object):
break
sp.pop()
raise validate.ValidationException(
- "Field `%s` contains undefined reference to `%s`, tried %s" % (field, link, tried))
+ "Field `%s` references unknown identifier `%s`, tried %s" % (field, link, ", ".join(tried)))
def validate_link(self, field, link, docid):
# type: (unicode, FieldType, unicode) -> FieldType
@@ -762,7 +873,7 @@ class Loader(object):
elif not self.check_exists(link):
raise validate.ValidationException(
"Field `%s` contains undefined reference to `%s`" % (field, link))
- elif isinstance(link, list):
+ elif isinstance(link, CommentedSeq):
errors = []
for n, i in enumerate(link):
try:
@@ -771,12 +882,12 @@ class Loader(object):
errors.append(v)
if errors:
raise validate.ValidationException(
- "\n".join([str(e) for e in errors]))
- elif isinstance(link, dict):
+ "\n".join([unicode(e) for e in errors]))
+ elif isinstance(link, CommentedMap):
self.validate_links(link, docid)
else:
- raise validate.ValidationException("Link must be a str, unicode, "
- "list, or a dict.")
+ raise validate.ValidationException(
+ "`%s` field is %s, expected string, list, or a dict." % (field, type(link).__name__))
return link
def getid(self, d): # type: (Any) -> unicode
@@ -788,59 +899,65 @@ class Loader(object):
return None
def validate_links(self, document, base_url):
- # type: (DocumentType, unicode) -> DocumentType
+ # type: (Union[CommentedMap, CommentedSeq, unicode], unicode) -> None
docid = self.getid(document)
if not docid:
docid = base_url
- errors = []
- iterator = None # type: Any
+ errors = [] # type: List[Exception]
+ iterator = None # type: Any
if isinstance(document, list):
iterator = enumerate(document)
elif isinstance(document, dict):
try:
for d in self.url_fields:
+ sl = SourceLine(document, d, validate.ValidationException)
if d in document and d not in self.identity_links:
document[d] = self.validate_link(d, document[d], docid)
except validate.ValidationException as v:
- errors.append(v)
+ errors.append(sl.makeError(unicode(v)))
if hasattr(document, "iteritems"):
iterator = document.iteritems()
else:
iterator = document.items()
else:
- return document
+ return
for key, val in iterator:
+ sl = SourceLine(document, key, validate.ValidationException)
try:
- document[key] = self.validate_links(val, docid)
+ self.validate_links(val, docid)
except validate.ValidationException as v:
if key not in self.nolinkcheck:
docid2 = self.getid(val)
if docid2:
- errors.append(validate.ValidationException(
- "While checking object `%s`\n%s" % (docid2, validate.indent(str(v)))))
+ errors.append(sl.makeError("checking object `%s`\n%s" % (
+ relname(docid2), validate.indent(unicode(v)))))
else:
if isinstance(key, basestring):
- errors.append(validate.ValidationException(
- "While checking field `%s`\n%s" % (key, validate.indent(str(v)))))
+ errors.append(sl.makeError("checking field `%s`\n%s" % (
+ key, validate.indent(unicode(v)))))
else:
- errors.append(validate.ValidationException(
- "While checking position %s\n%s" % (key, validate.indent(str(v)))))
+ errors.append(sl.makeError("checking item\n%s" % (
+ validate.indent(unicode(v)))))
if errors:
if len(errors) > 1:
raise validate.ValidationException(
- "\n".join([str(e) for e in errors]))
+ u"\n".join([unicode(e) for e in errors]))
else:
raise errors[0]
- return document
+ return
+
+D = TypeVar('D', CommentedMap, ContextType)
def _copy_dict_without_key(from_dict, filtered_key):
- # type: (Dict, Any) -> Dict
- new_dict = {}
- for key, value in from_dict.items():
- if key != filtered_key:
- new_dict[key] = value
+ # type: (D, Any) -> D
+ new_dict = copy.copy(from_dict)
+ if filtered_key in new_dict:
+ del new_dict[filtered_key] # type: ignore
+ if isinstance(from_dict, CommentedMap):
+ new_dict.lc.data = copy.copy(from_dict.lc.data)
+ new_dict.lc.filename = from_dict.lc.filename
return new_dict
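The typeDSLregex / _type_dsl machinery above implements the Salad type
DSL: a trailing "[]" expands to an array schema and a trailing "?" to a
union with "null". The real code builds CommentedMap/CommentedSeq objects
so line/column information survives; a minimal sketch with plain dicts:

    import re

    typeDSLregex = re.compile(r"^([^[?]+)(\[\])?(\?)?$")

    def type_dsl(t):
        m = typeDSLregex.match(t)
        if not m:
            return t
        first = m.group(1)
        second = third = None
        if m.group(2):  # trailing "[]": array of the base type
            second = {"type": "array", "items": first}
        if m.group(3):  # trailing "?": optional, i.e. union with "null"
            third = ["null", second or first]
        return third or second or first

    # type_dsl("string")    -> "string"
    # type_dsl("string[]")  -> {"type": "array", "items": "string"}
    # type_dsl("string[]?") -> ["null", {"type": "array", "items": "string"}]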
diff --git a/schema_salad/schema.py b/schema_salad/schema.py
index fc7afe2..342ec46 100644
--- a/schema_salad/schema.py
+++ b/schema_salad/schema.py
@@ -5,22 +5,23 @@ import sys
import pprint
from pkg_resources import resource_stream
import ruamel.yaml as yaml
-try:
- from ruamel.yaml import CSafeLoader as SafeLoader
-except ImportError:
- from ruamel.yaml import SafeLoader # type: ignore
import avro.schema
from . import validate
import json
import urlparse
+import os
AvroSchemaFromJSONData = avro.schema.make_avsc_object
# AvroSchemaFromJSONData=avro.schema.SchemaFromJSONData
+from avro.schema import Names, SchemaParseException
from . import ref_resolver
+from .ref_resolver import Loader, DocumentType
from .flatten import flatten
import logging
from .aslist import aslist
from . import jsonld_context
+from .sourceline import SourceLine, strip_dup_lineno, add_lc_filename, bullets, relname
from typing import Any, AnyStr, cast, Dict, List, Tuple, TypeVar, Union
+from ruamel.yaml.comments import CommentedSeq, CommentedMap
_logger = logging.getLogger("salad")
@@ -48,7 +49,7 @@ salad_files = ('metaschema.yml',
def get_metaschema():
- # type: () -> Tuple[avro.schema.Names, List[Dict[unicode, Any]], ref_resolver.Loader]
+ # type: () -> Tuple[Names, List[Dict[unicode, Any]], Loader]
loader = ref_resolver.Loader({
"Any": "https://w3id.org/cwl/salad#Any",
"ArraySchema": "https://w3id.org/cwl/salad#ArraySchema",
@@ -162,8 +163,8 @@ def get_metaschema():
loader.cache["https://w3id.org/cwl/salad"] = rs.read()
rs.close()
- j = yaml.load(loader.cache["https://w3id.org/cwl/salad"],
- Loader=SafeLoader)
+ j = yaml.round_trip_load(loader.cache["https://w3id.org/cwl/salad"]) # type: ignore
+ add_lc_filename(j, "metaschema.yml")
j, _ = loader.resolve_all(j, "https://w3id.org/cwl/salad#")
# pprint.pprint(j)
@@ -177,8 +178,14 @@ def get_metaschema():
return (sch_names, j, loader)
-def load_schema(schema_ref, cache=None):
- # type: (Union[unicode, Dict[unicode, Any]], Dict) -> Tuple[ref_resolver.Loader, Union[avro.schema.Names, avro.schema.SchemaParseException], Dict[unicode, Any], ref_resolver.Loader]
+def load_schema(schema_ref, # type: Union[CommentedMap, CommentedSeq, unicode]
+ cache=None # type: Dict
+ ):
+ # type: (...) -> Tuple[Loader, Union[Names, SchemaParseException], Dict[unicode, Any], Loader]
+ """Load a schema that can be used to validate documents using load_and_validate.
+
+ return document_loader, avsc_names, schema_metadata, metaschema_loader"""
+
metaschema_names, metaschema_doc, metaschema_loader = get_metaschema()
if cache is not None:
metaschema_loader.cache.update(cache)
@@ -194,7 +201,7 @@ def load_schema(schema_ref, cache=None):
schema_doc, metactx)
# Create the loader that will be used to load the target document.
- document_loader = ref_resolver.Loader(schema_ctx, cache=cache)
+ document_loader = Loader(schema_ctx, cache=cache)
# Make the Avro validation that will be used to validate the target
# document
@@ -202,19 +209,53 @@ def load_schema(schema_ref, cache=None):
return document_loader, avsc_names, schema_metadata, metaschema_loader
-def load_and_validate(document_loader, avsc_names, document, strict):
- # type: (ref_resolver.Loader, avro.schema.Names, Union[Dict[unicode, Any], unicode], bool) -> Tuple[Any, Dict[unicode, Any]]
- if isinstance(document, dict):
- data, metadata = document_loader.resolve_all(document, document["id"])
- else:
- data, metadata = document_loader.resolve_ref(document)
- validate_doc(avsc_names, data, document_loader, strict)
+def load_and_validate(document_loader, # type: Loader
+ avsc_names, # type: Names
+ document, # type: Union[CommentedMap, unicode]
+ strict # type: bool
+ ):
+ # type: (...) -> Tuple[Any, Dict[unicode, Any]]
+ """Load a document and validate it with the provided schema.
+
return data, metadata
+ """
+ try:
+ if isinstance(document, CommentedMap):
+ source = document["id"]
+ data, metadata = document_loader.resolve_all(
+ document, document["id"], checklinks=False)
+ else:
+ source = document
+ data, metadata = document_loader.resolve_ref(
+ document, checklinks=False)
+ except validate.ValidationException as v:
+ raise validate.ValidationException(strip_dup_lineno(str(v)))
+ validationErrors = u""
+ try:
+ document_loader.validate_links(data, u"")
+ except validate.ValidationException as v:
+ validationErrors = unicode(v) + "\n"
-def validate_doc(schema_names, doc, loader, strict):
- # type: (avro.schema.Names, Union[Dict[unicode, Any], List[Dict[unicode, Any]], unicode], ref_resolver.Loader, bool) -> None
+ try:
+ validate_doc(avsc_names, data, document_loader, strict, source=source)
+ except validate.ValidationException as v:
+ validationErrors += unicode(v)
+
+ if validationErrors:
+ raise validate.ValidationException(validationErrors)
+
+ return data, metadata
+
+
+def validate_doc(schema_names, # type: Names
+ doc, # type: Union[Dict[unicode, Any], List[Dict[unicode, Any]], unicode]
+ loader, # type: Loader
+ strict, # type: bool
+ source=None
+ ):
+ # type: (...) -> None
has_root = False
for r in schema_names.names.values():
if ((hasattr(r, 'get_prop') and r.get_prop(u"documentRoot")) or (
@@ -228,8 +269,10 @@ def validate_doc(schema_names, doc, loader, strict):
if isinstance(doc, list):
validate_doc = doc
- elif isinstance(doc, dict):
- validate_doc = [doc]
+ elif isinstance(doc, CommentedMap):
+ validate_doc = CommentedSeq([doc])
+ validate_doc.lc.add_kv_line_col(0, [doc.lc.line, doc.lc.col])
+ validate_doc.lc.filename = doc.lc.filename
else:
raise validate.ValidationException("Document must be dict or list")
@@ -241,10 +284,12 @@ def validate_doc(schema_names, doc, loader, strict):
anyerrors = []
for pos, item in enumerate(validate_doc):
+ sl = SourceLine(validate_doc, pos, unicode)
success = False
for r in roots:
success = validate.validate_ex(
- r, item, loader.identifiers, strict, foreign_properties=loader.foreign_properties, raise_ex=False)
+ r, item, loader.identifiers, strict,
+ foreign_properties=loader.foreign_properties, raise_ex=False)
if success:
break
@@ -258,28 +303,33 @@ def validate_doc(schema_names, doc, loader, strict):
try:
validate.validate_ex(
- r, item, loader.identifiers, strict, foreign_properties=loader.foreign_properties, raise_ex=True)
+ r, item, loader.identifiers, strict,
+ foreign_properties=loader.foreign_properties,
+ raise_ex=True)
except validate.ClassValidationException as e:
- errors = [u"Could not validate `%s` because\n%s" % (
- name, validate.indent(str(e), nolead=False))]
+ errors = [sl.makeError(u"tried `%s` but\n%s" % (
+ name, validate.indent(str(e), nolead=False)))]
break
except validate.ValidationException as e:
- errors.append(u"Could not validate as `%s` because\n%s" % (
- name, validate.indent(str(e), nolead=False)))
+ errors.append(sl.makeError(u"tried `%s` but\n%s" % (
+ name, validate.indent(str(e), nolead=False))))
- objerr = u"Validation error at position %i" % pos
+ objerr = sl.makeError(u"Invalid")
for ident in loader.identifiers:
if ident in item:
- objerr = u"Validation error in object %s" % (item[ident])
+ objerr = sl.makeError(
+ u"Object `%s` is not valid because"
+ % (relname(item[ident])))
break
anyerrors.append(u"%s\n%s" %
- (objerr, validate.indent(u"\n".join(errors))))
+ (objerr, validate.indent(bullets(errors, "- "))))
if anyerrors:
- raise validate.ValidationException(u"\n".join(anyerrors))
+ raise validate.ValidationException(
+ strip_dup_lineno(bullets(anyerrors, "* ")))
def replace_type(items, spec, loader, found):
- # type: (Any, Dict[unicode, Any], ref_resolver.Loader, Set[unicode]) -> Any
+ # type: (Any, Dict[unicode, Any], Loader, Set[unicode]) -> Any
""" Go through and replace types in the 'spec' mapping"""
items = copy.deepcopy(items)
@@ -331,8 +381,13 @@ def avro_name(url): # type: (AnyStr) -> AnyStr
Avro = TypeVar('Avro', Dict[unicode, Any], List[Any], unicode)
-def make_valid_avro(items, alltypes, found, union=False):
- # type: (Avro, Dict[unicode, Dict[unicode, Any]], Set[unicode], bool) -> Union[Avro, Dict]
+
+def make_valid_avro(items, # type: Avro
+ alltypes, # type: Dict[unicode, Dict[unicode, Any]]
+ found, # type: Set[unicode]
+ union=False # type: bool
+ ):
+ # type: (...) -> Union[Avro, Dict]
items = copy.deepcopy(items)
if isinstance(items, dict):
if items.get("name"):
@@ -365,13 +420,13 @@ def make_valid_avro(items, alltypes, found, union=False):
if union and isinstance(items, (str, unicode)):
if items in alltypes and avro_name(items) not in found:
return cast(Dict, make_valid_avro(alltypes[items], alltypes, found,
- union=union))
+ union=union))
items = avro_name(items)
return items
def extend_and_specialize(items, loader):
- # type: (List[Dict[unicode, Any]], ref_resolver.Loader) -> List[Dict[unicode, Any]]
+ # type: (List[Dict[unicode, Any]], Loader) -> List[Dict[unicode, Any]]
"""Apply 'extend' and 'specialize' to fully materialize derived record
types."""
@@ -443,7 +498,8 @@ def extend_and_specialize(items, loader):
for t in n:
if t.get("abstract") and t["name"] not in extended_by:
- raise validate.ValidationException("%s is abstract but missing a concrete subtype" % t["name"])
+ raise validate.ValidationException(
+ "%s is abstract but missing a concrete subtype" % t["name"])
for t in n:
if "fields" in t:
@@ -452,8 +508,10 @@ def extend_and_specialize(items, loader):
return n
-def make_avro_schema(i, loader):
- # type: (List[Dict[unicode, Any]], ref_resolver.Loader) -> Tuple[Union[avro.schema.Names,avro.schema.SchemaParseException], List[Dict[unicode, Any]]]
+def make_avro_schema(i, # type: List[Dict[unicode, Any]]
+ loader # type: Loader
+ ):
+ # type: (...) -> Tuple[Union[Names, SchemaParseException], List[Dict[unicode, Any]]]
names = avro.schema.Names()
j = extend_and_specialize(i, loader)
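load_schema and load_and_validate now carry docstrings spelling out the
intended two-step flow. A usage sketch matching the new test suite; the
schema and document paths here are placeholders:

    from schema_salad.schema import load_schema, load_and_validate
    from schema_salad.validate import ValidationException

    document_loader, avsc_names, schema_metadata, metaschema_loader = \
        load_schema(u"myschema.yml")
    try:
        data, metadata = load_and_validate(
            document_loader, avsc_names, u"mydocument.yml", True)
    except ValidationException as e:
        print unicode(e)  # messages now carry file:line:column prefixes

Link checking and Avro validation are run separately and their errors
concatenated, so one ValidationException can report several problems.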
diff --git a/schema_salad/sourceline.py b/schema_salad/sourceline.py
new file mode 100644
index 0000000..e09171c
--- /dev/null
+++ b/schema_salad/sourceline.py
@@ -0,0 +1,165 @@
+import ruamel.yaml
+from ruamel.yaml.comments import CommentedBase, CommentedMap, CommentedSeq
+import re
+import os
+
+from typing import (Any, AnyStr, Callable, cast, Dict, List, Iterable, Tuple,
+ TypeVar, Union, Text)
+
+lineno_re = re.compile(u"^(.*?:[0-9]+:[0-9]+: )(( *)(.*))")
+
+def _add_lc_filename(r, source): # type: (ruamel.yaml.comments.CommentedBase, AnyStr) -> None
+ if isinstance(r, ruamel.yaml.comments.CommentedBase):
+ r.lc.filename = source
+ if isinstance(r, list):
+ for d in r:
+ _add_lc_filename(d, source)
+ elif isinstance(r, dict):
+ for d in r.itervalues():
+ _add_lc_filename(d, source)
+
+def relname(source): # type: (AnyStr) -> AnyStr
+ if source.startswith("file://"):
+ source = source[7:]
+ source = os.path.relpath(source)
+ return source
+
+def add_lc_filename(r, source): # type: (ruamel.yaml.comments.CommentedBase, AnyStr) -> None
+ _add_lc_filename(r, relname(source))
+
+def reflow(text, maxline, shift=""): # type: (AnyStr, int, AnyStr) -> AnyStr
+ if maxline < 20:
+ maxline = 20
+ if len(text) > maxline:
+ sp = text.rfind(' ', 0, maxline)
+ if sp < 1:
+ sp = text.find(' ', sp+1)
+ if sp == -1:
+ sp = len(text)
+ if sp < len(text):
+ return "%s\n%s%s" % (text[0:sp], shift, reflow(text[sp+1:], maxline, shift))
+ return text
+
+def indent(v, nolead=False, shift=u" ", bullet=u" "): # type: (Text, bool, Text, Text) -> Text
+ if nolead:
+ return v.splitlines()[0] + u"\n".join([shift + l for l in v.splitlines()[1:]])
+ else:
+ def lineno(i, l): # type: (int, Text) -> Text
+ r = lineno_re.match(l)
+ if r:
+ return r.group(1) + (bullet if i == 0 else shift) + r.group(2)
+ else:
+ return (bullet if i == 0 else shift) + l
+
+ return u"\n".join([lineno(i, l) for i, l in enumerate(v.splitlines())])
+
+def bullets(textlist, bul): # type: (List[Text], Text) -> Text
+ if len(textlist) == 1:
+ return textlist[0]
+ else:
+ return "\n".join(indent(t, bullet=bul) for t in textlist)
+
+def strip_dup_lineno(text, maxline=None): # type: (Text, int) -> Text
+ if maxline is None:
+ maxline = int(os.environ.get("COLUMNS", "100"))
+ pre = None
+ msg = []
+ for l in text.splitlines():
+ g = lineno_re.match(l)
+ if not g:
+ msg.append(l)
+ continue
+ shift = len(g.group(1)) + len(g.group(3))
+ g2 = reflow(g.group(2), maxline-shift, " " * shift)
+ if g.group(1) != pre:
+ pre = g.group(1)
+ msg.append(pre + g2)
+ else:
+ g2 = reflow(g.group(2), maxline-len(g.group(1)), " " * (len(g.group(1))+len(g.group(3))))
+ msg.append(" " * len(g.group(1)) + g2)
+ return "\n".join(msg)
+
+def cmap(d, lc=None, fn=None): # type: (Union[int, float, str, unicode, Dict, List], List[int], unicode) -> Union[int, float, str, unicode, CommentedMap, CommentedSeq]
+ if lc is None:
+ lc = [0, 0, 0, 0]
+ if fn is None:
+ fn = "test"
+
+ if isinstance(d, CommentedMap):
+ fn = d.lc.filename if hasattr(d.lc, "filename") else fn
+ for k,v in d.iteritems():
+ if k in d.lc.data:
+ d[k] = cmap(v, lc=d.lc.data[k], fn=fn)
+ else:
+ d[k] = cmap(v, lc, fn=fn)
+ return d
+ if isinstance(d, CommentedSeq):
+ fn = d.lc.filename if hasattr(d.lc, "filename") else fn
+ for k,v in enumerate(d):
+ if k in d.lc.data:
+ d[k] = cmap(v, lc=d.lc.data[k], fn=fn)
+ else:
+ d[k] = cmap(v, lc, fn=fn)
+ return d
+ if isinstance(d, dict):
+ cm = CommentedMap()
+ for k in sorted(d.keys()):
+ v = d[k]
+ if isinstance(v, CommentedBase):
+ uselc = [v.lc.line, v.lc.col, v.lc.line, v.lc.col]
+ vfn = v.lc.filename if hasattr(v.lc, "filename") else fn
+ else:
+ uselc = lc
+ vfn = fn
+ cm[k] = cmap(v, lc=uselc, fn=vfn)
+ cm.lc.add_kv_line_col(k, uselc)
+ cm.lc.filename = fn
+ return cm
+ if isinstance(d, list):
+ cs = CommentedSeq()
+ for k,v in enumerate(d):
+ if isinstance(v, CommentedBase):
+ uselc = [v.lc.line, v.lc.col, v.lc.line, v.lc.col]
+ vfn = v.lc.filename if hasattr(v.lc, "filename") else fn
+ else:
+ uselc = lc
+ vfn = fn
+ cs.append(cmap(v, lc=uselc, fn=vfn))
+ cs.lc.add_kv_line_col(k, uselc)
+ cs.lc.filename = fn
+ return cs
+ else:
+ return d
+
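`cmap` lets the rest of the codebase assume every node is a ruamel `CommentedMap`/`CommentedSeq` carrying line/column data, even for documents built in memory; this is why the tests below wrap their literal dicts in `cmap(...)` before calling `resolve_all`. A small sketch of the effect:

```
from schema_salad.sourceline import cmap

doc = cmap({"id": "stuff", "inputs": {"zip": 1}})
print(type(doc).__name__)  # CommentedMap
print(doc.lc.filename)     # "test" -- the placeholder default above
print(doc.lc.data["id"])   # synthetic [line, col, line, col] == [0, 0, 0, 0]
```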
+class SourceLine(object):
+ def __init__(self, item, key=None, raise_type=unicode): # type: (Any, Any, Callable) -> None
+ self.item = item
+ self.key = key
+ self.raise_type = raise_type
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ if not exc_value:
+ return
+ raise self.makeError(unicode(exc_value))
+
+ def makeError(self, msg): # type: (Text) -> Any
+ if not isinstance(self.item, ruamel.yaml.comments.CommentedBase):
+ return self.raise_type(msg)
+ errs = []
+ if self.key is None or self.item.lc.data is None or self.key not in self.item.lc.data:
+ lead = "%s:%i:%i:" % (self.item.lc.filename,
+ self.item.lc.line+1,
+ self.item.lc.col+1)
+ else:
+ lead = "%s:%i:%i:" % (self.item.lc.filename,
+ self.item.lc.data[self.key][0]+1,
+ self.item.lc.data[self.key][1]+1)
+ for m in msg.splitlines():
+ if lineno_re.match(m):
+ errs.append(m)
+ else:
+ errs.append("%s %s" % (lead, m))
+ return self.raise_type("\n".join(errs))
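`SourceLine` is the consumer of all of the above: given a node from the round-trip loader (or from `cmap`), it prefixes messages with `file:line:col:`, either via `makeError` or as a context manager that re-raises whatever is thrown inside the block as `raise_type`. A usage sketch for this Python 2-era module (the document and messages are invented):

```
import ruamel.yaml
from schema_salad.sourceline import SourceLine

doc = ruamel.yaml.round_trip_load("id: foo1\nbar: b1\n")
doc.lc.filename = "frag.yml"

# makeError() looks up the position of the "bar" key:
print(SourceLine(doc, "bar", Exception).makeError("bad value"))
# -> frag.yml:2:1: bad value

# As a context manager, exceptions raised inside the block are
# re-raised as raise_type with the same location prefix.
try:
    with SourceLine(doc, "bar", Exception):
        raise RuntimeError("bad value")
except Exception as e:
    print(e)  # frag.yml:2:1: bad value
```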
diff --git a/schema_salad/tests/.coverage b/schema_salad/tests/.coverage
new file mode 100644
index 0000000..b4ab5e5
--- /dev/null
+++ b/schema_salad/tests/.coverage
@@ -0,0 +1 @@
+!coverage.py: This is a private format, don't read it directly!{"lines": {"/home/peter/work/salad/schema_salad/validate.py": [1, 2, 3, 4, 5, 6, 7, 9, 10, 12, 13, 15, 19, 20, 21, 22, 25, 26, 27, 28, 29, 30, 31, 32, 33, 37, 38, 39, 41, 43, 44, 48, 51, 52, 54, 56, 57, 58, 60, 63, 64, 65, 66, 72, 73, 74, 75, 79, 80, 82, 83, 91, 92, 93, 94, 100, 109, 118, 119, 127, 128, 129, 131, 132, 133, 135, 136, 137, 138, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 155, 157, 158, 160, [...]
\ No newline at end of file
diff --git a/schema_salad/tests/frag.yml b/schema_salad/tests/frag.yml
new file mode 100644
index 0000000..7e8818d
--- /dev/null
+++ b/schema_salad/tests/frag.yml
@@ -0,0 +1,4 @@
+- id: foo1
+ bar: b1
+- id: foo2
+ bar: b2
\ No newline at end of file
diff --git a/schema_salad/tests/test_errors.py b/schema_salad/tests/test_errors.py
new file mode 100644
index 0000000..25a5eea
--- /dev/null
+++ b/schema_salad/tests/test_errors.py
@@ -0,0 +1,31 @@
+from .util import get_data
+import unittest
+from typing import cast
+from schema_salad.schema import load_schema, load_and_validate
+from schema_salad.validate import ValidationException
+from avro.schema import Names
+
+class TestErrors(unittest.TestCase):
+ def test_errors(self):
+ document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
+ get_data(u"tests/test_schema/CommonWorkflowLanguage.yml"))
+ avsc_names = cast(Names, avsc_names)
+
+ for t in ("test_schema/test1.cwl",
+ "test_schema/test2.cwl",
+ "test_schema/test3.cwl",
+ "test_schema/test4.cwl",
+ "test_schema/test5.cwl",
+ "test_schema/test6.cwl",
+ "test_schema/test7.cwl",
+ "test_schema/test8.cwl",
+ "test_schema/test9.cwl",
+ "test_schema/test10.cwl",
+ "test_schema/test11.cwl"):
+ with self.assertRaises(ValidationException):
+ try:
+ load_and_validate(document_loader, avsc_names,
+ unicode(get_data("tests/"+t)), True)
+ except ValidationException as e:
+ print "\n", e
+ raise
diff --git a/schema_salad/tests/test_errors.py~ b/schema_salad/tests/test_errors.py~
new file mode 100644
index 0000000..01058d8
--- /dev/null
+++ b/schema_salad/tests/test_errors.py~
@@ -0,0 +1 @@
+g
diff --git a/schema_salad/tests/test_examples.py b/schema_salad/tests/test_examples.py
index 48462ac..6b0277c 100644
--- a/schema_salad/tests/test_examples.py
+++ b/schema_salad/tests/test_examples.py
@@ -1,41 +1,32 @@
+from .util import get_data
import unittest
import schema_salad.ref_resolver
import schema_salad.main
import schema_salad.schema
from schema_salad.jsonld_context import makerdf
-from pkg_resources import Requirement, resource_filename, ResolutionError # type: ignore
import rdflib
-import ruamel.yaml as yaml
+import ruamel.yaml
import json
import os
+from schema_salad.sourceline import cmap
try:
from ruamel.yaml import CSafeLoader as SafeLoader
except ImportError:
from ruamel.yaml import SafeLoader # type: ignore
-
-def get_data(filename):
- filepath = None
- try:
- filepath = resource_filename(
- Requirement.parse("schema-salad"), filename)
- except ResolutionError:
- pass
- if not filepath or not os.path.isfile(filepath):
- filepath = os.path.join(os.path.dirname(__file__), os.pardir, filename)
- return filepath
+from ruamel.yaml.comments import CommentedSeq, CommentedMap
class TestSchemas(unittest.TestCase):
def test_schemas(self):
l = schema_salad.ref_resolver.Loader({})
- ra, _ = l.resolve_all({
+ ra, _ = l.resolve_all(cmap({
u"$schemas": ["file://" + get_data("tests/EDAM.owl")],
u"$namespaces": {u"edam": u"http://edamontology.org/"},
u"edam:has_format": u"edam:format_1915"
- }, "")
+ }), "")
self.assertEqual({
u"$schemas": ["file://" + get_data("tests/EDAM.owl")],
@@ -74,7 +65,7 @@ class TestSchemas(unittest.TestCase):
argsl=[get_data("tests/Process.yml")]))
def test_jsonld_ctx(self):
- ldr, _, _, _ = schema_salad.schema.load_schema({
+ ldr, _, _, _ = schema_salad.schema.load_schema(cmap({
"$base": "Y",
"name": "X",
"$namespaces": {
@@ -84,9 +75,9 @@ class TestSchemas(unittest.TestCase):
"name": "ExampleType",
"type": "enum",
"symbols": ["asym", "bsym"]}]
- })
+ }))
- ra, _ = ldr.resolve_all({"foo:bar": "asym"}, "X")
+ ra, _ = ldr.resolve_all(cmap({"foo:bar": "asym"}), "X")
self.assertEqual(ra, {
'http://example.com/foo#bar': 'asym'
@@ -106,7 +97,7 @@ class TestSchemas(unittest.TestCase):
},
"id": "@id"})
- ra, _ = ldr.resolve_all({
+ ra, _ = ldr.resolve_all(cmap({
"id": "stuff",
"inputs": {
"zip": 1,
@@ -116,7 +107,7 @@ class TestSchemas(unittest.TestCase):
"other": {
'n': 9
}
- }, "http://example2.com/")
+ }), "http://example2.com/")
self.assertEqual("http://example2.com/#stuff", ra["id"])
for item in ra["inputs"]:
@@ -159,7 +150,7 @@ class TestSchemas(unittest.TestCase):
},
"id": "@id"})
- ra, _ = ldr.resolve_all({
+ ra, _ = ldr.resolve_all(cmap({
"inputs": {
"inp": "string",
"inp2": "string"
@@ -188,7 +179,7 @@ class TestSchemas(unittest.TestCase):
"out": ["out"]
}
}
- }, "http://example2.com/")
+ }), "http://example2.com/")
self.assertEquals(
{'inputs': [{
@@ -234,13 +225,15 @@ class TestSchemas(unittest.TestCase):
get_data("metaschema/%s_schema.yml" % a))
with open(get_data("metaschema/%s_src.yml" % a)) as src_fp:
src = ldr.resolve_all(
- yaml.load(src_fp, Loader=SafeLoader), "", checklinks=False)[0]
+ ruamel.yaml.round_trip_load(src_fp), "",
+ checklinks=False)[0]
with open(get_data("metaschema/%s_proc.yml" % a)) as src_proc:
- proc = yaml.load(src_proc, Loader=SafeLoader)
+ proc = ruamel.yaml.safe_load(src_proc)
self.assertEqual(proc, src)
def test_yaml_float_test(self):
- self.assertEqual(yaml.load("float-test: 2e-10")["float-test"], 2e-10)
+ self.assertEqual(ruamel.yaml.safe_load("float-test: 2e-10")["float-test"],
+ 2e-10)
def test_typedsl_ref(self):
ldr = schema_salad.ref_resolver.Loader({})
@@ -254,16 +247,16 @@ class TestSchemas(unittest.TestCase):
}
})
- ra, _ = ldr.resolve_all({"type": "File"}, "")
+ ra, _ = ldr.resolve_all(cmap({"type": "File"}), "")
self.assertEqual({'type': 'File'}, ra)
- ra, _ = ldr.resolve_all({"type": "File?"}, "")
+ ra, _ = ldr.resolve_all(cmap({"type": "File?"}), "")
self.assertEqual({'type': ['null', 'File']}, ra)
- ra, _ = ldr.resolve_all({"type": "File[]"}, "")
+ ra, _ = ldr.resolve_all(cmap({"type": "File[]"}), "")
self.assertEqual({'type': {'items': 'File', 'type': 'array'}}, ra)
- ra, _ = ldr.resolve_all({"type": "File[]?"}, "")
+ ra, _ = ldr.resolve_all(cmap({"type": "File[]?"}), "")
self.assertEqual(
{'type': ['null', {'items': 'File', 'type': 'array'}]}, ra)
@@ -280,12 +273,12 @@ class TestSchemas(unittest.TestCase):
}
ldr.add_context(ctx)
- ra, _ = ldr.resolve_all({
+ ra, _ = ldr.resolve_all(cmap({
"id": "foo",
"bar": {
"id": "baz"
}
- }, "http://example.com")
+ }), "http://example.com")
self.assertEqual({'id': 'http://example.com/#foo',
'bar': {
'id': 'http://example.com/#foo/baz'},
@@ -294,12 +287,12 @@ class TestSchemas(unittest.TestCase):
g = makerdf(None, ra, ctx)
print(g.serialize(format="n3"))
- ra, _ = ldr.resolve_all({
+ ra, _ = ldr.resolve_all(cmap({
"location": "foo",
"bar": {
"location": "baz"
}
- }, "http://example.com", checklinks=False)
+ }), "http://example.com", checklinks=False)
self.assertEqual({'location': 'http://example.com/foo',
'bar': {
'location': 'http://example.com/baz'},
@@ -308,12 +301,12 @@ class TestSchemas(unittest.TestCase):
g = makerdf(None, ra, ctx)
print(g.serialize(format="n3"))
- ra, _ = ldr.resolve_all({
+ ra, _ = ldr.resolve_all(cmap({
"id": "foo",
"bar": {
"location": "baz"
}
- }, "http://example.com", checklinks=False)
+ }), "http://example.com", checklinks=False)
self.assertEqual({'id': 'http://example.com/#foo',
'bar': {
'location': 'http://example.com/baz'},
@@ -322,12 +315,12 @@ class TestSchemas(unittest.TestCase):
g = makerdf(None, ra, ctx)
print(g.serialize(format="n3"))
- ra, _ = ldr.resolve_all({
+ ra, _ = ldr.resolve_all(cmap({
"location": "foo",
"bar": {
"id": "baz"
}
- }, "http://example.com", checklinks=False)
+ }), "http://example.com", checklinks=False)
self.assertEqual({'location': 'http://example.com/foo',
'bar': {
'id': 'http://example.com/#baz'},
@@ -337,20 +330,20 @@ class TestSchemas(unittest.TestCase):
print(g.serialize(format="n3"))
def test_mixin(self):
+ base_url = "file://" + os.getcwd() + "/tests/"
ldr = schema_salad.ref_resolver.Loader({})
- ra = ldr.resolve_ref({"$mixin": get_data("tests/mixin.yml"), "one": "five"},
- base_url="file://" + os.getcwd() + "/tests/")
+ ra = ldr.resolve_ref(cmap({"$mixin": get_data("tests/mixin.yml"), "one": "five"}),
+ base_url=base_url)
self.assertEqual({'id': 'four', 'one': 'five'}, ra[0])
-
ldr = schema_salad.ref_resolver.Loader({"id": "@id"})
- base_url = "file://" + os.getcwd() + "/tests/"
- ra = ldr.resolve_all([{
+
+ ra = ldr.resolve_all(cmap([{
"id": "a",
"m": {"$mixin": get_data("tests/mixin.yml")}
}, {
"id": "b",
"m": {"$mixin": get_data("tests/mixin.yml")}
- }], base_url=base_url)
+ }]), base_url=base_url)
self.assertEqual([{
'id': base_url + '#a',
'm': {
@@ -364,6 +357,19 @@ class TestSchemas(unittest.TestCase):
'one': 'two'}
}], ra[0])
+ def test_fragment(self):
+ ldr = schema_salad.ref_resolver.Loader({"id": "@id"})
+ b, _ = ldr.resolve_ref(get_data("tests/frag.yml#foo2"))
+ self.assertEquals({"id": b["id"], "bar":"b2"}, b)
+
+ def test_file_uri(self):
+ # Note: this test probably won't pass on Windows. Someone with a
+        # Windows box should add an alternate test.
+ self.assertEquals("file:///foo/bar%20baz/quux", schema_salad.ref_resolver.file_uri("/foo/bar baz/quux"))
+ self.assertEquals("/foo/bar baz/quux", schema_salad.ref_resolver.uri_file_path("file:///foo/bar%20baz/quux"))
+ self.assertEquals("file:///foo/bar%20baz/quux#zing%20zong", schema_salad.ref_resolver.file_uri("/foo/bar baz/quux#zing zong"))
+ self.assertEquals("/foo/bar baz/quux#zing zong", schema_salad.ref_resolver.uri_file_path("file:///foo/bar%20baz/quux#zing%20zong"))
+
if __name__ == '__main__':
unittest.main()
diff --git a/schema_salad/tests/test_fetch.py~ b/schema_salad/tests/test_fetch.py~
new file mode 100644
index 0000000..422d945
--- /dev/null
+++ b/schema_salad/tests/test_fetch.py~
@@ -0,0 +1,13 @@
+import unittest
+import schema_salad.ref_resolver
+import schema_salad.main
+import schema_salad.schema
+from schema_salad.jsonld_context import makerdf
+from pkg_resources import Requirement, resource_filename, ResolutionError # type: ignore
+import rdflib
+import ruamel.yaml as yaml
+import json
+import os
+
+class TestFetcher(unittest.TestCase):
+ def test_schemas(self):
diff --git a/schema_salad/tests/test_schema/CommandLineTool.yml b/schema_salad/tests/test_schema/CommandLineTool.yml
new file mode 100644
index 0000000..181c51c
--- /dev/null
+++ b/schema_salad/tests/test_schema/CommandLineTool.yml
@@ -0,0 +1,894 @@
+$base: "https://w3id.org/cwl/cwl#"
+
+$namespaces:
+ cwl: "https://w3id.org/cwl/cwl#"
+
+$graph:
+
+- name: CommandLineToolDoc
+ type: documentation
+ doc:
+ - |
+ # Common Workflow Language (CWL) Command Line Tool Description, v1.0
+
+ This version:
+ * https://w3id.org/cwl/v1.0/
+
+ Current version:
+ * https://w3id.org/cwl/
+ - "\n\n"
+ - {$include: contrib.md}
+ - "\n\n"
+ - |
+ # Abstract
+
+ A Command Line Tool is a non-interactive executable program that reads
+ some input, performs a computation, and terminates after producing some
+ output. Command line programs are a flexible unit of code sharing and
+ reuse, unfortunately the syntax and input/output semantics among command
+ line programs is extremely heterogeneous. A common layer for describing
+ the syntax and semantics of programs can reduce this incidental
+ complexity by providing a consistent way to connect programs together.
+ This specification defines the Common Workflow Language (CWL) Command
+ Line Tool Description, a vendor-neutral standard for describing the
+ syntax and input/output semantics of command line programs.
+
+ - {$include: intro.md}
+
+ - |
+ ## Introduction to v1.0
+
+ This specification represents the first full release from the CWL group.
+ Since draft-3, version 1.0 introduces the following changes and additions:
+
+ * The [Directory](#Directory) type.
+      * Syntax simplifications: denoted by the `map<>` syntax. Example: inputs
+        contains a list of items, each with an id. Now one can specify
+        a mapping of that identifier to the corresponding
+        `CommandInputParameter`.
+ ```
+ inputs:
+ - id: one
+ type: string
+ doc: First input parameter
+ - id: two
+ type: int
+ doc: Second input parameter
+ ```
+ can be
+ ```
+ inputs:
+ one:
+ type: string
+ doc: First input parameter
+ two:
+ type: int
+ doc: Second input parameter
+ ```
+ * [InitialWorkDirRequirement](#InitialWorkDirRequirement): list of
+ files and subdirectories to be present in the output directory prior
+ to execution.
+ * Shortcuts for specifying the standard [output](#stdout) and/or
+ [error](#stderr) streams as a (streamable) File output.
+ * [SoftwareRequirement](#SoftwareRequirement) for describing software
+ dependencies of a tool.
+ * The common `description` field has been renamed to `doc`.
+
+ ## Errata
+
+ Post v1.0 release changes to the spec.
+
+ * 13 July 2016: Mark `baseCommand` as optional and update descriptive text.
+
+ ## Purpose
+
+ Standalone programs are a flexible and interoperable form of code reuse.
+    Unlike monolithic applications, applications and analysis workflows
+    composed of multiple separate programs can be written in multiple
+ languages and execute concurrently on multiple hosts. However, POSIX
+ does not dictate computer-readable grammar or semantics for program input
+ and output, resulting in extremely heterogeneous command line grammar and
+    input/output semantics among programs.  This is a particular problem in
+ distributed computing (multi-node compute clusters) and virtualized
+ environments (such as Docker containers) where it is often necessary to
+ provision resources such as input files before executing the program.
+
+ Often this gap is filled by hard coding program invocation and
+ implicitly assuming requirements will be met, or abstracting program
+ invocation with wrapper scripts or descriptor documents. Unfortunately,
+ where these approaches are application or platform specific it creates a
+ significant barrier to reproducibility and portability, as methods
+ developed for one platform must be manually ported to be used on new
+ platforms. Similarly it creates redundant work, as wrappers for popular
+ tools must be rewritten for each application or platform in use.
+
+ The Common Workflow Language Command Line Tool Description is designed to
+ provide a common standard description of grammar and semantics for
+ invoking programs used in data-intensive fields such as Bioinformatics,
+ Chemistry, Physics, Astronomy, and Statistics. This specification
+ defines a precise data and execution model for Command Line Tools that
+ can be implemented on a variety of computing platforms, ranging from a
+ single workstation to cluster, grid, cloud, and high performance
+ computing platforms.
+
+ - {$include: concepts.md}
+ - {$include: invocation.md}
+
+
+- type: record
+ name: EnvironmentDef
+ doc: |
+ Define an environment variable that will be set in the runtime environment
+ by the workflow platform when executing the command line tool. May be the
+ result of executing an expression, such as getting a parameter from input.
+ fields:
+ - name: envName
+ type: string
+ doc: The environment variable name
+ - name: envValue
+ type: [string, Expression]
+ doc: The environment variable value
+
+- type: record
+ name: CommandLineBinding
+ extends: InputBinding
+ doc: |
+
+ When listed under `inputBinding` in the input schema, the term
+ "value" refers to the the corresponding value in the input object. For
+ binding objects listed in `CommandLineTool.arguments`, the term "value"
+ refers to the effective value after evaluating `valueFrom`.
+
+ The binding behavior when building the command line depends on the data
+ type of the value. If there is a mismatch between the type described by
+ the input schema and the effective value, such as resulting from an
+ expression evaluation, an implementation must use the data type of the
+ effective value.
+
+ - **string**: Add `prefix` and the string to the command line.
+
+ - **number**: Add `prefix` and decimal representation to command line.
+
+ - **boolean**: If true, add `prefix` to the command line. If false, add
+ nothing.
+
+ - **File**: Add `prefix` and the value of
+ [`File.path`](#File) to the command line.
+
+      - **array**: If `itemSeparator` is specified, add `prefix` and join
+ the array into a single string with `itemSeparator` separating the
+ items. Otherwise first add `prefix`, then recursively process
+ individual elements.
+
+ - **object**: Add `prefix` only, and recursively add object fields for
+ which `inputBinding` is specified.
+
+ - **null**: Add nothing.
+
+ fields:
+ - name: position
+ type: int?
+ doc: "The sorting key. Default position is 0."
+ - name: prefix
+ type: string?
+ doc: "Command line prefix to add before the value."
+ - name: separate
+ type: boolean?
+ doc: |
+ If true (default), then the prefix and value must be added as separate
+ command line arguments; if false, prefix and value must be concatenated
+ into a single command line argument.
+ - name: itemSeparator
+ type: string?
+ doc: |
+ Join the array elements into a single string with the elements
+        separated by `itemSeparator`.
+ - name: valueFrom
+ type:
+ - "null"
+ - string
+ - Expression
+ jsonldPredicate: "cwl:valueFrom"
+ doc: |
+ If `valueFrom` is a constant string value, use this as the value and
+ apply the binding rules above.
+
+ If `valueFrom` is an expression, evaluate the expression to yield the
+ actual value to use to build the command line and apply the binding
+ rules above. If the inputBinding is associated with an input
+ parameter, the value of `self` in the expression will be the value of the
+ input parameter.
+
+ When a binding is part of the `CommandLineTool.arguments` field,
+ the `valueFrom` field is required.
+ - name: shellQuote
+ type: boolean?
+ doc: |
+ If `ShellCommandRequirement` is in the requirements for the current command,
+ this controls whether the value is quoted on the command line (default is true).
+ Use `shellQuote: false` to inject metacharacters for operations such as pipes.
+
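As an illustrative sketch of the binding rules above (input names and values invented), a boolean contributes only its `prefix`, while an array with `itemSeparator` is joined into a single argument:

```
inputs:
  verbose:
    type: boolean
    inputBinding:
      prefix: --verbose
  names:
    type: string[]
    inputBinding:
      prefix: --names
      itemSeparator: ","
```

With the input object `{"verbose": true, "names": ["a", "b"]}` this contributes `--verbose --names a,b` to the command line.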
+- type: record
+ name: CommandOutputBinding
+ extends: OutputBinding
+ doc: |
+ Describes how to generate an output parameter based on the files produced
+ by a CommandLineTool.
+
+ The output parameter is generated by applying these operations in
+ the following order:
+
+ - glob
+ - loadContents
+ - outputEval
+ fields:
+ - name: glob
+ type:
+ - "null"
+ - string
+ - Expression
+ - type: array
+ items: string
+ doc: |
+ Find files relative to the output directory, using POSIX glob(3)
+ pathname matching. If an array is provided, find files that match any
+ pattern in the array. If an expression is provided, the expression must
+ return a string or an array of strings, which will then be evaluated as
+ one or more glob patterns. Must only match and return files which
+ actually exist.
+ - name: loadContents
+ type:
+ - "null"
+ - boolean
+ jsonldPredicate: "cwl:loadContents"
+ doc: |
+ For each file matched in `glob`, read up to
+ the first 64 KiB of text from the file and place it in the `contents`
+ field of the file object for manipulation by `outputEval`.
+ - name: outputEval
+ type:
+ - "null"
+ - string
+ - Expression
+ doc: |
+ Evaluate an expression to generate the output value. If `glob` was
+ specified, the value of `self` must be an array containing file objects
+ that were matched. If no files were matched, `self` must be a zero
+ length array; if a single file was matched, the value of `self` is an
+ array of a single element. Additionally, if `loadContents` is `true`,
+ the File objects must include up to the first 64 KiB of file contents
+ in the `contents` field.
+
+
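As a sketch of how the three operations compose (the file name is invented, and the `outputEval` expression assumes `InlineJavascriptRequirement` is in effect):

```
outputs:
  first_line:
    type: string
    outputBinding:
      glob: output.txt        # 1. match files in the output directory
      loadContents: true      # 2. read up to 64 KiB into `contents`
      outputEval: $(self[0].contents.split("\n")[0])   # 3. compute the value
```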
+- name: CommandInputRecordField
+ type: record
+ extends: InputRecordField
+ specialize:
+ - specializeFrom: InputRecordSchema
+ specializeTo: CommandInputRecordSchema
+ - specializeFrom: InputEnumSchema
+ specializeTo: CommandInputEnumSchema
+ - specializeFrom: InputArraySchema
+ specializeTo: CommandInputArraySchema
+ - specializeFrom: InputBinding
+ specializeTo: CommandLineBinding
+
+
+- name: CommandInputRecordSchema
+ type: record
+ extends: InputRecordSchema
+ specialize:
+ - specializeFrom: InputRecordField
+ specializeTo: CommandInputRecordField
+
+
+- name: CommandInputEnumSchema
+ type: record
+ extends: InputEnumSchema
+ specialize:
+ - specializeFrom: InputBinding
+ specializeTo: CommandLineBinding
+
+
+- name: CommandInputArraySchema
+ type: record
+ extends: InputArraySchema
+ specialize:
+ - specializeFrom: InputRecordSchema
+ specializeTo: CommandInputRecordSchema
+ - specializeFrom: InputEnumSchema
+ specializeTo: CommandInputEnumSchema
+ - specializeFrom: InputArraySchema
+ specializeTo: CommandInputArraySchema
+ - specializeFrom: InputBinding
+ specializeTo: CommandLineBinding
+
+
+- name: CommandOutputRecordField
+ type: record
+ extends: OutputRecordField
+ specialize:
+ - specializeFrom: OutputRecordSchema
+ specializeTo: CommandOutputRecordSchema
+ - specializeFrom: OutputEnumSchema
+ specializeTo: CommandOutputEnumSchema
+ - specializeFrom: OutputArraySchema
+ specializeTo: CommandOutputArraySchema
+ - specializeFrom: OutputBinding
+ specializeTo: CommandOutputBinding
+
+
+- name: CommandOutputRecordSchema
+ type: record
+ extends: OutputRecordSchema
+ specialize:
+ - specializeFrom: OutputRecordField
+ specializeTo: CommandOutputRecordField
+
+
+- name: CommandOutputEnumSchema
+ type: record
+ extends: OutputEnumSchema
+ specialize:
+ - specializeFrom: OutputRecordSchema
+ specializeTo: CommandOutputRecordSchema
+ - specializeFrom: OutputEnumSchema
+ specializeTo: CommandOutputEnumSchema
+ - specializeFrom: OutputArraySchema
+ specializeTo: CommandOutputArraySchema
+ - specializeFrom: OutputBinding
+ specializeTo: CommandOutputBinding
+
+
+- name: CommandOutputArraySchema
+ type: record
+ extends: OutputArraySchema
+ specialize:
+ - specializeFrom: OutputRecordSchema
+ specializeTo: CommandOutputRecordSchema
+ - specializeFrom: OutputEnumSchema
+ specializeTo: CommandOutputEnumSchema
+ - specializeFrom: OutputArraySchema
+ specializeTo: CommandOutputArraySchema
+ - specializeFrom: OutputBinding
+ specializeTo: CommandOutputBinding
+
+
+- type: record
+ name: CommandInputParameter
+ extends: InputParameter
+ doc: An input parameter for a CommandLineTool.
+ specialize:
+ - specializeFrom: InputRecordSchema
+ specializeTo: CommandInputRecordSchema
+ - specializeFrom: InputEnumSchema
+ specializeTo: CommandInputEnumSchema
+ - specializeFrom: InputArraySchema
+ specializeTo: CommandInputArraySchema
+ - specializeFrom: InputBinding
+ specializeTo: CommandLineBinding
+
+- type: record
+ name: CommandOutputParameter
+ extends: OutputParameter
+ doc: An output parameter for a CommandLineTool.
+ specialize:
+ - specializeFrom: OutputBinding
+ specializeTo: CommandOutputBinding
+ fields:
+ - name: type
+ type:
+ - "null"
+ - CWLType
+ - stdout
+ - stderr
+ - CommandOutputRecordSchema
+ - CommandOutputEnumSchema
+ - CommandOutputArraySchema
+ - string
+ - type: array
+ items:
+ - CWLType
+ - CommandOutputRecordSchema
+ - CommandOutputEnumSchema
+ - CommandOutputArraySchema
+ - string
+ jsonldPredicate:
+ "_id": "sld:type"
+ "_type": "@vocab"
+ refScope: 2
+ typeDSL: True
+ doc: |
+ Specify valid types of data that may be assigned to this parameter.
+
+- name: stdout
+ type: enum
+ symbols: [ "cwl:stdout" ]
+ docParent: "#CommandOutputParameter"
+ doc: |
+ Only valid as a `type` for a `CommandLineTool` output with no
+ `outputBinding` set.
+
+ The following
+ ```
+ outputs:
+ an_output_name:
+ type: stdout
+
+ stdout: a_stdout_file
+ ```
+ is equivalent to
+ ```
+ outputs:
+ an_output_name:
+ type: File
+ streamable: true
+ outputBinding:
+ glob: a_stdout_file
+
+ stdout: a_stdout_file
+ ```
+
+ If there is no `stdout` name provided, a random filename will be created.
+ For example, the following
+ ```
+ outputs:
+ an_output_name:
+ type: stdout
+ ```
+ is equivalent to
+ ```
+ outputs:
+ an_output_name:
+ type: File
+ streamable: true
+ outputBinding:
+ glob: random_stdout_filenameABCDEFG
+
+ stdout: random_stdout_filenameABCDEFG
+ ```
+
+
+- name: stderr
+ type: enum
+ symbols: [ "cwl:stderr" ]
+ docParent: "#CommandOutputParameter"
+ doc: |
+ Only valid as a `type` for a `CommandLineTool` output with no
+ `outputBinding` set.
+
+ The following
+ ```
+ outputs:
+ an_output_name:
+ type: stderr
+
+ stderr: a_stderr_file
+ ```
+ is equivalent to
+ ```
+ outputs:
+ an_output_name:
+ type: File
+ streamable: true
+ outputBinding:
+ glob: a_stderr_file
+
+ stderr: a_stderr_file
+ ```
+
+ If there is no `stderr` name provided, a random filename will be created.
+ For example, the following
+ ```
+ outputs:
+ an_output_name:
+ type: stderr
+ ```
+ is equivalent to
+ ```
+ outputs:
+ an_output_name:
+ type: File
+ streamable: true
+ outputBinding:
+ glob: random_stderr_filenameABCDEFG
+
+ stderr: random_stderr_filenameABCDEFG
+ ```
+
+
+- type: record
+ name: CommandLineTool
+ extends: Process
+ documentRoot: true
+ specialize:
+ - specializeFrom: InputParameter
+ specializeTo: CommandInputParameter
+ - specializeFrom: OutputParameter
+ specializeTo: CommandOutputParameter
+ doc: |
+ This defines the schema of the CWL Command Line Tool Description document.
+
+ fields:
+ - name: class
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ type: string
+ - name: baseCommand
+ doc: |
+ Specifies the program to execute. If an array, the first element of
+ the array is the command to execute, and subsequent elements are
+ mandatory command line arguments. The elements in `baseCommand` must
+ appear before any command line bindings from `inputBinding` or
+ `arguments`.
+
+ If `baseCommand` is not provided or is an empty array, the first
+ element of the command line produced after processing `inputBinding` or
+ `arguments` must be used as the program to execute.
+
+      If the program includes a path separator character it must
+      be an absolute path; otherwise it is an error.  If the program does not
+      include a path separator, search the `$PATH` variable in the runtime
+      environment of the workflow runner to find the absolute path of the
+      executable.
+ type:
+ - string?
+ - string[]?
+ jsonldPredicate:
+ "_id": "cwl:baseCommand"
+ "_container": "@list"
+ - name: arguments
+ doc: |
+ Command line bindings which are not directly associated with input parameters.
+ type:
+ - "null"
+ - type: array
+ items: [string, Expression, CommandLineBinding]
+ jsonldPredicate:
+ "_id": "cwl:arguments"
+ "_container": "@list"
+ - name: stdin
+ type: ["null", string, Expression]
+ doc: |
+ A path to a file whose contents must be piped into the command's
+ standard input stream.
+ - name: stderr
+ type: ["null", string, Expression]
+ jsonldPredicate: "https://w3id.org/cwl/cwl#stderr"
+ doc: |
+ Capture the command's standard error stream to a file written to
+ the designated output directory.
+
+ If `stderr` is a string, it specifies the file name to use.
+
+ If `stderr` is an expression, the expression is evaluated and must
+ return a string with the file name to use to capture stderr. If the
+ return value is not a string, or the resulting path contains illegal
+ characters (such as the path separator `/`) it is an error.
+ - name: stdout
+ type: ["null", string, Expression]
+ jsonldPredicate: "https://w3id.org/cwl/cwl#stdout"
+ doc: |
+ Capture the command's standard output stream to a file written to
+ the designated output directory.
+
+ If `stdout` is a string, it specifies the file name to use.
+
+ If `stdout` is an expression, the expression is evaluated and must
+ return a string with the file name to use to capture stdout. If the
+ return value is not a string, or the resulting path contains illegal
+ characters (such as the path separator `/`) it is an error.
+ - name: successCodes
+ type: int[]?
+ doc: |
+ Exit codes that indicate the process completed successfully.
+
+ - name: temporaryFailCodes
+ type: int[]?
+ doc: |
+ Exit codes that indicate the process failed due to a possibly
+ temporary condition, where executing the process with the same
+ runtime environment and inputs may produce different results.
+
+ - name: permanentFailCodes
+ type: int[]?
+ doc:
+ Exit codes that indicate the process failed due to a permanent logic
+ error, where executing the process with the same runtime environment and
+ same inputs is expected to always fail.
+
+
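For orientation, a minimal complete document conforming to this record might look like the following sketch (tool and file names invented):

```
cwlVersion: v1.0
class: CommandLineTool
baseCommand: [tar, xf]
inputs:
  tarball:
    type: File
    inputBinding:
      position: 1
outputs:
  extracted:
    type: File
    outputBinding:
      glob: hello.txt
```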
+- type: record
+ name: DockerRequirement
+ extends: ProcessRequirement
+ doc: |
+ Indicates that a workflow component should be run in a
+ [Docker](http://docker.com) container, and specifies how to fetch or build
+ the image.
+
+ If a CommandLineTool lists `DockerRequirement` under
+ `hints` (or `requirements`), it may (or must) be run in the specified Docker
+ container.
+
+ The platform must first acquire or install the correct Docker image as
+ specified by `dockerPull`, `dockerImport`, `dockerLoad` or `dockerFile`.
+
+ The platform must execute the tool in the container using `docker run` with
+ the appropriate Docker image and tool command line.
+
+ The workflow platform may provide input files and the designated output
+ directory through the use of volume bind mounts. The platform may rewrite
+ file paths in the input object to correspond to the Docker bind mounted
+ locations.
+
+ When running a tool contained in Docker, the workflow platform must not
+ assume anything about the contents of the Docker container, such as the
+ presence or absence of specific software, except to assume that the
+ generated command line represents a valid command within the runtime
+ environment of the container.
+
+ ## Interaction with other requirements
+
+ If [EnvVarRequirement](#EnvVarRequirement) is specified alongside a
+ DockerRequirement, the environment variables must be provided to Docker
+ using `--env` or `--env-file` and interact with the container's preexisting
+ environment as defined by Docker.
+
+ fields:
+ - name: class
+ type: string
+ doc: "Always 'DockerRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ - name: dockerPull
+ type: string?
+ doc: "Specify a Docker image to retrieve using `docker pull`."
+ - name: dockerLoad
+ type: string?
+ doc: "Specify a HTTP URL from which to download a Docker image using `docker load`."
+ - name: dockerFile
+ type: string?
+ doc: "Supply the contents of a Dockerfile which will be built using `docker build`."
+ - name: dockerImport
+ type: string?
+ doc: "Provide HTTP URL to download and gunzip a Docker images using `docker import."
+ - name: dockerImageId
+ type: string?
+ doc: |
+ The image id that will be used for `docker run`. May be a
+ human-readable image name or the image identifier hash. May be skipped
+ if `dockerPull` is specified, in which case the `dockerPull` image id
+ must be used.
+ - name: dockerOutputDirectory
+ type: string?
+ doc: |
+ Set the designated output directory to a specific location inside the
+ Docker container.
+
+
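A typical usage sketch (the image name is invented); listing the requirement under `hints` asks, rather than demands, that the tool run in the named container:

```
hints:
  - class: DockerRequirement
    dockerPull: debian:stretch
```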
+- type: record
+ name: SoftwareRequirement
+ extends: ProcessRequirement
+ doc: |
+ A list of software packages that should be configured in the environment of
+ the defined process.
+ fields:
+ - name: class
+ type: string
+ doc: "Always 'SoftwareRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ - name: packages
+ type: SoftwarePackage[]
+ doc: "The list of software to be configured."
+ jsonldPredicate:
+ mapSubject: package
+ mapPredicate: specs
+
+- name: SoftwarePackage
+ type: record
+ fields:
+ - name: package
+ type: string
+ doc: "The common name of the software to be configured."
+ - name: version
+ type: string[]?
+ doc: "The (optional) version of the software to configured."
+ - name: specs
+ type: string[]?
+ doc: |
+ Must be one or more IRIs identifying resources for installing or
+ enabling the software. Implementations may provide resolvers which map
+ well-known software spec IRIs to some configuration action.
+
+ For example, an IRI `https://packages.debian.org/jessie/bowtie` could
+ be resolved with `apt-get install bowtie`. An IRI
+ `https://anaconda.org/bioconda/bowtie` could be resolved with `conda
+ install -c bioconda bowtie`.
+
+ Tools may also provide IRIs to index entries such as
+      [RRID](http://www.identifiers.org/rrid/), for example
+      `http://identifiers.org/rrid/RRID:SCR_005476`.
+
+
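A sketch tying the two records together, reusing the `bowtie` example from the `specs` documentation (the version number is invented):

```
hints:
  - class: SoftwareRequirement
    packages:
      - package: bowtie
        version: ["1.1.2"]
        specs: ["https://anaconda.org/bioconda/bowtie"]
```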
+- name: Dirent
+ type: record
+ doc: |
+ Define a file or subdirectory that must be placed in the designated output
+ directory prior to executing the command line tool. May be the result of
+ executing an expression, such as building a configuration file from a
+ template.
+ fields:
+ - name: entryname
+ type: ["null", string, Expression]
+ jsonldPredicate:
+ _id: cwl:entryname
+ doc: |
+ The name of the file or subdirectory to create in the output directory.
+ If `entry` is a File or Directory, this overrides `basename`. Optional.
+ - name: entry
+ type: [string, Expression]
+ jsonldPredicate:
+ _id: cwl:entry
+ doc: |
+ If the value is a string literal or an expression which evaluates to a
+ string, a new file must be created with the string as the file contents.
+
+ If the value is an expression that evaluates to a `File` object, this
+ indicates the referenced file should be added to the designated output
+ directory prior to executing the tool.
+
+ If the value is an expression that evaluates to a `Dirent` object, this
+ indicates that the File or Directory in `entry` should be added to the
+ designated output directory with the name in `entryname`.
+
+ If `writable` is false, the file may be made available using a bind
+ mount or file system link to avoid unnecessary copying of the input
+ file.
+ - name: writable
+ type: boolean?
+ doc: |
+ If true, the file or directory must be writable by the tool. Changes
+ to the file or directory must be isolated and not visible by any other
+ CommandLineTool process. This may be implemented by making a copy of
+ the original file or directory. Default false (files and directories
+ read-only by default).
+
+
+- name: InitialWorkDirRequirement
+ type: record
+ extends: ProcessRequirement
+ doc:
+ Define a list of files and subdirectories that must be created by the
+ workflow platform in the designated output directory prior to executing the
+ command line tool.
+ fields:
+ - name: class
+ type: string
+ doc: InitialWorkDirRequirement
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ - name: listing
+ type:
+ - type: array
+ items: [File, Directory, Dirent, string, Expression]
+ - string
+ - Expression
+ jsonldPredicate:
+ _id: "cwl:listing"
+ doc: |
+ The list of files or subdirectories that must be placed in the
+ designated output directory prior to executing the command line tool.
+
+ May be an expression. If so, the expression return value must validate
+ as `{type: array, items: [File, Directory]}`.
+
+
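A sketch of the configuration-file pattern described under `Dirent` (the input name `infile` and the file contents are invented); the `entry` template is evaluated and written to `settings.conf` in the output directory before the tool runs:

```
requirements:
  - class: InitialWorkDirRequirement
    listing:
      - entryname: settings.conf
        entry: |
          threads=4
          input=$(inputs.infile.path)
```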
+- name: EnvVarRequirement
+ type: record
+ extends: ProcessRequirement
+ doc: |
+ Define a list of environment variables which will be set in the
+ execution environment of the tool. See `EnvironmentDef` for details.
+ fields:
+ - name: class
+ type: string
+ doc: "Always 'EnvVarRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ - name: envDef
+ type: EnvironmentDef[]
+ doc: The list of environment variables.
+ jsonldPredicate:
+ mapSubject: envName
+ mapPredicate: envValue
+
+
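A minimal sketch; thanks to the `mapSubject`/`mapPredicate` hints, `envDef` may equivalently be written as a plain `NAME: value` mapping:

```
requirements:
  - class: EnvVarRequirement
    envDef:
      - envName: LC_ALL
        envValue: C
```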
+- type: record
+ name: ShellCommandRequirement
+ extends: ProcessRequirement
+ doc: |
+ Modify the behavior of CommandLineTool to generate a single string
+ containing a shell command line. Each item in the argument list must be
+ joined into a string separated by single spaces and quoted to prevent
+    interpretation by the shell, unless `CommandLineBinding` for that argument
+ contains `shellQuote: false`. If `shellQuote: false` is specified, the
+ argument is joined into the command string without quoting, which allows
+ the use of shell metacharacters such as `|` for pipes.
+ fields:
+ - name: class
+ type: string
+ doc: "Always 'ShellCommandRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+
+
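A sketch of the escape hatch described above (command and file names invented); with `shellQuote: false` the argument reaches the shell unquoted, so the pipe takes effect:

```
requirements:
  - class: ShellCommandRequirement
arguments:
  - shellQuote: false
    valueFrom: "grep pattern input.txt | wc -l"
```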
+- type: record
+ name: ResourceRequirement
+ extends: ProcessRequirement
+ doc: |
+ Specify basic hardware resource requirements.
+
+ "min" is the minimum amount of a resource that must be reserved to schedule
+ a job. If "min" cannot be satisfied, the job should not be run.
+
+ "max" is the maximum amount of a resource that the job shall be permitted
+ to use. If a node has sufficient resources, multiple jobs may be scheduled
+ on a single node provided each job's "max" resource requirements are
+ met. If a job attempts to exceed its "max" resource allocation, an
+ implementation may deny additional resources, which may result in job
+ failure.
+
+ If "min" is specified but "max" is not, then "max" == "min"
+ If "max" is specified by "min" is not, then "min" == "max".
+
+ It is an error if max < min.
+
+ It is an error if the value of any of these fields is negative.
+
+ If neither "min" nor "max" is specified for a resource, an implementation may provide a default.
+
+ fields:
+ - name: class
+ type: string
+ doc: "Always 'ResourceRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ - name: coresMin
+ type: ["null", long, string, Expression]
+ doc: Minimum reserved number of CPU cores
+
+ - name: coresMax
+ type: ["null", int, string, Expression]
+ doc: Maximum reserved number of CPU cores
+
+ - name: ramMin
+ type: ["null", long, string, Expression]
+ doc: Minimum reserved RAM in mebibytes (2**20)
+
+ - name: ramMax
+ type: ["null", long, string, Expression]
+ doc: Maximum reserved RAM in mebibytes (2**20)
+
+ - name: tmpdirMin
+ type: ["null", long, string, Expression]
+ doc: Minimum reserved filesystem based storage for the designated temporary directory, in mebibytes (2**20)
+
+ - name: tmpdirMax
+ type: ["null", long, string, Expression]
+ doc: Maximum reserved filesystem based storage for the designated temporary directory, in mebibytes (2**20)
+
+ - name: outdirMin
+ type: ["null", long, string, Expression]
+ doc: Minimum reserved filesystem based storage for the designated output directory, in mebibytes (2**20)
+
+ - name: outdirMax
+ type: ["null", long, string, Expression]
+ doc: Maximum reserved filesystem based storage for the designated output directory, in mebibytes (2**20)
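Applying the min/max rules above, a sketch such as the following (numbers invented) reserves two cores and 4 GiB of RAM; because no `coresMax` or `ramMax` is given, the maxima default to the minima:

```
requirements:
  - class: ResourceRequirement
    coresMin: 2
    ramMin: 4096   # mebibytes, i.e. 4 GiB
```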
diff --git a/schema_salad/tests/test_schema/CommonWorkflowLanguage.yml b/schema_salad/tests/test_schema/CommonWorkflowLanguage.yml
new file mode 100644
index 0000000..73921e8
--- /dev/null
+++ b/schema_salad/tests/test_schema/CommonWorkflowLanguage.yml
@@ -0,0 +1,11 @@
+$base: "https://w3id.org/cwl/cwl#"
+
+$namespaces:
+ cwl: "https://w3id.org/cwl/cwl#"
+ sld: "https://w3id.org/cwl/salad#"
+
+$graph:
+
+- $import: Process.yml
+- $import: CommandLineTool.yml
+- $import: Workflow.yml
diff --git a/schema_salad/tests/test_schema/Process.yml b/schema_salad/tests/test_schema/Process.yml
new file mode 100644
index 0000000..8b9bce5
--- /dev/null
+++ b/schema_salad/tests/test_schema/Process.yml
@@ -0,0 +1,743 @@
+$base: "https://w3id.org/cwl/cwl#"
+
+$namespaces:
+ cwl: "https://w3id.org/cwl/cwl#"
+ sld: "https://w3id.org/cwl/salad#"
+
+$graph:
+
+- name: "Common Workflow Language, v1.0"
+ type: documentation
+ doc: {$include: concepts.md}
+
+- $import: "metaschema_base.yml"
+
+- name: BaseTypesDoc
+ type: documentation
+ doc: |
+ ## Base types
+ docChild:
+ - "#CWLType"
+ - "#Process"
+
+- type: enum
+ name: CWLVersion
+ doc: "Version symbols for published CWL document versions."
+ symbols:
+ - cwl:draft-2
+ - cwl:draft-3.dev1
+ - cwl:draft-3.dev2
+ - cwl:draft-3.dev3
+ - cwl:draft-3.dev4
+ - cwl:draft-3.dev5
+ - cwl:draft-3
+ - cwl:draft-4.dev1
+ - cwl:draft-4.dev2
+ - cwl:draft-4.dev3
+ - cwl:v1.0.dev4
+ - cwl:v1.0
+
+- name: CWLType
+ type: enum
+ extends: "sld:PrimitiveType"
+ symbols:
+ - cwl:File
+ - cwl:Directory
+ doc:
+ - "Extends primitive types with the concept of a file and directory as a builtin type."
+ - "File: A File object"
+ - "Directory: A Directory object"
+
+- name: File
+ type: record
+ docParent: "#CWLType"
+ doc: |
+ Represents a file (or group of files if `secondaryFiles` is specified) that
+ must be accessible by tools using standard POSIX file system call API such as
+ open(2) and read(2).
+ fields:
+ - name: class
+ type:
+ type: enum
+ name: File_class
+ symbols:
+ - cwl:File
+ jsonldPredicate:
+ _id: "@type"
+ _type: "@vocab"
+ doc: Must be `File` to indicate this object describes a file.
+ - name: location
+ type: string?
+ doc: |
+ An IRI that identifies the file resource. This may be a relative
+ reference, in which case it must be resolved using the base IRI of the
+ document. The location may refer to a local or remote resource; the
+ implementation must use the IRI to retrieve file content. If an
+ implementation is unable to retrieve the file content stored at a
+ remote resource (due to unsupported protocol, access denied, or other
+ issue) it must signal an error.
+
+ If the `location` field is not provided, the `contents` field must be
+ provided. The implementation must assign a unique identifier for
+ the `location` field.
+
+ If the `path` field is provided but the `location` field is not, an
+ implementation may assign the value of the `path` field to `location`,
+ then follow the rules above.
+ jsonldPredicate:
+ _id: "@id"
+ _type: "@id"
+ - name: path
+ type: string?
+ doc: |
+ The local host path where the File is available when a CommandLineTool is
+ executed. This field must be set by the implementation. The final
+ path component must match the value of `basename`. This field
+ must not be used in any other context. The command line tool being
+      executed must be able to access the file at `path` using the POSIX
+ `open(2)` syscall.
+
+ As a special case, if the `path` field is provided but the `location`
+ field is not, an implementation may assign the value of the `path`
+ field to `location`, and remove the `path` field.
+
+ If the `path` contains [POSIX shell metacharacters](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02)
+ (`|`,`&`, `;`, `<`, `>`, `(`,`)`, `$`,`` ` ``, `\`, `"`, `'`,
+ `<space>`, `<tab>`, and `<newline>`) or characters
+ [not allowed](http://www.iana.org/assignments/idna-tables-6.3.0/idna-tables-6.3.0.xhtml)
+ for [Internationalized Domain Names for Applications](https://tools.ietf.org/html/rfc6452)
+ then implementations may terminate the process with a
+ `permanentFailure`.
+ jsonldPredicate:
+ "_id": "cwl:path"
+ "_type": "@id"
+ - name: basename
+ type: string?
+ doc: |
+ The base name of the file, that is, the name of the file without any
+ leading directory path. The base name must not contain a slash `/`.
+
+ If not provided, the implementation must set this field based on the
+ `location` field by taking the final path component after parsing
+ `location` as an IRI. If `basename` is provided, it is not required to
+ match the value from `location`.
+
+ When this file is made available to a CommandLineTool, it must be named
+ with `basename`, i.e. the final component of the `path` field must match
+ `basename`.
+ jsonldPredicate: "cwl:basename"
+ - name: dirname
+ type: string?
+ doc: |
+ The name of the directory containing file, that is, the path leading up
+ to the final slash in the path such that `dirname + '/' + basename ==
+ path`.
+
+ The implementation must set this field based on the value of `path`
+ prior to evaluating parameter references or expressions in a
+ CommandLineTool document. This field must not be used in any other
+ context.
+ - name: nameroot
+ type: string?
+ doc: |
+ The basename root such that `nameroot + nameext == basename`, and
+ `nameext` is empty or begins with a period and contains at most one
+      period.  For the purposes of path splitting, leading periods on the
+ basename are ignored; a basename of `.cshrc` will have a nameroot of
+ `.cshrc`.
+
+ The implementation must set this field automatically based on the value
+ of `basename` prior to evaluating parameter references or expressions.
+ - name: nameext
+ type: string?
+ doc: |
+ The basename extension such that `nameroot + nameext == basename`, and
+ `nameext` is empty or begins with a period and contains at most one
+ period. Leading periods on the basename are ignored; a basename of
+ `.cshrc` will have an empty `nameext`.
+
+ The implementation must set this field automatically based on the value
+ of `basename` prior to evaluating parameter references or expressions.
+ - name: checksum
+ type: string?
+ doc: |
+ Optional hash code for validating file integrity. Currently must be in the form
+ "sha1$ + hexadecimal string" using the SHA-1 algorithm.
+ - name: size
+ type: long?
+ doc: Optional file size
+ - name: "secondaryFiles"
+ type:
+ - "null"
+ - type: array
+ items: [File, Directory]
+ jsonldPredicate: "cwl:secondaryFiles"
+ doc: |
+ A list of additional files that are associated with the primary file
+ and must be transferred alongside the primary file. Examples include
+ indexes of the primary file, or external references which must be
+      included when loading the primary document.  A file object listed in
+ `secondaryFiles` may itself include `secondaryFiles` for which the same
+ rules apply.
+ - name: format
+ type: string?
+ jsonldPredicate:
+ _id: cwl:format
+ _type: "@id"
+ identity: true
+ doc: |
+ The format of the file: this must be an IRI of a concept node that
+      represents the file format, preferably defined within an ontology.
+ If no ontology is available, file formats may be tested by exact match.
+
+      Reasoning about format compatibility must be done by checking that an
+      input file format is the same, `owl:equivalentClass` or
+      `rdfs:subClassOf` the format required by the input parameter.
+      `owl:equivalentClass` is transitive with `rdfs:subClassOf`, e.g. if
+      `<B> owl:equivalentClass <C>` and `<B> rdfs:subClassOf <A>` then infer
+      `<C> rdfs:subClassOf <A>`.
+
+ File format ontologies may be provided in the "$schema" metadata at the
+ root of the document. If no ontologies are specified in `$schema`, the
+ runtime may perform exact file format matches.
+ - name: contents
+ type: string?
+ doc: |
+ File contents literal. Maximum of 64 KiB.
+
+ If neither `location` nor `path` is provided, `contents` must be
+ non-null. The implementation must assign a unique identifier for the
+ `location` field. When the file is staged as input to CommandLineTool,
+ the value of `contents` must be written to a file.
+
+ If `loadContents` of `inputBinding` or `outputBinding` is true and
+ `location` is valid, the implementation must read up to the first 64
+ KiB of text from the file and place it in the "contents" field.
+
+
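Putting the fields above together, a File object might appear in an input object as in this sketch (all values invented; `format` shown as an ontology concept IRI):

```
class: File
location: https://example.com/data/sample.fastq
basename: sample.fastq
nameroot: sample
nameext: .fastq
size: 4096
format: http://edamontology.org/format_1930
```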
+- name: Directory
+ type: record
+ docAfter: "#File"
+ doc: |
+ Represents a directory to present to a command line tool.
+ fields:
+ - name: class
+ type:
+ type: enum
+ name: Directory_class
+ symbols:
+ - cwl:Directory
+ jsonldPredicate:
+ _id: "@type"
+ _type: "@vocab"
+ doc: Must be `Directory` to indicate this object describes a Directory.
+ - name: location
+ type: string?
+ doc: |
+ An IRI that identifies the directory resource. This may be a relative
+ reference, in which case it must be resolved using the base IRI of the
+ document. The location may refer to a local or remote resource. If
+ the `listing` field is not set, the implementation must use the
+ location IRI to retrieve directory listing. If an implementation is
+ unable to retrieve the directory listing stored at a remote resource (due to
+ unsupported protocol, access denied, or other issue) it must signal an
+ error.
+
+ If the `location` field is not provided, the `listing` field must be
+ provided. The implementation must assign a unique identifier for
+ the `location` field.
+
+ If the `path` field is provided but the `location` field is not, an
+ implementation may assign the value of the `path` field to `location`,
+ then follow the rules above.
+ jsonldPredicate:
+ _id: "@id"
+ _type: "@id"
+ - name: path
+ type: string?
+ doc: |
+ The local path where the Directory is made available prior to executing a
+ CommandLineTool. This must be set by the implementation. This field
+ must not be used in any other context. The command line tool being
+      executed must be able to access the directory at `path` using the POSIX
+ `opendir(2)` syscall.
+
+ If the `path` contains [POSIX shell metacharacters](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02)
+ (`|`,`&`, `;`, `<`, `>`, `(`,`)`, `$`,`` ` ``, `\`, `"`, `'`,
+ `<space>`, `<tab>`, and `<newline>`) or characters
+ [not allowed](http://www.iana.org/assignments/idna-tables-6.3.0/idna-tables-6.3.0.xhtml)
+ for [Internationalized Domain Names for Applications](https://tools.ietf.org/html/rfc6452)
+ then implementations may terminate the process with a
+ `permanentFailure`.
+ jsonldPredicate:
+ _id: "cwl:path"
+ _type: "@id"
+ - name: basename
+ type: string?
+ doc: |
+      The base name of the directory, that is, the name of the directory
+      without any leading directory path.  The base name must not contain a
+      slash `/`.
+
+ If not provided, the implementation must set this field based on the
+ `location` field by taking the final path component after parsing
+ `location` as an IRI. If `basename` is provided, it is not required to
+ match the value from `location`.
+
+      When this directory is made available to a CommandLineTool, it must be named
+ with `basename`, i.e. the final component of the `path` field must match
+ `basename`.
+ jsonldPredicate: "cwl:basename"
+ - name: listing
+ type:
+ - "null"
+ - type: array
+ items: [File, Directory]
+ doc: |
+ List of files or subdirectories contained in this directory. The name
+ of each file or subdirectory is determined by the `basename` field of
+ each `File` or `Directory` object. It is an error if a `File` shares a
+ `basename` with any other entry in `listing`. If two or more
+      `Directory` objects share the same `basename`, this must be treated as
+ equivalent to a single subdirectory with the listings recursively
+ merged.
+ jsonldPredicate:
+ _id: "cwl:listing"
+
+- name: SchemaBase
+ type: record
+ abstract: true
+ fields:
+ - name: label
+ type:
+ - "null"
+ - string
+ jsonldPredicate: "rdfs:label"
+ doc: "A short, human-readable label of this object."
+
+
+- name: Parameter
+ type: record
+ extends: SchemaBase
+ abstract: true
+ doc: |
+ Define an input or output parameter to a process.
+
+ fields:
+ - name: secondaryFiles
+ type:
+ - "null"
+ - string
+ - Expression
+ - type: array
+ items: [string, Expression]
+ jsonldPredicate: "cwl:secondaryFiles"
+ doc: |
+ Only valid when `type: File` or is an array of `items: File`.
+
+ Describes files that must be included alongside the primary file(s).
+
+ If the value is an expression, the value of `self` in the expression
+ must be the primary input or output File to which this binding applies.
+
+ If the value is a string, it specifies that the following pattern
+ should be applied to the primary file:
+
+      1. If the string begins with one or more caret `^` characters, for each
+ caret, remove the last file extension from the path (the last
+ period `.` and all following characters). If there are no file
+ extensions, the path is unchanged.
+ 2. Append the remainder of the string to the end of the file path.
+
+ - name: format
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ - Expression
+ jsonldPredicate:
+ _id: cwl:format
+ _type: "@id"
+ identity: true
+ doc: |
+ Only valid when `type: File` or is an array of `items: File`.
+
+ For input parameters, this must be one or more IRIs of concept nodes
+      that represent file formats which are allowed as input to this
+      parameter, preferably defined within an ontology.  If no ontology is
+ available, file formats may be tested by exact match.
+
+ For output parameters, this is the file format that will be assigned to
+ the output parameter.
+
+ - name: streamable
+ type: boolean?
+ doc: |
+ Only valid when `type: File` or is an array of `items: File`.
+
+ A value of `true` indicates that the file is read or written
+ sequentially without seeking. An implementation may use this flag to
+ indicate whether it is valid to stream file contents using a named
+ pipe. Default: `false`.
+
+ - name: doc
+ type:
+ - string?
+ - string[]?
+ doc: "A documentation string for this type, or an array of strings which should be concatenated."
+ jsonldPredicate: "rdfs:comment"
+
+
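A sketch of the `secondaryFiles` pattern rules above (file names invented): for a primary file `sample.bam`, a plain suffix appends, while each leading caret strips one extension first:

```
inputs:
  reads:
    type: File
    secondaryFiles:
      - .bai     # sample.bam -> sample.bam.bai
      - ^.bai    # sample.bam -> sample.bai
```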
+- type: enum
+ name: Expression
+ doc: |
+ 'Expression' is not a real type. It indicates that a field must allow
+ runtime parameter references. If [InlineJavascriptRequirement](#InlineJavascriptRequirement)
+ is declared and supported by the platform, the field must also allow
+ Javascript expressions.
+ symbols:
+ - cwl:ExpressionPlaceholder
+
+
+- name: InputBinding
+ type: record
+ abstract: true
+ fields:
+ - name: loadContents
+ type:
+ - "null"
+ - boolean
+ jsonldPredicate: "cwl:loadContents"
+ doc: |
+ Only valid when `type: File` or is an array of `items: File`.
+
+ Read up to the first 64 KiB of text from the file and place it in the
+ "contents" field of the file object for use by expressions.
+
+
+- name: OutputBinding
+ type: record
+ abstract: true
+
+
+- name: InputSchema
+ extends: SchemaBase
+ type: record
+ abstract: true
+
+
+- name: OutputSchema
+ extends: SchemaBase
+ type: record
+ abstract: true
+
+
+- name: InputRecordField
+ type: record
+ extends: "sld:RecordField"
+ specialize:
+ - specializeFrom: "sld:RecordSchema"
+ specializeTo: InputRecordSchema
+ - specializeFrom: "sld:EnumSchema"
+ specializeTo: InputEnumSchema
+ - specializeFrom: "sld:ArraySchema"
+ specializeTo: InputArraySchema
+ - specializeFrom: "sld:PrimitiveType"
+ specializeTo: CWLType
+ fields:
+ - name: inputBinding
+ type: InputBinding?
+ jsonldPredicate: "cwl:inputBinding"
+ - name: label
+ type: string?
+ jsonldPredicate: "rdfs:label"
+ doc: "A short, human-readable label of this process object."
+
+
+- name: InputRecordSchema
+ type: record
+ extends: ["sld:RecordSchema", InputSchema]
+ specialize:
+ - specializeFrom: "sld:RecordField"
+ specializeTo: InputRecordField
+
+
+- name: InputEnumSchema
+ type: record
+ extends: ["sld:EnumSchema", InputSchema]
+ fields:
+ - name: inputBinding
+ type: InputBinding?
+ jsonldPredicate: "cwl:inputBinding"
+
+
+- name: InputArraySchema
+ type: record
+ extends: ["sld:ArraySchema", InputSchema]
+ specialize:
+ - specializeFrom: "sld:RecordSchema"
+ specializeTo: InputRecordSchema
+ - specializeFrom: "sld:EnumSchema"
+ specializeTo: InputEnumSchema
+ - specializeFrom: "sld:ArraySchema"
+ specializeTo: InputArraySchema
+ - specializeFrom: "sld:PrimitiveType"
+ specializeTo: CWLType
+ fields:
+ - name: inputBinding
+ type: InputBinding?
+ jsonldPredicate: "cwl:inputBinding"
+
+
+- name: OutputRecordField
+ type: record
+ extends: "sld:RecordField"
+ specialize:
+ - specializeFrom: "sld:RecordSchema"
+ specializeTo: OutputRecordSchema
+ - specializeFrom: "sld:EnumSchema"
+ specializeTo: OutputEnumSchema
+ - specializeFrom: "sld:ArraySchema"
+ specializeTo: OutputArraySchema
+ - specializeFrom: "sld:PrimitiveType"
+ specializeTo: CWLType
+ fields:
+ - name: outputBinding
+ type: OutputBinding?
+ jsonldPredicate: "cwl:outputBinding"
+
+
+- name: OutputRecordSchema
+ type: record
+ extends: ["sld:RecordSchema", "#OutputSchema"]
+ docParent: "#OutputParameter"
+ specialize:
+ - specializeFrom: "sld:RecordField"
+ specializeTo: OutputRecordField
+
+
+- name: OutputEnumSchema
+ type: record
+ extends: ["sld:EnumSchema", OutputSchema]
+ docParent: "#OutputParameter"
+ fields:
+ - name: outputBinding
+ type: OutputBinding?
+ jsonldPredicate: "cwl:outputBinding"
+
+- name: OutputArraySchema
+ type: record
+ extends: ["sld:ArraySchema", OutputSchema]
+ docParent: "#OutputParameter"
+ specialize:
+ - specializeFrom: "sld:RecordSchema"
+ specializeTo: OutputRecordSchema
+ - specializeFrom: "sld:EnumSchema"
+ specializeTo: OutputEnumSchema
+ - specializeFrom: "sld:ArraySchema"
+ specializeTo: OutputArraySchema
+ - specializeFrom: "sld:PrimitiveType"
+ specializeTo: CWLType
+ fields:
+ - name: outputBinding
+ type: OutputBinding?
+ jsonldPredicate: "cwl:outputBinding"
+
+
+- name: InputParameter
+ type: record
+ extends: Parameter
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: "The unique identifier for this parameter object."
+
+ - name: inputBinding
+ type: InputBinding?
+ jsonldPredicate: "cwl:inputBinding"
+ doc: |
+ Describes how to handle the inputs of a process and convert them
+ into a concrete form for execution, such as command line parameters.
+
+ - name: default
+ type: Any?
+ jsonldPredicate: "cwl:default"
+ doc: |
+ The default value for this parameter if not provided in the input
+ object.
+
+ - name: type
+ type:
+ - "null"
+ - CWLType
+ - InputRecordSchema
+ - InputEnumSchema
+ - InputArraySchema
+ - string
+ - type: array
+ items:
+ - CWLType
+ - InputRecordSchema
+ - InputEnumSchema
+ - InputArraySchema
+ - string
+ jsonldPredicate:
+ "_id": "sld:type"
+ "_type": "@vocab"
+ refScope: 2
+ typeDSL: True
+ doc: |
+ Specify valid types of data that may be assigned to this parameter.
+
+- name: OutputParameter
+ type: record
+ extends: Parameter
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: "The unique identifier for this parameter object."
+ - name: outputBinding
+ type: OutputBinding?
+ jsonldPredicate: "cwl:outputBinding"
+ doc: |
+ Describes how to handle the outputs of a process.
+
+
+- type: record
+ name: ProcessRequirement
+ abstract: true
+ doc: |
+ A process requirement declares a prerequisite that may or must be fulfilled
+ before executing a process. See [`Process.hints`](#process) and
+ [`Process.requirements`](#process).
+
+ Process requirements are the primary mechanism for specifying extensions to
+ the CWL core specification.
+
+
+- type: record
+ name: Process
+ abstract: true
+ doc: |
+
+ The base executable type in CWL is the `Process` object defined by the
+ document. Note that the `Process` object is abstract and cannot be
+ directly executed.
+
+ fields:
+ - name: id
+ type: string?
+ jsonldPredicate: "@id"
+ doc: "The unique identifier for this process object."
+ - name: inputs
+ type:
+ type: array
+ items: InputParameter
+ jsonldPredicate:
+ _id: "cwl:inputs"
+ mapSubject: id
+ mapPredicate: type
+ doc: |
+ Defines the input parameters of the process. The process is ready to
+ run when all required input parameters are associated with concrete
+ values. Input parameters include a schema for each parameter which is
+ used to validate the input object. It may also be used to build a user
+ interface for constructing the input object.
+ - name: outputs
+ type:
+ type: array
+ items: OutputParameter
+ jsonldPredicate:
+ _id: "cwl:outputs"
+ mapSubject: id
+ mapPredicate: type
+ doc: |
+ Defines the parameters representing the output of the process. May be
+ used to generate and/or validate the output object.
+ - name: requirements
+ type: ProcessRequirement[]?
+ jsonldPredicate:
+ _id: "cwl:requirements"
+ mapSubject: class
+ doc: |
+ Declares requirements that apply to either the runtime environment or the
+ workflow engine that must be met in order to execute this process. If
+ an implementation cannot satisfy all requirements, or a requirement is
+ listed which is not recognized by the implementation, it is a fatal
+ error and the implementation must not attempt to run the process,
+ unless overridden at user option.
+ - name: hints
+ type: Any[]?
+ doc: |
+ Declares hints applying to either the runtime environment or the
+ workflow engine that may be helpful in executing this process. It is
+ not an error if an implementation cannot satisfy all hints, however
+ the implementation may report a warning.
+ jsonldPredicate:
+ _id: cwl:hints
+ noLinkCheck: true
+ mapSubject: class
+ - name: label
+ type: string?
+ jsonldPredicate: "rdfs:label"
+ doc: "A short, human-readable label of this process object."
+ - name: doc
+ type: string?
+ jsonldPredicate: "rdfs:comment"
+ doc: "A long, human-readable description of this process object."
+ - name: cwlVersion
+ type: CWLVersion?
+ doc: |
+ CWL document version. Always required at the document root. Not
+ required for a Process embedded inside another Process.
+ jsonldPredicate:
+ "_id": "cwl:cwlVersion"
+ "_type": "@vocab"
+
+- name: InlineJavascriptRequirement
+ type: record
+ extends: ProcessRequirement
+ doc: |
+ Indicates that the workflow platform must support inline Javascript expressions.
+ If this requirement is not present, the workflow platform must not perform expression
+ interpolation.
+ fields:
+ - name: class
+ type: string
+ doc: "Always 'InlineJavascriptRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ - name: expressionLib
+ type: string[]?
+ doc: |
+ Additional code fragments that will also be inserted
+ before executing the expression code. Allows for function definitions that may
+ be called from CWL expressions.
+
+
+- name: SchemaDefRequirement
+ type: record
+ extends: ProcessRequirement
+ doc: |
+ This field consists of an array of type definitions which must be used when
+ interpreting the `inputs` and `outputs` fields. When a `type` field
+ contains an IRI, the implementation must check if the type is defined in
+ `schemaDefs` and use that definition. If the type is not found in
+ `schemaDefs`, it is an error. The entries in `schemaDefs` must be
+ processed in the order listed such that later schema definitions may refer
+ to earlier schema definitions.
+ fields:
+ - name: class
+ type: string
+ doc: "Always 'SchemaDefRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ - name: types
+ type:
+ type: array
+ items: InputSchema
+ doc: The list of type definitions.
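
As a rough usage sketch: the schema above can be loaded with `load_schema`, the same entry point the new tests in this commit use (the aggregating file `CommonWorkflowLanguage.yml` is assumed to `$import` the documents shown here):

```
# Minimal sketch, assuming a source checkout layout.
from schema_salad.schema import load_schema

document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
    "schema_salad/tests/test_schema/CommonWorkflowLanguage.yml")
# avsc_names now holds the compiled record/enum types such as Process.
print(sorted(avsc_names.names))
```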
diff --git a/schema_salad/tests/test_schema/Workflow.yml b/schema_salad/tests/test_schema/Workflow.yml
new file mode 100644
index 0000000..26bde8e
--- /dev/null
+++ b/schema_salad/tests/test_schema/Workflow.yml
@@ -0,0 +1,582 @@
+$base: "https://w3id.org/cwl/cwl#"
+
+$namespaces:
+ cwl: "https://w3id.org/cwl/cwl#"
+
+$graph:
+
+- name: "WorkflowDoc"
+ type: documentation
+ doc:
+ - |
+ # Common Workflow Language (CWL) Workflow Description, v1.0
+
+ This version:
+ * https://w3id.org/cwl/v1.0/
+
+ Current version:
+ * https://w3id.org/cwl/
+ - "\n\n"
+ - {$include: contrib.md}
+ - "\n\n"
+ - |
+ # Abstract
+
+ One way to define a workflow is as an analysis task represented by a
+ directed graph describing a sequence of operations that transform an
+ input data set to output. This specification defines the Common Workflow
+ Language (CWL) Workflow description, a vendor-neutral standard for
+ representing workflows intended to be portable across a variety of
+ computing platforms.
+
+ - {$include: intro.md}
+
+ - |
+
+ ## Introduction to v1.0
+
+ This specification represents the first full release from the CWL group.
+ Since draft-3, this draft introduces the following changes and additions:
+
+ * The `inputs` and `outputs` fields have been renamed `in` and `out`.
+ * Syntax simplifications, denoted by the `map<>` syntax. Example: `in`
+ contains a list of items, each with an id. Now one can specify
+ a mapping of that identifier to the corresponding
+ `InputParameter`.
+ ```
+ in:
+ - id: one
+ type: string
+ doc: First input parameter
+ - id: two
+ type: int
+ doc: Second input parameter
+ ```
+ can be
+ ```
+ in:
+ one:
+ type: string
+ doc: First input parameter
+ two:
+ type: int
+ doc: Second input parameter
+ ```
+ * The common field `description` has been renamed to `doc`.
+
+ ## Purpose
+
+ The Common Workflow Language Workflow Description expresses
+ workflows for data-intensive science, such as Bioinformatics, Chemistry,
+ Physics, and Astronomy. This specification is intended to define a data
+ and execution model for Workflows that can be implemented on top of a
+ variety of computing platforms, ranging from an individual workstation to
+ clusters, grids, clouds, and high performance computing systems.
+
+ - {$include: concepts.md}
+
+- name: ExpressionToolOutputParameter
+ type: record
+ extends: OutputParameter
+ fields:
+ - name: type
+ type:
+ - "null"
+ - "#CWLType"
+ - "#OutputRecordSchema"
+ - "#OutputEnumSchema"
+ - "#OutputArraySchema"
+ - string
+ - type: array
+ items:
+ - "#CWLType"
+ - "#OutputRecordSchema"
+ - "#OutputEnumSchema"
+ - "#OutputArraySchema"
+ - string
+ jsonldPredicate:
+ "_id": "sld:type"
+ "_type": "@vocab"
+ refScope: 2
+ typeDSL: True
+ doc: |
+ Specify valid types of data that may be assigned to this parameter.
+
+- type: record
+ name: ExpressionTool
+ extends: Process
+ specialize:
+ - specializeFrom: "#OutputParameter"
+ specializeTo: "#ExpressionToolOutputParameter"
+ documentRoot: true
+ doc: |
+ Execute an expression as a Workflow step.
+ fields:
+ - name: "class"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ type: string
+ - name: expression
+ type: [string, Expression]
+ doc: |
+ The expression to execute. The expression must return a JSON object which
+ matches the output parameters of the ExpressionTool.
+
+- name: LinkMergeMethod
+ type: enum
+ docParent: "#WorkflowStepInput"
+ doc: The input link merge method, described in [WorkflowStepInput](#WorkflowStepInput).
+ symbols:
+ - merge_nested
+ - merge_flattened
+
+
+- name: WorkflowOutputParameter
+ type: record
+ extends: OutputParameter
+ docParent: "#Workflow"
+ doc: |
+ Describe an output parameter of a workflow. The parameter must be
+ connected to one or more parameters defined in the workflow that will
+ provide the value of the output parameter.
+ fields:
+ - name: outputSource
+ doc: |
+ Specifies one or more workflow parameters that supply the value of
+ the output parameter.
+ jsonldPredicate:
+ "_id": "cwl:outputSource"
+ "_type": "@id"
+ refScope: 0
+ type:
+ - string?
+ - string[]?
+ - name: linkMerge
+ type: ["null", "#LinkMergeMethod"]
+ jsonldPredicate: "cwl:linkMerge"
+ doc: |
+ The method to use to merge multiple sources into a single array.
+ If not specified, the default method is "merge_nested".
+ - name: type
+ type:
+ - "null"
+ - "#CWLType"
+ - "#OutputRecordSchema"
+ - "#OutputEnumSchema"
+ - "#OutputArraySchema"
+ - string
+ - type: array
+ items:
+ - "#CWLType"
+ - "#OutputRecordSchema"
+ - "#OutputEnumSchema"
+ - "#OutputArraySchema"
+ - string
+ jsonldPredicate:
+ "_id": "sld:type"
+ "_type": "@vocab"
+ refScope: 2
+ typeDSL: True
+ doc: |
+ Specify valid types of data that may be assigned to this parameter.
+
+
+- name: Sink
+ type: record
+ abstract: true
+ fields:
+ - name: source
+ doc: |
+ Specifies one or more workflow parameters that will provide input to
+ the underlying step parameter.
+ jsonldPredicate:
+ "_id": "cwl:source"
+ "_type": "@id"
+ refScope: 2
+ type:
+ - string?
+ - string[]?
+ - name: linkMerge
+ type: LinkMergeMethod?
+ jsonldPredicate: "cwl:linkMerge"
+ doc: |
+ The method to use to merge multiple inbound links into a single array.
+ If not specified, the default method is "merge_nested".
+
+
+- type: record
+ name: WorkflowStepInput
+ extends: Sink
+ docParent: "#WorkflowStep"
+ doc: |
+ The input of a workflow step connects an upstream parameter (from the
+ workflow inputs, or the outputs of other workflow steps) with the input
+ parameters of the underlying step.
+
+ ## Input object
+
+ A WorkflowStepInput object must contain an `id` field in the form
+ `#fieldname` or `#stepname.fieldname`. When the `id` field contains a
+ period `.` the field name consists of the characters following the final
+ period. This defines a field of the workflow step input object with the
+ value of the `source` parameter(s).
+
+ ## Merging
+
+ To merge multiple inbound data links,
+ [MultipleInputFeatureRequirement](#MultipleInputFeatureRequirement) must be specified
+ in the workflow or workflow step requirements.
+
+ If the sink parameter is an array, or named in a [workflow
+ scatter](#WorkflowStep) operation, there may be multiple inbound data links
+ listed in the `source` field. The values from the input links are merged
+ depending on the method specified in the `linkMerge` field. If not
+ specified, the default method is "merge_nested" (a sketch of both
+ methods follows this record).
+
+ * **merge_nested**
+
+ The input must be an array consisting of exactly one entry for each
+ input link. If "merge_nested" is specified with a single link, the value
+ from the link must be wrapped in a single-item list.
+
+ * **merge_flattened**
+
+ 1. The source and sink parameters must be compatible types, or the source
+ type must be compatible with a single element from the "items" type of
+ the destination array parameter.
+ 2. Source parameters which are arrays are concatenated.
+ Source parameters which are single element types are appended as
+ single elements.
+
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: "A unique identifier for this workflow input parameter."
+ - name: default
+ type: ["null", Any]
+ doc: |
+ The default value for this parameter if there is no `source`
+ field.
+ jsonldPredicate: "cwl:default"
+ - name: valueFrom
+ type:
+ - "null"
+ - "string"
+ - "#Expression"
+ jsonldPredicate: "cwl:valueFrom"
+ doc: |
+ To use valueFrom, [StepInputExpressionRequirement](#StepInputExpressionRequirement) must
+ be specified in the workflow or workflow step requirements.
+
+ If `valueFrom` is a constant string value, use this as the value for
+ this input parameter.
+
+ If `valueFrom` is a parameter reference or expression, it must be
+ evaluated to yield the actual value to be assigned to the input field.
+
+ The `self` value in the parameter reference or expression must be
+ the value of the parameter(s) specified in the `source` field, or
+ null if there is no `source` field.
+
+ The value of `inputs` in the parameter reference or expression must be
+ the input object to the workflow step after assigning the `source`
+ values and then scattering. The order of evaluating `valueFrom` among
+ step input parameters is undefined and the result of evaluating
+ `valueFrom` on a parameter must not be visible to evaluation of
+ `valueFrom` on other parameters.
+
+
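
The two `linkMerge` methods described in `WorkflowStepInput` above boil down to wrap-versus-concatenate. A plain-Python illustration of the semantics (not engine code):

```
# Illustrative semantics of the two linkMerge methods.
def merge_nested(source_values):
    # exactly one entry per inbound link; a single link still yields
    # a one-item list
    return list(source_values)

def merge_flattened(source_values):
    merged = []
    for v in source_values:
        if isinstance(v, list):
            merged.extend(v)   # array sources are concatenated
        else:
            merged.append(v)   # single-element sources are appended
    return merged

print(merge_nested([[1, 2], 3]))     # [[1, 2], 3]
print(merge_flattened([[1, 2], 3]))  # [1, 2, 3]
```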
+- type: record
+ name: WorkflowStepOutput
+ docParent: "#WorkflowStep"
+ doc: |
+ Associate an output parameter of the underlying process with a workflow
+ parameter. The workflow parameter (given in the `id` field) may be used
+ as a `source` to connect with input parameters of other workflow steps, or
+ with an output parameter of the process.
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: |
+ A unique identifier for this workflow output parameter. This is the
+ identifier to use in the `source` field of `WorkflowStepInput` to
+ connect the output value to downstream parameters.
+
+
+- name: ScatterMethod
+ type: enum
+ docParent: "#WorkflowStep"
+ doc: The scatter method, as described in [workflow step scatter](#WorkflowStep).
+ symbols:
+ - dotproduct
+ - nested_crossproduct
+ - flat_crossproduct
+
+
+- name: WorkflowStep
+ type: record
+ docParent: "#Workflow"
+ doc: |
+ A workflow step is an executable element of a workflow. It specifies the
+ underlying process implementation (such as `CommandLineTool` or another
+ `Workflow`) in the `run` field and connects the input and output parameters
+ of the underlying process to workflow parameters.
+
+ # Scatter/gather
+
+ To use scatter/gather,
+ [ScatterFeatureRequirement](#ScatterFeatureRequirement) must be specified
+ in the workflow or workflow step requirements.
+
+ A "scatter" operation specifies that the associated workflow step or
+ subworkflow should execute separately over a list of input elements. Each
+ job making up a scatter operation is independent and may be executed
+ concurrently.
+
+ The `scatter` field specifies one or more input parameters which will be
+ scattered. An input parameter may be listed more than once. The declared
+ type of each input parameter is implicitly wrapped in an array for each
+ time it appears in the `scatter` field. As a result, upstream parameters
+ which are connected to scattered parameters may be arrays.
+
+ All output parameter types are also implicitly wrapped in arrays. Each job
+ in the scatter results in an entry in the output array.
+
+ If `scatter` declares more than one input parameter, `scatterMethod`
+ describes how to decompose the input into a discrete set of jobs (see
+ the sketch after this record).
+
+ * **dotproduct** specifies that the input arrays are aligned and one
+ element taken from each array to construct each job. It is an error
+ if the input arrays are not all the same length.
+
+ * **nested_crossproduct** specifies the Cartesian product of the inputs,
+ producing a job for every combination of the scattered inputs. The
+ output must be nested arrays for each level of scattering, in the
+ order that the input arrays are listed in the `scatter` field.
+
+ * **flat_crossproduct** specifies the Cartesian product of the inputs,
+ producing a job for every combination of the scattered inputs. The
+ output arrays must be flattened to a single level, but otherwise listed in the
+ order that the input arrays are listed in the `scatter` field.
+
+ # Subworkflows
+
+ To specify a nested workflow as part of a workflow step,
+ [SubworkflowFeatureRequirement](#SubworkflowFeatureRequirement) must be
+ specified in the workflow or workflow step requirements.
+
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: "The unique identifier for this workflow step."
+ - name: in
+ type: WorkflowStepInput[]
+ jsonldPredicate:
+ _id: "cwl:in"
+ mapSubject: id
+ mapPredicate: source
+ doc: |
+ Defines the input parameters of the workflow step. The process is ready to
+ run when all required input parameters are associated with concrete
+ values. Input parameters include a schema for each parameter which is
+ used to validate the input object. It may also be used to build a user
+ interface for constructing the input object.
+ - name: out
+ type:
+ - type: array
+ items: [string, WorkflowStepOutput]
+ jsonldPredicate:
+ _id: "cwl:out"
+ _type: "@id"
+ identity: true
+ doc: |
+ Defines the parameters representing the output of the process. May be
+ used to generate and/or validate the output object.
+ - name: requirements
+ type: ProcessRequirement[]?
+ jsonldPredicate:
+ _id: "cwl:requirements"
+ mapSubject: class
+ doc: |
+ Declares requirements that apply to either the runtime environment or the
+ workflow engine that must be met in order to execute this workflow step. If
+ an implementation cannot satisfy all requirements, or a requirement is
+ listed which is not recognized by the implementation, it is a fatal
+ error and the implementation must not attempt to run the process,
+ unless overridden at user option.
+ - name: hints
+ type: Any[]?
+ jsonldPredicate:
+ _id: "cwl:hints"
+ noLinkCheck: true
+ mapSubject: class
+ doc: |
+ Declares hints applying to either the runtime environment or the
+ workflow engine that may be helpful in executing this workflow step. It is
+ not an error if an implementation cannot satisfy all hints, however
+ the implementation may report a warning.
+ - name: label
+ type: string?
+ jsonldPredicate: "rdfs:label"
+ doc: "A short, human-readable label of this process object."
+ - name: doc
+ type: string?
+ jsonldPredicate: "rdfs:comment"
+ doc: "A long, human-readable description of this process object."
+ - name: run
+ type: [string, Process]
+ jsonldPredicate:
+ "_id": "cwl:run"
+ "_type": "@id"
+ doc: |
+ Specifies the process to run.
+ - name: scatter
+ type:
+ - string?
+ - string[]?
+ jsonldPredicate:
+ "_id": "cwl:scatter"
+ "_type": "@id"
+ "_container": "@list"
+ refScope: 0
+ - name: scatterMethod
+ doc: |
+ Required if `scatter` is an array of more than one element.
+ type: ScatterMethod?
+ jsonldPredicate:
+ "_id": "cwl:scatterMethod"
+ "_type": "@vocab"
+
+
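
The three `scatterMethod` values above map onto zip and Cartesian-product operations. A hedged sketch of the job decomposition (plain Python, not engine code):

```
# Illustrative decomposition for the scatter methods described above.
from itertools import product

def dotproduct(*arrays):
    if len(set(len(a) for a in arrays)) > 1:
        raise ValueError("dotproduct requires arrays of equal length")
    return list(zip(*arrays))      # one job per aligned element

def flat_crossproduct(*arrays):
    return list(product(*arrays))  # one job per combination, flat output

a, b = ["x", "y"], [1, 2]
print(dotproduct(a, b))            # [('x', 1), ('y', 2)]
print(flat_crossproduct(a, b))     # [('x', 1), ('x', 2), ('y', 1), ('y', 2)]
# nested_crossproduct yields the same jobs, but its outputs stay nested,
# one array level per scattered input.
```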
+- name: Workflow
+ type: record
+ extends: "#Process"
+ documentRoot: true
+ specialize:
+ - specializeFrom: "#OutputParameter"
+ specializeTo: "#WorkflowOutputParameter"
+ doc: |
+ A workflow describes a set of **steps** and the **dependencies** between
+ those steps. When a step produces output that will be consumed by a
+ second step, the first step is a dependency of the second step.
+
+ When there is a dependency, the workflow engine must execute the preceding
+ step and wait for it to successfully produce output before executing the
+ dependent step. If two steps are defined in the workflow graph that
+ are not directly or indirectly dependent, these steps are **independent**,
+ and may execute in any order or execute concurrently. A workflow is
+ complete when all steps have been executed.
+
+ Dependencies between parameters are expressed using the `source` field on
+ [workflow step input parameters](#WorkflowStepInput) and [workflow output
+ parameters](#WorkflowOutputParameter).
+
+ The `source` field expresses the dependency of one parameter on another
+ such that when a value is associated with the parameter specified by
+ `source`, that value is propagated to the destination parameter. When all
+ data links inbound to a given step are fulfilled, the step is ready to
+ execute.
+
+ ## Workflow success and failure
+
+ A completed step must result in one of `success`, `temporaryFailure` or
+ `permanentFailure` states. An implementation may choose to retry a step
+ execution which resulted in `temporaryFailure`. An implementation may
+ choose to either continue running other steps of a workflow, or terminate
+ immediately upon `permanentFailure`.
+
+ * If any step of a workflow execution results in `permanentFailure`, then
+ the workflow status is `permanentFailure`.
+
+ * If one or more steps result in `temporaryFailure` and all other steps
+ complete with `success` or are not executed, then the workflow status is
+ `temporaryFailure`.
+
+ * If all workflow steps are executed and complete with `success`, then the
+ workflow status is `success`.
+
+ # Extensions
+
+ [ScatterFeatureRequirement](#ScatterFeatureRequirement) and
+ [SubworkflowFeatureRequirement](#SubworkflowFeatureRequirement) are
+ available as standard [extensions](#Extensions_and_Metadata) to core
+ workflow semantics.
+
+ fields:
+ - name: "class"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ type: string
+ - name: steps
+ doc: |
+ The individual steps that make up the workflow. Each step is executed when all of its
+ input data links are fulfilled. An implementation may choose to execute
+ the steps in a different order than listed and/or execute steps
+ concurrently, provided that dependencies between steps are met.
+ type:
+ - type: array
+ items: "#WorkflowStep"
+ jsonldPredicate:
+ mapSubject: id
+
+
+- type: record
+ name: SubworkflowFeatureRequirement
+ extends: ProcessRequirement
+ doc: |
+ Indicates that the workflow platform must support nested workflows in
+ the `run` field of [WorkflowStep](#WorkflowStep).
+ fields:
+ - name: "class"
+ type: "string"
+ doc: "Always 'SubworkflowFeatureRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+
+- name: ScatterFeatureRequirement
+ type: record
+ extends: ProcessRequirement
+ doc: |
+ Indicates that the workflow platform must support the `scatter` and
+ `scatterMethod` fields of [WorkflowStep](#WorkflowStep).
+ fields:
+ - name: "class"
+ type: "string"
+ doc: "Always 'ScatterFeatureRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+
+- name: MultipleInputFeatureRequirement
+ type: record
+ extends: ProcessRequirement
+ doc: |
+ Indicates that the workflow platform must support multiple inbound data links
+ listed in the `source` field of [WorkflowStepInput](#WorkflowStepInput).
+ fields:
+ - name: "class"
+ type: "string"
+ doc: "Always 'MultipleInputFeatureRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+
+- type: record
+ name: StepInputExpressionRequirement
+ extends: ProcessRequirement
+ doc: |
+ Indicates that the workflow platform must support the `valueFrom` field
+ of [WorkflowStepInput](#WorkflowStepInput).
+ fields:
+ - name: "class"
+ type: "string"
+ doc: "Always 'StepInputExpressionRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
diff --git a/schema_salad/tests/test_schema/concepts.md b/schema_salad/tests/test_schema/concepts.md
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/schema_salad/tests/test_schema/concepts.md
@@ -0,0 +1 @@
+
diff --git a/schema_salad/tests/test_schema/contrib.md b/schema_salad/tests/test_schema/contrib.md
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/schema_salad/tests/test_schema/contrib.md
@@ -0,0 +1 @@
+
diff --git a/schema_salad/tests/test_schema/intro.md b/schema_salad/tests/test_schema/intro.md
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/schema_salad/tests/test_schema/intro.md
@@ -0,0 +1 @@
+
diff --git a/schema_salad/tests/test_schema/invocation.md b/schema_salad/tests/test_schema/invocation.md
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/schema_salad/tests/test_schema/invocation.md
@@ -0,0 +1 @@
+
diff --git a/schema_salad/tests/test_schema/metaschema_base.yml b/schema_salad/tests/test_schema/metaschema_base.yml
new file mode 100644
index 0000000..73511d1
--- /dev/null
+++ b/schema_salad/tests/test_schema/metaschema_base.yml
@@ -0,0 +1,164 @@
+$base: "https://w3id.org/cwl/salad#"
+
+$namespaces:
+ sld: "https://w3id.org/cwl/salad#"
+ dct: "http://purl.org/dc/terms/"
+ rdf: "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ rdfs: "http://www.w3.org/2000/01/rdf-schema#"
+ xsd: "http://www.w3.org/2001/XMLSchema#"
+
+$graph:
+- name: PrimitiveType
+ type: enum
+ symbols:
+ - "sld:null"
+ - "xsd:boolean"
+ - "xsd:int"
+ - "xsd:long"
+ - "xsd:float"
+ - "xsd:double"
+ - "xsd:string"
+ doc:
+ - |
+ Salad data types are based on Avro schema declarations. Refer to the
+ [Avro schema declaration documentation](https://avro.apache.org/docs/current/spec.html#schemas) for
+ detailed information.
+ - "null: no value"
+ - "boolean: a binary value"
+ - "int: 32-bit signed integer"
+ - "long: 64-bit signed integer"
+ - "float: single precision (32-bit) IEEE 754 floating-point number"
+ - "double: double precision (64-bit) IEEE 754 floating-point number"
+ - "string: Unicode character sequence"
+
+
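
For reference, the `int` and `long` ranges implied above are the 32- and 64-bit two's-complement bounds; the validator changes later in this commit check exactly these constants:

```
# Bounds used when validating the numeric primitives listed above
# (mirroring the constants in schema_salad/validate.py).
INT_MIN_VALUE = -(1 << 31)
INT_MAX_VALUE = (1 << 31) - 1
LONG_MIN_VALUE = -(1 << 63)
LONG_MAX_VALUE = (1 << 63) - 1

def fits_int(v):
    return isinstance(v, int) and INT_MIN_VALUE <= v <= INT_MAX_VALUE

print(fits_int(2 ** 31 - 1))  # True
print(fits_int(2 ** 31))      # False: out of 32-bit range
```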
+- name: Any
+ type: enum
+ symbols: ["#Any"]
+ doc: |
+ The **Any** type validates for any non-null value.
+
+
+- name: RecordField
+ type: record
+ doc: A field of a record.
+ fields:
+ - name: name
+ type: string
+ jsonldPredicate: "@id"
+ doc: |
+ The name of the field
+
+ - name: doc
+ type: string?
+ doc: |
+ A documentation string for this field
+ jsonldPredicate: "rdfs:comment"
+
+ - name: type
+ type:
+ - PrimitiveType
+ - RecordSchema
+ - EnumSchema
+ - ArraySchema
+ - string
+ - type: array
+ items:
+ - PrimitiveType
+ - RecordSchema
+ - EnumSchema
+ - ArraySchema
+ - string
+ jsonldPredicate:
+ _id: sld:type
+ _type: "@vocab"
+ typeDSL: true
+ refScope: 2
+ doc: |
+ The field type
+
+
+- name: RecordSchema
+ type: record
+ fields:
+ type:
+ doc: "Must be `record`"
+ type:
+ name: Record_symbol
+ type: enum
+ symbols:
+ - "sld:record"
+ jsonldPredicate:
+ _id: "sld:type"
+ _type: "@vocab"
+ typeDSL: true
+ refScope: 2
+ fields:
+ type: RecordField[]?
+ jsonldPredicate:
+ _id: sld:fields
+ mapSubject: name
+ mapPredicate: type
+ doc: "Defines the fields of the record."
+
+
+- name: EnumSchema
+ type: record
+ doc: |
+ Define an enumerated type.
+ fields:
+ type:
+ doc: "Must be `enum`"
+ type:
+ name: Enum_symbol
+ type: enum
+ symbols:
+ - "sld:enum"
+ jsonldPredicate:
+ _id: "sld:type"
+ _type: "@vocab"
+ typeDSL: true
+ refScope: 2
+ symbols:
+ type: string[]
+ jsonldPredicate:
+ _id: "sld:symbols"
+ _type: "@id"
+ identity: true
+ doc: "Defines the set of valid symbols."
+
+
+- name: ArraySchema
+ type: record
+ fields:
+ type:
+ doc: "Must be `array`"
+ type:
+ name: Array_symbol
+ type: enum
+ symbols:
+ - "sld:array"
+ jsonldPredicate:
+ _id: "sld:type"
+ _type: "@vocab"
+ typeDSL: true
+ refScope: 2
+ items:
+ type:
+ - PrimitiveType
+ - RecordSchema
+ - EnumSchema
+ - ArraySchema
+ - string
+ - type: array
+ items:
+ - PrimitiveType
+ - RecordSchema
+ - EnumSchema
+ - ArraySchema
+ - string
+ jsonldPredicate:
+ _id: "sld:items"
+ _type: "@vocab"
+ refScope: 2
+ doc: "Defines the type of the array elements."
diff --git a/schema_salad/tests/test_schema/test1.cwl b/schema_salad/tests/test_schema/test1.cwl
new file mode 100644
index 0000000..2406c86
--- /dev/null
+++ b/schema_salad/tests/test_schema/test1.cwl
@@ -0,0 +1 @@
+class: Workflow
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test10.cwl b/schema_salad/tests/test_schema/test10.cwl
new file mode 100644
index 0000000..2860807
--- /dev/null
+++ b/schema_salad/tests/test_schema/test10.cwl
@@ -0,0 +1,10 @@
+class: Workflow
+inputs:
+ foo: string
+outputs:
+ bar: string
+steps:
+ step1:
+ scatterMethod: [record]
+ in: []
+ out: [out]
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test11.cwl b/schema_salad/tests/test_schema/test11.cwl
new file mode 100644
index 0000000..43281fb
--- /dev/null
+++ b/schema_salad/tests/test_schema/test11.cwl
@@ -0,0 +1,10 @@
+class: Workflow
+inputs:
+ foo: string
+outputs:
+ bar: string
+steps:
+ step1:
+ run: blub.cwl
+ in: []
+ out: [out]
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test2.cwl b/schema_salad/tests/test_schema/test2.cwl
new file mode 100644
index 0000000..96ae140
--- /dev/null
+++ b/schema_salad/tests/test_schema/test2.cwl
@@ -0,0 +1 @@
+class: xWorkflow
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test3.cwl b/schema_salad/tests/test_schema/test3.cwl
new file mode 100644
index 0000000..517e920
--- /dev/null
+++ b/schema_salad/tests/test_schema/test3.cwl
@@ -0,0 +1,6 @@
+class: Workflow
+inputs:
+ foo: string
+outputs:
+ bar: xstring
+steps: []
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test4.cwl b/schema_salad/tests/test_schema/test4.cwl
new file mode 100644
index 0000000..e57292d
--- /dev/null
+++ b/schema_salad/tests/test_schema/test4.cwl
@@ -0,0 +1,6 @@
+class: Workflow
+inputs:
+ foo: string
+outputs:
+ bar: 12
+steps: []
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test5.cwl b/schema_salad/tests/test_schema/test5.cwl
new file mode 100644
index 0000000..8a7ba22
--- /dev/null
+++ b/schema_salad/tests/test_schema/test5.cwl
@@ -0,0 +1,6 @@
+class: Workflow
+inputs:
+ foo: string
+outputs:
+ bar: string
+steps: [12]
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test6.cwl b/schema_salad/tests/test_schema/test6.cwl
new file mode 100644
index 0000000..eff4ac5
--- /dev/null
+++ b/schema_salad/tests/test_schema/test6.cwl
@@ -0,0 +1,5 @@
+inputs:
+ foo: string
+outputs:
+ bar: string
+steps: [12]
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test7.cwl b/schema_salad/tests/test_schema/test7.cwl
new file mode 100644
index 0000000..0e12c12
--- /dev/null
+++ b/schema_salad/tests/test_schema/test7.cwl
@@ -0,0 +1,10 @@
+class: Workflow
+inputs:
+ foo: string
+outputs:
+ bar: string
+steps:
+ step1:
+ scatter_method: blub
+ in: []
+ out: [out]
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test8.cwl b/schema_salad/tests/test_schema/test8.cwl
new file mode 100644
index 0000000..128cb4a
--- /dev/null
+++ b/schema_salad/tests/test_schema/test8.cwl
@@ -0,0 +1,10 @@
+class: Workflow
+inputs:
+ foo: string
+outputs:
+ bar: string
+steps:
+ step1:
+ scatterMethod: abc
+ in: []
+ out: [out]
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test9.cwl b/schema_salad/tests/test_schema/test9.cwl
new file mode 100644
index 0000000..2d7ff4c
--- /dev/null
+++ b/schema_salad/tests/test_schema/test9.cwl
@@ -0,0 +1,10 @@
+class: Workflow
+inputs:
+ foo: string
+outputs:
+ bar: string
+steps:
+ step1:
+ scatterMethod: 12
+ in: []
+ out: [out]
\ No newline at end of file
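
Each of the test*.cwl documents above carries one deliberate error (wrong `class`, unknown type, bad `scatterMethod` value, a dangling `run` reference, and so on). A minimal sketch of exercising them, assuming `load_and_validate` from `schema_salad.schema` (the pattern the new test_errors.py follows):

```
# Hedged sketch: load the CWL test schema, then confirm the deliberately
# broken documents fail validation with a readable error.
from schema_salad.schema import load_schema, load_and_validate
from schema_salad.validate import ValidationException

document_loader, avsc_names, _, _ = load_schema(
    "schema_salad/tests/test_schema/CommonWorkflowLanguage.yml")

for bad in ("test2.cwl", "test8.cwl", "test11.cwl"):
    try:
        load_and_validate(document_loader, avsc_names,
                          "schema_salad/tests/test_schema/" + bad, True)
    except ValidationException as e:
        print("%s failed as expected:\n%s" % (bad, e))
```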
diff --git a/schema_salad/tests/test_validate.pyx b/schema_salad/tests/test_validate.pyx
new file mode 100644
index 0000000..b37127a
--- /dev/null
+++ b/schema_salad/tests/test_validate.pyx
@@ -0,0 +1,71 @@
+import unittest
+import json
+from schema_salad.schema import load_schema
+from schema_salad.validate import validate_ex
+from schema_salad.sourceline import cmap
+
+class TestValidate(unittest.TestCase):
+ schema = cmap({"name": "_", "$graph":[{
+ "name": "File",
+ "type": "record",
+ "fields": [{
+ "name": "class",
+ "type": {
+ "type": "enum",
+ "name": "File_class",
+ "symbols": ["#_/File"]
+ },
+ "jsonldPredicate": {
+ "_id": "@type",
+ "_type": "@vocab"
+ }
+ }, {
+ "name": "location",
+ "type": "string",
+ "jsonldPredicate": "_:location"
+ }]
+ }, {
+ "name": "Directory",
+ "type": "record",
+ "fields": [{
+ "name": "class",
+ "type": {
+ "type": "enum",
+ "name": "Directory_class",
+ "symbols": ["#_/Directory"]
+ },
+ "jsonldPredicate": {
+ "_id": "@type",
+ "_type": "@vocab"
+ }
+ }, {
+ "name": "location",
+ "type": "string",
+ "jsonldPredicate": "_:location"
+ }, {
+ "name": "listing",
+ "type": {
+ "type": "array",
+ "items": ["File", "Directory"]
+ }
+ }],
+ }]})
+
+ def test_validate_big(self):
+ document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(self.schema)
+
+ with open("biglisting.yml") as f:
+ biglisting = json.load(f)
+
+ self.assertEquals(True, validate_ex(avsc_names.get_name("Directory", ""), biglisting,
+ strict=True, raise_ex=False))
+
+
+ # def test_validate_small(self):
+ # document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(self.schema)
+
+ # with open("smalllisting.yml") as f:
+ # smalllisting = json.load(f)
+
+ # validate_ex(avsc_names.get_name("Directory", ""), smalllisting,
+ # strict=True, raise_ex=True)
diff --git a/schema_salad/tests/test_validate.py~ b/schema_salad/tests/test_validate.py~
new file mode 100644
index 0000000..db0fd1b
--- /dev/null
+++ b/schema_salad/tests/test_validate.py~
@@ -0,0 +1,70 @@
+import unittest
+import json
+from schema_salad.schema import load_schema
+from schema_salad.validate import validate_ex
+
+class TestValidate(unittest.TestCase):
+ schema = {"name": "_", "$graph":[{
+ "name": "File",
+ "type": "record",
+ "fields": [{
+ "name": "class",
+ "type": {
+ "type": "enum",
+ "name": "File_class",
+ "symbols": ["#_/File"]
+ },
+ "jsonldPredicate": {
+ "_id": "@type",
+ "_type": "@vocab"
+ }
+ }, {
+ "name": "location",
+ "type": "string",
+ "jsonldPredicate": "_:location"
+ }]
+ }, {
+ "name": "Directory",
+ "type": "record",
+ "fields": [{
+ "name": "class",
+ "type": {
+ "type": "enum",
+ "name": "Directory_class",
+ "symbols": ["#_/Directory"]
+ },
+ "jsonldPredicate": {
+ "_id": "@type",
+ "_type": "@vocab"
+ }
+ }, {
+ "name": "location",
+ "type": "string",
+ "jsonldPredicate": "_:location"
+ }, {
+ "name": "listing",
+ "type": {
+ "type": "array",
+ "items": ["File", "Directory"]
+ }
+ }],
+ }]}
+
+ def test_validate_big(self):
+ document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(self.schema)
+
+ with open("biglisting.yml") as f:
+ biglisting = json.load(f)
+
+ self.assertEquals(True, validate_ex(avsc_names.get_name("Directory", ""), biglisting,
+ strict=True, raise_ex=False))
+
+
+ # def test_validate_small(self):
+ # document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(self.schema)
+
+ # with open("smalllisting.yml") as f:
+ # smalllisting = json.load(f)
+
+ # validate_ex(avsc_names.get_name("Directory", ""), smalllisting,
+ # strict=True, raise_ex=True)
diff --git a/schema_salad/tests/util.py b/schema_salad/tests/util.py
new file mode 100644
index 0000000..0fcaf52
--- /dev/null
+++ b/schema_salad/tests/util.py
@@ -0,0 +1,13 @@
+from pkg_resources import Requirement, resource_filename, ResolutionError # type: ignore
+import os
+
+def get_data(filename):
+ filepath = None
+ try:
+ filepath = resource_filename(
+ Requirement.parse("schema-salad"), filename)
+ except ResolutionError:
+ pass
+ if not filepath or not os.path.isfile(filepath):
+ filepath = os.path.join(os.path.dirname(__file__), os.pardir, filename)
+ return filepath
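
A short usage note for the helper above: it resolves a test-data path whether schema-salad is installed as an egg or run from a source checkout.

```
# Example use of get_data (the path is illustrative).
from schema_salad.tests.util import get_data

path = get_data("tests/test_schema/test1.cwl")
print(path)  # absolute path inside the installed package or the checkout
```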
diff --git a/schema_salad/validate.py b/schema_salad/validate.py
index 119d1c0..75e094b 100644
--- a/schema_salad/validate.py
+++ b/schema_salad/validate.py
@@ -1,18 +1,31 @@
import pprint
import avro.schema
+from avro.schema import Schema
import sys
import urlparse
+import re
from typing import Any, Union
+from .sourceline import SourceLine, lineno_re, bullets, indent
+
class ValidationException(Exception):
pass
+
class ClassValidationException(ValidationException):
pass
-def validate(expected_schema, datum, identifiers=set(), strict=False, foreign_properties=set()):
- # type: (avro.schema.Schema, Any, Set[unicode], bool, Set[unicode]) -> bool
- return validate_ex(expected_schema, datum, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=False)
+
+def validate(expected_schema, # type: Schema
+ datum, # type: Any
+ identifiers=set(), # type: Set[unicode]
+ strict=False, # type: bool
+ foreign_properties=set() # type: Set[unicode]
+ ):
+ # type: (...) -> bool
+ return validate_ex(
+ expected_schema, datum, identifiers, strict=strict,
+ foreign_properties=foreign_properties, raise_ex=False)
INT_MIN_VALUE = -(1 << 31)
@@ -20,11 +33,6 @@ INT_MAX_VALUE = (1 << 31) - 1
LONG_MIN_VALUE = -(1 << 63)
LONG_MAX_VALUE = (1 << 63) - 1
-def indent(v, nolead=False): # type: (Union[str, unicode], bool) -> unicode
- if nolead:
- return v.splitlines()[0] + u"\n".join([u" " + l for l in v.splitlines()[1:]])
- else:
- return u"\n".join([" " + l for l in v.splitlines()])
def friendly(v): # type: (Any) -> Any
if isinstance(v, avro.schema.NamedSchema):
@@ -38,11 +46,6 @@ def friendly(v): # type: (Any) -> Any
else:
return v
-def multi(v, q=""): # type: (Union[str, unicode], Union[str, unicode]) -> unicode
- if '\n' in v:
- return u"%s%s%s\n" % (q, v, q)
- else:
- return u"%s%s%s" % (q, v, q)
def vpformat(datum): # type: (Any) -> str
a = pprint.pformat(datum)
@@ -50,9 +53,15 @@ def vpformat(datum): # type: (Any) -> str
a = a[0:160] + "[...]"
return a
-def validate_ex(expected_schema, datum, identifiers=None, strict=False,
- foreign_properties=None, raise_ex=True):
- # type: (avro.schema.Schema, Any, Set[unicode], bool, Set[unicode], bool) -> bool
+
+def validate_ex(expected_schema, # type: Schema
+ datum, # type: Any
+ identifiers=None, # type: Set[unicode]
+ strict=False, # type: bool
+ foreign_properties=None, # type: Set[unicode]
+ raise_ex=True # type: bool
+ ):
+ # type: (...) -> bool
"""Determine if a python datum is an instance of a schema."""
if not identifiers:
@@ -68,7 +77,7 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
return True
else:
if raise_ex:
- raise ValidationException(u"the value `%s` is not null" % vpformat(datum))
+ raise ValidationException(u"the value is not null")
else:
return False
elif schema_type == 'boolean':
@@ -76,7 +85,7 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
return True
else:
if raise_ex:
- raise ValidationException(u"the value `%s` is not boolean" % vpformat(datum))
+ raise ValidationException(u"the value is not boolean")
else:
return False
elif schema_type == 'string':
@@ -87,7 +96,7 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
return True
else:
if raise_ex:
- raise ValidationException(u"the value `%s` is not string" % vpformat(datum))
+ raise ValidationException(u"the value is not string")
else:
return False
elif schema_type == 'bytes':
@@ -95,12 +104,13 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
return True
else:
if raise_ex:
- raise ValidationException(u"the value `%s` is not bytes" % vpformat(datum))
+ raise ValidationException(
+ u"the value `%s` is not bytes" % vpformat(datum))
else:
return False
elif schema_type == 'int':
if ((isinstance(datum, int) or isinstance(datum, long))
- and INT_MIN_VALUE <= datum <= INT_MAX_VALUE):
+ and INT_MIN_VALUE <= datum <= INT_MAX_VALUE):
return True
else:
if raise_ex:
@@ -109,28 +119,22 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
return False
elif schema_type == 'long':
if ((isinstance(datum, int) or isinstance(datum, long))
- and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE):
+ and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE):
return True
else:
if raise_ex:
- raise ValidationException(u"the value `%s` is not long" % vpformat(datum))
+ raise ValidationException(
+ u"the value `%s` is not long" % vpformat(datum))
else:
return False
elif schema_type in ['float', 'double']:
if (isinstance(datum, int) or isinstance(datum, long)
- or isinstance(datum, float)):
- return True
- else:
- if raise_ex:
- raise ValidationException(u"the value `%s` is not float or double" % vpformat(datum))
- else:
- return False
- elif isinstance(expected_schema, avro.schema.FixedSchema):
- if isinstance(datum, str) and len(datum) == expected_schema.size:
+ or isinstance(datum, float)):
return True
else:
if raise_ex:
- raise ValidationException(u"the value `%s` is not fixed" % vpformat(datum))
+ raise ValidationException(
+ u"the value `%s` is not float or double" % vpformat(datum))
else:
return False
elif isinstance(expected_schema, avro.schema.EnumSchema):
@@ -142,28 +146,42 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
raise ValidationException(u"'Any' type must be non-null")
else:
return False
+ if not isinstance(datum, basestring):
+ if raise_ex:
+ raise ValidationException(
+ u"value is a %s but expected a string" % (type(datum).__name__))
+ else:
+ return False
if datum in expected_schema.symbols:
return True
else:
if raise_ex:
- raise ValidationException(u"the value `%s`\n is not a valid symbol in enum %s, expected one of %s" % (vpformat(datum), expected_schema.name, "'" + "', '".join(expected_schema.symbols) + "'"))
+ raise ValidationException(u"the value %s is not a valid %s, expected %s%s" % (vpformat(datum), expected_schema.name,
+ "one of " if len(
+ expected_schema.symbols) > 1 else "",
+ "'" + "', '".join(expected_schema.symbols) + "'"))
else:
return False
elif isinstance(expected_schema, avro.schema.ArraySchema):
if isinstance(datum, list):
for i, d in enumerate(datum):
try:
- if not validate_ex(expected_schema.items, d, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=raise_ex):
+ sl = SourceLine(datum, i, ValidationException)
+ if not validate_ex(expected_schema.items, d, identifiers, strict=strict,
+ foreign_properties=foreign_properties,
+ raise_ex=raise_ex):
return False
except ValidationException as v:
if raise_ex:
- raise ValidationException(u"At position %i\n%s" % (i, indent(str(v))))
+ raise sl.makeError(
+ unicode("item is invalid because\n%s" % (indent(str(v)))))
else:
return False
return True
else:
if raise_ex:
- raise ValidationException(u"the value `%s` is not a list, expected list of %s" % (vpformat(datum), friendly(expected_schema.items)))
+ raise ValidationException(u"the value is not a list, expected list of %s" % (
+ friendly(expected_schema.items)))
else:
return False
elif isinstance(expected_schema, avro.schema.UnionSchema):
@@ -175,34 +193,47 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
return False
errors = [] # type: List[unicode]
+ checked = []
for s in expected_schema.schemas:
+ if isinstance(datum, list) and not isinstance(s, avro.schema.ArraySchema):
+ continue
+ elif isinstance(datum, dict) and not isinstance(s, avro.schema.RecordSchema):
+ continue
+ elif isinstance(datum, (bool, int, long, float, basestring)) and isinstance(s, (avro.schema.ArraySchema, avro.schema.RecordSchema)):
+ continue
+ elif datum is not None and s.type == "null":
+ continue
+
+ checked.append(s)
try:
- validate_ex(s, datum, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=True)
+ validate_ex(s, datum, identifiers, strict=strict,
+ foreign_properties=foreign_properties, raise_ex=True)
except ClassValidationException as e:
raise
except ValidationException as e:
errors.append(unicode(e))
-
- raise ValidationException(u"the value %s is not a valid type in the union, expected one of:\n%s" % (
- multi(vpformat(datum), '`'), u"\n".join([
- u"- %s, but\n %s" % (
- friendly(expected_schema.schemas[i]), indent(multi(errors[i])))
- for i in range(0, len(expected_schema.schemas))])))
+ if errors:
+ raise ValidationException(bullets(["tried %s but\n%s" % (friendly(
+ checked[i]), indent(errors[i])) for i in range(0, len(errors))], "- "))
+ else:
+ raise ValidationException("value is a %s, expected %s" % (
+ type(datum).__name__, friendly(expected_schema)))
elif isinstance(expected_schema, avro.schema.RecordSchema):
if not isinstance(datum, dict):
if raise_ex:
- raise ValidationException(u"`%s`\n is not a dict" % vpformat(datum))
+ raise ValidationException(u"is not a dict")
else:
return False
classmatch = None
for f in expected_schema.fields:
- if f.name == "class":
- d = datum.get("class")
+ if f.name in ("class",):
+ d = datum.get(f.name)
if not d:
if raise_ex:
- raise ValidationException(u"Missing 'class' field")
+ raise ValidationException(
+ u"Missing '%s' field" % (f.name))
else:
return False
if expected_schema.name != d:
@@ -212,7 +243,7 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
errors = []
for f in expected_schema.fields:
- if f.name == "class":
+ if f.name in ("class",):
continue
if f.name in datum:
@@ -224,13 +255,16 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
fieldval = None
try:
- if not validate_ex(f.type, fieldval, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=raise_ex):
+ sl = SourceLine(datum, f.name, unicode)
+ if not validate_ex(f.type, fieldval, identifiers, strict=strict, foreign_properties=foreign_properties,
+ raise_ex=raise_ex):
return False
except ValidationException as v:
if f.name not in datum:
errors.append(u"missing required field `%s`" % f.name)
else:
- errors.append(u"could not validate field `%s` because\n%s" % (f.name, multi(indent(str(v)))))
+ errors.append(sl.makeError(u"the `%s` field is not valid because\n%s" % (
+ f.name, indent(str(v)))))
if strict:
for d in datum:
@@ -239,21 +273,24 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
if d == f.name:
found = True
if not found:
+ sl = SourceLine(datum, d, unicode)
if d not in identifiers and d not in foreign_properties and d[0] not in ("@", "$"):
if not raise_ex:
return False
split = urlparse.urlsplit(d)
if split.scheme:
- errors.append(u"could not validate extension field `%s` because it is not recognized and strict is True. Did you include a $schemas section?" % (d))
+ errors.append(sl.makeError(
+ u"unrecognized extension field `%s` and strict is True. Did you include a $schemas section?" % (d)))
else:
- errors.append(u"could not validate field `%s` because it is not recognized and strict is True, valid fields are: %s" % (d, ", ".join(fn.name for fn in expected_schema.fields)))
+ errors.append(sl.makeError(u"invalid field `%s`, expected one of: %s" % (
+ d, ", ".join("'%s'" % fn.name for fn in expected_schema.fields))))
if errors:
if raise_ex:
if classmatch:
- raise ClassValidationException(u"%s record %s" % (classmatch, "\n".join(errors)))
+ raise ClassValidationException(bullets(errors, "* "))
else:
- raise ValidationException(u"\n".join(errors))
+ raise ValidationException(bullets(errors, "* "))
else:
return False
else:
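
The central pattern in the rewritten validator above is `SourceLine`: wrap the position being checked, then raise errors that carry the original file/line context. A condensed sketch of the idiom as the diff uses it:

```
# Condensed illustration of the SourceLine error pattern used above.
from schema_salad.sourceline import SourceLine
from schema_salad.validate import ValidationException

def check_required(datum, name):
    sl = SourceLine(datum, name, ValidationException)
    if name not in datum:
        # makeError builds the exception type passed above; when datum came
        # from a tracked YAML load, the message is prefixed with file/line.
        raise sl.makeError(u"missing required field `%s`" % name)
```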
diff --git a/setup.cfg b/setup.cfg
index 9d8f2af..522ffdd 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -11,7 +11,7 @@ test = pytest
addopts = --pyarg schema_salad
[egg_info]
-tag_build = .20161215163938
+tag_build = .20170111180227
tag_date = 0
tag_svn_revision = 0
diff --git a/setup.py b/setup.py
index 1dd27dc..1e8c2fd 100755
--- a/setup.py
+++ b/setup.py
@@ -30,10 +30,9 @@ else:
install_requires = [
'setuptools',
'requests >= 1.0',
- 'ruamel.yaml >= 0.12.4, < 0.12.5',
+ 'ruamel.yaml >= 0.12.4',
'rdflib >= 4.2.0, < 4.3.0',
'rdflib-jsonld >= 0.3.0, < 0.5.0',
- 'html5lib >= 0.90, <= 0.9999999',
'mistune >= 0.7.3, < 0.8',
'typing >= 3.5.2, < 3.6',
'CacheControl >= 0.11.7, < 0.12',
@@ -48,7 +47,7 @@ install_requires.append("avro") # TODO: remove me once cwltool is
extras_require = {} # TODO: to be removed when the above is added
setup(name='schema-salad',
- version='1.21',
+ version='2.2',
description='Schema Annotations for Linked Avro Data (SALAD)',
long_description=open(README).read(),
author='Common workflow language working group',
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-schema-salad.git