[med-svn] [python-schema-salad] 06/09: New upstream version 2.2.20170111180227
Michael Crusoe
misterc-guest at moszumanska.debian.org
Fri Jan 13 05:29:30 UTC 2017
This is an automated email from the git hooks/post-receive script.
misterc-guest pushed a commit to branch master
in repository python-schema-salad.
commit f5d6df0b4999d4e01d61ca3e20275c56b92b00cd
Author: Michael R. Crusoe <michael.crusoe at gmail.com>
Date: Thu Jan 12 00:16:40 2017 -0800
New upstream version 2.2.20170111180227
---
MANIFEST.in | 5 +-
PKG-INFO | 6 +-
README.rst | 4 +-
schema_salad.egg-info/PKG-INFO | 6 +-
schema_salad.egg-info/SOURCES.txt | 31 +-
schema_salad.egg-info/pbr.json | 2 +-
schema_salad.egg-info/requires.txt | 3 +-
schema_salad/add_dictlist.py | 1 +
schema_salad/aslist.py | 1 +
schema_salad/flatten.py | 2 +
schema_salad/jsonld_context.py | 42 +-
schema_salad/main.py | 43 +-
schema_salad/makedoc.py | 26 +-
schema_salad/ref_resolver.py | 433 ++++++----
schema_salad/schema.py | 136 +++-
schema_salad/sourceline.py | 165 ++++
schema_salad/tests/.coverage | 1 +
schema_salad/tests/frag.yml | 4 +
schema_salad/tests/test_errors.py | 31 +
schema_salad/tests/test_errors.py~ | 1 +
schema_salad/tests/test_examples.py | 92 ++-
schema_salad/tests/test_fetch.py~ | 13 +
schema_salad/tests/test_schema/CommandLineTool.yml | 894 +++++++++++++++++++++
.../tests/test_schema/CommonWorkflowLanguage.yml | 11 +
schema_salad/tests/test_schema/Process.yml | 743 +++++++++++++++++
schema_salad/tests/test_schema/Workflow.yml | 582 ++++++++++++++
schema_salad/tests/test_schema/concepts.md | 1 +
schema_salad/tests/test_schema/contrib.md | 1 +
schema_salad/tests/test_schema/intro.md | 1 +
schema_salad/tests/test_schema/invocation.md | 1 +
schema_salad/tests/test_schema/metaschema_base.yml | 164 ++++
schema_salad/tests/test_schema/test1.cwl | 1 +
schema_salad/tests/test_schema/test10.cwl | 10 +
schema_salad/tests/test_schema/test11.cwl | 10 +
schema_salad/tests/test_schema/test2.cwl | 1 +
schema_salad/tests/test_schema/test3.cwl | 6 +
schema_salad/tests/test_schema/test4.cwl | 6 +
schema_salad/tests/test_schema/test5.cwl | 6 +
schema_salad/tests/test_schema/test6.cwl | 5 +
schema_salad/tests/test_schema/test7.cwl | 10 +
schema_salad/tests/test_schema/test8.cwl | 10 +
schema_salad/tests/test_schema/test9.cwl | 10 +
schema_salad/tests/test_validate.pyx | 71 ++
schema_salad/tests/test_validate.py~ | 70 ++
schema_salad/tests/util.py | 13 +
schema_salad/validate.py | 147 ++--
setup.cfg | 2 +-
setup.py | 5 +-
48 files changed, 3483 insertions(+), 346 deletions(-)
diff --git a/MANIFEST.in b/MANIFEST.in
index bf8066c..abcfe2a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,7 @@
include gittaggers.py Makefile
-include schema_salad/tests/*.py schema_salad/tests/*.yml schema_salad/tests/*.owl
+include schema_salad/tests/*
+include schema_salad/tests/test_schema/*.md
+include schema_salad/tests/test_schema/*.yml
+include schema_salad/tests/test_schema/*.cwl
include schema_salad/metaschema/*
global-exclude *.pyc
diff --git a/PKG-INFO b/PKG-INFO
index ac8aacc..d3f87c0 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: schema-salad
-Version: 1.21.20161215163938
+Version: 2.2.20170111180227
Summary: Schema Annotations for Linked Avro Data (SALAD)
Home-page: https://github.com/common-workflow-language/common-workflow-language
Author: Common workflow language working group
@@ -79,8 +79,8 @@ Description: Schema Salad
.. _JSON-LD: http://json-ld.org
.. _Avro: http://avro.apache.org
.. _metaschema: https://github.com/common-workflow-language/schema_salad/blob/master/schema_salad/metaschema/metaschema.yml
- .. _specification: http://www.commonwl.org/draft-3/SchemaSalad.html
- .. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/draft-3/CommandLineTool.yml
+ .. _specification: http://www.commonwl.org/v1.0/SchemaSalad.html
+ .. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/v1.0/CommandLineTool.yml
.. _RDF: https://www.w3.org/RDF/
Platform: UNKNOWN
diff --git a/README.rst b/README.rst
index 11d1b06..e66427c 100644
--- a/README.rst
+++ b/README.rst
@@ -70,6 +70,6 @@ provides for robust support of inline documentation.
.. _JSON-LD: http://json-ld.org
.. _Avro: http://avro.apache.org
.. _metaschema: https://github.com/common-workflow-language/schema_salad/blob/master/schema_salad/metaschema/metaschema.yml
-.. _specification: http://www.commonwl.org/draft-3/SchemaSalad.html
-.. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/draft-3/CommandLineTool.yml
+.. _specification: http://www.commonwl.org/v1.0/SchemaSalad.html
+.. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/v1.0/CommandLineTool.yml
.. _RDF: https://www.w3.org/RDF/
diff --git a/schema_salad.egg-info/PKG-INFO b/schema_salad.egg-info/PKG-INFO
index ac8aacc..d3f87c0 100644
--- a/schema_salad.egg-info/PKG-INFO
+++ b/schema_salad.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: schema-salad
-Version: 1.21.20161215163938
+Version: 2.2.20170111180227
Summary: Schema Annotations for Linked Avro Data (SALAD)
Home-page: https://github.com/common-workflow-language/common-workflow-language
Author: Common workflow language working group
@@ -79,8 +79,8 @@ Description: Schema Salad
.. _JSON-LD: http://json-ld.org
.. _Avro: http://avro.apache.org
.. _metaschema: https://github.com/common-workflow-language/schema_salad/blob/master/schema_salad/metaschema/metaschema.yml
- .. _specification: http://www.commonwl.org/draft-3/SchemaSalad.html
- .. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/draft-3/CommandLineTool.yml
+ .. _specification: http://www.commonwl.org/v1.0/SchemaSalad.html
+ .. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/v1.0/CommandLineTool.yml
.. _RDF: https://www.w3.org/RDF/
Platform: UNKNOWN
diff --git a/schema_salad.egg-info/SOURCES.txt b/schema_salad.egg-info/SOURCES.txt
index 78c842e..73a6c34 100644
--- a/schema_salad.egg-info/SOURCES.txt
+++ b/schema_salad.egg-info/SOURCES.txt
@@ -14,6 +14,7 @@ schema_salad/main.py
schema_salad/makedoc.py
schema_salad/ref_resolver.py
schema_salad/schema.py
+schema_salad/sourceline.py
schema_salad/validate.py
schema_salad.egg-info/PKG-INFO
schema_salad.egg-info/SOURCES.txt
@@ -45,9 +46,37 @@ schema_salad/metaschema/vocab_res.yml
schema_salad/metaschema/vocab_res_proc.yml
schema_salad/metaschema/vocab_res_schema.yml
schema_salad/metaschema/vocab_res_src.yml
+schema_salad/tests/.coverage
schema_salad/tests/EDAM.owl
schema_salad/tests/Process.yml
schema_salad/tests/__init__.py
+schema_salad/tests/frag.yml
schema_salad/tests/mixin.yml
+schema_salad/tests/test_errors.py
+schema_salad/tests/test_errors.py~
schema_salad/tests/test_examples.py
-schema_salad/tests/test_fetch.py
\ No newline at end of file
+schema_salad/tests/test_fetch.py
+schema_salad/tests/test_fetch.py~
+schema_salad/tests/test_validate.pyx
+schema_salad/tests/test_validate.py~
+schema_salad/tests/util.py
+schema_salad/tests/test_schema/CommandLineTool.yml
+schema_salad/tests/test_schema/CommonWorkflowLanguage.yml
+schema_salad/tests/test_schema/Process.yml
+schema_salad/tests/test_schema/Workflow.yml
+schema_salad/tests/test_schema/concepts.md
+schema_salad/tests/test_schema/contrib.md
+schema_salad/tests/test_schema/intro.md
+schema_salad/tests/test_schema/invocation.md
+schema_salad/tests/test_schema/metaschema_base.yml
+schema_salad/tests/test_schema/test1.cwl
+schema_salad/tests/test_schema/test10.cwl
+schema_salad/tests/test_schema/test11.cwl
+schema_salad/tests/test_schema/test2.cwl
+schema_salad/tests/test_schema/test3.cwl
+schema_salad/tests/test_schema/test4.cwl
+schema_salad/tests/test_schema/test5.cwl
+schema_salad/tests/test_schema/test6.cwl
+schema_salad/tests/test_schema/test7.cwl
+schema_salad/tests/test_schema/test8.cwl
+schema_salad/tests/test_schema/test9.cwl
\ No newline at end of file
diff --git a/schema_salad.egg-info/pbr.json b/schema_salad.egg-info/pbr.json
index b645198..f3b1371 100644
--- a/schema_salad.egg-info/pbr.json
+++ b/schema_salad.egg-info/pbr.json
@@ -1 +1 @@
-{"is_release": false, "git_version": "2b328bc"}
\ No newline at end of file
+{"is_release": false, "git_version": "a5bbb36"}
\ No newline at end of file
diff --git a/schema_salad.egg-info/requires.txt b/schema_salad.egg-info/requires.txt
index 756fee8..a27d4da 100644
--- a/schema_salad.egg-info/requires.txt
+++ b/schema_salad.egg-info/requires.txt
@@ -1,9 +1,8 @@
setuptools
requests >= 1.0
-ruamel.yaml >= 0.12.4, < 0.12.5
+ruamel.yaml >= 0.12.4
rdflib >= 4.2.0, < 4.3.0
rdflib-jsonld >= 0.3.0, < 0.5.0
-html5lib >= 0.90, <= 0.9999999
mistune >= 0.7.3, < 0.8
typing >= 3.5.2, < 3.6
CacheControl >= 0.11.7, < 0.12
diff --git a/schema_salad/add_dictlist.py b/schema_salad/add_dictlist.py
index 53bd4d4..711f580 100644
--- a/schema_salad/add_dictlist.py
+++ b/schema_salad/add_dictlist.py
@@ -1,6 +1,7 @@
import sys
from typing import Any, Dict
+
def add_dictlist(di, key, val): # type: (Dict, Any, Any) -> None
if key not in di:
di[key] = []
diff --git a/schema_salad/aslist.py b/schema_salad/aslist.py
index 0332a2b..27602ab 100644
--- a/schema_salad/aslist.py
+++ b/schema_salad/aslist.py
@@ -1,6 +1,7 @@
import sys
from typing import Any, List
+
def aslist(l): # type: (Any) -> List
"""Convenience function to wrap single items and lists, and return lists unchanged."""
diff --git a/schema_salad/flatten.py b/schema_salad/flatten.py
index 90c93d2..a417b34 100644
--- a/schema_salad/flatten.py
+++ b/schema_salad/flatten.py
@@ -2,6 +2,8 @@ import sys
from typing import Any, Tuple
# http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html
+
+
def flatten(l, ltypes=(list, tuple)):
# type: (Any, Any) -> Any
if l is None:
diff --git a/schema_salad/jsonld_context.py b/schema_salad/jsonld_context.py
index d4d203f..7141b07 100755
--- a/schema_salad/jsonld_context.py
+++ b/schema_salad/jsonld_context.py
@@ -20,13 +20,19 @@ import urlparse
import logging
from .aslist import aslist
from typing import Any, cast, Dict, Iterable, Tuple, Union
-from .ref_resolver import Loader
+from .ref_resolver import Loader, ContextType
_logger = logging.getLogger("salad")
-def pred(datatype, field, name, context, defaultBase, namespaces):
- # type: (Dict[str, Union[Dict, str]], Dict, str, Loader.ContextType, str, Dict[str, rdflib.namespace.Namespace]) -> Union[Dict, str]
+def pred(datatype, # type: Dict[str, Union[Dict, str]]
+ field, # type: Dict
+ name, # type: str
+ context, # type: ContextType
+ defaultBase, # type: str
+ namespaces # type: Dict[str, rdflib.namespace.Namespace]
+ ):
+ # type: (...) -> Union[Dict, str]
split = urlparse.urlsplit(name)
vee = None # type: Union[str, unicode]
@@ -84,8 +90,14 @@ def pred(datatype, field, name, context, defaultBase, namespaces):
return ret
-def process_type(t, g, context, defaultBase, namespaces, defaultPrefix):
- # type: (Dict[str, Any], Graph, Loader.ContextType, str, Dict[str, rdflib.namespace.Namespace], str) -> None
+def process_type(t, # type: Dict[str, Any]
+ g, # type: Graph
+ context, # type: ContextType
+ defaultBase, # type: str
+ namespaces, # type: Dict[str, rdflib.namespace.Namespace]
+ defaultPrefix # type: str
+ ):
+ # type: (...) -> None
if t["type"] == "record":
recordname = t["name"]
@@ -154,8 +166,8 @@ def process_type(t, g, context, defaultBase, namespaces, defaultPrefix):
def salad_to_jsonld_context(j, schema_ctx):
- # type: (Iterable, Dict[str, Any]) -> Tuple[Loader.ContextType, Graph]
- context = {} # type: Loader.ContextType
+ # type: (Iterable, Dict[str, Any]) -> Tuple[ContextType, Graph]
+ context = {} # type: ContextType
namespaces = {}
g = Graph()
defaultPrefix = ""
@@ -178,8 +190,11 @@ def salad_to_jsonld_context(j, schema_ctx):
return (context, g)
-def fix_jsonld_ids(obj, ids):
- # type: (Union[Dict[unicode, Any], List[Dict[unicode, Any]]], List[unicode]) -> None
+
+def fix_jsonld_ids(obj, # type: Union[Dict[unicode, Any], List[Dict[unicode, Any]]]
+ ids # type: List[unicode]
+ ):
+ # type: (...) -> None
if isinstance(obj, dict):
for i in ids:
if i in obj:
@@ -190,8 +205,13 @@ def fix_jsonld_ids(obj, ids):
for entry in obj:
fix_jsonld_ids(entry, ids)
-def makerdf(workflow, wf, ctx, graph=None):
- # type: (Union[str, unicode], Union[List[Dict[unicode, Any]], Dict[unicode, Any]], Loader.ContextType, Graph) -> Graph
+
+def makerdf(workflow, # type: Union[str, unicode]
+ wf, # type: Union[List[Dict[unicode, Any]], Dict[unicode, Any]]
+ ctx, # type: ContextType
+ graph=None # type: Graph
+ ):
+ # type: (...) -> Graph
prefixes = {}
idfields = []
for k, v in ctx.iteritems():
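The jsonld_context.py hunks above rewrite long single-line type comments
into the per-argument comment style that PEP 484 defines for Python 2
code. A minimal sketch of the pattern, using a hypothetical function:

    from typing import Dict, Union

    def pred(datatype,  # type: Dict[str, Union[Dict, str]]
             name       # type: str
             ):
        # type: (...) -> Union[Dict, str]
        return {name: datatype}

mypy combines the per-argument comments with the trailing
"# type: (...) -> ..." line, so each parameter's annotation sits next to
its name instead of in one unwieldy comment.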
diff --git a/schema_salad/main.py b/schema_salad/main.py
index 1896e8f..f51184b 100644
--- a/schema_salad/main.py
+++ b/schema_salad/main.py
@@ -3,28 +3,36 @@ import argparse
import logging
import sys
import traceback
-import pkg_resources # part of setuptools
-from . import schema
-from . import jsonld_context
-from . import makedoc
import json
-from rdflib import Graph, plugin
-from rdflib.serializer import Serializer
import os
import urlparse
-from .ref_resolver import Loader
-from . import validate
+import pkg_resources # part of setuptools
+
from typing import Any, Dict, List, Union
+from rdflib import Graph, plugin
+from rdflib.serializer import Serializer
+
+from . import schema
+from . import jsonld_context
+from . import makedoc
+from . import validate
+from .sourceline import strip_dup_lineno
+from .ref_resolver import Loader
+
_logger = logging.getLogger("salad")
from rdflib.plugin import register, Parser
register('json-ld', Parser, 'rdflib_jsonld.parser', 'JsonLDParser')
-def printrdf(workflow, wf, ctx, sr):
- # type: (str, Union[List[Dict[unicode, Any]], Dict[unicode, Any]], Dict[unicode, Any], str) -> None
+def printrdf(workflow, # type: str
+ wf, # type: Union[List[Dict[unicode, Any]], Dict[unicode, Any]]
+ ctx, # type: Dict[unicode, Any]
+ sr # type: str
+ ):
+ # type: (...) -> None
g = jsonld_context.makerdf(workflow, wf, ctx)
print(g.serialize(format=sr))
@@ -104,10 +112,14 @@ def main(argsl=None): # type: (List[str]) -> int
schema_raw_doc, schema_uri)
except (validate.ValidationException) as e:
_logger.error("Schema `%s` failed link checking:\n%s",
- args.schema, e, exc_info=(True if args.debug else False))
+ args.schema, e, exc_info=(True if args.debug else False))
_logger.debug("Index is %s", metaschema_loader.idx.keys())
_logger.debug("Vocabulary is %s", metaschema_loader.vocab.keys())
return 1
+ except (RuntimeError) as e:
+ _logger.error("Schema `%s` read error:\n%s",
+ args.schema, e, exc_info=(True if args.debug else False))
+ return 1
# Optionally print the schema after ref resolution
if not args.document and args.print_pre:
@@ -121,7 +133,8 @@ def main(argsl=None): # type: (List[str]) -> int
# Validate the schema document against the metaschema
try:
schema.validate_doc(metaschema_names, schema_doc,
- metaschema_loader, args.strict)
+ metaschema_loader, args.strict,
+ source=schema_metadata["name"])
except validate.ValidationException as e:
_logger.error("While validating schema `%s`:\n%s" %
(args.schema, str(e)))
@@ -149,8 +162,8 @@ def main(argsl=None): # type: (List[str]) -> int
if isinstance(avsc_names, Exception):
_logger.error("Schema `%s` error:\n%s", args.schema,
- avsc_names, exc_info=((type(avsc_names), avsc_names,
- None) if args.debug else None))
+ avsc_names, exc_info=((type(avsc_names), avsc_names,
+ None) if args.debug else None))
if args.print_avro:
print(json.dumps(avsc_obj, indent=4))
return 1
@@ -188,7 +201,7 @@ def main(argsl=None): # type: (List[str]) -> int
document, doc_metadata = document_loader.resolve_ref(uri)
except (validate.ValidationException, RuntimeError) as e:
_logger.error("Document `%s` failed validation:\n%s",
- args.document, e, exc_info=args.debug)
+ args.document, strip_dup_lineno(unicode(e)), exc_info=args.debug)
return 1
# Optionally print the document after ref resolution
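main.py now routes document errors through strip_dup_lineno from the new
sourceline module (diffed below), which blanks a file:line:column prefix
that repeats on consecutive lines. A hedged illustration with a made-up
message; the exact output spacing is inferred from the sourceline.py code:

    from schema_salad.sourceline import strip_dup_lineno

    msg = (u"doc.yml:3:5: Object `foo` is not valid because\n"
           u"doc.yml:3:5:   missing required field `bar`")
    print strip_dup_lineno(msg)
    # doc.yml:3:5: Object `foo` is not valid because
    #                missing required field `bar`

Only the first line keeps the position prefix; repeats are replaced with
matching whitespace so multi-line errors stay readable.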
diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py
index 91be285..0325ad8 100644
--- a/schema_salad/makedoc.py
+++ b/schema_salad/makedoc.py
@@ -220,8 +220,13 @@ class RenderType(object):
("docAfter" not in f))):
self.render_type(f, 1)
- def typefmt(self, tp, redirects, nbsp=False, jsonldPredicate=None):
- # type: (Any, Dict[str, str], bool, Dict[str, str]) -> Union[str, unicode]
+ def typefmt(self,
+ tp, # type: Any
+ redirects, # type: Dict[str, str]
+ nbsp=False, # type: bool
+ jsonldPredicate=None # type: Dict[str, str]
+ ):
+ # type: (...) -> Union[str, unicode]
global primitiveType
if isinstance(tp, list):
if nbsp and len(tp) <= 3:
@@ -230,16 +235,20 @@ class RenderType(object):
return " | ".join([self.typefmt(n, redirects) for n in tp])
if isinstance(tp, dict):
if tp["type"] == "https://w3id.org/cwl/salad#array":
- ar = "array<%s>" % (self.typefmt(tp["items"], redirects, nbsp=True))
+ ar = "array<%s>" % (self.typefmt(
+ tp["items"], redirects, nbsp=True))
if jsonldPredicate and "mapSubject" in jsonldPredicate:
if "mapPredicate" in jsonldPredicate:
ar += " | map<%s.%s, %s.%s>" % (self.typefmt(tp["items"], redirects),
- jsonldPredicate["mapSubject"],
- self.typefmt(tp["items"], redirects),
- jsonldPredicate["mapPredicate"])
+ jsonldPredicate[
+ "mapSubject"],
+ self.typefmt(
+ tp["items"], redirects),
+ jsonldPredicate["mapPredicate"])
ar += " | map<%s.%s, %s>" % (self.typefmt(tp["items"], redirects),
- jsonldPredicate["mapSubject"],
- self.typefmt(tp["items"], redirects))
+ jsonldPredicate[
+ "mapSubject"],
+ self.typefmt(tp["items"], redirects))
return ar
if tp["type"] in ("https://w3id.org/cwl/salad#record", "https://w3id.org/cwl/salad#enum"):
frg = schema.avro_name(tp["name"])
@@ -414,6 +423,7 @@ def avrold_doc(j, outdoc, renderlist, redirects, brand, brandlink):
<html>
<head>
<meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css">
""")
diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py
index e37e840..ed25f0e 100644
--- a/schema_salad/ref_resolver.py
+++ b/schema_salad/ref_resolver.py
@@ -4,36 +4,57 @@ import json
import hashlib
import logging
import collections
+import urllib
import urlparse
import re
import copy
-import pprint
+import urllib
from StringIO import StringIO
from . import validate
from .aslist import aslist
from .flatten import flatten
+from .sourceline import SourceLine, add_lc_filename, relname
import requests
from cachecontrol.wrapper import CacheControl
from cachecontrol.caches import FileCache
import ruamel.yaml as yaml
-
-try:
- from ruamel.yaml import CSafeLoader as SafeLoader
-except ImportError:
- from ruamel.yaml import SafeLoader # type: ignore
+from ruamel.yaml.comments import CommentedSeq, CommentedMap
import rdflib
+from rdflib import Graph
from rdflib.namespace import RDF, RDFS, OWL
from rdflib.plugins.parsers.notation3 import BadSyntax
import xml.sax
from typing import (Any, AnyStr, Callable, cast, Dict, List, Iterable, Tuple,
- TypeVar, Union)
+ TypeVar, Union)
_logger = logging.getLogger("salad")
-
-class NormDict(dict):
+ContextType = Dict[unicode, Union[Dict, unicode, Iterable[unicode]]]
+DocumentType = TypeVar('DocumentType', CommentedSeq, CommentedMap)
+DocumentOrStrType = TypeVar(
+ 'DocumentOrStrType', CommentedSeq, CommentedMap, unicode)
+
+def file_uri(path): # type: (str) -> str
+ if path.startswith("file://"):
+ return path
+ pathsp = path.split("#", 2)
+ frag = "#" + urllib.quote(str(pathsp[1])) if len(pathsp) == 2 else ""
+ urlpath = urllib.pathname2url(str(pathsp[0]))
+ if urlpath.startswith("//"):
+ return "file:%s%s" % (urlpath, frag)
+ else:
+ return "file://%s%s" % (urlpath, frag)
+
+def uri_file_path(url): # type: (str) -> str
+ split = urlparse.urlsplit(url)
+ if split.scheme == "file":
+ return urllib.url2pathname(str(split.path)) + ("#" + urllib.unquote(str(split.fragment)) if split.fragment else "")
+ else:
+ raise ValueError("Not a file URI")
+
+class NormDict(CommentedMap):
def __init__(self, normalize=unicode): # type: (type) -> None
super(NormDict, self).__init__()
@@ -105,7 +126,7 @@ class DefaultFetcher(Fetcher):
return resp.text
elif scheme == 'file':
try:
- with open(path) as fp:
+ with open(urllib.url2pathname(str(path))) as fp:
read = fp.read()
if hasattr(read, "decode"):
return read.decode("utf-8")
@@ -134,7 +155,7 @@ class DefaultFetcher(Fetcher):
return False
return True
elif scheme == 'file':
- return os.path.exists(path)
+ return os.path.exists(urllib.url2pathname(str(path)))
else:
raise ValueError('Unsupported scheme in url: %s' % url)
@@ -142,15 +163,11 @@ class DefaultFetcher(Fetcher):
return urlparse.urljoin(base_url, url)
class Loader(object):
-
- ContextType = Dict[unicode, Union[Dict, unicode, Iterable[unicode]]]
- DocumentType = Union[List, Dict[unicode, Any]]
-
def __init__(self,
ctx, # type: ContextType
schemagraph=None, # type: rdflib.graph.Graph
foreign_properties=None, # type: Set[unicode]
- idx=None, # type: Dict[unicode, Union[dict, list, unicode]]
+ idx=None, # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode]]
cache=None, # type: Dict[unicode, Any]
session=None, # type: requests.sessions.Session
fetcher_constructor=None # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
@@ -158,22 +175,26 @@ class Loader(object):
# type: (...) -> None
normalize = lambda url: urlparse.urlsplit(url).geturl()
+ self.idx = None # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode]]
if idx is not None:
self.idx = idx
else:
self.idx = NormDict(normalize)
- self.ctx = {} # type: Loader.ContextType
+ self.ctx = {} # type: ContextType
+ self.graph = None # type: Graph
if schemagraph is not None:
self.graph = schemagraph
else:
self.graph = rdflib.graph.Graph()
+ self.foreign_properties = None # type: Set[unicode]
if foreign_properties is not None:
self.foreign_properties = foreign_properties
else:
self.foreign_properties = set()
+ self.cache = None # type: Dict[unicode, Any]
if cache is not None:
self.cache = cache
else:
@@ -194,23 +215,29 @@ class Loader(object):
self.fetch_text = self.fetcher.fetch_text
self.check_exists = self.fetcher.check_exists
- self.url_fields = None # type: Set[unicode]
- self.scoped_ref_fields = None # type: Dict[unicode, int]
- self.vocab_fields = None # type: Set[unicode]
- self.identifiers = None # type: Set[unicode]
- self.identity_links = None # type: Set[unicode]
- self.standalone = None # type: Set[unicode]
- self.nolinkcheck = None # type: Set[unicode]
- self.vocab = {} # type: Dict[unicode, unicode]
- self.rvocab = {} # type: Dict[unicode, unicode]
- self.idmap = None # type: Dict[unicode, Any]
- self.mapPredicate = None # type: Dict[unicode, unicode]
- self.type_dsl_fields = None # type: Set[unicode]
+ self.url_fields = None # type: Set[unicode]
+ self.scoped_ref_fields = None # type: Dict[unicode, int]
+ self.vocab_fields = None # type: Set[unicode]
+ self.identifiers = None # type: Set[unicode]
+ self.identity_links = None # type: Set[unicode]
+ self.standalone = None # type: Set[unicode]
+ self.nolinkcheck = None # type: Set[unicode]
+ self.vocab = {} # type: Dict[unicode, unicode]
+ self.rvocab = {} # type: Dict[unicode, unicode]
+ self.idmap = None # type: Dict[unicode, Any]
+ self.mapPredicate = None # type: Dict[unicode, unicode]
+ self.type_dsl_fields = None # type: Set[unicode]
self.add_context(ctx)
- def expand_url(self, url, base_url, scoped_id=False, vocab_term=False, scoped_ref=None):
- # type: (unicode, unicode, bool, bool, int) -> unicode
+ def expand_url(self,
+ url, # type: unicode
+ base_url, # type: unicode
+ scoped_id=False, # type: bool
+ vocab_term=False, # type: bool
+ scoped_ref=None # type: int
+ ):
+ # type: (...) -> unicode
if url in (u"@id", u"@type"):
return url
@@ -294,7 +321,7 @@ class Loader(object):
self.idx[unicode(s)] = None
def add_context(self, newcontext, baseuri=""):
- # type: (Loader.ContextType, unicode) -> None
+ # type: (ContextType, unicode) -> None
if self.vocab:
raise validate.ValidationException(
"Refreshing context that already has stuff in it")
@@ -356,33 +383,48 @@ class Loader(object):
_logger.debug("vocab_fields is %s", self.vocab_fields)
_logger.debug("vocab is %s", self.vocab)
- def resolve_ref(self, ref, base_url=None, checklinks=True):
- # type: (Union[Dict[unicode, Any], unicode], unicode, bool) -> Tuple[Union[List, Dict[unicode, Any], unicode], Dict[unicode, Any]]
- base_url = base_url or u'file://%s/' % os.path.abspath('.')
+ def resolve_ref(self,
+ ref, # type: Union[CommentedMap, CommentedSeq, unicode]
+ base_url=None, # type: unicode
+ checklinks=True # type: bool
+ ):
+ # type: (...) -> Tuple[Union[CommentedMap, CommentedSeq, unicode], Dict[unicode, Any]]
- obj = None # type: Dict[unicode, Any]
+ obj = None # type: CommentedMap
+ resolved_obj = None # type: Union[CommentedMap, CommentedSeq, unicode]
inc = False
- mixin = None
+ mixin = None # type: Dict[unicode, Any]
+ if not base_url:
+ base_url = file_uri(os.getcwd()) + "/"
+
+ if isinstance(ref, (str, unicode)) and os.sep == "\\":
+ # Convert Windows path separator in ref
+ ref = ref.replace("\\", "/")
+
+ sl = SourceLine(obj, None, ValueError)
# If `ref` is a dict, look for special directives.
- if isinstance(ref, dict):
+ if isinstance(ref, CommentedMap):
obj = ref
- if u"$import" in obj:
+ if "$import" in obj:
+ sl = SourceLine(obj, "$import", RuntimeError)
if len(obj) == 1:
ref = obj[u"$import"]
obj = None
else:
- raise ValueError(
- u"'$import' must be the only field in %s" % (str(obj)))
- elif u"$include" in obj:
+ raise sl.makeError(
+ u"'$import' must be the only field in %s" % (unicode(obj)))
+ elif "$include" in obj:
+ sl = SourceLine(obj, "$include", RuntimeError)
if len(obj) == 1:
ref = obj[u"$include"]
inc = True
obj = None
else:
- raise ValueError(
- u"'$include' must be the only field in %s" % (str(obj)))
- elif u"$mixin" in obj:
+ raise sl.makeError(
+ u"'$include' must be the only field in %s" % (unicode(obj)))
+ elif "$mixin" in obj:
+ sl = SourceLine(obj, "$mixin", RuntimeError)
ref = obj[u"$mixin"]
mixin = obj
obj = None
@@ -393,37 +435,38 @@ class Loader(object):
ref = obj[identifier]
break
if not ref:
- raise ValueError(
- u"Object `%s` does not have identifier field in %s" % (obj, self.identifiers))
+ raise sl.makeError(
+ u"Object `%s` does not have identifier field in %s" % (relname(obj), self.identifiers))
if not isinstance(ref, (str, unicode)):
- raise ValueError(u"Must be string: `%s`" % str(ref))
+ raise ValueError(u"Expected CommentedMap or string, got %s: `%s`" % (type(ref), unicode(ref)))
url = self.expand_url(ref, base_url, scoped_id=(obj is not None))
-
# Has this reference been loaded already?
if url in self.idx and (not mixin):
return self.idx[url], {}
- # "$include" directive means load raw text
- if inc:
- return self.fetch_text(url), {}
+ sl.raise_type = RuntimeError
+ with sl:
+ # "$include" directive means load raw text
+ if inc:
+ return self.fetch_text(url), {}
- doc = None
- if obj:
- for identifier in self.identifiers:
- obj[identifier] = url
- doc_url = url
- else:
- # Load structured document
- doc_url, frg = urlparse.urldefrag(url)
- if doc_url in self.idx and (not mixin):
- # If the base document is in the index, it was already loaded,
- # so if we didn't find the reference earlier then it must not
- # exist.
- raise validate.ValidationException(
- u"Reference `#%s` not found in file `%s`." % (frg, doc_url))
- doc = self.fetch(doc_url, inject_ids=(not mixin))
+ doc = None
+ if obj:
+ for identifier in self.identifiers:
+ obj[identifier] = url
+ doc_url = url
+ else:
+ # Load structured document
+ doc_url, frg = urlparse.urldefrag(url)
+ if doc_url in self.idx and (not mixin):
+ # If the base document is in the index, it was already loaded,
+ # so if we didn't find the reference earlier then it must not
+ # exist.
+ raise validate.ValidationException(
+ u"Reference `#%s` not found in file `%s`." % (frg, doc_url))
+ doc = self.fetch(doc_url, inject_ids=(not mixin))
# Recursively expand urls and resolve directives
if mixin:
@@ -443,10 +486,11 @@ class Loader(object):
if url in self.idx:
resolved_obj = self.idx[url]
else:
- raise RuntimeError("Reference `%s` is not in the index. "
- "Index contains:\n %s" % (url, "\n ".join(self.idx)))
+ raise RuntimeError(
+ "Reference `%s` is not in the index. Index contains:\n %s"
+ % (url, "\n ".join(self.idx)))
- if isinstance(resolved_obj, (dict)):
+ if isinstance(resolved_obj, CommentedMap):
if u"$graph" in resolved_obj:
metadata = _copy_dict_without_key(resolved_obj, u"$graph")
return resolved_obj[u"$graph"], metadata
@@ -455,9 +499,11 @@ class Loader(object):
else:
return resolved_obj, metadata
-
- def _resolve_idmap(self, document, loader):
- # type: (Dict[unicode, Union[Dict[unicode, Dict[unicode, unicode]], List[Dict[unicode, Any]]]], Loader) -> None
+ def _resolve_idmap(self,
+ document, # type: CommentedMap
+ loader # type: Loader
+ ):
+ # type: (...) -> None
# Convert fields with mapSubject into lists
# use mapPredicate if the mapped value isn't a dict.
for idmapField in loader.idmap:
@@ -466,27 +512,46 @@ class Loader(object):
if (isinstance(idmapFieldValue, dict)
and "$import" not in idmapFieldValue
and "$include" not in idmapFieldValue):
- ls = []
+ ls = CommentedSeq()
for k in sorted(idmapFieldValue.keys()):
val = idmapFieldValue[k]
- v = None # type: Dict[unicode, Any]
- if not isinstance(val, dict):
+ v = None # type: CommentedMap
+ if not isinstance(val, CommentedMap):
if idmapField in loader.mapPredicate:
- v = {loader.mapPredicate[idmapField]: val}
+ v = CommentedMap(
+ ((loader.mapPredicate[idmapField], val),))
+ v.lc.add_kv_line_col(
+ loader.mapPredicate[idmapField],
+ document[idmapField].lc.data[k])
+ v.lc.filename = document.lc.filename
else:
raise validate.ValidationException(
"mapSubject '%s' value '%s' is not a dict"
"and does not have a mapPredicate", k, v)
else:
v = val
+
v[loader.idmap[idmapField]] = k
+ v.lc.add_kv_line_col(loader.idmap[idmapField],
+ document[idmapField].lc.data[k])
+ v.lc.filename = document.lc.filename
+
+ ls.lc.add_kv_line_col(
+ len(ls), document[idmapField].lc.data[k])
+
+ ls.lc.filename = document.lc.filename
ls.append(v)
+
document[idmapField] = ls
typeDSLregex = re.compile(ur"^([^[?]+)(\[\])?(\?)?$")
- def _type_dsl(self, t):
- # type: (Union[unicode, Dict, List]) -> Union[unicode, Dict[unicode, unicode], List[Union[unicode, Dict[unicode, unicode]]]]
+ def _type_dsl(self,
+ t, # type: Union[unicode, Dict, List]
+ lc,
+ filename):
+ # type: (...) -> Union[unicode, Dict[unicode, unicode], List[Union[unicode, Dict[unicode, unicode]]]]
+
if not isinstance(t, (str, unicode)):
return t
@@ -496,34 +561,59 @@ class Loader(object):
first = m.group(1)
second = third = None
if m.group(2):
- second = {u"type": u"array",
- u"items": first}
+ second = CommentedMap((("type", "array"),
+ ("items", first)))
+ second.lc.add_kv_line_col("type", lc)
+ second.lc.add_kv_line_col("items", lc)
+ second.lc.filename = filename
if m.group(3):
- third = [u"null", second or first]
+ third = CommentedSeq([u"null", second or first])
+ third.lc.add_kv_line_col(0, lc)
+ third.lc.add_kv_line_col(1, lc)
+ third.lc.filename = filename
return third or second or first
- def _resolve_type_dsl(self, document, loader):
- # type: (Dict[unicode, Union[unicode, Dict[unicode, unicode], List]], Loader) -> None
+ def _resolve_type_dsl(self,
+ document, # type: CommentedMap
+ loader # type: Loader
+ ):
+ # type: (...) -> None
for d in loader.type_dsl_fields:
if d in document:
- datum = document[d]
+ datum2 = datum = document[d]
if isinstance(datum, (str, unicode)):
- document[d] = self._type_dsl(datum)
- elif isinstance(datum, list):
- document[d] = [self._type_dsl(t) for t in datum]
- datum2 = document[d]
- if isinstance(datum2, list):
- document[d] = flatten(datum2)
+ datum2 = self._type_dsl(datum, document.lc.data[
+ d], document.lc.filename)
+ elif isinstance(datum, CommentedSeq):
+ datum2 = CommentedSeq()
+ for n, t in enumerate(datum):
+ datum2.lc.add_kv_line_col(
+ len(datum2), datum.lc.data[n])
+ datum2.append(self._type_dsl(
+ t, datum.lc.data[n], document.lc.filename))
+ if isinstance(datum2, CommentedSeq):
+ datum3 = CommentedSeq()
seen = [] # type: List[unicode]
- uniq = []
- for item in document[d]:
- if item not in seen:
- uniq.append(item)
- seen.append(item)
- document[d] = uniq
+ for i, item in enumerate(datum2):
+ if isinstance(item, CommentedSeq):
+ for j, v in enumerate(item):
+ if v not in seen:
+ datum3.lc.add_kv_line_col(
+ len(datum3), item.lc.data[j])
+ datum3.append(v)
+ seen.append(v)
+ else:
+ if item not in seen:
+ datum3.lc.add_kv_line_col(
+ len(datum3), datum2.lc.data[i])
+ datum3.append(item)
+ seen.append(item)
+ document[d] = datum3
+ else:
+ document[d] = datum2
def _resolve_identifier(self, document, loader, base_url):
- # type: (Dict[unicode, unicode], Loader, unicode) -> unicode
+ # type: (CommentedMap, Loader, unicode) -> unicode
# Expand identifier field (usually 'id') to resolve scope
for identifer in loader.identifiers:
if identifer in document:
@@ -564,8 +654,12 @@ class Loader(object):
document[d2] = document[d]
del document[d]
- def _resolve_uris(self, document, loader, base_url):
- # type: (Dict[unicode, Union[unicode, List[unicode]]], Loader, unicode) -> None
+ def _resolve_uris(self,
+ document, # type: Dict[unicode, Union[unicode, List[unicode]]]
+ loader, # type: Loader
+ base_url # type: unicode
+ ):
+ # type: (...) -> None
# Resolve remaining URLs based on document base
for d in loader.url_fields:
if d in document:
@@ -576,35 +670,43 @@ class Loader(object):
vocab_term=(d in loader.vocab_fields),
scoped_ref=self.scoped_ref_fields.get(d))
elif isinstance(datum, list):
- document[d] = [
- loader.expand_url(
- url, base_url, scoped_id=False,
- vocab_term=(d in loader.vocab_fields),
- scoped_ref=self.scoped_ref_fields.get(d))
- if isinstance(url, (str, unicode))
- else url for url in datum]
-
-
- def resolve_all(self, document, base_url, file_base=None, checklinks=True):
- # type: (DocumentType, unicode, unicode, bool) -> Tuple[Union[List, Dict[unicode, Any], unicode], Dict[unicode, Any]]
+ for i, url in enumerate(datum):
+ if isinstance(url, (str, unicode)):
+ datum[i] = loader.expand_url(
+ url, base_url, scoped_id=False,
+ vocab_term=(d in loader.vocab_fields),
+ scoped_ref=self.scoped_ref_fields.get(d))
+
+
+ def resolve_all(self,
+ document, # type: Union[CommentedMap, CommentedSeq]
+ base_url, # type: unicode
+ file_base=None, # type: unicode
+ checklinks=True # type: bool
+ ):
+ # type: (...) -> Tuple[Union[CommentedMap, CommentedSeq, unicode], Dict[unicode, Any]]
loader = self
- metadata = {} # type: Dict[unicode, Any]
+ metadata = CommentedMap() # type: CommentedMap
if file_base is None:
file_base = base_url
- if isinstance(document, dict):
+ if isinstance(document, CommentedMap):
# Handle $import and $include
if (u'$import' in document or u'$include' in document):
- return self.resolve_ref(document, base_url=file_base, checklinks=checklinks)
+ return self.resolve_ref(
+ document, base_url=file_base, checklinks=checklinks)
elif u'$mixin' in document:
- return self.resolve_ref(document, base_url=base_url, checklinks=checklinks)
- elif isinstance(document, list):
+ return self.resolve_ref(
+ document, base_url=base_url, checklinks=checklinks)
+ elif isinstance(document, CommentedSeq):
pass
+ elif isinstance(document, (list, dict)):
+ raise Exception("Expected CommentedMap or CommentedSeq, got %s: `%s`" % (type(document), document))
else:
return (document, metadata)
newctx = None # type: Loader
- if isinstance(document, dict):
+ if isinstance(document, CommentedMap):
# Handle $base, $profile, $namespaces, $schemas and $graph
if u"$base" in document:
base_url = document[u"$base"]
@@ -633,8 +735,9 @@ class Loader(object):
if u"$graph" in document:
metadata = _copy_dict_without_key(document, u"$graph")
document = document[u"$graph"]
- resolved_metadata = loader.resolve_all(metadata, base_url,
- file_base=file_base, checklinks=False)[0]
+ resolved_metadata = loader.resolve_all(
+ metadata, base_url, file_base=file_base,
+ checklinks=False)[0]
if isinstance(resolved_metadata, dict):
metadata = resolved_metadata
else:
@@ -642,7 +745,7 @@ class Loader(object):
"Validation error, metadata must be dict: %s"
% (resolved_metadata))
- if isinstance(document, dict):
+ if isinstance(document, CommentedMap):
self._normalize_fields(document, loader)
self._resolve_idmap(document, loader)
self._resolve_type_dsl(document, loader)
@@ -657,19 +760,26 @@ class Loader(object):
except validate.ValidationException as v:
_logger.warn("loader is %s", id(loader), exc_info=True)
raise validate.ValidationException("(%s) (%s) Validation error in field %s:\n%s" % (
- id(loader), file_base, key, validate.indent(str(v))))
+ id(loader), file_base, key, validate.indent(unicode(v))))
- elif isinstance(document, list):
+ elif isinstance(document, CommentedSeq):
i = 0
try:
while i < len(document):
val = document[i]
- if isinstance(val, dict) and (u"$import" in val or u"$mixin" in val):
- l, _ = loader.resolve_ref(val, base_url=file_base, checklinks=False)
- if isinstance(l, list): # never true?
+ if isinstance(val, CommentedMap) and (u"$import" in val or u"$mixin" in val):
+ l, _ = loader.resolve_ref(
+ val, base_url=file_base, checklinks=False)
+ if isinstance(l, CommentedSeq):
+ lc = document.lc.data[i]
del document[i]
- for item in aslist(l):
- document.insert(i, item)
+ llen = len(l)
+ for j in range(len(document) + llen, i + llen, -1):
+ document.lc.data[
+ j - 1] = document.lc.data[j - llen]
+ for item in l:
+ document.insert(i, item) # type: ignore
+ document.lc.data[i] = lc
i += 1
else:
document[i] = l
@@ -681,7 +791,7 @@ class Loader(object):
except validate.ValidationException as v:
_logger.warn("failed", exc_info=True)
raise validate.ValidationException("(%s) (%s) Validation error in position %i:\n%s" % (
- id(loader), file_base, i, validate.indent(str(v))))
+ id(loader), file_base, i, validate.indent(unicode(v))))
for identifer in loader.identity_links:
if identifer in metadata:
@@ -691,7 +801,7 @@ class Loader(object):
loader.idx[metadata[identifer]] = document
if checklinks:
- document = self.validate_links(document, u"")
+ self.validate_links(document, u"")
return document, metadata
@@ -704,11 +814,12 @@ class Loader(object):
textIO = StringIO(text.decode('utf-8'))
else:
textIO = StringIO(text)
- textIO.name = url # type: ignore
- result = yaml.load(textIO, Loader=SafeLoader)
+ textIO.name = url # type: ignore
+ result = yaml.round_trip_load(textIO) # type: ignore
+ add_lc_filename(result, url)
except yaml.parser.ParserError as e:
raise validate.ValidationException("Syntax error %s" % (e))
- if isinstance(result, dict) and inject_ids and self.identifiers:
+ if isinstance(result, CommentedMap) and inject_ids and self.identifiers:
for identifier in self.identifiers:
if identifier not in result:
result[identifier] = url
@@ -718,7 +829,7 @@ class Loader(object):
return result
- FieldType = TypeVar('FieldType', unicode, List[unicode], Dict[unicode, Any])
+ FieldType = TypeVar('FieldType', unicode, CommentedSeq, CommentedMap)
def validate_scoped(self, field, link, docid):
# type: (unicode, unicode, unicode) -> unicode
@@ -742,7 +853,7 @@ class Loader(object):
break
sp.pop()
raise validate.ValidationException(
- "Field `%s` contains undefined reference to `%s`, tried %s" % (field, link, tried))
+ "Field `%s` references unknown identifier `%s`, tried %s" % (field, link, ", ".join(tried)))
def validate_link(self, field, link, docid):
# type: (unicode, FieldType, unicode) -> FieldType
@@ -762,7 +873,7 @@ class Loader(object):
elif not self.check_exists(link):
raise validate.ValidationException(
"Field `%s` contains undefined reference to `%s`" % (field, link))
- elif isinstance(link, list):
+ elif isinstance(link, CommentedSeq):
errors = []
for n, i in enumerate(link):
try:
@@ -771,12 +882,12 @@ class Loader(object):
errors.append(v)
if errors:
raise validate.ValidationException(
- "\n".join([str(e) for e in errors]))
- elif isinstance(link, dict):
+ "\n".join([unicode(e) for e in errors]))
+ elif isinstance(link, CommentedMap):
self.validate_links(link, docid)
else:
- raise validate.ValidationException("Link must be a str, unicode, "
- "list, or a dict.")
+ raise validate.ValidationException(
+ "`%s` field is %s, expected string, list, or a dict." % (field, type(link).__name__))
return link
def getid(self, d): # type: (Any) -> unicode
@@ -788,59 +899,65 @@ class Loader(object):
return None
def validate_links(self, document, base_url):
- # type: (DocumentType, unicode) -> DocumentType
+ # type: (Union[CommentedMap, CommentedSeq, unicode], unicode) -> None
docid = self.getid(document)
if not docid:
docid = base_url
- errors = []
- iterator = None # type: Any
+ errors = [] # type: List[Exception]
+ iterator = None # type: Any
if isinstance(document, list):
iterator = enumerate(document)
elif isinstance(document, dict):
try:
for d in self.url_fields:
+ sl = SourceLine(document, d, validate.ValidationException)
if d in document and d not in self.identity_links:
document[d] = self.validate_link(d, document[d], docid)
except validate.ValidationException as v:
- errors.append(v)
+ errors.append(sl.makeError(unicode(v)))
if hasattr(document, "iteritems"):
iterator = document.iteritems()
else:
iterator = document.items()
else:
- return document
+ return
for key, val in iterator:
+ sl = SourceLine(document, key, validate.ValidationException)
try:
- document[key] = self.validate_links(val, docid)
+ self.validate_links(val, docid)
except validate.ValidationException as v:
if key not in self.nolinkcheck:
docid2 = self.getid(val)
if docid2:
- errors.append(validate.ValidationException(
- "While checking object `%s`\n%s" % (docid2, validate.indent(str(v)))))
+ errors.append(sl.makeError("checking object `%s`\n%s" % (
+ relname(docid2), validate.indent(unicode(v)))))
else:
if isinstance(key, basestring):
- errors.append(validate.ValidationException(
- "While checking field `%s`\n%s" % (key, validate.indent(str(v)))))
+ errors.append(sl.makeError("checking field `%s`\n%s" % (
+ key, validate.indent(unicode(v)))))
else:
- errors.append(validate.ValidationException(
- "While checking position %s\n%s" % (key, validate.indent(str(v)))))
+ errors.append(sl.makeError("checking item\n%s" % (
+ validate.indent(unicode(v)))))
if errors:
if len(errors) > 1:
raise validate.ValidationException(
- "\n".join([str(e) for e in errors]))
+ u"\n".join([unicode(e) for e in errors]))
else:
raise errors[0]
- return document
+ return
+
+D = TypeVar('D', CommentedMap, ContextType)
def _copy_dict_without_key(from_dict, filtered_key):
- # type: (Dict, Any) -> Dict
- new_dict = {}
- for key, value in from_dict.items():
- if key != filtered_key:
- new_dict[key] = value
+ # type: (D, Any) -> D
+ new_dict = copy.copy(from_dict)
+ if filtered_key in new_dict:
+ del new_dict[filtered_key] # type: ignore
+ if isinstance(from_dict, CommentedMap):
+ new_dict.lc.data = copy.copy(from_dict.lc.data)
+ new_dict.lc.filename = from_dict.lc.filename
return new_dict
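The typeDSLregex / _type_dsl machinery above implements the Salad type
DSL: a trailing "[]" expands to an array schema and a trailing "?" to a
union with "null". The real code builds CommentedMap/CommentedSeq objects
so line/column information survives; a minimal sketch with plain dicts:

    import re

    typeDSLregex = re.compile(r"^([^[?]+)(\[\])?(\?)?$")

    def type_dsl(t):
        m = typeDSLregex.match(t)
        if not m:
            return t
        first = m.group(1)
        second = third = None
        if m.group(2):  # trailing "[]": array of the base type
            second = {"type": "array", "items": first}
        if m.group(3):  # trailing "?": optional, i.e. union with "null"
            third = ["null", second or first]
        return third or second or first

    # type_dsl("string")    -> "string"
    # type_dsl("string[]")  -> {"type": "array", "items": "string"}
    # type_dsl("string[]?") -> ["null", {"type": "array", "items": "string"}]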
diff --git a/schema_salad/schema.py b/schema_salad/schema.py
index fc7afe2..342ec46 100644
--- a/schema_salad/schema.py
+++ b/schema_salad/schema.py
@@ -5,22 +5,23 @@ import sys
import pprint
from pkg_resources import resource_stream
import ruamel.yaml as yaml
-try:
- from ruamel.yaml import CSafeLoader as SafeLoader
-except ImportError:
- from ruamel.yaml import SafeLoader # type: ignore
import avro.schema
from . import validate
import json
import urlparse
+import os
AvroSchemaFromJSONData = avro.schema.make_avsc_object
# AvroSchemaFromJSONData=avro.schema.SchemaFromJSONData
+from avro.schema import Names, SchemaParseException
from . import ref_resolver
+from .ref_resolver import Loader, DocumentType
from .flatten import flatten
import logging
from .aslist import aslist
from . import jsonld_context
+from .sourceline import SourceLine, strip_dup_lineno, add_lc_filename, bullets, relname
from typing import Any, AnyStr, cast, Dict, List, Tuple, TypeVar, Union
+from ruamel.yaml.comments import CommentedSeq, CommentedMap
_logger = logging.getLogger("salad")
@@ -48,7 +49,7 @@ salad_files = ('metaschema.yml',
def get_metaschema():
- # type: () -> Tuple[avro.schema.Names, List[Dict[unicode, Any]], ref_resolver.Loader]
+ # type: () -> Tuple[Names, List[Dict[unicode, Any]], Loader]
loader = ref_resolver.Loader({
"Any": "https://w3id.org/cwl/salad#Any",
"ArraySchema": "https://w3id.org/cwl/salad#ArraySchema",
@@ -162,8 +163,8 @@ def get_metaschema():
loader.cache["https://w3id.org/cwl/salad"] = rs.read()
rs.close()
- j = yaml.load(loader.cache["https://w3id.org/cwl/salad"],
- Loader=SafeLoader)
+ j = yaml.round_trip_load(loader.cache["https://w3id.org/cwl/salad"]) # type: ignore
+ add_lc_filename(j, "metaschema.yml")
j, _ = loader.resolve_all(j, "https://w3id.org/cwl/salad#")
# pprint.pprint(j)
@@ -177,8 +178,14 @@ def get_metaschema():
return (sch_names, j, loader)
-def load_schema(schema_ref, cache=None):
- # type: (Union[unicode, Dict[unicode, Any]], Dict) -> Tuple[ref_resolver.Loader, Union[avro.schema.Names, avro.schema.SchemaParseException], Dict[unicode, Any], ref_resolver.Loader]
+def load_schema(schema_ref, # type: Union[CommentedMap, CommentedSeq, unicode]
+ cache=None # type: Dict
+ ):
+ # type: (...) -> Tuple[Loader, Union[Names, SchemaParseException], Dict[unicode, Any], Loader]
+ """Load a schema that can be used to validate documents using load_and_validate.
+
+ return document_loader, avsc_names, schema_metadata, metaschema_loader"""
+
metaschema_names, metaschema_doc, metaschema_loader = get_metaschema()
if cache is not None:
metaschema_loader.cache.update(cache)
@@ -194,7 +201,7 @@ def load_schema(schema_ref, cache=None):
schema_doc, metactx)
# Create the loader that will be used to load the target document.
- document_loader = ref_resolver.Loader(schema_ctx, cache=cache)
+ document_loader = Loader(schema_ctx, cache=cache)
# Make the Avro validation that will be used to validate the target
# document
@@ -202,19 +209,53 @@ def load_schema(schema_ref, cache=None):
return document_loader, avsc_names, schema_metadata, metaschema_loader
-def load_and_validate(document_loader, avsc_names, document, strict):
- # type: (ref_resolver.Loader, avro.schema.Names, Union[Dict[unicode, Any], unicode], bool) -> Tuple[Any, Dict[unicode, Any]]
- if isinstance(document, dict):
- data, metadata = document_loader.resolve_all(document, document["id"])
- else:
- data, metadata = document_loader.resolve_ref(document)
- validate_doc(avsc_names, data, document_loader, strict)
+def load_and_validate(document_loader, # type: Loader
+ avsc_names, # type: Names
+ document, # type: Union[CommentedMap, unicode]
+ strict # type: bool
+ ):
+ # type: (...) -> Tuple[Any, Dict[unicode, Any]]
+ """Load a document and validate it with the provided schema.
+
return data, metadata
+ """
+ try:
+ if isinstance(document, CommentedMap):
+ source = document["id"]
+ data, metadata = document_loader.resolve_all(
+ document, document["id"], checklinks=False)
+ else:
+ source = document
+ data, metadata = document_loader.resolve_ref(
+ document, checklinks=False)
+ except validate.ValidationException as v:
+ raise validate.ValidationException(strip_dup_lineno(str(v)))
+ validationErrors = u""
+ try:
+ document_loader.validate_links(data, u"")
+ except validate.ValidationException as v:
+ validationErrors = unicode(v) + "\n"
-def validate_doc(schema_names, doc, loader, strict):
- # type: (avro.schema.Names, Union[Dict[unicode, Any], List[Dict[unicode, Any]], unicode], ref_resolver.Loader, bool) -> None
+ try:
+ validate_doc(avsc_names, data, document_loader, strict, source=source)
+ except validate.ValidationException as v:
+ validationErrors += unicode(v)
+
+ if validationErrors:
+ raise validate.ValidationException(validationErrors)
+
+ return data, metadata
+
+
+def validate_doc(schema_names, # type: Names
+ doc, # type: Union[Dict[unicode, Any], List[Dict[unicode, Any]], unicode]
+ loader, # type: Loader
+ strict, # type: bool
+ source=None
+ ):
+ # type: (...) -> None
has_root = False
for r in schema_names.names.values():
if ((hasattr(r, 'get_prop') and r.get_prop(u"documentRoot")) or (
@@ -228,8 +269,10 @@ def validate_doc(schema_names, doc, loader, strict):
if isinstance(doc, list):
validate_doc = doc
- elif isinstance(doc, dict):
- validate_doc = [doc]
+ elif isinstance(doc, CommentedMap):
+ validate_doc = CommentedSeq([doc])
+ validate_doc.lc.add_kv_line_col(0, [doc.lc.line, doc.lc.col])
+ validate_doc.lc.filename = doc.lc.filename
else:
raise validate.ValidationException("Document must be dict or list")
@@ -241,10 +284,12 @@ def validate_doc(schema_names, doc, loader, strict):
anyerrors = []
for pos, item in enumerate(validate_doc):
+ sl = SourceLine(validate_doc, pos, unicode)
success = False
for r in roots:
success = validate.validate_ex(
- r, item, loader.identifiers, strict, foreign_properties=loader.foreign_properties, raise_ex=False)
+ r, item, loader.identifiers, strict,
+ foreign_properties=loader.foreign_properties, raise_ex=False)
if success:
break
@@ -258,28 +303,33 @@ def validate_doc(schema_names, doc, loader, strict):
try:
validate.validate_ex(
- r, item, loader.identifiers, strict, foreign_properties=loader.foreign_properties, raise_ex=True)
+ r, item, loader.identifiers, strict,
+ foreign_properties=loader.foreign_properties,
+ raise_ex=True)
except validate.ClassValidationException as e:
- errors = [u"Could not validate `%s` because\n%s" % (
- name, validate.indent(str(e), nolead=False))]
+ errors = [sl.makeError(u"tried `%s` but\n%s" % (
+ name, validate.indent(str(e), nolead=False)))]
break
except validate.ValidationException as e:
- errors.append(u"Could not validate as `%s` because\n%s" % (
- name, validate.indent(str(e), nolead=False)))
+ errors.append(sl.makeError(u"tried `%s` but\n%s" % (
+ name, validate.indent(str(e), nolead=False))))
- objerr = u"Validation error at position %i" % pos
+ objerr = sl.makeError(u"Invalid")
for ident in loader.identifiers:
if ident in item:
- objerr = u"Validation error in object %s" % (item[ident])
+ objerr = sl.makeError(
+ u"Object `%s` is not valid because"
+ % (relname(item[ident])))
break
anyerrors.append(u"%s\n%s" %
- (objerr, validate.indent(u"\n".join(errors))))
+ (objerr, validate.indent(bullets(errors, "- "))))
if anyerrors:
- raise validate.ValidationException(u"\n".join(anyerrors))
+ raise validate.ValidationException(
+ strip_dup_lineno(bullets(anyerrors, "* ")))
def replace_type(items, spec, loader, found):
- # type: (Any, Dict[unicode, Any], ref_resolver.Loader, Set[unicode]) -> Any
+ # type: (Any, Dict[unicode, Any], Loader, Set[unicode]) -> Any
""" Go through and replace types in the 'spec' mapping"""
items = copy.deepcopy(items)
@@ -331,8 +381,13 @@ def avro_name(url): # type: (AnyStr) -> AnyStr
Avro = TypeVar('Avro', Dict[unicode, Any], List[Any], unicode)
-def make_valid_avro(items, alltypes, found, union=False):
- # type: (Avro, Dict[unicode, Dict[unicode, Any]], Set[unicode], bool) -> Union[Avro, Dict]
+
+def make_valid_avro(items, # type: Avro
+ alltypes, # type: Dict[unicode, Dict[unicode, Any]]
+ found, # type: Set[unicode]
+ union=False # type: bool
+ ):
+ # type: (...) -> Union[Avro, Dict]
items = copy.deepcopy(items)
if isinstance(items, dict):
if items.get("name"):
@@ -365,13 +420,13 @@ def make_valid_avro(items, alltypes, found, union=False):
if union and isinstance(items, (str, unicode)):
if items in alltypes and avro_name(items) not in found:
return cast(Dict, make_valid_avro(alltypes[items], alltypes, found,
- union=union))
+ union=union))
items = avro_name(items)
return items
def extend_and_specialize(items, loader):
- # type: (List[Dict[unicode, Any]], ref_resolver.Loader) -> List[Dict[unicode, Any]]
+ # type: (List[Dict[unicode, Any]], Loader) -> List[Dict[unicode, Any]]
"""Apply 'extend' and 'specialize' to fully materialize derived record
types."""
@@ -443,7 +498,8 @@ def extend_and_specialize(items, loader):
for t in n:
if t.get("abstract") and t["name"] not in extended_by:
- raise validate.ValidationException("%s is abstract but missing a concrete subtype" % t["name"])
+ raise validate.ValidationException(
+ "%s is abstract but missing a concrete subtype" % t["name"])
for t in n:
if "fields" in t:
@@ -452,8 +508,10 @@ def extend_and_specialize(items, loader):
return n
-def make_avro_schema(i, loader):
- # type: (List[Dict[unicode, Any]], ref_resolver.Loader) -> Tuple[Union[avro.schema.Names,avro.schema.SchemaParseException], List[Dict[unicode, Any]]]
+def make_avro_schema(i, # type: List[Dict[unicode, Any]]
+ loader # type: Loader
+ ):
+ # type: (...) -> Tuple[Union[Names, SchemaParseException], List[Dict[unicode, Any]]]
names = avro.schema.Names()
j = extend_and_specialize(i, loader)
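load_schema and load_and_validate now carry docstrings spelling out the
intended two-step flow. A usage sketch matching the new test suite; the
schema and document paths here are placeholders:

    from schema_salad.schema import load_schema, load_and_validate
    from schema_salad.validate import ValidationException

    document_loader, avsc_names, schema_metadata, metaschema_loader = \
        load_schema(u"myschema.yml")
    try:
        data, metadata = load_and_validate(
            document_loader, avsc_names, u"mydocument.yml", True)
    except ValidationException as e:
        print unicode(e)  # messages now carry file:line:column prefixes

Link checking and Avro validation are run separately and their errors
concatenated, so one ValidationException can report several problems.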
diff --git a/schema_salad/sourceline.py b/schema_salad/sourceline.py
new file mode 100644
index 0000000..e09171c
--- /dev/null
+++ b/schema_salad/sourceline.py
@@ -0,0 +1,165 @@
+import ruamel.yaml
+from ruamel.yaml.comments import CommentedBase, CommentedMap, CommentedSeq
+import re
+import os
+
+from typing import (Any, AnyStr, Callable, cast, Dict, List, Iterable, Tuple,
+ TypeVar, Union, Text)
+
+lineno_re = re.compile(u"^(.*?:[0-9]+:[0-9]+: )(( *)(.*))")
+
+def _add_lc_filename(r, source): # type: (ruamel.yaml.comments.CommentedBase, AnyStr) -> None
+ if isinstance(r, ruamel.yaml.comments.CommentedBase):
+ r.lc.filename = source
+ if isinstance(r, list):
+ for d in r:
+ _add_lc_filename(d, source)
+ elif isinstance(r, dict):
+ for d in r.itervalues():
+ _add_lc_filename(d, source)
+
+def relname(source): # type: (AnyStr) -> AnyStr
+ if source.startswith("file://"):
+ source = source[7:]
+ source = os.path.relpath(source)
+ return source
+
+def add_lc_filename(r, source): # type: (ruamel.yaml.comments.CommentedBase, AnyStr) -> None
+ _add_lc_filename(r, relname(source))
+
+def reflow(text, maxline, shift=""): # type: (AnyStr, int, AnyStr) -> AnyStr
+ if maxline < 20:
+ maxline = 20
+ if len(text) > maxline:
+ sp = text.rfind(' ', 0, maxline)
+ if sp < 1:
+ sp = text.find(' ', sp+1)
+ if sp == -1:
+ sp = len(text)
+ if sp < len(text):
+ return "%s\n%s%s" % (text[0:sp], shift, reflow(text[sp+1:], maxline, shift))
+ return text
+
+def indent(v, nolead=False, shift=u" ", bullet=u" "): # type: (Text, bool, Text, Text) -> Text
+ if nolead:
+ return v.splitlines()[0] + u"\n".join([shift + l for l in v.splitlines()[1:]])
+ else:
+ def lineno(i, l): # type: (int, Text) -> Text
+ r = lineno_re.match(l)
+ if r:
+ return r.group(1) + (bullet if i == 0 else shift) + r.group(2)
+ else:
+ return (bullet if i == 0 else shift) + l
+
+ return u"\n".join([lineno(i, l) for i, l in enumerate(v.splitlines())])
+
+def bullets(textlist, bul): # type: (List[Text], Text) -> Text
+ if len(textlist) == 1:
+ return textlist[0]
+ else:
+ return "\n".join(indent(t, bullet=bul) for t in textlist)
+
+def strip_dup_lineno(text, maxline=None): # type: (Text, int) -> Text
+ if maxline is None:
+ maxline = int(os.environ.get("COLUMNS", "100"))
+ pre = None
+ msg = []
+ for l in text.splitlines():
+ g = lineno_re.match(l)
+ if not g:
+ msg.append(l)
+ continue
+ shift = len(g.group(1)) + len(g.group(3))
+ g2 = reflow(g.group(2), maxline-shift, " " * shift)
+ if g.group(1) != pre:
+ pre = g.group(1)
+ msg.append(pre + g2)
+ else:
+ g2 = reflow(g.group(2), maxline-len(g.group(1)), " " * (len(g.group(1))+len(g.group(3))))
+ msg.append(" " * len(g.group(1)) + g2)
+ return "\n".join(msg)
+
+def cmap(d, lc=None, fn=None): # type: (Union[int, float, str, unicode, Dict, List], List[int], unicode) -> Union[int, float, str, unicode, CommentedMap, CommentedSeq]
+ if lc is None:
+ lc = [0, 0, 0, 0]
+ if fn is None:
+ fn = "test"
+
+ if isinstance(d, CommentedMap):
+ fn = d.lc.filename if hasattr(d.lc, "filename") else fn
+ for k,v in d.iteritems():
+ if k in d.lc.data:
+ d[k] = cmap(v, lc=d.lc.data[k], fn=fn)
+ else:
+ d[k] = cmap(v, lc, fn=fn)
+ return d
+ if isinstance(d, CommentedSeq):
+ fn = d.lc.filename if hasattr(d.lc, "filename") else fn
+ for k,v in enumerate(d):
+ if k in d.lc.data:
+ d[k] = cmap(v, lc=d.lc.data[k], fn=fn)
+ else:
+ d[k] = cmap(v, lc, fn=fn)
+ return d
+ if isinstance(d, dict):
+ cm = CommentedMap()
+ for k in sorted(d.keys()):
+ v = d[k]
+ if isinstance(v, CommentedBase):
+ uselc = [v.lc.line, v.lc.col, v.lc.line, v.lc.col]
+ vfn = v.lc.filename if hasattr(v.lc, "filename") else fn
+ else:
+ uselc = lc
+ vfn = fn
+ cm[k] = cmap(v, lc=uselc, fn=vfn)
+ cm.lc.add_kv_line_col(k, uselc)
+ cm.lc.filename = fn
+ return cm
+ if isinstance(d, list):
+ cs = CommentedSeq()
+ for k,v in enumerate(d):
+ if isinstance(v, CommentedBase):
+ uselc = [v.lc.line, v.lc.col, v.lc.line, v.lc.col]
+ vfn = v.lc.filename if hasattr(v.lc, "filename") else fn
+ else:
+ uselc = lc
+ vfn = fn
+ cs.append(cmap(v, lc=uselc, fn=vfn))
+ cs.lc.add_kv_line_col(k, uselc)
+ cs.lc.filename = fn
+ return cs
+ else:
+ return d
+
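`cmap` lets the rest of the codebase assume every node is a ruamel `CommentedMap`/`CommentedSeq` carrying line/column data, even for documents built in memory; this is why the tests below wrap their literal dicts in `cmap(...)` before calling `resolve_all`. A small sketch of the effect:

```
from schema_salad.sourceline import cmap

doc = cmap({"id": "stuff", "inputs": {"zip": 1}})
print(type(doc).__name__)  # CommentedMap
print(doc.lc.filename)     # "test" -- the placeholder default above
print(doc.lc.data["id"])   # synthetic [line, col, line, col] == [0, 0, 0, 0]
```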
+class SourceLine(object):
+ def __init__(self, item, key=None, raise_type=unicode): # type: (Any, Any, Callable) -> None
+ self.item = item
+ self.key = key
+ self.raise_type = raise_type
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ if not exc_value:
+ return
+ raise self.makeError(unicode(exc_value))
+
+ def makeError(self, msg): # type: (Text) -> Any
+ if not isinstance(self.item, ruamel.yaml.comments.CommentedBase):
+ return self.raise_type(msg)
+ errs = []
+ if self.key is None or self.item.lc.data is None or self.key not in self.item.lc.data:
+ lead = "%s:%i:%i:" % (self.item.lc.filename,
+ self.item.lc.line+1,
+ self.item.lc.col+1)
+ else:
+ lead = "%s:%i:%i:" % (self.item.lc.filename,
+ self.item.lc.data[self.key][0]+1,
+ self.item.lc.data[self.key][1]+1)
+ for m in msg.splitlines():
+ if lineno_re.match(m):
+ errs.append(m)
+ else:
+ errs.append("%s %s" % (lead, m))
+ return self.raise_type("\n".join(errs))
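`SourceLine` is the consumer of all of the above: given a node from the round-trip loader (or from `cmap`), it prefixes messages with `file:line:col:`, either via `makeError` or as a context manager that re-raises whatever is thrown inside the block as `raise_type`. A usage sketch for this Python 2-era module (the document and messages are invented):

```
import ruamel.yaml
from schema_salad.sourceline import SourceLine

doc = ruamel.yaml.round_trip_load("id: foo1\nbar: b1\n")
doc.lc.filename = "frag.yml"

# makeError() looks up the position of the "bar" key:
print(SourceLine(doc, "bar", Exception).makeError("bad value"))
# -> frag.yml:2:1: bad value

# As a context manager, exceptions raised inside the block are
# re-raised as raise_type with the same location prefix.
try:
    with SourceLine(doc, "bar", Exception):
        raise RuntimeError("bad value")
except Exception as e:
    print(e)  # frag.yml:2:1: bad value
```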
diff --git a/schema_salad/tests/.coverage b/schema_salad/tests/.coverage
new file mode 100644
index 0000000..b4ab5e5
--- /dev/null
+++ b/schema_salad/tests/.coverage
@@ -0,0 +1 @@
+!coverage.py: This is a private format, don't read it directly!{"lines": {"/home/peter/work/salad/schema_salad/validate.py": [1, 2, 3, 4, 5, 6, 7, 9, 10, 12, 13, 15, 19, 20, 21, 22, 25, 26, 27, 28, 29, 30, 31, 32, 33, 37, 38, 39, 41, 43, 44, 48, 51, 52, 54, 56, 57, 58, 60, 63, 64, 65, 66, 72, 73, 74, 75, 79, 80, 82, 83, 91, 92, 93, 94, 100, 109, 118, 119, 127, 128, 129, 131, 132, 133, 135, 136, 137, 138, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 155, 157, 158, 160, [...]
\ No newline at end of file
diff --git a/schema_salad/tests/frag.yml b/schema_salad/tests/frag.yml
new file mode 100644
index 0000000..7e8818d
--- /dev/null
+++ b/schema_salad/tests/frag.yml
@@ -0,0 +1,4 @@
+- id: foo1
+ bar: b1
+- id: foo2
+ bar: b2
\ No newline at end of file
diff --git a/schema_salad/tests/test_errors.py b/schema_salad/tests/test_errors.py
new file mode 100644
index 0000000..25a5eea
--- /dev/null
+++ b/schema_salad/tests/test_errors.py
@@ -0,0 +1,31 @@
+from .util import get_data
+import unittest
+from typing import cast
+from schema_salad.schema import load_schema, load_and_validate
+from schema_salad.validate import ValidationException
+from avro.schema import Names
+
+class TestErrors(unittest.TestCase):
+ def test_errors(self):
+ document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
+ get_data(u"tests/test_schema/CommonWorkflowLanguage.yml"))
+ avsc_names = cast(Names, avsc_names)
+
+ for t in ("test_schema/test1.cwl",
+ "test_schema/test2.cwl",
+ "test_schema/test3.cwl",
+ "test_schema/test4.cwl",
+ "test_schema/test5.cwl",
+ "test_schema/test6.cwl",
+ "test_schema/test7.cwl",
+ "test_schema/test8.cwl",
+ "test_schema/test9.cwl",
+ "test_schema/test10.cwl",
+ "test_schema/test11.cwl"):
+ with self.assertRaises(ValidationException):
+ try:
+ load_and_validate(document_loader, avsc_names,
+ unicode(get_data("tests/"+t)), True)
+ except ValidationException as e:
+ print "\n", e
+ raise
diff --git a/schema_salad/tests/test_errors.py~ b/schema_salad/tests/test_errors.py~
new file mode 100644
index 0000000..01058d8
--- /dev/null
+++ b/schema_salad/tests/test_errors.py~
@@ -0,0 +1 @@
+g
diff --git a/schema_salad/tests/test_examples.py b/schema_salad/tests/test_examples.py
index 48462ac..6b0277c 100644
--- a/schema_salad/tests/test_examples.py
+++ b/schema_salad/tests/test_examples.py
@@ -1,41 +1,32 @@
+from .util import get_data
import unittest
import schema_salad.ref_resolver
import schema_salad.main
import schema_salad.schema
from schema_salad.jsonld_context import makerdf
-from pkg_resources import Requirement, resource_filename, ResolutionError # type: ignore
import rdflib
-import ruamel.yaml as yaml
+import ruamel.yaml
import json
import os
+from schema_salad.sourceline import cmap
try:
from ruamel.yaml import CSafeLoader as SafeLoader
except ImportError:
from ruamel.yaml import SafeLoader # type: ignore
-
-def get_data(filename):
- filepath = None
- try:
- filepath = resource_filename(
- Requirement.parse("schema-salad"), filename)
- except ResolutionError:
- pass
- if not filepath or not os.path.isfile(filepath):
- filepath = os.path.join(os.path.dirname(__file__), os.pardir, filename)
- return filepath
+from ruamel.yaml.comments import CommentedSeq, CommentedMap
class TestSchemas(unittest.TestCase):
def test_schemas(self):
l = schema_salad.ref_resolver.Loader({})
- ra, _ = l.resolve_all({
+ ra, _ = l.resolve_all(cmap({
u"$schemas": ["file://" + get_data("tests/EDAM.owl")],
u"$namespaces": {u"edam": u"http://edamontology.org/"},
u"edam:has_format": u"edam:format_1915"
- }, "")
+ }), "")
self.assertEqual({
u"$schemas": ["file://" + get_data("tests/EDAM.owl")],
@@ -74,7 +65,7 @@ class TestSchemas(unittest.TestCase):
argsl=[get_data("tests/Process.yml")]))
def test_jsonld_ctx(self):
- ldr, _, _, _ = schema_salad.schema.load_schema({
+ ldr, _, _, _ = schema_salad.schema.load_schema(cmap({
"$base": "Y",
"name": "X",
"$namespaces": {
@@ -84,9 +75,9 @@ class TestSchemas(unittest.TestCase):
"name": "ExampleType",
"type": "enum",
"symbols": ["asym", "bsym"]}]
- })
+ }))
- ra, _ = ldr.resolve_all({"foo:bar": "asym"}, "X")
+ ra, _ = ldr.resolve_all(cmap({"foo:bar": "asym"}), "X")
self.assertEqual(ra, {
'http://example.com/foo#bar': 'asym'
@@ -106,7 +97,7 @@ class TestSchemas(unittest.TestCase):
},
"id": "@id"})
- ra, _ = ldr.resolve_all({
+ ra, _ = ldr.resolve_all(cmap({
"id": "stuff",
"inputs": {
"zip": 1,
@@ -116,7 +107,7 @@ class TestSchemas(unittest.TestCase):
"other": {
'n': 9
}
- }, "http://example2.com/")
+ }), "http://example2.com/")
self.assertEqual("http://example2.com/#stuff", ra["id"])
for item in ra["inputs"]:
@@ -159,7 +150,7 @@ class TestSchemas(unittest.TestCase):
},
"id": "@id"})
- ra, _ = ldr.resolve_all({
+ ra, _ = ldr.resolve_all(cmap({
"inputs": {
"inp": "string",
"inp2": "string"
@@ -188,7 +179,7 @@ class TestSchemas(unittest.TestCase):
"out": ["out"]
}
}
- }, "http://example2.com/")
+ }), "http://example2.com/")
self.assertEquals(
{'inputs': [{
@@ -234,13 +225,15 @@ class TestSchemas(unittest.TestCase):
get_data("metaschema/%s_schema.yml" % a))
with open(get_data("metaschema/%s_src.yml" % a)) as src_fp:
src = ldr.resolve_all(
- yaml.load(src_fp, Loader=SafeLoader), "", checklinks=False)[0]
+ ruamel.yaml.round_trip_load(src_fp), "",
+ checklinks=False)[0]
with open(get_data("metaschema/%s_proc.yml" % a)) as src_proc:
- proc = yaml.load(src_proc, Loader=SafeLoader)
+ proc = ruamel.yaml.safe_load(src_proc)
self.assertEqual(proc, src)
def test_yaml_float_test(self):
- self.assertEqual(yaml.load("float-test: 2e-10")["float-test"], 2e-10)
+ self.assertEqual(ruamel.yaml.safe_load("float-test: 2e-10")["float-test"],
+ 2e-10)
def test_typedsl_ref(self):
ldr = schema_salad.ref_resolver.Loader({})
@@ -254,16 +247,16 @@ class TestSchemas(unittest.TestCase):
}
})
- ra, _ = ldr.resolve_all({"type": "File"}, "")
+ ra, _ = ldr.resolve_all(cmap({"type": "File"}), "")
self.assertEqual({'type': 'File'}, ra)
- ra, _ = ldr.resolve_all({"type": "File?"}, "")
+ ra, _ = ldr.resolve_all(cmap({"type": "File?"}), "")
self.assertEqual({'type': ['null', 'File']}, ra)
- ra, _ = ldr.resolve_all({"type": "File[]"}, "")
+ ra, _ = ldr.resolve_all(cmap({"type": "File[]"}), "")
self.assertEqual({'type': {'items': 'File', 'type': 'array'}}, ra)
- ra, _ = ldr.resolve_all({"type": "File[]?"}, "")
+ ra, _ = ldr.resolve_all(cmap({"type": "File[]?"}), "")
self.assertEqual(
{'type': ['null', {'items': 'File', 'type': 'array'}]}, ra)
@@ -280,12 +273,12 @@ class TestSchemas(unittest.TestCase):
}
ldr.add_context(ctx)
- ra, _ = ldr.resolve_all({
+ ra, _ = ldr.resolve_all(cmap({
"id": "foo",
"bar": {
"id": "baz"
}
- }, "http://example.com")
+ }), "http://example.com")
self.assertEqual({'id': 'http://example.com/#foo',
'bar': {
'id': 'http://example.com/#foo/baz'},
@@ -294,12 +287,12 @@ class TestSchemas(unittest.TestCase):
g = makerdf(None, ra, ctx)
print(g.serialize(format="n3"))
- ra, _ = ldr.resolve_all({
+ ra, _ = ldr.resolve_all(cmap({
"location": "foo",
"bar": {
"location": "baz"
}
- }, "http://example.com", checklinks=False)
+ }), "http://example.com", checklinks=False)
self.assertEqual({'location': 'http://example.com/foo',
'bar': {
'location': 'http://example.com/baz'},
@@ -308,12 +301,12 @@ class TestSchemas(unittest.TestCase):
g = makerdf(None, ra, ctx)
print(g.serialize(format="n3"))
- ra, _ = ldr.resolve_all({
+ ra, _ = ldr.resolve_all(cmap({
"id": "foo",
"bar": {
"location": "baz"
}
- }, "http://example.com", checklinks=False)
+ }), "http://example.com", checklinks=False)
self.assertEqual({'id': 'http://example.com/#foo',
'bar': {
'location': 'http://example.com/baz'},
@@ -322,12 +315,12 @@ class TestSchemas(unittest.TestCase):
g = makerdf(None, ra, ctx)
print(g.serialize(format="n3"))
- ra, _ = ldr.resolve_all({
+ ra, _ = ldr.resolve_all(cmap({
"location": "foo",
"bar": {
"id": "baz"
}
- }, "http://example.com", checklinks=False)
+ }), "http://example.com", checklinks=False)
self.assertEqual({'location': 'http://example.com/foo',
'bar': {
'id': 'http://example.com/#baz'},
@@ -337,20 +330,20 @@ class TestSchemas(unittest.TestCase):
print(g.serialize(format="n3"))
def test_mixin(self):
+ base_url = "file://" + os.getcwd() + "/tests/"
ldr = schema_salad.ref_resolver.Loader({})
- ra = ldr.resolve_ref({"$mixin": get_data("tests/mixin.yml"), "one": "five"},
- base_url="file://" + os.getcwd() + "/tests/")
+ ra = ldr.resolve_ref(cmap({"$mixin": get_data("tests/mixin.yml"), "one": "five"}),
+ base_url=base_url)
self.assertEqual({'id': 'four', 'one': 'five'}, ra[0])
-
ldr = schema_salad.ref_resolver.Loader({"id": "@id"})
- base_url = "file://" + os.getcwd() + "/tests/"
- ra = ldr.resolve_all([{
+
+ ra = ldr.resolve_all(cmap([{
"id": "a",
"m": {"$mixin": get_data("tests/mixin.yml")}
}, {
"id": "b",
"m": {"$mixin": get_data("tests/mixin.yml")}
- }], base_url=base_url)
+ }]), base_url=base_url)
self.assertEqual([{
'id': base_url + '#a',
'm': {
@@ -364,6 +357,19 @@ class TestSchemas(unittest.TestCase):
'one': 'two'}
}], ra[0])
+ def test_fragment(self):
+ ldr = schema_salad.ref_resolver.Loader({"id": "@id"})
+ b, _ = ldr.resolve_ref(get_data("tests/frag.yml#foo2"))
+ self.assertEquals({"id": b["id"], "bar":"b2"}, b)
+
+ def test_file_uri(self):
+ # Note: this test probably won't pass on Windows. Someone with a
+        # Windows box should add an alternate test.
+ self.assertEquals("file:///foo/bar%20baz/quux", schema_salad.ref_resolver.file_uri("/foo/bar baz/quux"))
+ self.assertEquals("/foo/bar baz/quux", schema_salad.ref_resolver.uri_file_path("file:///foo/bar%20baz/quux"))
+ self.assertEquals("file:///foo/bar%20baz/quux#zing%20zong", schema_salad.ref_resolver.file_uri("/foo/bar baz/quux#zing zong"))
+ self.assertEquals("/foo/bar baz/quux#zing zong", schema_salad.ref_resolver.uri_file_path("file:///foo/bar%20baz/quux#zing%20zong"))
+
if __name__ == '__main__':
unittest.main()
diff --git a/schema_salad/tests/test_fetch.py~ b/schema_salad/tests/test_fetch.py~
new file mode 100644
index 0000000..422d945
--- /dev/null
+++ b/schema_salad/tests/test_fetch.py~
@@ -0,0 +1,13 @@
+import unittest
+import schema_salad.ref_resolver
+import schema_salad.main
+import schema_salad.schema
+from schema_salad.jsonld_context import makerdf
+from pkg_resources import Requirement, resource_filename, ResolutionError # type: ignore
+import rdflib
+import ruamel.yaml as yaml
+import json
+import os
+
+class TestFetcher(unittest.TestCase):
+ def test_schemas(self):
diff --git a/schema_salad/tests/test_schema/CommandLineTool.yml b/schema_salad/tests/test_schema/CommandLineTool.yml
new file mode 100644
index 0000000..181c51c
--- /dev/null
+++ b/schema_salad/tests/test_schema/CommandLineTool.yml
@@ -0,0 +1,894 @@
+$base: "https://w3id.org/cwl/cwl#"
+
+$namespaces:
+ cwl: "https://w3id.org/cwl/cwl#"
+
+$graph:
+
+- name: CommandLineToolDoc
+ type: documentation
+ doc:
+ - |
+ # Common Workflow Language (CWL) Command Line Tool Description, v1.0
+
+ This version:
+ * https://w3id.org/cwl/v1.0/
+
+ Current version:
+ * https://w3id.org/cwl/
+ - "\n\n"
+ - {$include: contrib.md}
+ - "\n\n"
+ - |
+ # Abstract
+
+ A Command Line Tool is a non-interactive executable program that reads
+ some input, performs a computation, and terminates after producing some
+ output. Command line programs are a flexible unit of code sharing and
+ reuse, unfortunately the syntax and input/output semantics among command
+ line programs is extremely heterogeneous. A common layer for describing
+ the syntax and semantics of programs can reduce this incidental
+ complexity by providing a consistent way to connect programs together.
+ This specification defines the Common Workflow Language (CWL) Command
+ Line Tool Description, a vendor-neutral standard for describing the
+ syntax and input/output semantics of command line programs.
+
+ - {$include: intro.md}
+
+ - |
+ ## Introduction to v1.0
+
+ This specification represents the first full release from the CWL group.
+ Since draft-3, version 1.0 introduces the following changes and additions:
+
+ * The [Directory](#Directory) type.
+      * Syntax simplifications: denoted by the `map<>` syntax. Example: inputs
+        contains a list of items, each with an id. Now one can specify
+        a mapping of that identifier to the corresponding
+        `CommandInputParameter`.
+ ```
+ inputs:
+ - id: one
+ type: string
+ doc: First input parameter
+ - id: two
+ type: int
+ doc: Second input parameter
+ ```
+ can be
+ ```
+ inputs:
+ one:
+ type: string
+ doc: First input parameter
+ two:
+ type: int
+ doc: Second input parameter
+ ```
+ * [InitialWorkDirRequirement](#InitialWorkDirRequirement): list of
+ files and subdirectories to be present in the output directory prior
+ to execution.
+ * Shortcuts for specifying the standard [output](#stdout) and/or
+ [error](#stderr) streams as a (streamable) File output.
+ * [SoftwareRequirement](#SoftwareRequirement) for describing software
+ dependencies of a tool.
+ * The common `description` field has been renamed to `doc`.
+
+ ## Errata
+
+ Post v1.0 release changes to the spec.
+
+ * 13 July 2016: Mark `baseCommand` as optional and update descriptive text.
+
+ ## Purpose
+
+ Standalone programs are a flexible and interoperable form of code reuse.
+    Unlike monolithic applications, applications and analysis workflows
+    composed of multiple separate programs can be written in multiple
+ languages and execute concurrently on multiple hosts. However, POSIX
+ does not dictate computer-readable grammar or semantics for program input
+ and output, resulting in extremely heterogeneous command line grammar and
+    input/output semantics among programs.  This is a particular problem in
+ distributed computing (multi-node compute clusters) and virtualized
+ environments (such as Docker containers) where it is often necessary to
+ provision resources such as input files before executing the program.
+
+ Often this gap is filled by hard coding program invocation and
+ implicitly assuming requirements will be met, or abstracting program
+ invocation with wrapper scripts or descriptor documents. Unfortunately,
+ where these approaches are application or platform specific it creates a
+ significant barrier to reproducibility and portability, as methods
+ developed for one platform must be manually ported to be used on new
+ platforms. Similarly it creates redundant work, as wrappers for popular
+ tools must be rewritten for each application or platform in use.
+
+ The Common Workflow Language Command Line Tool Description is designed to
+ provide a common standard description of grammar and semantics for
+ invoking programs used in data-intensive fields such as Bioinformatics,
+ Chemistry, Physics, Astronomy, and Statistics. This specification
+ defines a precise data and execution model for Command Line Tools that
+ can be implemented on a variety of computing platforms, ranging from a
+ single workstation to cluster, grid, cloud, and high performance
+ computing platforms.
+
+ - {$include: concepts.md}
+ - {$include: invocation.md}
+
+
+- type: record
+ name: EnvironmentDef
+ doc: |
+ Define an environment variable that will be set in the runtime environment
+ by the workflow platform when executing the command line tool. May be the
+ result of executing an expression, such as getting a parameter from input.
+ fields:
+ - name: envName
+ type: string
+ doc: The environment variable name
+ - name: envValue
+ type: [string, Expression]
+ doc: The environment variable value
+
+- type: record
+ name: CommandLineBinding
+ extends: InputBinding
+ doc: |
+
+ When listed under `inputBinding` in the input schema, the term
+ "value" refers to the the corresponding value in the input object. For
+ binding objects listed in `CommandLineTool.arguments`, the term "value"
+ refers to the effective value after evaluating `valueFrom`.
+
+ The binding behavior when building the command line depends on the data
+ type of the value. If there is a mismatch between the type described by
+ the input schema and the effective value, such as resulting from an
+ expression evaluation, an implementation must use the data type of the
+ effective value.
+
+ - **string**: Add `prefix` and the string to the command line.
+
+ - **number**: Add `prefix` and decimal representation to command line.
+
+ - **boolean**: If true, add `prefix` to the command line. If false, add
+ nothing.
+
+ - **File**: Add `prefix` and the value of
+ [`File.path`](#File) to the command line.
+
+      - **array**: If `itemSeparator` is specified, add `prefix` and join
+ the array into a single string with `itemSeparator` separating the
+ items. Otherwise first add `prefix`, then recursively process
+ individual elements.
+
+ - **object**: Add `prefix` only, and recursively add object fields for
+ which `inputBinding` is specified.
+
+ - **null**: Add nothing.
+
+ fields:
+ - name: position
+ type: int?
+ doc: "The sorting key. Default position is 0."
+ - name: prefix
+ type: string?
+ doc: "Command line prefix to add before the value."
+ - name: separate
+ type: boolean?
+ doc: |
+ If true (default), then the prefix and value must be added as separate
+ command line arguments; if false, prefix and value must be concatenated
+ into a single command line argument.
+ - name: itemSeparator
+ type: string?
+ doc: |
+ Join the array elements into a single string with the elements
+        separated by `itemSeparator`.
+ - name: valueFrom
+ type:
+ - "null"
+ - string
+ - Expression
+ jsonldPredicate: "cwl:valueFrom"
+ doc: |
+ If `valueFrom` is a constant string value, use this as the value and
+ apply the binding rules above.
+
+ If `valueFrom` is an expression, evaluate the expression to yield the
+ actual value to use to build the command line and apply the binding
+ rules above. If the inputBinding is associated with an input
+ parameter, the value of `self` in the expression will be the value of the
+ input parameter.
+
+ When a binding is part of the `CommandLineTool.arguments` field,
+ the `valueFrom` field is required.
+ - name: shellQuote
+ type: boolean?
+ doc: |
+ If `ShellCommandRequirement` is in the requirements for the current command,
+ this controls whether the value is quoted on the command line (default is true).
+ Use `shellQuote: false` to inject metacharacters for operations such as pipes.
+
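As an illustrative sketch of the binding rules above (input names and values invented), a boolean contributes only its `prefix`, while an array with `itemSeparator` is joined into a single argument:

```
inputs:
  verbose:
    type: boolean
    inputBinding:
      prefix: --verbose
  names:
    type: string[]
    inputBinding:
      prefix: --names
      itemSeparator: ","
```

With the input object `{"verbose": true, "names": ["a", "b"]}` this contributes `--verbose --names a,b` to the command line.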
+- type: record
+ name: CommandOutputBinding
+ extends: OutputBinding
+ doc: |
+ Describes how to generate an output parameter based on the files produced
+ by a CommandLineTool.
+
+ The output parameter is generated by applying these operations in
+ the following order:
+
+ - glob
+ - loadContents
+ - outputEval
+ fields:
+ - name: glob
+ type:
+ - "null"
+ - string
+ - Expression
+ - type: array
+ items: string
+ doc: |
+ Find files relative to the output directory, using POSIX glob(3)
+ pathname matching. If an array is provided, find files that match any
+ pattern in the array. If an expression is provided, the expression must
+ return a string or an array of strings, which will then be evaluated as
+ one or more glob patterns. Must only match and return files which
+ actually exist.
+ - name: loadContents
+ type:
+ - "null"
+ - boolean
+ jsonldPredicate: "cwl:loadContents"
+ doc: |
+ For each file matched in `glob`, read up to
+ the first 64 KiB of text from the file and place it in the `contents`
+ field of the file object for manipulation by `outputEval`.
+ - name: outputEval
+ type:
+ - "null"
+ - string
+ - Expression
+ doc: |
+ Evaluate an expression to generate the output value. If `glob` was
+ specified, the value of `self` must be an array containing file objects
+ that were matched. If no files were matched, `self` must be a zero
+ length array; if a single file was matched, the value of `self` is an
+ array of a single element. Additionally, if `loadContents` is `true`,
+ the File objects must include up to the first 64 KiB of file contents
+ in the `contents` field.
+
+
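As a sketch of how the three operations compose (the file name is invented, and the `outputEval` expression assumes `InlineJavascriptRequirement` is in effect):

```
outputs:
  first_line:
    type: string
    outputBinding:
      glob: output.txt        # 1. match files in the output directory
      loadContents: true      # 2. read up to 64 KiB into `contents`
      outputEval: $(self[0].contents.split("\n")[0])   # 3. compute the value
```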
+- name: CommandInputRecordField
+ type: record
+ extends: InputRecordField
+ specialize:
+ - specializeFrom: InputRecordSchema
+ specializeTo: CommandInputRecordSchema
+ - specializeFrom: InputEnumSchema
+ specializeTo: CommandInputEnumSchema
+ - specializeFrom: InputArraySchema
+ specializeTo: CommandInputArraySchema
+ - specializeFrom: InputBinding
+ specializeTo: CommandLineBinding
+
+
+- name: CommandInputRecordSchema
+ type: record
+ extends: InputRecordSchema
+ specialize:
+ - specializeFrom: InputRecordField
+ specializeTo: CommandInputRecordField
+
+
+- name: CommandInputEnumSchema
+ type: record
+ extends: InputEnumSchema
+ specialize:
+ - specializeFrom: InputBinding
+ specializeTo: CommandLineBinding
+
+
+- name: CommandInputArraySchema
+ type: record
+ extends: InputArraySchema
+ specialize:
+ - specializeFrom: InputRecordSchema
+ specializeTo: CommandInputRecordSchema
+ - specializeFrom: InputEnumSchema
+ specializeTo: CommandInputEnumSchema
+ - specializeFrom: InputArraySchema
+ specializeTo: CommandInputArraySchema
+ - specializeFrom: InputBinding
+ specializeTo: CommandLineBinding
+
+
+- name: CommandOutputRecordField
+ type: record
+ extends: OutputRecordField
+ specialize:
+ - specializeFrom: OutputRecordSchema
+ specializeTo: CommandOutputRecordSchema
+ - specializeFrom: OutputEnumSchema
+ specializeTo: CommandOutputEnumSchema
+ - specializeFrom: OutputArraySchema
+ specializeTo: CommandOutputArraySchema
+ - specializeFrom: OutputBinding
+ specializeTo: CommandOutputBinding
+
+
+- name: CommandOutputRecordSchema
+ type: record
+ extends: OutputRecordSchema
+ specialize:
+ - specializeFrom: OutputRecordField
+ specializeTo: CommandOutputRecordField
+
+
+- name: CommandOutputEnumSchema
+ type: record
+ extends: OutputEnumSchema
+ specialize:
+ - specializeFrom: OutputRecordSchema
+ specializeTo: CommandOutputRecordSchema
+ - specializeFrom: OutputEnumSchema
+ specializeTo: CommandOutputEnumSchema
+ - specializeFrom: OutputArraySchema
+ specializeTo: CommandOutputArraySchema
+ - specializeFrom: OutputBinding
+ specializeTo: CommandOutputBinding
+
+
+- name: CommandOutputArraySchema
+ type: record
+ extends: OutputArraySchema
+ specialize:
+ - specializeFrom: OutputRecordSchema
+ specializeTo: CommandOutputRecordSchema
+ - specializeFrom: OutputEnumSchema
+ specializeTo: CommandOutputEnumSchema
+ - specializeFrom: OutputArraySchema
+ specializeTo: CommandOutputArraySchema
+ - specializeFrom: OutputBinding
+ specializeTo: CommandOutputBinding
+
+
+- type: record
+ name: CommandInputParameter
+ extends: InputParameter
+ doc: An input parameter for a CommandLineTool.
+ specialize:
+ - specializeFrom: InputRecordSchema
+ specializeTo: CommandInputRecordSchema
+ - specializeFrom: InputEnumSchema
+ specializeTo: CommandInputEnumSchema
+ - specializeFrom: InputArraySchema
+ specializeTo: CommandInputArraySchema
+ - specializeFrom: InputBinding
+ specializeTo: CommandLineBinding
+
+- type: record
+ name: CommandOutputParameter
+ extends: OutputParameter
+ doc: An output parameter for a CommandLineTool.
+ specialize:
+ - specializeFrom: OutputBinding
+ specializeTo: CommandOutputBinding
+ fields:
+ - name: type
+ type:
+ - "null"
+ - CWLType
+ - stdout
+ - stderr
+ - CommandOutputRecordSchema
+ - CommandOutputEnumSchema
+ - CommandOutputArraySchema
+ - string
+ - type: array
+ items:
+ - CWLType
+ - CommandOutputRecordSchema
+ - CommandOutputEnumSchema
+ - CommandOutputArraySchema
+ - string
+ jsonldPredicate:
+ "_id": "sld:type"
+ "_type": "@vocab"
+ refScope: 2
+ typeDSL: True
+ doc: |
+ Specify valid types of data that may be assigned to this parameter.
+
+- name: stdout
+ type: enum
+ symbols: [ "cwl:stdout" ]
+ docParent: "#CommandOutputParameter"
+ doc: |
+ Only valid as a `type` for a `CommandLineTool` output with no
+ `outputBinding` set.
+
+ The following
+ ```
+ outputs:
+ an_output_name:
+ type: stdout
+
+ stdout: a_stdout_file
+ ```
+ is equivalent to
+ ```
+ outputs:
+ an_output_name:
+ type: File
+ streamable: true
+ outputBinding:
+ glob: a_stdout_file
+
+ stdout: a_stdout_file
+ ```
+
+ If there is no `stdout` name provided, a random filename will be created.
+ For example, the following
+ ```
+ outputs:
+ an_output_name:
+ type: stdout
+ ```
+ is equivalent to
+ ```
+ outputs:
+ an_output_name:
+ type: File
+ streamable: true
+ outputBinding:
+ glob: random_stdout_filenameABCDEFG
+
+ stdout: random_stdout_filenameABCDEFG
+ ```
+
+
+- name: stderr
+ type: enum
+ symbols: [ "cwl:stderr" ]
+ docParent: "#CommandOutputParameter"
+ doc: |
+ Only valid as a `type` for a `CommandLineTool` output with no
+ `outputBinding` set.
+
+ The following
+ ```
+ outputs:
+ an_output_name:
+ type: stderr
+
+ stderr: a_stderr_file
+ ```
+ is equivalent to
+ ```
+ outputs:
+ an_output_name:
+ type: File
+ streamable: true
+ outputBinding:
+ glob: a_stderr_file
+
+ stderr: a_stderr_file
+ ```
+
+ If there is no `stderr` name provided, a random filename will be created.
+ For example, the following
+ ```
+ outputs:
+ an_output_name:
+ type: stderr
+ ```
+ is equivalent to
+ ```
+ outputs:
+ an_output_name:
+ type: File
+ streamable: true
+ outputBinding:
+ glob: random_stderr_filenameABCDEFG
+
+ stderr: random_stderr_filenameABCDEFG
+ ```
+
+
+- type: record
+ name: CommandLineTool
+ extends: Process
+ documentRoot: true
+ specialize:
+ - specializeFrom: InputParameter
+ specializeTo: CommandInputParameter
+ - specializeFrom: OutputParameter
+ specializeTo: CommandOutputParameter
+ doc: |
+ This defines the schema of the CWL Command Line Tool Description document.
+
+ fields:
+ - name: class
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ type: string
+ - name: baseCommand
+ doc: |
+ Specifies the program to execute. If an array, the first element of
+ the array is the command to execute, and subsequent elements are
+ mandatory command line arguments. The elements in `baseCommand` must
+ appear before any command line bindings from `inputBinding` or
+ `arguments`.
+
+ If `baseCommand` is not provided or is an empty array, the first
+ element of the command line produced after processing `inputBinding` or
+ `arguments` must be used as the program to execute.
+
+      If the program includes a path separator character it must
+      be an absolute path; otherwise it is an error.  If the program does not
+      include a path separator, search the `$PATH` variable in the runtime
+      environment of the workflow runner to find the absolute path of the
+      executable.
+ type:
+ - string?
+ - string[]?
+ jsonldPredicate:
+ "_id": "cwl:baseCommand"
+ "_container": "@list"
+ - name: arguments
+ doc: |
+ Command line bindings which are not directly associated with input parameters.
+ type:
+ - "null"
+ - type: array
+ items: [string, Expression, CommandLineBinding]
+ jsonldPredicate:
+ "_id": "cwl:arguments"
+ "_container": "@list"
+ - name: stdin
+ type: ["null", string, Expression]
+ doc: |
+ A path to a file whose contents must be piped into the command's
+ standard input stream.
+ - name: stderr
+ type: ["null", string, Expression]
+ jsonldPredicate: "https://w3id.org/cwl/cwl#stderr"
+ doc: |
+ Capture the command's standard error stream to a file written to
+ the designated output directory.
+
+ If `stderr` is a string, it specifies the file name to use.
+
+ If `stderr` is an expression, the expression is evaluated and must
+ return a string with the file name to use to capture stderr. If the
+ return value is not a string, or the resulting path contains illegal
+ characters (such as the path separator `/`) it is an error.
+ - name: stdout
+ type: ["null", string, Expression]
+ jsonldPredicate: "https://w3id.org/cwl/cwl#stdout"
+ doc: |
+ Capture the command's standard output stream to a file written to
+ the designated output directory.
+
+ If `stdout` is a string, it specifies the file name to use.
+
+ If `stdout` is an expression, the expression is evaluated and must
+ return a string with the file name to use to capture stdout. If the
+ return value is not a string, or the resulting path contains illegal
+ characters (such as the path separator `/`) it is an error.
+ - name: successCodes
+ type: int[]?
+ doc: |
+ Exit codes that indicate the process completed successfully.
+
+ - name: temporaryFailCodes
+ type: int[]?
+ doc: |
+ Exit codes that indicate the process failed due to a possibly
+ temporary condition, where executing the process with the same
+ runtime environment and inputs may produce different results.
+
+ - name: permanentFailCodes
+ type: int[]?
+ doc:
+ Exit codes that indicate the process failed due to a permanent logic
+ error, where executing the process with the same runtime environment and
+ same inputs is expected to always fail.
+
+
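For orientation, a minimal complete document conforming to this record might look like the following sketch (tool and file names invented):

```
cwlVersion: v1.0
class: CommandLineTool
baseCommand: [tar, xf]
inputs:
  tarball:
    type: File
    inputBinding:
      position: 1
outputs:
  extracted:
    type: File
    outputBinding:
      glob: hello.txt
```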
+- type: record
+ name: DockerRequirement
+ extends: ProcessRequirement
+ doc: |
+ Indicates that a workflow component should be run in a
+ [Docker](http://docker.com) container, and specifies how to fetch or build
+ the image.
+
+ If a CommandLineTool lists `DockerRequirement` under
+ `hints` (or `requirements`), it may (or must) be run in the specified Docker
+ container.
+
+ The platform must first acquire or install the correct Docker image as
+ specified by `dockerPull`, `dockerImport`, `dockerLoad` or `dockerFile`.
+
+ The platform must execute the tool in the container using `docker run` with
+ the appropriate Docker image and tool command line.
+
+ The workflow platform may provide input files and the designated output
+ directory through the use of volume bind mounts. The platform may rewrite
+ file paths in the input object to correspond to the Docker bind mounted
+ locations.
+
+ When running a tool contained in Docker, the workflow platform must not
+ assume anything about the contents of the Docker container, such as the
+ presence or absence of specific software, except to assume that the
+ generated command line represents a valid command within the runtime
+ environment of the container.
+
+ ## Interaction with other requirements
+
+ If [EnvVarRequirement](#EnvVarRequirement) is specified alongside a
+ DockerRequirement, the environment variables must be provided to Docker
+ using `--env` or `--env-file` and interact with the container's preexisting
+ environment as defined by Docker.
+
+ fields:
+ - name: class
+ type: string
+ doc: "Always 'DockerRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ - name: dockerPull
+ type: string?
+ doc: "Specify a Docker image to retrieve using `docker pull`."
+ - name: dockerLoad
+ type: string?
+ doc: "Specify a HTTP URL from which to download a Docker image using `docker load`."
+ - name: dockerFile
+ type: string?
+ doc: "Supply the contents of a Dockerfile which will be built using `docker build`."
+ - name: dockerImport
+ type: string?
+ doc: "Provide HTTP URL to download and gunzip a Docker images using `docker import."
+ - name: dockerImageId
+ type: string?
+ doc: |
+ The image id that will be used for `docker run`. May be a
+ human-readable image name or the image identifier hash. May be skipped
+ if `dockerPull` is specified, in which case the `dockerPull` image id
+ must be used.
+ - name: dockerOutputDirectory
+ type: string?
+ doc: |
+ Set the designated output directory to a specific location inside the
+ Docker container.
+
+
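A typical usage sketch (the image name is invented); listing the requirement under `hints` asks, rather than demands, that the tool run in the named container:

```
hints:
  - class: DockerRequirement
    dockerPull: debian:stretch
```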
+- type: record
+ name: SoftwareRequirement
+ extends: ProcessRequirement
+ doc: |
+ A list of software packages that should be configured in the environment of
+ the defined process.
+ fields:
+ - name: class
+ type: string
+ doc: "Always 'SoftwareRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ - name: packages
+ type: SoftwarePackage[]
+ doc: "The list of software to be configured."
+ jsonldPredicate:
+ mapSubject: package
+ mapPredicate: specs
+
+- name: SoftwarePackage
+ type: record
+ fields:
+ - name: package
+ type: string
+ doc: "The common name of the software to be configured."
+ - name: version
+ type: string[]?
+ doc: "The (optional) version of the software to configured."
+ - name: specs
+ type: string[]?
+ doc: |
+ Must be one or more IRIs identifying resources for installing or
+ enabling the software. Implementations may provide resolvers which map
+ well-known software spec IRIs to some configuration action.
+
+ For example, an IRI `https://packages.debian.org/jessie/bowtie` could
+ be resolved with `apt-get install bowtie`. An IRI
+ `https://anaconda.org/bioconda/bowtie` could be resolved with `conda
+ install -c bioconda bowtie`.
+
+ Tools may also provide IRIs to index entries such as
+      [RRID](http://www.identifiers.org/rrid/), for example
+      `http://identifiers.org/rrid/RRID:SCR_005476`.
+
+
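A sketch tying the two records together, reusing the `bowtie` example from the `specs` documentation (the version number is invented):

```
hints:
  - class: SoftwareRequirement
    packages:
      - package: bowtie
        version: ["1.1.2"]
        specs: ["https://anaconda.org/bioconda/bowtie"]
```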
+- name: Dirent
+ type: record
+ doc: |
+ Define a file or subdirectory that must be placed in the designated output
+ directory prior to executing the command line tool. May be the result of
+ executing an expression, such as building a configuration file from a
+ template.
+ fields:
+ - name: entryname
+ type: ["null", string, Expression]
+ jsonldPredicate:
+ _id: cwl:entryname
+ doc: |
+ The name of the file or subdirectory to create in the output directory.
+ If `entry` is a File or Directory, this overrides `basename`. Optional.
+ - name: entry
+ type: [string, Expression]
+ jsonldPredicate:
+ _id: cwl:entry
+ doc: |
+ If the value is a string literal or an expression which evaluates to a
+ string, a new file must be created with the string as the file contents.
+
+ If the value is an expression that evaluates to a `File` object, this
+ indicates the referenced file should be added to the designated output
+ directory prior to executing the tool.
+
+ If the value is an expression that evaluates to a `Dirent` object, this
+ indicates that the File or Directory in `entry` should be added to the
+ designated output directory with the name in `entryname`.
+
+ If `writable` is false, the file may be made available using a bind
+ mount or file system link to avoid unnecessary copying of the input
+ file.
+ - name: writable
+ type: boolean?
+ doc: |
+ If true, the file or directory must be writable by the tool. Changes
+ to the file or directory must be isolated and not visible by any other
+ CommandLineTool process. This may be implemented by making a copy of
+ the original file or directory. Default false (files and directories
+ read-only by default).
+
+
+- name: InitialWorkDirRequirement
+ type: record
+ extends: ProcessRequirement
+ doc:
+ Define a list of files and subdirectories that must be created by the
+ workflow platform in the designated output directory prior to executing the
+ command line tool.
+ fields:
+ - name: class
+ type: string
+ doc: InitialWorkDirRequirement
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ - name: listing
+ type:
+ - type: array
+ items: [File, Directory, Dirent, string, Expression]
+ - string
+ - Expression
+ jsonldPredicate:
+ _id: "cwl:listing"
+ doc: |
+ The list of files or subdirectories that must be placed in the
+ designated output directory prior to executing the command line tool.
+
+ May be an expression. If so, the expression return value must validate
+ as `{type: array, items: [File, Directory]}`.
+
+
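A sketch of the configuration-file pattern described under `Dirent` (the input name `infile` and the file contents are invented); the `entry` template is evaluated and written to `settings.conf` in the output directory before the tool runs:

```
requirements:
  - class: InitialWorkDirRequirement
    listing:
      - entryname: settings.conf
        entry: |
          threads=4
          input=$(inputs.infile.path)
```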
+- name: EnvVarRequirement
+ type: record
+ extends: ProcessRequirement
+ doc: |
+ Define a list of environment variables which will be set in the
+ execution environment of the tool. See `EnvironmentDef` for details.
+ fields:
+ - name: class
+ type: string
+ doc: "Always 'EnvVarRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ - name: envDef
+ type: EnvironmentDef[]
+ doc: The list of environment variables.
+ jsonldPredicate:
+ mapSubject: envName
+ mapPredicate: envValue
+
+
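A minimal sketch; thanks to the `mapSubject`/`mapPredicate` hints, `envDef` may equivalently be written as a plain `NAME: value` mapping:

```
requirements:
  - class: EnvVarRequirement
    envDef:
      - envName: LC_ALL
        envValue: C
```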
+- type: record
+ name: ShellCommandRequirement
+ extends: ProcessRequirement
+ doc: |
+ Modify the behavior of CommandLineTool to generate a single string
+ containing a shell command line. Each item in the argument list must be
+ joined into a string separated by single spaces and quoted to prevent
+    interpretation by the shell, unless `CommandLineBinding` for that argument
+ contains `shellQuote: false`. If `shellQuote: false` is specified, the
+ argument is joined into the command string without quoting, which allows
+ the use of shell metacharacters such as `|` for pipes.
+ fields:
+ - name: class
+ type: string
+ doc: "Always 'ShellCommandRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+
+
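A sketch of the escape hatch described above (command and file names invented); with `shellQuote: false` the argument reaches the shell unquoted, so the pipe takes effect:

```
requirements:
  - class: ShellCommandRequirement
arguments:
  - shellQuote: false
    valueFrom: "grep pattern input.txt | wc -l"
```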
+- type: record
+ name: ResourceRequirement
+ extends: ProcessRequirement
+ doc: |
+ Specify basic hardware resource requirements.
+
+ "min" is the minimum amount of a resource that must be reserved to schedule
+ a job. If "min" cannot be satisfied, the job should not be run.
+
+ "max" is the maximum amount of a resource that the job shall be permitted
+ to use. If a node has sufficient resources, multiple jobs may be scheduled
+ on a single node provided each job's "max" resource requirements are
+ met. If a job attempts to exceed its "max" resource allocation, an
+ implementation may deny additional resources, which may result in job
+ failure.
+
+ If "min" is specified but "max" is not, then "max" == "min"
+ If "max" is specified by "min" is not, then "min" == "max".
+
+ It is an error if max < min.
+
+ It is an error if the value of any of these fields is negative.
+
+ If neither "min" nor "max" is specified for a resource, an implementation may provide a default.
+
+ fields:
+ - name: class
+ type: string
+ doc: "Always 'ResourceRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ - name: coresMin
+ type: ["null", long, string, Expression]
+ doc: Minimum reserved number of CPU cores
+
+ - name: coresMax
+ type: ["null", int, string, Expression]
+ doc: Maximum reserved number of CPU cores
+
+ - name: ramMin
+ type: ["null", long, string, Expression]
+ doc: Minimum reserved RAM in mebibytes (2**20)
+
+ - name: ramMax
+ type: ["null", long, string, Expression]
+ doc: Maximum reserved RAM in mebibytes (2**20)
+
+ - name: tmpdirMin
+ type: ["null", long, string, Expression]
+ doc: Minimum reserved filesystem based storage for the designated temporary directory, in mebibytes (2**20)
+
+ - name: tmpdirMax
+ type: ["null", long, string, Expression]
+ doc: Maximum reserved filesystem based storage for the designated temporary directory, in mebibytes (2**20)
+
+ - name: outdirMin
+ type: ["null", long, string, Expression]
+ doc: Minimum reserved filesystem based storage for the designated output directory, in mebibytes (2**20)
+
+ - name: outdirMax
+ type: ["null", long, string, Expression]
+ doc: Maximum reserved filesystem based storage for the designated output directory, in mebibytes (2**20)
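Applying the min/max rules above, a sketch such as the following (numbers invented) reserves two cores and 4 GiB of RAM; because no `coresMax` or `ramMax` is given, the maxima default to the minima:

```
requirements:
  - class: ResourceRequirement
    coresMin: 2
    ramMin: 4096   # mebibytes, i.e. 4 GiB
```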
diff --git a/schema_salad/tests/test_schema/CommonWorkflowLanguage.yml b/schema_salad/tests/test_schema/CommonWorkflowLanguage.yml
new file mode 100644
index 0000000..73921e8
--- /dev/null
+++ b/schema_salad/tests/test_schema/CommonWorkflowLanguage.yml
@@ -0,0 +1,11 @@
+$base: "https://w3id.org/cwl/cwl#"
+
+$namespaces:
+ cwl: "https://w3id.org/cwl/cwl#"
+ sld: "https://w3id.org/cwl/salad#"
+
+$graph:
+
+- $import: Process.yml
+- $import: CommandLineTool.yml
+- $import: Workflow.yml
diff --git a/schema_salad/tests/test_schema/Process.yml b/schema_salad/tests/test_schema/Process.yml
new file mode 100644
index 0000000..8b9bce5
--- /dev/null
+++ b/schema_salad/tests/test_schema/Process.yml
@@ -0,0 +1,743 @@
+$base: "https://w3id.org/cwl/cwl#"
+
+$namespaces:
+ cwl: "https://w3id.org/cwl/cwl#"
+ sld: "https://w3id.org/cwl/salad#"
+
+$graph:
+
+- name: "Common Workflow Language, v1.0"
+ type: documentation
+ doc: {$include: concepts.md}
+
+- $import: "metaschema_base.yml"
+
+- name: BaseTypesDoc
+ type: documentation
+ doc: |
+ ## Base types
+ docChild:
+ - "#CWLType"
+ - "#Process"
+
+- type: enum
+ name: CWLVersion
+ doc: "Version symbols for published CWL document versions."
+ symbols:
+ - cwl:draft-2
+ - cwl:draft-3.dev1
+ - cwl:draft-3.dev2
+ - cwl:draft-3.dev3
+ - cwl:draft-3.dev4
+ - cwl:draft-3.dev5
+ - cwl:draft-3
+ - cwl:draft-4.dev1
+ - cwl:draft-4.dev2
+ - cwl:draft-4.dev3
+ - cwl:v1.0.dev4
+ - cwl:v1.0
+
+- name: CWLType
+ type: enum
+ extends: "sld:PrimitiveType"
+ symbols:
+ - cwl:File
+ - cwl:Directory
+ doc:
+ - "Extends primitive types with the concept of a file and directory as a builtin type."
+ - "File: A File object"
+ - "Directory: A Directory object"
+
+- name: File
+ type: record
+ docParent: "#CWLType"
+ doc: |
+ Represents a file (or group of files if `secondaryFiles` is specified) that
+ must be accessible by tools using standard POSIX file system call API such as
+ open(2) and read(2).
+ fields:
+ - name: class
+ type:
+ type: enum
+ name: File_class
+ symbols:
+ - cwl:File
+ jsonldPredicate:
+ _id: "@type"
+ _type: "@vocab"
+ doc: Must be `File` to indicate this object describes a file.
+ - name: location
+ type: string?
+ doc: |
+ An IRI that identifies the file resource. This may be a relative
+ reference, in which case it must be resolved using the base IRI of the
+ document. The location may refer to a local or remote resource; the
+ implementation must use the IRI to retrieve file content. If an
+ implementation is unable to retrieve the file content stored at a
+ remote resource (due to unsupported protocol, access denied, or other
+ issue) it must signal an error.
+
+ If the `location` field is not provided, the `contents` field must be
+ provided. The implementation must assign a unique identifier for
+ the `location` field.
+
+ If the `path` field is provided but the `location` field is not, an
+ implementation may assign the value of the `path` field to `location`,
+ then follow the rules above.
+ jsonldPredicate:
+ _id: "@id"
+ _type: "@id"
+ - name: path
+ type: string?
+ doc: |
+ The local host path where the File is available when a CommandLineTool is
+ executed. This field must be set by the implementation. The final
+ path component must match the value of `basename`. This field
+ must not be used in any other context. The command line tool being
+      executed must be able to access the file at `path` using the POSIX
+ `open(2)` syscall.
+
+ As a special case, if the `path` field is provided but the `location`
+ field is not, an implementation may assign the value of the `path`
+ field to `location`, and remove the `path` field.
+
+ If the `path` contains [POSIX shell metacharacters](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02)
+ (`|`,`&`, `;`, `<`, `>`, `(`,`)`, `$`,`` ` ``, `\`, `"`, `'`,
+ `<space>`, `<tab>`, and `<newline>`) or characters
+ [not allowed](http://www.iana.org/assignments/idna-tables-6.3.0/idna-tables-6.3.0.xhtml)
+ for [Internationalized Domain Names for Applications](https://tools.ietf.org/html/rfc6452)
+ then implementations may terminate the process with a
+ `permanentFailure`.
+ jsonldPredicate:
+ "_id": "cwl:path"
+ "_type": "@id"
+ - name: basename
+ type: string?
+ doc: |
+ The base name of the file, that is, the name of the file without any
+ leading directory path. The base name must not contain a slash `/`.
+
+ If not provided, the implementation must set this field based on the
+ `location` field by taking the final path component after parsing
+ `location` as an IRI. If `basename` is provided, it is not required to
+ match the value from `location`.
+
+ When this file is made available to a CommandLineTool, it must be named
+ with `basename`, i.e. the final component of the `path` field must match
+ `basename`.
+ jsonldPredicate: "cwl:basename"
+ - name: dirname
+ type: string?
+ doc: |
+ The name of the directory containing file, that is, the path leading up
+ to the final slash in the path such that `dirname + '/' + basename ==
+ path`.
+
+ The implementation must set this field based on the value of `path`
+ prior to evaluating parameter references or expressions in a
+ CommandLineTool document. This field must not be used in any other
+ context.
+ - name: nameroot
+ type: string?
+ doc: |
+ The basename root such that `nameroot + nameext == basename`, and
+ `nameext` is empty or begins with a period and contains at most one
+      period.  For the purposes of path splitting, leading periods on the
+ basename are ignored; a basename of `.cshrc` will have a nameroot of
+ `.cshrc`.
+
+ The implementation must set this field automatically based on the value
+ of `basename` prior to evaluating parameter references or expressions.
+ - name: nameext
+ type: string?
+ doc: |
+ The basename extension such that `nameroot + nameext == basename`, and
+ `nameext` is empty or begins with a period and contains at most one
+ period. Leading periods on the basename are ignored; a basename of
+ `.cshrc` will have an empty `nameext`.
+
+ The implementation must set this field automatically based on the value
+ of `basename` prior to evaluating parameter references or expressions.
+ - name: checksum
+ type: string?
+ doc: |
+ Optional hash code for validating file integrity. Currently must be in the form
+ "sha1$ + hexadecimal string" using the SHA-1 algorithm.
+ - name: size
+ type: long?
+ doc: Optional file size
+ - name: "secondaryFiles"
+ type:
+ - "null"
+ - type: array
+ items: [File, Directory]
+ jsonldPredicate: "cwl:secondaryFiles"
+ doc: |
+ A list of additional files that are associated with the primary file
+ and must be transferred alongside the primary file. Examples include
+ indexes of the primary file, or external references which must be
+      included when loading the primary document.  A file object listed in
+ `secondaryFiles` may itself include `secondaryFiles` for which the same
+ rules apply.
+ - name: format
+ type: string?
+ jsonldPredicate:
+ _id: cwl:format
+ _type: "@id"
+ identity: true
+ doc: |
+ The format of the file: this must be an IRI of a concept node that
+      represents the file format, preferably defined within an ontology.
+ If no ontology is available, file formats may be tested by exact match.
+
+      Reasoning about format compatibility must be done by checking that an
+      input file format is the same, `owl:equivalentClass` or
+      `rdfs:subClassOf` the format required by the input parameter.
+      `owl:equivalentClass` is transitive with `rdfs:subClassOf`, e.g. if
+      `<B> owl:equivalentClass <C>` and `<B> rdfs:subClassOf <A>` then infer
+      `<C> rdfs:subClassOf <A>`.
+
+ File format ontologies may be provided in the "$schema" metadata at the
+ root of the document. If no ontologies are specified in `$schema`, the
+ runtime may perform exact file format matches.
+ - name: contents
+ type: string?
+ doc: |
+ File contents literal. Maximum of 64 KiB.
+
+ If neither `location` nor `path` is provided, `contents` must be
+ non-null. The implementation must assign a unique identifier for the
+ `location` field. When the file is staged as input to CommandLineTool,
+ the value of `contents` must be written to a file.
+
+ If `loadContents` of `inputBinding` or `outputBinding` is true and
+ `location` is valid, the implementation must read up to the first 64
+ KiB of text from the file and place it in the "contents" field.
+
+
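Putting the fields above together, a File object might appear in an input object as in this sketch (all values invented; `format` shown as an ontology concept IRI):

```
class: File
location: https://example.com/data/sample.fastq
basename: sample.fastq
nameroot: sample
nameext: .fastq
size: 4096
format: http://edamontology.org/format_1930
```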
+- name: Directory
+ type: record
+ docAfter: "#File"
+ doc: |
+ Represents a directory to present to a command line tool.
+ fields:
+ - name: class
+ type:
+ type: enum
+ name: Directory_class
+ symbols:
+ - cwl:Directory
+ jsonldPredicate:
+ _id: "@type"
+ _type: "@vocab"
+ doc: Must be `Directory` to indicate this object describes a Directory.
+ - name: location
+ type: string?
+ doc: |
+ An IRI that identifies the directory resource. This may be a relative
+ reference, in which case it must be resolved using the base IRI of the
+ document. The location may refer to a local or remote resource. If
+ the `listing` field is not set, the implementation must use the
+ location IRI to retrieve directory listing. If an implementation is
+ unable to retrieve the directory listing stored at a remote resource (due to
+ unsupported protocol, access denied, or other issue) it must signal an
+ error.
+
+ If the `location` field is not provided, the `listing` field must be
+ provided. The implementation must assign a unique identifier for
+ the `location` field.
+
+ If the `path` field is provided but the `location` field is not, an
+ implementation may assign the value of the `path` field to `location`,
+ then follow the rules above.
+ jsonldPredicate:
+ _id: "@id"
+ _type: "@id"
+ - name: path
+ type: string?
+ doc: |
+ The local path where the Directory is made available prior to executing a
+ CommandLineTool. This must be set by the implementation. This field
+ must not be used in any other context. The command line tool being
+      executed must be able to access the directory at `path` using the POSIX
+ `opendir(2)` syscall.
+
+ If the `path` contains [POSIX shell metacharacters](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02)
+ (`|`,`&`, `;`, `<`, `>`, `(`,`)`, `$`,`` ` ``, `\`, `"`, `'`,
+ `<space>`, `<tab>`, and `<newline>`) or characters
+ [not allowed](http://www.iana.org/assignments/idna-tables-6.3.0/idna-tables-6.3.0.xhtml)
+ for [Internationalized Domain Names for Applications](https://tools.ietf.org/html/rfc6452)
+ then implementations may terminate the process with a
+ `permanentFailure`.
+ jsonldPredicate:
+ _id: "cwl:path"
+ _type: "@id"
+ - name: basename
+ type: string?
+ doc: |
+      The base name of the directory, that is, the name of the directory
+      without any leading directory path.  The base name must not contain a
+      slash `/`.
+
+ If not provided, the implementation must set this field based on the
+ `location` field by taking the final path component after parsing
+ `location` as an IRI. If `basename` is provided, it is not required to
+ match the value from `location`.
+
+      When this directory is made available to a CommandLineTool, it must be named
+ with `basename`, i.e. the final component of the `path` field must match
+ `basename`.
+ jsonldPredicate: "cwl:basename"
+ - name: listing
+ type:
+ - "null"
+ - type: array
+ items: [File, Directory]
+ doc: |
+ List of files or subdirectories contained in this directory. The name
+ of each file or subdirectory is determined by the `basename` field of
+ each `File` or `Directory` object. It is an error if a `File` shares a
+ `basename` with any other entry in `listing`. If two or more
+      `Directory` objects share the same `basename`, this must be treated as
+ equivalent to a single subdirectory with the listings recursively
+ merged.
+ jsonldPredicate:
+ _id: "cwl:listing"
+
+- name: SchemaBase
+ type: record
+ abstract: true
+ fields:
+ - name: label
+ type:
+ - "null"
+ - string
+ jsonldPredicate: "rdfs:label"
+ doc: "A short, human-readable label of this object."
+
+
+- name: Parameter
+ type: record
+ extends: SchemaBase
+ abstract: true
+ doc: |
+ Define an input or output parameter to a process.
+
+ fields:
+ - name: secondaryFiles
+ type:
+ - "null"
+ - string
+ - Expression
+ - type: array
+ items: [string, Expression]
+ jsonldPredicate: "cwl:secondaryFiles"
+ doc: |
+ Only valid when `type: File` or is an array of `items: File`.
+
+ Describes files that must be included alongside the primary file(s).
+
+ If the value is an expression, the value of `self` in the expression
+ must be the primary input or output File to which this binding applies.
+
+ If the value is a string, it specifies that the following pattern
+ should be applied to the primary file:
+
+      1. If the string begins with one or more caret `^` characters, for each
+ caret, remove the last file extension from the path (the last
+ period `.` and all following characters). If there are no file
+ extensions, the path is unchanged.
+ 2. Append the remainder of the string to the end of the file path.
+
+ - name: format
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ - Expression
+ jsonldPredicate:
+ _id: cwl:format
+ _type: "@id"
+ identity: true
+ doc: |
+ Only valid when `type: File` or is an array of `items: File`.
+
+ For input parameters, this must be one or more IRIs of concept nodes
+      that represent file formats which are allowed as input to this
+      parameter, preferably defined within an ontology.  If no ontology is
+ available, file formats may be tested by exact match.
+
+ For output parameters, this is the file format that will be assigned to
+ the output parameter.
+
+ - name: streamable
+ type: boolean?
+ doc: |
+ Only valid when `type: File` or is an array of `items: File`.
+
+ A value of `true` indicates that the file is read or written
+ sequentially without seeking. An implementation may use this flag to
+ indicate whether it is valid to stream file contents using a named
+ pipe. Default: `false`.
+
+ - name: doc
+ type:
+ - string?
+ - string[]?
+ doc: "A documentation string for this type, or an array of strings which should be concatenated."
+ jsonldPredicate: "rdfs:comment"
+
+
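A sketch of the `secondaryFiles` pattern rules above (file names invented): for a primary file `sample.bam`, a plain suffix appends, while each leading caret strips one extension first:

```
inputs:
  reads:
    type: File
    secondaryFiles:
      - .bai     # sample.bam -> sample.bam.bai
      - ^.bai    # sample.bam -> sample.bai
```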
+- type: enum
+ name: Expression
+ doc: |
+ 'Expression' is not a real type. It indicates that a field must allow
+ runtime parameter references. If [InlineJavascriptRequirement](#InlineJavascriptRequirement)
+ is declared and supported by the platform, the field must also allow
+ Javascript expressions.
+ symbols:
+ - cwl:ExpressionPlaceholder
+
+
+- name: InputBinding
+ type: record
+ abstract: true
+ fields:
+ - name: loadContents
+ type:
+ - "null"
+ - boolean
+ jsonldPredicate: "cwl:loadContents"
+ doc: |
+ Only valid when `type: File` or is an array of `items: File`.
+
+ Read up to the first 64 KiB of text from the file and place it in the
+ "contents" field of the file object for use by expressions.
+
+
+- name: OutputBinding
+ type: record
+ abstract: true
+
+
+- name: InputSchema
+ extends: SchemaBase
+ type: record
+ abstract: true
+
+
+- name: OutputSchema
+ extends: SchemaBase
+ type: record
+ abstract: true
+
+
+- name: InputRecordField
+ type: record
+ extends: "sld:RecordField"
+ specialize:
+ - specializeFrom: "sld:RecordSchema"
+ specializeTo: InputRecordSchema
+ - specializeFrom: "sld:EnumSchema"
+ specializeTo: InputEnumSchema
+ - specializeFrom: "sld:ArraySchema"
+ specializeTo: InputArraySchema
+ - specializeFrom: "sld:PrimitiveType"
+ specializeTo: CWLType
+ fields:
+ - name: inputBinding
+ type: InputBinding?
+ jsonldPredicate: "cwl:inputBinding"
+ - name: label
+ type: string?
+ jsonldPredicate: "rdfs:label"
+ doc: "A short, human-readable label of this process object."
+
+
+- name: InputRecordSchema
+ type: record
+ extends: ["sld:RecordSchema", InputSchema]
+ specialize:
+ - specializeFrom: "sld:RecordField"
+ specializeTo: InputRecordField
+
+
+- name: InputEnumSchema
+ type: record
+ extends: ["sld:EnumSchema", InputSchema]
+ fields:
+ - name: inputBinding
+ type: InputBinding?
+ jsonldPredicate: "cwl:inputBinding"
+
+
+- name: InputArraySchema
+ type: record
+ extends: ["sld:ArraySchema", InputSchema]
+ specialize:
+ - specializeFrom: "sld:RecordSchema"
+ specializeTo: InputRecordSchema
+ - specializeFrom: "sld:EnumSchema"
+ specializeTo: InputEnumSchema
+ - specializeFrom: "sld:ArraySchema"
+ specializeTo: InputArraySchema
+ - specializeFrom: "sld:PrimitiveType"
+ specializeTo: CWLType
+ fields:
+ - name: inputBinding
+ type: InputBinding?
+ jsonldPredicate: "cwl:inputBinding"
+
+
+- name: OutputRecordField
+ type: record
+ extends: "sld:RecordField"
+ specialize:
+ - specializeFrom: "sld:RecordSchema"
+ specializeTo: OutputRecordSchema
+ - specializeFrom: "sld:EnumSchema"
+ specializeTo: OutputEnumSchema
+ - specializeFrom: "sld:ArraySchema"
+ specializeTo: OutputArraySchema
+ - specializeFrom: "sld:PrimitiveType"
+ specializeTo: CWLType
+ fields:
+ - name: outputBinding
+ type: OutputBinding?
+ jsonldPredicate: "cwl:outputBinding"
+
+
+- name: OutputRecordSchema
+ type: record
+ extends: ["sld:RecordSchema", "#OutputSchema"]
+ docParent: "#OutputParameter"
+ specialize:
+ - specializeFrom: "sld:RecordField"
+ specializeTo: OutputRecordField
+
+
+- name: OutputEnumSchema
+ type: record
+ extends: ["sld:EnumSchema", OutputSchema]
+ docParent: "#OutputParameter"
+ fields:
+ - name: outputBinding
+ type: OutputBinding?
+ jsonldPredicate: "cwl:outputBinding"
+
+- name: OutputArraySchema
+ type: record
+ extends: ["sld:ArraySchema", OutputSchema]
+ docParent: "#OutputParameter"
+ specialize:
+ - specializeFrom: "sld:RecordSchema"
+ specializeTo: OutputRecordSchema
+ - specializeFrom: "sld:EnumSchema"
+ specializeTo: OutputEnumSchema
+ - specializeFrom: "sld:ArraySchema"
+ specializeTo: OutputArraySchema
+ - specializeFrom: "sld:PrimitiveType"
+ specializeTo: CWLType
+ fields:
+ - name: outputBinding
+ type: OutputBinding?
+ jsonldPredicate: "cwl:outputBinding"
+
+
+- name: InputParameter
+ type: record
+ extends: Parameter
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: "The unique identifier for this parameter object."
+
+ - name: inputBinding
+ type: InputBinding?
+ jsonldPredicate: "cwl:inputBinding"
+ doc: |
+ Describes how to handle the inputs of a process and convert them
+ into a concrete form for execution, such as command line parameters.
+
+ - name: default
+ type: Any?
+ jsonldPredicate: "cwl:default"
+ doc: |
+ The default value for this parameter if not provided in the input
+ object.
+
+ - name: type
+ type:
+ - "null"
+ - CWLType
+ - InputRecordSchema
+ - InputEnumSchema
+ - InputArraySchema
+ - string
+ - type: array
+ items:
+ - CWLType
+ - InputRecordSchema
+ - InputEnumSchema
+ - InputArraySchema
+ - string
+ jsonldPredicate:
+ "_id": "sld:type"
+ "_type": "@vocab"
+ refScope: 2
+ typeDSL: True
+ doc: |
+ Specify valid types of data that may be assigned to this parameter.
+
+- name: OutputParameter
+ type: record
+ extends: Parameter
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: "The unique identifier for this parameter object."
+ - name: outputBinding
+ type: OutputBinding?
+ jsonldPredicate: "cwl:outputBinding"
+ doc: |
+ Describes how to handle the outputs of a process.
+
+
+- type: record
+ name: ProcessRequirement
+ abstract: true
+ doc: |
+ A process requirement declares a prerequisite that may or must be fulfilled
+ before executing a process. See [`Process.hints`](#process) and
+ [`Process.requirements`](#process).
+
+ Process requirements are the primary mechanism for specifying extensions to
+ the CWL core specification.
+
+
+- type: record
+ name: Process
+ abstract: true
+ doc: |
+
+ The base executable type in CWL is the `Process` object defined by the
+ document. Note that the `Process` object is abstract and cannot be
+ directly executed.
+
+ fields:
+ - name: id
+ type: string?
+ jsonldPredicate: "@id"
+ doc: "The unique identifier for this process object."
+ - name: inputs
+ type:
+ type: array
+ items: InputParameter
+ jsonldPredicate:
+ _id: "cwl:inputs"
+ mapSubject: id
+ mapPredicate: type
+ doc: |
+ Defines the input parameters of the process. The process is ready to
+ run when all required input parameters are associated with concrete
+ values. Input parameters include a schema for each parameter which is
+ used to validate the input object. It may also be used to build a user
+ interface for constructing the input object.
+ - name: outputs
+ type:
+ type: array
+ items: OutputParameter
+ jsonldPredicate:
+ _id: "cwl:outputs"
+ mapSubject: id
+ mapPredicate: type
+ doc: |
+ Defines the parameters representing the output of the process. May be
+ used to generate and/or validate the output object.
+ - name: requirements
+ type: ProcessRequirement[]?
+ jsonldPredicate:
+ _id: "cwl:requirements"
+ mapSubject: class
+ doc: |
+ Declares requirements that apply to either the runtime environment or the
+ workflow engine that must be met in order to execute this process. If
+ an implementation cannot satisfy all requirements, or a requirement is
+ listed which is not recognized by the implementation, it is a fatal
+ error and the implementation must not attempt to run the process,
+ unless overridden at user option.
+ - name: hints
+ type: Any[]?
+ doc: |
+ Declares hints applying to either the runtime environment or the
+ workflow engine that may be helpful in executing this process. It is
+ not an error if an implementation cannot satisfy all hints, however
+ the implementation may report a warning.
+ jsonldPredicate:
+ _id: cwl:hints
+ noLinkCheck: true
+ mapSubject: class
+ - name: label
+ type: string?
+ jsonldPredicate: "rdfs:label"
+ doc: "A short, human-readable label of this process object."
+ - name: doc
+ type: string?
+ jsonldPredicate: "rdfs:comment"
+ doc: "A long, human-readable description of this process object."
+ - name: cwlVersion
+ type: CWLVersion?
+ doc: |
+ CWL document version. Always required at the document root. Not
+ required for a Process embedded inside another Process.
+ jsonldPredicate:
+ "_id": "cwl:cwlVersion"
+ "_type": "@vocab"
+
+- name: InlineJavascriptRequirement
+ type: record
+ extends: ProcessRequirement
+ doc: |
+ Indicates that the workflow platform must support inline Javascript expressions.
+ If this requirement is not present, the workflow platform must not perform expression
+ interpolation.
+ fields:
+ - name: class
+ type: string
+ doc: "Always 'InlineJavascriptRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ - name: expressionLib
+ type: string[]?
+ doc: |
+ Additional code fragments that will also be inserted
+ before executing the expression code. Allows for function definitions that may
+ be called from CWL expressions.
+
+
+- name: SchemaDefRequirement
+ type: record
+ extends: ProcessRequirement
+ doc: |
+ This field consists of an array of type definitions which must be used when
+ interpreting the `inputs` and `outputs` fields. When a `type` field
+ contains an IRI, the implementation must check if the type is defined in
+ `schemaDefs` and use that definition. If the type is not found in
+ `schemaDefs`, it is an error. The entries in `schemaDefs` must be
+ processed in the order listed such that later schema definitions may refer
+ to earlier schema definitions.
+ fields:
+ - name: class
+ type: string
+ doc: "Always 'SchemaDefRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ - name: types
+ type:
+ type: array
+ items: InputSchema
+ doc: The list of type definitions.
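
As a rough usage sketch: the schema above can be loaded with `load_schema`, the same entry point the new tests in this commit use (the aggregating file `CommonWorkflowLanguage.yml` is assumed to `$import` the documents shown here):

```
# Minimal sketch, assuming a source checkout layout.
from schema_salad.schema import load_schema

document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
    "schema_salad/tests/test_schema/CommonWorkflowLanguage.yml")
# avsc_names now holds the compiled record/enum types such as Process.
print(sorted(avsc_names.names))
```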
diff --git a/schema_salad/tests/test_schema/Workflow.yml b/schema_salad/tests/test_schema/Workflow.yml
new file mode 100644
index 0000000..26bde8e
--- /dev/null
+++ b/schema_salad/tests/test_schema/Workflow.yml
@@ -0,0 +1,582 @@
+$base: "https://w3id.org/cwl/cwl#"
+
+$namespaces:
+ cwl: "https://w3id.org/cwl/cwl#"
+
+$graph:
+
+- name: "WorkflowDoc"
+ type: documentation
+ doc:
+ - |
+ # Common Workflow Language (CWL) Workflow Description, v1.0
+
+ This version:
+ * https://w3id.org/cwl/v1.0/
+
+ Current version:
+ * https://w3id.org/cwl/
+ - "\n\n"
+ - {$include: contrib.md}
+ - "\n\n"
+ - |
+ # Abstract
+
+ One way to define a workflow is as an analysis task represented by a
+ directed graph describing a sequence of operations that transform an
+ input data set to output. This specification defines the Common Workflow
+ Language (CWL) Workflow description, a vendor-neutral standard for
+ representing workflows intended to be portable across a variety of
+ computing platforms.
+
+ - {$include: intro.md}
+
+ - |
+
+ ## Introduction to v1.0
+
+ This specification represents the first full release from the CWL group.
+ Since draft-3, this draft introduces the following changes and additions:
+
+ * The `inputs` and `outputs` fields have been renamed `in` and `out`.
+ * Syntax simplifications, denoted by the `map<>` syntax. Example: `in`
+ contains a list of items, each with an id. Now one can specify
+ a mapping of that identifier to the corresponding
+ `InputParameter`.
+ ```
+ in:
+ - id: one
+ type: string
+ doc: First input parameter
+ - id: two
+ type: int
+ doc: Second input parameter
+ ```
+ can be
+ ```
+ in:
+ one:
+ type: string
+ doc: First input parameter
+ two:
+ type: int
+ doc: Second input parameter
+ ```
+ * The common field `description` has been renamed to `doc`.
+
+ ## Purpose
+
+ The Common Workflow Language Workflow Description expresses
+ workflows for data-intensive science, such as Bioinformatics, Chemistry,
+ Physics, and Astronomy. This specification is intended to define a data
+ and execution model for Workflows that can be implemented on top of a
+ variety of computing platforms, ranging from an individual workstation to
+ clusters, grids, clouds, and high performance computing systems.
+
+ - {$include: concepts.md}
+
+- name: ExpressionToolOutputParameter
+ type: record
+ extends: OutputParameter
+ fields:
+ - name: type
+ type:
+ - "null"
+ - "#CWLType"
+ - "#OutputRecordSchema"
+ - "#OutputEnumSchema"
+ - "#OutputArraySchema"
+ - string
+ - type: array
+ items:
+ - "#CWLType"
+ - "#OutputRecordSchema"
+ - "#OutputEnumSchema"
+ - "#OutputArraySchema"
+ - string
+ jsonldPredicate:
+ "_id": "sld:type"
+ "_type": "@vocab"
+ refScope: 2
+ typeDSL: True
+ doc: |
+ Specify valid types of data that may be assigned to this parameter.
+
+- type: record
+ name: ExpressionTool
+ extends: Process
+ specialize:
+ - specializeFrom: "#OutputParameter"
+ specializeTo: "#ExpressionToolOutputParameter"
+ documentRoot: true
+ doc: |
+ Execute an expression as a Workflow step.
+ fields:
+ - name: "class"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ type: string
+ - name: expression
+ type: [string, Expression]
+ doc: |
+ The expression to execute. The expression must return a JSON object which
+ matches the output parameters of the ExpressionTool.
+
+- name: LinkMergeMethod
+ type: enum
+ docParent: "#WorkflowStepInput"
+ doc: The input link merge method, described in [WorkflowStepInput](#WorkflowStepInput).
+ symbols:
+ - merge_nested
+ - merge_flattened
+
+
+- name: WorkflowOutputParameter
+ type: record
+ extends: OutputParameter
+ docParent: "#Workflow"
+ doc: |
+ Describe an output parameter of a workflow. The parameter must be
+ connected to one or more parameters defined in the workflow that will
+ provide the value of the output parameter.
+ fields:
+ - name: outputSource
+ doc: |
+ Specifies one or more workflow parameters that supply the value of
+ the output parameter.
+ jsonldPredicate:
+ "_id": "cwl:outputSource"
+ "_type": "@id"
+ refScope: 0
+ type:
+ - string?
+ - string[]?
+ - name: linkMerge
+ type: ["null", "#LinkMergeMethod"]
+ jsonldPredicate: "cwl:linkMerge"
+ doc: |
+ The method to use to merge multiple sources into a single array.
+ If not specified, the default method is "merge_nested".
+ - name: type
+ type:
+ - "null"
+ - "#CWLType"
+ - "#OutputRecordSchema"
+ - "#OutputEnumSchema"
+ - "#OutputArraySchema"
+ - string
+ - type: array
+ items:
+ - "#CWLType"
+ - "#OutputRecordSchema"
+ - "#OutputEnumSchema"
+ - "#OutputArraySchema"
+ - string
+ jsonldPredicate:
+ "_id": "sld:type"
+ "_type": "@vocab"
+ refScope: 2
+ typeDSL: True
+ doc: |
+ Specify valid types of data that may be assigned to this parameter.
+
+
+- name: Sink
+ type: record
+ abstract: true
+ fields:
+ - name: source
+ doc: |
+ Specifies one or more workflow parameters that will provide input to
+ the underlying step parameter.
+ jsonldPredicate:
+ "_id": "cwl:source"
+ "_type": "@id"
+ refScope: 2
+ type:
+ - string?
+ - string[]?
+ - name: linkMerge
+ type: LinkMergeMethod?
+ jsonldPredicate: "cwl:linkMerge"
+ doc: |
+ The method to use to merge multiple inbound links into a single array.
+ If not specified, the default method is "merge_nested".
+
+
+- type: record
+ name: WorkflowStepInput
+ extends: Sink
+ docParent: "#WorkflowStep"
+ doc: |
+ The input of a workflow step connects an upstream parameter (from the
+ workflow inputs, or the outputs of other workflow steps) with the input
+ parameters of the underlying step.
+
+ ## Input object
+
+ A WorkflowStepInput object must contain an `id` field in the form
+ `#fieldname` or `#stepname.fieldname`. When the `id` field contains a
+ period `.` the field name consists of the characters following the final
+ period. This defines a field of the workflow step input object with the
+ value of the `source` parameter(s).
+
+ ## Merging
+
+ To merge multiple inbound data links,
+ [MultipleInputFeatureRequirement](#MultipleInputFeatureRequirement) must be specified
+ in the workflow or workflow step requirements.
+
+ If the sink parameter is an array, or named in a [workflow
+ scatter](#WorkflowStep) operation, there may be multiple inbound data links
+ listed in the `source` field. The values from the input links are merged
+ depending on the method specified in the `linkMerge` field. If not
+ specified, the default method is "merge_nested" (a sketch of both
+ methods follows this record).
+
+ * **merge_nested**
+
+ The input must be an array consisting of exactly one entry for each
+ input link. If "merge_nested" is specified with a single link, the value
+ from the link must be wrapped in a single-item list.
+
+ * **merge_flattened**
+
+ 1. The source and sink parameters must be compatible types, or the source
+ type must be compatible with a single element from the "items" type of
+ the destination array parameter.
+ 2. Source parameters which are arrays are concatenated.
+ Source parameters which are single element types are appended as
+ single elements.
+
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: "A unique identifier for this workflow input parameter."
+ - name: default
+ type: ["null", Any]
+ doc: |
+ The default value for this parameter if there is no `source`
+ field.
+ jsonldPredicate: "cwl:default"
+ - name: valueFrom
+ type:
+ - "null"
+ - "string"
+ - "#Expression"
+ jsonldPredicate: "cwl:valueFrom"
+ doc: |
+ To use valueFrom, [StepInputExpressionRequirement](#StepInputExpressionRequirement) must
+ be specified in the workflow or workflow step requirements.
+
+ If `valueFrom` is a constant string value, use this as the value for
+ this input parameter.
+
+ If `valueFrom` is a parameter reference or expression, it must be
+ evaluated to yield the actual value to be assigned to the input field.
+
+ The `self` value in the parameter reference or expression must be
+ the value of the parameter(s) specified in the `source` field, or
+ null if there is no `source` field.
+
+ The value of `inputs` in the parameter reference or expression must be
+ the input object to the workflow step after assigning the `source`
+ values and then scattering. The order of evaluating `valueFrom` among
+ step input parameters is undefined and the result of evaluating
+ `valueFrom` on a parameter must not be visible to evaluation of
+ `valueFrom` on other parameters.
+
+
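
The two `linkMerge` methods described in `WorkflowStepInput` above boil down to wrap-versus-concatenate. A plain-Python illustration of the semantics (not engine code):

```
# Illustrative semantics of the two linkMerge methods.
def merge_nested(source_values):
    # exactly one entry per inbound link; a single link still yields
    # a one-item list
    return list(source_values)

def merge_flattened(source_values):
    merged = []
    for v in source_values:
        if isinstance(v, list):
            merged.extend(v)   # array sources are concatenated
        else:
            merged.append(v)   # single-element sources are appended
    return merged

print(merge_nested([[1, 2], 3]))     # [[1, 2], 3]
print(merge_flattened([[1, 2], 3]))  # [1, 2, 3]
```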
+- type: record
+ name: WorkflowStepOutput
+ docParent: "#WorkflowStep"
+ doc: |
+ Associate an output parameter of the underlying process with a workflow
+ parameter. The workflow parameter (given in the `id` field) may be used
+ as a `source` to connect with input parameters of other workflow steps, or
+ with an output parameter of the process.
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: |
+ A unique identifier for this workflow output parameter. This is the
+ identifier to use in the `source` field of `WorkflowStepInput` to
+ connect the output value to downstream parameters.
+
+
+- name: ScatterMethod
+ type: enum
+ docParent: "#WorkflowStep"
+ doc: The scatter method, as described in [workflow step scatter](#WorkflowStep).
+ symbols:
+ - dotproduct
+ - nested_crossproduct
+ - flat_crossproduct
+
+
+- name: WorkflowStep
+ type: record
+ docParent: "#Workflow"
+ doc: |
+ A workflow step is an executable element of a workflow. It specifies the
+ underlying process implementation (such as `CommandLineTool` or another
+ `Workflow`) in the `run` field and connects the input and output parameters
+ of the underlying process to workflow parameters.
+
+ # Scatter/gather
+
+ To use scatter/gather,
+ [ScatterFeatureRequirement](#ScatterFeatureRequirement) must be specified
+ in the workflow or workflow step requirements.
+
+ A "scatter" operation specifies that the associated workflow step or
+ subworkflow should execute separately over a list of input elements. Each
+ job making up a scatter operation is independent and may be executed
+ concurrently.
+
+ The `scatter` field specifies one or more input parameters which will be
+ scattered. An input parameter may be listed more than once. The declared
+ type of each input parameter is implicitly wrapped in an array for each
+ time it appears in the `scatter` field. As a result, upstream parameters
+ which are connected to scattered parameters may be arrays.
+
+ All output parameter types are also implicitly wrapped in arrays. Each job
+ in the scatter results in an entry in the output array.
+
+ If `scatter` declares more than one input parameter, `scatterMethod`
+ describes how to decompose the input into a discrete set of jobs (see
+ the sketch after this record).
+
+ * **dotproduct** specifies that the input arrays are aligned and one
+ element taken from each array to construct each job. It is an error
+ if the input arrays are not all the same length.
+
+ * **nested_crossproduct** specifies the Cartesian product of the inputs,
+ producing a job for every combination of the scattered inputs. The
+ output must be nested arrays for each level of scattering, in the
+ order that the input arrays are listed in the `scatter` field.
+
+ * **flat_crossproduct** specifies the Cartesian product of the inputs,
+ producing a job for every combination of the scattered inputs. The
+ output arrays must be flattened to a single level, but otherwise listed in the
+ order that the input arrays are listed in the `scatter` field.
+
+ # Subworkflows
+
+ To specify a nested workflow as part of a workflow step,
+ [SubworkflowFeatureRequirement](#SubworkflowFeatureRequirement) must be
+ specified in the workflow or workflow step requirements.
+
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: "The unique identifier for this workflow step."
+ - name: in
+ type: WorkflowStepInput[]
+ jsonldPredicate:
+ _id: "cwl:in"
+ mapSubject: id
+ mapPredicate: source
+ doc: |
+ Defines the input parameters of the workflow step. The process is ready to
+ run when all required input parameters are associated with concrete
+ values. Input parameters include a schema for each parameter which is
+ used to validate the input object. It may also be used to build a user
+ interface for constructing the input object.
+ - name: out
+ type:
+ - type: array
+ items: [string, WorkflowStepOutput]
+ jsonldPredicate:
+ _id: "cwl:out"
+ _type: "@id"
+ identity: true
+ doc: |
+ Defines the parameters representing the output of the process. May be
+ used to generate and/or validate the output object.
+ - name: requirements
+ type: ProcessRequirement[]?
+ jsonldPredicate:
+ _id: "cwl:requirements"
+ mapSubject: class
+ doc: |
+ Declares requirements that apply to either the runtime environment or the
+ workflow engine that must be met in order to execute this workflow step. If
+ an implementation cannot satisfy all requirements, or a requirement is
+ listed which is not recognized by the implementation, it is a fatal
+ error and the implementation must not attempt to run the process,
+ unless overridden at user option.
+ - name: hints
+ type: Any[]?
+ jsonldPredicate:
+ _id: "cwl:hints"
+ noLinkCheck: true
+ mapSubject: class
+ doc: |
+ Declares hints applying to either the runtime environment or the
+ workflow engine that may be helpful in executing this workflow step. It is
+ not an error if an implementation cannot satisfy all hints, however
+ the implementation may report a warning.
+ - name: label
+ type: string?
+ jsonldPredicate: "rdfs:label"
+ doc: "A short, human-readable label of this process object."
+ - name: doc
+ type: string?
+ jsonldPredicate: "rdfs:comment"
+ doc: "A long, human-readable description of this process object."
+ - name: run
+ type: [string, Process]
+ jsonldPredicate:
+ "_id": "cwl:run"
+ "_type": "@id"
+ doc: |
+ Specifies the process to run.
+ - name: scatter
+ type:
+ - string?
+ - string[]?
+ jsonldPredicate:
+ "_id": "cwl:scatter"
+ "_type": "@id"
+ "_container": "@list"
+ refScope: 0
+ - name: scatterMethod
+ doc: |
+ Required if `scatter` is an array of more than one element.
+ type: ScatterMethod?
+ jsonldPredicate:
+ "_id": "cwl:scatterMethod"
+ "_type": "@vocab"
+
+
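
The three `scatterMethod` values above map onto zip and Cartesian-product operations. A hedged sketch of the job decomposition (plain Python, not engine code):

```
# Illustrative decomposition for the scatter methods described above.
from itertools import product

def dotproduct(*arrays):
    if len(set(len(a) for a in arrays)) > 1:
        raise ValueError("dotproduct requires arrays of equal length")
    return list(zip(*arrays))      # one job per aligned element

def flat_crossproduct(*arrays):
    return list(product(*arrays))  # one job per combination, flat output

a, b = ["x", "y"], [1, 2]
print(dotproduct(a, b))            # [('x', 1), ('y', 2)]
print(flat_crossproduct(a, b))     # [('x', 1), ('x', 2), ('y', 1), ('y', 2)]
# nested_crossproduct yields the same jobs, but its outputs stay nested,
# one array level per scattered input.
```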
+- name: Workflow
+ type: record
+ extends: "#Process"
+ documentRoot: true
+ specialize:
+ - specializeFrom: "#OutputParameter"
+ specializeTo: "#WorkflowOutputParameter"
+ doc: |
+ A workflow describes a set of **steps** and the **dependencies** between
+ those steps. When a step produces output that will be consumed by a
+ second step, the first step is a dependency of the second step.
+
+ When there is a dependency, the workflow engine must execute the preceding
+ step and wait for it to successfully produce output before executing the
+ dependent step. If two steps are defined in the workflow graph that
+ are not directly or indirectly dependent, these steps are **independent**,
+ and may execute in any order or execute concurrently. A workflow is
+ complete when all steps have been executed.
+
+ Dependencies between parameters are expressed using the `source` field on
+ [workflow step input parameters](#WorkflowStepInput) and [workflow output
+ parameters](#WorkflowOutputParameter).
+
+ The `source` field expresses the dependency of one parameter on another
+ such that when a value is associated with the parameter specified by
+ `source`, that value is propagated to the destination parameter. When all
+ data links inbound to a given step are fulfilled, the step is ready to
+ execute.
+
+ ## Workflow success and failure
+
+ A completed step must result in one of `success`, `temporaryFailure` or
+ `permanentFailure` states. An implementation may choose to retry a step
+ execution which resulted in `temporaryFailure`. An implementation may
+ choose to either continue running other steps of a workflow, or terminate
+ immediately upon `permanentFailure`.
+
+ * If any step of a workflow execution results in `permanentFailure`, then
+ the workflow status is `permanentFailure`.
+
+ * If one or more steps result in `temporaryFailure` and all other steps
+ complete with `success` or are not executed, then the workflow status is
+ `temporaryFailure`.
+
+ * If all workflow steps are executed and complete with `success`, then the
+ workflow status is `success`.
+
+ # Extensions
+
+ [ScatterFeatureRequirement](#ScatterFeatureRequirement) and
+ [SubworkflowFeatureRequirement](#SubworkflowFeatureRequirement) are
+ available as standard [extensions](#Extensions_and_Metadata) to core
+ workflow semantics.
+
+ fields:
+ - name: "class"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ type: string
+ - name: steps
+ doc: |
+ The individual steps that make up the workflow. Each step is executed when all of its
+ input data links are fulfilled. An implementation may choose to execute
+ the steps in a different order than listed and/or execute steps
+ concurrently, provided that dependencies between steps are met.
+ type:
+ - type: array
+ items: "#WorkflowStep"
+ jsonldPredicate:
+ mapSubject: id
+
+
+- type: record
+ name: SubworkflowFeatureRequirement
+ extends: ProcessRequirement
+ doc: |
+ Indicates that the workflow platform must support nested workflows in
+ the `run` field of [WorkflowStep](#WorkflowStep).
+ fields:
+ - name: "class"
+ type: "string"
+ doc: "Always 'SubworkflowFeatureRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+
+- name: ScatterFeatureRequirement
+ type: record
+ extends: ProcessRequirement
+ doc: |
+ Indicates that the workflow platform must support the `scatter` and
+ `scatterMethod` fields of [WorkflowStep](#WorkflowStep).
+ fields:
+ - name: "class"
+ type: "string"
+ doc: "Always 'ScatterFeatureRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+
+- name: MultipleInputFeatureRequirement
+ type: record
+ extends: ProcessRequirement
+ doc: |
+ Indicates that the workflow platform must support multiple inbound data links
+ listed in the `source` field of [WorkflowStepInput](#WorkflowStepInput).
+ fields:
+ - name: "class"
+ type: "string"
+ doc: "Always 'MultipleInputFeatureRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+
+- type: record
+ name: StepInputExpressionRequirement
+ extends: ProcessRequirement
+ doc: |
+ Indicates that the workflow platform must support the `valueFrom` field
+ of [WorkflowStepInput](#WorkflowStepInput).
+ fields:
+ - name: "class"
+ type: "string"
+ doc: "Always 'StepInputExpressionRequirement'"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
diff --git a/schema_salad/tests/test_schema/concepts.md b/schema_salad/tests/test_schema/concepts.md
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/schema_salad/tests/test_schema/concepts.md
@@ -0,0 +1 @@
+
diff --git a/schema_salad/tests/test_schema/contrib.md b/schema_salad/tests/test_schema/contrib.md
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/schema_salad/tests/test_schema/contrib.md
@@ -0,0 +1 @@
+
diff --git a/schema_salad/tests/test_schema/intro.md b/schema_salad/tests/test_schema/intro.md
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/schema_salad/tests/test_schema/intro.md
@@ -0,0 +1 @@
+
diff --git a/schema_salad/tests/test_schema/invocation.md b/schema_salad/tests/test_schema/invocation.md
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/schema_salad/tests/test_schema/invocation.md
@@ -0,0 +1 @@
+
diff --git a/schema_salad/tests/test_schema/metaschema_base.yml b/schema_salad/tests/test_schema/metaschema_base.yml
new file mode 100644
index 0000000..73511d1
--- /dev/null
+++ b/schema_salad/tests/test_schema/metaschema_base.yml
@@ -0,0 +1,164 @@
+$base: "https://w3id.org/cwl/salad#"
+
+$namespaces:
+ sld: "https://w3id.org/cwl/salad#"
+ dct: "http://purl.org/dc/terms/"
+ rdf: "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ rdfs: "http://www.w3.org/2000/01/rdf-schema#"
+ xsd: "http://www.w3.org/2001/XMLSchema#"
+
+$graph:
+- name: PrimitiveType
+ type: enum
+ symbols:
+ - "sld:null"
+ - "xsd:boolean"
+ - "xsd:int"
+ - "xsd:long"
+ - "xsd:float"
+ - "xsd:double"
+ - "xsd:string"
+ doc:
+ - |
+ Salad data types are based on Avro schema declarations. Refer to the
+ [Avro schema declaration documentation](https://avro.apache.org/docs/current/spec.html#schemas) for
+ detailed information.
+ - "null: no value"
+ - "boolean: a binary value"
+ - "int: 32-bit signed integer"
+ - "long: 64-bit signed integer"
+ - "float: single precision (32-bit) IEEE 754 floating-point number"
+ - "double: double precision (64-bit) IEEE 754 floating-point number"
+ - "string: Unicode character sequence"
+
+
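
For reference, the `int` and `long` ranges implied above are the 32- and 64-bit two's-complement bounds; the validator changes later in this commit check exactly these constants:

```
# Bounds used when validating the numeric primitives listed above
# (mirroring the constants in schema_salad/validate.py).
INT_MIN_VALUE = -(1 << 31)
INT_MAX_VALUE = (1 << 31) - 1
LONG_MIN_VALUE = -(1 << 63)
LONG_MAX_VALUE = (1 << 63) - 1

def fits_int(v):
    return isinstance(v, int) and INT_MIN_VALUE <= v <= INT_MAX_VALUE

print(fits_int(2 ** 31 - 1))  # True
print(fits_int(2 ** 31))      # False: out of 32-bit range
```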
+- name: Any
+ type: enum
+ symbols: ["#Any"]
+ doc: |
+ The **Any** type validates for any non-null value.
+
+
+- name: RecordField
+ type: record
+ doc: A field of a record.
+ fields:
+ - name: name
+ type: string
+ jsonldPredicate: "@id"
+ doc: |
+ The name of the field
+
+ - name: doc
+ type: string?
+ doc: |
+ A documentation string for this field
+ jsonldPredicate: "rdfs:comment"
+
+ - name: type
+ type:
+ - PrimitiveType
+ - RecordSchema
+ - EnumSchema
+ - ArraySchema
+ - string
+ - type: array
+ items:
+ - PrimitiveType
+ - RecordSchema
+ - EnumSchema
+ - ArraySchema
+ - string
+ jsonldPredicate:
+ _id: sld:type
+ _type: "@vocab"
+ typeDSL: true
+ refScope: 2
+ doc: |
+ The field type
+
+
+- name: RecordSchema
+ type: record
+ fields:
+ type:
+ doc: "Must be `record`"
+ type:
+ name: Record_symbol
+ type: enum
+ symbols:
+ - "sld:record"
+ jsonldPredicate:
+ _id: "sld:type"
+ _type: "@vocab"
+ typeDSL: true
+ refScope: 2
+ fields:
+ type: RecordField[]?
+ jsonldPredicate:
+ _id: sld:fields
+ mapSubject: name
+ mapPredicate: type
+ doc: "Defines the fields of the record."
+
+
+- name: EnumSchema
+ type: record
+ doc: |
+ Define an enumerated type.
+ fields:
+ type:
+ doc: "Must be `enum`"
+ type:
+ name: Enum_symbol
+ type: enum
+ symbols:
+ - "sld:enum"
+ jsonldPredicate:
+ _id: "sld:type"
+ _type: "@vocab"
+ typeDSL: true
+ refScope: 2
+ symbols:
+ type: string[]
+ jsonldPredicate:
+ _id: "sld:symbols"
+ _type: "@id"
+ identity: true
+ doc: "Defines the set of valid symbols."
+
+
+- name: ArraySchema
+ type: record
+ fields:
+ type:
+ doc: "Must be `array`"
+ type:
+ name: Array_symbol
+ type: enum
+ symbols:
+ - "sld:array"
+ jsonldPredicate:
+ _id: "sld:type"
+ _type: "@vocab"
+ typeDSL: true
+ refScope: 2
+ items:
+ type:
+ - PrimitiveType
+ - RecordSchema
+ - EnumSchema
+ - ArraySchema
+ - string
+ - type: array
+ items:
+ - PrimitiveType
+ - RecordSchema
+ - EnumSchema
+ - ArraySchema
+ - string
+ jsonldPredicate:
+ _id: "sld:items"
+ _type: "@vocab"
+ refScope: 2
+ doc: "Defines the type of the array elements."
diff --git a/schema_salad/tests/test_schema/test1.cwl b/schema_salad/tests/test_schema/test1.cwl
new file mode 100644
index 0000000..2406c86
--- /dev/null
+++ b/schema_salad/tests/test_schema/test1.cwl
@@ -0,0 +1 @@
+class: Workflow
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test10.cwl b/schema_salad/tests/test_schema/test10.cwl
new file mode 100644
index 0000000..2860807
--- /dev/null
+++ b/schema_salad/tests/test_schema/test10.cwl
@@ -0,0 +1,10 @@
+class: Workflow
+inputs:
+ foo: string
+outputs:
+ bar: string
+steps:
+ step1:
+ scatterMethod: [record]
+ in: []
+ out: [out]
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test11.cwl b/schema_salad/tests/test_schema/test11.cwl
new file mode 100644
index 0000000..43281fb
--- /dev/null
+++ b/schema_salad/tests/test_schema/test11.cwl
@@ -0,0 +1,10 @@
+class: Workflow
+inputs:
+ foo: string
+outputs:
+ bar: string
+steps:
+ step1:
+ run: blub.cwl
+ in: []
+ out: [out]
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test2.cwl b/schema_salad/tests/test_schema/test2.cwl
new file mode 100644
index 0000000..96ae140
--- /dev/null
+++ b/schema_salad/tests/test_schema/test2.cwl
@@ -0,0 +1 @@
+class: xWorkflow
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test3.cwl b/schema_salad/tests/test_schema/test3.cwl
new file mode 100644
index 0000000..517e920
--- /dev/null
+++ b/schema_salad/tests/test_schema/test3.cwl
@@ -0,0 +1,6 @@
+class: Workflow
+inputs:
+ foo: string
+outputs:
+ bar: xstring
+steps: []
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test4.cwl b/schema_salad/tests/test_schema/test4.cwl
new file mode 100644
index 0000000..e57292d
--- /dev/null
+++ b/schema_salad/tests/test_schema/test4.cwl
@@ -0,0 +1,6 @@
+class: Workflow
+inputs:
+ foo: string
+outputs:
+ bar: 12
+steps: []
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test5.cwl b/schema_salad/tests/test_schema/test5.cwl
new file mode 100644
index 0000000..8a7ba22
--- /dev/null
+++ b/schema_salad/tests/test_schema/test5.cwl
@@ -0,0 +1,6 @@
+class: Workflow
+inputs:
+ foo: string
+outputs:
+ bar: string
+steps: [12]
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test6.cwl b/schema_salad/tests/test_schema/test6.cwl
new file mode 100644
index 0000000..eff4ac5
--- /dev/null
+++ b/schema_salad/tests/test_schema/test6.cwl
@@ -0,0 +1,5 @@
+inputs:
+ foo: string
+outputs:
+ bar: string
+steps: [12]
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test7.cwl b/schema_salad/tests/test_schema/test7.cwl
new file mode 100644
index 0000000..0e12c12
--- /dev/null
+++ b/schema_salad/tests/test_schema/test7.cwl
@@ -0,0 +1,10 @@
+class: Workflow
+inputs:
+ foo: string
+outputs:
+ bar: string
+steps:
+ step1:
+ scatter_method: blub
+ in: []
+ out: [out]
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test8.cwl b/schema_salad/tests/test_schema/test8.cwl
new file mode 100644
index 0000000..128cb4a
--- /dev/null
+++ b/schema_salad/tests/test_schema/test8.cwl
@@ -0,0 +1,10 @@
+class: Workflow
+inputs:
+ foo: string
+outputs:
+ bar: string
+steps:
+ step1:
+ scatterMethod: abc
+ in: []
+ out: [out]
\ No newline at end of file
diff --git a/schema_salad/tests/test_schema/test9.cwl b/schema_salad/tests/test_schema/test9.cwl
new file mode 100644
index 0000000..2d7ff4c
--- /dev/null
+++ b/schema_salad/tests/test_schema/test9.cwl
@@ -0,0 +1,10 @@
+class: Workflow
+inputs:
+ foo: string
+outputs:
+ bar: string
+steps:
+ step1:
+ scatterMethod: 12
+ in: []
+ out: [out]
\ No newline at end of file
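
Each of the test*.cwl documents above carries one deliberate error (wrong `class`, unknown type, bad `scatterMethod` value, a dangling `run` reference, and so on). A minimal sketch of exercising them, assuming `load_and_validate` from `schema_salad.schema` (the pattern the new test_errors.py follows):

```
# Hedged sketch: load the CWL test schema, then confirm the deliberately
# broken documents fail validation with a readable error.
from schema_salad.schema import load_schema, load_and_validate
from schema_salad.validate import ValidationException

document_loader, avsc_names, _, _ = load_schema(
    "schema_salad/tests/test_schema/CommonWorkflowLanguage.yml")

for bad in ("test2.cwl", "test8.cwl", "test11.cwl"):
    try:
        load_and_validate(document_loader, avsc_names,
                          "schema_salad/tests/test_schema/" + bad, True)
    except ValidationException as e:
        print("%s failed as expected:\n%s" % (bad, e))
```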
diff --git a/schema_salad/tests/test_validate.pyx b/schema_salad/tests/test_validate.pyx
new file mode 100644
index 0000000..b37127a
--- /dev/null
+++ b/schema_salad/tests/test_validate.pyx
@@ -0,0 +1,71 @@
+import unittest
+import json
+from schema_salad.schema import load_schema
+from schema_salad.validate import validate_ex
+from schema_salad.sourceline import cmap
+
+class TestValidate(unittest.TestCase):
+ schema = cmap({"name": "_", "$graph":[{
+ "name": "File",
+ "type": "record",
+ "fields": [{
+ "name": "class",
+ "type": {
+ "type": "enum",
+ "name": "File_class",
+ "symbols": ["#_/File"]
+ },
+ "jsonldPredicate": {
+ "_id": "@type",
+ "_type": "@vocab"
+ }
+ }, {
+ "name": "location",
+ "type": "string",
+ "jsonldPredicate": "_:location"
+ }]
+ }, {
+ "name": "Directory",
+ "type": "record",
+ "fields": [{
+ "name": "class",
+ "type": {
+ "type": "enum",
+ "name": "Directory_class",
+ "symbols": ["#_/Directory"]
+ },
+ "jsonldPredicate": {
+ "_id": "@type",
+ "_type": "@vocab"
+ }
+ }, {
+ "name": "location",
+ "type": "string",
+ "jsonldPredicate": "_:location"
+ }, {
+ "name": "listing",
+ "type": {
+ "type": "array",
+ "items": ["File", "Directory"]
+ }
+ }],
+ }]})
+
+ def test_validate_big(self):
+ document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(self.schema)
+
+ with open("biglisting.yml") as f:
+ biglisting = json.load(f)
+
+ self.assertEquals(True, validate_ex(avsc_names.get_name("Directory", ""), biglisting,
+ strict=True, raise_ex=False))
+
+
+ # def test_validate_small(self):
+ # document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(self.schema)
+
+ # with open("smalllisting.yml") as f:
+ # smalllisting = json.load(f)
+
+ # validate_ex(avsc_names.get_name("Directory", ""), smalllisting,
+ # strict=True, raise_ex=True)
diff --git a/schema_salad/tests/test_validate.py~ b/schema_salad/tests/test_validate.py~
new file mode 100644
index 0000000..db0fd1b
--- /dev/null
+++ b/schema_salad/tests/test_validate.py~
@@ -0,0 +1,70 @@
+import unittest
+import json
+from schema_salad.schema import load_schema
+from schema_salad.validate import validate_ex
+
+class TestValidate(unittest.TestCase):
+ schema = {"name": "_", "$graph":[{
+ "name": "File",
+ "type": "record",
+ "fields": [{
+ "name": "class",
+ "type": {
+ "type": "enum",
+ "name": "File_class",
+ "symbols": ["#_/File"]
+ },
+ "jsonldPredicate": {
+ "_id": "@type",
+ "_type": "@vocab"
+ }
+ }, {
+ "name": "location",
+ "type": "string",
+ "jsonldPredicate": "_:location"
+ }]
+ }, {
+ "name": "Directory",
+ "type": "record",
+ "fields": [{
+ "name": "class",
+ "type": {
+ "type": "enum",
+ "name": "Directory_class",
+ "symbols": ["#_/Directory"]
+ },
+ "jsonldPredicate": {
+ "_id": "@type",
+ "_type": "@vocab"
+ }
+ }, {
+ "name": "location",
+ "type": "string",
+ "jsonldPredicate": "_:location"
+ }, {
+ "name": "listing",
+ "type": {
+ "type": "array",
+ "items": ["File", "Directory"]
+ }
+ }],
+ }]}
+
+ def test_validate_big(self):
+ document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(self.schema)
+
+ with open("biglisting.yml") as f:
+ biglisting = json.load(f)
+
+ self.assertEquals(True, validate_ex(avsc_names.get_name("Directory", ""), biglisting,
+ strict=True, raise_ex=False))
+
+
+ # def test_validate_small(self):
+ # document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(self.schema)
+
+ # with open("smalllisting.yml") as f:
+ # smalllisting = json.load(f)
+
+ # validate_ex(avsc_names.get_name("Directory", ""), smalllisting,
+ # strict=True, raise_ex=True)
diff --git a/schema_salad/tests/util.py b/schema_salad/tests/util.py
new file mode 100644
index 0000000..0fcaf52
--- /dev/null
+++ b/schema_salad/tests/util.py
@@ -0,0 +1,13 @@
+from pkg_resources import Requirement, resource_filename, ResolutionError # type: ignore
+import os
+
+def get_data(filename):
+ filepath = None
+ try:
+ filepath = resource_filename(
+ Requirement.parse("schema-salad"), filename)
+ except ResolutionError:
+ pass
+ if not filepath or not os.path.isfile(filepath):
+ filepath = os.path.join(os.path.dirname(__file__), os.pardir, filename)
+ return filepath
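
A short usage note for the helper above: it resolves a test-data path whether schema-salad is installed as an egg or run from a source checkout.

```
# Example use of get_data (the path is illustrative).
from schema_salad.tests.util import get_data

path = get_data("tests/test_schema/test1.cwl")
print(path)  # absolute path inside the installed package or the checkout
```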
diff --git a/schema_salad/validate.py b/schema_salad/validate.py
index 119d1c0..75e094b 100644
--- a/schema_salad/validate.py
+++ b/schema_salad/validate.py
@@ -1,18 +1,31 @@
import pprint
import avro.schema
+from avro.schema import Schema
import sys
import urlparse
+import re
from typing import Any, Union
+from .sourceline import SourceLine, lineno_re, bullets, indent
+
class ValidationException(Exception):
pass
+
class ClassValidationException(ValidationException):
pass
-def validate(expected_schema, datum, identifiers=set(), strict=False, foreign_properties=set()):
- # type: (avro.schema.Schema, Any, Set[unicode], bool, Set[unicode]) -> bool
- return validate_ex(expected_schema, datum, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=False)
+
+def validate(expected_schema, # type: Schema
+ datum, # type: Any
+ identifiers=set(), # type: Set[unicode]
+ strict=False, # type: bool
+ foreign_properties=set() # type: Set[unicode]
+ ):
+ # type: (...) -> bool
+ return validate_ex(
+ expected_schema, datum, identifiers, strict=strict,
+ foreign_properties=foreign_properties, raise_ex=False)
INT_MIN_VALUE = -(1 << 31)
@@ -20,11 +33,6 @@ INT_MAX_VALUE = (1 << 31) - 1
LONG_MIN_VALUE = -(1 << 63)
LONG_MAX_VALUE = (1 << 63) - 1
-def indent(v, nolead=False): # type: (Union[str, unicode], bool) -> unicode
- if nolead:
- return v.splitlines()[0] + u"\n".join([u" " + l for l in v.splitlines()[1:]])
- else:
- return u"\n".join([" " + l for l in v.splitlines()])
def friendly(v): # type: (Any) -> Any
if isinstance(v, avro.schema.NamedSchema):
@@ -38,11 +46,6 @@ def friendly(v): # type: (Any) -> Any
else:
return v
-def multi(v, q=""): # type: (Union[str, unicode], Union[str, unicode]) -> unicode
- if '\n' in v:
- return u"%s%s%s\n" % (q, v, q)
- else:
- return u"%s%s%s" % (q, v, q)
def vpformat(datum): # type: (Any) -> str
a = pprint.pformat(datum)
@@ -50,9 +53,15 @@ def vpformat(datum): # type: (Any) -> str
a = a[0:160] + "[...]"
return a
-def validate_ex(expected_schema, datum, identifiers=None, strict=False,
- foreign_properties=None, raise_ex=True):
- # type: (avro.schema.Schema, Any, Set[unicode], bool, Set[unicode], bool) -> bool
+
+def validate_ex(expected_schema, # type: Schema
+ datum, # type: Any
+ identifiers=None, # type: Set[unicode]
+ strict=False, # type: bool
+ foreign_properties=None, # type: Set[unicode]
+ raise_ex=True # type: bool
+ ):
+ # type: (...) -> bool
"""Determine if a python datum is an instance of a schema."""
if not identifiers:
@@ -68,7 +77,7 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
return True
else:
if raise_ex:
- raise ValidationException(u"the value `%s` is not null" % vpformat(datum))
+ raise ValidationException(u"the value is not null")
else:
return False
elif schema_type == 'boolean':
@@ -76,7 +85,7 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
return True
else:
if raise_ex:
- raise ValidationException(u"the value `%s` is not boolean" % vpformat(datum))
+ raise ValidationException(u"the value is not boolean")
else:
return False
elif schema_type == 'string':
@@ -87,7 +96,7 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
return True
else:
if raise_ex:
- raise ValidationException(u"the value `%s` is not string" % vpformat(datum))
+ raise ValidationException(u"the value is not string")
else:
return False
elif schema_type == 'bytes':
@@ -95,12 +104,13 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
return True
else:
if raise_ex:
- raise ValidationException(u"the value `%s` is not bytes" % vpformat(datum))
+ raise ValidationException(
+ u"the value `%s` is not bytes" % vpformat(datum))
else:
return False
elif schema_type == 'int':
if ((isinstance(datum, int) or isinstance(datum, long))
- and INT_MIN_VALUE <= datum <= INT_MAX_VALUE):
+ and INT_MIN_VALUE <= datum <= INT_MAX_VALUE):
return True
else:
if raise_ex:
@@ -109,28 +119,22 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
return False
elif schema_type == 'long':
if ((isinstance(datum, int) or isinstance(datum, long))
- and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE):
+ and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE):
return True
else:
if raise_ex:
- raise ValidationException(u"the value `%s` is not long" % vpformat(datum))
+ raise ValidationException(
+ u"the value `%s` is not long" % vpformat(datum))
else:
return False
elif schema_type in ['float', 'double']:
if (isinstance(datum, int) or isinstance(datum, long)
- or isinstance(datum, float)):
- return True
- else:
- if raise_ex:
- raise ValidationException(u"the value `%s` is not float or double" % vpformat(datum))
- else:
- return False
- elif isinstance(expected_schema, avro.schema.FixedSchema):
- if isinstance(datum, str) and len(datum) == expected_schema.size:
+ or isinstance(datum, float)):
return True
else:
if raise_ex:
- raise ValidationException(u"the value `%s` is not fixed" % vpformat(datum))
+ raise ValidationException(
+ u"the value `%s` is not float or double" % vpformat(datum))
else:
return False
elif isinstance(expected_schema, avro.schema.EnumSchema):
@@ -142,28 +146,42 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
raise ValidationException(u"'Any' type must be non-null")
else:
return False
+ if not isinstance(datum, basestring):
+ if raise_ex:
+ raise ValidationException(
+ u"value is a %s but expected a string" % (type(datum).__name__))
+ else:
+ return False
if datum in expected_schema.symbols:
return True
else:
if raise_ex:
- raise ValidationException(u"the value `%s`\n is not a valid symbol in enum %s, expected one of %s" % (vpformat(datum), expected_schema.name, "'" + "', '".join(expected_schema.symbols) + "'"))
+ raise ValidationException(u"the value %s is not a valid %s, expected %s%s" % (vpformat(datum), expected_schema.name,
+ "one of " if len(
+ expected_schema.symbols) > 1 else "",
+ "'" + "', '".join(expected_schema.symbols) + "'"))
else:
return False
elif isinstance(expected_schema, avro.schema.ArraySchema):
if isinstance(datum, list):
for i, d in enumerate(datum):
try:
- if not validate_ex(expected_schema.items, d, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=raise_ex):
+ sl = SourceLine(datum, i, ValidationException)
+ if not validate_ex(expected_schema.items, d, identifiers, strict=strict,
+ foreign_properties=foreign_properties,
+ raise_ex=raise_ex):
return False
except ValidationException as v:
if raise_ex:
- raise ValidationException(u"At position %i\n%s" % (i, indent(str(v))))
+ raise sl.makeError(
+ unicode("item is invalid because\n%s" % (indent(str(v)))))
else:
return False
return True
else:
if raise_ex:
- raise ValidationException(u"the value `%s` is not a list, expected list of %s" % (vpformat(datum), friendly(expected_schema.items)))
+ raise ValidationException(u"the value is not a list, expected list of %s" % (
+ friendly(expected_schema.items)))
else:
return False
elif isinstance(expected_schema, avro.schema.UnionSchema):
@@ -175,34 +193,47 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
return False
errors = [] # type: List[unicode]
+ checked = []
for s in expected_schema.schemas:
+ if isinstance(datum, list) and not isinstance(s, avro.schema.ArraySchema):
+ continue
+ elif isinstance(datum, dict) and not isinstance(s, avro.schema.RecordSchema):
+ continue
+ elif isinstance(datum, (bool, int, long, float, basestring)) and isinstance(s, (avro.schema.ArraySchema, avro.schema.RecordSchema)):
+ continue
+ elif datum is not None and s.type == "null":
+ continue
+
+ checked.append(s)
try:
- validate_ex(s, datum, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=True)
+ validate_ex(s, datum, identifiers, strict=strict,
+ foreign_properties=foreign_properties, raise_ex=True)
except ClassValidationException as e:
raise
except ValidationException as e:
errors.append(unicode(e))
-
- raise ValidationException(u"the value %s is not a valid type in the union, expected one of:\n%s" % (
- multi(vpformat(datum), '`'), u"\n".join([
- u"- %s, but\n %s" % (
- friendly(expected_schema.schemas[i]), indent(multi(errors[i])))
- for i in range(0, len(expected_schema.schemas))])))
+ if errors:
+ raise ValidationException(bullets(["tried %s but\n%s" % (friendly(
+ checked[i]), indent(errors[i])) for i in range(0, len(errors))], "- "))
+ else:
+ raise ValidationException("value is a %s, expected %s" % (
+ type(datum).__name__, friendly(expected_schema)))
elif isinstance(expected_schema, avro.schema.RecordSchema):
if not isinstance(datum, dict):
if raise_ex:
- raise ValidationException(u"`%s`\n is not a dict" % vpformat(datum))
+ raise ValidationException(u"is not a dict")
else:
return False
classmatch = None
for f in expected_schema.fields:
- if f.name == "class":
- d = datum.get("class")
+ if f.name in ("class",):
+ d = datum.get(f.name)
if not d:
if raise_ex:
- raise ValidationException(u"Missing 'class' field")
+ raise ValidationException(
+ u"Missing '%s' field" % (f.name))
else:
return False
if expected_schema.name != d:
@@ -212,7 +243,7 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
errors = []
for f in expected_schema.fields:
- if f.name == "class":
+ if f.name in ("class",):
continue
if f.name in datum:
@@ -224,13 +255,16 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
fieldval = None
try:
- if not validate_ex(f.type, fieldval, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=raise_ex):
+ sl = SourceLine(datum, f.name, unicode)
+ if not validate_ex(f.type, fieldval, identifiers, strict=strict, foreign_properties=foreign_properties,
+ raise_ex=raise_ex):
return False
except ValidationException as v:
if f.name not in datum:
errors.append(u"missing required field `%s`" % f.name)
else:
- errors.append(u"could not validate field `%s` because\n%s" % (f.name, multi(indent(str(v)))))
+ errors.append(sl.makeError(u"the `%s` field is not valid because\n%s" % (
+ f.name, indent(str(v)))))
if strict:
for d in datum:
@@ -239,21 +273,24 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
if d == f.name:
found = True
if not found:
+ sl = SourceLine(datum, d, unicode)
if d not in identifiers and d not in foreign_properties and d[0] not in ("@", "$"):
if not raise_ex:
return False
split = urlparse.urlsplit(d)
if split.scheme:
- errors.append(u"could not validate extension field `%s` because it is not recognized and strict is True. Did you include a $schemas section?" % (d))
+ errors.append(sl.makeError(
+ u"unrecognized extension field `%s` and strict is True. Did you include a $schemas section?" % (d)))
else:
- errors.append(u"could not validate field `%s` because it is not recognized and strict is True, valid fields are: %s" % (d, ", ".join(fn.name for fn in expected_schema.fields)))
+ errors.append(sl.makeError(u"invalid field `%s`, expected one of: %s" % (
+ d, ", ".join("'%s'" % fn.name for fn in expected_schema.fields))))
if errors:
if raise_ex:
if classmatch:
- raise ClassValidationException(u"%s record %s" % (classmatch, "\n".join(errors)))
+ raise ClassValidationException(bullets(errors, "* "))
else:
- raise ValidationException(u"\n".join(errors))
+ raise ValidationException(bullets(errors, "* "))
else:
return False
else:
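
The central pattern in the rewritten validator above is `SourceLine`: wrap the position being checked, then raise errors that carry the original file/line context. A condensed sketch of the idiom as the diff uses it:

```
# Condensed illustration of the SourceLine error pattern used above.
from schema_salad.sourceline import SourceLine
from schema_salad.validate import ValidationException

def check_required(datum, name):
    sl = SourceLine(datum, name, ValidationException)
    if name not in datum:
        # makeError builds the exception type passed above; when datum came
        # from a tracked YAML load, the message is prefixed with file/line.
        raise sl.makeError(u"missing required field `%s`" % name)
```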
diff --git a/setup.cfg b/setup.cfg
index 9d8f2af..522ffdd 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -11,7 +11,7 @@ test = pytest
addopts = --pyarg schema_salad
[egg_info]
-tag_build = .20161215163938
+tag_build = .20170111180227
tag_date = 0
tag_svn_revision = 0
diff --git a/setup.py b/setup.py
index 1dd27dc..1e8c2fd 100755
--- a/setup.py
+++ b/setup.py
@@ -30,10 +30,9 @@ else:
install_requires = [
'setuptools',
'requests >= 1.0',
- 'ruamel.yaml >= 0.12.4, < 0.12.5',
+ 'ruamel.yaml >= 0.12.4',
'rdflib >= 4.2.0, < 4.3.0',
'rdflib-jsonld >= 0.3.0, < 0.5.0',
- 'html5lib >= 0.90, <= 0.9999999',
'mistune >= 0.7.3, < 0.8',
'typing >= 3.5.2, < 3.6',
'CacheControl >= 0.11.7, < 0.12',
@@ -48,7 +47,7 @@ install_requires.append("avro") # TODO: remove me once cwltool is
extras_require = {} # TODO: to be removed when the above is added
setup(name='schema-salad',
- version='1.21',
+ version='2.2',
description='Schema Annotations for Linked Avro Data (SALAD)',
long_description=open(README).read(),
author='Common workflow language working group',
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-schema-salad.git