[med-svn] [python-schema-salad] 01/01: Imported Upstream version 1.6.20160202222448
Michael Crusoe
misterc-guest at moszumanska.debian.org
Sun Feb 7 14:03:43 UTC 2016
This is an automated email from the git hooks/post-receive script.
misterc-guest pushed a commit to annotated tag upstream/1.6.20160202222448
in repository python-schema-salad.
commit 93e80b1377ab054ad8480815d8b36a1c72c2414c
Author: Michael R. Crusoe <crusoe at ucdavis.edu>
Date: Fri Feb 5 02:55:57 2016 -0800
Imported Upstream version 1.6.20160202222448
---
PKG-INFO | 86 +++++
README.rst | 75 ++++
schema_salad.egg-info/PKG-INFO | 86 +++++
schema_salad.egg-info/SOURCES.txt | 39 ++
schema_salad.egg-info/dependency_links.txt | 1 +
schema_salad.egg-info/entry_points.txt | 3 +
schema_salad.egg-info/pbr.json | 1 +
schema_salad.egg-info/requires.txt | 6 +
schema_salad.egg-info/top_level.txt | 1 +
schema_salad.egg-info/zip-safe | 1 +
schema_salad/__init__.py | 7 +
schema_salad/__main__.py | 4 +
schema_salad/aslist.py | 7 +
schema_salad/flatten.py | 20 +
schema_salad/jsonld_context.py | 154 ++++++++
schema_salad/main.py | 213 +++++++++++
schema_salad/makedoc.py | 466 ++++++++++++++++++++++++
schema_salad/metaschema/field_name.yml | 46 +++
schema_salad/metaschema/field_name_proc.yml | 8 +
schema_salad/metaschema/field_name_schema.yml | 14 +
schema_salad/metaschema/field_name_src.yml | 8 +
schema_salad/metaschema/ident_res.yml | 53 +++
schema_salad/metaschema/ident_res_proc.yml | 20 +
schema_salad/metaschema/ident_res_schema.yml | 14 +
schema_salad/metaschema/ident_res_src.yml | 20 +
schema_salad/metaschema/import_include.md | 112 ++++++
schema_salad/metaschema/link_res.yml | 55 +++
schema_salad/metaschema/link_res_proc.yml | 21 ++
schema_salad/metaschema/link_res_schema.yml | 16 +
schema_salad/metaschema/link_res_src.yml | 21 ++
schema_salad/metaschema/metaschema.yml | 437 ++++++++++++++++++++++
schema_salad/metaschema/salad.md | 256 +++++++++++++
schema_salad/metaschema/vocab_res.yml | 35 ++
schema_salad/metaschema/vocab_res_proc.yml | 15 +
schema_salad/metaschema/vocab_res_schema.yml | 21 ++
schema_salad/metaschema/vocab_res_src.yml | 15 +
schema_salad/ref_resolver.py | 502 ++++++++++++++++++++++++++
schema_salad/schema.py | 392 ++++++++++++++++++++
schema_salad/validate.py | 174 +++++++++
setup.cfg | 5 +
setup.py | 45 +++
41 files changed, 3475 insertions(+)
diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..23c35dd
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,86 @@
+Metadata-Version: 1.1
+Name: schema-salad
+Version: 1.6.20160202222448
+Summary: Schema Annotations for Linked Avro Data (SALAD)
+Home-page: https://github.com/common-workflow-language/common-workflow-language
+Author: Common workflow language working group
+Author-email: common-workflow-language@googlegroups.com
+License: Apache 2.0
+Download-URL: https://github.com/common-workflow-language/common-workflow-language
+Description: Schema Salad
+ ------------
+
+ Salad is a schema language for describing JSON or YAML structured linked data
+ documents. Salad is based originally on JSON-LD_ and the Apache Avro_ data
+ serialization system.
+
+        A Salad schema describes rules for preprocessing, structural validation, and
+        link checking for documents described by that schema. Salad provides features
+        for rich data modeling such as inheritance, template specialization, object
+        identifiers, object references, documentation generation, and transformation
+        to RDF_. Salad provides a bridge between document and record oriented data
+        modeling and the Semantic Web.
+
+ Usage
+ -----
+
+ ::
+
+ $ pip install schema_salad
+ $ schema-salad-tool
+ usage: schema-salad-tool [-h] [--rdf-serializer RDF_SERIALIZER]
+ [--print-jsonld-context | --print-doc | --print-rdfs | --print-avro | --print-rdf | --print-pre | --print-index | --print-metadata | --version]
+ [--strict | --non-strict]
+ [--verbose | --quiet | --debug]
+ schema [document]
+ $ python
+ >>> import schema_salad
+
+ Documentation
+ -------------
+
+        See the specification_ and the metaschema_ (the Salad schema for itself). For
+        an example application of Schema Salad, see the Common Workflow Language_.
+
+ Rationale
+ ---------
+
+        The JSON data model is a popular way to represent structured data. It is
+        attractive because of its relative simplicity and is a natural fit with the
+ standard types of many programming languages. However, this simplicity comes
+ at the cost that basic JSON lacks expressive features useful for working with
+ complex data structures and document formats, such as schemas, object
+ references, and namespaces.
+
+ JSON-LD is a W3C standard providing a way to describe how to interpret a JSON
+ document as Linked Data by means of a "context". JSON-LD provides a powerful
+ solution for representing object references and namespaces in JSON based on
+ standard web URIs, but is not itself a schema language. Without a schema
+ providing a well defined structure, it is difficult to process an arbitrary
+ JSON-LD document as idiomatic JSON because there are many ways to express the
+ same data that are logically equivalent but structurally distinct.
+
+        Several schema languages exist for describing and validating JSON data, such
+        as JSON Schema and the Apache Avro data serialization system; however, none of
+        them understand linked data. As a result, to fully take advantage of JSON-LD to
+ build the next generation of linked data applications, one must maintain
+ separate JSON schema, JSON-LD context, RDF schema, and human documentation,
+ despite significant overlap of content and obvious need for these documents to
+ stay synchronized.
+
+ Schema Salad is designed to address this gap. It provides a schema language
+ and processing rules for describing structured JSON content permitting URI
+ resolution and strict document validation. The schema language supports linked
+ data through annotations that describe the linked data interpretation of the
+ content, enables generation of JSON-LD context and RDF schema, and production
+ of RDF triples by applying the JSON-LD context. The schema language also
+ provides for robust support of inline documentation.
+
+ .. _JSON-LD: http://json-ld.org
+ .. _Avro: http://avro.apache.org
+ .. _metaschema: https://github.com/common-workflow-language/schema_salad/blob/master/schema_salad/metaschema/metaschema.yml
+ .. _specification: https://common-workflow-language.github.io/draft-3/SchemaSalad.html
+ .. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/draft-3/CommandLineTool.yml
+ .. _RDF: https://www.w3.org/RDF/
+
+Platform: UNKNOWN
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..49a077f
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,75 @@
+Schema Salad
+------------
+
+Salad is a schema language for describing JSON or YAML structured linked data
+documents. Salad is based originally on JSON-LD_ and the Apache Avro_ data
+serialization system.
+
+A Salad schema describes rules for preprocessing, structural validation, and
+link checking for documents described by that schema. Salad provides features
+for rich data modeling such as inheritance, template specialization, object
+identifiers, object references, documentation generation, and transformation
+to RDF_. Salad provides a bridge between document and record oriented data
+modeling and the Semantic Web.
+
+Usage
+-----
+
+::
+
+ $ pip install schema_salad
+ $ schema-salad-tool
+ usage: schema-salad-tool [-h] [--rdf-serializer RDF_SERIALIZER]
+ [--print-jsonld-context | --print-doc | --print-rdfs | --print-avro | --print-rdf | --print-pre | --print-index | --print-metadata | --version]
+ [--strict | --non-strict]
+ [--verbose | --quiet | --debug]
+ schema [document]
+ $ python
+ >>> import schema_salad
+
+Documentation
+-------------
+
+See the specification_ and the metaschema_ (the Salad schema for itself). For
+an example application of Schema Salad, see the Common Workflow Language_.
+
+Rationale
+---------
+
+The JSON data model is a popular way to represent structured data. It is
+attractive because of its relative simplicity and is a natural fit with the
+standard types of many programming languages. However, this simplicity comes
+at the cost that basic JSON lacks expressive features useful for working with
+complex data structures and document formats, such as schemas, object
+references, and namespaces.
+
+JSON-LD is a W3C standard providing a way to describe how to interpret a JSON
+document as Linked Data by means of a "context". JSON-LD provides a powerful
+solution for representing object references and namespaces in JSON based on
+standard web URIs, but is not itself a schema language. Without a schema
+providing a well defined structure, it is difficult to process an arbitrary
+JSON-LD document as idiomatic JSON because there are many ways to express the
+same data that are logically equivalent but structurally distinct.
+
+Several schema languages exist for describing and validating JSON data, such
+as JSON Schema and the Apache Avro data serialization system; however, none of
+them understand linked data. As a result, to fully take advantage of JSON-LD to
+build the next generation of linked data applications, one must maintain
+separate JSON schema, JSON-LD context, RDF schema, and human documentation,
+despite significant overlap of content and obvious need for these documents to
+stay synchronized.
+
+Schema Salad is designed to address this gap. It provides a schema language
+and processing rules for describing structured JSON content permitting URI
+resolution and strict document validation. The schema language supports linked
+data through annotations that describe the linked data interpretation of the
+content, enables generation of JSON-LD context and RDF schema, and production
+of RDF triples by applying the JSON-LD context. The schema language also
+provides for robust support of inline documentation.
+
+.. _JSON-LD: http://json-ld.org
+.. _Avro: http://avro.apache.org
+.. _metaschema: https://github.com/common-workflow-language/schema_salad/blob/master/schema_salad/metaschema/metaschema.yml
+.. _specification: https://common-workflow-language.github.io/draft-3/SchemaSalad.html
+.. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/draft-3/CommandLineTool.yml
+.. _RDF: https://www.w3.org/RDF/
diff --git a/schema_salad.egg-info/PKG-INFO b/schema_salad.egg-info/PKG-INFO
new file mode 100644
index 0000000..23c35dd
--- /dev/null
+++ b/schema_salad.egg-info/PKG-INFO
@@ -0,0 +1,86 @@
+Metadata-Version: 1.1
+Name: schema-salad
+Version: 1.6.20160202222448
+Summary: Schema Annotations for Linked Avro Data (SALAD)
+Home-page: https://github.com/common-workflow-language/common-workflow-language
+Author: Common workflow language working group
+Author-email: common-workflow-language@googlegroups.com
+License: Apache 2.0
+Download-URL: https://github.com/common-workflow-language/common-workflow-language
+Description: Schema Salad
+ ------------
+
+ Salad is a schema language for describing JSON or YAML structured linked data
+ documents. Salad is based originally on JSON-LD_ and the Apache Avro_ data
+ serialization system.
+
+        A Salad schema describes rules for preprocessing, structural validation, and
+        link checking for documents described by that schema. Salad provides features
+        for rich data modeling such as inheritance, template specialization, object
+        identifiers, object references, documentation generation, and transformation
+        to RDF_. Salad provides a bridge between document and record oriented data
+        modeling and the Semantic Web.
+
+ Usage
+ -----
+
+ ::
+
+ $ pip install schema_salad
+ $ schema-salad-tool
+ usage: schema-salad-tool [-h] [--rdf-serializer RDF_SERIALIZER]
+ [--print-jsonld-context | --print-doc | --print-rdfs | --print-avro | --print-rdf | --print-pre | --print-index | --print-metadata | --version]
+ [--strict | --non-strict]
+ [--verbose | --quiet | --debug]
+ schema [document]
+ $ python
+ >>> import schema_salad
+
+ Documentation
+ -------------
+
+        See the specification_ and the metaschema_ (the Salad schema for itself). For
+        an example application of Schema Salad, see the Common Workflow Language_.
+
+ Rationale
+ ---------
+
+        The JSON data model is a popular way to represent structured data. It is
+        attractive because of its relative simplicity and is a natural fit with the
+ standard types of many programming languages. However, this simplicity comes
+ at the cost that basic JSON lacks expressive features useful for working with
+ complex data structures and document formats, such as schemas, object
+ references, and namespaces.
+
+ JSON-LD is a W3C standard providing a way to describe how to interpret a JSON
+ document as Linked Data by means of a "context". JSON-LD provides a powerful
+ solution for representing object references and namespaces in JSON based on
+ standard web URIs, but is not itself a schema language. Without a schema
+ providing a well defined structure, it is difficult to process an arbitrary
+ JSON-LD document as idiomatic JSON because there are many ways to express the
+ same data that are logically equivalent but structurally distinct.
+
+        Several schema languages exist for describing and validating JSON data, such
+        as JSON Schema and the Apache Avro data serialization system; however, none of
+        them understand linked data. As a result, to fully take advantage of JSON-LD to
+ build the next generation of linked data applications, one must maintain
+ separate JSON schema, JSON-LD context, RDF schema, and human documentation,
+ despite significant overlap of content and obvious need for these documents to
+ stay synchronized.
+
+ Schema Salad is designed to address this gap. It provides a schema language
+ and processing rules for describing structured JSON content permitting URI
+ resolution and strict document validation. The schema language supports linked
+ data through annotations that describe the linked data interpretation of the
+ content, enables generation of JSON-LD context and RDF schema, and production
+ of RDF triples by applying the JSON-LD context. The schema language also
+ provides for robust support of inline documentation.
+
+ .. _JSON-LD: http://json-ld.org
+ .. _Avro: http://avro.apache.org
+ .. _metaschema: https://github.com/common-workflow-language/schema_salad/blob/master/schema_salad/metaschema/metaschema.yml
+ .. _specification: https://common-workflow-language.github.io/draft-3/SchemaSalad.html
+ .. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/draft-3/CommandLineTool.yml
+ .. _RDF: https://www.w3.org/RDF/
+
+Platform: UNKNOWN
diff --git a/schema_salad.egg-info/SOURCES.txt b/schema_salad.egg-info/SOURCES.txt
new file mode 100644
index 0000000..36ba01e
--- /dev/null
+++ b/schema_salad.egg-info/SOURCES.txt
@@ -0,0 +1,39 @@
+README.rst
+setup.py
+schema_salad/__init__.py
+schema_salad/__main__.py
+schema_salad/aslist.py
+schema_salad/flatten.py
+schema_salad/jsonld_context.py
+schema_salad/main.py
+schema_salad/makedoc.py
+schema_salad/ref_resolver.py
+schema_salad/schema.py
+schema_salad/validate.py
+schema_salad.egg-info/PKG-INFO
+schema_salad.egg-info/SOURCES.txt
+schema_salad.egg-info/dependency_links.txt
+schema_salad.egg-info/entry_points.txt
+schema_salad.egg-info/pbr.json
+schema_salad.egg-info/requires.txt
+schema_salad.egg-info/top_level.txt
+schema_salad.egg-info/zip-safe
+schema_salad/metaschema/field_name.yml
+schema_salad/metaschema/field_name_proc.yml
+schema_salad/metaschema/field_name_schema.yml
+schema_salad/metaschema/field_name_src.yml
+schema_salad/metaschema/ident_res.yml
+schema_salad/metaschema/ident_res_proc.yml
+schema_salad/metaschema/ident_res_schema.yml
+schema_salad/metaschema/ident_res_src.yml
+schema_salad/metaschema/import_include.md
+schema_salad/metaschema/link_res.yml
+schema_salad/metaschema/link_res_proc.yml
+schema_salad/metaschema/link_res_schema.yml
+schema_salad/metaschema/link_res_src.yml
+schema_salad/metaschema/metaschema.yml
+schema_salad/metaschema/salad.md
+schema_salad/metaschema/vocab_res.yml
+schema_salad/metaschema/vocab_res_proc.yml
+schema_salad/metaschema/vocab_res_schema.yml
+schema_salad/metaschema/vocab_res_src.yml
\ No newline at end of file
diff --git a/schema_salad.egg-info/dependency_links.txt b/schema_salad.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/schema_salad.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/schema_salad.egg-info/entry_points.txt b/schema_salad.egg-info/entry_points.txt
new file mode 100644
index 0000000..9cd6059
--- /dev/null
+++ b/schema_salad.egg-info/entry_points.txt
@@ -0,0 +1,3 @@
+[console_scripts]
+schema-salad-tool = schema_salad.main:main
+
diff --git a/schema_salad.egg-info/pbr.json b/schema_salad.egg-info/pbr.json
new file mode 100644
index 0000000..60082fd
--- /dev/null
+++ b/schema_salad.egg-info/pbr.json
@@ -0,0 +1 @@
+{"is_release": false, "git_version": "d382c03"}
\ No newline at end of file
diff --git a/schema_salad.egg-info/requires.txt b/schema_salad.egg-info/requires.txt
new file mode 100644
index 0000000..6c09549
--- /dev/null
+++ b/schema_salad.egg-info/requires.txt
@@ -0,0 +1,6 @@
+requests
+PyYAML
+avro
+rdflib >= 4.2.0
+rdflib-jsonld >= 0.3.0
+mistune
diff --git a/schema_salad.egg-info/top_level.txt b/schema_salad.egg-info/top_level.txt
new file mode 100644
index 0000000..469e18d
--- /dev/null
+++ b/schema_salad.egg-info/top_level.txt
@@ -0,0 +1 @@
+schema_salad
diff --git a/schema_salad.egg-info/zip-safe b/schema_salad.egg-info/zip-safe
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/schema_salad.egg-info/zip-safe
@@ -0,0 +1 @@
+
diff --git a/schema_salad/__init__.py b/schema_salad/__init__.py
new file mode 100644
index 0000000..f661a1e
--- /dev/null
+++ b/schema_salad/__init__.py
@@ -0,0 +1,7 @@
+import logging
+
+__author__ = 'peter.amstutz@curoverse.com'
+
+_logger = logging.getLogger("salad")
+_logger.addHandler(logging.StreamHandler())
+_logger.setLevel(logging.INFO)
diff --git a/schema_salad/__main__.py b/schema_salad/__main__.py
new file mode 100644
index 0000000..ae4ff8a
--- /dev/null
+++ b/schema_salad/__main__.py
@@ -0,0 +1,4 @@
+import main
+import sys
+
+sys.exit(main.main())
diff --git a/schema_salad/aslist.py b/schema_salad/aslist.py
new file mode 100644
index 0000000..962ff09
--- /dev/null
+++ b/schema_salad/aslist.py
@@ -0,0 +1,7 @@
+def aslist(l):
+ """Convenience function to wrap single items and lists, and return lists unchanged."""
+
+ if isinstance(l, list):
+ return l
+ else:
+ return [l]
diff --git a/schema_salad/flatten.py b/schema_salad/flatten.py
new file mode 100644
index 0000000..54e918a
--- /dev/null
+++ b/schema_salad/flatten.py
@@ -0,0 +1,20 @@
+# http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html
+def flatten(l, ltypes=(list, tuple)):
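+ """Recursively flatten nested lists and tuples into a flat sequence of the input's type; None yields an empty list."""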
+ if l is None:
+ return []
+ if not isinstance(l, ltypes):
+ return [l]
+
+ ltype = type(l)
+ l = list(l)
+ i = 0
+ while i < len(l):
+ while isinstance(l[i], ltypes):
+ if not l[i]:
+ l.pop(i)
+ i -= 1
+ break
+ else:
+ l[i:i + 1] = l[i]
+ i += 1
+ return ltype(l)
diff --git a/schema_salad/jsonld_context.py b/schema_salad/jsonld_context.py
new file mode 100755
index 0000000..2e638b9
--- /dev/null
+++ b/schema_salad/jsonld_context.py
@@ -0,0 +1,154 @@
+import shutil
+import json
+import yaml
+import os
+import subprocess
+import copy
+import pprint
+import re
+import sys
+import rdflib
+from rdflib import Graph, URIRef
+import rdflib.namespace
+from rdflib.namespace import RDF, RDFS
+import urlparse
+import logging
+from aslist import aslist
+
+_logger = logging.getLogger("salad")
+
+def pred(datatype, field, name, context, defaultBase, namespaces):
+ split = urlparse.urlsplit(name)
+
+ v = None
+
+ if split.scheme:
+ v = name
+ (ns, ln) = rdflib.namespace.split_uri(unicode(v))
+ name = ln
+ if ns[0:-1] in namespaces:
+ v = unicode(namespaces[ns[0:-1]][ln])
+ _logger.debug("name, v %s %s", name, v)
+
+ if field and "jsonldPredicate" in field:
+ if isinstance(field["jsonldPredicate"], dict):
+ v = {("@"+k[1:] if k.startswith("_") else k): v
+ for k,v in field["jsonldPredicate"].items() }
+ else:
+ v = field["jsonldPredicate"]
+ elif "jsonldPredicate" in datatype:
+ for d in datatype["jsonldPredicate"]:
+ if d["symbol"] == name:
+ v = d["predicate"]
+ # if not v:
+ # if field and "jsonldPrefix" in field:
+ # defaultBase = field["jsonldPrefix"]
+ # elif "jsonldPrefix" in datatype:
+ # defaultBase = datatype["jsonldPrefix"]
+
+ if not v:
+ v = defaultBase + name
+
+ if name in context:
+ if context[name] != v:
+ raise Exception("Predicate collision on %s, '%s' != '%s'" % (name, context[name], v))
+ else:
+ _logger.debug("Adding to context '%s' %s (%s)", name, v, type(v))
+ context[name] = v
+
+ return v
+
+def process_type(t, g, context, defaultBase, namespaces, defaultPrefix):
+ if t["type"] == "record":
+ recordname = t["name"]
+
+ _logger.debug("Processing record %s\n", t)
+
+ classnode = URIRef(recordname)
+ g.add((classnode, RDF.type, RDFS.Class))
+
+ split = urlparse.urlsplit(recordname)
+ if "jsonldPrefix" in t:
+ predicate = "%s:%s" % (t["jsonldPrefix"], recordname)
+ elif split.scheme:
+ (ns, ln) = rdflib.namespace.split_uri(unicode(recordname))
+ predicate = recordname
+ recordname = ln
+ else:
+ predicate = "%s:%s" % (defaultPrefix, recordname)
+
+ if context.get(recordname, predicate) != predicate:
+ raise Exception("Predicate collision on '%s', '%s' != '%s'" % (recordname, context[t["name"]], predicate))
+
+ if not recordname:
+ raise Exception()
+
+ _logger.debug("Adding to context '%s' %s (%s)", recordname, predicate, type(predicate))
+ context[recordname] = predicate
+
+ for i in t.get("fields", []):
+ fieldname = i["name"]
+
+ _logger.debug("Processing field %s", i)
+
+ v = pred(t, i, fieldname, context, defaultPrefix, namespaces)
+
+ if isinstance(v, basestring):
+ v = v if v[0] != "@" else None
+ else:
+ v = v["_ at id"] if v.get("_ at id", "@")[0] != "@" else None
+
+ if v:
+ (ns, ln) = rdflib.namespace.split_uri(unicode(v))
+ if ns[0:-1] in namespaces:
+ propnode = namespaces[ns[0:-1]][ln]
+ else:
+ propnode = URIRef(v)
+
+ g.add((propnode, RDF.type, RDF.Property))
+ g.add((propnode, RDFS.domain, classnode))
+
+ # TODO generate range from datatype.
+
+ if isinstance(i["type"], dict) and "name" in i["type"]:
+ process_type(i["type"], g, context, defaultBase, namespaces, defaultPrefix)
+
+ if "extends" in t:
+ for e in aslist(t["extends"]):
+ g.add((classnode, RDFS.subClassOf, URIRef(e)))
+ elif t["type"] == "enum":
+ _logger.debug("Processing enum %s", t["name"])
+
+ for i in t["symbols"]:
+ pred(t, None, i, context, defaultBase, namespaces)
+
+
+def salad_to_jsonld_context(j, schema_ctx):
+ context = {}
+ namespaces = {}
+ g = Graph()
+ defaultPrefix = ""
+
+ for k,v in schema_ctx.items():
+ context[k] = v
+ namespaces[k] = rdflib.namespace.Namespace(v)
+
+ if "@base" in context:
+ defaultBase = context["@base"]
+ del context["@base"]
+ else:
+ defaultBase = ""
+
+ for k,v in namespaces.items():
+ g.bind(k, v)
+
+ for t in j:
+ process_type(t, g, context, defaultBase, namespaces, defaultPrefix)
+
+ return (context, g)
+
+if __name__ == "__main__":
+ with open(sys.argv[1]) as f:
+ j = yaml.load(f)
+ (ctx, g) = salad_to_jsonld_context(j, {})  # no extra schema context when run standalone
+ print json.dumps(ctx, indent=4, sort_keys=True)
diff --git a/schema_salad/main.py b/schema_salad/main.py
new file mode 100644
index 0000000..c27f87c
--- /dev/null
+++ b/schema_salad/main.py
@@ -0,0 +1,213 @@
+import argparse
+import logging
+import sys
+import pkg_resources # part of setuptools
+import schema
+import jsonld_context
+import makedoc
+import json
+from rdflib import Graph, plugin
+from rdflib.serializer import Serializer
+import yaml
+import os
+import urlparse
+
+from ref_resolver import Loader
+import validate
+
+_logger = logging.getLogger("salad")
+
+from rdflib.plugin import register, Parser
+import rdflib_jsonld.parser
+register('json-ld', Parser, 'rdflib_jsonld.parser', 'JsonLDParser')
+
+def printrdf(workflow, wf, ctx, sr):
+ g = Graph().parse(data=json.dumps(wf), format='json-ld', location=workflow, context=ctx)
+ print(g.serialize(format=sr))
+
+def main(args=None):
+ if args is None:
+ args = sys.argv[1:]
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--rdf-serializer",
+ help="Output RDF serialization format used by --print-rdf (one of turtle (default), n3, nt, xml)",
+ default="turtle")
+
+ exgroup = parser.add_mutually_exclusive_group()
+ exgroup.add_argument("--print-jsonld-context", action="store_true", help="Print JSON-LD context for schema")
+ exgroup.add_argument("--print-doc", action="store_true", help="Print HTML documentation from schema")
+ exgroup.add_argument("--print-rdfs", action="store_true", help="Print RDF schema")
+ exgroup.add_argument("--print-avro", action="store_true", help="Print Avro schema")
+
+ exgroup.add_argument("--print-rdf", action="store_true", help="Print corresponding RDF graph for document")
+ exgroup.add_argument("--print-pre", action="store_true", help="Print document after preprocessing")
+ exgroup.add_argument("--print-index", action="store_true", help="Print node index")
+ exgroup.add_argument("--print-metadata", action="store_true", help="Print document metadata")
+ exgroup.add_argument("--version", action="store_true", help="Print version")
+
+ exgroup = parser.add_mutually_exclusive_group()
+ exgroup.add_argument("--strict", action="store_true", help="Strict validation (unrecognized or out of place fields are error)",
+ default=True, dest="strict")
+ exgroup.add_argument("--non-strict", action="store_false", help="Lenient validation (ignore unrecognized fields)",
+ default=True, dest="strict")
+
+ exgroup = parser.add_mutually_exclusive_group()
+ exgroup.add_argument("--verbose", action="store_true", help="Default logging")
+ exgroup.add_argument("--quiet", action="store_true", help="Only print warnings and errors.")
+ exgroup.add_argument("--debug", action="store_true", help="Print even more logging")
+
+ parser.add_argument("schema", type=str)
+ parser.add_argument("document", type=str, nargs="?", default=None)
+
+ args = parser.parse_args(args)
+
+ if args.quiet:
+ _logger.setLevel(logging.WARN)
+ if args.debug:
+ _logger.setLevel(logging.DEBUG)
+
+ pkg = pkg_resources.require("schema_salad")
+ if pkg:
+ if args.version:
+ print "%s %s" % (sys.argv[0], pkg[0].version)
+ return 0
+ else:
+ _logger.info("%s %s", sys.argv[0], pkg[0].version)
+
+ # Get the metaschema to validate the schema
+ metaschema_names, metaschema_doc, metaschema_loader = schema.get_metaschema()
+
+ # Load schema document and resolve refs
+
+ schema_uri = args.schema
+ if not urlparse.urlparse(schema_uri)[0]:
+ schema_uri = "file://" + os.path.abspath(schema_uri)
+ schema_raw_doc = metaschema_loader.fetch(schema_uri)
+ schema_doc, schema_metadata = metaschema_loader.resolve_all(schema_raw_doc, schema_uri)
+
+ # Optionally print the schema after ref resolution
+ if not args.document and args.print_pre:
+ print json.dumps(schema_doc, indent=4)
+ return 0
+
+ if not args.document and args.print_index:
+ print json.dumps(metaschema_loader.idx.keys(), indent=4)
+ return 0
+
+ # Validate links in the schema document
+ try:
+ metaschema_loader.validate_links(schema_doc)
+ except (validate.ValidationException) as e:
+ _logger.error("Schema `%s` failed link checking:\n%s", args.schema, e, exc_info=(e if args.debug else False))
+ _logger.debug("Index is %s", metaschema_loader.idx.keys())
+ _logger.debug("Vocabulary is %s", metaschema_loader.vocab.keys())
+ return 1
+
+ # Validate the schema document against the metaschema
+ try:
+ schema.validate_doc(metaschema_names, schema_doc, metaschema_loader, args.strict)
+ except validate.ValidationException as e:
+ _logger.error("While validating schema `%s`:\n%s" % (args.schema, str(e)))
+ return 1
+
+ # Get the json-ld context and RDFS representation from the schema
+ metactx = {}
+ if isinstance(schema_raw_doc, dict):
+ metactx = schema_raw_doc.get("$namespaces", {})
+ if "$base" in schema_raw_doc:
+ metactx["@base"] = schema_raw_doc["$base"]
+ (schema_ctx, rdfs) = jsonld_context.salad_to_jsonld_context(schema_doc, metactx)
+
+ # Create the loader that will be used to load the target document.
+ document_loader = Loader(schema_ctx)
+
+ # Create the Avro schema that will be used to validate the target document
+ (avsc_names, avsc_obj) = schema.make_avro_schema(schema_doc, document_loader)
+
+ if isinstance(avsc_names, Exception):
+ _logger.error("Schema `%s` error:\n%s", args.schema, avsc_names, exc_info=(avsc_names if args.debug else False))
+ if args.print_avro:
+ print json.dumps(avsc_obj, indent=4)
+ return 1
+
+ # Optionally print Avro-compatible schema from schema
+ if args.print_avro:
+ print json.dumps(avsc_obj, indent=4)
+ return 0
+
+ # Optionally print the json-ld context from the schema
+ if args.print_jsonld_context:
+ j = {"@context": schema_ctx}
+ print json.dumps(j, indent=4, sort_keys=True)
+ return 0
+
+ # Optionally print the RDFS graph from the schema
+ if args.print_rdfs:
+ print(rdfs.serialize(format=args.rdf_serializer))
+ return 0
+
+ # Optionally create documentation page from the schema
+ if args.print_doc:
+ makedoc.avrold_doc(schema_doc, sys.stdout, [], {}, "", "")  # no --only filter, redirects, or branding
+ return 0
+
+ if args.print_metadata and not args.document:
+ print json.dumps(schema_metadata, indent=4)
+ return 0
+
+ # If no document specified, all done.
+ if not args.document:
+ print "Schema `%s` is valid" % args.schema
+ return 0
+
+ # Load target document and resolve refs
+ try:
+ uri = args.document
+ if not urlparse.urlparse(uri)[0]:
+ doc = "file://" + os.path.abspath(uri)
+ document, doc_metadata = document_loader.resolve_ref(uri)
+ except (validate.ValidationException, RuntimeError) as e:
+ _logger.error("Document `%s` failed validation:\n%s", args.document, e, exc_info=(e if args.debug else False))
+ return 1
+
+ # Optionally print the document after ref resolution
+ if args.print_pre:
+ print json.dumps(document, indent=4)
+ return 0
+
+ if args.print_index:
+ print json.dumps(document_loader.idx.keys(), indent=4)
+ return 0
+
+ # Validate links in the target document
+ try:
+ document_loader.validate_links(document)
+ except (validate.ValidationException) as e:
+ _logger.error("Document `%s` failed link checking:\n%s", args.document, e, exc_info=(e if args.debug else False))
+ _logger.debug("Index is %s", json.dumps(document_loader.idx.keys(), indent=4))
+ return 1
+
+ # Validate the schema document against the metaschema
+ try:
+ schema.validate_doc(avsc_names, document, document_loader, args.strict)
+ except validate.ValidationException as e:
+ _logger.error("While validating document `%s`:\n%s" % (args.document, str(e)))
+ return 1
+
+ # Optionally convert the document to RDF
+ if args.print_rdf:
+ printrdf(args.document, document, schema_ctx, args.rdf_serializer)
+ return 0
+
+ if args.print_metadata:
+ print json.dumps(doc_metadata, indent=4)
+ return 0
+
+ print "Document `%s` is valid" % args.document
+
+ return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py
new file mode 100644
index 0000000..06330a9
--- /dev/null
+++ b/schema_salad/makedoc.py
@@ -0,0 +1,466 @@
+import mistune
+import schema
+import json
+import yaml
+import os
+import copy
+import re
+import sys
+import StringIO
+import logging
+import urlparse
+from aslist import aslist
+import argparse
+
+_logger = logging.getLogger("salad")
+
+def has_types(items):
+ r = []
+ if isinstance(items, dict):
+ if items["type"] == "https://w3id.org/cwl/salad#record":
+ return [items["name"]]
+ for n in ("type", "items", "values"):
+ if n in items:
+ r.extend(has_types(items[n]))
+ return r
+ if isinstance(items, list):
+ for i in items:
+ r.extend(has_types(i))
+ return r
+ if isinstance(items, basestring):
+ return [items]
+ return []
+
+def linkto(item):
+ _, frg = urlparse.urldefrag(item)
+ return "[%s](#%s)" % (frg, to_id(frg))
+
+class MyRenderer(mistune.Renderer):
+ def header(self, text, level, raw=None):
+ return """<h%i id="%s">%s</h1>""" % (level, to_id(text), text)
+
+def to_id(text):
+ textid = text
+ if text[0] in ("0", "1", "2", "3", "4", "5", "6", "7", "8", "9"):
+ try:
+ textid = text[text.index(" ")+1:]
+ except ValueError:
+ pass
+ textid = textid.replace(" ", "_")
+ return textid
+
+class ToC(object):
+ def __init__(self):
+ self.first_toc_entry = True
+ self.numbering = [0]
+ self.toc = ""
+ self.start_numbering = True
+
+ def add_entry(self, thisdepth, title):
+ depth = len(self.numbering)
+ if thisdepth < depth:
+ self.toc += "</ol>"
+ for n in range(0, depth-thisdepth):
+ self.numbering.pop()
+ self.toc += "</li></ol>"
+ self.numbering[-1] += 1
+ elif thisdepth == depth:
+ if not self.first_toc_entry:
+ self.toc += "</ol>"
+ else:
+ self.first_toc_entry = False
+ self.numbering[-1] += 1
+ elif thisdepth > depth:
+ self.numbering.append(1)
+
+ if self.start_numbering:
+ num = "%i.%s" % (self.numbering[0], ".".join([str(n) for n in self.numbering[1:]]))
+ else:
+ num = ""
+ self.toc += """<li><a href="#%s">%s %s</a><ol>\n""" %(to_id(title),
+ num, title)
+ return num
+
+ def contents(self, id):
+ c = """<h1 id="%s">Table of contents</h1>
+ <nav class="tocnav"><ol>%s""" % (id, self.toc)
+ c += "</ol>"
+ for i in range(0, len(self.numbering)):
+ c += "</li></ol>"
+ c += """</nav>"""
+ return c
+
+basicTypes = ("https://w3id.org/cwl/salad#null",
+ "http://www.w3.org/2001/XMLSchema#boolean",
+ "http://www.w3.org/2001/XMLSchema#int",
+ "http://www.w3.org/2001/XMLSchema#long",
+ "http://www.w3.org/2001/XMLSchema#float",
+ "http://www.w3.org/2001/XMLSchema#double",
+ "http://www.w3.org/2001/XMLSchema#string",
+ "https://w3id.org/cwl/salad#record",
+ "https://w3id.org/cwl/salad#enum",
+ "https://w3id.org/cwl/salad#array")
+
+def add_dictlist(di, key, val):
+ if key not in di:
+ di[key] = []
+ di[key].append(val)
+
+def number_headings(toc, maindoc):
+ mdlines = []
+ skip = False
+ for line in maindoc.splitlines():
+ if line.strip() == "# Introduction":
+ toc.start_numbering = True
+ toc.numbering = [0]
+
+ if line == "```":
+ skip = not skip
+
+ if not skip:
+ m = re.match(r'^(#+) (.*)', line)
+ if m:
+ num = toc.add_entry(len(m.group(1)), m.group(2))
+ line = "%s %s %s" % (m.group(1), num, m.group(2))
+ line = re.sub(r'^(https?://\S+)', r'[\1](\1)', line)
+ mdlines.append(line)
+
+ maindoc = '\n'.join(mdlines)
+ return maindoc
+
+def fix_doc(doc):
+ if isinstance(doc, list):
+ doc = "".join(doc)
+ return "\n".join([re.sub(r"<([^>@]+@[^>]+)>", r"[\1](mailto:\1)", d) for d in doc.splitlines()])
+
+class RenderType(object):
+ def __init__(self, toc, j, renderlist, redirects):
+ self.typedoc = StringIO.StringIO()
+ self.toc = toc
+ self.subs = {}
+ self.docParent = {}
+ self.docAfter = {}
+ self.rendered = set()
+ self.redirects = redirects
+ self.title = None
+
+ for t in j:
+ if "extends" in t:
+ for e in aslist(t["extends"]):
+ add_dictlist(self.subs, e, t["name"])
+ #if "docParent" not in t and "docAfter" not in t:
+ # add_dictlist(self.docParent, e, t["name"])
+
+ if t.get("docParent"):
+ add_dictlist(self.docParent, t["docParent"], t["name"])
+
+ if t.get("docChild"):
+ for c in aslist(t["docChild"]):
+ add_dictlist(self.docParent, t["name"], c)
+
+ if t.get("docAfter"):
+ add_dictlist(self.docAfter, t["docAfter"], t["name"])
+
+ _, _, metaschema_loader = schema.get_metaschema()
+ alltypes = schema.extend_and_specialize(j, metaschema_loader)
+
+ self.typemap = {}
+ self.uses = {}
+ self.record_refs = {}
+ for t in alltypes:
+ self.typemap[t["name"]] = t
+ try:
+ if t["type"] == "record":
+ self.record_refs[t["name"]] = []
+ for f in t.get("fields", []):
+ p = has_types(f)
+ for tp in p:
+ if tp not in self.uses:
+ self.uses[tp] = []
+ if (t["name"], f["name"]) not in self.uses[tp]:
+ _, frg1 = urlparse.urldefrag(t["name"])
+ _, frg2 = urlparse.urldefrag(f["name"])
+ self.uses[tp].append((frg1, frg2))
+ if tp not in basicTypes and tp not in self.record_refs[t["name"]]:
+ self.record_refs[t["name"]].append(tp)
+ except KeyError as e:
+ _logger.error("Did not find 'type' in %s", t)
+ raise
+
+ for f in alltypes:
+ if (f["name"] in renderlist or
+ ((not renderlist) and
+ ("extends" not in f) and
+ ("docParent" not in f) and
+ ("docAfter" not in f))):
+ self.render_type(f, 1)
+
+ def typefmt(self, tp, redirects, nbsp=False):
+ global primitiveType
+ if isinstance(tp, list):
+ if nbsp and len(tp) <= 3:
+ return "&nbsp;|&nbsp;".join([self.typefmt(n, redirects) for n in tp])
+ else:
+ return " | ".join([self.typefmt(n, redirects) for n in tp])
+ if isinstance(tp, dict):
+ if tp["type"] == "https://w3id.org/cwl/salad#array":
+ return "array<%s>" % (self.typefmt(tp["items"], redirects, nbsp=True))
+ if tp["type"] in ("https://w3id.org/cwl/salad#record", "https://w3id.org/cwl/salad#enum"):
+ frg = schema.avro_name(tp["name"])
+ if tp["name"] in redirects:
+ return """<a href="%s">%s</a>""" % (redirects[tp["name"]], frg)
+ elif tp["name"] in self.typemap:
+ return """<a href="#%s">%s</a>""" % (to_id(frg), frg)
+ else:
+ return frg
+ if isinstance(tp["type"], dict):
+ return self.typefmt(tp["type"], redirects)
+ else:
+ if str(tp) in redirects:
+ return """<a href="%s">%s</a>""" % (redirects[tp], redirects[tp])
+ elif str(tp) in basicTypes:
+ return """<a href="%s">%s</a>""" % (primitiveType, schema.avro_name(str(tp)))
+ else:
+ _, frg = urlparse.urldefrag(tp)
+ if frg:
+ tp = frg
+ return """<a href="#%s">%s</a>""" % (to_id(tp), tp)
+
+
+ def render_type(self, f, depth):
+ if f["name"] in self.rendered or f["name"] in self.redirects:
+ return
+ self.rendered.add(f["name"])
+
+ if "doc" not in f:
+ f["doc"] = ""
+
+ f["type"] = copy.deepcopy(f)
+ f["doc"] = ""
+ f = f["type"]
+
+ if "doc" not in f:
+ f["doc"] = ""
+
+ def extendsfrom(item, ex):
+ if "extends" in item:
+ for e in aslist(item["extends"]):
+ ex.insert(0, self.typemap[e])
+ extendsfrom(self.typemap[e], ex)
+
+ ex = [f]
+ extendsfrom(f, ex)
+
+ enumDesc = {}
+ if f["type"] == "enum" and isinstance(f["doc"], list):
+ for e in ex:
+ for i in e["doc"]:
+ idx = i.find(":")
+ if idx > -1:
+ enumDesc[i[:idx]] = i[idx+1:]
+ e["doc"] = [i for i in e["doc"] if i.find(":") == -1 or i.find(" ") < i.find(":")]
+
+ f["doc"] = fix_doc(f["doc"])
+
+ if f["type"] == "record":
+ for field in f.get("fields", []):
+ if "doc" not in field:
+ field["doc"] = ""
+
+ if f["type"] != "documentation":
+ lines = []
+ for l in f["doc"].splitlines():
+ if len(l) > 0 and l[0] == "#":
+ l = ("#" * depth) + l
+ lines.append(l)
+ f["doc"] = "\n".join(lines)
+
+ _, frg = urlparse.urldefrag(f["name"])
+ num = self.toc.add_entry(depth, frg)
+ doc = "## %s %s\n" % (num, frg)
+ else:
+ doc = ""
+
+ if self.title is None:
+ self.title = f["doc"][0:f["doc"].index("\n")][2:]
+
+ if f["type"] == "documentation":
+ f["doc"] = number_headings(self.toc, f["doc"])
+
+ #if "extends" in f:
+ # doc += "\n\nExtends "
+ # doc += ", ".join([" %s" % linkto(ex) for ex in aslist(f["extends"])])
+ #if f["name"] in self.subs:
+ # doc += "\n\nExtended by"
+ # doc += ", ".join([" %s" % linkto(s) for s in self.subs[f["name"]]])
+ #if f["name"] in self.uses:
+ # doc += "\n\nReferenced by"
+ # doc += ", ".join([" [%s.%s](#%s)" % (s[0], s[1], to_id(s[0])) for s in self.uses[f["name"]]])
+
+ doc = doc + "\n\n" + f["doc"]
+
+ doc = mistune.markdown(doc, renderer=MyRenderer())
+
+ if f["type"] == "record":
+ doc += "<h3>Fields</h3>"
+ doc += """<table class="table table-striped">"""
+ doc += "<tr><th>field</th><th>type</th><th>required</th><th>description</th></tr>"
+ required = []
+ optional = []
+ for i in f.get("fields", []):
+ tp = i["type"]
+ if isinstance(tp, list) and tp[0] == "https://w3id.org/cwl/salad#null":
+ opt = False
+ tp = tp[1:]
+ else:
+ opt = True
+
+ desc = i["doc"]
+ #if "inherited_from" in i:
+ # desc = "%s _Inherited from %s_" % (desc, linkto(i["inherited_from"]))
+
+ frg = schema.avro_name(i["name"])
+ tr = "<td><code>%s</code></td><td>%s</td><td>%s</td><td>%s</td>" % (frg, self.typefmt(tp, self.redirects), opt, mistune.markdown(desc))
+ if opt:
+ required.append(tr)
+ else:
+ optional.append(tr)
+ for i in required+optional:
+ doc += "<tr>" + i + "</tr>"
+ doc += """</table>"""
+ elif f["type"] == "enum":
+ doc += "<h3>Symbols</h3>"
+ doc += """<table class="table table-striped">"""
+ doc += "<tr><th>symbol</th><th>description</th></tr>"
+ for e in ex:
+ for i in e.get("symbols", []):
+ doc += "<tr>"
+ frg = schema.avro_name(i)
+ doc += "<td><code>%s</code></td><td>%s</td>" % (frg, enumDesc.get(frg, ""))
+ doc += "</tr>"
+ doc += """</table>"""
+ f["doc"] = doc
+
+ self.typedoc.write(f["doc"])
+
+ subs = self.docParent.get(f["name"], []) + self.record_refs.get(f["name"], [])
+ if len(subs) == 1:
+ self.render_type(self.typemap[subs[0]], depth)
+ else:
+ for s in subs:
+ self.render_type(self.typemap[s], depth+1)
+
+ for s in self.docAfter.get(f["name"], []):
+ self.render_type(self.typemap[s], depth)
+
+def avrold_doc(j, outdoc, renderlist, redirects, brand, brandlink):
+ toc = ToC()
+ toc.start_numbering = False
+
+ rt = RenderType(toc, j, renderlist, redirects)
+ content = rt.typedoc.getvalue()
+
+ outdoc.write("""
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <meta charset="UTF-8">
+ <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css">
+ """)
+
+ outdoc.write("<title>%s</title>" % (rt.title))
+
+ outdoc.write("""
+ <style>
+ :target {
+ padding-top: 61px;
+ margin-top: -61px;
+ }
+ body {
+ padding-top: 61px;
+ }
+ .tocnav ol {
+ list-style: none
+ }
+ </style>
+ </head>
+ <body>
+ """)
+
+ outdoc.write("""
+ <nav class="navbar navbar-default navbar-fixed-top">
+ <div class="container">
+ <div class="navbar-header">
+ <a class="navbar-brand" href="%s">%s</a>
+ """ % (brandlink, brand))
+
+ if u"<!--ToC-->" in content:
+ content = content.replace(u"<!--ToC-->", toc.contents("toc"))
+ outdoc.write("""
+ <ul class="nav navbar-nav">
+ <li><a href="#toc">Table of contents</a></li>
+ </ul>
+ """)
+
+ outdoc.write("""
+ </div>
+ </div>
+ </nav>
+ """)
+
+ outdoc.write("""
+ <div class="container">
+ """)
+
+ outdoc.write("""
+ <div class="row">
+ """)
+
+ outdoc.write("""
+ <div class="col-md-12" role="main" id="main">""")
+
+ outdoc.write(content.encode("utf-8"))
+
+ outdoc.write("""</div>""")
+
+ outdoc.write("""
+ </div>
+ </div>
+ </body>
+ </html>""")
+
+if __name__ == "__main__":
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument("schema")
+ parser.add_argument('--only', action='append')
+ parser.add_argument('--redirect', action='append')
+ parser.add_argument('--brand')
+ parser.add_argument('--brandlink')
+ parser.add_argument('--primtype', default="#PrimitiveType")
+
+ args = parser.parse_args()
+
+ s = []
+ a = args.schema
+ with open(a) as f:
+ if a.endswith("md"):
+ s.append({"name": os.path.splitext(os.path.basename(a))[0],
+ "type": "documentation",
+ "doc": f.read().decode("utf-8")
+ })
+ else:
+ uri = "file://" + os.path.abspath(a)
+ _, _, metaschema_loader = schema.get_metaschema()
+ j, schema_metadata = metaschema_loader.resolve_ref(uri, "")
+ if isinstance(j, list):
+ s.extend(j)
+ else:
+ s.append(j)
+
+ primitiveType = args.primtype
+
+ redirect = {r.split("=")[0]:r.split("=")[1] for r in args.redirect} if args.redirect else {}
+ renderlist = args.only if args.only else []
+ avrold_doc(s, sys.stdout, renderlist, redirect, args.brand, args.brandlink)
diff --git a/schema_salad/metaschema/field_name.yml b/schema_salad/metaschema/field_name.yml
new file mode 100644
index 0000000..44e95a2
--- /dev/null
+++ b/schema_salad/metaschema/field_name.yml
@@ -0,0 +1,46 @@
+- |
+ ## Field name resolution
+
+ The document schema declares the vocabulary of known field names. During
+ preprocessing traversal, field names in the document which are not part of
+ the schema vocabulary must be resolved to absolute URIs. Under "strict"
+ validation, it is an error for a document to include fields which are not
+ part of the vocabulary and not resolvable to absolute URIs. Field names
+ which are not part of the vocabulary are resolved using the following
+ rules:
+
+ * If a field name URI begins with a namespace prefix declared in the
+ document context (`@context`) followed by a colon `:`, the prefix and
+ colon must be replaced by the namespace declared in `@context`.
+
+ * If there is a vocabulary term which maps to the URI of a resolved
+ field, the field name must be replaced with the vocabulary term.
+
+ * If a field name URI is an absolute URI consisting of a scheme and path
+ and is not part of the vocabulary, no processing occurs.
+
+ Field name resolution is not relative. It must not be affected by the
+ base URI.
+
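+ As a rough illustration (not the reference implementation), the rules
+ above can be sketched in Python. Here `vocab` maps vocabulary terms to
+ their URIs, `rvocab` is the inverse mapping, and `namespaces` maps
+ declared prefixes to namespace URIs; all three names are illustrative:
+
+ ```
+ def resolve_field_name(name, vocab, rvocab, namespaces):
+     if name in vocab:
+         return name  # already a known vocabulary term
+     prefix, sep, rest = name.partition(":")
+     if sep and prefix in namespaces:
+         name = namespaces[prefix] + rest  # expand the namespace prefix
+     # if a vocabulary term maps to the resolved URI, prefer the term
+     return rvocab.get(name, name)
+
+ # resolve_field_name("acid:four", {}, {},
+ #                    {"acid": "http://example.com/acid#"})
+ # returns "http://example.com/acid#four"
+ ```
+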
+ ### Field name resolution example
+
+ Given the following schema:
+
+ ```
+- $include: field_name_schema.yml
+- |
+ ```
+
+ Process the following example:
+
+ ```
+- $include: field_name_src.yml
+- |
+ ```
+
+ This becomes:
+
+ ```
+- $include: field_name_proc.yml
+- |
+ ```
diff --git a/schema_salad/metaschema/field_name_proc.yml b/schema_salad/metaschema/field_name_proc.yml
new file mode 100644
index 0000000..a53ef4b
--- /dev/null
+++ b/schema_salad/metaschema/field_name_proc.yml
@@ -0,0 +1,8 @@
+ {
+ "base": "one",
+ "form": {
+ "base": "two",
+ "http://example.com/three": "three",
+ },
+ "http://example.com/acid#four": "four"
+ }
diff --git a/schema_salad/metaschema/field_name_schema.yml b/schema_salad/metaschema/field_name_schema.yml
new file mode 100644
index 0000000..5089c4b
--- /dev/null
+++ b/schema_salad/metaschema/field_name_schema.yml
@@ -0,0 +1,14 @@
+{
+ "$namespaces": {
+ "acid": "http://example.com/acid#"
+ },
+ "$graph": [{
+ "name": "ExampleType",
+ "type": "record",
+ "fields": [{
+ "name": "base",
+ "type": "string",
+ "jsonldPredicate": "http://example.com/base"
+ }]
+ }]
+}
diff --git a/schema_salad/metaschema/field_name_src.yml b/schema_salad/metaschema/field_name_src.yml
new file mode 100644
index 0000000..1ed79b9
--- /dev/null
+++ b/schema_salad/metaschema/field_name_src.yml
@@ -0,0 +1,8 @@
+ {
+ "base": "one",
+ "form": {
+ "http://example.com/base": "two",
+ "http://example.com/three": "three",
+ },
+ "acid:four": "four"
+ }
diff --git a/schema_salad/metaschema/ident_res.yml b/schema_salad/metaschema/ident_res.yml
new file mode 100644
index 0000000..45f4efb
--- /dev/null
+++ b/schema_salad/metaschema/ident_res.yml
@@ -0,0 +1,53 @@
+- |
+ ## Identifier resolution
+
+ The schema may designate one or more fields as identifier fields to identify
+ specific objects. Processing must resolve relative identifiers to absolute
+ identifiers using the following rules:
+
+ * If an identifier URI is prefixed with `#`, it is a URI relative
+ fragment identifier. It is resolved relative to the base URI by setting
+ or replacing the fragment portion of the base URI.
+
+ * If an identifier URI does not contain a scheme and is not prefixed with
+ `#`, it is a parent relative fragment identifier. It is resolved relative to the
+ base URI by the following rule: if the base URI does not contain a
+ document fragment, set the fragment portion of the base URI. If the base
+ URI does contain a document fragment, append a slash `/` followed by the
+ identifier field to the fragment portion of the base URI.
+
+ * If an identifier URI begins with a namespace prefix declared in
+ `$namespaces` followed by a colon `:`, the prefix and colon must be
+ replaced by the namespace declared in `$namespaces`.
+
+ * If an identifier URI is an absolute URI consisting of a scheme and path,
+ no processing occurs.
+
+ When preprocessing visits a node containing an identifier, that identifier
+ must be used as the base URI to process child nodes.
+
+ It is an error for more than one object in a document to have the same
+ absolute URI.
+
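+ A minimal sketch of these rules (illustrative only, not the reference
+ implementation): `base` is the base URI in effect and `namespaces` maps
+ declared prefixes to namespace URIs. An identifier such as `four#five`
+ in the example below, which contains `#` in a non-leading position, is
+ handled here by ordinary relative URI resolution:
+
+ ```
+ import urlparse  # Python 2, as used elsewhere in this package
+
+ def resolve_identifier(ident, base, namespaces):
+     if ident.startswith("#"):  # URI relative fragment identifier
+         return urlparse.urldefrag(base)[0] + ident
+     prefix, sep, rest = ident.partition(":")
+     if sep and prefix in namespaces:  # namespace prefix
+         return namespaces[prefix] + rest
+     if urlparse.urlsplit(ident).scheme:  # already absolute
+         return ident
+     if "#" in ident:  # path plus fragment, e.g. "four#five"
+         return urlparse.urljoin(base, ident)
+     doc, frg = urlparse.urldefrag(base)  # parent relative fragment
+     if frg:
+         return "%s#%s/%s" % (doc, frg, ident)
+     return "%s#%s" % (doc, ident)
+ ```
+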
+ ### Identifier resolution example
+
+ Given the following schema:
+
+ ```
+- $include: ident_res_schema.yml
+- |
+ ```
+
+ Process the following example:
+
+ ```
+- $include: ident_res_src.yml
+- |
+ ```
+
+ This becomes:
+
+ ```
+- $include: ident_res_proc.yml
+- |
+ ```
diff --git a/schema_salad/metaschema/ident_res_proc.yml b/schema_salad/metaschema/ident_res_proc.yml
new file mode 100644
index 0000000..24d3ea8
--- /dev/null
+++ b/schema_salad/metaschema/ident_res_proc.yml
@@ -0,0 +1,20 @@
+{
+ "id": "http://example.com/base",
+ "form": {
+ "id": "http://example.com/base#one",
+ "things": [
+ {
+ "id": "http://example.com/base#one/two"
+ },
+ {
+ "id": "http://example.com/base#three"
+ },
+ {
+ "id": "http://example.com/four#five",
+ },
+ {
+ "id": "http://example.com/acid#six",
+ }
+ ]
+ }
+}
diff --git a/schema_salad/metaschema/ident_res_schema.yml b/schema_salad/metaschema/ident_res_schema.yml
new file mode 100644
index 0000000..8a7bb04
--- /dev/null
+++ b/schema_salad/metaschema/ident_res_schema.yml
@@ -0,0 +1,14 @@
+{
+ "$namespaces": {
+ "acid": "http://example.com/acid#"
+ },
+ "$graph": [{
+ "name": "ExampleType",
+ "type": "record",
+ "fields": [{
+ "name": "id",
+ "type": "string",
+ "jsonldPredicate": "@id"
+ }]
+ }]
+}
diff --git a/schema_salad/metaschema/ident_res_src.yml b/schema_salad/metaschema/ident_res_src.yml
new file mode 100644
index 0000000..bbbd96e
--- /dev/null
+++ b/schema_salad/metaschema/ident_res_src.yml
@@ -0,0 +1,20 @@
+ {
+ "id": "http://example.com/base",
+ "form": {
+ "id": "one",
+ "things": [
+ {
+ "id": "two"
+ },
+ {
+ "id": "#three",
+ },
+ {
+ "id": "four#five",
+ },
+ {
+ "id": "acid:six",
+ }
+ ]
+ }
+ }
diff --git a/schema_salad/metaschema/import_include.md b/schema_salad/metaschema/import_include.md
new file mode 100644
index 0000000..0ad06bf
--- /dev/null
+++ b/schema_salad/metaschema/import_include.md
@@ -0,0 +1,112 @@
+## Import
+
+During preprocessing traversal, an implementation must resolve `$import`
+directives. An `$import` directive is an object consisting of exactly one
+field, `$import`, specifying a resource by URI string. It is an error if
+there are additional fields in the `$import` object; such additional fields
+must be ignored.
+
+The URI string must be resolved to an absolute URI using the link
+resolution rules described previously. Implementations must support
+loading from `file`, `http` and `https` resources. The URI referenced by
+`$import` must be loaded and recursively preprocessed as a Salad document.
+The external imported document does not inherit the context of the
+importing document, and the default base URI for processing the imported
+document must be the URI used to retrieve the imported document. If the
+`$import` URI includes a document fragment, the fragment must be excluded
+from the base URI used to preprocess the imported document.
+
+Once loaded and processed, the `$import` node is replaced in the document
+structure by the object or array yielded from the import operation.
+
+URIs may reference document fragments which refer to a specific object in
+the target document. This indicates that the `$import` node must be
+replaced by only the object with the appropriate fragment identifier.
+
+It is a fatal error if an import directive refers to an external resource
+or resource fragment which does not exist or is not accessible.
+
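+As a rough sketch (not the reference implementation; fragment handling,
+`file://` URIs, and error reporting are omitted, and `json` stands in for
+the YAML loader the implementation actually uses), `$import` resolution is
+a recursive traversal:
+
+```
+import json
+import urlparse
+import urllib2  # Python 2, matching the rest of the codebase
+
+def preprocess(node, base):
+    if isinstance(node, dict):
+        if node.keys() == ["$import"]:  # exactly one field: $import
+            url = urlparse.urljoin(base, node["$import"])
+            imported = json.load(urllib2.urlopen(url))
+            # the imported document is preprocessed with its own base URI
+            return preprocess(imported, url)
+        return dict((k, preprocess(v, base)) for k, v in node.items())
+    if isinstance(node, list):
+        return [preprocess(n, base) for n in node]
+    return node
+```
+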
+### Import example
+
+import.yml:
+```
+{
+ "hello": "world"
+}
+
+```
+
+parent.yml:
+```
+{
+ "form": {
+ "bar": {
+ "$import": "import.yml"
+ }
+ }
+}
+
+```
+
+This becomes:
+
+```
+{
+ "form": {
+ "bar": {
+ "hello": "world"
+ }
+ }
+}
+```
+
+## Include
+
+During preprocessing traversal, an implementation must resolve `$include`
+directives. An `$include` directive is an object consisting of exactly one
+field, `$include`, specifying a URI string. It is an error if there are
+additional fields in the `$include` object; such additional fields must be
+ignored.
+
+The URI string must be resolved to an absolute URI using the link
+resolution rules described previously. The URI referenced by `$include` must
+be loaded as text data. Implementations must support loading from
+`file`, `http` and `https` resources. Implementations may transcode the
+character encoding of the text data to match that of the parent document,
+but must not interpret or parse the text document in any other way.
+
+Once loaded, the `$include` node is replaced in the document structure by a
+string containing the text data loaded from the resource.
+
+It is a fatal error if an include directive refers to an external resource
+which does not exist or is not accessible.
+
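+The analogous sketch for `$include` substitutes the fetched text verbatim
+and never parses it (again illustrative only; `file://` handling omitted):
+
+```
+import urlparse
+import urllib2  # Python 2
+
+def preprocess_includes(node, base):
+    if isinstance(node, dict):
+        if node.keys() == ["$include"]:  # exactly one field: $include
+            url = urlparse.urljoin(base, node["$include"])
+            return urllib2.urlopen(url).read()  # raw text, not parsed
+        return dict((k, preprocess_includes(v, base))
+                    for k, v in node.items())
+    if isinstance(node, list):
+        return [preprocess_includes(n, base) for n in node]
+    return node
+```
+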
+### Include example
+
+parent.yml:
+```
+{
+ "form": {
+ "bar": {
+ "$include": "include.txt"
+ }
+ }
+}
+
+```
+
+include.txt:
+```
+hello world
+
+```
+
+This becomes:
+
+```
+{
+ "form": {
+ "bar": "hello world"
+ }
+}
+```
diff --git a/schema_salad/metaschema/link_res.yml b/schema_salad/metaschema/link_res.yml
new file mode 100644
index 0000000..9346f8a
--- /dev/null
+++ b/schema_salad/metaschema/link_res.yml
@@ -0,0 +1,55 @@
+- |
+ ## Link resolution
+
+ The schema may designate one or more fields as link fields that reference
+ other objects. Processing must resolve links to absolute URIs using the
+ following rules:
+
+ * If a reference URI is prefixed with `#`, it is a relative
+ fragment identifier. It is resolved relative to the base URI by setting
+ or replacing the fragment portion of the base URI.
+
+ * If a reference URI does not contain a scheme and is not prefixed with `#`
+ it is a path relative reference. If the reference URI contains `#` in any
+ position other than the first character, the reference URI must be divided
+ into a path portion and a fragment portion split on the first instance of
+ `#`. The path portion is resolved relative to the base URI by the following
+ rule: if the path portion of the base URI ends in a slash `/`, append the
+ path portion of the reference URI to the path portion of the base URI. If
+ the path portion of the base URI does not end in a slash, replace the final
+ path segment with the path portion of the reference URI. Replace the
+ fragment portion of the base URI with the fragment portion of the reference
+ URI.
+
+ * If a reference URI begins with a namespace prefix declared in `$namespaces`
+ followed by a colon `:`, the prefix and colon must be replaced by the
+ namespace declared in `$namespaces`.
+
+ * If a reference URI is an absolute URI consisting of a scheme and path,
+ no processing occurs.
+
+ Link resolution must not affect the base URI used to resolve identifiers
+ and other links.
+
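+ A minimal sketch of these rules (illustrative only): since relative
+ fragment and path relative references both follow standard RFC 3986
+ relative resolution, `urljoin` covers those two cases. `base` is the
+ base URI in effect and `namespaces` maps declared prefixes to
+ namespace URIs:
+
+ ```
+ import urlparse  # Python 2, as used elsewhere in this package
+
+ def resolve_link(link, base, namespaces):
+     prefix, sep, rest = link.partition(":")
+     if sep and prefix in namespaces:  # namespace prefix
+         return namespaces[prefix] + rest
+     if urlparse.urlsplit(link).scheme:  # already absolute
+         return link
+     # relative fragment and path relative references
+     return urlparse.urljoin(base, link)
+ ```
+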
+ ### Link resolution example
+
+ Given the following schema:
+
+ ```
+- $include: link_res_schema.yml
+- |
+ ```
+
+ Process the following example:
+
+ ```
+- $include: link_res_src.yml
+- |
+ ```
+
+ This becomes:
+
+ ```
+- $include: link_res_proc.yml
+- |
+ ```
diff --git a/schema_salad/metaschema/link_res_proc.yml b/schema_salad/metaschema/link_res_proc.yml
new file mode 100644
index 0000000..03e539d
--- /dev/null
+++ b/schema_salad/metaschema/link_res_proc.yml
@@ -0,0 +1,21 @@
+{
+ "$base": "http://example.com/base",
+ "link": "http://example.com/base/zero",
+ "form": {
+ "link": "http://example.com/one",
+ "things": [
+ {
+ "link": "http://example.com/two"
+ },
+ {
+ "link": "http://example.com/base#three"
+ },
+ {
+ "link": "http://example.com/four#five",
+ },
+ {
+ "link": "http://example.com/acid#six",
+ }
+ ]
+ }
+}
diff --git a/schema_salad/metaschema/link_res_schema.yml b/schema_salad/metaschema/link_res_schema.yml
new file mode 100644
index 0000000..76420d3
--- /dev/null
+++ b/schema_salad/metaschema/link_res_schema.yml
@@ -0,0 +1,16 @@
+{
+ "$namespaces": {
+ "acid": "http://example.com/acid#"
+ },
+ "$graph": [{
+ "name": "ExampleType",
+ "type": "record",
+ "fields": [{
+ "name": "link",
+ "type": "string",
+ "jsonldPredicate": {
+ "_type": "@id"
+ }
+ }]
+ }]
+}
diff --git a/schema_salad/metaschema/link_res_src.yml b/schema_salad/metaschema/link_res_src.yml
new file mode 100644
index 0000000..23f7a29
--- /dev/null
+++ b/schema_salad/metaschema/link_res_src.yml
@@ -0,0 +1,21 @@
+{
+ "$base": "http://example.com/base",
+ "link": "http://example.com/base/zero",
+ "form": {
+ "link": "one",
+ "things": [
+ {
+ "link": "two"
+ },
+ {
+ "link": "#three",
+ },
+ {
+ "link": "four#five",
+ },
+ {
+ "link": "acid:six",
+ }
+ ]
+ }
+}
diff --git a/schema_salad/metaschema/metaschema.yml b/schema_salad/metaschema/metaschema.yml
new file mode 100644
index 0000000..6e90775
--- /dev/null
+++ b/schema_salad/metaschema/metaschema.yml
@@ -0,0 +1,437 @@
+$base: "https://w3id.org/cwl/salad#"
+
+$namespaces:
+ sld: "https://w3id.org/cwl/salad#"
+ dct: "http://purl.org/dc/terms/"
+ rdf: "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ rdfs: "http://www.w3.org/2000/01/rdf-schema#"
+ xsd: "http://www.w3.org/2001/XMLSchema#"
+
+$graph:
+
+- name: "Semantic_Annotations_for_Linked_Avro_Data"
+ type: documentation
+ doc:
+ - $include: salad.md
+ - $import: field_name.yml
+ - $import: ident_res.yml
+ - $import: link_res.yml
+ - $import: vocab_res.yml
+ - $include: import_include.md
+
+- name: "Link_Validation"
+ type: documentation
+ doc: |
+ # Link validation
+
+ Once a document has been preprocessed, an implementation may validate
+ links. The link validation traversal may visit fields which the schema
+ designates as link fields and check that each URI references an existing
+ object in the current document, an imported document, file system, or
+ network resource. Failure to validate links may be a fatal error. Link
+ validation behavior for individual fields may be modified by `identity` and
+ `noLinkCheck` in the `jsonldPredicate` section of the field schema.
+
+
+- name: "Schema_validation"
+ type: documentation
+ doc: ""
+
+
+# - name: "JSON_LD_Context"
+# type: documentation
+# doc: |
+# # Generating JSON-LD Context
+
+# How to generate the json-ld context...
+
+
+- name: PrimitiveType
+ type: enum
+ symbols:
+ - "sld:null"
+ - "xsd:boolean"
+ - "xsd:int"
+ - "xsd:long"
+ - "xsd:float"
+ - "xsd:double"
+ - "xsd:string"
+ doc:
+ - |
+ Salad data types are based on Avro schema declarations. Refer to the
+ [Avro schema declaration documentation](https://avro.apache.org/docs/current/spec.html#schemas) for
+ detailed information.
+ - "null: no value"
+ - "boolean: a binary value"
+ - "int: 32-bit signed integer"
+ - "long: 64-bit signed integer"
+ - "float: single precision (32-bit) IEEE 754 floating-point number"
+ - "double: double precision (64-bit) IEEE 754 floating-point number"
+ - "string: Unicode character sequence"
+
+
+- name: "Any"
+ type: enum
+ symbols: ["#Any"]
+ doc: |
+ The **Any** type validates for any non-null value.
+
+
+- name: JsonldPredicate
+ type: record
+ doc: |
+ Attached to a record field to define how the parent record field is handled for
+ URI resolution and JSON-LD context generation.
+ fields:
+ - name: _id
+ type: ["null", string]
+ jsonldPredicate:
+ _id: sld:_id
+ _type: "@id"
+ identity: true
+ doc: |
+ The predicate URI that this field corresponds to.
+ Corresponds to JSON-LD `@id` directive.
+ - name: _type
+ type: ["null", string]
+ doc: |
+ The context type hint, corresponds to JSON-LD `@type` directive.
+
+ * If the value of this field is `@id` and `identity` is false or
+ unspecified, the parent field must be resolved using the link
+ resolution rules. If `identity` is true, the parent field must be
+ resolved using the identifier expansion rules.
+
+ * If the value of this field is `@vocab`, the parent field must be
+ resolved using the vocabulary resolution rules.
+
+ - name: _container
+ type: ["null", string]
+ doc: |
+ Structure hint, corresponds to JSON-LD `@container` directive.
+ - name: identity
+ type: ["null", boolean]
+ doc: |
+ If true and `_type` is `@id`, this indicates that the parent field must
+ be resolved according to identity resolution rules instead of link
+ resolution rules. In addition, the field value is considered an
+ assertion that the linked value exists; absence of an object in the loaded document
+ with the URI is not an error.
+ - name: noLinkCheck
+ type: ["null", boolean]
+ doc: |
+ If true, this indicates that link validation traversal must stop at
+ this field. This field (if it is a URI) or any fields under it (if it
+ is an object or array) are not subject to link checking.
+
+
+- name: SpecializeDef
+ type: record
+ fields:
+ - name: specializeFrom
+ type: string
+ doc: "The data type to be replaced"
+ jsonldPredicate:
+ _id: "sld:specializeFrom"
+ _type: "@id"
+
+ - name: specializeTo
+ type: string
+ doc: "The new data type to replace with"
+ jsonldPredicate:
+ _id: "sld:specializeTo"
+ _type: "@id"
+
+
+- name: NamedType
+ type: record
+ abstract: true
+ fields:
+ - name: name
+ type: string
+ jsonldPredicate: "@id"
+ doc: "The identifier for this type"
+
+
+- name: DocType
+ type: record
+ abstract: true
+ fields:
+ - name: doc
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ doc: "A documentation string for this type, or an array of strings which should be concatenated."
+ jsonldPredicate: "sld:doc"
+
+ - name: docParent
+ type: ["null", string]
+ doc: |
+ Hint to indicate that during documentation generation, documentation
+ for this type should appear in a subsection under `docParent`.
+ jsonldPredicate:
+ _id: "sld:docParent"
+ _type: "@id"
+
+ - name: docChild
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ doc: |
+ Hint to indicate that during documentation generation, documentation
+ for `docChild` should appear in a subsection under this type.
+ jsonldPredicate:
+ _id: "sld:docChild"
+ _type: "@id"
+
+ - name: docAfter
+ type: ["null", string]
+ doc: |
+ Hint to indicate that during documentation generation, documentation
+ for this type should appear after the `docAfter` section at the same
+ level.
+ jsonldPredicate:
+ _id: "sld:docAfter"
+ _type: "@id"
+
+
+- name: SchemaDefinedType
+ type: record
+ extends: "#DocType"
+ doc: |
+ Abstract base for schema-defined types.
+ abstract: true
+ fields:
+ - name: jsonldPredicate
+ type:
+ - "null"
+ - string
+ - "#JsonldPredicate"
+ doc: |
+ Annotate this type with linked data context.
+ jsonldPredicate: "sld:jsonldPredicate"
+
+ - name: documentRoot
+ type: ["null", boolean]
+ doc: |
+ If true, indicates that the type is valid at the document root. At
+ least one type in a schema must be tagged with `documentRoot: true`.
+
+
+- name: RecordField
+ type: record
+ doc: "A field of a record."
+ fields:
+ - name: name
+ type: string
+ jsonldPredicate: "@id"
+ doc: |
+ The name of the field
+
+ - name: doc
+ type: ["null", string]
+ doc: |
+ A documentation string for this field
+ jsonldPredicate: "sld:doc"
+
+ - name: type
+ type:
+ - "#PrimitiveType"
+ - "#RecordSchema"
+ - "#EnumSchema"
+ - "#ArraySchema"
+ - string
+ - type: array
+ items:
+ - "#PrimitiveType"
+ - "#RecordSchema"
+ - "#EnumSchema"
+ - "#ArraySchema"
+ - string
+ jsonldPredicate:
+ _id: "sld:type"
+ _type: "@vocab"
+ doc: |
+ The field type
+
+
+- name: SaladRecordField
+ type: record
+ extends: "#RecordField"
+ doc: "A field of a record."
+ fields:
+ - name: jsonldPredicate
+ type:
+ - "null"
+ - string
+ - "#JsonldPredicate"
+ doc: |
+ Annotate this type with linked data context.
+ jsonldPredicate: "sld:jsonldPredicate"
+
+- name: RecordSchema
+ type: record
+ fields:
+ - name: type
+ doc: "Must be `record`"
+ type:
+ name: Record_symbol
+ type: enum
+ symbols:
+ - "sld:record"
+ jsonldPredicate:
+ _id: "sld:type"
+ _type: "@vocab"
+
+ - name: "fields"
+ type:
+ - "null"
+ - type: "array"
+ items: "#RecordField"
+
+ jsonldPredicate: "sld:fields"
+ doc: "Defines the fields of the record."
+
+
+- name: SaladRecordSchema
+ type: record
+ extends: ["#NamedType", "#RecordSchema", "#SchemaDefinedType"]
+ documentRoot: true
+ specialize:
+ specializeFrom: "#RecordField"
+ specializeTo: "#SaladRecordField"
+ fields:
+ - name: abstract
+ type: ["null", boolean]
+ doc: |
+ If true, this record is abstract and may be used as a base for other
+ records, but is not valid on its own.
+
+ - name: extends
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ jsonldPredicate:
+ _id: "sld:extends"
+ _type: "@id"
+ doc: |
+ Indicates that this record inherits fields from one or more base records.
+
+ - name: specialize
+ type:
+ - "null"
+ - "#SpecializeDef"
+ - type: array
+ items: "#SpecializeDef"
+ doc: |
+ Only applies if `extends` is declared. Apply type specialization using the
+ base record as a template. For each field inherited from the base
+ record, replace any instance of the type `specializeFrom` with
+ `specializeTo`.
+
+
+- name: EnumSchema
+ type: record
+ doc: |
+ Define an enumerated type.
+ fields:
+ - name: type
+ doc: "Must be `enum`"
+ type:
+ name: Enum_symbol
+ type: enum
+ symbols:
+ - "sld:enum"
+ jsonldPredicate:
+ _id: "sld:type"
+ _type: "@vocab"
+
+ - name: "symbols"
+ type:
+ - type: "array"
+ items: "string"
+ jsonldPredicate:
+ _id: "sld:symbols"
+ _type: "@id"
+ identity: true
+ doc: "Defines the set of valid symbols."
+
+
+- name: SaladEnumSchema
+ type: record
+ extends: ["#EnumSchema", "#SchemaDefinedType"]
+ documentRoot: true
+ doc: |
+ Define an enumerated type.
+ fields:
+ - name: extends
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ jsonldPredicate:
+ _id: "sld:extends"
+ _type: "@id"
+ doc: |
+ Indicates that this enum inherits symbols from a base enum.
+
+
+- name: ArraySchema
+ type: record
+ fields:
+ - name: type
+ doc: "Must be `array`"
+ type:
+ name: Array_symbol
+ type: enum
+ symbols:
+ - "sld:array"
+ jsonldPredicate:
+ _id: "sld:type"
+ _type: "@vocab"
+
+ - name: items
+ type:
+ - "#PrimitiveType"
+ - "#RecordSchema"
+ - "#EnumSchema"
+ - "#ArraySchema"
+ - string
+ - type: array
+ items:
+ - "#PrimitiveType"
+ - "#RecordSchema"
+ - "#EnumSchema"
+ - "#ArraySchema"
+ - string
+ jsonldPredicate:
+ _id: "sld:items"
+ _type: "@vocab"
+ doc: "Defines the type of the array elements."
+
+
+- name: Documentation
+ type: record
+ extends: ["#NamedType", "#DocType"]
+ documentRoot: true
+ doc: |
+ A documentation section. This type exists to facilitate self-documenting
+ schemas but has no role in formal validation.
+ fields:
+ - name: type
+ doc: "Must be `documentation`"
+ type:
+ name: Documentation_symbol
+ type: enum
+ symbols:
+ - "sld:documentation"
+ jsonldPredicate:
+ _id: "sld:type"
+ _type: "@vocab"
diff --git a/schema_salad/metaschema/salad.md b/schema_salad/metaschema/salad.md
new file mode 100644
index 0000000..6dd3e6a
--- /dev/null
+++ b/schema_salad/metaschema/salad.md
@@ -0,0 +1,256 @@
+# Semantic Annotations for Linked Avro Data (SALAD)
+
+Author:
+
+* Peter Amstutz <peter.amstutz at curoverse.com>, Curoverse
+
+Contributors:
+
+* The developers of Apache Avro
+* The developers of JSON-LD
+* Nebojša Tijanić <nebojsa.tijanic at sbgenomics.com>, Seven Bridges Genomics
+
+# Abstract
+
+Salad is a schema language for describing structured linked data documents
+in JSON or YAML. A Salad schema provides rules for
+preprocessing, structural validation, and link checking for documents
+described by a Salad schema. Salad builds on JSON-LD and the Apache Avro
+data serialization system, and extends Avro with features for rich data
+modeling such as inheritance, template specialization, object identifiers,
+and object references. Salad was developed to provide a bridge between the
+record oriented data modeling supported by Apache Avro and the Semantic
+Web.
+
+# Status of This Document
+
+This document is the product of the [Common Workflow Language working
+group](https://groups.google.com/forum/#!forum/common-workflow-language). The
+latest version of this document is available in the "schema_salad" directory at
+
+https://github.com/common-workflow-language/schema_salad
+
+The products of the CWL working group (including this document) are made available
+under the terms of the Apache License, version 2.0.
+
+<!--ToC-->
+
+# Introduction
+
+The JSON data model is an extremely popular way to represent structured
+data. It is attractive because of its relative simplicity and is a
+natural fit with the standard types of many programming languages.
+However, this simplicity means that basic JSON lacks expressive features
+useful for working with complex data structures and document formats, such
+as schemas, object references, and namespaces.
+
+JSON-LD is a W3C standard providing a way to describe how to interpret a
+JSON document as Linked Data by means of a "context". JSON-LD provides a
+powerful solution for representing object references and namespaces in JSON
+based on standard web URIs, but is not itself a schema language. Without a
+schema providing a well defined structure, it is difficult to process an
+arbitrary JSON-LD document as idiomatic JSON because there are many ways to
+express the same data that are logically equivalent but structurally
+distinct.
+
+Several schema languages exist for describing and validating JSON data,
+such as the Apache Avro data serialization system, but none of them understand
+linked data. As a result, to fully take advantage of JSON-LD to build the
+next generation of linked data applications, one must maintain separate
+JSON schema, JSON-LD context, RDF schema, and human documentation, despite
+significant overlap of content and obvious need for these documents to stay
+synchronized.
+
+Schema Salad is designed to address this gap. It provides a schema
+language and processing rules for describing structured JSON content
+permitting URI resolution and strict document validation. The schema
+language supports linked data through annotations that describe the linked
+data interpretation of the content, enables generation of JSON-LD context
+and RDF schema, and production of RDF triples by applying the JSON-LD
+context. The schema language also provides for robust support of inline
+documentation.
+
+## Introduction to draft 1
+
+This is the first version of Schema Salad. It is developed concurrently
+with draft 3 of the Common Workflow Language for use in specifying the
+Common Workflow Language; however, Schema Salad is intended to be useful to
+a broader audience.
+
+## References to Other Specifications
+
+**Javascript Object Notation (JSON)**: http://json.org
+
+**JSON Linked Data (JSON-LD)**: http://json-ld.org
+
+**YAML**: http://yaml.org
+
+**Avro**: https://avro.apache.org/docs/current/spec.html
+
+**Uniform Resource Identifier (URI) Generic Syntax**: https://tools.ietf.org/html/rfc3986
+
+**Resource Description Framework (RDF)**: http://www.w3.org/RDF/
+
+**UTF-8**: https://www.ietf.org/rfc/rfc2279.txt
+
+## Scope
+
+This document describes the syntax, data model, algorithms, and schema
+language for working with Salad documents. It is not intended to document
+a specific implementation of Salad; however, it may serve as a reference for
+the behavior of conforming implementations.
+
+## Terminology
+
+The terminology used to describe Salad documents is defined in the Concepts
+section of the specification. The terms defined in the following list are
+used in building those definitions and in describing the actions of a
+Salad implementation:
+
+**may**: Conforming Salad documents and Salad implementations are permitted but
+not required to be interpreted as described.
+
+**must**: Conforming Salad documents and Salad implementations are required
+to be interpreted as described; otherwise they are in error.
+
+**error**: A violation of the rules of this specification; results are
+undefined. Conforming implementations may detect and report an error and may
+recover from it.
+
+**fatal error**: A violation of the rules of this specification; results
+are undefined. Conforming implementations must not continue to process the
+document and may report an error.
+
+**at user option**: Conforming software may or must (depending on the modal verb in
+the sentence) behave as described; if it does, it must provide users a means to
+enable or disable the behavior described.
+
+# Document model
+
+## Data concepts
+
+An **object** is a data structure equivalent to the "object" type in JSON,
+consisting of an unordered set of name/value pairs (referred to here as
+**fields**) and where the name is a string and the value is a string, number,
+boolean, array, or object.
+
+A **document** is a file containing a serialized object, or an array of
+objects.
+
+A **document type** is a class of files that share a common structure and
+semantics.
+
+A **document schema** is a formal description of the grammar of a document type.
+
+A **base URI** is a context-dependent URI used to resolve relative references.
+
+An **identifier** is a URI that designates a single document or single
+object within a document.
+
+A **vocabulary** is the set of symbolic field names and enumerated symbols defined
+by a document schema, where each term maps to an absolute URI.
+
+## Syntax
+
+Conforming Salad documents are serialized and loaded using YAML syntax and
+UTF-8 text encoding. Salad documents are written using the JSON-compatible
+subset of YAML. Features of YAML such as headers and type tags that are
+not found in the standard JSON data model must not be used in conforming
+Salad documents. It is a fatal error if the document is not valid YAML.
+
+A Salad document must consist only of either a single root object or an
+array of objects.
+
+## Document context
+
+### Implied context
+
+The implicit context consists of the vocabulary defined by the schema and
+the base URI. By default, the base URI must be the URI that was used to
+load the document. It may be overridden by an explicit context.
+
+### Explicit context
+
+If a document consists of a root object, this object may contain the
+fields `$base`, `$namespaces`, `$schemas`, and `$graph`:
+
+ * `$base`: Must be a string. Set the base URI for the document used to
+ resolve relative references.
+
+ * `$namespaces`: Must be an object with strings as values. The keys of
+ the object are namespace prefixes used in the document; the values of
+ the object are the prefix expansions.
+
+ * `$schemas`: Must be an array of strings. This field may list URI
+ references to documents in RDF-XML format which will be queried for RDF
+ schema data. The subjects and predicates described by the RDF schema
+ may provide additional semantic context for the document, and may be
+ used for validation of prefixed extension fields found in the document.
+
+Other directives beginning with `$` must be ignored.
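+
+For example, a root object using these directives might begin as follows
+(an illustrative document; the URIs are examples only):
+
+```
+$base: "http://example.com/base"
+$namespaces:
+  acid: "http://example.com/acid#"
+$schemas:
+  - "http://example.com/schema.ttl"
+$graph:
+  - name: ExampleType
+    type: record
+```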
+
+## Document graph
+
+If a document consists of a single root object, this object may contain the
+field `$graph`. This field must be an array of objects. If present, this
+field holds the primary content of the document. A document that consists
+of an array of objects at the root is an implicit graph.
+
+## Document metadata
+
+If a document consists of a single root object, metadata about the
+document, such as authorship, may be declared in the root object.
+
+## Document schema
+
+Document preprocessing, link validation and schema validation require a
+document schema. A schema may consist of:
+
+ * At least one record definition object which defines valid fields that
+ make up a record type. Record field definitions include the valid types
+ that may be assigned to each field and annotations to indicate fields
+ that represent identifiers and links, described below in "Semantic
+ Annotations".
+
+ * Any number of enumerated type objects, which define a finite set of
+ symbols that are the valid values of the type.
+
+ * Any number of documentation objects which allow in-line documentation of the schema.
+
+The schema for defining a salad schema (the metaschema) is described in
+detail in "Schema validation".
+
+### Record field annotations
+
+In a document schema, record field definitions may include the field
+`jsonldPredicate`, which may be either a string or object. Implementations
+must use the following document preprocessing of fields by the following
+rules:
+
+ * If the value of `jsonldPredicate` is `@id`, the field is an identifier
+ field.
+
+ * If the value of `jsonldPredicate` is an object, and that object
+ contains the field `_type` with the value `@id`, the field is a
+ link field.
+
+ * If the value of `jsonldPredicate` is an object, and that object
+ contains the field `_type` with the value `@vocab`, the field is a
+ vocabulary field, which is a subtype of link field.
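+
+For example (an illustrative schema fragment), the three forms are:
+
+```
+fields:
+  - name: id
+    type: string
+    jsonldPredicate: "@id"        # identifier field
+  - name: link
+    type: string
+    jsonldPredicate:
+      _type: "@id"                # link field
+  - name: voc
+    type: string
+    jsonldPredicate:
+      _type: "@vocab"             # vocabulary field
+```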
+
+## Document traversal
+
+To perform document preprocessing, link validation and schema validation,
+the document must be traversed starting from the fields or array items of
+the root object or array, recursively visiting each child item which
+contains an object or array.
+
+# Document preprocessing
+
+After processing the explicit context (if any), document preprocessing
+begins. Starting from the document root, object field values or array
+items which contain objects or arrays are recursively traversed
+depth-first. For each visited object, field names, identifier fields, link
+fields, vocabulary fields, and `$import` and `$include` directives must be
+processed as described in this section. The order of traversal of child
+nodes within a parent node is undefined.
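+
+A minimal sketch of this traversal (illustrative only; `process_object`
+stands in for the per-object rules described in this section):
+
+```
+def preprocess(node, process_object):
+    if isinstance(node, dict):
+        process_object(node)
+        for value in node.values():
+            preprocess(value, process_object)
+    elif isinstance(node, list):
+        for item in node:
+            preprocess(item, process_object)
+```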
diff --git a/schema_salad/metaschema/vocab_res.yml b/schema_salad/metaschema/vocab_res.yml
new file mode 100644
index 0000000..4555f5b
--- /dev/null
+++ b/schema_salad/metaschema/vocab_res.yml
@@ -0,0 +1,35 @@
+- |
+ ## Vocabulary resolution
+
+ The schema may designate one or more vocabulary fields which use terms
+ defined in the vocabulary. Processing must resolve vocabulary fields to
+ either vocabulary terms or absolute URIs by first applying the link
+ resolution rules defined above, then applying the following additional
+ rule:
+
+ * If a reference URI is a vocabulary field, and there is a vocabulary
+ term which maps to the resolved URI, the reference must be replaced with
+ the vocabulary term.
+
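+ As a non-normative sketch, if `rvocab` is a reverse mapping from
+ absolute URIs to vocabulary terms (for example
+ `{"http://example.com/acid#red": "red"}`), this rule reduces to:
+
+ ```
+ def resolve_vocab(resolved_uri, rvocab):
+     # Replace a resolved URI with its vocabulary term, if one exists.
+     return rvocab.get(resolved_uri, resolved_uri)
+ ```
+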
+ ### Vocabulary resolution example
+
+ Given the following schema:
+
+ ```
+- $include: vocab_res_schema.yml
+- |
+ ```
+
+ Process the following example:
+
+ ```
+- $include: vocab_res_src.yml
+- |
+ ```
+
+ This becomes:
+
+ ```
+- $include: vocab_res_proc.yml
+- |
+ ```
diff --git a/schema_salad/metaschema/vocab_res_proc.yml b/schema_salad/metaschema/vocab_res_proc.yml
new file mode 100644
index 0000000..d13ab15
--- /dev/null
+++ b/schema_salad/metaschema/vocab_res_proc.yml
@@ -0,0 +1,15 @@
+ {
+ "form": {
+ "things": [
+ {
+ "voc": "red",
+ },
+ {
+ "voc": "red",
+ },
+ {
+ "voc": "http://example.com/acid#blue",
+ }
+ ]
+ }
+ }
diff --git a/schema_salad/metaschema/vocab_res_schema.yml b/schema_salad/metaschema/vocab_res_schema.yml
new file mode 100644
index 0000000..92b271e
--- /dev/null
+++ b/schema_salad/metaschema/vocab_res_schema.yml
@@ -0,0 +1,21 @@
+{
+ "$namespaces": {
+ "acid": "http://example.com/acid#"
+ },
+ "$graph": [{
+ "name": "Colors",
+ "type": "enum",
+ "symbols": ["acid:red"]
+ },
+ {
+ "name": "ExampleType",
+ "type": "record",
+ "fields": [{
+ "name": "voc",
+ "type": "string",
+ "jsonldPredicate": {
+ "_type": "@vocab"
+ }
+ }]
+ }]
+}
diff --git a/schema_salad/metaschema/vocab_res_src.yml b/schema_salad/metaschema/vocab_res_src.yml
new file mode 100644
index 0000000..82954f1
--- /dev/null
+++ b/schema_salad/metaschema/vocab_res_src.yml
@@ -0,0 +1,15 @@
+ {
+ "form": {
+ "things": [
+ {
+ "voc": "red",
+ },
+ {
+ "voc": "http://example.com/acid#red",
+ },
+ {
+ "voc": "http://example.com/acid#blue",
+ }
+ ]
+ }
+ }
diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py
new file mode 100644
index 0000000..6cf0810
--- /dev/null
+++ b/schema_salad/ref_resolver.py
@@ -0,0 +1,502 @@
+import os
+import json
+import hashlib
+import logging
+import collections
+import requests
+import urlparse
+import yaml
+import validate
+import pprint
+import StringIO
+from aslist import aslist
+import rdflib
+from rdflib.namespace import RDF, RDFS, OWL
+import xml.sax
+
+_logger = logging.getLogger("salad")
+
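+# NormDict is a dict subclass that passes every key through a
+# normalization function (by default `unicode`) so that equivalent
+# spellings of a URL map to the same entry.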
+class NormDict(dict):
+ def __init__(self, normalize=unicode):
+ super(NormDict, self).__init__()
+ self.normalize = normalize
+
+ def __getitem__(self, key):
+ return super(NormDict, self).__getitem__(self.normalize(key))
+
+ def __setitem__(self, key, value):
+ return super(NormDict, self).__setitem__(self.normalize(key), value)
+
+ def __delitem__(self, key):
+ return super(NormDict, self).__delitem__(self.normalize(key))
+
+ def __contains__(self, key):
+ return super(NormDict, self).__contains__(self.normalize(key))
+
+def merge_properties(a, b):
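+ # Merge two dicts of properties: keys unique to either side are copied
+ # through; keys present in both have their values combined into a list.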
+ c = {}
+ for i in a:
+ if i not in b:
+ c[i] = a[i]
+ for i in b:
+ if i not in a:
+ c[i] = b[i]
+ for i in a:
+ if i in b:
+ c[i] = aslist(a[i]) + aslist(b[i])
+
+ return c
+
+def SubLoader(loader):
+ return Loader(loader.ctx, schemagraph=loader.graph, foreign_properties=loader.foreign_properties, idx=loader.idx, cache=loader.cache)
+
+class Loader(object):
+ def __init__(self, ctx, schemagraph=None, foreign_properties=None, idx=None, cache=None):
+ normalize = lambda url: urlparse.urlsplit(url).geturl()
+ if idx is not None:
+ self.idx = idx
+ else:
+ self.idx = NormDict(normalize)
+
+ self.ctx = {}
+ if schemagraph is not None:
+ self.graph = schemagraph
+ else:
+ self.graph = rdflib.Graph()
+
+ if foreign_properties is not None:
+ self.foreign_properties = foreign_properties
+ else:
+ self.foreign_properties = set()
+
+ if cache is not None:
+ self.cache = cache
+ else:
+ self.cache = {}
+
+ self.url_fields = set()
+ self.vocab_fields = set()
+ self.identifiers = set()
+ self.identity_links = set()
+ self.standalone = set()
+ self.nolinkcheck = set()
+ self.vocab = {}
+ self.rvocab = {}
+
+ self.add_context(ctx)
+
+ def expand_url(self, url, base_url, scoped=False, vocab_term=False):
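+ # Expand `url` against `base_url`: apply namespace prefixes from the
+ # vocabulary, build a scoped fragment identifier when `scoped` is set,
+ # otherwise join per RFC 3986; with `vocab_term`, map a resolved URI
+ # back to its vocabulary term when one exists.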
+ if url in ("@id", "@type"):
+ return url
+
+ if vocab_term and url in self.vocab:
+ return url
+
+ if self.vocab and ":" in url:
+ prefix = url.split(":")[0]
+ if prefix in self.vocab:
+ url = self.vocab[prefix] + url[len(prefix)+1:]
+
+ split = urlparse.urlsplit(url)
+
+ if split.scheme or url.startswith("$(") or url.startswith("${"):
+ pass
+ elif scoped and not split.fragment:
+ splitbase = urlparse.urlsplit(base_url)
+ frg = ""
+ if splitbase.fragment:
+ frg = splitbase.fragment + "/" + split.path
+ else:
+ frg = split.path
+ url = urlparse.urlunsplit((splitbase.scheme, splitbase.netloc, splitbase.path, splitbase.query, frg))
+ else:
+ url = urlparse.urljoin(base_url, url)
+
+ if vocab_term and url in self.rvocab:
+ return self.rvocab[url]
+ else:
+ return url
+
+ def _add_properties(self, s):
+ for _, _, rng in self.graph.triples( (s, RDFS.range, None) ):
+ literal = ((str(rng).startswith("http://www.w3.org/2001/XMLSchema#") and not str(rng) == "http://www.w3.org/2001/XMLSchema#anyURI") or
+ str(rng) == "http://www.w3.org/2000/01/rdf-schema#Literal")
+ if not literal:
+ self.url_fields.add(str(s))
+ self.foreign_properties.add(str(s))
+
+ def add_namespaces(self, ns):
+ self.vocab.update(ns)
+
+ def add_schemas(self, ns, base_url):
+ for sch in aslist(ns):
+ try:
+ self.graph.parse(urlparse.urljoin(base_url, sch), format="xml")
+ except xml.sax.SAXParseException:
+ self.graph.parse(urlparse.urljoin(base_url, sch), format="turtle")
+
+ for s, _, _ in self.graph.triples( (None, RDF.type, RDF.Property) ):
+ self._add_properties(s)
+ for s, _, o in self.graph.triples( (None, RDFS.subPropertyOf, None) ):
+ self._add_properties(s)
+ self._add_properties(o)
+ for s, _, _ in self.graph.triples( (None, RDFS.range, None) ):
+ self._add_properties(s)
+ for s, _, _ in self.graph.triples( (None, RDF.type, OWL.ObjectProperty) ):
+ self._add_properties(s)
+
+ for s, _, _ in self.graph.triples( (None, None, None) ):
+ self.idx[str(s)] = True
+
+
+ def add_context(self, newcontext, baseuri=""):
+ if self.vocab:
+ raise validate.ValidationException("Refreshing context that already has stuff in it")
+
+ self.url_fields = set()
+ self.vocab_fields = set()
+ self.identifiers = set()
+ self.identity_links = set()
+ self.standalone = set()
+ self.nolinkcheck = set()
+ self.vocab = {}
+ self.rvocab = {}
+
+ self.ctx.update({k: v for k,v in newcontext.iteritems() if k != "@context"})
+
+ _logger.debug("ctx is %s", self.ctx)
+
+ for c in self.ctx:
+ if self.ctx[c] == "@id":
+ self.identifiers.add(c)
+ self.identity_links.add(c)
+ elif isinstance(self.ctx[c], dict) and self.ctx[c].get("@type") == "@id":
+ self.url_fields.add(c)
+ if self.ctx[c].get("identity", False):
+ self.identity_links.add(c)
+ elif isinstance(self.ctx[c], dict) and self.ctx[c].get("@type") == "@vocab":
+ self.url_fields.add(c)
+ self.vocab_fields.add(c)
+
+ if isinstance(self.ctx[c], dict) and self.ctx[c].get("noLinkCheck"):
+ self.nolinkcheck.add(c)
+
+ if isinstance(self.ctx[c], dict) and "@id" in self.ctx[c]:
+ self.vocab[c] = self.ctx[c]["@id"]
+ elif isinstance(self.ctx[c], basestring):
+ self.vocab[c] = self.ctx[c]
+
+ for k,v in self.vocab.items():
+ self.rvocab[self.expand_url(v, "", scoped=False)] = k
+
+ _logger.debug("identifiers is %s", self.identifiers)
+ _logger.debug("identity_links is %s", self.identity_links)
+ _logger.debug("url_fields is %s", self.url_fields)
+ _logger.debug("vocab_fields is %s", self.vocab_fields)
+ _logger.debug("vocab is %s", self.vocab)
+
+
+ def resolve_ref(self, ref, base_url=None):
+ base_url = base_url or 'file://%s/' % os.path.abspath('.')
+
+ obj = None
+ inc = False
+ merge = None
+
+ # If `ref` is a dict, look for special directives.
+ if isinstance(ref, dict):
+ obj = ref
+ if "$import" in ref:
+ if len(obj) == 1:
+ ref = obj["$import"]
+ obj = None
+ else:
+ raise ValueError("'$import' must be the only field in %s" % (str(obj)))
+ elif "$include" in obj:
+ if len(obj) == 1:
+ ref = obj["$include"]
+ inc = True
+ obj = None
+ else:
+ raise ValueError("'$include' must be the only field in %s" % (str(obj)))
+ else:
+ ref = None
+ for identifier in self.identifiers:
+ if identifier in obj:
+ ref = obj[identifier]
+ break
+ if not ref:
+ raise ValueError("Object `%s` does not have identifier field in %s" % (obj, self.identifiers))
+
+ if not isinstance(ref, basestring):
+ raise ValueError("Must be string: `%s`" % str(ref))
+
+ url = self.expand_url(ref, base_url, scoped=(obj is not None))
+
+ # Has this reference been loaded already?
+ if url in self.idx:
+ if merge:
+ obj = self.idx[url].copy()
+ else:
+ return self.idx[url], {}
+
+ # "$include" directive means load raw text
+ if inc:
+ return self.fetch_text(url), {}
+
+ if obj:
+ for identifier in self.identifiers:
+ obj[identifier] = url
+ doc_url = url
+ else:
+ # Load structured document
+ doc_url, frg = urlparse.urldefrag(url)
+ if doc_url in self.idx:
+ raise validate.ValidationException("Reference `#%s` not found in file `%s`." % (frg, doc_url))
+ obj = self.fetch(doc_url)
+
+ # Recursively expand urls and resolve directives
+ obj, metadata = self.resolve_all(obj, doc_url)
+
+ # Requested reference should be in the index now, otherwise it's a bad reference
+ if url is not None:
+ if url in self.idx:
+ obj = self.idx[url]
+ else:
+ raise RuntimeError("Reference `%s` is not in the index. Index contains:\n %s" % (url, "\n ".join(self.idx)))
+
+ if "$graph" in obj:
+ metadata = {k: v for k,v in obj.items() if k != "$graph"}
+ obj = obj["$graph"]
+ return obj, metadata
+ else:
+ return obj, metadata
+
+ def resolve_all(self, document, base_url, file_base=None):
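+ # Recursively walk `document`, resolving $import/$include directives,
+ # applying $base/$profile/$namespaces/$schemas/$graph, and expanding
+ # identifier, link, and vocabulary fields to absolute URIs.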
+ loader = self
+ metadata = {}
+ if file_base is None:
+ file_base = base_url
+
+ if isinstance(document, dict):
+ # Handle $import and $include
+ if ('$import' in document or '$include' in document):
+ return self.resolve_ref(document, file_base)
+ elif isinstance(document, list):
+ pass
+ else:
+ return document, metadata
+
+ newctx = None
+ if isinstance(document, dict):
+ # Handle $base, $profile, $namespaces, $schemas and $graph
+ if "$base" in document:
+ base_url = document["$base"]
+
+ if "$profile" in document:
+ if not newctx:
+ newctx = SubLoader(self)
+ prof = self.fetch(document["$profile"])
+ newctx.add_namespaces(document.get("$namespaces", {}))
+ newctx.add_schemas(document.get("$schemas", []), document["$profile"])
+
+ if "$namespaces" in document:
+ if not newctx:
+ newctx = SubLoader(self)
+ newctx.add_namespaces(document["$namespaces"])
+
+ if "$schemas" in document:
+ if not newctx:
+ newctx = SubLoader(self)
+ newctx.add_schemas(document["$schemas"], file_base)
+
+ if newctx:
+ loader = newctx
+
+ if "$graph" in document:
+ metadata = {k: v for k,v in document.items() if k != "$graph"}
+ document = document["$graph"]
+ metadata, _ = loader.resolve_all(metadata, base_url, file_base)
+
+ if isinstance(document, dict):
+ for identifier in loader.identity_links:
+ if identifier in document:
+ if isinstance(document[identifier], basestring):
+ document[identifier] = loader.expand_url(document[identifier], base_url, scoped=True)
+ if document[identifier] not in loader.idx or isinstance(loader.idx[document[identifier]], basestring):
+ loader.idx[document[identifier]] = document
+ base_url = document[identifier]
+ elif isinstance(document[identifier], list):
+ for n, v in enumerate(document[identifier]):
+ document[identifier][n] = loader.expand_url(document[identifier][n], base_url, scoped=True)
+ if document[identifier][n] not in loader.idx:
+ loader.idx[document[identifier][n]] = document[identifier][n]
+
+ for d in document:
+ d2 = loader.expand_url(d, "", scoped=False, vocab_term=True)
+ if d != d2:
+ document[d2] = document[d]
+ del document[d]
+
+ for d in loader.url_fields:
+ if d in document:
+ if isinstance(document[d], basestring):
+ document[d] = loader.expand_url(document[d], base_url, scoped=False, vocab_term=(d in loader.vocab_fields))
+ elif isinstance(document[d], list):
+ document[d] = [loader.expand_url(url, base_url, scoped=False, vocab_term=(d in loader.vocab_fields)) if isinstance(url, basestring) else url for url in document[d] ]
+
+ try:
+ for key, val in document.items():
+ document[key], _ = loader.resolve_all(val, base_url, file_base)
+ except validate.ValidationException as v:
+ _logger.debug("loader is %s", id(loader))
+ raise validate.ValidationException("(%s) (%s) Validation error in field %s:\n%s" % (id(loader), file_base, key, validate.indent(str(v))))
+
+ elif isinstance(document, list):
+ i = 0
+ try:
+ while i < len(document):
+ val = document[i]
+ if isinstance(val, dict) and "$import" in val:
+ l, _ = loader.resolve_ref(val, file_base)
+ if isinstance(l, list):
+ del document[i]
+ for item in aslist(l):
+ document.insert(i, item)
+ i += 1
+ else:
+ document[i] = l
+ i += 1
+ else:
+ document[i], _ = loader.resolve_all(val, base_url, file_base)
+ i += 1
+ except validate.ValidationException as v:
+ raise validate.ValidationException("(%s) (%s) Validation error in position %i:\n%s" % (id(loader), file_base, i, validate.indent(str(v))))
+
+ for identifier in loader.identity_links:
+ if identifier in metadata:
+ if isinstance(metadata[identifier], basestring):
+ metadata[identifier] = loader.expand_url(metadata[identifier], base_url, scoped=True)
+ loader.idx[metadata[identifier]] = document
+
+ return document, metadata
+
+ def fetch_text(self, url):
+ if url in self.cache:
+ return self.cache[url]
+
+ split = urlparse.urlsplit(url)
+ scheme, path = split.scheme, split.path
+
+ if scheme in ['http', 'https'] and requests:
+ try:
+ resp = requests.get(url)
+ resp.raise_for_status()
+ except Exception as e:
+ raise RuntimeError(url, e)
+ return resp.text
+ elif scheme == 'file':
+ try:
+ with open(path) as fp:
+ return fp.read().decode("utf-8")
+ except (OSError, IOError) as e:
+ raise RuntimeError('Error reading %s %s' % (url, e))
+ else:
+ raise ValueError('Unsupported scheme in url: %s' % url)
+
+ def fetch(self, url):
+ if url in self.idx:
+ return self.idx[url]
+ try:
+ text = StringIO.StringIO(self.fetch_text(url))
+ text.name = url
+ result = yaml.load(text)
+ except yaml.parser.ParserError as e:
+ raise validate.ValidationException("Syntax error %s" % (e))
+ if isinstance(result, dict) and self.identifiers:
+ for identifier in self.identifiers:
+ if identifier not in result:
+ result[identifier] = url
+ self.idx[self.expand_url(result[identifier], url)] = result
+ else:
+ self.idx[url] = result
+ return result
+
+ def check_file(self, fn):
+ if fn.startswith("file://"):
+ u = urlparse.urlsplit(fn)
+ return os.path.exists(u.path)
+ else:
+ return False
+
+ def validate_link(self, field, link):
+ if field in self.nolinkcheck:
+ return True
+ if isinstance(link, basestring):
+ if field in self.vocab_fields:
+ if link not in self.vocab and link not in self.idx and link not in self.rvocab:
+ if not self.check_file(link):
+ raise validate.ValidationException("Field `%s` contains undefined reference to `%s`" % (field, link))
+ elif link not in self.idx and link not in self.rvocab:
+ if not self.check_file(link):
+ raise validate.ValidationException("Field `%s` contains undefined reference to `%s`" % (field, link))
+ elif isinstance(link, list):
+ errors = []
+ for i in link:
+ try:
+ self.validate_link(field, i)
+ except validate.ValidationException as v:
+ errors.append(v)
+ if errors:
+ raise validate.ValidationException("\n".join([str(e) for e in errors]))
+ elif isinstance(link, dict):
+ self.validate_links(link)
+ return True
+
+ def getid(self, d):
+ if isinstance(d, dict):
+ for i in self.identifiers:
+ if i in d:
+ if isinstance(d[i], basestring):
+ return d[i]
+ return None
+
+ def validate_links(self, document):
+ docid = self.getid(document)
+ if docid is None:
+ docid = ""
+
+ errors = []
+ if isinstance(document, list):
+ iterator = enumerate(document)
+ elif isinstance(document, dict):
+ try:
+ for d in self.url_fields:
+ if d not in self.identity_links and d in document:
+ self.validate_link(d, document[d])
+ except validate.ValidationException as v:
+ errors.append(v)
+ iterator = document.iteritems()
+ else:
+ return
+
+ for key, val in iterator:
+ try:
+ self.validate_links(val)
+ except validate.ValidationException as v:
+ if key not in self.nolinkcheck:
+ docid = self.getid(val)
+ if docid:
+ errors.append(validate.ValidationException("While checking object `%s`\n%s" % (docid, validate.indent(str(v)))))
+ else:
+ if isinstance(key, basestring):
+ errors.append(validate.ValidationException("While checking field `%s`\n%s" % (key, validate.indent(str(v)))))
+ else:
+ errors.append(validate.ValidationException("While checking position %s\n%s" % (key, validate.indent(str(v)))))
+
+ if errors:
+ if len(errors) > 1:
+ raise validate.ValidationException("\n".join([str(e) for e in errors]))
+ else:
+ raise errors[0]
+ return
diff --git a/schema_salad/schema.py b/schema_salad/schema.py
new file mode 100644
index 0000000..afa714a
--- /dev/null
+++ b/schema_salad/schema.py
@@ -0,0 +1,392 @@
+import avro
+import copy
+from makedoc import add_dictlist
+import sys
+import pprint
+from pkg_resources import resource_stream
+import yaml
+import avro.schema
+import validate
+import json
+import urlparse
+import ref_resolver
+from flatten import flatten
+import logging
+from aslist import aslist
+import jsonld_context
+import schema_salad.schema
+
+_logger = logging.getLogger("salad")
+
+salad_files = ('metaschema.yml',
+ 'salad.md',
+ 'field_name.yml',
+ 'import_include.md',
+ 'link_res.yml',
+ 'ident_res.yml',
+ 'vocab_res.yml',
+ 'field_name_schema.yml',
+ 'field_name_src.yml',
+ 'field_name_proc.yml',
+ 'ident_res_schema.yml',
+ 'ident_res_src.yml',
+ 'ident_res_proc.yml',
+ 'link_res_schema.yml',
+ 'link_res_src.yml',
+ 'link_res_proc.yml',
+ 'vocab_res_schema.yml',
+ 'vocab_res_src.yml',
+ 'vocab_res_proc.yml')
+
+def get_metaschema():
+ loader = ref_resolver.Loader({
+ "Any": "https://w3id.org/cwl/salad#Any",
+ "ArraySchema": "https://w3id.org/cwl/salad#ArraySchema",
+ "DocType": "https://w3id.org/cwl/salad#DocType",
+ "Documentation": "https://w3id.org/cwl/salad#Documentation",
+ "EnumSchema": "https://w3id.org/cwl/salad#EnumSchema",
+ "JsonldPredicate": "https://w3id.org/cwl/salad#JsonldPredicate",
+ "NamedType": "https://w3id.org/cwl/salad#NamedType",
+ "RecordField": "https://w3id.org/cwl/salad#RecordField",
+ "RecordSchema": "https://w3id.org/cwl/salad#RecordSchema",
+ "SaladEnumSchema": "https://w3id.org/cwl/salad#SaladEnumSchema",
+ "SaladRecordField": "https://w3id.org/cwl/salad#SaladRecordField",
+ "SaladRecordSchema": "https://w3id.org/cwl/salad#SaladRecordSchema",
+ "SchemaDefinedType": "https://w3id.org/cwl/salad#SchemaDefinedType",
+ "SpecializeDef": "https://w3id.org/cwl/salad#SpecializeDef",
+ "_container": "https://w3id.org/cwl/salad#JsonldPredicate/_container",
+ "_id": {
+ "@id": "https://w3id.org/cwl/salad#_id",
+ "@type": "@id",
+ "identity": True
+ },
+ "_type": "https://w3id.org/cwl/salad#JsonldPredicate/_type",
+ "abstract": "https://w3id.org/cwl/salad#SaladRecordSchema/abstract",
+ "array": "https://w3id.org/cwl/salad#array",
+ "boolean": "http://www.w3.org/2001/XMLSchema#boolean",
+ "dct": "http://purl.org/dc/terms/",
+ "doc": "sld:doc",
+ "docAfter": {
+ "@id": "https://w3id.org/cwl/salad#docAfter",
+ "@type": "@id"
+ },
+ "docParent": {
+ "@id": "https://w3id.org/cwl/salad#docParent",
+ "@type": "@id"
+ },
+ "docChild": {
+ "@id": "https://w3id.org/cwl/salad#docChild",
+ "@type": "@id"
+ },
+ "documentRoot": "https://w3id.org/cwl/salad#SchemaDefinedType/documentRoot",
+ "documentation": "https://w3id.org/cwl/salad#documentation",
+ "double": "http://www.w3.org/2001/XMLSchema#double",
+ "enum": "https://w3id.org/cwl/salad#enum",
+ "extends": {
+ "@id": "https://w3id.org/cwl/salad#extends",
+ "@type": "@id"
+ },
+ "fields": "sld:fields",
+ "float": "http://www.w3.org/2001/XMLSchema#float",
+ "identity": "https://w3id.org/cwl/salad#JsonldPredicate/identity",
+ "int": "http://www.w3.org/2001/XMLSchema#int",
+ "items": {
+ "@id": "https://w3id.org/cwl/salad#items",
+ "@type": "@vocab"
+ },
+ "jsonldPredicate": "sld:jsonldPredicate",
+ "long": "http://www.w3.org/2001/XMLSchema#long",
+ "name": "@id",
+ "noLinkCheck": "https://w3id.org/cwl/salad#JsonldPredicate/noLinkCheck",
+ "null": "https://w3id.org/cwl/salad#null",
+ "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+ "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+ "record": "https://w3id.org/cwl/salad#record",
+ "sld": "https://w3id.org/cwl/salad#",
+ "specialize": "https://w3id.org/cwl/salad#SaladRecordSchema/specialize",
+ "specializeFrom": {
+ "@id": "https://w3id.org/cwl/salad#specializeFrom",
+ "@type": "@id"
+ },
+ "specializeTo": {
+ "@id": "https://w3id.org/cwl/salad#specializeTo",
+ "@type": "@id"
+ },
+ "string": "http://www.w3.org/2001/XMLSchema#string",
+ "symbols": {
+ "@id": "https://w3id.org/cwl/salad#symbols",
+ "@type": "@id",
+ "identity": True
+ },
+ "type": {
+ "@id": "https://w3id.org/cwl/salad#type",
+ "@type": "@vocab"
+ },
+ "xsd": "http://www.w3.org/2001/XMLSchema#"
+ })
+
+ for f in salad_files:
+ rs = resource_stream(__name__, 'metaschema/' + f)
+ loader.cache["https://w3id.org/cwl/" + f] = rs.read()
+ rs.close()
+
+ rs = resource_stream(__name__, 'metaschema/metaschema.yml')
+ loader.cache["https://w3id.org/cwl/salad"] = rs.read()
+ rs.close()
+
+ j = yaml.load(loader.cache["https://w3id.org/cwl/salad"])
+ j, _ = loader.resolve_all(j, "https://w3id.org/cwl/salad#")
+
+ #pprint.pprint(j)
+
+ (sch_names, sch_obj) = make_avro_schema(j, loader)
+ if isinstance(sch_names, Exception):
+ _logger.error("Metaschema error, avro was:\n%s", json.dumps(sch_obj, indent=4))
+ raise sch_names
+ validate_doc(sch_names, j, loader, strict=True)
+ return (sch_names, j, loader)
+
+def load_schema(schema_ref, cache=None):
+ metaschema_names, metaschema_doc, metaschema_loader = get_metaschema()
+ if cache is not None:
+ metaschema_loader.cache = cache
+ schema_doc, schema_metadata = metaschema_loader.resolve_ref(schema_ref, "")
+
+ validate_doc(metaschema_names, schema_doc, metaschema_loader, True)
+ metactx = schema_metadata.get("@context", {})
+ metactx.update(schema_metadata.get("$namespaces", {}))
+ (schema_ctx, rdfs) = jsonld_context.salad_to_jsonld_context(schema_doc, metactx)
+
+ # Create the loader that will be used to load the target document.
+ document_loader = ref_resolver.Loader(schema_ctx, cache=cache)
+
+ # Make the Avro validation that will be used to validate the target document
+ (avsc_names, avsc_obj) = schema_salad.schema.make_avro_schema(schema_doc, document_loader)
+
+ return document_loader, avsc_names, schema_metadata
+
+def load_and_validate(document_loader, avsc_names, document, strict):
+ if isinstance(document, dict):
+ data, metadata = document_loader.resolve_all(document, document["id"])
+ else:
+ data, metadata = document_loader.resolve_ref(document)
+
+ document_loader.validate_links(data)
+ validate_doc(avsc_names, data, document_loader, strict)
+ return data, metadata
+
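+# Example usage (illustrative; the file names are hypothetical):
+#
+#   document_loader, avsc_names, metadata = load_schema("myschema.yml")
+#   data, metadata = load_and_validate(document_loader, avsc_names,
+#                                      "mydocument.yml", strict=True)
+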
+def validate_doc(schema_names, validate_doc, loader, strict):
+ has_root = False
+ for r in schema_names.names.values():
+ if r.get_prop("documentRoot"):
+ has_root = True
+ break
+
+ if not has_root:
+ raise validate.ValidationException("No document roots defined in the schema")
+
+ if isinstance(validate_doc, list):
+ pass
+ elif isinstance(validate_doc, dict):
+ validate_doc = [validate_doc]
+ else:
+ raise validate.ValidationException("Document must be dict or list")
+
+ anyerrors = []
+ for pos, item in enumerate(validate_doc):
+ errors = []
+ success = False
+ for r in schema_names.names.values():
+ if r.get_prop("documentRoot"):
+ try:
+ validate.validate_ex(r, item, loader.identifiers, strict, foreign_properties=loader.foreign_properties)
+ success = True
+ break
+ except validate.ValidationException as e:
+ errors.append("Could not validate as `%s` because\n%s" % (r.get_prop("name"), validate.indent(str(e), nolead=False)))
+ if not success:
+ objerr = "Validation error at position %i" % pos
+ for ident in loader.identifiers:
+ if ident in item:
+ objerr = "Validation error in object %s" % (item[ident])
+ break
+ anyerrors.append("%s\n%s" % (objerr, validate.indent("\n".join(errors))))
+ if anyerrors:
+ raise validate.ValidationException("\n".join(anyerrors))
+
+
+def replace_type(items, spec, loader, found):
+ """ Go through and replace types in the 'spec' mapping"""
+
+ items = copy.deepcopy(items)
+ if isinstance(items, dict):
+ # recursively check these fields for types to replace
+ if "type" in items and items["type"] in ("record", "enum"):
+ if items.get("name"):
+ if items["name"] in found:
+ return items["name"]
+ else:
+ found.add(items["name"])
+
+ for n in ("type", "items", "fields"):
+ if n in items:
+ items[n] = replace_type(items[n], spec, loader, found)
+ if isinstance(items[n], list):
+ items[n] = flatten(items[n])
+
+ return items
+ elif isinstance(items, list):
+ # recursively transform list
+ return [replace_type(i, spec, loader, found) for i in items]
+ elif isinstance(items, basestring):
+ # found a string which is a symbol corresponding to a type.
+ replace_with = None
+ if items in loader.vocab:
+ # If it's a vocabulary term, first expand it to its fully qualified URI
+ items = loader.vocab[items]
+
+ if items in spec:
+ # Look up in specialization map
+ replace_with = spec[items]
+
+ if replace_with:
+ return replace_type(replace_with, spec, loader, found)
+ return items
+
+def avro_name(url):
+ doc_url, frg = urlparse.urldefrag(url)
+ if frg:
+ if '/' in frg:
+ return frg[frg.rindex('/')+1:]
+ else:
+ return frg
+ return url
+
+def make_valid_avro(items, alltypes, found, union=False):
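+ # Normalize a Salad schema for the avro library: shorten fragment-based
+ # names, emit each named type definition only once (later references use
+ # the name), and expand type names inside unions to their definitions on
+ # first use.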
+ items = copy.deepcopy(items)
+ if isinstance(items, dict):
+ if items.get("name"):
+ items["name"] = avro_name(items["name"])
+
+ if "type" in items and items["type"] in ("https://w3id.org/cwl/salad#record", "https://w3id.org/cwl/salad#enum", "record", "enum"):
+ if items.get("abstract"):
+ return items
+ if not items.get("name"):
+ raise Exception("Named schemas must have a non-empty name: %s" % items)
+
+ if items["name"] in found:
+ return items["name"]
+ else:
+ found.add(items["name"])
+ for n in ("type", "items", "values", "fields"):
+ if n in items:
+ items[n] = make_valid_avro(items[n], alltypes, found, union=True)
+ if "symbols" in items:
+ items["symbols"] = [avro_name(sym) for sym in items["symbols"]]
+ return items
+ if isinstance(items, list):
+ n = []
+ for i in items:
+ n.append(make_valid_avro(i, alltypes, found, union=union))
+ return n
+ if union and isinstance(items, basestring):
+ if items in alltypes and avro_name(items) not in found:
+ return make_valid_avro(alltypes[items], alltypes, found, union=union)
+ items = avro_name(items)
+ return items
+
+
+def extend_and_specialize(items, loader):
+ """Apply 'extend' and 'specialize' to fully materialize derived record
+ types."""
+
+ types = {t["name"]: t for t in items}
+ n = []
+
+ for t in items:
+ t = copy.deepcopy(t)
+ if "extends" in t:
+ if "specialize" in t:
+ spec = {sp["specializeFrom"]: sp["specializeTo"] for sp in aslist(t["specialize"])}
+ else:
+ spec = {}
+
+ exfields = []
+ exsym = []
+ for ex in aslist(t["extends"]):
+ if ex not in types:
+ raise Exception("Extends %s in %s refers to invalid base type" % (t["extends"], t["name"]))
+
+ basetype = copy.deepcopy(types[ex])
+
+ if t["type"] == "record":
+ if spec:
+ basetype["fields"] = replace_type(basetype.get("fields", []), spec, loader, set())
+
+ for f in basetype.get("fields", []):
+ if "inherited_from" not in f:
+ f["inherited_from"] = ex
+
+ exfields.extend(basetype.get("fields", []))
+ elif t["type"] == "enum":
+ exsym.extend(basetype.get("symbols", []))
+
+ if t["type"] == "record":
+ exfields.extend(t.get("fields", []))
+ t["fields"] = exfields
+
+ fieldnames = set()
+ for field in t["fields"]:
+ if field["name"] in fieldnames:
+ raise validate.ValidationException("Field name %s appears twice in %s" % (field["name"], t["name"]))
+ else:
+ fieldnames.add(field["name"])
+
+ for y in [x for x in t["fields"] if x["name"] == "class"]:
+ y["type"] = {"type": "enum",
+ "symbols": [r["name"]],
+ "name": r["name"]+"_class",
+ }
+ y["doc"] = "Must be `%s` to indicate this is a %s object." % (r["name"], r["name"])
+ elif t["type"] == "enum":
+ exsym.extend(t.get("symbols", []))
+ t["symbol"] = exsym
+
+ types[t["name"]] = t
+
+ n.append(t)
+
+ ex_types = {t["name"]: t for t in n}
+
+ extended_by = {}
+ for t in n:
+ if "extends" in t:
+ for ex in aslist(t["extends"]):
+ if ex_types[ex].get("abstract"):
+ add_dictlist(extended_by, ex, ex_types[t["name"]])
+ add_dictlist(extended_by, avro_name(ex), ex_types[ex])
+
+ for t in n:
+ if "fields" in t:
+ t["fields"] = replace_type(t["fields"], extended_by, loader, set())
+
+ return n
+
+def make_avro_schema(j, loader):
+ names = avro.schema.Names()
+
+ #pprint.pprint(j)
+
+ j = extend_and_specialize(j, loader)
+
+ j2 = make_valid_avro(j, {t["name"]: t for t in j}, set())
+
+ j3 = [t for t in j2 if isinstance(t, dict) and not t.get("abstract") and t.get("type") != "documentation"]
+
+ try:
+ avro.schema.make_avsc_object(j3, names)
+ except avro.schema.SchemaParseException as e:
+ names = e
+
+ return (names, j3)
diff --git a/schema_salad/validate.py b/schema_salad/validate.py
new file mode 100644
index 0000000..2e629e1
--- /dev/null
+++ b/schema_salad/validate.py
@@ -0,0 +1,174 @@
+import pprint
+import avro.schema
+import yaml
+import urlparse
+
+class ValidationException(Exception):
+ pass
+
+def validate(expected_schema, datum, identifiers=[], strict=False, foreign_properties=set()):
+ try:
+ return validate_ex(expected_schema, datum, identifiers, strict=strict, foreign_properties=foreign_properties)
+ except ValidationException:
+ return False
+
+INT_MIN_VALUE = -(1 << 31)
+INT_MAX_VALUE = (1 << 31) - 1
+LONG_MIN_VALUE = -(1 << 63)
+LONG_MAX_VALUE = (1 << 63) - 1
+
+def indent(v, nolead=False):
+ if nolead:
+ return "\n".join(v.splitlines()[:1] + [" " + l for l in v.splitlines()[1:]])
+ else:
+ return "\n".join([" " + l for l in v.splitlines()])
+
+def friendly(v):
+ if isinstance(v, avro.schema.NamedSchema):
+ return v.name
+ if isinstance(v, avro.schema.ArraySchema):
+ return "array of <%s>" % friendly(v.items)
+ elif isinstance(v, avro.schema.PrimitiveSchema):
+ return v.type
+ elif isinstance(v, avro.schema.UnionSchema):
+ return " or ".join([friendly(s) for s in v.schemas])
+ else:
+ return v
+
+def multi(v, q=""):
+ if '\n' in v:
+ return "%s%s%s\n" % (q, v, q)
+ else:
+ return "%s%s%s" % (q, v, q)
+
+def vpformat(datum):
+ a = pprint.pformat(datum)
+ if len(a) > 160:
+ a = a[0:160] + "[...]"
+ return a
+
+def validate_ex(expected_schema, datum, identifiers=set(), strict=False, foreign_properties=set()):
+ """Determine if a python datum is an instance of a schema."""
+
+ schema_type = expected_schema.type
+
+ if schema_type == 'null':
+ if datum is None:
+ return True
+ else:
+ raise ValidationException("the value `%s` is not null" % vpformat(datum))
+ elif schema_type == 'boolean':
+ if isinstance(datum, bool):
+ return True
+ else:
+ raise ValidationException("the value `%s` is not boolean" % vpformat(datum))
+ elif schema_type == 'string':
+ if isinstance(datum, basestring):
+ return True
+ else:
+ raise ValidationException("the value `%s` is not string" % vpformat(datum))
+ elif schema_type == 'bytes':
+ if isinstance(datum, str):
+ return True
+ else:
+ raise ValidationException("the value `%s` is not bytes" % vpformat(datum))
+ elif schema_type == 'int':
+ if ((isinstance(datum, int) or isinstance(datum, long))
+ and INT_MIN_VALUE <= datum <= INT_MAX_VALUE):
+ return True
+ else:
+ raise ValidationException("`%s` is not int" % vpformat(datum))
+ elif schema_type == 'long':
+ if ((isinstance(datum, int) or isinstance(datum, long))
+ and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE):
+ return True
+ else:
+ raise ValidationException("the value `%s` is not long" % vpformat(datum))
+ elif schema_type in ['float', 'double']:
+ if (isinstance(datum, int) or isinstance(datum, long)
+ or isinstance(datum, float)):
+ return True
+ else:
+ raise ValidationException("the value `%s` is not float or double" % vpformat(datum))
+ elif schema_type == 'fixed':
+ if isinstance(datum, str) and len(datum) == expected_schema.size:
+ return True
+ else:
+ raise ValidationException("the value `%s` is not fixed" % vpformat(datum))
+ elif schema_type == 'enum':
+ if expected_schema.name == "Any":
+ if datum is not None:
+ return True
+ else:
+ raise ValidationException("Any type must be non-null")
+ if datum in expected_schema.symbols:
+ return True
+ else:
+ raise ValidationException("the value `%s`\n is not a valid symbol in enum %s, expected one of %s" % (vpformat(datum), expected_schema.name, "'" + "', '".join(expected_schema.symbols) + "'"))
+ elif schema_type == 'array':
+ if isinstance(datum, list):
+ for i, d in enumerate(datum):
+ try:
+ validate_ex(expected_schema.items, d, identifiers, strict=strict, foreign_properties=foreign_properties)
+ except ValidationException as v:
+ raise ValidationException("At position %i\n%s" % (i, indent(str(v))))
+ return True
+ else:
+ raise ValidationException("the value `%s` is not a list, expected list of %s" % (vpformat(datum), friendly(expected_schema.items)))
+ elif schema_type == 'map':
+ if (isinstance(datum, dict) and
+ False not in [isinstance(k, basestring) for k in datum.keys()] and
+ False not in [validate(expected_schema.values, v, strict=strict) for v in datum.values()]):
+ return True
+ else:
+ raise ValidationException("`%s` is not a valid map value, expected\n %s" % (vpformat(datum), vpformat(expected_schema.values)))
+ elif schema_type in ['union', 'error_union']:
+ if True in [validate(s, datum, identifiers, strict=strict) for s in expected_schema.schemas]:
+ return True
+ else:
+ errors = []
+ for s in expected_schema.schemas:
+ try:
+ validate_ex(s, datum, identifiers, strict=strict, foreign_properties=foreign_properties)
+ except ValidationException as e:
+ errors.append(str(e))
+ raise ValidationException("the value %s is not a valid type in the union, expected one of:\n%s" % (multi(vpformat(datum), '`'),
+ "\n".join(["- %s, but\n %s" % (friendly(expected_schema.schemas[i]), indent(multi(errors[i]))) for i in range(0, len(expected_schema.schemas))])))
+
+ elif schema_type in ['record', 'error', 'request']:
+ if not isinstance(datum, dict):
+ raise ValidationException("`%s`\n is not a dict" % vpformat(datum))
+
+ errors = []
+ for f in expected_schema.fields:
+ if f.name in datum:
+ fieldval = datum[f.name]
+ else:
+ fieldval = f.default
+
+ try:
+ validate_ex(f.type, fieldval, identifiers, strict=strict, foreign_properties=foreign_properties)
+ except ValidationException as v:
+ if f.name not in datum:
+ errors.append("missing required field `%s`" % f.name)
+ else:
+ errors.append("could not validate field `%s` because\n%s" % (f.name, multi(indent(str(v)))))
+ if strict:
+ for d in datum:
+ found = False
+ for f in expected_schema.fields:
+ if d == f.name:
+ found = True
+ if not found:
+ if d not in identifiers and d not in foreign_properties and d[0] not in ("@", "$"):
+ split = urlparse.urlsplit(d)
+ if split.scheme:
+ errors.append("could not validate extension field `%s` because it is not recognized and strict is True. Did you include a $schemas section?" % (d))
+ else:
+ errors.append("could not validate field `%s` because it is not recognized and strict is True, valid fields are: %s" % (d, ", ".join(fn.name for fn in expected_schema.fields)))
+
+ if errors:
+ raise ValidationException("\n".join(errors))
+ else:
+ return True
+ raise ValidationException("Unrecognized schema_type %s" % schema_type)
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..70b215d
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,5 @@
+[egg_info]
+tag_build = .20160202222448
+tag_date = 0
+tag_svn_revision = 0
+
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..9efd92b
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import setuptools.command.egg_info as egg_info_cmd
+import shutil
+
+from setuptools import setup, find_packages
+
+SETUP_DIR = os.path.dirname(__file__)
+README = os.path.join(SETUP_DIR, 'README.rst')
+
+try:
+ import gittaggers
+ tagger = gittaggers.EggInfoFromGit
+except ImportError:
+ tagger = egg_info_cmd.egg_info
+
+setup(name='schema-salad',
+ version='1.6',
+ description='Schema Annotations for Linked Avro Data (SALAD)',
+ long_description=open(README).read(),
+ author='Common workflow language working group',
+ author_email='common-workflow-language at googlegroups.com',
+ url="https://github.com/common-workflow-language/common-workflow-language",
+ download_url="https://github.com/common-workflow-language/common-workflow-language",
+ license='Apache 2.0',
+ packages=["schema_salad"],
+ package_data={'schema_salad': ['metaschema/*']},
+ install_requires=[
+ 'requests',
+ 'PyYAML',
+ 'avro',
+ 'rdflib >= 4.2.0',
+ 'rdflib-jsonld >= 0.3.0',
+ 'mistune'
+ ],
+ test_suite='tests',
+ tests_require=[],
+ entry_points={
+ 'console_scripts': [ "schema-salad-tool=schema_salad.main:main" ]
+ },
+ zip_safe=True,
+ cmdclass={'egg_info': tagger},
+)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-schema-salad.git