[Python-modules-commits] [python-pyld] 03/276: Updated jsonld to work with new algorithm.
Wolfgang Borgert
debacle at moszumanska.debian.org
Wed Oct 8 23:47:47 UTC 2014
This is an automated email from the git hooks/post-receive script.
debacle pushed a commit to branch master
in repository python-pyld.
commit 041a668c067723b7e20c302ddbe6247bcc6876d0
Author: Mike Johnson <mjohnson at digitalbazaar.com>
Date: Thu Jul 21 16:50:31 2011 -0400
Updated jsonld to work with new algorithm.
Updated to match forge json-ld on the json-ld branch up to commit
225c5b50014d0ca5743a196f34b66223aae966dd.
Still need to update with the few newer commits left.
Runs and passes most tests. Fails on the following tests:
Normalize:
- reordered 4 bnodes, reordered 2 properties (1)
- reordered 4 bnodes, reordered 2 properties (2)
- evil (1)
- evil (2)
- evil (3)
- deep diff (1)
- deep diff (2)
---
lib/pyld/jsonld.py | 1299 ++++++++++++++++++++++++++++++----------------------
1 file changed, 753 insertions(+), 546 deletions(-)
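For orientation, a minimal sketch of the public API this diff reshapes (hypothetical data; behavior at this exact revision is untested, and some normalize cases still fail per the commit message):

    from pyld import jsonld

    doc = {
        '@context': {'ex': 'http://example.org/vocab#'},
        '@subject': 'http://example.org/test#book',
        'ex:contains': {'@iri': 'http://example.org/test#library'}
    }

    expanded = jsonld.expand(doc)       # drop the context
    normalized = jsonld.normalize(doc)  # flatten, name and sort bnodes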
diff --git a/lib/pyld/jsonld.py b/lib/pyld/jsonld.py
index 6924e34..005d65c 100644
--- a/lib/pyld/jsonld.py
+++ b/lib/pyld/jsonld.py
@@ -10,6 +10,12 @@
# Copyright (c) 2011 Digital Bazaar, Inc. All rights reserved.
import copy
+# DEBUG:
+import json
+
+_s = '@subject'
+_t = '@type'
+
ns = {
'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
'xsd': 'http://www.w3.org/2001/XMLSchema#'
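These _s/_t constants replace the bare '@' subject key (and, in the hunks below, the 'a' type term) used by the previous algorithm; sketch of the change in expanded form:

    # old: {'@': {'@iri': 'http://example.org/s'}}
    # new: {'@subject': {'@iri': 'http://example.org/s'}}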
@@ -29,7 +35,6 @@ xsd = {
# @return the JSON-LD default context.
def _createDefaultContext():
return {
- 'a': ns['rdf'] + 'type',
'rdf': ns['rdf'],
'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
'owl': 'http://www.w3.org/2002/07/owl#',
@@ -71,28 +76,32 @@ def _compactIri(ctx, iri, usedCtx):
# check the context for a term that could shorten the IRI
# (give preference to terms over CURIEs)
for key in ctx:
+ # FIXME: javascript has '@' - js 103
# skip special context keys (start with '@')
- if not key.startswith('@'):
+ if len(key) > 0 and not key.startswith('@'):
# compact to a term
if iri == ctx[key]:
rval = key
if usedCtx is not None:
usedCtx[key] = ctx[key]
break
+
+ # term not found, if term is rdf type, use built-in keyword
+ if rval is None and iri == ns['rdf'] + 'type':
+ rval = _t
# term not found, check the context for a CURIE prefix
if rval is None:
for key in ctx:
+ # FIXME: javascript has '@' - js 130
# skip special context keys (start with '@')
- if not key.startswith('@'):
+ if len(key) > 0 and not key.startswith('@'):
# see if IRI begins with the next IRI from the context
ctxIri = ctx[key]
idx = iri.find(ctxIri)
-
# compact to a CURIE
if idx == 0 and len(iri) > len(ctxIri):
- # create the compacted IRI
rval = key + ':' + iri[len(ctxIri):]
if usedCtx is not None:
usedCtx[key] = ctxIri
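A sketch of the resulting preference order (terms, then the built-in @type keyword, then CURIEs) via the public compactIri wrapper defined near the end of the module; results assumed from the logic above, untested at this revision:

    from pyld import jsonld

    ctx = {'foaf': 'http://xmlns.com/foaf/0.1/'}
    jsonld.compactIri(ctx, 'http://xmlns.com/foaf/0.1/name')
    # -> 'foaf:name' (CURIE built from the prefix match)
    jsonld.compactIri(ctx, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
    # -> '@type' (new built-in keyword, no term required)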
@@ -119,7 +128,6 @@ def _expandTerm(ctx, term, usedCtx):
# 1. If the property has a colon, then it is a CURIE or an absolute IRI:
idx = term.find(':')
-
if idx != -1:
# get the potential CURIE prefix
prefix = term[0:idx]
@@ -130,22 +138,21 @@ def _expandTerm(ctx, term, usedCtx):
rval = ctx[prefix] + term[idx + 1:]
if usedCtx is not None:
usedCtx[prefix] = ctx[prefix]
-
# 1.2. Prefix is not in context, property is already an absolute IRI:
else:
rval = term
-
# 2. If the property is in the context, then it's a term.
elif term in ctx:
rval = ctx[term]
if usedCtx is not None:
usedCtx[term] = rval
-
- # 3. The property is the special-case '@'.
- elif term == "@":
- rval = "@"
-
- # 4. The property is a relative IRI, prepend the default vocab.
+ # 3. The property is the special-case subject.
+ elif term == _s:
+ rval = _s
+ # 4. The property is the special-case rdf type.
+ elif term == _t:
+ rval = ns['rdf'] + 'type'
+ # 5. The property is a relative IRI, prepend the default vocab.
else:
rval = ctx['@vocab'] + term
if usedCtx is not None:
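Sketch of the expansion cases above through the public expandTerm alias (results assumed, untested here):

    from pyld import jsonld

    ctx = jsonld.createDefaultContext()
    jsonld.expandTerm(ctx, 'rdf:type', None)  # case 1.1: CURIE, prefix in ctx
    jsonld.expandTerm(ctx, '@subject', None)  # case 3: stays '@subject'
    jsonld.expandTerm(ctx, '@type', None)     # case 4: the rdf type IRI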
@@ -169,6 +176,8 @@ def _setProperty(s, p, o):
else:
s[p] = o;
+# FIXME: is implementation of _cloneContext (js line 302) needed?
+
##
# Gets the coerce type for the given property.
#
@@ -184,35 +193,37 @@ def _getCoerceType(ctx, property, usedCtx):
p = _expandTerm(ctx, property, None)
# built-in type coercion JSON-LD-isms
- if p == '@' or p == ns['rdf'] + 'type':
+ if p == _s or p == ns['rdf'] + 'type':
rval = xsd['anyURI']
+
# check type coercion for property
else:
# force compacted property
p = _compactIri(ctx, p, None)
- for cType in ctx['@coerce']:
+ for type in ctx['@coerce']:
# get coerced properties (normalize to an array)
- props = ctx['@coerce'][cType]
+ props = ctx['@coerce'][type]
if not isinstance(props, list):
props = [props]
# look for the property in the array
for i in props:
+ # FIXME: print i == p to check comparison
# property found
if i == p:
- rval = _expandTerm(ctx, cType, usedCtx)
+ rval = _expandTerm(ctx, type, usedCtx)
if usedCtx is not None:
- if not ('@coerce' in usedCtx):
+ if '@coerce' not in usedCtx:
usedCtx['@coerce'] = {}
- if not (cType in usedCtx['@coerce']):
- usedCtx['@coerce'][cType] = p
+ if type not in usedCtx['@coerce']:
+ usedCtx['@coerce'][type] = p
else:
- c = usedCtx['@coerce'][cType]
+ c = usedCtx['@coerce'][type]
if ((isinstance(c, list) and c.find(p) == -1) or
(isinstance(c, (str, unicode)) and c != p)):
- _setProperty(usedCtx['@coerce'], cType, p)
+ _setProperty(usedCtx['@coerce'], type, p)
break
return rval
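The @coerce map walked above is keyed by type, with one or more property names per type; a hypothetical context for illustration:

    ctx = {
        'foaf': 'http://xmlns.com/foaf/0.1/',
        '@coerce': {'xsd:anyURI': ['foaf:homepage', 'foaf:member']}
    }
    # looking up 'foaf:homepage' finds it under 'xsd:anyURI', so the
    # coerce type expands to 'http://www.w3.org/2001/XMLSchema#anyURI'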
@@ -222,12 +233,12 @@ def _getCoerceType(ctx, property, usedCtx):
# terms and do reverse type coercion to compact a value.
#
# @param ctx the context to use.
-# @param prop the property that points to the value, NULL for none.
+# @param property the property that points to the value, NULL for none.
# @param value the value to compact.
# @param usedCtx a context to update if a value was used from "ctx".
#
# @return the compacted value.
-def _compact(ctx, prop, value, usedCtx):
+def _compact(ctx, property, value, usedCtx):
rval = None
if value is None:
@@ -236,12 +247,12 @@ def _compact(ctx, prop, value, usedCtx):
# recursively add compacted values to array
rval = []
for i in value:
- rval.append(_compact(ctx, prop, i, usedCtx))
+ rval.append(_compact(ctx, property, i, usedCtx))
# graph literal/disjoint graph
- elif (isinstance(value, dict) and '@' in value and
- isinstance(value['@'], list)):
+ elif (isinstance(value, dict) and _s in value and
+ isinstance(value[_s], list)):
rval = {}
- rval['@'] = _compact(ctx, prop, value['@'], usedCtx)
+ rval[_s] = _compact(ctx, property, value[_s], usedCtx)
# value has sub-properties if it doesn't define a literal or IRI value
elif (isinstance(value, dict) and '@literal' not in value and
'@iri' not in value):
@@ -254,40 +265,42 @@ def _compact(ctx, prop, value, usedCtx):
_compact(ctx, key, value[key], usedCtx))
else:
# get coerce type
- cType = _getCoerceType(ctx, prop, usedCtx)
+ coerce = _getCoerceType(ctx, property, usedCtx)
# get type from value, to ensure coercion is valid
- vType = None
+ type = None
if isinstance(value, dict):
# type coercion can only occur if language is not specified
- if not ('@language' in value):
+ if '@language' not in value:
# datatype must match coerce type if specified
if '@datatype' in value:
- vType = value['@datatype']
+ type = value['@datatype']
# datatype is IRI
elif '@iri' in value:
- vType = xsd['anyURI']
+ type = xsd['anyURI']
# can be coerced to any type
else:
- vType = cType
- # value type can be coerced to anything
+ type = coerce
+ # type can be coerced to anything
elif isinstance(value, (str, unicode)):
- vType = cType
+ type = coerce
# types that can be auto-coerced from a JSON-builtin
- if cType is None and (vType == xsd['boolean'] or
- vType == xsd['integer'] or vType == xsd['double']):
- cType = vType
+ if coerce is None and (type == xsd['boolean'] or
+ type == xsd['integer'] or type == xsd['double']):
+ coerce = type
# do reverse type-coercion
- if cType is not None:
+ if coerce is not None:
# type is only None if a language was specified, which is an error
# if type coercion is specified
- if vType is None:
- raise Exception('Cannot coerce type when a language is specified. The language information would be lost.')
+ if type is None:
+ raise Exception('Cannot coerce type when a language is ' +
+ 'specified. The language information would be lost.')
# if the value type does not match the coerce type, it is an error
- elif vType != cType:
- raise Exception('Cannot coerce type because the datatype does not match.')
+ elif type != coerce:
+ raise Exception('Cannot coerce type because the datatype ' +
+ 'does not match.')
# do reverse type-coercion
else:
if isinstance(value, dict):
@@ -297,26 +310,26 @@ def _compact(ctx, prop, value, usedCtx):
rval = value['@literal']
else:
rval = value
-
+
# do basic JSON types conversion
- if cType == xsd['boolean']:
+ if coerce == xsd['boolean']:
rval = (rval == 'true' or rval != 0)
- elif cType == xsd['double']:
+ elif coerce == xsd['double']:
rval = float(rval)
- elif cType == xsd['integer']:
+ elif coerce == xsd['integer']:
rval = int(rval)
-
+
# no type-coercion, just copy value
else:
rval = copy.copy(value)
-
+
# compact IRI
- if vType == xsd['anyURI']:
+ if type == xsd['anyURI']:
if isinstance(rval, dict):
rval['@iri'] = _compactIri(ctx, rval['@iri'], usedCtx)
else:
rval = _compactIri(ctx, rval, usedCtx)
-
+
return rval
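Sketch of the builtin-conversion branch above (assumed input shape):

    raw = {'@literal': '5',
           '@datatype': 'http://www.w3.org/2001/XMLSchema#integer'}
    value = int(raw['@literal'])  # reverse-coerced to the builtin 5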
##
@@ -324,26 +337,25 @@ def _compact(ctx, prop, value, usedCtx):
# the value will be removed.
#
# @param ctx the context.
-# @param prop the property that points to the value, NULL for none.
+# @param property the property that points to the value, NULL for none.
# @param value the value to expand.
# @param expandSubjects True to expand subjects (normalize), False not to.
#
# @return the expanded value.
-def _expand(ctx, prop, value, expandSubjects):
+def _expand(ctx, property, value, expandSubjects):
rval = None
# TODO: add data format error detection?
# if no property is specified and the value is a string (this means the
# value is a property itself), expand to an IRI
- if prop is None and isinstance(value, (str, unicode)):
- #print ' none'
+ if property is None and isinstance(value, (str, unicode)):
rval = _expandTerm(ctx, value, None)
elif isinstance(value, list):
# recursively add expanded values to array
rval = []
for i in value:
- rval.append(_expand(ctx, prop, i, expandSubjects))
+ rval.append(_expand(ctx, property, i, expandSubjects))
elif isinstance(value, dict):
# value has sub-properties if it doesn't define a literal or IRI value
if not ('@literal' in value or '@iri' in value):
@@ -354,19 +366,19 @@ def _expand(ctx, prop, value, expandSubjects):
# recursively handle sub-properties that aren't a sub-context
rval = {}
for key in value:
- if len(key) == 1 or key.find('@') != 0:
+ # preserve frame keywords
+ if key == '@embed' or key == '@explicit':
+ _setProperty(rval, key, copy.copy(value[key]))
+ elif key != '@context':
# set object to expanded property
_setProperty(rval, _expandTerm(ctx, key, None),
_expand(ctx, key, value[key], expandSubjects))
- elif key != '@context':
- # preserve non-context json-ld keywords
- _setProperty(rval, key, copy.copy(value[key]))
# value is already expanded
else:
rval = copy.copy(value)
else:
# do type coercion
- coerce = _getCoerceType(ctx, prop, None)
+ coerce = _getCoerceType(ctx, property, None)
# automatic coercion for basic JSON types
if coerce is None and isinstance(value, (int, long, float, bool)):
@@ -378,7 +390,7 @@ def _expand(ctx, prop, value, expandSubjects):
coerce = xsd['integer']
# coerce to appropriate datatype, only expand subjects if requested
- if coerce is not None and (prop != '@' or expandSubjects):
+ if coerce is not None and (property != _s or expandSubjects):
rval = {}
# expand IRI
@@ -399,7 +411,6 @@ def _expand(ctx, prop, value, expandSubjects):
else:
rval = '' + value
- #print '-- _expand done --'
return rval
##
@@ -411,15 +422,15 @@ def _isBlankNodeIri(v):
# Checks if is named blank node.
def _isNamedBlankNode(v):
# look for "_:" at the beginning of the subject
- return (isinstance(v, dict) and '@' in v and
- '@iri' in v['@'] and _isBlankNodeIri(v['@']['@iri']))
+ return (isinstance(v, dict) and _s in v and
+ '@iri' in v[_s] and _isBlankNodeIri(v[_s]['@iri']))
##
# Checks if is blank node.
def _isBlankNode(v):
# look for no subject or named blank node
return (isinstance(v, dict) and not ('@iri' in v or '@literal' in v) and
- ('@' not in v or _isNamedBlankNode(v)))
+ (_s not in v or _isNamedBlankNode(v)))
##
# Compares two values.
@@ -502,7 +513,7 @@ def _compareObjects(o1, o2):
def _compareBlankNodeObjects(a, b):
rval = 0
- # 3. For each property, compare sorted object values.
+ # 3. For each property, compare sorted object values.
# 3.1. The bnode with fewer objects is first.
# 3.2. For each object value, compare only literals and non-bnodes.
# 3.2.1. The bnode with fewer non-bnodes is first.
@@ -519,6 +530,7 @@ def _compareBlankNodeObjects(a, b):
# step #3.1
lenA = len(a[p]) if isinstance(a[p], list) else 1
lenB = len(b[p]) if isinstance(b[p], list) else 1
+ rval = _compare(lenA, lenB)
# step #3.2.1
if rval == 0:
@@ -540,6 +552,8 @@ def _compareBlankNodeObjects(a, b):
# steps #3.2.2-3.2.9
if rval == 0:
+ objsA.sort(_compareObjects)
+ objsB.sort(_compareObjects)
for i in range(0, len(objsA)):
rval = _compareObjects(objsA[i], objsB[i])
if rval != 0:
@@ -576,28 +590,28 @@ class NameGenerator:
# Populates a map of all named subjects from the given input and an array
# of all unnamed bnodes (includes embedded ones).
#
-# @param src the input (must be expanded, no context).
+# @param input the input (must be expanded, no context).
# @param subjects the subjects map to populate.
# @param bnodes the bnodes array to populate.
-def _collectSubjects(src, subjects, bnodes):
- if isinstance(src, list):
- for i in src:
+def _collectSubjects(input, subjects, bnodes):
+ if isinstance(input, list):
+ for i in input:
_collectSubjects(i, subjects, bnodes)
- elif isinstance(src, dict):
- if '@' in src:
+ elif isinstance(input, dict):
+ if _s in input:
# graph literal
- if isinstance(src['@'], list):
- _collectSubjects(src['@'], subjects, bnodes)
+ if isinstance(input[_s], list):
+ _collectSubjects(input[_s], subjects, bnodes)
# named subject
else:
- subjects[src['@']['@iri']] = src
+ subjects[input[_s]['@iri']] = input
# unnamed blank node
- elif _isBlankNode(src):
- bnodes.append(src)
+ elif _isBlankNode(input):
+ bnodes.append(input)
# recurse through subject properties
- for key in src:
- _collectSubjects(src[key], subjects, bnodes)
+ for key in input:
+ _collectSubjects(input[key], subjects, bnodes)
##
# Flattens the given value into a map of unique subjects. It is assumed that
@@ -626,13 +640,13 @@ def _flatten(parent, parentProperty, value, subjects):
elif isinstance(value, dict):
# graph literal/disjoint graph
- if '@' in value and isinstance(value['@'], list):
+ if _s in value and isinstance(value[_s], list):
# cannot flatten embedded graph literals
if parent is not None:
- raise('Embedded graph literals cannot be flattened.')
+ raise Exception('Embedded graph literals cannot be flattened.')
# top-level graph literal
- for key in value['@']:
+ for key in value[_s]:
_flatten(parent, parentProperty, key, subjects)
# already-expanded value
elif '@literal' in value or '@iri' in value:
@@ -641,14 +655,14 @@ def _flatten(parent, parentProperty, value, subjects):
else:
# create or fetch existing subject
subject = None
- if value['@']['@iri'] in subjects:
- # FIXME: '@' might be a graph literal (as {})
- subject = subjects[value['@']['@iri']]
+ if value[_s]['@iri'] in subjects:
+ # FIXME: _s might be a graph literal (as {})
+ subject = subjects[value[_s]['@iri']]
else:
subject = {}
- if '@' in value:
- # FIXME: '@' might be a graph literal (as {})
- subjects[value['@']['@iri']] = subject
+ if _s in value:
+ # FIXME: _s might be a graph literal (as {})
+ subjects[value[_s]['@iri']] = subject
flattened = subject
# flatten embeds
@@ -667,18 +681,20 @@ def _flatten(parent, parentProperty, value, subjects):
# add flattened value to parent
if flattened is not None and parent is not None:
- # remove top-level '@' for subjects
- # 'http://mypredicate': {'@': {'@iri': 'http://mysubject'}} becomes
+ # remove top-level _s for subjects
+ # 'http://mypredicate': {'@subject': {'@iri': 'http://mysubject'}}
+ # becomes
# 'http://mypredicate': {'@iri': 'http://mysubject'}
- if isinstance(flattened, dict) and '@' in flattened:
- flattened = flattened['@']
+ if isinstance(flattened, dict) and _s in flattened:
+ flattened = flattened[_s]
if isinstance(parent, list):
# do not add duplicate IRIs for the same property
duplicate = False
if isinstance(flattened, dict) and '@iri' in flattened:
def parentFilter(e):
- return (isinstance(e, dict) and '@iri' in e and e['@iri'] == flattened['@iri'])
+ return (isinstance(e, dict) and '@iri' in e and
+ e['@iri'] == flattened['@iri'])
duplicate = len(filter(parentFilter, parent)) > 0
if not duplicate:
@@ -687,89 +703,73 @@ def _flatten(parent, parentProperty, value, subjects):
parent[parentProperty] = flattened
##
-# Returns True if the given source is a subject and has one of the given
-# types in the given frame.
-#
-# @param src the input.
-# @param frame the frame with types to look for.
-#
-# @return True if the src has one of the given types.
-def _isType(src, frame):
- rval = False
-
- # check if type(s) are specified in frame and src
- rType = ns['rdf'] + 'type'
- if (rType in frame and isinstance(src, dict) and
- '@' in src and rType in src):
- tmp = src[rType] if isinstance(src[rType], list) else [src[rType]]
- types = frame[rType] if isinstance(frame[rType], list) else [frame[rType]]
-
- for t in range(0, len(types)):
- rType = types[t]['@iri']
- for i in tmp:
- if i['@iri'] == rType:
- rval = True
- break
- if rval:
- break
-
- return rval
-
-##
-# Returns True if the given src matches the given frame via duck-typing.
-#
-# @param src the input.
-# @param frame the frame to check against.
-#
-# @return True if the src matches the frame.
-def _isDuckType(src, frame):
- rval = False
+# A MappingBuilder is used to build a mapping of existing blank node names
+# to a form for serialization. The serialization is used to compare blank
+# nodes against one another to determine a sort order.
+class MappingBuilder:
+ ##
+ # Initialize the MappingBuilder.
+ def __init__(self):
+ self.count = 1
+ self.mapped = {}
+ self.mapping = {}
+ self.output = {}
- # frame must not have a specific type
- rType = ns['rdf'] + 'type'
- if rType not in frame:
- # get frame properties that must exist on src
- props = frame.keys()
- if len(props) == 0:
- # src always matches if there are no properties
- rval = True
- # src must be a subject with all the given properties
- elif isinstance(src, dict) and '@' in src:
- rval = True
- for i in props:
- if i not in src:
- rval = False
- break
+ ##
+ # Copies this MappingBuilder.
+ #
+ # @return the MappingBuilder copy.
+ def copy(self):
+ rval = MappingBuilder()
+ rval.count = self.count
+ rval.mapped = copy.copy(self.mapped)
+ rval.mapping = copy.copy(self.mapping)
+ rval.output = copy.copy(self.output)
+ return rval
- return rval
-
+ ##
+ # Maps the next name to the given bnode IRI if the bnode IRI isn't already in
+ # the mapping. If the given bnode IRI is canonical, then it will be given
+ # a shortened form of the same name.
+ #
+ # @param iri the blank node IRI to map the next name to.
+ #
+ # @return the mapped name.
+ def mapNode(self, iri):
+ if iri not in self.mapping:
+ if iri.startswith('_:c14n'):
+ self.mapping[iri] = 'c%s' % iri[6:]
+ else:
+ self.mapping[iri] = 's%s' % self.count
+ self.count += 1
+ return self.mapping[iri]
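Sketch of the naming behavior (MappingBuilder is module-internal at this revision):

    mb = MappingBuilder()
    mb.mapNode('_:tmp1')  # -> 's1'
    mb.mapNode('_:tmp2')  # -> 's2'
    mb.mapNode('_:tmp1')  # -> 's1' again; already mapped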
##
# A JSON-LD processor.
class Processor:
-
##
# Initialize the JSON-LD processor.
def __init__(self):
- self.memo = {}
+ self.tmp = None
+ self.c14n = None
##
# Normalizes a JSON-LD object.
#
- # @param src the JSON-LD object to normalize.
+ # @param input the JSON-LD object to normalize.
#
# @return the normalized JSON-LD object.
- def normalize(self, src):
+ def normalize(self, input):
rval = []
# TODO: validate context
- if src is not None:
+ if input is not None:
# get default context
ctx = _createDefaultContext()
- # expand src
- expanded = _expand(ctx, None, src, True)
+ # expand input
+ expanded = _expand(ctx, None, input, True)
# assign names to unnamed bnodes
self.nameBlankNodes(expanded)
@@ -778,15 +778,22 @@ class Processor:
subjects = {}
_flatten(None, None, expanded, subjects)
- # append subjects to array
+ # append subjects with sorted properties to array
for key in subjects:
- rval.append(subjects[key])
+ # FIXME: python for has different data for array vs dict
+ s = subjects[key]
+ sorted = {}
+ keys = s.keys()
+ keys.sort()
+ for k in keys:
+ sorted[k] = s[k]
+ rval.append(sorted)
# canonicalize blank nodes
self.canonicalizeBlankNodes(rval)
def normalizeSort(a, b):
- return _compare(a['@']['@iri'], b['@']['@iri'])
+ return _compare(a[_s]['@iri'], b[_s]['@iri'])
# sort output
rval.sort(cmp=normalizeSort)
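So normalize() now returns a flat array of subjects with sorted keys, ordered by @subject IRI, roughly (sketch):

    # [{'@subject': {'@iri': '_:c14n0'}, ...},
    #  {'@subject': {'@iri': 'http://example.org/a'}, ...}]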
@@ -796,23 +803,23 @@ class Processor:
##
# Assigns unique names to blank nodes that are unnamed in the given input.
#
- # @param src the input to assign names to.
- def nameBlankNodes(self, src):
+ # @param input the input to assign names to.
+ def nameBlankNodes(self, input):
# create temporary blank node name generator
- ng = self.ng = NameGenerator('tmp')
+ ng = self.tmp = NameGenerator('tmp')
# collect subjects and unnamed bnodes
subjects = {}
bnodes = []
- _collectSubjects(src, subjects, bnodes)
+ _collectSubjects(input, subjects, bnodes)
# uniquely name all unnamed bnodes
for bnode in bnodes:
- if not ('@' in bnode):
+ if not (_s in bnode):
# generate names until one is unique
while(ng.next() in subjects):
pass
- bnode['@'] = { '@iri': ng.current() }
+ bnode[_s] = { '@iri': ng.current() }
subjects[ng.current()] = bnode
##
@@ -820,35 +827,35 @@ class Processor:
# that the given name is unique.
#
# @param b the blank node to rename.
- # @param name the new name to use.
- def renameBlankNode(self, b, name):
- old = b['@']['@iri']
+ # @param id the new name to use.
+ def renameBlankNode(self, b, id):
+ old = b[_s]['@iri']
# update bnode IRI
- b['@']['@iri'] = name
+ b[_s]['@iri'] = id
# update subjects map
subjects = self.subjects
- subjects[name] = subjects[old]
+ subjects[id] = subjects[old]
del subjects[old]
# update reference and property lists
- self.edges['refs'][name] = self.edges['refs'][old]
- self.edges['props'][name] = self.edges['props'][old]
+ self.edges['refs'][id] = self.edges['refs'][old]
+ self.edges['props'][id] = self.edges['props'][old]
del self.edges['refs'][old]
del self.edges['props'][old]
# update references to this bnode
- refs = self.edges['refs'][name]['allnodes']
+ refs = self.edges['refs'][id]['all']
for i in refs:
iri = i['s']
if iri == old:
- iri = name
+ iri = id
ref = subjects[iri]
- props = self.edges['props'][iri]['allnodes']
+ props = self.edges['props'][iri]['all']
for i2 in props:
if i2['s'] == old:
- i2['s'] = name
+ i2['s'] = id
# normalize property to array for single code-path
p = i2['p']
@@ -857,292 +864,346 @@ class Processor:
for n in tmp:
if (isinstance(n, dict) and '@iri' in n and
n['@iri'] == old):
- n['@iri'] = name
+ n['@iri'] = id
# update references from this bnode
- props = self.edges['props'][name]['allnodes']
+ props = self.edges['props'][id]['all']
for i in props:
iri = i['s']
- refs = self.edges['refs'][iri]['allnodes']
+ refs = self.edges['refs'][iri]['all']
for r in refs:
if r['s'] == old:
- r['s'] = name
+ r['s'] = id
##
- # Deeply names the given blank node by first naming it if it doesn't already
- # have an appropriate prefix, and then by naming its properties and then
- # references.
+ # Canonically names blank nodes in the given input.
#
- # @param b the bnode to name.
- def deepNameBlankNode(self, b):
- # rename bnode (if not already renamed)
- iri = b['@']['@iri']
- ng = self.ng
- if not ng.inNamespace(iri):
- self.renameBlankNode(b, ng.next())
- iri = ng.current()
-
- subjects = self.subjects
-
- # FIXME: can bnode edge sorting be optimized out due to sorting them
- # when they are unequal in other parts of this algorithm?
-
- def compareEdges(a, b):
- return self.compareEdges(a, b)
-
- # rename bnode properties
- props = self.edges['props'][iri]['bnodes']
- props.sort(cmp=compareEdges)
- for i in props:
- if i['s'] in subjects:
- self.deepNameBlankNode(subjects[i['s']])
-
- # rename bnode references
- refs = self.edges['refs'][iri]['bnodes']
- refs.sort(cmp=compareEdges)
- for i in refs:
- if i['s'] in subjects:
- self.deepNameBlankNode(subjects[i['s']])
-
- ##
- # Canonically names blank nodes in the given source.
- #
- # @param src the flat input graph to assign names to.
- def canonicalizeBlankNodes(self, src):
- # collect subjects and bnodes from flat input graph
- memo = self.memo = {}
+ # @param input the flat input graph to assign names to.
+ def canonicalizeBlankNodes(self, input):
+ # create serialization state
+ self.renamed = {}
+ self.mappings = {}
+ self.serializations = {}
+
+ # collect subject and bnodes from flat input graph
edges = self.edges = {
'refs': {},
'props': {}
}
subjects = self.subjects = {}
bnodes = []
- for s in src:
- iri = s['@']['@iri']
+ for s in input:
+ iri = s[_s]['@iri']
subjects[iri] = s
edges['refs'][iri] = {
- 'allnodes': [],
+ 'all': [],
'bnodes': []
}
edges['props'][iri] = {
- 'allnodes': [],
+ 'all': [],
'bnodes': []
}
if _isBlankNodeIri(iri):
bnodes.append(s)
- # build map of memoized bnode comparisons
- for bn in bnodes:
- iri1 = bn['@']['@iri']
- memo[iri1] = {}
-
# collect edges in the graph
self.collectEdges()
- def bnodeSort(a, b):
- return self.deepCompareBlankNodes(a, b, {})
-
- # sort blank nodes
- bnodes.sort(cmp=bnodeSort)
-
# create canonical blank node name generator
- c14n = NameGenerator('c14n')
+ c14n = self.c14n = NameGenerator('c14n')
+ ngTmp = self.tmp
- # rename all bnodes that have canonical names to temporary names
- tmp = self.ng
+ # rename all bnodes that happen to be in the c14n namespace
+ # and initialize serializations
for bnode in bnodes:
- if c14n.inNamespace(bnode['@']['@iri']):
- # generate names until one is unique
- while(tmp.next() in subjects):
+ iri = bnode[_s]['@iri']
+ if c14n.inNamespace(iri):
+ while ngTmp.next() in subjects:
pass
- self.renameBlankNode(bnode, tmp.current())
-
- # change internal name generator from tmp one to canonical one
- self.ng = c14n
-
- # deeply-iterate over bnodes canonically-naming them
- for bnode in bnodes:
- self.deepNameBlankNode(bnode)
+ self.renameBlankNode(bnode, ngTmp.current())
+ iri = bnode[_s]['@iri']
+ self.serializations[iri] = {
+ 'props': None,
+ 'refs': None
+ }
- # sort property lists that now have canonically-named bnodes
+ # keep sorting and naming blank nodes until they are all named
+ while len(bnodes) > 0:
+ # define bnode sorting function
+ def bnodeSort(a, b):
+ return self.deepCompareBlankNodes(a, b)
+
+ bnodes.sort(cmp=bnodeSort)
+
+ # name all bnodes according to the first bnode's relation mappings
+ bnode = bnodes.pop(0)
+ iri = bnode[_s]['@iri']
+ dirs = ['props', 'refs']
+ for dir in dirs:
+ # if no serialization has been computed,
+ # name only the first node
+ if self.serializations[iri][dir] is None:
+ mapping = {}
+ mapping[iri] = 's1'
+ else:
+ mapping = self.serializations[iri][dir]['m']
+
+ # define key sorting function
+ def sortKeys(a, b):
+ return _compare(mapping[a], mapping[b])
+
+ # sort keys by value to name them in order
+ keys = mapping.keys()
+ keys.sort(sortKeys)
+
+ # name bnodes in mapping
+ renamed = []
+ for iriK in keys:
+ if not c14n.inNamespace(iri) and iriK in subjects:
+ self.renameBlankNode(subjects[iriK], c14n.next())
+ renamed.append(iriK)
+
+ # only keep non-canonically named bnodes
+ tmp = bnodes
+ bnodes = []
+ for b in tmp:
+ iriB = b[_s]['@iri']
+ if not c14n.inNamespace(iriB):
+ for i2 in renamed:
+ self.markSerializationDirty(iriB, i2, dir)
+ bnodes.append(b)
+
+ # sort property lists that now have canonically named bnodes
for key in edges['props']:
if len(edges['props'][key]['bnodes']) > 0:
bnode = subjects[key]
for p in bnode:
+ # FIXME: javascript has '@' - js 1434
if p.find('@') != 0 and isinstance(bnode[p], list):
bnode[p].sort(_compareObjects)
-
+
##
- # Compares the edges between two nodes for equivalence.
+ # Marks a relation serialization as dirty if necessary.
+ #
+ # @param iri the IRI of the bnode to check.
+ # @param changed the old IRI of the bnode that changed.
+ # @param dir the direction to check ('props' or 'refs').
+ def markSerializationDirty(self, iri, changed, dir):
+ s = self.serializations[iri]
+ if s[dir] is not None and changed in s[dir]['m']:
+ s[dir] = None
+
+ ##
+ # Recursively creates a relation serialization (partial or full).
#
- # @param a the first bnode.
- # @param b the second bnode.
- # @param dir the edge direction ('props' or 'refs').
- # @param iso the current subgraph isomorphism for connected bnodes.
+ # @param keys the keys to serialize in the current output.
+ # @param output the current mapping builder output.
+ # @param done the already serialized keys.
#
- # @return -1 if a < b, 0 if a == b, 1 if a > b.
- def deepCompareEdges(self, a, b, dir, iso):
- rval = 0
-
- # Edge comparison algorithm:
- # 1. Compare adjacent bnode lists for matches.
- # 1.1. If a bnode ID is in the potential isomorphism, then its associated
- # bnode *must* be in the other bnode under the same property.
- # 1.2. If a bnode ID is not in the potential isomorphism yet, then the
- # associated bnode *must* have a bnode with the same property from the
- # same bnode group that isn't in the isomorphism yet to match up.
- # Iterate over each bnode in the group until an equivalent one is found.
- # 1.3. Recurse to compare the chosen bnodes.
- # 1.4. The bnode with lowest group index amongst bnodes with the same
- # property name is first.
-
- # for every bnode edge in A, make sure there's a match in B
- iriA = a['@']['@iri']
- iriB = b['@']['@iri']
- edgesA = self.edges[dir][iriA]['bnodes']
- edgesB = self.edges[dir][iriB]['bnodes']
- for i1 in range(0, len(edgesA)):
- found = False
- edgeA = edgesA[i1]
-
- # step #1.1
- if edgeA['s'] in iso:
- match = iso[edgeA['s']]
- for edgeB in edgesB:
- if edgeB['p'] > edgeA['p']:
- break
- if edgeB['p'] == edgeA['p']:
- found = (edgeB['s'] == match)
- break
- # step #1.2
+ # @return the relation serialization.
+ def recursiveSerializeMapping(self, keys, output, done):
+ rval = ''
+ for k in keys:
+ if k not in output:
+ break
+ if k in done:
+ # mark cycle
+ rval += '_' + k
else:
- for edgeB in edgesB:
- if edgeB['p'] > edgeA['p']:
- break
- if edgeB['p'] == edgeA['p'] and not (edgeB['s'] in iso):
- # add bnode pair temporarily to iso
- iso[edgeA['s']] = edgeB['s']
- iso[edgeB['s']] = edgeA['s']
+ done[k] = True
+ tmp = output[k]
+ for s in tmp['k']:
+ rval += s
+ iri = tmp['m'][s]
+ if iri in self.subjects:
+ b = self.subjects[iri]
- # step #1.3
- sA = self.subjects[edgeA['s']]
- sB = self.subjects[edgeB['s']]
- if self.deepCompareBlankNodes(sA, sB, iso) == 0:
- found = True
- break
+ # serialize properties
+ rval += '<'
+ rval += _serializeProperties(b)
+ rval += '>'
- # remove non-matching bnode pair from iso
- del iso[edgeA['s']]
- del iso[edgeB['s']]
- # step #1.4
- if not found:
- # no matching bnode pair found, sort order is the bnode with the
- # least bnode for edgeA's property
- rval = self.compareEdgeType(a, b, edgeA['p'], dir, iso)
-
- if rval != 0:
- break
-
+ # serialize references
+ rval += '<'
+ first = True
+ refs = self.edges['refs'][iri]['all']
+ for r in refs:
+ if first:
+ first = False
+ else:
+ rval += '|'
+ rval += '_:' if _isBlankNodeIri(r['s']) else r['s']
+ rval += '>'
+ rval += self.recursiveSerializeMapping(tmp['k'], output, done)
return rval
-
+
##
- # Compares bnodes along the same edge type to determine which is less.
+ # Creates a relation serialization (partial or full).
#
- # @param a the first bnode.
- # @param b the second bnode.
- # @param p the property.
- # @param d the direction of the edge ('props' or 'refs').
- # @param iso the current subgraph isomorphism for connected bnodes.
+ # @param output the current mapping builder output.
#
- # @return -1 if a < b, 0 if a == b, 1 if a > b.
- def compareEdgeType(self, a, b, p, d, iso):
- rval = 0
-
- # compare adjacent bnodes for smallest
- adjA = self.getSortedAdjacents(a, p, d, iso)
- adjB = self.getSortedAdjacents(a, p, d, iso)
- for i in range(0, len(adjA)):
- rval = self.deepCompareBlankNodes(adjA[i], adjB[i], iso)
- if rval != 0:
- break
-
- return rval
-
+ # @return the relation serialization.
+ def serializeMapping(self, output):
+ return self.recursiveSerializeMapping(['s1'], output, {})
+
##
- # Returns the bnode properties for a particular bnode in sorted order.
+ # Recursively serializes adjacent bnode combinations.
#
- # @param b the bnode.
- # @param p the property (edge type).
- # @param d the direction of the edge ('props' or 'refs').
- # @param iso the current subgraph isomorphism for connected bnodes.
+ # @param s the serialization to update.
+ # @param top the top of the serialization.
+ # @param mb the MappingBuilder to use.
+ # @param dir the edge direction to use ('props' or 'refs').
+ # @param mapped all of the already-mapped adjacent bnodes.
+ # @param notMapped all of the not-yet mapped adjacent bnodes.
+ def serializeCombos(self, s, top, mb, dir, mapped, notMapped):
+ # copy mapped nodes
+ mapped = copy.copy(mapped)
+ # FIXME: do we need to use deep copy here? - js 1690
+
+ # handle recursion
+ if len(notMapped) > 0:
+ # map first bnode in list
+ mapped[mb.mapNode(notMapped[0]['s'])] = notMapped[0]['s']
+
+ # recurse into remaining possible combinations
+ # FIXME: check javascript - js 1699
+ original = mb.copy()
+ notMapped = notMapped[1:]
+ rotations = max(1, len(notMapped))
+ # FIXME: check range length - js 1702
+ for r in range(0, rotations):
+ # FIXME: check javascript copy - js 1704
+ m = mb if r == 0 else copy.copy(original)
+ self.serializeCombos(s, top, m, dir, mapped, notMapped)
+
+ # rotate not-mapped for next combination
+ _rotate(notMapped)
+ # handle final adjacent node in current combination
+ else:
+ keys = mapped.keys()
+ keys.sort()
+ mb.output[top] = { 'k': keys, 'm': mapped }
+
+ # optimize away mappings that are already too large
+ _s = self.serializeMapping(mb.output)
+ if s[dir] is None or _compareSerializations(_s, s[dir]['s']) <= 0:
+ oldCount = mb.count
+
+ # recurse into adjacent values
+ for k in keys:
+ self.serializeBlankNode(s, mapped[k], mb, dir)
+
+ # reserialize if more nodes were mapped
+ if mb.count > oldCount:
+ _s = self.serializeMapping(mb.output)
+
+ # update least serialization if new one has been found
+ if (s[dir] is None or
+ (_compareSerializations(_s, s[dir]['s']) <= 0 and
+ len(_s) >= len(s[dir]['s']))):
+ s[dir] = { 's': _s, 'm': mb.mapping }
+
+ ##
+ # Computes the relation serialization for the given blank node IRI.
#
- # @return the sorted bnodes for the property.
- def getSortedAdjacents(self, b, p, d, iso):
- #print ''
- #print '-- Get Sorted Adjacents --'
- rval = []
-
- # add all bnodes for the given property
- iri = b['@']['@iri']
- edges = self.edges[d][iri]['bnodes']
- for edge in edges:
- if edge['p'] > p:
- break
- if edge['p'] == p:
- rval.append(self.subjects[edge['s']])
-
- def bnodeSort(a, b):
- #print '- bnodeSort -'
- #print 'a: ',a
- #print 'b: ', b
- #print 'iso: ', iso
- return self.deepCompareBlankNodes(a, b, iso)
-
- # sort bnodes
- rval.sort(cmp=bnodeSort)
- #print 'rval: ', rval
- return rval
-
+ # @param s the serialization to update.
+ # @param iri the current bnode IRI to be mapped.
+ # @param mb the MappingBuilder to use.
+ # @param dir the edge direction to use ('props' or 'refs').
+ def serializeBlankNode(self, s, iri, mb, dir):
+ # only do mapping if iri not already mapped
+ if iri not in mb.mapped:
+ # iri now mapped
+ mb.mapped[iri] = True
+ top = mb.mapNode(iri)
+
+ # copy original mapping builder
+ # FIXME: check javascript copy - js 1765
+ original = copy.copy(mb)
+
+ # split adjacent bnodes on mapped and not-mapped
+ adj = self.edges[dir][iri]['bnodes']
+ mapped = {}
+ notMapped = []
+ for i in adj:
+ if i['s'] in mb.mapping:
+ mapped[mb.mapping[i['s']]] = i['s']
+ else:
+ notMapped.append(i)
+
+ # TODO: ensure this optimization does not alter canonical order
+
+ # if the current bnode already has a serialization, reuse it
+ #hint = self.serializations[iri][dir] if iri in self.serializations else None
+ #if hint is not None:
+ # hm = hint['m']
+ #
+ # def notMappedSort(a, b):
+ # return _compare(hm[a['s']], hm[b['s']])
+ #
+ # notMapped.sort(cmp=notMappedSort)
+ # for i in notMapped:
+ # mapped[mb.mapNode(notMapped[i]['s'])] = notMapped[i]['s']
+ # notMapped = []
+
+ # loop over possible combinations
+ combos = max(1, len(notMapped))
+ # FIXME: check range - js 1804
+ for i in range(0, combos):
+ # FIXME: check javascript copy - js 1806
+ m = mb if i == 0 else copy.copy(original)
+ self.serializeCombos(s, top, m, dir, mapped, notMapped)
+
##
# Compares two blank nodes for equivalence.
#
# @param a the first blank node.
# @param b the second blank node.
- # @param iso the current subgraph isomorphism for connected bnodes.
#
# @return -1 if a < b, 0 if a == b, 1 if a > b.
- def deepCompareBlankNodes(self, a, b, iso):
+ def deepCompareBlankNodes(self, a, b):
rval = 0
# compare IRIs
- iriA = a['@']['@iri']
- iriB = b['@']['@iri']
+ iriA = a[_s]['@iri']
+ iriB = b[_s]['@iri']
if iriA == iriB:
rval = 0
- # use memoized comparison if available
- elif iriB in self.memo[iriA]:
- rval = self.memo[iriA][iriB]
else:
# do shallow compare first
rval = self.shallowCompareBlankNodes(a, b)
- if rval != 0:
- # compare done
- self.memo[iriA][iriB] = rval
- self.memo[iriB][iriA] = -rval
+
# deep comparison is necessary
- else:
- # compare properties
- rval = self.deepCompareEdges(a, b, 'props', iso)
-
- # compare references
- if rval == 0:
- rval = self.deepCompareEdges(a, b, 'refs', iso)
-
- # update memo
- if not (iriB in self.memo[iriA]):
- self.memo[iriA][iriB] = rval
- self.memo[iriB][iriA] = -rval
-
+ if rval == 0:
+ # compare property edges then reference edges
+ dirs = ['props', 'refs']
+ for i in range(0, len(dirs)):
+ # recompute 'a' and 'b' serializations as necessary
+ dir = dirs[i]
+ sA = self.serializations[iriA]
+ sB = self.serializations[iriB]
+ if sA[dir] is None:
+ mb = MappingBuilder()
+ if dir == 'refs':
+ # keep same mapping and count from 'props' serialization
+ mb.mapping = copy.copy(sA['props']['m'])
+ mb.count = len(mb.mapping.keys()) + 1
+ self.serializeBlankNode(sA, iriA, mb, dir)
+ if sB[dir] is None:
+ mb = MappingBuilder()
+ if dir == 'refs':
+ # keep same mapping and count from 'props' serialization
+ mb.mapping = copy.copy(sB['props']['m'])
+ mb.count = len(mb.mapping.keys()) + 1
+ self.serializeBlankNode(sB, iriB, mb, dir)
+
+ # compare serializations
+ rval = _compare(sA[dir]['s'], sB[dir]['s'])
+
+ if rval != 0:
+ break
return rval
##
@@ -1186,8 +1247,8 @@ class Processor:
# step #4
if rval == 0:
- edgesA = self.edges['refs'][a['@']['@iri']]['allnodes']
- edgesB = self.edges['refs'][b['@']['@iri']]['allnodes']
+ edgesA = self.edges['refs'][a[_s]['@iri']]['all']
+ edgesB = self.edges['refs'][b[_s]['@iri']]['all']
rval = _compare(len(edgesA), len(edgesB))
# step #5
@@ -1203,8 +1264,9 @@ class Processor:
##
# Compares two edges. Edges with an IRI (vs. a bnode ID) come first, then
# alphabetically-first IRIs, then alphabetically-first properties. If a blank
- # node appears in the blank node equality memo then they will be compared
- # after properties, otherwise they won't be.
+ # node has been canonically named, then blank nodes will be compared after
+ # properties (with a preference for canonically named over non-canonically
+ # named), otherwise they won't be.
#
# @param a the first edge.
# @param b the second edge.
@@ -1215,7 +1277,7 @@ class Processor:
bnodeA = _isBlankNodeIri(a['s'])
bnodeB = _isBlankNodeIri(b['s'])
- memo = self.memo
+ c14n = self.c14n
# if not both bnodes, one that is a bnode is greater
if bnodeA != bnodeB:
@@ -1225,8 +1287,16 @@ class Processor:
rval = _compare(a['s'], b['s'])
if rval == 0:
rval = _compare(a['p'], b['p'])
- if rval == 0 and bnodeA and a['s'] in memo and b['s'] in memo[a['s']]:
- rval = memo[a['s']][b['s']]
+
+ # do bnode IRI comparison if canonical naming has begun
+ if rval == 0 and c14n is not None:
+ c14nA = c14n.inNamespace(a['s'])
+ c14nB = c14n.inNamespace(b['s'])
+ if c14nA != c14nB:
+ rval = 1 if c14nA else -1
+ elif c14nA:
+ rval = _compare(a['s'], b['s'])
+ # FIXME: javascript has no else clause - js 1987
return rval
@@ -1244,152 +1314,46 @@ class Processor:
for iri in self.subjects:
subject = self.subjects[iri]
for key in subject:
- if key != '@':
+ if key != _s:
# normalize to array for single codepath
- obj = subject[key]
- tmp = [obj] if not isinstance(obj, list) else obj
+ object = subject[key]
+ tmp = [object] if not isinstance(object, list) else object
for o in tmp:
- if (isinstance(o, dict) and
- '@iri' in o and
+ if (isinstance(o, dict) and '@iri' in o and
o['@iri'] in self.subjects):
objIri = o['@iri']
# map object to this subject
- refs[objIri]['allnodes'].append({ 's': iri, 'p': key })
+ refs[objIri]['all'].append({ 's': iri, 'p': key })
# map this subject to object
- props[iri]['allnodes'].append({ 's': objIri, 'p': key })
+ props[iri]['all'].append({ 's': objIri, 'p': key })
+ # create node filter function
def filterNodes(edge):
return _isBlankNodeIri(edge['s'])
# create sorted categories
for iri in refs:
- refs[iri]['allnodes'].sort(cmp=self.compareEdges)
- refs[iri]['bnodes'] = filter(filterNodes, refs[iri]['allnodes'])
+ refs[iri]['all'].sort(cmp=self.compareEdges)
+ refs[iri]['bnodes'] = filter(filterNodes, refs[iri]['all'])
for iri in props:
- props[iri]['allnodes'].sort(cmp=self.compareEdges)
- props[iri]['bnodes'] = filter(filterNodes, props[iri]['allnodes'])
+ props[iri]['all'].sort(cmp=self.compareEdges)
+ props[iri]['bnodes'] = filter(filterNodes, props[iri]['all'])
##
- # Recursively frames the given src according to the given frame.
+ # Frames JSON-LD input.
#
- # @param subjects a map of subjects in the graph.
- # @param src the input to frame.
- # @param frame the frame to use.
- # @param embeds a map of previously embedded subjects, used to prevent cycles.
- # @param options the framing options.
- #
- # @return the framed src.
- def _frame(self, subjects, src, frame, embeds, options):
- rval = None
-
- # prepare output, set limit, get array of frames
- limit = -1
- frames = None
- if isinstance(frame, list):
- rval = []
- frames = frame
- else:
- frames = [frame]
- limit = 1
-
- # iterate over frames adding src matches to list
- values = []
- for i in range(0, len(frames)):
- # get next frame
- frame = frames[i]
- if not isinstance(frame, (list, dict)):
- raise Exception('Invalid JSON-LD frame. Frame type is not a map or array.')
-
- # create array of values for each frame
- values.append([])
- for n in src:
- # add src to list if it matches frame specific type or duck-type
- if _isType(n, frame) or _isDuckType(n, frame):
- values[i].append(n)
- limit -= 1
- if limit == 0:
- break
- if limit == 0:
- break
-
- # FIXME: refactor to use python zip()
- # for each matching value, add it to the output
- for i1 in range(0, len(values)):
- for i2 in range(0, len(values[i1])):
- frame = frames[i1]
- value = values[i1][i2]
-
- # determine if value should be embedded or referenced
- embedOn = frame['@embed'] if '@embed' in frame else options['defaults']['embedOn']
-
- if not embedOn:
- # if value is a subject, only use subject IRI as reference
- if isinstance(value, dict) and '@' in value:
- value = value['@']
- elif (isinstance(value, dict) and '@' in value and
- value['@']['@iri'] in embeds):
-
- # TODO: possibly support multiple embeds in the future ... and
- # instead only prevent cycles?
- raise Exception(
- 'Multiple embeds of the same subject is not supported.',
- value['@']['@iri'])
-
- # if value is a subject, do embedding and subframing
- elif isinstance(value, dict) and '@' in value:
- embeds[value['@']['@iri']] = True
-
- # if explicit is on, remove keys from value that aren't in frame
- explicitOn = frame['@explicit'] if '@explicit' in frame else options['defaults']['explicitOn']
- if explicitOn:
- # python - iterate over copy of list to remove key
- for key in list(value):
- # always include subject
- if key != '@' and key not in frame:
- del value[key]
-
- # iterate over frame keys to do subframing
- for key in frame:
- # skip keywords and type query
- if key.find('@') != 0 and key != ns['rdf'] + 'type':
- if key in value:
- # build src and do recursion
- src = value[key] if isinstance(value[key], list) else [value[key]]
- for n in range(0, len(src)):
- # replace reference to subject w/subject
- if (isinstance(src[n], dict) and
- '@iri' in src[n] and
- src[n]['@iri'] in subjects):
- src[n] = subjects[src[n]['@iri']]
- value[key] = self._frame(
- subjects, src, frame[key], embeds, options)
- else:
- # add None property to value
- value[key] = None
-
- # add value to output
- if rval is None:
- rval = value
- else:
- rval.append(value)
-
- return rval
-
- ##
- # Frames JSON-LD src.
- #
- # @param src the JSON-LD input.
+ # @param input the JSON-LD input.
# @param frame the frame to use.
# @param options framing options to use.
#
# @return the framed output.
- def frame(self, src, frame, options=None):
+ def frame(self, input, frame, options=None):
rval = None
- # normalize src
- src = self.normalize(src)
+ # normalize input
+ input = self.normalize(input)
# save frame context
ctx = None
@@ -1411,11 +1375,11 @@ class Processor:
# build map of all subjects
subjects = {}
- for i in src:
- subjects[i['@']['@iri']] = i
+ for i in input:
+ subjects[i[_s]['@iri']] = i
- # frame src
- rval = self._frame(subjects, src, frame, {}, options)
+ # frame input
+ rval = _frame(subjects, input, frame, {}, options)
# apply context
if ctx is not None and rval is not None:
@@ -1425,37 +1389,261 @@ class Processor:
##
+# Returns True if the given source is a subject and has one of the given
+# types in the given frame.
+#
+# @param src the input.
+# @param frame the frame with types to look for.
+#
+# @return True if the src has one of the given types.
+def _isType(src, frame):
+ rval = False
+
+ # check if type(s) are specified in frame and src
+ rType = ns['rdf'] + 'type'
+ if (rType in frame and isinstance(src, dict) and _s in src and
+ rType in src):
+ tmp = src[rType] if isinstance(src[rType], list) else [src[rType]]
+ types = frame[rType] if isinstance(frame[rType], list) else [frame[rType]]
+
+ for t in range(0, len(types)):
+ rType = types[t]['@iri']
+ for i in tmp:
+ if i['@iri'] == rType:
+ rval = True
+ break
+ if rval:
+ break
+
+ return rval
+
+##
+# Returns True if the given src matches the given frame via duck-typing.
+#
+# @param src the input.
+# @param frame the frame to check against.
+#
+# @return True if the src matches the frame.
+def _isDuckType(src, frame):
+ rval = False
+
+ # frame must not have a specific type
+ rType = ns['rdf'] + 'type'
+ if rType not in frame:
+ # get frame properties that must exist on src
+ props = frame.keys()
+ if len(props) == 0:
+ # src always matches if there are no properties
+ rval = True
+ # src must be a subject with all the given properties
+ elif isinstance(src, dict) and _s in src:
+ rval = True
+ for i in props:
+ if i not in src:
+ rval = False
+ break
+
+ return rval
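For illustration, a node that duck-type matches a one-property frame (module-internal helpers, hypothetical data):

    frame = {'http://example.org/vocab#name': {}}
    node = {'@subject': {'@iri': '_:b1'},
            'http://example.org/vocab#name': {'@literal': 'Foo'}}
    _isDuckType(node, frame)  # -> True: node carries every frame property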
+
+##
+# Recursively frames the given input according to the given frame.
+#
+# @param subjects a map of subjects in the graph.
+# @param input the input to frame.
+# @param frame the frame to use.
+# @param embeds a map of previously embedded subjects, used to prevent cycles.
+# @param options the framing options.
+#
+# @return the framed input.
+def _frame(subjects, input, frame, embeds, options):
+ rval = None
+
+ # prepare output, set limit, get array of frames
+ limit = -1
+ frames = None
+ if isinstance(frame, list):
+ rval = []
+ frames = frame
+ else:
+ frames = [frame]
+ limit = 1
+
+ # iterate over frames adding input matches to list
+ values = []
+ for i in range(0, len(frames)):
+ # get next frame
+ frame = frames[i]
+ if not isinstance(frame, (list, dict)):
+ raise Exception('Invalid JSON-LD frame. Frame type is not a map or array.')
+
+ # create array of values for each frame
+ values.append([])
+ for n in input:
+ # add input to list if it matches frame specific type or duck-type
+ if _isType(n, frame) or _isDuckType(n, frame):
+ values[i].append(n)
+ limit -= 1
+ if limit == 0:
+ break
+ if limit == 0:
+ break
+
+ # FIXME: refactor to use python zip()
+ # for each matching value, add it to the output
+ for i1 in range(0, len(values)):
+ for i2 in range(0, len(values[i1])):
+ frame = frames[i1]
+ value = values[i1][i2]
+
+ # determine if value should be embedded or referenced
+ embedOn = frame['@embed'] if '@embed' in frame else options['defaults']['embedOn']
+
+ if not embedOn:
+ # if value is a subject, only use subject IRI as reference
+ if isinstance(value, dict) and _s in value:
+ value = value[_s]
+ elif (isinstance(value, dict) and _s in value and
+ value[_s]['@iri'] in embeds):
+
+ # TODO: possibly support multiple embeds in the future ... and
+ # instead only prevent cycles?
+ raise Exception(
+ 'Multiple embeds of the same subject is not supported.',
+ value[_s]['@iri'])
+
+ # if value is a subject, do embedding and subframing
+ elif isinstance(value, dict) and _s in value:
+ embeds[value[_s]['@iri']] = True
+
+ # if explicit is on, remove keys from value that aren't in frame
+ explicitOn = frame['@explicit'] if '@explicit' in frame else options['defaults']['explicitOn']
+ if explicitOn:
+ # python - iterate over copy of list to remove key
+ for key in list(value):
+ # always include subject
+ if key != _s and key not in frame:
+ del value[key]
+
+ # iterate over frame keys to do subframing
+ for key in frame:
+ # FIXME: javascript has '@' - js 2246
+ # skip keywords and type query
+ if key.find('@') != 0 and key != ns['rdf'] + 'type':
+ if key in value:
+ # build input and do recursion
+ input = value[key] if isinstance(value[key], list) else [value[key]]
+ for n in range(0, len(input)):
+ # replace reference to subject w/subject
+ if (isinstance(input[n], dict) and
+ '@iri' in input[n] and
+ input[n]['@iri'] in subjects):
+ input[n] = subjects[input[n]['@iri']]
+ value[key] = _frame(
+ subjects, input, frame[key], embeds, options)
+ else:
+ # add None property to value
+ value[key] = None
+
+ # add value to output
+ if rval is None:
+ rval = value
+ else:
+ rval.append(value)
+
+ return rval
+
+##
+# Rotates the elements in an array one position.
+#
+# @param a the array.
+def _rotate(a):
+ if len(a) > 0:
+ a.append(a.pop(0))
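E.g. (sketch):

    a = [1, 2, 3]
    _rotate(a)  # a is now [2, 3, 1]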
+
+##
+# Serializes the properties of the given bnode for its relation serialization.
+#
+# @param b the blank node.
+#
+# @return the serialized properties.
+def _serializeProperties(b):
+ rval = ''
+ for p in b:
+ if p != '@subject':
+ first = True
+ objs = b[p] if isinstance(b[p], list) else [b[p]]
+ for o in objs:
+ if first:
+ first = False
+ else:
+ rval += '|'
+ if (isinstance(o, dict) and '@iri' in o and
+ _isBlankNodeIri(o['@iri'])):
+ rval += '_:'
+ else:
+ rval += json.dumps(o)
+ return rval
+
+##
+# Compares two serializations for the same blank node. If the two
+# serializations aren't complete enough to determine if they are equal (or if
+# they are actually equal), 0 is returned.
+#
+# @param s1 the first serialization.
+# @param s2 the second serialization.
+#
+# @return -1 if s1 < s2, 0 if s1 == s2 (or indeterminate), 1 if s1 > s2.
+def _compareSerializations(s1, s2):
+ rval = 0
+ if len(s1) == len(s2):
+ rval = _compare(s1, s2)
+ elif len(s1) > len(s2):
+ rval = _compare(s1[0:len(s2)], s2)
+ else:
+ rval = _compare(s1, s2[0:len(s1)])
+ return rval
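Only the shared prefix is compared, so a short serialization is indeterminate against a longer one that extends it (sketch):

    _compareSerializations('s1<x>', 's1')   # -> 0: equal on the shared prefix
    _compareSerializations('s1a', 's1b<>')  # -> -1: differs within the prefix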
+
+##
# Normalizes a JSON-LD object.
#
-# @param src the JSON-LD object to normalize.
+# @param input the JSON-LD object to normalize.
#
# @return the normalized JSON-LD object.
-def normalize(src):
- return Processor().normalize(src)
+def normalize(input):
+ return Processor().normalize(input)
##
# Removes the context from a JSON-LD object.
#
-# @param src the JSON-LD object to remove the context from.
+# @param input the JSON-LD object to remove the context from.
#
# @return the context-neutral JSON-LD object.
-def removeContext(src):
+def removeContext(input):
rval = None
- if src is not None:
+ if input is not None:
ctx = _createDefaultContext()
- rval = _expand(ctx, None, src, False)
+ rval = _expand(ctx, None, input, False)
return rval
##
+# Expands the JSON-LD object.
+#
+# @param input the JSON-LD object to expand.
+#
+# @return the expanded JSON-LD object.
+def expand(input):
+ return removeContext(input)
+
+##
# Adds the given context to the given context-neutral JSON-LD object.
#
# @param ctx the new context to use.
-# @param src the context-neutral JSON-LD object to add the context to.
+# @param input the context-neutral JSON-LD object to add the context to.
#
# @return the JSON-LD object with the new context.
-def addContext(ctx, src):
+def addContext(ctx, input):
rval = None
# TODO: should context simplification be optional? (ie: remove context
@@ -1467,7 +1655,7 @@ def addContext(ctx, src):
ctxOut = {}
# compact
- rval = _compact(ctx, None, src, ctxOut)
+ rval = _compact(ctx, None, input, ctxOut)
# add context if used
if len(ctxOut.keys()) > 0:
@@ -1481,16 +1669,26 @@ def addContext(ctx, src):
return rval
##
-# Changes the context of JSON-LD object "src" to "context", returning the
+# Changes the context of JSON-LD object "input" to "context", returning the
# output.
#
# @param ctx the new context to use.
-# @param src the input JSON-LD object.
+# @param input the input JSON-LD object.
#
# @return the output JSON-LD object.
-def changeContext(ctx, src):
+def changeContext(ctx, input):
# remove context and then add new one
- return jsonld.addContext(ctx, jsonld.removeContext(src))
+ return jsonld.addContext(ctx, jsonld.removeContext(input))
+
+##
+# Compacts the JSON-LD object.
+#
+# @param ctx the new context to use.
+# @param input the input JSON-LD object.
+#
+# @return the output JSON-LD object.
+def compact(ctx, input):
+ return changeContext(ctx, input)
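Sketch of the new convenience entry points (hypothetical document; exact output at this revision untested):

    from pyld import jsonld

    ctx = {'ex': 'http://example.org/vocab#'}
    out = jsonld.compact(ctx, {
        '@context': {'v': 'http://example.org/vocab#'},
        '@subject': 'http://example.org/test',
        'v:name': 'Test'
    })
    # out should use 'ex:'-prefixed terms plus a rebuilt '@context'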
##
# Merges one context with another.
@@ -1507,6 +1705,7 @@ def mergeContexts(ctx1, ctx2):
# if the new context contains any IRIs that are in the merged context,
# remove them from the merged context, they will be overwritten
for key in cCopy:
+ # FIXME: javascript has '@', should be _s? - js 2454
# ignore special keys starting with '@'
if key.find('@') != 0:
for mkey in cMerged:
@@ -1534,8 +1733,8 @@ def mergeContexts(ctx1, ctx2):
for key in cCopy:
cMerged[key] = cCopy[key]
+ # special-merge @coerce
if mergeCoerce or copyCoerce:
- # special-merge @coerce
for cType in c1:
# append existing-type properties that don't already exist
if cType in c2:
@@ -1591,6 +1790,7 @@ def mergeContexts(ctx1, ctx2):
# @param term the term to expand.
#
# @return the expanded term as an absolute IRI.
+# FIXME: should this actually just redefine the function? - js 2577
expandTerm = _expandTerm
##
@@ -1608,10 +1808,17 @@ def compactIri(ctx, iri):
##
# Frames JSON-LD input.
#
-# @param src the JSON-LD input.
+# @param input the JSON-LD input.
# @param frame the frame to use.
# @param options framing options to use.
#
# @return the framed output.
-def frame(src, frame, options=None):
- return Processor().frame(src, frame, options)
+def frame(input, frame, options=None):
+ return Processor().frame(input, frame, options)
+
+##
+# Creates the JSON-LD default context.
+#
+# @return the JSON-LD default context.
+# FIXME: should this actually just redefine the function? - js 2613
+createDefaultContext = _createDefaultContext
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-pyld.git