-
-
Save niklasl/7873635 to your computer and use it in GitHub Desktop.
| from rdflib import * | |
# Namespace object for building schema.org term IRIs (SDO.Date, SDO.URL, ...).
SDO = Namespace("http://schema.org/")

# Maps a schema.org datatype class to the compact XSD datatype name that
# make_context() uses as the "@type" coercion for properties whose only
# range is that class.
datatype_coerce_map = {
    # SDO.Number: XSD.double,  (mapping left disabled, as in the original)
    SDO.DateTime: "xsd:dateTime",
    SDO.Date: "xsd:date",
}
def make_context(graph, use_vocab=False, dt_coercion=False, object_coercion=False):
    """Build a JSON-LD ``@context`` from a schema.org vocabulary graph.

    Terms are keyed by the ``rdfs:label`` of each class and property found
    in *graph*. Classes and properties without a label are skipped, since
    they have no usable term name.

    Args:
        graph: rdflib graph containing the vocabulary (``rdfs:Class`` and
            ``rdf:Property`` resources, with ``schema:rangeIncludes``).
        use_vocab: if true, declare the schema.org namespace as ``@vocab``
            and only emit term definitions that carry a coercion; if false,
            map every labelled class and property to its full IRI.
        dt_coercion: if true, a property whose single range is a key of
            ``datatype_coerce_map`` is coerced to the mapped XSD datatype.
        object_coercion: if true, properties for which no range has
            ``schema:DataType`` on its ``rdfs:subClassOf`` chain are
            coerced to ``@id``.

    Returns:
        A dict of the form ``{"@context": {...}}``.
    """
    ctx = {'xsd': unicode(XSD)}
    if use_vocab:
        # Store a plain string, like every other IRI value in the context.
        ctx['@vocab'] = unicode(SDO)
    else:
        for cls in graph[:RDF.type:RDFS.Class]:
            term_key = graph.value(cls, RDFS.label)
            if term_key is None:
                # No label means no term name; avoid a None key in ctx.
                continue
            ctx[term_key] = unicode(cls)

    # Loop-invariant property path: the class itself plus all superclasses.
    superclass_path = RDFS.subClassOf * '*'

    for prop in graph[:RDF.type:RDF.Property]:
        term_key = graph.value(prop, RDFS.label)
        if term_key is None:
            continue
        ranges = list(graph.objects(prop, SDO.rangeIncludes))

        coercion = None
        if len(ranges) == 1:
            if ranges[0] == SDO.URL:
                # A property ranging exclusively over URL is always an IRI.
                coercion = "@id"
            elif dt_coercion:
                coercion = datatype_coerce_map.get(ranges[0])
        # NOTE(review): this branch is read as the alternative to the
        # single-range test above (zero or several ranges); confirm that
        # this matches the intended nesting.
        elif object_coercion and not any(
                SDO.DataType in graph.objects(rng, superclass_path)
                for rng in ranges):
            coercion = "@id"

        if coercion:
            dfn = ctx[term_key] = {"@type": coercion}
            if not use_vocab:
                # Without @vocab the term needs its IRI spelled out.
                dfn["@id"] = unicode(prop)
        elif not use_vocab:
            ctx[term_key] = unicode(prop)

    return {"@context": ctx}
if __name__ == '__main__':
    # Command line: SOURCE [-V] [-d] [-o]
    #   SOURCE  vocabulary file to parse (format guessed from its name)
    #   -V      do not use @vocab; list every term with its full IRI
    #   -d      enable datatype coercion
    #   -o      enable @id coercion for non-datatype properties
    import json
    import re
    import sys

    from rdflib.util import guess_format

    args = sys.argv[1:]
    if not args:
        # Fail with a readable message instead of an IndexError traceback.
        sys.exit("usage: %s SOURCE [-V] [-d] [-o]" % sys.argv[0])
    source = args.pop(0)

    use_vocab = '-V' not in args
    dt_coercion = '-d' in args
    object_coercion = '-o' in args

    graph = Graph().parse(source, format=guess_format(source))
    context = make_context(graph, use_vocab, dt_coercion, object_coercion)

    s = json.dumps(context, sort_keys=True, indent=2, separators=(',', ': '),
            ensure_ascii=False).encode('utf-8')

    # Collapse objects holding a single string member onto one line,
    # e.g. {"@type": "@id"}, to keep the output compact.
    compacted = re.sub(r'{\s+(\S+: "[^"]+")\s+}', r'{\1}', s)

    # Same output as the print statement (text plus newline), written so
    # that this line also parses under Python 3.
    sys.stdout.write(compacted + '\n')
Did an update to the coercion controls. The script now outputs a small, limited context by default (where only exclusive URL properties are coerced to @id). Use flag -d to turn on datatype coercion (defined for Date and DateTime), and flag -o to add @id coercion for all other properties which don't have any DataType class as a possible range.
This generates a JSON-LD @context from Elasticsearch mappings, with rudimentary xsd: type mappings: https://github.com/westurner/elasticsearchjsonld/blob/master/elasticsearchjsonld/elasticsearchjsonld.py
The TopBraid RDF versions of the schema.org ontology can be transformed to JSON-LD (e.g. with rdfpipe or a short pyld script with framing and compaction, etc.), but they do lag just a bit (due to a lack of build automation integration): http://topbraid.org/schema/
Test build: http://sdo-context-test.appspot.com/