Skip to content

Commit 6bc4b6e

Browse files
committed
update the >>update<< function to reuse BlankNode labels to refer to the same BlankNode
1 parent d5da755 commit 6bc4b6e

File tree

2 files changed

+139
-10
lines changed

2 files changed

+139
-10
lines changed

quit/core.py

+106-10
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99
from pygit2 import GIT_MERGE_ANALYSIS_NORMAL
1010
from pygit2 import GIT_SORT_REVERSE, GIT_RESET_HARD, GIT_STATUS_CURRENT
1111

12+
import rdflib
1213
from rdflib import Graph, ConjunctiveGraph, BNode, Literal, URIRef
14+
import rdflib.plugins.parsers.ntriples as ntriples
15+
1316
import re
1417

1518
from quit.conf import Feature, QuitGraphConfiguration
@@ -189,7 +192,12 @@ def instance(self, reference, force=False):
189192
for blob in self.getFilesForCommit(commit):
190193
try:
191194
(name, oid) = blob
192-
(f, context) = self.getFileReferenceAndContext(blob, commit)
195+
result = self.getFileReferenceAndContext(blob, commit)
196+
try:
197+
(f, context, nameMap) = result
198+
except ValueError:
199+
print(result)
200+
193201
internal_identifier = context.identifier + '-' + str(oid)
194202

195203
if force or not self.config.hasFeature(Feature.Persistence):
@@ -330,13 +338,15 @@ def changeset(self, commit):
330338
blob = (entity.name, entity.oid)
331339

332340
try:
333-
f, context = self.getFileReferenceAndContext(blob, commit)
341+
f, context, nameMap = self.getFileReferenceAndContext(blob, commit)
334342
except KeyError:
335343
graph = Graph(identifier=graphUri)
336-
graph.parse(data=entity.content, format='nt')
344+
parserGraph = ntriples.W3CNTriplesParser(ntriples.NTGraphSink(graph))
345+
source = rdflib.parser.create_input_source(data=entity.content)
346+
parserGraph.parse(source.getCharacterStream())
337347

338348
self._blobs.set(
339-
blob, (FileReference(entity.name, entity.content), graph)
349+
blob, (FileReference(entity.name, entity.content), graph, {})
340350
)
341351

342352
private_uri = QUIT["graph-{}".format(entity.oid)]
@@ -413,17 +423,74 @@ def getFileReferenceAndContext(self, blob, commit):
413423
content = commit.node(path=name).content
414424
graphUri = self._graphconfigs.get(commit.id).getgraphuriforfile(name)
415425
graph = Graph(identifier=URIRef(graphUri))
416-
graph.parse(data=content, format='nt')
417-
quitWorkingData = (FileReference(name, content), graph)
426+
parserGraph = ntriples.W3CNTriplesParser(ntriples.NTGraphSink(graph))
427+
source = rdflib.parser.create_input_source(data=content)
428+
parserGraph.parse(source.getCharacterStream())
429+
nameMap = {v: k for k, v in parserGraph._bnode_ids.items()}
430+
quitWorkingData = (FileReference(name, content), graph, nameMap)
418431
self._blobs.set(blob, quitWorkingData)
419432
return quitWorkingData
420433
return self._blobs.get(blob)
421434

435+
def _replaceLabledBlankNodes(self, parsedQuery, parent_commit_ref):
436+
"""Replaces blanknodes in parsedQuery with Blanknodes that have the same label in the graph.nt
437+
E.g. We have a Graph with the content: '_:a <urn:pred> _:b'
438+
A BNode('a') found in parsedQuery would be replaced by the blanknode _:a found in the graph.nt.
439+
That way, updates can pass Blanknodes as instances and do not have to work on string representations.
440+
"""
441+
def replaceBlankNode(parsedQuery, nameMap):
442+
nameMap = {v: k for k, v in nameMap.items()}
443+
for update in parsedQuery:
444+
for graphURI in update['quads']:
445+
new_triples = []
446+
for triple in update['quads'][graphURI]:
447+
new_triple_subj = None
448+
new_triple_obj = None
449+
if isinstance(triple[0], rdflib.BNode):
450+
bNode_key = triple[0].n3()
451+
bNode_key = bNode_key[2:]
452+
if bNode_key in nameMap:
453+
new_triple_subj = nameMap[bNode_key]
454+
else:
455+
new_triple_subj = triple[0]
456+
nameMap[bNode_key] = triple[0]
457+
else:
458+
new_triple_subj = triple[0]
459+
if isinstance(triple[2], rdflib.BNode):
460+
bNode_key = triple[2].n3()
461+
bNode_key = bNode_key[2:]
462+
if bNode_key in nameMap:
463+
new_triple_obj = nameMap[bNode_key]
464+
else:
465+
new_triple_obj = triple[2]
466+
nameMap[bNode_key] = triple[2]
467+
else:
468+
new_triple_obj = triple[2]
469+
new_triples.append((new_triple_subj, triple[1], new_triple_obj))
470+
update['quads'][graphURI] = new_triples
471+
472+
if parent_commit_ref == None:
473+
return {}
474+
parent_commit = self.repository.revision(parent_commit_ref)
475+
blobs = self.getFilesForCommit(parent_commit)
476+
for blob in blobs:
477+
(name, oid) = blob
478+
if(name == "graph.nt"):
479+
file_reference, context, nameMap = self.getFileReferenceAndContext(
480+
blob, parent_commit)
481+
replaceBlankNode(parsedQuery, nameMap)
482+
return nameMap
483+
return {}
484+
422485
def applyQueryOnCommit(self, parsedQuery, parent_commit_ref, target_ref, query=None,
423486
default_graph=[], named_graph=[]):
424487
"""Apply an update query on the graph and the git repository."""
425488
graph, commitid = self.instance(parent_commit_ref)
489+
triples = {(x.n3(), y.n3(), z.n3()) for x, y, z in graph.store}
490+
nameMap = self._replaceLabledBlankNodes(parsedQuery, parent_commit_ref)
426491
resultingChanges, exception = graph.update(parsedQuery)
492+
self._replaceExplicitNamedBlankNodesInChanges(resultingChanges, nameMap)
493+
triples = {(x.n3(), y.n3(), z.n3()) for x, y, z in graph.store}
427494
if exception:
428495
# TODO need to revert or invalidate the graph at this point.
429496
pass
@@ -432,6 +499,7 @@ def applyQueryOnCommit(self, parsedQuery, parent_commit_ref, target_ref, query=N
432499
named_graph=named_graph)
433500
if exception:
434501
raise exception
502+
triples = {(x.n3(), y.n3(), z.n3()) for x, y, z in graph.store}
435503
return oid
436504

437505
def commit(self, graph, delta, message, parent_commit_ref, target_ref, query=None,
@@ -494,7 +562,7 @@ def commit(self, graph, delta, message, parent_commit_ref, target_ref, query=Non
494562

495563
# Update Cache and add new contexts to store
496564
blob = fileReference.path, index.stash[fileReference.path][0]
497-
self._blobs.set(blob, (fileReference, graph.store.get_context(identifier)))
565+
self._blobs.set(blob, (fileReference, graph.store.get_context(identifier), {}))
498566
blobs_new.add(blob)
499567
if graphconfig.mode == 'configuration':
500568
index.add('config.ttl', new_config.graphconf.serialize(format='turtle').decode())
@@ -541,12 +609,40 @@ def _build_message(self, message, query, result, default_graph, named_graph, **k
541609
out.append('{}: "{}"'.format(k, v.replace('"', "\\\"")))
542610
return "\n".join(out)
543611

612+
def _replaceExplicitNamedBlankNodesInChanges(self, changes, nameMap):
613+
"""Any changes applied to the update query by _replaceLabledBlankNodes have to be reverted for git deltas.
614+
Otherwise the serialization results in Blanknodes being represented as random hashes instead of their original labels.
615+
"""
616+
def lookUpBNode(bNode, nameMap):
617+
if(bNode in nameMap):
618+
return rdflib.BNode(nameMap[bNode])
619+
return bNode
620+
621+
def replaceBNodesByName(triple, nameMap):
622+
new_subject = triple[0]
623+
new_object = triple[2]
624+
if(isinstance(new_subject, BNode)):
625+
new_subject = lookUpBNode(new_subject, nameMap)
626+
if(isinstance(new_object, BNode)):
627+
new_object = lookUpBNode(new_object, nameMap)
628+
return (new_subject, triple[1], new_object)
629+
630+
if len(nameMap) == 0:
631+
return
632+
for change in changes:
633+
for context in change['delta']:
634+
for payload in change['delta'][context]:
635+
if(isinstance(payload[1], list)):
636+
for i in range(0, len(payload[1])):
637+
payload[1][i] = replaceBNodesByName(payload[1][i], nameMap)
638+
544639
def _applyKnownGraphs(self, delta, blobs, parent_commit, index):
545640
blobs_new = set()
546641
for blob in blobs:
547642
(fileName, oid) = blob
548643
try:
549-
file_reference, context = self.getFileReferenceAndContext(blob, parent_commit)
644+
file_reference, context, nameMap = self.getFileReferenceAndContext(
645+
blob, parent_commit)
550646
for entry in delta:
551647
changeset = entry['delta'].get(context.identifier, None)
552648

@@ -558,7 +654,7 @@ def _applyKnownGraphs(self, delta, blobs, parent_commit, index):
558654

559655
self._blobs.remove(blob)
560656
blob = fileName, index.stash[file_reference.path][0]
561-
self._blobs.set(blob, (file_reference, context))
657+
self._blobs.set(blob, (file_reference, context, nameMap))
562658
blobs_new.add(blob)
563659
except KeyError:
564660
pass
@@ -580,7 +676,7 @@ def _applyUnknownGraphs(self, delta, known_blobs):
580676
n = [
581677
int(m.group(1)) for b in known_blobs for m in [reg.search(b)] if m
582678
] + [0]
583-
fileName = '{}_{}.nt'.format(iri_to_name(identifier), max(n)+1)
679+
fileName = '{}_{}.nt'.format(iri_to_name(identifier), max(n) + 1)
584680

585681
new_contexts[identifier] = FileReference(fileName, '')
586682

tests/test_app.py

+33
Original file line numberDiff line numberDiff line change
@@ -3841,6 +3841,39 @@ def testDeleteWithWhitespaceFile(self):
38413841
with open(path.join(repo.workdir, 'graph.nt'), 'r') as f:
38423842
self.assertEqual('\n', f.read())
38433843

3844+
def testUpdateWithBlankNode(self):
    """An INSERT DATA that uses an existing blank node label extends that node in graph.nt."""
    # Prepare a git repository
    graphContent = """<urn:x> <urn:y> <urn:z> .
_:a <urn:pred> _:c .
_:c <urn:pred> _:d .
"""
    with TemporaryRepositoryFactory().withGraph("http://example.org/", graphContent) as repo:

        # Start Quit
        args = quitApp.getDefaults()
        args['targetdir'] = repo.workdir
        app = create_app(args).test_client()

        # The working copy must hold exactly the prepared content before the update.
        with open(path.join(repo.workdir, 'graph.nt'), 'r') as f:
            self.assertEqual(graphContent, f.read())

        # Execute the update query; '_:c' is a label that already occurs in graph.nt.
        update = 'INSERT DATA { GRAPH <http://example.org/> { _:c <urn:pred> _:e .}}'
        app.post('/sparql',
                 content_type="application/sparql-update",
                 data=update)

        # The new triple is expected next to the existing ones, with the labels kept.
        targetContent = """
<urn:x> <urn:y> <urn:z> .
_:a <urn:pred> _:c .
_:c <urn:pred> _:d .
_:c <urn:pred> _:e .
"""

        # Compare against the file as committed on the branch, not the working copy.
        reference = repo.lookup_reference('refs/heads/%s' % "master")
        branchOid = reference.resolve().target
        branchCommit = repo.get(branchOid)
        self.assertEqual(targetContent, branchCommit.tree["graph.nt"].data.decode("utf-8"))
3876+
38443877

38453878
if __name__ == '__main__':
38463879
unittest.main()

0 commit comments

Comments
 (0)