Skip to content

Commit e5c5ff2

Browse files
committed
update the >>update<< function to reuse BlankNode labels to refer to the same BlankNode
1 parent d5da755 commit e5c5ff2

File tree

2 files changed

+128
-10
lines changed

2 files changed

+128
-10
lines changed

quit/core.py

Lines changed: 95 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99
from pygit2 import GIT_MERGE_ANALYSIS_NORMAL
1010
from pygit2 import GIT_SORT_REVERSE, GIT_RESET_HARD, GIT_STATUS_CURRENT
1111

12+
import rdflib
1213
from rdflib import Graph, ConjunctiveGraph, BNode, Literal, URIRef
14+
import rdflib.plugins.parsers.ntriples as ntriples
15+
1316
import re
1417

1518
from quit.conf import Feature, QuitGraphConfiguration
@@ -189,7 +192,12 @@ def instance(self, reference, force=False):
189192
for blob in self.getFilesForCommit(commit):
190193
try:
191194
(name, oid) = blob
192-
(f, context) = self.getFileReferenceAndContext(blob, commit)
195+
result = self.getFileReferenceAndContext(blob, commit)
196+
try:
197+
(f, context, nameMap) = result
198+
except ValueError:
199+
print(result)
200+
193201
internal_identifier = context.identifier + '-' + str(oid)
194202

195203
if force or not self.config.hasFeature(Feature.Persistence):
@@ -330,13 +338,15 @@ def changeset(self, commit):
330338
blob = (entity.name, entity.oid)
331339

332340
try:
333-
f, context = self.getFileReferenceAndContext(blob, commit)
341+
f, context, nameMap = self.getFileReferenceAndContext(blob, commit)
334342
except KeyError:
335343
graph = Graph(identifier=graphUri)
336-
graph.parse(data=entity.content, format='nt')
344+
parserGraph = ntriples.W3CNTriplesParser(ntriples.NTGraphSink(graph))
345+
source = rdflib.parser.create_input_source(data=entity.content)
346+
parserGraph.parse(source.getCharacterStream())
337347

338348
self._blobs.set(
339-
blob, (FileReference(entity.name, entity.content), graph)
349+
blob, (FileReference(entity.name, entity.content), graph, {})
340350
)
341351

342352
private_uri = QUIT["graph-{}".format(entity.oid)]
@@ -413,17 +423,68 @@ def getFileReferenceAndContext(self, blob, commit):
413423
content = commit.node(path=name).content
414424
graphUri = self._graphconfigs.get(commit.id).getgraphuriforfile(name)
415425
graph = Graph(identifier=URIRef(graphUri))
416-
graph.parse(data=content, format='nt')
417-
quitWorkingData = (FileReference(name, content), graph)
426+
parserGraph = ntriples.W3CNTriplesParser(ntriples.NTGraphSink(graph))
427+
source = rdflib.parser.create_input_source(data=content)
428+
parserGraph.parse(source.getCharacterStream())
429+
nameMap = {v: k for k, v in parserGraph._bnode_ids.items()}
430+
quitWorkingData = (FileReference(name, content), graph, nameMap)
418431
self._blobs.set(blob, quitWorkingData)
419432
return quitWorkingData
420433
return self._blobs.get(blob)
421434

435+
def _replaceLabledBlankNodes(self, parsedQuery, parent_commit_ref):
    """Point labelled blank nodes of an update at the nodes already stored.

    For the blob named ``graph.nt`` in the parent commit, the blank node
    name map returned by ``getFileReferenceAndContext`` is inverted to a
    ``label -> node`` lookup. Every blank node used as subject or object in
    the ``quads`` of ``parsedQuery`` whose label is in that lookup is
    replaced by the stored node. An unknown label is registered on first
    use, so later occurrences within the same call resolve to the same node.

    The quads of ``parsedQuery`` are rewritten in place.

    :param parsedQuery: iterable of update operations, each exposing a
        ``'quads'`` mapping of graph URI to a list of triples.
    :param parent_commit_ref: reference passed to
        ``self.repository.revision`` to look up the parent commit.
    :return: the name map of the ``graph.nt`` blob (node to label, as
        produced by ``getFileReferenceAndContext``), or an empty dict when
        the commit has no such blob.
    """
    def resolve(term, label_to_node):
        # Only blank nodes are rewritten; URIs and literals pass through.
        if not isinstance(term, rdflib.BNode):
            return term
        # n3() renders a blank node as "_:label"; strip the "_:" prefix.
        label = term.n3()[2:]
        # Known label: reuse the stored node. Unknown label: remember this
        # node so the same label maps to it for the rest of the call.
        return label_to_node.setdefault(label, term)

    def replaceBlankNode(parsedQuery, nameMap):
        # The incoming map is keyed by node; look-ups here go by label.
        label_to_node = {label: node for node, label in nameMap.items()}
        for update in parsedQuery:
            for graphURI in update['quads']:
                update['quads'][graphURI] = [
                    (
                        resolve(triple[0], label_to_node),
                        triple[1],
                        resolve(triple[2], label_to_node),
                    )
                    for triple in update['quads'][graphURI]
                ]

    parent_commit = self.repository.revision(parent_commit_ref)
    # Default to an empty map so callers can always run membership tests on
    # the result, even when the commit contains no "graph.nt" blob.
    nameMap = {}
    for blob in self.getFilesForCommit(parent_commit):
        name, _ = blob
        # NOTE(review): only the file literally named "graph.nt" is
        # considered; other graph files are left untouched - confirm that
        # this restriction is intended.
        if name == "graph.nt":
            _, _, nameMap = self.getFileReferenceAndContext(blob, parent_commit)
            replaceBlankNode(parsedQuery, nameMap)
    return nameMap
476+
422477
def applyQueryOnCommit(self, parsedQuery, parent_commit_ref, target_ref, query=None,
423478
default_graph=[], named_graph=[]):
424479
"""Apply an update query on the graph and the git repository."""
425480
graph, commitid = self.instance(parent_commit_ref)
481+
triples = {(x.n3(), y.n3(), z.n3()) for x, y, z in graph.store}
482+
nameMap = self._replaceLabledBlankNodes(parsedQuery, parent_commit_ref)
483+
key = next(iter(parsedQuery[0]['quads']))
484+
ptriples = {(x.n3(), y.n3(), z.n3()) for x, y, z in parsedQuery[0]['quads'][key]}
426485
resultingChanges, exception = graph.update(parsedQuery)
486+
self._replaceExplicitNamedBlankNodesInChanges(resultingChanges, nameMap)
487+
triples = {(x.n3(), y.n3(), z.n3()) for x, y, z in graph.store}
427488
if exception:
428489
# TODO need to revert or invalidate the graph at this point.
429490
pass
@@ -432,6 +493,7 @@ def applyQueryOnCommit(self, parsedQuery, parent_commit_ref, target_ref, query=N
432493
named_graph=named_graph)
433494
if exception:
434495
raise exception
496+
triples = {(x.n3(), y.n3(), z.n3()) for x, y, z in graph.store}
435497
return oid
436498

437499
def commit(self, graph, delta, message, parent_commit_ref, target_ref, query=None,
@@ -494,7 +556,7 @@ def commit(self, graph, delta, message, parent_commit_ref, target_ref, query=Non
494556

495557
# Update Cache and add new contexts to store
496558
blob = fileReference.path, index.stash[fileReference.path][0]
497-
self._blobs.set(blob, (fileReference, graph.store.get_context(identifier)))
559+
self._blobs.set(blob, (fileReference, graph.store.get_context(identifier), {}))
498560
blobs_new.add(blob)
499561
if graphconfig.mode == 'configuration':
500562
index.add('config.ttl', new_config.graphconf.serialize(format='turtle').decode())
@@ -541,12 +603,35 @@ def _build_message(self, message, query, result, default_graph, named_graph, **k
541603
out.append('{}: "{}"'.format(k, v.replace('"', "\\\"")))
542604
return "\n".join(out)
543605

606+
def _replaceExplicitNamedBlankNodesInChanges(self, changes, nameMap):
607+
def lookUpBNode(bNode, nameMap):
608+
if(bNode in nameMap):
609+
return rdflib.BNode(nameMap[bNode])
610+
return bNode
611+
612+
def replaceBNodesByName(triple, nameMap):
613+
new_subject = triple[0]
614+
new_object = triple[2]
615+
if(isinstance(new_subject, BNode)):
616+
new_subject = lookUpBNode(new_subject, nameMap)
617+
if(isinstance(new_object, BNode)):
618+
new_object = lookUpBNode(new_object, nameMap)
619+
return (new_subject, triple[1], new_object)
620+
621+
622+
for change in changes:
623+
for context in change['delta']:
624+
for payload in change['delta'][context]:
625+
for i in range(0, len(payload[1])):
626+
payload[1][i] = replaceBNodesByName(payload[1][i], nameMap)
627+
544628
def _applyKnownGraphs(self, delta, blobs, parent_commit, index):
545629
blobs_new = set()
546630
for blob in blobs:
547631
(fileName, oid) = blob
548632
try:
549-
file_reference, context = self.getFileReferenceAndContext(blob, parent_commit)
633+
file_reference, context, nameMap = self.getFileReferenceAndContext(
634+
blob, parent_commit)
550635
for entry in delta:
551636
changeset = entry['delta'].get(context.identifier, None)
552637

@@ -558,7 +643,7 @@ def _applyKnownGraphs(self, delta, blobs, parent_commit, index):
558643

559644
self._blobs.remove(blob)
560645
blob = fileName, index.stash[file_reference.path][0]
561-
self._blobs.set(blob, (file_reference, context))
646+
self._blobs.set(blob, (file_reference, context, nameMap))
562647
blobs_new.add(blob)
563648
except KeyError:
564649
pass
@@ -580,7 +665,7 @@ def _applyUnknownGraphs(self, delta, known_blobs):
580665
n = [
581666
int(m.group(1)) for b in known_blobs for m in [reg.search(b)] if m
582667
] + [0]
583-
fileName = '{}_{}.nt'.format(iri_to_name(identifier), max(n)+1)
668+
fileName = '{}_{}.nt'.format(iri_to_name(identifier), max(n) + 1)
584669

585670
new_contexts[identifier] = FileReference(fileName, '')
586671

tests/test_app.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3841,6 +3841,39 @@ def testDeleteWithWhitespaceFile(self):
38413841
with open(path.join(repo.workdir, 'graph.nt'), 'r') as f:
38423842
self.assertEqual('\n', f.read())
38433843

3844+
def testUpdateWithBlankNode(self):
3845+
# Prepare a git Repository
3846+
graphContent = """<urn:x> <urn:y> <urn:z> .
3847+
_:a <urn:pred> _:c .
3848+
_:c <urn:pred> _:d .
3849+
"""
3850+
with TemporaryRepositoryFactory().withGraph("http://example.org/", graphContent) as repo:
3851+
3852+
# Start Quit
3853+
args = quitApp.getDefaults()
3854+
args['targetdir'] = repo.workdir
3855+
app = create_app(args).test_client()
3856+
3857+
with open(path.join(repo.workdir, 'graph.nt'), 'r') as f:
3858+
self.assertEqual(graphContent, f.read())
3859+
3860+
# execute Update query
3861+
update = 'INSERT DATA { GRAPH <http://example.org/> { _:c <urn:pred> _:e .}}'
3862+
result = app.post('/sparql',
3863+
content_type="application/sparql-update",
3864+
data=update)
3865+
targetContent = """
3866+
<urn:x> <urn:y> <urn:z> .
3867+
_:a <urn:pred> _:c .
3868+
_:c <urn:pred> _:d .
3869+
_:c <urn:pred> _:e .
3870+
"""
3871+
3872+
reference = repo.lookup_reference('refs/heads/%s' % "master")
3873+
branchOid = reference.resolve().target
3874+
branchCommit = repo.get(branchOid)
3875+
self.assertEqual(targetContent, branchCommit.tree["graph.nt"].data.decode("utf-8"))
3876+
38443877

38453878
if __name__ == '__main__':
38463879
unittest.main()

0 commit comments

Comments
 (0)