99from pygit2 import GIT_MERGE_ANALYSIS_NORMAL
1010from pygit2 import GIT_SORT_REVERSE , GIT_RESET_HARD , GIT_STATUS_CURRENT
1111
12+ import rdflib
1213from rdflib import Graph , ConjunctiveGraph , BNode , Literal , URIRef
14+ import rdflib .plugins .parsers .ntriples as ntriples
15+
1316import re
1417
1518from quit .conf import Feature , QuitGraphConfiguration
@@ -189,7 +192,12 @@ def instance(self, reference, force=False):
189192 for blob in self .getFilesForCommit (commit ):
190193 try :
191194 (name , oid ) = blob
192- (f , context ) = self .getFileReferenceAndContext (blob , commit )
195+ result = self .getFileReferenceAndContext (blob , commit )
196+ try :
197+ (f , context , nameMap ) = result
198+ except ValueError :
199+ print (result )
200+
193201 internal_identifier = context .identifier + '-' + str (oid )
194202
195203 if force or not self .config .hasFeature (Feature .Persistence ):
@@ -330,13 +338,15 @@ def changeset(self, commit):
330338 blob = (entity .name , entity .oid )
331339
332340 try :
333- f , context = self .getFileReferenceAndContext (blob , commit )
341+ f , context , nameMap = self .getFileReferenceAndContext (blob , commit )
334342 except KeyError :
335343 graph = Graph (identifier = graphUri )
336- graph .parse (data = entity .content , format = 'nt' )
344+ parserGraph = ntriples .W3CNTriplesParser (ntriples .NTGraphSink (graph ))
345+ source = rdflib .parser .create_input_source (data = entity .content )
346+ parserGraph .parse (source .getCharacterStream ())
337347
338348 self ._blobs .set (
339- blob , (FileReference (entity .name , entity .content ), graph )
349+ blob , (FileReference (entity .name , entity .content ), graph , {} )
340350 )
341351
342352 private_uri = QUIT ["graph-{}" .format (entity .oid )]
@@ -413,17 +423,74 @@ def getFileReferenceAndContext(self, blob, commit):
413423 content = commit .node (path = name ).content
414424 graphUri = self ._graphconfigs .get (commit .id ).getgraphuriforfile (name )
415425 graph = Graph (identifier = URIRef (graphUri ))
416- graph .parse (data = content , format = 'nt' )
417- quitWorkingData = (FileReference (name , content ), graph )
426+ parserGraph = ntriples .W3CNTriplesParser (ntriples .NTGraphSink (graph ))
427+ source = rdflib .parser .create_input_source (data = content )
428+ parserGraph .parse (source .getCharacterStream ())
429+ nameMap = {v : k for k , v in parserGraph ._bnode_ids .items ()}
430+ quitWorkingData = (FileReference (name , content ), graph , nameMap )
418431 self ._blobs .set (blob , quitWorkingData )
419432 return quitWorkingData
420433 return self ._blobs .get (blob )
421434
435+ def _replaceLabledBlankNodes (self , parsedQuery , parent_commit_ref ):
436+ """Replaces blanknodes in parsedQuery with Blanknodes that have the same label in the graph.nt
437+ E.g. We have a Graph with the content: '_:a <urn:pred> _:b'
438+ A BNode('a') found in parsedQuery would be replaced by the blanknode _:a found in the graph.nt.
439+ That way, updates can pass Blanknodes as instances and do not have to work on string representations.
440+ """
441+ def replaceBlankNode (parsedQuery , nameMap ):
442+ nameMap = {v : k for k , v in nameMap .items ()}
443+ for update in parsedQuery :
444+ for graphURI in update ['quads' ]:
445+ new_triples = []
446+ for triple in update ['quads' ][graphURI ]:
447+ new_triple_subj = None
448+ new_triple_obj = None
449+ if isinstance (triple [0 ], rdflib .BNode ):
450+ bNode_key = triple [0 ].n3 ()
451+ bNode_key = bNode_key [2 :]
452+ if bNode_key in nameMap :
453+ new_triple_subj = nameMap [bNode_key ]
454+ else :
455+ new_triple_subj = triple [0 ]
456+ nameMap [bNode_key ] = triple [0 ]
457+ else :
458+ new_triple_subj = triple [0 ]
459+ if isinstance (triple [2 ], rdflib .BNode ):
460+ bNode_key = triple [2 ].n3 ()
461+ bNode_key = bNode_key [2 :]
462+ if bNode_key in nameMap :
463+ new_triple_obj = nameMap [bNode_key ]
464+ else :
465+ new_triple_obj = triple [2 ]
466+ nameMap [bNode_key ] = triple [2 ]
467+ else :
468+ new_triple_obj = triple [2 ]
469+ new_triples .append ((new_triple_subj , triple [1 ], new_triple_obj ))
470+ update ['quads' ][graphURI ] = new_triples
471+
472+ if parent_commit_ref == None :
473+ return {}
474+ parent_commit = self .repository .revision (parent_commit_ref )
475+ blobs = self .getFilesForCommit (parent_commit )
476+ for blob in blobs :
477+ (name , oid ) = blob
478+ if (name == "graph.nt" ):
479+ file_reference , context , nameMap = self .getFileReferenceAndContext (
480+ blob , parent_commit )
481+ replaceBlankNode (parsedQuery , nameMap )
482+ return nameMap
483+ return {}
484+
422485 def applyQueryOnCommit (self , parsedQuery , parent_commit_ref , target_ref , query = None ,
423486 default_graph = [], named_graph = []):
424487 """Apply an update query on the graph and the git repository."""
425488 graph , commitid = self .instance (parent_commit_ref )
489+ triples = {(x .n3 (), y .n3 (), z .n3 ()) for x , y , z in graph .store }
490+ nameMap = self ._replaceLabledBlankNodes (parsedQuery , parent_commit_ref )
426491 resultingChanges , exception = graph .update (parsedQuery )
492+ self ._replaceExplicitNamedBlankNodesInChanges (resultingChanges , nameMap )
493+ triples = {(x .n3 (), y .n3 (), z .n3 ()) for x , y , z in graph .store }
427494 if exception :
428495 # TODO need to revert or invalidate the graph at this point.
429496 pass
@@ -432,6 +499,7 @@ def applyQueryOnCommit(self, parsedQuery, parent_commit_ref, target_ref, query=N
432499 named_graph = named_graph )
433500 if exception :
434501 raise exception
502+ triples = {(x .n3 (), y .n3 (), z .n3 ()) for x , y , z in graph .store }
435503 return oid
436504
437505 def commit (self , graph , delta , message , parent_commit_ref , target_ref , query = None ,
@@ -494,7 +562,7 @@ def commit(self, graph, delta, message, parent_commit_ref, target_ref, query=Non
494562
495563 # Update Cache and add new contexts to store
496564 blob = fileReference .path , index .stash [fileReference .path ][0 ]
497- self ._blobs .set (blob , (fileReference , graph .store .get_context (identifier )))
565+ self ._blobs .set (blob , (fileReference , graph .store .get_context (identifier ), {} ))
498566 blobs_new .add (blob )
499567 if graphconfig .mode == 'configuration' :
500568 index .add ('config.ttl' , new_config .graphconf .serialize (format = 'turtle' ).decode ())
@@ -541,12 +609,40 @@ def _build_message(self, message, query, result, default_graph, named_graph, **k
541609 out .append ('{}: "{}"' .format (k , v .replace ('"' , "\\ \" " )))
542610 return "\n " .join (out )
543611
612+ def _replaceExplicitNamedBlankNodesInChanges (self , changes , nameMap ):
613+ """Any changes applied to the update query by _replaceLabledBlankNodes have to be reverted for git deltas.
614+ Otherwise the serialization results in Blanknodes being represented as random hashes instead of their original labels.
615+ """
616+ def lookUpBNode (bNode , nameMap ):
617+ if (bNode in nameMap ):
618+ return rdflib .BNode (nameMap [bNode ])
619+ return bNode
620+
621+ def replaceBNodesByName (triple , nameMap ):
622+ new_subject = triple [0 ]
623+ new_object = triple [2 ]
624+ if (isinstance (new_subject , BNode )):
625+ new_subject = lookUpBNode (new_subject , nameMap )
626+ if (isinstance (new_object , BNode )):
627+ new_object = lookUpBNode (new_object , nameMap )
628+ return (new_subject , triple [1 ], new_object )
629+
630+ if len (nameMap ) == 0 :
631+ return
632+ for change in changes :
633+ for context in change ['delta' ]:
634+ for payload in change ['delta' ][context ]:
635+ if (isinstance (payload [1 ], list )):
636+ for i in range (0 , len (payload [1 ])):
637+ payload [1 ][i ] = replaceBNodesByName (payload [1 ][i ], nameMap )
638+
544639 def _applyKnownGraphs (self , delta , blobs , parent_commit , index ):
545640 blobs_new = set ()
546641 for blob in blobs :
547642 (fileName , oid ) = blob
548643 try :
549- file_reference , context = self .getFileReferenceAndContext (blob , parent_commit )
644+ file_reference , context , nameMap = self .getFileReferenceAndContext (
645+ blob , parent_commit )
550646 for entry in delta :
551647 changeset = entry ['delta' ].get (context .identifier , None )
552648
@@ -558,7 +654,7 @@ def _applyKnownGraphs(self, delta, blobs, parent_commit, index):
558654
559655 self ._blobs .remove (blob )
560656 blob = fileName , index .stash [file_reference .path ][0 ]
561- self ._blobs .set (blob , (file_reference , context ))
657+ self ._blobs .set (blob , (file_reference , context , nameMap ))
562658 blobs_new .add (blob )
563659 except KeyError :
564660 pass
@@ -580,7 +676,7 @@ def _applyUnknownGraphs(self, delta, known_blobs):
580676 n = [
581677 int (m .group (1 )) for b in known_blobs for m in [reg .search (b )] if m
582678 ] + [0 ]
583- fileName = '{}_{}.nt' .format (iri_to_name (identifier ), max (n )+ 1 )
679+ fileName = '{}_{}.nt' .format (iri_to_name (identifier ), max (n ) + 1 )
584680
585681 new_contexts [identifier ] = FileReference (fileName , '' )
586682
0 commit comments