9
9
from pygit2 import GIT_MERGE_ANALYSIS_NORMAL
10
10
from pygit2 import GIT_SORT_REVERSE , GIT_RESET_HARD , GIT_STATUS_CURRENT
11
11
12
+ import rdflib
12
13
from rdflib import Graph , ConjunctiveGraph , BNode , Literal , URIRef
14
+ import rdflib .plugins .parsers .ntriples as ntriples
15
+
13
16
import re
14
17
15
18
from quit .conf import Feature , QuitGraphConfiguration
@@ -189,7 +192,12 @@ def instance(self, reference, force=False):
189
192
for blob in self .getFilesForCommit (commit ):
190
193
try :
191
194
(name , oid ) = blob
192
- (f , context ) = self .getFileReferenceAndContext (blob , commit )
195
+ result = self .getFileReferenceAndContext (blob , commit )
196
+ try :
197
+ (f , context , nameMap ) = result
198
+ except ValueError :
199
+ print (result )
200
+
193
201
internal_identifier = context .identifier + '-' + str (oid )
194
202
195
203
if force or not self .config .hasFeature (Feature .Persistence ):
@@ -330,13 +338,15 @@ def changeset(self, commit):
330
338
blob = (entity .name , entity .oid )
331
339
332
340
try :
333
- f , context = self .getFileReferenceAndContext (blob , commit )
341
+ f , context , nameMap = self .getFileReferenceAndContext (blob , commit )
334
342
except KeyError :
335
343
graph = Graph (identifier = graphUri )
336
- graph .parse (data = entity .content , format = 'nt' )
344
+ parserGraph = ntriples .W3CNTriplesParser (ntriples .NTGraphSink (graph ))
345
+ source = rdflib .parser .create_input_source (data = entity .content )
346
+ parserGraph .parse (source .getCharacterStream ())
337
347
338
348
self ._blobs .set (
339
- blob , (FileReference (entity .name , entity .content ), graph )
349
+ blob , (FileReference (entity .name , entity .content ), graph , {} )
340
350
)
341
351
342
352
private_uri = QUIT ["graph-{}" .format (entity .oid )]
@@ -413,17 +423,74 @@ def getFileReferenceAndContext(self, blob, commit):
413
423
content = commit .node (path = name ).content
414
424
graphUri = self ._graphconfigs .get (commit .id ).getgraphuriforfile (name )
415
425
graph = Graph (identifier = URIRef (graphUri ))
416
- graph .parse (data = content , format = 'nt' )
417
- quitWorkingData = (FileReference (name , content ), graph )
426
+ parserGraph = ntriples .W3CNTriplesParser (ntriples .NTGraphSink (graph ))
427
+ source = rdflib .parser .create_input_source (data = content )
428
+ parserGraph .parse (source .getCharacterStream ())
429
+ nameMap = {v : k for k , v in parserGraph ._bnode_ids .items ()}
430
+ quitWorkingData = (FileReference (name , content ), graph , nameMap )
418
431
self ._blobs .set (blob , quitWorkingData )
419
432
return quitWorkingData
420
433
return self ._blobs .get (blob )
421
434
435
+ def _replaceLabledBlankNodes (self , parsedQuery , parent_commit_ref ):
436
+ """Replaces blanknodes in parsedQuery with Blanknodes that have the same label in the graph.nt
437
+ E.g. We have a Graph with the content: '_:a <urn:pred> _:b'
438
+ A BNode('a') found in parsedQuery would be replaced by the blanknode _:a found in the graph.nt.
439
+ That way, updates can pass Blanknodes as instances and do not have to work on string representations.
440
+ """
441
+ def replaceBlankNode (parsedQuery , nameMap ):
442
+ nameMap = {v : k for k , v in nameMap .items ()}
443
+ for update in parsedQuery :
444
+ for graphURI in update ['quads' ]:
445
+ new_triples = []
446
+ for triple in update ['quads' ][graphURI ]:
447
+ new_triple_subj = None
448
+ new_triple_obj = None
449
+ if isinstance (triple [0 ], rdflib .BNode ):
450
+ bNode_key = triple [0 ].n3 ()
451
+ bNode_key = bNode_key [2 :]
452
+ if bNode_key in nameMap :
453
+ new_triple_subj = nameMap [bNode_key ]
454
+ else :
455
+ new_triple_subj = triple [0 ]
456
+ nameMap [bNode_key ] = triple [0 ]
457
+ else :
458
+ new_triple_subj = triple [0 ]
459
+ if isinstance (triple [2 ], rdflib .BNode ):
460
+ bNode_key = triple [2 ].n3 ()
461
+ bNode_key = bNode_key [2 :]
462
+ if bNode_key in nameMap :
463
+ new_triple_obj = nameMap [bNode_key ]
464
+ else :
465
+ new_triple_obj = triple [2 ]
466
+ nameMap [bNode_key ] = triple [2 ]
467
+ else :
468
+ new_triple_obj = triple [2 ]
469
+ new_triples .append ((new_triple_subj , triple [1 ], new_triple_obj ))
470
+ update ['quads' ][graphURI ] = new_triples
471
+
472
+ if parent_commit_ref == None :
473
+ return {}
474
+ parent_commit = self .repository .revision (parent_commit_ref )
475
+ blobs = self .getFilesForCommit (parent_commit )
476
+ for blob in blobs :
477
+ (name , oid ) = blob
478
+ if (name == "graph.nt" ):
479
+ file_reference , context , nameMap = self .getFileReferenceAndContext (
480
+ blob , parent_commit )
481
+ replaceBlankNode (parsedQuery , nameMap )
482
+ return nameMap
483
+ return {}
484
+
422
485
def applyQueryOnCommit (self , parsedQuery , parent_commit_ref , target_ref , query = None ,
423
486
default_graph = [], named_graph = []):
424
487
"""Apply an update query on the graph and the git repository."""
425
488
graph , commitid = self .instance (parent_commit_ref )
489
+ triples = {(x .n3 (), y .n3 (), z .n3 ()) for x , y , z in graph .store }
490
+ nameMap = self ._replaceLabledBlankNodes (parsedQuery , parent_commit_ref )
426
491
resultingChanges , exception = graph .update (parsedQuery )
492
+ self ._replaceExplicitNamedBlankNodesInChanges (resultingChanges , nameMap )
493
+ triples = {(x .n3 (), y .n3 (), z .n3 ()) for x , y , z in graph .store }
427
494
if exception :
428
495
# TODO need to revert or invalidate the graph at this point.
429
496
pass
@@ -432,6 +499,7 @@ def applyQueryOnCommit(self, parsedQuery, parent_commit_ref, target_ref, query=N
432
499
named_graph = named_graph )
433
500
if exception :
434
501
raise exception
502
+ triples = {(x .n3 (), y .n3 (), z .n3 ()) for x , y , z in graph .store }
435
503
return oid
436
504
437
505
def commit (self , graph , delta , message , parent_commit_ref , target_ref , query = None ,
@@ -494,7 +562,7 @@ def commit(self, graph, delta, message, parent_commit_ref, target_ref, query=Non
494
562
495
563
# Update Cache and add new contexts to store
496
564
blob = fileReference .path , index .stash [fileReference .path ][0 ]
497
- self ._blobs .set (blob , (fileReference , graph .store .get_context (identifier )))
565
+ self ._blobs .set (blob , (fileReference , graph .store .get_context (identifier ), {} ))
498
566
blobs_new .add (blob )
499
567
if graphconfig .mode == 'configuration' :
500
568
index .add ('config.ttl' , new_config .graphconf .serialize (format = 'turtle' ).decode ())
@@ -541,12 +609,40 @@ def _build_message(self, message, query, result, default_graph, named_graph, **k
541
609
out .append ('{}: "{}"' .format (k , v .replace ('"' , "\\ \" " )))
542
610
return "\n " .join (out )
543
611
612
+ def _replaceExplicitNamedBlankNodesInChanges (self , changes , nameMap ):
613
+ """Any changes applied to the update query by _replaceLabledBlankNodes have to be reverted for git deltas.
614
+ Otherwise the serialization results in Blanknodes being represented as random hashes instead of their original labels.
615
+ """
616
+ def lookUpBNode (bNode , nameMap ):
617
+ if (bNode in nameMap ):
618
+ return rdflib .BNode (nameMap [bNode ])
619
+ return bNode
620
+
621
+ def replaceBNodesByName (triple , nameMap ):
622
+ new_subject = triple [0 ]
623
+ new_object = triple [2 ]
624
+ if (isinstance (new_subject , BNode )):
625
+ new_subject = lookUpBNode (new_subject , nameMap )
626
+ if (isinstance (new_object , BNode )):
627
+ new_object = lookUpBNode (new_object , nameMap )
628
+ return (new_subject , triple [1 ], new_object )
629
+
630
+ if len (nameMap ) == 0 :
631
+ return
632
+ for change in changes :
633
+ for context in change ['delta' ]:
634
+ for payload in change ['delta' ][context ]:
635
+ if (isinstance (payload [1 ], list )):
636
+ for i in range (0 , len (payload [1 ])):
637
+ payload [1 ][i ] = replaceBNodesByName (payload [1 ][i ], nameMap )
638
+
544
639
def _applyKnownGraphs (self , delta , blobs , parent_commit , index ):
545
640
blobs_new = set ()
546
641
for blob in blobs :
547
642
(fileName , oid ) = blob
548
643
try :
549
- file_reference , context = self .getFileReferenceAndContext (blob , parent_commit )
644
+ file_reference , context , nameMap = self .getFileReferenceAndContext (
645
+ blob , parent_commit )
550
646
for entry in delta :
551
647
changeset = entry ['delta' ].get (context .identifier , None )
552
648
@@ -558,7 +654,7 @@ def _applyKnownGraphs(self, delta, blobs, parent_commit, index):
558
654
559
655
self ._blobs .remove (blob )
560
656
blob = fileName , index .stash [file_reference .path ][0 ]
561
- self ._blobs .set (blob , (file_reference , context ))
657
+ self ._blobs .set (blob , (file_reference , context , nameMap ))
562
658
blobs_new .add (blob )
563
659
except KeyError :
564
660
pass
@@ -580,7 +676,7 @@ def _applyUnknownGraphs(self, delta, known_blobs):
580
676
n = [
581
677
int (m .group (1 )) for b in known_blobs for m in [reg .search (b )] if m
582
678
] + [0 ]
583
- fileName = '{}_{}.nt' .format (iri_to_name (identifier ), max (n )+ 1 )
679
+ fileName = '{}_{}.nt' .format (iri_to_name (identifier ), max (n ) + 1 )
584
680
585
681
new_contexts [identifier ] = FileReference (fileName , '' )
586
682
0 commit comments