Skip to content

Commit eb9ea1b

Browse files
authored
Merge pull request #223 from linkml/issue-576
Allow for non-CURIE identifiers to be encoded.
2 parents 16ba202 + 7a096ab commit eb9ea1b

File tree

9 files changed

+420
-8
lines changed

9 files changed

+420
-8
lines changed

linkml_runtime/dumpers/rdflib_dumper.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import urllib
23
from abc import abstractmethod
34
from typing import Optional, Any, Dict
45

@@ -35,13 +36,18 @@ def as_rdf_graph(self, element: YAMLRoot, schemaview: SchemaView, prefix_map: Di
3536
logging.debug(f'PREFIXMAP={prefix_map}')
3637
if prefix_map:
3738
for k, v in prefix_map.items():
38-
schemaview.namespaces()[k] = v
39-
g.namespace_manager.bind(k, URIRef(v))
39+
if k == "@base":
40+
schemaview.namespaces()._base = v
41+
else:
42+
schemaview.namespaces()[k] = v
43+
g.namespace_manager.bind(k, URIRef(v))
4044
for prefix in schemaview.namespaces():
4145
g.bind(prefix, URIRef(schemaview.namespaces()[prefix]))
4246
else:
4347
for prefix in schemaview.namespaces():
4448
g.bind(prefix, URIRef(schemaview.namespaces()[prefix]))
49+
if schemaview.namespaces()._base:
50+
g.base = schemaview.namespaces()._base
4551
self.inject_triples(element, schemaview, g)
4652
return g
4753

@@ -84,14 +90,15 @@ def inject_triples(self, element: Any, schemaview: SchemaView, graph: Graph, tar
8490
return Literal(element)
8591
element_vars = {k: v for k, v in vars(element).items() if not k.startswith('_')}
8692
if len(element_vars) == 0:
87-
return URIRef(schemaview.expand_curie(str(element)))
93+
id_slot = schemaview.get_identifier_slot(target_type)
94+
return self._as_uri(element, id_slot.range, schemaview)
95+
#return URIRef(schemaview.expand_curie(str(element)))
8896
element_type = type(element)
8997
cn = element_type.class_name
9098
id_slot = schemaview.get_identifier_slot(cn)
9199
if id_slot is not None:
92100
element_id = getattr(element, id_slot.name)
93-
logging.debug(f'ELEMENT_ID={element_id} // {id_slot.name}')
94-
element_uri = namespaces.uri_for(element_id)
101+
element_uri = self._as_uri(element_id, id_slot.range, schemaview)
95102
else:
96103
element_uri = BNode()
97104
type_added = False
@@ -150,3 +157,9 @@ def dumps(self, element: YAMLRoot, schemaview: SchemaView = None,
150157
return self.as_rdf_graph(element, schemaview, prefix_map=prefix_map).\
151158
serialize(format=fmt)
152159

160+
def _as_uri(self, element_id: str, id_slot_range: str, schemaview: SchemaView) -> URIRef:
161+
if schemaview.is_type_percent_encoded(id_slot_range):
162+
return URIRef(urllib.parse.quote(element_id))
163+
else:
164+
return schemaview.namespaces().uri_for(element_id)
165+

linkml_runtime/loaders/rdflib_loader.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import urllib
23
from copy import copy
34
from dataclasses import dataclass
45
from typing import Optional, Any, Dict, Type, Union, TextIO, List, Tuple, Set
@@ -121,7 +122,11 @@ def from_rdf_graph(self, graph: Graph, schemaview: SchemaView, target_class: Typ
121122
v = Pointer(o)
122123
else:
123124
if ClassDefinition.class_name in range_applicable_elements:
124-
v = namespaces.curie_for(o)
125+
if slot.range in schemaview.all_classes():
126+
id_slot = schemaview.get_identifier_slot(slot.range)
127+
v = self._uri_to_id(o, id_slot, schemaview)
128+
else:
129+
v = namespaces.curie_for(o)
125130
if v is None:
126131
logging.debug(f'No CURIE for {p}={o} in {subject} [{subject_class}]')
127132
v = str(o)
@@ -201,7 +206,8 @@ def repl(v):
201206
def _get_id_dict(self, node: VALID_SUBJECT, schemaview: SchemaView, cn: ClassDefinitionName) -> ANYDICT:
202207
id_slot = schemaview.get_identifier_slot(cn)
203208
if not isinstance(node, BNode):
204-
id_val = schemaview.namespaces().curie_for(node)
209+
id_val = self._uri_to_id(node, id_slot, schemaview)
210+
#id_val = schemaview.namespaces().curie_for(node)
205211
if id_val == None:
206212
id_val = str(node)
207213
return {id_slot.name: id_val}
@@ -210,6 +216,12 @@ def _get_id_dict(self, node: VALID_SUBJECT, schemaview: SchemaView, cn: ClassDef
210216
raise Exception(f'Unexpected blank node {node}, type {cn} expects {id_slot.name} identifier')
211217
return {}
212218

219+
def _uri_to_id(self, node: VALID_SUBJECT, id_slot: SlotDefinition, schemaview: SchemaView) -> str:
220+
if schemaview.is_type_percent_encoded(id_slot.range):
221+
return urllib.parse.unquote(node).replace(schemaview.namespaces()._base, "")
222+
else:
223+
return schemaview.namespaces().curie_for(node)
224+
213225

214226
def load(self, source: Union[str, TextIO, Graph], target_class: Type[YAMLRoot], *,
215227
schemaview: SchemaView = None,

linkml_runtime/utils/schemaview.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1283,7 +1283,7 @@ def slot_applicable_range_elements(self, slot: SlotDefinition) -> List[ClassDefi
12831283
raise ValueError(f'Unrecognized range: {r}')
12841284
return range_types
12851285

1286-
def slot_range_as_union(self, slot: SlotDefinition) -> List[EnumDefinitionName]:
1286+
def slot_range_as_union(self, slot: SlotDefinition) -> List[ElementName]:
12871287
"""
12881288
Returns all applicable ranges for a slot
12891289
@@ -1338,6 +1338,21 @@ def get_slots_by_enum(self, enum_name: ENUM_NAME = None) -> List[SlotDefinition]
13381338
enum_slots.append(slot_definition)
13391339
return enum_slots
13401340

1341+
def is_type_percent_encoded(self, type: TypeDefinitionName) -> bool:
    """
    True if the type, or any of its ancestors, has a percent_encoded annotation.

    This is true for type fields that are the range of identifier columns,
    where the identifier is not guaranteed to be a valid URI or CURIE

    :param type: name of the type to check
    :return: True if a percent_encoded annotation is found, otherwise False
    """
    # Check every entry returned by type_ancestors instead of returning after
    # the first one, so an annotation declared on a parent type is honoured.
    for t in self.type_ancestors(type):
        anns = self.get_type(t).annotations
        if anns is not None and "percent_encoded" in anns:
            return True
    # Always return a real bool, including when there is nothing to inspect.
    return False
1355+
13411356
@lru_cache()
13421357
def usage_index(self) -> Dict[ElementName, List[SchemaUsage]]:
13431358
"""
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
id: https://w3id.org/linkml/examples/personinfo
2+
name: personinfo
3+
prefixes:
4+
linkml: https://w3id.org/linkml/
5+
personinfo: https://w3id.org/linkml/personinfo/
6+
schema: http://schema.org/
7+
ex: https://example.org/
8+
imports:
9+
- linkml:types
10+
default_range: string
11+
default_prefix: personinfo
12+
13+
types:
14+
Code:
15+
typeof: string
16+
description: >-
17+
An identifier that is encoded in a string. This is used to represent
18+
identifiers that are not URIs, but are encoded as strings. For example,
19+
a person's social security number is an encoded identifier.
20+
annotations:
21+
prefix: "@base"
22+
percent_encoded: true
23+
24+
25+
classes:
26+
Person:
27+
class_uri: schema:Person
28+
attributes:
29+
id:
30+
identifier: true
31+
range: uriorcurie
32+
comments:
33+
- person IDs are natively encoded as uris or curies
34+
name:
35+
range: string
36+
slot_uri: schema:name
37+
friends:
38+
range: Person
39+
multivalued: true
40+
41+
Pet:
42+
attributes:
43+
id:
44+
identifier: true
45+
range: string
46+
comments:
47+
- pet IDs are natively encoded as strings without encoding
48+
- they are assumed to be in CURIE form
49+
name:
50+
range: string
51+
slot_uri: schema:name
52+
owner:
53+
range: Person
54+
slot_uri: schema:owner
55+
56+
Organization:
57+
class_uri: schema:Organization
58+
attributes:
59+
id:
60+
identifier: true
61+
range: Code
62+
comments:
63+
- organization IDs are natively encoded as strings with percent encoding
64+
- they are encoded before being turned into CURIEs
65+
name:
66+
range: string
67+
slot_uri: schema:name
68+
part_of:
69+
range: Organization
70+
multivalued: true
71+
72+
Dataset:
73+
attributes:
74+
persons:
75+
multivalued: true
76+
range: Person
77+
inlined: true
78+
organizations:
79+
multivalued: true
80+
range: Organization
81+
inlined: true
82+
pets:
83+
multivalued: true
84+
range: Pet
85+
inlined: true
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
persons:
2+
- id: ex:P1
3+
name: John Doe
4+
- id: ex:P2
5+
name: Jane Doe
6+
friends:
7+
- ex:P1
8+
organizations:
9+
- id: org 1
10+
name: Acme Inc. (US)
11+
- id: org 2
12+
name: Acme Inc. (UK)
13+
part_of:
14+
- org 1
15+
pets:
16+
- id: ex:PetA
17+
name: Fido
18+
owner: ex:P1
19+
- id: ex:PetB
20+
name: Spot
21+
owner: ex:P2

0 commit comments

Comments
 (0)