Skip to content

Commit fabdad2

Browse files
authored
Merge pull request #110 from opendata-swiss/feat/publisher_name_multilang
Implement dct:publisher as foaf:Agent
2 parents 124212a + 13df76d commit fabdad2

File tree

3 files changed

+80
-21
lines changed

3 files changed

+80
-21
lines changed

ckanext/dcatapchharvest/profiles.py

Lines changed: 71 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -169,31 +169,73 @@ def _get_publisher_url_from_identifier(self, identifier):
169169

170170
def _publisher(self, subject, identifier):
171171
"""
172-
Returns a dict with details about a dct:publisher entity, a foaf:Agent
172+
Returns a dict with details about a dct:publisher entity,
173+
represented as a foaf:Agent.
173174
174-
Both subject and predicate must be rdflib URIRef or BNode objects
175+
`subject` must be an rdflib URIRef or BNode object.
176+
177+
Examples of supported RDF structures:
175178
176-
Examples:
179+
1. Basic Organization Representation (Legacy):
177180
178181
<dct:publisher>
179182
<foaf:Organization rdf:about="http://orgs.vocab.org/some-org">
180183
<foaf:name>Publishing Organization for dataset 1</foaf:name>
181184
</foaf:Organization>
182185
</dct:publisher>
183186
187+
Output:
184188
{
185189
'url': 'http://orgs.vocab.org/some-org',
186190
'name': 'Publishing Organization for dataset 1',
187191
}
188192
189-
Returns keys for url, name with the values set to
190-
an empty string if they could not be found
193+
2. Multilingual Agent Representation:
194+
195+
<dct:publisher>
196+
<foaf:Agent rdf:about="http://orgs.vocab.org/some-org">
197+
<foaf:name xml:lang="de">Wirtschaftsamt</foaf:name>
198+
<foaf:name xml:lang="it">Ufficio economico</foaf:name>
199+
<foaf:name xml:lang="fr">Bureau des economiques</foaf:name>
200+
<foaf:mbox rdf:resource="mailto:wirtschaftsamt@sh.ch"/>
201+
<foaf:homepage rdf:resource="https://some-org.org/info"/>
202+
</foaf:Agent>
203+
</dct:publisher>
204+
205+
For a foaf:Agent, the `name` field is resolved with `multilang=True`,
206+
allowing for prioritized language selection.
207+
The `url` field prioritizes the `foaf:homepage` property and falls back
208+
to the `rdf:about` attribute of the Agent.
209+
210+
Returns:
211+
A JSON-encoded dictionary with keys:
212+
- `url`: The URL of the publisher (from `foaf:homepage` or `rdf:about`)
213+
- `name`: The publisher name (resolved with `multilang=True` for an Agent)
214+
215+
If no valid data is found, the values for `url` and `name` will default
216+
to empty strings.
191217
"""
192218
publisher = {}
193219
for agent in self.g.objects(subject, DCT.publisher):
194-
publisher['url'] = (str(agent) if isinstance(agent,
195-
URIRef) else '')
196-
publisher_name = self._object_value(agent, FOAF.name)
220+
publisher['url'] = (
221+
self._object_value(agent, FOAF.homepage) or
222+
(str(agent) if isinstance(agent, URIRef) else '')
223+
)
224+
# detect if the agent is a foaf:Agent or foaf:Organization
225+
is_agent = (FOAF.Agent in self.g.objects(agent, RDF.type))
226+
is_organization = (
227+
FOAF.Organization in self.g.objects(agent, RDF.type))
228+
229+
if is_agent:
230+
# handle multilingual name for foaf:Agent
231+
publisher_name = self._object_value(agent, FOAF.name,
232+
multilang=True)
233+
elif is_organization:
234+
# handle single name for foaf:Organization
235+
publisher_name = self._object_value(agent, FOAF.name)
236+
else:
237+
publisher_name = None
238+
197239
publisher_deprecated = self._object_value(agent, RDFS.label)
198240
if publisher_name:
199241
publisher['name'] = publisher_name
@@ -1124,18 +1166,33 @@ def _accrual_periodicity_to_graph(self, dataset_ref, accrual_periodicity):
11241166
))
11251167

11261168
def _publisher_to_graph(self, dataset_ref, dataset_dict):
1169+
"""Add the publisher to the graph, supporting both FOAF.Agent
1170+
(with multilingual names) and FOAF.Organization (with a single name).
1171+
"""
11271172
g = self.g
11281173
publisher_uri, publisher_name = \
11291174
dh.get_publisher_dict_from_dataset(
11301175
dataset_dict.get('publisher')
11311176
)
1132-
if publisher_uri:
1133-
publisher_ref = URIRef(publisher_uri)
1177+
1178+
# determine publisher structure FOAF.Agent or FOAF.Organization
1179+
if isinstance(publisher_name, dict):
1180+
entity_type = FOAF.Agent
1181+
publisher_ref = URIRef(publisher_uri) if publisher_uri else BNode()
1182+
1183+
g.add((publisher_ref, RDF.type, entity_type))
1184+
for lang, name in publisher_name.items():
1185+
if name: # check if the name is not empty
1186+
g.add((publisher_ref, FOAF.name, Literal(name, lang=lang)))
11341187
else:
1135-
publisher_ref = BNode()
1136-
g.add((publisher_ref, RDF.type, FOAF.Organization))
1137-
if publisher_name:
1138-
g.add((publisher_ref, FOAF.name, Literal(publisher_name)))
1188+
entity_type = FOAF.Organization
1189+
publisher_ref = URIRef(publisher_uri) if publisher_uri else BNode()
1190+
1191+
g.add((publisher_ref, RDF.type, entity_type))
1192+
if publisher_name:
1193+
g.add((publisher_ref, FOAF.name, Literal(publisher_name)))
1194+
1195+
# link the publisher to the dataset
11391196
g.add((dataset_ref, DCT.publisher, publisher_ref))
11401197

11411198

ckanext/dcatapchharvest/tests/fixtures/conformant/dataset-publisher.xml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,13 @@
77
<dcat:Dataset rdf:about="https://swisstopo/123">
88
<dct:identifier>346265-fr@bundesamt-fur-statistik-bfs</dct:identifier>
99
<dct:publisher>
10-
<foaf:Organization rdf:about="https://swisstopo">
11-
<foaf:name xml:lang="de">Landesamt Topographie Swisstopo</foaf:name>
12-
</foaf:Organization>
10+
<foaf:Agent rdf:about="http://orgs.vocab.org/some-org">
11+
<foaf:name xml:lang="de">Wirtschaftsamt</foaf:name>
12+
<foaf:name xml:lang="it">Ufficio economico</foaf:name>
13+
<foaf:name xml:lang="fr">Bureau des economiques</foaf:name>
14+
<foaf:mbox rdf:resource="mailto:wirtschaftsamt@sh.ch"/>
15+
<foaf:homepage rdf:resource="https://some-org.org/info"/>
16+
</foaf:Agent>
1317
</dct:publisher>
14-
1518
</dcat:Dataset>
16-
1719
</rdf:RDF>

ckanext/dcatapchharvest/tests/test_dcatap_ch_parse_conformant_rdf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,5 @@ def test_dcatap_conformant_publisher_import(self):
2222
p.parse(contents)
2323
dataset = [d for d in p.datasets()][0]
2424
publisher = json.loads(dataset['publisher'])
25-
eq_(publisher['name'], 'Landesamt Topographie Swisstopo')
26-
eq_(publisher['url'], 'https://swisstopo')
25+
eq_(publisher['name'], {'fr': 'Bureau des economiques', 'de': 'Wirtschaftsamt', 'en': '', 'it': 'Ufficio economico'})
26+
eq_(publisher['url'], 'https://some-org.org/info')

0 commit comments

Comments
 (0)