@@ -169,31 +169,60 @@ def _get_publisher_url_from_identifier(self, identifier):
169
169
170
170
def _publisher (self , subject , identifier ):
171
171
"""
172
- Returns a dict with details about a dct:publisher entity, a foaf:Agent
172
+ Returns a dict with details about a dct:publisher entity,
173
+ represented as a foaf:Agent.
173
174
174
- Both subject and predicate must be rdflib URIRef or BNode objects
175
+ `subject` must be an rdflib URIRef or BNode object.
176
+
177
+ Examples of supported RDF structures:
175
178
176
- Examples :
179
+ 1. Basic Organization Representation (Legacy) :
177
180
178
181
<dct:publisher>
179
182
<foaf:Organization rdf:about="http://orgs.vocab.org/some-org">
180
183
<foaf:name>Publishing Organization for dataset 1</foaf:name>
181
184
</foaf:Organization>
182
185
</dct:publisher>
183
186
187
+ Output:
184
188
{
185
189
'url': 'http://orgs.vocab.org/some-org',
186
190
'name': 'Publishing Organization for dataset 1',
187
191
}
188
192
189
- Returns keys for url, name with the values set to
190
- an empty string if they could not be found
193
+ 2. Multilingual Agent Representation:
194
+
195
+ <dct:publisher>
196
+ <foaf:Agent rdf:about="http://orgs.vocab.org/some-org">
197
+ <foaf:name xml:lang="de">Wirtschaftsamt</foaf:name>
198
+ <foaf:name xml:lang="it">Ufficio economico</foaf:name>
199
+ <foaf:name xml:lang="fr">Bureau des economiques</foaf:name>
200
+ <foaf:mbox rdf:resource="mailto:wirtschaftsamt@sh.ch"/>
201
+ <foaf:homepage rdf:resource="https://some-org.org/info"/>
202
+ </foaf:Agent>
203
+ </dct:publisher>
204
+
205
+ The `name` field resolves directly using `multilang=True`,
206
+ allowing for prioritized language selection.
207
+ The `url` field prioritizes the `foaf:homepage` property and falls back
208
+ to the `rdf:about` attribute of the Agent.
209
+
210
+ Returns:
211
+ A JSON-encoded dictionary with keys:
212
+ - `url`: The URL of the publisher (from `foaf:homepage` or `rdf:about`)
213
+ - `name`: The multilingual name, resolved with `multilang=True`
214
+
215
+ If no valid data is found, the values for `url` and `name` will default
216
+ to empty strings.
191
217
"""
192
218
publisher = {}
193
219
for agent in self .g .objects (subject , DCT .publisher ):
194
- publisher ['url' ] = (str (agent ) if isinstance (agent ,
195
- URIRef ) else '' )
196
- publisher_name = self ._object_value (agent , FOAF .name )
220
+ publisher ['url' ] = (
221
+ self ._object_value (agent , FOAF .homepage ) or
222
+ (str (agent ) if isinstance (agent , URIRef ) else '' )
223
+ )
224
+ publisher_name = self ._object_value (agent , FOAF .name ,
225
+ multilang = True )
197
226
publisher_deprecated = self ._object_value (agent , RDFS .label )
198
227
if publisher_name :
199
228
publisher ['name' ] = publisher_name
@@ -571,7 +600,8 @@ def parse_dataset(self, dataset_dict, dataset_ref): # noqa
571
600
dataset_ref ,
572
601
dataset_dict .get ('identifier' , '' )
573
602
)
574
-
603
+ log .info ("harvested publisher dict" )
604
+ log .info (dataset_dict ['publisher' ])
575
605
# Relations
576
606
dataset_dict ['relations' ] = self ._relations (dataset_ref )
577
607
for relation in dataset_dict ['relations' ]:
0 commit comments