Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions ckanext/dcatapchharvest/dcat_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ def get_langs():
return language_priorities


def localize_by_language_priority(multilang_dict):
    """
    Returns the first non-empty value of a multilingual dict, trying the
    languages in the order given by get_langs().

    Falls back to an empty string when none of those languages has a
    truthy value in multilang_dict.
    """
    # Lazily walk the languages in priority order; only languages whose
    # entry is present and truthy yield a candidate value.
    non_empty_values = (
        multilang_dict[lang]
        for lang in get_langs()
        if multilang_dict.get(lang, '')
    )
    return next(non_empty_values, '')


def dataset_uri(dataset_dict, dataset_ref=None):
"""
Returns a URI for the dataset
Expand Down
40 changes: 25 additions & 15 deletions ckanext/dcatapchharvest/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,11 @@


class MultiLangProfile(RDFProfile):
def _add_multilang_value(self, subject, predicate, dataset_key=None,
dataset_dict=None,
def _add_multilang_value(self, subject, predicate, key=None,
data_dict=None,
multilang_values=None): # noqa
if not multilang_values and dataset_dict and dataset_key:
multilang_values = dataset_dict.get(dataset_key)
if not multilang_values and data_dict and key:
multilang_values = data_dict.get(key)
if multilang_values:
try:
for key, values in multilang_values.iteritems():
Expand Down Expand Up @@ -251,14 +251,23 @@ def _publisher(self, subject, identifier):
return json.dumps(publisher)

def _relations(self, subject):

relations = []

for relation_node in self.g.objects(subject, DCT.relation):
relation = {
'label': self._object_value(relation_node, RDFS.label),
'url': relation_node
'label': self._object_value(
relation_node,
RDFS.label,
multilang=True
),
'url': unicode(relation_node)
}
# If we don't have a label in any language, use the highest-prio
# language where we do have a label, or fall back to the url
fallback = (dh.localize_by_language_priority(relation['label']) or
relation.get('url', ''))
for lang in dh.get_langs():
if not relation['label'][lang]:
relation['label'][lang] = fallback
relations.append(relation)

return relations
Expand Down Expand Up @@ -616,9 +625,6 @@ def parse_dataset(self, dataset_dict, dataset_ref): # noqa

# Relations
dataset_dict['relations'] = self._relations(dataset_ref)
for relation in dataset_dict['relations']:
if relation['label'] == {}:
relation['label'] = str(relation.get('url', ''))

# Temporal
dataset_dict['temporals'] = self._temporals(dataset_ref)
Expand Down Expand Up @@ -862,16 +868,20 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa
if dataset_dict.get('relations'):
relations = dataset_dict.get('relations')
for relation in relations:
relation_name = relation['label']
try:
relation_url = dh.uri_to_iri(relation['url'])
except ValueError:
# skip this relation if the URL is invalid
continue

relation = URIRef(relation_url)
g.add((relation, RDFS.label, Literal(relation_name)))
g.add((dataset_ref, DCT.relation, relation))
relation_uriref = URIRef(relation_url)
self._add_multilang_value(
relation_uriref,
RDFS.label,
'label',
relation
)
g.add((dataset_ref, DCT.relation, relation_uriref))

# References
if dataset_dict.get('see_alsos'):
Expand Down
14 changes: 14 additions & 0 deletions ckanext/dcatapchharvest/tests/fixtures/1901.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,20 @@
<rdfs:label>legal_basis</rdfs:label>
</rdf:Description>
</dct:relation>
<dct:relation>
<rdf:Description rdf:about="https://www.example.org/aaa">
<rdfs:label xml:lang="it">Text for label IT</rdfs:label>
<rdfs:label xml:lang="fr">Text for label FR</rdfs:label>
<rdfs:label xml:lang="de">Text for label DE</rdfs:label>
<rdfs:label xml:lang="en">Text for label EN</rdfs:label>
</rdf:Description>
</dct:relation>
<dct:relation rdf:resource="https://www.example.org/bbb"/>
<dct:relation>
<rdf:Description rdf:about="https://www.example.org/ccc">
<rdfs:label xml:lang="it">Text for label IT</rdfs:label>
</rdf:Description>
</dct:relation>
<dcat:distribution>
<dcat:Distribution rdf:about="https://opendata.swiss/dataset/7451e012-64b2-4bbc-af20-a0e2bc61b585/resource/c8ec6ca0-6923-4cf3-92f2-95a10e6f8e25">
<dct:title xml:lang="fr">Annuaire statistique de la Suisse 1901</dct:title>
Expand Down
34 changes: 29 additions & 5 deletions ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from rdflib.namespace import RDF

from ckanext.dcat.processors import RDFParser
from ckanext.dcatapchharvest.dcat_helpers import get_langs
from ckanext.dcatapchharvest.profiles import (DCAT, DCT)
from ckanext.dcatapchharvest.tests.base_test_classes import BaseParseTest

Expand All @@ -16,6 +17,7 @@


class TestSwissDCATAPProfileParsing(BaseParseTest):
languages = get_langs()

def test_rights_license(self):

Expand Down Expand Up @@ -51,16 +53,16 @@ def test_dataset_all_fields(self):
extras = self._extras(dataset)

# Basic fields
assert all(l in dataset['title'] for l in ['de', 'fr', 'it', 'en']), "title contains all languages"
assert all(l in dataset['title'] for l in self.languages), "title contains all languages"
eq_(dataset['title']['de'], u'Statistisches Jahrbuch der Schweiz 1901')
eq_(dataset['title']['fr'], u'Annuaire statistique de la Suisse 1901')

assert all(l in dataset['description'] for l in ['de', 'fr', 'it', 'en']), "description contains all languages"
assert all(l in dataset['description'] for l in self.languages), "description contains all languages"
eq_(dataset['description']['de'], u'')
eq_(dataset['url'], u'https://www.bfs.admin.ch/bfs/de/home/statistiken.html')

# Keywords
assert all(l in dataset['keywords'] for l in ['de', 'fr', 'it', 'en']), "keywords contains all languages"
assert all(l in dataset['keywords'] for l in self.languages), "keywords contains all languages"
eq_(sorted(dataset['keywords']['de']), ['publikation', 'statistische-grundlagen-und-ubersichten'])
eq_(sorted(dataset['keywords']['fr']), ['bases-statistiques-et-generalites', 'publication'])
eq_(sorted(dataset['keywords']['it']), ['basi-statistiche-e-presentazioni-generali', 'pubblicazione'])
Expand Down Expand Up @@ -101,6 +103,28 @@ def test_dataset_all_fields(self):
see_also = dataset['see_alsos'][0]
eq_(see_also['dataset_identifier'], u'4682791@bundesamt-fur-statistik-bfs')

relations = sorted(dataset["relations"], key=lambda relation: relation['url'])

# Relations - only one label given, no language specified
eq_(relations[0]['url'], "https://www.admin.ch/opc/de/classified-compilation/19920252/index.html")
for lang in self.languages:
eq_(relations[0]['label'][lang], 'legal_basis')

# Relations - multilingual labels
eq_(relations[1]['url'], "https://www.example.org/aaa")
for lang in self.languages:
eq_(relations[1]['label'][lang], 'Text for label ' + lang.upper())

# Relations - no label given
eq_(relations[2]['url'], "https://www.example.org/bbb")
for lang in self.languages:
eq_(relations[2]['label'][lang], "https://www.example.org/bbb")

# Relations - label given, language specified but not German
eq_(relations[3]['url'], "https://www.example.org/ccc")
for lang in self.languages:
eq_(relations[3]['label'][lang], 'Text for label IT')

# Qualified relations
qualified_relations = sorted(dataset["qualified_relations"])
eq_(
Expand Down Expand Up @@ -138,10 +162,10 @@ def test_dataset_all_fields(self):
resource = dataset['resources'][0]

# Simple values
assert all(l in resource['title'] for l in ['de', 'fr', 'it', 'en']), "resource title contains all languages"
assert all(l in resource['title'] for l in self.languages), "resource title contains all languages"
eq_(resource['title']['fr'], u'Annuaire statistique de la Suisse 1901')
eq_(resource['title']['de'], u'')
assert all(l in resource['description'] for l in ['de', 'fr', 'it', 'en']), "resource description contains all languages"
assert all(l in resource['description'] for l in self.languages), "resource description contains all languages"
eq_(resource['description']['de'], u'')
eq_(resource['format'], u'html')
eq_(resource['media_type'], u'text/html')
Expand Down