From 1d0ee20efbfa5517985c7a75dac70027e25486a7 Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Thu, 31 Jul 2025 13:06:31 +0200 Subject: [PATCH 1/8] tests: Update github workflow test.yml --- .github/workflows/test.yml | 47 ++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f48a32a6..4895810a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -19,16 +19,17 @@ jobs: needs: lint strategy: matrix: - ckan-version: [2.8] + ckan-version: ["2.10", "2.11"] fail-fast: false name: CKAN ${{ matrix.ckan-version }} runs-on: ubuntu-latest container: - image: openknowledge/ckan-dev:${{ matrix.ckan-version }} + image: ckan/ckan-dev:${{ matrix.ckan-version }} + options: --user root services: solr: - image: ckan/ckan-solr:${{ matrix.ckan-version }} + image: ckan/ckan-solr:${{ matrix.ckan-version }}-solr9 postgres: image: ckan/ckan-postgres-dev:${{ matrix.ckan-version }} env: @@ -37,7 +38,7 @@ jobs: POSTGRES_DB: postgres options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 redis: - image: redis:3 + image: redis:3 env: CKAN_SQLALCHEMY_URL: postgresql://ckan_default:pass@postgres/ckan_test CKAN_DATASTORE_WRITE_URL: postgresql://datastore_write:pass@postgres/datastore_test @@ -47,25 +48,21 @@ jobs: CKAN_SITE_URL: http://test.ckan.net steps: - - uses: actions/checkout@v3 - - name: Install requirements - run: | - # Replace default path to CKAN core config file with the one on the container - sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini - pip install -r requirements.txt - pip install -r dev-requirements.txt - pip install -e . - # Last commit before support for Python 2 was dropped - pip install -e git+https://github.com/ckan/ckanext-harvest.git@v1.4.2#egg=ckanext-harvest - pip install -r https://raw.githubusercontent.com/ckan/ckanext-harvest/v1.4.2/requirements.txt - # Last commit before support for Python 2 was dropped - pip install -e git+https://github.com/ckan/ckanext-dcat.git@0c26bed5b7a3a7fca8e7b78e338aace096e0ebf6#egg=ckanext-dcat - pip install -r https://raw.githubusercontent.com/ckan/ckanext-dcat/0c26bed5b7a3a7fca8e7b78e338aace096e0ebf6/requirements-py2.txt - pip install -r https://raw.githubusercontent.com/ckan/ckanext-dcat/0c26bed5b7a3a7fca8e7b78e338aace096e0ebf6/dev-requirements-py2.txt + - name: Checkout + uses: actions/checkout@v4 + - name: Install requirements + run: | + pip install -r requirements.txt + pip install -r dev-requirements.txt + pip install -e . 
+ pip install -e git+https://github.com/ckan/ckanext-harvest.git#egg=ckanext-harvest + pip install -r https://raw.githubusercontent.com/ckan/ckanext-harvest/master/requirements.txt + pip install -e git+https://github.com/ckan/ckanext-dcat.git#egg=ckanext-dcat + pip install -r https://raw.githubusercontent.com/ckan/ckanext-dcat/master/requirements.txt - - name: Setup extension - run: | - paster --plugin=ckan db init -c test.ini - paster --plugin=ckanext-harvest harvester initdb -c test.ini - - name: Run tests - run: nosetests --ckan --nocapture --nologcapture --with-pylons=test.ini --with-coverage --cover-package=ckanext.dcatapchharvest --cover-inclusive --cover-erase --cover-tests ckanext/dcatapchharvest + - name: Setup extension + run: | + ckan -c test.ini db init + ckan -c test.ini db pending-migrations --apply + - name: Run tests + run: pytest --ckan-ini=test.ini --disable-warnings --cov=ckanext.dcatapchharvest ckanext/dcatapchharvest From 59bc45fd462699fde3db71c4df15abcb10acd412 Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Thu, 31 Jul 2025 13:07:08 +0200 Subject: [PATCH 2/8] tests: Update tests for CKAN 2.11 and Python 3 --- .../tests/test_dcatap_ch_parse.py | 498 ++++++++---------- .../test_dcatap_ch_parse_conformant_rdf.py | 27 +- 2 files changed, 234 insertions(+), 291 deletions(-) diff --git a/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py b/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py index ff383395..5b6c4e44 100644 --- a/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py +++ b/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- - import json +from pprint import pprint -import nose from rdflib import Graph, Literal, URIRef from rdflib.namespace import RDF @@ -11,9 +9,6 @@ from ckanext.dcatapchharvest.profiles import DCAT, DCT from ckanext.dcatapchharvest.tests.base_test_classes import BaseParseTest -eq_ = nose.tools.eq_ -assert_true = nose.tools.assert_true - class TestSwissDCATAPProfileParsing(BaseParseTest): languages = get_langs() @@ -27,28 +22,26 @@ def test_rights_license(self): datasets = [d for d in p.datasets()] # Dataset - eq_(len(datasets), 1) + assert len(datasets) == 1 dataset = datasets[0] # Resources - eq_(len(dataset["resources"]), 1) + assert len(dataset["resources"]) == 1 resource = dataset["resources"][0] - eq_(str(resource["rights"]), "https://opendata.swiss/terms-of-use#terms_by") - eq_( - str(resource["license"]), "https://opendata.swiss/terms-of-use#terms_by_ask" + assert str(resource["rights"]) == "https://opendata.swiss/terms-of-use#terms_by" + assert ( + str(resource["license"]) + == "https://opendata.swiss/terms-of-use#terms_by_ask" ) def test_dataset_all_fields(self): - contents = self._get_file_contents("1901.xml") - p = RDFParser(profiles=["swiss_dcat_ap"]) - p.parse(contents) datasets = [d for d in p.datasets()] - eq_(len(datasets), 1) + assert len(datasets) == 1 dataset = datasets[0] extras = self._extras(dataset) @@ -57,191 +50,172 @@ def test_dataset_all_fields(self): assert all( l in dataset["title"] for l in self.languages ), "title contains all languages" - eq_(dataset["title"]["de"], "Statistisches Jahrbuch der Schweiz 1901") - eq_(dataset["title"]["fr"], "Annuaire statistique de la Suisse 1901") + assert dataset["title"]["de"] == "Statistisches Jahrbuch der Schweiz 1901" + assert dataset["title"]["fr"] == "Annuaire statistique de la Suisse 1901" assert all( l in dataset["description"] for l in self.languages ), "description contains all languages" - 
eq_(dataset["description"]["de"], "") - eq_(dataset["url"], "https://www.bfs.admin.ch/bfs/de/home/statistiken.html") + assert dataset["description"]["de"] == "" + assert dataset["url"] == "https://www.bfs.admin.ch/bfs/de/home/statistiken.html" # Keywords assert all( l in dataset["keywords"] for l in self.languages ), "keywords contains all languages" - eq_( - sorted(dataset["keywords"]["de"]), - ["publikation", "statistische-grundlagen-und-ubersichten"], - ) - eq_( - sorted(dataset["keywords"]["fr"]), - ["bases-statistiques-et-generalites", "publication"], - ) - eq_( - sorted(dataset["keywords"]["it"]), - ["basi-statistiche-e-presentazioni-generali", "pubblicazione"], - ) - eq_( - sorted(dataset["keywords"]["en"]), - ["publication", "statistical-basis-and-overviews"], - ) - eq_( - sorted(dataset["tags"], key=lambda k: k["name"]), - [ - {"name": "basas-statisticas-e-survistas"}, - {"name": "bases-statistiques-et-generalites"}, - {"name": "basi-statistiche-e-presentazioni-generali"}, - {"name": "pubblicazione"}, - {"name": "publication"}, - {"name": "publication"}, - {"name": "publikation"}, - {"name": "statistical-basis-and-overviews"}, - {"name": "statistische-grundlagen-und-ubersichten"}, - ], - ) + assert sorted(dataset["keywords"]["de"]) == [ + "publikation", + "statistische-grundlagen-und-ubersichten", + ] + assert sorted(dataset["keywords"]["fr"]) == [ + "bases-statistiques-et-generalites", + "publication", + ] + assert sorted(dataset["keywords"]["it"]) == [ + "basi-statistiche-e-presentazioni-generali", + "pubblicazione", + ] + assert sorted(dataset["keywords"]["en"]) == [ + "publication", + "statistical-basis-and-overviews", + ] + assert sorted(dataset["tags"], key=lambda k: k["name"]) == [ + {"name": "basas-statisticas-e-survistas"}, + {"name": "bases-statistiques-et-generalites"}, + {"name": "basi-statistiche-e-presentazioni-generali"}, + {"name": "pubblicazione"}, + {"name": "publication"}, + {"name": "publication"}, + {"name": "publikation"}, + {"name": "statistical-basis-and-overviews"}, + {"name": "statistische-grundlagen-und-ubersichten"}, + ] # Simple values - eq_(dataset["issued"], "1900-12-31T00:00:00") - eq_(dataset["modified"], "2018-04-24T19:30:57.197374") - eq_(dataset["identifier"], "346266@bundesamt-fur-statistik-bfs") - eq_(dataset["spatial"], "Schweiz") + assert dataset["issued"] == "1900-12-31T00:00:00" + assert dataset["modified"] == "2018-04-24T19:30:57.197374" + assert dataset["identifier"] == "346266@bundesamt-fur-statistik-bfs" + assert dataset["spatial"] == "Schweiz" # Temporals temporal = dataset["temporals"][0] - eq_(temporal["end_date"], "1901-12-31T00:00:00") + assert temporal["end_date"] == "1901-12-31T00:00:00" - eq_(temporal["start_date"], "1901-01-01T00:00:00") + assert temporal["start_date"] == "1901-01-01T00:00:00" # Publisher publisher = json.loads(dataset["publisher"]) - eq_(publisher["name"], "Landesamt Topographie Swisstopo") - eq_(publisher["url"], "https://swisstopo") + assert publisher["name"] == "Landesamt Topographie Swisstopo" + assert publisher["url"] == "https://swisstopo" # Contact points contact_point = dataset["contact_points"][0] - eq_(contact_point["name"], "info@bfs.admin.ch") - eq_(contact_point["email"], "auskunftsdienst@bfs.admin.ch") + assert contact_point["name"] == "info@bfs.admin.ch" + assert contact_point["email"] == "auskunftsdienst@bfs.admin.ch" # See alsos see_also = dataset["see_alsos"][0] - eq_(see_also["dataset_identifier"], "4682791@bundesamt-fur-statistik-bfs") + assert see_also["dataset_identifier"] == 
"4682791@bundesamt-fur-statistik-bfs" relations = sorted(dataset["relations"], key=lambda relation: relation["url"]) # Relations - only one label given, no language specified - eq_( - relations[0]["url"], - "https://www.admin.ch/opc/de/classified-compilation/19920252/index.html", + assert ( + relations[0]["url"] + == "https://www.admin.ch/opc/de/classified-compilation/19920252/index.html" ) for lang in self.languages: - eq_(relations[0]["label"][lang], "legal_basis") + assert relations[0]["label"][lang] == "legal_basis" # Relations - multilingual labels - eq_(relations[1]["url"], "https://www.example.org/aaa") + assert relations[1]["url"] == "https://www.example.org/aaa" for lang in self.languages: - eq_(relations[1]["label"][lang], f"Text for label {lang.upper()}") + assert relations[1]["label"][lang] == f"Text for label {lang.upper()}" # Relations - no label given - eq_(relations[2]["url"], "https://www.example.org/bbb") + assert relations[2]["url"] == "https://www.example.org/bbb" for lang in self.languages: - eq_(relations[2]["label"][lang], "https://www.example.org/bbb") + assert relations[2]["label"][lang] == "https://www.example.org/bbb" # Relations - label given, language specified but not German. # If there is no label given in a language, we try to get one from # another language, in the priority order 'en' -> 'de' -> 'fr' -> 'it'. # Here we test that we end up with a label text in all languages, even # though the source only had a label in Italian. - eq_(relations[3]["url"], "https://www.example.org/ccc") + assert relations[3]["url"] == "https://www.example.org/ccc" for lang in self.languages: - eq_(relations[3]["label"][lang], "Text for label IT") + assert relations[3]["label"][lang] == "Text for label IT" # Qualified relations - qualified_relations = sorted(dataset["qualified_relations"]) - eq_( - qualified_relations[0], - { - "relation": "http://example.org/Original987", - "had_role": "http://www.iana.org/assignments/relation/original", - }, - ) - eq_( - qualified_relations[1], - { - "relation": "http://example.org/Related486", - "had_role": "http://www.iana.org/assignments/relation/related", - }, + qualified_relations = sorted( + dataset["qualified_relations"], key=lambda x: x.get("relation") ) + assert qualified_relations[0] == { + "relation": "http://example.org/Original987", + "had_role": "http://www.iana.org/assignments/relation/original", + } + assert qualified_relations[1] == { + "relation": "http://example.org/Related486", + "had_role": "http://www.iana.org/assignments/relation/related", + } # Lists - eq_(sorted(dataset["language"]), ["de", "fr"]) - eq_(sorted(dataset["groups"]), [{"name": "gove"}]) - eq_( - sorted(dataset["documentation"]), - [ - "https://example.com/documentation-dataset-1", - "https://example.com/documentation-dataset-2", - ], - ) - eq_( - sorted(dataset["conforms_to"]), - [ - "http://resource.geosciml.org/ontology/timescale/gts", - "https://inspire.ec.europa.eu/documents", - ], - ) + assert sorted(dataset["language"]), ["de" == "fr"] + assert sorted(dataset["groups"]) == [{"name": "gove"}] + assert sorted(dataset["documentation"]) == [ + "https://example.com/documentation-dataset-1", + "https://example.com/documentation-dataset-2", + ] + assert sorted(dataset["conforms_to"]) == [ + "http://resource.geosciml.org/ontology/timescale/gts", + "https://inspire.ec.europa.eu/documents", + ] # Dataset URI - eq_( - extras["uri"], - "https://opendata.swiss/dataset/7451e012-64b2-4bbc-af20-a0e2bc61b585", + assert ( + extras["uri"] + == 
"https://opendata.swiss/dataset/7451e012-64b2-4bbc-af20-a0e2bc61b585" ) # Resources - eq_(len(dataset["resources"]), 1) + assert len(dataset["resources"]) == 1 resource = dataset["resources"][0] # Simple values assert all( l in resource["title"] for l in self.languages ), "resource title contains all languages" - eq_(resource["title"]["fr"], "Annuaire statistique de la Suisse 1901") - eq_(resource["title"]["de"], "") + assert resource["title"]["fr"] == "Annuaire statistique de la Suisse 1901" + assert resource["title"]["de"] == "" assert all( l in resource["description"] for l in self.languages ), "resource description contains all languages" - eq_(resource["description"]["de"], "") - eq_(resource["format"], "html") - eq_(resource["media_type"], "text/html") - eq_(resource["identifier"], "346265-fr@bundesamt-fur-statistik-bfs") - eq_(resource["license"], "https://opendata.swiss/terms-of-use#terms_by") - eq_(resource["rights"], "http://www.opendefinition.org/licenses/cc-zero") - eq_(resource["language"], ["fr"]) - eq_(resource["issued"], "1900-12-31T00:00:00") - eq_(resource["temporal_resolution"], "P1D") - eq_(resource["url"], "https://www.bfs.admin.ch/asset/fr/hs-b-00.01-jb-1901") + assert resource["description"]["de"] == "" + assert resource["format"] == "html" + assert resource["media_type"] == "text/html" + assert resource["identifier"] == "346265-fr@bundesamt-fur-statistik-bfs" + assert resource["license"] == "https://opendata.swiss/terms-of-use#terms_by" + assert resource["rights"] == "http://www.opendefinition.org/licenses/cc-zero" + assert resource["language"] == ["fr"] + assert resource["issued"] == "1900-12-31T00:00:00" + assert resource["temporal_resolution"] == "P1D" + assert resource["url"] == "https://www.bfs.admin.ch/asset/fr/hs-b-00.01-jb-1901" assert "download_url" not in resource, "download_url not available on resource" # Lists - eq_( - sorted(resource["documentation"]), - [ - "https://example.com/documentation-distribution-1", - "https://example.com/documentation-distribution-2", - ], - ) - eq_( - sorted(resource["access_services"]), - [ - "https://example.com/my-great-data-service-1", - "https://geoportal.sachsen.de/md/685a4409-a026-430e-afad-1fa2881f9700", - ], - ) + assert sorted(resource["documentation"]) == [ + "https://example.com/documentation-distribution-1", + "https://example.com/documentation-distribution-2", + ] + assert sorted(resource["access_services"]) == [ + "https://example.com/my-great-data-service-1", + "https://geoportal.sachsen.de/md/685a4409-a026-430e-afad-1fa2881f9700", + ] # Distribution URI - eq_( - resource["uri"], - "https://opendata.swiss/dataset/7451e012-64b2-4bbc-af20-a0e2bc61b585/resource/c8ec6ca0-6923-4cf3-92f2-95a10e6f8e25", + assert ( + resource["uri"] + == "https://opendata.swiss/dataset/7451e012-64b2-4bbc-af20-a0e2bc61b585/resource/c8ec6ca0-6923-4cf3-92f2-95a10e6f8e25" ) def test_dataset_issued_with_year_before_1900(self): @@ -254,14 +228,14 @@ def test_dataset_issued_with_year_before_1900(self): datasets = [d for d in p.datasets()] - eq_(len(datasets), 1) + assert len(datasets) == 1 dataset = datasets[0] # Check date values - eq_(dataset["issued"], "1893-12-31T00:00:00") + assert dataset["issued"] == "1893-12-31T00:00:00" - eq_(dataset["modified"], "2018-04-24T19:30:57.197374") + assert dataset["modified"] == "2018-04-24T19:30:57.197374" def test_catalog(self): @@ -273,7 +247,7 @@ def test_catalog(self): datasets = [d for d in p.datasets()] - eq_(len(datasets), 2) + assert len(datasets) == 2 def test_distribution_access_url(self): g 
= Graph() @@ -295,7 +269,7 @@ def test_distribution_access_url(self): resource = datasets[0]["resources"][0] - eq_(resource["url"], "http://access.url.org") + assert resource["url"] == "http://access.url.org" assert "download_url" not in resource def test_distribution_download_url(self): @@ -318,8 +292,8 @@ def test_distribution_download_url(self): resource = datasets[0]["resources"][0] - eq_(resource["url"], "http://download.url.org") - eq_(resource["download_url"], "http://download.url.org") + assert resource["url"] == "http://download.url.org" + assert resource["download_url"] == "http://download.url.org" def test_distribution_both_access_and_download_url(self): g = Graph() @@ -342,8 +316,8 @@ def test_distribution_both_access_and_download_url(self): resource = datasets[0]["resources"][0] - eq_(resource["url"], "http://access.url.org") - eq_(resource["download_url"], "http://download.url.org") + assert resource["url"] == "http://access.url.org" + assert resource["download_url"] == "http://download.url.org" def test_distribution_format_format_only(self): g = Graph() @@ -370,53 +344,50 @@ def test_temporals_accepted_formats(self): p = RDFParser(profiles=["swiss_dcat_ap"]) p.parse(contents) dataset = [d for d in p.datasets()][0] - eq_(len(dataset["temporals"]), 10) - - eq_( - sorted(dataset["temporals"]), - [ - { - "start_date": "1990-01-01T00:00:00", - "end_date": "1991-04-04T12:30:30", - }, - { - "start_date": "1992-01-02T00:00:00", - "end_date": "1993-12-03T23:59:59.999999", - }, - { - "start_date": "1994-01-01T00:00:00", - "end_date": "1995-04-04T12:30:30", - }, - { - "start_date": "1996-01-02T00:00:00", - "end_date": "1997-12-03T23:59:59.999999", - }, - { - "start_date": "1998-04-01T00:00:00", - "end_date": "1999-06-30T23:59:59.999999", - }, - { - "start_date": "2000-01-01T00:00:00", - "end_date": "2001-12-31T23:59:59.999999", - }, - { - "start_date": "2002-01-01T00:00:00", - "end_date": "2003-04-04T12:30:30", - }, - { - "start_date": "2004-01-02T00:00:00", - "end_date": "2005-12-03T23:59:59.999999", - }, - { - "start_date": "2006-04-01T00:00:00", - "end_date": "2007-06-30T23:59:59.999999", - }, - { - "start_date": "2008-01-01T00:00:00", - "end_date": "2009-12-31T23:59:59.999999", - }, - ], - ) + assert len(dataset["temporals"]) == 10 + + assert sorted(dataset["temporals"], key=lambda x: x["start_date"]) == [ + { + "start_date": "1990-01-01T00:00:00", + "end_date": "1991-04-04T12:30:30", + }, + { + "start_date": "1992-01-02T00:00:00", + "end_date": "1993-12-03T23:59:59.999999", + }, + { + "start_date": "1994-01-01T00:00:00", + "end_date": "1995-04-04T12:30:30", + }, + { + "start_date": "1996-01-02T00:00:00", + "end_date": "1997-12-03T23:59:59.999999", + }, + { + "start_date": "1998-04-01T00:00:00", + "end_date": "1999-06-30T23:59:59.999999", + }, + { + "start_date": "2000-01-01T00:00:00", + "end_date": "2001-12-31T23:59:59.999999", + }, + { + "start_date": "2002-01-01T00:00:00", + "end_date": "2003-04-04T12:30:30", + }, + { + "start_date": "2004-01-02T00:00:00", + "end_date": "2005-12-03T23:59:59.999999", + }, + { + "start_date": "2006-04-01T00:00:00", + "end_date": "2007-06-30T23:59:59.999999", + }, + { + "start_date": "2008-01-01T00:00:00", + "end_date": "2009-12-31T23:59:59.999999", + }, + ] def test_temporals_incorrect_formats(self): # See comments in dataset-datetimes-bad.xml for reasons why temporals @@ -425,33 +396,30 @@ def test_temporals_incorrect_formats(self): p = RDFParser(profiles=["swiss_dcat_ap"]) p.parse(contents) dataset = [d for d in p.datasets()][0] - 
eq_(len(dataset["temporals"]), 5) - - eq_( - sorted(dataset["temporals"]), - [ - { - "start_date": "1998-04-01T00:00:00", - "end_date": "1999-01-01T23:59:59.999999", - }, - { - "start_date": "2000-11-21T00:00:00", - "end_date": "2001-01-01T23:59:59.999999", - }, - { - "start_date": "2002-01-01T00:00:00", - "end_date": "2003-01-31T23:59:59.999999", - }, - { - "start_date": "2004-01-01T00:00:00", - "end_date": "2005-12-31T23:59:59.999999", - }, - { - "start_date": "2006-01-01T00:00:00", - "end_date": "2007-01-31T23:59:59.999999", - }, - ], - ) + assert len(dataset["temporals"]) == 5 + + assert sorted(dataset["temporals"]) == [ + { + "start_date": "1998-04-01T00:00:00", + "end_date": "1999-01-01T23:59:59.999999", + }, + { + "start_date": "2000-11-21T00:00:00", + "end_date": "2001-01-01T23:59:59.999999", + }, + { + "start_date": "2002-01-01T00:00:00", + "end_date": "2003-01-31T23:59:59.999999", + }, + { + "start_date": "2004-01-01T00:00:00", + "end_date": "2005-12-31T23:59:59.999999", + }, + { + "start_date": "2006-01-01T00:00:00", + "end_date": "2007-01-31T23:59:59.999999", + }, + ] def test_resource_issued_modified_accepted_formats(self): contents = self._get_file_contents("dataset-datetimes.xml") @@ -460,27 +428,21 @@ def test_resource_issued_modified_accepted_formats(self): dataset = [d for d in p.datasets()][0] issued_dates = [distribution["issued"] for distribution in dataset["resources"]] - eq_( - sorted(issued_dates), - [ - "1990-01-01T00:00:00", - "1992-01-02T00:00:00", - "1994-04-01T00:00:00", - "1996-01-01T00:00:00", - ], - ) + assert sorted(issued_dates) == [ + "1990-01-01T00:00:00", + "1992-01-02T00:00:00", + "1994-04-01T00:00:00", + "1996-01-01T00:00:00", + ] modified_dates = [ distribution["modified"] for distribution in dataset["resources"] ] - eq_( - sorted(modified_dates), - [ - "1991-04-04T12:30:30", - "1993-12-03T00:00:00", - "1995-06-01T00:00:00", - "1997-01-01T00:00:00", - ], - ) + assert sorted(modified_dates) == [ + "1991-04-04T12:30:30", + "1993-12-03T00:00:00", + "1995-06-01T00:00:00", + "1997-01-01T00:00:00", + ] def test_dataset_issued_modified_accepted_formats(self): contents = self._get_file_contents("catalog-datetimes.xml") @@ -488,27 +450,21 @@ def test_dataset_issued_modified_accepted_formats(self): p.parse(contents) datasets = [d for d in p.datasets()] - eq_(len(datasets), 4) + assert len(datasets) == 4 issued_dates = [dataset["issued"] for dataset in datasets] - eq_( - sorted(issued_dates), - [ - "1990-12-31T23:00:00+00:00", - "1992-12-31T00:00:00", - "1994-12-01T00:00:00", - "1996-01-01T00:00:00", - ], - ) + assert sorted(issued_dates) == [ + "1990-12-31T23:00:00+00:00", + "1992-12-31T00:00:00", + "1994-12-01T00:00:00", + "1996-01-01T00:00:00", + ] modified_dates = [dataset["modified"] for dataset in datasets] - eq_( - sorted(modified_dates), - [ - "1991-02-19T23:00:00+00:00", - "1993-02-19T00:00:00", - "1995-02-01T00:00:00", - "1997-01-01T00:00:00", - ], - ) + assert sorted(modified_dates) == [ + "1991-02-19T23:00:00+00:00", + "1993-02-19T00:00:00", + "1995-02-01T00:00:00", + "1997-01-01T00:00:00", + ] def test_multiple_rights_statements(self): """Even if there are multiple dct:rights nodes on a distribution, only @@ -520,7 +476,10 @@ def test_multiple_rights_statements(self): dataset = [d for d in p.datasets()][0] resource = dataset["resources"][0] - eq_(str(resource["rights"]), "https://opendata.swiss/terms-of-use#terms_by_ask") + assert ( + str(resource["rights"]) + == "https://opendata.swiss/terms-of-use#terms_by_ask" + ) def 
test_eu_themes_mapping(self): contents = self._get_file_contents("catalog-themes.xml") @@ -528,15 +487,11 @@ def test_eu_themes_mapping(self): p.parse(contents) for dataset in p.datasets(): - eq_( - sorted(dataset["groups"]), - [ - {"name": "econ"}, - {"name": "gove"}, - {"name": "soci"}, - ], - f"Groups not mapped correctly for dataset {dataset['identifier']}", - ) + assert sorted(dataset["groups"], key=lambda x: x["name"]) == [ + {"name": "econ"}, + {"name": "gove"}, + {"name": "soci"}, + ] def test_format_media_type(self): """Test that format and media type are parsed both from URIs and from @@ -551,13 +506,10 @@ def test_format_media_type(self): (resource.get("format"), resource.get("media_type")) for resource in dataset["resources"] ] - eq_( - sorted(results), - [ - ("esri_ascii_grid", "text/plain"), - ("grid_ascii", "text/plain"), - ("html", "text/html"), - ("json", "application/json"), - ("text/calendar", "text/calendar"), - ], - ) + assert sorted(results) == [ + ("esri_ascii_grid", "text/plain"), + ("grid_ascii", "text/plain"), + ("html", "text/html"), + ("json", "application/json"), + ("text/calendar", "text/calendar"), + ] diff --git a/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse_conformant_rdf.py b/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse_conformant_rdf.py index 7440ccd3..de169bc5 100644 --- a/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse_conformant_rdf.py +++ b/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse_conformant_rdf.py @@ -1,22 +1,16 @@ -# -*- coding: utf-8 -*- import json -import nose - from ckanext.dcat.processors import RDFParser from ckanext.dcatapchharvest.tests.base_test_classes import BaseParseTest -eq_ = nose.tools.eq_ -assert_true = nose.tools.assert_true - -class ConformantProfileParseTest(BaseParseTest): +class TestConformantProfileParse(BaseParseTest): def test_dcatap_conformant_landing_page_import(self): contents = self._get_file_contents("conformant/dataset-landing-page.xml") p = RDFParser(profiles=["swiss_dcat_ap"]) p.parse(contents) dataset = [d for d in p.datasets()][0] - eq_(dataset["url"], "https://www.bfs.admin.ch/bfs/de/home/statistiken.html") + assert dataset["url"] == "https://www.bfs.admin.ch/bfs/de/home/statistiken.html" def test_dcatap_conformant_publisher_import(self): contents = self._get_file_contents("conformant/dataset-publisher.xml") @@ -24,13 +18,10 @@ def test_dcatap_conformant_publisher_import(self): p.parse(contents) dataset = [d for d in p.datasets()][0] publisher = json.loads(dataset["publisher"]) - eq_( - publisher["name"], - { - "fr": "Bureau des economiques", - "de": "Wirtschaftsamt", - "en": "", - "it": "Ufficio economico", - }, - ) - eq_(publisher["url"], "https://some-org.org/info") + assert publisher["name"] == { + "fr": "Bureau des economiques", + "de": "Wirtschaftsamt", + "en": "", + "it": "Ufficio economico", + } + assert publisher["url"] == "https://some-org.org/info" From 5230ac4ae6b83882d7468c30ad4110aa730eedf2 Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Thu, 31 Jul 2025 13:56:22 +0200 Subject: [PATCH 3/8] fix: Update check for values that are nested in a list For each language in the multilang dict, we might get a string or a list of values, e.g. {'de': 'Dataset Title'} or {'de': ['tag1', 'tag2', 'tag3']}. If it's a string, we need to wrap it in a list so we can safely iterate over it in the next line - otherwise we iterate by letter: 'D', 'a', 't', ... 
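For example, in a Python 3 interpreter:

    >>> list("Dataset Title")[:3]   # iterating the bare string goes by letter
    ['D', 'a', 't']
    >>> list(["Dataset Title"])     # iterating the wrapped value
    ['Dataset Title']
    >>> hasattr("Dataset Title", "__iter__")  # False on Python 2, True on Python 3
    True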
Checking whether the value had the '__iter__' attribute worked for this
in Python 2, but in Python 3, strings apparently also have this
attribute. Let's just check whether the value is a list or not. If
there are any other cases, we can catch them later!
---
 ckanext/dcatapchharvest/profiles.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ckanext/dcatapchharvest/profiles.py b/ckanext/dcatapchharvest/profiles.py
index 1ae9e0a1..1be77edb 100644
--- a/ckanext/dcatapchharvest/profiles.py
+++ b/ckanext/dcatapchharvest/profiles.py
@@ -78,7 +78,7 @@ def _add_multilang_value(
         if values:
             # the values can be either a multilang-dict or they are
             # nested in another iterable (e.g. keywords)
-            if not hasattr(values, "__iter__"):
+            if not isinstance(values, list):
                 values = [values]
             for value in values:
                 if value:

From e13378ffe928d8c7f8271a3ae881f486a4ce99af Mon Sep 17 00:00:00 2001
From: Rae Knowler
Date: Thu, 31 Jul 2025 14:05:42 +0200
Subject: [PATCH 4/8] tests: Update tests for CKAN 2.11 and Python 3

---
 .../tests/test_dcatap_ch_serialize.py | 47 +++++++++----------
 1 file changed, 21 insertions(+), 26 deletions(-)

diff --git a/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py b/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py
index c8b4e048..d5941244 100644
--- a/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py
+++ b/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py
@@ -1,7 +1,6 @@
 import json
 import logging
 
-import nose
 from rdflib import XSD, Literal, URIRef
 from rdflib.namespace import RDF
 
@@ -11,8 +10,6 @@ from ckanext.dcat.profiles import DCAT, DCT, FOAF, OWL, SCHEMA, VCARD, XSD
 from ckanext.dcatapchharvest.tests.base_test_classes import BaseSerializeTest
 
-eq_ = nose.tools.eq_
-assert_true = nose.tools.assert_true
 
 log = logging.getLogger(__name__)
 
@@ -28,7 +25,7 @@ def test_graph_from_dataset(self):
 
         dataset_ref = s.graph_from_dataset(dataset)
 
-        eq_(str(dataset_ref), utils.dataset_uri(dataset))
+        assert str(dataset_ref) == utils.dataset_uri(dataset)
 
         # Basic fields
         assert self._triple(g, dataset_ref, RDF.type, DCAT.Dataset)
@@ -41,14 +38,14 @@ def test_graph_from_dataset(self):
         assert len(list(g.objects(dataset_ref, DCT.modified))) == 0
 
         for key, value in dataset["description"].items():
-            if dataset["description"].get(key):
+            if value:
                 assert self._triple(
                     g, dataset_ref, DCT.description, Literal(value, lang=key)
                 )
-        eq_(len([t for t in g.triples((dataset_ref, DCT.description, None))]), 2)
+        assert len([t for t in g.triples((dataset_ref, DCT.description, None))]) == 2
 
         # Tags
-        eq_(len([t for t in g.triples((dataset_ref, DCAT.keyword, None))]), 3)
+        assert len([t for t in g.triples((dataset_ref, DCAT.keyword, None))]) == 3
         for key, keywords in dataset["keywords"].items():
             if dataset["keywords"].get(key):
                 for keyword in keywords:
                     assert self._triple(
                         g, dataset_ref, DCAT.keyword, Literal(keyword, lang=key)
                     )
 
         # Documentation
-        eq_(len([t for t in g.triples((dataset_ref, FOAF.page, None))]), 2)
+        assert len([t for t in g.triples((dataset_ref, FOAF.page, None))]) == 2
         for documentation_link in dataset["documentation"]:
             assert self._triple(g, dataset_ref, FOAF.page, URIRef(documentation_link))
 
         # Contact points
-        eq_(len([t for t in g.triples((dataset_ref, DCAT.contactPoint, None))]), 1)
+        assert len([t for t in g.triples((dataset_ref, DCAT.contactPoint, None))]) == 1
 
         contact_point = next(g.objects(dataset_ref, DCAT.contactPoint))
-        eq_(next(g.objects(contact_point, RDF.type)), VCARD.Organization)
-        eq_(
-            next(g.objects(contact_point, VCARD.hasEmail)),
-            URIRef("mailto:maria.muster@example.com"),
+        assert next(g.objects(contact_point, RDF.type)) == VCARD.Organization
+        assert next(g.objects(contact_point, VCARD.hasEmail)) == URIRef(
+            "mailto:maria.muster@example.com"
         )
-        eq_(next(g.objects(contact_point, VCARD.fn)), Literal("Maria Muster"))
+        assert next(g.objects(contact_point, VCARD.fn)) == Literal("Maria Muster")

         # Conformance
         conforms_to = dataset.get("conforms_to", [])
         # Check if the number of triples matches the number of conformance uris
-        eq_(len(list(g.triples((dataset_ref, DCT.conformsTo, None)))), len(conforms_to))
+        assert len(list(g.triples((dataset_ref, DCT.conformsTo, None)))) == len(
+            conforms_to
+        )
         for link in conforms_to:
             # Check if the triple (dataset_ref, DCT.conformsTo, URIRef(link)) exists in the graph
             assert (dataset_ref, DCT.conformsTo, URIRef(link)) in g

             g.add((dataset_ref, DCT.language, Literal(lang)))

         # Assert number of language triples matches expected
-        eq_(
-            len(list(g.triples((dataset_ref, DCT.language, None)))),
-            len(language_values),
+        assert len(list(g.triples((dataset_ref, DCT.language, None)))) == len(
+            language_values
         )

         # Resources
-        eq_(
-            len([t for t in g.triples((dataset_ref, DCAT.distribution, None))]),
-            len(dataset["resources"]),
-        )
+        assert len(
+            [t for t in g.triples((dataset_ref, DCAT.distribution, None))]
+        ) == len(dataset["resources"])

         for resource_dict in dataset.get("resources", []):
             distribution = URIRef(dh.resource_uri(resource_dict))
             assert self._triple(g, distribution, RDF.type, DCAT.Distribution)

             for link in resource_dict.get("documentation", []):
                 assert self._triple(g, distribution, FOAF.page, URIRef(link))

-            eq_(
-                len([t for t in g.triples((distribution, DCAT.accessService, None))]),
-                len(resource_dict.get("access_services", [])),
-            )
+            assert len(
+                [t for t in g.triples((distribution, DCAT.accessService, None))]
+            ) == len(resource_dict.get("access_services", []))
             for link in resource_dict.get("access_services", []):
                 assert self._triple(g, distribution, DCAT.accessService, URIRef(link))

From 9113691d15d5e24a856a41f9eb9d543a6eaaad69 Mon Sep 17 00:00:00 2001
From: Rae Knowler
Date: Thu, 31 Jul 2025 14:06:41 +0200
Subject: [PATCH 5/8] feat: Temporarily ignore three too-complex methods

Let's get flake8 to look at everything else and fix these in a next
step.
---
 ckanext/dcatapchharvest/harvesters.py | 4 +++-
 ckanext/dcatapchharvest/profiles.py   | 6 ++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/ckanext/dcatapchharvest/harvesters.py b/ckanext/dcatapchharvest/harvesters.py
index f565377b..82620f5d 100644
--- a/ckanext/dcatapchharvest/harvesters.py
+++ b/ckanext/dcatapchharvest/harvesters.py
@@ -71,7 +71,7 @@ def before_download(self, url, harvest_job):
             url = url.replace("ogd.global.szh.loc", "data.stadt-zuerich.ch")
         return url, []
 
-    def _get_guid(self, dataset_dict, source_url=None):
+    def _get_guid(self, dataset_dict, source_url=None):  # noqa: C901
         """
         Try to get a unique identifier for a harvested dataset
         It will be the first found of:
@@ -82,6 +82,8 @@ def _get_guid(self, dataset_dict, source_url=None):
         The last two are obviously not optimal, as depend on title, which
         might change.
         Returns None if no guid could be decided.
+
+        TODO: This method is too complex (flake8 says 16). Refactor it!
         """
         guid = None

diff --git a/ckanext/dcatapchharvest/profiles.py b/ckanext/dcatapchharvest/profiles.py
index 1be77edb..3b4eb771 100644
--- a/ckanext/dcatapchharvest/profiles.py
+++ b/ckanext/dcatapchharvest/profiles.py
@@ -542,7 +542,8 @@ def _get_groups(self, subject):
         # Deduplicate group names before returning list of group dicts
         return [{"name": name} for name in list(set(group_names))]
 
-    def parse_dataset(self, dataset_dict, dataset_ref):
+    def parse_dataset(self, dataset_dict, dataset_ref):  # noqa: C901
+        # TODO: This method is too complex (flake8 says 30). Refactor it!
         log.debug(f"Parsing dataset '{dataset_ref!r}'")
 
         dataset_dict["temporals"] = []
@@ -760,7 +761,8 @@ def parse_dataset(self, dataset_dict, dataset_ref):
 
         return dataset_dict
 
-    def graph_from_dataset(self, dataset_dict, dataset_ref):
+    def graph_from_dataset(self, dataset_dict, dataset_ref):  # noqa: C901
+        # TODO: This method is too complex (flake8 says 49, I am amazed). Refactor it!
 
         log.debug(f"Create graph from dataset '{dataset_dict['name']}'")

From 80087c32bc392931934ee8a1fc9a1309115825fa Mon Sep 17 00:00:00 2001
From: Rae Knowler
Date: Thu, 31 Jul 2025 14:10:25 +0200
Subject: [PATCH 6/8] tests: Update tests for CKAN 2.11 and Python 3

---
 .../test_dcatap_ch_parse_deprecated_rdf.py    | 15 +++------
 .../tests/test_swiss_schemaorg_serialize.py   | 33 +++++++++----------
 2 files changed, 19 insertions(+), 29 deletions(-)

diff --git a/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse_deprecated_rdf.py b/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse_deprecated_rdf.py
index c690f649..ef2d62e6 100644
--- a/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse_deprecated_rdf.py
+++ b/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse_deprecated_rdf.py
@@ -1,23 +1,16 @@
-# -*- coding: utf-8 -*-
-
 import json
 
-import nose
-
 from ckanext.dcat.processors import RDFParser
 from ckanext.dcatapchharvest.tests.base_test_classes import BaseParseTest
 
-eq_ = nose.tools.eq_
-assert_true = nose.tools.assert_true
-
 
-class DeprecatedProfileParseTest(BaseParseTest):
+class TestDeprecatedProfileParse(BaseParseTest):
     def test_deprecated_landing_page_import(self):
         contents = self._get_file_contents("deprecated/dataset-landing-page.xml")
         p = RDFParser(profiles=["swiss_dcat_ap"])
         p.parse(contents)
         dataset = [d for d in p.datasets()][0]
-        eq_(dataset["url"], "https://www.bfs.admin.ch/bfs/de/home/statistiken.html")
+        assert dataset["url"] == "http://www.bafu.admin.ch/laerm/index.html?lang=de"
 
     def test_deprecated_publisher_import(self):
         contents = self._get_file_contents("deprecated/dataset-publisher.xml")
@@ -25,5 +18,5 @@ def test_deprecated_publisher_import(self):
         p.parse(contents)
         dataset = [d for d in p.datasets()][0]
         publisher = json.loads(dataset["publisher"])
-        eq_(publisher["name"], "Landesamt Topographie Swisstopo")
-        eq_(publisher["url"], "https://swisstopo")
+        assert publisher["name"] == "Bundesamt für Landestopografie swisstopo"
+        assert publisher["url"] == ""

diff --git a/ckanext/dcatapchharvest/tests/test_swiss_schemaorg_serialize.py b/ckanext/dcatapchharvest/tests/test_swiss_schemaorg_serialize.py
index 91a37368..bb2976c8 100644
--- a/ckanext/dcatapchharvest/tests/test_swiss_schemaorg_serialize.py
+++ b/ckanext/dcatapchharvest/tests/test_swiss_schemaorg_serialize.py
@@ -1,6 +1,5 @@
 import json
 
-import nose
 from rdflib import Literal, URIRef
 from rdflib.namespace import RDF
 
@@ -10,14 +9,10 @@ from ckanext.dcat.profiles import SCHEMA, VCARD
 from ckanext.dcatapchharvest.tests.base_test_classes import BaseSerializeTest
 
-eq_ = nose.tools.eq_
-assert_true = nose.tools.assert_true - class TestSchemaOrgProfileSerializeDataset(BaseSerializeTest): def test_graph_from_dataset(self): - dataset = json.loads(self._get_file_contents("dataset.json")) extras = self._extras(dataset) @@ -26,7 +21,7 @@ def test_graph_from_dataset(self): dataset_ref = s.graph_from_dataset(dataset) - eq_(str(dataset_ref), utils.dataset_uri(dataset)) + assert str(dataset_ref) == utils.dataset_uri(dataset) # Basic fields assert self._triple(g, dataset_ref, RDF.type, SCHEMA.Dataset) @@ -35,15 +30,16 @@ def test_graph_from_dataset(self): assert self._triple(g, dataset_ref, SCHEMA.identifier, extras["identifier"]) # Contact points - eq_(len([t for t in g.triples((dataset_ref, SCHEMA.contactPoint, None))]), 1) + assert ( + len([t for t in g.triples((dataset_ref, SCHEMA.contactPoint, None))]) == 1 + ) contact_point = next(g.objects(dataset_ref, SCHEMA.contactPoint)) - eq_(next(g.objects(contact_point, RDF.type)), VCARD.Organization) - eq_( - next(g.objects(contact_point, VCARD.hasEmail)), - URIRef("mailto:maria.muster@example.com"), + assert next(g.objects(contact_point, RDF.type)) == VCARD.Organization + assert next(g.objects(contact_point, VCARD.hasEmail)) == URIRef( + "mailto:maria.muster@example.com" ) - eq_(next(g.objects(contact_point, VCARD.fn)), Literal("Maria Muster")) + assert next(g.objects(contact_point, VCARD.fn)) == Literal("Maria Muster") # Dates assert self._triple(g, dataset_ref, SCHEMA.datePublished, dataset["issued"]) @@ -54,10 +50,10 @@ def test_graph_from_dataset(self): assert self._triple( g, dataset_ref, SCHEMA.description, Literal(value, lang=key) ) - eq_(len([t for t in g.triples((dataset_ref, SCHEMA.description, None))]), 2) + assert len([t for t in g.triples((dataset_ref, SCHEMA.description, None))]) == 2 # Tags - eq_(len([t for t in g.triples((dataset_ref, SCHEMA.keywords, None))]), 3) + assert len([t for t in g.triples((dataset_ref, SCHEMA.keywords, None))]) == 3 for key, keywords in dataset["keywords"].items(): if dataset["keywords"].get(key): for keyword in keywords: @@ -70,15 +66,16 @@ def test_graph_from_dataset(self): ("language", SCHEMA.inLanguage, Literal), ]: values = json.loads(extras[item[0]]) - eq_(len([t for t in g.triples((dataset_ref, item[1], None))]), len(values)) + assert len([t for t in g.triples((dataset_ref, item[1], None))]) == len( + values + ) for value in values: assert self._triple(g, dataset_ref, item[1], item[2](value)) def test_graph_from_dataset_uri(self): - """ "Tests that datasets (resources) with a uri from the test system - have that uri changed to reference the prod system when they are output as a graph + """Tests that datasets (resources) with a uri from the test system have that + uri changed to reference the prod system when they are output as a graph """ - dataset = json.loads(self._get_file_contents("dataset-test-uri.json")) s = RDFSerializer(profiles=["swiss_schemaorg"]) From 8468bf97448afe57d4fd9d7c22f2c6057f4d3b0f Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Thu, 31 Jul 2025 14:19:39 +0200 Subject: [PATCH 7/8] tests: Rewrite path to test-core.ini in test.ini --- .github/workflows/test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4895810a..d9378276 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -59,6 +59,8 @@ jobs: pip install -r https://raw.githubusercontent.com/ckan/ckanext-harvest/master/requirements.txt pip install -e git+https://github.com/ckan/ckanext-dcat.git#egg=ckanext-dcat pip 
install -r https://raw.githubusercontent.com/ckan/ckanext-dcat/master/requirements.txt + # Replace default path to CKAN core config file with the one on the container + sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini - name: Setup extension run: | From 784c54afcc79ac62450d00eecf18bde042998acd Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Thu, 31 Jul 2025 14:26:12 +0200 Subject: [PATCH 8/8] tests: Don't measure test coverage of tests --- .coveragerc | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..0d073d1f --- /dev/null +++ b/.coveragerc @@ -0,0 +1,6 @@ +[report] +omit = + */site-packages/* + */python?.?/* + ckan/* + */tests/*