Skip to content

Commit f5b6268

Browse files
committed
Issue #15: benchmarks: initial implementation of comparing with reference data
1 parent 4f3d8c0 commit f5b6268

File tree

4 files changed

+92
-4
lines changed

4 files changed

+92
-4
lines changed

benchmark_scenarios/max_ndvi.json

+33
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,39 @@
2222
},
2323
"result": true
2424
}
25+
},
26+
"reference_data": {
27+
"job-results.json": "https://s3.waw3-1.cloudferro.com/APEx-benchmarks/max_ndvi.json:max_ndvi:reference:job-results.json",
28+
"openEO.tif": "https://s3.waw3-1.cloudferro.com/APEx-benchmarks/max_ndvi.json:max_ndvi:reference:openEO.tif"
29+
}
30+
},
31+
{
32+
"id": "max_ndvi_fail",
33+
"type": "openeo",
34+
"description": "max_ndvi example, intentionally failing",
35+
"backend": "openeofed.dataspace.copernicus.eu",
36+
"process_graph": {
37+
"maxndvi1": {
38+
"process_id": "max_ndvi",
39+
"namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/f99f351d74d291d628e3aaa07fd078527a0cb631/openeo_udp/examples/max_ndvi/max_ndvi.json",
40+
"arguments": {
41+
"bbox": {
42+
"west": 6.07,
43+
"east": 6.09,
44+
"south": 51.21,
45+
"north": 51.23
46+
},
47+
"temporal_extent": [
48+
"2023-08-01",
49+
"2023-09-30"
50+
]
51+
},
52+
"result": true
53+
}
54+
},
55+
"reference_data": {
56+
"job-results.json": "https://s3.waw3-1.cloudferro.com/APEx-benchmarks/max_ndvi.json:max_ndvi:reference:job-results.json",
57+
"openEO.tif": "https://s3.waw3-1.cloudferro.com/APEx-benchmarks/max_ndvi.json:max_ndvi:reference:openEO.tif"
2558
}
2659
}
2760
]

qa/benchmarks/requirements.txt

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1+
# TODO: get rid of artifactory extra index url once openeo 0.31.0 is released
2+
--extra-index-url https://artifactory.vgt.vito.be/api/pypi/python-openeo/simple
13
apex-algorithm-qa-tools
2-
openeo>=0.30.0
4+
openeo>=0.31.0.a2.dev
35
pytest>=8.2.0
46
requests>=2.32.0
7+
xarray>=2024.6.0
8+
netCDF4>=1.7.1
9+
rioxarray>=0.15.7

qa/benchmarks/tests/test_benchmarks.py

+24-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
1+
from pathlib import Path
2+
13
import openeo
24
import pytest
3-
from apex_algorithm_qa_tools.scenarios import BenchmarkScenario, get_benchmark_scenarios
5+
from apex_algorithm_qa_tools.scenarios import (
6+
BenchmarkScenario,
7+
download_reference_data,
8+
get_benchmark_scenarios,
9+
)
10+
from openeo.testing.results import assert_job_results_allclose
411

512

613
@pytest.mark.parametrize(
@@ -11,7 +18,7 @@
1118
for uc in get_benchmark_scenarios()
1219
],
1320
)
14-
def test_run_benchmark(scenario: BenchmarkScenario, connection_factory):
21+
def test_run_benchmark(scenario: BenchmarkScenario, connection_factory, tmp_path: Path):
1522
connection: openeo.Connection = connection_factory(url=scenario.backend)
1623

1724
# TODO #14 scenario option to use synchronous instead of batch job mode?
@@ -20,6 +27,20 @@ def test_run_benchmark(scenario: BenchmarkScenario, connection_factory):
2027
title=f"APEx benchmark {scenario.id}",
2128
)
2229

30+
# TODO: monitor timing and progress
31+
# TODO: abort excessively long batch jobs? https://github.com/Open-EO/openeo-python-client/issues/589
2332
job.start_and_wait()
2433

25-
# TODO #5 download job results and inspect
34+
# Download actual results
35+
actual_dir = tmp_path / "actual"
36+
job.get_results().download_files(target=actual_dir, include_stac_metadata=True)
37+
# TODO: upload actual results to somewhere?
38+
39+
# Compare actual results with reference data
40+
reference_dir = download_reference_data(
41+
scenario=scenario, reference_dir=tmp_path / "reference"
42+
)
43+
# TODO: allow to override rtol/atol options of assert_job_results_allclose
44+
assert_job_results_allclose(
45+
actual=actual_dir, expected=reference_dir, tmp_path=tmp_path
46+
)

qa/tools/apex_algorithm_qa_tools/scenarios.py

+29
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
import json
55
import logging
66
import re
7+
from pathlib import Path
78
from typing import List
89

910
import jsonschema
1011
import requests
1112
from apex_algorithm_qa_tools.common import get_project_root
13+
from openeo.util import TimingLogger
1214

1315
_log = logging.getLogger(__name__)
1416

@@ -28,6 +30,7 @@ class BenchmarkScenario:
2830
description: str | None = None
2931
backend: str
3032
process_graph: dict
33+
reference_data: dict | None
3134

3235
@classmethod
3336
def from_dict(cls, data: dict) -> BenchmarkScenario:
@@ -41,6 +44,7 @@ def from_dict(cls, data: dict) -> BenchmarkScenario:
4144
description=data.get("description"),
4245
backend=data["backend"],
4346
process_graph=data["process_graph"],
47+
reference_data=data.get("reference_data"),
4448
)
4549

4650

@@ -92,3 +96,28 @@ def lint_benchmark_scenario(scenario: BenchmarkScenario):
9296
assert resp.json()["id"] == node["process_id"]
9397
# TODO: check that github URL is a "pinned" reference
9498
# TODO: check that provided parameters match expected process parameters
99+
100+
101+
def download_reference_data(
    scenario: BenchmarkScenario,
    reference_dir: Path,
    *,
    timeout: float = 60.0,
) -> Path:
    """
    Download the reference data files of a benchmark scenario.

    ``scenario.reference_data`` maps a relative target path (inside
    ``reference_dir``) to the URL the file is fetched from.

    :param scenario: scenario whose ``reference_data`` should be fetched.
    :param reference_dir: directory to store the downloaded files in.
        Sub-directories are created as needed.
    :param timeout: timeout in seconds passed to ``requests.get``
        for each download.
    :return: ``reference_dir``, as passed in.

    :raises ValueError: if a target path resolves to a location
        outside of ``reference_dir`` (e.g. through ``..`` segments or
        an absolute path). This is checked for all entries before anything
        is downloaded.
    :raises requests.HTTPError: if a download responds with an HTTP error
        status.

    When the scenario has no reference data (``None`` or empty mapping),
    nothing is downloaded and an (empty) ``reference_dir`` is returned.
    """
    reference_data = scenario.reference_data or {}

    # Validate all targets up front, so a bad entry fails fast,
    # before any network or file system side effect.
    # Both sides are resolved because `is_relative_to` is a purely lexical
    # check: without resolving, "../x" would still count as "relative".
    root = reference_dir.resolve()
    downloads = []
    for name, source in reference_data.items():
        path = (reference_dir / name).resolve()
        if not path.is_relative_to(root):
            raise ValueError(
                f"Resolved {path=} is not relative to {reference_dir=} ({scenario.id=})"
            )
        downloads.append((path, source))

    if not downloads:
        # Nothing to fetch: still make sure the returned directory exists.
        reference_dir.mkdir(parents=True, exist_ok=True)
        return reference_dir

    with TimingLogger(
        title=f"Downloading reference data for {scenario.id=} to {reference_dir=}",
        logger=_log.info,
    ):
        for path, source in downloads:
            path.parent.mkdir(parents=True, exist_ok=True)

            with TimingLogger(
                title=f"Downloading {source=} to {path=}", logger=_log.info
            ):
                # TODO: support other sources than HTTP?
                # Context manager: release the streamed connection when done.
                with requests.get(source, stream=True, timeout=timeout) as resp:
                    # Don't store an HTTP error page as reference data.
                    resp.raise_for_status()
                    with path.open("wb") as f:
                        for chunk in resp.iter_content(chunk_size=64 * 1024):
                            f.write(chunk)

    return reference_dir

0 commit comments

Comments
 (0)