Issue #15: benchmarks: initial implementation of comparing with reference data

soxofaan · soxofaan · commit f5b62687d224 · 2024-07-16T19:18:54.000+02:00
diff --git a/benchmark_scenarios/max_ndvi.json b/benchmark_scenarios/max_ndvi.json
@@ -22,6 +22,39 @@
         },
         "result": true
       }
+    },
+    "reference_data": {
+      "job-results.json": "https://s3.waw3-1.cloudferro.com/APEx-benchmarks/max_ndvi.json:max_ndvi:reference:job-results.json",
+      "openEO.tif": "https://s3.waw3-1.cloudferro.com/APEx-benchmarks/max_ndvi.json:max_ndvi:reference:openEO.tif"
+    }
+  },
+  {
+    "id": "max_ndvi_fail",
+    "type": "openeo",
+    "description": "max_ndvi example, intentionally failing",
+    "backend": "openeofed.dataspace.copernicus.eu",
+    "process_graph": {
+      "maxndvi1": {
+        "process_id": "max_ndvi",
+        "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/f99f351d74d291d628e3aaa07fd078527a0cb631/openeo_udp/examples/max_ndvi/max_ndvi.json",
+        "arguments": {
+          "bbox": {
+            "west": 6.07,
+            "east": 6.09,
+            "south": 51.21,
+            "north": 51.23
+          },
+          "temporal_extent": [
+            "2023-08-01",
+            "2023-09-30"
+          ]
+        },
+        "result": true
+      }
+    },
+    "reference_data": {
+      "job-results.json": "https://s3.waw3-1.cloudferro.com/APEx-benchmarks/max_ndvi.json:max_ndvi:reference:job-results.json",
+      "openEO.tif": "https://s3.waw3-1.cloudferro.com/APEx-benchmarks/max_ndvi.json:max_ndvi:reference:openEO.tif"
     }
   }
 ]
diff --git a/qa/benchmarks/requirements.txt b/qa/benchmarks/requirements.txt
@@ -1,4 +1,9 @@
+# TODO: get rid of artifactory extra index url once openeo 0.31.0 is released
+--extra-index-url https://artifactory.vgt.vito.be/api/pypi/python-openeo/simple
 apex-algorithm-qa-tools
-openeo>=0.30.0
+openeo>=0.31.0.a2.dev
 pytest>=8.2.0
 requests>=2.32.0
+xarray>=2024.6.0
+netCDF4>=1.7.1
+rioxarray>=0.15.7
diff --git a/qa/benchmarks/tests/test_benchmarks.py b/qa/benchmarks/tests/test_benchmarks.py
@@ -1,6 +1,13 @@
+from pathlib import Path
+
 import openeo
 import pytest
-from apex_algorithm_qa_tools.scenarios import BenchmarkScenario, get_benchmark_scenarios
+from apex_algorithm_qa_tools.scenarios import (
+    BenchmarkScenario,
+    download_reference_data,
+    get_benchmark_scenarios,
+)
+from openeo.testing.results import assert_job_results_allclose
 
 
 @pytest.mark.parametrize(
@@ -11,7 +18,7 @@
         for uc in get_benchmark_scenarios()
     ],
 )
-def test_run_benchmark(scenario: BenchmarkScenario, connection_factory):
+def test_run_benchmark(scenario: BenchmarkScenario, connection_factory, tmp_path: Path):
     connection: openeo.Connection = connection_factory(url=scenario.backend)
 
     # TODO #14 scenario option to use synchronous instead of batch job mode?
@@ -20,6 +27,20 @@ def test_run_benchmark(scenario: BenchmarkScenario, connection_factory):
         title=f"APEx benchmark {scenario.id}",
     )
 
+    # TODO: monitor timing and progress
+    # TODO: abort excessively long batch jobs? https://github.com/Open-EO/openeo-python-client/issues/589
     job.start_and_wait()
 
-    # TODO #5 download job results and inspect
+    # Download actual results
+    actual_dir = tmp_path / "actual"
+    job.get_results().download_files(target=actual_dir, include_stac_metadata=True)
+    # TODO: upload actual results to somewhere?
+
+    # Compare actual results with reference data
+    reference_dir = download_reference_data(
+        scenario=scenario, reference_dir=tmp_path / "reference"
+    )
+    # TODO: allow to override rtol/atol options of assert_job_results_allclose
+    assert_job_results_allclose(
+        actual=actual_dir, expected=reference_dir, tmp_path=tmp_path
+    )
diff --git a/qa/tools/apex_algorithm_qa_tools/scenarios.py b/qa/tools/apex_algorithm_qa_tools/scenarios.py
@@ -4,11 +4,13 @@
 import json
 import logging
 import re
+from pathlib import Path
 from typing import List
 
 import jsonschema
 import requests
 from apex_algorithm_qa_tools.common import get_project_root
+from openeo.util import TimingLogger
 
 _log = logging.getLogger(__name__)
 
@@ -28,6 +30,7 @@ class BenchmarkScenario:
     description: str | None = None
     backend: str
     process_graph: dict
+    reference_data: dict | None
 
     @classmethod
     def from_dict(cls, data: dict) -> BenchmarkScenario:
@@ -41,6 +44,7 @@ def from_dict(cls, data: dict) -> BenchmarkScenario:
             description=data.get("description"),
             backend=data["backend"],
             process_graph=data["process_graph"],
+            reference_data=data.get("reference_data"),
         )
 
 
@@ -92,3 +96,52 @@ def lint_benchmark_scenario(scenario: BenchmarkScenario):
                 assert resp.json()["id"] == node["process_id"]
                 # TODO: check that github URL is a "pinned" reference
             # TODO: check that provided parameters match expected process parameters
+
+
+def download_reference_data(scenario: BenchmarkScenario, reference_dir: Path) -> Path:
+    """
+    Download the reference data files of a benchmark scenario.
+
+    Each item of ``scenario.reference_data`` maps a file path (relative to
+    ``reference_dir``) to the HTTP(S) URL to download that file from.
+    A scenario without reference data (``None``) results in an empty directory.
+
+    :param scenario: benchmark scenario to get the reference data of
+    :param reference_dir: directory to store the downloaded files in
+    :return: ``reference_dir``
+    :raises ValueError: if a file path would end up outside of ``reference_dir``
+    :raises requests.RequestException: if a download fails or times out
+        (HTTP error statuses are raised instead of stored as reference data)
+    """
+    with TimingLogger(
+        title=f"Downloading reference data for {scenario.id=} to {reference_dir=}",
+        logger=_log.info,
+    ):
+        # Make sure the returned directory exists, even if there is nothing to download.
+        reference_dir.mkdir(parents=True, exist_ok=True)
+        # Resolve ".." segments and symlinks up front: a purely lexical
+        # `is_relative_to` check would accept paths like "../outside.tif".
+        resolved_root = reference_dir.resolve()
+
+        for name, source in (scenario.reference_data or {}).items():
+            path = reference_dir / name
+            if not path.resolve().is_relative_to(resolved_root):
+                raise ValueError(
+                    f"Resolved {path=} is not relative to {reference_dir=} ({scenario.id=})"
+                )
+            path.parent.mkdir(parents=True, exist_ok=True)
+
+            with TimingLogger(
+                title=f"Downloading {source=} to {path=}", logger=_log.info
+            ):
+                # TODO: support other sources than HTTP?
+                # The timeout avoids hanging forever on an unresponsive server;
+                # the context manager releases the streamed connection.
+                with requests.get(source, stream=True, timeout=60) as resp:
+                    # Fail fast instead of saving an HTTP error page as reference data.
+                    resp.raise_for_status()
+                    with path.open("wb") as f:
+                        for chunk in resp.iter_content(chunk_size=64 * 1024):
+                            f.write(chunk)
+
+    return reference_dir