Skip to content

Commit bdb906b

Browse files
author
dsamaey
committed
Issue #761 better diff for apex reference check
1 parent 3f1cec5 commit bdb906b

File tree

2 files changed

+233
-22
lines changed

2 files changed

+233
-22
lines changed

openeo/testing/results.py

+131-6
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88
from pathlib import Path
99
from typing import List, Optional, Union
1010

11+
import numpy as np
1112
import xarray
1213
import xarray.testing
14+
from scipy.spatial import ConvexHull
1315

1416
from openeo.rest.job import DEFAULT_JOB_RESULTS_FILENAME, BatchJob, JobResults
1517
from openeo.util import repr_truncate
@@ -88,6 +90,97 @@ def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.Dat
8890
return data
8991

9092

93+
def _compare_xarray_dataarray_xy(
94+
actual: Union[xarray.DataArray, str, Path],
95+
expected: Union[xarray.DataArray, str, Path],
96+
*,
97+
rtol: float = _DEFAULT_RTOL,
98+
atol: float = _DEFAULT_ATOL,
99+
) -> List[str]:
100+
"""
101+
Compare two xarray DataArrays with tolerance and report mismatch issues (as strings)
102+
103+
Checks that are done (with tolerance):
104+
- (optional) Check fraction of mismatching pixels (difference exceeding some tolerance).
105+
If fraction is below a given threshold, ignore these mismatches in subsequent comparisons.
106+
If fraction is above the threshold, report this issue.
107+
- Compare actual and expected data with `xarray.testing.assert_allclose` and specified tolerances.
108+
109+
:return: list of issues (empty if no issues)
110+
"""
111+
# TODO: make this a public function?
112+
# TODO: option for nodata fill value?
113+
# TODO: option to include data type check?
114+
# TODO: option to cast to some data type (or even rescale) before comparison?
115+
# TODO: also compare attributes of the DataArray?
116+
actual = _as_xarray_dataarray(actual)
117+
expected = _as_xarray_dataarray(expected)
118+
issues = []
119+
120+
if actual.dims != expected.dims:
121+
issues.append(f"Dimension mismatch: {actual.dims} != {expected.dims}")
122+
for dim in sorted(set(expected.dims).intersection(actual.dims)):
123+
acs = actual.coords[dim].values
124+
ecs = expected.coords[dim].values
125+
if not (acs.shape == ecs.shape and (acs == ecs).all()):
126+
issues.append(f"Coordinates mismatch for dimension {dim!r}: {acs} != {ecs}")
127+
if actual.shape != expected.shape:
128+
issues.append(f"Shape mismatch: {actual.shape} != {expected.shape}")
129+
130+
if not issues:
131+
threshold = abs(expected * rtol) + atol
132+
diff_exact = abs(expected - actual)
133+
diff_mask = diff_exact > threshold
134+
diff_lenient = diff_exact.where(diff_mask)
135+
136+
non_x_y_dims = list(set(expected.dims) - {"x", "y"})
137+
value_mapping = dict(map(lambda d: (d, expected[d].data), non_x_y_dims))
138+
shape = tuple([len(value_mapping[x]) for x in non_x_y_dims])
139+
140+
for shape_index, v in np.ndenumerate(np.ndarray(shape)):
141+
indexers = {}
142+
for index, value_index in enumerate(shape_index):
143+
indexers[non_x_y_dims[index]] = value_mapping[non_x_y_dims[index]][value_index]
144+
diff_data = diff_lenient.sel(indexers=indexers)
145+
total_pixel_count = expected.sel(indexers).count().item()
146+
diff_pixel_count = diff_data.count().item()
147+
148+
if diff_pixel_count > 0:
149+
diff_pixel_percentage = round(diff_pixel_count * 100 / total_pixel_count, 1)
150+
diff_mean = round(diff_data.mean().item(), 1)
151+
diff_var = round(diff_data.var().item(), 1)
152+
153+
key = ",".join([f"{k} {str(v1)}" for k, v1 in indexers.items()])
154+
issues.append(
155+
f"{key}: value difference min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}"
156+
)
157+
158+
coord_grid = np.meshgrid(diff_data.coords["y"], diff_data.coords["x"])
159+
mask = diff_data.notnull()
160+
c1 = coord_grid[0][mask]
161+
c2 = coord_grid[1][mask]
162+
coordinates = np.dstack((c1, c2)).reshape(-1, 2)
163+
if len(coordinates) > 2:
164+
hull = ConvexHull(coordinates)
165+
area = hull.volume
166+
167+
x_m = diff_data.coords["x"][0].data
168+
x_M = diff_data.coords["x"][-1].data
169+
y_m = diff_data.coords["y"][0].data
170+
y_M = diff_data.coords["y"][-1].data
171+
172+
total_area = abs((y_M - y_m) * (x_M - x_m))
173+
area_percentage = round(area * 100 / total_area, 1)
174+
issues.append(
175+
f"{key}: differing pixels: {diff_pixel_count}/{total_pixel_count} ({diff_pixel_percentage}%), spread over {area_percentage}% of the area"
176+
)
177+
else:
178+
issues.append(
179+
f"{key}: differing pixels: {diff_pixel_count}/{total_pixel_count} ({diff_pixel_percentage}%)"
180+
)
181+
return issues
182+
183+
91184
def _compare_xarray_dataarray(
92185
actual: Union[xarray.DataArray, str, Path],
93186
expected: Union[xarray.DataArray, str, Path],
@@ -128,11 +221,15 @@ def _compare_xarray_dataarray(
128221
if actual.shape != expected.shape:
129222
issues.append(f"Shape mismatch: {actual.shape} != {expected.shape}")
130223

131-
try:
132-
xarray.testing.assert_allclose(a=actual, b=expected, rtol=rtol, atol=atol)
133-
except AssertionError as e:
134-
# TODO: message of `assert_allclose` is typically multiline, split it again or make it one line?
135-
issues.append(str(e).strip())
224+
if not issues:
225+
if {"x", "y"} <= set(expected.dims):
226+
issues = _compare_xarray_dataarray_xy(actual=actual, expected=expected, rtol=rtol, atol=atol)
227+
else:
228+
try:
229+
xarray.testing.assert_allclose(a=actual, b=expected, rtol=rtol, atol=atol)
230+
except AssertionError as e:
231+
# TODO: message of `assert_allclose` is typically multiline, split it again or make it one line?
232+
issues.append(str(e).strip())
136233

137234
return issues
138235

@@ -163,6 +260,31 @@ def assert_xarray_dataarray_allclose(
163260
raise AssertionError("\n".join(issues))
164261

165262

263+
def assert_xarray_dataarray_allclose_xy(
264+
actual: Union[xarray.DataArray, str, Path],
265+
expected: Union[xarray.DataArray, str, Path],
266+
*,
267+
rtol: float = _DEFAULT_RTOL,
268+
atol: float = _DEFAULT_ATOL,
269+
):
270+
"""
271+
Assert that two Xarray ``DataArray`` instances are equal (with tolerance).
272+
273+
:param actual: actual data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file.
274+
:param expected: expected or reference data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file.
275+
:param rtol: relative tolerance
276+
:param atol: absolute tolerance
277+
:raises AssertionError: if not equal within the given tolerance
278+
279+
.. versionadded:: 0.31.0
280+
281+
.. warning::
282+
This function is experimental and subject to change.
283+
"""
284+
issues = _compare_xarray_dataarray_xy(actual=actual, expected=expected, rtol=rtol, atol=atol)
285+
if issues:
286+
raise AssertionError("\n".join(issues))
287+
166288
def _compare_xarray_datasets(
167289
actual: Union[xarray.Dataset, str, Path],
168290
expected: Union[xarray.Dataset, str, Path],
@@ -250,7 +372,10 @@ def assert_xarray_allclose(
250372
if isinstance(actual, xarray.Dataset) and isinstance(expected, xarray.Dataset):
251373
assert_xarray_dataset_allclose(actual, expected, rtol=rtol, atol=atol)
252374
elif isinstance(actual, xarray.DataArray) and isinstance(expected, xarray.DataArray):
253-
assert_xarray_dataarray_allclose(actual, expected, rtol=rtol, atol=atol)
375+
if (["x", "y", "band"]).elements_in(expected.dims):
376+
assert_xarray_dataarray_allclose_xy(actual, expected, rtol=rtol, atol=atol)
377+
else:
378+
assert_xarray_dataarray_allclose(actual, expected, rtol=rtol, atol=atol)
254379
else:
255380
raise ValueError(f"Unsupported types: {type(actual)} and {type(expected)}")
256381

tests/testing/test_results.py

+102-16
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from openeo.testing.results import (
1414
_compare_xarray_dataarray,
1515
assert_job_results_allclose,
16-
assert_xarray_allclose,
1716
assert_xarray_dataarray_allclose,
1817
assert_xarray_dataset_allclose,
1918
)
@@ -36,7 +35,6 @@ def test_simple_defaults(self):
3635
[
3736
"Coordinates mismatch for dimension 'dim_0': [0 1 2 3] != [0 1 2]",
3837
"Shape mismatch: (4,) != (3,)",
39-
dirty_equals.IsStr(regex="Left and right DataArray objects are not close.*", regex_flags=re.DOTALL),
4038
],
4139
),
4240
(
@@ -45,15 +43,13 @@ def test_simple_defaults(self):
4543
"Dimension mismatch: ('dim_0', 'dim_1') != ('dim_0',)",
4644
"Coordinates mismatch for dimension 'dim_0': [0 1] != [0 1 2]",
4745
"Shape mismatch: (2, 3) != (3,)",
48-
dirty_equals.IsStr(regex="Left and right DataArray objects are not close.*", regex_flags=re.DOTALL),
4946
],
5047
),
5148
(
5249
xarray.DataArray([[1], [2], [3]]),
5350
[
5451
"Dimension mismatch: ('dim_0', 'dim_1') != ('dim_0',)",
5552
"Shape mismatch: (3, 1) != (3,)",
56-
dirty_equals.IsStr(regex="Left and right DataArray objects are not close.*", regex_flags=re.DOTALL),
5753
],
5854
),
5955
],
@@ -75,20 +71,12 @@ def test_simple_shape_mismatch(self, actual, expected_issues):
7571
"Dimension mismatch: ('y', 'x') != ('x', 'y')",
7672
"Coordinates mismatch for dimension 'x': [0 1 2] != [0 1]",
7773
"Coordinates mismatch for dimension 'y': [0 1] != [0 1 2]",
78-
dirty_equals.IsStr(
79-
regex=r"Left and right DataArray objects are not close.*Differing dimensions:.*\(y: 2, x: 3\) != \(x: 2, y: 3\)",
80-
regex_flags=re.DOTALL,
81-
),
8274
],
8375
),
8476
(
8577
xarray.DataArray([[1, 2, 3], [4, 5, 6]], dims=["x", "z"]),
8678
[
8779
"Dimension mismatch: ('x', 'z') != ('x', 'y')",
88-
dirty_equals.IsStr(
89-
regex=r"Left and right DataArray objects are not close.*Differing dimensions:.*\(x: 2, z: 3\) != \(x: 2, y: 3\)",
90-
regex_flags=re.DOTALL,
91-
),
9280
],
9381
),
9482
],
@@ -108,10 +96,6 @@ def test_simple_dims_mismatch(self, actual, expected_issues):
10896
xarray.DataArray([[1, 2, 3], [4, 5, 6]], coords=[("x", [111, 222]), ("y", [33, 44, 55])]),
10997
[
11098
"Coordinates mismatch for dimension 'x': [111 222] != [11 22]",
111-
dirty_equals.IsStr(
112-
regex=r"Left and right DataArray objects are not close.*Differing coordinates:.*L \* x\s+\(x\).*?111 222.*R \* x\s+\(x\).*?11 22",
113-
regex_flags=re.DOTALL,
114-
),
11599
],
116100
),
117101
],
@@ -351,6 +335,108 @@ def test_allclose_minimal_success(self, tmp_path, actual_dir, expected_dir):
351335
ds.to_netcdf(actual_dir / "data.nc")
352336
assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path)
353337

338+
def test_allclose_xy_success(self, tmp_path, actual_dir, expected_dir):
339+
expected_ds = xarray.Dataset(
340+
{
341+
"b1": xarray.Variable(dims=["t", "x", "y"], data=2 * numpy.ones((3, 4, 5))),
342+
"b2": xarray.Variable(dims=["t", "x", "y"], data=3 * numpy.ones((3, 4, 5))),
343+
},
344+
coords={
345+
"t": range(0, 3),
346+
"x": range(4, 8),
347+
"y": range(5, 10),
348+
},
349+
)
350+
expected_ds.to_netcdf(expected_dir / "data.nc")
351+
actual_ds = xarray.Dataset(
352+
{
353+
"b1": xarray.Variable(dims=["t", "x", "y"], data=1 * numpy.ones((3, 4, 5))),
354+
"b2": xarray.Variable(dims=["t", "x", "y"], data=3 * numpy.ones((3, 4, 5))),
355+
},
356+
coords={
357+
"t": range(0, 3),
358+
"x": range(4, 8),
359+
"y": range(5, 10),
360+
},
361+
)
362+
actual_ds.to_netcdf(actual_dir / "data.nc")
363+
assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path, rtol=1)
364+
365+
def test_allclose_minimal_xy_different(self, tmp_path, actual_dir, expected_dir):
366+
expected_ds = xarray.Dataset(
367+
{
368+
"b1": xarray.Variable(dims=["t", "x", "y"], data=2 * numpy.ones((3, 4, 5))),
369+
"b2": xarray.Variable(dims=["t", "x", "y"], data=3 * numpy.ones((3, 4, 5))),
370+
},
371+
coords={
372+
"t": range(0, 3),
373+
"x": range(4, 8),
374+
"y": range(5, 10),
375+
},
376+
)
377+
expected_ds.to_netcdf(expected_dir / "data.nc")
378+
actual_ds = xarray.Dataset(
379+
{
380+
"b1": xarray.Variable(dims=["t", "x", "y"], data=1 * numpy.ones((3, 4, 5))),
381+
"b2": xarray.Variable(dims=["t", "x", "y"], data=3 * numpy.ones((3, 4, 5))),
382+
},
383+
coords={
384+
"t": range(0, 3),
385+
"x": range(4, 8),
386+
"y": range(5, 10),
387+
},
388+
)
389+
actual_ds.to_netcdf(actual_dir / "data.nc")
390+
with raises_assertion_error_or_not(
391+
r"Issues for file 'data.nc'.*"
392+
r"Issues for variable 'b1'.*"
393+
r"t 0: value difference min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
394+
r"t 0: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area.*"
395+
r"t 1: value difference min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
396+
r"t 1: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area.*"
397+
r"t 2: value difference min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
398+
r"t 2: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area"
399+
):
400+
assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path)
401+
402+
def test_allclose_minimal_xy_different_small_area(self, tmp_path, actual_dir, expected_dir):
403+
expected_ds = xarray.Dataset(
404+
{
405+
"b1": xarray.Variable(dims=["t", "x", "y"], data=2 * numpy.ones((3, 4, 5))),
406+
"b2": xarray.Variable(dims=["t", "x", "y"], data=3 * numpy.ones((3, 4, 5))),
407+
},
408+
coords={
409+
"t": range(0, 3),
410+
"x": range(4, 8),
411+
"y": range(5, 10),
412+
},
413+
)
414+
expected_ds.to_netcdf(expected_dir / "data.nc")
415+
b2_modified_data = 3 * numpy.ones((3, 4, 5))
416+
b2_modified_data[2][2][2] *= 15
417+
b2_modified_data[2][2][3] *= 14
418+
b2_modified_data[2][3][2] *= 13
419+
b2_modified_data[2][3][3] *= 12
420+
actual_ds = xarray.Dataset(
421+
{
422+
"b1": xarray.Variable(dims=["t", "x", "y"], data=2 * numpy.ones((3, 4, 5))),
423+
"b2": xarray.Variable(dims=["t", "x", "y"], data=b2_modified_data),
424+
},
425+
coords={
426+
"t": range(0, 3),
427+
"x": range(4, 8),
428+
"y": range(5, 10),
429+
},
430+
)
431+
actual_ds.to_netcdf(actual_dir / "data.nc")
432+
with raises_assertion_error_or_not(
433+
r"Issues for file 'data.nc'.*"
434+
r"Issues for variable 'b2'.*"
435+
r"t 2: value difference min:33.0, max: 42.0, mean: 37.5, var: 11.2.*"
436+
r"t 2: differing pixels: 4/20 \(20.0%\), spread over 8.3% of the area"
437+
):
438+
assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path)
439+
354440
def test_allclose_basic_fail(self, tmp_path, actual_dir, expected_dir):
355441
expected_ds = xarray.Dataset({"a": (["time"], [1, 2, 3])}, coords={"time": [11, 22, 33]})
356442
expected_ds.to_netcdf(expected_dir / "data.nc")

0 commit comments

Comments
 (0)