Skip to content

Commit 8bdedb9

Browse files
authored
Merge pull request #765 from Open-EO/761-apex-reference-check-needs-better-representation-ascii-art-diff-diff-image-statistics
Issue #761 better diff for apex reference check
2 parents 238c8a6 + fcad19e commit 8bdedb9

File tree

3 files changed

+207
-10
lines changed

3 files changed

+207
-10
lines changed

openeo/testing/results.py

Lines changed: 103 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,22 @@
88
from pathlib import Path
99
from typing import List, Optional, Union
1010

11+
import numpy as np
1112
import xarray
1213
import xarray.testing
14+
from xarray import DataArray
1315

1416
from openeo.rest.job import DEFAULT_JOB_RESULTS_FILENAME, BatchJob, JobResults
1517
from openeo.util import repr_truncate
1618

1719
_log = logging.getLogger(__name__)
1820

19-
2021
_DEFAULT_RTOL = 1e-6
2122
_DEFAULT_ATOL = 1e-6
2223

24+
# Character ramps used to render values as ASCII art
# (source: https://paulbourke.net/dataformats/asciiart).
# Both ramps start with a blank, so index 0 renders as an empty cell.
# The 70-character ramp is written starting with "$" and then reversed to get that order.
# Note: the backslash is escaped explicitly ("\\|"): a bare "\|" is an invalid
# escape sequence, which triggers a DeprecationWarning/SyntaxWarning on recent Python versions.
DEFAULT_GRAYSCALE_70_CHARACTERS = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\\|()1{}[]?-_+~<>i!lI;:,\"^`'. "[::-1]
DEFAULT_GRAYSCALE_10_CHARACTERS = " .:-=+*#%@"
2327

2428
def _load_xarray_netcdf(path: Union[str, Path], **kwargs) -> xarray.Dataset:
2529
"""
@@ -88,12 +92,105 @@ def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.Dat
8892
return data
8993

9094

95+
def _ascii_art(
    diff_data: DataArray,
    *,
    max_width: int = 60,
    y_vs_x_aspect_ratio=2.5,
    grayscale_characters: str = DEFAULT_GRAYSCALE_70_CHARACTERS,
) -> str:
    """
    Render a two-dimensional ("x", "y") array of differences as framed ASCII art.

    The data is averaged in blocks (``coarsen(...).mean()``) so that the rendering
    fits in ``max_width`` columns and ``max_width / y_vs_x_aspect_ratio`` rows,
    and each resulting cell is mapped, relative to the maximum of the input,
    to a character of ``grayscale_characters``.

    :param diff_data: data to render, with (at least) dimensions "x" and "y".
        NaN cells are rendered with the first character of the ramp.
    :param max_width: maximum number of character columns of the picture
        (not counting the frame).
    :param y_vs_x_aspect_ratio: how much taller than wide a character cell is assumed to be;
        used to derive the maximum number of rows from ``max_width``.
    :param grayscale_characters: character ramp; index 0 is used for "no difference".
    :return: multi-line string: the picture surrounded by a box-drawing frame.
    """
    max_grayscale_idx = len(grayscale_characters) - 1
    max_height = max_width / y_vs_x_aspect_ratio
    # Round the block size *up*: truncating it would allow the rendering
    # to become (almost) twice as large as the requested maximum.
    x_scale: int = max(1, int(np.ceil(diff_data.sizes["x"] / max_width)))
    y_scale: int = max(1, int(np.ceil(diff_data.sizes["y"] / max_height)))
    data_max = diff_data.max().item()
    if data_max == 0:
        # Avoid division by zero when scaling; all cells map to index 0 anyway.
        data_max = 1
    coarsened = diff_data.coarsen(dim={"x": x_scale, "y": y_scale}, boundary="pad").mean()
    coarsened = coarsened.transpose("y", "x", ...)
    frame_line = "─" * coarsened.sizes["x"]
    top = "┌" + frame_line + "┐\n"
    bottom = "\n└" + frame_line + "┘"

    def _pixel_char(v) -> str:
        if np.isnan(v):
            return grayscale_characters[0]
        i = int(v * max_grayscale_idx / data_max)
        if v > 0 and i == 0:
            i = 1  # don't show a blank for a difference above the threshold
        # Clamp in all cases, so a very short ramp can not trigger an IndexError.
        return grayscale_characters[min(max_grayscale_idx, i)]

    # Iterate over plain numpy values instead of 0-d DataArray objects.
    rows = ["│" + "".join(_pixel_char(v) for v in row) + "│" for row in coarsened.values]
    return top + "\n".join(rows) + bottom
122+
123+
124+
def _compare_xarray_dataarray_xy(
    actual: Union[xarray.DataArray, str, Path],
    expected: Union[xarray.DataArray, str, Path],
    *,
    rtol: float = _DEFAULT_RTOL,
    atol: float = _DEFAULT_ATOL,
    name: str = None,
) -> List[str]:
    """
    Additional compare for two compatible spatial xarray DataArrays with tolerance (rtol, atol).

    For each combination of coordinates of the non-spatial dimensions
    (all dimensions except "x" and "y"), the pixels where
    ``abs(expected - actual) > abs(expected * rtol) + atol`` are collected.
    If there are any, two issues are reported for that slice
    (value statistics, and pixel count with bounding box)
    and an ASCII art rendering of the difference is logged as a warning.

    NOTE(review): the annotations allow ``str``/``Path``, but the arguments are used
    directly in arithmetic, so callers are expected to pass DataArrays that already
    have matching dimensions, coordinates and shape.

    :param actual: actual data, with (at least) dimensions "x" and "y".
    :param expected: reference data, compatible with ``actual``.
    :param rtol: relative tolerance.
    :param atol: absolute tolerance.
    :param name: optional name (e.g. variable name) to prefix the reported issues with.
    :return: list of issues (empty if no issues)
    """
    issues = []
    threshold = abs(expected * rtol) + atol
    diff_exact = abs(expected - actual)
    # Only keep the differences exceeding the tolerance, everything else becomes NaN.
    diff_lenient = diff_exact.where(diff_exact > threshold)

    # Preserve the dimension order of `expected`: a set difference would make the order
    # of the reported slices (and of the parts of their keys) depend on hashing.
    non_x_y_dims = [d for d in expected.dims if d not in ("x", "y")]
    value_mapping = {d: expected[d].data for d in non_x_y_dims}
    shape = tuple(len(value_mapping[d]) for d in non_x_y_dims)

    # Walk all index combinations of the non-spatial dimensions
    # (a single empty combination when there are only "x" and "y").
    for shape_index in np.ndindex(*shape):
        indexers = {dim: value_mapping[dim][i] for dim, i in zip(non_x_y_dims, shape_index)}
        diff_data = diff_lenient.sel(indexers=indexers)
        total_pixel_count = expected.sel(indexers).count().item()
        diff_pixel_count = diff_data.count().item()
        if diff_pixel_count == 0:
            continue

        diff_pixel_percentage = round(diff_pixel_count * 100 / total_pixel_count, 1)
        diff_mean = round(diff_data.mean().item(), 2)
        diff_var = round(diff_data.var().item(), 2)

        key = name + ": " if name else ""
        key += ",".join([f"{k} {str(v1)}" for k, v1 in indexers.items()])
        issues.append(
            f"{key}: value difference exceeds tolerance (rtol {rtol}, atol {atol}), "
            f"min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}"
        )

        _log.warning(f"Difference (ascii art) for {key}:\n{_ascii_art(diff_data)}")

        # Bounding box (in coordinate space) of the differing pixels.
        # `np.meshgrid` produces grids of shape (len(y), len(x)), so align the mask to ("y", "x").
        x_values = diff_data.coords["x"]
        y_values = diff_data.coords["y"]
        coord_grid = np.meshgrid(x_values, y_values)
        mask = diff_data.notnull().transpose("y", "x").values
        x_coords = coord_grid[0][mask]
        y_coords = coord_grid[1][mask]

        diff_bbox = ((x_coords.min().item(), y_coords.min().item()), (x_coords.max().item(), y_coords.max().item()))
        diff_area = (x_coords.max() - x_coords.min()) * (y_coords.max() - y_coords.min())
        total_area = abs((y_values[-1].data - y_values[0].data) * (x_values[-1].data - x_values[0].data))
        if total_area == 0:
            # Degenerate grid (single row or column): the ratio is undefined,
            # report NaN without triggering a division-by-zero warning.
            area_percentage = float("nan")
        else:
            area_percentage = round(diff_area * 100 / total_area, 1)
        issues.append(
            f"{key}: differing pixels: {diff_pixel_count}/{total_pixel_count} ({diff_pixel_percentage}%), "
            f"bbox {diff_bbox} - {area_percentage}% of the area"
        )
    return issues
185+
186+
91187
def _compare_xarray_dataarray(
92188
actual: Union[xarray.DataArray, str, Path],
93189
expected: Union[xarray.DataArray, str, Path],
94190
*,
95191
rtol: float = _DEFAULT_RTOL,
96192
atol: float = _DEFAULT_ATOL,
193+
name: str = None,
97194
) -> List[str]:
98195
"""
99196
Compare two xarray DataArrays with tolerance and report mismatch issues (as strings)
@@ -116,7 +213,7 @@ def _compare_xarray_dataarray(
116213
issues = []
117214

118215
# `xarray.testing.assert_allclose` currently does not always
119-
# provides detailed information about shape/dimension mismatches
216+
# provide detailed information about shape/dimension mismatches
120217
# so we enrich the issue listing with some more details
121218
if actual.dims != expected.dims:
122219
issues.append(f"Dimension mismatch: {actual.dims} != {expected.dims}")
@@ -127,13 +224,14 @@ def _compare_xarray_dataarray(
127224
issues.append(f"Coordinates mismatch for dimension {dim!r}: {acs} != {ecs}")
128225
if actual.shape != expected.shape:
129226
issues.append(f"Shape mismatch: {actual.shape} != {expected.shape}")
130-
227+
compatible = len(issues) == 0
131228
try:
132229
xarray.testing.assert_allclose(a=actual, b=expected, rtol=rtol, atol=atol)
133230
except AssertionError as e:
134231
# TODO: message of `assert_allclose` is typically multiline, split it again or make it one line?
135232
issues.append(str(e).strip())
136-
233+
if compatible and {"x", "y"} <= set(expected.dims):
234+
issues.extend(_compare_xarray_dataarray_xy(actual=actual, expected=expected, rtol=rtol, atol=atol, name=name))
137235
return issues
138236

139237

@@ -162,7 +260,6 @@ def assert_xarray_dataarray_allclose(
162260
if issues:
163261
raise AssertionError("\n".join(issues))
164262

165-
166263
def _compare_xarray_datasets(
167264
actual: Union[xarray.Dataset, str, Path],
168265
expected: Union[xarray.Dataset, str, Path],
@@ -180,15 +277,14 @@ def _compare_xarray_datasets(
180277
expected = _as_xarray_dataset(expected)
181278

182279
all_issues = []
183-
# TODO: just leverage DataSet support in xarray.testing.assert_allclose for all this?
184280
actual_vars = set(actual.data_vars)
185281
expected_vars = set(expected.data_vars)
186282
_log.debug(f"_compare_xarray_datasets: actual_vars={actual_vars!r} expected_vars={expected_vars!r}")
187283
if actual_vars != expected_vars:
188284
all_issues.append(f"Xarray DataSet variables mismatch: {actual_vars} != {expected_vars}")
189285
for var in expected_vars.intersection(actual_vars):
190286
_log.debug(f"_compare_xarray_datasets: comparing variable {var!r}")
191-
issues = _compare_xarray_dataarray(actual[var], expected[var], rtol=rtol, atol=atol)
287+
issues = _compare_xarray_dataarray(actual[var], expected[var], rtol=rtol, atol=atol, name=var)
192288
if issues:
193289
all_issues.append(f"Issues for variable {var!r}:")
194290
all_issues.extend(issues)

setup.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
with open("openeo/_version.py") as fp:
1414
exec(fp.read(), _version)
1515

16-
1716
with open("README.md", "r") as fh:
1817
long_description = fh.read()
1918

@@ -56,7 +55,6 @@
5655
"ipython",
5756
]
5857

59-
6058
name = "openeo"
6159
setup(
6260
name=name,

tests/testing/test_results.py

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from openeo.testing.results import (
1414
_compare_xarray_dataarray,
1515
assert_job_results_allclose,
16-
assert_xarray_allclose,
1716
assert_xarray_dataarray_allclose,
1817
assert_xarray_dataset_allclose,
1918
)
@@ -351,6 +350,110 @@ def test_allclose_minimal_success(self, tmp_path, actual_dir, expected_dir):
351350
ds.to_netcdf(actual_dir / "data.nc")
352351
assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path)
353352

353+
def test_allclose_xy_success(self, tmp_path, actual_dir, expected_dir):
    """Spatial cubes whose "b1" band differs (2 versus 1) are accepted with ``rtol=1``."""
    dims = ["t", "x", "y"]
    cube_shape = (3, 4, 5)

    def build_dataset(b1_value: float) -> xarray.Dataset:
        # Both datasets share grid and "b2" band; only the constant "b1" value differs.
        bands = {
            "b1": xarray.Variable(dims=dims, data=numpy.full(cube_shape, b1_value)),
            "b2": xarray.Variable(dims=dims, data=numpy.full(cube_shape, 3.0)),
        }
        grid = {"t": range(0, 3), "x": range(4, 8), "y": range(5, 10)}
        return xarray.Dataset(bands, coords=grid)

    build_dataset(2.0).to_netcdf(expected_dir / "data.nc")
    build_dataset(1.0).to_netcdf(actual_dir / "data.nc")

    assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path, rtol=1)
379+
380+
def test_allclose_minimal_xy_different(self, tmp_path, actual_dir, expected_dir):
    """With default tolerances, a "b1" band that differs everywhere is reported per "t" slice."""
    dims = ["t", "x", "y"]
    ones = numpy.ones((3, 4, 5))

    # Expected first, then actual: same grid and "b2" band, different constant "b1" value.
    for target_dir, b1_factor in [(expected_dir, 2), (actual_dir, 1)]:
        dataset = xarray.Dataset(
            {
                "b1": xarray.Variable(dims=dims, data=b1_factor * ones),
                "b2": xarray.Variable(dims=dims, data=3 * ones),
            },
            coords={"t": range(0, 3), "x": range(4, 8), "y": range(5, 10)},
        )
        dataset.to_netcdf(target_dir / "data.nc")

    expected_message = (
        r"Issues for file 'data.nc'.*"
        r"Issues for variable 'b1'.*"
        r"Left and right DataArray objects are not close.*Differing values:.*"
        r"t 0: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
        r"t 0: differing pixels: 20/20 \(100.0%\), bbox \(\(4, 5\), \(7, 9\)\) - 100.0% of the area.*"
        r"t 1: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
        r"t 1: differing pixels: 20/20 \(100.0%\), bbox \(\(4, 5\), \(7, 9\)\) - 100.0% of the area.*"
        r"t 2: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
        r"t 2: differing pixels: 20/20 \(100.0%\), bbox \(\(4, 5\), \(7, 9\)\) - 100.0% of the area"
    )
    with raises_assertion_error_or_not(expected_message):
        assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path)
417+
418+
def test_allclose_minimal_xy_different_small_area(self, tmp_path, actual_dir, expected_dir):
    """A 2x2 patch of deviating "b2" pixels in the last "t" slice is reported with its bbox."""
    dims = ["t", "x", "y"]
    cube_shape = (3, 4, 5)
    grid = {"t": range(0, 3), "x": range(4, 8), "y": range(5, 10)}

    b1_data = numpy.full(cube_shape, 2.0)
    b2_reference = numpy.full(cube_shape, 3.0)
    expected_ds = xarray.Dataset(
        {
            "b1": xarray.Variable(dims=dims, data=b1_data),
            "b2": xarray.Variable(dims=dims, data=b2_reference),
        },
        coords=grid,
    )
    expected_ds.to_netcdf(expected_dir / "data.nc")

    # Scale a 2x2 block (x index 2..3, y index 2..3) of the last time step.
    b2_modified_data = numpy.full(cube_shape, 3.0)
    b2_modified_data[2, 2:4, 2:4] *= numpy.array([[15, 14], [13, 12]])
    actual_ds = xarray.Dataset(
        {
            "b1": xarray.Variable(dims=dims, data=numpy.full(cube_shape, 2.0)),
            "b2": xarray.Variable(dims=dims, data=b2_modified_data),
        },
        coords=grid,
    )
    actual_ds.to_netcdf(actual_dir / "data.nc")

    expected_message = (
        r"Issues for file 'data.nc'.*"
        r"Issues for variable 'b2'.*"
        r"Left and right DataArray objects are not close.*Differing values:.*"
        r"t 2: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:33.0, max: 42.0, mean: 37.5, var: 11.2.*"
        r"t 2: differing pixels: 4/20 \(20.0%\), bbox \(\(6, 7\), \(7, 8\)\) - 8.3% of the area"
    )
    with raises_assertion_error_or_not(expected_message):
        assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path)
456+
354457
def test_allclose_basic_fail(self, tmp_path, actual_dir, expected_dir):
355458
expected_ds = xarray.Dataset({"a": (["time"], [1, 2, 3])}, coords={"time": [11, 22, 33]})
356459
expected_ds.to_netcdf(expected_dir / "data.nc")

0 commit comments

Comments
 (0)