From bdb906b2527fd4ab37252e8491c4d47b3dfb277d Mon Sep 17 00:00:00 2001
From: dsamaey <diether.samaey@vito.be>
Date: Tue, 22 Apr 2025 19:54:46 +0200
Subject: [PATCH 01/10] Issue #761 better diff for apex reference check

---
 openeo/testing/results.py     | 137 ++++++++++++++++++++++++++++++++--
 tests/testing/test_results.py | 118 +++++++++++++++++++++++++----
 2 files changed, 233 insertions(+), 22 deletions(-)

diff --git a/openeo/testing/results.py b/openeo/testing/results.py
index 633ddaf58..7076aaa59 100644
--- a/openeo/testing/results.py
+++ b/openeo/testing/results.py
@@ -8,8 +8,10 @@
 from pathlib import Path
 from typing import List, Optional, Union
 
+import numpy as np
 import xarray
 import xarray.testing
+from scipy.spatial import ConvexHull
 
 from openeo.rest.job import DEFAULT_JOB_RESULTS_FILENAME, BatchJob, JobResults
 from openeo.util import repr_truncate
@@ -88,6 +90,97 @@ def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.Dat
     return data
 
 
+def _compare_xarray_dataarray_xy(
+    actual: Union[xarray.DataArray, str, Path],
+    expected: Union[xarray.DataArray, str, Path],
+    *,
+    rtol: float = _DEFAULT_RTOL,
+    atol: float = _DEFAULT_ATOL,
+) -> List[str]:
+    """
+    Compare two xarray DataArrays with tolerance and report mismatch issues (as strings)
+
+    Checks that are done (with tolerance):
+    - (optional) Check fraction of mismatching pixels (difference exceeding some tolerance).
+      If fraction is below a given threshold, ignore these mismatches in subsequent comparisons.
+      If fraction is above the threshold, report this issue.
+    - Compare actual and expected data with `xarray.testing.assert_allclose` and specified tolerances.
+
+    :return: list of issues (empty if no issues)
+    """
+    # TODO: make this a public function?
+    # TODO: option for nodata fill value?
+    # TODO: option to include data type check?
+    # TODO: option to cast to some data type (or even rescale) before comparison?
+    # TODO: also compare attributes of the DataArray?
+    actual = _as_xarray_dataarray(actual)
+    expected = _as_xarray_dataarray(expected)
+    issues = []
+
+    if actual.dims != expected.dims:
+        issues.append(f"Dimension mismatch: {actual.dims} != {expected.dims}")
+    for dim in sorted(set(expected.dims).intersection(actual.dims)):
+        acs = actual.coords[dim].values
+        ecs = expected.coords[dim].values
+        if not (acs.shape == ecs.shape and (acs == ecs).all()):
+            issues.append(f"Coordinates mismatch for dimension {dim!r}: {acs} != {ecs}")
+    if actual.shape != expected.shape:
+        issues.append(f"Shape mismatch: {actual.shape} != {expected.shape}")
+
+    if not issues:
+        threshold = abs(expected * rtol) + atol
+        diff_exact = abs(expected - actual)
+        diff_mask = diff_exact > threshold
+        diff_lenient = diff_exact.where(diff_mask)
+
+        non_x_y_dims = list(set(expected.dims) - {"x", "y"})
+        value_mapping = dict(map(lambda d: (d, expected[d].data), non_x_y_dims))
+        shape = tuple([len(value_mapping[x]) for x in non_x_y_dims])
+
+        for shape_index, v in np.ndenumerate(np.ndarray(shape)):
+            indexers = {}
+            for index, value_index in enumerate(shape_index):
+                indexers[non_x_y_dims[index]] = value_mapping[non_x_y_dims[index]][value_index]
+            diff_data = diff_lenient.sel(indexers=indexers)
+            total_pixel_count = expected.sel(indexers).count().item()
+            diff_pixel_count = diff_data.count().item()
+
+            if diff_pixel_count > 0:
+                diff_pixel_percentage = round(diff_pixel_count * 100 / total_pixel_count, 1)
+                diff_mean = round(diff_data.mean().item(), 1)
+                diff_var = round(diff_data.var().item(), 1)
+
+                key = ",".join([f"{k} {str(v1)}" for k, v1 in indexers.items()])
+                issues.append(
+                    f"{key}: value difference min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}"
+                )
+
+                coord_grid = np.meshgrid(diff_data.coords["y"], diff_data.coords["x"])
+                mask = diff_data.notnull()
+                c1 = coord_grid[0][mask]
+                c2 = coord_grid[1][mask]
+                coordinates = np.dstack((c1, c2)).reshape(-1, 2)
+                if len(coordinates) > 2:
+                    hull = ConvexHull(coordinates)
+                    area = hull.volume
+
+                    x_m = diff_data.coords["x"][0].data
+                    x_M = diff_data.coords["x"][-1].data
+                    y_m = diff_data.coords["y"][0].data
+                    y_M = diff_data.coords["y"][-1].data
+
+                    total_area = abs((y_M - y_m) * (x_M - x_m))
+                    area_percentage = round(area * 100 / total_area, 1)
+                    issues.append(
+                        f"{key}: differing pixels: {diff_pixel_count}/{total_pixel_count} ({diff_pixel_percentage}%), spread over {area_percentage}% of the area"
+                    )
+                else:
+                    issues.append(
+                        f"{key}: differing pixels: {diff_pixel_count}/{total_pixel_count} ({diff_pixel_percentage}%)"
+                    )
+    return issues
+
+
 def _compare_xarray_dataarray(
     actual: Union[xarray.DataArray, str, Path],
     expected: Union[xarray.DataArray, str, Path],
@@ -128,11 +221,15 @@ def _compare_xarray_dataarray(
     if actual.shape != expected.shape:
         issues.append(f"Shape mismatch: {actual.shape} != {expected.shape}")
 
-    try:
-        xarray.testing.assert_allclose(a=actual, b=expected, rtol=rtol, atol=atol)
-    except AssertionError as e:
-        # TODO: message of `assert_allclose` is typically multiline, split it again or make it one line?
-        issues.append(str(e).strip())
+    if not issues:
+        if {"x", "y"} <= set(expected.dims):
+            issues = _compare_xarray_dataarray_xy(actual=actual, expected=expected, rtol=rtol, atol=atol)
+        else:
+            try:
+                xarray.testing.assert_allclose(a=actual, b=expected, rtol=rtol, atol=atol)
+            except AssertionError as e:
+                # TODO: message of `assert_allclose` is typically multiline, split it again or make it one line?
+                issues.append(str(e).strip())
 
     return issues
 
@@ -163,6 +260,31 @@ def assert_xarray_dataarray_allclose(
         raise AssertionError("\n".join(issues))
 
 
+def assert_xarray_dataarray_allclose_xy(
+    actual: Union[xarray.DataArray, str, Path],
+    expected: Union[xarray.DataArray, str, Path],
+    *,
+    rtol: float = _DEFAULT_RTOL,
+    atol: float = _DEFAULT_ATOL,
+):
+    """
+    Assert that two Xarray ``DataArray`` instances are equal (with tolerance).
+
+    :param actual: actual data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file.
+    :param expected: expected or reference data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file.
+    :param rtol: relative tolerance
+    :param atol: absolute tolerance
+    :raises AssertionError: if not equal within the given tolerance
+
+    .. versionadded:: 0.31.0
+
+    .. warning::
+        This function is experimental and subject to change.
+    """
+    issues = _compare_xarray_dataarray_xy(actual=actual, expected=expected, rtol=rtol, atol=atol)
+    if issues:
+        raise AssertionError("\n".join(issues))
+
 def _compare_xarray_datasets(
     actual: Union[xarray.Dataset, str, Path],
     expected: Union[xarray.Dataset, str, Path],
@@ -250,7 +372,10 @@ def assert_xarray_allclose(
     if isinstance(actual, xarray.Dataset) and isinstance(expected, xarray.Dataset):
         assert_xarray_dataset_allclose(actual, expected, rtol=rtol, atol=atol)
     elif isinstance(actual, xarray.DataArray) and isinstance(expected, xarray.DataArray):
-        assert_xarray_dataarray_allclose(actual, expected, rtol=rtol, atol=atol)
+        if (["x", "y", "band"]).elements_in(expected.dims):
+            assert_xarray_dataarray_allclose_xy(actual, expected, rtol=rtol, atol=atol)
+        else:
+            assert_xarray_dataarray_allclose(actual, expected, rtol=rtol, atol=atol)
     else:
         raise ValueError(f"Unsupported types: {type(actual)} and {type(expected)}")
 
diff --git a/tests/testing/test_results.py b/tests/testing/test_results.py
index 9bff2a4f1..73de47e95 100644
--- a/tests/testing/test_results.py
+++ b/tests/testing/test_results.py
@@ -13,7 +13,6 @@
 from openeo.testing.results import (
     _compare_xarray_dataarray,
     assert_job_results_allclose,
-    assert_xarray_allclose,
     assert_xarray_dataarray_allclose,
     assert_xarray_dataset_allclose,
 )
@@ -36,7 +35,6 @@ def test_simple_defaults(self):
                 [
                     "Coordinates mismatch for dimension 'dim_0': [0 1 2 3] != [0 1 2]",
                     "Shape mismatch: (4,) != (3,)",
-                    dirty_equals.IsStr(regex="Left and right DataArray objects are not close.*", regex_flags=re.DOTALL),
                 ],
             ),
             (
@@ -45,7 +43,6 @@ def test_simple_defaults(self):
                     "Dimension mismatch: ('dim_0', 'dim_1') != ('dim_0',)",
                     "Coordinates mismatch for dimension 'dim_0': [0 1] != [0 1 2]",
                     "Shape mismatch: (2, 3) != (3,)",
-                    dirty_equals.IsStr(regex="Left and right DataArray objects are not close.*", regex_flags=re.DOTALL),
                 ],
             ),
             (
@@ -53,7 +50,6 @@ def test_simple_defaults(self):
                 [
                     "Dimension mismatch: ('dim_0', 'dim_1') != ('dim_0',)",
                     "Shape mismatch: (3, 1) != (3,)",
-                    dirty_equals.IsStr(regex="Left and right DataArray objects are not close.*", regex_flags=re.DOTALL),
                 ],
             ),
         ],
@@ -75,20 +71,12 @@ def test_simple_shape_mismatch(self, actual, expected_issues):
                     "Dimension mismatch: ('y', 'x') != ('x', 'y')",
                     "Coordinates mismatch for dimension 'x': [0 1 2] != [0 1]",
                     "Coordinates mismatch for dimension 'y': [0 1] != [0 1 2]",
-                    dirty_equals.IsStr(
-                        regex=r"Left and right DataArray objects are not close.*Differing dimensions:.*\(y: 2, x: 3\) != \(x: 2, y: 3\)",
-                        regex_flags=re.DOTALL,
-                    ),
                 ],
             ),
             (
                 xarray.DataArray([[1, 2, 3], [4, 5, 6]], dims=["x", "z"]),
                 [
                     "Dimension mismatch: ('x', 'z') != ('x', 'y')",
-                    dirty_equals.IsStr(
-                        regex=r"Left and right DataArray objects are not close.*Differing dimensions:.*\(x: 2, z: 3\) != \(x: 2, y: 3\)",
-                        regex_flags=re.DOTALL,
-                    ),
                 ],
             ),
         ],
@@ -108,10 +96,6 @@ def test_simple_dims_mismatch(self, actual, expected_issues):
                 xarray.DataArray([[1, 2, 3], [4, 5, 6]], coords=[("x", [111, 222]), ("y", [33, 44, 55])]),
                 [
                     "Coordinates mismatch for dimension 'x': [111 222] != [11 22]",
-                    dirty_equals.IsStr(
-                        regex=r"Left and right DataArray objects are not close.*Differing coordinates:.*L \* x\s+\(x\).*?111 222.*R \* x\s+\(x\).*?11 22",
-                        regex_flags=re.DOTALL,
-                    ),
                 ],
             ),
         ],
@@ -351,6 +335,108 @@ def test_allclose_minimal_success(self, tmp_path, actual_dir, expected_dir):
         ds.to_netcdf(actual_dir / "data.nc")
         assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path)
 
+    def test_allclose_xy_success(self, tmp_path, actual_dir, expected_dir):
+        expected_ds = xarray.Dataset(
+            {
+                "b1": xarray.Variable(dims=["t", "x", "y"], data=2 * numpy.ones((3, 4, 5))),
+                "b2": xarray.Variable(dims=["t", "x", "y"], data=3 * numpy.ones((3, 4, 5))),
+            },
+            coords={
+                "t": range(0, 3),
+                "x": range(4, 8),
+                "y": range(5, 10),
+            },
+        )
+        expected_ds.to_netcdf(expected_dir / "data.nc")
+        actual_ds = xarray.Dataset(
+            {
+                "b1": xarray.Variable(dims=["t", "x", "y"], data=1 * numpy.ones((3, 4, 5))),
+                "b2": xarray.Variable(dims=["t", "x", "y"], data=3 * numpy.ones((3, 4, 5))),
+            },
+            coords={
+                "t": range(0, 3),
+                "x": range(4, 8),
+                "y": range(5, 10),
+            },
+        )
+        actual_ds.to_netcdf(actual_dir / "data.nc")
+        assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path, rtol=1)
+
+    def test_allclose_minimal_xy_different(self, tmp_path, actual_dir, expected_dir):
+        expected_ds = xarray.Dataset(
+            {
+                "b1": xarray.Variable(dims=["t", "x", "y"], data=2 * numpy.ones((3, 4, 5))),
+                "b2": xarray.Variable(dims=["t", "x", "y"], data=3 * numpy.ones((3, 4, 5))),
+            },
+            coords={
+                "t": range(0, 3),
+                "x": range(4, 8),
+                "y": range(5, 10),
+            },
+        )
+        expected_ds.to_netcdf(expected_dir / "data.nc")
+        actual_ds = xarray.Dataset(
+            {
+                "b1": xarray.Variable(dims=["t", "x", "y"], data=1 * numpy.ones((3, 4, 5))),
+                "b2": xarray.Variable(dims=["t", "x", "y"], data=3 * numpy.ones((3, 4, 5))),
+            },
+            coords={
+                "t": range(0, 3),
+                "x": range(4, 8),
+                "y": range(5, 10),
+            },
+        )
+        actual_ds.to_netcdf(actual_dir / "data.nc")
+        with raises_assertion_error_or_not(
+            r"Issues for file 'data.nc'.*"
+            r"Issues for variable 'b1'.*"
+            r"t 0: value difference min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
+            r"t 0: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area.*"
+            r"t 1: value difference min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
+            r"t 1: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area.*"
+            r"t 2: value difference min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
+            r"t 2: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area"
+        ):
+            assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path)
+
+    def test_allclose_minimal_xy_different_small_area(self, tmp_path, actual_dir, expected_dir):
+        expected_ds = xarray.Dataset(
+            {
+                "b1": xarray.Variable(dims=["t", "x", "y"], data=2 * numpy.ones((3, 4, 5))),
+                "b2": xarray.Variable(dims=["t", "x", "y"], data=3 * numpy.ones((3, 4, 5))),
+            },
+            coords={
+                "t": range(0, 3),
+                "x": range(4, 8),
+                "y": range(5, 10),
+            },
+        )
+        expected_ds.to_netcdf(expected_dir / "data.nc")
+        b2_modified_data = 3 * numpy.ones((3, 4, 5))
+        b2_modified_data[2][2][2] *= 15
+        b2_modified_data[2][2][3] *= 14
+        b2_modified_data[2][3][2] *= 13
+        b2_modified_data[2][3][3] *= 12
+        actual_ds = xarray.Dataset(
+            {
+                "b1": xarray.Variable(dims=["t", "x", "y"], data=2 * numpy.ones((3, 4, 5))),
+                "b2": xarray.Variable(dims=["t", "x", "y"], data=b2_modified_data),
+            },
+            coords={
+                "t": range(0, 3),
+                "x": range(4, 8),
+                "y": range(5, 10),
+            },
+        )
+        actual_ds.to_netcdf(actual_dir / "data.nc")
+        with raises_assertion_error_or_not(
+            r"Issues for file 'data.nc'.*"
+            r"Issues for variable 'b2'.*"
+            r"t 2: value difference min:33.0, max: 42.0, mean: 37.5, var: 11.2.*"
+            r"t 2: differing pixels: 4/20 \(20.0%\), spread over 8.3% of the area"
+        ):
+            assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path)
+
     def test_allclose_basic_fail(self, tmp_path, actual_dir, expected_dir):
         expected_ds = xarray.Dataset({"a": (["time"], [1, 2, 3])}, coords={"time": [11, 22, 33]})
         expected_ds.to_netcdf(expected_dir / "data.nc")

From e7237298e8d5a73b78cea74952be3de8c7039749 Mon Sep 17 00:00:00 2001
From: dsamaey <diether.samaey@vito.be>
Date: Wed, 23 Apr 2025 09:44:21 +0200
Subject: [PATCH 02/10] Issue #761 better diff for apex reference check (added
 scipy dependency)

---
 setup.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index d2c22b39f..bd48199b6 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,6 @@
 with open("openeo/_version.py") as fp:
     exec(fp.read(), _version)
 
-
 with open("README.md", "r") as fh:
     long_description = fh.read()
 
@@ -22,7 +21,8 @@
     "mock",
     "requests-mock>=1.8.0",
     "httpretty>=1.1.4",
-    "urllib3<2.3.0",  # httpretty doesn't work properly with urllib3>=2.3.0. See #700 and https://github.com/gabrielfalcao/HTTPretty/issues/484
+    "urllib3<2.3.0",
+    # httpretty doesn't work properly with urllib3>=2.3.0. See #700 and https://github.com/gabrielfalcao/HTTPretty/issues/484
     "netCDF4>=1.7.0",
     "matplotlib",  # TODO: eliminate matplotlib as test dependency
     # TODO #717 Simplify geopandas constraints when Python 3.8 support is dropped
@@ -35,6 +35,7 @@
     "pyarrow>=10.0.1",  # For Parquet read/write support in pandas
     "python-dateutil>=2.7.0",
     "pystac-client>=0.7.5",
+    "scipy",  # for Convex Hull algorithm
 ]
 
 docs_require = [
@@ -56,7 +57,6 @@
     "ipython",
 ]
 
-
 name = "openeo"
 setup(
     name=name,

From a25597bae8e9302657cab7d10291194dcb040dcc Mon Sep 17 00:00:00 2001
From: dsamaey <diether.samaey@vito.be>
Date: Wed, 30 Apr 2025 09:26:53 +0200
Subject: [PATCH 03/10] Issue #761 better diff for apex reference check (added
 ascii art diff)

---
 openeo/testing/results.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/openeo/testing/results.py b/openeo/testing/results.py
index 7076aaa59..cac777762 100644
--- a/openeo/testing/results.py
+++ b/openeo/testing/results.py
@@ -90,6 +90,27 @@ def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.Dat
     return data
 
 
+def ascii_art(diff_data: DataArray) -> str:
+    scale: int = max(1, (diff_data.sizes["x"] / 100))
+    data_max = diff_data.max().item()
+    if data_max == 0:
+        data_max = 1
+    grayscale_characters = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\"^`'. "
+    coarsened = diff_data.coarsen(dim={"x": scale, "y": scale}, boundary="pad").all()
+    top = "┌" + "─" * coarsened.sizes["x"] + "┐\n"
+    bottom = "\n└" + "─" * coarsened.sizes["x"] + "┘"
+    return (
+        top
+        + "\n".join(
+            [
+                "│" + "".join([grayscale_characters[70 - int(v * 70 / data_max)] for v in row]) + "│"
+                for row in coarsened.transpose()
+            ]
+        )
+        + bottom
+    )
+
+
 def _compare_xarray_dataarray_xy(
     actual: Union[xarray.DataArray, str, Path],
     expected: Union[xarray.DataArray, str, Path],
@@ -155,6 +176,9 @@ def _compare_xarray_dataarray_xy(
                     f"{key}: value difference min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}"
                 )
 
+                print(f"Difference ascii art for {key}")
+                print(ascii_art(diff_data))
+
                 coord_grid = np.meshgrid(diff_data.coords["y"], diff_data.coords["x"])
                 mask = diff_data.notnull()
                 c1 = coord_grid[0][mask]

From d88a124729c27cb9375b18e1a223070e5a76d072 Mon Sep 17 00:00:00 2001
From: dsamaey <diether.samaey@vito.be>
Date: Wed, 30 Apr 2025 09:30:21 +0200
Subject: [PATCH 04/10] Issue #761 better diff for apex reference check (added
 missing DataArray import for ascii art diff)

---
 openeo/testing/results.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/openeo/testing/results.py b/openeo/testing/results.py
index cac777762..2ac5ee38f 100644
--- a/openeo/testing/results.py
+++ b/openeo/testing/results.py
@@ -12,6 +12,7 @@
 import xarray
 import xarray.testing
 from scipy.spatial import ConvexHull
+from xarray import DataArray
 
 from openeo.rest.job import DEFAULT_JOB_RESULTS_FILENAME, BatchJob, JobResults
 from openeo.util import repr_truncate

From 916b86881a66904ea9891ec30fc643f0a082f50f Mon Sep 17 00:00:00 2001
From: dsamaey <diether.samaey@vito.be>
Date: Wed, 30 Apr 2025 10:41:37 +0200
Subject: [PATCH 05/10] Issue #761 better diff for apex reference check (more
 robust xy/yx grid handling)

---
 openeo/testing/results.py     | 32 ++++++++++++++++++--------------
 tests/testing/test_results.py |  8 ++++----
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/openeo/testing/results.py b/openeo/testing/results.py
index 2ac5ee38f..ea0d25673 100644
--- a/openeo/testing/results.py
+++ b/openeo/testing/results.py
@@ -92,24 +92,24 @@ def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.Dat
 
 
 def ascii_art(diff_data: DataArray) -> str:
-    scale: int = max(1, (diff_data.sizes["x"] / 100))
+    scale: int = max(1, int(diff_data.sizes["x"] / 100))
     data_max = diff_data.max().item()
     if data_max == 0:
         data_max = 1
     grayscale_characters = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\"^`'. "
     coarsened = diff_data.coarsen(dim={"x": scale, "y": scale}, boundary="pad").all()
+    if coarsened.dims[0] != "y":
+        coarsened = coarsened.transpose()
     top = "┌" + "─" * coarsened.sizes["x"] + "┐\n"
     bottom = "\n└" + "─" * coarsened.sizes["x"] + "┘"
-    return (
-        top
-        + "\n".join(
-            [
-                "│" + "".join([grayscale_characters[70 - int(v * 70 / data_max)] for v in row]) + "│"
-                for row in coarsened.transpose()
-            ]
-        )
-        + bottom
-    )
+
+    def pixelChar(v) -> str:
+        i = int(v * 70 / data_max)
+        if v > 0 and i == 0:
+            i = 1
+        return grayscale_characters[69 - i]
+
+    return top + "\n".join(["│" + "".join([pixelChar(v) for v in row]) + "│" for row in coarsened]) + bottom
 
 
 def _compare_xarray_dataarray_xy(
@@ -174,14 +174,18 @@ def _compare_xarray_dataarray_xy(
 
                 key = ",".join([f"{k} {str(v1)}" for k, v1 in indexers.items()])
                 issues.append(
-                    f"{key}: value difference min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}"
+                    f"{key}: value difference exceeds tolerance (rtol {rtol}, atol {atol}), min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}"
                 )
 
                 print(f"Difference ascii art for {key}")
-                print(ascii_art(diff_data))
+                art = ascii_art(diff_data)
+                print(art)
+
+                coord_grid = np.meshgrid(diff_data.coords["x"], diff_data.coords["y"])
 
-                coord_grid = np.meshgrid(diff_data.coords["y"], diff_data.coords["x"])
                 mask = diff_data.notnull()
+                if mask.dims[0] != "y":
+                    mask = mask.transpose()
                 c1 = coord_grid[0][mask]
                 c2 = coord_grid[1][mask]
                 coordinates = np.dstack((c1, c2)).reshape(-1, 2)
diff --git a/tests/testing/test_results.py b/tests/testing/test_results.py
index 73de47e95..6b5f76f38 100644
--- a/tests/testing/test_results.py
+++ b/tests/testing/test_results.py
@@ -390,11 +390,11 @@ def test_allclose_minimal_xy_different(self, tmp_path, actual_dir, expected_dir)
         with raises_assertion_error_or_not(
             r"Issues for file 'data.nc'.*"
             r"Issues for variable 'b1'.*"
-            r"t 0: value difference min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
+            r"t 0: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
             r"t 0: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area.*"
-            r"t 1: value difference min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
+            r"t 1: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
             r"t 1: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area.*"
-            r"t 2: value difference min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
+            r"t 2: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
             r"t 2: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area"
         ):
             assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path)
@@ -432,7 +432,7 @@ def test_allclose_minimal_xy_different_small_area(self, tmp_path, actual_dir, ex
         with raises_assertion_error_or_not(
             r"Issues for file 'data.nc'.*"
             r"Issues for variable 'b2'.*"
-            r"t 2: value difference min:33.0, max: 42.0, mean: 37.5, var: 11.2.*"
+            r"t 2: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:33.0, max: 42.0, mean: 37.5, var: 11.2.*"
             r"t 2: differing pixels: 4/20 \(20.0%\), spread over 8.3% of the area"
         ):
             assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path)

From ce0cc5fd8bae3a9b6270b8565b47ad6e5951d48a Mon Sep 17 00:00:00 2001
From: dsamaey <diether.samaey@vito.be>
Date: Tue, 6 May 2025 16:24:27 +0200
Subject: [PATCH 06/10] Issue #761 better diff for apex reference check (fixed
 boundary value issue)

---
 openeo/testing/results.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/openeo/testing/results.py b/openeo/testing/results.py
index ea0d25673..3c8ac51ea 100644
--- a/openeo/testing/results.py
+++ b/openeo/testing/results.py
@@ -97,16 +97,19 @@ def ascii_art(diff_data: DataArray) -> str:
     if data_max == 0:
         data_max = 1
     grayscale_characters = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\"^`'. "
-    coarsened = diff_data.coarsen(dim={"x": scale, "y": scale}, boundary="pad").all()
-    if coarsened.dims[0] != "y":
-        coarsened = coarsened.transpose()
+    coarsened = diff_data.coarsen(dim={"x": scale, "y": scale}, boundary="pad").mean()
+    coarsened = coarsened.transpose("y", "x", ...)
     top = "┌" + "─" * coarsened.sizes["x"] + "┐\n"
     bottom = "\n└" + "─" * coarsened.sizes["x"] + "┘"
 
     def pixelChar(v) -> str:
-        i = int(v * 70 / data_max)
+        if np.isnan(v):
+            return " "
+        i = int(v * 69 / data_max)
         if v > 0 and i == 0:
             i = 1
+        else:
+            i = min(69, i)
         return grayscale_characters[69 - i]
 
     return top + "\n".join(["│" + "".join([pixelChar(v) for v in row]) + "│" for row in coarsened]) + bottom
@@ -169,18 +172,14 @@ def _compare_xarray_dataarray_xy(
 
             if diff_pixel_count > 0:
                 diff_pixel_percentage = round(diff_pixel_count * 100 / total_pixel_count, 1)
-                diff_mean = round(diff_data.mean().item(), 1)
-                diff_var = round(diff_data.var().item(), 1)
+                diff_mean = round(diff_data.mean().item(), 2)
+                diff_var = round(diff_data.var().item(), 2)
 
                 key = ",".join([f"{k} {str(v1)}" for k, v1 in indexers.items()])
                 issues.append(
                     f"{key}: value difference exceeds tolerance (rtol {rtol}, atol {atol}), min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}"
                 )
 
-                print(f"Difference ascii art for {key}")
-                art = ascii_art(diff_data)
-                print(art)
-
                 coord_grid = np.meshgrid(diff_data.coords["x"], diff_data.coords["y"])
 
                 mask = diff_data.notnull()
@@ -189,6 +188,11 @@ def _compare_xarray_dataarray_xy(
                 c1 = coord_grid[0][mask]
                 c2 = coord_grid[1][mask]
                 coordinates = np.dstack((c1, c2)).reshape(-1, 2)
+
+                art = ascii_art(diff_data)
+                print(f"Difference ascii art for {key}")
+                print(art)
+
                 if len(coordinates) > 2:
                     hull = ConvexHull(coordinates)
                     area = hull.volume

From 3e8860fe32952aad19d30b4cd3a01a14162f621f Mon Sep 17 00:00:00 2001
From: dsamaey <diether.samaey@vito.be>
Date: Thu, 8 May 2025 08:46:16 +0200
Subject: [PATCH 07/10] Issue #761 better diff for apex reference check
 (default ascii_art max_width and aspect ratio)

---
 openeo/testing/results.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/openeo/testing/results.py b/openeo/testing/results.py
index 3c8ac51ea..d0d8badef 100644
--- a/openeo/testing/results.py
+++ b/openeo/testing/results.py
@@ -91,13 +91,14 @@ def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.Dat
     return data
 
 
-def ascii_art(diff_data: DataArray) -> str:
-    scale: int = max(1, int(diff_data.sizes["x"] / 100))
+def ascii_art(diff_data: DataArray, *, max_width: int = 60, y_vs_x_aspect_ratio=2.5) -> str:
+    x_scale: int = max(1, int(diff_data.sizes["x"] / max_width))
+    y_scale: int = max(1, int(diff_data.sizes["x"] / (max_width * y_vs_x_aspect_ratio)))
     data_max = diff_data.max().item()
     if data_max == 0:
         data_max = 1
     grayscale_characters = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\"^`'. "
-    coarsened = diff_data.coarsen(dim={"x": scale, "y": scale}, boundary="pad").mean()
+    coarsened = diff_data.coarsen(dim={"x": x_scale, "y": y_scale}, boundary="pad").mean()
     coarsened = coarsened.transpose("y", "x", ...)
     top = "┌" + "─" * coarsened.sizes["x"] + "┐\n"
     bottom = "\n└" + "─" * coarsened.sizes["x"] + "┘"

From ce58771b832051aa455ac80b8a2e11884da6415a Mon Sep 17 00:00:00 2001
From: dsamaey <diether.samaey@vito.be>
Date: Thu, 8 May 2025 10:19:00 +0200
Subject: [PATCH 08/10] Issue #761 better diff for apex reference check (deps
 fix)

---
 setup.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index bd48199b6..1f960f5ef 100644
--- a/setup.py
+++ b/setup.py
@@ -21,8 +21,7 @@
     "mock",
     "requests-mock>=1.8.0",
     "httpretty>=1.1.4",
-    "urllib3<2.3.0",
-    # httpretty doesn't work properly with urllib3>=2.3.0. See #700 and https://github.com/gabrielfalcao/HTTPretty/issues/484
+    "urllib3<2.3.0",  # httpretty doesn't work properly with urllib3>=2.3.0. See #700 and https://github.com/gabrielfalcao/HTTPretty/issues/484
     "netCDF4>=1.7.0",
     "matplotlib",  # TODO: eliminate matplotlib as test dependency
     # TODO #717 Simplify geopandas constraints when Python 3.8 support is dropped
@@ -35,7 +34,6 @@
     "pyarrow>=10.0.1",  # For Parquet read/write support in pandas
     "python-dateutil>=2.7.0",
     "pystac-client>=0.7.5",
-    "scipy",  # for Convex Hull algorithm
 ]
 
 docs_require = [
@@ -84,6 +82,7 @@
         "deprecated>=1.2.12",
         'oschmod>=0.3.12; sys_platform == "win32"',
         "importlib_resources; python_version<'3.9'",
+        "scipy",  # for Convex Hull algorithm
     ],
     extras_require={
         "tests": tests_require,

From 5bcc5e70ac369d3a3a684b6b4ba9aa7b77172bd5 Mon Sep 17 00:00:00 2001
From: dsamaey <diether.samaey@vito.be>
Date: Thu, 8 May 2025 10:19:42 +0200
Subject: [PATCH 09/10] Issue #761 better diff for apex reference check

---
 openeo/testing/results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openeo/testing/results.py b/openeo/testing/results.py
index d0d8badef..1f58f37be 100644
--- a/openeo/testing/results.py
+++ b/openeo/testing/results.py
@@ -406,7 +406,7 @@ def assert_xarray_allclose(
     if isinstance(actual, xarray.Dataset) and isinstance(expected, xarray.Dataset):
         assert_xarray_dataset_allclose(actual, expected, rtol=rtol, atol=atol)
     elif isinstance(actual, xarray.DataArray) and isinstance(expected, xarray.DataArray):
-        if (["x", "y", "band"]).elements_in(expected.dims):
+        if {"x", "y"}.issubset(expected.dims):
             assert_xarray_dataarray_allclose_xy(actual, expected, rtol=rtol, atol=atol)
         else:
             assert_xarray_dataarray_allclose(actual, expected, rtol=rtol, atol=atol)

From fcad19e50b515e03f68edcdc10f4d356936fbc2f Mon Sep 17 00:00:00 2001
From: dsamaey <diether.samaey@vito.be>
Date: Thu, 8 May 2025 17:55:34 +0200
Subject: [PATCH 10/10] Issue #761 better diff for apex reference check
 (replaced convex hull with bbox, _compare_xarray_dataarray_xy now only adds
 to the original xarray implementation)

---
 openeo/testing/results.py     | 221 ++++++++++++----------------------
 setup.py                      |   1 -
 tests/testing/test_results.py |  25 +++-
 3 files changed, 100 insertions(+), 147 deletions(-)

diff --git a/openeo/testing/results.py b/openeo/testing/results.py
index 1f58f37be..67e678723 100644
--- a/openeo/testing/results.py
+++ b/openeo/testing/results.py
@@ -11,7 +11,6 @@
 import numpy as np
 import xarray
 import xarray.testing
-from scipy.spatial import ConvexHull
 from xarray import DataArray
 
 from openeo.rest.job import DEFAULT_JOB_RESULTS_FILENAME, BatchJob, JobResults
@@ -19,10 +18,12 @@
 
 _log = logging.getLogger(__name__)
 
-
 _DEFAULT_RTOL = 1e-6
 _DEFAULT_ATOL = 1e-6
 
+# Grayscale character ramps for ASCII art, ordered from blank to densest glyph (source: https://paulbourke.net/dataformats/asciiart)
+DEFAULT_GRAYSCALE_70_CHARACTERS = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\"^`'. "[::-1]
+DEFAULT_GRAYSCALE_10_CHARACTERS = " .:-=+*#%@"
 
 def _load_xarray_netcdf(path: Union[str, Path], **kwargs) -> xarray.Dataset:
     """
@@ -91,29 +92,33 @@ def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.Dat
     return data
 
 
-def ascii_art(diff_data: DataArray, *, max_width: int = 60, y_vs_x_aspect_ratio=2.5) -> str:
+def _ascii_art(
+    diff_data: DataArray,
+    *,
+    max_width: int = 60,
+    y_vs_x_aspect_ratio=2.5,
+    grayscale_characters: str = DEFAULT_GRAYSCALE_70_CHARACTERS,
+) -> str:
+    max_grayscale_idx = len(grayscale_characters) - 1
     x_scale: int = max(1, int(diff_data.sizes["x"] / max_width))
-    y_scale: int = max(1, int(diff_data.sizes["x"] / (max_width * y_vs_x_aspect_ratio)))
+    y_scale: int = max(1, int(diff_data.sizes["y"] / (max_width / y_vs_x_aspect_ratio)))
     data_max = diff_data.max().item()
     if data_max == 0:
         data_max = 1
-    grayscale_characters = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\"^`'. "
     coarsened = diff_data.coarsen(dim={"x": x_scale, "y": y_scale}, boundary="pad").mean()
     coarsened = coarsened.transpose("y", "x", ...)
     top = "┌" + "─" * coarsened.sizes["x"] + "┐\n"
     bottom = "\n└" + "─" * coarsened.sizes["x"] + "┘"
 
-    def pixelChar(v) -> str:
-        if np.isnan(v):
-            return " "
-        i = int(v * 69 / data_max)
+    def _pixel_char(v) -> str:
+        i = 0 if np.isnan(v) else int(v * max_grayscale_idx / data_max)
         if v > 0 and i == 0:
-            i = 1
+            i = 1  # don't show a blank for a difference above the threshold
         else:
-            i = min(69, i)
-        return grayscale_characters[69 - i]
+            i = min(max_grayscale_idx, i)
+        return grayscale_characters[i]
 
-    return top + "\n".join(["│" + "".join([pixelChar(v) for v in row]) + "│" for row in coarsened]) + bottom
+    return top + "\n".join(["│" + "".join([_pixel_char(v) for v in row]) + "│" for row in coarsened]) + bottom
 
 
 def _compare_xarray_dataarray_xy(
@@ -122,96 +127,60 @@ def _compare_xarray_dataarray_xy(
     *,
     rtol: float = _DEFAULT_RTOL,
     atol: float = _DEFAULT_ATOL,
+    name: str = None,
 ) -> List[str]:
     """
-    Compare two xarray DataArrays with tolerance and report mismatch issues (as strings)
-
-    Checks that are done (with tolerance):
-    - (optional) Check fraction of mismatching pixels (difference exceeding some tolerance).
-      If fraction is below a given threshold, ignore these mismatches in subsequent comparisons.
-      If fraction is above the threshold, report this issue.
-    - Compare actual and expected data with `xarray.testing.assert_allclose` and specified tolerances.
-
+    Additional comparison of two compatible spatial xarray DataArrays with tolerance (rtol, atol).
     :return: list of issues (empty if no issues)
     """
-    # TODO: make this a public function?
-    # TODO: option for nodata fill value?
-    # TODO: option to include data type check?
-    # TODO: option to cast to some data type (or even rescale) before comparison?
-    # TODO: also compare attributes of the DataArray?
-    actual = _as_xarray_dataarray(actual)
-    expected = _as_xarray_dataarray(expected)
     issues = []
-
-    if actual.dims != expected.dims:
-        issues.append(f"Dimension mismatch: {actual.dims} != {expected.dims}")
-    for dim in sorted(set(expected.dims).intersection(actual.dims)):
-        acs = actual.coords[dim].values
-        ecs = expected.coords[dim].values
-        if not (acs.shape == ecs.shape and (acs == ecs).all()):
-            issues.append(f"Coordinates mismatch for dimension {dim!r}: {acs} != {ecs}")
-    if actual.shape != expected.shape:
-        issues.append(f"Shape mismatch: {actual.shape} != {expected.shape}")
-
-    if not issues:
-        threshold = abs(expected * rtol) + atol
-        diff_exact = abs(expected - actual)
-        diff_mask = diff_exact > threshold
-        diff_lenient = diff_exact.where(diff_mask)
-
-        non_x_y_dims = list(set(expected.dims) - {"x", "y"})
-        value_mapping = dict(map(lambda d: (d, expected[d].data), non_x_y_dims))
-        shape = tuple([len(value_mapping[x]) for x in non_x_y_dims])
-
-        for shape_index, v in np.ndenumerate(np.ndarray(shape)):
-            indexers = {}
-            for index, value_index in enumerate(shape_index):
-                indexers[non_x_y_dims[index]] = value_mapping[non_x_y_dims[index]][value_index]
-            diff_data = diff_lenient.sel(indexers=indexers)
-            total_pixel_count = expected.sel(indexers).count().item()
-            diff_pixel_count = diff_data.count().item()
-
-            if diff_pixel_count > 0:
-                diff_pixel_percentage = round(diff_pixel_count * 100 / total_pixel_count, 1)
-                diff_mean = round(diff_data.mean().item(), 2)
-                diff_var = round(diff_data.var().item(), 2)
-
-                key = ",".join([f"{k} {str(v1)}" for k, v1 in indexers.items()])
-                issues.append(
-                    f"{key}: value difference exceeds tolerance (rtol {rtol}, atol {atol}), min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}"
-                )
-
-                coord_grid = np.meshgrid(diff_data.coords["x"], diff_data.coords["y"])
-
-                mask = diff_data.notnull()
-                if mask.dims[0] != "y":
-                    mask = mask.transpose()
-                c1 = coord_grid[0][mask]
-                c2 = coord_grid[1][mask]
-                coordinates = np.dstack((c1, c2)).reshape(-1, 2)
-
-                art = ascii_art(diff_data)
-                print(f"Difference ascii art for {key}")
-                print(art)
-
-                if len(coordinates) > 2:
-                    hull = ConvexHull(coordinates)
-                    area = hull.volume
-
-                    x_m = diff_data.coords["x"][0].data
-                    x_M = diff_data.coords["x"][-1].data
-                    y_m = diff_data.coords["y"][0].data
-                    y_M = diff_data.coords["y"][-1].data
-
-                    total_area = abs((y_M - y_m) * (x_M - x_m))
-                    area_percentage = round(area * 100 / total_area, 1)
-                    issues.append(
-                        f"{key}: differing pixels: {diff_pixel_count}/{total_pixel_count} ({diff_pixel_percentage}%), spread over {area_percentage}% of the area"
-                    )
-                else:
-                    issues.append(
-                        f"{key}: differing pixels: {diff_pixel_count}/{total_pixel_count} ({diff_pixel_percentage}%)"
-                    )
+    threshold = abs(expected * rtol) + atol
+    diff_exact = abs(expected - actual)
+    diff_mask = diff_exact > threshold
+    diff_lenient = diff_exact.where(diff_mask)
+
+    non_x_y_dims = list(set(expected.dims) - {"x", "y"})
+    value_mapping = dict(map(lambda d: (d, expected[d].data), non_x_y_dims))
+    shape = tuple([len(value_mapping[x]) for x in non_x_y_dims])
+
+    for shape_index, v in np.ndenumerate(np.ndarray(shape)):
+        indexers = {}
+        for index, value_index in enumerate(shape_index):
+            indexers[non_x_y_dims[index]] = value_mapping[non_x_y_dims[index]][value_index]
+        diff_data = diff_lenient.sel(indexers=indexers)
+        total_pixel_count = expected.sel(indexers).count().item()
+        diff_pixel_count = diff_data.count().item()
+
+        if diff_pixel_count > 0:
+            diff_pixel_percentage = round(diff_pixel_count * 100 / total_pixel_count, 1)
+            diff_mean = round(diff_data.mean().item(), 2)
+            diff_var = round(diff_data.var().item(), 2)
+
+            key = name + ": " if name else ""
+            key += ",".join([f"{k} {str(v1)}" for k, v1 in indexers.items()])
+            issues.append(
+                f"{key}: value difference exceeds tolerance (rtol {rtol}, atol {atol}), min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}"
+            )
+
+            _log.warning(f"Difference (ascii art) for {key}:\n{_ascii_art(diff_data)}")
+
+            coord_grid = np.meshgrid(diff_data.coords["x"], diff_data.coords["y"])
+            mask = diff_data.notnull()
+            if mask.dims[0] != "y":
+                mask = mask.transpose()
+            x_coords = coord_grid[0][mask]
+            y_coords = coord_grid[1][mask]
+
+            diff_bbox = ((x_coords.min().item(), y_coords.min().item()), (x_coords.max().item(), y_coords.max().item()))
+            diff_area = (x_coords.max() - x_coords.min()) * (y_coords.max() - y_coords.min())
+            total_area = abs(
+                (diff_data.coords["y"][-1].data - diff_data.coords["y"][0].data)
+                * (diff_data.coords["x"][-1].data - diff_data.coords["x"][0].data)
+            )
+            area_percentage = round(diff_area * 100 / total_area, 1)
+            issues.append(
+                f"{key}: differing pixels: {diff_pixel_count}/{total_pixel_count} ({diff_pixel_percentage}%), bbox {diff_bbox} - {area_percentage}% of the area"
+            )
     return issues
 
 
@@ -221,6 +190,7 @@ def _compare_xarray_dataarray(
     *,
     rtol: float = _DEFAULT_RTOL,
     atol: float = _DEFAULT_ATOL,
+    name: str = None,
 ) -> List[str]:
     """
     Compare two xarray DataArrays with tolerance and report mismatch issues (as strings)
@@ -243,7 +213,7 @@ def _compare_xarray_dataarray(
     issues = []
 
     # `xarray.testing.assert_allclose` currently does not always
-    # provides detailed information about shape/dimension mismatches
+    # provide detailed information about shape/dimension mismatches
     # so we enrich the issue listing with some more details
     if actual.dims != expected.dims:
         issues.append(f"Dimension mismatch: {actual.dims} != {expected.dims}")
@@ -254,17 +224,14 @@ def _compare_xarray_dataarray(
             issues.append(f"Coordinates mismatch for dimension {dim!r}: {acs} != {ecs}")
     if actual.shape != expected.shape:
         issues.append(f"Shape mismatch: {actual.shape} != {expected.shape}")
-
-    if not issues:
-        if {"x", "y"} <= set(expected.dims):
-            issues = _compare_xarray_dataarray_xy(actual=actual, expected=expected, rtol=rtol, atol=atol)
-        else:
-            try:
-                xarray.testing.assert_allclose(a=actual, b=expected, rtol=rtol, atol=atol)
-            except AssertionError as e:
-                # TODO: message of `assert_allclose` is typically multiline, split it again or make it one line?
-                issues.append(str(e).strip())
-
+    compatible = len(issues) == 0
+    try:
+        xarray.testing.assert_allclose(a=actual, b=expected, rtol=rtol, atol=atol)
+    except AssertionError as e:
+        # TODO: message of `assert_allclose` is typically multiline, split it again or make it one line?
+        issues.append(str(e).strip())
+    if compatible and {"x", "y"} <= set(expected.dims):
+        issues.extend(_compare_xarray_dataarray_xy(actual=actual, expected=expected, rtol=rtol, atol=atol, name=name))
     return issues
 
 
@@ -293,32 +260,6 @@ def assert_xarray_dataarray_allclose(
     if issues:
         raise AssertionError("\n".join(issues))
 
-
-def assert_xarray_dataarray_allclose_xy(
-    actual: Union[xarray.DataArray, str, Path],
-    expected: Union[xarray.DataArray, str, Path],
-    *,
-    rtol: float = _DEFAULT_RTOL,
-    atol: float = _DEFAULT_ATOL,
-):
-    """
-    Assert that two Xarray ``DataArray`` instances are equal (with tolerance).
-
-    :param actual: actual data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file.
-    :param expected: expected or reference data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file.
-    :param rtol: relative tolerance
-    :param atol: absolute tolerance
-    :raises AssertionError: if not equal within the given tolerance
-
-    .. versionadded:: 0.31.0
-
-    .. warning::
-        This function is experimental and subject to change.
-    """
-    issues = _compare_xarray_dataarray_xy(actual=actual, expected=expected, rtol=rtol, atol=atol)
-    if issues:
-        raise AssertionError("\n".join(issues))
-
 def _compare_xarray_datasets(
     actual: Union[xarray.Dataset, str, Path],
     expected: Union[xarray.Dataset, str, Path],
@@ -336,7 +277,6 @@ def _compare_xarray_datasets(
     expected = _as_xarray_dataset(expected)
 
     all_issues = []
-    # TODO: just leverage DataSet support in xarray.testing.assert_allclose for all this?
     actual_vars = set(actual.data_vars)
     expected_vars = set(expected.data_vars)
     _log.debug(f"_compare_xarray_datasets: actual_vars={actual_vars!r} expected_vars={expected_vars!r}")
@@ -344,7 +284,7 @@ def _compare_xarray_datasets(
         all_issues.append(f"Xarray DataSet variables mismatch: {actual_vars} != {expected_vars}")
     for var in expected_vars.intersection(actual_vars):
         _log.debug(f"_compare_xarray_datasets: comparing variable {var!r}")
-        issues = _compare_xarray_dataarray(actual[var], expected[var], rtol=rtol, atol=atol)
+        issues = _compare_xarray_dataarray(actual[var], expected[var], rtol=rtol, atol=atol, name=var)
         if issues:
             all_issues.append(f"Issues for variable {var!r}:")
             all_issues.extend(issues)
@@ -406,10 +346,7 @@ def assert_xarray_allclose(
     if isinstance(actual, xarray.Dataset) and isinstance(expected, xarray.Dataset):
         assert_xarray_dataset_allclose(actual, expected, rtol=rtol, atol=atol)
     elif isinstance(actual, xarray.DataArray) and isinstance(expected, xarray.DataArray):
-        if {"x", "y"}.issubset(expected.dims):
-            assert_xarray_dataarray_allclose_xy(actual, expected, rtol=rtol, atol=atol)
-        else:
-            assert_xarray_dataarray_allclose(actual, expected, rtol=rtol, atol=atol)
+        assert_xarray_dataarray_allclose(actual, expected, rtol=rtol, atol=atol)
     else:
         raise ValueError(f"Unsupported types: {type(actual)} and {type(expected)}")
 
diff --git a/setup.py b/setup.py
index 1f960f5ef..9a4770793 100644
--- a/setup.py
+++ b/setup.py
@@ -82,7 +82,6 @@
         "deprecated>=1.2.12",
         'oschmod>=0.3.12; sys_platform == "win32"',
         "importlib_resources; python_version<'3.9'",
-        "scipy",  # for Convex Hull algorithm
     ],
     extras_require={
         "tests": tests_require,
diff --git a/tests/testing/test_results.py b/tests/testing/test_results.py
index 6b5f76f38..c4800dc93 100644
--- a/tests/testing/test_results.py
+++ b/tests/testing/test_results.py
@@ -35,6 +35,7 @@ def test_simple_defaults(self):
                 [
                     "Coordinates mismatch for dimension 'dim_0': [0 1 2 3] != [0 1 2]",
                     "Shape mismatch: (4,) != (3,)",
+                    dirty_equals.IsStr(regex="Left and right DataArray objects are not close.*", regex_flags=re.DOTALL),
                 ],
             ),
             (
@@ -43,6 +44,7 @@ def test_simple_defaults(self):
                     "Dimension mismatch: ('dim_0', 'dim_1') != ('dim_0',)",
                     "Coordinates mismatch for dimension 'dim_0': [0 1] != [0 1 2]",
                     "Shape mismatch: (2, 3) != (3,)",
+                    dirty_equals.IsStr(regex="Left and right DataArray objects are not close.*", regex_flags=re.DOTALL),
                 ],
             ),
             (
@@ -50,6 +52,7 @@ def test_simple_defaults(self):
                 [
                     "Dimension mismatch: ('dim_0', 'dim_1') != ('dim_0',)",
                     "Shape mismatch: (3, 1) != (3,)",
+                    dirty_equals.IsStr(regex="Left and right DataArray objects are not close.*", regex_flags=re.DOTALL),
                 ],
             ),
         ],
@@ -71,12 +74,20 @@ def test_simple_shape_mismatch(self, actual, expected_issues):
                     "Dimension mismatch: ('y', 'x') != ('x', 'y')",
                     "Coordinates mismatch for dimension 'x': [0 1 2] != [0 1]",
                     "Coordinates mismatch for dimension 'y': [0 1] != [0 1 2]",
+                    dirty_equals.IsStr(
+                        regex=r"Left and right DataArray objects are not close.*Differing dimensions:.*\(y: 2, x: 3\) != \(x: 2, y: 3\)",
+                        regex_flags=re.DOTALL,
+                    ),
                 ],
             ),
             (
                 xarray.DataArray([[1, 2, 3], [4, 5, 6]], dims=["x", "z"]),
                 [
                     "Dimension mismatch: ('x', 'z') != ('x', 'y')",
+                    dirty_equals.IsStr(
+                        regex=r"Left and right DataArray objects are not close.*Differing dimensions:.*\(x: 2, z: 3\) != \(x: 2, y: 3\)",
+                        regex_flags=re.DOTALL,
+                    ),
                 ],
             ),
         ],
@@ -96,6 +107,10 @@ def test_simple_dims_mismatch(self, actual, expected_issues):
                 xarray.DataArray([[1, 2, 3], [4, 5, 6]], coords=[("x", [111, 222]), ("y", [33, 44, 55])]),
                 [
                     "Coordinates mismatch for dimension 'x': [111 222] != [11 22]",
+                    dirty_equals.IsStr(
+                        regex=r"Left and right DataArray objects are not close.*Differing coordinates:.*L \* x\s+\(x\).*?111 222.*R \* x\s+\(x\).*?11 22",
+                        regex_flags=re.DOTALL,
+                    ),
                 ],
             ),
         ],
@@ -390,12 +405,13 @@ def test_allclose_minimal_xy_different(self, tmp_path, actual_dir, expected_dir)
         with raises_assertion_error_or_not(
             r"Issues for file 'data.nc'.*"
             r"Issues for variable 'b1'.*"
+            r"Left and right DataArray objects are not close.*Differing values:.*"
             r"t 0: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
-            r"t 0: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area.*"
+            r"t 0: differing pixels: 20/20 \(100.0%\), bbox \(\(4, 5\), \(7, 9\)\) - 100.0% of the area.*"
             r"t 1: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
-            r"t 1: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area.*"
+            r"t 1: differing pixels: 20/20 \(100.0%\), bbox \(\(4, 5\), \(7, 9\)\) - 100.0% of the area.*"
             r"t 2: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:1.0, max: 1.0, mean: 1.0, var: 0.0.*"
-            r"t 2: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area"
+            r"t 2: differing pixels: 20/20 \(100.0%\), bbox \(\(4, 5\), \(7, 9\)\) - 100.0% of the area"
         ):
             assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path)
 
@@ -432,8 +448,9 @@ def test_allclose_minimal_xy_different_small_area(self, tmp_path, actual_dir, ex
         with raises_assertion_error_or_not(
             r"Issues for file 'data.nc'.*"
             r"Issues for variable 'b2'.*"
+            r"Left and right DataArray objects are not close.*Differing values:.*"
             r"t 2: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:33.0, max: 42.0, mean: 37.5, var: 11.2.*"
-            r"t 2: differing pixels: 4/20 \(20.0%\), spread over 8.3% of the area"
+            r"t 2: differing pixels: 4/20 \(20.0%\), bbox \(\(6, 7\), \(7, 8\)\) - 8.3% of the area"
         ):
             assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path)