From bdb906b2527fd4ab37252e8491c4d47b3dfb277d Mon Sep 17 00:00:00 2001 From: dsamaey Date: Tue, 22 Apr 2025 19:54:46 +0200 Subject: [PATCH 01/10] Issue #761 better diff for apex reference check --- openeo/testing/results.py | 137 ++++++++++++++++++++++++++++++++-- tests/testing/test_results.py | 118 +++++++++++++++++++++++++---- 2 files changed, 233 insertions(+), 22 deletions(-) diff --git a/openeo/testing/results.py b/openeo/testing/results.py index 633ddaf58..7076aaa59 100644 --- a/openeo/testing/results.py +++ b/openeo/testing/results.py @@ -8,8 +8,10 @@ from pathlib import Path from typing import List, Optional, Union +import numpy as np import xarray import xarray.testing +from scipy.spatial import ConvexHull from openeo.rest.job import DEFAULT_JOB_RESULTS_FILENAME, BatchJob, JobResults from openeo.util import repr_truncate @@ -88,6 +90,97 @@ def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.Dat return data +def _compare_xarray_dataarray_xy( + actual: Union[xarray.DataArray, str, Path], + expected: Union[xarray.DataArray, str, Path], + *, + rtol: float = _DEFAULT_RTOL, + atol: float = _DEFAULT_ATOL, +) -> List[str]: + """ + Compare two xarray DataArrays with tolerance and report mismatch issues (as strings) + + Checks that are done (with tolerance): + - (optional) Check fraction of mismatching pixels (difference exceeding some tolerance). + If fraction is below a given threshold, ignore these mismatches in subsequent comparisons. + If fraction is above the threshold, report this issue. + - Compare actual and expected data with `xarray.testing.assert_allclose` and specified tolerances. + + :return: list of issues (empty if no issues) + """ + # TODO: make this a public function? + # TODO: option for nodata fill value? + # TODO: option to include data type check? + # TODO: option to cast to some data type (or even rescale) before comparison? + # TODO: also compare attributes of the DataArray? + actual = _as_xarray_dataarray(actual) + expected = _as_xarray_dataarray(expected) + issues = [] + + if actual.dims != expected.dims: + issues.append(f"Dimension mismatch: {actual.dims} != {expected.dims}") + for dim in sorted(set(expected.dims).intersection(actual.dims)): + acs = actual.coords[dim].values + ecs = expected.coords[dim].values + if not (acs.shape == ecs.shape and (acs == ecs).all()): + issues.append(f"Coordinates mismatch for dimension {dim!r}: {acs} != {ecs}") + if actual.shape != expected.shape: + issues.append(f"Shape mismatch: {actual.shape} != {expected.shape}") + + if not issues: + threshold = abs(expected * rtol) + atol + diff_exact = abs(expected - actual) + diff_mask = diff_exact > threshold + diff_lenient = diff_exact.where(diff_mask) + + non_x_y_dims = list(set(expected.dims) - {"x", "y"}) + value_mapping = dict(map(lambda d: (d, expected[d].data), non_x_y_dims)) + shape = tuple([len(value_mapping[x]) for x in non_x_y_dims]) + + for shape_index, v in np.ndenumerate(np.ndarray(shape)): + indexers = {} + for index, value_index in enumerate(shape_index): + indexers[non_x_y_dims[index]] = value_mapping[non_x_y_dims[index]][value_index] + diff_data = diff_lenient.sel(indexers=indexers) + total_pixel_count = expected.sel(indexers).count().item() + diff_pixel_count = diff_data.count().item() + + if diff_pixel_count > 0: + diff_pixel_percentage = round(diff_pixel_count * 100 / total_pixel_count, 1) + diff_mean = round(diff_data.mean().item(), 1) + diff_var = round(diff_data.var().item(), 1) + + key = ",".join([f"{k} {str(v1)}" for k, v1 in indexers.items()]) + issues.append( + f"{key}: value difference min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}" + ) + + coord_grid = np.meshgrid(diff_data.coords["y"], diff_data.coords["x"]) + mask = diff_data.notnull() + c1 = coord_grid[0][mask] + c2 = coord_grid[1][mask] + coordinates = np.dstack((c1, c2)).reshape(-1, 2) + if len(coordinates) > 2: + hull = ConvexHull(coordinates) + area = hull.volume + + x_m = diff_data.coords["x"][0].data + x_M = diff_data.coords["x"][-1].data + y_m = diff_data.coords["y"][0].data + y_M = diff_data.coords["y"][-1].data + + total_area = abs((y_M - y_m) * (x_M - x_m)) + area_percentage = round(area * 100 / total_area, 1) + issues.append( + f"{key}: differing pixels: {diff_pixel_count}/{total_pixel_count} ({diff_pixel_percentage}%), spread over {area_percentage}% of the area" + ) + else: + issues.append( + f"{key}: differing pixels: {diff_pixel_count}/{total_pixel_count} ({diff_pixel_percentage}%)" + ) + return issues + + def _compare_xarray_dataarray( actual: Union[xarray.DataArray, str, Path], expected: Union[xarray.DataArray, str, Path], @@ -128,11 +221,15 @@ def _compare_xarray_dataarray( if actual.shape != expected.shape: issues.append(f"Shape mismatch: {actual.shape} != {expected.shape}") - try: - xarray.testing.assert_allclose(a=actual, b=expected, rtol=rtol, atol=atol) - except AssertionError as e: - # TODO: message of `assert_allclose` is typically multiline, split it again or make it one line? - issues.append(str(e).strip()) + if not issues: + if {"x", "y"} <= set(expected.dims): + issues = _compare_xarray_dataarray_xy(actual=actual, expected=expected, rtol=rtol, atol=atol) + else: + try: + xarray.testing.assert_allclose(a=actual, b=expected, rtol=rtol, atol=atol) + except AssertionError as e: + # TODO: message of `assert_allclose` is typically multiline, split it again or make it one line? + issues.append(str(e).strip()) return issues @@ -163,6 +260,31 @@ def assert_xarray_dataarray_allclose( raise AssertionError("\n".join(issues)) +def assert_xarray_dataarray_allclose_xy( + actual: Union[xarray.DataArray, str, Path], + expected: Union[xarray.DataArray, str, Path], + *, + rtol: float = _DEFAULT_RTOL, + atol: float = _DEFAULT_ATOL, +): + """ + Assert that two Xarray ``DataArray`` instances are equal (with tolerance). + + :param actual: actual data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file. + :param expected: expected or reference data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file. + :param rtol: relative tolerance + :param atol: absolute tolerance + :raises AssertionError: if not equal within the given tolerance + + .. versionadded:: 0.31.0 + + .. warning:: + This function is experimental and subject to change. + """ + issues = _compare_xarray_dataarray_xy(actual=actual, expected=expected, rtol=rtol, atol=atol) + if issues: + raise AssertionError("\n".join(issues)) + def _compare_xarray_datasets( actual: Union[xarray.Dataset, str, Path], expected: Union[xarray.Dataset, str, Path], @@ -250,7 +372,10 @@ def assert_xarray_allclose( if isinstance(actual, xarray.Dataset) and isinstance(expected, xarray.Dataset): assert_xarray_dataset_allclose(actual, expected, rtol=rtol, atol=atol) elif isinstance(actual, xarray.DataArray) and isinstance(expected, xarray.DataArray): - assert_xarray_dataarray_allclose(actual, expected, rtol=rtol, atol=atol) + if (["x", "y", "band"]).elements_in(expected.dims): + assert_xarray_dataarray_allclose_xy(actual, expected, rtol=rtol, atol=atol) + else: + assert_xarray_dataarray_allclose(actual, expected, rtol=rtol, atol=atol) else: raise ValueError(f"Unsupported types: {type(actual)} and {type(expected)}") diff --git a/tests/testing/test_results.py b/tests/testing/test_results.py index 9bff2a4f1..73de47e95 100644 --- a/tests/testing/test_results.py +++ b/tests/testing/test_results.py @@ -13,7 +13,6 @@ from openeo.testing.results import ( _compare_xarray_dataarray, assert_job_results_allclose, - assert_xarray_allclose, assert_xarray_dataarray_allclose, assert_xarray_dataset_allclose, ) @@ -36,7 +35,6 @@ def test_simple_defaults(self): [ "Coordinates mismatch for dimension 'dim_0': [0 1 2 3] != [0 1 2]", "Shape mismatch: (4,) != (3,)", - dirty_equals.IsStr(regex="Left and right DataArray objects are not close.*", regex_flags=re.DOTALL), ], ), ( @@ -45,7 +43,6 @@ def test_simple_defaults(self): "Dimension mismatch: ('dim_0', 'dim_1') != ('dim_0',)", "Coordinates mismatch for dimension 'dim_0': [0 1] != [0 1 2]", "Shape mismatch: (2, 3) != (3,)", - dirty_equals.IsStr(regex="Left and right DataArray objects are not close.*", regex_flags=re.DOTALL), ], ), ( @@ -53,7 +50,6 @@ def test_simple_defaults(self): [ "Dimension mismatch: ('dim_0', 'dim_1') != ('dim_0',)", "Shape mismatch: (3, 1) != (3,)", - dirty_equals.IsStr(regex="Left and right DataArray objects are not close.*", regex_flags=re.DOTALL), ], ), ], @@ -75,20 +71,12 @@ def test_simple_shape_mismatch(self, actual, expected_issues): "Dimension mismatch: ('y', 'x') != ('x', 'y')", "Coordinates mismatch for dimension 'x': [0 1 2] != [0 1]", "Coordinates mismatch for dimension 'y': [0 1] != [0 1 2]", - dirty_equals.IsStr( - regex=r"Left and right DataArray objects are not close.*Differing dimensions:.*\(y: 2, x: 3\) != \(x: 2, y: 3\)", - regex_flags=re.DOTALL, - ), ], ), ( xarray.DataArray([[1, 2, 3], [4, 5, 6]], dims=["x", "z"]), [ "Dimension mismatch: ('x', 'z') != ('x', 'y')", - dirty_equals.IsStr( - regex=r"Left and right DataArray objects are not close.*Differing dimensions:.*\(x: 2, z: 3\) != \(x: 2, y: 3\)", - regex_flags=re.DOTALL, - ), ], ), ], @@ -108,10 +96,6 @@ def test_simple_dims_mismatch(self, actual, expected_issues): xarray.DataArray([[1, 2, 3], [4, 5, 6]], coords=[("x", [111, 222]), ("y", [33, 44, 55])]), [ "Coordinates mismatch for dimension 'x': [111 222] != [11 22]", - dirty_equals.IsStr( - regex=r"Left and right DataArray objects are not close.*Differing coordinates:.*L \* x\s+\(x\).*?111 222.*R \* x\s+\(x\).*?11 22", - regex_flags=re.DOTALL, - ), ], ), ], @@ -351,6 +335,108 @@ def test_allclose_minimal_success(self, tmp_path, actual_dir, expected_dir): ds.to_netcdf(actual_dir / "data.nc") assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path) + def test_allclose_xy_success(self, tmp_path, actual_dir, expected_dir): + expected_ds = xarray.Dataset( + { + "b1": xarray.Variable(dims=["t", "x", "y"], data=2 * numpy.ones((3, 4, 5))), + "b2": xarray.Variable(dims=["t", "x", "y"], data=3 * numpy.ones((3, 4, 5))), + }, + coords={ + "t": range(0, 3), + "x": range(4, 8), + "y": range(5, 10), + }, + ) + expected_ds.to_netcdf(expected_dir / "data.nc") + actual_ds = xarray.Dataset( + { + "b1": xarray.Variable(dims=["t", "x", "y"], data=1 * numpy.ones((3, 4, 5))), + "b2": xarray.Variable(dims=["t", "x", "y"], data=3 * numpy.ones((3, 4, 5))), + }, + coords={ + "t": range(0, 3), + "x": range(4, 8), + "y": range(5, 10), + }, + ) + actual_ds.to_netcdf(actual_dir / "data.nc") + assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path, rtol=1) + + def test_allclose_minimal_xy_different(self, tmp_path, actual_dir, expected_dir): + expected_ds = xarray.Dataset( + { + "b1": xarray.Variable(dims=["t", "x", "y"], data=2 * numpy.ones((3, 4, 5))), + "b2": xarray.Variable(dims=["t", "x", "y"], data=3 * numpy.ones((3, 4, 5))), + }, + coords={ + "t": range(0, 3), + "x": range(4, 8), + "y": range(5, 10), + }, + ) + expected_ds.to_netcdf(expected_dir / "data.nc") + actual_ds = xarray.Dataset( + { + "b1": xarray.Variable(dims=["t", "x", "y"], data=1 * numpy.ones((3, 4, 5))), + "b2": xarray.Variable(dims=["t", "x", "y"], data=3 * numpy.ones((3, 4, 5))), + }, + coords={ + "t": range(0, 3), + "x": range(4, 8), + "y": range(5, 10), + }, + ) + actual_ds.to_netcdf(actual_dir / "data.nc") + with raises_assertion_error_or_not( + r"Issues for file 'data.nc'.*" + r"Issues for variable 'b1'.*" + r"t 0: value difference min:1.0, max: 1.0, mean: 1.0, var: 0.0.*" + r"t 0: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area.*" + r"t 1: value difference min:1.0, max: 1.0, mean: 1.0, var: 0.0.*" + r"t 1: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area.*" + r"t 2: value difference min:1.0, max: 1.0, mean: 1.0, var: 0.0.*" + r"t 2: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area" + ): + assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path) + + def test_allclose_minimal_xy_different_small_area(self, tmp_path, actual_dir, expected_dir): + expected_ds = xarray.Dataset( + { + "b1": xarray.Variable(dims=["t", "x", "y"], data=2 * numpy.ones((3, 4, 5))), + "b2": xarray.Variable(dims=["t", "x", "y"], data=3 * numpy.ones((3, 4, 5))), + }, + coords={ + "t": range(0, 3), + "x": range(4, 8), + "y": range(5, 10), + }, + ) + expected_ds.to_netcdf(expected_dir / "data.nc") + b2_modified_data = 3 * numpy.ones((3, 4, 5)) + b2_modified_data[2][2][2] *= 15 + b2_modified_data[2][2][3] *= 14 + b2_modified_data[2][3][2] *= 13 + b2_modified_data[2][3][3] *= 12 + actual_ds = xarray.Dataset( + { + "b1": xarray.Variable(dims=["t", "x", "y"], data=2 * numpy.ones((3, 4, 5))), + "b2": xarray.Variable(dims=["t", "x", "y"], data=b2_modified_data), + }, + coords={ + "t": range(0, 3), + "x": range(4, 8), + "y": range(5, 10), + }, + ) + actual_ds.to_netcdf(actual_dir / "data.nc") + with raises_assertion_error_or_not( + r"Issues for file 'data.nc'.*" + r"Issues for variable 'b2'.*" + r"t 2: value difference min:33.0, max: 42.0, mean: 37.5, var: 11.2.*" + r"t 2: differing pixels: 4/20 \(20.0%\), spread over 8.3% of the area" + ): + assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path) + def test_allclose_basic_fail(self, tmp_path, actual_dir, expected_dir): expected_ds = xarray.Dataset({"a": (["time"], [1, 2, 3])}, coords={"time": [11, 22, 33]}) expected_ds.to_netcdf(expected_dir / "data.nc") From e7237298e8d5a73b78cea74952be3de8c7039749 Mon Sep 17 00:00:00 2001 From: dsamaey Date: Wed, 23 Apr 2025 09:44:21 +0200 Subject: [PATCH 02/10] Issue #761 better diff for apex reference check (added scipy dependency) --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index d2c22b39f..bd48199b6 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,6 @@ with open("openeo/_version.py") as fp: exec(fp.read(), _version) - with open("README.md", "r") as fh: long_description = fh.read() @@ -22,7 +21,8 @@ "mock", "requests-mock>=1.8.0", "httpretty>=1.1.4", - "urllib3<2.3.0", # httpretty doesn't work properly with urllib3>=2.3.0. See #700 and https://github.com/gabrielfalcao/HTTPretty/issues/484 + "urllib3<2.3.0", + # httpretty doesn't work properly with urllib3>=2.3.0. See #700 and https://github.com/gabrielfalcao/HTTPretty/issues/484 "netCDF4>=1.7.0", "matplotlib", # TODO: eliminate matplotlib as test dependency # TODO #717 Simplify geopandas constraints when Python 3.8 support is dropped @@ -35,6 +35,7 @@ "pyarrow>=10.0.1", # For Parquet read/write support in pandas "python-dateutil>=2.7.0", "pystac-client>=0.7.5", + "scipy", # for Convex Hull algorithm ] docs_require = [ @@ -56,7 +57,6 @@ "ipython", ] - name = "openeo" setup( name=name, From a25597bae8e9302657cab7d10291194dcb040dcc Mon Sep 17 00:00:00 2001 From: dsamaey Date: Wed, 30 Apr 2025 09:26:53 +0200 Subject: [PATCH 03/10] Issue #761 better diff for apex reference check (added ascii art diff) --- openeo/testing/results.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/openeo/testing/results.py b/openeo/testing/results.py index 7076aaa59..cac777762 100644 --- a/openeo/testing/results.py +++ b/openeo/testing/results.py @@ -90,6 +90,27 @@ def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.Dat return data +def ascii_art(diff_data: DataArray) -> str: + scale: int = max(1, (diff_data.sizes["x"] / 100)) + data_max = diff_data.max().item() + if data_max == 0: + data_max = 1 + grayscale_characters = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\"^`'. " + coarsened = diff_data.coarsen(dim={"x": scale, "y": scale}, boundary="pad").all() + top = "┌" + "─" * coarsened.sizes["x"] + "┐\n" + bottom = "\n└" + "─" * coarsened.sizes["x"] + "┘" + return ( + top + + "\n".join( + [ + "│" + "".join([grayscale_characters[70 - int(v * 70 / data_max)] for v in row]) + "│" + for row in coarsened.transpose() + ] + ) + + bottom + ) + + def _compare_xarray_dataarray_xy( actual: Union[xarray.DataArray, str, Path], expected: Union[xarray.DataArray, str, Path], @@ -155,6 +176,9 @@ def _compare_xarray_dataarray_xy( f"{key}: value difference min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}" ) + print(f"Difference ascii art for {key}") + print(ascii_art(diff_data)) + coord_grid = np.meshgrid(diff_data.coords["y"], diff_data.coords["x"]) mask = diff_data.notnull() c1 = coord_grid[0][mask] From d88a124729c27cb9375b18e1a223070e5a76d072 Mon Sep 17 00:00:00 2001 From: dsamaey Date: Wed, 30 Apr 2025 09:30:21 +0200 Subject: [PATCH 04/10] Issue #761 better diff for apex reference check (added ascii art diff) --- openeo/testing/results.py | 1 + 1 file changed, 1 insertion(+) diff --git a/openeo/testing/results.py b/openeo/testing/results.py index cac777762..2ac5ee38f 100644 --- a/openeo/testing/results.py +++ b/openeo/testing/results.py @@ -12,6 +12,7 @@ import xarray import xarray.testing from scipy.spatial import ConvexHull +from xarray import DataArray from openeo.rest.job import DEFAULT_JOB_RESULTS_FILENAME, BatchJob, JobResults from openeo.util import repr_truncate From 916b86881a66904ea9891ec30fc643f0a082f50f Mon Sep 17 00:00:00 2001 From: dsamaey Date: Wed, 30 Apr 2025 10:41:37 +0200 Subject: [PATCH 05/10] Issue #761 better diff for apex reference check (more robust xy/yx grid handling) --- openeo/testing/results.py | 32 ++++++++++++++++++-------------- tests/testing/test_results.py | 8 ++++---- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/openeo/testing/results.py b/openeo/testing/results.py index 2ac5ee38f..ea0d25673 100644 --- a/openeo/testing/results.py +++ b/openeo/testing/results.py @@ -92,24 +92,24 @@ def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.Dat def ascii_art(diff_data: DataArray) -> str: - scale: int = max(1, (diff_data.sizes["x"] / 100)) + scale: int = max(1, int(diff_data.sizes["x"] / 100)) data_max = diff_data.max().item() if data_max == 0: data_max = 1 grayscale_characters = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\"^`'. " coarsened = diff_data.coarsen(dim={"x": scale, "y": scale}, boundary="pad").all() + if coarsened.dims[0] != "y": + coarsened = coarsened.transpose() top = "┌" + "─" * coarsened.sizes["x"] + "┐\n" bottom = "\n└" + "─" * coarsened.sizes["x"] + "┘" - return ( - top - + "\n".join( - [ - "│" + "".join([grayscale_characters[70 - int(v * 70 / data_max)] for v in row]) + "│" - for row in coarsened.transpose() - ] - ) - + bottom - ) + + def pixelChar(v) -> str: + i = int(v * 70 / data_max) + if v > 0 and i == 0: + i = 1 + return grayscale_characters[69 - i] + + return top + "\n".join(["│" + "".join([pixelChar(v) for v in row]) + "│" for row in coarsened]) + bottom def _compare_xarray_dataarray_xy( @@ -174,14 +174,18 @@ def _compare_xarray_dataarray_xy( key = ",".join([f"{k} {str(v1)}" for k, v1 in indexers.items()]) issues.append( - f"{key}: value difference min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}" + f"{key}: value difference exceeds tolerance (rtol {rtol}, atol {atol}), min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}" ) print(f"Difference ascii art for {key}") - print(ascii_art(diff_data)) + art = ascii_art(diff_data) + print(art) + + coord_grid = np.meshgrid(diff_data.coords["x"], diff_data.coords["y"]) - coord_grid = np.meshgrid(diff_data.coords["y"], diff_data.coords["x"]) mask = diff_data.notnull() + if mask.dims[0] != "y": + mask = mask.transpose() c1 = coord_grid[0][mask] c2 = coord_grid[1][mask] coordinates = np.dstack((c1, c2)).reshape(-1, 2) diff --git a/tests/testing/test_results.py b/tests/testing/test_results.py index 73de47e95..6b5f76f38 100644 --- a/tests/testing/test_results.py +++ b/tests/testing/test_results.py @@ -390,11 +390,11 @@ def test_allclose_minimal_xy_different(self, tmp_path, actual_dir, expected_dir) with raises_assertion_error_or_not( r"Issues for file 'data.nc'.*" r"Issues for variable 'b1'.*" - r"t 0: value difference min:1.0, max: 1.0, mean: 1.0, var: 0.0.*" + r"t 0: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:1.0, max: 1.0, mean: 1.0, var: 0.0.*" r"t 0: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area.*" - r"t 1: value difference min:1.0, max: 1.0, mean: 1.0, var: 0.0.*" + r"t 1: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:1.0, max: 1.0, mean: 1.0, var: 0.0.*" r"t 1: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area.*" - r"t 2: value difference min:1.0, max: 1.0, mean: 1.0, var: 0.0.*" + r"t 2: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:1.0, max: 1.0, mean: 1.0, var: 0.0.*" r"t 2: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area" ): assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path) @@ -432,7 +432,7 @@ def test_allclose_minimal_xy_different_small_area(self, tmp_path, actual_dir, ex with raises_assertion_error_or_not( r"Issues for file 'data.nc'.*" r"Issues for variable 'b2'.*" - r"t 2: value difference min:33.0, max: 42.0, mean: 37.5, var: 11.2.*" + r"t 2: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:33.0, max: 42.0, mean: 37.5, var: 11.2.*" r"t 2: differing pixels: 4/20 \(20.0%\), spread over 8.3% of the area" ): assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path) From ce0cc5fd8bae3a9b6270b8565b47ad6e5951d48a Mon Sep 17 00:00:00 2001 From: dsamaey Date: Tue, 6 May 2025 16:24:27 +0200 Subject: [PATCH 06/10] Issue #761 better diff for apex reference check (fixed boundary value issue) --- openeo/testing/results.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/openeo/testing/results.py b/openeo/testing/results.py index ea0d25673..3c8ac51ea 100644 --- a/openeo/testing/results.py +++ b/openeo/testing/results.py @@ -97,16 +97,19 @@ def ascii_art(diff_data: DataArray) -> str: if data_max == 0: data_max = 1 grayscale_characters = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\"^`'. " - coarsened = diff_data.coarsen(dim={"x": scale, "y": scale}, boundary="pad").all() - if coarsened.dims[0] != "y": - coarsened = coarsened.transpose() + coarsened = diff_data.coarsen(dim={"x": scale, "y": scale}, boundary="pad").mean() + coarsened = coarsened.transpose("y", "x", ...) top = "┌" + "─" * coarsened.sizes["x"] + "┐\n" bottom = "\n└" + "─" * coarsened.sizes["x"] + "┘" def pixelChar(v) -> str: - i = int(v * 70 / data_max) + if np.isnan(v): + return " " + i = int(v * 69 / data_max) if v > 0 and i == 0: i = 1 + else: + i = min(69, i) return grayscale_characters[69 - i] return top + "\n".join(["│" + "".join([pixelChar(v) for v in row]) + "│" for row in coarsened]) + bottom @@ -169,18 +172,14 @@ def _compare_xarray_dataarray_xy( if diff_pixel_count > 0: diff_pixel_percentage = round(diff_pixel_count * 100 / total_pixel_count, 1) - diff_mean = round(diff_data.mean().item(), 1) - diff_var = round(diff_data.var().item(), 1) + diff_mean = round(diff_data.mean().item(), 2) + diff_var = round(diff_data.var().item(), 2) key = ",".join([f"{k} {str(v1)}" for k, v1 in indexers.items()]) issues.append( f"{key}: value difference exceeds tolerance (rtol {rtol}, atol {atol}), min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}" ) - print(f"Difference ascii art for {key}") - art = ascii_art(diff_data) - print(art) - coord_grid = np.meshgrid(diff_data.coords["x"], diff_data.coords["y"]) mask = diff_data.notnull() @@ -189,6 +188,11 @@ def _compare_xarray_dataarray_xy( c1 = coord_grid[0][mask] c2 = coord_grid[1][mask] coordinates = np.dstack((c1, c2)).reshape(-1, 2) + + art = ascii_art(diff_data) + print(f"Difference ascii art for {key}") + print(art) + if len(coordinates) > 2: hull = ConvexHull(coordinates) area = hull.volume From 3e8860fe32952aad19d30b4cd3a01a14162f621f Mon Sep 17 00:00:00 2001 From: dsamaey Date: Thu, 8 May 2025 08:46:16 +0200 Subject: [PATCH 07/10] Issue #761 better diff for apex reference check (default ascii_art max_width and aspect ratio) --- openeo/testing/results.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/openeo/testing/results.py b/openeo/testing/results.py index 3c8ac51ea..d0d8badef 100644 --- a/openeo/testing/results.py +++ b/openeo/testing/results.py @@ -91,13 +91,14 @@ def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.Dat return data -def ascii_art(diff_data: DataArray) -> str: - scale: int = max(1, int(diff_data.sizes["x"] / 100)) +def ascii_art(diff_data: DataArray, *, max_width: int = 60, y_vs_x_aspect_ratio=2.5) -> str: + x_scale: int = max(1, int(diff_data.sizes["x"] / max_width)) + y_scale: int = max(1, int(diff_data.sizes["x"] / (max_width * y_vs_x_aspect_ratio))) data_max = diff_data.max().item() if data_max == 0: data_max = 1 grayscale_characters = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\"^`'. " - coarsened = diff_data.coarsen(dim={"x": scale, "y": scale}, boundary="pad").mean() + coarsened = diff_data.coarsen(dim={"x": x_scale, "y": y_scale}, boundary="pad").mean() coarsened = coarsened.transpose("y", "x", ...) top = "┌" + "─" * coarsened.sizes["x"] + "┐\n" bottom = "\n└" + "─" * coarsened.sizes["x"] + "┘" From ce58771b832051aa455ac80b8a2e11884da6415a Mon Sep 17 00:00:00 2001 From: dsamaey Date: Thu, 8 May 2025 10:19:00 +0200 Subject: [PATCH 08/10] Issue #761 better diff for apex reference check (deps fix) --- setup.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index bd48199b6..1f960f5ef 100644 --- a/setup.py +++ b/setup.py @@ -21,8 +21,7 @@ "mock", "requests-mock>=1.8.0", "httpretty>=1.1.4", - "urllib3<2.3.0", - # httpretty doesn't work properly with urllib3>=2.3.0. See #700 and https://github.com/gabrielfalcao/HTTPretty/issues/484 + "urllib3<2.3.0", # httpretty doesn't work properly with urllib3>=2.3.0. See #700 and https://github.com/gabrielfalcao/HTTPretty/issues/484 "netCDF4>=1.7.0", "matplotlib", # TODO: eliminate matplotlib as test dependency # TODO #717 Simplify geopandas constraints when Python 3.8 support is dropped @@ -35,7 +34,6 @@ "pyarrow>=10.0.1", # For Parquet read/write support in pandas "python-dateutil>=2.7.0", "pystac-client>=0.7.5", - "scipy", # for Convex Hull algorithm ] docs_require = [ @@ -84,6 +82,7 @@ "deprecated>=1.2.12", 'oschmod>=0.3.12; sys_platform == "win32"', "importlib_resources; python_version<'3.9'", + "scipy", # for Convex Hull algorithm ], extras_require={ "tests": tests_require, From 5bcc5e70ac369d3a3a684b6b4ba9aa7b77172bd5 Mon Sep 17 00:00:00 2001 From: dsamaey Date: Thu, 8 May 2025 10:19:42 +0200 Subject: [PATCH 09/10] Issue #761 better diff for apex reference check --- openeo/testing/results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openeo/testing/results.py b/openeo/testing/results.py index d0d8badef..1f58f37be 100644 --- a/openeo/testing/results.py +++ b/openeo/testing/results.py @@ -406,7 +406,7 @@ def assert_xarray_allclose( if isinstance(actual, xarray.Dataset) and isinstance(expected, xarray.Dataset): assert_xarray_dataset_allclose(actual, expected, rtol=rtol, atol=atol) elif isinstance(actual, xarray.DataArray) and isinstance(expected, xarray.DataArray): - if (["x", "y", "band"]).elements_in(expected.dims): + if {"x", "y"}.issubset(expected.dims): assert_xarray_dataarray_allclose_xy(actual, expected, rtol=rtol, atol=atol) else: assert_xarray_dataarray_allclose(actual, expected, rtol=rtol, atol=atol) From fcad19e50b515e03f68edcdc10f4d356936fbc2f Mon Sep 17 00:00:00 2001 From: dsamaey Date: Thu, 8 May 2025 17:55:34 +0200 Subject: [PATCH 10/10] Issue #761 better diff for apex reference check (replaced convex hull with bbox, _compare_xarray_dataarray_xy now only adds to the original xarray implementation) --- openeo/testing/results.py | 221 ++++++++++++---------------------- setup.py | 1 - tests/testing/test_results.py | 25 +++- 3 files changed, 100 insertions(+), 147 deletions(-) diff --git a/openeo/testing/results.py b/openeo/testing/results.py index 1f58f37be..67e678723 100644 --- a/openeo/testing/results.py +++ b/openeo/testing/results.py @@ -11,7 +11,6 @@ import numpy as np import xarray import xarray.testing -from scipy.spatial import ConvexHull from xarray import DataArray from openeo.rest.job import DEFAULT_JOB_RESULTS_FILENAME, BatchJob, JobResults @@ -19,10 +18,12 @@ _log = logging.getLogger(__name__) - _DEFAULT_RTOL = 1e-6 _DEFAULT_ATOL = 1e-6 +# https://paulbourke.net/dataformats/asciiart +DEFAULT_GRAYSCALE_70_CHARACTERS = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\"^`'. "[::-1] +DEFAULT_GRAYSCALE_10_CHARACTERS = " .:-=+*#%@" def _load_xarray_netcdf(path: Union[str, Path], **kwargs) -> xarray.Dataset: """ @@ -91,29 +92,33 @@ def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.Dat return data -def ascii_art(diff_data: DataArray, *, max_width: int = 60, y_vs_x_aspect_ratio=2.5) -> str: +def _ascii_art( + diff_data: DataArray, + *, + max_width: int = 60, + y_vs_x_aspect_ratio=2.5, + grayscale_characters: str = DEFAULT_GRAYSCALE_70_CHARACTERS, +) -> str: + max_grayscale_idx = len(grayscale_characters) - 1 x_scale: int = max(1, int(diff_data.sizes["x"] / max_width)) - y_scale: int = max(1, int(diff_data.sizes["x"] / (max_width * y_vs_x_aspect_ratio))) + y_scale: int = max(1, int(diff_data.sizes["y"] / (max_width / y_vs_x_aspect_ratio))) data_max = diff_data.max().item() if data_max == 0: data_max = 1 - grayscale_characters = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\"^`'. " coarsened = diff_data.coarsen(dim={"x": x_scale, "y": y_scale}, boundary="pad").mean() coarsened = coarsened.transpose("y", "x", ...) top = "┌" + "─" * coarsened.sizes["x"] + "┐\n" bottom = "\n└" + "─" * coarsened.sizes["x"] + "┘" - def pixelChar(v) -> str: - if np.isnan(v): - return " " - i = int(v * 69 / data_max) + def _pixel_char(v) -> str: + i = 0 if np.isnan(v) else int(v * max_grayscale_idx / data_max) if v > 0 and i == 0: - i = 1 + i = 1 # don't show a blank for a difference above the threshold else: - i = min(69, i) - return grayscale_characters[69 - i] + i = min(max_grayscale_idx, i) + return grayscale_characters[i] - return top + "\n".join(["│" + "".join([pixelChar(v) for v in row]) + "│" for row in coarsened]) + bottom + return top + "\n".join(["│" + "".join([_pixel_char(v) for v in row]) + "│" for row in coarsened]) + bottom def _compare_xarray_dataarray_xy( @@ -122,96 +127,60 @@ def _compare_xarray_dataarray_xy( *, rtol: float = _DEFAULT_RTOL, atol: float = _DEFAULT_ATOL, + name: str = None, ) -> List[str]: """ - Compare two xarray DataArrays with tolerance and report mismatch issues (as strings) - - Checks that are done (with tolerance): - - (optional) Check fraction of mismatching pixels (difference exceeding some tolerance). - If fraction is below a given threshold, ignore these mismatches in subsequent comparisons. - If fraction is above the threshold, report this issue. - - Compare actual and expected data with `xarray.testing.assert_allclose` and specified tolerances. - + Additional compare for two compatible spatial xarray DataArrays with tolerance (rtol, atol) :return: list of issues (empty if no issues) """ - # TODO: make this a public function? - # TODO: option for nodata fill value? - # TODO: option to include data type check? - # TODO: option to cast to some data type (or even rescale) before comparison? - # TODO: also compare attributes of the DataArray? - actual = _as_xarray_dataarray(actual) - expected = _as_xarray_dataarray(expected) issues = [] - - if actual.dims != expected.dims: - issues.append(f"Dimension mismatch: {actual.dims} != {expected.dims}") - for dim in sorted(set(expected.dims).intersection(actual.dims)): - acs = actual.coords[dim].values - ecs = expected.coords[dim].values - if not (acs.shape == ecs.shape and (acs == ecs).all()): - issues.append(f"Coordinates mismatch for dimension {dim!r}: {acs} != {ecs}") - if actual.shape != expected.shape: - issues.append(f"Shape mismatch: {actual.shape} != {expected.shape}") - - if not issues: - threshold = abs(expected * rtol) + atol - diff_exact = abs(expected - actual) - diff_mask = diff_exact > threshold - diff_lenient = diff_exact.where(diff_mask) - - non_x_y_dims = list(set(expected.dims) - {"x", "y"}) - value_mapping = dict(map(lambda d: (d, expected[d].data), non_x_y_dims)) - shape = tuple([len(value_mapping[x]) for x in non_x_y_dims]) - - for shape_index, v in np.ndenumerate(np.ndarray(shape)): - indexers = {} - for index, value_index in enumerate(shape_index): - indexers[non_x_y_dims[index]] = value_mapping[non_x_y_dims[index]][value_index] - diff_data = diff_lenient.sel(indexers=indexers) - total_pixel_count = expected.sel(indexers).count().item() - diff_pixel_count = diff_data.count().item() - - if diff_pixel_count > 0: - diff_pixel_percentage = round(diff_pixel_count * 100 / total_pixel_count, 1) - diff_mean = round(diff_data.mean().item(), 2) - diff_var = round(diff_data.var().item(), 2) - - key = ",".join([f"{k} {str(v1)}" for k, v1 in indexers.items()]) - issues.append( - f"{key}: value difference exceeds tolerance (rtol {rtol}, atol {atol}), min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}" - ) - - coord_grid = np.meshgrid(diff_data.coords["x"], diff_data.coords["y"]) - - mask = diff_data.notnull() - if mask.dims[0] != "y": - mask = mask.transpose() - c1 = coord_grid[0][mask] - c2 = coord_grid[1][mask] - coordinates = np.dstack((c1, c2)).reshape(-1, 2) - - art = ascii_art(diff_data) - print(f"Difference ascii art for {key}") - print(art) - - if len(coordinates) > 2: - hull = ConvexHull(coordinates) - area = hull.volume - - x_m = diff_data.coords["x"][0].data - x_M = diff_data.coords["x"][-1].data - y_m = diff_data.coords["y"][0].data - y_M = diff_data.coords["y"][-1].data - - total_area = abs((y_M - y_m) * (x_M - x_m)) - area_percentage = round(area * 100 / total_area, 1) - issues.append( - f"{key}: differing pixels: {diff_pixel_count}/{total_pixel_count} ({diff_pixel_percentage}%), spread over {area_percentage}% of the area" - ) - else: - issues.append( - f"{key}: differing pixels: {diff_pixel_count}/{total_pixel_count} ({diff_pixel_percentage}%)" - ) + threshold = abs(expected * rtol) + atol + diff_exact = abs(expected - actual) + diff_mask = diff_exact > threshold + diff_lenient = diff_exact.where(diff_mask) + + non_x_y_dims = list(set(expected.dims) - {"x", "y"}) + value_mapping = dict(map(lambda d: (d, expected[d].data), non_x_y_dims)) + shape = tuple([len(value_mapping[x]) for x in non_x_y_dims]) + + for shape_index, v in np.ndenumerate(np.ndarray(shape)): + indexers = {} + for index, value_index in enumerate(shape_index): + indexers[non_x_y_dims[index]] = value_mapping[non_x_y_dims[index]][value_index] + diff_data = diff_lenient.sel(indexers=indexers) + total_pixel_count = expected.sel(indexers).count().item() + diff_pixel_count = diff_data.count().item() + + if diff_pixel_count > 0: + diff_pixel_percentage = round(diff_pixel_count * 100 / total_pixel_count, 1) + diff_mean = round(diff_data.mean().item(), 2) + diff_var = round(diff_data.var().item(), 2) + + key = name + ": " if name else "" + key += ",".join([f"{k} {str(v1)}" for k, v1 in indexers.items()]) + issues.append( + f"{key}: value difference exceeds tolerance (rtol {rtol}, atol {atol}), min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}" + ) + + _log.warning(f"Difference (ascii art) for {key}:\n{_ascii_art(diff_data)}") + + coord_grid = np.meshgrid(diff_data.coords["x"], diff_data.coords["y"]) + mask = diff_data.notnull() + if mask.dims[0] != "y": + mask = mask.transpose() + x_coords = coord_grid[0][mask] + y_coords = coord_grid[1][mask] + + diff_bbox = ((x_coords.min().item(), y_coords.min().item()), (x_coords.max().item(), y_coords.max().item())) + diff_area = (x_coords.max() - x_coords.min()) * (y_coords.max() - y_coords.min()) + total_area = abs( + (diff_data.coords["y"][-1].data - diff_data.coords["y"][0].data) + * (diff_data.coords["x"][-1].data - diff_data.coords["x"][0].data) + ) + area_percentage = round(diff_area * 100 / total_area, 1) + issues.append( + f"{key}: differing pixels: {diff_pixel_count}/{total_pixel_count} ({diff_pixel_percentage}%), bbox {diff_bbox} - {area_percentage}% of the area" + ) return issues @@ -221,6 +190,7 @@ def _compare_xarray_dataarray( *, rtol: float = _DEFAULT_RTOL, atol: float = _DEFAULT_ATOL, + name: str = None, ) -> List[str]: """ Compare two xarray DataArrays with tolerance and report mismatch issues (as strings) @@ -243,7 +213,7 @@ def _compare_xarray_dataarray( issues = [] # `xarray.testing.assert_allclose` currently does not always - # provides detailed information about shape/dimension mismatches + # provide detailed information about shape/dimension mismatches # so we enrich the issue listing with some more details if actual.dims != expected.dims: issues.append(f"Dimension mismatch: {actual.dims} != {expected.dims}") @@ -254,17 +224,14 @@ def _compare_xarray_dataarray( issues.append(f"Coordinates mismatch for dimension {dim!r}: {acs} != {ecs}") if actual.shape != expected.shape: issues.append(f"Shape mismatch: {actual.shape} != {expected.shape}") - - if not issues: - if {"x", "y"} <= set(expected.dims): - issues = _compare_xarray_dataarray_xy(actual=actual, expected=expected, rtol=rtol, atol=atol) - else: - try: - xarray.testing.assert_allclose(a=actual, b=expected, rtol=rtol, atol=atol) - except AssertionError as e: - # TODO: message of `assert_allclose` is typically multiline, split it again or make it one line? - issues.append(str(e).strip()) - + compatible = len(issues) == 0 + try: + xarray.testing.assert_allclose(a=actual, b=expected, rtol=rtol, atol=atol) + except AssertionError as e: + # TODO: message of `assert_allclose` is typically multiline, split it again or make it one line? + issues.append(str(e).strip()) + if compatible and {"x", "y"} <= set(expected.dims): + issues.extend(_compare_xarray_dataarray_xy(actual=actual, expected=expected, rtol=rtol, atol=atol, name=name)) return issues @@ -293,32 +260,6 @@ def assert_xarray_dataarray_allclose( if issues: raise AssertionError("\n".join(issues)) - -def assert_xarray_dataarray_allclose_xy( - actual: Union[xarray.DataArray, str, Path], - expected: Union[xarray.DataArray, str, Path], - *, - rtol: float = _DEFAULT_RTOL, - atol: float = _DEFAULT_ATOL, -): - """ - Assert that two Xarray ``DataArray`` instances are equal (with tolerance). - - :param actual: actual data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file. - :param expected: expected or reference data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file. - :param rtol: relative tolerance - :param atol: absolute tolerance - :raises AssertionError: if not equal within the given tolerance - - .. versionadded:: 0.31.0 - - .. warning:: - This function is experimental and subject to change. - """ - issues = _compare_xarray_dataarray_xy(actual=actual, expected=expected, rtol=rtol, atol=atol) - if issues: - raise AssertionError("\n".join(issues)) - def _compare_xarray_datasets( actual: Union[xarray.Dataset, str, Path], expected: Union[xarray.Dataset, str, Path], @@ -336,7 +277,6 @@ def _compare_xarray_datasets( expected = _as_xarray_dataset(expected) all_issues = [] - # TODO: just leverage DataSet support in xarray.testing.assert_allclose for all this? actual_vars = set(actual.data_vars) expected_vars = set(expected.data_vars) _log.debug(f"_compare_xarray_datasets: actual_vars={actual_vars!r} expected_vars={expected_vars!r}") @@ -344,7 +284,7 @@ def _compare_xarray_datasets( all_issues.append(f"Xarray DataSet variables mismatch: {actual_vars} != {expected_vars}") for var in expected_vars.intersection(actual_vars): _log.debug(f"_compare_xarray_datasets: comparing variable {var!r}") - issues = _compare_xarray_dataarray(actual[var], expected[var], rtol=rtol, atol=atol) + issues = _compare_xarray_dataarray(actual[var], expected[var], rtol=rtol, atol=atol, name=var) if issues: all_issues.append(f"Issues for variable {var!r}:") all_issues.extend(issues) @@ -406,10 +346,7 @@ def assert_xarray_allclose( if isinstance(actual, xarray.Dataset) and isinstance(expected, xarray.Dataset): assert_xarray_dataset_allclose(actual, expected, rtol=rtol, atol=atol) elif isinstance(actual, xarray.DataArray) and isinstance(expected, xarray.DataArray): - if {"x", "y"}.issubset(expected.dims): - assert_xarray_dataarray_allclose_xy(actual, expected, rtol=rtol, atol=atol) - else: - assert_xarray_dataarray_allclose(actual, expected, rtol=rtol, atol=atol) + assert_xarray_dataarray_allclose(actual, expected, rtol=rtol, atol=atol) else: raise ValueError(f"Unsupported types: {type(actual)} and {type(expected)}") diff --git a/setup.py b/setup.py index 1f960f5ef..9a4770793 100644 --- a/setup.py +++ b/setup.py @@ -82,7 +82,6 @@ "deprecated>=1.2.12", 'oschmod>=0.3.12; sys_platform == "win32"', "importlib_resources; python_version<'3.9'", - "scipy", # for Convex Hull algorithm ], extras_require={ "tests": tests_require, diff --git a/tests/testing/test_results.py b/tests/testing/test_results.py index 6b5f76f38..c4800dc93 100644 --- a/tests/testing/test_results.py +++ b/tests/testing/test_results.py @@ -35,6 +35,7 @@ def test_simple_defaults(self): [ "Coordinates mismatch for dimension 'dim_0': [0 1 2 3] != [0 1 2]", "Shape mismatch: (4,) != (3,)", + dirty_equals.IsStr(regex="Left and right DataArray objects are not close.*", regex_flags=re.DOTALL), ], ), ( @@ -43,6 +44,7 @@ def test_simple_defaults(self): "Dimension mismatch: ('dim_0', 'dim_1') != ('dim_0',)", "Coordinates mismatch for dimension 'dim_0': [0 1] != [0 1 2]", "Shape mismatch: (2, 3) != (3,)", + dirty_equals.IsStr(regex="Left and right DataArray objects are not close.*", regex_flags=re.DOTALL), ], ), ( @@ -50,6 +52,7 @@ def test_simple_defaults(self): [ "Dimension mismatch: ('dim_0', 'dim_1') != ('dim_0',)", "Shape mismatch: (3, 1) != (3,)", + dirty_equals.IsStr(regex="Left and right DataArray objects are not close.*", regex_flags=re.DOTALL), ], ), ], @@ -71,12 +74,20 @@ def test_simple_shape_mismatch(self, actual, expected_issues): "Dimension mismatch: ('y', 'x') != ('x', 'y')", "Coordinates mismatch for dimension 'x': [0 1 2] != [0 1]", "Coordinates mismatch for dimension 'y': [0 1] != [0 1 2]", + dirty_equals.IsStr( + regex=r"Left and right DataArray objects are not close.*Differing dimensions:.*\(y: 2, x: 3\) != \(x: 2, y: 3\)", + regex_flags=re.DOTALL, + ), ], ), ( xarray.DataArray([[1, 2, 3], [4, 5, 6]], dims=["x", "z"]), [ "Dimension mismatch: ('x', 'z') != ('x', 'y')", + dirty_equals.IsStr( + regex=r"Left and right DataArray objects are not close.*Differing dimensions:.*\(x: 2, z: 3\) != \(x: 2, y: 3\)", + regex_flags=re.DOTALL, + ), ], ), ], @@ -96,6 +107,10 @@ def test_simple_dims_mismatch(self, actual, expected_issues): xarray.DataArray([[1, 2, 3], [4, 5, 6]], coords=[("x", [111, 222]), ("y", [33, 44, 55])]), [ "Coordinates mismatch for dimension 'x': [111 222] != [11 22]", + dirty_equals.IsStr( + regex=r"Left and right DataArray objects are not close.*Differing coordinates:.*L \* x\s+\(x\).*?111 222.*R \* x\s+\(x\).*?11 22", + regex_flags=re.DOTALL, + ), ], ), ], @@ -390,12 +405,13 @@ def test_allclose_minimal_xy_different(self, tmp_path, actual_dir, expected_dir) with raises_assertion_error_or_not( r"Issues for file 'data.nc'.*" r"Issues for variable 'b1'.*" + r"Left and right DataArray objects are not close.*Differing values:.*" r"t 0: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:1.0, max: 1.0, mean: 1.0, var: 0.0.*" - r"t 0: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area.*" + r"t 0: differing pixels: 20/20 \(100.0%\), bbox \(\(4, 5\), \(7, 9\)\) - 100.0% of the area.*" r"t 1: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:1.0, max: 1.0, mean: 1.0, var: 0.0.*" - r"t 1: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area.*" + r"t 1: differing pixels: 20/20 \(100.0%\), bbox \(\(4, 5\), \(7, 9\)\) - 100.0% of the area.*" r"t 2: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:1.0, max: 1.0, mean: 1.0, var: 0.0.*" - r"t 2: differing pixels: 20/20 \(100.0%\), spread over 100.0% of the area" + r"t 2: differing pixels: 20/20 \(100.0%\), bbox \(\(4, 5\), \(7, 9\)\) - 100.0% of the area" ): assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path) @@ -432,8 +448,9 @@ def test_allclose_minimal_xy_different_small_area(self, tmp_path, actual_dir, ex with raises_assertion_error_or_not( r"Issues for file 'data.nc'.*" r"Issues for variable 'b2'.*" + r"Left and right DataArray objects are not close.*Differing values:.*" r"t 2: value difference exceeds tolerance \(rtol 1e-06, atol 1e-06\), min:33.0, max: 42.0, mean: 37.5, var: 11.2.*" - r"t 2: differing pixels: 4/20 \(20.0%\), spread over 8.3% of the area" + r"t 2: differing pixels: 4/20 \(20.0%\), bbox \(\(6, 7\), \(7, 8\)\) - 8.3% of the area" ): assert_job_results_allclose(actual=actual_dir, expected=expected_dir, tmp_path=tmp_path)