From f9dbd8650fb43d2e6146f2c49539cfe67232556a Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sat, 28 Jun 2025 17:45:31 -0700 Subject: [PATCH] Use add_note() to annotate exceptions when encoding fails We now always add variable names and contents when encoding fails, in contrast to the current practice where variable names and values are only sometimes specified, based on the `name` passed into `VariableCoder.encode()`. This provides a better debugging experience for users. In the future, we might remove `name` from `VariableCoder.encode()` because this makes it redundant. For example, attempting to save an int64 value that cannot be safely cast to int32 in a netCDF3 file now shows something like the following: ValueError: could not safely cast array from int64 to int32... Raised while encoding variable 'invalid' with value Size: 8B array([9223372036854775807]) Note that `Exception.add_note()` is a Python 3.11+ feature, so this PR will need to wait until #10438 is submitted. --- doc/whats-new.rst | 3 ++- xarray/backends/common.py | 24 ++++++++++++++++++------ xarray/conventions.py | 8 +++++++- xarray/tests/test_backends_common.py | 17 +++++++++++++++++ 4 files changed, 44 insertions(+), 8 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6db780484bd..3e157eeacab 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,7 +14,8 @@ New Features ~~~~~~~~~~~~ - Expose :py:class:`~xarray.indexes.RangeIndex`, and :py:class:`~xarray.indexes.CoordinateTransformIndex` as public api under the ``xarray.indexes`` namespace. By `Deepak Cherian `_. - +- Better error messages when encoding data to be written to disk fails (:pull:`10464`). 
+ By `Stephan Hoyer `_ Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/backends/common.py b/xarray/backends/common.py index e1f8dc5cecd..f478c2b882c 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -389,9 +389,23 @@ def encode(self, variables, attributes): attributes : dict-like """ - variables = {k: self.encode_variable(v, name=k) for k, v in variables.items()} - attributes = {k: self.encode_attribute(v) for k, v in attributes.items()} - return variables, attributes + encoded_variables = {} + for k, v in variables.items(): + try: + encoded_variables[k] = self.encode_variable(v) + except Exception as e: + e.add_note(f"Raised while encoding variable {k!r} with value {v!r}") + raise + + encoded_attributes = {} + for k, v in attributes.items(): + try: + encoded_attributes[k] = self.encode_attribute(v) + except Exception as e: + e.add_note(f"Raised while encoding attribute {k!r} with value {v!r}") + raise + + return encoded_variables, encoded_attributes def encode_variable(self, v, name=None): """encode one variable""" @@ -641,9 +655,7 @@ def encode(self, variables, attributes): variables = { k: ensure_dtype_not_object(v, name=k) for k, v in variables.items() } - variables = {k: self.encode_variable(v, name=k) for k, v in variables.items()} - attributes = {k: self.encode_attribute(v) for k, v in attributes.items()} - return variables, attributes + return super().encode(variables, attributes) class BackendEntrypoint: diff --git a/xarray/conventions.py b/xarray/conventions.py index 5ae40ea57d8..17f1e0666b6 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -792,7 +792,13 @@ def cf_encoder(variables: T_Variables, attributes: T_Attrs): # add encoding for time bounds variables if present. 
_update_bounds_encoding(variables) - new_vars = {k: encode_cf_variable(v, name=k) for k, v in variables.items()} + new_vars = {} + for k, v in variables.items(): + try: + new_vars[k] = encode_cf_variable(v, name=k) + except Exception as e: + e.add_note(f"Raised while encoding variable {k!r} with value {v!r}") + raise # Remove attrs from bounds variables (issue #2921) for var in new_vars.values(): diff --git a/xarray/tests/test_backends_common.py b/xarray/tests/test_backends_common.py index dc89ecefbfe..33da027ac97 100644 --- a/xarray/tests/test_backends_common.py +++ b/xarray/tests/test_backends_common.py @@ -1,9 +1,13 @@ from __future__ import annotations +import re + import numpy as np import pytest +import xarray as xr from xarray.backends.common import _infer_dtype, robust_getitem +from xarray.tests import requires_scipy class DummyFailure(Exception): @@ -43,3 +47,16 @@ def test_robust_getitem() -> None: def test_infer_dtype_error_on_mixed_types(data): with pytest.raises(ValueError, match="unable to infer dtype on variable"): _infer_dtype(data, "test") + + +@requires_scipy +def test_encoding_failure_note(): + # Create an arbitrary value that cannot be encoded in netCDF3 + ds = xr.Dataset({"invalid": np.array([2**63 - 1], dtype=np.int64)}) + with pytest.raises( + ValueError, + match=re.escape( + "Raised while encoding variable 'invalid' with value