Skip to content

Commit 126b92a

Browse files
Deprecate tuples of chunks? (#8341)
* Deprecate tuples of chunks? (I was planning on putting an issue in, but then thought it wasn't much more difficult to make the PR. But it's totally fine if we don't think this is a good idea...) Allowing a tuple of dims means we're reliant on dimension order, which we really try and not be reliant on. It also makes the type signature even more complicated. So are we OK to encourage a dict of `dim: chunksizes`, rather than a tuple of chunksizes? * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update xarray/core/dataarray.py --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 86b4167 commit 126b92a

File tree

8 files changed

+32
-20
lines changed

8 files changed

+32
-20
lines changed

doc/whats-new.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,13 @@ Breaking changes
3030
Deprecations
3131
~~~~~~~~~~~~
3232

33+
- Supplying dimension-ordered sequences to :py:meth:`DataArray.chunk` &
34+
:py:meth:`Dataset.chunk` is deprecated in favor of supplying a dictionary of
35+
dimensions, or a single ``int`` or ``"auto"`` argument covering all
36+
dimensions. Xarray favors using dimension names rather than positions, and
37+
this was one place in the API where dimension positions were used.
38+
(:pull:`8341`)
39+
By `Maximilian Roos <https://github.com/max-sixty>`_.
3340

3441
Bug fixes
3542
~~~~~~~~~

xarray/core/dataarray.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1371,6 +1371,11 @@ def chunk(
13711371
# ignoring type; unclear why it won't accept a Literal into the value.
13721372
chunks = dict.fromkeys(self.dims, chunks)
13731373
elif isinstance(chunks, (tuple, list)):
1374+
utils.emit_user_level_warning(
1375+
"Supplying chunks as dimension-order tuples is deprecated. "
1376+
"It will raise an error in the future. Instead use a dict with dimension names as keys.",
1377+
category=DeprecationWarning,
1378+
)
13741379
chunks = dict(zip(self.dims, chunks))
13751380
else:
13761381
chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk")

xarray/core/dataset.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2648,11 +2648,17 @@ def chunk(
26482648
warnings.warn(
26492649
"None value for 'chunks' is deprecated. "
26502650
"It will raise an error in the future. Use instead '{}'",
2651-
category=FutureWarning,
2651+
category=DeprecationWarning,
26522652
)
26532653
chunks = {}
26542654
chunks_mapping: Mapping[Any, Any]
26552655
if not isinstance(chunks, Mapping) and chunks is not None:
2656+
if isinstance(chunks, (tuple, list)):
2657+
utils.emit_user_level_warning(
2658+
"Supplying chunks as dimension-order tuples is deprecated. "
2659+
"It will raise an error in the future. Instead use a dict with dimensions as keys.",
2660+
category=DeprecationWarning,
2661+
)
26562662
chunks_mapping = dict.fromkeys(self.dims, chunks)
26572663
else:
26582664
chunks_mapping = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk")

xarray/core/types.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -187,9 +187,7 @@ def copy(
187187
# FYI in some cases we don't allow `None`, which this doesn't take account of.
188188
T_ChunkDim: TypeAlias = Union[int, Literal["auto"], None, tuple[int, ...]]
189189
# The dimension-ordered tuple form is deprecated (still accepted at runtime with a warning) and is no longer part of this type; prefer named dims.
190-
T_Chunks: TypeAlias = Union[
191-
T_ChunkDim, Mapping[Any, T_ChunkDim], tuple[T_ChunkDim, ...]
192-
]
190+
T_Chunks: TypeAlias = Union[T_ChunkDim, Mapping[Any, T_ChunkDim]]
193191
T_NormalizedChunks = tuple[tuple[int, ...], ...]
194192

195193
DataVars = Mapping[Any, Any]

xarray/core/variable.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from collections.abc import Hashable, Mapping, Sequence
99
from datetime import timedelta
1010
from functools import partial
11-
from typing import TYPE_CHECKING, Any, Callable, Literal, NoReturn, cast
11+
from typing import TYPE_CHECKING, Any, Callable, NoReturn, cast
1212

1313
import numpy as np
1414
import pandas as pd
@@ -34,6 +34,7 @@
3434
is_duck_dask_array,
3535
to_numpy,
3636
)
37+
from xarray.core.types import T_Chunks
3738
from xarray.core.utils import (
3839
OrderedSet,
3940
_default,
@@ -965,13 +966,7 @@ def _replace(
965966

966967
def chunk(
967968
self,
968-
chunks: (
969-
int
970-
| Literal["auto"]
971-
| tuple[int, ...]
972-
| tuple[tuple[int, ...], ...]
973-
| Mapping[Any, None | int | tuple[int, ...]]
974-
) = {},
969+
chunks: T_Chunks = {},
975970
name: str | None = None,
976971
lock: bool | None = None,
977972
inline_array: bool | None = None,

xarray/tests/test_backends.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2798,7 +2798,7 @@ def test_write_empty(self, write_empty: bool) -> None:
27982798
)
27992799

28002800
if has_dask:
2801-
ds["test"] = ds["test"].chunk((1, 1, 1))
2801+
ds["test"] = ds["test"].chunk(1)
28022802
encoding = None
28032803
else:
28042804
encoding = {"test": {"chunks": (1, 1, 1)}}

xarray/tests/test_dataarray.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -879,13 +879,14 @@ def test_chunk(self) -> None:
879879
assert blocked.chunks == ((3,), (4,))
880880
first_dask_name = blocked.data.name
881881

882-
blocked = unblocked.chunk(chunks=((2, 1), (2, 2)))
883-
assert blocked.chunks == ((2, 1), (2, 2))
884-
assert blocked.data.name != first_dask_name
882+
with pytest.warns(DeprecationWarning):
883+
blocked = unblocked.chunk(chunks=((2, 1), (2, 2))) # type: ignore
884+
assert blocked.chunks == ((2, 1), (2, 2))
885+
assert blocked.data.name != first_dask_name
885886

886-
blocked = unblocked.chunk(chunks=(3, 3))
887-
assert blocked.chunks == ((3,), (3, 1))
888-
assert blocked.data.name != first_dask_name
887+
blocked = unblocked.chunk(chunks=(3, 3))
888+
assert blocked.chunks == ((3,), (3, 1))
889+
assert blocked.data.name != first_dask_name
889890

890891
# name doesn't change when rechunking by same amount
891892
# this fails if ReprObject doesn't have __dask_tokenize__ defined

xarray/tests/test_variable.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2663,7 +2663,7 @@ def test_full_like(self) -> None:
26632663
def test_full_like_dask(self) -> None:
26642664
orig = Variable(
26652665
dims=("x", "y"), data=[[1.5, 2.0], [3.1, 4.3]], attrs={"foo": "bar"}
2666-
).chunk(((1, 1), (2,)))
2666+
).chunk(dict(x=(1, 1), y=(2,)))
26672667

26682668
def check(actual, expect_dtype, expect_values):
26692669
assert actual.dtype == expect_dtype

0 commit comments

Comments
 (0)