Add asynchronous load method #10327
Changes from 129 commits
```diff
@@ -30,6 +30,7 @@
     "coveralls",
     "pip",
     "pytest",
+    "pytest-asyncio",
     "pytest-cov",
     "pytest-env",
     "pytest-mypy-plugins",
```
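The `pytest-asyncio` dependency added above is what lets the test suite define coroutine tests for the new async code path. A minimal, hypothetical sketch of such a test (the dataset construction and assertion are illustrative, not taken from this PR):

```python
import numpy as np
import pytest
import xarray as xr


# Hypothetical test: pytest-asyncio's "asyncio" marker lets pytest run the coroutine.
@pytest.mark.asyncio
async def test_load_async_returns_self():
    ds = xr.Dataset({"a": ("x", np.arange(4))})  # small in-memory dataset
    result = await ds.load_async()               # new method added by this PR
    assert result is ds                          # load_async returns self, per the diff
```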
```diff
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import asyncio
 import copy
 import datetime
 import io
@@ -539,24 +540,50 @@ def load(self, **kwargs) -> Self:
         dask.compute
         """
         # access .data to coerce everything to numpy or dask arrays
-        lazy_data = {
+        chunked_data = {
             k: v._data for k, v in self.variables.items() if is_chunked_array(v._data)
         }
-        if lazy_data:
-            chunkmanager = get_chunked_array_type(*lazy_data.values())
+        if chunked_data:
+            chunkmanager = get_chunked_array_type(*chunked_data.values())

             # evaluate all the chunked arrays simultaneously
             evaluated_data: tuple[np.ndarray[Any, Any], ...] = chunkmanager.compute(
-                *lazy_data.values(), **kwargs
+                *chunked_data.values(), **kwargs
             )

-            for k, data in zip(lazy_data, evaluated_data, strict=False):
+            for k, data in zip(chunked_data, evaluated_data, strict=False):
                 self.variables[k].data = data

         # load everything else sequentially
-        for k, v in self.variables.items():
-            if k not in lazy_data:
-                v.load()
+        [v.load() for k, v in self.variables.items() if k not in chunked_data]

         return self
```
```diff
+    async def load_async(self, **kwargs) -> Self:
+        # TODO refactor this to pull out the common chunked_data codepath
+
+        # this blocks on chunked arrays but not on lazily indexed arrays
```

Review comment: FYI dask has async compute but it seems hard to work in here :)
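For context on the review note above: dask's distributed scheduler does offer an asynchronous compute path, but it requires an async `Client`, which is part of why it is hard to thread through this method. A rough sketch of that API in isolation (not code from this PR):

```python
import asyncio

import dask.array as da
from dask.distributed import Client


async def main():
    # An asynchronous client can be used with async/await directly.
    async with Client(asynchronous=True) as client:
        arr = da.ones((1000, 1000), chunks=(100, 100))
        future = client.compute(arr.sum())  # schedule the graph, get a Future back
        result = await future               # await instead of blocking on .result()
        print(result)


asyncio.run(main())
```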
```diff
+        # access .data to coerce everything to numpy or dask arrays
+        chunked_data = {
+            k: v._data for k, v in self.variables.items() if is_chunked_array(v._data)
+        }
+        if chunked_data:
+            chunkmanager = get_chunked_array_type(*chunked_data.values())
+
+            # evaluate all the chunked arrays simultaneously
+            evaluated_data: tuple[np.ndarray[Any, Any], ...] = chunkmanager.compute(
+                *chunked_data.values(), **kwargs
+            )
+
+            for k, data in zip(chunked_data, evaluated_data, strict=False):
+                self.variables[k].data = data
+
+        # load everything else concurrently
+        coros = [
+            v.load_async() for k, v in self.variables.items() if k not in chunked_data
+        ]
+        await asyncio.gather(*coros)
```
Comment on lines +622 to +626

Review comment: We could actually do this same thing inside of the synchronous
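A speculative reading of the comment above, assuming it refers to the synchronous `load`: the same per-variable gather could be driven from blocking code by spinning up an event loop internally. This is only a sketch of that idea, not something this PR does:

```python
import asyncio


def _load_non_chunked_variables_sync(self, chunked_data):
    # Speculative helper: reuse the concurrent per-variable path from synchronous
    # code. Assumes no event loop is already running in this thread.
    async def _gather():
        await asyncio.gather(
            *(
                v.load_async()
                for k, v in self.variables.items()
                if k not in chunked_data
            )
        )

    asyncio.run(_gather())
```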
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should rate-limite all async def async_gather(*coros, concurrency: Optional[int] = None, return_exceptions: bool = False) -> list[Any]:
"""Execute a gather while limiting the number of concurrent tasks.
Args:
coros: coroutines
list of coroutines to execute
concurrency: int
concurrency limit
if None, defaults to config_obj.get('async.concurrency', 4)
if <= 0, no concurrency limit
"""
if concurrency is None:
concurrency = int(config_obj.get("async.concurrency", 4))
if concurrency > 0:
# if concurrency > 0, we use a semaphore to limit the number of concurrent coroutines
semaphore = asyncio.Semaphore(concurrency)
async def sem_coro(coro):
async with semaphore:
return await coro
results = await asyncio.gather(*(sem_coro(c) for c in coros), return_exceptions=return_exceptions)
else:
results = await asyncio.gather(*coros, return_exceptions=return_exceptions)
return results There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Arguably that should be left to the underlying storage layer. Zarr already has its own rate limiting. Why introduce this additional complexity and configuration parameter in Xarray? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does zarr rate-limit per call or globally though? If it's rate-limited per call, and we make lots of concurrent calls from the xarray API, it will exceed the intended rate set in zarr... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not 100% on what Zarr will do but this will rate limit across Xarray variables. We will undoubtedly want to offer control here, even if the default is None for a start. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See #10622 for a more general discussion on this problem. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's document in the docstrings for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Documented in dfaac7e |
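For illustration, the helper proposed in the thread above would replace the bare `asyncio.gather` call roughly like this (hypothetical; `async_gather` and `config_obj` come from that comment, not from the PR as written):

```python
# Hypothetical: cap concurrent per-variable loads instead of gathering them all at once.
coros = [
    v.load_async() for k, v in self.variables.items() if k not in chunked_data
]
await async_gather(*coros, concurrency=4)
```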
```diff
+
+        return self
```
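A short usage sketch of the new method from user code (the file path is hypothetical; `load_async` returns the dataset itself, per the diff above):

```python
import asyncio

import xarray as xr


async def main():
    ds = xr.open_dataset("example.nc")  # hypothetical lazily-backed dataset
    ds = await ds.load_async()          # load all variables concurrently
    print(ds)


asyncio.run(main())
```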