
New defaults for concat, merge, combine_* #10062

Open
wants to merge 39 commits into base: main
Changes from 25 commits

39 commits
5c56acf
Remove default values in private functions
jsignell Feb 14, 2025
5461a9f
Use sentinel value to change default with warnings
jsignell Feb 24, 2025
e16834f
Remove unnecessary warnings
jsignell Feb 24, 2025
9c50125
Use old kwarg values within map_blocks, concat dataarray
jsignell Feb 25, 2025
b0cf17a
Merge branch 'main' into concat_default_kwargs
jsignell Feb 25, 2025
0026ee8
Switch options back to old defaults
jsignell Feb 26, 2025
4d4deda
Update tests and add new ones to exercise options
jsignell Feb 26, 2025
5a4036b
Merge branch 'main' into concat_default_kwargs
jsignell Mar 4, 2025
912638b
Use `emit_user_level_warning` rather than `warnings.warn`
jsignell Mar 4, 2025
67fd4ff
Change hardcoded defaults
jsignell Mar 4, 2025
4f38292
Fix up test_concat
jsignell Mar 4, 2025
51ccc89
Add comment about why we allow data_vars='minimial' for concat over d…
jsignell Mar 4, 2025
aa3180e
Tidy up tests based on review
jsignell Mar 4, 2025
93d2abc
Merge branch 'main' into concat_default_kwargs
jsignell Mar 7, 2025
e517dcc
Trying to resolve mypy issues
jsignell Mar 10, 2025
0e678e5
Fix mypy in tests
jsignell Mar 10, 2025
37f0147
Fix doctests
jsignell Mar 10, 2025
dac337c
Ignore warnings on error tests
jsignell Mar 10, 2025
a0c16c3
Merge branch 'main' into concat_default_kwargs
jsignell Mar 13, 2025
4eb275c
Use typing.get_args when possible
jsignell Mar 13, 2025
03f1502
Allow `minimal` in concat options at the type level
jsignell Mar 13, 2025
f1649b8
Merge branch 'main' into concat_default_kwargs
dcherian Mar 13, 2025
7dbdd4a
Minimal docs update
jsignell Mar 13, 2025
c6a557b
Tighten up language
jsignell Mar 13, 2025
9667857
Merge branch 'main' into concat_default_kwargs
jsignell Mar 13, 2025
42cf522
Merge branch 'main' into concat_default_kwargs
jsignell Mar 17, 2025
8d0d390
Merge branch 'main' into concat_default_kwargs
jsignell Apr 18, 2025
ba45599
Add to deprecated section of whats new
jsignell Apr 18, 2025
90bd629
Merge branch 'main' into concat_default_kwargs
Illviljan May 9, 2025
d3b484f
Merge branch 'main' into concat_default_kwargs
jsignell May 27, 2025
f233294
Update doc/whats-new.rst
jsignell May 28, 2025
20a3dbd
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 28, 2025
324714a
Add a mypy tuple[Any, ...] type
jsignell May 28, 2025
c4d9f74
Merge branch 'main' into concat_default_kwargs
jsignell May 28, 2025
38ef42d
Merge branch 'main' into concat_default_kwargs
jsignell May 30, 2025
eb14402
Apply suggestions from code review
jsignell Jun 2, 2025
729b8ba
Simplify combining docs slightly
jsignell Jun 2, 2025
aca67b9
Don't change concat_dims
jsignell Jun 2, 2025
63c5905
Fix formatting
jsignell Jun 2, 2025
44 changes: 37 additions & 7 deletions doc/user-guide/combining.rst
@@ -43,7 +43,6 @@ new dimension by stacking lower dimensional arrays together:

.. ipython:: python

da.sel(x="a")
xr.concat([da.isel(x=0), da.isel(x=1)], "x")

If the second argument to ``concat`` is a new dimension name, the arrays will
@@ -52,15 +51,18 @@ dimension:

.. ipython:: python

xr.concat([da.isel(x=0), da.isel(x=1)], "new_dim")
da0 = da.isel(x=0).drop_vars("x")
da1 = da.isel(x=1).drop_vars("x")

xr.concat([da0, da1], "new_dim")
Contributor Author:

Dropping the overlapping "x" means that you don't get a future warning anymore and the outcome won't change with the new defaults. It seemed to me like it was maintaining the spirit of the docs.

Contributor:

I'd change to xr.concat([da.isel(x=[0]), da.isel(x=[1])], dim="new_dim"). I think that preserves the spirit, and gets users closer to what we'd like them to type and understand.

Contributor Author:

That one will give a FutureWarning about how join is going to change:

In [3]:  xr.concat([da.isel(x=[0]), da.isel(x=[1])], "new_dim")
<ipython-input-3-8d3fee24c8e4>:1: FutureWarning: In a future version of xarray the default value for join will change from join='outer' to join='exact'. This change will result in the following ValueError:cannot be aligned with join='exact' because index/labels/sizes are not equal along these coordinates (dimensions): 'x' ('x',) The recommendation is to set join explicitly for this case.
  xr.concat([da.isel(x=[0]), da.isel(x=[1])], "new_dim")
Out[3]: 
<xarray.DataArray (new_dim: 2, x: 2, y: 3)> Size: 96B
array([[[ 0.,  1.,  2.],
        [nan, nan, nan]],

       [[nan, nan, nan],
        [ 3.,  4.,  5.]]])
Coordinates:
  * x        (x) <U1 8B 'a' 'b'
  * y        (y) int64 24B 10 20 30
Dimensions without coordinates: new_dim

We can add an explicit join value to get rid of the warning, or we can let the docs build with the warning (I think that is not a good idea, since warnings in the docs might scare people).

Contributor Author:

Compare that with the example as it is on main:

In [3]:  xr.concat([da.isel(x=0), da.isel(x=1)], "new_dim")
<ipython-input-8-5e17a4052d18>:1: FutureWarning: In a future version of xarray the default value for coords will change from coords='different' to coords='minimal'. This is likely to lead to different results when multiple datasets have matching variables with overlapping values. To opt in to new defaults and get rid of these warnings now use `set_options(use_new_combine_kwarg_defaults=True) or set coords explicitly.
  xr.concat([da.isel(x=0), da.isel(x=1)], "new_dim")
Out[3]: 
<xarray.DataArray (new_dim: 2, y: 3)> Size: 48B
array([[0, 1, 2],
       [3, 4, 5]])
Coordinates:
    x        (new_dim) <U1 8B 'a' 'b'
  * y        (y) int64 24B 10 20 30
Dimensions without coordinates: new_dim

Collaborator:

If we keep this as suggested in the PR, I'd go with

    da0 = da.isel(x=0, drop=True)
    da1 = da.isel(x=1, drop=True)
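
For what it's worth, the two spellings discussed in this thread are interchangeable; a quick sketch checking that (reconstructing a ``da`` shaped like the one in the docs):

```python
import numpy as np
import xarray as xr

da = xr.DataArray(
    np.arange(6).reshape(2, 3),
    coords=[("x", ["a", "b"]), ("y", [10, 20, 30])],
)

# Both drop the scalar "x" coordinate that isel leaves behind.
da0 = da.isel(x=0).drop_vars("x")
da0_alt = da.isel(x=0, drop=True)
assert da0.identical(da0_alt)
```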


The second argument to ``concat`` can also be an :py:class:`~pandas.Index` or
:py:class:`~xarray.DataArray` object as well as a string, in which case it is
used to label the values along the new dimension:

.. ipython:: python

xr.concat([da.isel(x=0), da.isel(x=1)], pd.Index([-90, -100], name="new_dim"))
xr.concat([da0, da1], pd.Index([-90, -100], name="new_dim"))
Contributor:

Same here.


Of course, ``concat`` also works on ``Dataset`` objects:

@@ -75,6 +77,12 @@ between datasets. With the default parameters, xarray will load some coordinate
variables into memory to compare them between datasets. This may be prohibitively
expensive if you are manipulating your dataset lazily using :ref:`dask`.

.. note::

In a future version of xarray the default values for many of these options
will change. You can opt into the new default values early using
``xr.set_options(use_new_combine_kwarg_defaults=True)``.
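
The option named in the note only exists once this PR lands; in the meantime (and afterwards) the same stability can be had by passing the changing keywords explicitly, which behaves identically before and after the default switch. A sketch with made-up datasets:

```python
import xarray as xr

ds1 = xr.Dataset({"a": ("x", [1, 2])}, coords={"x": [0, 1]})
ds2 = xr.Dataset({"a": ("x", [3, 4])}, coords={"x": [2, 3]})

# Passing join and compat explicitly silences the FutureWarning and keeps
# the result stable across the default change.
merged = xr.merge([ds1, ds2], join="outer", compat="no_conflicts")
print(merged.sizes["x"])  # 4
```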

.. _merge:

Merge
@@ -94,10 +102,18 @@ If you merge another dataset (or a dictionary including data array objects), by
default the resulting dataset will be aligned on the **union** of all index
coordinates:

.. note::

In a future version of xarray the default values for ``join`` and ``compat``
will change. After this change, xarray will no longer attempt
to align the indices of the merged dataset. You can opt into the new default
values early using ``xr.set_options(use_new_combine_kwarg_defaults=True)``,
or explicitly set ``join='outer'`` to preserve the old behavior.

.. ipython:: python

other = xr.Dataset({"bar": ("x", [1, 2, 3, 4]), "x": list("abcd")})
xr.merge([ds, other])
xr.merge([ds, other], join="outer")

This ensures that ``merge`` is non-destructive. ``xarray.MergeError`` is raised
if you attempt to merge two variables with the same name but different values:
@@ -114,6 +130,16 @@ if you attempt to merge two variables with the same name but different values:
array([[ 1.4691123 , 0.71713666, -0.5090585 ],
[-0.13563237, 2.21211203, 0.82678535]])

.. note::

In a future version of xarray the default value for ``compat`` will change
from ``compat='no_conflicts'`` to ``compat='override'``. In this scenario
the values in the first object override all the values in other objects.

.. ipython:: python

xr.merge([ds, ds + 1], compat="override")
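
Run as a standalone sketch (with a stand-in ``ds``, not the docs' dataset), the override semantics look like this:

```python
import xarray as xr

ds = xr.Dataset({"a": ("x", [1, 2, 3])})

# With compat="override", the first object's values win wholesale; the
# conflicting values in ds + 1 are ignored rather than checked.
result = xr.merge([ds, ds + 1], compat="override")
print(result["a"].values)  # [1 2 3]
```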

The same non-destructive merging between ``DataArray`` index coordinates is
used in the :py:class:`~xarray.Dataset` constructor:

@@ -144,6 +170,11 @@ For datasets, ``ds0.combine_first(ds1)`` works similarly to
there are conflicting values in variables to be merged, whereas
``.combine_first`` defaults to the calling object's values.

.. note::

In a future version of xarray the default options for ``xr.merge`` will change
such that the behavior matches ``combine_first``.
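
A minimal sketch of the ``combine_first`` precedence described above (invented data):

```python
import numpy as np
import xarray as xr

ds0 = xr.Dataset({"a": ("x", [1.0, np.nan])}, coords={"x": [0, 1]})
ds1 = xr.Dataset({"a": ("x", [9.0, 9.0])}, coords={"x": [0, 1]})

# combine_first keeps the calling object's values and only fills its
# missing entries from the other object.
res = ds0.combine_first(ds1)
print(res["a"].values)  # [1. 9.]
```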

.. _update:

Update
@@ -236,7 +267,7 @@ coordinates as long as any non-missing values agree or are disjoint:

ds1 = xr.Dataset({"a": ("x", [10, 20, 30, np.nan])}, {"x": [1, 2, 3, 4]})
ds2 = xr.Dataset({"a": ("x", [np.nan, 30, 40, 50])}, {"x": [2, 3, 4, 5]})
xr.merge([ds1, ds2], compat="no_conflicts")
xr.merge([ds1, ds2], join="outer", compat="no_conflicts")

Note that due to the underlying representation of missing values as floating
point numbers (``NaN``), variable data type is not always preserved when merging
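
That caveat can be seen directly in a small sketch:

```python
import xarray as xr

ds1 = xr.Dataset({"a": ("x", [10, 20])}, coords={"x": [1, 2]})
ds2 = xr.Dataset({"a": ("x", [30, 40])}, coords={"x": [3, 4]})

# The outer join introduces NaN during alignment, so the integer data
# comes back as float64 even though no NaN survives in the result.
out = xr.merge([ds1, ds2], join="outer", compat="no_conflicts")
print(out["a"].dtype)  # float64
```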
@@ -295,13 +326,12 @@ they are concatenated in order based on the values in their dimension
coordinates, not on their position in the list passed to ``combine_by_coords``.

.. ipython:: python
:okwarning:

x1 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [0, 1, 2])])
x2 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [3, 4, 5])])
xr.combine_by_coords([x2, x1])
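
As a standalone check of that ordering behavior (kwargs passed explicitly to sidestep the deprecation warnings this PR introduces):

```python
import xarray as xr

x1 = xr.Dataset({"foo": ("x", [0, 1])}, coords={"x": [0, 1]})
x2 = xr.Dataset({"foo": ("x", [2, 3])}, coords={"x": [2, 3]})

# Order in the input list does not matter; the dimension coordinate does.
combined = xr.combine_by_coords([x2, x1], join="outer", combine_attrs="override")
print(combined["foo"].values)  # [0 1 2 3]
```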

These functions can be used by :py:func:`~xarray.open_mfdataset` to open many
These functions are used by :py:func:`~xarray.open_mfdataset` to open many
files as one dataset. The particular function used is specified by setting the
argument ``'combine'`` to ``'by_coords'`` or ``'nested'``. This is useful for
situations where your data is split across many files in multiple locations,
2 changes: 1 addition & 1 deletion doc/user-guide/terminology.rst
@@ -217,7 +217,7 @@ complete examples, please consult the relevant documentation.*
)

# combine the datasets
combined_ds = xr.combine_by_coords([ds1, ds2])
combined_ds = xr.combine_by_coords([ds1, ds2], join="outer")
combined_ds

lazy
14 changes: 9 additions & 5 deletions doc/whats-new.rst
@@ -7925,13 +7925,17 @@ Backwards incompatible changes
Now, the default always concatenates data variables:

.. ipython:: python
:suppress:

ds = xray.Dataset({"x": 0})
:verbatim:

.. ipython:: python
In [1]: ds = xray.Dataset({"x": 0})

xray.concat([ds, ds], dim="y")
In [2]: xray.concat([ds, ds], dim="y")
Out[2]:
<xarray.Dataset> Size: 16B
Dimensions: (y: 2)
Dimensions without coordinates: y
Data variables:
x (y) int64 16B 0 0

To obtain the old behavior, supply the argument ``concat_over=[]``.

25 changes: 16 additions & 9 deletions xarray/backends/api.py
@@ -34,7 +34,7 @@
)
from xarray.backends.locks import _get_scheduler
from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder
from xarray.core import indexing
from xarray.core import dtypes, indexing
from xarray.core.chunk import _get_chunk, _maybe_chunk
from xarray.core.combine import (
_infer_concat_order_from_positions,
@@ -50,6 +50,13 @@
from xarray.core.utils import is_remote_uri
from xarray.namedarray.daskmanager import DaskManager
from xarray.namedarray.parallelcompat import guess_chunkmanager
from xarray.util.deprecation_helpers import (
_COMPAT_DEFAULT,
_COORDS_DEFAULT,
_DATA_VARS_DEFAULT,
_JOIN_DEFAULT,
CombineKwargDefault,
)

if TYPE_CHECKING:
try:
@@ -1404,14 +1411,16 @@ def open_mfdataset(
| Sequence[Index]
| None
) = None,
compat: CompatOptions = "no_conflicts",
compat: CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT,
preprocess: Callable[[Dataset], Dataset] | None = None,
engine: T_Engine | None = None,
data_vars: Literal["all", "minimal", "different"] | list[str] = "all",
coords="different",
data_vars: Literal["all", "minimal", "different"]
| list[str]
| CombineKwargDefault = _DATA_VARS_DEFAULT,
coords=_COORDS_DEFAULT,
Collaborator:

I don't know anything about the context and I'm really bad at typing (so feel free to disregard / punt to a different PR), but shouldn't coords have the same type hints as data_vars?

Contributor Author:

Probably? I was trying to limit the scope of this PR as much as possible, since it's already pretty big. So I would prefer to punt this. When you add types there is always the possibility of breaking a bunch of stuff...

combine: Literal["by_coords", "nested"] = "by_coords",
parallel: bool = False,
join: JoinOptions = "outer",
join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT,
attrs_file: str | os.PathLike | None = None,
combine_attrs: CombineAttrsOptions = "override",
**kwargs,
@@ -1598,9 +1607,6 @@ def open_mfdataset(

paths1d: list[str | ReadBuffer]
if combine == "nested":
if isinstance(concat_dim, str | DataArray) or concat_dim is None:
concat_dim = [concat_dim] # type: ignore[assignment]

# This creates a flat list which is easier to iterate over, whilst
# encoding the originally-supplied structure as "ids".
# The "ids" are not used at all if combine='by_coords`.
@@ -1649,13 +1655,14 @@
# along each dimension, using structure given by "ids"
combined = _nested_combine(
datasets,
concat_dims=concat_dim,
concat_dim=concat_dim,
compat=compat,
data_vars=data_vars,
coords=coords,
ids=ids,
join=join,
combine_attrs=combine_attrs,
fill_value=dtypes.NA,
)
elif combine == "by_coords":
# Redo ordering from coordinates, ignoring how they were ordered
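
The ``CombineKwargDefault`` sentinel and the ``_*_DEFAULT`` values imported in ``api.py`` come from ``xarray.util.deprecation_helpers``, which this diff does not show. A rough sketch of the sentinel-default pattern they implement (names and details are illustrative, not xarray's actual code):

```python
import warnings


class CombineKwargDefault:
    """Sentinel for a keyword default that is scheduled to change.

    Illustrative sketch only: it compares equal to the *old* default so
    existing code paths behave as before, while remembering the *new*
    default so callers relying on the default can be warned.
    """

    def __init__(self, name, old, new):
        self._name, self._old, self._new = name, old, new

    def __eq__(self, other):
        # Behave like the old default in equality checks.
        return other == self._old

    def warning_message(self):
        return (
            f"In a future version of xarray the default value for {self._name} "
            f"will change from {self._name}={self._old!r} to "
            f"{self._name}={self._new!r}."
        )


_JOIN_DEFAULT = CombineKwargDefault("join", old="outer", new="exact")


def merge_like(objects, join=_JOIN_DEFAULT):
    # Warn only when the caller actually relied on the changing default.
    if isinstance(join, CombineKwargDefault):
        warnings.warn(join.warning_message(), FutureWarning)
        join = join._old
    return join  # stand-in for the real merge logic
```

The point of the sentinel (rather than changing the default outright) is that explicit callers are never warned, while default-relying callers get one actionable ``FutureWarning``.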
53 changes: 39 additions & 14 deletions xarray/core/alignment.py
@@ -5,7 +5,7 @@
from collections import defaultdict
from collections.abc import Callable, Hashable, Iterable, Mapping
from contextlib import suppress
from typing import TYPE_CHECKING, Any, Final, Generic, TypeVar, cast, overload
from typing import TYPE_CHECKING, Any, Final, Generic, TypeVar, cast, get_args, overload

import numpy as np
import pandas as pd
@@ -19,9 +19,10 @@
indexes_all_equal,
safe_cast_to_index,
)
from xarray.core.types import T_Alignable
from xarray.core.utils import is_dict_like, is_full_slice
from xarray.core.types import JoinOptions, T_Alignable
from xarray.core.utils import emit_user_level_warning, is_dict_like, is_full_slice
from xarray.core.variable import Variable, as_compatible_data, calculate_dimensions
from xarray.util.deprecation_helpers import CombineKwargDefault

if TYPE_CHECKING:
from xarray.core.dataarray import DataArray
@@ -112,7 +113,7 @@ class Aligner(Generic[T_Alignable]):
objects: tuple[T_Alignable, ...]
results: tuple[T_Alignable, ...]
objects_matching_indexes: tuple[dict[MatchingIndexKey, Index], ...]
join: str
join: JoinOptions | CombineKwargDefault
exclude_dims: frozenset[Hashable]
exclude_vars: frozenset[Hashable]
copy: bool
@@ -132,7 +133,7 @@
def __init__(
self,
objects: Iterable[T_Alignable],
join: str = "inner",
join: JoinOptions | CombineKwargDefault = "inner",
indexes: Mapping[Any, Any] | None = None,
exclude_dims: str | Iterable[Hashable] = frozenset(),
exclude_vars: Iterable[Hashable] = frozenset(),
@@ -145,7 +146,9 @@ def __init__(
self.objects = tuple(objects)
self.objects_matching_indexes = ()

if join not in ["inner", "outer", "override", "exact", "left", "right"]:
if not isinstance(join, CombineKwargDefault) and join not in get_args(
JoinOptions
):
raise ValueError(f"invalid value for join: {join}")
self.join = join
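
The membership check above keys off ``typing.get_args`` applied to the ``JoinOptions`` ``Literal`` alias; as a self-contained sketch of that pattern:

```python
from typing import Literal, get_args

# Mirrors xarray.core.types.JoinOptions
JoinOptions = Literal["outer", "inner", "left", "right", "exact", "override"]


def validate_join(join: str) -> str:
    # get_args unpacks the Literal members, so the set of valid values
    # stays in sync with the type alias instead of a hand-written list.
    if join not in get_args(JoinOptions):
        raise ValueError(f"invalid value for join: {join}")
    return join
```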

@@ -418,12 +421,34 @@ def align_indexes(self) -> None:
else:
need_reindex = False
if need_reindex:
if (
isinstance(self.join, CombineKwargDefault)
and self.join != "exact"
):
emit_user_level_warning(
self.join.warning_message(
"This change will result in the following ValueError:"
"cannot be aligned with join='exact' because "
"index/labels/sizes are not equal along "
"these coordinates (dimensions): "
+ ", ".join(
f"{name!r} {dims!r}" for name, dims in key[0]
),
recommend_set_options=False,
),
FutureWarning,
)
if self.join == "exact":
raise ValueError(
"cannot align objects with join='exact' where "
"index/labels/sizes are not equal along "
"these coordinates (dimensions): "
+ ", ".join(f"{name!r} {dims!r}" for name, dims in key[0])
+ (
self.join.error_message()
if isinstance(self.join, CombineKwargDefault)
else ""
)
)
joiner = self._get_index_joiner(index_cls)
joined_index = joiner(matching_indexes)
@@ -595,7 +620,7 @@ def align(
obj1: T_Obj1,
/,
*,
join: JoinOptions = "inner",
join: JoinOptions | CombineKwargDefault = "inner",
copy: bool = True,
indexes=None,
exclude: str | Iterable[Hashable] = frozenset(),
@@ -609,7 +634,7 @@
obj2: T_Obj2,
/,
*,
join: JoinOptions = "inner",
join: JoinOptions | CombineKwargDefault = "inner",
copy: bool = True,
indexes=None,
exclude: str | Iterable[Hashable] = frozenset(),
@@ -624,7 +649,7 @@
obj3: T_Obj3,
/,
*,
join: JoinOptions = "inner",
join: JoinOptions | CombineKwargDefault = "inner",
copy: bool = True,
indexes=None,
exclude: str | Iterable[Hashable] = frozenset(),
@@ -640,7 +665,7 @@
obj4: T_Obj4,
/,
*,
join: JoinOptions = "inner",
join: JoinOptions | CombineKwargDefault = "inner",
copy: bool = True,
indexes=None,
exclude: str | Iterable[Hashable] = frozenset(),
@@ -657,7 +682,7 @@
obj5: T_Obj5,
/,
*,
join: JoinOptions = "inner",
join: JoinOptions | CombineKwargDefault = "inner",
copy: bool = True,
indexes=None,
exclude: str | Iterable[Hashable] = frozenset(),
@@ -668,7 +693,7 @@
@overload
def align(
*objects: T_Alignable,
join: JoinOptions = "inner",
join: JoinOptions | CombineKwargDefault = "inner",
copy: bool = True,
indexes=None,
exclude: str | Iterable[Hashable] = frozenset(),
@@ -678,7 +703,7 @@

def align(
*objects: T_Alignable,
join: JoinOptions = "inner",
join: JoinOptions | CombineKwargDefault = "inner",
copy: bool = True,
indexes=None,
exclude: str | Iterable[Hashable] = frozenset(),
@@ -886,7 +911,7 @@

def deep_align(
objects: Iterable[Any],
join: JoinOptions = "inner",
join: JoinOptions | CombineKwargDefault = "inner",
copy: bool = True,
indexes=None,
exclude: str | Iterable[Hashable] = frozenset(),