Skip to content

Commit abad670

Browse files
authored
Add Dataset.dtypes property (#6706)
* add Dataset.dtypes property * add Dataset.dtypes to whats-new * add Dataset.dtypes to api * fix typo * fix mypy issue * dtypes property for DataArrayCoordinates, DataVariables and DatasetCoordinates * update whats new
1 parent a1b0523 commit abad670

File tree

7 files changed

+148
-32
lines changed

7 files changed

+148
-32
lines changed

doc/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ Attributes
6161

6262
Dataset.dims
6363
Dataset.sizes
64+
Dataset.dtypes
6465
Dataset.data_vars
6566
Dataset.coords
6667
Dataset.attrs

doc/whats-new.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ v2022.06.0 (unreleased)
2222
New Features
2323
~~~~~~~~~~~~
2424

25+
- Add :py:attr:`Dataset.dtypes`, :py:attr:`DatasetCoordinates.dtypes`,
26+
:py:attr:`DataArrayCoordinates.dtypes` properties: Mapping from variable names to dtypes.
27+
(:pull:`6706`)
28+
By `Michael Niklas <https://github.com/headtr1ck>`_.
2529

2630
Deprecations
2731
~~~~~~~~~~~~

xarray/core/coordinates.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ def _names(self) -> set[Hashable]:
3838
def dims(self) -> Mapping[Hashable, int] | tuple[Hashable, ...]:
3939
raise NotImplementedError()
4040

41+
@property
42+
def dtypes(self) -> Frozen[Hashable, np.dtype]:
43+
raise NotImplementedError()
44+
4145
@property
4246
def indexes(self) -> Indexes[pd.Index]:
4347
return self._data.indexes # type: ignore[attr-defined]
@@ -242,6 +246,24 @@ def _names(self) -> set[Hashable]:
242246
def dims(self) -> Mapping[Hashable, int]:
243247
return self._data.dims
244248

249+
@property
250+
def dtypes(self) -> Frozen[Hashable, np.dtype]:
251+
"""Mapping from coordinate names to dtypes.
252+
253+
Cannot be modified directly, but is updated when adding new variables.
254+
255+
See Also
256+
--------
257+
Dataset.dtypes
258+
"""
259+
return Frozen(
260+
{
261+
n: v.dtype
262+
for n, v in self._data._variables.items()
263+
if n in self._data._coord_names
264+
}
265+
)
266+
245267
@property
246268
def variables(self) -> Mapping[Hashable, Variable]:
247269
return Frozen(
@@ -313,6 +335,18 @@ def __init__(self, dataarray: DataArray):
313335
def dims(self) -> tuple[Hashable, ...]:
314336
return self._data.dims
315337

338+
@property
339+
def dtypes(self) -> Frozen[Hashable, np.dtype]:
340+
"""Mapping from coordinate names to dtypes.
341+
342+
Cannot be modified directly, but is updated when adding new variables.
343+
344+
See Also
345+
--------
346+
DataArray.dtype
347+
"""
348+
return Frozen({n: v.dtype for n, v in self._data._coords.items()})
349+
316350
@property
317351
def _names(self) -> set[Hashable]:
318352
return set(self._data._coords)

xarray/core/dataarray.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@
102102

103103
def _infer_coords_and_dims(
104104
shape, coords, dims
105-
) -> tuple[dict[Any, Variable], tuple[Hashable, ...]]:
105+
) -> tuple[dict[Hashable, Variable], tuple[Hashable, ...]]:
106106
"""All the logic for creating a new DataArray"""
107107

108108
if (
@@ -140,7 +140,7 @@ def _infer_coords_and_dims(
140140
if not isinstance(d, str):
141141
raise TypeError(f"dimension {d} is not a string")
142142

143-
new_coords: dict[Any, Variable] = {}
143+
new_coords: dict[Hashable, Variable] = {}
144144

145145
if utils.is_dict_like(coords):
146146
for k, v in coords.items():

xarray/core/dataset.py

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@
3535
from ..coding.calendar_ops import convert_calendar, interp_calendar
3636
from ..coding.cftimeindex import CFTimeIndex, _parse_array_of_cftime_strings
3737
from ..plot.dataset_plot import _Dataset_PlotMethods
38+
from . import alignment
39+
from . import dtypes as xrdtypes
3840
from . import (
39-
alignment,
40-
dtypes,
4141
duck_array_ops,
4242
formatting,
4343
formatting_html,
@@ -385,6 +385,18 @@ def variables(self) -> Mapping[Hashable, Variable]:
385385
all_variables = self._dataset.variables
386386
return Frozen({k: all_variables[k] for k in self})
387387

388+
@property
389+
def dtypes(self) -> Frozen[Hashable, np.dtype]:
390+
"""Mapping from data variable names to dtypes.
391+
392+
Cannot be modified directly, but is updated when adding new variables.
393+
394+
See Also
395+
--------
396+
Dataset.dtypes
397+
"""
398+
return self._dataset.dtypes
399+
388400
def _ipython_key_completions_(self):
389401
"""Provide method for the key-autocompletions in IPython."""
390402
return [
@@ -677,6 +689,24 @@ def sizes(self) -> Frozen[Hashable, int]:
677689
"""
678690
return self.dims
679691

692+
@property
693+
def dtypes(self) -> Frozen[Hashable, np.dtype]:
694+
"""Mapping from data variable names to dtypes.
695+
696+
Cannot be modified directly, but is updated when adding new variables.
697+
698+
See Also
699+
--------
700+
DataArray.dtype
701+
"""
702+
return Frozen(
703+
{
704+
n: v.dtype
705+
for n, v in self._variables.items()
706+
if n not in self._coord_names
707+
}
708+
)
709+
680710
def load(self: T_Dataset, **kwargs) -> T_Dataset:
681711
"""Manually trigger loading and/or computation of this dataset's data
682712
from disk or a remote source into memory and return this dataset.
@@ -2792,7 +2822,7 @@ def reindex_like(
27922822
method: ReindexMethodOptions = None,
27932823
tolerance: int | float | Iterable[int | float] | None = None,
27942824
copy: bool = True,
2795-
fill_value: Any = dtypes.NA,
2825+
fill_value: Any = xrdtypes.NA,
27962826
) -> T_Dataset:
27972827
"""Conform this object onto the indexes of another object, filling in
27982828
missing values with ``fill_value``. The default fill value is NaN.
@@ -2858,7 +2888,7 @@ def reindex(
28582888
method: ReindexMethodOptions = None,
28592889
tolerance: int | float | Iterable[int | float] | None = None,
28602890
copy: bool = True,
2861-
fill_value: Any = dtypes.NA,
2891+
fill_value: Any = xrdtypes.NA,
28622892
**indexers_kwargs: Any,
28632893
) -> T_Dataset:
28642894
"""Conform this object onto a new set of indexes, filling in
@@ -3074,7 +3104,7 @@ def _reindex(
30743104
method: str = None,
30753105
tolerance: int | float | Iterable[int | float] | None = None,
30763106
copy: bool = True,
3077-
fill_value: Any = dtypes.NA,
3107+
fill_value: Any = xrdtypes.NA,
30783108
sparse: bool = False,
30793109
**indexers_kwargs: Any,
30803110
) -> T_Dataset:
@@ -4532,7 +4562,7 @@ def _unstack_full_reindex(
45324562
def unstack(
45334563
self: T_Dataset,
45344564
dim: Hashable | Iterable[Hashable] | None = None,
4535-
fill_value: Any = dtypes.NA,
4565+
fill_value: Any = xrdtypes.NA,
45364566
sparse: bool = False,
45374567
) -> T_Dataset:
45384568
"""
@@ -4677,7 +4707,7 @@ def merge(
46774707
overwrite_vars: Hashable | Iterable[Hashable] = frozenset(),
46784708
compat: CompatOptions = "no_conflicts",
46794709
join: JoinOptions = "outer",
4680-
fill_value: Any = dtypes.NA,
4710+
fill_value: Any = xrdtypes.NA,
46814711
combine_attrs: CombineAttrsOptions = "override",
46824712
) -> T_Dataset:
46834713
"""Merge the arrays of two datasets into a single dataset.
@@ -5886,7 +5916,7 @@ def _set_sparse_data_from_dataframe(
58865916
# missing values and needs a fill_value. For consistency, don't
58875917
# special case the rare exceptions (e.g., dtype=int without a
58885918
# MultiIndex).
5889-
dtype, fill_value = dtypes.maybe_promote(values.dtype)
5919+
dtype, fill_value = xrdtypes.maybe_promote(values.dtype)
58905920
values = np.asarray(values, dtype=dtype)
58915921

58925922
data = COO(
@@ -5924,7 +5954,7 @@ def _set_numpy_data_from_dataframe(
59245954
# fill in missing values:
59255955
# https://stackoverflow.com/a/35049899/809705
59265956
if missing_values:
5927-
dtype, fill_value = dtypes.maybe_promote(values.dtype)
5957+
dtype, fill_value = xrdtypes.maybe_promote(values.dtype)
59285958
data = np.full(shape, fill_value, dtype)
59295959
else:
59305960
# If there are no missing values, keep the existing dtype
@@ -6415,7 +6445,7 @@ def diff(
64156445
def shift(
64166446
self: T_Dataset,
64176447
shifts: Mapping[Any, int] | None = None,
6418-
fill_value: Any = dtypes.NA,
6448+
fill_value: Any = xrdtypes.NA,
64196449
**shifts_kwargs: int,
64206450
) -> T_Dataset:
64216451

@@ -6470,7 +6500,7 @@ def shift(
64706500
for name, var in self.variables.items():
64716501
if name in self.data_vars:
64726502
fill_value_ = (
6473-
fill_value.get(name, dtypes.NA)
6503+
fill_value.get(name, xrdtypes.NA)
64746504
if isinstance(fill_value, dict)
64756505
else fill_value
64766506
)
@@ -6931,7 +6961,9 @@ def differentiate(
69316961
dim = coord_var.dims[0]
69326962
if _contains_datetime_like_objects(coord_var):
69336963
if coord_var.dtype.kind in "mM" and datetime_unit is None:
6934-
datetime_unit, _ = np.datetime_data(coord_var.dtype)
6964+
datetime_unit = cast(
6965+
"DatetimeUnitOptions", np.datetime_data(coord_var.dtype)[0]
6966+
)
69356967
elif datetime_unit is None:
69366968
datetime_unit = "s" # Default to seconds for cftime objects
69376969
coord_var = coord_var._to_numeric(datetime_unit=datetime_unit)
@@ -7744,7 +7776,7 @@ def idxmin(
77447776
self: T_Dataset,
77457777
dim: Hashable | None = None,
77467778
skipna: bool | None = None,
7747-
fill_value: Any = dtypes.NA,
7779+
fill_value: Any = xrdtypes.NA,
77487780
keep_attrs: bool | None = None,
77497781
) -> T_Dataset:
77507782
"""Return the coordinate label of the minimum value along a dimension.
@@ -7841,7 +7873,7 @@ def idxmax(
78417873
self: T_Dataset,
78427874
dim: Hashable | None = None,
78437875
skipna: bool | None = None,
7844-
fill_value: Any = dtypes.NA,
7876+
fill_value: Any = xrdtypes.NA,
78457877
keep_attrs: bool | None = None,
78467878
) -> T_Dataset:
78477879
"""Return the coordinate label of the maximum value along a dimension.

xarray/tests/test_dataarray.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1327,9 +1327,11 @@ def test_coords(self) -> None:
13271327
]
13281328
da = DataArray(np.random.randn(2, 3), coords, name="foo")
13291329

1330-
assert 2 == len(da.coords)
1330+
# len
1331+
assert len(da.coords) == 2
13311332

1332-
assert ["x", "y"] == list(da.coords)
1333+
# iter
1334+
assert list(da.coords) == ["x", "y"]
13331335

13341336
assert coords[0].identical(da.coords["x"])
13351337
assert coords[1].identical(da.coords["y"])
@@ -1343,6 +1345,7 @@ def test_coords(self) -> None:
13431345
with pytest.raises(KeyError):
13441346
da.coords["foo"]
13451347

1348+
# repr
13461349
expected_repr = dedent(
13471350
"""\
13481351
Coordinates:
@@ -1352,6 +1355,9 @@ def test_coords(self) -> None:
13521355
actual = repr(da.coords)
13531356
assert expected_repr == actual
13541357

1358+
# dtypes
1359+
assert da.coords.dtypes == {"x": np.dtype("int64"), "y": np.dtype("int64")}
1360+
13551361
del da.coords["x"]
13561362
da._indexes = filter_indexes_from_coords(da.xindexes, set(da.coords))
13571363
expected = DataArray(da.values, {"y": [0, 1, 2]}, dims=["x", "y"], name="foo")

0 commit comments

Comments
 (0)