From e873d3b1c3bec72a5afc399a064380895f2ab939 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Mon, 12 May 2025 16:21:10 +0300 Subject: [PATCH 01/30] Refactor resampler module to submodules --- satpy/resample.py | 1096 ---------------------------------- satpy/resample/__init__.py | 66 ++ satpy/resample/base.py | 482 +++++++++++++++ satpy/resample/bucket.py | 221 +++++++ satpy/resample/ewa.py | 11 + satpy/resample/kdtree.py | 315 ++++++++++ satpy/resample/native.py | 172 ++++++ satpy/tests/test_resample.py | 8 +- 8 files changed, 1271 insertions(+), 1100 deletions(-) delete mode 100644 satpy/resample.py create mode 100644 satpy/resample/__init__.py create mode 100644 satpy/resample/base.py create mode 100644 satpy/resample/bucket.py create mode 100644 satpy/resample/ewa.py create mode 100644 satpy/resample/kdtree.py create mode 100644 satpy/resample/native.py diff --git a/satpy/resample.py b/satpy/resample.py deleted file mode 100644 index 5737294838..0000000000 --- a/satpy/resample.py +++ /dev/null @@ -1,1096 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2015-2018 Satpy developers -# -# This file is part of satpy. -# -# satpy is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. -# -# satpy is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. See the GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along with -# satpy. If not, see . -"""Resampling in Satpy. - -Satpy provides multiple resampling algorithms for resampling geolocated -data to uniform projected grids. The easiest way to perform resampling in -Satpy is through the :class:`~satpy.scene.Scene` object's -:meth:`~satpy.scene.Scene.resample` method. Additional utility functions are -also available to assist in resampling data. Below is more information on -resampling with Satpy as well as links to the relevant API documentation for -available keyword arguments. - -Resampling algorithms ---------------------- - -.. csv-table:: Available Resampling Algorithms - :header-rows: 1 - :align: center - - "Resampler", "Description", "Related" - "nearest", "Nearest Neighbor", :class:`~satpy.resample.KDTreeResampler` - "ewa", "Elliptical Weighted Averaging", :class:`~pyresample.ewa.DaskEWAResampler` - "ewa_legacy", "Elliptical Weighted Averaging (Legacy)", :class:`~pyresample.ewa.LegacyDaskEWAResampler` - "native", "Native", :class:`~satpy.resample.NativeResampler` - "bilinear", "Bilinear", :class:`~satpy.resample.BilinearResampler` - "bucket_avg", "Average Bucket Resampling", :class:`~satpy.resample.BucketAvg` - "bucket_sum", "Sum Bucket Resampling", :class:`~satpy.resample.BucketSum` - "bucket_count", "Count Bucket Resampling", :class:`~satpy.resample.BucketCount` - "bucket_fraction", "Fraction Bucket Resampling", :class:`~satpy.resample.BucketFraction` - "gradient_search", "Gradient Search Resampling", :meth:`~pyresample.gradient.create_gradient_search_resampler` - -The resampling algorithm used can be specified with the ``resampler`` keyword -argument and defaults to ``nearest``: - -.. code-block:: python - - >>> scn = Scene(...) - >>> euro_scn = scn.resample('euro4', resampler='nearest') - -.. warning:: - - Some resampling algorithms expect certain forms of data. For example, the - EWA resampling expects polar-orbiting swath data and prefers if the data - can be broken in to "scan lines". See the API documentation for a specific - algorithm for more information. - -Resampling for comparison and composites ----------------------------------------- - -While all the resamplers can be used to put datasets of different resolutions -on to a common area, the 'native' resampler is designed to match datasets to -one resolution in the dataset's original projection. This is extremely useful -when generating composites between bands of different resolutions. - -.. code-block:: python - - >>> new_scn = scn.resample(resampler='native') - -By default this resamples to the -:meth:`highest resolution area ` (smallest footprint per -pixel) shared between the loaded datasets. You can easily specify the lowest -resolution area: - -.. code-block:: python - - >>> new_scn = scn.resample(scn.coarsest_area(), resampler='native') - -Providing an area that is neither the minimum or maximum resolution area -may work, but behavior is currently undefined. - -Caching for geostationary data ------------------------------- - -Satpy will do its best to reuse calculations performed to resample datasets, -but it can only do this for the current processing and will lose this -information when the process/script ends. Some resampling algorithms, like -``nearest`` and ``bilinear``, can benefit by caching intermediate data on disk in the directory -specified by `cache_dir` and using it next time. This is most beneficial with -geostationary satellite data where the locations of the source data and the -target pixels don't change over time. - - >>> new_scn = scn.resample('euro4', cache_dir='/path/to/cache_dir') - -See the documentation for specific algorithms to see availability and -limitations of caching for that algorithm. - -Create custom area definition ------------------------------ - -See :class:`pyresample.geometry.AreaDefinition` for information on creating -areas that can be passed to the resample method:: - - >>> from pyresample.geometry import AreaDefinition - >>> my_area = AreaDefinition(...) - >>> local_scene = scn.resample(my_area) - -Resize area definition in pixels --------------------------------- - -Sometimes you may want to create a small image with fixed size in pixels. -For example, to create an image of (y, x) pixels : - - >>> small_scn = scn.resample(scn.finest_area().copy(height=y, width=x), resampler="nearest") - - -.. warning:: - - Be aware that resizing with native resampling (``resampler="native"``) only - works if the new size is an integer factor of the original input size. For example, - multiplying the size by 2 or dividing the size by 2. Multiplying by 1.5 would - not be allowed. - - -Create dynamic area definition ------------------------------- - -See :class:`pyresample.geometry.DynamicAreaDefinition` for more information. - -Examples coming soon... - -Store area definitions ----------------------- - -Area definitions can be saved to a custom YAML file (see -`pyresample's writing to disk `_) -and loaded using pyresample's utility methods -(`pyresample's loading from disk `_):: - - >>> from pyresample import load_area - >>> my_area = load_area('my_areas.yaml', 'my_area') - -Or using :func:`satpy.resample.get_area_def`, which will search through all -``areas.yaml`` files in your ``SATPY_CONFIG_PATH``:: - - >>> from satpy.resample import get_area_def - >>> area_eurol = get_area_def("eurol") - -For examples of area definitions, see the file ``etc/areas.yaml`` that is -included with Satpy and where all the area definitions shipped with Satpy are -defined. The section below gives an overview of these area definitions. - -Area definitions included in Satpy ----------------------------------- - -.. include:: /area_def_list.rst - -""" -import hashlib -import json -import os -import warnings -from logging import getLogger -from math import lcm # type: ignore -from weakref import WeakValueDictionary - -import dask.array as da -import numpy as np -import xarray as xr -import zarr -from pyresample.ewa import DaskEWAResampler, LegacyDaskEWAResampler -from pyresample.geometry import SwathDefinition -from pyresample.gradient import create_gradient_search_resampler -from pyresample.resampler import BaseResampler as PRBaseResampler - -from satpy._config import config_search_paths, get_config_path -from satpy.utils import PerformanceWarning, get_legacy_chunk_size - -LOG = getLogger(__name__) - -CHUNK_SIZE = get_legacy_chunk_size() -CACHE_SIZE = 10 -NN_COORDINATES = {"valid_input_index": ("y1", "x1"), - "valid_output_index": ("y2", "x2"), - "index_array": ("y2", "x2", "z2")} -BIL_COORDINATES = {"bilinear_s": ("x1", ), - "bilinear_t": ("x1", ), - "slices_x": ("x1", "n"), - "slices_y": ("x1", "n"), - "mask_slices": ("x1", "n"), - "out_coords_x": ("x2", ), - "out_coords_y": ("y2", )} - -resamplers_cache: "WeakValueDictionary[tuple, object]" = WeakValueDictionary() - - -def hash_dict(the_dict, the_hash=None): - """Calculate a hash for a dictionary.""" - if the_hash is None: - the_hash = hashlib.sha1() # nosec - the_hash.update(json.dumps(the_dict, sort_keys=True).encode("utf-8")) - return the_hash - - -def get_area_file(): - """Find area file(s) to use. - - The files are to be named `areas.yaml` or `areas.def`. - """ - paths = config_search_paths("areas.yaml") - if paths: - return paths - else: - return get_config_path("areas.def") - - -def get_area_def(area_name): - """Get the definition of *area_name* from file. - - The file is defined to use is to be placed in the $SATPY_CONFIG_PATH - directory, and its name is defined in satpy's configuration file. - """ - try: - from pyresample import parse_area_file - except ImportError: - from pyresample.utils import parse_area_file - return parse_area_file(get_area_file(), area_name)[0] - - -def add_xy_coords(data_arr, area, crs=None): - """Assign x/y coordinates to DataArray from provided area. - - If 'x' and 'y' coordinates already exist then they will not be added. - - Args: - data_arr (xarray.DataArray): data object to add x/y coordinates to - area (pyresample.geometry.AreaDefinition): area providing the - coordinate data. - crs (pyproj.crs.CRS or None): CRS providing additional information - about the area's coordinate reference system if available. - Requires pyproj 2.0+. - - Returns (xarray.DataArray): Updated DataArray object - - """ - if "x" in data_arr.coords and "y" in data_arr.coords: - # x/y coords already provided - return data_arr - if "x" not in data_arr.dims or "y" not in data_arr.dims: - # no defined x and y dimensions - return data_arr - if not hasattr(area, "get_proj_vectors"): - return data_arr - x, y = area.get_proj_vectors() - - # convert to DataArrays - y_attrs = {} - x_attrs = {} - if crs is not None: - units = crs.axis_info[0].unit_name - # fix udunits/CF standard units - units = units.replace("metre", "meter") - if units == "degree": - y_attrs["units"] = "degrees_north" - x_attrs["units"] = "degrees_east" - else: - y_attrs["units"] = units - x_attrs["units"] = units - y = xr.DataArray(y, dims=("y",), attrs=y_attrs) - x = xr.DataArray(x, dims=("x",), attrs=x_attrs) - return data_arr.assign_coords(y=y, x=x) - - -def add_crs_xy_coords(data_arr, area): - """Add :class:`pyproj.crs.CRS` and x/y or lons/lats to coordinates. - - For SwathDefinition or GridDefinition areas this will add a - `crs` coordinate and coordinates for the 2D arrays of `lons` and `lats`. - - For AreaDefinition areas this will add a `crs` coordinate and the - 1-dimensional `x` and `y` coordinate variables. - - Args: - data_arr (xarray.DataArray): DataArray to add the 'crs' - coordinate. - area (pyresample.geometry.AreaDefinition): Area to get CRS - information from. - - """ - # add CRS object if pyproj 2.0+ - try: - from pyproj import CRS - except ImportError: - LOG.debug("Could not add 'crs' coordinate with pyproj<2.0") - crs = None - else: - # default lat/lon projection - latlon_proj = "+proj=latlong +datum=WGS84 +ellps=WGS84" - # otherwise get it from the area definition - if hasattr(area, "crs"): - crs = area.crs - else: - proj_str = getattr(area, "proj_str", latlon_proj) - crs = CRS.from_string(proj_str) - data_arr = data_arr.assign_coords(crs=crs) - - # Add x/y coordinates if possible - if isinstance(area, SwathDefinition): - # add lon/lat arrays for swath definitions - # SwathDefinitions created by Satpy should be assigning DataArray - # objects as the lons/lats attributes so use those directly to - # maintain original .attrs metadata (instead of converting to dask - # array). - lons = area.lons - lats = area.lats - lons.attrs.setdefault("standard_name", "longitude") - lons.attrs.setdefault("long_name", "longitude") - lons.attrs.setdefault("units", "degrees_east") - lats.attrs.setdefault("standard_name", "latitude") - lats.attrs.setdefault("long_name", "latitude") - lats.attrs.setdefault("units", "degrees_north") - # See https://github.com/pydata/xarray/issues/3068 - # data_arr = data_arr.assign_coords(longitude=lons, latitude=lats) - else: - # Gridded data (AreaDefinition/StackedAreaDefinition) - data_arr = add_xy_coords(data_arr, area, crs=crs) - return data_arr - - -def update_resampled_coords(old_data, new_data, new_area): - """Add coordinate information to newly resampled DataArray. - - Args: - old_data (xarray.DataArray): Old data before resampling. - new_data (xarray.DataArray): New data after resampling. - new_area (pyresample.geometry.BaseDefinition): Area definition - for the newly resampled data. - - """ - # copy over other non-x/y coordinates - # this *MUST* happen before we set 'crs' below otherwise any 'crs' - # coordinate in the coordinate variables we are copying will overwrite the - # 'crs' coordinate we just assigned to the data - ignore_coords = ("y", "x", "crs") - new_coords = {} - for cname, cval in old_data.coords.items(): - # we don't want coordinates that depended on the old x/y dimensions - has_ignored_dims = any(dim in cval.dims for dim in ignore_coords) - if cname in ignore_coords or has_ignored_dims: - continue - new_coords[cname] = cval - new_data = new_data.assign_coords(**new_coords) - - # add crs, x, and y coordinates - new_data = add_crs_xy_coords(new_data, new_area) - return new_data - - -class KDTreeResampler(PRBaseResampler): - """Resample using a KDTree-based nearest neighbor algorithm. - - This resampler implements on-disk caching when the `cache_dir` argument - is provided to the `resample` method. This should provide significant - performance improvements on consecutive resampling of geostationary data. - It is not recommended to provide `cache_dir` when the `mask` keyword - argument is provided to `precompute` which occurs by default for - `SwathDefinition` source areas. - - Args: - cache_dir (str): Long term storage directory for intermediate - results. - mask (bool): Force resampled data's invalid pixel mask to be used - when searching for nearest neighbor pixels. By - default this is True for SwathDefinition source - areas and False for all other area definition types. - radius_of_influence (float): Search radius cut off distance in meters - epsilon (float): Allowed uncertainty in meters. Increasing uncertainty - reduces execution time. - - """ - - def __init__(self, source_geo_def, target_geo_def): - """Init KDTreeResampler.""" - super(KDTreeResampler, self).__init__(source_geo_def, target_geo_def) - self.resampler = None - self._index_caches = {} - - def precompute(self, mask=None, radius_of_influence=None, epsilon=0, - cache_dir=None, **kwargs): - """Create a KDTree structure and store it for later use. - - Note: The `mask` keyword should be provided if geolocation may be valid - where data points are invalid. - - """ - from pyresample.kd_tree import XArrayResamplerNN - del kwargs - if mask is not None and cache_dir is not None: - LOG.warning("Mask and cache_dir both provided to nearest " - "resampler. Cached parameters are affected by " - "masked pixels. Will not cache results.") - cache_dir = None - - if radius_of_influence is None and not hasattr(self.source_geo_def, "geocentric_resolution"): - radius_of_influence = self._adjust_radius_of_influence(radius_of_influence) - - kwargs = dict(source_geo_def=self.source_geo_def, - target_geo_def=self.target_geo_def, - radius_of_influence=radius_of_influence, - neighbours=1, - epsilon=epsilon) - - if self.resampler is None: - # FIXME: We need to move all of this caching logic to pyresample - self.resampler = XArrayResamplerNN(**kwargs) - - try: - self.load_neighbour_info(cache_dir, mask=mask, **kwargs) - LOG.debug("Read pre-computed kd-tree parameters") - except IOError: - LOG.debug("Computing kd-tree parameters") - self.resampler.get_neighbour_info(mask=mask) - self.save_neighbour_info(cache_dir, mask=mask, **kwargs) - - def _adjust_radius_of_influence(self, radius_of_influence): - """Adjust radius of influence.""" - warnings.warn( - "Upgrade 'pyresample' for a more accurate default 'radius_of_influence'.", - stacklevel=3 - ) - try: - radius_of_influence = self.source_geo_def.lons.resolution * 3 - except AttributeError: - try: - radius_of_influence = max(abs(self.source_geo_def.pixel_size_x), - abs(self.source_geo_def.pixel_size_y)) * 3 - except AttributeError: - radius_of_influence = 1000 - - except TypeError: - radius_of_influence = 10000 - return radius_of_influence - - def _apply_cached_index(self, val, idx_name, persist=False): - """Reassign resampler index attributes.""" - if isinstance(val, np.ndarray): - val = da.from_array(val, chunks=CHUNK_SIZE) - elif persist and isinstance(val, da.Array): - val = val.persist() - setattr(self.resampler, idx_name, val) - return val - - def load_neighbour_info(self, cache_dir, mask=None, **kwargs): - """Read index arrays from either the in-memory or disk cache.""" - mask_name = getattr(mask, "name", None) - cached = {} - for idx_name in NN_COORDINATES: - if mask_name in self._index_caches: - cached[idx_name] = self._apply_cached_index( - self._index_caches[mask_name][idx_name], idx_name) - elif cache_dir: - try: - filename = self._create_cache_filename( - cache_dir, prefix="nn_lut-", - mask=mask_name, **kwargs) - fid = zarr.open(filename, "r") - cache = np.array(fid[idx_name]) - if idx_name == "valid_input_index": - # valid input index array needs to be boolean - cache = cache.astype(bool) - except ValueError: - raise IOError - cache = self._apply_cached_index(cache, idx_name) - cached[idx_name] = cache - else: - raise IOError - self._index_caches[mask_name] = cached - - def save_neighbour_info(self, cache_dir, mask=None, **kwargs): - """Cache resampler's index arrays if there is a cache dir.""" - if cache_dir: - mask_name = getattr(mask, "name", None) - cache = self._read_resampler_attrs() - filename = self._create_cache_filename( - cache_dir, prefix="nn_lut-", mask=mask_name, **kwargs) - LOG.info("Saving kd_tree neighbour info to %s", filename) - zarr_out = xr.Dataset() - for idx_name, coord in NN_COORDINATES.items(): - # update the cache in place with persisted dask arrays - cache[idx_name] = self._apply_cached_index(cache[idx_name], - idx_name, - persist=True) - zarr_out[idx_name] = (coord, cache[idx_name]) - - # Write indices to Zarr file - zarr_out.to_zarr(filename) - - self._index_caches[mask_name] = cache - # Delete the kdtree, it's not needed anymore - self.resampler.delayed_kdtree = None - - def _read_resampler_attrs(self): - """Read certain attributes from the resampler for caching.""" - return {attr_name: getattr(self.resampler, attr_name) - for attr_name in NN_COORDINATES} - - def compute(self, data, weight_funcs=None, fill_value=np.nan, - with_uncert=False, **kwargs): - """Resample data.""" - del kwargs - LOG.debug("Resampling %s", str(data.name)) - res = self.resampler.get_sample_from_neighbour_info(data, fill_value) - return update_resampled_coords(data, res, self.target_geo_def) - - -class BilinearResampler(PRBaseResampler): - """Resample using bilinear interpolation. - - This resampler implements on-disk caching when the `cache_dir` argument - is provided to the `resample` method. This should provide significant - performance improvements on consecutive resampling of geostationary data. - - Args: - cache_dir (str): Long term storage directory for intermediate - results. - radius_of_influence (float): Search radius cut off distance in meters - epsilon (float): Allowed uncertainty in meters. Increasing uncertainty - reduces execution time. - reduce_data (bool): Reduce the input data to (roughly) match the - target area. - - """ - - def __init__(self, source_geo_def, target_geo_def): - """Init BilinearResampler.""" - super(BilinearResampler, self).__init__(source_geo_def, target_geo_def) - self.resampler = None - - def precompute(self, mask=None, radius_of_influence=50000, epsilon=0, - reduce_data=True, cache_dir=False, **kwargs): - """Create bilinear coefficients and store them for later use.""" - try: - from pyresample.bilinear import XArrayBilinearResampler - except ImportError: - from pyresample.bilinear import XArrayResamplerBilinear as XArrayBilinearResampler - - del kwargs - del mask - - if self.resampler is None: - kwargs = dict(source_geo_def=self.source_geo_def, - target_geo_def=self.target_geo_def, - radius_of_influence=radius_of_influence, - neighbours=32, - epsilon=epsilon) - - self.resampler = XArrayBilinearResampler(**kwargs) - try: - self.load_bil_info(cache_dir, **kwargs) - LOG.debug("Loaded bilinear parameters") - except IOError: - LOG.debug("Computing bilinear parameters") - self.resampler.get_bil_info() - LOG.debug("Saving bilinear parameters.") - self.save_bil_info(cache_dir, **kwargs) - - def load_bil_info(self, cache_dir, **kwargs): - """Load bilinear resampling info from cache directory.""" - if cache_dir: - filename = self._create_cache_filename(cache_dir, - prefix="bil_lut-", - **kwargs) - try: - self.resampler.load_resampling_info(filename) - except AttributeError: - warnings.warn( - "Bilinear resampler can't handle caching, " - "please upgrade Pyresample to 0.17.0 or newer.", - stacklevel=2 - ) - raise IOError - else: - raise IOError - - def save_bil_info(self, cache_dir, **kwargs): - """Save bilinear resampling info to cache directory.""" - if cache_dir: - filename = self._create_cache_filename(cache_dir, - prefix="bil_lut-", - **kwargs) - # There are some old caches, move them out of the way - if os.path.exists(filename): - _move_existing_caches(cache_dir, filename) - LOG.info("Saving BIL neighbour info to %s", filename) - try: - self.resampler.save_resampling_info(filename) - except AttributeError: - warnings.warn( - "Bilinear resampler can't handle caching, " - "please upgrade Pyresample to 0.17.0 or newer.", - stacklevel=2 - ) - - def compute(self, data, fill_value=None, **kwargs): - """Resample the given data using bilinear interpolation.""" - del kwargs - - if fill_value is None: - fill_value = data.attrs.get("_FillValue") - target_shape = self.target_geo_def.shape - - res = self.resampler.get_sample_from_bil_info(data, - fill_value=fill_value, - output_shape=target_shape) - - return update_resampled_coords(data, res, self.target_geo_def) - - -def _move_existing_caches(cache_dir, filename): - """Move existing cache files out of the way.""" - import os - import shutil - old_cache_dir = os.path.join(cache_dir, "moved_by_satpy") - try: - os.makedirs(old_cache_dir) - except FileExistsError: - pass - try: - shutil.move(filename, old_cache_dir) - except shutil.Error: - os.remove(os.path.join(old_cache_dir, - os.path.basename(filename))) - shutil.move(filename, old_cache_dir) - LOG.warning("Old cache file was moved to %s", old_cache_dir) - - -def _mean(data, y_size, x_size): - rows, cols = data.shape - new_shape = (int(rows / y_size), int(y_size), - int(cols / x_size), int(x_size)) - data_mean = np.nanmean(data.reshape(new_shape), axis=(1, 3)) - return data_mean - - -def _repeat_by_factor(data, block_info=None): - if block_info is None: - return data - out_shape = block_info[None]["chunk-shape"] - out_data = data - for axis, axis_size in enumerate(out_shape): - in_size = data.shape[axis] - out_data = np.repeat(out_data, int(axis_size / in_size), axis=axis) - return out_data - - -class NativeResampler(PRBaseResampler): - """Expand or reduce input datasets to be the same shape. - - If data is higher resolution (more pixels) than the destination area - then data is averaged to match the destination resolution. - - If data is lower resolution (less pixels) than the destination area - then data is repeated to match the destination resolution. - - This resampler does not perform any caching or masking due to the - simplicity of the operations. - - """ - - def resample(self, data, cache_dir=None, mask_area=False, **kwargs): - """Run NativeResampler.""" - # use 'mask_area' with a default of False. It wouldn't do anything. - return super(NativeResampler, self).resample(data, - cache_dir=cache_dir, - mask_area=mask_area, - **kwargs) - - @classmethod - def _expand_reduce(cls, d_arr, repeats): - """Expand reduce.""" - if not isinstance(d_arr, da.Array): - d_arr = da.from_array(d_arr, chunks=CHUNK_SIZE) - if all(x == 1 for x in repeats.values()): - return d_arr - if all(x >= 1 for x in repeats.values()): - return _replicate(d_arr, repeats) - if all(x <= 1 for x in repeats.values()): - # reduce - y_size = 1. / repeats[0] - x_size = 1. / repeats[1] - return _aggregate(d_arr, y_size, x_size) - raise ValueError("Must either expand or reduce in both " - "directions") - - def compute(self, data, expand=True, **kwargs): - """Resample data with NativeResampler.""" - if isinstance(self.target_geo_def, (list, tuple)): - # find the highest/lowest area among the provided - test_func = max if expand else min - target_geo_def = test_func(self.target_geo_def, - key=lambda x: x.shape) - else: - target_geo_def = self.target_geo_def - - # convert xarray backed with numpy array to dask array - if "x" not in data.dims or "y" not in data.dims: - if data.ndim not in [2, 3]: - raise ValueError("Can only handle 2D or 3D arrays without dimensions.") - # assume rows is the second to last axis - y_axis = data.ndim - 2 - x_axis = data.ndim - 1 - else: - y_axis = data.dims.index("y") - x_axis = data.dims.index("x") - - out_shape = target_geo_def.shape - in_shape = data.shape - y_repeats = out_shape[0] / float(in_shape[y_axis]) - x_repeats = out_shape[1] / float(in_shape[x_axis]) - repeats = {axis_idx: 1. for axis_idx in range(data.ndim) if axis_idx not in [y_axis, x_axis]} - repeats[y_axis] = y_repeats - repeats[x_axis] = x_repeats - - d_arr = self._expand_reduce(data.data, repeats) - new_data = xr.DataArray(d_arr, dims=data.dims) - return update_resampled_coords(data, new_data, target_geo_def) - - -def _aggregate(d, y_size, x_size): - """Average every 4 elements (2x2) in a 2D array.""" - if d.ndim != 2: - # we can't guarantee what blocks we are getting and how - # it should be reshaped to do the averaging. - raise ValueError("Can't aggregrate (reduce) data arrays with " - "more than 2 dimensions.") - if not (x_size.is_integer() and y_size.is_integer()): - raise ValueError("Aggregation factors are not integers") - y_size = int(y_size) - x_size = int(x_size) - d = _rechunk_if_nonfactor_chunks(d, y_size, x_size) - new_chunks = (tuple(int(x / y_size) for x in d.chunks[0]), - tuple(int(x / x_size) for x in d.chunks[1])) - return da.core.map_blocks(_mean, d, y_size, x_size, - meta=np.array((), dtype=d.dtype), - dtype=d.dtype, chunks=new_chunks) - - -def _rechunk_if_nonfactor_chunks(dask_arr, y_size, x_size): - need_rechunk = False - new_chunks = list(dask_arr.chunks) - for dim_idx, agg_size in enumerate([y_size, x_size]): - if dask_arr.shape[dim_idx] % agg_size != 0: - raise ValueError("Aggregation requires arrays with shapes divisible by the factor.") - for chunk_size in dask_arr.chunks[dim_idx]: - if chunk_size % agg_size != 0: - need_rechunk = True - new_dim_chunk = lcm(chunk_size, agg_size) - new_chunks[dim_idx] = new_dim_chunk - if need_rechunk: - warnings.warn( - "Array chunk size is not divisible by aggregation factor. " - "Re-chunking to continue native resampling.", - PerformanceWarning, - stacklevel=5 - ) - dask_arr = dask_arr.rechunk(tuple(new_chunks)) - return dask_arr - - -def _replicate(d_arr, repeats): - """Repeat data pixels by the per-axis factors specified.""" - repeated_chunks = _get_replicated_chunk_sizes(d_arr, repeats) - d_arr = d_arr.map_blocks(_repeat_by_factor, - meta=np.array((), dtype=d_arr.dtype), - dtype=d_arr.dtype, - chunks=repeated_chunks) - return d_arr - - -def _get_replicated_chunk_sizes(d_arr, repeats): - repeated_chunks = [] - for axis, axis_chunks in enumerate(d_arr.chunks): - factor = repeats[axis] - if not factor.is_integer(): - raise ValueError("Expand factor must be a whole number") - repeated_chunks.append(tuple(x * int(factor) for x in axis_chunks)) - return tuple(repeated_chunks) - - -class BucketResamplerBase(PRBaseResampler): - """Base class for bucket resampling which implements averaging.""" - - def __init__(self, source_geo_def, target_geo_def): - """Initialize bucket resampler.""" - super(BucketResamplerBase, self).__init__(source_geo_def, target_geo_def) - self.resampler = None - - def precompute(self, **kwargs): - """Create X and Y indices and store them for later use.""" - from pyresample import bucket - - LOG.debug("Initializing bucket resampler.") - source_lons, source_lats = self.source_geo_def.get_lonlats( - chunks=CHUNK_SIZE) - self.resampler = bucket.BucketResampler(self.target_geo_def, - source_lons, - source_lats) - - def compute(self, data, **kwargs): - """Call the resampling.""" - raise NotImplementedError("Use the sub-classes") - - def resample(self, data, **kwargs): # noqa: D417 - """Resample `data` by calling `precompute` and `compute` methods. - - Args: - data (xarray.DataArray): Data to be resampled - - Returns (xarray.DataArray): Data resampled to the target area - - """ - self.precompute(**kwargs) - attrs = data.attrs.copy() - data_arr = data.data - if data.ndim == 3 and data.dims[0] == "bands": - dims = ("bands", "y", "x") - # Both one and two dimensional input data results in 2D output - elif data.ndim in (1, 2): - dims = ("y", "x") - else: - dims = data.dims - LOG.debug("Resampling %s", str(data.attrs.get("_satpy_id", "unknown"))) - result = self.compute(data_arr, **kwargs) - coords = {} - if "bands" in data.coords: - coords["bands"] = data.coords["bands"] - # Fractions are returned in a dict - elif isinstance(result, dict): - coords["categories"] = sorted(result.keys()) - dims = ("categories", "y", "x") - new_result = [] - for cat in coords["categories"]: - new_result.append(result[cat]) - result = da.stack(new_result) - if result.ndim > len(dims): - result = da.squeeze(result) - - # Adjust some attributes - if "BucketFraction" in str(self): - attrs["units"] = "" - attrs["calibration"] = "" - attrs["standard_name"] = "area_fraction" - elif "BucketCount" in str(self): - attrs["units"] = "" - attrs["calibration"] = "" - attrs["standard_name"] = "number_of_observations" - - result = xr.DataArray(result, dims=dims, coords=coords, - attrs=attrs) - - return update_resampled_coords(data, result, self.target_geo_def) - - -class BucketAvg(BucketResamplerBase): - """Class for averaging bucket resampling. - - Bucket resampling calculates the average of all the values that - are closest to each bin and inside the target area. - - Parameters - ---------- - fill_value : float (default: np.nan) - Fill value to mark missing/invalid values in the input data, - as well as in the binned and averaged output data. - skipna : boolean (default: True) - If True, skips missing values (as marked by NaN or `fill_value`) for the average calculation - (similarly to Numpy's `nanmean`). Buckets containing only missing values are set to fill_value. - If False, sets the bucket to fill_value if one or more missing values are present in the bucket - (similarly to Numpy's `mean`). - In both cases, empty buckets are set to `fill_value`. - - """ - - def compute(self, data, fill_value=np.nan, skipna=True, **kwargs): # noqa: D417 - """Call the resampling. - - Args: - data (numpy.Array, dask.Array): Data to be resampled - fill_value (numpy.nan, int): fill_value. Defaults to numpy.nan - skipna (boolean): Skip NA's. Default `True` - - Returns: - dask.Array - """ - results = [] - if data.ndim == 3: - for i in range(data.shape[0]): - res = self.resampler.get_average(data[i, :, :], - fill_value=fill_value, - skipna=skipna, - **kwargs) - results.append(res) - else: - res = self.resampler.get_average(data, fill_value=fill_value, skipna=skipna, - **kwargs) - results.append(res) - - return da.stack(results) - - -class BucketSum(BucketResamplerBase): - """Class for bucket resampling which implements accumulation (sum). - - This resampler calculates the cumulative sum of all the values - that are closest to each bin and inside the target area. - - Parameters - ---------- - fill_value : float (default: np.nan) - Fill value for missing data - skipna : boolean (default: True) - If True, skips NaN values for the sum calculation - (similarly to Numpy's `nansum`). Buckets containing only NaN are set to zero. - If False, sets the bucket to NaN if one or more NaN values are present in the bucket - (similarly to Numpy's `sum`). - In both cases, empty buckets are set to 0. - - """ - - def compute(self, data, skipna=True, **kwargs): - """Call the resampling.""" - results = [] - if data.ndim == 3: - for i in range(data.shape[0]): - res = self.resampler.get_sum(data[i, :, :], skipna=skipna, - **kwargs) - results.append(res) - else: - res = self.resampler.get_sum(data, skipna=skipna, **kwargs) - results.append(res) - - return da.stack(results) - - -class BucketCount(BucketResamplerBase): - """Class for bucket resampling which implements hit-counting. - - This resampler calculates the number of occurences of the input - data closest to each bin and inside the target area. - - """ - - def compute(self, data, **kwargs): - """Call the resampling.""" - results = [] - if data.ndim == 3: - for _i in range(data.shape[0]): - res = self.resampler.get_count() - results.append(res) - else: - res = self.resampler.get_count() - results.append(res) - - return da.stack(results) - - -class BucketFraction(BucketResamplerBase): - """Class for bucket resampling to compute category fractions. - - This resampler calculates the fraction of occurences of the input - data per category. - - """ - - def compute(self, data, fill_value=np.nan, categories=None, **kwargs): - """Call the resampling.""" - if data.ndim > 2: - raise ValueError("BucketFraction not implemented for 3D datasets") - - result = self.resampler.get_fractions(data, categories=categories, - fill_value=fill_value) - - return result - - -# TODO: move this to pyresample.resampler -RESAMPLERS = {"kd_tree": KDTreeResampler, - "nearest": KDTreeResampler, - "bilinear": BilinearResampler, - "native": NativeResampler, - "gradient_search": create_gradient_search_resampler, - "bucket_avg": BucketAvg, - "bucket_sum": BucketSum, - "bucket_count": BucketCount, - "bucket_fraction": BucketFraction, - "ewa": DaskEWAResampler, - "ewa_legacy": LegacyDaskEWAResampler, - } - - -# TODO: move this to pyresample -def prepare_resampler(source_area, destination_area, resampler=None, **resample_kwargs): - """Instantiate and return a resampler.""" - if resampler is None: - LOG.info("Using default KDTree resampler") - resampler = "kd_tree" - - if isinstance(resampler, PRBaseResampler): - raise ValueError("Trying to create a resampler when one already " - "exists.") - if isinstance(resampler, str): - resampler_class = RESAMPLERS.get(resampler, None) - if resampler_class is None: - if resampler == "gradient_search": - warnings.warn( - "Gradient search resampler not available. Maybe missing `shapely`?", - stacklevel=2 - ) - raise KeyError("Resampler '%s' not available" % resampler) - else: - resampler_class = resampler - - key = (resampler_class, - source_area, destination_area, - hash_dict(resample_kwargs).hexdigest()) - try: - resampler_instance = resamplers_cache[key] - except KeyError: - resampler_instance = resampler_class(source_area, destination_area) - resamplers_cache[key] = resampler_instance - return key, resampler_instance - - -# TODO: move this to pyresample -def resample(source_area, data, destination_area, - resampler=None, **kwargs): - """Do the resampling.""" - if not isinstance(resampler, PRBaseResampler): - # we don't use the first argument (cache key) - _, resampler_instance = prepare_resampler(source_area, - destination_area, - resampler) - else: - resampler_instance = resampler - - if isinstance(data, list): - res = [resampler_instance.resample(ds, **kwargs) for ds in data] - else: - res = resampler_instance.resample(data, **kwargs) - - return res - - -def get_fill_value(dataset): - """Get the fill value of the *dataset*, defaulting to np.nan.""" - if np.issubdtype(dataset.dtype, np.integer): - return dataset.attrs.get("_FillValue", np.nan) - return np.nan - - -def resample_dataset(dataset, destination_area, **kwargs): - """Resample *dataset* and return the resampled version. - - Args: - dataset (xarray.DataArray): Data to be resampled. - destination_area: The destination onto which to project the data, - either a full blown area definition or a string corresponding to - the name of the area as defined in the area file. - **kwargs: The extra parameters to pass to the resampler objects. - - Returns: - A resampled DataArray with updated ``.attrs["area"]`` field. The dtype - of the array is preserved. - - """ - # call the projection stuff here - try: - source_area = dataset.attrs["area"] - except KeyError: - LOG.info("Cannot reproject dataset %s, missing area info", - dataset.attrs["name"]) - - return dataset - - fill_value = kwargs.pop("fill_value", get_fill_value(dataset)) - new_data = resample(source_area, dataset, destination_area, fill_value=fill_value, **kwargs) - new_attrs = new_data.attrs - new_data.attrs = dataset.attrs.copy() - new_data.attrs.update(new_attrs) - new_data.attrs.update(area=destination_area) - - return new_data diff --git a/satpy/resample/__init__.py b/satpy/resample/__init__.py new file mode 100644 index 0000000000..fa024d05d7 --- /dev/null +++ b/satpy/resample/__init__.py @@ -0,0 +1,66 @@ +# Copyright (c) 2015-2025 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see . +"""Writers subpackage.""" +from __future__ import annotations + +import warnings +from typing import Any + + +def __getattr__(name: str) -> Any: + if name in ("KDTreeResampler", "BilinearResampler"): + from . import kdtree + + new_submod = "kdtree" + obj = getattr(kdtree, name) + elif name == "NativeResampler": + from .native import NativeResampler + new_submod = "native" + obj = NativeResampler + elif name in ( + "BucketResamplerBase", + "BucketAvg", + "BucketSum", + "BucketCount", + "BucketFraction" + ): + from . import bucket + new_submod = "bucket" + obj = getattr(bucket, name) + elif name in ( + "hash_dict", + "get_area_file", + "get_area_def", + "add_xy_coords", + "add_crs_xy_coords", + "update_resampled_coords", + "resample", + "prepare_resampler", + "get_fill_value", + "resample_dataset" + ): + from . import base + new_submod = "base" + obj = getattr(base, name) + else: + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") + + warnings.warn( + f"'satpy.resample.{name}' has been moved to 'satpy.resample.{new_submod}.{name}'. " + f"Import from the new location instead (ex. 'from satpy.resample.{new_submod} import {name}').", + stacklevel=2, + ) + return obj diff --git a/satpy/resample/base.py b/satpy/resample/base.py new file mode 100644 index 0000000000..32e5654934 --- /dev/null +++ b/satpy/resample/base.py @@ -0,0 +1,482 @@ +#!/usr/bin/env python +# Copyright (c) 2015-2018 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see . +"""Resampling in Satpy. + +Satpy provides multiple resampling algorithms for resampling geolocated +data to uniform projected grids. The easiest way to perform resampling in +Satpy is through the :class:`~satpy.scene.Scene` object's +:meth:`~satpy.scene.Scene.resample` method. Additional utility functions are +also available to assist in resampling data. Below is more information on +resampling with Satpy as well as links to the relevant API documentation for +available keyword arguments. + +Resampling algorithms +--------------------- + +.. csv-table:: Available Resampling Algorithms + :header-rows: 1 + :align: center + + "Resampler", "Description", "Related" + "nearest", "Nearest Neighbor", :class:`~satpy.resample.KDTreeResampler` + "ewa", "Elliptical Weighted Averaging", :class:`~pyresample.ewa.DaskEWAResampler` + "ewa_legacy", "Elliptical Weighted Averaging (Legacy)", :class:`~pyresample.ewa.LegacyDaskEWAResampler` + "native", "Native", :class:`~satpy.resample.NativeResampler` + "bilinear", "Bilinear", :class:`~satpy.resample.BilinearResampler` + "bucket_avg", "Average Bucket Resampling", :class:`~satpy.resample.BucketAvg` + "bucket_sum", "Sum Bucket Resampling", :class:`~satpy.resample.BucketSum` + "bucket_count", "Count Bucket Resampling", :class:`~satpy.resample.BucketCount` + "bucket_fraction", "Fraction Bucket Resampling", :class:`~satpy.resample.BucketFraction` + "gradient_search", "Gradient Search Resampling", :meth:`~pyresample.gradient.create_gradient_search_resampler` + +The resampling algorithm used can be specified with the ``resampler`` keyword +argument and defaults to ``nearest``: + +.. code-block:: python + + >>> scn = Scene(...) + >>> euro_scn = scn.resample('euro4', resampler='nearest') + +.. warning:: + + Some resampling algorithms expect certain forms of data. For example, the + EWA resampling expects polar-orbiting swath data and prefers if the data + can be broken in to "scan lines". See the API documentation for a specific + algorithm for more information. + +Resampling for comparison and composites +---------------------------------------- + +While all the resamplers can be used to put datasets of different resolutions +on to a common area, the 'native' resampler is designed to match datasets to +one resolution in the dataset's original projection. This is extremely useful +when generating composites between bands of different resolutions. + +.. code-block:: python + + >>> new_scn = scn.resample(resampler='native') + +By default this resamples to the +:meth:`highest resolution area ` (smallest footprint per +pixel) shared between the loaded datasets. You can easily specify the lowest +resolution area: + +.. code-block:: python + + >>> new_scn = scn.resample(scn.coarsest_area(), resampler='native') + +Providing an area that is neither the minimum or maximum resolution area +may work, but behavior is currently undefined. + +Caching for geostationary data +------------------------------ + +Satpy will do its best to reuse calculations performed to resample datasets, +but it can only do this for the current processing and will lose this +information when the process/script ends. Some resampling algorithms, like +``nearest`` and ``bilinear``, can benefit by caching intermediate data on disk in the directory +specified by `cache_dir` and using it next time. This is most beneficial with +geostationary satellite data where the locations of the source data and the +target pixels don't change over time. + + >>> new_scn = scn.resample('euro4', cache_dir='/path/to/cache_dir') + +See the documentation for specific algorithms to see availability and +limitations of caching for that algorithm. + +Create custom area definition +----------------------------- + +See :class:`pyresample.geometry.AreaDefinition` for information on creating +areas that can be passed to the resample method:: + + >>> from pyresample.geometry import AreaDefinition + >>> my_area = AreaDefinition(...) + >>> local_scene = scn.resample(my_area) + +Resize area definition in pixels +-------------------------------- + +Sometimes you may want to create a small image with fixed size in pixels. +For example, to create an image of (y, x) pixels : + + >>> small_scn = scn.resample(scn.finest_area().copy(height=y, width=x), resampler="nearest") + + +.. warning:: + + Be aware that resizing with native resampling (``resampler="native"``) only + works if the new size is an integer factor of the original input size. For example, + multiplying the size by 2 or dividing the size by 2. Multiplying by 1.5 would + not be allowed. + + +Create dynamic area definition +------------------------------ + +See :class:`pyresample.geometry.DynamicAreaDefinition` for more information. + +Examples coming soon... + +Store area definitions +---------------------- + +Area definitions can be saved to a custom YAML file (see +`pyresample's writing to disk `_) +and loaded using pyresample's utility methods +(`pyresample's loading from disk `_):: + + >>> from pyresample import load_area + >>> my_area = load_area('my_areas.yaml', 'my_area') + +Or using :func:`satpy.resample.get_area_def`, which will search through all +``areas.yaml`` files in your ``SATPY_CONFIG_PATH``:: + + >>> from satpy.resample import get_area_def + >>> area_eurol = get_area_def("eurol") + +For examples of area definitions, see the file ``etc/areas.yaml`` that is +included with Satpy and where all the area definitions shipped with Satpy are +defined. The section below gives an overview of these area definitions. + +Area definitions included in Satpy +---------------------------------- + +.. include:: /area_def_list.rst + +""" +import hashlib +import json +import warnings +from logging import getLogger +from weakref import WeakValueDictionary + +import numpy as np +import xarray as xr +from pyresample.geometry import SwathDefinition +from pyresample.resampler import BaseResampler as PRBaseResampler + +from satpy._config import config_search_paths, get_config_path +from satpy.utils import get_legacy_chunk_size + +LOG = getLogger(__name__) + +CHUNK_SIZE = get_legacy_chunk_size() + +resamplers_cache: "WeakValueDictionary[tuple, object]" = WeakValueDictionary() + + +def hash_dict(the_dict, the_hash=None): + """Calculate a hash for a dictionary.""" + if the_hash is None: + the_hash = hashlib.sha1() # nosec + the_hash.update(json.dumps(the_dict, sort_keys=True).encode("utf-8")) + return the_hash + + +def get_area_file(): + """Find area file(s) to use. + + The files are to be named `areas.yaml` or `areas.def`. + """ + paths = config_search_paths("areas.yaml") + if paths: + return paths + else: + return get_config_path("areas.def") + + +def get_area_def(area_name): + """Get the definition of *area_name* from file. + + The file is defined to use is to be placed in the $SATPY_CONFIG_PATH + directory, and its name is defined in satpy's configuration file. + """ + try: + from pyresample import parse_area_file + except ImportError: + from pyresample.utils import parse_area_file + return parse_area_file(get_area_file(), area_name)[0] + + +def add_xy_coords(data_arr, area, crs=None): + """Assign x/y coordinates to DataArray from provided area. + + If 'x' and 'y' coordinates already exist then they will not be added. + + Args: + data_arr (xarray.DataArray): data object to add x/y coordinates to + area (pyresample.geometry.AreaDefinition): area providing the + coordinate data. + crs (pyproj.crs.CRS or None): CRS providing additional information + about the area's coordinate reference system if available. + Requires pyproj 2.0+. + + Returns (xarray.DataArray): Updated DataArray object + + """ + if "x" in data_arr.coords and "y" in data_arr.coords: + # x/y coords already provided + return data_arr + if "x" not in data_arr.dims or "y" not in data_arr.dims: + # no defined x and y dimensions + return data_arr + if not hasattr(area, "get_proj_vectors"): + return data_arr + x, y = area.get_proj_vectors() + + # convert to DataArrays + y_attrs = {} + x_attrs = {} + if crs is not None: + units = crs.axis_info[0].unit_name + # fix udunits/CF standard units + units = units.replace("metre", "meter") + if units == "degree": + y_attrs["units"] = "degrees_north" + x_attrs["units"] = "degrees_east" + else: + y_attrs["units"] = units + x_attrs["units"] = units + y = xr.DataArray(y, dims=("y",), attrs=y_attrs) + x = xr.DataArray(x, dims=("x",), attrs=x_attrs) + return data_arr.assign_coords(y=y, x=x) + + +def add_crs_xy_coords(data_arr, area): + """Add :class:`pyproj.crs.CRS` and x/y or lons/lats to coordinates. + + For SwathDefinition or GridDefinition areas this will add a + `crs` coordinate and coordinates for the 2D arrays of `lons` and `lats`. + + For AreaDefinition areas this will add a `crs` coordinate and the + 1-dimensional `x` and `y` coordinate variables. + + Args: + data_arr (xarray.DataArray): DataArray to add the 'crs' + coordinate. + area (pyresample.geometry.AreaDefinition): Area to get CRS + information from. + + """ + # add CRS object if pyproj 2.0+ + try: + from pyproj import CRS + except ImportError: + LOG.debug("Could not add 'crs' coordinate with pyproj<2.0") + crs = None + else: + # default lat/lon projection + latlon_proj = "+proj=latlong +datum=WGS84 +ellps=WGS84" + # otherwise get it from the area definition + if hasattr(area, "crs"): + crs = area.crs + else: + proj_str = getattr(area, "proj_str", latlon_proj) + crs = CRS.from_string(proj_str) + data_arr = data_arr.assign_coords(crs=crs) + + # Add x/y coordinates if possible + if isinstance(area, SwathDefinition): + # add lon/lat arrays for swath definitions + # SwathDefinitions created by Satpy should be assigning DataArray + # objects as the lons/lats attributes so use those directly to + # maintain original .attrs metadata (instead of converting to dask + # array). + lons = area.lons + lats = area.lats + lons.attrs.setdefault("standard_name", "longitude") + lons.attrs.setdefault("long_name", "longitude") + lons.attrs.setdefault("units", "degrees_east") + lats.attrs.setdefault("standard_name", "latitude") + lats.attrs.setdefault("long_name", "latitude") + lats.attrs.setdefault("units", "degrees_north") + # See https://github.com/pydata/xarray/issues/3068 + # data_arr = data_arr.assign_coords(longitude=lons, latitude=lats) + else: + # Gridded data (AreaDefinition/StackedAreaDefinition) + data_arr = add_xy_coords(data_arr, area, crs=crs) + return data_arr + + +def update_resampled_coords(old_data, new_data, new_area): + """Add coordinate information to newly resampled DataArray. + + Args: + old_data (xarray.DataArray): Old data before resampling. + new_data (xarray.DataArray): New data after resampling. + new_area (pyresample.geometry.BaseDefinition): Area definition + for the newly resampled data. + + """ + # copy over other non-x/y coordinates + # this *MUST* happen before we set 'crs' below otherwise any 'crs' + # coordinate in the coordinate variables we are copying will overwrite the + # 'crs' coordinate we just assigned to the data + ignore_coords = ("y", "x", "crs") + new_coords = {} + for cname, cval in old_data.coords.items(): + # we don't want coordinates that depended on the old x/y dimensions + has_ignored_dims = any(dim in cval.dims for dim in ignore_coords) + if cname in ignore_coords or has_ignored_dims: + continue + new_coords[cname] = cval + new_data = new_data.assign_coords(**new_coords) + + # add crs, x, and y coordinates + new_data = add_crs_xy_coords(new_data, new_area) + return new_data + + +# TODO: move this to pyresample.resampler +#RESAMPLERS = {"kd_tree": KDTreeResampler, +# "nearest": KDTreeResampler, +# "bilinear": BilinearResampler, +# "native": NativeResampler, +# "gradient_search": create_gradient_search_resampler, +# "bucket_avg": BucketAvg, +# "bucket_sum": BucketSum, +# "bucket_count": BucketCount, +# "bucket_fraction": BucketFraction, +# "ewa": DaskEWAResampler, +# "ewa_legacy": LegacyDaskEWAResampler, +# } + +def get_all_resampler_classes(): + """Get all available resampler classes.""" + resamplers = {} + try: + from .native import get_native_resampler_classes + resamplers.update(get_native_resampler_classes()) + except ImportError: + pass + try: + from .kdtree import get_kdtree_resampler_classes + resamplers.update(get_kdtree_resampler_classes()) + except ImportError: + pass + try: + from .bucket import get_bucket_resampler_classes + resamplers.update(get_bucket_resampler_classes()) + except ImportError: + pass + try: + from .ewa import get_ewa_resampler_classes + resamplers.update(get_ewa_resampler_classes()) + except ImportError: + pass + try: + from pyresample.gradient import create_gradient_search_resampler + resamplers["gradient_search"] = create_gradient_search_resampler + except ImportError: + pass + + return resamplers + + +# TODO: move this to pyresample +def prepare_resampler(source_area, destination_area, resampler=None, **resample_kwargs): + """Instantiate and return a resampler.""" + if resampler is None: + LOG.info("Using default KDTree resampler") + resampler = "kd_tree" + + if isinstance(resampler, PRBaseResampler): + raise ValueError("Trying to create a resampler when one already " + "exists.") + if isinstance(resampler, str): + resampler_class = get_all_resampler_classes().get(resampler, None) + if resampler_class is None: + if resampler == "gradient_search": + warnings.warn( + "Gradient search resampler not available. Maybe missing `shapely`?", + stacklevel=2 + ) + raise KeyError("Resampler '%s' not available" % resampler) + else: + resampler_class = resampler + + key = (resampler_class, + source_area, destination_area, + hash_dict(resample_kwargs).hexdigest()) + try: + resampler_instance = resamplers_cache[key] + except KeyError: + resampler_instance = resampler_class(source_area, destination_area) + resamplers_cache[key] = resampler_instance + return key, resampler_instance + + +# TODO: move this to pyresample +def resample(source_area, data, destination_area, + resampler=None, **kwargs): + """Do the resampling.""" + if not isinstance(resampler, PRBaseResampler): + # we don't use the first argument (cache key) + _, resampler_instance = prepare_resampler(source_area, + destination_area, + resampler) + else: + resampler_instance = resampler + + if isinstance(data, list): + res = [resampler_instance.resample(ds, **kwargs) for ds in data] + else: + res = resampler_instance.resample(data, **kwargs) + + return res + + +def get_fill_value(dataset): + """Get the fill value of the *dataset*, defaulting to np.nan.""" + if np.issubdtype(dataset.dtype, np.integer): + return dataset.attrs.get("_FillValue", np.nan) + return np.nan + + +def resample_dataset(dataset, destination_area, **kwargs): + """Resample *dataset* and return the resampled version. + + Args: + dataset (xarray.DataArray): Data to be resampled. + destination_area: The destination onto which to project the data, + either a full blown area definition or a string corresponding to + the name of the area as defined in the area file. + **kwargs: The extra parameters to pass to the resampler objects. + + Returns: + A resampled DataArray with updated ``.attrs["area"]`` field. The dtype + of the array is preserved. + + """ + # call the projection stuff here + try: + source_area = dataset.attrs["area"] + except KeyError: + LOG.info("Cannot reproject dataset %s, missing area info", + dataset.attrs["name"]) + + return dataset + + fill_value = kwargs.pop("fill_value", get_fill_value(dataset)) + new_data = resample(source_area, dataset, destination_area, fill_value=fill_value, **kwargs) + new_attrs = new_data.attrs + new_data.attrs = dataset.attrs.copy() + new_data.attrs.update(new_attrs) + new_data.attrs.update(area=destination_area) + + return new_data diff --git a/satpy/resample/bucket.py b/satpy/resample/bucket.py new file mode 100644 index 0000000000..e0c7ca6987 --- /dev/null +++ b/satpy/resample/bucket.py @@ -0,0 +1,221 @@ +"""Bucket resamplers.""" + +from logging import getLogger + +import dask.array as da +import numpy as np +import xarray as xr +from pyresample.resampler import BaseResampler as PRBaseResampler + +from satpy.resample.base import update_resampled_coords +from satpy.utils import get_legacy_chunk_size + +LOG = getLogger(__name__) + +CHUNK_SIZE = get_legacy_chunk_size() + + +class BucketResamplerBase(PRBaseResampler): + """Base class for bucket resampling which implements averaging.""" + + def __init__(self, source_geo_def, target_geo_def): + """Initialize bucket resampler.""" + super(BucketResamplerBase, self).__init__(source_geo_def, target_geo_def) + self.resampler = None + + def precompute(self, **kwargs): + """Create X and Y indices and store them for later use.""" + from pyresample import bucket + + LOG.debug("Initializing bucket resampler.") + source_lons, source_lats = self.source_geo_def.get_lonlats( + chunks=CHUNK_SIZE) + self.resampler = bucket.BucketResampler(self.target_geo_def, + source_lons, + source_lats) + + def compute(self, data, **kwargs): + """Call the resampling.""" + raise NotImplementedError("Use the sub-classes") + + def resample(self, data, **kwargs): # noqa: D417 + """Resample `data` by calling `precompute` and `compute` methods. + + Args: + data (xarray.DataArray): Data to be resampled + + Returns (xarray.DataArray): Data resampled to the target area + + """ + self.precompute(**kwargs) + attrs = data.attrs.copy() + data_arr = data.data + if data.ndim == 3 and data.dims[0] == "bands": + dims = ("bands", "y", "x") + # Both one and two dimensional input data results in 2D output + elif data.ndim in (1, 2): + dims = ("y", "x") + else: + dims = data.dims + LOG.debug("Resampling %s", str(data.attrs.get("_satpy_id", "unknown"))) + result = self.compute(data_arr, **kwargs) + coords = {} + if "bands" in data.coords: + coords["bands"] = data.coords["bands"] + # Fractions are returned in a dict + elif isinstance(result, dict): + coords["categories"] = sorted(result.keys()) + dims = ("categories", "y", "x") + new_result = [] + for cat in coords["categories"]: + new_result.append(result[cat]) + result = da.stack(new_result) + if result.ndim > len(dims): + result = da.squeeze(result) + + # Adjust some attributes + if "BucketFraction" in str(self): + attrs["units"] = "" + attrs["calibration"] = "" + attrs["standard_name"] = "area_fraction" + elif "BucketCount" in str(self): + attrs["units"] = "" + attrs["calibration"] = "" + attrs["standard_name"] = "number_of_observations" + + result = xr.DataArray(result, dims=dims, coords=coords, + attrs=attrs) + + return update_resampled_coords(data, result, self.target_geo_def) + + +class BucketAvg(BucketResamplerBase): + """Class for averaging bucket resampling. + + Bucket resampling calculates the average of all the values that + are closest to each bin and inside the target area. + + Parameters + ---------- + fill_value : float (default: np.nan) + Fill value to mark missing/invalid values in the input data, + as well as in the binned and averaged output data. + skipna : boolean (default: True) + If True, skips missing values (as marked by NaN or `fill_value`) for the average calculation + (similarly to Numpy's `nanmean`). Buckets containing only missing values are set to fill_value. + If False, sets the bucket to fill_value if one or more missing values are present in the bucket + (similarly to Numpy's `mean`). + In both cases, empty buckets are set to `fill_value`. + + """ + + def compute(self, data, fill_value=np.nan, skipna=True, **kwargs): # noqa: D417 + """Call the resampling. + + Args: + data (numpy.Array, dask.Array): Data to be resampled + fill_value (numpy.nan, int): fill_value. Defaults to numpy.nan + skipna (boolean): Skip NA's. Default `True` + + Returns: + dask.Array + """ + results = [] + if data.ndim == 3: + for i in range(data.shape[0]): + res = self.resampler.get_average(data[i, :, :], + fill_value=fill_value, + skipna=skipna, + **kwargs) + results.append(res) + else: + res = self.resampler.get_average(data, fill_value=fill_value, skipna=skipna, + **kwargs) + results.append(res) + + return da.stack(results) + + +class BucketSum(BucketResamplerBase): + """Class for bucket resampling which implements accumulation (sum). + + This resampler calculates the cumulative sum of all the values + that are closest to each bin and inside the target area. + + Parameters + ---------- + fill_value : float (default: np.nan) + Fill value for missing data + skipna : boolean (default: True) + If True, skips NaN values for the sum calculation + (similarly to Numpy's `nansum`). Buckets containing only NaN are set to zero. + If False, sets the bucket to NaN if one or more NaN values are present in the bucket + (similarly to Numpy's `sum`). + In both cases, empty buckets are set to 0. + + """ + + def compute(self, data, skipna=True, **kwargs): + """Call the resampling.""" + results = [] + if data.ndim == 3: + for i in range(data.shape[0]): + res = self.resampler.get_sum(data[i, :, :], skipna=skipna, + **kwargs) + results.append(res) + else: + res = self.resampler.get_sum(data, skipna=skipna, **kwargs) + results.append(res) + + return da.stack(results) + + +class BucketCount(BucketResamplerBase): + """Class for bucket resampling which implements hit-counting. + + This resampler calculates the number of occurences of the input + data closest to each bin and inside the target area. + + """ + + def compute(self, data, **kwargs): + """Call the resampling.""" + results = [] + if data.ndim == 3: + for _i in range(data.shape[0]): + res = self.resampler.get_count() + results.append(res) + else: + res = self.resampler.get_count() + results.append(res) + + return da.stack(results) + + +class BucketFraction(BucketResamplerBase): + """Class for bucket resampling to compute category fractions. + + This resampler calculates the fraction of occurences of the input + data per category. + + """ + + def compute(self, data, fill_value=np.nan, categories=None, **kwargs): + """Call the resampling.""" + if data.ndim > 2: + raise ValueError("BucketFraction not implemented for 3D datasets") + + result = self.resampler.get_fractions(data, categories=categories, + fill_value=fill_value) + + return result + + +def get_bucket_resampler_classes(): + """Get bucket resampler classes.""" + return { + "bucket_avg": BucketAvg, + "bucket_sum": BucketSum, + "bucket_count": BucketCount, + "bucket_fraction": BucketFraction + } diff --git a/satpy/resample/ewa.py b/satpy/resample/ewa.py new file mode 100644 index 0000000000..8d6345959a --- /dev/null +++ b/satpy/resample/ewa.py @@ -0,0 +1,11 @@ +"""EWA resamplers.""" + +from pyresample.ewa import DaskEWAResampler, LegacyDaskEWAResampler + + +def get_ewa_resampler_classes(): + """Get bucket resampler classes.""" + return { + "ewa": DaskEWAResampler, + "ewa_legacy": LegacyDaskEWAResampler, + } diff --git a/satpy/resample/kdtree.py b/satpy/resample/kdtree.py new file mode 100644 index 0000000000..dbb1ad43fe --- /dev/null +++ b/satpy/resample/kdtree.py @@ -0,0 +1,315 @@ +"""Resamplers based on the kdtree algorightm.""" + +import os +import warnings +from logging import getLogger + +import dask.array as da +import numpy as np +import xarray as xr +import zarr +from pyresample.resampler import BaseResampler as PRBaseResampler + +from satpy.resample.base import update_resampled_coords +from satpy.utils import get_legacy_chunk_size + +LOG = getLogger(__name__) + +CHUNK_SIZE = get_legacy_chunk_size() + +NN_COORDINATES = {"valid_input_index": ("y1", "x1"), + "valid_output_index": ("y2", "x2"), + "index_array": ("y2", "x2", "z2")} +BIL_COORDINATES = {"bilinear_s": ("x1", ), + "bilinear_t": ("x1", ), + "slices_x": ("x1", "n"), + "slices_y": ("x1", "n"), + "mask_slices": ("x1", "n"), + "out_coords_x": ("x2", ), + "out_coords_y": ("y2", )} + +class KDTreeResampler(PRBaseResampler): + """Resample using a KDTree-based nearest neighbor algorithm. + + This resampler implements on-disk caching when the `cache_dir` argument + is provided to the `resample` method. This should provide significant + performance improvements on consecutive resampling of geostationary data. + It is not recommended to provide `cache_dir` when the `mask` keyword + argument is provided to `precompute` which occurs by default for + `SwathDefinition` source areas. + + Args: + cache_dir (str): Long term storage directory for intermediate + results. + mask (bool): Force resampled data's invalid pixel mask to be used + when searching for nearest neighbor pixels. By + default this is True for SwathDefinition source + areas and False for all other area definition types. + radius_of_influence (float): Search radius cut off distance in meters + epsilon (float): Allowed uncertainty in meters. Increasing uncertainty + reduces execution time. + + """ + + def __init__(self, source_geo_def, target_geo_def): + """Init KDTreeResampler.""" + super(KDTreeResampler, self).__init__(source_geo_def, target_geo_def) + self.resampler = None + self._index_caches = {} + + def precompute(self, mask=None, radius_of_influence=None, epsilon=0, + cache_dir=None, **kwargs): + """Create a KDTree structure and store it for later use. + + Note: The `mask` keyword should be provided if geolocation may be valid + where data points are invalid. + + """ + from pyresample.kd_tree import XArrayResamplerNN + del kwargs + if mask is not None and cache_dir is not None: + LOG.warning("Mask and cache_dir both provided to nearest " + "resampler. Cached parameters are affected by " + "masked pixels. Will not cache results.") + cache_dir = None + + if radius_of_influence is None and not hasattr(self.source_geo_def, "geocentric_resolution"): + radius_of_influence = self._adjust_radius_of_influence(radius_of_influence) + + kwargs = dict(source_geo_def=self.source_geo_def, + target_geo_def=self.target_geo_def, + radius_of_influence=radius_of_influence, + neighbours=1, + epsilon=epsilon) + + if self.resampler is None: + # FIXME: We need to move all of this caching logic to pyresample + self.resampler = XArrayResamplerNN(**kwargs) + + try: + self.load_neighbour_info(cache_dir, mask=mask, **kwargs) + LOG.debug("Read pre-computed kd-tree parameters") + except IOError: + LOG.debug("Computing kd-tree parameters") + self.resampler.get_neighbour_info(mask=mask) + self.save_neighbour_info(cache_dir, mask=mask, **kwargs) + + def _adjust_radius_of_influence(self, radius_of_influence): + """Adjust radius of influence.""" + warnings.warn( + "Upgrade 'pyresample' for a more accurate default 'radius_of_influence'.", + stacklevel=3 + ) + try: + radius_of_influence = self.source_geo_def.lons.resolution * 3 + except AttributeError: + try: + radius_of_influence = max(abs(self.source_geo_def.pixel_size_x), + abs(self.source_geo_def.pixel_size_y)) * 3 + except AttributeError: + radius_of_influence = 1000 + + except TypeError: + radius_of_influence = 10000 + return radius_of_influence + + def _apply_cached_index(self, val, idx_name, persist=False): + """Reassign resampler index attributes.""" + if isinstance(val, np.ndarray): + val = da.from_array(val, chunks=CHUNK_SIZE) + elif persist and isinstance(val, da.Array): + val = val.persist() + setattr(self.resampler, idx_name, val) + return val + + def load_neighbour_info(self, cache_dir, mask=None, **kwargs): + """Read index arrays from either the in-memory or disk cache.""" + mask_name = getattr(mask, "name", None) + cached = {} + for idx_name in NN_COORDINATES: + if mask_name in self._index_caches: + cached[idx_name] = self._apply_cached_index( + self._index_caches[mask_name][idx_name], idx_name) + elif cache_dir: + try: + filename = self._create_cache_filename( + cache_dir, prefix="nn_lut-", + mask=mask_name, **kwargs) + fid = zarr.open(filename, "r") + cache = np.array(fid[idx_name]) + if idx_name == "valid_input_index": + # valid input index array needs to be boolean + cache = cache.astype(bool) + except ValueError: + raise IOError + cache = self._apply_cached_index(cache, idx_name) + cached[idx_name] = cache + else: + raise IOError + self._index_caches[mask_name] = cached + + def save_neighbour_info(self, cache_dir, mask=None, **kwargs): + """Cache resampler's index arrays if there is a cache dir.""" + if cache_dir: + mask_name = getattr(mask, "name", None) + cache = self._read_resampler_attrs() + filename = self._create_cache_filename( + cache_dir, prefix="nn_lut-", mask=mask_name, **kwargs) + LOG.info("Saving kd_tree neighbour info to %s", filename) + zarr_out = xr.Dataset() + for idx_name, coord in NN_COORDINATES.items(): + # update the cache in place with persisted dask arrays + cache[idx_name] = self._apply_cached_index(cache[idx_name], + idx_name, + persist=True) + zarr_out[idx_name] = (coord, cache[idx_name]) + + # Write indices to Zarr file + zarr_out.to_zarr(filename) + + self._index_caches[mask_name] = cache + # Delete the kdtree, it's not needed anymore + self.resampler.delayed_kdtree = None + + def _read_resampler_attrs(self): + """Read certain attributes from the resampler for caching.""" + return {attr_name: getattr(self.resampler, attr_name) + for attr_name in NN_COORDINATES} + + def compute(self, data, weight_funcs=None, fill_value=np.nan, + with_uncert=False, **kwargs): + """Resample data.""" + del kwargs + LOG.debug("Resampling %s", str(data.name)) + res = self.resampler.get_sample_from_neighbour_info(data, fill_value) + return update_resampled_coords(data, res, self.target_geo_def) + + +class BilinearResampler(PRBaseResampler): + """Resample using bilinear interpolation. + + This resampler implements on-disk caching when the `cache_dir` argument + is provided to the `resample` method. This should provide significant + performance improvements on consecutive resampling of geostationary data. + + Args: + cache_dir (str): Long term storage directory for intermediate + results. + radius_of_influence (float): Search radius cut off distance in meters + epsilon (float): Allowed uncertainty in meters. Increasing uncertainty + reduces execution time. + reduce_data (bool): Reduce the input data to (roughly) match the + target area. + + """ + + def __init__(self, source_geo_def, target_geo_def): + """Init BilinearResampler.""" + super(BilinearResampler, self).__init__(source_geo_def, target_geo_def) + self.resampler = None + + def precompute(self, mask=None, radius_of_influence=50000, epsilon=0, + reduce_data=True, cache_dir=False, **kwargs): + """Create bilinear coefficients and store them for later use.""" + try: + from pyresample.bilinear import XArrayBilinearResampler + except ImportError: + from pyresample.bilinear import XArrayResamplerBilinear as XArrayBilinearResampler + + del kwargs + del mask + + if self.resampler is None: + kwargs = dict(source_geo_def=self.source_geo_def, + target_geo_def=self.target_geo_def, + radius_of_influence=radius_of_influence, + neighbours=32, + epsilon=epsilon) + + self.resampler = XArrayBilinearResampler(**kwargs) + try: + self.load_bil_info(cache_dir, **kwargs) + LOG.debug("Loaded bilinear parameters") + except IOError: + LOG.debug("Computing bilinear parameters") + self.resampler.get_bil_info() + LOG.debug("Saving bilinear parameters.") + self.save_bil_info(cache_dir, **kwargs) + + def load_bil_info(self, cache_dir, **kwargs): + """Load bilinear resampling info from cache directory.""" + if cache_dir: + filename = self._create_cache_filename(cache_dir, + prefix="bil_lut-", + **kwargs) + try: + self.resampler.load_resampling_info(filename) + except AttributeError: + warnings.warn( + "Bilinear resampler can't handle caching, " + "please upgrade Pyresample to 0.17.0 or newer.", + stacklevel=2 + ) + raise IOError + else: + raise IOError + + def save_bil_info(self, cache_dir, **kwargs): + """Save bilinear resampling info to cache directory.""" + if cache_dir: + filename = self._create_cache_filename(cache_dir, + prefix="bil_lut-", + **kwargs) + # There are some old caches, move them out of the way + if os.path.exists(filename): + _move_existing_caches(cache_dir, filename) + LOG.info("Saving BIL neighbour info to %s", filename) + try: + self.resampler.save_resampling_info(filename) + except AttributeError: + warnings.warn( + "Bilinear resampler can't handle caching, " + "please upgrade Pyresample to 0.17.0 or newer.", + stacklevel=2 + ) + + def compute(self, data, fill_value=None, **kwargs): + """Resample the given data using bilinear interpolation.""" + del kwargs + + if fill_value is None: + fill_value = data.attrs.get("_FillValue") + target_shape = self.target_geo_def.shape + + res = self.resampler.get_sample_from_bil_info(data, + fill_value=fill_value, + output_shape=target_shape) + + return update_resampled_coords(data, res, self.target_geo_def) + + +def _move_existing_caches(cache_dir, filename): + """Move existing cache files out of the way.""" + import os + import shutil + old_cache_dir = os.path.join(cache_dir, "moved_by_satpy") + try: + os.makedirs(old_cache_dir) + except FileExistsError: + pass + try: + shutil.move(filename, old_cache_dir) + except shutil.Error: + os.remove(os.path.join(old_cache_dir, + os.path.basename(filename))) + shutil.move(filename, old_cache_dir) + LOG.warning("Old cache file was moved to %s", old_cache_dir) + + +def get_kdtree_resampler_classes(): + """Get resampler classes based on kdtree.""" + return { + "kd_tree": KDTreeResampler, + "nearest": KDTreeResampler, + "bilinear": BilinearResampler + } diff --git a/satpy/resample/native.py b/satpy/resample/native.py new file mode 100644 index 0000000000..ae8c55047f --- /dev/null +++ b/satpy/resample/native.py @@ -0,0 +1,172 @@ +"""Native resampler.""" + +import warnings +from math import lcm # type: ignore + +import dask.array as da +import numpy as np +import xarray as xr +from pyresample.resampler import BaseResampler as PRBaseResampler + +from satpy.resample.base import update_resampled_coords +from satpy.utils import PerformanceWarning, get_legacy_chunk_size + +CHUNK_SIZE = get_legacy_chunk_size() + + +class NativeResampler(PRBaseResampler): + """Expand or reduce input datasets to be the same shape. + + If data is higher resolution (more pixels) than the destination area + then data is averaged to match the destination resolution. + + If data is lower resolution (less pixels) than the destination area + then data is repeated to match the destination resolution. + + This resampler does not perform any caching or masking due to the + simplicity of the operations. + + """ + + def resample(self, data, cache_dir=None, mask_area=False, **kwargs): + """Run NativeResampler.""" + # use 'mask_area' with a default of False. It wouldn't do anything. + return super(NativeResampler, self).resample(data, + cache_dir=cache_dir, + mask_area=mask_area, + **kwargs) + + @classmethod + def _expand_reduce(cls, d_arr, repeats): + """Expand reduce.""" + if not isinstance(d_arr, da.Array): + d_arr = da.from_array(d_arr, chunks=CHUNK_SIZE) + if all(x == 1 for x in repeats.values()): + return d_arr + if all(x >= 1 for x in repeats.values()): + return _replicate(d_arr, repeats) + if all(x <= 1 for x in repeats.values()): + # reduce + y_size = 1. / repeats[0] + x_size = 1. / repeats[1] + return _aggregate(d_arr, y_size, x_size) + raise ValueError("Must either expand or reduce in both " + "directions") + + def compute(self, data, expand=True, **kwargs): + """Resample data with NativeResampler.""" + if isinstance(self.target_geo_def, (list, tuple)): + # find the highest/lowest area among the provided + test_func = max if expand else min + target_geo_def = test_func(self.target_geo_def, + key=lambda x: x.shape) + else: + target_geo_def = self.target_geo_def + + # convert xarray backed with numpy array to dask array + if "x" not in data.dims or "y" not in data.dims: + if data.ndim not in [2, 3]: + raise ValueError("Can only handle 2D or 3D arrays without dimensions.") + # assume rows is the second to last axis + y_axis = data.ndim - 2 + x_axis = data.ndim - 1 + else: + y_axis = data.dims.index("y") + x_axis = data.dims.index("x") + + out_shape = target_geo_def.shape + in_shape = data.shape + y_repeats = out_shape[0] / float(in_shape[y_axis]) + x_repeats = out_shape[1] / float(in_shape[x_axis]) + repeats = {axis_idx: 1. for axis_idx in range(data.ndim) if axis_idx not in [y_axis, x_axis]} + repeats[y_axis] = y_repeats + repeats[x_axis] = x_repeats + + d_arr = self._expand_reduce(data.data, repeats) + new_data = xr.DataArray(d_arr, dims=data.dims) + return update_resampled_coords(data, new_data, target_geo_def) + + +def _aggregate(d, y_size, x_size): + """Average every 4 elements (2x2) in a 2D array.""" + if d.ndim != 2: + # we can't guarantee what blocks we are getting and how + # it should be reshaped to do the averaging. + raise ValueError("Can't aggregrate (reduce) data arrays with " + "more than 2 dimensions.") + if not (x_size.is_integer() and y_size.is_integer()): + raise ValueError("Aggregation factors are not integers") + y_size = int(y_size) + x_size = int(x_size) + d = _rechunk_if_nonfactor_chunks(d, y_size, x_size) + new_chunks = (tuple(int(x / y_size) for x in d.chunks[0]), + tuple(int(x / x_size) for x in d.chunks[1])) + return da.core.map_blocks(_mean, d, y_size, x_size, + meta=np.array((), dtype=d.dtype), + dtype=d.dtype, chunks=new_chunks) + + +def _mean(data, y_size, x_size): + rows, cols = data.shape + new_shape = (int(rows / y_size), int(y_size), + int(cols / x_size), int(x_size)) + data_mean = np.nanmean(data.reshape(new_shape), axis=(1, 3)) + return data_mean + + +def _replicate(d_arr, repeats): + """Repeat data pixels by the per-axis factors specified.""" + repeated_chunks = _get_replicated_chunk_sizes(d_arr, repeats) + d_arr = d_arr.map_blocks(_repeat_by_factor, + meta=np.array((), dtype=d_arr.dtype), + dtype=d_arr.dtype, + chunks=repeated_chunks) + return d_arr + + +def _get_replicated_chunk_sizes(d_arr, repeats): + repeated_chunks = [] + for axis, axis_chunks in enumerate(d_arr.chunks): + factor = repeats[axis] + if not factor.is_integer(): + raise ValueError("Expand factor must be a whole number") + repeated_chunks.append(tuple(x * int(factor) for x in axis_chunks)) + return tuple(repeated_chunks) + + +def _repeat_by_factor(data, block_info=None): + if block_info is None: + return data + out_shape = block_info[None]["chunk-shape"] + out_data = data + for axis, axis_size in enumerate(out_shape): + in_size = data.shape[axis] + out_data = np.repeat(out_data, int(axis_size / in_size), axis=axis) + return out_data + + +def _rechunk_if_nonfactor_chunks(dask_arr, y_size, x_size): + need_rechunk = False + new_chunks = list(dask_arr.chunks) + for dim_idx, agg_size in enumerate([y_size, x_size]): + if dask_arr.shape[dim_idx] % agg_size != 0: + raise ValueError("Aggregation requires arrays with shapes divisible by the factor.") + for chunk_size in dask_arr.chunks[dim_idx]: + if chunk_size % agg_size != 0: + need_rechunk = True + new_dim_chunk = lcm(chunk_size, agg_size) + new_chunks[dim_idx] = new_dim_chunk + if need_rechunk: + warnings.warn( + "Array chunk size is not divisible by aggregation factor. " + "Re-chunking to continue native resampling.", + PerformanceWarning, + stacklevel=5 + ) + dask_arr = dask_arr.rechunk(tuple(new_chunks)) + return dask_arr + + +def get_native_resampler_classes(): + """Get classes based on native resampler.""" + return {"native": NativeResampler} diff --git a/satpy/tests/test_resample.py b/satpy/tests/test_resample.py index 11a9644eb4..f388f533f6 100644 --- a/satpy/tests/test_resample.py +++ b/satpy/tests/test_resample.py @@ -131,8 +131,8 @@ def test_type_preserve(self): class TestKDTreeResampler(unittest.TestCase): """Test the kd-tree resampler.""" - @mock.patch("satpy.resample.xr.Dataset") - @mock.patch("satpy.resample.zarr.open") + @mock.patch("satpy.resample.kdtree.xr.Dataset") + @mock.patch("satpy.resample.kdtree.zarr.open") @mock.patch("satpy.resample.KDTreeResampler._create_cache_filename") @mock.patch("pyresample.kd_tree.XArrayResamplerNN") def test_kd_resampling(self, xr_resampler, create_filename, zarr_open, @@ -327,7 +327,7 @@ def test_expand_without_dims_4D(self): class TestBilinearResampler(unittest.TestCase): """Test the bilinear resampler.""" - @mock.patch("satpy.resample._move_existing_caches") + @mock.patch("satpy.resample.kdtree._move_existing_caches") @mock.patch("satpy.resample.BilinearResampler._create_cache_filename") @mock.patch("pyresample.bilinear.XArrayBilinearResampler") def test_bil_resampling(self, xr_resampler, create_filename, @@ -413,7 +413,7 @@ def test_move_existing_caches(self): zarr_file = os.path.join(the_dir, "test.zarr") with open(zarr_file, "w") as fid: fid.write("42") - from satpy.resample import _move_existing_caches + from satpy.resample.kdtree import _move_existing_caches _move_existing_caches(the_dir, zarr_file) assert not os.path.exists(zarr_file) assert os.path.exists(os.path.join(the_dir, "moved_by_satpy", "test.zarr")) From b65098a961eb2792072a6e0212b74903bbf5374d Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Mon, 12 May 2025 17:54:55 +0300 Subject: [PATCH 02/30] Update resample imports --- satpy/readers/yaml_reader.py | 2 +- satpy/scene.py | 2 +- satpy/tests/test_resample.py | 24 ++++++++++++------------ satpy/writers/__init__.py | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/satpy/readers/yaml_reader.py b/satpy/readers/yaml_reader.py index 8e683f7aa9..86fa5cb08f 100644 --- a/satpy/readers/yaml_reader.py +++ b/satpy/readers/yaml_reader.py @@ -45,7 +45,7 @@ from satpy.aux_download import DataDownloadMixin from satpy.dataset import DataID, DataQuery, get_key from satpy.dataset.dataid import default_co_keys_config, default_id_keys_config, get_keys_from_config -from satpy.resample import add_crs_xy_coords, get_area_def +from satpy.resample.base import add_crs_xy_coords, get_area_def from satpy.utils import recursive_dict_update logger = logging.getLogger(__name__) diff --git a/satpy/scene.py b/satpy/scene.py index 0e46ce1544..d41dcf3922 100644 --- a/satpy/scene.py +++ b/satpy/scene.py @@ -34,7 +34,7 @@ from satpy.dependency_tree import DependencyTree from satpy.node import CompositorNode, MissingDependencies, ReaderNode from satpy.readers import load_readers -from satpy.resample import get_area_def, prepare_resampler, resample_dataset +from satpy.resample.base import get_area_def, prepare_resampler, resample_dataset from satpy.utils import convert_remote_files_to_fsspec, get_storage_options_from_reader_kwargs from satpy.writers import load_writer diff --git a/satpy/tests/test_resample.py b/satpy/tests/test_resample.py index f388f533f6..6da45d239d 100644 --- a/satpy/tests/test_resample.py +++ b/satpy/tests/test_resample.py @@ -28,7 +28,7 @@ import xarray as xr from pyproj import CRS -from satpy.resample import NativeResampler +from satpy.resample.native import NativeResampler def get_test_data(input_shape=(100, 50), output_shape=(200, 100), output_proj=None, @@ -109,7 +109,7 @@ def test_type_preserve(self): """Check that the type of resampled datasets is preserved.""" from pyresample.geometry import SwathDefinition - from satpy.resample import resample_dataset + from satpy.resample.base import resample_dataset source_area = SwathDefinition(xr.DataArray(da.arange(4, chunks=5).reshape((2, 2)), dims=["y", "x"]), xr.DataArray(da.arange(4, chunks=5).reshape((2, 2)), dims=["y", "x"])) dest_area = SwathDefinition(xr.DataArray(da.arange(4, chunks=5).reshape((2, 2)) + .0001, dims=["y", "x"]), @@ -133,12 +133,12 @@ class TestKDTreeResampler(unittest.TestCase): @mock.patch("satpy.resample.kdtree.xr.Dataset") @mock.patch("satpy.resample.kdtree.zarr.open") - @mock.patch("satpy.resample.KDTreeResampler._create_cache_filename") + @mock.patch("satpy.resample.kdtree.KDTreeResampler._create_cache_filename") @mock.patch("pyresample.kd_tree.XArrayResamplerNN") def test_kd_resampling(self, xr_resampler, create_filename, zarr_open, xr_dset): """Test the kd resampler.""" - from satpy.resample import KDTreeResampler + from satpy.resample.kdtree import KDTreeResampler data, source_area, swath_data, source_swath, target_area = get_test_data() mock_dset = mock.MagicMock() xr_dset.return_value = mock_dset @@ -328,12 +328,12 @@ class TestBilinearResampler(unittest.TestCase): """Test the bilinear resampler.""" @mock.patch("satpy.resample.kdtree._move_existing_caches") - @mock.patch("satpy.resample.BilinearResampler._create_cache_filename") + @mock.patch("satpy.resample.kdtree.BilinearResampler._create_cache_filename") @mock.patch("pyresample.bilinear.XArrayBilinearResampler") def test_bil_resampling(self, xr_resampler, create_filename, move_existing_caches): """Test the bilinear resampler.""" - from satpy.resample import BilinearResampler + from satpy.resample.kdtree import BilinearResampler data, source_area, swath_data, source_swath, target_area = get_test_data() # Test that bilinear resampling info calculation is called @@ -432,7 +432,7 @@ def test_area_def_coordinates(self): """Test coordinates being added with an AreaDefinition.""" from pyresample.geometry import AreaDefinition - from satpy.resample import add_crs_xy_coords + from satpy.resample.base import add_crs_xy_coords area_def = AreaDefinition( "test", "test", "test", {"proj": "lcc", "lat_1": 25, "lat_0": 25}, 100, 200, [-100, -100, 100, 100] @@ -498,7 +498,7 @@ def test_swath_def_coordinates(self): """Test coordinates being added with an SwathDefinition.""" from pyresample.geometry import SwathDefinition - from satpy.resample import add_crs_xy_coords + from satpy.resample.base import add_crs_xy_coords lons_data = da.random.random((200, 100), chunks=50) lats_data = da.random.random((200, 100), chunks=50) lons = xr.DataArray(lons_data, attrs={"units": "degrees_east"}, @@ -536,7 +536,7 @@ class TestBucketAvg(unittest.TestCase): def setUp(self): """Create fake area definitions and resampler to be tested.""" - from satpy.resample import BucketAvg + from satpy.resample.bucket import BucketAvg get_lonlats = mock.MagicMock() get_lonlats.return_value = (1, 2) get_proj_vectors = mock.MagicMock() @@ -648,7 +648,7 @@ class TestBucketSum(unittest.TestCase): def setUp(self): """Create fake area definitions and resampler to be tested.""" - from satpy.resample import BucketSum + from satpy.resample.bucket import BucketSum get_lonlats = mock.MagicMock() get_lonlats.return_value = (1, 2) self.source_geo_def = mock.MagicMock(get_lonlats=get_lonlats) @@ -700,7 +700,7 @@ class TestBucketCount(unittest.TestCase): def setUp(self): """Create fake area definitions and resampler to be tested.""" - from satpy.resample import BucketCount + from satpy.resample.bucket import BucketCount get_lonlats = mock.MagicMock() get_lonlats.return_value = (1, 2) self.source_geo_def = mock.MagicMock(get_lonlats=get_lonlats) @@ -740,7 +740,7 @@ class TestBucketFraction(unittest.TestCase): def setUp(self): """Create fake area definitions and resampler to be tested.""" - from satpy.resample import BucketFraction + from satpy.resample.bucket import BucketFraction get_lonlats = mock.MagicMock() get_lonlats.return_value = (1, 2) get_proj_vectors = mock.MagicMock() diff --git a/satpy/writers/__init__.py b/satpy/writers/__init__.py index bc922de3c1..f729adea1f 100644 --- a/satpy/writers/__init__.py +++ b/satpy/writers/__init__.py @@ -36,7 +36,7 @@ from satpy.aux_download import DataDownloadMixin from satpy.enhancements.enhancer import Enhancer from satpy.plugin_base import Plugin -from satpy.resample import get_area_def +from satpy.resample.base import get_area_def from satpy.utils import get_legacy_chunk_size, get_logger LOG = get_logger(__name__) From f3712abae5a6bd6def3be8ac10fd9eda54d6b6ad Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Mon, 12 May 2025 17:58:24 +0300 Subject: [PATCH 03/30] Update and move resampling docstring --- satpy/resample/__init__.py | 146 +++++++++++++++++++++++++++++++++++- satpy/resample/base.py | 148 +------------------------------------ 2 files changed, 147 insertions(+), 147 deletions(-) diff --git a/satpy/resample/__init__.py b/satpy/resample/__init__.py index fa024d05d7..dee93be15f 100644 --- a/satpy/resample/__init__.py +++ b/satpy/resample/__init__.py @@ -13,7 +13,151 @@ # # You should have received a copy of the GNU General Public License along with # satpy. If not, see . -"""Writers subpackage.""" +"""Resampling in Satpy. + +Satpy provides multiple resampling algorithms for resampling geolocated +data to uniform projected grids. The easiest way to perform resampling in +Satpy is through the :class:`~satpy.scene.Scene` object's +:meth:`~satpy.scene.Scene.resample` method. Additional utility functions are +also available to assist in resampling data. Below is more information on +resampling with Satpy as well as links to the relevant API documentation for +available keyword arguments. + +Resampling algorithms +--------------------- + +.. csv-table:: Available Resampling Algorithms + :header-rows: 1 + :align: center + + "Resampler", "Description", "Related" + "nearest", "Nearest Neighbor", :class:`~satpy.resample.kdtree.KDTreeResampler` + "ewa", "Elliptical Weighted Averaging", :class:`~pyresample.ewa.DaskEWAResampler` + "ewa_legacy", "Elliptical Weighted Averaging (Legacy)", :class:`~pyresample.ewa.LegacyDaskEWAResampler` + "native", "Native", :class:`~satpy.resample.native.NativeResampler` + "bilinear", "Bilinear", :class:`~satpy.resample.kdtree.BilinearResampler` + "bucket_avg", "Average Bucket Resampling", :class:`~satpy.resample.bucket.BucketAvg` + "bucket_sum", "Sum Bucket Resampling", :class:`~satpy.resample.bucket.BucketSum` + "bucket_count", "Count Bucket Resampling", :class:`~satpy.resample.bucket.BucketCount` + "bucket_fraction", "Fraction Bucket Resampling", :class:`~satpy.resample.bucket.BucketFraction` + "gradient_search", "Gradient Search Resampling", :meth:`~pyresample.gradient.create_gradient_search_resampler` + +The resampling algorithm used can be specified with the ``resampler`` keyword +argument and defaults to ``nearest``: + +.. code-block:: python + + >>> scn = Scene(...) + >>> euro_scn = scn.resample('euro4', resampler='nearest') + +.. warning:: + + Some resampling algorithms expect certain forms of data. For example, the + EWA resampling expects polar-orbiting swath data and prefers if the data + can be broken in to "scan lines". See the API documentation for a specific + algorithm for more information. + +Resampling for comparison and composites +---------------------------------------- + +While all the resamplers can be used to put datasets of different resolutions +on to a common area, the 'native' resampler is designed to match datasets to +one resolution in the dataset's original projection. This is extremely useful +when generating composites between bands of different resolutions. + +.. code-block:: python + + >>> new_scn = scn.resample(resampler='native') + +By default this resamples to the +:meth:`highest resolution area ` (smallest footprint per +pixel) shared between the loaded datasets. You can easily specify the lowest +resolution area: + +.. code-block:: python + + >>> new_scn = scn.resample(scn.coarsest_area(), resampler='native') + +Providing an area that is neither the minimum or maximum resolution area +may work, but behavior is currently undefined. + +Caching for geostationary data +------------------------------ + +Satpy will do its best to reuse calculations performed to resample datasets, +but it can only do this for the current processing and will lose this +information when the process/script ends. Some resampling algorithms, like +``nearest`` and ``bilinear``, can benefit by caching intermediate data on disk in the directory +specified by `cache_dir` and using it next time. This is most beneficial with +geostationary satellite data where the locations of the source data and the +target pixels don't change over time. + + >>> new_scn = scn.resample('euro4', cache_dir='/path/to/cache_dir') + +See the documentation for specific algorithms to see availability and +limitations of caching for that algorithm. + +Create custom area definition +----------------------------- + +See :class:`pyresample.geometry.AreaDefinition` for information on creating +areas that can be passed to the resample method:: + + >>> from pyresample.geometry import AreaDefinition + >>> my_area = AreaDefinition(...) + >>> local_scene = scn.resample(my_area) + +Resize area definition in pixels +-------------------------------- + +Sometimes you may want to create a small image with fixed size in pixels. +For example, to create an image of (y, x) pixels : + + >>> small_scn = scn.resample(scn.finest_area().copy(height=y, width=x), resampler="nearest") + + +.. warning:: + + Be aware that resizing with native resampling (``resampler="native"``) only + works if the new size is an integer factor of the original input size. For example, + multiplying the size by 2 or dividing the size by 2. Multiplying by 1.5 would + not be allowed. + + +Create dynamic area definition +------------------------------ + +See :class:`pyresample.geometry.DynamicAreaDefinition` for more information. + +Examples coming soon... + +Store area definitions +---------------------- + +Area definitions can be saved to a custom YAML file (see +`pyresample's writing to disk `_) +and loaded using pyresample's utility methods +(`pyresample's loading from disk `_):: + + >>> from pyresample import load_area + >>> my_area = load_area('my_areas.yaml', 'my_area') + +Or using :func:`satpy.resample.base.get_area_def`, which will search through all +``areas.yaml`` files in your ``SATPY_CONFIG_PATH``:: + + >>> from satpy.base.resample import get_area_def + >>> area_eurol = get_area_def("eurol") + +For examples of area definitions, see the file ``etc/areas.yaml`` that is +included with Satpy and where all the area definitions shipped with Satpy are +defined. The section below gives an overview of these area definitions. + +Area definitions included in Satpy +---------------------------------- + +.. include:: /area_def_list.rst + +""" from __future__ import annotations import warnings diff --git a/satpy/resample/base.py b/satpy/resample/base.py index 32e5654934..ca29d8563c 100644 --- a/satpy/resample/base.py +++ b/satpy/resample/base.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (c) 2015-2018 Satpy developers +# Copyright (c) 2015-2025 Satpy developers # # This file is part of satpy. # @@ -14,151 +14,7 @@ # # You should have received a copy of the GNU General Public License along with # satpy. If not, see . -"""Resampling in Satpy. - -Satpy provides multiple resampling algorithms for resampling geolocated -data to uniform projected grids. The easiest way to perform resampling in -Satpy is through the :class:`~satpy.scene.Scene` object's -:meth:`~satpy.scene.Scene.resample` method. Additional utility functions are -also available to assist in resampling data. Below is more information on -resampling with Satpy as well as links to the relevant API documentation for -available keyword arguments. - -Resampling algorithms ---------------------- - -.. csv-table:: Available Resampling Algorithms - :header-rows: 1 - :align: center - - "Resampler", "Description", "Related" - "nearest", "Nearest Neighbor", :class:`~satpy.resample.KDTreeResampler` - "ewa", "Elliptical Weighted Averaging", :class:`~pyresample.ewa.DaskEWAResampler` - "ewa_legacy", "Elliptical Weighted Averaging (Legacy)", :class:`~pyresample.ewa.LegacyDaskEWAResampler` - "native", "Native", :class:`~satpy.resample.NativeResampler` - "bilinear", "Bilinear", :class:`~satpy.resample.BilinearResampler` - "bucket_avg", "Average Bucket Resampling", :class:`~satpy.resample.BucketAvg` - "bucket_sum", "Sum Bucket Resampling", :class:`~satpy.resample.BucketSum` - "bucket_count", "Count Bucket Resampling", :class:`~satpy.resample.BucketCount` - "bucket_fraction", "Fraction Bucket Resampling", :class:`~satpy.resample.BucketFraction` - "gradient_search", "Gradient Search Resampling", :meth:`~pyresample.gradient.create_gradient_search_resampler` - -The resampling algorithm used can be specified with the ``resampler`` keyword -argument and defaults to ``nearest``: - -.. code-block:: python - - >>> scn = Scene(...) - >>> euro_scn = scn.resample('euro4', resampler='nearest') - -.. warning:: - - Some resampling algorithms expect certain forms of data. For example, the - EWA resampling expects polar-orbiting swath data and prefers if the data - can be broken in to "scan lines". See the API documentation for a specific - algorithm for more information. - -Resampling for comparison and composites ----------------------------------------- - -While all the resamplers can be used to put datasets of different resolutions -on to a common area, the 'native' resampler is designed to match datasets to -one resolution in the dataset's original projection. This is extremely useful -when generating composites between bands of different resolutions. - -.. code-block:: python - - >>> new_scn = scn.resample(resampler='native') - -By default this resamples to the -:meth:`highest resolution area ` (smallest footprint per -pixel) shared between the loaded datasets. You can easily specify the lowest -resolution area: - -.. code-block:: python - - >>> new_scn = scn.resample(scn.coarsest_area(), resampler='native') - -Providing an area that is neither the minimum or maximum resolution area -may work, but behavior is currently undefined. - -Caching for geostationary data ------------------------------- - -Satpy will do its best to reuse calculations performed to resample datasets, -but it can only do this for the current processing and will lose this -information when the process/script ends. Some resampling algorithms, like -``nearest`` and ``bilinear``, can benefit by caching intermediate data on disk in the directory -specified by `cache_dir` and using it next time. This is most beneficial with -geostationary satellite data where the locations of the source data and the -target pixels don't change over time. - - >>> new_scn = scn.resample('euro4', cache_dir='/path/to/cache_dir') - -See the documentation for specific algorithms to see availability and -limitations of caching for that algorithm. - -Create custom area definition ------------------------------ - -See :class:`pyresample.geometry.AreaDefinition` for information on creating -areas that can be passed to the resample method:: - - >>> from pyresample.geometry import AreaDefinition - >>> my_area = AreaDefinition(...) - >>> local_scene = scn.resample(my_area) - -Resize area definition in pixels --------------------------------- - -Sometimes you may want to create a small image with fixed size in pixels. -For example, to create an image of (y, x) pixels : - - >>> small_scn = scn.resample(scn.finest_area().copy(height=y, width=x), resampler="nearest") - - -.. warning:: - - Be aware that resizing with native resampling (``resampler="native"``) only - works if the new size is an integer factor of the original input size. For example, - multiplying the size by 2 or dividing the size by 2. Multiplying by 1.5 would - not be allowed. - - -Create dynamic area definition ------------------------------- - -See :class:`pyresample.geometry.DynamicAreaDefinition` for more information. - -Examples coming soon... - -Store area definitions ----------------------- - -Area definitions can be saved to a custom YAML file (see -`pyresample's writing to disk `_) -and loaded using pyresample's utility methods -(`pyresample's loading from disk `_):: - - >>> from pyresample import load_area - >>> my_area = load_area('my_areas.yaml', 'my_area') - -Or using :func:`satpy.resample.get_area_def`, which will search through all -``areas.yaml`` files in your ``SATPY_CONFIG_PATH``:: - - >>> from satpy.resample import get_area_def - >>> area_eurol = get_area_def("eurol") - -For examples of area definitions, see the file ``etc/areas.yaml`` that is -included with Satpy and where all the area definitions shipped with Satpy are -defined. The section below gives an overview of these area definitions. - -Area definitions included in Satpy ----------------------------------- - -.. include:: /area_def_list.rst - -""" +"""Base resampling functionality.""" import hashlib import json import warnings From 0b59ca8c73434ec196c14cb617092128b01e1a45 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 13 May 2025 10:26:56 +0300 Subject: [PATCH 04/30] Fix DOS line-endings, fix get_area_def import --- satpy/readers/li_l2_nc.py | 356 +++++++++++++++++++------------------- 1 file changed, 178 insertions(+), 178 deletions(-) diff --git a/satpy/readers/li_l2_nc.py b/satpy/readers/li_l2_nc.py index 196df2834a..824f05f009 100644 --- a/satpy/readers/li_l2_nc.py +++ b/satpy/readers/li_l2_nc.py @@ -1,178 +1,178 @@ -# Copyright (c) 2022 Satpy developers -# -# satpy is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# satpy is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with satpy. If not, see . - -"""MTG Lightning Imager (LI) Level-2 (L2) unified reader. - -This reader supports reading all the products from the LI L2 -processing level: - -Point products: - * L2-LE Lightning Events - * L2-LEF Lightning Events Filtered - * L2-LFL Lightning Flashes - * L2-LGR Lightning Groups -Accumulated products: - * L2-AF Accumulated Flashes - * L2-AFA Accumulated Flash Area - * L2-AFR Accumulated Flash Radiance - -Per default, the unified LI L2 reader returns the data either as an 1-D array -or as a 2-D array depending on the product type. - -Point-based products (LE, LEF, LFL, LGR) are "classic" lightning products -consisting of values with attached latitude and longitude coordinates. -Hence, these products are provided by the reader as 1-D arrays, -with a ``pyresample.geometry.SwathDefinition`` area -attribute containing the points lat-lon coordinates. - -Accumulated products (AF, AFA, AFR) are the result of temporal accumulation -of events (e.g. over 30 seconds), and are gridded in the FCI 2km geostationary -projection grid, in order to facilitate the synergistic usage together with FCI. -Compared to the point products, the gridded products also give information -about the spatial extent of the lightning activity. -Hence, these products are provided by the reader as 2-D arrays in the FCI 2km -grid as per intended usage, with a ``pyresample.geometry.AreaDefinition`` area -attribute containing the grid geolocation information. -In this way, the products can directly be overlaid to FCI data. - -.. note:: - - L2 accumulated products retrieved from the archive - (that have "ARC" in the filename) contain data for 20 repeat cycles (timesteps) covering - 10 minutes of sensing time. For these files, when loading the main variables - (``accumulated_flash_area``, ``flash_accumulation``, ``flash_radiance``), - the reader will cumulate (sum up) the data for the entire sensing period of the file. - A solution to access easily each timestep is being worked on. See https://github.com/pytroll/satpy/issues/2878 - for possible workarounds in the meanwhile. - - -If needed, the accumulated products can also be accessed as 1-d array by -setting the reader kwarg ``with_area_definition=False``, -e.g.:: - - scn = Scene(filenames=filenames, reader="li_l2_nc", reader_kwargs={'with_area_definition': False}) - -For both 1-d and 2-d products, the lat-lon coordinates of the points/grid pixels -can be accessed using e.g. -``scn['dataset_name'].attrs['area'].get_lonlats()``. - -See the LI L2 Product User Guide `PUG`_ for more information. - -.. _PUG: https://www-dr.eumetsat.int/media/49348 - -""" - -import logging - -import dask.array as da -import numpy as np -import xarray as xr - -from satpy.readers.li_base_nc import LINCFileHandler -from satpy.resample import get_area_def -from satpy.utils import get_legacy_chunk_size - -logger = logging.getLogger(__name__) -LI_GRID_SHAPE = (5568, 5568) -CHUNK_SIZE = get_legacy_chunk_size() - - -class LIL2NCFileHandler(LINCFileHandler): - """Implementation class for the unified LI L2 satpy reader.""" - - def __init__(self, filename, filename_info, filetype_info, with_area_definition=True): - """Initialize LIL2NCFileHandler.""" - super(LIL2NCFileHandler, self).__init__(filename, filename_info, filetype_info) - - if with_area_definition and not self.prod_in_accumulation_grid: - logger.debug(f"The current product {filetype_info['file_desc']['product_type']} " - f"is not an accumulated product so it will not be regridded.") - self.with_area_def = False - else: - self.with_area_def = with_area_definition - - def get_dataset(self, dataset_id, ds_info=None): - """Get the dataset and apply gridding if requested.""" - data_array = super().get_dataset(dataset_id, ds_info) - # variable_patterns are compiled to regex patterns - # hence search variable name from swath_coordinate - var_with_swath_coord = self.is_var_with_swath_coord(dataset_id) - if var_with_swath_coord and self.with_area_def: - data_array = self.get_array_on_fci_grid(data_array) - else : - if data_array is not None: - if not isinstance(data_array.data, da.Array): - data_array.data = da.from_array(data_array.data) - return data_array - - def get_area_def(self, dsid): - """Compute area definition for a dataset, only supported for accumulated products.""" - var_with_swath_coord = self.is_var_with_swath_coord(dsid) - if var_with_swath_coord and self.with_area_def: - return get_area_def("mtg_fci_fdss_2km") - - raise NotImplementedError("Area definition is not supported for non-accumulated products.") - - def is_var_with_swath_coord(self, dsid): - """Check if the variable corresponding to this dataset is listed as variable with swath coordinates.""" - # since the patterns are compiled to regex we use the search() method below to find matches - with_swath_coords = any([p.search(dsid["name"]) is not None for p in self.swath_coordinates["patterns"]]) - return with_swath_coords - - def get_array_on_fci_grid(self, data_array: xr.DataArray): - """Obtain the accumulated products as a (sparse) 2-d array. - - The array has the shape of the FCI 2 km grid (5568x5568px), - and will have an AreaDefinition attached. - """ - # Integer values without the application of scale_factor and add_offset - # hence no projection/index calculation. - # Note that x and y have origin in the south-west corner of the image - # and start with index 1. - - rows = self.get_measured_variable("y") - cols = self.get_measured_variable("x") - attrs = data_array.attrs - - rows, cols = da.compute(rows, cols) - - # origin is in the south-west corner, so we flip the rows (applying - # offset of 1 implicitly) - # And we manually offset the columns by 1 too: - rows = (LI_GRID_SHAPE[0] - rows.astype(int)) - cols = cols.astype(int) - 1 - - # initialise results array with zeros - data_2d = da.zeros((LI_GRID_SHAPE[0], LI_GRID_SHAPE[1]), dtype=data_array.dtype, - chunks=(LI_GRID_SHAPE[0], LI_GRID_SHAPE[1])) - - # insert the data. If a pixel has more than one entry, the values are added up (np.add.at functionality) - data_2d = da.map_blocks(_np_add_at_wrapper, data_2d, (rows, cols), data_array, - dtype=data_array.dtype, - chunks=(LI_GRID_SHAPE[0], LI_GRID_SHAPE[1])) - data_2d = data_2d.astype(np.float32) - data_2d = da.where(data_2d > 0, data_2d, np.nan) - xarr = xr.DataArray(da.asarray(data_2d, CHUNK_SIZE), dims=("y", "x")) - - xarr.attrs = attrs - return xarr - - -def _np_add_at_wrapper(target_array, indices, data): - # copy needed for correct computation in-place - ta = target_array.copy() - # add.at is not implemented in xarray, so we explicitly need the np.array - np.add.at(ta, indices, data.values) - return ta +# Copyright (c) 2022 Satpy developers +# +# satpy is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# satpy is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with satpy. If not, see . + +"""MTG Lightning Imager (LI) Level-2 (L2) unified reader. + +This reader supports reading all the products from the LI L2 +processing level: + +Point products: + * L2-LE Lightning Events + * L2-LEF Lightning Events Filtered + * L2-LFL Lightning Flashes + * L2-LGR Lightning Groups +Accumulated products: + * L2-AF Accumulated Flashes + * L2-AFA Accumulated Flash Area + * L2-AFR Accumulated Flash Radiance + +Per default, the unified LI L2 reader returns the data either as an 1-D array +or as a 2-D array depending on the product type. + +Point-based products (LE, LEF, LFL, LGR) are "classic" lightning products +consisting of values with attached latitude and longitude coordinates. +Hence, these products are provided by the reader as 1-D arrays, +with a ``pyresample.geometry.SwathDefinition`` area +attribute containing the points lat-lon coordinates. + +Accumulated products (AF, AFA, AFR) are the result of temporal accumulation +of events (e.g. over 30 seconds), and are gridded in the FCI 2km geostationary +projection grid, in order to facilitate the synergistic usage together with FCI. +Compared to the point products, the gridded products also give information +about the spatial extent of the lightning activity. +Hence, these products are provided by the reader as 2-D arrays in the FCI 2km +grid as per intended usage, with a ``pyresample.geometry.AreaDefinition`` area +attribute containing the grid geolocation information. +In this way, the products can directly be overlaid to FCI data. + +.. note:: + + L2 accumulated products retrieved from the archive + (that have "ARC" in the filename) contain data for 20 repeat cycles (timesteps) covering + 10 minutes of sensing time. For these files, when loading the main variables + (``accumulated_flash_area``, ``flash_accumulation``, ``flash_radiance``), + the reader will cumulate (sum up) the data for the entire sensing period of the file. + A solution to access easily each timestep is being worked on. See https://github.com/pytroll/satpy/issues/2878 + for possible workarounds in the meanwhile. + + +If needed, the accumulated products can also be accessed as 1-d array by +setting the reader kwarg ``with_area_definition=False``, +e.g.:: + + scn = Scene(filenames=filenames, reader="li_l2_nc", reader_kwargs={'with_area_definition': False}) + +For both 1-d and 2-d products, the lat-lon coordinates of the points/grid pixels +can be accessed using e.g. +``scn['dataset_name'].attrs['area'].get_lonlats()``. + +See the LI L2 Product User Guide `PUG`_ for more information. + +.. _PUG: https://www-dr.eumetsat.int/media/49348 + +""" + +import logging + +import dask.array as da +import numpy as np +import xarray as xr + +from satpy.readers.li_base_nc import LINCFileHandler +from satpy.resample.base import get_area_def +from satpy.utils import get_legacy_chunk_size + +logger = logging.getLogger(__name__) +LI_GRID_SHAPE = (5568, 5568) +CHUNK_SIZE = get_legacy_chunk_size() + + +class LIL2NCFileHandler(LINCFileHandler): + """Implementation class for the unified LI L2 satpy reader.""" + + def __init__(self, filename, filename_info, filetype_info, with_area_definition=True): + """Initialize LIL2NCFileHandler.""" + super(LIL2NCFileHandler, self).__init__(filename, filename_info, filetype_info) + + if with_area_definition and not self.prod_in_accumulation_grid: + logger.debug(f"The current product {filetype_info['file_desc']['product_type']} " + f"is not an accumulated product so it will not be regridded.") + self.with_area_def = False + else: + self.with_area_def = with_area_definition + + def get_dataset(self, dataset_id, ds_info=None): + """Get the dataset and apply gridding if requested.""" + data_array = super().get_dataset(dataset_id, ds_info) + # variable_patterns are compiled to regex patterns + # hence search variable name from swath_coordinate + var_with_swath_coord = self.is_var_with_swath_coord(dataset_id) + if var_with_swath_coord and self.with_area_def: + data_array = self.get_array_on_fci_grid(data_array) + else : + if data_array is not None: + if not isinstance(data_array.data, da.Array): + data_array.data = da.from_array(data_array.data) + return data_array + + def get_area_def(self, dsid): + """Compute area definition for a dataset, only supported for accumulated products.""" + var_with_swath_coord = self.is_var_with_swath_coord(dsid) + if var_with_swath_coord and self.with_area_def: + return get_area_def("mtg_fci_fdss_2km") + + raise NotImplementedError("Area definition is not supported for non-accumulated products.") + + def is_var_with_swath_coord(self, dsid): + """Check if the variable corresponding to this dataset is listed as variable with swath coordinates.""" + # since the patterns are compiled to regex we use the search() method below to find matches + with_swath_coords = any([p.search(dsid["name"]) is not None for p in self.swath_coordinates["patterns"]]) + return with_swath_coords + + def get_array_on_fci_grid(self, data_array: xr.DataArray): + """Obtain the accumulated products as a (sparse) 2-d array. + + The array has the shape of the FCI 2 km grid (5568x5568px), + and will have an AreaDefinition attached. + """ + # Integer values without the application of scale_factor and add_offset + # hence no projection/index calculation. + # Note that x and y have origin in the south-west corner of the image + # and start with index 1. + + rows = self.get_measured_variable("y") + cols = self.get_measured_variable("x") + attrs = data_array.attrs + + rows, cols = da.compute(rows, cols) + + # origin is in the south-west corner, so we flip the rows (applying + # offset of 1 implicitly) + # And we manually offset the columns by 1 too: + rows = (LI_GRID_SHAPE[0] - rows.astype(int)) + cols = cols.astype(int) - 1 + + # initialise results array with zeros + data_2d = da.zeros((LI_GRID_SHAPE[0], LI_GRID_SHAPE[1]), dtype=data_array.dtype, + chunks=(LI_GRID_SHAPE[0], LI_GRID_SHAPE[1])) + + # insert the data. If a pixel has more than one entry, the values are added up (np.add.at functionality) + data_2d = da.map_blocks(_np_add_at_wrapper, data_2d, (rows, cols), data_array, + dtype=data_array.dtype, + chunks=(LI_GRID_SHAPE[0], LI_GRID_SHAPE[1])) + data_2d = data_2d.astype(np.float32) + data_2d = da.where(data_2d > 0, data_2d, np.nan) + xarr = xr.DataArray(da.asarray(data_2d, CHUNK_SIZE), dims=("y", "x")) + + xarr.attrs = attrs + return xarr + + +def _np_add_at_wrapper(target_array, indices, data): + # copy needed for correct computation in-place + ta = target_array.copy() + # add.at is not implemented in xarray, so we explicitly need the np.array + np.add.at(ta, indices, data.values) + return ta From 9051519e4afbbf9c926c7655b0a1cbdcf02d10a9 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 13 May 2025 10:27:11 +0300 Subject: [PATCH 05/30] Fix imports to use new resampler modules --- satpy/composites/__init__.py | 2 +- satpy/readers/cmsaf_claas2.py | 2 +- satpy/readers/eum_l2_bufr.py | 2 +- satpy/readers/fci_l2_nc.py | 2 +- satpy/readers/gerb_l2_hr_h5.py | 2 +- satpy/readers/hsaf_h5.py | 2 +- satpy/tests/modifier_tests/test_parallax.py | 2 +- satpy/tests/test_composites.py | 4 ++-- satpy/tests/test_config.py | 4 ++-- satpy/tests/test_testing.py | 2 +- satpy/tests/utils.py | 2 +- 11 files changed, 13 insertions(+), 13 deletions(-) diff --git a/satpy/composites/__init__.py b/satpy/composites/__init__.py index ea33de3bc3..0eaf9cf4b3 100644 --- a/satpy/composites/__init__.py +++ b/satpy/composites/__init__.py @@ -1598,7 +1598,7 @@ def __init__(self, name, filename=None, url=None, known_hash=None, area=None, # self._known_hash = known_hash self.area = None if area is not None: - from satpy.resample import get_area_def + from satpy.resample.base import get_area_def self.area = get_area_def(area) super(StaticImageCompositor, self).__init__(name, **kwargs) diff --git a/satpy/readers/cmsaf_claas2.py b/satpy/readers/cmsaf_claas2.py index 9bf3ca3deb..2b60ff18c6 100644 --- a/satpy/readers/cmsaf_claas2.py +++ b/satpy/readers/cmsaf_claas2.py @@ -2,7 +2,7 @@ import datetime -from satpy.resample import get_area_def +from satpy.resample.base import get_area_def from .netcdf_utils import NetCDF4FileHandler diff --git a/satpy/readers/eum_l2_bufr.py b/satpy/readers/eum_l2_bufr.py index 15c594bf73..49470f8157 100644 --- a/satpy/readers/eum_l2_bufr.py +++ b/satpy/readers/eum_l2_bufr.py @@ -36,7 +36,7 @@ from satpy.readers.eum_base import get_service_mode, recarray2dict from satpy.readers.file_handlers import BaseFileHandler from satpy.readers.seviri_base import mpef_product_header -from satpy.resample import get_area_def +from satpy.resample.base import get_area_def from satpy.utils import get_legacy_chunk_size try: diff --git a/satpy/readers/fci_l2_nc.py b/satpy/readers/fci_l2_nc.py index f71e55f921..99db81b630 100644 --- a/satpy/readers/fci_l2_nc.py +++ b/satpy/readers/fci_l2_nc.py @@ -27,7 +27,7 @@ from satpy.readers._geos_area import get_geos_area_naming, make_ext from satpy.readers.eum_base import get_service_mode from satpy.readers.file_handlers import BaseFileHandler -from satpy.resample import get_area_def +from satpy.resample.base import get_area_def from satpy.utils import get_legacy_chunk_size logger = logging.getLogger(__name__) diff --git a/satpy/readers/gerb_l2_hr_h5.py b/satpy/readers/gerb_l2_hr_h5.py index 6b3ceb5e0a..0381561aa6 100644 --- a/satpy/readers/gerb_l2_hr_h5.py +++ b/satpy/readers/gerb_l2_hr_h5.py @@ -26,7 +26,7 @@ import logging from satpy.readers.hdf5_utils import HDF5FileHandler -from satpy.resample import get_area_def +from satpy.resample.base import get_area_def LOG = logging.getLogger(__name__) diff --git a/satpy/readers/hsaf_h5.py b/satpy/readers/hsaf_h5.py index 25b42ec6a5..87c146b69a 100644 --- a/satpy/readers/hsaf_h5.py +++ b/satpy/readers/hsaf_h5.py @@ -27,7 +27,7 @@ import xarray as xr from satpy.readers.file_handlers import BaseFileHandler -from satpy.resample import get_area_def +from satpy.resample.base import get_area_def from satpy.utils import get_legacy_chunk_size LOG = logging.getLogger(__name__) diff --git a/satpy/tests/modifier_tests/test_parallax.py b/satpy/tests/modifier_tests/test_parallax.py index 63ddbd8caf..db97c4774f 100644 --- a/satpy/tests/modifier_tests/test_parallax.py +++ b/satpy/tests/modifier_tests/test_parallax.py @@ -499,7 +499,7 @@ def test_parallax_modifier_interface(self): res = modif([fake_bt, cth_clear], optional_datasets=[]) np.testing.assert_allclose(res, fake_bt) with unittest.mock.patch("satpy.modifiers.parallax.resample_dataset") as smp: - smp.side_effect = satpy.resample.resample_dataset + smp.side_effect = satpy.resample.base.resample_dataset modif([fake_bt, cth_clear], optional_datasets=[]) assert smp.call_args_list[0].kwargs["radius_of_influence"] == 48_000 assert smp.call_args_list[1].kwargs["radius_of_influence"] == 49_000 diff --git a/satpy/tests/test_composites.py b/satpy/tests/test_composites.py index 0a75ba072c..d28a5499b0 100644 --- a/satpy/tests/test_composites.py +++ b/satpy/tests/test_composites.py @@ -145,7 +145,7 @@ def test_almost_equal_geo_coordinates(self): """ from satpy.composites import CompositeBase - from satpy.resample import add_crs_xy_coords + from satpy.resample.base import add_crs_xy_coords comp = CompositeBase("test_comp") data_arr1 = self._get_test_ds(shape=(2, 2)) @@ -1405,7 +1405,7 @@ def test_add_bands_p_l(self): class TestStaticImageCompositor(unittest.TestCase): """Test case for the static compositor.""" - @mock.patch("satpy.resample.get_area_def") + @mock.patch("satpy.resample.base.get_area_def") def test_init(self, get_area_def): """Test the initializiation of static compositor.""" from satpy.composites import StaticImageCompositor diff --git a/satpy/tests/test_config.py b/satpy/tests/test_config.py index 9a0fff9999..852ce71e57 100644 --- a/satpy/tests/test_config.py +++ b/satpy/tests/test_config.py @@ -50,7 +50,7 @@ def test_areas_pyproj(self): from pyresample import parse_area_file from pyresample.geometry import SwathDefinition - from satpy.resample import get_area_file + from satpy.resample.base import get_area_file lons = np.array([[0, 0.1, 0.2], [0.05, 0.15, 0.25]]) lats = np.array([[0, 0.1, 0.2], [0.05, 0.15, 0.25]]) @@ -82,7 +82,7 @@ def test_areas_rasterio(self): from pyresample import parse_area_file from pyresample.geometry import SwathDefinition - from satpy.resample import get_area_file + from satpy.resample.base import get_area_file lons = np.array([[0, 0.1, 0.2], [0.05, 0.15, 0.25]]) lats = np.array([[0, 0.1, 0.2], [0.05, 0.15, 0.25]]) diff --git a/satpy/tests/test_testing.py b/satpy/tests/test_testing.py index 278dfd5b4f..63aee37a68 100644 --- a/satpy/tests/test_testing.py +++ b/satpy/tests/test_testing.py @@ -4,7 +4,7 @@ import xarray as xr from satpy import Scene -from satpy.resample import get_area_def +from satpy.resample.base import get_area_def from satpy.testing import fake_satpy_reading diff --git a/satpy/tests/utils.py b/satpy/tests/utils.py index ea8b01c0df..17beebbdf7 100644 --- a/satpy/tests/utils.py +++ b/satpy/tests/utils.py @@ -373,7 +373,7 @@ def _get_fake_scene_area(arr, area): def _get_did_for_fake_scene(area, arr, extra_attrs, daskify): """Add instance to fake scene. Helper for make_fake_scene.""" - from satpy.resample import add_crs_xy_coords + from satpy.resample.base import add_crs_xy_coords if isinstance(arr, DataArray): new = arr.copy() # don't change attributes of input new.attrs.update(extra_attrs) From 41f5ef07124742df10d0e4ed2f8ccb4f5f3b7129 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 13 May 2025 10:42:54 +0300 Subject: [PATCH 06/30] Simplify prepare_resampler() --- satpy/resample/base.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/satpy/resample/base.py b/satpy/resample/base.py index ca29d8563c..7f8bd77170 100644 --- a/satpy/resample/base.py +++ b/satpy/resample/base.py @@ -256,13 +256,7 @@ def prepare_resampler(source_area, destination_area, resampler=None, **resample_ "exists.") if isinstance(resampler, str): resampler_class = get_all_resampler_classes().get(resampler, None) - if resampler_class is None: - if resampler == "gradient_search": - warnings.warn( - "Gradient search resampler not available. Maybe missing `shapely`?", - stacklevel=2 - ) - raise KeyError("Resampler '%s' not available" % resampler) + _check_resampler_class(resampler_class, resampler) else: resampler_class = resampler @@ -277,6 +271,17 @@ def prepare_resampler(source_area, destination_area, resampler=None, **resample_ return key, resampler_instance +def _check_resampler_class(resampler_class, resampler): + if resampler_class is not None: + return + if resampler == "gradient_search": + warnings.warn( + "Gradient search resampler not available. Maybe missing `shapely`?", + stacklevel=2 + ) + raise KeyError("Resampler '%s' not available" % resampler) + + # TODO: move this to pyresample def resample(source_area, data, destination_area, resampler=None, **kwargs): From 43f4b33bcadc4dacfa6113b114f6d632c3050b45 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 13 May 2025 10:47:10 +0300 Subject: [PATCH 07/30] Simplify add_xy_coords() --- satpy/resample/base.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/satpy/resample/base.py b/satpy/resample/base.py index 7f8bd77170..b8c867b9e0 100644 --- a/satpy/resample/base.py +++ b/satpy/resample/base.py @@ -98,6 +98,15 @@ def add_xy_coords(data_arr, area, crs=None): # convert to DataArrays y_attrs = {} x_attrs = {} + + _check_crs_units(crs, x_attrs, y_attrs) + + y = xr.DataArray(y, dims=("y",), attrs=y_attrs) + x = xr.DataArray(x, dims=("x",), attrs=x_attrs) + return data_arr.assign_coords(y=y, x=x) + + +def _check_crs_units(crs, x_attrs, y_attrs): if crs is not None: units = crs.axis_info[0].unit_name # fix udunits/CF standard units @@ -108,9 +117,6 @@ def add_xy_coords(data_arr, area, crs=None): else: y_attrs["units"] = units x_attrs["units"] = units - y = xr.DataArray(y, dims=("y",), attrs=y_attrs) - x = xr.DataArray(x, dims=("x",), attrs=x_attrs) - return data_arr.assign_coords(y=y, x=x) def add_crs_xy_coords(data_arr, area): From 80b16e1b630909cf2238d9b8e6c1a3646dd4be41 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 13 May 2025 11:11:35 +0300 Subject: [PATCH 08/30] Simplify BucketResamplerBase.resample() --- satpy/resample/bucket.py | 60 +++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/satpy/resample/bucket.py b/satpy/resample/bucket.py index e0c7ca6987..2ac3cac014 100644 --- a/satpy/resample/bucket.py +++ b/satpy/resample/bucket.py @@ -50,29 +50,19 @@ def resample(self, data, **kwargs): # noqa: D417 self.precompute(**kwargs) attrs = data.attrs.copy() data_arr = data.data - if data.ndim == 3 and data.dims[0] == "bands": - dims = ("bands", "y", "x") - # Both one and two dimensional input data results in 2D output - elif data.ndim in (1, 2): - dims = ("y", "x") - else: - dims = data.dims + dims = _get_dims(data) LOG.debug("Resampling %s", str(data.attrs.get("_satpy_id", "unknown"))) result = self.compute(data_arr, **kwargs) - coords = {} - if "bands" in data.coords: - coords["bands"] = data.coords["bands"] - # Fractions are returned in a dict - elif isinstance(result, dict): - coords["categories"] = sorted(result.keys()) - dims = ("categories", "y", "x") - new_result = [] - for cat in coords["categories"]: - new_result.append(result[cat]) - result = da.stack(new_result) - if result.ndim > len(dims): - result = da.squeeze(result) + coords, result, dims = _check_coords_results_dims(result, data, dims) + + self._adjust_attrs(attrs) + + result = xr.DataArray(result, dims=dims, coords=coords, + attrs=attrs) + return update_resampled_coords(data, result, self.target_geo_def) + + def _adjust_attrs(self, attrs): # Adjust some attributes if "BucketFraction" in str(self): attrs["units"] = "" @@ -83,10 +73,34 @@ def resample(self, data, **kwargs): # noqa: D417 attrs["calibration"] = "" attrs["standard_name"] = "number_of_observations" - result = xr.DataArray(result, dims=dims, coords=coords, - attrs=attrs) - return update_resampled_coords(data, result, self.target_geo_def) +def _get_dims(data): + if data.ndim == 3 and data.dims[0] == "bands": + dims = ("bands", "y", "x") + # Both one and two dimensional input data results in 2D output + elif data.ndim in (1, 2): + dims = ("y", "x") + else: + dims = data.dims + return dims + + +def _check_coords_results_dims(result, data, dims): + coords = {} + if "bands" in data.coords: + coords["bands"] = data.coords["bands"] + # Fractions are returned in a dict + elif isinstance(result, dict): + coords["categories"] = sorted(result.keys()) + dims = ("categories", "y", "x") + new_result = [] + for cat in coords["categories"]: + new_result.append(result[cat]) + result = da.stack(new_result) + if result.ndim > len(dims): + result = da.squeeze(result) + + return coords, result, dims class BucketAvg(BucketResamplerBase): From b16e83447d412f8361768a1def465f0fe20660f4 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 13 May 2025 11:17:57 +0300 Subject: [PATCH 09/30] Simplify KDTreeResampler.load_neighbour_info() --- satpy/resample/kdtree.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/satpy/resample/kdtree.py b/satpy/resample/kdtree.py index dbb1ad43fe..8b135e3c62 100644 --- a/satpy/resample/kdtree.py +++ b/satpy/resample/kdtree.py @@ -131,23 +131,28 @@ def load_neighbour_info(self, cache_dir, mask=None, **kwargs): cached[idx_name] = self._apply_cached_index( self._index_caches[mask_name][idx_name], idx_name) elif cache_dir: - try: - filename = self._create_cache_filename( - cache_dir, prefix="nn_lut-", - mask=mask_name, **kwargs) - fid = zarr.open(filename, "r") - cache = np.array(fid[idx_name]) - if idx_name == "valid_input_index": - # valid input index array needs to be boolean - cache = cache.astype(bool) - except ValueError: - raise IOError + cache = self._load_neighbour_info_from_cache( + cache_dir, idx_name, mask_name, **kwargs) cache = self._apply_cached_index(cache, idx_name) cached[idx_name] = cache else: raise IOError self._index_caches[mask_name] = cached + def _load_neighbour_info_from_cache(self, cache_dir, idx_name, mask_name, **kwargs): + try: + filename = self._create_cache_filename( + cache_dir, prefix="nn_lut-", + mask=mask_name, **kwargs) + fid = zarr.open(filename, "r") + cache = np.array(fid[idx_name]) + if idx_name == "valid_input_index": + # valid input index array needs to be boolean + cache = cache.astype(bool) + except ValueError: + raise IOError + return cache + def save_neighbour_info(self, cache_dir, mask=None, **kwargs): """Cache resampler's index arrays if there is a cache dir.""" if cache_dir: From d68afb561be716e4b3277799f26b376ccaaf1c96 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 13 May 2025 11:23:34 +0300 Subject: [PATCH 10/30] Simplify NativeResampler.compute() --- satpy/resample/native.py | 45 +++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/satpy/resample/native.py b/satpy/resample/native.py index ae8c55047f..6dc410033f 100644 --- a/satpy/resample/native.py +++ b/satpy/resample/native.py @@ -64,29 +64,40 @@ def compute(self, data, expand=True, **kwargs): target_geo_def = self.target_geo_def # convert xarray backed with numpy array to dask array - if "x" not in data.dims or "y" not in data.dims: - if data.ndim not in [2, 3]: - raise ValueError("Can only handle 2D or 3D arrays without dimensions.") - # assume rows is the second to last axis - y_axis = data.ndim - 2 - x_axis = data.ndim - 1 - else: - y_axis = data.dims.index("y") - x_axis = data.dims.index("x") - - out_shape = target_geo_def.shape - in_shape = data.shape - y_repeats = out_shape[0] / float(in_shape[y_axis]) - x_repeats = out_shape[1] / float(in_shape[x_axis]) - repeats = {axis_idx: 1. for axis_idx in range(data.ndim) if axis_idx not in [y_axis, x_axis]} - repeats[y_axis] = y_repeats - repeats[x_axis] = x_repeats + repeats = _get_repeats(target_geo_def, data) d_arr = self._expand_reduce(data.data, repeats) new_data = xr.DataArray(d_arr, dims=data.dims) return update_resampled_coords(data, new_data, target_geo_def) +def _get_repeats(target_geo_def, data): + y_axis, x_axis = _get_axes(data) + out_shape = target_geo_def.shape + in_shape = data.shape + y_repeats = out_shape[0] / float(in_shape[y_axis]) + x_repeats = out_shape[1] / float(in_shape[x_axis]) + repeats = {axis_idx: 1. for axis_idx in range(data.ndim) if axis_idx not in [y_axis, x_axis]} + repeats[y_axis] = y_repeats + repeats[x_axis] = x_repeats + + return repeats + + +def _get_axes(data): + if "x" not in data.dims or "y" not in data.dims: + if data.ndim not in [2, 3]: + raise ValueError("Can only handle 2D or 3D arrays without dimensions.") + # assume rows is the second to last axis + y_axis = data.ndim - 2 + x_axis = data.ndim - 1 + else: + y_axis = data.dims.index("y") + x_axis = data.dims.index("x") + + return y_axis, x_axis + + def _aggregate(d, y_size, x_size): """Average every 4 elements (2x2) in a 2D array.""" if d.ndim != 2: From ea5d24b364b885354dbb7893cf7b61bfdc6e0941 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 13 May 2025 11:30:30 +0300 Subject: [PATCH 11/30] Simplify native._rechunk_if_nonfactor_chunks() --- satpy/resample/native.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/satpy/resample/native.py b/satpy/resample/native.py index 6dc410033f..9c26328c26 100644 --- a/satpy/resample/native.py +++ b/satpy/resample/native.py @@ -157,16 +157,11 @@ def _repeat_by_factor(data, block_info=None): def _rechunk_if_nonfactor_chunks(dask_arr, y_size, x_size): - need_rechunk = False new_chunks = list(dask_arr.chunks) for dim_idx, agg_size in enumerate([y_size, x_size]): if dask_arr.shape[dim_idx] % agg_size != 0: raise ValueError("Aggregation requires arrays with shapes divisible by the factor.") - for chunk_size in dask_arr.chunks[dim_idx]: - if chunk_size % agg_size != 0: - need_rechunk = True - new_dim_chunk = lcm(chunk_size, agg_size) - new_chunks[dim_idx] = new_dim_chunk + need_rechunk = _check_chunking(new_chunks, dask_arr, dim_idx, agg_size) if need_rechunk: warnings.warn( "Array chunk size is not divisible by aggregation factor. " @@ -178,6 +173,16 @@ def _rechunk_if_nonfactor_chunks(dask_arr, y_size, x_size): return dask_arr +def _check_chunking(new_chunks, dask_arr, dim_idx, agg_size): + need_rechunk = False + for chunk_size in dask_arr.chunks[dim_idx]: + if chunk_size % agg_size != 0: + need_rechunk = True + new_dim_chunk = lcm(chunk_size, agg_size) + new_chunks[dim_idx] = new_dim_chunk + return need_rechunk + + def get_native_resampler_classes(): """Get classes based on native resampler.""" return {"native": NativeResampler} From c71f33d7a26799174dd99d8dbcf65ea3477c2661 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 13 May 2025 11:58:07 +0300 Subject: [PATCH 12/30] Simplify _check_crs_units and add_xy_coords() more --- satpy/resample/base.py | 58 +++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/satpy/resample/base.py b/satpy/resample/base.py index b8c867b9e0..d4aaf38cc4 100644 --- a/satpy/resample/base.py +++ b/satpy/resample/base.py @@ -107,16 +107,17 @@ def add_xy_coords(data_arr, area, crs=None): def _check_crs_units(crs, x_attrs, y_attrs): - if crs is not None: - units = crs.axis_info[0].unit_name - # fix udunits/CF standard units - units = units.replace("metre", "meter") - if units == "degree": - y_attrs["units"] = "degrees_north" - x_attrs["units"] = "degrees_east" - else: - y_attrs["units"] = units - x_attrs["units"] = units + if crs is None: + return + units = crs.axis_info[0].unit_name + # fix udunits/CF standard units + units = units.replace("metre", "meter") + if units == "degree": + y_attrs["units"] = "degrees_north" + x_attrs["units"] = "degrees_east" + else: + y_attrs["units"] = units + x_attrs["units"] = units def add_crs_xy_coords(data_arr, area): @@ -135,22 +136,7 @@ def add_crs_xy_coords(data_arr, area): information from. """ - # add CRS object if pyproj 2.0+ - try: - from pyproj import CRS - except ImportError: - LOG.debug("Could not add 'crs' coordinate with pyproj<2.0") - crs = None - else: - # default lat/lon projection - latlon_proj = "+proj=latlong +datum=WGS84 +ellps=WGS84" - # otherwise get it from the area definition - if hasattr(area, "crs"): - crs = area.crs - else: - proj_str = getattr(area, "proj_str", latlon_proj) - crs = CRS.from_string(proj_str) - data_arr = data_arr.assign_coords(crs=crs) + crs, data_arr = _add_crs(area, data_arr) # Add x/y coordinates if possible if isinstance(area, SwathDefinition): @@ -175,6 +161,26 @@ def add_crs_xy_coords(data_arr, area): return data_arr +def _add_crs(area, data_arr): + # add CRS object if pyproj 2.0+ + try: + from pyproj import CRS + except ImportError: + LOG.debug("Could not add 'crs' coordinate with pyproj<2.0") + crs = None + else: + # default lat/lon projection + latlon_proj = "+proj=latlong +datum=WGS84 +ellps=WGS84" + # otherwise get it from the area definition + if hasattr(area, "crs"): + crs = area.crs + else: + proj_str = getattr(area, "proj_str", latlon_proj) + crs = CRS.from_string(proj_str) + data_arr = data_arr.assign_coords(crs=crs) + return crs, data_arr + + def update_resampled_coords(old_data, new_data, new_area): """Add coordinate information to newly resampled DataArray. From 86a4def0dd4f04cfa9efee8d702d0b5fed7c2ad8 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 13 May 2025 12:10:36 +0300 Subject: [PATCH 13/30] Reduce cyclomatic complecity of NativeResampler._expand_reduce() --- satpy/resample/native.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/satpy/resample/native.py b/satpy/resample/native.py index 9c26328c26..60f39c6a13 100644 --- a/satpy/resample/native.py +++ b/satpy/resample/native.py @@ -39,8 +39,7 @@ def resample(self, data, cache_dir=None, mask_area=False, **kwargs): @classmethod def _expand_reduce(cls, d_arr, repeats): """Expand reduce.""" - if not isinstance(d_arr, da.Array): - d_arr = da.from_array(d_arr, chunks=CHUNK_SIZE) + d_arr = _ensure_dask_array(d_arr) if all(x == 1 for x in repeats.values()): return d_arr if all(x >= 1 for x in repeats.values()): @@ -71,6 +70,12 @@ def compute(self, data, expand=True, **kwargs): return update_resampled_coords(data, new_data, target_geo_def) +def _ensure_dask_array(d_arr): + if not isinstance(d_arr, da.Array): + d_arr = da.from_array(d_arr, chunks=CHUNK_SIZE) + return d_arr + + def _get_repeats(target_geo_def, data): y_axis, x_axis = _get_axes(data) out_shape = target_geo_def.shape From 44ee39b9c122da80218cebfd5ed078ecf4f495e2 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 13 May 2025 12:28:43 +0300 Subject: [PATCH 14/30] Make base.hash_dict a private function --- satpy/resample/__init__.py | 1 - satpy/resample/base.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/satpy/resample/__init__.py b/satpy/resample/__init__.py index dee93be15f..84b9ce0a7d 100644 --- a/satpy/resample/__init__.py +++ b/satpy/resample/__init__.py @@ -185,7 +185,6 @@ def __getattr__(name: str) -> Any: new_submod = "bucket" obj = getattr(bucket, name) elif name in ( - "hash_dict", "get_area_file", "get_area_def", "add_xy_coords", diff --git a/satpy/resample/base.py b/satpy/resample/base.py index d4aaf38cc4..d385ae90fc 100644 --- a/satpy/resample/base.py +++ b/satpy/resample/base.py @@ -36,7 +36,7 @@ resamplers_cache: "WeakValueDictionary[tuple, object]" = WeakValueDictionary() -def hash_dict(the_dict, the_hash=None): +def _hash_dict(the_dict, the_hash=None): """Calculate a hash for a dictionary.""" if the_hash is None: the_hash = hashlib.sha1() # nosec @@ -274,7 +274,7 @@ def prepare_resampler(source_area, destination_area, resampler=None, **resample_ key = (resampler_class, source_area, destination_area, - hash_dict(resample_kwargs).hexdigest()) + _hash_dict(resample_kwargs).hexdigest()) try: resampler_instance = resamplers_cache[key] except KeyError: From e33ea4bdd70cc731165a6268f103cd82bf723346 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 16 May 2025 14:44:21 +0300 Subject: [PATCH 15/30] Make get_area_file and update_resampled_coords private functions --- satpy/resample/__init__.py | 2 -- satpy/resample/base.py | 6 +++--- satpy/resample/bucket.py | 4 ++-- satpy/resample/kdtree.py | 6 +++--- satpy/resample/native.py | 4 ++-- satpy/tests/writer_tests/test_awips_tiled.py | 6 +++--- 6 files changed, 13 insertions(+), 15 deletions(-) diff --git a/satpy/resample/__init__.py b/satpy/resample/__init__.py index 84b9ce0a7d..113ee8e3d7 100644 --- a/satpy/resample/__init__.py +++ b/satpy/resample/__init__.py @@ -189,10 +189,8 @@ def __getattr__(name: str) -> Any: "get_area_def", "add_xy_coords", "add_crs_xy_coords", - "update_resampled_coords", "resample", "prepare_resampler", - "get_fill_value", "resample_dataset" ): from . import base diff --git a/satpy/resample/base.py b/satpy/resample/base.py index d385ae90fc..4afdf5865d 100644 --- a/satpy/resample/base.py +++ b/satpy/resample/base.py @@ -181,7 +181,7 @@ def _add_crs(area, data_arr): return crs, data_arr -def update_resampled_coords(old_data, new_data, new_area): +def _update_resampled_coords(old_data, new_data, new_area): """Add coordinate information to newly resampled DataArray. Args: @@ -314,7 +314,7 @@ def resample(source_area, data, destination_area, return res -def get_fill_value(dataset): +def _get_fill_value(dataset): """Get the fill value of the *dataset*, defaulting to np.nan.""" if np.issubdtype(dataset.dtype, np.integer): return dataset.attrs.get("_FillValue", np.nan) @@ -345,7 +345,7 @@ def resample_dataset(dataset, destination_area, **kwargs): return dataset - fill_value = kwargs.pop("fill_value", get_fill_value(dataset)) + fill_value = kwargs.pop("fill_value", _get_fill_value(dataset)) new_data = resample(source_area, dataset, destination_area, fill_value=fill_value, **kwargs) new_attrs = new_data.attrs new_data.attrs = dataset.attrs.copy() diff --git a/satpy/resample/bucket.py b/satpy/resample/bucket.py index 2ac3cac014..ea5192b416 100644 --- a/satpy/resample/bucket.py +++ b/satpy/resample/bucket.py @@ -7,7 +7,7 @@ import xarray as xr from pyresample.resampler import BaseResampler as PRBaseResampler -from satpy.resample.base import update_resampled_coords +from satpy.resample.base import _update_resampled_coords from satpy.utils import get_legacy_chunk_size LOG = getLogger(__name__) @@ -60,7 +60,7 @@ def resample(self, data, **kwargs): # noqa: D417 result = xr.DataArray(result, dims=dims, coords=coords, attrs=attrs) - return update_resampled_coords(data, result, self.target_geo_def) + return _update_resampled_coords(data, result, self.target_geo_def) def _adjust_attrs(self, attrs): # Adjust some attributes diff --git a/satpy/resample/kdtree.py b/satpy/resample/kdtree.py index 8b135e3c62..5f5694e21a 100644 --- a/satpy/resample/kdtree.py +++ b/satpy/resample/kdtree.py @@ -10,7 +10,7 @@ import zarr from pyresample.resampler import BaseResampler as PRBaseResampler -from satpy.resample.base import update_resampled_coords +from satpy.resample.base import _update_resampled_coords from satpy.utils import get_legacy_chunk_size LOG = getLogger(__name__) @@ -187,7 +187,7 @@ def compute(self, data, weight_funcs=None, fill_value=np.nan, del kwargs LOG.debug("Resampling %s", str(data.name)) res = self.resampler.get_sample_from_neighbour_info(data, fill_value) - return update_resampled_coords(data, res, self.target_geo_def) + return _update_resampled_coords(data, res, self.target_geo_def) class BilinearResampler(PRBaseResampler): @@ -290,7 +290,7 @@ def compute(self, data, fill_value=None, **kwargs): fill_value=fill_value, output_shape=target_shape) - return update_resampled_coords(data, res, self.target_geo_def) + return _update_resampled_coords(data, res, self.target_geo_def) def _move_existing_caches(cache_dir, filename): diff --git a/satpy/resample/native.py b/satpy/resample/native.py index 60f39c6a13..572fd5c609 100644 --- a/satpy/resample/native.py +++ b/satpy/resample/native.py @@ -8,7 +8,7 @@ import xarray as xr from pyresample.resampler import BaseResampler as PRBaseResampler -from satpy.resample.base import update_resampled_coords +from satpy.resample.base import _update_resampled_coords from satpy.utils import PerformanceWarning, get_legacy_chunk_size CHUNK_SIZE = get_legacy_chunk_size() @@ -67,7 +67,7 @@ def compute(self, data, expand=True, **kwargs): d_arr = self._expand_reduce(data.data, repeats) new_data = xr.DataArray(d_arr, dims=data.dims) - return update_resampled_coords(data, new_data, target_geo_def) + return _update_resampled_coords(data, new_data, target_geo_def) def _ensure_dask_array(d_arr): diff --git a/satpy/tests/writer_tests/test_awips_tiled.py b/satpy/tests/writer_tests/test_awips_tiled.py index 364d0c6b8e..bfc09f2964 100644 --- a/satpy/tests/writer_tests/test_awips_tiled.py +++ b/satpy/tests/writer_tests/test_awips_tiled.py @@ -30,8 +30,6 @@ import xarray as xr from pyproj import CRS -from satpy.resample import update_resampled_coords - START_TIME = dt.datetime(2018, 1, 1, 12, 0, 0) END_TIME = START_TIME + dt.timedelta(minutes=20) @@ -137,6 +135,8 @@ def _get_test_data(shape=(200, 100), chunks=50): def _get_test_lcc_data(dask_arr, area_def, extra_attrs=None): + from satpy.resample import _update_resampled_coords + attrs = dict( name="test_ds", platform_name="PLAT", @@ -154,7 +154,7 @@ def _get_test_lcc_data(dask_arr, area_def, extra_attrs=None): dims=("y", "x") if dask_arr.ndim == 2 else ("bands", "y", "x"), attrs=attrs, ) - return update_resampled_coords(ds, ds, area_def) + return _update_resampled_coords(ds, ds, area_def) class TestAWIPSTiledWriter: From 412d8480df502fcb95c9539bf15c497b19008401 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 16 May 2025 15:05:40 +0300 Subject: [PATCH 16/30] Fix import --- satpy/tests/writer_tests/test_awips_tiled.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/tests/writer_tests/test_awips_tiled.py b/satpy/tests/writer_tests/test_awips_tiled.py index bfc09f2964..d5d97844c8 100644 --- a/satpy/tests/writer_tests/test_awips_tiled.py +++ b/satpy/tests/writer_tests/test_awips_tiled.py @@ -135,7 +135,7 @@ def _get_test_data(shape=(200, 100), chunks=50): def _get_test_lcc_data(dask_arr, area_def, extra_attrs=None): - from satpy.resample import _update_resampled_coords + from satpy.resample.base import _update_resampled_coords attrs = dict( name="test_ds", From d7e4e9eea2a0826a75cfb9ad75d78db688da47b4 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 16 May 2025 20:03:57 +0300 Subject: [PATCH 17/30] Move get_area_def and get_area_file to a new module satpy.area_utils --- satpy/area_utils.py | 43 ++++++++++++++++++++++++++++++++++ satpy/composites/__init__.py | 2 +- satpy/readers/cmsaf_claas2.py | 2 +- satpy/readers/eum_l2_bufr.py | 2 +- satpy/readers/fci_l2_nc.py | 2 +- satpy/readers/gerb_l2_hr_h5.py | 2 +- satpy/readers/hsaf_h5.py | 2 +- satpy/readers/li_l2_nc.py | 2 +- satpy/readers/yaml_reader.py | 3 ++- satpy/resample/__init__.py | 25 ++++++++++++-------- satpy/resample/base.py | 26 -------------------- satpy/scene.py | 3 ++- satpy/tests/test_config.py | 4 ++-- satpy/tests/test_testing.py | 2 +- satpy/writers/__init__.py | 2 +- 15 files changed, 73 insertions(+), 49 deletions(-) create mode 100644 satpy/area_utils.py diff --git a/satpy/area_utils.py b/satpy/area_utils.py new file mode 100644 index 0000000000..2e552758eb --- /dev/null +++ b/satpy/area_utils.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +# Copyright (c) 2015-2025 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see . +"""Utility functions for area definitions.""" +from ._config import config_search_paths, get_config_path + + +def get_area_file(): + """Find area file(s) to use. + + The files are to be named `areas.yaml` or `areas.def`. + """ + paths = config_search_paths("areas.yaml") + if paths: + return paths + else: + return get_config_path("areas.def") + + +def get_area_def(area_name): + """Get the definition of *area_name* from file. + + The file is defined to use is to be placed in the $SATPY_CONFIG_PATH + directory, and its name is defined in satpy's configuration file. + """ + try: + from pyresample import parse_area_file + except ImportError: + from pyresample.utils import parse_area_file + return parse_area_file(get_area_file(), area_name)[0] diff --git a/satpy/composites/__init__.py b/satpy/composites/__init__.py index 0eaf9cf4b3..4257c88759 100644 --- a/satpy/composites/__init__.py +++ b/satpy/composites/__init__.py @@ -1598,7 +1598,7 @@ def __init__(self, name, filename=None, url=None, known_hash=None, area=None, # self._known_hash = known_hash self.area = None if area is not None: - from satpy.resample.base import get_area_def + from satpy.area_utils import get_area_def self.area = get_area_def(area) super(StaticImageCompositor, self).__init__(name, **kwargs) diff --git a/satpy/readers/cmsaf_claas2.py b/satpy/readers/cmsaf_claas2.py index 2b60ff18c6..03d7405449 100644 --- a/satpy/readers/cmsaf_claas2.py +++ b/satpy/readers/cmsaf_claas2.py @@ -2,7 +2,7 @@ import datetime -from satpy.resample.base import get_area_def +from satpy.area_utils import get_area_def from .netcdf_utils import NetCDF4FileHandler diff --git a/satpy/readers/eum_l2_bufr.py b/satpy/readers/eum_l2_bufr.py index 49470f8157..fce9c1194a 100644 --- a/satpy/readers/eum_l2_bufr.py +++ b/satpy/readers/eum_l2_bufr.py @@ -32,11 +32,11 @@ import numpy as np import xarray as xr +from satpy.area_utils import get_area_def from satpy.readers._geos_area import get_geos_area_naming from satpy.readers.eum_base import get_service_mode, recarray2dict from satpy.readers.file_handlers import BaseFileHandler from satpy.readers.seviri_base import mpef_product_header -from satpy.resample.base import get_area_def from satpy.utils import get_legacy_chunk_size try: diff --git a/satpy/readers/fci_l2_nc.py b/satpy/readers/fci_l2_nc.py index 99db81b630..614d0e8263 100644 --- a/satpy/readers/fci_l2_nc.py +++ b/satpy/readers/fci_l2_nc.py @@ -24,10 +24,10 @@ from pyresample import geometry from satpy._compat import cached_property +from satpy.area_utils import get_area_def from satpy.readers._geos_area import get_geos_area_naming, make_ext from satpy.readers.eum_base import get_service_mode from satpy.readers.file_handlers import BaseFileHandler -from satpy.resample.base import get_area_def from satpy.utils import get_legacy_chunk_size logger = logging.getLogger(__name__) diff --git a/satpy/readers/gerb_l2_hr_h5.py b/satpy/readers/gerb_l2_hr_h5.py index 0381561aa6..395238d8e6 100644 --- a/satpy/readers/gerb_l2_hr_h5.py +++ b/satpy/readers/gerb_l2_hr_h5.py @@ -25,8 +25,8 @@ import datetime as dt import logging +from satpy.area_utils import get_area_def from satpy.readers.hdf5_utils import HDF5FileHandler -from satpy.resample.base import get_area_def LOG = logging.getLogger(__name__) diff --git a/satpy/readers/hsaf_h5.py b/satpy/readers/hsaf_h5.py index 87c146b69a..f6e15e4c5b 100644 --- a/satpy/readers/hsaf_h5.py +++ b/satpy/readers/hsaf_h5.py @@ -26,8 +26,8 @@ import numpy as np import xarray as xr +from satpy.area_utils import get_area_def from satpy.readers.file_handlers import BaseFileHandler -from satpy.resample.base import get_area_def from satpy.utils import get_legacy_chunk_size LOG = logging.getLogger(__name__) diff --git a/satpy/readers/li_l2_nc.py b/satpy/readers/li_l2_nc.py index 824f05f009..1a2528f734 100644 --- a/satpy/readers/li_l2_nc.py +++ b/satpy/readers/li_l2_nc.py @@ -80,8 +80,8 @@ import numpy as np import xarray as xr +from satpy.area_utils import get_area_def from satpy.readers.li_base_nc import LINCFileHandler -from satpy.resample.base import get_area_def from satpy.utils import get_legacy_chunk_size logger = logging.getLogger(__name__) diff --git a/satpy/readers/yaml_reader.py b/satpy/readers/yaml_reader.py index 86fa5cb08f..9aad6e1f33 100644 --- a/satpy/readers/yaml_reader.py +++ b/satpy/readers/yaml_reader.py @@ -42,10 +42,11 @@ from satpy import DatasetDict from satpy._compat import cache +from satpy.area_utils import get_area_def from satpy.aux_download import DataDownloadMixin from satpy.dataset import DataID, DataQuery, get_key from satpy.dataset.dataid import default_co_keys_config, default_id_keys_config, get_keys_from_config -from satpy.resample.base import add_crs_xy_coords, get_area_def +from satpy.resample.base import add_crs_xy_coords from satpy.utils import recursive_dict_update logger = logging.getLogger(__name__) diff --git a/satpy/resample/__init__.py b/satpy/resample/__init__.py index 113ee8e3d7..21bcddcc62 100644 --- a/satpy/resample/__init__.py +++ b/satpy/resample/__init__.py @@ -142,10 +142,10 @@ >>> from pyresample import load_area >>> my_area = load_area('my_areas.yaml', 'my_area') -Or using :func:`satpy.resample.base.get_area_def`, which will search through all +Or using :func:`satpy.area_utils.get_area_def`, which will search through all ``areas.yaml`` files in your ``SATPY_CONFIG_PATH``:: - >>> from satpy.base.resample import get_area_def + >>> from satpy.area_utils import get_area_def >>> area_eurol = get_area_def("eurol") For examples of area definitions, see the file ``etc/areas.yaml`` that is @@ -168,11 +168,11 @@ def __getattr__(name: str) -> Any: if name in ("KDTreeResampler", "BilinearResampler"): from . import kdtree - new_submod = "kdtree" + new_mod = "satpy.resample.kdtree" obj = getattr(kdtree, name) elif name == "NativeResampler": from .native import NativeResampler - new_submod = "native" + new_mod = "satpy.resample.native" obj = NativeResampler elif name in ( "BucketResamplerBase", @@ -182,11 +182,9 @@ def __getattr__(name: str) -> Any: "BucketFraction" ): from . import bucket - new_submod = "bucket" + new_mod = "satpy.resample.bucket" obj = getattr(bucket, name) elif name in ( - "get_area_file", - "get_area_def", "add_xy_coords", "add_crs_xy_coords", "resample", @@ -194,14 +192,21 @@ def __getattr__(name: str) -> Any: "resample_dataset" ): from . import base - new_submod = "base" + new_mod = "satpy.resample.base" obj = getattr(base, name) + elif name in ( + "get_area_file", + "get_area_def", + ): + from satpy import area_utils + new_mod = "satpy.area_utils" + obj = getattr(area_utils, name) else: raise AttributeError(f"module '{__name__}' has no attribute '{name}'") warnings.warn( - f"'satpy.resample.{name}' has been moved to 'satpy.resample.{new_submod}.{name}'. " - f"Import from the new location instead (ex. 'from satpy.resample.{new_submod} import {name}').", + f"'satpy.resample.{name}' has been moved to '{new_mod}.{name}'. " + f"Import from the new location instead (ex. '{new_mod} import {name}').", stacklevel=2, ) return obj diff --git a/satpy/resample/base.py b/satpy/resample/base.py index 4afdf5865d..0cd59325c3 100644 --- a/satpy/resample/base.py +++ b/satpy/resample/base.py @@ -26,7 +26,6 @@ from pyresample.geometry import SwathDefinition from pyresample.resampler import BaseResampler as PRBaseResampler -from satpy._config import config_search_paths, get_config_path from satpy.utils import get_legacy_chunk_size LOG = getLogger(__name__) @@ -44,31 +43,6 @@ def _hash_dict(the_dict, the_hash=None): return the_hash -def get_area_file(): - """Find area file(s) to use. - - The files are to be named `areas.yaml` or `areas.def`. - """ - paths = config_search_paths("areas.yaml") - if paths: - return paths - else: - return get_config_path("areas.def") - - -def get_area_def(area_name): - """Get the definition of *area_name* from file. - - The file is defined to use is to be placed in the $SATPY_CONFIG_PATH - directory, and its name is defined in satpy's configuration file. - """ - try: - from pyresample import parse_area_file - except ImportError: - from pyresample.utils import parse_area_file - return parse_area_file(get_area_file(), area_name)[0] - - def add_xy_coords(data_arr, area, crs=None): """Assign x/y coordinates to DataArray from provided area. diff --git a/satpy/scene.py b/satpy/scene.py index d41dcf3922..478650d453 100644 --- a/satpy/scene.py +++ b/satpy/scene.py @@ -28,13 +28,14 @@ from pyresample.geometry import AreaDefinition, BaseDefinition, SwathDefinition from xarray import DataArray +from satpy.area_utils import get_area_def from satpy.composites import IncompatibleAreas from satpy.composites.config_loader import load_compositor_configs_for_sensors from satpy.dataset import DataID, DataQuery, DatasetDict, combine_metadata, dataset_walker, replace_anc from satpy.dependency_tree import DependencyTree from satpy.node import CompositorNode, MissingDependencies, ReaderNode from satpy.readers import load_readers -from satpy.resample.base import get_area_def, prepare_resampler, resample_dataset +from satpy.resample.base import prepare_resampler, resample_dataset from satpy.utils import convert_remote_files_to_fsspec, get_storage_options_from_reader_kwargs from satpy.writers import load_writer diff --git a/satpy/tests/test_config.py b/satpy/tests/test_config.py index 852ce71e57..975cb9fb65 100644 --- a/satpy/tests/test_config.py +++ b/satpy/tests/test_config.py @@ -50,7 +50,7 @@ def test_areas_pyproj(self): from pyresample import parse_area_file from pyresample.geometry import SwathDefinition - from satpy.resample.base import get_area_file + from satpy.area_utils import get_area_file lons = np.array([[0, 0.1, 0.2], [0.05, 0.15, 0.25]]) lats = np.array([[0, 0.1, 0.2], [0.05, 0.15, 0.25]]) @@ -82,7 +82,7 @@ def test_areas_rasterio(self): from pyresample import parse_area_file from pyresample.geometry import SwathDefinition - from satpy.resample.base import get_area_file + from satpy.area_utils import get_area_file lons = np.array([[0, 0.1, 0.2], [0.05, 0.15, 0.25]]) lats = np.array([[0, 0.1, 0.2], [0.05, 0.15, 0.25]]) diff --git a/satpy/tests/test_testing.py b/satpy/tests/test_testing.py index 63aee37a68..36bd973a9c 100644 --- a/satpy/tests/test_testing.py +++ b/satpy/tests/test_testing.py @@ -4,7 +4,7 @@ import xarray as xr from satpy import Scene -from satpy.resample.base import get_area_def +from satpy.area_utils import get_area_def from satpy.testing import fake_satpy_reading diff --git a/satpy/writers/__init__.py b/satpy/writers/__init__.py index f729adea1f..a39c16683d 100644 --- a/satpy/writers/__init__.py +++ b/satpy/writers/__init__.py @@ -33,10 +33,10 @@ from yaml import UnsafeLoader from satpy._config import config_search_paths, get_entry_points_config_dirs, glob_config +from satpy.area_utils import get_area_def from satpy.aux_download import DataDownloadMixin from satpy.enhancements.enhancer import Enhancer from satpy.plugin_base import Plugin -from satpy.resample.base import get_area_def from satpy.utils import get_legacy_chunk_size, get_logger LOG = get_logger(__name__) From 125e69150df041788500178eaa37ba40cf1af13c Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 16 May 2025 20:29:37 +0300 Subject: [PATCH 18/30] Create satpy.coords_utils module --- satpy/coords_utils.py | 137 ++++++++++++++++++++++++ satpy/readers/yaml_reader.py | 2 +- satpy/resample/__init__.py | 9 +- satpy/resample/base.py | 116 +------------------- satpy/tests/cf_tests/test_dataaarray.py | 2 +- satpy/tests/test_composites.py | 2 +- satpy/tests/test_coords_utils.py | 130 ++++++++++++++++++++++ satpy/tests/test_resample.py | 106 ------------------ satpy/tests/utils.py | 2 +- 9 files changed, 280 insertions(+), 226 deletions(-) create mode 100644 satpy/coords_utils.py create mode 100644 satpy/tests/test_coords_utils.py diff --git a/satpy/coords_utils.py b/satpy/coords_utils.py new file mode 100644 index 0000000000..bb2c0e7bc7 --- /dev/null +++ b/satpy/coords_utils.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# Copyright (c) 2015-2025 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see . +"""Utility functions for coordinates.""" + +import logging + +import xarray as xr + +LOG = logging.getLogger(__name__) + + +def add_xy_coords(data_arr, area, crs=None): + """Assign x/y coordinates to DataArray from provided area. + + If 'x' and 'y' coordinates already exist then they will not be added. + + Args: + data_arr (xarray.DataArray): data object to add x/y coordinates to + area (pyresample.geometry.AreaDefinition): area providing the + coordinate data. + crs (pyproj.crs.CRS or None): CRS providing additional information + about the area's coordinate reference system if available. + Requires pyproj 2.0+. + + Returns (xarray.DataArray): Updated DataArray object + + """ + if "x" in data_arr.coords and "y" in data_arr.coords: + # x/y coords already provided + return data_arr + if "x" not in data_arr.dims or "y" not in data_arr.dims: + # no defined x and y dimensions + return data_arr + if not hasattr(area, "get_proj_vectors"): + return data_arr + x, y = area.get_proj_vectors() + + # convert to DataArrays + y_attrs = {} + x_attrs = {} + + _check_crs_units(crs, x_attrs, y_attrs) + + y = xr.DataArray(y, dims=("y",), attrs=y_attrs) + x = xr.DataArray(x, dims=("x",), attrs=x_attrs) + return data_arr.assign_coords(y=y, x=x) + + +def _check_crs_units(crs, x_attrs, y_attrs): + if crs is None: + return + units = crs.axis_info[0].unit_name + # fix udunits/CF standard units + units = units.replace("metre", "meter") + if units == "degree": + y_attrs["units"] = "degrees_north" + x_attrs["units"] = "degrees_east" + else: + y_attrs["units"] = units + x_attrs["units"] = units + + +def add_crs_xy_coords(data_arr, area): + """Add :class:`pyproj.crs.CRS` and x/y or lons/lats to coordinates. + + For SwathDefinition or GridDefinition areas this will add a + `crs` coordinate and coordinates for the 2D arrays of `lons` and `lats`. + + For AreaDefinition areas this will add a `crs` coordinate and the + 1-dimensional `x` and `y` coordinate variables. + + Args: + data_arr (xarray.DataArray): DataArray to add the 'crs' + coordinate. + area (pyresample.geometry.AreaDefinition): Area to get CRS + information from. + + """ + from pyresample.geometry import SwathDefinition + + crs, data_arr = _add_crs(area, data_arr) + + # Add x/y coordinates if possible + if isinstance(area, SwathDefinition): + # add lon/lat arrays for swath definitions + # SwathDefinitions created by Satpy should be assigning DataArray + # objects as the lons/lats attributes so use those directly to + # maintain original .attrs metadata (instead of converting to dask + # array). + lons = area.lons + lats = area.lats + lons.attrs.setdefault("standard_name", "longitude") + lons.attrs.setdefault("long_name", "longitude") + lons.attrs.setdefault("units", "degrees_east") + lats.attrs.setdefault("standard_name", "latitude") + lats.attrs.setdefault("long_name", "latitude") + lats.attrs.setdefault("units", "degrees_north") + # See https://github.com/pydata/xarray/issues/3068 + # data_arr = data_arr.assign_coords(longitude=lons, latitude=lats) + else: + # Gridded data (AreaDefinition/StackedAreaDefinition) + data_arr = add_xy_coords(data_arr, area, crs=crs) + return data_arr + + +def _add_crs(area, data_arr): + # add CRS object if pyproj 2.0+ + try: + from pyproj import CRS + except ImportError: + LOG.debug("Could not add 'crs' coordinate with pyproj<2.0") + crs = None + else: + # default lat/lon projection + latlon_proj = "+proj=latlong +datum=WGS84 +ellps=WGS84" + # otherwise get it from the area definition + if hasattr(area, "crs"): + crs = area.crs + else: + proj_str = getattr(area, "proj_str", latlon_proj) + crs = CRS.from_string(proj_str) + data_arr = data_arr.assign_coords(crs=crs) + return crs, data_arr diff --git a/satpy/readers/yaml_reader.py b/satpy/readers/yaml_reader.py index 9aad6e1f33..1678614580 100644 --- a/satpy/readers/yaml_reader.py +++ b/satpy/readers/yaml_reader.py @@ -44,9 +44,9 @@ from satpy._compat import cache from satpy.area_utils import get_area_def from satpy.aux_download import DataDownloadMixin +from satpy.coords_utils import add_crs_xy_coords from satpy.dataset import DataID, DataQuery, get_key from satpy.dataset.dataid import default_co_keys_config, default_id_keys_config, get_keys_from_config -from satpy.resample.base import add_crs_xy_coords from satpy.utils import recursive_dict_update logger = logging.getLogger(__name__) diff --git a/satpy/resample/__init__.py b/satpy/resample/__init__.py index 21bcddcc62..55ef73eb96 100644 --- a/satpy/resample/__init__.py +++ b/satpy/resample/__init__.py @@ -185,8 +185,6 @@ def __getattr__(name: str) -> Any: new_mod = "satpy.resample.bucket" obj = getattr(bucket, name) elif name in ( - "add_xy_coords", - "add_crs_xy_coords", "resample", "prepare_resampler", "resample_dataset" @@ -201,6 +199,13 @@ def __getattr__(name: str) -> Any: from satpy import area_utils new_mod = "satpy.area_utils" obj = getattr(area_utils, name) + elif name in ( + "add_xy_coords", + "add_crs_xy_coords", + ): + from satpy import coords_utils + new_mod = "satpy.coords_utils" + obj = getattr(coords_utils, name) else: raise AttributeError(f"module '{__name__}' has no attribute '{name}'") diff --git a/satpy/resample/base.py b/satpy/resample/base.py index 0cd59325c3..b7606e5dd4 100644 --- a/satpy/resample/base.py +++ b/satpy/resample/base.py @@ -22,8 +22,6 @@ from weakref import WeakValueDictionary import numpy as np -import xarray as xr -from pyresample.geometry import SwathDefinition from pyresample.resampler import BaseResampler as PRBaseResampler from satpy.utils import get_legacy_chunk_size @@ -43,118 +41,6 @@ def _hash_dict(the_dict, the_hash=None): return the_hash -def add_xy_coords(data_arr, area, crs=None): - """Assign x/y coordinates to DataArray from provided area. - - If 'x' and 'y' coordinates already exist then they will not be added. - - Args: - data_arr (xarray.DataArray): data object to add x/y coordinates to - area (pyresample.geometry.AreaDefinition): area providing the - coordinate data. - crs (pyproj.crs.CRS or None): CRS providing additional information - about the area's coordinate reference system if available. - Requires pyproj 2.0+. - - Returns (xarray.DataArray): Updated DataArray object - - """ - if "x" in data_arr.coords and "y" in data_arr.coords: - # x/y coords already provided - return data_arr - if "x" not in data_arr.dims or "y" not in data_arr.dims: - # no defined x and y dimensions - return data_arr - if not hasattr(area, "get_proj_vectors"): - return data_arr - x, y = area.get_proj_vectors() - - # convert to DataArrays - y_attrs = {} - x_attrs = {} - - _check_crs_units(crs, x_attrs, y_attrs) - - y = xr.DataArray(y, dims=("y",), attrs=y_attrs) - x = xr.DataArray(x, dims=("x",), attrs=x_attrs) - return data_arr.assign_coords(y=y, x=x) - - -def _check_crs_units(crs, x_attrs, y_attrs): - if crs is None: - return - units = crs.axis_info[0].unit_name - # fix udunits/CF standard units - units = units.replace("metre", "meter") - if units == "degree": - y_attrs["units"] = "degrees_north" - x_attrs["units"] = "degrees_east" - else: - y_attrs["units"] = units - x_attrs["units"] = units - - -def add_crs_xy_coords(data_arr, area): - """Add :class:`pyproj.crs.CRS` and x/y or lons/lats to coordinates. - - For SwathDefinition or GridDefinition areas this will add a - `crs` coordinate and coordinates for the 2D arrays of `lons` and `lats`. - - For AreaDefinition areas this will add a `crs` coordinate and the - 1-dimensional `x` and `y` coordinate variables. - - Args: - data_arr (xarray.DataArray): DataArray to add the 'crs' - coordinate. - area (pyresample.geometry.AreaDefinition): Area to get CRS - information from. - - """ - crs, data_arr = _add_crs(area, data_arr) - - # Add x/y coordinates if possible - if isinstance(area, SwathDefinition): - # add lon/lat arrays for swath definitions - # SwathDefinitions created by Satpy should be assigning DataArray - # objects as the lons/lats attributes so use those directly to - # maintain original .attrs metadata (instead of converting to dask - # array). - lons = area.lons - lats = area.lats - lons.attrs.setdefault("standard_name", "longitude") - lons.attrs.setdefault("long_name", "longitude") - lons.attrs.setdefault("units", "degrees_east") - lats.attrs.setdefault("standard_name", "latitude") - lats.attrs.setdefault("long_name", "latitude") - lats.attrs.setdefault("units", "degrees_north") - # See https://github.com/pydata/xarray/issues/3068 - # data_arr = data_arr.assign_coords(longitude=lons, latitude=lats) - else: - # Gridded data (AreaDefinition/StackedAreaDefinition) - data_arr = add_xy_coords(data_arr, area, crs=crs) - return data_arr - - -def _add_crs(area, data_arr): - # add CRS object if pyproj 2.0+ - try: - from pyproj import CRS - except ImportError: - LOG.debug("Could not add 'crs' coordinate with pyproj<2.0") - crs = None - else: - # default lat/lon projection - latlon_proj = "+proj=latlong +datum=WGS84 +ellps=WGS84" - # otherwise get it from the area definition - if hasattr(area, "crs"): - crs = area.crs - else: - proj_str = getattr(area, "proj_str", latlon_proj) - crs = CRS.from_string(proj_str) - data_arr = data_arr.assign_coords(crs=crs) - return crs, data_arr - - def _update_resampled_coords(old_data, new_data, new_area): """Add coordinate information to newly resampled DataArray. @@ -165,6 +51,8 @@ def _update_resampled_coords(old_data, new_data, new_area): for the newly resampled data. """ + from satpy.coords_utils import add_crs_xy_coords + # copy over other non-x/y coordinates # this *MUST* happen before we set 'crs' below otherwise any 'crs' # coordinate in the coordinate variables we are copying will overwrite the diff --git a/satpy/tests/cf_tests/test_dataaarray.py b/satpy/tests/cf_tests/test_dataaarray.py index 50e5b54424..c8696c58f3 100644 --- a/satpy/tests/cf_tests/test_dataaarray.py +++ b/satpy/tests/cf_tests/test_dataaarray.py @@ -50,7 +50,7 @@ def test_make_cf_dataarray_lonlat(): from pyresample import create_area_def from satpy.cf.data_array import make_cf_data_array - from satpy.resample import add_crs_xy_coords + from satpy.coords_utils import add_crs_xy_coords area = create_area_def("mavas", 4326, shape=(5, 5), center=(0, 0), resolution=(1, 1)) diff --git a/satpy/tests/test_composites.py b/satpy/tests/test_composites.py index d28a5499b0..3eb5f07aa7 100644 --- a/satpy/tests/test_composites.py +++ b/satpy/tests/test_composites.py @@ -145,7 +145,7 @@ def test_almost_equal_geo_coordinates(self): """ from satpy.composites import CompositeBase - from satpy.resample.base import add_crs_xy_coords + from satpy.coords_utils import add_crs_xy_coords comp = CompositeBase("test_comp") data_arr1 = self._get_test_ds(shape=(2, 2)) diff --git a/satpy/tests/test_coords_utils.py b/satpy/tests/test_coords_utils.py new file mode 100644 index 0000000000..4a30ab7c88 --- /dev/null +++ b/satpy/tests/test_coords_utils.py @@ -0,0 +1,130 @@ +#!/usr/bin/python +# Copyright (c) 2016 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see . +"""Unittests for coordinate utilities.""" + +import unittest + +import dask.array as da +import numpy as np +import xarray as xr +from pyproj import CRS + + +class TestCoordinateHelpers(unittest.TestCase): + """Test various utility functions for working with coordinates.""" + + def test_area_def_coordinates(self): + """Test coordinates being added with an AreaDefinition.""" + from pyresample.geometry import AreaDefinition + + from satpy.coords_utils import add_crs_xy_coords + area_def = AreaDefinition( + "test", "test", "test", {"proj": "lcc", "lat_1": 25, "lat_0": 25}, + 100, 200, [-100, -100, 100, 100] + ) + data_arr = xr.DataArray( + da.zeros((200, 100), chunks=50), + attrs={"area": area_def}, + dims=("y", "x"), + ) + new_data_arr = add_crs_xy_coords(data_arr, area_def) + assert "y" in new_data_arr.coords + assert "x" in new_data_arr.coords + + assert "units" in new_data_arr.coords["y"].attrs + assert new_data_arr.coords["y"].attrs["units"] == "meter" + assert "units" in new_data_arr.coords["x"].attrs + assert new_data_arr.coords["x"].attrs["units"] == "meter" + assert "crs" in new_data_arr.coords + assert isinstance(new_data_arr.coords["crs"].item(), CRS) + assert area_def.crs == new_data_arr.coords["crs"].item() + + # already has coords + data_arr = xr.DataArray( + da.zeros((200, 100), chunks=50), + attrs={"area": area_def}, + dims=("y", "x"), + coords={"y": np.arange(2, 202), "x": np.arange(100)} + ) + new_data_arr = add_crs_xy_coords(data_arr, area_def) + assert "y" in new_data_arr.coords + assert "units" not in new_data_arr.coords["y"].attrs + assert "x" in new_data_arr.coords + assert "units" not in new_data_arr.coords["x"].attrs + np.testing.assert_equal(new_data_arr.coords["y"], np.arange(2, 202)) + + assert "crs" in new_data_arr.coords + assert isinstance(new_data_arr.coords["crs"].item(), CRS) + assert area_def.crs == new_data_arr.coords["crs"].item() + + # lat/lon area + area_def = AreaDefinition( + "test", "test", "test", {"proj": "latlong"}, + 100, 200, [-100, -100, 100, 100] + ) + data_arr = xr.DataArray( + da.zeros((200, 100), chunks=50), + attrs={"area": area_def}, + dims=("y", "x"), + ) + new_data_arr = add_crs_xy_coords(data_arr, area_def) + assert "y" in new_data_arr.coords + assert "x" in new_data_arr.coords + + assert "units" in new_data_arr.coords["y"].attrs + assert new_data_arr.coords["y"].attrs["units"] == "degrees_north" + assert "units" in new_data_arr.coords["x"].attrs + assert new_data_arr.coords["x"].attrs["units"] == "degrees_east" + assert "crs" in new_data_arr.coords + assert isinstance(new_data_arr.coords["crs"].item(), CRS) + assert area_def.crs == new_data_arr.coords["crs"].item() + + def test_swath_def_coordinates(self): + """Test coordinates being added with an SwathDefinition.""" + from pyresample.geometry import SwathDefinition + + from satpy.coords_utils import add_crs_xy_coords + lons_data = da.random.random((200, 100), chunks=50) + lats_data = da.random.random((200, 100), chunks=50) + lons = xr.DataArray(lons_data, attrs={"units": "degrees_east"}, + dims=("y", "x")) + lats = xr.DataArray(lats_data, attrs={"units": "degrees_north"}, + dims=("y", "x")) + area_def = SwathDefinition(lons, lats) + data_arr = xr.DataArray( + da.zeros((200, 100), chunks=50), + attrs={"area": area_def}, + dims=("y", "x"), + ) + new_data_arr = add_crs_xy_coords(data_arr, area_def) + # See https://github.com/pydata/xarray/issues/3068 + # self.assertIn('longitude', new_data_arr.coords) + # self.assertIn('units', new_data_arr.coords['longitude'].attrs) + # self.assertEqual( + # new_data_arr.coords['longitude'].attrs['units'], 'degrees_east') + # self.assertIsInstance(new_data_arr.coords['longitude'].data, da.Array) + # self.assertIn('latitude', new_data_arr.coords) + # self.assertIn('units', new_data_arr.coords['latitude'].attrs) + # self.assertEqual( + # new_data_arr.coords['latitude'].attrs['units'], 'degrees_north') + # self.assertIsInstance(new_data_arr.coords['latitude'].data, da.Array) + + assert "crs" in new_data_arr.coords + crs = new_data_arr.coords["crs"].item() + assert isinstance(crs, CRS) + assert crs.is_geographic + assert isinstance(new_data_arr.coords["crs"].item(), CRS) diff --git a/satpy/tests/test_resample.py b/satpy/tests/test_resample.py index 6da45d239d..a4451ad333 100644 --- a/satpy/tests/test_resample.py +++ b/satpy/tests/test_resample.py @@ -425,112 +425,6 @@ def test_move_existing_caches(self): shutil.rmtree(the_dir) -class TestCoordinateHelpers(unittest.TestCase): - """Test various utility functions for working with coordinates.""" - - def test_area_def_coordinates(self): - """Test coordinates being added with an AreaDefinition.""" - from pyresample.geometry import AreaDefinition - - from satpy.resample.base import add_crs_xy_coords - area_def = AreaDefinition( - "test", "test", "test", {"proj": "lcc", "lat_1": 25, "lat_0": 25}, - 100, 200, [-100, -100, 100, 100] - ) - data_arr = xr.DataArray( - da.zeros((200, 100), chunks=50), - attrs={"area": area_def}, - dims=("y", "x"), - ) - new_data_arr = add_crs_xy_coords(data_arr, area_def) - assert "y" in new_data_arr.coords - assert "x" in new_data_arr.coords - - assert "units" in new_data_arr.coords["y"].attrs - assert new_data_arr.coords["y"].attrs["units"] == "meter" - assert "units" in new_data_arr.coords["x"].attrs - assert new_data_arr.coords["x"].attrs["units"] == "meter" - assert "crs" in new_data_arr.coords - assert isinstance(new_data_arr.coords["crs"].item(), CRS) - assert area_def.crs == new_data_arr.coords["crs"].item() - - # already has coords - data_arr = xr.DataArray( - da.zeros((200, 100), chunks=50), - attrs={"area": area_def}, - dims=("y", "x"), - coords={"y": np.arange(2, 202), "x": np.arange(100)} - ) - new_data_arr = add_crs_xy_coords(data_arr, area_def) - assert "y" in new_data_arr.coords - assert "units" not in new_data_arr.coords["y"].attrs - assert "x" in new_data_arr.coords - assert "units" not in new_data_arr.coords["x"].attrs - np.testing.assert_equal(new_data_arr.coords["y"], np.arange(2, 202)) - - assert "crs" in new_data_arr.coords - assert isinstance(new_data_arr.coords["crs"].item(), CRS) - assert area_def.crs == new_data_arr.coords["crs"].item() - - # lat/lon area - area_def = AreaDefinition( - "test", "test", "test", {"proj": "latlong"}, - 100, 200, [-100, -100, 100, 100] - ) - data_arr = xr.DataArray( - da.zeros((200, 100), chunks=50), - attrs={"area": area_def}, - dims=("y", "x"), - ) - new_data_arr = add_crs_xy_coords(data_arr, area_def) - assert "y" in new_data_arr.coords - assert "x" in new_data_arr.coords - - assert "units" in new_data_arr.coords["y"].attrs - assert new_data_arr.coords["y"].attrs["units"] == "degrees_north" - assert "units" in new_data_arr.coords["x"].attrs - assert new_data_arr.coords["x"].attrs["units"] == "degrees_east" - assert "crs" in new_data_arr.coords - assert isinstance(new_data_arr.coords["crs"].item(), CRS) - assert area_def.crs == new_data_arr.coords["crs"].item() - - def test_swath_def_coordinates(self): - """Test coordinates being added with an SwathDefinition.""" - from pyresample.geometry import SwathDefinition - - from satpy.resample.base import add_crs_xy_coords - lons_data = da.random.random((200, 100), chunks=50) - lats_data = da.random.random((200, 100), chunks=50) - lons = xr.DataArray(lons_data, attrs={"units": "degrees_east"}, - dims=("y", "x")) - lats = xr.DataArray(lats_data, attrs={"units": "degrees_north"}, - dims=("y", "x")) - area_def = SwathDefinition(lons, lats) - data_arr = xr.DataArray( - da.zeros((200, 100), chunks=50), - attrs={"area": area_def}, - dims=("y", "x"), - ) - new_data_arr = add_crs_xy_coords(data_arr, area_def) - # See https://github.com/pydata/xarray/issues/3068 - # self.assertIn('longitude', new_data_arr.coords) - # self.assertIn('units', new_data_arr.coords['longitude'].attrs) - # self.assertEqual( - # new_data_arr.coords['longitude'].attrs['units'], 'degrees_east') - # self.assertIsInstance(new_data_arr.coords['longitude'].data, da.Array) - # self.assertIn('latitude', new_data_arr.coords) - # self.assertIn('units', new_data_arr.coords['latitude'].attrs) - # self.assertEqual( - # new_data_arr.coords['latitude'].attrs['units'], 'degrees_north') - # self.assertIsInstance(new_data_arr.coords['latitude'].data, da.Array) - - assert "crs" in new_data_arr.coords - crs = new_data_arr.coords["crs"].item() - assert isinstance(crs, CRS) - assert crs.is_geographic - assert isinstance(new_data_arr.coords["crs"].item(), CRS) - - class TestBucketAvg(unittest.TestCase): """Test the bucket resampler.""" diff --git a/satpy/tests/utils.py b/satpy/tests/utils.py index 17beebbdf7..70f430b68b 100644 --- a/satpy/tests/utils.py +++ b/satpy/tests/utils.py @@ -373,7 +373,7 @@ def _get_fake_scene_area(arr, area): def _get_did_for_fake_scene(area, arr, extra_attrs, daskify): """Add instance to fake scene. Helper for make_fake_scene.""" - from satpy.resample.base import add_crs_xy_coords + from satpy.coords_utils import add_crs_xy_coords if isinstance(arr, DataArray): new = arr.copy() # don't change attributes of input new.attrs.update(extra_attrs) From f24507f8025a126e9e17ce5ef8d2ffa466a0dac1 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 16 May 2025 20:41:14 +0300 Subject: [PATCH 19/30] Fix get_area_def patching --- satpy/tests/test_composites.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/tests/test_composites.py b/satpy/tests/test_composites.py index 3eb5f07aa7..48cff21db1 100644 --- a/satpy/tests/test_composites.py +++ b/satpy/tests/test_composites.py @@ -1405,7 +1405,7 @@ def test_add_bands_p_l(self): class TestStaticImageCompositor(unittest.TestCase): """Test case for the static compositor.""" - @mock.patch("satpy.resample.base.get_area_def") + @mock.patch("satpy.area_utils.get_area_def") def test_init(self, get_area_def): """Test the initializiation of static compositor.""" from satpy.composites import StaticImageCompositor From 9371f453bbe025e1db9708fec4e2e1fd026dd4a7 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 16 May 2025 20:58:45 +0300 Subject: [PATCH 20/30] Fix bucket doc references, again --- satpy/resample/bucket.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/satpy/resample/bucket.py b/satpy/resample/bucket.py index ea5192b416..59364bebb3 100644 --- a/satpy/resample/bucket.py +++ b/satpy/resample/bucket.py @@ -111,10 +111,10 @@ class BucketAvg(BucketResamplerBase): Parameters ---------- - fill_value : float (default: np.nan) + fill_value : (float) default: `np.nan` Fill value to mark missing/invalid values in the input data, as well as in the binned and averaged output data. - skipna : boolean (default: True) + skipna : (bool) default: `True` If True, skips missing values (as marked by NaN or `fill_value`) for the average calculation (similarly to Numpy's `nanmean`). Buckets containing only missing values are set to fill_value. If False, sets the bucket to fill_value if one or more missing values are present in the bucket @@ -129,7 +129,7 @@ def compute(self, data, fill_value=np.nan, skipna=True, **kwargs): # noqa: D417 Args: data (numpy.Array, dask.Array): Data to be resampled fill_value (numpy.nan, int): fill_value. Defaults to numpy.nan - skipna (boolean): Skip NA's. Default `True` + skipna (bool): Skip NA's. Default `True` Returns: dask.Array @@ -158,9 +158,9 @@ class BucketSum(BucketResamplerBase): Parameters ---------- - fill_value : float (default: np.nan) + fill_value : (float) default: `np.nan` Fill value for missing data - skipna : boolean (default: True) + skipna : (bool) default: `True` If True, skips NaN values for the sum calculation (similarly to Numpy's `nansum`). Buckets containing only NaN are set to zero. If False, sets the bucket to NaN if one or more NaN values are present in the bucket From e5a4db7d76f2c7a6c2436a29cba5f0e3c76329c6 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Wed, 28 May 2025 19:05:51 +0300 Subject: [PATCH 21/30] Use helper function instead of if/elif/else in satpy.resample imports --- satpy/resample/__init__.py | 72 ++++++++++++-------------------------- satpy/scene.py | 2 +- 2 files changed, 24 insertions(+), 50 deletions(-) diff --git a/satpy/resample/__init__.py b/satpy/resample/__init__.py index 55ef73eb96..4bb76a3bef 100644 --- a/satpy/resample/__init__.py +++ b/satpy/resample/__init__.py @@ -160,58 +160,32 @@ """ from __future__ import annotations -import warnings from typing import Any +from satpy.utils import _import_and_warn_new_location + +IMPORT_PATHS = { + "KDTreeResampler": "satpy.resample.kdtree", + "BilinearResampler": "satpy.resample.kdtree", + "NativeResampler": "satpy.resample.native", + "BucketResamplerBase": "satpy.resample.bucket", + "BucketAvg": "satpy.resample.bucket", + "BucketSum": "satpy.resample.bucket", + "BucketCount": "satpy.resample.bucket", + "BucketFraction": "satpy.resample.bucket", + "resample": "satpy.resample.base", + "prepare_resampler": "satpy.resample.base", + "resample_dataset": "satpy.resample.base", + "get_area_file": "satpy.area_utils", + "get_area_def": "satpy.area_utils", + "add_xy_coords": "satpy.coords_utils", + "add_crs_xy_coords": "satpy.coords_utils", +} def __getattr__(name: str) -> Any: - if name in ("KDTreeResampler", "BilinearResampler"): - from . import kdtree - - new_mod = "satpy.resample.kdtree" - obj = getattr(kdtree, name) - elif name == "NativeResampler": - from .native import NativeResampler - new_mod = "satpy.resample.native" - obj = NativeResampler - elif name in ( - "BucketResamplerBase", - "BucketAvg", - "BucketSum", - "BucketCount", - "BucketFraction" - ): - from . import bucket - new_mod = "satpy.resample.bucket" - obj = getattr(bucket, name) - elif name in ( - "resample", - "prepare_resampler", - "resample_dataset" - ): - from . import base - new_mod = "satpy.resample.base" - obj = getattr(base, name) - elif name in ( - "get_area_file", - "get_area_def", - ): - from satpy import area_utils - new_mod = "satpy.area_utils" - obj = getattr(area_utils, name) - elif name in ( - "add_xy_coords", - "add_crs_xy_coords", - ): - from satpy import coords_utils - new_mod = "satpy.coords_utils" - obj = getattr(coords_utils, name) - else: + new_module = IMPORT_PATHS.get(name) + + if new_module is None: raise AttributeError(f"module '{__name__}' has no attribute '{name}'") - warnings.warn( - f"'satpy.resample.{name}' has been moved to '{new_mod}.{name}'. " - f"Import from the new location instead (ex. '{new_mod} import {name}').", - stacklevel=2, - ) - return obj + return _import_and_warn_new_location(new_module, name) diff --git a/satpy/scene.py b/satpy/scene.py index 1a19d40686..a724824be3 100644 --- a/satpy/scene.py +++ b/satpy/scene.py @@ -35,8 +35,8 @@ from satpy.dataset import DataID, DataQuery, DatasetDict, combine_metadata, dataset_walker, replace_anc from satpy.dependency_tree import DependencyTree from satpy.node import CompositorNode, MissingDependencies, ReaderNode -from satpy.resample.base import prepare_resampler, resample_dataset from satpy.readers.core.loading import load_readers +from satpy.resample.base import prepare_resampler, resample_dataset from satpy.utils import convert_remote_files_to_fsspec, get_storage_options_from_reader_kwargs from satpy.writers import load_writer From 3fcbe6d571200f1a1341fd100a56244751ceb8e6 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Wed, 28 May 2025 19:15:36 +0300 Subject: [PATCH 22/30] Fix resample imports and doc references --- doc/source/generate_area_def_list.py | 5 +++-- satpy/modifiers/parallax.py | 2 +- satpy/readers/core/yaml_reader.py | 3 ++- satpy/tests/reader_tests/test_hsaf_h5.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/source/generate_area_def_list.py b/doc/source/generate_area_def_list.py index 2c5d8d17b7..16d6a2cb87 100644 --- a/doc/source/generate_area_def_list.py +++ b/doc/source/generate_area_def_list.py @@ -1,7 +1,8 @@ """Generate the area definition list restructuredtext document. This should be run once before generating the sphinx documentation to -produce the ``area_def_list.rst`` file referenced by ``satpy/resample.py``. +produce the ``area_def_list.rst`` file referenced by +``satpy/resample/__init__.py``. """ import logging @@ -19,7 +20,7 @@ from pyresample.utils.proj4 import ignore_pyproj_proj_warnings from reader_table import rst_table_header, rst_table_row -from satpy.resample import get_area_file +from satpy.area_utils import get_area_file logger = logging.getLogger(__name__) diff --git a/satpy/modifiers/parallax.py b/satpy/modifiers/parallax.py index 1face61e77..3ec1fbe526 100644 --- a/satpy/modifiers/parallax.py +++ b/satpy/modifiers/parallax.py @@ -51,7 +51,7 @@ from pyresample.geometry import SwathDefinition from satpy.modifiers import ModifierBase -from satpy.resample import resample_dataset +from satpy.resample.base import resample_dataset from satpy.utils import get_satpos, lonlat2xyz, xyz2lonlat logger = logging.getLogger(__name__) diff --git a/satpy/readers/core/yaml_reader.py b/satpy/readers/core/yaml_reader.py index 8681fd7a21..7ef0983685 100644 --- a/satpy/readers/core/yaml_reader.py +++ b/satpy/readers/core/yaml_reader.py @@ -42,10 +42,11 @@ from satpy import DatasetDict from satpy._compat import cache +from satpy.area_utils import get_area_def from satpy.aux_download import DataDownloadMixin +from satpy.coords_utils import add_crs_xy_coords from satpy.dataset import DataID, DataQuery, get_key from satpy.dataset.dataid import default_co_keys_config, default_id_keys_config, get_keys_from_config -from satpy.resample import add_crs_xy_coords, get_area_def from satpy.utils import recursive_dict_update logger = logging.getLogger(__name__) diff --git a/satpy/tests/reader_tests/test_hsaf_h5.py b/satpy/tests/reader_tests/test_hsaf_h5.py index bdd523ad0d..c065cb2a0b 100644 --- a/satpy/tests/reader_tests/test_hsaf_h5.py +++ b/satpy/tests/reader_tests/test_hsaf_h5.py @@ -8,7 +8,7 @@ import pytest from satpy import Scene -from satpy.resample import get_area_def +from satpy.area_utils import get_area_def # real shape is 916, 1902 SHAPE_SC = (916, 1902) From 5fe8f062da747128bb9a93572c933ff1663dc603 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Wed, 28 May 2025 19:27:56 +0300 Subject: [PATCH 23/30] Remove unnecessary pyproj<2.0 check --- satpy/coords_utils.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/satpy/coords_utils.py b/satpy/coords_utils.py index bb2c0e7bc7..5c35c42ac6 100644 --- a/satpy/coords_utils.py +++ b/satpy/coords_utils.py @@ -118,20 +118,14 @@ def add_crs_xy_coords(data_arr, area): def _add_crs(area, data_arr): - # add CRS object if pyproj 2.0+ - try: - from pyproj import CRS - except ImportError: - LOG.debug("Could not add 'crs' coordinate with pyproj<2.0") - crs = None + from pyproj import CRS + + if hasattr(area, "crs"): + crs = area.crs else: # default lat/lon projection latlon_proj = "+proj=latlong +datum=WGS84 +ellps=WGS84" - # otherwise get it from the area definition - if hasattr(area, "crs"): - crs = area.crs - else: - proj_str = getattr(area, "proj_str", latlon_proj) - crs = CRS.from_string(proj_str) - data_arr = data_arr.assign_coords(crs=crs) + proj_str = getattr(area, "proj_str", latlon_proj) + crs = CRS.from_string(proj_str) + data_arr = data_arr.assign_coords(crs=crs) return crs, data_arr From bfdb33456bdc87228680b6d00c735dff2cb351f1 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Wed, 28 May 2025 14:12:43 -0500 Subject: [PATCH 24/30] Fix resampling related invalid sphinx references --- satpy/resample/__init__.py | 7 ++++--- satpy/resample/bucket.py | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/satpy/resample/__init__.py b/satpy/resample/__init__.py index 4bb76a3bef..b8e5c554d6 100644 --- a/satpy/resample/__init__.py +++ b/satpy/resample/__init__.py @@ -32,15 +32,16 @@ "Resampler", "Description", "Related" "nearest", "Nearest Neighbor", :class:`~satpy.resample.kdtree.KDTreeResampler` - "ewa", "Elliptical Weighted Averaging", :class:`~pyresample.ewa.DaskEWAResampler` - "ewa_legacy", "Elliptical Weighted Averaging (Legacy)", :class:`~pyresample.ewa.LegacyDaskEWAResampler` + "ewa", "Elliptical Weighted Averaging", :class:`~pyresample.ewa.dask_ewa.DaskEWAResampler` + "ewa_legacy", "Elliptical Weighted Averaging (Legacy)", \ + :class:`~pyresample.ewa._legacy_dask_ewa.LegacyDaskEWAResampler` "native", "Native", :class:`~satpy.resample.native.NativeResampler` "bilinear", "Bilinear", :class:`~satpy.resample.kdtree.BilinearResampler` "bucket_avg", "Average Bucket Resampling", :class:`~satpy.resample.bucket.BucketAvg` "bucket_sum", "Sum Bucket Resampling", :class:`~satpy.resample.bucket.BucketSum` "bucket_count", "Count Bucket Resampling", :class:`~satpy.resample.bucket.BucketCount` "bucket_fraction", "Fraction Bucket Resampling", :class:`~satpy.resample.bucket.BucketFraction` - "gradient_search", "Gradient Search Resampling", :meth:`~pyresample.gradient.create_gradient_search_resampler` + "gradient_search", "Gradient Search Resampling", :func:`~pyresample.gradient.create_gradient_search_resampler` The resampling algorithm used can be specified with the ``resampler`` keyword argument and defaults to ``nearest``: diff --git a/satpy/resample/bucket.py b/satpy/resample/bucket.py index 59364bebb3..d876e5801e 100644 --- a/satpy/resample/bucket.py +++ b/satpy/resample/bucket.py @@ -127,12 +127,12 @@ def compute(self, data, fill_value=np.nan, skipna=True, **kwargs): # noqa: D417 """Call the resampling. Args: - data (numpy.Array, dask.Array): Data to be resampled - fill_value (numpy.nan, int): fill_value. Defaults to numpy.nan + data (numpy.ndarray | dask.array.Array): Data to be resampled + fill_value (float | int): fill_value. Defaults to numpy.nan skipna (bool): Skip NA's. Default `True` Returns: - dask.Array + dask.array.Array """ results = [] if data.ndim == 3: From 9d5a3a76190837303c014138a52692dec5c418d7 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 30 May 2025 11:22:48 +0300 Subject: [PATCH 25/30] Move BaseResampler import from top level to functions --- satpy/resample/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/satpy/resample/base.py b/satpy/resample/base.py index b7606e5dd4..4b3313265c 100644 --- a/satpy/resample/base.py +++ b/satpy/resample/base.py @@ -22,7 +22,6 @@ from weakref import WeakValueDictionary import numpy as np -from pyresample.resampler import BaseResampler as PRBaseResampler from satpy.utils import get_legacy_chunk_size @@ -121,6 +120,8 @@ def get_all_resampler_classes(): # TODO: move this to pyresample def prepare_resampler(source_area, destination_area, resampler=None, **resample_kwargs): """Instantiate and return a resampler.""" + from pyresample.resampler import BaseResampler as PRBaseResampler + if resampler is None: LOG.info("Using default KDTree resampler") resampler = "kd_tree" @@ -160,6 +161,8 @@ def _check_resampler_class(resampler_class, resampler): def resample(source_area, data, destination_area, resampler=None, **kwargs): """Do the resampling.""" + from pyresample.resampler import BaseResampler as PRBaseResampler + if not isinstance(resampler, PRBaseResampler): # we don't use the first argument (cache key) _, resampler_instance = prepare_resampler(source_area, From 9f9a3854c8d2c5d92a04d0e597c194f450245ecb Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 30 May 2025 12:27:56 +0300 Subject: [PATCH 26/30] Refactor collection of resampler classes --- satpy/resample/base.py | 60 ++++++++++++++++------------------------ satpy/resample/bucket.py | 2 +- satpy/resample/ewa.py | 2 +- satpy/resample/kdtree.py | 2 +- satpy/resample/native.py | 2 +- 5 files changed, 28 insertions(+), 40 deletions(-) diff --git a/satpy/resample/base.py b/satpy/resample/base.py index 4b3313265c..fe4f0d9605 100644 --- a/satpy/resample/base.py +++ b/satpy/resample/base.py @@ -18,6 +18,8 @@ import hashlib import json import warnings +from contextlib import suppress +from importlib import import_module from logging import getLogger from weakref import WeakValueDictionary @@ -71,48 +73,34 @@ def _update_resampled_coords(old_data, new_data, new_area): return new_data -# TODO: move this to pyresample.resampler -#RESAMPLERS = {"kd_tree": KDTreeResampler, -# "nearest": KDTreeResampler, -# "bilinear": BilinearResampler, -# "native": NativeResampler, -# "gradient_search": create_gradient_search_resampler, -# "bucket_avg": BucketAvg, -# "bucket_sum": BucketSum, -# "bucket_count": BucketCount, -# "bucket_fraction": BucketFraction, -# "ewa": DaskEWAResampler, -# "ewa_legacy": LegacyDaskEWAResampler, -# } +# TODO: move these to pyresample.resampler +RESAMPLER_MODULES = [ + "satpy.resample.native", + "satpy.resample.kdtree", + "satpy.resample.bucket", + "satpy.resample.ewa", +] + +def _get_resampler_classes_from_module(import_path): + with suppress(ImportError): + mod = import_module(import_path) + return mod.get_resampler_classes() + return {} + def get_all_resampler_classes(): """Get all available resampler classes.""" resamplers = {} - try: - from .native import get_native_resampler_classes - resamplers.update(get_native_resampler_classes()) - except ImportError: - pass - try: - from .kdtree import get_kdtree_resampler_classes - resamplers.update(get_kdtree_resampler_classes()) - except ImportError: - pass - try: - from .bucket import get_bucket_resampler_classes - resamplers.update(get_bucket_resampler_classes()) - except ImportError: - pass - try: - from .ewa import get_ewa_resampler_classes - resamplers.update(get_ewa_resampler_classes()) - except ImportError: - pass - try: + # Collect all available resampler classes + for import_path in RESAMPLER_MODULES: + res = _get_resampler_classes_from_module(import_path) + resamplers.update(res) + + # Add gradient search, which is infact a factory function + # TODO: add `get_resampler_classes()` function to pyresample.gradient + with suppress(ImportError): from pyresample.gradient import create_gradient_search_resampler resamplers["gradient_search"] = create_gradient_search_resampler - except ImportError: - pass return resamplers diff --git a/satpy/resample/bucket.py b/satpy/resample/bucket.py index 59364bebb3..7fab878012 100644 --- a/satpy/resample/bucket.py +++ b/satpy/resample/bucket.py @@ -225,7 +225,7 @@ def compute(self, data, fill_value=np.nan, categories=None, **kwargs): return result -def get_bucket_resampler_classes(): +def get_resampler_classes(): """Get bucket resampler classes.""" return { "bucket_avg": BucketAvg, diff --git a/satpy/resample/ewa.py b/satpy/resample/ewa.py index 8d6345959a..4e6ff263b2 100644 --- a/satpy/resample/ewa.py +++ b/satpy/resample/ewa.py @@ -3,7 +3,7 @@ from pyresample.ewa import DaskEWAResampler, LegacyDaskEWAResampler -def get_ewa_resampler_classes(): +def get_resampler_classes(): """Get bucket resampler classes.""" return { "ewa": DaskEWAResampler, diff --git a/satpy/resample/kdtree.py b/satpy/resample/kdtree.py index 5f5694e21a..6d84411444 100644 --- a/satpy/resample/kdtree.py +++ b/satpy/resample/kdtree.py @@ -311,7 +311,7 @@ def _move_existing_caches(cache_dir, filename): LOG.warning("Old cache file was moved to %s", old_cache_dir) -def get_kdtree_resampler_classes(): +def get_resampler_classes(): """Get resampler classes based on kdtree.""" return { "kd_tree": KDTreeResampler, diff --git a/satpy/resample/native.py b/satpy/resample/native.py index 572fd5c609..5548479256 100644 --- a/satpy/resample/native.py +++ b/satpy/resample/native.py @@ -188,6 +188,6 @@ def _check_chunking(new_chunks, dask_arr, dim_idx, agg_size): return need_rechunk -def get_native_resampler_classes(): +def get_resampler_classes(): """Get classes based on native resampler.""" return {"native": NativeResampler} From 5bbd4f124667f2b890a0ab0dcbe42a2daed110eb Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 30 May 2025 12:55:54 +0300 Subject: [PATCH 27/30] Rename area_utils.py to area.py --- doc/source/generate_area_def_list.py | 2 +- satpy/{area_utils.py => area.py} | 0 satpy/composites/__init__.py | 2 +- satpy/readers/cmsaf_claas2.py | 2 +- satpy/readers/core/yaml_reader.py | 2 +- satpy/readers/eum_l2_bufr.py | 2 +- satpy/readers/fci_l2_nc.py | 2 +- satpy/readers/gerb_l2_hr_h5.py | 2 +- satpy/readers/hsaf_h5.py | 2 +- satpy/readers/li_l2_nc.py | 2 +- satpy/resample/__init__.py | 8 ++++---- satpy/scene.py | 2 +- satpy/tests/reader_tests/test_hsaf_h5.py | 2 +- satpy/tests/test_composites.py | 2 +- satpy/tests/test_config.py | 4 ++-- satpy/tests/test_testing.py | 2 +- satpy/writers/__init__.py | 2 +- 17 files changed, 20 insertions(+), 20 deletions(-) rename satpy/{area_utils.py => area.py} (100%) diff --git a/doc/source/generate_area_def_list.py b/doc/source/generate_area_def_list.py index 16d6a2cb87..f9bace7b50 100644 --- a/doc/source/generate_area_def_list.py +++ b/doc/source/generate_area_def_list.py @@ -20,7 +20,7 @@ from pyresample.utils.proj4 import ignore_pyproj_proj_warnings from reader_table import rst_table_header, rst_table_row -from satpy.area_utils import get_area_file +from satpy.area import get_area_file logger = logging.getLogger(__name__) diff --git a/satpy/area_utils.py b/satpy/area.py similarity index 100% rename from satpy/area_utils.py rename to satpy/area.py diff --git a/satpy/composites/__init__.py b/satpy/composites/__init__.py index 5221dbdfe8..0286162952 100644 --- a/satpy/composites/__init__.py +++ b/satpy/composites/__init__.py @@ -1597,7 +1597,7 @@ def __init__(self, name, filename=None, url=None, known_hash=None, area=None, # self._known_hash = known_hash self.area = None if area is not None: - from satpy.area_utils import get_area_def + from satpy.area import get_area_def self.area = get_area_def(area) super(StaticImageCompositor, self).__init__(name, **kwargs) diff --git a/satpy/readers/cmsaf_claas2.py b/satpy/readers/cmsaf_claas2.py index e3d14c891f..300b685123 100644 --- a/satpy/readers/cmsaf_claas2.py +++ b/satpy/readers/cmsaf_claas2.py @@ -2,7 +2,7 @@ import datetime -from satpy.area_utils import get_area_def +from satpy.area import get_area_def from satpy.readers.core.netcdf import NetCDF4FileHandler diff --git a/satpy/readers/core/yaml_reader.py b/satpy/readers/core/yaml_reader.py index 7ef0983685..82c057af03 100644 --- a/satpy/readers/core/yaml_reader.py +++ b/satpy/readers/core/yaml_reader.py @@ -42,7 +42,7 @@ from satpy import DatasetDict from satpy._compat import cache -from satpy.area_utils import get_area_def +from satpy.area import get_area_def from satpy.aux_download import DataDownloadMixin from satpy.coords_utils import add_crs_xy_coords from satpy.dataset import DataID, DataQuery, get_key diff --git a/satpy/readers/eum_l2_bufr.py b/satpy/readers/eum_l2_bufr.py index 460307401d..95f378378a 100644 --- a/satpy/readers/eum_l2_bufr.py +++ b/satpy/readers/eum_l2_bufr.py @@ -32,7 +32,7 @@ import numpy as np import xarray as xr -from satpy.area_utils import get_area_def +from satpy.area import get_area_def from satpy.readers.core._geos_area import get_geos_area_naming from satpy.readers.core.eum import get_service_mode, recarray2dict from satpy.readers.core.file_handlers import BaseFileHandler diff --git a/satpy/readers/fci_l2_nc.py b/satpy/readers/fci_l2_nc.py index 685a8cffde..3bde9ffb96 100644 --- a/satpy/readers/fci_l2_nc.py +++ b/satpy/readers/fci_l2_nc.py @@ -24,7 +24,7 @@ from pyresample import geometry from satpy._compat import cached_property -from satpy.area_utils import get_area_def +from satpy.area import get_area_def from satpy.readers.core._geos_area import get_geos_area_naming, make_ext from satpy.readers.core.eum import get_service_mode from satpy.readers.core.fci import platform_name_translate diff --git a/satpy/readers/gerb_l2_hr_h5.py b/satpy/readers/gerb_l2_hr_h5.py index ab1f9b02e4..727ae424d8 100644 --- a/satpy/readers/gerb_l2_hr_h5.py +++ b/satpy/readers/gerb_l2_hr_h5.py @@ -25,7 +25,7 @@ import datetime as dt import logging -from satpy.area_utils import get_area_def +from satpy.area import get_area_def from satpy.readers.core.hdf5 import HDF5FileHandler LOG = logging.getLogger(__name__) diff --git a/satpy/readers/hsaf_h5.py b/satpy/readers/hsaf_h5.py index d2294ec8fd..66ec0de1f5 100644 --- a/satpy/readers/hsaf_h5.py +++ b/satpy/readers/hsaf_h5.py @@ -26,7 +26,7 @@ import numpy as np import xarray as xr -from satpy.area_utils import get_area_def +from satpy.area import get_area_def from satpy.readers.core.file_handlers import BaseFileHandler from satpy.utils import get_legacy_chunk_size diff --git a/satpy/readers/li_l2_nc.py b/satpy/readers/li_l2_nc.py index 8f75e8b527..b477349142 100644 --- a/satpy/readers/li_l2_nc.py +++ b/satpy/readers/li_l2_nc.py @@ -80,7 +80,7 @@ import numpy as np import xarray as xr -from satpy.area_utils import get_area_def +from satpy.area import get_area_def from satpy.readers.core.li_nc import LINCFileHandler from satpy.utils import get_legacy_chunk_size diff --git a/satpy/resample/__init__.py b/satpy/resample/__init__.py index b8e5c554d6..df9508fac3 100644 --- a/satpy/resample/__init__.py +++ b/satpy/resample/__init__.py @@ -143,10 +143,10 @@ >>> from pyresample import load_area >>> my_area = load_area('my_areas.yaml', 'my_area') -Or using :func:`satpy.area_utils.get_area_def`, which will search through all +Or using :func:`satpy.area.get_area_def`, which will search through all ``areas.yaml`` files in your ``SATPY_CONFIG_PATH``:: - >>> from satpy.area_utils import get_area_def + >>> from satpy.area import get_area_def >>> area_eurol = get_area_def("eurol") For examples of area definitions, see the file ``etc/areas.yaml`` that is @@ -177,8 +177,8 @@ "resample": "satpy.resample.base", "prepare_resampler": "satpy.resample.base", "resample_dataset": "satpy.resample.base", - "get_area_file": "satpy.area_utils", - "get_area_def": "satpy.area_utils", + "get_area_file": "satpy.area", + "get_area_def": "satpy.area", "add_xy_coords": "satpy.coords_utils", "add_crs_xy_coords": "satpy.coords_utils", } diff --git a/satpy/scene.py b/satpy/scene.py index a724824be3..cd47160f9c 100644 --- a/satpy/scene.py +++ b/satpy/scene.py @@ -29,7 +29,7 @@ from pyresample.geometry import AreaDefinition, BaseDefinition, CoordinateDefinition, SwathDefinition from xarray import DataArray -from satpy.area_utils import get_area_def +from satpy.area import get_area_def from satpy.composites import IncompatibleAreas from satpy.composites.config_loader import load_compositor_configs_for_sensors from satpy.dataset import DataID, DataQuery, DatasetDict, combine_metadata, dataset_walker, replace_anc diff --git a/satpy/tests/reader_tests/test_hsaf_h5.py b/satpy/tests/reader_tests/test_hsaf_h5.py index c065cb2a0b..f94dcb2d14 100644 --- a/satpy/tests/reader_tests/test_hsaf_h5.py +++ b/satpy/tests/reader_tests/test_hsaf_h5.py @@ -8,7 +8,7 @@ import pytest from satpy import Scene -from satpy.area_utils import get_area_def +from satpy.area import get_area_def # real shape is 916, 1902 SHAPE_SC = (916, 1902) diff --git a/satpy/tests/test_composites.py b/satpy/tests/test_composites.py index 48cff21db1..7a8f85cdea 100644 --- a/satpy/tests/test_composites.py +++ b/satpy/tests/test_composites.py @@ -1405,7 +1405,7 @@ def test_add_bands_p_l(self): class TestStaticImageCompositor(unittest.TestCase): """Test case for the static compositor.""" - @mock.patch("satpy.area_utils.get_area_def") + @mock.patch("satpy.area.get_area_def") def test_init(self, get_area_def): """Test the initializiation of static compositor.""" from satpy.composites import StaticImageCompositor diff --git a/satpy/tests/test_config.py b/satpy/tests/test_config.py index 1393b15f34..f1f518030f 100644 --- a/satpy/tests/test_config.py +++ b/satpy/tests/test_config.py @@ -50,7 +50,7 @@ def test_areas_pyproj(self): from pyresample import parse_area_file from pyresample.geometry import SwathDefinition - from satpy.area_utils import get_area_file + from satpy.area import get_area_file lons = np.array([[0, 0.1, 0.2], [0.05, 0.15, 0.25]]) lats = np.array([[0, 0.1, 0.2], [0.05, 0.15, 0.25]]) @@ -82,7 +82,7 @@ def test_areas_rasterio(self): from pyresample import parse_area_file from pyresample.geometry import SwathDefinition - from satpy.area_utils import get_area_file + from satpy.area import get_area_file lons = np.array([[0, 0.1, 0.2], [0.05, 0.15, 0.25]]) lats = np.array([[0, 0.1, 0.2], [0.05, 0.15, 0.25]]) diff --git a/satpy/tests/test_testing.py b/satpy/tests/test_testing.py index 36bd973a9c..291eb3225c 100644 --- a/satpy/tests/test_testing.py +++ b/satpy/tests/test_testing.py @@ -4,7 +4,7 @@ import xarray as xr from satpy import Scene -from satpy.area_utils import get_area_def +from satpy.area import get_area_def from satpy.testing import fake_satpy_reading diff --git a/satpy/writers/__init__.py b/satpy/writers/__init__.py index 141bee8156..a09cdf6130 100644 --- a/satpy/writers/__init__.py +++ b/satpy/writers/__init__.py @@ -33,7 +33,7 @@ from yaml import UnsafeLoader from satpy._config import config_search_paths, get_entry_points_config_dirs, glob_config -from satpy.area_utils import get_area_def +from satpy.area import get_area_def from satpy.aux_download import DataDownloadMixin from satpy.enhancements.enhancer import Enhancer from satpy.plugin_base import Plugin From 0b2be453b412f74db7b4c39ee9ac21372d87ff39 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 30 May 2025 13:00:29 +0300 Subject: [PATCH 28/30] Rename coords_utils.py to coords.py --- satpy/{coords_utils.py => coords.py} | 0 satpy/readers/core/yaml_reader.py | 2 +- satpy/resample/__init__.py | 4 ++-- satpy/resample/base.py | 2 +- satpy/tests/cf_tests/test_dataaarray.py | 2 +- satpy/tests/test_composites.py | 2 +- satpy/tests/{test_coords_utils.py => test_coords.py} | 4 ++-- satpy/tests/utils.py | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) rename satpy/{coords_utils.py => coords.py} (100%) rename satpy/tests/{test_coords_utils.py => test_coords.py} (97%) diff --git a/satpy/coords_utils.py b/satpy/coords.py similarity index 100% rename from satpy/coords_utils.py rename to satpy/coords.py diff --git a/satpy/readers/core/yaml_reader.py b/satpy/readers/core/yaml_reader.py index 82c057af03..1982aac346 100644 --- a/satpy/readers/core/yaml_reader.py +++ b/satpy/readers/core/yaml_reader.py @@ -44,7 +44,7 @@ from satpy._compat import cache from satpy.area import get_area_def from satpy.aux_download import DataDownloadMixin -from satpy.coords_utils import add_crs_xy_coords +from satpy.coords import add_crs_xy_coords from satpy.dataset import DataID, DataQuery, get_key from satpy.dataset.dataid import default_co_keys_config, default_id_keys_config, get_keys_from_config from satpy.utils import recursive_dict_update diff --git a/satpy/resample/__init__.py b/satpy/resample/__init__.py index df9508fac3..aa80f0c758 100644 --- a/satpy/resample/__init__.py +++ b/satpy/resample/__init__.py @@ -179,8 +179,8 @@ "resample_dataset": "satpy.resample.base", "get_area_file": "satpy.area", "get_area_def": "satpy.area", - "add_xy_coords": "satpy.coords_utils", - "add_crs_xy_coords": "satpy.coords_utils", + "add_xy_coords": "satpy.coords", + "add_crs_xy_coords": "satpy.coords", } def __getattr__(name: str) -> Any: diff --git a/satpy/resample/base.py b/satpy/resample/base.py index fe4f0d9605..5b451c62e1 100644 --- a/satpy/resample/base.py +++ b/satpy/resample/base.py @@ -52,7 +52,7 @@ def _update_resampled_coords(old_data, new_data, new_area): for the newly resampled data. """ - from satpy.coords_utils import add_crs_xy_coords + from satpy.coords import add_crs_xy_coords # copy over other non-x/y coordinates # this *MUST* happen before we set 'crs' below otherwise any 'crs' diff --git a/satpy/tests/cf_tests/test_dataaarray.py b/satpy/tests/cf_tests/test_dataaarray.py index c8696c58f3..4d09959ac0 100644 --- a/satpy/tests/cf_tests/test_dataaarray.py +++ b/satpy/tests/cf_tests/test_dataaarray.py @@ -50,7 +50,7 @@ def test_make_cf_dataarray_lonlat(): from pyresample import create_area_def from satpy.cf.data_array import make_cf_data_array - from satpy.coords_utils import add_crs_xy_coords + from satpy.coords import add_crs_xy_coords area = create_area_def("mavas", 4326, shape=(5, 5), center=(0, 0), resolution=(1, 1)) diff --git a/satpy/tests/test_composites.py b/satpy/tests/test_composites.py index 7a8f85cdea..fa44ea5215 100644 --- a/satpy/tests/test_composites.py +++ b/satpy/tests/test_composites.py @@ -145,7 +145,7 @@ def test_almost_equal_geo_coordinates(self): """ from satpy.composites import CompositeBase - from satpy.coords_utils import add_crs_xy_coords + from satpy.coords import add_crs_xy_coords comp = CompositeBase("test_comp") data_arr1 = self._get_test_ds(shape=(2, 2)) diff --git a/satpy/tests/test_coords_utils.py b/satpy/tests/test_coords.py similarity index 97% rename from satpy/tests/test_coords_utils.py rename to satpy/tests/test_coords.py index 4a30ab7c88..f8ab084b2f 100644 --- a/satpy/tests/test_coords_utils.py +++ b/satpy/tests/test_coords.py @@ -31,7 +31,7 @@ def test_area_def_coordinates(self): """Test coordinates being added with an AreaDefinition.""" from pyresample.geometry import AreaDefinition - from satpy.coords_utils import add_crs_xy_coords + from satpy.coords import add_crs_xy_coords area_def = AreaDefinition( "test", "test", "test", {"proj": "lcc", "lat_1": 25, "lat_0": 25}, 100, 200, [-100, -100, 100, 100] @@ -97,7 +97,7 @@ def test_swath_def_coordinates(self): """Test coordinates being added with an SwathDefinition.""" from pyresample.geometry import SwathDefinition - from satpy.coords_utils import add_crs_xy_coords + from satpy.coords import add_crs_xy_coords lons_data = da.random.random((200, 100), chunks=50) lats_data = da.random.random((200, 100), chunks=50) lons = xr.DataArray(lons_data, attrs={"units": "degrees_east"}, diff --git a/satpy/tests/utils.py b/satpy/tests/utils.py index 867f0de1da..a7e94bb162 100644 --- a/satpy/tests/utils.py +++ b/satpy/tests/utils.py @@ -378,7 +378,7 @@ def _get_fake_scene_area(arr, area): def _get_did_for_fake_scene(area, arr, extra_attrs, daskify): """Add instance to fake scene. Helper for make_fake_scene.""" - from satpy.coords_utils import add_crs_xy_coords + from satpy.coords import add_crs_xy_coords if isinstance(arr, DataArray): new = arr.copy() # don't change attributes of input new.attrs.update(extra_attrs) From 7ff4f6df8d4dcc81db0aefecd5dd57c105712305 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 30 May 2025 15:29:27 +0300 Subject: [PATCH 29/30] Move resample_dataset and prepare_rempler import to functions --- satpy/scene.py | 5 ++++- satpy/tests/scene_tests/test_resampling.py | 8 ++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/satpy/scene.py b/satpy/scene.py index cd47160f9c..e8de5586cc 100644 --- a/satpy/scene.py +++ b/satpy/scene.py @@ -36,7 +36,6 @@ from satpy.dependency_tree import DependencyTree from satpy.node import CompositorNode, MissingDependencies, ReaderNode from satpy.readers.core.loading import load_readers -from satpy.resample.base import prepare_resampler, resample_dataset from satpy.utils import convert_remote_files_to_fsspec, get_storage_options_from_reader_kwargs from satpy.writers import load_writer @@ -870,6 +869,8 @@ def _resampled_scene(self, new_scn, destination_area, reduce_data=True, If data reduction is enabled, some local caching is perfomed in order to avoid recomputation of area intersections. """ + from satpy.resample.base import resample_dataset + new_datasets = {} datasets = list(new_scn._datasets.values()) destination_area = self._get_finalized_destination_area(destination_area, new_scn) @@ -919,6 +920,8 @@ def _get_finalized_destination_area(self, destination_area, new_scn): return destination_area def _prepare_resampler(self, source_area, destination_area, resamplers, resample_kwargs): + from satpy.resample.base import prepare_resampler + if source_area not in resamplers: key, resampler = prepare_resampler( source_area, destination_area, **resample_kwargs) diff --git a/satpy/tests/scene_tests/test_resampling.py b/satpy/tests/scene_tests/test_resampling.py index d59019e3f7..e4f4c67732 100644 --- a/satpy/tests/scene_tests/test_resampling.py +++ b/satpy/tests/scene_tests/test_resampling.py @@ -194,7 +194,7 @@ def _fake_resample_dataset_force_20x20(self, dataset, dest_area, **kwargs): attrs=attrs, ) - @mock.patch("satpy.scene.resample_dataset") + @mock.patch("satpy.resample.base.resample_dataset") @pytest.mark.parametrize("datasets", [ None, ("comp13", "ds5", "ds2"), @@ -246,7 +246,7 @@ def test_resample_scene_copy(self, rs, datasets): assert loaded_ids[0] == make_cid(name="comp19") assert loaded_ids[1] == make_cid(name="new_ds") - @mock.patch("satpy.scene.resample_dataset") + @mock.patch("satpy.resample.base.resample_dataset") def test_resample_scene_preserves_requested_dependencies(self, rs): """Test that the Scene is properly copied during resampling. @@ -276,7 +276,7 @@ def test_resample_scene_preserves_requested_dependencies(self, rs): assert "comp26" in new_scene_2 assert "ds1" not in new_scene_2 # unloaded - @mock.patch("satpy.scene.resample_dataset") + @mock.patch("satpy.resample.base.resample_dataset") def test_resample_reduce_data_toggle(self, rs): """Test that the Scene can be reduced or not reduced during resampling.""" from pyresample.geometry import AreaDefinition @@ -402,7 +402,7 @@ def test_resample_reduce_data(self): assert new_scene2["comp19"].shape == (20, 20, 3) assert new_scene3["comp19"].shape == (20, 20, 3) - @mock.patch("satpy.scene.resample_dataset") + @mock.patch("satpy.resample.base.resample_dataset") def test_no_generate_comp10(self, rs): """Test generating a composite after loading.""" from pyresample.geometry import AreaDefinition From 80296ee364413247e0914e02f764e2c9d19f8f39 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 30 May 2025 17:06:18 +0300 Subject: [PATCH 30/30] Add a test for warnings issued when importing directly from satpy.resample --- satpy/tests/test_resample.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/satpy/tests/test_resample.py b/satpy/tests/test_resample.py index a4451ad333..ff72285b1a 100644 --- a/satpy/tests/test_resample.py +++ b/satpy/tests/test_resample.py @@ -681,3 +681,28 @@ def test_resample(self, pyresample_bucket): assert "categories" in res.coords assert "categories" in res.dims assert np.all(res.coords["categories"] == np.array([0, 1, 2])) + + +@pytest.mark.parametrize("name", + ["KDTreeResampler", + "BilinearResampler", + "NativeResampler", + "BucketResamplerBase", + "BucketAvg", + "BucketSum", + "BucketCount", + "BucketFraction", + "resample", + "prepare_resampler", + "resample_dataset", + "get_area_file", + "get_area_def", + "add_xy_coords", + "add_crs_xy_coords", + ] + ) +def test_moved_import_warns(name): + """Test that imports done directly from satpy.resample sub-package issue a warning.""" + import satpy.resample + with pytest.warns(UserWarning): + _ = getattr(satpy.resample, name)