From 2874f07393e5fa0fc7a51ea88827ede6cad801ad Mon Sep 17 00:00:00 2001 From: Knut-Frode Dagestad Date: Wed, 13 Nov 2024 14:04:10 +0100 Subject: [PATCH 1/4] Making attribute names more precise: obsdim->obs_dimname, timedim->time_varname --- .../example_find_positions_at_obs_times.py | 2 +- tests/test_read_sfy.py | 4 +- trajan/accessor.py | 48 ++++++------ trajan/ragged.py | 22 +++--- trajan/traj.py | 58 +++++++-------- trajan/traj1d.py | 56 +++++++------- trajan/traj2d.py | 74 +++++++++---------- 7 files changed, 130 insertions(+), 134 deletions(-) diff --git a/examples/example_find_positions_at_obs_times.py b/examples/example_find_positions_at_obs_times.py index 11b6fd2..a117986 100644 --- a/examples/example_find_positions_at_obs_times.py +++ b/examples/example_find_positions_at_obs_times.py @@ -25,7 +25,7 @@ def gridwaves(tds): t = tds[['lat', 'lon', 'time']].traj.gridtime(tds['time_waves_imu'].squeeze()) - return t.traj.to_2d(obsdim='obs_waves_imu') + return t.traj.to_2d(obs_dimname='obs_waves_imu') dsw = ds.groupby('trajectory').map(gridwaves) diff --git a/tests/test_read_sfy.py b/tests/test_read_sfy.py index 9fc2fac..0277232 100644 --- a/tests/test_read_sfy.py +++ b/tests/test_read_sfy.py @@ -8,8 +8,8 @@ def test_interpret_sfy(test_data): ds = xr.open_dataset(test_data / 'bug32.nc') print(ds) - assert ds.traj.obsdim == 'package' - assert ds.traj.timedim == 'position_time' + assert ds.traj.obs_dimname == 'package' + assert ds.traj.time_varname == 'position_time' assert ds.traj.is_2d() diff --git a/trajan/accessor.py b/trajan/accessor.py index 000c9cf..1b53805 100644 --- a/trajan/accessor.py +++ b/trajan/accessor.py @@ -26,10 +26,10 @@ def detect_tx_dim(ds): raise ValueError("Could not determine x / lon variable") -def detect_time_dim(ds, obsdim): - logger.debug(f'Detecting time-dimension for "{obsdim}"..') +def detect_time_dim(ds, obs_dimname): + logger.debug(f'Detecting time-dimension for "{obs_dimname}"..') for v in ds.variables: - if obsdim in ds[v].dims and 'time' in v: + if obs_dimname in ds[v].dims and 'time' in v: return v raise ValueError("no time dimension detected") @@ -47,8 +47,8 @@ def __new__(cls, ds): ds = ds.expand_dims({'trajectory': 1}) ds['trajectory'].attrs['cf_role'] = 'trajectory_id' - obsdim = None - timedim = None + obs_dimname = None + time_varname = None tx = detect_tx_dim(ds) @@ -62,7 +62,7 @@ def __new__(cls, ds): # NOTE: this is probably not standard; something to point to the CF conventions? # NOTE: for now, there is no discovery of the "index" dim, this is hardcorded; any way to do better? if "index" in tx.dims: - obsdim = "index" + obs_dimname = "index" # discover the timecoord variable name ####################### # find all variables with standard_name "time" @@ -90,14 +90,14 @@ def __new__(cls, ds): else: raise ValueError(f"cannot deduce rowsizevar; we have the following candidates: {with_dim_trajectory = }") # sanity check - if not np.sum(ds[rowsizevar].to_numpy()) == len(ds[obsdim]): + if not np.sum(ds[rowsizevar].to_numpy()) == len(ds[obs_dimname]): raise ValueError("mismatch between the index length and the sum of the deduced trajectory lengths") logger.debug( - f"1D storage dataset; detected: {obsdim = }, {timecoord = }, {trajectorycoord = }, {rowsizevar}" + f"1D storage dataset; detected: {obs_dimname = }, {timecoord = }, {trajectorycoord = }, {rowsizevar}" ) - return ocls(ds, obsdim, timecoord, trajectorycoord, rowsizevar) + return ocls(ds, obs_dimname, timecoord, trajectorycoord, rowsizevar) else: logging.warning(f"{ds} has {tx.dims = } which is of dimension 1 but is not index; this is a bit unusual; try to parse with Traj1d or Traj2d") @@ -105,16 +105,16 @@ def __new__(cls, ds): # we have a ds where 2D arrays are used to store data, this is either Traj1d or Traj2d # there may also be some slightly unusual cases where these Traj1d and Traj2d classes will be used on data with 1D arrays if 'obs' in tx.dims: - obsdim = 'obs' - timedim = detect_time_dim(ds, obsdim) + obs_dimname = 'obs' + time_varname = detect_time_dim(ds, obs_dimname) elif 'index' in tx.dims: - obsdim = 'obs' - timedim = detect_time_dim(ds, obsdim) + obs_dimname = 'obs' + time_varname = detect_time_dim(ds, obs_dimname) elif 'time' in tx.dims: - obsdim = 'time' - timedim = 'time' + obs_dimname = 'time' + time_varname = 'time' else: for d in tx.dims: @@ -122,31 +122,31 @@ def __new__(cls, ds): 'cf_role', None) == 'trajectory_id' and not 'traj' in d: - obsdim = d - timedim = detect_time_dim(ds, obsdim) + obs_dimname = d + time_varname = detect_time_dim(ds, obs_dimname) break - if obsdim is None: + if obs_dimname is None: logger.warning('No time or obs dimension detected.') logger.debug( - f"Detected obs-dim: {obsdim}, detected time-dim: {timedim}.") + f"Detected obs-dim: {obs_dimname}, detected time-dim: {time_varname}.") - if obsdim is None: + if obs_dimname is None: ocls = Traj1d - elif len(ds[timedim].shape) <= 1: + elif len(ds[time_varname].shape) <= 1: logger.debug('Detected structured (1D) trajectory dataset') ocls = Traj1d - elif len(ds[timedim].shape) == 2: + elif len(ds[time_varname].shape) == 2: logger.debug('Detected un-structured (2D) trajectory dataset') ocls = Traj2d else: raise ValueError( - f'Time dimension has shape greater than 2: {ds["timedim"].shape}' + f'Time dimension has shape greater than 2: {ds["time_varname"].shape}' ) - return ocls(ds, obsdim, timedim) + return ocls(ds, obs_dimname, time_varname) diff --git a/trajan/ragged.py b/trajan/ragged.py index 39e8b21..9e2970d 100644 --- a/trajan/ragged.py +++ b/trajan/ragged.py @@ -14,12 +14,12 @@ class ContiguousRagged(Traj): trajdim: str rowvar: str - def __init__(self, ds, obsdim, timedim, trajectorycoord, rowsizevar): + def __init__(self, ds, obs_dimname, time_varname, trajectorycoord, rowsizevar): self.trajdim = trajectorycoord self.rowvar = rowsizevar - super().__init__(ds, obsdim, timedim) + super().__init__(ds, obs_dimname, time_varname) - def to_2d(self, obsdim='obs'): + def to_2d(self, obs_dimname='obs'): """This actually converts a contiguous ragged xarray Dataset into an xarray Dataset that follows the Traj2d conventions.""" global_attrs = self.ds.attrs @@ -45,7 +45,7 @@ def to_2d(self, obsdim='obs'): self.ds[self.rowvar].to_numpy()): end_index = start_index + crrt_rowsize array_time[crrt_index, :crrt_rowsize] = self.ds[ - self.timedim][start_index:end_index] + self.time_varname][start_index:end_index] start_index = end_index # it seems that we need to build the "backbone" of the Dataset independently first @@ -70,7 +70,7 @@ def to_2d(self, obsdim='obs'): # trajectory vars 'time': - xr.DataArray(dims=["trajectory", obsdim], + xr.DataArray(dims=["trajectory", obs_dimname], data=array_time, attrs={ "standard_name": "time", @@ -81,7 +81,7 @@ def to_2d(self, obsdim='obs'): # now add all "normal" variables # NOTE: for now, we only consider scalar vars; if we want to consider more complex vars (e.g., spectra), this will need updated - # NOTE: such an update would typically need to look at the dims of the variable, and if there are additional dims to obsdim, create a higer dim variable + # NOTE: such an update would typically need to look at the dims of the variable, and if there are additional dims to obs_dimname, create a higer dim variable for crrt_data_var in self.ds.data_vars: attrs = self.ds[crrt_data_var].attrs @@ -90,9 +90,9 @@ def to_2d(self, obsdim='obs'): continue if len(self.ds[crrt_data_var].dims - ) != 1 or self.ds[crrt_data_var].dims[0] != self.obsdim: + ) != 1 or self.ds[crrt_data_var].dims[0] != self.obs_dimname: raise ValueError( - f"data_vars element {crrt_data_var} has dims {self.ds[crrt_data_var].dims}, expected {(self.obsdim,)}" + f"data_vars element {crrt_data_var} has dims {self.ds[crrt_data_var].dims}, expected {(self.obs_dimname,)}" ) crrt_var = np.full((nbr_trajectories, longest_trajectory), np.nan) @@ -119,7 +119,7 @@ def to_2d(self, obsdim='obs'): crrt_data_var = "lat" ds_converted_to_traj2d[crrt_data_var] = \ - xr.DataArray(dims=["trajectory", obsdim], + xr.DataArray(dims=["trajectory", obs_dimname], data=crrt_var, attrs=attrs) @@ -140,6 +140,6 @@ def plot(self) -> Plot: def timestep(self, average=np.median): return self.to_2d().traj.timestep(average) - def gridtime(self, times, timedim=None, round=True): - return self.to_2d().traj.gridtime(times, timedim, round) + def gridtime(self, times, time_varname=None, round=True): + return self.to_2d().traj.gridtime(times, time_varname, round) diff --git a/trajan/traj.py b/trajan/traj.py index 27e8ec5..c035bcb 100644 --- a/trajan/traj.py +++ b/trajan/traj.py @@ -33,10 +33,10 @@ def detect_tx_dim(ds): raise ValueError("Could not determine x / lon variable") -def detect_time_dim(ds, obsdim): - logger.debug(f'Detecting time-dimension for "{obsdim}"..') +def detect_time_dim(ds, obs_dimname): + logger.debug(f'Detecting time-dimension for "{obs_dimname}"..') for v in ds.variables: - if obsdim in ds[v].dims and 'time' in v: + if obs_dimname in ds[v].dims and 'time' in v: return v raise ValueError("no time dimension detected") @@ -50,19 +50,13 @@ class Traj: __gcrs__: pyproj.CRS - obsdim: str - """ - Name of the dimension along which observations are taken. Usually either `obs` or `time`. - """ - timedim: str - - def __init__(self, ds, obsdim, timedim): + def __init__(self, ds, obs_dimname, time_varname): self.ds = ds self.__plot__ = None self.__animate__ = None self.__gcrs__ = pyproj.CRS.from_epsg(4326) - self.obsdim = obsdim - self.timedim = timedim + self.obs_dimname = obs_dimname # dimension along which time increases + self.time_varname = time_varname def __repr__(self): output = '=======================\n' @@ -70,8 +64,10 @@ def __repr__(self): output += '------------\n' output += f'{self.ds.sizes["trajectory"]} trajectories\n' if 'time' in self.ds.variables: - if self.timedim in self.ds.sizes: - output += f'{self.ds.sizes[self.timedim]} timesteps\n' + if self.time_varname in self.ds.sizes: + output += f'{self.ds.sizes[self.time_varname]} timesteps' + timevar = self.ds[self.time_varname] + output += f' {timevar.name}{list(timevar.sizes)} ({len(timevar.sizes)}D)\n' try: timestep = self.timestep() timestep = timedelta(seconds=int(timestep)) @@ -532,18 +528,18 @@ def distance_to_next(self): lon = self.ds.lon lat = self.ds.lat - lenobs = self.ds.sizes[self.obsdim] - lonfrom = lon.isel({self.obsdim: slice(0, lenobs - 1)}) - latfrom = lat.isel({self.obsdim: slice(0, lenobs - 1)}) - lonto = lon.isel({self.obsdim: slice(1, lenobs)}) - latto = lat.isel({self.obsdim: slice(1, lenobs)}) + lenobs = self.ds.sizes[self.obs_dimname] + lonfrom = lon.isel({self.obs_dimname: slice(0, lenobs - 1)}) + latfrom = lat.isel({self.obs_dimname: slice(0, lenobs - 1)}) + lonto = lon.isel({self.obs_dimname: slice(1, lenobs)}) + latto = lat.isel({self.obs_dimname: slice(1, lenobs)}) geod = pyproj.Geod(ellps='WGS84') azimuth_forward, a2, distance = geod.inv(lonfrom, latfrom, lonto, latto) distance = xr.DataArray(distance, coords=lonfrom.coords, dims=lon.dims) - distance = xr.concat((distance, distance.isel({self.obsdim: -1})), - dim=self.obsdim) # repeating last time step to + distance = xr.concat((distance, distance.isel({self.obs_dimname: -1})), + dim=self.obs_dimname) # repeating last time step to return distance def azimuth_to_next(self): @@ -564,11 +560,11 @@ def azimuth_to_next(self): # TODO: method is almost duplicate of "distance_to_next" above lon = self.ds.lon lat = self.ds.lat - lenobs = self.ds.dims[self.obsdim] - lonfrom = lon.isel({self.obsdim: slice(0, lenobs - 1)}) - latfrom = lat.isel({self.obsdim: slice(0, lenobs - 1)}) - lonto = lon.isel({self.obsdim: slice(1, lenobs)}) - latto = lat.isel({self.obsdim: slice(1, lenobs)}) + lenobs = self.ds.dims[self.obs_dimname] + lonfrom = lon.isel({self.obs_dimname: slice(0, lenobs - 1)}) + latfrom = lat.isel({self.obs_dimname: slice(0, lenobs - 1)}) + lonto = lon.isel({self.obs_dimname: slice(1, lenobs)}) + latto = lat.isel({self.obs_dimname: slice(1, lenobs)}) geod = pyproj.Geod(ellps='WGS84') azimuth_forward, a2, distance = geod.inv(lonfrom, latfrom, lonto, latto) @@ -577,8 +573,8 @@ def azimuth_to_next(self): coords=lonfrom.coords, dims=lon.dims) azimuth_forward = xr.concat( - (azimuth_forward, azimuth_forward.isel({self.obsdim: -1})), - dim=self.obsdim) # repeating last time step to + (azimuth_forward, azimuth_forward.isel({self.obs_dimname: -1})), + dim=self.obs_dimname) # repeating last time step to return azimuth_forward def velocity_components(self): @@ -679,7 +675,7 @@ def get_area_convex_hull(self): return np.array(hull.volume) # volume=area for 2D as here @abstractmethod - def gridtime(self, times, timedim=None) -> xr.Dataset: + def gridtime(self, times, time_varname=None) -> xr.Dataset: """Interpolate dataset to a regular time interval or a different grid. Parameters @@ -689,7 +685,7 @@ def gridtime(self, times, timedim=None) -> xr.Dataset: - an array of times, or - a string specifying a Pandas daterange (e.g. 'h', '6h, 'D'...) suitable for `pd.date_range`. - timedim : str + time_varname : str Name of new time dimension. The default is to use the same name as previously. Returns @@ -894,7 +890,7 @@ def condense_obs(self) -> xr.Dataset: """ @abstractmethod - def to_2d(self, obsdim='obs') -> xr.Dataset: + def to_2d(self, obs_dimname='obs') -> xr.Dataset: """ Convert dataset into a 2D dataset from. """ diff --git a/trajan/traj1d.py b/trajan/traj1d.py index a7dfe3d..6ac717a 100644 --- a/trajan/traj1d.py +++ b/trajan/traj1d.py @@ -14,8 +14,8 @@ class Traj1d(Traj): A structured dataset, where each trajectory is always given at the same times. Typically the output from a model or from a gridded dataset. """ - def __init__(self, ds, obsdim, timedim): - super().__init__(ds, obsdim, timedim) + def __init__(self, ds, obs_dimname, time_varname): + super().__init__(ds, obs_dimname, time_varname) def timestep(self): """Time step between observations in seconds.""" @@ -28,14 +28,14 @@ def is_1d(self): def is_2d(self): return False - def to_2d(self, obsdim='obs'): + def to_2d(self, obs_dimname='obs'): ds = self.ds.copy() - time = ds[self.timedim].rename({ - self.timedim: obsdim + time = ds[self.time_varname].rename({ + self.time_varname: obs_dimname }).expand_dims(dim={'trajectory': ds.sizes['trajectory']}) - ds = ds.rename({self.timedim: obsdim}) - ds[self.timedim] = time - ds[obsdim] = np.arange(0, ds.sizes[obsdim]) + ds = ds.rename({self.time_varname: obs_dimname}) + ds[self.time_varname] = time + ds[obs_dimname] = np.arange(0, ds.sizes[obs_dimname]) return ds @@ -101,8 +101,8 @@ def skill(self, other, method='liu-weissberg', **kwargs): ) diff = np.max( - np.abs((self.ds[self.obsdim] - - other[other.traj.obsdim]).astype('timedelta64[s]').astype( + np.abs((self.ds[self.obs_dimname] - + other[other.traj.obs_dimname]).astype('timedelta64[s]').astype( np.float64))) if not np.isclose(diff, 0): @@ -112,11 +112,11 @@ def skill(self, other, method='liu-weissberg', **kwargs): s = np.zeros((self.ds.sizes['trajectory']), dtype=np.float32) - # ds = self.ds.dropna(dim=self.obsdim) - # other = other.dropna(dim=other.traj.obsdim) + # ds = self.ds.dropna(dim=self.obs_dimname) + # other = other.dropna(dim=other.traj.obs_dimname) - ds = self.ds.transpose('trajectory', self.obsdim, ...) - other = other.transpose('trajectory', other.traj.obsdim, ...) + ds = self.ds.transpose('trajectory', self.obs_dimname, ...) + other = other.transpose('trajectory', other.traj.obs_dimname, ...) lon0 = ds.traj.tlon lat0 = ds.traj.tlat @@ -138,12 +138,12 @@ def skill(self, other, method='liu-weissberg', **kwargs): attrs={'method': method}) def seltime(self, t0=None, t1=None): - return self.ds.sel({self.timedim: slice(t0, t1)}) + return self.ds.sel({self.time_varname: slice(t0, t1)}) def iseltime(self, i): - return self.ds.isel({self.timedim: i}) + return self.ds.isel({self.time_varname: i}) - def gridtime(self, times, timedim=None, round=True): + def gridtime(self, times, time_varname=None, round=True): if isinstance(times, str) or isinstance( times, pd.Timedelta): # Make time series with given interval if round is True: @@ -161,27 +161,27 @@ def gridtime(self, times, timedim=None, round=True): if not isinstance(times, np.ndarray): times = times.to_numpy() - timedim = self.timedim if timedim is None else timedim + time_varname = self.time_varname if time_varname is None else time_varname ds = self.ds - if self.obsdim != timedim: + if self.obs_dimname != time_varname: ds = ds.rename({ - self.obsdim: timedim - }).set_index({timedim: timedim}) + self.obs_dimname: time_varname + }).set_index({time_varname: time_varname}) - _, ui = np.unique(ds[timedim], return_index=True) + _, ui = np.unique(ds[time_varname], return_index=True) - if len(ui) != len(self.ds[timedim]): + if len(ui) != len(self.ds[time_varname]): logger.warning('non-unique time points, dropping time-duplicates') - ds = ds.isel({timedim: ui}) - ds = ds.isel({timedim: np.where(~pd.isna(ds[timedim].values))[0]}) + ds = ds.isel({time_varname: ui}) + ds = ds.isel({time_varname: np.where(~pd.isna(ds[time_varname].values))[0]}) - if ds.sizes[timedim] > 0: - ds = ds.interp({timedim: times}) + if ds.sizes[time_varname] > 0: + ds = ds.interp({time_varname: times}) else: - logger.warning(f"time dimension ({timedim}) is zero size") + logger.warning(f"time dimension ({time_varname}) is zero size") if not 'trajectory' in ds.dims: ds = ds.expand_dims('trajectory') diff --git a/trajan/traj2d.py b/trajan/traj2d.py index 1f763df..f7466c7 100644 --- a/trajan/traj2d.py +++ b/trajan/traj2d.py @@ -8,11 +8,11 @@ logger = logging.getLogger(__name__) -def __require_obsdim__(f): +def __require_obs_dimname__(f): """This decorator is for methods of Traj that require a time or obs dimension to work.""" def wrapper(*args, **kwargs): - if args[0].obsdim is None: + if args[0].obs_dimname is None: raise ValueError(f'{f} requires an obs or time dimension') return f(*args, **kwargs) @@ -24,8 +24,8 @@ class Traj2d(Traj): A unstructured dataset, where each trajectory may have observations at different times. Typically from a collection of drifters. """ - def __init__(self, ds, obsdim, timedim): - super().__init__(ds, obsdim, timedim) + def __init__(self, ds, obs_dimname, time_varname): + super().__init__(ds, obs_dimname, time_varname) def timestep(self, average=np.nanmedian): """ @@ -132,30 +132,30 @@ def drop_where(self, condition): return xr.concat(trajs, dim='trajectory') - @__require_obsdim__ + @__require_obs_dimname__ def condense_obs(self) -> xr.Dataset: - on = self.ds.sizes[self.obsdim] + on = self.ds.sizes[self.obs_dimname] logger.debug(f'Condensing {on} observations.') ds = self.ds.copy(deep=True) # The observation coordinate will be re-written - ds = ds.drop_vars([self.obsdim]) + ds = ds.drop_vars([self.obs_dimname]) - assert self.obsdim in ds[ - self.timedim].dims, "observation not a coordinate of time variable" + assert self.obs_dimname in ds[ + self.time_varname].dims, "observation not a coordinate of time variable" # Move all observations for each trajectory to starting row maxN = 0 for ti in range(len(ds.trajectory)): obsvars = [ - var for var in ds.variables if self.obsdim in ds[var].dims + var for var in ds.variables if self.obs_dimname in ds[var].dims ] iv = np.full(on, False) for var in obsvars: ivv = ~pd.isnull( - ds[self.timedim][ti, :]) # valid times in this trajectory. + ds[self.time_varname][ti, :]) # valid times in this trajectory. iv = np.logical_or(iv, ivv) N = np.count_nonzero(iv) @@ -178,46 +178,46 @@ def condense_obs(self) -> xr.Dataset: # ), "Varying number of valid observations within same trajectory." logger.debug(f'Condensed observations from: {on} to {maxN}') - ds = ds.isel({self.obsdim: slice(0, maxN)}) + ds = ds.isel({self.obs_dimname: slice(0, maxN)}) # Write new observation coordinate. obs = np.arange(0, maxN) - ds = ds.assign_coords({self.obsdim: obs}) + ds = ds.assign_coords({self.obs_dimname: obs}) return ds - @__require_obsdim__ + @__require_obs_dimname__ def seltime(self, t0=None, t1=None): if t0 is None: - t0 = np.nanmin(self.ds[self.timedim].values.ravel()) + t0 = np.nanmin(self.ds[self.time_varname].values.ravel()) if t1 is None: - t1 = np.nanmax(self.ds[self.timedim].values.ravel()) + t1 = np.nanmax(self.ds[self.time_varname].values.ravel()) t0 = pd.to_datetime(t0) t1 = pd.to_datetime(t1) - return self.ds.where(np.logical_and(self.ds[self.timedim] >= t0, - self.ds[self.timedim] <= t1), + return self.ds.where(np.logical_and(self.ds[self.time_varname] >= t0, + self.ds[self.time_varname] <= t1), drop=True) - @__require_obsdim__ + @__require_obs_dimname__ def iseltime(self, i): def select(t): - ii = np.argwhere(~pd.isna(t[self.timedim]).squeeze()) + ii = np.argwhere(~pd.isna(t[self.time_varname]).squeeze()) ii = ii[i].squeeze() - o = t.isel({self.obsdim: ii}) + o = t.isel({self.obs_dimname: ii}) - if self.obsdim in o.dims: + if self.obs_dimname in o.dims: return o else: - return o.expand_dims(self.obsdim) + return o.expand_dims(self.obs_dimname) return self.ds.groupby('trajectory').map(select) - @__require_obsdim__ - def gridtime(self, times, timedim=None, round=True): + @__require_obs_dimname__ + def gridtime(self, times, time_varname=None, round=True): if isinstance(times, str) or isinstance( times, pd.Timedelta): # Make time series with given interval if round is True: @@ -235,27 +235,27 @@ def gridtime(self, times, timedim=None, round=True): if not isinstance(times, np.ndarray): times = times.to_numpy() - timedim = self.timedim if timedim is None else timedim + time_varname = self.time_varname if time_varname is None else time_varname d = None for t in range(self.ds.sizes['trajectory']): dt = self.ds.isel(trajectory=t) \ - .dropna(self.obsdim, how='all') + .dropna(self.obs_dimname, how='all') - dt = dt.assign_coords({self.obsdim : dt[self.timedim].values }) \ - .drop_vars(self.timedim) \ - .rename({self.obsdim : timedim}) \ - .set_index({timedim: timedim}) + dt = dt.assign_coords({self.obs_dimname : dt[self.time_varname].values }) \ + .drop_vars(self.time_varname) \ + .rename({self.obs_dimname : time_varname}) \ + .set_index({time_varname: time_varname}) - _, ui = np.unique(dt[timedim], return_index=True) - dt = dt.isel({timedim: ui}) - dt = dt.isel({timedim: np.where(~pd.isna(dt[timedim].values))[0]}) + _, ui = np.unique(dt[time_varname], return_index=True) + dt = dt.isel({time_varname: ui}) + dt = dt.isel({time_varname: np.where(~pd.isna(dt[time_varname].values))[0]}) - if dt.sizes[timedim] > 0: - dt = dt.interp({timedim: times}) + if dt.sizes[time_varname] > 0: + dt = dt.interp({time_varname: times}) else: - logger.warning(f"time dimension ({timedim}) is zero size") + logger.warning(f"time dimension ({time_varname}) is zero size") if d is None: d = dt.expand_dims('trajectory') From 657e40bb4ac6417ac7f7c2075e8e0c5266bf15fe Mon Sep 17 00:00:00 2001 From: Knut-Frode Dagestad Date: Wed, 13 Nov 2024 14:31:11 +0100 Subject: [PATCH 2/4] Renamed attribute obs_dimname to obs_dim, consistent with Xarray practice --- .../example_find_positions_at_obs_times.py | 2 +- tests/test_read_sfy.py | 2 +- trajan/accessor.py | 40 +++++++++---------- trajan/ragged.py | 16 ++++---- trajan/traj.py | 40 +++++++++---------- trajan/traj1d.py | 28 ++++++------- trajan/traj2d.py | 40 +++++++++---------- 7 files changed, 84 insertions(+), 84 deletions(-) diff --git a/examples/example_find_positions_at_obs_times.py b/examples/example_find_positions_at_obs_times.py index a117986..c3c4e01 100644 --- a/examples/example_find_positions_at_obs_times.py +++ b/examples/example_find_positions_at_obs_times.py @@ -25,7 +25,7 @@ def gridwaves(tds): t = tds[['lat', 'lon', 'time']].traj.gridtime(tds['time_waves_imu'].squeeze()) - return t.traj.to_2d(obs_dimname='obs_waves_imu') + return t.traj.to_2d(obs_dim='obs_waves_imu') dsw = ds.groupby('trajectory').map(gridwaves) diff --git a/tests/test_read_sfy.py b/tests/test_read_sfy.py index 0277232..608f0b9 100644 --- a/tests/test_read_sfy.py +++ b/tests/test_read_sfy.py @@ -8,7 +8,7 @@ def test_interpret_sfy(test_data): ds = xr.open_dataset(test_data / 'bug32.nc') print(ds) - assert ds.traj.obs_dimname == 'package' + assert ds.traj.obs_dim == 'package' assert ds.traj.time_varname == 'position_time' assert ds.traj.is_2d() diff --git a/trajan/accessor.py b/trajan/accessor.py index 1b53805..864c53e 100644 --- a/trajan/accessor.py +++ b/trajan/accessor.py @@ -26,10 +26,10 @@ def detect_tx_dim(ds): raise ValueError("Could not determine x / lon variable") -def detect_time_dim(ds, obs_dimname): - logger.debug(f'Detecting time-dimension for "{obs_dimname}"..') +def detect_time_dim(ds, obs_dim): + logger.debug(f'Detecting time-dimension for "{obs_dim}"..') for v in ds.variables: - if obs_dimname in ds[v].dims and 'time' in v: + if obs_dim in ds[v].dims and 'time' in v: return v raise ValueError("no time dimension detected") @@ -47,7 +47,7 @@ def __new__(cls, ds): ds = ds.expand_dims({'trajectory': 1}) ds['trajectory'].attrs['cf_role'] = 'trajectory_id' - obs_dimname = None + obs_dim = None time_varname = None tx = detect_tx_dim(ds) @@ -62,7 +62,7 @@ def __new__(cls, ds): # NOTE: this is probably not standard; something to point to the CF conventions? # NOTE: for now, there is no discovery of the "index" dim, this is hardcorded; any way to do better? if "index" in tx.dims: - obs_dimname = "index" + obs_dim = "index" # discover the timecoord variable name ####################### # find all variables with standard_name "time" @@ -90,14 +90,14 @@ def __new__(cls, ds): else: raise ValueError(f"cannot deduce rowsizevar; we have the following candidates: {with_dim_trajectory = }") # sanity check - if not np.sum(ds[rowsizevar].to_numpy()) == len(ds[obs_dimname]): + if not np.sum(ds[rowsizevar].to_numpy()) == len(ds[obs_dim]): raise ValueError("mismatch between the index length and the sum of the deduced trajectory lengths") logger.debug( - f"1D storage dataset; detected: {obs_dimname = }, {timecoord = }, {trajectorycoord = }, {rowsizevar}" + f"1D storage dataset; detected: {obs_dim = }, {timecoord = }, {trajectorycoord = }, {rowsizevar}" ) - return ocls(ds, obs_dimname, timecoord, trajectorycoord, rowsizevar) + return ocls(ds, obs_dim, timecoord, trajectorycoord, rowsizevar) else: logging.warning(f"{ds} has {tx.dims = } which is of dimension 1 but is not index; this is a bit unusual; try to parse with Traj1d or Traj2d") @@ -105,15 +105,15 @@ def __new__(cls, ds): # we have a ds where 2D arrays are used to store data, this is either Traj1d or Traj2d # there may also be some slightly unusual cases where these Traj1d and Traj2d classes will be used on data with 1D arrays if 'obs' in tx.dims: - obs_dimname = 'obs' - time_varname = detect_time_dim(ds, obs_dimname) + obs_dim = 'obs' + time_varname = detect_time_dim(ds, obs_dim) elif 'index' in tx.dims: - obs_dimname = 'obs' - time_varname = detect_time_dim(ds, obs_dimname) + obs_dim = 'obs' + time_varname = detect_time_dim(ds, obs_dim) elif 'time' in tx.dims: - obs_dimname = 'time' + obs_dim = 'time' time_varname = 'time' else: @@ -122,18 +122,18 @@ def __new__(cls, ds): 'cf_role', None) == 'trajectory_id' and not 'traj' in d: - obs_dimname = d - time_varname = detect_time_dim(ds, obs_dimname) + obs_dim = d + time_varname = detect_time_dim(ds, obs_dim) break - if obs_dimname is None: + if obs_dim is None: logger.warning('No time or obs dimension detected.') logger.debug( - f"Detected obs-dim: {obs_dimname}, detected time-dim: {time_varname}.") + f"Detected obs-dim: {obs_dim}, detected time-variable: {time_varname}.") - if obs_dimname is None: + if obs_dim is None: ocls = Traj1d elif len(ds[time_varname].shape) <= 1: @@ -146,7 +146,7 @@ def __new__(cls, ds): else: raise ValueError( - f'Time dimension has shape greater than 2: {ds["time_varname"].shape}' + f'Time variable has more than two dimensions: {ds[time_varname].shape}' ) - return ocls(ds, obs_dimname, time_varname) + return ocls(ds, obs_dim, time_varname) diff --git a/trajan/ragged.py b/trajan/ragged.py index 9e2970d..3a6f5b2 100644 --- a/trajan/ragged.py +++ b/trajan/ragged.py @@ -14,12 +14,12 @@ class ContiguousRagged(Traj): trajdim: str rowvar: str - def __init__(self, ds, obs_dimname, time_varname, trajectorycoord, rowsizevar): + def __init__(self, ds, obs_dim, time_varname, trajectorycoord, rowsizevar): self.trajdim = trajectorycoord self.rowvar = rowsizevar - super().__init__(ds, obs_dimname, time_varname) + super().__init__(ds, obs_dim, time_varname) - def to_2d(self, obs_dimname='obs'): + def to_2d(self, obs_dim='obs'): """This actually converts a contiguous ragged xarray Dataset into an xarray Dataset that follows the Traj2d conventions.""" global_attrs = self.ds.attrs @@ -70,7 +70,7 @@ def to_2d(self, obs_dimname='obs'): # trajectory vars 'time': - xr.DataArray(dims=["trajectory", obs_dimname], + xr.DataArray(dims=["trajectory", obs_dim], data=array_time, attrs={ "standard_name": "time", @@ -81,7 +81,7 @@ def to_2d(self, obs_dimname='obs'): # now add all "normal" variables # NOTE: for now, we only consider scalar vars; if we want to consider more complex vars (e.g., spectra), this will need updated - # NOTE: such an update would typically need to look at the dims of the variable, and if there are additional dims to obs_dimname, create a higer dim variable + # NOTE: such an update would typically need to look at the dims of the variable, and if there are additional dims to obs_dim, create a higer dim variable for crrt_data_var in self.ds.data_vars: attrs = self.ds[crrt_data_var].attrs @@ -90,9 +90,9 @@ def to_2d(self, obs_dimname='obs'): continue if len(self.ds[crrt_data_var].dims - ) != 1 or self.ds[crrt_data_var].dims[0] != self.obs_dimname: + ) != 1 or self.ds[crrt_data_var].dims[0] != self.obs_dim: raise ValueError( - f"data_vars element {crrt_data_var} has dims {self.ds[crrt_data_var].dims}, expected {(self.obs_dimname,)}" + f"data_vars element {crrt_data_var} has dims {self.ds[crrt_data_var].dims}, expected {(self.obs_dim,)}" ) crrt_var = np.full((nbr_trajectories, longest_trajectory), np.nan) @@ -119,7 +119,7 @@ def to_2d(self, obs_dimname='obs'): crrt_data_var = "lat" ds_converted_to_traj2d[crrt_data_var] = \ - xr.DataArray(dims=["trajectory", obs_dimname], + xr.DataArray(dims=["trajectory", obs_dim], data=crrt_var, attrs=attrs) diff --git a/trajan/traj.py b/trajan/traj.py index c035bcb..b5aa7ed 100644 --- a/trajan/traj.py +++ b/trajan/traj.py @@ -33,10 +33,10 @@ def detect_tx_dim(ds): raise ValueError("Could not determine x / lon variable") -def detect_time_dim(ds, obs_dimname): - logger.debug(f'Detecting time-dimension for "{obs_dimname}"..') +def detect_time_dim(ds, obs_dim): + logger.debug(f'Detecting time-dimension for "{obs_dim}"..') for v in ds.variables: - if obs_dimname in ds[v].dims and 'time' in v: + if obs_dim in ds[v].dims and 'time' in v: return v raise ValueError("no time dimension detected") @@ -50,12 +50,12 @@ class Traj: __gcrs__: pyproj.CRS - def __init__(self, ds, obs_dimname, time_varname): + def __init__(self, ds, obs_dim, time_varname): self.ds = ds self.__plot__ = None self.__animate__ = None self.__gcrs__ = pyproj.CRS.from_epsg(4326) - self.obs_dimname = obs_dimname # dimension along which time increases + self.obs_dim = obs_dim # dimension along which time increases self.time_varname = time_varname def __repr__(self): @@ -528,18 +528,18 @@ def distance_to_next(self): lon = self.ds.lon lat = self.ds.lat - lenobs = self.ds.sizes[self.obs_dimname] - lonfrom = lon.isel({self.obs_dimname: slice(0, lenobs - 1)}) - latfrom = lat.isel({self.obs_dimname: slice(0, lenobs - 1)}) - lonto = lon.isel({self.obs_dimname: slice(1, lenobs)}) - latto = lat.isel({self.obs_dimname: slice(1, lenobs)}) + lenobs = self.ds.sizes[self.obs_dim] + lonfrom = lon.isel({self.obs_dim: slice(0, lenobs - 1)}) + latfrom = lat.isel({self.obs_dim: slice(0, lenobs - 1)}) + lonto = lon.isel({self.obs_dim: slice(1, lenobs)}) + latto = lat.isel({self.obs_dim: slice(1, lenobs)}) geod = pyproj.Geod(ellps='WGS84') azimuth_forward, a2, distance = geod.inv(lonfrom, latfrom, lonto, latto) distance = xr.DataArray(distance, coords=lonfrom.coords, dims=lon.dims) - distance = xr.concat((distance, distance.isel({self.obs_dimname: -1})), - dim=self.obs_dimname) # repeating last time step to + distance = xr.concat((distance, distance.isel({self.obs_dim: -1})), + dim=self.obs_dim) # repeating last time step to return distance def azimuth_to_next(self): @@ -560,11 +560,11 @@ def azimuth_to_next(self): # TODO: method is almost duplicate of "distance_to_next" above lon = self.ds.lon lat = self.ds.lat - lenobs = self.ds.dims[self.obs_dimname] - lonfrom = lon.isel({self.obs_dimname: slice(0, lenobs - 1)}) - latfrom = lat.isel({self.obs_dimname: slice(0, lenobs - 1)}) - lonto = lon.isel({self.obs_dimname: slice(1, lenobs)}) - latto = lat.isel({self.obs_dimname: slice(1, lenobs)}) + lenobs = self.ds.dims[self.obs_dim] + lonfrom = lon.isel({self.obs_dim: slice(0, lenobs - 1)}) + latfrom = lat.isel({self.obs_dim: slice(0, lenobs - 1)}) + lonto = lon.isel({self.obs_dim: slice(1, lenobs)}) + latto = lat.isel({self.obs_dim: slice(1, lenobs)}) geod = pyproj.Geod(ellps='WGS84') azimuth_forward, a2, distance = geod.inv(lonfrom, latfrom, lonto, latto) @@ -573,8 +573,8 @@ def azimuth_to_next(self): coords=lonfrom.coords, dims=lon.dims) azimuth_forward = xr.concat( - (azimuth_forward, azimuth_forward.isel({self.obs_dimname: -1})), - dim=self.obs_dimname) # repeating last time step to + (azimuth_forward, azimuth_forward.isel({self.obs_dim: -1})), + dim=self.obs_dim) # repeating last time step to return azimuth_forward def velocity_components(self): @@ -890,7 +890,7 @@ def condense_obs(self) -> xr.Dataset: """ @abstractmethod - def to_2d(self, obs_dimname='obs') -> xr.Dataset: + def to_2d(self, obs_dim='obs') -> xr.Dataset: """ Convert dataset into a 2D dataset from. """ diff --git a/trajan/traj1d.py b/trajan/traj1d.py index 6ac717a..0310c97 100644 --- a/trajan/traj1d.py +++ b/trajan/traj1d.py @@ -14,8 +14,8 @@ class Traj1d(Traj): A structured dataset, where each trajectory is always given at the same times. Typically the output from a model or from a gridded dataset. """ - def __init__(self, ds, obs_dimname, time_varname): - super().__init__(ds, obs_dimname, time_varname) + def __init__(self, ds, obs_dim, time_varname): + super().__init__(ds, obs_dim, time_varname) def timestep(self): """Time step between observations in seconds.""" @@ -28,14 +28,14 @@ def is_1d(self): def is_2d(self): return False - def to_2d(self, obs_dimname='obs'): + def to_2d(self, obs_dim='obs'): ds = self.ds.copy() time = ds[self.time_varname].rename({ - self.time_varname: obs_dimname + self.time_varname: obs_dim }).expand_dims(dim={'trajectory': ds.sizes['trajectory']}) - ds = ds.rename({self.time_varname: obs_dimname}) + ds = ds.rename({self.time_varname: obs_dim}) ds[self.time_varname] = time - ds[obs_dimname] = np.arange(0, ds.sizes[obs_dimname]) + ds[obs_dim] = np.arange(0, ds.sizes[obs_dim]) return ds @@ -101,8 +101,8 @@ def skill(self, other, method='liu-weissberg', **kwargs): ) diff = np.max( - np.abs((self.ds[self.obs_dimname] - - other[other.traj.obs_dimname]).astype('timedelta64[s]').astype( + np.abs((self.ds[self.obs_dim] - + other[other.traj.obs_dim]).astype('timedelta64[s]').astype( np.float64))) if not np.isclose(diff, 0): @@ -112,11 +112,11 @@ def skill(self, other, method='liu-weissberg', **kwargs): s = np.zeros((self.ds.sizes['trajectory']), dtype=np.float32) - # ds = self.ds.dropna(dim=self.obs_dimname) - # other = other.dropna(dim=other.traj.obs_dimname) + # ds = self.ds.dropna(dim=self.obs_dim) + # other = other.dropna(dim=other.traj.obs_dim) - ds = self.ds.transpose('trajectory', self.obs_dimname, ...) - other = other.transpose('trajectory', other.traj.obs_dimname, ...) + ds = self.ds.transpose('trajectory', self.obs_dim, ...) + other = other.transpose('trajectory', other.traj.obs_dim, ...) lon0 = ds.traj.tlon lat0 = ds.traj.tlat @@ -165,9 +165,9 @@ def gridtime(self, times, time_varname=None, round=True): ds = self.ds - if self.obs_dimname != time_varname: + if self.obs_dim != time_varname: ds = ds.rename({ - self.obs_dimname: time_varname + self.obs_dim: time_varname }).set_index({time_varname: time_varname}) _, ui = np.unique(ds[time_varname], return_index=True) diff --git a/trajan/traj2d.py b/trajan/traj2d.py index f7466c7..9cb72a1 100644 --- a/trajan/traj2d.py +++ b/trajan/traj2d.py @@ -8,11 +8,11 @@ logger = logging.getLogger(__name__) -def __require_obs_dimname__(f): +def __require_obs_dim__(f): """This decorator is for methods of Traj that require a time or obs dimension to work.""" def wrapper(*args, **kwargs): - if args[0].obs_dimname is None: + if args[0].obs_dim is None: raise ValueError(f'{f} requires an obs or time dimension') return f(*args, **kwargs) @@ -24,8 +24,8 @@ class Traj2d(Traj): A unstructured dataset, where each trajectory may have observations at different times. Typically from a collection of drifters. """ - def __init__(self, ds, obs_dimname, time_varname): - super().__init__(ds, obs_dimname, time_varname) + def __init__(self, ds, obs_dim, time_varname): + super().__init__(ds, obs_dim, time_varname) def timestep(self, average=np.nanmedian): """ @@ -132,25 +132,25 @@ def drop_where(self, condition): return xr.concat(trajs, dim='trajectory') - @__require_obs_dimname__ + @__require_obs_dim__ def condense_obs(self) -> xr.Dataset: - on = self.ds.sizes[self.obs_dimname] + on = self.ds.sizes[self.obs_dim] logger.debug(f'Condensing {on} observations.') ds = self.ds.copy(deep=True) # The observation coordinate will be re-written - ds = ds.drop_vars([self.obs_dimname]) + ds = ds.drop_vars([self.obs_dim]) - assert self.obs_dimname in ds[ + assert self.obs_dim in ds[ self.time_varname].dims, "observation not a coordinate of time variable" # Move all observations for each trajectory to starting row maxN = 0 for ti in range(len(ds.trajectory)): obsvars = [ - var for var in ds.variables if self.obs_dimname in ds[var].dims + var for var in ds.variables if self.obs_dim in ds[var].dims ] iv = np.full(on, False) for var in obsvars: @@ -178,15 +178,15 @@ def condense_obs(self) -> xr.Dataset: # ), "Varying number of valid observations within same trajectory." logger.debug(f'Condensed observations from: {on} to {maxN}') - ds = ds.isel({self.obs_dimname: slice(0, maxN)}) + ds = ds.isel({self.obs_dim: slice(0, maxN)}) # Write new observation coordinate. obs = np.arange(0, maxN) - ds = ds.assign_coords({self.obs_dimname: obs}) + ds = ds.assign_coords({self.obs_dim: obs}) return ds - @__require_obs_dimname__ + @__require_obs_dim__ def seltime(self, t0=None, t1=None): if t0 is None: t0 = np.nanmin(self.ds[self.time_varname].values.ravel()) @@ -200,23 +200,23 @@ def seltime(self, t0=None, t1=None): self.ds[self.time_varname] <= t1), drop=True) - @__require_obs_dimname__ + @__require_obs_dim__ def iseltime(self, i): def select(t): ii = np.argwhere(~pd.isna(t[self.time_varname]).squeeze()) ii = ii[i].squeeze() - o = t.isel({self.obs_dimname: ii}) + o = t.isel({self.obs_dim: ii}) - if self.obs_dimname in o.dims: + if self.obs_dim in o.dims: return o else: - return o.expand_dims(self.obs_dimname) + return o.expand_dims(self.obs_dim) return self.ds.groupby('trajectory').map(select) - @__require_obs_dimname__ + @__require_obs_dim__ def gridtime(self, times, time_varname=None, round=True): if isinstance(times, str) or isinstance( times, pd.Timedelta): # Make time series with given interval @@ -241,11 +241,11 @@ def gridtime(self, times, time_varname=None, round=True): for t in range(self.ds.sizes['trajectory']): dt = self.ds.isel(trajectory=t) \ - .dropna(self.obs_dimname, how='all') + .dropna(self.obs_dim, how='all') - dt = dt.assign_coords({self.obs_dimname : dt[self.time_varname].values }) \ + dt = dt.assign_coords({self.obs_dim : dt[self.time_varname].values }) \ .drop_vars(self.time_varname) \ - .rename({self.obs_dimname : time_varname}) \ + .rename({self.obs_dim : time_varname}) \ .set_index({time_varname: time_varname}) _, ui = np.unique(dt[time_varname], return_index=True) From bf3838c37ad5813f80e425b53481731d36e36dd2 Mon Sep 17 00:00:00 2001 From: Knut-Frode Dagestad Date: Wed, 13 Nov 2024 14:45:43 +0100 Subject: [PATCH 3/4] Renamed detect_time_tim->detect_time_variable and detect_tx_dim->detect_tx_variable --- trajan/accessor.py | 30 +++++++++--------------------- trajan/traj.py | 13 ++----------- 2 files changed, 11 insertions(+), 32 deletions(-) diff --git a/trajan/accessor.py b/trajan/accessor.py index 864c53e..ca2569a 100644 --- a/trajan/accessor.py +++ b/trajan/accessor.py @@ -7,32 +7,20 @@ logger = logging.getLogger(__name__) -from .traj import Traj +from .traj import Traj, detect_tx_variable from .traj1d import Traj1d from .traj2d import Traj2d from .ragged import ContiguousRagged -def detect_tx_dim(ds): - if 'lon' in ds: - return ds.lon - elif 'longitude' in ds: - return ds.longitude - elif 'x' in ds: - return ds.x - elif 'X' in ds: - return ds.X - else: - raise ValueError("Could not determine x / lon variable") - - -def detect_time_dim(ds, obs_dim): - logger.debug(f'Detecting time-dimension for "{obs_dim}"..') +def detect_time_variable(ds, obs_dim): + logger.debug(f'Detecting time-variable for "{obs_dim}"..') + # TODO: should use cf-xarray here for v in ds.variables: if obs_dim in ds[v].dims and 'time' in v: return v - raise ValueError("no time dimension detected") + raise ValueError("No time variable detected") @xr.register_dataset_accessor("traj") @@ -50,7 +38,7 @@ def __new__(cls, ds): obs_dim = None time_varname = None - tx = detect_tx_dim(ds) + tx = detect_tx_variable(ds) # if we have a 1D dims, this is most likely some contiguous data # there may be a few exceptions though, so be ready to default to the classical 2D parser below @@ -106,11 +94,11 @@ def __new__(cls, ds): # there may also be some slightly unusual cases where these Traj1d and Traj2d classes will be used on data with 1D arrays if 'obs' in tx.dims: obs_dim = 'obs' - time_varname = detect_time_dim(ds, obs_dim) + time_varname = detect_time_variable(ds, obs_dim) elif 'index' in tx.dims: obs_dim = 'obs' - time_varname = detect_time_dim(ds, obs_dim) + time_varname = detect_time_variable(ds, obs_dim) elif 'time' in tx.dims: obs_dim = 'time' @@ -123,7 +111,7 @@ def __new__(cls, ds): None) == 'trajectory_id' and not 'traj' in d: obs_dim = d - time_varname = detect_time_dim(ds, obs_dim) + time_varname = detect_time_variable(ds, obs_dim) break diff --git a/trajan/traj.py b/trajan/traj.py index b5aa7ed..d3be6ef 100644 --- a/trajan/traj.py +++ b/trajan/traj.py @@ -20,7 +20,7 @@ logger = logging.getLogger(__name__) -def detect_tx_dim(ds): +def detect_tx_variable(ds): if 'lon' in ds: return ds.lon elif 'longitude' in ds: @@ -33,15 +33,6 @@ def detect_tx_dim(ds): raise ValueError("Could not determine x / lon variable") -def detect_time_dim(ds, obs_dim): - logger.debug(f'Detecting time-dimension for "{obs_dim}"..') - for v in ds.variables: - if obs_dim in ds[v].dims and 'time' in v: - return v - - raise ValueError("no time dimension detected") - - class Traj: ds: xr.Dataset @@ -123,7 +114,7 @@ def tx(self): tlon, ty """ - return detect_tx_dim(self.ds) + return detect_tx_variable(self.ds) @property def ty(self): From b42c6c4574460674d2bf57b2b1cc8b01376c2a8d Mon Sep 17 00:00:00 2001 From: Knut-Frode Dagestad Date: Wed, 13 Nov 2024 15:33:23 +0100 Subject: [PATCH 4/4] Replaced some hardcoded obs with self.obs_dim. Added test for repr. --- examples/example_opendrift.py | 6 +++++- tests/test_repr.py | 13 +++++++++++++ trajan/traj.py | 4 ++-- trajan/traj1d.py | 2 +- trajan/traj2d.py | 18 +++++++++--------- 5 files changed, 30 insertions(+), 13 deletions(-) create mode 100644 tests/test_repr.py diff --git a/examples/example_opendrift.py b/examples/example_opendrift.py index ad0776f..4cacb07 100644 --- a/examples/example_opendrift.py +++ b/examples/example_opendrift.py @@ -21,7 +21,11 @@ ds = ds.where(ds.status>=0) # only active particles #%% -# Displaying a basic plot of trajectories +# Displaying some basic information about this dataset +print(ds.traj) + +#%% +# Making a basic plot of trajectories ds.traj.plot() plt.title('Basic trajectory plot') plt.show() diff --git a/tests/test_repr.py b/tests/test_repr.py new file mode 100644 index 0000000..c91e6ef --- /dev/null +++ b/tests/test_repr.py @@ -0,0 +1,13 @@ +import xarray as xr +import trajan as _ + +def test_repr_1d(opendrift_sim): + repr = str(opendrift_sim.traj) + assert '2015-11-16T00:00' in repr + assert 'Timestep: 1:00:00' in repr + assert "67 timesteps time['time'] (1D)" in repr + +def test_repr_2d(test_data): + ds = xr.open_dataset(test_data / 'bug32.nc') + repr = str(ds.traj) + assert '2023-10-19T15:46:53.514499520' in repr diff --git a/trajan/traj.py b/trajan/traj.py index d3be6ef..7c1cfb9 100644 --- a/trajan/traj.py +++ b/trajan/traj.py @@ -69,12 +69,12 @@ def __repr__(self): end_time = self.ds.time.max().data output += f'Time coverage: {start_time} - {end_time}\n' else: - output += f'Dataset has no time dimension' + output += f'Dataset has no time variable' output += f'Longitude span: {self.tx.min().data} to {self.tx.max().data}\n' output += f'Latitude span: {self.ty.min().data} to {self.ty.max().data}\n' output += 'Variables:\n' for var in self.ds.variables: - if var not in ['trajectory', 'obs']: + if var not in ['trajectory', self.obs_dim]: output += f' {var}' if 'standard_name' in self.ds[var].attrs: output += f' [{self.ds[var].standard_name}]' diff --git a/trajan/traj1d.py b/trajan/traj1d.py index 0310c97..7d3d40f 100644 --- a/trajan/traj1d.py +++ b/trajan/traj1d.py @@ -181,7 +181,7 @@ def gridtime(self, times, time_varname=None, round=True): if ds.sizes[time_varname] > 0: ds = ds.interp({time_varname: times}) else: - logger.warning(f"time dimension ({time_varname}) is zero size") + logger.warning(f"time variable ({time_varname}) has zero size") if not 'trajectory' in ds.dims: ds = ds.expand_dims('trajectory') diff --git a/trajan/traj2d.py b/trajan/traj2d.py index 9cb72a1..fbd8703 100644 --- a/trajan/traj2d.py +++ b/trajan/traj2d.py @@ -42,11 +42,11 @@ def time_to_next(self): Last time is repeated for last position (which has no next position) """ time = self.ds.time - lenobs = self.ds.sizes['obs'] + lenobs = self.ds.sizes[self.obs_dim] td = time.isel(obs=slice(1, lenobs)) - time.isel( obs=slice(0, lenobs - 1)) td = xr.concat((td, td.isel(obs=-1)), - dim='obs') # repeating last time step + dim=self.obs_dim) # repeating last time step return td def is_1d(self): @@ -59,7 +59,7 @@ def insert_nan_where(self, condition): """Insert NaN-values in trajectories after given positions, shifting rest of trajectory.""" index_of_last = self.index_of_last() - num_inserts = condition.sum(dim='obs') + num_inserts = condition.sum(dim=self.obs_dim) max_obs = (index_of_last + num_inserts).max().values # Create new empty dataset with extended obs dimension @@ -68,13 +68,13 @@ def insert_nan_where(self, condition): coords={ 'trajectory': (["trajectory"], range(self.ds.sizes['trajectory'])), - 'obs': (['obs'], range(max_obs)) # Longest trajectory + self.obs_dim: ([self.obs_dim], range(max_obs)) # Longest trajectory }, attrs=self.ds.attrs) # Add extended variables for varname, var in self.ds.data_vars.items(): - if 'obs' not in var.dims: + if self.obs_dim not in var.dims: nd[varname] = var continue # Create empty dataarray to hold interpolated values for given variable @@ -104,11 +104,11 @@ def insert_nan_where(self, condition): newdata = newdata[slice(0, max_obs - 1)] # truncating, should be checked - da[{'trajectory': t, 'obs': slice(0, len(newdata))}] = newdata + da[{'trajectory': t, self.obs_dim: slice(0, len(newdata))}] = newdata nd[varname] = da.astype(var.dtype) - nd = nd.drop_vars(('obs', 'trajectory')) # Remove coordinates + nd = nd.drop_vars((self.obs_dim, 'trajectory')) # Remove coordinates return nd @@ -121,12 +121,12 @@ def drop_where(self, condition): new = self.ds.isel(trajectory=i).drop_sel(obs=np.where( condition.isel( trajectory=i))[0]) # Dropping from given trajectory - newlen = max(newlen, new.sizes['obs']) + newlen = max(newlen, new.sizes[self.obs_dim]) trajs.append(new) # Ensure all trajectories have equal length, by padding with NaN at end trajs = [ - t.pad(pad_width={'obs': (0, newlen - t.sizes['obs'])}) + t.pad(pad_width={self.obs_dim: (0, newlen - t.sizes[self.obs_dim])}) for t in trajs ]