Merged
Changes from all commits: 46 commits, all by w-k-jones
4ea35c9  Fix test for coords in feature detection output that previously had n… (Mar 26, 2025)
f893614  Fix bug with datetime conversions during feature detection and add test (Mar 26, 2025)
e2bbfe6  Add test for datetime type in 3D data and with 360 day calendar (Mar 26, 2025)
565c386  Formatting (Mar 26, 2025)
1e43dfc  Fix terminology (Mar 26, 2025)
c507f21  Add a generators module and add generator for iterating over time thr… (Mar 27, 2025)
7dd6543  Add check for time_var_name in field_and_features_over_time and test (Mar 27, 2025)
2183b33  Update segmentation to use new field_and_features_over_time generator (Mar 27, 2025)
d732def  Add generators module (Mar 27, 2025)
db6d8d0  Add datetime conversion utils and tests (Mar 27, 2025)
05bbdbd  Update bulk statistics to use field_and_features_over_time generator (Mar 27, 2025)
1a65e2e  Update decorators, feature detection and coordinate interpolation so … (Mar 27, 2025)
5651c7e  Formatting (Mar 27, 2025)
442b984  Fix typing bug for python<3.10 (Mar 27, 2025)
46a72d6  Fix issues with coordinate interpolation in older xarray versions (Mar 28, 2025)
1125c5b  Make use of use_standard_names parameter in coordinate interpolation … (Mar 28, 2025)
498acef  Formatting (Mar 28, 2025)
5a64f78  Remove duplicate decorators from general_internal utils (Mar 28, 2025)
c6c7a31  Add new functions for finding coord names from dataframes and tests (Mar 28, 2025)
a34e9e5  Add pd.Series input to find_coord_in_dataframe (Mar 29, 2025)
96cf67f  Update calculate_distance to search for coords in dataframe using fin… (Mar 29, 2025)
bb97c8e  Formatting (Mar 29, 2025)
72c4844  Update idealised case notebooks and check velocity calculation is cor… (Mar 29, 2025)
b8ef47e  Update basic example notebooks (Mar 29, 2025)
fc08be9  Update test coverage for calculate_distance (Mar 29, 2025)
0b1133d  Improve coverage of test_datetime (Mar 29, 2025)
fe08335  Improve coverage of test_generators (Mar 29, 2025)
158c2e3  Rename and separate internal.basic and internal.general_internal utils (Mar 29, 2025)
4586984  Rename and separate tobac.utils.internal.basic and general_internal t… (Mar 29, 2025)
a2d25f6  Formatting (Mar 29, 2025)
8359a3f  Fix testing error for mismatched coordinates in test_analysis_spatial (Mar 29, 2025)
8ae49ce  Add tests for missing error cases in find_dataframe_horizontal_coords (Mar 29, 2025)
98b4303  Remove check for error that can never occur (Mar 30, 2025)
ee07916  Expand test coverage for errors (Mar 30, 2025)
5584ee2  Formatting (Mar 30, 2025)
a46541f  Revert outdated changes to internal_utils.coordinates (Mar 30, 2025)
51c9e29  Remove commented out lines (Mar 30, 2025)
c8a6dd6  Fix issue with PBCs in merge_split_MEST where both min/max for hdim_1… (Apr 2, 2025)
b2d23ff  Formatting (Apr 2, 2025)
ed04521  Update spatial analysis tests to add documentation and improve granul… (Apr 15, 2025)
ec91fe5  Add docstrings for datetime utils (Apr 18, 2025)
7731e60  Add documentation to datetime tests (Apr 18, 2025)
242054d  Formatting (Apr 18, 2025)
17ee9eb  Add documentation to generator tests (Apr 18, 2025)
d528cbc  Add documentation for test_utils_coordinates (Apr 18, 2025)
d231cb3  Formatting (Apr 18, 2025)
915 changes: 422 additions & 493 deletions examples/Basics/Idealized-Case-1_Tracking-of-a-Test-Blob-in-2D.ipynb

Large diffs are not rendered by default.

394 changes: 127 additions & 267 deletions examples/Basics/Idealized-Case-2_Two_crossing_Blobs.ipynb

Large diffs are not rendered by default.

357 changes: 83 additions & 274 deletions examples/Basics/Methods-and-Parameters-for-Linking.ipynb

Large diffs are not rendered by default.

174 changes: 30 additions & 144 deletions examples/Basics/Methods-and-Parameters-for-Segmentation.ipynb

Large diffs are not rendered by default.

98 changes: 61 additions & 37 deletions tobac/analysis/spatial.py
@@ -4,15 +4,20 @@

import logging
from itertools import combinations
from typing import Literal, Optional, Union

import numpy as np
import pandas as pd
import xarray as xr
from iris.analysis.cartography import area_weights

from tobac.utils.bulk_statistics import get_statistics_from_mask
from tobac.utils.internal.basic import find_vertical_coord_name
from tobac.utils import decorators
from tobac.utils.internal.coordinates import (
COMMON_LON_COORDS,
find_dataframe_horizontal_coords,
find_vertical_coord_name,
)

__all__ = (
"haversine",
Expand Down Expand Up @@ -61,7 +66,13 @@ def haversine(lat1, lon1, lat2, lon2):
return arclen * RADIUS_EARTH


def calculate_distance(feature_1, feature_2, method_distance=None):
def calculate_distance(
feature_1: pd.DataFrame,
feature_2: pd.DataFrame,
method_distance: Optional[Literal["xy", "latlon"]] = None,
hdim1_coord: Optional[str] = None,
hdim2_coord: Optional[str] = None,
) -> Union[float, pd.Series]:
"""Compute the distance between two features. It is based on
either lat/lon coordinates or x/y coordinates.

@@ -79,6 +90,11 @@ def calculate_distance(feature_1, feature_2, method_distance=None):
distance. None checks whether the required coordinates are
present and starts with 'xy'. Default is None.

hdim1_coord, hdim2_coord : str, optional (default: None)
The names of the coordinates for the two horizontal dimensions to use.
If None, tobac.utils.internal.coordinates.find_dataframe_horizontal_coords
will be used to search for coordinate names present in both dataframes.

Returns
-------
distance : float or pandas.Series
@@ -88,48 +104,56 @@ def calculate_distance(feature_1, feature_2, method_distance=None):
multiple features.

"""
if method_distance is None:
if (
("projection_x_coordinate" in feature_1)
and ("projection_y_coordinate" in feature_1)
and ("projection_x_coordinate" in feature_2)
and ("projection_y_coordinate" in feature_2)
):
method_distance = "xy"
elif (
("latitude" in feature_1)
and ("longitude" in feature_1)
and ("latitude" in feature_2)
and ("longitude" in feature_2)
):
method_distance = "latlon"
else:
raise ValueError(
"either latitude/longitude or projection_x_coordinate/projection_y_coordinate have to be present to calculate distances"
)

if method_distance is None and (hdim1_coord is not None or hdim2_coord is not None):
raise ValueError(
"method_distance parameter must be provided if eithe hdim1_coord or hdim2_coord are specified"
)

if method_distance not in [None, "xy", "latlon"]:
raise ValueError(
"method_distance invalid, must be one of (None, 'xy', 'latlon')"
)

feature_1_coord = find_dataframe_horizontal_coords(
feature_1,
hdim1_coord=hdim1_coord,
hdim2_coord=hdim2_coord,
coord_type=method_distance,
)
feature_2_coord = find_dataframe_horizontal_coords(
feature_2,
hdim1_coord=hdim1_coord,
hdim2_coord=hdim2_coord,
coord_type=method_distance,
)

if feature_1_coord != feature_2_coord:
raise ValueError(
"Discovered coordinates in feature_1 and feature_2 do not match, please specify coordinates using hdim1_coord and hdim2_coord parameters"
)

hdim1_coord = feature_1_coord[0]
hdim2_coord = feature_1_coord[1]
method_distance = feature_1_coord[2]

if method_distance == "xy":
distance = np.sqrt(
(
feature_1["projection_x_coordinate"]
- feature_2["projection_x_coordinate"]
)
** 2
+ (
feature_1["projection_y_coordinate"]
- feature_2["projection_y_coordinate"]
)
** 2
(feature_1[hdim1_coord] - feature_2[hdim1_coord]) ** 2
+ (feature_1[hdim2_coord] - feature_2[hdim2_coord]) ** 2
)
elif method_distance == "latlon":
# Check if order of coords is correct, and swap if mismatched:
if hdim1_coord.lower() in COMMON_LON_COORDS:
hdim1_coord, hdim2_coord = hdim2_coord, hdim1_coord

distance = 1000 * haversine(
feature_1["latitude"],
feature_1["longitude"],
feature_2["latitude"],
feature_2["longitude"],
feature_1[hdim1_coord],
feature_1[hdim2_coord],
feature_2[hdim1_coord],
feature_2[hdim2_coord],
)
else:
raise ValueError("method undefined")

return distance


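For readers of this diff, a minimal usage sketch of the updated `calculate_distance` signature, under the assumption that the coordinate-discovery helper recognises the standard `projection_x_coordinate`/`projection_y_coordinate` column names; the dataframes and values below are illustrative, not taken from the PR's tests:

```python
import pandas as pd

from tobac.analysis.spatial import calculate_distance

# Two feature records with projection coordinates in metres (illustrative values).
feature_1 = pd.DataFrame(
    {"projection_y_coordinate": [0.0], "projection_x_coordinate": [0.0]}
)
feature_2 = pd.DataFrame(
    {"projection_y_coordinate": [3000.0], "projection_x_coordinate": [4000.0]}
)

# Coordinate names are discovered automatically; with projection coordinates
# present in both inputs, method_distance resolves to "xy" (distance: 5000 m).
dist_auto = calculate_distance(feature_1, feature_2)

# The new hdim1_coord/hdim2_coord parameters pin the coordinate names explicitly;
# per the added check above, method_distance must then be given as well.
dist_explicit = calculate_distance(
    feature_1,
    feature_2,
    method_distance="xy",
    hdim1_coord="projection_y_coordinate",
    hdim2_coord="projection_x_coordinate",
)
```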
29 changes: 17 additions & 12 deletions tobac/feature_detection.py
@@ -1180,7 +1180,8 @@ def feature_detection_multithreshold(
strict_thresholding: bool = False,
statistic: Union[dict[str, Union[Callable, tuple[Callable, dict]]], None] = None,
statistics_unsmoothed: bool = False,
preserve_iris_datetime_types: bool = True,
use_standard_names: Optional[bool] = None,
converted_from_iris: bool = False,
**kwargs,
) -> pd.DataFrame:
"""Perform feature detection based on contiguous regions.
@@ -1264,6 +1265,11 @@ def feature_detection_multithreshold(
If True, a feature can only be detected if all previous thresholds have been met.
Default is False.

use_standard_names: bool, optional
If True, when interpolating a coordinate, look for a standard_name
and use it to name the output coordinate, mimicking iris functionality.
If False, use the coordinate's own name in the output.

preserve_iris_datetime_types: bool, optional, default: True
If True, for iris input, preserve the original datetime type (typically
`cftime.DatetimeGregorian`) where possible. For xarray input, this parameter has no
@@ -1409,18 +1415,23 @@
if any([not x.empty for x in list_features_timesteps]):
features = pd.concat(list_features_timesteps, ignore_index=True)
features["feature"] = features.index + feature_number_start
# features_filtered = features.drop(features[features['num'] < min_num].index)
# features_filtered.drop(columns=['idx','num','threshold_value'],inplace=True)

if use_standard_names is None:
use_standard_names = True if converted_from_iris else False

if "vdim" in features:
features = add_coordinates_3D(
features,
field_in,
vertical_coord=vertical_coord,
preserve_iris_datetime_types=kwargs["converted_from_iris"]
& preserve_iris_datetime_types,
use_standard_names=use_standard_names,
)
else:
features = add_coordinates(features, field_in)
features = add_coordinates(
features,
field_in,
use_standard_names=use_standard_names,
)

# Loop over DataFrame to remove features that are closer than distance_min to each
# other:
@@ -1452,12 +1463,6 @@
)
features = pd.concat(filtered_features, ignore_index=True)

features = add_coordinates(
features,
field_in,
preserve_iris_datetime_types=kwargs["converted_from_iris"]
& preserve_iris_datetime_types,
)
else:
features = None
logging.debug("No features detected")
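To make the `use_standard_names` behaviour concrete, a small sketch of the naming difference the docstring describes, using an assumed coordinate named "lat" that carries the standard_name "latitude"; this is illustrative only and does not call feature detection itself:

```python
import numpy as np
import xarray as xr

# A coordinate whose variable name ("lat") differs from its CF standard_name ("latitude").
field = xr.DataArray(
    np.zeros((3, 4)),
    dims=("y", "x"),
    coords={"lat": ("y", np.linspace(-1.0, 1.0, 3), {"standard_name": "latitude"})},
)

# With use_standard_names=True, the coordinate interpolated onto detected features
# would be written out as "latitude" (iris-like behaviour); with use_standard_names=False,
# it would keep the variable name "lat".
print(field["lat"].attrs["standard_name"])  # "latitude"
```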
7 changes: 6 additions & 1 deletion tobac/merge_split.py
@@ -185,7 +185,12 @@ def merge_split_MEST(
if PBC_flag in ["hdim_1", "hdim_2", "both"]:
# Note that we multiply by dxy to get the distances in spatial coordinates
dist_func = build_distance_function(
min_h1 * dxy, max_h1 * dxy, min_h2 * dxy, max_h2 * dxy, PBC_flag, is_3D
min_h1 * dxy if min_h1 is not None else None,
max_h1 * dxy if max_h1 is not None else None,
min_h2 * dxy if min_h2 is not None else None,
max_h2 * dxy if max_h2 is not None else None,
PBC_flag,
is_3D,
)
cell_start_tree = BallTree(
cell_start_locations, metric="pyfunc", func=dist_func
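The repeated `value * dxy if value is not None else None` guards convert PBC limits to spatial units while leaving None (no periodic boundary in that dimension) untouched. A possible helper to factor out that pattern, shown purely as a sketch and not part of this PR:

```python
from typing import Optional


def scale_limit(limit: Optional[float], dxy: float) -> Optional[float]:
    """Convert a PBC limit from grid-index units to spatial units,
    passing None (no periodic boundary in that dimension) through unchanged."""
    return limit * dxy if limit is not None else None


# Equivalent to the guarded arguments in the diff above:
# build_distance_function(
#     scale_limit(min_h1, dxy), scale_limit(max_h1, dxy),
#     scale_limit(min_h2, dxy), scale_limit(max_h2, dxy),
#     PBC_flag, is_3D,
# )
```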
31 changes: 9 additions & 22 deletions tobac/segmentation/watershed_segmentation.py
@@ -49,6 +49,7 @@
from tobac.utils import internal as internal_utils
from tobac.utils import get_statistics
from tobac.utils import decorators
from tobac.utils.generators import field_and_features_over_time


def add_markers(
@@ -1257,32 +1258,21 @@ def segmentation(
)
features_out_list = []

# Iris workaround: convert cftime to datetime64

if np.issubdtype(features["time"].dtype, np.datetime64):
# we are (likely) a numpy datetime
all_times = features["time"]
else:
all_times = features["time"].map(np.datetime64)

if len(field.coords[time_var_name]) == 1:
warnings.warn(
"As of v1.6.0, segmentation with time length 1 will return time as a coordinate"
" instead of dropping it (i.e., output will now be 1xMxN instead of MxN). ",
UserWarning,
)

for time_iteration_number, time_iteration_value in enumerate(
field.coords[time_var_name]
for (
time_iteration_number,
time_iteration_value,
field_at_time,
features_i,
) in field_and_features_over_time(
field, features, time_var_name=time_var_name, time_padding=time_padding
):
field_at_time = field.isel({time_var_name: time_iteration_number})
if time_padding is not None:
padded_conv = pd.Timedelta(time_padding).to_timedelta64()
min_time = time_iteration_value.values - padded_conv
max_time = time_iteration_value.values + padded_conv
features_i = features.loc[all_times.between(min_time, max_time)]
else:
features_i = features.loc[all_times == time_iteration_value.values]
segmentation_out_i, features_out_i = segmentation_timestep(
field_at_time,
features_i,
@@ -1304,10 +1294,7 @@
segmentation_out_i
)
features_out_list.append(features_out_i)
logging.debug(
"Finished segmentation for "
+ pd.to_datetime(time_iteration_value.values).strftime("%Y-%m-%d %H:%M:%S")
)
logging.debug(f"Finished segmentation for {time_iteration_value.values}")

# Merge output from individual timesteps:
features_out = pd.concat(features_out_list)
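A minimal sketch of how the new `field_and_features_over_time` generator is consumed, inferred from the call site above; the toy inputs are illustrative, the contents of the yielded tuple are taken from the unpacking shown in this diff, and exact-match selection for `time_padding=None` mirrors the loop this generator replaces:

```python
import numpy as np
import pandas as pd
import xarray as xr

from tobac.utils.generators import field_and_features_over_time

# Toy inputs: a field with two timesteps and one detected feature per timestep.
times = pd.date_range("2000-01-01", periods=2, freq="h")
field = xr.DataArray(
    np.zeros((2, 4, 5)),
    dims=("time", "y", "x"),
    coords={"time": times},
)
features = pd.DataFrame({"feature": [1, 2], "time": [times[0], times[1]]})

# Each iteration pairs one time slice of the field with the feature rows whose
# times fall within time_padding of that timestep.
for step_number, step_time, field_at_time, features_at_step in field_and_features_over_time(
    field, features, time_var_name="time", time_padding=None
):
    print(step_number, step_time.values, len(features_at_step))
```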