Merged
Changes from 17 commits (46 commits total)

Commits
4ea35c9
Fix test for coords in feature detection output that previously had n…
w-k-jones Mar 26, 2025
f893614
Fix bug with datetime conversions during feature detection and add test
w-k-jones Mar 26, 2025
e2bbfe6
Add test for datetime type in 3D data and with 360 day calendar
w-k-jones Mar 26, 2025
565c386
Formatting
w-k-jones Mar 26, 2025
1e43dfc
Fix terminology
w-k-jones Mar 26, 2025
c507f21
Add a generators module and add generator for iterating over time thr…
w-k-jones Mar 27, 2025
7dd6543
Add check for time_var_name in field_and_features_over_time and test
w-k-jones Mar 27, 2025
2183b33
Update segmentation to use new field_and_features_over_time generator
w-k-jones Mar 27, 2025
d732def
Add generators module
w-k-jones Mar 27, 2025
db6d8d0
Add datetime conversion utils and tests
w-k-jones Mar 27, 2025
05bbdbd
Update bulk statistics to use field_and_features_over_time generator
w-k-jones Mar 27, 2025
1a65e2e
Update decorators, feature detection and coordinate interpolation so …
w-k-jones Mar 27, 2025
5651c7e
Formatting
w-k-jones Mar 27, 2025
442b984
Fix typing bug for python<3.10
w-k-jones Mar 27, 2025
46a72d6
Fix issues with coordinate interpolation in older xarray versions
w-k-jones Mar 28, 2025
1125c5b
Make use of use_standard_names parameter in coordinate interpolation …
w-k-jones Mar 28, 2025
498acef
Formatting
w-k-jones Mar 28, 2025
5a64f78
Remove duplicate decorators from general_internal utils
w-k-jones Mar 28, 2025
c6c7a31
Add new functions for finding coord names from dataframes and tests
w-k-jones Mar 28, 2025
a34e9e5
Add pd.Series input to find_coord_in_dataframe
w-k-jones Mar 29, 2025
96cf67f
Update calculate_distance to search for coords in dataframe using fin…
w-k-jones Mar 29, 2025
bb97c8e
Formatting
w-k-jones Mar 29, 2025
72c4844
Update idealised case notebooks and check velocity calculation is cor…
w-k-jones Mar 29, 2025
b8ef47e
Update basic example notebooks
w-k-jones Mar 29, 2025
fc08be9
Update test coverage for calculate_distance
w-k-jones Mar 29, 2025
0b1133d
Improve coverage of test_datetime
w-k-jones Mar 29, 2025
fe08335
Improve coverage of test_generators
w-k-jones Mar 29, 2025
158c2e3
Rename and separate internal.basic and internal.general_internal utils
w-k-jones Mar 29, 2025
4586984
Rename and separate tobac.utils.internal.basic and general_internal t…
w-k-jones Mar 29, 2025
a2d25f6
Formatting
w-k-jones Mar 29, 2025
8359a3f
Fix testing error for mismatched coordinates in test_analysis_spatial
w-k-jones Mar 29, 2025
8ae49ce
Add tests for missing error cases in find_dataframe_horizontal_coords
w-k-jones Mar 29, 2025
98b4303
Remove check for error that can never occur
w-k-jones Mar 30, 2025
ee07916
Expand test coverage for errors
w-k-jones Mar 30, 2025
5584ee2
Formatting
w-k-jones Mar 30, 2025
a46541f
Revert outdated changes to internal_utils.coordinates
w-k-jones Mar 30, 2025
51c9e29
Remove commented out lines
w-k-jones Mar 30, 2025
c8a6dd6
Fix issue with PBCs in merge_split_MEST where both min/max for hdim_1…
w-k-jones Apr 2, 2025
b2d23ff
Formatting
w-k-jones Apr 2, 2025
ed04521
Update spatial analysis tests to add documentation and improve granul…
w-k-jones Apr 15, 2025
ec91fe5
Add docstrings for datetime utils
w-k-jones Apr 18, 2025
7731e60
Add documentation to datetime tests
w-k-jones Apr 18, 2025
242054d
Formatting
w-k-jones Apr 18, 2025
17ee9eb
Add documentation to generator tests
w-k-jones Apr 18, 2025
d528cbc
Add documentation for test_utils_coordinates
w-k-jones Apr 18, 2025
d231cb3
Formatting
w-k-jones Apr 18, 2025
29 changes: 17 additions & 12 deletions tobac/feature_detection.py
@@ -1180,7 +1180,8 @@ def feature_detection_multithreshold(
strict_thresholding: bool = False,
statistic: Union[dict[str, Union[Callable, tuple[Callable, dict]]], None] = None,
statistics_unsmoothed: bool = False,
preserve_iris_datetime_types: bool = True,
use_standard_names: Optional[bool] = None,
converted_from_iris: bool = False,
**kwargs,
) -> pd.DataFrame:
"""Perform feature detection based on contiguous regions.
@@ -1264,6 +1265,11 @@ def feature_detection_multithreshold(
If True, a feature can only be detected if all previous thresholds have been met.
Default is False.

use_standard_names: bool, optional
If True, when interpolating a coordinate, look for a standard_name and
use it to name the output coordinate, mimicking iris behaviour. If
False, use the coordinate's own name. If None (the default), this is
set to True for input converted from iris and False otherwise.

preserve_iris_datetime_types: bool, optional, default: True
If True, for iris input, preserve the original datetime type (typically
`cftime.DatetimeGregorian`) where possible. For xarray input, this parameter has no
@@ -1409,18 +1415,23 @@ def feature_detection_multithreshold(
if any([not x.empty for x in list_features_timesteps]):
features = pd.concat(list_features_timesteps, ignore_index=True)
features["feature"] = features.index + feature_number_start
# features_filtered = features.drop(features[features['num'] < min_num].index)
# features_filtered.drop(columns=['idx','num','threshold_value'],inplace=True)

if use_standard_names is None:
use_standard_names = True if converted_from_iris else False

if "vdim" in features:
features = add_coordinates_3D(
features,
field_in,
vertical_coord=vertical_coord,
preserve_iris_datetime_types=kwargs["converted_from_iris"]
& preserve_iris_datetime_types,
use_standard_names=use_standard_names,
)
else:
features = add_coordinates(features, field_in)
features = add_coordinates(
features,
field_in,
use_standard_names=use_standard_names,
)

# Loop over DataFrame to remove features that are closer than distance_min to each
# other:
Expand Down Expand Up @@ -1452,12 +1463,6 @@ def feature_detection_multithreshold(
)
features = pd.concat(filtered_features, ignore_index=True)

features = add_coordinates(
features,
field_in,
preserve_iris_datetime_types=kwargs["converted_from_iris"]
& preserve_iris_datetime_types,
)
else:
features = None
logging.debug("No features detected")
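Taken together, this hunk swaps the `preserve_iris_datetime_types` keyword for `use_standard_names` plus `converted_from_iris`, with `None` meaning "decide from the input type". As a minimal sketch of that defaulting rule (the helper name `_resolve_use_standard_names` is hypothetical; only the two parameter names and the `None`-means-auto behaviour come from the diff):

```python
from typing import Optional


def _resolve_use_standard_names(
    use_standard_names: Optional[bool],
    converted_from_iris: bool,
) -> bool:
    """Resolve the use_standard_names default, mirroring the diff above.

    Left as None, it follows converted_from_iris: iris input keeps
    standard_name-based coordinate naming for backwards compatibility,
    while xarray input keeps the coordinates' own names.
    """
    if use_standard_names is None:
        return converted_from_iris
    return use_standard_names
```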
31 changes: 9 additions & 22 deletions tobac/segmentation/watershed_segmentation.py
@@ -49,6 +49,7 @@
from tobac.utils import internal as internal_utils
from tobac.utils import get_statistics
from tobac.utils import decorators
from tobac.utils.generators import field_and_features_over_time


def add_markers(
@@ -1257,32 +1258,21 @@ def segmentation(
)
features_out_list = []

# Iris workaround: convert cftime to datetime64

if np.issubdtype(features["time"].dtype, np.datetime64):
# we are (likely) a numpy datetime
all_times = features["time"]
else:
all_times = features["time"].map(np.datetime64)

if len(field.coords[time_var_name]) == 1:
warnings.warn(
"As of v1.6.0, segmentation with time length 1 will return time as a coordinate"
" instead of dropping it (i.e., output will now be 1xMxN instead of MxN). ",
UserWarning,
)

for time_iteration_number, time_iteration_value in enumerate(
field.coords[time_var_name]
for (
time_iteration_number,
time_iteration_value,
field_at_time,
features_i,
) in field_and_features_over_time(
field, features, time_var_name=time_var_name, time_padding=time_padding
):
field_at_time = field.isel({time_var_name: time_iteration_number})
if time_padding is not None:
padded_conv = pd.Timedelta(time_padding).to_timedelta64()
min_time = time_iteration_value.values - padded_conv
max_time = time_iteration_value.values + padded_conv
features_i = features.loc[all_times.between(min_time, max_time)]
else:
features_i = features.loc[all_times == time_iteration_value.values]
segmentation_out_i, features_out_i = segmentation_timestep(
field_at_time,
features_i,
@@ -1304,10 +1294,7 @@ def segmentation(
segmentation_out_i
)
features_out_list.append(features_out_i)
logging.debug(
"Finished segmentation for "
+ pd.to_datetime(time_iteration_value.values).strftime("%Y-%m-%d %H:%M:%S")
)
logging.debug(f"Finished segmentation for {time_iteration_value.values}")

# Merge output from individual timesteps:
features_out = pd.concat(features_out_list)
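The loop above now receives `(index, time value, field slice, matching features)` tuples from the new `field_and_features_over_time` generator instead of slicing inline. A minimal sketch of that generator, reconstructed from the inline logic this hunk deletes (the real implementation in `tobac/utils/generators.py` may differ, e.g. in how it validates `time_var_name`):

```python
from typing import Iterator, Optional, Tuple

import numpy as np
import pandas as pd
import xarray as xr


def field_and_features_over_time(
    field: xr.DataArray,
    features: pd.DataFrame,
    time_var_name: str = "time",
    time_padding: Optional[str] = None,
) -> Iterator[Tuple[int, xr.DataArray, xr.DataArray, pd.DataFrame]]:
    # Iris workaround carried over from the deleted code: normalise feature
    # times to datetime64 so cftime values compare against the field's times.
    if np.issubdtype(features["time"].dtype, np.datetime64):
        all_times = features["time"]
    else:
        all_times = features["time"].map(np.datetime64)

    for i, time_value in enumerate(field.coords[time_var_name]):
        field_at_time = field.isel({time_var_name: i})
        if time_padding is not None:
            # Accept features within +/- time_padding of this timestep.
            pad = pd.Timedelta(time_padding).to_timedelta64()
            mask = all_times.between(
                time_value.values - pad, time_value.values + pad
            )
        else:
            mask = all_times == time_value.values
        yield i, time_value, field_at_time, features.loc[mask]
```

Moving this into a shared generator lets segmentation and bulk statistics iterate over time identically, which is what the `field_and_features_over_time` commits above are about.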
128 changes: 128 additions & 0 deletions tobac/tests/test_datetime.py
@@ -0,0 +1,128 @@
from datetime import datetime

import numpy as np
import pandas as pd
import cftime

import tobac.utils.datetime as datetime_utils


def test_to_cftime():
test_dates = [
"2000-01-01",
"2000-01-01 00:00:00",
datetime(2000, 1, 1),
np.datetime64("2000-01-01 00:00:00.000000000"),
np.datetime64("2000-01-01 00:00:00"),
pd.to_datetime("2000-01-01"),
cftime.datetime(2000, 1, 1),
cftime.DatetimeGregorian(2000, 1, 1),
cftime.Datetime360Day(2000, 1, 1),
cftime.DatetimeNoLeap(2000, 1, 1),
]

for date in test_dates:
assert datetime_utils.to_cftime(date, "standard") == cftime.datetime(2000, 1, 1)
assert datetime_utils.to_cftime(date, "gregorian") == cftime.DatetimeGregorian(
2000, 1, 1
)
assert datetime_utils.to_cftime(date, "360_day") == cftime.Datetime360Day(
2000, 1, 1
)
assert datetime_utils.to_cftime(date, "365_day") == cftime.DatetimeNoLeap(
2000, 1, 1
)


def test_to_timestamp():
test_dates = [
"2000-01-01",
"2000-01-01 00:00:00",
datetime(2000, 1, 1),
np.datetime64("2000-01-01 00:00:00.000000000"),
np.datetime64("2000-01-01 00:00:00"),
pd.to_datetime("2000-01-01"),
cftime.datetime(2000, 1, 1),
cftime.DatetimeGregorian(2000, 1, 1),
cftime.Datetime360Day(2000, 1, 1),
cftime.DatetimeNoLeap(2000, 1, 1),
]

for date in test_dates:
assert datetime_utils.to_timestamp(date) == pd.to_datetime("2000-01-01")


def test_to_datetime():
test_dates = [
"2000-01-01",
"2000-01-01 00:00:00",
datetime(2000, 1, 1),
np.datetime64("2000-01-01 00:00:00.000000000"),
np.datetime64("2000-01-01 00:00:00"),
pd.to_datetime("2000-01-01"),
cftime.datetime(2000, 1, 1),
cftime.DatetimeGregorian(2000, 1, 1),
cftime.Datetime360Day(2000, 1, 1),
cftime.DatetimeNoLeap(2000, 1, 1),
]

for date in test_dates:
assert datetime_utils.to_datetime(date) == datetime(2000, 1, 1)


def test_to_datetime64():
test_dates = [
"2000-01-01",
"2000-01-01 00:00:00",
datetime(2000, 1, 1),
np.datetime64("2000-01-01 00:00:00.000000000"),
np.datetime64("2000-01-01 00:00:00"),
pd.to_datetime("2000-01-01"),
cftime.datetime(2000, 1, 1),
cftime.DatetimeGregorian(2000, 1, 1),
cftime.Datetime360Day(2000, 1, 1),
cftime.DatetimeNoLeap(2000, 1, 1),
]

for date in test_dates:
assert datetime_utils.to_datetime64(date) == np.datetime64(
"2000-01-01 00:00:00.000000000"
)


def test_to_datestr():
test_dates = [
"2000-01-01",
"2000-01-01 00:00:00",
datetime(2000, 1, 1),
np.datetime64("2000-01-01 00:00:00.000000000"),
np.datetime64("2000-01-01 00:00:00"),
pd.to_datetime("2000-01-01"),
cftime.datetime(2000, 1, 1),
cftime.DatetimeGregorian(2000, 1, 1),
cftime.Datetime360Day(2000, 1, 1),
cftime.DatetimeNoLeap(2000, 1, 1),
]

for date in test_dates:
assert (
datetime_utils.to_datestr(date) == "2000-01-01T00:00:00.000000000"
or datetime_utils.to_datestr(date) == "2000-01-01T00:00:00"
)


def test_match_datetime_format():
test_dates = [
"2000-01-01T00:00:00.000000000",
datetime(2000, 1, 1),
np.datetime64("2000-01-01 00:00:00.000000000"),
pd.to_datetime("2000-01-01"),
cftime.datetime(2000, 1, 1),
cftime.DatetimeGregorian(2000, 1, 1),
cftime.Datetime360Day(2000, 1, 1),
cftime.DatetimeNoLeap(2000, 1, 1),
]

for target in test_dates:
for date in test_dates:
assert datetime_utils.match_datetime_format(date, target) == target
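These tests fix the contract of the new `tobac.utils.datetime` helpers: every converter accepts strings, `datetime`, `datetime64`, pandas timestamps and all cftime calendar types, and normalises them to a single target type. For illustration, a sketch of a converter consistent with `test_to_datetime64` above (the actual implementation may well differ):

```python
import cftime
import numpy as np
import pandas as pd


def to_datetime64(date) -> np.datetime64:
    """Sketch of a converter satisfying test_to_datetime64 above."""
    if isinstance(date, cftime.datetime):
        # cftime objects in any calendar expose isoformat(), which numpy can
        # parse for dates that also exist in the proleptic Gregorian calendar;
        # calendar-only dates (e.g. Feb 30 in a 360-day calendar) would need
        # extra handling not shown here.
        return np.datetime64(date.isoformat())
    # pandas already handles strings, datetime, datetime64 and Timestamp.
    return pd.to_datetime(date).to_datetime64()
```

For example, `to_datetime64(cftime.Datetime360Day(2000, 1, 1))` yields `numpy.datetime64('2000-01-01T00:00:00')`, which compares equal to the nanosecond-resolution value the test asserts against.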