From 1953830c959b606ae3fbf7a1f9070fbe465edb8d Mon Sep 17 00:00:00 2001 From: pattplatt Date: Mon, 2 Jun 2025 20:27:17 +0200 Subject: [PATCH 1/5] reverted commits to sync with main, added whole series rockad implementation --- aeon/anomaly_detection/collection/__init__.py | 2 + aeon/anomaly_detection/collection/_rockad.py | 221 ++++++++++++++++++ .../collection/tests/__init__.py | 1 + .../collection/tests/test_rockad.py | 69 ++++++ 4 files changed, 293 insertions(+) create mode 100644 aeon/anomaly_detection/collection/_rockad.py create mode 100644 aeon/anomaly_detection/collection/tests/__init__.py create mode 100644 aeon/anomaly_detection/collection/tests/test_rockad.py diff --git a/aeon/anomaly_detection/collection/__init__.py b/aeon/anomaly_detection/collection/__init__.py index 4fc14ffd1f..2a13747aa4 100644 --- a/aeon/anomaly_detection/collection/__init__.py +++ b/aeon/anomaly_detection/collection/__init__.py @@ -4,8 +4,10 @@ "BaseCollectionAnomalyDetector", "ClassificationAdapter", "OutlierDetectionAdapter", + "ROCKAD", ] from aeon.anomaly_detection.collection._classification import ClassificationAdapter from aeon.anomaly_detection.collection._outlier_detection import OutlierDetectionAdapter +from aeon.anomaly_detection.collection._rockad import ROCKAD from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector diff --git a/aeon/anomaly_detection/collection/_rockad.py b/aeon/anomaly_detection/collection/_rockad.py new file mode 100644 index 0000000000..dbd9270181 --- /dev/null +++ b/aeon/anomaly_detection/collection/_rockad.py @@ -0,0 +1,221 @@ +"""ROCKAD anomaly detector.""" + +__all__ = ["ROCKAD"] + +import warnings +from typing import Optional + +import numpy as np +from sklearn.neighbors import NearestNeighbors +from sklearn.preprocessing import PowerTransformer +from sklearn.utils import resample + +from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector +from aeon.transformations.collection.convolution_based import Rocket + + +class ROCKAD(BaseCollectionAnomalyDetector): + """ + ROCKET-based whole-series Anomaly Detector (ROCKAD). + + ROCKAD [1]_ leverages the ROCKET transformation for feature extraction from + time series data and applies the scikit learn k-nearest neighbors (k-NN) + approach with bootstrap aggregation for robust semi-supervised anomaly detection. + The data gets transformed into the ROCKET feature space. + Then the whole-series are compared based on the feature space by + finding the nearest neighbours. The time-point based ROCKAD anomaly detector + can be found at aeon/anomaly_detection/series/distance_based/_rockad.py + + This class supports both univariate and multivariate time series and + provides options for normalizing features, applying power transformations, + and customizing the distance metric. + + Parameters + ---------- + n_estimators : int, default=10 + Number of k-NN estimators to use in the bootstrap aggregation. + n_kernels : int, default=100 + Number of kernels to use in the ROCKET transformation. + normalise : bool, default=False + Whether to normalize the ROCKET-transformed features. + n_neighbors : int, default=5 + Number of neighbors to use for the k-NN algorithm. + n_jobs : int, default=1 + Number of parallel jobs to use for the k-NN algorithm and ROCKET transformation. + metric : str, default="euclidean" + Distance metric to use for the k-NN algorithm. + power_transform : bool, default=True + Whether to apply a power transformation (Yeo-Johnson) to the features. + random_state : int, default=42 + Random seed for reproducibility. + + Attributes + ---------- + rocket_transformer_ : Optional[Rocket] + Instance of the ROCKET transformer used to extract features, set after fitting. + list_baggers_ : Optional[list[NearestNeighbors]] + List containing k-NN estimators used for anomaly scoring, set after fitting. + power_transformer_ : PowerTransformer + Transformer used to apply power transformation to the features. + + References + ---------- + .. [1] Theissler, A., Wengert, M., Gerschner, F. (2023). + ROCKAD: Transferring ROCKET to Whole Time Series Anomaly Detection. + In: Crémilleux, B., Hess, S., Nijssen, S. (eds) Advances in Intelligent + Data Analysis XXI. IDA 2023. Lecture Notes in Computer Science, + vol 13876. Springer, Cham. https://doi.org/10.1007/978-3-031-30047-9_33 + + Examples + -------- + >>> import numpy as np + >>> from aeon.anomaly_detection.whole_series import ROCKAD + >>> rng = np.random.default_rng(seed=42) + >>> X_train = rng.normal(loc=0.0, scale=1.0, size=(10, 100)) + >>> X_test = rng.normal(loc=0.0, scale=1.0, size=(5, 100)) + >>> X_test[4][50:58] -= 5 + >>> detector = ROCKAD() + >>> detector.fit(X_train) + >>> detector.predict(X_test) + array([24.11974147, 23.93866453, 21.3941765 , 22.26811959, 64.9630108 ]) + """ + + _tags = { + "capability:univariate": True, + "capability:multivariate": True, + "capability:missing_values": False, + "capability:multithreading": True, + "fit_is_empty": False, + } + + def __init__( + self, + n_estimators=10, + n_kernels=100, + normalise=False, + n_neighbors=5, + metric="euclidean", + power_transform=True, + n_jobs=1, + random_state=42, + ): + + self.n_estimators = n_estimators + self.n_kernels = n_kernels + self.normalise = normalise + self.n_neighbors = n_neighbors + self.n_jobs = n_jobs + self.metric = metric + self.power_transform = power_transform + self.random_state = random_state + + self.rocket_transformer_: Optional[Rocket] = None + self.list_baggers_: Optional[list[NearestNeighbors]] = None + self.power_transformer_: Optional[PowerTransformer] = None + + super().__init__() + + def _fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "ROCKAD": + _X = X + self._inner_fit(_X) + + return self + + def _inner_fit(self, X: np.ndarray) -> None: + + self.rocket_transformer_ = Rocket( + n_kernels=self.n_kernels, + normalise=self.normalise, + n_jobs=self.n_jobs, + random_state=self.random_state, + ) + # XT: (n_cases, n_kernels*2) + Xt = self.rocket_transformer_.fit_transform(X) + Xt = Xt.astype(np.float64) + + if self.power_transform: + self.power_transformer_ = PowerTransformer() + try: + Xtp = self.power_transformer_.fit_transform(Xt) + + except Exception: + warnings.warn( + "Power Transform failed and thus has been disabled. ", + UserWarning, + stacklevel=2, + ) + self.power_transformer_ = None + Xtp = Xt + else: + Xtp = Xt + + self.list_baggers_ = [] + + for idx_estimator in range(self.n_estimators): + # Initialize estimator + estimator = NearestNeighbors( + n_neighbors=self.n_neighbors, + n_jobs=self.n_jobs, + metric=self.metric, + algorithm="kd_tree", + ) + # Bootstrap Aggregation + Xtp_scaled_sample = resample( + Xtp, + replace=True, + n_samples=None, + random_state=self.random_state + idx_estimator, + stratify=None, + ) + + # Fit estimator and append to estimator list + estimator.fit(Xtp_scaled_sample) + self.list_baggers_.append(estimator) + + def _predict(self, X) -> np.ndarray: + _X = X + collection_anomaly_scores = self._inner_predict(_X) + + return collection_anomaly_scores + + def _inner_predict(self, X: np.ndarray) -> np.ndarray: + """ + Return the anomaly scores for the input data. + + Parameters + ---------- + X (array-like): The input data. + + Returns + ------- + np.ndarray: The predicted probabilities. + + """ + y_scores = np.zeros((len(X), self.n_estimators)) + # Transform into rocket feature space + # XT: (n_cases, n_kernels*2) + Xt = self.rocket_transformer_.transform(X) + + Xt = Xt.astype(np.float64) + + if self.power_transformer_ is not None: + # Power Transform using yeo-johnson + Xtp = self.power_transformer_.transform(Xt) + + else: + Xtp = Xt + + for idx, bagger in enumerate(self.list_baggers_): + # Get scores from each estimator + distances, _ = bagger.kneighbors(Xtp) + + # Compute mean distance of nearest points in window + scores = distances.mean(axis=1).reshape(-1, 1) + scores = scores.squeeze() + + y_scores[:, idx] = scores + + # Average the scores to get the final score for each whole-series + collection_anomaly_scores = y_scores.mean(axis=1) + + return collection_anomaly_scores diff --git a/aeon/anomaly_detection/collection/tests/__init__.py b/aeon/anomaly_detection/collection/tests/__init__.py new file mode 100644 index 0000000000..53e3f63a49 --- /dev/null +++ b/aeon/anomaly_detection/collection/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for whole-series anomaly detection.""" diff --git a/aeon/anomaly_detection/collection/tests/test_rockad.py b/aeon/anomaly_detection/collection/tests/test_rockad.py new file mode 100644 index 0000000000..8b2316dcc8 --- /dev/null +++ b/aeon/anomaly_detection/collection/tests/test_rockad.py @@ -0,0 +1,69 @@ +"""Tests for the ROCKAD anomaly detector.""" + +import numpy as np +import pytest +from sklearn.utils import check_random_state + +from aeon.anomaly_detection.collection import ROCKAD + + +def test_rockad_univariate(): + """Test ROCKAD univariate output.""" + rng = check_random_state(seed=2) + train_series = rng.normal(loc=0.0, scale=1.0, size=(10, 100)) + test_series = rng.normal(loc=0.0, scale=1.0, size=(5, 100)) + + test_series[0][50:58] -= 5 + + ad = ROCKAD(n_estimators=100, n_kernels=10, n_neighbors=9) + + ad.fit(train_series) + pred = ad.predict(test_series) + + assert pred.shape == (5,) + assert pred.dtype == np.float64 + assert 0 <= np.argmax(pred) <= 1 + + +def test_rockad_multivariate(): + """Test ROCKAD multivariate output.""" + rng = check_random_state(seed=2) + train_series = rng.normal(loc=0.0, scale=1.0, size=(10, 3, 100)) + test_series = rng.normal(loc=0.0, scale=1.0, size=(5, 3, 100)) + + test_series[0][0][50:58] -= 5 + + ad = ROCKAD(n_estimators=1000, n_kernels=100, n_neighbors=9) + + ad.fit(train_series) + pred = ad.predict(test_series) + + assert pred.shape == (5,) + assert pred.dtype == np.float64 + assert 0 <= np.argmax(pred) <= 1 + + +def test_rockad_incorrect_input(): + """Test ROCKAD with invalid inputs.""" + rng = check_random_state(seed=2) + series = rng.normal(size=(10, 5)) + + with pytest.warns( + UserWarning, match=r"Power Transform failed and thus has been disabled." + ): + ad = ROCKAD() + ad.fit(series) + + train_series = rng.normal(loc=0.0, scale=1.0, size=(10, 100)) + test_series = rng.normal(loc=0.0, scale=1.0, size=(3, 100)) + + with pytest.raises( + ValueError, + match=( + r"Expected n_neighbors <= n_samples_fit, but n_neighbors = 100, " + r"n_samples_fit = 10, n_samples = 3" + ), + ): + ad = ROCKAD(n_estimators=100, n_kernels=10, n_neighbors=100) + ad.fit(train_series) + ad.predict(test_series) From a242919933eb060da03bbf77725b12778103e062 Mon Sep 17 00:00:00 2001 From: pattplatt Date: Tue, 3 Jun 2025 09:35:02 +0200 Subject: [PATCH 2/5] added necessary tags --- aeon/anomaly_detection/collection/_rockad.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aeon/anomaly_detection/collection/_rockad.py b/aeon/anomaly_detection/collection/_rockad.py index dbd9270181..4b9dc24850 100644 --- a/aeon/anomaly_detection/collection/_rockad.py +++ b/aeon/anomaly_detection/collection/_rockad.py @@ -81,6 +81,8 @@ class ROCKAD(BaseCollectionAnomalyDetector): """ _tags = { + "anomaly_output_type": "anomaly_scores", + "learning_type": "semi-supervised", "capability:univariate": True, "capability:multivariate": True, "capability:missing_values": False, From 45fd6934e8f73ab6685c1d595ec4d3e6ee3aea35 Mon Sep 17 00:00:00 2001 From: pattplatt Date: Tue, 3 Jun 2025 11:11:16 +0200 Subject: [PATCH 3/5] fixed import of example code, added skip commands that doctests pass --- aeon/anomaly_detection/collection/_rockad.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aeon/anomaly_detection/collection/_rockad.py b/aeon/anomaly_detection/collection/_rockad.py index 4b9dc24850..1ad658b0d7 100644 --- a/aeon/anomaly_detection/collection/_rockad.py +++ b/aeon/anomaly_detection/collection/_rockad.py @@ -69,14 +69,14 @@ class ROCKAD(BaseCollectionAnomalyDetector): Examples -------- >>> import numpy as np - >>> from aeon.anomaly_detection.whole_series import ROCKAD + >>> from aeon.anomaly_detection.collection import ROCKAD >>> rng = np.random.default_rng(seed=42) >>> X_train = rng.normal(loc=0.0, scale=1.0, size=(10, 100)) >>> X_test = rng.normal(loc=0.0, scale=1.0, size=(5, 100)) >>> X_test[4][50:58] -= 5 - >>> detector = ROCKAD() - >>> detector.fit(X_train) - >>> detector.predict(X_test) + >>> detector = ROCKAD() # doctest: +SKIP + >>> detector.fit(X_train) # doctest: +SKIP + >>> detector.predict(X_test) # doctest: +SKIP array([24.11974147, 23.93866453, 21.3941765 , 22.26811959, 64.9630108 ]) """ From a9fd78533c89fa02ae2b977db92beb40cb3c6be0 Mon Sep 17 00:00:00 2001 From: pattplatt Date: Tue, 3 Jun 2025 11:29:42 +0200 Subject: [PATCH 4/5] fixed learning_type parameter --- aeon/anomaly_detection/collection/_rockad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/anomaly_detection/collection/_rockad.py b/aeon/anomaly_detection/collection/_rockad.py index 1ad658b0d7..1745b89f3e 100644 --- a/aeon/anomaly_detection/collection/_rockad.py +++ b/aeon/anomaly_detection/collection/_rockad.py @@ -82,7 +82,7 @@ class ROCKAD(BaseCollectionAnomalyDetector): _tags = { "anomaly_output_type": "anomaly_scores", - "learning_type": "semi-supervised", + "learning_type": "semi_supervised", "capability:univariate": True, "capability:multivariate": True, "capability:missing_values": False, From ae4017ace2472946d7b7e63f635a6e30e9e96bc4 Mon Sep 17 00:00:00 2001 From: pattplatt Date: Tue, 3 Jun 2025 11:55:20 +0200 Subject: [PATCH 5/5] corrected learning_type tag to contain binary label --- aeon/anomaly_detection/collection/_rockad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/anomaly_detection/collection/_rockad.py b/aeon/anomaly_detection/collection/_rockad.py index 1745b89f3e..32c002907d 100644 --- a/aeon/anomaly_detection/collection/_rockad.py +++ b/aeon/anomaly_detection/collection/_rockad.py @@ -82,7 +82,7 @@ class ROCKAD(BaseCollectionAnomalyDetector): _tags = { "anomaly_output_type": "anomaly_scores", - "learning_type": "semi_supervised", + "learning_type:semi_supervised": True, "capability:univariate": True, "capability:multivariate": True, "capability:missing_values": False,