From 4a4991394580e51d950893774ba735712e788bc5 Mon Sep 17 00:00:00 2001 From: tina Date: Mon, 23 Jun 2025 18:25:36 +0900 Subject: [PATCH 1/2] add exogenous variable feature to base and regression forecasters --- aeon/forecasting/_regression.py | 72 ++++++++++++++++++------ aeon/forecasting/base.py | 25 +++++--- aeon/forecasting/tests/test_base.py | 14 +++++ aeon/forecasting/tests/test_regressor.py | 49 ++++++++++++++++ 4 files changed, 136 insertions(+), 24 deletions(-) diff --git a/aeon/forecasting/_regression.py b/aeon/forecasting/_regression.py index 7595b85a95..206d0ca05d 100644 --- a/aeon/forecasting/_regression.py +++ b/aeon/forecasting/_regression.py @@ -20,6 +20,8 @@ class RegressionForecaster(BaseForecaster): window to form training collection ``X``, take ``horizon`` points ahead to form ``y``, then apply an aeon or sklearn regressor. + If exogenous variables are provided, they are concatenated with the main series + and included in the regression windows. Parameters ---------- @@ -36,6 +38,10 @@ class RegressionForecaster(BaseForecaster): with sklearn regressors. """ + _tags = { + "capability:exogenous": True, + } + def __init__(self, window: int, horizon: int = 1, regressor=None): self.window = window self.regressor = regressor @@ -52,8 +58,7 @@ def _fit(self, y, exog=None): y : np.ndarray A time series on which to learn a forecaster to predict horizon ahead. exog : np.ndarray, default=None - Optional exogenous time series data. Included for interface - compatibility but ignored in this estimator. + Optional exogenous time series data, assumed to be aligned with y. 
Returns ------- @@ -65,18 +70,38 @@ def _fit(self, y, exog=None): self.regressor_ = LinearRegression() else: self.regressor_ = self.regressor - y = y.squeeze() - if self.window < 1 or self.window > len(y) - 3: + + # Combine y and exog for windowing + if exog is not None: + if exog.ndim == 1: + exog = exog.reshape(1, -1) + if exog.shape[1] != y.shape[1]: + raise ValueError("y and exog must have the same number of time points.") + combined_data = np.vstack([y, exog]) + else: + combined_data = y + + # Enforce a minimum number of training samples, currently 3 + if self.window < 1 or self.window >= combined_data.shape[1] - 3: raise ValueError( - f" window value {self.window} is invalid for series " f"length {len(y)}" + f"window value {self.window} is invalid for series length " + f"{combined_data.shape[1]}" ) - X = np.lib.stride_tricks.sliding_window_view(y, window_shape=self.window) - # Ignore the final horizon values: need to store these for pred with empty y + + # Create windowed data for X + X = np.lib.stride_tricks.sliding_window_view( + combined_data, window_shape=(combined_data.shape[0], self.window) + ) + X = X.squeeze(axis=0) + X = X[:, :, :].reshape(X.shape[0], -1) + + # Ignore the final horizon values for X X = X[: -self.horizon] - # Extract y_train - y_train = y[self.window + self.horizon - 1 :] - self.last_ = y[-self.window :] - self.last_ = self.last_.reshape(1, -1) + + # Extract y_train from the original series + y_train = y.squeeze()[self.window + self.horizon - 1 :] + + self.last_ = combined_data[:, -self.window :] self.regressor_.fit(X=X, y=y_train) return self @@ -90,8 +115,7 @@ def _predict(self, y=None, exog=None): A time series to predict the next horizon value for. If None, predict the next horizon value after series seen in fit. exog : np.ndarray, default=None - Optional exogenous time series data. Included for interface - compatibility but ignored in this estimator. + Optional exogenous time series data, assumed to be aligned with y. 
Returns ------- @@ -99,9 +123,25 @@ def _predict(self, y=None, exog=None): single prediction self.horizon steps ahead of y. """ if y is None: - return self.regressor_.predict(self.last_)[0] - last = y[:, -self.window :] - return self.regressor_.predict(last)[0] + # Flatten the last window to be compatible with sklearn regressors + last_window_flat = self.last_.reshape(1, -1) + return self.regressor_.predict(last_window_flat)[0] + + # Combine y and exog for prediction + if exog is not None: + if exog.ndim == 1: + exog = exog.reshape(1, -1) + if exog.shape[1] != y.shape[1]: + raise ValueError("y and exog must have the same number of time points.") + combined_data = np.vstack([y, exog]) + else: + combined_data = y + + # Extract the last window and flatten for prediction + last_window = combined_data[:, -self.window :] + last_window_flat = last_window.reshape(1, -1) + + return self.regressor_.predict(last_window_flat)[0] @classmethod def _get_test_params(cls, parameter_set: str = "default"): diff --git a/aeon/forecasting/base.py b/aeon/forecasting/base.py index 45ceb597db..3c9e899f56 100644 --- a/aeon/forecasting/base.py +++ b/aeon/forecasting/base.py @@ -68,12 +68,14 @@ def fit(self, y, exog=None): if self.get_tag("fit_is_empty"): self.is_fitted = True return self + horizon = self.get_tag("capability:horizon") if not horizon and self.horizon > 1: raise ValueError( f"Horizon is set >1, but {self.__class__.__name__} cannot handle a " f"horizon greater than 1" ) + exog_tag = self.get_tag("capability:exogenous") if not exog_tag and exog is not None: raise ValueError( @@ -83,8 +85,11 @@ def fit(self, y, exog=None): self._check_X(y, self.axis) y = self._convert_y(y, self.axis) + if exog is not None: - raise NotImplementedError("Exogenous variables not yet supported") + self._check_X(exog, self.axis) + exog = self._convert_y(exog, self.axis) + self.is_fitted = True return self._fit(y, exog) @@ -113,9 +118,10 @@ def predict(self, y=None, exog=None): self._check_X(y, 
self.axis) y = self._convert_y(y, self.axis) if exog is not None: - raise NotImplementedError("Exogenous variables not yet supported") - x = self._predict(y, exog) - return x + self._check_X(exog, self.axis) + exog = self._convert_y(exog, self.axis) + + return self._predict(y, exog) @abstractmethod def _predict(self, y=None, exog=None): ... @@ -141,6 +147,9 @@ def forecast(self, y, exog=None): """ self._check_X(y, self.axis) y = self._convert_y(y, self.axis) + if exog is not None: + self._check_X(exog, self.axis) + exog = self._convert_y(exog, self.axis) return self._forecast(y, exog) def _forecast(self, y, exog=None): @@ -149,7 +158,7 @@ def _forecast(self, y, exog=None): return self._predict(y, exog) @final - def direct_forecast(self, y, prediction_horizon): + def direct_forecast(self, y, prediction_horizon, exog=None): """ Make ``prediction_horizon`` ahead forecasts using a fit for each horizon. @@ -166,7 +175,8 @@ def direct_forecast(self, y, prediction_horizon): The time series to make forecasts about. prediction_horizon : int The number of future time steps to forecast. - + exog : np.ndarray, default=None + Optional exogenous time series data, assumed to be aligned with y. predictions : np.ndarray An array of shape `(prediction_horizon,)` containing the forecasts for each horizon. 
@@ -198,7 +208,7 @@ def direct_forecast(self, y, prediction_horizon): preds = np.zeros(prediction_horizon) for i in range(0, prediction_horizon): self.horizon = i + 1 - preds[i] = self.forecast(y) + preds[i] = self.forecast(y, exog) return preds def iterative_forecast(self, y, prediction_horizon): @@ -263,7 +273,6 @@ def _convert_y(self, y: VALID_SERIES_INNER_TYPES, axis: int): if inner_names[0] == "ndarray": y = y.to_numpy() elif inner_names[0] == "DataFrame": - # converting a 1d array will create a 2d array in axis 0 format transpose = False if y.ndim == 1 and axis == 1: transpose = True diff --git a/aeon/forecasting/tests/test_base.py b/aeon/forecasting/tests/test_base.py index 09d39146e0..e6b729c62b 100644 --- a/aeon/forecasting/tests/test_base.py +++ b/aeon/forecasting/tests/test_base.py @@ -65,3 +65,17 @@ def test_recursive_forecast(): p = f.predict(y) assert p == preds[i] y = np.append(y, p) + + +def test_direct_forecast_with_exog(): + """Test direct forecasting with exogenous variables.""" + y = np.arange(50) + exog = np.arange(50) * 2 + f = RegressionForecaster(window=10) + + preds = f.direct_forecast(y, prediction_horizon=10, exog=exog) + assert isinstance(preds, np.ndarray) and len(preds) == 10 + + # Check that predictions are different from when no exog is used + preds_no_exog = f.direct_forecast(y, prediction_horizon=10) + assert not np.array_equal(preds, preds_no_exog) diff --git a/aeon/forecasting/tests/test_regressor.py b/aeon/forecasting/tests/test_regressor.py index f13161c4ec..f50519d1c9 100644 --- a/aeon/forecasting/tests/test_regressor.py +++ b/aeon/forecasting/tests/test_regressor.py @@ -32,3 +32,52 @@ def test_regression_forecaster(): with pytest.raises(ValueError): f = RegressionForecaster(window=101) f.fit(y) + + +def test_regression_forecaster_with_exog(): + """Test the regression forecaster with exogenous variables.""" + np.random.seed(0) + + n_samples = 100 + exog = np.random.rand(n_samples) * 10 + y = 2 * exog + 
np.random.rand(n_samples) * 0.1 + + f = RegressionForecaster(window=10) + + # Test fit and predict with exog + f.fit(y, exog=exog) + p1 = f.predict() + assert isinstance(p1, float) + + # Test that exog variable has an impact + exog_zeros = np.zeros(n_samples) + f.fit(y, exog=exog_zeros) + p2 = f.predict() + assert p1 != p2 + + # Test that forecast method works and is equivalent to fit+predict + y_new = np.arange(50, 150) + exog_new = np.arange(50, 150) * 2 + + # Manual fit + predict + f.fit(y=y_new, exog=exog_new) + p_manual = f.predict() + + # forecast() method + p_forecast = f.forecast(y=y_new, exog=exog_new) + assert p_manual == pytest.approx(p_forecast) + + +def test_regression_forecaster_with_exog_errors(): + """Test errors in regression forecaster with exogenous variables.""" + y = np.random.rand(100) + exog_short = np.random.rand(99) + f = RegressionForecaster(window=10) + + # Test for unequal length series + with pytest.raises(ValueError, match="must have the same number of time points"): + f.fit(y, exog=exog_short) + + with pytest.raises(ValueError, match="must have the same number of time points"): + f.fit(y) + f.predict(y, exog=exog_short) From 1d881084da426bfeb61780700a644571f924e441 Mon Sep 17 00:00:00 2001 From: tina Date: Wed, 25 Jun 2025 21:48:31 +0800 Subject: [PATCH 2/2] delete self._check_X for exog --- aeon/forecasting/base.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/aeon/forecasting/base.py b/aeon/forecasting/base.py index 3c9e899f56..5b37e7cadc 100644 --- a/aeon/forecasting/base.py +++ b/aeon/forecasting/base.py @@ -87,7 +87,6 @@ def fit(self, y, exog=None): y = self._convert_y(y, self.axis) if exog is not None: - self._check_X(exog, self.axis) exog = self._convert_y(exog, self.axis) self.is_fitted = True @@ -118,7 +117,6 @@ def predict(self, y=None, exog=None): self._check_X(y, self.axis) y = self._convert_y(y, self.axis) if exog is not None: - self._check_X(exog, self.axis) exog = self._convert_y(exog, self.axis) return 
self._predict(y, exog) @@ -148,7 +146,6 @@ def forecast(self, y, exog=None): self._check_X(y, self.axis) y = self._convert_y(y, self.axis) if exog is not None: - self._check_X(exog, self.axis) exog = self._convert_y(exog, self.axis) return self._forecast(y, exog)