Skip to content

[ENH] Allow exogenous variables in regression forecasters #2915

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 56 additions & 16 deletions aeon/forecasting/_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ class RegressionForecaster(BaseForecaster):
window to form training collection ``X``, take ``horizon`` points ahead to form
``y``, then apply an aeon or sklearn regressor.

If exogenous variables are provided, they are concatenated with the main series
and included in the regression windows.

Parameters
----------
Expand All @@ -36,6 +38,10 @@ class RegressionForecaster(BaseForecaster):
with sklearn regressors.
"""

_tags = {
"capability:exogenous": True,
}

def __init__(self, window: int, horizon: int = 1, regressor=None):
self.window = window
self.regressor = regressor
Expand All @@ -52,8 +58,7 @@ def _fit(self, y, exog=None):
y : np.ndarray
A time series on which to learn a forecaster to predict horizon ahead.
exog : np.ndarray, default=None
Optional exogenous time series data. Included for interface
compatibility but ignored in this estimator.
Optional exogenous time series data, assumed to be aligned with y.

Returns
-------
Expand All @@ -65,18 +70,38 @@ def _fit(self, y, exog=None):
self.regressor_ = LinearRegression()
else:
self.regressor_ = self.regressor
y = y.squeeze()
if self.window < 1 or self.window > len(y) - 3:

# Combine y and exog for windowing
if exog is not None:
if exog.ndim == 1:
exog = exog.reshape(1, -1)
if exog.shape[1] != y.shape[1]:
raise ValueError("y and exog must have the same number of time points.")
combined_data = np.vstack([y, exog])
else:
combined_data = y

# Enforce a minimum number of training samples, currently 3
if self.window < 1 or self.window >= combined_data.shape[1] - 3:
raise ValueError(
f" window value {self.window} is invalid for series " f"length {len(y)}"
f"window value {self.window} is invalid for series length "
f"{combined_data.shape[1]}"
)
X = np.lib.stride_tricks.sliding_window_view(y, window_shape=self.window)
# Ignore the final horizon values: need to store these for pred with empty y

# Create windowed data for X
X = np.lib.stride_tricks.sliding_window_view(
combined_data, window_shape=(combined_data.shape[0], self.window)
)
X = X.squeeze(axis=0)
X = X[:, :, :].reshape(X.shape[0], -1)

# Ignore the final horizon values for X
X = X[: -self.horizon]
# Extract y_train
y_train = y[self.window + self.horizon - 1 :]
self.last_ = y[-self.window :]
self.last_ = self.last_.reshape(1, -1)

# Extract y_train from the original series
y_train = y.squeeze()[self.window + self.horizon - 1 :]

self.last_ = combined_data[:, -self.window :]
self.regressor_.fit(X=X, y=y_train)
return self

Expand All @@ -90,18 +115,33 @@ def _predict(self, y=None, exog=None):
A time series to predict the next horizon value for. If None,
predict the next horizon value after series seen in fit.
exog : np.ndarray, default=None
Optional exogenous time series data. Included for interface
compatibility but ignored in this estimator.
Optional exogenous time series data, assumed to be aligned with y.

Returns
-------
float
single prediction self.horizon steps ahead of y.
"""
if y is None:
return self.regressor_.predict(self.last_)[0]
last = y[:, -self.window :]
return self.regressor_.predict(last)[0]
# Flatten the last window to be compatible with sklearn regressors
last_window_flat = self.last_.reshape(1, -1)
return self.regressor_.predict(last_window_flat)[0]

# Combine y and exog for prediction
if exog is not None:
if exog.ndim == 1:
exog = exog.reshape(1, -1)
if exog.shape[1] != y.shape[1]:
raise ValueError("y and exog must have the same number of time points.")
combined_data = np.vstack([y, exog])
else:
combined_data = y

# Extract the last window and flatten for prediction
last_window = combined_data[:, -self.window :]
last_window_flat = last_window.reshape(1, -1)

return self.regressor_.predict(last_window_flat)[0]

@classmethod
def _get_test_params(cls, parameter_set: str = "default"):
Expand Down
22 changes: 14 additions & 8 deletions aeon/forecasting/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,14 @@ def fit(self, y, exog=None):
if self.get_tag("fit_is_empty"):
self.is_fitted = True
return self

horizon = self.get_tag("capability:horizon")
if not horizon and self.horizon > 1:
raise ValueError(
f"Horizon is set >1, but {self.__class__.__name__} cannot handle a "
f"horizon greater than 1"
)

exog_tag = self.get_tag("capability:exogenous")
if not exog_tag and exog is not None:
raise ValueError(
Expand All @@ -83,8 +85,10 @@ def fit(self, y, exog=None):

self._check_X(y, self.axis)
y = self._convert_y(y, self.axis)

if exog is not None:
raise NotImplementedError("Exogenous variables not yet supported")
exog = self._convert_y(exog, self.axis)

self.is_fitted = True
return self._fit(y, exog)

Expand Down Expand Up @@ -113,9 +117,9 @@ def predict(self, y=None, exog=None):
self._check_X(y, self.axis)
y = self._convert_y(y, self.axis)
if exog is not None:
raise NotImplementedError("Exogenous variables not yet supported")
x = self._predict(y, exog)
return x
exog = self._convert_y(exog, self.axis)

return self._predict(y, exog)

@abstractmethod
def _predict(self, y=None, exog=None): ...
Expand All @@ -141,6 +145,8 @@ def forecast(self, y, exog=None):
"""
self._check_X(y, self.axis)
y = self._convert_y(y, self.axis)
if exog is not None:
exog = self._convert_y(exog, self.axis)
return self._forecast(y, exog)

def _forecast(self, y, exog=None):
Expand All @@ -149,7 +155,7 @@ def _forecast(self, y, exog=None):
return self._predict(y, exog)

@final
def direct_forecast(self, y, prediction_horizon):
def direct_forecast(self, y, prediction_horizon, exog=None):
"""
Make ``prediction_horizon`` ahead forecasts using a fit for each horizon.

Expand All @@ -166,7 +172,8 @@ def direct_forecast(self, y, prediction_horizon):
The time series to make forecasts about.
prediction_horizon : int
The number of future time steps to forecast.

exog : np.ndarray, default=None
Optional exogenous time series data assumed to be aligned with y.
predictions : np.ndarray
An array of shape `(prediction_horizon,)` containing the forecasts for
each horizon.
Expand Down Expand Up @@ -198,7 +205,7 @@ def direct_forecast(self, y, prediction_horizon):
preds = np.zeros(prediction_horizon)
for i in range(0, prediction_horizon):
self.horizon = i + 1
preds[i] = self.forecast(y)
preds[i] = self.forecast(y, exog)
return preds

def iterative_forecast(self, y, prediction_horizon):
Expand Down Expand Up @@ -263,7 +270,6 @@ def _convert_y(self, y: VALID_SERIES_INNER_TYPES, axis: int):
if inner_names[0] == "ndarray":
y = y.to_numpy()
elif inner_names[0] == "DataFrame":
# converting a 1d array will create a 2d array in axis 0 format
transpose = False
if y.ndim == 1 and axis == 1:
transpose = True
Expand Down
14 changes: 14 additions & 0 deletions aeon/forecasting/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,17 @@ def test_recursive_forecast():
p = f.predict(y)
assert p == preds[i]
y = np.append(y, p)


def test_direct_forecast_with_exog():
    """Test direct forecasting with exogenous variables.

    The target is generated from the previous value of ``exog`` plus a small
    amount of noise, so the exogenous series carries information that is not
    recoverable from the windows of ``y`` alone. A noiseless ramp with
    ``exog = 2 * y`` would be fitted exactly with or without ``exog``, and the
    two sets of predictions would then differ only by floating-point noise.
    """
    rng = np.random.default_rng(0)
    n_timepoints = 50
    exog = rng.random(n_timepoints) * 10

    # y[t] is driven by exog[t - 1], so the latest exog value in a window is
    # informative about the next value of y.
    y = np.empty(n_timepoints)
    y[0] = 0.0
    y[1:] = 2 * exog[:-1] + rng.random(n_timepoints - 1) * 0.1

    f = RegressionForecaster(window=10)

    preds = f.direct_forecast(y, prediction_horizon=10, exog=exog)
    assert isinstance(preds, np.ndarray) and len(preds) == 10

    # Check that predictions are materially different when no exog is used;
    # allclose (rather than array_equal) ignores pure round-off differences.
    preds_no_exog = f.direct_forecast(y, prediction_horizon=10)
    assert not np.allclose(preds, preds_no_exog)
49 changes: 49 additions & 0 deletions aeon/forecasting/tests/test_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,52 @@ def test_regression_forecaster():
with pytest.raises(ValueError):
f = RegressionForecaster(window=101)
f.fit(y)


def test_regression_forecaster_with_exog():
    """Check fit/predict/forecast of the regression forecaster with exog data."""
    np.random.seed(0)

    series_length = 100
    exog = np.random.rand(series_length) * 10
    y = 2 * exog + np.random.rand(series_length) * 0.1

    forecaster = RegressionForecaster(window=10)

    # Fitting with the informative exogenous series yields a float forecast.
    forecaster.fit(y, exog=exog)
    pred_informative = forecaster.predict()
    assert isinstance(pred_informative, float)

    # Refitting with an all-zero exogenous series must change the forecast.
    forecaster.fit(y, exog=np.zeros(series_length))
    pred_zero_exog = forecaster.predict()
    assert pred_informative != pred_zero_exog

    # forecast() should agree with an explicit fit() followed by predict().
    y_new = np.arange(50, 150)
    exog_new = np.arange(50, 150) * 2

    forecaster.fit(y=y_new, exog=exog_new)
    pred_fit_then_predict = forecaster.predict()

    pred_forecast = forecaster.forecast(y=y_new, exog=exog_new)
    assert pred_fit_then_predict == pytest.approx(pred_forecast)


def test_regression_forecaster_with_exog_errors():
    """Test errors in regression forecaster with exogenous variables.

    Both ``fit`` and ``predict`` are expected to raise a ``ValueError`` when
    ``exog`` has a different number of time points than ``y``.
    """
    y = np.random.rand(100)
    exog_short = np.random.rand(99)
    f = RegressionForecaster(window=10)
    length_error = "must have the same number of time points"

    # Test for unequal length series in fit
    with pytest.raises(ValueError, match=length_error):
        f.fit(y, exog=exog_short)

    # Fit outside the raises-context so that only the predict call is under
    # test: an unexpected error from fit must surface as a failure rather than
    # being able to satisfy the expectation.
    f.fit(y)
    with pytest.raises(ValueError, match=length_error):
        f.predict(y, exog=exog_short)