From d381d5ee7e9d38bbea58a911d6cbd9c1303e6505 Mon Sep 17 00:00:00 2001
From: Tony Bagnall
Date: Sat, 24 May 2025 14:43:02 +0100
Subject: [PATCH 01/50] arima first

---
 aeon/forecasting/_arima.py | 456 +++++++++++++++++++++++++++++++++++++
 1 file changed, 456 insertions(+)
 create mode 100644 aeon/forecasting/_arima.py

diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py
new file mode 100644
index 0000000000..54ebc47fe3
--- /dev/null
+++ b/aeon/forecasting/_arima.py
@@ -0,0 +1,456 @@
+"""ARIMAForecaster.
+
+An implementation of the ARIMA forecasting algorithm.
+"""
+
+__maintainer__ = ["alexbanwell1", "TonyBagnall"]
+__all__ = ["ARIMAForecaster"]
+
+from math import comb
+
+import numpy as np
+
+from aeon.forecasting._utils import calc_seasonal_period, kpss_test
+from aeon.forecasting.base import BaseForecaster
+
+NOGIL = False
+CACHE = True
+
+
+class ARIMAForecaster(BaseForecaster):
+    """AutoRegressive Integrated Moving Average (ARIMA) forecaster.
+
+    Implements the Hyndman-Khandakar automatic ARIMA algorithm for time series
+    forecasting with optional seasonal components. The model automatically selects
+    the orders of the non-seasonal (p, d, q) and seasonal (P, D, Q, m) components
+    based on information criteria, such as AIC.
+
+    Parameters
+    ----------
+    horizon : int, default=1
+        The forecasting horizon, i.e., the number of steps ahead to predict.
+
+    Attributes
+    ----------
+    data_ : list of float
+        Original training series values.
+    differenced_data_ : list of float
+        Differenced version of the training data used for stationarity.
+    residuals_ : list of float
+        Residual errors from the fitted model.
+    aic_ : float
+        Akaike Information Criterion for the selected model.
+    p_, d_, q_ : int
+        Orders of the ARIMA model: autoregressive (p), differencing (d),
+        and moving average (q) terms.
+    ps_, ds_, qs_ : int
+        Orders of the seasonal ARIMA model: seasonal AR (P), seasonal differencing (D),
+        and seasonal MA (Q) terms.
+    seasonal_period_ : int
+        Length of the seasonal cycle.
+    constant_term_ : float
+        Constant/intercept term in the model.
+    c_ : float
+        Estimated constant term (internal use).
+    phi_ : array-like
+        Coefficients for the non-seasonal autoregressive terms.
+    phi_s_ : array-like
+        Coefficients for the seasonal autoregressive terms.
+    theta_ : array-like
+        Coefficients for the non-seasonal moving average terms.
+    theta_s_ : array-like
+        Coefficients for the seasonal moving average terms.
+
+    References
+    ----------
+    .. [1] R. J. Hyndman and G. Athanasopoulos,
+       Forecasting: Principles and Practice. OTexts, 2014.
+       https://otexts.com/fpp3/
+    """
+
+    def __init__(self, horizon=1):
+        super().__init__(horizon=horizon, axis=1)
+        self.data_ = []
+        self.differenced_data_ = []
+        self.residuals_ = []
+        self.aic_ = 0
+        self.p_ = 0
+        self.d_ = 0
+        self.q_ = 0
+        self.ps_ = 0
+        self.ds_ = 0
+        self.qs_ = 0
+        self.seasonal_period_ = 0
+        self.constant_term_ = 0
+        self.c_ = 0
+        self.phi_ = 0
+        self.phi_s_ = 0
+        self.theta_ = 0
+        self.theta_s_ = 0
+
+    def _fit(self, y, exog=None):
+        """Fit AutoARIMA forecaster to series y.
+
+        Fit a forecaster to predict self.horizon steps ahead using y.
+
+        Parameters
+        ----------
+        y : np.ndarray
+            A time series on which to learn a forecaster to predict horizon ahead
+        exog : np.ndarray, default =None
+            Optional exogenous time series data assumed to be aligned with y
+
+        Returns
+        -------
+        self
+            Fitted ARIMAForecaster.
+ """ + self.data_ = np.array(y.squeeze(), dtype=np.float64) + ( + self.differenced_data_, + self.aic_, + self.p_, + self.d_, + self.q_, + self.ps_, + self.ds_, + self.qs_, + self.seasonal_period_, + self.constant_term_, + parameters, + ) = auto_arima(self.data_) + (self.c_, self.phi_, self.phi_s_, self.theta_, self.theta_s_) = extract_params( + parameters, self.p_, self.q_, self.ps_, self.qs_, self.constant_term_ + ) + ( + self.aic_, + self.residuals_, + ) = arima_log_likelihood( + parameters, + self.differenced_data_, + self.p_, + self.q_, + self.ps_, + self.qs_, + self.seasonal_period_, + self.constant_term_, + ) + return self + + def _predict(self, y=None, exog=None): + """ + Predict the next horizon steps ahead. + + Parameters + ---------- + y : np.ndarray, default = None + A time series to predict the next horizon value for. If None, + predict the next horizon value after series seen in fit. + exog : np.ndarray, default =None + Optional exogenous time series data assumed to be aligned with y + + Returns + ------- + float + single prediction self.horizon steps ahead of y. + """ + y = np.array(y, dtype=np.float64) + value = calc_arima( + self.differenced_data_, + self.p_, + self.q_, + self.ps_, + self.qs_, + self.seasonal_period_, + len(self.differenced_data_), + self.c_, + self.phi_, + self.phi_s_, + self.theta_, + self.theta_s_, + self.residuals_, + ) + history = self.data_[::-1] + differenced_history = np.diff(self.data_, n=self.d_)[::-1] + # Step 1: undo seasonal differencing on y^(d) + for k in range(1, self.ds_ + 1): + lag = k * self.seasonal_period_ + value += (-1) ** (k + 1) * comb(self.ds_, k) * differenced_history[lag - 1] + + # Step 2: undo ordinary differencing + for k in range(1, self.d_ + 1): + value += (-1) ** (k + 1) * comb(self.d_, k) * history[k - 1] + + if y is None: + return np.array([value]) + else: + return np.insert(y, 0, value)[:-1] + + +# Define the ARIMA(p, d, q) likelihood function +def arima_log_likelihood( + params, data, p, q, ps, qs, seasonal_period, include_constant_term +): + """Calculate the log-likelihood of an ARIMA model given the parameters.""" + c, phi, phi_s, theta, theta_s = extract_params( + params, p, q, ps, qs, include_constant_term + ) # Extract parameters + + # Initialize residuals + n = len(data) + residuals = np.zeros(n) + for t in range(n): + y_hat = calc_arima( + data, + p, + q, + ps, + qs, + seasonal_period, + t, + c, + phi, + phi_s, + theta, + theta_s, + residuals, + ) + residuals[t] = data[t] - y_hat + # Calculate the log-likelihood + variance = np.mean(residuals**2) + liklihood = n * (np.log(2 * np.pi) + np.log(variance) + 1) + k = len(params) + aic = liklihood + 2 * k + return ( + aic, + residuals, + ) # Return negative log-likelihood for minimization + + +def extract_params(params, p, q, ps, qs, include_constant_term): + """Extract ARIMA parameters from the parameter vector.""" + # Extract parameters + c = params[0] if include_constant_term else 0 # Constant term + # AR coefficients + phi = params[include_constant_term : p + include_constant_term] + # Seasonal AR coefficients + phi_s = params[include_constant_term + p : p + ps + include_constant_term] + # MA coefficients + theta = params[include_constant_term + p + ps : p + ps + q + include_constant_term] + # Seasonal MA coefficents + theta_s = params[ + include_constant_term + p + ps + q : include_constant_term + p + ps + q + qs + ] + return c, phi, phi_s, theta, theta_s + + +def calc_arima( + data, p, q, ps, qs, seasonal_period, t, c, phi, phi_s, theta, theta_s, residuals +): + 
"""Calculate the ARIMA forecast for time t.""" + # AR part + ar_term = 0 if (t - p) < 0 else np.dot(phi, data[t - p : t][::-1]) + # Seasonal AR part + ars_term = ( + 0 + if (t - seasonal_period * ps) < 0 + else np.dot(phi_s, data[t - seasonal_period * ps : t : seasonal_period][::-1]) + ) + # MA part + ma_term = 0 if (t - q) < 0 else np.dot(theta, residuals[t - q : t][::-1]) + # Seasonal MA part + mas_term = ( + 0 + if (t - seasonal_period * qs) < 0 + else np.dot( + theta_s, residuals[t - seasonal_period * qs : t : seasonal_period][::-1] + ) + ) + y_hat = c + ar_term + ma_term + ars_term + mas_term + return y_hat + + +def nelder_mead( + data, + p, + q, + ps, + qs, + seasonal_period, + include_constant_term, + tol=1e-6, + max_iter=500, +): + """Implement the nelder-mead optimisation algorithm.""" + num_params = include_constant_term + p + ps + q + qs + points = np.full((num_params + 1, num_params), 0.5) + for i in range(num_params): + points[i + 1][i] = 0.6 + values = np.array( + [ + arima_log_likelihood( + v, data, p, q, ps, qs, seasonal_period, include_constant_term + )[0] + for v in points + ] + ) + for _iteration in range(max_iter): + # Order simplex by function values + order = np.argsort(values) + points = points[order] + values = values[order] + + # Centroid of the best n points + centre_point = points[:-1].sum(axis=0) / len(points[:-1]) + + # Reflection + # centre + distance between centre and largest value + reflected_point = centre_point + (centre_point - points[-1]) + reflected_value = arima_log_likelihood( + reflected_point, + data, + p, + q, + ps, + qs, + seasonal_period, + include_constant_term, + )[0] + # if between best and second best, use reflected value + if len(values) > 1 and values[0] <= reflected_value < values[-2]: + points[-1] = reflected_point + values[-1] = reflected_value + continue + # Expansion + # Otherwise if it is better than the best value + if reflected_value < values[0]: + expanded_point = centre_point + 2 * (reflected_point - centre_point) + expanded_value = arima_log_likelihood( + expanded_point, + data, + p, + q, + ps, + qs, + seasonal_period, + include_constant_term, + )[0] + # if less than reflected value use expanded, otherwise go back to reflected + if expanded_value < reflected_value: + points[-1] = expanded_point + values[-1] = expanded_value + else: + points[-1] = reflected_point + values[-1] = reflected_value + continue + # Contraction + # Otherwise if reflection is worse than all current values + contracted_point = centre_point - 0.5 * (centre_point - points[-1]) + contracted_value = arima_log_likelihood( + contracted_point, + data, + p, + q, + ps, + qs, + seasonal_period, + include_constant_term, + )[0] + # If contraction is better use that otherwise move to shrinkage + if contracted_value < values[-1]: + points[-1] = contracted_point + values[-1] = contracted_value + continue + + # Shrinkage + for i in range(1, len(points)): + points[i] = points[0] - 0.5 * (points[0] - points[i]) + values[i] = arima_log_likelihood( + points[i], + data, + p, + q, + ps, + qs, + seasonal_period, + include_constant_term, + )[0] + + # Convergence check + if np.max(np.abs(values - values[0])) < tol: + break + return points[0], values[0] + + +# def calc_moving_variance(data, window): +# X = np.lib.stride_tricks.sliding_window_view(data, window_shape=window) +# return X.var() + + +def auto_arima(data): + """ + Implement the Hyndman-Khandakar algorithm. + + For automatic ARIMA model selection. 
+ """ + seasonal_period = calc_seasonal_period(data) + difference = 0 + while not kpss_test(data)[1]: + data = np.diff(data, n=1) + difference += 1 + seasonal_difference = 1 if seasonal_period > 1 else 0 + if seasonal_difference: + data = data[seasonal_period:] - data[:-seasonal_period] + include_c = 1 if difference == 0 else 0 + model_parameters = [ + [2, 2, 0, 0, include_c], + [0, 0, 0, 0, include_c], + [1, 0, 0, 0, include_c], + [0, 1, 0, 0, include_c], + ] + model_points = [] + for p in model_parameters: + points, aic = nelder_mead(data, p[0], p[1], p[2], p[3], seasonal_period, p[4]) + p.append(aic) + model_points.append(points) + current_model = min(model_parameters, key=lambda item: item[5]) + current_points = model_points[model_parameters.index(current_model)] + while True: + better_model = False + for param_no in range(4): + for adjustment in [-1, 1]: + if (current_model[param_no] + adjustment) < 0: + continue + model = current_model.copy() + model[param_no] += adjustment + for constant_term in [0, 1]: + points, aic = nelder_mead( + data, + model[0], + model[1], + model[2], + model[3], + seasonal_period, + constant_term, + ) + if aic < current_model[5]: + current_model = model + current_points = points + current_model[5] = aic + current_model[4] = constant_term + better_model = True + if not better_model: + break + return ( + data, + current_model[5], + current_model[0], + difference, + current_model[1], + current_model[2], + seasonal_difference, + current_model[3], + seasonal_period, + current_model[4], + current_points, + ) From 3a0552b469de39ff3f0dada1696c20081a17aa27 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Sat, 24 May 2025 17:29:04 +0100 Subject: [PATCH 02/50] move utils --- aeon/forecasting/_arima.py | 49 ++++++++++++++++++++- aeon/utils/forecasting/__init__.py | 1 + aeon/utils/forecasting/_hypo_tests.py | 63 +++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 aeon/utils/forecasting/__init__.py create mode 100644 aeon/utils/forecasting/_hypo_tests.py diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 54ebc47fe3..76f4859557 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -9,9 +9,10 @@ from math import comb import numpy as np +from numba import njit -from aeon.forecasting._utils import calc_seasonal_period, kpss_test from aeon.forecasting.base import BaseForecaster +from aeon.utils.forecasting._hypo_tests import kpss_test NOGIL = False CACHE = True @@ -393,7 +394,7 @@ def auto_arima(data): For automatic ARIMA model selection. 
""" - seasonal_period = calc_seasonal_period(data) + seasonal_period = _calc_seasonal_period(data) difference = 0 while not kpss_test(data)[1]: data = np.diff(data, n=1) @@ -454,3 +455,47 @@ def auto_arima(data): current_model[4], current_points, ) + + +@njit(cache=True, fastmath=True) +def _acf(X, max_lag): + length = len(X) + X_t = np.zeros(max_lag, dtype=float) + for lag in range(1, max_lag + 1): + lag_length = length - lag + x1 = X[:-lag] + x2 = X[lag:] + s1 = np.sum(x1) + s2 = np.sum(x2) + m1 = s1 / lag_length + m2 = s2 / lag_length + ss1 = np.sum(x1 * x1) + ss2 = np.sum(x2 * x2) + v1 = ss1 - s1 * m1 + v2 = ss2 - s2 * m2 + v1_is_zero, v2_is_zero = v1 <= 1e-9, v2 <= 1e-9 + if v1_is_zero and v2_is_zero: # Both zero variance, + # so must be 100% correlated + X_t[lag - 1] = 1 + elif v1_is_zero or v2_is_zero: # One zero variance + # the other not + X_t[lag - 1] = 0 + else: + X_t[lag - 1] = np.sum((x1 - m1) * (x2 - m2)) / np.sqrt(v1 * v2) + return X_t + + +@njit(cache=True, fastmath=True) +def _calc_seasonal_period(data): + """Estimate the seasonal period based on the autocorrelation of the series.""" + lags = _acf(data, 24) + lags = np.concatenate((np.array([1.0]), lags)) + peaks = [] + mean_lags = np.mean(lags) + for i in range(1, len(lags) - 1): # Skip the first (lag 0) and last elements + if lags[i] >= lags[i - 1] and lags[i] >= lags[i + 1] and lags[i] > mean_lags: + peaks.append(i) + if not peaks: + return 1 + else: + return peaks[0] diff --git a/aeon/utils/forecasting/__init__.py b/aeon/utils/forecasting/__init__.py new file mode 100644 index 0000000000..a168fa0f11 --- /dev/null +++ b/aeon/utils/forecasting/__init__.py @@ -0,0 +1 @@ +"""Forecasting utils.""" diff --git a/aeon/utils/forecasting/_hypo_tests.py b/aeon/utils/forecasting/_hypo_tests.py new file mode 100644 index 0000000000..73d4521e5e --- /dev/null +++ b/aeon/utils/forecasting/_hypo_tests.py @@ -0,0 +1,63 @@ +import numpy as np + + +def kpss_test(y, regression="c", lags=None): # Test if time series is stationary + """ + Implement the KPSS test for stationarity. 
+ + Parameters + ---------- + y (array-like): Time series data + regression (str): 'c' for constant, 'ct' for constant + trend + lags (int): Number of lags for HAC variance estimation (default: sqrt(n)) + + Returns + ------- + kpss_stat (float): KPSS test statistic + stationary (bool): Whether the series is stationary according to the test + """ + y = np.asarray(y) + n = len(y) + + # Step 1: Fit regression model to estimate residuals + if regression == "c": # Constant + X = np.ones((n, 1)) + elif regression == "ct": # Constant + Trend + X = np.column_stack((np.ones(n), np.arange(1, n + 1))) + else: + raise ValueError("regression must be 'c' or 'ct'") + + beta = np.linalg.lstsq(X, y, rcond=None)[0] # Estimate regression coefficients + residuals = y - X @ beta # Get residuals (u_t) + + # Step 2: Compute cumulative sum of residuals (S_t) + S_t = np.cumsum(residuals) + + # Step 3: Estimate long-run variance (HAC variance) + if lags is None: + # lags = int(12 * (n / 100)**(1/4)) # Default statsmodels lag length + lags = int(np.sqrt(n)) # Default lag length + + gamma_0 = np.sum(residuals**2) / (n - X.shape[1]) # Lag-0 autocovariance + gamma = [np.sum(residuals[k:] * residuals[:-k]) / n for k in range(1, lags + 1)] + + # Bartlett weights + weights = [1 - (k / (lags + 1)) for k in range(1, lags + 1)] + + # Long-run variance + sigma_squared = gamma_0 + 2 * np.sum([w * g for w, g in zip(weights, gamma)]) + + # Step 4: Calculate the KPSS statistic + kpss_stat = np.sum(S_t**2) / (n**2 * sigma_squared) + + if regression == "ct": + # p. 162 Kwiatkowski et al. (1992): y_t = beta * t + r_t + e_t, + # where beta is the trend, r_t a random walk and e_t a stationary + # error term. + crit = 0.146 + else: # hypo == "c" + # special case of the model above, where beta = 0 (so the null + # hypothesis is that the data is stationary around r_0). + crit = 0.463 + + return kpss_stat, kpss_stat < crit From 0ac5380b4e6a14be8df57c103ca6eabe2a3b7cd1 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Sat, 24 May 2025 17:38:05 +0100 Subject: [PATCH 03/50] make functions private --- aeon/forecasting/_arima.py | 47 +++++++++++++++----------------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 76f4859557..337444f827 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -119,14 +119,14 @@ def _fit(self, y, exog=None): self.seasonal_period_, self.constant_term_, parameters, - ) = auto_arima(self.data_) - (self.c_, self.phi_, self.phi_s_, self.theta_, self.theta_s_) = extract_params( + ) = _auto_arima(self.data_) + (self.c_, self.phi_, self.phi_s_, self.theta_, self.theta_s_) = _extract_params( parameters, self.p_, self.q_, self.ps_, self.qs_, self.constant_term_ ) ( self.aic_, self.residuals_, - ) = arima_log_likelihood( + ) = _arima_log_likelihood( parameters, self.differenced_data_, self.p_, @@ -156,7 +156,7 @@ def _predict(self, y=None, exog=None): single prediction self.horizon steps ahead of y. 
""" y = np.array(y, dtype=np.float64) - value = calc_arima( + value = _calc_arima( self.differenced_data_, self.p_, self.q_, @@ -181,19 +181,15 @@ def _predict(self, y=None, exog=None): # Step 2: undo ordinary differencing for k in range(1, self.d_ + 1): value += (-1) ** (k + 1) * comb(self.d_, k) * history[k - 1] - - if y is None: - return np.array([value]) - else: - return np.insert(y, 0, value)[:-1] + return value # Define the ARIMA(p, d, q) likelihood function -def arima_log_likelihood( +def _arima_log_likelihood( params, data, p, q, ps, qs, seasonal_period, include_constant_term ): """Calculate the log-likelihood of an ARIMA model given the parameters.""" - c, phi, phi_s, theta, theta_s = extract_params( + c, phi, phi_s, theta, theta_s = _extract_params( params, p, q, ps, qs, include_constant_term ) # Extract parameters @@ -201,7 +197,7 @@ def arima_log_likelihood( n = len(data) residuals = np.zeros(n) for t in range(n): - y_hat = calc_arima( + y_hat = _calc_arima( data, p, q, @@ -228,7 +224,7 @@ def arima_log_likelihood( ) # Return negative log-likelihood for minimization -def extract_params(params, p, q, ps, qs, include_constant_term): +def _extract_params(params, p, q, ps, qs, include_constant_term): """Extract ARIMA parameters from the parameter vector.""" # Extract parameters c = params[0] if include_constant_term else 0 # Constant term @@ -245,7 +241,7 @@ def extract_params(params, p, q, ps, qs, include_constant_term): return c, phi, phi_s, theta, theta_s -def calc_arima( +def _calc_arima( data, p, q, ps, qs, seasonal_period, t, c, phi, phi_s, theta, theta_s, residuals ): """Calculate the ARIMA forecast for time t.""" @@ -271,7 +267,7 @@ def calc_arima( return y_hat -def nelder_mead( +def _nelder_mead( data, p, q, @@ -289,7 +285,7 @@ def nelder_mead( points[i + 1][i] = 0.6 values = np.array( [ - arima_log_likelihood( + _arima_log_likelihood( v, data, p, q, ps, qs, seasonal_period, include_constant_term )[0] for v in points @@ -307,7 +303,7 @@ def nelder_mead( # Reflection # centre + distance between centre and largest value reflected_point = centre_point + (centre_point - points[-1]) - reflected_value = arima_log_likelihood( + reflected_value = _arima_log_likelihood( reflected_point, data, p, @@ -326,7 +322,7 @@ def nelder_mead( # Otherwise if it is better than the best value if reflected_value < values[0]: expanded_point = centre_point + 2 * (reflected_point - centre_point) - expanded_value = arima_log_likelihood( + expanded_value = _arima_log_likelihood( expanded_point, data, p, @@ -347,7 +343,7 @@ def nelder_mead( # Contraction # Otherwise if reflection is worse than all current values contracted_point = centre_point - 0.5 * (centre_point - points[-1]) - contracted_value = arima_log_likelihood( + contracted_value = _arima_log_likelihood( contracted_point, data, p, @@ -366,7 +362,7 @@ def nelder_mead( # Shrinkage for i in range(1, len(points)): points[i] = points[0] - 0.5 * (points[0] - points[i]) - values[i] = arima_log_likelihood( + values[i] = _arima_log_likelihood( points[i], data, p, @@ -383,12 +379,7 @@ def nelder_mead( return points[0], values[0] -# def calc_moving_variance(data, window): -# X = np.lib.stride_tricks.sliding_window_view(data, window_shape=window) -# return X.var() - - -def auto_arima(data): +def _auto_arima(data): """ Implement the Hyndman-Khandakar algorithm. 
@@ -411,7 +402,7 @@ def auto_arima(data): ] model_points = [] for p in model_parameters: - points, aic = nelder_mead(data, p[0], p[1], p[2], p[3], seasonal_period, p[4]) + points, aic = _nelder_mead(data, p[0], p[1], p[2], p[3], seasonal_period, p[4]) p.append(aic) model_points.append(points) current_model = min(model_parameters, key=lambda item: item[5]) @@ -425,7 +416,7 @@ def auto_arima(data): model = current_model.copy() model[param_no] += adjustment for constant_term in [0, 1]: - points, aic = nelder_mead( + points, aic = _nelder_mead( data, model[0], model[1], From 44b36a7b2d34c6b3452fdef97446a4ee83fe5789 Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 28 May 2025 13:49:51 +0100 Subject: [PATCH 04/50] Modularise SARIMA model --- aeon/forecasting/_arima.py | 363 +++++++----------------- aeon/utils/forecasting/_seasonality.py | 101 +++++++ aeon/utils/optimisation/__init__.py | 1 + aeon/utils/optimisation/_nelder_mead.py | 106 +++++++ 4 files changed, 313 insertions(+), 258 deletions(-) create mode 100644 aeon/utils/forecasting/_seasonality.py create mode 100644 aeon/utils/optimisation/__init__.py create mode 100644 aeon/utils/optimisation/_nelder_mead.py diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 337444f827..00d35ec55c 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -9,10 +9,11 @@ from math import comb import numpy as np -from numba import njit from aeon.forecasting.base import BaseForecaster from aeon.utils.forecasting._hypo_tests import kpss_test +from aeon.utils.forecasting._seasonality import calc_seasonal_period +from aeon.utils.optimisation._nelder_mead import nelder_mead NOGIL = False CACHE = True @@ -83,11 +84,13 @@ def __init__(self, horizon=1): self.qs_ = 0 self.seasonal_period_ = 0 self.constant_term_ = 0 + self.model_ = [] self.c_ = 0 self.phi_ = 0 self.phi_s_ = 0 self.theta_ = 0 self.theta_s_ = 0 + self.parameters_ = [] def _fit(self, y, exog=None): """Fit AutoARIMA forecaster to series y. @@ -109,32 +112,28 @@ def _fit(self, y, exog=None): self.data_ = np.array(y.squeeze(), dtype=np.float64) ( self.differenced_data_, + self.d_, + self.ds_, + self.model_, + self.parameters_, self.aic_, + ) = _auto_arima(self.data_) + ( + self.constant_term_, self.p_, - self.d_, self.q_, self.ps_, - self.ds_, self.qs_, self.seasonal_period_, - self.constant_term_, - parameters, - ) = _auto_arima(self.data_) + ) = self.model_ (self.c_, self.phi_, self.phi_s_, self.theta_, self.theta_s_) = _extract_params( - parameters, self.p_, self.q_, self.ps_, self.qs_, self.constant_term_ + self.parameters_, self.model_ ) ( self.aic_, self.residuals_, - ) = _arima_log_likelihood( - parameters, - self.differenced_data_, - self.p_, - self.q_, - self.ps_, - self.qs_, - self.seasonal_period_, - self.constant_term_, + ) = _arima_model( + self.parameters_, _calc_sarima, self.differenced_data_, self.model_ ) return self @@ -156,19 +155,11 @@ def _predict(self, y=None, exog=None): single prediction self.horizon steps ahead of y. 
""" y = np.array(y, dtype=np.float64) - value = _calc_arima( + value = _calc_sarima( self.differenced_data_, - self.p_, - self.q_, - self.ps_, - self.qs_, - self.seasonal_period_, + self.model_, len(self.differenced_data_), - self.c_, - self.phi_, - self.phi_s_, - self.theta_, - self.theta_s_, + _extract_params(self.parameters_, self.model_), self.residuals_, ) history = self.data_[::-1] @@ -184,78 +175,86 @@ def _predict(self, y=None, exog=None): return value +def _aic(residuals, num_params): + """Calculate the log-likelihood of a model.""" + variance = np.mean(residuals**2) + liklihood = len(residuals) * (np.log(2 * np.pi) + np.log(variance) + 1) + return liklihood + 2 * num_params + + # Define the ARIMA(p, d, q) likelihood function -def _arima_log_likelihood( - params, data, p, q, ps, qs, seasonal_period, include_constant_term -): +def _arima_model(params, base_function, data, model): """Calculate the log-likelihood of an ARIMA model given the parameters.""" - c, phi, phi_s, theta, theta_s = _extract_params( - params, p, q, ps, qs, include_constant_term - ) # Extract parameters + formatted_params = _extract_params(params, model) # Extract parameters # Initialize residuals n = len(data) residuals = np.zeros(n) for t in range(n): - y_hat = _calc_arima( + y_hat = base_function( data, - p, - q, - ps, - qs, - seasonal_period, + model, t, - c, - phi, - phi_s, - theta, - theta_s, + formatted_params, residuals, ) residuals[t] = data[t] - y_hat - # Calculate the log-likelihood - variance = np.mean(residuals**2) - liklihood = n * (np.log(2 * np.pi) + np.log(variance) + 1) - k = len(params) - aic = liklihood + 2 * k - return ( - aic, - residuals, - ) # Return negative log-likelihood for minimization + return _aic(residuals, len(params)), residuals -def _extract_params(params, p, q, ps, qs, include_constant_term): - """Extract ARIMA parameters from the parameter vector.""" - # Extract parameters - c = params[0] if include_constant_term else 0 # Constant term - # AR coefficients - phi = params[include_constant_term : p + include_constant_term] - # Seasonal AR coefficients - phi_s = params[include_constant_term + p : p + ps + include_constant_term] - # MA coefficients - theta = params[include_constant_term + p + ps : p + ps + q + include_constant_term] - # Seasonal MA coefficents - theta_s = params[ - include_constant_term + p + ps + q : include_constant_term + p + ps + q + qs - ] - return c, phi, phi_s, theta, theta_s +# Define the SARIMA(p, d, q)(P, D, Q) likelihood function -def _calc_arima( - data, p, q, ps, qs, seasonal_period, t, c, phi, phi_s, theta, theta_s, residuals -): +def _extract_params(params, model): + """Extract ARIMA parameters from the parameter vector.""" + if len(params) != np.sum(model): + previous_length = np.sum(model) + model = model[:-1] # Remove the seasonal period + if len(params) != np.sum(model): + raise ValueError( + f"Expected {previous_length} parameters for a non-seasonal model or \ + {np.sum(model)} parameters for a seasonal model, got {len(params)}" + ) + starts = np.cumsum([0] + model[:-1]) + return [params[s : s + l].tolist() for s, l in zip(starts, model)] + + +def _calc_arima(data, model, t, formatted_params, residuals): """Calculate the ARIMA forecast for time t.""" + if len(model) != 3: + raise ValueError("Model must be of the form (c, p, q)") # AR part + p = model[1] + phi = formatted_params[1] ar_term = 0 if (t - p) < 0 else np.dot(phi, data[t - p : t][::-1]) + + # MA part + q = model[2] + theta = formatted_params[2] + ma_term = 0 if (t - q) < 0 else 
np.dot(theta, residuals[t - q : t][::-1]) + + c = formatted_params[0][0] if model[0] else 0 + y_hat = c + ar_term + ma_term + return y_hat + + +def _calc_sarima(data, model, t, formatted_params, residuals): + """Calculate the SARIMA forecast for time t.""" + if len(model) != 6: + raise ValueError("Model must be of the form (c, p, q, ps, qs, seasonal_period)") + arima_forecast = _calc_arima(data, model[:3], t, formatted_params, residuals) + seasonal_period = model[5] # Seasonal AR part + ps = model[3] + phi_s = formatted_params[3] ars_term = ( 0 if (t - seasonal_period * ps) < 0 else np.dot(phi_s, data[t - seasonal_period * ps : t : seasonal_period][::-1]) ) - # MA part - ma_term = 0 if (t - q) < 0 else np.dot(theta, residuals[t - q : t][::-1]) # Seasonal MA part + qs = model[4] + theta_s = formatted_params[4] mas_term = ( 0 if (t - seasonal_period * qs) < 0 @@ -263,120 +262,20 @@ def _calc_arima( theta_s, residuals[t - seasonal_period * qs : t : seasonal_period][::-1] ) ) - y_hat = c + ar_term + ma_term + ars_term + mas_term - return y_hat + return arima_forecast + ars_term + mas_term -def _nelder_mead( - data, - p, - q, - ps, - qs, - seasonal_period, - include_constant_term, - tol=1e-6, - max_iter=500, -): - """Implement the nelder-mead optimisation algorithm.""" - num_params = include_constant_term + p + ps + q + qs - points = np.full((num_params + 1, num_params), 0.5) - for i in range(num_params): - points[i + 1][i] = 0.6 - values = np.array( - [ - _arima_log_likelihood( - v, data, p, q, ps, qs, seasonal_period, include_constant_term - )[0] - for v in points - ] - ) - for _iteration in range(max_iter): - # Order simplex by function values - order = np.argsort(values) - points = points[order] - values = values[order] - - # Centroid of the best n points - centre_point = points[:-1].sum(axis=0) / len(points[:-1]) - - # Reflection - # centre + distance between centre and largest value - reflected_point = centre_point + (centre_point - points[-1]) - reflected_value = _arima_log_likelihood( - reflected_point, - data, - p, - q, - ps, - qs, - seasonal_period, - include_constant_term, - )[0] - # if between best and second best, use reflected value - if len(values) > 1 and values[0] <= reflected_value < values[-2]: - points[-1] = reflected_point - values[-1] = reflected_value - continue - # Expansion - # Otherwise if it is better than the best value - if reflected_value < values[0]: - expanded_point = centre_point + 2 * (reflected_point - centre_point) - expanded_value = _arima_log_likelihood( - expanded_point, - data, - p, - q, - ps, - qs, - seasonal_period, - include_constant_term, - )[0] - # if less than reflected value use expanded, otherwise go back to reflected - if expanded_value < reflected_value: - points[-1] = expanded_point - values[-1] = expanded_value - else: - points[-1] = reflected_point - values[-1] = reflected_value - continue - # Contraction - # Otherwise if reflection is worse than all current values - contracted_point = centre_point - 0.5 * (centre_point - points[-1]) - contracted_value = _arima_log_likelihood( - contracted_point, - data, - p, - q, - ps, - qs, - seasonal_period, - include_constant_term, - )[0] - # If contraction is better use that otherwise move to shrinkage - if contracted_value < values[-1]: - points[-1] = contracted_point - values[-1] = contracted_value - continue - - # Shrinkage - for i in range(1, len(points)): - points[i] = points[0] - 0.5 * (points[0] - points[i]) - values[i] = _arima_log_likelihood( - points[i], - data, - p, - q, - ps, - qs, - 
seasonal_period, - include_constant_term, - )[0] - - # Convergence check - if np.max(np.abs(values - values[0])) < tol: - break - return points[0], values[0] +def make_arima_llf(base_function, data, model): + """ + Return a parameterized log-likelihood function for ARIMA. + + This can then be used with an optimization algorithm. + """ + + def loss_fn(v): + return _arima_model(v, base_function, data, model)[0] + + return loss_fn def _auto_arima(data): @@ -385,7 +284,7 @@ def _auto_arima(data): For automatic ARIMA model selection. """ - seasonal_period = _calc_seasonal_period(data) + seasonal_period = calc_seasonal_period(data) difference = 0 while not kpss_test(data)[1]: data = np.diff(data, n=1) @@ -395,98 +294,46 @@ def _auto_arima(data): data = data[seasonal_period:] - data[:-seasonal_period] include_c = 1 if difference == 0 else 0 model_parameters = [ - [2, 2, 0, 0, include_c], - [0, 0, 0, 0, include_c], - [1, 0, 0, 0, include_c], - [0, 1, 0, 0, include_c], + [include_c, 2, 2, 0, 0, seasonal_period], + [include_c, 0, 0, 0, 0, seasonal_period], + [include_c, 1, 0, 0, 0, seasonal_period], + [include_c, 0, 1, 0, 0, seasonal_period], ] model_points = [] + model_scores = [] for p in model_parameters: - points, aic = _nelder_mead(data, p[0], p[1], p[2], p[3], seasonal_period, p[4]) - p.append(aic) + points, aic = nelder_mead(make_arima_llf(_calc_sarima, data, p), np.sum(p[:5])) model_points.append(points) - current_model = min(model_parameters, key=lambda item: item[5]) - current_points = model_points[model_parameters.index(current_model)] + model_scores.append(aic) + best_score = min(model_scores) + best_index = model_scores.index(best_score) + current_model = model_parameters[best_index] + current_points = model_points[best_index] while True: better_model = False - for param_no in range(4): + for param_no in range(1, 5): for adjustment in [-1, 1]: if (current_model[param_no] + adjustment) < 0: continue model = current_model.copy() model[param_no] += adjustment for constant_term in [0, 1]: - points, aic = _nelder_mead( - data, - model[0], - model[1], - model[2], - model[3], - seasonal_period, - constant_term, + model[0] = constant_term + points, aic = nelder_mead( + make_arima_llf(_calc_sarima, data, model), np.sum(model[:5]) ) - if aic < current_model[5]: - current_model = model + if aic < best_score: + current_model = model.copy() current_points = points - current_model[5] = aic - current_model[4] = constant_term + best_score = aic better_model = True if not better_model: break return ( data, - current_model[5], - current_model[0], difference, - current_model[1], - current_model[2], seasonal_difference, - current_model[3], - seasonal_period, - current_model[4], + current_model, current_points, + best_score, ) - - -@njit(cache=True, fastmath=True) -def _acf(X, max_lag): - length = len(X) - X_t = np.zeros(max_lag, dtype=float) - for lag in range(1, max_lag + 1): - lag_length = length - lag - x1 = X[:-lag] - x2 = X[lag:] - s1 = np.sum(x1) - s2 = np.sum(x2) - m1 = s1 / lag_length - m2 = s2 / lag_length - ss1 = np.sum(x1 * x1) - ss2 = np.sum(x2 * x2) - v1 = ss1 - s1 * m1 - v2 = ss2 - s2 * m2 - v1_is_zero, v2_is_zero = v1 <= 1e-9, v2 <= 1e-9 - if v1_is_zero and v2_is_zero: # Both zero variance, - # so must be 100% correlated - X_t[lag - 1] = 1 - elif v1_is_zero or v2_is_zero: # One zero variance - # the other not - X_t[lag - 1] = 0 - else: - X_t[lag - 1] = np.sum((x1 - m1) * (x2 - m2)) / np.sqrt(v1 * v2) - return X_t - - -@njit(cache=True, fastmath=True) -def _calc_seasonal_period(data): - 
"""Estimate the seasonal period based on the autocorrelation of the series.""" - lags = _acf(data, 24) - lags = np.concatenate((np.array([1.0]), lags)) - peaks = [] - mean_lags = np.mean(lags) - for i in range(1, len(lags) - 1): # Skip the first (lag 0) and last elements - if lags[i] >= lags[i - 1] and lags[i] >= lags[i + 1] and lags[i] > mean_lags: - peaks.append(i) - if not peaks: - return 1 - else: - return peaks[0] diff --git a/aeon/utils/forecasting/_seasonality.py b/aeon/utils/forecasting/_seasonality.py new file mode 100644 index 0000000000..356b1a40d2 --- /dev/null +++ b/aeon/utils/forecasting/_seasonality.py @@ -0,0 +1,101 @@ +"""Seasonality Tools. + +Includes autocorrelation function (ACF) and seasonal period estimation. +""" + +import numpy as np +from numba import njit + + +@njit(cache=True, fastmath=True) +def acf(X, max_lag): + """ + Compute the sample autocorrelation function (ACF) of a time series. + + Up to a specified maximum lag. + + The autocorrelation at lag k is defined as the Pearson correlation + coefficient between the series and a lagged version of itself. + If both segments at a given lag have zero variance, the function + returns 1 for that lag. If only one segment has zero variance, + the function returns 0. + + Parameters + ---------- + X : array-like, shape (n_samples,) + The input time series data. + max_lag : int + The maximum lag (number of steps) for which to + compute the autocorrelation. + + Returns + ------- + acf_values : np.ndarray, shape (max_lag,) + The autocorrelation values for lags 1 through `max_lag`. + + Notes + ----- + The function handles cases where the lagged segments have zero + variance to avoid division by zero. + The returned values correspond to + lags 1, 2, ..., `max_lag` (not including lag 0). + """ + length = len(X) + X_t = np.zeros(max_lag, dtype=float) + for lag in range(1, max_lag + 1): + lag_length = length - lag + x1 = X[:-lag] + x2 = X[lag:] + s1 = np.sum(x1) + s2 = np.sum(x2) + m1 = s1 / lag_length + m2 = s2 / lag_length + ss1 = np.sum(x1 * x1) + ss2 = np.sum(x2 * x2) + v1 = ss1 - s1 * m1 + v2 = ss2 - s2 * m2 + v1_is_zero, v2_is_zero = v1 <= 1e-9, v2 <= 1e-9 + if v1_is_zero and v2_is_zero: # Both zero variance, + # so must be 100% correlated + X_t[lag - 1] = 1 + elif v1_is_zero or v2_is_zero: # One zero variance + # the other not + X_t[lag - 1] = 0 + else: + X_t[lag - 1] = np.sum((x1 - m1) * (x2 - m2)) / np.sqrt(v1 * v2) + return X_t + + +@njit(cache=True, fastmath=True) +def calc_seasonal_period(data): + """ + Estimate the seasonal period of a time series using autocorrelation analysis. + + This function computes the autocorrelation function (ACF) of + the input series up to lag 24. It then identifies peaks in the + ACF above the mean value, treating the first such peak + as the estimated seasonal period. If no peak is found, + a period of 1 is returned. + + Parameters + ---------- + data : array-like, shape (n_samples,) + The input time series data. + + Returns + ------- + period : int + The estimated seasonal period (lag) of the series. Returns 1 if no significant + peak is detected in the autocorrelation. 
+ """ + lags = acf(data, 24) + lags = np.concatenate((np.array([1.0]), lags)) + peaks = [] + mean_lags = np.mean(lags) + for i in range(1, len(lags) - 1): # Skip the first (lag 0) and last elements + if lags[i] >= lags[i - 1] and lags[i] >= lags[i + 1] and lags[i] > mean_lags: + peaks.append(i) + if not peaks: + return 1 + else: + return peaks[0] diff --git a/aeon/utils/optimisation/__init__.py b/aeon/utils/optimisation/__init__.py new file mode 100644 index 0000000000..11eddea791 --- /dev/null +++ b/aeon/utils/optimisation/__init__.py @@ -0,0 +1 @@ +"""Optimisation utils.""" diff --git a/aeon/utils/optimisation/_nelder_mead.py b/aeon/utils/optimisation/_nelder_mead.py new file mode 100644 index 0000000000..36dfe732ab --- /dev/null +++ b/aeon/utils/optimisation/_nelder_mead.py @@ -0,0 +1,106 @@ +"""Optimisation algorithms for automatic parameter tuning.""" + +import numpy as np + + +def nelder_mead( + loss_function, + num_params, + tol=1e-6, + max_iter=500, +): + """ + Perform optimisation using the Nelder–Mead simplex algorithm. + + This function minimises a given loss (objective) function using the Nelder–Mead + algorithm, a derivative-free method that iteratively refines a simplex of candidate + solutions. The implementation supports unconstrained minimisation of functions + with a fixed number of parameters. + + Parameters + ---------- + loss_function : callable + The objective function to minimise. Should accept a 1D NumPy array of length + `num_params` and return a scalar value. + num_params : int + The number of parameters (dimensions) in the optimisation problem. + tol : float, optional (default=1e-6) + Tolerance for convergence. The algorithm stops when the maximum difference + between function values at simplex vertices is less than `tol`. + max_iter : int, optional (default=500) + Maximum number of iterations to perform. + + Returns + ------- + best_params : np.ndarray, shape (`num_params`,) + The parameter vector that minimises the loss function. + best_value : float + The value of the loss function at the optimal parameter vector. + + Notes + ----- + - The initial simplex is constructed by setting each parameter to 0.5, + with one additional point per dimension at 0.6 for that dimension. + - This implementation does not support constraints or bounds on the parameters. + - The algorithm does not guarantee finding a global minimum. + + Examples + -------- + >>> def sphere(x): + ... 
return np.sum(x**2) + >>> x_opt, val = nelder_mead(sphere, num_params=2) + """ + points = np.full((num_params + 1, num_params), 0.5) + for i in range(num_params): + points[i + 1][i] = 0.6 + values = np.array([loss_function(v) for v in points]) + for _iteration in range(max_iter): + # Order simplex by function values + order = np.argsort(values) + points = points[order] + values = values[order] + + # Centroid of the best n points + centre_point = points[:-1].sum(axis=0) / len(points[:-1]) + + # Reflection + # centre + distance between centre and largest value + reflected_point = centre_point + (centre_point - points[-1]) + reflected_value = loss_function(reflected_point) + # if between best and second best, use reflected value + if len(values) > 1 and values[0] <= reflected_value < values[-2]: + points[-1] = reflected_point + values[-1] = reflected_value + continue + # Expansion + # Otherwise if it is better than the best value + if reflected_value < values[0]: + expanded_point = centre_point + 2 * (reflected_point - centre_point) + expanded_value = loss_function(expanded_point) + # if less than reflected value use expanded, otherwise go back to reflected + if expanded_value < reflected_value: + points[-1] = expanded_point + values[-1] = expanded_value + else: + points[-1] = reflected_point + values[-1] = reflected_value + continue + # Contraction + # Otherwise if reflection is worse than all current values + contracted_point = centre_point - 0.5 * (centre_point - points[-1]) + contracted_value = loss_function(contracted_point) + # If contraction is better use that otherwise move to shrinkage + if contracted_value < values[-1]: + points[-1] = contracted_point + values[-1] = contracted_value + continue + + # Shrinkage + for i in range(1, len(points)): + points[i] = points[0] - 0.5 * (points[0] - points[i]) + values[i] = loss_function(points[i]) + + # Convergence check + if np.max(np.abs(values - values[0])) < tol: + break + return points[0], values[0] From 6d18de9c7c7dea345e2accedd7ef16be65e83ac7 Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 28 May 2025 14:05:57 +0100 Subject: [PATCH 05/50] Add ARIMA forecaster to forecasting package --- aeon/forecasting/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aeon/forecasting/__init__.py b/aeon/forecasting/__init__.py index 7a331f69e6..f6983cb89c 100644 --- a/aeon/forecasting/__init__.py +++ b/aeon/forecasting/__init__.py @@ -5,8 +5,10 @@ "BaseForecaster", "RegressionForecaster", "ETSForecaster", + "ARIMAForecaster", ] +from aeon.forecasting._arima import ARIMAForecaster from aeon.forecasting._ets import ETSForecaster from aeon.forecasting._naive import NaiveForecaster from aeon.forecasting._regression import RegressionForecaster From b7e642432fc931d09a3f1b35b77e4f74c9a63f3b Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 28 May 2025 14:06:31 +0100 Subject: [PATCH 06/50] Add example to ARIMA forecaster, this also tests the forecaster is producing the expected results --- aeon/forecasting/_arima.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 00d35ec55c..4c0e383140 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -68,6 +68,17 @@ class ARIMAForecaster(BaseForecaster): .. [1] R. J. Hyndman and G. Athanasopoulos, Forecasting: Principles and Practice. OTexts, 2014. 
https://otexts.com/fpp3/ + + Examples + -------- + >>> from aeon.forecasting import ARIMAForecaster + >>> from aeon.datasets import load_airline + >>> y = load_airline() + >>> forecaster = ARIMAForecaster() + >>> forecaster.fit(y) + ARIMAForecaster() + >>> forecaster.predict() + 450.74890401954826 """ def __init__(self, horizon=1): From e33fa4d3d33121b30f30ca903c49ac996c6dd5b8 Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 28 May 2025 18:24:08 +0100 Subject: [PATCH 07/50] Basic ARIMA model --- aeon/forecasting/_arima.py | 168 +++++-------------------------------- 1 file changed, 21 insertions(+), 147 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 4c0e383140..29c42bffe5 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -11,8 +11,6 @@ import numpy as np from aeon.forecasting.base import BaseForecaster -from aeon.utils.forecasting._hypo_tests import kpss_test -from aeon.utils.forecasting._seasonality import calc_seasonal_period from aeon.utils.optimisation._nelder_mead import nelder_mead NOGIL = False @@ -22,10 +20,8 @@ class ARIMAForecaster(BaseForecaster): """AutoRegressive Integrated Moving Average (ARIMA) forecaster. - Implements the Hyndman-Khandakar automatic ARIMA algorithm for time series - forecasting with optional seasonal components. The model automatically selects - the orders of the non-seasonal (p, d, q) and seasonal (P, D, Q, m) components - based on information criteria, such as AIC. + The model automatically selects the parameters of the model based + on information criteria, such as AIC. Parameters ---------- @@ -45,23 +41,14 @@ class ARIMAForecaster(BaseForecaster): p_, d_, q_ : int Orders of the ARIMA model: autoregressive (p), differencing (d), and moving average (q) terms. - ps_, ds_, qs_ : int - Orders of the seasonal ARIMA model: seasonal AR (P), seasonal differencing (D), - and seasonal MA (Q) terms. - seasonal_period_ : int - Length of the seasonal cycle. constant_term_ : float Constant/intercept term in the model. c_ : float Estimated constant term (internal use). phi_ : array-like Coefficients for the non-seasonal autoregressive terms. - phi_s_ : array-like - Coefficients for the seasonal autoregressive terms. theta_ : array-like Coefficients for the non-seasonal moving average terms. - theta_s_ : array-like - Coefficients for the seasonal moving average terms. References ---------- @@ -74,33 +61,27 @@ class ARIMAForecaster(BaseForecaster): >>> from aeon.forecasting import ARIMAForecaster >>> from aeon.datasets import load_airline >>> y = load_airline() - >>> forecaster = ARIMAForecaster() + >>> forecaster = ARIMAForecaster(2,1,1,0) >>> forecaster.fit(y) ARIMAForecaster() >>> forecaster.predict() - 450.74890401954826 + 550.9147246631134 """ - def __init__(self, horizon=1): + def __init__(self, p=1, d=0, q=1, constant_term=0, horizon=1): super().__init__(horizon=horizon, axis=1) self.data_ = [] self.differenced_data_ = [] self.residuals_ = [] self.aic_ = 0 - self.p_ = 0 - self.d_ = 0 - self.q_ = 0 - self.ps_ = 0 - self.ds_ = 0 - self.qs_ = 0 - self.seasonal_period_ = 0 - self.constant_term_ = 0 + self.p = p + self.d = d + self.q = q + self.constant_term = constant_term self.model_ = [] self.c_ = 0 self.phi_ = 0 - self.phi_s_ = 0 self.theta_ = 0 - self.theta_s_ = 0 self.parameters_ = [] def _fit(self, y, exog=None): @@ -121,30 +102,17 @@ def _fit(self, y, exog=None): Fitted ARIMAForecaster. 
""" self.data_ = np.array(y.squeeze(), dtype=np.float64) - ( - self.differenced_data_, - self.d_, - self.ds_, - self.model_, - self.parameters_, - self.aic_, - ) = _auto_arima(self.data_) - ( - self.constant_term_, - self.p_, - self.q_, - self.ps_, - self.qs_, - self.seasonal_period_, - ) = self.model_ - (self.c_, self.phi_, self.phi_s_, self.theta_, self.theta_s_) = _extract_params( + self.model_ = [self.constant_term, self.p, self.q] + self.differenced_data_ = np.diff(self.data_, n=self.d) + (self.parameters_, self.aic_) = nelder_mead( + make_arima_llf(_calc_arima, self.data_, self.model_), + np.sum(self.model_[:3]), + ) + (self.c_, self.phi_, self.theta_) = _extract_params( self.parameters_, self.model_ ) - ( - self.aic_, - self.residuals_, - ) = _arima_model( - self.parameters_, _calc_sarima, self.differenced_data_, self.model_ + (self.aic_, self.residuals_) = _arima_model( + self.parameters_, _calc_arima, self.differenced_data_, self.model_ ) return self @@ -166,7 +134,7 @@ def _predict(self, y=None, exog=None): single prediction self.horizon steps ahead of y. """ y = np.array(y, dtype=np.float64) - value = _calc_sarima( + value = _calc_arima( self.differenced_data_, self.model_, len(self.differenced_data_), @@ -174,15 +142,9 @@ def _predict(self, y=None, exog=None): self.residuals_, ) history = self.data_[::-1] - differenced_history = np.diff(self.data_, n=self.d_)[::-1] - # Step 1: undo seasonal differencing on y^(d) - for k in range(1, self.ds_ + 1): - lag = k * self.seasonal_period_ - value += (-1) ** (k + 1) * comb(self.ds_, k) * differenced_history[lag - 1] - # Step 2: undo ordinary differencing - for k in range(1, self.d_ + 1): - value += (-1) ** (k + 1) * comb(self.d_, k) * history[k - 1] + for k in range(1, self.d + 1): + value += (-1) ** (k + 1) * comb(self.d, k) * history[k - 1] return value @@ -249,33 +211,6 @@ def _calc_arima(data, model, t, formatted_params, residuals): return y_hat -def _calc_sarima(data, model, t, formatted_params, residuals): - """Calculate the SARIMA forecast for time t.""" - if len(model) != 6: - raise ValueError("Model must be of the form (c, p, q, ps, qs, seasonal_period)") - arima_forecast = _calc_arima(data, model[:3], t, formatted_params, residuals) - seasonal_period = model[5] - # Seasonal AR part - ps = model[3] - phi_s = formatted_params[3] - ars_term = ( - 0 - if (t - seasonal_period * ps) < 0 - else np.dot(phi_s, data[t - seasonal_period * ps : t : seasonal_period][::-1]) - ) - # Seasonal MA part - qs = model[4] - theta_s = formatted_params[4] - mas_term = ( - 0 - if (t - seasonal_period * qs) < 0 - else np.dot( - theta_s, residuals[t - seasonal_period * qs : t : seasonal_period][::-1] - ) - ) - return arima_forecast + ars_term + mas_term - - def make_arima_llf(base_function, data, model): """ Return a parameterized log-likelihood function for ARIMA. @@ -287,64 +222,3 @@ def loss_fn(v): return _arima_model(v, base_function, data, model)[0] return loss_fn - - -def _auto_arima(data): - """ - Implement the Hyndman-Khandakar algorithm. - - For automatic ARIMA model selection. 
- """ - seasonal_period = calc_seasonal_period(data) - difference = 0 - while not kpss_test(data)[1]: - data = np.diff(data, n=1) - difference += 1 - seasonal_difference = 1 if seasonal_period > 1 else 0 - if seasonal_difference: - data = data[seasonal_period:] - data[:-seasonal_period] - include_c = 1 if difference == 0 else 0 - model_parameters = [ - [include_c, 2, 2, 0, 0, seasonal_period], - [include_c, 0, 0, 0, 0, seasonal_period], - [include_c, 1, 0, 0, 0, seasonal_period], - [include_c, 0, 1, 0, 0, seasonal_period], - ] - model_points = [] - model_scores = [] - for p in model_parameters: - points, aic = nelder_mead(make_arima_llf(_calc_sarima, data, p), np.sum(p[:5])) - model_points.append(points) - model_scores.append(aic) - best_score = min(model_scores) - best_index = model_scores.index(best_score) - current_model = model_parameters[best_index] - current_points = model_points[best_index] - while True: - better_model = False - for param_no in range(1, 5): - for adjustment in [-1, 1]: - if (current_model[param_no] + adjustment) < 0: - continue - model = current_model.copy() - model[param_no] += adjustment - for constant_term in [0, 1]: - model[0] = constant_term - points, aic = nelder_mead( - make_arima_llf(_calc_sarima, data, model), np.sum(model[:5]) - ) - if aic < best_score: - current_model = model.copy() - current_points = points - best_score = aic - better_model = True - if not better_model: - break - return ( - data, - difference, - seasonal_difference, - current_model, - current_points, - best_score, - ) From f613f7e4cd40990f577c3e7ce286bf14c59abdd8 Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 28 May 2025 18:42:25 +0100 Subject: [PATCH 08/50] Convert ARIMA to numba version --- aeon/forecasting/_arima.py | 53 +++++++++++++------------ aeon/utils/optimisation/_nelder_mead.py | 14 ++++--- 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 29c42bffe5..4ca197f3f0 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -9,6 +9,7 @@ from math import comb import numpy as np +from numba import njit from aeon.forecasting.base import BaseForecaster from aeon.utils.optimisation._nelder_mead import nelder_mead @@ -65,7 +66,7 @@ class ARIMAForecaster(BaseForecaster): >>> forecaster.fit(y) ARIMAForecaster() >>> forecaster.predict() - 550.9147246631134 + 550.9147246631135 """ def __init__(self, p=1, d=0, q=1, constant_term=0, horizon=1): @@ -102,11 +103,13 @@ def _fit(self, y, exog=None): Fitted ARIMAForecaster. 
""" self.data_ = np.array(y.squeeze(), dtype=np.float64) - self.model_ = [self.constant_term, self.p, self.q] + self.model_ = np.array((self.constant_term, self.p, self.q), dtype=np.int32) self.differenced_data_ = np.diff(self.data_, n=self.d) (self.parameters_, self.aic_) = nelder_mead( - make_arima_llf(_calc_arima, self.data_, self.model_), + _arima_model_wrapper, np.sum(self.model_[:3]), + self.data_, + self.model_, ) (self.c_, self.phi_, self.theta_) = _extract_params( self.parameters_, self.model_ @@ -148,6 +151,7 @@ def _predict(self, y=None, exog=None): return value +@njit(cache=True, fastmath=True) def _aic(residuals, num_params): """Calculate the log-likelihood of a model.""" variance = np.mean(residuals**2) @@ -155,7 +159,13 @@ def _aic(residuals, num_params): return liklihood + 2 * num_params +@njit(fastmath=True) +def _arima_model_wrapper(params, data, model): + return _arima_model(params, _calc_arima, data, model)[0] + + # Define the ARIMA(p, d, q) likelihood function +@njit(cache=True, fastmath=True) def _arima_model(params, base_function, data, model): """Calculate the log-likelihood of an ARIMA model given the parameters.""" formatted_params = _extract_params(params, model) # Extract parameters @@ -175,9 +185,7 @@ def _arima_model(params, base_function, data, model): return _aic(residuals, len(params)), residuals -# Define the SARIMA(p, d, q)(P, D, Q) likelihood function - - +@njit(cache=True, fastmath=True) def _extract_params(params, model): """Extract ARIMA parameters from the parameter vector.""" if len(params) != np.sum(model): @@ -188,37 +196,32 @@ def _extract_params(params, model): f"Expected {previous_length} parameters for a non-seasonal model or \ {np.sum(model)} parameters for a seasonal model, got {len(params)}" ) - starts = np.cumsum([0] + model[:-1]) - return [params[s : s + l].tolist() for s, l in zip(starts, model)] - - + starts = np.cumsum(np.concatenate((np.zeros(1, dtype=np.int32), model[:-1]))) + n = len(starts) + max_len = np.max(model) + result = np.full((n, max_len), np.nan, dtype=params.dtype) + for i in range(n): + length = model[i] + start = starts[i] + result[i, :length] = params[start : start + length] + return result + + +@njit(cache=True, fastmath=True) def _calc_arima(data, model, t, formatted_params, residuals): """Calculate the ARIMA forecast for time t.""" if len(model) != 3: raise ValueError("Model must be of the form (c, p, q)") # AR part p = model[1] - phi = formatted_params[1] + phi = formatted_params[1][:p] ar_term = 0 if (t - p) < 0 else np.dot(phi, data[t - p : t][::-1]) # MA part q = model[2] - theta = formatted_params[2] + theta = formatted_params[2][:q] ma_term = 0 if (t - q) < 0 else np.dot(theta, residuals[t - q : t][::-1]) c = formatted_params[0][0] if model[0] else 0 y_hat = c + ar_term + ma_term return y_hat - - -def make_arima_llf(base_function, data, model): - """ - Return a parameterized log-likelihood function for ARIMA. - - This can then be used with an optimization algorithm. 
- """ - - def loss_fn(v): - return _arima_model(v, base_function, data, model)[0] - - return loss_fn diff --git a/aeon/utils/optimisation/_nelder_mead.py b/aeon/utils/optimisation/_nelder_mead.py index 36dfe732ab..749187541d 100644 --- a/aeon/utils/optimisation/_nelder_mead.py +++ b/aeon/utils/optimisation/_nelder_mead.py @@ -1,11 +1,15 @@ """Optimisation algorithms for automatic parameter tuning.""" import numpy as np +from numba import njit +@njit(fastmath=True) def nelder_mead( loss_function, num_params, + data, + model, tol=1e-6, max_iter=500, ): @@ -53,7 +57,7 @@ def nelder_mead( points = np.full((num_params + 1, num_params), 0.5) for i in range(num_params): points[i + 1][i] = 0.6 - values = np.array([loss_function(v) for v in points]) + values = np.array([loss_function(v, data, model) for v in points]) for _iteration in range(max_iter): # Order simplex by function values order = np.argsort(values) @@ -66,7 +70,7 @@ def nelder_mead( # Reflection # centre + distance between centre and largest value reflected_point = centre_point + (centre_point - points[-1]) - reflected_value = loss_function(reflected_point) + reflected_value = loss_function(reflected_point, data, model) # if between best and second best, use reflected value if len(values) > 1 and values[0] <= reflected_value < values[-2]: points[-1] = reflected_point @@ -76,7 +80,7 @@ def nelder_mead( # Otherwise if it is better than the best value if reflected_value < values[0]: expanded_point = centre_point + 2 * (reflected_point - centre_point) - expanded_value = loss_function(expanded_point) + expanded_value = loss_function(expanded_point, data, model) # if less than reflected value use expanded, otherwise go back to reflected if expanded_value < reflected_value: points[-1] = expanded_point @@ -88,7 +92,7 @@ def nelder_mead( # Contraction # Otherwise if reflection is worse than all current values contracted_point = centre_point - 0.5 * (centre_point - points[-1]) - contracted_value = loss_function(contracted_point) + contracted_value = loss_function(contracted_point, data, model) # If contraction is better use that otherwise move to shrinkage if contracted_value < values[-1]: points[-1] = contracted_point @@ -98,7 +102,7 @@ def nelder_mead( # Shrinkage for i in range(1, len(points)): points[i] = points[0] - 0.5 * (points[0] - points[i]) - values[i] = loss_function(points[i]) + values[i] = loss_function(points[i], data, model) # Convergence check if np.max(np.abs(values - values[0])) < tol: From 9eb00f69f2d98640ea5765ff062feff57aaf1211 Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 28 May 2025 19:21:07 +0100 Subject: [PATCH 09/50] Adjust parameters to allow modification in fit --- aeon/forecasting/_arima.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 4ca197f3f0..412efde4f3 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -64,7 +64,7 @@ class ARIMAForecaster(BaseForecaster): >>> y = load_airline() >>> forecaster = ARIMAForecaster(2,1,1,0) >>> forecaster.fit(y) - ARIMAForecaster() + ARIMAForecaster(d=1, p=2) >>> forecaster.predict() 550.9147246631135 """ @@ -79,6 +79,10 @@ def __init__(self, p=1, d=0, q=1, constant_term=0, horizon=1): self.d = d self.q = q self.constant_term = constant_term + self.p_ = 0 + self.d_ = 0 + self.q_ = 0 + self.constant_term_ = 0 self.model_ = [] self.c_ = 0 self.phi_ = 0 @@ -102,6 +106,10 @@ def _fit(self, y, exog=None): self Fitted ARIMAForecaster. 
""" + self.p_ = self.p + self.d_ = self.d + self.q_ = self.q + self.constant_term_ = self.constant_term self.data_ = np.array(y.squeeze(), dtype=np.float64) self.model_ = np.array((self.constant_term, self.p, self.q), dtype=np.int32) self.differenced_data_ = np.diff(self.data_, n=self.d) @@ -146,8 +154,8 @@ def _predict(self, y=None, exog=None): ) history = self.data_[::-1] # Step 2: undo ordinary differencing - for k in range(1, self.d + 1): - value += (-1) ** (k + 1) * comb(self.d, k) * history[k - 1] + for k in range(1, self.d_ + 1): + value += (-1) ** (k + 1) * comb(self.d_, k) * history[k - 1] return value From d4ed4b1fc3845d6c9c23c7788527a91e6f1f4431 Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 28 May 2025 20:19:15 +0100 Subject: [PATCH 10/50] Update example and return native python type --- aeon/forecasting/_arima.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 412efde4f3..5c1933def8 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -62,7 +62,7 @@ class ARIMAForecaster(BaseForecaster): >>> from aeon.forecasting import ARIMAForecaster >>> from aeon.datasets import load_airline >>> y = load_airline() - >>> forecaster = ARIMAForecaster(2,1,1,0) + >>> forecaster = ARIMAForecaster(p=2,d=1) >>> forecaster.fit(y) ARIMAForecaster(d=1, p=2) >>> forecaster.predict() @@ -156,7 +156,7 @@ def _predict(self, y=None, exog=None): # Step 2: undo ordinary differencing for k in range(1, self.d_ + 1): value += (-1) ** (k + 1) * comb(self.d_, k) * history[k - 1] - return value + return value.item() @njit(cache=True, fastmath=True) From 2893e1b935d612caaf41610164c22736544756ab Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 28 May 2025 21:16:35 +0100 Subject: [PATCH 11/50] Fix examples for tests --- aeon/forecasting/_arima.py | 2 +- aeon/utils/optimisation/_nelder_mead.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 5c1933def8..94b6c51af8 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -66,7 +66,7 @@ class ARIMAForecaster(BaseForecaster): >>> forecaster.fit(y) ARIMAForecaster(d=1, p=2) >>> forecaster.predict() - 550.9147246631135 + 550.9147246631132 """ def __init__(self, p=1, d=0, q=1, constant_term=0, horizon=1): diff --git a/aeon/utils/optimisation/_nelder_mead.py b/aeon/utils/optimisation/_nelder_mead.py index 749187541d..767fbde506 100644 --- a/aeon/utils/optimisation/_nelder_mead.py +++ b/aeon/utils/optimisation/_nelder_mead.py @@ -50,9 +50,9 @@ def nelder_mead( Examples -------- - >>> def sphere(x): + >>> def sphere(x, data, model): ... 
return np.sum(x**2) - >>> x_opt, val = nelder_mead(sphere, num_params=2) + >>> x_opt, val = nelder_mead(sphere, num_params=2, data=None, model=None) """ points = np.full((num_params + 1, num_params), 0.5) for i in range(num_params): From 9801e8bdb0d7b34d7f149b116b96dd43f1b89183 Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 28 May 2025 21:55:28 +0100 Subject: [PATCH 12/50] Fix Nelder-Mead Optimisation Algorithm Example --- aeon/utils/optimisation/_nelder_mead.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aeon/utils/optimisation/_nelder_mead.py b/aeon/utils/optimisation/_nelder_mead.py index 767fbde506..6d3058a7d1 100644 --- a/aeon/utils/optimisation/_nelder_mead.py +++ b/aeon/utils/optimisation/_nelder_mead.py @@ -50,7 +50,9 @@ def nelder_mead( Examples -------- - >>> def sphere(x, data, model): + >>> from numba import njit + >>> @njit(fastmath=True) + ... def sphere(x, data, model): ... return np.sum(x**2) >>> x_opt, val = nelder_mead(sphere, num_params=2, data=None, model=None) """ From 2f928c7533b19299d0d39ef542afa9fc439cb117 Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 28 May 2025 22:12:44 +0100 Subject: [PATCH 13/50] Fix Nelder-Mead Optimisation Algorithm Example #2 --- aeon/utils/optimisation/_nelder_mead.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/utils/optimisation/_nelder_mead.py b/aeon/utils/optimisation/_nelder_mead.py index 6d3058a7d1..9ef5a6ad01 100644 --- a/aeon/utils/optimisation/_nelder_mead.py +++ b/aeon/utils/optimisation/_nelder_mead.py @@ -51,7 +51,7 @@ def nelder_mead( Examples -------- >>> from numba import njit - >>> @njit(fastmath=True) + >>> @njit(cache=False, fastmath=True) ... def sphere(x, data, model): ... return np.sum(x**2) >>> x_opt, val = nelder_mead(sphere, num_params=2, data=None, model=None) From 94cd5b33a9534c90a57c2e9d7c1bd51a99822c83 Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 28 May 2025 22:22:52 +0100 Subject: [PATCH 14/50] Remove Nelder-Mead Example due to issues with numba caching functions --- aeon/utils/optimisation/_nelder_mead.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/aeon/utils/optimisation/_nelder_mead.py b/aeon/utils/optimisation/_nelder_mead.py index 9ef5a6ad01..3bc90ecb93 100644 --- a/aeon/utils/optimisation/_nelder_mead.py +++ b/aeon/utils/optimisation/_nelder_mead.py @@ -47,14 +47,6 @@ def nelder_mead( with one additional point per dimension at 0.6 for that dimension. - This implementation does not support constraints or bounds on the parameters. - The algorithm does not guarantee finding a global minimum. - - Examples - -------- - >>> from numba import njit - >>> @njit(cache=False, fastmath=True) - ... def sphere(x, data, model): - ... 
return np.sum(x**2) - >>> x_opt, val = nelder_mead(sphere, num_params=2, data=None, model=None) """ points = np.full((num_params + 1, num_params), 0.5) for i in range(num_params): From 0d0d63fe106f99efb48ac10eb722d0ffd48b09aa Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 28 May 2025 22:39:30 +0100 Subject: [PATCH 15/50] Fix return type issue --- aeon/forecasting/_arima.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 94b6c51af8..4644f27ab6 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -156,7 +156,7 @@ def _predict(self, y=None, exog=None): # Step 2: undo ordinary differencing for k in range(1, self.d_ + 1): value += (-1) ** (k + 1) * comb(self.d_, k) * history[k - 1] - return value.item() + return float(value) @njit(cache=True, fastmath=True) From 39a3ed205ca1dfe8b16f16d3be9705325a188a8b Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 28 May 2025 23:21:37 +0100 Subject: [PATCH 16/50] Address PR Feedback --- aeon/forecasting/_arima.py | 17 +++++---- aeon/utils/forecasting/_hypo_tests.py | 51 ++++++++++++++++++++++--- aeon/utils/optimisation/_nelder_mead.py | 15 ++++++++ 3 files changed, 69 insertions(+), 14 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 4644f27ab6..48b27d94da 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -14,9 +14,6 @@ from aeon.forecasting.base import BaseForecaster from aeon.utils.optimisation._nelder_mead import nelder_mead -NOGIL = False -CACHE = True - class ARIMAForecaster(BaseForecaster): """AutoRegressive Integrated Moving Average (ARIMA) forecaster. @@ -31,24 +28,28 @@ class ARIMAForecaster(BaseForecaster): Attributes ---------- - data_ : list of float + data_ : np.ndarray Original training series values. - differenced_data_ : list of float + differenced_data_ : np.ndarray Differenced version of the training data used for stationarity. - residuals_ : list of float + residuals_ : np.ndarray Residual errors from the fitted model. aic_ : float Akaike Information Criterion for the selected model. + p, d, q : int + Parameters passed to the forecaster see p_, d_, q_. p_, d_, q_ : int Orders of the ARIMA model: autoregressive (p), differencing (d), and moving average (q) terms. + constant_term : int + Parameters passed to the forecaster see constant_term_. constant_term_ : float Constant/intercept term in the model. c_ : float Estimated constant term (internal use). - phi_ : array-like + phi_ : np.ndarray Coefficients for the non-seasonal autoregressive terms. - theta_ : array-like + theta_ : np.ndarray Coefficients for the non-seasonal moving average terms. References diff --git a/aeon/utils/forecasting/_hypo_tests.py b/aeon/utils/forecasting/_hypo_tests.py index 73d4521e5e..664d0c76e5 100644 --- a/aeon/utils/forecasting/_hypo_tests.py +++ b/aeon/utils/forecasting/_hypo_tests.py @@ -3,18 +3,56 @@ def kpss_test(y, regression="c", lags=None): # Test if time series is stationary """ - Implement the KPSS test for stationarity. + Perform the KPSS (Kwiatkowski-Phillips-Schmidt-Shin) test for stationarity. + + The KPSS test evaluates the null hypothesis that a time series is + (trend or level) stationary against the alternative of a unit root + (non-stationarity). It can test for either stationarity around a + constant (level stationarity) or arounda deterministic trend + (trend stationarity). 
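As a rough illustration of what the level-stationarity ("c") statistic computes, here is a condensed sketch using the sqrt(n) default lag truncation described below; kpss_level_statistic is an independent reimplementation for illustration, not the aeon function:

    import numpy as np

    def kpss_level_statistic(y, lags=None):
        # Residuals under the null are deviations from the mean, S_t their
        # partial sums; statistic = sum(S_t^2) / (n^2 * long-run variance).
        y = np.asarray(y, dtype=float)
        n = len(y)
        if lags is None:
            lags = int(np.sqrt(n))
        e = y - y.mean()
        s_t = np.cumsum(e)
        long_run_var = np.sum(e * e) / n
        for k in range(1, lags + 1):
            w = 1.0 - k / (lags + 1)  # Bartlett weight
            long_run_var += 2.0 * w * np.sum(e[k:] * e[:-k]) / n
        return np.sum(s_t**2) / (n**2 * long_run_var)

The value is then compared against the 5% critical value (0.463 in the level case) to decide stationarity.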
Parameters ---------- - y (array-like): Time series data - regression (str): 'c' for constant, 'ct' for constant + trend - lags (int): Number of lags for HAC variance estimation (default: sqrt(n)) + y : array-like + Time series data to test for stationarity. + regression : str, default="c" + Indicates the null hypothesis for stationarity: + - "c" : Stationary around a constant (level stationarity) + - "ct" : Stationary around a constant and linear trend (trend stationarity) + lags : int or None, optional + Number of lags to use for the + HAC (heteroskedasticity and autocorrelation consistent) variance estimator. + If None, defaults to sqrt(n), where n is the sample size. Returns ------- - kpss_stat (float): KPSS test statistic - stationary (bool): Whether the series is stationary according to the test + kpss_stat : float + The KPSS test statistic. + stationary : bool + True if the series is judged stationary at the 5% significance level + (i.e., test statistic is below the critical value); False otherwise. + + Notes + ----- + - Uses asymptotic 5% critical values from Kwiatkowski et al. (1992): 0.463 for level + stationarity, 0.146 for trend stationarity. + - Returns True for stationary if the test statistic is below the 5% critical value. + + References + ---------- + Kwiatkowski, D., Phillips, P.C.B., Schmidt, P., & Shin, Y. (1992). + "Testing the null hypothesis of stationarity against the alternative + of a unit root." + Journal of Econometrics, 54(1–3), 159–178. + https://doi.org/10.1016/0304-4076(92)90104-Y + + Examples + -------- + >>> from aeon.utils.forecasting._hypo_tests import kpss_test + >>> from aeon.datasets import load_airline + >>> y = load_airline() + >>> kpss_test(y) + (1.1966313813502716, False) """ y = np.asarray(y) n = len(y) @@ -50,6 +88,7 @@ def kpss_test(y, regression="c", lags=None): # Test if time series is stationar # Step 4: Calculate the KPSS statistic kpss_stat = np.sum(S_t**2) / (n**2 * sigma_squared) + # 5% critical values for KPSS test if regression == "ct": # p. 162 Kwiatkowski et al. (1992): y_t = beta * t + r_t + e_t, # where beta is the trend, r_t a random walk and e_t a stationary diff --git a/aeon/utils/optimisation/_nelder_mead.py b/aeon/utils/optimisation/_nelder_mead.py index 3bc90ecb93..e59a70c5dd 100644 --- a/aeon/utils/optimisation/_nelder_mead.py +++ b/aeon/utils/optimisation/_nelder_mead.py @@ -28,6 +28,14 @@ def nelder_mead( `num_params` and return a scalar value. num_params : int The number of parameters (dimensions) in the optimisation problem. + data : np.ndarray + The input data used by the loss function. The shape and content depend on the + specific loss function being minimised. + model : np.ndarray + The model or context in which the loss function operates. This could be any + other object that the `loss_function` requires to compute its value. + The exact type and structure of `model` should be compatible with the + `loss_function`. tol : float, optional (default=1e-6) Tolerance for convergence. The algorithm stops when the maximum difference between function values at simplex vertices is less than `tol`. @@ -47,6 +55,13 @@ def nelder_mead( with one additional point per dimension at 0.6 for that dimension. - This implementation does not support constraints or bounds on the parameters. - The algorithm does not guarantee finding a global minimum. + + References + ---------- + .. [1] Nelder, J. A. and Mead, R. (1965). + A Simplex Method for Function Minimization. + The Computer Journal, 7(4), 308–313. 
+ https://doi.org/10.1093/comjnl/7.4.308 """ points = np.full((num_params + 1, num_params), 0.5) for i in range(num_params): From 05a27850a44b1a2b804ec562b72576394d4bfb78 Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 28 May 2025 23:28:55 +0100 Subject: [PATCH 17/50] Ignore small tolerances in floating point value in output of example --- aeon/forecasting/_arima.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 48b27d94da..42d1dece25 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -67,7 +67,7 @@ class ARIMAForecaster(BaseForecaster): >>> forecaster.fit(y) ARIMAForecaster(d=1, p=2) >>> forecaster.predict() - 550.9147246631132 + 550.914724663113... """ def __init__(self, p=1, d=0, q=1, constant_term=0, horizon=1): From 73966ab32a8dca49a5a10cc5aac5d2111d932d2e Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 28 May 2025 23:37:12 +0100 Subject: [PATCH 18/50] Fix kpss_test example --- aeon/utils/forecasting/_hypo_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/utils/forecasting/_hypo_tests.py b/aeon/utils/forecasting/_hypo_tests.py index 664d0c76e5..cfa86a70fc 100644 --- a/aeon/utils/forecasting/_hypo_tests.py +++ b/aeon/utils/forecasting/_hypo_tests.py @@ -52,7 +52,7 @@ def kpss_test(y, regression="c", lags=None): # Test if time series is stationar >>> from aeon.datasets import load_airline >>> y = load_airline() >>> kpss_test(y) - (1.1966313813502716, False) + (np.float64(1.1966313813502716), np.False_) """ y = np.asarray(y) n = len(y) From a0f090d48e6ae066527acd38c43b15da63f17414 Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 28 May 2025 23:58:28 +0100 Subject: [PATCH 19/50] Fix kpss_test example #2 --- aeon/utils/forecasting/_hypo_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/utils/forecasting/_hypo_tests.py b/aeon/utils/forecasting/_hypo_tests.py index cfa86a70fc..2d581e971e 100644 --- a/aeon/utils/forecasting/_hypo_tests.py +++ b/aeon/utils/forecasting/_hypo_tests.py @@ -52,7 +52,7 @@ def kpss_test(y, regression="c", lags=None): # Test if time series is stationar >>> from aeon.datasets import load_airline >>> y = load_airline() >>> kpss_test(y) - (np.float64(1.1966313813502716), np.False_) + (np.float64(1.1966313813...), np.False_) """ y = np.asarray(y) n = len(y) From 68847033b6b4f0f9fdfab5c55f68292a438c9b24 Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Mon, 2 Jun 2025 21:01:28 +0100 Subject: [PATCH 20/50] Update documentation for ARIMAForecaster, change constant_term to be bool, and fix bug with it not operating on differemced data --- aeon/forecasting/_arima.py | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 42d1dece25..103fbc6d4c 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -23,6 +23,14 @@ class ARIMAForecaster(BaseForecaster): Parameters ---------- + p : int, default=1, + Autoregressive (p) order of the ARIMA model + d : int, default=0, + Differencing (d) order of the ARIMA model + q : int, default=1, + Moving average (q) order of the ARIMA model + constant_term: bool = False, + Presence of a constant/intercept term in the model. horizon : int, default=1 The forecasting horizon, i.e., the number of steps ahead to predict. 
@@ -41,10 +49,10 @@ class ARIMAForecaster(BaseForecaster): p_, d_, q_ : int Orders of the ARIMA model: autoregressive (p), differencing (d), and moving average (q) terms. - constant_term : int + constant_term : bool Parameters passed to the forecaster see constant_term_. - constant_term_ : float - Constant/intercept term in the model. + constant_term_ : bool + Whether to include a constant/intercept term in the model. c_ : float Estimated constant term (internal use). phi_ : np.ndarray @@ -67,10 +75,17 @@ class ARIMAForecaster(BaseForecaster): >>> forecaster.fit(y) ARIMAForecaster(d=1, p=2) >>> forecaster.predict() - 550.914724663113... + 474.49449... """ - def __init__(self, p=1, d=0, q=1, constant_term=0, horizon=1): + def __init__( + self, + p: int = 1, + d: int = 0, + q: int = 1, + constant_term: bool = False, + horizon: int = 1, + ): super().__init__(horizon=horizon, axis=1) self.data_ = [] self.differenced_data_ = [] @@ -83,7 +98,7 @@ def __init__(self, p=1, d=0, q=1, constant_term=0, horizon=1): self.p_ = 0 self.d_ = 0 self.q_ = 0 - self.constant_term_ = 0 + self.constant_term_ = False self.model_ = [] self.c_ = 0 self.phi_ = 0 @@ -112,12 +127,14 @@ def _fit(self, y, exog=None): self.q_ = self.q self.constant_term_ = self.constant_term self.data_ = np.array(y.squeeze(), dtype=np.float64) - self.model_ = np.array((self.constant_term, self.p, self.q), dtype=np.int32) + self.model_ = np.array( + (1 if self.constant_term else 0, self.p, self.q), dtype=np.int32 + ) self.differenced_data_ = np.diff(self.data_, n=self.d) (self.parameters_, self.aic_) = nelder_mead( _arima_model_wrapper, np.sum(self.model_[:3]), - self.data_, + self.differenced_data_, self.model_, ) (self.c_, self.phi_, self.theta_) = _extract_params( From 9af3a56f52ecae0e3dc1294fc1aa052026f81e8d Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Sun, 8 Jun 2025 19:38:49 +0100 Subject: [PATCH 21/50] Modify ARIMA to allow predicting multiple values by updating the state without refitting the model --- aeon/forecasting/_arima.py | 78 ++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 29 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 103fbc6d4c..e176e3bb8e 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -6,8 +6,6 @@ __maintainer__ = ["alexbanwell1", "TonyBagnall"] __all__ = ["ARIMAForecaster"] -from math import comb - import numpy as np from numba import njit @@ -84,12 +82,12 @@ def __init__( d: int = 0, q: int = 1, constant_term: bool = False, - horizon: int = 1, ): - super().__init__(horizon=horizon, axis=1) + super().__init__(horizon=1, axis=1) self.data_ = [] self.differenced_data_ = [] self.residuals_ = [] + self.fitted_values_ = [] self.aic_ = 0 self.p = p self.d = d @@ -140,8 +138,12 @@ def _fit(self, y, exog=None): (self.c_, self.phi_, self.theta_) = _extract_params( self.parameters_, self.model_ ) - (self.aic_, self.residuals_) = _arima_model( - self.parameters_, _calc_arima, self.differenced_data_, self.model_ + (self.aic_, self.residuals_, self.fitted_values_) = _arima_model( + self.parameters_, + _calc_arima, + self.differenced_data_, + self.model_, + np.empty(0), ) return self @@ -159,22 +161,28 @@ def _predict(self, y=None, exog=None): Returns ------- - float - single prediction self.horizon steps ahead of y. + array[float] + Predictions len(y) steps ahead of the data seen in fit. + If y is None, then predict 1 step ahead of the data seen in fit. 
""" - y = np.array(y, dtype=np.float64) - value = _calc_arima( - self.differenced_data_, + if y is not None: + combined_data = np.concatenate((self.data_, y.flatten())) + else: + combined_data = self.data_ + n = len(self.data_) + differenced_data = np.diff(combined_data, n=self.d) + _aic, _residuals, predicted_values = _arima_model( + self.parameters_, + _calc_arima, + differenced_data, self.model_, - len(self.differenced_data_), - _extract_params(self.parameters_, self.model_), self.residuals_, ) - history = self.data_[::-1] - # Step 2: undo ordinary differencing - for k in range(1, self.d_ + 1): - value += (-1) ** (k + 1) * comb(self.d_, k) * history[k - 1] - return float(value) + init = combined_data[n - self.d_ : n] + x = np.concatenate((init, predicted_values)) + for _ in range(self.d_): + x = np.cumsum(x) + return x[self.d_ :] @njit(cache=True, fastmath=True) @@ -187,28 +195,35 @@ def _aic(residuals, num_params): @njit(fastmath=True) def _arima_model_wrapper(params, data, model): - return _arima_model(params, _calc_arima, data, model)[0] + return _arima_model(params, _calc_arima, data, model, np.empty(0))[0] # Define the ARIMA(p, d, q) likelihood function @njit(cache=True, fastmath=True) -def _arima_model(params, base_function, data, model): +def _arima_model(params, base_function, data, model, residuals): """Calculate the log-likelihood of an ARIMA model given the parameters.""" formatted_params = _extract_params(params, model) # Extract parameters # Initialize residuals n = len(data) - residuals = np.zeros(n) - for t in range(n): - y_hat = base_function( + m = len(residuals) + num_predictions = n - m + 1 + residuals = np.concatenate((residuals, np.zeros(num_predictions - 1))) + expect_full_history = m > 0 # I.e. we've been provided with some residuals + fitted_values = np.zeros(num_predictions) + for t in range(num_predictions): + fitted_values[t] = base_function( data, model, - t, + m + t, formatted_params, residuals, + expect_full_history, ) - residuals[t] = data[t] - y_hat - return _aic(residuals, len(params)), residuals + if t != num_predictions - 1: + # Only calculate residuals for the predictions we have data for + residuals[m + t] = data[m + t] - fitted_values[t] + return _aic(residuals, len(params)), residuals, fitted_values @njit(cache=True, fastmath=True) @@ -234,17 +249,22 @@ def _extract_params(params, model): @njit(cache=True, fastmath=True) -def _calc_arima(data, model, t, formatted_params, residuals): +def _calc_arima(data, model, t, formatted_params, residuals, expect_full_history=False): """Calculate the ARIMA forecast for time t.""" if len(model) != 3: raise ValueError("Model must be of the form (c, p, q)") - # AR part p = model[1] + q = model[2] + if expect_full_history and (t - p < 0 or t - q < 0): + raise ValueError( + f"Insufficient data for ARIMA model at time {t}. " + f"Expected at least {p} past values for AR and {q} for MA." 
+ ) + # AR part phi = formatted_params[1][:p] ar_term = 0 if (t - p) < 0 else np.dot(phi, data[t - p : t][::-1]) # MA part - q = model[2] theta = formatted_params[2][:q] ma_term = 0 if (t - q) < 0 else np.dot(theta, residuals[t - q : t][::-1]) From e898f2f8a263bc43dd5d2a43e1ef41d6f05b8f0f Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Mon, 9 Jun 2025 21:35:38 +0100 Subject: [PATCH 22/50] Fix bug using self.d rather than self.d_ --- aeon/forecasting/_arima.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index e176e3bb8e..a0dd2cb052 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -170,7 +170,7 @@ def _predict(self, y=None, exog=None): else: combined_data = self.data_ n = len(self.data_) - differenced_data = np.diff(combined_data, n=self.d) + differenced_data = np.diff(combined_data, n=self.d_) _aic, _residuals, predicted_values = _arima_model( self.parameters_, _calc_arima, From 3c644a02a15a8827d763749d1a2d5d077c29c54f Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Wed, 11 Jun 2025 19:55:11 +0100 Subject: [PATCH 23/50] refactor ARIMA --- aeon/forecasting/_arima.py | 129 ++++++++++++++----------------------- 1 file changed, 50 insertions(+), 79 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index a0dd2cb052..0a6a2caec8 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -16,8 +16,8 @@ class ARIMAForecaster(BaseForecaster): """AutoRegressive Integrated Moving Average (ARIMA) forecaster. - The model automatically selects the parameters of the model based - on information criteria, such as AIC. + ARIMA with fixed model structure and fitted parameters found with an + nelder mead optimizer to minimise the AIC. Parameters ---------- @@ -27,36 +27,19 @@ class ARIMAForecaster(BaseForecaster): Differencing (d) order of the ARIMA model q : int, default=1, Moving average (q) order of the ARIMA model - constant_term: bool = False, + use_constant: bool = False, Presence of a constant/intercept term in the model. - horizon : int, default=1 - The forecasting horizon, i.e., the number of steps ahead to predict. Attributes ---------- - data_ : np.ndarray - Original training series values. - differenced_data_ : np.ndarray - Differenced version of the training data used for stationarity. residuals_ : np.ndarray Residual errors from the fitted model. aic_ : float - Akaike Information Criterion for the selected model. - p, d, q : int - Parameters passed to the forecaster see p_, d_, q_. - p_, d_, q_ : int - Orders of the ARIMA model: autoregressive (p), differencing (d), - and moving average (q) terms. - constant_term : bool - Parameters passed to the forecaster see constant_term_. - constant_term_ : bool - Whether to include a constant/intercept term in the model. - c_ : float - Estimated constant term (internal use). + Akaike Information Criterion for the fitted model. phi_ : np.ndarray - Coefficients for the non-seasonal autoregressive terms. + Coefficients for the non-seasonal autoregressive terms (length p). theta_ : np.ndarray - Coefficients for the non-seasonal moving average terms. + Coefficients for the non-seasonal moving average terms (length q). References ---------- @@ -76,80 +59,68 @@ class ARIMAForecaster(BaseForecaster): 474.49449... 
""" - def __init__( - self, - p: int = 1, - d: int = 0, - q: int = 1, - constant_term: bool = False, - ): - super().__init__(horizon=1, axis=1) - self.data_ = [] - self.differenced_data_ = [] - self.residuals_ = [] - self.fitted_values_ = [] - self.aic_ = 0 + _tags = { + "capability:horizon": False, + } + + def __init__(self, p: int = 1, d: int = 0, q: int = 1, use_constant: bool = False): self.p = p self.d = d self.q = q - self.constant_term = constant_term - self.p_ = 0 - self.d_ = 0 - self.q_ = 0 - self.constant_term_ = False - self.model_ = [] - self.c_ = 0 + self.use_constant = use_constant self.phi_ = 0 self.theta_ = 0 - self.parameters_ = [] + self._series = [] + self._differenced_series = [] + self.residuals_ = [] + self.fitted_values_ = [] + self.aic_ = 0 + self._model = [] + self._c = 0 + self._parameters = [] + super().__init__(horizon=1, axis=1) def _fit(self, y, exog=None): - """Fit AutoARIMA forecaster to series y. - - Fit a forecaster to predict self.horizon steps ahead using y. + """Fit ARIMA forecaster to series y to predict one ahead using y. Parameters ---------- y : np.ndarray A time series on which to learn a forecaster to predict horizon ahead exog : np.ndarray, default =None - Optional exogenous time series data assumed to be aligned with y + Not allowed for this forecaster Returns ------- self Fitted ARIMAForecaster. """ - self.p_ = self.p - self.d_ = self.d - self.q_ = self.q - self.constant_term_ = self.constant_term - self.data_ = np.array(y.squeeze(), dtype=np.float64) - self.model_ = np.array( - (1 if self.constant_term else 0, self.p, self.q), dtype=np.int32 + self._series = np.array(y.squeeze(), dtype=np.float64) + self._model = np.array( + (1 if self.use_constant else 0, self.p, self.q), dtype=np.int32 ) - self.differenced_data_ = np.diff(self.data_, n=self.d) - (self.parameters_, self.aic_) = nelder_mead( + self._differenced_series = np.diff(self._series, n=self.d) + (self._parameters, self.aic_) = nelder_mead( _arima_model_wrapper, - np.sum(self.model_[:3]), - self.differenced_data_, - self.model_, + np.sum(self._model[:3]), + self._differenced_series, + self._model, ) (self.c_, self.phi_, self.theta_) = _extract_params( - self.parameters_, self.model_ + self._parameters, self._model ) (self.aic_, self.residuals_, self.fitted_values_) = _arima_model( - self.parameters_, + self._parameters, _calc_arima, - self.differenced_data_, - self.model_, + self._differenced_series, + self._model, np.empty(0), ) return self def _predict(self, y=None, exog=None): """ - Predict the next horizon steps ahead. + Predict the next step ahead for training data or y. Parameters ---------- @@ -161,28 +132,28 @@ def _predict(self, y=None, exog=None): Returns ------- - array[float] - Predictions len(y) steps ahead of the data seen in fit. - If y is None, then predict 1 step ahead of the data seen in fit. + float + Prediction 1 step ahead of the data seen in fit or passed as y. 
""" if y is not None: - combined_data = np.concatenate((self.data_, y.flatten())) + series = y.squeeze() else: - combined_data = self.data_ - n = len(self.data_) - differenced_data = np.diff(combined_data, n=self.d_) - _aic, _residuals, predicted_values = _arima_model( - self.parameters_, + series = self._series + n = len(series) + differenced_series = np.diff(series, n=self.d) + _, _, predicted_values = _arima_model( + self._parameters, _calc_arima, - differenced_data, - self.model_, + differenced_series, + self._model, self.residuals_, ) - init = combined_data[n - self.d_ : n] + # Invert differences + init = series[n - self.d : n] x = np.concatenate((init, predicted_values)) - for _ in range(self.d_): + for _ in range(self.d): x = np.cumsum(x) - return x[self.d_ :] + return x[self.d :][0] @njit(cache=True, fastmath=True) From b91d135453b6e83d074866e38434305d21fc0dd2 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Mon, 16 Jun 2025 23:14:46 +0100 Subject: [PATCH 24/50] docstring --- aeon/forecasting/_arima.py | 49 ++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 0a6a2caec8..353137569d 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -36,10 +36,12 @@ class ARIMAForecaster(BaseForecaster): Residual errors from the fitted model. aic_ : float Akaike Information Criterion for the fitted model. + c_ : float, default = 0 + Intercept term. phi_ : np.ndarray - Coefficients for the non-seasonal autoregressive terms (length p). + Coefficients for autoregressive terms (length p). theta_ : np.ndarray - Coefficients for the non-seasonal moving average terms (length q). + Coefficients for moving average terms (length q). References ---------- @@ -53,9 +55,7 @@ class ARIMAForecaster(BaseForecaster): >>> from aeon.datasets import load_airline >>> y = load_airline() >>> forecaster = ARIMAForecaster(p=2,d=1) - >>> forecaster.fit(y) - ARIMAForecaster(d=1, p=2) - >>> forecaster.predict() + >>> forecaster.forecast(y) 474.49449... 
""" @@ -70,13 +70,13 @@ def __init__(self, p: int = 1, d: int = 0, q: int = 1, use_constant: bool = Fals self.use_constant = use_constant self.phi_ = 0 self.theta_ = 0 + self.c_ = 0 self._series = [] self._differenced_series = [] self.residuals_ = [] self.fitted_values_ = [] self.aic_ = 0 self._model = [] - self._c = 0 self._parameters = [] super().__init__(horizon=1, axis=1) @@ -100,6 +100,7 @@ def _fit(self, y, exog=None): (1 if self.use_constant else 0, self.p, self.q), dtype=np.int32 ) self._differenced_series = np.diff(self._series, n=self.d) + (self._parameters, self.aic_) = nelder_mead( _arima_model_wrapper, np.sum(self._model[:3]), @@ -137,10 +138,14 @@ def _predict(self, y=None, exog=None): """ if y is not None: series = y.squeeze() - else: - series = self._series - n = len(series) - differenced_series = np.diff(series, n=self.d) + # Difference the series using numpy + differenced_series = np.diff(self._series, n=self.d) + pred = _single_forecast(differenced_series, self.c_, self.phi_, self.theta_) + forecast = pred + series[-self.d :].sum() if self.d > 0 else pred + return forecast + + n = len(self._series) + differenced_series = np.diff(self._series, n=self.d) _, _, predicted_values = _arima_model( self._parameters, _calc_arima, @@ -242,3 +247,27 @@ def _calc_arima(data, model, t, formatted_params, residuals, expect_full_history c = formatted_params[0][0] if model[0] else 0 y_hat = c + ar_term + ma_term return y_hat + + +@njit(cache=True, fastmath=True) +def _single_forecast(series, c, phi, theta): + """Calculate the ARIMA forecast with fixed model. + + This is equivalent to filter in statsmodels. + """ + p = len(phi) + q = len(theta) + n = len(series) + residuals = np.zeros(n) + max_lag = max(p, q) + # Compute in-sample residuals + for t in range(max_lag, n): + ar_part = np.dot(phi, series[t - np.arange(1, p + 1)]) if p > 0 else 0.0 + ma_part = np.dot(theta, residuals[t - np.arange(1, q + 1)]) if q > 0 else 0.0 + pred = c + ar_part + ma_part + residuals[t] = series[t] - pred + # Forecast next value using most recent p values and q residuals + ar_forecast = np.dot(phi, series[-p:][::-1]) if p > 0 else 0.0 + ma_forecast = np.dot(theta, residuals[-q:][::-1]) if q > 0 else 0.0 + f = c + ar_forecast + ma_forecast + return f From 149c0ad972ad2e36f505363f695fd76c001e1b4d Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Sat, 21 Jun 2025 20:31:02 +0100 Subject: [PATCH 25/50] find forecast_ in fit --- aeon/forecasting/_arima.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 353137569d..094517dff5 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -60,7 +60,7 @@ class ARIMAForecaster(BaseForecaster): """ _tags = { - "capability:horizon": False, + "capability:horizon": False, # cannot fit to a horizon other than 1 } def __init__(self, p: int = 1, d: int = 0, q: int = 1, use_constant: bool = False): @@ -112,11 +112,19 @@ def _fit(self, y, exog=None): ) (self.aic_, self.residuals_, self.fitted_values_) = _arima_model( self._parameters, - _calc_arima, + _calc_arma, self._differenced_series, self._model, np.empty(0), ) + self.forecast_ = _calc_arma( + self._differenced_series, + self._model, + len(y), + self._parameters, + self.residuals_, + ) + return self def _predict(self, y=None, exog=None): @@ -148,7 +156,7 @@ def _predict(self, y=None, exog=None): differenced_series = np.diff(self._series, n=self.d) _, _, predicted_values = _arima_model( self._parameters, 
- _calc_arima, + _calc_arma, differenced_series, self._model, self.residuals_, @@ -160,6 +168,11 @@ def _predict(self, y=None, exog=None): x = np.cumsum(x) return x[self.d :][0] + def _forecast(self, y, exog=None): + """Forecast one ahead for time series y.""" + self.fit(y, exog) + return self.forecast_ + @njit(cache=True, fastmath=True) def _aic(residuals, num_params): @@ -171,7 +184,7 @@ def _aic(residuals, num_params): @njit(fastmath=True) def _arima_model_wrapper(params, data, model): - return _arima_model(params, _calc_arima, data, model, np.empty(0))[0] + return _arima_model(params, _calc_arma, data, model, np.empty(0))[0] # Define the ARIMA(p, d, q) likelihood function @@ -225,8 +238,8 @@ def _extract_params(params, model): @njit(cache=True, fastmath=True) -def _calc_arima(data, model, t, formatted_params, residuals, expect_full_history=False): - """Calculate the ARIMA forecast for time t.""" +def _calc_arma(data, model, t, formatted_params, residuals, expect_full_history=False): + """Calculate the ARMA forecast for time t.""" if len(model) != 3: raise ValueError("Model must be of the form (c, p, q)") p = model[1] From 9d8b24f243aaa1b38001df90d185d86c9aa21179 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 10 Jul 2025 19:53:06 +0100 Subject: [PATCH 26/50] remove optional y --- aeon/forecasting/__init__.py | 4 +++- aeon/forecasting/_arima.py | 18 +----------------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/aeon/forecasting/__init__.py b/aeon/forecasting/__init__.py index 0b134857dd..5b54dd2a19 100644 --- a/aeon/forecasting/__init__.py +++ b/aeon/forecasting/__init__.py @@ -1,13 +1,15 @@ """Forecasters.""" __all__ = [ - "NaiveForecaster", "BaseForecaster", + "NaiveForecaster", "RegressionForecaster", "ETSForecaster", "TVPForecaster", + "ARIMAForecaster", ] +from aeon.forecasting._arima import ARIMAForecaster from aeon.forecasting._ets import ETSForecaster from aeon.forecasting._naive import NaiveForecaster from aeon.forecasting._regression import RegressionForecaster diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 094517dff5..aed8c91f31 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -127,7 +127,7 @@ def _fit(self, y, exog=None): return self - def _predict(self, y=None, exog=None): + def _predict(self, y, exog=None): """ Predict the next step ahead for training data or y. 
@@ -152,22 +152,6 @@ def _predict(self, y=None, exog=None): forecast = pred + series[-self.d :].sum() if self.d > 0 else pred return forecast - n = len(self._series) - differenced_series = np.diff(self._series, n=self.d) - _, _, predicted_values = _arima_model( - self._parameters, - _calc_arma, - differenced_series, - self._model, - self.residuals_, - ) - # Invert differences - init = series[n - self.d : n] - x = np.concatenate((init, predicted_values)) - for _ in range(self.d): - x = np.cumsum(x) - return x[self.d :][0] - def _forecast(self, y, exog=None): """Forecast one ahead for time series y.""" self.fit(y, exog) From d9b1e7a6cd9c8405d209d24fd5f05e4712b527d1 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 10 Jul 2025 20:37:45 +0100 Subject: [PATCH 27/50] add iterative --- aeon/forecasting/_arima.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index aed8c91f31..f19445c02e 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -144,19 +144,31 @@ def _predict(self, y, exog=None): float Prediction 1 step ahead of the data seen in fit or passed as y. """ - if y is not None: - series = y.squeeze() - # Difference the series using numpy - differenced_series = np.diff(self._series, n=self.d) - pred = _single_forecast(differenced_series, self.c_, self.phi_, self.theta_) - forecast = pred + series[-self.d :].sum() if self.d > 0 else pred - return forecast + series = y.squeeze() + # Difference the series using numpy + differenced_series = np.diff(self._series, n=self.d) + pred = _single_forecast(differenced_series, self.c_, self.phi_, self.theta_) + forecast = pred + series[-self.d :].sum() if self.d > 0 else pred + # Need to undifference it! + return forecast def _forecast(self, y, exog=None): """Forecast one ahead for time series y.""" self.fit(y, exog) return self.forecast_ + def iterative_forecast(self, y, prediction_horizon): + self.fit(y) + preds = np.zeros(prediction_horizon) + preds[0] = self.forecast_ + differenced_series = np.diff(self._series, n=self.d) + for i in range(1, prediction_horizon): + differenced_series = np.append(differenced_series, preds[i - 1]) + preds[i] = _single_forecast( + differenced_series, self.c_, self.phi_, self.theta_ + ) + return preds + @njit(cache=True, fastmath=True) def _aic(residuals, num_params): @@ -248,9 +260,9 @@ def _calc_arma(data, model, t, formatted_params, residuals, expect_full_history= @njit(cache=True, fastmath=True) def _single_forecast(series, c, phi, theta): - """Calculate the ARIMA forecast with fixed model. + """Calculate the ARMA forecast with fixed model. - This is equivalent to filter in statsmodels. + This is equivalent to filter in statsmodels. Assumes differenced if necessary. 
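The iterative_forecast method added in this patch is plain recursive forecasting: each prediction is appended to the differenced history and treated as an observation when producing the next step. A toy AR(1) version of the idea, with an assumed coefficient:

    # Recursive multi-step forecasting with a fixed AR(1), phi = 0.8 (assumed).
    phi, c = 0.8, 0.0
    history = [1.0, 0.9, 0.7]        # already-differenced observations
    predictions = []
    for _ in range(3):
        next_value = c + phi * history[-1]
        predictions.append(next_value)
        history.append(next_value)   # feed the forecast back in
    # predictions -> approximately [0.56, 0.448, 0.3584]

After the first q steps the MA part contributes nothing, because the placeholder residuals for forecasted points are left at zero, as the comments in the method above note.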
""" p = len(phi) q = len(theta) From 2a962d8b36f2b3935ab9f665679f02f6fe445f9f Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Wed, 16 Jul 2025 17:55:15 +0100 Subject: [PATCH 28/50] typo --- aeon/forecasting/_arima.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index f19445c02e..2187ca6f74 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -174,8 +174,8 @@ def iterative_forecast(self, y, prediction_horizon): def _aic(residuals, num_params): """Calculate the log-likelihood of a model.""" variance = np.mean(residuals**2) - liklihood = len(residuals) * (np.log(2 * np.pi) + np.log(variance) + 1) - return liklihood + 2 * num_params + likelihood = len(residuals) * (np.log(2 * np.pi) + np.log(variance) + 1) + return likelihood + 2 * num_params @njit(fastmath=True) From c7616a4725d0b0327d34b98f7104897643c61e6d Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 17 Jul 2025 10:05:19 +0100 Subject: [PATCH 29/50] typo --- aeon/forecasting/_arima.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 2187ca6f74..960265e8e5 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -96,20 +96,23 @@ def _fit(self, y, exog=None): Fitted ARIMAForecaster. """ self._series = np.array(y.squeeze(), dtype=np.float64) + # Model is an array of the (c,p,q) self._model = np.array( (1 if self.use_constant else 0, self.p, self.q), dtype=np.int32 ) self._differenced_series = np.diff(self._series, n=self.d) - + # Nelder Mead returns the parameters in a single array (self._parameters, self.aic_) = nelder_mead( _arima_model_wrapper, np.sum(self._model[:3]), self._differenced_series, self._model, ) + # Extract the parameter values (self.c_, self.phi_, self.theta_) = _extract_params( self._parameters, self._model ) + # (self.aic_, self.residuals_, self.fitted_values_) = _arima_model( self._parameters, _calc_arma, @@ -117,13 +120,13 @@ def _fit(self, y, exog=None): self._model, np.empty(0), ) - self.forecast_ = _calc_arma( - self._differenced_series, - self._model, - len(y), - self._parameters, - self.residuals_, - ) + # self.forecast_ = _calc_arma( + # self._differenced_series, + # self._model, + # len(y), + # self._parameters, + # self.residuals_, + # ) return self From f29c809940512282c994ea0f3cc3bd1d9643984e Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 17 Jul 2025 11:57:57 +0100 Subject: [PATCH 30/50] calculate forecast_ --- aeon/forecasting/_arima.py | 44 +++++++++++++------------ aeon/utils/optimisation/_nelder_mead.py | 4 +-- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 960265e8e5..a24391bd65 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -120,14 +120,23 @@ def _fit(self, y, exog=None): self._model, np.empty(0), ) - # self.forecast_ = _calc_arma( - # self._differenced_series, - # self._model, - # len(y), - # self._parameters, - # self.residuals_, - # ) + formatted_params = _extract_params(self._parameters, self._model) # Extract + # parameters + differenced_forecast = _calc_arma( + self._differenced_series, + self._model, + len(self._series), + formatted_params, + self.residuals_, + expect_full_history=True, + ) + if self.d == 0: + self.forecast_ = differenced_forecast + elif self.d == 1: + self.forecast_ = differenced_forecast + self._series[-1] + else: + self.forecast_ = 
differenced_forecast + np.sum(self._series[-self.d :]) return self def _predict(self, y, exog=None): @@ -152,7 +161,6 @@ def _predict(self, y, exog=None): differenced_series = np.diff(self._series, n=self.d) pred = _single_forecast(differenced_series, self.c_, self.phi_, self.theta_) forecast = pred + series[-self.d :].sum() if self.d > 0 else pred - # Need to undifference it! return forecast def _forecast(self, y, exog=None): @@ -218,13 +226,7 @@ def _arima_model(params, base_function, data, model, residuals): def _extract_params(params, model): """Extract ARIMA parameters from the parameter vector.""" if len(params) != np.sum(model): - previous_length = np.sum(model) model = model[:-1] # Remove the seasonal period - if len(params) != np.sum(model): - raise ValueError( - f"Expected {previous_length} parameters for a non-seasonal model or \ - {np.sum(model)} parameters for a seasonal model, got {len(params)}" - ) starts = np.cumsum(np.concatenate((np.zeros(1, dtype=np.int32), model[:-1]))) n = len(starts) max_len = np.max(model) @@ -239,15 +241,15 @@ def _extract_params(params, model): @njit(cache=True, fastmath=True) def _calc_arma(data, model, t, formatted_params, residuals, expect_full_history=False): """Calculate the ARMA forecast for time t.""" - if len(model) != 3: - raise ValueError("Model must be of the form (c, p, q)") + # if len(model) != 3: + # raise ValueError("Model must be of the form (c, p, q)") p = model[1] q = model[2] - if expect_full_history and (t - p < 0 or t - q < 0): - raise ValueError( - f"Insufficient data for ARIMA model at time {t}. " - f"Expected at least {p} past values for AR and {q} for MA." - ) + # if expect_full_history and (t - p < 0 or t - q < 0): + # raise ValueError( + # f"Insufficient data for ARIMA model at time {t}. " + # f"Expected at least {p} past values for AR and {q} for MA." + # ) # AR part phi = formatted_params[1][:p] ar_term = 0 if (t - p) < 0 else np.dot(phi, data[t - p : t][::-1]) diff --git a/aeon/utils/optimisation/_nelder_mead.py b/aeon/utils/optimisation/_nelder_mead.py index e59a70c5dd..47c629aeec 100644 --- a/aeon/utils/optimisation/_nelder_mead.py +++ b/aeon/utils/optimisation/_nelder_mead.py @@ -4,14 +4,14 @@ from numba import njit -@njit(fastmath=True) +@njit(cache=True, fastmath=True) def nelder_mead( loss_function, num_params, data, model, tol=1e-6, - max_iter=500, + max_iter=50, ): """ Perform optimisation using the Nelder–Mead simplex algorithm. 
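Since the in-docstring doctest for the optimiser was removed earlier because of numba caching problems, a standalone sketch of how this interface can be exercised may still help; the sphere loss below is hypothetical and ignores the data and model arguments it is required to accept:

    import numpy as np
    from numba import njit

    from aeon.utils.optimisation._nelder_mead import nelder_mead

    @njit(fastmath=True)
    def sphere(params, data, model):
        # toy convex objective; data/model only satisfy the required signature
        return np.sum(params**2)

    best_params, best_value = nelder_mead(
        sphere, 2, np.zeros(1), np.zeros(1, dtype=np.int32)
    )
    # best_params should approach [0, 0] and best_value should approach 0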
From 5a2ee8daa75c5ad0f94346aaf3f7fc19cb7ece4e Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 17 Jul 2025 12:21:37 +0100 Subject: [PATCH 31/50] use differenced --- aeon/forecasting/_arima.py | 2 +- aeon/utils/optimisation/_nelder_mead.py | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index a24391bd65..4b1449a2c8 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -126,7 +126,7 @@ def _fit(self, y, exog=None): differenced_forecast = _calc_arma( self._differenced_series, self._model, - len(self._series), + len(self._differenced_series), formatted_params, self.residuals_, expect_full_history=True, diff --git a/aeon/utils/optimisation/_nelder_mead.py b/aeon/utils/optimisation/_nelder_mead.py index 47c629aeec..263365ef6f 100644 --- a/aeon/utils/optimisation/_nelder_mead.py +++ b/aeon/utils/optimisation/_nelder_mead.py @@ -11,7 +11,7 @@ def nelder_mead( data, model, tol=1e-6, - max_iter=50, + max_iter=500, ): """ Perform optimisation using the Nelder–Mead simplex algorithm. diff --git a/pyproject.toml b/pyproject.toml index b27cc4d00f..8965ccd402 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -169,7 +169,7 @@ testpaths = "aeon" doctest_optionflags = [ "NORMALIZE_WHITESPACE", "ELLIPSIS", - "FLOAT_CMP", +# "FLOAT_CMP", ] addopts = [ "--durations=20", From 42e699c4f0c764354a41bc5b8c6ae3b14aa72f6c Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 17 Jul 2025 12:56:51 +0100 Subject: [PATCH 32/50] example --- aeon/forecasting/_arima.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 4b1449a2c8..59ca908648 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -56,7 +56,7 @@ class ARIMAForecaster(BaseForecaster): >>> y = load_airline() >>> forecaster = ARIMAForecaster(p=2,d=1) >>> forecaster.forecast(y) - 474.49449... + np.float64(474.4944911737508) """ _tags = { From d1caed36bc1327141210e78e5d23650b9c040ba3 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 17 Jul 2025 15:48:11 +0100 Subject: [PATCH 33/50] iterative --- aeon/forecasting/_arima.py | 117 +++++++++++++++++++++++++++++-------- 1 file changed, 93 insertions(+), 24 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 59ca908648..cdaf56ea38 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -108,10 +108,6 @@ def _fit(self, y, exog=None): self._differenced_series, self._model, ) - # Extract the parameter values - (self.c_, self.phi_, self.theta_) = _extract_params( - self._parameters, self._model - ) # (self.aic_, self.residuals_, self.fitted_values_) = _arima_model( self._parameters, @@ -137,31 +133,72 @@ def _fit(self, y, exog=None): self.forecast_ = differenced_forecast + self._series[-1] else: self.forecast_ = differenced_forecast + np.sum(self._series[-self.d :]) + # Extract the parameter values + self.c_ = formatted_params[0][0] + self.phi_ = formatted_params[1][: self.p] + self.theta_ = formatted_params[2][: self.q] + return self def _predict(self, y, exog=None): """ - Predict the next step ahead for training data or y. + Predict the next step ahead for y. Parameters ---------- y : np.ndarray, default = None - A time series to predict the next horizon value for. If None, - predict the next horizon value after series seen in fit. + A time series to predict the value of. y can be independent of the series + seen in fit. 
exog : np.ndarray, default =None Optional exogenous time series data assumed to be aligned with y Returns ------- float - Prediction 1 step ahead of the data seen in fit or passed as y. + Prediction 1 step ahead of the last value in y. """ - series = y.squeeze() - # Difference the series using numpy - differenced_series = np.diff(self._series, n=self.d) - pred = _single_forecast(differenced_series, self.c_, self.phi_, self.theta_) - forecast = pred + series[-self.d :].sum() if self.d > 0 else pred - return forecast + y = y.squeeze() + p, q, d = self.p, self.q, self.d + phi, theta = self.phi_, self.theta_ + c = 0.0 + if self.use_constant: + c = self.c_ + + # Apply differencing + if d > 0: + if len(y) <= d: + raise ValueError("Series too short for differencing.") + y_diff = np.diff(y, n=d) + else: + y_diff = y + + n = len(y_diff) + if n < max(p, q): + raise ValueError("Series too short for ARMA(p,q) with given order.") + + # Estimate in-sample residuals using model (fixed parameters) + residuals = np.zeros(n) + for t in range(max(p, q), n): + ar_part = np.dot(phi, y_diff[t - np.arange(1, p + 1)]) if p > 0 else 0.0 + ma_part = ( + np.dot(theta, residuals[t - np.arange(1, q + 1)]) if q > 0 else 0.0 + ) + pred = c + ar_part + ma_part + residuals[t] = y_diff[t] - pred + + # Use most recent p values of y_diff and q values of residuals to forecast t+1 + ar_forecast = np.dot(phi, y_diff[-np.arange(1, p + 1)]) if p > 0 else 0.0 + ma_forecast = np.dot(theta, residuals[-np.arange(1, q + 1)]) if q > 0 else 0.0 + + forecast_diff = c + ar_forecast + ma_forecast + + # Undifference the forecast + if d == 0: + return forecast_diff + elif d == 1: + return forecast_diff + y[-1] + else: + return forecast_diff + np.sum(y[-d:]) def _forecast(self, y, exog=None): """Forecast one ahead for time series y.""" @@ -170,15 +207,47 @@ def _forecast(self, y, exog=None): def iterative_forecast(self, y, prediction_horizon): self.fit(y) - preds = np.zeros(prediction_horizon) - preds[0] = self.forecast_ - differenced_series = np.diff(self._series, n=self.d) - for i in range(1, prediction_horizon): - differenced_series = np.append(differenced_series, preds[i - 1]) - preds[i] = _single_forecast( - differenced_series, self.c_, self.phi_, self.theta_ - ) - return preds + n = len(self._differenced_series) + p, q = self.p, self.q + phi, theta = self.phi_, self.theta_ + h = prediction_horizon + c = 0.0 + if self.use_constant: + c = self.c_ + + # Start with a copy of the original series and residuals + residuals = np.zeros(len(self.residuals_) + h) + residuals[: len(self.residuals_)] = self.residuals_ + forecast_series = np.zeros(n + h) + forecast_series[:n] = self._differenced_series + for i in range(h): + # Get most recent p values (lags) + t = n + i + ar_term = 0.0 + if p > 0: + ar_term = np.dot(phi, forecast_series[t - np.arange(1, p + 1)]) + # Get most recent q residuals (lags) + ma_term = 0.0 + if q > 0: + ma_term = np.dot(theta, residuals[t - np.arange(1, q + 1)]) + next_value = c + ar_term + ma_term + # Append prediction and a zero residual (placeholder) + forecast_series[n + i] = next_value + # Can't compute real residual during prediction, leave as zero + + # Correct differencing using forecast values + y_forecast_diff = forecast_series[n : n + h] + d = self.d + if d == 0: + return y_forecast_diff + else: # Correct undifferencing + # Start with last d values from original y + undiff = list(self._series[-d:]) + for i in range(h): + # Take the last d values and sum them + reconstructed = y_forecast_diff[i] + sum(undiff[-d:]) 
+ undiff.append(reconstructed) + return np.array(undiff[d:]) @njit(cache=True, fastmath=True) @@ -263,7 +332,7 @@ def _calc_arma(data, model, t, formatted_params, residuals, expect_full_history= return y_hat -@njit(cache=True, fastmath=True) +# @njit(cache=True, fastmath=True) def _single_forecast(series, c, phi, theta): """Calculate the ARMA forecast with fixed model. From 9f2a85da39eb572cd73717de667602da0e660391 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 17 Jul 2025 16:05:47 +0100 Subject: [PATCH 34/50] arima tests --- aeon/forecasting/__init__.py | 4 +- aeon/forecasting/_arima.py | 12 ++--- aeon/forecasting/tests/test_arima.py | 77 ++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 aeon/forecasting/tests/test_arima.py diff --git a/aeon/forecasting/__init__.py b/aeon/forecasting/__init__.py index 5b54dd2a19..7d7cb7e6c4 100644 --- a/aeon/forecasting/__init__.py +++ b/aeon/forecasting/__init__.py @@ -6,10 +6,10 @@ "RegressionForecaster", "ETSForecaster", "TVPForecaster", - "ARIMAForecaster", + "ARIMA", ] -from aeon.forecasting._arima import ARIMAForecaster +from aeon.forecasting._arima import ARIMA from aeon.forecasting._ets import ETSForecaster from aeon.forecasting._naive import NaiveForecaster from aeon.forecasting._regression import RegressionForecaster diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index cdaf56ea38..0ede9d27b9 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -1,10 +1,10 @@ -"""ARIMAForecaster. +"""ARIMA. An implementation of the ARIMA forecasting algorithm. """ __maintainer__ = ["alexbanwell1", "TonyBagnall"] -__all__ = ["ARIMAForecaster"] +__all__ = ["ARIMA"] import numpy as np from numba import njit @@ -13,7 +13,7 @@ from aeon.utils.optimisation._nelder_mead import nelder_mead -class ARIMAForecaster(BaseForecaster): +class ARIMA(BaseForecaster): """AutoRegressive Integrated Moving Average (ARIMA) forecaster. ARIMA with fixed model structure and fitted parameters found with an @@ -51,10 +51,10 @@ class ARIMAForecaster(BaseForecaster): Examples -------- - >>> from aeon.forecasting import ARIMAForecaster + >>> from aeon.forecasting import ARIMA >>> from aeon.datasets import load_airline >>> y = load_airline() - >>> forecaster = ARIMAForecaster(p=2,d=1) + >>> forecaster = ARIMA(p=2,d=1) >>> forecaster.forecast(y) np.float64(474.4944911737508) """ @@ -93,7 +93,7 @@ def _fit(self, y, exog=None): Returns ------- self - Fitted ARIMAForecaster. + Fitted ARIMA. 
""" self._series = np.array(y.squeeze(), dtype=np.float64) # Model is an array of the (c,p,q) diff --git a/aeon/forecasting/tests/test_arima.py b/aeon/forecasting/tests/test_arima.py new file mode 100644 index 0000000000..f0093aba93 --- /dev/null +++ b/aeon/forecasting/tests/test_arima.py @@ -0,0 +1,77 @@ +"""Test the ARIMA forecaster.""" + +import numpy as np +import pytest + +from aeon.forecasting import ARIMA as ARIMAForecaster + +y = np.array( + [112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118], dtype=np.float64 +) + + +def test_arima_zero_orders(): + """Test ARIMA(0,0,0) which should return the mean if constant is used.""" + model = ARIMAForecaster(p=0, d=0, q=0, use_constant=True) + model.fit(y) + forecast = model.predict(y) + assert np.isfinite(forecast) + assert abs(forecast - np.mean(y)) < 10 + + +@pytest.mark.parametrize( + "p, d, q, use_constant", + [ + (1, 0, 1, True), # basic ARIMA + (2, 1, 1, False), # no constant + (1, 2, 1, True), # higher-order differencing + ], +) +def test_arima_fit_and_predict_variants(p, d, q, use_constant): + """Test ARIMA fit and predict for various (p,d,q) and use_constant settings.""" + model = ARIMAForecaster(p=p, d=d, q=q, use_constant=use_constant) + model.fit(y) + forecast = model.forecast_ + assert isinstance(forecast, float) + assert np.isfinite(forecast) + + +def test_arima_iterative_forecast(): + """Test multi-step forecasting using iterative_forecast method.""" + model = ARIMAForecaster(p=1, d=1, q=1) + horizon = 3 + preds = model.iterative_forecast(y, prediction_horizon=horizon) + assert preds.shape == (horizon,) + assert np.all(np.isfinite(preds)) + + +def test_predict_too_short_for_d(): + """Test error is raised when input series is too short for differencing.""" + model = ARIMAForecaster(p=1, d=2, q=1) + model.fit(y) + with pytest.raises(ValueError, match="Series too short for differencing"): + model._predict(np.array([1.0, 2.0])) + + +def test_predict_too_short_for_pq(): + """Test error is raised when input series is too short for AR/MA terms.""" + model = ARIMAForecaster(p=5, d=0, q=0) + model.fit(y) + with pytest.raises(ValueError, match="Series too short for ARMA"): + model._predict(np.array([1.0, 2.0, 3.0])) + + +def test_forecast_attribute_set(): + """Test that calling _forecast sets the internal forecast_ attribute.""" + model = ARIMAForecaster(p=1, d=1, q=1) + forecast = model._forecast(y) + assert hasattr(model, "forecast_") + assert np.isclose(forecast, model.forecast_) + + +def test_iterative_forecast_with_d2(): + """Test iterative forecast output shape and validity with d=2.""" + model = ARIMAForecaster(p=1, d=2, q=1) + preds = model.iterative_forecast(y, prediction_horizon=5) + assert preds.shape == (5,) + assert np.all(np.isfinite(preds)) From ca30d178354c123010fd04311c8ca6a10aee2687 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 17 Jul 2025 16:09:22 +0100 Subject: [PATCH 35/50] revert to float --- aeon/forecasting/_arima.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 0ede9d27b9..1ac062994e 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -56,7 +56,7 @@ class ARIMA(BaseForecaster): >>> y = load_airline() >>> forecaster = ARIMA(p=2,d=1) >>> forecaster.forecast(y) - np.float64(474.4944911737508) + 474.4944911737508 """ _tags = { @@ -203,7 +203,7 @@ def _predict(self, y, exog=None): def _forecast(self, y, exog=None): """Forecast one ahead for time series y.""" self.fit(y, exog) - return 
self.forecast_ + return float(self.forecast_) def iterative_forecast(self, y, prediction_horizon): self.fit(y) From 46d5ebc64e1325c74c40c33d0fff1e04afe54e4b Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 17 Jul 2025 16:19:30 +0100 Subject: [PATCH 36/50] switch nelder_mead version --- aeon/forecasting/_arima.py | 52 ++--------- aeon/forecasting/_nelder_mead.py | 119 ++++++++++++++++++++++++ aeon/forecasting/tests/test_arima.py | 16 ++-- aeon/utils/optimisation/_nelder_mead.py | 2 +- 4 files changed, 135 insertions(+), 54 deletions(-) create mode 100644 aeon/forecasting/_nelder_mead.py diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 1ac062994e..16129cfedd 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -9,8 +9,8 @@ import numpy as np from numba import njit +from aeon.forecasting._nelder_mead import nelder_mead from aeon.forecasting.base import BaseForecaster -from aeon.utils.optimisation._nelder_mead import nelder_mead class ARIMA(BaseForecaster): @@ -48,15 +48,6 @@ class ARIMA(BaseForecaster): .. [1] R. J. Hyndman and G. Athanasopoulos, Forecasting: Principles and Practice. OTexts, 2014. https://otexts.com/fpp3/ - - Examples - -------- - >>> from aeon.forecasting import ARIMA - >>> from aeon.datasets import load_airline - >>> y = load_airline() - >>> forecaster = ARIMA(p=2,d=1) - >>> forecaster.forecast(y) - 474.4944911737508 """ _tags = { @@ -111,7 +102,7 @@ def _fit(self, y, exog=None): # (self.aic_, self.residuals_, self.fitted_values_) = _arima_model( self._parameters, - _calc_arma, + _in_sample_forecast, self._differenced_series, self._model, np.empty(0), @@ -119,7 +110,7 @@ def _fit(self, y, exog=None): formatted_params = _extract_params(self._parameters, self._model) # Extract # parameters - differenced_forecast = _calc_arma( + differenced_forecast = _in_sample_forecast( self._differenced_series, self._model, len(self._differenced_series), @@ -260,7 +251,7 @@ def _aic(residuals, num_params): @njit(fastmath=True) def _arima_model_wrapper(params, data, model): - return _arima_model(params, _calc_arma, data, model, np.empty(0))[0] + return _arima_model(params, _in_sample_forecast, data, model, np.empty(0))[0] # Define the ARIMA(p, d, q) likelihood function @@ -308,17 +299,12 @@ def _extract_params(params, model): @njit(cache=True, fastmath=True) -def _calc_arma(data, model, t, formatted_params, residuals, expect_full_history=False): +def _in_sample_forecast( + data, model, t, formatted_params, residuals, expect_full_history=False +): """Calculate the ARMA forecast for time t.""" - # if len(model) != 3: - # raise ValueError("Model must be of the form (c, p, q)") p = model[1] q = model[2] - # if expect_full_history and (t - p < 0 or t - q < 0): - # raise ValueError( - # f"Insufficient data for ARIMA model at time {t}. " - # f"Expected at least {p} past values for AR and {q} for MA." - # ) # AR part phi = formatted_params[1][:p] ar_term = 0 if (t - p) < 0 else np.dot(phi, data[t - p : t][::-1]) @@ -330,27 +316,3 @@ def _calc_arma(data, model, t, formatted_params, residuals, expect_full_history= c = formatted_params[0][0] if model[0] else 0 y_hat = c + ar_term + ma_term return y_hat - - -# @njit(cache=True, fastmath=True) -def _single_forecast(series, c, phi, theta): - """Calculate the ARMA forecast with fixed model. - - This is equivalent to filter in statsmodels. Assumes differenced if necessary. 
- """ - p = len(phi) - q = len(theta) - n = len(series) - residuals = np.zeros(n) - max_lag = max(p, q) - # Compute in-sample residuals - for t in range(max_lag, n): - ar_part = np.dot(phi, series[t - np.arange(1, p + 1)]) if p > 0 else 0.0 - ma_part = np.dot(theta, residuals[t - np.arange(1, q + 1)]) if q > 0 else 0.0 - pred = c + ar_part + ma_part - residuals[t] = series[t] - pred - # Forecast next value using most recent p values and q residuals - ar_forecast = np.dot(phi, series[-p:][::-1]) if p > 0 else 0.0 - ma_forecast = np.dot(theta, residuals[-q:][::-1]) if q > 0 else 0.0 - f = c + ar_forecast + ma_forecast - return f diff --git a/aeon/forecasting/_nelder_mead.py b/aeon/forecasting/_nelder_mead.py new file mode 100644 index 0000000000..263365ef6f --- /dev/null +++ b/aeon/forecasting/_nelder_mead.py @@ -0,0 +1,119 @@ +"""Optimisation algorithms for automatic parameter tuning.""" + +import numpy as np +from numba import njit + + +@njit(cache=True, fastmath=True) +def nelder_mead( + loss_function, + num_params, + data, + model, + tol=1e-6, + max_iter=500, +): + """ + Perform optimisation using the Nelder–Mead simplex algorithm. + + This function minimises a given loss (objective) function using the Nelder–Mead + algorithm, a derivative-free method that iteratively refines a simplex of candidate + solutions. The implementation supports unconstrained minimisation of functions + with a fixed number of parameters. + + Parameters + ---------- + loss_function : callable + The objective function to minimise. Should accept a 1D NumPy array of length + `num_params` and return a scalar value. + num_params : int + The number of parameters (dimensions) in the optimisation problem. + data : np.ndarray + The input data used by the loss function. The shape and content depend on the + specific loss function being minimised. + model : np.ndarray + The model or context in which the loss function operates. This could be any + other object that the `loss_function` requires to compute its value. + The exact type and structure of `model` should be compatible with the + `loss_function`. + tol : float, optional (default=1e-6) + Tolerance for convergence. The algorithm stops when the maximum difference + between function values at simplex vertices is less than `tol`. + max_iter : int, optional (default=500) + Maximum number of iterations to perform. + + Returns + ------- + best_params : np.ndarray, shape (`num_params`,) + The parameter vector that minimises the loss function. + best_value : float + The value of the loss function at the optimal parameter vector. + + Notes + ----- + - The initial simplex is constructed by setting each parameter to 0.5, + with one additional point per dimension at 0.6 for that dimension. + - This implementation does not support constraints or bounds on the parameters. + - The algorithm does not guarantee finding a global minimum. + + References + ---------- + .. [1] Nelder, J. A. and Mead, R. (1965). + A Simplex Method for Function Minimization. + The Computer Journal, 7(4), 308–313. 
+ https://doi.org/10.1093/comjnl/7.4.308 + """ + points = np.full((num_params + 1, num_params), 0.5) + for i in range(num_params): + points[i + 1][i] = 0.6 + values = np.array([loss_function(v, data, model) for v in points]) + for _iteration in range(max_iter): + # Order simplex by function values + order = np.argsort(values) + points = points[order] + values = values[order] + + # Centroid of the best n points + centre_point = points[:-1].sum(axis=0) / len(points[:-1]) + + # Reflection + # centre + distance between centre and largest value + reflected_point = centre_point + (centre_point - points[-1]) + reflected_value = loss_function(reflected_point, data, model) + # if between best and second best, use reflected value + if len(values) > 1 and values[0] <= reflected_value < values[-2]: + points[-1] = reflected_point + values[-1] = reflected_value + continue + # Expansion + # Otherwise if it is better than the best value + if reflected_value < values[0]: + expanded_point = centre_point + 2 * (reflected_point - centre_point) + expanded_value = loss_function(expanded_point, data, model) + # if less than reflected value use expanded, otherwise go back to reflected + if expanded_value < reflected_value: + points[-1] = expanded_point + values[-1] = expanded_value + else: + points[-1] = reflected_point + values[-1] = reflected_value + continue + # Contraction + # Otherwise if reflection is worse than all current values + contracted_point = centre_point - 0.5 * (centre_point - points[-1]) + contracted_value = loss_function(contracted_point, data, model) + # If contraction is better use that otherwise move to shrinkage + if contracted_value < values[-1]: + points[-1] = contracted_point + values[-1] = contracted_value + continue + + # Shrinkage + for i in range(1, len(points)): + points[i] = points[0] - 0.5 * (points[0] - points[i]) + values[i] = loss_function(points[i], data, model) + + # Convergence check + if np.max(np.abs(values - values[0])) < tol: + break + return points[0], values[0] diff --git a/aeon/forecasting/tests/test_arima.py b/aeon/forecasting/tests/test_arima.py index f0093aba93..ea78088021 100644 --- a/aeon/forecasting/tests/test_arima.py +++ b/aeon/forecasting/tests/test_arima.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from aeon.forecasting import ARIMA as ARIMAForecaster +from aeon.forecasting import ARIMA y = np.array( [112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118], dtype=np.float64 @@ -12,7 +12,7 @@ def test_arima_zero_orders(): """Test ARIMA(0,0,0) which should return the mean if constant is used.""" - model = ARIMAForecaster(p=0, d=0, q=0, use_constant=True) + model = ARIMA(p=0, d=0, q=0, use_constant=True) model.fit(y) forecast = model.predict(y) assert np.isfinite(forecast) @@ -29,7 +29,7 @@ def test_arima_zero_orders(): ) def test_arima_fit_and_predict_variants(p, d, q, use_constant): """Test ARIMA fit and predict for various (p,d,q) and use_constant settings.""" - model = ARIMAForecaster(p=p, d=d, q=q, use_constant=use_constant) + model = ARIMA(p=p, d=d, q=q, use_constant=use_constant) model.fit(y) forecast = model.forecast_ assert isinstance(forecast, float) @@ -38,7 +38,7 @@ def test_arima_fit_and_predict_variants(p, d, q, use_constant): def test_arima_iterative_forecast(): """Test multi-step forecasting using iterative_forecast method.""" - model = ARIMAForecaster(p=1, d=1, q=1) + model = ARIMA(p=1, d=1, q=1) horizon = 3 preds = model.iterative_forecast(y, prediction_horizon=horizon) assert preds.shape == (horizon,) @@ -47,7 +47,7 @@ def 
test_arima_iterative_forecast(): def test_predict_too_short_for_d(): """Test error is raised when input series is too short for differencing.""" - model = ARIMAForecaster(p=1, d=2, q=1) + model = ARIMA(p=1, d=2, q=1) model.fit(y) with pytest.raises(ValueError, match="Series too short for differencing"): model._predict(np.array([1.0, 2.0])) @@ -55,7 +55,7 @@ def test_predict_too_short_for_d(): def test_predict_too_short_for_pq(): """Test error is raised when input series is too short for AR/MA terms.""" - model = ARIMAForecaster(p=5, d=0, q=0) + model = ARIMA(p=5, d=0, q=0) model.fit(y) with pytest.raises(ValueError, match="Series too short for ARMA"): model._predict(np.array([1.0, 2.0, 3.0])) @@ -63,7 +63,7 @@ def test_predict_too_short_for_pq(): def test_forecast_attribute_set(): """Test that calling _forecast sets the internal forecast_ attribute.""" - model = ARIMAForecaster(p=1, d=1, q=1) + model = ARIMA(p=1, d=1, q=1) forecast = model._forecast(y) assert hasattr(model, "forecast_") assert np.isclose(forecast, model.forecast_) @@ -71,7 +71,7 @@ def test_forecast_attribute_set(): def test_iterative_forecast_with_d2(): """Test iterative forecast output shape and validity with d=2.""" - model = ARIMAForecaster(p=1, d=2, q=1) + model = ARIMA(p=1, d=2, q=1) preds = model.iterative_forecast(y, prediction_horizon=5) assert preds.shape == (5,) assert np.all(np.isfinite(preds)) diff --git a/aeon/utils/optimisation/_nelder_mead.py b/aeon/utils/optimisation/_nelder_mead.py index 263365ef6f..e59a70c5dd 100644 --- a/aeon/utils/optimisation/_nelder_mead.py +++ b/aeon/utils/optimisation/_nelder_mead.py @@ -4,7 +4,7 @@ from numba import njit -@njit(cache=True, fastmath=True) +@njit(fastmath=True) def nelder_mead( loss_function, num_params, From 9648b996b597e2f2f7e318527e482d692a91992f Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 17 Jul 2025 17:07:01 +0100 Subject: [PATCH 37/50] isolate loss function --- aeon/forecasting/_arima.py | 11 +---- aeon/forecasting/_loss_functions.py | 67 +++++++++++++++++++++++++++++ aeon/forecasting/_nelder_mead.py | 7 ++- 3 files changed, 72 insertions(+), 13 deletions(-) create mode 100644 aeon/forecasting/_loss_functions.py diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 16129cfedd..9336d3e526 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -94,7 +94,6 @@ def _fit(self, y, exog=None): self._differenced_series = np.diff(self._series, n=self.d) # Nelder Mead returns the parameters in a single array (self._parameters, self.aic_) = nelder_mead( - _arima_model_wrapper, np.sum(self._model[:3]), self._differenced_series, self._model, @@ -102,7 +101,6 @@ def _fit(self, y, exog=None): # (self.aic_, self.residuals_, self.fitted_values_) = _arima_model( self._parameters, - _in_sample_forecast, self._differenced_series, self._model, np.empty(0), @@ -249,14 +247,9 @@ def _aic(residuals, num_params): return likelihood + 2 * num_params -@njit(fastmath=True) -def _arima_model_wrapper(params, data, model): - return _arima_model(params, _in_sample_forecast, data, model, np.empty(0))[0] - - # Define the ARIMA(p, d, q) likelihood function @njit(cache=True, fastmath=True) -def _arima_model(params, base_function, data, model, residuals): +def _arima_model(params, data, model, residuals): """Calculate the log-likelihood of an ARIMA model given the parameters.""" formatted_params = _extract_params(params, model) # Extract parameters @@ -268,7 +261,7 @@ def _arima_model(params, base_function, data, model, residuals): 
expect_full_history = m > 0 # I.e. we've been provided with some residuals fitted_values = np.zeros(num_predictions) for t in range(num_predictions): - fitted_values[t] = base_function( + fitted_values[t] = _in_sample_forecast( data, model, m + t, diff --git a/aeon/forecasting/_loss_functions.py b/aeon/forecasting/_loss_functions.py new file mode 100644 index 0000000000..e8d1e21b00 --- /dev/null +++ b/aeon/forecasting/_loss_functions.py @@ -0,0 +1,67 @@ +"""Loss functions for optimiser.""" + +import numpy as np +from numba import njit + + +@njit(cache=True, fastmath=True) +def _arima_fit(params, data, model): + """Calculate the AIC of an ARIMA model given the parameters.""" + formatted_params = _extract_params(params, model) # Extract parameters + + # Initialize residuals + n = len(data) + residuals = np.zeros(n) + fitted_values = np.zeros(n) + for t in range(n): + fitted_values[t] = _in_sample_forecast( + data, + model, + t, + formatted_params, + residuals, + ) + residuals[t] = data[t] - fitted_values[t] + return _aic(residuals, len(params)) + + +@njit(cache=True, fastmath=True) +def _in_sample_forecast(data, model, t, formatted_params, residuals): + """Calculate the ARMA forecast for time t.""" + p = model[1] + q = model[2] + # AR part + phi = formatted_params[1][:p] + ar_term = 0 if (t - p) < 0 else np.dot(phi, data[t - p : t][::-1]) + + # MA part + theta = formatted_params[2][:q] + ma_term = 0 if (t - q) < 0 else np.dot(theta, residuals[t - q : t][::-1]) + + c = formatted_params[0][0] if model[0] else 0 + y_hat = c + ar_term + ma_term + return y_hat + + +@njit(cache=True, fastmath=True) +def _extract_params(params, model): + """Extract ARIMA parameters from the parameter vector.""" + if len(params) != np.sum(model): + model = model[:-1] # Remove the seasonal period + starts = np.cumsum(np.concatenate((np.zeros(1, dtype=np.int32), model[:-1]))) + n = len(starts) + max_len = np.max(model) + result = np.full((n, max_len), np.nan, dtype=params.dtype) + for i in range(n): + length = model[i] + start = starts[i] + result[i, :length] = params[start : start + length] + return result + + +@njit(cache=True, fastmath=True) +def _aic(residuals, num_params): + """Calculate the log-likelihood of a model.""" + variance = np.mean(residuals**2) + likelihood = len(residuals) * (np.log(2 * np.pi) + np.log(variance) + 1) + return likelihood + 2 * num_params diff --git a/aeon/forecasting/_nelder_mead.py b/aeon/forecasting/_nelder_mead.py index 263365ef6f..a5688989bb 100644 --- a/aeon/forecasting/_nelder_mead.py +++ b/aeon/forecasting/_nelder_mead.py @@ -3,10 +3,11 @@ import numpy as np from numba import njit +from aeon.forecasting._loss_functions import _arima_fit + @njit(cache=True, fastmath=True) def nelder_mead( - loss_function, num_params, data, model, @@ -23,9 +24,6 @@ def nelder_mead( Parameters ---------- - loss_function : callable - The objective function to minimise. Should accept a 1D NumPy array of length - `num_params` and return a scalar value. num_params : int The number of parameters (dimensions) in the optimisation problem. data : np.ndarray @@ -63,6 +61,7 @@ def nelder_mead( The Computer Journal, 7(4), 308–313. 
https://doi.org/10.1093/comjnl/7.4.308 """ + loss_function = _arima_fit points = np.full((num_params + 1, num_params), 0.5) for i in range(num_params): points[i + 1][i] = 0.6 From e8157d634b9101cd0e12aefc886cfc750d5d639a Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 17 Jul 2025 18:03:03 +0100 Subject: [PATCH 38/50] isolate loss function --- aeon/forecasting/_arima.py | 1 + aeon/forecasting/_loss_functions.py | 29 +++++++++++++++++++---------- aeon/forecasting/_nelder_mead.py | 26 +++++++++++++++++++------- 3 files changed, 39 insertions(+), 17 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 9336d3e526..42335fe7d4 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -94,6 +94,7 @@ def _fit(self, y, exog=None): self._differenced_series = np.diff(self._series, n=self.d) # Nelder Mead returns the parameters in a single array (self._parameters, self.aic_) = nelder_mead( + 0, np.sum(self._model[:3]), self._differenced_series, self._model, diff --git a/aeon/forecasting/_loss_functions.py b/aeon/forecasting/_loss_functions.py index e8d1e21b00..94edd410ce 100644 --- a/aeon/forecasting/_loss_functions.py +++ b/aeon/forecasting/_loss_functions.py @@ -13,20 +13,31 @@ def _arima_fit(params, data, model): n = len(data) residuals = np.zeros(n) fitted_values = np.zeros(n) + c = formatted_params[0][0] if model[0] else 0 + p = model[1] + q = model[2] + # AR part + phi = formatted_params[1][:p] + theta = formatted_params[2][:q] for t in range(n): - fitted_values[t] = _in_sample_forecast( - data, - model, - t, - formatted_params, - residuals, - ) + ar_term = 0 if (t - p) < 0 else np.dot(phi, data[t - p : t][::-1]) + ma_term = 0 if (t - q) < 0 else np.dot(theta, residuals[t - q : t][::-1]) + fitted_values[t] = c + ar_term + ma_term residuals[t] = data[t] - fitted_values[t] return _aic(residuals, len(params)) @njit(cache=True, fastmath=True) -def _in_sample_forecast(data, model, t, formatted_params, residuals): +def _in_sample_forecast( + data, + model, + t, + formatted_params, + residuals, + p, + q, + phi, +): """Calculate the ARMA forecast for time t.""" p = model[1] q = model[2] @@ -46,8 +57,6 @@ def _in_sample_forecast(data, model, t, formatted_params, residuals): @njit(cache=True, fastmath=True) def _extract_params(params, model): """Extract ARIMA parameters from the parameter vector.""" - if len(params) != np.sum(model): - model = model[:-1] # Remove the seasonal period starts = np.cumsum(np.concatenate((np.zeros(1, dtype=np.int32), model[:-1]))) n = len(starts) max_len = np.max(model) diff --git a/aeon/forecasting/_nelder_mead.py b/aeon/forecasting/_nelder_mead.py index a5688989bb..f617aa5727 100644 --- a/aeon/forecasting/_nelder_mead.py +++ b/aeon/forecasting/_nelder_mead.py @@ -6,8 +6,17 @@ from aeon.forecasting._loss_functions import _arima_fit +@njit(cache=True, fastmath=True) +def dispatch_loss(fn_id, params, data, model): + if fn_id == 0: + return _arima_fit(params, data, model) + else: + raise ValueError("Unknown loss function ID") + + @njit(cache=True, fastmath=True) def nelder_mead( + loss_id, num_params, data, model, @@ -24,6 +33,8 @@ def nelder_mead( Parameters ---------- + loss_id : int + ID for loss function to optimise, used by dispatch_loss. num_params : int The number of parameters (dimensions) in the optimisation problem. data : np.ndarray @@ -61,12 +72,13 @@ def nelder_mead( The Computer Journal, 7(4), 308–313. 
https://doi.org/10.1093/comjnl/7.4.308 """ - loss_function = _arima_fit points = np.full((num_params + 1, num_params), 0.5) for i in range(num_params): points[i + 1][i] = 0.6 - values = np.array([loss_function(v, data, model) for v in points]) - for _iteration in range(max_iter): + values = np.empty(len(points), dtype=np.float64) + for i in range(len(points)): + values[i] = dispatch_loss(loss_id, points[i].copy(), data, model) + for i in range(max_iter): # Order simplex by function values order = np.argsort(values) points = points[order] @@ -78,7 +90,7 @@ def nelder_mead( # Reflection # centre + distance between centre and largest value reflected_point = centre_point + (centre_point - points[-1]) - reflected_value = loss_function(reflected_point, data, model) + reflected_value = dispatch_loss(0, reflected_point, data, model) # if between best and second best, use reflected value if len(values) > 1 and values[0] <= reflected_value < values[-2]: points[-1] = reflected_point @@ -88,7 +100,7 @@ def nelder_mead( # Otherwise if it is better than the best value if reflected_value < values[0]: expanded_point = centre_point + 2 * (reflected_point - centre_point) - expanded_value = loss_function(expanded_point, data, model) + expanded_value = dispatch_loss(0, expanded_point, data, model) # if less than reflected value use expanded, otherwise go back to reflected if expanded_value < reflected_value: points[-1] = expanded_point @@ -100,7 +112,7 @@ def nelder_mead( # Contraction # Otherwise if reflection is worse than all current values contracted_point = centre_point - 0.5 * (centre_point - points[-1]) - contracted_value = loss_function(contracted_point, data, model) + contracted_value = dispatch_loss(0, contracted_point, data, model) # If contraction is better use that otherwise move to shrinkage if contracted_value < values[-1]: points[-1] = contracted_point @@ -110,7 +122,7 @@ def nelder_mead( # Shrinkage for i in range(1, len(points)): points[i] = points[0] - 0.5 * (points[0] - points[i]) - values[i] = loss_function(points[i], data, model) + values[i] = dispatch_loss(0, points[i], data, model) # Convergence check if np.max(np.abs(values - values[0])) < tol: From f45400c4bb7f0d7a7f52e6f8a666e9a42e386f0d Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 17 Jul 2025 18:19:30 +0100 Subject: [PATCH 39/50] remove the utils version of nelder mead --- aeon/utils/optimisation/__init__.py | 1 - aeon/utils/optimisation/_nelder_mead.py | 119 ------------------------ 2 files changed, 120 deletions(-) delete mode 100644 aeon/utils/optimisation/__init__.py delete mode 100644 aeon/utils/optimisation/_nelder_mead.py diff --git a/aeon/utils/optimisation/__init__.py b/aeon/utils/optimisation/__init__.py deleted file mode 100644 index 11eddea791..0000000000 --- a/aeon/utils/optimisation/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Optimisation utils.""" diff --git a/aeon/utils/optimisation/_nelder_mead.py b/aeon/utils/optimisation/_nelder_mead.py deleted file mode 100644 index e59a70c5dd..0000000000 --- a/aeon/utils/optimisation/_nelder_mead.py +++ /dev/null @@ -1,119 +0,0 @@ -"""Optimisation algorithms for automatic parameter tuning.""" - -import numpy as np -from numba import njit - - -@njit(fastmath=True) -def nelder_mead( - loss_function, - num_params, - data, - model, - tol=1e-6, - max_iter=500, -): - """ - Perform optimisation using the Nelder–Mead simplex algorithm. 
- - This function minimises a given loss (objective) function using the Nelder–Mead - algorithm, a derivative-free method that iteratively refines a simplex of candidate - solutions. The implementation supports unconstrained minimisation of functions - with a fixed number of parameters. - - Parameters - ---------- - loss_function : callable - The objective function to minimise. Should accept a 1D NumPy array of length - `num_params` and return a scalar value. - num_params : int - The number of parameters (dimensions) in the optimisation problem. - data : np.ndarray - The input data used by the loss function. The shape and content depend on the - specific loss function being minimised. - model : np.ndarray - The model or context in which the loss function operates. This could be any - other object that the `loss_function` requires to compute its value. - The exact type and structure of `model` should be compatible with the - `loss_function`. - tol : float, optional (default=1e-6) - Tolerance for convergence. The algorithm stops when the maximum difference - between function values at simplex vertices is less than `tol`. - max_iter : int, optional (default=500) - Maximum number of iterations to perform. - - Returns - ------- - best_params : np.ndarray, shape (`num_params`,) - The parameter vector that minimises the loss function. - best_value : float - The value of the loss function at the optimal parameter vector. - - Notes - ----- - - The initial simplex is constructed by setting each parameter to 0.5, - with one additional point per dimension at 0.6 for that dimension. - - This implementation does not support constraints or bounds on the parameters. - - The algorithm does not guarantee finding a global minimum. - - References - ---------- - .. [1] Nelder, J. A. and Mead, R. (1965). - A Simplex Method for Function Minimization. - The Computer Journal, 7(4), 308–313. 
- https://doi.org/10.1093/comjnl/7.4.308 - """ - points = np.full((num_params + 1, num_params), 0.5) - for i in range(num_params): - points[i + 1][i] = 0.6 - values = np.array([loss_function(v, data, model) for v in points]) - for _iteration in range(max_iter): - # Order simplex by function values - order = np.argsort(values) - points = points[order] - values = values[order] - - # Centroid of the best n points - centre_point = points[:-1].sum(axis=0) / len(points[:-1]) - - # Reflection - # centre + distance between centre and largest value - reflected_point = centre_point + (centre_point - points[-1]) - reflected_value = loss_function(reflected_point, data, model) - # if between best and second best, use reflected value - if len(values) > 1 and values[0] <= reflected_value < values[-2]: - points[-1] = reflected_point - values[-1] = reflected_value - continue - # Expansion - # Otherwise if it is better than the best value - if reflected_value < values[0]: - expanded_point = centre_point + 2 * (reflected_point - centre_point) - expanded_value = loss_function(expanded_point, data, model) - # if less than reflected value use expanded, otherwise go back to reflected - if expanded_value < reflected_value: - points[-1] = expanded_point - values[-1] = expanded_value - else: - points[-1] = reflected_point - values[-1] = reflected_value - continue - # Contraction - # Otherwise if reflection is worse than all current values - contracted_point = centre_point - 0.5 * (centre_point - points[-1]) - contracted_value = loss_function(contracted_point, data, model) - # If contraction is better use that otherwise move to shrinkage - if contracted_value < values[-1]: - points[-1] = contracted_point - values[-1] = contracted_value - continue - - # Shrinkage - for i in range(1, len(points)): - points[i] = points[0] - 0.5 * (points[0] - points[i]) - values[i] = loss_function(points[i], data, model) - - # Convergence check - if np.max(np.abs(values - values[0])) < tol: - break - return points[0], values[0] From 150998907d852c8265d10e529166bec433999344 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Sat, 19 Jul 2025 09:45:55 +0100 Subject: [PATCH 40/50] set self.c_ correctly --- aeon/forecasting/_arima.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 42335fe7d4..cdd0ab30d2 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -124,7 +124,8 @@ def _fit(self, y, exog=None): else: self.forecast_ = differenced_forecast + np.sum(self._series[-self.d :]) # Extract the parameter values - self.c_ = formatted_params[0][0] + if self.use_constant: + self.c_ = formatted_params[0][0] self.phi_ = formatted_params[1][: self.p] self.theta_ = formatted_params[2][: self.q] From a2578d9a8685e2b2713582f0ebea0a78cfac5815 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Sat, 19 Jul 2025 12:12:56 +0100 Subject: [PATCH 41/50] numba optimise --- aeon/forecasting/_arima.py | 33 ++++------------ aeon/forecasting/_loss_functions.py | 58 +++++++---------------------- 2 files changed, 22 insertions(+), 69 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index cdd0ab30d2..1a974d1930 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -104,19 +104,11 @@ def _fit(self, y, exog=None): self._parameters, self._differenced_series, self._model, - np.empty(0), ) formatted_params = _extract_params(self._parameters, self._model) # Extract # parameters + differenced_forecast = 
self.fitted_values_[-1] - differenced_forecast = _in_sample_forecast( - self._differenced_series, - self._model, - len(self._differenced_series), - formatted_params, - self.residuals_, - expect_full_history=True, - ) if self.d == 0: self.forecast_ = differenced_forecast elif self.d == 1: @@ -251,29 +243,22 @@ def _aic(residuals, num_params): # Define the ARIMA(p, d, q) likelihood function @njit(cache=True, fastmath=True) -def _arima_model(params, data, model, residuals): +def _arima_model(params, data, model): """Calculate the log-likelihood of an ARIMA model given the parameters.""" formatted_params = _extract_params(params, model) # Extract parameters # Initialize residuals n = len(data) - m = len(residuals) - num_predictions = n - m + 1 - residuals = np.concatenate((residuals, np.zeros(num_predictions - 1))) - expect_full_history = m > 0 # I.e. we've been provided with some residuals + num_predictions = n + 1 + residuals = np.zeros(num_predictions - 1) fitted_values = np.zeros(num_predictions) for t in range(num_predictions): fitted_values[t] = _in_sample_forecast( - data, - model, - m + t, - formatted_params, - residuals, - expect_full_history, + data, model, t, formatted_params, residuals ) if t != num_predictions - 1: # Only calculate residuals for the predictions we have data for - residuals[m + t] = data[m + t] - fitted_values[t] + residuals[t] = data[t] - fitted_values[t] return _aic(residuals, len(params)), residuals, fitted_values @@ -294,10 +279,8 @@ def _extract_params(params, model): @njit(cache=True, fastmath=True) -def _in_sample_forecast( - data, model, t, formatted_params, residuals, expect_full_history=False -): - """Calculate the ARMA forecast for time t.""" +def _in_sample_forecast(data, model, t, formatted_params, residuals): + """Calculate the ARMA forecast for time t for fitted model.""" p = model[1] q = model[2] # AR part diff --git a/aeon/forecasting/_loss_functions.py b/aeon/forecasting/_loss_functions.py index 94edd410ce..546846eb09 100644 --- a/aeon/forecasting/_loss_functions.py +++ b/aeon/forecasting/_loss_functions.py @@ -12,46 +12,24 @@ def _arima_fit(params, data, model): # Initialize residuals n = len(data) residuals = np.zeros(n) - fitted_values = np.zeros(n) c = formatted_params[0][0] if model[0] else 0 p = model[1] q = model[2] - # AR part - phi = formatted_params[1][:p] - theta = formatted_params[2][:q] for t in range(n): - ar_term = 0 if (t - p) < 0 else np.dot(phi, data[t - p : t][::-1]) - ma_term = 0 if (t - q) < 0 else np.dot(theta, residuals[t - q : t][::-1]) - fitted_values[t] = c + ar_term + ma_term - residuals[t] = data[t] - fitted_values[t] - return _aic(residuals, len(params)) - - -@njit(cache=True, fastmath=True) -def _in_sample_forecast( - data, - model, - t, - formatted_params, - residuals, - p, - q, - phi, -): - """Calculate the ARMA forecast for time t.""" - p = model[1] - q = model[2] - # AR part - phi = formatted_params[1][:p] - ar_term = 0 if (t - p) < 0 else np.dot(phi, data[t - p : t][::-1]) - - # MA part - theta = formatted_params[2][:q] - ma_term = 0 if (t - q) < 0 else np.dot(theta, residuals[t - q : t][::-1]) - - c = formatted_params[0][0] if model[0] else 0 - y_hat = c + ar_term + ma_term - return y_hat + ar_term = 0 + max_ar = min(p, t) + for j in range(max_ar): + ar_term += formatted_params[1, j] * data[t - j - 1] + ma_term = 0 + max_ma = min(q, t) + for j in range(max_ma): + ma_term += formatted_params[2, j] * residuals[t - j - 1] + y_hat = c + ar_term + ma_term + residuals[t] = data[t] - y_hat + variance = 
np.mean(residuals**2) + LOG_2PI = 1.8378770664093453 + likelihood = n * (LOG_2PI + np.log(variance) + 1.0) + return likelihood + 2 * len(params) @njit(cache=True, fastmath=True) @@ -66,11 +44,3 @@ def _extract_params(params, model): start = starts[i] result[i, :length] = params[start : start + length] return result - - -@njit(cache=True, fastmath=True) -def _aic(residuals, num_params): - """Calculate the log-likelihood of a model.""" - variance = np.mean(residuals**2) - likelihood = len(residuals) * (np.log(2 * np.pi) + np.log(variance) + 1) - return likelihood + 2 * num_params From 35e5b1c1a12bfd1fd992cea91c91f8d3baf293f0 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Sat, 19 Jul 2025 12:17:05 +0100 Subject: [PATCH 42/50] numba optimise --- aeon/forecasting/_loss_functions.py | 34 ++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/aeon/forecasting/_loss_functions.py b/aeon/forecasting/_loss_functions.py index 546846eb09..ef4fb30e55 100644 --- a/aeon/forecasting/_loss_functions.py +++ b/aeon/forecasting/_loss_functions.py @@ -3,6 +3,8 @@ import numpy as np from numba import njit +LOG_2PI = 1.8378770664093453 + @njit(cache=True, fastmath=True) def _arima_fit(params, data, model): @@ -16,31 +18,43 @@ def _arima_fit(params, data, model): p = model[1] q = model[2] for t in range(n): - ar_term = 0 + ar_term = 0.0 max_ar = min(p, t) for j in range(max_ar): ar_term += formatted_params[1, j] * data[t - j - 1] - ma_term = 0 + ma_term = 0.0 max_ma = min(q, t) for j in range(max_ma): ma_term += formatted_params[2, j] * residuals[t - j - 1] y_hat = c + ar_term + ma_term residuals[t] = data[t] - y_hat - variance = np.mean(residuals**2) - LOG_2PI = 1.8378770664093453 + sse = 0.0 + for i in range(n): + sse += residuals[i] ** 2 + variance = sse / n likelihood = n * (LOG_2PI + np.log(variance) + 1.0) - return likelihood + 2 * len(params) + k = len(params) + return likelihood + 2 * k @njit(cache=True, fastmath=True) def _extract_params(params, model): """Extract ARIMA parameters from the parameter vector.""" - starts = np.cumsum(np.concatenate((np.zeros(1, dtype=np.int32), model[:-1]))) - n = len(starts) + n_parts = len(model) + starts = np.zeros(n_parts, dtype=np.int32) + for i in range(1, n_parts): + starts[i] = starts[i - 1] + model[i - 1] + max_len = np.max(model) - result = np.full((n, max_len), np.nan, dtype=params.dtype) - for i in range(n): + result = np.empty((n_parts, max_len), dtype=params.dtype) + for i in range(n_parts): + for j in range(max_len): + result[i, j] = np.nan + + for i in range(n_parts): length = model[i] start = starts[i] - result[i, :length] = params[start : start + length] + for j in range(length): + result[i, j] = params[start + j] + return result From 79c8c2d521f1f191c510995df858b637be455b79 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Sat, 19 Jul 2025 13:01:26 +0100 Subject: [PATCH 43/50] numba optimise --- aeon/forecasting/_arima.py | 58 ++++++++++++++--------------- aeon/forecasting/_loss_functions.py | 27 ++------------ 2 files changed, 31 insertions(+), 54 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 1a974d1930..941e97d694 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -9,6 +9,7 @@ import numpy as np from numba import njit +from aeon.forecasting._extract_paras import _extract_arma_params from aeon.forecasting._nelder_mead import nelder_mead from aeon.forecasting.base import BaseForecaster @@ -27,8 +28,10 @@ class ARIMA(BaseForecaster): Differencing (d) 
order of the ARIMA model q : int, default=1, Moving average (q) order of the ARIMA model - use_constant: bool = False, + use_constant : bool = False, Presence of a constant/intercept term in the model. + iterations : int, default = 200 + Maximum number of iterations to use in the Nelder-Mead parameter search. Attributes ---------- @@ -54,11 +57,19 @@ class ARIMA(BaseForecaster): "capability:horizon": False, # cannot fit to a horizon other than 1 } - def __init__(self, p: int = 1, d: int = 0, q: int = 1, use_constant: bool = False): + def __init__( + self, + p: int = 1, + d: int = 0, + q: int = 1, + use_constant: bool = False, + iterations: int = 200, + ): self.p = p self.d = d self.q = q self.use_constant = use_constant + self.iterations = iterations self.phi_ = 0 self.theta_ = 0 self.c_ = 0 @@ -98,6 +109,7 @@ def _fit(self, y, exog=None): np.sum(self._model[:3]), self._differenced_series, self._model, + max_iter=self.iterations, ) # (self.aic_, self.residuals_, self.fitted_values_) = _arima_model( @@ -105,7 +117,9 @@ def _fit(self, y, exog=None): self._differenced_series, self._model, ) - formatted_params = _extract_params(self._parameters, self._model) # Extract + formatted_params = _extract_arma_params( + self._parameters, self._model + ) # Extract # parameters differenced_forecast = self.fitted_values_[-1] @@ -245,7 +259,7 @@ def _aic(residuals, num_params): @njit(cache=True, fastmath=True) def _arima_model(params, data, model): """Calculate the log-likelihood of an ARIMA model given the parameters.""" - formatted_params = _extract_params(params, model) # Extract parameters + formatted_params = _extract_arma_params(params, model) # Extract parameters # Initialize residuals n = len(data) @@ -262,35 +276,19 @@ def _arima_model(params, data, model): return _aic(residuals, len(params)), residuals, fitted_values -@njit(cache=True, fastmath=True) -def _extract_params(params, model): - """Extract ARIMA parameters from the parameter vector.""" - if len(params) != np.sum(model): - model = model[:-1] # Remove the seasonal period - starts = np.cumsum(np.concatenate((np.zeros(1, dtype=np.int32), model[:-1]))) - n = len(starts) - max_len = np.max(model) - result = np.full((n, max_len), np.nan, dtype=params.dtype) - for i in range(n): - length = model[i] - start = starts[i] - result[i, :length] = params[start : start + length] - return result - - @njit(cache=True, fastmath=True) def _in_sample_forecast(data, model, t, formatted_params, residuals): - """Calculate the ARMA forecast for time t for fitted model.""" + """Efficient ARMA one-step forecast at time t for fitted model.""" p = model[1] q = model[2] - # AR part - phi = formatted_params[1][:p] - ar_term = 0 if (t - p) < 0 else np.dot(phi, data[t - p : t][::-1]) + c = formatted_params[0][0] if model[0] else 0.0 + + ar_term = 0.0 + for j in range(min(p, t)): + ar_term += formatted_params[1, j] * data[t - j - 1] - # MA part - theta = formatted_params[2][:q] - ma_term = 0 if (t - q) < 0 else np.dot(theta, residuals[t - q : t][::-1]) + ma_term = 0.0 + for j in range(min(q, t)): + ma_term += formatted_params[2, j] * residuals[t - j - 1] - c = formatted_params[0][0] if model[0] else 0 - y_hat = c + ar_term + ma_term - return y_hat + return c + ar_term + ma_term diff --git a/aeon/forecasting/_loss_functions.py b/aeon/forecasting/_loss_functions.py index ef4fb30e55..ec2c5faaa4 100644 --- a/aeon/forecasting/_loss_functions.py +++ b/aeon/forecasting/_loss_functions.py @@ -3,13 +3,15 @@ import numpy as np from numba import njit +from 
aeon.forecasting._extract_paras import _extract_arma_params + LOG_2PI = 1.8378770664093453 @njit(cache=True, fastmath=True) def _arima_fit(params, data, model): """Calculate the AIC of an ARIMA model given the parameters.""" - formatted_params = _extract_params(params, model) # Extract parameters + formatted_params = _extract_arma_params(params, model) # Extract parameters # Initialize residuals n = len(data) @@ -35,26 +37,3 @@ def _arima_fit(params, data, model): likelihood = n * (LOG_2PI + np.log(variance) + 1.0) k = len(params) return likelihood + 2 * k - - -@njit(cache=True, fastmath=True) -def _extract_params(params, model): - """Extract ARIMA parameters from the parameter vector.""" - n_parts = len(model) - starts = np.zeros(n_parts, dtype=np.int32) - for i in range(1, n_parts): - starts[i] = starts[i - 1] + model[i - 1] - - max_len = np.max(model) - result = np.empty((n_parts, max_len), dtype=params.dtype) - for i in range(n_parts): - for j in range(max_len): - result[i, j] = np.nan - - for i in range(n_parts): - length = model[i] - start = starts[i] - for j in range(length): - result[i, j] = params[start + j] - - return result From fc4f7044b900bc724286d312638e9b38f060880e Mon Sep 17 00:00:00 2001 From: Alex Banwell Date: Wed, 23 Jul 2025 13:37:04 +0100 Subject: [PATCH 44/50] Add missing _extract_paras file --- aeon/forecasting/_extract_paras.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 aeon/forecasting/_extract_paras.py diff --git a/aeon/forecasting/_extract_paras.py b/aeon/forecasting/_extract_paras.py new file mode 100644 index 0000000000..2e38a3e4dd --- /dev/null +++ b/aeon/forecasting/_extract_paras.py @@ -0,0 +1,27 @@ +"""ARIMA Utility Function.""" + +import numpy as np +from numba import njit + + +@njit(cache=True, fastmath=True) +def _extract_arma_params(params, model): + """Extract ARIMA parameters from the parameter vector.""" + n_parts = len(model) + starts = np.zeros(n_parts, dtype=np.int32) + for i in range(1, n_parts): + starts[i] = starts[i - 1] + model[i - 1] + + max_len = np.max(model) + result = np.empty((n_parts, max_len), dtype=params.dtype) + for i in range(n_parts): + for j in range(max_len): + result[i, j] = np.nan + + for i in range(n_parts): + length = model[i] + start = starts[i] + for j in range(length): + result[i, j] = params[start + j] + + return result From 724992b3d045b1d2aafc55a1648b6af459a92eec Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Sat, 26 Jul 2025 14:41:28 +0100 Subject: [PATCH 45/50] initial residuals bug and proper differencing d>1 --- aeon/forecasting/_arima.py | 19 ++++++--- aeon/forecasting/tests/test_arima.py | 62 ++++++++++++++++++++++------ 2 files changed, 62 insertions(+), 19 deletions(-) diff --git a/aeon/forecasting/_arima.py b/aeon/forecasting/_arima.py index 941e97d694..6f08af4325 100644 --- a/aeon/forecasting/_arima.py +++ b/aeon/forecasting/_arima.py @@ -124,12 +124,18 @@ def _fit(self, y, exog=None): differenced_forecast = self.fitted_values_[-1] if self.d == 0: - self.forecast_ = differenced_forecast + forecast_value = differenced_forecast elif self.d == 1: - self.forecast_ = differenced_forecast + self._series[-1] - else: - self.forecast_ = differenced_forecast + np.sum(self._series[-self.d :]) - # Extract the parameter values + forecast_value = differenced_forecast + self._series[-1] + else: # for d > 1, iteratively undifference + forecast_value = differenced_forecast + last_vals = self._series[-self.d :] + for _ in range(self.d): + forecast_value += last_vals[-1] - 
last_vals[-2] + # Shift values to avoid appending to list (efficient) + last_vals = np.roll(last_vals, -1) + last_vals[-1] = forecast_value # Extract the parameter values + self.forecast_ = forecast_value if self.use_constant: self.c_ = formatted_params[0][0] self.phi_ = formatted_params[1][: self.p] @@ -266,7 +272,8 @@ def _arima_model(params, data, model): num_predictions = n + 1 residuals = np.zeros(num_predictions - 1) fitted_values = np.zeros(num_predictions) - for t in range(num_predictions): + # Leave first max(p,q) residuals and fitted as zero. + for t in range(max(model[1], model[2]), num_predictions): fitted_values[t] = _in_sample_forecast( data, model, t, formatted_params, residuals ) diff --git a/aeon/forecasting/tests/test_arima.py b/aeon/forecasting/tests/test_arima.py index ea78088021..50528a81cf 100644 --- a/aeon/forecasting/tests/test_arima.py +++ b/aeon/forecasting/tests/test_arima.py @@ -45,20 +45,19 @@ def test_arima_iterative_forecast(): assert np.all(np.isfinite(preds)) -def test_predict_too_short_for_d(): - """Test error is raised when input series is too short for differencing.""" - model = ARIMA(p=1, d=2, q=1) - model.fit(y) - with pytest.raises(ValueError, match="Series too short for differencing"): - model._predict(np.array([1.0, 2.0])) - - -def test_predict_too_short_for_pq(): - """Test error is raised when input series is too short for AR/MA terms.""" - model = ARIMA(p=5, d=0, q=0) +@pytest.mark.parametrize( + "y_input, error_match", + [ + (np.array([1.0, 2.0]), "Series too short for differencing"), + (np.array([1.0, 2.0, 3.0]), "Series too short for ARMA"), + ], +) +def test_arima_too_short_series_errors(y_input, error_match): + """Test errors raised for too short input series.""" + model = ARIMA(p=3, d=2, q=3) model.fit(y) - with pytest.raises(ValueError, match="Series too short for ARMA"): - model._predict(np.array([1.0, 2.0, 3.0])) + with pytest.raises(ValueError, match=error_match): + model._predict(y_input) def test_forecast_attribute_set(): @@ -75,3 +74,40 @@ def test_iterative_forecast_with_d2(): preds = model.iterative_forecast(y, prediction_horizon=5) assert preds.shape == (5,) assert np.all(np.isfinite(preds)) + + +@pytest.mark.parametrize( + "p, d, q, use_constant, expected_forecast", + [ + (1, 0, 1, False, 118.47506756), # precomputed from known ARIMA implementation + (2, 1, 1, False, 202.1120270), # precomputed + (3, 0, 0, True, 125.2778775), # precomputed + ], +) +def test_arima_fixed_paras(p, d, q, use_constant, expected_forecast): + """Test ARIMA fit/predict accuracy against known forecasts. + + expected values calculated with values fitted by Nelder-Mead: + + 1. phi = [0.99497524] theta [0.0691515] + 2. phi = [ 0.02898788 -0.4330671 ] theta [1.26699252] + 3. phi = [ 0.19202414 0.05207654 -0.07367897] theta [], constant 105.970867164 + + """ + model = ARIMA(p=p, d=d, q=q, use_constant=use_constant) + model.fit(y) + forecast = model.forecast_ + assert isinstance(forecast, float) + assert np.isfinite(forecast) + assert np.isclose(forecast, expected_forecast, atol=0.5) + + +def test_arima_known_output(): + """Test ARIMA for fixed parameters. + + Test ARMIMA with forecast generated externally. 
+ """ + model = ARIMA(p=1, d=0, q=1) + model.fit(y) + f = model.forecast_ + assert np.isclose(118.47506756, f) From 755f927e6ef9fcd55defa03b377c4d7290f0e210 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Sun, 27 Jul 2025 17:33:28 +0100 Subject: [PATCH 46/50] loss function ignores zeroed start points --- aeon/forecasting/_loss_functions.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/aeon/forecasting/_loss_functions.py b/aeon/forecasting/_loss_functions.py index ec2c5faaa4..c88211b645 100644 --- a/aeon/forecasting/_loss_functions.py +++ b/aeon/forecasting/_loss_functions.py @@ -31,9 +31,10 @@ def _arima_fit(params, data, model): y_hat = c + ar_term + ma_term residuals[t] = data[t] - y_hat sse = 0.0 - for i in range(n): - sse += residuals[i] ** 2 - variance = sse / n - likelihood = n * (LOG_2PI + np.log(variance) + 1.0) + start = max(p, q) + for i in range(start, n): + sse += residuals[i] * residuals[i] + variance = sse / (n - start) + likelihood = (n - start) * (LOG_2PI + np.log(variance) + 1.0) k = len(params) return likelihood + 2 * k From f2181067204390a9c43cb4b57115878d5cc67810 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Mon, 28 Jul 2025 09:27:47 +0100 Subject: [PATCH 47/50] fix test --- aeon/forecasting/tests/test_arima.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aeon/forecasting/tests/test_arima.py b/aeon/forecasting/tests/test_arima.py index 50528a81cf..73d99d7b50 100644 --- a/aeon/forecasting/tests/test_arima.py +++ b/aeon/forecasting/tests/test_arima.py @@ -80,8 +80,8 @@ def test_iterative_forecast_with_d2(): "p, d, q, use_constant, expected_forecast", [ (1, 0, 1, False, 118.47506756), # precomputed from known ARIMA implementation - (2, 1, 1, False, 202.1120270), # precomputed - (3, 0, 0, True, 125.2778775), # precomputed + (2, 1, 1, False, 209.1099231455), # precomputed + (3, 0, 0, True, 137.47368045155), # precomputed ], ) def test_arima_fixed_paras(p, d, q, use_constant, expected_forecast): @@ -99,7 +99,7 @@ def test_arima_fixed_paras(p, d, q, use_constant, expected_forecast): forecast = model.forecast_ assert isinstance(forecast, float) assert np.isfinite(forecast) - assert np.isclose(forecast, expected_forecast, atol=0.5) + assert np.isclose(forecast, expected_forecast, atol=1e-6) def test_arima_known_output(): From 6a13e4f6b11e8d3f0ab39ac151f6d6e2988ce061 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Mon, 28 Jul 2025 09:41:43 +0100 Subject: [PATCH 48/50] revert FLOAT_CMP --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8965ccd402..b27cc4d00f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -169,7 +169,7 @@ testpaths = "aeon" doctest_optionflags = [ "NORMALIZE_WHITESPACE", "ELLIPSIS", -# "FLOAT_CMP", + "FLOAT_CMP", ] addopts = [ "--durations=20", From 9721a5ca64b81c60b61cad1ff75e775da8758590 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Mon, 28 Jul 2025 10:16:52 +0100 Subject: [PATCH 49/50] refactor into forecasting --- aeon/{utils => }/forecasting/_hypo_tests.py | 2 +- aeon/{utils => }/forecasting/_seasonality.py | 0 aeon/utils/forecasting/__init__.py | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) rename aeon/{utils => }/forecasting/_hypo_tests.py (98%) rename aeon/{utils => }/forecasting/_seasonality.py (100%) delete mode 100644 aeon/utils/forecasting/__init__.py diff --git a/aeon/utils/forecasting/_hypo_tests.py b/aeon/forecasting/_hypo_tests.py similarity index 98% rename from aeon/utils/forecasting/_hypo_tests.py 
rename to aeon/forecasting/_hypo_tests.py index 2d581e971e..664d0c76e5 100644 --- a/aeon/utils/forecasting/_hypo_tests.py +++ b/aeon/forecasting/_hypo_tests.py @@ -52,7 +52,7 @@ def kpss_test(y, regression="c", lags=None): # Test if time series is stationar >>> from aeon.datasets import load_airline >>> y = load_airline() >>> kpss_test(y) - (np.float64(1.1966313813...), np.False_) + (1.1966313813502716, False) """ y = np.asarray(y) n = len(y) diff --git a/aeon/utils/forecasting/_seasonality.py b/aeon/forecasting/_seasonality.py similarity index 100% rename from aeon/utils/forecasting/_seasonality.py rename to aeon/forecasting/_seasonality.py diff --git a/aeon/utils/forecasting/__init__.py b/aeon/utils/forecasting/__init__.py deleted file mode 100644 index a168fa0f11..0000000000 --- a/aeon/utils/forecasting/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Forecasting utils.""" From 6d99d76a4be4cdbe29e458897751a199f69d9421 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Mon, 28 Jul 2025 10:28:36 +0100 Subject: [PATCH 50/50] remove example --- aeon/forecasting/_hypo_tests.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/aeon/forecasting/_hypo_tests.py b/aeon/forecasting/_hypo_tests.py index 664d0c76e5..198a4472a8 100644 --- a/aeon/forecasting/_hypo_tests.py +++ b/aeon/forecasting/_hypo_tests.py @@ -45,14 +45,6 @@ def kpss_test(y, regression="c", lags=None): # Test if time series is stationar of a unit root." Journal of Econometrics, 54(1–3), 159–178. https://doi.org/10.1016/0304-4076(92)90104-Y - - Examples - -------- - >>> from aeon.utils.forecasting._hypo_tests import kpss_test - >>> from aeon.datasets import load_airline - >>> y = load_airline() - >>> kpss_test(y) - (1.1966313813502716, False) """ y = np.asarray(y) n = len(y)
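
A note on the optimiser refactor in PATCHES 36-38: the original aeon.utils.optimisation.nelder_mead took the loss function as an argument and PATCH 36 drops cache=True from that copy, while the forecasting copy ends up resolving an integer loss_id inside dispatch_loss so that every function in the call chain is a plain njit function taking only arrays and scalars. The fragment below is a minimal sketch of that dispatch idea, not code from the patch; _toy_loss and _dispatch are illustrative names, and the motivation (keeping cache=True viable without a function-typed argument) is inferred from the sequence of commits rather than stated in them.

import numpy as np
from numba import njit


@njit(cache=True, fastmath=True)
def _toy_loss(params, data):
    # Stand-in loss: sum of squared errors of a constant-mean model.
    err = data - params[0]
    return (err * err).sum()


@njit(cache=True, fastmath=True)
def _dispatch(loss_id, params, data):
    # Select the loss by integer ID so the caller never holds a function
    # object, keeping the whole call chain compilable with cache=True.
    if loss_id == 0:
        return _toy_loss(params, data)
    raise ValueError("Unknown loss function ID")

# e.g. _dispatch(0, np.array([0.5]), np.arange(5.0))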
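
PATCH 45 replaces the simple d > 1 reconstruction with an iterative un-differencing loop in _fit. For reference, the textbook one-step reintegration can be written as the standalone helper below; the name undifference_one_step is not part of the patch, and this is the cumulative form rather than the in-place, roll-based loop the patch adds, so it is a sketch of the intended reconstruction rather than a line-for-line equivalent.

import numpy as np


def undifference_one_step(series, diff_forecast, d):
    # Reintegrate a one-step forecast made on the d-times differenced scale
    # by adding back the last value of each lower-order difference,
    # from order d - 1 down to the raw series.
    value = diff_forecast
    for order in range(d - 1, -1, -1):
        value += np.diff(series, n=order)[-1]
    return value

For d = 1 this reduces to diff_forecast + series[-1], matching the explicit d == 1 branch; for d = 2 it gives diff_forecast + 2 * series[-1] - series[-2].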
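
After PATCH 50 the end state of the series is a fixed-structure aeon.forecasting.ARIMA whose parameters are found by the numba Nelder-Mead search, with the one-step forecast exposed as forecast_ and multi-step forecasts via iterative_forecast. A minimal usage sketch, using only the API exercised by the tests added in PATCH 34 (the series below is the same hard-coded fixture as in test_arima.py):

import numpy as np

from aeon.forecasting import ARIMA

y = np.array(
    [112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118], dtype=np.float64
)

model = ARIMA(p=2, d=1, q=1)  # fixed (p, d, q) structure, default use_constant=False
model.fit(y)                  # Nelder-Mead search sets phi_, theta_ and forecast_
print(model.forecast_)        # one-step-ahead forecast on the original scale
print(model.iterative_forecast(y, prediction_horizon=3))  # three steps ahead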