From a70b7337782a852be7f8e69a1134d0f3fa1283ca Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Tue, 24 Jun 2025 21:00:39 +0200 Subject: [PATCH 01/21] Reorganize structural model module --- pymc_extras/statespace/models/structural.py | 1679 ----------------- .../statespace/models/structural/__init__.py | 21 + .../models/structural/components/__init__.py | 0 .../structural/components/autoregressive.py | 122 ++ .../models/structural/components/cycle.py | 201 ++ .../structural/components/level_trend.py | 196 ++ .../components/measurement_error.py | 80 + .../structural/components/regression.py | 111 ++ .../structural/components/seasonality.py | 353 ++++ .../statespace/models/structural/core.py | 697 +++++++ .../statespace/models/structural/utils.py | 16 + .../statespace/models/structural/__init__.py | 0 .../models/structural/components/__init__.py | 0 .../components/test_autoregressive.py | 28 + .../structural/components/test_cycle.py | 52 + .../structural/components/test_level_trend.py | 24 + .../components/test_measurement_error.py | 10 + .../structural/components/test_regression.py | 69 + .../structural/components/test_seasonality.py | 83 + .../statespace/models/structural/conftest.py | 27 + .../test_against_statsmodels.py} | 305 --- .../statespace/models/structural/test_core.py | 102 + 22 files changed, 2192 insertions(+), 1984 deletions(-) delete mode 100644 pymc_extras/statespace/models/structural.py create mode 100644 pymc_extras/statespace/models/structural/__init__.py create mode 100644 pymc_extras/statespace/models/structural/components/__init__.py create mode 100644 pymc_extras/statespace/models/structural/components/autoregressive.py create mode 100644 pymc_extras/statespace/models/structural/components/cycle.py create mode 100644 pymc_extras/statespace/models/structural/components/level_trend.py create mode 100644 pymc_extras/statespace/models/structural/components/measurement_error.py create mode 100644 
pymc_extras/statespace/models/structural/components/regression.py create mode 100644 pymc_extras/statespace/models/structural/components/seasonality.py create mode 100644 pymc_extras/statespace/models/structural/core.py create mode 100644 pymc_extras/statespace/models/structural/utils.py create mode 100644 tests/statespace/models/structural/__init__.py create mode 100644 tests/statespace/models/structural/components/__init__.py create mode 100644 tests/statespace/models/structural/components/test_autoregressive.py create mode 100644 tests/statespace/models/structural/components/test_cycle.py create mode 100644 tests/statespace/models/structural/components/test_level_trend.py create mode 100644 tests/statespace/models/structural/components/test_measurement_error.py create mode 100644 tests/statespace/models/structural/components/test_regression.py create mode 100644 tests/statespace/models/structural/components/test_seasonality.py create mode 100644 tests/statespace/models/structural/conftest.py rename tests/statespace/models/{test_structural.py => structural/test_against_statsmodels.py} (62%) create mode 100644 tests/statespace/models/structural/test_core.py diff --git a/pymc_extras/statespace/models/structural.py b/pymc_extras/statespace/models/structural.py deleted file mode 100644 index a982366c3..000000000 --- a/pymc_extras/statespace/models/structural.py +++ /dev/null @@ -1,1679 +0,0 @@ -import functools as ft -import logging - -from abc import ABC -from collections.abc import Sequence -from itertools import pairwise -from typing import Any - -import numpy as np -import pytensor -import pytensor.tensor as pt -import xarray as xr - -from pytensor import Variable -from pytensor.compile.mode import Mode - -from pymc_extras.statespace.core import PytensorRepresentation -from pymc_extras.statespace.core.statespace import PyMCStateSpace -from pymc_extras.statespace.models.utilities import ( - conform_time_varying_and_time_invariant_matrices, - make_default_coords, 
-) -from pymc_extras.statespace.utils.constants import ( - ALL_STATE_AUX_DIM, - ALL_STATE_DIM, - AR_PARAM_DIM, - LONG_MATRIX_NAMES, - POSITION_DERIVATIVE_NAMES, - TIME_DIM, -) - -_log = logging.getLogger("pymc.experimental.statespace") - -floatX = pytensor.config.floatX - - -def order_to_mask(order): - if isinstance(order, int): - return np.ones(order).astype(bool) - else: - return np.array(order).astype(bool) - - -def _frequency_transition_block(s, j): - lam = 2 * np.pi * j / s - - return pt.stack([[pt.cos(lam), pt.sin(lam)], [-pt.sin(lam), pt.cos(lam)]]) - - -class StructuralTimeSeries(PyMCStateSpace): - r""" - Structural Time Series Model - - The structural time series model, named by [1] and presented in statespace form in [2], is a framework for - decomposing a univariate time series into level, trend, seasonal, and cycle components. It also admits the - possibility of exogenous regressors. Unlike the SARIMAX framework, the time series is not assumed to be stationary. - - Notes - ----- - - .. 
math:: - y_t = \mu_t + \gamma_t + c_t + \varepsilon_t - - """ - - def __init__( - self, - ssm: PytensorRepresentation, - state_names: list[str], - data_names: list[str], - shock_names: list[str], - param_names: list[str], - exog_names: list[str], - param_dims: dict[str, tuple[int]], - coords: dict[str, Sequence], - param_info: dict[str, dict[str, Any]], - data_info: dict[str, dict[str, Any]], - component_info: dict[str, dict[str, Any]], - measurement_error: bool, - name_to_variable: dict[str, Variable], - name_to_data: dict[str, Variable] | None = None, - name: str | None = None, - verbose: bool = True, - filter_type: str = "standard", - mode: str | Mode | None = None, - ): - # Add the initial state covariance to the parameters - if name is None: - name = "data" - self._name = name - - k_states, k_posdef, k_endog = ssm.k_states, ssm.k_posdef, ssm.k_endog - param_names, param_dims, param_info = self._add_inital_state_cov_to_properties( - param_names, param_dims, param_info, k_states - ) - self._state_names = state_names.copy() - self._data_names = data_names.copy() - self._shock_names = shock_names.copy() - self._param_names = param_names.copy() - self._param_dims = param_dims.copy() - - default_coords = make_default_coords(self) - coords.update(default_coords) - - self._coords = coords - self._param_info = param_info.copy() - self._data_info = data_info.copy() - self.measurement_error = measurement_error - - super().__init__( - k_endog, - k_states, - max(1, k_posdef), - filter_type=filter_type, - verbose=verbose, - measurement_error=measurement_error, - mode=mode, - ) - self.ssm = ssm.copy() - - if k_posdef == 0: - # If there is no randomness in the model, add dummy matrices to the representation to avoid errors - # when we go to construct random variables from the matrices - self.ssm.k_posdef = self.k_posdef - self.ssm.shapes["state_cov"] = (1, 1, 1) - self.ssm["state_cov"] = pt.zeros((1, 1, 1)) - - self.ssm.shapes["selection"] = (1, self.k_states, 1) - 
self.ssm["selection"] = pt.zeros((1, self.k_states, 1)) - - self._component_info = component_info.copy() - - self._name_to_variable = name_to_variable.copy() - self._name_to_data = name_to_data.copy() - - self._exog_names = exog_names.copy() - self._needs_exog_data = len(exog_names) > 0 - - P0 = self.make_and_register_variable("P0", shape=(self.k_states, self.k_states)) - self.ssm["initial_state_cov"] = P0 - - @staticmethod - def _add_inital_state_cov_to_properties(param_names, param_dims, param_info, k_states): - param_names += ["P0"] - param_dims["P0"] = (ALL_STATE_DIM, ALL_STATE_AUX_DIM) - param_info["P0"] = { - "shape": (k_states, k_states), - "constraints": "Positive semi-definite", - "dims": param_dims["P0"], - } - - return param_names, param_dims, param_info - - @property - def param_names(self): - return self._param_names - - @property - def data_names(self) -> list[str]: - return self._data_names - - @property - def state_names(self): - return self._state_names - - @property - def observed_states(self): - return [self._name] - - @property - def shock_names(self): - return self._shock_names - - @property - def param_dims(self): - return self._param_dims - - @property - def coords(self) -> dict[str, Sequence]: - return self._coords - - @property - def param_info(self) -> dict[str, dict[str, Any]]: - return self._param_info - - @property - def data_info(self) -> dict[str, dict[str, Any]]: - return self._data_info - - def make_symbolic_graph(self) -> None: - """ - Assign placeholder pytensor variables among statespace matrices in positions where PyMC variables will go. - - Notes - ----- - This assignment is handled by the components, so this function is implemented only to avoid the - NotImplementedError raised by the base class. 
- """ - - pass - - def _state_slices_from_info(self): - info = self._component_info.copy() - comp_states = np.cumsum([0] + [info["k_states"] for info in info.values()]) - state_slices = [slice(i, j) for i, j in pairwise(comp_states)] - - return state_slices - - def _hidden_states_from_data(self, data): - state_slices = self._state_slices_from_info() - info = self._component_info - names = info.keys() - result = [] - - for i, (name, s) in enumerate(zip(names, state_slices)): - obs_idx = info[name]["obs_state_idx"] - if obs_idx is None: - continue - - X = data[..., s] - if info[name]["combine_hidden_states"]: - sum_idx = np.flatnonzero(obs_idx) - result.append(X[..., sum_idx].sum(axis=-1)[..., None]) - else: - comp_names = self.state_names[s] - for j, state_name in enumerate(comp_names): - result.append(X[..., j, None]) - - return np.concatenate(result, axis=-1) - - def _get_subcomponent_names(self): - state_slices = self._state_slices_from_info() - info = self._component_info - names = info.keys() - result = [] - - for i, (name, s) in enumerate(zip(names, state_slices)): - if info[name]["combine_hidden_states"]: - result.append(name) - else: - comp_names = self.state_names[s] - result.extend([f"{name}[{comp_name}]" for comp_name in comp_names]) - return result - - def extract_components_from_idata(self, idata: xr.Dataset) -> xr.Dataset: - r""" - Extract interpretable hidden states from an InferenceData returned by a PyMCStateSpace sampling method - - Parameters - ---------- - idata: Dataset - A Dataset object, returned by a PyMCStateSpace sampling method - - Returns - ------- - idata: Dataset - An Dataset object with hidden states transformed to represent only the "interpretable" subcomponents - of the structural model. - - Notes - ----- - In general, a structural statespace model can be represented as: - - .. 
math:: - y_t = \mu_t + \nu_t + \cdots + \gamma_t + c_t + \xi_t + \epsilon_t \tag{1} - - Where: - - - :math:`\mu_t` is the level of the data at time t - - :math:`\nu_t` is the slope of the data at time t - - :math:`\cdots` are higher time derivatives of the position (acceleration, jerk, etc) at time t - - :math:`\gamma_t` is the seasonal component at time t - - :math:`c_t` is the cycle component at time t - - :math:`\xi_t` is the autoregressive error at time t - - :math:`\varepsilon_t` is the measurement error at time t - - In state space form, some or all of these components are represented as linear combinations of other - subcomponents, making interpretation of the outputs of the outputs difficult. The purpose of this function is - to take the expended statespace representation and return a "reduced form" of only the components shown in - equation (1). - """ - - def _extract_and_transform_variable(idata, new_state_names): - *_, time_dim, state_dim = idata.dims - state_func = ft.partial(self._hidden_states_from_data) - new_idata = xr.apply_ufunc( - state_func, - idata, - input_core_dims=[[time_dim, state_dim]], - output_core_dims=[[time_dim, state_dim]], - exclude_dims={state_dim}, - ) - new_idata.coords.update({state_dim: new_state_names}) - return new_idata - - var_names = list(idata.data_vars.keys()) - is_latent = [idata[name].shape[-1] == self.k_states for name in var_names] - new_state_names = self._get_subcomponent_names() - - latent_names = [name for latent, name in zip(is_latent, var_names) if latent] - dropped_vars = set(var_names) - set(latent_names) - if len(dropped_vars) > 0: - _log.warning( - f'Variables {", ".join(dropped_vars)} do not contain all hidden states (their last dimension ' - f"is not {self.k_states}). They will not be present in the modified idata." - ) - if len(dropped_vars) == len(var_names): - raise ValueError( - "Provided idata had no variables with all hidden states; cannot extract components." 
- ) - - idata_new = xr.Dataset( - { - name: _extract_and_transform_variable(idata[name], new_state_names) - for name in latent_names - } - ) - return idata_new - - -class Component(ABC): - r""" - Base class for a component of a structural timeseries model. - - This base class contains a subset of the class attributes of the PyMCStateSpace class, and none of the class - methods. The purpose of a component is to allow the partial definition of a structural model. Components are - assembled into a full model by the StructuralTimeSeries class. - - Parameters - ---------- - name: str - The name of the component - k_endog: int - Number of endogenous variables being modeled. Currently, must be one because structural models only support - univariate data. - k_states: int - Number of hidden states in the component model - k_posdef: int - Rank of the state covariance matrix, or the number of sources of innovations in the component model - measurement_error: bool - Whether the observation associated with the component has measurement error. Default is False. - combine_hidden_states: bool - Flag for the ``extract_hidden_states_from_data`` method. When ``True``, hidden states from the component model - are extracted as ``hidden_states[:, np.flatnonzero(Z)]``. Should be True in models where hidden states - individually have no interpretation, such as seasonal or autoregressive components. 
- """ - - def __init__( - self, - name, - k_endog, - k_states, - k_posdef, - state_names=None, - data_names=None, - shock_names=None, - param_names=None, - exog_names=None, - representation: PytensorRepresentation | None = None, - measurement_error=False, - combine_hidden_states=True, - component_from_sum=False, - obs_state_idxs=None, - ): - self.name = name - self.k_endog = k_endog - self.k_states = k_states - self.k_posdef = k_posdef - self.measurement_error = measurement_error - - self.state_names = state_names if state_names is not None else [] - self.data_names = data_names if data_names is not None else [] - self.shock_names = shock_names if shock_names is not None else [] - self.param_names = param_names if param_names is not None else [] - self.exog_names = exog_names if exog_names is not None else [] - - self.needs_exog_data = len(self.exog_names) > 0 - self.coords = {} - self.param_dims = {} - - self.param_info = {} - self.data_info = {} - - self.param_counts = {} - - if representation is None: - self.ssm = PytensorRepresentation(k_endog=k_endog, k_states=k_states, k_posdef=k_posdef) - else: - self.ssm = representation - - self._name_to_variable = {} - self._name_to_data = {} - - if not component_from_sum: - self.populate_component_properties() - self.make_symbolic_graph() - - self._component_info = { - self.name: { - "k_states": self.k_states, - "k_enodg": self.k_endog, - "k_posdef": self.k_posdef, - "combine_hidden_states": combine_hidden_states, - "obs_state_idx": obs_state_idxs, - } - } - - def make_and_register_variable(self, name, shape, dtype=floatX) -> Variable: - r""" - Helper function to create a pytensor symbolic variable and register it in the _name_to_variable dictionary - - Parameters - ---------- - name : str - The name of the placeholder variable. Must be the name of a model parameter. 
- shape : int or tuple of int - Shape of the parameter - dtype : str, default pytensor.config.floatX - dtype of the parameter - - Notes - ----- - Symbolic pytensor variables are used in the ``make_symbolic_graph`` method as placeholders for PyMC random - variables. The change is made in the ``_insert_random_variables`` method via ``pytensor.graph_replace``. To - make the change, a dictionary mapping pytensor variables to PyMC random variables needs to be constructed. - - The purpose of this method is to: - 1. Create the placeholder symbolic variables - 2. Register the placeholder variable in the ``_name_to_variable`` dictionary - - The shape provided here will define the shape of the prior that will need to be provided by the user. - - An error is raised if the provided name has already been registered, or if the name is not present in the - ``param_names`` property. - """ - if name not in self.param_names: - raise ValueError( - f"{name} is not a model parameter. All placeholder variables should correspond to model " - f"parameters." - ) - - if name in self._name_to_variable.keys(): - raise ValueError( - f"{name} is already a registered placeholder variable with shape " - f"{self._name_to_variable[name].type.shape}" - ) - - placeholder = pt.tensor(name, shape=shape, dtype=dtype) - self._name_to_variable[name] = placeholder - return placeholder - - def make_and_register_data(self, name, shape, dtype=floatX) -> Variable: - r""" - Helper function to create a pytensor symbolic variable and register it in the _name_to_data dictionary - - Parameters - ---------- - name : str - The name of the placeholder data. Must be the name of an expected data variable. - shape : int or tuple of int - Shape of the parameter - dtype : str, default pytensor.config.floatX - dtype of the parameter - - Notes - ----- - See docstring for make_and_register_variable for more details. This function is similar, but handles data - inputs instead of model parameters. 
- - An error is raised if the provided name has already been registered, or if the name is not present in the - ``data_names`` property. - """ - if name not in self.data_names: - raise ValueError( - f"{name} is not a model parameter. All placeholder variables should correspond to model " - f"parameters." - ) - - if name in self._name_to_data.keys(): - raise ValueError( - f"{name} is already a registered placeholder variable with shape " - f"{self._name_to_data[name].type.shape}" - ) - - placeholder = pt.tensor(name, shape=shape, dtype=dtype) - self._name_to_data[name] = placeholder - return placeholder - - def make_symbolic_graph(self) -> None: - raise NotImplementedError - - def populate_component_properties(self): - raise NotImplementedError - - def _get_combined_shapes(self, other): - k_states = self.k_states + other.k_states - k_posdef = self.k_posdef + other.k_posdef - if self.k_endog != other.k_endog: - raise NotImplementedError( - "Merging elements with different numbers of observed states is not supported.>" - ) - k_endog = self.k_endog - - return k_states, k_posdef, k_endog - - def _combine_statespace_representations(self, other): - def make_slice(name, x, o_x): - ndim = max(x.ndim, o_x.ndim) - return (name,) + (slice(None, None, None),) * ndim - - k_states, k_posdef, k_endog = self._get_combined_shapes(other) - - self_matrices = [self.ssm[name] for name in LONG_MATRIX_NAMES] - other_matrices = [other.ssm[name] for name in LONG_MATRIX_NAMES] - - x0, P0, c, d, T, Z, R, H, Q = ( - self.ssm[make_slice(name, x, o_x)] - for name, x, o_x in zip(LONG_MATRIX_NAMES, self_matrices, other_matrices) - ) - o_x0, o_P0, o_c, o_d, o_T, o_Z, o_R, o_H, o_Q = ( - other.ssm[make_slice(name, x, o_x)] - for name, x, o_x in zip(LONG_MATRIX_NAMES, self_matrices, other_matrices) - ) - - initial_state = pt.concatenate(conform_time_varying_and_time_invariant_matrices(x0, o_x0)) - initial_state.name = x0.name - - initial_state_cov = pt.linalg.block_diag(P0, o_P0) - 
initial_state_cov.name = P0.name - - state_intercept = pt.concatenate(conform_time_varying_and_time_invariant_matrices(c, o_c)) - state_intercept.name = c.name - - obs_intercept = d + o_d - obs_intercept.name = d.name - - transition = pt.linalg.block_diag(T, o_T) - transition.name = T.name - - design = pt.concatenate(conform_time_varying_and_time_invariant_matrices(Z, o_Z), axis=-1) - design.name = Z.name - - selection = pt.linalg.block_diag(R, o_R) - selection.name = R.name - - obs_cov = H + o_H - obs_cov.name = H.name - - state_cov = pt.linalg.block_diag(Q, o_Q) - state_cov.name = Q.name - - new_ssm = PytensorRepresentation( - k_endog=k_endog, - k_states=k_states, - k_posdef=k_posdef, - initial_state=initial_state, - initial_state_cov=initial_state_cov, - state_intercept=state_intercept, - obs_intercept=obs_intercept, - transition=transition, - design=design, - selection=selection, - obs_cov=obs_cov, - state_cov=state_cov, - ) - - return new_ssm - - def _combine_property(self, other, name): - self_prop = getattr(self, name) - if isinstance(self_prop, list): - return self_prop + getattr(other, name) - elif isinstance(self_prop, dict): - new_prop = self_prop.copy() - new_prop.update(getattr(other, name)) - return new_prop - - def _combine_component_info(self, other): - combined_info = {} - for key, value in self._component_info.items(): - if not key.startswith("StateSpace"): - if key in combined_info.keys(): - raise ValueError(f"Found duplicate component named {key}") - combined_info[key] = value - - for key, value in other._component_info.items(): - if not key.startswith("StateSpace"): - if key in combined_info.keys(): - raise ValueError(f"Found duplicate component named {key}") - combined_info[key] = value - - return combined_info - - def _make_combined_name(self): - components = self._component_info.keys() - name = f'StateSpace[{", ".join(components)}]' - return name - - def __add__(self, other): - state_names = self._combine_property(other, "state_names") - 
data_names = self._combine_property(other, "data_names") - param_names = self._combine_property(other, "param_names") - shock_names = self._combine_property(other, "shock_names") - param_info = self._combine_property(other, "param_info") - data_info = self._combine_property(other, "data_info") - param_dims = self._combine_property(other, "param_dims") - coords = self._combine_property(other, "coords") - exog_names = self._combine_property(other, "exog_names") - - _name_to_variable = self._combine_property(other, "_name_to_variable") - _name_to_data = self._combine_property(other, "_name_to_data") - - measurement_error = any([self.measurement_error, other.measurement_error]) - - k_states, k_posdef, k_endog = self._get_combined_shapes(other) - ssm = self._combine_statespace_representations(other) - - new_comp = Component( - name="", - k_endog=1, - k_states=k_states, - k_posdef=k_posdef, - measurement_error=measurement_error, - representation=ssm, - component_from_sum=True, - ) - new_comp._component_info = self._combine_component_info(other) - new_comp.name = new_comp._make_combined_name() - - names_and_props = [ - ("state_names", state_names), - ("data_names", data_names), - ("param_names", param_names), - ("shock_names", shock_names), - ("param_dims", param_dims), - ("coords", coords), - ("param_dims", param_dims), - ("param_info", param_info), - ("data_info", data_info), - ("exog_names", exog_names), - ("_name_to_variable", _name_to_variable), - ("_name_to_data", _name_to_data), - ] - - for prop, value in names_and_props: - setattr(new_comp, prop, value) - - return new_comp - - def build( - self, name=None, filter_type="standard", verbose=True, mode: str | Mode | None = None - ): - """ - Build a StructuralTimeSeries statespace model from the current component(s) - - Parameters - ---------- - name: str, optional - Name of the exogenous data being modeled. Default is "data" - - filter_type : str, optional - The type of Kalman filter to use. 
Valid options are "standard", "univariate", "single", "cholesky", and - "steady_state". For more information, see the docs for each filter. Default is "standard". - - verbose : bool, optional - If True, displays information about the initialized model. Defaults to True. - - mode: str or Mode, optional - Pytensor compile mode, used in auxiliary sampling methods such as ``sample_conditional_posterior`` and - ``forecast``. The mode does **not** effect calls to ``pm.sample``. - - Regardless of whether a mode is specified, it can always be overwritten via the ``compile_kwargs`` argument - to all sampling methods. - - Returns - ------- - PyMCStateSpace - An initialized instance of a PyMCStateSpace, constructed using the system matrices contained in the - components. - """ - - return StructuralTimeSeries( - self.ssm, - name=name, - state_names=self.state_names, - data_names=self.data_names, - shock_names=self.shock_names, - param_names=self.param_names, - param_dims=self.param_dims, - coords=self.coords, - param_info=self.param_info, - data_info=self.data_info, - component_info=self._component_info, - measurement_error=self.measurement_error, - exog_names=self.exog_names, - name_to_variable=self._name_to_variable, - name_to_data=self._name_to_data, - filter_type=filter_type, - verbose=verbose, - mode=mode, - ) - - -class LevelTrendComponent(Component): - r""" - Level and trend component of a structural time series model - - Parameters - ---------- - __________ - order : int - - Number of time derivatives of the trend to include in the model. For example, when order=3, the trend will - be of the form ``y = a + b * t + c * t ** 2``, where the coefficients ``a, b, c`` come from the initial - state values. - - innovations_order : int or sequence of int, optional - - The number of stochastic innovations to include in the model. 
By default, ``innovations_order = order`` - - Notes - ----- - This class implements the level and trend components of the general structural time series model. In the most - general form, the level and trend is described by a system of two time-varying equations. - - .. math:: - \begin{align} - \mu_{t+1} &= \mu_t + \nu_t + \zeta_t \\ - \nu_{t+1} &= \nu_t + \xi_t - \zeta_t &\sim N(0, \sigma_\zeta) \\ - \xi_t &\sim N(0, \sigma_\xi) - \end{align} - - Where :math:`\mu_{t+1}` is the mean of the timeseries at time t, and :math:`\nu_t` is the drift or the slope of - the process. When both innovations :math:`\zeta_t` and :math:`\xi_t` are included in the model, it is known as a - *local linear trend* model. This system of two equations, corresponding to ``order=2``, can be expanded or - contracted by adding or removing equations. ``order=3`` would add an acceleration term to the sytsem: - - .. math:: - \begin{align} - \mu_{t+1} &= \mu_t + \nu_t + \zeta_t \\ - \nu_{t+1} &= \nu_t + \eta_t + \xi_t \\ - \eta_{t+1} &= \eta_{t-1} + \omega_t \\ - \zeta_t &\sim N(0, \sigma_\zeta) \\ - \xi_t &\sim N(0, \sigma_\xi) \\ - \omega_t &\sim N(0, \sigma_\omega) - \end{align} - - After setting all innovation terms to zero and defining initial states :math:`\mu_0, \nu_0, \eta_0`, these equations - can be collapsed to: - - .. math:: - \mu_t = \mu_0 + \nu_0 \cdot t + \eta_0 \cdot t^2 - - Which clarifies how the order and initial states influence the model. In particular, the initial states are the - coefficients on the intercept, slope, acceleration, and so on. - - In this light, allowing for innovations can be understood as allowing these coefficients to vary over time. Each - component can be individually selected for time variation by passing a list to the ``innovations_order`` argument. - For example, a constant intercept with time varying trend and acceleration is specified as ``order=3, - innovations_order=[0, 1, 1]``. 
- - By choosing the ``order`` and ``innovations_order``, a large variety of models can be obtained. Notable - models include: - - * Constant intercept, ``order=1, innovations_order=0`` - - .. math:: - \mu_t = \mu - - * Constant linear slope, ``order=2, innovations_order=0`` - - .. math:: - \mu_t = \mu_{t-1} + \nu - - * Gaussian Random Walk, ``order=1, innovations_order=1`` - - .. math:: - \mu_t = \mu_{t-1} + \zeta_t - - * Gaussian Random Walk with Drift, ``order=2, innovations_order=1`` - - .. math:: - \mu_t = \mu_{t-1} + \nu + \zeta_t - - * Smooth Trend, ``order=2, innovations_order=[0, 1]`` - - .. math:: - \begin{align} - \mu_t &= \mu_{t-1} + \nu_{t-1} \\ - \nu_t &= \nu_{t-1} + \xi_t - \end{align} - - * Local Level, ``order=2, innovations_order=2`` - - [1] notes that the smooth trend model produces more gradually changing slopes than the full local linear trend - model, and is equivalent to an "integrated trend model". - - References - ---------- - .. [1] Durbin, James, and Siem Jan Koopman. 2012. - Time Series Analysis by State Space Methods: Second Edition. - Oxford University Press. - - """ - - def __init__( - self, - order: int | list[int] = 2, - innovations_order: int | list[int] | None = None, - name: str = "LevelTrend", - ): - if innovations_order is None: - innovations_order = order - - self._order_mask = order_to_mask(order) - max_state = np.flatnonzero(self._order_mask)[-1].item() + 1 - - # If the user passes excess zeros, raise an error. The alternative is to prune them, but this would cause - # the shape of the state to be different to what the user expects. - if len(self._order_mask) > max_state: - raise ValueError( - f"order={order} is invalid. The highest derivative should not be set to zero. If you want a " - f"lower order model, explicitly omit the zeros." 
- ) - k_states = max_state - - if isinstance(innovations_order, int): - n = innovations_order - innovations_order = order_to_mask(k_states) - if n > 0: - innovations_order[n:] = False - else: - innovations_order[:] = False - else: - innovations_order = order_to_mask(innovations_order) - - self.innovations_order = innovations_order[:max_state] - k_posdef = int(sum(innovations_order)) - - super().__init__( - name, - k_endog=1, - k_states=k_states, - k_posdef=k_posdef, - measurement_error=False, - combine_hidden_states=False, - obs_state_idxs=np.array([1.0] + [0.0] * (k_states - 1)), - ) - - def populate_component_properties(self): - name_slice = POSITION_DERIVATIVE_NAMES[: self.k_states] - self.param_names = ["initial_trend"] - self.state_names = [name for name, mask in zip(name_slice, self._order_mask) if mask] - self.param_dims = {"initial_trend": ("trend_state",)} - self.coords = {"trend_state": self.state_names} - self.param_info = {"initial_trend": {"shape": (self.k_states,), "constraints": None}} - - if self.k_posdef > 0: - self.param_names += ["sigma_trend"] - self.shock_names = [ - name for name, mask in zip(name_slice, self.innovations_order) if mask - ] - self.param_dims["sigma_trend"] = ("trend_shock",) - self.coords["trend_shock"] = self.shock_names - self.param_info["sigma_trend"] = {"shape": (self.k_posdef,), "constraints": "Positive"} - - for name in self.param_names: - self.param_info[name]["dims"] = self.param_dims[name] - - def make_symbolic_graph(self) -> None: - initial_trend = self.make_and_register_variable("initial_trend", shape=(self.k_states,)) - self.ssm["initial_state", :] = initial_trend - triu_idx = np.triu_indices(self.k_states) - self.ssm[np.s_["transition", triu_idx[0], triu_idx[1]]] = 1 - - R = np.eye(self.k_states) - R = R[:, self.innovations_order] - self.ssm["selection", :, :] = R - - self.ssm["design", 0, :] = np.array([1.0] + [0.0] * (self.k_states - 1)) - - if self.k_posdef > 0: - sigma_trend = 
self.make_and_register_variable("sigma_trend", shape=(self.k_posdef,)) - diag_idx = np.diag_indices(self.k_posdef) - idx = np.s_["state_cov", diag_idx[0], diag_idx[1]] - self.ssm[idx] = sigma_trend**2 - - -class MeasurementError(Component): - r""" - Measurement error term for a structural timeseries model - - Parameters - ---------- - name: str, optional - - Name of the observed data. Default is "obs". - - Notes - ----- - This component should only be used in combination with other components, because it has no states. It's only use - is to add a variance parameter to the model, associated with the observation noise matrix H. - - Examples - -------- - Create and estimate a deterministic linear trend with measurement error - - .. code:: python - - from pymc_extras.statespace import structural as st - import pymc as pm - import pytensor.tensor as pt - - trend = st.LevelTrendComponent(order=2, innovations_order=0) - error = st.MeasurementError() - ss_mod = (trend + error).build() - - with pm.Model(coords=ss_mod.coords) as model: - P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states) * 10, dims=ss_mod.param_dims['P0']) - intitial_trend = pm.Normal('initial_trend', sigma=10, dims=ss_mod.param_dims['initial_trend']) - sigma_obs = pm.Exponential('sigma_obs', 1, dims=ss_mod.param_dims['sigma_obs']) - - ss_mod.build_statespace_graph(data) - idata = pm.sample(nuts_sampler='numpyro') - """ - - def __init__(self, name: str = "MeasurementError"): - k_endog = 1 - k_states = 0 - k_posdef = 0 - - super().__init__( - name, k_endog, k_states, k_posdef, measurement_error=True, combine_hidden_states=False - ) - - def populate_component_properties(self): - self.param_names = [f"sigma_{self.name}"] - self.param_dims = {} - self.param_info = { - f"sigma_{self.name}": { - "shape": (), - "constraints": "Positive", - "dims": None, - } - } - - def make_symbolic_graph(self) -> None: - sigma_shape = () - error_sigma = self.make_and_register_variable(f"sigma_{self.name}", shape=sigma_shape) - 
diag_idx = np.diag_indices(self.k_endog) - idx = np.s_["obs_cov", diag_idx[0], diag_idx[1]] - self.ssm[idx] = error_sigma**2 - - -class AutoregressiveComponent(Component): - r""" - Autoregressive timeseries component - - Parameters - ---------- - order: int or sequence of int - - If int, the number of lags to include in the model. - If a sequence, an array-like of zeros and ones indicating which lags to include in the model. - - Notes - ----- - An autoregressive component can be thought of as a way o introducing serially correlated errors into the model. - The process is modeled: - - .. math:: - x_t = \sum_{i=1}^p \rho_i x_{t-i} - - Where ``p``, the number of autoregressive terms to model, is the order of the process. By default, all lags up to - ``p`` are included in the model. To disable lags, pass a list of zeros and ones to the ``order`` argumnet. For - example, ``order=[1, 1, 0, 1]`` would become: - - .. math:: - x_t = \rho_1 x_{t-1} + \rho_2 x_{t-1} + \rho_4 x_{t-1} - - The coefficient :math:`\rho_3` has been constrained to zero. - - .. warning:: This class is meant to be used as a component in a structural time series model. For modeling of - stationary processes with ARIMA, use ``statespace.BayesianSARIMA``. - - Examples - -------- - Model a timeseries as an AR(2) process with non-zero mean: - - .. 
code:: python - - from pymc_extras.statespace import structural as st - import pymc as pm - import pytensor.tensor as pt - - trend = st.LevelTrendComponent(order=1, innovations_order=0) - ar = st.AutoregressiveComponent(2) - ss_mod = (trend + ar).build() - - with pm.Model(coords=ss_mod.coords) as model: - P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states) * 10, dims=ss_mod.param_dims['P0']) - intitial_trend = pm.Normal('initial_trend', sigma=10, dims=ss_mod.param_dims['initial_trend']) - ar_params = pm.Normal('ar_params', dims=ss_mod.param_dims['ar_params']) - sigma_ar = pm.Exponential('sigma_ar', 1, dims=ss_mod.param_dims['sigma_ar']) - - ss_mod.build_statespace_graph(data) - idata = pm.sample(nuts_sampler='numpyro') - - """ - - def __init__(self, order: int = 1, name: str = "AutoRegressive"): - order = order_to_mask(order) - ar_lags = np.flatnonzero(order).ravel().astype(int) + 1 - k_states = len(order) - - self.order = order - self.ar_lags = ar_lags - - super().__init__( - name=name, - k_endog=1, - k_states=k_states, - k_posdef=1, - measurement_error=True, - combine_hidden_states=True, - obs_state_idxs=np.r_[[1.0], np.zeros(k_states - 1)], - ) - - def populate_component_properties(self): - self.state_names = [f"L{i + 1}.data" for i in range(self.k_states)] - self.shock_names = [f"{self.name}_innovation"] - self.param_names = ["ar_params", "sigma_ar"] - self.param_dims = {"ar_params": (AR_PARAM_DIM,)} - self.coords = {AR_PARAM_DIM: self.ar_lags.tolist()} - - self.param_info = { - "ar_params": { - "shape": (self.k_states,), - "constraints": None, - "dims": (AR_PARAM_DIM,), - }, - "sigma_ar": {"shape": (), "constraints": "Positive", "dims": None}, - } - - def make_symbolic_graph(self) -> None: - k_nonzero = int(sum(self.order)) - ar_params = self.make_and_register_variable("ar_params", shape=(k_nonzero,)) - sigma_ar = self.make_and_register_variable("sigma_ar", shape=()) - - T = np.eye(self.k_states, k=-1) - self.ssm["transition", :, :] = T - self.ssm["selection", 
0, 0] = 1 - self.ssm["design", 0, 0] = 1 - - ar_idx = ("transition", np.zeros(k_nonzero, dtype="int"), np.nonzero(self.order)[0]) - self.ssm[ar_idx] = ar_params - - cov_idx = ("state_cov", *np.diag_indices(1)) - self.ssm[cov_idx] = sigma_ar**2 - - -class TimeSeasonality(Component): - r""" - Seasonal component, modeled in the time domain - - Parameters - ---------- - season_length: int - The number of periods in a single seasonal cycle, e.g. 12 for monthly data with annual seasonal pattern, 7 for - daily data with weekly seasonal pattern, etc. - - innovations: bool, default True - Whether to include stochastic innovations in the strength of the seasonal effect - - name: str, default None - A name for this seasonal component. Used to label dimensions and coordinates. Useful when multiple seasonal - components are included in the same model. Default is ``f"Seasonal[s={season_length}]"`` - - state_names: list of str, default None - List of strings for seasonal effect labels. If provided, it must be of length ``season_length``. An example - would be ``state_names = ['Mon', 'Tue', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun']`` when data is daily with a weekly - seasonal pattern (``season_length = 7``). - - If None, states will be numbered ``[State_0, ..., State_s]`` - - remove_first_state: bool, default True - If True, the first state will be removed from the model. This is done because there are only n-1 degrees of - freedom in the seasonal component, and one state is not identified. If False, the first state will be - included in the model, but it will not be identified -- you will need to handle this in the priors (e.g. with - ZeroSumNormal). - - Notes - ----- - A seasonal effect is any pattern that repeats every fixed interval. Although there are many possible ways to - model seasonal effects, the implementation used here is the one described by [1] as the "canonical" time domain - representation. The seasonal component can be expressed: - - .. 
math:: - \gamma_t = -\sum_{i=1}^{s-1} \gamma_{t-i} + \omega_t, \quad \omega_t \sim N(0, \sigma_\gamma) - - Where :math:`s` is the ``seasonal_length`` parameter and :math:`\omega_t` is the (optional) stochastic innovation. - To give interpretation to the :math:`\gamma` terms, it is helpful to work through the algebra for a simple - example. Let :math:`s=4`, and omit the shock term. Define initial conditions :math:`\gamma_0, \gamma_{-1}, - \gamma_{-2}`. The value of the seasonal component for the first 5 timesteps will be: - - .. math:: - \begin{align} - \gamma_1 &= -\gamma_0 - \gamma_{-1} - \gamma_{-2} \\ - \gamma_2 &= -\gamma_1 - \gamma_0 - \gamma_{-1} \\ - &= -(-\gamma_0 - \gamma_{-1} - \gamma_{-2}) - \gamma_0 - \gamma_{-1} \\ - &= (\gamma_0 - \gamma_0 )+ (\gamma_{-1} - \gamma_{-1}) + \gamma_{-2} \\ - &= \gamma_{-2} \\ - \gamma_3 &= -\gamma_2 - \gamma_1 - \gamma_0 \\ - &= -\gamma_{-2} - (-\gamma_0 - \gamma_{-1} - \gamma_{-2}) - \gamma_0 \\ - &= (\gamma_{-2} - \gamma_{-2}) + \gamma_{-1} + (\gamma_0 - \gamma_0) \\ - &= \gamma_{-1} \\ - \gamma_4 &= -\gamma_3 - \gamma_2 - \gamma_1 \\ - &= -\gamma_{-1} - \gamma_{-2} -(-\gamma_0 - \gamma_{-1} - \gamma_{-2}) \\ - &= (\gamma_{-2} - \gamma_{-2}) + (\gamma_{-1} - \gamma_{-1}) + \gamma_0 \\ - &= \gamma_0 \\ - \gamma_5 &= -\gamma_4 - \gamma_3 - \gamma_2 \\ - &= -\gamma_0 - \gamma_{-1} - \gamma_{-2} \\ - &= \gamma_1 - \end{align} - - This exercise shows that, given a list ``initial_conditions`` of length ``s-1``, the effects of this model will be: - - - Period 1: ``-sum(initial_conditions)`` - - Period 2: ``initial_conditions[-1]`` - - Period 3: ``initial_conditions[-2]`` - - ... - - Period s: ``initial_conditions[0]`` - - Period s+1: ``-sum(initial_condition)`` - - And so on. So for interpretation, the ``season_length - 1`` initial states are, when reversed, the coefficients - associated with ``state_names[1:]``. - - .. 
warning:: - Although the ``state_names`` argument expects a list of length ``season_length``, only ``state_names[1:]`` - will be saved as model dimensions, since the 1st coefficient is not identified (it is defined as - :math:`-\sum_{i=1}^{s} \gamma_{t-i}`). - - Examples - -------- - Estimate monthly with a model with a gaussian random walk trend and monthly seasonality: - - .. code:: python - - from pymc_extras.statespace import structural as st - import pymc as pm - import pytensor.tensor as pt - import pandas as pd - - # Get month names - state_names = pd.date_range('1900-01-01', '1900-12-31', freq='MS').month_name().tolist() - - # Build the structural model - grw = st.LevelTrendComponent(order=1, innovations_order=1) - annual_season = st.TimeSeasonality(season_length=12, name='annual', state_names=state_names, innovations=False) - ss_mod = (grw + annual_season).build() - - # Estimate with PyMC - with pm.Model(coords=ss_mod.coords) as model: - P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states) * 10, dims=ss_mod.param_dims['P0']) - intitial_trend = pm.Deterministic('initial_trend', pt.zeros(1), dims=ss_mod.param_dims['initial_trend']) - annual_coefs = pm.Normal('annual_coefs', sigma=1e-2, dims=ss_mod.param_dims['annual_coefs']) - trend_sigmas = pm.HalfNormal('trend_sigmas', sigma=1e-6, dims=ss_mod.param_dims['trend_sigmas']) - ss_mod.build_statespace_graph(data) - idata = pm.sample(nuts_sampler='numpyro') - - References - ---------- - .. [1] Durbin, James, and Siem Jan Koopman. 2012. - Time Series Analysis by State Space Methods: Second Edition. - Oxford University Press. 
- """ - - def __init__( - self, - season_length: int, - innovations: bool = True, - name: str | None = None, - state_names: list | None = None, - remove_first_state: bool = True, - ): - if name is None: - name = f"Seasonal[s={season_length}]" - if state_names is None: - state_names = [f"{name}_{i}" for i in range(season_length)] - else: - if len(state_names) != season_length: - raise ValueError( - f"state_names must be a list of length season_length, got {len(state_names)}" - ) - state_names = state_names.copy() - self.innovations = innovations - self.remove_first_state = remove_first_state - - if self.remove_first_state: - # In traditional models, the first state isn't identified, so we can help out the user by automatically - # discarding it. - # TODO: Can this be stashed and reconstructed automatically somehow? - state_names.pop(0) - - k_states = season_length - int(self.remove_first_state) - - super().__init__( - name=name, - k_endog=1, - k_states=k_states, - k_posdef=int(innovations), - state_names=state_names, - measurement_error=False, - combine_hidden_states=True, - obs_state_idxs=np.r_[[1.0], np.zeros(k_states - 1)], - ) - - def populate_component_properties(self): - self.param_names = [f"{self.name}_coefs"] - self.param_info = { - f"{self.name}_coefs": { - "shape": (self.k_states,), - "constraints": None, - "dims": (f"{self.name}_state",), - } - } - self.param_dims = {f"{self.name}_coefs": (f"{self.name}_state",)} - self.coords = {f"{self.name}_state": self.state_names} - - if self.innovations: - self.param_names += [f"sigma_{self.name}"] - self.param_info[f"sigma_{self.name}"] = { - "shape": (), - "constraints": "Positive", - "dims": None, - } - self.shock_names = [f"{self.name}"] - - def make_symbolic_graph(self) -> None: - if self.remove_first_state: - # In this case, parameters are normalized to sum to zero, so the current state is the negative sum of - # all previous states. 
- T = np.eye(self.k_states, k=-1) - T[0, :] = -1 - else: - # In this case we assume the user to be responsible for ensuring the states sum to zero, so T is just a - # circulant matrix that cycles between the states. - T = np.eye(self.k_states, k=1) - T[-1, 0] = 1 - - self.ssm["transition", :, :] = T - self.ssm["design", 0, 0] = 1 - - initial_states = self.make_and_register_variable( - f"{self.name}_coefs", shape=(self.k_states,) - ) - self.ssm["initial_state", np.arange(self.k_states, dtype=int)] = initial_states - - if self.innovations: - self.ssm["selection", 0, 0] = 1 - season_sigma = self.make_and_register_variable(f"sigma_{self.name}", shape=()) - cov_idx = ("state_cov", *np.diag_indices(1)) - self.ssm[cov_idx] = season_sigma**2 - - -class FrequencySeasonality(Component): - r""" - Seasonal component, modeled in the frequency domain - - Parameters - ---------- - season_length: float - The number of periods in a single seasonal cycle, e.g. 12 for monthly data with annual seasonal pattern, 7 for - daily data with weekly seasonal pattern, etc. Non-integer seasonal_length is also permitted, for example - 365.2422 days in a (solar) year. - - n: int - Number of fourier features to include in the seasonal component. Default is ``season_length // 2``, which - is the maximum possible. A smaller number can be used for a more wave-like seasonal pattern. - - name: str, default None - A name for this seasonal component. Used to label dimensions and coordinates. Useful when multiple seasonal - components are included in the same model. Default is ``f"Seasonal[s={season_length}, n={n}]"`` - - innovations: bool, default True - Whether to include stochastic innovations in the strength of the seasonal effect - - Notes - ----- - A seasonal effect is any pattern that repeats every fixed interval. Although there are many possible ways to - model seasonal effects, the implementation used here is the one described by [1] as the "canonical" frequency domain - representation. 
The seasonal component can be expressed: - - .. math:: - \begin{align} - \gamma_t &= \sum_{j=1}^{2n} \gamma_{j,t} \\ - \gamma_{j, t+1} &= \gamma_{j,t} \cos \lambda_j + \gamma_{j,t}^\star \sin \lambda_j + \omega_{j, t} \\ - \gamma_{j, t}^\star &= -\gamma_{j,t} \sin \lambda_j + \gamma_{j,t}^\star \cos \lambda_j + \omega_{j,t}^\star - \lambda_j &= \frac{2\pi j}{s} - \end{align} - - Where :math:`s` is the ``seasonal_length``. - - Unlike a ``TimeSeasonality`` component, a ``FrequencySeasonality`` component does not require integer season - length. In addition, for long seasonal periods, it is possible to obtain a more compact state space representation - by choosing ``n << s // 2``. Using ``TimeSeasonality``, an annual seasonal pattern in daily data requires 364 - states, whereas ``FrequencySeasonality`` always requires ``2 * n`` states, regardless of the ``seasonal_length``. - The price of this compactness is less representational power. At ``n = 1``, the seasonal pattern will be a pure - sine wave. At ``n = s // 2``, any arbitrary pattern can be represented. - - One cost of the added flexibility of ``FrequencySeasonality`` is reduced interpretability. States of this model are - coefficients :math:`\gamma_1, \gamma^\star_1, \gamma_2, \gamma_2^\star ..., \gamma_n, \gamma^\star_n` associated - with different frequencies in the fourier representation of the seasonal pattern. As a result, it is not possible - to isolate and identify a "Monday" effect, for instance. - """ - - def __init__(self, season_length, n=None, name=None, innovations=True): - if n is None: - n = int(season_length // 2) - if name is None: - name = f"Frequency[s={season_length}, n={n}]" - - k_states = n * 2 - self.n = n - self.season_length = season_length - self.innovations = innovations - - # If the model is completely saturated (n = s // 2), the last state will not be identified, so it shouldn't - # get a parameter assigned to it and should just be fixed to zero. 
- # Test this way (rather than n == s // 2) to catch cases when n is non-integer. - self.last_state_not_identified = self.season_length / self.n == 2.0 - self.n_coefs = k_states - int(self.last_state_not_identified) - - obs_state_idx = np.zeros(k_states) - obs_state_idx[slice(0, k_states, 2)] = 1 - - super().__init__( - name=name, - k_endog=1, - k_states=k_states, - k_posdef=k_states * int(self.innovations), - measurement_error=False, - combine_hidden_states=True, - obs_state_idxs=obs_state_idx, - ) - - def make_symbolic_graph(self) -> None: - self.ssm["design", 0, slice(0, self.k_states, 2)] = 1 - - init_state = self.make_and_register_variable(f"{self.name}", shape=(self.n_coefs,)) - - init_state_idx = np.arange(self.n_coefs, dtype=int) - self.ssm["initial_state", init_state_idx] = init_state - - T_mats = [_frequency_transition_block(self.season_length, j + 1) for j in range(self.n)] - T = pt.linalg.block_diag(*T_mats) - self.ssm["transition", :, :] = T - - if self.innovations: - sigma_season = self.make_and_register_variable(f"sigma_{self.name}", shape=()) - self.ssm["state_cov", :, :] = pt.eye(self.k_posdef) * sigma_season**2 - self.ssm["selection", :, :] = np.eye(self.k_states) - - def populate_component_properties(self): - self.state_names = [f"{self.name}_{f}_{i}" for i in range(self.n) for f in ["Cos", "Sin"]] - self.param_names = [f"{self.name}"] - - self.param_dims = {self.name: (f"{self.name}_state",)} - self.param_info = { - f"{self.name}": { - "shape": (self.k_states - int(self.last_state_not_identified),), - "constraints": None, - "dims": (f"{self.name}_state",), - } - } - - init_state_idx = np.arange(self.k_states, dtype=int) - if self.last_state_not_identified: - init_state_idx = init_state_idx[:-1] - self.coords = {f"{self.name}_state": [self.state_names[i] for i in init_state_idx]} - - if self.innovations: - self.shock_names = self.state_names.copy() - self.param_names += [f"sigma_{self.name}"] - self.param_info[f"sigma_{self.name}"] = { - "shape": 
(), - "constraints": "Positive", - "dims": None, - } - - -class CycleComponent(Component): - r""" - A component for modeling longer-term cyclical effects - - Parameters - ---------- - name: str - Name of the component. Used in generated coordinates and state names. If None, a descriptive name will be - used. - - cycle_length: int, optional - The length of the cycle, in the calendar units of your data. For example, if your data is monthly, and you - want to model a 12-month cycle, use ``cycle_length=12``. You cannot specify both ``cycle_length`` and - ``estimate_cycle_length``. - - estimate_cycle_length: bool, default False - Whether to estimate the cycle length. If True, an additional parameter, ``cycle_length`` will be added to the - model. You cannot specify both ``cycle_length`` and ``estimate_cycle_length``. - - dampen: bool, default False - Whether to dampen the cycle by multiplying by a dampening factor :math:`\rho` at every timestep. If true, - an additional parameter, ``dampening_factor`` will be added to the model. - - innovations: bool, default True - Whether to include stochastic innovations in the strength of the seasonal effect. If True, an additional - parameter, ``sigma_{name}`` will be added to the model. - - Notes - ----- - The cycle component is very similar in implementation to the frequency domain seasonal component, expect that it - is restricted to n=1. The cycle component can be expressed: - - .. math:: - \begin{align} - \gamma_t &= \rho \gamma_{t-1} \cos \lambda + \rho \gamma_{t-1}^\star \sin \lambda + \omega_{t} \\ - \gamma_{t}^\star &= -\rho \gamma_{t-1} \sin \lambda + \rho \gamma_{t-1}^\star \cos \lambda + \omega_{t}^\star \\ - \lambda &= \frac{2\pi}{s} - \end{align} - - Where :math:`s` is the ``cycle_length``. [1] recommend that this component be used for longer term cyclical - effects, such as business cycles, and that the seasonal component be used for shorter term effects, such as - weekly or monthly seasonality. 
- - Unlike a FrequencySeasonality component, the length of a CycleComponent can be estimated. - - Examples - -------- - Estimate a business cycle with length between 6 and 12 years: - - .. code:: python - - from pymc_extras.statespace import structural as st - import pymc as pm - import pytensor.tensor as pt - import pandas as pd - import numpy as np - - data = np.random.normal(size=(100, 1)) - - # Build the structural model - grw = st.LevelTrendComponent(order=1, innovations_order=1) - cycle = st.CycleComponent('business_cycle', estimate_cycle_length=True, dampen=False) - ss_mod = (grw + cycle).build() - - # Estimate with PyMC - with pm.Model(coords=ss_mod.coords) as model: - P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states), dims=ss_mod.param_dims['P0']) - intitial_trend = pm.Normal('initial_trend', dims=ss_mod.param_dims['initial_trend']) - sigma_trend = pm.HalfNormal('sigma_trend', dims=ss_mod.param_dims['sigma_trend']) - - cycle_strength = pm.Normal('business_cycle') - cycle_length = pm.Uniform('business_cycle_length', lower=6, upper=12) - - sigma_cycle = pm.HalfNormal('sigma_business_cycle', sigma=1) - ss_mod.build_statespace_graph(data) - - idata = pm.sample(nuts_sampler='numpyro') - - References - ---------- - .. [1] Durbin, James, and Siem Jan Koopman. 2012. - Time Series Analysis by State Space Methods: Second Edition. - Oxford University Press. 
- """ - - def __init__( - self, - name: str | None = None, - cycle_length: int | None = None, - estimate_cycle_length: bool = False, - dampen: bool = False, - innovations: bool = True, - ): - if cycle_length is None and not estimate_cycle_length: - raise ValueError("Must specify cycle_length if estimate_cycle_length is False") - if cycle_length is not None and estimate_cycle_length: - raise ValueError("Cannot specify cycle_length if estimate_cycle_length is True") - if name is None: - cycle = int(cycle_length) if cycle_length is not None else "Estimate" - name = f"Cycle[s={cycle}, dampen={dampen}, innovations={innovations}]" - - self.estimate_cycle_length = estimate_cycle_length - self.cycle_length = cycle_length - self.innovations = innovations - self.dampen = dampen - self.n_coefs = 1 - - k_states = 2 - k_endog = 1 - k_posdef = 2 - - obs_state_idx = np.zeros(k_states) - obs_state_idx[slice(0, k_states, 2)] = 1 - - super().__init__( - name=name, - k_endog=k_endog, - k_states=k_states, - k_posdef=k_posdef, - measurement_error=False, - combine_hidden_states=True, - obs_state_idxs=obs_state_idx, - ) - - def make_symbolic_graph(self) -> None: - self.ssm["design", 0, slice(0, self.k_states, 2)] = 1 - self.ssm["selection", :, :] = np.eye(self.k_states) - self.param_dims = {self.name: (f"{self.name}_state",)} - self.coords = {f"{self.name}_state": self.state_names} - - init_state = self.make_and_register_variable(f"{self.name}", shape=(self.k_states,)) - - self.ssm["initial_state", :] = init_state - - if self.estimate_cycle_length: - lamb = self.make_and_register_variable(f"{self.name}_length", shape=()) - else: - lamb = self.cycle_length - - if self.dampen: - rho = self.make_and_register_variable(f"{self.name}_dampening_factor", shape=()) - else: - rho = 1 - - T = rho * _frequency_transition_block(lamb, j=1) - self.ssm["transition", :, :] = T - - if self.innovations: - sigma_cycle = self.make_and_register_variable(f"sigma_{self.name}", shape=()) - self.ssm["state_cov", 
:, :] = pt.eye(self.k_posdef) * sigma_cycle**2 - - def populate_component_properties(self): - self.state_names = [f"{self.name}_{f}" for f in ["Cos", "Sin"]] - self.param_names = [f"{self.name}"] - - self.param_info = { - f"{self.name}": { - "shape": (2,), - "constraints": None, - "dims": (f"{self.name}_state",), - } - } - - if self.estimate_cycle_length: - self.param_names += [f"{self.name}_length"] - self.param_info[f"{self.name}_length"] = { - "shape": (), - "constraints": "Positive, non-zero", - "dims": None, - } - - if self.dampen: - self.param_names += [f"{self.name}_dampening_factor"] - self.param_info[f"{self.name}_dampening_factor"] = { - "shape": (), - "constraints": "0 < x ≤ 1", - "dims": None, - } - - if self.innovations: - self.param_names += [f"sigma_{self.name}"] - self.param_info[f"sigma_{self.name}"] = { - "shape": (), - "constraints": "Positive", - "dims": None, - } - self.shock_names = self.state_names.copy() - - -class RegressionComponent(Component): - def __init__( - self, - k_exog: int | None = None, - name: str | None = "Exogenous", - state_names: list[str] | None = None, - innovations=False, - ): - self.innovations = innovations - k_exog = self._handle_input_data(k_exog, state_names, name) - - k_states = k_exog - k_endog = 1 - k_posdef = k_exog - - super().__init__( - name=name, - k_endog=k_endog, - k_states=k_states, - k_posdef=k_posdef, - state_names=self.state_names, - measurement_error=False, - combine_hidden_states=False, - exog_names=[f"data_{name}"], - obs_state_idxs=np.ones(k_states), - ) - - @staticmethod - def _get_state_names(k_exog: int | None, state_names: list[str] | None, name: str): - if k_exog is None and state_names is None: - raise ValueError("Must specify at least one of k_exog or state_names") - if state_names is not None and k_exog is not None: - if len(state_names) != k_exog: - raise ValueError(f"Expected {k_exog} state names, found {len(state_names)}") - elif k_exog is None: - k_exog = len(state_names) - else: - 
state_names = [f"{name}_{i + 1}" for i in range(k_exog)] - - return k_exog, state_names - - def _handle_input_data(self, k_exog: int, state_names: list[str] | None, name) -> int: - k_exog, state_names = self._get_state_names(k_exog, state_names, name) - self.state_names = state_names - - return k_exog - - def make_symbolic_graph(self) -> None: - betas = self.make_and_register_variable(f"beta_{self.name}", shape=(self.k_states,)) - regression_data = self.make_and_register_data( - f"data_{self.name}", shape=(None, self.k_states) - ) - - self.ssm["initial_state", :] = betas - self.ssm["transition", :, :] = np.eye(self.k_states) - self.ssm["selection", :, :] = np.eye(self.k_states) - self.ssm["design"] = pt.expand_dims(regression_data, 1) - - if self.innovations: - sigma_beta = self.make_and_register_variable( - f"sigma_beta_{self.name}", (self.k_states,) - ) - row_idx, col_idx = np.diag_indices(self.k_states) - self.ssm["state_cov", row_idx, col_idx] = sigma_beta**2 - - def populate_component_properties(self) -> None: - self.shock_names = self.state_names - - self.param_names = [f"beta_{self.name}"] - self.data_names = [f"data_{self.name}"] - self.param_dims = { - f"beta_{self.name}": ("exog_state",), - } - - self.param_info = { - f"beta_{self.name}": { - "shape": (self.k_states,), - "constraints": None, - "dims": ("exog_state",), - }, - } - - self.data_info = { - f"data_{self.name}": { - "shape": (None, self.k_states), - "dims": (TIME_DIM, "exog_state"), - }, - } - self.coords = {"exog_state": self.state_names} - - if self.innovations: - self.param_names += [f"sigma_beta_{self.name}"] - self.param_dims[f"sigma_beta_{self.name}"] = "exog_state" - self.param_info[f"sigma_beta_{self.name}"] = { - "shape": (), - "constraints": "Positive", - "dims": ("exog_state",), - } diff --git a/pymc_extras/statespace/models/structural/__init__.py b/pymc_extras/statespace/models/structural/__init__.py new file mode 100644 index 000000000..57cb6d7ac --- /dev/null +++ 
b/pymc_extras/statespace/models/structural/__init__.py @@ -0,0 +1,21 @@ +from pymc_extras.statespace.models.structural.components.autoregressive import ( + AutoregressiveComponent, +) +from pymc_extras.statespace.models.structural.components.cycle import CycleComponent +from pymc_extras.statespace.models.structural.components.level_trend import LevelTrendComponent +from pymc_extras.statespace.models.structural.components.measurement_error import MeasurementError +from pymc_extras.statespace.models.structural.components.regression import RegressionComponent +from pymc_extras.statespace.models.structural.components.seasonality import ( + FrequencySeasonality, + TimeSeasonality, +) + +__all__ = [ + "LevelTrendComponent", + "MeasurementError", + "AutoregressiveComponent", + "TimeSeasonality", + "FrequencySeasonality", + "RegressionComponent", + "CycleComponent", +] diff --git a/pymc_extras/statespace/models/structural/components/__init__.py b/pymc_extras/statespace/models/structural/components/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pymc_extras/statespace/models/structural/components/autoregressive.py b/pymc_extras/statespace/models/structural/components/autoregressive.py new file mode 100644 index 000000000..0eca94295 --- /dev/null +++ b/pymc_extras/statespace/models/structural/components/autoregressive.py @@ -0,0 +1,122 @@ +import numpy as np + +from pymc_extras.statespace.models.structural.core import Component +from pymc_extras.statespace.models.structural.utils import order_to_mask +from pymc_extras.statespace.utils.constants import AR_PARAM_DIM + + +class AutoregressiveComponent(Component): + r""" + Autoregressive timeseries component + + Parameters + ---------- + order: int or sequence of int + + If int, the number of lags to include in the model. + If a sequence, an array-like of zeros and ones indicating which lags to include in the model. 
+ + Notes + ----- + An autoregressive component can be thought of as a way of introducing serially correlated errors into the model. + The process is modeled: + + .. math:: + x_t = \sum_{i=1}^p \rho_i x_{t-i} + + Where ``p``, the number of autoregressive terms to model, is the order of the process. By default, all lags up to + ``p`` are included in the model. To disable lags, pass a list of zeros and ones to the ``order`` argument. For + example, ``order=[1, 1, 0, 1]`` would become: + + .. math:: + x_t = \rho_1 x_{t-1} + \rho_2 x_{t-2} + \rho_4 x_{t-4} + + The coefficient :math:`\rho_3` has been constrained to zero. + + .. warning:: This class is meant to be used as a component in a structural time series model. For modeling of + stationary processes with ARIMA, use ``statespace.BayesianSARIMA``. + + Examples + -------- + Model a timeseries as an AR(2) process with non-zero mean: + + .. code:: python + + from pymc_extras.statespace import structural as st + import pymc as pm + import pytensor.tensor as pt + + trend = st.LevelTrendComponent(order=1, innovations_order=0) + ar = st.AutoregressiveComponent(2) + ss_mod = (trend + ar).build() + + with pm.Model(coords=ss_mod.coords) as model: + P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states) * 10, dims=ss_mod.param_dims['P0']) + intitial_trend = pm.Normal('initial_trend', sigma=10, dims=ss_mod.param_dims['initial_trend']) + ar_params = pm.Normal('ar_params', dims=ss_mod.param_dims['ar_params']) + sigma_ar = pm.Exponential('sigma_ar', 1, dims=ss_mod.param_dims['sigma_ar']) + + ss_mod.build_statespace_graph(data) + idata = pm.sample(nuts_sampler='numpyro') + + """ + + def __init__( + self, + order: int = 1, + name: str = "AutoRegressive", + observed_state_names: list[str] | None = None, + ): + if observed_state_names is None: + observed_state_names = ["data"] + + order = order_to_mask(order) + ar_lags = np.flatnonzero(order).ravel().astype(int) + 1 + k_states = len(order) + k_posdef = k_endog = len(observed_state_names) 
+ + self.order = order + self.ar_lags = ar_lags + + super().__init__( + name=name, + k_endog=k_endog, + k_states=k_states, + k_posdef=k_posdef, + measurement_error=True, + combine_hidden_states=True, + observed_state_names=observed_state_names, + obs_state_idxs=np.r_[[1.0], np.zeros(k_states - 1)], + ) + + def populate_component_properties(self): + self.state_names = [f"L{i + 1}.data" for i in range(self.k_states)] + self.shock_names = [f"{self.name}_innovation"] + self.param_names = ["ar_params", "sigma_ar"] + self.param_dims = {"ar_params": (AR_PARAM_DIM,)} + self.coords = {AR_PARAM_DIM: self.ar_lags.tolist()} + + self.param_info = { + "ar_params": { + "shape": (self.k_states,), + "constraints": None, + "dims": (AR_PARAM_DIM,), + }, + "sigma_ar": {"shape": (), "constraints": "Positive", "dims": None}, + } + + def make_symbolic_graph(self) -> None: + k_nonzero = int(sum(self.order)) + ar_params = self.make_and_register_variable("ar_params", shape=(k_nonzero,)) + sigma_ar = self.make_and_register_variable("sigma_ar", shape=()) + + T = np.eye(self.k_states, k=-1) + self.ssm["transition", :, :] = T + self.ssm["selection", 0, 0] = 1 + self.ssm["design", 0, 0] = 1 + + ar_idx = ("transition", np.zeros(k_nonzero, dtype="int"), np.nonzero(self.order)[0]) + self.ssm[ar_idx] = ar_params + + cov_idx = ("state_cov", *np.diag_indices(1)) + self.ssm[cov_idx] = sigma_ar**2 diff --git a/pymc_extras/statespace/models/structural/components/cycle.py b/pymc_extras/statespace/models/structural/components/cycle.py new file mode 100644 index 000000000..4c0f4603f --- /dev/null +++ b/pymc_extras/statespace/models/structural/components/cycle.py @@ -0,0 +1,201 @@ +import numpy as np + +from pytensor import tensor as pt + +from pymc_extras.statespace.models.structural.core import Component +from pymc_extras.statespace.models.structural.utils import _frequency_transition_block + + +class CycleComponent(Component): + r""" + A component for modeling longer-term cyclical effects + + Parameters + 
---------- + name: str + Name of the component. Used in generated coordinates and state names. If None, a descriptive name will be + used. + + cycle_length: int, optional + The length of the cycle, in the calendar units of your data. For example, if your data is monthly, and you + want to model a 12-month cycle, use ``cycle_length=12``. You cannot specify both ``cycle_length`` and + ``estimate_cycle_length``. + + estimate_cycle_length: bool, default False + Whether to estimate the cycle length. If True, an additional parameter, ``{name}_length`` will be added to the + model. You cannot specify both ``cycle_length`` and ``estimate_cycle_length``. + + dampen: bool, default False + Whether to dampen the cycle by multiplying by a dampening factor :math:`\rho` at every timestep. If true, + an additional parameter, ``{name}_dampening_factor`` will be added to the model. + + innovations: bool, default True + Whether to include stochastic innovations in the strength of the seasonal effect. If True, an additional + parameter, ``sigma_{name}`` will be added to the model. + + Notes + ----- + The cycle component is very similar in implementation to the frequency domain seasonal component, except that it + is restricted to n=1. The cycle component can be expressed: + + .. math:: + \begin{align} + \gamma_t &= \rho \gamma_{t-1} \cos \lambda + \rho \gamma_{t-1}^\star \sin \lambda + \omega_{t} \\ + \gamma_{t}^\star &= -\rho \gamma_{t-1} \sin \lambda + \rho \gamma_{t-1}^\star \cos \lambda + \omega_{t}^\star \\ + \lambda &= \frac{2\pi}{s} + \end{align} + + Where :math:`s` is the ``cycle_length``. [1] recommend that this component be used for longer term cyclical + effects, such as business cycles, and that the seasonal component be used for shorter term effects, such as + weekly or monthly seasonality. + + Unlike a FrequencySeasonality component, the length of a CycleComponent can be estimated. + + Examples + -------- + Estimate a business cycle with length between 6 and 12 years: + + .. 
code:: python + + from pymc_extras.statespace import structural as st + import pymc as pm + import pytensor.tensor as pt + import pandas as pd + import numpy as np + + data = np.random.normal(size=(100, 1)) + + # Build the structural model + grw = st.LevelTrendComponent(order=1, innovations_order=1) + cycle = st.CycleComponent('business_cycle', estimate_cycle_length=True, dampen=False) + ss_mod = (grw + cycle).build() + + # Estimate with PyMC + with pm.Model(coords=ss_mod.coords) as model: + P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states), dims=ss_mod.param_dims['P0']) + intitial_trend = pm.Normal('initial_trend', dims=ss_mod.param_dims['initial_trend']) + sigma_trend = pm.HalfNormal('sigma_trend', dims=ss_mod.param_dims['sigma_trend']) + + cycle_strength = pm.Normal('business_cycle') + cycle_length = pm.Uniform('business_cycle_length', lower=6, upper=12) + + sigma_cycle = pm.HalfNormal('sigma_business_cycle', sigma=1) + ss_mod.build_statespace_graph(data) + + idata = pm.sample(nuts_sampler='numpyro') + + References + ---------- + .. [1] Durbin, James, and Siem Jan Koopman. 2012. + Time Series Analysis by State Space Methods: Second Edition. + Oxford University Press. 
def __init__(
    self,
    name: str | None = None,
    cycle_length: int | None = None,
    estimate_cycle_length: bool = False,
    dampen: bool = False,
    innovations: bool = True,
    observed_state_names: list[str] | None = None,
):
    """
    Configure a cycle component.

    Exactly one of ``cycle_length`` (a fixed length) or ``estimate_cycle_length=True`` must be supplied.

    Parameters
    ----------
    name : str, optional
        Component name. When omitted, a descriptive name is built from the cycle length (or "Estimate"),
        ``dampen`` and ``innovations``.
    cycle_length : int, optional
        Fixed length of the cycle.
    estimate_cycle_length : bool, default False
        Whether the cycle length is a model parameter instead of a fixed value.
    dampen : bool, default False
        Whether a dampening factor is applied to the cycle.
    innovations : bool, default True
        Whether the cycle receives stochastic innovations.
    observed_state_names : list of str, optional
        Names of the observed states. Defaults to ``["data"]``.

    Raises
    ------
    ValueError
        If neither, or both, of ``cycle_length`` and ``estimate_cycle_length`` are specified.
    """
    if observed_state_names is None:
        observed_state_names = ["data"]

    has_fixed_length = cycle_length is not None
    if not has_fixed_length and not estimate_cycle_length:
        raise ValueError("Must specify cycle_length if estimate_cycle_length is False")
    if has_fixed_length and estimate_cycle_length:
        raise ValueError("Cannot specify cycle_length if estimate_cycle_length is True")

    if name is None:
        cycle = int(cycle_length) if has_fixed_length else "Estimate"
        name = f"Cycle[s={cycle}, dampen={dampen}, innovations={innovations}]"

    self.estimate_cycle_length = estimate_cycle_length
    self.cycle_length = cycle_length
    self.innovations = innovations
    self.dampen = dampen
    self.n_coefs = 1

    # Two states (and two shocks) are allotted per observed series
    k_endog = len(observed_state_names)
    k_states = k_posdef = 2 * k_endog

    # Every other state, starting from the first, is flagged as observed
    obs_state_idx = np.zeros(k_states)
    obs_state_idx[0:k_states:2] = 1

    super().__init__(
        name=name,
        k_endog=k_endog,
        k_states=k_states,
        k_posdef=k_posdef,
        measurement_error=False,
        combine_hidden_states=True,
        obs_state_idxs=obs_state_idx,
        observed_state_names=observed_state_names,
    )
class LevelTrendComponent(Component):
    r"""
    Level and trend component of a structural time series model

    Parameters
    ----------
    order : int or sequence of int, default 2
        Number of time derivatives of the trend to include in the model. For example, when order=3, the trend will
        be of the form ``y = a + b * t + c * t ** 2``, where the coefficients ``a, b, c`` come from the initial
        state values. When a sequence is given, its last entry must be non-zero.

    innovations_order : int or sequence of int, optional
        The number of stochastic innovations to include in the model. By default, ``innovations_order = order``

    name : str, default "LevelTrend"
        Name of the component.

    observed_state_names : list of str, optional
        Names of the observed states. Defaults to ``["data"]``.

    Notes
    -----
    This class implements the level and trend components of the general structural time series model. In the most
    general form, the level and trend is described by a system of two time-varying equations.

    .. math::
        \begin{align}
            \mu_{t+1} &= \mu_t + \nu_t + \zeta_t \\
            \nu_{t+1} &= \nu_t + \xi_t \\
            \zeta_t &\sim N(0, \sigma_\zeta) \\
            \xi_t &\sim N(0, \sigma_\xi)
        \end{align}

    Where :math:`\mu_t` is the mean of the timeseries at time t, and :math:`\nu_t` is the drift or the slope of
    the process. When both innovations :math:`\zeta_t` and :math:`\xi_t` are included in the model, it is known as a
    *local linear trend* model. This system of two equations, corresponding to ``order=2``, can be expanded or
    contracted by adding or removing equations. ``order=3`` would add an acceleration term to the system:

    .. math::
        \begin{align}
            \mu_{t+1} &= \mu_t + \nu_t + \zeta_t \\
            \nu_{t+1} &= \nu_t + \eta_t + \xi_t \\
            \eta_{t+1} &= \eta_t + \omega_t \\
            \zeta_t &\sim N(0, \sigma_\zeta) \\
            \xi_t &\sim N(0, \sigma_\xi) \\
            \omega_t &\sim N(0, \sigma_\omega)
        \end{align}

    After setting all innovation terms to zero and defining initial states :math:`\mu_0, \nu_0, \eta_0`, these equations
    can be collapsed to:

    .. math::
        \mu_t = \mu_0 + \nu_0 \cdot t + \eta_0 \cdot t^2

    Which clarifies how the order and initial states influence the model. In particular, the initial states are the
    coefficients on the intercept, slope, acceleration, and so on.

    In this light, allowing for innovations can be understood as allowing these coefficients to vary over time. Each
    component can be individually selected for time variation by passing a list to the ``innovations_order`` argument.
    For example, a constant intercept with time varying trend and acceleration is specified as ``order=3,
    innovations_order=[0, 1, 1]``.

    By choosing the ``order`` and ``innovations_order``, a large variety of models can be obtained. Notable
    models include:

    * Constant intercept, ``order=1, innovations_order=0``

    .. math::
        \mu_t = \mu

    * Constant linear slope, ``order=2, innovations_order=0``

    .. math::
        \mu_t = \mu_{t-1} + \nu

    * Gaussian Random Walk, ``order=1, innovations_order=1``

    .. math::
        \mu_t = \mu_{t-1} + \zeta_t

    * Gaussian Random Walk with Drift, ``order=2, innovations_order=1``

    .. math::
        \mu_t = \mu_{t-1} + \nu + \zeta_t

    * Smooth Trend, ``order=2, innovations_order=[0, 1]``

    .. math::
        \begin{align}
            \mu_t &= \mu_{t-1} + \nu_{t-1} \\
            \nu_t &= \nu_{t-1} + \xi_t
        \end{align}

    * Local Linear Trend, ``order=2, innovations_order=2``

    [1] notes that the smooth trend model produces more gradually changing slopes than the full local linear trend
    model, and is equivalent to an "integrated trend model".

    References
    ----------
    .. [1] Durbin, James, and Siem Jan Koopman. 2012.
        Time Series Analysis by State Space Methods: Second Edition.
        Oxford University Press.

    """

    def __init__(
        self,
        order: int | list[int] = 2,
        innovations_order: int | list[int] | None = None,
        name: str = "LevelTrend",
        observed_state_names: list[str] | None = None,
    ):
        if innovations_order is None:
            innovations_order = order

        if observed_state_names is None:
            observed_state_names = ["data"]

        self._order_mask = order_to_mask(order)
        active_states = np.flatnonzero(self._order_mask)
        if active_states.size == 0:
            # Without this guard the [-1] lookup below fails with an uninformative IndexError
            raise ValueError(
                f"order={order} is invalid. At least one state must be included in the model."
            )
        max_state = active_states[-1].item() + 1

        # If the user passes excess zeros, raise an error. The alternative is to prune them, but this would cause
        # the shape of the state to be different to what the user expects.
        if len(self._order_mask) > max_state:
            raise ValueError(
                f"order={order} is invalid. The highest derivative should not be set to zero. If you want a "
                f"lower order model, explicitly omit the zeros."
            )
        k_states = max_state

        if isinstance(innovations_order, int):
            # An integer n means "the first n states receive innovations"
            n = innovations_order
            innovations_order = order_to_mask(k_states)
            if n > 0:
                innovations_order[n:] = False
            else:
                innovations_order[:] = False
        else:
            innovations_order = order_to_mask(innovations_order)

        self.innovations_order = innovations_order[:max_state]

        # Count shocks from the truncated mask: it is the one that selects the columns of the selection matrix
        # in make_symbolic_graph, so k_posdef must agree with it even if the user passed an over-long mask.
        k_posdef = int(sum(self.innovations_order))

        super().__init__(
            name,
            k_endog=len(observed_state_names),
            k_states=k_states,
            k_posdef=k_posdef,
            observed_state_names=observed_state_names,
            measurement_error=False,
            combine_hidden_states=False,
            obs_state_idxs=np.array([1.0] + [0.0] * (k_states - 1)),
        )

    def populate_component_properties(self):
        """
        Declare state/shock names, parameter names, dims, coords and parameter metadata.

        ``initial_trend`` is always declared; ``sigma_trend`` (with the ``trend_shock`` dimension) is only
        declared when at least one state receives innovations.
        """
        name_slice = POSITION_DERIVATIVE_NAMES[: self.k_states]
        self.param_names = ["initial_trend"]
        self.state_names = [name for name, mask in zip(name_slice, self._order_mask) if mask]
        self.param_dims = {"initial_trend": ("trend_state",)}
        self.coords = {"trend_state": self.state_names}
        self.param_info = {"initial_trend": {"shape": (self.k_states,), "constraints": None}}

        if self.k_posdef > 0:
            self.param_names += ["sigma_trend"]
            self.shock_names = [
                name for name, mask in zip(name_slice, self.innovations_order) if mask
            ]
            self.param_dims["sigma_trend"] = ("trend_shock",)
            self.coords["trend_shock"] = self.shock_names
            self.param_info["sigma_trend"] = {"shape": (self.k_posdef,), "constraints": "Positive"}

        # Dims are filled in last so param_info and param_dims cannot drift apart
        for name in self.param_names:
            self.param_info[name]["dims"] = self.param_dims[name]

    def make_symbolic_graph(self) -> None:
        """
        Write the level/trend structure into the statespace matrices.

        The initial state is the symbolic ``initial_trend``, the transition matrix is upper triangular with ones,
        the selection matrix keeps only the identity columns of states that receive innovations, and only the
        first state is observed.
        """
        initial_trend = self.make_and_register_variable("initial_trend", shape=(self.k_states,))
        self.ssm["initial_state", :] = initial_trend
        triu_idx = np.triu_indices(self.k_states)
        self.ssm[np.s_["transition", triu_idx[0], triu_idx[1]]] = 1

        R = np.eye(self.k_states)
        R = R[:, self.innovations_order]
        self.ssm["selection", :, :] = R

        self.ssm["design", 0, :] = np.array([1.0] + [0.0] * (self.k_states - 1))

        if self.k_posdef > 0:
            sigma_trend = self.make_and_register_variable("sigma_trend", shape=(self.k_posdef,))
            diag_idx = np.diag_indices(self.k_posdef)
            idx = np.s_["state_cov", diag_idx[0], diag_idx[1]]
            self.ssm[idx] = sigma_trend**2
def __init__(
    self, name: str = "MeasurementError", observed_state_names: list[str] | None = None
):
    """
    Configure a measurement error component.

    The component has no states and no shocks; it only flags that measurement error is present.

    Parameters
    ----------
    name : str, default "MeasurementError"
        Name of the component; the variance parameter is called ``sigma_{name}``.
    observed_state_names : list of str, optional
        Names of the observed states. Defaults to ``["data"]``.
    """
    observed_state_names = ["data"] if observed_state_names is None else observed_state_names

    n_observed = len(observed_state_names)
    no_states = 0
    no_shocks = 0

    super().__init__(
        name,
        n_observed,
        no_states,
        no_shocks,
        measurement_error=True,
        combine_hidden_states=False,
        observed_state_names=observed_state_names,
    )


def populate_component_properties(self):
    """Declare the single scalar, positive parameter ``sigma_{name}``; it has no named dimensions."""
    sigma_name = f"sigma_{self.name}"

    self.param_names = [sigma_name]
    self.param_dims = {}
    self.param_info = {sigma_name: {"shape": (), "constraints": "Positive", "dims": None}}


def make_symbolic_graph(self) -> None:
    """Place ``sigma_{name} ** 2`` on the diagonal of the observation covariance matrix."""
    error_sigma = self.make_and_register_variable(f"sigma_{self.name}", shape=())

    # One scalar sigma is written to every diagonal entry of the (k_endog, k_endog) matrix
    diag_rows, diag_cols = np.diag_indices(self.k_endog)
    self.ssm["obs_cov", diag_rows, diag_cols] = error_sigma**2
class RegressionComponent(Component):
    r"""
    Regression on exogenous data, with the coefficients stored as hidden states

    Parameters
    ----------
    k_exog : int, optional
        Number of exogenous regressors. At least one of ``k_exog`` and ``state_names`` must be given; if both are
        given, ``len(state_names)`` must equal ``k_exog``.

    name : str, default "Exogenous"
        Name of the component. Used to build the parameter names ``beta_{name}`` and ``sigma_beta_{name}``, the
        data name ``data_{name}``, and the default state names.

    state_names : list of str, optional
        One label per regressor. If omitted, the labels are ``{name}_1, ..., {name}_{k_exog}``.

    observed_state_names : list of str, optional
        Names of the observed states. Defaults to ``["data"]``.

    innovations : bool, default False
        If True, a parameter ``sigma_beta_{name}`` with one entry per regressor is added and its square is
        written to the diagonal of the state covariance matrix.

    Notes
    -----
    The coefficients ``beta_{name}`` are the initial state, the transition matrix is the identity, and the design
    matrix is set to the exogenous data ``data_{name}``.
    """

    def __init__(
        self,
        k_exog: int | None = None,
        name: str | None = "Exogenous",
        state_names: list[str] | None = None,
        observed_state_names: list[str] | None = None,
        innovations=False,
    ):
        if observed_state_names is None:
            observed_state_names = ["data"]

        self.innovations = innovations
        # Validates the inputs and stores the resolved labels on self.state_names
        k_exog = self._handle_input_data(k_exog, state_names, name)

        k_states = k_exog
        k_endog = len(observed_state_names)
        k_posdef = k_exog

        super().__init__(
            name=name,
            k_endog=k_endog,
            k_states=k_states,
            k_posdef=k_posdef,
            state_names=self.state_names,
            observed_state_names=observed_state_names,
            measurement_error=False,
            combine_hidden_states=False,
            exog_names=[f"data_{name}"],
            obs_state_idxs=np.ones(k_states),
        )

    @staticmethod
    def _get_state_names(k_exog: int | None, state_names: list[str] | None, name: str):
        """
        Reconcile ``k_exog`` with ``state_names``, inferring whichever one is missing.

        Returns
        -------
        k_exog : int
        state_names : list of str

        Raises
        ------
        ValueError
            If both arguments are None, or if both are given and their lengths disagree.
        """
        if k_exog is None and state_names is None:
            raise ValueError("Must specify at least one of k_exog or state_names")
        if state_names is not None and k_exog is not None:
            if len(state_names) != k_exog:
                raise ValueError(f"Expected {k_exog} state names, found {len(state_names)}")
        elif k_exog is None:
            k_exog = len(state_names)
        else:
            state_names = [f"{name}_{i + 1}" for i in range(k_exog)]

        return k_exog, state_names

    def _handle_input_data(
        self, k_exog: int | None, state_names: list[str] | None, name
    ) -> int:
        """Resolve the regressor count and labels, store the labels on ``self``, and return the count."""
        k_exog, state_names = self._get_state_names(k_exog, state_names, name)
        self.state_names = state_names

        return k_exog

    def make_symbolic_graph(self) -> None:
        """
        Write the regression structure into the statespace matrices.

        The design matrix is the registered data ``data_{name}`` with an axis inserted at position 1, which makes
        it time-varying.
        """
        betas = self.make_and_register_variable(f"beta_{self.name}", shape=(self.k_states,))
        regression_data = self.make_and_register_data(
            f"data_{self.name}", shape=(None, self.k_states)
        )

        self.ssm["initial_state", :] = betas
        self.ssm["transition", :, :] = np.eye(self.k_states)
        self.ssm["selection", :, :] = np.eye(self.k_states)
        self.ssm["design"] = pt.expand_dims(regression_data, 1)

        if self.innovations:
            sigma_beta = self.make_and_register_variable(
                f"sigma_beta_{self.name}", (self.k_states,)
            )
            row_idx, col_idx = np.diag_indices(self.k_states)
            self.ssm["state_cov", row_idx, col_idx] = sigma_beta**2

    def populate_component_properties(self) -> None:
        """Declare shock, parameter and data names together with their dims, coords and metadata."""
        self.shock_names = self.state_names

        self.param_names = [f"beta_{self.name}"]
        self.data_names = [f"data_{self.name}"]
        self.param_dims = {
            f"beta_{self.name}": ("exog_state",),
        }

        self.param_info = {
            f"beta_{self.name}": {
                "shape": (self.k_states,),
                "constraints": None,
                "dims": ("exog_state",),
            },
        }

        self.data_info = {
            f"data_{self.name}": {
                "shape": (None, self.k_states),
                "dims": (TIME_DIM, "exog_state"),
            },
        }
        self.coords = {"exog_state": self.state_names}

        if self.innovations:
            self.param_names += [f"sigma_beta_{self.name}"]
            # dims must be a tuple like every other entry (a bare string is not a sequence of dimension names),
            # and the shape must match the (k_states,) variable registered in make_symbolic_graph.
            self.param_dims[f"sigma_beta_{self.name}"] = ("exog_state",)
            self.param_info[f"sigma_beta_{self.name}"] = {
                "shape": (self.k_states,),
                "constraints": "Positive",
                "dims": ("exog_state",),
            }
+ + innovations: bool, default True + Whether to include stochastic innovations in the strength of the seasonal effect + + name: str, default None + A name for this seasonal component. Used to label dimensions and coordinates. Useful when multiple seasonal + components are included in the same model. Default is ``f"Seasonal[s={season_length}]"`` + + state_names: list of str, default None + List of strings for seasonal effect labels. If provided, it must be of length ``season_length``. An example + would be ``state_names = ['Mon', 'Tue', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun']`` when data is daily with a weekly + seasonal pattern (``season_length = 7``). + + If None, states will be numbered ``[State_0, ..., State_s]`` + + remove_first_state: bool, default True + If True, the first state will be removed from the model. This is done because there are only n-1 degrees of + freedom in the seasonal component, and one state is not identified. If False, the first state will be + included in the model, but it will not be identified -- you will need to handle this in the priors (e.g. with + ZeroSumNormal). + + Notes + ----- + A seasonal effect is any pattern that repeats every fixed interval. Although there are many possible ways to + model seasonal effects, the implementation used here is the one described by [1] as the "canonical" time domain + representation. The seasonal component can be expressed: + + .. math:: + \gamma_t = -\sum_{i=1}^{s-1} \gamma_{t-i} + \omega_t, \quad \omega_t \sim N(0, \sigma_\gamma) + + Where :math:`s` is the ``seasonal_length`` parameter and :math:`\omega_t` is the (optional) stochastic innovation. + To give interpretation to the :math:`\gamma` terms, it is helpful to work through the algebra for a simple + example. Let :math:`s=4`, and omit the shock term. Define initial conditions :math:`\gamma_0, \gamma_{-1}, + \gamma_{-2}`. The value of the seasonal component for the first 5 timesteps will be: + + .. 
math:: + \begin{align} + \gamma_1 &= -\gamma_0 - \gamma_{-1} - \gamma_{-2} \\ + \gamma_2 &= -\gamma_1 - \gamma_0 - \gamma_{-1} \\ + &= -(-\gamma_0 - \gamma_{-1} - \gamma_{-2}) - \gamma_0 - \gamma_{-1} \\ + &= (\gamma_0 - \gamma_0 )+ (\gamma_{-1} - \gamma_{-1}) + \gamma_{-2} \\ + &= \gamma_{-2} \\ + \gamma_3 &= -\gamma_2 - \gamma_1 - \gamma_0 \\ + &= -\gamma_{-2} - (-\gamma_0 - \gamma_{-1} - \gamma_{-2}) - \gamma_0 \\ + &= (\gamma_{-2} - \gamma_{-2}) + \gamma_{-1} + (\gamma_0 - \gamma_0) \\ + &= \gamma_{-1} \\ + \gamma_4 &= -\gamma_3 - \gamma_2 - \gamma_1 \\ + &= -\gamma_{-1} - \gamma_{-2} -(-\gamma_0 - \gamma_{-1} - \gamma_{-2}) \\ + &= (\gamma_{-2} - \gamma_{-2}) + (\gamma_{-1} - \gamma_{-1}) + \gamma_0 \\ + &= \gamma_0 \\ + \gamma_5 &= -\gamma_4 - \gamma_3 - \gamma_2 \\ + &= -\gamma_0 - \gamma_{-1} - \gamma_{-2} \\ + &= \gamma_1 + \end{align} + + This exercise shows that, given a list ``initial_conditions`` of length ``s-1``, the effects of this model will be: + + - Period 1: ``-sum(initial_conditions)`` + - Period 2: ``initial_conditions[-1]`` + - Period 3: ``initial_conditions[-2]`` + - ... + - Period s: ``initial_conditions[0]`` + - Period s+1: ``-sum(initial_condition)`` + + And so on. So for interpretation, the ``season_length - 1`` initial states are, when reversed, the coefficients + associated with ``state_names[1:]``. + + .. warning:: + Although the ``state_names`` argument expects a list of length ``season_length``, only ``state_names[1:]`` + will be saved as model dimensions, since the 1st coefficient is not identified (it is defined as + :math:`-\sum_{i=1}^{s} \gamma_{t-i}`). + + Examples + -------- + Estimate monthly with a model with a gaussian random walk trend and monthly seasonality: + + .. 
code:: python + + from pymc_extras.statespace import structural as st + import pymc as pm + import pytensor.tensor as pt + import pandas as pd + + # Get month names + state_names = pd.date_range('1900-01-01', '1900-12-31', freq='MS').month_name().tolist() + + # Build the structural model + grw = st.LevelTrendComponent(order=1, innovations_order=1) + annual_season = st.TimeSeasonality(season_length=12, name='annual', state_names=state_names, innovations=False) + ss_mod = (grw + annual_season).build() + + # Estimate with PyMC + with pm.Model(coords=ss_mod.coords) as model: + P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states) * 10, dims=ss_mod.param_dims['P0']) + intitial_trend = pm.Deterministic('initial_trend', pt.zeros(1), dims=ss_mod.param_dims['initial_trend']) + annual_coefs = pm.Normal('annual_coefs', sigma=1e-2, dims=ss_mod.param_dims['annual_coefs']) + trend_sigmas = pm.HalfNormal('trend_sigmas', sigma=1e-6, dims=ss_mod.param_dims['trend_sigmas']) + ss_mod.build_statespace_graph(data) + idata = pm.sample(nuts_sampler='numpyro') + + References + ---------- + .. [1] Durbin, James, and Siem Jan Koopman. 2012. + Time Series Analysis by State Space Methods: Second Edition. + Oxford University Press. 
def __init__(
    self,
    season_length: int,
    innovations: bool = True,
    name: str | None = None,
    state_names: list | None = None,
    remove_first_state: bool = True,
    observed_state_names: list[str] | None = None,
):
    """
    Configure a time-domain seasonal component.

    Parameters
    ----------
    season_length : int
        Number of periods in one seasonal cycle.
    innovations : bool, default True
        Whether the seasonal effect receives a stochastic innovation.
    name : str, optional
        Component name. Defaults to ``f"Seasonal[s={season_length}]"``.
    state_names : list, optional
        Labels for the seasonal effects; must have length ``season_length``. Defaults to
        ``{name}_0, ..., {name}_{season_length - 1}``. The caller's list is copied, not modified.
    remove_first_state : bool, default True
        Whether to drop the first state (and its label) from the model.
    observed_state_names : list of str, optional
        Names of the observed states. Defaults to ``["data"]``.

    Raises
    ------
    ValueError
        If ``state_names`` is given and its length differs from ``season_length``.
    """
    if observed_state_names is None:
        observed_state_names = ["data"]
    if name is None:
        name = f"Seasonal[s={season_length}]"

    if state_names is not None:
        if len(state_names) != season_length:
            raise ValueError(
                f"state_names must be a list of length season_length, got {len(state_names)}"
            )
        # Work on a copy so dropping the first label below never touches the caller's list
        state_names = state_names.copy()
    else:
        state_names = [f"{name}_{i}" for i in range(season_length)]

    self.innovations = innovations
    self.remove_first_state = remove_first_state

    if self.remove_first_state:
        # In traditional models, the first state isn't identified, so we can help out the user by automatically
        # discarding it.
        # TODO: Can this be stashed and reconstructed automatically somehow?
        state_names.pop(0)

    n_states = season_length - int(self.remove_first_state)
    first_state_only = np.r_[[1.0], np.zeros(n_states - 1)]

    super().__init__(
        name=name,
        k_endog=len(observed_state_names),
        k_states=n_states,
        k_posdef=int(innovations),
        state_names=state_names,
        observed_state_names=observed_state_names,
        measurement_error=False,
        combine_hidden_states=True,
        obs_state_idxs=first_state_only,
    )


def populate_component_properties(self):
    """
    Declare the seasonal coefficients ``{name}_coefs`` and, with innovations, ``sigma_{name}``.

    The coefficient vector is indexed by the ``{name}_state`` dimension, whose coordinates are the state names.
    A shock name is only declared when innovations are enabled.
    """
    coefs_name = f"{self.name}_coefs"
    state_dim = f"{self.name}_state"

    self.param_names = [coefs_name]
    self.param_info = {
        coefs_name: {
            "shape": (self.k_states,),
            "constraints": None,
            "dims": (state_dim,),
        }
    }
    self.param_dims = {coefs_name: (state_dim,)}
    self.coords = {state_dim: self.state_names}

    if not self.innovations:
        return

    sigma_name = f"sigma_{self.name}"
    self.param_names += [sigma_name]
    self.param_info[sigma_name] = {
        "shape": (),
        "constraints": "Positive",
        "dims": None,
    }
    self.shock_names = [f"{self.name}"]
class FrequencySeasonality(Component):
    r"""
    Seasonal component, modeled in the frequency domain

    Parameters
    ----------
    season_length: float
        The number of periods in a single seasonal cycle, e.g. 12 for monthly data with annual seasonal pattern, 7 for
        daily data with weekly seasonal pattern, etc. Non-integer season_length is also permitted, for example
        365.2422 days in a (solar) year.

    n: int
        Number of fourier features to include in the seasonal component. Default is ``season_length // 2``, which
        is the maximum possible. A smaller number can be used for a more wave-like seasonal pattern. Must be at
        least 1.

    name: str, default None
        A name for this seasonal component. Used to label dimensions and coordinates. Useful when multiple seasonal
        components are included in the same model. Default is ``f"Frequency[s={season_length}, n={n}]"``

    innovations: bool, default True
        Whether to include stochastic innovations in the strength of the seasonal effect

    observed_state_names: list of str, optional
        Names of the observed states. Defaults to ``["data"]``.

    Notes
    -----
    A seasonal effect is any pattern that repeats every fixed interval. Although there are many possible ways to
    model seasonal effects, the implementation used here is the one described by [1] as the "canonical" frequency domain
    representation. The seasonal component can be expressed:

    .. math::
        \begin{align}
            \gamma_t &= \sum_{j=1}^{n} \gamma_{j,t} \\
            \gamma_{j, t+1} &= \gamma_{j,t} \cos \lambda_j + \gamma_{j,t}^\star \sin \lambda_j + \omega_{j, t} \\
            \gamma_{j, t+1}^\star &= -\gamma_{j,t} \sin \lambda_j + \gamma_{j,t}^\star \cos \lambda_j + \omega_{j,t}^\star \\
            \lambda_j &= \frac{2\pi j}{s}
        \end{align}

    Where :math:`s` is the ``season_length``.

    Unlike a ``TimeSeasonality`` component, a ``FrequencySeasonality`` component does not require integer season
    length. In addition, for long seasonal periods, it is possible to obtain a more compact state space representation
    by choosing ``n << s // 2``. Using ``TimeSeasonality``, an annual seasonal pattern in daily data requires 364
    states, whereas ``FrequencySeasonality`` always requires ``2 * n`` states, regardless of the ``season_length``.
    The price of this compactness is less representational power. At ``n = 1``, the seasonal pattern will be a pure
    sine wave. At ``n = s // 2``, any arbitrary pattern can be represented.

    One cost of the added flexibility of ``FrequencySeasonality`` is reduced interpretability. States of this model are
    coefficients :math:`\gamma_1, \gamma^\star_1, \gamma_2, \gamma_2^\star ..., \gamma_n, \gamma^\star_n` associated
    with different frequencies in the fourier representation of the seasonal pattern. As a result, it is not possible
    to isolate and identify a "Monday" effect, for instance.
    """

    def __init__(
        self,
        season_length,
        n=None,
        name=None,
        innovations=True,
        observed_state_names: list[str] | None = None,
    ):
        if observed_state_names is None:
            observed_state_names = ["data"]

        if n is None:
            n = int(season_length // 2)
        if n < 1:
            # n == 0 would otherwise surface as a ZeroDivisionError in the saturation test below
            raise ValueError(
                f"n must be at least 1, got n={n} with season_length={season_length}. When n is not given it "
                f"defaults to season_length // 2, so season_length must be at least 2."
            )
        if name is None:
            name = f"Frequency[s={season_length}, n={n}]"

        k_states = n * 2
        self.n = n
        self.season_length = season_length
        self.innovations = innovations

        # If the model is completely saturated (n = s // 2), the last state will not be identified, so it shouldn't
        # get a parameter assigned to it and should just be fixed to zero.
        # Test this way (rather than n == s // 2) to catch cases when n is non-integer.
        self.last_state_not_identified = self.season_length / self.n == 2.0
        self.n_coefs = k_states - int(self.last_state_not_identified)

        # Every other state, starting from the first, is flagged as observed
        obs_state_idx = np.zeros(k_states)
        obs_state_idx[slice(0, k_states, 2)] = 1

        # NOTE(review): k_endog is fixed at 1 here regardless of len(observed_state_names) -- confirm intended.
        super().__init__(
            name=name,
            k_endog=1,
            k_states=k_states,
            k_posdef=k_states * int(self.innovations),
            observed_state_names=observed_state_names,
            measurement_error=False,
            combine_hidden_states=True,
            obs_state_idxs=obs_state_idx,
        )

    def make_symbolic_graph(self) -> None:
        """
        Write the frequency-domain seasonal structure into the statespace matrices.

        The first ``n_coefs`` initial states are the symbolic coefficients, and the transition matrix is block
        diagonal with one frequency block per harmonic ``j = 1, ..., n``. With innovations enabled, all states
        share one variance ``sigma_{name} ** 2`` and the selection matrix is the identity.
        """
        self.ssm["design", 0, slice(0, self.k_states, 2)] = 1

        init_state = self.make_and_register_variable(f"{self.name}", shape=(self.n_coefs,))

        # Only the first n_coefs entries of the initial state are assigned from the parameter
        init_state_idx = np.arange(self.n_coefs, dtype=int)
        self.ssm["initial_state", init_state_idx] = init_state

        T_mats = [_frequency_transition_block(self.season_length, j + 1) for j in range(self.n)]
        T = pt.linalg.block_diag(*T_mats)
        self.ssm["transition", :, :] = T

        if self.innovations:
            sigma_season = self.make_and_register_variable(f"sigma_{self.name}", shape=())
            self.ssm["state_cov", :, :] = pt.eye(self.k_posdef) * sigma_season**2
            self.ssm["selection", :, :] = np.eye(self.k_states)

    def populate_component_properties(self):
        """
        Declare state names, the coefficient parameter ``{name}`` and, with innovations, ``sigma_{name}``.

        When the last state is not identified, its name is left out of the ``{name}_state`` coordinates so that
        the coordinates match the ``n_coefs`` coefficients.
        """
        self.state_names = [f"{self.name}_{f}_{i}" for i in range(self.n) for f in ["Cos", "Sin"]]
        self.param_names = [f"{self.name}"]

        self.param_dims = {self.name: (f"{self.name}_state",)}
        self.param_info = {
            f"{self.name}": {
                "shape": (self.k_states - int(self.last_state_not_identified),),
                "constraints": None,
                "dims": (f"{self.name}_state",),
            }
        }

        init_state_idx = np.arange(self.k_states, dtype=int)
        if self.last_state_not_identified:
            init_state_idx = init_state_idx[:-1]
        self.coords = {f"{self.name}_state": [self.state_names[i] for i in init_state_idx]}

        if self.innovations:
            self.shock_names = self.state_names.copy()
            self.param_names += [f"sigma_{self.name}"]
            self.param_info[f"sigma_{self.name}"] = {
                "shape": (),
                "constraints": "Positive",
                "dims": None,
            }
Unlike the SARIMAX framework, the time series is not assumed to be stationary. + + Notes + ----- + + .. math:: + + y_t = \mu_t + \gamma_t + c_t + \varepsilon_t + + """ + + def __init__( + self, + ssm: PytensorRepresentation, + name: str, + state_names: list[str], + observed_state_names: list[str], + data_names: list[str], + shock_names: list[str], + param_names: list[str], + exog_names: list[str], + param_dims: dict[str, tuple[int]], + coords: dict[str, Sequence], + param_info: dict[str, dict[str, Any]], + data_info: dict[str, dict[str, Any]], + component_info: dict[str, dict[str, Any]], + measurement_error: bool, + name_to_variable: dict[str, Variable], + name_to_data: dict[str, Variable] | None = None, + verbose: bool = True, + filter_type: str = "standard", + mode: str | Mode | None = None, + ): + name = "StructuralTimeSeries" if name is None else name + + self._name = name + self._observed_state_names = observed_state_names + + k_states, k_posdef, k_endog = ssm.k_states, ssm.k_posdef, ssm.k_endog + param_names, param_dims, param_info = self._add_inital_state_cov_to_properties( + param_names, param_dims, param_info, k_states + ) + self._state_names = state_names.copy() + self._data_names = data_names.copy() + self._shock_names = shock_names.copy() + self._param_names = param_names.copy() + self._param_dims = param_dims.copy() + + default_coords = make_default_coords(self) + coords.update(default_coords) + + self._coords = coords + self._param_info = param_info.copy() + self._data_info = data_info.copy() + self.measurement_error = measurement_error + + super().__init__( + k_endog, + k_states, + max(1, k_posdef), + filter_type=filter_type, + verbose=verbose, + measurement_error=measurement_error, + mode=mode, + ) + self.ssm = ssm.copy() + + if k_posdef == 0: + # If there is no randomness in the model, add dummy matrices to the representation to avoid errors + # when we go to construct random variables from the matrices + self.ssm.k_posdef = self.k_posdef + 
self.ssm.shapes["state_cov"] = (1, 1, 1) + self.ssm["state_cov"] = pt.zeros((1, 1, 1)) + + self.ssm.shapes["selection"] = (1, self.k_states, 1) + self.ssm["selection"] = pt.zeros((1, self.k_states, 1)) + + self._component_info = component_info.copy() + + self._name_to_variable = name_to_variable.copy() + self._name_to_data = name_to_data.copy() + + self._exog_names = exog_names.copy() + self._needs_exog_data = len(exog_names) > 0 + + P0 = self.make_and_register_variable("P0", shape=(self.k_states, self.k_states)) + self.ssm["initial_state_cov"] = P0 + + @staticmethod + def _add_inital_state_cov_to_properties(param_names, param_dims, param_info, k_states): + param_names += ["P0"] + param_dims["P0"] = (ALL_STATE_DIM, ALL_STATE_AUX_DIM) + param_info["P0"] = { + "shape": (k_states, k_states), + "constraints": "Positive semi-definite", + "dims": param_dims["P0"], + } + + return param_names, param_dims, param_info + + @property + def param_names(self): + return self._param_names + + @property + def data_names(self) -> list[str]: + return self._data_names + + @property + def state_names(self): + return self._state_names + + @property + def observed_states(self): + return self._observed_state_names + + @property + def shock_names(self): + return self._shock_names + + @property + def param_dims(self): + return self._param_dims + + @property + def coords(self) -> dict[str, Sequence]: + return self._coords + + @property + def param_info(self) -> dict[str, dict[str, Any]]: + return self._param_info + + @property + def data_info(self) -> dict[str, dict[str, Any]]: + return self._data_info + + def make_symbolic_graph(self) -> None: + """ + Assign placeholder pytensor variables among statespace matrices in positions where PyMC variables will go. + + Notes + ----- + This assignment is handled by the components, so this function is implemented only to avoid the + NotImplementedError raised by the base class. 
+ """ + + pass + + def _state_slices_from_info(self): + info = self._component_info.copy() + comp_states = np.cumsum([0] + [info["k_states"] for info in info.values()]) + state_slices = [slice(i, j) for i, j in pairwise(comp_states)] + + return state_slices + + def _hidden_states_from_data(self, data): + state_slices = self._state_slices_from_info() + info = self._component_info + names = info.keys() + result = [] + + for i, (name, s) in enumerate(zip(names, state_slices)): + obs_idx = info[name]["obs_state_idx"] + if obs_idx is None: + continue + + X = data[..., s] + if info[name]["combine_hidden_states"]: + sum_idx = np.flatnonzero(obs_idx) + result.append(X[..., sum_idx].sum(axis=-1)[..., None]) + else: + comp_names = self.state_names[s] + for j, state_name in enumerate(comp_names): + result.append(X[..., j, None]) + + return np.concatenate(result, axis=-1) + + def _get_subcomponent_names(self): + state_slices = self._state_slices_from_info() + info = self._component_info + names = info.keys() + result = [] + + for i, (name, s) in enumerate(zip(names, state_slices)): + if info[name]["combine_hidden_states"]: + result.append(name) + else: + comp_names = self.state_names[s] + result.extend([f"{name}[{comp_name}]" for comp_name in comp_names]) + return result + + def extract_components_from_idata(self, idata: xr.Dataset) -> xr.Dataset: + r""" + Extract interpretable hidden states from an InferenceData returned by a PyMCStateSpace sampling method + + Parameters + ---------- + idata: Dataset + A Dataset object, returned by a PyMCStateSpace sampling method + + Returns + ------- + idata: Dataset + An Dataset object with hidden states transformed to represent only the "interpretable" subcomponents + of the structural model. + + Notes + ----- + In general, a structural statespace model can be represented as: + + .. 
math:: + y_t = \mu_t + \nu_t + \cdots + \gamma_t + c_t + \xi_t + \epsilon_t \tag{1} + + Where: + + - :math:`\mu_t` is the level of the data at time t + - :math:`\nu_t` is the slope of the data at time t + - :math:`\cdots` are higher time derivatives of the position (acceleration, jerk, etc) at time t + - :math:`\gamma_t` is the seasonal component at time t + - :math:`c_t` is the cycle component at time t + - :math:`\xi_t` is the autoregressive error at time t + - :math:`\varepsilon_t` is the measurement error at time t + + In state space form, some or all of these components are represented as linear combinations of other + subcomponents, making interpretation of the outputs of the outputs difficult. The purpose of this function is + to take the expended statespace representation and return a "reduced form" of only the components shown in + equation (1). + """ + + def _extract_and_transform_variable(idata, new_state_names): + *_, time_dim, state_dim = idata.dims + state_func = ft.partial(self._hidden_states_from_data) + new_idata = xr.apply_ufunc( + state_func, + idata, + input_core_dims=[[time_dim, state_dim]], + output_core_dims=[[time_dim, state_dim]], + exclude_dims={state_dim}, + ) + new_idata.coords.update({state_dim: new_state_names}) + return new_idata + + var_names = list(idata.data_vars.keys()) + is_latent = [idata[name].shape[-1] == self.k_states for name in var_names] + new_state_names = self._get_subcomponent_names() + + latent_names = [name for latent, name in zip(is_latent, var_names) if latent] + dropped_vars = set(var_names) - set(latent_names) + if len(dropped_vars) > 0: + _log.warning( + f'Variables {", ".join(dropped_vars)} do not contain all hidden states (their last dimension ' + f"is not {self.k_states}). They will not be present in the modified idata." + ) + if len(dropped_vars) == len(var_names): + raise ValueError( + "Provided idata had no variables with all hidden states; cannot extract components." 
+ ) + + idata_new = xr.Dataset( + { + name: _extract_and_transform_variable(idata[name], new_state_names) + for name in latent_names + } + ) + return idata_new + + +class Component: + r""" + Base class for a component of a structural timeseries model. + + This base class contains a subset of the class attributes of the PyMCStateSpace class, and none of the class + methods. The purpose of a component is to allow the partial definition of a structural model. Components are + assembled into a full model by the StructuralTimeSeries class. + + Parameters + ---------- + name: str + The name of the component + k_endog: int + Number of endogenous variables being modeled. + k_states: int + Number of hidden states in the component model + k_posdef: int + Rank of the state covariance matrix, or the number of sources of innovations in the component model + observed_state_names: str or list or str, optional + Names of the observed states associated with this component. Must have the same length as k_endog. If not + provided, generic names are generated: ``observed_state_1, observed_state_2, ..., observed_state_k_endog``. + measurement_error: bool + Whether the observation associated with the component has measurement error. Default is False. + combine_hidden_states: bool + Flag for the ``extract_hidden_states_from_data`` method. When ``True``, hidden states from the component model + are extracted as ``hidden_states[:, np.flatnonzero(Z)]``. Should be True in models where hidden states + individually have no interpretation, such as seasonal or autoregressive components. 
+ """ + + def __init__( + self, + name, + k_endog, + k_states, + k_posdef, + state_names=None, + observed_state_names=None, + data_names=None, + shock_names=None, + param_names=None, + exog_names=None, + representation: PytensorRepresentation | None = None, + measurement_error=False, + combine_hidden_states=True, + component_from_sum=False, + obs_state_idxs=None, + ): + self.name = name + self.k_endog = k_endog + self.k_states = k_states + self.k_posdef = k_posdef + self.measurement_error = measurement_error + + self.state_names = state_names if state_names is not None else [] + self.observed_state_names = observed_state_names if observed_state_names is not None else [] + self.data_names = data_names if data_names is not None else [] + self.shock_names = shock_names if shock_names is not None else [] + self.param_names = param_names if param_names is not None else [] + self.exog_names = exog_names if exog_names is not None else [] + + self.needs_exog_data = len(self.exog_names) > 0 + self.coords = {} + self.param_dims = {} + + self.param_info = {} + self.data_info = {} + + self.param_counts = {} + + if representation is None: + self.ssm = PytensorRepresentation(k_endog=k_endog, k_states=k_states, k_posdef=k_posdef) + else: + self.ssm = representation + + self._name_to_variable = {} + self._name_to_data = {} + + if not component_from_sum: + self.populate_component_properties() + self.make_symbolic_graph() + + self._component_info = { + self.name: { + "k_states": self.k_states, + "k_enodg": self.k_endog, + "k_posdef": self.k_posdef, + "observed_state_names": self.observed_state_names, + "combine_hidden_states": combine_hidden_states, + "obs_state_idx": obs_state_idxs, + } + } + + def make_and_register_variable(self, name, shape, dtype=floatX) -> Variable: + r""" + Helper function to create a pytensor symbolic variable and register it in the _name_to_variable dictionary + + Parameters + ---------- + name : str + The name of the placeholder variable. 
Must be the name of a model parameter. + shape : int or tuple of int + Shape of the parameter + dtype : str, default pytensor.config.floatX + dtype of the parameter + + Notes + ----- + Symbolic pytensor variables are used in the ``make_symbolic_graph`` method as placeholders for PyMC random + variables. The change is made in the ``_insert_random_variables`` method via ``pytensor.graph_replace``. To + make the change, a dictionary mapping pytensor variables to PyMC random variables needs to be constructed. + + The purpose of this method is to: + 1. Create the placeholder symbolic variables + 2. Register the placeholder variable in the ``_name_to_variable`` dictionary + + The shape provided here will define the shape of the prior that will need to be provided by the user. + + An error is raised if the provided name has already been registered, or if the name is not present in the + ``param_names`` property. + """ + if name not in self.param_names: + raise ValueError( + f"{name} is not a model parameter. All placeholder variables should correspond to model " + f"parameters." + ) + + if name in self._name_to_variable.keys(): + raise ValueError( + f"{name} is already a registered placeholder variable with shape " + f"{self._name_to_variable[name].type.shape}" + ) + + placeholder = pt.tensor(name, shape=shape, dtype=dtype) + self._name_to_variable[name] = placeholder + return placeholder + + def make_and_register_data(self, name, shape, dtype=floatX) -> Variable: + r""" + Helper function to create a pytensor symbolic variable and register it in the _name_to_data dictionary + + Parameters + ---------- + name : str + The name of the placeholder data. Must be the name of an expected data variable. + shape : int or tuple of int + Shape of the parameter + dtype : str, default pytensor.config.floatX + dtype of the parameter + + Notes + ----- + See docstring for make_and_register_variable for more details. 
This function is similar, but handles data + inputs instead of model parameters. + + An error is raised if the provided name has already been registered, or if the name is not present in the + ``data_names`` property. + """ + if name not in self.data_names: + raise ValueError( + f"{name} is not a model parameter. All placeholder variables should correspond to model " + f"parameters." + ) + + if name in self._name_to_data.keys(): + raise ValueError( + f"{name} is already a registered placeholder variable with shape " + f"{self._name_to_data[name].type.shape}" + ) + + placeholder = pt.tensor(name, shape=shape, dtype=dtype) + self._name_to_data[name] = placeholder + return placeholder + + def make_symbolic_graph(self) -> None: + raise NotImplementedError + + def populate_component_properties(self): + raise NotImplementedError + + def _get_combined_shapes(self, other): + k_states = self.k_states + other.k_states + k_posdef = self.k_posdef + other.k_posdef + if self.k_endog != other.k_endog: + raise NotImplementedError( + "Merging elements with different numbers of observed states is not supported." 
+ ) + k_endog = self.k_endog + + return k_states, k_posdef, k_endog + + def _combine_statespace_representations(self, other): + def make_slice(name, x, o_x): + ndim = max(x.ndim, o_x.ndim) + return (name,) + (slice(None, None, None),) * ndim + + k_states, k_posdef, k_endog = self._get_combined_shapes(other) + + self_matrices = [self.ssm[name] for name in LONG_MATRIX_NAMES] + other_matrices = [other.ssm[name] for name in LONG_MATRIX_NAMES] + + x0, P0, c, d, T, Z, R, H, Q = ( + self.ssm[make_slice(name, x, o_x)] + for name, x, o_x in zip(LONG_MATRIX_NAMES, self_matrices, other_matrices) + ) + o_x0, o_P0, o_c, o_d, o_T, o_Z, o_R, o_H, o_Q = ( + other.ssm[make_slice(name, x, o_x)] + for name, x, o_x in zip(LONG_MATRIX_NAMES, self_matrices, other_matrices) + ) + + initial_state = pt.concatenate(conform_time_varying_and_time_invariant_matrices(x0, o_x0)) + initial_state.name = x0.name + + initial_state_cov = pt.linalg.block_diag(P0, o_P0) + initial_state_cov.name = P0.name + + state_intercept = pt.concatenate(conform_time_varying_and_time_invariant_matrices(c, o_c)) + state_intercept.name = c.name + + obs_intercept = d + o_d + obs_intercept.name = d.name + + transition = pt.linalg.block_diag(T, o_T) + transition.name = T.name + + design = pt.concatenate(conform_time_varying_and_time_invariant_matrices(Z, o_Z), axis=-1) + design.name = Z.name + + selection = pt.linalg.block_diag(R, o_R) + selection.name = R.name + + obs_cov = H + o_H + obs_cov.name = H.name + + state_cov = pt.linalg.block_diag(Q, o_Q) + state_cov.name = Q.name + + new_ssm = PytensorRepresentation( + k_endog=k_endog, + k_states=k_states, + k_posdef=k_posdef, + initial_state=initial_state, + initial_state_cov=initial_state_cov, + state_intercept=state_intercept, + obs_intercept=obs_intercept, + transition=transition, + design=design, + selection=selection, + obs_cov=obs_cov, + state_cov=state_cov, + ) + + return new_ssm + + def _combine_property(self, other, name, allow_duplicates=True): + self_prop = 
getattr(self, name) + if isinstance(self_prop, list) and allow_duplicates: + return self_prop + getattr(other, name) + elif isinstance(self_prop, list) and not allow_duplicates: + return self_prop + [x for x in getattr(other, name) if x not in self_prop] + elif isinstance(self_prop, dict): + new_prop = self_prop.copy() + new_prop.update(getattr(other, name)) + return new_prop + + def _combine_component_info(self, other): + combined_info = {} + for key, value in self._component_info.items(): + if not key.startswith("StateSpace"): + if key in combined_info.keys(): + raise ValueError(f"Found duplicate component named {key}") + combined_info[key] = value + + for key, value in other._component_info.items(): + if not key.startswith("StateSpace"): + if key in combined_info.keys(): + raise ValueError(f"Found duplicate component named {key}") + combined_info[key] = value + + return combined_info + + def _make_combined_name(self): + components = self._component_info.keys() + name = f'StateSpace[{", ".join(components)}]' + return name + + def __add__(self, other): + state_names = self._combine_property(other, "state_names") + data_names = self._combine_property(other, "data_names") + observed_state_names = self._combine_property( + other, "observed_state_names", allow_duplicates=False + ) + + param_names = self._combine_property(other, "param_names") + shock_names = self._combine_property(other, "shock_names") + param_info = self._combine_property(other, "param_info") + data_info = self._combine_property(other, "data_info") + param_dims = self._combine_property(other, "param_dims") + coords = self._combine_property(other, "coords") + exog_names = self._combine_property(other, "exog_names") + + _name_to_variable = self._combine_property(other, "_name_to_variable") + _name_to_data = self._combine_property(other, "_name_to_data") + + measurement_error = any([self.measurement_error, other.measurement_error]) + + k_states, k_posdef, k_endog = self._get_combined_shapes(other) + + 
ssm = self._combine_statespace_representations(other) + + new_comp = Component( + name="", + k_endog=k_endog, + k_states=k_states, + k_posdef=k_posdef, + observed_state_names=observed_state_names, + measurement_error=measurement_error, + representation=ssm, + component_from_sum=True, + ) + new_comp._component_info = self._combine_component_info(other) + new_comp.name = new_comp._make_combined_name() + + names_and_props = [ + ("state_names", state_names), + ("observed_state_names", observed_state_names), + ("data_names", data_names), + ("param_names", param_names), + ("shock_names", shock_names), + ("param_dims", param_dims), + ("coords", coords), + ("param_dims", param_dims), + ("param_info", param_info), + ("data_info", data_info), + ("exog_names", exog_names), + ("_name_to_variable", _name_to_variable), + ("_name_to_data", _name_to_data), + ] + + for prop, value in names_and_props: + setattr(new_comp, prop, value) + + return new_comp + + def build( + self, name=None, filter_type="standard", verbose=True, mode: str | Mode | None = None + ): + """ + Build a StructuralTimeSeries statespace model from the current component(s) + + Parameters + ---------- + name: str, optional + Name of the exogenous data being modeled. Default is "data" + + filter_type : str, optional + The type of Kalman filter to use. Valid options are "standard", "univariate", "single", "cholesky", and + "steady_state". For more information, see the docs for each filter. Default is "standard". + + verbose : bool, optional + If True, displays information about the initialized model. Defaults to True. + + mode: str or Mode, optional + Pytensor compile mode, used in auxiliary sampling methods such as ``sample_conditional_posterior`` and + ``forecast``. The mode does **not** effect calls to ``pm.sample``. + + Regardless of whether a mode is specified, it can always be overwritten via the ``compile_kwargs`` argument + to all sampling methods. 
+ + Returns + ------- + PyMCStateSpace + An initialized instance of a PyMCStateSpace, constructed using the system matrices contained in the + components. + """ + + return StructuralTimeSeries( + self.ssm, + name=name, + state_names=self.state_names, + observed_state_names=self.observed_state_names, + data_names=self.data_names, + shock_names=self.shock_names, + param_names=self.param_names, + param_dims=self.param_dims, + coords=self.coords, + param_info=self.param_info, + data_info=self.data_info, + component_info=self._component_info, + measurement_error=self.measurement_error, + exog_names=self.exog_names, + name_to_variable=self._name_to_variable, + name_to_data=self._name_to_data, + filter_type=filter_type, + verbose=verbose, + mode=mode, + ) diff --git a/pymc_extras/statespace/models/structural/utils.py b/pymc_extras/statespace/models/structural/utils.py new file mode 100644 index 000000000..d75252225 --- /dev/null +++ b/pymc_extras/statespace/models/structural/utils.py @@ -0,0 +1,16 @@ +import numpy as np + +from pytensor import tensor as pt + + +def order_to_mask(order): + if isinstance(order, int): + return np.ones(order).astype(bool) + else: + return np.array(order).astype(bool) + + +def _frequency_transition_block(s, j): + lam = 2 * np.pi * j / s + + return pt.stack([[pt.cos(lam), pt.sin(lam)], [-pt.sin(lam), pt.cos(lam)]]) diff --git a/tests/statespace/models/structural/__init__.py b/tests/statespace/models/structural/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/statespace/models/structural/components/__init__.py b/tests/statespace/models/structural/components/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/statespace/models/structural/components/test_autoregressive.py b/tests/statespace/models/structural/components/test_autoregressive.py new file mode 100644 index 000000000..f68a34de6 --- /dev/null +++ b/tests/statespace/models/structural/components/test_autoregressive.py @@ -0,0 +1,28 
@@ +import numpy as np +import pytest + +from numpy.testing import assert_allclose +from pytensor import config + +from pymc_extras.statespace.models import structural as st +from tests.statespace.models.structural.conftest import _assert_basic_coords_correct +from tests.statespace.test_utilities import simulate_from_numpy_model + + +@pytest.mark.parametrize("order", [1, 2, [1, 0, 1]], ids=["AR1", "AR2", "AR(1,0,1)"]) +def test_autoregressive_model(order, rng): + ar = st.AutoregressiveComponent(order=order) + params = { + "ar_params": np.full((sum(ar.order),), 0.5, dtype=config.floatX), + "sigma_ar": 0.0, + } + + x, y = simulate_from_numpy_model(ar, rng, params, steps=100) + + # Check coords + ar.build(verbose=False) + _assert_basic_coords_correct(ar) + lags = np.arange(len(order) if isinstance(order, list) else order, dtype="int") + 1 + if isinstance(order, list): + lags = lags[np.flatnonzero(order)] + assert_allclose(ar.coords["ar_lag"], lags) diff --git a/tests/statespace/models/structural/components/test_cycle.py b/tests/statespace/models/structural/components/test_cycle.py new file mode 100644 index 000000000..b24eae290 --- /dev/null +++ b/tests/statespace/models/structural/components/test_cycle.py @@ -0,0 +1,52 @@ +import numpy as np + +from numpy.testing import assert_allclose +from pytensor import config + +from pymc_extras.statespace.models import structural as st +from tests.statespace.models.structural.conftest import _assert_basic_coords_correct +from tests.statespace.test_utilities import assert_pattern_repeats, simulate_from_numpy_model + +ATOL = 1e-8 if config.floatX.endswith("64") else 1e-4 +RTOL = 0 if config.floatX.endswith("64") else 1e-6 + + +cycle_test_vals = zip([None, None, 3, 5, 10], [False, True, True, False, False]) + + +def test_cycle_component_deterministic(rng): + cycle = st.CycleComponent( + name="cycle", cycle_length=12, estimate_cycle_length=False, innovations=False + ) + params = {"cycle": np.array([1.0, 1.0], dtype=config.floatX)} 
+ x, y = simulate_from_numpy_model(cycle, rng, params, steps=12 * 12) + + assert_pattern_repeats(y, 12, atol=ATOL, rtol=RTOL) + + +def test_cycle_component_with_dampening(rng): + cycle = st.CycleComponent( + name="cycle", cycle_length=12, estimate_cycle_length=False, innovations=False, dampen=True + ) + params = {"cycle": np.array([10.0, 10.0], dtype=config.floatX), "cycle_dampening_factor": 0.75} + x, y = simulate_from_numpy_model(cycle, rng, params, steps=100) + + # Check that the cycle dampens to zero over time + assert_allclose(y[-1], 0.0, atol=ATOL, rtol=RTOL) + + +def test_cycle_component_with_innovations_and_cycle_length(rng): + cycle = st.CycleComponent( + name="cycle", estimate_cycle_length=True, innovations=True, dampen=True + ) + params = { + "cycle": np.array([1.0, 1.0], dtype=config.floatX), + "cycle_length": 12.0, + "cycle_dampening_factor": 0.95, + "sigma_cycle": 1.0, + } + + x, y = simulate_from_numpy_model(cycle, rng, params) + + cycle.build(verbose=False) + _assert_basic_coords_correct(cycle) diff --git a/tests/statespace/models/structural/components/test_level_trend.py b/tests/statespace/models/structural/components/test_level_trend.py new file mode 100644 index 000000000..9b48ba5b9 --- /dev/null +++ b/tests/statespace/models/structural/components/test_level_trend.py @@ -0,0 +1,24 @@ +import numpy as np + +from numpy.testing import assert_allclose +from pytensor import config + +from pymc_extras.statespace.models import structural as st +from tests.statespace.models.structural.conftest import _assert_basic_coords_correct +from tests.statespace.test_utilities import simulate_from_numpy_model + +ATOL = 1e-8 if config.floatX.endswith("64") else 1e-4 +RTOL = 0 if config.floatX.endswith("64") else 1e-6 + + +def test_level_trend_model(rng): + mod = st.LevelTrendComponent(order=2, innovations_order=0) + params = {"initial_trend": [0.0, 1.0]} + x, y = simulate_from_numpy_model(mod, rng, params) + + assert_allclose(np.diff(y), 1, atol=ATOL, rtol=RTOL) + + 
# Check coords + mod = mod.build(verbose=False) + _assert_basic_coords_correct(mod) + assert mod.coords["trend_state"] == ["level", "trend"] diff --git a/tests/statespace/models/structural/components/test_measurement_error.py b/tests/statespace/models/structural/components/test_measurement_error.py new file mode 100644 index 000000000..752e8513c --- /dev/null +++ b/tests/statespace/models/structural/components/test_measurement_error.py @@ -0,0 +1,10 @@ +from pymc_extras.statespace.models import structural as st +from tests.statespace.models.structural.conftest import _assert_basic_coords_correct + + +def test_measurement_error(rng): + mod = st.MeasurementError("obs") + st.LevelTrendComponent(order=2) + mod = mod.build(verbose=False) + + _assert_basic_coords_correct(mod) + assert "sigma_obs" in mod.param_names diff --git a/tests/statespace/models/structural/components/test_regression.py b/tests/statespace/models/structural/components/test_regression.py new file mode 100644 index 000000000..504ee8eb2 --- /dev/null +++ b/tests/statespace/models/structural/components/test_regression.py @@ -0,0 +1,69 @@ +import numpy as np +import pandas as pd +import pymc as pm + +from numpy.testing import assert_allclose +from pytensor import config +from pytensor import tensor as pt + +from pymc_extras.statespace.models import structural as st +from tests.statespace.models.structural.conftest import _assert_basic_coords_correct +from tests.statespace.test_utilities import simulate_from_numpy_model + +ATOL = 1e-8 if config.floatX.endswith("64") else 1e-4 +RTOL = 0 if config.floatX.endswith("64") else 1e-6 + + +def test_exogenous_component(rng): + data = rng.normal(size=(100, 2)).astype(config.floatX) + mod = st.RegressionComponent(state_names=["feature_1", "feature_2"], name="exog") + + params = {"beta_exog": np.array([1.0, 2.0], dtype=config.floatX)} + exog_data = {"data_exog": data} + x, y = simulate_from_numpy_model(mod, rng, params, exog_data) + + # Check that the generated data 
is just a linear regression + assert_allclose(y, data @ params["beta_exog"], atol=ATOL, rtol=RTOL) + + mod.build(verbose=False) + _assert_basic_coords_correct(mod) + assert mod.coords["exog_state"] == ["feature_1", "feature_2"] + + +def test_adding_exogenous_component(rng): + data = rng.normal(size=(100, 2)).astype(config.floatX) + reg = st.RegressionComponent(state_names=["a", "b"], name="exog") + ll = st.LevelTrendComponent(name="level") + + seasonal = st.FrequencySeasonality(name="annual", season_length=12, n=4) + mod = reg + ll + seasonal + + assert mod.ssm["design"].eval({"data_exog": data}).shape == (100, 1, 2 + 2 + 8) + assert_allclose(mod.ssm["design", 5, 0, :2].eval({"data_exog": data}), data[5]) + + +def test_filter_scans_time_varying_design_matrix(rng): + time_idx = pd.date_range(start="2000-01-01", freq="D", periods=100) + data = pd.DataFrame(rng.normal(size=(100, 2)), columns=["a", "b"], index=time_idx) + + y = pd.DataFrame(rng.normal(size=(100, 1)), columns=["data"], index=time_idx) + + reg = st.RegressionComponent(state_names=["a", "b"], name="exog") + mod = reg.build(verbose=False) + + with pm.Model(coords=mod.coords) as m: + data_exog = pm.Data("data_exog", data.values) + + x0 = pm.Normal("x0", dims=["state"]) + P0 = pm.Deterministic("P0", pt.eye(mod.k_states), dims=["state", "state_aux"]) + beta_exog = pm.Normal("beta_exog", dims=["exog_state"]) + + mod.build_statespace_graph(y) + x0, P0, c, d, T, Z, R, H, Q = mod.unpack_statespace() + pm.Deterministic("Z", Z) + + prior = pm.sample_prior_predictive(draws=10) + + prior_Z = prior.prior.Z.values + assert prior_Z.shape == (1, 10, 100, 1, 2) + assert_allclose(prior_Z[0, :, :, 0, :], data.values[None].repeat(10, axis=0)) diff --git a/tests/statespace/models/structural/components/test_seasonality.py b/tests/statespace/models/structural/components/test_seasonality.py new file mode 100644 index 000000000..61ad4b198 --- /dev/null +++ b/tests/statespace/models/structural/components/test_seasonality.py @@ 
-0,0 +1,83 @@ +import numpy as np +import pytest + +from pytensor import config + +from pymc_extras.statespace.models import structural as st +from tests.statespace.models.structural.conftest import _assert_basic_coords_correct +from tests.statespace.test_utilities import assert_pattern_repeats, simulate_from_numpy_model + +ATOL = 1e-8 if config.floatX.endswith("64") else 1e-4 +RTOL = 0 if config.floatX.endswith("64") else 1e-6 + + +@pytest.mark.parametrize("s", [10, 25, 50]) +@pytest.mark.parametrize("innovations", [True, False]) +@pytest.mark.parametrize("remove_first_state", [True, False]) +@pytest.mark.filterwarnings( + "ignore:divide by zero encountered in matmul:RuntimeWarning", + "ignore:overflow encountered in matmul:RuntimeWarning", + "ignore:invalid value encountered in matmul:RuntimeWarning", +) +def test_time_seasonality(s, innovations, remove_first_state, rng): + def random_word(rng): + return "".join(rng.choice(list("abcdefghijklmnopqrstuvwxyz")) for _ in range(5)) + + state_names = [random_word(rng) for _ in range(s)] + mod = st.TimeSeasonality( + season_length=s, + innovations=innovations, + name="season", + state_names=state_names, + remove_first_state=remove_first_state, + ) + x0 = np.zeros(mod.k_states, dtype=config.floatX) + x0[0] = 1 + + params = {"season_coefs": x0} + if mod.innovations: + params["sigma_season"] = 0.0 + + x, y = simulate_from_numpy_model(mod, rng, params) + y = y.ravel() + if not innovations: + assert_pattern_repeats(y, s, atol=ATOL, rtol=RTOL) + + # Check coords + mod.build(verbose=False) + _assert_basic_coords_correct(mod) + test_slice = slice(1, None) if remove_first_state else slice(None) + assert mod.coords["season_state"] == state_names[test_slice] + + +def get_shift_factor(s): + s_str = str(s) + if "." 
not in s_str: + return 1 + _, decimal = s_str.split(".") + return 10 ** len(decimal) + + +@pytest.mark.parametrize("n", [*np.arange(1, 6, dtype="int").tolist(), None]) +@pytest.mark.parametrize("s", [5, 10, 25, 25.2]) +def test_frequency_seasonality(n, s, rng): + mod = st.FrequencySeasonality(season_length=s, n=n, name="season") + x0 = rng.normal(size=mod.n_coefs).astype(config.floatX) + params = {"season": x0, "sigma_season": 0.0} + k = get_shift_factor(s) + T = int(s * k) + + x, y = simulate_from_numpy_model(mod, rng, params, steps=2 * T) + assert_pattern_repeats(y, T, atol=ATOL, rtol=RTOL) + + # Check coords + mod.build(verbose=False) + _assert_basic_coords_correct(mod) + if n is None: + n = int(s // 2) + states = [f"season_{f}_{i}" for i in range(n) for f in ["Cos", "Sin"]] + + # Remove the last state when the model is completely saturated + if s / n == 2.0: + states.pop() + assert mod.coords["season_state"] == states diff --git a/tests/statespace/models/structural/conftest.py b/tests/statespace/models/structural/conftest.py new file mode 100644 index 000000000..63ce45c1b --- /dev/null +++ b/tests/statespace/models/structural/conftest.py @@ -0,0 +1,27 @@ +import numpy as np +import pytest + +from pymc_extras.statespace.utils.constants import ( + ALL_STATE_AUX_DIM, + ALL_STATE_DIM, + OBS_STATE_AUX_DIM, + OBS_STATE_DIM, + SHOCK_AUX_DIM, + SHOCK_DIM, +) + +TEST_SEED = sum(map(ord, "Structural Statespace")) + + +@pytest.fixture(scope="session") +def rng(): + return np.random.default_rng(TEST_SEED) + + +def _assert_basic_coords_correct(mod): + assert mod.coords[ALL_STATE_DIM] == mod.state_names + assert mod.coords[ALL_STATE_AUX_DIM] == mod.state_names + assert mod.coords[SHOCK_DIM] == mod.shock_names + assert mod.coords[SHOCK_AUX_DIM] == mod.shock_names + assert mod.coords[OBS_STATE_DIM] == ["data"] + assert mod.coords[OBS_STATE_AUX_DIM] == ["data"] diff --git a/tests/statespace/models/test_structural.py 
b/tests/statespace/models/structural/test_against_statsmodels.py similarity index 62% rename from tests/statespace/models/test_structural.py rename to tests/statespace/models/structural/test_against_statsmodels.py index 1662e164a..94da8afe1 100644 --- a/tests/statespace/models/test_structural.py +++ b/tests/statespace/models/structural/test_against_statsmodels.py @@ -4,15 +4,11 @@ from collections import defaultdict import numpy as np -import pandas as pd -import pymc as pm import pytensor -import pytensor.tensor as pt import pytest import statsmodels.api as sm from numpy.testing import assert_allclose -from scipy import linalg from pymc_extras.statespace import structural as st from pymc_extras.statespace.utils.constants import ( @@ -29,8 +25,6 @@ rng, ) from tests.statespace.test_utilities import ( - assert_pattern_repeats, - simulate_from_numpy_model, unpack_symbolic_matrices_with_params, ) @@ -106,15 +100,6 @@ def _assert_coord_shapes_match_matrices(mod, params): ), f"Q expected to have shape (n_shocks, n_shocks), found {Q.shape[-2:]}" -def _assert_basic_coords_correct(mod): - assert mod.coords[ALL_STATE_DIM] == mod.state_names - assert mod.coords[ALL_STATE_AUX_DIM] == mod.state_names - assert mod.coords[SHOCK_DIM] == mod.shock_names - assert mod.coords[SHOCK_AUX_DIM] == mod.shock_names - assert mod.coords[OBS_STATE_DIM] == ["data"] - assert mod.coords[OBS_STATE_AUX_DIM] == ["data"] - - def _assert_keys_match(test_dict, expected_dict): expected_keys = list(expected_dict.keys()) param_keys = list(test_dict.keys()) @@ -548,293 +533,3 @@ def test_structural_model_against_statsmodels( _assert_param_dims_correct(built_model.param_dims, expected_dims) _assert_coords_correct(built_model.coords, expected_coords) _assert_params_info_correct(built_model.param_info, built_model.coords, built_model.param_dims) - - -def test_level_trend_model(rng): - mod = st.LevelTrendComponent(order=2, innovations_order=0) - params = {"initial_trend": [0.0, 1.0]} - x, y = 
simulate_from_numpy_model(mod, rng, params) - - assert_allclose(np.diff(y), 1, atol=ATOL, rtol=RTOL) - - # Check coords - mod = mod.build(verbose=False) - _assert_basic_coords_correct(mod) - assert mod.coords["trend_state"] == ["level", "trend"] - - -def test_measurement_error(rng): - mod = st.MeasurementError("obs") + st.LevelTrendComponent(order=2) - mod = mod.build(verbose=False) - - _assert_basic_coords_correct(mod) - assert "sigma_obs" in mod.param_names - - -@pytest.mark.parametrize("order", [1, 2, [1, 0, 1]], ids=["AR1", "AR2", "AR(1,0,1)"]) -def test_autoregressive_model(order, rng): - ar = st.AutoregressiveComponent(order=order) - params = { - "ar_params": np.full((sum(ar.order),), 0.5, dtype=floatX), - "sigma_ar": 0.0, - } - - x, y = simulate_from_numpy_model(ar, rng, params, steps=100) - - # Check coords - ar.build(verbose=False) - _assert_basic_coords_correct(ar) - lags = np.arange(len(order) if isinstance(order, list) else order, dtype="int") + 1 - if isinstance(order, list): - lags = lags[np.flatnonzero(order)] - assert_allclose(ar.coords["ar_lag"], lags) - - -@pytest.mark.parametrize("s", [10, 25, 50]) -@pytest.mark.parametrize("innovations", [True, False]) -@pytest.mark.parametrize("remove_first_state", [True, False]) -@pytest.mark.filterwarnings( - "ignore:divide by zero encountered in matmul:RuntimeWarning", - "ignore:overflow encountered in matmul:RuntimeWarning", - "ignore:invalid value encountered in matmul:RuntimeWarning", -) -def test_time_seasonality(s, innovations, remove_first_state, rng): - def random_word(rng): - return "".join(rng.choice(list("abcdefghijklmnopqrstuvwxyz")) for _ in range(5)) - - state_names = [random_word(rng) for _ in range(s)] - mod = st.TimeSeasonality( - season_length=s, - innovations=innovations, - name="season", - state_names=state_names, - remove_first_state=remove_first_state, - ) - x0 = np.zeros(mod.k_states, dtype=floatX) - x0[0] = 1 - - params = {"season_coefs": x0} - if mod.innovations: - 
params["sigma_season"] = 0.0 - - x, y = simulate_from_numpy_model(mod, rng, params) - y = y.ravel() - if not innovations: - assert_pattern_repeats(y, s, atol=ATOL, rtol=RTOL) - - # Check coords - mod.build(verbose=False) - _assert_basic_coords_correct(mod) - test_slice = slice(1, None) if remove_first_state else slice(None) - assert mod.coords["season_state"] == state_names[test_slice] - - -def get_shift_factor(s): - s_str = str(s) - if "." not in s_str: - return 1 - _, decimal = s_str.split(".") - return 10 ** len(decimal) - - -@pytest.mark.parametrize("n", [*np.arange(1, 6, dtype="int").tolist(), None]) -@pytest.mark.parametrize("s", [5, 10, 25, 25.2]) -def test_frequency_seasonality(n, s, rng): - mod = st.FrequencySeasonality(season_length=s, n=n, name="season") - x0 = rng.normal(size=mod.n_coefs).astype(floatX) - params = {"season": x0, "sigma_season": 0.0} - k = get_shift_factor(s) - T = int(s * k) - - x, y = simulate_from_numpy_model(mod, rng, params, steps=2 * T) - assert_pattern_repeats(y, T, atol=ATOL, rtol=RTOL) - - # Check coords - mod.build(verbose=False) - _assert_basic_coords_correct(mod) - if n is None: - n = int(s // 2) - states = [f"season_{f}_{i}" for i in range(n) for f in ["Cos", "Sin"]] - - # Remove the last state when the model is completely saturated - if s / n == 2.0: - states.pop() - assert mod.coords["season_state"] == states - - -cycle_test_vals = zip([None, None, 3, 5, 10], [False, True, True, False, False]) - - -def test_cycle_component_deterministic(rng): - cycle = st.CycleComponent( - name="cycle", cycle_length=12, estimate_cycle_length=False, innovations=False - ) - params = {"cycle": np.array([1.0, 1.0], dtype=floatX)} - x, y = simulate_from_numpy_model(cycle, rng, params, steps=12 * 12) - - assert_pattern_repeats(y, 12, atol=ATOL, rtol=RTOL) - - -def test_cycle_component_with_dampening(rng): - cycle = st.CycleComponent( - name="cycle", cycle_length=12, estimate_cycle_length=False, innovations=False, dampen=True - ) - params = 
{"cycle": np.array([10.0, 10.0], dtype=floatX), "cycle_dampening_factor": 0.75} - x, y = simulate_from_numpy_model(cycle, rng, params, steps=100) - - # Check that the cycle dampens to zero over time - assert_allclose(y[-1], 0.0, atol=ATOL, rtol=RTOL) - - -def test_cycle_component_with_innovations_and_cycle_length(rng): - cycle = st.CycleComponent( - name="cycle", estimate_cycle_length=True, innovations=True, dampen=True - ) - params = { - "cycle": np.array([1.0, 1.0], dtype=floatX), - "cycle_length": 12.0, - "cycle_dampening_factor": 0.95, - "sigma_cycle": 1.0, - } - - x, y = simulate_from_numpy_model(cycle, rng, params) - - cycle.build(verbose=False) - _assert_basic_coords_correct(cycle) - - -def test_exogenous_component(rng): - data = rng.normal(size=(100, 2)).astype(floatX) - mod = st.RegressionComponent(state_names=["feature_1", "feature_2"], name="exog") - - params = {"beta_exog": np.array([1.0, 2.0], dtype=floatX)} - exog_data = {"data_exog": data} - x, y = simulate_from_numpy_model(mod, rng, params, exog_data) - - # Check that the generated data is just a linear regression - assert_allclose(y, data @ params["beta_exog"], atol=ATOL, rtol=RTOL) - - mod.build(verbose=False) - _assert_basic_coords_correct(mod) - assert mod.coords["exog_state"] == ["feature_1", "feature_2"] - - -def test_adding_exogenous_component(rng): - data = rng.normal(size=(100, 2)).astype(floatX) - reg = st.RegressionComponent(state_names=["a", "b"], name="exog") - ll = st.LevelTrendComponent(name="level") - - seasonal = st.FrequencySeasonality(name="annual", season_length=12, n=4) - mod = reg + ll + seasonal - - assert mod.ssm["design"].eval({"data_exog": data}).shape == (100, 1, 2 + 2 + 8) - assert_allclose(mod.ssm["design", 5, 0, :2].eval({"data_exog": data}), data[5]) - - -def test_add_components(): - ll = st.LevelTrendComponent(order=2) - se = st.TimeSeasonality(name="seasonal", season_length=12) - mod = ll + se - - ll_params = { - "initial_trend": np.zeros(2, dtype=floatX), - 
"sigma_trend": np.ones(2, dtype=floatX), - } - se_params = { - "seasonal_coefs": np.ones(11, dtype=floatX), - "sigma_seasonal": 1.0, - } - all_params = ll_params.copy() - all_params.update(se_params) - - (ll_x0, ll_P0, ll_c, ll_d, ll_T, ll_Z, ll_R, ll_H, ll_Q) = unpack_symbolic_matrices_with_params( - ll, ll_params - ) - (se_x0, se_P0, se_c, se_d, se_T, se_Z, se_R, se_H, se_Q) = unpack_symbolic_matrices_with_params( - se, se_params - ) - x0, P0, c, d, T, Z, R, H, Q = unpack_symbolic_matrices_with_params(mod, all_params) - - for property in ["param_names", "shock_names", "param_info", "coords", "param_dims"]: - assert [x in getattr(mod, property) for x in getattr(ll, property)] - assert [x in getattr(mod, property) for x in getattr(se, property)] - - ll_mats = [ll_T, ll_R, ll_Q] - se_mats = [se_T, se_R, se_Q] - all_mats = [T, R, Q] - - for ll_mat, se_mat, all_mat in zip(ll_mats, se_mats, all_mats): - assert_allclose(all_mat, linalg.block_diag(ll_mat, se_mat), atol=ATOL, rtol=RTOL) - - ll_mats = [ll_x0, ll_c, ll_Z] - se_mats = [se_x0, se_c, se_Z] - all_mats = [x0, c, Z] - axes = [0, 0, 1] - - for ll_mat, se_mat, all_mat, axis in zip(ll_mats, se_mats, all_mats, axes): - assert_allclose(all_mat, np.concatenate([ll_mat, se_mat], axis=axis), atol=ATOL, rtol=RTOL) - - -def test_filter_scans_time_varying_design_matrix(rng): - time_idx = pd.date_range(start="2000-01-01", freq="D", periods=100) - data = pd.DataFrame(rng.normal(size=(100, 2)), columns=["a", "b"], index=time_idx) - - y = pd.DataFrame(rng.normal(size=(100, 1)), columns=["data"], index=time_idx) - - reg = st.RegressionComponent(state_names=["a", "b"], name="exog") - mod = reg.build(verbose=False) - - with pm.Model(coords=mod.coords) as m: - data_exog = pm.Data("data_exog", data.values) - - x0 = pm.Normal("x0", dims=["state"]) - P0 = pm.Deterministic("P0", pt.eye(mod.k_states), dims=["state", "state_aux"]) - beta_exog = pm.Normal("beta_exog", dims=["exog_state"]) - - mod.build_statespace_graph(y) - x0, P0, c, d, 
T, Z, R, H, Q = mod.unpack_statespace() - pm.Deterministic("Z", Z) - - prior = pm.sample_prior_predictive(draws=10) - - prior_Z = prior.prior.Z.values - assert prior_Z.shape == (1, 10, 100, 1, 2) - assert_allclose(prior_Z[0, :, :, 0, :], data.values[None].repeat(10, axis=0)) - - -@pytest.mark.skipif(floatX.endswith("32"), reason="Prior covariance not PSD at half-precision") -def test_extract_components_from_idata(rng): - time_idx = pd.date_range(start="2000-01-01", freq="D", periods=100) - data = pd.DataFrame(rng.normal(size=(100, 2)), columns=["a", "b"], index=time_idx) - - y = pd.DataFrame(rng.normal(size=(100, 1)), columns=["data"], index=time_idx) - - ll = st.LevelTrendComponent() - season = st.FrequencySeasonality(name="seasonal", season_length=12, n=2, innovations=False) - reg = st.RegressionComponent(state_names=["a", "b"], name="exog") - me = st.MeasurementError("obs") - mod = (ll + season + reg + me).build(verbose=False) - - with pm.Model(coords=mod.coords) as m: - data_exog = pm.Data("data_exog", data.values) - - x0 = pm.Normal("x0", dims=["state"]) - P0 = pm.Deterministic("P0", pt.eye(mod.k_states), dims=["state", "state_aux"]) - beta_exog = pm.Normal("beta_exog", dims=["exog_state"]) - initial_trend = pm.Normal("initial_trend", dims=["trend_state"]) - sigma_trend = pm.Exponential("sigma_trend", 1, dims=["trend_shock"]) - seasonal_coefs = pm.Normal("seasonal", dims=["seasonal_state"]) - sigma_obs = pm.Exponential("sigma_obs", 1) - - mod.build_statespace_graph(y) - - x0, P0, c, d, T, Z, R, H, Q = mod.unpack_statespace() - prior = pm.sample_prior_predictive(draws=10) - - filter_prior = mod.sample_conditional_prior(prior) - comp_prior = mod.extract_components_from_idata(filter_prior) - comp_states = comp_prior.filtered_prior.coords["state"].values - expected_states = ["LevelTrend[level]", "LevelTrend[trend]", "seasonal", "exog[a]", "exog[b]"] - missing = set(comp_states) - set(expected_states) - - assert len(missing) == 0, missing diff --git 
a/tests/statespace/models/structural/test_core.py b/tests/statespace/models/structural/test_core.py new file mode 100644 index 000000000..500e8b1a2 --- /dev/null +++ b/tests/statespace/models/structural/test_core.py @@ -0,0 +1,102 @@ +import numpy as np +import pandas as pd +import pymc as pm +import pytest + +from numpy.testing import assert_allclose +from pytensor import config +from pytensor import tensor as pt +from scipy import linalg + +from pymc_extras.statespace.models import structural as st +from tests.statespace.test_utilities import unpack_symbolic_matrices_with_params + +floatX = config.floatX +ATOL = 1e-8 if floatX.endswith("64") else 1e-4 +RTOL = 0 if floatX.endswith("64") else 1e-6 + + +def test_add_components(): + ll = st.LevelTrendComponent(order=2) + se = st.TimeSeasonality(name="seasonal", season_length=12) + mod = ll + se + + ll_params = { + "initial_trend": np.zeros(2, dtype=floatX), + "sigma_trend": np.ones(2, dtype=floatX), + } + se_params = { + "seasonal_coefs": np.ones(11, dtype=floatX), + "sigma_seasonal": 1.0, + } + all_params = ll_params.copy() + all_params.update(se_params) + + (ll_x0, ll_P0, ll_c, ll_d, ll_T, ll_Z, ll_R, ll_H, ll_Q) = unpack_symbolic_matrices_with_params( + ll, ll_params + ) + (se_x0, se_P0, se_c, se_d, se_T, se_Z, se_R, se_H, se_Q) = unpack_symbolic_matrices_with_params( + se, se_params + ) + x0, P0, c, d, T, Z, R, H, Q = unpack_symbolic_matrices_with_params(mod, all_params) + + for property in ["param_names", "shock_names", "param_info", "coords", "param_dims"]: + assert [x in getattr(mod, property) for x in getattr(ll, property)] + assert [x in getattr(mod, property) for x in getattr(se, property)] + + assert (mod.observed_state_names == ll.observed_state_names) and ( + ll.observed_state_names == se.observed_state_names + ) + + ll_mats = [ll_T, ll_R, ll_Q] + se_mats = [se_T, se_R, se_Q] + all_mats = [T, R, Q] + + for ll_mat, se_mat, all_mat in zip(ll_mats, se_mats, all_mats): + assert_allclose(all_mat, 
linalg.block_diag(ll_mat, se_mat), atol=ATOL, rtol=RTOL) + + ll_mats = [ll_x0, ll_c, ll_Z] + se_mats = [se_x0, se_c, se_Z] + all_mats = [x0, c, Z] + axes = [0, 0, 1] + + for ll_mat, se_mat, all_mat, axis in zip(ll_mats, se_mats, all_mats, axes): + assert_allclose(all_mat, np.concatenate([ll_mat, se_mat], axis=axis), atol=ATOL, rtol=RTOL) + + +@pytest.mark.skipif(floatX.endswith("32"), reason="Prior covariance not PSD at half-precision") +def test_extract_components_from_idata(rng): + time_idx = pd.date_range(start="2000-01-01", freq="D", periods=100) + data = pd.DataFrame(rng.normal(size=(100, 2)), columns=["a", "b"], index=time_idx) + + y = pd.DataFrame(rng.normal(size=(100, 1)), columns=["data"], index=time_idx) + + ll = st.LevelTrendComponent() + season = st.FrequencySeasonality(name="seasonal", season_length=12, n=2, innovations=False) + reg = st.RegressionComponent(state_names=["a", "b"], name="exog") + me = st.MeasurementError("obs") + mod = (ll + season + reg + me).build(verbose=False) + + with pm.Model(coords=mod.coords) as m: + data_exog = pm.Data("data_exog", data.values) + + x0 = pm.Normal("x0", dims=["state"]) + P0 = pm.Deterministic("P0", pt.eye(mod.k_states), dims=["state", "state_aux"]) + beta_exog = pm.Normal("beta_exog", dims=["exog_state"]) + initial_trend = pm.Normal("initial_trend", dims=["trend_state"]) + sigma_trend = pm.Exponential("sigma_trend", 1, dims=["trend_shock"]) + seasonal_coefs = pm.Normal("seasonal", dims=["seasonal_state"]) + sigma_obs = pm.Exponential("sigma_obs", 1) + + mod.build_statespace_graph(y) + + x0, P0, c, d, T, Z, R, H, Q = mod.unpack_statespace() + prior = pm.sample_prior_predictive(draws=10) + + filter_prior = mod.sample_conditional_prior(prior) + comp_prior = mod.extract_components_from_idata(filter_prior) + comp_states = comp_prior.filtered_prior.coords["state"].values + expected_states = ["LevelTrend[level]", "LevelTrend[trend]", "seasonal", "exog[a]", "exog[b]"] + missing = set(comp_states) - set(expected_states) 
+ + assert len(missing) == 0, missing From b970a6c53d4335b208217d076e265314b39a664f Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Tue, 24 Jun 2025 22:17:31 +0200 Subject: [PATCH 02/21] Allow combination of components with different numbers of observed states --- .../statespace/models/structural/core.py | 35 +- pymc_extras/statespace/models/utilities.py | 256 +++++++++++++++ tests/statespace/models/test_utilities.py | 298 ++++++++++++++++++ tests/statespace/test_utilities.py | 2 +- 4 files changed, 583 insertions(+), 8 deletions(-) create mode 100644 tests/statespace/models/test_utilities.py diff --git a/pymc_extras/statespace/models/structural/core.py b/pymc_extras/statespace/models/structural/core.py index 1a273a6e2..913c58e17 100644 --- a/pymc_extras/statespace/models/structural/core.py +++ b/pymc_extras/statespace/models/structural/core.py @@ -13,7 +13,9 @@ from pymc_extras.statespace.core import PyMCStateSpace, PytensorRepresentation from pymc_extras.statespace.models.utilities import ( + add_tensors_by_dim_labels, conform_time_varying_and_time_invariant_matrices, + join_tensors_by_dim_labels, make_default_coords, ) from pymc_extras.statespace.utils.constants import ( @@ -481,11 +483,13 @@ def populate_component_properties(self): def _get_combined_shapes(self, other): k_states = self.k_states + other.k_states k_posdef = self.k_posdef + other.k_posdef - if self.k_endog != other.k_endog: - raise NotImplementedError( - "Merging elements with different numbers of observed states is not supported." 
+ if self.k_endog == other.k_endog: + k_endog = self.k_endog + else: + combined_states = self._combine_property( + other, "observed_state_names", allow_duplicates=False ) - k_endog = self.k_endog + k_endog = len(combined_states) return k_states, k_posdef, k_endog @@ -499,6 +503,9 @@ def make_slice(name, x, o_x): self_matrices = [self.ssm[name] for name in LONG_MATRIX_NAMES] other_matrices = [other.ssm[name] for name in LONG_MATRIX_NAMES] + self_observed_states = self.observed_state_names + other_observed_states = other.observed_state_names + x0, P0, c, d, T, Z, R, H, Q = ( self.ssm[make_slice(name, x, o_x)] for name, x, o_x in zip(LONG_MATRIX_NAMES, self_matrices, other_matrices) @@ -517,19 +524,33 @@ def make_slice(name, x, o_x): state_intercept = pt.concatenate(conform_time_varying_and_time_invariant_matrices(c, o_c)) state_intercept.name = c.name - obs_intercept = d + o_d + obs_intercept = add_tensors_by_dim_labels( + d, o_d, labels=self_observed_states, other_labels=other_observed_states, labeled_axis=-1 + ) obs_intercept.name = d.name transition = pt.linalg.block_diag(T, o_T) transition.name = T.name - design = pt.concatenate(conform_time_varying_and_time_invariant_matrices(Z, o_Z), axis=-1) + design = join_tensors_by_dim_labels( + *conform_time_varying_and_time_invariant_matrices(Z, o_Z), + labels=self_observed_states, + other_labels=other_observed_states, + labeled_axis=-2, + join_axis=-1, + ) design.name = Z.name selection = pt.linalg.block_diag(R, o_R) selection.name = R.name - obs_cov = H + o_H + obs_cov = add_tensors_by_dim_labels( + H, + o_H, + labels=self_observed_states, + other_labels=other_observed_states, + labeled_axis=(-1, -2), + ) obs_cov.name = H.name state_cov = pt.linalg.block_diag(Q, o_Q) diff --git a/pymc_extras/statespace/models/utilities.py b/pymc_extras/statespace/models/utilities.py index 6bc22370b..ad2ba33b0 100644 --- a/pymc_extras/statespace/models/utilities.py +++ b/pymc_extras/statespace/models/utilities.py @@ -1,6 +1,10 @@ +from 
typing import cast as type_cast + import numpy as np import pytensor.tensor as pt +from pytensor.tensor import TensorVariable + from pymc_extras.statespace.utils.constants import ( ALL_STATE_AUX_DIM, ALL_STATE_DIM, @@ -374,6 +378,258 @@ def conform_time_varying_and_time_invariant_matrices(A, B): return A, B +def normalize_axis(x, axis): + """ + Convert negative axis values to positive axis values + """ + if isinstance(axis, tuple): + return tuple([normalize_axis(x, i) for i in axis]) + if axis < 0: + axis = x.ndim + axis + return axis + + +def reorder_from_labels( + x: TensorVariable, + labels: list[str], + ordered_labels: list[str], + labeled_axis: int | tuple[int, int], +) -> TensorVariable: + """ + Reorder an input tensor along the requested axis/axes based on lists of string labels + + Parameters + ---------- + x: TensorVariable + Input tensor + labels: list of str + Labels associated with values of the input tensor ``x``, along the ``labeled_axis``. At runtime, should have + ``x.shape[labeled_axis] == len(labels)`` + ordered_labels: list of str + Target ordering according to which ``x`` will be reordered. + labeled_axis: int or tuple of int + Axis along which ``x`` will be labeled. If a tuple, each axis will be assumed to have identical labels, and + reorganization will be done on all requested axes together (NOT fancy indexing!)
+ + Returns + ------- + x_sorted: TensorVariable + Output tensor sorted along ``labeled_axis`` according to ``ordered_labels`` + """ + n_out = len(ordered_labels) + label_to_index = {label: index for index, label in enumerate(ordered_labels)} + + missing_labels = [label for label in ordered_labels if label not in labels] + indices = np.argsort([label_to_index[label] for label in [*labels, *missing_labels]]) + + if isinstance(labeled_axis, int): + labeled_axis = (labeled_axis,) + + if indices.tolist() != list(range(n_out)): + for axis in labeled_axis: + idx = np.s_[tuple([slice(None, None) if i != axis else indices for i in range(x.ndim)])] + x = x[idx] + + return x + + +def pad_and_reorder( + x: TensorVariable, labels: list[str], ordered_labels: list[str], labeled_axis: int +) -> TensorVariable: + """ + Pad input tensor ``x`` along the `labeled_axis` to match the length of ``ordered_labels``, then reorder the + padded dimension to match the ordering in ``ordered_labels``. + + Parameters + ---------- + x: TensorVariable + Input tensor + labels: list of str + String labels associated with the `x` tensor at the ``labeled_axis`` dimension. At runtime, should have + ``x.shape[labeled_axis] == len(labels)``. ``labels`` should be a subset of ``ordered_labels``. + ordered_labels: list of str + Target ordering according to which ``x`` will be reordered. + labeled_axis: int + Axis along which ``x`` will be labeled. + + Returns + ------- + x_padded: TensorVariable + Output tensor padded along ``labeled_axis`` according to ``ordered_labels``, then reordered. 
+ + """ + n_out = len(ordered_labels) + n_missing = n_out - len(labels) + + if n_missing > 0: + zeros = pt.zeros( + tuple([x.shape[i] if i != labeled_axis else n_missing for i in range(x.ndim)]) + ) + x_padded = pt.concatenate([x, zeros], axis=labeled_axis) + else: + x_padded = x + + return reorder_from_labels(x_padded, labels, ordered_labels, labeled_axis) + + +def ndim_pad_and_reorder( + x: TensorVariable, + labels: list[str], + ordered_labels: list[str], + labeled_axis: int | tuple[int, int], +) -> TensorVariable: + """ + Pad input tensor ``x`` along the `labeled_axis` to match the length of ``ordered_labels``, then reorder the + padded dimension to match the ordering in ``ordered_labels``. + + Unlike ``pad_and_reorder``, this function allows padding and reordering to be done simultaneously on multiple + axes. In this case, reordering is done jointly on all axes -- it does *not* use fancy indexing. + + Parameters + ---------- + x: TensorVariable + Input tensor + labels: list of str + Labels associated with values of the input tensor ``x``, along the ``labeled_axis``. At runtime, should have + ``x.shape[labeled_axis] == len(labels)``. If ``labeled_axis`` is a tuple, all axes are assumed to have the + same labels. + ordered_labels: list of str + Target ordering according to which ``x`` will be reordered. ``labels`` should be a subset of ``ordered_labels``. + labeled_axis: int or tuple of int + Axis along which ``x`` will be labeled. If a tuple, each axis will be assumed to have identical labels, and + reorganization will be done on all requested axes together (NOT fancy indexing!) + + Returns + ------- + x_sorted: TensorVariable + Output tensor. Each ``labeled_axis`` is padded to the length of ``ordered_labels``, then reordered.
+ """ + n_missing = len(ordered_labels) - len(labels) + + if isinstance(labeled_axis, int): + labeled_axis = (labeled_axis,) + + if n_missing > 0: + pad_size = [(0, 0) if i not in labeled_axis else (0, n_missing) for i in range(x.ndim)] + x = pt.pad(x, pad_size, mode="constant", constant_values=0) + + return reorder_from_labels(x, labels, ordered_labels, labeled_axis) + + +def add_tensors_by_dim_labels( + tensor: TensorVariable, + other_tensor: TensorVariable, + labels: list[str], + other_labels: list[str], + labeled_axis: int | tuple[int, int] = -1, +) -> TensorVariable: + """ + Add two tensors based on labels associated with one dimension. + + When combining statespace matrices associated with structural components with potentially different states, it is + important to make sure that duplicated states are handled correctly. For bias vectors and covariance matrices, + duplicated states should be summed. + + When a state appears in one component but not another, that state should be treated as an implicit zero in the + components where the state does not appear. This amounts to padding the relevant matrices with zeros before + performing the addition. + + When labeled_axis is a tuple, each provided axis is assumed to be identically labeled in each input tensor. This + is the case, for example, when working with a covariance matrix. In this case, padding and alignment will be + done on each indicated index. + + Parameters + ---------- + tensor: TensorVariable + A statespace matrix to be summed with ``other_tensor``. + other_tensor: TensorVariable + A statespace matrix to be summed with ``tensor``. + labels: list of str + Dimension labels associated with ``tensor``, on the ``labeled_axis`` dimension. + other_labels: list of str + Dimension labels associated with ``other_tensor``, on the ``labeled_axis`` dimension. + labeled_axis: int or tuple of int + Dimension that is labeled by ``labels`` and ``other_labels``.
``tensor.shape[labeled_axis]`` must have the + shape of ``len(labels)`` at runtime. + + Returns + ------- + result: TensorVariable + Result of addition of ``tensor`` and ``other_tensor``, along the ``labeled_axis`` dimension. The ordering of + the output will be ``labels + [label for label in other_labels if label not in labels]``. That is, ``labels`` + come first, followed by any new labels introduced by ``other_labels``. + + """ + labeled_axis = normalize_axis(tensor, labeled_axis) + new_labels = [label for label in other_labels if label not in labels] + combined_labels = type_cast(list[str], [*labels, *new_labels]) + + # If there is no overlap at all, directly concatenate the two matrices -- there's no need to worry about the order + # of things, or padding. This is equivalent to padding both out with zeros then adding them. + if combined_labels == [*labels, *other_labels]: + if isinstance(labeled_axis, int): + return pt.concatenate([tensor, other_tensor], axis=labeled_axis) + else: + # In the case where we want to align multiple dimensions, use block_diag to accomplish padding on the last + # two dimensions + dims = [*[i for i in range(tensor.ndim) if i not in labeled_axis], *labeled_axis] + return pt.linalg.block_diag( + type_cast(TensorVariable, tensor.transpose(*dims)), + type_cast(TensorVariable, other_tensor.transpose(*dims)), + ) + # Otherwise, there are two possibilities. If all labels are the same, we might need to re-order one or both to get + # them to agree. If *some* labels are the same, we will need to pad first, then potentially re-order. In any case, + # the final step is just to add the padded and re-ordered tensors.
+ fn = pad_and_reorder if isinstance(labeled_axis, int) else ndim_pad_and_reorder + + padded_tensor = fn( + tensor, + labels=type_cast(list[str], labels), + ordered_labels=combined_labels, + labeled_axis=labeled_axis, + ) + padded_tensor.name = tensor.name + + padded_other_tensor = fn( + other_tensor, + labels=type_cast(list[str], other_labels), + ordered_labels=combined_labels, + labeled_axis=labeled_axis, + ) + + padded_other_tensor.name = other_tensor.name + + return padded_tensor + padded_other_tensor + + +def join_tensors_by_dim_labels( + tensor: TensorVariable, + other_tensor: TensorVariable, + labels: list[str], + other_labels: list[str], + labeled_axis: int = -1, + join_axis: int = -1, + block_diag_join: bool = False, +) -> TensorVariable: + labeled_axis = normalize_axis(tensor, labeled_axis) + new_labels = [label for label in other_labels if label not in labels] + combined_labels = [*labels, *new_labels] + + # Check for no overlap first. In this case, do a block_diagonal join, which implicitly results in padding zeros + # everywhere they are needed -- no other sorting or padding necessary + if combined_labels == [*labels, *other_labels]: + return pt.linalg.block_diag(tensor, other_tensor) + + # Otherwise there is either total overlap or partial overlap. Let the padding and reordering function figure it out. 
+ tensor = ndim_pad_and_reorder(tensor, labels, combined_labels, labeled_axis) + other_tensor = ndim_pad_and_reorder(other_tensor, other_labels, combined_labels, labeled_axis) + + if block_diag_join: + return pt.linalg.block_diag(tensor, other_tensor) + else: + return pt.concatenate([tensor, other_tensor], axis=join_axis) + + def get_exog_dims_from_idata(exog_name, idata): if exog_name in idata.posterior.data_vars: exog_dims = idata.posterior[exog_name].dims[2:] diff --git a/tests/statespace/models/test_utilities.py b/tests/statespace/models/test_utilities.py new file mode 100644 index 000000000..b667658e4 --- /dev/null +++ b/tests/statespace/models/test_utilities.py @@ -0,0 +1,298 @@ +import numpy as np +import pytest + +from pytensor import function +from pytensor import tensor as pt + +from pymc_extras.statespace.models.utilities import ( + add_tensors_by_dim_labels, + join_tensors_by_dim_labels, + reorder_from_labels, +) + + +def test_reorder_from_labels(): + x = pt.tensor("x", shape=(None, None)) + labels = ["A", "B", "D"] + combined_labels = ["A", "D", "B"] + + x_sorted = reorder_from_labels(x, labels, combined_labels, labeled_axis=0) + fn = function([x], x_sorted) + + test_val = np.eye(3) * np.arange(1, 4) + idx = np.array([0, 2, 1]) + out = fn(test_val) + np.testing.assert_allclose(out, test_val[idx, :]) + + x_sorted = reorder_from_labels(x, labels, combined_labels, labeled_axis=1) + fn = function([x], x_sorted) + + out = fn(test_val) + np.testing.assert_allclose(out, test_val[:, idx]) + + x_sorted = reorder_from_labels(x, labels, combined_labels, labeled_axis=(0, 1)) + fn = function([x], x_sorted) + + out = fn(test_val) + np.testing.assert_allclose(out, test_val[np.ix_(idx, idx)]) + + +def make_zeros(x): + if x.ndim == 1: + zeros = np.zeros( + 1, + ) + else: + zeros = np.zeros((x.shape[0], 1)) + return zeros + + +def add(left, right): + return left + right + + +def same_but_mixed(left, right): + return left + right[..., np.array([1, 2, 0])] + + +def 
concat(left, right): + return np.concatenate([left, right], axis=-1) + + +def pad_and_add_left(left, right): + left = np.concatenate([left, make_zeros(left)], axis=-1) + return left + right + + +def pad_and_add_right(left, right): + right = np.concatenate([right, make_zeros(right)], axis=-1) + return left + right + + +def mixed_and_padded(left, right): + left = np.concatenate([left, make_zeros(left)], axis=-1) + right = right[..., np.array([2, 1, 0])] + return left + right + + +@pytest.mark.parametrize( + "left_names, right_names, expected_computation", + [ + (["data"], ["data"], add), + (["A", "C", "B"], ["B", "A", "C"], same_but_mixed), + (["data"], ["different_data"], concat), + (["data"], ["data", "different_data"], pad_and_add_left), + (["data", "more_data"], ["data"], pad_and_add_right), + (["A", "B"], ["D", "B", "A"], mixed_and_padded), + ], + ids=[ + "same_names", + "same_but_mixed", + "different_names", + "overlap_right", + "overlap_left", + "pad_and_mix", + ], +) +@pytest.mark.parametrize("ndim", [1, 2], ids=["vector", "matrix"]) +def test_add_matrices_by_observed_state_names(left_names, right_names, expected_computation, ndim): + rng = np.random.default_rng() + n_left = len(left_names) + n_right = len(right_names) + + left = pt.tensor("left", shape=(None,) * ndim) + right = pt.tensor("right", shape=(None,) * ndim) + + result = add_tensors_by_dim_labels(left, right, left_names, right_names) + fn = function([left, right], result) + + left_value = rng.normal(size=(n_left,) if ndim == 1 else (10, n_left)) + right_value = rng.normal(size=(n_right,) if ndim == 1 else (10, n_right)) + + np.testing.assert_allclose( + fn(left_value, right_value), expected_computation(left_value, right_value) + ) + + +class TestAddCovarianceMatrices: + def _setup_H(self, states_1, states_2): + n_1 = len(states_1) + n_2 = len(states_2) + + H_1 = pt.tensor("H_1", shape=(n_1, n_1)) + H_2 = pt.tensor("H_2", shape=(n_2, n_2)) + + return H_1, H_2 + + @pytest.mark.parametrize("n_states", 
[1, 3], ids=["1x1", "3x3"]) + def test_add_fully_overlapping_covariance_matrices(self, n_states): + rng = np.random.default_rng() + states = list("ABCD") + + observed_states_1 = states[:n_states] + observed_states_2 = states[:n_states] + + H_1, H_2 = self._setup_H(observed_states_1, observed_states_2) + res = add_tensors_by_dim_labels( + H_1, H_2, observed_states_1, observed_states_2, labeled_axis=(0, 1) + ) + + fn = function([H_1, H_2], res) + + H_1_val = rng.normal(size=(n_states, n_states)) + H_2_val = rng.normal(size=(n_states, n_states)) + + np.testing.assert_allclose(fn(H_1_val, H_2_val), H_1_val + H_2_val) + + def test_add_fully_overlapping_mixed_covariance_matrices(self): + rng = np.random.default_rng() + + observed_states_1 = ["A", "B", "C", "D"] + observed_states_2 = ["A", "B", "C", "D"] + rng.shuffle(observed_states_2) + + H_1, H_2 = self._setup_H(observed_states_1, observed_states_2) + + res = add_tensors_by_dim_labels( + H_1, H_2, observed_states_1, observed_states_2, labeled_axis=(0, 1) + ) + + H_1_val = rng.normal(size=(4, 4)) + H_2_val = rng.normal(size=(4, 4)) + + fn = function([H_1, H_2], res) + + state_to_idx = {name: idx for idx, name in enumerate(observed_states_1)} + idx = np.argsort([state_to_idx[state] for state in observed_states_2]) + + np.testing.assert_allclose(fn(H_1_val, H_2_val), H_1_val + H_2_val[np.ix_(idx, idx)]) + + def test_add_non_overlapping_covaraince_matrices(self): + rng = np.random.default_rng() + + observed_states_1 = ["A", "B"] + observed_states_2 = ["C", "D"] + + H_1, H_2 = self._setup_H(observed_states_1, observed_states_2) + + res = add_tensors_by_dim_labels( + H_1, H_2, observed_states_1, observed_states_2, labeled_axis=(0, 1) + ) + + H_1_val = rng.normal(size=(2, 2)) + H_2_val = rng.normal(size=(2, 2)) + zeros = np.zeros_like(H_1_val) + + fn = function([H_1, H_2], res) + + np.testing.assert_allclose( + fn(H_1_val, H_2_val), np.block([[H_1_val, zeros], [zeros, H_2_val]]) + ) + + def 
test_add_partially_overlapping_covaraince_matrices(self): + rng = np.random.default_rng() + observed_states_1 = ["A", "B"] + observed_states_2 = ["B", "C", "D", "A"] + H_1, H_2 = self._setup_H(observed_states_1, observed_states_2) + + res = add_tensors_by_dim_labels( + H_1, H_2, observed_states_1, observed_states_2, labeled_axis=(-2, -1) + ) + + fn = function([H_1, H_2], res) + H_1_val = rng.normal(size=(2, 2)) + H_2_val = rng.normal(size=(4, 4)) + + upper = np.zeros((4, 4)) + upper_idx = np.ix_([0, 1], [0, 1]) + upper[upper_idx] = H_1_val + expected_value = upper + H_2_val[np.ix_([3, 0, 1, 2], [3, 0, 1, 2])] + + np.testing.assert_allclose(fn(H_1_val, H_2_val), expected_value) + + +class TestJoinDesignMatrices: + def _setup_Z(self, states_1, states_2, k_endog=2): + Z_1 = pt.tensor("Z_1", shape=(len(states_1), k_endog)) + Z_2 = pt.tensor("Z_2", shape=(len(states_2), k_endog)) + + return Z_1, Z_2 + + def test_join_fully_overlapping_design_matrices(self): + observed_states_1 = ["A"] + observed_states_2 = ["A"] + + Z_1, Z_2 = self._setup_Z(observed_states_1, observed_states_2) + res = join_tensors_by_dim_labels( + Z_1, Z_2, observed_states_1, observed_states_2, labeled_axis=0, join_axis=1 + ) + + fn = function([Z_1, Z_2], res) + + Z_1_val = np.array([[1.0, 0.0]]) + Z_2_val = np.array([[0.0, 1.0]]) + + np.testing.assert_allclose(fn(Z_1_val, Z_2_val), np.array([[1.0, 0.0, 0.0, 1.0]])) + + def test_join_fully_overlapping_mixed_design_matrices(self): + observed_states_1 = ["A", "B", "C"] + observed_states_2 = ["C", "B", "A"] + + Z_1, Z_2 = self._setup_Z(observed_states_1, observed_states_2, k_endog=3) + res = join_tensors_by_dim_labels( + Z_1, Z_2, observed_states_1, observed_states_2, labeled_axis=0, join_axis=1 + ) + + fn = function([Z_1, Z_2], res) + + Z_1_val = np.array([[1.0, 0.0, 1.0], [0.0, 1.0, 0.0], [1.0, 0.0, 1.0]]) + Z_2_val = np.array([[0.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 0.0, 0.0]]) + + # Rows 0 and 2 should be swapped in the output, because the ordering A, 
B, C becomes canonical as it was passed + # in first, and because we said the labeled dim was axis=0. After reordering, the matrices should be + # concatenated on axis = 1 (again, as requested). + np.testing.assert_allclose( + fn(Z_1_val, Z_2_val), + np.array( + [ + [1.0, 0.0, 1.0, 1.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 1.0, 0.0, 1.0], + [1.0, 0.0, 1.0, 0.0, 1.0, 1.0], + ] + ), + ) + + def test_join_non_overlapping_design_matrices(self): + observed_states_1 = ["A"] + observed_states_2 = ["B"] + + Z_1, Z_2 = self._setup_Z(observed_states_1, observed_states_2) + fn = function( + [Z_1, Z_2], join_tensors_by_dim_labels(Z_1, Z_2, observed_states_1, observed_states_2) + ) + + Z_1_val = np.array([[1.0, 0.0]]) + Z_2_val = np.array([[1.0, 0.0]]) + out = fn(Z_1_val, Z_2_val) + + np.testing.assert_allclose(out, [[1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0]]) + + def test_join_partially_overlapping_design_matrices(self): + observed_states_1 = ["A"] + observed_states_2 = ["A", "B", "C"] + + Z_1, Z_2 = self._setup_Z(observed_states_1, observed_states_2) + res = join_tensors_by_dim_labels( + Z_1, Z_2, observed_states_1, observed_states_2, labeled_axis=0, join_axis=1 + ) + fn = function([Z_1, Z_2], res) + + Z_1_val = np.array([[1.0, 0.0]]) + Z_2_val = np.array([[0.0, 1.0], [1.0, 0.0], [1.0, 0.0]]) + + # Z_1 should be zero padded with the missing observed states, then concatenated along axis = -1 + expected_output = np.array( + [[1.0, 0.0, 0.0, 1.0], [0.0, 0.0, 1.0, 0.0], [0.0, 0.0, 1.0, 0.0]] + ) + + np.testing.assert_allclose(fn(Z_1_val, Z_2_val), expected_output) diff --git a/tests/statespace/test_utilities.py b/tests/statespace/test_utilities.py index 91c8ed513..6837d4fe7 100644 --- a/tests/statespace/test_utilities.py +++ b/tests/statespace/test_utilities.py @@ -272,7 +272,7 @@ def simulate_from_numpy_model(mod, rng, param_dict, data_dict=None, steps=100): else: y[t] = (d + Z[t] @ x[t] + error).squeeze() - return x, y + return x, y.squeeze() def assert_pattern_repeats(y, T, atol, 
rtol): From 7cae4875b848f4ffbd74e83f7af3fe1ec54c9e7b Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Wed, 25 Jun 2025 05:27:56 +0200 Subject: [PATCH 03/21] Allow multiple observed in LevelTrend component --- .../structural/components/level_trend.py | 75 ++++++++++++++----- .../structural/components/test_level_trend.py | 67 +++++++++++++++++ tests/statespace/test_utilities.py | 3 +- 3 files changed, 125 insertions(+), 20 deletions(-) diff --git a/pymc_extras/statespace/models/structural/components/level_trend.py b/pymc_extras/statespace/models/structural/components/level_trend.py index b3372f822..b6735007f 100644 --- a/pymc_extras/statespace/models/structural/components/level_trend.py +++ b/pymc_extras/statespace/models/structural/components/level_trend.py @@ -1,5 +1,7 @@ import numpy as np +from scipy import linalg + from pymc_extras.statespace.models.structural.core import Component from pymc_extras.statespace.models.structural.utils import order_to_mask from pymc_extras.statespace.utils.constants import POSITION_DERIVATIVE_NAMES @@ -120,6 +122,7 @@ def __init__( if observed_state_names is None: observed_state_names = ["data"] + k_endog = len(observed_state_names) self._order_mask = order_to_mask(order) max_state = np.flatnonzero(self._order_mask)[-1].item() + 1 @@ -148,49 +151,83 @@ def __init__( super().__init__( name, - k_endog=len(observed_state_names), - k_states=k_states, - k_posdef=k_posdef, + k_endog=k_endog, + k_states=k_states * k_endog, + k_posdef=k_posdef * k_endog, observed_state_names=observed_state_names, measurement_error=False, combine_hidden_states=False, - obs_state_idxs=np.array([1.0] + [0.0] * (k_states - 1)), + obs_state_idxs=np.tile(np.array([1.0] + [0.0] * (k_states - 1)), k_endog), ) def populate_component_properties(self): - name_slice = POSITION_DERIVATIVE_NAMES[: self.k_states] + k_endog = self.k_endog + k_states = self.k_states // k_endog + k_posdef = self.k_posdef // k_endog + + name_slice = POSITION_DERIVATIVE_NAMES[:k_states] 
self.param_names = ["initial_trend"] self.state_names = [name for name, mask in zip(name_slice, self._order_mask) if mask] self.param_dims = {"initial_trend": ("trend_state",)} self.coords = {"trend_state": self.state_names} - self.param_info = {"initial_trend": {"shape": (self.k_states,), "constraints": None}} + + if k_endog > 1: + self.param_dims["trend_state"] = ( + "trend_endog", + "trend_state", + ) + self.coords["trend_endog"] = self.observed_state_names + + shape = (k_endog, k_states) if k_endog > 1 else (k_states,) + self.param_info = {"initial_trend": {"shape": shape, "constraints": None}} if self.k_posdef > 0: self.param_names += ["sigma_trend"] self.shock_names = [ name for name, mask in zip(name_slice, self.innovations_order) if mask ] - self.param_dims["sigma_trend"] = ("trend_shock",) + self.param_dims["sigma_trend"] = ( + ("trend_shock",) if k_endog == 1 else ("trend_endog", "trend_shock") + ) self.coords["trend_shock"] = self.shock_names - self.param_info["sigma_trend"] = {"shape": (self.k_posdef,), "constraints": "Positive"} + self.param_info["sigma_trend"] = { + "shape": (k_posdef,) if k_endog == 1 else (k_endog, k_posdef), + "constraints": "Positive", + } for name in self.param_names: self.param_info[name]["dims"] = self.param_dims[name] def make_symbolic_graph(self) -> None: - initial_trend = self.make_and_register_variable("initial_trend", shape=(self.k_states,)) - self.ssm["initial_state", :] = initial_trend - triu_idx = np.triu_indices(self.k_states) - self.ssm[np.s_["transition", triu_idx[0], triu_idx[1]]] = 1 + k_endog = self.k_endog + k_states = self.k_states // k_endog + k_posdef = self.k_posdef // k_endog - R = np.eye(self.k_states) + initial_trend = self.make_and_register_variable( + "initial_trend", + shape=(k_states,) if k_endog == 1 else (k_endog, k_states), + ) + self.ssm["initial_state", :] = initial_trend.ravel() + + triu_idx = np.triu_indices(k_states) + T = np.zeros((k_states, k_states)) + T[triu_idx[0], triu_idx[1]] = 1 + + 
self.ssm["transition"] = linalg.block_diag(*[T for _ in range(k_endog)]) + + R = np.eye(k_states) R = R[:, self.innovations_order] - self.ssm["selection", :, :] = R - self.ssm["design", 0, :] = np.array([1.0] + [0.0] * (self.k_states - 1)) + self.ssm["selection", :, :] = linalg.block_diag(*[R for _ in range(k_endog)]) - if self.k_posdef > 0: - sigma_trend = self.make_and_register_variable("sigma_trend", shape=(self.k_posdef,)) - diag_idx = np.diag_indices(self.k_posdef) + Z = np.array([1.0] + [0.0] * (k_states - 1)).reshape((1, -1)) + self.ssm["design"] = linalg.block_diag(*[Z for _ in range(k_endog)]) + + if k_posdef > 0: + sigma_trend = self.make_and_register_variable( + "sigma_trend", + shape=(k_posdef,) if k_endog == 1 else (k_endog, k_posdef), + ) + diag_idx = np.diag_indices(k_posdef * k_endog) idx = np.s_["state_cov", diag_idx[0], diag_idx[1]] - self.ssm[idx] = sigma_trend**2 + self.ssm[idx] = (sigma_trend**2).ravel() diff --git a/tests/statespace/models/structural/components/test_level_trend.py b/tests/statespace/models/structural/components/test_level_trend.py index 9b48ba5b9..64f04b403 100644 --- a/tests/statespace/models/structural/components/test_level_trend.py +++ b/tests/statespace/models/structural/components/test_level_trend.py @@ -22,3 +22,70 @@ def test_level_trend_model(rng): mod = mod.build(verbose=False) _assert_basic_coords_correct(mod) assert mod.coords["trend_state"] == ["level", "trend"] + + +def test_level_trend_multiple_observed_construction(): + mod = st.LevelTrendComponent( + order=2, innovations_order=1, observed_state_names=["data_1", "data_2", "data_3"] + ) + mod = mod.build(verbose=False) + assert mod.k_endog == 3 + assert mod.k_states == 6 + assert mod.k_posdef == 3 + + assert mod.coords["trend_state"] == ["level", "trend"] + assert mod.coords["trend_endog"] == ["data_1", "data_2", "data_3"] + + Z = mod.ssm["design"].eval() + T = mod.ssm["transition"].eval() + R = mod.ssm["selection"].eval() + + np.testing.assert_allclose( + Z, + 
np.array( + [ + [1.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 1.0, 0.0], + ] + ), + ) + + np.testing.assert_allclose( + T, + np.array( + [ + [1.0, 1.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 1.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 1.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 1.0], + ] + ), + ) + + np.testing.assert_allclose( + R, + np.array( + [ + [1.0, 0.0, 0.0], + [0.0, 0.0, 0.0], + [0.0, 1.0, 0.0], + [0.0, 0.0, 0.0], + [0.0, 0.0, 1.0], + [0.0, 0.0, 0.0], + ] + ), + ) + + +def test_level_trend_multiple_observed(rng): + mod = st.LevelTrendComponent( + order=2, innovations_order=0, observed_state_names=["data_1", "data_2", "data_3"] + ) + params = {"initial_trend": np.array([[0.0, 1.0], [0.0, 2.0], [0.0, 3.0]])} + + x, y = simulate_from_numpy_model(mod, rng, params) + assert (np.diff(y, axis=0) == np.array([[1.0, 2.0, 3.0]])).all().all() + assert (np.diff(x, axis=0) == np.array([[1.0, 0.0, 2.0, 0.0, 3.0, 0.0]])).all().all() diff --git a/tests/statespace/test_utilities.py b/tests/statespace/test_utilities.py index 6837d4fe7..ab054ba19 100644 --- a/tests/statespace/test_utilities.py +++ b/tests/statespace/test_utilities.py @@ -242,11 +242,12 @@ def simulate_from_numpy_model(mod, rng, param_dict, data_dict=None, steps=100): Helper function to visualize the components outside of a PyMC model context """ x0, P0, c, d, T, Z, R, H, Q = unpack_symbolic_matrices_with_params(mod, param_dict, data_dict) + k_endog = mod.k_endog k_states = mod.k_states k_posdef = mod.k_posdef x = np.zeros((steps, k_states)) - y = np.zeros(steps) + y = np.zeros((steps, k_endog)) x[0] = x0 y[0] = (Z @ x0).squeeze() if Z.ndim == 2 else (Z[0] @ x0).squeeze() From bba84317501dfbc52d69187205cd8e69447df4e6 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Wed, 25 Jun 2025 06:01:32 +0200 Subject: [PATCH 04/21] Allow multiple observed states in measurement error component --- 
.../components/measurement_error.py | 12 +++++++--- .../components/test_measurement_error.py | 22 +++++++++++++++++++ .../structural/test_against_statsmodels.py | 4 ++-- .../statespace/models/structural/test_core.py | 10 +++++++++ 4 files changed, 43 insertions(+), 5 deletions(-) diff --git a/pymc_extras/statespace/models/structural/components/measurement_error.py b/pymc_extras/statespace/models/structural/components/measurement_error.py index 4017f0551..b62c8fce2 100644 --- a/pymc_extras/statespace/models/structural/components/measurement_error.py +++ b/pymc_extras/statespace/models/structural/components/measurement_error.py @@ -64,16 +64,22 @@ def __init__( def populate_component_properties(self): self.param_names = [f"sigma_{self.name}"] self.param_dims = {} + self.coords = {} + + if self.k_endog > 1: + self.param_dims[f"sigma_{self.name}"] = (f"endog_{self.name}",) + self.coords[f"endog_{self.name}"] = self.observed_state_names + self.param_info = { f"sigma_{self.name}": { - "shape": (), + "shape": (self.k_endog,) if self.k_endog > 1 else (), "constraints": "Positive", - "dims": None, + "dims": (f"endog_{self.name}",) if self.k_endog > 1 else None, } } def make_symbolic_graph(self) -> None: - sigma_shape = () + sigma_shape = () if self.k_endog == 1 else (self.k_endog,) error_sigma = self.make_and_register_variable(f"sigma_{self.name}", shape=sigma_shape) diag_idx = np.diag_indices(self.k_endog) idx = np.s_["obs_cov", diag_idx[0], diag_idx[1]] diff --git a/tests/statespace/models/structural/components/test_measurement_error.py b/tests/statespace/models/structural/components/test_measurement_error.py index 752e8513c..ba6a654f9 100644 --- a/tests/statespace/models/structural/components/test_measurement_error.py +++ b/tests/statespace/models/structural/components/test_measurement_error.py @@ -1,3 +1,5 @@ +import numpy as np + from pymc_extras.statespace.models import structural as st from tests.statespace.models.structural.conftest import 
_assert_basic_coords_correct @@ -8,3 +10,23 @@ def test_measurement_error(rng): _assert_basic_coords_correct(mod) assert "sigma_obs" in mod.param_names + + +def test_measurement_error_multiple_observed(): + mod = st.MeasurementError("obs", observed_state_names=["data_1", "data_2"]) + assert mod.k_endog == 2 + assert mod.coords["endog_obs"] == ["data_1", "data_2"] + assert mod.param_dims["sigma_obs"] == ("endog_obs",) + + +def test_build_with_measurement_error_subset(): + ll = st.LevelTrendComponent(order=2, observed_state_names=["data_1", "data_2", "data_3"]) + me = st.MeasurementError("obs", observed_state_names=["data_1", "data_3"]) + mod = (ll + me).build() + + H = mod.ssm["obs_cov"] + assert H.type.shape == (3, 3) + np.testing.assert_allclose( + H.eval({"sigma_obs": [1.0, 3.0]}), + np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 9.0]]), + ) diff --git a/tests/statespace/models/structural/test_against_statsmodels.py b/tests/statespace/models/structural/test_against_statsmodels.py index 94da8afe1..3495ecc14 100644 --- a/tests/statespace/models/structural/test_against_statsmodels.py +++ b/tests/statespace/models/structural/test_against_statsmodels.py @@ -416,8 +416,8 @@ def create_structural_model_and_equivalent_statsmodel( expected_coords[AR_PARAM_DIM] += tuple(list(range(1, autoregressive + 1))) expected_coords[ALL_STATE_DIM] += ar_names expected_coords[ALL_STATE_AUX_DIM] += ar_names - expected_coords[SHOCK_DIM] += ["ar_innovation"] - expected_coords[SHOCK_AUX_DIM] += ["ar_innovation"] + expected_coords[SHOCK_DIM] += ["data_ar_innovation"] + expected_coords[SHOCK_AUX_DIM] += ["data_ar_innovation"] sm_params["sigma2.ar"] = sigma2 for i, rho in enumerate(ar_params): diff --git a/tests/statespace/models/structural/test_core.py b/tests/statespace/models/structural/test_core.py index 500e8b1a2..46115b659 100644 --- a/tests/statespace/models/structural/test_core.py +++ b/tests/statespace/models/structural/test_core.py @@ -64,6 +64,16 @@ def 
test_add_components(): assert_allclose(all_mat, np.concatenate([ll_mat, se_mat], axis=axis), atol=ATOL, rtol=RTOL) +def test_add_components_multiple_observed(): + ll = st.LevelTrendComponent(order=2, observed_state_names=["data_1", "data_2"]) + me = st.MeasurementError(name="obs", observed_state_names=["data_1", "data_2"]) + + mod = (ll + me).build() + + for property in ["param_names", "shock_names", "param_info", "coords", "param_dims"]: + assert [x in getattr(mod, property) for x in getattr(ll, property)] + + @pytest.mark.skipif(floatX.endswith("32"), reason="Prior covariance not PSD at half-precision") def test_extract_components_from_idata(rng): time_idx = pd.date_range(start="2000-01-01", freq="D", periods=100) From 0a84576f039809866b3727500cbbe4548dd99ebd Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Wed, 25 Jun 2025 22:07:38 +0800 Subject: [PATCH 05/21] Allow multiple observed in AutoRegressive component --- .../structural/components/autoregressive.py | 89 +++++++++++++++---- .../components/test_autoregressive.py | 19 ++++ 2 files changed, 92 insertions(+), 16 deletions(-) diff --git a/pymc_extras/statespace/models/structural/components/autoregressive.py b/pymc_extras/statespace/models/structural/components/autoregressive.py index 0eca94295..441913fe9 100644 --- a/pymc_extras/statespace/models/structural/components/autoregressive.py +++ b/pymc_extras/statespace/models/structural/components/autoregressive.py @@ -1,4 +1,5 @@ import numpy as np +import pytensor.tensor as pt from pymc_extras.statespace.models.structural.core import Component from pymc_extras.statespace.models.structural.utils import order_to_mask @@ -70,10 +71,11 @@ def __init__( if observed_state_names is None: observed_state_names = ["data"] + k_posdef = k_endog = len(observed_state_names) + order = order_to_mask(order) ar_lags = np.flatnonzero(order).ravel().astype(int) + 1 k_states = len(order) - k_posdef = k_endog = len(observed_state_names) self.order = order self.ar_lags = ar_lags 
@@ -81,42 +83,97 @@ def __init__( super().__init__( name=name, k_endog=k_endog, - k_states=k_states, + k_states=k_states * k_endog, k_posdef=k_posdef, measurement_error=True, combine_hidden_states=True, observed_state_names=observed_state_names, - obs_state_idxs=np.r_[[1.0], np.zeros(k_states - 1)], + obs_state_idxs=np.tile(np.r_[[1.0], np.zeros(k_states - 1)], k_endog), ) def populate_component_properties(self): - self.state_names = [f"L{i + 1}.data" for i in range(self.k_states)] - self.shock_names = [f"{self.name}_innovation"] + self.state_names = [ + f"L{i + 1}.{state_name}" + for i in range(self.k_states) + for state_name in self.observed_state_names + ] + self.shock_names = [f"{name}_{self.name}_innovation" for name in self.observed_state_names] self.param_names = ["ar_params", "sigma_ar"] self.param_dims = {"ar_params": (AR_PARAM_DIM,)} self.coords = {AR_PARAM_DIM: self.ar_lags.tolist()} + if self.k_endog > 1: + self.param_dims["ar_params"] = ( + f"{self.name}_endog", + AR_PARAM_DIM, + ) + self.param_dims["sigma_ar"] = (f"{self.name}_endog",) + + self.coords[f"{self.name}_endog"] = self.observed_state_names + self.param_info = { "ar_params": { - "shape": (self.k_states,), + "shape": (self.k_states,) if self.k_endog == 1 else (self.k_endog, self.k_states), "constraints": None, - "dims": (AR_PARAM_DIM,), + "dims": (AR_PARAM_DIM,) + if self.k_endog == 1 + else ( + f"{self.name}_endog", + AR_PARAM_DIM, + ), + }, + "sigma_ar": { + "shape": () if self.k_endog == 1 else (self.k_endog,), + "constraints": "Positive", + "dims": None if self.k_endog == 1 else (f"{self.name}_endog",), }, - "sigma_ar": {"shape": (), "constraints": "Positive", "dims": None}, } def make_symbolic_graph(self) -> None: + k_endog = self.k_endog + k_states = self.k_states // k_endog + k_posdef = self.k_posdef + k_nonzero = int(sum(self.order)) - ar_params = self.make_and_register_variable("ar_params", shape=(k_nonzero,)) - sigma_ar = self.make_and_register_variable("sigma_ar", shape=()) + 
ar_params = self.make_and_register_variable( + "ar_params", shape=(k_nonzero,) if k_endog == 1 else (k_endog, k_nonzero) + ) + sigma_ar = self.make_and_register_variable( + "sigma_ar", shape=() if k_endog == 1 else (k_endog,) + ) + + if k_endog == 1: + T = pt.eye(k_states, k=-1) + ar_idx = (np.zeros(k_nonzero, dtype="int"), np.nonzero(self.order)[0]) + T = T[ar_idx].set(ar_params) + + else: + transition_matrices = [] + + for i in range(k_endog): + T = pt.eye(k_states, k=-1) + ar_idx = (np.zeros(k_nonzero, dtype="int"), np.nonzero(self.order)[0]) + T = T[ar_idx].set(ar_params[i]) + transition_matrices.append(T) + T = pt.specify_shape( + pt.linalg.block_diag(*transition_matrices), (self.k_states, self.k_states) + ) - T = np.eye(self.k_states, k=-1) self.ssm["transition", :, :] = T - self.ssm["selection", 0, 0] = 1 - self.ssm["design", 0, 0] = 1 - ar_idx = ("transition", np.zeros(k_nonzero, dtype="int"), np.nonzero(self.order)[0]) - self.ssm[ar_idx] = ar_params + R = np.eye(k_states) + R_mask = np.full((k_states), False) + R_mask[0] = True + R = R[:, R_mask] + + self.ssm["selection", :, :] = pt.specify_shape( + pt.linalg.block_diag(*[R for _ in range(k_endog)]), (self.k_states, self.k_posdef) + ) + + Z = pt.zeros((1, k_states))[0, 0].set(1.0) + self.ssm["design", :, :] = pt.specify_shape( + pt.linalg.block_diag(*[Z for _ in range(k_endog)]), (self.k_endog, self.k_states) + ) - cov_idx = ("state_cov", *np.diag_indices(1)) + cov_idx = ("state_cov", *np.diag_indices(k_posdef)) self.ssm[cov_idx] = sigma_ar**2 diff --git a/tests/statespace/models/structural/components/test_autoregressive.py b/tests/statespace/models/structural/components/test_autoregressive.py index f68a34de6..21234aa2a 100644 --- a/tests/statespace/models/structural/components/test_autoregressive.py +++ b/tests/statespace/models/structural/components/test_autoregressive.py @@ -26,3 +26,22 @@ def test_autoregressive_model(order, rng): if isinstance(order, list): lags = lags[np.flatnonzero(order)] 
assert_allclose(ar.coords["ar_lag"], lags) + + +def test_autoregressive_multiple_observed(rng): + ar = st.AutoregressiveComponent(order=3, observed_state_names=["data_1", "data_2"]) + mod = ar.build(verbose=False) + + params = { + "ar_params": np.full( + ( + 2, + sum(ar.order), + ), + 0.5, + dtype=config.floatX, + ), + "sigma_ar": np.ones((2,)) * 1e-3, + } + + x, y = simulate_from_numpy_model(ar, rng, params, steps=100) From 480f4fb7a9aedcdd4767b5095a8690590e2ad06f Mon Sep 17 00:00:00 2001 From: Alexandre Andorra Date: Tue, 1 Jul 2025 09:37:14 -0400 Subject: [PATCH 06/21] Fix typo in docstrings --- pymc_extras/statespace/models/utilities.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pymc_extras/statespace/models/utilities.py b/pymc_extras/statespace/models/utilities.py index ad2ba33b0..31de98f85 100644 --- a/pymc_extras/statespace/models/utilities.py +++ b/pymc_extras/statespace/models/utilities.py @@ -541,21 +541,21 @@ def add_tensors_by_dim_labels( Parameters ---------- tensor: TensorVariable - A statespace matrix to be summed with ``other_matrix``. + A statespace matrix to be summed with ``other_tensor``. other_tensor: TensorVariable - A statespace matrix to be summed with ``matrix``. + A statespace matrix to be summed with ``tensor``. labels: list of str - Dimension labels associated with ``matrix``, on the ``labeled_axis`` dimension. + Dimension labels associated with ``tensor``, on the ``labeled_axis`` dimension. other_labels: list of str - Dimension labels associated with ``other_matrix``, on the ``labeled_axis`` dimension. + Dimension labels associated with ``other_tensor``, on the ``labeled_axis`` dimension. labeled_axis: int or tuple of int - Dimension that is labeled by ``labels`` and ``other_labels``. ``matrix.shape[labeled_axis]`` must have the + Dimension that is labeled by ``labels`` and ``other_labels``. ``tensor.shape[labeled_axis]`` must have the shape of ``len(labels)`` at runtime. 
Returns ------- result: TensorVariable - Result of addition of ``matrix`` and ``other_matrix``, along the ``labeled_axis`` dimension. The ordering of + Result of addition of ``tensor`` and ``other_tensor``, along the ``labeled_axis`` dimension. The ordering of the output will be ``labels + [label for label in other_labels if label not in labels]``. That is, ``labels`` come first, followed by any new labels introduced by ``other_labels``. From a898eb69c156a894fce4d6d7175dc8aa797ec801 Mon Sep 17 00:00:00 2001 From: Alex Andorra Date: Tue, 1 Jul 2025 14:02:09 -0400 Subject: [PATCH 07/21] Allow multiple observed in Cycle component --- .../models/structural/components/cycle.py | 140 +++++++++++++++--- .../structural/components/test_cycle.py | 88 +++++++++++ .../statespace/models/structural/conftest.py | 5 +- 3 files changed, 211 insertions(+), 22 deletions(-) diff --git a/pymc_extras/statespace/models/structural/components/cycle.py b/pymc_extras/statespace/models/structural/components/cycle.py index 4c0f4603f..b47febf5d 100644 --- a/pymc_extras/statespace/models/structural/components/cycle.py +++ b/pymc_extras/statespace/models/structural/components/cycle.py @@ -1,6 +1,7 @@ import numpy as np from pytensor import tensor as pt +from scipy import linalg from pymc_extras.statespace.models.structural.core import Component from pymc_extras.statespace.models.structural.utils import _frequency_transition_block @@ -10,6 +11,10 @@ class CycleComponent(Component): r""" A component for modeling longer-term cyclical effects + Supports both univariate and multivariate time series. For multivariate time series, + each endogenous variable gets its own independent cycle component with separate + cosine/sine states and optional variable-specific innovation variances. + Parameters ---------- name: str @@ -32,6 +37,11 @@ class CycleComponent(Component): innovations: bool, default True Whether to include stochastic innovations in the strength of the seasonal effect. 
If True, an additional parameter, ``sigma_{name}`` will be added to the model. + For multivariate time series, this is a vector (variable-specific innovation variances). + + observed_state_names: list[str], optional + Names of the observed state variables. For univariate time series, defaults to ``["data"]``. + For multivariate time series, specify a list of names for each endogenous variable. Notes ----- @@ -51,8 +61,16 @@ class CycleComponent(Component): Unlike a FrequencySeasonality component, the length of a CycleComponent can be estimated. + **Multivariate Support:** + For multivariate time series with k endogenous variables, the component creates: + - 2k states (cosine and sine components for each variable) + - Block diagonal transition and selection matrices + - Variable-specific innovation variances (optional) + - Proper parameter shapes: (k, 2) for initial states, (k,) for innovation variances + Examples -------- + **Univariate Example:** Estimate a business cycle with length between 6 and 12 years: .. code:: python @@ -84,6 +102,35 @@ class CycleComponent(Component): idata = pm.sample(nuts_sampler='numpyro') + **Multivariate Example:** + Model cycles for multiple economic indicators with variable-specific innovation variances: + + .. 
code:: python + + # Multivariate cycle component + cycle = st.CycleComponent( + name='business_cycle', + cycle_length=12, + estimate_cycle_length=False, + innovations=True, + dampen=True, + observed_state_names=['gdp', 'unemployment', 'inflation'] + ) + + # Build the model + ss_mod = cycle.build() + + # In PyMC model: + with pm.Model(coords=ss_mod.coords) as model: + # Initial states: shape (3, 2) for 3 variables, 2 states each + cycle_init = pm.Normal('business_cycle', dims=('business_cycle_endog', 'business_cycle_state')) + + # Dampening factor: scalar (shared across variables) + dampening = pm.Uniform('business_cycle_dampening_factor', lower=0.8, upper=1.0) + + # Innovation variances: shape (3,) for variable-specific variances + sigma_cycle = pm.HalfNormal('sigma_business_cycle', dims=('business_cycle_endog',)) + References ---------- .. [1] Durbin, James, and Siem Jan Koopman. 2012. @@ -137,14 +184,23 @@ def __init__( ) def make_symbolic_graph(self) -> None: - self.ssm["design", 0, slice(0, self.k_states, 2)] = 1 - self.ssm["selection", :, :] = np.eye(self.k_states) - self.param_dims = {self.name: (f"{self.name}_state",)} - self.coords = {f"{self.name}_state": self.state_names} + if self.k_endog == 1: + self.ssm["design", 0, slice(0, self.k_states, 2)] = 1 + self.ssm["selection", :, :] = np.eye(self.k_states) + init_state = self.make_and_register_variable(f"{self.name}", shape=(self.k_states,)) + + else: + Z = np.array([1.0, 0.0]).reshape((1, -1)) + design_matrix = linalg.block_diag(*[Z for _ in range(self.k_endog)]) + self.ssm["design", :, :] = pt.as_tensor_variable(design_matrix) - init_state = self.make_and_register_variable(f"{self.name}", shape=(self.k_states,)) + R = np.eye(2) # 2x2 identity for each cycle component + selection_matrix = linalg.block_diag(*[R for _ in range(self.k_endog)]) + self.ssm["selection", :, :] = pt.as_tensor_variable(selection_matrix) - self.ssm["initial_state", :] = init_state + init_state = 
self.make_and_register_variable(f"{self.name}", shape=(self.k_endog, 2)) + + self.ssm["initial_state", :] = init_state.ravel() if self.estimate_cycle_length: lamb = self.make_and_register_variable(f"{self.name}_length", shape=()) @@ -157,23 +213,59 @@ def make_symbolic_graph(self) -> None: rho = 1 T = rho * _frequency_transition_block(lamb, j=1) - self.ssm["transition", :, :] = T + if self.k_endog == 1: + self.ssm["transition", :, :] = T + else: + # can't make the linalg.block_diag logic work here + # doing it manually for now + for i in range(self.k_endog): + start_idx = i * 2 + end_idx = (i + 1) * 2 + self.ssm["transition", start_idx:end_idx, start_idx:end_idx] = T if self.innovations: - sigma_cycle = self.make_and_register_variable(f"sigma_{self.name}", shape=()) - self.ssm["state_cov", :, :] = pt.eye(self.k_posdef) * sigma_cycle**2 + if self.k_endog == 1: + sigma_cycle = self.make_and_register_variable(f"sigma_{self.name}", shape=()) + self.ssm["state_cov", :, :] = pt.eye(self.k_posdef) * sigma_cycle**2 + else: + sigma_cycle = self.make_and_register_variable( + f"sigma_{self.name}", shape=(self.k_endog,) + ) + # can't make the linalg.block_diag logic work here + # doing it manually for now + for i in range(self.k_endog): + start_idx = i * 2 + end_idx = (i + 1) * 2 + Q_block = pt.eye(2) * sigma_cycle[i] ** 2 + self.ssm["state_cov", start_idx:end_idx, start_idx:end_idx] = Q_block def populate_component_properties(self): self.state_names = [f"{self.name}_{f}" for f in ["Cos", "Sin"]] self.param_names = [f"{self.name}"] - self.param_info = { - f"{self.name}": { - "shape": (2,), - "constraints": None, - "dims": (f"{self.name}_state",), + if self.k_endog == 1: + self.param_dims = {self.name: (f"{self.name}_state",)} + self.coords = {f"{self.name}_state": self.state_names} + self.param_info = { + f"{self.name}": { + "shape": (2,), + "constraints": None, + "dims": (f"{self.name}_state",), + } + } + else: + self.param_dims = {self.name: (f"{self.name}_endog", 
f"{self.name}_state")} + self.coords = { + f"{self.name}_state": self.state_names, + f"{self.name}_endog": self.observed_state_names, + } + self.param_info = { + f"{self.name}": { + "shape": (self.k_endog, 2), + "constraints": None, + "dims": (f"{self.name}_endog", f"{self.name}_state"), + } } - } if self.estimate_cycle_length: self.param_names += [f"{self.name}_length"] @@ -193,9 +285,17 @@ def populate_component_properties(self): if self.innovations: self.param_names += [f"sigma_{self.name}"] - self.param_info[f"sigma_{self.name}"] = { - "shape": (), - "constraints": "Positive", - "dims": None, - } + if self.k_endog == 1: + self.param_info[f"sigma_{self.name}"] = { + "shape": (), + "constraints": "Positive", + "dims": None, + } + else: + self.param_dims[f"sigma_{self.name}"] = (f"{self.name}_endog",) + self.param_info[f"sigma_{self.name}"] = { + "shape": (self.k_endog,), + "constraints": "Positive", + "dims": (f"{self.name}_endog",), + } self.shock_names = self.state_names.copy() diff --git a/tests/statespace/models/structural/components/test_cycle.py b/tests/statespace/models/structural/components/test_cycle.py index b24eae290..987cbf914 100644 --- a/tests/statespace/models/structural/components/test_cycle.py +++ b/tests/statespace/models/structural/components/test_cycle.py @@ -45,8 +45,96 @@ def test_cycle_component_with_innovations_and_cycle_length(rng): "cycle_dampening_factor": 0.95, "sigma_cycle": 1.0, } + x, y = simulate_from_numpy_model(cycle, rng, params) + + cycle.build(verbose=False) + _assert_basic_coords_correct(cycle) + + +def test_cycle_multivariate_deterministic(rng): + """Test multivariate cycle component with deterministic cycles.""" + cycle = st.CycleComponent( + name="cycle", + cycle_length=12, + estimate_cycle_length=False, + innovations=False, + observed_state_names=["data_1", "data_2", "data_3"], + ) + params = {"cycle": np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]], dtype=config.floatX)} + x, y = simulate_from_numpy_model(cycle, rng, 
params, steps=12 * 12) + + # Check that each variable has a cyclical pattern with the expected period + for i in range(3): + assert_pattern_repeats(y[:, i], 12, atol=ATOL, rtol=RTOL) + + # Check that the cycles have different amplitudes (different initial states) + assert np.std(y[:, 0]) > 0 + assert np.std(y[:, 1]) > 0 + assert np.std(y[:, 2]) > 0 + # The second and third variables should have larger amplitudes due to larger initial states + assert np.std(y[:, 1]) > np.std(y[:, 0]) + assert np.std(y[:, 2]) > np.std(y[:, 0]) + +def test_cycle_multivariate_with_dampening(rng): + """Test multivariate cycle component with dampening.""" + cycle = st.CycleComponent( + name="cycle", + cycle_length=12, + estimate_cycle_length=False, + innovations=False, + dampen=True, + observed_state_names=["data_1", "data_2", "data_3"], + ) + params = { + "cycle": np.array([[10.0, 10.0], [20.0, 20.0], [30.0, 30.0]], dtype=config.floatX), + "cycle_dampening_factor": 0.75, + } + x, y = simulate_from_numpy_model(cycle, rng, params, steps=100) + + # Check that all cycles dampen to zero over time + for i in range(3): + assert_allclose(y[-1, i], 0.0, atol=ATOL, rtol=RTOL) + + # Check that the dampening pattern is consistent across variables + # The variables should dampen at the same rate but with different initial amplitudes + for i in range(1, 3): + # The ratio of final to initial values should be similar across variables + ratio_0 = abs(y[-1, 0] / y[0, 0]) if y[0, 0] != 0 else 0 + ratio_i = abs(y[-1, i] / y[0, i]) if y[0, i] != 0 else 0 + assert_allclose(ratio_0, ratio_i, atol=1e-2, rtol=1e-2) + + +def test_cycle_multivariate_with_innovations_and_cycle_length(rng): + """Test multivariate cycle component with innovations and estimated cycle length.""" + cycle = st.CycleComponent( + name="cycle", + estimate_cycle_length=True, + innovations=True, + dampen=True, + observed_state_names=["data_1", "data_2", "data_3"], + ) + params = { + "cycle": np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]], 
dtype=config.floatX), + "cycle_length": 12.0, + "cycle_dampening_factor": 0.95, + "sigma_cycle": np.array([0.5, 1.0, 1.5]), # Different innovation variances per variable + } x, y = simulate_from_numpy_model(cycle, rng, params) cycle.build(verbose=False) _assert_basic_coords_correct(cycle) + + assert cycle.coords["cycle_state"] == ["cycle_Cos", "cycle_Sin"] + assert cycle.coords["cycle_endog"] == ["data_1", "data_2", "data_3"] + + assert cycle.k_endog == 3 + assert cycle.k_states == 6 # 2 states per variable + assert cycle.k_posdef == 6 # 2 innovations per variable + + # Check that the data has the expected shape + assert y.shape[1] == 3 # 3 variables + + # Check that each variable shows some variation (due to innovations) + for i in range(3): + assert np.std(y[:, i]) > 0 diff --git a/tests/statespace/models/structural/conftest.py b/tests/statespace/models/structural/conftest.py index 63ce45c1b..c5f2396bc 100644 --- a/tests/statespace/models/structural/conftest.py +++ b/tests/statespace/models/structural/conftest.py @@ -23,5 +23,6 @@ def _assert_basic_coords_correct(mod): assert mod.coords[ALL_STATE_AUX_DIM] == mod.state_names assert mod.coords[SHOCK_DIM] == mod.shock_names assert mod.coords[SHOCK_AUX_DIM] == mod.shock_names - assert mod.coords[OBS_STATE_DIM] == ["data"] - assert mod.coords[OBS_STATE_AUX_DIM] == ["data"] + expected_obs = mod.observed_state_names if hasattr(mod, "observed_state_names") else ["data"] + assert mod.coords[OBS_STATE_DIM] == expected_obs + assert mod.coords[OBS_STATE_AUX_DIM] == expected_obs From 62d07507b6c357639daa88c870e5658697bcf29a Mon Sep 17 00:00:00 2001 From: Alex Andorra Date: Tue, 1 Jul 2025 17:05:58 -0400 Subject: [PATCH 08/21] Fix Cycle docstring examples --- .../statespace/models/structural/components/cycle.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pymc_extras/statespace/models/structural/components/cycle.py b/pymc_extras/statespace/models/structural/components/cycle.py index 
b47febf5d..bcbfed2a1 100644 --- a/pymc_extras/statespace/models/structural/components/cycle.py +++ b/pymc_extras/statespace/models/structural/components/cycle.py @@ -94,13 +94,13 @@ class CycleComponent(Component): intitial_trend = pm.Normal('initial_trend', dims=ss_mod.param_dims['initial_trend']) sigma_trend = pm.HalfNormal('sigma_trend', dims=ss_mod.param_dims['sigma_trend']) - cycle_strength = pm.Normal('business_cycle') + cycle_strength = pm.Normal("business_cycle", dims=ss_mod.param_dims["business_cycle"]) cycle_length = pm.Uniform('business_cycle_length', lower=6, upper=12) sigma_cycle = pm.HalfNormal('sigma_business_cycle', sigma=1) - ss_mod.build_statespace_graph(data) - idata = pm.sample(nuts_sampler='numpyro') + ss_mod.build_statespace_graph(data) + idata = pm.sample() **Multivariate Example:** Model cycles for multiple economic indicators with variable-specific innovation variances: @@ -122,6 +122,7 @@ class CycleComponent(Component): # In PyMC model: with pm.Model(coords=ss_mod.coords) as model: + P0 = pm.Deterministic("P0", pt.eye(ss_mod.k_states), dims=ss_mod.param_dims["P0"]) # Initial states: shape (3, 2) for 3 variables, 2 states each cycle_init = pm.Normal('business_cycle', dims=('business_cycle_endog', 'business_cycle_state')) @@ -131,6 +132,9 @@ class CycleComponent(Component): # Innovation variances: shape (3,) for variable-specific variances sigma_cycle = pm.HalfNormal('sigma_business_cycle', dims=('business_cycle_endog',)) + ss_mod.build_statespace_graph(data) + idata = pm.sample() + References ---------- .. [1] Durbin, James, and Siem Jan Koopman. 2012. 
From 152e96276aea111c8d9bb5004b7dc8869c870029 Mon Sep 17 00:00:00 2001 From: Alex Andorra Date: Wed, 2 Jul 2025 18:10:05 -0400 Subject: [PATCH 09/21] Use pytensor block_diag for Cycle --- .../models/structural/components/cycle.py | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/pymc_extras/statespace/models/structural/components/cycle.py b/pymc_extras/statespace/models/structural/components/cycle.py index bcbfed2a1..1636c51b1 100644 --- a/pymc_extras/statespace/models/structural/components/cycle.py +++ b/pymc_extras/statespace/models/structural/components/cycle.py @@ -1,6 +1,7 @@ import numpy as np from pytensor import tensor as pt +from pytensor.tensor.slinalg import block_diag from scipy import linalg from pymc_extras.statespace.models.structural.core import Component @@ -96,7 +97,6 @@ class CycleComponent(Component): cycle_strength = pm.Normal("business_cycle", dims=ss_mod.param_dims["business_cycle"]) cycle_length = pm.Uniform('business_cycle_length', lower=6, upper=12) - sigma_cycle = pm.HalfNormal('sigma_business_cycle', sigma=1) ss_mod.build_statespace_graph(data) @@ -124,13 +124,15 @@ class CycleComponent(Component): with pm.Model(coords=ss_mod.coords) as model: P0 = pm.Deterministic("P0", pt.eye(ss_mod.k_states), dims=ss_mod.param_dims["P0"]) # Initial states: shape (3, 2) for 3 variables, 2 states each - cycle_init = pm.Normal('business_cycle', dims=('business_cycle_endog', 'business_cycle_state')) + cycle_init = pm.Normal('business_cycle', dims=ss_mod.param_dims["business_cycle"]) # Dampening factor: scalar (shared across variables) - dampening = pm.Uniform('business_cycle_dampening_factor', lower=0.8, upper=1.0) + dampening = pm.Beta("business_cycle_dampening_factor", 2, 2) # Innovation variances: shape (3,) for variable-specific variances - sigma_cycle = pm.HalfNormal('sigma_business_cycle', dims=('business_cycle_endog',)) + sigma_cycle = pm.HalfNormal( + "sigma_business_cycle", 
dims=ss_mod.param_dims["sigma_business_cycle"] + ) ss_mod.build_statespace_graph(data) idata = pm.sample() @@ -220,12 +222,8 @@ def make_symbolic_graph(self) -> None: if self.k_endog == 1: self.ssm["transition", :, :] = T else: - # can't make the linalg.block_diag logic work here - # doing it manually for now - for i in range(self.k_endog): - start_idx = i * 2 - end_idx = (i + 1) * 2 - self.ssm["transition", start_idx:end_idx, start_idx:end_idx] = T + transition = block_diag(*[T for _ in range(self.k_endog)]) + self.ssm["transition"] = pt.specify_shape(transition, (self.k_states, self.k_states)) if self.innovations: if self.k_endog == 1: @@ -235,16 +233,20 @@ def make_symbolic_graph(self) -> None: sigma_cycle = self.make_and_register_variable( f"sigma_{self.name}", shape=(self.k_endog,) ) - # can't make the linalg.block_diag logic work here - # doing it manually for now - for i in range(self.k_endog): - start_idx = i * 2 - end_idx = (i + 1) * 2 - Q_block = pt.eye(2) * sigma_cycle[i] ** 2 - self.ssm["state_cov", start_idx:end_idx, start_idx:end_idx] = Q_block + state_cov = block_diag( + *[pt.eye(2) * sigma_cycle[i] ** 2 for i in range(self.k_endog)] + ) + self.ssm["state_cov"] = pt.specify_shape(state_cov, (self.k_states, self.k_states)) def populate_component_properties(self): - self.state_names = [f"{self.name}_{f}" for f in ["Cos", "Sin"]] + if self.k_endog == 1: + self.state_names = [f"{self.name}_{f}" for f in ["Cos", "Sin"]] + else: + # For multivariate cycles, create state names for each observed state + self.state_names = [] + for var_name in self.observed_state_names: + self.state_names.extend([f"{self.name}_{var_name}_{f}" for f in ["Cos", "Sin"]]) + self.param_names = [f"{self.name}"] if self.k_endog == 1: @@ -260,7 +262,7 @@ def populate_component_properties(self): else: self.param_dims = {self.name: (f"{self.name}_endog", f"{self.name}_state")} self.coords = { - f"{self.name}_state": self.state_names, + f"{self.name}_state": [f"{self.name}_Cos", 
f"{self.name}_Sin"], f"{self.name}_endog": self.observed_state_names, } self.param_info = { From 7e9bb071425e8659291a816b963c795b96c1043c Mon Sep 17 00:00:00 2001 From: Jonathan Dekermanjian Date: Sat, 5 Jul 2025 08:23:29 -0600 Subject: [PATCH 10/21] 1. updated level_trend component coord/param labels 2. Adjusted the regression component to allow multivariate regression component specification 3. Added a notebook for quick evaluation of the adjustments and additions made --- notebooks/multivariate_ssm.ipynb | 729 +++++++ pymc_extras/statespace/models/structural.py | 1679 +++++++++++++++++ .../structural/components/level_trend.py | 8 +- .../structural/components/regression.py | 44 +- tests/statespace/models/test_structural.py | 840 +++++++++ 5 files changed, 3284 insertions(+), 16 deletions(-) create mode 100644 notebooks/multivariate_ssm.ipynb create mode 100644 pymc_extras/statespace/models/structural.py create mode 100644 tests/statespace/models/test_structural.py diff --git a/notebooks/multivariate_ssm.ipynb b/notebooks/multivariate_ssm.ipynb new file mode 100644 index 000000000..83cc74e2e --- /dev/null +++ b/notebooks/multivariate_ssm.ipynb @@ -0,0 +1,729 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 16, + "id": "a5b7dcb3", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from pymc_extras.statespace.models import structural as st\n", + "\n", + "import pymc as pm\n", + "import arviz as az\n", + "import pytensor.tensor as pt\n", + "\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f8bfe995", + "metadata": {}, + "outputs": [], + "source": [ + "rng = np.random" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "a96a731b", + "metadata": {}, + "outputs": [], + "source": [ + "def simulate_local_level_with_exog(\n", + " n_endog: int = 1,\n", + " time_steps: int = 100,\n", + " mu0: float = 0.0,\n", + " sigma_eta: 
float = 1.0,\n", + " sigma_eps: float = 0.5,\n", + " beta1: float = 2.0,\n", + " beta2: float = -1.5,\n", + " seed: int | None = None,\n", + "):\n", + " \"\"\"\n", + " Simulates a local level model with exogenous variables.\n", + "\n", + " Parameters\n", + " ----------\n", + " n_endog: int\n", + " The number of series to simulate\n", + " time_steps: int\n", + " The length of the time-series to simulate\n", + " mu0: float\n", + " The initial state\n", + " sigma_eta: float\n", + " The level innovations standard deviation\n", + " sigma_eps: float\n", + " The observations standard deviation\n", + " beta1: float\n", + " The weight of the binary exogenous variable\n", + " beta2: float\n", + " The weight of the continuous exogenous variable\n", + " seed: int\n", + " Random generator seed for reproducibility\n", + "\n", + " Returns\n", + " -------\n", + " ys: dict[str, float]\n", + " n_endog number of observations\n", + " mu: float\n", + " latent state\n", + " x1: int\n", + " binary exogenous observations\n", + " x2: float\n", + " continuous exogenous observations\n", + " \"\"\"\n", + " if seed is not None:\n", + " np.random.seed(seed)\n", + "\n", + " # init state and observation vectors\n", + " mu = np.zeros(time_steps)\n", + " y = np.zeros(time_steps)\n", + "\n", + " # initial state\n", + " mu[0] = mu0\n", + "\n", + " # simulate exogenous variables\n", + " # binary variable\n", + " x1 = np.random.binomial(1, 0.2, size=time_steps)\n", + "\n", + " # continuous variable\n", + " x2 = np.random.normal(0, 1, size=time_steps)\n", + "\n", + " # simulate latent state (local level)\n", + " for t in range(1, time_steps):\n", + " mu[t] = mu[t - 1] + np.random.normal(0, sigma_eta)\n", + "\n", + " # simulate observations\n", + " ys = {\n", + " f\"y{i+1}\": mu + beta1 * x1 + beta2 * x2 + np.random.normal(0, sigma_eps, size=time_steps)\n", + " for i in range(n_endog)\n", + " }\n", + "\n", + " return ys, mu, x1, x2" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id":
"a4130131", + "metadata": {}, + "outputs": [], + "source": [ + "# Simulate\n", + "T = 100\n", + "true_sigma_eta = 0.3\n", + "true_sigma_eps = 0.6\n", + "true_beta1 = 3.0\n", + "true_beta2 = -1.0\n", + "ys, mu, x1, x2 = simulate_local_level_with_exog(\n", + " n_endog=3,\n", + " time_steps=T,\n", + " mu0=0,\n", + " sigma_eta=true_sigma_eta,\n", + " sigma_eps=true_sigma_eps,\n", + " beta1=true_beta1,\n", + " beta2=true_beta2,\n", + " seed=42,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e9acbb8", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "y = ys[\"y1\"]\n", + "\n", + "# Plot\n", + "plt.figure(figsize=(12, 6))\n", + "for k, vy in ys.items():\n", + " plt.plot(vy, label=f\"Observed ${k}_t$\", alpha=0.6)\n", + "plt.plot(mu, label=\"Latent level $\\\\mu_t$\", linewidth=2)\n", + "plt.plot(x1 * 5, label=\"Binary Exog $x^{(1)}_t$\", linestyle=\"--\") # need to blow up to see it\n", + "plt.plot(x2, label=\"Continuous Exog $x^{(2)}_t$\", linestyle=\":\")\n", + "plt.legend()\n", + "plt.title(\"Local Level Model with Exogenous Variables\")\n", + "plt.xlabel(\"Time\")\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "f23bb2ed", + "metadata": {}, + "source": [ + "# Quick and dirty test" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "d51ff06e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
                                  Model Requirements                                   \n",
+       "                                                                                       \n",
+       "  Variable        Shape       Constraints                                  Dimensions  \n",
+       " ───────────────────────────────────────────────────────────────────────────────────── \n",
+       "  initial_trend   (3, 1)                               ('trend_endog', 'trend_state')  \n",
+       "  sigma_trend     (3, 1)      Positive                 ('trend_endog', 'trend_shock')  \n",
+       "  beta_exog       (3, 2)                                 ('exog_endog', 'exog_state')  \n",
+       "  P0              (9, 9)      Positive semi-definite           ('state', 'state_aux')  \n",
+       "                                                                                       \n",
+       "  data_exog       (None, 2)   pm.Data                          ('time', 'exog_state')  \n",
+       "                                                                                       \n",
+       "These parameters should be assigned priors inside a PyMC model block before calling the\n",
+       "                            build_statespace_graph method.                             \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[3m Model Requirements \u001b[0m\n", + " \n", + " \u001b[1m \u001b[0m\u001b[1mVariable \u001b[0m\u001b[1m \u001b[0m \u001b[1m \u001b[0m\u001b[1mShape \u001b[0m\u001b[1m \u001b[0m \u001b[1m \u001b[0m\u001b[1mConstraints \u001b[0m\u001b[1m \u001b[0m \u001b[1m \u001b[0m\u001b[1m Dimensions\u001b[0m\u001b[1m \u001b[0m \n", + " ───────────────────────────────────────────────────────────────────────────────────── \n", + " initial_trend \u001b[1m(\u001b[0m\u001b[1;36m3\u001b[0m, \u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m \u001b[1m(\u001b[0m\u001b[32m'trend_endog'\u001b[0m, \u001b[32m'trend_state'\u001b[0m\u001b[1m)\u001b[0m \n", + " sigma_trend \u001b[1m(\u001b[0m\u001b[1;36m3\u001b[0m, \u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m Positive \u001b[1m(\u001b[0m\u001b[32m'trend_endog'\u001b[0m, \u001b[32m'trend_shock'\u001b[0m\u001b[1m)\u001b[0m \n", + " beta_exog \u001b[1m(\u001b[0m\u001b[1;36m3\u001b[0m, \u001b[1;36m2\u001b[0m\u001b[1m)\u001b[0m \u001b[1m(\u001b[0m\u001b[32m'exog_endog'\u001b[0m, \u001b[32m'exog_state'\u001b[0m\u001b[1m)\u001b[0m \n", + " P0 \u001b[1m(\u001b[0m\u001b[1;36m9\u001b[0m, \u001b[1;36m9\u001b[0m\u001b[1m)\u001b[0m Positive semi-definite \u001b[1m(\u001b[0m\u001b[32m'state'\u001b[0m, \u001b[32m'state_aux'\u001b[0m\u001b[1m)\u001b[0m \n", + " \n", + " data_exog \u001b[1m(\u001b[0m\u001b[3;35mNone\u001b[0m, \u001b[1;36m2\u001b[0m\u001b[1m)\u001b[0m pm.Data \u001b[1m(\u001b[0m\u001b[32m'time'\u001b[0m, \u001b[32m'exog_state'\u001b[0m\u001b[1m)\u001b[0m \n", + " \n", + "\u001b[2;3mThese parameters should be assigned priors inside a PyMC model block before calling the\u001b[0m\n", + "\u001b[2;3m build_statespace_graph method. 
\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "level_trend = st.LevelTrendComponent(\n", + " order=1, innovations_order=[1], name=\"level_trend\", observed_state_names=[\"y1\", \"y2\", \"y3\"]\n", + ")\n", + "\n", + "exog = st.RegressionComponent(\n", + " name=\"exog\",\n", + " k_exog=2,\n", + " innovations=False,\n", + " state_names=[\"x1\", \"x2\"],\n", + " observed_state_names=[\"y1\", \"y2\", \"y3\"],\n", + ")\n", + "\n", + "ss_mod = (level_trend + exog).build(mode=\"JAX\")" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "eec30de3", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/dekermanjian/Desktop/Open_Source_Contributions/pymc-extras/pymc_extras/statespace/utils/data_tools.py:74: UserWarning: No time index found on the supplied data. A simple range index will be automatically generated.\n", + " warnings.warn(NO_TIME_INDEX_WARNING)\n" + ] + } + ], + "source": [ + "with pm.Model(coords=ss_mod.coords) as level_trend_exog_model:\n", + " # Data container\n", + " data_exog = pm.Data(\"data_exog\", np.vstack((x1, x2)).T, dims=(\"time\", \"exog_state\"))\n", + "\n", + " # Initial process covariance matrix\n", + " P0_diag = pm.Gamma(\"P0_diag\", alpha=2, beta=4, dims=\"state\")\n", + " P0 = pm.Deterministic(\"P0\", pt.diag(P0_diag), dims=(\"state\", \"state_aux\"))\n", + "\n", + " # Initial local level trend\n", + " initial_trend = pm.Normal(\"initial_trend\", mu=0, sigma=1, dims=(\"trend_endog\", \"trend_state\"))\n", + "\n", + " # Local level innovations sigma\n", + " sigma_trend = pm.HalfNormal(\"sigma_trend\", 1, dims=(\"trend_endog\", \"trend_shock\"))\n", + "\n", + " # exogenous variable parameter priors\n", + " beta_exog = pm.Normal(\"beta_exog\", 0, 5, dims=(\"exog_endog\", \"exog_state\"))\n", + "\n", + " ss_mod.build_statespace_graph(np.vstack((ys[\"y1\"], ys[\"y2\"], ys[\"y3\"])).T)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 106, + "id": "05830b2b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Sampler Progress

\n", + "

Total Chains: 4

\n", + "

Active Chains: 0

\n", + "

\n", + " Finished Chains:\n", + " 4\n", + "

\n", + "

Sampling for 42 seconds

\n", + "

\n", + " Estimated Time to Completion:\n", + " now\n", + "

\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ProgressDrawsDivergencesStep SizeGradients/Draw
\n", + " \n", + " \n", + " 2000100.527
\n", + " \n", + " \n", + " 2000100.527
\n", + " \n", + " \n", + " 2000870.537
\n", + " \n", + " \n", + " 200040.527
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with level_trend_exog_model:\n", + " idata = pm.sample(\n", + " nuts_sampler=\"nutpie\", nuts_sampler_kwargs={\"backend\": \"JAX\", \"gradient_backend\": \"JAX\"}\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "466fb92a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
meansdhdi_3%hdi_97%mcse_meanmcse_sdess_bulkess_tailr_hat
beta_exog[y1, x1]2.9470.7291.4334.2780.0160.0142232.02077.01.00
beta_exog[y1, x2]-0.9910.681-2.2700.3140.0120.0143102.02334.01.00
beta_exog[y2, x1]3.0070.7131.6374.3560.0150.0152152.02107.01.00
beta_exog[y2, x2]-1.0470.714-2.4710.4080.0140.0193007.01644.01.00
beta_exog[y3, x1]2.8000.7121.4384.1650.0130.0193473.02458.01.00
beta_exog[y3, x2]-0.9340.675-2.1840.4260.0130.0143017.02326.01.00
sigma_trend[y1, level]0.7570.0570.6570.8710.0020.002878.0367.01.01
sigma_trend[y2, level]0.9270.0680.8101.0620.0020.0011846.01633.01.00
sigma_trend[y3, level]0.8470.0620.7330.9610.0010.0012009.02855.01.00
\n", + "
" + ], + "text/plain": [ + " mean sd hdi_3% hdi_97% mcse_mean mcse_sd \\\n", + "beta_exog[y1, x1] 2.947 0.729 1.433 4.278 0.016 0.014 \n", + "beta_exog[y1, x2] -0.991 0.681 -2.270 0.314 0.012 0.014 \n", + "beta_exog[y2, x1] 3.007 0.713 1.637 4.356 0.015 0.015 \n", + "beta_exog[y2, x2] -1.047 0.714 -2.471 0.408 0.014 0.019 \n", + "beta_exog[y3, x1] 2.800 0.712 1.438 4.165 0.013 0.019 \n", + "beta_exog[y3, x2] -0.934 0.675 -2.184 0.426 0.013 0.014 \n", + "sigma_trend[y1, level] 0.757 0.057 0.657 0.871 0.002 0.002 \n", + "sigma_trend[y2, level] 0.927 0.068 0.810 1.062 0.002 0.001 \n", + "sigma_trend[y3, level] 0.847 0.062 0.733 0.961 0.001 0.001 \n", + "\n", + " ess_bulk ess_tail r_hat \n", + "beta_exog[y1, x1] 2232.0 2077.0 1.00 \n", + "beta_exog[y1, x2] 3102.0 2334.0 1.00 \n", + "beta_exog[y2, x1] 2152.0 2107.0 1.00 \n", + "beta_exog[y2, x2] 3007.0 1644.0 1.00 \n", + "beta_exog[y3, x1] 3473.0 2458.0 1.00 \n", + "beta_exog[y3, x2] 3017.0 2326.0 1.00 \n", + "sigma_trend[y1, level] 878.0 367.0 1.01 \n", + "sigma_trend[y2, level] 1846.0 1633.0 1.00 \n", + "sigma_trend[y3, level] 2009.0 2855.0 1.00 " + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "az.summary(idata, var_names=[\"beta_exog\", \"sigma_trend\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "id": "3684616b", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "az.plot_posterior(\n", + " idata,\n", + " var_names=\"beta_exog\",\n", + " ref_val={\n", + " \"beta_exog\": [\n", + " {\"exog_endog\": \"y1\", \"exog_state\": \"x1\", \"ref_val\": true_beta1},\n", + " {\"exog_endog\": \"y1\", \"exog_state\": \"x2\", \"ref_val\": true_beta2},\n", + " ]\n", + " },\n", + ");" + ] + }, + { + "cell_type": "markdown", + "id": "1b85190c", + "metadata": {}, + "source": [ + "# Need to test with missing data" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pymc-extras-test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pymc_extras/statespace/models/structural.py b/pymc_extras/statespace/models/structural.py new file mode 100644 index 000000000..a982366c3 --- /dev/null +++ b/pymc_extras/statespace/models/structural.py @@ -0,0 +1,1679 @@ +import functools as ft +import logging + +from abc import ABC +from collections.abc import Sequence +from itertools import pairwise +from typing import Any + +import numpy as np +import pytensor +import pytensor.tensor as pt +import xarray as xr + +from pytensor import Variable +from pytensor.compile.mode import Mode + +from pymc_extras.statespace.core import PytensorRepresentation +from pymc_extras.statespace.core.statespace import PyMCStateSpace +from pymc_extras.statespace.models.utilities import ( + conform_time_varying_and_time_invariant_matrices, + make_default_coords, +) +from pymc_extras.statespace.utils.constants import ( + ALL_STATE_AUX_DIM, + ALL_STATE_DIM, + AR_PARAM_DIM, + LONG_MATRIX_NAMES, + POSITION_DERIVATIVE_NAMES, + TIME_DIM, +) + +_log = 
logging.getLogger("pymc.experimental.statespace") + +floatX = pytensor.config.floatX + + +def order_to_mask(order): + if isinstance(order, int): + return np.ones(order).astype(bool) + else: + return np.array(order).astype(bool) + + +def _frequency_transition_block(s, j): + lam = 2 * np.pi * j / s + + return pt.stack([[pt.cos(lam), pt.sin(lam)], [-pt.sin(lam), pt.cos(lam)]]) + + +class StructuralTimeSeries(PyMCStateSpace): + r""" + Structural Time Series Model + + The structural time series model, named by [1] and presented in statespace form in [2], is a framework for + decomposing a univariate time series into level, trend, seasonal, and cycle components. It also admits the + possibility of exogenous regressors. Unlike the SARIMAX framework, the time series is not assumed to be stationary. + + Notes + ----- + + .. math:: + y_t = \mu_t + \gamma_t + c_t + \varepsilon_t + + """ + + def __init__( + self, + ssm: PytensorRepresentation, + state_names: list[str], + data_names: list[str], + shock_names: list[str], + param_names: list[str], + exog_names: list[str], + param_dims: dict[str, tuple[int]], + coords: dict[str, Sequence], + param_info: dict[str, dict[str, Any]], + data_info: dict[str, dict[str, Any]], + component_info: dict[str, dict[str, Any]], + measurement_error: bool, + name_to_variable: dict[str, Variable], + name_to_data: dict[str, Variable] | None = None, + name: str | None = None, + verbose: bool = True, + filter_type: str = "standard", + mode: str | Mode | None = None, + ): + # Add the initial state covariance to the parameters + if name is None: + name = "data" + self._name = name + + k_states, k_posdef, k_endog = ssm.k_states, ssm.k_posdef, ssm.k_endog + param_names, param_dims, param_info = self._add_inital_state_cov_to_properties( + param_names, param_dims, param_info, k_states + ) + self._state_names = state_names.copy() + self._data_names = data_names.copy() + self._shock_names = shock_names.copy() + self._param_names = param_names.copy() + 
_log = logging.getLogger("pymc.experimental.statespace")

floatX = pytensor.config.floatX


class StructuralTimeSeries(PyMCStateSpace):
    r"""
    Structural Time Series Model

    The structural time series model, named by [1] and presented in statespace form in [2], is a framework for
    decomposing a univariate time series into level, trend, seasonal, and cycle components. It also admits the
    possibility of exogenous regressors. Unlike the SARIMAX framework, the time series is not assumed to be stationary.

    Instances are normally created by calling ``build`` on a ``Component`` (or a sum of components) rather than by
    calling this constructor directly.

    Notes
    -----

    .. math::
         y_t = \mu_t + \gamma_t + c_t + \varepsilon_t

    """

    def __init__(
        self,
        ssm: PytensorRepresentation,
        state_names: list[str],
        data_names: list[str],
        shock_names: list[str],
        param_names: list[str],
        exog_names: list[str],
        param_dims: dict[str, tuple[str, ...]],
        coords: dict[str, Sequence],
        param_info: dict[str, dict[str, Any]],
        data_info: dict[str, dict[str, Any]],
        component_info: dict[str, dict[str, Any]],
        measurement_error: bool,
        name_to_variable: dict[str, Variable],
        name_to_data: dict[str, Variable] | None = None,
        name: str | None = None,
        verbose: bool = True,
        filter_type: str = "standard",
        mode: str | Mode | None = None,
    ):
        if name is None:
            name = "data"
        self._name = name

        k_states, k_posdef, k_endog = ssm.k_states, ssm.k_posdef, ssm.k_endog

        # Add the initial state covariance to the parameters. The helper returns fresh containers, so the
        # component that supplied these arguments is left untouched and can safely be built more than once.
        param_names, param_dims, param_info = self._add_inital_state_cov_to_properties(
            param_names, param_dims, param_info, k_states
        )
        self._state_names = state_names.copy()
        self._data_names = data_names.copy()
        self._shock_names = shock_names.copy()
        self._param_names = param_names
        self._param_dims = param_dims

        # Default coordinates take precedence over component coordinates; merge into a copy so the caller's
        # dictionary is not modified.
        default_coords = make_default_coords(self)
        coords = dict(coords)
        coords.update(default_coords)

        self._coords = coords
        self._param_info = param_info
        self._data_info = data_info.copy()
        self.measurement_error = measurement_error

        super().__init__(
            k_endog,
            k_states,
            max(1, k_posdef),
            filter_type=filter_type,
            verbose=verbose,
            measurement_error=measurement_error,
            mode=mode,
        )
        self.ssm = ssm.copy()

        if k_posdef == 0:
            # If there is no randomness in the model, add dummy matrices to the representation to avoid errors
            # when we go to construct random variables from the matrices
            self.ssm.k_posdef = self.k_posdef
            self.ssm.shapes["state_cov"] = (1, 1, 1)
            self.ssm["state_cov"] = pt.zeros((1, 1, 1))

            self.ssm.shapes["selection"] = (1, self.k_states, 1)
            self.ssm["selection"] = pt.zeros((1, self.k_states, 1))

        self._component_info = component_info.copy()

        self._name_to_variable = name_to_variable.copy()
        # ``name_to_data`` is optional; fall back to an empty registry instead of calling ``.copy()`` on None.
        self._name_to_data = {} if name_to_data is None else name_to_data.copy()

        self._exog_names = exog_names.copy()
        self._needs_exog_data = len(exog_names) > 0

        P0 = self.make_and_register_variable("P0", shape=(self.k_states, self.k_states))
        self.ssm["initial_state_cov"] = P0

    @staticmethod
    def _add_inital_state_cov_to_properties(param_names, param_dims, param_info, k_states):
        """
        Return copies of the parameter properties extended with an entry for the initial state covariance ``P0``.

        The inputs are never modified in place. If ``"P0"`` is already listed in ``param_names`` it is not added a
        second time.
        """
        p0_dims = (ALL_STATE_DIM, ALL_STATE_AUX_DIM)

        param_names = list(param_names)
        if "P0" not in param_names:
            param_names.append("P0")

        param_dims = {**param_dims, "P0": p0_dims}
        param_info = {
            **param_info,
            "P0": {
                "shape": (k_states, k_states),
                "constraints": "Positive semi-definite",
                "dims": p0_dims,
            },
        }

        return param_names, param_dims, param_info

    @property
    def param_names(self):
        return self._param_names

    @property
    def data_names(self) -> list[str]:
        return self._data_names

    @property
    def state_names(self):
        return self._state_names

    @property
    def observed_states(self):
        return [self._name]

    @property
    def shock_names(self):
        return self._shock_names

    @property
    def param_dims(self):
        return self._param_dims

    @property
    def coords(self) -> dict[str, Sequence]:
        return self._coords

    @property
    def param_info(self) -> dict[str, dict[str, Any]]:
        return self._param_info

    @property
    def data_info(self) -> dict[str, dict[str, Any]]:
        return self._data_info

    def make_symbolic_graph(self) -> None:
        """
        Assign placeholder pytensor variables among statespace matrices in positions where PyMC variables will go.

        Notes
        -----
        This assignment is handled by the components, so this function is implemented only to avoid the
        NotImplementedError raised by the base class.
        """

    def _state_slices_from_info(self):
        """Return one ``slice`` per component, locating that component's states in the full state vector."""
        state_counts = [comp_info["k_states"] for comp_info in self._component_info.values()]
        boundaries = np.cumsum([0, *state_counts])

        return [slice(start, stop) for start, stop in pairwise(boundaries)]

    def _hidden_states_from_data(self, data):
        """
        Reduce an array of hidden states (states on the last axis) to one column per interpretable subcomponent.

        Components whose ``obs_state_idx`` is None are skipped. Components flagged ``combine_hidden_states`` are
        collapsed to the sum of the states selected by the non-zero entries of ``obs_state_idx``; all other
        components contribute each of their states unchanged.
        """
        info = self._component_info
        result = []

        for name, s in zip(info, self._state_slices_from_info()):
            obs_idx = info[name]["obs_state_idx"]
            if obs_idx is None:
                continue

            X = data[..., s]
            if info[name]["combine_hidden_states"]:
                sum_idx = np.flatnonzero(obs_idx)
                result.append(X[..., sum_idx].sum(axis=-1)[..., None])
            else:
                n_comp_states = len(self.state_names[s])
                result.extend(X[..., j, None] for j in range(n_comp_states))

        return np.concatenate(result, axis=-1)

    def _get_subcomponent_names(self):
        """
        Return the labels of the columns produced by ``_hidden_states_from_data``, in the same order.

        Combined components are labelled by the component name, other components by ``"{component}[{state}]"``.
        """
        info = self._component_info
        result = []

        for name, s in zip(info, self._state_slices_from_info()):
            # Mirror the skip rule of ``_hidden_states_from_data`` so labels and data columns always line up.
            if info[name]["obs_state_idx"] is None:
                continue

            if info[name]["combine_hidden_states"]:
                result.append(name)
            else:
                result.extend(f"{name}[{comp_name}]" for comp_name in self.state_names[s])

        return result

    def extract_components_from_idata(self, idata: xr.Dataset) -> xr.Dataset:
        r"""
        Extract interpretable hidden states from an InferenceData returned by a PyMCStateSpace sampling method

        Parameters
        ----------
        idata: Dataset
            A Dataset object, returned by a PyMCStateSpace sampling method

        Returns
        -------
        idata: Dataset
            A Dataset object with hidden states transformed to represent only the "interpretable" subcomponents
            of the structural model.

        Raises
        ------
        ValueError
            If no variable in ``idata`` has a last dimension of length ``k_states``.

        Notes
        -----
        In general, a structural statespace model can be represented as:

        .. math::
            y_t = \mu_t + \nu_t + \cdots + \gamma_t + c_t + \xi_t + \varepsilon_t \tag{1}

        Where:

            - :math:`\mu_t` is the level of the data at time t
            - :math:`\nu_t` is the slope of the data at time t
            - :math:`\cdots` are higher time derivatives of the position (acceleration, jerk, etc) at time t
            - :math:`\gamma_t` is the seasonal component at time t
            - :math:`c_t` is the cycle component at time t
            - :math:`\xi_t` is the autoregressive error at time t
            - :math:`\varepsilon_t` is the measurement error at time t

        In state space form, some or all of these components are represented as linear combinations of other
        subcomponents, making interpretation of the outputs difficult. The purpose of this function is to take the
        expanded statespace representation and return a "reduced form" of only the components shown in
        equation (1).
        """

        def _extract_and_transform_variable(data_array, new_state_names):
            # The last two dimensions are taken to be (time, state); the state dimension changes length.
            *_, time_dim, state_dim = data_array.dims
            new_data_array = xr.apply_ufunc(
                self._hidden_states_from_data,
                data_array,
                input_core_dims=[[time_dim, state_dim]],
                output_core_dims=[[time_dim, state_dim]],
                exclude_dims={state_dim},
            )
            new_data_array.coords.update({state_dim: new_state_names})
            return new_data_array

        var_names = list(idata.data_vars.keys())
        new_state_names = self._get_subcomponent_names()

        # A variable holds the full hidden state only if it has (time, state) trailing dimensions and the state
        # dimension has length k_states. Requiring ndim >= 2 keeps scalar and 1-d variables from crashing here.
        latent_names = [
            name
            for name in var_names
            if idata[name].ndim >= 2 and idata[name].shape[-1] == self.k_states
        ]
        dropped_vars = sorted(set(var_names) - set(latent_names))
        if len(dropped_vars) > 0:
            _log.warning(
                f'Variables {", ".join(dropped_vars)} do not contain all hidden states (their last dimension '
                f"is not {self.k_states}). They will not be present in the modified idata."
            )
        if len(dropped_vars) == len(var_names):
            raise ValueError(
                "Provided idata had no variables with all hidden states; cannot extract components."
            )

        idata_new = xr.Dataset(
            {
                name: _extract_and_transform_variable(idata[name], new_state_names)
                for name in latent_names
            }
        )
        return idata_new
class Component(ABC):
    r"""
    Base class for a component of a structural timeseries model.

    This base class contains a subset of the class attributes of the PyMCStateSpace class, and none of the class
    methods. The purpose of a component is to allow the partial definition of a structural model. Components are
    assembled into a full model by the StructuralTimeSeries class.

    Parameters
    ----------
    name: str
        The name of the component
    k_endog: int
        Number of endogenous variables being modeled. Currently, must be one because structural models only support
        univariate data.
    k_states: int
        Number of hidden states in the component model
    k_posdef: int
        Rank of the state covariance matrix, or the number of sources of innovations in the component model
    state_names, data_names, shock_names, param_names, exog_names: list of str, optional
        Initial values of the corresponding attributes. Each defaults to an empty list.
    representation: PytensorRepresentation, optional
        Statespace representation to use. If None, a new representation is created from ``k_endog``, ``k_states``
        and ``k_posdef``.
    measurement_error: bool
        Whether the observation associated with the component has measurement error. Default is False.
    combine_hidden_states: bool
        Flag read by ``StructuralTimeSeries._hidden_states_from_data``. When ``True``, the hidden states of the
        component selected by the non-zero entries of ``obs_state_idxs`` are summed into a single series. Should be
        True in models where hidden states individually have no interpretation, such as seasonal or autoregressive
        components.
    component_from_sum: bool
        When True, ``populate_component_properties`` and ``make_symbolic_graph`` are not called by the constructor.
        Used by ``__add__``, which fills in the properties of the combined component itself.
    obs_state_idxs: array-like, optional
        Indicator over the component's states, stored in the component info under ``"obs_state_idx"``. If None,
        ``StructuralTimeSeries._hidden_states_from_data`` skips the component.
    """

    def __init__(
        self,
        name,
        k_endog,
        k_states,
        k_posdef,
        state_names=None,
        data_names=None,
        shock_names=None,
        param_names=None,
        exog_names=None,
        representation: PytensorRepresentation | None = None,
        measurement_error=False,
        combine_hidden_states=True,
        component_from_sum=False,
        obs_state_idxs=None,
    ):
        self.name = name
        self.k_endog = k_endog
        self.k_states = k_states
        self.k_posdef = k_posdef
        self.measurement_error = measurement_error

        self.state_names = state_names if state_names is not None else []
        self.data_names = data_names if data_names is not None else []
        self.shock_names = shock_names if shock_names is not None else []
        self.param_names = param_names if param_names is not None else []
        self.exog_names = exog_names if exog_names is not None else []

        self.needs_exog_data = len(self.exog_names) > 0
        self.coords = {}
        self.param_dims = {}

        self.param_info = {}
        self.data_info = {}

        self.param_counts = {}

        if representation is None:
            self.ssm = PytensorRepresentation(k_endog=k_endog, k_states=k_states, k_posdef=k_posdef)
        else:
            self.ssm = representation

        self._name_to_variable = {}
        self._name_to_data = {}

        if not component_from_sum:
            # Properties must be populated first: make_symbolic_graph registers placeholders, and registration
            # checks the requested name against param_names / data_names.
            self.populate_component_properties()
            self.make_symbolic_graph()

        self._component_info = {
            self.name: {
                "k_states": self.k_states,
                "k_endog": self.k_endog,
                # Misspelled legacy key, kept so existing readers of it continue to work.
                "k_enodg": self.k_endog,
                "k_posdef": self.k_posdef,
                "combine_hidden_states": combine_hidden_states,
                "obs_state_idx": obs_state_idxs,
            }
        }

    def make_and_register_variable(self, name, shape, dtype=floatX) -> Variable:
        r"""
        Helper function to create a pytensor symbolic variable and register it in the _name_to_variable dictionary

        Parameters
        ----------
        name : str
            The name of the placeholder variable. Must be the name of a model parameter.
        shape : int or tuple of int
            Shape of the parameter
        dtype : str, default pytensor.config.floatX
            dtype of the parameter

        Returns
        -------
        placeholder : Variable
            The newly created symbolic placeholder.

        Raises
        ------
        ValueError
            If ``name`` is not in ``param_names``, or has already been registered.

        Notes
        -----
        Symbolic pytensor variables are used in the ``make_symbolic_graph`` method as placeholders for PyMC random
        variables. The change is made in the ``_insert_random_variables`` method via ``pytensor.graph_replace``. To
        make the change, a dictionary mapping pytensor variables to PyMC random variables needs to be constructed.

        The purpose of this method is to:
            1.  Create the placeholder symbolic variables
            2.  Register the placeholder variable in the ``_name_to_variable`` dictionary

        The shape provided here will define the shape of the prior that will need to be provided by the user.
        """
        if name not in self.param_names:
            raise ValueError(
                f"{name} is not a model parameter. All placeholder variables should correspond to model "
                f"parameters."
            )

        if name in self._name_to_variable:
            raise ValueError(
                f"{name} is already a registered placeholder variable with shape "
                f"{self._name_to_variable[name].type.shape}"
            )

        placeholder = pt.tensor(name, shape=shape, dtype=dtype)
        self._name_to_variable[name] = placeholder
        return placeholder

    def make_and_register_data(self, name, shape, dtype=floatX) -> Variable:
        r"""
        Helper function to create a pytensor symbolic variable and register it in the _name_to_data dictionary

        Parameters
        ----------
        name : str
            The name of the placeholder data. Must be the name of an expected data variable.
        shape : int or tuple of int
            Shape of the data
        dtype : str, default pytensor.config.floatX
            dtype of the data

        Returns
        -------
        placeholder : Variable
            The newly created symbolic placeholder.

        Raises
        ------
        ValueError
            If ``name`` is not in ``data_names``, or has already been registered.

        Notes
        -----
        See docstring for make_and_register_variable for more details. This function is similar, but handles data
        inputs instead of model parameters.
        """
        if name not in self.data_names:
            raise ValueError(
                f"{name} is not a model data variable. All placeholder data should correspond to an entry in "
                f"data_names."
            )

        if name in self._name_to_data:
            raise ValueError(
                f"{name} is already a registered placeholder data variable with shape "
                f"{self._name_to_data[name].type.shape}"
            )

        placeholder = pt.tensor(name, shape=shape, dtype=dtype)
        self._name_to_data[name] = placeholder
        return placeholder

    def make_symbolic_graph(self) -> None:
        """Fill ``self.ssm`` with placeholder variables. Must be implemented by subclasses."""
        raise NotImplementedError

    def populate_component_properties(self):
        """Set names, dims, coords and info attributes of the component. Must be implemented by subclasses."""
        raise NotImplementedError

    def _get_combined_shapes(self, other):
        """Return ``(k_states, k_posdef, k_endog)`` of the sum of this component and ``other``."""
        if self.k_endog != other.k_endog:
            raise NotImplementedError(
                "Merging elements with different numbers of observed states is not supported."
            )

        k_states = self.k_states + other.k_states
        k_posdef = self.k_posdef + other.k_posdef
        k_endog = self.k_endog

        return k_states, k_posdef, k_endog

    def _combine_statespace_representations(self, other):
        """
        Build the statespace representation of the sum of this component and ``other``.

        State vectors and state intercepts are concatenated; initial state covariance, transition, selection and
        state covariance matrices are combined block-diagonally; design matrices are concatenated along the last
        axis; observation intercepts and observation covariances are added.
        """

        def make_slice(name, x, o_x):
            # Index with as many full slices as the higher-dimensional of the two matrices has axes.
            ndim = max(x.ndim, o_x.ndim)
            return (name,) + (slice(None, None, None),) * ndim

        k_states, k_posdef, k_endog = self._get_combined_shapes(other)

        self_matrices = [self.ssm[name] for name in LONG_MATRIX_NAMES]
        other_matrices = [other.ssm[name] for name in LONG_MATRIX_NAMES]

        x0, P0, c, d, T, Z, R, H, Q = (
            self.ssm[make_slice(name, x, o_x)]
            for name, x, o_x in zip(LONG_MATRIX_NAMES, self_matrices, other_matrices)
        )
        o_x0, o_P0, o_c, o_d, o_T, o_Z, o_R, o_H, o_Q = (
            other.ssm[make_slice(name, x, o_x)]
            for name, x, o_x in zip(LONG_MATRIX_NAMES, self_matrices, other_matrices)
        )

        initial_state = pt.concatenate(conform_time_varying_and_time_invariant_matrices(x0, o_x0))
        initial_state.name = x0.name

        initial_state_cov = pt.linalg.block_diag(P0, o_P0)
        initial_state_cov.name = P0.name

        state_intercept = pt.concatenate(conform_time_varying_and_time_invariant_matrices(c, o_c))
        state_intercept.name = c.name

        obs_intercept = d + o_d
        obs_intercept.name = d.name

        transition = pt.linalg.block_diag(T, o_T)
        transition.name = T.name

        design = pt.concatenate(conform_time_varying_and_time_invariant_matrices(Z, o_Z), axis=-1)
        design.name = Z.name

        selection = pt.linalg.block_diag(R, o_R)
        selection.name = R.name

        obs_cov = H + o_H
        obs_cov.name = H.name

        state_cov = pt.linalg.block_diag(Q, o_Q)
        state_cov.name = Q.name

        new_ssm = PytensorRepresentation(
            k_endog=k_endog,
            k_states=k_states,
            k_posdef=k_posdef,
            initial_state=initial_state,
            initial_state_cov=initial_state_cov,
            state_intercept=state_intercept,
            obs_intercept=obs_intercept,
            transition=transition,
            design=design,
            selection=selection,
            obs_cov=obs_cov,
            state_cov=state_cov,
        )

        return new_ssm

    def _combine_property(self, other, name):
        """
        Merge attribute ``name`` of this component with that of ``other`` into a new object.

        Lists are concatenated (this component first). Dictionaries are merged, with entries of ``other`` taking
        precedence on key collisions.

        Raises
        ------
        TypeError
            If the attribute is neither a list nor a dict.
        """
        self_prop = getattr(self, name)
        other_prop = getattr(other, name)

        if isinstance(self_prop, list):
            return self_prop + other_prop
        if isinstance(self_prop, dict):
            return {**self_prop, **other_prop}

        # Previously this fell through and returned None, which only failed later and far from the cause.
        raise TypeError(
            f"Cannot combine property {name!r} of type {type(self_prop).__name__}; expected list or dict."
        )

    def _combine_component_info(self, other):
        """
        Merge the component info of this component and ``other``.

        Keys starting with ``"StateSpace"`` are ignored.

        Raises
        ------
        ValueError
            If the same component name appears in both.
        """
        combined_info = {}
        for source in (self._component_info, other._component_info):
            for key, value in source.items():
                if key.startswith("StateSpace"):
                    continue
                if key in combined_info:
                    raise ValueError(f"Found duplicate component named {key}")
                combined_info[key] = value

        return combined_info

    def _make_combined_name(self):
        """Return ``"StateSpace[a, b, ...]"`` listing the names of all constituent components."""
        components = self._component_info.keys()
        name = f'StateSpace[{", ".join(components)}]'
        return name

    def __add__(self, other):
        """
        Combine two components into a new ``Component`` holding the joint statespace representation.

        Neither operand is modified. The result is flagged as having measurement error if either operand is.
        """
        k_states, k_posdef, k_endog = self._get_combined_shapes(other)
        ssm = self._combine_statespace_representations(other)

        new_comp = Component(
            name="",
            k_endog=k_endog,
            k_states=k_states,
            k_posdef=k_posdef,
            measurement_error=any([self.measurement_error, other.measurement_error]),
            representation=ssm,
            component_from_sum=True,
        )
        new_comp._component_info = self._combine_component_info(other)
        new_comp.name = new_comp._make_combined_name()

        combined_props = (
            "state_names",
            "data_names",
            "param_names",
            "shock_names",
            "param_dims",
            "coords",
            "param_info",
            "data_info",
            "exog_names",
            "_name_to_variable",
            "_name_to_data",
        )
        for prop in combined_props:
            setattr(new_comp, prop, self._combine_property(other, prop))

        return new_comp

    def build(
        self, name=None, filter_type="standard", verbose=True, mode: str | Mode | None = None
    ):
        """
        Build a StructuralTimeSeries statespace model from the current component(s)

        Parameters
        ----------
        name: str, optional
            Name of the observed data being modeled. Default is "data"

        filter_type : str, optional
            The type of Kalman filter to use. Valid options are "standard", "univariate", "single", "cholesky", and
            "steady_state". For more information, see the docs for each filter. Default is "standard".

        verbose : bool, optional
            If True, displays information about the initialized model. Defaults to True.

        mode: str or Mode, optional
            Pytensor compile mode, used in auxiliary sampling methods such as ``sample_conditional_posterior`` and
            ``forecast``. The mode does **not** affect calls to ``pm.sample``.

            Regardless of whether a mode is specified, it can always be overwritten via the ``compile_kwargs`` argument
            to all sampling methods.

        Returns
        -------
        PyMCStateSpace
            An initialized instance of a PyMCStateSpace, constructed using the system matrices contained in the
            components.
        """

        return StructuralTimeSeries(
            self.ssm,
            name=name,
            state_names=self.state_names,
            data_names=self.data_names,
            shock_names=self.shock_names,
            param_names=self.param_names,
            param_dims=self.param_dims,
            coords=self.coords,
            param_info=self.param_info,
            data_info=self.data_info,
            component_info=self._component_info,
            measurement_error=self.measurement_error,
            exog_names=self.exog_names,
            name_to_variable=self._name_to_variable,
            name_to_data=self._name_to_data,
            filter_type=filter_type,
            verbose=verbose,
            mode=mode,
        )
class LevelTrendComponent(Component):
    r"""
    Level and trend component of a structural time series model

    Parameters
    ----------
    order : int or sequence of int

        Number of time derivatives of the trend to include in the model. For example, when order=3, the trend will
        be of the form ``y = a + b * t + c * t ** 2``, where the coefficients ``a, b, c`` come from the initial
        state values.

    innovations_order : int or sequence of int, optional

        The number of stochastic innovations to include in the model. By default, ``innovations_order = order``.
        A sequence shorter than the number of states is padded with zeros (no innovation); entries beyond the
        number of states are ignored.

    name : str, default "LevelTrend"

        Name of the component.

    Notes
    -----
    This class implements the level and trend components of the general structural time series model. In the most
    general form, the level and trend is described by a system of two time-varying equations.

    .. math::
        \begin{align}
            \mu_{t+1} &= \mu_t + \nu_t + \zeta_t \\
            \nu_{t+1} &= \nu_t + \xi_t \\
            \zeta_t &\sim N(0, \sigma_\zeta) \\
            \xi_t &\sim N(0, \sigma_\xi)
        \end{align}

    Where :math:`\mu_t` is the mean of the timeseries at time t, and :math:`\nu_t` is the drift or the slope of
    the process. When both innovations :math:`\zeta_t` and :math:`\xi_t` are included in the model, it is known as a
    *local linear trend* model. This system of two equations, corresponding to ``order=2``, can be expanded or
    contracted by adding or removing equations. ``order=3`` would add an acceleration term to the system:

    .. math::
        \begin{align}
            \mu_{t+1} &= \mu_t + \nu_t + \zeta_t \\
            \nu_{t+1} &= \nu_t + \eta_t + \xi_t \\
            \eta_{t+1} &= \eta_t + \omega_t \\
            \zeta_t &\sim N(0, \sigma_\zeta) \\
            \xi_t &\sim N(0, \sigma_\xi) \\
            \omega_t &\sim N(0, \sigma_\omega)
        \end{align}

    After setting all innovation terms to zero and defining initial states :math:`\mu_0, \nu_0, \eta_0`, these equations
    can be collapsed to:

    .. math::
        \mu_t = \mu_0 + \nu_0 \cdot t + \eta_0 \cdot t^2

    Which clarifies how the order and initial states influence the model. In particular, the initial states are the
    coefficients on the intercept, slope, acceleration, and so on.

    In this light, allowing for innovations can be understood as allowing these coefficients to vary over time. Each
    component can be individually selected for time variation by passing a list to the ``innovations_order`` argument.
    For example, a constant intercept with time varying trend and acceleration is specified as ``order=3,
    innovations_order=[0, 1, 1]``.

    By choosing the ``order`` and ``innovations_order``, a large variety of models can be obtained. Notable
    models include:

    * Constant intercept, ``order=1, innovations_order=0``

    .. math::
        \mu_t = \mu

    * Constant linear slope, ``order=2, innovations_order=0``

    .. math::
        \mu_t = \mu_{t-1} + \nu

    * Gaussian Random Walk, ``order=1, innovations_order=1``

    .. math::
        \mu_t = \mu_{t-1} + \zeta_t

    * Gaussian Random Walk with Drift, ``order=2, innovations_order=1``

    .. math::
        \mu_t = \mu_{t-1} + \nu + \zeta_t

    * Smooth Trend, ``order=2, innovations_order=[0, 1]``

    .. math::
        \begin{align}
            \mu_t &= \mu_{t-1} + \nu_{t-1} \\
            \nu_t &= \nu_{t-1} + \xi_t
        \end{align}

    * Local Linear Trend, ``order=2, innovations_order=2``

    [1] notes that the smooth trend model produces more gradually changing slopes than the full local linear trend
    model, and is equivalent to an "integrated trend model".

    References
    ----------
    .. [1] Durbin, James, and Siem Jan Koopman. 2012.
        Time Series Analysis by State Space Methods: Second Edition.
        Oxford University Press.

    """

    def __init__(
        self,
        order: int | list[int] = 2,
        innovations_order: int | list[int] | None = None,
        name: str = "LevelTrend",
    ):
        if innovations_order is None:
            innovations_order = order

        self._order_mask = order_to_mask(order)
        included = np.flatnonzero(self._order_mask)
        if included.size == 0:
            # Without this check, indexing the empty array below fails with an uninformative IndexError.
            raise ValueError(
                f"order={order} is invalid. At least one level or trend state must be included in the model."
            )
        max_state = included[-1].item() + 1

        # If the user passes excess zeros, raise an error. The alternative is to prune them, but this would cause
        # the shape of the state to be different to what the user expects.
        if len(self._order_mask) > max_state:
            raise ValueError(
                f"order={order} is invalid. The highest derivative should not be set to zero. If you want a "
                f"lower order model, explicitly omit the zeros."
            )
        k_states = max_state

        if isinstance(innovations_order, (int, np.integer)):
            # An integer n enables innovations on the first n states only; n <= 0 disables all of them.
            n = int(innovations_order)
            requested = np.ones(k_states, dtype=bool)
            if n > 0:
                requested[n:] = False
            else:
                requested[:] = False
        else:
            requested = np.asarray(innovations_order).astype(bool)

        # Conform the mask to exactly k_states entries, and derive k_posdef from that same mask, so the number of
        # shocks always equals the number of columns selected for the selection matrix in make_symbolic_graph.
        innovations_mask = np.zeros(k_states, dtype=bool)
        n_given = min(k_states, len(requested))
        innovations_mask[:n_given] = requested[:n_given]

        self.innovations_order = innovations_mask
        k_posdef = int(innovations_mask.sum())

        super().__init__(
            name,
            k_endog=1,
            k_states=k_states,
            k_posdef=k_posdef,
            measurement_error=False,
            combine_hidden_states=False,
            obs_state_idxs=np.array([1.0] + [0.0] * (k_states - 1)),
        )

    def populate_component_properties(self):
        """Set parameter, state and shock names, together with their dims, coords and info entries."""
        name_slice = POSITION_DERIVATIVE_NAMES[: self.k_states]
        self.param_names = ["initial_trend"]
        self.state_names = [name for name, mask in zip(name_slice, self._order_mask) if mask]
        self.param_dims = {"initial_trend": ("trend_state",)}
        self.coords = {"trend_state": self.state_names}
        self.param_info = {"initial_trend": {"shape": (self.k_states,), "constraints": None}}

        if self.k_posdef > 0:
            self.param_names += ["sigma_trend"]
            self.shock_names = [
                name for name, mask in zip(name_slice, self.innovations_order) if mask
            ]
            self.param_dims["sigma_trend"] = ("trend_shock",)
            self.coords["trend_shock"] = self.shock_names
            self.param_info["sigma_trend"] = {"shape": (self.k_posdef,), "constraints": "Positive"}

        for name in self.param_names:
            self.param_info[name]["dims"] = self.param_dims[name]

    def make_symbolic_graph(self) -> None:
        """Fill the statespace matrices of the component and register its placeholder parameters."""
        initial_trend = self.make_and_register_variable("initial_trend", shape=(self.k_states,))
        self.ssm["initial_state", :] = initial_trend

        # NOTE(review): the whole upper triangle (including the diagonal) is set to one, so for order >= 3 each
        # state is incremented by every higher derivative, not only the next one. Confirm this is intended, as
        # the class docstring equations only add the next derivative.
        triu_idx = np.triu_indices(self.k_states)
        self.ssm[np.s_["transition", triu_idx[0], triu_idx[1]]] = 1

        # Keep only the identity columns of the states that receive an innovation.
        R = np.eye(self.k_states)
        R = R[:, self.innovations_order]
        self.ssm["selection", :, :] = R

        # Only the first state (the level) enters the observation equation.
        self.ssm["design", 0, :] = np.array([1.0] + [0.0] * (self.k_states - 1))

        if self.k_posdef > 0:
            sigma_trend = self.make_and_register_variable("sigma_trend", shape=(self.k_posdef,))
            diag_idx = np.diag_indices(self.k_posdef)
            idx = np.s_["state_cov", diag_idx[0], diag_idx[1]]
            self.ssm[idx] = sigma_trend**2
class MeasurementError(Component):
    r"""
    Measurement error term for a structural timeseries model

    Parameters
    ----------
    name: str, optional

        Name of the component. Default is "MeasurementError". The name determines the name of the variance
        parameter, which is ``f"sigma_{name}"``.

    Notes
    -----
    This component should only be used in combination with other components, because it has no states. Its only use
    is to add a variance parameter to the model, associated with the observation noise matrix H.

    The parameter is a scalar, so it has no entry in ``param_dims``.

    Examples
    --------
    Create and estimate a deterministic linear trend with measurement error

    .. code:: python

        from pymc_extras.statespace import structural as st
        import pymc as pm
        import pytensor.tensor as pt

        trend = st.LevelTrendComponent(order=2, innovations_order=0)
        error = st.MeasurementError(name='obs')
        ss_mod = (trend + error).build()

        with pm.Model(coords=ss_mod.coords) as model:
            P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states) * 10, dims=ss_mod.param_dims['P0'])
            initial_trend = pm.Normal('initial_trend', sigma=10, dims=ss_mod.param_dims['initial_trend'])
            sigma_obs = pm.Exponential('sigma_obs', 1)

            ss_mod.build_statespace_graph(data)
            idata = pm.sample(nuts_sampler='numpyro')
    """

    def __init__(self, name: str = "MeasurementError"):
        # The component contributes no hidden states and no state innovations; it only fills the observation
        # covariance matrix.
        super().__init__(
            name,
            k_endog=1,
            k_states=0,
            k_posdef=0,
            measurement_error=True,
            combine_hidden_states=False,
        )

    def populate_component_properties(self):
        """Declare the single scalar parameter ``sigma_{name}``."""
        sigma_name = f"sigma_{self.name}"

        self.param_names = [sigma_name]
        self.param_dims = {}
        self.param_info = {
            sigma_name: {
                "shape": (),
                "constraints": "Positive",
                "dims": None,
            }
        }

    def make_symbolic_graph(self) -> None:
        """Place the squared scalar placeholder on the diagonal of the observation covariance matrix."""
        error_sigma = self.make_and_register_variable(f"sigma_{self.name}", shape=())
        diag_idx = np.diag_indices(self.k_endog)
        idx = np.s_["obs_cov", diag_idx[0], diag_idx[1]]
        self.ssm[idx] = error_sigma**2
class AutoregressiveComponent(Component):
    r"""
    Autoregressive timeseries component

    Parameters
    ----------
    order: int or sequence of int

        If int, the number of lags to include in the model.
        If a sequence, an array-like of zeros and ones indicating which lags to include in the model.

    name: str, default "AutoRegressive"

        Name of the component.

    Notes
    -----
    An autoregressive component can be thought of as a way of introducing serially correlated errors into the model.
    The process is modeled:

    .. math::
        x_t = \sum_{i=1}^p \rho_i x_{t-i} + \varepsilon_t, \quad \varepsilon_t \sim N(0, \sigma_{ar})

    Where ``p``, the number of autoregressive terms to model, is the order of the process. By default, all lags up to
    ``p`` are included in the model. To disable lags, pass a list of zeros and ones to the ``order`` argument. For
    example, ``order=[1, 1, 0, 1]`` would become:

    .. math::
        x_t = \rho_1 x_{t-1} + \rho_2 x_{t-2} + \rho_4 x_{t-4} + \varepsilon_t

    The coefficient :math:`\rho_3` has been constrained to zero. Only the included lags have a coefficient in
    ``ar_params``, so in this example ``ar_params`` has three entries.

    .. warning:: This class is meant to be used as a component in a structural time series model. For modeling of
                 stationary processes with ARIMA, use ``statespace.BayesianSARIMA``.

    Examples
    --------
    Model a timeseries as an AR(2) process with non-zero mean:

    .. code:: python

        from pymc_extras.statespace import structural as st
        import pymc as pm
        import pytensor.tensor as pt

        trend = st.LevelTrendComponent(order=1, innovations_order=0)
        ar = st.AutoregressiveComponent(2)
        ss_mod = (trend + ar).build()

        with pm.Model(coords=ss_mod.coords) as model:
            P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states) * 10, dims=ss_mod.param_dims['P0'])
            initial_trend = pm.Normal('initial_trend', sigma=10, dims=ss_mod.param_dims['initial_trend'])
            ar_params = pm.Normal('ar_params', dims=ss_mod.param_dims['ar_params'])
            sigma_ar = pm.Exponential('sigma_ar', 1)

            ss_mod.build_statespace_graph(data)
            idata = pm.sample(nuts_sampler='numpyro')

    """

    def __init__(self, order: int | list[int] = 1, name: str = "AutoRegressive"):
        order = order_to_mask(order)
        k_states = len(order)
        if k_states == 0:
            # Same exception type as before (numpy raised ValueError on a negative array size), clearer message.
            raise ValueError("order must include at least one lag.")

        # Lags are one-based: mask position i corresponds to lag i + 1.
        ar_lags = np.flatnonzero(order).ravel().astype(int) + 1

        self.order = order
        self.ar_lags = ar_lags

        # NOTE(review): measurement_error=True is passed although this component never writes to obs_cov; confirm
        # that flagging the model as having measurement error is intended.
        super().__init__(
            name=name,
            k_endog=1,
            k_states=k_states,
            k_posdef=1,
            measurement_error=True,
            combine_hidden_states=True,
            obs_state_idxs=np.r_[[1.0], np.zeros(k_states - 1)],
        )

    def populate_component_properties(self):
        """Set state, shock and parameter names, together with their dims, coords and info entries."""
        self.state_names = [f"L{i + 1}.data" for i in range(self.k_states)]
        self.shock_names = [f"{self.name}_innovation"]
        self.param_names = ["ar_params", "sigma_ar"]
        self.param_dims = {"ar_params": (AR_PARAM_DIM,)}
        self.coords = {AR_PARAM_DIM: self.ar_lags.tolist()}

        self.param_info = {
            "ar_params": {
                # One coefficient per included lag. This matches both the AR_PARAM_DIM coordinate above and the
                # placeholder registered in make_symbolic_graph, which differ from k_states when lags are skipped.
                "shape": (len(self.ar_lags),),
                "constraints": None,
                "dims": (AR_PARAM_DIM,),
            },
            "sigma_ar": {"shape": (), "constraints": "Positive", "dims": None},
        }

    def make_symbolic_graph(self) -> None:
        """Fill the statespace matrices of the component and register its placeholder parameters."""
        k_nonzero = int(sum(self.order))
        ar_params = self.make_and_register_variable("ar_params", shape=(k_nonzero,))
        sigma_ar = self.make_and_register_variable("sigma_ar", shape=())

        # Ones on the first sub-diagonal shift each lagged value down one position per step.
        T = np.eye(self.k_states, k=-1)
        self.ssm["transition", :, :] = T
        self.ssm["selection", 0, 0] = 1
        self.ssm["design", 0, 0] = 1

        # The AR coefficients occupy the first row of the transition matrix, in the columns of the included lags.
        ar_idx = ("transition", np.zeros(k_nonzero, dtype="int"), np.nonzero(self.order)[0])
        self.ssm[ar_idx] = ar_params

        cov_idx = ("state_cov", *np.diag_indices(1))
        self.ssm[cov_idx] = sigma_ar**2
0, 0] = 1 + self.ssm["design", 0, 0] = 1 + + ar_idx = ("transition", np.zeros(k_nonzero, dtype="int"), np.nonzero(self.order)[0]) + self.ssm[ar_idx] = ar_params + + cov_idx = ("state_cov", *np.diag_indices(1)) + self.ssm[cov_idx] = sigma_ar**2 + + +class TimeSeasonality(Component): + r""" + Seasonal component, modeled in the time domain + + Parameters + ---------- + season_length: int + The number of periods in a single seasonal cycle, e.g. 12 for monthly data with annual seasonal pattern, 7 for + daily data with weekly seasonal pattern, etc. + + innovations: bool, default True + Whether to include stochastic innovations in the strength of the seasonal effect + + name: str, default None + A name for this seasonal component. Used to label dimensions and coordinates. Useful when multiple seasonal + components are included in the same model. Default is ``f"Seasonal[s={season_length}]"`` + + state_names: list of str, default None + List of strings for seasonal effect labels. If provided, it must be of length ``season_length``. An example + would be ``state_names = ['Mon', 'Tue', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun']`` when data is daily with a weekly + seasonal pattern (``season_length = 7``). + + If None, states will be numbered ``[State_0, ..., State_s]`` + + remove_first_state: bool, default True + If True, the first state will be removed from the model. This is done because there are only n-1 degrees of + freedom in the seasonal component, and one state is not identified. If False, the first state will be + included in the model, but it will not be identified -- you will need to handle this in the priors (e.g. with + ZeroSumNormal). + + Notes + ----- + A seasonal effect is any pattern that repeats every fixed interval. Although there are many possible ways to + model seasonal effects, the implementation used here is the one described by [1] as the "canonical" time domain + representation. The seasonal component can be expressed: + + .. 
math:: + \gamma_t = -\sum_{i=1}^{s-1} \gamma_{t-i} + \omega_t, \quad \omega_t \sim N(0, \sigma_\gamma) + + Where :math:`s` is the ``season_length`` parameter and :math:`\omega_t` is the (optional) stochastic innovation. + To give interpretation to the :math:`\gamma` terms, it is helpful to work through the algebra for a simple + example. Let :math:`s=4`, and omit the shock term. Define initial conditions :math:`\gamma_0, \gamma_{-1}, + \gamma_{-2}`. The value of the seasonal component for the first 5 timesteps will be: + + .. math:: + \begin{align} + \gamma_1 &= -\gamma_0 - \gamma_{-1} - \gamma_{-2} \\ + \gamma_2 &= -\gamma_1 - \gamma_0 - \gamma_{-1} \\ + &= -(-\gamma_0 - \gamma_{-1} - \gamma_{-2}) - \gamma_0 - \gamma_{-1} \\ + &= (\gamma_0 - \gamma_0) + (\gamma_{-1} - \gamma_{-1}) + \gamma_{-2} \\ + &= \gamma_{-2} \\ + \gamma_3 &= -\gamma_2 - \gamma_1 - \gamma_0 \\ + &= -\gamma_{-2} - (-\gamma_0 - \gamma_{-1} - \gamma_{-2}) - \gamma_0 \\ + &= (\gamma_{-2} - \gamma_{-2}) + \gamma_{-1} + (\gamma_0 - \gamma_0) \\ + &= \gamma_{-1} \\ + \gamma_4 &= -\gamma_3 - \gamma_2 - \gamma_1 \\ + &= -\gamma_{-1} - \gamma_{-2} -(-\gamma_0 - \gamma_{-1} - \gamma_{-2}) \\ + &= (\gamma_{-2} - \gamma_{-2}) + (\gamma_{-1} - \gamma_{-1}) + \gamma_0 \\ + &= \gamma_0 \\ + \gamma_5 &= -\gamma_4 - \gamma_3 - \gamma_2 \\ + &= -\gamma_0 - \gamma_{-1} - \gamma_{-2} \\ + &= \gamma_1 + \end{align} + + This exercise shows that, given a list ``initial_conditions`` of length ``s-1``, the effects of this model will be: + + - Period 1: ``-sum(initial_conditions)`` + - Period 2: ``initial_conditions[-1]`` + - Period 3: ``initial_conditions[-2]`` + - ... + - Period s: ``initial_conditions[0]`` + - Period s+1: ``-sum(initial_conditions)`` + + And so on. So for interpretation, the ``season_length - 1`` initial states are, when reversed, the coefficients + associated with ``state_names[1:]``. + + .. 
warning:: + Although the ``state_names`` argument expects a list of length ``season_length``, only ``state_names[1:]`` + will be saved as model dimensions, since the 1st coefficient is not identified (it is defined as + :math:`-\sum_{i=1}^{s-1} \gamma_{t-i}`). + + Examples + -------- + Estimate monthly data using a model with a Gaussian random walk trend and an annual seasonal pattern: + + .. code:: python + + from pymc_extras.statespace import structural as st + import pymc as pm + import pytensor.tensor as pt + import pandas as pd + + # Get month names + state_names = pd.date_range('1900-01-01', '1900-12-31', freq='MS').month_name().tolist() + + # Build the structural model + grw = st.LevelTrendComponent(order=1, innovations_order=1) + annual_season = st.TimeSeasonality(season_length=12, name='annual', state_names=state_names, innovations=False) + ss_mod = (grw + annual_season).build() + + # Estimate with PyMC + with pm.Model(coords=ss_mod.coords) as model: + P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states) * 10, dims=ss_mod.param_dims['P0']) + initial_trend = pm.Deterministic('initial_trend', pt.zeros(1), dims=ss_mod.param_dims['initial_trend']) + annual_coefs = pm.Normal('annual_coefs', sigma=1e-2, dims=ss_mod.param_dims['annual_coefs']) + trend_sigmas = pm.HalfNormal('trend_sigmas', sigma=1e-6, dims=ss_mod.param_dims['trend_sigmas']) + ss_mod.build_statespace_graph(data) + idata = pm.sample(nuts_sampler='numpyro') + + References + ---------- + .. [1] Durbin, James, and Siem Jan Koopman. 2012. + Time Series Analysis by State Space Methods: Second Edition. + Oxford University Press. 
+ """ + + def __init__( + self, + season_length: int, + innovations: bool = True, + name: str | None = None, + state_names: list | None = None, + remove_first_state: bool = True, + ): + if name is None: + name = f"Seasonal[s={season_length}]" + if state_names is None: + state_names = [f"{name}_{i}" for i in range(season_length)] + else: + if len(state_names) != season_length: + raise ValueError( + f"state_names must be a list of length season_length, got {len(state_names)}" + ) + state_names = state_names.copy() + self.innovations = innovations + self.remove_first_state = remove_first_state + + if self.remove_first_state: + # In traditional models, the first state isn't identified, so we can help out the user by automatically + # discarding it. + # TODO: Can this be stashed and reconstructed automatically somehow? + state_names.pop(0) + + k_states = season_length - int(self.remove_first_state) + + super().__init__( + name=name, + k_endog=1, + k_states=k_states, + k_posdef=int(innovations), + state_names=state_names, + measurement_error=False, + combine_hidden_states=True, + obs_state_idxs=np.r_[[1.0], np.zeros(k_states - 1)], + ) + + def populate_component_properties(self): + self.param_names = [f"{self.name}_coefs"] + self.param_info = { + f"{self.name}_coefs": { + "shape": (self.k_states,), + "constraints": None, + "dims": (f"{self.name}_state",), + } + } + self.param_dims = {f"{self.name}_coefs": (f"{self.name}_state",)} + self.coords = {f"{self.name}_state": self.state_names} + + if self.innovations: + self.param_names += [f"sigma_{self.name}"] + self.param_info[f"sigma_{self.name}"] = { + "shape": (), + "constraints": "Positive", + "dims": None, + } + self.shock_names = [f"{self.name}"] + + def make_symbolic_graph(self) -> None: + if self.remove_first_state: + # In this case, parameters are normalized to sum to zero, so the current state is the negative sum of + # all previous states. 
+ T = np.eye(self.k_states, k=-1) + T[0, :] = -1 + else: + # In this case we assume the user to be responsible for ensuring the states sum to zero, so T is just a + # circulant matrix that cycles between the states. + T = np.eye(self.k_states, k=1) + T[-1, 0] = 1 + + self.ssm["transition", :, :] = T + self.ssm["design", 0, 0] = 1 + + initial_states = self.make_and_register_variable( + f"{self.name}_coefs", shape=(self.k_states,) + ) + self.ssm["initial_state", np.arange(self.k_states, dtype=int)] = initial_states + + if self.innovations: + self.ssm["selection", 0, 0] = 1 + season_sigma = self.make_and_register_variable(f"sigma_{self.name}", shape=()) + cov_idx = ("state_cov", *np.diag_indices(1)) + self.ssm[cov_idx] = season_sigma**2 + + +class FrequencySeasonality(Component): + r""" + Seasonal component, modeled in the frequency domain + + Parameters + ---------- + season_length: float + The number of periods in a single seasonal cycle, e.g. 12 for monthly data with annual seasonal pattern, 7 for + daily data with weekly seasonal pattern, etc. Non-integer seasonal_length is also permitted, for example + 365.2422 days in a (solar) year. + + n: int + Number of fourier features to include in the seasonal component. Default is ``season_length // 2``, which + is the maximum possible. A smaller number can be used for a more wave-like seasonal pattern. + + name: str, default None + A name for this seasonal component. Used to label dimensions and coordinates. Useful when multiple seasonal + components are included in the same model. Default is ``f"Seasonal[s={season_length}, n={n}]"`` + + innovations: bool, default True + Whether to include stochastic innovations in the strength of the seasonal effect + + Notes + ----- + A seasonal effect is any pattern that repeats every fixed interval. Although there are many possible ways to + model seasonal effects, the implementation used here is the one described by [1] as the "canonical" frequency domain + representation. 
The seasonal component can be expressed: + + .. math:: + \begin{align} + \gamma_t &= \sum_{j=1}^{n} \gamma_{j,t} \\ + \gamma_{j, t+1} &= \gamma_{j,t} \cos \lambda_j + \gamma_{j,t}^\star \sin \lambda_j + \omega_{j, t} \\ + \gamma_{j, t+1}^\star &= -\gamma_{j,t} \sin \lambda_j + \gamma_{j,t}^\star \cos \lambda_j + \omega_{j,t}^\star \\ + \lambda_j &= \frac{2\pi j}{s} + \end{align} + + Where :math:`s` is the ``season_length``. + + Unlike a ``TimeSeasonality`` component, a ``FrequencySeasonality`` component does not require integer season + length. In addition, for long seasonal periods, it is possible to obtain a more compact state space representation + by choosing ``n << s // 2``. Using ``TimeSeasonality``, an annual seasonal pattern in daily data requires 364 + states, whereas ``FrequencySeasonality`` always requires ``2 * n`` states, regardless of the ``season_length``. + The price of this compactness is less representational power. At ``n = 1``, the seasonal pattern will be a pure + sine wave. At ``n = s // 2``, any arbitrary pattern can be represented. + + One cost of the added flexibility of ``FrequencySeasonality`` is reduced interpretability. States of this model are + coefficients :math:`\gamma_1, \gamma^\star_1, \gamma_2, \gamma_2^\star ..., \gamma_n, \gamma^\star_n` associated + with different frequencies in the Fourier representation of the seasonal pattern. As a result, it is not possible + to isolate and identify a "Monday" effect, for instance. + """ + + def __init__(self, season_length, n=None, name=None, innovations=True): + if n is None: + n = int(season_length // 2) + if name is None: + name = f"Frequency[s={season_length}, n={n}]" + + k_states = n * 2 + self.n = n + self.season_length = season_length + self.innovations = innovations + + # If the model is completely saturated (n = s // 2), the last state will not be identified, so it shouldn't + # get a parameter assigned to it and should just be fixed to zero. 
+ # Test this way (rather than n == s // 2) to catch cases when n is non-integer. + self.last_state_not_identified = self.season_length / self.n == 2.0 + self.n_coefs = k_states - int(self.last_state_not_identified) + + obs_state_idx = np.zeros(k_states) + obs_state_idx[slice(0, k_states, 2)] = 1 + + super().__init__( + name=name, + k_endog=1, + k_states=k_states, + k_posdef=k_states * int(self.innovations), + measurement_error=False, + combine_hidden_states=True, + obs_state_idxs=obs_state_idx, + ) + + def make_symbolic_graph(self) -> None: + self.ssm["design", 0, slice(0, self.k_states, 2)] = 1 + + init_state = self.make_and_register_variable(f"{self.name}", shape=(self.n_coefs,)) + + init_state_idx = np.arange(self.n_coefs, dtype=int) + self.ssm["initial_state", init_state_idx] = init_state + + T_mats = [_frequency_transition_block(self.season_length, j + 1) for j in range(self.n)] + T = pt.linalg.block_diag(*T_mats) + self.ssm["transition", :, :] = T + + if self.innovations: + sigma_season = self.make_and_register_variable(f"sigma_{self.name}", shape=()) + self.ssm["state_cov", :, :] = pt.eye(self.k_posdef) * sigma_season**2 + self.ssm["selection", :, :] = np.eye(self.k_states) + + def populate_component_properties(self): + self.state_names = [f"{self.name}_{f}_{i}" for i in range(self.n) for f in ["Cos", "Sin"]] + self.param_names = [f"{self.name}"] + + self.param_dims = {self.name: (f"{self.name}_state",)} + self.param_info = { + f"{self.name}": { + "shape": (self.k_states - int(self.last_state_not_identified),), + "constraints": None, + "dims": (f"{self.name}_state",), + } + } + + init_state_idx = np.arange(self.k_states, dtype=int) + if self.last_state_not_identified: + init_state_idx = init_state_idx[:-1] + self.coords = {f"{self.name}_state": [self.state_names[i] for i in init_state_idx]} + + if self.innovations: + self.shock_names = self.state_names.copy() + self.param_names += [f"sigma_{self.name}"] + self.param_info[f"sigma_{self.name}"] = { + "shape": 
(), + "constraints": "Positive", + "dims": None, + } + + +class CycleComponent(Component): + r""" + A component for modeling longer-term cyclical effects + + Parameters + ---------- + name: str + Name of the component. Used in generated coordinates and state names. If None, a descriptive name will be + used. + + cycle_length: int, optional + The length of the cycle, in the calendar units of your data. For example, if your data is monthly, and you + want to model a 12-month cycle, use ``cycle_length=12``. You cannot specify both ``cycle_length`` and + ``estimate_cycle_length``. + + estimate_cycle_length: bool, default False + Whether to estimate the cycle length. If True, an additional parameter, ``cycle_length`` will be added to the + model. You cannot specify both ``cycle_length`` and ``estimate_cycle_length``. + + dampen: bool, default False + Whether to dampen the cycle by multiplying by a dampening factor :math:`\rho` at every timestep. If true, + an additional parameter, ``dampening_factor`` will be added to the model. + + innovations: bool, default True + Whether to include stochastic innovations in the strength of the seasonal effect. If True, an additional + parameter, ``sigma_{name}`` will be added to the model. + + Notes + ----- + The cycle component is very similar in implementation to the frequency domain seasonal component, expect that it + is restricted to n=1. The cycle component can be expressed: + + .. math:: + \begin{align} + \gamma_t &= \rho \gamma_{t-1} \cos \lambda + \rho \gamma_{t-1}^\star \sin \lambda + \omega_{t} \\ + \gamma_{t}^\star &= -\rho \gamma_{t-1} \sin \lambda + \rho \gamma_{t-1}^\star \cos \lambda + \omega_{t}^\star \\ + \lambda &= \frac{2\pi}{s} + \end{align} + + Where :math:`s` is the ``cycle_length``. [1] recommend that this component be used for longer term cyclical + effects, such as business cycles, and that the seasonal component be used for shorter term effects, such as + weekly or monthly seasonality. 
+ + Unlike a FrequencySeasonality component, the length of a CycleComponent can be estimated. + + Examples + -------- + Estimate a business cycle with length between 6 and 12 years: + + .. code:: python + + from pymc_extras.statespace import structural as st + import pymc as pm + import pytensor.tensor as pt + import pandas as pd + import numpy as np + + data = np.random.normal(size=(100, 1)) + + # Build the structural model + grw = st.LevelTrendComponent(order=1, innovations_order=1) + cycle = st.CycleComponent('business_cycle', estimate_cycle_length=True, dampen=False) + ss_mod = (grw + cycle).build() + + # Estimate with PyMC + with pm.Model(coords=ss_mod.coords) as model: + P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states), dims=ss_mod.param_dims['P0']) + intitial_trend = pm.Normal('initial_trend', dims=ss_mod.param_dims['initial_trend']) + sigma_trend = pm.HalfNormal('sigma_trend', dims=ss_mod.param_dims['sigma_trend']) + + cycle_strength = pm.Normal('business_cycle') + cycle_length = pm.Uniform('business_cycle_length', lower=6, upper=12) + + sigma_cycle = pm.HalfNormal('sigma_business_cycle', sigma=1) + ss_mod.build_statespace_graph(data) + + idata = pm.sample(nuts_sampler='numpyro') + + References + ---------- + .. [1] Durbin, James, and Siem Jan Koopman. 2012. + Time Series Analysis by State Space Methods: Second Edition. + Oxford University Press. 
+ """ + + def __init__( + self, + name: str | None = None, + cycle_length: int | None = None, + estimate_cycle_length: bool = False, + dampen: bool = False, + innovations: bool = True, + ): + if cycle_length is None and not estimate_cycle_length: + raise ValueError("Must specify cycle_length if estimate_cycle_length is False") + if cycle_length is not None and estimate_cycle_length: + raise ValueError("Cannot specify cycle_length if estimate_cycle_length is True") + if name is None: + cycle = int(cycle_length) if cycle_length is not None else "Estimate" + name = f"Cycle[s={cycle}, dampen={dampen}, innovations={innovations}]" + + self.estimate_cycle_length = estimate_cycle_length + self.cycle_length = cycle_length + self.innovations = innovations + self.dampen = dampen + self.n_coefs = 1 + + k_states = 2 + k_endog = 1 + k_posdef = 2 + + obs_state_idx = np.zeros(k_states) + obs_state_idx[slice(0, k_states, 2)] = 1 + + super().__init__( + name=name, + k_endog=k_endog, + k_states=k_states, + k_posdef=k_posdef, + measurement_error=False, + combine_hidden_states=True, + obs_state_idxs=obs_state_idx, + ) + + def make_symbolic_graph(self) -> None: + self.ssm["design", 0, slice(0, self.k_states, 2)] = 1 + self.ssm["selection", :, :] = np.eye(self.k_states) + self.param_dims = {self.name: (f"{self.name}_state",)} + self.coords = {f"{self.name}_state": self.state_names} + + init_state = self.make_and_register_variable(f"{self.name}", shape=(self.k_states,)) + + self.ssm["initial_state", :] = init_state + + if self.estimate_cycle_length: + lamb = self.make_and_register_variable(f"{self.name}_length", shape=()) + else: + lamb = self.cycle_length + + if self.dampen: + rho = self.make_and_register_variable(f"{self.name}_dampening_factor", shape=()) + else: + rho = 1 + + T = rho * _frequency_transition_block(lamb, j=1) + self.ssm["transition", :, :] = T + + if self.innovations: + sigma_cycle = self.make_and_register_variable(f"sigma_{self.name}", shape=()) + self.ssm["state_cov", 
:, :] = pt.eye(self.k_posdef) * sigma_cycle**2 + + def populate_component_properties(self): + self.state_names = [f"{self.name}_{f}" for f in ["Cos", "Sin"]] + self.param_names = [f"{self.name}"] + + self.param_info = { + f"{self.name}": { + "shape": (2,), + "constraints": None, + "dims": (f"{self.name}_state",), + } + } + + if self.estimate_cycle_length: + self.param_names += [f"{self.name}_length"] + self.param_info[f"{self.name}_length"] = { + "shape": (), + "constraints": "Positive, non-zero", + "dims": None, + } + + if self.dampen: + self.param_names += [f"{self.name}_dampening_factor"] + self.param_info[f"{self.name}_dampening_factor"] = { + "shape": (), + "constraints": "0 < x ≤ 1", + "dims": None, + } + + if self.innovations: + self.param_names += [f"sigma_{self.name}"] + self.param_info[f"sigma_{self.name}"] = { + "shape": (), + "constraints": "Positive", + "dims": None, + } + self.shock_names = self.state_names.copy() + + +class RegressionComponent(Component): + def __init__( + self, + k_exog: int | None = None, + name: str | None = "Exogenous", + state_names: list[str] | None = None, + innovations=False, + ): + self.innovations = innovations + k_exog = self._handle_input_data(k_exog, state_names, name) + + k_states = k_exog + k_endog = 1 + k_posdef = k_exog + + super().__init__( + name=name, + k_endog=k_endog, + k_states=k_states, + k_posdef=k_posdef, + state_names=self.state_names, + measurement_error=False, + combine_hidden_states=False, + exog_names=[f"data_{name}"], + obs_state_idxs=np.ones(k_states), + ) + + @staticmethod + def _get_state_names(k_exog: int | None, state_names: list[str] | None, name: str): + if k_exog is None and state_names is None: + raise ValueError("Must specify at least one of k_exog or state_names") + if state_names is not None and k_exog is not None: + if len(state_names) != k_exog: + raise ValueError(f"Expected {k_exog} state names, found {len(state_names)}") + elif k_exog is None: + k_exog = len(state_names) + else: + 
state_names = [f"{name}_{i + 1}" for i in range(k_exog)] + + return k_exog, state_names + + def _handle_input_data(self, k_exog: int, state_names: list[str] | None, name) -> int: + k_exog, state_names = self._get_state_names(k_exog, state_names, name) + self.state_names = state_names + + return k_exog + + def make_symbolic_graph(self) -> None: + betas = self.make_and_register_variable(f"beta_{self.name}", shape=(self.k_states,)) + regression_data = self.make_and_register_data( + f"data_{self.name}", shape=(None, self.k_states) + ) + + self.ssm["initial_state", :] = betas + self.ssm["transition", :, :] = np.eye(self.k_states) + self.ssm["selection", :, :] = np.eye(self.k_states) + self.ssm["design"] = pt.expand_dims(regression_data, 1) + + if self.innovations: + sigma_beta = self.make_and_register_variable( + f"sigma_beta_{self.name}", (self.k_states,) + ) + row_idx, col_idx = np.diag_indices(self.k_states) + self.ssm["state_cov", row_idx, col_idx] = sigma_beta**2 + + def populate_component_properties(self) -> None: + self.shock_names = self.state_names + + self.param_names = [f"beta_{self.name}"] + self.data_names = [f"data_{self.name}"] + self.param_dims = { + f"beta_{self.name}": ("exog_state",), + } + + self.param_info = { + f"beta_{self.name}": { + "shape": (self.k_states,), + "constraints": None, + "dims": ("exog_state",), + }, + } + + self.data_info = { + f"data_{self.name}": { + "shape": (None, self.k_states), + "dims": (TIME_DIM, "exog_state"), + }, + } + self.coords = {"exog_state": self.state_names} + + if self.innovations: + self.param_names += [f"sigma_beta_{self.name}"] + self.param_dims[f"sigma_beta_{self.name}"] = "exog_state" + self.param_info[f"sigma_beta_{self.name}"] = { + "shape": (), + "constraints": "Positive", + "dims": ("exog_state",), + } diff --git a/pymc_extras/statespace/models/structural/components/level_trend.py b/pymc_extras/statespace/models/structural/components/level_trend.py index b6735007f..1563dde72 100644 --- 
a/pymc_extras/statespace/models/structural/components/level_trend.py +++ b/pymc_extras/statespace/models/structural/components/level_trend.py @@ -167,15 +167,19 @@ def populate_component_properties(self): name_slice = POSITION_DERIVATIVE_NAMES[:k_states] self.param_names = ["initial_trend"] - self.state_names = [name for name, mask in zip(name_slice, self._order_mask) if mask] + base_names = [name for name, mask in zip(name_slice, self._order_mask) if mask] + self.state_names = [ + f"{name}[{obs_name}]" for obs_name in self.observed_state_names for name in base_names + ] self.param_dims = {"initial_trend": ("trend_state",)} - self.coords = {"trend_state": self.state_names} + self.coords = {"trend_state": base_names} if k_endog > 1: self.param_dims["trend_state"] = ( "trend_endog", "trend_state", ) + self.param_dims = {"initial_trend": ("trend_endog", "trend_state")} self.coords["trend_endog"] = self.observed_state_names shape = (k_endog, k_states) if k_endog > 1 else (k_states,) diff --git a/pymc_extras/statespace/models/structural/components/regression.py b/pymc_extras/statespace/models/structural/components/regression.py index c290812c2..435db50d1 100644 --- a/pymc_extras/statespace/models/structural/components/regression.py +++ b/pymc_extras/statespace/models/structural/components/regression.py @@ -1,6 +1,7 @@ import numpy as np from pytensor import tensor as pt +from scipy import linalg from pymc_extras.statespace.models.structural.core import Component from pymc_extras.statespace.utils.constants import TIME_DIM @@ -28,8 +29,8 @@ def __init__( super().__init__( name=name, k_endog=k_endog, - k_states=k_states, - k_posdef=k_posdef, + k_states=k_states * k_endog, + k_posdef=k_posdef * k_endog, state_names=self.state_names, observed_state_names=observed_state_names, measurement_error=False, @@ -59,15 +60,21 @@ def _handle_input_data(self, k_exog: int, state_names: list[str] | None, name) - return k_exog def make_symbolic_graph(self) -> None: - betas = 
self.make_and_register_variable(f"beta_{self.name}", shape=(self.k_states,)) - regression_data = self.make_and_register_data( - f"data_{self.name}", shape=(None, self.k_states) - ) + k_endog = self.k_endog + k_states = self.k_states // k_endog + self.k_posdef // k_endog + + betas = self.make_and_register_variable(f"beta_{self.name}", shape=(k_endog, k_states)) + regression_data = self.make_and_register_data(f"data_{self.name}", shape=(None, k_states)) - self.ssm["initial_state", :] = betas - self.ssm["transition", :, :] = np.eye(self.k_states) + self.ssm["initial_state", :] = betas.reshape((1, -1)).squeeze() + T = np.eye(k_states) + self.ssm["transition", :, :] = linalg.block_diag(*[T for _ in range(k_endog)]) self.ssm["selection", :, :] = np.eye(self.k_states) - self.ssm["design"] = pt.expand_dims(regression_data, 1) + Z = pt.linalg.block_diag(*[pt.expand_dims(regression_data, 1) for _ in range(k_endog)]) + self.ssm["design"] = pt.specify_shape( + Z, (None, k_endog, regression_data.type.shape[1] * k_endog) + ) if self.innovations: sigma_beta = self.make_and_register_variable( @@ -77,29 +84,38 @@ def make_symbolic_graph(self) -> None: self.ssm["state_cov", row_idx, col_idx] = sigma_beta**2 def populate_component_properties(self) -> None: + k_endog = self.k_endog + k_states = self.k_states // k_endog + self.k_posdef // k_endog + self.shock_names = self.state_names self.param_names = [f"beta_{self.name}"] self.data_names = [f"data_{self.name}"] self.param_dims = { - f"beta_{self.name}": ("exog_state",), + f"beta_{self.name}": ("exog_endog", "exog_state"), } + base_names = self.state_names + self.state_names = [ + f"{name}[{obs_name}]" for obs_name in self.observed_state_names for name in base_names + ] + self.param_info = { f"beta_{self.name}": { - "shape": (self.k_states,), + "shape": (k_endog, k_states), "constraints": None, - "dims": ("exog_state",), + "dims": ("exog_endog", "exog_state"), }, } self.data_info = { f"data_{self.name}": { - "shape": (None, 
self.k_states), + "shape": (None, k_states), "dims": (TIME_DIM, "exog_state"), }, } - self.coords = {"exog_state": self.state_names} + self.coords = {"exog_state": base_names, "exog_endog": self.observed_state_names} if self.innovations: self.param_names += [f"sigma_beta_{self.name}"] diff --git a/tests/statespace/models/test_structural.py b/tests/statespace/models/test_structural.py new file mode 100644 index 000000000..1662e164a --- /dev/null +++ b/tests/statespace/models/test_structural.py @@ -0,0 +1,840 @@ +import functools as ft +import warnings + +from collections import defaultdict + +import numpy as np +import pandas as pd +import pymc as pm +import pytensor +import pytensor.tensor as pt +import pytest +import statsmodels.api as sm + +from numpy.testing import assert_allclose +from scipy import linalg + +from pymc_extras.statespace import structural as st +from pymc_extras.statespace.utils.constants import ( + ALL_STATE_AUX_DIM, + ALL_STATE_DIM, + AR_PARAM_DIM, + OBS_STATE_AUX_DIM, + OBS_STATE_DIM, + SHOCK_AUX_DIM, + SHOCK_DIM, + SHORT_NAME_TO_LONG, +) +from tests.statespace.shared_fixtures import ( # pylint: disable=unused-import + rng, +) +from tests.statespace.test_utilities import ( + assert_pattern_repeats, + simulate_from_numpy_model, + unpack_symbolic_matrices_with_params, +) + +floatX = pytensor.config.floatX +ATOL = 1e-8 if floatX.endswith("64") else 1e-4 +RTOL = 0 if floatX.endswith("64") else 1e-6 + + +def _assert_all_statespace_matrices_match(mod, params, sm_mod): + x0, P0, c, d, T, Z, R, H, Q = unpack_symbolic_matrices_with_params(mod, params) + + sm_x0, sm_H0, sm_P0 = sm_mod.initialization() + + if len(x0) > 0: + assert_allclose(x0, sm_x0) + + for name, matrix in zip(["T", "R", "Z", "Q"], [T, R, Z, Q]): + long_name = SHORT_NAME_TO_LONG[name] + if np.any([x == 0 for x in matrix.shape]): + continue + assert_allclose( + sm_mod.ssm[long_name], + matrix, + err_msg=f"matrix {name} does not match statsmodels", + atol=ATOL, + rtol=RTOL, + ) + + +def 
_assert_coord_shapes_match_matrices(mod, params): + if "initial_state_cov" not in params: + params["initial_state_cov"] = np.eye(mod.k_states) + + x0, P0, c, d, T, Z, R, H, Q = unpack_symbolic_matrices_with_params(mod, params) + + n_states = len(mod.coords[ALL_STATE_DIM]) + + # There will always be one shock dimension -- dummies are inserted into fully deterministic models to avoid errors + # in the state space representation. + n_shocks = max(1, len(mod.coords[SHOCK_DIM])) + n_obs = len(mod.coords[OBS_STATE_DIM]) + + assert x0.shape[-1:] == ( + n_states, + ), f"x0 expected to have shape (n_states, ), found {x0.shape[-1:]}" + assert P0.shape[-2:] == ( + n_states, + n_states, + ), f"P0 expected to have shape (n_states, n_states), found {P0.shape[-2:]}" + assert c.shape[-1:] == ( + n_states, + ), f"c expected to have shape (n_states, ), found {c.shape[-1:]}" + assert d.shape[-1:] == (n_obs,), f"d expected to have shape (n_obs, ), found {d.shape[-1:]}" + assert T.shape[-2:] == ( + n_states, + n_states, + ), f"T expected to have shape (n_states, n_states), found {T.shape[-2:]}" + assert Z.shape[-2:] == ( + n_obs, + n_states, + ), f"Z expected to have shape (n_obs, n_states), found {Z.shape[-2:]}" + assert R.shape[-2:] == ( + n_states, + n_shocks, + ), f"R expected to have shape (n_states, n_shocks), found {R.shape[-2:]}" + assert H.shape[-2:] == ( + n_obs, + n_obs, + ), f"H expected to have shape (n_obs, n_obs), found {H.shape[-2:]}" + assert Q.shape[-2:] == ( + n_shocks, + n_shocks, + ), f"Q expected to have shape (n_shocks, n_shocks), found {Q.shape[-2:]}" + + +def _assert_basic_coords_correct(mod): + assert mod.coords[ALL_STATE_DIM] == mod.state_names + assert mod.coords[ALL_STATE_AUX_DIM] == mod.state_names + assert mod.coords[SHOCK_DIM] == mod.shock_names + assert mod.coords[SHOCK_AUX_DIM] == mod.shock_names + assert mod.coords[OBS_STATE_DIM] == ["data"] + assert mod.coords[OBS_STATE_AUX_DIM] == ["data"] + + +def _assert_keys_match(test_dict, expected_dict): + 
expected_keys = list(expected_dict.keys()) + param_keys = list(test_dict.keys()) + key_diff = set(expected_keys) - set(param_keys) + assert len(key_diff) == 0, f'{", ".join(key_diff)} were not found in the test_dict keys.' + + key_diff = set(param_keys) - set(expected_keys) + assert ( + len(key_diff) == 0 + ), f'{", ".join(key_diff)} were keys of the tests_dict not in expected_dict.' + + +def _assert_param_dims_correct(param_dims, expected_dims): + if len(expected_dims) == 0 and len(param_dims) == 0: + return + + _assert_keys_match(param_dims, expected_dims) + for param, dims in expected_dims.items(): + assert dims == param_dims[param], f"dims for parameter {param} do not match" + + +def _assert_coords_correct(coords, expected_coords): + if len(coords) == 0 and len(expected_coords) == 0: + return + + _assert_keys_match(coords, expected_coords) + for dim, labels in expected_coords.items(): + assert labels == coords[dim], f"labels on dimension {dim} do not match" + + +def _assert_params_info_correct(param_info, coords, param_dims): + for param in param_info.keys(): + info = param_info[param] + + dims = info["dims"] + labels = [coords[dim] for dim in dims] if dims is not None else None + if labels is not None: + assert param in param_dims.keys() + inferred_dims = param_dims[param] + else: + inferred_dims = None + + shape = tuple(len(label) for label in labels) if labels is not None else () + + assert info["shape"] == shape + assert dims == inferred_dims + + +def create_structural_model_and_equivalent_statsmodel( + rng, + level: bool | None = False, + trend: bool | None = False, + seasonal: int | None = None, + freq_seasonal: list[dict] | None = None, + cycle: bool = False, + autoregressive: int | None = None, + exog: np.ndarray | None = None, + irregular: bool | None = False, + stochastic_level: bool | None = True, + stochastic_trend: bool | None = False, + stochastic_seasonal: bool | None = True, + stochastic_freq_seasonal: list[bool] | None = None, + 
stochastic_cycle: bool | None = False, + damped_cycle: bool | None = False, +): + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + mod = ft.partial( + sm.tsa.UnobservedComponents, + level=level, + trend=trend, + seasonal=seasonal, + freq_seasonal=freq_seasonal, + cycle=cycle, + autoregressive=autoregressive, + exog=exog, + irregular=irregular, + stochastic_level=stochastic_level, + stochastic_trend=stochastic_trend, + stochastic_seasonal=stochastic_seasonal, + stochastic_freq_seasonal=stochastic_freq_seasonal, + stochastic_cycle=stochastic_cycle, + damped_cycle=damped_cycle, + mle_regression=False, + ) + + params = {} + sm_params = {} + sm_init = {} + expected_param_dims = defaultdict(tuple) + expected_coords = defaultdict(list) + expected_param_dims["P0"] += ("state", "state_aux") + + default_states = [ + ALL_STATE_DIM, + ALL_STATE_AUX_DIM, + OBS_STATE_DIM, + OBS_STATE_AUX_DIM, + SHOCK_DIM, + SHOCK_AUX_DIM, + ] + default_values = [[], [], ["data"], ["data"], [], []] + for dim, value in zip(default_states, default_values): + expected_coords[dim] += value + + components = [] + + if irregular: + sigma2 = np.abs(rng.normal()).astype(floatX).item() + params["sigma_irregular"] = np.sqrt(sigma2) + sm_params["sigma2.irregular"] = sigma2 + + comp = st.MeasurementError("irregular") + components.append(comp) + + level_trend_order = [0, 0] + level_trend_innov_order = [0, 0] + + if level: + level_trend_order[0] = 1 + expected_coords["trend_state"] += [ + "level", + ] + expected_coords[ALL_STATE_DIM] += [ + "level", + ] + expected_coords[ALL_STATE_AUX_DIM] += [ + "level", + ] + if stochastic_level: + level_trend_innov_order[0] = 1 + expected_coords["trend_shock"] += ["level"] + expected_coords[SHOCK_DIM] += [ + "level", + ] + expected_coords[SHOCK_AUX_DIM] += [ + "level", + ] + + if trend: + level_trend_order[1] = 1 + expected_coords["trend_state"] += [ + "trend", + ] + expected_coords[ALL_STATE_DIM] += [ + "trend", + ] + expected_coords[ALL_STATE_AUX_DIM] += 
[ + "trend", + ] + + if stochastic_trend: + level_trend_innov_order[1] = 1 + expected_coords["trend_shock"] += ["trend"] + expected_coords[SHOCK_DIM] += ["trend"] + expected_coords[SHOCK_AUX_DIM] += ["trend"] + + if level or trend: + expected_param_dims["initial_trend"] += ("trend_state",) + level_value = np.where( + level_trend_order, + rng.normal( + size=2, + ).astype(floatX), + np.zeros(2, dtype=floatX), + ) + sigma_level_value2 = np.abs(rng.normal(size=(2,)))[ + np.array(level_trend_innov_order, dtype="bool") + ] + max_order = np.flatnonzero(level_value)[-1].item() + 1 + level_trend_order = level_trend_order[:max_order] + + params["initial_trend"] = level_value[:max_order] + sm_init["level"] = level_value[0] + sm_init["trend"] = level_value[1] + + if sum(level_trend_innov_order) > 0: + expected_param_dims["sigma_trend"] += ("trend_shock",) + params["sigma_trend"] = np.sqrt(sigma_level_value2) + + sigma_level_value = sigma_level_value2.tolist() + if stochastic_level: + sigma = sigma_level_value.pop(0) + sm_params["sigma2.level"] = sigma + if stochastic_trend: + sigma = sigma_level_value.pop(0) + sm_params["sigma2.trend"] = sigma + + comp = st.LevelTrendComponent( + name="level", order=level_trend_order, innovations_order=level_trend_innov_order + ) + components.append(comp) + + if seasonal is not None: + state_names = [f"seasonal_{i}" for i in range(seasonal)][1:] + seasonal_coefs = rng.normal(size=(seasonal - 1,)).astype(floatX) + params["seasonal_coefs"] = seasonal_coefs + expected_param_dims["seasonal_coefs"] += ("seasonal_state",) + + expected_coords["seasonal_state"] += tuple(state_names) + expected_coords[ALL_STATE_DIM] += state_names + expected_coords[ALL_STATE_AUX_DIM] += state_names + + seasonal_dict = { + "seasonal" if i == 0 else f"seasonal.L{i}": c for i, c in enumerate(seasonal_coefs) + } + sm_init.update(seasonal_dict) + + if stochastic_seasonal: + sigma2 = np.abs(rng.normal()).astype(floatX) + params["sigma_seasonal"] = np.sqrt(sigma2) + 
sm_params["sigma2.seasonal"] = sigma2 + expected_coords[SHOCK_DIM] += [ + "seasonal", + ] + expected_coords[SHOCK_AUX_DIM] += [ + "seasonal", + ] + + comp = st.TimeSeasonality( + name="seasonal", season_length=seasonal, innovations=stochastic_seasonal + ) + components.append(comp) + + if freq_seasonal is not None: + state_count = 0 + for d, has_innov in zip(freq_seasonal, stochastic_freq_seasonal): + n = d["harmonics"] + s = d["period"] + last_state_not_identified = (s / n) == 2.0 + n_states = 2 * n - int(last_state_not_identified) + state_names = [f"seasonal_{s}_{f}_{i}" for i in range(n) for f in ["Cos", "Sin"]] + + seasonal_params = rng.normal(size=n_states).astype(floatX) + + params[f"seasonal_{s}"] = seasonal_params + expected_param_dims[f"seasonal_{s}"] += (f"seasonal_{s}_state",) + expected_coords[ALL_STATE_DIM] += state_names + expected_coords[ALL_STATE_AUX_DIM] += state_names + expected_coords[f"seasonal_{s}_state"] += ( + tuple(state_names[:-1]) if last_state_not_identified else tuple(state_names) + ) + + for param in seasonal_params: + sm_init[f"freq_seasonal.{state_count}"] = param + state_count += 1 + if last_state_not_identified: + sm_init[f"freq_seasonal.{state_count}"] = 0.0 + state_count += 1 + + if has_innov: + sigma2 = np.abs(rng.normal()).astype(floatX) + params[f"sigma_seasonal_{s}"] = np.sqrt(sigma2) + sm_params[f"sigma2.freq_seasonal_{s}({n})"] = sigma2 + expected_coords[SHOCK_DIM] += state_names + expected_coords[SHOCK_AUX_DIM] += state_names + + comp = st.FrequencySeasonality( + name=f"seasonal_{s}", season_length=s, n=n, innovations=has_innov + ) + components.append(comp) + + if cycle: + cycle_length = np.random.choice(np.arange(2, 12)).astype(floatX) + + # Statsmodels takes the frequency not the cycle length, so convert it. 
+ sm_params["frequency.cycle"] = 2.0 * np.pi / cycle_length + params["cycle_length"] = cycle_length + + init_cycle = rng.normal(size=(2,)).astype(floatX) + params["cycle"] = init_cycle + expected_param_dims["cycle"] += ("cycle_state",) + + state_names = ["cycle_Cos", "cycle_Sin"] + expected_coords["cycle_state"] += state_names + expected_coords[ALL_STATE_DIM] += state_names + expected_coords[ALL_STATE_AUX_DIM] += state_names + + sm_init["cycle"] = init_cycle[0] + sm_init["cycle.auxilliary"] = init_cycle[1] + + if stochastic_cycle: + sigma2 = np.abs(rng.normal()).astype(floatX) + params["sigma_cycle"] = np.sqrt(sigma2) + expected_coords[SHOCK_DIM] += state_names + expected_coords[SHOCK_AUX_DIM] += state_names + + sm_params["sigma2.cycle"] = sigma2 + + if damped_cycle: + rho = rng.beta(1, 1) + params["cycle_dampening_factor"] = rho + sm_params["damping.cycle"] = rho + + comp = st.CycleComponent( + name="cycle", + dampen=damped_cycle, + innovations=stochastic_cycle, + estimate_cycle_length=True, + ) + + components.append(comp) + + if autoregressive is not None: + ar_names = [f"L{i+1}.data" for i in range(autoregressive)] + ar_params = rng.normal(size=(autoregressive,)).astype(floatX) + if autoregressive == 1: + ar_params = ar_params.item() + sigma2 = np.abs(rng.normal()).astype(floatX) + + params["ar_params"] = ar_params + params["sigma_ar"] = np.sqrt(sigma2) + expected_param_dims["ar_params"] += (AR_PARAM_DIM,) + expected_coords[AR_PARAM_DIM] += tuple(list(range(1, autoregressive + 1))) + expected_coords[ALL_STATE_DIM] += ar_names + expected_coords[ALL_STATE_AUX_DIM] += ar_names + expected_coords[SHOCK_DIM] += ["ar_innovation"] + expected_coords[SHOCK_AUX_DIM] += ["ar_innovation"] + + sm_params["sigma2.ar"] = sigma2 + for i, rho in enumerate(ar_params): + sm_init[f"ar.L{i+1}"] = 0 + sm_params[f"ar.L{i+1}"] = rho + + comp = st.AutoregressiveComponent(name="ar", order=autoregressive) + components.append(comp) + + if exog is not None: + names = [f"x{i + 1}" for i in 
range(exog.shape[1])] + betas = rng.normal(size=(exog.shape[1],)).astype(floatX) + params["beta_exog"] = betas + params["data_exog"] = exog + expected_param_dims["beta_exog"] += ("exog_state",) + expected_param_dims["data_exog"] += ("time", "exog_data") + + expected_coords["exog_state"] += tuple(names) + + for i, beta in enumerate(betas): + sm_params[f"beta.x{i + 1}"] = beta + sm_init[f"beta.x{i+1}"] = beta + comp = st.RegressionComponent(name="exog", state_names=names) + components.append(comp) + + st_mod = components.pop(0) + for comp in components: + st_mod += comp + return mod, st_mod, params, sm_params, sm_init, expected_param_dims, expected_coords + + +@pytest.mark.parametrize( + "level, trend, stochastic_level, stochastic_trend, irregular", + [ + (False, False, False, False, True), + (True, True, True, True, True), + (True, True, False, True, False), + ], +) +@pytest.mark.parametrize("autoregressive", [None, 3]) +@pytest.mark.parametrize("seasonal, stochastic_seasonal", [(None, False), (12, False), (12, True)]) +@pytest.mark.parametrize( + "freq_seasonal, stochastic_freq_seasonal", + [ + (None, None), + ([{"period": 12, "harmonics": 2}], [False]), + ([{"period": 12, "harmonics": 6}], [True]), + ], +) +@pytest.mark.parametrize( + "cycle, damped_cycle, stochastic_cycle", + [(False, False, False), (True, False, True), (True, True, True)], +) +@pytest.mark.filterwarnings("ignore::statsmodels.tools.sm_exceptions.ConvergenceWarning") +@pytest.mark.filterwarnings("ignore::statsmodels.tools.sm_exceptions.SpecificationWarning") +def test_structural_model_against_statsmodels( + level, + trend, + stochastic_level, + stochastic_trend, + irregular, + autoregressive, + seasonal, + stochastic_seasonal, + freq_seasonal, + stochastic_freq_seasonal, + cycle, + damped_cycle, + stochastic_cycle, + rng, +): + retvals = create_structural_model_and_equivalent_statsmodel( + rng, + level=level, + trend=trend, + seasonal=seasonal, + freq_seasonal=freq_seasonal, + cycle=cycle, + 
damped_cycle=damped_cycle, + autoregressive=autoregressive, + irregular=irregular, + stochastic_level=stochastic_level, + stochastic_trend=stochastic_trend, + stochastic_seasonal=stochastic_seasonal, + stochastic_freq_seasonal=stochastic_freq_seasonal, + stochastic_cycle=stochastic_cycle, + ) + f_sm_mod, mod, params, sm_params, sm_init, expected_dims, expected_coords = retvals + + data = rng.normal(size=(100,)).astype(floatX) + sm_mod = f_sm_mod(data) + + if len(sm_init) > 0: + init_array = np.concatenate( + [np.atleast_1d(sm_init[k]).ravel() for k in sm_mod.state_names if k != "dummy"] + ) + sm_mod.initialize_known(init_array, np.eye(sm_mod.k_states)) + else: + sm_mod.initialize_default() + + if len(sm_params) > 0: + param_array = np.concatenate( + [np.atleast_1d(sm_params[k]).ravel() for k in sm_mod.param_names] + ) + sm_mod.update(param_array, transformed=True) + + _assert_all_statespace_matrices_match(mod, params, sm_mod) + + built_model = mod.build(verbose=False, mode="FAST_RUN") + assert built_model.mode == "FAST_RUN" + + _assert_coord_shapes_match_matrices(built_model, params) + _assert_param_dims_correct(built_model.param_dims, expected_dims) + _assert_coords_correct(built_model.coords, expected_coords) + _assert_params_info_correct(built_model.param_info, built_model.coords, built_model.param_dims) + + +def test_level_trend_model(rng): + mod = st.LevelTrendComponent(order=2, innovations_order=0) + params = {"initial_trend": [0.0, 1.0]} + x, y = simulate_from_numpy_model(mod, rng, params) + + assert_allclose(np.diff(y), 1, atol=ATOL, rtol=RTOL) + + # Check coords + mod = mod.build(verbose=False) + _assert_basic_coords_correct(mod) + assert mod.coords["trend_state"] == ["level", "trend"] + + +def test_measurement_error(rng): + mod = st.MeasurementError("obs") + st.LevelTrendComponent(order=2) + mod = mod.build(verbose=False) + + _assert_basic_coords_correct(mod) + assert "sigma_obs" in mod.param_names + + +@pytest.mark.parametrize("order", [1, 2, [1, 0, 1]], 
ids=["AR1", "AR2", "AR(1,0,1)"]) +def test_autoregressive_model(order, rng): + ar = st.AutoregressiveComponent(order=order) + params = { + "ar_params": np.full((sum(ar.order),), 0.5, dtype=floatX), + "sigma_ar": 0.0, + } + + x, y = simulate_from_numpy_model(ar, rng, params, steps=100) + + # Check coords + ar.build(verbose=False) + _assert_basic_coords_correct(ar) + lags = np.arange(len(order) if isinstance(order, list) else order, dtype="int") + 1 + if isinstance(order, list): + lags = lags[np.flatnonzero(order)] + assert_allclose(ar.coords["ar_lag"], lags) + + +@pytest.mark.parametrize("s", [10, 25, 50]) +@pytest.mark.parametrize("innovations", [True, False]) +@pytest.mark.parametrize("remove_first_state", [True, False]) +@pytest.mark.filterwarnings( + "ignore:divide by zero encountered in matmul:RuntimeWarning", + "ignore:overflow encountered in matmul:RuntimeWarning", + "ignore:invalid value encountered in matmul:RuntimeWarning", +) +def test_time_seasonality(s, innovations, remove_first_state, rng): + def random_word(rng): + return "".join(rng.choice(list("abcdefghijklmnopqrstuvwxyz")) for _ in range(5)) + + state_names = [random_word(rng) for _ in range(s)] + mod = st.TimeSeasonality( + season_length=s, + innovations=innovations, + name="season", + state_names=state_names, + remove_first_state=remove_first_state, + ) + x0 = np.zeros(mod.k_states, dtype=floatX) + x0[0] = 1 + + params = {"season_coefs": x0} + if mod.innovations: + params["sigma_season"] = 0.0 + + x, y = simulate_from_numpy_model(mod, rng, params) + y = y.ravel() + if not innovations: + assert_pattern_repeats(y, s, atol=ATOL, rtol=RTOL) + + # Check coords + mod.build(verbose=False) + _assert_basic_coords_correct(mod) + test_slice = slice(1, None) if remove_first_state else slice(None) + assert mod.coords["season_state"] == state_names[test_slice] + + +def get_shift_factor(s): + s_str = str(s) + if "." 
not in s_str: + return 1 + _, decimal = s_str.split(".") + return 10 ** len(decimal) + + +@pytest.mark.parametrize("n", [*np.arange(1, 6, dtype="int").tolist(), None]) +@pytest.mark.parametrize("s", [5, 10, 25, 25.2]) +def test_frequency_seasonality(n, s, rng): + mod = st.FrequencySeasonality(season_length=s, n=n, name="season") + x0 = rng.normal(size=mod.n_coefs).astype(floatX) + params = {"season": x0, "sigma_season": 0.0} + k = get_shift_factor(s) + T = int(s * k) + + x, y = simulate_from_numpy_model(mod, rng, params, steps=2 * T) + assert_pattern_repeats(y, T, atol=ATOL, rtol=RTOL) + + # Check coords + mod.build(verbose=False) + _assert_basic_coords_correct(mod) + if n is None: + n = int(s // 2) + states = [f"season_{f}_{i}" for i in range(n) for f in ["Cos", "Sin"]] + + # Remove the last state when the model is completely saturated + if s / n == 2.0: + states.pop() + assert mod.coords["season_state"] == states + + +cycle_test_vals = zip([None, None, 3, 5, 10], [False, True, True, False, False]) + + +def test_cycle_component_deterministic(rng): + cycle = st.CycleComponent( + name="cycle", cycle_length=12, estimate_cycle_length=False, innovations=False + ) + params = {"cycle": np.array([1.0, 1.0], dtype=floatX)} + x, y = simulate_from_numpy_model(cycle, rng, params, steps=12 * 12) + + assert_pattern_repeats(y, 12, atol=ATOL, rtol=RTOL) + + +def test_cycle_component_with_dampening(rng): + cycle = st.CycleComponent( + name="cycle", cycle_length=12, estimate_cycle_length=False, innovations=False, dampen=True + ) + params = {"cycle": np.array([10.0, 10.0], dtype=floatX), "cycle_dampening_factor": 0.75} + x, y = simulate_from_numpy_model(cycle, rng, params, steps=100) + + # Check that the cycle dampens to zero over time + assert_allclose(y[-1], 0.0, atol=ATOL, rtol=RTOL) + + +def test_cycle_component_with_innovations_and_cycle_length(rng): + cycle = st.CycleComponent( + name="cycle", estimate_cycle_length=True, innovations=True, dampen=True + ) + params = { + 
"cycle": np.array([1.0, 1.0], dtype=floatX), + "cycle_length": 12.0, + "cycle_dampening_factor": 0.95, + "sigma_cycle": 1.0, + } + + x, y = simulate_from_numpy_model(cycle, rng, params) + + cycle.build(verbose=False) + _assert_basic_coords_correct(cycle) + + +def test_exogenous_component(rng): + data = rng.normal(size=(100, 2)).astype(floatX) + mod = st.RegressionComponent(state_names=["feature_1", "feature_2"], name="exog") + + params = {"beta_exog": np.array([1.0, 2.0], dtype=floatX)} + exog_data = {"data_exog": data} + x, y = simulate_from_numpy_model(mod, rng, params, exog_data) + + # Check that the generated data is just a linear regression + assert_allclose(y, data @ params["beta_exog"], atol=ATOL, rtol=RTOL) + + mod.build(verbose=False) + _assert_basic_coords_correct(mod) + assert mod.coords["exog_state"] == ["feature_1", "feature_2"] + + +def test_adding_exogenous_component(rng): + data = rng.normal(size=(100, 2)).astype(floatX) + reg = st.RegressionComponent(state_names=["a", "b"], name="exog") + ll = st.LevelTrendComponent(name="level") + + seasonal = st.FrequencySeasonality(name="annual", season_length=12, n=4) + mod = reg + ll + seasonal + + assert mod.ssm["design"].eval({"data_exog": data}).shape == (100, 1, 2 + 2 + 8) + assert_allclose(mod.ssm["design", 5, 0, :2].eval({"data_exog": data}), data[5]) + + +def test_add_components(): + ll = st.LevelTrendComponent(order=2) + se = st.TimeSeasonality(name="seasonal", season_length=12) + mod = ll + se + + ll_params = { + "initial_trend": np.zeros(2, dtype=floatX), + "sigma_trend": np.ones(2, dtype=floatX), + } + se_params = { + "seasonal_coefs": np.ones(11, dtype=floatX), + "sigma_seasonal": 1.0, + } + all_params = ll_params.copy() + all_params.update(se_params) + + (ll_x0, ll_P0, ll_c, ll_d, ll_T, ll_Z, ll_R, ll_H, ll_Q) = unpack_symbolic_matrices_with_params( + ll, ll_params + ) + (se_x0, se_P0, se_c, se_d, se_T, se_Z, se_R, se_H, se_Q) = unpack_symbolic_matrices_with_params( + se, se_params + ) + x0, P0, 
c, d, T, Z, R, H, Q = unpack_symbolic_matrices_with_params(mod, all_params) + + for property in ["param_names", "shock_names", "param_info", "coords", "param_dims"]: + assert [x in getattr(mod, property) for x in getattr(ll, property)] + assert [x in getattr(mod, property) for x in getattr(se, property)] + + ll_mats = [ll_T, ll_R, ll_Q] + se_mats = [se_T, se_R, se_Q] + all_mats = [T, R, Q] + + for ll_mat, se_mat, all_mat in zip(ll_mats, se_mats, all_mats): + assert_allclose(all_mat, linalg.block_diag(ll_mat, se_mat), atol=ATOL, rtol=RTOL) + + ll_mats = [ll_x0, ll_c, ll_Z] + se_mats = [se_x0, se_c, se_Z] + all_mats = [x0, c, Z] + axes = [0, 0, 1] + + for ll_mat, se_mat, all_mat, axis in zip(ll_mats, se_mats, all_mats, axes): + assert_allclose(all_mat, np.concatenate([ll_mat, se_mat], axis=axis), atol=ATOL, rtol=RTOL) + + +def test_filter_scans_time_varying_design_matrix(rng): + time_idx = pd.date_range(start="2000-01-01", freq="D", periods=100) + data = pd.DataFrame(rng.normal(size=(100, 2)), columns=["a", "b"], index=time_idx) + + y = pd.DataFrame(rng.normal(size=(100, 1)), columns=["data"], index=time_idx) + + reg = st.RegressionComponent(state_names=["a", "b"], name="exog") + mod = reg.build(verbose=False) + + with pm.Model(coords=mod.coords) as m: + data_exog = pm.Data("data_exog", data.values) + + x0 = pm.Normal("x0", dims=["state"]) + P0 = pm.Deterministic("P0", pt.eye(mod.k_states), dims=["state", "state_aux"]) + beta_exog = pm.Normal("beta_exog", dims=["exog_state"]) + + mod.build_statespace_graph(y) + x0, P0, c, d, T, Z, R, H, Q = mod.unpack_statespace() + pm.Deterministic("Z", Z) + + prior = pm.sample_prior_predictive(draws=10) + + prior_Z = prior.prior.Z.values + assert prior_Z.shape == (1, 10, 100, 1, 2) + assert_allclose(prior_Z[0, :, :, 0, :], data.values[None].repeat(10, axis=0)) + + +@pytest.mark.skipif(floatX.endswith("32"), reason="Prior covariance not PSD at half-precision") +def test_extract_components_from_idata(rng): + time_idx = 
pd.date_range(start="2000-01-01", freq="D", periods=100) + data = pd.DataFrame(rng.normal(size=(100, 2)), columns=["a", "b"], index=time_idx) + + y = pd.DataFrame(rng.normal(size=(100, 1)), columns=["data"], index=time_idx) + + ll = st.LevelTrendComponent() + season = st.FrequencySeasonality(name="seasonal", season_length=12, n=2, innovations=False) + reg = st.RegressionComponent(state_names=["a", "b"], name="exog") + me = st.MeasurementError("obs") + mod = (ll + season + reg + me).build(verbose=False) + + with pm.Model(coords=mod.coords) as m: + data_exog = pm.Data("data_exog", data.values) + + x0 = pm.Normal("x0", dims=["state"]) + P0 = pm.Deterministic("P0", pt.eye(mod.k_states), dims=["state", "state_aux"]) + beta_exog = pm.Normal("beta_exog", dims=["exog_state"]) + initial_trend = pm.Normal("initial_trend", dims=["trend_state"]) + sigma_trend = pm.Exponential("sigma_trend", 1, dims=["trend_shock"]) + seasonal_coefs = pm.Normal("seasonal", dims=["seasonal_state"]) + sigma_obs = pm.Exponential("sigma_obs", 1) + + mod.build_statespace_graph(y) + + x0, P0, c, d, T, Z, R, H, Q = mod.unpack_statespace() + prior = pm.sample_prior_predictive(draws=10) + + filter_prior = mod.sample_conditional_prior(prior) + comp_prior = mod.extract_components_from_idata(filter_prior) + comp_states = comp_prior.filtered_prior.coords["state"].values + expected_states = ["LevelTrend[level]", "LevelTrend[trend]", "seasonal", "exog[a]", "exog[b]"] + missing = set(comp_states) - set(expected_states) + + assert len(missing) == 0, missing From c0a4a47effa235598deca04966a24c17155389c9 Mon Sep 17 00:00:00 2001 From: Jonathan Dekermanjian Date: Sat, 5 Jul 2025 08:43:56 -0600 Subject: [PATCH 11/21] 1. removed incorrectly comitted file test_structural.py 2. replaced scipy block diag with pytensor block diag 3. 
Added forecast to test model in multivariate ssm notebook --- notebooks/multivariate_ssm.ipynb | 847 +++++++++++++++--- .../structural/components/regression.py | 3 +- tests/statespace/models/test_structural.py | 840 ----------------- 3 files changed, 735 insertions(+), 955 deletions(-) delete mode 100644 tests/statespace/models/test_structural.py diff --git a/notebooks/multivariate_ssm.ipynb b/notebooks/multivariate_ssm.ipynb index 83cc74e2e..dc86c5879 100644 --- a/notebooks/multivariate_ssm.ipynb +++ b/notebooks/multivariate_ssm.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 16, + "execution_count": 1, "id": "a5b7dcb3", "metadata": {}, "outputs": [], @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 3, "id": "a96a731b", "metadata": {}, "outputs": [], @@ -110,7 +110,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 4, "id": "a4130131", "metadata": {}, "outputs": [], @@ -135,13 +135,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "5e9acbb8", "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -177,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 6, "id": "d51ff06e", "metadata": {}, "outputs": [ @@ -237,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 7, "id": "eec30de3", "metadata": {}, "outputs": [ @@ -273,7 +273,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 8, "id": "05830b2b", "metadata": {}, "outputs": [ @@ -387,7 +387,7 @@ " Finished Chains:\n", " 4\n", "

\n", - "

Sampling for 42 seconds

\n", + "

Sampling for 43 seconds

\n", "

\n", " Estimated Time to Completion:\n", " now\n", @@ -418,7 +418,7 @@ " \n", " \n", " 2000\n", - " 10\n", + " 4\n", " 0.52\n", " 7\n", " \n", @@ -431,8 +431,8 @@ " \n", " \n", " 2000\n", - " 10\n", - " 0.52\n", + " 5\n", + " 0.51\n", " 7\n", " \n", " \n", @@ -444,8 +444,8 @@ " \n", " \n", " 2000\n", - " 87\n", - " 0.53\n", + " 6\n", + " 0.55\n", " 7\n", " \n", " \n", @@ -457,8 +457,8 @@ " \n", " \n", " 2000\n", - " 4\n", - " 0.52\n", + " 13\n", + " 0.53\n", " 7\n", " \n", " \n", @@ -468,7 +468,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -484,7 +484,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 9, "id": "466fb92a", "metadata": {}, "outputs": [ @@ -524,110 +524,110 @@ " \n", " beta_exog[y1, x1]\n", " 2.947\n", - " 0.729\n", - " 1.433\n", - " 4.278\n", - " 0.016\n", - " 0.014\n", - " 2232.0\n", - " 2077.0\n", - " 1.00\n", + " 0.708\n", + " 1.603\n", + " 4.324\n", + " 0.012\n", + " 0.015\n", + " 3842.0\n", + " 2349.0\n", + " 1.0\n", " \n", " \n", " beta_exog[y1, x2]\n", - " -0.991\n", - " 0.681\n", - " -2.270\n", - " 0.314\n", + " -0.984\n", + " 0.711\n", + " -2.443\n", + " 0.335\n", " 0.012\n", - " 0.014\n", - " 3102.0\n", - " 2334.0\n", - " 1.00\n", + " 0.015\n", + " 3976.0\n", + " 2520.0\n", + " 1.0\n", " \n", " \n", " beta_exog[y2, x1]\n", - " 3.007\n", - " 0.713\n", - " 1.637\n", - " 4.356\n", - " 0.015\n", - " 0.015\n", - " 2152.0\n", - " 2107.0\n", - " 1.00\n", + " 2.984\n", + " 0.732\n", + " 1.580\n", + " 4.441\n", + " 0.014\n", + " 0.016\n", + " 2873.0\n", + " 1903.0\n", + " 1.0\n", " \n", " \n", " beta_exog[y2, x2]\n", - " -1.047\n", - " 0.714\n", - " -2.471\n", - " 0.408\n", - " 0.014\n", - " 0.019\n", - " 3007.0\n", - " 1644.0\n", - " 1.00\n", + " -1.040\n", + " 0.719\n", + " -2.241\n", + " 0.519\n", + " 0.013\n", + " 0.016\n", + " 3148.0\n", + " 2390.0\n", + " 1.0\n", " \n", " \n", " beta_exog[y3, x1]\n", - " 2.800\n", - " 0.712\n", - " 1.438\n", - " 4.165\n", + " 2.776\n", + " 0.711\n", + " 1.423\n", + " 
4.204\n", " 0.013\n", - " 0.019\n", - " 3473.0\n", - " 2458.0\n", - " 1.00\n", + " 0.015\n", + " 3406.0\n", + " 2475.0\n", + " 1.0\n", " \n", " \n", " beta_exog[y3, x2]\n", - " -0.934\n", - " 0.675\n", - " -2.184\n", - " 0.426\n", + " -0.946\n", + " 0.691\n", + " -2.254\n", + " 0.386\n", " 0.013\n", - " 0.014\n", - " 3017.0\n", - " 2326.0\n", - " 1.00\n", + " 0.017\n", + " 3252.0\n", + " 2249.0\n", + " 1.0\n", " \n", " \n", " sigma_trend[y1, level]\n", - " 0.757\n", - " 0.057\n", - " 0.657\n", - " 0.871\n", - " 0.002\n", - " 0.002\n", - " 878.0\n", - " 367.0\n", - " 1.01\n", + " 0.755\n", + " 0.055\n", + " 0.654\n", + " 0.858\n", + " 0.001\n", + " 0.001\n", + " 6764.0\n", + " 3005.0\n", + " 1.0\n", " \n", " \n", " sigma_trend[y2, level]\n", - " 0.927\n", - " 0.068\n", - " 0.810\n", - " 1.062\n", - " 0.002\n", + " 0.926\n", + " 0.067\n", + " 0.799\n", + " 1.051\n", + " 0.001\n", " 0.001\n", - " 1846.0\n", - " 1633.0\n", - " 1.00\n", + " 6923.0\n", + " 2801.0\n", + " 1.0\n", " \n", " \n", " sigma_trend[y3, level]\n", - " 0.847\n", - " 0.062\n", - " 0.733\n", - " 0.961\n", + " 0.848\n", + " 0.063\n", + " 0.738\n", + " 0.975\n", " 0.001\n", " 0.001\n", - " 2009.0\n", - " 2855.0\n", - " 1.00\n", + " 6697.0\n", + " 2873.0\n", + " 1.0\n", " \n", " \n", "\n", @@ -635,29 +635,29 @@ ], "text/plain": [ " mean sd hdi_3% hdi_97% mcse_mean mcse_sd \\\n", - "beta_exog[y1, x1] 2.947 0.729 1.433 4.278 0.016 0.014 \n", - "beta_exog[y1, x2] -0.991 0.681 -2.270 0.314 0.012 0.014 \n", - "beta_exog[y2, x1] 3.007 0.713 1.637 4.356 0.015 0.015 \n", - "beta_exog[y2, x2] -1.047 0.714 -2.471 0.408 0.014 0.019 \n", - "beta_exog[y3, x1] 2.800 0.712 1.438 4.165 0.013 0.019 \n", - "beta_exog[y3, x2] -0.934 0.675 -2.184 0.426 0.013 0.014 \n", - "sigma_trend[y1, level] 0.757 0.057 0.657 0.871 0.002 0.002 \n", - "sigma_trend[y2, level] 0.927 0.068 0.810 1.062 0.002 0.001 \n", - "sigma_trend[y3, level] 0.847 0.062 0.733 0.961 0.001 0.001 \n", + "beta_exog[y1, x1] 2.947 0.708 1.603 4.324 0.012 0.015 
\n", + "beta_exog[y1, x2] -0.984 0.711 -2.443 0.335 0.012 0.015 \n", + "beta_exog[y2, x1] 2.984 0.732 1.580 4.441 0.014 0.016 \n", + "beta_exog[y2, x2] -1.040 0.719 -2.241 0.519 0.013 0.016 \n", + "beta_exog[y3, x1] 2.776 0.711 1.423 4.204 0.013 0.015 \n", + "beta_exog[y3, x2] -0.946 0.691 -2.254 0.386 0.013 0.017 \n", + "sigma_trend[y1, level] 0.755 0.055 0.654 0.858 0.001 0.001 \n", + "sigma_trend[y2, level] 0.926 0.067 0.799 1.051 0.001 0.001 \n", + "sigma_trend[y3, level] 0.848 0.063 0.738 0.975 0.001 0.001 \n", "\n", " ess_bulk ess_tail r_hat \n", - "beta_exog[y1, x1] 2232.0 2077.0 1.00 \n", - "beta_exog[y1, x2] 3102.0 2334.0 1.00 \n", - "beta_exog[y2, x1] 2152.0 2107.0 1.00 \n", - "beta_exog[y2, x2] 3007.0 1644.0 1.00 \n", - "beta_exog[y3, x1] 3473.0 2458.0 1.00 \n", - "beta_exog[y3, x2] 3017.0 2326.0 1.00 \n", - "sigma_trend[y1, level] 878.0 367.0 1.01 \n", - "sigma_trend[y2, level] 1846.0 1633.0 1.00 \n", - "sigma_trend[y3, level] 2009.0 2855.0 1.00 " + "beta_exog[y1, x1] 3842.0 2349.0 1.0 \n", + "beta_exog[y1, x2] 3976.0 2520.0 1.0 \n", + "beta_exog[y2, x1] 2873.0 1903.0 1.0 \n", + "beta_exog[y2, x2] 3148.0 2390.0 1.0 \n", + "beta_exog[y3, x1] 3406.0 2475.0 1.0 \n", + "beta_exog[y3, x2] 3252.0 2249.0 1.0 \n", + "sigma_trend[y1, level] 6764.0 3005.0 1.0 \n", + "sigma_trend[y2, level] 6923.0 2801.0 1.0 \n", + "sigma_trend[y3, level] 6697.0 2873.0 1.0 " ] }, - "execution_count": 107, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -668,13 +668,13 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 10, "id": "3684616b", "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "

" ] @@ -696,6 +696,627 @@ ");" ] }, + { + "cell_type": "code", + "execution_count": 11, + "id": "7dc5d11b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No start date provided. Using the last date in the data index. To silence this warning, explicitly pass a start date or set verbose = False\n", + "/Users/dekermanjian/Desktop/Open_Source_Contributions/pymc-extras/pymc_extras/statespace/utils/data_tools.py:74: UserWarning: No time index found on the supplied data. A simple range index will be automatically generated.\n", + " warnings.warn(NO_TIME_INDEX_WARNING)\n", + "/opt/miniconda3/envs/pymc-extras-test/lib/python3.12/site-packages/pytensor/link/jax/linker.py:32: UserWarning: The RandomType SharedVariables [RNG()] will not be used in the compiled JAX graph. Instead a copy will be used.\n", + " warnings.warn(\n", + "Sampling: [forecast_combined]\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fe28f382d9e04444ac804b184abdd524", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 4MB\n",
+       "Dimensions:            (chain: 4, draw: 1000, time: 10, state: 9,\n",
+       "                        observed_state: 3)\n",
+       "Coordinates:\n",
+       "  * chain              (chain) int64 32B 0 1 2 3\n",
+       "  * draw               (draw) int64 8kB 0 1 2 3 4 5 ... 994 995 996 997 998 999\n",
+       "  * time               (time) int64 80B 100 101 102 103 104 105 106 107 108 109\n",
+       "  * state              (state) <U9 324B 'level[y1]' 'level[y2]' ... 'x2[y3]'\n",
+       "  * observed_state     (observed_state) <U2 24B 'y1' 'y2' 'y3'\n",
+       "Data variables:\n",
+       "    forecast_latent    (chain, draw, time, state) float64 3MB 2.679 ... -1.022\n",
+       "    forecast_observed  (chain, draw, time, observed_state) float64 960kB 1.81...\n",
+       "Attributes:\n",
+       "    created_at:                 2025-07-05T14:42:22.808692+00:00\n",
+       "    arviz_version:              0.21.0\n",
+       "    inference_library:          pymc\n",
+       "    inference_library_version:  5.23.0
" + ], + "text/plain": [ + " Size: 4MB\n", + "Dimensions: (chain: 4, draw: 1000, time: 10, state: 9,\n", + " observed_state: 3)\n", + "Coordinates:\n", + " * chain (chain) int64 32B 0 1 2 3\n", + " * draw (draw) int64 8kB 0 1 2 3 4 5 ... 994 995 996 997 998 999\n", + " * time (time) int64 80B 100 101 102 103 104 105 106 107 108 109\n", + " * state (state) None: self.ssm["initial_state", :] = betas.reshape((1, -1)).squeeze() T = np.eye(k_states) - self.ssm["transition", :, :] = linalg.block_diag(*[T for _ in range(k_endog)]) + self.ssm["transition", :, :] = pt.linalg.block_diag(*[T for _ in range(k_endog)]) self.ssm["selection", :, :] = np.eye(self.k_states) Z = pt.linalg.block_diag(*[pt.expand_dims(regression_data, 1) for _ in range(k_endog)]) self.ssm["design"] = pt.specify_shape( diff --git a/tests/statespace/models/test_structural.py b/tests/statespace/models/test_structural.py deleted file mode 100644 index 1662e164a..000000000 --- a/tests/statespace/models/test_structural.py +++ /dev/null @@ -1,840 +0,0 @@ -import functools as ft -import warnings - -from collections import defaultdict - -import numpy as np -import pandas as pd -import pymc as pm -import pytensor -import pytensor.tensor as pt -import pytest -import statsmodels.api as sm - -from numpy.testing import assert_allclose -from scipy import linalg - -from pymc_extras.statespace import structural as st -from pymc_extras.statespace.utils.constants import ( - ALL_STATE_AUX_DIM, - ALL_STATE_DIM, - AR_PARAM_DIM, - OBS_STATE_AUX_DIM, - OBS_STATE_DIM, - SHOCK_AUX_DIM, - SHOCK_DIM, - SHORT_NAME_TO_LONG, -) -from tests.statespace.shared_fixtures import ( # pylint: disable=unused-import - rng, -) -from tests.statespace.test_utilities import ( - assert_pattern_repeats, - simulate_from_numpy_model, - unpack_symbolic_matrices_with_params, -) - -floatX = pytensor.config.floatX -ATOL = 1e-8 if floatX.endswith("64") else 1e-4 -RTOL = 0 if floatX.endswith("64") else 1e-6 - - -def 
_assert_all_statespace_matrices_match(mod, params, sm_mod): - x0, P0, c, d, T, Z, R, H, Q = unpack_symbolic_matrices_with_params(mod, params) - - sm_x0, sm_H0, sm_P0 = sm_mod.initialization() - - if len(x0) > 0: - assert_allclose(x0, sm_x0) - - for name, matrix in zip(["T", "R", "Z", "Q"], [T, R, Z, Q]): - long_name = SHORT_NAME_TO_LONG[name] - if np.any([x == 0 for x in matrix.shape]): - continue - assert_allclose( - sm_mod.ssm[long_name], - matrix, - err_msg=f"matrix {name} does not match statsmodels", - atol=ATOL, - rtol=RTOL, - ) - - -def _assert_coord_shapes_match_matrices(mod, params): - if "initial_state_cov" not in params: - params["initial_state_cov"] = np.eye(mod.k_states) - - x0, P0, c, d, T, Z, R, H, Q = unpack_symbolic_matrices_with_params(mod, params) - - n_states = len(mod.coords[ALL_STATE_DIM]) - - # There will always be one shock dimension -- dummies are inserted into fully deterministic models to avoid errors - # in the state space representation. - n_shocks = max(1, len(mod.coords[SHOCK_DIM])) - n_obs = len(mod.coords[OBS_STATE_DIM]) - - assert x0.shape[-1:] == ( - n_states, - ), f"x0 expected to have shape (n_states, ), found {x0.shape[-1:]}" - assert P0.shape[-2:] == ( - n_states, - n_states, - ), f"P0 expected to have shape (n_states, n_states), found {P0.shape[-2:]}" - assert c.shape[-1:] == ( - n_states, - ), f"c expected to have shape (n_states, ), found {c.shape[-1:]}" - assert d.shape[-1:] == (n_obs,), f"d expected to have shape (n_obs, ), found {d.shape[-1:]}" - assert T.shape[-2:] == ( - n_states, - n_states, - ), f"T expected to have shape (n_states, n_states), found {T.shape[-2:]}" - assert Z.shape[-2:] == ( - n_obs, - n_states, - ), f"Z expected to have shape (n_obs, n_states), found {Z.shape[-2:]}" - assert R.shape[-2:] == ( - n_states, - n_shocks, - ), f"R expected to have shape (n_states, n_shocks), found {R.shape[-2:]}" - assert H.shape[-2:] == ( - n_obs, - n_obs, - ), f"H expected to have shape (n_obs, n_obs), found 
{H.shape[-2:]}" - assert Q.shape[-2:] == ( - n_shocks, - n_shocks, - ), f"Q expected to have shape (n_shocks, n_shocks), found {Q.shape[-2:]}" - - -def _assert_basic_coords_correct(mod): - assert mod.coords[ALL_STATE_DIM] == mod.state_names - assert mod.coords[ALL_STATE_AUX_DIM] == mod.state_names - assert mod.coords[SHOCK_DIM] == mod.shock_names - assert mod.coords[SHOCK_AUX_DIM] == mod.shock_names - assert mod.coords[OBS_STATE_DIM] == ["data"] - assert mod.coords[OBS_STATE_AUX_DIM] == ["data"] - - -def _assert_keys_match(test_dict, expected_dict): - expected_keys = list(expected_dict.keys()) - param_keys = list(test_dict.keys()) - key_diff = set(expected_keys) - set(param_keys) - assert len(key_diff) == 0, f'{", ".join(key_diff)} were not found in the test_dict keys.' - - key_diff = set(param_keys) - set(expected_keys) - assert ( - len(key_diff) == 0 - ), f'{", ".join(key_diff)} were keys of the tests_dict not in expected_dict.' - - -def _assert_param_dims_correct(param_dims, expected_dims): - if len(expected_dims) == 0 and len(param_dims) == 0: - return - - _assert_keys_match(param_dims, expected_dims) - for param, dims in expected_dims.items(): - assert dims == param_dims[param], f"dims for parameter {param} do not match" - - -def _assert_coords_correct(coords, expected_coords): - if len(coords) == 0 and len(expected_coords) == 0: - return - - _assert_keys_match(coords, expected_coords) - for dim, labels in expected_coords.items(): - assert labels == coords[dim], f"labels on dimension {dim} do not match" - - -def _assert_params_info_correct(param_info, coords, param_dims): - for param in param_info.keys(): - info = param_info[param] - - dims = info["dims"] - labels = [coords[dim] for dim in dims] if dims is not None else None - if labels is not None: - assert param in param_dims.keys() - inferred_dims = param_dims[param] - else: - inferred_dims = None - - shape = tuple(len(label) for label in labels) if labels is not None else () - - assert info["shape"] == 
shape - assert dims == inferred_dims - - -def create_structural_model_and_equivalent_statsmodel( - rng, - level: bool | None = False, - trend: bool | None = False, - seasonal: int | None = None, - freq_seasonal: list[dict] | None = None, - cycle: bool = False, - autoregressive: int | None = None, - exog: np.ndarray | None = None, - irregular: bool | None = False, - stochastic_level: bool | None = True, - stochastic_trend: bool | None = False, - stochastic_seasonal: bool | None = True, - stochastic_freq_seasonal: list[bool] | None = None, - stochastic_cycle: bool | None = False, - damped_cycle: bool | None = False, -): - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - mod = ft.partial( - sm.tsa.UnobservedComponents, - level=level, - trend=trend, - seasonal=seasonal, - freq_seasonal=freq_seasonal, - cycle=cycle, - autoregressive=autoregressive, - exog=exog, - irregular=irregular, - stochastic_level=stochastic_level, - stochastic_trend=stochastic_trend, - stochastic_seasonal=stochastic_seasonal, - stochastic_freq_seasonal=stochastic_freq_seasonal, - stochastic_cycle=stochastic_cycle, - damped_cycle=damped_cycle, - mle_regression=False, - ) - - params = {} - sm_params = {} - sm_init = {} - expected_param_dims = defaultdict(tuple) - expected_coords = defaultdict(list) - expected_param_dims["P0"] += ("state", "state_aux") - - default_states = [ - ALL_STATE_DIM, - ALL_STATE_AUX_DIM, - OBS_STATE_DIM, - OBS_STATE_AUX_DIM, - SHOCK_DIM, - SHOCK_AUX_DIM, - ] - default_values = [[], [], ["data"], ["data"], [], []] - for dim, value in zip(default_states, default_values): - expected_coords[dim] += value - - components = [] - - if irregular: - sigma2 = np.abs(rng.normal()).astype(floatX).item() - params["sigma_irregular"] = np.sqrt(sigma2) - sm_params["sigma2.irregular"] = sigma2 - - comp = st.MeasurementError("irregular") - components.append(comp) - - level_trend_order = [0, 0] - level_trend_innov_order = [0, 0] - - if level: - level_trend_order[0] = 1 - 
expected_coords["trend_state"] += [ - "level", - ] - expected_coords[ALL_STATE_DIM] += [ - "level", - ] - expected_coords[ALL_STATE_AUX_DIM] += [ - "level", - ] - if stochastic_level: - level_trend_innov_order[0] = 1 - expected_coords["trend_shock"] += ["level"] - expected_coords[SHOCK_DIM] += [ - "level", - ] - expected_coords[SHOCK_AUX_DIM] += [ - "level", - ] - - if trend: - level_trend_order[1] = 1 - expected_coords["trend_state"] += [ - "trend", - ] - expected_coords[ALL_STATE_DIM] += [ - "trend", - ] - expected_coords[ALL_STATE_AUX_DIM] += [ - "trend", - ] - - if stochastic_trend: - level_trend_innov_order[1] = 1 - expected_coords["trend_shock"] += ["trend"] - expected_coords[SHOCK_DIM] += ["trend"] - expected_coords[SHOCK_AUX_DIM] += ["trend"] - - if level or trend: - expected_param_dims["initial_trend"] += ("trend_state",) - level_value = np.where( - level_trend_order, - rng.normal( - size=2, - ).astype(floatX), - np.zeros(2, dtype=floatX), - ) - sigma_level_value2 = np.abs(rng.normal(size=(2,)))[ - np.array(level_trend_innov_order, dtype="bool") - ] - max_order = np.flatnonzero(level_value)[-1].item() + 1 - level_trend_order = level_trend_order[:max_order] - - params["initial_trend"] = level_value[:max_order] - sm_init["level"] = level_value[0] - sm_init["trend"] = level_value[1] - - if sum(level_trend_innov_order) > 0: - expected_param_dims["sigma_trend"] += ("trend_shock",) - params["sigma_trend"] = np.sqrt(sigma_level_value2) - - sigma_level_value = sigma_level_value2.tolist() - if stochastic_level: - sigma = sigma_level_value.pop(0) - sm_params["sigma2.level"] = sigma - if stochastic_trend: - sigma = sigma_level_value.pop(0) - sm_params["sigma2.trend"] = sigma - - comp = st.LevelTrendComponent( - name="level", order=level_trend_order, innovations_order=level_trend_innov_order - ) - components.append(comp) - - if seasonal is not None: - state_names = [f"seasonal_{i}" for i in range(seasonal)][1:] - seasonal_coefs = rng.normal(size=(seasonal - 
1,)).astype(floatX) - params["seasonal_coefs"] = seasonal_coefs - expected_param_dims["seasonal_coefs"] += ("seasonal_state",) - - expected_coords["seasonal_state"] += tuple(state_names) - expected_coords[ALL_STATE_DIM] += state_names - expected_coords[ALL_STATE_AUX_DIM] += state_names - - seasonal_dict = { - "seasonal" if i == 0 else f"seasonal.L{i}": c for i, c in enumerate(seasonal_coefs) - } - sm_init.update(seasonal_dict) - - if stochastic_seasonal: - sigma2 = np.abs(rng.normal()).astype(floatX) - params["sigma_seasonal"] = np.sqrt(sigma2) - sm_params["sigma2.seasonal"] = sigma2 - expected_coords[SHOCK_DIM] += [ - "seasonal", - ] - expected_coords[SHOCK_AUX_DIM] += [ - "seasonal", - ] - - comp = st.TimeSeasonality( - name="seasonal", season_length=seasonal, innovations=stochastic_seasonal - ) - components.append(comp) - - if freq_seasonal is not None: - state_count = 0 - for d, has_innov in zip(freq_seasonal, stochastic_freq_seasonal): - n = d["harmonics"] - s = d["period"] - last_state_not_identified = (s / n) == 2.0 - n_states = 2 * n - int(last_state_not_identified) - state_names = [f"seasonal_{s}_{f}_{i}" for i in range(n) for f in ["Cos", "Sin"]] - - seasonal_params = rng.normal(size=n_states).astype(floatX) - - params[f"seasonal_{s}"] = seasonal_params - expected_param_dims[f"seasonal_{s}"] += (f"seasonal_{s}_state",) - expected_coords[ALL_STATE_DIM] += state_names - expected_coords[ALL_STATE_AUX_DIM] += state_names - expected_coords[f"seasonal_{s}_state"] += ( - tuple(state_names[:-1]) if last_state_not_identified else tuple(state_names) - ) - - for param in seasonal_params: - sm_init[f"freq_seasonal.{state_count}"] = param - state_count += 1 - if last_state_not_identified: - sm_init[f"freq_seasonal.{state_count}"] = 0.0 - state_count += 1 - - if has_innov: - sigma2 = np.abs(rng.normal()).astype(floatX) - params[f"sigma_seasonal_{s}"] = np.sqrt(sigma2) - sm_params[f"sigma2.freq_seasonal_{s}({n})"] = sigma2 - expected_coords[SHOCK_DIM] += state_names - 
expected_coords[SHOCK_AUX_DIM] += state_names - - comp = st.FrequencySeasonality( - name=f"seasonal_{s}", season_length=s, n=n, innovations=has_innov - ) - components.append(comp) - - if cycle: - cycle_length = np.random.choice(np.arange(2, 12)).astype(floatX) - - # Statsmodels takes the frequency not the cycle length, so convert it. - sm_params["frequency.cycle"] = 2.0 * np.pi / cycle_length - params["cycle_length"] = cycle_length - - init_cycle = rng.normal(size=(2,)).astype(floatX) - params["cycle"] = init_cycle - expected_param_dims["cycle"] += ("cycle_state",) - - state_names = ["cycle_Cos", "cycle_Sin"] - expected_coords["cycle_state"] += state_names - expected_coords[ALL_STATE_DIM] += state_names - expected_coords[ALL_STATE_AUX_DIM] += state_names - - sm_init["cycle"] = init_cycle[0] - sm_init["cycle.auxilliary"] = init_cycle[1] - - if stochastic_cycle: - sigma2 = np.abs(rng.normal()).astype(floatX) - params["sigma_cycle"] = np.sqrt(sigma2) - expected_coords[SHOCK_DIM] += state_names - expected_coords[SHOCK_AUX_DIM] += state_names - - sm_params["sigma2.cycle"] = sigma2 - - if damped_cycle: - rho = rng.beta(1, 1) - params["cycle_dampening_factor"] = rho - sm_params["damping.cycle"] = rho - - comp = st.CycleComponent( - name="cycle", - dampen=damped_cycle, - innovations=stochastic_cycle, - estimate_cycle_length=True, - ) - - components.append(comp) - - if autoregressive is not None: - ar_names = [f"L{i+1}.data" for i in range(autoregressive)] - ar_params = rng.normal(size=(autoregressive,)).astype(floatX) - if autoregressive == 1: - ar_params = ar_params.item() - sigma2 = np.abs(rng.normal()).astype(floatX) - - params["ar_params"] = ar_params - params["sigma_ar"] = np.sqrt(sigma2) - expected_param_dims["ar_params"] += (AR_PARAM_DIM,) - expected_coords[AR_PARAM_DIM] += tuple(list(range(1, autoregressive + 1))) - expected_coords[ALL_STATE_DIM] += ar_names - expected_coords[ALL_STATE_AUX_DIM] += ar_names - expected_coords[SHOCK_DIM] += ["ar_innovation"] - 
expected_coords[SHOCK_AUX_DIM] += ["ar_innovation"] - - sm_params["sigma2.ar"] = sigma2 - for i, rho in enumerate(ar_params): - sm_init[f"ar.L{i+1}"] = 0 - sm_params[f"ar.L{i+1}"] = rho - - comp = st.AutoregressiveComponent(name="ar", order=autoregressive) - components.append(comp) - - if exog is not None: - names = [f"x{i + 1}" for i in range(exog.shape[1])] - betas = rng.normal(size=(exog.shape[1],)).astype(floatX) - params["beta_exog"] = betas - params["data_exog"] = exog - expected_param_dims["beta_exog"] += ("exog_state",) - expected_param_dims["data_exog"] += ("time", "exog_data") - - expected_coords["exog_state"] += tuple(names) - - for i, beta in enumerate(betas): - sm_params[f"beta.x{i + 1}"] = beta - sm_init[f"beta.x{i+1}"] = beta - comp = st.RegressionComponent(name="exog", state_names=names) - components.append(comp) - - st_mod = components.pop(0) - for comp in components: - st_mod += comp - return mod, st_mod, params, sm_params, sm_init, expected_param_dims, expected_coords - - -@pytest.mark.parametrize( - "level, trend, stochastic_level, stochastic_trend, irregular", - [ - (False, False, False, False, True), - (True, True, True, True, True), - (True, True, False, True, False), - ], -) -@pytest.mark.parametrize("autoregressive", [None, 3]) -@pytest.mark.parametrize("seasonal, stochastic_seasonal", [(None, False), (12, False), (12, True)]) -@pytest.mark.parametrize( - "freq_seasonal, stochastic_freq_seasonal", - [ - (None, None), - ([{"period": 12, "harmonics": 2}], [False]), - ([{"period": 12, "harmonics": 6}], [True]), - ], -) -@pytest.mark.parametrize( - "cycle, damped_cycle, stochastic_cycle", - [(False, False, False), (True, False, True), (True, True, True)], -) -@pytest.mark.filterwarnings("ignore::statsmodels.tools.sm_exceptions.ConvergenceWarning") -@pytest.mark.filterwarnings("ignore::statsmodels.tools.sm_exceptions.SpecificationWarning") -def test_structural_model_against_statsmodels( - level, - trend, - stochastic_level, - stochastic_trend, - 
irregular, - autoregressive, - seasonal, - stochastic_seasonal, - freq_seasonal, - stochastic_freq_seasonal, - cycle, - damped_cycle, - stochastic_cycle, - rng, -): - retvals = create_structural_model_and_equivalent_statsmodel( - rng, - level=level, - trend=trend, - seasonal=seasonal, - freq_seasonal=freq_seasonal, - cycle=cycle, - damped_cycle=damped_cycle, - autoregressive=autoregressive, - irregular=irregular, - stochastic_level=stochastic_level, - stochastic_trend=stochastic_trend, - stochastic_seasonal=stochastic_seasonal, - stochastic_freq_seasonal=stochastic_freq_seasonal, - stochastic_cycle=stochastic_cycle, - ) - f_sm_mod, mod, params, sm_params, sm_init, expected_dims, expected_coords = retvals - - data = rng.normal(size=(100,)).astype(floatX) - sm_mod = f_sm_mod(data) - - if len(sm_init) > 0: - init_array = np.concatenate( - [np.atleast_1d(sm_init[k]).ravel() for k in sm_mod.state_names if k != "dummy"] - ) - sm_mod.initialize_known(init_array, np.eye(sm_mod.k_states)) - else: - sm_mod.initialize_default() - - if len(sm_params) > 0: - param_array = np.concatenate( - [np.atleast_1d(sm_params[k]).ravel() for k in sm_mod.param_names] - ) - sm_mod.update(param_array, transformed=True) - - _assert_all_statespace_matrices_match(mod, params, sm_mod) - - built_model = mod.build(verbose=False, mode="FAST_RUN") - assert built_model.mode == "FAST_RUN" - - _assert_coord_shapes_match_matrices(built_model, params) - _assert_param_dims_correct(built_model.param_dims, expected_dims) - _assert_coords_correct(built_model.coords, expected_coords) - _assert_params_info_correct(built_model.param_info, built_model.coords, built_model.param_dims) - - -def test_level_trend_model(rng): - mod = st.LevelTrendComponent(order=2, innovations_order=0) - params = {"initial_trend": [0.0, 1.0]} - x, y = simulate_from_numpy_model(mod, rng, params) - - assert_allclose(np.diff(y), 1, atol=ATOL, rtol=RTOL) - - # Check coords - mod = mod.build(verbose=False) - 
_assert_basic_coords_correct(mod) - assert mod.coords["trend_state"] == ["level", "trend"] - - -def test_measurement_error(rng): - mod = st.MeasurementError("obs") + st.LevelTrendComponent(order=2) - mod = mod.build(verbose=False) - - _assert_basic_coords_correct(mod) - assert "sigma_obs" in mod.param_names - - -@pytest.mark.parametrize("order", [1, 2, [1, 0, 1]], ids=["AR1", "AR2", "AR(1,0,1)"]) -def test_autoregressive_model(order, rng): - ar = st.AutoregressiveComponent(order=order) - params = { - "ar_params": np.full((sum(ar.order),), 0.5, dtype=floatX), - "sigma_ar": 0.0, - } - - x, y = simulate_from_numpy_model(ar, rng, params, steps=100) - - # Check coords - ar.build(verbose=False) - _assert_basic_coords_correct(ar) - lags = np.arange(len(order) if isinstance(order, list) else order, dtype="int") + 1 - if isinstance(order, list): - lags = lags[np.flatnonzero(order)] - assert_allclose(ar.coords["ar_lag"], lags) - - -@pytest.mark.parametrize("s", [10, 25, 50]) -@pytest.mark.parametrize("innovations", [True, False]) -@pytest.mark.parametrize("remove_first_state", [True, False]) -@pytest.mark.filterwarnings( - "ignore:divide by zero encountered in matmul:RuntimeWarning", - "ignore:overflow encountered in matmul:RuntimeWarning", - "ignore:invalid value encountered in matmul:RuntimeWarning", -) -def test_time_seasonality(s, innovations, remove_first_state, rng): - def random_word(rng): - return "".join(rng.choice(list("abcdefghijklmnopqrstuvwxyz")) for _ in range(5)) - - state_names = [random_word(rng) for _ in range(s)] - mod = st.TimeSeasonality( - season_length=s, - innovations=innovations, - name="season", - state_names=state_names, - remove_first_state=remove_first_state, - ) - x0 = np.zeros(mod.k_states, dtype=floatX) - x0[0] = 1 - - params = {"season_coefs": x0} - if mod.innovations: - params["sigma_season"] = 0.0 - - x, y = simulate_from_numpy_model(mod, rng, params) - y = y.ravel() - if not innovations: - assert_pattern_repeats(y, s, atol=ATOL, rtol=RTOL) 
- - # Check coords - mod.build(verbose=False) - _assert_basic_coords_correct(mod) - test_slice = slice(1, None) if remove_first_state else slice(None) - assert mod.coords["season_state"] == state_names[test_slice] - - -def get_shift_factor(s): - s_str = str(s) - if "." not in s_str: - return 1 - _, decimal = s_str.split(".") - return 10 ** len(decimal) - - -@pytest.mark.parametrize("n", [*np.arange(1, 6, dtype="int").tolist(), None]) -@pytest.mark.parametrize("s", [5, 10, 25, 25.2]) -def test_frequency_seasonality(n, s, rng): - mod = st.FrequencySeasonality(season_length=s, n=n, name="season") - x0 = rng.normal(size=mod.n_coefs).astype(floatX) - params = {"season": x0, "sigma_season": 0.0} - k = get_shift_factor(s) - T = int(s * k) - - x, y = simulate_from_numpy_model(mod, rng, params, steps=2 * T) - assert_pattern_repeats(y, T, atol=ATOL, rtol=RTOL) - - # Check coords - mod.build(verbose=False) - _assert_basic_coords_correct(mod) - if n is None: - n = int(s // 2) - states = [f"season_{f}_{i}" for i in range(n) for f in ["Cos", "Sin"]] - - # Remove the last state when the model is completely saturated - if s / n == 2.0: - states.pop() - assert mod.coords["season_state"] == states - - -cycle_test_vals = zip([None, None, 3, 5, 10], [False, True, True, False, False]) - - -def test_cycle_component_deterministic(rng): - cycle = st.CycleComponent( - name="cycle", cycle_length=12, estimate_cycle_length=False, innovations=False - ) - params = {"cycle": np.array([1.0, 1.0], dtype=floatX)} - x, y = simulate_from_numpy_model(cycle, rng, params, steps=12 * 12) - - assert_pattern_repeats(y, 12, atol=ATOL, rtol=RTOL) - - -def test_cycle_component_with_dampening(rng): - cycle = st.CycleComponent( - name="cycle", cycle_length=12, estimate_cycle_length=False, innovations=False, dampen=True - ) - params = {"cycle": np.array([10.0, 10.0], dtype=floatX), "cycle_dampening_factor": 0.75} - x, y = simulate_from_numpy_model(cycle, rng, params, steps=100) - - # Check that the cycle dampens 
to zero over time - assert_allclose(y[-1], 0.0, atol=ATOL, rtol=RTOL) - - -def test_cycle_component_with_innovations_and_cycle_length(rng): - cycle = st.CycleComponent( - name="cycle", estimate_cycle_length=True, innovations=True, dampen=True - ) - params = { - "cycle": np.array([1.0, 1.0], dtype=floatX), - "cycle_length": 12.0, - "cycle_dampening_factor": 0.95, - "sigma_cycle": 1.0, - } - - x, y = simulate_from_numpy_model(cycle, rng, params) - - cycle.build(verbose=False) - _assert_basic_coords_correct(cycle) - - -def test_exogenous_component(rng): - data = rng.normal(size=(100, 2)).astype(floatX) - mod = st.RegressionComponent(state_names=["feature_1", "feature_2"], name="exog") - - params = {"beta_exog": np.array([1.0, 2.0], dtype=floatX)} - exog_data = {"data_exog": data} - x, y = simulate_from_numpy_model(mod, rng, params, exog_data) - - # Check that the generated data is just a linear regression - assert_allclose(y, data @ params["beta_exog"], atol=ATOL, rtol=RTOL) - - mod.build(verbose=False) - _assert_basic_coords_correct(mod) - assert mod.coords["exog_state"] == ["feature_1", "feature_2"] - - -def test_adding_exogenous_component(rng): - data = rng.normal(size=(100, 2)).astype(floatX) - reg = st.RegressionComponent(state_names=["a", "b"], name="exog") - ll = st.LevelTrendComponent(name="level") - - seasonal = st.FrequencySeasonality(name="annual", season_length=12, n=4) - mod = reg + ll + seasonal - - assert mod.ssm["design"].eval({"data_exog": data}).shape == (100, 1, 2 + 2 + 8) - assert_allclose(mod.ssm["design", 5, 0, :2].eval({"data_exog": data}), data[5]) - - -def test_add_components(): - ll = st.LevelTrendComponent(order=2) - se = st.TimeSeasonality(name="seasonal", season_length=12) - mod = ll + se - - ll_params = { - "initial_trend": np.zeros(2, dtype=floatX), - "sigma_trend": np.ones(2, dtype=floatX), - } - se_params = { - "seasonal_coefs": np.ones(11, dtype=floatX), - "sigma_seasonal": 1.0, - } - all_params = ll_params.copy() - 
all_params.update(se_params) - - (ll_x0, ll_P0, ll_c, ll_d, ll_T, ll_Z, ll_R, ll_H, ll_Q) = unpack_symbolic_matrices_with_params( - ll, ll_params - ) - (se_x0, se_P0, se_c, se_d, se_T, se_Z, se_R, se_H, se_Q) = unpack_symbolic_matrices_with_params( - se, se_params - ) - x0, P0, c, d, T, Z, R, H, Q = unpack_symbolic_matrices_with_params(mod, all_params) - - for property in ["param_names", "shock_names", "param_info", "coords", "param_dims"]: - assert [x in getattr(mod, property) for x in getattr(ll, property)] - assert [x in getattr(mod, property) for x in getattr(se, property)] - - ll_mats = [ll_T, ll_R, ll_Q] - se_mats = [se_T, se_R, se_Q] - all_mats = [T, R, Q] - - for ll_mat, se_mat, all_mat in zip(ll_mats, se_mats, all_mats): - assert_allclose(all_mat, linalg.block_diag(ll_mat, se_mat), atol=ATOL, rtol=RTOL) - - ll_mats = [ll_x0, ll_c, ll_Z] - se_mats = [se_x0, se_c, se_Z] - all_mats = [x0, c, Z] - axes = [0, 0, 1] - - for ll_mat, se_mat, all_mat, axis in zip(ll_mats, se_mats, all_mats, axes): - assert_allclose(all_mat, np.concatenate([ll_mat, se_mat], axis=axis), atol=ATOL, rtol=RTOL) - - -def test_filter_scans_time_varying_design_matrix(rng): - time_idx = pd.date_range(start="2000-01-01", freq="D", periods=100) - data = pd.DataFrame(rng.normal(size=(100, 2)), columns=["a", "b"], index=time_idx) - - y = pd.DataFrame(rng.normal(size=(100, 1)), columns=["data"], index=time_idx) - - reg = st.RegressionComponent(state_names=["a", "b"], name="exog") - mod = reg.build(verbose=False) - - with pm.Model(coords=mod.coords) as m: - data_exog = pm.Data("data_exog", data.values) - - x0 = pm.Normal("x0", dims=["state"]) - P0 = pm.Deterministic("P0", pt.eye(mod.k_states), dims=["state", "state_aux"]) - beta_exog = pm.Normal("beta_exog", dims=["exog_state"]) - - mod.build_statespace_graph(y) - x0, P0, c, d, T, Z, R, H, Q = mod.unpack_statespace() - pm.Deterministic("Z", Z) - - prior = pm.sample_prior_predictive(draws=10) - - prior_Z = prior.prior.Z.values - assert 
prior_Z.shape == (1, 10, 100, 1, 2) - assert_allclose(prior_Z[0, :, :, 0, :], data.values[None].repeat(10, axis=0)) - - -@pytest.mark.skipif(floatX.endswith("32"), reason="Prior covariance not PSD at half-precision") -def test_extract_components_from_idata(rng): - time_idx = pd.date_range(start="2000-01-01", freq="D", periods=100) - data = pd.DataFrame(rng.normal(size=(100, 2)), columns=["a", "b"], index=time_idx) - - y = pd.DataFrame(rng.normal(size=(100, 1)), columns=["data"], index=time_idx) - - ll = st.LevelTrendComponent() - season = st.FrequencySeasonality(name="seasonal", season_length=12, n=2, innovations=False) - reg = st.RegressionComponent(state_names=["a", "b"], name="exog") - me = st.MeasurementError("obs") - mod = (ll + season + reg + me).build(verbose=False) - - with pm.Model(coords=mod.coords) as m: - data_exog = pm.Data("data_exog", data.values) - - x0 = pm.Normal("x0", dims=["state"]) - P0 = pm.Deterministic("P0", pt.eye(mod.k_states), dims=["state", "state_aux"]) - beta_exog = pm.Normal("beta_exog", dims=["exog_state"]) - initial_trend = pm.Normal("initial_trend", dims=["trend_state"]) - sigma_trend = pm.Exponential("sigma_trend", 1, dims=["trend_shock"]) - seasonal_coefs = pm.Normal("seasonal", dims=["seasonal_state"]) - sigma_obs = pm.Exponential("sigma_obs", 1) - - mod.build_statespace_graph(y) - - x0, P0, c, d, T, Z, R, H, Q = mod.unpack_statespace() - prior = pm.sample_prior_predictive(draws=10) - - filter_prior = mod.sample_conditional_prior(prior) - comp_prior = mod.extract_components_from_idata(filter_prior) - comp_states = comp_prior.filtered_prior.coords["state"].values - expected_states = ["LevelTrend[level]", "LevelTrend[trend]", "seasonal", "exog[a]", "exog[b]"] - missing = set(comp_states) - set(expected_states) - - assert len(missing) == 0, missing From 1f3dc3a87cc81f24b2beb464249f76f51287b05a Mon Sep 17 00:00:00 2001 From: Jonathan Dekermanjian Date: Sat, 5 Jul 2025 08:48:04 -0600 Subject: [PATCH 12/21] removed incorrectly 
committed file structural.py --- pymc_extras/statespace/models/structural.py | 1679 ------------------- 1 file changed, 1679 deletions(-) delete mode 100644 pymc_extras/statespace/models/structural.py diff --git a/pymc_extras/statespace/models/structural.py b/pymc_extras/statespace/models/structural.py deleted file mode 100644 index a982366c3..000000000 --- a/pymc_extras/statespace/models/structural.py +++ /dev/null @@ -1,1679 +0,0 @@ -import functools as ft -import logging - -from abc import ABC -from collections.abc import Sequence -from itertools import pairwise -from typing import Any - -import numpy as np -import pytensor -import pytensor.tensor as pt -import xarray as xr - -from pytensor import Variable -from pytensor.compile.mode import Mode - -from pymc_extras.statespace.core import PytensorRepresentation -from pymc_extras.statespace.core.statespace import PyMCStateSpace -from pymc_extras.statespace.models.utilities import ( - conform_time_varying_and_time_invariant_matrices, - make_default_coords, -) -from pymc_extras.statespace.utils.constants import ( - ALL_STATE_AUX_DIM, - ALL_STATE_DIM, - AR_PARAM_DIM, - LONG_MATRIX_NAMES, - POSITION_DERIVATIVE_NAMES, - TIME_DIM, -) - -_log = logging.getLogger("pymc.experimental.statespace") - -floatX = pytensor.config.floatX - - -def order_to_mask(order): - if isinstance(order, int): - return np.ones(order).astype(bool) - else: - return np.array(order).astype(bool) - - -def _frequency_transition_block(s, j): - lam = 2 * np.pi * j / s - - return pt.stack([[pt.cos(lam), pt.sin(lam)], [-pt.sin(lam), pt.cos(lam)]]) - - -class StructuralTimeSeries(PyMCStateSpace): - r""" - Structural Time Series Model - - The structural time series model, named by [1] and presented in statespace form in [2], is a framework for - decomposing a univariate time series into level, trend, seasonal, and cycle components. It also admits the - possibility of exogenous regressors. 
Unlike the SARIMAX framework, the time series is not assumed to be stationary. - - Notes - ----- - - .. math:: - y_t = \mu_t + \gamma_t + c_t + \varepsilon_t - - """ - - def __init__( - self, - ssm: PytensorRepresentation, - state_names: list[str], - data_names: list[str], - shock_names: list[str], - param_names: list[str], - exog_names: list[str], - param_dims: dict[str, tuple[int]], - coords: dict[str, Sequence], - param_info: dict[str, dict[str, Any]], - data_info: dict[str, dict[str, Any]], - component_info: dict[str, dict[str, Any]], - measurement_error: bool, - name_to_variable: dict[str, Variable], - name_to_data: dict[str, Variable] | None = None, - name: str | None = None, - verbose: bool = True, - filter_type: str = "standard", - mode: str | Mode | None = None, - ): - # Add the initial state covariance to the parameters - if name is None: - name = "data" - self._name = name - - k_states, k_posdef, k_endog = ssm.k_states, ssm.k_posdef, ssm.k_endog - param_names, param_dims, param_info = self._add_inital_state_cov_to_properties( - param_names, param_dims, param_info, k_states - ) - self._state_names = state_names.copy() - self._data_names = data_names.copy() - self._shock_names = shock_names.copy() - self._param_names = param_names.copy() - self._param_dims = param_dims.copy() - - default_coords = make_default_coords(self) - coords.update(default_coords) - - self._coords = coords - self._param_info = param_info.copy() - self._data_info = data_info.copy() - self.measurement_error = measurement_error - - super().__init__( - k_endog, - k_states, - max(1, k_posdef), - filter_type=filter_type, - verbose=verbose, - measurement_error=measurement_error, - mode=mode, - ) - self.ssm = ssm.copy() - - if k_posdef == 0: - # If there is no randomness in the model, add dummy matrices to the representation to avoid errors - # when we go to construct random variables from the matrices - self.ssm.k_posdef = self.k_posdef - self.ssm.shapes["state_cov"] = (1, 1, 1) - 
self.ssm["state_cov"] = pt.zeros((1, 1, 1)) - - self.ssm.shapes["selection"] = (1, self.k_states, 1) - self.ssm["selection"] = pt.zeros((1, self.k_states, 1)) - - self._component_info = component_info.copy() - - self._name_to_variable = name_to_variable.copy() - self._name_to_data = name_to_data.copy() - - self._exog_names = exog_names.copy() - self._needs_exog_data = len(exog_names) > 0 - - P0 = self.make_and_register_variable("P0", shape=(self.k_states, self.k_states)) - self.ssm["initial_state_cov"] = P0 - - @staticmethod - def _add_inital_state_cov_to_properties(param_names, param_dims, param_info, k_states): - param_names += ["P0"] - param_dims["P0"] = (ALL_STATE_DIM, ALL_STATE_AUX_DIM) - param_info["P0"] = { - "shape": (k_states, k_states), - "constraints": "Positive semi-definite", - "dims": param_dims["P0"], - } - - return param_names, param_dims, param_info - - @property - def param_names(self): - return self._param_names - - @property - def data_names(self) -> list[str]: - return self._data_names - - @property - def state_names(self): - return self._state_names - - @property - def observed_states(self): - return [self._name] - - @property - def shock_names(self): - return self._shock_names - - @property - def param_dims(self): - return self._param_dims - - @property - def coords(self) -> dict[str, Sequence]: - return self._coords - - @property - def param_info(self) -> dict[str, dict[str, Any]]: - return self._param_info - - @property - def data_info(self) -> dict[str, dict[str, Any]]: - return self._data_info - - def make_symbolic_graph(self) -> None: - """ - Assign placeholder pytensor variables among statespace matrices in positions where PyMC variables will go. - - Notes - ----- - This assignment is handled by the components, so this function is implemented only to avoid the - NotImplementedError raised by the base class. 
- """ - - pass - - def _state_slices_from_info(self): - info = self._component_info.copy() - comp_states = np.cumsum([0] + [info["k_states"] for info in info.values()]) - state_slices = [slice(i, j) for i, j in pairwise(comp_states)] - - return state_slices - - def _hidden_states_from_data(self, data): - state_slices = self._state_slices_from_info() - info = self._component_info - names = info.keys() - result = [] - - for i, (name, s) in enumerate(zip(names, state_slices)): - obs_idx = info[name]["obs_state_idx"] - if obs_idx is None: - continue - - X = data[..., s] - if info[name]["combine_hidden_states"]: - sum_idx = np.flatnonzero(obs_idx) - result.append(X[..., sum_idx].sum(axis=-1)[..., None]) - else: - comp_names = self.state_names[s] - for j, state_name in enumerate(comp_names): - result.append(X[..., j, None]) - - return np.concatenate(result, axis=-1) - - def _get_subcomponent_names(self): - state_slices = self._state_slices_from_info() - info = self._component_info - names = info.keys() - result = [] - - for i, (name, s) in enumerate(zip(names, state_slices)): - if info[name]["combine_hidden_states"]: - result.append(name) - else: - comp_names = self.state_names[s] - result.extend([f"{name}[{comp_name}]" for comp_name in comp_names]) - return result - - def extract_components_from_idata(self, idata: xr.Dataset) -> xr.Dataset: - r""" - Extract interpretable hidden states from an InferenceData returned by a PyMCStateSpace sampling method - - Parameters - ---------- - idata: Dataset - A Dataset object, returned by a PyMCStateSpace sampling method - - Returns - ------- - idata: Dataset - An Dataset object with hidden states transformed to represent only the "interpretable" subcomponents - of the structural model. - - Notes - ----- - In general, a structural statespace model can be represented as: - - .. 
math:: - y_t = \mu_t + \nu_t + \cdots + \gamma_t + c_t + \xi_t + \epsilon_t \tag{1} - - Where: - - - :math:`\mu_t` is the level of the data at time t - - :math:`\nu_t` is the slope of the data at time t - - :math:`\cdots` are higher time derivatives of the position (acceleration, jerk, etc) at time t - - :math:`\gamma_t` is the seasonal component at time t - - :math:`c_t` is the cycle component at time t - - :math:`\xi_t` is the autoregressive error at time t - - :math:`\varepsilon_t` is the measurement error at time t - - In state space form, some or all of these components are represented as linear combinations of other - subcomponents, making interpretation of the outputs of the outputs difficult. The purpose of this function is - to take the expended statespace representation and return a "reduced form" of only the components shown in - equation (1). - """ - - def _extract_and_transform_variable(idata, new_state_names): - *_, time_dim, state_dim = idata.dims - state_func = ft.partial(self._hidden_states_from_data) - new_idata = xr.apply_ufunc( - state_func, - idata, - input_core_dims=[[time_dim, state_dim]], - output_core_dims=[[time_dim, state_dim]], - exclude_dims={state_dim}, - ) - new_idata.coords.update({state_dim: new_state_names}) - return new_idata - - var_names = list(idata.data_vars.keys()) - is_latent = [idata[name].shape[-1] == self.k_states for name in var_names] - new_state_names = self._get_subcomponent_names() - - latent_names = [name for latent, name in zip(is_latent, var_names) if latent] - dropped_vars = set(var_names) - set(latent_names) - if len(dropped_vars) > 0: - _log.warning( - f'Variables {", ".join(dropped_vars)} do not contain all hidden states (their last dimension ' - f"is not {self.k_states}). They will not be present in the modified idata." - ) - if len(dropped_vars) == len(var_names): - raise ValueError( - "Provided idata had no variables with all hidden states; cannot extract components." 
- ) - - idata_new = xr.Dataset( - { - name: _extract_and_transform_variable(idata[name], new_state_names) - for name in latent_names - } - ) - return idata_new - - -class Component(ABC): - r""" - Base class for a component of a structural timeseries model. - - This base class contains a subset of the class attributes of the PyMCStateSpace class, and none of the class - methods. The purpose of a component is to allow the partial definition of a structural model. Components are - assembled into a full model by the StructuralTimeSeries class. - - Parameters - ---------- - name: str - The name of the component - k_endog: int - Number of endogenous variables being modeled. Currently, must be one because structural models only support - univariate data. - k_states: int - Number of hidden states in the component model - k_posdef: int - Rank of the state covariance matrix, or the number of sources of innovations in the component model - measurement_error: bool - Whether the observation associated with the component has measurement error. Default is False. - combine_hidden_states: bool - Flag for the ``extract_hidden_states_from_data`` method. When ``True``, hidden states from the component model - are extracted as ``hidden_states[:, np.flatnonzero(Z)]``. Should be True in models where hidden states - individually have no interpretation, such as seasonal or autoregressive components. 
- """ - - def __init__( - self, - name, - k_endog, - k_states, - k_posdef, - state_names=None, - data_names=None, - shock_names=None, - param_names=None, - exog_names=None, - representation: PytensorRepresentation | None = None, - measurement_error=False, - combine_hidden_states=True, - component_from_sum=False, - obs_state_idxs=None, - ): - self.name = name - self.k_endog = k_endog - self.k_states = k_states - self.k_posdef = k_posdef - self.measurement_error = measurement_error - - self.state_names = state_names if state_names is not None else [] - self.data_names = data_names if data_names is not None else [] - self.shock_names = shock_names if shock_names is not None else [] - self.param_names = param_names if param_names is not None else [] - self.exog_names = exog_names if exog_names is not None else [] - - self.needs_exog_data = len(self.exog_names) > 0 - self.coords = {} - self.param_dims = {} - - self.param_info = {} - self.data_info = {} - - self.param_counts = {} - - if representation is None: - self.ssm = PytensorRepresentation(k_endog=k_endog, k_states=k_states, k_posdef=k_posdef) - else: - self.ssm = representation - - self._name_to_variable = {} - self._name_to_data = {} - - if not component_from_sum: - self.populate_component_properties() - self.make_symbolic_graph() - - self._component_info = { - self.name: { - "k_states": self.k_states, - "k_enodg": self.k_endog, - "k_posdef": self.k_posdef, - "combine_hidden_states": combine_hidden_states, - "obs_state_idx": obs_state_idxs, - } - } - - def make_and_register_variable(self, name, shape, dtype=floatX) -> Variable: - r""" - Helper function to create a pytensor symbolic variable and register it in the _name_to_variable dictionary - - Parameters - ---------- - name : str - The name of the placeholder variable. Must be the name of a model parameter. 
- shape : int or tuple of int - Shape of the parameter - dtype : str, default pytensor.config.floatX - dtype of the parameter - - Notes - ----- - Symbolic pytensor variables are used in the ``make_symbolic_graph`` method as placeholders for PyMC random - variables. The change is made in the ``_insert_random_variables`` method via ``pytensor.graph_replace``. To - make the change, a dictionary mapping pytensor variables to PyMC random variables needs to be constructed. - - The purpose of this method is to: - 1. Create the placeholder symbolic variables - 2. Register the placeholder variable in the ``_name_to_variable`` dictionary - - The shape provided here will define the shape of the prior that will need to be provided by the user. - - An error is raised if the provided name has already been registered, or if the name is not present in the - ``param_names`` property. - """ - if name not in self.param_names: - raise ValueError( - f"{name} is not a model parameter. All placeholder variables should correspond to model " - f"parameters." - ) - - if name in self._name_to_variable.keys(): - raise ValueError( - f"{name} is already a registered placeholder variable with shape " - f"{self._name_to_variable[name].type.shape}" - ) - - placeholder = pt.tensor(name, shape=shape, dtype=dtype) - self._name_to_variable[name] = placeholder - return placeholder - - def make_and_register_data(self, name, shape, dtype=floatX) -> Variable: - r""" - Helper function to create a pytensor symbolic variable and register it in the _name_to_data dictionary - - Parameters - ---------- - name : str - The name of the placeholder data. Must be the name of an expected data variable. - shape : int or tuple of int - Shape of the parameter - dtype : str, default pytensor.config.floatX - dtype of the parameter - - Notes - ----- - See docstring for make_and_register_variable for more details. This function is similar, but handles data - inputs instead of model parameters. 
- - An error is raised if the provided name has already been registered, or if the name is not present in the - ``data_names`` property. - """ - if name not in self.data_names: - raise ValueError( - f"{name} is not a model parameter. All placeholder variables should correspond to model " - f"parameters." - ) - - if name in self._name_to_data.keys(): - raise ValueError( - f"{name} is already a registered placeholder variable with shape " - f"{self._name_to_data[name].type.shape}" - ) - - placeholder = pt.tensor(name, shape=shape, dtype=dtype) - self._name_to_data[name] = placeholder - return placeholder - - def make_symbolic_graph(self) -> None: - raise NotImplementedError - - def populate_component_properties(self): - raise NotImplementedError - - def _get_combined_shapes(self, other): - k_states = self.k_states + other.k_states - k_posdef = self.k_posdef + other.k_posdef - if self.k_endog != other.k_endog: - raise NotImplementedError( - "Merging elements with different numbers of observed states is not supported.>" - ) - k_endog = self.k_endog - - return k_states, k_posdef, k_endog - - def _combine_statespace_representations(self, other): - def make_slice(name, x, o_x): - ndim = max(x.ndim, o_x.ndim) - return (name,) + (slice(None, None, None),) * ndim - - k_states, k_posdef, k_endog = self._get_combined_shapes(other) - - self_matrices = [self.ssm[name] for name in LONG_MATRIX_NAMES] - other_matrices = [other.ssm[name] for name in LONG_MATRIX_NAMES] - - x0, P0, c, d, T, Z, R, H, Q = ( - self.ssm[make_slice(name, x, o_x)] - for name, x, o_x in zip(LONG_MATRIX_NAMES, self_matrices, other_matrices) - ) - o_x0, o_P0, o_c, o_d, o_T, o_Z, o_R, o_H, o_Q = ( - other.ssm[make_slice(name, x, o_x)] - for name, x, o_x in zip(LONG_MATRIX_NAMES, self_matrices, other_matrices) - ) - - initial_state = pt.concatenate(conform_time_varying_and_time_invariant_matrices(x0, o_x0)) - initial_state.name = x0.name - - initial_state_cov = pt.linalg.block_diag(P0, o_P0) - 
initial_state_cov.name = P0.name - - state_intercept = pt.concatenate(conform_time_varying_and_time_invariant_matrices(c, o_c)) - state_intercept.name = c.name - - obs_intercept = d + o_d - obs_intercept.name = d.name - - transition = pt.linalg.block_diag(T, o_T) - transition.name = T.name - - design = pt.concatenate(conform_time_varying_and_time_invariant_matrices(Z, o_Z), axis=-1) - design.name = Z.name - - selection = pt.linalg.block_diag(R, o_R) - selection.name = R.name - - obs_cov = H + o_H - obs_cov.name = H.name - - state_cov = pt.linalg.block_diag(Q, o_Q) - state_cov.name = Q.name - - new_ssm = PytensorRepresentation( - k_endog=k_endog, - k_states=k_states, - k_posdef=k_posdef, - initial_state=initial_state, - initial_state_cov=initial_state_cov, - state_intercept=state_intercept, - obs_intercept=obs_intercept, - transition=transition, - design=design, - selection=selection, - obs_cov=obs_cov, - state_cov=state_cov, - ) - - return new_ssm - - def _combine_property(self, other, name): - self_prop = getattr(self, name) - if isinstance(self_prop, list): - return self_prop + getattr(other, name) - elif isinstance(self_prop, dict): - new_prop = self_prop.copy() - new_prop.update(getattr(other, name)) - return new_prop - - def _combine_component_info(self, other): - combined_info = {} - for key, value in self._component_info.items(): - if not key.startswith("StateSpace"): - if key in combined_info.keys(): - raise ValueError(f"Found duplicate component named {key}") - combined_info[key] = value - - for key, value in other._component_info.items(): - if not key.startswith("StateSpace"): - if key in combined_info.keys(): - raise ValueError(f"Found duplicate component named {key}") - combined_info[key] = value - - return combined_info - - def _make_combined_name(self): - components = self._component_info.keys() - name = f'StateSpace[{", ".join(components)}]' - return name - - def __add__(self, other): - state_names = self._combine_property(other, "state_names") - 
data_names = self._combine_property(other, "data_names") - param_names = self._combine_property(other, "param_names") - shock_names = self._combine_property(other, "shock_names") - param_info = self._combine_property(other, "param_info") - data_info = self._combine_property(other, "data_info") - param_dims = self._combine_property(other, "param_dims") - coords = self._combine_property(other, "coords") - exog_names = self._combine_property(other, "exog_names") - - _name_to_variable = self._combine_property(other, "_name_to_variable") - _name_to_data = self._combine_property(other, "_name_to_data") - - measurement_error = any([self.measurement_error, other.measurement_error]) - - k_states, k_posdef, k_endog = self._get_combined_shapes(other) - ssm = self._combine_statespace_representations(other) - - new_comp = Component( - name="", - k_endog=1, - k_states=k_states, - k_posdef=k_posdef, - measurement_error=measurement_error, - representation=ssm, - component_from_sum=True, - ) - new_comp._component_info = self._combine_component_info(other) - new_comp.name = new_comp._make_combined_name() - - names_and_props = [ - ("state_names", state_names), - ("data_names", data_names), - ("param_names", param_names), - ("shock_names", shock_names), - ("param_dims", param_dims), - ("coords", coords), - ("param_dims", param_dims), - ("param_info", param_info), - ("data_info", data_info), - ("exog_names", exog_names), - ("_name_to_variable", _name_to_variable), - ("_name_to_data", _name_to_data), - ] - - for prop, value in names_and_props: - setattr(new_comp, prop, value) - - return new_comp - - def build( - self, name=None, filter_type="standard", verbose=True, mode: str | Mode | None = None - ): - """ - Build a StructuralTimeSeries statespace model from the current component(s) - - Parameters - ---------- - name: str, optional - Name of the exogenous data being modeled. Default is "data" - - filter_type : str, optional - The type of Kalman filter to use. 
Valid options are "standard", "univariate", "single", "cholesky", and - "steady_state". For more information, see the docs for each filter. Default is "standard". - - verbose : bool, optional - If True, displays information about the initialized model. Defaults to True. - - mode: str or Mode, optional - Pytensor compile mode, used in auxiliary sampling methods such as ``sample_conditional_posterior`` and - ``forecast``. The mode does **not** effect calls to ``pm.sample``. - - Regardless of whether a mode is specified, it can always be overwritten via the ``compile_kwargs`` argument - to all sampling methods. - - Returns - ------- - PyMCStateSpace - An initialized instance of a PyMCStateSpace, constructed using the system matrices contained in the - components. - """ - - return StructuralTimeSeries( - self.ssm, - name=name, - state_names=self.state_names, - data_names=self.data_names, - shock_names=self.shock_names, - param_names=self.param_names, - param_dims=self.param_dims, - coords=self.coords, - param_info=self.param_info, - data_info=self.data_info, - component_info=self._component_info, - measurement_error=self.measurement_error, - exog_names=self.exog_names, - name_to_variable=self._name_to_variable, - name_to_data=self._name_to_data, - filter_type=filter_type, - verbose=verbose, - mode=mode, - ) - - -class LevelTrendComponent(Component): - r""" - Level and trend component of a structural time series model - - Parameters - ---------- - __________ - order : int - - Number of time derivatives of the trend to include in the model. For example, when order=3, the trend will - be of the form ``y = a + b * t + c * t ** 2``, where the coefficients ``a, b, c`` come from the initial - state values. - - innovations_order : int or sequence of int, optional - - The number of stochastic innovations to include in the model. 
By default, ``innovations_order = order`` - - Notes - ----- - This class implements the level and trend components of the general structural time series model. In the most - general form, the level and trend is described by a system of two time-varying equations. - - .. math:: - \begin{align} - \mu_{t+1} &= \mu_t + \nu_t + \zeta_t \\ - \nu_{t+1} &= \nu_t + \xi_t - \zeta_t &\sim N(0, \sigma_\zeta) \\ - \xi_t &\sim N(0, \sigma_\xi) - \end{align} - - Where :math:`\mu_{t+1}` is the mean of the timeseries at time t, and :math:`\nu_t` is the drift or the slope of - the process. When both innovations :math:`\zeta_t` and :math:`\xi_t` are included in the model, it is known as a - *local linear trend* model. This system of two equations, corresponding to ``order=2``, can be expanded or - contracted by adding or removing equations. ``order=3`` would add an acceleration term to the sytsem: - - .. math:: - \begin{align} - \mu_{t+1} &= \mu_t + \nu_t + \zeta_t \\ - \nu_{t+1} &= \nu_t + \eta_t + \xi_t \\ - \eta_{t+1} &= \eta_{t-1} + \omega_t \\ - \zeta_t &\sim N(0, \sigma_\zeta) \\ - \xi_t &\sim N(0, \sigma_\xi) \\ - \omega_t &\sim N(0, \sigma_\omega) - \end{align} - - After setting all innovation terms to zero and defining initial states :math:`\mu_0, \nu_0, \eta_0`, these equations - can be collapsed to: - - .. math:: - \mu_t = \mu_0 + \nu_0 \cdot t + \eta_0 \cdot t^2 - - Which clarifies how the order and initial states influence the model. In particular, the initial states are the - coefficients on the intercept, slope, acceleration, and so on. - - In this light, allowing for innovations can be understood as allowing these coefficients to vary over time. Each - component can be individually selected for time variation by passing a list to the ``innovations_order`` argument. - For example, a constant intercept with time varying trend and acceleration is specified as ``order=3, - innovations_order=[0, 1, 1]``. 
- - By choosing the ``order`` and ``innovations_order``, a large variety of models can be obtained. Notable - models include: - - * Constant intercept, ``order=1, innovations_order=0`` - - .. math:: - \mu_t = \mu - - * Constant linear slope, ``order=2, innovations_order=0`` - - .. math:: - \mu_t = \mu_{t-1} + \nu - - * Gaussian Random Walk, ``order=1, innovations_order=1`` - - .. math:: - \mu_t = \mu_{t-1} + \zeta_t - - * Gaussian Random Walk with Drift, ``order=2, innovations_order=1`` - - .. math:: - \mu_t = \mu_{t-1} + \nu + \zeta_t - - * Smooth Trend, ``order=2, innovations_order=[0, 1]`` - - .. math:: - \begin{align} - \mu_t &= \mu_{t-1} + \nu_{t-1} \\ - \nu_t &= \nu_{t-1} + \xi_t - \end{align} - - * Local Level, ``order=2, innovations_order=2`` - - [1] notes that the smooth trend model produces more gradually changing slopes than the full local linear trend - model, and is equivalent to an "integrated trend model". - - References - ---------- - .. [1] Durbin, James, and Siem Jan Koopman. 2012. - Time Series Analysis by State Space Methods: Second Edition. - Oxford University Press. - - """ - - def __init__( - self, - order: int | list[int] = 2, - innovations_order: int | list[int] | None = None, - name: str = "LevelTrend", - ): - if innovations_order is None: - innovations_order = order - - self._order_mask = order_to_mask(order) - max_state = np.flatnonzero(self._order_mask)[-1].item() + 1 - - # If the user passes excess zeros, raise an error. The alternative is to prune them, but this would cause - # the shape of the state to be different to what the user expects. - if len(self._order_mask) > max_state: - raise ValueError( - f"order={order} is invalid. The highest derivative should not be set to zero. If you want a " - f"lower order model, explicitly omit the zeros." 
- ) - k_states = max_state - - if isinstance(innovations_order, int): - n = innovations_order - innovations_order = order_to_mask(k_states) - if n > 0: - innovations_order[n:] = False - else: - innovations_order[:] = False - else: - innovations_order = order_to_mask(innovations_order) - - self.innovations_order = innovations_order[:max_state] - k_posdef = int(sum(innovations_order)) - - super().__init__( - name, - k_endog=1, - k_states=k_states, - k_posdef=k_posdef, - measurement_error=False, - combine_hidden_states=False, - obs_state_idxs=np.array([1.0] + [0.0] * (k_states - 1)), - ) - - def populate_component_properties(self): - name_slice = POSITION_DERIVATIVE_NAMES[: self.k_states] - self.param_names = ["initial_trend"] - self.state_names = [name for name, mask in zip(name_slice, self._order_mask) if mask] - self.param_dims = {"initial_trend": ("trend_state",)} - self.coords = {"trend_state": self.state_names} - self.param_info = {"initial_trend": {"shape": (self.k_states,), "constraints": None}} - - if self.k_posdef > 0: - self.param_names += ["sigma_trend"] - self.shock_names = [ - name for name, mask in zip(name_slice, self.innovations_order) if mask - ] - self.param_dims["sigma_trend"] = ("trend_shock",) - self.coords["trend_shock"] = self.shock_names - self.param_info["sigma_trend"] = {"shape": (self.k_posdef,), "constraints": "Positive"} - - for name in self.param_names: - self.param_info[name]["dims"] = self.param_dims[name] - - def make_symbolic_graph(self) -> None: - initial_trend = self.make_and_register_variable("initial_trend", shape=(self.k_states,)) - self.ssm["initial_state", :] = initial_trend - triu_idx = np.triu_indices(self.k_states) - self.ssm[np.s_["transition", triu_idx[0], triu_idx[1]]] = 1 - - R = np.eye(self.k_states) - R = R[:, self.innovations_order] - self.ssm["selection", :, :] = R - - self.ssm["design", 0, :] = np.array([1.0] + [0.0] * (self.k_states - 1)) - - if self.k_posdef > 0: - sigma_trend = 
self.make_and_register_variable("sigma_trend", shape=(self.k_posdef,)) - diag_idx = np.diag_indices(self.k_posdef) - idx = np.s_["state_cov", diag_idx[0], diag_idx[1]] - self.ssm[idx] = sigma_trend**2 - - -class MeasurementError(Component): - r""" - Measurement error term for a structural timeseries model - - Parameters - ---------- - name: str, optional - - Name of the observed data. Default is "obs". - - Notes - ----- - This component should only be used in combination with other components, because it has no states. It's only use - is to add a variance parameter to the model, associated with the observation noise matrix H. - - Examples - -------- - Create and estimate a deterministic linear trend with measurement error - - .. code:: python - - from pymc_extras.statespace import structural as st - import pymc as pm - import pytensor.tensor as pt - - trend = st.LevelTrendComponent(order=2, innovations_order=0) - error = st.MeasurementError() - ss_mod = (trend + error).build() - - with pm.Model(coords=ss_mod.coords) as model: - P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states) * 10, dims=ss_mod.param_dims['P0']) - intitial_trend = pm.Normal('initial_trend', sigma=10, dims=ss_mod.param_dims['initial_trend']) - sigma_obs = pm.Exponential('sigma_obs', 1, dims=ss_mod.param_dims['sigma_obs']) - - ss_mod.build_statespace_graph(data) - idata = pm.sample(nuts_sampler='numpyro') - """ - - def __init__(self, name: str = "MeasurementError"): - k_endog = 1 - k_states = 0 - k_posdef = 0 - - super().__init__( - name, k_endog, k_states, k_posdef, measurement_error=True, combine_hidden_states=False - ) - - def populate_component_properties(self): - self.param_names = [f"sigma_{self.name}"] - self.param_dims = {} - self.param_info = { - f"sigma_{self.name}": { - "shape": (), - "constraints": "Positive", - "dims": None, - } - } - - def make_symbolic_graph(self) -> None: - sigma_shape = () - error_sigma = self.make_and_register_variable(f"sigma_{self.name}", shape=sigma_shape) - 
diag_idx = np.diag_indices(self.k_endog) - idx = np.s_["obs_cov", diag_idx[0], diag_idx[1]] - self.ssm[idx] = error_sigma**2 - - -class AutoregressiveComponent(Component): - r""" - Autoregressive timeseries component - - Parameters - ---------- - order: int or sequence of int - - If int, the number of lags to include in the model. - If a sequence, an array-like of zeros and ones indicating which lags to include in the model. - - Notes - ----- - An autoregressive component can be thought of as a way o introducing serially correlated errors into the model. - The process is modeled: - - .. math:: - x_t = \sum_{i=1}^p \rho_i x_{t-i} - - Where ``p``, the number of autoregressive terms to model, is the order of the process. By default, all lags up to - ``p`` are included in the model. To disable lags, pass a list of zeros and ones to the ``order`` argumnet. For - example, ``order=[1, 1, 0, 1]`` would become: - - .. math:: - x_t = \rho_1 x_{t-1} + \rho_2 x_{t-1} + \rho_4 x_{t-1} - - The coefficient :math:`\rho_3` has been constrained to zero. - - .. warning:: This class is meant to be used as a component in a structural time series model. For modeling of - stationary processes with ARIMA, use ``statespace.BayesianSARIMA``. - - Examples - -------- - Model a timeseries as an AR(2) process with non-zero mean: - - .. 
code:: python - - from pymc_extras.statespace import structural as st - import pymc as pm - import pytensor.tensor as pt - - trend = st.LevelTrendComponent(order=1, innovations_order=0) - ar = st.AutoregressiveComponent(2) - ss_mod = (trend + ar).build() - - with pm.Model(coords=ss_mod.coords) as model: - P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states) * 10, dims=ss_mod.param_dims['P0']) - intitial_trend = pm.Normal('initial_trend', sigma=10, dims=ss_mod.param_dims['initial_trend']) - ar_params = pm.Normal('ar_params', dims=ss_mod.param_dims['ar_params']) - sigma_ar = pm.Exponential('sigma_ar', 1, dims=ss_mod.param_dims['sigma_ar']) - - ss_mod.build_statespace_graph(data) - idata = pm.sample(nuts_sampler='numpyro') - - """ - - def __init__(self, order: int = 1, name: str = "AutoRegressive"): - order = order_to_mask(order) - ar_lags = np.flatnonzero(order).ravel().astype(int) + 1 - k_states = len(order) - - self.order = order - self.ar_lags = ar_lags - - super().__init__( - name=name, - k_endog=1, - k_states=k_states, - k_posdef=1, - measurement_error=True, - combine_hidden_states=True, - obs_state_idxs=np.r_[[1.0], np.zeros(k_states - 1)], - ) - - def populate_component_properties(self): - self.state_names = [f"L{i + 1}.data" for i in range(self.k_states)] - self.shock_names = [f"{self.name}_innovation"] - self.param_names = ["ar_params", "sigma_ar"] - self.param_dims = {"ar_params": (AR_PARAM_DIM,)} - self.coords = {AR_PARAM_DIM: self.ar_lags.tolist()} - - self.param_info = { - "ar_params": { - "shape": (self.k_states,), - "constraints": None, - "dims": (AR_PARAM_DIM,), - }, - "sigma_ar": {"shape": (), "constraints": "Positive", "dims": None}, - } - - def make_symbolic_graph(self) -> None: - k_nonzero = int(sum(self.order)) - ar_params = self.make_and_register_variable("ar_params", shape=(k_nonzero,)) - sigma_ar = self.make_and_register_variable("sigma_ar", shape=()) - - T = np.eye(self.k_states, k=-1) - self.ssm["transition", :, :] = T - self.ssm["selection", 
0, 0] = 1 - self.ssm["design", 0, 0] = 1 - - ar_idx = ("transition", np.zeros(k_nonzero, dtype="int"), np.nonzero(self.order)[0]) - self.ssm[ar_idx] = ar_params - - cov_idx = ("state_cov", *np.diag_indices(1)) - self.ssm[cov_idx] = sigma_ar**2 - - -class TimeSeasonality(Component): - r""" - Seasonal component, modeled in the time domain - - Parameters - ---------- - season_length: int - The number of periods in a single seasonal cycle, e.g. 12 for monthly data with annual seasonal pattern, 7 for - daily data with weekly seasonal pattern, etc. - - innovations: bool, default True - Whether to include stochastic innovations in the strength of the seasonal effect - - name: str, default None - A name for this seasonal component. Used to label dimensions and coordinates. Useful when multiple seasonal - components are included in the same model. Default is ``f"Seasonal[s={season_length}]"`` - - state_names: list of str, default None - List of strings for seasonal effect labels. If provided, it must be of length ``season_length``. An example - would be ``state_names = ['Mon', 'Tue', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun']`` when data is daily with a weekly - seasonal pattern (``season_length = 7``). - - If None, states will be numbered ``[State_0, ..., State_s]`` - - remove_first_state: bool, default True - If True, the first state will be removed from the model. This is done because there are only n-1 degrees of - freedom in the seasonal component, and one state is not identified. If False, the first state will be - included in the model, but it will not be identified -- you will need to handle this in the priors (e.g. with - ZeroSumNormal). - - Notes - ----- - A seasonal effect is any pattern that repeats every fixed interval. Although there are many possible ways to - model seasonal effects, the implementation used here is the one described by [1] as the "canonical" time domain - representation. The seasonal component can be expressed: - - .. 
math:: - \gamma_t = -\sum_{i=1}^{s-1} \gamma_{t-i} + \omega_t, \quad \omega_t \sim N(0, \sigma_\gamma) - - Where :math:`s` is the ``seasonal_length`` parameter and :math:`\omega_t` is the (optional) stochastic innovation. - To give interpretation to the :math:`\gamma` terms, it is helpful to work through the algebra for a simple - example. Let :math:`s=4`, and omit the shock term. Define initial conditions :math:`\gamma_0, \gamma_{-1}, - \gamma_{-2}`. The value of the seasonal component for the first 5 timesteps will be: - - .. math:: - \begin{align} - \gamma_1 &= -\gamma_0 - \gamma_{-1} - \gamma_{-2} \\ - \gamma_2 &= -\gamma_1 - \gamma_0 - \gamma_{-1} \\ - &= -(-\gamma_0 - \gamma_{-1} - \gamma_{-2}) - \gamma_0 - \gamma_{-1} \\ - &= (\gamma_0 - \gamma_0 )+ (\gamma_{-1} - \gamma_{-1}) + \gamma_{-2} \\ - &= \gamma_{-2} \\ - \gamma_3 &= -\gamma_2 - \gamma_1 - \gamma_0 \\ - &= -\gamma_{-2} - (-\gamma_0 - \gamma_{-1} - \gamma_{-2}) - \gamma_0 \\ - &= (\gamma_{-2} - \gamma_{-2}) + \gamma_{-1} + (\gamma_0 - \gamma_0) \\ - &= \gamma_{-1} \\ - \gamma_4 &= -\gamma_3 - \gamma_2 - \gamma_1 \\ - &= -\gamma_{-1} - \gamma_{-2} -(-\gamma_0 - \gamma_{-1} - \gamma_{-2}) \\ - &= (\gamma_{-2} - \gamma_{-2}) + (\gamma_{-1} - \gamma_{-1}) + \gamma_0 \\ - &= \gamma_0 \\ - \gamma_5 &= -\gamma_4 - \gamma_3 - \gamma_2 \\ - &= -\gamma_0 - \gamma_{-1} - \gamma_{-2} \\ - &= \gamma_1 - \end{align} - - This exercise shows that, given a list ``initial_conditions`` of length ``s-1``, the effects of this model will be: - - - Period 1: ``-sum(initial_conditions)`` - - Period 2: ``initial_conditions[-1]`` - - Period 3: ``initial_conditions[-2]`` - - ... - - Period s: ``initial_conditions[0]`` - - Period s+1: ``-sum(initial_condition)`` - - And so on. So for interpretation, the ``season_length - 1`` initial states are, when reversed, the coefficients - associated with ``state_names[1:]``. - - .. 
warning:: - Although the ``state_names`` argument expects a list of length ``season_length``, only ``state_names[1:]`` - will be saved as model dimensions, since the 1st coefficient is not identified (it is defined as - :math:`-\sum_{i=1}^{s} \gamma_{t-i}`). - - Examples - -------- - Estimate monthly with a model with a gaussian random walk trend and monthly seasonality: - - .. code:: python - - from pymc_extras.statespace import structural as st - import pymc as pm - import pytensor.tensor as pt - import pandas as pd - - # Get month names - state_names = pd.date_range('1900-01-01', '1900-12-31', freq='MS').month_name().tolist() - - # Build the structural model - grw = st.LevelTrendComponent(order=1, innovations_order=1) - annual_season = st.TimeSeasonality(season_length=12, name='annual', state_names=state_names, innovations=False) - ss_mod = (grw + annual_season).build() - - # Estimate with PyMC - with pm.Model(coords=ss_mod.coords) as model: - P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states) * 10, dims=ss_mod.param_dims['P0']) - intitial_trend = pm.Deterministic('initial_trend', pt.zeros(1), dims=ss_mod.param_dims['initial_trend']) - annual_coefs = pm.Normal('annual_coefs', sigma=1e-2, dims=ss_mod.param_dims['annual_coefs']) - trend_sigmas = pm.HalfNormal('trend_sigmas', sigma=1e-6, dims=ss_mod.param_dims['trend_sigmas']) - ss_mod.build_statespace_graph(data) - idata = pm.sample(nuts_sampler='numpyro') - - References - ---------- - .. [1] Durbin, James, and Siem Jan Koopman. 2012. - Time Series Analysis by State Space Methods: Second Edition. - Oxford University Press. 
- """ - - def __init__( - self, - season_length: int, - innovations: bool = True, - name: str | None = None, - state_names: list | None = None, - remove_first_state: bool = True, - ): - if name is None: - name = f"Seasonal[s={season_length}]" - if state_names is None: - state_names = [f"{name}_{i}" for i in range(season_length)] - else: - if len(state_names) != season_length: - raise ValueError( - f"state_names must be a list of length season_length, got {len(state_names)}" - ) - state_names = state_names.copy() - self.innovations = innovations - self.remove_first_state = remove_first_state - - if self.remove_first_state: - # In traditional models, the first state isn't identified, so we can help out the user by automatically - # discarding it. - # TODO: Can this be stashed and reconstructed automatically somehow? - state_names.pop(0) - - k_states = season_length - int(self.remove_first_state) - - super().__init__( - name=name, - k_endog=1, - k_states=k_states, - k_posdef=int(innovations), - state_names=state_names, - measurement_error=False, - combine_hidden_states=True, - obs_state_idxs=np.r_[[1.0], np.zeros(k_states - 1)], - ) - - def populate_component_properties(self): - self.param_names = [f"{self.name}_coefs"] - self.param_info = { - f"{self.name}_coefs": { - "shape": (self.k_states,), - "constraints": None, - "dims": (f"{self.name}_state",), - } - } - self.param_dims = {f"{self.name}_coefs": (f"{self.name}_state",)} - self.coords = {f"{self.name}_state": self.state_names} - - if self.innovations: - self.param_names += [f"sigma_{self.name}"] - self.param_info[f"sigma_{self.name}"] = { - "shape": (), - "constraints": "Positive", - "dims": None, - } - self.shock_names = [f"{self.name}"] - - def make_symbolic_graph(self) -> None: - if self.remove_first_state: - # In this case, parameters are normalized to sum to zero, so the current state is the negative sum of - # all previous states. 
- T = np.eye(self.k_states, k=-1) - T[0, :] = -1 - else: - # In this case we assume the user to be responsible for ensuring the states sum to zero, so T is just a - # circulant matrix that cycles between the states. - T = np.eye(self.k_states, k=1) - T[-1, 0] = 1 - - self.ssm["transition", :, :] = T - self.ssm["design", 0, 0] = 1 - - initial_states = self.make_and_register_variable( - f"{self.name}_coefs", shape=(self.k_states,) - ) - self.ssm["initial_state", np.arange(self.k_states, dtype=int)] = initial_states - - if self.innovations: - self.ssm["selection", 0, 0] = 1 - season_sigma = self.make_and_register_variable(f"sigma_{self.name}", shape=()) - cov_idx = ("state_cov", *np.diag_indices(1)) - self.ssm[cov_idx] = season_sigma**2 - - -class FrequencySeasonality(Component): - r""" - Seasonal component, modeled in the frequency domain - - Parameters - ---------- - season_length: float - The number of periods in a single seasonal cycle, e.g. 12 for monthly data with annual seasonal pattern, 7 for - daily data with weekly seasonal pattern, etc. Non-integer seasonal_length is also permitted, for example - 365.2422 days in a (solar) year. - - n: int - Number of fourier features to include in the seasonal component. Default is ``season_length // 2``, which - is the maximum possible. A smaller number can be used for a more wave-like seasonal pattern. - - name: str, default None - A name for this seasonal component. Used to label dimensions and coordinates. Useful when multiple seasonal - components are included in the same model. Default is ``f"Seasonal[s={season_length}, n={n}]"`` - - innovations: bool, default True - Whether to include stochastic innovations in the strength of the seasonal effect - - Notes - ----- - A seasonal effect is any pattern that repeats every fixed interval. Although there are many possible ways to - model seasonal effects, the implementation used here is the one described by [1] as the "canonical" frequency domain - representation. 
The seasonal component can be expressed: - - .. math:: - \begin{align} - \gamma_t &= \sum_{j=1}^{2n} \gamma_{j,t} \\ - \gamma_{j, t+1} &= \gamma_{j,t} \cos \lambda_j + \gamma_{j,t}^\star \sin \lambda_j + \omega_{j, t} \\ - \gamma_{j, t}^\star &= -\gamma_{j,t} \sin \lambda_j + \gamma_{j,t}^\star \cos \lambda_j + \omega_{j,t}^\star - \lambda_j &= \frac{2\pi j}{s} - \end{align} - - Where :math:`s` is the ``seasonal_length``. - - Unlike a ``TimeSeasonality`` component, a ``FrequencySeasonality`` component does not require integer season - length. In addition, for long seasonal periods, it is possible to obtain a more compact state space representation - by choosing ``n << s // 2``. Using ``TimeSeasonality``, an annual seasonal pattern in daily data requires 364 - states, whereas ``FrequencySeasonality`` always requires ``2 * n`` states, regardless of the ``seasonal_length``. - The price of this compactness is less representational power. At ``n = 1``, the seasonal pattern will be a pure - sine wave. At ``n = s // 2``, any arbitrary pattern can be represented. - - One cost of the added flexibility of ``FrequencySeasonality`` is reduced interpretability. States of this model are - coefficients :math:`\gamma_1, \gamma^\star_1, \gamma_2, \gamma_2^\star ..., \gamma_n, \gamma^\star_n` associated - with different frequencies in the fourier representation of the seasonal pattern. As a result, it is not possible - to isolate and identify a "Monday" effect, for instance. - """ - - def __init__(self, season_length, n=None, name=None, innovations=True): - if n is None: - n = int(season_length // 2) - if name is None: - name = f"Frequency[s={season_length}, n={n}]" - - k_states = n * 2 - self.n = n - self.season_length = season_length - self.innovations = innovations - - # If the model is completely saturated (n = s // 2), the last state will not be identified, so it shouldn't - # get a parameter assigned to it and should just be fixed to zero. 
- # Test this way (rather than n == s // 2) to catch cases when n is non-integer. - self.last_state_not_identified = self.season_length / self.n == 2.0 - self.n_coefs = k_states - int(self.last_state_not_identified) - - obs_state_idx = np.zeros(k_states) - obs_state_idx[slice(0, k_states, 2)] = 1 - - super().__init__( - name=name, - k_endog=1, - k_states=k_states, - k_posdef=k_states * int(self.innovations), - measurement_error=False, - combine_hidden_states=True, - obs_state_idxs=obs_state_idx, - ) - - def make_symbolic_graph(self) -> None: - self.ssm["design", 0, slice(0, self.k_states, 2)] = 1 - - init_state = self.make_and_register_variable(f"{self.name}", shape=(self.n_coefs,)) - - init_state_idx = np.arange(self.n_coefs, dtype=int) - self.ssm["initial_state", init_state_idx] = init_state - - T_mats = [_frequency_transition_block(self.season_length, j + 1) for j in range(self.n)] - T = pt.linalg.block_diag(*T_mats) - self.ssm["transition", :, :] = T - - if self.innovations: - sigma_season = self.make_and_register_variable(f"sigma_{self.name}", shape=()) - self.ssm["state_cov", :, :] = pt.eye(self.k_posdef) * sigma_season**2 - self.ssm["selection", :, :] = np.eye(self.k_states) - - def populate_component_properties(self): - self.state_names = [f"{self.name}_{f}_{i}" for i in range(self.n) for f in ["Cos", "Sin"]] - self.param_names = [f"{self.name}"] - - self.param_dims = {self.name: (f"{self.name}_state",)} - self.param_info = { - f"{self.name}": { - "shape": (self.k_states - int(self.last_state_not_identified),), - "constraints": None, - "dims": (f"{self.name}_state",), - } - } - - init_state_idx = np.arange(self.k_states, dtype=int) - if self.last_state_not_identified: - init_state_idx = init_state_idx[:-1] - self.coords = {f"{self.name}_state": [self.state_names[i] for i in init_state_idx]} - - if self.innovations: - self.shock_names = self.state_names.copy() - self.param_names += [f"sigma_{self.name}"] - self.param_info[f"sigma_{self.name}"] = { - "shape": 
(), - "constraints": "Positive", - "dims": None, - } - - -class CycleComponent(Component): - r""" - A component for modeling longer-term cyclical effects - - Parameters - ---------- - name: str - Name of the component. Used in generated coordinates and state names. If None, a descriptive name will be - used. - - cycle_length: int, optional - The length of the cycle, in the calendar units of your data. For example, if your data is monthly, and you - want to model a 12-month cycle, use ``cycle_length=12``. You cannot specify both ``cycle_length`` and - ``estimate_cycle_length``. - - estimate_cycle_length: bool, default False - Whether to estimate the cycle length. If True, an additional parameter, ``cycle_length`` will be added to the - model. You cannot specify both ``cycle_length`` and ``estimate_cycle_length``. - - dampen: bool, default False - Whether to dampen the cycle by multiplying by a dampening factor :math:`\rho` at every timestep. If true, - an additional parameter, ``dampening_factor`` will be added to the model. - - innovations: bool, default True - Whether to include stochastic innovations in the strength of the seasonal effect. If True, an additional - parameter, ``sigma_{name}`` will be added to the model. - - Notes - ----- - The cycle component is very similar in implementation to the frequency domain seasonal component, expect that it - is restricted to n=1. The cycle component can be expressed: - - .. math:: - \begin{align} - \gamma_t &= \rho \gamma_{t-1} \cos \lambda + \rho \gamma_{t-1}^\star \sin \lambda + \omega_{t} \\ - \gamma_{t}^\star &= -\rho \gamma_{t-1} \sin \lambda + \rho \gamma_{t-1}^\star \cos \lambda + \omega_{t}^\star \\ - \lambda &= \frac{2\pi}{s} - \end{align} - - Where :math:`s` is the ``cycle_length``. [1] recommend that this component be used for longer term cyclical - effects, such as business cycles, and that the seasonal component be used for shorter term effects, such as - weekly or monthly seasonality. 
- - Unlike a FrequencySeasonality component, the length of a CycleComponent can be estimated. - - Examples - -------- - Estimate a business cycle with length between 6 and 12 years: - - .. code:: python - - from pymc_extras.statespace import structural as st - import pymc as pm - import pytensor.tensor as pt - import pandas as pd - import numpy as np - - data = np.random.normal(size=(100, 1)) - - # Build the structural model - grw = st.LevelTrendComponent(order=1, innovations_order=1) - cycle = st.CycleComponent('business_cycle', estimate_cycle_length=True, dampen=False) - ss_mod = (grw + cycle).build() - - # Estimate with PyMC - with pm.Model(coords=ss_mod.coords) as model: - P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states), dims=ss_mod.param_dims['P0']) - intitial_trend = pm.Normal('initial_trend', dims=ss_mod.param_dims['initial_trend']) - sigma_trend = pm.HalfNormal('sigma_trend', dims=ss_mod.param_dims['sigma_trend']) - - cycle_strength = pm.Normal('business_cycle') - cycle_length = pm.Uniform('business_cycle_length', lower=6, upper=12) - - sigma_cycle = pm.HalfNormal('sigma_business_cycle', sigma=1) - ss_mod.build_statespace_graph(data) - - idata = pm.sample(nuts_sampler='numpyro') - - References - ---------- - .. [1] Durbin, James, and Siem Jan Koopman. 2012. - Time Series Analysis by State Space Methods: Second Edition. - Oxford University Press. 
- """ - - def __init__( - self, - name: str | None = None, - cycle_length: int | None = None, - estimate_cycle_length: bool = False, - dampen: bool = False, - innovations: bool = True, - ): - if cycle_length is None and not estimate_cycle_length: - raise ValueError("Must specify cycle_length if estimate_cycle_length is False") - if cycle_length is not None and estimate_cycle_length: - raise ValueError("Cannot specify cycle_length if estimate_cycle_length is True") - if name is None: - cycle = int(cycle_length) if cycle_length is not None else "Estimate" - name = f"Cycle[s={cycle}, dampen={dampen}, innovations={innovations}]" - - self.estimate_cycle_length = estimate_cycle_length - self.cycle_length = cycle_length - self.innovations = innovations - self.dampen = dampen - self.n_coefs = 1 - - k_states = 2 - k_endog = 1 - k_posdef = 2 - - obs_state_idx = np.zeros(k_states) - obs_state_idx[slice(0, k_states, 2)] = 1 - - super().__init__( - name=name, - k_endog=k_endog, - k_states=k_states, - k_posdef=k_posdef, - measurement_error=False, - combine_hidden_states=True, - obs_state_idxs=obs_state_idx, - ) - - def make_symbolic_graph(self) -> None: - self.ssm["design", 0, slice(0, self.k_states, 2)] = 1 - self.ssm["selection", :, :] = np.eye(self.k_states) - self.param_dims = {self.name: (f"{self.name}_state",)} - self.coords = {f"{self.name}_state": self.state_names} - - init_state = self.make_and_register_variable(f"{self.name}", shape=(self.k_states,)) - - self.ssm["initial_state", :] = init_state - - if self.estimate_cycle_length: - lamb = self.make_and_register_variable(f"{self.name}_length", shape=()) - else: - lamb = self.cycle_length - - if self.dampen: - rho = self.make_and_register_variable(f"{self.name}_dampening_factor", shape=()) - else: - rho = 1 - - T = rho * _frequency_transition_block(lamb, j=1) - self.ssm["transition", :, :] = T - - if self.innovations: - sigma_cycle = self.make_and_register_variable(f"sigma_{self.name}", shape=()) - self.ssm["state_cov", 
:, :] = pt.eye(self.k_posdef) * sigma_cycle**2 - - def populate_component_properties(self): - self.state_names = [f"{self.name}_{f}" for f in ["Cos", "Sin"]] - self.param_names = [f"{self.name}"] - - self.param_info = { - f"{self.name}": { - "shape": (2,), - "constraints": None, - "dims": (f"{self.name}_state",), - } - } - - if self.estimate_cycle_length: - self.param_names += [f"{self.name}_length"] - self.param_info[f"{self.name}_length"] = { - "shape": (), - "constraints": "Positive, non-zero", - "dims": None, - } - - if self.dampen: - self.param_names += [f"{self.name}_dampening_factor"] - self.param_info[f"{self.name}_dampening_factor"] = { - "shape": (), - "constraints": "0 < x ≤ 1", - "dims": None, - } - - if self.innovations: - self.param_names += [f"sigma_{self.name}"] - self.param_info[f"sigma_{self.name}"] = { - "shape": (), - "constraints": "Positive", - "dims": None, - } - self.shock_names = self.state_names.copy() - - -class RegressionComponent(Component): - def __init__( - self, - k_exog: int | None = None, - name: str | None = "Exogenous", - state_names: list[str] | None = None, - innovations=False, - ): - self.innovations = innovations - k_exog = self._handle_input_data(k_exog, state_names, name) - - k_states = k_exog - k_endog = 1 - k_posdef = k_exog - - super().__init__( - name=name, - k_endog=k_endog, - k_states=k_states, - k_posdef=k_posdef, - state_names=self.state_names, - measurement_error=False, - combine_hidden_states=False, - exog_names=[f"data_{name}"], - obs_state_idxs=np.ones(k_states), - ) - - @staticmethod - def _get_state_names(k_exog: int | None, state_names: list[str] | None, name: str): - if k_exog is None and state_names is None: - raise ValueError("Must specify at least one of k_exog or state_names") - if state_names is not None and k_exog is not None: - if len(state_names) != k_exog: - raise ValueError(f"Expected {k_exog} state names, found {len(state_names)}") - elif k_exog is None: - k_exog = len(state_names) - else: - 
state_names = [f"{name}_{i + 1}" for i in range(k_exog)] - - return k_exog, state_names - - def _handle_input_data(self, k_exog: int, state_names: list[str] | None, name) -> int: - k_exog, state_names = self._get_state_names(k_exog, state_names, name) - self.state_names = state_names - - return k_exog - - def make_symbolic_graph(self) -> None: - betas = self.make_and_register_variable(f"beta_{self.name}", shape=(self.k_states,)) - regression_data = self.make_and_register_data( - f"data_{self.name}", shape=(None, self.k_states) - ) - - self.ssm["initial_state", :] = betas - self.ssm["transition", :, :] = np.eye(self.k_states) - self.ssm["selection", :, :] = np.eye(self.k_states) - self.ssm["design"] = pt.expand_dims(regression_data, 1) - - if self.innovations: - sigma_beta = self.make_and_register_variable( - f"sigma_beta_{self.name}", (self.k_states,) - ) - row_idx, col_idx = np.diag_indices(self.k_states) - self.ssm["state_cov", row_idx, col_idx] = sigma_beta**2 - - def populate_component_properties(self) -> None: - self.shock_names = self.state_names - - self.param_names = [f"beta_{self.name}"] - self.data_names = [f"data_{self.name}"] - self.param_dims = { - f"beta_{self.name}": ("exog_state",), - } - - self.param_info = { - f"beta_{self.name}": { - "shape": (self.k_states,), - "constraints": None, - "dims": ("exog_state",), - }, - } - - self.data_info = { - f"data_{self.name}": { - "shape": (None, self.k_states), - "dims": (TIME_DIM, "exog_state"), - }, - } - self.coords = {"exog_state": self.state_names} - - if self.innovations: - self.param_names += [f"sigma_beta_{self.name}"] - self.param_dims[f"sigma_beta_{self.name}"] = "exog_state" - self.param_info[f"sigma_beta_{self.name}"] = { - "shape": (), - "constraints": "Positive", - "dims": ("exog_state",), - } From 0c4590eb88f30ede71f57be08ffb42d0182ffaab Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 6 Jul 2025 11:59:59 +0800 Subject: [PATCH 13/21] Always count names to determine k_endog --- 
pymc_extras/statespace/models/structural/core.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pymc_extras/statespace/models/structural/core.py b/pymc_extras/statespace/models/structural/core.py index 913c58e17..d5fbe4e6e 100644 --- a/pymc_extras/statespace/models/structural/core.py +++ b/pymc_extras/statespace/models/structural/core.py @@ -483,13 +483,12 @@ def populate_component_properties(self): def _get_combined_shapes(self, other): k_states = self.k_states + other.k_states k_posdef = self.k_posdef + other.k_posdef - if self.k_endog == other.k_endog: - k_endog = self.k_endog - else: - combined_states = self._combine_property( - other, "observed_state_names", allow_duplicates=False - ) - k_endog = len(combined_states) + + # To count endog states, we have to count unique names between the two components. + combined_states = self._combine_property( + other, "observed_state_names", allow_duplicates=False + ) + k_endog = len(combined_states) return k_states, k_posdef, k_endog From 3c5124d6ce6c662fa8462ef0595cb042a2bb14ef Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 6 Jul 2025 12:11:52 +0800 Subject: [PATCH 14/21] LevelTrend state/shock names depend on component name --- .../structural/components/level_trend.py | 70 ++++++++++------ .../structural/components/test_level_trend.py | 83 +++++++++++++++++-- .../structural/test_against_statsmodels.py | 16 ++-- 3 files changed, 126 insertions(+), 43 deletions(-) diff --git a/pymc_extras/statespace/models/structural/components/level_trend.py b/pymc_extras/statespace/models/structural/components/level_trend.py index 1563dde72..7e301e8d8 100644 --- a/pymc_extras/statespace/models/structural/components/level_trend.py +++ b/pymc_extras/statespace/models/structural/components/level_trend.py @@ -1,6 +1,5 @@ import numpy as np - -from scipy import linalg +import pytensor.tensor as pt from pymc_extras.statespace.models.structural.core import Component from 
pymc_extras.statespace.models.structural.utils import order_to_mask @@ -114,7 +113,7 @@ def __init__( self, order: int | list[int] = 2, innovations_order: int | list[int] | None = None, - name: str = "LevelTrend", + name: str = "level_trend", observed_state_names: list[str] | None = None, ): if innovations_order is None: @@ -166,35 +165,46 @@ def populate_component_properties(self): k_posdef = self.k_posdef // k_endog name_slice = POSITION_DERIVATIVE_NAMES[:k_states] - self.param_names = ["initial_trend"] + self.param_names = [f"{self.name}_initial"] base_names = [name for name, mask in zip(name_slice, self._order_mask) if mask] self.state_names = [ - f"{name}[{obs_name}]" for obs_name in self.observed_state_names for name in base_names + f"{name}[{obs_name}]" if k_endog > 1 else name + for obs_name in self.observed_state_names + for name in base_names ] - self.param_dims = {"initial_trend": ("trend_state",)} - self.coords = {"trend_state": base_names} + self.param_dims = {f"{self.name}_initial": (f"{self.name}_state",)} + self.coords = {f"{self.name}_state": base_names} if k_endog > 1: - self.param_dims["trend_state"] = ( - "trend_endog", - "trend_state", + self.param_dims[f"{self.name}_state"] = ( + f"{self.name}_endog", + f"{self.name}_state", ) - self.param_dims = {"initial_trend": ("trend_endog", "trend_state")} - self.coords["trend_endog"] = self.observed_state_names + self.param_dims = {f"{self.name}_initial": (f"{self.name}_endog", f"{self.name}_state")} + self.coords[f"{self.name}_endog"] = self.observed_state_names shape = (k_endog, k_states) if k_endog > 1 else (k_states,) - self.param_info = {"initial_trend": {"shape": shape, "constraints": None}} + self.param_info = {f"{self.name}_initial": {"shape": shape, "constraints": None}} if self.k_posdef > 0: - self.param_names += ["sigma_trend"] - self.shock_names = [ + self.param_names += [f"{self.name}_sigma"] + + shock_base_names = [ name for name, mask in zip(name_slice, self.innovations_order) if mask ] - 
self.param_dims["sigma_trend"] = ( - ("trend_shock",) if k_endog == 1 else ("trend_endog", "trend_shock") + self.shock_names = [ + f"{name}[{obs_name}]" if k_endog > 1 else name + for obs_name in self.observed_state_names + for name in shock_base_names + ] + + self.param_dims[f"{self.name}_sigma"] = ( + (f"{self.name}_shock",) + if k_endog == 1 + else (f"{self.name}_endog", f"{self.name}_shock") ) - self.coords["trend_shock"] = self.shock_names - self.param_info["sigma_trend"] = { + self.coords[f"{self.name}_shock"] = self.shock_names + self.param_info[f"{self.name}_sigma"] = { "shape": (k_posdef,) if k_endog == 1 else (k_endog, k_posdef), "constraints": "Positive", } @@ -208,28 +218,34 @@ def make_symbolic_graph(self) -> None: k_posdef = self.k_posdef // k_endog initial_trend = self.make_and_register_variable( - "initial_trend", + f"{self.name}_initial", shape=(k_states,) if k_endog == 1 else (k_endog, k_states), ) self.ssm["initial_state", :] = initial_trend.ravel() - triu_idx = np.triu_indices(k_states) - T = np.zeros((k_states, k_states)) - T[triu_idx[0], triu_idx[1]] = 1 + triu_idx = pt.triu_indices(k_states) + T = pt.zeros((k_states, k_states))[triu_idx[0], triu_idx[1]].set(1) - self.ssm["transition"] = linalg.block_diag(*[T for _ in range(k_endog)]) + self.ssm["transition", :, :] = pt.specify_shape( + pt.linalg.block_diag(*[T for _ in range(k_endog)]), (self.k_states, self.k_states) + ) R = np.eye(k_states) R = R[:, self.innovations_order] - self.ssm["selection", :, :] = linalg.block_diag(*[R for _ in range(k_endog)]) + self.ssm["selection", :, :] = pt.specify_shape( + pt.linalg.block_diag(*[R for _ in range(k_endog)]), (self.k_states, self.k_posdef) + ) Z = np.array([1.0] + [0.0] * (k_states - 1)).reshape((1, -1)) - self.ssm["design"] = linalg.block_diag(*[Z for _ in range(k_endog)]) + + self.ssm["design", :, :] = pt.specify_shape( + pt.linalg.block_diag(*[Z for _ in range(k_endog)]), (self.k_endog, self.k_states) + ) if k_posdef > 0: sigma_trend = 
self.make_and_register_variable( - "sigma_trend", + f"{self.name}_sigma", shape=(k_posdef,) if k_endog == 1 else (k_endog, k_posdef), ) diag_idx = np.diag_indices(k_posdef * k_endog) diff --git a/tests/statespace/models/structural/components/test_level_trend.py b/tests/statespace/models/structural/components/test_level_trend.py index 64f04b403..45503912a 100644 --- a/tests/statespace/models/structural/components/test_level_trend.py +++ b/tests/statespace/models/structural/components/test_level_trend.py @@ -1,4 +1,5 @@ import numpy as np +import pytensor from numpy.testing import assert_allclose from pytensor import config @@ -13,7 +14,7 @@ def test_level_trend_model(rng): mod = st.LevelTrendComponent(order=2, innovations_order=0) - params = {"initial_trend": [0.0, 1.0]} + params = {"level_trend_initial": [0.0, 1.0]} x, y = simulate_from_numpy_model(mod, rng, params) assert_allclose(np.diff(y), 1, atol=ATOL, rtol=RTOL) @@ -21,7 +22,7 @@ def test_level_trend_model(rng): # Check coords mod = mod.build(verbose=False) _assert_basic_coords_correct(mod) - assert mod.coords["trend_state"] == ["level", "trend"] + assert mod.coords["level_trend_state"] == ["level", "trend"] def test_level_trend_multiple_observed_construction(): @@ -33,12 +34,22 @@ def test_level_trend_multiple_observed_construction(): assert mod.k_states == 6 assert mod.k_posdef == 3 - assert mod.coords["trend_state"] == ["level", "trend"] - assert mod.coords["trend_endog"] == ["data_1", "data_2", "data_3"] + assert mod.coords["level_trend_state"] == ["level", "trend"] + assert mod.coords["level_trend_endog"] == ["data_1", "data_2", "data_3"] - Z = mod.ssm["design"].eval() - T = mod.ssm["transition"].eval() - R = mod.ssm["selection"].eval() + assert mod.state_names == [ + "level[data_1]", + "trend[data_1]", + "level[data_2]", + "trend[data_2]", + "level[data_3]", + "trend[data_3]", + ] + assert mod.shock_names == ["level_shock[data_1]", "level_shock[data_2]", "level_shock[data_3]"] + + Z, T, R = 
pytensor.function( + [], [mod.ssm["design"], mod.ssm["transition"], mod.ssm["selection"]], mode="FAST_COMPILE" + )() np.testing.assert_allclose( Z, @@ -84,8 +95,64 @@ def test_level_trend_multiple_observed(rng): mod = st.LevelTrendComponent( order=2, innovations_order=0, observed_state_names=["data_1", "data_2", "data_3"] ) - params = {"initial_trend": np.array([[0.0, 1.0], [0.0, 2.0], [0.0, 3.0]])} + params = {"level_trend_initial": np.array([[0.0, 1.0], [0.0, 2.0], [0.0, 3.0]])} x, y = simulate_from_numpy_model(mod, rng, params) assert (np.diff(y, axis=0) == np.array([[1.0, 2.0, 3.0]])).all().all() assert (np.diff(x, axis=0) == np.array([[1.0, 0.0, 2.0, 0.0, 3.0, 0.0]])).all().all() + + +def test_add_level_trend_with_different_observed(): + mod_1 = st.LevelTrendComponent( + name="ll", order=2, innovations_order=[0, 1], observed_state_names=["data_1"] + ) + mod_2 = st.LevelTrendComponent( + name="grw", order=1, innovations_order=[1], observed_state_names=["data_2"] + ) + + mod = (mod_1 + mod_2).build(verbose=False) + assert mod.k_endog == 2 + assert mod.k_states == 3 + assert mod.k_posdef == 2 + + assert mod.coords["ll_state"] == ["level", "trend"] + assert mod.coords["grw_state"] == ["level"] + + assert mod.state_names == ["level[data_1]", "trend[data_1]", "level[data_2]"] + assert mod.shock_names == ["trend_shock[data_1]", "level_shock[data_2]"] + + Z, T, R = pytensor.function( + [], [mod.ssm["design"], mod.ssm["transition"], mod.ssm["selection"]], mode="FAST_COMPILE" + )() + + np.testing.assert_allclose( + Z, + np.array( + [ + [1.0, 0.0, 0.0], + [0.0, 0.0, 1.0], + ] + ), + ) + + np.testing.assert_allclose( + T, + np.array( + [ + [1.0, 1.0, 0.0], + [0.0, 1.0, 0.0], + [0.0, 0.0, 1.0], + ] + ), + ) + + np.testing.assert_allclose( + R, + np.array( + [ + [0.0, 0.0], + [1.0, 0.0], + [0.0, 1.0], + ] + ), + ) diff --git a/tests/statespace/models/structural/test_against_statsmodels.py b/tests/statespace/models/structural/test_against_statsmodels.py index 
3495ecc14..98318711f 100644 --- a/tests/statespace/models/structural/test_against_statsmodels.py +++ b/tests/statespace/models/structural/test_against_statsmodels.py @@ -220,7 +220,7 @@ def create_structural_model_and_equivalent_statsmodel( if level: level_trend_order[0] = 1 - expected_coords["trend_state"] += [ + expected_coords["level_state"] += [ "level", ] expected_coords[ALL_STATE_DIM] += [ @@ -231,7 +231,7 @@ def create_structural_model_and_equivalent_statsmodel( ] if stochastic_level: level_trend_innov_order[0] = 1 - expected_coords["trend_shock"] += ["level"] + expected_coords["level_shock"] += ["level"] expected_coords[SHOCK_DIM] += [ "level", ] @@ -241,7 +241,7 @@ def create_structural_model_and_equivalent_statsmodel( if trend: level_trend_order[1] = 1 - expected_coords["trend_state"] += [ + expected_coords["level_state"] += [ "trend", ] expected_coords[ALL_STATE_DIM] += [ @@ -253,12 +253,12 @@ def create_structural_model_and_equivalent_statsmodel( if stochastic_trend: level_trend_innov_order[1] = 1 - expected_coords["trend_shock"] += ["trend"] + expected_coords["level_shock"] += ["trend"] expected_coords[SHOCK_DIM] += ["trend"] expected_coords[SHOCK_AUX_DIM] += ["trend"] if level or trend: - expected_param_dims["initial_trend"] += ("trend_state",) + expected_param_dims["level_initial"] += ("level_state",) level_value = np.where( level_trend_order, rng.normal( @@ -272,13 +272,13 @@ def create_structural_model_and_equivalent_statsmodel( max_order = np.flatnonzero(level_value)[-1].item() + 1 level_trend_order = level_trend_order[:max_order] - params["initial_trend"] = level_value[:max_order] + params["level_initial"] = level_value[:max_order] sm_init["level"] = level_value[0] sm_init["trend"] = level_value[1] if sum(level_trend_innov_order) > 0: - expected_param_dims["sigma_trend"] += ("trend_shock",) - params["sigma_trend"] = np.sqrt(sigma_level_value2) + expected_param_dims["level_sigma"] += ("level_shock",) + params["level_sigma"] = 
np.sqrt(sigma_level_value2) sigma_level_value = sigma_level_value2.tolist() if stochastic_level: From b9322559ab061fefab5e8f19d8b5dbff5b82f42e Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 6 Jul 2025 12:52:46 +0800 Subject: [PATCH 15/21] Update tests to new names --- tests/statespace/core/test_statespace.py | 16 +++++++++------- tests/statespace/utils/test_coord_assignment.py | 12 ++++++------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/tests/statespace/core/test_statespace.py b/tests/statespace/core/test_statespace.py index bfcd114ae..947069d86 100644 --- a/tests/statespace/core/test_statespace.py +++ b/tests/statespace/core/test_statespace.py @@ -167,7 +167,9 @@ def exog_pymc_mod(exog_ss_mod, exog_data): P0_diag = pm.Gamma("P0_diag", alpha=2, beta=4, dims=["state"]) P0 = pm.Deterministic("P0", pt.diag(P0_diag), dims=["state", "state_aux"]) - initial_trend = pm.Normal("initial_trend", mu=[0], sigma=[0.005], dims=["trend_state"]) + initial_trend = pm.Normal( + "level_trend_initial", mu=[0], sigma=[0.005], dims=["level_trend_state"] + ) data_exog = pm.Data( "data_exog", exog_data["x1"].values[:, None], dims=["time", "exog_state"] @@ -184,12 +186,12 @@ def pymc_mod_no_exog(ss_mod_no_exog, rng): y = pd.DataFrame(rng.normal(size=(100, 1)).astype(floatX), columns=["y"]) with pm.Model(coords=ss_mod_no_exog.coords) as m: - initial_trend = pm.Normal("initial_trend", dims=["trend_state"]) + initial_trend = pm.Normal("level_trend_initial", dims=["level_trend_state"]) P0_sigma = pm.Exponential("P0_sigma", 1) P0 = pm.Deterministic( "P0", pt.eye(ss_mod_no_exog.k_states) * P0_sigma, dims=["state", "state_aux"] ) - sigma_trend = pm.Exponential("sigma_trend", 1, dims=["trend_shock"]) + sigma_trend = pm.Exponential("level_trend_sigma", 1, dims=["level_trend_shock"]) ss_mod_no_exog.build_statespace_graph(y) return m @@ -204,12 +206,12 @@ def pymc_mod_no_exog_dt(ss_mod_no_exog_dt, rng): ) with pm.Model(coords=ss_mod_no_exog_dt.coords) as m: - 
initial_trend = pm.Normal("initial_trend", dims=["trend_state"]) + initial_trend = pm.Normal("level_trend_initial", dims=["level_trend_state"]) P0_sigma = pm.Exponential("P0_sigma", 1) P0 = pm.Deterministic( "P0", pt.eye(ss_mod_no_exog_dt.k_states) * P0_sigma, dims=["state", "state_aux"] ) - sigma_trend = pm.Exponential("sigma_trend", 1, dims=["trend_shock"]) + sigma_trend = pm.Exponential("level_trend_sigma", 1, dims=["level_trend_shock"]) ss_mod_no_exog_dt.build_statespace_graph(y) return m @@ -313,7 +315,7 @@ def test_build_statespace_graph_warns_if_data_has_nans(): ss_mod = st.LevelTrendComponent(order=1, innovations_order=0).build(verbose=False) with pm.Model() as pymc_mod: - initial_trend = pm.Normal("initial_trend", shape=(1,)) + initial_trend = pm.Normal("level_trend_initial", shape=(1,)) P0 = pm.Deterministic("P0", pt.eye(1, dtype=floatX)) with pytest.warns(pm.ImputationWarning): ss_mod.build_statespace_graph( @@ -326,7 +328,7 @@ def test_build_statespace_graph_raises_if_data_has_missing_fill(): ss_mod = st.LevelTrendComponent(order=1, innovations_order=0).build(verbose=False) with pm.Model() as pymc_mod: - initial_trend = pm.Normal("initial_trend", shape=(1,)) + initial_trend = pm.Normal("level_trend_initial", shape=(1,)) P0 = pm.Deterministic("P0", pt.eye(1, dtype=floatX)) with pytest.raises(ValueError, match="Provided data contains the value 1.0"): data = np.ones((10, 1), dtype=floatX) diff --git a/tests/statespace/utils/test_coord_assignment.py b/tests/statespace/utils/test_coord_assignment.py index a3b419914..fe846c4fe 100644 --- a/tests/statespace/utils/test_coord_assignment.py +++ b/tests/statespace/utils/test_coord_assignment.py @@ -80,8 +80,8 @@ def _create_model(f): dims="state", ) P0 = pm.Deterministic("P0", pt.diag(P0_diag), dims=("state", "state_aux")) - initial_trend = pm.Normal("initial_trend", dims="trend_state") - sigma_trend = pm.Exponential("sigma_trend", 1, dims="trend_shock") + initial_trend = pm.Normal("level_trend_initial", 
dims="level_trend_state") + sigma_trend = pm.Exponential("level_trend_sigma", 1, dims="level_trend_shock") ss_mod.build_statespace_graph(data, save_kalman_filter_outputs_in_idata=True) return mod @@ -103,8 +103,8 @@ def test_model_build_without_coords(load_dataset): with pm.Model() as mod: P0_diag = pm.Exponential("P0_diag", 1, shape=(2,)) P0 = pm.Deterministic("P0", pt.diag(P0_diag)) - initial_trend = pm.Normal("initial_trend", shape=(2,)) - sigma_trend = pm.Exponential("sigma_trend", 1, shape=(2,)) + initial_trend = pm.Normal("level_trend_initial", shape=(2,)) + sigma_trend = pm.Exponential("level_trend_sigma", 1, shape=(2,)) ss_mod.build_statespace_graph(data, register_data=False) assert mod.coords == {} @@ -131,8 +131,8 @@ def make_model(index): P0_diag = pm.Gamma("P0_diag", alpha=5, beta=5) P0 = pm.Deterministic("P0", pt.eye(ss_mod.k_states) * P0_diag, dims=P0_dims) - initial_trend = pm.Normal("initial_trend", dims=initial_trend_dims) - sigma_trend = pm.Gamma("sigma_trend", alpha=2, beta=50, dims=sigma_trend_dims) + initial_trend = pm.Normal("level_trend_initial", dims=initial_trend_dims) + sigma_trend = pm.Gamma("level_trend_sigma", alpha=2, beta=50, dims=sigma_trend_dims) with pytest.warns(UserWarning, match="No time index found on the supplied data"): ss_mod.build_statespace_graph( From 6debd2375db833c511b5ddefc2069d4853ae1c46 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 6 Jul 2025 13:08:33 +0800 Subject: [PATCH 16/21] More test updates --- tests/statespace/filters/test_distributions.py | 12 ++++++------ .../models/structural/components/test_level_trend.py | 2 +- tests/statespace/models/structural/test_core.py | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/statespace/filters/test_distributions.py b/tests/statespace/filters/test_distributions.py index 1958d0bf0..383257196 100644 --- a/tests/statespace/filters/test_distributions.py +++ b/tests/statespace/filters/test_distributions.py @@ -52,8 +52,8 @@ def 
pymc_model(data): data = pm.Data("data", data.values) P0_diag = pm.Exponential("P0_diag", 1, shape=(2,)) P0 = pm.Deterministic("P0", pt.diag(P0_diag)) - initial_trend = pm.Normal("initial_trend", shape=(2,)) - sigma_trend = pm.Exponential("sigma_trend", 1, shape=(2,)) + initial_trend = pm.Normal("level_trend_initial", shape=(2,)) + sigma_trend = pm.Exponential("level_trend_sigma", 1, shape=(2,)) return mod @@ -69,8 +69,8 @@ def pymc_model_2(data): with pm.Model(coords=coords) as mod: P0_diag = pm.Exponential("P0_diag", 1, shape=(2,)) P0 = pm.Deterministic("P0", pt.diag(P0_diag)) - initial_trend = pm.Normal("initial_trend", shape=(2,)) - sigma_trend = pm.Exponential("sigma_trend", 1, shape=(2,)) + initial_trend = pm.Normal("level_trend_initial", shape=(2,)) + sigma_trend = pm.Exponential("level_trend_sigma", 1, shape=(2,)) sigma_me = pm.Exponential("sigma_error", 1) return mod @@ -207,8 +207,8 @@ def test_lgss_with_time_varying_inputs(output_name, rng): exog_data = pm.Data("data_exog", X) P0_diag = pm.Exponential("P0_diag", 1, shape=(mod.k_states,)) P0 = pm.Deterministic("P0", pt.diag(P0_diag)) - initial_trend = pm.Normal("initial_trend", shape=(2,)) - sigma_trend = pm.Exponential("sigma_trend", 1, shape=(2,)) + initial_trend = pm.Normal("level_trend_initial", shape=(2,)) + sigma_trend = pm.Exponential("level_trend_sigma", 1, shape=(2,)) beta_exog = pm.Normal("beta_exog", shape=(3,)) mod._insert_random_variables() diff --git a/tests/statespace/models/structural/components/test_level_trend.py b/tests/statespace/models/structural/components/test_level_trend.py index 45503912a..37f8c14a2 100644 --- a/tests/statespace/models/structural/components/test_level_trend.py +++ b/tests/statespace/models/structural/components/test_level_trend.py @@ -45,7 +45,7 @@ def test_level_trend_multiple_observed_construction(): "level[data_3]", "trend[data_3]", ] - assert mod.shock_names == ["level_shock[data_1]", "level_shock[data_2]", "level_shock[data_3]"] + assert mod.shock_names == 
["level[data_1]", "level[data_2]", "level[data_3]"] Z, T, R = pytensor.function( [], [mod.ssm["design"], mod.ssm["transition"], mod.ssm["selection"]], mode="FAST_COMPILE" diff --git a/tests/statespace/models/structural/test_core.py b/tests/statespace/models/structural/test_core.py index 46115b659..50059bd43 100644 --- a/tests/statespace/models/structural/test_core.py +++ b/tests/statespace/models/structural/test_core.py @@ -22,8 +22,8 @@ def test_add_components(): mod = ll + se ll_params = { - "initial_trend": np.zeros(2, dtype=floatX), - "sigma_trend": np.ones(2, dtype=floatX), + "level_trend_initial": np.zeros(2, dtype=floatX), + "level_trend_sigma": np.ones(2, dtype=floatX), } se_params = { "seasonal_coefs": np.ones(11, dtype=floatX), @@ -93,8 +93,8 @@ def test_extract_components_from_idata(rng): x0 = pm.Normal("x0", dims=["state"]) P0 = pm.Deterministic("P0", pt.eye(mod.k_states), dims=["state", "state_aux"]) beta_exog = pm.Normal("beta_exog", dims=["exog_state"]) - initial_trend = pm.Normal("initial_trend", dims=["trend_state"]) - sigma_trend = pm.Exponential("sigma_trend", 1, dims=["trend_shock"]) + initial_trend = pm.Normal("level_trend_initial", dims=["level_trend_state"]) + sigma_trend = pm.Exponential("level_trend_sigma", 1, dims=["level_trend_shock"]) seasonal_coefs = pm.Normal("seasonal", dims=["seasonal_state"]) sigma_obs = pm.Exponential("sigma_obs", 1) From fbc61a14bf2adf47d056a4bad587abfeea991dc8 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 6 Jul 2025 13:25:37 +0800 Subject: [PATCH 17/21] Delay dropping data names from states/coords until `.build` --- .../structural/components/level_trend.py | 6 ++-- .../statespace/models/structural/core.py | 34 +++++++++++++++---- .../structural/components/test_level_trend.py | 2 +- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/pymc_extras/statespace/models/structural/components/level_trend.py b/pymc_extras/statespace/models/structural/components/level_trend.py index 
7e301e8d8..c9077f7b0 100644 --- a/pymc_extras/statespace/models/structural/components/level_trend.py +++ b/pymc_extras/statespace/models/structural/components/level_trend.py @@ -168,9 +168,7 @@ def populate_component_properties(self): self.param_names = [f"{self.name}_initial"] base_names = [name for name, mask in zip(name_slice, self._order_mask) if mask] self.state_names = [ - f"{name}[{obs_name}]" if k_endog > 1 else name - for obs_name in self.observed_state_names - for name in base_names + f"{name}[{obs_name}]" for obs_name in self.observed_state_names for name in base_names ] self.param_dims = {f"{self.name}_initial": (f"{self.name}_state",)} self.coords = {f"{self.name}_state": base_names} @@ -193,7 +191,7 @@ def populate_component_properties(self): name for name, mask in zip(name_slice, self.innovations_order) if mask ] self.shock_names = [ - f"{name}[{obs_name}]" if k_endog > 1 else name + f"{name}[{obs_name}]" for obs_name in self.observed_state_names for name in shock_base_names ] diff --git a/pymc_extras/statespace/models/structural/core.py b/pymc_extras/statespace/models/structural/core.py index d5fbe4e6e..418f57123 100644 --- a/pymc_extras/statespace/models/structural/core.py +++ b/pymc_extras/statespace/models/structural/core.py @@ -76,16 +76,19 @@ def __init__( param_names, param_dims, param_info = self._add_inital_state_cov_to_properties( param_names, param_dims, param_info, k_states ) - self._state_names = state_names.copy() - self._data_names = data_names.copy() - self._shock_names = shock_names.copy() - self._param_names = param_names.copy() - self._param_dims = param_dims.copy() + + self._state_names = self._strip_data_names_if_unambiguous(state_names, k_endog) + self._data_names = self._strip_data_names_if_unambiguous(data_names, k_endog) + self._shock_names = self._strip_data_names_if_unambiguous(shock_names, k_endog) + self._param_names = self._strip_data_names_if_unambiguous(param_names, k_endog) + self._param_dims = param_dims 
default_coords = make_default_coords(self) coords.update(default_coords) - self._coords = coords + self._coords = { + k: self._strip_data_names_if_unambiguous(v, k_endog) for k, v in coords.items() + } self._param_info = param_info.copy() self._data_info = data_info.copy() self.measurement_error = measurement_error @@ -122,6 +125,25 @@ def __init__( P0 = self.make_and_register_variable("P0", shape=(self.k_states, self.k_states)) self.ssm["initial_state_cov"] = P0 + def _strip_data_names_if_unambiguous(self, names: list[str], k_endog: int): + """ + State names from components should always be of the form name[data_name], in the case that the component is + associated with multiple observed states. Not doing so leads to ambiguity -- we might have two level states, + but which goes to which observed component? So we set `level[data_1]` and `level[data_2]`. + + In cases where there is only one observed state (when k_endog == 1), we can strip the data part and just use + the state name. This is a bit cleaner. 
+ """ + if k_endog == 1: + [data_name] = self.observed_states + return [ + name.replace(f"[{data_name}]", "") if isinstance(name, str) else name + for name in names + ] + + else: + return names + @staticmethod def _add_inital_state_cov_to_properties(param_names, param_dims, param_info, k_states): param_names += ["P0"] diff --git a/tests/statespace/models/structural/components/test_level_trend.py b/tests/statespace/models/structural/components/test_level_trend.py index 37f8c14a2..c8a9c419a 100644 --- a/tests/statespace/models/structural/components/test_level_trend.py +++ b/tests/statespace/models/structural/components/test_level_trend.py @@ -119,7 +119,7 @@ def test_add_level_trend_with_different_observed(): assert mod.coords["grw_state"] == ["level"] assert mod.state_names == ["level[data_1]", "trend[data_1]", "level[data_2]"] - assert mod.shock_names == ["trend_shock[data_1]", "level_shock[data_2]"] + assert mod.shock_names == ["trend[data_1]", "level[data_2]"] Z, T, R = pytensor.function( [], [mod.ssm["design"], mod.ssm["transition"], mod.ssm["selection"]], mode="FAST_COMPILE" From 85b78fe375373cec604f0c324e99a74ceac105d3 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 6 Jul 2025 13:38:51 +0800 Subject: [PATCH 18/21] Remove docstring typo --- .../statespace/models/structural/components/level_trend.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pymc_extras/statespace/models/structural/components/level_trend.py b/pymc_extras/statespace/models/structural/components/level_trend.py index c9077f7b0..4a8418543 100644 --- a/pymc_extras/statespace/models/structural/components/level_trend.py +++ b/pymc_extras/statespace/models/structural/components/level_trend.py @@ -12,7 +12,6 @@ class LevelTrendComponent(Component): Parameters ---------- - __________ order : int Number of time derivatives of the trend to include in the model. 
For example, when order=3, the trend will From a6327b77b78d18b0ac335a8c221f60dcadba9855 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 6 Jul 2025 18:16:30 +0800 Subject: [PATCH 19/21] Update autoregressive component and tests --- .../structural/components/autoregressive.py | 29 +++-- .../components/test_autoregressive.py | 113 ++++++++++++++++-- .../statespace/models/structural/conftest.py | 1 + .../structural/test_against_statsmodels.py | 8 +- 4 files changed, 123 insertions(+), 28 deletions(-) diff --git a/pymc_extras/statespace/models/structural/components/autoregressive.py b/pymc_extras/statespace/models/structural/components/autoregressive.py index 441913fe9..a0dfbdff2 100644 --- a/pymc_extras/statespace/models/structural/components/autoregressive.py +++ b/pymc_extras/statespace/models/structural/components/autoregressive.py @@ -65,7 +65,7 @@ class AutoregressiveComponent(Component): def __init__( self, order: int = 1, - name: str = "AutoRegressive", + name: str = "auto_regressive", observed_state_names: list[str] | None = None, ): if observed_state_names is None: @@ -92,27 +92,30 @@ def __init__( ) def populate_component_properties(self): + k_states = self.k_states // self.k_endog + self.state_names = [ - f"L{i + 1}.{state_name}" - for i in range(self.k_states) + f"L{i + 1}[{state_name}]" for state_name in self.observed_state_names + for i in range(k_states) ] - self.shock_names = [f"{name}_{self.name}_innovation" for name in self.observed_state_names] - self.param_names = ["ar_params", "sigma_ar"] - self.param_dims = {"ar_params": (AR_PARAM_DIM,)} - self.coords = {AR_PARAM_DIM: self.ar_lags.tolist()} + + self.shock_names = self.observed_state_names.copy() + self.param_names = [f"{self.name}_params", f"{self.name}_sigma"] + self.param_dims = {f"{self.name}_params": (f"{self.name}_lag",)} + self.coords = {f"{self.name}_lag": self.ar_lags.tolist()} if self.k_endog > 1: - self.param_dims["ar_params"] = ( + self.param_dims[f"{self.name}_params"] = ( 
f"{self.name}_endog", AR_PARAM_DIM, ) - self.param_dims["sigma_ar"] = (f"{self.name}_endog",) + self.param_dims[f"{self.name}_sigma"] = (f"{self.name}_endog",) self.coords[f"{self.name}_endog"] = self.observed_state_names self.param_info = { - "ar_params": { + f"{self.name}_params": { "shape": (self.k_states,) if self.k_endog == 1 else (self.k_endog, self.k_states), "constraints": None, "dims": (AR_PARAM_DIM,) @@ -122,7 +125,7 @@ def populate_component_properties(self): AR_PARAM_DIM, ), }, - "sigma_ar": { + f"{self.name}_sigma": { "shape": () if self.k_endog == 1 else (self.k_endog,), "constraints": "Positive", "dims": None if self.k_endog == 1 else (f"{self.name}_endog",), @@ -136,10 +139,10 @@ def make_symbolic_graph(self) -> None: k_nonzero = int(sum(self.order)) ar_params = self.make_and_register_variable( - "ar_params", shape=(k_nonzero,) if k_endog == 1 else (k_endog, k_nonzero) + f"{self.name}_params", shape=(k_nonzero,) if k_endog == 1 else (k_endog, k_nonzero) ) sigma_ar = self.make_and_register_variable( - "sigma_ar", shape=() if k_endog == 1 else (k_endog,) + f"{self.name}_sigma", shape=() if k_endog == 1 else (k_endog,) ) if k_endog == 1: diff --git a/tests/statespace/models/structural/components/test_autoregressive.py b/tests/statespace/models/structural/components/test_autoregressive.py index 21234aa2a..34458905a 100644 --- a/tests/statespace/models/structural/components/test_autoregressive.py +++ b/tests/statespace/models/structural/components/test_autoregressive.py @@ -1,8 +1,10 @@ import numpy as np +import pytensor import pytest from numpy.testing import assert_allclose from pytensor import config +from pytensor.graph.basic import explicit_graph_inputs from pymc_extras.statespace.models import structural as st from tests.statespace.models.structural.conftest import _assert_basic_coords_correct @@ -11,29 +13,44 @@ @pytest.mark.parametrize("order", [1, 2, [1, 0, 1]], ids=["AR1", "AR2", "AR(1,0,1)"]) def test_autoregressive_model(order, rng): - ar = 
st.AutoregressiveComponent(order=order) + k = sum(order) if isinstance(order, list) else order + ar = st.AutoregressiveComponent(order=order).build(verbose=False) params = { - "ar_params": np.full((sum(ar.order),), 0.5, dtype=config.floatX), - "sigma_ar": 0.0, + "auto_regressive_params": np.full((k,), 0.5, dtype=config.floatX), + "auto_regressive_sigma": 0.1, + "initial_state_cov": np.eye(k), } - x, y = simulate_from_numpy_model(ar, rng, params, steps=100) - # Check coords - ar.build(verbose=False) _assert_basic_coords_correct(ar) + lags = np.arange(len(order) if isinstance(order, list) else order, dtype="int") + 1 if isinstance(order, list): lags = lags[np.flatnonzero(order)] - assert_allclose(ar.coords["ar_lag"], lags) + assert_allclose(ar.coords["auto_regressive_lag"], lags) -def test_autoregressive_multiple_observed(rng): +def test_autoregressive_multiple_observed_build(rng): ar = st.AutoregressiveComponent(order=3, observed_state_names=["data_1", "data_2"]) mod = ar.build(verbose=False) + assert mod.k_endog == 2 + assert mod.k_states == 6 + assert mod.k_posdef == 2 + + assert mod.state_names == [ + "L1[data_1]", + "L2[data_1]", + "L3[data_1]", + "L1[data_2]", + "L2[data_2]", + "L3[data_2]", + ] + + assert mod.shock_names == ["data_1", "data_2"] + params = { - "ar_params": np.full( + "auto_regressive_params": np.full( ( 2, sum(ar.order), @@ -41,7 +58,81 @@ def test_autoregressive_multiple_observed(rng): 0.5, dtype=config.floatX, ), - "sigma_ar": np.ones((2,)) * 1e-3, + "auto_regressive_sigma": np.array([0.05, 0.12]), } + _, _, _, _, T, Z, R, _, Q = mod._unpack_statespace_with_placeholders() + input_vars = explicit_graph_inputs([T, Z, R, Q]) + fn = pytensor.function( + inputs=list(input_vars), + outputs=[T, Z, R, Q], + mode="FAST_COMPILE", + ) + + T, Z, R, Q = fn(**params) + + np.testing.assert_allclose( + T, + np.array( + [ + [0.5, 0.5, 0.5, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.5, 0.5, 0.5], + 
[0.0, 0.0, 0.0, 1.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 1.0, 0.0], + ] + ), + ) + + np.testing.assert_allclose( + Z, np.array([[1.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0, 0.0, 0.0]]) + ) + + np.testing.assert_allclose( + R, np.array([[1.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [0.0, 0.0], [0.0, 0.0]]) + ) + + np.testing.assert_allclose(Q, np.diag([0.05**2, 0.12**2])) + + +def test_autoregressive_multiple_observed_data(rng): + ar = st.AutoregressiveComponent(order=1, observed_state_names=["data_1", "data_2", "data_3"]) + mod = ar.build(verbose=False) + + params = { + "auto_regressive_params": np.array([0.9, 0.8, 0.5]).reshape((3, 1)), + "auto_regressive_sigma": np.array([0.05, 0.12, 0.22]), + "initial_state_cov": np.eye(3), + } + + # Recover the AR(1) coefficients from the simulated data via OLS + x, y = simulate_from_numpy_model(mod, rng, params, steps=2000) + for i in range(3): + ols_coefs = np.polyfit(y[:-1, i], y[1:, i], 1) + np.testing.assert_allclose(ols_coefs[0], params["auto_regressive_params"][i, 0], atol=1e-1) + + +def test_add_autoregressive_different_observed(): + mod_1 = st.AutoregressiveComponent(order=1, name="ar1", observed_state_names=["data_1"]) + mod_2 = st.AutoregressiveComponent(name="ar6", order=6, observed_state_names=["data_2"]) + + mod = (mod_1 + mod_2).build(verbose=False) + + print(mod.coords) + + assert mod.k_endog == 2 + assert mod.k_states == 7 + assert mod.k_posdef == 2 + assert mod.state_names == [ + "L1[data_1]", + "L1[data_2]", + "L2[data_2]", + "L3[data_2]", + "L4[data_2]", + "L5[data_2]", + "L6[data_2]", + ] - x, y = simulate_from_numpy_model(ar, rng, params, steps=100) + assert mod.shock_names == ["data_1", "data_2"] + assert mod.coords["ar1_lag"] == [1] + assert mod.coords["ar6_lag"] == [1, 2, 3, 4, 5, 6] diff --git a/tests/statespace/models/structural/conftest.py b/tests/statespace/models/structural/conftest.py index c5f2396bc..b9e58ca68 100644 --- a/tests/statespace/models/structural/conftest.py +++ 
b/tests/statespace/models/structural/conftest.py @@ -24,5 +24,6 @@ def _assert_basic_coords_correct(mod): assert mod.coords[SHOCK_DIM] == mod.shock_names assert mod.coords[SHOCK_AUX_DIM] == mod.shock_names expected_obs = mod.observed_state_names if hasattr(mod, "observed_state_names") else ["data"] + assert mod.coords[OBS_STATE_DIM] == expected_obs assert mod.coords[OBS_STATE_AUX_DIM] == expected_obs diff --git a/tests/statespace/models/structural/test_against_statsmodels.py b/tests/statespace/models/structural/test_against_statsmodels.py index 98318711f..6909e3caa 100644 --- a/tests/statespace/models/structural/test_against_statsmodels.py +++ b/tests/statespace/models/structural/test_against_statsmodels.py @@ -404,20 +404,20 @@ def create_structural_model_and_equivalent_statsmodel( components.append(comp) if autoregressive is not None: - ar_names = [f"L{i+1}.data" for i in range(autoregressive)] + ar_names = [f"L{i+1}" for i in range(autoregressive)] ar_params = rng.normal(size=(autoregressive,)).astype(floatX) if autoregressive == 1: ar_params = ar_params.item() sigma2 = np.abs(rng.normal()).astype(floatX) params["ar_params"] = ar_params - params["sigma_ar"] = np.sqrt(sigma2) + params["ar_sigma"] = np.sqrt(sigma2) expected_param_dims["ar_params"] += (AR_PARAM_DIM,) expected_coords[AR_PARAM_DIM] += tuple(list(range(1, autoregressive + 1))) expected_coords[ALL_STATE_DIM] += ar_names expected_coords[ALL_STATE_AUX_DIM] += ar_names - expected_coords[SHOCK_DIM] += ["data_ar_innovation"] - expected_coords[SHOCK_AUX_DIM] += ["data_ar_innovation"] + expected_coords[SHOCK_DIM] += ["data"] + expected_coords[SHOCK_AUX_DIM] += ["data"] sm_params["sigma2.ar"] = sigma2 for i, rho in enumerate(ar_params): From 0b20dbc332753ca863f24fd7f2e78b89a0748b16 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 6 Jul 2025 22:36:57 +0800 Subject: [PATCH 20/21] Add component name to shock state names --- .../models/structural/components/autoregressive.py | 2 +- 
.../structural/components/test_autoregressive.py | 10 ++-------- .../models/structural/test_against_statsmodels.py | 4 ++-- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/pymc_extras/statespace/models/structural/components/autoregressive.py b/pymc_extras/statespace/models/structural/components/autoregressive.py index a0dfbdff2..0a3dd0586 100644 --- a/pymc_extras/statespace/models/structural/components/autoregressive.py +++ b/pymc_extras/statespace/models/structural/components/autoregressive.py @@ -100,7 +100,7 @@ def populate_component_properties(self): for i in range(k_states) ] - self.shock_names = self.observed_state_names.copy() + self.shock_names = [f"{self.name}[{obs_name}]" for obs_name in self.observed_state_names] self.param_names = [f"{self.name}_params", f"{self.name}_sigma"] self.param_dims = {f"{self.name}_params": (f"{self.name}_lag",)} self.coords = {f"{self.name}_lag": self.ar_lags.tolist()} diff --git a/tests/statespace/models/structural/components/test_autoregressive.py b/tests/statespace/models/structural/components/test_autoregressive.py index 34458905a..71a181925 100644 --- a/tests/statespace/models/structural/components/test_autoregressive.py +++ b/tests/statespace/models/structural/components/test_autoregressive.py @@ -13,13 +13,7 @@ @pytest.mark.parametrize("order", [1, 2, [1, 0, 1]], ids=["AR1", "AR2", "AR(1,0,1)"]) def test_autoregressive_model(order, rng): - k = sum(order) if isinstance(order, list) else order ar = st.AutoregressiveComponent(order=order).build(verbose=False) - params = { - "auto_regressive_params": np.full((k,), 0.5, dtype=config.floatX), - "auto_regressive_sigma": 0.1, - "initial_state_cov": np.eye(k), - } # Check coords _assert_basic_coords_correct(ar) @@ -47,7 +41,7 @@ def test_autoregressive_multiple_observed_build(rng): "L3[data_2]", ] - assert mod.shock_names == ["data_1", "data_2"] + assert mod.shock_names == ["auto_regressive[data_1]", "auto_regressive[data_2]"] params = { "auto_regressive_params": 
np.full( @@ -133,6 +127,6 @@ def test_add_autoregressive_different_observed(): "L6[data_2]", ] - assert mod.shock_names == ["data_1", "data_2"] + assert mod.shock_names == ["ar1[data_1]", "ar6[data_2]"] assert mod.coords["ar1_lag"] == [1] assert mod.coords["ar6_lag"] == [1, 2, 3, 4, 5, 6] diff --git a/tests/statespace/models/structural/test_against_statsmodels.py b/tests/statespace/models/structural/test_against_statsmodels.py index 6909e3caa..1db4350b5 100644 --- a/tests/statespace/models/structural/test_against_statsmodels.py +++ b/tests/statespace/models/structural/test_against_statsmodels.py @@ -416,8 +416,8 @@ def create_structural_model_and_equivalent_statsmodel( expected_coords[AR_PARAM_DIM] += tuple(list(range(1, autoregressive + 1))) expected_coords[ALL_STATE_DIM] += ar_names expected_coords[ALL_STATE_AUX_DIM] += ar_names - expected_coords[SHOCK_DIM] += ["data"] - expected_coords[SHOCK_AUX_DIM] += ["data"] + expected_coords[SHOCK_DIM] += ["ar"] + expected_coords[SHOCK_AUX_DIM] += ["ar"] sm_params["sigma2.ar"] = sigma2 for i, rho in enumerate(ar_params): From a8564b74fd603056c5281dd4e5a3e055b37918af Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 6 Jul 2025 23:13:34 +0800 Subject: [PATCH 21/21] Allow multiple observed in TimeSeasonality component --- .../structural/components/seasonality.py | 57 ++++-- .../structural/components/test_seasonality.py | 169 +++++++++++++++++- 2 files changed, 207 insertions(+), 19 deletions(-) diff --git a/pymc_extras/statespace/models/structural/components/seasonality.py b/pymc_extras/statespace/models/structural/components/seasonality.py index 20f47636f..33135affd 100644 --- a/pymc_extras/statespace/models/structural/components/seasonality.py +++ b/pymc_extras/statespace/models/structural/components/seasonality.py @@ -154,27 +154,41 @@ def __init__( # TODO: Can this be stashed and reconstructed automatically somehow? 
state_names.pop(0) + self.provided_state_names = state_names + k_states = season_length - int(self.remove_first_state) + k_endog = len(observed_state_names) + k_posdef = int(innovations) super().__init__( name=name, - k_endog=len(observed_state_names), - k_states=k_states, - k_posdef=int(innovations), - state_names=state_names, + k_endog=k_endog, + k_states=k_states * k_endog, + k_posdef=k_posdef * k_endog, observed_state_names=observed_state_names, measurement_error=False, combine_hidden_states=True, - obs_state_idxs=np.r_[[1.0], np.zeros(k_states - 1)], + obs_state_idxs=np.tile(np.array([1.0] + [0.0] * (k_states - 1)), k_endog), ) def populate_component_properties(self): + k_states = self.k_states // self.k_endog + k_endog = self.k_endog + + self.state_names = [ + f"{state_name}[{endog_name}]" + for endog_name in self.observed_state_names + for state_name in self.provided_state_names + ] self.param_names = [f"{self.name}_coefs"] + self.param_info = { f"{self.name}_coefs": { - "shape": (self.k_states,), + "shape": (k_states,) if k_endog == 1 else (k_endog, k_states), "constraints": None, - "dims": (f"{self.name}_state",), + "dims": (f"{self.name}_state",) + if k_endog == 1 + else (f"{self.name}_endog", f"{self.name}_state"), } } self.param_dims = {f"{self.name}_coefs": (f"{self.name}_state",)} @@ -187,32 +201,41 @@ def populate_component_properties(self): "constraints": "Positive", "dims": None, } - self.shock_names = [f"{self.name}"] + self.shock_names = [f"{self.name}[{name}]" for name in self.observed_state_names] def make_symbolic_graph(self) -> None: + k_states = self.k_states // self.k_endog + k_posdef = self.k_posdef // self.k_endog + k_endog = self.k_endog + if self.remove_first_state: # In this case, parameters are normalized to sum to zero, so the current state is the negative sum of # all previous states. 
- T = np.eye(self.k_states, k=-1) + T = np.eye(k_states, k=-1) T[0, :] = -1 else: # In this case we assume the user to be responsible for ensuring the states sum to zero, so T is just a # circulant matrix that cycles between the states. - T = np.eye(self.k_states, k=1) + T = np.eye(k_states, k=1) T[-1, 0] = 1 - self.ssm["transition", :, :] = T - self.ssm["design", 0, 0] = 1 + self.ssm["transition", :, :] = pt.linalg.block_diag(*[T for _ in range(k_endog)]) + + Z = pt.zeros((1, k_states))[0, 0].set(1) + self.ssm["design", :, :] = pt.linalg.block_diag(*[Z for _ in range(k_endog)]) initial_states = self.make_and_register_variable( - f"{self.name}_coefs", shape=(self.k_states,) + f"{self.name}_coefs", shape=(k_states,) if k_endog == 1 else (k_endog, k_states) ) - self.ssm["initial_state", np.arange(self.k_states, dtype=int)] = initial_states + self.ssm["initial_state", :] = initial_states.ravel() if self.innovations: - self.ssm["selection", 0, 0] = 1 - season_sigma = self.make_and_register_variable(f"sigma_{self.name}", shape=()) - cov_idx = ("state_cov", *np.diag_indices(1)) + R = pt.zeros((k_states, k_posdef))[0, 0].set(1.0) + self.ssm["selection", :, :] = pt.join(0, *[R for _ in range(k_endog)]) + season_sigma = self.make_and_register_variable( + f"sigma_{self.name}", shape=() if k_endog == 1 else (k_endog,) + ) + cov_idx = ("state_cov", *np.diag_indices(k_posdef * k_endog)) self.ssm[cov_idx] = season_sigma**2 diff --git a/tests/statespace/models/structural/components/test_seasonality.py b/tests/statespace/models/structural/components/test_seasonality.py index 61ad4b198..a62a85c0e 100644 --- a/tests/statespace/models/structural/components/test_seasonality.py +++ b/tests/statespace/models/structural/components/test_seasonality.py @@ -1,7 +1,9 @@ import numpy as np +import pytensor import pytest from pytensor import config +from pytensor.graph.basic import explicit_graph_inputs from pymc_extras.statespace.models import structural as st from 
tests.statespace.models.structural.conftest import _assert_basic_coords_correct @@ -35,7 +37,7 @@ def random_word(rng): x0[0] = 1 params = {"season_coefs": x0} - if mod.innovations: + if innovations: params["sigma_season"] = 0.0 x, y = simulate_from_numpy_model(mod, rng, params) @@ -44,12 +46,175 @@ def random_word(rng): assert_pattern_repeats(y, s, atol=ATOL, rtol=RTOL) # Check coords - mod.build(verbose=False) + mod = mod.build(verbose=False) _assert_basic_coords_correct(mod) test_slice = slice(1, None) if remove_first_state else slice(None) assert mod.coords["season_state"] == state_names[test_slice] +@pytest.mark.parametrize( + "remove_first_state", [True, False], ids=["remove_first_state", "keep_first_state"] +) +def test_time_seasonality_multiple_observed(rng, remove_first_state): + s = 3 + state_names = [f"state_{i}" for i in range(s)] + mod = st.TimeSeasonality( + season_length=s, + innovations=True, + name="season", + state_names=state_names, + observed_state_names=["data_1", "data_2"], + remove_first_state=remove_first_state, + ) + x0 = np.zeros((mod.k_endog, mod.k_states // mod.k_endog), dtype=config.floatX) + + expected_states = [ + f"state_{i}[data_{j}]" for j in range(1, 3) for i in range(int(remove_first_state), s) + ] + assert mod.state_names == expected_states + assert mod.shock_names == ["season[data_1]", "season[data_2]"] + + x0[0, 0] = 1 + x0[1, 0] = 2.0 + + params = {"season_coefs": x0, "sigma_season": np.array([0.0, 0.0], dtype=config.floatX)} + + x, y = simulate_from_numpy_model(mod, rng, params, steps=123) + assert_pattern_repeats(y[:, 0], s, atol=ATOL, rtol=RTOL) + assert_pattern_repeats(y[:, 1], s, atol=ATOL, rtol=RTOL) + + mod = mod.build(verbose=False) + x0, *_, T, Z, R, _, Q = mod._unpack_statespace_with_placeholders() + + input_vars = explicit_graph_inputs([x0, T, Z, R, Q]) + + fn = pytensor.function( + inputs=list(input_vars), + outputs=[x0, T, Z, R, Q], + mode="FAST_COMPILE", + ) + + params["sigma_season"] = np.array([0.1, 0.8], 
dtype=config.floatX) + x0, T, Z, R, Q = fn(**params) + + if remove_first_state: + expected_x0 = np.array([1.0, 0.0, 2.0, 0.0]) + + expected_T = np.array( + [ + [-1.0, -1.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, -1.0, -1.0], + [0.0, 0.0, 1.0, 0.0], + ] + ) + expected_R = np.array([[1.0, 1.0], [0.0, 0.0], [1.0, 1.0], [0.0, 0.0]]) + expected_Z = np.array([[1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0]]) + + else: + expected_x0 = np.array([1.0, 0.0, 0.0, 2.0, 0.0, 0.0]) + expected_T = np.array( + [ + [0.0, 1.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 1.0], + [0.0, 0.0, 0.0, 1.0, 0.0, 0.0], + ] + ) + expected_R = np.array( + [[1.0, 1.0], [0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [0.0, 0.0], [0.0, 0.0]] + ) + expected_Z = np.array([[1.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0, 0.0, 0.0]]) + + expected_Q = np.array([[0.1**2, 0.0], [0.0, 0.8**2]]) + + for matrix, expected in zip( + [x0, T, Z, R, Q], + [expected_x0, expected_T, expected_Z, expected_R, expected_Q], + ): + np.testing.assert_allclose(matrix, expected) + + +def test_add_two_time_seasonality_different_observed(rng): + mod1 = st.TimeSeasonality( + season_length=3, + innovations=True, + name="season1", + state_names=[f"state_{i}" for i in range(3)], + observed_state_names=["data_1"], + remove_first_state=False, + ) + mod2 = st.TimeSeasonality( + season_length=5, + innovations=True, + name="season2", + state_names=[f"state_{i}" for i in range(5)], + observed_state_names=["data_2"], + ) + + mod = (mod1 + mod2).build(verbose=False) + + params = { + "season1_coefs": np.array([1.0, 0.0, 0.0], dtype=config.floatX), + "season2_coefs": np.array([3.0, 0.0, 0.0, 0.0], dtype=config.floatX), + "sigma_season1": np.array(0.0, dtype=config.floatX), + "sigma_season2": np.array(0.0, dtype=config.floatX), + "initial_state_cov": np.eye(mod.k_states, dtype=config.floatX), + } + + x, y = simulate_from_numpy_model(mod, 
rng, params, steps=3 * 5 * 5) + assert_pattern_repeats(y[:, 0], 3, atol=ATOL, rtol=RTOL) + assert_pattern_repeats(y[:, 1], 5, atol=ATOL, rtol=RTOL) + + assert mod.state_names == [ + "state_0[data_1]", + "state_1[data_1]", + "state_2[data_1]", + "state_1[data_2]", + "state_2[data_2]", + "state_3[data_2]", + "state_4[data_2]", + ] + + assert mod.shock_names == ["season1[data_1]", "season2[data_2]"] + + x0, *_, T = mod._unpack_statespace_with_placeholders()[:5] + input_vars = explicit_graph_inputs([x0, T]) + fn = pytensor.function( + inputs=list(input_vars), + outputs=[x0, T], + mode="FAST_COMPILE", + ) + + x0, T = fn( + season1_coefs=np.array([1.0, 0.0, 0.0], dtype=config.floatX), + season2_coefs=np.array([3.0, 0.0, 0.0, 1.2], dtype=config.floatX), + ) + + np.testing.assert_allclose( + np.array([1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.2]), x0, atol=ATOL, rtol=RTOL + ) + + np.testing.assert_allclose( + np.array( + [ + [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, -1.0, -1.0, -1.0, -1.0], + [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0], + ] + ), + T, + atol=ATOL, + rtol=RTOL, + ) + + def get_shift_factor(s): s_str = str(s) if "." not in s_str: