Commit 25b0805

Split idata utilities into idata.py

1 parent 23140a5 commit 25b0805

File tree

3 files changed: +677 -2 lines changed
pymc_extras/inference/laplace_approx/idata.py

Lines changed: 378 additions & 0 deletions

@@ -0,0 +1,378 @@
from itertools import product
from typing import Any, Literal

import arviz as az
import numpy as np
import pymc as pm
import xarray as xr

from arviz import dict_to_dataset
from better_optimize.constants import minimize_method
from pymc.backends.arviz import coords_and_dims_for_inferencedata, find_constants, find_observations
from pymc.blocking import RaveledVars
from scipy.optimize import OptimizeResult
from scipy.sparse.linalg import LinearOperator


def make_unpacked_variable_names(names: list[str], model: pm.Model) -> list[str]:
    """
    Expand each (possibly multidimensional) variable name into one label per element, e.g. "beta[NYC]".

    Labels use the model's coordinate values when a dimension's length matches, and fall back to integer
    indices otherwise.
    """
    coords = model.coords
    initial_point = model.initial_point()

    value_to_dim = {
        value.name: model.named_vars_to_dims.get(model.values_to_rvs[value].name, None)
        for value in model.value_vars
    }
    value_to_dim = {k: v for k, v in value_to_dim.items() if v is not None}

    rv_to_dim = model.named_vars_to_dims
    dims_dict = rv_to_dim | value_to_dim

    unpacked_variable_names = []
    for name in names:
        shape = initial_point[name].shape
        if shape:
            labels_by_dim = [
                coords[dim] if dim in coords and shape[i] == len(coords[dim]) else np.arange(shape[i])
                for i, dim in enumerate(dims_dict.get(name, [name]))
            ]
            labels = product(*labels_by_dim)
            unpacked_variable_names.extend(
                [f"{name}[{','.join(map(str, label))}]" for label in labels]
            )
        else:
            unpacked_variable_names.extend([name])
    return unpacked_variable_names

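
As a quick illustration (hypothetical model; the coords and variable names are made up for this example), the helper produces one label per raveled element:

```python
import pymc as pm

with pm.Model(coords={"city": ["NYC", "LA"]}) as m:
    beta = pm.Normal("beta", dims="city")
    sigma = pm.HalfNormal("sigma")

# Value variables are unpacked elementwise; scalars pass through unchanged
names = [v.name for v in m.value_vars]
print(make_unpacked_variable_names(names, m))
# ['beta[NYC]', 'beta[LA]', 'sigma_log__']
```
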
def map_results_to_inference_data(
    results: dict[str, Any], model: pm.Model | None = None
) -> az.InferenceData:
    """
    Convert a dictionary of results to an InferenceData object.

    Parameters
    ----------
    results: dict
        A dictionary containing the results to convert.
    model: Model, optional
        A PyMC model. If None, the model is taken from the current model context.

    Returns
    -------
    idata: az.InferenceData
        An InferenceData object containing the results.
    """
    model = pm.modelcontext(model)
    coords, dims = coords_and_dims_for_inferencedata(model)

    idata = az.convert_to_inference_data(results, coords=coords, dims=dims)
    return idata

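
A minimal usage sketch (the model and draws are illustrative, not part of the commit):

```python
import numpy as np
import pymc as pm

with pm.Model(coords={"city": ["NYC", "LA"]}) as m:
    beta = pm.Normal("beta", dims="city")

rng = np.random.default_rng(0)
results = {"beta": rng.normal(size=(2, 500, 2))}  # (chains, draws, len(city))

idata = map_results_to_inference_data(results, model=m)
print(idata.posterior["beta"].dims)  # ('chain', 'draw', 'city')
```
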
def add_map_posterior_to_inference_data(
    idata: az.InferenceData,
    map_point: dict[str, float | int | np.ndarray],
    model: pm.Model | None = None,
):
    """
    Add the MAP point to an InferenceData object in the posterior group.

    Unlike a typical posterior, the MAP point is a single point estimate rather than a distribution, so it is
    stored with singleton chain and draw dimensions in the posterior group.

    Parameters
    ----------
    idata: az.InferenceData
        An InferenceData object to which the MAP point will be added.
    map_point: dict
        A dictionary containing the MAP point estimates for each variable. The keys should be the variable names,
        and the values should be the corresponding MAP estimates.
    model: Model, optional
        A PyMC model. If None, the model is taken from the current model context.

    Returns
    -------
    idata: az.InferenceData
        The provided InferenceData, with the MAP point added to the posterior group.
    """
    model = pm.modelcontext(model) if model is None else model
    coords, dims = coords_and_dims_for_inferencedata(model)
    initial_point = model.initial_point()

    # The MAP point will have both the transformed and untransformed variables, so we need to ensure that
    # we have the correct dimensions for each variable.
    var_name_to_value_name = {
        rv.name: value.name
        for rv, value in model.rvs_to_values.items()
        if rv not in model.observed_RVs
    }
    dims.update(
        {
            value_name: dims[var_name]
            for var_name, value_name in var_name_to_value_name.items()
            if var_name in dims and (initial_point[value_name].shape == map_point[var_name].shape)
        }
    )

    map_idata = az.from_dict(
        {k: np.expand_dims(v, (0, 1)) for k, v in map_point.items()}, coords=coords, dims=dims
    )

    # Attach the MAP "posterior" to the provided InferenceData rather than discarding it
    idata.add_groups(posterior=map_idata.posterior)

    return idata

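
A hedged sketch of the intended call pattern, using pm.find_MAP on a toy model (names illustrative):

```python
import arviz as az
import pymc as pm

with pm.Model(coords={"city": ["NYC", "LA"]}) as m:
    beta = pm.Normal("beta", dims="city")
    map_point = pm.find_MAP(progressbar=False)

idata = add_map_posterior_to_inference_data(az.InferenceData(), map_point, model=m)
print(idata.posterior["beta"].shape)  # (1, 1, 2)
```
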
def add_fit_to_inference_data(
    idata: az.InferenceData, mu: RaveledVars, H_inv: np.ndarray | None, model: pm.Model | None = None
) -> az.InferenceData:
    """
    Add the mean vector and covariance matrix of the Laplace approximation to an InferenceData object.

    Parameters
    ----------
    idata: az.InferenceData
        An InferenceData object containing the approximated posterior samples.
    mu: RaveledVars
        The MAP estimate of the model parameters.
    H_inv: np.ndarray, optional
        The inverse Hessian matrix of the log-posterior evaluated at the MAP estimate.
    model: Model, optional
        A PyMC model. If None, the model is taken from the current model context.

    Returns
    -------
    idata: az.InferenceData
        The provided InferenceData, with the mean vector and covariance matrix added to the "fit" group.
    """
    model = pm.modelcontext(model) if model is None else model

    variable_names, *_ = zip(*mu.point_map_info)

    unpacked_variable_names = make_unpacked_variable_names(variable_names, model)

    mean_dataarray = xr.DataArray(mu.data, dims=["rows"], coords={"rows": unpacked_variable_names})

    data = {"mean_vector": mean_dataarray}

    if H_inv is not None:
        cov_dataarray = xr.DataArray(
            H_inv,
            dims=["rows", "columns"],
            coords={"rows": unpacked_variable_names, "columns": unpacked_variable_names},
        )
        data["covariance_matrix"] = cov_dataarray

    dataset = xr.Dataset(data)
    idata.add_groups(fit=dataset)

    return idata

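
A self-contained sketch, using the model's initial point as a stand-in MAP estimate and an identity matrix as a stand-in inverse Hessian:

```python
import arviz as az
import numpy as np
import pymc as pm

from pymc.blocking import DictToArrayBijection

with pm.Model(coords={"city": ["NYC", "LA"]}) as m:
    beta = pm.Normal("beta", dims="city")

mu = DictToArrayBijection.map(m.initial_point())  # RaveledVars with point_map_info
H_inv = np.eye(mu.data.shape[0])                  # stand-in inverse Hessian

idata = az.from_dict({"beta": np.zeros((1, 1, 2))}, coords={"city": ["NYC", "LA"]}, dims={"beta": ["city"]})
idata = add_fit_to_inference_data(idata, mu, H_inv, model=m)
print(idata.fit["mean_vector"].coords["rows"].values)  # ['beta[NYC]' 'beta[LA]']
```
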
def add_data_to_inference_data(
    idata: az.InferenceData,
    progressbar: bool = True,
    model: pm.Model | None = None,
    compile_kwargs: dict | None = None,
) -> az.InferenceData:
    """
    Add observed and constant data to an InferenceData object.

    Parameters
    ----------
    idata: az.InferenceData
        An InferenceData object containing the approximated posterior samples.
    progressbar: bool
        Whether to display a progress bar during computations. Default is True.
    model: Model, optional
        A PyMC model. If None, the model is taken from the current model context.
    compile_kwargs: dict, optional
        Additional keyword arguments to pass to pytensor.function.

    Returns
    -------
    idata: az.InferenceData
        The provided InferenceData, with observed and constant data added.
    """
    model = pm.modelcontext(model) if model is None else model

    if model.deterministics:
        expand_dims = {}
        if "chain" not in idata.posterior.coords:
            expand_dims["chain"] = [0]
        if "draw" not in idata.posterior.coords:
            expand_dims["draw"] = [0]

        idata.posterior = pm.compute_deterministics(
            idata.posterior.expand_dims(expand_dims),
            model=model,
            merge_dataset=True,
            progressbar=progressbar,
            compile_kwargs=compile_kwargs,
        )

    coords, dims = coords_and_dims_for_inferencedata(model)

    observed_data = dict_to_dataset(
        find_observations(model),
        library=pm,
        coords=coords,
        dims=dims,
        default_dims=[],
    )

    constant_data = dict_to_dataset(
        find_constants(model),
        library=pm,
        coords=coords,
        dims=dims,
        default_dims=[],
    )

    idata.add_groups(
        {"observed_data": observed_data, "constant_data": constant_data},
        coords=coords,
        dims=dims,
    )

    return idata

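
An illustrative sketch with one observed variable and one pm.Data container (the data values are made up):

```python
import arviz as az
import numpy as np
import pymc as pm

with pm.Model(coords={"obs_idx": [0, 1, 2]}) as m:
    x = pm.Data("x", np.array([1.0, 2.0, 3.0]), dims="obs_idx")
    beta = pm.Normal("beta")
    y = pm.Normal("y", mu=beta * x, sigma=1.0, observed=np.array([1.1, 1.9, 3.2]), dims="obs_idx")

idata = az.from_dict({"beta": np.zeros((1, 1))})
idata = add_data_to_inference_data(idata, progressbar=False, model=m)
print(idata.observed_data["y"].values)  # [1.1 1.9 3.2]
print(idata.constant_data["x"].values)  # [1. 2. 3.]
```
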
def optimizer_result_to_dataset(
    result: OptimizeResult,
    method: minimize_method | Literal["basinhopping"],
    mu: RaveledVars | None = None,
    model: pm.Model | None = None,
) -> xr.Dataset:
    """
    Convert an OptimizeResult object to an xarray Dataset object.

    Parameters
    ----------
    result: OptimizeResult
        The result of the optimization process.
    method: minimize_method or "basinhopping"
        The optimization method used.
    mu: RaveledVars, optional
        The MAP estimate of the model parameters, used to label the rows and columns of the output arrays.
    model: Model, optional
        A PyMC model. If None, the model is taken from the current model context.

    Returns
    -------
    dataset: xr.Dataset
        An xarray Dataset containing the optimization results.
    """
    if not isinstance(result, OptimizeResult):
        raise TypeError("result must be an instance of OptimizeResult")

    model = pm.modelcontext(model) if model is None else model

    if mu is not None:
        variable_names, *_ = zip(*mu.point_map_info)
        unpacked_variable_names = make_unpacked_variable_names(variable_names, model)
    else:
        # Fall back to integer labels when no RaveledVars point is provided
        unpacked_variable_names = list(range(np.size(getattr(result, "x", []))))

    data_vars = {}

    if hasattr(result, "lowest_optimization_result"):
        # If we did basinhopping, there is a second OptimizeResult nested inside the outer one. Pop it out and
        # collapse the two, overwriting outer keys with the inner ones.
        inner_res = result.pop("lowest_optimization_result")
        for key in inner_res.keys():
            result[key] = inner_res[key]

    if hasattr(result, "x"):
        data_vars["x"] = xr.DataArray(
            result.x, dims=["variables"], coords={"variables": unpacked_variable_names}
        )
    if hasattr(result, "fun"):
        data_vars["fun"] = xr.DataArray(result.fun, dims=[])
    if hasattr(result, "success"):
        data_vars["success"] = xr.DataArray(result.success, dims=[])
    if hasattr(result, "message"):
        data_vars["message"] = xr.DataArray(str(result.message), dims=[])
    if hasattr(result, "jac") and result.jac is not None:
        jac = np.asarray(result.jac)
        if jac.ndim == 1:
            data_vars["jac"] = xr.DataArray(
                jac, dims=["variables"], coords={"variables": unpacked_variable_names}
            )
        else:
            data_vars["jac"] = xr.DataArray(
                jac,
                dims=["variables", "variables_aux"],
                coords={
                    "variables": unpacked_variable_names,
                    "variables_aux": unpacked_variable_names,
                },
            )

    if hasattr(result, "hess_inv") and result.hess_inv is not None:
        hess_inv = result.hess_inv
        if isinstance(hess_inv, LinearOperator):
            # Materialize the implicit inverse Hessian one column at a time
            n = hess_inv.shape[0]
            eye = np.eye(n)
            hess_inv_mat = np.column_stack([hess_inv.matvec(eye[:, i]) for i in range(n)])
            hess_inv = hess_inv_mat
        else:
            hess_inv = np.asarray(hess_inv)
        data_vars["hess_inv"] = xr.DataArray(
            hess_inv,
            dims=["variables", "variables_aux"],
            coords={"variables": unpacked_variable_names, "variables_aux": unpacked_variable_names},
        )

    if hasattr(result, "nit"):
        data_vars["nit"] = xr.DataArray(result.nit, dims=[])
    if hasattr(result, "nfev"):
        data_vars["nfev"] = xr.DataArray(result.nfev, dims=[])
    if hasattr(result, "njev"):
        data_vars["njev"] = xr.DataArray(result.njev, dims=[])
    if hasattr(result, "status"):
        data_vars["status"] = xr.DataArray(result.status, dims=[])

    # Add any other fields present in result
    for key, value in result.items():
        if key in data_vars:
            continue  # already added
        if value is None:
            continue
        arr = np.asarray(value)

        # TODO: We can probably do something smarter here with a dictionary of all possible values and their
        # expected dimensions.
        dims = [f"{key}_dim_{i}" for i in range(arr.ndim)]
        data_vars[key] = xr.DataArray(
            arr,
            dims=dims,
            coords={f"{key}_dim_{i}": np.arange(arr.shape[i]) for i in range(len(dims))},
        )

    data_vars["method"] = xr.DataArray(np.array(method), dims=[])

    return xr.Dataset(data_vars)

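
A hedged sketch converting a plain scipy.optimize result (the quadratic objective is a stand-in for a real negative log-posterior):

```python
import numpy as np
import pymc as pm

from pymc.blocking import DictToArrayBijection
from scipy.optimize import minimize

with pm.Model(coords={"city": ["NYC", "LA"]}) as m:
    beta = pm.Normal("beta", dims="city")

mu = DictToArrayBijection.map(m.initial_point())
res = minimize(lambda v: np.sum((v - 1.0) ** 2), x0=mu.data, method="BFGS")

ds = optimizer_result_to_dataset(res, method="BFGS", mu=mu, model=m)
print(ds["x"].coords["variables"].values)  # ['beta[NYC]' 'beta[LA]']
```
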
def add_optimizer_result_to_inference_data(
    idata: az.InferenceData,
    result: OptimizeResult,
    method: minimize_method | Literal["basinhopping"],
    mu: RaveledVars | None = None,
    model: pm.Model | None = None,
) -> az.InferenceData:
    """
    Add the optimization result to an InferenceData object.

    Parameters
    ----------
    idata: az.InferenceData
        An InferenceData object containing the approximated posterior samples.
    result: OptimizeResult
        The result of the optimization process.
    method: minimize_method or "basinhopping"
        The optimization method used.
    mu: RaveledVars, optional
        The MAP estimate of the model parameters.
    model: Model, optional
        A PyMC model. If None, the model is taken from the current model context.

    Returns
    -------
    idata: az.InferenceData
        The provided InferenceData, with the optimization results added to the "optimizer_result" group.
    """
    dataset = optimizer_result_to_dataset(result, method=method, mu=mu, model=model)
    idata.add_groups({"optimizer_result": dataset})

    return idata
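
An end-to-end sketch tying the helpers together (toy model and objective; everything here is illustrative):

```python
import arviz as az
import numpy as np
import pymc as pm

from pymc.blocking import DictToArrayBijection, RaveledVars
from scipy.optimize import minimize

with pm.Model(coords={"city": ["NYC", "LA"]}) as m:
    beta = pm.Normal("beta", dims="city")

start = DictToArrayBijection.map(m.initial_point())
res = minimize(lambda v: np.sum((v - 1.0) ** 2), x0=start.data, method="BFGS")
mu = RaveledVars(res.x, start.point_map_info)  # repack the optimum as RaveledVars

idata = add_map_posterior_to_inference_data(az.InferenceData(), {"beta": res.x}, model=m)
idata = add_fit_to_inference_data(idata, mu, np.asarray(res.hess_inv), model=m)
idata = add_optimizer_result_to_inference_data(idata, res, method="BFGS", mu=mu, model=m)
print(idata)  # groups: posterior, fit, optimizer_result
```
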

pymc_extras/inference/pathfinder/pathfinder.py

Lines changed: 2 additions & 2 deletions

@@ -63,7 +63,7 @@
 # TODO: change to typing.Self after Python versions greater than 3.10
 from typing_extensions import Self
 
-from pymc_extras.inference.laplace_approx.idata import add_data_to_inferencedata
+from pymc_extras.inference.laplace_approx.idata import add_data_to_inference_data
 from pymc_extras.inference.pathfinder.importance_sampling import (
     importance_sampling as _importance_sampling,
 )
@@ -1759,6 +1759,6 @@ def fit_pathfinder(
         importance_sampling=importance_sampling,
     )
 
-    idata = add_data_to_inferencedata(idata, progressbar, model, compile_kwargs)
+    idata = add_data_to_inference_data(idata, progressbar, model, compile_kwargs)
 
     return idata
