Skip to content

Commit c9054d2

Browse files
Split idata utilities into idata.py
1 parent 23140a5 commit c9054d2

File tree

3 files changed

+660
-2
lines changed

3 files changed

+660
-2
lines changed
Lines changed: 374 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,374 @@
1+
from functools import reduce
2+
from itertools import product
3+
from typing import Any, Literal
4+
5+
import arviz as az
6+
import numpy as np
7+
import pymc as pm
8+
import xarray as xr
9+
10+
from arviz import dict_to_dataset
11+
from better_optimize.constants import minimize_method
12+
from pymc.backends.arviz import coords_and_dims_for_inferencedata, find_constants, find_observations
13+
from pymc.blocking import RaveledVars
14+
from scipy.optimize import OptimizeResult
15+
from scipy.sparse.linalg import LinearOperator
16+
17+
18+
def make_unpacked_variable_names(name, model: pm.Model) -> list[str]:
19+
coords = model.coords
20+
21+
value_to_dim = {
22+
x.name: model.named_vars_to_dims.get(model.values_to_rvs[x].name, None)
23+
for x in model.value_vars
24+
}
25+
value_to_dim = {k: v for k, v in value_to_dim.items() if v is not None}
26+
27+
rv_to_dim = model.named_vars_to_dims
28+
dims_dict = rv_to_dim | value_to_dim
29+
30+
dims = dims_dict.get(name)
31+
if dims is None:
32+
return [name]
33+
labels = product(*(coords[dim] for dim in dims))
34+
return [f"{name}[{','.join(map(str, label))}]" for label in labels]
35+
36+
37+
def map_results_to_inference_data(results: dict[str, Any], model: pm.Model | None = None):
38+
"""
39+
Convert a dictionary of results to an InferenceData object.
40+
41+
Parameters
42+
----------
43+
results: dict
44+
A dictionary containing the results to convert.
45+
model: Model, optional
46+
A PyMC model. If None, the model is taken from the current model context.
47+
48+
Returns
49+
-------
50+
idata: az.InferenceData
51+
An InferenceData object containing the results.
52+
"""
53+
model = pm.modelcontext(model)
54+
coords, dims = coords_and_dims_for_inferencedata(model)
55+
56+
idata = az.convert_to_inference_data(results, coords=coords, dims=dims)
57+
return idata
58+
59+
60+
def add_map_posterior_to_inference_data(
61+
idata: az.InferenceData,
62+
map_point: dict[str, float | int | np.ndarray],
63+
model: pm.Model | None = None,
64+
):
65+
"""
66+
Add the MAP point to an InferenceData object in the posterior group.
67+
68+
Unlike a typical posterior, the MAP point is a single point estimate rather than a distribution. As a result, it
69+
does not have a chain or draw dimension, and is stored as a single point in the posterior group.
70+
71+
Parameters
72+
----------
73+
idata: az.InferenceData
74+
An InferenceData object to which the MAP point will be added.
75+
map_point: dict
76+
A dictionary containing the MAP point estimates for each variable. The keys should be the variable names, and
77+
the values should be the corresponding MAP estimates.
78+
model: Model, optional
79+
A PyMC model. If None, the model is taken from the current model context.
80+
81+
Returns
82+
-------
83+
idata: az.InferenceData
84+
The provided InferenceData, with the MAP point added to the posterior group.
85+
"""
86+
87+
model = pm.modelcontext(model) if model is None else model
88+
coords, dims = coords_and_dims_for_inferencedata(model)
89+
90+
# The MAP point will have both the transformed and untransformed variables, so we need to ensure that
91+
# we have the correct dimensions for each variable.
92+
var_name_to_value_name = {rv.name: value.name for rv, value in model.rvs_to_values.items()}
93+
dims.update(
94+
{
95+
value_name: dims[var_name]
96+
for var_name, value_name in var_name_to_value_name.items()
97+
if var_name in dims
98+
}
99+
)
100+
101+
posterior_data = {
102+
name: xr.DataArray(
103+
data=np.asarray(value),
104+
coords={dim: coords[dim] for dim in dims.get(name, [])},
105+
dims=dims.get(name),
106+
name=name,
107+
)
108+
for name, value in map_point.items()
109+
}
110+
idata.add_groups(posterior=xr.Dataset(posterior_data))
111+
112+
return idata
113+
114+
115+
def add_fit_to_inference_data(
116+
idata: az.InferenceData, mu: RaveledVars, H_inv: np.ndarray, model: pm.Model | None = None
117+
) -> az.InferenceData:
118+
"""
119+
Add the mean vector and covariance matrix of the Laplace approximation to an InferenceData object.
120+
121+
Parameters
122+
----------
123+
idata: az.InfereceData
124+
An InferenceData object containing the approximated posterior samples.
125+
mu: RaveledVars
126+
The MAP estimate of the model parameters.
127+
H_inv: np.ndarray
128+
The inverse Hessian matrix of the log-posterior evaluated at the MAP estimate.
129+
model: Model, optional
130+
A PyMC model. If None, the model is taken from the current model context.
131+
132+
Returns
133+
-------
134+
idata: az.InferenceData
135+
The provided InferenceData, with the mean vector and covariance matrix added to the "fit" group.
136+
"""
137+
model = pm.modelcontext(model) if model is None else model
138+
139+
variable_names, *_ = zip(*mu.point_map_info)
140+
141+
unpacked_variable_names = reduce(
142+
lambda lst, name: lst + make_unpacked_variable_names(name, model), variable_names, []
143+
)
144+
145+
mean_dataarray = xr.DataArray(mu.data, dims=["rows"], coords={"rows": unpacked_variable_names})
146+
147+
data = {"mean_vector": mean_dataarray}
148+
149+
if H_inv is not None:
150+
cov_dataarray = xr.DataArray(
151+
H_inv,
152+
dims=["rows", "columns"],
153+
coords={"rows": unpacked_variable_names, "columns": unpacked_variable_names},
154+
)
155+
data["covariance_matrix"] = cov_dataarray
156+
157+
dataset = xr.Dataset(data)
158+
idata.add_groups(fit=dataset)
159+
160+
return idata
161+
162+
163+
def add_data_to_inference_data(
164+
idata: az.InferenceData,
165+
progressbar: bool = True,
166+
model: pm.Model | None = None,
167+
compile_kwargs: dict | None = None,
168+
) -> az.InferenceData:
169+
"""
170+
Add observed and constant data to an InferenceData object.
171+
172+
Parameters
173+
----------
174+
idata: az.InferenceData
175+
An InferenceData object containing the approximated posterior samples.
176+
progressbar: bool
177+
Whether to display a progress bar during computations. Default is True.
178+
model: Model, optional
179+
A PyMC model. If None, the model is taken from the current model context.
180+
compile_kwargs: dict, optional
181+
Additional keyword arguments to pass to pytensor.function.
182+
183+
Returns
184+
-------
185+
idata: az.InferenceData
186+
The provided InferenceData, with observed and constant data added.
187+
"""
188+
model = pm.modelcontext(model) if model is None else model
189+
190+
if model.deterministics:
191+
expand_dims = {}
192+
if "chain" not in idata.posterior.coords:
193+
expand_dims["chain"] = [0]
194+
if "draw" not in idata.posterior.coords:
195+
expand_dims["draw"] = [0]
196+
197+
idata.posterior = pm.compute_deterministics(
198+
idata.posterior.expand_dims(expand_dims),
199+
model=model,
200+
merge_dataset=True,
201+
progressbar=progressbar,
202+
compile_kwargs=compile_kwargs,
203+
)
204+
205+
coords, dims = coords_and_dims_for_inferencedata(model)
206+
207+
observed_data = dict_to_dataset(
208+
find_observations(model),
209+
library=pm,
210+
coords=coords,
211+
dims=dims,
212+
default_dims=[],
213+
)
214+
215+
constant_data = dict_to_dataset(
216+
find_constants(model),
217+
library=pm,
218+
coords=coords,
219+
dims=dims,
220+
default_dims=[],
221+
)
222+
223+
idata.add_groups(
224+
{"observed_data": observed_data, "constant_data": constant_data},
225+
coords=coords,
226+
dims=dims,
227+
)
228+
229+
return idata
230+
231+
232+
def optimizer_result_to_dataset(
233+
result: OptimizeResult,
234+
method: minimize_method | Literal["basinhopping"],
235+
mu: RaveledVars | None = None,
236+
model: pm.Model | None = None,
237+
) -> xr.Dataset:
238+
"""
239+
Convert an OptimizeResult object to an xarray Dataset object.
240+
241+
Parameters
242+
----------
243+
result: OptimizeResult
244+
The result of the optimization process.
245+
method: minimize_method or "basinhopping"
246+
The optimization method used.
247+
248+
Returns
249+
-------
250+
dataset: xr.Dataset
251+
An xarray Dataset containing the optimization results.
252+
"""
253+
if not isinstance(result, OptimizeResult):
254+
raise TypeError("result must be an instance of OptimizeResult")
255+
256+
model = pm.modelcontext(model) if model is None else model
257+
variable_names, *_ = zip(*mu.point_map_info)
258+
unpacked_variable_names = reduce(
259+
lambda lst, name: lst + make_unpacked_variable_names(name, model), variable_names, []
260+
)
261+
262+
data_vars = {}
263+
264+
if hasattr(result, "lowest_optimization_result"):
265+
# If we did basinhopping, there's a results inside the results. We want to pop this out and collapse them,
266+
# overwriting outer keys with the inner keys
267+
inner_res = result.pop("lowest_optimization_result")
268+
for key in inner_res.keys():
269+
result[key] = inner_res[key]
270+
271+
if hasattr(result, "x"):
272+
data_vars["x"] = xr.DataArray(
273+
result.x, dims=["variables"], coords={"variables": unpacked_variable_names}
274+
)
275+
if hasattr(result, "fun"):
276+
data_vars["fun"] = xr.DataArray(result.fun, dims=[])
277+
if hasattr(result, "success"):
278+
data_vars["success"] = xr.DataArray(result.success, dims=[])
279+
if hasattr(result, "message"):
280+
data_vars["message"] = xr.DataArray(str(result.message), dims=[])
281+
if hasattr(result, "jac") and result.jac is not None:
282+
jac = np.asarray(result.jac)
283+
if jac.ndim == 1:
284+
data_vars["jac"] = xr.DataArray(
285+
jac, dims=["variables"], coords={"variables": unpacked_variable_names}
286+
)
287+
else:
288+
data_vars["jac"] = xr.DataArray(
289+
jac,
290+
dims=["variables", "variables_aux"],
291+
coords={
292+
"variables": unpacked_variable_names,
293+
"variables_aux": unpacked_variable_names,
294+
},
295+
)
296+
297+
if hasattr(result, "hess_inv") and result.hess_inv is not None:
298+
hess_inv = result.hess_inv
299+
if isinstance(hess_inv, LinearOperator):
300+
n = hess_inv.shape[0]
301+
eye = np.eye(n)
302+
hess_inv_mat = np.column_stack([hess_inv.matvec(eye[:, i]) for i in range(n)])
303+
hess_inv = hess_inv_mat
304+
else:
305+
hess_inv = np.asarray(hess_inv)
306+
data_vars["hess_inv"] = xr.DataArray(
307+
hess_inv,
308+
dims=["variables", "variables_aux"],
309+
coords={"variables": unpacked_variable_names, "variables_aux": unpacked_variable_names},
310+
)
311+
312+
if hasattr(result, "nit"):
313+
data_vars["nit"] = xr.DataArray(result.nit, dims=[])
314+
if hasattr(result, "nfev"):
315+
data_vars["nfev"] = xr.DataArray(result.nfev, dims=[])
316+
if hasattr(result, "njev"):
317+
data_vars["njev"] = xr.DataArray(result.njev, dims=[])
318+
if hasattr(result, "status"):
319+
data_vars["status"] = xr.DataArray(result.status, dims=[])
320+
321+
# Add any other fields present in result
322+
for key, value in result.items():
323+
if key in data_vars:
324+
continue # already added
325+
if value is None:
326+
continue
327+
arr = np.asarray(value)
328+
329+
# TODO: We can probably do something smarter here with a dictionary of all possible values and their expected
330+
# dimensions.
331+
dims = [f"{key}_dim_{i}" for i in range(arr.ndim)]
332+
data_vars[key] = xr.DataArray(
333+
arr,
334+
dims=dims,
335+
coords={f"{key}_dim_{i}": np.arange(arr.shape[i]) for i in range(len(dims))},
336+
)
337+
338+
data_vars["method"] = xr.DataArray(np.array(method), dims=[])
339+
340+
return xr.Dataset(data_vars)
341+
342+
343+
def add_optimizer_result_to_inference_data(
344+
idata: az.InferenceData,
345+
result: OptimizeResult,
346+
method: minimize_method | Literal["basinhopping"],
347+
mu: RaveledVars | None = None,
348+
model: pm.Model | None = None,
349+
) -> az.InferenceData:
350+
"""
351+
Add the optimization result to an InferenceData object.
352+
353+
Parameters
354+
----------
355+
idata: az.InferenceData
356+
An InferenceData object containing the approximated posterior samples.
357+
result: OptimizeResult
358+
The result of the optimization process.
359+
method: minimize_method or "basinhopping"
360+
The optimization method used.
361+
mu: RaveledVars, optional
362+
The MAP estimate of the model parameters.
363+
model: Model, optional
364+
A PyMC model. If None, the model is taken from the current model context.
365+
366+
Returns
367+
-------
368+
idata: az.InferenceData
369+
The provided InferenceData, with the optimization results added to the "optimizer" group.
370+
"""
371+
dataset = optimizer_result_to_dataset(result, method=method, mu=mu, model=model)
372+
idata.add_groups({"optimizer_result": dataset})
373+
374+
return idata

pymc_extras/inference/pathfinder/pathfinder.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
# TODO: change to typing.Self after Python versions greater than 3.10
6464
from typing_extensions import Self
6565

66-
from pymc_extras.inference.laplace_approx.idata import add_data_to_inferencedata
66+
from pymc_extras.inference.laplace_approx.idata import add_data_to_inference_data
6767
from pymc_extras.inference.pathfinder.importance_sampling import (
6868
importance_sampling as _importance_sampling,
6969
)
@@ -1759,6 +1759,6 @@ def fit_pathfinder(
17591759
importance_sampling=importance_sampling,
17601760
)
17611761

1762-
idata = add_data_to_inferencedata(idata, progressbar, model, compile_kwargs)
1762+
idata = add_data_to_inference_data(idata, progressbar, model, compile_kwargs)
17631763

17641764
return idata

0 commit comments

Comments
 (0)