
Commit dd4aef7

fix: rebuild flow logic in score methods (#1404)
* change rebuild_flow default to False
* rebuild flow for each new x, but not in __call__; fix map test
* remove old kwargs
* add tolerance options in log_prob; fix map test
* fix tests
1 parent e063c58 · commit dd4aef7

7 files changed: +69 / -57 lines


sbi/diagnostics/sbc.py
Lines changed: 2 additions & 2 deletions

@@ -10,7 +10,7 @@
 from torch.distributions import Uniform
 from tqdm.auto import tqdm
 
-from sbi.inference import DirectPosterior
+from sbi.inference import DirectPosterior, ScorePosterior
 from sbi.inference.posteriors.base_posterior import NeuralPosterior
 from sbi.inference.posteriors.vi_posterior import VIPosterior
 from sbi.utils.diagnostics_utils import (

@@ -186,7 +186,7 @@ def get_nltp(thetas: Tensor, xs: Tensor, posterior: NeuralPosterior) -> Tensor:
         nltp: negative log probs of true parameters under approximate posteriors.
     """
     nltp = torch.zeros(thetas.shape[0])
-    unnormalized_log_prob = not isinstance(posterior, DirectPosterior)
+    unnormalized_log_prob = not isinstance(posterior, (DirectPosterior, ScorePosterior))
 
     for idx, (tho, xo) in enumerate(zip(thetas, xs)):
         # Log prob of true params under posterior.
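
For context, a minimal sketch of how `get_nltp` would be used after this change; the variable names (`posterior`, `thetas`, `xs`) and the import path are assumptions, not part of the diff:

    # Assumes a trained score-based posterior and matching (thetas, xs) pairs.
    from sbi.diagnostics.sbc import get_nltp

    # With this commit, ScorePosterior is grouped with DirectPosterior, so the
    # unnormalized_log_prob flag is False and the returned negative log-probs
    # of the true parameters are treated as normalized.
    nltp = get_nltp(thetas, xs, posterior)  # shape: (thetas.shape[0],)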

sbi/inference/posteriors/score_posterior.py
Lines changed: 32 additions & 8 deletions

@@ -272,8 +272,8 @@ def log_prob(
         x: Optional[Tensor] = None,
         track_gradients: bool = False,
         atol: float = 1e-5,
-        rtol: float = 1e-6,
-        exact: bool = True,
+        rtol: float = 1e-5,
+        exact: bool = False,
     ) -> Tensor:
         r"""Returns the log-probability of the posterior $p(\theta|x)$.

@@ -294,15 +294,14 @@ def log_prob(
             `(len(θ),)`-shaped log posterior probability $\log p(\theta|x)$ for θ in the
             support of the prior, -∞ (corresponding to 0 probability) outside.
         """
-        self.potential_fn.set_x(self._x_else_default_x(x))
+        self.potential_fn.set_x(
+            self._x_else_default_x(x), atol=atol, rtol=rtol, exact=exact
+        )
 
         theta = ensure_theta_batched(torch.as_tensor(theta))
         return self.potential_fn(
             theta.to(self._device),
             track_gradients=track_gradients,
-            atol=atol,
-            rtol=rtol,
-            exact=exact,
         )
 
     def sample_batched(
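
A short usage sketch of the updated `log_prob` API, assuming a trained `ScorePosterior` named `posterior` and tensors `theta_o`, `x_o` (names hypothetical):

    # New defaults after this commit: atol=1e-5, rtol=1e-5, exact=False.
    log_prob = posterior.log_prob(theta_o, x=x_o)

    # Tolerances are now forwarded to set_x, which rebuilds the CNF for this x;
    # tighter tolerances give a more accurate (but slower) evaluation.
    log_prob_tight = posterior.log_prob(theta_o, x=x_o, atol=1e-6, rtol=1e-6, exact=True)
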
@@ -318,6 +317,31 @@ def sample_batched(
         max_sampling_batch_size: int = 10000,
         show_progress_bars: bool = True,
     ) -> Tensor:
+        r"""Given a batch of observations [x_1, ..., x_B] this function samples from
+        posteriors $p(\theta|x_1)$, ..., $p(\theta|x_B)$, in a batched (i.e. vectorized)
+        manner.
+
+        Args:
+            sample_shape: Desired shape of samples that are drawn from the posterior
+                given every observation.
+            x: A batch of observations, of shape `(batch_dim, event_shape_x)`.
+                `batch_dim` corresponds to the number of observations to be
+                drawn.
+            predictor: The predictor for the diffusion-based sampler. Can be a string or
+                a custom predictor following the API in `sbi.samplers.score.predictors`.
+                Currently, only `euler_maruyama` is implemented.
+            corrector: The corrector for the diffusion-based sampler.
+            predictor_params: Additional parameters passed to predictor.
+            corrector_params: Additional parameters passed to corrector.
+            steps: Number of steps to take for the Euler-Maruyama method.
+            ts: Time points at which to evaluate the diffusion process. If None, a
+                linear grid between t_max and t_min is used.
+            max_sampling_batch_size: Maximum batch size for sampling.
+            show_progress_bars: Whether to show sampling progress monitor.
+
+        Returns:
+            Samples from the posteriors of shape (*sample_shape, B, *input_shape)
+        """
         num_samples = torch.Size(sample_shape).numel()
         x = reshape_to_batch_event(x, self.score_estimator.condition_shape)
         condition_dim = len(self.score_estimator.condition_shape)

@@ -339,7 +363,6 @@ def sample_batched(
             num_xos=batch_size,
             show_progress_bars=show_progress_bars,
             max_sampling_batch_size=max_sampling_batch_size,
-            proposal_sampling_kwargs={"x": x},
         )[0]
         samples = samples.reshape(
             sample_shape + batch_shape + self.score_estimator.input_shape
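
An illustrative call matching the new docstring; `posterior` and `x_batch` are hypothetical, and the predictor/steps arguments are taken from the documented parameters:

    # x_batch: B observations of shape (B, *event_shape_x).
    samples = posterior.sample_batched(
        sample_shape=(1000,),
        x=x_batch,
        predictor="euler_maruyama",  # currently the only implemented predictor
        steps=500,
    )
    # Returned shape: (*sample_shape, B, *input_shape), e.g. (1000, B, theta_dim).
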
@@ -436,7 +459,8 @@ def map(
         )
 
         if self._map is None or force_update:
-            self.potential_fn.set_x(self.default_x)
+            # rebuild coarse flow fast for MAP optimization.
+            self.potential_fn.set_x(self.default_x, atol=1e-2, rtol=1e-3, exact=True)
             callable_potential_fn = CallableDifferentiablePotentialFunction(
                 self.potential_fn
             )
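
A sketch of the resulting MAP workflow; with this change the coarse, fast flow (atol=1e-2, rtol=1e-3, exact=True) is rebuilt internally, so the caller passes no tolerances (names hypothetical):

    # The potential's flow is rebuilt with coarse tolerances before the
    # gradient ascent starts; no extra arguments are needed.
    map_estimate = posterior.map(show_progress_bars=True)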

sbi/inference/potentials/score_based_potential.py
Lines changed: 18 additions & 31 deletions

@@ -1,7 +1,6 @@
 # This file is part of sbi, a toolkit for simulation-based inference. sbi is licensed
 # under the Apache License Version 2.0, see <https://www.apache.org/licenses/>
 
-from functools import partial
 from typing import Optional, Tuple
 
 import torch
@@ -79,41 +78,36 @@ def set_x(
         self,
         x_o: Optional[Tensor],
         x_is_iid: Optional[bool] = False,
-        rebuild_flow: Optional[bool] = True,
+        atol: float = 1e-5,
+        rtol: float = 1e-6,
+        exact: bool = True,
     ):
         """
         Set the observed data and whether it is IID.
+
+        Rebuids the continuous normalizing flow if the observed data is set.
+
         Args:
-            x_o: The observed data.
-            x_is_iid: Whether the observed data is IID (if batch_dim>1).
-            rebuild_flow: Whether to save (overwrrite) a low-tolerance flow model, useful if
-                the flow needs to be evaluated many times (e.g. for MAP calculation).
+            x_o: The observed data.
+            x_is_iid: Whether the observed data is IID (if batch_dim>1).
+            atol: Absolute tolerance for the ODE solver.
+            rtol: Relative tolerance for the ODE solver.
+            exact: Whether to use the exact ODE solver.
         """
         super().set_x(x_o, x_is_iid)
-        if rebuild_flow and self._x_o is not None:
-            # By default, we want a high-tolerance flow.
-            # This flow will be used mainly for MAP calculations, hence we want to save
-            # it instead of rebuilding it every time.
-            self.flow = self.rebuild_flow(atol=1e-2, rtol=1e-3, exact=True)
+        if self._x_o is not None:
+            self.flow = self.rebuild_flow(atol=atol, rtol=rtol, exact=exact)
 
     def __call__(
         self,
         theta: Tensor,
         track_gradients: bool = True,
-        rebuild_flow: bool = True,
-        atol: float = 1e-5,
-        rtol: float = 1e-6,
-        exact: bool = True,
     ) -> Tensor:
         """Return the potential (posterior log prob) via probability flow ODE.
 
         Args:
             theta: The parameters at which to evaluate the potential.
             track_gradients: Whether to track gradients.
-            rebuild_flow: Whether to rebuild the CNF for accurate log_prob evaluation.
-            atol: Absolute tolerance for the ODE solver.
-            rtol: Relative tolerance for the ODE solver.
-            exact: Whether to use the exact ODE solver.
 
         Returns:
             The potential function, i.e., the log probability of the posterior.
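
A sketch of the updated potential-level API; `potential_fn` is assumed to be a `PosteriorScoreBasedPotential` instance with `x_o` and `theta` defined:

    # set_x now takes the ODE-solver tolerances and rebuilds the continuous
    # normalizing flow whenever an observation is set (defaults shown).
    potential_fn.set_x(x_o, atol=1e-5, rtol=1e-6, exact=True)

    # __call__ no longer rebuilds the flow; it evaluates the cached one.
    log_prob = potential_fn(theta, track_gradients=False)
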
@@ -123,15 +117,9 @@ def __call__(
             theta, theta.shape[1:], leading_is_sample=True
         )
         self.score_estimator.eval()
-        # use rebuild_flow to evaluate log_prob with better precision, without
-        # overwriting self.flow
-        if rebuild_flow or self.flow is None:
-            flow = self.rebuild_flow(atol=atol, rtol=rtol, exact=exact)
-        else:
-            flow = self.flow
 
         with torch.set_grad_enabled(track_gradients):
-            log_probs = flow.log_prob(theta_density_estimator).squeeze(-1)
+            log_probs = self.flow.log_prob(theta_density_estimator).squeeze(-1)
             # Force probability to be zero outside prior support.
             in_prior_support = within_support(self.prior, theta)
 
@@ -217,7 +205,7 @@ def rebuild_flow(
         x_density_estimator = reshape_to_batch_event(
             self.x_o, event_shape=self.score_estimator.condition_shape
         )
-        assert x_density_estimator.shape[0] == 1, (
+        assert x_density_estimator.shape[0] == 1 or not self.x_is_iid, (
             "PosteriorScoreBasedPotential supports only x batchsize of 1`."
         )
 
@@ -312,9 +300,8 @@ def __init__(self, posterior_score_based_potential):
         self.posterior_score_based_potential = posterior_score_based_potential
 
     def __call__(self, input):
-        prepared_potential = partial(
-            self.posterior_score_based_potential.__call__, rebuild_flow=False
-        )
         return DifferentiablePotentialFunction.apply(
-            input, prepared_potential, self.posterior_score_based_potential.gradient
+            input,
+            self.posterior_score_based_potential.__call__,
+            self.posterior_score_based_potential.gradient,
         )
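
Because `__call__` no longer needs a `rebuild_flow` override, the differentiable wrapper can hand the bound method straight to autograd. A rough sketch of how the wrapper is used during gradient-based MAP optimization; the usage pattern and `theta_init` are assumptions:

    # Wrap the potential so torch autograd can differentiate through it via
    # the potential's own gradient (score) function.
    callable_potential = CallableDifferentiablePotentialFunction(potential_fn)

    theta = theta_init.clone().requires_grad_(True)
    value = callable_potential(theta)
    value.sum().backward()  # gradients are supplied by potential_fn.gradient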

sbi/utils/sbiutils.py
Lines changed: 2 additions & 1 deletion

@@ -961,7 +961,8 @@ def gradient_ascent(
                 f"Optimizing MAP estimate. Iterations: {iter_ + 1} / "
                 f"{num_iter}. Performance in iteration "
                 f"{divmod(iter_ + 1, save_best_every)[0] * save_best_every}: "
-                f"{best_log_prob_iter.item():.2f} (= unnormalized log-prob)",
+                f"{best_log_prob_iter.item():.2f} (= unnormalized log-prob). "
+                "Press Ctrl-C to interrupt.",
                 end="",
             )
     argmax_ = theta_transform.inv(best_theta_overall)

tests/linearGaussian_npse_test.py
Lines changed: 3 additions & 6 deletions

@@ -1,7 +1,6 @@
 from typing import List
 
 import pytest
-import torch
 from torch import eye, ones, zeros
 from torch.distributions import MultivariateNormal
 

@@ -223,11 +222,9 @@ def test_npse_map():
     theta = prior.sample((num_simulations,))
     x = linear_gaussian(theta, likelihood_shift, likelihood_cov)
 
-    inference.append_simulations(theta, x).train(
-        training_batch_size=100, max_num_epochs=10
-    )
+    inference.append_simulations(theta, x).train()
     posterior = inference.build_posterior().set_default_x(x_o)
 
-    map_ = posterior.map(show_progress_bars=True)
+    map_ = posterior.map(show_progress_bars=True, num_iter=5)
 
-    assert torch.allclose(map_, gt_posterior.mean, atol=0.4), "MAP is not close to GT."
+    assert ((map_ - gt_posterior.mean) ** 2).sum() < 0.5, "MAP is not close to GT."

tests/save_and_load_test.py
Lines changed: 7 additions & 5 deletions

@@ -21,7 +21,9 @@
         (NRE, "rejection"),
     ),
 )
-def test_picklability(inference_method, sampling_method: str, tmp_path):
+def test_picklability(
+    inference_method, sampling_method: str, tmp_path, mcmc_params_fast
+):
     num_dim = 2
     prior = utils.BoxUniform(low=-2 * torch.ones(num_dim), high=2 * torch.ones(num_dim))
     x_o = torch.zeros(1, num_dim)

@@ -31,15 +33,15 @@ def test_picklability(inference_method, sampling_method: str, tmp_path):
 
     inference = inference_method(prior=prior)
     _ = inference.append_simulations(theta, x).train(max_num_epochs=1)
-    posterior = inference.build_posterior(sample_with=sampling_method).set_default_x(
-        x_o
-    )
+    posterior = inference.build_posterior(
+        sample_with=sampling_method, mcmc_parameters=mcmc_params_fast
+    ).set_default_x(x_o)
 
     # After sample and log_prob, the posterior should still be picklable
     if isinstance(posterior, VIPosterior):
         posterior.train(max_num_iters=10)
     _ = posterior.sample((1,))
-    _ = posterior.log_prob(torch.zeros(1, num_dim))
+    _ = posterior.potential(torch.zeros(1, num_dim))
 
     with open(f"{tmp_path}/saved_posterior.pickle", "wb") as handle:
         pickle.dump(posterior, handle)

tests/sbc_test.py
Lines changed: 5 additions & 4 deletions

@@ -29,7 +29,7 @@
         (NPSE, None),
     ),
 )
-def test_running_sbc(method, prior, reduce_fn_str, sampler, mcmc_params_accurate: dict):
+def test_running_sbc(method, prior, reduce_fn_str, sampler, mcmc_params_fast: dict):
     """Tests running inference and then SBC and obtaining nltp."""
 
     num_dim = 2

@@ -59,7 +59,7 @@ def test_running_sbc(method, prior, reduce_fn_str, sampler, mcmc_params_accurate
         posterior_kwargs = {
             "sample_with": "mcmc" if sampler == "mcmc" else "vi",
             "mcmc_method": "slice_np_vectorized",
-            "mcmc_parameters": mcmc_params_accurate,
+            "mcmc_parameters": mcmc_params_fast,
         }
     else:
         posterior_kwargs = {}

@@ -69,7 +69,7 @@ def test_running_sbc(method, prior, reduce_fn_str, sampler, mcmc_params_accurate
     thetas = prior.sample((num_sbc_runs,))
     xs = linear_gaussian(thetas, likelihood_shift, likelihood_cov)
 
-    reduce_fn = "marginals" if reduce_fn_str == "marginals" else posterior.log_prob
+    reduce_fn = "marginals" if reduce_fn_str == "marginals" else posterior.potential
     run_sbc(
         thetas,
         xs,

@@ -79,7 +79,8 @@ def test_running_sbc(method, prior, reduce_fn_str, sampler, mcmc_params_accurate
     )
 
     # Check nltp
-    get_nltp(thetas, xs, posterior)
+    if method in [NPE, NPSE]:
+        get_nltp(thetas, xs, posterior)
 
 
 @pytest.mark.slow
