
Commit eae9cc9

tests: fix slow vector field tests, fix iid-scores (#1657)
* wip: fix vf tests
* adapt tests
* refactor score utils, small fixes
* refactor vf slow tests
* remove NaN check during diffusion
* move NaN check to last diffusion step
* skip iid-score tests for NPSE as well
* move NaN check to posterior.sample level, update tests, fix rejection sampling warning
1 parent e3fdb10 commit eae9cc9

File tree

6 files changed: +204 -143 lines


sbi/inference/posteriors/vector_field_posterior.py

Lines changed: 52 additions & 26 deletions
@@ -1,6 +1,7 @@
 # This file is part of sbi, a toolkit for simulation-based inference. sbi is licensed
 # under the Apache License Version 2.0, see <https://www.apache.org/licenses/>

+import math
 import warnings
 from typing import Dict, Literal, Optional, Union

@@ -150,7 +151,9 @@ def sample(
         corrector_params: Optional[Dict] = None,
         steps: int = 500,
         ts: Optional[Tensor] = None,
-        iid_method: Literal["fnpe", "gauss", "auto_gauss", "jac_gauss"] = "auto_gauss",
+        iid_method: Optional[
+            Literal["fnpe", "gauss", "auto_gauss", "jac_gauss"]
+        ] = None,
         iid_params: Optional[Dict] = None,
         max_sampling_batch_size: int = 10_000,
         sample_with: Optional[str] = None,
@@ -201,19 +204,22 @@ def sample(
         x = reshape_to_batch_event(x, self.vector_field_estimator.condition_shape)
         is_iid = x.shape[0] > 1
         self.potential_fn.set_x(
-            x, x_is_iid=is_iid, iid_method=iid_method, iid_params=iid_params
+            x,
+            x_is_iid=is_iid,
+            iid_method=iid_method or self.potential_fn.iid_method,
+            iid_params=iid_params,
         )

         num_samples = torch.Size(sample_shape).numel()

         if sample_with == "ode":
-            samples = rejection.accept_reject_sample(
+            samples, _ = rejection.accept_reject_sample(
                 proposal=self.sample_via_ode,
                 accept_reject_fn=lambda theta: within_support(self.prior, theta),
                 num_samples=num_samples,
                 show_progress_bars=show_progress_bars,
                 max_sampling_batch_size=max_sampling_batch_size,
-            )[0]
+            )
         elif sample_with == "sde":
             proposal_sampling_kwargs = {
                 "predictor": predictor,
@@ -225,14 +231,14 @@ def sample(
                 "max_sampling_batch_size": max_sampling_batch_size,
                 "show_progress_bars": show_progress_bars,
             }
-            samples = rejection.accept_reject_sample(
+            samples, _ = rejection.accept_reject_sample(
                 proposal=self._sample_via_diffusion,
                 accept_reject_fn=lambda theta: within_support(self.prior, theta),
                 num_samples=num_samples,
                 show_progress_bars=show_progress_bars,
                 max_sampling_batch_size=max_sampling_batch_size,
                 proposal_sampling_kwargs=proposal_sampling_kwargs,
-            )[0]
+            )
         else:
             raise ValueError(
                 f"Expected sample_with to be 'ode' or 'sde', but got {sample_with}."
@@ -282,13 +288,16 @@ def _sample_via_diffusion(
                 "The vector field estimator does not support the 'sde' sampling method."
             )

-        num_samples = torch.Size(sample_shape).numel()
+        total_samples_needed = torch.Size(sample_shape).numel()

-        max_sampling_batch_size = (
+        # Determine effective batch size for sampling
+        effective_batch_size = (
             self.max_sampling_batch_size
             if max_sampling_batch_size is None
             else max_sampling_batch_size
         )
+        # Ensure we don't use larger batches than total samples needed
+        effective_batch_size = min(effective_batch_size, total_samples_needed)

         # TODO: the time schedule should be provided by the estimator, see issue #1437
         if ts is None:
@@ -297,28 +306,45 @@ def _sample_via_diffusion(
             ts = torch.linspace(t_max, t_min, steps)
         ts = ts.to(self.device)

+        # Initialize the diffusion sampler
         diffuser = Diffuser(
             self.potential_fn,
             predictor=predictor,
             corrector=corrector,
             predictor_params=predictor_params,
             corrector_params=corrector_params,
         )
-        max_sampling_batch_size = min(max_sampling_batch_size, num_samples)
-        samples = []
-        num_iter = num_samples // max_sampling_batch_size
-        num_iter = (
-            num_iter + 1 if (num_samples % max_sampling_batch_size) != 0 else num_iter
-        )
-        for _ in range(num_iter):
-            samples.append(
-                diffuser.run(
-                    num_samples=max_sampling_batch_size,
-                    ts=ts,
-                    show_progress_bars=show_progress_bars,
-                )
+
+        # Calculate how many batches we need
+        num_batches = math.ceil(total_samples_needed / effective_batch_size)
+
+        # Generate samples in batches
+        all_samples = []
+        samples_generated = 0
+
+        for _ in range(num_batches):
+            # Calculate how many samples to generate in this batch
+            remaining_samples = total_samples_needed - samples_generated
+            current_batch_size = min(effective_batch_size, remaining_samples)
+
+            # Generate samples for this batch
+            batch_samples = diffuser.run(
+                num_samples=current_batch_size,
+                ts=ts,
+                show_progress_bars=show_progress_bars,
+            )
+
+            all_samples.append(batch_samples)
+            samples_generated += current_batch_size
+
+        # Concatenate all batches and ensure we return exactly the requested number
+        samples = torch.cat(all_samples, dim=0)[:total_samples_needed]
+
+        if torch.isnan(samples).all():
+            raise RuntimeError(
+                "All samples NaN after diffusion sampling. "
+                "This may indicate numerical instability in the vector field."
             )
-        samples = torch.cat(samples, dim=0)[:num_samples]

         return samples

@@ -443,14 +469,14 @@ def sample_batched(
             max_sampling_batch_size = capped

         if self.sample_with == "ode":
-            samples = rejection.accept_reject_sample(
+            samples, _ = rejection.accept_reject_sample(
                 proposal=self.sample_via_ode,
                 accept_reject_fn=lambda theta: within_support(self.prior, theta),
                 num_samples=num_samples,
                 num_xos=batch_size,
                 show_progress_bars=show_progress_bars,
                 max_sampling_batch_size=max_sampling_batch_size,
-            )[0]
+            )
             samples = samples.reshape(
                 sample_shape + batch_shape + self.vector_field_estimator.input_shape
             )
@@ -465,15 +491,15 @@ def sample_batched(
                 "max_sampling_batch_size": max_sampling_batch_size,
                 "show_progress_bars": show_progress_bars,
             }
-            samples = rejection.accept_reject_sample(
+            samples, _ = rejection.accept_reject_sample(
                 proposal=self._sample_via_diffusion,
                 accept_reject_fn=lambda theta: within_support(self.prior, theta),
                 num_samples=num_samples,
                 num_xos=batch_size,
                 show_progress_bars=show_progress_bars,
                 max_sampling_batch_size=max_sampling_batch_size,
                 proposal_sampling_kwargs=proposal_sampling_kwargs,
-            )[0]
+            )
             samples = samples.reshape(
                 sample_shape + batch_shape + self.vector_field_estimator.input_shape
             )
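
Note: the reworked `_sample_via_diffusion` replaces the floor-division-plus-remainder loop with an explicit `math.ceil` batch count and a running counter. A minimal standalone sketch of that batching scheme (illustrative only, not the sbi API; the function name is hypothetical):

import math


def batch_sizes(total_samples_needed: int, max_batch_size: int) -> list:
    """Return per-batch sizes that sum exactly to the requested total."""
    effective_batch_size = min(max_batch_size, total_samples_needed)
    num_batches = math.ceil(total_samples_needed / effective_batch_size)
    sizes, generated = [], 0
    for _ in range(num_batches):
        current = min(effective_batch_size, total_samples_needed - generated)
        sizes.append(current)
        generated += current
    return sizes


# The last batch shrinks instead of overshooting and truncating.
assert batch_sizes(25, 10) == [10, 10, 5]
assert sum(batch_sizes(7, 100)) == 7

Because each batch size is exact, the final `[:total_samples_needed]` slice becomes a safeguard rather than a correctness requirement, and the all-NaN guard now raises here instead of inside the diffusion loop.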

sbi/inference/potentials/score_fn_iid.py

Lines changed: 2 additions & 2 deletions
@@ -661,7 +661,7 @@ def estimate_posterior_precision(
     precision_est_budget = min(int(prior.event_shape[0] * 1000), 5000)

     thetas = posterior.sample_batched(
-        torch.Size([precision_est_budget]),
+        sample_shape=torch.Size([precision_est_budget]),
         x=conditions,
         show_progress_bars=False,
         steps=precision_initial_sampler_steps,
@@ -740,7 +740,7 @@ def ensure_lam_positive_definite(
     denoising_posterior_precision: torch.Tensor,
     N: int,
     precision_nugget: float = 0.1,
-) -> (torch.Tensor, torch.Tensor):
+) -> tuple[torch.Tensor, torch.Tensor]:
     r"""
     Ensure that the matrix is positive definite.
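
Note: the annotation fix matters because `(torch.Tensor, torch.Tensor)` is a tuple value, not a type, so type checkers reject it; the builtin generic from PEP 585 spells the intended type. A minimal illustration (both function names are hypothetical):

import torch


# Before: a parenthesized pair is a tuple *instance*, not a type annotation;
# mypy/pyright flag it as invalid, even though it runs.
def bad_signature() -> (torch.Tensor, torch.Tensor):
    return torch.zeros(2), torch.ones(2)


# After: the builtin generic (Python >= 3.9) expresses "a pair of tensors".
def good_signature() -> tuple[torch.Tensor, torch.Tensor]:
    return torch.zeros(2), torch.ones(2)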

sbi/inference/potentials/vector_field_potential.py

Lines changed: 38 additions & 38 deletions
@@ -21,42 +21,6 @@
 from sbi.utils.torchutils import ensure_theta_batched


-def vector_field_estimator_based_potential(
-    vector_field_estimator: ConditionalVectorFieldEstimator,
-    prior: Optional[Distribution],
-    x_o: Optional[Tensor],
-    enable_transform: bool = True,
-    **kwargs,
-) -> Tuple["VectorFieldBasedPotential", TorchTransform]:
-    r"""Returns the potential function gradient for vector field estimators.
-
-    Args:
-        vector_field_estimator: The neural network modelling the vector field.
-        prior: The prior distribution.
-        x_o: The observed data at which to evaluate the vector field.
-        enable_transform: Whether to enable transforms. Not supported yet.
-        **kwargs: Additional keyword arguments passed to
-            `VectorFieldBasedPotential`.
-    Returns:
-        The potential function and a transformation that maps
-        to unconstrained space.
-    """
-    device = str(next(vector_field_estimator.parameters()).device)
-
-    potential_fn = VectorFieldBasedPotential(
-        vector_field_estimator, prior, x_o, device=device, **kwargs
-    )
-
-    if prior is not None:
-        theta_transform = mcmc_transform(
-            prior, device=device, enable_transform=enable_transform
-        )
-    else:
-        theta_transform = torch.distributions.transforms.identity_transform
-
-    return potential_fn, theta_transform
-
-
 class VectorFieldBasedPotential(BasePotential):
     def __init__(
         self,
@@ -130,7 +94,7 @@ def set_x(
         self,
         x_o: Optional[Tensor],
         x_is_iid: Optional[bool] = False,
-        iid_method: Literal["fnpe", "gauss", "auto_gauss", "jac_gauss"] = "auto_gauss",
+        iid_method: Optional[str] = None,
         iid_params: Optional[Dict[str, Any]] = None,
         **ode_kwargs,
     ):
@@ -149,7 +113,7 @@ def set_x(
             ode_kwargs: Additional keyword arguments for the neural ODE.
         """
         super().set_x(x_o, x_is_iid)
-        self.iid_method = iid_method
+        self.iid_method = iid_method or self.iid_method
         self.iid_params = iid_params
         # NOTE: Once IID potential evaluation is supported. This needs to be adapted.
         # See #1450.
@@ -286,6 +250,42 @@ def rebuild_flow(self, **kwargs) -> NormalizingFlow:
         return flow


+def vector_field_estimator_based_potential(
+    vector_field_estimator: ConditionalVectorFieldEstimator,
+    prior: Optional[Distribution],
+    x_o: Optional[Tensor],
+    enable_transform: bool = True,
+    **kwargs,
+) -> Tuple[VectorFieldBasedPotential, TorchTransform]:
+    r"""Returns the potential function gradient for vector field estimators.
+
+    Args:
+        vector_field_estimator: The neural network modelling the vector field.
+        prior: The prior distribution.
+        x_o: The observed data at which to evaluate the vector field.
+        enable_transform: Whether to enable transforms. Not supported yet.
+        **kwargs: Additional keyword arguments passed to
+            `VectorFieldBasedPotential`.
+    Returns:
+        The potential function and a transformation that maps
+        to unconstrained space.
+    """
+    device = str(next(vector_field_estimator.parameters()).device)
+
+    potential_fn = VectorFieldBasedPotential(
+        vector_field_estimator, prior, x_o, device=device, **kwargs
+    )
+
+    if prior is not None:
+        theta_transform = mcmc_transform(
+            prior, device=device, enable_transform=enable_transform
+        )
+    else:
+        theta_transform = torch.distributions.transforms.identity_transform
+
+    return potential_fn, theta_transform
+
+
 class DifferentiablePotentialFunction(torch.autograd.Function):
     """
     A wrapper of `VectorFieldBasedPotential` with a custom autograd function
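
Note: moving `vector_field_estimator_based_potential` below the class lets the return annotation reference `VectorFieldBasedPotential` directly instead of the string forward reference. A usage sketch under stated assumptions (`trained_estimator` stands in for a fitted `ConditionalVectorFieldEstimator`; shapes are illustrative):

import torch
from sbi.inference.potentials.vector_field_potential import (
    vector_field_estimator_based_potential,
)
from sbi.utils import BoxUniform

# Placeholder: a trained vector field estimator, e.g. from NPSE/FMPE training.
prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))
x_o = torch.randn(5, 2)  # five iid observations (illustrative shape)

potential_fn, theta_transform = vector_field_estimator_based_potential(
    trained_estimator, prior=prior, x_o=x_o
)

# With the new set_x signature, iid_method=None keeps the previously
# configured method instead of silently resetting it to "auto_gauss".
potential_fn.set_x(x_o, x_is_iid=True, iid_method=None)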

sbi/samplers/rejection/rejection.py

Lines changed: 6 additions & 2 deletions
@@ -269,7 +269,9 @@ def accept_reject_sample(
     pbar = tqdm(
         disable=not show_progress_bars,
         total=num_samples,
-        desc=f"Drawing {num_samples} posterior samples for {num_xos} observations",
+        desc=f"Drawing {num_samples} samples for {num_xos} observation" + "s"
+        if num_xos > 1
+        else "",
     )

     accepted = [[] for _ in range(num_xos)]
@@ -280,6 +282,7 @@ def accept_reject_sample(
     sampling_batch_size = min(num_samples, max_sampling_batch_size)
     num_sampled_total = torch.zeros(num_xos)
     num_samples_possible = 0
+
     while num_remaining > 0:
         # Sample and reject.
         candidates = proposal(
@@ -288,6 +291,7 @@ def accept_reject_sample(
         )
         # SNPE-style rejection-sampling when the proposal is the neural net.
         are_accepted = accept_reject_fn(candidates)
+
         # Reshape necessary in certain cases which do not follow the shape conventions
         # of the "DensityEstimator" class.
         are_accepted = are_accepted.reshape(sampling_batch_size, num_xos)
@@ -323,7 +327,7 @@ def accept_reject_sample(
         max(int(1.5 * num_remaining / max(min_acceptance_rate, 1e-12)), 100),
     )
     if (
-        num_samples_possible > 1000
+        num_samples_possible > (sampling_batch_size - 1)
        and min_acceptance_rate < warn_acceptance
        and not leakage_warning_raised
    ):
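
Note: the leakage warning previously required more than 1000 proposed samples before it could fire; it now fires once one full sampling batch has been evaluated, so small batch sizes no longer suppress it. A simplified sketch of the condition (the real code also tracks a `leakage_warning_raised` flag; the function name and default are assumptions):

def should_warn_about_leakage(
    num_samples_possible: int,
    min_acceptance_rate: float,
    sampling_batch_size: int,
    warn_acceptance: float = 0.01,
) -> bool:
    # New rule: one completed batch of evidence is enough to warn.
    return (
        num_samples_possible > (sampling_batch_size - 1)
        and min_acceptance_rate < warn_acceptance
    )


# A batch of 100 candidates at 0.1% acceptance now triggers the warning;
# under the old fixed ">1000" rule it would have stayed silent.
assert should_warn_about_leakage(100, 0.001, sampling_batch_size=100)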

sbi/samplers/score/diffuser.py

Lines changed: 18 additions & 6 deletions
@@ -143,22 +143,34 @@ def run(
         Returns:
             Tensor: Samples from the distribution(s).
         """
+        # Initialize samples from the base distribution
         samples = self.initialize(num_samples).to(ts.device)
+
+        # Set up progress bar for time-stepping through the diffusion process
+        total_time_steps = ts.numel() - 1  # We skip the first time point
         pbar = tqdm(
             range(1, ts.numel()),
             disable=not show_progress_bars,
-            desc=f"Drawing {num_samples} posterior samples",
+            desc=f"Generating {num_samples} posterior samples in {total_time_steps} "
+            "diffusion steps.",
         )

         if save_intermediate:
             intermediate_samples = [samples]

-        for i in pbar:
-            t1 = ts[i - 1]
-            t0 = ts[i]
-            samples = self.predictor(samples, t1, t0)
+        # Step through the diffusion process from t_max to t_min
+        for time_step_idx in pbar:
+            # Get current and next time points (going backwards in time)
+            t_current = ts[time_step_idx - 1]  # Previous time point
+            t_next = ts[time_step_idx]  # Current time point
+
+            # Apply predictor step
+            samples = self.predictor(samples, t_current, t_next)
+
+            # Apply corrector step if available
             if self.corrector is not None:
-                samples = self.corrector(samples, t0, t1)
+                samples = self.corrector(samples, t_next, t_current)
+
            if save_intermediate:
                intermediate_samples.append(samples)
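
Note: the renamed loop variables make the time convention explicit: the grid runs from t_max down to t_min, the predictor steps from the earlier grid point to the later one, and the corrector receives its time arguments in the opposite order. A standalone sketch with identity placeholders standing in for sbi's predictor and corrector:

import torch


def predictor(samples, t_current, t_next):
    return samples  # placeholder; real predictors integrate the reverse SDE


def corrector(samples, t_next, t_current):
    return samples  # placeholder; real correctors refine samples at t_next


ts = torch.linspace(1.0, 1e-3, steps=500)  # t_max -> t_min
samples = torch.randn(64, 2)  # drawn from the base distribution

for i in range(1, ts.numel()):
    t_current, t_next = ts[i - 1], ts[i]
    samples = predictor(samples, t_current, t_next)
    samples = corrector(samples, t_next, t_current)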
