@@ -15,40 +15,87 @@
 )
 
 
+@pytest.fixture(scope="session")
+def basic_setup():
+    """Basic setup shared across LC2ST tests."""
+    dim = 2
+    prior = uniform_prior_gaussian_mixture(dim=dim)
+    simulator = gaussian_mixture
+    return {"dim": dim, "prior": prior, "simulator": simulator}
+
+
+@pytest.fixture(scope="session")
+def npe_factory(basic_setup):
+    """Factory for creating NPE models with different training parameters."""
+
+    def _create_npe(num_simulations, max_epochs=None):
+        prior = basic_setup["prior"]
+        simulator = basic_setup["simulator"]
+
+        theta_train = prior.sample((num_simulations,))
+        x_train = simulator(theta_train)
+
+        inference = NPE(prior, density_estimator='maf')
+        inference = inference.append_simulations(theta=theta_train, x=x_train)
+
+        train_kwargs = {"training_batch_size": 100}
+        if max_epochs:
+            train_kwargs["max_num_epochs"] = max_epochs
+
+        return inference.train(**train_kwargs)
+
+    return _create_npe
+
+
+@pytest.fixture(scope="session")
+def badly_trained_npe(npe_factory):
+    return npe_factory(num_simulations=100, max_epochs=1)
+
+
+@pytest.fixture(scope="session")
+def well_trained_npe(npe_factory):
+    return npe_factory(num_simulations=10_000)
+
+
+@pytest.fixture(scope="session")
+def calibration_data(basic_setup, badly_trained_npe):
+    """Calibration data for LC2ST tests."""
+    prior = basic_setup["prior"]
+    simulator = basic_setup["simulator"]
+    npe = badly_trained_npe
+
+    num_cal = 100  # Smaller for quick tests
+    thetas = prior.sample((num_cal,))
+    xs = simulator(thetas)
+    posterior_samples = npe.sample((1,), xs).reshape(-1, thetas.shape[-1]).detach()
+
+    return {"thetas": thetas, "xs": xs, "posterior_samples": posterior_samples}
+
+
 @pytest.mark.parametrize("method", (LC2ST, LC2ST_NF))
 @pytest.mark.parametrize("classifier", ('mlp', 'random_forest', MLPClassifier))
 @pytest.mark.parametrize("cv_folds", (1, 2))
 @pytest.mark.parametrize("num_ensemble", (1, 3))
 @pytest.mark.parametrize("z_score", (True, False))
-def test_running_lc2st(method, classifier, cv_folds, num_ensemble, z_score):
+def test_running_lc2st(
+    method,
+    classifier,
+    cv_folds,
+    num_ensemble,
+    z_score,
+    calibration_data,
+    badly_trained_npe,
+):
     """Tests running inference, LC2ST-(NF) and then getting test quantities."""
 
-    num_train = 100
-    num_cal = 100
     num_eval = 100
     num_trials_null = 2
 
-    # task
-    dim = 2
-    prior = uniform_prior_gaussian_mixture(dim=dim)
-    simulator = gaussian_mixture
-
-    # training data for the density estimator
-    theta_train = prior.sample((num_train,))
-    x_train = simulator(theta_train)
-
-    # Train the neural posterior estimators
-    inference = NPE(prior, density_estimator='maf')
-    inference = inference.append_simulations(theta=theta_train, x=x_train)
-    npe = inference.train(training_batch_size=100, max_num_epochs=1)
-
-    # calibration data for the test
-    thetas = prior.sample((num_cal,))
-    xs = simulator(thetas)
-    posterior_samples = (
-        npe.sample((1,), condition=xs).reshape(-1, thetas.shape[-1]).detach()
-    )
-    assert posterior_samples.shape == thetas.shape
+    # Get data from fixtures
+    thetas = calibration_data["thetas"]
+    xs = calibration_data["xs"]
+    posterior_samples = calibration_data["posterior_samples"]
+    npe = badly_trained_npe
 
     if method == LC2ST:
         theta_o = (
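
The fixtures introduced above follow pytest's factory-as-fixture pattern: `npe_factory` is session-scoped, so `badly_trained_npe` and `well_trained_npe` each train their estimator once per test session and pytest caches the result for every parametrized test case. A minimal, self-contained sketch of the same pattern, with all names (`model_factory`, `small_model`, `_train`) illustrative rather than taken from this PR:

import pytest


@pytest.fixture(scope="session")
def model_factory():
    """Factory fixture: returns a function so each caller picks its own parameters."""

    def _train(num_points):
        # Stand-in for an expensive training step.
        return {"num_points": num_points}

    return _train


@pytest.fixture(scope="session")
def small_model(model_factory):
    # Built on first use, then cached by pytest for the rest of the session.
    return model_factory(num_points=100)


def test_uses_cached_model(small_model):
    assert small_model["num_points"] == 100

The factory indirection keeps one cached object per named fixture while still letting each derived fixture choose its own training budget, which is why the slow NPE training no longer has to be repeated inside every test below.
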
@@ -107,33 +154,19 @@ def test_running_lc2st(method, classifier, cv_folds, num_ensemble, z_score):
 
 @pytest.mark.slow
 @pytest.mark.parametrize("method", (LC2ST, LC2ST_NF))
-def test_lc2st_true_positiv_rate(method):
+def test_lc2st_true_positiv_rate(method, basic_setup, badly_trained_npe):
     """Tests the true positive rate of the LC2ST-(NF) test:
     for a "bad" estimator, the LC2ST-(NF) should reject the null hypothesis."""
|
113 | 160 | num_runs = 100
|
114 | 161 | confidence_level = 0.95
|
115 | 162 |
|
116 |
| - # use small num_train and num_epochs to obtain "bad" estimator |
117 |
| - # (no convergence to the true posterior) |
118 |
| - num_train = 100 |
119 |
| - num_epochs = 2 |
120 |
| - |
121 | 163 | num_cal = 1_000
|
122 | 164 | num_eval = 10_000
|
123 | 165 |
|
124 |
| - # task |
125 |
| - dim = 2 |
126 |
| - prior = uniform_prior_gaussian_mixture(dim=dim) |
127 |
| - simulator = gaussian_mixture |
128 |
| - |
129 |
| - # training data for the density estimator |
130 |
| - theta_train = prior.sample((num_train,)) |
131 |
| - x_train = simulator(theta_train) |
132 |
| - |
133 |
| - # Train the neural posterior estimators |
134 |
| - inference = NPE(prior, density_estimator='maf') |
135 |
| - inference = inference.append_simulations(theta=theta_train, x=x_train) |
136 |
| - npe = inference.train(training_batch_size=100, max_num_epochs=num_epochs) |
| 166 | + # Get data from fixtures |
| 167 | + prior = basic_setup["prior"] |
| 168 | + simulator = basic_setup["simulator"] |
| 169 | + npe = badly_trained_npe |
137 | 170 |
|
138 | 171 | thetas = prior.sample((num_cal,))
|
139 | 172 | xs = simulator(thetas)
|
@@ -186,32 +219,19 @@ def test_lc2st_true_positiv_rate(method):
 
 @pytest.mark.slow
 @pytest.mark.parametrize("method", (LC2ST, LC2ST_NF))
-def test_lc2st_false_positiv_rate(method, set_seed):
+def test_lc2st_false_positiv_rate(method, basic_setup, well_trained_npe, set_seed):
     """Tests the false positive rate of the LC2ST-(NF) test:
     for a "good" estimator, the LC2ST-(NF) should not reject the null hypothesis."""
|
192 | 225 | num_runs = 100
|
193 | 226 | confidence_level = 0.95
|
194 | 227 |
|
195 |
| - # use big num_train and num_epochs to obtain "good" estimator |
196 |
| - # (convergence of the estimator) |
197 |
| - num_train = 10_000 |
198 |
| - |
199 | 228 | num_cal = 1_000
|
200 | 229 | num_eval = 10_000
|
201 | 230 |
|
202 |
| - # task |
203 |
| - dim = 2 |
204 |
| - prior = uniform_prior_gaussian_mixture(dim=dim) |
205 |
| - simulator = gaussian_mixture |
206 |
| - |
207 |
| - # training data for the density estimator |
208 |
| - theta_train = prior.sample((num_train,)) |
209 |
| - x_train = simulator(theta_train) |
210 |
| - |
211 |
| - # Train the neural posterior estimators |
212 |
| - inference = NPE(prior, density_estimator='maf') |
213 |
| - inference = inference.append_simulations(theta=theta_train, x=x_train) |
214 |
| - npe = inference.train(training_batch_size=100) |
| 231 | + # Get data from fixtures |
| 232 | + prior = basic_setup["prior"] |
| 233 | + simulator = basic_setup["simulator"] |
| 234 | + npe = well_trained_npe |
215 | 235 |
|
216 | 236 | thetas = prior.sample((num_cal,))
|
217 | 237 | xs = simulator(thetas)
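
The hunks above are truncated before the point where the calibration data is handed to the diagnostic itself. For context, a rough sketch of that step, assuming the LC2ST API of recent sbi versions (constructor arguments `classifier` and `num_trials_null`; methods `train_on_observed_data`, `train_under_null_hypothesis`, `p_value` and `reject_test`); the variables reuse the fixture data above, and the exact calls should be read as an assumption, not as the code in this PR:

from sbi.diagnostics.lc2st import LC2ST

# thetas, xs, posterior_samples, npe: calibration data and estimator as built
# in the fixtures above; num_eval as set in the tests.
x_o = xs[0][None, :]  # a single observation from the calibration set
theta_o = npe.sample((num_eval,), condition=x_o).reshape(num_eval, -1).detach()

lc2st = LC2ST(
    thetas=thetas,
    xs=xs,
    posterior_samples=posterior_samples,
    classifier="mlp",
    num_trials_null=2,  # kept small for a fast test run
)
_ = lc2st.train_on_observed_data()       # classifier on calibration vs. estimated samples
_ = lc2st.train_under_null_hypothesis()  # classifiers approximating the null distribution
p_value = lc2st.p_value(theta_o, x_o)
rejected = lc2st.reject_test(theta_o, x_o)  # True if the null (calibration) is rejected

Under the null hypothesis of a well-calibrated estimator the classifier performs at chance level, so the true positive rate test expects `reject_test` to fire for the badly trained NPE, while the false positive rate test expects it not to fire for the well trained one.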