Skip to content

Commit 10a4cc1

Browse files
authored
fix: lc2st numpy type fixes, improved tests. (#1613)
* fix(diagnostics): Fix numpy scalar type issues in LC2ST for Python >3.10
  - Convert numpy.bool_ to native bool in reject_test() and p_value()
  - Convert numpy.float64 to native float in get_statistic_on_observed_data() and eval_lc2st()
  - Fixes TypeError: 'numpy.bool' object cannot be interpreted as an integer
  - Ensures compatibility with torch.tensor() in newer NumPy versions
* perf(tests): Optimize LC2ST test suite with pytest fixtures
  - Add session-scoped fixtures for NPE training and calibration data
  - Consolidate badly_trained_npe and well_trained_npe fixtures
  - Reduce test execution time by sharing expensive setup across test runs
  - Remove redundant NPE training code from individual tests
  - Improve test maintainability and reduce code duplication
* PR feedback: improve fixtures
1 parent ce363b7 commit 10a4cc1

File tree

2 files changed

+85
-65
lines changed

2 files changed

+85
-65
lines changed

sbi/diagnostics/lc2st.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ def get_statistic_on_observed_data(
312312
trained_clfs=self.trained_clfs,
313313
return_probs=True,
314314
)
315-
return scores.mean()
315+
return float(scores.mean())
316316

317317
def p_value(
318318
self,
@@ -338,7 +338,7 @@ def p_value(
338338
_, stats_null = self.get_statistics_under_null_hypothesis(
339339
theta_o=theta_o, x_o=x_o, return_probs=True, verbosity=0
340340
)
341-
return (stat_data < stats_null).mean()
341+
return float((stat_data < stats_null).mean())
342342

343343
def reject_test(
344344
self,
@@ -357,7 +357,7 @@ def reject_test(
357357
Returns:
358358
The L-C2ST result: True if rejected, False otherwise.
359359
"""
360-
return self.p_value(theta_o=theta_o, x_o=x_o) < alpha
360+
return bool(self.p_value(theta_o=theta_o, x_o=x_o) < alpha)
361361

362362
def train_under_null_hypothesis(
363363
self,
@@ -739,7 +739,7 @@ def eval_lc2st(
739739
# probability of being in P (class 0)
740740
proba = clf.predict_proba(joint_p)[:, 0] # type: ignore
741741
# mean squared error between proba and dirac at 0.5
742-
score = ((proba - [0.5] * len(proba)) ** 2).mean()
742+
score = float(((proba - [0.5] * len(proba)) ** 2).mean())
743743

744744
if return_proba:
745745
return proba, score

tests/lc2st_test.py

Lines changed: 81 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -15,40 +15,87 @@
1515
)
1616

1717

18+
@pytest.fixture(scope="session")
19+
def basic_setup():
20+
"""Basic setup shared across LC2ST tests."""
21+
dim = 2
22+
prior = uniform_prior_gaussian_mixture(dim=dim)
23+
simulator = gaussian_mixture
24+
return {"dim": dim, "prior": prior, "simulator": simulator}
25+
26+
27+
@pytest.fixture(scope="session")
28+
def npe_factory(basic_setup):
29+
"""Factory for creating NPE models with different training parameters."""
30+
31+
def _create_npe(num_simulations, max_epochs=None):
32+
prior = basic_setup["prior"]
33+
simulator = basic_setup["simulator"]
34+
35+
theta_train = prior.sample((num_simulations,))
36+
x_train = simulator(theta_train)
37+
38+
inference = NPE(prior, density_estimator='maf')
39+
inference = inference.append_simulations(theta=theta_train, x=x_train)
40+
41+
train_kwargs = {"training_batch_size": 100}
42+
if max_epochs:
43+
train_kwargs["max_num_epochs"] = max_epochs
44+
45+
return inference.train(**train_kwargs)
46+
47+
return _create_npe
48+
49+
50+
@pytest.fixture(scope="session")
51+
def badly_trained_npe(npe_factory):
52+
return npe_factory(num_simulations=100, max_epochs=1)
53+
54+
55+
@pytest.fixture(scope="session")
56+
def well_trained_npe(npe_factory):
57+
return npe_factory(num_simulations=10_000)
58+
59+
60+
@pytest.fixture(scope="session")
61+
def calibration_data(basic_setup, badly_trained_npe):
62+
"""Calibration data for LC2ST tests."""
63+
prior = basic_setup["prior"]
64+
simulator = basic_setup["simulator"]
65+
npe = badly_trained_npe
66+
67+
num_cal = 100 # Smaller for quick tests
68+
thetas = prior.sample((num_cal,))
69+
xs = simulator(thetas)
70+
posterior_samples = npe.sample((1,), xs).reshape(-1, thetas.shape[-1]).detach()
71+
72+
return {"thetas": thetas, "xs": xs, "posterior_samples": posterior_samples}
73+
74+
1875
@pytest.mark.parametrize("method", (LC2ST, LC2ST_NF))
1976
@pytest.mark.parametrize("classifier", ('mlp', 'random_forest', MLPClassifier))
2077
@pytest.mark.parametrize("cv_folds", (1, 2))
2178
@pytest.mark.parametrize("num_ensemble", (1, 3))
2279
@pytest.mark.parametrize("z_score", (True, False))
23-
def test_running_lc2st(method, classifier, cv_folds, num_ensemble, z_score):
80+
def test_running_lc2st(
81+
method,
82+
classifier,
83+
cv_folds,
84+
num_ensemble,
85+
z_score,
86+
calibration_data,
87+
badly_trained_npe,
88+
):
2489
"""Tests running inference, LC2ST-(NF) and then getting test quantities."""
2590

26-
num_train = 100
27-
num_cal = 100
2891
num_eval = 100
2992
num_trials_null = 2
3093

31-
# task
32-
dim = 2
33-
prior = uniform_prior_gaussian_mixture(dim=dim)
34-
simulator = gaussian_mixture
35-
36-
# training data for the density estimator
37-
theta_train = prior.sample((num_train,))
38-
x_train = simulator(theta_train)
39-
40-
# Train the neural posterior estimators
41-
inference = NPE(prior, density_estimator='maf')
42-
inference = inference.append_simulations(theta=theta_train, x=x_train)
43-
npe = inference.train(training_batch_size=100, max_num_epochs=1)
44-
45-
# calibration data for the test
46-
thetas = prior.sample((num_cal,))
47-
xs = simulator(thetas)
48-
posterior_samples = (
49-
npe.sample((1,), condition=xs).reshape(-1, thetas.shape[-1]).detach()
50-
)
51-
assert posterior_samples.shape == thetas.shape
94+
# Get data from fixtures
95+
thetas = calibration_data["thetas"]
96+
xs = calibration_data["xs"]
97+
posterior_samples = calibration_data["posterior_samples"]
98+
npe = badly_trained_npe
5299

53100
if method == LC2ST:
54101
theta_o = (
@@ -107,33 +154,19 @@ def test_running_lc2st(method, classifier, cv_folds, num_ensemble, z_score):
107154

108155
@pytest.mark.slow
109156
@pytest.mark.parametrize("method", (LC2ST, LC2ST_NF))
110-
def test_lc2st_true_positiv_rate(method):
157+
def test_lc2st_true_positiv_rate(method, basic_setup, badly_trained_npe):
111158
"""Tests the true positiv rate of the LC2ST-(NF) test:
112159
for a "bad" estimator, the LC2ST-(NF) should reject the null hypothesis."""
113160
num_runs = 100
114161
confidence_level = 0.95
115162

116-
# use small num_train and num_epochs to obtain "bad" estimator
117-
# (no convergence to the true posterior)
118-
num_train = 100
119-
num_epochs = 2
120-
121163
num_cal = 1_000
122164
num_eval = 10_000
123165

124-
# task
125-
dim = 2
126-
prior = uniform_prior_gaussian_mixture(dim=dim)
127-
simulator = gaussian_mixture
128-
129-
# training data for the density estimator
130-
theta_train = prior.sample((num_train,))
131-
x_train = simulator(theta_train)
132-
133-
# Train the neural posterior estimators
134-
inference = NPE(prior, density_estimator='maf')
135-
inference = inference.append_simulations(theta=theta_train, x=x_train)
136-
npe = inference.train(training_batch_size=100, max_num_epochs=num_epochs)
166+
# Get data from fixtures
167+
prior = basic_setup["prior"]
168+
simulator = basic_setup["simulator"]
169+
npe = badly_trained_npe
137170

138171
thetas = prior.sample((num_cal,))
139172
xs = simulator(thetas)
@@ -186,32 +219,19 @@ def test_lc2st_true_positiv_rate(method):
186219

187220
@pytest.mark.slow
188221
@pytest.mark.parametrize("method", (LC2ST, LC2ST_NF))
189-
def test_lc2st_false_positiv_rate(method, set_seed):
222+
def test_lc2st_false_positiv_rate(method, basic_setup, well_trained_npe, set_seed):
190223
"""Tests the false positiv rate of the LC2ST-(NF) test:
191224
for a "good" estimator, the LC2ST-(NF) should not reject the null hypothesis."""
192225
num_runs = 100
193226
confidence_level = 0.95
194227

195-
# use big num_train and num_epochs to obtain "good" estimator
196-
# (convergence of the estimator)
197-
num_train = 10_000
198-
199228
num_cal = 1_000
200229
num_eval = 10_000
201230

202-
# task
203-
dim = 2
204-
prior = uniform_prior_gaussian_mixture(dim=dim)
205-
simulator = gaussian_mixture
206-
207-
# training data for the density estimator
208-
theta_train = prior.sample((num_train,))
209-
x_train = simulator(theta_train)
210-
211-
# Train the neural posterior estimators
212-
inference = NPE(prior, density_estimator='maf')
213-
inference = inference.append_simulations(theta=theta_train, x=x_train)
214-
npe = inference.train(training_batch_size=100)
231+
# Get data from fixtures
232+
prior = basic_setup["prior"]
233+
simulator = basic_setup["simulator"]
234+
npe = well_trained_npe
215235

216236
thetas = prior.sample((num_cal,))
217237
xs = simulator(thetas)

0 commit comments

Comments
 (0)