fix: patch torch bug in tarp, run torch.histogram on cpu (#1596)

psteinb · web-flow · commit ce31030e4c02 · 2025-07-01T09:51:34.000+02:00
* patch for torch bug in tarp, run torch.histogram on a cpu-only tensor

* pin gpu tests to one param combination

* clearer variable naming

* ruff formatting
diff --git a/sbi/diagnostics/tarp.py b/sbi/diagnostics/tarp.py
@@ -138,6 +138,7 @@ def _run_tarp(
 
     """
     num_posterior_samples, num_tarp_samples, _ = posterior_samples.shape
+    input_device = posterior_samples.device
 
     assert references.shape == thetas.shape, (
         "references must have the same shape as thetas"
@@ -162,11 +163,20 @@ def _run_tarp(
     coverage_values = (
         torch.sum(sample_dists < theta_dists, dim=0) / num_posterior_samples
     )
-    hist, alpha_grid = torch.histogram(coverage_values, density=True, bins=num_bins)
+
+    # torch.histogram is not implemented for CUDA tensors, so run it on the CPU;
+    # see https://github.com/pytorch/pytorch/issues/69519
+    hist, alpha_grid = torch.histogram(
+        coverage_values.cpu(), density=True, bins=num_bins
+    )
+
+    # return all tensors to input_device to keep contract valid
+    hist, alpha_grid = hist.to(input_device), alpha_grid.to(input_device)
+
     # calculate empirical CDF via cumsum and normalize
     ecp = torch.cumsum(hist, dim=0) / hist.sum()
     # add 0 to the beginning of the ecp curve to match the alpha grid
-    ecp = torch.cat([Tensor([0]), ecp])
+    ecp = torch.cat([torch.zeros((1,), device=input_device), ecp])
 
     return ecp, alpha_grid
 
diff --git a/tests/tarp_test.py b/tests/tarp_test.py
@@ -1,6 +1,6 @@
 import pytest
 from scipy.stats import uniform
-from torch import Tensor, allclose, exp, eye, ones
+from torch import allclose, device, exp, eye, ones, zeros
 from torch.distributions import Normal, Uniform
 from torch.nn import L1Loss
 
@@ -141,12 +141,46 @@ def test_run_tarp_correct(distance, z_score_theta, accurate_samples):
         num_bins=30,
     )
 
-    assert allclose((ecp - alpha).abs().max(), Tensor([0.0]), atol=1e-1)
+    assert allclose((ecp - alpha).abs().max(), zeros((1,)), atol=1e-1)
     assert (
         ecp - alpha
     ).abs().sum() < 1.0  # integral of residuals should vanish, fig.2 in paper
 
 
+@pytest.mark.gpu
+def test_run_tarp_correct_on_cuda_device(accurate_samples):
+    z_score_theta = True
+    distance = l2
+    dev = device("cuda")
+    theta, samples = accurate_samples
+    theta, samples = theta.to(dev), samples.to(dev)
+
+    with pytest.raises(NotImplementedError):
+        # Canary: confirm torch.histogram is still unsupported on CUDA.
+        # Once https://github.com/pytorch/pytorch/issues/69519 is fixed,
+        # this pytest.raises block fails, signalling that the CPU
+        # workaround in _run_tarp can be removed.
+        from torch import histogram
+
+        histogram(zeros((3,)).cuda(), bins=4)
+
+    references = get_tarp_references(theta).to(dev)
+
+    ecp, alpha = _run_tarp(
+        samples,
+        theta,
+        references,
+        distance=distance,
+        z_score_theta=z_score_theta,
+        num_bins=30,
+    )
+
+    assert allclose((ecp - alpha).abs().max(), zeros((1,), device=dev), atol=1e-1)
+    assert (
+        ecp - alpha
+    ).abs().sum() < 1.05  # integral of residuals should vanish, fig.2 in paper
+
+
 @pytest.mark.parametrize("distance", (l1, l2))
 def test_run_tarp_detect_overdispersed(distance, overdispersed_samples):
     theta, samples = overdispersed_samples
@@ -158,7 +192,7 @@ def test_run_tarp_detect_overdispersed(distance, overdispersed_samples):
 
     # TARP detects that this is NOT a correct representation of the posterior
     # hence we test for not allclose
-    assert not allclose((ecp - alpha).abs().max(), Tensor([0.0]), atol=1e-1)
+    assert not allclose((ecp - alpha).abs().max(), zeros((1,)), atol=1e-1)
     assert (ecp - alpha).abs().sum() > 3.0  # integral is nonzero, fig.2 in paper
 
 
@@ -173,7 +207,7 @@ def test_run_tarp_detect_underdispersed(distance, underdispersed_samples):
 
     # TARP detects that this is NOT a correct representation of the posterior
     # hence we test for not allclose
-    assert not allclose((ecp - alpha).abs().max(), Tensor([0.0]), atol=1e-1)
+    assert not allclose((ecp - alpha).abs().max(), zeros((1,)), atol=1e-1)
     assert (ecp - alpha).abs().sum() > 3.0  # integral is nonzero, fig.2 in paper
 
 
@@ -188,7 +222,7 @@ def test_run_tarp_detect_bias(distance, biased_samples):
 
     # TARP detects that this is NOT a correct representation of the posterior
     # hence we test for not allclose
-    assert not allclose((ecp - alpha).abs().max(), Tensor([0.0]), atol=1e-1)
+    assert not allclose((ecp - alpha).abs().max(), zeros((1,)), atol=1e-1)
     assert (ecp - alpha).abs().sum() > 3.0  # integral is nonzero, fig.2 in paper