Draft
Changes from all commits (105 commits)
5e3ba87
add DDM task wrapped from DiffModel.jl.
janfb Apr 26, 2021
091b971
move parameter batch for loop to julia, add potential fun.
janfb Apr 26, 2021
77a2bcf
reference posterior sampling for ddm.
janfb Apr 27, 2021
7af30fb
adapt mcmc to work with either potential function or model.
janfb Apr 27, 2021
d9fe569
refactor log to logprob, loop over parameter batch.
janfb Apr 28, 2021
1c3f428
add transform, get posterior samples from grid.
janfb Apr 28, 2021
2694a5b
formatting
janfb Apr 29, 2021
1f4c276
fix summation bug and get reference via mcmc
janfb Apr 29, 2021
386d0ae
introduce 4 param version via asymmetric bounds.
janfb Apr 29, 2021
409dc8d
change to 3 parameters, removing ndt.
janfb Apr 30, 2021
b91e54c
add seeding to Julia wrapper.
janfb May 4, 2021
b0b8979
add seeding, add repeated seeds to compare refs.
janfb May 4, 2021
37d4eab
added analogous python functions for ddm simulator and likelihood usi…
rdgao May 4, 2021
e4d5929
add seeding, add repeated seeds to compare refs.
janfb May 4, 2021
3afdc55
add refs for 5 obs, 4 ref-methods.
janfb May 4, 2021
34864e6
custom _setup for ddm to allow seeds with different num_trials.
janfb May 5, 2021
88003d9
references for 5 observations with 4 increasing num_trials.
janfb May 5, 2021
a52db2a
add mcmc reference for 10 obs, 3d, 1024 trials.
janfb May 12, 2021
8d4e498
change references to single trial.
janfb May 14, 2021
5fdd646
change reference to obs 1 for 4 different #trials.
janfb May 14, 2021
b04b449
encode down choices as negative rts.
janfb May 17, 2021
c6e8e2e
adapt to new rt encoding, add hyperparams.
janfb May 17, 2021
b52ef02
fix pairplot imports.
janfb May 17, 2021
34c7148
add moments metrics.
janfb May 17, 2021
911ec20
add references with new rts coding.
janfb May 17, 2021
1fe72b3
add args to snle.
janfb May 19, 2021
30652a6
change reference to 10 obs with 1024 trials.
janfb May 19, 2021
253b467
refactor task and add 4 parameter version with ndt.
janfb May 20, 2021
e19e522
fix seeds and bug for 4 param model.
janfb May 21, 2021
946676f
add new 4 param refs.
janfb May 21, 2021
03f6544
run script with potential function for LAN.
janfb May 27, 2021
512c14f
fix prior log prob bug, add lower bound.
janfb Jun 9, 2021
5cf0c5f
add potential fn to utils.
janfb Jun 9, 2021
a2bf02d
add lan scripts and models.
janfb Jun 9, 2021
2126123
add lan budget in return
janfb Jun 9, 2021
d04ce31
single trial reference.
janfb Jun 11, 2021
8863ae5
add custom mcmc and train in untransformed space.
janfb Jun 15, 2021
f842c53
single trial ref.
janfb Jun 15, 2021
bb96de6
100 trials ref.
janfb Jun 15, 2021
2e2d4ee
adapt scripts.
janfb Jun 16, 2021
88580ed
add julia task for double check.
janfb Jun 16, 2021
61866c4
10 trial ref.
janfb Jun 16, 2021
9cbf4e6
100 trial ref.
janfb Jun 16, 2021
4d73769
1000 trial ref.
janfb Jun 17, 2021
4ec9fca
normalize moments with posterior var.
janfb Jun 18, 2021
6699a55
load and save density estimators.
janfb Jun 18, 2021
a68d533
apply rt>tau trick for lan. add mixed-model to utils.
janfb Jun 18, 2021
29d618f
fix bugs in pfs and lps.
janfb Jun 18, 2021
235bd78
undo loading of pretrained nets.
janfb Jun 21, 2021
e73e04a
100 trial ref.
janfb Jun 16, 2021
6f2c8d4
add mixed model as algorithm.
janfb Jun 28, 2021
1cfdeae
add mm as pretrained alg, refactor.
janfb Jul 5, 2021
02dadf2
10 references, 1, 10, 100, 1000 trials each.
janfb Jul 5, 2021
78aae4d
refactor and documentation.
janfb Jul 8, 2021
34bd050
depend on sbi branch with changes for ddm.
janfb Jul 8, 2021
3594ab6
add notebook for LAN-NLE likelihood comparison and benchmark instruct…
janfb Jul 8, 2021
8859400
apply ladj to posterior potential.
janfb Jul 14, 2021
c6e6edf
update references.
janfb Jul 14, 2021
8728b73
update potential functions, use original prior, refactor.
janfb Jul 14, 2021
c489be1
Update README.md
janfb Aug 3, 2021
366e73a
precalculate logprob for 0 1 choices to save time
janfb Aug 25, 2021
9879311
bugfix lower bound variable name.
janfb Sep 8, 2021
b4039fb
script for fig 5.
janfb Sep 10, 2021
d348791
change to 10 1024-trial obs with refs.
janfb Sep 11, 2021
473bed8
add 100 1024-trial refs.
janfb Sep 13, 2021
147e51b
add class full ddm in julia.
janfb Sep 11, 2021
1b75e38
hard code to nle model index 315.
janfb Sep 13, 2021
8d792cf
remove 1024 trials refs.
janfb Sep 28, 2021
14aafc0
switch to 10 mcmc chains.
janfb Oct 6, 2021
3916929
notebook for paper figures.
janfb Oct 6, 2021
726c13b
remove 100 100-trial refs.
janfb Oct 12, 2021
5942f27
10 references, 1, 10, 100, 1000 trials each.
janfb Jul 5, 2021
a078720
update notebook for paper figures.
janfb Oct 18, 2021
336324a
remove old refs and replace by 100x 1 and 10 trial refs.
janfb Oct 19, 2021
4265c7b
97 refs for 1 and 10 trials.
janfb Oct 21, 2021
af90af3
replace previous refs with 100x 1,10,100,1000 trial refs.
janfb Oct 21, 2021
dd0fba3
add new refs, 2 missing.
janfb Oct 22, 2021
61a21c6
adapt task to produce 1,10,100,1000 x 100 refs.
janfb Oct 22, 2021
d506ac0
change mean error to normalized by std.
janfb Oct 22, 2021
01c69b2
adapt npe script to model index 315_2.
janfb Oct 22, 2021
14c55c4
update figures nb and pretrained script.
janfb Nov 5, 2021
be38e0a
update figures
janfb Nov 11, 2021
207d3c2
update figure nb.
janfb Dec 9, 2021
b963949
add collapsing bound DDM variants.
janfb Jan 19, 2022
c735193
remove dependency on sbi-ddm branch.
janfb Jan 23, 2022
0cdbe40
remove python ddm code.
janfb Jan 23, 2022
5ee3830
update figures.
janfb Feb 22, 2022
4a3c276
fix sir import
janfb Nov 29, 2022
600e619
add ddm test
janfb Feb 14, 2023
1900d61
add inference test/
janfb Feb 14, 2023
ef60b91
update ddm inference test.
janfb Feb 14, 2023
065ff8a
refactor to new sbi api.
janfb Oct 25, 2022
aadf4a2
refactor sl run script.
janfb Oct 25, 2022
9f09b08
refactor sbi tests.
janfb Nov 8, 2022
d1fce9b
fix for snle and snre.
janfb Nov 10, 2022
8a36fd9
feedback
janfb Dec 2, 2022
127e132
update run scripts.
janfb Feb 14, 2023
e000a68
fix typing
janfb Feb 14, 2023
d197e51
fix zscore args, update snpe for iid data.
janfb Feb 14, 2023
4899bd1
fix iid snpe script.
janfb Feb 20, 2023
a3ed989
fix julia init
janfb Feb 21, 2023
94e3aeb
add mnle run fun.
janfb Feb 21, 2023
611f19e
update ddm 2d mapping.
janfb Feb 24, 2023
ccf9e76
update run script.
janfb Feb 24, 2023
acddafc
update setup.py
janfb Feb 24, 2023
3,090 changes: 3,090 additions & 0 deletions lan_nle_comparison/LAN-NLE-Figures.ipynb

Large diffs are not rendered by default.

591 changes: 591 additions & 0 deletions lan_nle_comparison/LAN-NLE-Likelihood-Comparison.ipynb

Large diffs are not rendered by default.

72 changes: 72 additions & 0 deletions lan_nle_comparison/README.md
@@ -0,0 +1,72 @@
# Comparison between LANs and NLE on the simple drift-diffusion model

This is a short demonstration of how to reproduce the comparison between LANs (likelihood approximation networks) and NLE (neural likelihood estimation).

We perform the comparison in (log) likelihood space, comparing the synthetic likelihoods
of LAN and NLE against the analytic likelihoods obtained from https://github.com/DrugowitschLab/DiffModels.jl.
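
A minimal sketch of that comparison, using the DDM task from this branch. The network log-likelihoods (`ll_lan`, `ll_nle`) are computed in the notebook from the loaded models and are treated as given here; `posterior=False` is an assumption for obtaining the likelihood without the prior term (the surrounding signature mirrors the call in `sbibm/algorithms/lan/julia.py` below):

```python
import sbibm

task = sbibm.get_task("ddm")  # DDM task added on the ddm-task branch
prior = task.get_prior_dist()
observation = task.get_observation(1)

# Analytic reference log-likelihood via DiffModels.jl.
log_prob_ref = task._get_log_prob_fn(
    None,
    observation,
    "experimental",
    posterior=False,  # assumption: drop the prior term, likelihood only
    automatic_transforms_enabled=False,
    l_lower_bound=1e-7,
)

theta = prior.sample((1000,))
ll_ref = log_prob_ref(theta)

# With ll_lan / ll_nle evaluated on the same theta, the comparison reduces
# to differences in log-space, e.g.:
# mae_lan = (ll_lan - ll_ref).abs().mean()
# mae_nle = (ll_nle - ll_ref).abs().mean()
```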

Additionally, we use MCMC via slice sampling to compare the two approaches in posterior space.
For this comparison we have added the DDM as a task to `sbibm`, a framework developed for
benchmarking simulation-based inference algorithms.
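
Once the branch is installed, the task and its reference posteriors are available through the standard `sbibm` interface; a sketch (observation index and trial count are illustrative):

```python
import sbibm

task = sbibm.get_task("ddm")
prior = task.get_prior_dist()
simulator = task.get_simulator(num_trials=10)  # trial count is configurable

theta_o = prior.sample((1,))
x_o = simulator(theta_o)

# Reference posterior samples shipped with the task, computed via MCMC
# on the analytic DiffModels.jl likelihood.
reference_samples = task.get_reference_posterior_samples(num_observation=1)
```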

In general, the code relies on three repositories: [`sbi`](https://github.com/mackelab/sbi) for using NLE,
[`sbibm`](https://github.com/sbi-benchmark/sbibm) for simulating the data and loading the LAN Keras weights,
and [`benchmarking-results`](https://github.com/sbi-benchmark/results/tree/main/benchmarking_sbi) for running the benchmark.

## Comparison in likelihood space
For a demo of the likelihood comparison, see the Jupyter notebook in this folder. To
execute the notebook locally, perform the steps outlined below.

```bash
# clone repo
git clone https://github.com/mackelab/sbibm.git
# switch to branch
cd sbibm
git checkout ddm-task
# install locally (e.g., in a new conda env)
pip install -e .
# install the missing nflows dependency
pip install UMNN
# open the notebook at /lan_nle_comparison
```

## Comparison in posterior space using `sbibm`

For a general overview of the benchmarking suite, see https://sbi-benchmark.github.io.

To run the benchmark on your local machine, please follow the steps below.

- **optional**: create and activate a new conda environment
```bash
conda create -n ddmtest python=3.8
conda activate ddmtest
```

- clone and install the `benchmarking-results` repo from https://github.com/sbi-benchmark/results/tree/main/benchmarking_sbi

```bash
git clone https://github.com/mackelab/results.git
cd results/benchmarking_sbi
git checkout ddm
pip install -r requirements.txt
cd ../..
```

- clone and install the `sbibm` repo on the `ddm-task` branch

```bash
git clone https://github.com/mackelab/sbibm.git
cd sbibm
git checkout ddm-task
pip install -e .
cd ..
```

- run the benchmark

```bash
cd results/benchmarking_sbi
python run.py task=ddm task.num_observation=1 algorithm=lan
```

More details about how to run the benchmark can be found at https://github.com/sbi-benchmark/results/tree/main/benchmarking_sbi.
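
The algorithm entry points added in this PR (see `sbibm/algorithms/lan/` below) can also be called directly from Python; a sketch, with illustrative arguments:

```python
import sbibm
from sbibm.algorithms.lan.lan import run as run_lan
from sbibm.algorithms.lan.julia import run as run_julia

task = sbibm.get_task("ddm")

# Posterior samples from the pretrained LAN via slice-sampling MCMC.
# num_simulations is not consumed by these pretrained runners.
lan_samples, lan_budget, _ = run_lan(
    task=task, num_samples=10_000, num_simulations=0, num_observation=1
)

# Reference samples from the analytic Julia likelihood, same MCMC machinery.
ref_samples, _, _ = run_julia(
    task=task, num_samples=10_000, num_simulations=0, num_observation=1
)
```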
Binary file added lan_nle_comparison/ddm_transforms.p
Binary file not shown.
106 changes: 106 additions & 0 deletions lan_nle_comparison/reproduce_figure_5.py
@@ -0,0 +1,106 @@
import keras
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import pickle
import sbibm
import torch
import time
from joblib import Parallel, delayed

from sbibm.tasks.ddm.utils import run_mcmc, LANPotentialFunctionProvider
from sbibm.algorithms.sbi.utils import wrap_prior_dist


# Networks trained on KDE and analytic likelihood targets for the 4-param DDM.
lan_kde_path = "../sbibm/algorithms/lan/lan_pretrained/model_final_ddm.h5"
lan_ana_path = "../sbibm/algorithms/lan/lan_pretrained/model_final_ddm_analytic.h5"
lan_kde = keras.models.load_model(lan_kde_path, compile=False)
lan_ana = keras.models.load_model(lan_ana_path, compile=False)

# Load pretrained NLE model.
with open("../sbibm/algorithms/lan/nle_pretrained/mm_688_4.p", "rb") as fh:
    nle = pickle.load(fh)

num_workers = 80
m = num_workers
n = 1024
l_lower_bound = 1e-7
num_samples = 10000


task = sbibm.get_task("ddm")
prior = task.get_prior_dist()
simulator = task.get_simulator(num_trials=n)  # The wrapper passes the seed to Julia.

thos = prior.sample((m,))
xos = simulator(thos)  # m observations with n trials each.

mcmc_parameters = {
    "num_chains": 100,
    "thin": 10,
    "warmup_steps": 100,
    "init_strategy": "sir",
    "sir_batch_size": 100,
    "sir_num_batches": 1000,
}

with open("ddm_transforms.p", "rb") as fh:
    transforms = pickle.load(fh)["transforms"]
prior_transformed = wrap_prior_dist(prior, transforms)


def local_run(xi):
    tic = time.time()
    # Get potential function for the mixed model (NLE).
    potential_fn_mm = nle.get_potential_fn(
        xi.reshape(-1, 1),
        transforms,
        # Pass untransformed prior; corrected internally with ladj.
        prior=prior,
        ll_lower_bound=np.log(l_lower_bound),
    )

    # Run MCMC in transformed space.
    transformed_samples = run_mcmc(
        prior=prior_transformed,
        potential_fn=potential_fn_mm,
        mcmc_parameters=mcmc_parameters,
        num_samples=num_samples,
    )

    nle_samples = transforms.inv(transformed_samples)
    nle_time = time.time() - tic

    tic = time.time()
    # Use the LAN potential function provider refactored from the sbi toolbox.
    potential_fn_lan = LANPotentialFunctionProvider(transforms, lan_kde, l_lower_bound)

    lan_transformed_samples = run_mcmc(
        prior=prior_transformed,
        # Pass the original prior to the provider; the potential is corrected with ladj.
        potential_fn=potential_fn_lan(
            prior=prior,
            sbi_net=None,
            x=xi.reshape(-1, 1),
            mcmc_method="slice_np_vectorized",
        ),
        mcmc_parameters=mcmc_parameters,
        num_samples=num_samples,
    )

    lan_samples = transforms.inv(lan_transformed_samples)
    lan_time = time.time() - tic

    return nle_samples, lan_samples, nle_time, lan_time


# Run the per-observation MCMC in parallel.
results = Parallel(n_jobs=num_workers)(delayed(local_run)(_) for _ in xos)

with open("figure_5_results.p", "wb") as fh:
    pickle.dump(dict(thos=thos, xos=xos, results=results), fh)

print("Done")
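
The saved pickle can then be consumed by the figure notebook; unpacking follows directly from the structure written above:

```python
import pickle

with open("figure_5_results.p", "rb") as fh:
    d = pickle.load(fh)

thos, xos = d["thos"], d["xos"]
for nle_samples, lan_samples, nle_time, lan_time in d["results"]:
    print(nle_samples.shape, lan_samples.shape, nle_time, lan_time)
```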
89 changes: 89 additions & 0 deletions sbibm/algorithms/lan/julia.py
@@ -0,0 +1,89 @@
import logging
from typing import Any, Dict, Optional, Tuple

import torch

from sbibm.tasks.task import Task

from sbibm.algorithms.sbi.utils import wrap_prior_dist
from sbibm.tasks.ddm.utils import run_mcmc


def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    automatic_transforms_enabled: bool = True,
    mcmc_method: str = "slice_np_vectorized",
    mcmc_parameters: Dict[str, Any] = {
        "num_chains": 100,
        "thin": 10,
        "warmup_steps": 100,
        "init_strategy": "sir",
        "sir_batch_size": 1000,
        "sir_num_batches": 100,
    },
    l_lower_bound: float = 1e-7,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs MCMC with the analytical DDM likelihood.

    Args:
        task: Task instance, here DDM.
        num_samples: Number of samples to generate from the posterior.
        num_simulations: Simulation budget.
        num_observation: Observation number to load, alternative to `observation`.
        observation: Observation, alternative to `num_observation`.
        automatic_transforms_enabled: Whether to enable automatic transforms.
        mcmc_method: MCMC method.
        mcmc_parameters: MCMC parameters.
        l_lower_bound: Lower bound for single-trial likelihood evaluations.

    Returns:
        Samples from the posterior, number of simulator calls, log probability
        of true parameters if computable.
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)
    assert (
        task.name == "ddm"
    ), "This algorithm works only for the DDM task as it uses its analytical likelihood."

    log = logging.getLogger(__name__)
    log.info("Running MCMC with analytical likelihoods from the Julia package.")

    prior = task.get_prior_dist()
    if observation is None:
        observation = task.get_observation(num_observation)

    # Maybe transform to unconstrained parameter space for MCMC.
    transforms = task._get_transforms(automatic_transforms_enabled)["parameters"]
    if automatic_transforms_enabled:
        prior_transformed = wrap_prior_dist(prior, transforms)
    else:
        prior_transformed = prior

    # Log-prob function wrapping the analytical likelihood from DiffModels.jl.
    llj = task._get_log_prob_fn(
        None,
        observation,
        "experimental",
        posterior=True,
        automatic_transforms_enabled=automatic_transforms_enabled,
        l_lower_bound=l_lower_bound,
    )

    def potential_fn_julia(theta):
        theta = torch.as_tensor(theta, dtype=torch.float32)
        return llj(theta)

    # Run MCMC in transformed space.
    samples = run_mcmc(
        prior=prior_transformed,
        potential_fn=potential_fn_julia,
        mcmc_parameters=mcmc_parameters,
        num_samples=num_samples,
    )

    # Return untransformed samples.
    return transforms.inv(samples), num_simulations, None
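
The ladj corrections mentioned in the comments follow one pattern: any log-likelihood callable can be turned into a transformed-space potential by adding the prior log-prob and the log-abs-det-Jacobian of the inverse transform. A minimal sketch, independent of the sbibm internals (`make_potential_fn` is a hypothetical helper, not part of this PR):

```python
import torch
from torch.distributions import Distribution
from torch.distributions.transforms import Transform


def make_potential_fn(log_likelihood, prior: Distribution, transforms: Transform):
    """Potential over transformed parameters: likelihood + prior + ladj correction."""

    def potential_fn(theta_transformed: torch.Tensor) -> torch.Tensor:
        # Map back to the original (constrained) parameter space.
        theta = transforms.inv(theta_transformed)
        # log |det d(theta) / d(theta_transformed)| for the change of variables.
        ladj = transforms.inv.log_abs_det_jacobian(theta_transformed, theta)
        return log_likelihood(theta) + prior.log_prob(theta) + ladj

    return potential_fn
```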
98 changes: 98 additions & 0 deletions sbibm/algorithms/lan/lan.py
@@ -0,0 +1,98 @@
import logging
import pathlib
from typing import Any, Dict, Optional, Tuple

import keras
import torch
from sbibm.tasks.task import Task
from sbibm.tasks.ddm.utils import LANPotentialFunctionProvider, run_mcmc

from sbibm.algorithms.sbi.utils import wrap_prior_dist


def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    automatic_transforms_enabled: bool = True,
    mcmc_method: str = "slice_np_vectorized",
    mcmc_parameters: Dict[str, Any] = {
        "num_chains": 10,
        "thin": 10,
        "warmup_steps": 100,
        "init_strategy": "sir",
        "sir_batch_size": 1000,
        "sir_num_batches": 100,
    },
    l_lower_bound: float = 1e-7,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs a pretrained LAN (trained on KDE likelihood targets).

    Args:
        task: Task instance.
        num_samples: Number of samples to generate from the posterior.
        num_simulations: Simulation budget.
        num_observation: Observation number to load, alternative to `observation`.
        observation: Observation, alternative to `num_observation`.
        automatic_transforms_enabled: Whether to enable automatic transforms.
        mcmc_method: MCMC method.
        mcmc_parameters: MCMC parameters.
        l_lower_bound: Lower bound for single-trial likelihood evaluations.

    Returns:
        Samples from the posterior, number of simulator calls, log probability
        of true parameters if computable.
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)
    assert (
        task.name == "ddm"
    ), "This algorithm works only for the DDM task as it uses its analytical likelihood."

    log = logging.getLogger(__name__)
    log.info("Running LAN pretrained with KDE targets.")
    # LAN simulation budget from the paper: 1.5e6 parameter settings x 1e5 simulations each.
    lan_budget = int(1e5 * 1.5e6)

    prior = task.get_prior_dist()
    if observation is None:
        observation = task.get_observation(num_observation)

    # Maybe transform to unconstrained parameter space for MCMC.
    transforms = task._get_transforms(automatic_transforms_enabled)["parameters"]
    if automatic_transforms_enabled:
        prior_transformed = wrap_prior_dist(prior, transforms)
    else:
        prior_transformed = prior

    num_trials = observation.shape[1]
    # sbi needs the trials in the first dimension.
    observation_sbi = observation.reshape(num_trials, 1)

    # Network trained on KDE likelihood for the 4-param DDM.
    lan_kde_path = (
        f"{pathlib.Path(__file__).parent.resolve()}/lan_pretrained/model_final_ddm.h5"
    )
    # Load the weights as a Keras model.
    lan_kde = keras.models.load_model(lan_kde_path, compile=False)

    # Use the LAN potential function provider refactored from the sbi toolbox.
    potential_fn_lan = LANPotentialFunctionProvider(transforms, lan_kde, l_lower_bound)

    samples = run_mcmc(
        prior=prior_transformed,
        # Pass the original prior to the provider; the potential is corrected with ladj.
        potential_fn=potential_fn_lan(
            prior=prior,
            sbi_net=None,
            x=observation_sbi,
            mcmc_method=mcmc_method,
        ),
        mcmc_parameters=mcmc_parameters,
        num_samples=num_samples,
    )

    # Return untransformed samples.
    return transforms.inv(samples), lan_budget, None