From e503c88f105fd0a5115f3919cfa3dce8e89d05f2 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Mon, 17 Mar 2025 18:01:40 +0100 Subject: [PATCH 01/51] logit transform changes --- sbi/utils/sbiutils.py | 78 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/sbi/utils/sbiutils.py b/sbi/utils/sbiutils.py index 3090bc71a..0af7f3f1a 100644 --- a/sbi/utils/sbiutils.py +++ b/sbi/utils/sbiutils.py @@ -19,6 +19,7 @@ AffineTransform, Distribution, Independent, + Transform, biject_to, constraints, ) @@ -197,6 +198,83 @@ def standardizing_transform_zuko( ) +class BoundedLogitTransform(Transform): + """ + Implements a logit transformation for data bounded within a given interval + (min_val, max_val), mapping it to the real line (-inf, inf). This transformation + is useful for normalizing bounded data while ensuring numerical stability. + + The transformation follows: + x' = log((x - min_val) / (max_val - min_val)) + - log(1 - (x - min_val) / (max_val - min_val)) + + The inverse transformation applies the sigmoid function + to map back to (min_val, max_val). + + Attributes: + min_val (float): The lower bound of the input domain. + max_val (float): The upper bound of the input domain. + eps (float): A small value to prevent numerical issues at the boundaries. + domain (Constraint): Defines the valid input range as (min_val, max_val). + codomain (Constraint): Defines the output range as the real line (-inf, inf). 
+ """ + + def __init__(self, min_val: float, max_val: float, eps: float = 1e-5): + super().__init__() + self.min_val = min_val + self.max_val = max_val + self.eps = eps # Avoids numerical instability at boundaries + + # Define domain and codomain + self.domain = constraints.interval(min_val, max_val) # Input is in (min, max) + self.codomain = constraints.real # Output is unbounded + + def __call__(self, x: Tensor) -> Tensor: + # Normalize to (0,1) + x = (x - self.min_val) / (self.max_val - self.min_val) + x = torch.clamp(x, self.eps, 1 - self.eps) # Prevents log(0) or log(∞) + return torch.log(x) - torch.log(1 - x) # Logit function + + def inv(self, y: Tensor) -> Tensor: + # Sigmoid and scale back to (min, max) + return self.min_val + (self.max_val - self.min_val) * torch.sigmoid(y) + + def log_abs_det_jacobian(self, x: Tensor, y: Tensor) -> Tensor: + """ + Computes the log absolute determinant of the Jacobian of the transformation. + Needed for proper transformation in Zuko flows. + """ + x = (x - self.min_val) / (self.max_val - self.min_val) + x = torch.clamp(x, self.eps, 1 - self.eps) + log_det = ( + -torch.log(x) - torch.log(1 - x) - torch.log(self.max_val - self.min_val) + ) + return log_det + + +def logit_transform_zuko( + min_val: float, max_val: float, eps: float = 1e-6 +) -> zuko.flows.UnconditionalTransform: + """ + Builds logit-transforming transform for Zuko flows on a bounded interval. + + Args: + min_val: Lower bound of the prior interval. + max_val: Upper bound of the prior interval. + eps: Small constant to avoid numerical issues at 0 and 1. + + Returns: + Logit transformation for the given range. 
+ """ + return zuko.flows.UnconditionalTransform( + BoundedLogitTransform, + min_val=min_val, + max_val=max_val, + eps=eps, + buffer=True, + ) + + def z_standardization( batch_t: Tensor, structured_dims: bool = False, From 8be9079762cac33716f04022dd52cafce1c41372 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Tue, 18 Mar 2025 18:10:14 +0100 Subject: [PATCH 02/51] add new ZScoreTypes --- sbi/neural_nets/net_builders/flow.py | 139 +++++++++++++++------------ 1 file changed, 76 insertions(+), 63 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index 51266ddb3..83ffe7161 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -1,6 +1,7 @@ # This file is part of sbi, a toolkit for simulation-based inference. sbi is licensed # under the Apache License Version 2.0, see +import warnings from functools import partial from typing import List, Optional, Sequence, Union @@ -17,6 +18,8 @@ from sbi.neural_nets.estimators import NFlowsFlow, ZukoFlow from sbi.utils.nn_utils import MADEMoGWrapper, get_numel from sbi.utils.sbiutils import ( + ZScoreType, + logit_transform_zuko, standardizing_net, standardizing_transform, standardizing_transform_zuko, @@ -31,8 +34,8 @@ def build_made( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Optional[ZScoreType] = "affine-independent", + z_score_y: Optional[ZScoreType] = "affine-independent", hidden_features: int = 50, num_mixture_components: int = 10, embedding_net: nn.Module = nn.Identity(), @@ -102,8 +105,8 @@ def build_made( def build_maf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Optional[ZScoreType] = "affine-independent", + z_score_y: Optional[ZScoreType] = "affine-independent", hidden_features: int = 50, num_transforms: int = 5, embedding_net: nn.Module = 
nn.Identity(), @@ -190,8 +193,8 @@ def build_maf( def build_maf_rqs( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Optional[ZScoreType] = "affine-independent", + z_score_y: Optional[ZScoreType] = "affine-independent", hidden_features: int = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -302,8 +305,8 @@ def build_maf_rqs( def build_nsf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Optional[ZScoreType] = "affine-independent", + z_score_y: Optional[ZScoreType] = "affine-independent", hidden_features: int = 50, num_transforms: int = 5, num_bins: int = 10, @@ -424,8 +427,8 @@ def mask_in_layer(i): def build_zuko_nice( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Optional[ZScoreType] = "affine-independent", + z_score_y: Optional[ZScoreType] = "affine-independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -479,8 +482,8 @@ def build_zuko_nice( def build_zuko_maf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Optional[ZScoreType] = "affine-independent", + z_score_y: Optional[ZScoreType] = "affine-independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -531,8 +534,8 @@ def build_zuko_maf( def build_zuko_nsf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Optional[ZScoreType] = "affine-independent", + z_score_y: Optional[ZScoreType] = "affine-independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = 
nn.Identity(), @@ -592,8 +595,8 @@ def build_zuko_nsf( def build_zuko_ncsf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Optional[ZScoreType] = "affine-independent", + z_score_y: Optional[ZScoreType] = "affine-independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -648,8 +651,8 @@ def build_zuko_ncsf( def build_zuko_sospf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Optional[ZScoreType] = "affine-independent", + z_score_y: Optional[ZScoreType] = "affine-independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -702,8 +705,8 @@ def build_zuko_sospf( def build_zuko_naf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Optional[ZScoreType] = "affine-independent", + z_score_y: Optional[ZScoreType] = "affine-independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -768,8 +771,8 @@ def build_zuko_naf( def build_zuko_unaf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Optional[ZScoreType] = "affine-independent", + z_score_y: Optional[ZScoreType] = "affine-independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -834,8 +837,8 @@ def build_zuko_unaf( def build_zuko_cnf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Optional[ZScoreType] = "affine-independent", + z_score_y: Optional[ZScoreType] = "affine-independent", hidden_features: 
Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -888,8 +891,8 @@ def build_zuko_cnf( def build_zuko_gf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Optional[ZScoreType] = "affine-independent", + z_score_y: Optional[ZScoreType] = "affine-independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 3, embedding_net: nn.Module = nn.Identity(), @@ -945,8 +948,8 @@ def build_zuko_gf( def build_zuko_bpf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Optional[ZScoreType] = "affine-independent", + z_score_y: Optional[ZScoreType] = "affine-independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 3, embedding_net: nn.Module = nn.Identity(), @@ -1004,8 +1007,8 @@ def build_zuko_flow( which_nf: str, batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Optional[ZScoreType], + z_score_y: Optional[ZScoreType], hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -1019,13 +1022,20 @@ def build_zuko_flow( batch_x: Batch of xs, used to infer dimensionality and (optional) z-scoring. batch_y: Batch of ys, used to infer dimensionality and (optional) z-scoring. z_score_x: Whether to z-score xs passing into the network, can be one of: - - `none`, or None: do not z-score. - - `independent`: z-score each dimension independently. - - `structured`: treat dimensions as related, therefore compute mean and std - over the entire batch, instead of per-dimension. Should be used when each - sample is, for example, a time series or an image. - z_score_y: Whether to z-score ys passing into the network, same options as - z_score_x. 
+ - `none` or `None`: No transformation is applied. + - `affine-independent`: Applies standard z-scoring with independent dims. + - `affine-structured`: Applies standard z-scoring with structured dims. + - `logit-independent`: Applies logit transformation with independent dims. + - `logit-structured`: Applies logit transformation with structured dims. + Affine is the default standardization method. Logit is used for numerical + stability during training, mapping prior bounds estimated from + batch size to [-inf, inf]). + Independent z-scores each dimension independenlty. Structured treats dims + as related, therefore compute mean and std over the entire batch, instead of + per-dimension. Should be used when each sample is, for example, + a time series or an image. + z_score_y: Whether to z-score ys passing into the network, + same options as z_score_x. hidden_features: The number of hidden features in the flow. Defaults to 50. num_transforms: The number of transformations in the flow. Defaults to 5. embedding_net: The embedding network to use. Defaults to nn.Identity(). @@ -1060,46 +1070,50 @@ def build_zuko_flow( **kwargs, ) + transform_type_x, structured_x = z_score_parser(z_score_x) + transform_type_y, structured_y = z_score_parser(z_score_y) + # Continuous normalizing flows (CNF) only have one transform, # so we need to handle them slightly differently. if which_nf == "CNF": transform = flow_built.transform - - z_score_x_bool, structured_x = z_score_parser(z_score_x) - if z_score_x_bool: + if transform_type_x == "affine": transform = ( transform, standardizing_transform_zuko(batch_x, structured_x), ) - - z_score_y_bool, structured_y = z_score_parser(z_score_y) - if z_score_y_bool: - # Prepend standardizing transform to y-embedding. 
- embedding_net = nn.Sequential( - standardizing_transform_zuko(batch_y, structured_y), embedding_net + elif transform_type_x == "logit": + transform = ( + transform, + logit_transform_zuko(batch_x, structured_x), ) - - # Combine transforms. - neural_net = zuko.flows.Flow(transform, flow_built.base) else: - transforms = flow_built.transform.transforms - - z_score_x_bool, structured_x = z_score_parser(z_score_x) - if z_score_x_bool: - transforms = ( - *transforms, + transform = flow_built.transform.transforms + if transform_type_x == "affine": + transform = ( + *transform, standardizing_transform_zuko(batch_x, structured_x), ) + elif transform_type_x == "logit": + transform = ( + *transform, + logit_transform_zuko(batch_x, structured_x), + ) - z_score_y_bool, structured_y = z_score_parser(z_score_y) - if z_score_y_bool: - # Prepend standardizing transform to y-embedding. - embedding_net = nn.Sequential( - standardizing_net(batch_y, structured_y), embedding_net + if transform_type_y == "affine" or transform_type_y == "logit": + # Prepend standardizing transform to y-embedding. + embedding_net = nn.Sequential( + standardizing_net(batch_y, structured_y), embedding_net + ) + if transform_type_y == "logit": + # Print a warning that the logit transformation is not applied to y. + warnings.warn( + "Logit transformation can only be applied on the conditioned variable.", + stacklevel=2, ) - # Combine transforms. - neural_net = zuko.flows.Flow(transforms, flow_built.base) + # Combine transforms. + neural_net = zuko.flows.Flow(transform, flow_built.base) flow = ZukoFlow( neural_net, @@ -1114,7 +1128,6 @@ def build_zuko_flow( class ContextSplineMap(nn.Module): """ Neural network from `context` to the spline parameters. - We cannot use the resnet as conditioner to learn each dimension conditioned on the other dimensions (because there is only one). Instead, we learn the spline parameters directly. 
In the case of conditinal density estimation, From 23bd180643c7b90085261471218e6d23597d3687 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Tue, 18 Mar 2025 18:12:41 +0100 Subject: [PATCH 03/51] add new ZScoreTypes --- sbi/utils/sbiutils.py | 115 +++++++++++++++++++++++++++++++----------- 1 file changed, 86 insertions(+), 29 deletions(-) diff --git a/sbi/utils/sbiutils.py b/sbi/utils/sbiutils.py index 0af7f3f1a..3ab77f0f8 100644 --- a/sbi/utils/sbiutils.py +++ b/sbi/utils/sbiutils.py @@ -5,7 +5,18 @@ import random import warnings from math import pi -from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type, Union +from typing import ( + Any, + Callable, + Dict, + List, + Literal, + Optional, + Sequence, + Tuple, + Type, + Union, +) import numpy as np import pyknos.nflows.transforms as nflows_tf @@ -102,46 +113,63 @@ def clamp_and_warn(name: str, value: float, min_val: float, max_val: float) -> f return clamped_val -def z_score_parser(z_score_flag: Optional["str"]) -> Tuple[bool, bool]: - """Parses string z-score flag into booleans. +ZScoreType = Literal[ + "affine-independent", + "affine-structured", + "logit-independent", + "logit-structured", + "none", +] + - Converts string flag into booleans denoting whether to z-score or not, and whether - data dimensions are structured or independent. +def z_score_parser(z_score_flag: Optional[ZScoreType]) -> Tuple[Union[str, None], bool]: + """Parses string z-score flag into booleans.This function interprets a z-scoring + flag string to determine the type of transformation (affine, logit, or none) + and whether the data dimensions are structured or independent. Args: - z_score_flag: str flag for z-scoring method stating whether the data - dimensions are "structured" or "independent", or does not require z-scoring - ("none" or None). + z_score_flag (Optional[str]): A string specifying the z-scoring method. + - `none` or `None`: No transformation is applied. 
+ - `affine-independent`: Applies standard z-scoring with independent dimensions. + - `affine-structured`: Applies standard z-scoring with structured dimensions. + - `logit-independent`: Applies logit transformation with independent dimensions. + - `logit-structured`: Applies logit transformation with structured dimensions. Returns: - Flag for whether or not to z-score, and whether data is structured + tuple: + - transform_type (Optional[str]): The type of transformation, + either `"affine"`, `"logit"`, or `None` if no transformation is applied. + - structured_data (bool): A boolean indicating whether + the data dimensions are structured (`True`) or independent (`False`). """ if isinstance(z_score_flag, bool): # Raise warning if boolean was passed. warnings.warn( - "Boolean flag for z-scoring is deprecated as of sbi v0.18.0. It will be " - "removed in a future release. Use 'none', 'independent', or 'structured' " - "to indicate z-scoring option.", + "Boolean flag for z-scoring is deprecated as of sbi v0.23.3. It will be " + "removed in a future release. Use `affine-independent`,`affine-structured`," + "`logit-independent`, `logit-structured`, `none`." + "to indicate z-scoring option. Now defaulting to 'affine-independent'.", stacklevel=2, ) - z_score_bool, structured_data = z_score_flag, False - - elif (z_score_flag is None) or (z_score_flag == "none"): - # Return Falses if "none" or None was passed. - z_score_bool, structured_data = False, False - - elif (z_score_flag == "independent") or (z_score_flag == "structured"): - # Got one of two valid z-scoring methods. 
- z_score_bool = True - structured_data = z_score_flag == "structured" - + transform_type, structured_data = "affine", False + + if z_score_flag in [None, "none"]: + transform_type, structured_data = None, False + elif z_score_flag in [ + "affine-independent", + "affine-structured", + "logit-independent", + "logit-structured", + ]: + transform_type, structured_data = z_score_flag.split("-") + structured_data = structured_data == "structured" else: - # Return warning due to invalid option, defaults to not z-scoring. raise ValueError( - "Invalid z-scoring option. Use 'none', 'independent', or 'structured'." + "Invalid z-scoring option. Use 'affine-independent', 'affine-structured'", + "'logit-independent', 'logit-structured', 'identity'.", ) - return z_score_bool, structured_data + return transform_type, structured_data def standardizing_transform( @@ -253,19 +281,20 @@ def log_abs_det_jacobian(self, x: Tensor, y: Tensor) -> Tensor: def logit_transform_zuko( - min_val: float, max_val: float, eps: float = 1e-6 + batch_t: Tensor, structured_dims: bool = False, eps: float = 1e-5 ) -> zuko.flows.UnconditionalTransform: """ Builds logit-transforming transform for Zuko flows on a bounded interval. Args: - min_val: Lower bound of the prior interval. - max_val: Upper bound of the prior interval. + batch_t: Batched tensor from which min and max values are computed. eps: Small constant to avoid numerical issues at 0 and 1. Returns: Logit transformation for the given range. """ + min_val, max_val = min_max_estimation(batch_t, structured_dims) + return zuko.flows.UnconditionalTransform( BoundedLogitTransform, min_val=min_val, @@ -275,6 +304,34 @@ def logit_transform_zuko( ) +def min_max_estimation( + batch_t: Tensor, structured_dims: bool = False +) -> Tuple[float, float]: + """ + Estimates the minimum and maximum values of a batched tensor. + + Args: + batch_t: Batched tensor from which min and max values are computed. 
+ structured_dims: Whether data dimensions are structured (e.g., time-series, + images), which requires computing min and max per sample first before + aggregating over samples for a single min and max for the batch, or + independent (default), which computes min and max values independently. + + Returns: + Tuple of min and max values for the given tensor. + """ + is_valid_t, *_ = handle_invalid_x(batch_t, True) + + if structured_dims: + min_val = torch.min(batch_t[is_valid_t], dim=1).values + max_val = torch.max(batch_t[is_valid_t], dim=1).values + else: + min_val = torch.min(batch_t[is_valid_t], dim=0).values + max_val = torch.max(batch_t[is_valid_t], dim=0).values + + return min_val, max_val + + def z_standardization( batch_t: Tensor, structured_dims: bool = False, From 162b9769b94e36ada309b9c3c929cf0128f3bad1 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Wed, 19 Mar 2025 11:42:43 +0100 Subject: [PATCH 04/51] resolving bug z_scoring last year --- sbi/utils/sbiutils.py | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/sbi/utils/sbiutils.py b/sbi/utils/sbiutils.py index 3ab77f0f8..ec940e0c7 100644 --- a/sbi/utils/sbiutils.py +++ b/sbi/utils/sbiutils.py @@ -113,12 +113,17 @@ def clamp_and_warn(name: str, value: float, min_val: float, max_val: float) -> f return clamped_val -ZScoreType = Literal[ - "affine-independent", - "affine-structured", - "logit-independent", - "logit-structured", - "none", +ZScoreType = Union[ + Literal[ + "affine-independent", + "affine-structured", + "logit-independent", + "logit-structured", + "structured", + "independent", + "none", + ], + None, ] @@ -163,10 +168,29 @@ def z_score_parser(z_score_flag: Optional[ZScoreType]) -> Tuple[Union[str, None] ]: transform_type, structured_data = z_score_flag.split("-") structured_data = structured_data == "structured" + elif z_score_flag == "structured": + # Raise warning if structured was passed + warnings.warn( + "structured 
z-scoring is depreciated as of sbi v0.23.3. It will be " + "removed in a future release. Use `affine-independent`,`affine-structured`," + "`logit-independent`, `logit-structured`, `none`, None." + "to indicate z-scoring option. Now defaulting to 'affine-independent'.", + stacklevel=2, + ) + transform_type, structured_data = "affine", True + elif z_score_flag == "independent": + warnings.warn( + "independent z-scoring is depreciated as of sbi v0.23.3. It will be " + "removed in a future release. Use `affine-independent`,`affine-structured`," + "`logit-independent`, `logit-structured`, `none`, None." + "to indicate z-scoring option. Now defaulting to 'affine-independent'.", + stacklevel=2, + ) + transform_type, structured_data = "affine", False else: raise ValueError( - "Invalid z-scoring option. Use 'affine-independent', 'affine-structured'", - "'logit-independent', 'logit-structured', 'identity'.", + "Invalid z-scoring option. Use `affine-independent`, `affine-structured`", + "`logit-independent`, `logit-structured`, `none`.", ) return transform_type, structured_data From c5772d2bb6c016a763226c359e8614c2b529d539 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Wed, 19 Mar 2025 11:43:36 +0100 Subject: [PATCH 05/51] resolving bug z_scoring last year --- sbi/neural_nets/net_builders/flow.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index 83ffe7161..9123cbb42 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -1079,25 +1079,25 @@ def build_zuko_flow( transform = flow_built.transform if transform_type_x == "affine": transform = ( - transform, standardizing_transform_zuko(batch_x, structured_x), + transform, ) elif transform_type_x == "logit": transform = ( - transform, logit_transform_zuko(batch_x, structured_x), + transform, ) else: transform = flow_built.transform.transforms if transform_type_x == "affine": 
transform = ( - *transform, standardizing_transform_zuko(batch_x, structured_x), + *transform, ) elif transform_type_x == "logit": transform = ( - *transform, logit_transform_zuko(batch_x, structured_x), + *transform, ) if transform_type_y == "affine" or transform_type_y == "logit": From 993efb70c176d846ccaadef0d5876e04e96815dc Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Wed, 19 Mar 2025 12:37:49 +0100 Subject: [PATCH 06/51] revert z_score parser --- sbi/utils/sbiutils.py | 86 ++++++++++++++----------------------------- 1 file changed, 28 insertions(+), 58 deletions(-) diff --git a/sbi/utils/sbiutils.py b/sbi/utils/sbiutils.py index ec940e0c7..24b7cd300 100644 --- a/sbi/utils/sbiutils.py +++ b/sbi/utils/sbiutils.py @@ -115,85 +115,55 @@ def clamp_and_warn(name: str, value: float, min_val: float, max_val: float) -> f ZScoreType = Union[ Literal[ - "affine-independent", - "affine-structured", - "logit-independent", - "logit-structured", - "structured", "independent", + "structured", + "logit", "none", ], None, ] -def z_score_parser(z_score_flag: Optional[ZScoreType]) -> Tuple[Union[str, None], bool]: - """Parses string z-score flag into booleans.This function interprets a z-scoring - flag string to determine the type of transformation (affine, logit, or none) - and whether the data dimensions are structured or independent. +def z_score_parser(z_score_flag: Optional["str"]) -> Tuple[bool, bool]: + """Parses string z-score flag into booleans. + + Converts string flag into booleans denoting whether to z-score or not, and whether + data dimensions are structured or independent. Args: - z_score_flag (Optional[str]): A string specifying the z-scoring method. - - `none` or `None`: No transformation is applied. - - `affine-independent`: Applies standard z-scoring with independent dimensions. - - `affine-structured`: Applies standard z-scoring with structured dimensions. - - `logit-independent`: Applies logit transformation with independent dimensions. 
- - `logit-structured`: Applies logit transformation with structured dimensions. + z_score_flag: str flag for z-scoring method stating whether the data + dimensions are "structured" or "independent", or does not require z-scoring + ("none" or None). Returns: - tuple: - - transform_type (Optional[str]): The type of transformation, - either `"affine"`, `"logit"`, or `None` if no transformation is applied. - - structured_data (bool): A boolean indicating whether - the data dimensions are structured (`True`) or independent (`False`). + Flag for whether or not to z-score, and whether data is structured """ if isinstance(z_score_flag, bool): # Raise warning if boolean was passed. warnings.warn( - "Boolean flag for z-scoring is deprecated as of sbi v0.23.3. It will be " - "removed in a future release. Use `affine-independent`,`affine-structured`," - "`logit-independent`, `logit-structured`, `none`." - "to indicate z-scoring option. Now defaulting to 'affine-independent'.", - stacklevel=2, - ) - transform_type, structured_data = "affine", False - - if z_score_flag in [None, "none"]: - transform_type, structured_data = None, False - elif z_score_flag in [ - "affine-independent", - "affine-structured", - "logit-independent", - "logit-structured", - ]: - transform_type, structured_data = z_score_flag.split("-") - structured_data = structured_data == "structured" - elif z_score_flag == "structured": - # Raise warning if structured was passed - warnings.warn( - "structured z-scoring is depreciated as of sbi v0.23.3. It will be " - "removed in a future release. Use `affine-independent`,`affine-structured`," - "`logit-independent`, `logit-structured`, `none`, None." - "to indicate z-scoring option. Now defaulting to 'affine-independent'.", + "Boolean flag for z-scoring is deprecated as of sbi v0.18.0. It will be " + "removed in a future release. 
Use 'none', 'independent', or 'structured' " + "to indicate z-scoring option.", stacklevel=2, ) - transform_type, structured_data = "affine", True - elif z_score_flag == "independent": - warnings.warn( - "independent z-scoring is depreciated as of sbi v0.23.3. It will be " - "removed in a future release. Use `affine-independent`,`affine-structured`," - "`logit-independent`, `logit-structured`, `none`, None." - "to indicate z-scoring option. Now defaulting to 'affine-independent'.", - stacklevel=2, - ) - transform_type, structured_data = "affine", False + z_score_bool, structured_data = z_score_flag, False + + elif (z_score_flag is None) or (z_score_flag == "none"): + # Return Falses if "none" or None was passed. + z_score_bool, structured_data = False, False + + elif (z_score_flag == "independent") or (z_score_flag == "structured"): + # Got one of two valid z-scoring methods. + z_score_bool = True + structured_data = z_score_flag == "structured" + else: + # Return warning due to invalid option, defaults to not z-scoring. raise ValueError( - "Invalid z-scoring option. Use `affine-independent`, `affine-structured`", - "`logit-independent`, `logit-structured`, `none`.", + "Invalid z-scoring option. Use 'none', 'independent', or 'structured'." 
) - return transform_type, structured_data + return z_score_bool, structured_data def standardizing_transform( From 862f3d267f0391b37559713b9e99a91e5da6232f Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Wed, 19 Mar 2025 16:57:49 +0100 Subject: [PATCH 07/51] adjusted logit structure in build_zuko_flow --- sbi/neural_nets/net_builders/flow.py | 176 ++++++++++++++++----------- sbi/utils/sbiutils.py | 115 +---------------- 2 files changed, 113 insertions(+), 178 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index 9123cbb42..f3874ffec 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -14,12 +14,13 @@ rational_quadratic, # pyright: ignore[reportAttributeAccessIssue] ) from torch import Tensor, nn, relu, tanh, tensor, uint8 +from torch.distributions import Distribution from sbi.neural_nets.estimators import NFlowsFlow, ZukoFlow from sbi.utils.nn_utils import MADEMoGWrapper, get_numel from sbi.utils.sbiutils import ( ZScoreType, - logit_transform_zuko, + mcmc_transform, standardizing_net, standardizing_transform, standardizing_transform_zuko, @@ -34,8 +35,8 @@ def build_made( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "affine-independent", - z_score_y: Optional[ZScoreType] = "affine-independent", + z_score_x: Optional[ZScoreType] = "independent", + z_score_y: Optional[ZScoreType] = "independent", hidden_features: int = 50, num_mixture_components: int = 10, embedding_net: nn.Module = nn.Identity(), @@ -105,8 +106,8 @@ def build_made( def build_maf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "affine-independent", - z_score_y: Optional[ZScoreType] = "affine-independent", + z_score_x: Optional[ZScoreType] = "independent", + z_score_y: Optional[ZScoreType] = "independent", hidden_features: int = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -193,8 +194,8 @@ def build_maf( def 
build_maf_rqs( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "affine-independent", - z_score_y: Optional[ZScoreType] = "affine-independent", + z_score_x: Optional[ZScoreType] = "independent", + z_score_y: Optional[ZScoreType] = "independent", hidden_features: int = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -305,8 +306,8 @@ def build_maf_rqs( def build_nsf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "affine-independent", - z_score_y: Optional[ZScoreType] = "affine-independent", + z_score_x: Optional[ZScoreType] = "independent", + z_score_y: Optional[ZScoreType] = "independent", hidden_features: int = 50, num_transforms: int = 5, num_bins: int = 10, @@ -427,8 +428,8 @@ def mask_in_layer(i): def build_zuko_nice( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "affine-independent", - z_score_y: Optional[ZScoreType] = "affine-independent", + z_score_x: Optional[ZScoreType] = "independent", + z_score_y: Optional[ZScoreType] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -482,8 +483,8 @@ def build_zuko_nice( def build_zuko_maf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "affine-independent", - z_score_y: Optional[ZScoreType] = "affine-independent", + z_score_x: Optional[ZScoreType] = "independent", + z_score_y: Optional[ZScoreType] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -534,8 +535,8 @@ def build_zuko_maf( def build_zuko_nsf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "affine-independent", - z_score_y: Optional[ZScoreType] = "affine-independent", + z_score_x: Optional[ZScoreType] = "independent", + z_score_y: Optional[ZScoreType] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: 
nn.Module = nn.Identity(), @@ -595,8 +596,8 @@ def build_zuko_nsf( def build_zuko_ncsf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "affine-independent", - z_score_y: Optional[ZScoreType] = "affine-independent", + z_score_x: Optional[ZScoreType] = "independent", + z_score_y: Optional[ZScoreType] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -651,8 +652,8 @@ def build_zuko_ncsf( def build_zuko_sospf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "affine-independent", - z_score_y: Optional[ZScoreType] = "affine-independent", + z_score_x: Optional[ZScoreType] = "independent", + z_score_y: Optional[ZScoreType] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -705,8 +706,8 @@ def build_zuko_sospf( def build_zuko_naf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "affine-independent", - z_score_y: Optional[ZScoreType] = "affine-independent", + z_score_x: Optional[ZScoreType] = "independent", + z_score_y: Optional[ZScoreType] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -771,8 +772,8 @@ def build_zuko_naf( def build_zuko_unaf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "affine-independent", - z_score_y: Optional[ZScoreType] = "affine-independent", + z_score_x: Optional[ZScoreType] = "independent", + z_score_y: Optional[ZScoreType] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -837,8 +838,8 @@ def build_zuko_unaf( def build_zuko_cnf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "affine-independent", - z_score_y: Optional[ZScoreType] = "affine-independent", + z_score_x: Optional[ZScoreType] = 
"independent", + z_score_y: Optional[ZScoreType] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -891,8 +892,8 @@ def build_zuko_cnf( def build_zuko_gf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "affine-independent", - z_score_y: Optional[ZScoreType] = "affine-independent", + z_score_x: Optional[ZScoreType] = "independent", + z_score_y: Optional[ZScoreType] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 3, embedding_net: nn.Module = nn.Identity(), @@ -948,8 +949,8 @@ def build_zuko_gf( def build_zuko_bpf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "affine-independent", - z_score_y: Optional[ZScoreType] = "affine-independent", + z_score_x: Optional[ZScoreType] = "independent", + z_score_y: Optional[ZScoreType] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 3, embedding_net: nn.Module = nn.Identity(), @@ -1007,11 +1008,12 @@ def build_zuko_flow( which_nf: str, batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType], - z_score_y: Optional[ZScoreType], + z_score_x: Optional[ZScoreType] = "independent", + z_score_y: Optional[ZScoreType] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), + x_dist: Optional[Distribution] = None, **kwargs, ) -> ZukoFlow: """ @@ -1022,23 +1024,21 @@ def build_zuko_flow( batch_x: Batch of xs, used to infer dimensionality and (optional) z-scoring. batch_y: Batch of ys, used to infer dimensionality and (optional) z-scoring. z_score_x: Whether to z-score xs passing into the network, can be one of: - - `none` or `None`: No transformation is applied. - - `affine-independent`: Applies standard z-scoring with independent dims. - - `affine-structured`: Applies standard z-scoring with structured dims. 
- - `logit-independent`: Applies logit transformation with independent dims. - - `logit-structured`: Applies logit transformation with structured dims. - Affine is the default standardization method. Logit is used for numerical - stability during training, mapping prior bounds estimated from - batch size to [-inf, inf]). - Independent z-scores each dimension independenlty. Structured treats dims - as related, therefore compute mean and std over the entire batch, instead of - per-dimension. Should be used when each sample is, for example, - a time series or an image. - z_score_y: Whether to z-score ys passing into the network, - same options as z_score_x. + - `none`, or None: do not z-score. + - `independent`: z-score each dimension independently. + - `structured`: treat dimensions as related, therefore compute mean and std + over the entire batch, instead of per-dimension. Should be used when each + sample is, for example, a time series or an image. + - `logit`: Applies logit transformation, if bounds from `x_dist` are given. + z_score_y: Whether to z-score ys passing into the network, same options as + z_score_x. hidden_features: The number of hidden features in the flow. Defaults to 50. num_transforms: The number of transformations in the flow. Defaults to 5. embedding_net: The embedding network to use. Defaults to nn.Identity(). + x_dist: The distribution over x, used to determine the bounds for the logit + transformation. x_dist is typically the prior for NPE. For NLE/NRE, + it might be some rough bounded distribution over the data provided + additionally by the user. **kwargs: Additional keyword arguments to pass to the flow constructor. Returns: @@ -1070,50 +1070,88 @@ def build_zuko_flow( **kwargs, ) - transform_type_x, structured_x = z_score_parser(z_score_x) - transform_type_y, structured_y = z_score_parser(z_score_y) - # Continuous normalizing flows (CNF) only have one transform, # so we need to handle them slightly differently. 
if which_nf == "CNF": transform = flow_built.transform - if transform_type_x == "affine": + + z_score_x_bool, structured_x = z_score_parser(z_score_x) + if z_score_x_bool: + # data is not z-score x if it is logit transformed. transform = ( standardizing_transform_zuko(batch_x, structured_x), transform, ) - elif transform_type_x == "logit": + + # Only x can be logit transformed (not y), if a distribution + # over x is provided (for NPE: this would be the prior + # over theta). + if z_score_x == "logit" and x_dist is not None: transform = ( - logit_transform_zuko(batch_x, structured_x), + # mcmc transform is ~logit transform: + # maps from a bounded to unbound space. + mcmc_transform(x_dist), transform, ) - else: - transform = flow_built.transform.transforms - if transform_type_x == "affine": + else: transform = ( standardizing_transform_zuko(batch_x, structured_x), - *transform, + transform, ) - elif transform_type_x == "logit": - transform = ( - logit_transform_zuko(batch_x, structured_x), - *transform, + warnings.warn( + "Logit transformation is only available if x_dist is provided. " + "The data will be standardized instead.", + stacklevel=2, ) - if transform_type_y == "affine" or transform_type_y == "logit": - # Prepend standardizing transform to y-embedding. - embedding_net = nn.Sequential( - standardizing_net(batch_y, structured_y), embedding_net - ) - if transform_type_y == "logit": - # Print a warning that the logit transformation is not applied to y. + z_score_y_bool, structured_y = z_score_parser(z_score_y) + if z_score_y_bool: + # Prepend standardizing transform to y-embedding. + embedding_net = nn.Sequential( + standardizing_transform_zuko(batch_y, structured_y), embedding_net + ) + + # Combine transforms. 
+ neural_net = zuko.flows.Flow(transform, flow_built.base) + else: + transforms = flow_built.transform.transforms + + z_score_x_bool, structured_x = z_score_parser(z_score_x) + if z_score_x_bool: + transforms = ( + standardizing_transform_zuko(batch_x, structured_x), + *transforms, + ) + + # Only x can be logit transformed (not y), if a distribution + # over x is provided (for NPE: this would be the prior + # over theta). + if z_score_x == "logit": + transforms = ( + # mcmc transform maps from a bounded to unbound space. + mcmc_transform(x_dist), + *transforms, + ) + else: + transforms = ( + standardizing_transform_zuko(batch_x, structured_x), + *transforms, + ) warnings.warn( - "Logit transformation can only be applied on the conditioned variable.", + "Logit transformation is only available if x_dist is provided. " + "The data will be standardized instead.", stacklevel=2, ) - # Combine transforms. - neural_net = zuko.flows.Flow(transform, flow_built.base) + z_score_y_bool, structured_y = z_score_parser(z_score_y) + if z_score_y_bool: + # Prepend standardizing transform to y-embedding. + embedding_net = nn.Sequential( + standardizing_net(batch_y, structured_y), embedding_net + ) + + # Combine transforms. + neural_net = zuko.flows.Flow(transforms, flow_built.base) flow = ZukoFlow( neural_net, diff --git a/sbi/utils/sbiutils.py b/sbi/utils/sbiutils.py index 24b7cd300..82d9c1273 100644 --- a/sbi/utils/sbiutils.py +++ b/sbi/utils/sbiutils.py @@ -30,7 +30,6 @@ AffineTransform, Distribution, Independent, - Transform, biject_to, constraints, ) @@ -156,7 +155,10 @@ def z_score_parser(z_score_flag: Optional["str"]) -> Tuple[bool, bool]: # Got one of two valid z-scoring methods. z_score_bool = True structured_data = z_score_flag == "structured" - + elif z_score_flag == "logit": + # Do not z-score if logit transform. Logit is not estimated from data, + # but from the prior bounds, so structured/indpendent does not matter. 
+ z_score_bool, structured_data = False, False else: # Return warning due to invalid option, defaults to not z-scoring. raise ValueError( @@ -220,112 +222,6 @@ def standardizing_transform_zuko( ) -class BoundedLogitTransform(Transform): - """ - Implements a logit transformation for data bounded within a given interval - (min_val, max_val), mapping it to the real line (-inf, inf). This transformation - is useful for normalizing bounded data while ensuring numerical stability. - - The transformation follows: - x' = log((x - min_val) / (max_val - min_val)) - - log(1 - (x - min_val) / (max_val - min_val)) - - The inverse transformation applies the sigmoid function - to map back to (min_val, max_val). - - Attributes: - min_val (float): The lower bound of the input domain. - max_val (float): The upper bound of the input domain. - eps (float): A small value to prevent numerical issues at the boundaries. - domain (Constraint): Defines the valid input range as (min_val, max_val). - codomain (Constraint): Defines the output range as the real line (-inf, inf). 
- """ - - def __init__(self, min_val: float, max_val: float, eps: float = 1e-5): - super().__init__() - self.min_val = min_val - self.max_val = max_val - self.eps = eps # Avoids numerical instability at boundaries - - # Define domain and codomain - self.domain = constraints.interval(min_val, max_val) # Input is in (min, max) - self.codomain = constraints.real # Output is unbounded - - def __call__(self, x: Tensor) -> Tensor: - # Normalize to (0,1) - x = (x - self.min_val) / (self.max_val - self.min_val) - x = torch.clamp(x, self.eps, 1 - self.eps) # Prevents log(0) or log(∞) - return torch.log(x) - torch.log(1 - x) # Logit function - - def inv(self, y: Tensor) -> Tensor: - # Sigmoid and scale back to (min, max) - return self.min_val + (self.max_val - self.min_val) * torch.sigmoid(y) - - def log_abs_det_jacobian(self, x: Tensor, y: Tensor) -> Tensor: - """ - Computes the log absolute determinant of the Jacobian of the transformation. - Needed for proper transformation in Zuko flows. - """ - x = (x - self.min_val) / (self.max_val - self.min_val) - x = torch.clamp(x, self.eps, 1 - self.eps) - log_det = ( - -torch.log(x) - torch.log(1 - x) - torch.log(self.max_val - self.min_val) - ) - return log_det - - -def logit_transform_zuko( - batch_t: Tensor, structured_dims: bool = False, eps: float = 1e-5 -) -> zuko.flows.UnconditionalTransform: - """ - Builds logit-transforming transform for Zuko flows on a bounded interval. - - Args: - batch_t: Batched tensor from which min and max values are computed. - eps: Small constant to avoid numerical issues at 0 and 1. - - Returns: - Logit transformation for the given range. 
- """ - min_val, max_val = min_max_estimation(batch_t, structured_dims) - - return zuko.flows.UnconditionalTransform( - BoundedLogitTransform, - min_val=min_val, - max_val=max_val, - eps=eps, - buffer=True, - ) - - -def min_max_estimation( - batch_t: Tensor, structured_dims: bool = False -) -> Tuple[float, float]: - """ - Estimates the minimum and maximum values of a batched tensor. - - Args: - batch_t: Batched tensor from which min and max values are computed. - structured_dims: Whether data dimensions are structured (e.g., time-series, - images), which requires computing min and max per sample first before - aggregating over samples for a single min and max for the batch, or - independent (default), which computes min and max values independently. - - Returns: - Tuple of min and max values for the given tensor. - """ - is_valid_t, *_ = handle_invalid_x(batch_t, True) - - if structured_dims: - min_val = torch.min(batch_t[is_valid_t], dim=1).values - max_val = torch.max(batch_t[is_valid_t], dim=1).values - else: - min_val = torch.min(batch_t[is_valid_t], dim=0).values - max_val = torch.max(batch_t[is_valid_t], dim=0).values - - return min_val, max_val - - def z_standardization( batch_t: Tensor, structured_dims: bool = False, @@ -806,7 +702,8 @@ def mcmc_transform( It does two things: 1) When the prior support is bounded, it transforms the parameters into unbounded space. - 2) It z-scores the parameters such that MCMC is performed in a z-scored space. + 2) It z-scores the parameters such that MCMC is performed in a z-scored space + (when logit transformed, no z-scoring is applied/needed). Args: prior: The prior distribution. 
From bccd82b15edde1dcf36d2c4c3683042b08c2ea77 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Wed, 19 Mar 2025 17:24:56 +0100 Subject: [PATCH 08/51] resolve pyright error --- sbi/neural_nets/net_builders/flow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index f3874ffec..427e91c13 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -1013,7 +1013,7 @@ def build_zuko_flow( hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), - x_dist: Optional[Distribution] = None, + x_dist: Union[Distribution, None] = None, **kwargs, ) -> ZukoFlow: """ @@ -1126,7 +1126,7 @@ def build_zuko_flow( # Only x can be logit transformed (not y), if a distribution # over x is provided (for NPE: this would be the prior # over theta). - if z_score_x == "logit": + if z_score_x == "logit" and x_dist is not None: transforms = ( # mcmc transform maps from a bounded to unbound space. mcmc_transform(x_dist), From 145ef4e2403f675eaac2030ccbda9f72b3862309 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Thu, 20 Mar 2025 10:22:59 +0100 Subject: [PATCH 09/51] revert flow as a test --- sbi/neural_nets/net_builders/flow.py | 113 ++++++++------------------- 1 file changed, 31 insertions(+), 82 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index 427e91c13..51266ddb3 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -1,7 +1,6 @@ # This file is part of sbi, a toolkit for simulation-based inference. 
sbi is licensed # under the Apache License Version 2.0, see -import warnings from functools import partial from typing import List, Optional, Sequence, Union @@ -14,13 +13,10 @@ rational_quadratic, # pyright: ignore[reportAttributeAccessIssue] ) from torch import Tensor, nn, relu, tanh, tensor, uint8 -from torch.distributions import Distribution from sbi.neural_nets.estimators import NFlowsFlow, ZukoFlow from sbi.utils.nn_utils import MADEMoGWrapper, get_numel from sbi.utils.sbiutils import ( - ZScoreType, - mcmc_transform, standardizing_net, standardizing_transform, standardizing_transform_zuko, @@ -35,8 +31,8 @@ def build_made( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "independent", - z_score_y: Optional[ZScoreType] = "independent", + z_score_x: Optional[str] = "independent", + z_score_y: Optional[str] = "independent", hidden_features: int = 50, num_mixture_components: int = 10, embedding_net: nn.Module = nn.Identity(), @@ -106,8 +102,8 @@ def build_made( def build_maf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "independent", - z_score_y: Optional[ZScoreType] = "independent", + z_score_x: Optional[str] = "independent", + z_score_y: Optional[str] = "independent", hidden_features: int = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -194,8 +190,8 @@ def build_maf( def build_maf_rqs( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "independent", - z_score_y: Optional[ZScoreType] = "independent", + z_score_x: Optional[str] = "independent", + z_score_y: Optional[str] = "independent", hidden_features: int = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -306,8 +302,8 @@ def build_maf_rqs( def build_nsf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "independent", - z_score_y: Optional[ZScoreType] = "independent", + z_score_x: Optional[str] = "independent", + z_score_y: Optional[str] = "independent", 
hidden_features: int = 50, num_transforms: int = 5, num_bins: int = 10, @@ -428,8 +424,8 @@ def mask_in_layer(i): def build_zuko_nice( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "independent", - z_score_y: Optional[ZScoreType] = "independent", + z_score_x: Optional[str] = "independent", + z_score_y: Optional[str] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -483,8 +479,8 @@ def build_zuko_nice( def build_zuko_maf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "independent", - z_score_y: Optional[ZScoreType] = "independent", + z_score_x: Optional[str] = "independent", + z_score_y: Optional[str] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -535,8 +531,8 @@ def build_zuko_maf( def build_zuko_nsf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "independent", - z_score_y: Optional[ZScoreType] = "independent", + z_score_x: Optional[str] = "independent", + z_score_y: Optional[str] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -596,8 +592,8 @@ def build_zuko_nsf( def build_zuko_ncsf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "independent", - z_score_y: Optional[ZScoreType] = "independent", + z_score_x: Optional[str] = "independent", + z_score_y: Optional[str] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -652,8 +648,8 @@ def build_zuko_ncsf( def build_zuko_sospf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "independent", - z_score_y: Optional[ZScoreType] = "independent", + z_score_x: Optional[str] = "independent", + z_score_y: Optional[str] = "independent", hidden_features: 
Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -706,8 +702,8 @@ def build_zuko_sospf( def build_zuko_naf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "independent", - z_score_y: Optional[ZScoreType] = "independent", + z_score_x: Optional[str] = "independent", + z_score_y: Optional[str] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -772,8 +768,8 @@ def build_zuko_naf( def build_zuko_unaf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "independent", - z_score_y: Optional[ZScoreType] = "independent", + z_score_x: Optional[str] = "independent", + z_score_y: Optional[str] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -838,8 +834,8 @@ def build_zuko_unaf( def build_zuko_cnf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "independent", - z_score_y: Optional[ZScoreType] = "independent", + z_score_x: Optional[str] = "independent", + z_score_y: Optional[str] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -892,8 +888,8 @@ def build_zuko_cnf( def build_zuko_gf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "independent", - z_score_y: Optional[ZScoreType] = "independent", + z_score_x: Optional[str] = "independent", + z_score_y: Optional[str] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 3, embedding_net: nn.Module = nn.Identity(), @@ -949,8 +945,8 @@ def build_zuko_gf( def build_zuko_bpf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "independent", - z_score_y: Optional[ZScoreType] = "independent", + z_score_x: Optional[str] = "independent", + z_score_y: Optional[str] = "independent", 
hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 3, embedding_net: nn.Module = nn.Identity(), @@ -1008,12 +1004,11 @@ def build_zuko_flow( which_nf: str, batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[ZScoreType] = "independent", - z_score_y: Optional[ZScoreType] = "independent", + z_score_x: Optional[str] = "independent", + z_score_y: Optional[str] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), - x_dist: Union[Distribution, None] = None, **kwargs, ) -> ZukoFlow: """ @@ -1029,16 +1024,11 @@ def build_zuko_flow( - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. - - `logit`: Applies logit transformation, if bounds from `x_dist` are given. z_score_y: Whether to z-score ys passing into the network, same options as z_score_x. hidden_features: The number of hidden features in the flow. Defaults to 50. num_transforms: The number of transformations in the flow. Defaults to 5. embedding_net: The embedding network to use. Defaults to nn.Identity(). - x_dist: The distribution over x, used to determine the bounds for the logit - transformation. x_dist is typically the prior for NPE. For NLE/NRE, - it might be some rough bounded distribution over the data provided - additionally by the user. **kwargs: Additional keyword arguments to pass to the flow constructor. Returns: @@ -1077,31 +1067,9 @@ def build_zuko_flow( z_score_x_bool, structured_x = z_score_parser(z_score_x) if z_score_x_bool: - # data is not z-score x if it is logit transformed. transform = ( - standardizing_transform_zuko(batch_x, structured_x), - transform, - ) - - # Only x can be logit transformed (not y), if a distribution - # over x is provided (for NPE: this would be the prior - # over theta). 
- if z_score_x == "logit" and x_dist is not None: - transform = ( - # mcmc transform is ~logit transform: - # maps from a bounded to unbound space. - mcmc_transform(x_dist), transform, - ) - else: - transform = ( standardizing_transform_zuko(batch_x, structured_x), - transform, - ) - warnings.warn( - "Logit transformation is only available if x_dist is provided. " - "The data will be standardized instead.", - stacklevel=2, ) z_score_y_bool, structured_y = z_score_parser(z_score_y) @@ -1119,28 +1087,8 @@ def build_zuko_flow( z_score_x_bool, structured_x = z_score_parser(z_score_x) if z_score_x_bool: transforms = ( - standardizing_transform_zuko(batch_x, structured_x), *transforms, - ) - - # Only x can be logit transformed (not y), if a distribution - # over x is provided (for NPE: this would be the prior - # over theta). - if z_score_x == "logit" and x_dist is not None: - transforms = ( - # mcmc transform maps from a bounded to unbound space. - mcmc_transform(x_dist), - *transforms, - ) - else: - transforms = ( standardizing_transform_zuko(batch_x, structured_x), - *transforms, - ) - warnings.warn( - "Logit transformation is only available if x_dist is provided. " - "The data will be standardized instead.", - stacklevel=2, ) z_score_y_bool, structured_y = z_score_parser(z_score_y) @@ -1166,6 +1114,7 @@ def build_zuko_flow( class ContextSplineMap(nn.Module): """ Neural network from `context` to the spline parameters. + We cannot use the resnet as conditioner to learn each dimension conditioned on the other dimensions (because there is only one). Instead, we learn the spline parameters directly. 
In the case of conditinal density estimation, From 5a26157d6065747b06b2e00670caa687c3704327 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Thu, 20 Mar 2025 10:42:14 +0100 Subject: [PATCH 10/51] add x_dist variable --- sbi/neural_nets/net_builders/flow.py | 7 +++++++ sbi/utils/sbiutils.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index 51266ddb3..622547c5a 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -13,6 +13,7 @@ rational_quadratic, # pyright: ignore[reportAttributeAccessIssue] ) from torch import Tensor, nn, relu, tanh, tensor, uint8 +from torch.distributions import Distribution from sbi.neural_nets.estimators import NFlowsFlow, ZukoFlow from sbi.utils.nn_utils import MADEMoGWrapper, get_numel @@ -1009,6 +1010,7 @@ def build_zuko_flow( hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), + x_dist: Optional[Distribution] = None, **kwargs, ) -> ZukoFlow: """ @@ -1024,11 +1026,16 @@ def build_zuko_flow( - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. + - `logit`: Applies logit transformation, if bounds from `x_dist` are given. z_score_y: Whether to z-score ys passing into the network, same options as z_score_x. hidden_features: The number of hidden features in the flow. Defaults to 50. num_transforms: The number of transformations in the flow. Defaults to 5. embedding_net: The embedding network to use. Defaults to nn.Identity(). + x_dist: The distribution over x, used to determine the bounds for the logit + transformation. x_dist is typically the prior for NPE. For NLE/NRE, + it might be some rough bounded distribution over the data provided + additionally by the user. 
**kwargs: Additional keyword arguments to pass to the flow constructor. Returns: diff --git a/sbi/utils/sbiutils.py b/sbi/utils/sbiutils.py index 82d9c1273..7b9451f79 100644 --- a/sbi/utils/sbiutils.py +++ b/sbi/utils/sbiutils.py @@ -123,7 +123,7 @@ def clamp_and_warn(name: str, value: float, min_val: float, max_val: float) -> f ] -def z_score_parser(z_score_flag: Optional["str"]) -> Tuple[bool, bool]: +def z_score_parser(z_score_flag: Optional[str]) -> Tuple[bool, bool]: """Parses string z-score flag into booleans. Converts string flag into booleans denoting whether to z-score or not, and whether From 0dd3baa3d7959869fa7fc2f3e9d66facdcf4d181 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Thu, 20 Mar 2025 10:56:50 +0100 Subject: [PATCH 11/51] add logit to sbiutils_test.py --- tests/sbiutils_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/sbiutils_test.py b/tests/sbiutils_test.py index 51b27c357..7b410016d 100644 --- a/tests/sbiutils_test.py +++ b/tests/sbiutils_test.py @@ -406,10 +406,10 @@ def test_kde(bandwidth, transform, sample_weights): @pytest.mark.parametrize( - "z_x", [True, False, None, "none", "independent", "structured"] + "z_x", [True, False, None, "none", "independent", "structured", "logit"] ) @pytest.mark.parametrize( - "z_theta", [True, False, None, "none", "independent", "structured"] + "z_theta", [True, False, None, "none", "independent", "structured", "logit"] ) @pytest.mark.parametrize("builder", [likelihood_nn, posterior_nn, classifier_nn]) def test_z_scoring_structured(z_x, z_theta, builder): From 000c123e7185ec3f59d7490dc5504d008098fe01 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Thu, 20 Mar 2025 11:03:31 +0100 Subject: [PATCH 12/51] add logit if statement --- sbi/neural_nets/net_builders/flow.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index 622547c5a..eb686988e 100644 --- 
a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -18,6 +18,7 @@ from sbi.neural_nets.estimators import NFlowsFlow, ZukoFlow from sbi.utils.nn_utils import MADEMoGWrapper, get_numel from sbi.utils.sbiutils import ( + mcmc_transform, standardizing_net, standardizing_transform, standardizing_transform_zuko, @@ -1078,6 +1079,15 @@ def build_zuko_flow( transform, standardizing_transform_zuko(batch_x, structured_x), ) + # Only x (i.e., prior for NPE) can be logit transformed (not y) + # when x_dist is provided. + if z_score_x == "logit" and x_dist is not None: + transform = ( + # mcmc transform provides a mapping from a + # bound to unbound space. + mcmc_transform(x_dist), + transform, + ) z_score_y_bool, structured_y = z_score_parser(z_score_y) if z_score_y_bool: From b3bc54b0b91c5a28dac91647c26a917117bc9f87 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Thu, 20 Mar 2025 11:05:53 +0100 Subject: [PATCH 13/51] add logit if statement --- sbi/neural_nets/net_builders/flow.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index eb686988e..df14c6a4e 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -18,7 +18,6 @@ from sbi.neural_nets.estimators import NFlowsFlow, ZukoFlow from sbi.utils.nn_utils import MADEMoGWrapper, get_numel from sbi.utils.sbiutils import ( - mcmc_transform, standardizing_net, standardizing_transform, standardizing_transform_zuko, @@ -1076,16 +1075,7 @@ def build_zuko_flow( z_score_x_bool, structured_x = z_score_parser(z_score_x) if z_score_x_bool: transform = ( - transform, standardizing_transform_zuko(batch_x, structured_x), - ) - # Only x (i.e., prior for NPE) can be logit transformed (not y) - # when x_dist is provided. - if z_score_x == "logit" and x_dist is not None: - transform = ( - # mcmc transform provides a mapping from a - # bound to unbound space. 
- mcmc_transform(x_dist), transform, ) @@ -1104,8 +1094,8 @@ def build_zuko_flow( z_score_x_bool, structured_x = z_score_parser(z_score_x) if z_score_x_bool: transforms = ( - *transforms, standardizing_transform_zuko(batch_x, structured_x), + *transforms, ) z_score_y_bool, structured_y = z_score_parser(z_score_y) From fa8055929dda59cdce71fc4f1463a45e530a86ad Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Thu, 20 Mar 2025 11:10:51 +0100 Subject: [PATCH 14/51] add logit if statement --- sbi/neural_nets/net_builders/flow.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index df14c6a4e..07d4d5010 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -1078,6 +1078,14 @@ def build_zuko_flow( standardizing_transform_zuko(batch_x, structured_x), transform, ) + # Only x (i.e., prior for NPE) can be logit transformed (not y) + # when x_dist is provided. + if z_score_x == "logit" and x_dist is not None: + transform = ( + # mcmc transform provides a mapping from a + # bound to unbound space. + transform, + ) z_score_y_bool, structured_y = z_score_parser(z_score_y) if z_score_y_bool: From 1de1a98e71d96852b1292e653708c1005cbd27e0 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Thu, 20 Mar 2025 11:12:47 +0100 Subject: [PATCH 15/51] add logit if statement --- sbi/neural_nets/net_builders/flow.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index 07d4d5010..5f6a69e59 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -1084,6 +1084,7 @@ def build_zuko_flow( transform = ( # mcmc transform provides a mapping from a # bound to unbound space. 
+ standardizing_transform_zuko(batch_x, structured_x), transform, ) From e2007afb14a02b3f1b2f44c7ff35e8172495150a Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Thu, 20 Mar 2025 11:18:22 +0100 Subject: [PATCH 16/51] remove logit if statement --- sbi/neural_nets/net_builders/flow.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index 5f6a69e59..df14c6a4e 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -1078,15 +1078,6 @@ def build_zuko_flow( standardizing_transform_zuko(batch_x, structured_x), transform, ) - # Only x (i.e., prior for NPE) can be logit transformed (not y) - # when x_dist is provided. - if z_score_x == "logit" and x_dist is not None: - transform = ( - # mcmc transform provides a mapping from a - # bound to unbound space. - standardizing_transform_zuko(batch_x, structured_x), - transform, - ) z_score_y_bool, structured_y = z_score_parser(z_score_y) if z_score_y_bool: From 160375699050ecbb4da6ae14bc6fb4caab971bb6 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Thu, 20 Mar 2025 12:40:46 +0100 Subject: [PATCH 17/51] resolve pyright issues --- sbi/neural_nets/net_builders/flow.py | 32 ++++++++++++++++++++++++++-- sbi/utils/sbiutils.py | 29 +++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index df14c6a4e..52980c94a 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -18,6 +18,8 @@ from sbi.neural_nets.estimators import NFlowsFlow, ZukoFlow from sbi.utils.nn_utils import MADEMoGWrapper, get_numel from sbi.utils.sbiutils import ( + biject_transform_zuko, + mcmc_transform, standardizing_net, standardizing_transform, standardizing_transform_zuko, @@ -1073,7 +1075,21 @@ def build_zuko_flow( transform = flow_built.transform z_score_x_bool, structured_x = 
z_score_parser(z_score_x) - if z_score_x_bool: + + # Only x (i.e., prior for NPE) can be logit transformed (not y) + # when x_dist is provided. + if z_score_x == "logit" and x_dist is not None and hasattr(x_dist, "support"): + logit_transform = mcmc_transform(x_dist) + transform = ( + biject_transform_zuko(logit_transform), + transform, + ) + elif z_score_x == "logit" and x_dist is None: + raise ValueError( + "Logit transformation requires a distribution provided through `x_dist`" + "with supported bounds (see `mcmc_transform`).", + ) + elif z_score_x_bool: transform = ( standardizing_transform_zuko(batch_x, structured_x), transform, @@ -1092,7 +1108,19 @@ def build_zuko_flow( transforms = flow_built.transform.transforms z_score_x_bool, structured_x = z_score_parser(z_score_x) - if z_score_x_bool: + + if z_score_x == "logit" and x_dist is not None and hasattr(x_dist, "support"): + logit_transform = mcmc_transform(x_dist) + transforms = ( + biject_transform_zuko(logit_transform), + *transforms, + ) + elif z_score_x == "logit" and x_dist is None: + raise ValueError( + "Logit transformation requires a distribution provided through `x_dist`" + "with supported bounds (see `mcmc_transform`).", + ) + elif z_score_x_bool: transforms = ( standardizing_transform_zuko(batch_x, structured_x), *transforms, diff --git a/sbi/utils/sbiutils.py b/sbi/utils/sbiutils.py index 7b9451f79..d95d0142e 100644 --- a/sbi/utils/sbiutils.py +++ b/sbi/utils/sbiutils.py @@ -222,6 +222,35 @@ def standardizing_transform_zuko( ) +class CallableTransform: + """Wraps a PyTorch Transform to be used in Zuko UnconditionalTransform.""" + + def __init__(self, transform): + self.transform = transform + + def __call__(self): + return self.transform + + +def biject_transform_zuko( + transform, +) -> zuko.flows.UnconditionalTransform: + """ + Builds logit-transforming transform for Zuko flows on a bounded interval. + + Args: + transform: A logit transformation for the given support. 
+ eps: Small constant to avoid numerical issues at 0 and 1. + + Returns: + Logit transformation for the given range. + """ + return zuko.flows.UnconditionalTransform( + CallableTransform(transform), + buffer=True, + ) + + def z_standardization( batch_t: Tensor, structured_dims: bool = False, From 1ffd0e9af807dcffe56086590a166afe84a6c098 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Thu, 20 Mar 2025 16:24:25 +0100 Subject: [PATCH 18/51] cover logit in tests --- tests/sbiutils_test.py | 54 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/tests/sbiutils_test.py b/tests/sbiutils_test.py index 7b410016d..49391615c 100644 --- a/tests/sbiutils_test.py +++ b/tests/sbiutils_test.py @@ -20,6 +20,7 @@ from sbi.inference.trainers.npe.npe_a import NPE_A_MDN from sbi.neural_nets import classifier_nn, likelihood_nn, posterior_nn from sbi.utils import BoxUniform, get_kde +from sbi.utils.sbiutils import z_score_parser def test_conditional_density_1d(): @@ -405,6 +406,31 @@ def test_kde(bandwidth, transform, sample_weights): assert kde_vals.shape == torch.Size((num_draws,)) +@pytest.mark.parametrize( + "z_x", + [True, False, None, "none", "independent", "structured", "logit", "invalid_value"], +) +@pytest.mark.parametrize( + "z_theta", + [True, False, None, "none", "independent", "structured", "logit", "invalid_value"], +) +def test_z_score_parser(z_x, z_theta): + if z_theta == "invalid_value": + with pytest.raises(ValueError, match="Invalid z-scoring option."): + z_score_parser(z_theta) + + if z_x == "invalid_value": + with pytest.raises(ValueError, match="Invalid z-scoring option."): + z_score_parser(z_x) + + if z_x != "invalid_value" and z_theta != "invalid_value": + result_x = z_score_parser(z_x) + result_theta = z_score_parser(z_theta) + + assert result_x is not None, f"z_score_parser({z_x}) returned None" + assert result_theta is not None, f"z_score_parser({z_theta}) returned None" + + @pytest.mark.parametrize( 
"z_x", [True, False, None, "none", "independent", "structured", "logit"] ) @@ -417,19 +443,43 @@ def test_z_scoring_structured(z_x, z_theta, builder): Test that z-scoring string args don't break API. """ # Generate some signals for test. - t = torch.arange(0, 1, 0.001) + t = torch.arange(0, 1, 0.1) x_sin = torch.sin(t * 2 * torch.pi * 5) t_batch = torch.stack([(x_sin * (i + 1)) + (i * 2) for i in range(10)]) + num_dim = t_batch.shape[1] + x_dist = BoxUniform( + low=-2 * torch.ones(num_dim), high=2 * torch.ones(num_dim) + ) # Normal(0, 1) if (z_x == "logit" or z_theta == "logit") else None + # API tests + # TODO: Test breaks at "mnle" if builder in [likelihood_nn, posterior_nn]: - for model in ["mdn", "made", "maf", "nsf"]: + for model in [ + "mdn", + "made", + "maf", + "nsf", + "zuko_nice", + "zuko_nsf", + "zuko_maf", + "zuko_ncsf", + "zuko_bpf", + "maf_rqs", + "zuko_sospf", + "zuko_naf", + "zuko_unaf", + "zuko_gf", + "mlp_flowmatcher", + "resnet_flowmatcher", + ]: net = builder( model, z_score_theta=z_theta, z_score_x=z_x, hidden_features=2, num_transforms=1, + x_dist=x_dist, ) assert net(t_batch, t_batch) else: From 9cc887b0780600e99a0f44f81a7338901c6277b1 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Thu, 20 Mar 2025 16:55:58 +0100 Subject: [PATCH 19/51] cover tests for logit in flow.py --- tests/density_estimator_test.py | 45 +++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/density_estimator_test.py b/tests/density_estimator_test.py index 6496f11a6..270145267 100644 --- a/tests/density_estimator_test.py +++ b/tests/density_estimator_test.py @@ -12,6 +12,7 @@ from sbi.neural_nets.embedding_nets import CNNEmbedding from sbi.neural_nets.estimators.shape_handling import reshape_to_sample_batch_event +from sbi.neural_nets.estimators.zuko_flow import ZukoFlow from sbi.neural_nets.net_builders import ( build_categoricalmassestimator, build_made, @@ -33,6 +34,8 @@ build_zuko_sospf, build_zuko_unaf, ) +from 
sbi.neural_nets.net_builders.flow import build_zuko_flow +from sbi.utils.torchutils import BoxUniform # List of all density estimator builders for testing. model_builders = [ @@ -435,3 +438,45 @@ def test_mixed_density_estimator( # Test samples samples = density_estimator.sample(sample_shape, condition=conditions) assert samples.shape == (*sample_shape, batch_dim, *input_event_shape) + + +def test_build_zuko_flow_logit(): + batch_x = torch.randn(10, 5) + batch_y = torch.randn(10, 3) + num_dim = 5 + x_dist = BoxUniform( + low=-2 * torch.ones(num_dim), high=2 * torch.ones(num_dim) + ) # Define a bounded distribution + + # Test case where x_dist is provided (should not raise an error) + try: + flow = build_zuko_flow( + which_nf="MAF", + batch_x=batch_x, + batch_y=batch_y, + z_score_x="logit", + z_score_y="logit", + x_dist=x_dist, + ) + assert isinstance(flow, ZukoFlow) + print( + "Test passed: build_zuko_flow works with logit transformation" + "when x_dist is provided." + ) + except Exception as e: + raise AssertionError(f"Test failed with exception: {e}") from e + + # Test case where x_dist is missing (should raise ValueError) + with pytest.raises( + ValueError, + match="Logit transformation requires a distribution provided through `x_dist`", + ): + build_zuko_flow( + which_nf="MAF", + batch_x=batch_x, + batch_y=batch_y, + z_score_x="logit", + z_score_y="logit", + x_dist=None, # No distribution provided + ) + print("Test passed: ValueError raised when x_dist is missing.") From 957390507849a9384a1ab6ed222e62fb839f7746 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Thu, 20 Mar 2025 17:08:51 +0100 Subject: [PATCH 20/51] cover tests for CNF --- tests/density_estimator_test.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/density_estimator_test.py b/tests/density_estimator_test.py index 270145267..f8d413c3a 100644 --- a/tests/density_estimator_test.py +++ b/tests/density_estimator_test.py @@ -466,6 +466,24 @@ def 
test_build_zuko_flow_logit(): except Exception as e: raise AssertionError(f"Test failed with exception: {e}") from e + # Test to cover CNF + try: + flow = build_zuko_flow( + which_nf="CNF", + batch_x=batch_x, + batch_y=batch_y, + z_score_x="logit", + z_score_y="logit", + x_dist=x_dist, + ) + assert isinstance(flow, ZukoFlow) + print( + "Test passed: build_zuko_flow works with logit transformation" + "when x_dist is provided." + ) + except Exception as e: + raise AssertionError(f"Test failed with exception: {e}") from e + # Test case where x_dist is missing (should raise ValueError) with pytest.raises( ValueError, @@ -480,3 +498,17 @@ def test_build_zuko_flow_logit(): x_dist=None, # No distribution provided ) print("Test passed: ValueError raised when x_dist is missing.") + + with pytest.raises( + ValueError, + match="Logit transformation requires a distribution provided through `x_dist`", + ): + build_zuko_flow( + which_nf="CNF", + batch_x=batch_x, + batch_y=batch_y, + z_score_x="logit", + z_score_y="logit", + x_dist=None, # No distribution provided + ) + print("Test passed: ValueError raised when x_dist is missing.") From adff499b9f35a0d82fe9dc68fa23617693cbb3b4 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Thu, 20 Mar 2025 18:00:45 +0100 Subject: [PATCH 21/51] adding faq for logit transformation --- docs/faq/question_08_unconstrained.md | 42 +++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 docs/faq/question_08_unconstrained.md diff --git a/docs/faq/question_08_unconstrained.md b/docs/faq/question_08_unconstrained.md new file mode 100644 index 000000000..9880c8f63 --- /dev/null +++ b/docs/faq/question_08_unconstrained.md @@ -0,0 +1,42 @@ +# My rejection-sampling algorithm gets stuck? + +If you've encountered the following warning: + +``` +WARNING:root:Only 0.002% proposal samples were accepted. It + may take a long time to collect the remaining 980 + samples. 
Consider interrupting (Ctrl-C) and switching to a + different sampling method with + `build_posterior(..., sample_with='mcmc')`. or + `build_posterior(..., sample_with='vi')`. +``` + +this indicates that a significant portion of the samples proposed by the density estimator fall outside the prior bounds. Several factors might be causing this issue: + +1) Simulator Issues: Ensure that your simulator is functioning as expected and producing realistic outputs. +2) Insufficient Training Data: If the density estimator has been trained on too few simulations, it may lead to invalid estimations. +3) Problematic True Data: Check if there are inconsistencies or unexpected values in the observed data. + + +### Possible solutions + +If you've ruled out these issues, you can try training your density estimator in an unbounded space using a logit transformation. This transformation maps your data to logit space before training and then applies the inverse logit (sigmoid function) to ensure that the trained density estimator remains within the prior bounds. + +Instead of standardizing parameters using z-scoring, you can use the logit transformation. However, this requires providing a density estimation. The specific approach depends on the method you're using: + +- For NPE (Neural Posterior Estimation): You can simply use the prior as the density estimation. +- For NLE/NRE (Neural Likelihood Estimation / Neural Ratio Estimation): A rough density approximation over data boundaries is needed, making the process more complex. + + +### How to apply the logit transformation + +To enable logit transformation when defining your density estimator, use: + +``` +density_estimator_build_fun = posterior_nn( + model="zuko_nsf", hidden_features=60, num_transforms=3, z_score_theta="logit", x_dist=prior +) +``` +This ensures that your density estimator operates in a transformed space where it respects prior bounds, improving the efficiency of rejection sampling. 
+ +Note: The logit transformation is currently only supported for `zuko` density estimators. From 8176707ae633385c4a6d3d0726765ba6039c351a Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Thu, 20 Mar 2025 18:03:28 +0100 Subject: [PATCH 22/51] adding faq for logit transformation --- docs/faq/question_08_unconstrained.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/faq/question_08_unconstrained.md b/docs/faq/question_08_unconstrained.md index 9880c8f63..36b4c1547 100644 --- a/docs/faq/question_08_unconstrained.md +++ b/docs/faq/question_08_unconstrained.md @@ -1,10 +1,10 @@ -# My rejection-sampling algorithm gets stuck? +# The inference gets stuck? If you've encountered the following warning: ``` WARNING:root:Only 0.002% proposal samples were accepted. It - may take a long time to collect the remaining 980 + may take a long time to collect the remaining 99980 samples. Consider interrupting (Ctrl-C) and switching to a different sampling method with `build_posterior(..., sample_with='mcmc')`. or From 69502d7dc1911aaf5e085ba5e9040d205553dacc Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Fri, 21 Mar 2025 16:53:33 +0100 Subject: [PATCH 23/51] stash changes --- sbi/neural_nets/net_builders/flow.py | 45 +++++++++------ tests/density_estimator_test.py | 84 ++++++++++++---------------- 2 files changed, 62 insertions(+), 67 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index f696c0957..d4af6a301 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -1028,16 +1028,17 @@ def build_zuko_flow( - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. - - `logit`: Applies logit transformation, if bounds from `x_dist` are given. 
+ - `transform_to_unconstrained`: Transforms to + an unbound space, if bounds from `x_dist` are given. z_score_y: Whether to z-score ys passing into the network, same options as z_score_x. hidden_features: The number of hidden features in the flow. Defaults to 50. num_transforms: The number of transformations in the flow. Defaults to 5. embedding_net: The embedding network to use. Defaults to nn.Identity(). - x_dist: The distribution over x, used to determine the bounds for the logit - transformation. x_dist is typically the prior for NPE. For NLE/NRE, - it might be some rough bounded distribution over the data provided - additionally by the user. + x_dist: The distribution over x, used to determine the bounds for the + unconstrained transformation. x_dist is typically the prior for NPE. + For NLE/NRE, it might be some rough bounded distribution over the + data provided additionally by the user. **kwargs: Additional keyword arguments to pass to the flow constructor. Returns: @@ -1076,18 +1077,22 @@ def build_zuko_flow( z_score_x_bool, structured_x = z_score_parser(z_score_x) - # Only x (i.e., prior for NPE) can be logit transformed (not y) + # Only x (i.e., prior for NPE) can be transformed to unbound space (not y) # when x_dist is provided. 
- if z_score_x == "logit" and x_dist is not None and hasattr(x_dist, "support"): - logit_transform = mcmc_transform(x_dist) + if ( + z_score_x == "transform_to_unconstrained" + and x_dist is not None + and hasattr(x_dist, "support") + ): + transform_to_unconstrained = mcmc_transform(x_dist) transform = ( - biject_transform_zuko(logit_transform), + biject_transform_zuko(transform_to_unconstrained), transform, ) - elif z_score_x == "logit" and x_dist is None: + elif z_score_x == "transform_to_unconstrained" and x_dist is None: raise ValueError( - "Logit transformation requires a distribution provided through `x_dist`" - "with supported bounds (see `mcmc_transform`).", + "Transformation to unconstrained space requires a distribution" + "provided through `x_dist`", ) elif z_score_x_bool: transform = ( @@ -1109,16 +1114,20 @@ def build_zuko_flow( z_score_x_bool, structured_x = z_score_parser(z_score_x) - if z_score_x == "logit" and x_dist is not None and hasattr(x_dist, "support"): - logit_transform = mcmc_transform(x_dist) + if ( + z_score_x == "transform_to_unconstrained" + and x_dist is not None + and hasattr(x_dist, "support") + ): + transform_to_unconstrained = mcmc_transform(x_dist) transforms = ( - biject_transform_zuko(logit_transform), + biject_transform_zuko(transform_to_unconstrained), *transforms, ) - elif z_score_x == "logit" and x_dist is None: + elif z_score_x == "transform_to_unconstrained" and x_dist is None: raise ValueError( - "Logit transformation requires a distribution provided through `x_dist`" - "with supported bounds (see `mcmc_transform`).", + "Transformation to unconstrained space requires a distribution" + "provided through `x_dist`", ) elif z_score_x_bool: transforms = ( diff --git a/tests/density_estimator_test.py b/tests/density_estimator_test.py index e236fdbc1..aa8cb2e6c 100644 --- a/tests/density_estimator_test.py +++ b/tests/density_estimator_test.py @@ -8,7 +8,7 @@ import pytest import torch from torch import eye, zeros -from 
torch.distributions import MultivariateNormal +from torch.distributions import HalfNormal, MultivariateNormal from sbi.neural_nets.embedding_nets import CNNEmbedding from sbi.neural_nets.estimators.shape_handling import reshape_to_sample_batch_event @@ -468,75 +468,61 @@ def test_mixed_density_estimator( assert samples.shape == (*sample_shape, batch_dim, *input_event_shape) -def test_build_zuko_flow_logit(): +@pytest.mark.parametrize( + "distributions", + [ + BoxUniform(low=-2 * torch.ones(5), high=2 * torch.ones(5)), + HalfNormal(scale=torch.ones(1) * 2), + MultivariateNormal(loc=zeros(5), covariance_matrix=eye(5)), + ], +) +def test_build_zuko_flow_unconstrained(distributions): + # input dimension is 5 batch_x = torch.randn(10, 5) batch_y = torch.randn(10, 3) - num_dim = 5 - x_dist = BoxUniform( - low=-2 * torch.ones(num_dim), high=2 * torch.ones(num_dim) - ) # Define a bounded distribution # Test case where x_dist is provided (should not raise an error) - try: - flow = build_zuko_flow( - which_nf="MAF", - batch_x=batch_x, - batch_y=batch_y, - z_score_x="logit", - z_score_y="logit", - x_dist=x_dist, - ) - assert isinstance(flow, ZukoFlow) - print( - "Test passed: build_zuko_flow works with logit transformation" - "when x_dist is provided." - ) - except Exception as e: - raise AssertionError(f"Test failed with exception: {e}") from e + flow = build_zuko_flow( + which_nf="MAF", + batch_x=batch_x, + batch_y=batch_y, + z_score_x="transform_to_unconstrained", + z_score_y="transform_to_unconstrained", + x_dist=distributions, + ) + assert isinstance(flow, ZukoFlow) # Test to cover CNF - try: - flow = build_zuko_flow( - which_nf="CNF", - batch_x=batch_x, - batch_y=batch_y, - z_score_x="logit", - z_score_y="logit", - x_dist=x_dist, - ) - assert isinstance(flow, ZukoFlow) - print( - "Test passed: build_zuko_flow works with logit transformation" - "when x_dist is provided." 
- ) - except Exception as e: - raise AssertionError(f"Test failed with exception: {e}") from e + flow = build_zuko_flow( + which_nf="CNF", + batch_x=batch_x, + batch_y=batch_y, + z_score_x="transform_to_unconstrained", + z_score_y="transform_to_unconstrained", + x_dist=distributions, + ) + assert isinstance(flow, ZukoFlow) # Test case where x_dist is missing (should raise ValueError) with pytest.raises( ValueError, - match="Logit transformation requires a distribution provided through `x_dist`", + match=r".*distribution.*x_dist.*", ): build_zuko_flow( which_nf="MAF", batch_x=batch_x, batch_y=batch_y, - z_score_x="logit", - z_score_y="logit", + z_score_x="transform_to_unconstrained", + z_score_y="transform_to_unconstrained", x_dist=None, # No distribution provided ) - print("Test passed: ValueError raised when x_dist is missing.") - with pytest.raises( - ValueError, - match="Logit transformation requires a distribution provided through `x_dist`", - ): + with pytest.raises(ValueError, match=r".*distribution.*x_dist.*"): build_zuko_flow( which_nf="CNF", batch_x=batch_x, batch_y=batch_y, - z_score_x="logit", - z_score_y="logit", + z_score_x="transform_to_unconstrained", + z_score_y="transform_to_unconstrained", x_dist=None, # No distribution provided ) - print("Test passed: ValueError raised when x_dist is missing.") From d617030dbb0fdd8a9b1d8bd85421726332cbd121 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Fri, 21 Mar 2025 16:59:21 +0100 Subject: [PATCH 24/51] feedback guy adjustments --- docs/faq/question_08_unconstrained.md | 6 ++-- sbi/utils/sbiutils.py | 29 +++++----------- tests/sbiutils_test.py | 48 +++++++++++++++++++++++---- 3 files changed, 53 insertions(+), 30 deletions(-) diff --git a/docs/faq/question_08_unconstrained.md b/docs/faq/question_08_unconstrained.md index 36b4c1547..46697cd07 100644 --- a/docs/faq/question_08_unconstrained.md +++ b/docs/faq/question_08_unconstrained.md @@ -1,4 +1,4 @@ -# The inference gets stuck? 
+# What if almost all posterior samples lie outside the prior bounds for some conditionals? If you've encountered the following warning: @@ -28,9 +28,9 @@ Instead of standardizing parameters using z-scoring, you can use the logit trans - For NLE/NRE (Neural Likelihood Estimation / Neural Ratio Estimation): A rough density approximation over data boundaries is needed, making the process more complex. -### How to apply the logit transformation +### What do I do if my data is highly nonlinear? -To enable logit transformation when defining your density estimator, use: +To enable the logit transformation when defining your density estimator, use: ``` density_estimator_build_fun = posterior_nn( diff --git a/sbi/utils/sbiutils.py b/sbi/utils/sbiutils.py index 61a7fa521..abff3a40a 100644 --- a/sbi/utils/sbiutils.py +++ b/sbi/utils/sbiutils.py @@ -10,7 +10,6 @@ Callable, Dict, List, - Literal, Optional, Sequence, Tuple, @@ -112,17 +111,6 @@ def clamp_and_warn(name: str, value: float, min_val: float, max_val: float) -> f return clamped_val -ZScoreType = Union[ - Literal[ - "independent", - "structured", - "logit", - "none", - ], - None, -] - - def z_score_parser(z_score_flag: Optional[str]) -> Tuple[bool, bool]: """Parses string z-score flag into booleans. @@ -155,14 +143,15 @@ def z_score_parser(z_score_flag: Optional[str]) -> Tuple[bool, bool]: # Got one of two valid z-scoring methods. z_score_bool = True structured_data = z_score_flag == "structured" - elif z_score_flag == "logit": - # Do not z-score if logit transform. Logit is not estimated from data, - # but from the prior bounds, so structured/indpendent does not matter. + elif z_score_flag == "transform_to_unconstrained": + # Depending on the distribution, the biject_to function + # will provide e.g., a logit, exponential or z-scored transform. z_score_bool, structured_data = False, False else: # Return warning due to invalid option, defaults to not z-scoring.
raise ValueError( - "Invalid z-scoring option. Use 'none', 'independent', or 'structured'." + "Invalid z-scoring option. Use 'none', 'independent'" + "'structured' or 'transform_to_unconstrained." ) return z_score_bool, structured_data @@ -236,14 +225,14 @@ def biject_transform_zuko( transform, ) -> zuko.flows.UnconditionalTransform: """ - Builds logit-transforming transform for Zuko flows on a bounded interval. + Wraps a PyTorch transform in a Zuko unconditional transform on a bounded interval. Args: - transform: A logit transformation for the given support. - eps: Small constant to avoid numerical issues at 0 and 1. + transform: a bijective transformation for Zuko, depending on the input + (e.g., logit, exponential or z-scored) Returns: - Logit transformation for the given range. + Zuko bijective transformation """ return zuko.flows.UnconditionalTransform( CallableTransform(transform), diff --git a/tests/sbiutils_test.py b/tests/sbiutils_test.py index 49391615c..945ab106c 100644 --- a/tests/sbiutils_test.py +++ b/tests/sbiutils_test.py @@ -408,11 +408,29 @@ def test_kde(bandwidth, transform, sample_weights): @pytest.mark.parametrize( "z_x", - [True, False, None, "none", "independent", "structured", "logit", "invalid_value"], + [ + True, + False, + None, + "none", + "independent", + "structured", + "transform_to_unconstrained", + "invalid_value", + ], ) @pytest.mark.parametrize( "z_theta", - [True, False, None, "none", "independent", "structured", "logit", "invalid_value"], + [ + True, + False, + None, + "none", + "independent", + "structured", + "transform_to_unconstrained", + "invalid_value", + ], ) def test_z_score_parser(z_x, z_theta): if z_theta == "invalid_value": @@ -432,10 +450,28 @@ def test_z_score_parser(z_x, z_theta): @pytest.mark.parametrize( - "z_x", [True, False, None, "none", "independent", "structured", "logit"] + "z_x", + [ + True, + False, + None, + "none", + "independent", + "structured", + "transform_to_unconstrained", + ], )
@pytest.mark.parametrize( - "z_theta", [True, False, None, "none", "independent", "structured", "logit"] + "z_theta", + [ + True, + False, + None, + "none", + "independent", + "structured", + "transform_to_unconstrained", + ], ) @pytest.mark.parametrize("builder", [likelihood_nn, posterior_nn, classifier_nn]) def test_z_scoring_structured(z_x, z_theta, builder): @@ -448,9 +484,7 @@ def test_z_scoring_structured(z_x, z_theta, builder): t_batch = torch.stack([(x_sin * (i + 1)) + (i * 2) for i in range(10)]) num_dim = t_batch.shape[1] - x_dist = BoxUniform( - low=-2 * torch.ones(num_dim), high=2 * torch.ones(num_dim) - ) # Normal(0, 1) if (z_x == "logit" or z_theta == "logit") else None + x_dist = BoxUniform(low=-2 * torch.ones(num_dim), high=2 * torch.ones(num_dim)) # API tests # TODO: Test breaks at "mnle" From 9037534eb4dc1257a2e98d8a027a2b5d767cb054 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Fri, 21 Mar 2025 17:09:59 +0100 Subject: [PATCH 25/51] add documentation if statements --- sbi/neural_nets/net_builders/flow.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index d4af6a301..8bc910dc2 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -1018,6 +1018,13 @@ def build_zuko_flow( """ Fundamental building blocks to build a Zuko normalizing flow model. + There are only 3 cases we consider in the if statements down below: + + z_score_x is independent or none or structured, in which case we just use + the normal standardizing transform. + z_score_x is logit but xdist is not valid, in which case we raise an error + z_score_x is logit and xdist is valid, in which case we give the logit transform. + Args: which_nf (str): The type of normalizing flow to build. batch_x: Batch of xs, used to infer dimensionality and (optional) z-scoring. 
From d6606310c42bb99e4510e56bd46998426fef6d2a Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Tue, 10 Jun 2025 11:36:20 +0200 Subject: [PATCH 26/51] resolve comment 1 and 2 of Jan --- sbi/neural_nets/net_builders/flow.py | 79 ++++++++++++++++------------ sbi/utils/sbiutils.py | 2 +- 2 files changed, 45 insertions(+), 36 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index 8bc910dc2..601895859 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -1002,7 +1002,6 @@ def build_zuko_bpf( return flow - def build_zuko_flow( which_nf: str, batch_x: Tensor, @@ -1043,9 +1042,12 @@ def build_zuko_flow( num_transforms: The number of transformations in the flow. Defaults to 5. embedding_net: The embedding network to use. Defaults to nn.Identity(). x_dist: The distribution over x, used to determine the bounds for the - unconstrained transformation. x_dist is typically the prior for NPE. - For NLE/NRE, it might be some rough bounded distribution over the - data provided additionally by the user. + unconstrained transformation. + - In Neural Posterior Estimation (NPE), x_dist typically corresponds + to the prior over x. + - For Neural Likelihood Estimation (NLE) or Neural Ratio Estimation (NRE), + x_dist may instead be a user-specified distribution that captures a + rough bounded support of the observed data space. **kwargs: Additional keyword arguments to pass to the flow constructor. Returns: @@ -1086,21 +1088,24 @@ def build_zuko_flow( # Only x (i.e., prior for NPE) can be transformed to unbound space (not y) # when x_dist is provided. 
- if ( - z_score_x == "transform_to_unconstrained" - and x_dist is not None - and hasattr(x_dist, "support") - ): - transform_to_unconstrained = mcmc_transform(x_dist) - transform = ( - biject_transform_zuko(transform_to_unconstrained), - transform, - ) - elif z_score_x == "transform_to_unconstrained" and x_dist is None: - raise ValueError( - "Transformation to unconstrained space requires a distribution" - "provided through `x_dist`", - ) + if z_score_x == "transform_to_unconstrained": + if x_dist is None: + raise ValueError( + "Transformation to unconstrained space requires a distribution " + "provided through `x_dist`." + ) + elif not hasattr(x_dist, "support"): + raise ValueError( + "`x_dist` requires a `.support` attribute for" + "an unconstrained transformation." + ) + else: + transform_to_unconstrained = mcmc_transform(x_dist) + transform = ( + biject_transform_zuko(transform_to_unconstrained), + transform, + ) + elif z_score_x_bool: transform = ( standardizing_transform_zuko(batch_x, structured_x), @@ -1120,22 +1125,25 @@ def build_zuko_flow( transforms = flow_built.transform.transforms z_score_x_bool, structured_x = z_score_parser(z_score_x) - - if ( - z_score_x == "transform_to_unconstrained" - and x_dist is not None - and hasattr(x_dist, "support") - ): - transform_to_unconstrained = mcmc_transform(x_dist) - transforms = ( - biject_transform_zuko(transform_to_unconstrained), - *transforms, - ) - elif z_score_x == "transform_to_unconstrained" and x_dist is None: - raise ValueError( - "Transformation to unconstrained space requires a distribution" - "provided through `x_dist`", - ) + + if z_score_x == "transform_to_unconstrained": + if x_dist is None: + raise ValueError( + "Transformation to unconstrained space requires a distribution " + "provided through `x_dist`." + ) + elif not hasattr(x_dist, "support"): + raise ValueError( + "`x_dist` requires a `.support` attribute for" + "an unconstrained transformation." 
+ ) + else: + transform_to_unconstrained = mcmc_transform(x_dist) + transforms = ( + biject_transform_zuko(transform_to_unconstrained), + *transforms, + ) + elif z_score_x_bool: transforms = ( standardizing_transform_zuko(batch_x, structured_x), @@ -1162,6 +1170,7 @@ def build_zuko_flow( return flow + def build_zuko_unconditional_flow( which_nf: str, batch_x: Tensor, diff --git a/sbi/utils/sbiutils.py b/sbi/utils/sbiutils.py index c74c5caa3..9acf612a3 100644 --- a/sbi/utils/sbiutils.py +++ b/sbi/utils/sbiutils.py @@ -244,7 +244,7 @@ def z_standardization( batch_t: Tensor, structured_dims: bool = False, min_std: float = 1e-14, -) -> list[Tensor, Tensor]: +) -> [Tensor, Tensor]: """Computes mean and standard deviation for z-scoring Args: From 93325836d64c1efdeb98d4f01794d0468c1f6736 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Tue, 10 Jun 2025 11:44:22 +0200 Subject: [PATCH 27/51] ruff linted push --- sbi/neural_nets/net_builders/flow.py | 16 ++++++++-------- sbi/utils/sbiutils.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index 601895859..95dec1b1c 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -1002,6 +1002,7 @@ def build_zuko_bpf( return flow + def build_zuko_flow( which_nf: str, batch_x: Tensor, @@ -1042,11 +1043,11 @@ def build_zuko_flow( num_transforms: The number of transformations in the flow. Defaults to 5. embedding_net: The embedding network to use. Defaults to nn.Identity(). x_dist: The distribution over x, used to determine the bounds for the - unconstrained transformation. - - In Neural Posterior Estimation (NPE), x_dist typically corresponds + unconstrained transformation. + - In Neural Posterior Estimation (NPE), x_dist typically corresponds to the prior over x. 
- - For Neural Likelihood Estimation (NLE) or Neural Ratio Estimation (NRE), - x_dist may instead be a user-specified distribution that captures a + - For Neural Likelihood Estimation (NLE) or Neural Ratio Estimation (NRE), + x_dist may instead be a user-specified distribution that captures a rough bounded support of the observed data space. **kwargs: Additional keyword arguments to pass to the flow constructor. @@ -1105,7 +1106,7 @@ def build_zuko_flow( biject_transform_zuko(transform_to_unconstrained), transform, ) - + elif z_score_x_bool: transform = ( standardizing_transform_zuko(batch_x, structured_x), @@ -1125,7 +1126,7 @@ def build_zuko_flow( transforms = flow_built.transform.transforms z_score_x_bool, structured_x = z_score_parser(z_score_x) - + if z_score_x == "transform_to_unconstrained": if x_dist is None: raise ValueError( @@ -1143,7 +1144,7 @@ def build_zuko_flow( biject_transform_zuko(transform_to_unconstrained), *transforms, ) - + elif z_score_x_bool: transforms = ( standardizing_transform_zuko(batch_x, structured_x), @@ -1170,7 +1171,6 @@ def build_zuko_flow( return flow - def build_zuko_unconditional_flow( which_nf: str, batch_x: Tensor, diff --git a/sbi/utils/sbiutils.py b/sbi/utils/sbiutils.py index 9acf612a3..519e38766 100644 --- a/sbi/utils/sbiutils.py +++ b/sbi/utils/sbiutils.py @@ -151,7 +151,7 @@ def z_score_parser(z_score_flag: Optional[str]) -> Tuple[bool, bool]: # Return warning due to invalid option, defaults to not z-scoring. raise ValueError( "Invalid z-scoring option. Use 'none', 'independent'" - "'structured' or 'transform_to_unconstrained." + "'structured' or 'transform_to_unconstrained'." 
) return z_score_bool, structured_data From 7dcf9198c0bb2874b0a9d2b10f3dd5c02a92b3fb Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Tue, 10 Jun 2025 11:58:09 +0200 Subject: [PATCH 28/51] cleanup density_estimator_test.py --- tests/density_estimator_test.py | 42 +++++++++++---------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/tests/density_estimator_test.py b/tests/density_estimator_test.py index ac5885d77..dbea63286 100644 --- a/tests/density_estimator_test.py +++ b/tests/density_estimator_test.py @@ -467,61 +467,47 @@ def test_mixed_density_estimator( assert samples.shape == (*sample_shape, batch_dim, *input_event_shape) +@pytest.mark.parametrize("which_nf", ["MAF", "CNF"]) @pytest.mark.parametrize( - "distributions", + "x_dist", [ BoxUniform(low=-2 * torch.ones(5), high=2 * torch.ones(5)), HalfNormal(scale=torch.ones(1) * 2), MultivariateNormal(loc=zeros(5), covariance_matrix=eye(5)), ], ) -def test_build_zuko_flow_unconstrained(distributions): +def test_build_zuko_flow_with_valid_unconstrained_transform(which_nf, x_dist): + """Test that ZukoFlow builds successfully with valid `x_dist`.""" # input dimension is 5 batch_x = torch.randn(10, 5) batch_y = torch.randn(10, 3) # Test case where x_dist is provided (should not raise an error) flow = build_zuko_flow( - which_nf="MAF", + which_nf=which_nf, batch_x=batch_x, batch_y=batch_y, z_score_x="transform_to_unconstrained", z_score_y="transform_to_unconstrained", - x_dist=distributions, + x_dist=x_dist, ) assert isinstance(flow, ZukoFlow) - # Test to cover CNF - flow = build_zuko_flow( - which_nf="CNF", - batch_x=batch_x, - batch_y=batch_y, - z_score_x="transform_to_unconstrained", - z_score_y="transform_to_unconstrained", - x_dist=distributions, - ) - assert isinstance(flow, ZukoFlow) - - # Test case where x_dist is missing (should raise ValueError) +@pytest.mark.parametrize("which_nf", ["MAF", "CNF"]) +def test_build_zuko_flow_missing_x_dist_raises_error(which_nf): + """Test that 
ValueError is raised if `x_dist` is None when required.""" + batch_x = torch.randn(10, 5) + batch_y = torch.randn(10, 3) + with pytest.raises( ValueError, match=r".*distribution.*x_dist.*", ): build_zuko_flow( - which_nf="MAF", + which_nf=which_nf, batch_x=batch_x, batch_y=batch_y, z_score_x="transform_to_unconstrained", z_score_y="transform_to_unconstrained", x_dist=None, # No distribution provided - ) - - with pytest.raises(ValueError, match=r".*distribution.*x_dist.*"): - build_zuko_flow( - which_nf="CNF", - batch_x=batch_x, - batch_y=batch_y, - z_score_x="transform_to_unconstrained", - z_score_y="transform_to_unconstrained", - x_dist=None, # No distribution provided - ) + ) \ No newline at end of file From bbca1ed18fbfb90ca7e8ae380d003c67e7dee5e4 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Tue, 10 Jun 2025 12:03:05 +0200 Subject: [PATCH 29/51] cleanup density_estimator_test.py and ruff check --- tests/density_estimator_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/density_estimator_test.py b/tests/density_estimator_test.py index dbea63286..cebe7c848 100644 --- a/tests/density_estimator_test.py +++ b/tests/density_estimator_test.py @@ -493,12 +493,13 @@ def test_build_zuko_flow_with_valid_unconstrained_transform(which_nf, x_dist): ) assert isinstance(flow, ZukoFlow) + @pytest.mark.parametrize("which_nf", ["MAF", "CNF"]) def test_build_zuko_flow_missing_x_dist_raises_error(which_nf): """Test that ValueError is raised if `x_dist` is None when required.""" batch_x = torch.randn(10, 5) batch_y = torch.randn(10, 3) - + with pytest.raises( ValueError, match=r".*distribution.*x_dist.*", @@ -510,4 +511,4 @@ def test_build_zuko_flow_missing_x_dist_raises_error(which_nf): z_score_x="transform_to_unconstrained", z_score_y="transform_to_unconstrained", x_dist=None, # No distribution provided - ) \ No newline at end of file + ) From b09ebb73abb265c17a2073082f45e0b285a2ccb4 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: 
Tue, 1 Jul 2025 13:12:42 +0200 Subject: [PATCH 30/51] adjusted docstrings --- sbi/neural_nets/net_builders/flow.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index 95dec1b1c..6b087b629 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -1018,12 +1018,13 @@ def build_zuko_flow( """ Fundamental building blocks to build a Zuko normalizing flow model. - There are only 3 cases we consider in the if statements down below: + The following cases are considered in the if statements down below: - z_score_x is independent or none or structured, in which case we just use - the normal standardizing transform. - z_score_x is logit but xdist is not valid, in which case we raise an error - z_score_x is logit and xdist is valid, in which case we give the logit transform. + z_score_x is `independent, `structured` or None, in which case we just use + the normal standardizing transform. + z_score_x is `transform_to_unconstrained`, in this case, we check if `x_dist` is + provided and has a support property. If `x_dist` is not valid (i.e. None + or has no support property), we raise an error. Args: which_nf (str): The type of normalizing flow to build. @@ -1044,11 +1045,12 @@ def build_zuko_flow( embedding_net: The embedding network to use. Defaults to nn.Identity(). x_dist: The distribution over x, used to determine the bounds for the unconstrained transformation. - - In Neural Posterior Estimation (NPE), x_dist typically corresponds - to the prior over x. + - In Neural Posterior Estimation (NPE), `x_dist` typically corresponds + to the prior over x (e.g., a `BoxUniform`). - For Neural Likelihood Estimation (NLE) or Neural Ratio Estimation (NRE), - x_dist may instead be a user-specified distribution that captures a - rough bounded support of the observed data space. + `x_dist` may instead be a user-specified distribution. 
However, make sure + all the data lies within the support of the distribution if you want to + use the `transform_to_unconstrained` option for NLE and NRE. **kwargs: Additional keyword arguments to pass to the flow constructor. Returns: From 8c029f41869c3650effbeaef7e999733ef8354ce Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Wed, 2 Jul 2025 16:02:11 +0200 Subject: [PATCH 31/51] add tests convergence unconstrained space --- docs/faq/question_08_unconstrained.md | 4 +- tests/linearGaussian_snpe_test.py | 58 +++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/docs/faq/question_08_unconstrained.md b/docs/faq/question_08_unconstrained.md index 46697cd07..ec436a7af 100644 --- a/docs/faq/question_08_unconstrained.md +++ b/docs/faq/question_08_unconstrained.md @@ -30,11 +30,11 @@ Instead of standardizing parameters using z-scoring, you can use the logit trans ### What do I do if my data is highly nonlinear? -Therefore, you can enable the logit transformation when defining your density estimator, use: +Therefore, you can enable the `transform_to_unconstrained` transformation when defining your density estimator, use: ``` density_estimator_build_fun = posterior_nn( - model="zuko_nsf", hidden_features=60, num_transforms=3, z_score_theta="logit", x_dist=prior + model="zuko_nsf", hidden_features=60, num_transforms=3, z_score_theta="transform_to_unconstrained", x_dist=prior ) ``` This ensures that your density estimator operates in a transformed space where it respects prior bounds, improving the efficiency of rejection sampling. 
diff --git a/tests/linearGaussian_snpe_test.py b/tests/linearGaussian_snpe_test.py index 6c9d1cf30..fd0c2bc9b 100644 --- a/tests/linearGaussian_snpe_test.py +++ b/tests/linearGaussian_snpe_test.py @@ -731,3 +731,61 @@ def simulator(theta): _ = inference.append_simulations(theta, x, proposal=proposal).train() posterior = inference.build_posterior().set_default_x(x_o) proposal = posterior + + +@pytest.mark.slow +@pytest.mark.parametrize( + "num_dim", + ((2), (1)), +) +@pytest.mark.parametrize("npe_method", [NPE_B, NPE_C]) +@pytest.mark.parametrize( + "density_estimator", + ["zuko_maf", "zuko_nsf"], +) +def test_density_estimators_unconstrained_space( + num_dim, npe_method: type, density_estimator +): + """Test NPE B/C in inconstrained space.""" + + x_o = zeros(1, num_dim) + num_samples = 1000 + num_simulations = 2500 + + # likelihood_mean will be likelihood_shift+theta + likelihood_shift = -1.0 * ones(num_dim) + likelihood_cov = 0.3 * eye(num_dim) + + prior = utils.BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim)) + + target_samples = samples_true_posterior_linear_gaussian_uniform_prior( + x_o, likelihood_shift, likelihood_cov, prior, num_samples + ) + + def simulator(theta): + return linear_gaussian(theta, likelihood_shift, likelihood_cov) + + # Train in unconstrained space + + density_estimator_build_fun = posterior_nn( + model=density_estimator, + hidden_features=60, + num_transforms=3, + z_score_theta="transform_to_unconstrained", + x_dist=prior, + ) + + inference = npe_method(prior, density_estimator=density_estimator_build_fun) + + theta = prior.sample((num_simulations,)) + x = simulator(theta) + posterior_estimator = inference.append_simulations(theta, x).train( + training_batch_size=100 + ) + posterior = DirectPosterior( + prior=prior, posterior_estimator=posterior_estimator + ).set_default_x(x_o) + samples = posterior.sample((num_samples,)) + + # Compute the c2st and assert it is near chance level of 0.5. 
+ check_c2st(samples, target_samples, alg=f"npe_{density_estimator}") From d63af4800a72bd9df3225d693dc063eab72c58ec Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Wed, 2 Jul 2025 16:10:09 +0200 Subject: [PATCH 32/51] adjust faq --- docs/faq/question_08_unconstrained.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/faq/question_08_unconstrained.md b/docs/faq/question_08_unconstrained.md index ec436a7af..cdd9a2664 100644 --- a/docs/faq/question_08_unconstrained.md +++ b/docs/faq/question_08_unconstrained.md @@ -36,7 +36,9 @@ Therefore, you can enable the `transform_to_unconstrained` transformation when d density_estimator_build_fun = posterior_nn( model="zuko_nsf", hidden_features=60, num_transforms=3, z_score_theta="transform_to_unconstrained", x_dist=prior ) + +inference = NPE_C(prior, density_estimator=density_estimator_build_fun) ``` This ensures that your density estimator operates in a transformed space where it respects prior bounds, improving the efficiency of rejection sampling. -Note: The logit transformation is currently only supported for `zuko` density estimators. +Note: The logit transformation is currently only supported for `zuko` density estimators (e.g., `zuko_nsf` and `zuko_maf`). From 9fb371a8e6fc6d63de8af3f18e2e30c468206aae Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Wed, 2 Jul 2025 16:20:06 +0200 Subject: [PATCH 33/51] add test snle --- tests/linearGaussian_snle_test.py | 49 +++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tests/linearGaussian_snle_test.py b/tests/linearGaussian_snle_test.py index 558e1488c..fe961e7c4 100644 --- a/tests/linearGaussian_snle_test.py +++ b/tests/linearGaussian_snle_test.py @@ -502,3 +502,52 @@ def test_api_nle_sampling_methods( posterior.train(max_num_iters=10) posterior.sample(sample_shape=(num_samples,), show_progress_bars=False) + + + + +@pytest.mark.parametrize("num_dim", (1,)) # dim 3 is tested below. 
+@pytest.mark.parametrize("prior_str", ("uniform", "gaussian")) +def test_snle_unconstrained_space( + num_dim: int, prior_str: str, mcmc_params_fast: dict +): + """Test NLE API with 2 rounds, different priors num trials and MAP for unconstrained space.""" + num_rounds = 2 + num_samples = 1 + num_simulations_per_round = 100 + + if prior_str == "gaussian": + prior_mean = zeros(num_dim) + prior_cov = eye(num_dim) + prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov) + else: + prior = BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim)) + + simulator = diagonal_linear_gaussian + + density_estimator_build_fun = likelihood_nn( + model="zuko_nsf", + hidden_features=60, + num_transforms=3, + z_score_theta="transform_to_unconstrained", + x_dist=prior, + ) + + inference = NLE(prior=prior, density_estimator=density_estimator_build_fun, show_progress_bars=False) + + proposals = [prior] + for _ in range(num_rounds): + theta = proposals[-1].sample((num_simulations_per_round,)) + x = simulator(theta) + inference.append_simulations(theta, x).train( + training_batch_size=100, max_num_epochs=2 + ) + for num_trials in [1, 3]: + x_o = zeros((num_trials, num_dim)) + posterior = inference.build_posterior( + mcmc_method="slice_np_vectorized", + mcmc_parameters=mcmc_params_fast, + ).set_default_x(x_o) + posterior.sample(sample_shape=(num_samples,)) + proposals.append(posterior) + posterior.map(num_iter=1) \ No newline at end of file From be06d5883632b2bf1b7395e03bd2278559ffc0fa Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Wed, 2 Jul 2025 16:31:59 +0200 Subject: [PATCH 34/51] adjust linear gaussian and estimate c2st --- tests/linearGaussian_snle_test.py | 116 ++++++++++++++++++++++-------- 1 file changed, 86 insertions(+), 30 deletions(-) diff --git a/tests/linearGaussian_snle_test.py b/tests/linearGaussian_snle_test.py index fe961e7c4..36b53beb2 100644 --- a/tests/linearGaussian_snle_test.py +++ b/tests/linearGaussian_snle_test.py @@ -504,17 +504,28 @@ def 
test_api_nle_sampling_methods( posterior.sample(sample_shape=(num_samples,), show_progress_bars=False) - - -@pytest.mark.parametrize("num_dim", (1,)) # dim 3 is tested below. +@pytest.mark.slow +@pytest.mark.parametrize("num_dim", (1, 2)) @pytest.mark.parametrize("prior_str", ("uniform", "gaussian")) -def test_snle_unconstrained_space( - num_dim: int, prior_str: str, mcmc_params_fast: dict +@pytest.mark.parametrize("model_str", ("zuko_maf", "zuko_nsf")) +def test_c2st_nle_unconstrained_space( + num_dim: int, prior_str: str, model_str: str, mcmc_params_accurate: dict ): - """Test NLE API with 2 rounds, different priors num trials and MAP for unconstrained space.""" - num_rounds = 2 - num_samples = 1 - num_simulations_per_round = 100 + """Test SNL on linear Gaussian in unconstrained space. + + Args: + num_dim: parameter dimension of the gaussian model + prior_str: one of "gaussian" or "uniform" + + """ + num_samples = 500 + num_simulations = 3000 + trials_to_test = [1] + + # likelihood_mean will be likelihood_shift+theta + likelihood_shift = -1.0 * ones(num_dim) + # Use increased cov to avoid too small posterior cov for many trials. 
+ likelihood_cov = 0.8 * eye(num_dim) if prior_str == "gaussian": prior_mean = zeros(num_dim) @@ -523,31 +534,76 @@ def test_snle_unconstrained_space( else: prior = BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim)) - simulator = diagonal_linear_gaussian - - density_estimator_build_fun = likelihood_nn( - model="zuko_nsf", + def simulator(theta): + return linear_gaussian(theta, likelihood_shift, likelihood_cov) + + # Use likelihood_nn with z_score_theta="transform_to_unconstrained" + density_estimator = likelihood_nn( + model_str, hidden_features=60, num_transforms=3, z_score_theta="transform_to_unconstrained", x_dist=prior, ) - - inference = NLE(prior=prior, density_estimator=density_estimator_build_fun, show_progress_bars=False) + inference = NLE(density_estimator=density_estimator) - proposals = [prior] - for _ in range(num_rounds): - theta = proposals[-1].sample((num_simulations_per_round,)) - x = simulator(theta) - inference.append_simulations(theta, x).train( - training_batch_size=100, max_num_epochs=2 + theta = prior.sample((num_simulations,)) + x = simulator(theta) + + likelihood_estimator = inference.append_simulations(theta, x).train() + + # Test inference amortized over trials. 
+ for num_trials in trials_to_test: + x_o = zeros((num_trials, num_dim)) + if prior_str == "gaussian": + gt_posterior = true_posterior_linear_gaussian_mvn_prior( + x_o, likelihood_shift, likelihood_cov, prior_mean, prior_cov + ) + target_samples = gt_posterior.sample((num_samples,)) + elif prior_str == "uniform": + target_samples = samples_true_posterior_linear_gaussian_uniform_prior( + x_o, + likelihood_shift, + likelihood_cov, + prior=prior, + num_samples=num_samples, + ) + else: + raise ValueError(f"Wrong prior_str: '{prior_str}'.") + + potential_fn, theta_transform = likelihood_estimator_based_potential( + prior=prior, likelihood_estimator=likelihood_estimator, x_o=x_o ) - for num_trials in [1, 3]: - x_o = zeros((num_trials, num_dim)) - posterior = inference.build_posterior( - mcmc_method="slice_np_vectorized", - mcmc_parameters=mcmc_params_fast, - ).set_default_x(x_o) - posterior.sample(sample_shape=(num_samples,)) - proposals.append(posterior) - posterior.map(num_iter=1) \ No newline at end of file + posterior = MCMCPosterior( + proposal=prior, + potential_fn=potential_fn, + theta_transform=theta_transform, + method="slice_np_vectorized", + **mcmc_params_accurate, + ) + + samples = posterior.sample(sample_shape=(num_samples,)) + + # Check performance based on c2st accuracy. + check_c2st( + samples, + target_samples, + alg=f"nle_a-{prior_str}-prior-{model_str}-{num_trials}-trials", + ) + + map_ = posterior.map( + num_init_samples=1_000, + init_method="proposal", + show_progress_bars=False, + ) + + if prior_str == "uniform": + # Check whether the returned probability outside of the support is zero. 
+ posterior_prob = get_prob_outside_uniform_prior(posterior, prior, num_dim) + assert posterior_prob == 0.0, ( + "The posterior probability outside of the prior support is not zero" + ) + + assert ((map_ - ones(num_dim)) ** 2).sum() < 0.5 + else: + assert ((map_ - gt_posterior.mean) ** 2).sum() < 0.5 From c732ded4c72bd9b6abd1a9469583576ebe8c122d Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Mon, 28 Jul 2025 21:42:05 +0100 Subject: [PATCH 35/51] adjust documentation --- docs/faq/question_08_unconstrained.md | 54 ++++++++++----------------- 1 file changed, 19 insertions(+), 35 deletions(-) diff --git a/docs/faq/question_08_unconstrained.md b/docs/faq/question_08_unconstrained.md index cdd9a2664..9931fa615 100644 --- a/docs/faq/question_08_unconstrained.md +++ b/docs/faq/question_08_unconstrained.md @@ -1,44 +1,28 @@ -# What if almost all posterior samples lie outside the prior bounds for some conditionals? +# Using the logit transformation +If you've ruled out simulator issues, you can try training your density or ratio estimator in an unbounded space using a logit transformation: -If you've encountered the following warning: +- **For NPE**: The transformation maps bounded parameters θ to unbounded space before training, then applies the inverse (sigmoid) after training to ensure posterior samples stay within prior bounds. -``` -WARNING:root:Only 0.002% proposal samples were accepted. It - may take a long time to collect the remaining 99980 - samples. Consider interrupting (Ctrl-C) and switching to a - different sampling method with - `build_posterior(..., sample_with='mcmc')`. or - `build_posterior(..., sample_with='vi')`. -``` - -this indicates that a significant portion of the samples proposed by the density estimator fall outside the prior bounds. Several factors might be causing this issue: - -1) Simulator Issues: Ensure that your simulator is functioning as expected and producing realistic outputs. 
-2) Insufficient Training Data: If the density estimator has been trained on too few simulations, it may lead to invalid estimations. -3) Problematic True Data: Check if there are inconsistencies or unexpected values in the observed data. - - -### Possible solutions - -If you've ruled out these issues, you can try training your density estimator in an unbounded space using a logit transformation. This transformation maps your data to logit space before training and then applies the inverse logit (sigmoid function) to ensure that the trained density estimator remains within the prior bounds. - -Instead of standardizing parameters using z-scoring, you can use the logit transformation. However, this requires providing a density estimation. The specific approach depends on the method you're using: +- **For NLE/NRE**: The transformation would need to map bounded data x to unbounded space, which requires estimating data bounds from simulations (more complex). -- For NPE (Neural Posterior Estimation): You can simply use the prior as the density estimation. -- For NLE/NRE (Neural Likelihood Estimation / Neural Ratio Estimation): A rough density approximation over data boundaries is needed, making the process more complex. +To enable this for NPE: - -### What do I do if my data is highly nonlinear? 
- -Therefore, you can enable the `transform_to_unconstrained` transformation when defining your density estimator, use: - -``` +```python density_estimator_build_fun = posterior_nn( - model="zuko_nsf", hidden_features=60, num_transforms=3, z_score_theta="transform_to_unconstrained", x_dist=prior + model="zuko_nsf", + hidden_features=60, + num_transforms=3, + z_score_theta="transform_to_unconstrained" # Transforms parameters to unconstrained space + x_dist=prior # For NPE, this specifies bounds for parameters (internally called 'x') ) - -inference = NPE_C(prior, density_estimator=density_estimator_build_fun) +inference = NPE(prior, density_estimator=density_estimator_build_fun) ``` + This ensures that your density estimator operates in a transformed space where it respects prior bounds, improving the efficiency of rejection sampling. -Note: The logit transformation is currently only supported for `zuko` density estimators (e.g., `zuko_nsf` and `zuko_maf`). +>Note: The `x_dist=prior` might seem confusing - internally, sbi uses generic `x,y` notation where for NPE, x represents parameters (θ) and y represents data. This is why we pass the prior as `x_dist`. + +Important: + +- This transformation is currently only supported for zuko density estimators. +- For **NLE/NRE**, setting up this transformation is more complex as it requires estimating bounds for the simulated data rather than using prior bounds. 
\ No newline at end of file From 021d67fe676f79fd95d250804474b5c816d81ba4 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova <41705732+anastasiakrouglova@users.noreply.github.com> Date: Mon, 28 Jul 2025 21:43:11 +0100 Subject: [PATCH 36/51] Update sbi/neural_nets/net_builders/flow.py Co-authored-by: Jan --- sbi/neural_nets/net_builders/flow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index 6b087b629..4637d1c3a 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -1037,7 +1037,7 @@ def build_zuko_flow( over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. - `transform_to_unconstrained`: Transforms to - an unbound space, if bounds from `x_dist` are given. + an unbounded space if bounds from `x_dist` are given. z_score_y: Whether to z-score ys passing into the network, same options as z_score_x. hidden_features: The number of hidden features in the flow. Defaults to 50. From d375df9f526f8667fc69604bd6c7423d4ae8ce24 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova <41705732+anastasiakrouglova@users.noreply.github.com> Date: Mon, 28 Jul 2025 21:43:41 +0100 Subject: [PATCH 37/51] Update sbi/utils/sbiutils.py Co-authored-by: Jan --- sbi/utils/sbiutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbi/utils/sbiutils.py b/sbi/utils/sbiutils.py index 519e38766..646bdb8c6 100644 --- a/sbi/utils/sbiutils.py +++ b/sbi/utils/sbiutils.py @@ -150,7 +150,7 @@ def z_score_parser(z_score_flag: Optional[str]) -> Tuple[bool, bool]: else: # Return warning due to invalid option, defaults to not z-scoring. raise ValueError( - "Invalid z-scoring option. Use 'none', 'independent'" + "Invalid z-scoring option. Use 'none', 'independent' " "'structured' or 'transform_to_unconstrained'." 
) From bd0c055e633655b99df725b7c502918b9c91101e Mon Sep 17 00:00:00 2001 From: Nastya Krouglova <41705732+anastasiakrouglova@users.noreply.github.com> Date: Mon, 28 Jul 2025 21:43:54 +0100 Subject: [PATCH 38/51] Update sbi/utils/sbiutils.py Co-authored-by: Jan --- sbi/utils/sbiutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbi/utils/sbiutils.py b/sbi/utils/sbiutils.py index 646bdb8c6..fcba529aa 100644 --- a/sbi/utils/sbiutils.py +++ b/sbi/utils/sbiutils.py @@ -111,7 +111,7 @@ def clamp_and_warn(name: str, value: float, min_val: float, max_val: float) -> f return clamped_val -def z_score_parser(z_score_flag: Optional[str]) -> Tuple[bool, bool]: +def z_score_parser(z_score_flag: Optional[Literal["none", "independent", "structured", "transform_to_unconstrained"]]) -> Tuple[bool, bool]: """Parses string z-score flag into booleans. Converts string flag into booleans denoting whether to z-score or not, and whether From 3908896158c41faf2850b0d307316e1f6b27103e Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Tue, 29 Jul 2025 11:08:43 +0100 Subject: [PATCH 39/51] add literal import to sbi utils --- sbi/utils/sbiutils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sbi/utils/sbiutils.py b/sbi/utils/sbiutils.py index fcba529aa..2e6e5e2fa 100644 --- a/sbi/utils/sbiutils.py +++ b/sbi/utils/sbiutils.py @@ -10,6 +10,7 @@ Callable, Dict, List, + Literal, Optional, Sequence, Tuple, @@ -111,7 +112,11 @@ def clamp_and_warn(name: str, value: float, min_val: float, max_val: float) -> f return clamped_val -def z_score_parser(z_score_flag: Optional[Literal["none", "independent", "structured", "transform_to_unconstrained"]]) -> Tuple[bool, bool]: +def z_score_parser( + z_score_flag: Optional[ + Literal["none", "independent", "structured", "transform_to_unconstrained"] + ], +) -> Tuple[bool, bool]: """Parses string z-score flag into booleans. 
Converts string flag into booleans denoting whether to z-score or not, and whether From 33129d868cd3e8bcc1a29017fb6b65195b9d47e3 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Tue, 29 Jul 2025 11:14:32 +0100 Subject: [PATCH 40/51] adjust literals and add get_transform_to_unconstrained --- sbi/neural_nets/net_builders/flow.py | 127 ++++++++++++--------------- 1 file changed, 57 insertions(+), 70 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index 4637d1c3a..56e097f20 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -2,7 +2,7 @@ # under the Apache License Version 2.0, see from functools import partial -from typing import List, Optional, Sequence, Union +from typing import List, Literal, Optional, Sequence, Union import torch import zuko @@ -1007,8 +1007,10 @@ def build_zuko_flow( which_nf: str, batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Literal["none", "independent", + "structured", "transform_to_unconstrained"], + z_score_y: Literal["none", "independent", + "structured", "transform_to_unconstrained"], hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -1085,83 +1087,37 @@ def build_zuko_flow( # Continuous normalizing flows (CNF) only have one transform, # so we need to handle them slightly differently. if which_nf == "CNF": - transform = flow_built.transform - - z_score_x_bool, structured_x = z_score_parser(z_score_x) - - # Only x (i.e., prior for NPE) can be transformed to unbound space (not y) - # when x_dist is provided. - if z_score_x == "transform_to_unconstrained": - if x_dist is None: - raise ValueError( - "Transformation to unconstrained space requires a distribution " - "provided through `x_dist`." 
- ) - elif not hasattr(x_dist, "support"): - raise ValueError( - "`x_dist` requires a `.support` attribute for" - "an unconstrained transformation." - ) - else: - transform_to_unconstrained = mcmc_transform(x_dist) - transform = ( - biject_transform_zuko(transform_to_unconstrained), - transform, - ) - - elif z_score_x_bool: - transform = ( - standardizing_transform_zuko(batch_x, structured_x), - transform, - ) - - z_score_y_bool, structured_y = z_score_parser(z_score_y) - if z_score_y_bool: - # Prepend standardizing transform to y-embedding. - embedding_net = nn.Sequential( - standardizing_transform_zuko(batch_y, structured_y), embedding_net - ) - - # Combine transforms. - neural_net = zuko.flows.Flow(transform, flow_built.base) + # Transforms is 1 continuous transform for CNF + transforms = flow_built.transform else: transforms = flow_built.transform.transforms - z_score_x_bool, structured_x = z_score_parser(z_score_x) + z_score_x_bool, structured_x = z_score_parser(z_score_x) + + if z_score_x == "transform_to_unconstrained": + transforms = get_transform_to_unconstrained(x_dist, which_nf, transforms) - if z_score_x == "transform_to_unconstrained": - if x_dist is None: - raise ValueError( - "Transformation to unconstrained space requires a distribution " - "provided through `x_dist`." - ) - elif not hasattr(x_dist, "support"): - raise ValueError( - "`x_dist` requires a `.support` attribute for" - "an unconstrained transformation." - ) - else: - transform_to_unconstrained = mcmc_transform(x_dist) - transforms = ( - biject_transform_zuko(transform_to_unconstrained), - *transforms, - ) - - elif z_score_x_bool: + elif z_score_x_bool: + if which_nf == "CNF": + transforms = ( + standardizing_transform_zuko(batch_x, structured_x), + transforms, + ) + else: transforms = ( standardizing_transform_zuko(batch_x, structured_x), *transforms, ) - z_score_y_bool, structured_y = z_score_parser(z_score_y) - if z_score_y_bool: - # Prepend standardizing transform to y-embedding. 
- embedding_net = nn.Sequential( - standardizing_net(batch_y, structured_y), embedding_net - ) + z_score_y_bool, structured_y = z_score_parser(z_score_y) + if z_score_y_bool: + # Prepend standardizing transform to y-embedding. + embedding_net = nn.Sequential( + standardizing_net(batch_y, structured_y), embedding_net + ) - # Combine transforms. - neural_net = zuko.flows.Flow(transforms, flow_built.base) + # Combine transforms. + neural_net = zuko.flows.Flow(transforms, flow_built.base) flow = ZukoFlow( neural_net, @@ -1173,6 +1129,37 @@ def build_zuko_flow( return flow +def get_transform_to_unconstrained( + x_dist: Distribution, + which_nf: str, + transforms: zuko.flows.Transforms, +) -> zuko.flows.Transform: + if x_dist is None: + raise ValueError( + "Transformation to unconstrained space requires a distribution " + "provided through `x_dist`." + ) + elif not hasattr(x_dist, "support"): + raise ValueError( + "`x_dist` requires a `.support` attribute for" + "an unconstrained transformation." 
+ ) + else: + transform_to_unconstrained = mcmc_transform(x_dist) + if which_nf == "CNF": + # Transforms is 1 continuous transform for CNF + transforms = ( + biject_transform_zuko(transform_to_unconstrained), + transforms, + ) + else: + transforms = ( + biject_transform_zuko(transform_to_unconstrained), + *transforms, + ) + return transforms + + def build_zuko_unconditional_flow( which_nf: str, batch_x: Tensor, From 580fd50514c859a57aef9b86bfda63bdbae97e08 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Tue, 29 Jul 2025 11:19:51 +0100 Subject: [PATCH 41/51] adjust literals and format --- sbi/neural_nets/net_builders/flow.py | 126 ++++++++++++++++++++------- 1 file changed, 93 insertions(+), 33 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index 56e097f20..e490bd4b4 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -34,8 +34,12 @@ def build_made( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], + z_score_y: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], hidden_features: int = 50, num_mixture_components: int = 10, embedding_net: nn.Module = nn.Identity(), @@ -105,8 +109,12 @@ def build_made( def build_maf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], + z_score_y: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], hidden_features: int = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -193,8 +201,12 @@ def build_maf( def build_maf_rqs( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] 
= "independent", + z_score_x: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], + z_score_y: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], hidden_features: int = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -305,8 +317,12 @@ def build_maf_rqs( def build_nsf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], + z_score_y: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], hidden_features: int = 50, num_transforms: int = 5, num_bins: int = 10, @@ -427,8 +443,12 @@ def mask_in_layer(i): def build_zuko_nice( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], + z_score_y: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -482,8 +502,12 @@ def build_zuko_nice( def build_zuko_maf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], + z_score_y: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -534,8 +558,12 @@ def build_zuko_maf( def build_zuko_nsf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], 
+ z_score_y: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -595,8 +623,12 @@ def build_zuko_nsf( def build_zuko_ncsf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], + z_score_y: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -651,8 +683,12 @@ def build_zuko_ncsf( def build_zuko_sospf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], + z_score_y: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -705,8 +741,12 @@ def build_zuko_sospf( def build_zuko_naf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], + z_score_y: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -771,8 +811,12 @@ def build_zuko_naf( def build_zuko_unaf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], + z_score_y: Literal[ + "none", 
"independent", "structured", "transform_to_unconstrained" + ], hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -837,8 +881,12 @@ def build_zuko_unaf( def build_zuko_cnf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], + z_score_y: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -891,8 +939,12 @@ def build_zuko_cnf( def build_zuko_gf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], + z_score_y: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 3, embedding_net: nn.Module = nn.Identity(), @@ -948,8 +1000,12 @@ def build_zuko_gf( def build_zuko_bpf( batch_x: Tensor, batch_y: Tensor, - z_score_x: Optional[str] = "independent", - z_score_y: Optional[str] = "independent", + z_score_x: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], + z_score_y: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 3, embedding_net: nn.Module = nn.Identity(), @@ -1007,10 +1063,12 @@ def build_zuko_flow( which_nf: str, batch_x: Tensor, batch_y: Tensor, - z_score_x: Literal["none", "independent", - "structured", "transform_to_unconstrained"], - z_score_y: Literal["none", "independent", - "structured", "transform_to_unconstrained"], + z_score_x: Literal[ + "none", "independent", "structured", 
"transform_to_unconstrained" + ], + z_score_y: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -1163,7 +1221,9 @@ def get_transform_to_unconstrained( def build_zuko_unconditional_flow( which_nf: str, batch_x: Tensor, - z_score_x: Optional[str] = "independent", + z_score_x: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, **kwargs, From dfd9948a0f4b40bb135149744da6e1fb389559b1 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Tue, 29 Jul 2025 11:22:08 +0100 Subject: [PATCH 42/51] add new line for ruff --- docs/faq/question_08_unconstrained.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/faq/question_08_unconstrained.md b/docs/faq/question_08_unconstrained.md index 9931fa615..2740b0dcf 100644 --- a/docs/faq/question_08_unconstrained.md +++ b/docs/faq/question_08_unconstrained.md @@ -25,4 +25,4 @@ This ensures that your density estimator operates in a transformed space where i Important: - This transformation is currently only supported for zuko density estimators. -- For **NLE/NRE**, setting up this transformation is more complex as it requires estimating bounds for the simulated data rather than using prior bounds. \ No newline at end of file +- For **NLE/NRE**, setting up this transformation is more complex as it requires estimating bounds for the simulated data rather than using prior bounds. 
From d6dca317db3d7c6f26aa34354eefa3aaa708bbc1 Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Tue, 29 Jul 2025 11:26:54 +0100 Subject: [PATCH 43/51] stying ruff --- docs/faq/question_08_unconstrained.md | 31 +++++++++++++++++++-------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/docs/faq/question_08_unconstrained.md b/docs/faq/question_08_unconstrained.md index 2740b0dcf..c246bf61d 100644 --- a/docs/faq/question_08_unconstrained.md +++ b/docs/faq/question_08_unconstrained.md @@ -1,28 +1,41 @@ # Using the logit transformation -If you've ruled out simulator issues, you can try training your density or ratio estimator in an unbounded space using a logit transformation: +If you've ruled out simulator issues, you can try +training your density or ratio estimator in an unbounded space +using a logit transformation: -- **For NPE**: The transformation maps bounded parameters θ to unbounded space before training, then applies the inverse (sigmoid) after training to ensure posterior samples stay within prior bounds. +- **For NPE**: The transformation maps bounded parameters θ +to unbounded space before training, then applies the inverse (sigmoid) +after training to ensure posterior samples stay within prior bounds. -- **For NLE/NRE**: The transformation would need to map bounded data x to unbounded space, which requires estimating data bounds from simulations (more complex). +- **For NLE/NRE**: The transformation would need to map bounded +data x to unbounded space, which requires estimating data bounds +from simulations (more complex). 
To enable this for NPE: ```python density_estimator_build_fun = posterior_nn( - model="zuko_nsf", - hidden_features=60, - num_transforms=3, + model="zuko_nsf", + hidden_features=60, + num_transforms=3, z_score_theta="transform_to_unconstrained" # Transforms parameters to unconstrained space x_dist=prior # For NPE, this specifies bounds for parameters (internally called 'x') ) inference = NPE(prior, density_estimator=density_estimator_build_fun) ``` -This ensures that your density estimator operates in a transformed space where it respects prior bounds, improving the efficiency of rejection sampling. +This ensures that your density estimator operates in a +transformed space where it respects prior bounds, +improving the efficiency of rejection sampling. ->Note: The `x_dist=prior` might seem confusing - internally, sbi uses generic `x,y` notation where for NPE, x represents parameters (θ) and y represents data. This is why we pass the prior as `x_dist`. +>Note: The `x_dist=prior` might seem confusing - internally, +sbi uses generic `x,y` notation where for NPE, x represents +parameters (θ) and y represents data. +This is why we pass the prior as `x_dist`. Important: - This transformation is currently only supported for zuko density estimators. -- For **NLE/NRE**, setting up this transformation is more complex as it requires estimating bounds for the simulated data rather than using prior bounds. +- For **NLE/NRE**, setting up this transformation is more +complex as it requires estimating bounds for the simulated data +rather than using prior bounds. 
From b8cd4da70cab578b5fed943f80181dc51ebf736c Mon Sep 17 00:00:00 2001 From: Nastya Krouglova Date: Tue, 29 Jul 2025 11:33:16 +0100 Subject: [PATCH 44/51] stying ruff --- docs/faq/question_08_unconstrained.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/faq/question_08_unconstrained.md b/docs/faq/question_08_unconstrained.md index c246bf61d..aea197ee3 100644 --- a/docs/faq/question_08_unconstrained.md +++ b/docs/faq/question_08_unconstrained.md @@ -28,14 +28,14 @@ This ensures that your density estimator operates in a transformed space where it respects prior bounds, improving the efficiency of rejection sampling. ->Note: The `x_dist=prior` might seem confusing - internally, -sbi uses generic `x,y` notation where for NPE, x represents -parameters (θ) and y represents data. +Note: The `x_dist=prior` might seem confusing - internally, +sbi uses generic `x,y` notation where for NPE, `x` represents +parameters (θ) and `y` represents data. This is why we pass the prior as `x_dist`. Important: - This transformation is currently only supported for zuko density estimators. -- For **NLE/NRE**, setting up this transformation is more +- For **NLE/NRE**, setting up this transformation is more complex as it requires estimating bounds for the simulated data rather than using prior bounds. From e528737cf985faaa968f1f2831d723400f2cd620 Mon Sep 17 00:00:00 2001 From: Jan Date: Thu, 31 Jul 2025 10:43:32 +0200 Subject: [PATCH 45/51] fix flow builder z-score defaults. 
--- sbi/neural_nets/net_builders/flow.py | 59 ++++++++++++++-------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index e490bd4b4..74fe86947 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -2,7 +2,7 @@ # under the Apache License Version 2.0, see from functools import partial -from typing import List, Literal, Optional, Sequence, Union +from typing import List, Literal, Optional, Sequence, Tuple, Union import torch import zuko @@ -14,6 +14,7 @@ ) from torch import Tensor, nn, relu, tanh, tensor, uint8 from torch.distributions import Distribution +from zuko.lazy import Flow, LazyDistribution from sbi.neural_nets.estimators import NFlowsFlow, ZukoFlow, ZukoUnconditionalFlow from sbi.utils.nn_utils import MADEMoGWrapper, get_numel @@ -36,10 +37,10 @@ def build_made( batch_y: Tensor, z_score_x: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", z_score_y: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", hidden_features: int = 50, num_mixture_components: int = 10, embedding_net: nn.Module = nn.Identity(), @@ -111,10 +112,10 @@ def build_maf( batch_y: Tensor, z_score_x: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", z_score_y: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", hidden_features: int = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -203,10 +204,10 @@ def build_maf_rqs( batch_y: Tensor, z_score_x: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", z_score_y: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", hidden_features: int = 50, num_transforms: int = 5, embedding_net: nn.Module = 
nn.Identity(), @@ -319,10 +320,10 @@ def build_nsf( batch_y: Tensor, z_score_x: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", z_score_y: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", hidden_features: int = 50, num_transforms: int = 5, num_bins: int = 10, @@ -445,10 +446,10 @@ def build_zuko_nice( batch_y: Tensor, z_score_x: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", z_score_y: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -504,10 +505,10 @@ def build_zuko_maf( batch_y: Tensor, z_score_x: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", z_score_y: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -560,10 +561,10 @@ def build_zuko_nsf( batch_y: Tensor, z_score_x: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", z_score_y: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -625,10 +626,10 @@ def build_zuko_ncsf( batch_y: Tensor, z_score_x: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", z_score_y: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -685,10 +686,10 @@ def 
build_zuko_sospf( batch_y: Tensor, z_score_x: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", z_score_y: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -743,10 +744,10 @@ def build_zuko_naf( batch_y: Tensor, z_score_x: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", z_score_y: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -813,10 +814,10 @@ def build_zuko_unaf( batch_y: Tensor, z_score_x: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", z_score_y: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -883,10 +884,10 @@ def build_zuko_cnf( batch_y: Tensor, z_score_x: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", z_score_y: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -941,10 +942,10 @@ def build_zuko_gf( batch_y: Tensor, z_score_x: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", z_score_y: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 3, embedding_net: nn.Module = nn.Identity(), @@ -1002,10 +1003,10 @@ def 
build_zuko_bpf( batch_y: Tensor, z_score_x: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", z_score_y: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 3, embedding_net: nn.Module = nn.Identity(), From 0665400532fac121cfe8dd8ba58faef8ed10137e Mon Sep 17 00:00:00 2001 From: Jan Date: Thu, 31 Jul 2025 10:44:00 +0200 Subject: [PATCH 46/51] refactor zuko flow build functions --- sbi/neural_nets/net_builders/flow.py | 196 +++++++++++++++++---------- sbi/utils/sbiutils.py | 2 +- 2 files changed, 126 insertions(+), 72 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index 74fe86947..b0c030908 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -1061,15 +1061,26 @@ def build_zuko_bpf( def build_zuko_flow( - which_nf: str, + which_nf: Literal[ + "BPF", + "CNF", + "GF", + "MAF", + "NCSF", + "NAF", + "NICE", + "NSF", + "SOSPF", + "UNAF", + ], batch_x: Tensor, batch_y: Tensor, z_score_x: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", z_score_y: Literal[ "none", "independent", "structured", "transform_to_unconstrained" - ], + ] = "independent", hidden_features: Union[Sequence[int], int] = 50, num_transforms: int = 5, embedding_net: nn.Module = nn.Identity(), @@ -1117,106 +1128,149 @@ def build_zuko_flow( Returns: ZukoFlow: The constructed Zuko normalizing flow model. 
""" - check_data_device(batch_x, batch_y) x_numel = get_numel(batch_x, embedding_net=None) y_numel = get_numel(batch_y, embedding_net=embedding_net) - # keep only zuko kwargs + # Keep only zuko kwargs kwargs = {k: v for k, v in kwargs.items() if k not in nflow_specific_kwargs} if isinstance(hidden_features, int): hidden_features = [hidden_features] * num_transforms + # Get base transforms from specified flow + base, base_transforms = _get_base_and_transforms( + which_nf, x_numel, y_numel, hidden_features, num_transforms, **kwargs + ) + + # Prepare x transforms to prepend + x_transforms = _prepare_x_transforms(z_score_x, batch_x, x_dist) + + # Combine all transforms + transforms = x_transforms + base_transforms + + # Maybe add y-z-scoring via embedding network + embedding_net = _prepare_y_embedding(z_score_y, batch_y, embedding_net) + + # Create final neural network + neural_net = zuko.flows.Flow(transforms, base) + + flow = ZukoFlow( + neural_net, + embedding_net, + input_shape=batch_x[0].shape, + condition_shape=batch_y[0].shape, + ) + + return flow + + +def _get_base_and_transforms( + which_nf: str, + x_numel: int, + y_numel: int, + hidden_features: Sequence[int], + num_transforms: int, + **kwargs, +) -> Tuple[LazyDistribution, tuple]: + """ + Build the base zuko flow and extract its transforms. + + Args: + which_nf: The type of normalizing flow to build. + x_numel: Number of elements in x. + y_numel: Number of elements in y. + hidden_features: Hidden features as a sequence. + num_transforms: Number of transforms. + **kwargs: Additional arguments for flow constructor. + + Returns: + base_transforms: Tuple of transforms from the built flow. 
+ """ build_nf = getattr(zuko.flows, which_nf) if which_nf == "CNF": - flow_built = build_nf( + flow: Flow = build_nf( features=x_numel, context=y_numel, hidden_features=hidden_features, **kwargs ) + # CNF has a single continuous transform + base_transforms = (flow.transform,) else: - flow_built = build_nf( + flow: Flow = build_nf( features=x_numel, context=y_numel, hidden_features=hidden_features, transforms=num_transforms, **kwargs, ) + # Regular flows have multiple discrete transforms + base_transforms = tuple(flow.transform.transforms) - # Continuous normalizing flows (CNF) only have one transform, - # so we need to handle them slightly differently. - if which_nf == "CNF": - # Transforms is 1 continuous transform for CNF - transforms = flow_built.transform - else: - transforms = flow_built.transform.transforms + return flow.base, base_transforms - z_score_x_bool, structured_x = z_score_parser(z_score_x) - if z_score_x == "transform_to_unconstrained": - transforms = get_transform_to_unconstrained(x_dist, which_nf, transforms) +def _prepare_x_transforms( + z_score_x: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], + batch_x: Tensor, + x_dist: Optional[Distribution], +) -> tuple: + """ + Prepare transforms to prepend for x processing. - elif z_score_x_bool: - if which_nf == "CNF": - transforms = ( - standardizing_transform_zuko(batch_x, structured_x), - transforms, + Args: + z_score_x: Type of x preprocessing. + batch_x: Batch of x data. + x_dist: Distribution for unconstrained transformation. + + Returns: + Tuple of transforms to prepend (empty tuple if no preprocessing). + """ + transforms = () + z_score_x_bool, structured_x = z_score_parser(z_score_x) + if z_score_x == "transform_to_unconstrained": + if x_dist is None: + raise ValueError( + "Transformation to unconstrained space requires a distribution " + "provided through `x_dist`." 
) - else: - transforms = ( - standardizing_transform_zuko(batch_x, structured_x), - *transforms, + if not hasattr(x_dist, "support"): + raise ValueError( + "`x_dist` requires a `.support` attribute for" + "an unconstrained transformation." ) + transform_to_unconstrained = biject_transform_zuko(mcmc_transform(x_dist)) + transforms = (transform_to_unconstrained,) + elif z_score_x_bool: + z_score_transform = standardizing_transform_zuko(batch_x, structured_x) + transforms = (z_score_transform,) - z_score_y_bool, structured_y = z_score_parser(z_score_y) - if z_score_y_bool: - # Prepend standardizing transform to y-embedding. - embedding_net = nn.Sequential( - standardizing_net(batch_y, structured_y), embedding_net - ) - - # Combine transforms. - neural_net = zuko.flows.Flow(transforms, flow_built.base) + return transforms - flow = ZukoFlow( - neural_net, - embedding_net, - input_shape=batch_x[0].shape, - condition_shape=batch_y[0].shape, - ) - return flow +def _prepare_y_embedding( + z_score_y: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], + batch_y: Tensor, + embedding_net: nn.Module, +) -> nn.Module: + """ + Prepend the embedding network for y, adding z-scoring if needed. + Args: + z_score_y: Type of y preprocessing. + batch_y: Batch of y data. + embedding_net: Original embedding network. -def get_transform_to_unconstrained( - x_dist: Distribution, - which_nf: str, - transforms: zuko.flows.Transforms, -) -> zuko.flows.Transform: - if x_dist is None: - raise ValueError( - "Transformation to unconstrained space requires a distribution " - "provided through `x_dist`." - ) - elif not hasattr(x_dist, "support"): - raise ValueError( - "`x_dist` requires a `.support` attribute for" - "an unconstrained transformation." 
- ) - else: - transform_to_unconstrained = mcmc_transform(x_dist) - if which_nf == "CNF": - # Transforms is 1 continuous transform for CNF - transforms = ( - biject_transform_zuko(transform_to_unconstrained), - transforms, - ) - else: - transforms = ( - biject_transform_zuko(transform_to_unconstrained), - *transforms, - ) - return transforms + Returns: + Modified embedding network. + """ + z_score_y_bool, structured_y = z_score_parser(z_score_y) + if z_score_y_bool: + return nn.Sequential(standardizing_net(batch_y, structured_y), embedding_net) + return embedding_net def build_zuko_unconditional_flow( diff --git a/sbi/utils/sbiutils.py b/sbi/utils/sbiutils.py index 2e6e5e2fa..d68199261 100644 --- a/sbi/utils/sbiutils.py +++ b/sbi/utils/sbiutils.py @@ -227,7 +227,7 @@ def __call__(self): def biject_transform_zuko( - transform, + transform: TorchTransform, ) -> zuko.flows.UnconditionalTransform: """ Wraps a pytorch transform in a Zuko unconditional transfrom on a bounded interval. From 4d4bfb8589228bb709667bb4e01d410a6add5fbc Mon Sep 17 00:00:00 2001 From: Jan Date: Thu, 31 Jul 2025 10:49:52 +0200 Subject: [PATCH 47/51] re-use y-embedding helper function. 
--- sbi/neural_nets/net_builders/flow.py | 73 +++++++++++----------------- 1 file changed, 28 insertions(+), 45 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index b0c030908..3f82064a9 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -79,11 +79,7 @@ def build_made( transform_zx = standardizing_transform(batch_x, structured_x) transform = transforms.CompositeTransform([transform_zx, transform]) - z_score_y_bool, structured_y = z_score_parser(z_score_y) - if z_score_y_bool: - embedding_net = nn.Sequential( - standardizing_net(batch_y, structured_y), embedding_net - ) + embedding_net = _prepare_y_embedding(z_score_y, batch_y, embedding_net) distribution = MADEMoGWrapper( features=x_numel, @@ -181,11 +177,7 @@ def build_maf( standardizing_transform(batch_x, structured_x) ] + transform_list - z_score_y_bool, structured_y = z_score_parser(z_score_y) - if z_score_y_bool: - embedding_net = nn.Sequential( - standardizing_net(batch_y, structured_y), embedding_net - ) + embedding_net = _prepare_y_embedding(z_score_y, batch_y, embedding_net) # Combine transforms transform = transforms.CompositeTransform(transform_list) @@ -297,11 +289,7 @@ def build_maf_rqs( standardizing_transform(batch_x, structured_x) ] + transform_list - z_score_y_bool, structured_y = z_score_parser(z_score_y) - if z_score_y_bool: - embedding_net = nn.Sequential( - standardizing_net(batch_y, structured_y), embedding_net - ) + embedding_net = _prepare_y_embedding(z_score_y, batch_y, embedding_net) # Combine transforms. transform = transforms.CompositeTransform(transform_list) @@ -422,12 +410,7 @@ def mask_in_layer(i): standardizing_transform(batch_x, structured_x) ] + transform_list - z_score_y_bool, structured_y = z_score_parser(z_score_y) - if z_score_y_bool: - # Prepend standardizing transform to y-embedding. 
- embedding_net = nn.Sequential( - standardizing_net(batch_y, structured_y), embedding_net - ) + embedding_net = _prepare_y_embedding(z_score_y, batch_y, embedding_net) distribution = get_base_dist(x_numel, **kwargs) @@ -1249,30 +1232,6 @@ def _prepare_x_transforms( return transforms -def _prepare_y_embedding( - z_score_y: Literal[ - "none", "independent", "structured", "transform_to_unconstrained" - ], - batch_y: Tensor, - embedding_net: nn.Module, -) -> nn.Module: - """ - Prepend the embedding network for y, adding z-scoring if needed. - - Args: - z_score_y: Type of y preprocessing. - batch_y: Batch of y data. - embedding_net: Original embedding network. - - Returns: - Modified embedding network. - """ - z_score_y_bool, structured_y = z_score_parser(z_score_y) - if z_score_y_bool: - return nn.Sequential(standardizing_net(batch_y, structured_y), embedding_net) - return embedding_net - - def build_zuko_unconditional_flow( which_nf: str, batch_x: Tensor, @@ -1361,6 +1320,30 @@ def build_zuko_unconditional_flow( return flow +def _prepare_y_embedding( + z_score_y: Literal[ + "none", "independent", "structured", "transform_to_unconstrained" + ], + batch_y: Tensor, + embedding_net: nn.Module, +) -> nn.Module: + """ + Prepend the embedding network for y, adding z-scoring if needed. + + Args: + z_score_y: Type of y preprocessing. + batch_y: Batch of y data. + embedding_net: Original embedding network. + + Returns: + Modified embedding network. + """ + z_score_y_bool, structured_y = z_score_parser(z_score_y) + if z_score_y_bool: + return nn.Sequential(standardizing_net(batch_y, structured_y), embedding_net) + return embedding_net + + class ContextSplineMap(nn.Module): """ Neural network from `context` to the spline parameters. 
From 8946a099747b5b83f1b491b2f044cb0110db368d Mon Sep 17 00:00:00 2001 From: Jan Date: Thu, 31 Jul 2025 11:24:58 +0200 Subject: [PATCH 48/51] fix typing --- sbi/utils/sbiutils.py | 5 +- tests/linearGaussian_snle_test.py | 81 ++++++++++++------------------- tests/sbiutils_test.py | 4 -- 3 files changed, 32 insertions(+), 58 deletions(-) diff --git a/sbi/utils/sbiutils.py b/sbi/utils/sbiutils.py index d68199261..cc31e5126 100644 --- a/sbi/utils/sbiutils.py +++ b/sbi/utils/sbiutils.py @@ -10,7 +10,6 @@ Callable, Dict, List, - Literal, Optional, Sequence, Tuple, @@ -113,9 +112,7 @@ def clamp_and_warn(name: str, value: float, min_val: float, max_val: float) -> f def z_score_parser( - z_score_flag: Optional[ - Literal["none", "independent", "structured", "transform_to_unconstrained"] - ], + z_score_flag: Optional[str] = None, ) -> Tuple[bool, bool]: """Parses string z-score flag into booleans. diff --git a/tests/linearGaussian_snle_test.py b/tests/linearGaussian_snle_test.py index 36b53beb2..5a175c71e 100644 --- a/tests/linearGaussian_snle_test.py +++ b/tests/linearGaussian_snle_test.py @@ -520,7 +520,6 @@ def test_c2st_nle_unconstrained_space( """ num_samples = 500 num_simulations = 3000 - trials_to_test = [1] # likelihood_mean will be likelihood_shift+theta likelihood_shift = -1.0 * ones(num_dim) @@ -553,57 +552,39 @@ def simulator(theta): likelihood_estimator = inference.append_simulations(theta, x).train() # Test inference amortized over trials. 
- for num_trials in trials_to_test: - x_o = zeros((num_trials, num_dim)) - if prior_str == "gaussian": - gt_posterior = true_posterior_linear_gaussian_mvn_prior( - x_o, likelihood_shift, likelihood_cov, prior_mean, prior_cov - ) - target_samples = gt_posterior.sample((num_samples,)) - elif prior_str == "uniform": - target_samples = samples_true_posterior_linear_gaussian_uniform_prior( - x_o, - likelihood_shift, - likelihood_cov, - prior=prior, - num_samples=num_samples, - ) - else: - raise ValueError(f"Wrong prior_str: '{prior_str}'.") - - potential_fn, theta_transform = likelihood_estimator_based_potential( - prior=prior, likelihood_estimator=likelihood_estimator, x_o=x_o - ) - posterior = MCMCPosterior( - proposal=prior, - potential_fn=potential_fn, - theta_transform=theta_transform, - method="slice_np_vectorized", - **mcmc_params_accurate, + x_o = zeros((1, num_dim)) + if prior_str == "gaussian": + gt_posterior = true_posterior_linear_gaussian_mvn_prior( + x_o, likelihood_shift, likelihood_cov, prior_mean, prior_cov ) - - samples = posterior.sample(sample_shape=(num_samples,)) - - # Check performance based on c2st accuracy. 
- check_c2st( - samples, - target_samples, - alg=f"nle_a-{prior_str}-prior-{model_str}-{num_trials}-trials", + target_samples = gt_posterior.sample((num_samples,)) + elif prior_str == "uniform": + target_samples = samples_true_posterior_linear_gaussian_uniform_prior( + x_o, + likelihood_shift, + likelihood_cov, + prior=prior, + num_samples=num_samples, ) + else: + raise ValueError(f"Wrong prior_str: '{prior_str}'.") - map_ = posterior.map( - num_init_samples=1_000, - init_method="proposal", - show_progress_bars=False, - ) + potential_fn, theta_transform = likelihood_estimator_based_potential( + prior=prior, likelihood_estimator=likelihood_estimator, x_o=x_o + ) + posterior = MCMCPosterior( + proposal=prior, + potential_fn=potential_fn, + theta_transform=theta_transform, + method="slice_np_vectorized", + **mcmc_params_accurate, + ) - if prior_str == "uniform": - # Check whether the returned probability outside of the support is zero. - posterior_prob = get_prob_outside_uniform_prior(posterior, prior, num_dim) - assert posterior_prob == 0.0, ( - "The posterior probability outside of the prior support is not zero" - ) + samples = posterior.sample(sample_shape=(num_samples,)) - assert ((map_ - ones(num_dim)) ** 2).sum() < 0.5 - else: - assert ((map_ - gt_posterior.mean) ** 2).sum() < 0.5 + # Check performance based on c2st accuracy. 
+ check_c2st( + samples, + target_samples, + alg=f"nle_a-{prior_str}-prior-{model_str}", + ) diff --git a/tests/sbiutils_test.py b/tests/sbiutils_test.py index 945ab106c..6d83c06cc 100644 --- a/tests/sbiutils_test.py +++ b/tests/sbiutils_test.py @@ -452,8 +452,6 @@ def test_z_score_parser(z_x, z_theta): @pytest.mark.parametrize( "z_x", [ - True, - False, None, "none", "independent", @@ -464,8 +462,6 @@ def test_z_score_parser(z_x, z_theta): @pytest.mark.parametrize( "z_theta", [ - True, - False, None, "none", "independent", From 340ba705131758a6c56d7841e1796e56ba10e2cd Mon Sep 17 00:00:00 2001 From: Jan Date: Thu, 31 Jul 2025 14:14:53 +0200 Subject: [PATCH 49/51] small fixes. --- sbi/neural_nets/net_builders/flow.py | 66 +++++++--------------------- 1 file changed, 15 insertions(+), 51 deletions(-) diff --git a/sbi/neural_nets/net_builders/flow.py b/sbi/neural_nets/net_builders/flow.py index 3f82064a9..9014a266b 100644 --- a/sbi/neural_nets/net_builders/flow.py +++ b/sbi/neural_nets/net_builders/flow.py @@ -1044,18 +1044,7 @@ def build_zuko_bpf( def build_zuko_flow( - which_nf: Literal[ - "BPF", - "CNF", - "GF", - "MAF", - "NCSF", - "NAF", - "NICE", - "NSF", - "SOSPF", - "UNAF", - ], + which_nf: str, batch_x: Tensor, batch_y: Tensor, z_score_x: Literal[ @@ -1126,7 +1115,7 @@ def build_zuko_flow( which_nf, x_numel, y_numel, hidden_features, num_transforms, **kwargs ) - # Prepare x transforms to prepend + # Get x transforms (z-score or logit transform) x_transforms = _prepare_x_transforms(z_score_x, batch_x, x_dist) # Combine all transforms @@ -1168,7 +1157,7 @@ def _get_base_and_transforms( **kwargs: Additional arguments for flow constructor. Returns: - base_transforms: Tuple of transforms from the built flow. + tuple of flow base and its transforms. 
""" build_nf = getattr(zuko.flows, which_nf) @@ -1271,46 +1260,21 @@ def build_zuko_unconditional_flow( if isinstance(hidden_features, int): hidden_features = [hidden_features] * num_transforms - build_nf = getattr(zuko.flows, which_nf) + base, base_transforms = _get_base_and_transforms( + which_nf, x_numel, 0, hidden_features, num_transforms, **kwargs + ) - if which_nf == "CNF": - flow_built = build_nf( - features=x_numel, hidden_features=hidden_features, **kwargs - ) - else: - flow_built = build_nf( - features=x_numel, - hidden_features=hidden_features, - transforms=num_transforms, - **kwargs, + z_score_x_bool, structured_x = z_score_parser(z_score_x) + if z_score_x_bool: + # TODO: Check whether first base transform, then z-score is correct (it's the + # other way around in the conditional flows). + transforms = ( + *base_transforms, + standardizing_transform_zuko(batch_x, structured_x), ) - # Continuous normalizing flows (CNF) only have one transform, - # so we need to handle them slightly differently. - if which_nf == "CNF": - transform = flow_built.transform - - z_score_x_bool, structured_x = z_score_parser(z_score_x) - if z_score_x_bool: - transform = ( - transform, - standardizing_transform_zuko(batch_x, structured_x), - ) - - # Combine transforms. - neural_net = zuko.flows.Flow(transform, flow_built.base) - else: - transforms = flow_built.transform.transforms - - z_score_x_bool, structured_x = z_score_parser(z_score_x) - if z_score_x_bool: - transforms = ( - *transforms, - standardizing_transform_zuko(batch_x, structured_x), - ) - - # Combine transforms. - neural_net = zuko.flows.Flow(transforms, flow_built.base) + # Combine transforms. 
+ neural_net = zuko.flows.Flow(transforms, base) flow = ZukoUnconditionalFlow( neural_net, From c4aa2d14cd18cb924e6610d7b66da18800a4d16c Mon Sep 17 00:00:00 2001 From: Jan Date: Thu, 31 Jul 2025 14:42:13 +0200 Subject: [PATCH 50/51] fix unconstrained nle test --- tests/linearGaussian_snle_test.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/linearGaussian_snle_test.py b/tests/linearGaussian_snle_test.py index 5a175c71e..df0d1fc6a 100644 --- a/tests/linearGaussian_snle_test.py +++ b/tests/linearGaussian_snle_test.py @@ -505,7 +505,7 @@ def test_api_nle_sampling_methods( @pytest.mark.slow -@pytest.mark.parametrize("num_dim", (1, 2)) +@pytest.mark.parametrize("num_dim", (2,)) @pytest.mark.parametrize("prior_str", ("uniform", "gaussian")) @pytest.mark.parametrize("model_str", ("zuko_maf", "zuko_nsf")) def test_c2st_nle_unconstrained_space( @@ -536,19 +536,22 @@ def test_c2st_nle_unconstrained_space( def simulator(theta): return linear_gaussian(theta, likelihood_shift, likelihood_cov) + theta = prior.sample((num_simulations,)) + x = simulator(theta) + + # Estimate prior on x. + x_dist = BoxUniform(low=x.min(dim=0)[0], high=x.max(dim=0)[0]) + # Use likelihood_nn with z_score_theta="transform_to_unconstrained" density_estimator = likelihood_nn( model_str, hidden_features=60, num_transforms=3, - z_score_theta="transform_to_unconstrained", - x_dist=prior, + z_score_x="transform_to_unconstrained", + x_dist=x_dist, ) inference = NLE(density_estimator=density_estimator) - theta = prior.sample((num_simulations,)) - x = simulator(theta) - likelihood_estimator = inference.append_simulations(theta, x).train() # Test inference amortized over trials. 
@@ -566,8 +569,6 @@ def simulator(theta): prior=prior, num_samples=num_samples, ) - else: - raise ValueError(f"Wrong prior_str: '{prior_str}'.") potential_fn, theta_transform = likelihood_estimator_based_potential( prior=prior, likelihood_estimator=likelihood_estimator, x_o=x_o From 1ee797d0cca6dedc4c55b9ada7a5a06b2b07dafd Mon Sep 17 00:00:00 2001 From: Jan Date: Thu, 31 Jul 2025 14:48:18 +0200 Subject: [PATCH 51/51] refactor z-score-parser test --- tests/sbiutils_test.py | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/tests/sbiutils_test.py b/tests/sbiutils_test.py index 6d83c06cc..73a56cffd 100644 --- a/tests/sbiutils_test.py +++ b/tests/sbiutils_test.py @@ -416,7 +416,13 @@ def test_kde(bandwidth, transform, sample_weights): "independent", "structured", "transform_to_unconstrained", - "invalid_value", + pytest.param( + "invalid_value", + marks=pytest.mark.xfail( + raises=ValueError, + reason="Invalid z-scoring option should raise ValueError.", + ), + ), ], ) @pytest.mark.parametrize( @@ -429,24 +435,30 @@ def test_kde(bandwidth, transform, sample_weights): "independent", "structured", "transform_to_unconstrained", - "invalid_value", + pytest.param( + "invalid_value", + marks=pytest.mark.xfail( + raises=ValueError, + reason="Invalid z-scoring option should raise ValueError.", + ), + ), ], ) def test_z_score_parser(z_x, z_theta): - if z_theta == "invalid_value": - with pytest.raises(ValueError, match="Invalid z-scoring option."): - z_score_parser(z_theta) - - if z_x == "invalid_value": - with pytest.raises(ValueError, match="Invalid z-scoring option."): + """Test the z_score_parser function.""" + if z_x is bool or z_theta is bool: + with pytest.warns( + UserWarning, + match="Boolean values for z-scoring are deprecated and will", + ): z_score_parser(z_x) + z_score_parser(z_theta) - if z_x != "invalid_value" and z_theta != "invalid_value": - result_x = z_score_parser(z_x) - result_theta = 
z_score_parser(z_theta) + result_x = z_score_parser(z_x) + result_theta = z_score_parser(z_theta) - assert result_x is not None, f"z_score_parser({z_x}) returned None" - assert result_theta is not None, f"z_score_parser({z_theta}) returned None" + assert result_x is not None, f"z_score_parser({z_x}) returned None" + assert result_theta is not None, f"z_score_parser({z_theta}) returned None" @pytest.mark.parametrize(