
Commit e7fef3b

SamuelGabriel authored and facebook-github-bot committed
Changes to enable benchmarking with PFNs within Ax (#2915)
Summary:
X-link: facebook/Ax#4003
Pull Request resolved: #2915

These are all the basic changes needed for PFNs to work within Ax. Besides removing a number of problems encountered along the way, this also fixes a particular bug that led to worse optimization performance: an error in the way EI is computed. It also adds tests verifying that the acquisition functions compute the right values when approximating a normal distribution.

### Discussion (cc saitcakmak)

Should we get rid of the ag_integrate logic? I took it over from whoever wrote this before, but I have to say that implementing acquisition functions directly on the raw logits seems easier to me. I believe it was implemented this way so that acquisition functions can be defined elegantly on top of a posterior. I would propose to do it slightly less elegantly and use a function like the one below, where we access the logits and the borders from the posterior (`posterior.borders`) instead of having the posterior provide the integrate function. This is how you would implement EI, which I believe is simpler than our current EI implementation (which has already had a bug twice). It is even about the same number of lines, but does not require understanding the trick of splitting the integration into pieces that are handled separately.

```python
def ei(
    self,
    logits: torch.Tensor,
    best_f: float | torch.Tensor,
    *,
    maximize: bool = True,
) -> torch.Tensor:
    # logits: evaluation_points x batch x feature_dim
    bucket_diffs = self.borders[1:] - self.borders[:-1]
    assert maximize
    if not torch.is_tensor(best_f) or not len(best_f.shape):  # type: ignore
        best_f = torch.full(
            logits[..., 0].shape, best_f, device=logits.device
        )  # type: ignore
    best_f = best_f[..., None].repeat(
        *[1] * len(best_f.shape), logits.shape[-1]
    )  # type: ignore
    clamped_best_f = best_f.clamp(self.borders[:-1], self.borders[1:])
    # > bucket_contributions =
    # >     (best_f[..., None] < self.borders[:-1]).float() * bucket_means
    # true bucket contributions
    bucket_contributions = (
        (self.borders[1:] ** 2 - clamped_best_f**2) / 2
        - best_f * (self.borders[1:] - clamped_best_f)
    ) / bucket_diffs
    p = torch.softmax(logits, -1)
    return torch.einsum("...b,...b->...", p, bucket_contributions)
```

Reviewed By: saitcakmak

Differential Revision: D77884839

fbshipit-source-id: b138037cd5252733a0bf13db7f42631f0b6959a0
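For reference, the per-bucket term in `bucket_contributions` above is the exact integral of the improvement against the uniform density the Riemann posterior places within each bucket. With borders $b_i, b_{i+1}$, incumbent $f^*$, and $c = \operatorname{clamp}(f^*, b_i, b_{i+1})$, a short derivation (this restates what the code computes, nothing new is assumed):

```math
\int_{b_i}^{b_{i+1}} \max(y - f^*,\, 0)\,\frac{dy}{b_{i+1} - b_i}
= \frac{1}{b_{i+1} - b_i}\int_{c}^{b_{i+1}} (y - f^*)\,dy
= \frac{\tfrac{1}{2}\bigl(b_{i+1}^2 - c^2\bigr) - f^*\bigl(b_{i+1} - c\bigr)}{b_{i+1} - b_i}.
```

Weighting these per-bucket contributions by `softmax(logits)` and summing, as the `einsum` does, yields EI.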
1 parent 07ce376 commit e7fef3b

File tree: 8 files changed, +512 −68 lines changed

botorch_community/acquisition/discretized.py

Lines changed: 80 additions & 19 deletions
```diff
@@ -11,8 +11,13 @@
 from abc import ABC, abstractmethod
 
 import torch
-
 from botorch.acquisition import AcquisitionFunction
+from botorch.acquisition.objective import (
+    PosteriorTransform,
+    ScalarizedPosteriorTransform,
+)
+
+from botorch.exceptions.errors import UnsupportedError
 from botorch.models.model import Model
 from botorch.utils.transforms import (
     average_over_ensemble_models,
@@ -34,17 +39,33 @@ class DiscretizedAcquistionFunction(AcquisitionFunction, ABC):
     be implemented by subclasses to define the specific acquisition functions.
     """
 
-    def __init__(self, model: Model) -> None:
+    def __init__(self, model: Model, posterior_transform: PosteriorTransform) -> None:
         r"""
         Initialize the DiscretizedAcquistionFunction
 
         Args:
             model: A fitted model that is used to compute the posterior
                 distribution over the outcomes of interest.
                 The model should be a `PFNModel`.
+            posterior_transform: A ScalarizedPosteriorTransform that can only
+                indicate minimization or maximization of the objective.
         """
-
         super().__init__(model=model)
+        self.maximize = True
+        if posterior_transform is not None:
+            unsupported_error_message = (
+                "Only scalarized posterior transforms with a "
+                "single objective and 0.0 offset are supported."
+            )
+            if (
+                not isinstance(posterior_transform, ScalarizedPosteriorTransform)
+                or (posterior_transform.offset != 0.0)
+                or len(posterior_transform.weights) != 1
+                or posterior_transform.weights[0] not in [-1.0, 1.0]
+            ):
+                raise UnsupportedError(unsupported_error_message)
+
+            self.maximize = posterior_transform.weights[0] == 1.0
 
     @t_batch_mode_transform(expected_q=1)
     @average_over_ensemble_models
@@ -59,9 +80,13 @@ def forward(self, X: Tensor) -> Tensor:
             A `(b)`-dim Tensor of the acquisition function at the given
             design points `X`.
         """
-        self.to(device=X.device)
-
         discrete_posterior = self.model.posterior(X)
+        if not self.maximize:
+            discrete_posterior.borders = -torch.flip(discrete_posterior.borders, [0])
+            discrete_posterior.probabilities = torch.flip(
+                discrete_posterior.probabilities, [-1]
+            )
+
         result = discrete_posterior.integrate(self.ag_integrate)
         # remove q dimension
         return result.squeeze(-1)
@@ -87,18 +112,19 @@ def ag_integrate(self, lower_bound: Tensor, upper_bound: Tensor) -> Tensor:
         """
         pass  # pragma: no cover
 
-r"""DiscretizedExpectedImprovement is an acquisition function that computes
-the expected improvement over the current best observed value for a Riemann
-distribution."""
-
 
 class DiscretizedExpectedImprovement(DiscretizedAcquistionFunction):
     r"""DiscretizedExpectedImprovement is an acquisition function that
     computes the expected improvement over the current best observed value
     for a Riemann distribution.
     """
 
-    def __init__(self, model: Model, best_f: Tensor) -> None:
+    def __init__(
+        self,
+        model: Model,
+        best_f: Tensor,
+        posterior_transform: PosteriorTransform | None = None,
+    ) -> None:
         r"""
         Initialize the DiscretizedExpectedImprovement
 
@@ -108,7 +134,7 @@ def __init__(self, model: Model, best_f: Tensor) -> None:
                 The model should be a `PFNModel`.
             best_f: A tensor representing the current best observed value.
         """
-        super().__init__(model)
+        super().__init__(model=model, posterior_transform=posterior_transform)
         self.register_buffer("best_f", torch.as_tensor(best_f))
 
     def ag_integrate(self, lower_bound: Tensor, upper_bound: Tensor) -> Tensor:
@@ -127,11 +153,38 @@ def ag_integrate(self, lower_bound: Tensor, upper_bound: Tensor) -> Tensor:
             A `(b)`-dim Tensor of acquisition function derivatives at the given
             design points `X`.
         """
-        max_lower_bound_and_f = torch.max(self.best_f, lower_bound)
-        bucket_average = (upper_bound + max_lower_bound_and_f) / 2
-        improvement = bucket_average - self.best_f
+        best_f = self.best_f.to(lower_bound)
+
+        # Case 1: best_f >= upper_bound, entire interval gives 0 improvement
+        case1_mask = best_f >= upper_bound
+
+        # Case 2: best_f <= lower_bound, entire interval gives improvement
+        case2_mask = best_f <= lower_bound
+
+        # Case 3: lower_bound < best_f < upper_bound, partial improvement
+        case3_mask = ~(case1_mask | case2_mask)
+
+        # Initialize result tensor
+        result = torch.zeros_like(lower_bound)
+
+        # Case 1: result is already 0
+
+        # Case 2: integral = (
+        #     ((upper_bound + lower_bound)/2 - best_f)
+        #     * (upper_bound - lower_bound)
+        # )
+        if case2_mask.any():
+            bucket_width = upper_bound - lower_bound
+            bucket_center = (upper_bound + lower_bound) / 2
+            result = torch.where(
+                case2_mask, (bucket_center - best_f) * bucket_width, result
+            )
+
+        # Case 3: integral = (upper_bound - best_f)²/2
+        if case3_mask.any():
+            result = torch.where(case3_mask, (upper_bound - best_f).pow(2) / 2, result)
 
-        return improvement.clamp_min(0)
+        return result.clamp_min(0)
 
 
 class DiscretizedProbabilityOfImprovement(DiscretizedAcquistionFunction):
```
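The rewritten `ag_integrate` above distinguishes three ways a bucket `[lower_bound, upper_bound]` can sit relative to `best_f`. As a standalone sanity check (a hypothetical helper, not part of the diff), the closed-form values agree with a Monte Carlo estimate of the bucket integral of `max(y - best_f, 0)`:

```python
import torch


def ei_bucket_integral(
    lower: torch.Tensor, upper: torch.Tensor, best_f: float
) -> torch.Tensor:
    """Closed form of the integral of max(y - best_f, 0) over [lower, upper]."""
    best_f_t = torch.as_tensor(best_f).to(lower)
    # Case 2 (best_f <= lower): the full bucket improves.
    full = ((upper + lower) / 2 - best_f_t) * (upper - lower)
    # Case 3 (lower < best_f < upper): only [best_f, upper] improves.
    partial = (upper - best_f_t).pow(2) / 2
    result = torch.where(best_f_t <= lower, full, partial)
    # Case 1 (best_f >= upper): no improvement.
    return torch.where(best_f_t >= upper, torch.zeros_like(result), result)


lower = torch.tensor([0.0, 1.0, 2.0])
upper = torch.tensor([1.0, 2.0, 3.0])
best_f = 1.5
y = lower + (upper - lower) * torch.rand(1_000_000, 3)  # uniform samples per bucket
mc = (y - best_f).clamp_min(0).mean(0) * (upper - lower)
print(ei_bucket_integral(lower, upper, best_f))  # tensor([0.0000, 0.1250, 1.0000])
print(mc)  # close to the exact values above
```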
```diff
@@ -140,7 +193,12 @@ class DiscretizedProbabilityOfImprovement(DiscretizedAcquistionFunction):
     for a Riemann distribution.
     """
 
-    def __init__(self, model: Model, best_f: Tensor) -> None:
+    def __init__(
+        self,
+        model: Model,
+        best_f: Tensor,
+        posterior_transform: PosteriorTransform | None = None,
+    ) -> None:
         r"""
         Initialize the DiscretizedProbabilityOfImprovement
 
@@ -151,7 +209,7 @@ def __init__(self, model: Model, best_f: Tensor) -> None:
             best_f: A tensor representing the current best observed value.
         """
 
-        super().__init__(model)
+        super().__init__(model, posterior_transform)
         self.register_buffer("best_f", torch.as_tensor(best_f))
 
     def ag_integrate(self, lower_bound: Tensor, upper_bound: Tensor) -> Tensor:
@@ -174,5 +232,8 @@ def ag_integrate(self, lower_bound: Tensor, upper_bound: Tensor) -> Tensor:
             A `(b)`-dim Tensor of acquisition function derivatives at the given
             design points `X`.
         """
-        proportion = (upper_bound - self.best_f) / (upper_bound - lower_bound)
-        return proportion.clamp(0, 1)
+        best_f = self.best_f.to(lower_bound)
+        # two separate clamps needed below, as one is a tensor and one a scalar
+        return (
+            (upper_bound - best_f).clamp(min=0.0).clamp(max=upper_bound - lower_bound)
+        )
```
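A quick way to read the double clamp in the new probability-of-improvement integrand: it returns the length of the part of each bucket that lies above `best_f`, which `integrate` presumably converts into a probability by weighting with the bucket's density. A tiny illustration with hypothetical borders:

```python
import torch

lower = torch.tensor([0.0, 1.0, 2.0])
upper = torch.tensor([1.0, 2.0, 3.0])
best_f = 1.5
# Length of the overlap of [best_f, inf) with each bucket [lower, upper].
print((upper - best_f).clamp(min=0.0).clamp(max=upper - lower))
# tensor([0.0000, 0.5000, 1.0000])
```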

botorch_community/acquisition/input_constructors.py

Lines changed: 52 additions & 3 deletions
```diff
@@ -13,22 +13,71 @@
 
 from __future__ import annotations
 
-from typing import List, Optional, Tuple
+from typing import Any, Hashable, List, Optional, Tuple
 
 import torch
-from botorch.acquisition.input_constructors import acqf_input_constructor
-from botorch.acquisition.objective import ScalarizedPosteriorTransform
+
+from botorch.acquisition.input_constructors import (
+    acqf_input_constructor,
+    get_best_f_analytic,
+)
+from botorch.acquisition.objective import (
+    PosteriorTransform,
+    ScalarizedPosteriorTransform,
+)
 from botorch.acquisition.utils import get_optimal_samples
 from botorch.models.model import Model
+
+from botorch.utils.datasets import SupervisedDataset
 from botorch_community.acquisition.bayesian_active_learning import (
     qBayesianQueryByComittee,
     qBayesianVarianceReduction,
     qStatisticalDistanceActiveLearning,
 )
+
+from botorch_community.acquisition.discretized import (
+    DiscretizedExpectedImprovement,
+    DiscretizedProbabilityOfImprovement,
+)
 from botorch_community.acquisition.scorebo import qSelfCorrectingBayesianOptimization
 from torch import Tensor
 
 
+@acqf_input_constructor(
+    DiscretizedExpectedImprovement, DiscretizedProbabilityOfImprovement
+)
+def construct_inputs_best_f(
+    model: Model,
+    training_data: SupervisedDataset | dict[Hashable, SupervisedDataset],
+    posterior_transform: PosteriorTransform | None = None,
+    best_f: float | Tensor | None = None,
+) -> dict[str, Any]:
+    r"""Construct kwargs for the acquisition functions requiring `best_f`.
+
+    Args:
+        model: The model to be used in the acquisition function.
+        training_data: Dataset(s) used to train the model.
+            Used to determine default value for `best_f`.
+        best_f: Threshold above (or below) which improvement is defined.
+        posterior_transform: The posterior transform to be used in the
+            acquisition function.
+
+    Returns:
+        A dict mapping kwarg names of the constructor to values.
+    """
+    if best_f is None:
+        best_f = get_best_f_analytic(
+            training_data=training_data,
+            posterior_transform=posterior_transform,
+        )
+
+    return {
+        "model": model,
+        "posterior_transform": posterior_transform,
+        "best_f": best_f,
+    }
+
+
 @acqf_input_constructor(
     qBayesianQueryByComittee,
     qBayesianVarianceReduction,
```
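For context, a minimal usage sketch of the new constructor (the data here is hypothetical, and `pfn_model` stands in for a fitted `PFNModel`): BoTorch's dispatcher resolves the function registered above and fills in `best_f` from the training data when it is not supplied.

```python
import torch
from botorch.acquisition.input_constructors import get_acqf_input_constructor
from botorch.utils.datasets import SupervisedDataset
from botorch_community.acquisition.discretized import DiscretizedExpectedImprovement

# Hypothetical training data; pfn_model stands in for a fitted PFNModel.
train_X = torch.rand(10, 3)
train_Y = torch.rand(10, 1)
dataset = SupervisedDataset(
    X=train_X, Y=train_Y, feature_names=["x1", "x2", "x3"], outcome_names=["y"]
)

# Resolve the registered constructor; best_f defaults via get_best_f_analytic.
input_constructor = get_acqf_input_constructor(DiscretizedExpectedImprovement)
kwargs = input_constructor(model=pfn_model, training_data=dataset)
acqf = DiscretizedExpectedImprovement(**kwargs)
```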

botorch_community/models/prior_fitted_network.py

Lines changed: 98 additions & 10 deletions
```diff
@@ -31,21 +31,61 @@ def __init__(
         train_X: Tensor,
         train_Y: Tensor,
         model: nn.Module,
+        train_Yvar: Tensor | None = None,
+        batch_first: bool = False,
+        constant_model_kwargs: dict | None = None,
     ) -> None:
         """Initialize a PFNModel.
 
         Args:
-            train_X: A `batch_shape x n x d` tensor of training features.
-            train_Y: A `batch_shape x n x m` tensor of training observations.
+            train_X: A `n x d` tensor of training features.
+            train_Y: A `n x m` tensor of training observations.
             model: A pre-trained PFN model with the following
                 forward(train_X, train_Y, X) -> logit predictions of shape
                 `n x b x c` where c is the number of discrete buckets
                 borders: A `c+1`-dim tensor of bucket borders
+            train_Yvar: Not yet supported.
+            batch_first: Whether the batch dimension is the first dimension of
+                the input tensors. This is needed to support different PFN
+                models. For batch-first x has shape `batch x seq_len x features`
+                and for non-batch-first it has shape `seq_len x batch x features`.
+            constant_model_kwargs: A dictionary of model kwargs that
+                will be passed to the model in each forward pass.
         """
         super().__init__()
-        self.train_X = train_X
-        self.train_Y = train_Y
-        self.pfn = model.to(train_X)
+
+        if train_Yvar is not None:
+            raise UnsupportedError("train_Yvar is not supported for PFNModel.")
+
+        if not (1 <= train_Y.dim() <= 3):
+            raise UnsupportedError("train_Y must be 1- to 3-dimensional.")
+
+        if not (2 <= train_X.dim() <= 3):
+            raise UnsupportedError("train_X must be 2- to 3-dimensional.")
+
+        if train_Y.dim() == train_X.dim():
+            if train_Y.shape[-1] > 1:
+                raise UnsupportedError("Only 1 target allowed for PFNModel.")
+            train_Y = train_Y.squeeze(-1)
+
+        if (len(train_X.shape) != len(train_Y.shape) + 1) or (
+            train_Y.shape != train_X.shape[:-1]
+        ):
+            raise UnsupportedError(
+                "train_X and train_Y must have the same shape except "
+                "for the last dimension."
+            )
+
+        if len(train_X.shape) == 2:
+            # adding batch dimension
+            train_X = train_X.unsqueeze(0)
+            train_Y = train_Y.unsqueeze(0)
+
+        self.train_X = train_X  # shape: `b x n x d`
+        self.train_Y = train_Y  # shape: `b x n`
+        self.pfn = model
+        self.batch_first = batch_first
+        self.constant_model_kwargs = constant_model_kwargs
 
     def posterior(
         self,
@@ -61,7 +101,7 @@ def posterior(
             any `model.forward` or `model.likelihood` calls.
 
         Args:
-            X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
+            X: A `b'? x b? x q x d`-dim Tensor, where `d` is the dimension of the
                 feature space, `q` is the number of points considered jointly,
                 and `b` is the batch dimension.
                 We only allow `q=1` for PFNModel, so q can also be omitted, i.e.
@@ -86,11 +126,59 @@
         if posterior_transform is not None:
             raise UnsupportedError("posterior_transform is not supported for PFNModel.")
 
-        if len(X.shape) > 2 and X.shape[-2] > 1:
-            raise NotImplementedError("q must be 1 for PFNModel.")  # add support later
+        if not (1 <= len(X.shape) <= 4):
+            raise UnsupportedError("X must be 1- to 4-dimensional.")
+
+        # X has shape b'? x b? x q? x d
+
+        orig_X_shape = X.shape
+        q_in_orig_X_shape = len(X.shape) > 2
+
+        if len(X.shape) == 1:
+            X = X.unsqueeze(0).unsqueeze(0).unsqueeze(0)  # shape `b'=1 x b=1 x q=1 x d`
+        elif len(X.shape) == 2:
+            X = X.unsqueeze(1).unsqueeze(1)  # shape `b' x b=1 x q=1 x d`
+        elif len(X.shape) == 3:
+            if self.train_X.shape[0] == 1:
+                X = X.unsqueeze(1)  # shape `b' x b=1 x q x d`
+            else:
+                X = X.unsqueeze(0)  # shape `b'=1 x b x q x d`
+
+        # X has shape `b' x b x q x d`
+
+        if X.shape[2] != 1:
+            raise UnsupportedError("Only q=1 is supported for PFNModel.")
+
+        # X has shape `b' x b x q=1 x d`
+
+        train_X = self.train_X  # shape `b x n x d`
+        train_Y = self.train_Y  # shape `b x n`
+        folded_X = X.transpose(0, 2).squeeze(0)  # shape `b x b' x d`
+
+        constant_model_kwargs = self.constant_model_kwargs or {}
+
+        if self.batch_first:
+            logits = self.pfn(
+                train_X.float(),
+                train_Y.float(),
+                folded_X.float(),
+                **constant_model_kwargs,
+            ).transpose(0, 1)
+        else:
+            logits = self.pfn(
+                train_X.float().transpose(0, 1),
+                train_Y.float().transpose(0, 1),
+                folded_X.float().transpose(0, 1),
+                **constant_model_kwargs,
+            )
+
+        # logits shape `b' x b x logits_dim`
 
-        # flatten batch dimensions for PFN input
-        logits = self.pfn(self.train_X, self.train_Y, X)
+        logits = logits.view(
+            *orig_X_shape[:-1], -1
+        )  # orig shape w/o q but logits_dim at end: `b'? x b? x q? x logits_dim`
+        if q_in_orig_X_shape:
+            logits = logits.squeeze(-2)  # shape `b'? x b? x logits_dim`
 
         probabilities = logits.softmax(dim=-1)
```