
Commit 38776a1 (2 parents: dd52226 + 36545af)
Author: Vincent Moens

Update (base update)
[ghstack-poisoned]

File tree: 6 files changed, +129 / -9 lines

test/test_cost.py

Lines changed: 67 additions & 1 deletion
@@ -7565,6 +7565,7 @@ def _create_mock_actor(
                 "action1": (action_key, "action1"),
             },
             log_prob_key=sample_log_prob_key,
+            aggregate_probabilities=True,
         )
         module_out_keys = [
             ("params", "action1", "loc"),
@@ -7634,6 +7635,7 @@ def _create_mock_actor_value(
                 "action1": ("action", "action1"),
             },
             log_prob_key=sample_log_prob_key,
+            aggregate_probabilities=True,
         )
         module_out_keys = [
             ("params", "action1", "loc"),
@@ -7690,6 +7692,7 @@ def _create_mock_actor_value_shared(
                 "action1": ("action", "action1"),
             },
             log_prob_key=sample_log_prob_key,
+            aggregate_probabilities=True,
         )
         module_out_keys = [
             ("params", "action1", "loc"),
@@ -8627,6 +8630,7 @@ def _create_mock_actor(
                 "action1": (action_key, "action1"),
             },
             log_prob_key=sample_log_prob_key,
+            aggregate_probabilities=True,
         )
         module_out_keys = [
             ("params", "action1", "loc"),
@@ -8727,6 +8731,7 @@ def _create_mock_common_layer_setup(
                 "action1": ("action", "action1"),
             },
             log_prob_key=sample_log_prob_key,
+            aggregate_probabilities=True,
         )
         module_out_keys = [
             ("params", "action1", "loc"),
@@ -15277,7 +15282,7 @@ def _forward_value_estimator_keys(self, **kwargs) -> None:
         class MyLoss3(MyLoss2):
             @dataclass
             class _AcceptedKeys:
-                some_key = "some_value"
+                some_key: str = "some_value"

         loss_module = MyLoss3()
         assert loss_module.tensor_keys.some_key == "some_value"
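Note: the one-character fix above matters because @dataclass only turns annotated attributes into fields; a bare `some_key = "some_value"` stays a plain class attribute and never reaches `__dataclass_fields__`, which `set_keys` validates against (see torchrl/objectives/common.py below). A self-contained illustration with a toy class (not the test's actual `_AcceptedKeys`):

from dataclasses import dataclass, fields

@dataclass
class AcceptedKeysDemo:
    plain = "ignored"              # no annotation: class attribute, not a dataclass field
    some_key: str = "some_value"   # annotated: registered in __dataclass_fields__

print([f.name for f in fields(AcceptedKeysDemo)])  # ['some_key']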
@@ -15639,6 +15644,67 @@ def __init__(self):
         assert p.device == dest


+def test_exploration_compile():
+    m = ProbabilisticTensorDictModule(
+        in_keys=["loc", "scale"],
+        out_keys=["sample"],
+        distribution_class=torch.distributions.Normal,
+    )
+
+    # class set_exploration_type_random(set_exploration_type):
+    #     __init__ = object.__init__
+    #     type = ExplorationType.RANDOM
+    it = exploration_type()
+
+    @torch.compile(fullgraph=True)
+    def func(t):
+        with set_exploration_type(ExplorationType.RANDOM):
+            t0 = m(t.clone())
+            t1 = m(t.clone())
+        return t0, t1
+
+    t = TensorDict(loc=torch.randn(3), scale=torch.rand(3))
+    t0, t1 = func(t)
+    assert (t0["sample"] != t1["sample"]).any()
+    assert it == exploration_type()
+
+    @torch.compile(fullgraph=True)
+    def func(t):
+        with set_exploration_type(ExplorationType.MEAN):
+            t0 = m(t.clone())
+            t1 = m(t.clone())
+        return t0, t1
+
+    t = TensorDict(loc=torch.randn(3), scale=torch.rand(3))
+    t0, t1 = func(t)
+    assert (t0["sample"] == t1["sample"]).all()
+    assert it == exploration_type()
+
+    @torch.compile(fullgraph=True)
+    @set_exploration_type(ExplorationType.RANDOM)
+    def func(t):
+        t0 = m(t.clone())
+        t1 = m(t.clone())
+        return t0, t1
+
+    t = TensorDict(loc=torch.randn(3), scale=torch.rand(3))
+    t0, t1 = func(t)
+    assert (t0["sample"] != t1["sample"]).any()
+    assert it == exploration_type()
+
+    @torch.compile(fullgraph=True)
+    @set_exploration_type(ExplorationType.MEAN)
+    def func(t):
+        t0 = m(t.clone())
+        t1 = m(t.clone())
+        return t0, t1
+
+    t = TensorDict(loc=torch.randn(3), scale=torch.rand(3))
+    t0, t1 = func(t)
+    assert (t0["sample"] == t1["sample"]).all()
+    assert it == exploration_type()
+
+
 def test_loss_exploration():
     class DummyLoss(LossModule):
         def forward(self, td, mode):
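For context, the new test exercises `set_exploration_type` both as a context manager and as a decorator under `torch.compile(fullgraph=True)`, checking that the ambient mode is restored afterwards. A minimal sketch of that dual usage in eager mode (imports assumed from `torchrl.envs.utils`):

from torchrl.envs.utils import ExplorationType, exploration_type, set_exploration_type

# as a context manager: the mode is restored on exit
with set_exploration_type(ExplorationType.MEAN):
    assert exploration_type() == ExplorationType.MEAN

# as a decorator: the mode is active only inside the wrapped function
@set_exploration_type(ExplorationType.RANDOM)
def sample_randomly():
    return exploration_type()

assert sample_randomly() == ExplorationType.RANDOM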

torchrl/__init__.py

Lines changed: 41 additions & 0 deletions
@@ -3,13 +3,15 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 import os
+import weakref
 from warnings import warn

 import torch

 from tensordict import set_lazy_legacy

 from torch import multiprocessing as mp
+from torch.distributions.transforms import _InverseTransform, ComposeTransform

 set_lazy_legacy(False).set()
@@ -51,3 +53,42 @@
 filter_warnings_subprocess = True

 _THREAD_POOL_INIT = torch.get_num_threads()
+
+# monkey-patch dist transforms until https://github.com/pytorch/pytorch/pull/135001/ finds a home
+@property
+def inv(self):
+    """
+    Returns the inverse :class:`Transform` of this transform.
+    This should satisfy ``t.inv.inv is t``.
+    """
+    inv = None
+    if self._inv is not None:
+        inv = self._inv()
+    if inv is None:
+        inv = _InverseTransform(self)
+        if not torch.compiler.is_dynamo_compiling():
+            self._inv = weakref.ref(inv)
+    return inv
+
+
+torch.distributions.transforms.Transform.inv = inv
+
+
+@property
+def inv(self):
+    inv = None
+    if self._inv is not None:
+        inv = self._inv()
+    if inv is None:
+        inv = ComposeTransform([p.inv for p in reversed(self.parts)])
+        if not torch.compiler.is_dynamo_compiling():
+            self._inv = weakref.ref(inv)
+            inv._inv = weakref.ref(self)
+        else:
+            # We need inv.inv to be equal to self, but weakref can cause a graph break
+            inv._inv = lambda out=self: out
+
+    return inv
+
+
+ComposeTransform.inv = inv
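The two patched properties reproduce the stock PyTorch behavior (including the documented `t.inv.inv is t` round trip) but skip the weakref caching while Dynamo is tracing, since a weakref assignment there would cause a graph break. A quick eager-mode sanity check with stock transforms (a sketch, not part of the commit):

from torch.distributions.transforms import AffineTransform, ComposeTransform, ExpTransform

t = ExpTransform()
assert t.inv.inv is t  # round trip preserved through the cached inverse

c = ComposeTransform([AffineTransform(0.0, 2.0), ExpTransform()])
assert c.inv.inv is c  # same guarantee for composed transforms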

torchrl/objectives/a2c.py

Lines changed: 7 additions & 2 deletions
@@ -427,8 +427,13 @@ def _log_probs(
         if isinstance(action, torch.Tensor):
             log_prob = dist.log_prob(action)
         else:
-            tensordict = dist.log_prob(tensordict)
-            log_prob = tensordict.get(self.tensor_keys.sample_log_prob)
+            maybe_log_prob = dist.log_prob(tensordict)
+            if not isinstance(maybe_log_prob, torch.Tensor):
+                # In some cases (Composite distribution with aggregate_probabilities toggled off) the returned type may not
+                # be a tensor
+                log_prob = maybe_log_prob.get(self.tensor_keys.sample_log_prob)
+            else:
+                log_prob = maybe_log_prob
         log_prob = log_prob.unsqueeze(-1)
         return log_prob, dist
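The new branch handles `dist.log_prob(tensordict)` handing back a tensordict of log-probabilities rather than a single tensor (the composite-distribution case mentioned in the comment). A tiny stand-alone sketch of the same dispatch, using a plain TensorDict as a stand-in for that return value and "sample_log_prob" as the key name for illustration:

import torch
from tensordict import TensorDict

maybe_log_prob = TensorDict({"sample_log_prob": torch.randn(3)}, [3])  # stand-in return value
if not isinstance(maybe_log_prob, torch.Tensor):
    log_prob = maybe_log_prob.get("sample_log_prob")
else:
    log_prob = maybe_log_prob
assert isinstance(log_prob, torch.Tensor)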

torchrl/objectives/common.py

Lines changed: 4 additions & 3 deletions
@@ -46,7 +46,6 @@ def _forward_wrapper(func):
     @functools.wraps(func)
     def new_forward(self, *args, **kwargs):
         with set_exploration_type(self.deterministic_sampling_mode):
-            # with nullcontext():
            return func(self, *args, **kwargs)

     return new_forward
@@ -55,7 +54,7 @@ def new_forward(self, *args, **kwargs):
 class _LossMeta(abc.ABCMeta):
     def __init__(cls, name, bases, attr_dict):
         super().__init__(name, bases, attr_dict)
-        # cls.forward = _forward_wrapper(cls.forward)
+        cls.forward = _forward_wrapper(cls.forward)


 class LossModule(TensorDictModuleBase, metaclass=_LossMeta):
@@ -229,7 +228,9 @@ def set_keys(self, **kwargs) -> None:
         """
         for key, value in kwargs.items():
             if key not in self._AcceptedKeys.__dataclass_fields__:
-                raise ValueError(f"{key} is not an accepted tensordict key")
+                raise ValueError(
+                    f"{key} is not an accepted tensordict key. Accepted keys are: {self._AcceptedKeys.__dataclass_fields__}."
+                )
             if value is not None:
                 setattr(self.tensor_keys, key, value)
             else:
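For context, `set_keys` validates every supplied name against the `_AcceptedKeys` dataclass, and the longer message now surfaces that dataclass's fields. A toy reproduction of the check (stand-in dataclass and function, not the real loss module):

from dataclasses import dataclass

@dataclass
class AcceptedKeysDemo:
    advantage: str = "advantage"
    value: str = "state_value"

def set_keys_demo(**kwargs):
    for key in kwargs:
        if key not in AcceptedKeysDemo.__dataclass_fields__:
            raise ValueError(
                f"{key} is not an accepted tensordict key. "
                f"Accepted keys are: {AcceptedKeysDemo.__dataclass_fields__}."
            )

try:
    set_keys_demo(not_a_key="oops")
except ValueError as err:
    print(err)  # now lists every accepted key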

torchrl/objectives/ppo.py

Lines changed: 8 additions & 3 deletions
@@ -495,8 +495,13 @@ def _log_weight(
         if isinstance(action, torch.Tensor):
             log_prob = dist.log_prob(action)
         else:
-            tensordict = dist.log_prob(tensordict)
-            log_prob = tensordict.get(self.tensor_keys.sample_log_prob)
+            maybe_log_prob = dist.log_prob(tensordict)
+            if not isinstance(maybe_log_prob, torch.Tensor):
+                # In some cases (Composite distribution with aggregate_probabilities toggled off) the returned type may not
+                # be a tensor
+                log_prob = maybe_log_prob.get(self.tensor_keys.sample_log_prob)
+            else:
+                log_prob = maybe_log_prob

         log_weight = (log_prob - prev_log_prob).unsqueeze(-1)
         kl_approx = (prev_log_prob - log_prob).unsqueeze(-1)
@@ -1144,7 +1149,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDict:
                 x = previous_dist.sample((self.samples_mc_kl,))
                 previous_log_prob = previous_dist.log_prob(x)
                 current_log_prob = current_dist.log_prob(x)
-                if is_tensor_collection(x):
+                if is_tensor_collection(current_log_prob):
                     previous_log_prob = previous_log_prob.get(
                         self.tensor_keys.sample_log_prob
                     )
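The guard now inspects the log-prob output itself rather than the sampled value, in line with the a2c/ppo comments above: a composite sample can still produce a plain tensor log-prob when probabilities are aggregated. `is_tensor_collection` (exported by `tensordict`) is what distinguishes the two cases, e.g.:

import torch
from tensordict import TensorDict, is_tensor_collection

assert is_tensor_collection(TensorDict({"a": torch.zeros(3)}, [3]))
assert not is_tensor_collection(torch.zeros(3))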

torchrl/objectives/redq.py

Lines changed: 2 additions & 0 deletions
@@ -453,6 +453,8 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
         tensordict_select = tensordict.select(
             "next", *obs_keys, self.tensor_keys.action, strict=False
         )
+        # We need to copy bc select does not copy sub-tds
+        tensordict_select = tensordict_select.copy()

         selected_models_idx = torch.randperm(self.num_qvalue_nets)[
             : self.sub_sample_len
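Per the comment, `select` keeps references to the original nested tensordicts, so the extra `copy()` prevents later writes from leaking back into the input. A rough illustration of the intent (made-up keys; the copy behavior follows the rationale stated in the comment):

import torch
from tensordict import TensorDict

td = TensorDict({"next": TensorDict({"obs": torch.zeros(3)}, [3])}, [3])

shared = td.select("next")                 # the nested "next" td is shared, not copied
shared["next", "flag"] = torch.ones(3)     # leaks into td through the shared sub-td
assert ("next", "flag") in td.keys(include_nested=True)

detached = td.select("next").copy()        # copy() duplicates the nested containers
detached["next", "other"] = torch.ones(3)  # stays local to the copy
assert ("next", "other") not in td.keys(include_nested=True)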
