[BugFix] Fix partial device transfers in collector

Vincent Moens · Vincent Moens · commit afb81de51013 · 2025-01-20T13:23:09.000Z
ghstack-source-id: 2cd74c2 Pull Request resolved: #2703
diff --git a/test/test_collector.py b/test/test_collector.py
@@ -2233,7 +2233,9 @@ def __init__(self):
             self.out_keys = ["action"]
 
         def forward(self, td):
-            td["action"] = (self.param + self.buf).expand(td.shape)
+            td["action"] = (self.param + self.buf.to(self.param.device)).expand(
+                td.shape
+            )
             return td
 
     @pytest.mark.parametrize(
@@ -2288,6 +2290,64 @@ def test_param_sync(self, give_weights, collector, policy_device, env_device):
             col.shutdown()
             del col
 
+    @pytest.mark.parametrize(
+        "collector",
+        [
+            functools.partial(MultiSyncDataCollector, cat_results="stack"),
+            MultiaSyncDataCollector,
+        ],
+    )
+    @pytest.mark.parametrize("give_weights", [True, False])
+    @pytest.mark.parametrize(
+        "policy_device,env_device",
+        [
+            ["cpu", get_default_devices()[0]],
+            [get_default_devices()[0], "cpu"],
+            # ["cpu", "cuda:0"],  # 1226: faster execution
+            # ["cuda:0", "cpu"],
+            # ["cuda", "cuda:0"],
+            # ["cuda:0", "cuda"],
+        ],
+    )
+    def test_param_sync_mixed_device(
+        self, give_weights, collector, policy_device, env_device
+    ):
+        with torch.device("cpu"):
+            policy = TestUpdateParams.Policy()
+        policy.param = nn.Parameter(policy.param.data.to(policy_device))
+        assert policy.buf.device == torch.device("cpu")
+
+        env = EnvCreator(lambda: TestUpdateParams.DummyEnv(device=env_device))
+        device = env().device
+        env = [env]
+        col = collector(
+            env, policy, device=device, total_frames=200, frames_per_batch=10
+        )
+        try:
+            for i, data in enumerate(col):
+                if i == 0:
+                    assert (data["action"] == 0).all()
+                    # param += 1 and buf += 2, so a fully synced policy outputs 3
+                    policy.param.data += 1
+                    policy.buf.data += 2
+                    assert policy.buf.device == torch.device("cpu")
+                    if give_weights:
+                        p_w = TensorDict.from_module(policy)
+                    else:
+                        p_w = None
+                    col.update_policy_weights_(p_w)
+                elif i == 19:  # last batch: 200 frames / 10 per batch -> i in 0..19
+                    if (data["action"] == 1).all():
+                        raise RuntimeError("Failed to update buffer")
+                    elif (data["action"] == 2).all():
+                        raise RuntimeError("Failed to update params")
+                    elif (data["action"] == 0).all():
+                        raise RuntimeError("Failed to update params and buffers")
+                    assert (data["action"] == 3).all()
+        finally:
+            col.shutdown()
+            del col
+
 
 class TestAggregateReset:
     def test_aggregate_reset_to_root(self):
diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py
@@ -232,7 +232,8 @@ def map_weight(
 
         # Create a stateless policy, then populate this copy with params on device
         get_original_weights = functools.partial(TensorDict.from_module, policy)
-        with param_and_buf.to("meta").to_module(policy):
+        # Use ".data" here; otherwise buffers may be missing from what `get_original_weights` returns
+        with param_and_buf.data.to("meta").to_module(policy):
             policy = deepcopy(policy)
 
         param_and_buf.apply(