
Commit a872a5e

kausv authored and facebook-github-bot committed
Fix Init Param Test
Summary: This test was failing on the CUDA release run (https://www.internalfb.com/intern/test/281475075501969?ref_report_id=0) with ```invalid device pointer```. MultiProcessTest assigns devices on its own, so I removed the explicit device from the tests' EmbeddingConfig. Then distributed gather() failed because the tensor at position 0 was expected to be on a CUDA device but was found on CPU, so I allocated gathered_tensor on the process group's device. That in turn made assert_close fail because the devices no longer matched, so I copy the gathered tensor to CPU before the comparison.

Reviewed By: iamzainhuda

Differential Revision: D80547120
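The fix wraps the gathered tensor in none_throws before comparing, because only rank 0 allocates the output buffer and the variable is therefore Optional. As a hedged sketch, this is a minimal re-implementation of that helper's contract (the real one lives at torchrec.distributed.utils.none_throws; this stand-in only mirrors its behavior), followed by the rank-0-only pattern it supports:

```python
from typing import List, Optional, TypeVar

T = TypeVar("T")


def none_throws(x: Optional[T], msg: str = "Unexpected None") -> T:
    """Narrow Optional[T] to T, raising if the value is None.

    Sketch of the torchrec helper used in this commit; the real helper
    is torchrec.distributed.utils.none_throws.
    """
    if x is None:
        raise AssertionError(msg)
    return x


# The pattern from the fix: only rank 0 allocates the gather output,
# so the variable is Optional and must be narrowed before use.
rank = 0
gathered: Optional[List[float]] = [1.0, 2.0] if rank == 0 else None
if rank == 0:
    result = none_throws(gathered)  # safe: rank 0 allocated it
```

This keeps type checkers happy on the rank-0 branch without sprinkling `# type: ignore` or bare asserts through the test.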
1 parent f9d4bbf commit a872a5e

File tree

1 file changed (+5, -4 lines)


torchrec/distributed/tests/test_init_parameters.py

Lines changed: 5 additions & 4 deletions
```diff
@@ -36,6 +36,7 @@
     ShardingPlan,
     ShardingType,
 )
+from torchrec.distributed.utils import none_throws
 from torchrec.modules.embedding_configs import (
     DataType,
     EmbeddingBagConfig,
```
```diff
@@ -100,15 +101,17 @@ def initialize_and_test_parameters(
         )
     elif isinstance(model.state_dict()[key], ShardedTensor):
         if ctx.rank == 0:
-            gathered_tensor = torch.empty_like(embedding_tables.state_dict()[key])
+            gathered_tensor = torch.empty_like(
+                embedding_tables.state_dict()[key], device=ctx.device
+            )
         else:
             gathered_tensor = None

         model.state_dict()[key].gather(dst=0, out=gathered_tensor)

         if ctx.rank == 0:
             torch.testing.assert_close(
-                gathered_tensor,
+                none_throws(gathered_tensor).to("cpu"),
                 embedding_tables.state_dict()[key],
             )
     elif isinstance(model.state_dict()[key], torch.Tensor):
```
```diff
@@ -160,7 +163,6 @@ def test_initialize_parameters_ec(self, sharding_type: str) -> None:

         # Initialize embedding table on non-meta device, in this case cuda:0
         embedding_tables = EmbeddingCollection(
-            device=torch.device("cuda:0"),
             tables=[
                 EmbeddingConfig(
                     name=table_name,
```
```diff
@@ -210,7 +212,6 @@ def test_initialize_parameters_ebc(self, sharding_type: str) -> None:

         # Initialize embedding bag on non-meta device, in this case cuda:0
         embedding_tables = EmbeddingBagCollection(
-            device=torch.device("cuda:0"),
             tables=[
                 EmbeddingBagConfig(
                     name=table_name,
```
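The allocate-on-device, compare-on-CPU flow in the hunk above can be sketched on CPU alone (plain CPU tensors stand in for ctx.device and for the multi-process gather; `gathered.copy_(reference)` below is a hypothetical stand-in for `ShardedTensor.gather(dst=0, out=...)`, not the real collective):

```python
import torch

# Reference weights, standing in for embedding_tables.state_dict()[key].
reference = torch.arange(6, dtype=torch.float32).reshape(2, 3)

# Allocate the gather buffer on the process group's device, as the fix
# does with device=ctx.device (here CPU stands in for that device).
gathered = torch.empty_like(reference, device=torch.device("cpu"))

# Stand-in for ShardedTensor.gather(dst=0, out=gathered_tensor).
gathered.copy_(reference)

# assert_close requires matching devices, hence the .to("cpu") in the fix.
torch.testing.assert_close(gathered.to("cpu"), reference)
```

The `.to("cpu")` is a no-op when the buffer is already on CPU, but on a CUDA process group it is what lets the comparison against the CPU-resident reference state dict succeed.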
