rebase + fix some tests

bnellnm · bnellnm · commit c4086d706727 · 2025-05-14T14:56:26.000Z
Signed-off-by: Bill Nell <bnell@redhat.com>
diff --git a/tests/kernels/moe/test_batched_moe.py b/tests/kernels/moe/test_batched_moe.py
@@ -31,7 +31,7 @@ def make_tensors(config: BatchedMMConfig):
         A = torch.randn(
             (config.num_experts, config.max_tokens_per_expert, config.K),
             device="cuda",
-            dtype=config.dtype)
+            dtype=config.dtype) / 10
         B = torch.randn((config.num_experts, config.N, config.K),
                         device="cuda",
                         dtype=config.dtype)
diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
@@ -155,7 +155,7 @@ def flatten_tp_across_dp(dp_rank: int):
                   and vllm_parallel_config.enable_expert_parallel)
 
         dp_size = dp_size_
-        dp_rank = get_dp_group().rank_in_group
+        dp_rank = get_dp_group().rank_in_group if dp_size > 1 else 0
         tp_size, tp_rank = flatten_tp_across_dp(dp_rank)
 
         if not use_ep:
@@ -299,6 +299,7 @@ def get_or_create(self, **kwargs):
                 # TODO (varun): Add support to switch to intranode
                 # when all communications are within the same
                 # node.
+                logger.debug("Create AllToAll %s", kwargs)
                 instance = pplx.AllToAll.internode(**kwargs)
                 self._cache[key] = instance
             return instance