
Commit 99bdc4b

Author: offline0806
Commit message: Merge remote-tracking branch 'upstream_gitee/main_eplb_0909'
2 parents: 7725088 + 3ea4fff

File tree: 6 files changed (+241, -6 lines)
Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
import torch
import unittest
from vllm_ascend.eplb.core.eplb_utils import determine_default_expert_map


class TestDetermineDefaultExpertMap(unittest.TestCase):

    def test_world_size_1(self):
        global_expert_num = 8
        world_size = 1
        global_redundant_expert_num = 0

        expected_counts = [8]
        expected_maps = [[0, 1, 2, 3, 4, 5, 6, 7]]

        local_count, expert_map = determine_default_expert_map(
            global_expert_num, world_size, 0, global_redundant_expert_num)

        self.assertEqual(local_count, expected_counts[0])

        expected_tensor = torch.tensor(expected_maps[0], dtype=torch.int32)
        self.assertTrue(torch.all(expert_map == expected_tensor).item())

    def test_equal_distribution(self):
        global_expert_num = 6
        world_size = 3
        global_redundant_expert_num = 0

        expected_counts = [2, 2, 2]
        expected_maps = [
            [0, 1, -1, -1, -1, -1],  # rank 0
            [-1, -1, 0, 1, -1, -1],  # rank 1
            [-1, -1, -1, -1, 0, 1]  # rank 2
        ]

        for rank_id in range(world_size):
            local_count, expert_map = determine_default_expert_map(
                global_expert_num, world_size, rank_id,
                global_redundant_expert_num)

            self.assertEqual(
                local_count,
                expected_counts[rank_id],
            )

            expected_tensor = torch.tensor(expected_maps[rank_id],
                                           dtype=torch.int32)
            self.assertTrue(torch.all(expert_map == expected_tensor).item())

    def test_unequal_distribution(self):
        global_expert_num = 10
        world_size = 3
        global_redundant_expert_num = 0

        expected_counts = [3, 3, 4]
        expected_maps = [
            [0, 1, 2, -1, -1, -1, -1, -1, -1, -1],  # rank 0
            [-1, -1, -1, 0, 1, 2, -1, -1, -1, -1],  # rank 1
            [-1, -1, -1, -1, -1, -1, 0, 1, 2, 3]  # rank 2
        ]

        for rank_id in range(world_size):
            local_count, expert_map = determine_default_expert_map(
                global_expert_num, world_size, rank_id,
                global_redundant_expert_num)

            self.assertEqual(local_count, expected_counts[rank_id])

            expected_tensor = torch.tensor(expected_maps[rank_id],
                                           dtype=torch.int32)
            self.assertTrue(torch.all(expert_map == expected_tensor).item())

    def test_with_redundancy(self):
        global_expert_num = 7
        world_size = 3
        global_redundant_expert_num = 2

        expected_counts = [3, 3, 3]
        expected_maps = [
            [0, 1, 2, -1, -1, -1, -1],  # rank 0
            [-1, -1, 0, 1, 2, -1, -1],  # rank 1
            [-1, -1, -1, -1, 0, 1, 2]  # rank 2
        ]

        for rank_id in range(world_size):
            local_count, expert_map = determine_default_expert_map(
                global_expert_num, world_size, rank_id,
                global_redundant_expert_num)

            self.assertEqual(local_count, expected_counts[rank_id])

            expected_tensor = torch.tensor(expected_maps[rank_id],
                                           dtype=torch.int32)
            self.assertTrue(torch.all(expert_map == expected_tensor).item())

    def test_redundancy_at_boundary(self):
        global_expert_num = 5
        world_size = 2
        global_redundant_expert_num = 1

        expected_counts = [3, 3]
        expected_maps = [[0, 1, 2, -1, -1], [-1, -1, 0, 1, 2]]

        for rank_id in range(world_size):
            local_count, expert_map = determine_default_expert_map(
                global_expert_num, world_size, rank_id,
                global_redundant_expert_num)

            self.assertEqual(local_count, expected_counts[rank_id])

            expected_tensor = torch.tensor(expected_maps[rank_id],
                                           dtype=torch.int32)
            self.assertTrue(torch.all(expert_map == expected_tensor).item())

vllm_ascend/ascend_config.py

Lines changed: 6 additions & 0 deletions
@@ -45,7 +45,13 @@ def __init__(self, vllm_config):
             ascend_scheduler_config)

         self.expert_map_path = additional_config.get("expert_map_path", None)
+        self.expert_map_record_path = additional_config.get(
+            "expert_map_record_path",
+            None)  # Provide path to export expert map
         # Eplb config
+        self.init_redundancy_expert = additional_config.get(
+            "init_redundancy_expert",
+            0)
         self.dynamic_eplb = additional_config.get("dynamic_eplb", False)
         self.num_iterations_eplb_update = additional_config.get("num_iterations_eplb_update", 400)
         self.gate_eplb = additional_config.get("gate_eplb", False)
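
Both new knobs are plain entries in additional_config, alongside the existing EPLB options. A hedged sketch of how they might be supplied; the values and the record path are illustrative assumptions, not part of this diff:

    # Hedged sketch: keys match the additions above; values and path are hypothetical.
    additional_config = {
        "dynamic_eplb": True,
        "init_redundancy_expert": 2,  # give the first 2 EP ranks one redundant expert each
        "expert_map_record_path": "/tmp/expert_map_record.json",  # export the final expert map here
    }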

vllm_ascend/eplb/adaptor/vllm_adaptor.py

Lines changed: 62 additions & 1 deletion
@@ -21,6 +21,7 @@
 import torch.distributed as dist
 from vllm.logger import logger

+from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.eplb.adaptor.abstract_adaptor import EplbAdaptor


@@ -39,6 +40,7 @@ def __init__(self, model, **args):
         self.num_dense_layers = self.model.config.first_k_dense_replace
         self.global_expert_num = self.model.config.n_routed_experts
         self.num_moe_layers = self.model.config.num_hidden_layers - self.num_dense_layers
+        self.init_redundancy_expert = get_ascend_config().init_redundancy_expert

         # TODO: init self.expert_weight_names depending on different model types, only deepseek v3 w8a8 and qwen3-moe is supported here
         if self.model.quant_config is not None:
@@ -158,6 +160,35 @@ def _expert_file_to_tensor(self, expert_map_path: str):
             return expert_map_tensor, layers_num, gpus_num
         logger.error(f"failed to read expert_map_path: {expert_map_path}")

+    def _export_tensor_to_file(self, expert_maps, expert_map_record_path: str):
+        num_local_experts = expert_maps.max() + 1
+        expert_maps_local = self.global2local(expert_maps, num_local_experts)
+
+        expert_maps_list = expert_maps_local.tolist()
+        record: dict[str, Any] = {
+            "moe_layer_count": len(expert_maps_list),
+            "layer_list": []
+        }
+
+        for layer_idx, layer_data in enumerate(expert_maps_list):
+            layer_record: dict[str, Any] = {
+                "layer_id": layer_idx,
+                "device_count": len(layer_data),
+                "device_list": []
+            }
+
+            for device_idx, experts in enumerate(layer_data):
+                device_record = {
+                    "device_id": device_idx,
+                    "device_expert": experts
+                }
+                layer_record["device_list"].append(device_record)
+
+            record["layer_list"].append(layer_record)
+
+        with open(expert_map_record_path, "w") as f:
+            json.dump(record, f, indent=4)
+
     def do_update_expert_map(self, layer_id, updated_expert_map):
         self.expert_map_per_layer[layer_id].copy_(updated_expert_map)
         self.expert_map_per_layer_cpu[layer_id].copy_(updated_expert_map)
@@ -173,6 +204,26 @@ def do_update_log2phy_map(self, layer_id, updated_log2phy_map):
         if self.log2phy_map_per_layer[layer_id] is not None:
             self.log2phy_map_per_layer[layer_id].copy_(updated_log2phy_map)

+    def global2local(self, placement: torch.Tensor,
+                     E_local: int) -> torch.Tensor:
+
+        L, G, _ = placement.shape
+        device = placement.device
+
+        pt_local = torch.full((L, G, E_local),
+                              fill_value=-1,
+                              dtype=torch.long,
+                              device=device)
+
+        valid = placement >= 0
+        l_idx, g_idx, k_idx = valid.nonzero(as_tuple=True)
+
+        slot_idx = placement[l_idx, g_idx, k_idx]
+
+        pt_local[l_idx, g_idx, slot_idx] = k_idx
+
+        return pt_local
+
     def local2global(self, placement_local: torch.Tensor) -> torch.Tensor:

         L, G, E_local = placement_local.shape
@@ -198,7 +249,10 @@ def local2global(self, placement_local: torch.Tensor) -> torch.Tensor:
         return placement_global

     def determine_expert_map_all(self):
-
+        if self.world_size == 1:
+            local_ids = torch.arange(self.global_expert_num, dtype=torch.int32)
+            return local_ids.view(1, 1, -1).expand(self.num_moe_layers, 1, -1)
+
         local_num_experts = self.global_expert_num // self.world_size

         expert_map_all = torch.full(
@@ -215,6 +269,13 @@ def determine_expert_map_all(self):
                 start = r * local_num_experts
                 end = self.global_expert_num
                 local_count = self.global_expert_num - r * local_num_experts
+
+            if r < self.init_redundancy_expert:
+                local_count += 1
+                if end < self.global_expert_num:
+                    end += 1
+                else:
+                    start -= 1

             local_ids = torch.arange(local_count, dtype=torch.int32)
             expert_map_all[:, r, start:end] = local_ids.unsqueeze(0).expand(
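
To make the new global2local helper concrete, here is a hedged, standalone restatement of the same indexing trick on a toy placement tensor; the function name is reused for illustration and the toy data is assumed, not taken from the adaptor:

    # Standalone sketch of the global2local inversion added above.
    import torch


    def global2local(placement: torch.Tensor, e_local: int) -> torch.Tensor:
        # placement[l, g, k] = local slot of global expert k on device g, or -1 if absent.
        L, G, _ = placement.shape
        pt_local = torch.full((L, G, e_local), -1, dtype=torch.long)
        l_idx, g_idx, k_idx = (placement >= 0).nonzero(as_tuple=True)
        # Invert the mapping: pt_local[l, g, slot] = global expert id held in that slot.
        pt_local[l_idx, g_idx, placement[l_idx, g_idx, k_idx]] = k_idx
        return pt_local


    # One MoE layer, two devices, four global experts, two local slots per device.
    placement = torch.tensor([[[0, 1, -1, -1],
                               [-1, -1, 0, 1]]])
    print(global2local(placement, e_local=2))
    # tensor([[[0, 1],
    #          [2, 3]]])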

vllm_ascend/eplb/core/eplb_utils.py

Lines changed: 45 additions & 1 deletion
@@ -20,6 +20,37 @@
 import torch


+def determine_default_expert_map(global_expert_num, world_size, rank_id,
+                                 global_redundant_expert_num):
+    if world_size == 1:
+        local_ids = torch.arange(global_expert_num, dtype=torch.int32)
+        return (global_expert_num, local_ids)
+
+    local_num_experts = global_expert_num // world_size
+
+    expert_map = torch.full((global_expert_num, ), -1, dtype=torch.int32)
+
+    if rank_id < world_size - 1:
+        start = rank_id * local_num_experts
+        end = (rank_id + 1) * local_num_experts
+        local_count = local_num_experts
+    else:
+        start = rank_id * local_num_experts
+        end = global_expert_num
+        local_count = global_expert_num - rank_id * local_num_experts
+
+    if rank_id < global_redundant_expert_num:
+        local_count += 1
+        if end < global_expert_num:
+            end += 1
+        else:
+            start -= 1
+
+    local_ids = torch.arange(local_count, dtype=torch.int32)
+    expert_map[start:end] = local_ids
+
+    return (local_count, expert_map)
+
 def generate_log2phy_map(expert_map):
     num_local_experts = expert_map.max() + 1
     log2phy_map = expert_map.clone()
@@ -50,7 +81,13 @@ def generate_log2phy_map(expert_map):
     return log2phy_map


-def determine_default_log2phy_map(global_expert_num, world_size, rank_id):
+def determine_default_log2phy_map(global_expert_num, world_size, rank_id, global_redundant_expert_num):
+    if world_size == 1:
+        local_ids = torch.arange(global_expert_num, dtype=torch.int32)
+        expert_map_all = local_ids.unsqueeze(0).expand(world_size, -1)
+        log2phy_map_all = generate_log2phy_map(expert_map_all)
+        return log2phy_map_all[rank_id]
+
     local_num_experts = global_expert_num // world_size

     expert_map_all = torch.full((world_size, global_expert_num),
@@ -66,6 +103,13 @@ def determine_default_log2phy_map(global_expert_num, world_size, rank_id):
             start = r * local_num_experts
             end = global_expert_num
             local_count = global_expert_num - r * local_num_experts
+
+        if r < global_redundant_expert_num:
+            local_count += 1
+            if end < global_expert_num:
+                end += 1
+            else:
+                start -= 1

         local_ids = torch.arange(local_count, dtype=torch.int32)
         expert_map_all[r, start:end] = local_ids
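
As a quick check, determine_default_expert_map can be called directly; this mirrors the test_with_redundancy case from the new test file (7 experts, 3 ranks, 2 redundant experts), so the expected values below come from that test rather than a separate run:

    from vllm_ascend.eplb.core.eplb_utils import determine_default_expert_map

    local_count, expert_map = determine_default_expert_map(
        global_expert_num=7, world_size=3, rank_id=1, global_redundant_expert_num=2)
    print(local_count)           # 3
    print(expert_map.tolist())   # [-1, -1, 0, 1, 2, -1, -1]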

vllm_ascend/eplb/eplb_updator.py

Lines changed: 6 additions & 0 deletions
@@ -39,6 +39,7 @@ def init_eplb(self, expert_map_path, process):
         self.periodic_load_gather = True
         self.num_iterations_eplb_update: torch.int64 = self.ascend_config.num_iterations_eplb_update
         self.expert_map_path = expert_map_path
+        self.expert_map_record_path = self.ascend_config.expert_map_record_path

         try:
             if not envs.VLLM_ALLOW_EXPERT_LOAD_COLLECTING:
@@ -82,6 +83,11 @@ def update_iteration(self):
         self.cur_iterations += 1
         if self.cur_iterations == (self.num_iterations_eplb_update + \
                 self.num_wait_worker_iterations + self.num_moe_layers):
+            if self.expert_map_record_path is not None:
+                self.adaptor._export_tensor_to_file(
+                    self.shared_dict["expert_maps"],
+                    self.expert_map_record_path)
+
             self.adaptor.model.clear_all_moe_loads()
             if not self.gate_eplb:
                 self.cur_iterations = 0
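
The export runs when the iteration counter reaches the end of an update cycle, and only if expert_map_record_path is set. A hedged back-of-the-envelope for the trigger iteration, using the default num_iterations_eplb_update of 400 and purely hypothetical values for the other two terms:

    # Hypothetical illustration of the trigger condition in update_iteration.
    num_iterations_eplb_update = 400   # default from ascend_config
    num_wait_worker_iterations = 30    # hypothetical
    num_moe_layers = 58                # hypothetical
    print(num_iterations_eplb_update + num_wait_worker_iterations + num_moe_layers)  # 488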

vllm_ascend/ops/fused_moe.py

Lines changed: 9 additions & 4 deletions
@@ -311,11 +311,16 @@ def __init__(
             self.ep_size,
             get_ep_group().rank_in_group, self.global_num_experts)
         if self.dynamic_eplb:
-            from vllm_ascend.eplb.core.eplb_utils import \
-                determine_default_log2phy_map
+            self.global_redundant_expert_num = ascend_config.init_redundancy_expert
+            from vllm_ascend.eplb.core.eplb_utils import (
+                determine_default_expert_map,
+                determine_default_log2phy_map)
+            self.local_num_experts, self.expert_map = determine_default_expert_map(
+                self.global_num_experts, self.ep_size, self.ep_rank,
+                self.global_redundant_expert_num)
             self.log2phy = determine_default_log2phy_map(
-                self.global_num_experts, self.ep_size, self.ep_rank
-            )
+                self.global_num_experts, self.ep_size, self.ep_rank,
+                self.global_redundant_expert_num)

         self.enable_shared_expert_dp = ascend_config.enable_shared_expert_dp
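
With dynamic EPLB enabled, each MoE layer now derives its default expert_map (and the matching log2phy map) from the redundancy-aware helpers. A hedged per-rank preview for a toy setup of 5 global experts, ep_size 2, and one redundant expert, matching test_redundancy_at_boundary above:

    from vllm_ascend.eplb.core.eplb_utils import determine_default_expert_map

    for ep_rank in range(2):
        local_num_experts, expert_map = determine_default_expert_map(5, 2, ep_rank, 1)
        print(ep_rank, local_num_experts, expert_map.tolist())
    # 0 3 [0, 1, 2, -1, -1]
    # 1 3 [-1, -1, 0, 1, 2]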
