 27 |  27 |     FusedMoEParallelConfig  # isort: skip
 28 |  28 | from vllm.model_executor.layers.fused_moe.layer import (
 29 |  29 |     FusedMoE, UnquantizedFusedMoEMethod, determine_expert_map)
 30 |     | -
    |  30 | +from vllm_ascend.eplb.core.eplb_utils import (
    |  31 | +    determine_default_expert_map,
    |  32 | +    determine_default_log2phy_map)
 31 |  33 | from vllm_ascend.ascend_config import get_ascend_config
 32 |  34 | from vllm_ascend.distributed.parallel_state import get_mc2_group
 33 |  35 | from vllm_ascend.ops.expert_load_balancer import ExpertLoadBalancer
 34 |  36 | from vllm_ascend.ops.moe.experts_selector import select_experts
 35 |  37 | from vllm_ascend.ops.moe.moe_comm_method import (AllGatherCommImpl,
 36 |  38 |                                                   AlltoAllCommImpl, MC2CommImpl)
 37 |  39 | from vllm_ascend.ops.moe.token_dispatcher import setup_token_dispatchers
 38 |     | -from vllm_ascend.utils import ACL_FORMAT_FRACTAL_NZ, is_310p
 39 |  40 | from vllm_ascend.utils import ACL_FORMAT_FRACTAL_NZ, is_310p, vllm_version_is
 40 |     | -from vllm.logger import logger
 41 |  41 |
 42 |  42 | original_unquantized_fused_moe_init_func = UnquantizedFusedMoEMethod.__init__
 43 |  43 |
@@ -298,31 +298,26 @@ def __init__(
298 | 298 |         self.moe_config.mc2_group = get_mc2_group()
299 | 299 |         ascend_config = get_ascend_config()
300 | 300 |         self.dynamic_eplb = ascend_config.dynamic_eplb
301 |     | -        self.expert_map_path = ascend_config.expert_map_path
302 |     | -        self.global_redundant_expert_num = ascend_config.init_redundancy_expert
303 |     | -        self.global_num_experts = num_experts + self.global_redundant_expert_num
304 |     | -        if self.expert_map_path and os.path.exists(self.expert_map_path) and os.access(self.expert_map_path, os.R_OK):
305 |     | -            self.expert_load_balancer = ExpertLoadBalancer(self.expert_map_path, self.global_num_experts)
306 |     | -            self.local_num_experts, self.expert_map = (self.expert_load_balancer.get_rank_placement_map(self.moe_instance_id, self.ep_rank))
307 |     | -            self.log2phy = self.expert_load_balancer.get_rank_log2phy_map(self.moe_instance_id, self.ep_rank).npu()
308 |     | -            self.global_redundant_expert_num = (self.expert_load_balancer.get_global_redundant_expert_num())
309 |     | -        else:
310 |     | -            self.local_num_experts, self.expert_map = determine_expert_map(self.ep_size, self.ep_rank, self.global_num_experts)
311 |     | -        if self.dynamic_eplb:
312 |     | -            self.global_redundant_expert_num = ascend_config.init_redundancy_expert
313 |     | -            from vllm_ascend.eplb.core.eplb_utils import (
314 |     | -                determine_default_expert_map,
315 |     | -                determine_default_log2phy_map)
316 |     | -            self.local_num_experts, self.expert_map = determine_default_expert_map(
317 |     | -                self.global_num_experts, self.ep_size, self.ep_rank,
318 |     | -                self.global_redundant_expert_num)
319 |     | -            self.log2phy = determine_default_log2phy_map(
320 |     | -                self.global_num_experts, self.ep_size, self.ep_rank,
321 |     | -                self.global_redundant_expert_num)
322 |     | -
323 |     | -        self.moe_load = None
324 |     | -        local_num_experts = (torch.sum(self.expert_map != -1) if self.expert_map is not None else num_experts)
325 | 301 |         if self.dynamic_eplb:
    | 302 | +            self.expert_map_path = ascend_config.expert_map_path
    | 303 | +            self.global_redundant_expert_num = ascend_config.init_redundancy_expert
    | 304 | +            self.global_num_experts = num_experts + self.global_redundant_expert_num
    | 305 | +            if self.expert_map_path and os.path.exists(self.expert_map_path) and os.access(self.expert_map_path, os.R_OK):
    | 306 | +                self.expert_load_balancer = ExpertLoadBalancer(self.expert_map_path, self.global_num_experts)
    | 307 | +                self.local_num_experts, self.expert_map = (self.expert_load_balancer.get_rank_placement_map(self.moe_instance_id, self.ep_rank))
    | 308 | +                self.log2phy = self.expert_load_balancer.get_rank_log2phy_map(self.moe_instance_id, self.ep_rank).npu()
    | 309 | +                self.global_redundant_expert_num = (self.expert_load_balancer.get_global_redundant_expert_num())
    | 310 | +            else:
    | 311 | +                self.local_num_experts, self.expert_map = determine_expert_map(self.ep_size, self.ep_rank, self.global_num_experts)
    | 312 | +                if self.dynamic_eplb:
    | 313 | +                    self.global_redundant_expert_num = ascend_config.init_redundancy_expert
    | 314 | +                    self.local_num_experts, self.expert_map = determine_default_expert_map(
    | 315 | +                        self.global_num_experts, self.ep_size, self.ep_rank,
    | 316 | +                        self.global_redundant_expert_num)
    | 317 | +                    self.log2phy = determine_default_log2phy_map(
    | 318 | +                        self.global_num_experts, self.ep_size, self.ep_rank,
    | 319 | +                        self.global_redundant_expert_num)
    | 320 | +            local_num_experts = (torch.sum(self.expert_map != -1) if self.expert_map is not None else num_experts)
326 | 321 |             self.moe_load = torch.zeros(local_num_experts, dtype=torch.int64)
327 | 322 |
328 | 323 |
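For context on the fallback branch above (new lines 311-319): when no readable `expert_map_path` is supplied, the default placement helpers split the physical experts (logical plus redundant) across the EP ranks and build a per-rank map from global expert id to local slot. The sketch below illustrates that idea under the assumption of a plain even split; it is not the actual `determine_default_expert_map` from `vllm_ascend.eplb.core.eplb_utils`, whose redundant-expert and remainder handling may differ, and the function name `sketch_default_expert_map` is purely illustrative.

```python
# Illustrative sketch only -- NOT vllm_ascend's eplb_utils implementation.
# Assumes the physical expert slots are split evenly across the EP ranks.
import torch


def sketch_default_expert_map(global_num_experts: int, ep_size: int,
                              ep_rank: int) -> tuple[int, torch.Tensor]:
    """Return (local_num_experts, expert_map) for `ep_rank`.

    expert_map[g] is the local slot index of global expert g on this rank,
    or -1 if expert g is hosted on a different rank.
    """
    base, rem = divmod(global_num_experts, ep_size)
    # Ranks [0, rem) take one extra expert to absorb the remainder.
    local_num = base + (1 if ep_rank < rem else 0)
    start = ep_rank * base + min(ep_rank, rem)

    expert_map = torch.full((global_num_experts, ), -1, dtype=torch.int32)
    expert_map[start:start + local_num] = torch.arange(local_num,
                                                       dtype=torch.int32)
    return local_num, expert_map


if __name__ == "__main__":
    # e.g. 16 logical experts + 2 redundant slots over 4 EP ranks, rank 1:
    # rank 1 owns globals 5..9, mapped to local slots 0..4.
    local_num, expert_map = sketch_default_expert_map(16 + 2, 4, 1)
    print(local_num, expert_map.tolist())
```

In the actual diff, `determine_default_log2phy_map` additionally produces the per-rank logical-to-physical table assigned to `self.log2phy`; the sketch does not attempt to reproduce that step.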