Commit 76a134f

Author: offline0806 (committed)
[EPLB]Fix ci.
Signed-off-by: offline0806 <z00858301@china.huawei.com>
1 parent 2037241, commit 76a134f

6 files changed (+16, -21 lines)

vllm_ascend/eplb/core/eplb_utils.py

Lines changed: 2 additions & 1 deletion
@@ -39,7 +39,8 @@ def determine_default_expert_map(global_expert_num, world_size, rank_id,
         end = global_expert_num
         local_count = global_expert_num - rank_id * local_num_experts
 
-    if isinstance(global_redundant_expert_num, int) and rank_id < global_redundant_expert_num:
+    if isinstance(global_redundant_expert_num,
+                  int) and rank_id < global_redundant_expert_num:
         local_count += 1
         if end < global_expert_num:
             end += 1
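
Note: the hunk above only re-wraps the condition for line length; the behaviour it keeps is that the first global_redundant_expert_num ranks each host one extra (redundant) expert. A minimal standalone sketch of that idea, with an illustrative even base split rather than the project's actual helper:

# Illustrative sketch only (assumes an even base split of experts per rank).
def local_expert_count(global_expert_num: int, world_size: int, rank_id: int,
                       global_redundant_expert_num: int) -> int:
    local_count = global_expert_num // world_size
    # Same guard as in the diff: only the lowest-numbered ranks host a
    # redundant copy, one per rank, up to global_redundant_expert_num.
    if isinstance(global_redundant_expert_num,
                  int) and rank_id < global_redundant_expert_num:
        local_count += 1
    return local_count

# Example: 16 experts, 4 ranks, 2 redundant experts -> [5, 5, 4, 4]
print([local_expert_count(16, 4, r, 2) for r in range(4)])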

vllm_ascend/ops/common_fused_moe.py

Lines changed: 2 additions & 4 deletions
@@ -246,8 +246,7 @@ def __init__(self, *args, **kwargs):
         self.global_redundant_expert_num = ascend_config.init_redundancy_expert
         # static eplb initializing with expert_map_path
         if self.expert_map_path and os.path.exists(
-                self.expert_map_path) and os.access(
-                    self.expert_map_path, os.R_OK):
+                self.expert_map_path) and os.access(self.expert_map_path, os.R_OK):
             self.expert_load_balancer = ExpertLoadBalancer(
                 self.expert_map_path, self.global_num_experts)
             self.local_num_experts, self.expert_map = (
@@ -256,8 +255,7 @@ def __init__(self, *args, **kwargs):
             self.log2phy = self.expert_load_balancer.get_rank_log2phy_map(
                 self.moe_instance_id, self.ep_rank).npu()
             self.global_redundant_expert_num = (
-                self.expert_load_balancer.get_global_redundant_expert_num(
-                ))
+                self.expert_load_balancer.get_global_redundant_expert_num())
         else:
             # init moe.
             self.local_num_experts, self.expert_map = determine_expert_map(
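
Both hunks only re-wrap existing logic: static EPLB is taken when expert_map_path points to an existing, readable file, otherwise the default map is built. A minimal sketch of that guard, using hypothetical stand-in helpers rather than the module's real loaders:

import os
from typing import Optional

# Hypothetical stand-ins for the real static/default map loaders.
def load_static_map(path: str) -> dict:
    return {"source": "static", "path": path}

def build_default_map() -> dict:
    return {"source": "default"}

def resolve_expert_map(expert_map_path: Optional[str]) -> dict:
    # Same check as the diff: take the static-EPLB branch only when the map
    # file both exists and is readable by the current process.
    if expert_map_path and os.path.exists(expert_map_path) and os.access(
            expert_map_path, os.R_OK):
        return load_static_map(expert_map_path)
    return build_default_map()

print(resolve_expert_map(None))  # -> {'source': 'default'}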

vllm_ascend/ops/fused_moe.py

Lines changed: 4 additions & 6 deletions
@@ -244,8 +244,7 @@ def __init__(
         self.global_num_experts = num_experts + self.global_redundant_expert_num
         # static eplb initializing with expert_map_path
         if self.expert_map_path and os.path.exists(
-                self.expert_map_path) and os.access(
-                    self.expert_map_path, os.R_OK):
+                self.expert_map_path) and os.access(self.expert_map_path, os.R_OK):
             self.expert_load_balancer = ExpertLoadBalancer(
                 self.expert_map_path, self.global_num_experts)
             self.local_num_experts, self.expert_map = (
@@ -254,8 +253,7 @@ def __init__(
             self.log2phy = self.expert_load_balancer.get_rank_log2phy_map(
                 self.moe_instance_id, self.ep_rank).npu()
             self.global_redundant_expert_num = (
-                self.expert_load_balancer.get_global_redundant_expert_num(
-                ))
+                self.expert_load_balancer.get_global_redundant_expert_num())
         else:
             # init moe.
             self.local_num_experts, self.expert_map = determine_expert_map(
@@ -269,8 +267,8 @@ def __init__(
             self.log2phy = determine_default_log2phy_map(
                 self.global_num_experts, self.ep_size, self.ep_rank,
                 self.global_redundant_expert_num)
-        local_num_experts = (torch.sum(self.expert_map != -1) if
-                             self.expert_map is not None else num_experts)
+        local_num_experts = (torch.sum(self.expert_map != -1)
+                             if self.expert_map is not None else num_experts)
         if self.dynamic_eplb:
             self.moe_load = torch.zeros(local_num_experts, dtype=torch.int64)
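
The last hunk only reflows the conditional expression; for reference, a small sketch of what it computes: entries of the expert map equal to -1 mark experts not hosted on this rank, so counting entries != -1 gives the local expert count, and the per-expert load buffer is only allocated under dynamic EPLB. Values below are made up for illustration.

import torch

# Toy expert_map: -1 means "not hosted on this rank", otherwise a local index.
expert_map = torch.tensor([-1, -1, 0, 1, -1, 2, -1, -1])
num_experts = expert_map.numel()

local_num_experts = (torch.sum(expert_map != -1)
                     if expert_map is not None else num_experts)
print(int(local_num_experts))  # -> 3

# The load counter is only needed for dynamic EPLB, mirroring the
# `if self.dynamic_eplb:` guard in the diff.
dynamic_eplb = True
moe_load = (torch.zeros(int(local_num_experts), dtype=torch.int64)
            if dynamic_eplb else None)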

vllm_ascend/quantization/w4a8_dynamic.py

Lines changed: 1 addition & 1 deletion
@@ -23,8 +23,8 @@
 from vllm.config import get_current_vllm_config
 from vllm.distributed import get_ep_group
 from vllm.forward_context import get_forward_context
-from vllm_ascend.ascend_config import get_ascend_config
 
+from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.distributed.parallel_state import get_mc2_group
 from vllm_ascend.ops.moe.experts_selector import select_experts
vllm_ascend/quantization/w8a8_dynamic.py

Lines changed: 1 addition & 2 deletions
@@ -230,8 +230,7 @@ def apply(
             w1_scale=layer.w13_weight_scale,
             w2_scale=layer.w2_weight_scale,
             expert_map=expert_map,
-            dynamic_eplb=self.dynamic_eplb
-        )
+            dynamic_eplb=self.dynamic_eplb)
 
         # this is a naive implementation for experts load balance so as
         # to avoid accumulating too much tokens on a single rank.

vllm_ascend/torchair/ops/torchair_fused_moe.py

Lines changed: 6 additions & 7 deletions
@@ -1019,8 +1019,7 @@ def __init__(
         self.global_num_experts = num_experts + self.global_redundant_expert_num
         # static eplb initializing with expert_map_path
         if self.expert_map_path and os.path.exists(
-                self.expert_map_path) and os.access(
-                    self.expert_map_path, os.R_OK):
+                self.expert_map_path) and os.access(self.expert_map_path, os.R_OK):
             self.expert_load_balancer = ExpertLoadBalancer(
                 self.expert_map_path, self.global_num_experts)
             self.local_num_experts, self.expert_map = (
@@ -1029,8 +1028,7 @@ def __init__(
             self.log2phy = self.expert_load_balancer.get_rank_log2phy_map(
                 self.moe_instance_id, self.ep_rank).npu()
             self.global_redundant_expert_num = (
-                self.expert_load_balancer.get_global_redundant_expert_num(
-                ))
+                self.expert_load_balancer.get_global_redundant_expert_num())
         else:
             # init moe.
             self.local_num_experts, self.expert_map = determine_expert_map(
@@ -1044,9 +1042,10 @@ def __init__(
             self.log2phy = determine_default_log2phy_map(
                 self.global_num_experts, self.ep_size, self.ep_rank,
                 self.global_redundant_expert_num)
-        local_num_experts = (torch.sum(self.expert_map != -1) if
-                             self.expert_map is not None else num_experts)
-        self.moe_load = torch.zeros(local_num_experts, dtype=torch.int64)
+        local_num_experts = (torch.sum(self.expert_map != -1)
+                             if self.expert_map is not None else num_experts)
+        if self.dynamic_eplb:
+            self.moe_load = torch.zeros(local_num_experts, dtype=torch.int64)
 
         self.torchair_graph_enabled = ascend_config.torchair_graph_config.enabled
         self.enable_multistream_moe = \
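
Unlike the pure formatting hunks, the last change here also guards the moe_load allocation behind dynamic_eplb, matching fused_moe.py above, so the counter simply does not exist when dynamic EPLB is disabled. A hedged sketch of that pattern (the attribute and flag names come from the diff; the class and its record method are illustrative, not the project's code):

import torch

class MoELoadTracker:
    """Illustrative only: allocate the per-expert load counter lazily."""

    def __init__(self, local_num_experts: int, dynamic_eplb: bool):
        self.dynamic_eplb = dynamic_eplb
        if self.dynamic_eplb:
            # One int64 slot per locally hosted expert, as in the diff.
            self.moe_load = torch.zeros(local_num_experts, dtype=torch.int64)

    def record(self, expert_id: int, num_tokens: int) -> None:
        # Only meaningful when dynamic EPLB is collecting load statistics.
        if self.dynamic_eplb:
            self.moe_load[expert_id] += num_tokens

tracker = MoELoadTracker(local_num_experts=4, dynamic_eplb=True)
tracker.record(2, 128)
print(tracker.moe_load)  # -> tensor([  0,   0, 128,   0])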
