Commit 4b2aa57 (parent: c007295)

fix CI problem

Signed-off-by: Yuxiao-Xu <664988918@qq.com>

4 files changed: 35 additions, 18 deletions

vllm_ascend/ops/expert_load_balancer.py (19 additions, 10 deletions)

@@ -1,12 +1,15 @@
-import torch
 import json
 import random
+from typing import List, Dict
+
+import torch
 
 class ExpertLoadBalancer(object):
     def __init__(self, expert_map_path, global_expert_num):
         self.expert_map_path = expert_map_path
         self.global_expert_num = global_expert_num
-        self.expert_map_tensor, self.layers_num, self.ranks_num = self.expert_file_to_tensor()
+        self.expert_map_tensor, self.layers_num, self.ranks_num = \
+            self.expert_file_to_tensor()
 
     def expert_file_to_tensor(self):
         with open(self.expert_map_path, "r") as f:
@@ -39,25 +42,29 @@ def generate_index_dicts(tensor_2d):
         return dict_list
 
     def generate_expert_placement_map(self):
-        expert_placement_map = torch.full((self.layers_num, self.ranks_num, self.global_expert_num),
-                                          -1, dtype=torch.int32)
+        expert_placement_map = torch.full(
+            (self.layers_num, self.ranks_num, self.global_expert_num),
+            -1, dtype=torch.int32)
         for layer_id in range(self.layers_num):
             for gpu_id in range(self.ranks_num):
                 e_ids = self.expert_map_tensor[layer_id, gpu_id]
-                expert_placement_map[layer_id, gpu_id, e_ids] = torch.arange(len(e_ids), dtype=torch.int32)
+                expert_placement_map[layer_id, gpu_id, e_ids] = \
+                    torch.arange(len(e_ids), dtype=torch.int32)
         return expert_placement_map
 
     def generate_log2phy_expert_map(self, layer_id):
         concatenated = torch.flatten(self.expert_map_tensor[layer_id])
-        rank_expert_to_global = self.generate_index_dicts(self.expert_map_tensor[layer_id])
-        result_dict = {}
+        rank_expert_to_global = self.generate_index_dicts(
+            self.expert_map_tensor[layer_id])
+        result_dict: Dict[int, List[int]] = {}
         for idx, value in enumerate(concatenated):
             key = value.item()
             if key not in result_dict:
                 result_dict[key] = []
             result_dict[key].append(idx)
 
-        log2phy_map = torch.full((self.ranks_num, self.global_expert_num), -1, dtype=torch.int32)
+        log2phy_map = torch.full((self.ranks_num, self.global_expert_num),
+                                 -1, dtype=torch.int32)
         for rank in range(self.ranks_num):
             for key in result_dict:
                 indices_in_concat = result_dict[key]
@@ -71,7 +78,8 @@ def generate_log2phy_expert_map(self, layer_id):
     def get_rank_placement_map(self, layer_id, rank_id):
         expert_placement_map = self.generate_expert_placement_map()
         layer_expert_map = expert_placement_map[layer_id]
-        rank_expert_map = layer_expert_map[rank_id].to(torch.npu.current_device())
+        rank_expert_map = layer_expert_map[rank_id].to(
+            torch.npu.current_device())
         rank_local_expert_num = torch.sum(torch.ne(rank_expert_map, -1)).item()
         return rank_local_expert_num, rank_expert_map
 
@@ -80,7 +88,8 @@ def get_rank_log2phy_map(self, layer_id, rank_id):
         return layer_log2phy_map[rank_id]
 
     def get_global_redundant_expert_num(self):
-        global_redundant_expert_num = len(self.expert_map_tensor[0][0]) * self.ranks_num - self.global_expert_num
+        global_redundant_expert_num = len(self.expert_map_tensor[0][0]) \
+            * self.ranks_num - self.global_expert_num
         return global_redundant_expert_num
 
 
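
For context (this note is not part of the commit): a minimal runnable sketch of the mapping these helpers compute, mirroring generate_expert_placement_map and get_global_redundant_expert_num for a single layer. The toy layout below (2 ranks, 4 logical experts, 3 physical slots per rank) is illustrative only and stands in for the JSON file that expert_file_to_tensor would normally load.

    # Toy example; assumes only that torch is installed (no NPU needed here).
    import torch

    ranks_num, global_expert_num = 2, 4
    # Logical expert ids hosted by each rank for one layer; experts 1 and 2
    # appear on both ranks, i.e. they are redundant copies.
    layer_expert_map = torch.tensor([[0, 1, 2],
                                     [3, 2, 1]])

    # Placement map: logical expert id -> local slot on that rank (-1 if absent).
    placement = torch.full((ranks_num, global_expert_num), -1, dtype=torch.int32)
    for rank in range(ranks_num):
        e_ids = layer_expert_map[rank]
        placement[rank, e_ids] = torch.arange(len(e_ids), dtype=torch.int32)
    print(placement)
    # tensor([[ 0,  1,  2, -1],
    #         [-1,  2,  1,  0]], dtype=torch.int32)

    # Redundant expert count, as in get_global_redundant_expert_num:
    # 3 slots per rank * 2 ranks - 4 logical experts = 2 redundant copies.
    print(layer_expert_map.shape[1] * ranks_num - global_expert_num)  # 2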

vllm_ascend/ops/fused_moe.py (10 additions, 5 deletions)

@@ -1029,15 +1029,20 @@ def __init__(
         vllm_config = get_current_vllm_config()
         expert_map_path = None
         if vllm_config.additional_config:
-            expert_map_path = vllm_config.additional_config.get("expert_map_path", None)
+            expert_map_path = vllm_config.additional_config.get(
+                "expert_map_path", None)
         if expert_map_path and os.path.exists(expert_map_path):
             # moe expert load balance
-            expert_load_balancer = ExpertLoadBalancer(expert_map_path, self.global_num_experts)
-            self.local_num_experts, self.expert_map = expert_load_balancer.get_rank_placement_map(
-                self.moe_instance_id, get_ep_group().rank_in_group)
+            expert_load_balancer = ExpertLoadBalancer(expert_map_path,
+                                                      self.global_num_experts)
+            self.local_num_experts, self.expert_map = \
+                expert_load_balancer.get_rank_placement_map(
+                    self.moe_instance_id,
+                    get_ep_group().rank_in_group)
             self.log2phy = expert_load_balancer.get_rank_log2phy_map(
                 self.moe_instance_id, get_ep_group().rank_in_group)
-            self.global_redundant_expert_num = expert_load_balancer.get_global_redundant_expert_num()
+            self.global_redundant_expert_num = \
+                expert_load_balancer.get_global_redundant_expert_num()
         else:
             # Create a tensor of size num_experts filled with -1
             self.local_num_experts, self.expert_map = determine_expert_map(
vllm_ascend/quantization/quant_config.py (2 additions, 1 deletion)

@@ -331,7 +331,8 @@ def apply(
             layer, x, router_logits, top_k, renormalize, use_grouped_topk,
             global_num_experts, expert_map, topk_group, num_expert_group,
             custom_routing_function, scoring_func, e_score_correction_bias,
-            is_prefill, enable_force_load_balance, log2phy, global_redundant_expert_num, **kwargs)
+            is_prefill, enable_force_load_balance, log2phy,
+            global_redundant_expert_num, **kwargs)
 
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         if hasattr(self.quant_method, "process_weights_after_loading"):

vllm_ascend/quantization/w8a8_dynamic.py (4 additions, 2 deletions)

@@ -32,7 +32,8 @@
 VLLM_ENABLE_MC2: bool = envs_ascend.VLLM_ENABLE_MC2
 
 
-def apply_mlp(hidden_states: torch.Tensor,
+def apply_mlp(
+        hidden_states: torch.Tensor,
         w1: torch.Tensor,
         w1_scale: torch.Tensor,
         w2: torch.Tensor,
@@ -720,7 +721,8 @@ def apply(
         # according to tp_size before they are feed into fused_moe module.
         # Therefore, all2all is needed no matter how dp/tp is set so as to
         # dispatch/combine tokens.
-        return fused_experts_with_all2all(hidden_states=x,
+        return fused_experts_with_all2all(
+            hidden_states=x,
             w1=layer.w13_weight,
             w1_scale=layer.w13_weight_scale,
             w2=layer.w2_weight,
