
Commit 91525d6

Yuxiao-Xu committed
fix CI issues
Signed-off-by: Yuxiao-Xu <664988918@qq.com>
1 parent 402a90b commit 91525d6

File tree

3 files changed: +55 -47 lines changed

vllm_ascend/ops/expert_load_balancer.py

Lines changed: 20 additions & 14 deletions
@@ -4,12 +4,14 @@
 
 import torch
 
+
 class ExpertLoadBalancer(object):
     def __init__(self, expert_map_path, global_expert_num):
         self.expert_map_path = expert_map_path
         self.global_expert_num = global_expert_num
-        self.expert_map_tensor, self.layers_num, self.ranks_num = \
-            self.expert_file_to_tensor()
+        self.expert_map_tensor, self.layers_num, self.ranks_num = (
+            self.expert_file_to_tensor()
+        )
 
     def expert_file_to_tensor(self):
         with open(self.expert_map_path, "r") as f:
@@ -43,28 +45,33 @@ def generate_index_dicts(tensor_2d):
 
     def generate_expert_placement_map(self):
         expert_placement_map = torch.full(
-            (self.layers_num, self.ranks_num, self.global_expert_num),
-            -1, dtype=torch.int32)
+            (self.layers_num, self.ranks_num, self.global_expert_num),
+            -1,
+            dtype=torch.int32,
+        )
         for layer_id in range(self.layers_num):
             for gpu_id in range(self.ranks_num):
                 e_ids = self.expert_map_tensor[layer_id, gpu_id]
-                expert_placement_map[layer_id, gpu_id, e_ids] = \
-                    torch.arange(len(e_ids), dtype=torch.int32)
+                expert_placement_map[layer_id, gpu_id, e_ids] = torch.arange(
+                    len(e_ids), dtype=torch.int32
+                )
         return expert_placement_map
 
     def generate_log2phy_expert_map(self, layer_id):
         concatenated = torch.flatten(self.expert_map_tensor[layer_id])
         rank_expert_to_global = self.generate_index_dicts(
-            self.expert_map_tensor[layer_id])
+            self.expert_map_tensor[layer_id]
+        )
         result_dict: Dict[int, List[int]] = {}
         for idx, value in enumerate(concatenated):
             key = value.item()
             if key not in result_dict:
                 result_dict[key] = []
             result_dict[key].append(idx)
 
-        log2phy_map = torch.full((self.ranks_num, self.global_expert_num),
-                                 -1, dtype=torch.int32)
+        log2phy_map = torch.full(
+            (self.ranks_num, self.global_expert_num), -1, dtype=torch.int32
+        )
         for rank in range(self.ranks_num):
             for key in result_dict:
                 indices_in_concat = result_dict[key]
@@ -78,8 +85,7 @@ def generate_log2phy_expert_map(self, layer_id):
     def get_rank_placement_map(self, layer_id, rank_id):
         expert_placement_map = self.generate_expert_placement_map()
         layer_expert_map = expert_placement_map[layer_id]
-        rank_expert_map = layer_expert_map[rank_id].to(
-            torch.npu.current_device())
+        rank_expert_map = layer_expert_map[rank_id].to(torch.npu.current_device())
         rank_local_expert_num = torch.sum(torch.ne(rank_expert_map, -1)).item()
         return rank_local_expert_num, rank_expert_map
 
@@ -88,8 +94,9 @@ def get_rank_log2phy_map(self, layer_id, rank_id):
         return layer_log2phy_map[rank_id]
 
     def get_global_redundant_expert_num(self):
-        global_redundant_expert_num = len(self.expert_map_tensor[0][0]) \
-            * self.ranks_num - self.global_expert_num
+        global_redundant_expert_num = (
+            len(self.expert_map_tensor[0][0]) * self.ranks_num - self.global_expert_num
+        )
        return global_redundant_expert_num
 
 
@@ -99,4 +106,3 @@ def get_global_redundant_expert_num(self):
 # print(rank_placement_map)
 # rank_phy2log_map = expert_load_balancer.get_rank_log2phy_map(1, 0)
 # print(rank_phy2log_map)
-

vllm_ascend/ops/fused_moe.py

Lines changed: 5 additions & 4 deletions
@@ -959,6 +959,7 @@ def apply(
 class AscendFusedMoE(FusedMoE):
 
     moe_counter = -1
+
     def __init__(
         self,
         num_experts: int,  # Global number of experts
@@ -1025,22 +1026,22 @@ def __init__(
         self.log2phy = None
         self.global_redundant_expert_num = 0
 
-
         vllm_config = get_current_vllm_config()
         expert_map_path = None
         if vllm_config.additional_config:
             expert_map_path = vllm_config.additional_config.get(
-            "expert_map_path", None)
+                "expert_map_path", None)
         if expert_map_path and os.path.exists(expert_map_path):
             # moe expert load balance
             expert_load_balancer = ExpertLoadBalancer(expert_map_path,
                                                       self.global_num_experts)
             self.local_num_experts, self.expert_map = \
                 expert_load_balancer.get_rank_placement_map(
-                self.moe_instance_id,
+                    self.moe_instance_id,
                     get_ep_group().rank_in_group)
             self.log2phy = expert_load_balancer.get_rank_log2phy_map(
-                self.moe_instance_id, get_ep_group().rank_in_group)
+                self.moe_instance_id,
+                get_ep_group().rank_in_group)
             self.global_redundant_expert_num = \
                 expert_load_balancer.get_global_redundant_expert_num()
         else:
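As shown above, AscendFusedMoE only builds an ExpertLoadBalancer when additional_config carries an "expert_map_path" that exists on disk. The minimal launch sketch below shows how that key might be supplied; the model name and file path are placeholders, and it assumes vLLM's additional_config plumbing is available in the running version.

# Hypothetical launch sketch, not part of the commit.
from vllm import LLM

llm = LLM(
    model="deepseek-ai/DeepSeek-V2-Lite",  # placeholder MoE model
    additional_config={
        # Read in AscendFusedMoE.__init__ via
        # vllm_config.additional_config.get("expert_map_path", None);
        # ignored unless the file exists on disk.
        "expert_map_path": "/path/to/expert_map.json",
    },
)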

vllm_ascend/quantization/w8a8_dynamic.py

Lines changed: 30 additions & 29 deletions
@@ -33,15 +33,15 @@
 
 
 def apply_mlp(
-    hidden_states: torch.Tensor,
-    w1: torch.Tensor,
-    w1_scale: torch.Tensor,
-    w2: torch.Tensor,
-    w2_scale: torch.Tensor,
-    group_list: torch.Tensor,
-    dynamic_scale: torch.Tensor = None,
-    group_list_type: int = 1,
-    **kwargs) -> torch.Tensor:
+        hidden_states: torch.Tensor,
+        w1: torch.Tensor,
+        w1_scale: torch.Tensor,
+        w2: torch.Tensor,
+        w2_scale: torch.Tensor,
+        group_list: torch.Tensor,
+        dynamic_scale: torch.Tensor = None,
+        group_list_type: int = 1,
+        **kwargs) -> torch.Tensor:
     """
     apply MLP: gate_up_proj -> swiglu -> down_proj
 
@@ -465,13 +465,14 @@ def fused_experts(hidden_states: torch.Tensor,
         group_list_type = 0
 
     # `hidden_states` will be disposed in the `apply_mlp` function
-    hidden_states = apply_mlp(hidden_states,
-                              w1,
-                              w1_scale,
-                              w2,
-                              w2_scale,
-                              expert_tokens,
-                              group_list_type=group_list_type)
+    hidden_states = apply_mlp(
+        hidden_states,
+        w1,
+        w1_scale,
+        w2,
+        w2_scale,
+        expert_tokens,
+        group_list_type=group_list_type)
 
     if expert_map is not None:
         hidden_states.mul_(sorted_weights.unsqueeze(1))
@@ -722,19 +723,19 @@ def apply(
             # Therefore, all2all is needed no matter how dp/tp is set so as to
             # dispatch/combine tokens.
             return fused_experts_with_all2all(
-            hidden_states=x,
-            w1=layer.w13_weight,
-            w1_scale=layer.w13_weight_scale,
-            w2=layer.w2_weight,
-            w2_scale=layer.w2_weight_scale,
-            topk_weights=topk_weights,
-            topk_ids=topk_ids,
-            top_k=top_k,
-            expert_map=expert_map,
-            ep_group=self.ep_group,
-            log2phy=log2phy,
-            global_redundant_expert_num=global_redundant_expert_num,
-            )
+                hidden_states=x,
+                w1=layer.w13_weight,
+                w1_scale=layer.w13_weight_scale,
+                w2=layer.w2_weight,
+                w2_scale=layer.w2_weight_scale,
+                topk_weights=topk_weights,
+                topk_ids=topk_ids,
+                top_k=top_k,
+                expert_map=expert_map,
+                ep_group=self.ep_group,
+                log2phy=log2phy,
+                global_redundant_expert_num=global_redundant_expert_num,
+            )
 
     def process_weights_after_loading(self, layer):
         if self.transpose_weight:
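The apply_mlp docstring above summarizes the computation as gate_up_proj -> swiglu -> down_proj. As a rough mental model only, the dense, unquantized torch sketch below shows that pipeline for one group of tokens; the real function instead runs W8A8 dynamic-quantized grouped matmuls on the NPU (w1/w2 with their scales and group_list), so the function name, shapes, and plain matmuls here are illustrative assumptions.

# Hypothetical dense reference, not part of the commit.
import torch


def naive_swiglu_mlp(hidden_states: torch.Tensor,
                     gate_up_weight: torch.Tensor,
                     down_weight: torch.Tensor) -> torch.Tensor:
    # gate_up_proj: one matmul producing [gate, up] side by side.
    gate_up = hidden_states @ gate_up_weight          # (tokens, 2 * inter)
    gate, up = gate_up.chunk(2, dim=-1)
    # swiglu: silu(gate) * up
    activated = torch.nn.functional.silu(gate) * up   # (tokens, inter)
    # down_proj back to the hidden size.
    return activated @ down_weight                    # (tokens, hidden)


tokens, hidden, inter = 4, 8, 16
out = naive_swiglu_mlp(torch.randn(tokens, hidden),
                       torch.randn(hidden, 2 * inter),
                       torch.randn(inter, hidden))
print(out.shape)  # torch.Size([4, 8])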

0 commit comments
