Commit b457f6b

fix CI issues
Signed-off-by: Yuxiao-Xu <664988918@qq.com>
1 parent 402a90b commit b457f6b

3 files changed: +46 −39 lines changed

vllm_ascend/ops/expert_load_balancer.py

Lines changed: 19 additions & 13 deletions
@@ -1,15 +1,17 @@
 import json
 import random
-from typing import List, Dict
+from typing import Dict, List
 
 import torch
 
+
 class ExpertLoadBalancer(object):
+
     def __init__(self, expert_map_path, global_expert_num):
         self.expert_map_path = expert_map_path
         self.global_expert_num = global_expert_num
-        self.expert_map_tensor, self.layers_num, self.ranks_num = \
-            self.expert_file_to_tensor()
+        self.expert_map_tensor, self.layers_num, self.ranks_num = (
+            self.expert_file_to_tensor())
 
     def expert_file_to_tensor(self):
         with open(self.expert_map_path, "r") as f:
@@ -43,19 +45,22 @@ def generate_index_dicts(tensor_2d):
 
     def generate_expert_placement_map(self):
         expert_placement_map = torch.full(
-            (self.layers_num, self.ranks_num, self.global_expert_num),
-            -1, dtype=torch.int32)
+            (self.layers_num, self.ranks_num, self.global_expert_num),
+            -1,
+            dtype=torch.int32,
+        )
         for layer_id in range(self.layers_num):
             for gpu_id in range(self.ranks_num):
                 e_ids = self.expert_map_tensor[layer_id, gpu_id]
-                expert_placement_map[layer_id, gpu_id, e_ids] = \
-                    torch.arange(len(e_ids), dtype=torch.int32)
+                expert_placement_map[layer_id, gpu_id,
+                                     e_ids] = torch.arange(len(e_ids),
+                                                           dtype=torch.int32)
         return expert_placement_map
 
     def generate_log2phy_expert_map(self, layer_id):
         concatenated = torch.flatten(self.expert_map_tensor[layer_id])
         rank_expert_to_global = self.generate_index_dicts(
-                self.expert_map_tensor[layer_id])
+            self.expert_map_tensor[layer_id])
         result_dict: Dict[int, List[int]] = {}
         for idx, value in enumerate(concatenated):
             key = value.item()
@@ -64,7 +69,8 @@ def generate_log2phy_expert_map(self, layer_id):
                 result_dict[key].append(idx)
 
         log2phy_map = torch.full((self.ranks_num, self.global_expert_num),
-                                 -1, dtype=torch.int32)
+                                 -1,
+                                 dtype=torch.int32)
         for rank in range(self.ranks_num):
             for key in result_dict:
                 indices_in_concat = result_dict[key]
@@ -79,7 +85,7 @@ def get_rank_placement_map(self, layer_id, rank_id):
         expert_placement_map = self.generate_expert_placement_map()
         layer_expert_map = expert_placement_map[layer_id]
         rank_expert_map = layer_expert_map[rank_id].to(
-                torch.npu.current_device())
+            torch.npu.current_device())
         rank_local_expert_num = torch.sum(torch.ne(rank_expert_map, -1)).item()
         return rank_local_expert_num, rank_expert_map
 
@@ -88,8 +94,9 @@ def get_rank_log2phy_map(self, layer_id, rank_id):
        return layer_log2phy_map[rank_id]
 
     def get_global_redundant_expert_num(self):
-        global_redundant_expert_num = len(self.expert_map_tensor[0][0]) \
-            * self.ranks_num - self.global_expert_num
+        global_redundant_expert_num = (
+            len(self.expert_map_tensor[0][0]) * self.ranks_num -
+            self.global_expert_num)
        return global_redundant_expert_num
 
 
@@ -99,4 +106,3 @@ def get_global_redundant_expert_num(self):
 # print(rank_placement_map)
 # rank_phy2log_map = expert_load_balancer.get_rank_log2phy_map(1, 0)
 # print(rank_phy2log_map)
-
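
For orientation, the public surface of this class after the change is ExpertLoadBalancer(expert_map_path, global_expert_num), get_rank_placement_map(layer_id, rank_id), get_rank_log2phy_map(layer_id, rank_id), and get_global_redundant_expert_num(). The sketch below mirrors how the call sites in vllm_ascend/ops/fused_moe.py (next file) consume it; the JSON path, expert count, layer ID and rank ID are placeholder values, and it assumes an Ascend NPU runtime because get_rank_placement_map moves the map onto torch.npu.current_device().

# Illustrative sketch, not part of this commit: exercising the ExpertLoadBalancer
# API the same way the AscendFusedMoE call sites do. Path and numbers are
# placeholders; torch_npu must be available since the placement map is moved
# to torch.npu.current_device().
from vllm_ascend.ops.expert_load_balancer import ExpertLoadBalancer

balancer = ExpertLoadBalancer("/path/to/expert_map.json",  # placeholder path
                              global_expert_num=256)       # placeholder count

# Per-rank placement map plus the number of experts hosted locally
# for MoE layer 0 on expert-parallel rank 0.
local_num_experts, expert_map = balancer.get_rank_placement_map(layer_id=0,
                                                                rank_id=0)

# Logical-to-physical expert mapping for the same layer and rank.
log2phy = balancer.get_rank_log2phy_map(layer_id=0, rank_id=0)

# Redundant expert copies across all ranks:
# experts-per-rank * ranks_num - global_expert_num.
redundant = balancer.get_global_redundant_expert_num()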

vllm_ascend/ops/fused_moe.py

Lines changed: 5 additions & 4 deletions
@@ -959,6 +959,7 @@ def apply(
 class AscendFusedMoE(FusedMoE):
 
     moe_counter = -1
+
     def __init__(
         self,
         num_experts: int,  # Global number of experts
@@ -1025,22 +1026,22 @@ def __init__(
         self.log2phy = None
         self.global_redundant_expert_num = 0
 
-
         vllm_config = get_current_vllm_config()
         expert_map_path = None
         if vllm_config.additional_config:
             expert_map_path = vllm_config.additional_config.get(
-                    "expert_map_path", None)
+                "expert_map_path", None)
         if expert_map_path and os.path.exists(expert_map_path):
             # moe expert load balance
             expert_load_balancer = ExpertLoadBalancer(expert_map_path,
                                                       self.global_num_experts)
             self.local_num_experts, self.expert_map = \
                 expert_load_balancer.get_rank_placement_map(
-                        self.moe_instance_id,
+                    self.moe_instance_id,
                     get_ep_group().rank_in_group)
             self.log2phy = expert_load_balancer.get_rank_log2phy_map(
-                self.moe_instance_id, get_ep_group().rank_in_group)
+                self.moe_instance_id,
+                get_ep_group().rank_in_group)
             self.global_redundant_expert_num = \
                 expert_load_balancer.get_global_redundant_expert_num()
         else:
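
As the guarded branch above shows, the load-balancing path only activates when additional_config supplies an expert_map_path that exists on disk. A hedged sketch of wiring that up from the Python entrypoint (assuming the LLM constructor forwards additional_config into the VllmConfig read by get_current_vllm_config) might look like:

# Hypothetical sketch: feeding an expert map to AscendFusedMoE via
# additional_config. Model name and JSON path are placeholders; this assumes
# additional_config is forwarded into VllmConfig.additional_config.
from vllm import LLM

llm = LLM(
    model="path/to/your-moe-model",  # placeholder
    additional_config={"expert_map_path": "/path/to/expert_map.json"},
)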

vllm_ascend/quantization/w8a8_dynamic.py

Lines changed: 22 additions & 22 deletions
@@ -32,8 +32,7 @@
 VLLM_ENABLE_MC2: bool = envs_ascend.VLLM_ENABLE_MC2
 
 
-def apply_mlp(
-    hidden_states: torch.Tensor,
+def apply_mlp(hidden_states: torch.Tensor,
               w1: torch.Tensor,
               w1_scale: torch.Tensor,
               w2: torch.Tensor,
@@ -465,13 +464,14 @@ def fused_experts(hidden_states: torch.Tensor,
         group_list_type = 0
 
     # `hidden_states` will be disposed in the `apply_mlp` function
-    hidden_states = apply_mlp(hidden_states,
-                              w1,
-                              w1_scale,
-                              w2,
-                              w2_scale,
-                              expert_tokens,
-                              group_list_type=group_list_type)
+    hidden_states = apply_mlp(
+        hidden_states,
+        w1,
+        w1_scale,
+        w2,
+        w2_scale,
+        expert_tokens,
+        group_list_type=group_list_type)
 
     if expert_map is not None:
         hidden_states.mul_(sorted_weights.unsqueeze(1))
@@ -722,19 +722,19 @@ def apply(
         # Therefore, all2all is needed no matter how dp/tp is set so as to
         # dispatch/combine tokens.
         return fused_experts_with_all2all(
-                hidden_states=x,
-                w1=layer.w13_weight,
-                w1_scale=layer.w13_weight_scale,
-                w2=layer.w2_weight,
-                w2_scale=layer.w2_weight_scale,
-                topk_weights=topk_weights,
-                topk_ids=topk_ids,
-                top_k=top_k,
-                expert_map=expert_map,
-                ep_group=self.ep_group,
-                log2phy=log2phy,
-                global_redundant_expert_num=global_redundant_expert_num,
-            )
+            hidden_states=x,
+            w1=layer.w13_weight,
+            w1_scale=layer.w13_weight_scale,
+            w2=layer.w2_weight,
+            w2_scale=layer.w2_weight_scale,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            top_k=top_k,
+            expert_map=expert_map,
+            ep_group=self.ep_group,
+            log2phy=log2phy,
+            global_redundant_expert_num=global_redundant_expert_num,
+        )
 
     def process_weights_after_loading(self, layer):
         if self.transpose_weight:
