Commit a09e869

fix nz
1 parent 6d9e5f6 commit a09e869

File tree: 2 files changed, +16 -9 lines


vllm_ascend/eplb/adaptor/vllm_adaptor.py

Lines changed: 9 additions & 8 deletions
@@ -69,15 +69,16 @@ def __init__(self, model, **args):
         self.all_topk_ids = []
 
     def init_buffer_tensor(self, num_buffer_tensor):
-        for name in self.expert_weight_names:
-            complete_name = "model.layers." + str(
-                self.num_dense_layers) + ".mlp.experts." + name
-            expert_tensor = self.param_dict[complete_name].data[
-                0:num_buffer_tensor]
-            buffer_tensors = torch.empty_like(expert_tensor)
-            for buffer_id in range(num_buffer_tensor):
+        for buffer_id in range(num_buffer_tensor):
+            for name in self.expert_weight_names:
+                complete_name = "model.layers." + str(
+                    self.num_dense_layers) + ".mlp.experts." + name
+                expert_tensor = self.param_dict[complete_name].data[0]
+                if name in ["w13_weight", "w2_weight"]:
+                    expert_tensor = expert_tensor.clone()
+                buffer_tensor = torch.empty_like(expert_tensor)
                 self.buffer_tensor_list[buffer_id].append(
-                    buffer_tensors[buffer_id])
+                    buffer_tensor)
 
     def init_expert_param_per_layer(self):
         num_local_expert = self.param_dict["model.layers." + str(self.num_dense_layers) + \
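As far as one can tell from the diff and the "fix nz" message, the rewrite is about layout hygiene: on Ascend NPUs the fused expert weights (`w13_weight`, `w2_weight`) can be stored in the "NZ" (FRACTAL_NZ) layout, and `torch.empty_like` on such a tensor may reproduce that layout, so each buffer slot is now allocated from a `clone()`, which materialises a plain dense copy. Below is a minimal CPU sketch of the allocation pattern; the names and shapes are toy stand-ins for the adaptor's attributes, and the NZ behaviour itself only appears on NPU:

    import torch

    # Toy stand-ins for the adaptor's state (hypothetical names and shapes).
    expert_weight_names = ["w13_weight", "w2_weight", "w2_weight_scale"]
    num_dense_layers = 3
    param_dict = {
        "model.layers." + str(num_dense_layers) + ".mlp.experts." + name:
            torch.randn(8, 4, 4)  # 8 local experts, toy per-expert shape
        for name in expert_weight_names
    }

    num_buffer_tensor = 2
    buffer_tensor_list = [[] for _ in range(num_buffer_tensor)]

    for buffer_id in range(num_buffer_tensor):
        for name in expert_weight_names:
            complete_name = ("model.layers." + str(num_dense_layers) +
                             ".mlp.experts." + name)
            expert_tensor = param_dict[complete_name].data[0]  # one expert's slice
            if name in ["w13_weight", "w2_weight"]:
                # clone() materialises a plain contiguous copy, so empty_like()
                # below allocates a standard-layout buffer instead of one that
                # mirrors the weight's (possibly NZ) storage.
                expert_tensor = expert_tensor.clone()
            buffer_tensor_list[buffer_id].append(torch.empty_like(expert_tensor))

Note the loop inversion as well: allocating per `buffer_id` from a single expert's slice replaces the old `[0:num_buffer_tensor]` slicing, so the buffer count no longer has to fit inside the number of local experts.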

vllm_ascend/eplb/core/eplb_device_transfer_loader.py

Lines changed: 7 additions & 1 deletion
@@ -18,6 +18,7 @@
 
 import torch.distributed as dist
 from vllm.logger import logger
+from vllm_ascend.ascend_config import get_ascend_config
 
 
 class ExpertWeightUpdateState(Enum):
@@ -38,6 +39,7 @@ def __init__(self, eplb_adaptor):
         self.state = ExpertWeightUpdateState.WAITING
         self.recv_expert_list = []
         self.mock_flag = True
+        self.enable_weight_nz_layout = get_ascend_config().enable_weight_nz_layout
 
     def generate_expert_d2d_transfer_task(self, expert_send_info,
                                           expert_recv_info, updated_expert_map,
@@ -61,10 +63,14 @@ def generate_expert_d2d_transfer_task(self, expert_send_info,
             dst_rank, global_expert_id_to_send = send_info
             local_expert_id = self.eplb_adaptor.expert_map_per_layer_cpu[
                 layer_id][global_expert_id_to_send].item()
+            idx = 0
             for src_tensor in self.eplb_adaptor.expert_param_per_layer[
-                layer_id][local_expert_id]:
+                    layer_id][local_expert_id]:
+                if self.enable_weight_nz_layout and idx < 2:
+                    src_tensor = src_tensor.clone()
                 self.comm_op_list.append(
                     dist.P2POp(dist.isend, src_tensor, dst_rank))
+                idx += 1
 
             buffer_tensor_id = 0
             for recv_info in expert_recv_info:
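The loader side applies the same idea to the send path: with `enable_weight_nz_layout` on, the first two tensors of each expert's parameter list (the possibly NZ-layout `w13_weight`/`w2_weight` pair, matching the adaptor above) are cloned into dense copies before being queued as `dist.isend` ops, so the P2P transfer ships ordinary row-major data. A minimal sketch of that pattern, with a hypothetical helper name and assuming `dist.init_process_group()` has already run:

    import torch
    import torch.distributed as dist

    def queue_expert_send(expert_tensors, dst_rank, enable_weight_nz_layout):
        """Hypothetical helper mirroring the loop above: build isend ops,
        cloning the first two tensors (the possibly-NZ weights) first."""
        comm_op_list = []
        for idx, src_tensor in enumerate(expert_tensors):
            if enable_weight_nz_layout and idx < 2:
                # Dense copy of the NZ-layout weight; the clone is what
                # actually goes over the wire.
                src_tensor = src_tensor.clone()
            comm_op_list.append(dist.P2POp(dist.isend, src_tensor, dst_rank))
        return comm_op_list

    # Usage sketch: ops = queue_expert_send(tensors, 1, True), then
    # dist.batch_isend_irecv(ops) once all send and recv ops are collected.

One design consequence worth noting: the clones are temporaries created per transfer, so the sends cost an extra copy of each weight tensor, but the resident NZ-layout weights themselves are left untouched.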
