
Commit b680c8b

enable ep for pangu

Signed-off-by: MengqingCao <cmq0113@163.com>

1 parent: fadbdce

File tree: 2 files changed (+3, -1)


tests/e2e/multicard/test_torchair_graph_mode.py

Lines changed: 1 addition & 0 deletions
@@ -129,6 +129,7 @@ def _pangu_torchair_test_fixture(
             distributed_executor_backend="mp",
             enforce_eager=False,
             additional_config=additional_config,
+            enable_expert_parallel=True,
     ) as vllm_model:
         # use greedy sampler to make sure the generated results are fix
         vllm_output = vllm_model.generate_greedy(example_prompts, 5)
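
The new keyword is the standard vLLM engine argument for expert parallelism, which the VllmRunner test helper forwards to the engine. A minimal sketch of setting the same flag directly on the vllm.LLM entrypoint, assuming a hypothetical local Pangu MoE checkpoint path (the path and sampling values are illustrative, not taken from this commit):

from vllm import LLM, SamplingParams

llm = LLM(
    model="/path/to/pangu-moe",    # hypothetical checkpoint location
    tensor_parallel_size=4,        # EP size follows the parallel world size
    enable_expert_parallel=True,   # shard MoE experts across ranks
    enforce_eager=False,
)
# Greedy decoding, mirroring generate_greedy in the test above.
outputs = llm.generate(["Hello"], SamplingParams(temperature=0.0, max_tokens=5))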

vllm_ascend/models/pangu_moe.py

Lines changed: 2 additions & 1 deletion
@@ -356,7 +356,7 @@ def pangu_group8_topk(
     num_tokens = scores.shape[0]
     router_scale = _ROUTER_SCALE.squeeze( # type: ignore
     )
-
+    # TODO: support disable expert parallel
     ep_size = get_ep_group().world_size
     local_num_experts = global_num_experts // ep_size
     local_num_group = topk // ep_size
@@ -463,6 +463,7 @@ def __init__(
             custom_routing_function=topk_wrapper(num_voted_experts),
             prefix=f"{prefix}.experts",
         )
+        self.use_ep = self.experts.use_ep
 
         self.gate = ReplicatedLinear(
             config.hidden_size,
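
The routing change relies on the arithmetic visible in the first hunk: each expert-parallel rank owns an equal slice of the global experts and makes a proportional share of the top-k picks locally. A standalone sketch of that partitioning (only ep_size, global_num_experts, and topk mirror the diff; the helper name and rank handling are hypothetical, for illustration only):

# Illustrative sketch of the expert-parallel split used by pangu_group8_topk.
def local_expert_slice(global_num_experts: int, topk: int, ep_size: int, rank: int):
    local_num_experts = global_num_experts // ep_size  # experts owned by this rank
    local_num_group = topk // ep_size                  # top-k picks made locally
    first_expert = rank * local_num_experts            # contiguous slice per rank
    return range(first_expert, first_expert + local_num_experts), local_num_group

# Example: 64 experts, top-8 routing, EP across 4 ranks.
experts, picks = local_expert_slice(global_num_experts=64, topk=8, ep_size=4, rank=1)
print(list(experts), picks)  # rank 1 owns experts 16..31 and makes 2 local picks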
