We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e939ddd commit e8bb2cd (Copy full SHA for e8bb2cd)
vllm_ascend/utils.py
@@ -321,7 +321,8 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
321
if os.getenv("HCCL_OP_EXPANSION_MODE") == 'AIV':
322
# TODO: Find out whether we need to take into account the pp_size
323
parallel_factor = 1 + num_comm_groups + int(
324
- parallel_config.enable_expert_parallel)
+ parallel_config.enable_expert_parallel) + int(
325
+ vllm_config.additional_config.get("multistream_overlap_shared_expert", False))
326
if is_moe_model(vllm_config):
327
parallel_factor += (parallel_config.data_parallel_size > 1)
328
# Calculate maximum supported batch sizes considering model architecture on the A2 Hardware Device
0 commit comments