modify parallel_factor

whx-sjtu · whx-sjtu · commit e8bb2cd060ee · 2025-09-17T20:58:45.000+08:00
Signed-off-by: whx-sjtu &lt;2952154980@qq.com&gt;
diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py
@@ -321,7 +321,8 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
     if os.getenv("HCCL_OP_EXPANSION_MODE") == 'AIV':
         # TODO: Find out whether we need to take into account the pp_size
         parallel_factor = 1 + num_comm_groups + int(
-            parallel_config.enable_expert_parallel)
+            parallel_config.enable_expert_parallel) + int(
+            vllm_config.additional_config.get("multistream_overlap_shared_expert", False))
         if is_moe_model(vllm_config):
             parallel_factor += (parallel_config.data_parallel_size > 1)
         # Calculate maximum supported batch sizes considering model architecture on the A2 Hardware Device