Remove `chunked_prefill_for_mla`-related content

whx-sjtu · whx-sjtu · commit 30cfb791cf53 · 2025-08-05T11:55:11.000+08:00
Signed-off-by: whx-sjtu <2952154980@qq.com>
diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po
@@ -148,9 +148,6 @@ msgid ""
 " to be passed in."
 msgstr "在为MOE模型使用专家负载均衡时，需要传入专家映射路径。"
 
-#: ../../user_guide/configuration/additional_config.md
-msgid "`chunked_prefill_for_mla`"
-msgstr "`chunked_prefill_for_mla`"
 
 #: ../../user_guide/configuration/additional_config.md
 msgid "`False`"
diff --git a/docs/source/user_guide/configuration/additional_config.md b/docs/source/user_guide/configuration/additional_config.md
@@ -30,7 +30,6 @@ The following table lists the additional configuration options available in vLLM
 | `ascend_scheduler_config`     | dict | `{}` | The config options for ascend scheduler                                                       |
 | `refresh`                     | bool | `false` | Whether to refresh global ascend config content. This value is usually used by rlhf or ut/e2e test case.     |
 | `expert_map_path`             | str  | `None` | When using expert load balancing for the MOE model, an expert map path needs to be passed in. |
-| `chunked_prefill_for_mla`     | bool | `False` | Whether to enable the fused operator-like chunked_prefill. |
 | `kv_cache_dtype`     | str | `None` | When using the kv cache quantization method, kv cache dtype needs to be set, currently only int8 is supported. |
 
 The details of each config option are as follows:
diff --git a/vllm_ascend/ascend_config.py b/vllm_ascend/ascend_config.py
@@ -45,8 +45,6 @@ def __init__(self, vllm_config):
             ascend_scheduler_config)
 
         self.expert_map_path = additional_config.get("expert_map_path", None)
-        self.chunked_prefill_for_mla = additional_config.get(
-            "chunked_prefill_for_mla", False)
 
 
 class TorchairGraphConfig: