
Commit 09b8b4e

[3/N][Refactor][Quantization] remove packed_modules_mapping from models

Signed-off-by: 22dimensions <waitingwind@foxmail.com>
Parent: 0c04bf1
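Background, for review: vLLM fuses several checkpoint weights into single modules (q_proj/k_proj/v_proj into qkv_proj, gate_proj/up_proj into gate_up_proj), while quantization descriptions are, roughly, keyed by the unfused names. packed_modules_mapping is what lets quantization code expand a fused module name back into its shards. A minimal sketch of that idea follows; shard_names is illustrative only and not part of this commit:

# Illustrative sketch only -- shard_names() is not part of this commit.
packed_modules_mapping = {
    "qkv_proj": ["q_proj", "k_proj", "v_proj"],
    "gate_up_proj": ["gate_proj", "up_proj"],
}

def shard_names(fused_name: str) -> list[str]:
    """Expand a fused module name into its unfused checkpoint names."""
    return packed_modules_mapping.get(fused_name, [fused_name])

assert shard_names("qkv_proj") == ["q_proj", "k_proj", "v_proj"]
assert shard_names("o_proj") == ["o_proj"]  # unfused modules map to themselves

This commit moves such mappings off the individual model classes and into the quantization config, keyed by model type, as the diffs below show.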

File tree

5 files changed (+42, -41 lines)


vllm_ascend/models/deepseek_mtp.py

Lines changed: 0 additions & 8 deletions
@@ -180,14 +180,6 @@ def compute_logits(
 
 
 class CustomDeepSeekMTP(DeepSeekMTP):
-    # NOTE 1.The quantized MTP layer of deepseek on the NPU is not quantized;
-    # NOTE 2.The description file generated by the current msmodelslim tool does not have
-    # MTP layer info. Please manually add it and set the value to FLOAT.
-    packed_modules_mapping = {
-        "gate_up_proj": ["gate_proj", "up_proj"],
-        "experts":
-        ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"]
-    }
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         nn.Module.__init__(self)
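The removed NOTE still matters operationally: per the comment, the description file generated by msmodelslim has no MTP-layer entries, and they must be added by hand with the value FLOAT so the MTP layer stays unquantized. A purely hypothetical sketch of such a patch; the key layout of the description dict and the helper below are assumptions, not taken from this commit or from msmodelslim:

# Hypothetical sketch: add missing MTP-layer entries to a quant description
# dict and pin them to "FLOAT". Both the key format and mtp_layer_names are
# assumptions; match them to what msmodelslim actually emits.
def pin_mtp_layers_to_float(quant_description: dict, mtp_layer_names: list) -> None:
    for name in mtp_layer_names:
        quant_description.setdefault(name, "FLOAT")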

vllm_ascend/models/qwen2_5_vl.py

Lines changed: 0 additions & 11 deletions
@@ -491,17 +491,6 @@ def forward(
     dummy_inputs=Qwen2_5_VLDummyInputsBuilder)
 class AscendQwen2_5_VLForConditionalGeneration(
         Qwen2_5_VLForConditionalGeneration):
-    packed_modules_mapping = {
-        "qkv_proj": [
-            "q_proj",
-            "k_proj",
-            "v_proj",
-        ],
-        "gate_up_proj": [
-            "gate_proj",
-            "up_proj",
-        ],
-    }
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__(vllm_config=vllm_config, prefix=prefix)

vllm_ascend/models/qwen3_moe.py

Lines changed: 0 additions & 13 deletions
@@ -318,19 +318,6 @@ def forward(
 
 
 class CustomQwen3MoeForCausalLM(Qwen3MoeForCausalLM):
-    packed_modules_mapping = {
-        "qkv_proj": [
-            "q_proj",
-            "k_proj",
-            "v_proj",
-        ],
-        "gate_up_proj": [
-            "gate_proj",
-            "up_proj",
-        ],
-        "experts":
-        ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"],
-    }
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         nn.Module.__init__(self)

vllm_ascend/models/qwen3_next.py

Lines changed: 0 additions & 9 deletions
@@ -1166,15 +1166,6 @@ def load_weights(self, weights: Iterable[tuple[str,
 
 class Qwen3NextForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
                            MixtureOfExperts, IsHybrid):
-    packed_modules_mapping = {
-        "qkv_proj": [
-            "q_proj",
-            "k_proj",
-            "v_proj",
-        ],
-        "gate_up_proj": ["gate_proj", "up_proj"],
-        "in_proj": ["in_proj_qkvz", "in_proj_ba"],
-    }
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         config = vllm_config.model_config.hf_config

vllm_ascend/quantization/quant_config.py

Lines changed: 42 additions & 0 deletions
@@ -19,6 +19,7 @@
 from typing import Any, Callable, Dict, List, Mapping, Optional
 
 import torch
+from vllm.config import get_current_vllm_config
 from vllm.distributed import get_tensor_model_parallel_rank
 from vllm.model_executor.layers.fused_moe import (FusedMoE, FusedMoEMethodBase,
                                                   FusedMoeWeightScaleSupported)

@@ -55,6 +56,11 @@ class AscendQuantConfig(QuantizationConfig):
     def __init__(self, quant_config: Dict[str, Any]):
         super().__init__()
         self.quant_description = quant_config
+        vllm_config = get_current_vllm_config()
+        model_type = vllm_config.model_config.hf_config.model_type
+        if model_type in packed_modules_model_mapping:
+            self.packed_modules_mapping = packed_modules_model_mapping[
+                model_type]
 
     def __repr__(self) -> str:
         return "AscendQuantConfig:\n" + super().__repr__()

@@ -153,6 +159,42 @@ def get_scaled_act_names(self) -> List[str]:
         return []
 
 
+packed_modules_model_mapping = {
+    "qwen3_moe": {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+        "experts":
+        ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"],
+    },
+    "deepseek_v2": {
+        "gate_up_proj": ["gate_proj", "up_proj"],
+        "experts":
+        ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"]
+    },
+    "deepseek_mtp": {
+        "gate_up_proj": ["gate_proj", "up_proj"],
+        "experts":
+        ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"]
+    },
+    "qwen3_next": {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": ["gate_proj", "up_proj"],
+        "in_proj": ["in_proj_qkvz", "in_proj_ba"],
+    }
+}
+
+
 class AscendLinearMethod(LinearMethodBase):
     """Linear method for Ascend quantization.
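With the table above in place, the mapping is resolved once at config construction from hf_config.model_type instead of living on each model class. A small self-contained sketch of that lookup; resolve_packed_modules and _TABLE are illustrative stand-ins for the branch added to AscendQuantConfig.__init__ (the real code gets model_type via get_current_vllm_config()):

# Illustrative stand-in for the lookup added to AscendQuantConfig.__init__.
from typing import Dict, List, Optional

# Trimmed, illustrative copy of the packed_modules_model_mapping table above.
_TABLE: Dict[str, Dict[str, List[str]]] = {
    "deepseek_mtp": {"gate_up_proj": ["gate_proj", "up_proj"]},
    "qwen3_next": {"in_proj": ["in_proj_qkvz", "in_proj_ba"]},
}

def resolve_packed_modules(model_type: str) -> Optional[Dict[str, List[str]]]:
    """Return the NPU packed-modules mapping for a model type, if registered."""
    return _TABLE.get(model_type)

assert resolve_packed_modules("qwen3_next") == {"in_proj": ["in_proj_qkvz", "in_proj_ba"]}
assert resolve_packed_modules("llama") is None  # such models keep their class default

The payoff of this design is that NPU-specific packing knowledge sits in one table next to the quantization logic, so supporting a new model becomes a table entry rather than an attribute on the model class.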
