|
19 | 19 | from typing import Any, Callable, Dict, List, Mapping, Optional
|
20 | 20 |
|
21 | 21 | import torch
|
| 22 | +from vllm.config import get_current_vllm_config |
22 | 23 | from vllm.distributed import get_tensor_model_parallel_rank
|
23 | 24 | from vllm.model_executor.layers.fused_moe import (FusedMoE, FusedMoEMethodBase,
|
24 | 25 | FusedMoeWeightScaleSupported)
|
@@ -55,6 +56,11 @@ class AscendQuantConfig(QuantizationConfig):
|
def __init__(self, quant_config: Dict[str, Any]):
    """Keep the quantization description and pick a packed-modules mapping.

    Args:
        quant_config: Quantization description; stored unchanged on
            ``self.quant_description``.
    """
    super().__init__()
    self.quant_description = quant_config

    # The model type is read from the HF config attached to the vLLM config
    # returned by ``get_current_vllm_config()``.
    hf_model_type = (
        get_current_vllm_config().model_config.hf_config.model_type)

    # Only model types listed in ``packed_modules_model_mapping`` set
    # ``packed_modules_mapping`` here; for any other type this method
    # leaves the attribute untouched.
    model_packed_modules = packed_modules_model_mapping.get(hf_model_type)
    if model_packed_modules is not None:
        self.packed_modules_mapping = model_packed_modules
58 | 64 |
|
def __repr__(self) -> str:
    """Return the parent class repr prefixed with an ``AscendQuantConfig:`` line."""
    header = "AscendQuantConfig:\n"
    parent_repr = super().__repr__()
    return header + parent_repr
|
@@ -153,6 +159,42 @@ def get_scaled_act_names(self) -> List[str]:
|
153 | 159 | return []
|
154 | 160 |
|
155 | 161 |
|
# Packed-module tables, keyed by the model type string that
# ``AscendQuantConfig.__init__`` reads from the HF config. The inner dict for
# a model type is what gets assigned to ``packed_modules_mapping``.
# NOTE(review): presumably each inner key is a fused module name and its list
# holds the checkpoint sub-module names folded into it -- confirm against the
# code that consumes ``packed_modules_mapping``.
packed_modules_model_mapping: Dict[str, Dict[str, List[str]]] = {
    "qwen3_moe": {
        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
        "gate_up_proj": ["gate_proj", "up_proj"],
        "experts": [
            "experts.0.gate_proj",
            "experts.0.up_proj",
            "experts.0.down_proj",
        ],
    },
    "deepseek_v2": {
        "gate_up_proj": ["gate_proj", "up_proj"],
        "experts": [
            "experts.0.gate_proj",
            "experts.0.up_proj",
            "experts.0.down_proj",
        ],
    },
    # Same content as "deepseek_v2", kept as a separate dict object.
    "deepseek_mtp": {
        "gate_up_proj": ["gate_proj", "up_proj"],
        "experts": [
            "experts.0.gate_proj",
            "experts.0.up_proj",
            "experts.0.down_proj",
        ],
    },
    "qwen3_next": {
        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
        "gate_up_proj": ["gate_proj", "up_proj"],
        "in_proj": ["in_proj_qkvz", "in_proj_ba"],
    },
}
| 196 | + |
| 197 | + |
156 | 198 | class AscendLinearMethod(LinearMethodBase):
|
157 | 199 | """Linear method for Ascend quantization.
|
158 | 200 |
|
|
0 commit comments