Skip to content

Commit 0381d1b

Browse files
committed
refactor quantization
Signed-off-by: hust17yixuan <303660421@qq.com> Signed-off-by: hust17yixuan <303660421@qq.com> Signed-off-by: hust17yixuan <303660421@qq.com> Signed-off-by: hust17yixuan <303660421@qq.com> Signed-off-by: hust17yixuan <303660421@qq.com> Signed-off-by: hust17yixuan <303660421@qq.com> Signed-off-by: hust17yixuan <303660421@qq.com> Signed-off-by: hust17yixuan <303660421@qq.com> Signed-off-by: hust17yixuan <303660421@qq.com>
1 parent 99bf25a commit 0381d1b

File tree

8 files changed

+1501
-3
lines changed

8 files changed

+1501
-3
lines changed

tests/ut/torchair/test_utils.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import torch
77

88
from tests.ut.base import TestBase
9+
from vllm_ascend.quantization.quantizer import SUPPORT_ASCEND_QUANTIZER_TYPE
910
from vllm_ascend.torchair import utils
1011

1112

@@ -120,3 +121,15 @@ def test_converting_weight_acl_format_format_true(self, mock_npu_cast,
120121

121122
utils.converting_weight_acl_format(model, ACL_FORMAT_FRACTAL_NZ)
122123
mock_npu_cast.assert_not_called()
124+
125+
def test_torchair_quant_method_register(self):
    """Check that torchair registration replaces both dynamic quantizers.

    The entries stored under "W8A8_DYNAMIC" and "W4A8_DYNAMIC" in
    SUPPORT_ASCEND_QUANTIZER_TYPE are captured first, then
    utils.torchair_quant_method_register() is called, and each entry is
    asserted to differ from its captured value.
    """
    # Snapshot the registry entries as they are *before* registration.
    quantizers_before = {
        quant_type: SUPPORT_ASCEND_QUANTIZER_TYPE[quant_type]
        for quant_type in ("W8A8_DYNAMIC", "W4A8_DYNAMIC")
    }

    utils.torchair_quant_method_register()

    # Each entry must now differ from the snapshot taken above.
    for quant_type, previous in quantizers_before.items():
        self.assertNotEqual(previous,
                            SUPPORT_ASCEND_QUANTIZER_TYPE[quant_type])

vllm_ascend/torchair/models/torchair_deepseek_v2.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,9 @@
7171

7272
from vllm_ascend.ascend_config import get_ascend_config
7373
from vllm_ascend.quantization.quant_config import AscendLinearMethod
74-
from vllm_ascend.quantization.w8a8_dynamic import AscendW8A8DynamicLinearMethod
7574
from vllm_ascend.torchair.ops.torchair_fused_moe import TorchairAscendFusedMoE
75+
from vllm_ascend.torchair.quantization.torchair_w8a8_dynamic import \
76+
TorchairAscendW8A8DynamicLinearMethod
7677
from vllm_ascend.utils import dispose_tensor, npu_prefetch
7778

7879

@@ -261,8 +262,9 @@ def __init__(
261262
quant_method = self.gate_up_proj.quant_method
262263
if isinstance(quant_method, UnquantizedLinearMethod):
263264
self.act_fn = TorchairDeepseekV2SiluAndMul()
264-
elif (isinstance(quant_method, AscendLinearMethod) and isinstance(
265-
quant_method.quant_method, AscendW8A8DynamicLinearMethod)):
265+
elif (isinstance(quant_method, AscendLinearMethod)
266+
and isinstance(quant_method.quant_method,
267+
TorchairAscendW8A8DynamicLinearMethod)):
266268
# TODO(sdmyzlp): Currently preserved as before:
267269
# 1. The only quantization supported for silu is W8A8Dynamic
268270
# 2. Output dtype of gate_up/down is fixed to be int32/bfloat16

vllm_ascend/torchair/quantization/__init__.py

Whitespace-only changes.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
from vllm_ascend.quantization.quantizer import VLLMAscendQuantizer
2+
from vllm_ascend.torchair.quantization.torchair_w4a8_dynamic import (
3+
TorchairAscendW4A8DynamicFusedMoEMethod,
4+
TorchairAscendW4A8DynamicLinearMethod)
5+
from vllm_ascend.torchair.quantization.torchair_w8a8_dynamic import (
6+
TorchairAscendW8A8DynamicFusedMoEMethod,
7+
TorchairAscendW8A8DynamicLinearMethod)
8+
9+
10+
class TorchairW8A8DYNAMICQuantizer(VLLMAscendQuantizer):
    """Quantizer that builds the torchair W8A8 dynamic quant methods."""

    @staticmethod
    def build_linear_method():
        """Return a new TorchairAscendW8A8DynamicLinearMethod instance."""
        linear_method = TorchairAscendW8A8DynamicLinearMethod()
        return linear_method

    @staticmethod
    def build_moe_method():
        """Return a new TorchairAscendW8A8DynamicFusedMoEMethod instance."""
        moe_method = TorchairAscendW8A8DynamicFusedMoEMethod()
        return moe_method
19+
20+
21+
class TorchairW4A8DYNAMICQuantizer(VLLMAscendQuantizer):
    """Quantizer that builds the torchair W4A8 dynamic quant methods."""

    @staticmethod
    def build_linear_method():
        """Return a new TorchairAscendW4A8DynamicLinearMethod instance."""
        linear_method = TorchairAscendW4A8DynamicLinearMethod()
        return linear_method

    @staticmethod
    def build_moe_method():
        """Return a new TorchairAscendW4A8DynamicFusedMoEMethod instance."""
        moe_method = TorchairAscendW4A8DynamicFusedMoEMethod()
        return moe_method

0 commit comments

Comments
 (0)