From 77464c93286ec1ec525f3034891e16004f989322 Mon Sep 17 00:00:00 2001
From: zhoux77899
Date: Fri, 18 Jul 2025 14:47:47 +0800
Subject: [PATCH] tests(ci): add `qwen3_moe` W8A8 quantized model test case

Signed-off-by: ZhouXiang
---
 tests/singlecard/test_offline_inference.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tests/singlecard/test_offline_inference.py b/tests/singlecard/test_offline_inference.py
index cd65a24969..48a4f219e6 100644
--- a/tests/singlecard/test_offline_inference.py
+++ b/tests/singlecard/test_offline_inference.py
@@ -37,16 +37,12 @@
     "Qwen/Qwen3-0.6B-Base",
 ]
 MULTIMODALITY_MODELS = ["Qwen/Qwen2.5-VL-3B-Instruct"]
-
 QUANTIZATION_MODELS = [
     "vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8",
+    "vllm-ascend/Qwen3-30B-A3B-W8A8-Pruning"
 ]
 os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"
 
-QUANTIZATION_MODELS = [
-    "vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8",
-]
-
 
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["half", "float16"])