File tree Expand file tree Collapse file tree 2 files changed +20
-1
lines changed Expand file tree Collapse file tree 2 files changed +20
-1
lines changed Original file line number Diff line number Diff line change @@ -188,6 +188,7 @@ jobs:
188
188
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
189
189
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
190
190
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
191
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
191
192
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
192
193
fi
193
194
@@ -218,5 +219,6 @@ jobs:
218
219
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
219
220
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
220
221
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
222
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
221
223
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
222
224
fi
Original file line number Diff line number Diff line change 23
23
import os
24
24
from unittest .mock import patch
25
25
26
- import vllm # noqa: F401
26
+ from modelscope import snapshot_download # type: ignore
27
27
from vllm import SamplingParams
28
28
29
29
from tests .conftest import VllmRunner
@@ -95,3 +95,20 @@ def test_models_distributed_DeepSeek_dbo():
95
95
distributed_executor_backend = "mp" ,
96
96
) as vllm_model :
97
97
vllm_model .generate (example_prompts , sampling_params )
98
def test_models_distributed_DeepSeek_W8A8():
    """Smoke-test Ascend W8A8 quantization on DeepSeek-V2-Lite.

    Downloads the pre-quantized checkpoint from ModelScope and runs a
    short greedy decode across 4 tensor-parallel devices to verify that
    the ``quantization="ascend"`` path loads and generates without error.
    """
    # A single short prompt is enough for a load-and-generate smoke test.
    prompts = ["Hello, my name is"]
    # Keep decoding cheap; we only care that generation completes.
    new_token_budget = 5

    # enforce_eager avoids graph capture, keeping the test fast and
    # focused on the quantized-weight execution path.
    runner = VllmRunner(
        snapshot_download("vllm-ascend/DeepSeek-V2-Lite-W8A8"),
        max_model_len=8192,
        enforce_eager=True,
        dtype="auto",
        tensor_parallel_size=4,
        quantization="ascend",
    )
    with runner as vllm_model:
        vllm_model.generate_greedy(prompts, new_token_budget)
You can’t perform that action at this time.
0 commit comments