Skip to content

Commit b32b1e6

Browse files
committed
add ut for qwen3 flashcomm2
Signed-off-by: David9857 <985700846@qq.com>
1 parent 716d6f6 commit b32b1e6

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

tests/multicard/test_offline_inference_distributed.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,3 +169,21 @@ def test_models_distributed_DeepSeek_W8A8():
169169
quantization="ascend",
170170
) as vllm_model:
171171
vllm_model.generate_greedy(example_prompts, max_tokens)
172+
173+
174+
@patch.dict(os.environ, {"VLLM_ENABLE_FC": "1"})
def test_models_distributed_Qwen3_with_flashcomm2():
    """Smoke-test greedy generation on Qwen3-0.6B-Base with VLLM_ENABLE_FC=1.

    The decorator sets ``VLLM_ENABLE_FC`` to ``"1"`` in ``os.environ`` while
    the test runs. The model is loaded with ``tensor_parallel_size=2`` and a
    single short prompt is generated greedily for 5 tokens.

    The generated text is not checked: the test passes as long as model
    construction and generation finish without raising.
    """
    prompts = ["Hello, my name is"]
    token_budget = 5

    # NOTE(review): quantization="ascend" matches the W8A8 test above, but the
    # checkpoint here is the plain "-Base" model — confirm this is intended.
    runner_options = {
        "max_model_len": 8192,
        "enforce_eager": True,
        "dtype": "auto",
        "tensor_parallel_size": 2,
        "quantization": "ascend",
    }
    model_dir = snapshot_download("Qwen/Qwen3-0.6B-Base")

    with VllmRunner(model_dir, **runner_options) as vllm_model:
        vllm_model.generate_greedy(prompts, token_budget)

0 commit comments

Comments
 (0)