Add unit test for Qwen3 FlashComm2

David9857 · David9857 · commit b32b1e65f7df · 2025-07-14T19:02:19.000+08:00
Signed-off-by: David9857 <985700846@qq.com>
diff --git a/tests/multicard/test_offline_inference_distributed.py b/tests/multicard/test_offline_inference_distributed.py
@@ -169,3 +169,21 @@ def test_models_distributed_DeepSeek_W8A8():
             quantization="ascend",
     ) as vllm_model:
         vllm_model.generate_greedy(example_prompts, max_tokens)
+
+
+@patch.dict(os.environ, {"VLLM_ENABLE_FC": "1"})
+def test_models_distributed_Qwen3_with_flashcomm2():
+    example_prompts = [
+        "Hello, my name is",
+    ]
+    max_tokens = 5
+
+    with VllmRunner(
+            snapshot_download("Qwen/Qwen3-0.6B-Base"),
+            max_model_len=8192,
+            enforce_eager=True,
+            dtype="auto",
+            tensor_parallel_size=2,
+            quantization="ascend",
+    ) as vllm_model:
+        vllm_model.generate_greedy(example_prompts, max_tokens)