We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 716d6f6 · commit b32b1e6 — Copy full SHA for b32b1e6
tests/multicard/test_offline_inference_distributed.py
@@ -169,3 +169,21 @@ def test_models_distributed_DeepSeek_W8A8():
169
quantization="ascend",
170
) as vllm_model:
171
vllm_model.generate_greedy(example_prompts, max_tokens)
172
+
173
174
@patch.dict(os.environ, {"VLLM_ENABLE_FC": "1"})
def test_models_distributed_Qwen3_with_flashcomm2():
    """Smoke-test greedy generation on Qwen3-0.6B-Base with FlashComm v2.

    ``VLLM_ENABLE_FC=1`` is patched into the environment for the duration of
    the test so the runner exercises the FlashComm communication path across
    the two tensor-parallel ranks. The test only checks that generation
    completes without error; it does not assert on the generated text.
    """
    prompts = ["Hello, my name is"]
    gen_tokens = 5

    runner_kwargs = dict(
        max_model_len=8192,
        enforce_eager=True,  # skip graph capture; keeps the smoke test fast
        dtype="auto",
        tensor_parallel_size=2,
        quantization="ascend",
    )
    with VllmRunner(snapshot_download("Qwen/Qwen3-0.6B-Base"),
                    **runner_kwargs) as vllm_model:
        vllm_model.generate_greedy(prompts, gen_tokens)
0 commit comments