@@ -102,6 +102,7 @@ def test_models_distributed_Qwen3_MOE_TP2_WITH_ACLGRAPH():
102
102
) as vllm_model :
103
103
vllm_model .generate_greedy (example_prompts , max_tokens )
104
104
105
+
105
106
def test_Qwen3_235b_all2allv_mc2_quant (monkeypatch ):
106
107
"""Test Qwen3-235B with all2all sequence and multi-card configuration."""
107
108
# Set environment variables similar to the startup command
@@ -113,14 +114,14 @@ def test_Qwen3_235b_all2allv_mc2_quant(monkeypatch):
113
114
monkeypatch .setenv ('ACL_STREAM_TIMEOUT' , '340000' )
114
115
monkeypatch .setenv ('HCCL_OP_EXPANSION_MODE' , 'AIV' )
115
116
monkeypatch .setenv ('HCCL_OP_BASE_FFTS_MODE_ENABLE' , 'true' )
116
-
117
+
117
118
example_prompts = [
118
119
"Hello, my name is" ,
119
120
"The capital of France is" ,
120
121
"In the field of artificial intelligence," ,
121
122
]
122
123
max_tokens = 32
123
-
124
+
124
125
# Additional config matching the startup command
125
126
additional_config = {
126
127
"torchair_graph_config" : {
@@ -134,7 +135,7 @@ def test_Qwen3_235b_all2allv_mc2_quant(monkeypatch):
134
135
},
135
136
"refresh" : True
136
137
}
137
-
138
+
138
139
with VllmRunner (
139
140
"vllm-ascend/Qwen3-235B-A22B-W8A8" , # Use quantized model path
140
141
tensor_parallel_size = 4 ,
@@ -150,4 +151,3 @@ def test_Qwen3_235b_all2allv_mc2_quant(monkeypatch):
150
151
additional_config = additional_config ,
151
152
) as vllm_model :
152
153
vllm_model .generate_greedy (example_prompts , max_tokens )
153
-
0 commit comments