
Commit 4c1ead8

Author: paulyu
[Bugfix] fix yapf issues
Signed-off-by: paulyu <paulyu0307@gmail.com>
1 parent d988cf8 commit 4c1ead8

2 files changed: +23 -25 lines changed

tests/multicard/test_lora_quant_tp.py

Lines changed: 1 addition & 2 deletions
@@ -6,8 +6,7 @@
 
 
 @pytest.mark.parametrize("model", MODELS)
-def test_quant_model_tp_equality(tinyllama_lora_files,
-                                 model):
+def test_quant_model_tp_equality(tinyllama_lora_files, model):
     if model.quantization == "GPTQ":
         pytest.skip("GPTQ lora outputs are just incredibly unstable")
     with VllmRunner(model=model.model_path,

tests/singlecard/test_lora_quant.py

Lines changed: 22 additions & 23 deletions
@@ -18,16 +18,15 @@ class ModelWithQuantization:
 
 MODELS: list[ModelWithQuantization]
 MODELS = [
-    ModelWithQuantization(
-        model_path="TinyLlama/TinyLlama-1.1B-Chat-v0.3",
-        quantization=None),
-        # ModelWithQuantization(
-        #     model_path="TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ",
-        #     quantization="AWQ"), #AWQ quantization is currently not supported in ROCm. (Ref: https://github.com/vllm-project/vllm/blob/f6518b2b487724b3aa20c8b8224faba5622c4e44/tests/lora/test_quant_model.py#L23)
-        # ModelWithQuantization(
-        #     model_path="TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ",
-        #     quantization="GPTQ"),
-]
+    ModelWithQuantization(model_path="TinyLlama/TinyLlama-1.1B-Chat-v0.3",
+                          quantization=None),
+    # ModelWithQuantization(
+    #     model_path="TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ",
+    #     quantization="AWQ"), #AWQ quantization is currently not supported in ROCm. (Ref: https://github.com/vllm-project/vllm/blob/f6518b2b487724b3aa20c8b8224faba5622c4e44/tests/lora/test_quant_model.py#L23)
+    # ModelWithQuantization(
+    #     model_path="TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ",
+    #     quantization="GPTQ"),
+]
 
 
 def do_sample(llm: vllm.LLM,
@@ -117,33 +116,33 @@ def expect_match(output, expected_output):
                     max_num_seqs=16) as vllm_model:
         print("no lora")
         output = do_sample(vllm_model,
-                               tinyllama_lora_files,
-                               lora_id=0,
-                               max_tokens=max_tokens)
+                           tinyllama_lora_files,
+                           lora_id=0,
+                           max_tokens=max_tokens)
         expect_match(output, expected_no_lora_output)
 
         print("lora 1")
         output = do_sample(vllm_model,
-                               tinyllama_lora_files,
-                               lora_id=1,
-                               max_tokens=max_tokens)
+                           tinyllama_lora_files,
+                           lora_id=1,
+                           max_tokens=max_tokens)
         expect_match(output, expected_lora_output)
 
         print("no lora")
         output = do_sample(vllm_model,
-                               tinyllama_lora_files,
-                               lora_id=0,
-                               max_tokens=max_tokens)
+                           tinyllama_lora_files,
+                           lora_id=0,
+                           max_tokens=max_tokens)
         expect_match(output, expected_no_lora_output)
 
         print("lora 2")
         output = do_sample(vllm_model,
-                               tinyllama_lora_files,
-                               lora_id=2,
-                               max_tokens=max_tokens)
+                           tinyllama_lora_files,
+                           lora_id=2,
+                           max_tokens=max_tokens)
         expect_match(output, expected_lora_output)
 
         print("removing lora")
 
     del vllm_model
-        cleanup_dist_env_and_memory()
+    cleanup_dist_env_and_memory()
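
Both diffs are formatting-only, consistent with the commit message. As a quick sanity check, a minimal sketch (not part of this commit) of verifying the two touched files against yapf is shown below; it assumes yapf is installed and run from the repository root so the project's own style configuration is picked up.

# Minimal sketch, assuming yapf is installed and the project's style config
# (e.g. .style.yapf or setup.cfg) is discoverable from the working directory.
from yapf.yapflib.yapf_api import FormatFile

FILES = [
    "tests/multicard/test_lora_quant_tp.py",
    "tests/singlecard/test_lora_quant.py",
]

for path in FILES:
    # FormatFile returns (reformatted_source, encoding, changed);
    # changed is True when the on-disk file differs from yapf's output.
    _, _, changed = FormatFile(path)
    print(f"{path}: {'needs reformatting' if changed else 'formatting OK'}")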
