@@ -64,43 +64,6 @@ def test_batching(
64
64
)
65
65
66
66
67
- @pytest .mark .parametrize ("model" , MODELS )
68
- @pytest .mark .parametrize ("dtype" , ["bfloat16" ])
69
- @pytest .mark .parametrize ("max_tokens" , [15 ])
70
- def test_n_lt_1 (
71
- vllm_runner ,
72
- example_prompts ,
73
- model : str ,
74
- dtype : str ,
75
- max_tokens : int ,
76
- ) -> None :
77
- # To pass the small model tests, we need full precision.
78
- # assert dtype == "float"
79
-
80
- with vllm_runner (model , dtype = dtype ) as vllm_model :
81
- for_loop_outputs = []
82
- for _ in range (10 ):
83
- for_loop_outputs .append (
84
- vllm_model .generate_greedy ([example_prompts [1 ]],
85
- max_tokens )[0 ])
86
- sampling_params = SamplingParams (n = 10 ,
87
- temperature = 0.001 ,
88
- seed = 0 ,
89
- max_tokens = max_tokens )
90
- n_lt_1_outputs = vllm_model .generate ([example_prompts [1 ]],
91
- sampling_params )
92
- token_ids , texts = n_lt_1_outputs [0 ]
93
- n_lt_1_outputs = [(token_id , text )
94
- for token_id , text in zip (token_ids , texts )]
95
-
96
- check_outputs_equal (
97
- outputs_0_lst = n_lt_1_outputs ,
98
- outputs_1_lst = for_loop_outputs ,
99
- name_0 = "vllm_n_lt_1_outputs" ,
100
- name_1 = "vllm" ,
101
- )
102
-
103
-
104
67
@pytest .mark .parametrize ("model" , MODELS )
105
68
@pytest .mark .parametrize ("dtype" , ["bfloat16" ])
106
69
@pytest .mark .parametrize ("max_tokens" , [20 ])
0 commit comments