
Commit 1ff0e9e

[CI] Refactor e2e CI
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent 0bd5ff5 commit 1ff0e9e

22 files changed: +195 −987 lines

.github/workflows/vllm_ascend_test.yaml

Lines changed: 1 addition & 5 deletions
@@ -191,7 +191,6 @@ jobs:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           VLLM_USE_MODELSCOPE: True
         run: |
-          pytest -sv tests/e2e/singlecard/test_offline_inference.py
           pytest -sv tests/e2e/singlecard/test_ilama_lora.py
           pytest -sv tests/e2e/singlecard/test_guided_decoding.py
           pytest -sv tests/e2e/singlecard/test_camem.py
@@ -202,16 +201,14 @@ jobs:
           # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
           pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
 
-          # All other tests, ignore: 310p test, accuracy test.
+          # All other tests.
           pytest -sv tests/e2e/singlecard/ \
-            --ignore=tests/e2e/singlecard/test_offline_inference.py \
             --ignore=tests/e2e/singlecard/test_ilama_lora.py \
             --ignore=tests/e2e/singlecard/test_guided_decoding.py \
             --ignore=tests/e2e/singlecard/test_camem.py \
             --ignore=tests/e2e/singlecard/test_embedding.py \
             --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py \
             --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py \
-            --ignore=tests/e2e/singlecard/test_offline_inference_310p.py
   e2e-2-cards:
     needs: [e2e]
     if: ${{ needs.e2e.result == 'success' }}
@@ -288,4 +285,3 @@ jobs:
           pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
             --ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
             --ignore=tests/e2e/multicard/test_data_parallel.py \
-            --ignore=tests/e2e/multicard/test_offline_inference_310p.py

.github/workflows/vllm_ascend_test_310p.yaml

Lines changed: 3 additions & 3 deletions
@@ -111,7 +111,7 @@ jobs:
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
         run: |
           if [[ "${{ matrix.os }}" == "linux-aarch64-310p-1" ]]; then
-            pytest -sv tests/e2e/singlecard/test_offline_inference_310p.py
+            pytest -sv tests/e2e/310p/test_offline_inference_310p.py
           else
-            pytest -sv tests/e2e/multicard/test_offline_inference_310p.py
-          fi
+            pytest -sv tests/e2e/310p/test_offline_inference_parallel_310p.py
+          fi
File renamed without changes.
File renamed without changes.

tests/e2e/conftest.py

Lines changed: 3 additions & 128 deletions
@@ -23,23 +23,19 @@
 from typing import Any, List, Optional, Tuple, TypeVar, Union
 
 import numpy as np
-import pytest
 import torch
-from modelscope import snapshot_download  # type: ignore[import-untyped]
 from PIL import Image
 from torch import nn
 from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer,
                           BatchEncoding, BatchFeature)
 from transformers.models.auto.auto_factory import _BaseAutoModelClass
 from vllm import LLM, SamplingParams
 from vllm.config import TaskOption, _get_and_verify_dtype
-from vllm.inputs import ExplicitEncoderDecoderPrompt, TextPrompt, TokensPrompt
+from vllm.inputs import TextPrompt
 from vllm.outputs import RequestOutput
-from vllm.sampling_params import BeamSearchParams
 from vllm.transformers_utils.utils import maybe_model_redirect
-from vllm.utils import is_list_of
 
-from tests.e2e.model_utils import (PROMPT_TEMPLATES, TokensTextLogprobs,
+from tests.e2e.model_utils import (TokensTextLogprobs,
                                    TokensTextLogprobsPromptLogprobs)
 # TODO: remove this part after the patch merged into vllm, if
 # we not explicitly patch here, some of them might be effectiveless
@@ -62,7 +58,6 @@
 PromptVideoInput = _PromptMultiModalInput[np.ndarray]
 
 _TEST_DIR = os.path.dirname(__file__)
-_TEST_PROMPTS = [os.path.join(_TEST_DIR, "prompts", "example.txt")]
 
 
 def cleanup_dist_env_and_memory(shutdown_ray: bool = False):
9590
block_size: int = 16,
9691
enable_chunked_prefill: bool = False,
9792
swap_space: int = 4,
98-
enforce_eager: Optional[bool] = True,
93+
enforce_eager: Optional[bool] = False,
9994
quantization: Optional[str] = None,
10095
**kwargs,
10196
) -> None:
@@ -220,26 +215,6 @@ def generate_w_logprobs(
220215
if sampling_params.prompt_logprobs is None else
221216
toks_str_logsprobs_prompt_logprobs)
222217

223-
def generate_encoder_decoder_w_logprobs(
224-
self,
225-
encoder_decoder_prompts: List[ExplicitEncoderDecoderPrompt[str, str]],
226-
sampling_params: SamplingParams,
227-
) -> Union[List[TokensTextLogprobs],
228-
List[TokensTextLogprobsPromptLogprobs]]:
229-
'''
230-
Logprobs generation for vLLM encoder/decoder models
231-
'''
232-
233-
assert sampling_params.logprobs is not None
234-
req_outputs = self.model.generate(encoder_decoder_prompts,
235-
sampling_params=sampling_params)
236-
toks_str_logsprobs_prompt_logprobs = (
237-
self._final_steps_generate_w_logprobs(req_outputs))
238-
# Omit prompt logprobs if not required by sampling params
239-
return ([x[0:-1] for x in toks_str_logsprobs_prompt_logprobs]
240-
if sampling_params.prompt_logprobs is None else
241-
toks_str_logsprobs_prompt_logprobs)
242-
243218
def generate_greedy(
244219
self,
245220
prompts: List[str],
@@ -284,53 +259,6 @@ def generate_greedy_logprobs(
                                         audios=audios,
                                         videos=videos)
 
-    def generate_encoder_decoder_greedy_logprobs(
-        self,
-        encoder_decoder_prompts: List[ExplicitEncoderDecoderPrompt[str, str]],
-        max_tokens: int,
-        num_logprobs: int,
-        num_prompt_logprobs: Optional[int] = None,
-    ) -> Union[List[TokensTextLogprobs],
-               List[TokensTextLogprobsPromptLogprobs]]:
-        greedy_logprobs_params = SamplingParams(
-            temperature=0.0,
-            max_tokens=max_tokens,
-            logprobs=num_logprobs,
-            prompt_logprobs=(num_prompt_logprobs),
-        )
-        '''
-        Greedy logprobs generation for vLLM encoder/decoder models
-        '''
-
-        return self.generate_encoder_decoder_w_logprobs(
-            encoder_decoder_prompts, greedy_logprobs_params)
-
-    def generate_beam_search(
-        self,
-        prompts: Union[List[str], List[List[int]]],
-        beam_width: int,
-        max_tokens: int,
-    ) -> List[Tuple[List[List[int]], List[str]]]:
-        if is_list_of(prompts, str, check="all"):
-            prompts = [TextPrompt(prompt=prompt) for prompt in prompts]
-        else:
-            prompts = [
-                TokensPrompt(prompt_token_ids=tokens) for tokens in prompts
-            ]
-        outputs = self.model.beam_search(
-            prompts,
-            BeamSearchParams(beam_width=beam_width, max_tokens=max_tokens))
-        returned_outputs = []
-        for output in outputs:
-            token_ids = [x.tokens for x in output.sequences]
-            texts = [x.text for x in output.sequences]
-            returned_outputs.append((token_ids, texts))
-        return returned_outputs
-
-    def classify(self, prompts: List[str]) -> List[List[float]]:
-        req_outputs = self.model.classify(prompts)
-        return [req_output.outputs.probs for req_output in req_outputs]
-
     def encode(
         self,
         prompts: List[str],
@@ -346,50 +274,6 @@ def encode(
         req_outputs = self.model.embed(inputs)
         return [req_output.outputs.embedding for req_output in req_outputs]
 
-    def score(
-        self,
-        text_1: Union[str, List[str]],
-        text_2: Union[str, List[str]],
-    ) -> List[float]:
-        req_outputs = self.model.score(text_1, text_2)
-        return [req_output.outputs.score for req_output in req_outputs]
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc_value, traceback):
-        del self.model
-        cleanup_dist_env_and_memory()
-
-
-@pytest.fixture(scope="session")
-def vllm_runner():
-    return VllmRunner
-
-
-@pytest.fixture(params=list(PROMPT_TEMPLATES.keys()))
-def prompt_template(request):
-    return PROMPT_TEMPLATES[request.param]
-
-
-def _read_prompts(filename: str) -> list[str]:
-    with open(filename) as f:
-        prompts = f.readlines()
-    return prompts
-
-
-@pytest.fixture
-def example_prompts() -> list[str]:
-    prompts = []
-    for filename in _TEST_PROMPTS:
-        prompts += _read_prompts(filename)
-    return prompts
-
-
-@pytest.fixture(scope="session")
-def ilama_lora_files():
-    return snapshot_download(repo_id="vllm-ascend/ilama-text2sql-spider")
-
 
 class HfRunner:
 
@@ -502,18 +386,9 @@ def __init__(
         if skip_tokenizer_init:
             self.tokenizer = self.processor.tokenizer
 
-    def encode(self, prompts: list[str], *args,
-               **kwargs) -> list[list[torch.Tensor]]:
-        return self.model.encode(prompts, *args, **kwargs)
-
     def __enter__(self):
         return self
 
     def __exit__(self, exc_type, exc_value, traceback):
         del self.model
         cleanup_dist_env_and_memory()
-
-
-@pytest.fixture(scope="session")
-def hf_runner():
-    return HfRunner
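
Note: with the vllm_runner and hf_runner fixtures removed from conftest.py, e2e tests presumably construct the runners directly. The sketch below is only an illustration of that pattern, not code from this commit: the model name and prompts are placeholders, and it assumes generate_greedy keeps a (prompts, max_tokens) signature. Because this commit also flips VllmRunner's enforce_eager default from True to False, a test that still wants eager mode now has to request it explicitly.

from tests.e2e.conftest import VllmRunner


def test_basic_generation():
    # Placeholder prompts and model id, purely for illustration.
    prompts = ["Hello, my name is", "The future of AI is"]
    runner = VllmRunner(
        "Qwen/Qwen2.5-0.5B-Instruct",  # hypothetical small model
        enforce_eager=True,  # default is now False, so opt back in explicitly
    )
    outputs = runner.generate_greedy(prompts, max_tokens=32)
    # Expect one generation result per input prompt.
    assert len(outputs) == len(prompts)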
