From fe8081f679bc7b3a7acec500f06e5f024114dcae Mon Sep 17 00:00:00 2001 From: krishung5 Date: Fri, 21 Feb 2025 03:03:53 -0800 Subject: [PATCH 1/6] Add openai testing for LLM API --- python/openai/tests/conftest.py | 16 ++++++-- python/openai/tests/test_openai_client.py | 28 +++++++++---- qa/L0_openai/test.sh | 49 ++++++++++++++++++++--- 3 files changed, 76 insertions(+), 17 deletions(-) diff --git a/python/openai/tests/conftest.py b/python/openai/tests/conftest.py index 9ea9a5634e..c567a82d65 100644 --- a/python/openai/tests/conftest.py +++ b/python/openai/tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -31,8 +31,10 @@ from fastapi.testclient import TestClient from tests.utils import OpenAIServer, setup_fastapi_app, setup_server - ### TEST ENVIRONMENT SETUP ### +LLMAPI_SETUP = os.environ.get("LLMAPI_SETUP", 0) + + def infer_test_environment(): # Infer the test environment for simplicity in local dev/testing. try: @@ -48,7 +50,10 @@ def infer_test_environment(): import tensorrt_llm as _ backend = "tensorrtllm" - model = "tensorrt_llm_bls" + if LLMAPI_SETUP: + model = "tensorrt_llm" + else: + model = "tensorrt_llm_bls" return backend, model except ImportError: print("No tensorrt_llm installation found.") @@ -57,7 +62,10 @@ def infer_test_environment(): def infer_test_model_repository(backend): - model_repository = str(Path(__file__).parent / f"{backend}_models") + if LLMAPI_SETUP: + model_repository = str(Path(__file__).parent / f"{backend}_llmapi_models") + else: + model_repository = str(Path(__file__).parent / f"{backend}_models") return model_repository diff --git a/python/openai/tests/test_openai_client.py b/python/openai/tests/test_openai_client.py index 6f1b456ab4..70011d9c1b 100644 --- a/python/openai/tests/test_openai_client.py +++ b/python/openai/tests/test_openai_client.py @@ -1,4 +1,4 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -39,9 +39,16 @@ def test_openai_client_models(self, client: openai.OpenAI, backend: str): models = list(client.models.list()) print(f"Models: {models}") if backend == "tensorrtllm": - # tensorrt_llm_bls + - # preprocess -> tensorrt_llm -> postprocess - assert len(models) == 4 + import os + + LLMAPI_SETUP = os.environ.get("LLMAPI_SETUP", 0) + if LLMAPI_SETUP: + # LLM API setup only has the tensorrt_llm model + assert len(models) == 1 + else: + # tensorrt_llm_bls + + # preprocess -> tensorrt_llm -> postprocess + assert len(models) == 4 elif backend == "vllm": assert len(models) == 1 else: @@ -105,9 +112,16 @@ async def test_openai_client_models(self, client: openai.AsyncOpenAI, backend: s models = [model async for model in async_models] print(f"Models: {models}") if backend == "tensorrtllm": - # tensorrt_llm_bls + - # preprocess -> tensorrt_llm -> postprocess - assert len(models) == 4 + import os + + LLMAPI_SETUP = os.environ.get("LLMAPI_SETUP", 0) + if LLMAPI_SETUP: + # LLM API setup only has the tensorrt_llm model + assert len(models) == 1 + else: + # tensorrt_llm_bls + + # preprocess -> tensorrt_llm -> postprocess + assert len(models) == 4 elif backend == "vllm": assert len(models) == 1 else: diff --git a/qa/L0_openai/test.sh b/qa/L0_openai/test.sh index 0921bce98e..e56bca749c 100755 --- a/qa/L0_openai/test.sh +++ b/qa/L0_openai/test.sh @@ -85,6 +85,14 @@ function prepare_tensorrtllm() { python3 ${FILL_TEMPLATE} -i ${MODEL_REPO}/postprocessing/config.pbtxt tokenizer_dir:${HF_LLAMA_MODEL},triton_max_batch_size:64,postprocessing_instance_count:1 python3 ${FILL_TEMPLATE} -i ${MODEL_REPO}/tensorrt_llm_bls/config.pbtxt triton_max_batch_size:64,decoupled_mode:True,bls_instance_count:1,accumulate_tokens:False,logits_datatype:TYPE_FP32 python3 ${FILL_TEMPLATE} -i ${MODEL_REPO}/tensorrt_llm/config.pbtxt triton_backend:tensorrtllm,triton_max_batch_size:64,decoupled_mode:True,max_beam_width:1,engine_dir:${ENGINE_PATH},batching_strategy:inflight_fused_batching,max_queue_size:0,max_queue_delay_microseconds:1000,encoder_input_features_data_type:TYPE_FP16,logits_datatype:TYPE_FP32,exclude_input_in_output:True + + # Prepare LLM API setup + LLMAPI_MODEL_REPO="tests/tensorrtllm_llmapi_models" + mkdir -p ${LLMAPI_MODEL_REPO} + cp /app/all_models/llmapi/* "${LLMAPI_MODEL_REPO}" -r + + # Modify the json file model.json, from "model":"TinyLlama/TinyLlama-1.1B-Chat-v1.0", to "model": "meta-llama/Meta-Llama-3.1-8B-Instruct" + sed -i 's#"model":"TinyLlama/TinyLlama-1.1B-Chat-v1.0"#"model":"meta-llama/Meta-Llama-3.1-8B-Instruct"#g' ${LLMAPI_MODEL_REPO}/tensorrt_llm/1/model.json } function pre_test() { @@ -103,16 +111,45 @@ function run_test() { # Capture error code without exiting to allow log collection set +e - pytest -s -v --junitxml=test_openai.xml tests/ 2>&1 > ${TEST_LOG} - if [ $? -ne 0 ]; then - cat ${TEST_LOG} - echo -e "\n***\n*** Test Failed\n***" - RET=1 + + if [ "${IMAGE_KIND}" == "TRTLLM" ]; then + echo "Running TensorRT-LLM tests..." + + # First run with default model setup + echo "Running tests with default model setup..." + pytest -s -v --junitxml=test_openai_default.xml tests/ 2>&1 > test_openai_default.log + DEFAULT_RESULT=$? + + # Then run with LLM API setup + echo "Running tests with LLM API setup..." + LLMAPI_SETUP=1 pytest -s -v --junitxml=test_openai_llmapi.xml tests/ 2>&1 > test_openai_llmapi.log + LLMAPI_RESULT=$? 
+ + # Combine results + if [ $DEFAULT_RESULT -ne 0 ]; then + cat test_openai_default.log + echo -e "\n***\n*** Test Failed with default model setup\n***" + RET=1 + fi + if [ $LLMAPI_RESULT -ne 0 ]; then + cat test_openai_llmapi.log + echo -e "\n***\n*** Test Failed with LLM API setup\n***" + RET=1 + fi + else + echo "Running vLLM tests..." + pytest -s -v --junitxml=test_openai.xml tests/ 2>&1 > ${TEST_LOG} + if [ $? -ne 0 ]; then + cat ${TEST_LOG} + echo -e "\n***\n*** Test Failed\n***" + RET=1 + fi fi - set -e # Collect logs for error analysis when needed cp *.xml *.log ../../../ + + set -e popd } From be339c5754c84705bec760b082f738127a98ea2d Mon Sep 17 00:00:00 2001 From: krishung5 Date: Thu, 27 Feb 2025 01:03:04 -0800 Subject: [PATCH 2/6] Skip seed tests for LLM API --- python/openai/tests/test_chat_completions.py | 8 +++++++- python/openai/tests/test_completions.py | 10 ++++++++-- qa/L0_openai/test.sh | 5 +---- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/python/openai/tests/test_chat_completions.py b/python/openai/tests/test_chat_completions.py index 401601c526..8a83a1d025 100644 --- a/python/openai/tests/test_chat_completions.py +++ b/python/openai/tests/test_chat_completions.py @@ -1,4 +1,4 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -25,6 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import copy +import os import subprocess from pathlib import Path from typing import List @@ -368,6 +369,11 @@ def test_chat_completions_temperature_tensorrtllm( assert response1_text == response2_text assert response1_text != response3_text + # TODO: Remove xfail for LLM API when it supports seed + @pytest.mark.xfail( + condition=os.getenv("LLMAPI_SETUP") == "1", + reason="Didn't see any difference in responses with different seeds when using LLM API. Skipping for now.", + ) # Simple tests to verify random seed roughly behaves as expected def test_chat_completions_seed(self, client, model: str, messages: List[dict]): responses = [] diff --git a/python/openai/tests/test_completions.py b/python/openai/tests/test_completions.py index d89ff4701e..327042ff6b 100644 --- a/python/openai/tests/test_completions.py +++ b/python/openai/tests/test_completions.py @@ -1,4 +1,4 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -25,6 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import copy +import os import pytest @@ -238,6 +239,11 @@ def test_completions_temperature_tensorrtllm( assert response1_text == response2_text assert response1_text != response3_text + # TODO: Remove xfail for LLM API when it supports seed + @pytest.mark.xfail( + condition=os.getenv("LLMAPI_SETUP") == "1", + reason="Didn't see any difference in responses with different seeds when using LLM API. 
Skipping for now.", + ) # Simple tests to verify seed roughly behaves as expected def test_completions_seed(self, client, model: str, prompt: str): responses = [] @@ -258,7 +264,7 @@ def test_completions_seed(self, client, model: str, prompt: str): json=payload1, ) ) - # Third response should differ with different temperature in payload + # Third response should differ with different seed in payload responses.append( client.post( "/v1/completions", diff --git a/qa/L0_openai/test.sh b/qa/L0_openai/test.sh index e56bca749c..bbe204408d 100755 --- a/qa/L0_openai/test.sh +++ b/qa/L0_openai/test.sh @@ -90,8 +90,6 @@ function prepare_tensorrtllm() { LLMAPI_MODEL_REPO="tests/tensorrtllm_llmapi_models" mkdir -p ${LLMAPI_MODEL_REPO} cp /app/all_models/llmapi/* "${LLMAPI_MODEL_REPO}" -r - - # Modify the json file model.json, from "model":"TinyLlama/TinyLlama-1.1B-Chat-v1.0", to "model": "meta-llama/Meta-Llama-3.1-8B-Instruct" sed -i 's#"model":"TinyLlama/TinyLlama-1.1B-Chat-v1.0"#"model":"meta-llama/Meta-Llama-3.1-8B-Instruct"#g' ${LLMAPI_MODEL_REPO}/tensorrt_llm/1/model.json } @@ -145,11 +143,10 @@ function run_test() { RET=1 fi fi + set -e # Collect logs for error analysis when needed cp *.xml *.log ../../../ - - set -e popd } From 2f5f06e84a5486eaa3bbc88a5ae406b72479dbef Mon Sep 17 00:00:00 2001 From: krishung5 Date: Thu, 27 Feb 2025 11:00:28 -0800 Subject: [PATCH 3/6] Rewording --- python/openai/tests/test_chat_completions.py | 4 ++-- python/openai/tests/test_completions.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/openai/tests/test_chat_completions.py b/python/openai/tests/test_chat_completions.py index 8a83a1d025..09f8ed621e 100644 --- a/python/openai/tests/test_chat_completions.py +++ b/python/openai/tests/test_chat_completions.py @@ -369,10 +369,10 @@ def test_chat_completions_temperature_tensorrtllm( assert response1_text == response2_text assert response1_text != response3_text - # TODO: Remove xfail for LLM API when it supports seed + # TODO: Remove xfail for LLM API when it's verified. @pytest.mark.xfail( condition=os.getenv("LLMAPI_SETUP") == "1", - reason="Didn't see any difference in responses with different seeds when using LLM API. Skipping for now.", + reason="Seed parameter support to be verified for LLM API", ) # Simple tests to verify random seed roughly behaves as expected def test_chat_completions_seed(self, client, model: str, messages: List[dict]): diff --git a/python/openai/tests/test_completions.py b/python/openai/tests/test_completions.py index 327042ff6b..a767e723b6 100644 --- a/python/openai/tests/test_completions.py +++ b/python/openai/tests/test_completions.py @@ -239,10 +239,10 @@ def test_completions_temperature_tensorrtllm( assert response1_text == response2_text assert response1_text != response3_text - # TODO: Remove xfail for LLM API when it supports seed + # TODO: Remove xfail for LLM API when it's verified. @pytest.mark.xfail( condition=os.getenv("LLMAPI_SETUP") == "1", - reason="Didn't see any difference in responses with different seeds when using LLM API. 
Skipping for now.", + reason="Seed parameter support to be verified for LLM API", ) # Simple tests to verify seed roughly behaves as expected def test_completions_seed(self, client, model: str, prompt: str): From 70268030d6c02c92285421769848c3ed44477dcf Mon Sep 17 00:00:00 2001 From: krishung5 Date: Fri, 28 Feb 2025 15:15:07 -0800 Subject: [PATCH 4/6] Use backend fixture for llmapi --- python/openai/tests/conftest.py | 26 +++++++++------ python/openai/tests/test_chat_completions.py | 4 +-- python/openai/tests/test_completions.py | 6 ++-- python/openai/tests/test_openai_client.py | 34 ++++++++------------ python/openai/tests/utils.py | 13 ++++++-- qa/L0_openai/test.sh | 2 +- 6 files changed, 47 insertions(+), 38 deletions(-) diff --git a/python/openai/tests/conftest.py b/python/openai/tests/conftest.py index c567a82d65..8460889de2 100644 --- a/python/openai/tests/conftest.py +++ b/python/openai/tests/conftest.py @@ -31,9 +31,6 @@ from fastapi.testclient import TestClient from tests.utils import OpenAIServer, setup_fastapi_app, setup_server -### TEST ENVIRONMENT SETUP ### -LLMAPI_SETUP = os.environ.get("LLMAPI_SETUP", 0) - def infer_test_environment(): # Infer the test environment for simplicity in local dev/testing. @@ -49,10 +46,14 @@ def infer_test_environment(): try: import tensorrt_llm as _ - backend = "tensorrtllm" + # TODO: Refactor away from environment variables + LLMAPI_SETUP = os.environ.get("LLMAPI_SETUP", 0) + if LLMAPI_SETUP: + backend = "llmapi" model = "tensorrt_llm" else: + backend = "tensorrtllm" model = "tensorrt_llm_bls" return backend, model except ImportError: @@ -62,10 +63,7 @@ def infer_test_environment(): def infer_test_model_repository(backend): - if LLMAPI_SETUP: - model_repository = str(Path(__file__).parent / f"{backend}_llmapi_models") - else: - model_repository = str(Path(__file__).parent / f"{backend}_models") + model_repository = str(Path(__file__).parent / f"{backend}_models") return model_repository @@ -92,13 +90,23 @@ def infer_test_model_repository(backend): # only once for all the tests below. @pytest.fixture(scope="module") def server(): + # TODO: tensorrllm and llmapi backends both use "tensorrtllm" as the backend flag for OpenAI server. + # In the future if the backend are consolidated, this check can be updated or removed. + # key: the TEST_BACKEND value + # value: the corresponding backend flag for OpenAI server + backend_map = { + "tensorrtllm": "tensorrtllm", + "llmapi": "tensorrtllm", + "vllm": "vllm", + } + args = [ "--model-repository", TEST_MODEL_REPOSITORY, "--tokenizer", TEST_TOKENIZER, "--backend", - TEST_BACKEND, + backend_map[TEST_BACKEND], ] # TODO: Incorporate kserve frontend binding smoke tests to catch any # breakage with default values or slight cli arg variations diff --git a/python/openai/tests/test_chat_completions.py b/python/openai/tests/test_chat_completions.py index 09f8ed621e..6c0b39c1fb 100644 --- a/python/openai/tests/test_chat_completions.py +++ b/python/openai/tests/test_chat_completions.py @@ -311,7 +311,7 @@ def test_chat_completions_temperature_vllm( def test_chat_completions_temperature_tensorrtllm( self, client, backend: str, model: str, messages: List[dict] ): - if backend != "tensorrtllm": + if backend != "tensorrtllm" and backend != "llmapi": pytest.skip( reason="Only used to test TRT-LLM-specific temperature behavior" ) @@ -371,7 +371,7 @@ def test_chat_completions_temperature_tensorrtllm( # TODO: Remove xfail for LLM API when it's verified. 
@pytest.mark.xfail( - condition=os.getenv("LLMAPI_SETUP") == "1", + condition=lambda backend: backend == "llmapi", reason="Seed parameter support to be verified for LLM API", ) # Simple tests to verify random seed roughly behaves as expected diff --git a/python/openai/tests/test_completions.py b/python/openai/tests/test_completions.py index a767e723b6..5e76b9368b 100644 --- a/python/openai/tests/test_completions.py +++ b/python/openai/tests/test_completions.py @@ -192,8 +192,8 @@ def test_completions_temperature_vllm( def test_completions_temperature_tensorrtllm( self, client, backend: str, model: str, prompt: str ): - if backend != "tensorrtllm": - pytest.skip(reason="Only used to test vLLM-specific temperature behavior") + if backend != "tensorrtllm" and backend != "llmapi": + pytest.skip(reason="Only used to test TRTLLM-specific temperature behavior") responses = [] payload1 = { @@ -241,7 +241,7 @@ def test_completions_temperature_tensorrtllm( # TODO: Remove xfail for LLM API when it's verified. @pytest.mark.xfail( - condition=os.getenv("LLMAPI_SETUP") == "1", + condition=lambda backend: backend == "llmapi", reason="Seed parameter support to be verified for LLM API", ) # Simple tests to verify seed roughly behaves as expected diff --git a/python/openai/tests/test_openai_client.py b/python/openai/tests/test_openai_client.py index 70011d9c1b..6c083d91ee 100644 --- a/python/openai/tests/test_openai_client.py +++ b/python/openai/tests/test_openai_client.py @@ -39,16 +39,12 @@ def test_openai_client_models(self, client: openai.OpenAI, backend: str): models = list(client.models.list()) print(f"Models: {models}") if backend == "tensorrtllm": - import os - - LLMAPI_SETUP = os.environ.get("LLMAPI_SETUP", 0) - if LLMAPI_SETUP: - # LLM API setup only has the tensorrt_llm model - assert len(models) == 1 - else: - # tensorrt_llm_bls + - # preprocess -> tensorrt_llm -> postprocess - assert len(models) == 4 + # tensorrt_llm_bls + + # preprocess -> tensorrt_llm -> postprocess + assert len(models) == 4 + elif backend == "llmapi": + # Only has one tensorrt_llm model. + assert len(models) == 1 elif backend == "vllm": assert len(models) == 1 else: @@ -82,7 +78,7 @@ def test_openai_client_chat_completion( def test_openai_client_completion_echo( self, client: openai.OpenAI, echo: bool, backend: str, model: str, prompt: str ): - if backend == "tensorrtllm": + if backend == "tensorrtllm" or backend == "llmapi": pytest.skip( reason="TRT-LLM backend currently only supports setting this parameter at model load time", ) @@ -112,16 +108,12 @@ async def test_openai_client_models(self, client: openai.AsyncOpenAI, backend: s models = [model async for model in async_models] print(f"Models: {models}") if backend == "tensorrtllm": - import os - - LLMAPI_SETUP = os.environ.get("LLMAPI_SETUP", 0) - if LLMAPI_SETUP: - # LLM API setup only has the tensorrt_llm model - assert len(models) == 1 - else: - # tensorrt_llm_bls + - # preprocess -> tensorrt_llm -> postprocess - assert len(models) == 4 + # tensorrt_llm_bls + + # preprocess -> tensorrt_llm -> postprocess + assert len(models) == 4 + elif backend == "llmapi": + # Only has one tensorrt_llm model. + assert len(models) == 1 elif backend == "vllm": assert len(models) == 1 else: diff --git a/python/openai/tests/utils.py b/python/openai/tests/utils.py index fdffcc5ea9..a2b655d86d 100644 --- a/python/openai/tests/utils.py +++ b/python/openai/tests/utils.py @@ -1,4 +1,4 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -53,8 +53,17 @@ def setup_server(model_repository: str): def setup_fastapi_app(tokenizer: str, server: tritonserver.Server, backend: str): + # TODO: tensorrtllm and llmapi backends both use "tensorrtllm" as the backend flag for OpenAI server. + # In the future if the backends are consolidated, this check can be updated or removed. + # key: the backend value + # value: the corresponding backend flag for OpenAI server + backend_map = { + "tensorrtllm": "tensorrtllm", + "llmapi": "tensorrtllm", + "vllm": "vllm", + } engine: TritonLLMEngine = TritonLLMEngine( - server=server, tokenizer=tokenizer, backend=backend + server=server, tokenizer=tokenizer, backend=backend_map[backend] ) frontend: FastApiFrontend = FastApiFrontend(engine=engine) return frontend.app diff --git a/qa/L0_openai/test.sh b/qa/L0_openai/test.sh index bbe204408d..7a86a6db97 100755 --- a/qa/L0_openai/test.sh +++ b/qa/L0_openai/test.sh @@ -87,7 +87,7 @@ function prepare_tensorrtllm() { python3 ${FILL_TEMPLATE} -i ${MODEL_REPO}/tensorrt_llm/config.pbtxt triton_backend:tensorrtllm,triton_max_batch_size:64,decoupled_mode:True,max_beam_width:1,engine_dir:${ENGINE_PATH},batching_strategy:inflight_fused_batching,max_queue_size:0,max_queue_delay_microseconds:1000,encoder_input_features_data_type:TYPE_FP16,logits_datatype:TYPE_FP32,exclude_input_in_output:True # Prepare LLM API setup - LLMAPI_MODEL_REPO="tests/tensorrtllm_llmapi_models" + LLMAPI_MODEL_REPO="tests/llmapi_models" mkdir -p ${LLMAPI_MODEL_REPO} cp /app/all_models/llmapi/* "${LLMAPI_MODEL_REPO}" -r sed -i 's#"model":"TinyLlama/TinyLlama-1.1B-Chat-v1.0"#"model":"meta-llama/Meta-Llama-3.1-8B-Instruct"#g' ${LLMAPI_MODEL_REPO}/tensorrt_llm/1/model.json From d2daa15091f88c87053304e49ba5165cde39dead Mon Sep 17 00:00:00 2001 From: krishung5 Date: Fri, 28 Feb 2025 15:22:50 -0800 Subject: [PATCH 5/6] Remove unused import --- python/openai/tests/test_chat_completions.py | 1 - python/openai/tests/test_completions.py | 1 - 2 files changed, 2 deletions(-) diff --git a/python/openai/tests/test_chat_completions.py b/python/openai/tests/test_chat_completions.py index 6c0b39c1fb..64fd2873d6 100644 --- a/python/openai/tests/test_chat_completions.py +++ b/python/openai/tests/test_chat_completions.py @@ -25,7 +25,6 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import copy -import os import subprocess from pathlib import Path from typing import List diff --git a/python/openai/tests/test_completions.py b/python/openai/tests/test_completions.py index 5e76b9368b..1a58a3294f 100644 --- a/python/openai/tests/test_completions.py +++ b/python/openai/tests/test_completions.py @@ -25,7 +25,6 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import copy -import os import pytest From d631a648f039112074272ae36403893e816598b7 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Tue, 18 Mar 2025 02:00:41 -0700 Subject: [PATCH 6/6] Update comment as verified that seed parameter is not supported yet --- python/openai/tests/test_chat_completions.py | 2 +- python/openai/tests/test_completions.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/openai/tests/test_chat_completions.py b/python/openai/tests/test_chat_completions.py index 64fd2873d6..edcfd22fbe 100644 --- a/python/openai/tests/test_chat_completions.py +++ b/python/openai/tests/test_chat_completions.py @@ -371,7 +371,7 @@ def test_chat_completions_temperature_tensorrtllm( # TODO: Remove xfail for LLM API when it's verified. @pytest.mark.xfail( condition=lambda backend: backend == "llmapi", - reason="Seed parameter support to be verified for LLM API", + reason="Seed parameter is not supported in LLM API PyTorch workflow yet", ) # Simple tests to verify random seed roughly behaves as expected def test_chat_completions_seed(self, client, model: str, messages: List[dict]): diff --git a/python/openai/tests/test_completions.py b/python/openai/tests/test_completions.py index 1a58a3294f..0b9fe0efa3 100644 --- a/python/openai/tests/test_completions.py +++ b/python/openai/tests/test_completions.py @@ -241,7 +241,7 @@ def test_completions_temperature_tensorrtllm( # TODO: Remove xfail for LLM API when it's verified. @pytest.mark.xfail( condition=lambda backend: backend == "llmapi", - reason="Seed parameter is not supported in LLM API PyTorch workflow yet", + reason="Seed parameter is not supported in LLM API PyTorch workflow yet", ) # Simple tests to verify seed roughly behaves as expected def test_completions_seed(self, client, model: str, prompt: str):
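
Note on the backend-conditional xfail introduced in patches 4-6: a plain condition= argument on the marker has no access to pytest fixtures, so another way to key the expected failure off the resolved backend value is to apply the marker from a fixture. The sketch below is illustrative only and not part of these patches; the fixture name xfail_seed_on_llmapi and the simplified request payload are assumptions, while the backend, client, and model names mirror fixtures already used in these tests.

import pytest


@pytest.fixture
def xfail_seed_on_llmapi(request, backend):
    # Attach the marker during test setup, where the resolved `backend`
    # fixture value is available, rather than in a static marker condition.
    if backend == "llmapi":
        request.applymarker(
            pytest.mark.xfail(
                reason="Seed parameter is not supported in LLM API PyTorch workflow yet"
            )
        )


def test_completions_seed_sketch(xfail_seed_on_llmapi, client, model):
    # Loosely mirrors test_completions_seed: identical requests with the same
    # seed and sampling settings are expected to return the same text.
    payload = {
        "model": model,
        "prompt": "Hello",
        "max_tokens": 16,
        "temperature": 1,
        "seed": 10,
    }
    text1 = client.post("/v1/completions", json=payload).json()["choices"][0]["text"]
    text2 = client.post("/v1/completions", json=payload).json()["choices"][0]["text"]
    assert text1 == text2

With this shape, the per-backend expectation lives next to the fixtures rather than in an environment-variable check inside each test, in line with the direction of patch 4 ("Use backend fixture for llmapi").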