16 changes: 12 additions & 4 deletions python/openai/tests/conftest.py
@@ -1,4 +1,4 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -31,8 +31,10 @@
from fastapi.testclient import TestClient
from tests.utils import OpenAIServer, setup_fastapi_app, setup_server


### TEST ENVIRONMENT SETUP ###
LLMAPI_SETUP = os.environ.get("LLMAPI_SETUP", 0)
Contributor

Would be nice to clean up some of the if/else everywhere for the llmapi vs. trtllm handling, but hopefully we can consolidate the supported backend implementations once it's a bit more mature.
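A minimal sketch of what that consolidation could look like, purely illustrative (the fixture below is hypothetical and not part of this PR):

    import os

    import pytest


    @pytest.fixture(scope="session")
    def llmapi_setup() -> bool:
        # Read the LLMAPI_SETUP flag once per session so tests can branch or
        # skip on it instead of re-reading the environment in every if/else.
        return os.environ.get("LLMAPI_SETUP") == "1"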



def infer_test_environment():
    # Infer the test environment for simplicity in local dev/testing.
    try:
@@ -48,7 +50,10 @@ def infer_test_environment():
        import tensorrt_llm as _

        backend = "tensorrtllm"
        model = "tensorrt_llm_bls"
        if LLMAPI_SETUP:
            model = "tensorrt_llm"
        else:
            model = "tensorrt_llm_bls"
Contributor
@rmccorm4 Feb 27, 2025

Does it make sense to set backend = "llmapi" and use the backend fixture to check for llmapi in the tests, following similar patterns for vllm/trtllm in the existing tests?

ex:

    if backend != "tensorrtllm":
        pytest.skip(
            reason="Only used to test TRT-LLM-specific temperature behavior"
        )

Not sure what the config.pbtxt backend field will be for LLM API though, so let me know what you're thinking.

Contributor Author

The config.pbtxt backend field will be python for the LLM API backend.

I thought of the LLM API backend as one of the tensorrtllm backends, so I didn't add a separate "llmapi" backend fixture.

I think avoiding an env var would make more sense; however, right now the logic that determines which backend to use is to import the tensorrt_llm or vllm module, check for an ImportError, and then set the backend and model name accordingly.

    try:
        import tensorrt_llm as _
        backend = "tensorrtllm"
        model = "tensorrt_llm_bls"
        return backend, model
    except ImportError:
        print("No tensorrt_llm installation found.")

Both the general tensorrtllm and llmapi backend cases will import the tensorrt_llm module successfully, so I think some sort of flag would still be required to know whether this is a general trtllm model or an LLM API one.
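A minimal sketch of how that flag could be folded into the import-based detection; the model names and env var mirror this PR, but the standalone helper itself is illustrative:

    import os


    def infer_trtllm_model() -> str:
        # Both the classic TRT-LLM setup and the LLM API setup can import
        # tensorrt_llm, so an explicit flag decides which model to target.
        import tensorrt_llm as _  # raises ImportError when TRT-LLM is absent

        if os.environ.get("LLMAPI_SETUP") == "1":
            return "tensorrt_llm"  # single LLM API model (python backend)
        return "tensorrt_llm_bls"  # classic BLS entry point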

Contributor

> The config.pbtxt backend field will be python for the LLM API backend.

Do we plan to add a python-based backend like the vllm one?

Contributor Author

Do you mean making it backend: llmapi in the config.pbtxt? I don't think there's a plan to do so right now. I could see it being less confusing to have that once the pivot is more finalized; right now it's more like adding another Python model just like the original one.
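For reference, a minimal sketch of the config.pbtxt being discussed; only the backend field is confirmed in this thread, and the path/model name just follow the repo layout added by this PR:

    # tests/tensorrtllm_llmapi_models/tensorrt_llm/config.pbtxt (illustrative)
    name: "tensorrt_llm"
    backend: "python"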

Contributor

Oh right, because they're not using the runtime config.pbtxt feature currently; they just swapped the backend to python instead. Gotcha, makes sense.

Will the llmapi model.py replace the existing model.py in the trtllm backend? Hopefully we're not planning to support three implementations for now 😅

Contributor Author

Agreed, confirming with the TRT-LLM team on this.

        return backend, model
    except ImportError:
        print("No tensorrt_llm installation found.")
@@ -57,7 +62,10 @@ def infer_test_environment():


def infer_test_model_repository(backend):
    model_repository = str(Path(__file__).parent / f"{backend}_models")
    if LLMAPI_SETUP:
        model_repository = str(Path(__file__).parent / f"{backend}_llmapi_models")
    else:
        model_repository = str(Path(__file__).parent / f"{backend}_models")
    return model_repository


8 changes: 7 additions & 1 deletion python/openai/tests/test_chat_completions.py
@@ -1,4 +1,4 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -25,6 +25,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import copy
import os
import subprocess
from pathlib import Path
from typing import List
@@ -368,6 +369,11 @@ def test_chat_completions_temperature_tensorrtllm(
        assert response1_text == response2_text
        assert response1_text != response3_text

    # TODO: Remove xfail for LLM API when it supports seed
    @pytest.mark.xfail(
        condition=os.getenv("LLMAPI_SETUP") == "1",
        reason="Didn't see any difference in responses with different seeds when using LLM API. Skipping for now.",
    )
    # Simple tests to verify random seed roughly behaves as expected
    def test_chat_completions_seed(self, client, model: str, messages: List[dict]):
        responses = []
10 changes: 8 additions & 2 deletions python/openai/tests/test_completions.py
@@ -1,4 +1,4 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -25,6 +25,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import copy
import os

import pytest

@@ -238,6 +239,11 @@ def test_completions_temperature_tensorrtllm(
        assert response1_text == response2_text
        assert response1_text != response3_text

    # TODO: Remove xfail for LLM API when it supports seed
    @pytest.mark.xfail(
        condition=os.getenv("LLMAPI_SETUP") == "1",
        reason="Didn't see any difference in responses with different seeds when using LLM API. Skipping for now.",
    )
    # Simple tests to verify seed roughly behaves as expected
    def test_completions_seed(self, client, model: str, prompt: str):
        responses = []
@@ -258,7 +264,7 @@ def test_completions_seed(self, client, model: str, prompt: str):
                json=payload1,
            )
        )
        # Third response should differ with different temperature in payload
        # Third response should differ with different seed in payload
        responses.append(
            client.post(
                "/v1/completions",
28 changes: 21 additions & 7 deletions python/openai/tests/test_openai_client.py
@@ -1,4 +1,4 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -39,9 +39,16 @@ def test_openai_client_models(self, client: openai.OpenAI, backend: str):
        models = list(client.models.list())
        print(f"Models: {models}")
        if backend == "tensorrtllm":
            # tensorrt_llm_bls +
            # preprocess -> tensorrt_llm -> postprocess
            assert len(models) == 4
            import os

            LLMAPI_SETUP = os.environ.get("LLMAPI_SETUP", 0)
            if LLMAPI_SETUP:
                # LLM API setup only has the tensorrt_llm model
                assert len(models) == 1
            else:
                # tensorrt_llm_bls +
                # preprocess -> tensorrt_llm -> postprocess
                assert len(models) == 4
        elif backend == "vllm":
            assert len(models) == 1
        else:
@@ -105,9 +112,16 @@ async def test_openai_client_models(self, client: openai.AsyncOpenAI, backend: s
        models = [model async for model in async_models]
        print(f"Models: {models}")
        if backend == "tensorrtllm":
            # tensorrt_llm_bls +
            # preprocess -> tensorrt_llm -> postprocess
            assert len(models) == 4
            import os

            LLMAPI_SETUP = os.environ.get("LLMAPI_SETUP", 0)
            if LLMAPI_SETUP:
                # LLM API setup only has the tensorrt_llm model
                assert len(models) == 1
            else:
                # tensorrt_llm_bls +
                # preprocess -> tensorrt_llm -> postprocess
                assert len(models) == 4
        elif backend == "vllm":
            assert len(models) == 1
        else:
44 changes: 39 additions & 5 deletions qa/L0_openai/test.sh
@@ -85,6 +85,12 @@ function prepare_tensorrtllm() {
    python3 ${FILL_TEMPLATE} -i ${MODEL_REPO}/postprocessing/config.pbtxt tokenizer_dir:${HF_LLAMA_MODEL},triton_max_batch_size:64,postprocessing_instance_count:1
    python3 ${FILL_TEMPLATE} -i ${MODEL_REPO}/tensorrt_llm_bls/config.pbtxt triton_max_batch_size:64,decoupled_mode:True,bls_instance_count:1,accumulate_tokens:False,logits_datatype:TYPE_FP32
    python3 ${FILL_TEMPLATE} -i ${MODEL_REPO}/tensorrt_llm/config.pbtxt triton_backend:tensorrtllm,triton_max_batch_size:64,decoupled_mode:True,max_beam_width:1,engine_dir:${ENGINE_PATH},batching_strategy:inflight_fused_batching,max_queue_size:0,max_queue_delay_microseconds:1000,encoder_input_features_data_type:TYPE_FP16,logits_datatype:TYPE_FP32,exclude_input_in_output:True

    # Prepare LLM API setup
    LLMAPI_MODEL_REPO="tests/tensorrtllm_llmapi_models"
    mkdir -p ${LLMAPI_MODEL_REPO}
    cp -r /app/all_models/llmapi/* "${LLMAPI_MODEL_REPO}"
    sed -i 's#"model":"TinyLlama/TinyLlama-1.1B-Chat-v1.0"#"model":"meta-llama/Meta-Llama-3.1-8B-Instruct"#g' ${LLMAPI_MODEL_REPO}/tensorrt_llm/1/model.json
}

function pre_test() {
@@ -103,11 +109,39 @@ function run_test() {

    # Capture error code without exiting to allow log collection
    set +e
    pytest -s -v --junitxml=test_openai.xml tests/ 2>&1 > ${TEST_LOG}
    if [ $? -ne 0 ]; then
        cat ${TEST_LOG}
        echo -e "\n***\n*** Test Failed\n***"
        RET=1

    if [ "${IMAGE_KIND}" == "TRTLLM" ]; then
        echo "Running TensorRT-LLM tests..."

        # First run with default model setup
        echo "Running tests with default model setup..."
        pytest -s -v --junitxml=test_openai_default.xml tests/ 2>&1 > test_openai_default.log
        DEFAULT_RESULT=$?

        # Then run with LLM API setup
        echo "Running tests with LLM API setup..."
        LLMAPI_SETUP=1 pytest -s -v --junitxml=test_openai_llmapi.xml tests/ 2>&1 > test_openai_llmapi.log
        LLMAPI_RESULT=$?

        # Combine results
        if [ $DEFAULT_RESULT -ne 0 ]; then
            cat test_openai_default.log
            echo -e "\n***\n*** Test Failed with default model setup\n***"
            RET=1
        fi
        if [ $LLMAPI_RESULT -ne 0 ]; then
            cat test_openai_llmapi.log
            echo -e "\n***\n*** Test Failed with LLM API setup\n***"
            RET=1
        fi
    else
        echo "Running vLLM tests..."
        pytest -s -v --junitxml=test_openai.xml tests/ 2>&1 > ${TEST_LOG}
        if [ $? -ne 0 ]; then
            cat ${TEST_LOG}
            echo -e "\n***\n*** Test Failed\n***"
            RET=1
        fi
    fi
    set -e
