Skip to content

Commit 0df1013

Browse files
fix: Adding ensemble support for vllm container (#68)
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
1 parent fe14385 commit 0df1013

File tree

5 files changed

+75
-1
lines changed

5 files changed

+75
-1
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ coverage.xml
5050
.hypothesis/
5151
.pytest_cache/
5252
cover/
53+
*.out
5354

5455
# Translations
5556
*.mo

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ export TRITON_CONTAINER_VERSION=<YY.MM>
100100
--upstream-container-version=${TRITON_CONTAINER_VERSION}
101101
--backend=python:r${TRITON_CONTAINER_VERSION}
102102
--backend=vllm:r${TRITON_CONTAINER_VERSION}
103+
--backend=ensemble
103104
```
104105

105106
### Option 3. Add the vLLM Backend to the Default Triton Container
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# Redistribution and use in source and binary forms, with or without
4+
# modification, are permitted provided that the following conditions
5+
# are met:
6+
# * Redistributions of source code must retain the above copyright
7+
# notice, this list of conditions and the following disclaimer.
8+
# * Redistributions in binary form must reproduce the above copyright
9+
# notice, this list of conditions and the following disclaimer in the
10+
# documentation and/or other materials provided with the distribution.
11+
# * Neither the name of NVIDIA CORPORATION nor the names of its
12+
# contributors may be used to endorse or promote products derived
13+
# from this software without specific prior written permission.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
name: "ensemble_model"
28+
platform: "ensemble"
29+
max_batch_size: 1
30+
input [
31+
{
32+
name: "text_input"
33+
data_type: TYPE_STRING
34+
dims: [ -1 ]
35+
}
36+
]
37+
output [
38+
{
39+
name: "text_output"
40+
data_type: TYPE_STRING
41+
dims: [ -1 ]
42+
}
43+
]
44+
ensemble_scheduling {
45+
step [
46+
{
47+
model_name: "vllm_opt"
48+
model_version: -1
49+
input_map {
50+
key: "text_input"
51+
value: "text_input"
52+
}
53+
output_map {
54+
key: "text_output"
55+
value: "text_output"
56+
}
57+
}
58+
]
59+
}

ci/L0_backend_vllm/vllm_backend/test.sh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,11 @@ sed -i 's/"disable_log_requests"/"invalid_attribute"/' models/vllm_invalid_1/1/m
7070
cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_invalid_2/
7171
sed -i 's/"facebook\/opt-125m"/"invalid_model"/' models/vllm_invalid_2/1/model.json
7272

73+
74+
# Sanity check that ensembles are enabled and can be loaded successfully
75+
mkdir -p models/ensemble_model/1
76+
cp -r ensemble_config.pbtxt models/ensemble_model/config.pbtxt
77+
7378
RET=0
7479

7580
run_server
@@ -166,4 +171,4 @@ fi
166171

167172
collect_artifacts_from_subdir
168173

169-
exit $RET
174+
exit $RET

ci/L0_backend_vllm/vllm_backend/vllm_backend_test.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def setUp(self):
4848
self.triton_client = grpcclient.InferenceServerClient(url="localhost:8001")
4949
self.vllm_model_name = "vllm_opt"
5050
self.python_model_name = "add_sub"
51+
self.ensemble_model_name = "ensemble_model"
5152
self.vllm_load_test = "vllm_load_test"
5253

5354
def test_vllm_triton_backend(self):
@@ -57,6 +58,13 @@ def test_vllm_triton_backend(self):
5758
self.triton_client.load_model(self.python_model_name)
5859
self.assertTrue(self.triton_client.is_model_ready(self.python_model_name))
5960

61+
# Test to ensure that ensemble models are supported in the vLLM container.
62+
# If ensemble support is not present, Triton will error out at the model loading stage.
63+
# The ensemble model is a pipeline consisting of a single model (vllm_opt).
64+
self.triton_client.load_model(self.ensemble_model_name)
65+
self.assertTrue(self.triton_client.is_model_ready(self.ensemble_model_name))
66+
self.triton_client.unload_model(self.ensemble_model_name)
67+
6068
# Unload vllm model and test add_sub model
6169
self.triton_client.unload_model(self.vllm_load_test)
6270
self.assertFalse(self.triton_client.is_model_ready(self.vllm_load_test))

0 commit comments

Comments
 (0)