Commit f77614e

Update CI - Bump vllm to v0.4.2 (#43)
1 parent 861a198 commit f77614e

File tree

2 files changed: +7 −9 lines changed

ci/L0_multi_gpu/vllm_backend/test.sh

Lines changed: 0 additions & 2 deletions
@@ -42,8 +42,6 @@ rm -rf models && mkdir -p models
 cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_opt
 sed -i '3s/^/ "tensor_parallel_size": 2,\n/' models/vllm_opt/1/model.json
 
-python3 -m pip install --upgrade pip && pip3 install tritonclient[grpc] nvidia-ml-py3
-
 RET=0
 
 run_server
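
For context, the sed line above prepends "tensor_parallel_size": 2 before the third line of the copied model.json, so the test runs the model across two GPUs. A sketch of the resulting file, assuming the sample repo's model.json carries the usual vLLM engine arguments (the surrounding fields are an assumption, not taken from this commit; only the injected line is from the diff):

    {
        "model": "facebook/opt-125m",
        "tensor_parallel_size": 2,
        "disable_log_requests": true,
        "gpu_memory_utilization": 0.5
    }

The removed pip install line is no longer needed because the test now uses pynvml (see the next file) rather than the nvidia-ml-py3 package.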

ci/L0_multi_gpu/vllm_backend/vllm_multi_gpu_test.py

Lines changed: 7 additions & 7 deletions
@@ -28,7 +28,7 @@
 import unittest
 from functools import partial
 
-import nvidia_smi
+import pynvml
 import tritonclient.grpc as grpcclient
 from tritonclient.utils import *
 
@@ -38,20 +38,20 @@
 
 class VLLMMultiGPUTest(TestResultCollector):
     def setUp(self):
-        nvidia_smi.nvmlInit()
+        pynvml.nvmlInit()
         self.triton_client = grpcclient.InferenceServerClient(url="localhost:8001")
         self.vllm_model_name = "vllm_opt"
 
     def get_gpu_memory_utilization(self, gpu_id):
-        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(gpu_id)
-        info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
+        handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
+        info = pynvml.nvmlDeviceGetMemoryInfo(handle)
         return info.used
 
     def get_available_gpu_ids(self):
-        device_count = nvidia_smi.nvmlDeviceGetCount()
+        device_count = pynvml.nvmlDeviceGetCount()
         available_gpus = []
         for gpu_id in range(device_count):
-            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(gpu_id)
+            handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
             if handle:
                 available_gpus.append(gpu_id)
         return available_gpus
@@ -119,7 +119,7 @@ def _test_vllm_model(self, send_parameters_as_tensor=True):
         self.triton_client.stop_stream()
 
     def tearDown(self):
-        nvidia_smi.nvmlShutdown()
+        pynvml.nvmlShutdown()
         self.triton_client.close()

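This diff swaps the NVML binding: the legacy nvidia_smi module from nvidia-ml-py3 is replaced with the pynvml module, call for call. A minimal standalone sketch of the same query pattern, assuming an NVIDIA driver and at least one visible GPU:

    import pynvml

    # Initialize NVML once before issuing any queries (mirrors setUp above).
    pynvml.nvmlInit()
    try:
        for gpu_id in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
            info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            # info.used is reported in bytes.
            print(f"GPU {gpu_id}: {info.used / 1024**2:.1f} MiB used")
    finally:
        # Release NVML when done (mirrors tearDown above).
        pynvml.nvmlShutdown()

The test uses the same nvmlInit/nvmlShutdown bracketing per test case, querying info.used before and after model load to verify that memory is allocated on both GPUs.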