Skip to content

Commit 5d32dff

Browse files
committed
Move common functions to common/util.sh
1 parent d638f04 commit 5d32dff

File tree

3 files changed

+79
-148
lines changed

3 files changed

+79
-148
lines changed

qa/L0_orca/test.sh

Lines changed: 0 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -48,47 +48,6 @@ CLIENT_PY=${BASE_DIR}/orca_http_test.py
4848
CLIENT_LOG="${NAME}_orca_http_test.log"
4949
source ../common/util.sh
5050

51-
function replace_config_tags {
52-
tag_to_replace="${1}"
53-
new_value="${2}"
54-
config_file_path="${3}"
55-
sed -i "s|${tag_to_replace}|${new_value}|g" ${config_file_path}
56-
}
57-
58-
function prepare_model_repository {
59-
rm -rf ${MODEL_REPOSITORY} && mkdir ${MODEL_REPOSITORY}
60-
cp -r ${TENSORRTLLM_BACKEND_DIR}/all_models/inflight_batcher_llm/* ${MODEL_REPOSITORY}
61-
rm -rf ${MODEL_REPOSITORY}/tensorrt_llm_bls
62-
mv "${MODEL_REPOSITORY}/ensemble" "${MODEL_REPOSITORY}/${MODEL_NAME}"
63-
64-
replace_config_tags "model_version: -1" "model_version: 1" "${MODEL_REPOSITORY}/${MODEL_NAME}/config.pbtxt"
65-
replace_config_tags '${triton_max_batch_size}' "128" "${MODEL_REPOSITORY}/${MODEL_NAME}/config.pbtxt"
66-
replace_config_tags 'name: "ensemble"' "name: \"$MODEL_NAME\"" "${MODEL_REPOSITORY}/${MODEL_NAME}/config.pbtxt"
67-
replace_config_tags '${logits_datatype}' "TYPE_FP32" "${MODEL_REPOSITORY}/${MODEL_NAME}/config.pbtxt"
68-
69-
replace_config_tags '${triton_max_batch_size}' "128" "${MODEL_REPOSITORY}/preprocessing/config.pbtxt"
70-
replace_config_tags '${preprocessing_instance_count}' '1' "${MODEL_REPOSITORY}/preprocessing/config.pbtxt"
71-
replace_config_tags '${tokenizer_dir}' "${TOKENIZER_DIR}/" "${MODEL_REPOSITORY}/preprocessing/config.pbtxt"
72-
replace_config_tags '${logits_datatype}' "TYPE_FP32" "${MODEL_REPOSITORY}/preprocessing/config.pbtxt"
73-
replace_config_tags '${max_queue_delay_microseconds}' "1000000" "${MODEL_REPOSITORY}/preprocessing/config.pbtxt"
74-
replace_config_tags '${max_queue_size}' "0" "${MODEL_REPOSITORY}/preprocessing/config.pbtxt"
75-
76-
replace_config_tags '${triton_max_batch_size}' "128" "${MODEL_REPOSITORY}/postprocessing/config.pbtxt"
77-
replace_config_tags '${postprocessing_instance_count}' '1' "${MODEL_REPOSITORY}/postprocessing/config.pbtxt"
78-
replace_config_tags '${tokenizer_dir}' "${TOKENIZER_DIR}/" "${MODEL_REPOSITORY}/postprocessing/config.pbtxt"
79-
replace_config_tags '${logits_datatype}' "TYPE_FP32" "${MODEL_REPOSITORY}/postprocessing/config.pbtxt"
80-
81-
replace_config_tags '${triton_max_batch_size}' "128" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
82-
replace_config_tags '${decoupled_mode}' 'true' "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
83-
replace_config_tags '${max_queue_delay_microseconds}' "1000000" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
84-
replace_config_tags '${batching_strategy}' 'inflight_fused_batching' "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
85-
replace_config_tags '${engine_dir}' "${ENGINES_DIR}" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
86-
replace_config_tags '${triton_backend}' "tensorrtllm" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
87-
replace_config_tags '${max_queue_size}' "0" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
88-
replace_config_tags '${logits_datatype}' "TYPE_FP32" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
89-
replace_config_tags '${encoder_input_features_data_type}' "TYPE_FP32" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
90-
}
91-
9251
# Wait until server health endpoint shows ready. Sets WAIT_RET to 0 on
9352
# success, 1 on failure
9453
function wait_for_server_ready() {
@@ -145,42 +104,6 @@ function kill_server {
145104
done
146105
}
147106

148-
function clone_tensorrt_llm_backend_repo {
149-
rm -rf $TENSORRTLLM_BACKEND_DIR && mkdir $TENSORRTLLM_BACKEND_DIR
150-
apt-get update && apt-get install git-lfs -y --no-install-recommends
151-
git clone --single-branch --depth=1 -b ${TENSORRTLLM_BACKEND_REPO_TAG} ${TRITON_REPO_ORG}/tensorrtllm_backend.git $TENSORRTLLM_BACKEND_DIR
152-
cd $TENSORRTLLM_BACKEND_DIR && git lfs install && git submodule update --init --recursive
153-
}
154-
155-
function build_gpt2_base_model {
156-
# Download weights from HuggingFace Transformers
157-
cd ${GPT_DIR} && rm -rf gpt2 && git clone https://huggingface.co/gpt2-medium gpt2 && cd gpt2
158-
rm pytorch_model.bin model.safetensors
159-
if ! wget -q https://huggingface.co/gpt2-medium/resolve/main/pytorch_model.bin; then
160-
echo "Downloading pytorch_model.bin failed."
161-
exit 1
162-
fi
163-
cd ${GPT_DIR}
164-
165-
# Convert weights from HF Transformers to FT format
166-
python3 convert_checkpoint.py --model_dir gpt2 --dtype float16 --tp_size ${NUM_GPUS} --output_dir "./c-model/gpt2/${NUM_GPUS}-gpu/"
167-
cd ${BASE_DIR}
168-
}
169-
170-
function build_gpt2_tensorrt_engine {
171-
# Build TensorRT engines
172-
cd ${GPT_DIR}
173-
trtllm-build --checkpoint_dir "./c-model/gpt2/${NUM_GPUS}-gpu/" \
174-
--gpt_attention_plugin float16 \
175-
--remove_input_padding enable \
176-
--paged_kv_cache enable \
177-
--gemm_plugin float16 \
178-
--workers "${NUM_GPUS}" \
179-
--output_dir "${ENGINES_DIR}"
180-
181-
cd ${BASE_DIR}
182-
}
183-
184107
clone_tensorrt_llm_backend_repo
185108
build_gpt2_base_model
186109
build_gpt2_tensorrt_engine

qa/L0_perf_tensorrt_llm/test.sh

Lines changed: 1 addition & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,7 @@ SERVER=${TRITON_DIR}/bin/tritonserver
4343
BACKEND_DIR=${TRITON_DIR}/backends
4444
SERVER_LOG="${NAME}_server.log"
4545
SERVER_TIMEOUT=${SERVER_TIMEOUT:=120}
46-
47-
function clone_tensorrt_llm_backend_repo {
48-
rm -rf $TENSORRTLLM_BACKEND_DIR && mkdir $TENSORRTLLM_BACKEND_DIR
49-
apt-get update && apt-get install git-lfs -y --no-install-recommends
50-
git clone --single-branch --depth=1 -b ${TENSORRTLLM_BACKEND_REPO_TAG} ${TRITON_REPO_ORG}/tensorrtllm_backend.git $TENSORRTLLM_BACKEND_DIR
51-
cd $TENSORRTLLM_BACKEND_DIR && git lfs install && git submodule update --init --recursive
52-
}
46+
source ../common/util.sh
5347

5448
# Update Open MPI to a version compatible with SLURM.
5549
function upgrade_openmpi {
@@ -95,69 +89,6 @@ function upgrade_openmpi {
9589
mpirun --version
9690
}
9791

98-
function build_gpt2_base_model {
99-
# Download weights from HuggingFace Transformers
100-
cd ${GPT_DIR} && rm -rf gpt2 && git clone https://huggingface.co/gpt2-medium gpt2 && cd gpt2
101-
rm pytorch_model.bin model.safetensors
102-
if ! wget -q https://huggingface.co/gpt2-medium/resolve/main/pytorch_model.bin; then
103-
echo "Downloading pytorch_model.bin failed."
104-
exit 1
105-
fi
106-
cd ${GPT_DIR}
107-
108-
# Convert weights from HF Transformers to FT format
109-
python3 convert_checkpoint.py --model_dir gpt2 --dtype float16 --tp_size ${NUM_GPUS} --output_dir "./c-model/gpt2/${NUM_GPUS}-gpu/"
110-
cd ${BASE_DIR}
111-
}
112-
113-
function build_gpt2_tensorrt_engine {
114-
# Build TensorRT engines
115-
cd ${GPT_DIR}
116-
trtllm-build --checkpoint_dir "./c-model/gpt2/${NUM_GPUS}-gpu/" \
117-
--gpt_attention_plugin float16 \
118-
--remove_input_padding enable \
119-
--paged_kv_cache enable \
120-
--gemm_plugin float16 \
121-
--workers "${NUM_GPUS}" \
122-
--output_dir "${ENGINES_DIR}"
123-
124-
cd ${BASE_DIR}
125-
}
126-
127-
function replace_config_tags {
128-
tag_to_replace="${1}"
129-
new_value="${2}"
130-
config_file_path="${3}"
131-
sed -i "s|${tag_to_replace}|${new_value}|g" ${config_file_path}
132-
}
133-
134-
function prepare_model_repository {
135-
rm -rf ${MODEL_REPOSITORY} && mkdir ${MODEL_REPOSITORY}
136-
cp -r ${TENSORRTLLM_BACKEND_DIR}/all_models/inflight_batcher_llm/* ${MODEL_REPOSITORY}
137-
rm -rf ${MODEL_REPOSITORY}/tensorrt_llm_bls
138-
mv "${MODEL_REPOSITORY}/ensemble" "${MODEL_REPOSITORY}/${MODEL_NAME}"
139-
140-
replace_config_tags "model_version: -1" "model_version: 1" "${MODEL_REPOSITORY}/${MODEL_NAME}/config.pbtxt"
141-
replace_config_tags '${triton_max_batch_size}' "128" "${MODEL_REPOSITORY}/${MODEL_NAME}/config.pbtxt"
142-
replace_config_tags 'name: "ensemble"' "name: \"$MODEL_NAME\"" "${MODEL_REPOSITORY}/${MODEL_NAME}/config.pbtxt"
143-
144-
replace_config_tags '${triton_max_batch_size}' "128" "${MODEL_REPOSITORY}/preprocessing/config.pbtxt"
145-
replace_config_tags '${preprocessing_instance_count}' '1' "${MODEL_REPOSITORY}/preprocessing/config.pbtxt"
146-
replace_config_tags '${tokenizer_dir}' "${TOKENIZER_DIR}/" "${MODEL_REPOSITORY}/preprocessing/config.pbtxt"
147-
148-
replace_config_tags '${triton_max_batch_size}' "128" "${MODEL_REPOSITORY}/postprocessing/config.pbtxt"
149-
replace_config_tags '${postprocessing_instance_count}' '1' "${MODEL_REPOSITORY}/postprocessing/config.pbtxt"
150-
replace_config_tags '${tokenizer_dir}' "${TOKENIZER_DIR}/" "${MODEL_REPOSITORY}/postprocessing/config.pbtxt"
151-
152-
replace_config_tags '${triton_max_batch_size}' "128" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
153-
replace_config_tags '${decoupled_mode}' 'true' "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
154-
replace_config_tags '${max_queue_delay_microseconds}' "1000000" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
155-
replace_config_tags '${batching_strategy}' 'inflight_fused_batching' "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
156-
replace_config_tags '${engine_dir}' "${ENGINES_DIR}" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
157-
replace_config_tags '${triton_backend}' "tensorrtllm" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
158-
replace_config_tags '${max_queue_size}' "0" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
159-
}
160-
16192
# Wait until server health endpoint shows ready. Sets WAIT_RET to 0 on
16293
# success, 1 on failure
16394
function wait_for_server_ready() {

qa/common/util.sh

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/bin/bash
2-
# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
33
#
44
# Redistribution and use in source and binary forms, with or without
55
# modification, are permitted provided that the following conditions
@@ -534,3 +534,80 @@ function deactivate_virtualenv() {
534534
rm -fr venv
535535
fi
536536
}
537+
538+
function replace_config_tags {
539+
tag_to_replace="${1}"
540+
new_value="${2}"
541+
config_file_path="${3}"
542+
sed -i "s|${tag_to_replace}|${new_value}|g" ${config_file_path}
543+
}
544+
545+
function prepare_model_repository {
546+
rm -rf ${MODEL_REPOSITORY} && mkdir ${MODEL_REPOSITORY}
547+
cp -r ${TENSORRTLLM_BACKEND_DIR}/all_models/inflight_batcher_llm/* ${MODEL_REPOSITORY}
548+
rm -rf ${MODEL_REPOSITORY}/tensorrt_llm_bls
549+
mv "${MODEL_REPOSITORY}/ensemble" "${MODEL_REPOSITORY}/${MODEL_NAME}"
550+
551+
replace_config_tags "model_version: -1" "model_version: 1" "${MODEL_REPOSITORY}/${MODEL_NAME}/config.pbtxt"
552+
replace_config_tags '${triton_max_batch_size}' "128" "${MODEL_REPOSITORY}/${MODEL_NAME}/config.pbtxt"
553+
replace_config_tags 'name: "ensemble"' "name: \"$MODEL_NAME\"" "${MODEL_REPOSITORY}/${MODEL_NAME}/config.pbtxt"
554+
replace_config_tags '${logits_datatype}' "TYPE_FP32" "${MODEL_REPOSITORY}/${MODEL_NAME}/config.pbtxt"
555+
556+
replace_config_tags '${triton_max_batch_size}' "128" "${MODEL_REPOSITORY}/preprocessing/config.pbtxt"
557+
replace_config_tags '${preprocessing_instance_count}' '1' "${MODEL_REPOSITORY}/preprocessing/config.pbtxt"
558+
replace_config_tags '${tokenizer_dir}' "${TOKENIZER_DIR}/" "${MODEL_REPOSITORY}/preprocessing/config.pbtxt"
559+
replace_config_tags '${logits_datatype}' "TYPE_FP32" "${MODEL_REPOSITORY}/preprocessing/config.pbtxt"
560+
replace_config_tags '${max_queue_delay_microseconds}' "1000000" "${MODEL_REPOSITORY}/preprocessing/config.pbtxt"
561+
replace_config_tags '${max_queue_size}' "0" "${MODEL_REPOSITORY}/preprocessing/config.pbtxt"
562+
563+
replace_config_tags '${triton_max_batch_size}' "128" "${MODEL_REPOSITORY}/postprocessing/config.pbtxt"
564+
replace_config_tags '${postprocessing_instance_count}' '1' "${MODEL_REPOSITORY}/postprocessing/config.pbtxt"
565+
replace_config_tags '${tokenizer_dir}' "${TOKENIZER_DIR}/" "${MODEL_REPOSITORY}/postprocessing/config.pbtxt"
566+
replace_config_tags '${logits_datatype}' "TYPE_FP32" "${MODEL_REPOSITORY}/postprocessing/config.pbtxt"
567+
568+
replace_config_tags '${triton_max_batch_size}' "128" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
569+
replace_config_tags '${decoupled_mode}' 'true' "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
570+
replace_config_tags '${max_queue_delay_microseconds}' "1000000" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
571+
replace_config_tags '${batching_strategy}' 'inflight_fused_batching' "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
572+
replace_config_tags '${engine_dir}' "${ENGINES_DIR}" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
573+
replace_config_tags '${triton_backend}' "tensorrtllm" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
574+
replace_config_tags '${max_queue_size}' "0" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
575+
replace_config_tags '${logits_datatype}' "TYPE_FP32" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
576+
replace_config_tags '${encoder_input_features_data_type}' "TYPE_FP32" "${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"
577+
}
578+
579+
function clone_tensorrt_llm_backend_repo {
580+
rm -rf $TENSORRTLLM_BACKEND_DIR && mkdir $TENSORRTLLM_BACKEND_DIR
581+
apt-get update && apt-get install git-lfs -y --no-install-recommends
582+
git clone --single-branch --depth=1 -b ${TENSORRTLLM_BACKEND_REPO_TAG} ${TRITON_REPO_ORG}/tensorrtllm_backend.git $TENSORRTLLM_BACKEND_DIR
583+
cd $TENSORRTLLM_BACKEND_DIR && git lfs install && git submodule update --init --recursive
584+
}
585+
586+
function build_gpt2_base_model {
587+
# Download weights from HuggingFace Transformers
588+
cd ${GPT_DIR} && rm -rf gpt2 && git clone https://huggingface.co/gpt2-medium gpt2 && cd gpt2
589+
rm pytorch_model.bin model.safetensors
590+
if ! wget -q https://huggingface.co/gpt2-medium/resolve/main/pytorch_model.bin; then
591+
echo "Downloading pytorch_model.bin failed."
592+
exit 1
593+
fi
594+
cd ${GPT_DIR}
595+
596+
# Convert weights from HF Transformers to FT format
597+
python3 convert_checkpoint.py --model_dir gpt2 --dtype float16 --tp_size ${NUM_GPUS} --output_dir "./c-model/gpt2/${NUM_GPUS}-gpu/"
598+
cd ${BASE_DIR}
599+
}
600+
601+
function build_gpt2_tensorrt_engine {
602+
# Build TensorRT engines
603+
cd ${GPT_DIR}
604+
trtllm-build --checkpoint_dir "./c-model/gpt2/${NUM_GPUS}-gpu/" \
605+
--gpt_attention_plugin float16 \
606+
--remove_input_padding enable \
607+
--paged_kv_cache enable \
608+
--gemm_plugin float16 \
609+
--workers "${NUM_GPUS}" \
610+
--output_dir "${ENGINES_DIR}"
611+
612+
cd ${BASE_DIR}
613+
}

0 commit comments

Comments
 (0)