@@ -48,47 +48,6 @@ CLIENT_PY=${BASE_DIR}/orca_http_test.py
48
48
CLIENT_LOG=" ${NAME} _orca_http_test.log"
49
49
source ../common/util.sh
50
50
51
# Replace every occurrence of a template tag in a config file, in place.
#
# Arguments:
#   $1 - tag to search for; sed interprets it as a basic regular expression
#   $2 - replacement text
#   $3 - path of the file to edit
#
# '|' is used as the sed delimiter, so neither the tag nor the replacement may
# contain an unescaped '|'; '&' and '\' in the replacement keep their sed
# meaning. Returns the exit status of sed.
function replace_config_tags {
  # Keep the working variables local so repeated calls do not leak or clobber
  # globals in the calling script.
  local tag_to_replace="${1}"
  local new_value="${2}"
  local config_file_path="${3}"

  # Quote the path so a location containing whitespace or glob characters
  # reaches sed as a single operand.
  sed -i "s|${tag_to_replace}|${new_value}|g" "${config_file_path}"
}
57
-
58
# Build a fresh model repository from the inflight_batcher_llm templates found
# in the TensorRT-LLM backend checkout, then fill in the template tags of each
# model's config.pbtxt via replace_config_tags.
#
# Globals read: MODEL_REPOSITORY, TENSORRTLLM_BACKEND_DIR, MODEL_NAME,
#               TOKENIZER_DIR, ENGINES_DIR
# Side effects: deletes and recreates MODEL_REPOSITORY; drops the
#               tensorrt_llm_bls model; renames "ensemble" to MODEL_NAME.
# Aborts the script if MODEL_REPOSITORY, TENSORRTLLM_BACKEND_DIR or MODEL_NAME
# is unset or empty, so the destructive rm -rf can never run on a bare path.
# Returns 1 if the repository cannot be populated; otherwise the status of the
# last replace_config_tags call.
function prepare_model_repository {
  local repo="${MODEL_REPOSITORY:?MODEL_REPOSITORY must be set}"
  local backend="${TENSORRTLLM_BACKEND_DIR:?TENSORRTLLM_BACKEND_DIR must be set}"
  local model_name="${MODEL_NAME:?MODEL_NAME must be set}"
  local config

  # Stop early when the templates cannot be copied; every later step would
  # otherwise only produce a stream of "no such file" errors.
  if ! { rm -rf "${repo}" && mkdir "${repo}" \
    && cp -r "${backend}/all_models/inflight_batcher_llm"/* "${repo}"; }; then
    echo "Failed to populate ${repo} from ${backend}" >&2
    return 1
  fi
  rm -rf "${repo}/tensorrt_llm_bls"
  # Renaming a directory onto itself is an error, so skip it when the model
  # is already called "ensemble".
  if [[ "${model_name}" != "ensemble" ]]; then
    mv "${repo}/ensemble" "${repo}/${model_name}"
  fi

  config="${repo}/${model_name}/config.pbtxt"
  replace_config_tags "model_version: -1" "model_version: 1" "${config}"
  replace_config_tags '${triton_max_batch_size}' "128" "${config}"
  replace_config_tags 'name: "ensemble"' "name: \"${model_name}\"" "${config}"
  replace_config_tags '${logits_datatype}' "TYPE_FP32" "${config}"

  config="${repo}/preprocessing/config.pbtxt"
  replace_config_tags '${triton_max_batch_size}' "128" "${config}"
  replace_config_tags '${preprocessing_instance_count}' '1' "${config}"
  replace_config_tags '${tokenizer_dir}' "${TOKENIZER_DIR}/" "${config}"
  replace_config_tags '${logits_datatype}' "TYPE_FP32" "${config}"
  replace_config_tags '${max_queue_delay_microseconds}' "1000000" "${config}"
  replace_config_tags '${max_queue_size}' "0" "${config}"

  config="${repo}/postprocessing/config.pbtxt"
  replace_config_tags '${triton_max_batch_size}' "128" "${config}"
  replace_config_tags '${postprocessing_instance_count}' '1' "${config}"
  replace_config_tags '${tokenizer_dir}' "${TOKENIZER_DIR}/" "${config}"
  replace_config_tags '${logits_datatype}' "TYPE_FP32" "${config}"

  config="${repo}/tensorrt_llm/config.pbtxt"
  replace_config_tags '${triton_max_batch_size}' "128" "${config}"
  replace_config_tags '${decoupled_mode}' 'true' "${config}"
  replace_config_tags '${max_queue_delay_microseconds}' "1000000" "${config}"
  replace_config_tags '${batching_strategy}' 'inflight_fused_batching' "${config}"
  replace_config_tags '${engine_dir}' "${ENGINES_DIR}" "${config}"
  replace_config_tags '${triton_backend}' "tensorrtllm" "${config}"
  replace_config_tags '${max_queue_size}' "0" "${config}"
  replace_config_tags '${logits_datatype}' "TYPE_FP32" "${config}"
  replace_config_tags '${encoder_input_features_data_type}' "TYPE_FP32" "${config}"
}
91
-
92
51
# Wait until server health endpoint shows ready. Sets WAIT_RET to 0 on
93
52
# success, 1 on failure
94
53
function wait_for_server_ready() {
@@ -145,42 +104,6 @@ function kill_server {
145
104
done
146
105
}
147
106
148
# Fetch a clean, shallow checkout of the tensorrtllm_backend repository,
# including its submodules.
#
# Globals read: TENSORRTLLM_BACKEND_DIR (checkout location, wiped first),
#               TENSORRTLLM_BACKEND_REPO_TAG (branch or tag to clone),
#               TRITON_REPO_ORG (URL prefix of the git remote)
# Side effects: installs git-lfs through apt-get and leaves the shell's
#               working directory inside TENSORRTLLM_BACKEND_DIR.
# Aborts the script if TENSORRTLLM_BACKEND_DIR is unset or empty, so the
# rm -rf below can never expand to a bare path.
# Returns 1 if the clone fails; otherwise the status of the final
# cd / git lfs / git submodule chain.
function clone_tensorrt_llm_backend_repo {
  local backend_dir="${TENSORRTLLM_BACKEND_DIR:?TENSORRTLLM_BACKEND_DIR must be set}"

  rm -rf "${backend_dir}" && mkdir "${backend_dir}"
  # Best effort: the status of the package installation is not checked.
  apt-get update && apt-get install git-lfs -y --no-install-recommends
  # Without a checkout there is nothing for the LFS and submodule steps to
  # operate on, so give up right away when the clone fails.
  git clone --single-branch --depth=1 -b "${TENSORRTLLM_BACKEND_REPO_TAG}" \
    "${TRITON_REPO_ORG}/tensorrtllm_backend.git" "${backend_dir}" || return 1
  cd "${backend_dir}" && git lfs install && git submodule update --init --recursive
}
154
-
155
# Download the gpt2-medium checkpoint from HuggingFace into GPT_DIR/gpt2 and
# convert it with convert_checkpoint.py into
# GPT_DIR/c-model/gpt2/<NUM_GPUS>-gpu/.
#
# Globals read: GPT_DIR (directory holding convert_checkpoint.py),
#               NUM_GPUS (passed as --tp_size), BASE_DIR (directory to return to)
# Side effects: replaces GPT_DIR/gpt2 and changes the working directory,
#               ending in BASE_DIR.
# Exits the script with status 1 if the clone or the weight download fails,
# and aborts if GPT_DIR is unset or empty. Otherwise returns the status of the
# final cd; the status of the conversion itself is not checked.
function build_gpt2_base_model {
  local hf_repo="https://huggingface.co/gpt2-medium"

  # Download weights from HuggingFace Transformers.
  # Bail out if any step of the chain fails: the rm and wget below act on the
  # current directory and must only ever run inside the fresh clone.
  if ! { cd "${GPT_DIR:?GPT_DIR must be set}" && rm -rf gpt2 \
    && git clone "${hf_repo}" gpt2 && cd gpt2; }; then
    echo "Cloning ${hf_repo} into ${GPT_DIR}/gpt2 failed."
    exit 1
  fi
  # Drop the weight files that came with the clone (presumably Git LFS
  # placeholders - TODO confirm) before fetching the real pytorch_model.bin.
  rm -f pytorch_model.bin model.safetensors
  if ! wget -q "${hf_repo}/resolve/main/pytorch_model.bin"; then
    echo "Downloading pytorch_model.bin failed."
    exit 1
  fi
  cd "${GPT_DIR}" || exit 1

  # Convert weights from HF Transformers to FT format
  python3 convert_checkpoint.py --model_dir gpt2 --dtype float16 \
    --tp_size "${NUM_GPUS}" --output_dir "./c-model/gpt2/${NUM_GPUS}-gpu/"
  cd "${BASE_DIR}"
}
169
-
170
# Build the TensorRT engines for the converted GPT-2 checkpoint with
# trtllm-build.
#
# Globals read: GPT_DIR (directory containing ./c-model/gpt2/<NUM_GPUS>-gpu/),
#               NUM_GPUS (checkpoint selector and --workers value),
#               ENGINES_DIR (engine output directory),
#               BASE_DIR (directory to return to afterwards)
# Side effects: changes the working directory, ending in BASE_DIR.
# Aborts the script if GPT_DIR is unset or empty. Returns 1 if either
# directory cannot be entered; otherwise the exit status of trtllm-build.
function build_gpt2_tensorrt_engine {
  local build_status=0
  local -a build_args=(
    --checkpoint_dir "./c-model/gpt2/${NUM_GPUS}-gpu/"
    --gpt_attention_plugin float16
    --remove_input_padding enable
    --paged_kv_cache enable
    --gemm_plugin float16
    --workers "${NUM_GPUS}"
    --output_dir "${ENGINES_DIR}"
  )

  # The checkpoint path is relative, so the build must not start unless we are
  # really inside GPT_DIR.
  cd "${GPT_DIR:?GPT_DIR must be set}" || return 1

  # Build TensorRT engines. Remember the status so that the cd back to
  # BASE_DIR below does not mask a failed build.
  trtllm-build "${build_args[@]}" || build_status=$?

  cd "${BASE_DIR}" || return 1
  return "${build_status}"
}
183
-
184
107
clone_tensorrt_llm_backend_repo
185
108
build_gpt2_base_model
186
109
build_gpt2_tensorrt_engine
0 commit comments