Skip to content

Commit bbb3795

Browse files
mc-nv and yinggeh authored
refactor: Enhance memory allocation security in HTTP and Sagemaker request handler (#8305) (#8314)
Co-authored-by: Yingge He <157551214+yinggeh@users.noreply.github.com>
1 parent 3772013 commit bbb3795

File tree

7 files changed

+606
-374
lines changed

7 files changed

+606
-374
lines changed
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
#!/usr/bin/python
2+
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions
6+
# are met:
7+
# * Redistributions of source code must retain the above copyright
8+
# notice, this list of conditions and the following disclaimer.
9+
# * Redistributions in binary form must reproduce the above copyright
10+
# notice, this list of conditions and the following disclaimer in the
11+
# documentation and/or other materials provided with the distribution.
12+
# * Neither the name of NVIDIA CORPORATION nor the names of its
13+
# contributors may be used to endorse or promote products derived
14+
# from this software without specific prior written permission.
15+
#
16+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
17+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
20+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27+
28+
import socket
29+
import unittest
30+
31+
32+
class HTTPRequestManyChunksTest(unittest.TestCase):
    """Post chunked HTTP requests made of a very large number of chunks.

    Every test sends a body split into ``_malicious_chunk_count`` one-byte
    chunks to a single endpoint and checks that the raw response contains
    the expected error text.
    """

    def setUp(self):
        self._model_name = "simple"
        self._local_host = "localhost"
        self._http_port = 8000
        self._malicious_chunk_count = (
            1000000  # large enough to cause a stack overflow if using alloca()
        )
        self._parse_error = (
            "failed to parse the request JSON buffer: Invalid value. at 0"
        )

    def send_chunked_request(
        self,
        header: str,
        chunk_count: int,
        expected_response: str,
        *,
        timeout_secs: float = 60.0,
    ) -> None:
        """Send a chunked request and assert on the raw response text.

        Args:
            header: Request line plus any endpoint-specific header lines,
                each terminated by ``\\r\\n``. The common headers and the
                blank line ending the header section are appended here.
            chunk_count: Number of one-byte chunks to send as the body.
            expected_response: Text that must appear in the response.
            timeout_secs: Per-operation socket timeout. Without it the
                ``socket.timeout`` handling below could never trigger and a
                stalled server would block the test forever.

        Raises:
            AssertionError: If ``expected_response`` is not in the response.
            OSError: If connecting or sending fails (including a timeout
                while connecting or sending).
        """
        request_head = (
            f"{header}"
            f"Host: {self._local_host}:{self._http_port}\r\n"
            "Content-Type: application/octet-stream\r\n"
            "Transfer-Encoding: chunked\r\n"
            "Connection: close\r\n"
            "\r\n"
        ).encode()
        one_byte_chunk = b"1\r\nA\r\n"
        received = []

        # The context manager closes the socket on every exit path.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as conn:
            conn.settimeout(timeout_secs)
            conn.connect((self._local_host, self._http_port))

            # HTTP request with chunked encoding
            conn.sendall(request_head)

            # Send chunked payload. sendall() is required: send() may write
            # only part of the buffer and silently corrupt the chunk framing.
            # NOTE(review): chunks are written one call at a time as in the
            # original test; batching would be faster but might change how
            # the server buffers them - confirm before changing.
            for _ in range(chunk_count):
                conn.sendall(one_byte_chunk)
            # End chunked encoding
            conn.sendall(b"0\r\n\r\n")

            # Receive response until the peer closes or the read times out.
            while True:
                try:
                    data = conn.recv(4096)
                except socket.timeout:
                    break
                if not data:
                    break
                received.append(data)

        # Undecodable bytes must not hide the real assertion failure.
        response_text = b"".join(received).decode(errors="replace")
        self.assertIn(expected_response, response_text)

    def test_infer(self):
        request_header = (
            f"POST /v2/models/{self._model_name}/infer HTTP/1.1\r\n"
            "Inference-Header-Content-Length: 0\r\n"
        )

        self.send_chunked_request(
            request_header,
            self._malicious_chunk_count,
            "Raw request must only have 1 input (found 1) to be deduced but got 2 inputs in 'simple' model configuration",
        )

    def test_registry_index(self):
        request_header = "POST /v2/repository/index HTTP/1.1\r\n"

        self.send_chunked_request(
            request_header, self._malicious_chunk_count, self._parse_error
        )

    def test_model_control(self):
        load_request_header = (
            f"POST /v2/repository/models/{self._model_name}/load HTTP/1.1\r\n"
        )
        unload_request_header = load_request_header.replace("/load", "/unload")

        self.send_chunked_request(
            load_request_header, self._malicious_chunk_count, self._parse_error
        )
        self.send_chunked_request(
            unload_request_header, self._malicious_chunk_count, self._parse_error
        )

    def test_trace(self):
        request_header = (
            f"POST /v2/models/{self._model_name}/trace/setting HTTP/1.1\r\n"
        )

        self.send_chunked_request(
            request_header, self._malicious_chunk_count, self._parse_error
        )

    def test_logging(self):
        request_header = "POST /v2/logging HTTP/1.1\r\n"

        self.send_chunked_request(
            request_header, self._malicious_chunk_count, self._parse_error
        )

    def test_system_shm_register(self):
        request_header = "POST /v2/systemsharedmemory/region/test_system_shm_register/register HTTP/1.1\r\n"

        self.send_chunked_request(
            request_header, self._malicious_chunk_count, self._parse_error
        )

    def test_cuda_shm_register(self):
        request_header = "POST /v2/cudasharedmemory/region/test_cuda_shm_register/register HTTP/1.1\r\n"

        self.send_chunked_request(
            request_header, self._malicious_chunk_count, self._parse_error
        )

    def test_generate(self):
        request_header = f"POST /v2/models/{self._model_name}/generate HTTP/1.1\r\n"
        self.send_chunked_request(
            request_header, self._malicious_chunk_count, self._parse_error
        )


if __name__ == "__main__":
    unittest.main()

qa/L0_http/test.sh

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ fi
4040

4141
export CUDA_VISIBLE_DEVICES=0
4242

43+
source ../common/util.sh
4344
RET=0
4445

4546
CLIENT_PLUGIN_TEST="./http_client_plugin_test.py"
@@ -129,7 +130,6 @@ set -e
129130

130131
CLIENT_LOG=`pwd`/client.log
131132
SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR}"
132-
source ../common/util.sh
133133

134134
run_server
135135
if [ "$SERVER_PID" == "0" ]; then
@@ -855,7 +855,32 @@ elif [ `grep -c "Error: --http-max-input-size must be greater than 0." ${SERVER_
855855
RET=1
856856
fi
857857

858-
###
858+
### Test HTTP Requests Containing Many Chunks ###
# Start a server in explicit model-control mode with the "simple" model
# loaded, run the many-chunks client test against it, then stop the server.
MODELDIR="$(pwd)/models"
REQUEST_MANY_CHUNKS_PY="http_request_many_chunks.py"
CLIENT_LOG="./client.http_request_many_chunks.log"
SERVER_ARGS="--model-repository=${MODELDIR} --log-verbose=1 --model-control-mode=explicit --load-model=simple"
SERVER_LOG="./inference_server_request_many_chunks.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# A failing client must be recorded in RET rather than abort the script.
set +e
if ! python $REQUEST_MANY_CHUNKS_PY -v >> ${CLIENT_LOG} 2>&1; then
    echo -e "\n***\n*** HTTP Request Many Chunks Test Failed\n***"
    cat $SERVER_LOG
    cat $CLIENT_LOG
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID
859884

860885
if [ $RET -eq 0 ]; then
861886
echo -e "\n***\n*** Test Passed\n***"
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
#!/usr/bin/python
2+
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions
6+
# are met:
7+
# * Redistributions of source code must retain the above copyright
8+
# notice, this list of conditions and the following disclaimer.
9+
# * Redistributions in binary form must reproduce the above copyright
10+
# notice, this list of conditions and the following disclaimer in the
11+
# documentation and/or other materials provided with the distribution.
12+
# * Neither the name of NVIDIA CORPORATION nor the names of its
13+
# contributors may be used to endorse or promote products derived
14+
# from this software without specific prior written permission.
15+
#
16+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
17+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
20+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27+
28+
import socket
29+
import unittest
30+
31+
32+
class SagemakerRequestManyChunksTest(unittest.TestCase):
    """Post a chunked Sagemaker request made of a very large number of chunks.

    The test sends a body split into ``_malicious_chunk_count`` one-byte
    chunks and checks that the raw response contains the expected error text.
    """

    def setUp(self):
        self._local_host = "localhost"
        self._sagemaker_port = 8080
        self._malicious_chunk_count = (
            1000000  # large enough to cause a stack overflow if using alloca()
        )

    def send_chunked_request(
        self,
        header: str,
        chunk_count: int,
        expected_response: str,
        *,
        timeout_secs: float = 60.0,
    ) -> None:
        """Send a chunked request and assert on the raw response text.

        Args:
            header: Request line plus any endpoint-specific header lines,
                each terminated by ``\\r\\n``. The common headers and the
                blank line ending the header section are appended here.
            chunk_count: Number of one-byte chunks to send as the body.
            expected_response: Text that must appear in the response.
            timeout_secs: Per-operation socket timeout. Without it the
                ``socket.timeout`` handling below could never trigger and a
                stalled server would block the test forever.

        Raises:
            AssertionError: If ``expected_response`` is not in the response.
            OSError: If connecting or sending fails (including a timeout
                while connecting or sending).
        """
        request_head = (
            f"{header}"
            f"Host: {self._local_host}:{self._sagemaker_port}\r\n"
            "Content-Type: application/octet-stream\r\n"
            "Transfer-Encoding: chunked\r\n"
            "Connection: close\r\n"
            "\r\n"
        ).encode()
        one_byte_chunk = b"1\r\nA\r\n"
        received = []

        # The context manager closes the socket on every exit path.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as conn:
            conn.settimeout(timeout_secs)
            conn.connect((self._local_host, self._sagemaker_port))

            # HTTP request with chunked encoding
            conn.sendall(request_head)

            # Send chunked payload. sendall() is required: send() may write
            # only part of the buffer and silently corrupt the chunk framing.
            # NOTE(review): chunks are written one call at a time as in the
            # original test; batching would be faster but might change how
            # the server buffers them - confirm before changing.
            for _ in range(chunk_count):
                conn.sendall(one_byte_chunk)
            # End chunked encoding
            conn.sendall(b"0\r\n\r\n")

            # Receive response until the peer closes or the read times out.
            while True:
                try:
                    data = conn.recv(4096)
                except socket.timeout:
                    break
                if not data:
                    break
                received.append(data)

        # Undecodable bytes must not hide the real assertion failure.
        response_text = b"".join(received).decode(errors="replace")
        self.assertIn(expected_response, response_text)

    def test_load_model(self):
        request_header = (
            "POST /models HTTP/1.1\r\n"
            "X-Amzn-SageMaker-Target-Model: ZZZZZZZ\r\n"
        )
        self.send_chunked_request(
            request_header,
            self._malicious_chunk_count,
            "failed to parse the request JSON buffer: Invalid value. at 0",
        )


if __name__ == "__main__":
    unittest.main()

qa/L0_sagemaker/test.sh

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,53 @@ kill $SERVER_PID
565565
wait $SERVE_PID
566566
# MME end
567567

568+
### Test Sagemaker Requests Containing Many Chunks ###
# Build a one-version model repository named sm_model from the ONNX QA model.
rm -rf models && mkdir models && \
    cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 models/sm_model && \
    rm -r models/sm_model/2 && rm -r models/sm_model/3 && \
    sed -i "s/onnx_int32_int32_int32/sm_model/" models/sm_model/config.pbtxt

export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME=sm_model
REQUEST_MANY_CHUNKS_PY="sagemaker_request_many_chunks.py"
CLIENT_LOG="./client.sagemaker_request_many_chunks.log"
SERVER_LOG="./server.sagemaker_request_many_chunks.log"

serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# $! is the PID of the backgrounded serve script, so the tritonserver PID
# is looked up separately from the process list.
sleep 1
SERVER_PID=$(ps | grep tritonserver | awk '{ printf $1 }')
sagemaker_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    exit 1
fi

# Ping
set +e
code=$(curl -s -w %{http_code} -o ./ping.out localhost:8080/ping)
set -e
if [ "$code" != "200" ]; then
    cat ./ping.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# A failing client must be recorded in RET rather than abort the script.
set +e
if ! python $REQUEST_MANY_CHUNKS_PY >>$CLIENT_LOG 2>&1; then
    echo -e "\n***\n*** Sagemaker Request Many Chunks Test Failed\n***"
    cat $SERVER_LOG
    cat $CLIENT_LOG
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVE_PID
614+
568615
unlink /opt/ml/model
569616
rm -rf /opt/ml/model
570617

0 commit comments

Comments
 (0)