triton-inference-server
diff --git a/‎qa/L0_http/http_request_many_chunks.py
Lines changed: 153 additions & 0 deletions b/‎qa/L0_http/http_request_many_chunks.py
Lines changed: 153 additions & 0 deletions
diff --git a/‎qa/L0_http/test.sh
Lines changed: 27 additions & 2 deletions b/‎qa/L0_http/test.sh
Lines changed: 27 additions & 2 deletions
diff --git a/‎qa/L0_sagemaker/sagemaker_request_many_chunks.py
Lines changed: 91 additions & 0 deletions b/‎qa/L0_sagemaker/sagemaker_request_many_chunks.py
Lines changed: 91 additions & 0 deletions
diff --git a/‎qa/L0_sagemaker/test.sh
Lines changed: 47 additions & 0 deletions b/‎qa/L0_sagemaker/test.sh
Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,153 @@
+#!/usr/bin/python
+# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import socket
+import unittest
+
+
+class HTTPRequestManyChunksTest(unittest.TestCase):
+    def setUp(self):
+        self._model_name = "simple"
+        self._local_host = "localhost"
+        self._http_port = 8000
+        self._malicious_chunk_count = (
+            1000000  # large enough to cause a stack overflow if using alloca()
+        )
+        self._parse_error = (
+            "failed to parse the request JSON buffer: Invalid value. at 0"
+        )
+
+    def send_chunked_request(
+        self, header: str, chunk_count: int, expected_response: str
+    ):
+        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        header = (
+            f"{header}"
+            f"Host: {self._local_host}:{self._http_port}\r\n"
+            f"Content-Type: application/octet-stream\r\n"
+            f"Transfer-Encoding: chunked\r\n"
+            f"Connection: close\r\n"
+            f"\r\n"
+        )
+        try:
+            s.connect((self._local_host, self._http_port))
+            # HTTP request with chunked encoding
+            s.sendall((header.encode()))
+
+            # Send chunked payload
+            for _ in range(chunk_count):
+                s.send(b"1\r\nA\r\n")
+            # End chunked encoding
+            s.sendall(b"0\r\n\r\n")
+
+            # Receive response
+            response = b""
+            while True:
+                try:
+                    chunk = s.recv(4096)
+                    if not chunk:
+                        break
+                    response += chunk
+                except socket.timeout:
+                    break
+            self.assertIn(expected_response, response.decode())
+        except Exception as e:
+            raise (e)
+        finally:
+            s.close()
+
+    def test_infer(self):
+        request_header = (
+            f"POST /v2/models/{self._model_name}/infer HTTP/1.1\r\n"
+            f"Inference-Header-Content-Length: 0\r\n"
+        )
+
+        self.send_chunked_request(
+            request_header,
+            self._malicious_chunk_count,
+            "Raw request must only have 1 input (found 1) to be deduced but got 2 inputs in 'simple' model configuration",
+        )
+
+    def test_registry_index(self):
+        request_header = f"POST /v2/repository/index HTTP/1.1\r\n"
+
+        self.send_chunked_request(
+            request_header, self._malicious_chunk_count, self._parse_error
+        )
+
+    def test_model_control(self):
+        load_request_header = (
+            f"POST /v2/repository/models/{self._model_name}/load HTTP/1.1\r\n"
+        )
+        unload_request_header = load_request_header.replace("/load", "/unload")
+
+        self.send_chunked_request(
+            load_request_header, self._malicious_chunk_count, self._parse_error
+        )
+        self.send_chunked_request(
+            unload_request_header, self._malicious_chunk_count, self._parse_error
+        )
+
+    def test_trace(self):
+        request_header = (
+            f"POST /v2/models/{self._model_name}/trace/setting HTTP/1.1\r\n"
+        )
+
+        self.send_chunked_request(
+            request_header, self._malicious_chunk_count, self._parse_error
+        )
+
+    def test_logging(self):
+        request_header = f"POST /v2/logging HTTP/1.1\r\n"
+
+        self.send_chunked_request(
+            request_header, self._malicious_chunk_count, self._parse_error
+        )
+
+    def test_system_shm_register(self):
+        request_header = f"POST /v2/systemsharedmemory/region/test_system_shm_register/register HTTP/1.1\r\n"
+
+        self.send_chunked_request(
+            request_header, self._malicious_chunk_count, self._parse_error
+        )
+
+    def test_cuda_shm_register(self):
+        request_header = f"POST /v2/cudasharedmemory/region/test_cuda_shm_register/register HTTP/1.1\r\n"
+
+        self.send_chunked_request(
+            request_header, self._malicious_chunk_count, self._parse_error
+        )
+
+    def test_generate(self):
+        request_header = f"POST /v2/models/{self._model_name}/generate HTTP/1.1\r\n"
+        self.send_chunked_request(
+            request_header, self._malicious_chunk_count, self._parse_error
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
@@ -40,6 +40,7 @@ fi
 
 export CUDA_VISIBLE_DEVICES=0
 
+source ../common/util.sh
 RET=0
 
 CLIENT_PLUGIN_TEST="./http_client_plugin_test.py"
@@ -129,7 +130,6 @@ set -e
 
 CLIENT_LOG=`pwd`/client.log
 SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR}"
-source ../common/util.sh
 
 run_server
 if [ "$SERVER_PID" == "0" ]; then
@@ -855,7 +855,32 @@ elif [ `grep -c "Error: --http-max-input-size must be greater than 0." ${SERVER_
     RET=1
 fi
 
-###
+### Test HTTP Requests Containing Many Chunks ###
+# Settings for this test's server run: explicit model-control mode with only
+# the 'simple' model loaded, verbose logging, and dedicated log files.
+# (SERVER_ARGS / SERVER_LOG are presumably consumed by run_server -- confirm
+# against ../common/util.sh.)
+MODELDIR="`pwd`/models"
+REQUEST_MANY_CHUNKS_PY="http_request_many_chunks.py"
+CLIENT_LOG="./client.http_request_many_chunks.log"
+SERVER_ARGS="--model-repository=${MODELDIR} --log-verbose=1 --model-control-mode=explicit --load-model=simple"
+SERVER_LOG="./inference_server_request_many_chunks.log"
+
+# A SERVER_PID of "0" after run_server is treated as a failed start.
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    cat $SERVER_LOG
+    exit 1
+fi
+
+# Disable errexit while the client runs so a failing test is recorded in RET
+# and both logs are printed, instead of the script aborting immediately.
+set +e
+python $REQUEST_MANY_CHUNKS_PY -v >> ${CLIENT_LOG} 2>&1
+if [ $? -ne 0 ]; then
+    echo -e "\n***\n*** HTTP Request Many Chunks Test Failed\n***"
+    cat $SERVER_LOG
+    cat $CLIENT_LOG
+    RET=1
+fi
+set -e
+
+# Stop the server and wait for the process to exit.
+kill $SERVER_PID
+wait $SERVER_PID
 
 if [ $RET -eq 0 ]; then
     echo -e "\n***\n*** Test Passed\n***"
 
@@ -0,0 +1,91 @@
+#!/usr/bin/python
+# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import socket
+import unittest
+
+
+class SagemakerRequestManyChunksTest(unittest.TestCase):
+    def setUp(self):
+        self._local_host = "localhost"
+        self._sagemaker_port = 8080
+        self._malicious_chunk_count = (
+            1000000  # large enough to cause a stack overflow if using alloca()
+        )
+
+    def send_chunked_request(
+        self, header: str, chunk_count: int, expected_response: str
+    ):
+        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        header = (
+            f"{header}"
+            f"Host: {self._local_host}:{self._sagemaker_port}\r\n"
+            f"Content-Type: application/octet-stream\r\n"
+            f"Transfer-Encoding: chunked\r\n"
+            f"Connection: close\r\n"
+            f"\r\n"
+        )
+        try:
+            s.connect((self._local_host, self._sagemaker_port))
+            # HTTP request with chunked encoding
+            s.sendall((header.encode()))
+
+            # Send chunked payload
+            for _ in range(chunk_count):
+                s.send(b"1\r\nA\r\n")
+            # End chunked encoding
+            s.sendall(b"0\r\n\r\n")
+
+            # Receive response
+            response = b""
+            while True:
+                try:
+                    chunk = s.recv(4096)
+                    if not chunk:
+                        break
+                    response += chunk
+                except socket.timeout:
+                    break
+            self.assertIn(expected_response, response.decode())
+        except Exception as e:
+            raise (e)
+        finally:
+            s.close()
+
+    def test_load_model(self):
+        request_header = (
+            f"POST /models HTTP/1.1\r\n" f"X-Amzn-SageMaker-Target-Model: ZZZZZZZ\r\n"
+        )
+        self.send_chunked_request(
+            request_header,
+            self._malicious_chunk_count,
+            "failed to parse the request JSON buffer: Invalid value. at 0",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
@@ -565,6 +565,53 @@ kill $SERVER_PID
 wait $SERVE_PID
 # MME end
 
+### Test Sagemaker Requests Containing Many Chunks ###
+# Rebuild the model repository with a single model 'sm_model': a copy of
+# onnx_int32_int32_int32 with versions 2 and 3 removed and the model name
+# rewritten in config.pbtxt.
+rm -rf models && mkdir models && \
+    cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 models/sm_model && \
+    rm -r models/sm_model/2 && rm -r models/sm_model/3 && \
+    sed -i "s/onnx_int32_int32_int32/sm_model/" models/sm_model/config.pbtxt
+
+export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME=sm_model
+REQUEST_MANY_CHUNKS_PY="sagemaker_request_many_chunks.py"
+CLIENT_LOG="./client.sagemaker_request_many_chunks.log"
+SERVER_LOG="./server.sagemaker_request_many_chunks.log"
+
+# Launch the 'serve' script in the background; SERVE_PID is the script's PID.
+serve > $SERVER_LOG 2>&1 &
+SERVE_PID=$!
+# Obtain Triton PID in such way as $! will return the script PID
+sleep 1
+SERVER_PID=`ps | grep tritonserver | awk '{ printf $1 }'`
+# WAIT_RET is checked below; it is presumably set by
+# sagemaker_wait_for_server_ready (second argument looks like a wait limit --
+# confirm against the helper's definition).
+sagemaker_wait_for_server_ready $SERVER_PID 10
+if [ "$WAIT_RET" != "0" ]; then
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    kill $SERVER_PID || true
+    cat $SERVER_LOG
+    exit 1
+fi
+
+# Ping
+# errexit is disabled around curl so a failed request reaches the status-code
+# check below instead of aborting the script.
+set +e
+code=`curl -s -w %{http_code} -o ./ping.out localhost:8080/ping`
+set -e
+if [ "$code" != "200" ]; then
+    cat ./ping.out
+    echo -e "\n***\n*** Test Failed\n***"
+    RET=1
+fi
+
+# Run the many-chunks client test; a failure is recorded in RET and both logs
+# are printed rather than aborting the script.
+set +e
+python $REQUEST_MANY_CHUNKS_PY >>$CLIENT_LOG 2>&1
+if [ $? -ne 0 ]; then
+    echo -e "\n***\n*** Sagemaker Request Many Chunks Test Failed\n***"
+    cat $SERVER_LOG
+    cat $CLIENT_LOG
+    RET=1
+fi
+set -e
+
+# Stop Triton, then wait for the 'serve' script to exit.
+kill $SERVER_PID
+wait $SERVE_PID
+
 unlink /opt/ml/model
 rm -rf /opt/ml/model