From e7b4800e4e272a29610a8e130789b2052622e898 Mon Sep 17 00:00:00 2001 From: kevin Date: Sat, 11 Jan 2025 15:35:09 +0000 Subject: [PATCH 1/2] [LLM] update llm server dockerfiles --- .../Dockerfile_serving_cuda118_cudnn8 | 33 +++++++++---------- .../Dockerfile_serving_cuda123_cudnn9 | 31 ++++++++--------- 2 files changed, 29 insertions(+), 35 deletions(-) diff --git a/llm/server/dockerfiles/Dockerfile_serving_cuda118_cudnn8 b/llm/server/dockerfiles/Dockerfile_serving_cuda118_cudnn8 index c701765e9829..f6f762e63726 100644 --- a/llm/server/dockerfiles/Dockerfile_serving_cuda118_cudnn8 +++ b/llm/server/dockerfiles/Dockerfile_serving_cuda118_cudnn8 @@ -1,31 +1,28 @@ FROM registry.baidubce.com/paddlepaddle/fastdeploy:llm-base-gcc12.3-cuda11.8-cudnn8-nccl2.15.5 WORKDIR /opt/output/ -COPY ./server/ /opt/output/Serving/ - ENV LD_LIBRARY_PATH="/usr/local/cuda-11.8/compat/:$LD_LIBRARY_PATH" -RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple -RUN python3 -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ \ +RUN python3 -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu123/ \ && python3 -m pip install paddlenlp==3.0.0b0 \ - && python3 -m pip install --no-cache-dir sentencepiece pycryptodome tritonclient[all]==2.41.1 + && python3 -m pip install --no-cache-dir sentencepiece pycryptodome tritonclient[all]==2.41.1 \ + && python3 -m pip install --no-cache-dir --force-reinstall https://paddlepaddle-inference-banchmark.bj.bcebos.com/paddlenlp_ops-0.0.0-py3-none-any.whl \ + && apt-get clean && rm -rf /var/lib/apt/lists/* -RUN git clone https://gitee.com/paddlepaddle/PaddleNLP.git && cd PaddleNLP/csrc \ - && python3 setup_cuda.py build && python3 setup_cuda.py install --user \ - && cp -r /opt/output/PaddleNLP/paddlenlp /usr/local/lib/python3.10/dist-packages/ \ - && cp -r /root/.local/lib/python3.10/site-packages/* /usr/local/lib/python3.10/dist-packages/ \ - && rm -rf /opt/output/PaddleNLP +RUN mkdir -p /opt/source/ && cd /opt/source/ \ + && git clone https://github.com/PaddlePaddle/Paddle.git \ + && git clone https://github.com/PaddlePaddle/PaddleNLP.git \ + && cp -r /opt/source/PaddleNLP/paddlenlp /usr/local/lib/python3.10/dist-packages/ \ + && python3 -m pip install --no-cache-dir -r PaddleNLP/requirements.txt \ + && python3 -m pip install --no-cache-dir -r PaddleNLP/llm/server/server/requirements.txt -RUN python3 -m pip install -r /opt/output/Serving/requirements.txt && rm /opt/output/Serving/requirements.txt -RUN mv Serving/server /usr/local/lib/python3.10/dist-packages/ RUN mkdir -p /opt/output/Serving/llm_model/model/1 \ - && mv /opt/output/Serving/config/config.pbtxt /opt/output/Serving/llm_model/model/ \ - && rm -rf /opt/output/Serving/config/ -RUN echo "from server.triton_server import TritonPythonModel" >>/opt/output/Serving/llm_model/model/1/model.py + && cp /opt/source/PaddleNLP/llm/server/server/config/config.pbtxt /opt/output/Serving/llm_model/model/ \ + && cp /opt/source/PaddleNLP/llm/server/server/scripts/start_server.sh /opt/output/Serving/ \ + && cp /opt/source/PaddleNLP/llm/server/server/scripts/stop_server.sh /opt/output/Serving/ -RUN cd /opt/output/Serving/ \ - && cp scripts/start_server.sh . && cp scripts/stop_server.sh . \ - && rm -rf scripts +ENV PYTHONPATH="/opt/source/PaddleNLP/llm/server/server" +RUN echo "from server.triton_server import TritonPythonModel" >>/opt/output/Serving/llm_model/model/1/model.py ENV http_proxy="" ENV https_proxy="" diff --git a/llm/server/dockerfiles/Dockerfile_serving_cuda123_cudnn9 b/llm/server/dockerfiles/Dockerfile_serving_cuda123_cudnn9 index 4b0d1f002d98..ffe2517d3f0c 100644 --- a/llm/server/dockerfiles/Dockerfile_serving_cuda123_cudnn9 +++ b/llm/server/dockerfiles/Dockerfile_serving_cuda123_cudnn9 @@ -1,31 +1,28 @@ FROM registry.baidubce.com/paddlepaddle/fastdeploy:llm-base-gcc12.3-cuda12.3-cudnn9-nccl2.15.5 WORKDIR /opt/output/ -COPY ./server/ /opt/output/Serving/ - ENV LD_LIBRARY_PATH="/usr/local/cuda-12.3/compat/:$LD_LIBRARY_PATH" -RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple RUN python3 -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu123/ \ && python3 -m pip install paddlenlp==3.0.0b0 \ - && python3 -m pip install --no-cache-dir sentencepiece pycryptodome tritonclient[all]==2.41.1 + && python3 -m pip install --no-cache-dir sentencepiece pycryptodome tritonclient[all]==2.41.1 \ + && python3 -m pip install --no-cache-dir --force-reinstall https://paddlepaddle-inference-banchmark.bj.bcebos.com/paddlenlp_ops-0.0.0-py3-none-any.whl \ + && apt-get clean && rm -rf /var/lib/apt/lists/* -RUN git clone https://gitee.com/paddlepaddle/PaddleNLP.git && cd PaddleNLP/csrc \ - && python3 setup_cuda.py build && python3 setup_cuda.py install --user \ - && cp -r /opt/output/PaddleNLP/paddlenlp /usr/local/lib/python3.10/dist-packages/ \ - && cp -r /root/.local/lib/python3.10/site-packages/* /usr/local/lib/python3.10/dist-packages/ \ - && rm -rf /opt/output/PaddleNLP +RUN mkdir -p /opt/source/ && cd /opt/source/ \ + && git clone https://github.com/PaddlePaddle/Paddle.git \ + && git clone https://github.com/PaddlePaddle/PaddleNLP.git \ + && cp -r /opt/source/PaddleNLP/paddlenlp /usr/local/lib/python3.10/dist-packages/ \ + && python3 -m pip install --no-cache-dir -r PaddleNLP/requirements.txt \ + && python3 -m pip install --no-cache-dir -r PaddleNLP/llm/server/server/requirements.txt -RUN python3 -m pip install -r /opt/output/Serving/requirements.txt && rm /opt/output/Serving/requirements.txt -RUN mv Serving/server /usr/local/lib/python3.10/dist-packages/ RUN mkdir -p /opt/output/Serving/llm_model/model/1 \ - && mv /opt/output/Serving/config/config.pbtxt /opt/output/Serving/llm_model/model/ \ - && rm -rf /opt/output/Serving/config/ -RUN echo "from server.triton_server import TritonPythonModel" >>/opt/output/Serving/llm_model/model/1/model.py + && cp /opt/source/PaddleNLP/llm/server/server/config/config.pbtxt /opt/output/Serving/llm_model/model/ \ + && cp /opt/source/PaddleNLP/llm/server/server/scripts/start_server.sh /opt/output/Serving/ \ + && cp /opt/source/PaddleNLP/llm/server/server/scripts/stop_server.sh /opt/output/Serving/ -RUN cd /opt/output/Serving/ \ - && cp scripts/start_server.sh . && cp scripts/stop_server.sh . \ - && rm -rf scripts +ENV PYTHONPATH="/opt/source/PaddleNLP/llm/server/server" +RUN echo "from server.triton_server import TritonPythonModel" >>/opt/output/Serving/llm_model/model/1/model.py ENV http_proxy="" ENV https_proxy="" From 1d5ca47c6b6253b852c135031823a5fcc5cc0de5 Mon Sep 17 00:00:00 2001 From: kevin Date: Tue, 25 Feb 2025 03:31:52 +0000 Subject: [PATCH 2/2] update llm dockerfile --- .../dockerfiles/Dockerfile_serving_cuda118_cudnn8 | 15 ++++++++------- ...3_cudnn9 => Dockerfile_serving_cuda124_cudnn9} | 15 ++++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) rename llm/server/dockerfiles/{Dockerfile_serving_cuda123_cudnn9 => Dockerfile_serving_cuda124_cudnn9} (64%) diff --git a/llm/server/dockerfiles/Dockerfile_serving_cuda118_cudnn8 b/llm/server/dockerfiles/Dockerfile_serving_cuda118_cudnn8 index f6f762e63726..d7921b593cc0 100644 --- a/llm/server/dockerfiles/Dockerfile_serving_cuda118_cudnn8 +++ b/llm/server/dockerfiles/Dockerfile_serving_cuda118_cudnn8 @@ -1,18 +1,19 @@ FROM registry.baidubce.com/paddlepaddle/fastdeploy:llm-base-gcc12.3-cuda11.8-cudnn8-nccl2.15.5 WORKDIR /opt/output/ -ENV LD_LIBRARY_PATH="/usr/local/cuda-11.8/compat/:$LD_LIBRARY_PATH" +ENV LD_LIBRARY_PATH="/usr/local/cuda-11.8/lib64:/usr/lib64:/usr/local/cuda-11.8/targets/x86_64-linux/lib/:/opt/nccl-2.15.5-1/build/lib/:/usr/local/nvidia/lib:/usr/local/nvidia/lib64" -RUN python3 -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu123/ \ - && python3 -m pip install paddlenlp==3.0.0b0 \ - && python3 -m pip install --no-cache-dir sentencepiece pycryptodome tritonclient[all]==2.41.1 \ +# 安装 paddlepaddle & paddlenlp & paddlenlp_ops +RUN python3 -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ \ + && python3 -m pip install --no-cache-dir --force-reinstall https://paddle-qa.bj.bcebos.com/paddlenlp/wheel/2f85a64edd4aa9911c94ccb5ce53e83ac41ce22b/paddlenlp-3.0.0b3.post20250123-py3-none-any.whl \ && python3 -m pip install --no-cache-dir --force-reinstall https://paddlepaddle-inference-banchmark.bj.bcebos.com/paddlenlp_ops-0.0.0-py3-none-any.whl \ + && python3 -m pip install --no-cache-dir sentencepiece pycryptodome tritonclient[all]==2.41.1 \ && apt-get clean && rm -rf /var/lib/apt/lists/* +# clone paddle & paddlenlp 源码(代码版本应与上述安装版本对齐) RUN mkdir -p /opt/source/ && cd /opt/source/ \ && git clone https://github.com/PaddlePaddle/Paddle.git \ - && git clone https://github.com/PaddlePaddle/PaddleNLP.git \ - && cp -r /opt/source/PaddleNLP/paddlenlp /usr/local/lib/python3.10/dist-packages/ \ + && git clone -b release/3.0-beta4 https://github.com/PaddlePaddle/PaddleNLP.git \ && python3 -m pip install --no-cache-dir -r PaddleNLP/requirements.txt \ && python3 -m pip install --no-cache-dir -r PaddleNLP/llm/server/server/requirements.txt @@ -21,7 +22,7 @@ RUN mkdir -p /opt/output/Serving/llm_model/model/1 \ && cp /opt/source/PaddleNLP/llm/server/server/scripts/start_server.sh /opt/output/Serving/ \ && cp /opt/source/PaddleNLP/llm/server/server/scripts/stop_server.sh /opt/output/Serving/ -ENV PYTHONPATH="/opt/source/PaddleNLP/llm/server/server" +ENV PYTHONPATH="/opt/source/PaddleNLP/llm/server/server:/opt/source/PaddleNLP" RUN echo "from server.triton_server import TritonPythonModel" >>/opt/output/Serving/llm_model/model/1/model.py ENV http_proxy="" diff --git a/llm/server/dockerfiles/Dockerfile_serving_cuda123_cudnn9 b/llm/server/dockerfiles/Dockerfile_serving_cuda124_cudnn9 similarity index 64% rename from llm/server/dockerfiles/Dockerfile_serving_cuda123_cudnn9 rename to llm/server/dockerfiles/Dockerfile_serving_cuda124_cudnn9 index ffe2517d3f0c..a9a17c8e89ae 100644 --- a/llm/server/dockerfiles/Dockerfile_serving_cuda123_cudnn9 +++ b/llm/server/dockerfiles/Dockerfile_serving_cuda124_cudnn9 @@ -1,18 +1,19 @@ -FROM registry.baidubce.com/paddlepaddle/fastdeploy:llm-base-gcc12.3-cuda12.3-cudnn9-nccl2.15.5 +FROM registry.baidubce.com/paddlepaddle/fastdeploy:llm-base-gcc12.3-cuda12.4-cudnn9-nccl2.15.5 WORKDIR /opt/output/ -ENV LD_LIBRARY_PATH="/usr/local/cuda-12.3/compat/:$LD_LIBRARY_PATH" +ENV LD_LIBRARY_PATH="/usr/local/cuda-12.4/lib64:/usr/lib64:/usr/local/cuda-12.4/targets/x86_64-linux/lib/:/opt/nccl-2.15.5-1/build/lib/:/usr/local/nvidia/lib:/usr/local/nvidia/lib64" +# 安装 paddlepaddle & paddlenlp & paddlenlp_ops RUN python3 -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu123/ \ - && python3 -m pip install paddlenlp==3.0.0b0 \ - && python3 -m pip install --no-cache-dir sentencepiece pycryptodome tritonclient[all]==2.41.1 \ + && python3 -m pip install --no-cache-dir --force-reinstall https://paddle-qa.bj.bcebos.com/paddlenlp/wheel/2f85a64edd4aa9911c94ccb5ce53e83ac41ce22b/paddlenlp-3.0.0b3.post20250123-py3-none-any.whl \ && python3 -m pip install --no-cache-dir --force-reinstall https://paddlepaddle-inference-banchmark.bj.bcebos.com/paddlenlp_ops-0.0.0-py3-none-any.whl \ + && python3 -m pip install --no-cache-dir sentencepiece pycryptodome tritonclient[all]==2.41.1 \ && apt-get clean && rm -rf /var/lib/apt/lists/* +# clone paddle & paddlenlp 源码(代码版本应与上述安装版本对齐) RUN mkdir -p /opt/source/ && cd /opt/source/ \ && git clone https://github.com/PaddlePaddle/Paddle.git \ - && git clone https://github.com/PaddlePaddle/PaddleNLP.git \ - && cp -r /opt/source/PaddleNLP/paddlenlp /usr/local/lib/python3.10/dist-packages/ \ + && git clone -b release/3.0-beta4 https://github.com/PaddlePaddle/PaddleNLP.git \ && python3 -m pip install --no-cache-dir -r PaddleNLP/requirements.txt \ && python3 -m pip install --no-cache-dir -r PaddleNLP/llm/server/server/requirements.txt @@ -21,7 +22,7 @@ RUN mkdir -p /opt/output/Serving/llm_model/model/1 \ && cp /opt/source/PaddleNLP/llm/server/server/scripts/start_server.sh /opt/output/Serving/ \ && cp /opt/source/PaddleNLP/llm/server/server/scripts/stop_server.sh /opt/output/Serving/ -ENV PYTHONPATH="/opt/source/PaddleNLP/llm/server/server" +ENV PYTHONPATH="/opt/source/PaddleNLP/llm/server/server:/opt/source/PaddleNLP" RUN echo "from server.triton_server import TritonPythonModel" >>/opt/output/Serving/llm_model/model/1/model.py ENV http_proxy=""