[main] links (#3227)

dkalinowski · web-flow · commit e18d204e6c12 · 2025-04-15T11:51:05.000+02:00
diff --git a/demos/c_api_minimal_app/Makefile b/demos/c_api_minimal_app/Makefile
@@ -25,13 +25,13 @@ BASE_OS ?= ubuntu24
 
 ifeq ($(BASE_OS),ubuntu24)
   BASE_OS_TAG_UBUNTU ?= 24.04
-  PACKAGE_URL ?="https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_ubuntu24.tar.gz"
+  PACKAGE_URL ?="https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_ubuntu24.tar.gz"
   BASE_IMAGE ?= ubuntu:$(BASE_OS_TAG_UBUNTU)
   DIST_OS=ubuntu
 endif
 ifeq ($(BASE_OS),redhat)
   BASE_OS_TAG_REDHAT ?= 9.5
-  PACKAGE_URL ="https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_redhat.tar.gz"
+  PACKAGE_URL ="https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_redhat.tar.gz"
   BASE_IMAGE ?= registry.access.redhat.com/ubi9/ubi:$(BASE_OS_TAG_REDHAT)
   DIST_OS=redhat
 endif
diff --git a/demos/code_local_assistant/README.md b/demos/code_local_assistant/README.md
@@ -33,7 +33,7 @@ Since we do not want to wait for the code to appear, we need to use smaller mode
 Code completion works in non-streaming, unary mode. Do not use instruct model, there is no chat involved in the process.
 
 Export `Qwen/Qwen2.5-Coder-1.5B`:
-```baconsolesh
+```console
 python export_model.py text_generation --source_model Qwen/Qwen2.5-Coder-1.5B --weight-format int4 --config_file_path models/config_all.json --model_repository_path models --target_device NPU --overwrite_models
 ```
 
@@ -54,7 +54,7 @@ Examine that workspace is set up properly `models/config_all.json`:
 }
 ```
 
-```console
+```bash
 tree models
 models
 ├── codellama
@@ -100,13 +100,14 @@ Run OpenVINO Model Server with both models loaded at the same time:
 ### Windows: deploying on bare metal
 Please refer to OpenVINO Model Server installation first: [link](../../docs/deploying_server_baremetal.md)
 
-```console
+```bat
 ovms --rest_port 8000 --config_path ./models/config_all.json
 ```
 
 ### Linux: via Docker
 ```bash
-docker run -d --rm -v $(pwd)/:/workspace/ -p 8000:8000 openvino/model_server:2025.1 --rest_port 8000 --config_path /workspace/models/config_all.json
+docker run -d --rm --device /dev/accel --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) -u $(id -u):$(id -g) \
+  -p 8000:8000 -v $(pwd)/:/workspace/ openvino/model_server:2025.1 --rest_port 8000 --config_path /workspace/models/config_all.json
 ```
 
 ## Set Up Visual Studio Code
diff --git a/demos/continuous_batching/README.md b/demos/continuous_batching/README.md
@@ -33,8 +33,8 @@ LLM engine parameters will be defined inside the `graph.pbtxt` file.
 
-Download export script, install it's dependencies and create directory for the models:
+Download export script, install its dependencies and create directory for the models:
 ```console
-curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/0/demos/common/export_models/export_model.py -o export_model.py
-pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/0/demos/common/export_models/requirements.txt
+curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/export_model.py -o export_model.py
+pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/requirements.txt
 mkdir models
 ```
 
diff --git a/demos/continuous_batching/rag/README.md b/demos/continuous_batching/rag/README.md
@@ -4,8 +4,8 @@
 ## Creating models repository for all the endpoints
 
 ```console
-curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/0/demos/common/export_models/export_model.py -o export_model.py
-pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/0/demos/common/export_models/requirements.txt
+curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/export_model.py -o export_model.py
+pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/requirements.txt
 
 mkdir models
 python export_model.py text_generation --source_model meta-llama/Meta-Llama-3-8B-Instruct --weight-format int8 --kv_cache_precision u8 --config_file_path models/config_all.json --model_repository_path models 
diff --git a/demos/continuous_batching/speculative_decoding/README.md b/demos/continuous_batching/speculative_decoding/README.md
@@ -35,8 +35,8 @@ LLM engine parameters will be defined inside the `graph.pbtxt` file.
 
 Download export script, install its dependencies and create directory for the models:
 ```console
-curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/common/export_models/export_model.py -o export_model.py
-pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/common/export_models/requirements.txt
+curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/export_model.py -o export_model.py
+pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/requirements.txt
 mkdir models 
 ```
 
diff --git a/demos/continuous_batching/vlm/README.md b/demos/continuous_batching/vlm/README.md
@@ -24,8 +24,8 @@ Execution parameters will be defined inside the `graph.pbtxt` file.
 
-Download export script, install it's dependencies and create directory for the models:
+Download export script, install its dependencies and create directory for the models:
 ```console
-curl https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/export_models/export_model.py -o export_model.py
-pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/export_models/requirements.txt
+curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/export_model.py -o export_model.py
+pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/requirements.txt
 mkdir models
 ```
 
diff --git a/demos/embeddings/README.md b/demos/embeddings/README.md
@@ -17,8 +17,8 @@ That ensures faster initialization time, better performance and lower memory con
 
-Download export script, install it's dependencies and create directory for the models:
+Download export script, install its dependencies and create directory for the models:
 ```console
-curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/0/demos/common/export_models/export_model.py -o export_model.py
-pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/0/demos/common/export_models/requirements.txt
+curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/export_model.py -o export_model.py
+pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/requirements.txt
 mkdir models 
 ```
 
diff --git a/demos/llm_npu/README.md b/demos/llm_npu/README.md
@@ -27,8 +27,8 @@ LLM engine parameters will be defined inside the `graph.pbtxt` file.
 
-Download export script, install it's dependencies and create directory for the models:
+Download export script, install its dependencies and create directory for the models:
 ```console
-curl https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/export_models/export_model.py -o export_model.py
-pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/export_models/requirements.txt
+curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/export_model.py -o export_model.py
+pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/requirements.txt
 mkdir models
 ```
 
diff --git a/demos/rerank/README.md b/demos/rerank/README.md
@@ -15,8 +15,8 @@ That ensures faster initialization time, better performance and lower memory con
 
-Download export script, install it's dependencies and create directory for the models:
+Download export script, install its dependencies and create directory for the models:
 ```console
-curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/0/demos/common/export_models/export_model.py -o export_model.py
-pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/0/demos/common/export_models/requirements.txt
+curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/export_model.py -o export_model.py
+pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/requirements.txt
 mkdir models 
 ```
 
diff --git a/demos/vlm_npu/README.md b/demos/vlm_npu/README.md
@@ -27,8 +27,8 @@ LLM engine parameters will be defined inside the `graph.pbtxt` file.
 
-Download export script, install it's dependencies and create directory for the models:
+Download export script, install its dependencies and create directory for the models:
 ```console
-curl https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/export_models/export_model.py -o export_model.py
-pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/export_models/requirements.txt
+curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/export_model.py -o export_model.py
+pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/requirements.txt
 mkdir models
 ```
 
diff --git a/docs/deploying_server_baremetal.md b/docs/deploying_server_baremetal.md
@@ -8,12 +8,12 @@ To deploy Model Server on baremetal, use pre-compiled binaries for Ubuntu22, Ubu
 :sync: ubuntu-22-04
 Download precompiled package (without python support):
 ```{code} sh
-wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_ubuntu22.tar.gz
+wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_ubuntu22.tar.gz
 tar -xzvf ovms_ubuntu22.tar.gz
 ```
 or precompiled package (with python and LLM support):
 ```{code} sh
-wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_ubuntu22_python_on.tar.gz
+wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_ubuntu22_python_on.tar.gz
 tar -xzvf ovms_ubuntu22_python_on.tar.gz
 ```
 Install required libraries:
@@ -36,12 +36,12 @@ pip3 install "Jinja2==3.1.6" "MarkupSafe==3.0.2"
 :sync: ubuntu-24-04
 Download precompiled package (without python support):
 ```{code} sh
-wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_ubuntu24.tar.gz
+wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_ubuntu24.tar.gz
 tar -xzvf ovms_ubuntu24.tar.gz
 ```
 or precompiled package (with python and LLM support):
 ```{code} sh
-wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_ubuntu24_python_on.tar.gz
+wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_ubuntu24_python_on.tar.gz
 tar -xzvf ovms_ubuntu24_python_on.tar.gz
 ```
 Install required libraries:
@@ -64,12 +64,12 @@ pip3 install "Jinja2==3.1.6" "MarkupSafe==3.0.2"
 :sync: rhel-9.5
 Download precompiled package (without python support):
 ```{code} sh
-wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_redhat.tar.gz
+wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_redhat.tar.gz
 tar -xzvf ovms_redhat.tar.gz
 ```
 or precompiled package (with python and LLM support):
 ```{code} sh
-wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_redhat_python_on.tar.gz
+wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_redhat_python_on.tar.gz
 tar -xzvf ovms_redhat_python_on.tar.gz
 ```
 Install required libraries:
@@ -95,7 +95,7 @@ Make sure you have [Microsoft Visual C++ Redistributable](https://aka.ms/vs/17/r
 Download and unpack model server archive for Windows:
 
 ```bat
-curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_windows.zip -o ovms.zip
+curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_windows.zip -o ovms.zip
 tar -xf ovms.zip
 ```
 
diff --git a/docs/llm/quickstart.md b/docs/llm/quickstart.md
@@ -15,7 +15,7 @@ pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/r
 
 2. Run optimum-cli to download and quantize the model:
 ```console
-curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/common/export_models/export_model.py -o export_model.py
+curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/1/demos/common/export_models/export_model.py -o export_model.py
 mkdir models
 python export_model.py text_generation --source_model deepseek-ai/DeepSeek-R1-Distill-Qwen-7B --weight-format int4 --config_file_path models/config.json --model_repository_path models --target_device GPU --cache 2
 ```