Commit 9834f6b

Enable C++ only text generation (#3260)
1 parent b327445 commit 9834f6b

33 files changed: +389 −197 lines

.bazelrc

Lines changed: 7 additions & 2 deletions
@@ -242,8 +242,6 @@ build:windows --override_repository="boringssl=C:\\opt\\boringSSL-SwiftPM"
 
 # Windows config default flags
 build:windows --define=CLOUD_DISABLE=1
-build:windows --define=PYTHON_DISABLE=0
-build:windows --define=MEDIAPIPE_DISABLE=0
 # Change this path alongside WORKSPACE:"windows_openvino" path for model_api cmake compilation.
 build:windows --action_env OpenVINO_DIR="c:/opt/openvino/runtime/cmake"
 build:windows --action_env OpenCV_DIR="c:/opt/opencv"
@@ -252,6 +250,13 @@ build:windows --repo_env=HERMETIC_PYTHON_VERSION=3.12
 build:windows --define=USE_DROGON=1
 build:windows --define=GENAI_USE_BINARY=1
 
+build:win_mp_on_py_on --config=windows
+build:win_mp_on_py_on --define=MEDIAPIPE_DISABLE=0
+build:win_mp_on_py_on --define=PYTHON_DISABLE=0
+build:win_mp_on_py_off --config=windows
+build:win_mp_on_py_off --define=MEDIAPIPE_DISABLE=0
+build:win_mp_on_py_off --define=PYTHON_DISABLE=1
+
 # Tests settings ############################################################################################################################
 # sometimes failed logs exceed this threshold
 test --experimental_ui_max_stdouterr_bytes=304857600
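
The two new configs make the Python/MediaPipe combination explicit instead of baking it into `build:windows`. A minimal sketch of how they might be invoked, assuming `//src:ovms_test` (the target used by the CI script below) as the build target:

```bat
:: C++-only text generation: MediaPipe on, Python off -- a sketch, not from the diff
bazel build --config=win_mp_on_py_off //src:ovms_test

:: Full build: MediaPipe and Python both enabled
bazel build --config=win_mp_on_py_on //src:ovms_test
```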

ci/loadWin.groovy

Lines changed: 5 additions & 3 deletions
@@ -113,14 +113,15 @@ def clean() {
 }
 
 def build(){
-    def status = bat(returnStatus: true, script: 'windows_build.bat ' + get_short_bazel_path() + " //src:ovms_test")
+    def pythonOption = env.OVMS_PYTHON_ENABLED == "1" ? "--with_python" : "--no_python"
+    def status = bat(returnStatus: true, script: 'windows_build.bat ' + get_short_bazel_path() + ' ' + pythonOption + ' --with_tests')
     status = bat(returnStatus: true, script: 'grep "Build completed successfully" win_build.log"')
     if (status != 0) {
         error "Error: Windows build failed ${status}. Check win_build.log for details."
     } else {
         echo "Build successful."
     }
-    def status_pkg = bat(returnStatus: true, script: 'windows_create_package.bat ' + get_short_bazel_path())
+    def status_pkg = bat(returnStatus: true, script: 'windows_create_package.bat ' + get_short_bazel_path() + ' ' + pythonOption)
     if (status_pkg != 0) {
         error "Error: Windows package failed ${status_pkg}."
     } else {
@@ -129,7 +130,8 @@ def build(){
 }
 
 def unit_test(){
-    status = bat(returnStatus: true, script: 'windows_test.bat ' + get_short_bazel_path())
+    def pythonOption = env.OVMS_PYTHON_ENABLED == "1" ? "--with_python" : "--no_python"
+    status = bat(returnStatus: true, script: 'windows_test.bat ' + get_short_bazel_path() + ' ' + pythonOption)
     if (status != 0) {
         error "Error: Windows build test failed ${status}. Check win_build_test.log for details."
     } else {
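
For illustration, a hedged expansion of what these calls resolve to when `OVMS_PYTHON_ENABLED` is `0`; the bazel path is a placeholder for whatever `get_short_bazel_path()` returns, and only the flag names come from the diff:

```bat
:: Hypothetical CI invocation with OVMS_PYTHON_ENABLED=0
windows_build.bat C:\opt\bazel.exe --no_python --with_tests
windows_create_package.bat C:\opt\bazel.exe --no_python
windows_test.bat C:\opt\bazel.exe --no_python
```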

docs/deploying_server_baremetal.md

Lines changed: 11 additions & 11 deletions
@@ -6,12 +6,12 @@ To deploy Model Server on baremetal, use pre-compiled binaries for Ubuntu22, Ubu
 ::::{tab-set}
 :::{tab-item} Ubuntu 22.04
 :sync: ubuntu-22-04
-Download precompiled package (without python support):
+Download precompiled package (without python):
 ```{code} sh
 wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_ubuntu22.tar.gz
 tar -xzvf ovms_ubuntu22.tar.gz
 ```
-or precompiled package (with python and LLM support):
+or precompiled package (with python):
 ```{code} sh
 wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_ubuntu22_python_on.tar.gz
 tar -xzvf ovms_ubuntu22_python_on.tar.gz
@@ -25,7 +25,7 @@ Set path to the libraries and add binary to the `PATH`
 export LD_LIBRARY_PATH=${PWD}/ovms/lib
 export PATH=$PATH:${PWD}/ovms/bin
 ```
-In case of the version with python and LLM support run also:
+In case of the version with python run also:
 ```{code} sh
 export PYTHONPATH=${PWD}/ovms/lib/python
 sudo apt -y install libpython3.10
@@ -34,12 +34,12 @@ pip3 install "Jinja2==3.1.6" "MarkupSafe==3.0.2"
 :::
 :::{tab-item} Ubuntu 24.04
 :sync: ubuntu-24-04
-Download precompiled package (without python support):
+Download precompiled package (without python):
 ```{code} sh
 wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_ubuntu24.tar.gz
 tar -xzvf ovms_ubuntu24.tar.gz
 ```
-or precompiled package (with python and LLM support):
+or precompiled package (with python):
 ```{code} sh
 wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_ubuntu24_python_on.tar.gz
 tar -xzvf ovms_ubuntu24_python_on.tar.gz
@@ -53,7 +53,7 @@ Set path to the libraries and add binary to the `PATH`
 export LD_LIBRARY_PATH=${PWD}/ovms/lib
 export PATH=$PATH:${PWD}/ovms/bin
 ```
-In case of the version with python and LLM support run also:
+In case of the version with python run also:
 ```{code} sh
 export PYTHONPATH=${PWD}/ovms/lib/python
 sudo apt -y install libpython3.12
@@ -62,12 +62,12 @@ pip3 install "Jinja2==3.1.6" "MarkupSafe==3.0.2"
 :::
 :::{tab-item} RHEL 9.5
 :sync: rhel-9.5
-Download precompiled package (without python support):
+Download precompiled package (without python):
 ```{code} sh
 wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_redhat.tar.gz
 tar -xzvf ovms_redhat.tar.gz
 ```
-or precompiled package (with python and LLM support):
+or precompiled package (with python):
 ```{code} sh
 wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_redhat_python_on.tar.gz
 tar -xzvf ovms_redhat_python_on.tar.gz
@@ -81,7 +81,7 @@ Set path to the libraries and add binary to the `PATH`
 export LD_LIBRARY_PATH=${PWD}/ovms/lib
 export PATH=$PATH:${PWD}/ovms/bin
 ```
-In case of the version with python and LLM support run also:
+In case of the version with python run also:
 ```{code} sh
 export PYTHONPATH=${PWD}/ovms/lib/python
 sudo yum install -y python39-libs
@@ -111,9 +111,9 @@ Run `setupvars` script to set required environment variables.
 .\ovms\setupvars.ps1
 ```
 
-> **Note**: Running this script changes Python settings for the shell that runs it. Environment variables are set only for the current shell so make sure you rerun the script before using model server in a new shell.
+> **Note**: If the package contains Python, running this script changes Python settings for the shell that runs it. Environment variables are set only for the current shell, so make sure you rerun the script before using the model server in a new shell.
 
-> **Note**: When serving LLM models, OVMS uses Python's Jinja package to apply chat template. Please ensure you have Windows "Beta Unicode UTF-8 for worldwide language support" enabled. [Instruction](llm_utf8_troubleshoot.png)
+> **Note**: If the package contains Python, OVMS uses Python's Jinja package to apply the chat template when serving LLMs. In that case, please ensure you have Windows "Beta Unicode UTF-8 for worldwide language support" enabled. [Instruction](llm_utf8_troubleshoot.png)
 
 You can also build model server from source by following the [developer guide](windows_developer_guide.md).

setupvars.bat

Lines changed: 6 additions & 2 deletions
@@ -15,6 +15,10 @@
 ::
 @echo off
 set "OVMS_DIR=%~dp0"
-set "PYTHONHOME=%OVMS_DIR%\python"
-set "PATH=%OVMS_DIR%;%PYTHONHOME%;%PATH%"
+if exist "%OVMS_DIR%\python" (
+    set "PYTHONHOME=%OVMS_DIR%\python"
+    set "PATH=%OVMS_DIR%;%PYTHONHOME%;%PATH%"
+) else (
+    set "PATH=%OVMS_DIR%;%PATH%"
+)
 echo "OpenVINO Model Server Environment Initialized"

setupvars.ps1

Lines changed: 6 additions & 2 deletions
@@ -15,6 +15,10 @@
 #
 
 $env:OVMS_DIR=$PSScriptRoot
-$env:PYTHONHOME="$env:OVMS_DIR\python"
-$env:PATH="$env:OVMS_DIR;$env:PYTHONHOME;$env:PATH"
+if (Test-Path "$env:OVMS_DIR\python") {
+    $env:PYTHONHOME="$env:OVMS_DIR\python"
+    $env:PATH="$env:OVMS_DIR;$env:PYTHONHOME;$env:PATH"
+} else {
+    $env:PATH="$env:OVMS_DIR;$env:PATH"
+}
 echo "OpenVINO Model Server Environment Initialized"

src/BUILD

Lines changed: 8 additions & 12 deletions
@@ -603,18 +603,15 @@ cc_library(
     deps = select({
         "//:not_disable_python": [
             "//src/python:libovmspythonmodule",
-            # Jinja template processing is done in Python
-            "//src/llm:llmcalculator",
-            "//src/llm:genai_servables",
-            "//src/llm:text_processor",
         ],
         "//:disable_python": []
     }) + select({
         "//conditions:default": [],
         "//:not_disable_mediapipe" : [
             "//src/llm:openai_completions_api_handler",
             "//src/embeddings:embeddingscalculator",
-            "//src/rerank:rerankcalculator",],
+            "//src/rerank:rerankcalculator",
+            "//src/llm:llmcalculator",],
     }) + select({
         "//:enable_drogon": [
             "libdrogon_http_server",
@@ -2804,6 +2801,11 @@ cc_test(
         "test/mediapipe_framework_test.cpp",
         "test/http_openai_handler_test.cpp",
         "test/multipart_calculator_test.cpp",
+        "test/llm/llmnode_test.cpp",
+        "test/llm/max_model_length_test.cpp",
+        "test/llm/text_streamer_test.cpp",
+        "test/llm/visual_language_model/complete_flow_test.cpp",
+        "test/llm/visual_language_model/initialization_test.cpp",
     ],
     "//:disable_mediapipe" : [
         "test/disabled_mediapipe_test.cpp",
@@ -2813,14 +2815,8 @@ cc_test(
         # OvmsPyTensor is currently not used in OVMS core and is just a base for the binding.
         # "test/python/ovms_py_tensor_test.cpp",
         "test/pythonnode_test.cpp",
-        # LLM logic uses Python for processing Jinja templates
-        "test/llm/llmnode_test.cpp",
-        "test/llm/assisted_decoding_test.cpp",
-        "test/llm/max_model_length_test.cpp",
+        # LLM logic uses Python for processing Jinja templates when built with Python enabled
         "test/llm/llmtemplate_test.cpp",
-        "test/llm/text_streamer_test.cpp",
-        "test/llm/visual_language_model/complete_flow_test.cpp",
-        "test/llm/visual_language_model/initialization_test.cpp",
     ],
     "//:disable_python" : [],
 }) + select({
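
With these selects, the LLM tests listed under `//:not_disable_mediapipe` now build and run even when Python is disabled. A sketch of exercising that locally, combining the config added to `.bazelrc` above with this test target — illustrative, not taken from the diff:

```bat
:: Run the test suite in the C++-only configuration
bazel test --config=win_mp_on_py_off //src:ovms_test
```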

src/llm/BUILD

Lines changed: 33 additions & 15 deletions
@@ -16,9 +16,9 @@
 
 load("@mediapipe//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library", "mediapipe_proto_library")
 load("//:common_settings.bzl",
-     "COMMON_STATIC_LIBS_COPTS", "COMMON_STATIC_LIBS_LINKOPTS", "COMMON_FUZZER_COPTS", "COMMON_FUZZER_LINKOPTS", "COMMON_LOCAL_DEFINES", "PYBIND_DEPS")
+     "COMMON_STATIC_LIBS_COPTS", "COMMON_STATIC_LIBS_LINKOPTS", "COMMON_FUZZER_COPTS", "COMMON_FUZZER_LINKOPTS", "COMMON_LOCAL_DEFINES", "PYBIND_DEPS", "COPTS_PYTHON")
 
-COPTS_ADJUSTED = COMMON_STATIC_LIBS_COPTS + select({
+COPTS_ADJUSTED = COMMON_STATIC_LIBS_COPTS + COPTS_PYTHON + select({
     "//conditions:default": [],
     "//:fuzzer_build" : COMMON_FUZZER_COPTS,
 })
@@ -93,13 +93,30 @@ cc_library(
 
 cc_library(
     name = "genai_servables",
-    hdrs = ["servable.hpp", "servable_initializer.hpp",
-        "language_model/continuous_batching/servable.hpp", "language_model/continuous_batching/llm_executor.hpp", "language_model/continuous_batching/servable_initializer.hpp",
-        "visual_language_model/continuous_batching/servable.hpp", "language_model/legacy/servable.hpp", "language_model/legacy/servable_initializer.hpp", "language_model/legacy/legacy_executor.hpp",
-        "visual_language_model/legacy/servable.hpp", "visual_language_model/legacy/servable_initializer.hpp", "visual_language_model/legacy/legacy_executor.hpp"],
-    srcs = ["servable.cpp", "servable_initializer.cpp", "language_model/continuous_batching/servable.cpp", "language_model/continuous_batching/servable_initializer.cpp",
-        "visual_language_model/continuous_batching/servable.cpp", "language_model/legacy/servable.cpp", "language_model/legacy/servable_initializer.cpp", "language_model/legacy/legacy_executor.cpp",
-        "visual_language_model/legacy/servable.cpp", "visual_language_model/legacy/servable_initializer.cpp", "visual_language_model/legacy/legacy_executor.cpp"],
+    hdrs = ["servable.hpp",
+        "servable_initializer.hpp",
+        "language_model/continuous_batching/servable.hpp",
+        "language_model/continuous_batching/llm_executor.hpp",
+        "language_model/continuous_batching/servable_initializer.hpp",
+        "visual_language_model/continuous_batching/servable.hpp",
+        "language_model/legacy/servable.hpp",
+        "language_model/legacy/servable_initializer.hpp",
+        "language_model/legacy/legacy_executor.hpp",
+        "visual_language_model/legacy/servable.hpp",
+        "visual_language_model/legacy/servable_initializer.hpp",
+        "visual_language_model/legacy/legacy_executor.hpp",
+        "text_utils.hpp"],
+    srcs = ["servable.cpp",
+        "servable_initializer.cpp",
+        "language_model/continuous_batching/servable.cpp",
+        "language_model/continuous_batching/servable_initializer.cpp",
+        "visual_language_model/continuous_batching/servable.cpp",
+        "language_model/legacy/servable.cpp",
+        "language_model/legacy/servable_initializer.cpp",
+        "language_model/legacy/legacy_executor.cpp",
+        "visual_language_model/legacy/servable.cpp",
+        "visual_language_model/legacy/servable_initializer.cpp",
+        "visual_language_model/legacy/legacy_executor.cpp"],
     deps = [
         "//third_party:openvino",
         "@mediapipe//mediapipe/framework:calculator_framework",
@@ -111,14 +128,15 @@ cc_library(
         "//src:libovmsprofiler",
         "//src:libovmsfilesystem",
         "llmcalculator_cc_proto",
-        "//src/python:utils",
-        ":text_processor",
         ":openai_completions_api_handler",
         "//src:httppayload",
         "//src:libhttpclientconnection",
-    ] + PYBIND_DEPS + select({
+    ] + select({
        "//conditions:default": ["//third_party:genai", ":llm_engine"],
        "//:not_genai_bin" : [":llm_engine"],
+    }) + select({
+        "//:disable_python": [],
+        "//:not_disable_python" : [":py_jinja_template_processor"],
     }),
     visibility = ["//visibility:public"],
     local_defines = COMMON_LOCAL_DEFINES,
@@ -128,9 +146,9 @@ cc_library(
 )
 
 cc_library(
-    name = "text_processor",
-    hdrs = ["text_processor.hpp"],
-    srcs = ["text_processor.cpp"],
+    name = "py_jinja_template_processor",
+    hdrs = ["py_jinja_template_processor.hpp"],
+    srcs = ["py_jinja_template_processor.cpp"],
     deps = ["@mediapipe//mediapipe/framework:calculator_framework",
         "//third_party:openvino",
         "//src:libovmslogging",

src/llm/http_llm_calculator.cc

Lines changed: 1 addition & 0 deletions
@@ -26,6 +26,7 @@
 #pragma warning(pop)
 
 #include "../http_payload.hpp"
+#include "../logging.hpp"
 #include "../profiler.hpp"
 #include "apis/openai_completions.hpp"
 #include "servable.hpp"

src/llm/language_model/continuous_batching/servable.cpp

Lines changed: 4 additions & 1 deletion
@@ -33,7 +33,10 @@
 #include "../../../http_payload.hpp"
 #include "../../../mediapipe_internal/mediapipe_utils.hpp"
 #include "../../apis/openai_completions.hpp"
-#include "../../text_processor.hpp"
+#include "../../text_utils.hpp"
+#if (PYTHON_DISABLE == 0)
+#include "../../py_jinja_template_processor.hpp"
+#endif
 #include "llm_executor.hpp"
 #include "servable.hpp"

src/llm/language_model/continuous_batching/servable_initializer.cpp

Lines changed: 6 additions & 6 deletions
@@ -57,15 +57,12 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptr<GenAiSe
         return status;
     }
     auto properties = std::static_pointer_cast<ContinuousBatchingServableProperties>(servable->getProperties());
-
     properties->modelsPath = parsedModelsPath;
-
     properties->schedulerConfig.max_num_batched_tokens = nodeOptions.max_num_batched_tokens();
     properties->schedulerConfig.cache_size = nodeOptions.cache_size();
     properties->schedulerConfig.dynamic_split_fuse = nodeOptions.dynamic_split_fuse();
     properties->schedulerConfig.max_num_seqs = nodeOptions.max_num_seqs();
     properties->schedulerConfig.enable_prefix_caching = nodeOptions.enable_prefix_caching();
-
     properties->device = nodeOptions.device();
 
     if (!nodeOptions.draft_models_path().empty()) {
@@ -91,7 +88,6 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptr<GenAiSe
         }
 
     } else if (nodeOptions.has_draft_max_num_batched_tokens() || nodeOptions.has_draft_cache_size() || nodeOptions.has_draft_dynamic_split_fuse() || nodeOptions.has_draft_max_num_seqs() || nodeOptions.has_draft_block_size() || nodeOptions.has_draft_device()) {
-        // Consider moving draft parameters to separate structure in node options, so it's validated on the proto level
         SPDLOG_ERROR("Draft model path is not provided, but draft scheduler options are set.");
         return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
     }
@@ -116,14 +112,18 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptr<GenAiSe
         return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
     }
 
-    loadTextProcessor(properties, parsedModelsPath);
+#if (PYTHON_DISABLE == 0)
+    loadPyTemplateProcessor(properties, parsedModelsPath);
+#else
+    loadDefaultTemplateProcessorIfNeeded(properties);
+#endif
     if (nodeOptions.has_max_tokens_limit()) {
         properties->maxTokensLimit = nodeOptions.max_tokens_limit();
     }
     properties->bestOfLimit = nodeOptions.best_of_limit();
     properties->maxModelLength = parseMaxModelLength(parsedModelsPath);
-
     properties->llmExecutorWrapper = std::make_shared<LLMExecutorWrapper>(properties->pipeline);
+
     return StatusCode::OK;
 }
src/llm/language_model/legacy/servable.cpp

Lines changed: 4 additions & 1 deletion
@@ -33,7 +33,10 @@
 #include "../../../http_payload.hpp"
 #include "../../../mediapipe_internal/mediapipe_utils.hpp"
 #include "../../apis/openai_completions.hpp"
-#include "../../text_processor.hpp"
+#include "../../text_utils.hpp"
+#if (PYTHON_DISABLE == 0)
+#include "../../py_jinja_template_processor.hpp"
+#endif
 #include "servable.hpp"
 
 namespace ovms {

src/llm/language_model/legacy/servable_initializer.cpp

Lines changed: 5 additions & 2 deletions
@@ -95,8 +95,11 @@ Status LegacyServableInitializer::initialize(std::shared_ptr<GenAiServable>& ser
         SPDLOG_ERROR("Error during llm node initialization for models_path: {}", parsedModelsPath);
         return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
     }
-
-    loadTextProcessor(properties, parsedModelsPath);
+#if (PYTHON_DISABLE == 0)
+    loadPyTemplateProcessor(properties, parsedModelsPath);
+#else
+    loadDefaultTemplateProcessorIfNeeded(properties);
+#endif
     properties->legacyExecutor = std::make_shared<LegacyExecutorWrapper>(properties->pipeline);
     if (nodeOptions.has_max_tokens_limit()) {
         properties->maxTokensLimit = nodeOptions.max_tokens_limit();
