diff --git a/demos/code_local_assistant/README.md b/demos/code_local_assistant/README.md
index 15cbc0a183..740f467fe1 100644
--- a/demos/code_local_assistant/README.md
+++ b/demos/code_local_assistant/README.md
@@ -22,7 +22,7 @@ mkdir models
 Export `codellama/CodeLlama-7b-Instruct-hf`:
 ```console
-python export_model.py text_generation --source_model codellama/CodeLlama-7b-Instruct-hf --weight-format int4 --config_file_path models/config_all.json --model_repository_path models --target_device NPU --overwrite_models
+python export_model.py text_generation --source_model codellama/CodeLlama-7b-Instruct-hf --weight-format int4 --config_file_path models/config_all.json --model_repository_path models --task text_generation --target_device NPU --overwrite_models
 ```

 > **Note:** Use `--target_device GPU` for Intel GPU or omit this parameter to run on Intel CPU
@@ -34,7 +34,7 @@ Code completion works in non-streaming, unary mode. Do not use instruct model, t
 Export `Qwen/Qwen2.5-Coder-1.5B`:
 ```console
-python export_model.py text_generation --source_model Qwen/Qwen2.5-Coder-1.5B --weight-format int4 --config_file_path models/config_all.json --model_repository_path models --target_device NPU --overwrite_models
+python export_model.py text_generation --source_model Qwen/Qwen2.5-Coder-1.5B --weight-format int4 --config_file_path models/config_all.json --model_repository_path models --task text_generation --target_device NPU --overwrite_models
 ```

 Examine that the workspace is set up properly in `models/config_all.json`:
diff --git a/demos/continuous_batching/rag/README.md b/demos/continuous_batching/rag/README.md
index 3fac03d741..61144f0c9d 100644
--- a/demos/continuous_batching/rag/README.md
+++ b/demos/continuous_batching/rag/README.md
@@ -10,7 +10,7 @@
 ```bash
 mkdir models
-docker run --user $(id -u):$(id -g) --rm -v $(pwd)/models:/models:rw openvino/model_server:latest --pull --model_repository_path /models --source_model OpenVINO/Qwen3-8B-int4-ov
+docker run --user $(id -u):$(id -g) --rm -v $(pwd)/models:/models:rw openvino/model_server:latest --pull --model_repository_path /models --source_model OpenVINO/Qwen3-8B-int4-ov --task text_generation
 docker run --user $(id -u):$(id -g) --rm -v $(pwd)/models:/models:rw openvino/model_server:latest --pull --model_repository_path /models --source_model OpenVINO/bge-base-en-v1.5-fp16-ov --task embeddings
 docker run --user $(id -u):$(id -g) --rm -v $(pwd)/models:/models:rw openvino/model_server:latest --pull --model_repository_path /models --source_model OpenVINO/bge-reranker-base-fp16-ov --task rerank
@@ -25,9 +25,10 @@ docker run --user $(id -u):$(id -g) --rm -v $(pwd)/models:/models:rw openvino/mo
 ```bat
 mkdir models
-ovms --pull --model_repository_path models --source_model OpenVINO/Qwen3-8B-int4-ov
-ovms --pull --model_repository_path models --source_model OpenVINO/bge-base-en-v1.5-fp16-ov --task embeddings
-ovms --pull --model_repository_path models --source_model OpenVINO/bge-reranker-base-fp16-ov --task rerank
+
+ovms.exe --pull --model_repository_path models --source_model OpenVINO/Qwen3-8B-int4-ov --task text_generation
+ovms.exe --pull --model_repository_path models --source_model OpenVINO/bge-base-en-v1.5-fp16-ov --task embeddings
+ovms.exe --pull --model_repository_path models --source_model OpenVINO/bge-reranker-base-fp16-ov --task rerank
 ovms --add_to_config models --model_name OpenVINO/Qwen3-8B-int4-ov --model_path OpenVINO/Qwen3-8B-int4-ov
 ovms --add_to_config models --model_name OpenVINO/bge-base-en-v1.5-fp16-ov --model_path OpenVINO/bge-base-en-v1.5-fp16-ov
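With the three models pulled and registered, the server can then be started against that repository. A minimal sketch, assuming `--add_to_config models` wrote its entries into `models/config.json` and that port 8000 is free:

```bat
ovms --rest_port 8000 --config_path models/config.json
```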
diff --git a/demos/continuous_batching/vlm/README.md b/demos/continuous_batching/vlm/README.md
index b642de9b00..f09d5f020e 100644
--- a/demos/continuous_batching/vlm/README.md
+++ b/demos/continuous_batching/vlm/README.md
@@ -30,7 +30,7 @@ Select deployment option depending on how you prepared models in the previous st
 Running this command starts the container with CPU only target device:
 ```bash
 mkdir -p models
-docker run -d -u $(id -u):$(id -g) --rm -p 8000:8000 -v $(pwd)/models:/models:rw openvino/model_server:latest --rest_port 8000 --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path /models --model_name OpenGVLab/InternVL2-2B --pipeline_type VLM
+docker run -d -u $(id -u):$(id -g) --rm -p 8000:8000 -v $(pwd)/models:/models:rw openvino/model_server:latest --rest_port 8000 --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path /models --model_name OpenGVLab/InternVL2-2B --task text_generation --pipeline_type VLM
 ```

 **GPU**
@@ -39,7 +39,7 @@ to `docker run` command, use the image with GPU support.
 It can be applied using the commands below:
 ```bash
 mkdir -p models
-docker run -d -u $(id -u):$(id -g) --rm -p 8000:8000 --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) -v $(pwd)/models:/models:rw openvino/model_server:latest-gpu --rest_port 8000 --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path models --model_name OpenGVLab/InternVL2-2B --target_device GPU --pipeline_type VLM
+docker run -d -u $(id -u):$(id -g) --rm -p 8000:8000 --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) -v $(pwd)/models:/models:rw openvino/model_server:latest-gpu --rest_port 8000 --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path models --model_name OpenGVLab/InternVL2-2B --task text_generation --target_device GPU --pipeline_type VLM
 ```
 :::
@@ -49,11 +49,11 @@ If you run on GPU make sure to have appropriate drivers installed, so the device
 ```bat
 mkdir models
-ovms --rest_port 8000 --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path models --model_name OpenGVLab/InternVL2-2B --pipeline_type VLM --target_device CPU
+ovms --rest_port 8000 --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path models --model_name OpenGVLab/InternVL2-2B --task text_generation --pipeline_type VLM --target_device CPU
 ```
 or
 ```bat
-ovms --rest_port 8000 --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path models --model_name OpenGVLab/InternVL2-2B --pipeline_type VLM --target_device GPU
+ovms --rest_port 8000 --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path models --model_name OpenGVLab/InternVL2-2B --task text_generation --pipeline_type VLM --target_device GPU
 ```
 :::
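Once the container or binary above is serving the model, the VLM pipeline can be exercised through the OpenAI-style chat API. A minimal sketch: the `/v3/chat/completions` path follows the model server's OpenAI-compatible API, port 8000 and the model name come from the commands above, and `<BASE64_IMAGE>` is a placeholder to replace with your own encoded image:

```bash
curl http://localhost:8000/v3/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "OpenGVLab/InternVL2-2B",
    "messages": [
      {
        "role": "user",
        "content": [
          {"type": "text", "text": "Describe this image."},
          {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,<BASE64_IMAGE>"}}
        ]
      }
    ],
    "max_tokens": 100
  }'
```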
diff --git a/demos/llm_npu/README.md b/demos/llm_npu/README.md
index 252f3666b0..c3592c1277 100644
--- a/demos/llm_npu/README.md
+++ b/demos/llm_npu/README.md
@@ -38,7 +38,7 @@ Run `export_model.py` script to download and quantize the model:

 **LLM**
 ```console
-python export_model.py text_generation --source_model meta-llama/Llama-3.1-8B-Instruct --target_device NPU --config_file_path models/config.json --ov_cache_dir ./models/.ov_cache --model_repository_path models --overwrite_models
+python export_model.py text_generation --source_model meta-llama/Llama-3.1-8B-Instruct --target_device NPU --config_file_path models/config.json --ov_cache_dir ./models/.ov_cache --model_repository_path models --task text_generation --overwrite_models
 ```

 **Note:** The parameter `--ov_cache_dir` stores the model compilation cache to speed up initialization on subsequent startups. Drop this parameter if you don't want to store the compilation cache.
diff --git a/docs/llm/quickstart.md b/docs/llm/quickstart.md
index 2a4e9f3737..f203365cad 100644
--- a/docs/llm/quickstart.md
+++ b/docs/llm/quickstart.md
@@ -19,7 +19,7 @@ You can use another model from [OpenVINO organization on HuggingFace](https://hu
 ```bash
 mkdir models
-docker run --user $(id -u):$(id -g) -d --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render*) --rm -p 8000:8000 -v $(pwd)/models:/models:rw openvino/model_server:latest-gpu --source_model OpenVINO/Phi-3.5-mini-instruct-int4-ov --model_repository_path models --rest_port 8000 --target_device GPU --cache_size 2
+docker run --user $(id -u):$(id -g) -d --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render*) --rm -p 8000:8000 -v $(pwd)/models:/models:rw openvino/model_server:latest-gpu --source_model OpenVINO/Phi-3.5-mini-instruct-int4-ov --model_repository_path models --task text_generation --rest_port 8000 --target_device GPU --cache_size 2
 ```
 :::
@@ -27,7 +27,7 @@ docker run --user $(id -u):$(id -g) -d --device /dev/dri --group-add=$(stat -c "
 **Required:** OpenVINO Model Server package - see [deployment instructions](../deploying_server_baremetal.md) for details.

 ```bat
-ovms.exe --source_model OpenVINO/Phi-3.5-mini-instruct-int4-ov --model_repository_path models --rest_port 8000 --target_device GPU --cache_size 2
+ovms.exe --source_model OpenVINO/Phi-3.5-mini-instruct-int4-ov --model_repository_path models --rest_port 8000 --task text_generation --target_device GPU --cache_size 2
 ```
 :::
 ::::
diff --git a/docs/parameters.md b/docs/parameters.md
index e663e58c32..42ed4098c9 100644
--- a/docs/parameters.md
+++ b/docs/parameters.md
@@ -78,12 +78,12 @@ Shared configuration options for the pull, and pull & start mode. In the presenc
 | Option | Value format | Description |
 |-----------------------------|--------------|---------------------------------------------------------------------------------------------------------------|
-| `--pull` | `NA` | Runs the server in pull mode to download the model from the Hugging Face repository. |
-| `--source_model` | `string` | Name of the model in the Hugging Face repository. If not set, `model_name` is used. |
+| `--pull` | `NA` | Runs the server in pull mode to download the model from the Hugging Face repository. |
+| `--source_model` | `string` | Name of the model in the Hugging Face repository. Required; if not set, `model_name` is used. |
 | `--model_repository_path` | `string` | Directory where all required model files will be saved. |
 | `--model_name` | `string` | Name of the model as exposed externally by the server. |
-| `--target_device` | `string` | Device name to be used to execute inference operations. Accepted values are: `"CPU"/"GPU"/"MULTI"/"HETERO"` |
-| `--task` | `string` | Task type the model will support (`text_generation`, `embedding`, `rerank`, `image_generation`). Default: `text_generation` |
+| `--target_device` | `string` | Device name to be used to execute inference operations. Accepted values are: `"CPU"/"GPU"/"MULTI"/"HETERO"` |
+| `--task` | `string` | Task type the model will support (`text_generation`, `embeddings`, `rerank`, `image_generation`). Required. |
 | `--overwrite_models` | `NA` | If set, an existing model with the same name will be overwritten. If not set, the server will use existing model files if available. |

 ## Pull Mode Options for optimum-cli mode
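With the default removed, `--task` has to be passed explicitly whenever a model is pulled. A representative invocation, reusing the quickstart model name above (paths and names are illustrative):

```bash
ovms --pull --source_model OpenVINO/Phi-3.5-mini-instruct-int4-ov --model_repository_path models --task text_generation
```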
diff --git a/src/cli_parser.cpp b/src/cli_parser.cpp
index f7d76984a2..1fe91772a3 100644
--- a/src/cli_parser.cpp
+++ b/src/cli_parser.cpp
@@ -186,7 +186,7 @@ void CLIParser::parse(int argc, char** argv) {
                 "MODEL_REPOSITORY_PATH")
             ("task",
                 "Choose type of model export: text_generation - chat and completion endpoints, embeddings - embeddings endpoint, rerank - rerank endpoint, image_generation - image generation/edit/inpainting endpoints.",
-                cxxopts::value<std::string>()->default_value("text_generation"),
+                cxxopts::value<std::string>(),
                 "TASK")
             ("weight-format",
                 "Model precision used in optimum-cli export with conversion",
@@ -289,11 +289,8 @@ void CLIParser::parse(int argc, char** argv) {
             }
         }
     } else {
-        // Default task is text_generation
-        task = TEXT_GENERATION_GRAPH;
-        GraphCLIParser cliParser;
-        unmatchedOptions = cliParser.parse(result->unmatched());
-        this->graphOptionsParser = std::move(cliParser);
+        std::cerr << "error parsing options - --task parameter wasn't passed";
+        exit(OVMS_EX_USAGE);
     }

     if (unmatchedOptions.size()) {
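The practical effect of this parser change is that a pull-mode invocation without `--task` now fails fast instead of silently defaulting to text generation. Roughly, the expected behavior (model name reused from the RAG demo above) is:

```console
$ ovms --pull --source_model OpenVINO/Qwen3-8B-int4-ov --model_repository_path models
error parsing options - --task parameter wasn't passed
```

The process exits with the usage error code (`OVMS_EX_USAGE`), which is what the new `hfNoTaskParameter` test below asserts.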
diff --git a/src/test/ovmsconfig_test.cpp b/src/test/ovmsconfig_test.cpp
index 7efb1064cc..f7bd241893 100644
--- a/src/test/ovmsconfig_test.cpp
+++ b/src/test/ovmsconfig_test.cpp
@@ -354,6 +354,19 @@ TEST_F(OvmsConfigDeathTest, hfWrongTask) {
     EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "error parsing options - --task parameter unsupported value: bad_task");
 }

+TEST_F(OvmsConfigDeathTest, hfNoTaskParameter) {
+    char* n_argv[] = {
+        "ovms",
+        "--pull",
+        "--source_model",
+        "some/model",
+        "--model_repository_path",
+        "/some/path",
+    };
+    int arg_count = 6;
+    EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "error parsing options - --task parameter wasn't passed");
+}
+
 TEST_F(OvmsConfigDeathTest, hfBadTextGraphParameter) {
     char* n_argv[] = {
         "ovms",
@@ -362,10 +375,12 @@ TEST_F(OvmsConfigDeathTest, hfBadTextGraphParameter) {
         "some/model",
         "--model_repository_path",
         "/some/path",
+        "--task",
+        "text_generation",
         "--max_allowed_chunks",
         "1400",
     };
-    int arg_count = 8;
+    int arg_count = 10;
     EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "task: text_generation - error parsing options - unmatched arguments : --max_allowed_chunks, 1400,");
 }

@@ -548,10 +563,12 @@ TEST_F(OvmsConfigDeathTest, hfBadTextGraphParameterName) {
         "some/model",
         "--model_repository_path",
         "/some/path",
+        "--task",
+        "text_generation",
         "--min_num_batched_tokens",
         "145",
     };
-    int arg_count = 8;
+    int arg_count = 10;
     EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "task: text_generation - error parsing options - unmatched arguments : --min_num_batched_tokens, 145,");
 }

@@ -629,10 +646,12 @@ TEST_F(OvmsConfigDeathTest, hfBadTextGenGraphNoPull) {
         "some/model",
         "--model_repository_path",
         "/some/path",
+        "--task",
+        "text_generation",
         "--normalizes",
         "true",
     };
-    int arg_count = 7;
+    int arg_count = 9;
     EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "task: text_generation - error parsing options - unmatched arguments : --normalizes, true,");
 }

@@ -857,8 +876,10 @@ TEST_F(OvmsConfigDeathTest, simultaneousPullAndListModels) {
         "OpenVINO/Phi-3-mini-FastDraft-50M-int8-ov",
         "--model_repository_path",
         "/models",
+        "--task",
+        "text_generation",
         "--list_models"};
-    int arg_count = 7;
+    int arg_count = 9;
     EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "--list_models cannot be used with --pull or --task") << createCmd(arg_count, n_argv) << buffer.str();
 }

@@ -873,6 +894,8 @@ TEST(OvmsGraphConfigTest, positiveAllChanged) {
         (char*)modelName.c_str(),
         (char*)"--model_repository_path",
         (char*)downloadPath.c_str(),
+        (char*)"--task",
+        (char*)"text_generation",
         (char*)"--pipeline_type",
         (char*)"VLM",
         (char*)"--max_num_seqs",
@@ -893,7 +916,7 @@ TEST(OvmsGraphConfigTest, positiveAllChanged) {
         (char*)"parserName",
     };

-    int arg_count = 24;
+    int arg_count = 26;
     ConstructorEnabledConfig config;
     config.parse(arg_count, n_argv);

@@ -926,6 +949,8 @@ TEST(OvmsGraphConfigTest, positiveSomeChanged) {
         (char*)"--overwrite_models",
         (char*)"--model_repository_path",
         (char*)downloadPath.c_str(),
+        (char*)"--task",
+        (char*)"text_generation",
         (char*)"--pipeline_type",
         (char*)"VLM",
         (char*)"--max_num_seqs",
@@ -934,7 +959,7 @@ TEST(OvmsGraphConfigTest, positiveSomeChanged) {
         (char*)"NPU",
     };

-    int arg_count = 13;
+    int arg_count = 15;
     ConstructorEnabledConfig config;
     config.parse(arg_count, n_argv);

@@ -1100,9 +1125,11 @@ TEST(OvmsGraphConfigTest, positiveDefault) {
         (char*)modelName.c_str(),
         (char*)"--model_repository_path",
         (char*)downloadPath.c_str(),
+        (char*)"--task",
+        (char*)"text_generation",
     };

-    int arg_count = 6;
+    int arg_count = 8;
     ConstructorEnabledConfig config;
     config.parse(arg_count, n_argv);
     auto& hfSettings = config.getServerSettings().hfSettings;
@@ -1133,11 +1160,13 @@ TEST(OvmsGraphConfigTest, positiveDefaultStart) {
         (char*)modelName.c_str(),
         (char*)"--model_repository_path",
         (char*)downloadPath.c_str(),
+        (char*)"--task",
+        (char*)"text_generation",
         (char*)"--port",
         (char*)"8080",
     };

-    int arg_count = 7;
+    int arg_count = 9;
     ConstructorEnabledConfig config;
     config.parse(arg_count, n_argv);
     auto& hfSettings = config.getServerSettings().hfSettings;
@@ -1170,11 +1199,13 @@ TEST(OvmsGraphConfigTest, positiveTargetDeviceHetero) {
         (char*)modelName.c_str(),
         (char*)"--model_repository_path",
         (char*)downloadPath.c_str(),
+        (char*)"--task",
+        (char*)"text_generation",
         (char*)"--target_device",
         (char*)"HETERO",
     };

-    int arg_count = 8;
+    int arg_count = 10;
     ConstructorEnabledConfig config;
     config.parse(arg_count, n_argv);
     auto& hfSettings = config.getServerSettings().hfSettings;
@@ -1192,11 +1223,13 @@ TEST(OvmsGraphConfigTest, negativePipelineType) {
         (char*)modelName.c_str(),
         (char*)"--model_repository_path",
         (char*)downloadPath.c_str(),
+        (char*)"--task",
+        (char*)"text_generation",
         (char*)"--pipeline_type",
         (char*)"INVALID",
     };

-    int arg_count = 8;
+    int arg_count = 10;
     EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "pipeline_type: INVALID is not allowed. Supported types: LM, LM_CB, VLM, VLM_CB, AUTO");
 }

@@ -1210,11 +1243,13 @@ TEST(OvmsGraphConfigTest, negativeTargetDevice) {
         (char*)modelName.c_str(),
         (char*)"--model_repository_path",
         (char*)downloadPath.c_str(),
+        (char*)"--task",
+        (char*)"text_generation",
         (char*)"--target_device",
         (char*)"INVALID",
     };

-    int arg_count = 8;
+    int arg_count = 10;
     EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "target_device: INVALID is not allowed. Supported devices: CPU, GPU, NPU, HETERO");
 }
@@ -1228,11 +1263,13 @@ TEST(OvmsGraphConfigTest, negativeEnablePrefixCaching) {
         (char*)modelName.c_str(),
         (char*)"--model_repository_path",
         (char*)downloadPath.c_str(),
+        (char*)"--task",
+        (char*)"text_generation",
         (char*)"--enable_prefix_caching",
         (char*)"INVALID",
     };

-    int arg_count = 8;
+    int arg_count = 10;
     EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "enable_prefix_caching: INVALID is not allowed. Supported values: true, false");
 }

@@ -1246,14 +1283,34 @@ TEST(OvmsGraphConfigTest, negativeDynamicSplitFuse) {
         (char*)modelName.c_str(),
         (char*)"--model_repository_path",
         (char*)downloadPath.c_str(),
+        (char*)"--task",
+        (char*)"text_generation",
         (char*)"--dynamic_split_fuse",
         (char*)"INVALID",
     };

-    int arg_count = 8;
+    int arg_count = 10;
     EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "dynamic_split_fuse: INVALID is not allowed. Supported values: true, false");
 }

+TEST(OvmsGraphConfigTest, negativeSourceModel) {
+    std::string modelName = "NonOpenVINO/Phi-3-mini-FastDraft-50M-int8-ov";
+    std::string downloadPath = "test/repository";
+    char* n_argv[] = {
+        (char*)"ovms",
+        (char*)"--pull",
+        (char*)"--source_model",
+        (char*)modelName.c_str(),
+        (char*)"--model_repository_path",
+        (char*)downloadPath.c_str(),
+        (char*)"--task",
+        (char*)"text_generation",
+    };
+
+    int arg_count = 8;
+    EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "For now only OpenVINO models are supported in pulling mode");
+}
+
 TEST(OvmsGraphConfigTest, positiveAllChangedRerank) {
     std::string modelName = "OpenVINO/Phi-3-mini-FastDraft-50M-int8-ov";
     std::string downloadPath = "test/repository";
diff --git a/src/test/pull_hf_model_test.cpp b/src/test/pull_hf_model_test.cpp
index 54e2042ff8..fdb4dbe29c 100644
--- a/src/test/pull_hf_model_test.cpp
+++ b/src/test/pull_hf_model_test.cpp
@@ -442,9 +442,11 @@ TEST_F(HfDownloadModelModule, TestInvalidProxyTimeout) {
         (char*)modelName.c_str(),
         (char*)"--model_repository_path",
         (char*)downloadPath.c_str(),
+        (char*)"--task",
+        (char*)"text_generation",
         nullptr};
-    int arg_count = 6;
+    int arg_count = 8;
     ConstructorEnabledConfig config;
     {
         EnvGuard eGuard;