Skip to content

Remove default value for --task param #3513

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jul 25, 2025
4 changes: 2 additions & 2 deletions demos/code_local_assistant/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ mkdir models
Export `codellama/CodeLlama-7b-Instruct-hf`:
```console
python export_model.py text_generation --source_model codellama/CodeLlama-7b-Instruct-hf --weight-format int4 --config_file_path models/config_all.json --model_repository_path models --target_device NPU --overwrite_models
python export_model.py text_generation --source_model codellama/CodeLlama-7b-Instruct-hf --weight-format int4 --config_file_path models/config_all.json --model_repository_path models --task text_generation --target_device NPU --overwrite_models
```

> **Note:** Use `--target_device GPU` for Intel GPU or omit this parameter to run on Intel CPU
Expand All @@ -34,7 +34,7 @@ Code completion works in non-streaming, unary mode. Do not use instruct model, t

Export `Qwen/Qwen2.5-Coder-1.5B`:
```console
python export_model.py text_generation --source_model Qwen/Qwen2.5-Coder-1.5B --weight-format int4 --config_file_path models/config_all.json --model_repository_path models --target_device NPU --overwrite_models
python export_model.py text_generation --source_model Qwen/Qwen2.5-Coder-1.5B --weight-format int4 --config_file_path models/config_all.json --model_repository_path models --task text_generation --target_device NPU --overwrite_models
```

Examine that workspace is set up properly `models/config_all.json`:
Expand Down
4 changes: 2 additions & 2 deletions demos/continuous_batching/rag/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

```bash
mkdir models
docker run --user $(id -u):$(id -g) --rm -v $(pwd)/models:/models:rw openvino/model_server:latest --pull --model_repository_path /models --source_model OpenVINO/Qwen3-8B-int4-ov
docker run --user $(id -u):$(id -g) --rm -v $(pwd)/models:/models:rw openvino/model_server:latest --pull --model_repository_path /models --source_model OpenVINO/Qwen3-8B-int4-ov --task text_generation
docker run --user $(id -u):$(id -g) --rm -v $(pwd)/models:/models:rw openvino/model_server:latest --pull --model_repository_path /models --source_model OpenVINO/bge-base-en-v1.5-fp16-ov --task embeddings
docker run --user $(id -u):$(id -g) --rm -v $(pwd)/models:/models:rw openvino/model_server:latest --pull --model_repository_path /models --source_model OpenVINO/bge-reranker-base-fp16-ov --task rerank

Expand All @@ -25,7 +25,7 @@ docker run --user $(id -u):$(id -g) --rm -v $(pwd)/models:/models:rw openvino/mo

```bat
mkdir models
ovms.exe --pull --model_repository_path models --source_model OpenVINO/Qwen3-8B-int4-ov
ovms.exe --pull --model_repository_path models --source_model OpenVINO/Qwen3-8B-int4-ov --task text_generation
ovms.exe --pull --model_repository_path models --source_model OpenVINO/bge-base-en-v1.5-fp16-ov --task embeddings
ovms.exe --pull --model_repository_path models --source_model OpenVINO/bge-reranker-base-fp16-ov --task rerank

Expand Down
8 changes: 4 additions & 4 deletions demos/continuous_batching/vlm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Select deployment option depending on how you prepared models in the previous st
Running this command starts the container with CPU only target device:
```bash
mkdir -p models
docker run -d -u $(id -u):$(id -g) --rm -p 8000:8000 -v $(pwd)/models:/models:rw openvino/model_server:latest --rest_port 8000 --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path /models --model_name OpenGVLab/InternVL2-2B --pipeline_type VLM
docker run -d -u $(id -u):$(id -g) --rm -p 8000:8000 -v $(pwd)/models:/models:rw openvino/model_server:latest --rest_port 8000 --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path /models --model_name OpenGVLab/InternVL2-2B --task text_generation --pipeline_type VLM
```
**GPU**

Expand All @@ -39,7 +39,7 @@ to `docker run` command, use the image with GPU support.
It can be applied using the commands below:
```bash
mkdir -p models
docker run -d -u $(id -u):$(id -g) --rm -p 8000:8000 --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) -v $(pwd)/models:/models:rw openvino/model_server:latest-gpu --rest_port 8000 --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path models --model_name OpenGVLab/InternVL2-2B --target_device GPU --pipeline_type VLM
docker run -d -u $(id -u):$(id -g) --rm -p 8000:8000 --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) -v $(pwd)/models:/models:rw openvino/model_server:latest-gpu --rest_port 8000 --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path models --model_name OpenGVLab/InternVL2-2B --task text_generation --target_device GPU --pipeline_type VLM
```
:::

Expand All @@ -49,11 +49,11 @@ If you run on GPU make sure to have appropriate drivers installed, so the device

```bat
mkdir models
ovms --rest_port 8000 --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path models --model_name OpenGVLab/InternVL2-2B --pipeline_type VLM --target_device CPU
ovms --rest_port 8000 --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path models --model_name OpenGVLab/InternVL2-2B --task text_generation --pipeline_type VLM --target_device CPU
```
or
```bat
ovms --rest_port 8000 --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path models --model_name OpenGVLab/InternVL2-2B --pipeline_type VLM --target_device GPU
ovms --rest_port 8000 --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path models --model_name OpenGVLab/InternVL2-2B --task text_generation --pipeline_type VLM --target_device GPU
```
:::

Expand Down
2 changes: 1 addition & 1 deletion demos/llm_npu/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ Run `export_model.py` script to download and quantize the model:

**LLM**
```console
python export_model.py text_generation --source_model meta-llama/Llama-3.1-8B-Instruct --target_device NPU --config_file_path models/config.json --ov_cache_dir ./models/.ov_cache --model_repository_path models --overwrite_models
python export_model.py text_generation --source_model meta-llama/Llama-3.1-8B-Instruct --target_device NPU --config_file_path models/config.json --ov_cache_dir ./models/.ov_cache --model_repository_path models --task text_generation --overwrite_models
```
**Note:** The parameter `--ov_cache` stores the model compilation cache to speedup initialization time for sequential startup. Drop this parameter if you don't want to store the compilation cache.

Expand Down
4 changes: 2 additions & 2 deletions docs/llm/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ hardware configuration.

```bash
mkdir models
docker run --user $(id -u):$(id -g) -d --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render*) --rm -p 8000:8000 -v $(pwd)/models:/models:rw openvino/model_server:latest-gpu --source_model OpenVINO/Phi-3.5-mini-instruct-int4-ov --model_repository_path models --rest_port 8000 --target_device GPU --cache_size 2
docker run --user $(id -u):$(id -g) -d --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render*) --rm -p 8000:8000 -v $(pwd)/models:/models:rw openvino/model_server:latest-gpu --source_model OpenVINO/Phi-3.5-mini-instruct-int4-ov --model_repository_path models --task text_generation --rest_port 8000 --target_device GPU --cache_size 2
```
:::

:::{tab-item} On Baremetal Host
**Required:** OpenVINO Model Server package - see [deployment instructions](../deploying_server_baremetal.md) for details.

```bat
ovms.exe --source_model OpenVINO/Phi-3.5-mini-instruct-int4-ov --model_repository_path models --rest_port 8000 --target_device GPU --cache_size 2
ovms.exe --source_model OpenVINO/Phi-3.5-mini-instruct-int4-ov --model_repository_path models --rest_port 8000 --task text_generation --target_device GPU --cache_size 2
```
:::
::::
Expand Down
9 changes: 3 additions & 6 deletions src/cli_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ void CLIParser::parse(int argc, char** argv) {
"MODEL_REPOSITORY_PATH")
("task",
"Choose type of model export: text_generation - chat and completion endpoints, embeddings - embeddings endpoint, rerank - rerank endpoint, image_generation - image generation/edit/inpainting endpoints.",
cxxopts::value<std::string>()->default_value("text_generation"),
cxxopts::value<std::string>(),
"TASK");

options->add_options("single model")
Expand Down Expand Up @@ -282,11 +282,8 @@ void CLIParser::parse(int argc, char** argv) {
}
}
} else {
// Default task is text_generation
task = TEXT_GENERATION_GRAPH;
GraphCLIParser cliParser;
unmatchedOptions = cliParser.parse(result->unmatched());
this->graphOptionsParser = std::move(cliParser);
std::cerr << "error parsing options - --task parameter wasn't passed";
exit(OVMS_EX_USAGE);
}

if (unmatchedOptions.size()) {
Expand Down
69 changes: 55 additions & 14 deletions src/test/ovmsconfig_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,19 @@ TEST_F(OvmsConfigDeathTest, hfWrongTask) {
EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "error parsing options - --task parameter unsupported value: bad_task");
}

TEST_F(OvmsConfigDeathTest, hfNoTaskParameter) {
char* n_argv[] = {
"ovms",
"--pull",
"--source_model",
"some/model",
"--model_repository_path",
"/some/path",
};
int arg_count = 6;
EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "error parsing options - --task parameter wasn't passed");
}

TEST_F(OvmsConfigDeathTest, hfBadTextGraphParameter) {
char* n_argv[] = {
"ovms",
Expand All @@ -364,10 +377,12 @@ TEST_F(OvmsConfigDeathTest, hfBadTextGraphParameter) {
"some/model",
"--model_repository_path",
"/some/path",
"--task",
"text_generation",
"--max_allowed_chunks",
"1400",
};
int arg_count = 8;
int arg_count = 10;
EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "task: text_generation - error parsing options - unmatched arguments : --max_allowed_chunks, 1400,");
}

Expand Down Expand Up @@ -550,10 +565,12 @@ TEST_F(OvmsConfigDeathTest, hfBadTextGraphParameterName) {
"some/model",
"--model_repository_path",
"/some/path",
"--task",
"text_generation",
"--min_num_batched_tokens",
"145",
};
int arg_count = 8;
int arg_count = 10;
EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "task: text_generation - error parsing options - unmatched arguments : --min_num_batched_tokens, 145,");
}

Expand Down Expand Up @@ -631,10 +648,12 @@ TEST_F(OvmsConfigDeathTest, hfBadTextGenGraphNoPull) {
"some/model",
"--model_repository_path",
"/some/path",
"--task",
"text_generation",
"--normalizes",
"true",
};
int arg_count = 7;
int arg_count = 9;
EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "task: text_generation - error parsing options - unmatched arguments : --normalizes, true,");
}

Expand Down Expand Up @@ -797,8 +816,10 @@ TEST_F(OvmsConfigDeathTest, simultaneousPullAndListModels) {
"OpenVINO/Phi-3-mini-FastDraft-50M-int8-ov",
"--model_repository_path",
"/models",
"--task",
"text_generation",
"--list_models"};
int arg_count = 7;
int arg_count = 9;

EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "--list_models cannot be used with --pull or --task") << createCmd(arg_count, n_argv) << buffer.str();
}
Expand All @@ -813,6 +834,8 @@ TEST(OvmsGraphConfigTest, positiveAllChanged) {
(char*)modelName.c_str(),
(char*)"--model_repository_path",
(char*)downloadPath.c_str(),
(char*)"--task",
(char*)"text_generation",
(char*)"--pipeline_type",
(char*)"VLM",
(char*)"--max_num_seqs",
Expand All @@ -831,7 +854,7 @@ TEST(OvmsGraphConfigTest, positiveAllChanged) {
(char*)"/draft/model/source",
};

int arg_count = 22;
int arg_count = 24;
ConstructorEnabledConfig config;
config.parse(arg_count, n_argv);

Expand Down Expand Up @@ -863,6 +886,8 @@ TEST(OvmsGraphConfigTest, positiveSomeChanged) {
(char*)"--overwrite_models",
(char*)"--model_repository_path",
(char*)downloadPath.c_str(),
(char*)"--task",
(char*)"text_generation",
(char*)"--pipeline_type",
(char*)"VLM",
(char*)"--max_num_seqs",
Expand All @@ -871,7 +896,7 @@ TEST(OvmsGraphConfigTest, positiveSomeChanged) {
(char*)"NPU",
};

int arg_count = 13;
int arg_count = 15;
ConstructorEnabledConfig config;
config.parse(arg_count, n_argv);

Expand Down Expand Up @@ -939,9 +964,11 @@ TEST(OvmsGraphConfigTest, positiveDefault) {
(char*)modelName.c_str(),
(char*)"--model_repository_path",
(char*)downloadPath.c_str(),
(char*)"--task",
(char*)"text_generation",
};

int arg_count = 6;
int arg_count = 8;
ConstructorEnabledConfig config;
config.parse(arg_count, n_argv);
auto& hfSettings = config.getServerSettings().hfSettings;
Expand Down Expand Up @@ -971,11 +998,13 @@ TEST(OvmsGraphConfigTest, positiveDefaultStart) {
(char*)modelName.c_str(),
(char*)"--model_repository_path",
(char*)downloadPath.c_str(),
(char*)"--task",
(char*)"text_generation",
(char*)"--port",
(char*)"8080",
};

int arg_count = 7;
int arg_count = 9;
ConstructorEnabledConfig config;
config.parse(arg_count, n_argv);
auto& hfSettings = config.getServerSettings().hfSettings;
Expand Down Expand Up @@ -1007,11 +1036,13 @@ TEST(OvmsGraphConfigTest, positiveTargetDeviceHetero) {
(char*)modelName.c_str(),
(char*)"--model_repository_path",
(char*)downloadPath.c_str(),
(char*)"--task",
(char*)"text_generation",
(char*)"--target_device",
(char*)"HETERO",
};

int arg_count = 8;
int arg_count = 10;
ConstructorEnabledConfig config;
config.parse(arg_count, n_argv);
auto& hfSettings = config.getServerSettings().hfSettings;
Expand All @@ -1029,11 +1060,13 @@ TEST(OvmsGraphConfigTest, negativePipelineType) {
(char*)modelName.c_str(),
(char*)"--model_repository_path",
(char*)downloadPath.c_str(),
(char*)"--task",
(char*)"text_generation",
(char*)"--pipeline_type",
(char*)"INVALID",
};

int arg_count = 8;
int arg_count = 10;
EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "pipeline_type: INVALID is not allowed. Supported types: LM, LM_CB, VLM, VLM_CB, AUTO");
}

Expand All @@ -1047,11 +1080,13 @@ TEST(OvmsGraphConfigTest, negativeTargetDevice) {
(char*)modelName.c_str(),
(char*)"--model_repository_path",
(char*)downloadPath.c_str(),
(char*)"--task",
(char*)"text_generation",
(char*)"--target_device",
(char*)"INVALID",
};

int arg_count = 8;
int arg_count = 10;
EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "target_device: INVALID is not allowed. Supported devices: CPU, GPU, NPU, HETERO");
}

Expand All @@ -1065,11 +1100,13 @@ TEST(OvmsGraphConfigTest, negativeEnablePrefixCaching) {
(char*)modelName.c_str(),
(char*)"--model_repository_path",
(char*)downloadPath.c_str(),
(char*)"--task",
(char*)"text_generation",
(char*)"--enable_prefix_caching",
(char*)"INVALID",
};

int arg_count = 8;
int arg_count = 10;
EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "enable_prefix_caching: INVALID is not allowed. Supported values: true, false");
}

Expand All @@ -1083,11 +1120,13 @@ TEST(OvmsGraphConfigTest, negativeDynamicSplitFuse) {
(char*)modelName.c_str(),
(char*)"--model_repository_path",
(char*)downloadPath.c_str(),
(char*)"--task",
(char*)"text_generation",
(char*)"--dynamic_split_fuse",
(char*)"INVALID",
};

int arg_count = 8;
int arg_count = 10;
EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "dynamic_split_fuse: INVALID is not allowed. Supported values: true, false");
}

Expand All @@ -1101,9 +1140,11 @@ TEST(OvmsGraphConfigTest, negativeSourceModel) {
(char*)modelName.c_str(),
(char*)"--model_repository_path",
(char*)downloadPath.c_str(),
(char*)"--task",
(char*)"text_generation",
};

int arg_count = 6;
int arg_count = 8;
EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "For now only OpenVINO models are supported in pulling mode");
}

Expand Down
4 changes: 3 additions & 1 deletion src/test/pull_hf_model_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,11 @@ TEST_F(HfDownloadModelModule, TestInvalidProxyTimeout) {
(char*)modelName.c_str(),
(char*)"--model_repository_path",
(char*)downloadPath.c_str(),
(char*)"--task",
(char*)"text_generation",
nullptr};

int arg_count = 6;
int arg_count = 8;
ConstructorEnabledConfig config;
{
EnvGuard eGuard;
Expand Down