
Commit d6ea460

authored
sync release branch with main (#3229)
* updates in documentation (#3223)
* update requirements in universal-sentence-encoder demo (#3222)
* fix rerank and embeddings demo for windows and gpu (#3235)
1 parent c7e8262 commit d6ea460

File tree

6 files changed: +9 -9 lines changed

demos/common/export_models/export_model.py

Lines changed: 2 additions & 2 deletions

@@ -189,7 +189,7 @@ def add_common_arguments(parser):
 "name": "{{model_name}}_embeddings_model",
 "base_path": "embeddings",
 "target_device": "{{target_device|default("CPU", true)}}",
-"plugin_config": { "NUM_STREAMS": {{num_streams|default("1", true)}} }
+"plugin_config": { "NUM_STREAMS": "{{num_streams|default(1, true)}}" }
 }
 }
 ]
@@ -208,7 +208,7 @@ def add_common_arguments(parser):
 "name": "{{model_name}}_rerank_model",
 "base_path": "rerank",
 "target_device": "{{target_device|default("CPU", true)}}",
-"plugin_config": { "NUM_STREAMS": {{num_streams|default("1", true)}} }
+"plugin_config": { "NUM_STREAMS": "{{num_streams|default(1, true)}}" }
 }
 }
 ]
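The quoting change above matters because plugin config values are serialized into a JSON model config, where OpenVINO expects them as strings. A minimal sketch of the difference, using hypothetical rendered outputs of that template line (assuming the user passed num_streams=2; these strings are illustrative, not taken from the commit):

```python
import json

# Hypothetical rendered outputs of the "plugin_config" template line,
# assuming num_streams=2 was supplied.
old_render = '{ "NUM_STREAMS": 2 }'    # old template: value rendered as a bare number
new_render = '{ "NUM_STREAMS": "2" }'  # fixed template: value rendered as a quoted string

# Both are valid JSON, but only the fixed form yields a string value,
# which is what the plugin config expects.
assert isinstance(json.loads(old_render)["NUM_STREAMS"], int)
assert isinstance(json.loads(new_render)["NUM_STREAMS"], str)
```

Moving the quotes outside the Jinja2 expression also makes the `default(1, true)` fallback safe for both numeric and string inputs.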

demos/continuous_batching/accuracy/README.md

Lines changed: 2 additions & 1 deletion

@@ -80,6 +80,7 @@ export OPENAI_COMPATIBLE_API_URL=http://localhost:8000/v3
 export OPENAI_COMPATIBLE_API_KEY="unused"
 git clone https://github.com/EvolvingLMMs-Lab/lmms-eval
 cd lmms-eval
+git checkout 4471ad311e620ed6cf3a0419d8ba6f18f8fb1cb3 # https://github.com/EvolvingLMMs-Lab/lmms-eval/issues/625
 pip install -e . --extra-index-url "https://download.pytorch.org/whl/cpu"
 python -m lmms_eval \
 --model openai_compatible \
@@ -101,7 +102,7 @@ openai_compatible (model_version=OpenGVLab/InternVL2_5-8B,max_retries=1), gen_kw
 |--------|-------|------|-----:|--------------------|---|--------:|---|------|
 |mme |Yaml |none | 0|mme_cognition_score |↑ | 600.3571|± | N/A|
 |mme |Yaml |none | 0|mme_perception_score|↑ |1618.2984|± | N/A|
-|mmmu_val| 0|none | 0|mmmu_acc |↑ | 0.5100|± | N/A|
+|mmmu_val| 0|none | 0|mmmu_acc |↑ | 0.5322|± | N/A|

 ```

demos/continuous_batching/vlm/README.md

Lines changed: 1 addition & 0 deletions

@@ -46,6 +46,7 @@ python export_model.py text_generation --source_model OpenGVLab/InternVL2_5-8B -
 > **Note:** Change the `--weight-format` to quantize the model to `int8` or `int4` precision to reduce memory consumption and improve performance.
 
 > **Note:** You can change the model used in the demo out of any topology [tested](https://github.com/openvinotoolkit/openvino.genai/blob/master/SUPPORTED_MODELS.md#visual-language-models) with OpenVINO.
+Be aware that QwenVL models executed on GPU might experience execution errors with very high resolution images. In case of such behavior, it is recommended to reduce the parameter `max_pixels` in `preprocessor_config.json`.
 
 You should have a model folder like below:
 ```
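The added note suggests lowering `max_pixels` in the exported model's `preprocessor_config.json`. A minimal sketch of that edit, assuming the key sits at the top level of the JSON file (the path, helper name, and default cap below are hypothetical, not from the commit):

```python
import json
from pathlib import Path

def lower_max_pixels(config_path, new_max_pixels=1003520):
    """Cap `max_pixels` in a preprocessor_config.json.

    Sketch only: assumes `max_pixels` is a top-level integer key,
    as in QwenVL-style preprocessor configs. Leaves the file
    untouched if the key is absent or already below the cap.
    """
    path = Path(config_path)
    config = json.loads(path.read_text())
    if "max_pixels" in config:
        config["max_pixels"] = min(config["max_pixels"], new_max_pixels)
        path.write_text(json.dumps(config, indent=2))
    return config

# Usage (hypothetical model path):
# lower_max_pixels("models/OpenGVLab/InternVL2_5-8B/preprocessor_config.json")
```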

demos/embeddings/README.md

Lines changed: 1 addition & 2 deletions

@@ -126,8 +126,7 @@ content-type: application/json
 
 :::{dropdown} **Request embeddings with cURL**
 ```bash
-curl http://localhost:8000/v3/embeddings \
-  -H "Content-Type: application/json" -d '{ "model": "Alibaba-NLP/gte-large-en-v1.5", "input": "hello world"}' | jq .
+curl http://localhost:8000/v3/embeddings -H "Content-Type: application/json" -d "{ \"model\": \"Alibaba-NLP/gte-large-en-v1.5\", \"input\": \"hello world\"}"
 ```
 ```json
 {
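The rewritten cURL line uses escaped double quotes so it works in Windows shells, where single-quoted JSON does not. The same request body can be produced without any manual escaping; a small sketch (the variable names are illustrative, the model name and payload shape come from the diff above):

```python
import json

# Build the same request body as the cURL example; json.dumps emits
# the quoting that must otherwise be escaped by hand on Windows.
payload = json.dumps({
    "model": "Alibaba-NLP/gte-large-en-v1.5",
    "input": "hello world",
})
print(payload)  # -> {"model": "Alibaba-NLP/gte-large-en-v1.5", "input": "hello world"}

# The body can then be posted with any HTTP client, e.g. (requires a
# running model server at this address):
# import urllib.request
# req = urllib.request.Request(
#     "http://localhost:8000/v3/embeddings",
#     data=payload.encode(),
#     headers={"Content-Type": "application/json"},
# )
# print(urllib.request.urlopen(req).read().decode())
```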

demos/rerank/README.md

Lines changed: 1 addition & 2 deletions

@@ -106,8 +106,7 @@ content-type: application/json
 :::{dropdown} **Requesting rerank score with cURL**
 
 ```bash
-curl http://localhost:8000/v3/rerank -H "Content-Type: application/json" \
-  -d '{ "model": "BAAI/bge-reranker-large", "query": "welcome", "documents":["good morning","farewell"]}' | jq .
+curl http://localhost:8000/v3/rerank -H "Content-Type: application/json" -d "{ \"model\": \"BAAI/bge-reranker-large\", \"query\": \"welcome\", \"documents\":[\"good morning\",\"farewell\"]}"
 ```
 ```json
 {
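As with the embeddings demo, the rerank payload's escaped quoting can be generated instead of hand-written; a sketch using the model, query, and documents from the diff above:

```python
import json

# Build the rerank request body shown in the cURL example; the nested
# list of documents is the part most error-prone to escape by hand.
payload = json.dumps({
    "model": "BAAI/bge-reranker-large",
    "query": "welcome",
    "documents": ["good morning", "farewell"],
})
print(payload)
```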
Lines changed: 2 additions & 2 deletions

@@ -1,2 +1,2 @@
-tensorflow-serving-api==2.11.0
-numpy<2.0.0
+tensorflow-serving-api==2.18.1
+tensorflow==2.18.1
