Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion demos/common/export_models/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ options:

#### Text Generation CPU Deployment
```console
python export_model.py text_generation --source_model meta-llama/Meta-Llama-3-8B-Instruct --weight-format fp16 --kv_cache_precision u8 --config_file_path models/config_all.json --model_repository_path models
python export_model.py text_generation --source_model meta-llama/Llama-3.2-1B-Instruct --weight-format int4 --kv_cache_precision u8 --config_file_path models/config_all.json --model_repository_path models
```

#### GPU Deployment (Low Concurrency, Limited Memory)
Expand Down
1 change: 1 addition & 0 deletions src/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,7 @@ ovms_cc_library(
"//conditions:default": [],
"//:not_disable_mediapipe" : [
"//src/image_gen:image_gen_calculator",
"//src/speech:speech_calculator",
"//src/image_gen:imagegen_init",
"//src/llm:openai_completions_api_handler",
"//src/embeddings:embeddingscalculator",
Expand Down
2 changes: 2 additions & 0 deletions src/http_rest_api_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,8 @@ static Status createV3HttpPayload(
} else {
SPDLOG_DEBUG("Model name from deduced from MultiPart field: {}", modelName);
}
// NOTE(review): looks like leftover debug code — this logs the raw multipart
// "stream" field at ERROR level on every multipart request; remove it or
// demote to SPDLOG_DEBUG before merge. The value appears unused in the
// visible context — confirm against the rest of this function.
auto stream = multiPartParser->getFieldByName("stream");
SPDLOG_ERROR("{}", stream);
ensureJsonParserInErrorState(parsedJson);
} else if (isApplicationJson) {
{
Expand Down
23 changes: 23 additions & 0 deletions src/mediapipe_internal/mediapipegraphdefinition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ const std::string MediapipeGraphDefinition::SCHEDULER_CLASS_NAME{"Mediapipe"};
const std::string MediapipeGraphDefinition::PYTHON_NODE_CALCULATOR_NAME{"PythonExecutorCalculator"};
const std::string MediapipeGraphDefinition::LLM_NODE_CALCULATOR_NAME{"LLMCalculator"};
const std::string MediapipeGraphDefinition::IMAGE_GEN_CALCULATOR_NAME{"ImageGenCalculator"};
const std::string MediapipeGraphDefinition::SPEECH_NODE_CALCULATOR_NAME{"SpeechCalculator"};
const std::string MediapipeGraphDefinition::EMBEDDINGS_NODE_CALCULATOR_NAME{"EmbeddingsCalculatorOV"};
const std::string MediapipeGraphDefinition::RERANK_NODE_CALCULATOR_NAME{"RerankCalculatorOV"};

Expand Down Expand Up @@ -554,6 +555,28 @@ Status MediapipeGraphDefinition::initializeNodes() {
rerankServableMap.insert(std::pair<std::string, std::shared_ptr<RerankServable>>(nodeName, std::move(servable)));
rerankServablesCleaningGuard.disableCleaning();
}
// Initialize a SpeechServable for every node whose calculator name ends with
// SPEECH_NODE_CALCULATOR_NAME, mirroring the rerank/embeddings branches above.
if (endsWith(config.node(i).calculator(), SPEECH_NODE_CALCULATOR_NAME)) {
auto& speechServableMap = this->sidePacketMaps.speechServableMap;
// Guard clears the map again if we bail out early with an error status.
ResourcesCleaningGuard<SpeechServableMap> speechServablesCleaningGuard(speechServableMap);
if (!config.node(i).node_options().size()) {
SPDLOG_LOGGER_ERROR(modelmanager_logger, "Speech node missing options in graph: {}. ", this->name);
// NOTE(review): LLM_* status codes reused for speech-node errors here and
// below — consider dedicated SPEECH_* codes so callers/logs can tell them apart.
return StatusCode::LLM_NODE_MISSING_OPTIONS;
}
if (config.node(i).name().empty()) {
SPDLOG_LOGGER_ERROR(modelmanager_logger, "Speech node name is missing in graph: {}. ", this->name);
return StatusCode::LLM_NODE_MISSING_NAME;
}
// Node names must be unique per graph; they key the servable map.
std::string nodeName = config.node(i).name();
if (speechServableMap.find(nodeName) != speechServableMap.end()) {
SPDLOG_LOGGER_ERROR(modelmanager_logger, "Speech node name: {} already used in graph: {}. ", nodeName, this->name);
return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS;
}
mediapipe::SpeechCalculatorOptions nodeOptions;
// NOTE(review): UnpackTo() returns false on a type mismatch and that result is
// ignored — a wrongly-typed node_options(0) would leave nodeOptions
// default-initialized and be silently accepted. Consider checking it and
// returning an error status, as a malformed graph config should not load.
config.node(i).node_options(0).UnpackTo(&nodeOptions);
std::shared_ptr<SpeechServable> servable = std::make_shared<SpeechServable>(nodeOptions.models_path(), nodeOptions.device(), mgconfig.getBasePath(), nodeOptions.mode());
speechServableMap.insert(std::pair<std::string, std::shared_ptr<SpeechServable>>(nodeName, std::move(servable)));
// Initialization succeeded — keep the servable in the map.
speechServablesCleaningGuard.disableCleaning();
}
}
return StatusCode::OK;
}
Expand Down
8 changes: 7 additions & 1 deletion src/mediapipe_internal/mediapipegraphdefinition.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include "../sidepacket_servable.hpp"
#include "../embeddings/embeddings_servable.hpp"
#include "../rerank/rerank_servable.hpp"
#include "../speech/speech_servable.hpp"

namespace ovms {
class MediapipeGraphDefinitionUnloadGuard;
Expand All @@ -62,6 +63,7 @@ struct ImageGenerationPipelines;
using PythonNodeResourcesMap = std::unordered_map<std::string, std::shared_ptr<PythonNodeResources>>;
using GenAiServableMap = std::unordered_map<std::string, std::shared_ptr<GenAiServable>>;
using RerankServableMap = std::unordered_map<std::string, std::shared_ptr<RerankServable>>;
using SpeechServableMap = std::unordered_map<std::string, std::shared_ptr<SpeechServable>>;
using EmbeddingsServableMap = std::unordered_map<std::string, std::shared_ptr<EmbeddingsServable>>;
using ImageGenerationPipelinesMap = std::unordered_map<std::string, std::shared_ptr<ImageGenerationPipelines>>;

Expand All @@ -71,19 +73,22 @@ struct GraphSidePackets {
ImageGenerationPipelinesMap imageGenPipelinesMap;
EmbeddingsServableMap embeddingsServableMap;
RerankServableMap rerankServableMap;
SpeechServableMap speechServableMap;
// Releases every side-packet resource map owned by this struct so servables
// are freed eagerly (e.g. on graph reload/retire). Must be kept in sync with
// the member list whenever a new servable map is added.
void clear() {
pythonNodeResourcesMap.clear();
genAiServableMap.clear();
imageGenPipelinesMap.clear();
embeddingsServableMap.clear();
rerankServableMap.clear();
speechServableMap.clear();
}
bool empty() {
return (pythonNodeResourcesMap.empty() &&
genAiServableMap.empty() &&
imageGenPipelinesMap.empty() &&
embeddingsServableMap.empty() &&
rerankServableMap.empty());
rerankServableMap.empty() &&
speechServableMap.empty());
}
};

Expand Down Expand Up @@ -124,6 +129,7 @@ class MediapipeGraphDefinition {
static const std::string IMAGE_GEN_CALCULATOR_NAME;
static const std::string EMBEDDINGS_NODE_CALCULATOR_NAME;
static const std::string RERANK_NODE_CALCULATOR_NAME;
static const std::string SPEECH_NODE_CALCULATOR_NAME;
Status waitForLoaded(std::unique_ptr<MediapipeGraphDefinitionUnloadGuard>& unloadGuard, const uint32_t waitForLoadedTimeoutMicroseconds = WAIT_FOR_LOADED_DEFAULT_TIMEOUT_MICROSECONDS);

// Pipelines are not versioned and any available definition has constant version equal 1.
Expand Down
4 changes: 3 additions & 1 deletion src/mediapipe_internal/mediapipegraphexecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ MediapipeGraphExecutor::MediapipeGraphExecutor(
const GenAiServableMap& llmNodeResourcesMap,
const EmbeddingsServableMap& embeddingsServableMap,
const RerankServableMap& rerankServableMap,
const SpeechServableMap& speechServableMap,
PythonBackend* pythonBackend,
MediapipeServableMetricReporter* mediapipeServableMetricReporter) :
name(name),
Expand All @@ -56,7 +57,7 @@ MediapipeGraphExecutor::MediapipeGraphExecutor(
outputTypes(std::move(outputTypes)),
inputNames(std::move(inputNames)),
outputNames(std::move(outputNames)),
sidePacketMaps({pythonNodeResourcesMap, llmNodeResourcesMap, {}, embeddingsServableMap, rerankServableMap}),
sidePacketMaps({pythonNodeResourcesMap, llmNodeResourcesMap, {}, embeddingsServableMap, rerankServableMap, speechServableMap}),
pythonBackend(pythonBackend),
currentStreamTimestamp(STARTING_TIMESTAMP),
mediapipeServableMetricReporter(mediapipeServableMetricReporter) {}
Expand Down Expand Up @@ -88,6 +89,7 @@ const std::string MediapipeGraphExecutor::LLM_SESSION_SIDE_PACKET_TAG = "llm";
const std::string MediapipeGraphExecutor::IMAGE_GEN_SESSION_SIDE_PACKET_TAG = "pipes";
const std::string MediapipeGraphExecutor::EMBEDDINGS_SESSION_SIDE_PACKET_TAG = "embeddings_servable";
const std::string MediapipeGraphExecutor::RERANK_SESSION_SIDE_PACKET_TAG = "rerank_servable";
const std::string MediapipeGraphExecutor::SPEECH_SESSION_SIDE_PACKET_TAG = "speech_servable";
const ::mediapipe::Timestamp MediapipeGraphExecutor::STARTING_TIMESTAMP = ::mediapipe::Timestamp(0);

} // namespace ovms
4 changes: 4 additions & 0 deletions src/mediapipe_internal/mediapipegraphexecutor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ class MediapipeGraphExecutor {
static const std::string IMAGE_GEN_SESSION_SIDE_PACKET_TAG;
static const std::string EMBEDDINGS_SESSION_SIDE_PACKET_TAG;
static const std::string RERANK_SESSION_SIDE_PACKET_TAG;
static const std::string SPEECH_SESSION_SIDE_PACKET_TAG;
static const ::mediapipe::Timestamp STARTING_TIMESTAMP;

MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config,
Expand All @@ -103,6 +104,7 @@ class MediapipeGraphExecutor {
const GenAiServableMap& llmNodeResourcesMap,
const EmbeddingsServableMap& embeddingsServableMap,
const RerankServableMap& rerankServableMap,
const SpeechServableMap& speechServableMap,
PythonBackend* pythonBackend,
MediapipeServableMetricReporter* mediapipeServableMetricReporter);
MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config,
Expand Down Expand Up @@ -151,6 +153,8 @@ class MediapipeGraphExecutor {
inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<EmbeddingsServableMap>(this->sidePacketMaps.embeddingsServableMap).At(STARTING_TIMESTAMP);

inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<RerankServableMap>(this->sidePacketMaps.rerankServableMap).At(STARTING_TIMESTAMP);
inputSidePackets[SPEECH_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<SpeechServableMap>(this->sidePacketMaps.speechServableMap).At(STARTING_TIMESTAMP);

MP_RETURN_ON_FAIL(graph.StartRun(inputSidePackets), std::string("start MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_START_ERROR);

::mediapipe::Packet packet;
Expand Down
61 changes: 61 additions & 0 deletions src/speech/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#
# Copyright (c) 2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# NOTE(review): "mediapipe_cc_proto_library" is loaded but never used in this
# file — buildifier will flag the unused load symbol; drop it or use it.
load("@mediapipe//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library", "mediapipe_proto_library")
load("//:common_settings.bzl", "ovms_cc_library")

# Header-only servable shared with the graph executor via MediaPipe side packets.
ovms_cc_library(
name = "speech_servable",
hdrs = ["speech_servable.hpp"],
visibility = ["//visibility:public"],
alwayslink = 1,
)

ovms_cc_library(
name = "llm_engine", # in fact this is genai library
srcs = [],
deps = ["@llm_engine//:llm_engine"],
visibility = ["//visibility:public"],
alwayslink = 1,
)

# MediaPipe calculator handling speech endpoints over HTTP payloads.
# NOTE(review): dr_wav.h / dr_mp3.h look like vendored third-party decoder
# headers — confirm their license headers are preserved and that they belong
# in hdrs (they are presumably included by http_speech_calculator.cc only).
ovms_cc_library(
name = "speech_calculator",
srcs = ["http_speech_calculator.cc"],
hdrs = ["dr_wav.h", "dr_mp3.h"],
deps = [
"@mediapipe//mediapipe/framework:calculator_framework",
"//src:httppayload",
"//src:libovmslogging",
"speech_calculator_cc_proto",
":speech_servable",
# NOTE(review): "]+" spacing below will be reformatted by buildifier.
]+ select({
"//conditions:default": ["//third_party:genai", ":llm_engine"],
"//:not_genai_bin" : [":llm_engine"],
}),
visibility = ["//visibility:public"],
alwayslink = 1,
)

# Generates SpeechCalculatorOptions (presumably also the cc_proto target
# referenced above via the mediapipe proto macro — verify).
mediapipe_proto_library(
name = "speech_calculator_proto",
srcs = ["speech_calculator.proto"],
visibility = ["//visibility:private"],
deps = [
"@mediapipe//mediapipe/framework:calculator_options_proto",
"@mediapipe//mediapipe/framework:calculator_proto",
],
)
Loading