Skip to content

Commit 03b66ab

Browse files
Speech pipeline POC
1 parent 6e2e910 commit 03b66ab

File tree

7 files changed

+6810
-0
lines changed

7 files changed

+6810
-0
lines changed

src/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,7 @@ ovms_cc_library(
562562
"//conditions:default": [],
563563
"//:not_disable_mediapipe" : [
564564
"//src/image_gen:image_gen_calculator",
565+
"//src/speech:speech_calculator",
565566
"//src/image_gen:imagegen_init",
566567
"//src/llm:openai_completions_api_handler",
567568
"//src/embeddings:embeddingscalculator",

src/http_rest_api_handler.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,13 +495,15 @@ static Status createV3HttpPayload(
495495
bool isUriBasedRouting = !isApplicationJson && !isMultiPart; // For content types other than "application/json" and "multipart/form-data", we look for model information in the URI
496496

497497
if (isMultiPart) {
498+
SPDLOG_ERROR("MULTIPART");
498499
OVMS_PROFILE_SCOPE("multipart parse");
499500
if (!multiPartParser->parse()) {
500501
SPDLOG_DEBUG("Failed to parse multipart content type request");
501502
return StatusCode::FAILED_TO_PARSE_MULTIPART_CONTENT_TYPE;
502503
}
503504
modelName = multiPartParser->getFieldByName("model");
504505
if (modelName.empty()) {
506+
SPDLOG_ERROR("model");
505507
isUriBasedRouting = true;
506508
} else {
507509
SPDLOG_DEBUG("Model name from deduced from MultiPart field: {}", modelName);

src/mediapipe_internal/mediapipegraphdefinition.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ const std::string MediapipeGraphDefinition::SCHEDULER_CLASS_NAME{"Mediapipe"};
6161
const std::string MediapipeGraphDefinition::PYTHON_NODE_CALCULATOR_NAME{"PythonExecutorCalculator"};
6262
const std::string MediapipeGraphDefinition::LLM_NODE_CALCULATOR_NAME{"LLMCalculator"};
6363
const std::string MediapipeGraphDefinition::IMAGE_GEN_CALCULATOR_NAME{"ImageGenCalculator"};
64+
//const std::string MediapipeGraphDefinition::SPEECH_CALCULATOR_NAME{"SpeechCalculator"};
6465
const std::string MediapipeGraphDefinition::EMBEDDINGS_NODE_CALCULATOR_NAME{"EmbeddingsCalculatorOV"};
6566
const std::string MediapipeGraphDefinition::RERANK_NODE_CALCULATOR_NAME{"RerankCalculatorOV"};
6667

src/speech/BUILD

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#
2+
# Copyright (c) 2025 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
17+
load("@mediapipe//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library", "mediapipe_proto_library")
18+
load("//:common_settings.bzl", "ovms_cc_library")
19+
20+
ovms_cc_library(
21+
name = "llm_engine", # in fact this is genai library
22+
srcs = [],
23+
deps = ["@llm_engine//:llm_engine"],
24+
visibility = ["//visibility:public"],
25+
alwayslink = 1,
26+
)
27+
28+
ovms_cc_library(
29+
name = "speech_calculator",
30+
srcs = ["http_speech_calculator.cc"],
31+
hdrs = ["dr_wav.h"],
32+
deps = [
33+
"@mediapipe//mediapipe/framework:calculator_framework",
34+
"//src:httppayload",
35+
"//src:libovmslogging",
36+
"speech_calculator_cc_proto",
37+
]+ select({
38+
"//conditions:default": ["//third_party:genai", ":llm_engine"],
39+
"//:not_genai_bin" : [":llm_engine"],
40+
}),
41+
visibility = ["//visibility:public"],
42+
alwayslink = 1,
43+
)
44+
45+
mediapipe_proto_library(
46+
name = "speech_calculator_proto",
47+
srcs = ["speech_calculator.proto"],
48+
visibility = ["//visibility:private"],
49+
deps = [
50+
"@mediapipe//mediapipe/framework:calculator_options_proto",
51+
"@mediapipe//mediapipe/framework:calculator_proto",
52+
],
53+
)

0 commit comments

Comments
 (0)