save to disk

dkalinowski · dkalinowski · commit 1bef80922569 · 2025-05-07T16:49:48.000+02:00
diff --git a/WORKSPACE b/WORKSPACE
@@ -599,7 +599,7 @@ new_git_repository(
     build_file_content = """
 cc_library(
     name = "image",
-    hdrs = ["stb_image.h"],
+    hdrs = ["stb_image.h", "stb_image_write.h"],
     visibility = ["//visibility:public"],
     local_defines = [
     ],
diff --git a/src/BUILD b/src/BUILD
@@ -2602,6 +2602,27 @@ cc_library(
     linkopts = LINKOPTS_ADJUSTED,
 )
 
+# Image <-> ov::Tensor conversion helpers built on the stb single-header libraries.
+# Shared by the LLM and image generation targets.
+cc_library(
+    name = "libimage_conversion",
+    srcs = [
+        "image_conversion.cpp",
+    ],
+    hdrs = ["image_conversion.hpp"],
+    copts = COPTS_ADJUSTED,
+    linkopts = LINKOPTS_ADJUSTED,
+    local_defines = COMMON_LOCAL_DEFINES,
+    visibility = ["//visibility:public"],
+    deps = [
+        "//src:libovmslogging",
+        "//src:libovmsprofiler",
+        "//third_party:openvino",
+        "@com_google_absl//absl/strings",
+        "@stb//:image",
+    ],
+)
+
 # HTTP Server implementation using net_http of tensorflow
 # To use other library simply create new target and implementation of libhttp_async_writer_interface
 cc_library(
diff --git a/src/image_conversion.cpp b/src/image_conversion.cpp
@@ -0,0 +1,126 @@
+//****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#include "image_conversion.hpp"
+
+#include <iostream>
+
+#define STB_IMAGE_IMPLEMENTATION
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#include "logging.hpp"
+#include "profiler.hpp"
+#pragma warning(push)
+#pragma warning(disable : 6262)
+#include "stb_image.h"  // NOLINT
+#include "stb_image_write.h"  // NOLINT
+#pragma warning(default : 6262)
+#pragma warning(disable : 6001 4324 6385 6386)
+#include "absl/strings/escaping.h"
+#include "absl/strings/str_cat.h"
+#pragma warning(pop)
+
+namespace ovms {
+
+void hello() {  // Prints a fixed greeting line to standard output and flushes the stream.
+    std::cout << "Hello, World!" << '\n' << std::flush;
+}
+
+// Decodes encoded image bytes with stb_image into a u8 tensor shaped {1, height, width, 3}; throws std::runtime_error on failure.
+ov::Tensor load_image_stbi(const std::string& imageBytes) {
+    int x = 0, y = 0, channelsInFile = 0;
+    constexpr int desiredChannels = 3;
+    const int byteCount = static_cast<int>(imageBytes.size());  // stb takes the input length as int
+    const bool sizeFitsInt = byteCount >= 0 && static_cast<size_t>(byteCount) == imageBytes.size();
+    unsigned char* image = !sizeFitsInt ? nullptr : stbi_load_from_memory(
+        reinterpret_cast<const unsigned char*>(imageBytes.data()), byteCount, &x, &y, &channelsInFile, desiredChannels);
+    if (!image) {
+        throw std::runtime_error{"Failed to load the image"};
+    }
+    struct SharedImageAllocator {  // lends the decoded stb buffer to ov::Tensor and releases it with stbi_image_free
+        unsigned char* image;
+        int channels, height, width;
+        size_t byteSize() const { return size_t(channels) * size_t(height) * size_t(width); }  // size_t math: no int overflow
+        void* allocate(size_t bytes, size_t) const {
+            if (image && byteSize() == bytes) {
+                return image;
+            }
+            throw std::runtime_error{"Unexpected number of bytes was requested to allocate."};
+        }
+        void deallocate(void*, size_t bytes, size_t) {
+            if (byteSize() != bytes) {
+                throw std::runtime_error{"Unexpected number of bytes was requested to deallocate."};
+            }
+            if (image != nullptr) {
+                stbi_image_free(image);
+                image = nullptr;
+            }
+        }
+        bool is_equal(const SharedImageAllocator& other) const noexcept { return this == &other; }
+    };
+    return ov::Tensor(
+        ov::element::u8, ov::Shape{1, size_t(y), size_t(x), size_t(desiredChannels)},
+        SharedImageAllocator{image, desiredChannels, y, x});
+}
+
+// Encodes a u8 tensor shaped {1, height, width, channels} (channels is 1 or 3) as PNG.
+// Returns the encoded PNG bytes; throws std::runtime_error on an unsupported tensor or encoder failure.
+std::string save_image_stbi(ov::Tensor tensor) {
+    // Validate tensor properties
+    if (tensor.get_element_type() != ov::element::u8) {
+        throw std::runtime_error{"Only U8 tensor element type is supported for image saving"};
+    }
+
+    const ov::Shape shape = tensor.get_shape();
+    if (shape.size() != 4 || shape[0] != 1) {
+        throw std::runtime_error{"Tensor must be in NHWC format with batch size 1"};
+    }
+
+    const size_t height = shape[1];
+    const size_t width = shape[2];
+    const size_t channels = shape[3];
+
+    if (channels != 3 && channels != 1) {
+        throw std::runtime_error{"Only 1 or 3 channel images are supported for saving"};
+    }
+
+    // stb_image_write takes int dimensions; refuse values that do not survive the conversion
+    // instead of silently truncating them.
+    auto toInt = [](size_t value) {
+        const int converted = static_cast<int>(value);
+        if (converted < 0 || static_cast<size_t>(converted) != value) {
+            throw std::runtime_error{"Image dimensions are too large to encode as PNG"};
+        }
+        return converted;
+    };
+
+    // stb hands the encoded bytes to this callback; append them straight into the result
+    // string so no intermediate buffer has to be copied afterwards.
+    std::string png;
+    auto write_func = [](void* context, void* data, int size) {
+        static_cast<std::string*>(context)->append(static_cast<const char*>(data), static_cast<size_t>(size));
+    };
+
+    const int success = stbi_write_png_to_func(
+        write_func, &png, toInt(width), toInt(height), toInt(channels),
+        tensor.data<unsigned char>(), toInt(width * channels));  // last argument: stride in bytes per row
+
+    if (!success) {
+        throw std::runtime_error{"Failed to encode image to PNG format"};
+    }
+
+    return png;
+}
+
+}  // namespace ovms
diff --git a/src/image_conversion.hpp b/src/image_conversion.hpp
@@ -0,0 +1,29 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <string>
+
+#include <openvino/runtime/tensor.hpp>
+
+namespace ovms {
+
+void hello();  // Prints a fixed greeting to standard output.
+
+ov::Tensor load_image_stbi(const std::string& imageBytes);  // Decodes encoded image bytes into a u8 tensor shaped {1, height, width, 3}; throws std::runtime_error on failure.
+std::string save_image_stbi(ov::Tensor tensor);  // Encodes a u8 {1, height, width, 1 or 3} tensor as PNG bytes; throws std::runtime_error on unsupported input or encoder failure.
+
+}  // namespace ovms
diff --git a/src/image_gen/BUILD b/src/image_gen/BUILD
@@ -62,6 +62,7 @@ cc_library(
         "//src:libovmslogging",
         "image_gen_calculator_cc_proto",
         ":pipelines",
+        "//src:libimage_conversion",
     ]+ select({
         "//conditions:default": ["//third_party:genai", ":llm_engine"],
         "//:not_genai_bin" : [":llm_engine"],
diff --git a/src/image_gen/http_image_gen_calculator.cc b/src/image_gen/http_image_gen_calculator.cc
@@ -13,6 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //*****************************************************************************
+#include <fstream>
+
 #pragma warning(push)
 #pragma warning(disable : 4005 4309 6001 6385 6386 6326 6011 6246 4456 6246)
 #pragma GCC diagnostic push
@@ -24,9 +26,27 @@
 
 #include "../http_payload.hpp"
 #include "../logging.hpp"
+#include "../image_conversion.hpp"
 
 #include "pipelines.hpp"
 
+// Writes png_data to the file named filename in binary mode.
+// Throws std::runtime_error if the file cannot be opened or fully written; a partially written file is removed.
+static void save_png_to_disk(const std::string& png_data, const std::string& filename) {
+    std::ofstream out_file(filename, std::ios::binary);
+    if (!out_file) {
+        throw std::runtime_error("Failed to open file for writing: " + filename);
+    }
+
+    out_file.write(png_data.data(), static_cast<std::streamsize>(png_data.size()));
+    // Buffered output may only fail once it is flushed, so close before inspecting the stream state.
+    out_file.close();
+    if (out_file.fail()) {
+        std::remove(filename.c_str());  // Clean up partial file
+        throw std::runtime_error("Failed to write data to file: " + filename);
+    }
+}
+
 using namespace ovms;
 
 namespace mediapipe {
@@ -67,15 +87,35 @@ class ImageGenCalculator : public CalculatorBase {
         RET_CHECK(it != pipelinesNap.end()) << "Could not find initialized Image Gen node named: " << cc->NodeName();
         auto pipe = it->second;
 
-        // curl -X POST localhost:11338/v3/endpoint -d '{}'
+        auto payload = cc->Inputs().Tag(INPUT_TAG_NAME).Get<ovms::HttpPayload>();
+        if (payload.parsedJson->HasParseError())
+            return absl::InvalidArgumentError("Failed to parse JSON");
+
+        if (!payload.parsedJson->IsObject()) {
+            return absl::InvalidArgumentError("JSON body must be an object");
+        }
+
+        // get prompt field as string
+        auto promptIt = payload.parsedJson->FindMember("prompt");
+        if (promptIt == payload.parsedJson->MemberEnd()) {
+            return absl::InvalidArgumentError("prompt field is missing in JSON body");
+        }
+        if (!promptIt->value.IsString()) {
+            return absl::InvalidArgumentError("prompt field is not a string");
+        }
+        std::string prompt = promptIt->value.GetString();
+
+        // curl -X POST localhost:11338/v3/images/generations -H "Content-Type: application/json" -d '{ "model": "endpoint", "prompt": "A cute baby sea otter", "n": 1, "size": "1024x1024" }'
         ov::genai::Text2ImagePipeline::GenerationRequest request = pipe->text2ImagePipeline.create_generation_request();
-        ov::Tensor image = request.generate("a cat",  // TODO: get from payload
+        ov::Tensor image = request.generate(prompt,
             ov::AnyMap{
-                ov::genai::width(512),
-                ov::genai::height(512),
-                ov::genai::num_inference_steps(20),
-                ov::genai::num_images_per_prompt(1)});
+                ov::genai::width(512),  // todo: get from req
+                ov::genai::height(512),  // todo: get from req
+                ov::genai::num_inference_steps(20),  // todo: get from req
+                ov::genai::num_images_per_prompt(1)});  // todo: get from req
 
+        std::string res = save_image_stbi(image);
+        save_png_to_disk(res, "output.png");
         SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator  [Node: {}] Process end", cc->NodeName());
         return absl::OkStatus();
     }
diff --git a/src/llm/BUILD b/src/llm/BUILD
@@ -79,7 +79,7 @@ cc_library(
         "@mediapipe//mediapipe/framework:calculator_framework", # required for absl status
         "//src:libovmsprofiler",
         "//third_party:opencv",
-        "@stb//:image",
+        "//src:libimage_conversion",
     ] + select({
         "//conditions:default": ["//third_party:genai", ":llm_engine"],
         "//:not_genai_bin" : [":llm_engine"],
diff --git a/src/llm/apis/openai_completions.cpp b/src/llm/apis/openai_completions.cpp
@@ -27,18 +27,16 @@
 #include <rapidjson/writer.h>
 #pragma warning(pop)
 
-#define STB_IMAGE_IMPLEMENTATION
 #include "../../logging.hpp"
 #include "../../profiler.hpp"
 #pragma warning(push)
-#pragma warning(disable : 6262)
-#include "stb_image.h"  // NOLINT
-#pragma warning(default : 6262)
 #pragma warning(disable : 6001 4324 6385 6386)
 #include "absl/strings/escaping.h"
 #include "absl/strings/str_cat.h"
 #pragma warning(pop)
 
+#include "../../image_conversion.hpp"  // TODO: Rename to stbi_conversions?
+
 using namespace rapidjson;
 
 namespace ovms {
@@ -91,42 +89,42 @@ absl::Status OpenAIChatCompletionsHandler::parseCompletionsPart() {
     return absl::OkStatus();
 }
 
-ov::Tensor load_image_stbi(const std::string& imageBytes) {
-    int x = 0, y = 0, channelsInFile = 0;
-    constexpr int desiredChannels = 3;
-    unsigned char* image = stbi_load_from_memory(
-        (const unsigned char*)imageBytes.data(), imageBytes.size(),
-        &x, &y, &channelsInFile, desiredChannels);
-    if (!image) {
-        std::stringstream errorMessage;
-        errorMessage << "Failed to load the image";
-        throw std::runtime_error{errorMessage.str()};
-    }
-    struct SharedImageAllocator {
-        unsigned char* image;
-        int channels, height, width;
-        void* allocate(size_t bytes, size_t) const {
-            if (image && channels * height * width == bytes) {
-                return image;
-            }
-            throw std::runtime_error{"Unexpected number of bytes was requested to allocate."};
-        }
-        void deallocate(void*, size_t bytes, size_t) {
-            if (channels * height * width != bytes) {
-                throw std::runtime_error{"Unexpected number of bytes was requested to deallocate."};
-            }
-            if (image != nullptr) {
-                stbi_image_free(image);
-                image = nullptr;
-            }
-        }
-        bool is_equal(const SharedImageAllocator& other) const noexcept { return this == &other; }
-    };
-    return ov::Tensor(
-        ov::element::u8,
-        ov::Shape{1, size_t(y), size_t(x), size_t(desiredChannels)},
-        SharedImageAllocator{image, desiredChannels, y, x});
-}
+// ov::Tensor load_image_stbi(const std::string& imageBytes) {
+//     int x = 0, y = 0, channelsInFile = 0;
+//     constexpr int desiredChannels = 3;
+//     unsigned char* image = stbi_load_from_memory(
+//         (const unsigned char*)imageBytes.data(), imageBytes.size(),
+//         &x, &y, &channelsInFile, desiredChannels);
+//     if (!image) {
+//         std::stringstream errorMessage;
+//         errorMessage << "Failed to load the image";
+//         throw std::runtime_error{errorMessage.str()};
+//     }
+//     struct SharedImageAllocator {
+//         unsigned char* image;
+//         int channels, height, width;
+//         void* allocate(size_t bytes, size_t) const {
+//             if (image && channels * height * width == bytes) {
+//                 return image;
+//             }
+//             throw std::runtime_error{"Unexpected number of bytes was requested to allocate."};
+//         }
+//         void deallocate(void*, size_t bytes, size_t) {
+//             if (channels * height * width != bytes) {
+//                 throw std::runtime_error{"Unexpected number of bytes was requested to deallocate."};
+//             }
+//             if (image != nullptr) {
+//                 stbi_image_free(image);
+//                 image = nullptr;
+//             }
+//         }
+//         bool is_equal(const SharedImageAllocator& other) const noexcept { return this == &other; }
+//     };
+//     return ov::Tensor(
+//         ov::element::u8,
+//         ov::Shape{1, size_t(y), size_t(x), size_t(desiredChannels)},
+//         SharedImageAllocator{image, desiredChannels, y, x});
+// }
 
 absl::Status OpenAIChatCompletionsHandler::parseMessages() {
     auto it = doc.FindMember("messages");