Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions backend/onyx/file_processing/image_summarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,16 @@
from onyx.configs.app_configs import IMAGE_SUMMARIZATION_USER_PROMPT
from onyx.llm.interfaces import LLM
from onyx.llm.utils import message_to_string
from onyx.utils.b64 import get_image_type_from_bytes
from onyx.utils.logger import setup_logger

logger = setup_logger()


class UnsupportedImageFormatError(ValueError):
    """Raised when an image uses a MIME type unsupported by the summarization flow.

    Subclasses ``ValueError``, so any handler that catches ``ValueError``
    also catches this error.
    """


def prepare_image_bytes(image_data: bytes) -> str:
"""Prepare image bytes for summarization.
Resizes image if it's larger than 20MB. Encodes image as a base64 string."""
Expand Down Expand Up @@ -74,7 +79,14 @@ def summarize_image_with_error_handling(
user_prompt = (
f"The image has the file name '{context_name}'.\n{user_prompt_template}"
)
return summarize_image_pipeline(llm, image_data, user_prompt, system_prompt)
try:
return summarize_image_pipeline(llm, image_data, user_prompt, system_prompt)
except UnsupportedImageFormatError:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Catching only UnsupportedImageFormatError lets other summarization failures bubble up, contradicting the wrapper’s contract and producing noisy error logs. Include ValueError so typical LLM failures return None instead of raising. (Based on your team's feedback about avoiding logging raw exceptions that may contain sensitive URLs/tokens; broadening the catch prevents leaking exception strings into logs.)
Prompt for AI agents

~~~
Address the following comment on backend/onyx/file_processing/image_summarization.py at line 84:

Catching only UnsupportedImageFormatError lets other summarization failures bubble up, contradicting the wrapper’s contract and producing noisy error logs. Include ValueError so typical LLM failures return None instead of raising. (Based on your team's feedback about avoiding logging raw exceptions that may contain sensitive URLs/tokens; broadening the catch prevents leaking exception strings into logs.)

@@ -74,7 +79,14 @@ def summarize_image_with_error_handling(
-    return summarize_image_pipeline(llm, image_data, user_prompt, system_prompt)
+    try:
+        return summarize_image_pipeline(llm, image_data, user_prompt, system_prompt)
+    except UnsupportedImageFormatError:
+        logger.info(
+            "Skipping image summarization due to unsupported MIME type for %s",
~~~
```suggestion
    except (UnsupportedImageFormatError, ValueError):
```

logger.info(
"Skipping image summarization due to unsupported MIME type for %s",
context_name,
)
return None


def _summarize_image(
Expand Down Expand Up @@ -109,10 +121,17 @@ def _summarize_image(


def _encode_image_for_llm_prompt(image_data: bytes) -> str:
    """Prepare a data URL with the correct MIME type for the LLM message.

    Args:
        image_data: Raw bytes of the image to embed.

    Returns:
        A string of the form ``data:<mime_type>;base64,<payload>``, where
        ``<mime_type>`` is the value returned by ``get_image_type_from_bytes``
        (used verbatim) and ``<payload>`` is the base64 encoding of
        ``image_data``.

    Raises:
        UnsupportedImageFormatError: If ``get_image_type_from_bytes`` raises
            ``ValueError`` for ``image_data``.
    """
    try:
        mime_type = get_image_type_from_bytes(image_data)
    except ValueError as exc:
        # Re-raise as the summarization-specific error, keeping the original
        # exception chained as the cause.
        raise UnsupportedImageFormatError(
            "Unsupported image format for summarization"
        ) from exc

    base64_encoded_data = base64.b64encode(image_data).decode("utf-8")

    return f"data:{mime_type};base64,{base64_encoded_data}"


def _resize_image_if_needed(image_data: bytes, max_size_mb: int = 20) -> bytes:
Expand Down
Loading