From 7090060b68324262c2f9fdb20dfacfd2b283fa15 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Mon, 11 Aug 2025 14:46:22 -0400 Subject: [PATCH 01/56] select changes from wip-v0.4/core --- libs/core/langchain_core/messages/__init__.py | 66 + libs/core/langchain_core/messages/ai.py | 47 +- .../langchain_core/messages/content_blocks.py | 1453 ++++++++++++++++- libs/core/langchain_core/messages/modifier.py | 2 +- libs/core/langchain_core/messages/tool.py | 92 +- libs/core/langchain_core/messages/utils.py | 2 +- .../output_parsers/transform.py | 4 +- libs/core/langchain_core/runnables/base.py | 16 +- libs/core/langchain_core/runnables/config.py | 4 +- libs/core/langchain_core/runnables/graph.py | 6 +- .../langchain_core/utils/function_calling.py | 2 +- libs/core/pyproject.toml | 2 + .../tests/unit_tests/messages/test_imports.py | 22 + .../tests/unit_tests/messages/test_utils.py | 31 +- .../prompts/__snapshots__/test_chat.ambr | 82 +- .../runnables/__snapshots__/test_graph.ambr | 41 +- libs/core/tests/unit_tests/test_messages.py | 325 +++- 17 files changed, 1939 insertions(+), 258 deletions(-) diff --git a/libs/core/langchain_core/messages/__init__.py b/libs/core/langchain_core/messages/__init__.py index fe87e964af291..410299ea5b553 100644 --- a/libs/core/langchain_core/messages/__init__.py +++ b/libs/core/langchain_core/messages/__init__.py @@ -33,9 +33,31 @@ ) from langchain_core.messages.chat import ChatMessage, ChatMessageChunk from langchain_core.messages.content_blocks import ( + Annotation, + AudioContentBlock, + Citation, + CodeInterpreterCall, + CodeInterpreterOutput, + CodeInterpreterResult, + ContentBlock, + DataContentBlock, + FileContentBlock, + ImageContentBlock, + NonStandardAnnotation, + NonStandardContentBlock, + PlainTextContentBlock, + ReasoningContentBlock, + TextContentBlock, + VideoContentBlock, + WebSearchCall, + WebSearchResult, convert_to_openai_data_block, convert_to_openai_image_block, is_data_content_block, + is_reasoning_block, + is_text_block, + is_tool_call_block, + is_tool_call_chunk, ) from langchain_core.messages.function import FunctionMessage, FunctionMessageChunk from langchain_core.messages.human import HumanMessage, HumanMessageChunk @@ -65,24 +87,42 @@ __all__ = ( "AIMessage", "AIMessageChunk", + "Annotation", "AnyMessage", + "AudioContentBlock", "BaseMessage", "BaseMessageChunk", "ChatMessage", "ChatMessageChunk", + "Citation", + "CodeInterpreterCall", + "CodeInterpreterOutput", + "CodeInterpreterResult", + "ContentBlock", + "DataContentBlock", + "FileContentBlock", "FunctionMessage", "FunctionMessageChunk", "HumanMessage", "HumanMessageChunk", + "ImageContentBlock", "InvalidToolCall", "MessageLikeRepresentation", + "NonStandardAnnotation", + "NonStandardContentBlock", + "PlainTextContentBlock", + "ReasoningContentBlock", "RemoveMessage", "SystemMessage", "SystemMessageChunk", + "TextContentBlock", "ToolCall", "ToolCallChunk", "ToolMessage", "ToolMessageChunk", + "VideoContentBlock", + "WebSearchCall", + "WebSearchResult", "_message_from_dict", "convert_to_messages", "convert_to_openai_data_block", @@ -91,6 +131,10 @@ "filter_messages", "get_buffer_string", "is_data_content_block", + "is_reasoning_block", + "is_text_block", + "is_tool_call_block", + "is_tool_call_chunk", "merge_content", "merge_message_runs", "message_chunk_to_message", @@ -103,25 +147,43 @@ _dynamic_imports = { "AIMessage": "ai", "AIMessageChunk": "ai", + "Annotation": "content_blocks", + "AudioContentBlock": "content_blocks", "BaseMessage": "base", "BaseMessageChunk": "base", 
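+    # Maps each public name to the submodule it is lazily imported from.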
"merge_content": "base", "message_to_dict": "base", "messages_to_dict": "base", + "Citation": "content_blocks", + "ContentBlock": "content_blocks", "ChatMessage": "chat", "ChatMessageChunk": "chat", + "CodeInterpreterCall": "content_blocks", + "CodeInterpreterOutput": "content_blocks", + "CodeInterpreterResult": "content_blocks", + "DataContentBlock": "content_blocks", + "FileContentBlock": "content_blocks", "FunctionMessage": "function", "FunctionMessageChunk": "function", "HumanMessage": "human", "HumanMessageChunk": "human", + "NonStandardAnnotation": "content_blocks", + "NonStandardContentBlock": "content_blocks", + "PlainTextContentBlock": "content_blocks", + "ReasoningContentBlock": "content_blocks", "RemoveMessage": "modifier", "SystemMessage": "system", "SystemMessageChunk": "system", + "WebSearchCall": "content_blocks", + "WebSearchResult": "content_blocks", + "ImageContentBlock": "content_blocks", "InvalidToolCall": "tool", + "TextContentBlock": "content_blocks", "ToolCall": "tool", "ToolCallChunk": "tool", "ToolMessage": "tool", "ToolMessageChunk": "tool", + "VideoContentBlock": "content_blocks", "AnyMessage": "utils", "MessageLikeRepresentation": "utils", "_message_from_dict": "utils", @@ -132,6 +194,10 @@ "filter_messages": "utils", "get_buffer_string": "utils", "is_data_content_block": "content_blocks", + "is_reasoning_block": "content_blocks", + "is_text_block": "content_blocks", + "is_tool_call_block": "content_blocks", + "is_tool_call_chunk": "content_blocks", "merge_message_runs": "utils", "message_chunk_to_message": "utils", "messages_from_dict": "utils", diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index c81187dc3f69b..45eb7d2ba5e8d 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -8,11 +8,7 @@ from pydantic import model_validator from typing_extensions import NotRequired, Self, TypedDict, override -from langchain_core.messages.base import ( - BaseMessage, - BaseMessageChunk, - merge_content, -) +from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content from langchain_core.messages.tool import ( InvalidToolCall, ToolCall, @@ -20,23 +16,26 @@ default_tool_chunk_parser, default_tool_parser, ) -from langchain_core.messages.tool import ( - invalid_tool_call as create_invalid_tool_call, -) -from langchain_core.messages.tool import ( - tool_call as create_tool_call, -) -from langchain_core.messages.tool import ( - tool_call_chunk as create_tool_call_chunk, -) +from langchain_core.messages.tool import invalid_tool_call as create_invalid_tool_call +from langchain_core.messages.tool import tool_call as create_tool_call +from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk from langchain_core.utils._merge import merge_dicts, merge_lists from langchain_core.utils.json import parse_partial_json from langchain_core.utils.usage import _dict_int_op logger = logging.getLogger(__name__) +_LC_AUTO_PREFIX = "lc_" +"""LangChain auto-generated ID prefix for messages and content blocks.""" -_LC_ID_PREFIX = "run-" +_LC_ID_PREFIX = f"{_LC_AUTO_PREFIX}run-" +"""Internal tracing/callback system identifier. + +Used for: +- Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.) 
+  gets a unique run_id (UUID)
+- Enables tracking parent-child relationships between operations
+"""


class InputTokenDetails(TypedDict, total=False):
@@ -428,17 +427,27 @@ def add_ai_message_chunks(
     chunk_id = None
     candidates = [left.id] + [o.id for o in others]
-    # first pass: pick the first non-run-* id
+    # first pass: pick the first provider-assigned id (non-run-* and non-lc_*)
     for id_ in candidates:
-        if id_ and not id_.startswith(_LC_ID_PREFIX):
+        if (
+            id_
+            and not id_.startswith(_LC_ID_PREFIX)
+            and not id_.startswith(_LC_AUTO_PREFIX)
+        ):
             chunk_id = id_
             break
     else:
-        # second pass: no provider-assigned id found, just take the first non-null
+        # second pass: prefer lc_run-* ids over lc_* ids
         for id_ in candidates:
-            if id_:
+            if id_ and id_.startswith(_LC_ID_PREFIX):
                 chunk_id = id_
                 break
+        else:
+            # third pass: take any remaining id (auto-generated lc_* ids)
+            for id_ in candidates:
+                if id_:
+                    chunk_id = id_
+                    break
     return left.__class__(
         example=left.example,
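
The three passes above give provider-assigned ids precedence over LangChain's
auto-generated ones. A minimal sketch of the intended behavior (ids are
illustrative):

.. code-block:: python

    from langchain_core.messages import AIMessageChunk

    left = AIMessageChunk(content="Hello, ", id="lc_run-123")
    right = AIMessageChunk(content="world", id="msg_abc123")

    # The provider-assigned id wins over the LangChain-generated run id.
    assert (left + right).id == "msg_abc123"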
diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py
index 83a66fb123a42..d426339156d39 100644
--- a/libs/core/langchain_core/messages/content_blocks.py
+++ b/libs/core/langchain_core/messages/content_blocks.py
@@ -1,110 +1,967 @@
-"""Types for content blocks."""
+"""Standard, multimodal content blocks for Large Language Model I/O.
+
+.. warning::
+    This module is under active development. The API is unstable and subject to
+    change in future releases.
+
+This module provides a standardized data structure for representing inputs to and
+outputs from LLMs. The core abstraction is the **Content Block**, a ``TypedDict`` that
+can represent a piece of text, an image, a tool call, or other structured data.
+
+**Rationale**
+
+Different LLM providers use distinct and incompatible API schemas. This module
+introduces a unified, provider-agnostic format to standardize these interactions. A
+message to or from a model is simply a ``list`` of ``ContentBlock`` objects, allowing
+for the natural interleaving of text, images, and other content in a single, ordered
+sequence.
+
+An adapter for a specific provider is responsible for translating this standard list of
+blocks into the format required by its API.
+
+**Extensibility**
+
+Data **not yet mapped** to a standard block may be represented using the
+``NonStandardContentBlock``, which allows for provider-specific data to be included
+without losing the benefits of type checking and validation.
+
+Furthermore, provider-specific fields **within** a standard block are fully supported
+by default in the ``extras`` field of each block. This allows for additional metadata
+to be included without breaking the standard structure.
+
+Following widespread adoption of `PEP 728 <https://peps.python.org/pep-0728/>`__, we
+will add ``extra_items=Any`` as a param to Content Blocks. This will signify to type
+checkers that additional provider-specific fields are allowed outside of the
+``extras`` field, and that will become the new standard approach to adding
+provider-specific metadata.
+
+.. warning::
+    Do not heavily rely on the ``extras`` field for provider-specific data! This field
+    is subject to deprecation in future releases as we move towards PEP 728.
+
+**Example with PEP 728 provider-specific fields:**
+
+.. code-block:: python
+
+    # Content block definition
+    # NOTE: `extra_items=Any`
+    class TextContentBlock(TypedDict, extra_items=Any):
+        type: Literal["text"]
+        id: NotRequired[str]
+        text: str
+        annotations: NotRequired[list[Annotation]]
+        index: NotRequired[int]
+
+.. code-block:: python
+
+    from langchain_core.messages.content_blocks import TextContentBlock
+
+    # Create a text content block with provider-specific fields
+    my_block: TextContentBlock = {
+        # Add required fields
+        "type": "text",
+        "text": "Hello, world!",
+        # Additional fields not specified in the TypedDict
+        # These are valid with PEP 728 and are typed as Any
+        "openai_metadata": {"model": "gpt-4", "temperature": 0.7},
+        "anthropic_usage": {"input_tokens": 10, "output_tokens": 20},
+        "custom_field": "any value",
+    }
+
+    # Accessing a provider-specific field on an existing block
+    openai_data = my_block["openai_metadata"]  # Type: Any
+
+.. note::
+    PEP 728 is enabled with ``# type: ignore[call-arg]`` comments to suppress warnings
+    from type checkers that don't yet support it. The functionality works correctly
+    in Python 3.13+ and will be fully supported as the ecosystem catches up.
+
+**Key Block Types**
+
+The module defines several types of content blocks, including:
+
+- ``TextContentBlock``: Standard text.
+- ``ImageContentBlock``, ``Audio...``, ``Video...``, ``PlainText...``, ``File...``: For multimodal data.
+- ``ToolCall``: For function calling.
+- ``ReasoningContentBlock``: To capture a model's thought process.
+- ``Citation``: For annotations that link generated text to a source document.
+
+**Example Usage**
+
+.. code-block:: python
+
+    # Direct construction:
+    from langchain_core.messages.content_blocks import ContentBlock, TextContentBlock, ImageContentBlock
+
+    multimodal_content: list[ContentBlock] = [
+        TextContentBlock(type="text", text="What is shown in this image?"),
+        ImageContentBlock(
+            type="image",
+            url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
+            mime_type="image/png",
+        ),
+    ]
+
+    from langchain_core.messages.content_blocks import create_text_block, create_image_block
+
+    # Using factory functions:
+    multimodal_content: list[ContentBlock] = [
+        create_text_block("What is shown in this image?"),
+        create_image_block(
+            url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
+            mime_type="image/png",
+        ),
+    ]
+
+Factory functions like ``create_text_block`` and ``create_image_block`` are provided
+and offer benefits such as:
+
+- Automatic ID generation (when not provided)
+- No need to manually specify the ``type`` field
+
+"""  # noqa: E501
 
 import warnings
-from typing import Any, Literal, Union
+from typing import Any, Literal, Optional, Union
+from uuid import uuid4
+
+from typing_extensions import NotRequired, TypedDict, TypeGuard
+
+
+def _ensure_id(id_val: Optional[str]) -> str:
+    """Ensure the ID is a valid string, generating a new UUID if not provided.
+
+    Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are
+    LangChain-generated IDs.
+
+    Args:
+        id_val: Optional string ID value to validate.
+
+    Returns:
+        A valid string ID, either the provided value or a new UUID.
+    """
+    return id_val or f"lc_{uuid4()}"
+
+
+class Citation(TypedDict):
+    """Annotation for citing data from a document.
+
+    .. note::
+        ``start_index``/``end_index`` refer to the **response text**,
+        not the source text. This means that the indices are relative to the model's
+        response, not the original document (as specified in the ``url``).
+
+    .. note::
+        ``create_citation`` may also be used as a factory to create a ``Citation``.
+        Benefits include:
+
+        * Automatic ID generation (when not provided)
+        * Required arguments strictly validated at creation time
+
+    """
+
+    type: Literal["citation"]
+    """Type of the content block. Used for discrimination."""
+
+    id: NotRequired[str]
+    """Content block identifier. Either:
+
+    - Generated by the provider (e.g., OpenAI's file ID)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
+    """
+
+    url: NotRequired[str]
+    """URL of the document source."""
+
+    # For future consideration, if needed:
+    # provenance: NotRequired[str]
+    # """Provenance of the document, e.g., ``'Wikipedia'``, ``'arXiv'``, etc.
+
+    # Included for future compatibility; not currently implemented.
+    # """
+
+    title: NotRequired[str]
+    """Source document title.
+
+    For example, the page title for a web page or the title of a paper.
+    """
-from pydantic import TypeAdapter, ValidationError
-from typing_extensions import NotRequired, TypedDict
+
+    start_index: NotRequired[int]
+    """Start index of the **response text** (``TextContentBlock.text``) for which the
+    annotation applies."""
+
+    end_index: NotRequired[int]
+    """End index of the **response text** (``TextContentBlock.text``) for which the
+    annotation applies."""
+
+    cited_text: NotRequired[str]
+    """Excerpt of source text being cited."""
+
+    # NOTE: not including spans for the raw document text (such as `text_start_index`
+    # and `text_end_index`) as this is not currently supported by any provider. The
+    # thinking is that the `cited_text` should be sufficient for most use cases, and it
+    # is difficult to reliably extract spans from the raw document text across file
+    # formats or encoding schemes.
+
+    extras: NotRequired[dict[str, Any]]
+    """Provider-specific metadata."""
+
+
+class NonStandardAnnotation(TypedDict):
+    """Provider-specific annotation format."""
+
+    type: Literal["non_standard_annotation"]
+    """Type of the content block. Used for discrimination."""
+
+    id: NotRequired[str]
+    """Content block identifier. Either:
+
+    - Generated by the provider (e.g., OpenAI's file ID)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
+    """
+
+    value: dict[str, Any]
+    """Provider-specific annotation data."""
+
+
+Annotation = Union[Citation, NonStandardAnnotation]
+
+
+class TextContentBlock(TypedDict):
+    """Text output from an LLM.
+
+    This typically represents the main text content of a message, such as the response
+    from a language model or the text of a user message.
+
+    .. note::
+        ``create_text_block`` may also be used as a factory to create a
+        ``TextContentBlock``. Benefits include:
+
+        * Automatic ID generation (when not provided)
+        * Required arguments strictly validated at creation time
+
+    """
+
+    type: Literal["text"]
+    """Type of the content block. Used for discrimination."""
+
+    id: NotRequired[str]
+    """Content block identifier. Either:
+
+    - Generated by the provider (e.g., OpenAI's file ID)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
+    """
+
+    text: str
+    """Block text."""
+
+    annotations: NotRequired[list[Annotation]]
+    """``Citation``s and other annotations."""
+
+    index: NotRequired[int]
+    """Index of block in aggregate response. Used during streaming."""
+
+    extras: NotRequired[dict[str, Any]]
+    """Provider-specific metadata."""
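+
+# Example (illustrative values): a text block carrying a `Citation` whose
+# `start_index`/`end_index` point into the block's own `text`:
+#
+#     block: TextContentBlock = {
+#         "type": "text",
+#         "text": "Paris is the capital of France.",
+#         "annotations": [
+#             {
+#                 "type": "citation",
+#                 "url": "https://example.com/geography",
+#                 "start_index": 0,
+#                 "end_index": 31,
+#                 "cited_text": "Paris is the capital city of France.",
+#             }
+#         ],
+#     }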
+
+
+class ToolCall(TypedDict):
+    """Represents a request to call a tool.
+
+    Example:
+
+        .. code-block:: python
+
+            {
+                "name": "foo",
+                "args": {"a": 1},
+                "id": "123"
+            }
+
+        This represents a request to call the tool named "foo" with arguments {"a": 1}
+        and an identifier of "123".
+
+    .. note::
+        ``create_tool_call`` may also be used as a factory to create a
+        ``ToolCall``. Benefits include:
+
+        * Automatic ID generation (when not provided)
+        * Required arguments strictly validated at creation time
+
+    """
+
+    type: Literal["tool_call"]
+    """Used for discrimination."""
+
+    id: Optional[str]
+    """An identifier associated with the tool call.
+
+    An identifier is needed to associate a tool call request with a tool
+    call result in events when multiple concurrent tool calls are made.
+    """
+    # TODO: Consider making this NotRequired[str] in the future.
+
+    name: str
+    """The name of the tool to be called."""
+
+    args: dict[str, Any]
+    """The arguments to the tool call."""
+
+    index: NotRequired[int]
+    """Index of block in aggregate response. Used during streaming."""
+
+    extras: NotRequired[dict[str, Any]]
+    """Provider-specific metadata."""
+
+
+class ToolCallChunk(TypedDict):
+    """A chunk of a tool call (e.g., as part of a stream).
+
+    When merging ``ToolCallChunks`` (e.g., via ``AIMessageChunk.__add__``),
+    all string attributes are concatenated. Chunks are only merged if their
+    values of ``index`` are equal and not ``None``.
+
+    Example:
+
+    .. code-block:: python
+
+        left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)]
+        right_chunks = [ToolCallChunk(name=None, args='1}', index=0)]
+
+        (
+            AIMessageChunk(content="", tool_call_chunks=left_chunks)
+            + AIMessageChunk(content="", tool_call_chunks=right_chunks)
+        ).tool_call_chunks == [ToolCallChunk(name='foo', args='{"a":1}', index=0)]
+    """
+
+    # TODO: Consider making fields NotRequired[str] in the future.
+
+    type: NotRequired[Literal["tool_call_chunk"]]
+    """Used for serialization."""
+
+    id: Optional[str]
+    """An identifier associated with the tool call."""
+
+    name: Optional[str]
+    """The name of the tool to be called."""
+
+    args: Optional[str]
+    """The arguments to the tool call."""
+
+    index: Optional[int]
+    """The index of the tool call in a sequence."""
+
+    extras: NotRequired[dict[str, Any]]
+    """Provider-specific metadata."""
+
+
+class InvalidToolCall(TypedDict):
+    """Allowance for errors made by LLM.
+
+    Here we add an ``error`` key to surface errors made during generation
+    (e.g., invalid JSON arguments.)
+    """
+
+    # TODO: Consider making fields NotRequired[str] in the future.
+
+    type: Literal["invalid_tool_call"]
+    """Used for discrimination."""
+
+    id: Optional[str]
+    """An identifier associated with the tool call."""
+
+    name: Optional[str]
+    """The name of the tool to be called."""
+
+    args: Optional[str]
+    """The arguments to the tool call."""
+
+    error: Optional[str]
+    """An error message associated with the tool call."""
+
+    index: NotRequired[int]
+    """Index of block in aggregate response. Used during streaming."""
+
+    extras: NotRequired[dict[str, Any]]
+    """Provider-specific metadata."""
+
+
+# Note: These are not standard tool calls, but rather provider-specific built-in tools.
+# Web search
+class WebSearchCall(TypedDict):
+    """Built-in web search tool call."""
+
+    type: Literal["web_search_call"]
+    """Type of the content block. Used for discrimination."""
+
+    id: NotRequired[str]
+    """Content block identifier. Either:
+
+    - Generated by the provider (e.g., OpenAI's file ID)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
+    """
+
+    query: NotRequired[str]
+    """The search query used in the web search tool call."""
+
+    index: NotRequired[int]
+    """Index of block in aggregate response. Used during streaming."""
+
+    extras: NotRequired[dict[str, Any]]
+    """Provider-specific metadata."""
+
+
+class WebSearchResult(TypedDict):
+    """Result of a built-in web search tool call."""
+
+    type: Literal["web_search_result"]
+    """Type of the content block. Used for discrimination."""
+
+    id: NotRequired[str]
+    """Content block identifier. Either:
+
+    - Generated by the provider (e.g., OpenAI's file ID)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
+    """
+
+    urls: NotRequired[list[str]]
+    """List of URLs returned by the web search tool call."""
+
+    index: NotRequired[int]
+    """Index of block in aggregate response. Used during streaming."""
+
+    extras: NotRequired[dict[str, Any]]
+    """Provider-specific metadata."""
+
+
+class CodeInterpreterCall(TypedDict):
+    """Built-in code interpreter tool call."""
+
+    type: Literal["code_interpreter_call"]
+    """Type of the content block. Used for discrimination."""
+
+    id: NotRequired[str]
+    """Content block identifier. Either:
+
+    - Generated by the provider (e.g., OpenAI's file ID)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
+    """
+
+    language: NotRequired[str]
+    """The name of the programming language used in the code interpreter tool call."""
+
+    code: NotRequired[str]
+    """The code to be executed by the code interpreter."""
+
+    index: NotRequired[int]
+    """Index of block in aggregate response. Used during streaming."""
+
+    extras: NotRequired[dict[str, Any]]
+    """Provider-specific metadata."""
+
+
+class CodeInterpreterOutput(TypedDict):
+    """Output of a single code interpreter tool call.
+
+    Full output of a code interpreter tool call is represented by
+    ``CodeInterpreterResult``, which is a list of these blocks.
+    """
+
+    type: Literal["code_interpreter_output"]
+    """Type of the content block. Used for discrimination."""
+
+    id: NotRequired[str]
+    """Content block identifier. Either:
+
+    - Generated by the provider (e.g., OpenAI's file ID)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
+    """
+
+    return_code: NotRequired[int]
+    """Return code of the executed code.
+
+    Example: ``0`` for success, non-zero for failure.
+    """
+
+    stderr: NotRequired[str]
+    """Standard error output of the executed code."""
+
+    stdout: NotRequired[str]
+    """Standard output of the executed code."""
+
+    file_ids: NotRequired[list[str]]
+    """List of file IDs generated by the code interpreter."""
+
+    index: NotRequired[int]
+    """Index of block in aggregate response. Used during streaming."""
+
+    extras: NotRequired[dict[str, Any]]
+    """Provider-specific metadata."""
+
+
+class CodeInterpreterResult(TypedDict):
+    """Result of a code interpreter tool call."""
+
+    type: Literal["code_interpreter_result"]
+    """Type of the content block. Used for discrimination."""
+
+    id: NotRequired[str]
+    """Content block identifier. Either:
+
+    - Generated by the provider (e.g., OpenAI's file ID)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
+    """
+
+    output: list[CodeInterpreterOutput]
+    """List of outputs from the code interpreter tool call."""
+
+    index: NotRequired[int]
+    """Index of block in aggregate response. Used during streaming."""
+
+    extras: NotRequired[dict[str, Any]]
+    """Provider-specific metadata."""
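+
+# Example (illustrative values): a full interpreter round trip pairs a call
+# block with a result block that aggregates one output block per execution:
+#
+#     call: CodeInterpreterCall = {
+#         "type": "code_interpreter_call",
+#         "language": "python",
+#         "code": "print(6 * 7)",
+#     }
+#     result: CodeInterpreterResult = {
+#         "type": "code_interpreter_result",
+#         "output": [
+#             {
+#                 "type": "code_interpreter_output",
+#                 "return_code": 0,
+#                 "stdout": "42\n",
+#             }
+#         ],
+#     }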
+
+
+class ReasoningContentBlock(TypedDict):
+    """Reasoning output from an LLM.
+
+    .. note::
+        ``create_reasoning_block`` may also be used as a factory to create a
+        ``ReasoningContentBlock``. Benefits include:
+
+        * Automatic ID generation (when not provided)
+        * Required arguments strictly validated at creation time
+
+    """
+
+    type: Literal["reasoning"]
+    """Type of the content block. Used for discrimination."""
+
+    id: NotRequired[str]
+    """Content block identifier. Either:
+
+    - Generated by the provider (e.g., OpenAI's file ID)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
+    """
+
+    reasoning: NotRequired[str]
+    """Reasoning text.
+
+    Either the thought summary or the raw reasoning text itself. This is often parsed
+    from ``<think>`` tags in the model's response.
+    """
+
+    index: NotRequired[int]
+    """Index of block in aggregate response. Used during streaming."""
+
+    extras: NotRequired[dict[str, Any]]
+    """Provider-specific metadata."""
+
+
+# Note: `title` and `context` are fields that could be used to provide additional
+# information about the file, such as a description or summary of its content.
+# E.g. with Claude, you can provide a context for a file which is passed to the model.
+class ImageContentBlock(TypedDict):
+    """Image data.
+
+    .. note::
+        ``create_image_block`` may also be used as a factory to create an
+        ``ImageContentBlock``. Benefits include:
+
+        * Automatic ID generation (when not provided)
+        * Required arguments strictly validated at creation time
+
+    """
+
+    type: Literal["image"]
+    """Type of the content block. Used for discrimination."""
+
+    id: NotRequired[str]
+    """Content block identifier. Either:
+
+    - Generated by the provider (e.g., OpenAI's file ID)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
+    """
+
+    file_id: NotRequired[str]
+    """ID of the image file, e.g., from a file storage system."""
 
     mime_type: NotRequired[str]
-    """MIME type of the content block (if needed)."""
+    """MIME type of the image. Required for base64.
+
+    `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml>`__
+    """
+
+    index: NotRequired[int]
+    """Index of block in aggregate response. Used during streaming."""
+
+    url: NotRequired[str]
+    """URL of the image."""
+
+    base64: NotRequired[str]
+    """Data as a base64 string."""
+
+    extras: NotRequired[dict[str, Any]]
+    """Provider-specific metadata."""
 
-class URLContentBlock(BaseDataContentBlock):
-    """Content block for data from a URL."""
-    type: Literal["image", "audio", "file"]
-    """Type of the content block."""
-    source_type: Literal["url"]
-    """Source type (url)."""
-    url: str
-    """URL for data."""
+
+class VideoContentBlock(TypedDict):
+    """Video data.
+
+    .. note::
+        ``create_video_block`` may also be used as a factory to create a
+        ``VideoContentBlock``. Benefits include:
+
+        * Automatic ID generation (when not provided)
+        * Required arguments strictly validated at creation time
+
+    """
+
+    type: Literal["video"]
+    """Type of the content block. Used for discrimination."""
+
+    id: NotRequired[str]
+    """Content block identifier. Either:
+
+    - Generated by the provider (e.g., OpenAI's file ID)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
+    """
-
+    file_id: NotRequired[str]
+    """ID of the video file, e.g., from a file storage system."""
+
+    mime_type: NotRequired[str]
+    """MIME type of the video. Required for base64.
+
+    `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml>`__
+    """
+
+    index: NotRequired[int]
+    """Index of block in aggregate response. Used during streaming."""
+
+    url: NotRequired[str]
+    """URL of the video."""
+
+    base64: NotRequired[str]
+    """Data as a base64 string."""
+
+    extras: NotRequired[dict[str, Any]]
+    """Provider-specific metadata."""
+
+
+class AudioContentBlock(TypedDict):
+    """Audio data.
+
+    .. note::
+        ``create_audio_block`` may also be used as a factory to create an
+        ``AudioContentBlock``. Benefits include:
+
+        * Automatic ID generation (when not provided)
+        * Required arguments strictly validated at creation time
+
+    """
+
+    type: Literal["audio"]
+    """Type of the content block. Used for discrimination."""
+
+    id: NotRequired[str]
+    """Content block identifier. Either:
+
+    - Generated by the provider (e.g., OpenAI's file ID)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
+    """
+
+    file_id: NotRequired[str]
+    """ID of the audio file, e.g., from a file storage system."""
+
+    mime_type: NotRequired[str]
+    """MIME type of the audio. Required for base64.
+
+    `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml>`__
+
+    """
+
+    index: NotRequired[int]
+    """Index of block in aggregate response. Used during streaming."""
+
+    url: NotRequired[str]
+    """URL of the audio."""
+
+    base64: NotRequired[str]
     """Data as a base64 string."""
 
+    extras: NotRequired[dict[str, Any]]
+    """Provider-specific metadata."""
+
 
-class PlainTextContentBlock(BaseDataContentBlock):
-    """Content block for plain text data (e.g., from a document)."""
+class PlainTextContentBlock(TypedDict):
+    """Plaintext data (e.g., from a document).
+
+    .. note::
+        Title and context are optional fields that may be passed to the model. See
+        the Anthropic documentation for an example.
+
+    .. note::
+        ``create_plaintext_block`` may also be used as a factory to create a
+        ``PlainTextContentBlock``. Benefits include:
+
+        * Automatic ID generation (when not provided)
+        * Required arguments strictly validated at creation time
+
+    """
+
+    type: Literal["text-plain"]
+    """Type of the content block. Used for discrimination."""
+
+    id: NotRequired[str]
+    """Content block identifier. Either:
+
+    - Generated by the provider (e.g., OpenAI's file ID)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
+    """
+
+    file_id: NotRequired[str]
+    """ID of the plaintext file, e.g., from a file storage system."""
+
+    mime_type: Literal["text/plain"]
+    """MIME type of the file. Required for base64."""
+
+    index: NotRequired[int]
+    """Index of block in aggregate response. Used during streaming."""
+
+    url: NotRequired[str]
+    """URL of the plaintext."""
+
+    base64: NotRequired[str]
+    """Data as a base64 string."""
+
+    text: NotRequired[str]
+    """Plaintext content. This is optional if the data is provided as base64."""
+
+    title: NotRequired[str]
+    """Title of the text data, e.g., the title of a document."""
+
+    context: NotRequired[str]
+    """Context for the text, e.g., a description or summary of the text's content."""
+
+    extras: NotRequired[dict[str, Any]]
+    """Provider-specific metadata."""
+
+
+class FileContentBlock(TypedDict):
+    """File data that doesn't fit into other multimodal blocks.
+
+    This block is intended for files that are not images, audio, or plaintext. For
+    example, it can be used for PDFs, Word documents, etc.
+
+    If the file is an image, audio, or plaintext, you should use the corresponding
+    content block type (e.g., ``ImageContentBlock``, ``AudioContentBlock``,
+    ``PlainTextContentBlock``).
+
+    .. note::
+        ``create_file_block`` may also be used as a factory to create a
+        ``FileContentBlock``. Benefits include:
+
+        * Automatic ID generation (when not provided)
+        * Required arguments strictly validated at creation time
+
+    """
 
     type: Literal["file"]
-    """Type of the content block."""
-    source_type: Literal["text"]
-    """Source type (text)."""
-    text: str
-    """Text data."""
+    """Type of the content block. Used for discrimination."""
+
+    id: NotRequired[str]
+    """Content block identifier. Either:
+
+    - Generated by the provider (e.g., OpenAI's file ID)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
+    """
+
+    file_id: NotRequired[str]
+    """ID of the file, e.g., from a file storage system."""
+
+    mime_type: NotRequired[str]
+    """MIME type of the file. Required for base64.
+
+    `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml>`__
+
+    """
+
+    index: NotRequired[int]
+    """Index of block in aggregate response. Used during streaming."""
+
+    url: NotRequired[str]
+    """URL of the file."""
+
+    base64: NotRequired[str]
+    """Data as a base64 string."""
+
+    extras: NotRequired[dict[str, Any]]
+    """Provider-specific metadata."""
+
+
+# Future modalities to consider:
+# - 3D models
+# - Tabular data
+
+
+class NonStandardContentBlock(TypedDict):
+    """Provider-specific data.
+
+    This block contains data for which there is not yet a standard type.
+
+    The purpose of this block should be to simply hold a provider-specific payload.
+    If a provider's non-standard output includes reasoning and tool calls, it should be
+    the adapter's job to parse that payload and emit the corresponding standard
+    ``ReasoningContentBlock`` and ``ToolCall`` blocks.
+
+    .. note::
+        ``create_non_standard_block`` may also be used as a factory to create a
+        ``NonStandardContentBlock``. Benefits include:
+
+        * Automatic ID generation (when not provided)
+        * Required arguments strictly validated at creation time
+
+    """
+
+    type: Literal["non_standard"]
+    """Type of the content block. Used for discrimination."""
+
+    id: NotRequired[str]
+    """Content block identifier. Either:
+
+    - Generated by the provider (e.g., OpenAI's file ID)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
+    """
 
-class IDContentBlock(TypedDict):
-    """Content block for data specified by an identifier."""
-    type: Literal["image", "audio", "file"]
-    """Type of the content block."""
-    source_type: Literal["id"]
-    """Source type (id)."""
-    id: str
-    """Identifier for data source."""
+
+    value: dict[str, Any]
+    """Provider-specific data."""
+
+    index: NotRequired[int]
+    """Index of block in aggregate response.
Used during streaming.""" +# --- Aliases --- DataContentBlock = Union[ - URLContentBlock, - Base64ContentBlock, + ImageContentBlock, + VideoContentBlock, + AudioContentBlock, PlainTextContentBlock, - IDContentBlock, + FileContentBlock, +] + +ToolContentBlock = Union[ + ToolCall, + ToolCallChunk, + CodeInterpreterCall, + CodeInterpreterOutput, + CodeInterpreterResult, + WebSearchCall, + WebSearchResult, ] -_DataContentBlockAdapter: TypeAdapter[DataContentBlock] = TypeAdapter(DataContentBlock) +ContentBlock = Union[ + TextContentBlock, + ToolCall, + ToolCallChunk, + InvalidToolCall, + ReasoningContentBlock, + NonStandardContentBlock, + DataContentBlock, + ToolContentBlock, +] -def is_data_content_block( - content_block: dict, -) -> bool: +KNOWN_BLOCK_TYPES = { + "text", + "text-plain", + "tool_call", + "invalid_tool_call", + "tool_call_chunk", + "reasoning", + "non_standard", + "image", + "audio", + "file", + "video", + "code_interpreter_call", + "code_interpreter_output", + "code_interpreter_result", + "web_search_call", + "web_search_result", +} + + +def is_data_content_block(block: dict) -> bool: """Check if the content block is a standard data content block. Args: - content_block: The content block to check. + block: The content block to check. Returns: True if the content block is a data content block, False otherwise. """ - try: - _ = _DataContentBlockAdapter.validate_python(content_block) - except ValidationError: - return False - else: - return True + return block.get("type") in ( + "audio", + "image", + "video", + "file", + "text-plain", + ) and any( + key in block + for key in ( + "url", + "base64", + "file_id", + "text", + "source_type", # backwards compatibility + ) + ) + + +def is_tool_call_block(block: ContentBlock) -> TypeGuard[ToolCall]: + """Type guard to check if a content block is a ``ToolCall``.""" + return block.get("type") == "tool_call" + + +def is_tool_call_chunk(block: ContentBlock) -> TypeGuard[ToolCallChunk]: + """Type guard to check if a content block is a ``ToolCallChunk``.""" + return block.get("type") == "tool_call_chunk" + + +def is_text_block(block: ContentBlock) -> TypeGuard[TextContentBlock]: + """Type guard to check if a content block is a ``TextContentBlock``.""" + return block.get("type") == "text" + +def is_reasoning_block(block: ContentBlock) -> TypeGuard[ReasoningContentBlock]: + """Type guard to check if a content block is a ``ReasoningContentBlock``.""" + return block.get("type") == "reasoning" -def convert_to_openai_image_block(content_block: dict[str, Any]) -> dict: + +def is_invalid_tool_call_block( + block: ContentBlock, +) -> TypeGuard[InvalidToolCall]: + """Type guard to check if a content block is an ``InvalidToolCall``.""" + return block.get("type") == "invalid_tool_call" + + +def convert_to_openai_image_block(block: dict[str, Any]) -> dict: """Convert image content block to format expected by OpenAI Chat Completions API.""" - if content_block["source_type"] == "url": + if "url" in block: return { "type": "image_url", "image_url": { - "url": content_block["url"], + "url": block["url"], }, } - if content_block["source_type"] == "base64": - if "mime_type" not in content_block: + if "base64" in block or block.get("source_type") == "base64": + if "mime_type" not in block: error_message = "mime_type key is required for base64 data." 
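+            # A data URL ("data:<mime>;base64,...") cannot be built without an
+            # explicit MIME type such as "image/png".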
raise ValueError(error_message) - mime_type = content_block["mime_type"] + mime_type = block["mime_type"] + base64_data = block["data"] if "data" in block else block["base64"] return { "type": "image_url", "image_url": { - "url": f"data:{mime_type};base64,{content_block['data']}", + "url": f"data:{mime_type};base64,{base64_data}", }, } error_message = "Unsupported source type. Only 'url' and 'base64' are supported." @@ -117,39 +974,493 @@ def convert_to_openai_data_block(block: dict) -> dict: formatted_block = convert_to_openai_image_block(block) elif block["type"] == "file": - if block["source_type"] == "base64": - file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"} + if "base64" in block or block.get("source_type") == "base64": + base64_data = block["data"] if "source_type" in block else block["base64"] + file = {"file_data": f"data:{block['mime_type']};base64,{base64_data}"} if filename := block.get("filename"): file["filename"] = filename - elif (metadata := block.get("metadata")) and ("filename" in metadata): - file["filename"] = metadata["filename"] + elif (extras := block.get("extras")) and ("filename" in extras): + file["filename"] = extras["filename"] + elif (extras := block.get("metadata")) and ("filename" in extras): + # Backward compat + file["filename"] = extras["filename"] else: warnings.warn( "OpenAI may require a filename for file inputs. Specify a filename " - "in the content block: {'type': 'file', 'source_type': 'base64', " - "'mime_type': 'application/pdf', 'data': '...', " - "'filename': 'my-pdf'}", + "in the content block: {'type': 'file', 'mime_type': " + "'application/pdf', 'base64': '...', 'filename': 'my-pdf'}", stacklevel=1, ) formatted_block = {"type": "file", "file": file} - elif block["source_type"] == "id": - formatted_block = {"type": "file", "file": {"file_id": block["id"]}} + elif "file_id" in block or block.get("source_type") == "id": + file_id = block["id"] if "source_type" in block else block["file_id"] + formatted_block = {"type": "file", "file": {"file_id": file_id}} else: - error_msg = "source_type base64 or id is required for file blocks." + error_msg = "Keys base64 or file_id required for file blocks." raise ValueError(error_msg) elif block["type"] == "audio": - if block["source_type"] == "base64": + if "base64" in block or block.get("source_type") == "base64": + base64_data = block["data"] if "source_type" in block else block["base64"] audio_format = block["mime_type"].split("/")[-1] formatted_block = { "type": "input_audio", - "input_audio": {"data": block["data"], "format": audio_format}, + "input_audio": {"data": base64_data, "format": audio_format}, } else: - error_msg = "source_type base64 is required for audio blocks." + error_msg = "Key base64 is required for audio blocks." raise ValueError(error_msg) else: error_msg = f"Block of type {block['type']} is not supported." raise ValueError(error_msg) return formatted_block + + +def create_text_block( + text: str, + *, + id: Optional[str] = None, + annotations: Optional[list[Annotation]] = None, + index: Optional[int] = None, +) -> TextContentBlock: + """Create a ``TextContentBlock``. + + Args: + text: The text content of the block. + id: Content block identifier. Generated automatically if not provided. + annotations: ``Citation``s and other annotations for the text. + index: Index of block in aggregate response. Used during streaming. + + Returns: + A properly formatted ``TextContentBlock``. + + .. 
note::
+        The ``id`` is generated automatically if not provided, using a UUID4 format
+        prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
+
+    """
+    block = TextContentBlock(
+        type="text",
+        text=text,
+        id=_ensure_id(id),
+    )
+    if annotations is not None:
+        block["annotations"] = annotations
+    if index is not None:
+        block["index"] = index
+    return block
+
+
+def create_image_block(
+    *,
+    url: Optional[str] = None,
+    base64: Optional[str] = None,
+    file_id: Optional[str] = None,
+    mime_type: Optional[str] = None,
+    id: Optional[str] = None,
+    index: Optional[int] = None,
+) -> ImageContentBlock:
+    """Create an ``ImageContentBlock``.
+
+    Args:
+        url: URL of the image.
+        base64: Base64-encoded image data.
+        file_id: ID of the image file from a file storage system.
+        mime_type: MIME type of the image. Required for base64 data.
+        id: Content block identifier. Generated automatically if not provided.
+        index: Index of block in aggregate response. Used during streaming.
+
+    Returns:
+        A properly formatted ``ImageContentBlock``.
+
+    Raises:
+        ValueError: If no image source is provided or if ``base64`` is used without
+            ``mime_type``.
+
+    .. note::
+        The ``id`` is generated automatically if not provided, using a UUID4 format
+        prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
+
+    """
+    if not any([url, base64, file_id]):
+        msg = "Must provide one of: url, base64, or file_id"
+        raise ValueError(msg)
+
+    if base64 and not mime_type:
+        msg = "mime_type is required when using base64 data"
+        raise ValueError(msg)
+
+    block = ImageContentBlock(type="image", id=_ensure_id(id))
+
+    if url is not None:
+        block["url"] = url
+    if base64 is not None:
+        block["base64"] = base64
+    if file_id is not None:
+        block["file_id"] = file_id
+    if mime_type is not None:
+        block["mime_type"] = mime_type
+    if index is not None:
+        block["index"] = index
+
+    return block
+
+
+def create_video_block(
+    *,
+    url: Optional[str] = None,
+    base64: Optional[str] = None,
+    file_id: Optional[str] = None,
+    mime_type: Optional[str] = None,
+    id: Optional[str] = None,
+    index: Optional[int] = None,
+) -> VideoContentBlock:
+    """Create a ``VideoContentBlock``.
+
+    Args:
+        url: URL of the video.
+        base64: Base64-encoded video data.
+        file_id: ID of the video file from a file storage system.
+        mime_type: MIME type of the video. Required for base64 data.
+        id: Content block identifier. Generated automatically if not provided.
+        index: Index of block in aggregate response. Used during streaming.
+
+    Returns:
+        A properly formatted ``VideoContentBlock``.
+
+    Raises:
+        ValueError: If no video source is provided or if ``base64`` is used without
+            ``mime_type``.
+
+    .. note::
+        The ``id`` is generated automatically if not provided, using a UUID4 format
+        prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
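+
+        A minimal usage sketch (the URL is illustrative):
+
+        .. code-block:: python
+
+            block = create_video_block(
+                url="https://example.com/demo.mp4",
+                mime_type="video/mp4",
+            )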
+ + """ + if not any([url, base64, file_id]): + msg = "Must provide one of: url, base64, or file_id" + raise ValueError(msg) + + if base64 and not mime_type: + msg = "mime_type is required when using base64 data" + raise ValueError(msg) + + block = VideoContentBlock(type="video", id=_ensure_id(id)) + + if url is not None: + block["url"] = url + if base64 is not None: + block["base64"] = base64 + if file_id is not None: + block["file_id"] = file_id + if mime_type is not None: + block["mime_type"] = mime_type + if index is not None: + block["index"] = index + + return block + + +def create_audio_block( + *, + url: Optional[str] = None, + base64: Optional[str] = None, + file_id: Optional[str] = None, + mime_type: Optional[str] = None, + id: Optional[str] = None, + index: Optional[int] = None, +) -> AudioContentBlock: + """Create an ``AudioContentBlock``. + + Args: + url: URL of the audio. + base64: Base64-encoded audio data. + file_id: ID of the audio file from a file storage system. + mime_type: MIME type of the audio. Required for base64 data. + id: Content block identifier. Generated automatically if not provided. + index: Index of block in aggregate response. Used during streaming. + + Returns: + A properly formatted ``AudioContentBlock``. + + Raises: + ValueError: If no audio source is provided or if ``base64`` is used without + ``mime_type``. + + .. note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. + + """ + if not any([url, base64, file_id]): + msg = "Must provide one of: url, base64, or file_id" + raise ValueError(msg) + + if base64 and not mime_type: + msg = "mime_type is required when using base64 data" + raise ValueError(msg) + + block = AudioContentBlock(type="audio", id=_ensure_id(id)) + + if url is not None: + block["url"] = url + if base64 is not None: + block["base64"] = base64 + if file_id is not None: + block["file_id"] = file_id + if mime_type is not None: + block["mime_type"] = mime_type + if index is not None: + block["index"] = index + + return block + + +def create_file_block( + *, + url: Optional[str] = None, + base64: Optional[str] = None, + file_id: Optional[str] = None, + mime_type: Optional[str] = None, + id: Optional[str] = None, + index: Optional[int] = None, +) -> FileContentBlock: + """Create a ``FileContentBlock``. + + Args: + url: URL of the file. + base64: Base64-encoded file data. + file_id: ID of the file from a file storage system. + mime_type: MIME type of the file. Required for base64 data. + id: Content block identifier. Generated automatically if not provided. + index: Index of block in aggregate response. Used during streaming. + + Returns: + A properly formatted ``FileContentBlock``. + + Raises: + ValueError: If no file source is provided or if ``base64`` is used without + ``mime_type``. + + .. note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. 
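+
+        A minimal usage sketch (the file id is illustrative):
+
+        .. code-block:: python
+
+            block = create_file_block(
+                file_id="file-abc123",
+                mime_type="application/pdf",
+            )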
+ + """ + if not any([url, base64, file_id]): + msg = "Must provide one of: url, base64, or file_id" + raise ValueError(msg) + + if base64 and not mime_type: + msg = "mime_type is required when using base64 data" + raise ValueError(msg) + + block = FileContentBlock(type="file", id=_ensure_id(id)) + + if url is not None: + block["url"] = url + if base64 is not None: + block["base64"] = base64 + if file_id is not None: + block["file_id"] = file_id + if mime_type is not None: + block["mime_type"] = mime_type + if index is not None: + block["index"] = index + + return block + + +def create_plaintext_block( + text: Optional[str] = None, + url: Optional[str] = None, + base64: Optional[str] = None, + file_id: Optional[str] = None, + title: Optional[str] = None, + context: Optional[str] = None, + id: Optional[str] = None, + index: Optional[int] = None, +) -> PlainTextContentBlock: + """Create a ``PlainTextContentBlock``. + + Args: + text: The plaintext content. + url: URL of the plaintext file. + base64: Base64-encoded plaintext data. + file_id: ID of the plaintext file from a file storage system. + title: Title of the text data. + context: Context or description of the text content. + id: Content block identifier. Generated automatically if not provided. + index: Index of block in aggregate response. Used during streaming. + + Returns: + A properly formatted ``PlainTextContentBlock``. + + .. note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. + + """ + block = PlainTextContentBlock( + type="text-plain", + mime_type="text/plain", + id=_ensure_id(id), + ) + + if text is not None: + block["text"] = text + if url is not None: + block["url"] = url + if base64 is not None: + block["base64"] = base64 + if file_id is not None: + block["file_id"] = file_id + if title is not None: + block["title"] = title + if context is not None: + block["context"] = context + if index is not None: + block["index"] = index + + return block + + +def create_tool_call( + name: str, + args: dict[str, Any], + *, + id: Optional[str] = None, + index: Optional[int] = None, +) -> ToolCall: + """Create a ``ToolCall``. + + Args: + name: The name of the tool to be called. + args: The arguments to the tool call. + id: An identifier for the tool call. Generated automatically if not provided. + index: Index of block in aggregate response. Used during streaming. + + Returns: + A properly formatted ``ToolCall``. + + .. note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. + + """ + block = ToolCall( + type="tool_call", + name=name, + args=args, + id=_ensure_id(id), + ) + + if index is not None: + block["index"] = index + + return block + + +def create_reasoning_block( + reasoning: Optional[str] = None, + id: Optional[str] = None, + index: Optional[int] = None, +) -> ReasoningContentBlock: + """Create a ``ReasoningContentBlock``. + + Args: + reasoning: The reasoning text or thought summary. + id: Content block identifier. Generated automatically if not provided. + index: Index of block in aggregate response. Used during streaming. + + Returns: + A properly formatted ``ReasoningContentBlock``. + + .. note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. 
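+
+        A minimal usage sketch (the reasoning text is illustrative):
+
+        .. code-block:: python
+
+            block = create_reasoning_block(
+                "Comparing both options step by step before answering..."
+            )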
+ + """ + block = ReasoningContentBlock( + type="reasoning", + reasoning=reasoning or "", + id=_ensure_id(id), + ) + + if index is not None: + block["index"] = index + + return block + + +def create_citation( + *, + url: Optional[str] = None, + title: Optional[str] = None, + start_index: Optional[int] = None, + end_index: Optional[int] = None, + cited_text: Optional[str] = None, + id: Optional[str] = None, +) -> Citation: + """Create a ``Citation``. + + Args: + url: URL of the document source. + title: Source document title. + start_index: Start index in the response text where citation applies. + end_index: End index in the response text where citation applies. + cited_text: Excerpt of source text being cited. + id: Content block identifier. Generated automatically if not provided. + + Returns: + A properly formatted ``Citation``. + + .. note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. + + """ + block = Citation(type="citation", id=_ensure_id(id)) + + if url is not None: + block["url"] = url + if title is not None: + block["title"] = title + if start_index is not None: + block["start_index"] = start_index + if end_index is not None: + block["end_index"] = end_index + if cited_text is not None: + block["cited_text"] = cited_text + + return block + + +def create_non_standard_block( + value: dict[str, Any], + *, + id: Optional[str] = None, + index: Optional[int] = None, +) -> NonStandardContentBlock: + """Create a ``NonStandardContentBlock``. + + Args: + value: Provider-specific data. + id: Content block identifier. Generated automatically if not provided. + index: Index of block in aggregate response. Used during streaming. + + Returns: + A properly formatted ``NonStandardContentBlock``. + + .. note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. + + """ + block = NonStandardContentBlock( + type="non_standard", + value=value, + id=_ensure_id(id), + ) + + if index is not None: + block["index"] = index + + return block diff --git a/libs/core/langchain_core/messages/modifier.py b/libs/core/langchain_core/messages/modifier.py index 08b7e79b69cc6..5f1602a4908d3 100644 --- a/libs/core/langchain_core/messages/modifier.py +++ b/libs/core/langchain_core/messages/modifier.py @@ -13,7 +13,7 @@ class RemoveMessage(BaseMessage): def __init__( self, - id: str, # noqa: A002 + id: str, **kwargs: Any, ) -> None: """Create a RemoveMessage. 
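
The factories and type guards added above compose as in this sketch (block
contents are illustrative):

.. code-block:: python

    from langchain_core.messages.content_blocks import (
        create_text_block,
        is_text_block,
        is_tool_call_block,
    )

    block = create_text_block("Hello, world!")

    assert is_text_block(block)  # narrows the type to TextContentBlock
    assert not is_tool_call_block(block)
    assert block["id"].startswith("lc_")  # auto-generated LangChain id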
diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index 1f8a519a7dc24..181c80443d56f 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -5,9 +5,12 @@ from uuid import UUID from pydantic import Field, model_validator -from typing_extensions import NotRequired, TypedDict, override +from typing_extensions import override from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content +from langchain_core.messages.content_blocks import InvalidToolCall as InvalidToolCall +from langchain_core.messages.content_blocks import ToolCall as ToolCall +from langchain_core.messages.content_blocks import ToolCallChunk as ToolCallChunk from langchain_core.utils._merge import merge_dicts, merge_obj @@ -177,42 +180,11 @@ def __add__(self, other: Any) -> BaseMessageChunk: # type: ignore[override] return super().__add__(other) -class ToolCall(TypedDict): - """Represents a request to call a tool. - - Example: - - .. code-block:: python - - { - "name": "foo", - "args": {"a": 1}, - "id": "123" - } - - This represents a request to call the tool named "foo" with arguments {"a": 1} - and an identifier of "123". - - """ - - name: str - """The name of the tool to be called.""" - args: dict[str, Any] - """The arguments to the tool call.""" - id: Optional[str] - """An identifier associated with the tool call. - - An identifier is needed to associate a tool call request with a tool - call result in events when multiple concurrent tool calls are made. - """ - type: NotRequired[Literal["tool_call"]] - - def tool_call( *, name: str, args: dict[str, Any], - id: Optional[str], # noqa: A002 + id: Optional[str], ) -> ToolCall: """Create a tool call. @@ -224,43 +196,11 @@ def tool_call( return ToolCall(name=name, args=args, id=id, type="tool_call") -class ToolCallChunk(TypedDict): - """A chunk of a tool call (e.g., as part of a stream). - - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), - all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. - - Example: - - .. code-block:: python - - left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] - right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] - - ( - AIMessageChunk(content="", tool_call_chunks=left_chunks) - + AIMessageChunk(content="", tool_call_chunks=right_chunks) - ).tool_call_chunks == [ToolCallChunk(name='foo', args='{"a":1}', index=0)] - - """ - - name: Optional[str] - """The name of the tool to be called.""" - args: Optional[str] - """The arguments to the tool call.""" - id: Optional[str] - """An identifier associated with the tool call.""" - index: Optional[int] - """The index of the tool call in a sequence.""" - type: NotRequired[Literal["tool_call_chunk"]] - - def tool_call_chunk( *, name: Optional[str] = None, args: Optional[str] = None, - id: Optional[str] = None, # noqa: A002 + id: Optional[str] = None, index: Optional[int] = None, ) -> ToolCallChunk: """Create a tool call chunk. @@ -276,29 +216,11 @@ def tool_call_chunk( ) -class InvalidToolCall(TypedDict): - """Allowance for errors made by LLM. - - Here we add an `error` key to surface errors made during generation - (e.g., invalid JSON arguments.) 
- """ - - name: Optional[str] - """The name of the tool to be called.""" - args: Optional[str] - """The arguments to the tool call.""" - id: Optional[str] - """An identifier associated with the tool call.""" - error: Optional[str] - """An error message associated with the tool call.""" - type: NotRequired[Literal["invalid_tool_call"]] - - def invalid_tool_call( *, name: Optional[str] = None, args: Optional[str] = None, - id: Optional[str] = None, # noqa: A002 + id: Optional[str] = None, error: Optional[str] = None, ) -> InvalidToolCall: """Create an invalid tool call. diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py index 37f16c632bd06..e84dc6c0191ed 100644 --- a/libs/core/langchain_core/messages/utils.py +++ b/libs/core/langchain_core/messages/utils.py @@ -213,7 +213,7 @@ def _create_message_from_message_type( name: Optional[str] = None, tool_call_id: Optional[str] = None, tool_calls: Optional[list[dict[str, Any]]] = None, - id: Optional[str] = None, # noqa: A002 + id: Optional[str] = None, **additional_kwargs: Any, ) -> BaseMessage: """Create a message from a message type and content string. diff --git a/libs/core/langchain_core/output_parsers/transform.py b/libs/core/langchain_core/output_parsers/transform.py index 876e66b555669..0c864805b9335 100644 --- a/libs/core/langchain_core/output_parsers/transform.py +++ b/libs/core/langchain_core/output_parsers/transform.py @@ -32,7 +32,7 @@ class BaseTransformOutputParser(BaseOutputParser[T]): def _transform( self, - input: Iterator[Union[str, BaseMessage]], # noqa: A002 + input: Iterator[Union[str, BaseMessage]], ) -> Iterator[T]: for chunk in input: if isinstance(chunk, BaseMessage): @@ -42,7 +42,7 @@ def _transform( async def _atransform( self, - input: AsyncIterator[Union[str, BaseMessage]], # noqa: A002 + input: AsyncIterator[Union[str, BaseMessage]], ) -> AsyncIterator[T]: async for chunk in input: if isinstance(chunk, BaseMessage): diff --git a/libs/core/langchain_core/runnables/base.py b/libs/core/langchain_core/runnables/base.py index 6e0be997af6ef..17024df7bb766 100644 --- a/libs/core/langchain_core/runnables/base.py +++ b/libs/core/langchain_core/runnables/base.py @@ -728,7 +728,7 @@ def assign( @abstractmethod def invoke( self, - input: Input, # noqa: A002 + input: Input, config: Optional[RunnableConfig] = None, **kwargs: Any, ) -> Output: @@ -748,7 +748,7 @@ def invoke( async def ainvoke( self, - input: Input, # noqa: A002 + input: Input, config: Optional[RunnableConfig] = None, **kwargs: Any, ) -> Output: @@ -996,7 +996,7 @@ async def ainvoke_task( def stream( self, - input: Input, # noqa: A002 + input: Input, config: Optional[RunnableConfig] = None, **kwargs: Optional[Any], ) -> Iterator[Output]: @@ -1016,7 +1016,7 @@ def stream( async def astream( self, - input: Input, # noqa: A002 + input: Input, config: Optional[RunnableConfig] = None, **kwargs: Optional[Any], ) -> AsyncIterator[Output]: @@ -1070,7 +1070,7 @@ def astream_log( async def astream_log( self, - input: Any, # noqa: A002 + input: Any, config: Optional[RunnableConfig] = None, *, diff: bool = True, @@ -1141,7 +1141,7 @@ async def astream_log( async def astream_events( self, - input: Any, # noqa: A002 + input: Any, config: Optional[RunnableConfig] = None, *, version: Literal["v1", "v2"] = "v2", @@ -1407,7 +1407,7 @@ async def slow_thing(some_input: str, config: RunnableConfig) -> str: def transform( self, - input: Iterator[Input], # noqa: A002 + input: Iterator[Input], config: Optional[RunnableConfig] = None, 
**kwargs: Optional[Any], ) -> Iterator[Output]: @@ -1449,7 +1449,7 @@ def transform( async def atransform( self, - input: AsyncIterator[Input], # noqa: A002 + input: AsyncIterator[Input], config: Optional[RunnableConfig] = None, **kwargs: Optional[Any], ) -> AsyncIterator[Output]: diff --git a/libs/core/langchain_core/runnables/config.py b/libs/core/langchain_core/runnables/config.py index 4ac7bda7b4654..cc36622b914bf 100644 --- a/libs/core/langchain_core/runnables/config.py +++ b/libs/core/langchain_core/runnables/config.py @@ -402,7 +402,7 @@ def call_func_with_variable_args( Callable[[Input, CallbackManagerForChainRun], Output], Callable[[Input, CallbackManagerForChainRun, RunnableConfig], Output], ], - input: Input, # noqa: A002 + input: Input, config: RunnableConfig, run_manager: Optional[CallbackManagerForChainRun] = None, **kwargs: Any, @@ -439,7 +439,7 @@ def acall_func_with_variable_args( Awaitable[Output], ], ], - input: Input, # noqa: A002 + input: Input, config: RunnableConfig, run_manager: Optional[AsyncCallbackManagerForChainRun] = None, **kwargs: Any, diff --git a/libs/core/langchain_core/runnables/graph.py b/libs/core/langchain_core/runnables/graph.py index 3e22494bad7fb..20a841d51a84f 100644 --- a/libs/core/langchain_core/runnables/graph.py +++ b/libs/core/langchain_core/runnables/graph.py @@ -114,7 +114,7 @@ class Node(NamedTuple): def copy( self, *, - id: Optional[str] = None, # noqa: A002 + id: Optional[str] = None, name: Optional[str] = None, ) -> Node: """Return a copy of the node with optional new id and name. @@ -187,7 +187,7 @@ class MermaidDrawMethod(Enum): def node_data_str( - id: str, # noqa: A002 + id: str, data: Union[type[BaseModel], RunnableType, None], ) -> str: """Convert the data of a node to a string. @@ -328,7 +328,7 @@ def next_id(self) -> str: def add_node( self, data: Union[type[BaseModel], RunnableType, None], - id: Optional[str] = None, # noqa: A002 + id: Optional[str] = None, *, metadata: Optional[dict[str, Any]] = None, ) -> Node: diff --git a/libs/core/langchain_core/utils/function_calling.py b/libs/core/langchain_core/utils/function_calling.py index 609129ac58b1b..b6e04c9abae88 100644 --- a/libs/core/langchain_core/utils/function_calling.py +++ b/libs/core/langchain_core/utils/function_calling.py @@ -627,7 +627,7 @@ def convert_to_json_schema( @beta() def tool_example_to_messages( - input: str, # noqa: A002 + input: str, tool_calls: list[BaseModel], tool_outputs: Optional[list[str]] = None, *, diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index e329e0d3c5cf6..a6e5ed777acc2 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -86,6 +86,7 @@ ignore = [ "FIX002", # Line contains TODO "ISC001", # Messes with the formatter "PERF203", # Rarely useful + "PLC0414", # Enable re-export "PLR09", # Too many something (arg, statements, etc) "RUF012", # Doesn't play well with Pydantic "TC001", # Doesn't play well with Pydantic @@ -105,6 +106,7 @@ unfixable = ["PLW1510",] flake8-annotations.allow-star-arg-any = true flake8-annotations.mypy-init-return = true +flake8-builtins.ignorelist = ["id", "input", "type"] flake8-type-checking.runtime-evaluated-base-classes = ["pydantic.BaseModel","langchain_core.load.serializable.Serializable","langchain_core.runnables.base.RunnableSerializable"] pep8-naming.classmethod-decorators = [ "classmethod", "langchain_core.utils.pydantic.pre_init", "pydantic.field_validator", "pydantic.v1.root_validator",] pydocstyle.convention = "google" diff --git 
a/libs/core/tests/unit_tests/messages/test_imports.py b/libs/core/tests/unit_tests/messages/test_imports.py index ff9fbf92fc77e..750f2f49f060d 100644 --- a/libs/core/tests/unit_tests/messages/test_imports.py +++ b/libs/core/tests/unit_tests/messages/test_imports.py @@ -5,26 +5,48 @@ "_message_from_dict", "AIMessage", "AIMessageChunk", + "Annotation", "AnyMessage", + "AudioContentBlock", "BaseMessage", "BaseMessageChunk", + "ContentBlock", "ChatMessage", "ChatMessageChunk", + "Citation", + "CodeInterpreterCall", + "CodeInterpreterOutput", + "CodeInterpreterResult", + "DataContentBlock", + "FileContentBlock", "FunctionMessage", "FunctionMessageChunk", "HumanMessage", "HumanMessageChunk", + "ImageContentBlock", "InvalidToolCall", + "NonStandardAnnotation", + "NonStandardContentBlock", + "PlainTextContentBlock", "SystemMessage", "SystemMessageChunk", + "TextContentBlock", "ToolCall", "ToolCallChunk", "ToolMessage", "ToolMessageChunk", + "VideoContentBlock", + "WebSearchCall", + "WebSearchResult", + "ReasoningContentBlock", "RemoveMessage", "convert_to_messages", "get_buffer_string", "is_data_content_block", + "is_reasoning_block", + "is_text_block", + "is_tool_call_block", + "is_tool_call_chunk", "merge_content", "message_chunk_to_message", "message_to_dict", diff --git a/libs/core/tests/unit_tests/messages/test_utils.py b/libs/core/tests/unit_tests/messages/test_utils.py index bedd518589ea0..f9f1c9c9ff081 100644 --- a/libs/core/tests/unit_tests/messages/test_utils.py +++ b/libs/core/tests/unit_tests/messages/test_utils.py @@ -1221,15 +1221,30 @@ def test_convert_to_openai_messages_multimodal() -> None: {"type": "text", "text": "Text message"}, { "type": "image", - "source_type": "url", "url": "https://example.com/test.png", }, + { + "type": "image", + "source_type": "url", # backward compatibility + "url": "https://example.com/test.png", + }, + { + "type": "image", + "base64": "", + "mime_type": "image/png", + }, { "type": "image", "source_type": "base64", "data": "", "mime_type": "image/png", }, + { + "type": "file", + "base64": "", + "mime_type": "application/pdf", + "filename": "test.pdf", + }, { "type": "file", "source_type": "base64", @@ -1244,11 +1259,20 @@ def test_convert_to_openai_messages_multimodal() -> None: "file_data": "data:application/pdf;base64,", }, }, + { + "type": "file", + "file_id": "file-abc123", + }, { "type": "file", "source_type": "id", "id": "file-abc123", }, + { + "type": "audio", + "base64": "", + "mime_type": "audio/wav", + }, { "type": "audio", "source_type": "base64", @@ -1268,7 +1292,7 @@ def test_convert_to_openai_messages_multimodal() -> None: result = convert_to_openai_messages(messages, text_format="block") assert len(result) == 1 message = result[0] - assert len(message["content"]) == 8 + assert len(message["content"]) == 13 # Test adding filename messages = [ @@ -1276,8 +1300,7 @@ def test_convert_to_openai_messages_multimodal() -> None: content=[ { "type": "file", - "source_type": "base64", - "data": "", + "base64": "", "mime_type": "application/pdf", }, ] diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index 7c07416fe5d9c..f45b2f7dae9b6 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -726,7 +726,7 @@ 'description': ''' Allowance for errors made by LLM. 
- Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', 'properties': dict({ @@ -752,6 +752,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -763,6 +767,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -781,9 +789,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -998,12 +1007,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -1015,6 +1035,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -1026,9 +1050,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -1037,9 +1062,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunks`` (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. + values of ``index`` are equal and not ``None``. Example: @@ -1065,6 +1090,10 @@ ]), 'title': 'Args', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -1105,9 +1134,9 @@ }), }), 'required': list([ + 'id', 'name', 'args', - 'id', 'index', ]), 'title': 'ToolCallChunk', @@ -2158,7 +2187,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', 'properties': dict({ @@ -2184,6 +2213,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -2195,6 +2228,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -2213,9 +2250,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -2430,12 +2468,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. 
Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -2447,6 +2496,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -2458,9 +2511,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -2469,9 +2523,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunks`` (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. + values of ``index`` are equal and not ``None``. Example: @@ -2497,6 +2551,10 @@ ]), 'title': 'Args', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -2537,9 +2595,9 @@ }), }), 'required': list([ + 'id', 'name', 'args', - 'id', 'index', ]), 'title': 'ToolCallChunk', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index a788c425fced3..1a5bcc93dad87 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -1129,7 +1129,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', 'properties': dict({ @@ -1155,6 +1155,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -1166,6 +1170,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -1184,9 +1192,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -1401,12 +1410,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -1418,6 +1438,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -1429,9 +1453,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -1440,9 +1465,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunks`` (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. 
+ values of ``index`` are equal and not ``None``. Example: @@ -1468,6 +1493,10 @@ ]), 'title': 'Args', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -1508,9 +1537,9 @@ }), }), 'required': list([ + 'id', 'name', 'args', - 'id', 'index', ]), 'title': 'ToolCallChunk', diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index 0656a2f2e974b..89c77414dc0b8 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -3,6 +3,7 @@ from typing import Optional, Union import pytest +from typing_extensions import get_args from langchain_core.documents import Document from langchain_core.load import dumpd, load @@ -30,10 +31,16 @@ messages_from_dict, messages_to_dict, ) +from langchain_core.messages.content_blocks import KNOWN_BLOCK_TYPES, ContentBlock from langchain_core.messages.tool import invalid_tool_call as create_invalid_tool_call from langchain_core.messages.tool import tool_call as create_tool_call from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk from langchain_core.utils._merge import merge_lists +from langchain_core.v1.messages import AIMessage as AIMessageV1 +from langchain_core.v1.messages import AIMessageChunk as AIMessageChunkV1 +from langchain_core.v1.messages import HumanMessage as HumanMessageV1 +from langchain_core.v1.messages import SystemMessage as SystemMessageV1 +from langchain_core.v1.messages import ToolMessage as ToolMessageV1 def test_message_init() -> None: @@ -181,20 +188,170 @@ def test_message_chunks() -> None: # Test ID order of precedence null_id = AIMessageChunk(content="", id=None) default_id = AIMessageChunk( - content="", id="run-abc123" + content="", id="lc_run--abc123" ) # LangChain-assigned run ID meaningful_id = AIMessageChunk(content="", id="msg_def456") # provider-assigned ID - assert (null_id + default_id).id == "run-abc123" - assert (default_id + null_id).id == "run-abc123" + assert (null_id + default_id).id == "lc_run--abc123" + assert (default_id + null_id).id == "lc_run--abc123" assert (null_id + meaningful_id).id == "msg_def456" assert (meaningful_id + null_id).id == "msg_def456" + # Provider assigned IDs have highest precedence assert (default_id + meaningful_id).id == "msg_def456" assert (meaningful_id + default_id).id == "msg_def456" +def test_message_chunks_v1() -> None: + left = AIMessageChunkV1("foo ", id="abc") + right = AIMessageChunkV1("bar") + expected = AIMessageChunkV1("foo bar", id="abc") + assert left + right == expected + + # Test tool calls + one = AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk(name="tool1", args="", id="1", index=0) + ], + ) + two = AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk(name=None, args='{"arg1": "val', id=None, index=0) + ], + ) + three = AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk(name=None, args='ue}"', id=None, index=0) + ], + ) + result = one + two + three + expected = AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk( + name="tool1", args='{"arg1": "value}"', id="1", index=0 + ) + ], + id=result.id, # Use the same ID as the result + ) + assert result == expected + + converted_message = result.to_message() + assert converted_message == AIMessageV1( + content=[ + { + "name": "tool1", + "args": {"arg1": "value}"}, + "id": "1", + "type": "tool_call", + } + ], + id=converted_message.id, # Use the same ID as the converted message 
+ ) + + chunk1 = AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk(name="tool1", args="", id="1", index=0) + ], + ) + chunk2 = AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk(name="tool1", args="a", id=None, index=1) + ], + ) + # Don't merge if `index` field does not match. + merge_result = chunk1 + chunk2 + assert merge_result == AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk(name="tool1", args="", id="1", index=0), + create_tool_call_chunk(name="tool1", args="a", id=None, index=1), + ], + id=merge_result.id, # Use the same ID as the merge result + ) + + ai_msg_chunk = AIMessageChunkV1([]) + tool_calls_msg_chunk = AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk(name="tool1", args="a", id=None, index=1) + ], + ) + # These assertions test that adding empty chunks preserves the non-empty chunk + result1 = ai_msg_chunk + tool_calls_msg_chunk + assert result1.tool_call_chunks == tool_calls_msg_chunk.tool_call_chunks + assert result1.content == tool_calls_msg_chunk.content + + result2 = tool_calls_msg_chunk + ai_msg_chunk + assert result2.tool_call_chunks == tool_calls_msg_chunk.tool_call_chunks + assert result2.content == tool_calls_msg_chunk.content + + ai_msg_chunk = AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk(name="tool1", args="", id="1", index=0) + ], + ) + assert ai_msg_chunk.tool_calls == [create_tool_call(name="tool1", args={}, id="1")] + + # Test token usage + left = AIMessageChunkV1( + [], + usage_metadata={"input_tokens": 1, "output_tokens": 2, "total_tokens": 3}, + ) + right = AIMessageChunkV1( + [], + usage_metadata={"input_tokens": 4, "output_tokens": 5, "total_tokens": 9}, + ) + usage_result = left + right + expected_usage = AIMessageChunkV1( + content=[], + usage_metadata={"input_tokens": 5, "output_tokens": 7, "total_tokens": 12}, + id=usage_result.id, # Use the same ID as the result + ) + assert usage_result == expected_usage + + # Test adding empty chunks preserves the original + left_result = AIMessageChunkV1(content=[]) + left + assert left_result.usage_metadata == left.usage_metadata + assert left_result.content == left.content + + right_result = right + AIMessageChunkV1(content=[]) + assert right_result.usage_metadata == right.usage_metadata + assert right_result.content == right.content + + # Test ID order of precedence + # Note: AIMessageChunkV1 always generates an ID if none provided + auto_id = AIMessageChunkV1(content=[]) # Gets auto-generated lc_* ID + default_id = AIMessageChunkV1( + content=[], id="lc_run--abc123" + ) # LangChain-assigned run ID + meaningful_id = AIMessageChunkV1( + content=[], id="msg_def456" + ) # provider-assigned ID + + # Provider-assigned IDs always win over LangChain-generated IDs + assert (auto_id + meaningful_id).id == "msg_def456" # provider-assigned wins + assert (meaningful_id + auto_id).id == "msg_def456" # provider-assigned wins + + assert ( + default_id + meaningful_id + ).id == "msg_def456" # meaningful_id is provider-assigned + assert ( + meaningful_id + default_id + ).id == "msg_def456" # meaningful_id is provider-assigned + + # Between auto-generated and lc_run--* IDs, run IDs win + assert (auto_id + default_id).id == default_id.id + assert (default_id + auto_id).id == default_id.id + + def test_chat_message_chunks() -> None: assert ChatMessageChunk(role="User", content="I am", id="ai4") + ChatMessageChunk( role="User", content=" indeed." 
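For illustration, the chunk-merging rules exercised in the tests above also hold for the stable (v0) `AIMessageChunk` API, which this patch leaves unchanged. A minimal sketch, assuming only `langchain_core` as patched here; the `create_tool_call_chunk` alias mirrors the import used in these tests:

.. code-block:: python

    from langchain_core.messages import AIMessageChunk
    from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk

    left = AIMessageChunk(
        content="",
        tool_call_chunks=[
            create_tool_call_chunk(name="tool1", args='{"a":', id="1", index=0)
        ],
    )
    right = AIMessageChunk(
        content="",
        tool_call_chunks=[
            create_tool_call_chunk(name=None, args="1}", id=None, index=0)
        ],
    )
    merged = left + right
    # Both chunks carry index=0, so their string fields are concatenated.
    assert merged.tool_call_chunks[0]["args"] == '{"a":1}'
    # Once the accumulated args parse as JSON, a full tool call is exposed.
    assert merged.tool_calls[0]["name"] == "tool1"
    assert merged.tool_calls[0]["args"] == {"a": 1}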
@@ -207,7 +364,7 @@ def test_chat_message_chunks() -> None: ): ChatMessageChunk(role="User", content="I am") + ChatMessageChunk( role="Assistant", content=" indeed." - ) + ) # type: ignore[reportUnusedExpression, unused-ignore] assert ChatMessageChunk(role="User", content="I am") + AIMessageChunk( content=" indeed." @@ -316,7 +473,7 @@ def test_function_message_chunks() -> None: ): FunctionMessageChunk(name="hello", content="I am") + FunctionMessageChunk( name="bye", content=" indeed." - ) + ) # type: ignore[reportUnusedExpression, unused-ignore] def test_ai_message_chunks() -> None: @@ -332,7 +489,7 @@ def test_ai_message_chunks() -> None: ): AIMessageChunk(example=True, content="I am") + AIMessageChunk( example=False, content=" indeed." - ) + ) # type: ignore[reportUnusedExpression, unused-ignore] class TestGetBufferString(unittest.TestCase): @@ -1116,23 +1273,20 @@ def test_is_data_content_block() -> None: assert is_data_content_block( { "type": "image", - "source_type": "url", "url": "https://...", } ) assert is_data_content_block( { "type": "image", - "source_type": "base64", - "data": "", + "base64": "", "mime_type": "image/jpeg", } ) assert is_data_content_block( { "type": "image", - "source_type": "base64", - "data": "", + "base64": "", "mime_type": "image/jpeg", "cache_control": {"type": "ephemeral"}, } @@ -1140,13 +1294,17 @@ def test_is_data_content_block() -> None: assert is_data_content_block( { "type": "image", - "source_type": "base64", - "data": "", + "base64": "", "mime_type": "image/jpeg", "metadata": {"cache_control": {"type": "ephemeral"}}, } ) - + assert is_data_content_block( + { + "type": "image", + "source_type": "base64", # backward compatibility + } + ) assert not is_data_content_block( { "type": "text", @@ -1162,43 +1320,124 @@ def test_is_data_content_block() -> None: assert not is_data_content_block( { "type": "image", - "source_type": "base64", + "source": "", } ) - assert not is_data_content_block( + + +def test_convert_to_openai_image_block() -> None: + for input_block in [ { "type": "image", - "source": "", + "url": "https://...", + "cache_control": {"type": "ephemeral"}, + }, + { + "type": "image", + "source_type": "url", + "url": "https://...", + "cache_control": {"type": "ephemeral"}, + }, + ]: + expected = { + "type": "image_url", + "image_url": {"url": "https://..."}, } - ) + result = convert_to_openai_image_block(input_block) + assert result == expected + for input_block in [ + { + "type": "image", + "base64": "", + "mime_type": "image/jpeg", + "cache_control": {"type": "ephemeral"}, + }, + { + "type": "image", + "source_type": "base64", + "data": "", + "mime_type": "image/jpeg", + "cache_control": {"type": "ephemeral"}, + }, + ]: + expected = { + "type": "image_url", + "image_url": { + "url": "data:image/jpeg;base64,", + }, + } + result = convert_to_openai_image_block(input_block) + assert result == expected -def test_convert_to_openai_image_block() -> None: - input_block = { - "type": "image", - "source_type": "url", - "url": "https://...", - "cache_control": {"type": "ephemeral"}, - } - expected = { - "type": "image_url", - "image_url": {"url": "https://..."}, - } - result = convert_to_openai_image_block(input_block) - assert result == expected - input_block = { - "type": "image", - "source_type": "base64", - "data": "", - "mime_type": "image/jpeg", - "cache_control": {"type": "ephemeral"}, +def test_known_block_types() -> None: + expected = { + bt + for bt in get_args(ContentBlock) + for bt in get_args(bt.__annotations__["type"]) } + # Normalize 
any Literal[...] types in block types to their string values. + # This ensures all entries are plain strings, not Literal objects. expected = { - "type": "image_url", - "image_url": { - "url": "data:image/jpeg;base64,", - }, + t + if isinstance(t, str) + else t.__args__[0] + if hasattr(t, "__args__") and len(t.__args__) == 1 + else t + for t in expected } - result = convert_to_openai_image_block(input_block) - assert result == expected + assert expected == KNOWN_BLOCK_TYPES + + +def test_v1_text_accessor() -> None: + """Test that v1 message.text property and .text() method return the same value.""" + # Test HumanMessage + human_msg = HumanMessageV1(content="Hello world") + assert human_msg.text == "Hello world" + assert human_msg.text() == "Hello world" # type: ignore[operator] + assert str(human_msg.text) == human_msg.text() # type: ignore[operator] + + # Test SystemMessage + system_msg = SystemMessageV1(content="You are a helpful assistant") + assert system_msg.text == "You are a helpful assistant" + assert system_msg.text() == "You are a helpful assistant" # type: ignore[operator] + assert str(system_msg.text) == system_msg.text() # type: ignore[operator] + + # Test AIMessage + ai_msg = AIMessageV1(content="I can help you with that") + assert ai_msg.text == "I can help you with that" + assert ai_msg.text() == "I can help you with that" # type: ignore[operator] + assert str(ai_msg.text) == ai_msg.text() # type: ignore[operator] + + # Test ToolMessage + tool_msg = ToolMessageV1(content="Task completed", tool_call_id="tool_1") + assert tool_msg.text == "Task completed" + assert tool_msg.text() == "Task completed" # type: ignore[operator] + assert str(tool_msg.text) == tool_msg.text() # type: ignore[operator] + + # Test with complex content (list of content blocks) + complex_msg = HumanMessageV1( + content=[{"type": "text", "text": "Hello "}, {"type": "text", "text": "world"}] + ) + assert complex_msg.text == "Hello world" + assert complex_msg.text() == "Hello world" # type: ignore[operator] + assert str(complex_msg.text) == complex_msg.text() # type: ignore[operator] + + # Test with mixed content (text and non-text blocks) + mixed_msg = AIMessageV1( + content=[ + {"type": "text", "text": "The answer is "}, + {"type": "tool_call", "name": "calculate", "args": {"x": 2}, "id": "1"}, + {"type": "text", "text": "42"}, + ] + ) + assert mixed_msg.text == "The answer is 42" + assert mixed_msg.text() == "The answer is 42" # type: ignore[operator] + assert str(mixed_msg.text) == mixed_msg.text() # type: ignore[operator] + + # Test empty content + empty_msg = HumanMessageV1(content=[]) + assert empty_msg.text == "" + assert empty_msg.text() == "" # type: ignore[operator] + assert str(empty_msg.text) == empty_msg.text() # type: ignore[operator] From 54a3c5f85cc17a32b4052d818d0eba23dd03b92f Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Mon, 11 Aug 2025 14:53:12 -0400 Subject: [PATCH 02/56] x --- .../language_models/chat_models.py | 14 ++ libs/core/tests/unit_tests/test_messages.py | 207 ------------------ 2 files changed, 14 insertions(+), 207 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 310f392fd2546..06838e3b07592 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -41,6 +41,7 @@ BaseMessageChunk, HumanMessage, convert_to_messages, + convert_to_openai_data_block, convert_to_openai_image_block, is_data_content_block, 
message_chunk_to_message, @@ -130,6 +131,19 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]: message_to_trace.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy convert_to_openai_image_block(block) ) + elif ( + block.get("type") == "file" + and is_data_content_block(block) + and "base64" in block + ): + if message_to_trace is message: + # Shallow copy + message_to_trace = message.model_copy() + message_to_trace.content = list(message_to_trace.content) + + message_to_trace.content[idx] = convert_to_openai_data_block( # type: ignore[index] + block + ) elif len(block) == 1 and "type" not in block: # Tracing assumes all content blocks have a "type" key. Here # we add this key if it is missing, and there's an obvious diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index 89c77414dc0b8..900e42691dede 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -36,11 +36,6 @@ from langchain_core.messages.tool import tool_call as create_tool_call from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk from langchain_core.utils._merge import merge_lists -from langchain_core.v1.messages import AIMessage as AIMessageV1 -from langchain_core.v1.messages import AIMessageChunk as AIMessageChunkV1 -from langchain_core.v1.messages import HumanMessage as HumanMessageV1 -from langchain_core.v1.messages import SystemMessage as SystemMessageV1 -from langchain_core.v1.messages import ToolMessage as ToolMessageV1 def test_message_init() -> None: @@ -203,155 +198,6 @@ def test_message_chunks() -> None: assert (meaningful_id + default_id).id == "msg_def456" -def test_message_chunks_v1() -> None: - left = AIMessageChunkV1("foo ", id="abc") - right = AIMessageChunkV1("bar") - expected = AIMessageChunkV1("foo bar", id="abc") - assert left + right == expected - - # Test tool calls - one = AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk(name="tool1", args="", id="1", index=0) - ], - ) - two = AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk(name=None, args='{"arg1": "val', id=None, index=0) - ], - ) - three = AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk(name=None, args='ue}"', id=None, index=0) - ], - ) - result = one + two + three - expected = AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk( - name="tool1", args='{"arg1": "value}"', id="1", index=0 - ) - ], - id=result.id, # Use the same ID as the result - ) - assert result == expected - - converted_message = result.to_message() - assert converted_message == AIMessageV1( - content=[ - { - "name": "tool1", - "args": {"arg1": "value}"}, - "id": "1", - "type": "tool_call", - } - ], - id=converted_message.id, # Use the same ID as the converted message - ) - - chunk1 = AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk(name="tool1", args="", id="1", index=0) - ], - ) - chunk2 = AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk(name="tool1", args="a", id=None, index=1) - ], - ) - # Don't merge if `index` field does not match. 
- merge_result = chunk1 + chunk2 - assert merge_result == AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk(name="tool1", args="", id="1", index=0), - create_tool_call_chunk(name="tool1", args="a", id=None, index=1), - ], - id=merge_result.id, # Use the same ID as the merge result - ) - - ai_msg_chunk = AIMessageChunkV1([]) - tool_calls_msg_chunk = AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk(name="tool1", args="a", id=None, index=1) - ], - ) - # These assertions test that adding empty chunks preserves the non-empty chunk - result1 = ai_msg_chunk + tool_calls_msg_chunk - assert result1.tool_call_chunks == tool_calls_msg_chunk.tool_call_chunks - assert result1.content == tool_calls_msg_chunk.content - - result2 = tool_calls_msg_chunk + ai_msg_chunk - assert result2.tool_call_chunks == tool_calls_msg_chunk.tool_call_chunks - assert result2.content == tool_calls_msg_chunk.content - - ai_msg_chunk = AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk(name="tool1", args="", id="1", index=0) - ], - ) - assert ai_msg_chunk.tool_calls == [create_tool_call(name="tool1", args={}, id="1")] - - # Test token usage - left = AIMessageChunkV1( - [], - usage_metadata={"input_tokens": 1, "output_tokens": 2, "total_tokens": 3}, - ) - right = AIMessageChunkV1( - [], - usage_metadata={"input_tokens": 4, "output_tokens": 5, "total_tokens": 9}, - ) - usage_result = left + right - expected_usage = AIMessageChunkV1( - content=[], - usage_metadata={"input_tokens": 5, "output_tokens": 7, "total_tokens": 12}, - id=usage_result.id, # Use the same ID as the result - ) - assert usage_result == expected_usage - - # Test adding empty chunks preserves the original - left_result = AIMessageChunkV1(content=[]) + left - assert left_result.usage_metadata == left.usage_metadata - assert left_result.content == left.content - - right_result = right + AIMessageChunkV1(content=[]) - assert right_result.usage_metadata == right.usage_metadata - assert right_result.content == right.content - - # Test ID order of precedence - # Note: AIMessageChunkV1 always generates an ID if none provided - auto_id = AIMessageChunkV1(content=[]) # Gets auto-generated lc_* ID - default_id = AIMessageChunkV1( - content=[], id="lc_run--abc123" - ) # LangChain-assigned run ID - meaningful_id = AIMessageChunkV1( - content=[], id="msg_def456" - ) # provider-assigned ID - - # Provider-assigned IDs always win over LangChain-generated IDs - assert (auto_id + meaningful_id).id == "msg_def456" # provider-assigned wins - assert (meaningful_id + auto_id).id == "msg_def456" # provider-assigned wins - - assert ( - default_id + meaningful_id - ).id == "msg_def456" # meaningful_id is provider-assigned - assert ( - meaningful_id + default_id - ).id == "msg_def456" # meaningful_id is provider-assigned - - # Between auto-generated and lc_run--* IDs, run IDs win - assert (auto_id + default_id).id == default_id.id - assert (default_id + auto_id).id == default_id.id - - def test_chat_message_chunks() -> None: assert ChatMessageChunk(role="User", content="I am", id="ai4") + ChatMessageChunk( role="User", content=" indeed." 
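For context on the `_format_for_tracing` change in this patch: base64 file blocks are now routed through `convert_to_openai_data_block` before being traced. A minimal sketch of that conversion, with the expected shape taken from the `test_convert_to_openai_messages_multimodal` expectations earlier in the series — treat the exact output keys as an assumption of this sketch rather than a documented contract:

.. code-block:: python

    from langchain_core.messages import convert_to_openai_data_block

    block = {
        "type": "file",
        "base64": "",  # placeholder, as in the tests
        "mime_type": "application/pdf",
        "filename": "test.pdf",
    }
    converted = convert_to_openai_data_block(block)
    # OpenAI Chat Completions represents file inputs as a data URL payload.
    assert converted == {
        "type": "file",
        "file": {
            "filename": "test.pdf",
            "file_data": "data:application/pdf;base64,",
        },
    }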
@@ -1388,56 +1234,3 @@ def test_known_block_types() -> None: for t in expected } assert expected == KNOWN_BLOCK_TYPES - - -def test_v1_text_accessor() -> None: - """Test that v1 message.text property and .text() method return the same value.""" - # Test HumanMessage - human_msg = HumanMessageV1(content="Hello world") - assert human_msg.text == "Hello world" - assert human_msg.text() == "Hello world" # type: ignore[operator] - assert str(human_msg.text) == human_msg.text() # type: ignore[operator] - - # Test SystemMessage - system_msg = SystemMessageV1(content="You are a helpful assistant") - assert system_msg.text == "You are a helpful assistant" - assert system_msg.text() == "You are a helpful assistant" # type: ignore[operator] - assert str(system_msg.text) == system_msg.text() # type: ignore[operator] - - # Test AIMessage - ai_msg = AIMessageV1(content="I can help you with that") - assert ai_msg.text == "I can help you with that" - assert ai_msg.text() == "I can help you with that" # type: ignore[operator] - assert str(ai_msg.text) == ai_msg.text() # type: ignore[operator] - - # Test ToolMessage - tool_msg = ToolMessageV1(content="Task completed", tool_call_id="tool_1") - assert tool_msg.text == "Task completed" - assert tool_msg.text() == "Task completed" # type: ignore[operator] - assert str(tool_msg.text) == tool_msg.text() # type: ignore[operator] - - # Test with complex content (list of content blocks) - complex_msg = HumanMessageV1( - content=[{"type": "text", "text": "Hello "}, {"type": "text", "text": "world"}] - ) - assert complex_msg.text == "Hello world" - assert complex_msg.text() == "Hello world" # type: ignore[operator] - assert str(complex_msg.text) == complex_msg.text() # type: ignore[operator] - - # Test with mixed content (text and non-text blocks) - mixed_msg = AIMessageV1( - content=[ - {"type": "text", "text": "The answer is "}, - {"type": "tool_call", "name": "calculate", "args": {"x": 2}, "id": "1"}, - {"type": "text", "text": "42"}, - ] - ) - assert mixed_msg.text == "The answer is 42" - assert mixed_msg.text() == "The answer is 42" # type: ignore[operator] - assert str(mixed_msg.text) == mixed_msg.text() # type: ignore[operator] - - # Test empty content - empty_msg = HumanMessageV1(content=[]) - assert empty_msg.text == "" - assert empty_msg.text() == "" # type: ignore[operator] - assert str(empty_msg.text) == empty_msg.text() # type: ignore[operator] From f8244b9108bddb10aa1d604485e562bee70e03fc Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Mon, 11 Aug 2025 16:33:48 -0400 Subject: [PATCH 03/56] type required on tool_call_chunk; keep messages.tool.ToolCallChunk --- .../langchain_core/messages/content_blocks.py | 2 +- libs/core/langchain_core/messages/tool.py | 35 +++++++++++++++++-- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index d426339156d39..daf5112507406 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -334,7 +334,7 @@ class ToolCallChunk(TypedDict): # TODO: Consider making fields NotRequired[str] in the future. 
- type: NotRequired[Literal["tool_call_chunk"]] + type: Literal["tool_call_chunk"] """Used for serialization.""" id: Optional[str] diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index 181c80443d56f..33755ce0ecb44 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -5,12 +5,11 @@ from uuid import UUID from pydantic import Field, model_validator -from typing_extensions import override +from typing_extensions import NotRequired, TypedDict, override from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content from langchain_core.messages.content_blocks import InvalidToolCall as InvalidToolCall from langchain_core.messages.content_blocks import ToolCall as ToolCall -from langchain_core.messages.content_blocks import ToolCallChunk as ToolCallChunk from langchain_core.utils._merge import merge_dicts, merge_obj @@ -196,6 +195,38 @@ def tool_call( return ToolCall(name=name, args=args, id=id, type="tool_call") +class ToolCallChunk(TypedDict): + """A chunk of a tool call (e.g., as part of a stream). + + When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + all string attributes are concatenated. Chunks are only merged if their + values of `index` are equal and not None. + + Example: + + .. code-block:: python + + left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] + right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] + + ( + AIMessageChunk(content="", tool_call_chunks=left_chunks) + + AIMessageChunk(content="", tool_call_chunks=right_chunks) + ).tool_call_chunks == [ToolCallChunk(name='foo', args='{"a":1}', index=0)] + + """ + + name: Optional[str] + """The name of the tool to be called.""" + args: Optional[str] + """The arguments to the tool call.""" + id: Optional[str] + """An identifier associated with the tool call.""" + index: Optional[int] + """The index of the tool call in a sequence.""" + type: NotRequired[Literal["tool_call_chunk"]] + + def tool_call_chunk( *, name: Optional[str] = None, From 1b9ec25755013d39025e5982d029dae591bcb5b6 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Mon, 11 Aug 2025 16:52:08 -0400 Subject: [PATCH 04/56] update init on aimessage --- libs/core/langchain_core/messages/ai.py | 65 ++++++++++++++--- libs/core/langchain_core/messages/base.py | 72 ++++++++++++++++--- .../core/tests/unit_tests/messages/test_ai.py | 71 ++++++++++++++++++ .../prompts/__snapshots__/test_chat.ambr | 20 ++---- .../runnables/__snapshots__/test_graph.ambr | 10 +-- libs/core/tests/unit_tests/test_messages.py | 32 +++++++++ 6 files changed, 232 insertions(+), 38 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 45eb7d2ba5e8d..9d4a1e01879df 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -3,11 +3,12 @@ import json import logging import operator -from typing import Any, Literal, Optional, Union, cast +from typing import Any, Literal, Optional, Union, cast, overload from pydantic import model_validator from typing_extensions import NotRequired, Self, TypedDict, override +from langchain_core.messages import content_blocks as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content from langchain_core.messages.tool import ( InvalidToolCall, @@ -179,16 +180,35 @@ class AIMessage(BaseMessage): type: Literal["ai"] = "ai" """The type of the message (used for deserialization). 
Defaults to "ai".""" + @overload def __init__( - self, content: Union[str, list[Union[str, dict]]], **kwargs: Any - ) -> None: - """Pass in content as positional arg. + self, + content: Union[str, list[Union[str, dict]]], + **kwargs: Any, + ) -> None: ... - Args: - content: The content of the message. - kwargs: Additional arguments to pass to the parent class. - """ - super().__init__(content=content, **kwargs) + @overload + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: ... + + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: + """Specify content as a positional arg or content_blocks for typing support.""" + if content_blocks is not None: + super().__init__( + content=cast("Union[str, list[Union[str, dict]]]", content_blocks), + **kwargs, + ) + else: + super().__init__(content=content, **kwargs) @property def lc_attributes(self) -> dict: @@ -198,6 +218,33 @@ def lc_attributes(self) -> dict: "invalid_tool_calls": self.invalid_tool_calls, } + @property + def content_blocks(self) -> list[types.ContentBlock]: + """Return content blocks of the message.""" + blocks = super().content_blocks + + # Add from tool_calls if missing from content + content_tool_call_ids = { + block.get("id") + for block in self.content + if isinstance(block, dict) and block.get("type") == "tool_call" + } + for tool_call in self.tool_calls: + if (id_ := tool_call.get("id")) and id_ not in content_tool_call_ids: + tool_call_block: types.ToolCall = { + "type": "tool_call", + "id": id_, + "name": tool_call["name"], + "args": tool_call["args"], + } + if "index" in tool_call: + tool_call_block["index"] = tool_call["index"] + if "extras" in tool_call: + tool_call_block["extras"] = tool_call["extras"] + blocks.append(tool_call_block) + + return blocks + # TODO: remove this logic if possible, reducing breaking nature of changes @model_validator(mode="before") @classmethod diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index ba976286b75d8..13b12f764d19e 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -2,11 +2,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Optional, Union, cast +from typing import TYPE_CHECKING, Any, Optional, Union, cast, overload from pydantic import ConfigDict, Field from langchain_core.load.serializable import Serializable +from langchain_core.messages import content_blocks as types from langchain_core.utils import get_bolded_text from langchain_core.utils._merge import merge_dicts, merge_lists from langchain_core.utils.interactive_env import is_interactive_env @@ -61,15 +62,32 @@ class BaseMessage(Serializable): extra="allow", ) + @overload def __init__( - self, content: Union[str, list[Union[str, dict]]], **kwargs: Any - ) -> None: - """Pass in content as positional arg. + self, + content: Union[str, list[Union[str, dict]]], + **kwargs: Any, + ) -> None: ... - Args: - content: The string contents of the message. - """ - super().__init__(content=content, **kwargs) + @overload + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: ... 
+ + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: + """Specify content as a positional arg or content_blocks for typing support.""" + if content_blocks is not None: + super().__init__(content=content_blocks, **kwargs) + else: + super().__init__(content=content, **kwargs) @classmethod def is_lc_serializable(cls) -> bool: @@ -88,6 +106,44 @@ def get_lc_namespace(cls) -> list[str]: """ return ["langchain", "schema", "messages"] + @property + def content_blocks(self) -> list[types.ContentBlock]: + """Return the content as a list of standard ContentBlocks. + + To use this property, the corresponding chat model must support + ``message_version="v1"`` or higher: + + .. code-block:: python + + from langchain.chat_models import init_chat_model + llm = init_chat_model("...", message_version="v1") + + otherwise, does best-effort parsing to standard types. + """ + blocks: list[types.ContentBlock] = [] + content = ( + [self.content] + if isinstance(self.content, str) and self.content + else self.content + ) + for item in content: + if isinstance(item, str): + blocks.append({"type": "text", "text": item}) + elif isinstance(item, dict): + item_type = item.get("type") + if item_type not in types.KNOWN_BLOCK_TYPES: + msg = ( + f"Non-standard content block type '{item_type}'. Ensure " + "the model supports `output_version='v1'` or higher and " + "that this attribute is set on initialization." + ) + raise ValueError(msg) + blocks.append(cast("types.ContentBlock", item)) + else: + pass + + return blocks + def text(self) -> str: """Get the text content of the message. diff --git a/libs/core/tests/unit_tests/messages/test_ai.py b/libs/core/tests/unit_tests/messages/test_ai.py index d36d034712817..81981725c5005 100644 --- a/libs/core/tests/unit_tests/messages/test_ai.py +++ b/libs/core/tests/unit_tests/messages/test_ai.py @@ -1,5 +1,6 @@ from langchain_core.load import dumpd, load from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content_blocks as types from langchain_core.messages.ai import ( InputTokenDetails, OutputTokenDetails, @@ -196,3 +197,73 @@ def test_add_ai_message_chunks_usage() -> None: output_token_details=OutputTokenDetails(audio=1, reasoning=2), ), ) + + +def test_content_blocks() -> None: + message = AIMessage( + "", + tool_calls=[ + {"type": "tool_call", "name": "foo", "args": {"a": "b"}, "id": "abc_123"} + ], + ) + assert len(message.content_blocks) == 1 + assert message.content_blocks[0]["type"] == "tool_call" + assert message.content_blocks == [ + {"type": "tool_call", "id": "abc_123", "name": "foo", "args": {"a": "b"}} + ] + assert message.content == "" + + message = AIMessage( + "foo", + tool_calls=[ + {"type": "tool_call", "name": "foo", "args": {"a": "b"}, "id": "abc_123"} + ], + ) + assert len(message.content_blocks) == 2 + assert message.content_blocks[0]["type"] == "text" + assert message.content_blocks[1]["type"] == "tool_call" + assert message.content_blocks == [ + {"type": "text", "text": "foo"}, + {"type": "tool_call", "id": "abc_123", "name": "foo", "args": {"a": "b"}}, + ] + assert message.content == "foo" + + # With standard blocks + standard_content: list[types.ContentBlock] = [ + {"type": "reasoning", "reasoning": "foo"}, + {"type": "text", "text": "bar"}, + { + "type": "text", + "text": "baz", + "annotations": [{"type": "citation", "url": "http://example.com"}], + }, + { + "type": "image", + "url": 
"http://example.com/image.png", + "extras": {"foo": "bar"}, + }, + { + "type": "non_standard", + "value": {"custom_key": "custom_value", "another_key": 123}, + }, + { + "type": "tool_call", + "name": "foo", + "args": {"a": "b"}, + "id": "abc_123", + }, + ] + missing_tool_call = { + "type": "tool_call", + "name": "bar", + "args": {"c": "d"}, + "id": "abc_234", + } + message = AIMessage( + content_blocks=standard_content, + tool_calls=[ + {"type": "tool_call", "name": "foo", "args": {"a": "b"}, "id": "abc_123"}, + missing_tool_call, + ], + ) + assert message.content_blocks == [*standard_content, missing_tool_call] diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index f45b2f7dae9b6..08a1c528cfb6f 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -1062,9 +1062,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ``ToolCallChunks`` (e.g., via ``AIMessageChunk.__add__``), + When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their - values of ``index`` are equal and not ``None``. + values of `index` are equal and not None. Example: @@ -1090,10 +1090,6 @@ ]), 'title': 'Args', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -1134,9 +1130,9 @@ }), }), 'required': list([ - 'id', 'name', 'args', + 'id', 'index', ]), 'title': 'ToolCallChunk', @@ -2523,9 +2519,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ``ToolCallChunks`` (e.g., via ``AIMessageChunk.__add__``), + When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their - values of ``index`` are equal and not ``None``. + values of `index` are equal and not None. Example: @@ -2551,10 +2547,6 @@ ]), 'title': 'Args', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -2595,9 +2587,9 @@ }), }), 'required': list([ - 'id', 'name', 'args', + 'id', 'index', ]), 'title': 'ToolCallChunk', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index 1a5bcc93dad87..d3a746eaf7966 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -1465,9 +1465,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ``ToolCallChunks`` (e.g., via ``AIMessageChunk.__add__``), + When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their - values of ``index`` are equal and not ``None``. + values of `index` are equal and not None. 
Example: @@ -1493,10 +1493,6 @@ ]), 'title': 'Args', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -1537,9 +1533,9 @@ }), }), 'required': list([ - 'id', 'name', 'args', + 'id', 'index', ]), 'title': 'ToolCallChunk', diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index 900e42691dede..7157c3341d1dc 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -1234,3 +1234,35 @@ def test_known_block_types() -> None: for t in expected } assert expected == KNOWN_BLOCK_TYPES + + +def test_typed_init() -> None: + # AIMessage + message = AIMessage("Hello") + assert message.content == "Hello" + assert message.content_blocks == [{"type": "text", "text": "Hello"}] + + message = AIMessage(content="Hello") + assert message.content == "Hello" + assert message.content_blocks == [{"type": "text", "text": "Hello"}] + + message = AIMessage(content_blocks=[{"type": "text", "text": "Hello"}]) + assert message.content == [{"type": "text", "text": "Hello"}] + assert message.content_blocks == [{"type": "text", "text": "Hello"}] + + # # HumanMessage + # message = HumanMessage("Hello") + # assert message.content == "Hello" + # assert message.content_blocks == [{"type": "text", "text": "Hello"}] + + # message = HumanMessage(content="Hello") + # assert message.content == "Hello" + # assert message.content_blocks == [{"type": "text", "text": "Hello"}] + + # message = HumanMessage(content_blocks=[{"type": "text", "text": "Hello"}]) + # assert message.content == [{"type": "text", "text": "Hello"}] + # assert message.content_blocks == [{"type": "text", "text": "Hello"}] + + # Test we get type errors for malformed blocks (type checker will complain if + # below type-ignores are unused). + _ = AIMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] From 8426db47f151bc8f39493b67a7c789e9ed5afcb3 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Mon, 11 Aug 2025 18:09:04 -0400 Subject: [PATCH 05/56] update init on HumanMessage, SystemMessage, ToolMessage --- libs/core/langchain_core/messages/human.py | 38 ++++++++++---- libs/core/langchain_core/messages/system.py | 38 ++++++++++---- libs/core/langchain_core/messages/tool.py | 38 ++++++++++---- libs/core/tests/unit_tests/test_messages.py | 58 ++++++++++++--------- libs/core/tests/unit_tests/test_tools.py | 12 ++--- 5 files changed, 126 insertions(+), 58 deletions(-) diff --git a/libs/core/langchain_core/messages/human.py b/libs/core/langchain_core/messages/human.py index 1be4cbfa9d3d9..041db0cdb9726 100644 --- a/libs/core/langchain_core/messages/human.py +++ b/libs/core/langchain_core/messages/human.py @@ -1,7 +1,8 @@ """Human message.""" -from typing import Any, Literal, Union +from typing import Any, Literal, Optional, Union, cast, overload +from langchain_core.messages import content_blocks as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk @@ -41,16 +42,35 @@ class HumanMessage(BaseMessage): type: Literal["human"] = "human" """The type of the message (used for serialization). Defaults to "human".""" + @overload def __init__( - self, content: Union[str, list[Union[str, dict]]], **kwargs: Any - ) -> None: - """Pass in content as positional arg. + self, + content: Union[str, list[Union[str, dict]]], + **kwargs: Any, + ) -> None: ... 
+ + @overload + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: ... - Args: - content: The string contents of the message. - kwargs: Additional fields to pass to the message. - """ - super().__init__(content=content, **kwargs) + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: + """Specify content as a positional arg or content_blocks for typing support.""" + if content_blocks is not None: + super().__init__( + content=cast("Union[str, list[Union[str, dict]]]", content_blocks), + **kwargs, + ) + else: + super().__init__(content=content, **kwargs) class HumanMessageChunk(HumanMessage, BaseMessageChunk): diff --git a/libs/core/langchain_core/messages/system.py b/libs/core/langchain_core/messages/system.py index d63bd53a0fee0..c8a5bbae5c859 100644 --- a/libs/core/langchain_core/messages/system.py +++ b/libs/core/langchain_core/messages/system.py @@ -1,7 +1,8 @@ """System message.""" -from typing import Any, Literal, Union +from typing import Any, Literal, Optional, Union, cast, overload +from langchain_core.messages import content_blocks as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk @@ -34,16 +35,35 @@ class SystemMessage(BaseMessage): type: Literal["system"] = "system" """The type of the message (used for serialization). Defaults to "system".""" + @overload def __init__( - self, content: Union[str, list[Union[str, dict]]], **kwargs: Any - ) -> None: - """Pass in content as positional arg. + self, + content: Union[str, list[Union[str, dict]]], + **kwargs: Any, + ) -> None: ... + + @overload + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: ... - Args: - content: The string contents of the message. - kwargs: Additional fields to pass to the message. 
- """ - super().__init__(content=content, **kwargs) + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: + """Specify content as a positional arg or content_blocks for typing support.""" + if content_blocks is not None: + super().__init__( + content=cast("Union[str, list[Union[str, dict]]]", content_blocks), + **kwargs, + ) + else: + super().__init__(content=content, **kwargs) class SystemMessageChunk(SystemMessage, BaseMessageChunk): diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index 33755ce0ecb44..14177181480fc 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -1,12 +1,13 @@ """Messages for tools.""" import json -from typing import Any, Literal, Optional, Union +from typing import Any, Literal, Optional, Union, cast, overload from uuid import UUID from pydantic import Field, model_validator from typing_extensions import NotRequired, TypedDict, override +from langchain_core.messages import content_blocks as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content from langchain_core.messages.content_blocks import InvalidToolCall as InvalidToolCall from langchain_core.messages.content_blocks import ToolCall as ToolCall @@ -135,16 +136,35 @@ def coerce_args(cls, values: dict) -> dict: values["tool_call_id"] = str(tool_call_id) return values + @overload def __init__( - self, content: Union[str, list[Union[str, dict]]], **kwargs: Any - ) -> None: - """Create a ToolMessage. + self, + content: Union[str, list[Union[str, dict]]], + **kwargs: Any, + ) -> None: ... - Args: - content: The string contents of the message. - **kwargs: Additional fields. - """ - super().__init__(content=content, **kwargs) + @overload + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: ... 
+ + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: + """Specify content as a positional arg or content_blocks for typing support.""" + if content_blocks is not None: + super().__init__( + content=cast("Union[str, list[Union[str, dict]]]", content_blocks), + **kwargs, + ) + else: + super().__init__(content=content, **kwargs) class ToolMessageChunk(ToolMessage, BaseMessageChunk): diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index 7157c3341d1dc..d6857aef32de2 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -1041,12 +1041,12 @@ def test_tool_message_content() -> None: ToolMessage(["foo"], tool_call_id="1") ToolMessage([{"foo": "bar"}], tool_call_id="1") - assert ToolMessage(("a", "b", "c"), tool_call_id="1").content == ["a", "b", "c"] # type: ignore[arg-type] - assert ToolMessage(5, tool_call_id="1").content == "5" # type: ignore[arg-type] - assert ToolMessage(5.1, tool_call_id="1").content == "5.1" # type: ignore[arg-type] - assert ToolMessage({"foo": "bar"}, tool_call_id="1").content == "{'foo': 'bar'}" # type: ignore[arg-type] + assert ToolMessage(("a", "b", "c"), tool_call_id="1").content == ["a", "b", "c"] # type: ignore[call-overload] + assert ToolMessage(5, tool_call_id="1").content == "5" # type: ignore[call-overload] + assert ToolMessage(5.1, tool_call_id="1").content == "5.1" # type: ignore[call-overload] + assert ToolMessage({"foo": "bar"}, tool_call_id="1").content == "{'foo': 'bar'}" # type: ignore[call-overload] assert ( - ToolMessage(Document("foo"), tool_call_id="1").content == "page_content='foo'" # type: ignore[arg-type] + ToolMessage(Document("foo"), tool_call_id="1").content == "page_content='foo'" # type: ignore[call-overload] ) @@ -1237,32 +1237,40 @@ def test_known_block_types() -> None: def test_typed_init() -> None: - # AIMessage - message = AIMessage("Hello") - assert message.content == "Hello" - assert message.content_blocks == [{"type": "text", "text": "Hello"}] + ai_message = AIMessage(content_blocks=[{"type": "text", "text": "Hello"}]) + assert ai_message.content == [{"type": "text", "text": "Hello"}] + assert ai_message.content_blocks == ai_message.content - message = AIMessage(content="Hello") - assert message.content == "Hello" - assert message.content_blocks == [{"type": "text", "text": "Hello"}] + human_message = HumanMessage(content_blocks=[{"type": "text", "text": "Hello"}]) + assert human_message.content == [{"type": "text", "text": "Hello"}] + assert human_message.content_blocks == human_message.content - message = AIMessage(content_blocks=[{"type": "text", "text": "Hello"}]) - assert message.content == [{"type": "text", "text": "Hello"}] - assert message.content_blocks == [{"type": "text", "text": "Hello"}] + system_message = SystemMessage(content_blocks=[{"type": "text", "text": "Hello"}]) + assert system_message.content == [{"type": "text", "text": "Hello"}] + assert system_message.content_blocks == system_message.content - # # HumanMessage - # message = HumanMessage("Hello") - # assert message.content == "Hello" - # assert message.content_blocks == [{"type": "text", "text": "Hello"}] + tool_message = ToolMessage( + content_blocks=[{"type": "text", "text": "Hello"}], + tool_call_id="abc123", + ) + assert tool_message.content == [{"type": "text", "text": "Hello"}] + assert tool_message.content_blocks == 
tool_message.content - # message = HumanMessage(content="Hello") - # assert message.content == "Hello" - # assert message.content_blocks == [{"type": "text", "text": "Hello"}] + for message_class in [AIMessage, HumanMessage, SystemMessage]: + message = message_class("Hello") + assert message.content == "Hello" + assert message.content_blocks == [{"type": "text", "text": "Hello"}] - # message = HumanMessage(content_blocks=[{"type": "text", "text": "Hello"}]) - # assert message.content == [{"type": "text", "text": "Hello"}] - # assert message.content_blocks == [{"type": "text", "text": "Hello"}] + message = message_class(content="Hello") + assert message.content == "Hello" + assert message.content_blocks == [{"type": "text", "text": "Hello"}] # Test we get type errors for malformed blocks (type checker will complain if # below type-ignores are unused). _ = AIMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + _ = HumanMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + _ = SystemMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + _ = ToolMessage( + content_blocks=[{"type": "text", "bad": "Hello"}], # type: ignore[list-item] + tool_call_id="abc123", + ) diff --git a/libs/core/tests/unit_tests/test_tools.py b/libs/core/tests/unit_tests/test_tools.py index 72c6a5a387cfb..af0cdec45ce21 100644 --- a/libs/core/tests/unit_tests/test_tools.py +++ b/libs/core/tests/unit_tests/test_tools.py @@ -2281,7 +2281,7 @@ def test_tool_injected_tool_call_id() -> None: @tool def foo(x: int, tool_call_id: Annotated[str, InjectedToolCallId]) -> ToolMessage: """Foo.""" - return ToolMessage(x, tool_call_id=tool_call_id) # type: ignore[arg-type] + return ToolMessage(x, tool_call_id=tool_call_id) # type: ignore[call-overload] assert foo.invoke( { @@ -2290,7 +2290,7 @@ def foo(x: int, tool_call_id: Annotated[str, InjectedToolCallId]) -> ToolMessage "name": "foo", "id": "bar", } - ) == ToolMessage(0, tool_call_id="bar") # type: ignore[arg-type] + ) == ToolMessage(0, tool_call_id="bar") # type: ignore[call-overload] with pytest.raises( ValueError, @@ -2302,7 +2302,7 @@ def foo(x: int, tool_call_id: Annotated[str, InjectedToolCallId]) -> ToolMessage @tool def foo2(x: int, tool_call_id: Annotated[str, InjectedToolCallId()]) -> ToolMessage: """Foo.""" - return ToolMessage(x, tool_call_id=tool_call_id) # type: ignore[arg-type] + return ToolMessage(x, tool_call_id=tool_call_id) # type: ignore[call-overload] assert foo2.invoke( { @@ -2311,14 +2311,14 @@ def foo2(x: int, tool_call_id: Annotated[str, InjectedToolCallId()]) -> ToolMess "name": "foo", "id": "bar", } - ) == ToolMessage(0, tool_call_id="bar") # type: ignore[arg-type] + ) == ToolMessage(0, tool_call_id="bar") # type: ignore[call-overload] def test_tool_uninjected_tool_call_id() -> None: @tool def foo(x: int, tool_call_id: str) -> ToolMessage: """Foo.""" - return ToolMessage(x, tool_call_id=tool_call_id) # type: ignore[arg-type] + return ToolMessage(x, tool_call_id=tool_call_id) # type: ignore[call-overload] with pytest.raises(ValueError, match="1 validation error for foo"): foo.invoke({"type": "tool_call", "args": {"x": 0}, "name": "foo", "id": "bar"}) @@ -2330,7 +2330,7 @@ def foo(x: int, tool_call_id: str) -> ToolMessage: "name": "foo", "id": "bar", } - ) == ToolMessage(0, tool_call_id="zap") # type: ignore[arg-type] + ) == ToolMessage(0, tool_call_id="zap") # type: ignore[call-overload] def test_tool_return_output_mixin() -> None: From 
0ddab9ff20faa3bee52892b9b2ce87c9316ef2e4 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 12 Aug 2025 10:59:50 -0400 Subject: [PATCH 06/56] start on duplicate content --- libs/core/langchain_core/messages/ai.py | 70 ++++++++++++------- libs/core/langchain_core/messages/base.py | 69 ++++++++++-------- libs/core/langchain_core/messages/human.py | 7 +- libs/core/langchain_core/messages/system.py | 7 +- libs/core/langchain_core/messages/tool.py | 7 +- .../language_models/chat_models/test_base.py | 44 ++++-------- libs/core/tests/unit_tests/test_messages.py | 19 +++-- 7 files changed, 129 insertions(+), 94 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 9d4a1e01879df..6983143b6fa6b 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -202,11 +202,16 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content_blocks is not None: + if content is not None and content_blocks is None: + super().__init__(content=content, **kwargs) + elif content is None and content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), + content_blocks=content_blocks, **kwargs, ) + elif content is not None and content_blocks is not None: + super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) @@ -218,32 +223,45 @@ def lc_attributes(self) -> dict: "invalid_tool_calls": self.invalid_tool_calls, } - @property - def content_blocks(self) -> list[types.ContentBlock]: - """Return content blocks of the message.""" - blocks = super().content_blocks - - # Add from tool_calls if missing from content - content_tool_call_ids = { - block.get("id") - for block in self.content - if isinstance(block, dict) and block.get("type") == "tool_call" - } - for tool_call in self.tool_calls: - if (id_ := tool_call.get("id")) and id_ not in content_tool_call_ids: - tool_call_block: types.ToolCall = { - "type": "tool_call", - "id": id_, - "name": tool_call["name"], - "args": tool_call["args"], - } - if "index" in tool_call: - tool_call_block["index"] = tool_call["index"] - if "extras" in tool_call: - tool_call_block["extras"] = tool_call["extras"] - blocks.append(tool_call_block) + @model_validator(mode="after") + def _init_content_blocks(self) -> Self: + """Assign the content as a list of standard ContentBlocks. + + To use this property, the corresponding chat model must support + ``message_version="v1"`` or higher: - return blocks + .. code-block:: python + + from langchain.chat_models import init_chat_model + llm = init_chat_model("...", message_version="v1") + + otherwise, does best-effort parsing to standard types. 
+ """ + if not self.content_blocks: + self.content_blocks = self._init_text_content(self.content) + + if self.tool_calls or self.invalid_tool_calls: + # Add from tool_calls if missing from content + content_tool_call_ids = { + block.get("id") + for block in self.content_blocks + if isinstance(block, dict) and block.get("type") == "tool_call" + } + for tool_call in self.tool_calls: + if (id_ := tool_call.get("id")) and id_ not in content_tool_call_ids: + tool_call_block: types.ToolCall = { + "type": "tool_call", + "id": id_, + "name": tool_call["name"], + "args": tool_call["args"], + } + if "index" in tool_call: + tool_call_block["index"] = tool_call["index"] + if "extras" in tool_call: + tool_call_block["extras"] = tool_call["extras"] + self.content_blocks.append(tool_call_block) + + return self # TODO: remove this logic if possible, reducing breaking nature of changes @model_validator(mode="before") diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 13b12f764d19e..ff88dd248eb18 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -4,7 +4,8 @@ from typing import TYPE_CHECKING, Any, Optional, Union, cast, overload -from pydantic import ConfigDict, Field +from pydantic import ConfigDict, Field, model_validator +from typing_extensions import Self from langchain_core.load.serializable import Serializable from langchain_core.messages import content_blocks as types @@ -27,6 +28,9 @@ class BaseMessage(Serializable): content: Union[str, list[Union[str, dict]]] """The string contents of the message.""" + content_blocks: list[types.ContentBlock] = Field(default_factory=list) + """The content of the message as a list of standard ContentBlocks.""" + additional_kwargs: dict = Field(default_factory=dict) """Reserved for additional payload data associated with the message. @@ -84,8 +88,14 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content_blocks is not None: - super().__init__(content=content_blocks, **kwargs) + if content is not None and content_blocks is None: + super().__init__(content=content, **kwargs) + elif content is None and content_blocks is not None: + super().__init__( + content=content_blocks, content_blocks=content_blocks, **kwargs + ) + elif content is not None and content_blocks is not None: + super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) @@ -106,9 +116,30 @@ def get_lc_namespace(cls) -> list[str]: """ return ["langchain", "schema", "messages"] - @property - def content_blocks(self) -> list[types.ContentBlock]: - """Return the content as a list of standard ContentBlocks. 
+ @staticmethod + def _init_text_content( + content: Union[str, list[Union[str, dict]]], + ) -> list[types.ContentBlock]: + """Parse string content into a list of ContentBlocks.""" + blocks: list[types.ContentBlock] = [] + content = [content] if isinstance(content, str) and content else content + for item in content: + if isinstance(item, str): + blocks.append({"type": "text", "text": item}) + elif isinstance(item, dict): + item_type = item.get("type") + if item_type not in types.KNOWN_BLOCK_TYPES: + blocks.append({"type": "non_standard", "value": item}) + else: + blocks.append(cast("types.ContentBlock", item)) + else: + pass + + return blocks + + @model_validator(mode="after") + def _init_content_blocks(self) -> Self: + """Assign the content as a list of standard ContentBlocks. To use this property, the corresponding chat model must support ``message_version="v1"`` or higher: @@ -120,29 +151,11 @@ def content_blocks(self) -> list[types.ContentBlock]: otherwise, does best-effort parsing to standard types. """ - blocks: list[types.ContentBlock] = [] - content = ( - [self.content] - if isinstance(self.content, str) and self.content - else self.content - ) - for item in content: - if isinstance(item, str): - blocks.append({"type": "text", "text": item}) - elif isinstance(item, dict): - item_type = item.get("type") - if item_type not in types.KNOWN_BLOCK_TYPES: - msg = ( - f"Non-standard content block type '{item_type}'. Ensure " - "the model supports `output_version='v1'` or higher and " - "that this attribute is set on initialization." - ) - raise ValueError(msg) - blocks.append(cast("types.ContentBlock", item)) - else: - pass + if not self.content_blocks: + blocks = self._init_text_content(self.content) + self.content_blocks = blocks - return blocks + return self def text(self) -> str: """Get the text content of the message. 
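
For reference, a minimal sketch of the parsing behavior the base.py hunk above
is aiming for at this point in the series: string content becomes a single
text block, and unrecognized dict blocks are wrapped as ``non_standard``
rather than raising (``"cachePoint"`` here is just an example of a type absent
from ``KNOWN_BLOCK_TYPES``):

.. code-block:: python

    from langchain_core.messages import HumanMessage

    # String content is parsed into a single text block.
    msg = HumanMessage("Hello")
    assert msg.content_blocks == [{"type": "text", "text": "Hello"}]

    # Unknown dict blocks are preserved as non-standard blocks instead of
    # raising, per _init_text_content above.
    raw = {"type": "cachePoint", "cachePoint": {"type": "default"}}
    msg = HumanMessage(content=[raw])
    assert msg.content_blocks == [{"type": "non_standard", "value": raw}]
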
diff --git a/libs/core/langchain_core/messages/human.py b/libs/core/langchain_core/messages/human.py index 041db0cdb9726..5383bb964d4fc 100644 --- a/libs/core/langchain_core/messages/human.py +++ b/libs/core/langchain_core/messages/human.py @@ -64,11 +64,16 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content_blocks is not None: + if content is not None and content_blocks is None: + super().__init__(content=content, **kwargs) + elif content is None and content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), + content_blocks=content_blocks, **kwargs, ) + elif content is not None and content_blocks is not None: + super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) diff --git a/libs/core/langchain_core/messages/system.py b/libs/core/langchain_core/messages/system.py index c8a5bbae5c859..976ea55d15685 100644 --- a/libs/core/langchain_core/messages/system.py +++ b/libs/core/langchain_core/messages/system.py @@ -57,11 +57,16 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content_blocks is not None: + if content is not None and content_blocks is None: + super().__init__(content=content, **kwargs) + elif content is None and content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), + content_blocks=content_blocks, **kwargs, ) + elif content is not None and content_blocks is not None: + super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index 14177181480fc..78b4f14c3a953 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -158,11 +158,16 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content_blocks is not None: + if content is not None and content_blocks is None: + super().__init__(content=content, **kwargs) + elif content is None and content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), + content_blocks=content_blocks, **kwargs, ) + elif content is not None and content_blocks is not None: + super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 37b05ed825566..12cd42ddc322c 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -443,19 +443,11 @@ def test_trace_images_in_openai_format() -> None: ] tracer = FakeChatModelStartTracer() response = llm.invoke(messages, config={"callbacks": [tracer]}) - assert tracer.messages == [ - [ - [ - HumanMessage( - content=[ - { - "type": "image_url", - "image_url": {"url": "https://example.com/image.png"}, - } - ] - ) - ] - ] + assert tracer.messages[0][0][0].content == [ + { + "type": "image_url", + "image_url": {"url": "https://example.com/image.png"}, + } ] # Test no mutation assert response.content == [ @@ -486,23 +478,15 @@ def 
test_trace_content_blocks_with_no_type_key() -> None: ] tracer = FakeChatModelStartTracer() response = llm.invoke(messages, config={"callbacks": [tracer]}) - assert tracer.messages == [ - [ - [ - HumanMessage( - [ - { - "type": "text", - "text": "Hello", - }, - { - "type": "cachePoint", - "cachePoint": {"type": "default"}, - }, - ] - ) - ] - ] + assert tracer.messages[0][0][0].content == [ + { + "type": "text", + "text": "Hello", + }, + { + "type": "cachePoint", + "cachePoint": {"type": "default"}, + }, ] # Test no mutation assert response.content == [ diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index d6857aef32de2..ec17985c31baa 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -3,6 +3,7 @@ from typing import Optional, Union import pytest +from pydantic import ValidationError from typing_extensions import get_args from langchain_core.documents import Document @@ -1267,10 +1268,14 @@ def test_typed_init() -> None: # Test we get type errors for malformed blocks (type checker will complain if # below type-ignores are unused). - _ = AIMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] - _ = HumanMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] - _ = SystemMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] - _ = ToolMessage( - content_blocks=[{"type": "text", "bad": "Hello"}], # type: ignore[list-item] - tool_call_id="abc123", - ) + with pytest.raises(ValidationError): + _ = AIMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + with pytest.raises(ValidationError): + _ = HumanMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + with pytest.raises(ValidationError): + _ = SystemMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + with pytest.raises(ValidationError): + _ = ToolMessage( + content_blocks=[{"type": "text", "bad": "Hello"}], # type: ignore[list-item] + tool_call_id="abc123", + ) From 98d5f469e37e2e9553483b450d6230546c8e0649 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 12 Aug 2025 11:00:02 -0400 Subject: [PATCH 07/56] Revert "start on duplicate content" This reverts commit 0ddab9ff20faa3bee52892b9b2ce87c9316ef2e4. 
--- libs/core/langchain_core/messages/ai.py | 70 +++++++------------ libs/core/langchain_core/messages/base.py | 69 ++++++++---------- libs/core/langchain_core/messages/human.py | 7 +- libs/core/langchain_core/messages/system.py | 7 +- libs/core/langchain_core/messages/tool.py | 7 +- .../language_models/chat_models/test_base.py | 44 ++++++++---- libs/core/tests/unit_tests/test_messages.py | 19 ++--- 7 files changed, 94 insertions(+), 129 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 6983143b6fa6b..9d4a1e01879df 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -202,16 +202,11 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content is not None and content_blocks is None: - super().__init__(content=content, **kwargs) - elif content is None and content_blocks is not None: + if content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), - content_blocks=content_blocks, **kwargs, ) - elif content is not None and content_blocks is not None: - super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) @@ -223,45 +218,32 @@ def lc_attributes(self) -> dict: "invalid_tool_calls": self.invalid_tool_calls, } - @model_validator(mode="after") - def _init_content_blocks(self) -> Self: - """Assign the content as a list of standard ContentBlocks. - - To use this property, the corresponding chat model must support - ``message_version="v1"`` or higher: - - .. code-block:: python - - from langchain.chat_models import init_chat_model - llm = init_chat_model("...", message_version="v1") - - otherwise, does best-effort parsing to standard types. 
- """ - if not self.content_blocks: - self.content_blocks = self._init_text_content(self.content) - - if self.tool_calls or self.invalid_tool_calls: - # Add from tool_calls if missing from content - content_tool_call_ids = { - block.get("id") - for block in self.content_blocks - if isinstance(block, dict) and block.get("type") == "tool_call" - } - for tool_call in self.tool_calls: - if (id_ := tool_call.get("id")) and id_ not in content_tool_call_ids: - tool_call_block: types.ToolCall = { - "type": "tool_call", - "id": id_, - "name": tool_call["name"], - "args": tool_call["args"], - } - if "index" in tool_call: - tool_call_block["index"] = tool_call["index"] - if "extras" in tool_call: - tool_call_block["extras"] = tool_call["extras"] - self.content_blocks.append(tool_call_block) + @property + def content_blocks(self) -> list[types.ContentBlock]: + """Return content blocks of the message.""" + blocks = super().content_blocks + + # Add from tool_calls if missing from content + content_tool_call_ids = { + block.get("id") + for block in self.content + if isinstance(block, dict) and block.get("type") == "tool_call" + } + for tool_call in self.tool_calls: + if (id_ := tool_call.get("id")) and id_ not in content_tool_call_ids: + tool_call_block: types.ToolCall = { + "type": "tool_call", + "id": id_, + "name": tool_call["name"], + "args": tool_call["args"], + } + if "index" in tool_call: + tool_call_block["index"] = tool_call["index"] + if "extras" in tool_call: + tool_call_block["extras"] = tool_call["extras"] + blocks.append(tool_call_block) - return self + return blocks # TODO: remove this logic if possible, reducing breaking nature of changes @model_validator(mode="before") diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index ff88dd248eb18..13b12f764d19e 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -4,8 +4,7 @@ from typing import TYPE_CHECKING, Any, Optional, Union, cast, overload -from pydantic import ConfigDict, Field, model_validator -from typing_extensions import Self +from pydantic import ConfigDict, Field from langchain_core.load.serializable import Serializable from langchain_core.messages import content_blocks as types @@ -28,9 +27,6 @@ class BaseMessage(Serializable): content: Union[str, list[Union[str, dict]]] """The string contents of the message.""" - content_blocks: list[types.ContentBlock] = Field(default_factory=list) - """The content of the message as a list of standard ContentBlocks.""" - additional_kwargs: dict = Field(default_factory=dict) """Reserved for additional payload data associated with the message. 
@@ -88,14 +84,8 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content is not None and content_blocks is None: - super().__init__(content=content, **kwargs) - elif content is None and content_blocks is not None: - super().__init__( - content=content_blocks, content_blocks=content_blocks, **kwargs - ) - elif content is not None and content_blocks is not None: - super().__init__(content=content, content_blocks=content_blocks, **kwargs) + if content_blocks is not None: + super().__init__(content=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) @@ -116,30 +106,9 @@ def get_lc_namespace(cls) -> list[str]: """ return ["langchain", "schema", "messages"] - @staticmethod - def _init_text_content( - content: Union[str, list[Union[str, dict]]], - ) -> list[types.ContentBlock]: - """Parse string content into a list of ContentBlocks.""" - blocks: list[types.ContentBlock] = [] - content = [content] if isinstance(content, str) and content else content - for item in content: - if isinstance(item, str): - blocks.append({"type": "text", "text": item}) - elif isinstance(item, dict): - item_type = item.get("type") - if item_type not in types.KNOWN_BLOCK_TYPES: - blocks.append({"type": "non_standard", "value": item}) - else: - blocks.append(cast("types.ContentBlock", item)) - else: - pass - - return blocks - - @model_validator(mode="after") - def _init_content_blocks(self) -> Self: - """Assign the content as a list of standard ContentBlocks. + @property + def content_blocks(self) -> list[types.ContentBlock]: + """Return the content as a list of standard ContentBlocks. To use this property, the corresponding chat model must support ``message_version="v1"`` or higher: @@ -151,11 +120,29 @@ def _init_content_blocks(self) -> Self: otherwise, does best-effort parsing to standard types. """ - if not self.content_blocks: - blocks = self._init_text_content(self.content) - self.content_blocks = blocks + blocks: list[types.ContentBlock] = [] + content = ( + [self.content] + if isinstance(self.content, str) and self.content + else self.content + ) + for item in content: + if isinstance(item, str): + blocks.append({"type": "text", "text": item}) + elif isinstance(item, dict): + item_type = item.get("type") + if item_type not in types.KNOWN_BLOCK_TYPES: + msg = ( + f"Non-standard content block type '{item_type}'. Ensure " + "the model supports `output_version='v1'` or higher and " + "that this attribute is set on initialization." + ) + raise ValueError(msg) + blocks.append(cast("types.ContentBlock", item)) + else: + pass - return self + return blocks def text(self) -> str: """Get the text content of the message. 
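
With the property restored, malformed or unknown blocks once again surface at
access time rather than at construction. A sketch of the behavior implied by
the hunk above (again assuming ``"cachePoint"`` is not a known block type):

.. code-block:: python

    from langchain_core.messages import HumanMessage

    # Construction succeeds; `content` itself is not validated against
    # KNOWN_BLOCK_TYPES.
    msg = HumanMessage(
        content=[{"type": "cachePoint", "cachePoint": {"type": "default"}}]
    )

    # Accessing the property raises, per the restored code path above.
    try:
        _ = msg.content_blocks
    except ValueError as e:
        print(e)  # Non-standard content block type 'cachePoint'. Ensure ...
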
diff --git a/libs/core/langchain_core/messages/human.py b/libs/core/langchain_core/messages/human.py index 5383bb964d4fc..041db0cdb9726 100644 --- a/libs/core/langchain_core/messages/human.py +++ b/libs/core/langchain_core/messages/human.py @@ -64,16 +64,11 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content is not None and content_blocks is None: - super().__init__(content=content, **kwargs) - elif content is None and content_blocks is not None: + if content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), - content_blocks=content_blocks, **kwargs, ) - elif content is not None and content_blocks is not None: - super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) diff --git a/libs/core/langchain_core/messages/system.py b/libs/core/langchain_core/messages/system.py index 976ea55d15685..c8a5bbae5c859 100644 --- a/libs/core/langchain_core/messages/system.py +++ b/libs/core/langchain_core/messages/system.py @@ -57,16 +57,11 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content is not None and content_blocks is None: - super().__init__(content=content, **kwargs) - elif content is None and content_blocks is not None: + if content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), - content_blocks=content_blocks, **kwargs, ) - elif content is not None and content_blocks is not None: - super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index 78b4f14c3a953..14177181480fc 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -158,16 +158,11 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content is not None and content_blocks is None: - super().__init__(content=content, **kwargs) - elif content is None and content_blocks is not None: + if content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), - content_blocks=content_blocks, **kwargs, ) - elif content is not None and content_blocks is not None: - super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 12cd42ddc322c..37b05ed825566 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -443,11 +443,19 @@ def test_trace_images_in_openai_format() -> None: ] tracer = FakeChatModelStartTracer() response = llm.invoke(messages, config={"callbacks": [tracer]}) - assert tracer.messages[0][0][0].content == [ - { - "type": "image_url", - "image_url": {"url": "https://example.com/image.png"}, - } + assert tracer.messages == [ + [ + [ + HumanMessage( + content=[ + { + "type": "image_url", + "image_url": {"url": "https://example.com/image.png"}, + } + ] + ) + ] + ] ] # Test no mutation assert response.content == [ @@ -478,15 +486,23 @@ def 
test_trace_content_blocks_with_no_type_key() -> None: ] tracer = FakeChatModelStartTracer() response = llm.invoke(messages, config={"callbacks": [tracer]}) - assert tracer.messages[0][0][0].content == [ - { - "type": "text", - "text": "Hello", - }, - { - "type": "cachePoint", - "cachePoint": {"type": "default"}, - }, + assert tracer.messages == [ + [ + [ + HumanMessage( + [ + { + "type": "text", + "text": "Hello", + }, + { + "type": "cachePoint", + "cachePoint": {"type": "default"}, + }, + ] + ) + ] + ] ] # Test no mutation assert response.content == [ diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index ec17985c31baa..d6857aef32de2 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -3,7 +3,6 @@ from typing import Optional, Union import pytest -from pydantic import ValidationError from typing_extensions import get_args from langchain_core.documents import Document @@ -1268,14 +1267,10 @@ def test_typed_init() -> None: # Test we get type errors for malformed blocks (type checker will complain if # below type-ignores are unused). - with pytest.raises(ValidationError): - _ = AIMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] - with pytest.raises(ValidationError): - _ = HumanMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] - with pytest.raises(ValidationError): - _ = SystemMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] - with pytest.raises(ValidationError): - _ = ToolMessage( - content_blocks=[{"type": "text", "bad": "Hello"}], # type: ignore[list-item] - tool_call_id="abc123", - ) + _ = AIMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + _ = HumanMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + _ = SystemMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + _ = ToolMessage( + content_blocks=[{"type": "text", "bad": "Hello"}], # type: ignore[list-item] + tool_call_id="abc123", + ) From 6eaa17205ccc40f80c5e544efa8cc36875e635da Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 12 Aug 2025 15:04:21 -0400 Subject: [PATCH 08/56] implement output_version on BaseChatModel --- .../langchain_core/language_models/chat_models.py | 14 ++++++++++++++ .../language_models/chat_models/test_cache.py | 5 +++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 06838e3b07592..1eb32ea1d66af 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -334,6 +334,20 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): """ + output_version: str = "v0" + """Version of AIMessage output format to use. + + This field is used to roll-out new output formats for chat model AIMessages + in a backwards-compatible way. + + ``'v1'`` standardizes output format using a list of typed ContentBlock dicts. We + recommend this for new applications. + + All chat models currently support the default of ``"v0"``. + + .. 
versionadded:: 1.0 + """ + @model_validator(mode="before") @classmethod def raise_deprecation(cls, values: dict) -> Any: diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py b/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py index 39e4babc7821f..7cf428bb3ab15 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py @@ -301,8 +301,9 @@ def test_llm_representation_for_serializable() -> None: assert chat._get_llm_string() == ( '{"id": ["tests", "unit_tests", "language_models", "chat_models", ' '"test_cache", "CustomChat"], "kwargs": {"messages": {"id": ' - '["builtins", "list_iterator"], "lc": 1, "type": "not_implemented"}}, "lc": ' - '1, "name": "CustomChat", "type": "constructor"}---[(\'stop\', None)]' + '["builtins", "list_iterator"], "lc": 1, "type": "not_implemented"}, ' + '"output_version": "v0"}, "lc": 1, "name": "CustomChat", "type": ' + "\"constructor\"}---[('stop', None)]" ) From 3ae7535f42c54908b0d5508878854aebbbc8186b Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 12 Aug 2025 15:15:57 -0400 Subject: [PATCH 09/56] openai: pull in _compat from 0.4 branch --- .../langchain_openai/chat_models/_compat.py | 468 +++++++++++++++++- 1 file changed, 460 insertions(+), 8 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/_compat.py b/libs/partners/openai/langchain_openai/chat_models/_compat.py index 25ff3eb607cc3..00f3b365c9b38 100644 --- a/libs/partners/openai/langchain_openai/chat_models/_compat.py +++ b/libs/partners/openai/langchain_openai/chat_models/_compat.py @@ -1,7 +1,10 @@ """ -This module converts between AIMessage output formats for the Responses API. +This module converts between AIMessage output formats, which are governed by the +``output_version`` attribute on ChatOpenAI. Supported values are ``"v0"`` and +``"responses/v1"``. -ChatOpenAI v0.3 stores reasoning and tool outputs in AIMessage.additional_kwargs: +``"v0"`` corresponds to the format as of ChatOpenAI v0.3. For the Responses API, it +stores reasoning and tool outputs in AIMessage.additional_kwargs: .. code-block:: python @@ -28,8 +31,9 @@ id="msg_123", ) -To retain information about response item sequencing (and to accommodate multiple -reasoning items), ChatOpenAI now stores these items in the content sequence: +``"responses/v1"`` is only applicable to the Responses API. It retains information +about response item sequencing and accommodates multiple reasoning items by +representing these items in the content sequence: .. code-block:: python @@ -57,18 +61,22 @@ content blocks, rather than on the AIMessage.id, which now stores the response ID. For backwards compatibility, this module provides functions to convert between the -old and new formats. The functions are used internally by ChatOpenAI. - +formats. The functions are used internally by ChatOpenAI. 
""" # noqa: E501 +import copy import json -from typing import Union +from collections.abc import Iterable, Iterator +from typing import Any, Literal, Optional, Union, cast -from langchain_core.messages import AIMessage +from langchain_core.messages import AIMessage, is_data_content_block +from langchain_core.messages import content_blocks as types +from langchain_core.v1.messages import AIMessage as AIMessageV1 _FUNCTION_CALL_IDS_MAP_KEY = "__openai_function_call_ids__" +# v0.3 / Responses def _convert_to_v03_ai_message( message: AIMessage, has_reasoning: bool = False ) -> AIMessage: @@ -253,3 +261,447 @@ def _convert_from_v03_ai_message(message: AIMessage) -> AIMessage: }, deep=False, ) + + +# v1 / Chat Completions +def _convert_from_v1_to_chat_completions(message: AIMessageV1) -> AIMessageV1: + """Convert a v1 message to the Chat Completions format.""" + new_content: list[types.ContentBlock] = [] + for block in message.content: + if block["type"] == "text": + # Strip annotations + new_content.append({"type": "text", "text": block["text"]}) + elif block["type"] in ("reasoning", "tool_call"): + pass + else: + new_content.append(block) + new_message = copy.copy(message) + new_message.content = new_content + + return new_message + + +# v1 / Responses +def _convert_annotation_to_v1(annotation: dict[str, Any]) -> types.Annotation: + annotation_type = annotation.get("type") + + if annotation_type == "url_citation": + known_fields = { + "type", + "url", + "title", + "cited_text", + "start_index", + "end_index", + } + url_citation = cast(types.Citation, {}) + for field in ("end_index", "start_index", "title"): + if field in annotation: + url_citation[field] = annotation[field] + url_citation["type"] = "citation" + url_citation["url"] = annotation["url"] + for field in annotation: + if field not in known_fields: + if "extras" not in url_citation: + url_citation["extras"] = {} + url_citation["extras"][field] = annotation[field] + return url_citation + + elif annotation_type == "file_citation": + known_fields = {"type", "title", "cited_text", "start_index", "end_index"} + document_citation: types.Citation = {"type": "citation"} + if "filename" in annotation: + document_citation["title"] = annotation.pop("filename") + for field in annotation: + if field not in known_fields: + if "extras" not in document_citation: + document_citation["extras"] = {} + document_citation["extras"][field] = annotation[field] + + return document_citation + + # TODO: standardise container_file_citation? 
+ else: + non_standard_annotation: types.NonStandardAnnotation = { + "type": "non_standard_annotation", + "value": annotation, + } + return non_standard_annotation + + +def _explode_reasoning(block: dict[str, Any]) -> Iterable[types.ReasoningContentBlock]: + if "summary" not in block: + yield cast(types.ReasoningContentBlock, block) + return + + known_fields = {"type", "reasoning", "id", "index"} + unknown_fields = [ + field for field in block if field != "summary" and field not in known_fields + ] + if unknown_fields: + block["extras"] = {} + for field in unknown_fields: + block["extras"][field] = block.pop(field) + + if not block["summary"]: + _ = block.pop("summary", None) + yield cast(types.ReasoningContentBlock, block) + return + + # Common part for every exploded line, except 'summary' + common = {k: v for k, v in block.items() if k in known_fields} + + # Optional keys that must appear only in the first exploded item + first_only = block.pop("extras", None) + + for idx, part in enumerate(block["summary"]): + new_block = dict(common) + new_block["reasoning"] = part.get("text", "") + if idx == 0 and first_only: + new_block.update(first_only) + yield cast(types.ReasoningContentBlock, new_block) + + +def _convert_to_v1_from_responses( + content: list[dict[str, Any]], + tool_calls: Optional[list[types.ToolCall]] = None, + invalid_tool_calls: Optional[list[types.InvalidToolCall]] = None, +) -> list[types.ContentBlock]: + """Mutate a Responses message to v1 format.""" + + def _iter_blocks() -> Iterable[types.ContentBlock]: + for block in content: + if not isinstance(block, dict): + continue + block_type = block.get("type") + + if block_type == "text": + if "annotations" in block: + block["annotations"] = [ + _convert_annotation_to_v1(a) for a in block["annotations"] + ] + yield cast(types.TextContentBlock, block) + + elif block_type == "reasoning": + yield from _explode_reasoning(block) + + elif block_type == "image_generation_call" and ( + result := block.get("result") + ): + new_block = {"type": "image", "base64": result} + if output_format := block.get("output_format"): + new_block["mime_type"] = f"image/{output_format}" + if "id" in block: + new_block["id"] = block["id"] + if "index" in block: + new_block["index"] = block["index"] + for extra_key in ( + "status", + "background", + "output_format", + "quality", + "revised_prompt", + "size", + ): + if extra_key in block: + new_block[extra_key] = block[extra_key] + yield cast(types.ImageContentBlock, new_block) + + elif block_type == "function_call": + tool_call_block: Optional[types.ContentBlock] = None + call_id = block.get("call_id", "") + if call_id: + for tool_call in tool_calls or []: + if tool_call.get("id") == call_id: + tool_call_block = cast(types.ToolCall, tool_call.copy()) + break + else: + for invalid_tool_call in invalid_tool_calls or []: + if invalid_tool_call.get("id") == call_id: + tool_call_block = cast( + types.InvalidToolCall, invalid_tool_call.copy() + ) + break + if tool_call_block: + if "id" in block: + if "extras" not in tool_call_block: + tool_call_block["extras"] = {} + tool_call_block["extras"]["item_id"] = block["id"] # type: ignore[typeddict-item] + if "index" in block: + tool_call_block["index"] = block["index"] + yield tool_call_block + + elif block_type == "web_search_call": + web_search_call = {"type": "web_search_call", "id": block["id"]} + if "index" in block: + web_search_call["index"] = block["index"] + if ( + "action" in block + and isinstance(block["action"], dict) + and block["action"].get("type") == 
"search" + and "query" in block["action"] + ): + web_search_call["query"] = block["action"]["query"] + for key in block: + if key not in ("type", "id"): + web_search_call[key] = block[key] + + web_search_result = {"type": "web_search_result", "id": block["id"]} + if "index" in block: + web_search_result["index"] = block["index"] + 1 + yield cast(types.WebSearchCall, web_search_call) + yield cast(types.WebSearchResult, web_search_result) + + elif block_type == "code_interpreter_call": + code_interpreter_call = { + "type": "code_interpreter_call", + "id": block["id"], + } + if "code" in block: + code_interpreter_call["code"] = block["code"] + if "container_id" in block: + code_interpreter_call["container_id"] = block["container_id"] + if "index" in block: + code_interpreter_call["index"] = block["index"] + + code_interpreter_result = { + "type": "code_interpreter_result", + "id": block["id"], + } + if "outputs" in block: + code_interpreter_result["outputs"] = block["outputs"] + for output in block["outputs"]: + if ( + isinstance(output, dict) + and (output_type := output.get("type")) + and output_type == "logs" + ): + if "output" not in code_interpreter_result: + code_interpreter_result["output"] = [] + code_interpreter_result["output"].append( + { + "type": "code_interpreter_output", + "stdout": output.get("logs", ""), + } + ) + + if "status" in block: + code_interpreter_result["status"] = block["status"] + if "index" in block: + code_interpreter_result["index"] = block["index"] + 1 + + yield cast(types.CodeInterpreterCall, code_interpreter_call) + yield cast(types.CodeInterpreterResult, code_interpreter_result) + + else: + new_block = {"type": "non_standard", "value": block} + if "index" in new_block["value"]: + new_block["index"] = new_block["value"].pop("index") + yield cast(types.NonStandardContentBlock, new_block) + + return list(_iter_blocks()) + + +def _convert_annotation_from_v1(annotation: types.Annotation) -> dict[str, Any]: + if annotation["type"] == "citation": + new_ann: dict[str, Any] = {} + for field in ("end_index", "start_index"): + if field in annotation: + new_ann[field] = annotation[field] + + if "url" in annotation: + # URL citation + if "title" in annotation: + new_ann["title"] = annotation["title"] + new_ann["type"] = "url_citation" + new_ann["url"] = annotation["url"] + else: + # Document citation + new_ann["type"] = "file_citation" + if "title" in annotation: + new_ann["filename"] = annotation["title"] + + if extra_fields := annotation.get("extras"): + for field, value in extra_fields.items(): + new_ann[field] = value + + return new_ann + + elif annotation["type"] == "non_standard_annotation": + return annotation["value"] + + else: + return dict(annotation) + + +def _implode_reasoning_blocks(blocks: list[dict[str, Any]]) -> Iterable[dict[str, Any]]: + i = 0 + n = len(blocks) + + while i < n: + block = blocks[i] + + # Skip non-reasoning blocks or blocks already in Responses format + if block.get("type") != "reasoning" or "summary" in block: + yield dict(block) + i += 1 + continue + elif "reasoning" not in block and "summary" not in block: + # {"type": "reasoning", "id": "rs_..."} + oai_format = {**block, "summary": []} + if "extras" in oai_format: + oai_format.update(oai_format.pop("extras")) + oai_format["type"] = oai_format.pop("type", "reasoning") + if "encrypted_content" in oai_format: + oai_format["encrypted_content"] = oai_format.pop("encrypted_content") + yield oai_format + i += 1 + continue + else: + pass + + summary: list[dict[str, str]] = [ + {"type": 
"summary_text", "text": block.get("reasoning", "")} + ] + # 'common' is every field except the exploded 'reasoning' + common = {k: v for k, v in block.items() if k != "reasoning"} + if "extras" in common: + common.update(common.pop("extras")) + + i += 1 + while i < n: + next_ = blocks[i] + if next_.get("type") == "reasoning" and "reasoning" in next_: + summary.append( + {"type": "summary_text", "text": next_.get("reasoning", "")} + ) + i += 1 + else: + break + + merged = dict(common) + merged["summary"] = summary + merged["type"] = merged.pop("type", "reasoning") + yield merged + + +def _consolidate_calls( + items: Iterable[dict[str, Any]], + call_name: Literal["web_search_call", "code_interpreter_call"], + result_name: Literal["web_search_result", "code_interpreter_result"], +) -> Iterator[dict[str, Any]]: + """ + Generator that walks through *items* and, whenever it meets the pair + + {"type": "web_search_call", "id": X, ...} + {"type": "web_search_result", "id": X} + + merges them into + + {"id": X, + "action": …, + "status": …, + "type": "web_search_call"} + + keeping every other element untouched. + """ + items = iter(items) # make sure we have a true iterator + for current in items: + # Only a call can start a pair worth collapsing + if current.get("type") != call_name: + yield current + continue + + try: + nxt = next(items) # look-ahead one element + except StopIteration: # no “result” – just yield the call back + yield current + break + + # If this really is the matching “result” – collapse + if nxt.get("type") == result_name and nxt.get("id") == current.get("id"): + if call_name == "web_search_call": + collapsed = {"id": current["id"]} + if "action" in current: + collapsed["action"] = current["action"] + collapsed["status"] = current["status"] + collapsed["type"] = "web_search_call" + + if call_name == "code_interpreter_call": + collapsed = {"id": current["id"]} + for key in ("code", "container_id"): + if key in current: + collapsed[key] = current[key] + + for key in ("outputs", "status"): + if key in nxt: + collapsed[key] = nxt[key] + collapsed["type"] = "code_interpreter_call" + + yield collapsed + + else: + # Not a matching pair – emit both, in original order + yield current + yield nxt + + +def _convert_from_v1_to_responses( + content: list[types.ContentBlock], tool_calls: list[types.ToolCall] +) -> list[dict[str, Any]]: + new_content: list = [] + for block in content: + if block["type"] == "text" and "annotations" in block: + # Need a copy because we’re changing the annotations list + new_block = dict(block) + new_block["annotations"] = [ + _convert_annotation_from_v1(a) for a in block["annotations"] + ] + new_content.append(new_block) + elif block["type"] == "tool_call": + new_block = {"type": "function_call", "call_id": block["id"]} + if "extras" in block and "item_id" in block["extras"]: + new_block["id"] = block["extras"]["item_id"] + if "name" in block: + new_block["name"] = block["name"] + if "extras" in block and "arguments" in block["extras"]: + new_block["arguments"] = block["extras"]["arguments"] + if any(key not in block for key in ("name", "arguments")): + matching_tool_calls = [ + call for call in tool_calls if call["id"] == block["id"] + ] + if matching_tool_calls: + tool_call = matching_tool_calls[0] + if "name" not in block: + new_block["name"] = tool_call["name"] + if "arguments" not in block: + new_block["arguments"] = json.dumps(tool_call["args"]) + new_content.append(new_block) + elif ( + is_data_content_block(cast(dict, block)) + and block["type"] == 
"image" + and "base64" in block + and isinstance(block.get("id"), str) + and block["id"].startswith("ig_") + ): + new_block = {"type": "image_generation_call", "result": block["base64"]} + for extra_key in ("id", "status"): + if extra_key in block: + new_block[extra_key] = block[extra_key] # type: ignore[typeddict-item] + new_content.append(new_block) + elif block["type"] == "non_standard" and "value" in block: + new_content.append(block["value"]) + else: + new_content.append(block) + + new_content = list(_implode_reasoning_blocks(new_content)) + new_content = list( + _consolidate_calls(new_content, "web_search_call", "web_search_result") + ) + new_content = list( + _consolidate_calls( + new_content, "code_interpreter_call", "code_interpreter_result" + ) + ) + + return new_content From c1d65a7d7f17775d1b5a63e78d9ed317c317659c Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 12 Aug 2025 18:00:14 -0400 Subject: [PATCH 10/56] x --- .../langchain_openai/chat_models/_compat.py | 64 +++-- .../langchain_openai/chat_models/base.py | 143 ++++++++-- .../tests/unit_tests/chat_models/test_base.py | 259 +++++++++++++++++- .../chat_models/test_responses_stream.py | 128 +++++++-- 4 files changed, 529 insertions(+), 65 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/_compat.py b/libs/partners/openai/langchain_openai/chat_models/_compat.py index 00f3b365c9b38..f3431f1651499 100644 --- a/libs/partners/openai/langchain_openai/chat_models/_compat.py +++ b/libs/partners/openai/langchain_openai/chat_models/_compat.py @@ -64,14 +64,12 @@ formats. The functions are used internally by ChatOpenAI. """ # noqa: E501 -import copy import json from collections.abc import Iterable, Iterator from typing import Any, Literal, Optional, Union, cast -from langchain_core.messages import AIMessage, is_data_content_block +from langchain_core.messages import AIMessage, AIMessageChunk, is_data_content_block from langchain_core.messages import content_blocks as types -from langchain_core.v1.messages import AIMessage as AIMessageV1 _FUNCTION_CALL_IDS_MAP_KEY = "__openai_function_call_ids__" @@ -264,21 +262,51 @@ def _convert_from_v03_ai_message(message: AIMessage) -> AIMessage: # v1 / Chat Completions -def _convert_from_v1_to_chat_completions(message: AIMessageV1) -> AIMessageV1: - """Convert a v1 message to the Chat Completions format.""" - new_content: list[types.ContentBlock] = [] - for block in message.content: - if block["type"] == "text": - # Strip annotations - new_content.append({"type": "text", "text": block["text"]}) - elif block["type"] in ("reasoning", "tool_call"): - pass +def _convert_to_v1_from_chat_completions(message: AIMessage) -> AIMessage: + """Mutate a Chat Completions message to v1 format.""" + if isinstance(message.content, str): + if message.content: + message.content = [{"type": "text", "text": message.content}] else: - new_content.append(block) - new_message = copy.copy(message) - new_message.content = new_content + message.content = [] + + for tool_call in message.tool_calls: + if id_ := tool_call.get("id"): + message.content.append({"type": "tool_call", "id": id_}) + + if "tool_calls" in message.additional_kwargs: + _ = message.additional_kwargs.pop("tool_calls") + + if "token_usage" in message.response_metadata: + _ = message.response_metadata.pop("token_usage") + + return message + + +def _convert_to_v1_from_chat_completions_chunk(chunk: AIMessageChunk) -> AIMessageChunk: + result = _convert_to_v1_from_chat_completions(cast(AIMessage, chunk)) + return 
cast(AIMessageChunk, result) - return new_message + +def _convert_from_v1_to_chat_completions(message: AIMessage) -> AIMessage: + """Convert a v1 message to the Chat Completions format.""" + if isinstance(message.content, list): + new_content: list = [] + for block in message.content: + if isinstance(block, dict): + block_type = block.get("type") + if block_type == "text": + # Strip annotations + new_content.append({"type": "text", "text": block["text"]}) + elif block_type in ("reasoning", "tool_call"): + pass + else: + new_content.append(block) + else: + new_content.append(block) + return message.model_copy(update={"content": new_content}) + + return message # v1 / Responses @@ -408,7 +436,9 @@ def _iter_blocks() -> Iterable[types.ContentBlock]: yield cast(types.ImageContentBlock, new_block) elif block_type == "function_call": - tool_call_block: Optional[types.ContentBlock] = None + tool_call_block: Optional[ + Union[types.ToolCall, types.InvalidToolCall] + ] = None call_id = block.get("call_id", "") if call_id: for tool_call in tool_calls or []: diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index a8702359b36e6..947073b0e6a29 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -108,7 +108,12 @@ ) from langchain_openai.chat_models._compat import ( _convert_from_v03_ai_message, + _convert_from_v1_to_chat_completions, + _convert_from_v1_to_responses, _convert_to_v03_ai_message, + _convert_to_v1_from_chat_completions, + _convert_to_v1_from_chat_completions_chunk, + _convert_to_v1_from_responses, ) if TYPE_CHECKING: @@ -681,7 +686,7 @@ class BaseChatOpenAI(BaseChatModel): .. versionadded:: 0.3.9 """ - output_version: Literal["v0", "responses/v1"] = "v0" + output_version: str = "v0" """Version of AIMessage output format to use. This field is used to roll-out new output formats for chat model AIMessages @@ -692,8 +697,9 @@ class BaseChatOpenAI(BaseChatModel): - ``'v0'``: AIMessage format as of langchain-openai 0.3.x. - ``'responses/v1'``: Formats Responses API output items into AIMessage content blocks. + - ``"v1"``: v1 of LangChain cross-provider standard. - Currently only impacts the Responses API. ``output_version='responses/v1'`` is + Currently only impacts the Responses API. ``output_version='v1'`` is recommended. .. 
versionadded:: 0.3.25 @@ -896,6 +902,10 @@ def _convert_chunk_to_generation_chunk( message=default_chunk_class(content="", usage_metadata=usage_metadata), generation_info=base_generation_info, ) + if self.output_version == "v1": + generation_chunk.message = _convert_to_v1_from_chat_completions_chunk( + cast(AIMessageChunk, generation_chunk.message) + ) return generation_chunk choice = choices[0] @@ -923,6 +933,20 @@ def _convert_chunk_to_generation_chunk( if usage_metadata and isinstance(message_chunk, AIMessageChunk): message_chunk.usage_metadata = usage_metadata + if self.output_version == "v1": + message_chunk = cast(AIMessageChunk, message_chunk) + # Convert to v1 format + if isinstance(message_chunk.content, str): + message_chunk = _convert_to_v1_from_chat_completions_chunk( + message_chunk + ) + if message_chunk.content: + message_chunk.content[0]["index"] = 0 # type: ignore[index] + else: + message_chunk = _convert_to_v1_from_chat_completions_chunk( + message_chunk + ) + generation_chunk = ChatGenerationChunk( message=message_chunk, generation_info=generation_info or None ) @@ -1216,7 +1240,12 @@ def _get_request_payload( else: payload = _construct_responses_api_payload(messages, payload) else: - payload["messages"] = [_convert_message_to_dict(m) for m in messages] + payload["messages"] = [ + _convert_message_to_dict(_convert_from_v1_to_chat_completions(m)) + if isinstance(m, AIMessage) + else _convert_message_to_dict(m) + for m in messages + ] return payload def _create_chat_result( @@ -1282,6 +1311,12 @@ def _create_chat_result( if hasattr(message, "refusal"): generations[0].message.additional_kwargs["refusal"] = message.refusal + if self.output_version == "v1": + _ = llm_output.pop("token_usage", None) + generations[0].message = _convert_to_v1_from_chat_completions( + cast(AIMessage, generations[0].message) + ) + return ChatResult(generations=generations, llm_output=llm_output) async def _astream( @@ -3660,6 +3695,10 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: if isinstance(lc_msg, AIMessage): lc_msg = _convert_from_v03_ai_message(lc_msg) msg = _convert_message_to_dict(lc_msg) + if isinstance(lc_msg, AIMessage): + msg["content"] = _convert_from_v1_to_responses( + msg["content"], lc_msg.tool_calls + ) # "name" parameter unsupported if "name" in msg: msg.pop("name") @@ -3821,7 +3860,7 @@ def _construct_lc_result_from_responses_api( response: Response, schema: Optional[type[_BM]] = None, metadata: Optional[dict] = None, - output_version: Literal["v0", "responses/v1"] = "v0", + output_version: str = "v0", ) -> ChatResult: """Construct ChatResponse from OpenAI Response API response.""" if response.error: @@ -3959,6 +3998,30 @@ def _construct_lc_result_from_responses_api( additional_kwargs["parsed"] = parsed except json.JSONDecodeError: pass + + if output_version == "v1": + content_blocks = _convert_to_v1_from_responses(content_blocks) + + if response.tools and any( + tool.type == "image_generation" for tool in response.tools + ): + # Get mime_time from tool definition and add to image generations + # if missing (primarily for tracing purposes). 
+            image_generation_call = next(
+                tool for tool in response.tools if tool.type == "image_generation"
+            )
+            if image_generation_call.output_format:
+                mime_type = f"image/{image_generation_call.output_format}"
+                for content_block in content_blocks:
+                    # OK to mutate output message
+                    if (
+                        isinstance(content_block, dict)
+                        and content_block.get("type") == "image"
+                        and "base64" in content_block
+                        and "mime_type" not in content_block
+                    ):
+                        content_block["mime_type"] = mime_type
+
     message = AIMessage(
         content=content_blocks,
         id=response.id,
@@ -3983,7 +4046,7 @@ def _convert_responses_chunk_to_generation_chunk(
     schema: Optional[type[_BM]] = None,
     metadata: Optional[dict] = None,
     has_reasoning: bool = False,
-    output_version: Literal["v0", "responses/v1"] = "v0",
+    output_version: str = "v0",
 ) -> tuple[int, int, int, Optional[ChatGenerationChunk]]:
     def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None:
         """Advance indexes tracked during streaming.
@@ -4049,9 +4112,29 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None:
             annotation = chunk.annotation
         else:
             annotation = chunk.annotation.model_dump(exclude_none=True, mode="json")
-        content.append({"annotations": [annotation], "index": current_index})
+        if output_version == "v1":
+            content.append(
+                {
+                    "type": "text",
+                    "text": "",
+                    "annotations": [annotation],
+                    "index": current_index,
+                }
+            )
+        else:
+            content.append({"annotations": [annotation], "index": current_index})
     elif chunk.type == "response.output_text.done":
-        content.append({"id": chunk.item_id, "index": current_index})
+        if output_version == "v1":
+            content.append(
+                {
+                    "type": "text",
+                    "text": "",
+                    "id": chunk.item_id,
+                    "index": current_index,
+                }
+            )
+        else:
+            content.append({"id": chunk.item_id, "index": current_index})
     elif chunk.type == "response.created":
         id = chunk.response.id
         response_metadata["id"] = chunk.response.id  # Backwards compatibility
@@ -4144,21 +4227,35 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None:
         content.append({"type": "refusal", "refusal": chunk.refusal})
     elif chunk.type == "response.output_item.added" and chunk.item.type == "reasoning":
         _advance(chunk.output_index)
+        current_sub_index = 0
         reasoning = chunk.item.model_dump(exclude_none=True, mode="json")
         reasoning["index"] = current_index
         content.append(reasoning)
     elif chunk.type == "response.reasoning_summary_part.added":
-        _advance(chunk.output_index)
-        content.append(
-            {
-                # langchain-core uses the `index` key to aggregate text blocks.
-                "summary": [
-                    {"index": chunk.summary_index, "type": "summary_text", "text": ""}
-                ],
-                "index": current_index,
-                "type": "reasoning",
-            }
-        )
+        if output_version in ("v0", "responses/v1"):
+            _advance(chunk.output_index)
+            content.append(
+                {
+                    # langchain-core uses the `index` key to aggregate text blocks.
+                    "summary": [
+                        {
+                            "index": chunk.summary_index,
+                            "type": "summary_text",
+                            "text": "",
+                        }
+                    ],
+                    "index": current_index,
+                    "type": "reasoning",
+                }
+            )
+        else:
+            # v1
+            block: dict = {"type": "reasoning", "reasoning": ""}
+            if chunk.summary_index > 0:
+                _advance(chunk.output_index, chunk.summary_index)
+                block["id"] = chunk.item_id
+            block["index"] = current_index
+            content.append(block)
     elif chunk.type == "response.image_generation_call.partial_image":
         # Partial images are not supported yet.
pass @@ -4180,6 +4277,16 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: else: return current_index, current_output_index, current_sub_index, None + if output_version == "v1": + content = cast(AIMessageChunk, _convert_to_v1_from_responses(content)) + for content_block in content: + if ( + isinstance(content_block, dict) + and content_block.get("index", -1) > current_index + ): + # blocks were added for v1 + current_index = content_block["index"] + message = AIMessageChunk( content=content, # type: ignore[arg-type] tool_call_chunks=tool_call_chunks, diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index 731857906020f..6713f8c967b7b 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -20,11 +20,13 @@ ToolCall, ToolMessage, ) +from langchain_core.messages import content_blocks as types from langchain_core.messages.ai import UsageMetadata from langchain_core.outputs import ChatGeneration, ChatResult from langchain_core.runnables import RunnableLambda from langchain_core.tracers.base import BaseTracer from langchain_core.tracers.schemas import Run +from langchain_core.v1.messages import AIMessage as AIMessageV1 from openai.types.responses import ResponseOutputMessage, ResponseReasoningItem from openai.types.responses.response import IncompleteDetails, Response, ResponseUsage from openai.types.responses.response_error import ResponseError @@ -51,7 +53,10 @@ from langchain_openai.chat_models._compat import ( _FUNCTION_CALL_IDS_MAP_KEY, _convert_from_v03_ai_message, + _convert_from_v1_to_chat_completions, + _convert_from_v1_to_responses, _convert_to_v03_ai_message, + _convert_to_v1_from_responses, ) from langchain_openai.chat_models.base import ( _construct_lc_result_from_responses_api, @@ -2373,7 +2378,7 @@ def mock_create(*args: Any, **kwargs: Any) -> Response: assert payload["tools"][0]["headers"]["Authorization"] == "Bearer PLACEHOLDER" -def test_compat() -> None: +def test_compat_responses_v03() -> None: # Check compatibility with v0.3 message format message_v03 = AIMessage( content=[ @@ -2434,6 +2439,258 @@ def test_compat() -> None: assert message_v03_output is not message_v03 +@pytest.mark.parametrize( + "message_v1, expected", + [ + ( + AIMessageV1( + [ + {"type": "reasoning", "reasoning": "Reasoning text"}, + { + "type": "tool_call", + "id": "call_123", + "name": "get_weather", + "args": {"location": "San Francisco"}, + }, + { + "type": "text", + "text": "Hello, world!", + "annotations": [ + {"type": "citation", "url": "https://example.com"} + ], + }, + ], + id="chatcmpl-123", + response_metadata={"model_provider": "openai", "model_name": "gpt-4.1"}, + ), + AIMessageV1( + [{"type": "text", "text": "Hello, world!"}], + id="chatcmpl-123", + response_metadata={"model_provider": "openai", "model_name": "gpt-4.1"}, + ), + ) + ], +) +def test_convert_from_v1_to_chat_completions( + message_v1: AIMessageV1, expected: AIMessageV1 +) -> None: + result = _convert_from_v1_to_chat_completions(message_v1) + assert result == expected + assert result.tool_calls == message_v1.tool_calls # tool calls remain cached + + # Check no mutation + assert message_v1 != result + + +@pytest.mark.parametrize( + "message_v1, expected", + [ + ( + AIMessageV1( + [ + {"type": "reasoning", "id": "abc123"}, + {"type": "reasoning", "id": "abc234", "reasoning": "foo "}, + {"type": "reasoning", "id": "abc234", 
"reasoning": "bar"}, + { + "type": "tool_call", + "id": "call_123", + "name": "get_weather", + "args": {"location": "San Francisco"}, + }, + { + "type": "tool_call", + "id": "call_234", + "name": "get_weather_2", + "args": {"location": "New York"}, + "extras": {"item_id": "fc_123"}, + }, + {"type": "text", "text": "Hello "}, + { + "type": "text", + "text": "world", + "annotations": [ + {"type": "citation", "url": "https://example.com"}, + { + "type": "citation", + "title": "my doc", + "extras": {"file_id": "file_123", "index": 1}, + }, + { + "type": "non_standard_annotation", + "value": {"bar": "baz"}, + }, + ], + }, + {"type": "image", "base64": "...", "id": "ig_123"}, + { + "type": "non_standard", + "value": {"type": "something_else", "foo": "bar"}, + }, + ], + id="resp123", + ), + [ + {"type": "reasoning", "id": "abc123", "summary": []}, + { + "type": "reasoning", + "id": "abc234", + "summary": [ + {"type": "summary_text", "text": "foo "}, + {"type": "summary_text", "text": "bar"}, + ], + }, + { + "type": "function_call", + "call_id": "call_123", + "name": "get_weather", + "arguments": '{"location": "San Francisco"}', + }, + { + "type": "function_call", + "call_id": "call_234", + "name": "get_weather_2", + "arguments": '{"location": "New York"}', + "id": "fc_123", + }, + {"type": "text", "text": "Hello "}, + { + "type": "text", + "text": "world", + "annotations": [ + {"type": "url_citation", "url": "https://example.com"}, + { + "type": "file_citation", + "filename": "my doc", + "index": 1, + "file_id": "file_123", + }, + {"bar": "baz"}, + ], + }, + {"type": "image_generation_call", "id": "ig_123", "result": "..."}, + {"type": "something_else", "foo": "bar"}, + ], + ) + ], +) +def test_convert_from_v1_to_responses( + message_v1: AIMessageV1, expected: AIMessageV1 +) -> None: + result = _convert_from_v1_to_responses(message_v1.content, message_v1.tool_calls) + assert result == expected + + # Check no mutation + assert message_v1 != result + + +@pytest.mark.parametrize( + "responses_content, tool_calls, expected_content", + [ + ( + [ + {"type": "reasoning", "id": "abc123", "summary": []}, + { + "type": "reasoning", + "id": "abc234", + "summary": [ + {"type": "summary_text", "text": "foo "}, + {"type": "summary_text", "text": "bar"}, + ], + }, + { + "type": "function_call", + "call_id": "call_123", + "name": "get_weather", + "arguments": '{"location": "San Francisco"}', + }, + { + "type": "function_call", + "call_id": "call_234", + "name": "get_weather_2", + "arguments": '{"location": "New York"}', + "id": "fc_123", + }, + {"type": "text", "text": "Hello "}, + { + "type": "text", + "text": "world", + "annotations": [ + {"type": "url_citation", "url": "https://example.com"}, + { + "type": "file_citation", + "filename": "my doc", + "index": 1, + "file_id": "file_123", + }, + {"bar": "baz"}, + ], + }, + {"type": "image_generation_call", "id": "ig_123", "result": "..."}, + {"type": "something_else", "foo": "bar"}, + ], + [ + { + "type": "tool_call", + "id": "call_123", + "name": "get_weather", + "args": {"location": "San Francisco"}, + }, + { + "type": "tool_call", + "id": "call_234", + "name": "get_weather_2", + "args": {"location": "New York"}, + }, + ], + [ + {"type": "reasoning", "id": "abc123"}, + {"type": "reasoning", "id": "abc234", "reasoning": "foo "}, + {"type": "reasoning", "id": "abc234", "reasoning": "bar"}, + { + "type": "tool_call", + "id": "call_123", + "name": "get_weather", + "args": {"location": "San Francisco"}, + }, + { + "type": "tool_call", + "id": "call_234", + "name": 
"get_weather_2", + "args": {"location": "New York"}, + "extras": {"item_id": "fc_123"}, + }, + {"type": "text", "text": "Hello "}, + { + "type": "text", + "text": "world", + "annotations": [ + {"type": "citation", "url": "https://example.com"}, + { + "type": "citation", + "title": "my doc", + "extras": {"file_id": "file_123", "index": 1}, + }, + {"type": "non_standard_annotation", "value": {"bar": "baz"}}, + ], + }, + {"type": "image", "base64": "...", "id": "ig_123"}, + { + "type": "non_standard", + "value": {"type": "something_else", "foo": "bar"}, + }, + ], + ) + ], +) +def test_convert_to_v1_from_responses( + responses_content: list[dict[str, Any]], + tool_calls: list[ToolCall], + expected_content: list[types.ContentBlock], +) -> None: + result = _convert_to_v1_from_responses(responses_content, tool_calls) + assert result == expected_content + + def test_get_last_messages() -> None: messages: list[BaseMessage] = [HumanMessage("Hello")] last_messages, previous_response_id = _get_last_messages(messages) diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py b/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py index eca5ee1c2559a..49c88ab4aadeb 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py @@ -1,6 +1,7 @@ from typing import Any, Optional from unittest.mock import MagicMock, patch +import pytest from langchain_core.messages import AIMessageChunk, BaseMessageChunk from openai.types.responses import ( ResponseCompletedEvent, @@ -337,7 +338,7 @@ id="rs_234", summary=[], type="reasoning", - encrypted_content=None, + encrypted_content="encrypted-content", status=None, ), output_index=2, @@ -416,7 +417,7 @@ Summary(text="still more reasoning", type="summary_text"), ], type="reasoning", - encrypted_content=None, + encrypted_content="encrypted-content", status=None, ), output_index=2, @@ -562,7 +563,7 @@ Summary(text="still more reasoning", type="summary_text"), ], type="reasoning", - encrypted_content=None, + encrypted_content="encrypted-content", status=None, ), ResponseOutputMessage( @@ -620,8 +621,99 @@ def _strip_none(obj: Any) -> Any: return obj -def test_responses_stream() -> None: - llm = ChatOpenAI(model="o4-mini", output_version="responses/v1") +@pytest.mark.parametrize( + "output_version, expected_content", + [ + ( + "responses/v1", + [ + { + "id": "rs_123", + "summary": [ + { + "index": 0, + "type": "summary_text", + "text": "reasoning block one", + }, + { + "index": 1, + "type": "summary_text", + "text": "another reasoning block", + }, + ], + "type": "reasoning", + "index": 0, + }, + {"type": "text", "text": "text block one", "index": 1, "id": "msg_123"}, + { + "type": "text", + "text": "another text block", + "index": 2, + "id": "msg_123", + }, + { + "id": "rs_234", + "summary": [ + {"index": 0, "type": "summary_text", "text": "more reasoning"}, + { + "index": 1, + "type": "summary_text", + "text": "still more reasoning", + }, + ], + "encrypted_content": "encrypted-content", + "type": "reasoning", + "index": 3, + }, + {"type": "text", "text": "more", "index": 4, "id": "msg_234"}, + {"type": "text", "text": "text", "index": 5, "id": "msg_234"}, + ], + ), + ( + "v1", + [ + { + "type": "reasoning", + "reasoning": "reasoning block one", + "id": "rs_123", + "index": 0, + }, + { + "type": "reasoning", + "reasoning": "another reasoning block", + "id": "rs_123", + "index": 1, + }, + {"type": "text", "text": "text 
block one", "index": 2, "id": "msg_123"}, + { + "type": "text", + "text": "another text block", + "index": 3, + "id": "msg_123", + }, + { + "type": "reasoning", + "reasoning": "more reasoning", + "id": "rs_234", + "extras": {"encrypted_content": "encrypted-content"}, + "index": 4, + }, + { + "type": "reasoning", + "reasoning": "still more reasoning", + "id": "rs_234", + "index": 5, + }, + {"type": "text", "text": "more", "index": 6, "id": "msg_234"}, + {"type": "text", "text": "text", "index": 7, "id": "msg_234"}, + ], + ), + ], +) +def test_responses_stream(output_version: str, expected_content: list[dict]) -> None: + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version=output_version + ) mock_client = MagicMock() def mock_create(*args: Any, **kwargs: Any) -> MockSyncContextManager: @@ -630,36 +722,14 @@ def mock_create(*args: Any, **kwargs: Any) -> MockSyncContextManager: mock_client.responses.create = mock_create full: Optional[BaseMessageChunk] = None + chunks = [] with patch.object(llm, "root_client", mock_client): for chunk in llm.stream("test"): assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk + chunks.append(chunk) assert isinstance(full, AIMessageChunk) - expected_content = [ - { - "id": "rs_123", - "summary": [ - {"index": 0, "type": "summary_text", "text": "reasoning block one"}, - {"index": 1, "type": "summary_text", "text": "another reasoning block"}, - ], - "type": "reasoning", - "index": 0, - }, - {"type": "text", "text": "text block one", "index": 1, "id": "msg_123"}, - {"type": "text", "text": "another text block", "index": 2, "id": "msg_123"}, - { - "id": "rs_234", - "summary": [ - {"index": 0, "type": "summary_text", "text": "more reasoning"}, - {"index": 1, "type": "summary_text", "text": "still more reasoning"}, - ], - "type": "reasoning", - "index": 3, - }, - {"type": "text", "text": "more", "index": 4, "id": "msg_234"}, - {"type": "text", "text": "text", "index": 5, "id": "msg_234"}, - ] assert full.content == expected_content assert full.additional_kwargs == {} assert full.id == "resp_123" From c0e4361192a07f6f349707d2acf3b809acc71ff9 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 12 Aug 2025 18:03:19 -0400 Subject: [PATCH 11/56] core: populate tool_calls when initializing AIMessage via content_blocks --- libs/core/langchain_core/messages/ai.py | 11 ++++++++++- .../core/tests/unit_tests/messages/test_ai.py | 19 ++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 9d4a1e01879df..b38a7fa1a4924 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -203,6 +203,13 @@ def __init__( ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" if content_blocks is not None: + # If there are tool calls in content_blocks, but not in tool_calls, add them + content_tool_calls = [ + block for block in content_blocks if block.get("type") == "tool_call" + ] + if content_tool_calls and "tool_calls" not in kwargs: + kwargs["tool_calls"] = content_tool_calls + super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), **kwargs, @@ -273,7 +280,9 @@ def _backwards_compat_tool_calls(cls, values: dict) -> Any: # Ensure "type" is properly set on all tool call-like dicts. 
if tool_calls := values.get("tool_calls"): values["tool_calls"] = [ - create_tool_call(**{k: v for k, v in tc.items() if k != "type"}) + create_tool_call( + **{k: v for k, v in tc.items() if k not in ("type", "extras")} + ) for tc in tool_calls ] if invalid_tool_calls := values.get("invalid_tool_calls"): diff --git a/libs/core/tests/unit_tests/messages/test_ai.py b/libs/core/tests/unit_tests/messages/test_ai.py index 81981725c5005..a7225015c2cff 100644 --- a/libs/core/tests/unit_tests/messages/test_ai.py +++ b/libs/core/tests/unit_tests/messages/test_ai.py @@ -253,7 +253,7 @@ def test_content_blocks() -> None: "id": "abc_123", }, ] - missing_tool_call = { + missing_tool_call: types.ToolCall = { "type": "tool_call", "name": "bar", "args": {"c": "d"}, @@ -267,3 +267,20 @@ def test_content_blocks() -> None: ], ) assert message.content_blocks == [*standard_content, missing_tool_call] + + # Check we auto-populate tool_calls + standard_content = [ + {"type": "text", "text": "foo"}, + { + "type": "tool_call", + "name": "foo", + "args": {"a": "b"}, + "id": "abc_123", + }, + missing_tool_call, + ] + message = AIMessage(content_blocks=standard_content) + assert message.tool_calls == [ + {"type": "tool_call", "name": "foo", "args": {"a": "b"}, "id": "abc_123"}, + missing_tool_call, + ] From 5c961ca4f683dcc71a259bc17304f88fb8f0b915 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 12 Aug 2025 18:10:20 -0400 Subject: [PATCH 12/56] update test_base --- .../openai/langchain_openai/chat_models/base.py | 4 ++-- .../tests/unit_tests/chat_models/test_base.py | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 947073b0e6a29..552d45e41eec3 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -3695,7 +3695,7 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: if isinstance(lc_msg, AIMessage): lc_msg = _convert_from_v03_ai_message(lc_msg) msg = _convert_message_to_dict(lc_msg) - if isinstance(lc_msg, AIMessage): + if isinstance(lc_msg, AIMessage) and isinstance(msg.get("content"), list): msg["content"] = _convert_from_v1_to_responses( msg["content"], lc_msg.tool_calls ) @@ -4278,7 +4278,7 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: return current_index, current_output_index, current_sub_index, None if output_version == "v1": - content = cast(AIMessageChunk, _convert_to_v1_from_responses(content)) + content = cast(list[dict], _convert_to_v1_from_responses(content)) for content_block in content: if ( isinstance(content_block, dict) diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index 6713f8c967b7b..ee89dc47fb8a7 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -26,7 +26,6 @@ from langchain_core.runnables import RunnableLambda from langchain_core.tracers.base import BaseTracer from langchain_core.tracers.schemas import Run -from langchain_core.v1.messages import AIMessage as AIMessageV1 from openai.types.responses import ResponseOutputMessage, ResponseReasoningItem from openai.types.responses.response import IncompleteDetails, Response, ResponseUsage from openai.types.responses.response_error import ResponseError @@ -2443,7 
+2442,7 @@ def test_compat_responses_v03() -> None:
     "message_v1, expected",
     [
         (
-            AIMessageV1(
+            AIMessage(
                 [
                     {"type": "reasoning", "reasoning": "Reasoning text"},
                     {
@@ -2463,7 +2462,7 @@ def test_compat_responses_v03() -> None:
             id="chatcmpl-123",
             response_metadata={"model_provider": "openai", "model_name": "gpt-4.1"},
         ),
-        AIMessageV1(
+        AIMessage(
             [{"type": "text", "text": "Hello, world!"}],
             id="chatcmpl-123",
             response_metadata={"model_provider": "openai", "model_name": "gpt-4.1"},
@@ -2472,7 +2471,7 @@ def test_compat_responses_v03() -> None:
     ],
 )
 def test_convert_from_v1_to_chat_completions(
-    message_v1: AIMessageV1, expected: AIMessageV1
+    message_v1: AIMessage, expected: AIMessage
 ) -> None:
     result = _convert_from_v1_to_chat_completions(message_v1)
     assert result == expected
@@ -2486,8 +2485,8 @@ def test_convert_from_v1_to_chat_completions(
     "message_v1, expected",
     [
         (
-            AIMessageV1(
-                [
+            AIMessage(
+                content_blocks=[
                     {"type": "reasoning", "id": "abc123"},
                     {"type": "reasoning", "id": "abc234", "reasoning": "foo "},
                     {"type": "reasoning", "id": "abc234", "reasoning": "bar"},
@@ -2574,9 +2573,11 @@ def test_convert_from_v1_to_chat_completions(
     ],
 )
 def test_convert_from_v1_to_responses(
-    message_v1: AIMessageV1, expected: AIMessageV1
+    message_v1: AIMessage, expected: list[dict[str, Any]]
 ) -> None:
-    result = _convert_from_v1_to_responses(message_v1.content, message_v1.tool_calls)
+    result = _convert_from_v1_to_responses(
+        message_v1.content_blocks, message_v1.tool_calls
+    )
     assert result == expected
 
     # Check no mutation

From 0c7294f608df054081852ae74db90b1c140cb94c Mon Sep 17 00:00:00 2001
From: Chester Curme
Date: Wed, 13 Aug 2025 10:08:37 -0400
Subject: [PATCH 13/56] openai: pull in responses api integration tests from 0.4 branch

---
 .../cassettes/test_function_calling.yaml.gz   | Bin 0 -> 7912 bytes
 .../test_parsed_pydantic_schema.yaml.gz       | Bin 0 -> 4616 bytes
 .../tests/cassettes/test_web_search.yaml.gz   | Bin 24336 -> 27998 bytes
 .../chat_models/test_responses_api.py         | 628 +++++++++++++++---
 4 files changed, 543 insertions(+), 85 deletions(-)
 create mode 100644 libs/partners/openai/tests/cassettes/test_function_calling.yaml.gz
 create mode 100644 libs/partners/openai/tests/cassettes/test_parsed_pydantic_schema.yaml.gz

diff --git a/libs/partners/openai/tests/cassettes/test_function_calling.yaml.gz b/libs/partners/openai/tests/cassettes/test_function_calling.yaml.gz
new file mode 100644
index 0000000000000000000000000000000000000000..197a8402cf6ebaf57e31e7d5600363e77502e1c7
GIT binary patch
literal 7912
[base85-encoded binary cassette data omitted]

diff --git a/libs/partners/openai/tests/cassettes/test_parsed_pydantic_schema.yaml.gz b/libs/partners/openai/tests/cassettes/test_parsed_pydantic_schema.yaml.gz
new file mode 100644
index 0000000000000000000000000000000000000000..13c0b8896decc64a0df2d5181abd8873694b343d
GIT binary patch
literal 4616
[base85-encoded binary cassette data omitted]

diff --git a/libs/partners/openai/tests/cassettes/test_web_search.yaml.gz b/libs/partners/openai/tests/cassettes/test_web_search.yaml.gz
index e99f1c2e13a14928c0b2b025eab331ac4ec326f3..a202dfe9c614179cbb87d0bb5321229a94be9c85 100644
GIT binary patch
literal 27998
[base85-encoded binary cassette data omitted]

literal 24336
[base85-encoded binary cassette data omitted]
zqiEyi4i;+}adkB#JFX6HOYxO^&ITOgL@zOL?3F3K)1NSK{7~&{pb}I#&L9)JRg`zn z+(^QY6TF5Ri(ZvS!RGo^J>B??O|PNGstKqgRRLDb$zOmSK=NWL{t=g59!GI4U6sU9 z)LXswHw1o!X9KZ?RO8d z#|p5~FVcf-UX#*MnDRS{nL`eJ>UboeAQe4U`vp1%SV>@Q8ij>5lI%>18nxwbNZU+c z0jVxthYdWJ{o-{Pt&G`OXD|O1YR(ySK$>E9=8<~Ek1b}uKWON<*?3LYaugU34M1+* z^IgkPUvTt*bW*N-5y$B8!ucN!+JlW5$o$1wyw6!y@Y;e7#oFTI(Q+Ae- zzhi(HSC1+CZ>Ln<}rOct; zSLMHyIe=GeEg7@3?4##-eqeU?g3Fj+no$gXfsCT36p!jY$qCr-RhJ20y2Spv?6TLQ zI<_HSbZ{|U%)Uu>t=U+{j3O(4M&XY1KP2oBJsad??-1Skv}L+9PtE=UHHumxK8{dd z%gDwX6n+6VUZWn4LkSPuDPMIrmf>24ucMMn9`Xre$5s83-kEpbox(TdYbe|2ItHZl zt{)6@GCB$~l>aJ$fr4kGCQ{WXX1_p+ji5+Exvv|qaM=4*{hoLwMU>&2 z$4pviUvR7+F?eEgjH|M{{fq^#>AvPRP!yWbrSppHmSe+R+6{j(ZhqZ@{Aa*;PELLp z@7|8aKMKZk^76xYVP=6&M_y|i$(HBN?vdBpMsoUs*gbN-x+6@0v>hmN!o8P~4&Nf`Vf$o);mF^`@s33e)OZ_hIwHKn0hM`?A&Jy%LcAgka54V;W zu=E{r$0lBi=NvhZnxfAjFJE*a#5xdfsfXo=2iC=qAXpd_HWFRKTd2n)ZQ_Ew#$jox z&CD%?SKGMFF8tlZzpvCvX3{y|+*hp$uY-|cyI1^n$Q@xvX@9(j#V>jYJItNwVK!nT zeK(2${Fm&C5gF-*jx-bMUS_ZN;7`)a75?tw-~ap{ShCG1rzcQ-Yi9McI|l+k)0lhT zM-p`mm`L9-pyk;9ff=_qZz?OMju(ZPDAAi9Z{?@lL#F zH!o&v8QY;0bm`lnsC9n_ zE5E@}%6RkQT#^6g#g1WRaK+_2<=*Z)5}$H!vB27si9uFqOHVb1q&pgeqRDu8abA%9 z@Z$Ln+p^resO`0i^fxcoTfS`+kBBMK+&L3z)tc;g&Unke9?39RXz(IZ0B%|)OWeCr zDSQP4#fbs0Aq5CWe;E?h4T@{KQOtpUE+k~*MWyIhxbFCVV#?o142VjE@Ebd4 z@oPV2A_jo9`UQvqm)R&kvGHeAQsi&^Nv>s(u-i81o&47wKhfQo$?`?)F%%fb++M>T zLxHj7c@cX|>zhoLud4OjNLaa&)k0)I0j%JIclZ1~FkZw{d`)5)kJG)3!&<70 zq9%Pt(F&YE!mh^PFr@El9Jn%`Nr`HT{AW_EMQdmGTK1scc-wL?u!o89=o%9^=~aB^rRks0q6-g?2XyQ555sJQbYM~1hw%D&jbC){&qHA zq|G$h*?0>VCffDSk)wXznG_IQNB(|pxVz>Oc3+6pto+><+~zREGZZpj!>xt7_bXf_ zaE*fe)4DIf-gV^fzTjCfatZI8iPXIOch1;Ki<~sVb7V3)-MRb}_>eM^3m|GoMo2U& zNeqcHS=uu7#?vBA{@!@a?lRr#LxyhKAUjX~Zrkaf;GwAUqV9pz04h2cH~Zmam^%bnV`EY5KWEuA$E$j6GZXxg{;V zGcryG)>pGy?WTTYoMiDl^_@k=JW07Bt0m5(6~VKiHPU3v&Z3F@+1Xu=Ivjq^BD<8?UKBVuZ)B4HD>Ov=^}QI0EyU8l)IzQNqv0Y1C(KMWW8- zT1K!$5_c|_v3Ax8rLUb$T+A+%VI~c!$U#`qzvx9KMd$KV0~`qtdxIQ*fy3TYGe;T- zT_pgefQVPrBa^)6+Fs7y1CMfIz-#KcxQWm0@e_D7FJBjU&^=TBF7Rtx(fRQoIgS%# zc;75(j}xd3&Vhm8uYmPj`%3ch6Uaa8K6shQKY0mJ1N}-HDOpg1l7x_`erFr0p8<)| zo|gACvlNcGT&o|gACvpfk|&-b*vV#Twv7H2Lsv+R)H z@iOuCo|adeSpFwaCn*4nRlT9e$xwAp$7r5dAuUBatR+ci& z$atJ!kp11s;%+!^p!~k#;;Bv+*-NRcOr0z)U(Fx^jMtz10x;gSySaW|`zXCXm%e>e zQU|y0fKh&29a5i_#{rJc$pHt2!u$T%zo%@DI=S;!Muo9cT00LL(wb0z`ar%m}t%#wKixg)6w&- zQScqv+qq+bSA%Ht%glpA^{?q8Djp}ewI;3qd!DD^QSZMGfhpQ*-<%b;Zf|zX7in0@yca~RvbBzKUrCP-k z>!nbu;$g({5^SMQ81%hzWGh);ZI*YA;?r}_je_syw271J?wkPo2A2aTC~h#=hR~O* zdZi3H=|^qf4$9WTeUXz?MBvzrI^FtL6*aUWz;js!NfMYc*@Ww{p-tBbJUu{ z3|3}u&*IBr;Lj+=R{QIXytdG=bMdvZ4Qu5r^o&X%=fPxbsk6}5;3-2` zmX)d0*LHJ0Ntlgu(iMX0%}PT=|lg?Suk58Gxd=BajaWp<`sHA3<>oyNUd+2Z{#r z12YSw43Yhm-M|K1xuA=-V#4zWRC}Gr>iW_&+$@M4^iG?ALCcs%YmGm*jLrl`kF}nh z?H#`gChj|dTYwE4+RDl|bG+NOTBoHT+*B~3DJ7kj_po;r^w}^fP$JMt_!{^*Y}Db* zZQ_J6wj=-reikADeeX0|yN`yy zF)@$^S?V)&GFJRGn&#L#p+(p5ggXd^ktK0O*@(uCI<7W7NK0T5wBE#sEkOga7MO}7 z%>Ye-<@$urKxEuyC@a{?Ihs<-V0#;rH(Zk8+{D+xGGXb`ZXDulI5BXRVWD-(z_6xc zBaaHSbSWyk{9* z7y;xHhRrO%8&V4fr-9J9O`$ zn_$dZ#HE2*$Wlv_pY)U>MD0F!%he}*rX81+aiwYJp9V(;)f(*Ku-wN?6%=G-qH79? 
z?!hvY=df%u(ldC0mdFZ7!~vs)NEyE}j7a4&WeM!Qb#lslOpz zqDu$e+oLT-GXsLs@iI8E$tANy<^udp!ve?Yf^y)WnTcT(A-~CjT^vk=R$buhAubB{ zNjIQI0he+KvLtRT2@Oc!5zsGoE7-9qZWKLgna~uo*b*yIL`X!rK-GN=6j>BZRXRER z)x`Y@P5=}LL7P~Wg@Yv!;f;_d+#x~{xa$+)tkiMDR-X|a1jhmX1ROzehLeVUzjJav zPH=;}>#e>yxyO`8ZlWH#?ptG;fmAALdw=e}GRN(mu$)}|P9HbL7&Hw(HC-6OVeTf?QVSOUN`{-D z{JApZIvo;`SU1;n&wuI*OrufiW|CKig$3+m4?N=nzYPOtC&bmyT-+s~bZb$TNc|cq z`ie(xvAOW}9wyR4m8qcN&6bdlgYgopPkC~RTOhA><3ODIaSY)*Bw%sE(uVz;jy47w zK+Z5yj5~>aq`+eb7uBF0;nTaLiLDToOiN8 zwZ5s~H6b93j7^qN%|Up}VtV2&Id6kcctS(_G70U~$|TgiJj6@+I9`Ikd-(S`?(1sm zkkh1}<56(@x(oaEi(oL7eehuL_9W!1?1PQ6*DAg@pX0EewD5SI!^+63oRs!Fhm|c= zl0!ZnP_syyTcR5|ti#Mjz?LDk$;6)qY#CC^mgfb4EyFvxsy`3dGNhIVFwXacaEAcJ-3jh4#kFX0ibIj&!6kTcr%+sq)8bkjiqqom?p)|Q-#2&W&Sd5} zXP>pUBp54`7kBO~ie+d1VoFKi|U|AXc7kdXZOK$bFPB_9T8i zABJ>bUg1h7+pq?q)3o+&#sALL=c!k^*ZjIy$JMdKvt;F770hed%p)|I7hr8fJIo88 z1^Ti9_ZrTq68_glUG~epUU`zHdN$(A*#oyv@Gt*n(|`GYoW8_g@C7UX?olB-R?CK)9k5`tgWkKa9!&)6zsP3^Rj6O*dZkI3s z8%&?%6<;1lN9NT5YLi_82$w)+VLygqmBeH^?L_J>X?CF)lMy>b?Gm zK^fr+fICGh8kzH7vc;tS)REd!QPRzjRKZ=h)m{lh&dL8QPq?4VnXV%<%)fE7>1Vc8 z?H!%>VEPz1pfw)C{_7Iu=L8V6EraUvwb6Rzphs;@3Yf7&52TmG<>R$|OV(@QM1m7O zxfgE$-Xe9E0bcP@9pW;;!tYnmvlt!s#i#OzeP zoER%rb)LDr-;6uxSwiwd@;Oga4vWPO3x;~A8>b9`NhpdSj>tPU$H_CS&tjY6koue`0EP!M4x-*z2LP!247%SME4U<0Z>zWD@aely*C|NcUGk3msf}V5T6NoDSi}}1 zJEN4{nleeK>E)B1#nRY6Ft34H+>WceXfZ&B|_0WhnbcT~2@?wGum)Oj>Gin#1%-W*`RkiVo~ z!f>X&Vg1J>E)ZKwe#Ty^!1%PFi%E{7yL&T|T10U%$HMz+?X`V%LP7S^cw7zZ`y#&@ zSb*q->Yv~HjZT*q?W6<|dG>l{57t4~t&9CO+%L8gmd{~ho!zr%CUIpfRQNdZDO6N) zirLT^*&*qgaY3D%i&O`iXMQrJt1~@e<>Top;PST|0#7F~MkkFhzRdCzEtwyD6FF#u zQRac4#+-jowu;|suED$%*K7XExr+x>n=gP^Dw64vjQAs(%<+yfP&Z5(8}Xw?c4n;P zyvbEd_%0e9RtCydk~+@I+kx72qJw+0i$E@}+Cm^!``Ltd57>3M;?Dds+;vxU+t{d7 z7FQRYwS!uNd7-@DIRrJC$>w)=H*;rlo(-@y(2yT0ldYJ%T0`DL7J@VGDJwMf5L>~8 z!iD6P0EZWcxWl9_{N9b#BU>@R;Gs`=PHMRAr;sa@BN4tuVfgaRV-WR*0j;dg@A*Ar zTd}eIgxMJc_C`?@Wyu1Q%8B%JnXoLchEjd}HdAf1;o*9&YfG^b)NdMnfbTR`!65A0 zQMC7DaC1Ap0|*8B(|tw`I?l#~@wIwAZ8}wC0Sq>KB0*wpB7YVrapwCtf!U2bfO(9l zYOGd`@L}Ndaz0qRffJc#ojJ5z39gFe!(pFD9hRi<)%)5*1E{9gKvyL3+cAGV3MT!xln25plo zq-Kx5L7PNzU)dfa|4jX1LowRD`wH!?5&*DI9lwQ?81~J1-Kd(5weNOmiwMlARU3Rw zk$|dFPjAF>KPWgGc$&=;LypJ3r^vh4G-V8pKrJ&l2`GwOqKf0xqmN|2EfGwV-=8}n z-e$#b>J0qV(RA?B`fVe#9|r%xMVgAEnJL@P<3?9r?}eL13%K>jk)3XOHIMtrDiE z$JOV+D7EGgI}24bL#SHwJiYO|6|-cPuP^(TPBr5x+v}>hP3oFQkRG3|9lxZZED$^0 zzea^z1TEY|6*ixU7hea-I$|ieS7YR(E6?O?c^QaK_XOoN@F_tuoBcK5s^~WKlE613 zb4u9sm8jy-B>3Nj%4p_)5K+HZ$k0ggPSV69eX`)Locq6`BNkq_o*h4ik}`J${-e;J z{MW@pz}bIwp+Txd%TV=ZoQE^WKW>fGe~7cCbmy3%5r(Yde{Fy6{SOBo^8T-uj92I@ z-oQk-=wE{7A431;O5yw|l;=X%n}#EEU7+bej+6CouHv#AxPODnBL0`i>#NVdL|H&p zRl?T}5{J&eg#3P4)cI5U=!~r|@Yt^2>;Lk_6{`Q&5DHwPrCV#9tQabhm8 zwKw7Kp}6Gu>0R7AEBoq3>JZY=)!b40kR7g|IpD<`${l$BL>AgPuO-?9$^~6ZxH{$1 zceq2)^&7zSDm2KvLus{uz`rdmuR;)?@EpCpdqaK^%1X%N(A=Okkg^XUhBtdrN zQ!(yGi=Du{bMfpt=O2V9w^T-q!W;{J_EUd2yRWDX91 z+tt9aQIrzpLn$)~o=A!^6IEONt@qJmIw$!vuE+0B?B2OgPsBAIx+2l?1prX6?_Olp zynk_G7?iXpXS=>c>uoL(tgpR0$w@rz zis~aM!V+z~GZdiM=RtokaJmcu1zZ+=s^H7Xe$|rdmA`071OL;K)~Qnb^!0nFf7EsS z>2;KbwAMU!JU}O3kG` zvn2Tjq8Osh8sZ@ht~1UtYo)5I+N!~}LddFK&Y>v(na%~z1)t60sFgKvaBg4v(G4Sg zBHf&6Gg2qL!}64VmhBc*o3yB16}7MjViu13=E_@Z{ZoU?d6!Q_v?hIGa+{{~jdDfD zSkRq)d6P31!mB%i;_mhof<}0f+(cq*4?)zhtbxEIshOpNlI)5f-{V{n+@1?|h8m|? 
zIM#Mb4INi4=*IKd)%X;{Ih<^muW(u;aD{J}w3d25OfU@j1Og+3fuT_ z^z?K`Oig6mEJnnFMO$6VOpi?6@#-9pdXt6q)X(sCGl*!v>{WFAe-8wdt+MOu>!bEPMZRwbTMf2&J`qSO=9GZPl7Zwdw5CN)2JIQa=#$^e+xSA^F^^XYrg6yc@@-);0)WU{7+%)J6#KerC!nNJbl}vjdY7C{sv!ZJcvcoe86cmi;Dre%= zR`aTcL|9>u!Q`W3!cI<(c0{c3I=iQw`VUMckV@%Q2UuN9gj z#TY~Xo)U$V7K+w6BTUH=fbCKZdwyBg;*ciwiz=js%l>_F)&X~er~@9E-eY`Gg)lJwrKR0ld z{H)>0L)3s`cE}Su>`pO{=~)gD8mNiNdcCtoH_VMy?kAcjM_apv^o68VEn0gqWy#bFW6r2(ioagq&v3o5i$Fsc^= z8W@sGrMfN@23v5!)g_nh+%0cq?io~(>guY3l=Qx=#PM#{Qc}H{DQjzReNwAc+}R6Z z*gl_GV#a?mDU3UjfW!Y)3GJmXcY-);eF!uhVvvw!@?H;MB~_%>a7OLXXQ%$qtKGj5p4{pKozSr4-5G@8wx^0or}9Gf z3Yf$44+nqaD(5{dQkW0yX*#U7eEC~=nK~v}ng#+2VT5>6ha%n%#s^zlHWiTfipTBH z^Z%}OLvQJ+3i#}lKf^VR#rCc8azk;Q@rjoB$J1RXsuvOSWG zWL~T52%GNb&QYefbB;FpoR)zaG6N`z)fnBP;#vA_3v>Cfq zXx}>ZZ~ikz=m##|_4V`py6s=v4-*v)s%h*M3A=ERW*DRnU?Pt%Yg>{c6CLYx&_W^0C5&cx>=s zwa$(?8e-uZDqY;YrwwoK8(oixs=%1cC{?Z*`3WH~pXO1)Pd(ht4od{DusD>k@_U$# z9|I~=ln`=DF)vGhGM&NGI1mlTf;^A#6V+W!OH2 zop$%uMpxXmkTW{cDc;0iwqVPoif7t)mWicaZrM71zv&VCAjRb}rhfAyI;DYvNfwOW zME^}PB8AuS?A6^#976Gm4Ug-Y_V2Uf$I(DvX*#JnB+NV&WGFyAxv&5T=8sgu&2fJt zzYx;F2@2H`kIjFIPz;QP|7PK=h@Tk`7Xh#CR@_$B9hfkY(aqC+^9S5*xJ*?*fL#vs zQ-PuVPyzCxu*9dzC{iU9-z2`0T`jyc?ScBdW!9K!4BoZGOh(31@${oOC(zNf1~5qM zFoW4Q?~&}oX#f>6&ZZN}3i2GGQQK8mo5KPUe1iG6 zmlcxwz6U7P2i4Hk36`aM84p<-FoK1U?IHbAII|{287}Un66+q^EvTfKdc|6zS6uNs z-u#dEd1&$kgEMGD5yeYArs5UNZF%&<;lhgygEjlKvPHTBlq9hKmv>kD-;rAT$qug*oj2YZe(%TXw z(lM$q2Q?euhG8A`VT$ZE`vG~Jh3`m!3C4MbkY*rW?*=Hr?oU2rNpiEd^A-f($0Z^rjQ(Q`K8c&1Fis zhpW|0ZaWBnD&Dj(ke7-Hip?cA&!9#)AGi7Z61N^z@CwhI43CuXbSu$q+JkIF0$4}u z;l0#%GTcbJuZ%SgwmV6sD@pj6V9D0QTy7OCO=)~qJ`L~^cHmcMbl+Id24)I|E7v&? zGab>losFaD>)My;UjU>e^7}0H-1^^LfmeOWXB?PGGk7`H6jN`=hr&yH`i4PF;XfL! zb+EiskY#bPGl(*9q;`-;eV_7KTZG={keAb+5tDR%Qzl0Fo;=$$<( zB(!oc5aUVK-V*QR*c8B9hC17vNJYd<>a@S|LoC8yHaIMd|pvfkFW$b^azD;;-=u$tY#B6-jlKbOs+xDZ{rF0RQS zc;&)!$Ftn>Y5C5oTN1I?BGyLc|t5GIJ8ecEdh>uYhfA% z3P?kd7?oEkuQ)i;Oo9O`lF8VsnYD>-$zcj_nm8t8Nx;E zY-QNAh27Vqdh4Cty2{x~5BLg!H*USdwlVtJpkiC!k5yr8=iKEGp!=lWg>A3Cw4 z#^5-zBzF9zcqsJpBeqjD9&fR%vTA-}>icKUcI|(P z<4gim7H5C|sv4<8x9xaa^CDNrgZtP5WdV9xx;?22&O}-*m5dR6Kt$1#>Qry3>h%yZ zGY_<-)6DSf7vHzfAS=g82LTqgX&fHrZ_Z18(|5ba9z~|BCh!sR*GZw3U+pz=~x)W`h+*UP)G!HF|FyA__uD4HiB z#B$Pnq6nA%fp7FN@`vIM2LO5MJL(~F$1+hlt&Mda+hL|4m#hR%Mw4CTVs))YX@ZQJ z*Bn@xLN9);f)=ITGUm6%?|&uD5)oP0?6Fr!G@J zB)Qt#NhnmDN|OM33*F3@$V^;*M@ZgHVsfF62qBqs>WPF~W(RY4u|4;X54*B&>4lyj zT5o{~xi}m!^U}6WlxTk;O}spyMJuO};u^Ccl=mMGdqsW?7!nS8r6%Hg|3ePrG~D>U zQqOmiuM1wN!*8|Q@kZW`AhMn|VD%~KX9_|7;b~1SPM?H)Fw?y-`aV>BMjjzOxGZ1^ uLM=uOQv&q)P7ZavtzE`tiW+uQxpL$_=S#nJ;XZ$c-=F$1`>_=c=6?VW+IW!w diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 32d3f199c85cf..5906a837e8713 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -2,7 +2,7 @@ import json import os -from typing import Annotated, Any, Literal, Optional, cast +from typing import Annotated, Any, Literal, Optional, Union, cast import openai import pytest @@ -14,22 +14,33 @@ HumanMessage, MessageLikeRepresentation, ) +from langchain_core.v1.messages import AIMessage as AIMessageV1 +from langchain_core.v1.messages import AIMessageChunk as AIMessageChunkV1 +from langchain_core.v1.messages import HumanMessage as HumanMessageV1 from pydantic import BaseModel from typing_extensions import TypedDict from langchain_openai import ChatOpenAI, custom_tool +from langchain_openai.v1 
import ChatOpenAI as ChatOpenAIV1 MODEL_NAME = "gpt-4o-mini" -def _check_response(response: Optional[BaseMessage]) -> None: - assert isinstance(response, AIMessage) +def _check_response( + response: Optional[Union[BaseMessage, AIMessageV1]], output_version: str +) -> None: + if output_version == "v1": + assert isinstance(response, AIMessageV1) or isinstance( + response, AIMessageChunkV1 + ) + else: + assert isinstance(response, AIMessage) assert isinstance(response.content, list) for block in response.content: assert isinstance(block, dict) if block["type"] == "text": - assert isinstance(block["text"], str) - for annotation in block["annotations"]: + assert isinstance(block["text"], str) # type: ignore[typeddict-item] + for annotation in block["annotations"]: # type: ignore[typeddict-item] if annotation["type"] == "file_citation": assert all( key in annotation @@ -40,8 +51,16 @@ def _check_response(response: Optional[BaseMessage]) -> None: key in annotation for key in ["end_index", "start_index", "title", "type", "url"] ) - - text_content = response.text() + elif annotation["type"] == "citation": + assert all(key in annotation for key in ["title", "type"]) + if "url" in annotation: + assert "start_index" in annotation + assert "end_index" in annotation + + if output_version == "v1": + text_content = response.text + else: + text_content = response.text() # type: ignore[operator,misc] assert isinstance(text_content, str) assert text_content assert response.usage_metadata @@ -49,68 +68,74 @@ def _check_response(response: Optional[BaseMessage]) -> None: assert response.usage_metadata["output_tokens"] > 0 assert response.usage_metadata["total_tokens"] > 0 assert response.response_metadata["model_name"] - assert response.response_metadata["service_tier"] + assert response.response_metadata["service_tier"] # type: ignore[typeddict-item] +@pytest.mark.default_cassette("test_web_search.yaml.gz") @pytest.mark.vcr -def test_web_search() -> None: - llm = ChatOpenAI(model=MODEL_NAME, output_version="responses/v1") +@pytest.mark.parametrize("output_version", ["responses/v1", "v1"]) +def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: + if output_version == "v1": + llm = ChatOpenAIV1(model=MODEL_NAME) + else: + llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version) # type: ignore[assignment] first_response = llm.invoke( "What was a positive news story from today?", tools=[{"type": "web_search_preview"}], ) - _check_response(first_response) + _check_response(first_response, output_version) # Test streaming - full: Optional[BaseMessageChunk] = None - for chunk in llm.stream( - "What was a positive news story from today?", - tools=[{"type": "web_search_preview"}], - ): - assert isinstance(chunk, AIMessageChunk) - full = chunk if full is None else full + chunk - _check_response(full) + if isinstance(llm, ChatOpenAIV1): + full: Optional[AIMessageChunkV1] = None + for chunk in llm.stream( + "What was a positive news story from today?", + tools=[{"type": "web_search_preview"}], + ): + assert isinstance(chunk, AIMessageChunkV1) + full = chunk if full is None else full + chunk + else: + full: Optional[BaseMessageChunk] = None # type: ignore[no-redef] + for chunk in llm.stream( + "What was a positive news story from today?", + tools=[{"type": "web_search_preview"}], + ): + assert isinstance(chunk, AIMessageChunk) + full = chunk if full is None else full + chunk + _check_response(full, output_version) # Use OpenAI's stateful API response = llm.invoke( "what about a negative one", 
tools=[{"type": "web_search_preview"}], - previous_response_id=first_response.response_metadata["id"], + previous_response_id=first_response.response_metadata["id"], # type: ignore[typeddict-item] ) - _check_response(response) + _check_response(response, output_version) # Manually pass in chat history response = llm.invoke( [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What was a positive news story from today?", - } - ], - }, + {"role": "user", "content": "What was a positive news story from today?"}, first_response, - { - "role": "user", - "content": [{"type": "text", "text": "what about a negative one"}], - }, + {"role": "user", "content": "what about a negative one"}, ], tools=[{"type": "web_search_preview"}], ) - _check_response(response) + _check_response(response, output_version) # Bind tool response = llm.bind_tools([{"type": "web_search_preview"}]).invoke( "What was a positive news story from today?" ) - _check_response(response) + _check_response(response, output_version) for msg in [first_response, full, response]: - assert isinstance(msg, AIMessage) + assert msg is not None block_types = [block["type"] for block in msg.content] # type: ignore[index] - assert block_types == ["web_search_call", "text"] + if output_version == "responses/v1": + assert block_types == ["web_search_call", "text"] + else: + assert block_types == ["web_search_call", "web_search_result", "text"] @pytest.mark.flaky(retries=3, delay=1) @@ -120,7 +145,7 @@ async def test_web_search_async() -> None: "What was a positive news story from today?", tools=[{"type": "web_search_preview"}], ) - _check_response(response) + _check_response(response, "v0") assert response.response_metadata["status"] # Test streaming @@ -132,7 +157,7 @@ async def test_web_search_async() -> None: assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - _check_response(full) + _check_response(full, "v0") for msg in [response, full]: assert msg.additional_kwargs["tool_outputs"] @@ -141,13 +166,15 @@ async def test_web_search_async() -> None: assert tool_output["type"] == "web_search_call" -@pytest.mark.flaky(retries=3, delay=1) -def test_function_calling() -> None: +@pytest.mark.default_cassette("test_function_calling.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) +def test_function_calling(output_version: Literal["v0", "responses/v1"]) -> None: def multiply(x: int, y: int) -> int: """return x * y""" return x * y - llm = ChatOpenAI(model=MODEL_NAME) + llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version) bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}]) ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4")) assert len(ai_msg.tool_calls) == 1 @@ -163,7 +190,33 @@ def multiply(x: int, y: int) -> int: assert set(full.tool_calls[0]["args"]) == {"x", "y"} response = bound_llm.invoke("What was a positive news story from today?") - _check_response(response) + _check_response(response, output_version) + + +@pytest.mark.default_cassette("test_function_calling.yaml.gz") +@pytest.mark.vcr +def test_function_calling_v1() -> None: + def multiply(x: int, y: int) -> int: + """return x * y""" + return x * y + + llm = ChatOpenAIV1(model=MODEL_NAME) + bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}]) + ai_msg = bound_llm.invoke("whats 5 * 4") + assert len(ai_msg.tool_calls) == 1 + assert ai_msg.tool_calls[0]["name"] == "multiply" + assert 
set(ai_msg.tool_calls[0]["args"]) == {"x", "y"} + + full: Any = None + for chunk in bound_llm.stream("whats 5 * 4"): + assert isinstance(chunk, AIMessageChunkV1) + full = chunk if full is None else full + chunk + assert len(full.tool_calls) == 1 + assert full.tool_calls[0]["name"] == "multiply" + assert set(full.tool_calls[0]["args"]) == {"x", "y"} + + response = bound_llm.invoke("What was a positive news story from today?") + _check_response(response, "v1") class Foo(BaseModel): @@ -174,8 +227,13 @@ class FooDict(TypedDict): response: str -def test_parsed_pydantic_schema() -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) +@pytest.mark.default_cassette("test_parsed_pydantic_schema.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) +def test_parsed_pydantic_schema(output_version: Literal["v0", "responses/v1"]) -> None: + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version=output_version + ) response = llm.invoke("how are ya", response_format=Foo) parsed = Foo(**json.loads(response.text())) assert parsed == response.additional_kwargs["parsed"] @@ -192,6 +250,30 @@ def test_parsed_pydantic_schema() -> None: assert parsed.response +@pytest.mark.default_cassette("test_parsed_pydantic_schema.yaml.gz") +@pytest.mark.vcr +def test_parsed_pydantic_schema_v1() -> None: + llm = ChatOpenAIV1(model=MODEL_NAME, use_responses_api=True) + response = llm.invoke("how are ya", response_format=Foo) + assert response.text + parsed = Foo(**json.loads(response.text)) + assert parsed == response.parsed + assert parsed.response + + # Test stream + full: Optional[AIMessageChunkV1] = None + chunks = [] + for chunk in llm.stream("how are ya", response_format=Foo): + assert isinstance(chunk, AIMessageChunkV1) + full = chunk if full is None else full + chunk + chunks.append(chunk) + assert isinstance(full, AIMessageChunkV1) + assert full.text + parsed = Foo(**json.loads(full.text)) + assert parsed == full.parsed + assert parsed.response + + async def test_parsed_pydantic_schema_async() -> None: llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = await llm.ainvoke("how are ya", response_format=Foo) @@ -323,6 +405,26 @@ def test_reasoning(output_version: Literal["v0", "responses/v1"]) -> None: assert block_types == ["reasoning", "text"] +@pytest.mark.default_cassette("test_reasoning.yaml.gz") +@pytest.mark.vcr +def test_reasoning_v1() -> None: + llm = ChatOpenAIV1(model="o4-mini", use_responses_api=True) + response = llm.invoke("Hello", reasoning={"effort": "low"}) + assert isinstance(response, AIMessageV1) + + # Test init params + streaming + llm = ChatOpenAIV1(model="o4-mini", reasoning={"effort": "low"}) + full: Optional[AIMessageChunkV1] = None + for chunk in llm.stream("Hello"): + assert isinstance(chunk, AIMessageChunkV1) + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunkV1) + + for msg in [response, full]: + block_types = [block["type"] for block in msg.content] + assert block_types == ["reasoning", "text"] + + def test_stateful_api() -> None: llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = llm.invoke("how are you, my name is Bobo") @@ -358,20 +460,25 @@ def test_computer_calls() -> None: def test_file_search() -> None: pytest.skip() # TODO: set up infra - llm = ChatOpenAI(model=MODEL_NAME) + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) tool = { "type": "file_search", "vector_store_ids": [os.environ["OPENAI_VECTOR_STORE_ID"]], } - 
response = llm.invoke("What is deep research by OpenAI?", tools=[tool]) - _check_response(response) + + input_message = {"role": "user", "content": "What is deep research by OpenAI?"} + response = llm.invoke([input_message], tools=[tool]) + _check_response(response, "v0") full: Optional[BaseMessageChunk] = None - for chunk in llm.stream("What is deep research by OpenAI?", tools=[tool]): + for chunk in llm.stream([input_message], tools=[tool]): assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - _check_response(full) + _check_response(full, "v0") + + next_message = {"role": "user", "content": "Thank you."} + _ = llm.invoke([input_message, full, next_message]) @pytest.mark.default_cassette("test_stream_reasoning_summary.yaml.gz") @@ -398,20 +505,28 @@ def test_stream_reasoning_summary( if output_version == "v0": reasoning = response_1.additional_kwargs["reasoning"] assert set(reasoning.keys()) == {"id", "type", "summary"} + summary = reasoning["summary"] + assert isinstance(summary, list) + for block in summary: + assert isinstance(block, dict) + assert isinstance(block["type"], str) + assert isinstance(block["text"], str) + assert block["text"] else: + # output_version == "responses/v1" reasoning = next( block for block in response_1.content if block["type"] == "reasoning" # type: ignore[index] ) assert set(reasoning.keys()) == {"id", "type", "summary", "index"} - summary = reasoning["summary"] - assert isinstance(summary, list) - for block in summary: - assert isinstance(block, dict) - assert isinstance(block["type"], str) - assert isinstance(block["text"], str) - assert block["text"] + summary = reasoning["summary"] + assert isinstance(summary, list) + for block in summary: + assert isinstance(block, dict) + assert isinstance(block["type"], str) + assert isinstance(block["text"], str) + assert block["text"] # Check we can pass back summaries message_2 = {"role": "user", "content": "Thank you."} @@ -419,9 +534,48 @@ def test_stream_reasoning_summary( assert isinstance(response_2, AIMessage) +@pytest.mark.default_cassette("test_stream_reasoning_summary.yaml.gz") @pytest.mark.vcr -def test_code_interpreter() -> None: - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) +def test_stream_reasoning_summary_v1() -> None: + llm = ChatOpenAIV1( + model="o4-mini", + # Routes to Responses API if `reasoning` is set. 
+ reasoning={"effort": "medium", "summary": "auto"}, + ) + message_1 = { + "role": "user", + "content": "What was the third tallest buliding in the year 2000?", + } + response_1: Optional[AIMessageChunkV1] = None + for chunk in llm.stream([message_1]): + assert isinstance(chunk, AIMessageChunkV1) + response_1 = chunk if response_1 is None else response_1 + chunk + assert isinstance(response_1, AIMessageChunkV1) + + total_reasoning_blocks = 0 + for block in response_1.content: + if block["type"] == "reasoning": + total_reasoning_blocks += 1 + assert isinstance(block["id"], str) and block["id"].startswith("rs_") + assert isinstance(block["reasoning"], str) + assert isinstance(block["index"], int) + assert ( + total_reasoning_blocks > 1 + ) # This query typically generates multiple reasoning blocks + + # Check we can pass back summaries + message_2 = {"role": "user", "content": "Thank you."} + response_2 = llm.invoke([message_1, response_1, message_2]) + assert isinstance(response_2, AIMessageV1) + + +@pytest.mark.default_cassette("test_code_interpreter.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) +def test_code_interpreter(output_version: Literal["v0", "responses/v1"]) -> None: + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version=output_version + ) llm_with_tools = llm.bind_tools( [{"type": "code_interpreter", "container": {"type": "auto"}}] ) @@ -430,15 +584,25 @@ def test_code_interpreter() -> None: "content": "Write and run code to answer the question: what is 3^3?", } response = llm_with_tools.invoke([input_message]) - _check_response(response) - tool_outputs = response.additional_kwargs["tool_outputs"] - assert tool_outputs - assert any(output["type"] == "code_interpreter_call" for output in tool_outputs) + assert isinstance(response, AIMessage) + _check_response(response, output_version) + if output_version == "v0": + tool_outputs = [ + item + for item in response.additional_kwargs["tool_outputs"] + if item["type"] == "code_interpreter_call" + ] + else: + # responses/v1 + tool_outputs = [ + item + for item in response.content + if isinstance(item, dict) and item["type"] == "code_interpreter_call" + ] + assert len(tool_outputs) == 1 # Test streaming # Use same container - tool_outputs = response.additional_kwargs["tool_outputs"] - assert len(tool_outputs) == 1 container_id = tool_outputs[0]["container_id"] llm_with_tools = llm.bind_tools( [{"type": "code_interpreter", "container": container_id}] @@ -449,9 +613,72 @@ def test_code_interpreter() -> None: assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - tool_outputs = full.additional_kwargs["tool_outputs"] + if output_version == "v0": + tool_outputs = [ + item + for item in response.additional_kwargs["tool_outputs"] + if item["type"] == "code_interpreter_call" + ] + else: + # responses/v1 + tool_outputs = [ + item + for item in response.content + if isinstance(item, dict) and item["type"] == "code_interpreter_call" + ] + assert tool_outputs + + # Test we can pass back in + next_message = {"role": "user", "content": "Please add more comments to the code."} + _ = llm_with_tools.invoke([input_message, full, next_message]) + + +@pytest.mark.default_cassette("test_code_interpreter.yaml.gz") +@pytest.mark.vcr +def test_code_interpreter_v1() -> None: + llm = ChatOpenAIV1(model="o4-mini", use_responses_api=True) + llm_with_tools = llm.bind_tools( + [{"type": "code_interpreter", "container": 
{"type": "auto"}}] + ) + input_message = { + "role": "user", + "content": "Write and run code to answer the question: what is 3^3?", + } + response = llm_with_tools.invoke([input_message]) + assert isinstance(response, AIMessageV1) + _check_response(response, "v1") + + tool_outputs = [ + item for item in response.content if item["type"] == "code_interpreter_call" + ] + code_interpreter_result = next( + item for item in response.content if item["type"] == "code_interpreter_result" + ) + assert tool_outputs + assert code_interpreter_result + assert len(tool_outputs) == 1 + + # Test streaming + # Use same container + container_id = tool_outputs[0]["container_id"] # type: ignore[typeddict-item] + llm_with_tools = llm.bind_tools( + [{"type": "code_interpreter", "container": container_id}] + ) + + full: Optional[AIMessageChunkV1] = None + for chunk in llm_with_tools.stream([input_message]): + assert isinstance(chunk, AIMessageChunkV1) + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunkV1) + code_interpreter_call = next( + item for item in full.content if item["type"] == "code_interpreter_call" + ) + code_interpreter_result = next( + item for item in full.content if item["type"] == "code_interpreter_result" + ) + assert code_interpreter_call + assert code_interpreter_result assert tool_outputs - assert any(output["type"] == "code_interpreter_call" for output in tool_outputs) # Test we can pass back in next_message = {"role": "user", "content": "Please add more comments to the code."} @@ -546,10 +773,66 @@ def test_mcp_builtin_zdr() -> None: _ = llm_with_tools.invoke([input_message, full, approval_message]) -@pytest.mark.vcr() -def test_image_generation_streaming() -> None: +@pytest.mark.default_cassette("test_mcp_builtin_zdr.yaml.gz") +@pytest.mark.vcr +def test_mcp_builtin_zdr_v1() -> None: + llm = ChatOpenAIV1( + model="o4-mini", store=False, include=["reasoning.encrypted_content"] + ) + + llm_with_tools = llm.bind_tools( + [ + { + "type": "mcp", + "server_label": "deepwiki", + "server_url": "https://mcp.deepwiki.com/mcp", + "require_approval": {"always": {"tool_names": ["read_wiki_structure"]}}, + } + ] + ) + input_message = { + "role": "user", + "content": ( + "What transport protocols does the 2025-03-26 version of the MCP spec " + "support?" 
+ ), + } + full: Optional[AIMessageChunkV1] = None + for chunk in llm_with_tools.stream([input_message]): + assert isinstance(chunk, AIMessageChunkV1) + full = chunk if full is None else full + chunk + + assert isinstance(full, AIMessageChunkV1) + assert all(isinstance(block, dict) for block in full.content) + + approval_message = HumanMessageV1( + [ + { + "type": "non_standard", + "value": { + "type": "mcp_approval_response", + "approve": True, + "approval_request_id": block["value"]["id"], # type: ignore[index] + }, + } + for block in full.content + if block["type"] == "non_standard" + and block["value"]["type"] == "mcp_approval_request" # type: ignore[index] + ] + ) + _ = llm_with_tools.invoke([input_message, full, approval_message]) + + +@pytest.mark.default_cassette("test_image_generation_streaming.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) +def test_image_generation_streaming( + output_version: Literal["v0", "responses/v1"], +) -> None: """Test image generation streaming.""" - llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True) + llm = ChatOpenAI( + model="gpt-4.1", use_responses_api=True, output_version=output_version + ) tool = { "type": "image_generation", # For testing purposes let's keep the quality low, so the test runs faster. @@ -596,15 +879,82 @@ def test_image_generation_streaming() -> None: # At the moment, the streaming API does not pick up annotations fully. # So the following check is commented out. # _check_response(complete_ai_message) - tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0] - assert set(tool_output.keys()).issubset(expected_keys) + if output_version == "v0": + assert complete_ai_message.additional_kwargs["tool_outputs"] + tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0] + assert set(tool_output.keys()).issubset(expected_keys) + elif output_version == "responses/v1": + tool_output = next( + block + for block in complete_ai_message.content + if isinstance(block, dict) and block["type"] == "image_generation_call" + ) + assert set(tool_output.keys()).issubset(expected_keys) + else: + # v1 + standard_keys = {"type", "base64", "id", "status", "index"} + tool_output = next( + block + for block in complete_ai_message.content + if isinstance(block, dict) and block["type"] == "image" + ) + assert set(standard_keys).issubset(tool_output.keys()) -@pytest.mark.vcr() -def test_image_generation_multi_turn() -> None: +@pytest.mark.default_cassette("test_image_generation_streaming.yaml.gz") +@pytest.mark.vcr +def test_image_generation_streaming_v1() -> None: + """Test image generation streaming.""" + llm = ChatOpenAIV1(model="gpt-4.1", use_responses_api=True) + tool = { + "type": "image_generation", + "quality": "low", + "output_format": "jpeg", + "output_compression": 100, + "size": "1024x1024", + } + + expected_keys = { + # Standard + "type", + "base64", + "mime_type", + "id", + "index", + # OpenAI-specific + "background", + "output_format", + "quality", + "revised_prompt", + "size", + "status", + } + + full: Optional[AIMessageChunkV1] = None + for chunk in llm.stream("Draw a random short word in green font.", tools=[tool]): + assert isinstance(chunk, AIMessageChunkV1) + full = chunk if full is None else full + chunk + complete_ai_message = cast(AIMessageChunkV1, full) + + tool_output = next( + block + for block in complete_ai_message.content + if isinstance(block, dict) and block["type"] == "image" + ) + assert set(expected_keys).issubset(tool_output.keys()) + + 
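[Reviewer note, not part of the patch: the v1 streaming test above asserts that the accumulated message carries a standard "image" content block whose payload is base64-encoded under the "base64" key. A minimal consumption sketch under that assumption — the `word.jpeg` filename is illustrative only:

    import base64

    # `complete_ai_message` is the accumulated AIMessageChunkV1 from the
    # streaming loop in the test above; locate the standard v1 image block.
    image_block = next(
        block
        for block in complete_ai_message.content
        if isinstance(block, dict) and block["type"] == "image"
    )
    # The tool spec in the test requests JPEG output, so the decoded bytes
    # can be written directly to a .jpeg file.
    with open("word.jpeg", "wb") as f:
        f.write(base64.b64decode(image_block["base64"]))
]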
+@pytest.mark.default_cassette("test_image_generation_multi_turn.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) +def test_image_generation_multi_turn( + output_version: Literal["v0", "responses/v1"], +) -> None: """Test multi-turn editing of image generation by passing in history.""" # Test multi-turn - llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True) + llm = ChatOpenAI( + model="gpt-4.1", use_responses_api=True, output_version=output_version + ) # Test invocation tool = { "type": "image_generation", @@ -620,10 +970,41 @@ def test_image_generation_multi_turn() -> None: {"role": "user", "content": "Draw a random short word in green font."} ] ai_message = llm_with_tools.invoke(chat_history) - _check_response(ai_message) - tool_output = ai_message.additional_kwargs["tool_outputs"][0] + assert isinstance(ai_message, AIMessage) + _check_response(ai_message, output_version) - # Example tool output for an image + expected_keys = { + "id", + "background", + "output_format", + "quality", + "result", + "revised_prompt", + "size", + "status", + "type", + } + + if output_version == "v0": + tool_output = ai_message.additional_kwargs["tool_outputs"][0] + assert set(tool_output.keys()).issubset(expected_keys) + elif output_version == "responses/v1": + tool_output = next( + block + for block in ai_message.content + if isinstance(block, dict) and block["type"] == "image_generation_call" + ) + assert set(tool_output.keys()).issubset(expected_keys) + else: + standard_keys = {"type", "base64", "id", "status"} + tool_output = next( + block + for block in ai_message.content + if isinstance(block, dict) and block["type"] == "image" + ) + assert set(standard_keys).issubset(tool_output.keys()) + + # Example tool output for an image (v0) # { # "background": "opaque", # "id": "ig_683716a8ddf0819888572b20621c7ae4029ec8c11f8dacf8", @@ -639,19 +1020,90 @@ def test_image_generation_multi_turn() -> None: # "result": # base64 encode image data # } + chat_history.extend( + [ + # AI message with tool output + ai_message, + # New request + { + "role": "user", + "content": ( + "Now, change the font to blue. Keep the word and everything else " + "the same." 
+ ), + }, + ] + ) + + ai_message2 = llm_with_tools.invoke(chat_history) + assert isinstance(ai_message2, AIMessage) + _check_response(ai_message2, output_version) + + if output_version == "v0": + tool_output = ai_message2.additional_kwargs["tool_outputs"][0] + assert set(tool_output.keys()).issubset(expected_keys) + elif output_version == "responses/v1": + tool_output = next( + block + for block in ai_message2.content + if isinstance(block, dict) and block["type"] == "image_generation_call" + ) + assert set(tool_output.keys()).issubset(expected_keys) + else: + standard_keys = {"type", "base64", "id", "status"} + tool_output = next( + block + for block in ai_message2.content + if isinstance(block, dict) and block["type"] == "image" + ) + assert set(standard_keys).issubset(tool_output.keys()) + + +@pytest.mark.default_cassette("test_image_generation_multi_turn.yaml.gz") +@pytest.mark.vcr +def test_image_generation_multi_turn_v1() -> None: + """Test multi-turn editing of image generation by passing in history.""" + # Test multi-turn + llm = ChatOpenAIV1(model="gpt-4.1", use_responses_api=True) + # Test invocation + tool = { + "type": "image_generation", + "quality": "low", + "output_format": "jpeg", + "output_compression": 100, + "size": "1024x1024", + } + llm_with_tools = llm.bind_tools([tool]) + + chat_history: list[MessageLikeRepresentation] = [ + {"role": "user", "content": "Draw a random short word in green font."} + ] + ai_message = llm_with_tools.invoke(chat_history) + assert isinstance(ai_message, AIMessageV1) + _check_response(ai_message, "v1") + expected_keys = { + # Standard + "type", + "base64", + "mime_type", "id", + # OpenAI-specific "background", "output_format", "quality", - "result", "revised_prompt", "size", "status", - "type", } - assert set(tool_output.keys()).issubset(expected_keys) + standard_keys = {"type", "base64", "id", "status"} + tool_output = next( + block + for block in ai_message.content + if isinstance(block, dict) and block["type"] == "image" + ) + assert set(standard_keys).issubset(tool_output.keys()) chat_history.extend( [ @@ -669,9 +1121,15 @@ def test_image_generation_multi_turn() -> None: ) ai_message2 = llm_with_tools.invoke(chat_history) - _check_response(ai_message2) - tool_output2 = ai_message2.additional_kwargs["tool_outputs"][0] - assert set(tool_output2.keys()).issubset(expected_keys) + assert isinstance(ai_message2, AIMessageV1) + _check_response(ai_message2, "v1") + + tool_output = next( + block + for block in ai_message2.content + if isinstance(block, dict) and block["type"] == "image" + ) + assert set(expected_keys).issubset(tool_output.keys()) @pytest.mark.xfail( From 3ae37b5987b9781d84c8bf1f8da01b779fd437ab Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Wed, 13 Aug 2025 11:12:46 -0400 Subject: [PATCH 14/56] openai: integration tests pass --- .../langchain_openai/chat_models/_compat.py | 6 +- .../langchain_openai/chat_models/base.py | 55 ++- .../chat_models/test_responses_api.py | 342 +++++------------- 3 files changed, 145 insertions(+), 258 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/_compat.py b/libs/partners/openai/langchain_openai/chat_models/_compat.py index f3431f1651499..0409c82c943bb 100644 --- a/libs/partners/openai/langchain_openai/chat_models/_compat.py +++ b/libs/partners/openai/langchain_openai/chat_models/_compat.py @@ -432,7 +432,9 @@ def _iter_blocks() -> Iterable[types.ContentBlock]: "size", ): if extra_key in block: - new_block[extra_key] = block[extra_key] + if "extras" not in new_block: 
+ new_block["extras"] = {} + new_block["extras"][extra_key] = block[extra_key] yield cast(types.ImageContentBlock, new_block) elif block_type == "function_call": @@ -718,6 +720,8 @@ def _convert_from_v1_to_responses( for extra_key in ("id", "status"): if extra_key in block: new_block[extra_key] = block[extra_key] # type: ignore[typeddict-item] + elif extra_key in block.get("extras", {}): + new_block[extra_key] = block["extras"][extra_key] new_content.append(new_block) elif block["type"] == "non_standard" and "value" in block: new_content.append(block["value"]) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 552d45e41eec3..d75a48a0104a6 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -207,7 +207,7 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage: return ChatMessage(content=_dict.get("content", ""), role=role, id=id_) # type: ignore[arg-type] -def _format_message_content(content: Any) -> Any: +def _format_message_content(content: Any, responses_ai_msg: bool = False) -> Any: """Format message content.""" if content and isinstance(content, list): formatted_content = [] @@ -219,7 +219,13 @@ def _format_message_content(content: Any) -> Any: and block["type"] in ("tool_use", "thinking", "reasoning_content") ): continue - elif isinstance(block, dict) and is_data_content_block(block): + elif ( + isinstance(block, dict) + and is_data_content_block(block) + # Responses API messages handled separately in _compat (parsed into + # image generation calls) + and not responses_ai_msg + ): formatted_content.append(convert_to_openai_data_block(block)) # Anthropic image blocks elif ( @@ -252,7 +258,9 @@ def _format_message_content(content: Any) -> Any: return formatted_content -def _convert_message_to_dict(message: BaseMessage) -> dict: +def _convert_message_to_dict( + message: BaseMessage, responses_ai_msg: bool = False +) -> dict: """Convert a LangChain message to a dictionary. Args: @@ -261,7 +269,11 @@ def _convert_message_to_dict(message: BaseMessage) -> dict: Returns: The dictionary. 
""" - message_dict: dict[str, Any] = {"content": _format_message_content(message.content)} + message_dict: dict[str, Any] = { + "content": _format_message_content( + message.content, responses_ai_msg=responses_ai_msg + ) + } if (name := message.name or message.additional_kwargs.get("name")) is not None: message_dict["name"] = name @@ -296,15 +308,25 @@ def _convert_message_to_dict(message: BaseMessage) -> dict: if "function_call" in message_dict or "tool_calls" in message_dict: message_dict["content"] = message_dict["content"] or None - if "audio" in message.additional_kwargs: - # openai doesn't support passing the data back - only the id - # https://platform.openai.com/docs/guides/audio/multi-turn-conversations + audio: Optional[dict[str, Any]] = None + for block in message.content: + if ( + isinstance(block, dict) + and block.get("type") == "audio" + and (id_ := block.get("id")) + and not responses_ai_msg + ): + # openai doesn't support passing the data back - only the id + # https://platform.openai.com/docs/guides/audio/multi-turn-conversations + audio = {"id": id_} + if not audio and "audio" in message.additional_kwargs: raw_audio = message.additional_kwargs["audio"] audio = ( {"id": message.additional_kwargs["audio"]["id"]} if "id" in raw_audio else raw_audio ) + if audio: message_dict["audio"] = audio elif isinstance(message, SystemMessage): message_dict["role"] = message.additional_kwargs.get( @@ -3694,11 +3716,20 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: for lc_msg in messages: if isinstance(lc_msg, AIMessage): lc_msg = _convert_from_v03_ai_message(lc_msg) - msg = _convert_message_to_dict(lc_msg) - if isinstance(lc_msg, AIMessage) and isinstance(msg.get("content"), list): - msg["content"] = _convert_from_v1_to_responses( - msg["content"], lc_msg.tool_calls - ) + msg = _convert_message_to_dict(lc_msg, responses_ai_msg=True) + if isinstance(msg.get("content"), list) and all( + isinstance(block, dict) for block in msg["content"] + ): + msg["content"] = _convert_from_v1_to_responses( + msg["content"], lc_msg.tool_calls + ) + else: + msg = _convert_message_to_dict(lc_msg) + # Get content from non-standard content blocks + if isinstance(msg["content"], list): + for i, block in enumerate(msg["content"]): + if isinstance(block, dict) and block.get("type") == "non_standard": + msg["content"][i] = block["value"] # "name" parameter unsupported if "name" in msg: msg.pop("name") diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 5906a837e8713..f98940f78c0f1 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -2,7 +2,7 @@ import json import os -from typing import Annotated, Any, Literal, Optional, Union, cast +from typing import Annotated, Any, Literal, Optional, cast import openai import pytest @@ -14,27 +14,16 @@ HumanMessage, MessageLikeRepresentation, ) -from langchain_core.v1.messages import AIMessage as AIMessageV1 -from langchain_core.v1.messages import AIMessageChunk as AIMessageChunkV1 -from langchain_core.v1.messages import HumanMessage as HumanMessageV1 from pydantic import BaseModel from typing_extensions import TypedDict from langchain_openai import ChatOpenAI, custom_tool -from langchain_openai.v1 import ChatOpenAI as ChatOpenAIV1 MODEL_NAME = "gpt-4o-mini" -def _check_response( - response: 
Optional[Union[BaseMessage, AIMessageV1]], output_version: str -) -> None: - if output_version == "v1": - assert isinstance(response, AIMessageV1) or isinstance( - response, AIMessageChunkV1 - ) - else: - assert isinstance(response, AIMessage) +def _check_response(response: Optional[BaseMessage], output_version: str) -> None: + assert isinstance(response, AIMessage) assert isinstance(response.content, list) for block in response.content: assert isinstance(block, dict) @@ -56,11 +45,7 @@ def _check_response( if "url" in annotation: assert "start_index" in annotation assert "end_index" in annotation - - if output_version == "v1": - text_content = response.text - else: - text_content = response.text() # type: ignore[operator,misc] + text_content = response.text() # type: ignore[operator,misc] assert isinstance(text_content, str) assert text_content assert response.usage_metadata @@ -75,10 +60,7 @@ def _check_response( @pytest.mark.vcr @pytest.mark.parametrize("output_version", ["responses/v1", "v1"]) def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: - if output_version == "v1": - llm = ChatOpenAIV1(model=MODEL_NAME) - else: - llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version) # type: ignore[assignment] + llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version) # type: ignore[assignment] first_response = llm.invoke( "What was a positive news story from today?", tools=[{"type": "web_search_preview"}], @@ -86,22 +68,13 @@ def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: _check_response(first_response, output_version) # Test streaming - if isinstance(llm, ChatOpenAIV1): - full: Optional[AIMessageChunkV1] = None - for chunk in llm.stream( - "What was a positive news story from today?", - tools=[{"type": "web_search_preview"}], - ): - assert isinstance(chunk, AIMessageChunkV1) - full = chunk if full is None else full + chunk - else: - full: Optional[BaseMessageChunk] = None # type: ignore[no-redef] - for chunk in llm.stream( - "What was a positive news story from today?", - tools=[{"type": "web_search_preview"}], - ): - assert isinstance(chunk, AIMessageChunk) - full = chunk if full is None else full + chunk + full: Optional[BaseMessageChunk] = None # type: ignore[no-redef] + for chunk in llm.stream( + "What was a positive news story from today?", + tools=[{"type": "web_search_preview"}], + ): + assert isinstance(chunk, AIMessageChunk) + full = chunk if full is None else full + chunk _check_response(full, output_version) # Use OpenAI's stateful API @@ -168,8 +141,8 @@ async def test_web_search_async() -> None: @pytest.mark.default_cassette("test_function_calling.yaml.gz") @pytest.mark.vcr -@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) -def test_function_calling(output_version: Literal["v0", "responses/v1"]) -> None: +@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"]) +def test_function_calling(output_version: Literal["v0", "responses/v1", "v1"]) -> None: def multiply(x: int, y: int) -> int: """return x * y""" return x * y @@ -193,32 +166,6 @@ def multiply(x: int, y: int) -> int: _check_response(response, output_version) -@pytest.mark.default_cassette("test_function_calling.yaml.gz") -@pytest.mark.vcr -def test_function_calling_v1() -> None: - def multiply(x: int, y: int) -> int: - """return x * y""" - return x * y - - llm = ChatOpenAIV1(model=MODEL_NAME) - bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}]) - ai_msg = bound_llm.invoke("whats 5 * 4") - assert 
len(ai_msg.tool_calls) == 1 - assert ai_msg.tool_calls[0]["name"] == "multiply" - assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"} - - full: Any = None - for chunk in bound_llm.stream("whats 5 * 4"): - assert isinstance(chunk, AIMessageChunkV1) - full = chunk if full is None else full + chunk - assert len(full.tool_calls) == 1 - assert full.tool_calls[0]["name"] == "multiply" - assert set(full.tool_calls[0]["args"]) == {"x", "y"} - - response = bound_llm.invoke("What was a positive news story from today?") - _check_response(response, "v1") - - class Foo(BaseModel): response: str @@ -229,8 +176,10 @@ class FooDict(TypedDict): @pytest.mark.default_cassette("test_parsed_pydantic_schema.yaml.gz") @pytest.mark.vcr -@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) -def test_parsed_pydantic_schema(output_version: Literal["v0", "responses/v1"]) -> None: +@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"]) +def test_parsed_pydantic_schema( + output_version: Literal["v0", "responses/v1", "v1"], +) -> None: llm = ChatOpenAI( model=MODEL_NAME, use_responses_api=True, output_version=output_version ) @@ -250,30 +199,6 @@ def test_parsed_pydantic_schema(output_version: Literal["v0", "responses/v1"]) - assert parsed.response -@pytest.mark.default_cassette("test_parsed_pydantic_schema.yaml.gz") -@pytest.mark.vcr -def test_parsed_pydantic_schema_v1() -> None: - llm = ChatOpenAIV1(model=MODEL_NAME, use_responses_api=True) - response = llm.invoke("how are ya", response_format=Foo) - assert response.text - parsed = Foo(**json.loads(response.text)) - assert parsed == response.parsed - assert parsed.response - - # Test stream - full: Optional[AIMessageChunkV1] = None - chunks = [] - for chunk in llm.stream("how are ya", response_format=Foo): - assert isinstance(chunk, AIMessageChunkV1) - full = chunk if full is None else full + chunk - chunks.append(chunk) - assert isinstance(full, AIMessageChunkV1) - assert full.text - parsed = Foo(**json.loads(full.text)) - assert parsed == full.parsed - assert parsed.response - - async def test_parsed_pydantic_schema_async() -> None: llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = await llm.ainvoke("how are ya", response_format=Foo) @@ -379,8 +304,8 @@ def multiply(x: int, y: int) -> int: @pytest.mark.default_cassette("test_reasoning.yaml.gz") @pytest.mark.vcr -@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) -def test_reasoning(output_version: Literal["v0", "responses/v1"]) -> None: +@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"]) +def test_reasoning(output_version: Literal["v0", "responses/v1", "v1"]) -> None: llm = ChatOpenAI( model="o4-mini", use_responses_api=True, output_version=output_version ) @@ -405,26 +330,6 @@ def test_reasoning(output_version: Literal["v0", "responses/v1"]) -> None: assert block_types == ["reasoning", "text"] -@pytest.mark.default_cassette("test_reasoning.yaml.gz") -@pytest.mark.vcr -def test_reasoning_v1() -> None: - llm = ChatOpenAIV1(model="o4-mini", use_responses_api=True) - response = llm.invoke("Hello", reasoning={"effort": "low"}) - assert isinstance(response, AIMessageV1) - - # Test init params + streaming - llm = ChatOpenAIV1(model="o4-mini", reasoning={"effort": "low"}) - full: Optional[AIMessageChunkV1] = None - for chunk in llm.stream("Hello"): - assert isinstance(chunk, AIMessageChunkV1) - full = chunk if full is None else full + chunk - assert isinstance(full, AIMessageChunkV1) - - for msg in [response, full]: - block_types 
= [block["type"] for block in msg.content] - assert block_types == ["reasoning", "text"] - - def test_stateful_api() -> None: llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = llm.invoke("how are you, my name is Bobo") @@ -483,9 +388,9 @@ def test_file_search() -> None: @pytest.mark.default_cassette("test_stream_reasoning_summary.yaml.gz") @pytest.mark.vcr -@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) +@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"]) def test_stream_reasoning_summary( - output_version: Literal["v0", "responses/v1"], + output_version: Literal["v0", "responses/v1", "v1"], ) -> None: llm = ChatOpenAI( model="o4-mini", @@ -512,8 +417,7 @@ def test_stream_reasoning_summary( assert isinstance(block["type"], str) assert isinstance(block["text"], str) assert block["text"] - else: - # output_version == "responses/v1" + elif output_version == "responses/v1": reasoning = next( block for block in response_1.content @@ -527,6 +431,18 @@ def test_stream_reasoning_summary( assert isinstance(block["type"], str) assert isinstance(block["text"], str) assert block["text"] + else: + # v1 + total_reasoning_blocks = 0 + for block in response_1.content: + if block["type"] == "reasoning": + total_reasoning_blocks += 1 + assert isinstance(block["id"], str) and block["id"].startswith("rs_") + assert isinstance(block["reasoning"], str) + assert isinstance(block["index"], int) + assert ( + total_reasoning_blocks > 1 + ) # This query typically generates multiple reasoning blocks # Check we can pass back summaries message_2 = {"role": "user", "content": "Thank you."} @@ -534,45 +450,10 @@ def test_stream_reasoning_summary( assert isinstance(response_2, AIMessage) -@pytest.mark.default_cassette("test_stream_reasoning_summary.yaml.gz") -@pytest.mark.vcr -def test_stream_reasoning_summary_v1() -> None: - llm = ChatOpenAIV1( - model="o4-mini", - # Routes to Responses API if `reasoning` is set. 
- reasoning={"effort": "medium", "summary": "auto"}, - ) - message_1 = { - "role": "user", - "content": "What was the third tallest buliding in the year 2000?", - } - response_1: Optional[AIMessageChunkV1] = None - for chunk in llm.stream([message_1]): - assert isinstance(chunk, AIMessageChunkV1) - response_1 = chunk if response_1 is None else response_1 + chunk - assert isinstance(response_1, AIMessageChunkV1) - - total_reasoning_blocks = 0 - for block in response_1.content: - if block["type"] == "reasoning": - total_reasoning_blocks += 1 - assert isinstance(block["id"], str) and block["id"].startswith("rs_") - assert isinstance(block["reasoning"], str) - assert isinstance(block["index"], int) - assert ( - total_reasoning_blocks > 1 - ) # This query typically generates multiple reasoning blocks - - # Check we can pass back summaries - message_2 = {"role": "user", "content": "Thank you."} - response_2 = llm.invoke([message_1, response_1, message_2]) - assert isinstance(response_2, AIMessageV1) - - @pytest.mark.default_cassette("test_code_interpreter.yaml.gz") @pytest.mark.vcr -@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) -def test_code_interpreter(output_version: Literal["v0", "responses/v1"]) -> None: +@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"]) +def test_code_interpreter(output_version: Literal["v0", "responses/v1", "v1"]) -> None: llm = ChatOpenAI( model="o4-mini", use_responses_api=True, output_version=output_version ) @@ -592,13 +473,28 @@ def test_code_interpreter(output_version: Literal["v0", "responses/v1"]) -> None for item in response.additional_kwargs["tool_outputs"] if item["type"] == "code_interpreter_call" ] - else: - # responses/v1 + assert len(tool_outputs) == 1 + elif output_version == "responses/v1": tool_outputs = [ item for item in response.content if isinstance(item, dict) and item["type"] == "code_interpreter_call" ] + assert len(tool_outputs) == 1 + else: + # v1 + tool_outputs = [ + item + for item in response.content_blocks + if item["type"] == "code_interpreter_call" + ] + code_interpreter_result = next( + item + for item in response.content_blocks + if item["type"] == "code_interpreter_result" + ) + assert tool_outputs + assert code_interpreter_result assert len(tool_outputs) == 1 # Test streaming @@ -619,66 +515,28 @@ def test_code_interpreter(output_version: Literal["v0", "responses/v1"]) -> None for item in response.additional_kwargs["tool_outputs"] if item["type"] == "code_interpreter_call" ] - else: - # responses/v1 + assert tool_outputs + elif output_version == "responses/v1": tool_outputs = [ item for item in response.content if isinstance(item, dict) and item["type"] == "code_interpreter_call" ] - assert tool_outputs - - # Test we can pass back in - next_message = {"role": "user", "content": "Please add more comments to the code."} - _ = llm_with_tools.invoke([input_message, full, next_message]) - - -@pytest.mark.default_cassette("test_code_interpreter.yaml.gz") -@pytest.mark.vcr -def test_code_interpreter_v1() -> None: - llm = ChatOpenAIV1(model="o4-mini", use_responses_api=True) - llm_with_tools = llm.bind_tools( - [{"type": "code_interpreter", "container": {"type": "auto"}}] - ) - input_message = { - "role": "user", - "content": "Write and run code to answer the question: what is 3^3?", - } - response = llm_with_tools.invoke([input_message]) - assert isinstance(response, AIMessageV1) - _check_response(response, "v1") - - tool_outputs = [ - item for item in response.content if item["type"] == 
"code_interpreter_call" - ] - code_interpreter_result = next( - item for item in response.content if item["type"] == "code_interpreter_result" - ) - assert tool_outputs - assert code_interpreter_result - assert len(tool_outputs) == 1 - - # Test streaming - # Use same container - container_id = tool_outputs[0]["container_id"] # type: ignore[typeddict-item] - llm_with_tools = llm.bind_tools( - [{"type": "code_interpreter", "container": container_id}] - ) - - full: Optional[AIMessageChunkV1] = None - for chunk in llm_with_tools.stream([input_message]): - assert isinstance(chunk, AIMessageChunkV1) - full = chunk if full is None else full + chunk - assert isinstance(full, AIMessageChunkV1) - code_interpreter_call = next( - item for item in full.content if item["type"] == "code_interpreter_call" - ) - code_interpreter_result = next( - item for item in full.content if item["type"] == "code_interpreter_result" - ) - assert code_interpreter_call - assert code_interpreter_result - assert tool_outputs + assert tool_outputs + else: + # v1 + code_interpreter_call = next( + item + for item in full.content_blocks + if item["type"] == "code_interpreter_call" + ) + code_interpreter_result = next( + item + for item in full.content_blocks + if item["type"] == "code_interpreter_result" + ) + assert code_interpreter_call + assert code_interpreter_result # Test we can pass back in next_message = {"role": "user", "content": "Please add more comments to the code."} @@ -776,8 +634,11 @@ def test_mcp_builtin_zdr() -> None: @pytest.mark.default_cassette("test_mcp_builtin_zdr.yaml.gz") @pytest.mark.vcr def test_mcp_builtin_zdr_v1() -> None: - llm = ChatOpenAIV1( - model="o4-mini", store=False, include=["reasoning.encrypted_content"] + llm = ChatOpenAI( + model="o4-mini", + output_version="v1", + store=False, + include=["reasoning.encrypted_content"], ) llm_with_tools = llm.bind_tools( @@ -797,15 +658,15 @@ def test_mcp_builtin_zdr_v1() -> None: "support?" 
), } - full: Optional[AIMessageChunkV1] = None + full: Optional[BaseMessageChunk] = None for chunk in llm_with_tools.stream([input_message]): - assert isinstance(chunk, AIMessageChunkV1) + assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk - assert isinstance(full, AIMessageChunkV1) + assert isinstance(full, AIMessageChunk) assert all(isinstance(block, dict) for block in full.content) - approval_message = HumanMessageV1( + approval_message = HumanMessage( [ { "type": "non_standard", @@ -815,7 +676,7 @@ def test_mcp_builtin_zdr_v1() -> None: "approval_request_id": block["value"]["id"], # type: ignore[index] }, } - for block in full.content + for block in full.content_blocks if block["type"] == "non_standard" and block["value"]["type"] == "mcp_approval_request" # type: ignore[index] ] @@ -905,7 +766,7 @@ def test_image_generation_streaming( @pytest.mark.vcr def test_image_generation_streaming_v1() -> None: """Test image generation streaming.""" - llm = ChatOpenAIV1(model="gpt-4.1", use_responses_api=True) + llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True, output_version="v1") tool = { "type": "image_generation", "quality": "low", @@ -914,14 +775,8 @@ def test_image_generation_streaming_v1() -> None: "size": "1024x1024", } - expected_keys = { - # Standard - "type", - "base64", - "mime_type", - "id", - "index", - # OpenAI-specific + standard_keys = {"type", "base64", "mime_type", "id", "index"} + extra_keys = { "background", "output_format", "quality", @@ -930,18 +785,19 @@ def test_image_generation_streaming_v1() -> None: "status", } - full: Optional[AIMessageChunkV1] = None + full: Optional[BaseMessageChunk] = None for chunk in llm.stream("Draw a random short word in green font.", tools=[tool]): - assert isinstance(chunk, AIMessageChunkV1) + assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk - complete_ai_message = cast(AIMessageChunkV1, full) + complete_ai_message = cast(AIMessageChunk, full) tool_output = next( block for block in complete_ai_message.content if isinstance(block, dict) and block["type"] == "image" ) - assert set(expected_keys).issubset(tool_output.keys()) + assert set(standard_keys).issubset(tool_output.keys()) + assert set(extra_keys).issubset(tool_output["extras"].keys()) @pytest.mark.default_cassette("test_image_generation_multi_turn.yaml.gz") @@ -1064,7 +920,7 @@ def test_image_generation_multi_turn( def test_image_generation_multi_turn_v1() -> None: """Test multi-turn editing of image generation by passing in history.""" # Test multi-turn - llm = ChatOpenAIV1(model="gpt-4.1", use_responses_api=True) + llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True, output_version="v1") # Test invocation tool = { "type": "image_generation", @@ -1079,16 +935,11 @@ def test_image_generation_multi_turn_v1() -> None: {"role": "user", "content": "Draw a random short word in green font."} ] ai_message = llm_with_tools.invoke(chat_history) - assert isinstance(ai_message, AIMessageV1) + assert isinstance(ai_message, AIMessage) _check_response(ai_message, "v1") - expected_keys = { - # Standard - "type", - "base64", - "mime_type", - "id", - # OpenAI-specific + standard_keys = {"type", "base64", "mime_type", "id"} + extra_keys = { "background", "output_format", "quality", @@ -1097,13 +948,13 @@ def test_image_generation_multi_turn_v1() -> None: "status", } - standard_keys = {"type", "base64", "id", "status"} tool_output = next( block for block in ai_message.content if isinstance(block, dict) and 
block["type"] == "image" ) assert set(standard_keys).issubset(tool_output.keys()) + assert set(extra_keys).issubset(tool_output["extras"].keys()) chat_history.extend( [ @@ -1121,7 +972,7 @@ def test_image_generation_multi_turn_v1() -> None: ) ai_message2 = llm_with_tools.invoke(chat_history) - assert isinstance(ai_message2, AIMessageV1) + assert isinstance(ai_message2, AIMessage) _check_response(ai_message2, "v1") tool_output = next( @@ -1129,7 +980,8 @@ def test_image_generation_multi_turn_v1() -> None: for block in ai_message2.content if isinstance(block, dict) and block["type"] == "image" ) - assert set(expected_keys).issubset(tool_output.keys()) + assert set(standard_keys).issubset(tool_output.keys()) + assert set(extra_keys).issubset(tool_output["extras"].keys()) @pytest.mark.xfail( From 2f604eb9a0fd8c51706bed6c9e7c46972bd76e43 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Wed, 13 Aug 2025 11:23:54 -0400 Subject: [PATCH 15/56] openai: carry over refusals fix --- .../openai/langchain_openai/chat_models/base.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index d75a48a0104a6..303d1841d62b4 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -3441,6 +3441,20 @@ def _oai_structured_outputs_parser( return parsed elif ai_msg.additional_kwargs.get("refusal"): raise OpenAIRefusalError(ai_msg.additional_kwargs["refusal"]) + elif any( + isinstance(block, dict) + and block.get("type") == "non_standard" + and "refusal" in block["value"] + for block in ai_msg.content + ): + refusal = next( + block["value"]["refusal"] + for block in ai_msg.content + if isinstance(block, dict) + and block["type"] == "non_standard" + and "refusal" in block["value"] + ) + raise OpenAIRefusalError(refusal) elif ai_msg.tool_calls: return None else: From 153db48c924a1d6f210580d8df6d8e58b4d89e3b Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Wed, 13 Aug 2025 15:32:02 -0400 Subject: [PATCH 16/56] openai: misc fixes for computer calls and custom tools --- .../langchain_openai/chat_models/base.py | 90 ++++++++++++++----- .../chat_models/test_responses_api.py | 10 ++- 2 files changed, 73 insertions(+), 27 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 72f9e179c421c..d8fca0513b17e 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -1316,6 +1316,7 @@ def _create_chat_result( generations.append(gen) llm_output = { "token_usage": token_usage, + "model_provider": "openai", "model_name": response_dict.get("model", self.model_name), "system_fingerprint": response_dict.get("system_fingerprint", ""), } @@ -1331,7 +1332,18 @@ def _create_chat_result( if hasattr(message, "parsed"): generations[0].message.additional_kwargs["parsed"] = message.parsed if hasattr(message, "refusal"): - generations[0].message.additional_kwargs["refusal"] = message.refusal + if self.output_version in ("v0", "responses/v1"): + generations[0].message.additional_kwargs["refusal"] = ( + message.refusal + ) + elif self.output_version == "v1": + if isinstance(generations[0].message.content, list): + generations[0].message.content.append( + { + "type": "non_standard", + "value": {"refusal": message.refusal}, + } + ) if self.output_version == "v1": _ 
= llm_output.pop("token_usage", None) @@ -3571,7 +3583,7 @@ def _get_last_messages( msg = messages[i] if isinstance(msg, AIMessage): response_id = msg.response_metadata.get("id") - if response_id: + if response_id and response_id.startswith("resp_"): return messages[i + 1 :], response_id else: return messages, None @@ -3680,23 +3692,45 @@ def _construct_responses_api_payload( return payload -def _make_computer_call_output_from_message(message: ToolMessage) -> dict: - computer_call_output: dict = { - "call_id": message.tool_call_id, - "type": "computer_call_output", - } +def _make_computer_call_output_from_message( + message: ToolMessage, +) -> Optional[dict[str, Any]]: + computer_call_output: Optional[dict[str, Any]] = None if isinstance(message.content, list): - # Use first input_image block - output = next( - block - for block in message.content - if cast(dict, block)["type"] == "input_image" - ) + for block in message.content: + if ( + message.additional_kwargs.get("type") == "computer_call_output" + and isinstance(block, dict) + and block.get("type") == "input_image" + ): + # Use first input_image block + computer_call_output = { + "call_id": message.tool_call_id, + "type": "computer_call_output", + "output": block, + } + break + elif ( + isinstance(block, dict) + and block.get("type") == "non_standard" + and block.get("value", {}).get("type") == "computer_call_output" + ): + computer_call_output = block["value"] + break + else: + pass else: - # string, assume image_url - output = {"type": "input_image", "image_url": message.content} - computer_call_output["output"] = output - if "acknowledged_safety_checks" in message.additional_kwargs: + if message.additional_kwargs.get("type") == "computer_call_output": + # string, assume image_url + computer_call_output = { + "call_id": message.tool_call_id, + "type": "computer_call_output", + "output": {"type": "input_image", "image_url": message.content}, + } + if ( + computer_call_output is not None + and "acknowledged_safety_checks" in message.additional_kwargs + ): computer_call_output["acknowledged_safety_checks"] = message.additional_kwargs[ "acknowledged_safety_checks" ] @@ -3713,6 +3747,15 @@ def _make_custom_tool_output_from_message(message: ToolMessage) -> Optional[dict "output": block.get("output") or "", } break + elif ( + isinstance(block, dict) + and block.get("type") == "non_standard" + and block.get("value", {}).get("type") == "custom_tool_call_output" + ): + custom_tool_output = block["value"] + break + else: + pass return custom_tool_output @@ -3756,14 +3799,14 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: msg.pop("name") if msg["role"] == "tool": tool_output = msg["content"] + computer_call_output = _make_computer_call_output_from_message( + cast(ToolMessage, lc_msg) + ) custom_tool_output = _make_custom_tool_output_from_message(lc_msg) # type: ignore[arg-type] - if custom_tool_output: - input_.append(custom_tool_output) - elif lc_msg.additional_kwargs.get("type") == "computer_call_output": - computer_call_output = _make_computer_call_output_from_message( - cast(ToolMessage, lc_msg) - ) + if computer_call_output: input_.append(computer_call_output) + elif custom_tool_output: + input_.append(custom_tool_output) else: if not isinstance(tool_output, str): tool_output = _stringify(tool_output) @@ -3939,6 +3982,7 @@ def _construct_lc_result_from_responses_api( if metadata: response_metadata.update(metadata) # for compatibility with chat completion calls. 
+ response_metadata["model_provider"] = "openai" response_metadata["model_name"] = response_metadata.get("model") if response.usage: usage_metadata = _create_usage_metadata_responses(response.usage.model_dump()) diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 44a2edd158782..4ddf373ec78f1 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -436,7 +436,7 @@ def test_stream_reasoning_summary( else: # v1 total_reasoning_blocks = 0 - for block in response_1.content: + for block in response_1.content_blocks: if block["type"] == "reasoning": total_reasoning_blocks += 1 assert isinstance(block["id"], str) and block["id"].startswith("rs_") @@ -999,14 +999,16 @@ def test_verbosity_parameter() -> None: assert response.content -@pytest.mark.vcr() -def test_custom_tool() -> None: +@pytest.mark.default_cassette("test_custom_tool.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["responses/v1", "v1"]) +def test_custom_tool(output_version: Literal["responses/v1", "v1"]) -> None: @custom_tool def execute_code(code: str) -> str: """Execute python code.""" return "27" - llm = ChatOpenAI(model="gpt-5", output_version="responses/v1").bind_tools( + llm = ChatOpenAI(model="gpt-5", output_version=output_version).bind_tools( [execute_code] ) From 0aac20e65581d75b79e6c7418cd27b727f1113c0 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Thu, 14 Aug 2025 09:55:20 -0400 Subject: [PATCH 17/56] openai: tool calls in progress --- .../langchain_openai/chat_models/_compat.py | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/_compat.py b/libs/partners/openai/langchain_openai/chat_models/_compat.py index 0409c82c943bb..d47aa735e15b5 100644 --- a/libs/partners/openai/langchain_openai/chat_models/_compat.py +++ b/libs/partners/openai/langchain_openai/chat_models/_compat.py @@ -271,8 +271,7 @@ def _convert_to_v1_from_chat_completions(message: AIMessage) -> AIMessage: message.content = [] for tool_call in message.tool_calls: - if id_ := tool_call.get("id"): - message.content.append({"type": "tool_call", "id": id_}) + message.content.append(cast(dict, tool_call)) if "tool_calls" in message.additional_kwargs: _ = message.additional_kwargs.pop("tool_calls") @@ -284,8 +283,23 @@ def _convert_to_v1_from_chat_completions(message: AIMessage) -> AIMessage: def _convert_to_v1_from_chat_completions_chunk(chunk: AIMessageChunk) -> AIMessageChunk: - result = _convert_to_v1_from_chat_completions(cast(AIMessage, chunk)) - return cast(AIMessageChunk, result) + """Mutate a Chat Completions chunk to v1 format.""" + if isinstance(chunk.content, str): + if chunk.content: + chunk.content = [{"type": "text", "text": chunk.content}] + else: + chunk.content = [] + + for tool_call_chunk in chunk.tool_call_chunks: + chunk.content.append(cast(dict, tool_call_chunk)) + + if "tool_calls" in chunk.additional_kwargs: + _ = chunk.additional_kwargs.pop("tool_calls") + + if "token_usage" in chunk.response_metadata: + _ = chunk.response_metadata.pop("token_usage") + + return chunk def _convert_from_v1_to_chat_completions(message: AIMessage) -> AIMessage: From 624300cefa92a91217a8152d802a88d129afcd99 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Thu, 14 Aug 2025 10:06:33 -0400 Subject: [PATCH 
18/56] core: populate tool_call_chunks in content_blocks --- libs/core/langchain_core/messages/ai.py | 71 ++++++++++++++----- .../core/tests/unit_tests/messages/test_ai.py | 26 +++++++ 2 files changed, 78 insertions(+), 19 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index b38a7fa1a4924..b383685c6c75d 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -230,25 +230,26 @@ def content_blocks(self) -> list[types.ContentBlock]: """Return content blocks of the message.""" blocks = super().content_blocks - # Add from tool_calls if missing from content - content_tool_call_ids = { - block.get("id") - for block in self.content - if isinstance(block, dict) and block.get("type") == "tool_call" - } - for tool_call in self.tool_calls: - if (id_ := tool_call.get("id")) and id_ not in content_tool_call_ids: - tool_call_block: types.ToolCall = { - "type": "tool_call", - "id": id_, - "name": tool_call["name"], - "args": tool_call["args"], - } - if "index" in tool_call: - tool_call_block["index"] = tool_call["index"] - if "extras" in tool_call: - tool_call_block["extras"] = tool_call["extras"] - blocks.append(tool_call_block) + if self.tool_calls: + # Add from tool_calls if missing from content + content_tool_call_ids = { + block.get("id") + for block in self.content + if isinstance(block, dict) and block.get("type") == "tool_call" + } + for tool_call in self.tool_calls: + if (id_ := tool_call.get("id")) and id_ not in content_tool_call_ids: + tool_call_block: types.ToolCall = { + "type": "tool_call", + "id": id_, + "name": tool_call["name"], + "args": tool_call["args"], + } + if "index" in tool_call: + tool_call_block["index"] = tool_call["index"] + if "extras" in tool_call: + tool_call_block["extras"] = tool_call["extras"] + blocks.append(tool_call_block) return blocks @@ -361,6 +362,38 @@ def lc_attributes(self) -> dict: "invalid_tool_calls": self.invalid_tool_calls, } + @property + def content_blocks(self) -> list[types.ContentBlock]: + """Return content blocks of the message.""" + blocks = super().content_blocks + + if self.tool_call_chunks: + blocks = [ + block + for block in blocks + if block["type"] not in ("tool_call", "invalid_tool_call") + ] + # Add from tool_call_chunks if missing from content + content_tool_call_ids = { + block.get("id") + for block in self.content + if isinstance(block, dict) and block.get("type") == "tool_call_chunk" + } + for chunk in self.tool_call_chunks: + if (id_ := chunk.get("id")) and id_ not in content_tool_call_ids: + tool_call_chunk_block: types.ToolCallChunk = { + "type": "tool_call_chunk", + "id": id_, + "name": chunk["name"], + "args": chunk["args"], + "index": chunk.get("index"), + } + if "extras" in chunk: + tool_call_chunk_block["extras"] = chunk["extras"] # type: ignore[typeddict-item] + blocks.append(tool_call_chunk_block) + + return blocks + @model_validator(mode="after") def init_tool_calls(self) -> Self: """Initialize tool calls from tool call chunks. 
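The net effect of the ``content_blocks`` override above: when ``tool_call_chunks`` are
present, completed ``tool_call``/``invalid_tool_call`` blocks are dropped and
``tool_call_chunk`` blocks are surfaced instead, keyed by chunk ``id``. A minimal
sketch of the intended usage, assuming the v1 content-block API from this patch series
(the chunk values below are illustrative):

.. code-block:: python

    from langchain_core.messages import AIMessageChunk

    # Two streamed deltas of one tool call; args arrive as partial JSON text.
    left = AIMessageChunk(
        content="",
        tool_call_chunks=[
            {
                "type": "tool_call_chunk",
                "name": "multiply",
                "args": '{"x": 5',
                "id": "call_1",
                "index": 0,
            }
        ],
    )
    right = AIMessageChunk(
        content="",
        tool_call_chunks=[
            {
                "type": "tool_call_chunk",
                "name": None,
                "args": ', "y": 4}',
                "id": None,
                "index": 0,
            }
        ],
    )

    merged = left + right  # chunks sharing an index are merged

    # The surviving chunk id keeps the block visible in content_blocks ...
    assert merged.content_blocks[0]["type"] == "tool_call_chunk"
    # ... and init_tool_calls parses the concatenated args string.
    assert merged.tool_calls[0]["args"] == {"x": 5, "y": 4}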
diff --git a/libs/core/tests/unit_tests/messages/test_ai.py b/libs/core/tests/unit_tests/messages/test_ai.py index a7225015c2cff..b3c0a4d84ea08 100644 --- a/libs/core/tests/unit_tests/messages/test_ai.py +++ b/libs/core/tests/unit_tests/messages/test_ai.py @@ -284,3 +284,29 @@ def test_content_blocks() -> None: {"type": "tool_call", "name": "foo", "args": {"a": "b"}, "id": "abc_123"}, missing_tool_call, ] + + # Chunks + message = AIMessageChunk( + content="", + tool_call_chunks=[ + { + "type": "tool_call_chunk", + "name": "foo", + "args": "", + "id": "abc_123", + "index": 0, + } + ], + ) + assert len(message.content_blocks) == 1 + assert message.content_blocks[0]["type"] == "tool_call_chunk" + assert message.content_blocks == [ + { + "type": "tool_call_chunk", + "name": "foo", + "args": "", + "id": "abc_123", + "index": 0, + } + ] + assert message.content == "" From 7e39cd18c55050bb80d7d2ff6419aa2b66bb836f Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 14:30:32 -0400 Subject: [PATCH 19/56] feat: allow kwargs on content block factories (#32568) --- .../langchain_core/messages/content_blocks.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index daf5112507406..656d47cdaa6e7 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -812,6 +812,9 @@ class NonStandardContentBlock(TypedDict): the adapter's job to parse that payload and emit the corresponding standard ``ReasoningContentBlock`` and ``ToolCallContentBlocks``. + Has no ``extras`` field, as provider-specific data should be included in the + ``value`` field. + .. note:: ``create_non_standard_block`` may also be used as a factory to create a ``NonStandardContentBlock``. Benefits include: @@ -1023,6 +1026,7 @@ def create_text_block( id: Optional[str] = None, annotations: Optional[list[Annotation]] = None, index: Optional[int] = None, + **kwargs: Any, ) -> TextContentBlock: """Create a ``TextContentBlock``. @@ -1049,6 +1053,11 @@ def create_text_block( block["annotations"] = annotations if index is not None: block["index"] = index + + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block @@ -1060,6 +1069,7 @@ def create_image_block( mime_type: Optional[str] = None, id: Optional[str] = None, index: Optional[int] = None, + **kwargs: Any, ) -> ImageContentBlock: """Create an ``ImageContentBlock``. @@ -1100,6 +1110,10 @@ def create_image_block( if index is not None: block["index"] = index + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block @@ -1111,6 +1125,7 @@ def create_video_block( mime_type: Optional[str] = None, id: Optional[str] = None, index: Optional[int] = None, + **kwargs: Any, ) -> VideoContentBlock: """Create a ``VideoContentBlock``. @@ -1155,6 +1170,10 @@ def create_video_block( if index is not None: block["index"] = index + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block @@ -1166,6 +1185,7 @@ def create_audio_block( mime_type: Optional[str] = None, id: Optional[str] = None, index: Optional[int] = None, + **kwargs: Any, ) -> AudioContentBlock: """Create an ``AudioContentBlock``. 
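The same pattern repeats across these hunks for every factory: extra keyword arguments
whose values are not ``None`` are folded into the block's ``extras`` dict. A hedged
sketch of the expected behavior, assuming the ``create_text_block`` signature shown
earlier in this diff (``cache_control`` is a hypothetical provider-specific extra, not
a defined parameter):

.. code-block:: python

    from langchain_core.messages.content_blocks import create_text_block

    block = create_text_block(
        "Hello, world!",
        cache_control={"type": "ephemeral"},  # hypothetical extra kwarg
    )

    assert block["text"] == "Hello, world!"
    # Unrecognized kwargs land in `extras` rather than being rejected.
    assert block["extras"] == {"cache_control": {"type": "ephemeral"}}
    # The id is auto-generated via _ensure_id when not supplied.
    assert block["id"].startswith("lc_")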
@@ -1210,6 +1230,10 @@ def create_audio_block( if index is not None: block["index"] = index + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block @@ -1221,6 +1245,7 @@ def create_file_block( mime_type: Optional[str] = None, id: Optional[str] = None, index: Optional[int] = None, + **kwargs: Any, ) -> FileContentBlock: """Create a ``FileContentBlock``. @@ -1265,6 +1290,10 @@ def create_file_block( if index is not None: block["index"] = index + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block @@ -1277,6 +1306,7 @@ def create_plaintext_block( context: Optional[str] = None, id: Optional[str] = None, index: Optional[int] = None, + **kwargs: Any, ) -> PlainTextContentBlock: """Create a ``PlainTextContentBlock``. @@ -1319,6 +1349,10 @@ def create_plaintext_block( if index is not None: block["index"] = index + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block @@ -1328,6 +1362,7 @@ def create_tool_call( *, id: Optional[str] = None, index: Optional[int] = None, + **kwargs: Any, ) -> ToolCall: """Create a ``ToolCall``. @@ -1355,6 +1390,10 @@ def create_tool_call( if index is not None: block["index"] = index + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block @@ -1362,6 +1401,7 @@ def create_reasoning_block( reasoning: Optional[str] = None, id: Optional[str] = None, index: Optional[int] = None, + **kwargs: Any, ) -> ReasoningContentBlock: """Create a ``ReasoningContentBlock``. @@ -1387,6 +1427,10 @@ def create_reasoning_block( if index is not None: block["index"] = index + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block @@ -1398,6 +1442,7 @@ def create_citation( end_index: Optional[int] = None, cited_text: Optional[str] = None, id: Optional[str] = None, + **kwargs: Any, ) -> Citation: """Create a ``Citation``. @@ -1430,6 +1475,10 @@ def create_citation( if cited_text is not None: block["cited_text"] = cited_text + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block From c9e847fcb8978894fb25eff2a1c63de1460401ad Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 14:33:59 -0400 Subject: [PATCH 20/56] chore: format `output_version` docstring --- libs/core/langchain_core/language_models/chat_models.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 1eb32ea1d66af..572c805c0be0b 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -335,17 +335,18 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): """ output_version: str = "v0" - """Version of AIMessage output format to use. + """Version of ``AIMessage`` output format to use. - This field is used to roll-out new output formats for chat model AIMessages + This field is used to roll-out new output formats for chat model ``AIMessage``s in a backwards-compatible way. ``'v1'`` standardizes output format using a list of typed ContentBlock dicts. We recommend this for new applications. - All chat models currently support the default of ``"v0"``. + All chat models currently support the default of ``'v0'``. .. 
versionadded:: 1.0 + """ @model_validator(mode="before") From 8d110599cb9562aaa9c13e1efd796b25258dc588 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 14:39:13 -0400 Subject: [PATCH 21/56] chore: more content block docstring formatting --- libs/core/langchain_core/messages/ai.py | 2 +- libs/core/langchain_core/messages/base.py | 9 +++++---- libs/core/langchain_core/messages/human.py | 2 +- libs/core/langchain_core/messages/system.py | 2 +- libs/core/langchain_core/messages/tool.py | 2 +- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 774fab4611316..207ab6c11331d 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -201,7 +201,7 @@ def __init__( content_blocks: Optional[list[types.ContentBlock]] = None, **kwargs: Any, ) -> None: - """Specify content as a positional arg or content_blocks for typing support.""" + """Specify ``content`` as positional arg or ``content_blocks`` for typing.""" if content_blocks is not None: # If there are tool calls in content_blocks, but not in tool_calls, add them content_tool_calls = [ diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 13b12f764d19e..871f687d35cf6 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -83,7 +83,7 @@ def __init__( content_blocks: Optional[list[types.ContentBlock]] = None, **kwargs: Any, ) -> None: - """Specify content as a positional arg or content_blocks for typing support.""" + """Specify ``content`` as positional arg or ``content_blocks`` for typing.""" if content_blocks is not None: super().__init__(content=content_blocks, **kwargs) else: @@ -108,17 +108,18 @@ def get_lc_namespace(cls) -> list[str]: @property def content_blocks(self) -> list[types.ContentBlock]: - """Return the content as a list of standard ContentBlocks. + """Return the content as a list of standard ``ContentBlock``s. To use this property, the corresponding chat model must support - ``message_version="v1"`` or higher: + ``message_version='v1'`` or higher: .. code-block:: python from langchain.chat_models import init_chat_model llm = init_chat_model("...", message_version="v1") - otherwise, does best-effort parsing to standard types. + Otherwise, does best-effort parsing to standard types. 
+ """ blocks: list[types.ContentBlock] = [] content = ( diff --git a/libs/core/langchain_core/messages/human.py b/libs/core/langchain_core/messages/human.py index 041db0cdb9726..954f05f037ec2 100644 --- a/libs/core/langchain_core/messages/human.py +++ b/libs/core/langchain_core/messages/human.py @@ -63,7 +63,7 @@ def __init__( content_blocks: Optional[list[types.ContentBlock]] = None, **kwargs: Any, ) -> None: - """Specify content as a positional arg or content_blocks for typing support.""" + """Specify ``content`` as positional arg or ``content_blocks`` for typing.""" if content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), diff --git a/libs/core/langchain_core/messages/system.py b/libs/core/langchain_core/messages/system.py index c8a5bbae5c859..a3f399b88c142 100644 --- a/libs/core/langchain_core/messages/system.py +++ b/libs/core/langchain_core/messages/system.py @@ -56,7 +56,7 @@ def __init__( content_blocks: Optional[list[types.ContentBlock]] = None, **kwargs: Any, ) -> None: - """Specify content as a positional arg or content_blocks for typing support.""" + """Specify ``content`` as positional arg or ``content_blocks`` for typing.""" if content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index 14177181480fc..efb714bb58506 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -157,7 +157,7 @@ def __init__( content_blocks: Optional[list[types.ContentBlock]] = None, **kwargs: Any, ) -> None: - """Specify content as a positional arg or content_blocks for typing support.""" + """Specify ``content`` as positional arg or ``content_blocks`` for typing.""" if content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), From 3db8c6011236acf1b9076cc2ffa9aec2d947a6eb Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 15:01:07 -0400 Subject: [PATCH 22/56] chore: more content block formatting --- .../langchain_core/messages/content_blocks.py | 235 +++++++++++------- 1 file changed, 141 insertions(+), 94 deletions(-) diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index 656d47cdaa6e7..1448d37086b42 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -4,17 +4,15 @@ This module is under active development. The API is unstable and subject to change in future releases. -This module provides a standardized data structure for representing inputs to and -outputs from LLMs. The core abstraction is the **Content Block**, a ``TypedDict`` that -can represent a piece of text, an image, a tool call, or other structured data. +This module provides standardized data structures for representing inputs to and +outputs from LLMs. The core abstraction is the **Content Block**, a ``TypedDict``. **Rationale** Different LLM providers use distinct and incompatible API schemas. This module -introduces a unified, provider-agnostic format to standardize these interactions. A -message to or from a model is simply a ``list`` of ``ContentBlock`` objects, allowing -for the natural interleaving of text, images, and other content in a single, ordered -sequence. +provides a unified, provider-agnostic format to facilitate these interactions. 
A +message to or from a model is simply a list of content blocks, allowing for the natural +interleaving of text, images, and other content in a single ordered sequence. An adapter for a specific provider is responsible for translating this standard list of blocks into the format required by its API. @@ -29,61 +27,70 @@ by default in the ``extras`` field of each block. This allows for additional metadata to be included without breaking the standard structure. -Following widespread adoption of `PEP 728 `__, we will add -``extra_items=Any`` as a param to Content Blocks. This will signify to type checkers -that additional provider-specific fields are allowed outside of the ``extras`` field, -and that will become the new standard approach to adding provider-specific metadata. - .. warning:: Do not heavily rely on the ``extras`` field for provider-specific data! This field is subject to deprecation in future releases as we move towards PEP 728. -**Example with PEP 728 provider-specific fields:** +.. note:: + Following widespread adoption of `PEP 728 `__, we + will add ``extra_items=Any`` as a param to Content Blocks. This will signify to type + checkers that additional provider-specific fields are allowed outside of the + ``extras`` field, and that will become the new standard approach to adding + provider-specific metadata. -.. code-block:: python + .. dropdown:: - # Content block definition - # NOTE: `extra_items=Any` - class TextContentBlock(TypedDict, extra_items=Any): - type: Literal["text"] - id: NotRequired[str] - text: str - annotations: NotRequired[list[Annotation]] - index: NotRequired[int] + **Example with PEP 728 provider-specific fields:** -.. code-block:: python + .. code-block:: python - from langchain_core.messages.content_blocks import TextContentBlock + # Content block definition + # NOTE: `extra_items=Any` + class TextContentBlock(TypedDict, extra_items=Any): + type: Literal["text"] + id: NotRequired[str] + text: str + annotations: NotRequired[list[Annotation]] + index: NotRequired[int] - # Create a text content block with provider-specific fields - my_block: TextContentBlock = { - # Add required fields - "type": "text", - "text": "Hello, world!", - # Additional fields not specified in the TypedDict - # These are valid with PEP 728 and are typed as Any - "openai_metadata": {"model": "gpt-4", "temperature": 0.7}, - "anthropic_usage": {"input_tokens": 10, "output_tokens": 20}, - "custom_field": "any value", - } + .. code-block:: python - # Mutating an existing block to add provider-specific fields - openai_data = my_block["openai_metadata"] # Type: Any + from langchain_core.messages.content_blocks import TextContentBlock + + # Create a text content block with provider-specific fields + my_block: TextContentBlock = { + # Add required fields + "type": "text", + "text": "Hello, world!", + # Additional fields not specified in the TypedDict + # These are valid with PEP 728 and are typed as Any + "openai_metadata": {"model": "gpt-4", "temperature": 0.7}, + "anthropic_usage": {"input_tokens": 10, "output_tokens": 20}, + "custom_field": "any value", + } -.. note:: - PEP 728 is enabled with ``# type: ignore[call-arg]`` comments to suppress warnings - from type checkers that don't yet support it. The functionality works correctly - in Python 3.13+ and will be fully supported as the ecosystem catches up. 
+ # Mutating an existing block to add provider-specific fields + openai_data = my_block["openai_metadata"] # Type: Any + + PEP 728 is enabled with ``# type: ignore[call-arg]`` comments to suppress + warnings from type checkers that don't yet support it. The functionality works + correctly in Python 3.13+ and will be fully supported as the ecosystem catches + up. **Key Block Types** The module defines several types of content blocks, including: -- ``TextContentBlock``: Standard text. -- ``ImageContentBlock``, ``Audio...``, ``Video...``, ``PlainText...``, ``File...``: For multimodal data. +- ``TextContentBlock``: Standard text output. +- ``Citation``: For annotations that link text output to a source document. - ``ToolCallContentBlock``: For function calling. - ``ReasoningContentBlock``: To capture a model's thought process. -- ``Citation``: For annotations that link generated text to a source document. +- Multimodal data: + - ``ImageContentBlock`` + - ``AudioContentBlock`` + - ``VideoContentBlock`` + - ``PlainTextContentBlock`` (e.g. .txt or .md files) + - ``FileContentBlock`` (e.g. PDFs, etc.) **Example Usage** @@ -92,28 +99,31 @@ class TextContentBlock(TypedDict, extra_items=Any): # Direct construction: from langchain_core.messages.content_blocks import TextContentBlock, ImageContentBlock - multimodal_message: AIMessage = [ - TextContentBlock(type="text", text="What is shown in this image?"), - ImageContentBlock( - type="image", - url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png", - mime_type="image/png", - ), - ] + multimodal_message: AIMessage(content_blocks= + [ + TextContentBlock(type="text", text="What is shown in this image?"), + ImageContentBlock( + type="image", + url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png", + mime_type="image/png", + ), + ] + ) + # Using factories: from langchain_core.messages.content_blocks import create_text_block, create_image_block - # Using factory functions: - multimodal_message: AIMessage = [ - create_text_block("What is shown in this image?"), - create_image_block( - url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png", - mime_type="image/png", - ), - ] - -Factory functions like ``create_text_block`` and ``create_image_block`` are provided -and offer benefits such as: + multimodal_message: AIMessage(content= + [ + create_text_block("What is shown in this image?"), + create_image_block( + url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png", + mime_type="image/png", + ), + ] + ) + +Factory functions offer benefits such as: - Automatic ID generation (when not provided) - No need to manually specify the ``type`` field @@ -136,7 +146,7 @@ def _ensure_id(id_val: Optional[str]) -> str: id_val: Optional string ID value to validate. Returns: - A valid string ID, either the provided value or a new UUID. + A string ID, either the validated provided value or a newly generated UUID4. """ return id_val or str(f"lc_{uuid4()}") @@ -166,18 +176,12 @@ class Citation(TypedDict): - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ url: NotRequired[str] """URL of the document source.""" - # For future consideration, if needed: - # provenance: NotRequired[str] - # """Provenance of the document, e.g., ``'Wikipedia'``, ``'arXiv'``, etc. - - # Included for future compatibility; not currently implemented. - # """ - title: NotRequired[str] """Source document title. 
@@ -185,12 +189,10 @@ class Citation(TypedDict): """ start_index: NotRequired[int] - """Start index of the **response text** (``TextContentBlock.text``) for which the - annotation applies.""" + """Start index of the **response text** (``TextContentBlock.text``).""" end_index: NotRequired[int] - """End index of the **response text** (``TextContentBlock.text``) for which the - annotation applies.""" + """End index of the **response text** (``TextContentBlock.text``)""" cited_text: NotRequired[str] """Excerpt of source text being cited.""" @@ -212,10 +214,12 @@ class NonStandardAnnotation(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ value: dict[str, Any] @@ -244,10 +248,12 @@ class TextContentBlock(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ text: str @@ -296,6 +302,7 @@ class ToolCall(TypedDict): An identifier is needed to associate a tool call request with a tool call result in events when multiple concurrent tool calls are made. + """ # TODO: Consider making this NotRequired[str] in the future. @@ -330,6 +337,7 @@ class ToolCallChunk(TypedDict): AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) ).tool_call_chunks == [ToolCallChunk(name='foo', args='{"a":1}', index=0)] + """ # TODO: Consider making fields NotRequired[str] in the future. @@ -338,7 +346,12 @@ class ToolCallChunk(TypedDict): """Used for serialization.""" id: Optional[str] - """An identifier associated with the tool call.""" + """An identifier associated with the tool call. + + An identifier is needed to associate a tool call request with a tool + call result in events when multiple concurrent tool calls are made. + + """ name: Optional[str] """The name of the tool to be called.""" @@ -358,6 +371,7 @@ class InvalidToolCall(TypedDict): Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) + """ # TODO: Consider making fields NotRequired[str] in the future. @@ -366,7 +380,12 @@ class InvalidToolCall(TypedDict): """Used for discrimination.""" id: Optional[str] - """An identifier associated with the tool call.""" + """An identifier associated with the tool call. + + An identifier is needed to associate a tool call request with a tool + call result in events when multiple concurrent tool calls are made. + + """ name: Optional[str] """The name of the tool to be called.""" @@ -384,8 +403,6 @@ class InvalidToolCall(TypedDict): """Provider-specific metadata.""" -# Note: These are not standard tool calls, but rather provider-specific built-in tools. -# Web search class WebSearchCall(TypedDict): """Built-in web search tool call.""" @@ -393,10 +410,12 @@ class WebSearchCall(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. 
+ Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ query: NotRequired[str] @@ -416,10 +435,12 @@ class WebSearchResult(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ urls: NotRequired[list[str]] @@ -439,10 +460,12 @@ class CodeInterpreterCall(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ language: NotRequired[str] @@ -463,22 +486,26 @@ class CodeInterpreterOutput(TypedDict): Full output of a code interpreter tool call is represented by ``CodeInterpreterResult`` which is a list of these blocks. + """ type: Literal["code_interpreter_output"] """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ return_code: NotRequired[int] """Return code of the executed code. Example: ``0`` for success, non-zero for failure. + """ stderr: NotRequired[str] @@ -504,10 +531,12 @@ class CodeInterpreterResult(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ output: list[CodeInterpreterOutput] @@ -536,10 +565,12 @@ class ReasoningContentBlock(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ reasoning: NotRequired[str] @@ -547,6 +578,7 @@ class ReasoningContentBlock(TypedDict): Either the thought summary or the raw reasoning text itself. This is often parsed from ```` tags in the model's response. + """ index: NotRequired[int] @@ -575,10 +607,12 @@ class ImageContentBlock(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ file_id: NotRequired[str] @@ -588,6 +622,7 @@ class ImageContentBlock(TypedDict): """MIME type of the image. Required for base64. `Examples from IANA `__ + """ index: NotRequired[int] @@ -619,10 +654,12 @@ class VideoContentBlock(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. 
+ Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ file_id: NotRequired[str] @@ -632,6 +669,7 @@ class VideoContentBlock(TypedDict): """MIME type of the video. Required for base64. `Examples from IANA `__ + """ index: NotRequired[int] @@ -662,10 +700,12 @@ class AudioContentBlock(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ file_id: NotRequired[str] @@ -711,10 +751,12 @@ class PlainTextContentBlock(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ file_id: NotRequired[str] @@ -768,10 +810,12 @@ class FileContentBlock(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ file_id: NotRequired[str] @@ -828,10 +872,12 @@ class NonStandardContentBlock(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ value: dict[str, Any] @@ -900,6 +946,7 @@ def is_data_content_block(block: dict) -> bool: Returns: True if the content block is a data content block, False otherwise. + """ return block.get("type") in ( "audio", @@ -947,7 +994,7 @@ def is_invalid_tool_call_block( def convert_to_openai_image_block(block: dict[str, Any]) -> dict: - """Convert image content block to format expected by OpenAI Chat Completions API.""" + """Convert ``ImageContentBlock`` to format expected by OpenAI Chat Completions.""" if "url" in block: return { "type": "image_url", From 301a425151cfc2dc7054d2f5247453f3e0e3ee66 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 15:16:07 -0400 Subject: [PATCH 23/56] snapshot --- .../__snapshots__/test_runnable.ambr | 248 ++++++++++++++++-- 1 file changed, 224 insertions(+), 24 deletions(-) diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index 079e490906158..db17757a8c95f 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -2674,7 +2674,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) 
''', 'properties': dict({ @@ -2700,6 +2700,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -2711,6 +2715,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -2728,9 +2736,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -2943,12 +2952,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -2960,6 +2980,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -2970,9 +2994,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -4150,7 +4175,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', 'properties': dict({ @@ -4176,6 +4201,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -4187,6 +4216,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -4204,9 +4237,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -4438,12 +4472,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -4455,6 +4500,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -4465,9 +4514,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -5657,7 +5707,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) 
''', 'properties': dict({ @@ -5683,6 +5733,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -5694,6 +5748,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -5711,9 +5769,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -5945,12 +6004,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -5962,6 +6032,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -5972,9 +6046,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -7039,7 +7114,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', 'properties': dict({ @@ -7065,6 +7140,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -7076,6 +7155,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -7093,9 +7176,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -7308,12 +7392,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -7325,6 +7420,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -7335,9 +7434,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -8557,7 +8657,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) 
''', 'properties': dict({ @@ -8583,6 +8683,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -8594,6 +8698,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -8611,9 +8719,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -8845,12 +8954,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -8862,6 +8982,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -8872,9 +8996,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -9984,7 +10109,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', 'properties': dict({ @@ -10010,6 +10135,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -10021,6 +10150,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -10038,9 +10171,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -10253,12 +10387,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -10270,6 +10415,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -10280,9 +10429,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -11410,7 +11560,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) 
''', 'properties': dict({ @@ -11436,6 +11586,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -11447,6 +11601,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -11464,9 +11622,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -11709,12 +11868,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -11726,6 +11896,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -11736,9 +11910,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -12878,7 +13053,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', 'properties': dict({ @@ -12904,6 +13079,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -12915,6 +13094,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -12932,9 +13115,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -13166,12 +13350,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. 
Benefits include:
+
+            * Automatic ID generation (when not provided)
+            * Required arguments strictly validated at creation time
      ''',
    'properties': dict({
      'args': dict({
        'title': 'Args',
        'type': 'object',
      }),
+      'extras': dict({
+        'title': 'Extras',
+        'type': 'object',
+      }),
      'id': dict({
        'anyOf': list([
          dict({
            'type': 'string',
          }),
          dict({
            'type': 'null',
          }),
        ]),
        'title': 'Id',
      }),
+      'index': dict({
+        'title': 'Index',
+        'type': 'integer',
+      }),
      'name': dict({
        'title': 'Name',
        'type': 'string',
      }),
      'type': dict({
      }),
    }),
    'required': list([
+      'type',
+      'id',
      'name',
      'args',
-      'id',
    ]),
    'title': 'ToolCall',
    'type': 'object',

From a3b20b0ef575a1a6afa8b592c6f219a2fdf7dac3 Mon Sep 17 00:00:00 2001
From: Mason Daugherty
Date: Fri, 15 Aug 2025 15:28:11 -0400
Subject: [PATCH 24/56] clean up id test

---
 libs/core/tests/unit_tests/test_messages.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py
index 3b591945954b6..c3304b5917361 100644
--- a/libs/core/tests/unit_tests/test_messages.py
+++ b/libs/core/tests/unit_tests/test_messages.py
@@ -180,22 +180,21 @@ def test_message_chunks() -> None:
     assert AIMessageChunk(content="") + left == left
     assert right + AIMessageChunk(content="") == right
 
+    default_id = "lc_run--abc123"
+    meaningful_id = "msg_def456"
+
     # Test ID order of precedence
-    null_id = AIMessageChunk(content="", id=None)
-    default_id = AIMessageChunk(
-        content="", id="lc_run--abc123"
+    null_id_chunk = AIMessageChunk(content="", id=None)
+    default_id_chunk = AIMessageChunk(
+        content="", id=default_id
     )  # LangChain-assigned run ID
-    meaningful_id = AIMessageChunk(content="", id="msg_def456")  # provider-assigned ID
-
-    assert (null_id + default_id).id == "lc_run--abc123"
-    assert (default_id + null_id).id == "lc_run--abc123"
+    meaningful_id = AIMessageChunk(content="", id=meaningful_id)  # provider-assigned ID
 
-    assert (null_id + meaningful_id).id == "msg_def456"
-    assert (meaningful_id + null_id).id == "msg_def456"
+    assert (null_id_chunk + default_id_chunk).id == default_id
+    assert (null_id_chunk + meaningful_id).id == meaningful_id
 
     # Provider assigned IDs have highest precedence
-    assert (default_id + meaningful_id).id == "msg_def456"
-    assert (meaningful_id + default_id).id == "msg_def456"
+    assert (default_id_chunk + meaningful_id).id == meaningful_id
 
 
 def test_chat_message_chunks() -> None:

From 8fc1973bbf657d744817e93a1a1f6aba455c38f6 Mon Sep 17 00:00:00 2001
From: Mason Daugherty
Date: Fri, 15 Aug 2025 15:30:51 -0400
Subject: [PATCH 25/56] test: add note about tuple conversion in ToolMessage

---
 libs/core/tests/unit_tests/test_messages.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py
index c3304b5917361..281e964a1aed0 100644
--- a/libs/core/tests/unit_tests/test_messages.py
+++ b/libs/core/tests/unit_tests/test_messages.py
@@ -1040,6 +1040,7 @@ def test_tool_message_content() -> None:
     ToolMessage(["foo"], tool_call_id="1")
     ToolMessage([{"foo": "bar"}], tool_call_id="1")
 
+    # Ignoring since we're testing that tuples get converted to lists in `coerce_args`
     assert ToolMessage(("a", "b", "c"), tool_call_id="1").content == ["a", "b", "c"]  # type: ignore[call-overload]
     assert ToolMessage(5, tool_call_id="1").content == "5"  # type: ignore[call-overload]
     assert ToolMessage(5.1, tool_call_id="1").content == "5.1"  # type: ignore[call-overload]

From 86252d2ae6a860c96e8dd61505601d8d2128d63b Mon
Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 15:39:36 -0400 Subject: [PATCH 26/56] refactor: move ID prefixes --- libs/core/langchain_core/messages/ai.py | 18 +++--------------- .../langchain_core/messages/content_blocks.py | 14 +++++++++++++- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 207ab6c11331d..76bc8d980a1d1 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -26,18 +26,6 @@ logger = logging.getLogger(__name__) -_LC_AUTO_PREFIX = "lc_" -"""LangChain auto-generated ID prefix for messages and content blocks.""" - -_LC_ID_PREFIX = f"{_LC_AUTO_PREFIX}run-" -"""Internal tracing/callback system identifier. - -Used for: -- Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.) - gets a unique run_id (UUID) -- Enables tracking parent-child relationships between operations -""" - class InputTokenDetails(TypedDict, total=False): """Breakdown of input token counts. @@ -523,15 +511,15 @@ def add_ai_message_chunks( for id_ in candidates: if ( id_ - and not id_.startswith(_LC_ID_PREFIX) - and not id_.startswith(_LC_AUTO_PREFIX) + and not id_.startswith(types.LC_ID_PREFIX) + and not id_.startswith(types.LC_AUTO_PREFIX) ): chunk_id = id_ break else: # second pass: prefer lc_run-* ids over lc_* ids for id_ in candidates: - if id_ and id_.startswith(_LC_ID_PREFIX): + if id_ and id_.startswith(types.LC_ID_PREFIX): chunk_id = id_ break else: diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index 1448d37086b42..32673f1d3cce2 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -135,6 +135,18 @@ class TextContentBlock(TypedDict, extra_items=Any): from typing_extensions import NotRequired, TypedDict, TypeGuard +LC_AUTO_PREFIX = "lc_" +"""LangChain auto-generated ID prefix for messages and content blocks.""" + +LC_ID_PREFIX = f"{LC_AUTO_PREFIX}run-" +"""Internal tracing/callback system identifier. + +Used for: +- Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.) + gets a unique run_id (UUID) +- Enables tracking parent-child relationships between operations +""" + def _ensure_id(id_val: Optional[str]) -> str: """Ensure the ID is a valid string, generating a new UUID if not provided. @@ -148,7 +160,7 @@ def _ensure_id(id_val: Optional[str]) -> str: Returns: A string ID, either the validated provided value or a newly generated UUID4. 
""" - return id_val or str(f"lc_{uuid4()}") + return id_val or str(f"{LC_AUTO_PREFIX}{uuid4()}") class Citation(TypedDict): From f691dc348f7c2caf2a84cdf65fc684bdc2dafc26 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 15:42:17 -0400 Subject: [PATCH 27/56] refactor: make `ensure_id` public --- libs/core/langchain_core/messages/__init__.py | 7 ++++++ .../langchain_core/messages/content_blocks.py | 22 +++++++++---------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/libs/core/langchain_core/messages/__init__.py b/libs/core/langchain_core/messages/__init__.py index 410299ea5b553..a0482bee86231 100644 --- a/libs/core/langchain_core/messages/__init__.py +++ b/libs/core/langchain_core/messages/__init__.py @@ -33,6 +33,8 @@ ) from langchain_core.messages.chat import ChatMessage, ChatMessageChunk from langchain_core.messages.content_blocks import ( + LC_AUTO_PREFIX, + LC_ID_PREFIX, Annotation, AudioContentBlock, Citation, @@ -53,6 +55,7 @@ WebSearchResult, convert_to_openai_data_block, convert_to_openai_image_block, + ensure_id, is_data_content_block, is_reasoning_block, is_text_block, @@ -85,6 +88,8 @@ ) __all__ = ( + "LC_AUTO_PREFIX", + "LC_ID_PREFIX", "AIMessage", "AIMessageChunk", "Annotation", @@ -128,6 +133,7 @@ "convert_to_openai_data_block", "convert_to_openai_image_block", "convert_to_openai_messages", + "ensure_id", "filter_messages", "get_buffer_string", "is_data_content_block", @@ -145,6 +151,7 @@ ) _dynamic_imports = { + "ensure_id": "content_blocks", "AIMessage": "ai", "AIMessageChunk": "ai", "Annotation": "content_blocks", diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index 32673f1d3cce2..ac00966814a4f 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -148,7 +148,7 @@ class TextContentBlock(TypedDict, extra_items=Any): """ -def _ensure_id(id_val: Optional[str]) -> str: +def ensure_id(id_val: Optional[str]) -> str: """Ensure the ID is a valid string, generating a new UUID if not provided. 
Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are @@ -1106,7 +1106,7 @@ def create_text_block( block = TextContentBlock( type="text", text=text, - id=_ensure_id(id), + id=ensure_id(id), ) if annotations is not None: block["annotations"] = annotations @@ -1156,7 +1156,7 @@ def create_image_block( msg = "Must provide one of: url, base64, or file_id" raise ValueError(msg) - block = ImageContentBlock(type="image", id=_ensure_id(id)) + block = ImageContentBlock(type="image", id=ensure_id(id)) if url is not None: block["url"] = url @@ -1216,7 +1216,7 @@ def create_video_block( msg = "mime_type is required when using base64 data" raise ValueError(msg) - block = VideoContentBlock(type="video", id=_ensure_id(id)) + block = VideoContentBlock(type="video", id=ensure_id(id)) if url is not None: block["url"] = url @@ -1276,7 +1276,7 @@ def create_audio_block( msg = "mime_type is required when using base64 data" raise ValueError(msg) - block = AudioContentBlock(type="audio", id=_ensure_id(id)) + block = AudioContentBlock(type="audio", id=ensure_id(id)) if url is not None: block["url"] = url @@ -1336,7 +1336,7 @@ def create_file_block( msg = "mime_type is required when using base64 data" raise ValueError(msg) - block = FileContentBlock(type="file", id=_ensure_id(id)) + block = FileContentBlock(type="file", id=ensure_id(id)) if url is not None: block["url"] = url @@ -1390,7 +1390,7 @@ def create_plaintext_block( block = PlainTextContentBlock( type="text-plain", mime_type="text/plain", - id=_ensure_id(id), + id=ensure_id(id), ) if text is not None: @@ -1443,7 +1443,7 @@ def create_tool_call( type="tool_call", name=name, args=args, - id=_ensure_id(id), + id=ensure_id(id), ) if index is not None: @@ -1480,7 +1480,7 @@ def create_reasoning_block( block = ReasoningContentBlock( type="reasoning", reasoning=reasoning or "", - id=_ensure_id(id), + id=ensure_id(id), ) if index is not None: @@ -1521,7 +1521,7 @@ def create_citation( prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. 
""" - block = Citation(type="citation", id=_ensure_id(id)) + block = Citation(type="citation", id=ensure_id(id)) if url is not None: block["url"] = url @@ -1565,7 +1565,7 @@ def create_non_standard_block( block = NonStandardContentBlock( type="non_standard", value=value, - id=_ensure_id(id), + id=ensure_id(id), ) if index is not None: From 7a8c6398a422c86665cc86c644c92d3ed1a8f4a1 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 16:01:29 -0400 Subject: [PATCH 28/56] clarify: meaning of provider --- libs/core/tests/unit_tests/test_messages.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index 281e964a1aed0..ae837214b70fb 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -188,13 +188,15 @@ def test_message_chunks() -> None: default_id_chunk = AIMessageChunk( content="", id=default_id ) # LangChain-assigned run ID - meaningful_id = AIMessageChunk(content="", id=meaningful_id) # provider-assigned ID + provider_chunk = AIMessageChunk( + content="", id=meaningful_id + ) # provided ID (either by user or provider) assert (null_id_chunk + default_id_chunk).id == default_id - assert (null_id_chunk + meaningful_id).id == meaningful_id + assert (null_id_chunk + provider_chunk).id == meaningful_id # Provider assigned IDs have highest precedence - assert (default_id_chunk + meaningful_id).id == meaningful_id + assert (default_id_chunk + provider_chunk).id == meaningful_id def test_chat_message_chunks() -> None: From 987031f86c306daeb85651aeefaec78aaae95f31 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 16:27:08 -0400 Subject: [PATCH 29/56] fix: `_LC_ID_PREFIX` back --- libs/core/langchain_core/messages/__init__.py | 2 ++ libs/core/langchain_core/messages/ai.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/libs/core/langchain_core/messages/__init__.py b/libs/core/langchain_core/messages/__init__.py index a0482bee86231..3787da41e8f70 100644 --- a/libs/core/langchain_core/messages/__init__.py +++ b/libs/core/langchain_core/messages/__init__.py @@ -21,6 +21,7 @@ if TYPE_CHECKING: from langchain_core.messages.ai import ( + _LC_ID_PREFIX, AIMessage, AIMessageChunk, ) @@ -90,6 +91,7 @@ __all__ = ( "LC_AUTO_PREFIX", "LC_ID_PREFIX", + "_LC_ID_PREFIX", "AIMessage", "AIMessageChunk", "Annotation", diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 76bc8d980a1d1..e2e045c71071a 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -26,6 +26,8 @@ logger = logging.getLogger(__name__) +_LC_ID_PREFIX = types.LC_ID_PREFIX + class InputTokenDetails(TypedDict, total=False): """Breakdown of input token counts. 
From 08cd5bb9b4b45b235ac9eb0166fed479880bb806 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 16:27:47 -0400 Subject: [PATCH 30/56] clarify intent of `extras` under data blocks --- libs/core/langchain_core/messages/content_blocks.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index ac00966814a4f..d98e0ff09ce4d 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -647,7 +647,7 @@ class ImageContentBlock(TypedDict): """Data as a base64 string.""" extras: NotRequired[dict[str, Any]] - """Provider-specific metadata.""" + """Provider-specific metadata. This shouldn't be used for the image data itself.""" class VideoContentBlock(TypedDict): @@ -694,7 +694,7 @@ class VideoContentBlock(TypedDict): """Data as a base64 string.""" extras: NotRequired[dict[str, Any]] - """Provider-specific metadata.""" + """Provider-specific metadata. This shouldn't be used for the video data itself.""" class AudioContentBlock(TypedDict): @@ -740,7 +740,7 @@ class AudioContentBlock(TypedDict): """Data as a base64 string.""" extras: NotRequired[dict[str, Any]] - """Provider-specific metadata.""" + """Provider-specific metadata. This shouldn't be used for the audio data itself.""" class PlainTextContentBlock(TypedDict): @@ -796,7 +796,7 @@ class PlainTextContentBlock(TypedDict): """Context for the text, e.g., a description or summary of the text's content.""" extras: NotRequired[dict[str, Any]] - """Provider-specific metadata.""" + """Provider-specific metadata. This shouldn't be used for the data itself.""" class FileContentBlock(TypedDict): @@ -850,7 +850,7 @@ class FileContentBlock(TypedDict): """Data as a base64 string.""" extras: NotRequired[dict[str, Any]] - """Provider-specific metadata.""" + """Provider-specific metadata. 
This shouldn't be used for the file data itself.""" # Future modalities to consider: From 7f9727ee088551937951b48e2b541389d5931f3c Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 16:28:33 -0400 Subject: [PATCH 31/56] refactor: `is_data_content_block` --- .../langchain_core/messages/content_blocks.py | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index d98e0ff09ce4d..9a3313488f5bc 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -130,7 +130,7 @@ class TextContentBlock(TypedDict, extra_items=Any): """ # noqa: E501 import warnings -from typing import Any, Literal, Optional, Union +from typing import Any, Literal, Optional, Union, get_args, get_type_hints from uuid import uuid4 from typing_extensions import NotRequired, TypedDict, TypeGuard @@ -950,8 +950,24 @@ class NonStandardContentBlock(TypedDict): } +def _get_data_content_block_types() -> tuple[str, ...]: + """Get type literals from DataContentBlock union members dynamically.""" + data_block_types = [] + + for block_type in get_args(DataContentBlock): + hints = get_type_hints(block_type) + if "type" in hints: + type_annotation = hints["type"] + if hasattr(type_annotation, "__args__"): + # This is a Literal type, get the literal value + literal_value = type_annotation.__args__[0] + data_block_types.append(literal_value) + + return tuple(data_block_types) + + def is_data_content_block(block: dict) -> bool: - """Check if the content block is a standard data content block. + """Check if the provided content block is a standard v1 data content block. Args: block: The content block to check. @@ -960,20 +976,19 @@ def is_data_content_block(block: dict) -> bool: True if the content block is a data content block, False otherwise. """ - return block.get("type") in ( - "audio", - "image", - "video", - "file", - "text-plain", - ) and any( + return block.get("type") in _get_data_content_block_types() and any( + # Check if at least one non-type key is present to signify presence of data key in block for key in ( "url", "base64", "file_id", "text", - "source_type", # backwards compatibility + "source_type", # for backwards compatibility with v0 content blocks + # TODO: should we verify that if source_type is present, at least one of + # url, base64, or file_id is also present? Otherwise, source_type could be + # present without any actual data? Need to confirm whether this was ever + # possible in v0 content blocks in the first place. 
) ) From 00345c4de93a37cbd0297f8ddb3b4916cd1a1b37 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 16:28:46 -0400 Subject: [PATCH 32/56] tests: add more data content block tests --- libs/core/tests/unit_tests/test_messages.py | 76 ++++++++++++++------- 1 file changed, 53 insertions(+), 23 deletions(-) diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index ae837214b70fb..6fc42db829b36 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -1118,19 +1118,41 @@ def test_message_text() -> None: def test_is_data_content_block() -> None: + # Test all DataContentBlock types with various data fields + + # Image blocks + assert is_data_content_block({"type": "image", "url": "https://..."}) assert is_data_content_block( - { - "type": "image", - "url": "https://...", - } + {"type": "image", "base64": "", "mime_type": "image/jpeg"} ) + + # Video blocks + assert is_data_content_block({"type": "video", "url": "https://video.mp4"}) assert is_data_content_block( - { - "type": "image", - "base64": "", - "mime_type": "image/jpeg", - } + {"type": "video", "base64": "", "mime_type": "video/mp4"} ) + assert is_data_content_block({"type": "video", "file_id": "vid_123"}) + + # Audio blocks + assert is_data_content_block({"type": "audio", "url": "https://audio.mp3"}) + assert is_data_content_block( + {"type": "audio", "base64": "", "mime_type": "audio/mp3"} + ) + assert is_data_content_block({"type": "audio", "file_id": "aud_123"}) + + # Plain text blocks + assert is_data_content_block({"type": "text-plain", "text": "document content"}) + assert is_data_content_block({"type": "text-plain", "url": "https://doc.txt"}) + assert is_data_content_block({"type": "text-plain", "file_id": "txt_123"}) + + # File blocks + assert is_data_content_block({"type": "file", "url": "https://file.pdf"}) + assert is_data_content_block( + {"type": "file", "base64": "", "mime_type": "application/pdf"} + ) + assert is_data_content_block({"type": "file", "file_id": "file_123"}) + + # Blocks with additional metadata (should still be valid) assert is_data_content_block( { "type": "image", @@ -1150,27 +1172,35 @@ def test_is_data_content_block() -> None: assert is_data_content_block( { "type": "image", - "source_type": "base64", # backward compatibility - } - ) - assert not is_data_content_block( - { - "type": "text", - "text": "foo", + "base64": "", + "mime_type": "image/jpeg", + "extras": "hi", } ) + + # Invalid cases - wrong type + assert not is_data_content_block({"type": "text", "text": "foo"}) assert not is_data_content_block( { "type": "image_url", "image_url": {"url": "https://..."}, - } - ) - assert not is_data_content_block( - { - "type": "image", - "source": "", - } + } # This is OpenAI Chat Completions ) + assert not is_data_content_block({"type": "tool_call", "name": "func", "args": {}}) + assert not is_data_content_block({"type": "invalid", "url": "something"}) + + # Invalid cases - valid type but no data or `source_type` fields + assert not is_data_content_block({"type": "image"}) + assert not is_data_content_block({"type": "video", "mime_type": "video/mp4"}) + assert not is_data_content_block({"type": "audio", "extras": {"key": "value"}}) + + # Invalid cases - valid type but wrong data field name + assert not is_data_content_block({"type": "image", "source": ""}) + assert not is_data_content_block({"type": "video", "data": "video_data"}) + + # Edge cases - empty or missing values + assert not 
is_data_content_block({}) + assert not is_data_content_block({"url": "https://..."}) # missing type def test_convert_to_openai_image_block() -> None: From 0199b56bda27dbbbc0fb2b88988021c5fa9c994c Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 16:37:39 -0400 Subject: [PATCH 33/56] rfc `test_utils` to make clearer what was existing before and after, and add comments --- .../tests/unit_tests/messages/test_utils.py | 57 ++++++++++--------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/libs/core/tests/unit_tests/messages/test_utils.py b/libs/core/tests/unit_tests/messages/test_utils.py index f9f1c9c9ff081..d655fd13bdbf5 100644 --- a/libs/core/tests/unit_tests/messages/test_utils.py +++ b/libs/core/tests/unit_tests/messages/test_utils.py @@ -1215,36 +1215,22 @@ def test_convert_to_openai_messages_developer() -> None: def test_convert_to_openai_messages_multimodal() -> None: + """v0 and v1 content to OpenAI messages conversion.""" messages = [ HumanMessage( content=[ + # Prior v0 blocks {"type": "text", "text": "Text message"}, { "type": "image", "url": "https://example.com/test.png", }, - { - "type": "image", - "source_type": "url", # backward compatibility - "url": "https://example.com/test.png", - }, - { - "type": "image", - "base64": "", - "mime_type": "image/png", - }, { "type": "image", "source_type": "base64", "data": "", "mime_type": "image/png", }, - { - "type": "file", - "base64": "", - "mime_type": "application/pdf", - "filename": "test.pdf", - }, { "type": "file", "source_type": "base64", @@ -1253,26 +1239,18 @@ def test_convert_to_openai_messages_multimodal() -> None: "filename": "test.pdf", }, { + # OpenAI Chat Completions file format "type": "file", "file": { "filename": "draconomicon.pdf", "file_data": "data:application/pdf;base64,", }, }, - { - "type": "file", - "file_id": "file-abc123", - }, { "type": "file", "source_type": "id", "id": "file-abc123", }, - { - "type": "audio", - "base64": "", - "mime_type": "audio/wav", - }, { "type": "audio", "source_type": "base64", @@ -1286,6 +1264,32 @@ def test_convert_to_openai_messages_multimodal() -> None: "format": "wav", }, }, + # v1 Additions + { + "type": "image", + "source_type": "url", # backward compatibility v0 block field + "url": "https://example.com/test.png", + }, + { + "type": "image", + "base64": "", + "mime_type": "image/png", + }, + { + "type": "file", + "base64": "", + "mime_type": "application/pdf", + "filename": "test.pdf", # backward compatibility v0 block field + }, + { + "type": "file", + "file_id": "file-abc123", + }, + { + "type": "audio", + "base64": "", + "mime_type": "audio/wav", + }, ] ) ] @@ -1294,7 +1298,7 @@ def test_convert_to_openai_messages_multimodal() -> None: message = result[0] assert len(message["content"]) == 13 - # Test adding filename + # Test auto-adding filename messages = [ HumanMessage( content=[ @@ -1313,6 +1317,7 @@ def test_convert_to_openai_messages_multimodal() -> None: assert len(message["content"]) == 1 block = message["content"][0] assert block == { + # OpenAI Chat Completions file format "type": "file", "file": { "file_data": "data:application/pdf;base64,", From 2375c3a4d0402a8f12d9fa7490f3e08ef98c9b68 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 16:39:36 -0400 Subject: [PATCH 34/56] add note --- libs/core/langchain_core/messages/content_blocks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index 
9a3313488f5bc..a124dd56e69a7 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -1047,6 +1047,7 @@ def convert_to_openai_image_block(block: dict[str, Any]) -> dict: def convert_to_openai_data_block(block: dict) -> dict: """Format standard data content block to format expected by OpenAI.""" + # TODO: make sure this supports new v1 if block["type"] == "image": formatted_block = convert_to_openai_image_block(block) From aca7c1fe6a1a55c4134cb31f7046153751869938 Mon Sep 17 00:00:00 2001 From: ccurme Date: Mon, 18 Aug 2025 10:45:06 -0300 Subject: [PATCH 35/56] fix(core): temporarily fix tests (#32589) --- libs/core/langchain_core/messages/__init__.py | 3 +++ libs/core/tests/unit_tests/messages/test_imports.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/libs/core/langchain_core/messages/__init__.py b/libs/core/langchain_core/messages/__init__.py index 3787da41e8f70..31e4b560b2b8d 100644 --- a/libs/core/langchain_core/messages/__init__.py +++ b/libs/core/langchain_core/messages/__init__.py @@ -176,6 +176,9 @@ "FunctionMessageChunk": "function", "HumanMessage": "human", "HumanMessageChunk": "human", + "LC_AUTO_PREFIX": "content_blocks", + "LC_ID_PREFIX": "content_blocks", + "_LC_ID_PREFIX": "ai", "NonStandardAnnotation": "content_blocks", "NonStandardContentBlock": "content_blocks", "PlainTextContentBlock": "content_blocks", diff --git a/libs/core/tests/unit_tests/messages/test_imports.py b/libs/core/tests/unit_tests/messages/test_imports.py index 750f2f49f060d..ada1c882a7242 100644 --- a/libs/core/tests/unit_tests/messages/test_imports.py +++ b/libs/core/tests/unit_tests/messages/test_imports.py @@ -25,6 +25,9 @@ "HumanMessageChunk", "ImageContentBlock", "InvalidToolCall", + "_LC_ID_PREFIX", + "LC_AUTO_PREFIX", + "LC_ID_PREFIX", "NonStandardAnnotation", "NonStandardContentBlock", "PlainTextContentBlock", @@ -41,6 +44,7 @@ "ReasoningContentBlock", "RemoveMessage", "convert_to_messages", + "ensure_id", "get_buffer_string", "is_data_content_block", "is_reasoning_block", From aeea0e3ff888a1b087489834fbc8142c3dfe9c52 Mon Sep 17 00:00:00 2001 From: ccurme Date: Mon, 18 Aug 2025 10:49:01 -0300 Subject: [PATCH 36/56] fix(langchain): fix tests on standard content branch (#32590) --- libs/langchain/langchain/agents/output_parsers/tools.py | 7 ++++++- libs/langchain/tests/unit_tests/chat_models/test_base.py | 1 + .../tests/unit_tests/chat_models/test_chat_models.py | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/libs/langchain/langchain/agents/output_parsers/tools.py b/libs/langchain/langchain/agents/output_parsers/tools.py index b7ec8a47aa19a..1cf1ed8680f03 100644 --- a/libs/langchain/langchain/agents/output_parsers/tools.py +++ b/libs/langchain/langchain/agents/output_parsers/tools.py @@ -47,7 +47,12 @@ def parse_ai_message_to_tool_action( try: args = json.loads(function["arguments"] or "{}") tool_calls.append( - ToolCall(name=function_name, args=args, id=tool_call["id"]), + ToolCall( + type="tool_call", + name=function_name, + args=args, + id=tool_call["id"], + ), ) except JSONDecodeError as e: msg = ( diff --git a/libs/langchain/tests/unit_tests/chat_models/test_base.py b/libs/langchain/tests/unit_tests/chat_models/test_base.py index 611f251b8162c..bfd9826fb6d63 100644 --- a/libs/langchain/tests/unit_tests/chat_models/test_base.py +++ b/libs/langchain/tests/unit_tests/chat_models/test_base.py @@ -277,6 +277,7 @@ def test_configurable_with_default() -> None: "model_kwargs": {}, "streaming": False, 
"stream_usage": True, + "output_version": "v0", }, "kwargs": { "tools": [{"name": "foo", "description": "foo", "input_schema": {}}], diff --git a/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py b/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py index 450844cbec175..7862ec1d4a8b6 100644 --- a/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py +++ b/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py @@ -277,6 +277,7 @@ def test_configurable_with_default() -> None: "model_kwargs": {}, "streaming": False, "stream_usage": True, + "output_version": "v0", }, "kwargs": { "tools": [{"name": "foo", "description": "foo", "input_schema": {}}], From 4790c7265a205e45d1ac8e9db079c7334511aff8 Mon Sep 17 00:00:00 2001 From: ccurme Date: Mon, 18 Aug 2025 11:30:49 -0300 Subject: [PATCH 37/56] feat(core): lazy-load standard content (#32570) --- libs/core/langchain_core/messages/ai.py | 50 ++- .../messages/block_translators/__init__.py | 1 + .../messages/block_translators/openai.py | 345 ++++++++++++++++++ .../langchain_core/messages/content_blocks.py | 50 +-- libs/core/langchain_core/utils/_merge.py | 16 +- .../messages/block_translators/__init__.py | 0 .../messages/block_translators/test_openai.py | 231 ++++++++++++ .../prompts/__snapshots__/test_chat.ambr | 36 +- .../runnables/__snapshots__/test_graph.ambr | 18 +- .../__snapshots__/test_runnable.ambr | 144 +++++++- .../langchain_openai/chat_models/_compat.py | 276 +------------- .../langchain_openai/chat_models/base.py | 157 +++----- .../chat_models/test_responses_api.py | 62 ++-- .../tests/unit_tests/chat_models/test_base.py | 110 ------ .../chat_models/test_responses_stream.py | 21 +- 15 files changed, 917 insertions(+), 600 deletions(-) create mode 100644 libs/core/langchain_core/messages/block_translators/__init__.py create mode 100644 libs/core/langchain_core/messages/block_translators/openai.py create mode 100644 libs/core/tests/unit_tests/messages/block_translators/__init__.py create mode 100644 libs/core/tests/unit_tests/messages/block_translators/test_openai.py diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index e2e045c71071a..864444f53aeee 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -218,6 +218,16 @@ def lc_attributes(self) -> dict: @property def content_blocks(self) -> list[types.ContentBlock]: """Return content blocks of the message.""" + if self.response_metadata.get("output_version") == "v1": + return cast("list[types.ContentBlock]", self.content) + + model_provider = self.response_metadata.get("model_provider") + if model_provider == "openai": + from langchain_core.messages.block_translators import openai + + return openai.translate_content(self) + + # Otherwise, use best-effort parsing blocks = super().content_blocks if self.tool_calls: @@ -355,32 +365,34 @@ def lc_attributes(self) -> dict: @property def content_blocks(self) -> list[types.ContentBlock]: """Return content blocks of the message.""" + if self.response_metadata.get("output_version") == "v1": + return cast("list[types.ContentBlock]", self.content) + + model_provider = self.response_metadata.get("model_provider") + if model_provider == "openai": + from langchain_core.messages.block_translators import openai + + return openai.translate_content_chunk(self) + + # Otherwise, use best-effort parsing blocks = super().content_blocks - if self.tool_call_chunks: + if self.tool_call_chunks and not self.content: blocks 
= [ block for block in blocks if block["type"] not in ("tool_call", "invalid_tool_call") ] - # Add from tool_call_chunks if missing from content - content_tool_call_ids = { - block.get("id") - for block in self.content - if isinstance(block, dict) and block.get("type") == "tool_call_chunk" - } - for chunk in self.tool_call_chunks: - if (id_ := chunk.get("id")) and id_ not in content_tool_call_ids: - tool_call_chunk_block: types.ToolCallChunk = { - "type": "tool_call_chunk", - "id": id_, - "name": chunk["name"], - "args": chunk["args"], - "index": chunk.get("index"), - } - if "extras" in chunk: - tool_call_chunk_block["extras"] = chunk["extras"] # type: ignore[typeddict-item] - blocks.append(tool_call_chunk_block) + for tool_call_chunk in self.tool_call_chunks: + tc: types.ToolCallChunk = { + "type": "tool_call_chunk", + "id": tool_call_chunk.get("id"), + "name": tool_call_chunk.get("name"), + "args": tool_call_chunk.get("args"), + } + if (idx := tool_call_chunk.get("index")) is not None: + tc["index"] = idx + blocks.append(tc) return blocks diff --git a/libs/core/langchain_core/messages/block_translators/__init__.py b/libs/core/langchain_core/messages/block_translators/__init__.py new file mode 100644 index 0000000000000..1dd51cc836e3a --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/__init__.py @@ -0,0 +1 @@ +"""Derivations of standard content blocks from provider content.""" diff --git a/libs/core/langchain_core/messages/block_translators/openai.py b/libs/core/langchain_core/messages/block_translators/openai.py new file mode 100644 index 0000000000000..19ab0fbdae712 --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/openai.py @@ -0,0 +1,345 @@ +"""Derivations of standard content blocks from OpenAI content.""" + +from collections.abc import Iterable +from typing import Any, Optional, Union, cast + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content_blocks as types + + +# v1 / Chat Completions +def _convert_to_v1_from_chat_completions( + message: AIMessage, +) -> list[types.ContentBlock]: + """Mutate a Chat Completions message to v1 format.""" + content_blocks: list[types.ContentBlock] = [] + if isinstance(message.content, str): + if message.content: + content_blocks = [{"type": "text", "text": message.content}] + else: + content_blocks = [] + + for tool_call in message.tool_calls: + content_blocks.append(tool_call) + + return content_blocks + + +def _convert_to_v1_from_chat_completions_chunk( + chunk: AIMessageChunk, +) -> list[types.ContentBlock]: + """Mutate a Chat Completions chunk to v1 format.""" + content_blocks: list[types.ContentBlock] = [] + if isinstance(chunk.content, str): + if chunk.content: + content_blocks = [{"type": "text", "text": chunk.content}] + else: + content_blocks = [] + + for tool_call_chunk in chunk.tool_call_chunks: + tc: types.ToolCallChunk = { + "type": "tool_call_chunk", + "id": tool_call_chunk.get("id"), + "name": tool_call_chunk.get("name"), + "args": tool_call_chunk.get("args"), + } + if (idx := tool_call_chunk.get("index")) is not None: + tc["index"] = idx + content_blocks.append(tc) + + return content_blocks + + +def _convert_from_v1_to_chat_completions(message: AIMessage) -> AIMessage: + """Convert a v1 message to the Chat Completions format.""" + if isinstance(message.content, list): + new_content: list = [] + for block in message.content: + if isinstance(block, dict): + block_type = block.get("type") + if block_type == "text": + # Strip annotations + 
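# e.g. {"type": "text", "text": "Hi", "annotations": [...]}
+                    # is reduced to {"type": "text", "text": "Hi"}
+                    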
new_content.append({"type": "text", "text": block["text"]}) + elif block_type in ("reasoning", "tool_call"): + pass + else: + new_content.append(block) + else: + new_content.append(block) + return message.model_copy(update={"content": new_content}) + + return message + + +# v1 / Responses +def _convert_annotation_to_v1(annotation: dict[str, Any]) -> types.Annotation: + annotation_type = annotation.get("type") + + if annotation_type == "url_citation": + known_fields = { + "type", + "url", + "title", + "cited_text", + "start_index", + "end_index", + } + url_citation = cast("types.Citation", {}) + for field in ("end_index", "start_index", "title"): + if field in annotation: + url_citation[field] = annotation[field] + url_citation["type"] = "citation" + url_citation["url"] = annotation["url"] + for field, value in annotation.items(): + if field not in known_fields: + if "extras" not in url_citation: + url_citation["extras"] = {} + url_citation["extras"][field] = value + return url_citation + + if annotation_type == "file_citation": + known_fields = { + "type", + "title", + "cited_text", + "start_index", + "end_index", + "filename", + } + document_citation: types.Citation = {"type": "citation"} + if "filename" in annotation: + document_citation["title"] = annotation["filename"] + for field, value in annotation.items(): + if field not in known_fields: + if "extras" not in document_citation: + document_citation["extras"] = {} + document_citation["extras"][field] = value + + return document_citation + + # TODO: standardise container_file_citation? + non_standard_annotation: types.NonStandardAnnotation = { + "type": "non_standard_annotation", + "value": annotation, + } + return non_standard_annotation + + +def _explode_reasoning(block: dict[str, Any]) -> Iterable[types.ReasoningContentBlock]: + if "summary" not in block: + yield cast("types.ReasoningContentBlock", block) + return + + known_fields = {"type", "reasoning", "id", "index"} + unknown_fields = [ + field for field in block if field != "summary" and field not in known_fields + ] + if unknown_fields: + block["extras"] = {} + for field in unknown_fields: + block["extras"][field] = block.pop(field) + + if not block["summary"]: + # [{'id': 'rs_...', 'summary': [], 'type': 'reasoning', 'index': 0}] + block = {k: v for k, v in block.items() if k != "summary"} + if "index" in block: + meaningful_idx = f"{block['index']}_0" + block["index"] = f"lc_rs_{meaningful_idx.encode().hex()}" + yield cast("types.ReasoningContentBlock", block) + return + + # Common part for every exploded line, except 'summary' + common = {k: v for k, v in block.items() if k in known_fields} + + # Optional keys that must appear only in the first exploded item + first_only = block.pop("extras", None) + + for idx, part in enumerate(block["summary"]): + new_block = dict(common) + new_block["reasoning"] = part.get("text", "") + if idx == 0 and first_only: + new_block.update(first_only) + if "index" in new_block: + summary_index = part.get("index", 0) + meaningful_idx = f"{new_block['index']}_{summary_index}" + new_block["index"] = f"lc_rs_{meaningful_idx.encode().hex()}" + + yield cast("types.ReasoningContentBlock", new_block) + + +def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock]: + """Convert a Responses message to v1 format.""" + + def _iter_blocks() -> Iterable[types.ContentBlock]: + for raw_block in message.content: + if not isinstance(raw_block, dict): + continue + block = raw_block.copy() + block_type = block.get("type") + + if block_type == 
"text": + if "text" not in block: + block["text"] = "" + if "annotations" in block: + block["annotations"] = [ + _convert_annotation_to_v1(a) for a in block["annotations"] + ] + if "index" in block: + block["index"] = f"lc_txt_{block['index']}" + yield cast("types.TextContentBlock", block) + + elif block_type == "reasoning": + yield from _explode_reasoning(block) + + elif block_type == "image_generation_call" and ( + result := block.get("result") + ): + new_block = {"type": "image", "base64": result} + if output_format := block.get("output_format"): + new_block["mime_type"] = f"image/{output_format}" + if "id" in block: + new_block["id"] = block["id"] + if "index" in block: + new_block["index"] = f"lc_img_{block['index']}" + for extra_key in ( + "status", + "background", + "output_format", + "quality", + "revised_prompt", + "size", + ): + if extra_key in block: + if "extras" not in new_block: + new_block["extras"] = {} + new_block["extras"][extra_key] = block[extra_key] + yield cast("types.ImageContentBlock", new_block) + + elif block_type == "function_call": + tool_call_block: Optional[ + Union[types.ToolCall, types.InvalidToolCall, types.ToolCallChunk] + ] = None + call_id = block.get("call_id", "") + if ( + isinstance(message, AIMessageChunk) + and len(message.tool_call_chunks) == 1 + ): + tool_call_block = message.tool_call_chunks[0].copy() # type: ignore[assignment] + elif call_id: + for tool_call in message.tool_calls or []: + if tool_call.get("id") == call_id: + tool_call_block = tool_call.copy() + break + else: + for invalid_tool_call in message.invalid_tool_calls or []: + if invalid_tool_call.get("id") == call_id: + tool_call_block = invalid_tool_call.copy() + break + else: + pass + if tool_call_block: + if "id" in block: + if "extras" not in tool_call_block: + tool_call_block["extras"] = {} + tool_call_block["extras"]["item_id"] = block["id"] + if "index" in block: + tool_call_block["index"] = f"lc_tc_{block['index']}" + yield tool_call_block + + elif block_type == "web_search_call": + web_search_call = {"type": "web_search_call", "id": block["id"]} + if "index" in block: + web_search_call["index"] = f"lc_wsc_{block['index']}" + if ( + "action" in block + and isinstance(block["action"], dict) + and block["action"].get("type") == "search" + and "query" in block["action"] + ): + web_search_call["query"] = block["action"]["query"] + for key in block: + if key not in ("type", "id", "index"): + web_search_call[key] = block[key] + + yield cast("types.WebSearchCall", web_search_call) + + # If .content already has web_search_result, don't add + if not any( + isinstance(other_block, dict) + and other_block.get("type") == "web_search_result" + and other_block.get("id") == block["id"] + for other_block in message.content + ): + web_search_result = {"type": "web_search_result", "id": block["id"]} + if "index" in block and isinstance(block["index"], int): + web_search_result["index"] = f"lc_wsr_{block['index'] + 1}" + yield cast("types.WebSearchResult", web_search_result) + + elif block_type == "code_interpreter_call": + code_interpreter_call = { + "type": "code_interpreter_call", + "id": block["id"], + } + if "code" in block: + code_interpreter_call["code"] = block["code"] + if "index" in block: + code_interpreter_call["index"] = f"lc_cic_{block['index']}" + known_fields = {"type", "id", "language", "code", "extras", "index"} + for key in block: + if key not in known_fields: + if "extras" not in code_interpreter_call: + code_interpreter_call["extras"] = {} + 
code_interpreter_call["extras"][key] = block[key] + + code_interpreter_result = { + "type": "code_interpreter_result", + "id": block["id"], + } + if "outputs" in block: + code_interpreter_result["outputs"] = block["outputs"] + for output in block["outputs"]: + if ( + isinstance(output, dict) + and (output_type := output.get("type")) + and output_type == "logs" + ): + if "output" not in code_interpreter_result: + code_interpreter_result["output"] = [] + code_interpreter_result["output"].append( + { + "type": "code_interpreter_output", + "stdout": output.get("logs", ""), + } + ) + + if "status" in block: + code_interpreter_result["status"] = block["status"] + if "index" in block and isinstance(block["index"], int): + code_interpreter_result["index"] = f"lc_cir_{block['index'] + 1}" + + yield cast("types.CodeInterpreterCall", code_interpreter_call) + yield cast("types.CodeInterpreterResult", code_interpreter_result) + + elif block_type in types.KNOWN_BLOCK_TYPES: + yield cast("types.ContentBlock", block) + else: + new_block = {"type": "non_standard", "value": block} + if "index" in new_block["value"]: + new_block["index"] = f"lc_ns_{new_block['value'].pop('index')}" + yield cast("types.NonStandardContentBlock", new_block) + + return list(_iter_blocks()) + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with OpenAI content.""" + if isinstance(message.content, str): + return _convert_to_v1_from_chat_completions(message) + return _convert_to_v1_from_responses(message) + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a message chunk with OpenAI content.""" + if isinstance(message.content, str): + return _convert_to_v1_from_chat_completions_chunk(message) + return _convert_to_v1_from_responses(message) diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index a124dd56e69a7..61d458fe93675 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -274,7 +274,7 @@ class TextContentBlock(TypedDict): annotations: NotRequired[list[Annotation]] """``Citation``s and other annotations.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -324,7 +324,7 @@ class ToolCall(TypedDict): args: dict[str, Any] """The arguments to the tool call.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -371,7 +371,7 @@ class ToolCallChunk(TypedDict): args: Optional[str] """The arguments to the tool call.""" - index: Optional[int] + index: NotRequired[Union[int, str]] """The index of the tool call in a sequence.""" extras: NotRequired[dict[str, Any]] @@ -408,7 +408,7 @@ class InvalidToolCall(TypedDict): error: Optional[str] """An error message associated with the tool call.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -433,7 +433,7 @@ class WebSearchCall(TypedDict): query: NotRequired[str] """The search query used in the web search tool call.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. 
Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -458,7 +458,7 @@ class WebSearchResult(TypedDict): urls: NotRequired[list[str]] """List of URLs returned by the web search tool call.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -486,7 +486,7 @@ class CodeInterpreterCall(TypedDict): code: NotRequired[str] """The code to be executed by the code interpreter.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -529,7 +529,7 @@ class CodeInterpreterOutput(TypedDict): file_ids: NotRequired[list[str]] """List of file IDs generated by the code interpreter.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -554,7 +554,7 @@ class CodeInterpreterResult(TypedDict): output: list[CodeInterpreterOutput] """List of outputs from the code interpreter tool call.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -593,7 +593,7 @@ class ReasoningContentBlock(TypedDict): """ - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -637,7 +637,7 @@ class ImageContentBlock(TypedDict): """ - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" url: NotRequired[str] @@ -684,7 +684,7 @@ class VideoContentBlock(TypedDict): """ - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" url: NotRequired[str] @@ -730,7 +730,7 @@ class AudioContentBlock(TypedDict): """ - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" url: NotRequired[str] @@ -777,7 +777,7 @@ class PlainTextContentBlock(TypedDict): mime_type: Literal["text/plain"] """MIME type of the file. Required for base64.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" url: NotRequired[str] @@ -840,7 +840,7 @@ class FileContentBlock(TypedDict): """ - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" url: NotRequired[str] @@ -895,7 +895,7 @@ class NonStandardContentBlock(TypedDict): value: dict[str, Any] """Provider-specific data.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" @@ -1100,7 +1100,7 @@ def create_text_block( *, id: Optional[str] = None, annotations: Optional[list[Annotation]] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, **kwargs: Any, ) -> TextContentBlock: """Create a ``TextContentBlock``. @@ -1143,7 +1143,7 @@ def create_image_block( file_id: Optional[str] = None, mime_type: Optional[str] = None, id: Optional[str] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, **kwargs: Any, ) -> ImageContentBlock: """Create an ``ImageContentBlock``. 
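
Since ``index`` may now be a string (the translators emit opaque values such
as ``"lc_txt_0"`` during streaming), chunk merging keys on these indexes. A
minimal sketch of the intended behavior, assuming the ``merge_lists`` and
``merge_dicts`` changes in the ``_merge.py`` diff further below:

    from langchain_core.utils._merge import merge_lists

    left = [{"type": "text", "text": "Hello ", "index": "lc_txt_0"}]
    right = [{"type": "text", "text": "world", "index": "lc_txt_0"}]
    # Blocks sharing an "lc_"-prefixed string index merge in place: "text"
    # concatenates, while the matching "index" value is kept rather than summed.
    assert merge_lists(left, right) == [
        {"type": "text", "text": "Hello world", "index": "lc_txt_0"}
    ]
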
@@ -1199,7 +1199,7 @@ def create_video_block( file_id: Optional[str] = None, mime_type: Optional[str] = None, id: Optional[str] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, **kwargs: Any, ) -> VideoContentBlock: """Create a ``VideoContentBlock``. @@ -1259,7 +1259,7 @@ def create_audio_block( file_id: Optional[str] = None, mime_type: Optional[str] = None, id: Optional[str] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, **kwargs: Any, ) -> AudioContentBlock: """Create an ``AudioContentBlock``. @@ -1319,7 +1319,7 @@ def create_file_block( file_id: Optional[str] = None, mime_type: Optional[str] = None, id: Optional[str] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, **kwargs: Any, ) -> FileContentBlock: """Create a ``FileContentBlock``. @@ -1380,7 +1380,7 @@ def create_plaintext_block( title: Optional[str] = None, context: Optional[str] = None, id: Optional[str] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, **kwargs: Any, ) -> PlainTextContentBlock: """Create a ``PlainTextContentBlock``. @@ -1436,7 +1436,7 @@ def create_tool_call( args: dict[str, Any], *, id: Optional[str] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, **kwargs: Any, ) -> ToolCall: """Create a ``ToolCall``. @@ -1475,7 +1475,7 @@ def create_tool_call( def create_reasoning_block( reasoning: Optional[str] = None, id: Optional[str] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, **kwargs: Any, ) -> ReasoningContentBlock: """Create a ``ReasoningContentBlock``. @@ -1561,7 +1561,7 @@ def create_non_standard_block( value: dict[str, Any], *, id: Optional[str] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, ) -> NonStandardContentBlock: """Create a ``NonStandardContentBlock``. diff --git a/libs/core/langchain_core/utils/_merge.py b/libs/core/langchain_core/utils/_merge.py index 63d49de953c27..c32b09e2e669c 100644 --- a/libs/core/langchain_core/utils/_merge.py +++ b/libs/core/langchain_core/utils/_merge.py @@ -57,6 +57,11 @@ def merge_dicts(left: dict[str, Any], *others: dict[str, Any]) -> dict[str, Any] # "should either occur once or have the same value across " # "all dicts." 
# ) + if (right_k == "index" and merged[right_k].startswith("lc_")) or ( + right_k in ("id", "output_version", "model_provider") + and merged[right_k] == right_v + ): + continue merged[right_k] += right_v elif isinstance(merged[right_k], dict): merged[right_k] = merge_dicts(merged[right_k], right_v) @@ -93,7 +98,16 @@ def merge_lists(left: Optional[list], *others: Optional[list]) -> Optional[list] merged = other.copy() else: for e in other: - if isinstance(e, dict) and "index" in e and isinstance(e["index"], int): + if ( + isinstance(e, dict) + and "index" in e + and ( + isinstance(e["index"], int) + or ( + isinstance(e["index"], str) and e["index"].startswith("lc_") + ) + ) + ): to_merge = [ i for i, e_left in enumerate(merged) diff --git a/libs/core/tests/unit_tests/messages/block_translators/__init__.py b/libs/core/tests/unit_tests/messages/block_translators/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py new file mode 100644 index 0000000000000..00dae69865dab --- /dev/null +++ b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py @@ -0,0 +1,231 @@ +from typing import Optional + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content_blocks as types + + +def test_convert_to_v1_from_responses() -> None: + message = AIMessage( + [ + {"type": "reasoning", "id": "abc123", "summary": []}, + { + "type": "reasoning", + "id": "abc234", + "summary": [ + {"type": "summary_text", "text": "foo bar"}, + {"type": "summary_text", "text": "baz"}, + ], + }, + { + "type": "function_call", + "call_id": "call_123", + "name": "get_weather", + "arguments": '{"location": "San Francisco"}', + }, + { + "type": "function_call", + "call_id": "call_234", + "name": "get_weather_2", + "arguments": '{"location": "New York"}', + "id": "fc_123", + }, + {"type": "text", "text": "Hello "}, + { + "type": "text", + "text": "world", + "annotations": [ + {"type": "url_citation", "url": "https://example.com"}, + { + "type": "file_citation", + "filename": "my doc", + "index": 1, + "file_id": "file_123", + }, + {"bar": "baz"}, + ], + }, + {"type": "image_generation_call", "id": "ig_123", "result": "..."}, + {"type": "something_else", "foo": "bar"}, + ], + tool_calls=[ + { + "type": "tool_call", + "id": "call_123", + "name": "get_weather", + "args": {"location": "San Francisco"}, + }, + { + "type": "tool_call", + "id": "call_234", + "name": "get_weather_2", + "args": {"location": "New York"}, + }, + ], + response_metadata={"model_provider": "openai"}, + ) + expected_content: list[types.ContentBlock] = [ + {"type": "reasoning", "id": "abc123"}, + {"type": "reasoning", "id": "abc234", "reasoning": "foo bar"}, + {"type": "reasoning", "id": "abc234", "reasoning": "baz"}, + { + "type": "tool_call", + "id": "call_123", + "name": "get_weather", + "args": {"location": "San Francisco"}, + }, + { + "type": "tool_call", + "id": "call_234", + "name": "get_weather_2", + "args": {"location": "New York"}, + "extras": {"item_id": "fc_123"}, + }, + {"type": "text", "text": "Hello "}, + { + "type": "text", + "text": "world", + "annotations": [ + {"type": "citation", "url": "https://example.com"}, + { + "type": "citation", + "title": "my doc", + "extras": {"file_id": "file_123", "index": 1}, + }, + {"type": "non_standard_annotation", "value": {"bar": "baz"}}, + ], + }, + {"type": "image", "base64": "...", 
"id": "ig_123"}, + { + "type": "non_standard", + "value": {"type": "something_else", "foo": "bar"}, + }, + ] + assert message.content_blocks == expected_content + + # Check no mutation + assert message.content != expected_content + + +def test_convert_to_v1_from_responses_chunk() -> None: + chunks = [ + AIMessageChunk( + content=[{"type": "reasoning", "id": "abc123", "summary": [], "index": 0}], + response_metadata={"model_provider": "openai"}, + ), + AIMessageChunk( + content=[ + { + "type": "reasoning", + "id": "abc234", + "summary": [ + {"type": "summary_text", "text": "foo ", "index": 0}, + ], + "index": 1, + } + ], + response_metadata={"model_provider": "openai"}, + ), + AIMessageChunk( + content=[ + { + "type": "reasoning", + "id": "abc234", + "summary": [ + {"type": "summary_text", "text": "bar", "index": 0}, + ], + "index": 1, + } + ], + response_metadata={"model_provider": "openai"}, + ), + AIMessageChunk( + content=[ + { + "type": "reasoning", + "id": "abc234", + "summary": [ + {"type": "summary_text", "text": "baz", "index": 1}, + ], + "index": 1, + } + ], + response_metadata={"model_provider": "openai"}, + ), + ] + expected_chunks = [ + AIMessageChunk( + content=[{"type": "reasoning", "id": "abc123", "index": "lc_rs_305f30"}], + response_metadata={"model_provider": "openai"}, + ), + AIMessageChunk( + content=[ + { + "type": "reasoning", + "id": "abc234", + "reasoning": "foo ", + "index": "lc_rs_315f30", + } + ], + response_metadata={"model_provider": "openai"}, + ), + AIMessageChunk( + content=[ + { + "type": "reasoning", + "id": "abc234", + "reasoning": "bar", + "index": "lc_rs_315f30", + } + ], + response_metadata={"model_provider": "openai"}, + ), + AIMessageChunk( + content=[ + { + "type": "reasoning", + "id": "abc234", + "reasoning": "baz", + "index": "lc_rs_315f31", + } + ], + response_metadata={"model_provider": "openai"}, + ), + ] + for chunk, expected in zip(chunks, expected_chunks): + assert chunk.content_blocks == expected.content_blocks + + full: Optional[AIMessageChunk] = None + for chunk in chunks: + full = chunk if full is None else full + chunk # type: ignore[assignment] + assert isinstance(full, AIMessageChunk) + + expected_content = [ + {"type": "reasoning", "id": "abc123", "summary": [], "index": 0}, + { + "type": "reasoning", + "id": "abc234", + "summary": [ + {"type": "summary_text", "text": "foo bar", "index": 0}, + {"type": "summary_text", "text": "baz", "index": 1}, + ], + "index": 1, + }, + ] + assert full.content == expected_content + + expected_content_blocks = [ + {"type": "reasoning", "id": "abc123", "index": "lc_rs_305f30"}, + { + "type": "reasoning", + "id": "abc234", + "reasoning": "foo bar", + "index": "lc_rs_315f30", + }, + { + "type": "reasoning", + "id": "abc234", + "reasoning": "baz", + "index": "lc_rs_315f31", + }, + ] + assert full.content_blocks == expected_content_blocks diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index 08a1c528cfb6f..1ff3d7aec133f 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -768,8 +768,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -1036,8 +1043,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 
'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', @@ -2225,8 +2239,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -2493,8 +2514,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index d3a746eaf7966..4f6c54a28d452 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -1171,8 +1171,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -1439,8 +1446,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index db17757a8c95f..7d5642ce853a2 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -2716,8 +2716,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -2981,8 +2988,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', @@ -4217,8 +4231,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -4501,8 +4522,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', @@ -5749,8 +5777,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -6033,8 +6068,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', @@ -7156,8 +7198,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -7421,8 +7470,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', @@ -8699,8 +8755,15 @@ 'title': 
'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -8983,8 +9046,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', @@ -10151,8 +10221,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -10416,8 +10493,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', @@ -11602,8 +11686,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -11897,8 +11988,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', @@ -13095,8 +13193,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -13379,8 +13484,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', diff --git a/libs/partners/openai/langchain_openai/chat_models/_compat.py b/libs/partners/openai/langchain_openai/chat_models/_compat.py index d47aa735e15b5..967652ae9962d 100644 --- a/libs/partners/openai/langchain_openai/chat_models/_compat.py +++ b/libs/partners/openai/langchain_openai/chat_models/_compat.py @@ -66,9 +66,9 @@ import json from collections.abc import Iterable, Iterator -from typing import Any, Literal, Optional, Union, cast +from typing import Any, Literal, Union, cast -from langchain_core.messages import AIMessage, AIMessageChunk, is_data_content_block +from langchain_core.messages import AIMessage, is_data_content_block from langchain_core.messages import content_blocks as types _FUNCTION_CALL_IDS_MAP_KEY = "__openai_function_call_ids__" @@ -262,46 +262,6 @@ def _convert_from_v03_ai_message(message: AIMessage) -> AIMessage: # v1 / Chat Completions -def _convert_to_v1_from_chat_completions(message: AIMessage) -> AIMessage: - """Mutate a Chat Completions message to v1 format.""" - if isinstance(message.content, str): - if message.content: - message.content = [{"type": "text", "text": message.content}] - else: - message.content = [] - - for tool_call in message.tool_calls: - message.content.append(cast(dict, tool_call)) - - if "tool_calls" in message.additional_kwargs: - _ = message.additional_kwargs.pop("tool_calls") - - if "token_usage" in message.response_metadata: - _ = message.response_metadata.pop("token_usage") - - return message - - -def _convert_to_v1_from_chat_completions_chunk(chunk: AIMessageChunk) -> AIMessageChunk: - """Mutate a Chat Completions chunk to v1 format.""" - if isinstance(chunk.content, str): - if chunk.content: - chunk.content = [{"type": "text", "text": chunk.content}] - else: - 
chunk.content = [] - - for tool_call_chunk in chunk.tool_call_chunks: - chunk.content.append(cast(dict, tool_call_chunk)) - - if "tool_calls" in chunk.additional_kwargs: - _ = chunk.additional_kwargs.pop("tool_calls") - - if "token_usage" in chunk.response_metadata: - _ = chunk.response_metadata.pop("token_usage") - - return chunk - - def _convert_from_v1_to_chat_completions(message: AIMessage) -> AIMessage: """Convert a v1 message to the Chat Completions format.""" if isinstance(message.content, list): @@ -324,230 +284,6 @@ def _convert_from_v1_to_chat_completions(message: AIMessage) -> AIMessage: # v1 / Responses -def _convert_annotation_to_v1(annotation: dict[str, Any]) -> types.Annotation: - annotation_type = annotation.get("type") - - if annotation_type == "url_citation": - known_fields = { - "type", - "url", - "title", - "cited_text", - "start_index", - "end_index", - } - url_citation = cast(types.Citation, {}) - for field in ("end_index", "start_index", "title"): - if field in annotation: - url_citation[field] = annotation[field] - url_citation["type"] = "citation" - url_citation["url"] = annotation["url"] - for field in annotation: - if field not in known_fields: - if "extras" not in url_citation: - url_citation["extras"] = {} - url_citation["extras"][field] = annotation[field] - return url_citation - - elif annotation_type == "file_citation": - known_fields = {"type", "title", "cited_text", "start_index", "end_index"} - document_citation: types.Citation = {"type": "citation"} - if "filename" in annotation: - document_citation["title"] = annotation.pop("filename") - for field in annotation: - if field not in known_fields: - if "extras" not in document_citation: - document_citation["extras"] = {} - document_citation["extras"][field] = annotation[field] - - return document_citation - - # TODO: standardise container_file_citation? 
- else: - non_standard_annotation: types.NonStandardAnnotation = { - "type": "non_standard_annotation", - "value": annotation, - } - return non_standard_annotation - - -def _explode_reasoning(block: dict[str, Any]) -> Iterable[types.ReasoningContentBlock]: - if "summary" not in block: - yield cast(types.ReasoningContentBlock, block) - return - - known_fields = {"type", "reasoning", "id", "index"} - unknown_fields = [ - field for field in block if field != "summary" and field not in known_fields - ] - if unknown_fields: - block["extras"] = {} - for field in unknown_fields: - block["extras"][field] = block.pop(field) - - if not block["summary"]: - _ = block.pop("summary", None) - yield cast(types.ReasoningContentBlock, block) - return - - # Common part for every exploded line, except 'summary' - common = {k: v for k, v in block.items() if k in known_fields} - - # Optional keys that must appear only in the first exploded item - first_only = block.pop("extras", None) - - for idx, part in enumerate(block["summary"]): - new_block = dict(common) - new_block["reasoning"] = part.get("text", "") - if idx == 0 and first_only: - new_block.update(first_only) - yield cast(types.ReasoningContentBlock, new_block) - - -def _convert_to_v1_from_responses( - content: list[dict[str, Any]], - tool_calls: Optional[list[types.ToolCall]] = None, - invalid_tool_calls: Optional[list[types.InvalidToolCall]] = None, -) -> list[types.ContentBlock]: - """Mutate a Responses message to v1 format.""" - - def _iter_blocks() -> Iterable[types.ContentBlock]: - for block in content: - if not isinstance(block, dict): - continue - block_type = block.get("type") - - if block_type == "text": - if "annotations" in block: - block["annotations"] = [ - _convert_annotation_to_v1(a) for a in block["annotations"] - ] - yield cast(types.TextContentBlock, block) - - elif block_type == "reasoning": - yield from _explode_reasoning(block) - - elif block_type == "image_generation_call" and ( - result := block.get("result") - ): - new_block = {"type": "image", "base64": result} - if output_format := block.get("output_format"): - new_block["mime_type"] = f"image/{output_format}" - if "id" in block: - new_block["id"] = block["id"] - if "index" in block: - new_block["index"] = block["index"] - for extra_key in ( - "status", - "background", - "output_format", - "quality", - "revised_prompt", - "size", - ): - if extra_key in block: - if "extras" not in new_block: - new_block["extras"] = {} - new_block["extras"][extra_key] = block[extra_key] - yield cast(types.ImageContentBlock, new_block) - - elif block_type == "function_call": - tool_call_block: Optional[ - Union[types.ToolCall, types.InvalidToolCall] - ] = None - call_id = block.get("call_id", "") - if call_id: - for tool_call in tool_calls or []: - if tool_call.get("id") == call_id: - tool_call_block = cast(types.ToolCall, tool_call.copy()) - break - else: - for invalid_tool_call in invalid_tool_calls or []: - if invalid_tool_call.get("id") == call_id: - tool_call_block = cast( - types.InvalidToolCall, invalid_tool_call.copy() - ) - break - if tool_call_block: - if "id" in block: - if "extras" not in tool_call_block: - tool_call_block["extras"] = {} - tool_call_block["extras"]["item_id"] = block["id"] # type: ignore[typeddict-item] - if "index" in block: - tool_call_block["index"] = block["index"] - yield tool_call_block - - elif block_type == "web_search_call": - web_search_call = {"type": "web_search_call", "id": block["id"]} - if "index" in block: - web_search_call["index"] = block["index"] - 
if ( - "action" in block - and isinstance(block["action"], dict) - and block["action"].get("type") == "search" - and "query" in block["action"] - ): - web_search_call["query"] = block["action"]["query"] - for key in block: - if key not in ("type", "id"): - web_search_call[key] = block[key] - - web_search_result = {"type": "web_search_result", "id": block["id"]} - if "index" in block: - web_search_result["index"] = block["index"] + 1 - yield cast(types.WebSearchCall, web_search_call) - yield cast(types.WebSearchResult, web_search_result) - - elif block_type == "code_interpreter_call": - code_interpreter_call = { - "type": "code_interpreter_call", - "id": block["id"], - } - if "code" in block: - code_interpreter_call["code"] = block["code"] - if "container_id" in block: - code_interpreter_call["container_id"] = block["container_id"] - if "index" in block: - code_interpreter_call["index"] = block["index"] - - code_interpreter_result = { - "type": "code_interpreter_result", - "id": block["id"], - } - if "outputs" in block: - code_interpreter_result["outputs"] = block["outputs"] - for output in block["outputs"]: - if ( - isinstance(output, dict) - and (output_type := output.get("type")) - and output_type == "logs" - ): - if "output" not in code_interpreter_result: - code_interpreter_result["output"] = [] - code_interpreter_result["output"].append( - { - "type": "code_interpreter_output", - "stdout": output.get("logs", ""), - } - ) - - if "status" in block: - code_interpreter_result["status"] = block["status"] - if "index" in block: - code_interpreter_result["index"] = block["index"] + 1 - - yield cast(types.CodeInterpreterCall, code_interpreter_call) - yield cast(types.CodeInterpreterResult, code_interpreter_result) - - else: - new_block = {"type": "non_standard", "value": block} - if "index" in new_block["value"]: - new_block["index"] = new_block["value"].pop("index") - yield cast(types.NonStandardContentBlock, new_block) - - return list(_iter_blocks()) - - def _convert_annotation_from_v1(annotation: types.Annotation) -> dict[str, Any]: if annotation["type"] == "citation": new_ann: dict[str, Any] = {} @@ -678,10 +414,18 @@ def _consolidate_calls( for key in ("code", "container_id"): if key in current: collapsed[key] = current[key] + elif key in current.get("extras", {}): + collapsed[key] = current["extras"][key] + else: + pass for key in ("outputs", "status"): if key in nxt: collapsed[key] = nxt[key] + elif key in nxt.get("extras", {}): + collapsed[key] = nxt["extras"][key] + else: + pass collapsed["type"] = "code_interpreter_call" yield collapsed diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index d8fca0513b17e..caeefdc662aae 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -69,6 +69,10 @@ OutputTokenDetails, UsageMetadata, ) +from langchain_core.messages.block_translators.openai import ( + translate_content, + translate_content_chunk, +) from langchain_core.messages.tool import tool_call_chunk from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser from langchain_core.output_parsers.openai_tools import ( @@ -111,9 +115,6 @@ _convert_from_v1_to_chat_completions, _convert_from_v1_to_responses, _convert_to_v03_ai_message, - _convert_to_v1_from_chat_completions, - _convert_to_v1_from_chat_completions_chunk, - _convert_to_v1_from_responses, ) if TYPE_CHECKING: @@ -925,9 +926,9 @@ def 
_convert_chunk_to_generation_chunk( generation_info=base_generation_info, ) if self.output_version == "v1": - generation_chunk.message = _convert_to_v1_from_chat_completions_chunk( - cast(AIMessageChunk, generation_chunk.message) - ) + generation_chunk.message.content = [] + generation_chunk.message.response_metadata["output_version"] = "v1" + return generation_chunk choice = choices[0] @@ -940,6 +941,7 @@ def _convert_chunk_to_generation_chunk( generation_info = {**base_generation_info} if base_generation_info else {} if finish_reason := choice.get("finish_reason"): + generation_info["model_provider"] = "openai" generation_info["finish_reason"] = finish_reason if model_name := chunk.get("model"): generation_info["model_name"] = model_name @@ -956,18 +958,11 @@ def _convert_chunk_to_generation_chunk( message_chunk.usage_metadata = usage_metadata if self.output_version == "v1": - message_chunk = cast(AIMessageChunk, message_chunk) - # Convert to v1 format - if isinstance(message_chunk.content, str): - message_chunk = _convert_to_v1_from_chat_completions_chunk( - message_chunk - ) - if message_chunk.content: - message_chunk.content[0]["index"] = 0 # type: ignore[index] - else: - message_chunk = _convert_to_v1_from_chat_completions_chunk( - message_chunk - ) + message_chunk.content = cast( + "Union[str, list[Union[str, dict]]]", + translate_content_chunk(cast(AIMessageChunk, message_chunk)), + ) + message_chunk.response_metadata["output_version"] = "v1" generation_chunk = ChatGenerationChunk( message=message_chunk, generation_info=generation_info or None @@ -1332,24 +1327,14 @@ def _create_chat_result( if hasattr(message, "parsed"): generations[0].message.additional_kwargs["parsed"] = message.parsed if hasattr(message, "refusal"): - if self.output_version in ("v0", "responses/v1"): - generations[0].message.additional_kwargs["refusal"] = ( - message.refusal - ) - elif self.output_version == "v1": - if isinstance(generations[0].message.content, list): - generations[0].message.content.append( - { - "type": "non_standard", - "value": {"refusal": message.refusal}, - } - ) + generations[0].message.additional_kwargs["refusal"] = message.refusal if self.output_version == "v1": - _ = llm_output.pop("token_usage", None) - generations[0].message = _convert_to_v1_from_chat_completions( - cast(AIMessage, generations[0].message) + generations[0].message.content = cast( + Union[str, list[Union[str, dict]]], + translate_content(cast(AIMessage, generations[0].message)), ) + generations[0].message.response_metadata["output_version"] = "v1" return ChatResult(generations=generations, llm_output=llm_output) @@ -4095,29 +4080,6 @@ def _construct_lc_result_from_responses_api( except json.JSONDecodeError: pass - if output_version == "v1": - content_blocks = _convert_to_v1_from_responses(content_blocks) - - if response.tools and any( - tool.type == "image_generation" for tool in response.tools - ): - # Get mime_time from tool definition and add to image generations - # if missing (primarily for tracing purposes). 
- image_generation_call = next( - tool for tool in response.tools if tool.type == "image_generation" - ) - if image_generation_call.output_format: - mime_type = f"image/{image_generation_call.output_format}" - for content_block in content_blocks: - # OK to mutate output message - if ( - isinstance(content_block, dict) - and content_block.get("type") == "image" - and "base64" in content_block - and "mime_type" not in block - ): - block["mime_type"] = mime_type - message = AIMessage( content=content_blocks, id=response.id, @@ -4129,6 +4091,11 @@ def _construct_lc_result_from_responses_api( ) if output_version == "v0": message = _convert_to_v03_ai_message(message) + elif output_version == "v1": + message.content = cast( + Union[str, list[Union[str, dict]]], translate_content(message) + ) + message.response_metadata["output_version"] = "v1" else: pass return ChatResult(generations=[ChatGeneration(message=message)]) @@ -4208,29 +4175,12 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: annotation = chunk.annotation else: annotation = chunk.annotation.model_dump(exclude_none=True, mode="json") - if output_version == "v1": - content.append( - { - "type": "text", - "text": "", - "annotations": [annotation], - "index": current_index, - } - ) - else: - content.append({"annotations": [annotation], "index": current_index}) + + content.append( + {"type": "text", "annotations": [annotation], "index": current_index} + ) elif chunk.type == "response.output_text.done": - if output_version == "v1": - content.append( - { - "type": "text", - "text": "", - "id": chunk.item_id, - "index": current_index, - } - ) - else: - content.append({"id": chunk.item_id, "index": current_index}) + content.append({"type": "text", "id": chunk.item_id, "index": current_index}) elif chunk.type == "response.created": id = chunk.response.id response_metadata["id"] = chunk.response.id # Backwards compatibility @@ -4328,30 +4278,18 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: reasoning["index"] = current_index content.append(reasoning) elif chunk.type == "response.reasoning_summary_part.added": - if output_version in ("v0", "responses/v1"): - _advance(chunk.output_index) - content.append( - { - # langchain-core uses the `index` key to aggregate text blocks. - "summary": [ - { - "index": chunk.summary_index, - "type": "summary_text", - "text": "", - } - ], - "index": current_index, - "type": "reasoning", - } - ) - else: - # v1 - block: dict = {"type": "reasoning", "reasoning": ""} - if chunk.summary_index > 0: - _advance(chunk.output_index, chunk.summary_index) - block["id"] = chunk.item_id - block["index"] = current_index - content.append(block) + _advance(chunk.output_index) + content.append( + { + # langchain-core uses the `index` key to aggregate text blocks. + "summary": [ + {"index": chunk.summary_index, "type": "summary_text", "text": ""} + ], + "index": current_index, + "type": "reasoning", + "id": chunk.item_id, + } + ) elif chunk.type == "response.image_generation_call.partial_image": # Partial images are not supported yet. 
pass @@ -4373,16 +4311,6 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: else: return current_index, current_output_index, current_sub_index, None - if output_version == "v1": - content = cast(list[dict], _convert_to_v1_from_responses(content)) - for content_block in content: - if ( - isinstance(content_block, dict) - and content_block.get("index", -1) > current_index - ): - # blocks were added for v1 - current_index = content_block["index"] - message = AIMessageChunk( content=content, # type: ignore[arg-type] tool_call_chunks=tool_call_chunks, @@ -4396,6 +4324,11 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: AIMessageChunk, _convert_to_v03_ai_message(message, has_reasoning=has_reasoning), ) + elif output_version == "v1": + message.content = cast( + Union[str, list[Union[str, dict]]], translate_content_chunk(message) + ) + message.response_metadata["output_version"] = "v1" else: pass return ( diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 4ddf373ec78f1..74670bf4e123f 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -22,7 +22,7 @@ MODEL_NAME = "gpt-4o-mini" -def _check_response(response: Optional[BaseMessage], output_version: str) -> None: +def _check_response(response: Optional[BaseMessage]) -> None: assert isinstance(response, AIMessage) assert isinstance(response.content, list) for block in response.content: @@ -65,7 +65,7 @@ def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: "What was a positive news story from today?", tools=[{"type": "web_search_preview"}], ) - _check_response(first_response, output_version) + _check_response(first_response) # Test streaming full: Optional[BaseMessageChunk] = None # type: ignore[no-redef] @@ -75,7 +75,7 @@ def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: ): assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk - _check_response(full, output_version) + _check_response(full) # Use OpenAI's stateful API response = llm.invoke( @@ -83,7 +83,7 @@ def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: tools=[{"type": "web_search_preview"}], previous_response_id=first_response.response_metadata["id"], # type: ignore[typeddict-item] ) - _check_response(response, output_version) + _check_response(response) # Manually pass in chat history response = llm.invoke( @@ -94,13 +94,13 @@ def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: ], tools=[{"type": "web_search_preview"}], ) - _check_response(response, output_version) + _check_response(response) # Bind tool response = llm.bind_tools([{"type": "web_search_preview"}]).invoke( "What was a positive news story from today?" 
) - _check_response(response, output_version) + _check_response(response) for msg in [first_response, full, response]: assert msg is not None @@ -118,7 +118,7 @@ async def test_web_search_async() -> None: "What was a positive news story from today?", tools=[{"type": "web_search_preview"}], ) - _check_response(response, "v0") + _check_response(response) assert response.response_metadata["status"] # Test streaming @@ -130,7 +130,7 @@ async def test_web_search_async() -> None: assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - _check_response(full, "v0") + _check_response(full) for msg in [response, full]: assert msg.additional_kwargs["tool_outputs"] @@ -163,7 +163,7 @@ def multiply(x: int, y: int) -> int: assert set(full.tool_calls[0]["args"]) == {"x", "y"} response = bound_llm.invoke("What was a positive news story from today?") - _check_response(response, output_version) + _check_response(response) class Foo(BaseModel): @@ -373,14 +373,14 @@ def test_file_search() -> None: input_message = {"role": "user", "content": "What is deep research by OpenAI?"} response = llm.invoke([input_message], tools=[tool]) - _check_response(response, "v0") + _check_response(response) full: Optional[BaseMessageChunk] = None for chunk in llm.stream([input_message], tools=[tool]): assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - _check_response(full, "v0") + _check_response(full) next_message = {"role": "user", "content": "Thank you."} _ = llm.invoke([input_message, full, next_message]) @@ -441,7 +441,7 @@ def test_stream_reasoning_summary( total_reasoning_blocks += 1 assert isinstance(block["id"], str) and block["id"].startswith("rs_") assert isinstance(block["reasoning"], str) - assert isinstance(block["index"], int) + assert isinstance(block["index"], str) assert ( total_reasoning_blocks > 1 ) # This query typically generates multiple reasoning blocks @@ -468,7 +468,7 @@ def test_code_interpreter(output_version: Literal["v0", "responses/v1", "v1"]) - } response = llm_with_tools.invoke([input_message]) assert isinstance(response, AIMessage) - _check_response(response, output_version) + _check_response(response) if output_version == "v0": tool_outputs = [ item @@ -501,7 +501,10 @@ def test_code_interpreter(output_version: Literal["v0", "responses/v1", "v1"]) - # Test streaming # Use same container - container_id = tool_outputs[0]["container_id"] + container_id = ( + tool_outputs[0].get("container_id") + or tool_outputs[0].get("extras")["container_id"] + ) llm_with_tools = llm.bind_tools( [{"type": "code_interpreter", "container": container_id}] ) @@ -746,22 +749,14 @@ def test_image_generation_streaming( assert complete_ai_message.additional_kwargs["tool_outputs"] tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0] assert set(tool_output.keys()).issubset(expected_keys) - elif output_version == "responses/v1": + else: + # "responses/v1" tool_output = next( block for block in complete_ai_message.content if isinstance(block, dict) and block["type"] == "image_generation_call" ) assert set(tool_output.keys()).issubset(expected_keys) - else: - # v1 - standard_keys = {"type", "base64", "id", "status", "index"} - tool_output = next( - block - for block in complete_ai_message.content - if isinstance(block, dict) and block["type"] == "image" - ) - assert set(standard_keys).issubset(tool_output.keys()) 
@pytest.mark.default_cassette("test_image_generation_streaming.yaml.gz") @@ -829,7 +824,7 @@ def test_image_generation_multi_turn( ] ai_message = llm_with_tools.invoke(chat_history) assert isinstance(ai_message, AIMessage) - _check_response(ai_message, output_version) + _check_response(ai_message) expected_keys = { "id", @@ -895,26 +890,19 @@ def test_image_generation_multi_turn( ai_message2 = llm_with_tools.invoke(chat_history) assert isinstance(ai_message2, AIMessage) - _check_response(ai_message2, output_version) + _check_response(ai_message2) if output_version == "v0": tool_output = ai_message2.additional_kwargs["tool_outputs"][0] assert set(tool_output.keys()).issubset(expected_keys) - elif output_version == "responses/v1": + else: + # "responses/v1" tool_output = next( block for block in ai_message2.content if isinstance(block, dict) and block["type"] == "image_generation_call" ) assert set(tool_output.keys()).issubset(expected_keys) - else: - standard_keys = {"type", "base64", "id", "status"} - tool_output = next( - block - for block in ai_message2.content - if isinstance(block, dict) and block["type"] == "image" - ) - assert set(standard_keys).issubset(tool_output.keys()) @pytest.mark.default_cassette("test_image_generation_multi_turn.yaml.gz") @@ -938,7 +926,7 @@ def test_image_generation_multi_turn_v1() -> None: ] ai_message = llm_with_tools.invoke(chat_history) assert isinstance(ai_message, AIMessage) - _check_response(ai_message, "v1") + _check_response(ai_message) standard_keys = {"type", "base64", "mime_type", "id"} extra_keys = { @@ -975,7 +963,7 @@ def test_image_generation_multi_turn_v1() -> None: ai_message2 = llm_with_tools.invoke(chat_history) assert isinstance(ai_message2, AIMessage) - _check_response(ai_message2, "v1") + _check_response(ai_message2) tool_output = next( block diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index 47695efa37cc5..542b7330378d0 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -20,7 +20,6 @@ ToolCall, ToolMessage, ) -from langchain_core.messages import content_blocks as types from langchain_core.messages.ai import UsageMetadata from langchain_core.outputs import ChatGeneration, ChatResult from langchain_core.runnables import RunnableLambda @@ -55,7 +54,6 @@ _convert_from_v1_to_chat_completions, _convert_from_v1_to_responses, _convert_to_v03_ai_message, - _convert_to_v1_from_responses, ) from langchain_openai.chat_models.base import ( _construct_lc_result_from_responses_api, @@ -2585,114 +2583,6 @@ def test_convert_from_v1_to_responses( assert message_v1 != result -@pytest.mark.parametrize( - "responses_content, tool_calls, expected_content", - [ - ( - [ - {"type": "reasoning", "id": "abc123", "summary": []}, - { - "type": "reasoning", - "id": "abc234", - "summary": [ - {"type": "summary_text", "text": "foo "}, - {"type": "summary_text", "text": "bar"}, - ], - }, - { - "type": "function_call", - "call_id": "call_123", - "name": "get_weather", - "arguments": '{"location": "San Francisco"}', - }, - { - "type": "function_call", - "call_id": "call_234", - "name": "get_weather_2", - "arguments": '{"location": "New York"}', - "id": "fc_123", - }, - {"type": "text", "text": "Hello "}, - { - "type": "text", - "text": "world", - "annotations": [ - {"type": "url_citation", "url": "https://example.com"}, - { - "type": "file_citation", - "filename": "my doc", - 
"index": 1, - "file_id": "file_123", - }, - {"bar": "baz"}, - ], - }, - {"type": "image_generation_call", "id": "ig_123", "result": "..."}, - {"type": "something_else", "foo": "bar"}, - ], - [ - { - "type": "tool_call", - "id": "call_123", - "name": "get_weather", - "args": {"location": "San Francisco"}, - }, - { - "type": "tool_call", - "id": "call_234", - "name": "get_weather_2", - "args": {"location": "New York"}, - }, - ], - [ - {"type": "reasoning", "id": "abc123"}, - {"type": "reasoning", "id": "abc234", "reasoning": "foo "}, - {"type": "reasoning", "id": "abc234", "reasoning": "bar"}, - { - "type": "tool_call", - "id": "call_123", - "name": "get_weather", - "args": {"location": "San Francisco"}, - }, - { - "type": "tool_call", - "id": "call_234", - "name": "get_weather_2", - "args": {"location": "New York"}, - "extras": {"item_id": "fc_123"}, - }, - {"type": "text", "text": "Hello "}, - { - "type": "text", - "text": "world", - "annotations": [ - {"type": "citation", "url": "https://example.com"}, - { - "type": "citation", - "title": "my doc", - "extras": {"file_id": "file_123", "index": 1}, - }, - {"type": "non_standard_annotation", "value": {"bar": "baz"}}, - ], - }, - {"type": "image", "base64": "...", "id": "ig_123"}, - { - "type": "non_standard", - "value": {"type": "something_else", "foo": "bar"}, - }, - ], - ) - ], -) -def test_convert_to_v1_from_responses( - responses_content: list[dict[str, Any]], - tool_calls: list[ToolCall], - expected_content: list[types.ContentBlock], -) -> None: - result = _convert_to_v1_from_responses(responses_content, tool_calls) - assert result == expected_content - - def test_get_last_messages() -> None: messages: list[BaseMessage] = [HumanMessage("Hello")] last_messages, previous_response_id = _get_last_messages(messages) diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py b/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py index 49c88ab4aadeb..8d7c4a14c525e 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py @@ -676,19 +676,24 @@ def _strip_none(obj: Any) -> Any: "type": "reasoning", "reasoning": "reasoning block one", "id": "rs_123", - "index": 0, + "index": "lc_rs_305f30", }, { "type": "reasoning", "reasoning": "another reasoning block", "id": "rs_123", - "index": 1, + "index": "lc_rs_305f31", + }, + { + "type": "text", + "text": "text block one", + "index": "lc_txt_1", + "id": "msg_123", }, - {"type": "text", "text": "text block one", "index": 2, "id": "msg_123"}, { "type": "text", "text": "another text block", - "index": 3, + "index": "lc_txt_2", "id": "msg_123", }, { @@ -696,16 +701,16 @@ def _strip_none(obj: Any) -> Any: "reasoning": "more reasoning", "id": "rs_234", "extras": {"encrypted_content": "encrypted-content"}, - "index": 4, + "index": "lc_rs_335f30", }, { "type": "reasoning", "reasoning": "still more reasoning", "id": "rs_234", - "index": 5, + "index": "lc_rs_335f31", }, - {"type": "text", "text": "more", "index": 6, "id": "msg_234"}, - {"type": "text", "text": "text", "index": 7, "id": "msg_234"}, + {"type": "text", "text": "more", "index": "lc_txt_4", "id": "msg_234"}, + {"type": "text", "text": "text", "index": "lc_txt_5", "id": "msg_234"}, ], ), ], From 8ee0cbba3ce391b0d85ae36ba3376536814da857 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Mon, 18 Aug 2025 16:33:12 -0400 Subject: [PATCH 38/56] refactor(core): prefixes (#32597) re: #32589 cc: 
@ccurme - Rename namespace: `messages.content_blocks` -> `messages.content` - Prefixes and ID logic are now in `messages.base` instead of `AIMessage` since the logic is shared between messages and message content. Went with `messages.base` instead of `utils` because `utils` ran into hairy circular-import problems --- .../language_models/chat_models.py | 14 ++-- libs/core/langchain_core/messages/__init__.py | 67 +++++++++---------- libs/core/langchain_core/messages/ai.py | 18 +++-- libs/core/langchain_core/messages/base.py | 32 ++++++++- .../messages/block_translators/openai.py | 2 +- .../{content_blocks.py => content.py} | 36 ++-------- libs/core/langchain_core/messages/human.py | 2 +- libs/core/langchain_core/messages/system.py | 2 +- libs/core/langchain_core/messages/tool.py | 6 +- libs/core/langchain_core/messages/utils.py | 5 +- libs/core/langchain_core/runnables/base.py | 2 +- .../messages/block_translators/test_openai.py | 2 +- .../core/tests/unit_tests/messages/test_ai.py | 2 +- .../tests/unit_tests/messages/test_imports.py | 1 - libs/core/tests/unit_tests/test_messages.py | 2 +- .../langchain_openai/chat_models/_compat.py | 2 +- 16 files changed, 101 insertions(+), 94 deletions(-) rename libs/core/langchain_core/messages/{content_blocks.py => content.py} (97%) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 572c805c0be0b..8aed5b134b00e 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -35,6 +35,7 @@ ) from langchain_core.load import dumpd, dumps from langchain_core.messages import ( + LC_ID_PREFIX, AIMessage, AnyMessage, BaseMessage, @@ -46,7 +47,6 @@ is_data_content_block, message_chunk_to_message, ) -from langchain_core.messages.ai import _LC_ID_PREFIX from langchain_core.outputs import ( ChatGeneration, ChatGenerationChunk, @@ -540,7 +540,7 @@ def stream( try: input_messages = _normalize_messages(messages) - run_id = "-".join((_LC_ID_PREFIX, str(run_manager.run_id))) + run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id))) for chunk in self._stream(input_messages, stop=stop, **kwargs): if chunk.message.id is None: chunk.message.id = run_id @@ -633,7 +633,7 @@ async def astream( try: input_messages = _normalize_messages(messages) - run_id = "-".join((_LC_ID_PREFIX, str(run_manager.run_id))) + run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id))) async for chunk in self._astream( input_messages, stop=stop, @@ -1099,7 +1099,7 @@ def _generate_with_cache( chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) if run_manager: if chunk.message.id is None: - chunk.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}" + chunk.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}" run_manager.on_llm_new_token( cast("str", chunk.message.content), chunk=chunk ) @@ -1115,7 +1115,7 @@ def _generate_with_cache( # Add response metadata to each generation for idx, generation in enumerate(result.generations): if run_manager and generation.message.id is None: - generation.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}-{idx}" + generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}" generation.message.response_metadata = _gen_info_and_msg_metadata( generation ) @@ -1172,7 +1172,7 @@ async def _agenerate_with_cache( chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) if run_manager: if chunk.message.id is None: - chunk.message.id =
f"{LC_ID_PREFIX}-{run_manager.run_id}" await run_manager.on_llm_new_token( cast("str", chunk.message.content), chunk=chunk ) @@ -1188,7 +1188,7 @@ async def _agenerate_with_cache( # Add response metadata to each generation for idx, generation in enumerate(result.generations): if run_manager and generation.message.id is None: - generation.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}-{idx}" + generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}" generation.message.response_metadata = _gen_info_and_msg_metadata( generation ) diff --git a/libs/core/langchain_core/messages/__init__.py b/libs/core/langchain_core/messages/__init__.py index 31e4b560b2b8d..f3224bc8e1b0e 100644 --- a/libs/core/langchain_core/messages/__init__.py +++ b/libs/core/langchain_core/messages/__init__.py @@ -21,21 +21,21 @@ if TYPE_CHECKING: from langchain_core.messages.ai import ( - _LC_ID_PREFIX, AIMessage, AIMessageChunk, ) from langchain_core.messages.base import ( + LC_AUTO_PREFIX, + LC_ID_PREFIX, BaseMessage, BaseMessageChunk, + ensure_id, merge_content, message_to_dict, messages_to_dict, ) from langchain_core.messages.chat import ChatMessage, ChatMessageChunk - from langchain_core.messages.content_blocks import ( - LC_AUTO_PREFIX, - LC_ID_PREFIX, + from langchain_core.messages.content import ( Annotation, AudioContentBlock, Citation, @@ -56,7 +56,6 @@ WebSearchResult, convert_to_openai_data_block, convert_to_openai_image_block, - ensure_id, is_data_content_block, is_reasoning_block, is_text_block, @@ -91,7 +90,6 @@ __all__ = ( "LC_AUTO_PREFIX", "LC_ID_PREFIX", - "_LC_ID_PREFIX", "AIMessage", "AIMessageChunk", "Annotation", @@ -153,63 +151,62 @@ ) _dynamic_imports = { - "ensure_id": "content_blocks", + "ensure_id": "base", "AIMessage": "ai", "AIMessageChunk": "ai", - "Annotation": "content_blocks", - "AudioContentBlock": "content_blocks", + "Annotation": "content", + "AudioContentBlock": "content", "BaseMessage": "base", "BaseMessageChunk": "base", "merge_content": "base", "message_to_dict": "base", "messages_to_dict": "base", - "Citation": "content_blocks", - "ContentBlock": "content_blocks", + "Citation": "content", + "ContentBlock": "content", "ChatMessage": "chat", "ChatMessageChunk": "chat", - "CodeInterpreterCall": "content_blocks", - "CodeInterpreterOutput": "content_blocks", - "CodeInterpreterResult": "content_blocks", - "DataContentBlock": "content_blocks", - "FileContentBlock": "content_blocks", + "CodeInterpreterCall": "content", + "CodeInterpreterOutput": "content", + "CodeInterpreterResult": "content", + "DataContentBlock": "content", + "FileContentBlock": "content", "FunctionMessage": "function", "FunctionMessageChunk": "function", "HumanMessage": "human", "HumanMessageChunk": "human", - "LC_AUTO_PREFIX": "content_blocks", - "LC_ID_PREFIX": "content_blocks", - "_LC_ID_PREFIX": "ai", - "NonStandardAnnotation": "content_blocks", - "NonStandardContentBlock": "content_blocks", - "PlainTextContentBlock": "content_blocks", - "ReasoningContentBlock": "content_blocks", + "LC_AUTO_PREFIX": "base", + "LC_ID_PREFIX": "base", + "NonStandardAnnotation": "content", + "NonStandardContentBlock": "content", + "PlainTextContentBlock": "content", + "ReasoningContentBlock": "content", "RemoveMessage": "modifier", "SystemMessage": "system", "SystemMessageChunk": "system", - "WebSearchCall": "content_blocks", - "WebSearchResult": "content_blocks", - "ImageContentBlock": "content_blocks", + "WebSearchCall": "content", + "WebSearchResult": "content", + "ImageContentBlock": "content", "InvalidToolCall": 
"tool", - "TextContentBlock": "content_blocks", + "TextContentBlock": "content", "ToolCall": "tool", "ToolCallChunk": "tool", "ToolMessage": "tool", "ToolMessageChunk": "tool", - "VideoContentBlock": "content_blocks", + "VideoContentBlock": "content", "AnyMessage": "utils", "MessageLikeRepresentation": "utils", "_message_from_dict": "utils", "convert_to_messages": "utils", - "convert_to_openai_data_block": "content_blocks", - "convert_to_openai_image_block": "content_blocks", + "convert_to_openai_data_block": "content", + "convert_to_openai_image_block": "content", "convert_to_openai_messages": "utils", "filter_messages": "utils", "get_buffer_string": "utils", - "is_data_content_block": "content_blocks", - "is_reasoning_block": "content_blocks", - "is_text_block": "content_blocks", - "is_tool_call_block": "content_blocks", - "is_tool_call_chunk": "content_blocks", + "is_data_content_block": "content", + "is_reasoning_block": "content", + "is_text_block": "content", + "is_tool_call_block": "content", + "is_tool_call_chunk": "content", "merge_message_runs": "utils", "message_chunk_to_message": "utils", "messages_from_dict": "utils", diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 864444f53aeee..3492195b23151 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -8,8 +8,14 @@ from pydantic import model_validator from typing_extensions import NotRequired, Self, TypedDict, override -from langchain_core.messages import content_blocks as types -from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content +from langchain_core.messages import content as types +from langchain_core.messages.base import ( + LC_AUTO_PREFIX, + LC_ID_PREFIX, + BaseMessage, + BaseMessageChunk, + merge_content, +) from langchain_core.messages.tool import ( InvalidToolCall, ToolCall, @@ -26,8 +32,6 @@ logger = logging.getLogger(__name__) -_LC_ID_PREFIX = types.LC_ID_PREFIX - class InputTokenDetails(TypedDict, total=False): """Breakdown of input token counts. 
@@ -525,15 +529,15 @@ def add_ai_message_chunks( for id_ in candidates: if ( id_ - and not id_.startswith(types.LC_ID_PREFIX) - and not id_.startswith(types.LC_AUTO_PREFIX) + and not id_.startswith(LC_ID_PREFIX) + and not id_.startswith(LC_AUTO_PREFIX) ): chunk_id = id_ break else: # second pass: prefer lc_run-* ids over lc_* ids for id_ in candidates: - if id_ and id_.startswith(types.LC_ID_PREFIX): + if id_ and id_.startswith(LC_ID_PREFIX): chunk_id = id_ break else: diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 871f687d35cf6..44f7d8ceeff95 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -3,11 +3,11 @@ from __future__ import annotations from typing import TYPE_CHECKING, Any, Optional, Union, cast, overload +from uuid import uuid4 from pydantic import ConfigDict, Field from langchain_core.load.serializable import Serializable -from langchain_core.messages import content_blocks as types from langchain_core.utils import get_bolded_text from langchain_core.utils._merge import merge_dicts, merge_lists from langchain_core.utils.interactive_env import is_interactive_env @@ -15,8 +15,21 @@ if TYPE_CHECKING: from collections.abc import Sequence + from langchain_core.messages import content as types from langchain_core.prompts.chat import ChatPromptTemplate +LC_AUTO_PREFIX = "lc_" +"""LangChain auto-generated ID prefix for messages and content blocks.""" + +LC_ID_PREFIX = f"{LC_AUTO_PREFIX}run-" +"""Internal tracing/callback system identifier. + +Used for: +- Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.) + gets a unique run_id (UUID) +- Enables tracking parent-child relationships between operations +""" + class BaseMessage(Serializable): """Base abstract message class. @@ -121,6 +134,8 @@ def content_blocks(self) -> list[types.ContentBlock]: Otherwise, does best-effort parsing to standard types. """ + from langchain_core.messages import content as types + blocks: list[types.ContentBlock] = [] content = ( [self.content] @@ -342,3 +357,18 @@ def get_msg_title_repr(title: str, *, bold: bool = False) -> str: if bold: padded = get_bolded_text(padded) return f"{sep}{padded}{second_sep}" + + +def ensure_id(id_val: Optional[str]) -> str: + """Ensure the ID is a valid string, generating a new UUID if not provided. + + Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are + LangChain-generated IDs. + + Args: + id_val: Optional string ID value to validate. + + Returns: + A string ID, either the validated provided value or a newly generated UUID4. 
+ """ + return id_val or str(f"{LC_AUTO_PREFIX}{uuid4()}") diff --git a/libs/core/langchain_core/messages/block_translators/openai.py b/libs/core/langchain_core/messages/block_translators/openai.py index 19ab0fbdae712..6f52f643484f0 100644 --- a/libs/core/langchain_core/messages/block_translators/openai.py +++ b/libs/core/langchain_core/messages/block_translators/openai.py @@ -4,7 +4,7 @@ from typing import Any, Optional, Union, cast from langchain_core.messages import AIMessage, AIMessageChunk -from langchain_core.messages import content_blocks as types +from langchain_core.messages import content as types # v1 / Chat Completions diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content.py similarity index 97% rename from libs/core/langchain_core/messages/content_blocks.py rename to libs/core/langchain_core/messages/content.py index 61d458fe93675..3e7ae5174b5a7 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content.py @@ -55,7 +55,7 @@ class TextContentBlock(TypedDict, extra_items=Any): .. code-block:: python - from langchain_core.messages.content_blocks import TextContentBlock + from langchain_core.messages.content import TextContentBlock # Create a text content block with provider-specific fields my_block: TextContentBlock = { @@ -97,7 +97,7 @@ class TextContentBlock(TypedDict, extra_items=Any): .. code-block:: python # Direct construction: - from langchain_core.messages.content_blocks import TextContentBlock, ImageContentBlock + from langchain_core.messages.content import TextContentBlock, ImageContentBlock multimodal_message: AIMessage(content_blocks= [ @@ -111,7 +111,7 @@ class TextContentBlock(TypedDict, extra_items=Any): ) # Using factories: - from langchain_core.messages.content_blocks import create_text_block, create_image_block + from langchain_core.messages.content import create_text_block, create_image_block multimodal_message: AIMessage(content= [ @@ -127,40 +127,14 @@ class TextContentBlock(TypedDict, extra_items=Any): - Automatic ID generation (when not provided) - No need to manually specify the ``type`` field -""" # noqa: E501 +""" import warnings from typing import Any, Literal, Optional, Union, get_args, get_type_hints -from uuid import uuid4 from typing_extensions import NotRequired, TypedDict, TypeGuard -LC_AUTO_PREFIX = "lc_" -"""LangChain auto-generated ID prefix for messages and content blocks.""" - -LC_ID_PREFIX = f"{LC_AUTO_PREFIX}run-" -"""Internal tracing/callback system identifier. - -Used for: -- Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.) - gets a unique run_id (UUID) -- Enables tracking parent-child relationships between operations -""" - - -def ensure_id(id_val: Optional[str]) -> str: - """Ensure the ID is a valid string, generating a new UUID if not provided. - - Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are - LangChain-generated IDs. - - Args: - id_val: Optional string ID value to validate. - - Returns: - A string ID, either the validated provided value or a newly generated UUID4. 
- """ - return id_val or str(f"{LC_AUTO_PREFIX}{uuid4()}") +from langchain_core.messages.base import ensure_id class Citation(TypedDict): diff --git a/libs/core/langchain_core/messages/human.py b/libs/core/langchain_core/messages/human.py index 954f05f037ec2..a15a0f7533d11 100644 --- a/libs/core/langchain_core/messages/human.py +++ b/libs/core/langchain_core/messages/human.py @@ -2,7 +2,7 @@ from typing import Any, Literal, Optional, Union, cast, overload -from langchain_core.messages import content_blocks as types +from langchain_core.messages import content as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk diff --git a/libs/core/langchain_core/messages/system.py b/libs/core/langchain_core/messages/system.py index a3f399b88c142..ca6589db8dbca 100644 --- a/libs/core/langchain_core/messages/system.py +++ b/libs/core/langchain_core/messages/system.py @@ -2,7 +2,7 @@ from typing import Any, Literal, Optional, Union, cast, overload -from langchain_core.messages import content_blocks as types +from langchain_core.messages import content as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index efb714bb58506..fab0315de63ca 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -7,10 +7,10 @@ from pydantic import Field, model_validator from typing_extensions import NotRequired, TypedDict, override -from langchain_core.messages import content_blocks as types +from langchain_core.messages import content as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content -from langchain_core.messages.content_blocks import InvalidToolCall as InvalidToolCall -from langchain_core.messages.content_blocks import ToolCall as ToolCall +from langchain_core.messages.content import InvalidToolCall as InvalidToolCall +from langchain_core.messages.content import ToolCall as ToolCall from langchain_core.utils._merge import merge_dicts, merge_obj diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py index e84dc6c0191ed..79c1c4b66c6c9 100644 --- a/libs/core/langchain_core/messages/utils.py +++ b/libs/core/langchain_core/messages/utils.py @@ -31,10 +31,13 @@ from pydantic import Discriminator, Field, Tag from langchain_core.exceptions import ErrorCode, create_message -from langchain_core.messages import convert_to_openai_data_block, is_data_content_block from langchain_core.messages.ai import AIMessage, AIMessageChunk from langchain_core.messages.base import BaseMessage, BaseMessageChunk from langchain_core.messages.chat import ChatMessage, ChatMessageChunk +from langchain_core.messages.content import ( + convert_to_openai_data_block, + is_data_content_block, +) from langchain_core.messages.function import FunctionMessage, FunctionMessageChunk from langchain_core.messages.human import HumanMessage, HumanMessageChunk from langchain_core.messages.modifier import RemoveMessage diff --git a/libs/core/langchain_core/runnables/base.py b/libs/core/langchain_core/runnables/base.py index 38da82fcda363..c3721f50ecc19 100644 --- a/libs/core/langchain_core/runnables/base.py +++ b/libs/core/langchain_core/runnables/base.py @@ -2399,7 +2399,7 @@ def as_tool( description: The description of the tool. Defaults to None. arg_types: A dictionary of argument names to types. Defaults to None. 
             message_version: Version of ``ToolMessage`` to return given
-                :class:`~langchain_core.messages.content_blocks.ToolCall` input.
+                :class:`~langchain_core.messages.content.ToolCall` input.
 
         Returns:
             A ``BaseTool`` instance.
diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py
index 00dae69865dab..9e2510d56159d 100644
--- a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py
+++ b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py
@@ -1,7 +1,7 @@
 from typing import Optional
 
 from langchain_core.messages import AIMessage, AIMessageChunk
-from langchain_core.messages import content_blocks as types
+from langchain_core.messages import content as types
 
 
 def test_convert_to_v1_from_responses() -> None:
diff --git a/libs/core/tests/unit_tests/messages/test_ai.py b/libs/core/tests/unit_tests/messages/test_ai.py
index b3c0a4d84ea08..67b0a2dc9680c 100644
--- a/libs/core/tests/unit_tests/messages/test_ai.py
+++ b/libs/core/tests/unit_tests/messages/test_ai.py
@@ -1,6 +1,6 @@
 from langchain_core.load import dumpd, load
 from langchain_core.messages import AIMessage, AIMessageChunk
-from langchain_core.messages import content_blocks as types
+from langchain_core.messages import content as types
 from langchain_core.messages.ai import (
     InputTokenDetails,
     OutputTokenDetails,
diff --git a/libs/core/tests/unit_tests/messages/test_imports.py b/libs/core/tests/unit_tests/messages/test_imports.py
index ada1c882a7242..bf438b0cd8eac 100644
--- a/libs/core/tests/unit_tests/messages/test_imports.py
+++ b/libs/core/tests/unit_tests/messages/test_imports.py
@@ -25,7 +25,6 @@
     "HumanMessageChunk",
     "ImageContentBlock",
     "InvalidToolCall",
-    "_LC_ID_PREFIX",
     "LC_AUTO_PREFIX",
     "LC_ID_PREFIX",
     "NonStandardAnnotation",
diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py
index 6fc42db829b36..3de287a287c1e 100644
--- a/libs/core/tests/unit_tests/test_messages.py
+++ b/libs/core/tests/unit_tests/test_messages.py
@@ -31,7 +31,7 @@
     messages_from_dict,
     messages_to_dict,
 )
-from langchain_core.messages.content_blocks import KNOWN_BLOCK_TYPES, ContentBlock
+from langchain_core.messages.content import KNOWN_BLOCK_TYPES, ContentBlock
 from langchain_core.messages.tool import invalid_tool_call as create_invalid_tool_call
 from langchain_core.messages.tool import tool_call as create_tool_call
 from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk
diff --git a/libs/partners/openai/langchain_openai/chat_models/_compat.py b/libs/partners/openai/langchain_openai/chat_models/_compat.py
index 967652ae9962d..eb9ce3f40d464 100644
--- a/libs/partners/openai/langchain_openai/chat_models/_compat.py
+++ b/libs/partners/openai/langchain_openai/chat_models/_compat.py
@@ -69,7 +69,7 @@
 from typing import Any, Literal, Union, cast
 
 from langchain_core.messages import AIMessage, is_data_content_block
-from langchain_core.messages import content_blocks as types
+from langchain_core.messages import content as types
 
 _FUNCTION_CALL_IDS_MAP_KEY = "__openai_function_call_ids__"
 

From 0e6c172893b0daa9085e79a60d3b09350595c091 Mon Sep 17 00:00:00 2001
From: Mason Daugherty
Date: Mon, 18 Aug 2025 17:24:57 -0400
Subject: [PATCH 39/56] refactor(core): prefixes, again (#32599)

Put in `core.utils` this time to prevent other circular import issues
present in the `normalize()` RFC: `base` imports `content`, and
`content` imports `ensure_id()` from `base`.

---
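For reference, the relocated helper in isolation. This mirrors the code the
patch adds to `utils/utils.py` below, so that `base` and `content` can both
import it from a leaf module without re-creating the cycle; the asserts are
illustrative and not part of the patch:

    from typing import Optional
    from uuid import uuid4

    LC_AUTO_PREFIX = "lc_"

    def ensure_id(id_val: Optional[str]) -> str:
        # Provided IDs pass through unchanged; missing IDs get an
        # "lc_"-prefixed UUID4.
        return id_val or str(f"{LC_AUTO_PREFIX}{uuid4()}")

    assert ensure_id("msg-123") == "msg-123"
    assert ensure_id(None).startswith(LC_AUTO_PREFIX)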
libs/core/langchain_core/messages/__init__.py | 7 +---- libs/core/langchain_core/messages/ai.py | 3 +- libs/core/langchain_core/messages/base.py | 28 ------------------ libs/core/langchain_core/messages/content.py | 2 +- libs/core/langchain_core/utils/utils.py | 29 +++++++++++++++++++ 5 files changed, 32 insertions(+), 37 deletions(-) diff --git a/libs/core/langchain_core/messages/__init__.py b/libs/core/langchain_core/messages/__init__.py index f3224bc8e1b0e..dfbf1ff3b7559 100644 --- a/libs/core/langchain_core/messages/__init__.py +++ b/libs/core/langchain_core/messages/__init__.py @@ -18,6 +18,7 @@ from typing import TYPE_CHECKING from langchain_core._import_utils import import_attr +from langchain_core.utils.utils import LC_AUTO_PREFIX, LC_ID_PREFIX, ensure_id if TYPE_CHECKING: from langchain_core.messages.ai import ( @@ -25,11 +26,8 @@ AIMessageChunk, ) from langchain_core.messages.base import ( - LC_AUTO_PREFIX, - LC_ID_PREFIX, BaseMessage, BaseMessageChunk, - ensure_id, merge_content, message_to_dict, messages_to_dict, @@ -151,7 +149,6 @@ ) _dynamic_imports = { - "ensure_id": "base", "AIMessage": "ai", "AIMessageChunk": "ai", "Annotation": "content", @@ -174,8 +171,6 @@ "FunctionMessageChunk": "function", "HumanMessage": "human", "HumanMessageChunk": "human", - "LC_AUTO_PREFIX": "base", - "LC_ID_PREFIX": "base", "NonStandardAnnotation": "content", "NonStandardContentBlock": "content", "PlainTextContentBlock": "content", diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 3492195b23151..b37b33490f783 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -10,8 +10,6 @@ from langchain_core.messages import content as types from langchain_core.messages.base import ( - LC_AUTO_PREFIX, - LC_ID_PREFIX, BaseMessage, BaseMessageChunk, merge_content, @@ -29,6 +27,7 @@ from langchain_core.utils._merge import merge_dicts, merge_lists from langchain_core.utils.json import parse_partial_json from langchain_core.utils.usage import _dict_int_op +from langchain_core.utils.utils import LC_AUTO_PREFIX, LC_ID_PREFIX logger = logging.getLogger(__name__) diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 44f7d8ceeff95..5355b75a71bf6 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -3,7 +3,6 @@ from __future__ import annotations from typing import TYPE_CHECKING, Any, Optional, Union, cast, overload -from uuid import uuid4 from pydantic import ConfigDict, Field @@ -18,18 +17,6 @@ from langchain_core.messages import content as types from langchain_core.prompts.chat import ChatPromptTemplate -LC_AUTO_PREFIX = "lc_" -"""LangChain auto-generated ID prefix for messages and content blocks.""" - -LC_ID_PREFIX = f"{LC_AUTO_PREFIX}run-" -"""Internal tracing/callback system identifier. - -Used for: -- Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.) - gets a unique run_id (UUID) -- Enables tracking parent-child relationships between operations -""" - class BaseMessage(Serializable): """Base abstract message class. @@ -357,18 +344,3 @@ def get_msg_title_repr(title: str, *, bold: bool = False) -> str: if bold: padded = get_bolded_text(padded) return f"{sep}{padded}{second_sep}" - - -def ensure_id(id_val: Optional[str]) -> str: - """Ensure the ID is a valid string, generating a new UUID if not provided. 
- - Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are - LangChain-generated IDs. - - Args: - id_val: Optional string ID value to validate. - - Returns: - A string ID, either the validated provided value or a newly generated UUID4. - """ - return id_val or str(f"{LC_AUTO_PREFIX}{uuid4()}") diff --git a/libs/core/langchain_core/messages/content.py b/libs/core/langchain_core/messages/content.py index 3e7ae5174b5a7..fd46859b9181b 100644 --- a/libs/core/langchain_core/messages/content.py +++ b/libs/core/langchain_core/messages/content.py @@ -134,7 +134,7 @@ class TextContentBlock(TypedDict, extra_items=Any): from typing_extensions import NotRequired, TypedDict, TypeGuard -from langchain_core.messages.base import ensure_id +from langchain_core.utils.utils import ensure_id class Citation(TypedDict): diff --git a/libs/core/langchain_core/utils/utils.py b/libs/core/langchain_core/utils/utils.py index a7467ec51e998..28becc822e2c3 100644 --- a/libs/core/langchain_core/utils/utils.py +++ b/libs/core/langchain_core/utils/utils.py @@ -9,6 +9,7 @@ from collections.abc import Iterator, Sequence from importlib.metadata import version from typing import Any, Callable, Optional, Union, overload +from uuid import uuid4 from packaging.version import parse from pydantic import SecretStr @@ -466,3 +467,31 @@ def get_secret_from_env() -> Optional[SecretStr]: raise ValueError(msg) return get_secret_from_env + + +LC_AUTO_PREFIX = "lc_" +"""LangChain auto-generated ID prefix for messages and content blocks.""" + +LC_ID_PREFIX = "lc_run-" +"""Internal tracing/callback system identifier. + +Used for: +- Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.) + gets a unique run_id (UUID) +- Enables tracking parent-child relationships between operations +""" + + +def ensure_id(id_val: Optional[str]) -> str: + """Ensure the ID is a valid string, generating a new UUID if not provided. + + Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are + LangChain-generated IDs. + + Args: + id_val: Optional string ID value to validate. + + Returns: + A string ID, either the validated provided value or a newly generated UUID4. 
+ """ + return id_val or str(f"{LC_AUTO_PREFIX}{uuid4()}") From 27d81cf3d924c461540437da6debf0850c3bac4a Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 19 Aug 2025 00:28:35 -0400 Subject: [PATCH 40/56] test(openai): address some type issues in tests (#32601) nits --- .../langchain_openai/chat_models/base.py | 2 +- .../chat_models/test_responses_api.py | 26 ++++++++++--------- .../tests/unit_tests/chat_models/test_base.py | 5 ++-- .../chat_models/test_responses_stream.py | 3 ++- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index caeefdc662aae..fc040991797dd 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -1550,7 +1550,7 @@ def get_token_ids(self, text: str) -> list[int]: def get_num_tokens_from_messages( self, - messages: list[BaseMessage], + messages: Sequence[BaseMessage], tools: Optional[ Sequence[Union[dict[str, Any], type, Callable, BaseTool]] ] = None, diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 74670bf4e123f..bd9b83752a289 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -28,8 +28,9 @@ def _check_response(response: Optional[BaseMessage]) -> None: for block in response.content: assert isinstance(block, dict) if block["type"] == "text": - assert isinstance(block["text"], str) # type: ignore[typeddict-item] - for annotation in block["annotations"]: # type: ignore[typeddict-item] + assert isinstance(block.get("text"), str) + annotations = block.get("annotations", []) + for annotation in annotations: if annotation["type"] == "file_citation": assert all( key in annotation @@ -60,7 +61,7 @@ def _check_response(response: Optional[BaseMessage]) -> None: @pytest.mark.vcr @pytest.mark.parametrize("output_version", ["responses/v1", "v1"]) def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: - llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version) # type: ignore[assignment] + llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version) first_response = llm.invoke( "What was a positive news story from today?", tools=[{"type": "web_search_preview"}], @@ -68,7 +69,7 @@ def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: _check_response(first_response) # Test streaming - full: Optional[BaseMessageChunk] = None # type: ignore[no-redef] + full: Optional[BaseMessageChunk] = None for chunk in llm.stream( "What was a positive news story from today?", tools=[{"type": "web_search_preview"}], @@ -81,7 +82,7 @@ def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: response = llm.invoke( "what about a negative one", tools=[{"type": "web_search_preview"}], - previous_response_id=first_response.response_metadata["id"], # type: ignore[typeddict-item] + previous_response_id=first_response.response_metadata["id"], ) _check_response(response) @@ -439,9 +440,11 @@ def test_stream_reasoning_summary( for block in response_1.content_blocks: if block["type"] == "reasoning": total_reasoning_blocks += 1 - assert isinstance(block["id"], str) and block["id"].startswith("rs_") - assert isinstance(block["reasoning"], str) - assert 
isinstance(block["index"], str) + assert isinstance(block.get("id"), str) and block.get( + "id", "" + ).startswith("rs_") + assert isinstance(block.get("reasoning"), str) + assert isinstance(block.get("index"), str) assert ( total_reasoning_blocks > 1 ) # This query typically generates multiple reasoning blocks @@ -501,10 +504,9 @@ def test_code_interpreter(output_version: Literal["v0", "responses/v1", "v1"]) - # Test streaming # Use same container - container_id = ( - tool_outputs[0].get("container_id") - or tool_outputs[0].get("extras")["container_id"] - ) + container_id = tool_outputs[0].get("container_id") or tool_outputs[0].get( + "extras", {} + ).get("container_id") llm_with_tools = llm.bind_tools( [{"type": "code_interpreter", "container": container_id}] ) diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index 542b7330378d0..c68c86cef705b 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -26,7 +26,7 @@ from langchain_core.tracers.base import BaseTracer from langchain_core.tracers.schemas import Run from openai.types.responses import ResponseOutputMessage, ResponseReasoningItem -from openai.types.responses.response import IncompleteDetails, Response, ResponseUsage +from openai.types.responses.response import IncompleteDetails, Response from openai.types.responses.response_error import ResponseError from openai.types.responses.response_file_search_tool_call import ( ResponseFileSearchToolCall, @@ -43,6 +43,7 @@ from openai.types.responses.response_usage import ( InputTokensDetails, OutputTokensDetails, + ResponseUsage, ) from pydantic import BaseModel, Field, SecretStr from typing_extensions import TypedDict @@ -1233,7 +1234,7 @@ def test_structured_outputs_parser() -> None: serialized = dumps(llm_output) deserialized = loads(serialized) assert isinstance(deserialized, ChatGeneration) - result = output_parser.invoke(deserialized.message) + result = output_parser.invoke(cast(AIMessage, deserialized.message)) assert result == parsed_response diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py b/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py index 8d7c4a14c525e..fd4e716e882ab 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py @@ -21,7 +21,7 @@ ResponseTextDeltaEvent, ResponseTextDoneEvent, ) -from openai.types.responses.response import Response, ResponseUsage +from openai.types.responses.response import Response from openai.types.responses.response_output_text import ResponseOutputText from openai.types.responses.response_reasoning_item import Summary from openai.types.responses.response_reasoning_summary_part_added_event import ( @@ -33,6 +33,7 @@ from openai.types.responses.response_usage import ( InputTokensDetails, OutputTokensDetails, + ResponseUsage, ) from openai.types.shared.reasoning import Reasoning from openai.types.shared.response_format_text import ResponseFormatText From 43b9d3d9041b337ac803daeb6f29b3c08e219d61 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 19 Aug 2025 10:08:56 -0400 Subject: [PATCH 41/56] feat(core): implement dynamic translator registration for model providers (#32602) Extensible registry system for translating AI message content blocks from various model providers. 
Refactors the way provider-specific content is handled, moving from hardcoded logic to a plugin-like architecture. --- .../language_models/chat_models.py | 2 +- libs/core/langchain_core/messages/ai.py | 16 ++-- .../messages/block_translators/__init__.py | 80 +++++++++++++++++++ .../block_translators/amazon/__init__.py | 1 + .../block_translators/amazon/bedrock.py | 29 +++++++ .../amazon/bedrock_converse.py | 29 +++++++ .../messages/block_translators/anthropic.py | 27 +++++++ .../messages/block_translators/chroma.py | 27 +++++++ .../block_translators/google/__init__.py | 1 + .../block_translators/google/genai.py | 27 +++++++ .../block_translators/google/vertexai.py | 27 +++++++ .../messages/block_translators/groq.py | 27 +++++++ .../messages/block_translators/ollama.py | 27 +++++++ .../messages/block_translators/openai.py | 13 +++ 14 files changed, 326 insertions(+), 7 deletions(-) create mode 100644 libs/core/langchain_core/messages/block_translators/amazon/__init__.py create mode 100644 libs/core/langchain_core/messages/block_translators/amazon/bedrock.py create mode 100644 libs/core/langchain_core/messages/block_translators/amazon/bedrock_converse.py create mode 100644 libs/core/langchain_core/messages/block_translators/anthropic.py create mode 100644 libs/core/langchain_core/messages/block_translators/chroma.py create mode 100644 libs/core/langchain_core/messages/block_translators/google/__init__.py create mode 100644 libs/core/langchain_core/messages/block_translators/google/genai.py create mode 100644 libs/core/langchain_core/messages/block_translators/google/vertexai.py create mode 100644 libs/core/langchain_core/messages/block_translators/groq.py create mode 100644 libs/core/langchain_core/messages/block_translators/ollama.py diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 8aed5b134b00e..51f92a04fedc8 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -35,7 +35,6 @@ ) from langchain_core.load import dumpd, dumps from langchain_core.messages import ( - LC_ID_PREFIX, AIMessage, AnyMessage, BaseMessage, @@ -66,6 +65,7 @@ convert_to_openai_tool, ) from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass +from langchain_core.utils.utils import LC_ID_PREFIX if TYPE_CHECKING: import uuid diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index b37b33490f783..83572bd231ea5 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -225,10 +225,12 @@ def content_blocks(self) -> list[types.ContentBlock]: return cast("list[types.ContentBlock]", self.content) model_provider = self.response_metadata.get("model_provider") - if model_provider == "openai": - from langchain_core.messages.block_translators import openai + if model_provider: + from langchain_core.messages.block_translators import get_translator - return openai.translate_content(self) + translator = get_translator(model_provider) + if translator: + return translator["translate_content"](self) # Otherwise, use best-effort parsing blocks = super().content_blocks @@ -372,10 +374,12 @@ def content_blocks(self) -> list[types.ContentBlock]: return cast("list[types.ContentBlock]", self.content) model_provider = self.response_metadata.get("model_provider") - if model_provider == "openai": - from langchain_core.messages.block_translators import openai + if model_provider: 
+ from langchain_core.messages.block_translators import get_translator - return openai.translate_content_chunk(self) + translator = get_translator(model_provider) + if translator: + return translator["translate_content_chunk"](self) # Otherwise, use best-effort parsing blocks = super().content_blocks diff --git a/libs/core/langchain_core/messages/block_translators/__init__.py b/libs/core/langchain_core/messages/block_translators/__init__.py index 1dd51cc836e3a..ff58558713d13 100644 --- a/libs/core/langchain_core/messages/block_translators/__init__.py +++ b/libs/core/langchain_core/messages/block_translators/__init__.py @@ -1 +1,81 @@ """Derivations of standard content blocks from provider content.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Callable + +if TYPE_CHECKING: + from langchain_core.messages import AIMessage, AIMessageChunk + from langchain_core.messages import content as types + +# Provider to translator mapping +PROVIDER_TRANSLATORS: dict[str, dict[str, Callable[..., list[types.ContentBlock]]]] = {} + + +def register_translator( + provider: str, + translate_content: Callable[[AIMessage], list[types.ContentBlock]], + translate_content_chunk: Callable[[AIMessageChunk], list[types.ContentBlock]], +) -> None: + """Register content translators for a provider. + + Args: + provider: The model provider name (e.g. ``'openai'``, ``'anthropic'``). + translate_content: Function to translate ``AIMessage`` content. + translate_content_chunk: Function to translate ``AIMessageChunk`` content. + """ + PROVIDER_TRANSLATORS[provider] = { + "translate_content": translate_content, + "translate_content_chunk": translate_content_chunk, + } + + +def get_translator( + provider: str, +) -> dict[str, Callable[..., list[types.ContentBlock]]] | None: + """Get the translator functions for a provider. + + Args: + provider: The model provider name. + + Returns: + Dictionary with ``'translate_content'`` and ``'translate_content_chunk'`` + functions, or None if no translator is registered for the provider. 
+ """ + return PROVIDER_TRANSLATORS.get(provider) + + +def _auto_register_translators() -> None: + """Automatically register all available block translators.""" + import contextlib + import importlib + import pkgutil + from pathlib import Path + + package_path = Path(__file__).parent + + # Discover all sub-modules + for module_info in pkgutil.iter_modules([str(package_path)]): + module_name = module_info.name + + # Skip the __init__ module and any private modules + if module_name.startswith("_"): + continue + + if module_info.ispkg: + # For subpackages, discover their submodules + subpackage_path = package_path / module_name + for submodule_info in pkgutil.iter_modules([str(subpackage_path)]): + submodule_name = submodule_info.name + if not submodule_name.startswith("_"): + with contextlib.suppress(ImportError, AttributeError): + importlib.import_module( + f".{module_name}.{submodule_name}", package=__name__ + ) + else: + # Import top-level translator modules + with contextlib.suppress(ImportError, AttributeError): + importlib.import_module(f".{module_name}", package=__name__) + + +_auto_register_translators() diff --git a/libs/core/langchain_core/messages/block_translators/amazon/__init__.py b/libs/core/langchain_core/messages/block_translators/amazon/__init__.py new file mode 100644 index 0000000000000..1fbfad4912db7 --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/amazon/__init__.py @@ -0,0 +1 @@ +"""Derivations of standard content blocks from Amazon content.""" diff --git a/libs/core/langchain_core/messages/block_translators/amazon/bedrock.py b/libs/core/langchain_core/messages/block_translators/amazon/bedrock.py new file mode 100644 index 0000000000000..76467152b1028 --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/amazon/bedrock.py @@ -0,0 +1,29 @@ +"""Derivations of standard content blocks from Amazon (Bedrock) content.""" + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content as types + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with Bedrock content.""" + raise NotImplementedError + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a chunk with Bedrock content.""" + raise NotImplementedError + + +def _register_bedrock_translator() -> None: + """Register the Bedrock translator with the central registry. + + Run automatically when the module is imported. 
+ """ + from langchain_core.messages.block_translators import register_translator + + register_translator( + "amazon_bedrock_chat", translate_content, translate_content_chunk + ) + + +_register_bedrock_translator() diff --git a/libs/core/langchain_core/messages/block_translators/amazon/bedrock_converse.py b/libs/core/langchain_core/messages/block_translators/amazon/bedrock_converse.py new file mode 100644 index 0000000000000..5882ef2583bc8 --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/amazon/bedrock_converse.py @@ -0,0 +1,29 @@ +"""Derivations of standard content blocks from Amazon (Bedrock Converse) content.""" + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content as types + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with Bedrock Converse content.""" + raise NotImplementedError + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a chunk with Bedrock Converse content.""" + raise NotImplementedError + + +def _register_bedrock_converse_translator() -> None: + """Register the Bedrock Converse translator with the central registry. + + Run automatically when the module is imported. + """ + from langchain_core.messages.block_translators import register_translator + + register_translator( + "amazon_bedrock_converse_chat", translate_content, translate_content_chunk + ) + + +_register_bedrock_converse_translator() diff --git a/libs/core/langchain_core/messages/block_translators/anthropic.py b/libs/core/langchain_core/messages/block_translators/anthropic.py new file mode 100644 index 0000000000000..469b3812a570e --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/anthropic.py @@ -0,0 +1,27 @@ +"""Derivations of standard content blocks from Anthropic content.""" + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content as types + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with Anthropic content.""" + raise NotImplementedError + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a message chunk with Anthropic content.""" + raise NotImplementedError + + +def _register_anthropic_translator() -> None: + """Register the Anthropic translator with the central registry. + + Run automatically when the module is imported. 
+ """ + from langchain_core.messages.block_translators import register_translator + + register_translator("anthropic", translate_content, translate_content_chunk) + + +_register_anthropic_translator() diff --git a/libs/core/langchain_core/messages/block_translators/chroma.py b/libs/core/langchain_core/messages/block_translators/chroma.py new file mode 100644 index 0000000000000..652aa8d0e1b0c --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/chroma.py @@ -0,0 +1,27 @@ +"""Derivations of standard content blocks from Chroma content.""" + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content as types + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with Chroma content.""" + raise NotImplementedError + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a message chunk with Chroma content.""" + raise NotImplementedError + + +def _register_chroma_translator() -> None: + """Register the Chroma translator with the central registry. + + Run automatically when the module is imported. + """ + from langchain_core.messages.block_translators import register_translator + + register_translator("chroma", translate_content, translate_content_chunk) + + +_register_chroma_translator() diff --git a/libs/core/langchain_core/messages/block_translators/google/__init__.py b/libs/core/langchain_core/messages/block_translators/google/__init__.py new file mode 100644 index 0000000000000..0c3f0698aa2a5 --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/google/__init__.py @@ -0,0 +1 @@ +"""Derivations of standard content blocks from Google content.""" diff --git a/libs/core/langchain_core/messages/block_translators/google/genai.py b/libs/core/langchain_core/messages/block_translators/google/genai.py new file mode 100644 index 0000000000000..b9761f94bc44a --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/google/genai.py @@ -0,0 +1,27 @@ +"""Derivations of standard content blocks from Google (GenAI) content.""" + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content as types + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with Google (GenAI) content.""" + raise NotImplementedError + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a chunk with Google (GenAI) content.""" + raise NotImplementedError + + +def _register_google_genai_translator() -> None: + """Register the Google (GenAI) translator with the central registry. + + Run automatically when the module is imported. 
+ """ + from langchain_core.messages.block_translators import register_translator + + register_translator("google_genai", translate_content, translate_content_chunk) + + +_register_google_genai_translator() diff --git a/libs/core/langchain_core/messages/block_translators/google/vertexai.py b/libs/core/langchain_core/messages/block_translators/google/vertexai.py new file mode 100644 index 0000000000000..ae51fd4065d89 --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/google/vertexai.py @@ -0,0 +1,27 @@ +"""Derivations of standard content blocks from Google (VertexAI) content.""" + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content as types + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with Google (VertexAI) content.""" + raise NotImplementedError + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a chunk with Google (VertexAI) content.""" + raise NotImplementedError + + +def _register_google_vertexai_translator() -> None: + """Register the Google (VertexAI) translator with the central registry. + + Run automatically when the module is imported. + """ + from langchain_core.messages.block_translators import register_translator + + register_translator("google_vertexai", translate_content, translate_content_chunk) + + +_register_google_vertexai_translator() diff --git a/libs/core/langchain_core/messages/block_translators/groq.py b/libs/core/langchain_core/messages/block_translators/groq.py new file mode 100644 index 0000000000000..4b01dfb017f2f --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/groq.py @@ -0,0 +1,27 @@ +"""Derivations of standard content blocks from Groq content.""" + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content as types + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with Groq content.""" + raise NotImplementedError + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a message chunk with Groq content.""" + raise NotImplementedError + + +def _register_groq_translator() -> None: + """Register the Groq translator with the central registry. + + Run automatically when the module is imported. 
+ """ + from langchain_core.messages.block_translators import register_translator + + register_translator("groq", translate_content, translate_content_chunk) + + +_register_groq_translator() diff --git a/libs/core/langchain_core/messages/block_translators/ollama.py b/libs/core/langchain_core/messages/block_translators/ollama.py new file mode 100644 index 0000000000000..a0f41ab76342d --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/ollama.py @@ -0,0 +1,27 @@ +"""Derivations of standard content blocks from Ollama content.""" + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content as types + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with Ollama content.""" + raise NotImplementedError + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a message chunk with Ollama content.""" + raise NotImplementedError + + +def _register_ollama_translator() -> None: + """Register the Ollama translator with the central registry. + + Run automatically when the module is imported. + """ + from langchain_core.messages.block_translators import register_translator + + register_translator("ollama", translate_content, translate_content_chunk) + + +_register_ollama_translator() diff --git a/libs/core/langchain_core/messages/block_translators/openai.py b/libs/core/langchain_core/messages/block_translators/openai.py index 6f52f643484f0..16f81e6502595 100644 --- a/libs/core/langchain_core/messages/block_translators/openai.py +++ b/libs/core/langchain_core/messages/block_translators/openai.py @@ -343,3 +343,16 @@ def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock] if isinstance(message.content, str): return _convert_to_v1_from_chat_completions_chunk(message) return _convert_to_v1_from_responses(message) + + +def _register_openai_translator() -> None: + """Register the OpenAI translator with the central registry. + + Run automatically when the module is imported. 
+ """ + from langchain_core.messages.block_translators import register_translator + + register_translator("openai", translate_content, translate_content_chunk) + + +_register_openai_translator() From 0444e260bedc44b289bf16832e3f877517f582ad Mon Sep 17 00:00:00 2001 From: ccurme Date: Tue, 19 Aug 2025 13:25:44 -0300 Subject: [PATCH 42/56] refactor: convert message content inside `BaseChatModel` (#32606) --- .../langchain_core/language_models/_utils.py | 17 ++- .../language_models/chat_models.py | 62 +++++++-- .../language_models/chat_models/test_base.py | 130 +++++++++++++++++- .../__snapshots__/test_runnable.ambr | 28 ++-- .../langchain_openai/chat_models/base.py | 40 +----- 5 files changed, 216 insertions(+), 61 deletions(-) diff --git a/libs/core/langchain_core/language_models/_utils.py b/libs/core/langchain_core/language_models/_utils.py index 883f8c855eab2..19dcd8699485b 100644 --- a/libs/core/langchain_core/language_models/_utils.py +++ b/libs/core/langchain_core/language_models/_utils.py @@ -1,6 +1,6 @@ import re from collections.abc import Sequence -from typing import Optional +from typing import Optional, TypeVar from langchain_core.messages import BaseMessage @@ -138,3 +138,18 @@ def _normalize_messages(messages: Sequence[BaseMessage]) -> list[BaseMessage]: formatted_messages.append(formatted_message) return formatted_messages + + +T = TypeVar("T", bound=BaseMessage) + + +def _update_message_content_to_blocks(message: T, output_version: str) -> T: + return message.model_copy( + update={ + "content": message.content_blocks, + "response_metadata": { + **message.response_metadata, + "output_version": output_version, + }, + } + ) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 51f92a04fedc8..fe52f37f1937a 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -27,7 +27,10 @@ Callbacks, ) from langchain_core.globals import get_llm_cache -from langchain_core.language_models._utils import _normalize_messages +from langchain_core.language_models._utils import ( + _normalize_messages, + _update_message_content_to_blocks, +) from langchain_core.language_models.base import ( BaseLanguageModel, LangSmithParams, @@ -65,7 +68,7 @@ convert_to_openai_tool, ) from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass -from langchain_core.utils.utils import LC_ID_PREFIX +from langchain_core.utils.utils import LC_ID_PREFIX, from_env if TYPE_CHECKING: import uuid @@ -334,16 +337,23 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): """ - output_version: str = "v0" - """Version of ``AIMessage`` output format to use. + output_version: str = Field( + default_factory=from_env("LC_OUTPUT_VERSION", default="v0") + ) + """Version of ``AIMessage`` output format to store in message content. + + ``AIMessage.content_blocks`` will lazily parse the contents of ``content`` into a + standard format. This flag can be used to additionally store the standard format + in message content, e.g., for serialization purposes. - This field is used to roll-out new output formats for chat model ``AIMessage``s - in a backwards-compatible way. + Supported values: - ``'v1'`` standardizes output format using a list of typed ContentBlock dicts. We - recommend this for new applications. 
+ - ``"v0"``: provider-specific format in content (can lazily-parse with + ``.content_blocks``) + - ``"v1"``: standardized format in content (consistent with ``.content_blocks``) - All chat models currently support the default of ``'v0'``. + Partner packages (e.g., ``langchain-openai``) can also use this field to roll out + new content formats in a backward-compatible way. .. versionadded:: 1.0 @@ -545,6 +555,11 @@ def stream( if chunk.message.id is None: chunk.message.id = run_id chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) + if self.output_version == "v1": + # Overwrite .content with .content_blocks + chunk.message = _update_message_content_to_blocks( + chunk.message, "v1" + ) run_manager.on_llm_new_token( cast("str", chunk.message.content), chunk=chunk ) @@ -642,6 +657,11 @@ async def astream( if chunk.message.id is None: chunk.message.id = run_id chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) + if self.output_version == "v1": + # Overwrite .content with .content_blocks + chunk.message = _update_message_content_to_blocks( + chunk.message, "v1" + ) await run_manager.on_llm_new_token( cast("str", chunk.message.content), chunk=chunk ) @@ -1100,6 +1120,11 @@ def _generate_with_cache( if run_manager: if chunk.message.id is None: chunk.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}" + if self.output_version == "v1": + # Overwrite .content with .content_blocks + chunk.message = _update_message_content_to_blocks( + chunk.message, "v1" + ) run_manager.on_llm_new_token( cast("str", chunk.message.content), chunk=chunk ) @@ -1112,6 +1137,13 @@ def _generate_with_cache( else: result = self._generate(messages, stop=stop, **kwargs) + if self.output_version == "v1": + # Overwrite .content with .content_blocks + for generation in result.generations: + generation.message = _update_message_content_to_blocks( + generation.message, "v1" + ) + # Add response metadata to each generation for idx, generation in enumerate(result.generations): if run_manager and generation.message.id is None: @@ -1173,6 +1205,11 @@ async def _agenerate_with_cache( if run_manager: if chunk.message.id is None: chunk.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}" + if self.output_version == "v1": + # Overwrite .content with .content_blocks + chunk.message = _update_message_content_to_blocks( + chunk.message, "v1" + ) await run_manager.on_llm_new_token( cast("str", chunk.message.content), chunk=chunk ) @@ -1185,6 +1222,13 @@ async def _agenerate_with_cache( else: result = await self._agenerate(messages, stop=stop, **kwargs) + if self.output_version == "v1": + # Overwrite .content with .content_blocks + for generation in result.generations: + generation.message = _update_message_content_to_blocks( + generation.message, "v1" + ) + # Add response metadata to each generation for idx, generation in enumerate(result.generations): if run_manager and generation.message.id is None: diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 37b05ed825566..c21e5ba86b154 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -14,11 +14,15 @@ ParrotFakeChatModel, ) from langchain_core.language_models._utils import _normalize_messages -from langchain_core.language_models.fake_chat_models import FakeListChatModelError +from langchain_core.language_models.fake_chat_models import ( + 
FakeListChatModelError, + GenericFakeChatModel, +) from langchain_core.messages import ( AIMessage, AIMessageChunk, BaseMessage, + BaseMessageChunk, HumanMessage, SystemMessage, ) @@ -654,3 +658,127 @@ def test_normalize_messages_edge_cases() -> None: ) ] assert messages == _normalize_messages(messages) + + +def test_output_version_invoke(monkeypatch: Any) -> None: + messages = [AIMessage("hello")] + + llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") + response = llm.invoke("hello") + assert response.content == [{"type": "text", "text": "hello"}] + assert response.response_metadata["output_version"] == "v1" + + llm = GenericFakeChatModel(messages=iter(messages)) + response = llm.invoke("hello") + assert response.content == "hello" + + monkeypatch.setenv("LC_OUTPUT_VERSION", "v1") + llm = GenericFakeChatModel(messages=iter(messages)) + response = llm.invoke("hello") + assert response.content == [{"type": "text", "text": "hello"}] + assert response.response_metadata["output_version"] == "v1" + + +async def test_output_version_ainvoke(monkeypatch: Any) -> None: + messages = [AIMessage("hello")] + + llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") + response = await llm.ainvoke("hello") + assert response.content == [{"type": "text", "text": "hello"}] + assert response.response_metadata["output_version"] == "v1" + + llm = GenericFakeChatModel(messages=iter(messages)) + response = await llm.ainvoke("hello") + assert response.content == "hello" + + monkeypatch.setenv("LC_OUTPUT_VERSION", "v1") + llm = GenericFakeChatModel(messages=iter(messages)) + response = await llm.ainvoke("hello") + assert response.content == [{"type": "text", "text": "hello"}] + assert response.response_metadata["output_version"] == "v1" + + +def test_output_version_stream(monkeypatch: Any) -> None: + messages = [AIMessage("foo bar")] + + llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") + full: Optional[BaseMessageChunk] = None + for chunk in llm.stream("hello"): + assert isinstance(chunk, AIMessageChunk) + assert isinstance(chunk.content, list) + assert len(chunk.content) == 1 + block = chunk.content[0] + assert isinstance(block, dict) + assert block["type"] == "text" + assert block["text"] + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert full.response_metadata["output_version"] == "v1" + + llm = GenericFakeChatModel(messages=iter(messages)) + full = None + for chunk in llm.stream("hello"): + assert isinstance(chunk, AIMessageChunk) + assert isinstance(chunk.content, str) + assert chunk.content + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert full.content == "foo bar" + + monkeypatch.setenv("LC_OUTPUT_VERSION", "v1") + llm = GenericFakeChatModel(messages=iter(messages)) + full = None + for chunk in llm.stream("hello"): + assert isinstance(chunk, AIMessageChunk) + assert isinstance(chunk.content, list) + assert len(chunk.content) == 1 + block = chunk.content[0] + assert isinstance(block, dict) + assert block["type"] == "text" + assert block["text"] + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert full.response_metadata["output_version"] == "v1" + + +async def test_output_version_astream(monkeypatch: Any) -> None: + messages = [AIMessage("foo bar")] + + llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") + full: Optional[BaseMessageChunk] = None + async for chunk in 
llm.astream("hello"): + assert isinstance(chunk, AIMessageChunk) + assert isinstance(chunk.content, list) + assert len(chunk.content) == 1 + block = chunk.content[0] + assert isinstance(block, dict) + assert block["type"] == "text" + assert block["text"] + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert full.response_metadata["output_version"] == "v1" + + llm = GenericFakeChatModel(messages=iter(messages)) + full = None + async for chunk in llm.astream("hello"): + assert isinstance(chunk, AIMessageChunk) + assert isinstance(chunk.content, str) + assert chunk.content + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert full.content == "foo bar" + + monkeypatch.setenv("LC_OUTPUT_VERSION", "v1") + llm = GenericFakeChatModel(messages=iter(messages)) + full = None + async for chunk in llm.astream("hello"): + assert isinstance(chunk, AIMessageChunk) + assert isinstance(chunk.content, list) + assert len(chunk.content) == 1 + block = chunk.content[0] + assert isinstance(block, dict) + assert block["type"] == "text" + assert block["text"] + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert full.response_metadata["output_version"] == "v1" diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index 7d5642ce853a2..04bab565b38b3 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -97,7 +97,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=['foo, bar'])", + "repr": "FakeListChatModel(output_version='v0', responses=['foo, bar'])", "name": "FakeListChatModel" } ], @@ -227,7 +227,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=['baz, qux'])", + "repr": "FakeListChatModel(output_version='v0', responses=['baz, qux'])", "name": "FakeListChatModel" } ], @@ -346,7 +346,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=['foo, bar'])", + "repr": "FakeListChatModel(output_version='v0', responses=['foo, bar'])", "name": "FakeListChatModel" }, { @@ -457,7 +457,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=['baz, qux'])", + "repr": "FakeListChatModel(output_version='v0', responses=['baz, qux'])", "name": "FakeListChatModel" } ], @@ -1009,7 +1009,7 @@ # name: test_prompt_with_chat_model ''' ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a nice assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})]) - | FakeListChatModel(responses=['foo']) + | FakeListChatModel(output_version='v0', responses=['foo']) ''' # --- # name: test_prompt_with_chat_model.1 @@ -1109,7 +1109,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=['foo'])", + "repr": "FakeListChatModel(output_version='v0', responses=['foo'])", "name": "FakeListChatModel" } }, @@ -1220,7 +1220,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=['foo, bar'])", + "repr": 
"FakeListChatModel(output_version='v0', responses=['foo, bar'])", "name": "FakeListChatModel" } ], @@ -1249,7 +1249,7 @@ # name: test_prompt_with_chat_model_async ''' ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a nice assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})]) - | FakeListChatModel(responses=['foo']) + | FakeListChatModel(output_version='v0', responses=['foo']) ''' # --- # name: test_prompt_with_chat_model_async.1 @@ -1349,7 +1349,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=['foo'])", + "repr": "FakeListChatModel(output_version='v0', responses=['foo'])", "name": "FakeListChatModel" } }, @@ -13863,7 +13863,7 @@ just_to_test_lambda: RunnableLambda(...) } | ChatPromptTemplate(input_variables=['documents', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a nice assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['documents', 'question'], input_types={}, partial_variables={}, template='Context:\n{documents}\n\nQuestion:\n{question}'), additional_kwargs={})]) - | FakeListChatModel(responses=['foo, bar']) + | FakeListChatModel(output_version='v0', responses=['foo, bar']) | CommaSeparatedListOutputParser() ''' # --- @@ -14066,7 +14066,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=['foo, bar'])", + "repr": "FakeListChatModel(output_version='v0', responses=['foo, bar'])", "name": "FakeListChatModel" } ], @@ -14092,7 +14092,7 @@ ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a nice assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})]) | RunnableLambda(...) 
| { - chat: FakeListChatModel(responses=["i'm a chatbot"]), + chat: FakeListChatModel(output_version='v0', responses=["i'm a chatbot"]), llm: FakeListLLM(responses=["i'm a textbot"]) } ''' @@ -14218,7 +14218,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=[\"i'm a chatbot\"])", + "repr": "FakeListChatModel(output_version='v0', responses=[\"i'm a chatbot\"])", "name": "FakeListChatModel" }, "llm": { @@ -14373,7 +14373,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=[\"i'm a chatbot\"])", + "repr": "FakeListChatModel(output_version='v0', responses=[\"i'm a chatbot\"])", "name": "FakeListChatModel" }, "kwargs": { diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index fc040991797dd..ce3f01b12decd 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -69,10 +69,6 @@ OutputTokenDetails, UsageMetadata, ) -from langchain_core.messages.block_translators.openai import ( - translate_content, - translate_content_chunk, -) from langchain_core.messages.tool import tool_call_chunk from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser from langchain_core.output_parsers.openai_tools import ( @@ -719,12 +715,9 @@ class BaseChatOpenAI(BaseChatModel): - ``'v0'``: AIMessage format as of langchain-openai 0.3.x. - ``'responses/v1'``: Formats Responses API output - items into AIMessage content blocks. + items into AIMessage content blocks (Responses API only) - ``"v1"``: v1 of LangChain cross-provider standard. - Currently only impacts the Responses API. ``output_version='v1'`` is - recommended. - .. versionadded:: 0.3.25 """ @@ -957,13 +950,6 @@ def _convert_chunk_to_generation_chunk( if usage_metadata and isinstance(message_chunk, AIMessageChunk): message_chunk.usage_metadata = usage_metadata - if self.output_version == "v1": - message_chunk.content = cast( - "Union[str, list[Union[str, dict]]]", - translate_content_chunk(cast(AIMessageChunk, message_chunk)), - ) - message_chunk.response_metadata["output_version"] = "v1" - generation_chunk = ChatGenerationChunk( message=message_chunk, generation_info=generation_info or None ) @@ -1329,13 +1315,6 @@ def _create_chat_result( if hasattr(message, "refusal"): generations[0].message.additional_kwargs["refusal"] = message.refusal - if self.output_version == "v1": - generations[0].message.content = cast( - Union[str, list[Union[str, dict]]], - translate_content(cast(AIMessage, generations[0].message)), - ) - generations[0].message.response_metadata["output_version"] = "v1" - return ChatResult(generations=generations, llm_output=llm_output) async def _astream( @@ -4091,13 +4070,7 @@ def _construct_lc_result_from_responses_api( ) if output_version == "v0": message = _convert_to_v03_ai_message(message) - elif output_version == "v1": - message.content = cast( - Union[str, list[Union[str, dict]]], translate_content(message) - ) - message.response_metadata["output_version"] = "v1" - else: - pass + return ChatResult(generations=[ChatGeneration(message=message)]) @@ -4163,6 +4136,7 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: response_metadata = metadata else: response_metadata = {} + response_metadata["model_provider"] = "openai" usage_metadata = None id = None if chunk.type == "response.output_text.delta": @@ -4324,13 +4298,7 @@ def _advance(output_idx: int, sub_idx: Optional[int] 
= None) -> None: AIMessageChunk, _convert_to_v03_ai_message(message, has_reasoning=has_reasoning), ) - elif output_version == "v1": - message.content = cast( - Union[str, list[Union[str, dict]]], translate_content_chunk(message) - ) - message.response_metadata["output_version"] = "v1" - else: - pass + return ( current_index, current_output_index, From 5bcf7d006f8b63f87f3cd0a521b5910d4a484750 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Thu, 21 Aug 2025 14:48:23 -0400 Subject: [PATCH 43/56] refactor(core): data block handling, normalize message formats, strip IDs from messages (#32572) > [!WARNING] > **BREAKING:** Simplifies message normalization to single consistent path, requiring partner package updates **Key Changes:** - Consistent multimodal handling: - OpenAI `image_url` blocks pass through unchanged (broad compatibility) - OpenAI `input_audio` and `file` blocks convert to v1 standard equivalents - Legacy v0 multimodal blocks convert to v1 standard - Everything else passes through unchanged - Partner packages must update content block parsing logic **Partner Updates** `output_version` affects how messages are serialized into `.content`. `_normalize_messages()` will now upgrade v0 content to v1, so, all partners now receive v1 format input regardless of `output_version`. Migration: - Partner packages must update to handle v1 input content blocks - `output_version` still controls serialization format of responses (unchanged) --------- Co-authored-by: Chester Curme --- .../langchain_core/language_models/_utils.py | 288 ++++++++--- libs/core/langchain_core/messages/base.py | 45 +- .../messages/block_translators/langchain.py | 304 +++++++++++ .../messages/block_translators/openai.py | 66 ++- libs/core/langchain_core/messages/content.py | 54 +- .../language_models/chat_models/test_base.py | 477 ++++++++++++------ .../chat_models/test_rate_limiting.py | 8 +- .../messages/block_translators/test_openai.py | 32 ++ 8 files changed, 1005 insertions(+), 269 deletions(-) create mode 100644 libs/core/langchain_core/messages/block_translators/langchain.py diff --git a/libs/core/langchain_core/language_models/_utils.py b/libs/core/langchain_core/language_models/_utils.py index 19dcd8699485b..94680674e3a64 100644 --- a/libs/core/langchain_core/language_models/_utils.py +++ b/libs/core/langchain_core/language_models/_utils.py @@ -1,12 +1,30 @@ import re from collections.abc import Sequence -from typing import Optional, TypeVar +from typing import ( + TYPE_CHECKING, + Literal, + Optional, + TypedDict, + TypeVar, + Union, +) -from langchain_core.messages import BaseMessage +if TYPE_CHECKING: + from langchain_core.messages import BaseMessage +from langchain_core.messages.content import ( + ContentBlock, +) def _is_openai_data_block(block: dict) -> bool: - """Check if the block contains multimodal data in OpenAI Chat Completions format.""" + """Check if the block contains multimodal data in OpenAI Chat Completions format. + + Supports both data and ID-style blocks (e.g. ``'file_data'`` and ``'file_id'``) + + If additional keys are present, they are ignored / will not affect outcome as long + as the required keys are present and valid. 
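    For illustration only (hypothetical values), blocks this predicate is meant
    to accept::

        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}}
        {"type": "input_audio", "input_audio": {"data": "<base64>", "format": "wav"}}
        {"type": "file", "file": {"file_id": "file-abc123"}}

    A block missing a required key (e.g. ``input_audio`` without ``format``) is
    rejected.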
+ + """ if block.get("type") == "image_url": if ( (set(block.keys()) <= {"type", "image_url", "detail"}) @@ -15,29 +33,43 @@ def _is_openai_data_block(block: dict) -> bool: ): url = image_url.get("url") if isinstance(url, str): + # Required per OpenAI spec + return True + # Ignore `'detail'` since it's optional and specific to OpenAI + + elif block.get("type") == "input_audio": + if (audio := block.get("input_audio")) and isinstance(audio, dict): + audio_data = audio.get("data") + audio_format = audio.get("format") + # Both required per OpenAI spec + if isinstance(audio_data, str) and isinstance(audio_format, str): return True elif block.get("type") == "file": if (file := block.get("file")) and isinstance(file, dict): file_data = file.get("file_data") - if isinstance(file_data, str): - return True - - elif block.get("type") == "input_audio": - if (input_audio := block.get("input_audio")) and isinstance(input_audio, dict): - audio_data = input_audio.get("data") - audio_format = input_audio.get("format") - if isinstance(audio_data, str) and isinstance(audio_format, str): + file_id = file.get("file_id") + # Files can be either base64-encoded or pre-uploaded with an ID + if isinstance(file_data, str) or isinstance(file_id, str): return True else: return False + # Has no `'type'` key return False -def _parse_data_uri(uri: str) -> Optional[dict]: - """Parse a data URI into its components. If parsing fails, return None. +class ParsedDataUri(TypedDict): + source_type: Literal["base64"] + data: str + mime_type: str + + +def _parse_data_uri(uri: str) -> Optional[ParsedDataUri]: + """Parse a data URI into its components. + + If parsing fails, return None. If either MIME type or data is missing, return None. Example: @@ -57,90 +89,208 @@ def _parse_data_uri(uri: str) -> Optional[dict]: match = re.match(regex, uri) if match is None: return None + + mime_type = match.group("mime_type") + data = match.group("data") + if not mime_type or not data: + return None + return { "source_type": "base64", - "data": match.group("data"), - "mime_type": match.group("mime_type"), + "data": data, + "mime_type": mime_type, } -def _convert_openai_format_to_data_block(block: dict) -> dict: - """Convert OpenAI image content block to standard data content block. +def _normalize_messages( + messages: Sequence["BaseMessage"], +) -> list["BaseMessage"]: + """Normalize message formats to LangChain v1 standard content blocks. - If parsing fails, pass-through. + Chat models already implement support for: + - Images in OpenAI Chat Completions format + These will be passed through unchanged + - LangChain v1 standard content blocks - Args: - block: The OpenAI image content block to convert. + This function extends support to: + - `Audio `__ and + `file `__ data in OpenAI + Chat Completions format + - Images are technically supported but we expect chat models to handle them + directly; this may change in the future + - LangChain v0 standard content blocks for backward compatibility - Returns: - The converted standard data content block. 
- """ - if block["type"] == "image_url": - parsed = _parse_data_uri(block["image_url"]["url"]) - if parsed is not None: - parsed["type"] = "image" - return parsed - return block - - if block["type"] == "file": - parsed = _parse_data_uri(block["file"]["file_data"]) - if parsed is not None: - parsed["type"] = "file" - if filename := block["file"].get("filename"): - parsed["filename"] = filename - return parsed - return block - - if block["type"] == "input_audio": - data = block["input_audio"].get("data") - audio_format = block["input_audio"].get("format") - if data and audio_format: - return { - "type": "audio", - "source_type": "base64", - "data": data, - "mime_type": f"audio/{audio_format}", + .. versionchanged:: 1.0.0 + In previous versions, this function returned messages in LangChain v0 format. + Now, it returns messages in LangChain v1 format, which upgraded chat models now + expect to receive when passing back in message history. For backward + compatibility, this function will convert v0 message content to v1 format. + + .. dropdown:: v0 Content Block Schemas + + ``URLContentBlock``: + + .. codeblock:: + + { + mime_type: NotRequired[str] + type: Literal['image', 'audio', 'file'], + source_type: Literal['url'], + url: str, } - return block - return block + ``Base64ContentBlock``: + + .. codeblock:: + + { + mime_type: NotRequired[str] + type: Literal['image', 'audio', 'file'], + source_type: Literal['base64'], + data: str, + } + ``IDContentBlock``: -def _normalize_messages(messages: Sequence[BaseMessage]) -> list[BaseMessage]: - """Extend support for message formats. + (In practice, this was never used) + + .. codeblock:: + + { + type: Literal['image', 'audio', 'file'], + source_type: Literal['id'], + id: str, + } + + ``PlainTextContentBlock``: + + .. codeblock:: + + { + mime_type: NotRequired[str] + type: Literal['file'], + source_type: Literal['text'], + url: str, + } + + If a v1 message is passed in, it will be returned as-is, meaning it is safe to + always pass in v1 messages to this function for assurance. + + For posterity, here are the OpenAI Chat Completions schemas we expect: + + Chat Completions image. Can be URL-based or base64-encoded. Supports MIME types + png, jpeg/jpg, webp, static gif: + { + "type": Literal['image_url'], + "image_url": { + "url": Union["data:$MIME_TYPE;base64,$BASE64_ENCODED_IMAGE", "$IMAGE_URL"], + "detail": Literal['low', 'high', 'auto'] = 'auto', # Supported by OpenAI + } + } + + Chat Completions audio: + { + "type": Literal['input_audio'], + "input_audio": { + "format": Literal['wav', 'mp3'], + "data": str = "$BASE64_ENCODED_AUDIO", + }, + } + + Chat Completions files: either base64 or pre-uploaded file ID + { + "type": Literal['file'], + "file": Union[ + { + "filename": Optional[str] = "$FILENAME", + "file_data": str = "$BASE64_ENCODED_FILE", + }, + { + "file_id": str = "$FILE_ID", # For pre-uploaded files to OpenAI + }, + ], + } - Chat models implement support for images in OpenAI Chat Completions format, as well - as other multimodal data as standard data blocks. This function extends support to - audio and file data in OpenAI Chat Completions format by converting them to standard - data blocks. """ + from langchain_core.messages.block_translators.langchain import ( + _convert_legacy_v0_content_block_to_v1, + _convert_openai_format_to_data_block, + ) + formatted_messages = [] for message in messages: + # We preserve input messages - the caller may reuse them elsewhere and expects + # them to remain unchanged. 
We only create a copy if we need to translate. formatted_message = message + if isinstance(message.content, list): for idx, block in enumerate(message.content): + # OpenAI Chat Completions multimodal data blocks to v1 standard if ( isinstance(block, dict) - # Subset to (PDF) files and audio, as most relevant chat models - # support images in OAI format (and some may not yet support the - # standard data block format) - and block.get("type") in {"file", "input_audio"} + and block.get("type") in {"input_audio", "file"} + # Discriminate between OpenAI/LC format since they share `'type'` and _is_openai_data_block(block) ): - if formatted_message is message: - formatted_message = message.model_copy() - # Also shallow-copy content - formatted_message.content = list(formatted_message.content) - - formatted_message.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy - _convert_openai_format_to_data_block(block) - ) + formatted_message = _ensure_message_copy(message, formatted_message) + + converted_block = _convert_openai_format_to_data_block(block) + _update_content_block(formatted_message, idx, converted_block) + + # Convert multimodal LangChain v0 to v1 standard content blocks + elif ( + isinstance(block, dict) + and block.get("type") + in { + "image", + "audio", + "file", + } + and block.get("source_type") # v1 doesn't have `source_type` + in { + "url", + "base64", + "id", + "text", + } + ): + formatted_message = _ensure_message_copy(message, formatted_message) + + converted_block = _convert_legacy_v0_content_block_to_v1(block) + _update_content_block(formatted_message, idx, converted_block) + continue + + # else, pass through blocks that look like they have v1 format unchanged + formatted_messages.append(formatted_message) return formatted_messages -T = TypeVar("T", bound=BaseMessage) +T = TypeVar("T", bound="BaseMessage") + + +def _ensure_message_copy(message: T, formatted_message: T) -> T: + """Create a copy of the message if it hasn't been copied yet.""" + if formatted_message is message: + formatted_message = message.model_copy() + # Shallow-copy content list to allow modifications + formatted_message.content = list(formatted_message.content) + return formatted_message + + +def _update_content_block( + formatted_message: "BaseMessage", idx: int, new_block: Union[ContentBlock, dict] +) -> None: + """Update a content block at the given index, handling type issues.""" + # Type ignore needed because: + # - `BaseMessage.content` is typed as `Union[str, list[Union[str, dict]]]` + # - When content is str, indexing fails (index error) + # - When content is list, the items are `Union[str, dict]` but we're assigning + # `Union[ContentBlock, dict]` where ContentBlock is richer than dict + # - This is safe because we only call this when we've verified content is a list and + # we're doing content block conversions + formatted_message.content[idx] = new_block # type: ignore[index, assignment] def _update_message_content_to_blocks(message: T, output_version: str) -> T: diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 5355b75a71bf6..3452740b46ef2 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -7,6 +7,13 @@ from pydantic import ConfigDict, Field from langchain_core.load.serializable import Serializable +from langchain_core.messages.block_translators.langchain import ( + _convert_legacy_v0_content_block_to_v1, + _convert_v0_multimodal_input_to_v1, +) +from 
langchain_core.messages.block_translators.openai import ( + _convert_to_v1_from_chat_completions_input, +) from langchain_core.utils import get_bolded_text from langchain_core.utils._merge import merge_dicts, merge_lists from langchain_core.utils.interactive_env import is_interactive_env @@ -124,6 +131,8 @@ def content_blocks(self) -> list[types.ContentBlock]: from langchain_core.messages import content as types blocks: list[types.ContentBlock] = [] + + # First pass: convert to standard blocks content = ( [self.content] if isinstance(self.content, str) and self.content @@ -134,18 +143,26 @@ def content_blocks(self) -> list[types.ContentBlock]: blocks.append({"type": "text", "text": item}) elif isinstance(item, dict): item_type = item.get("type") - if item_type not in types.KNOWN_BLOCK_TYPES: - msg = ( - f"Non-standard content block type '{item_type}'. Ensure " - "the model supports `output_version='v1'` or higher and " - "that this attribute is set on initialization." + # Try to convert potential v0 format first + converted_block = _convert_legacy_v0_content_block_to_v1(item) + if converted_block is not item: # Conversion happened + blocks.append(cast("types.ContentBlock", converted_block)) + elif item_type is None or item_type not in types.KNOWN_BLOCK_TYPES: + blocks.append( + cast( + "types.ContentBlock", + {"type": "non_standard", "value": item}, + ) ) - raise ValueError(msg) - blocks.append(cast("types.ContentBlock", item)) - else: - pass + else: + blocks.append(cast("types.ContentBlock", item)) + + # Subsequent passes: attempt to unpack non-standard blocks + blocks = _convert_v0_multimodal_input_to_v1(blocks) + # blocks = _convert_to_v1_from_anthropic_input(blocks) + # ... - return blocks + return _convert_to_v1_from_chat_completions_input(blocks) def text(self) -> str: """Get the text content of the message. @@ -211,7 +228,9 @@ def merge_content( Returns: The merged content. """ - merged = first_content + merged: Union[str, list[Union[str, dict]]] + merged = "" if first_content is None else first_content + for content in contents: # If current is a string if isinstance(merged, str): @@ -232,8 +251,8 @@ def merge_content( # If second content is an empty string, treat as a no-op elif content == "": pass - else: - # Otherwise, add the second content as a new element of the list + # Otherwise, add the second content as a new element of the list + elif merged: merged.append(content) return merged diff --git a/libs/core/langchain_core/messages/block_translators/langchain.py b/libs/core/langchain_core/messages/block_translators/langchain.py new file mode 100644 index 0000000000000..4b5e4479835a3 --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/langchain.py @@ -0,0 +1,304 @@ +"""Derivations of standard content blocks from LangChain content.""" + +from typing import Any, Union, cast + +from langchain_core.language_models._utils import _parse_data_uri +from langchain_core.messages import content as types + + +def _convert_v0_multimodal_input_to_v1( + blocks: list[types.ContentBlock], +) -> list[types.ContentBlock]: + """Convert v0 multimodal blocks to v1 format. + + Processes non_standard blocks that might be v0 format and converts them + to proper v1 ContentBlocks. + + Args: + blocks: List of content blocks to process. + + Returns: + Updated list with v0 blocks converted to v1 format. 
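    Example (a sketch; values are placeholders)::

        blocks = [{"type": "non_standard", "value": {
            "type": "image", "source_type": "url", "url": "https://example.com/a.png",
        }}]
        _convert_v0_multimodal_input_to_v1(blocks)
        # -> [{"type": "image", "url": "https://example.com/a.png"}]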
+ """ + converted_blocks = [] + for block in blocks: + if ( + isinstance(block, dict) + and block.get("type") == "non_standard" + and "value" in block + and isinstance(block["value"], dict) # type: ignore[typeddict-item] + ): + # We know this is a NonStandardContentBlock, so we can safely access value + value = cast("Any", block)["value"] + # Check if this looks like v0 format + if ( + value.get("type") in {"image", "audio", "file"} + and "source_type" in value + ): + converted_block = _convert_legacy_v0_content_block_to_v1(value) + converted_blocks.append(cast("types.ContentBlock", converted_block)) + else: + converted_blocks.append(block) + else: + converted_blocks.append(block) + + return converted_blocks + + +def _convert_legacy_v0_content_block_to_v1( + block: dict, +) -> Union[types.ContentBlock, dict]: + """Convert a LangChain v0 content block to v1 format. + + Preserves unknown keys as extras to avoid data loss. + + Returns the original block unchanged if it's not in v0 format. + + """ + + def _extract_v0_extras(block_dict: dict, known_keys: set[str]) -> dict[str, Any]: + """Extract unknown keys from v0 block to preserve as extras.""" + return {k: v for k, v in block_dict.items() if k not in known_keys} + + # Check if this is actually a v0 format block + block_type = block.get("type") + if block_type not in {"image", "audio", "file"} or "source_type" not in block: + # Not a v0 format block, return unchanged + return block + + if block.get("type") == "image": + source_type = block.get("source_type") + if source_type == "url": + known_keys = {"type", "source_type", "url", "mime_type"} + extras = _extract_v0_extras(block, known_keys) + if "id" in block: + return types.create_image_block( + url=block["url"], + mime_type=block.get("mime_type"), + id=block["id"], + **extras, + ) + + # Don't construct with an ID if not present in original block + v1_block = types.ImageContentBlock(type="image", url=block["url"]) + if block.get("mime_type"): + v1_block["mime_type"] = block["mime_type"] + + for key, value in extras.items(): + if value is not None: + v1_block["extras"] = {} + v1_block["extras"][key] = value + return v1_block + if source_type == "base64": + known_keys = {"type", "source_type", "data", "mime_type"} + extras = _extract_v0_extras(block, known_keys) + if "id" in block: + return types.create_image_block( + base64=block["data"], + mime_type=block.get("mime_type"), + id=block["id"], + **extras, + ) + + v1_block = types.ImageContentBlock(type="image", base64=block["data"]) + if block.get("mime_type"): + v1_block["mime_type"] = block["mime_type"] + + for key, value in extras.items(): + if value is not None: + v1_block["extras"] = {} + v1_block["extras"][key] = value + return v1_block + if source_type == "id": + known_keys = {"type", "source_type", "id"} + extras = _extract_v0_extras(block, known_keys) + # For id `source_type`, `id` is the file reference, not block ID + v1_block = types.ImageContentBlock(type="image", file_id=block["id"]) + + for key, value in extras.items(): + if value is not None: + v1_block["extras"] = {} + v1_block["extras"][key] = value + + return v1_block + elif block.get("type") == "audio": + source_type = block.get("source_type") + if source_type == "url": + known_keys = {"type", "source_type", "url", "mime_type"} + extras = _extract_v0_extras(block, known_keys) + return types.create_audio_block( + url=block["url"], mime_type=block.get("mime_type"), **extras + ) + if source_type == "base64": + known_keys = {"type", "source_type", "data", "mime_type"} + extras = 
_extract_v0_extras(block, known_keys) + return types.create_audio_block( + base64=block["data"], mime_type=block.get("mime_type"), **extras + ) + if source_type == "id": + known_keys = {"type", "source_type", "id"} + extras = _extract_v0_extras(block, known_keys) + return types.create_audio_block(file_id=block["id"], **extras) + elif block.get("type") == "file": + source_type = block.get("source_type") + if source_type == "url": + known_keys = {"type", "source_type", "url", "mime_type"} + extras = _extract_v0_extras(block, known_keys) + return types.create_file_block( + url=block["url"], mime_type=block.get("mime_type"), **extras + ) + if source_type == "base64": + known_keys = {"type", "source_type", "data", "mime_type"} + extras = _extract_v0_extras(block, known_keys) + return types.create_file_block( + base64=block["data"], mime_type=block.get("mime_type"), **extras + ) + if source_type == "id": + known_keys = {"type", "source_type", "id"} + extras = _extract_v0_extras(block, known_keys) + return types.create_file_block(file_id=block["id"], **extras) + if source_type == "text": + known_keys = {"type", "source_type", "url", "mime_type"} + extras = _extract_v0_extras(block, known_keys) + return types.create_plaintext_block( + # In v0, URL points to the text file content + text=block["url"], + **extras, + ) + + # If we can't convert, return the block unchanged + return block + + +def _convert_openai_format_to_data_block( + block: dict, +) -> Union[types.ContentBlock, dict[Any, Any]]: + """Convert OpenAI image/audio/file content block to respective v1 multimodal block. + + We expect that the incoming block is verified to be in OpenAI Chat Completions + format. + + If parsing fails, passes block through unchanged. + + Mappings (Chat Completions to LangChain v1): + - Image -> `ImageContentBlock` + - Audio -> `AudioContentBlock` + - File -> `FileContentBlock` + + """ + + # Extract extra keys to put them in `extras` + def _extract_extras(block_dict: dict, known_keys: set[str]) -> dict[str, Any]: + """Extract unknown keys from block to preserve as extras.""" + return {k: v for k, v in block_dict.items() if k not in known_keys} + + # base64-style image block + if (block["type"] == "image_url") and ( + parsed := _parse_data_uri(block["image_url"]["url"]) + ): + known_keys = {"type", "image_url"} + extras = _extract_extras(block, known_keys) + + # Also extract extras from nested image_url dict + image_url_known_keys = {"url"} + image_url_extras = _extract_extras(block["image_url"], image_url_known_keys) + + # Merge extras + all_extras = {**extras} + for key, value in image_url_extras.items(): + if key == "detail": # Don't rename + all_extras["detail"] = value + else: + all_extras[f"image_url_{key}"] = value + + return types.create_image_block( + # Even though this is labeled as `url`, it can be base64-encoded + base64=block["image_url"]["url"], + mime_type=parsed["mime_type"], + **all_extras, + ) + + # url-style image block + if (block["type"] == "image_url") and isinstance( + block["image_url"].get("url"), str + ): + known_keys = {"type", "image_url"} + extras = _extract_extras(block, known_keys) + + image_url_known_keys = {"url"} + image_url_extras = _extract_extras(block["image_url"], image_url_known_keys) + + all_extras = {**extras} + for key, value in image_url_extras.items(): + if key == "detail": # Don't rename + all_extras["detail"] = value + else: + all_extras[f"image_url_{key}"] = value + + return types.create_image_block( + url=block["image_url"]["url"], + **all_extras, + ) + + # 
base64-style audio block + # audio is only represented via raw data, no url or ID option + if block["type"] == "input_audio": + known_keys = {"type", "input_audio"} + extras = _extract_extras(block, known_keys) + + # Also extract extras from nested audio dict + audio_known_keys = {"data", "format"} + audio_extras = _extract_extras(block["input_audio"], audio_known_keys) + + all_extras = {**extras} + for key, value in audio_extras.items(): + all_extras[f"audio_{key}"] = value + + return types.create_audio_block( + base64=block["input_audio"]["data"], + mime_type=f"audio/{block['input_audio']['format']}", + **all_extras, + ) + + # id-style file block + if block.get("type") == "file" and "file_id" in block.get("file", {}): + known_keys = {"type", "file"} + extras = _extract_extras(block, known_keys) + + file_known_keys = {"file_id"} + file_extras = _extract_extras(block["file"], file_known_keys) + + all_extras = {**extras} + for key, value in file_extras.items(): + all_extras[f"file_{key}"] = value + + return types.create_file_block( + file_id=block["file"]["file_id"], + **all_extras, + ) + + # base64-style file block + if (block["type"] == "file") and ( + parsed := _parse_data_uri(block["file"]["file_data"]) + ): + known_keys = {"type", "file"} + extras = _extract_extras(block, known_keys) + + file_known_keys = {"file_data", "filename"} + file_extras = _extract_extras(block["file"], file_known_keys) + + all_extras = {**extras} + for key, value in file_extras.items(): + all_extras[f"file_{key}"] = value + + mime_type = parsed["mime_type"] + filename = block["file"].get("filename") + return types.create_file_block( + base64=block["file"]["file_data"], + mime_type=mime_type, + filename=filename, + **all_extras, + ) + + # Escape hatch + return block diff --git a/libs/core/langchain_core/messages/block_translators/openai.py b/libs/core/langchain_core/messages/block_translators/openai.py index 16f81e6502595..029757563b44d 100644 --- a/libs/core/langchain_core/messages/block_translators/openai.py +++ b/libs/core/langchain_core/messages/block_translators/openai.py @@ -1,10 +1,20 @@ """Derivations of standard content blocks from OpenAI content.""" +from __future__ import annotations + from collections.abc import Iterable -from typing import Any, Optional, Union, cast +from typing import TYPE_CHECKING, Any, Optional, Union, cast -from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.language_models._utils import ( + _is_openai_data_block, +) from langchain_core.messages import content as types +from langchain_core.messages.block_translators.langchain import ( + _convert_openai_format_to_data_block, +) + +if TYPE_CHECKING: + from langchain_core.messages import AIMessage, AIMessageChunk # v1 / Chat Completions @@ -25,6 +35,55 @@ def _convert_to_v1_from_chat_completions( return content_blocks +def _convert_to_v1_from_chat_completions_input( + blocks: list[types.ContentBlock], +) -> list[types.ContentBlock]: + """Convert OpenAI Chat Completions format blocks to v1 format. + + Processes non_standard blocks that might be OpenAI format and converts them + to proper ContentBlocks. If conversion fails, leaves them as non_standard. + + Args: + blocks: List of content blocks to process. + + Returns: + Updated list with OpenAI blocks converted to v1 format. 
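    Example (a sketch; the file ID is a placeholder)::

        blocks = [{"type": "non_standard", "value": {
            "type": "file", "file": {"file_id": "file-abc123"},
        }}]
        _convert_to_v1_from_chat_completions_input(blocks)
        # -> [{"type": "file", "file_id": "file-abc123"}]
        # (an auto-generated "id" field may also be present on the result)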
+ """ + from langchain_core.messages import content as types + + converted_blocks = [] + for block in blocks: + if ( + isinstance(block, dict) + and block.get("type") == "non_standard" + and "value" in block + and isinstance(block["value"], dict) # type: ignore[typeddict-item] + ): + # We know this is a NonStandardContentBlock, so we can safely access value + value = cast("Any", block)["value"] + # Check if this looks like OpenAI format + if value.get("type") in { + "image_url", + "input_audio", + "file", + } and _is_openai_data_block(value): + converted_block = _convert_openai_format_to_data_block(value) + # If conversion succeeded, use it; otherwise keep as non_standard + if ( + isinstance(converted_block, dict) + and converted_block.get("type") in types.KNOWN_BLOCK_TYPES + ): + converted_blocks.append(cast("types.ContentBlock", converted_block)) + else: + converted_blocks.append(block) + else: + converted_blocks.append(block) + else: + converted_blocks.append(block) + + return converted_blocks + + def _convert_to_v1_from_chat_completions_chunk( chunk: AIMessageChunk, ) -> list[types.ContentBlock]: @@ -220,6 +279,9 @@ def _iter_blocks() -> Iterable[types.ContentBlock]: Union[types.ToolCall, types.InvalidToolCall, types.ToolCallChunk] ] = None call_id = block.get("call_id", "") + + from langchain_core.messages import AIMessageChunk + if ( isinstance(message, AIMessageChunk) and len(message.tool_call_chunks) == 1 diff --git a/libs/core/langchain_core/messages/content.py b/libs/core/langchain_core/messages/content.py index fd46859b9181b..83287fb06c8b8 100644 --- a/libs/core/langchain_core/messages/content.py +++ b/libs/core/langchain_core/messages/content.py @@ -894,8 +894,6 @@ class NonStandardContentBlock(TypedDict): ContentBlock = Union[ TextContentBlock, - ToolCall, - ToolCallChunk, InvalidToolCall, ReasoningContentBlock, NonStandardContentBlock, @@ -905,22 +903,27 @@ class NonStandardContentBlock(TypedDict): KNOWN_BLOCK_TYPES = { + # Text output "text", - "text-plain", + "reasoning", + # Tools "tool_call", "invalid_tool_call", "tool_call_chunk", - "reasoning", - "non_standard", + # Multimodal data "image", "audio", "file", + "text-plain", "video", + # Server-side tool calls "code_interpreter_call", "code_interpreter_output", "code_interpreter_result", "web_search_call", "web_search_result", + # Catch-all + "non_standard", } @@ -950,21 +953,25 @@ def is_data_content_block(block: dict) -> bool: True if the content block is a data content block, False otherwise. """ - return block.get("type") in _get_data_content_block_types() and any( - # Check if at least one non-type key is present to signify presence of data - key in block - for key in ( - "url", - "base64", - "file_id", - "text", - "source_type", # for backwards compatibility with v0 content blocks - # TODO: should we verify that if source_type is present, at least one of - # url, base64, or file_id is also present? Otherwise, source_type could be - # present without any actual data? Need to confirm whether this was ever - # possible in v0 content blocks in the first place. 
- ) - ) + if block.get("type") not in _get_data_content_block_types(): + return False + + if any(key in block for key in ("url", "base64", "file_id", "text")): + return True + + # Verify data presence based on source type + if "source_type" in block: + source_type = block["source_type"] + if (source_type == "url" and "url" in block) or ( + source_type == "base64" and "data" in block + ): + return True + if (source_type == "id" and "id" in block) or ( + source_type == "text" and "url" in block + ): + return True + + return False def is_tool_call_block(block: ContentBlock) -> TypeGuard[ToolCall]: @@ -1021,12 +1028,13 @@ def convert_to_openai_image_block(block: dict[str, Any]) -> dict: def convert_to_openai_data_block(block: dict) -> dict: """Format standard data content block to format expected by OpenAI.""" - # TODO: make sure this supports new v1 if block["type"] == "image": formatted_block = convert_to_openai_image_block(block) elif block["type"] == "file": if "base64" in block or block.get("source_type") == "base64": + # Handle v0 format: {"source_type": "base64", "data": "...", ...} + # Handle v1 format: {"base64": "...", ...} base64_data = block["data"] if "source_type" in block else block["base64"] file = {"file_data": f"data:{block['mime_type']};base64,{base64_data}"} if filename := block.get("filename"): @@ -1045,6 +1053,8 @@ def convert_to_openai_data_block(block: dict) -> dict: ) formatted_block = {"type": "file", "file": file} elif "file_id" in block or block.get("source_type") == "id": + # Handle v0 format: {"source_type": "id", "id": "...", ...} + # Handle v1 format: {"file_id": "...", ...} file_id = block["id"] if "source_type" in block else block["file_id"] formatted_block = {"type": "file", "file": {"file_id": file_id}} else: @@ -1053,6 +1063,8 @@ def convert_to_openai_data_block(block: dict) -> dict: elif block["type"] == "audio": if "base64" in block or block.get("source_type") == "base64": + # Handle v0 format: {"source_type": "base64", "data": "...", ...} + # Handle v1 format: {"base64": "...", ...} base64_data = block["data"] if "source_type" in block else block["base64"] audio_format = block["mime_type"].split("/")[-1] formatted_block = { diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 4662c2fd58b7f..848fb75091a9f 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -44,6 +44,37 @@ from langchain_core.outputs.llm_result import LLMResult +def _content_blocks_equal_ignore_id( + actual: Union[str, list[Any]], expected: Union[str, list[Any]] +) -> bool: + """Compare content blocks, ignoring auto-generated `id` fields. + + Args: + actual: Actual content from response (string or list of content blocks). + expected: Expected content to compare against (string or list of blocks). + + Returns: + True if content matches (excluding `id` fields), False otherwise. 
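    For instance (a sketch; the id value is a placeholder)::

        _content_blocks_equal_ignore_id(
            [{"type": "image", "url": "https://x/a.png", "id": "lc_abc123"}],
            [{"type": "image", "url": "https://x/a.png"}],
        )  # -> True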
+ + """ + if isinstance(actual, str) or isinstance(expected, str): + return actual == expected + + if len(actual) != len(expected): + return False + for actual_block, expected_block in zip(actual, expected): + actual_without_id = ( + {k: v for k, v in actual_block.items() if k != "id"} + if isinstance(actual_block, dict) and "id" in actual_block + else actual_block + ) + + if actual_without_id != expected_block: + return False + + return True + + @pytest.fixture def messages() -> list: return [ @@ -145,7 +176,7 @@ def eval_response(callback: BaseFakeCallbackHandler, i: int) -> None: async def test_astream_fallback_to_ainvoke() -> None: - """Test astream uses appropriate implementation.""" + """Test `astream()` uses appropriate implementation.""" class ModelWithGenerate(BaseChatModel): @override @@ -431,11 +462,12 @@ def on_chat_model_start(self, *args: Any, **kwargs: Any) -> Run: def test_trace_images_in_openai_format() -> None: - """Test that images are traced in OpenAI format.""" + """Test that images are traced in OpenAI Chat Completions format.""" llm = ParrotFakeChatModel() messages = [ { "role": "user", + # v0 format "content": [ { "type": "image", @@ -446,7 +478,7 @@ def test_trace_images_in_openai_format() -> None: } ] tracer = FakeChatModelStartTracer() - response = llm.invoke(messages, config={"callbacks": [tracer]}) + llm.invoke(messages, config={"callbacks": [tracer]}) assert tracer.messages == [ [ [ @@ -461,19 +493,51 @@ def test_trace_images_in_openai_format() -> None: ] ] ] - # Test no mutation - assert response.content == [ - { + + +def test_content_block_transformation_v0_to_v1_image() -> None: + """Test that v0 format image content blocks are transformed to v1 format.""" + # Create a message with v0 format image content + image_message = AIMessage( + content=[ + { + "type": "image", + "source_type": "url", + "url": "https://example.com/image.png", + } + ] + ) + + llm = GenericFakeChatModel(messages=iter([image_message]), output_version="v1") + response = llm.invoke("test") + + # With v1 output_version, .content should be transformed + # Check structure, ignoring auto-generated IDs + assert len(response.content) == 1 + content_block = response.content[0] + if isinstance(content_block, dict) and "id" in content_block: + # Remove auto-generated id for comparison + content_without_id = {k: v for k, v in content_block.items() if k != "id"} + expected_content = { + "type": "image", + "url": "https://example.com/image.png", + } + assert content_without_id == expected_content + else: + assert content_block == { "type": "image", - "source_type": "url", "url": "https://example.com/image.png", } - ] -def test_trace_content_blocks_with_no_type_key() -> None: - """Test that we add a ``type`` key to certain content blocks that don't have one.""" - llm = ParrotFakeChatModel() +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_trace_content_blocks_with_no_type_key(output_version: str) -> None: + """Test behavior of content blocks that don't have a `type` key. + + Only for blocks with one key, in which case, the name of the key is used as `type`. 
+ + """ + llm = ParrotFakeChatModel(output_version=output_version) messages = [ { "role": "user", @@ -508,156 +572,235 @@ def test_trace_content_blocks_with_no_type_key() -> None: ] ] ] - # Test no mutation - assert response.content == [ + + if output_version == "v0": + assert response.content == [ + { + "type": "text", + "text": "Hello", + }, + { + "cachePoint": {"type": "default"}, + }, + ] + else: + assert response.content == [ + { + "type": "text", + "text": "Hello", + }, + { + "type": "non_standard", + "value": { + "cachePoint": {"type": "default"}, + }, + }, + ] + + assert response.content_blocks == [ { "type": "text", "text": "Hello", }, { - "cachePoint": {"type": "default"}, + "type": "non_standard", + "value": { + "cachePoint": {"type": "default"}, + }, }, ] def test_extend_support_to_openai_multimodal_formats() -> None: - """Test that chat models normalize OpenAI file and audio inputs.""" - llm = ParrotFakeChatModel() - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Hello"}, - { - "type": "image_url", - "image_url": {"url": "https://example.com/image.png"}, + """Test normalizing OpenAI audio, image, and file inputs to v1.""" + # Audio and file only (chat model default) + messages = HumanMessage( + content=[ + {"type": "text", "text": "Hello"}, + { # audio-base64 + "type": "input_audio", + "input_audio": { + "format": "wav", + "data": "data:audio/wav;base64,", }, - { - "type": "image_url", - "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."}, + }, + { # file-base64 + "type": "file", + "file": { + "filename": "draconomicon.pdf", + "file_data": "data:application/pdf;base64,", }, + }, + { # file-id + "type": "file", + "file": {"file_id": ""}, + }, + ] + ) + + expected_content_messages = HumanMessage( + content=[ + {"type": "text", "text": "Hello"}, # TextContentBlock + { # AudioContentBlock + "type": "audio", + "base64": "data:audio/wav;base64,", + "mime_type": "audio/wav", + }, + { # FileContentBlock + "type": "file", + "base64": "data:application/pdf;base64,", + "mime_type": "application/pdf", + "extras": {"filename": "draconomicon.pdf"}, + }, + { # ... + "type": "file", + "file_id": "", + }, + ] + ) + + normalized_content = _normalize_messages([messages]) + + # Check structure, ignoring auto-generated IDs + assert len(normalized_content) == 1 + normalized_message = normalized_content[0] + assert len(normalized_message.content) == len(expected_content_messages.content) + + assert _content_blocks_equal_ignore_id( + normalized_message.content, expected_content_messages.content + ) + + messages = HumanMessage( + content=[ + {"type": "text", "text": "Hello"}, + { # image-url + "type": "image_url", + "image_url": {"url": "https://example.com/image.png"}, + }, + { # image-base64 + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."}, + }, + { # audio-base64 + "type": "input_audio", + "input_audio": { + "format": "wav", + "data": "data:audio/wav;base64,", + }, + }, + { # file-base64 + "type": "file", + "file": { + "filename": "draconomicon.pdf", + "file_data": "data:application/pdf;base64,", + }, + }, + { # file-id + "type": "file", + "file": {"file_id": ""}, + }, + ] + ) + + expected_content_messages = HumanMessage( + content=[ + {"type": "text", "text": "Hello"}, # TextContentBlock + { # Chat Completions Image becomes ImageContentBlock after invoke + "type": "image", + "url": "https://example.com/image.png", + }, + { # ... 
+ "type": "image", + "base64": "data:image/jpeg;base64,/9j/4AAQSkZJRg...", + "mime_type": "image/jpeg", + }, + { # AudioContentBlock + "type": "audio", + "base64": "data:audio/wav;base64,", + "mime_type": "audio/wav", + }, + { # FileContentBlock + "type": "file", + "base64": "data:application/pdf;base64,", + "mime_type": "application/pdf", + "extras": {"filename": "draconomicon.pdf"}, + }, + { # ... + "type": "file", + "file_id": "", + }, + ] + ) + + +def test_normalize_messages_edge_cases() -> None: + # Test behavior of malformed/unrecognized content blocks + + messages = [ + HumanMessage( + content=[ { - "type": "file", - "file": { - "filename": "draconomicon.pdf", - "file_data": "data:application/pdf;base64,", - }, + "type": "input_image", # Responses API type; not handled + "image_url": "uri", }, { - "type": "file", - "file": { - "file_data": "data:application/pdf;base64,", - }, + # Standard OpenAI Chat Completions type but malformed structure + "type": "input_audio", + "input_audio": "uri", # Should be nested in `audio` }, { "type": "file", - "file": {"file_id": ""}, + "file": "uri", # `file` should be a dict for Chat Completions }, { - "type": "input_audio", - "input_audio": {"data": "", "format": "wav"}, + "type": "input_file", # Responses API type; not handled + "file_data": "uri", + "filename": "file-name", }, - ], - }, - ] - expected_content = [ - {"type": "text", "text": "Hello"}, - { - "type": "image_url", - "image_url": {"url": "https://example.com/image.png"}, - }, - { - "type": "image_url", - "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."}, - }, - { - "type": "file", - "source_type": "base64", - "data": "", - "mime_type": "application/pdf", - "filename": "draconomicon.pdf", - }, - { - "type": "file", - "source_type": "base64", - "data": "", - "mime_type": "application/pdf", - }, - { - "type": "file", - "file": {"file_id": ""}, - }, - { - "type": "audio", - "source_type": "base64", - "data": "", - "mime_type": "audio/wav", - }, + ] + ) ] - response = llm.invoke(messages) - assert response.content == expected_content - # Test no mutation - assert messages[0]["content"] == [ - {"type": "text", "text": "Hello"}, - { - "type": "image_url", - "image_url": {"url": "https://example.com/image.png"}, - }, - { - "type": "image_url", - "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."}, - }, - { - "type": "file", - "file": { - "filename": "draconomicon.pdf", - "file_data": "data:application/pdf;base64,", - }, - }, - { - "type": "file", - "file": { - "file_data": "data:application/pdf;base64,", - }, - }, - { - "type": "file", - "file": {"file_id": ""}, - }, - { - "type": "input_audio", - "input_audio": {"data": "", "format": "wav"}, - }, - ] + assert messages == _normalize_messages(messages) -def test_normalize_messages_edge_cases() -> None: - # Test some blocks that should pass through - messages = [ +def test_normalize_messages_v1_content_blocks_unchanged() -> None: + """Test passing v1 content blocks to `_normalize_messages()` leaves unchanged.""" + input_messages = [ HumanMessage( content=[ { - "type": "file", - "file": "uri", + "type": "text", + "text": "Hello world", }, { - "type": "input_file", - "file_data": "uri", - "filename": "file-name", + "type": "image", + "url": "https://example.com/image.png", + "mime_type": "image/png", }, { - "type": "input_audio", - "input_audio": "uri", + "type": "audio", + "base64": "base64encodedaudiodata", + "mime_type": "audio/wav", }, { - "type": "input_image", - "image_url": "uri", + "type": "file", + "id": 
"file_123", + }, + { + "type": "reasoning", + "reasoning": "Let me think about this...", }, ] ) ] - assert messages == _normalize_messages(messages) + + result = _normalize_messages(input_messages) + + # Verify the result is identical to the input (message should not be copied) + assert len(result) == 1 + assert result[0] is input_messages[0] + assert result[0].content == input_messages[0].content def test_output_version_invoke(monkeypatch: Any) -> None: @@ -679,18 +822,24 @@ def test_output_version_invoke(monkeypatch: Any) -> None: assert response.response_metadata["output_version"] == "v1" +# -- v1 output version tests -- + + async def test_output_version_ainvoke(monkeypatch: Any) -> None: messages = [AIMessage("hello")] + # v0 + llm = GenericFakeChatModel(messages=iter(messages)) + response = await llm.ainvoke("hello") + assert response.content == "hello" + + # v1 llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") response = await llm.ainvoke("hello") assert response.content == [{"type": "text", "text": "hello"}] assert response.response_metadata["output_version"] == "v1" - llm = GenericFakeChatModel(messages=iter(messages)) - response = await llm.ainvoke("hello") - assert response.content == "hello" - + # v1 from env var monkeypatch.setenv("LC_OUTPUT_VERSION", "v1") llm = GenericFakeChatModel(messages=iter(messages)) response = await llm.ainvoke("hello") @@ -701,20 +850,7 @@ async def test_output_version_ainvoke(monkeypatch: Any) -> None: def test_output_version_stream(monkeypatch: Any) -> None: messages = [AIMessage("foo bar")] - llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") - full: Optional[BaseMessageChunk] = None - for chunk in llm.stream("hello"): - assert isinstance(chunk, AIMessageChunk) - assert isinstance(chunk.content, list) - assert len(chunk.content) == 1 - block = chunk.content[0] - assert isinstance(block, dict) - assert block["type"] == "text" - assert block["text"] - full = chunk if full is None else full + chunk - assert isinstance(full, AIMessageChunk) - assert full.response_metadata["output_version"] == "v1" - + # v0 llm = GenericFakeChatModel(messages=iter(messages)) full = None for chunk in llm.stream("hello"): @@ -725,9 +861,9 @@ def test_output_version_stream(monkeypatch: Any) -> None: assert isinstance(full, AIMessageChunk) assert full.content == "foo bar" - monkeypatch.setenv("LC_OUTPUT_VERSION", "v1") - llm = GenericFakeChatModel(messages=iter(messages)) - full = None + # v1 + llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") + full_v1: Optional[BaseMessageChunk] = None for chunk in llm.stream("hello"): assert isinstance(chunk, AIMessageChunk) assert isinstance(chunk.content, list) @@ -736,17 +872,15 @@ def test_output_version_stream(monkeypatch: Any) -> None: assert isinstance(block, dict) assert block["type"] == "text" assert block["text"] - full = chunk if full is None else full + chunk - assert isinstance(full, AIMessageChunk) - assert full.response_metadata["output_version"] == "v1" - - -async def test_output_version_astream(monkeypatch: Any) -> None: - messages = [AIMessage("foo bar")] + full_v1 = chunk if full_v1 is None else full_v1 + chunk + assert isinstance(full_v1, AIMessageChunk) + assert full_v1.response_metadata["output_version"] == "v1" - llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") - full: Optional[BaseMessageChunk] = None - async for chunk in llm.astream("hello"): + # v1 from env var + monkeypatch.setenv("LC_OUTPUT_VERSION", "v1") + llm = 
GenericFakeChatModel(messages=iter(messages)) + full_env = None + for chunk in llm.stream("hello"): assert isinstance(chunk, AIMessageChunk) assert isinstance(chunk.content, list) assert len(chunk.content) == 1 @@ -754,10 +888,15 @@ async def test_output_version_astream(monkeypatch: Any) -> None: assert isinstance(block, dict) assert block["type"] == "text" assert block["text"] - full = chunk if full is None else full + chunk - assert isinstance(full, AIMessageChunk) - assert full.response_metadata["output_version"] == "v1" + full_env = chunk if full_env is None else full_env + chunk + assert isinstance(full_env, AIMessageChunk) + assert full_env.response_metadata["output_version"] == "v1" + +async def test_output_version_astream(monkeypatch: Any) -> None: + messages = [AIMessage("foo bar")] + + # v0 llm = GenericFakeChatModel(messages=iter(messages)) full = None async for chunk in llm.astream("hello"): @@ -768,9 +907,25 @@ async def test_output_version_astream(monkeypatch: Any) -> None: assert isinstance(full, AIMessageChunk) assert full.content == "foo bar" + # v1 + llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") + full_v1: Optional[BaseMessageChunk] = None + async for chunk in llm.astream("hello"): + assert isinstance(chunk, AIMessageChunk) + assert isinstance(chunk.content, list) + assert len(chunk.content) == 1 + block = chunk.content[0] + assert isinstance(block, dict) + assert block["type"] == "text" + assert block["text"] + full_v1 = chunk if full_v1 is None else full_v1 + chunk + assert isinstance(full_v1, AIMessageChunk) + assert full_v1.response_metadata["output_version"] == "v1" + + # v1 from env var monkeypatch.setenv("LC_OUTPUT_VERSION", "v1") llm = GenericFakeChatModel(messages=iter(messages)) - full = None + full_env = None async for chunk in llm.astream("hello"): assert isinstance(chunk, AIMessageChunk) assert isinstance(chunk.content, list) @@ -779,6 +934,6 @@ async def test_output_version_astream(monkeypatch: Any) -> None: assert isinstance(block, dict) assert block["type"] == "text" assert block["text"] - full = chunk if full is None else full + chunk - assert isinstance(full, AIMessageChunk) - assert full.response_metadata["output_version"] == "v1" + full_env = chunk if full_env is None else full_env + chunk + assert isinstance(full_env, AIMessageChunk) + assert full_env.response_metadata["output_version"] == "v1" diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_rate_limiting.py b/libs/core/tests/unit_tests/language_models/chat_models/test_rate_limiting.py index c4d6a50f6bedb..0411915c26925 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_rate_limiting.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_rate_limiting.py @@ -214,8 +214,8 @@ def test_rate_limit_skips_cache() -> None: assert list(cache._cache) == [ ( '[{"lc": 1, "type": "constructor", "id": ["langchain", "schema", ' - '"messages", ' - '"HumanMessage"], "kwargs": {"content": "foo", "type": "human"}}]', + '"messages", "HumanMessage"], "kwargs": {"content": "foo", ' + '"type": "human"}}]', "[('_type', 'generic-fake-chat-model'), ('stop', None)]", ) ] @@ -241,7 +241,8 @@ def test_serialization_with_rate_limiter() -> None: assert InMemoryRateLimiter.__name__ not in serialized_model -async def test_rate_limit_skips_cache_async() -> None: +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +async def test_rate_limit_skips_cache_async(output_version: str) -> None: """Test that rate limiting does not rate limit cache 
look ups.""" cache = InMemoryCache() model = GenericFakeChatModel( @@ -250,6 +251,7 @@ async def test_rate_limit_skips_cache_async() -> None: requests_per_second=20, check_every_n_seconds=0.1, max_bucket_size=1 ), cache=cache, + output_version=output_version, ) tic = time.time() diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py index 27f126a314fdd..3602d9eb08d03 100644 --- a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py +++ b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py @@ -1,7 +1,39 @@ from typing import Optional +from langchain_core.language_models.fake_chat_models import ParrotFakeChatModel from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types +from tests.unit_tests.language_models.chat_models.test_base import ( + _content_blocks_equal_ignore_id, +) + + +def test_v0_to_v1_content_blocks() -> None: + llm = ParrotFakeChatModel() + messages = [ + { + "role": "user", + # v0 format + "content": [ + { + "type": "image", + "source_type": "url", + "url": "https://example.com/image.png", + } + ], + } + ] + response = llm.invoke(messages) + assert len(response.content_blocks) == 1 + expected_content_blocks = [ + { + "type": "image", + "url": "https://example.com/image.png", + } + ] + assert _content_blocks_equal_ignore_id( + response.content_blocks, expected_content_blocks + ) def test_convert_to_v1_from_responses() -> None: From 26833f2ebc66f1d4e3e5748762bdbed37fe40468 Mon Sep 17 00:00:00 2001 From: ccurme Date: Fri, 22 Aug 2025 17:06:53 -0300 Subject: [PATCH 44/56] feat(anthropic): v1 support (#32623) --- .../langchain_core/language_models/_utils.py | 2 +- .../language_models/chat_models.py | 2 +- libs/core/langchain_core/messages/ai.py | 10 +- libs/core/langchain_core/messages/base.py | 43 +- .../messages/block_translators/__init__.py | 76 +-- .../block_translators/amazon/__init__.py | 1 - .../messages/block_translators/anthropic.py | 414 ++++++++++++++++- .../block_translators/{amazon => }/bedrock.py | 26 +- .../{amazon => }/bedrock_converse.py | 28 +- .../messages/block_translators/chroma.py | 27 -- .../block_translators/google/__init__.py | 1 - .../{google/genai.py => google_genai.py} | 22 +- .../vertexai.py => google_vertexai.py} | 24 +- .../messages/block_translators/groq.py | 22 +- .../{langchain.py => langchain_v0.py} | 43 +- .../messages/block_translators/ollama.py | 22 +- .../messages/block_translators/openai.py | 51 +- libs/core/langchain_core/messages/content.py | 8 - libs/core/langchain_core/utils/_merge.py | 34 +- .../language_models/chat_models/test_base.py | 8 +- .../block_translators/test_anthropic.py | 439 ++++++++++++++++++ .../block_translators/test_langchain_v0.py | 79 ++++ .../messages/block_translators/test_openai.py | 92 ++-- .../block_translators/test_registration.py | 29 ++ .../core/tests/unit_tests/messages/test_ai.py | 93 ++++ .../anthropic/langchain_anthropic/_compat.py | 245 ++++++++++ .../langchain_anthropic/chat_models.py | 95 +++- .../tests/cassettes/test_agent_loop.yaml.gz | Bin 0 -> 2028 bytes .../test_agent_loop_streaming.yaml.gz | Bin 0 -> 3179 bytes .../tests/cassettes/test_citations.yaml.gz | Bin 0 -> 3388 bytes .../integration_tests/test_chat_models.py | 272 +++++++++-- .../__snapshots__/test_standard.ambr | 1 + .../tests/unit_tests/test_chat_models.py | 181 +++++++- 33 files changed, 2125 insertions(+), 265 deletions(-) delete mode 100644 
libs/core/langchain_core/messages/block_translators/amazon/__init__.py
 rename libs/core/langchain_core/messages/block_translators/{amazon => }/bedrock.py (55%)
 rename libs/core/langchain_core/messages/block_translators/{amazon => }/bedrock_converse.py (54%)
 delete mode 100644 libs/core/langchain_core/messages/block_translators/chroma.py
 delete mode 100644 libs/core/langchain_core/messages/block_translators/google/__init__.py
 rename libs/core/langchain_core/messages/block_translators/{google/genai.py => google_genai.py} (60%)
 rename libs/core/langchain_core/messages/block_translators/{google/vertexai.py => google_vertexai.py} (59%)
 rename libs/core/langchain_core/messages/block_translators/{langchain.py => langchain_v0.py} (89%)
 create mode 100644 libs/core/tests/unit_tests/messages/block_translators/test_anthropic.py
 create mode 100644 libs/core/tests/unit_tests/messages/block_translators/test_langchain_v0.py
 create mode 100644 libs/core/tests/unit_tests/messages/block_translators/test_registration.py
 create mode 100644 libs/partners/anthropic/langchain_anthropic/_compat.py
 create mode 100644 libs/partners/anthropic/tests/cassettes/test_agent_loop.yaml.gz
 create mode 100644 libs/partners/anthropic/tests/cassettes/test_agent_loop_streaming.yaml.gz
 create mode 100644 libs/partners/anthropic/tests/cassettes/test_citations.yaml.gz

diff --git a/libs/core/langchain_core/language_models/_utils.py b/libs/core/langchain_core/language_models/_utils.py
index 94680674e3a64..cb80fedb3dd2b 100644
--- a/libs/core/langchain_core/language_models/_utils.py
+++ b/libs/core/langchain_core/language_models/_utils.py
@@ -212,7 +212,7 @@ def _normalize_messages(
         }
     """
-    from langchain_core.messages.block_translators.langchain import (
+    from langchain_core.messages.block_translators.langchain_v0 import (
         _convert_legacy_v0_content_block_to_v1,
         _convert_openai_format_to_data_block,
     )
diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py
index cfd648c2d0c3e..33331e512eb34 100644
--- a/libs/core/langchain_core/language_models/chat_models.py
+++ b/libs/core/langchain_core/language_models/chat_models.py
@@ -124,7 +124,7 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
                 if (
                     block.get("type") == "image"
                     and is_data_content_block(block)
-                    and block.get("source_type") != "id"
+                    and not ("file_id" in block or block.get("source_type") == "id")
                 ):
                     if message_to_trace is message:
                         # Shallow copy
diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py
index 8fd48c5027eff..31be4dbca4e0a 100644
--- a/libs/core/langchain_core/messages/ai.py
+++ b/libs/core/langchain_core/messages/ai.py
@@ -231,7 +231,10 @@ def content_blocks(self) -> list[types.ContentBlock]:
         translator = get_translator(model_provider)
         if translator:
-            return translator["translate_content"](self)
+            try:
+                return translator["translate_content"](self)
+            except NotImplementedError:
+                pass
 
         # Otherwise, use best-effort parsing
         blocks = super().content_blocks
@@ -380,7 +383,10 @@ def content_blocks(self) -> list[types.ContentBlock]:
         translator = get_translator(model_provider)
         if translator:
-            return translator["translate_content_chunk"](self)
+            try:
+                return translator["translate_content_chunk"](self)
+            except NotImplementedError:
+                pass
 
         # Otherwise, use best-effort parsing
         blocks = super().content_blocks
diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py
index
3452740b46ef2..89008c8c42998 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -7,13 +7,7 @@ from pydantic import ConfigDict, Field from langchain_core.load.serializable import Serializable -from langchain_core.messages.block_translators.langchain import ( - _convert_legacy_v0_content_block_to_v1, - _convert_v0_multimodal_input_to_v1, -) -from langchain_core.messages.block_translators.openai import ( - _convert_to_v1_from_chat_completions_input, -) +from langchain_core.messages import content as types from langchain_core.utils import get_bolded_text from langchain_core.utils._merge import merge_dicts, merge_lists from langchain_core.utils.interactive_env import is_interactive_env @@ -21,7 +15,6 @@ if TYPE_CHECKING: from collections.abc import Sequence - from langchain_core.messages import content as types from langchain_core.prompts.chat import ChatPromptTemplate @@ -129,6 +122,15 @@ def content_blocks(self) -> list[types.ContentBlock]: """ from langchain_core.messages import content as types + from langchain_core.messages.block_translators.anthropic import ( + _convert_to_v1_from_anthropic_input, + ) + from langchain_core.messages.block_translators.langchain_v0 import ( + _convert_v0_multimodal_input_to_v1, + ) + from langchain_core.messages.block_translators.openai import ( + _convert_to_v1_from_chat_completions_input, + ) blocks: list[types.ContentBlock] = [] @@ -143,26 +145,19 @@ def content_blocks(self) -> list[types.ContentBlock]: blocks.append({"type": "text", "text": item}) elif isinstance(item, dict): item_type = item.get("type") - # Try to convert potential v0 format first - converted_block = _convert_legacy_v0_content_block_to_v1(item) - if converted_block is not item: # Conversion happened - blocks.append(cast("types.ContentBlock", converted_block)) - elif item_type is None or item_type not in types.KNOWN_BLOCK_TYPES: - blocks.append( - cast( - "types.ContentBlock", - {"type": "non_standard", "value": item}, - ) - ) + if item_type not in types.KNOWN_BLOCK_TYPES: + blocks.append({"type": "non_standard", "value": item}) else: blocks.append(cast("types.ContentBlock", item)) # Subsequent passes: attempt to unpack non-standard blocks - blocks = _convert_v0_multimodal_input_to_v1(blocks) - # blocks = _convert_to_v1_from_anthropic_input(blocks) - # ... - - return _convert_to_v1_from_chat_completions_input(blocks) + for parsing_step in [ + _convert_v0_multimodal_input_to_v1, + _convert_to_v1_from_chat_completions_input, + _convert_to_v1_from_anthropic_input, + ]: + blocks = parsing_step(blocks) + return blocks def text(self) -> str: """Get the text content of the message. 
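Taken together, the base.py changes above give ``content_blocks`` a fixed pipeline of parsing passes: v0 multimodal input, then Chat Completions input, then Anthropic input. A minimal sketch of the intended end-to-end behavior, mirroring the unit tests added later in this patch (the base64 payload is a placeholder):

    from langchain_core.messages import HumanMessage

    message = HumanMessage(
        [
            "Summarize this file.",  # bare strings become text blocks
            {  # Anthropic-native document block, unpacked by the Anthropic pass
                "type": "document",
                "source": {
                    "type": "base64",
                    "data": "<base64-data>",
                    "media_type": "application/pdf",
                },
            },
            {"foo": "bar"},  # unknown shape is wrapped as non_standard
        ]
    )
    assert message.content_blocks == [
        {"type": "text", "text": "Summarize this file."},
        {"type": "file", "base64": "<base64-data>", "mime_type": "application/pdf"},
        {"type": "non_standard", "value": {"foo": "bar"}},
    ]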
diff --git a/libs/core/langchain_core/messages/block_translators/__init__.py b/libs/core/langchain_core/messages/block_translators/__init__.py
index ff58558713d13..bb9673a7c373b 100644
--- a/libs/core/langchain_core/messages/block_translators/__init__.py
+++ b/libs/core/langchain_core/messages/block_translators/__init__.py
@@ -45,37 +45,45 @@ def get_translator(
     return PROVIDER_TRANSLATORS.get(provider)
 
 
-def _auto_register_translators() -> None:
-    """Automatically register all available block translators."""
-    import contextlib
-    import importlib
-    import pkgutil
-    from pathlib import Path
-
-    package_path = Path(__file__).parent
-
-    # Discover all sub-modules
-    for module_info in pkgutil.iter_modules([str(package_path)]):
-        module_name = module_info.name
-
-        # Skip the __init__ module and any private modules
-        if module_name.startswith("_"):
-            continue
-
-        if module_info.ispkg:
-            # For subpackages, discover their submodules
-            subpackage_path = package_path / module_name
-            for submodule_info in pkgutil.iter_modules([str(subpackage_path)]):
-                submodule_name = submodule_info.name
-                if not submodule_name.startswith("_"):
-                    with contextlib.suppress(ImportError, AttributeError):
-                        importlib.import_module(
-                            f".{module_name}.{submodule_name}", package=__name__
-                        )
-        else:
-            # Import top-level translator modules
-            with contextlib.suppress(ImportError, AttributeError):
-                importlib.import_module(f".{module_name}", package=__name__)
-
-
-_auto_register_translators()
+def _register_translators() -> None:
+    """Register all translators in langchain-core.
+
+    A unit test ensures all modules in ``block_translators`` are represented here.
+
+    Translators implemented outside langchain-core can be registered by calling
+    ``register_translator`` from within the integration package.
+ """ + from langchain_core.messages.block_translators.anthropic import ( + _register_anthropic_translator, + ) + from langchain_core.messages.block_translators.bedrock import ( + _register_bedrock_translator, + ) + from langchain_core.messages.block_translators.bedrock_converse import ( + _register_bedrock_converse_translator, + ) + from langchain_core.messages.block_translators.google_genai import ( + _register_google_genai_translator, + ) + from langchain_core.messages.block_translators.google_vertexai import ( + _register_google_vertexai_translator, + ) + from langchain_core.messages.block_translators.groq import _register_groq_translator + from langchain_core.messages.block_translators.ollama import ( + _register_ollama_translator, + ) + from langchain_core.messages.block_translators.openai import ( + _register_openai_translator, + ) + + _register_bedrock_translator() + _register_bedrock_converse_translator() + _register_anthropic_translator() + _register_google_genai_translator() + _register_google_vertexai_translator() + _register_groq_translator() + _register_ollama_translator() + _register_openai_translator() + + +_register_translators() diff --git a/libs/core/langchain_core/messages/block_translators/amazon/__init__.py b/libs/core/langchain_core/messages/block_translators/amazon/__init__.py deleted file mode 100644 index 1fbfad4912db7..0000000000000 --- a/libs/core/langchain_core/messages/block_translators/amazon/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Derivations of standard content blocks from Amazon content.""" diff --git a/libs/core/langchain_core/messages/block_translators/anthropic.py b/libs/core/langchain_core/messages/block_translators/anthropic.py index 469b3812a570e..8f0b3919fa452 100644 --- a/libs/core/langchain_core/messages/block_translators/anthropic.py +++ b/libs/core/langchain_core/messages/block_translators/anthropic.py @@ -1,17 +1,423 @@ """Derivations of standard content blocks from Anthropic content.""" +import json +from collections.abc import Iterable +from typing import Any, cast + from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types +def _populate_extras( + standard_block: types.ContentBlock, block: dict[str, Any], known_fields: set[str] +) -> types.ContentBlock: + """Mutate a block, populating extras.""" + if standard_block.get("type") == "non_standard": + return standard_block + + for key, value in block.items(): + if key not in known_fields: + if "extras" not in block: + # Below type-ignores are because mypy thinks a non-standard block can + # get here, although we exclude them above. 
+ standard_block["extras"] = {} # type: ignore[typeddict-unknown-key] + standard_block["extras"][key] = value # type: ignore[typeddict-item] + + return standard_block + + +def _convert_to_v1_from_anthropic_input( + content: list[types.ContentBlock], +) -> list[types.ContentBlock]: + """Attempt to unpack non-standard blocks.""" + + def _iter_blocks() -> Iterable[types.ContentBlock]: + blocks: list[dict[str, Any]] = [ + cast("dict[str, Any]", block) + if block.get("type") != "non_standard" + else block["value"] # type: ignore[typeddict-item] # this is only non-standard blocks + for block in content + ] + for block in blocks: + block_type = block.get("type") + + if ( + block_type == "document" + and "source" in block + and "type" in block["source"] + ): + if block["source"]["type"] == "base64": + file_block: types.FileContentBlock = { + "type": "file", + "base64": block["source"]["data"], + "mime_type": block["source"]["media_type"], + } + _populate_extras(file_block, block, {"type", "source"}) + yield file_block + + elif block["source"]["type"] == "url": + file_block = { + "type": "file", + "url": block["source"]["url"], + } + _populate_extras(file_block, block, {"type", "source"}) + yield file_block + + elif block["source"]["type"] == "file": + file_block = { + "type": "file", + "id": block["source"]["file_id"], + } + _populate_extras(file_block, block, {"type", "source"}) + yield file_block + + elif block["source"]["type"] == "text": + plain_text_block: types.PlainTextContentBlock = { + "type": "text-plain", + "text": block["source"]["data"], + "mime_type": block.get("media_type", "text/plain"), + } + _populate_extras(plain_text_block, block, {"type", "source"}) + yield plain_text_block + + else: + yield {"type": "non_standard", "value": block} + + elif ( + block_type == "image" + and "source" in block + and "type" in block["source"] + ): + if block["source"]["type"] == "base64": + image_block: types.ImageContentBlock = { + "type": "image", + "base64": block["source"]["data"], + "mime_type": block["source"]["media_type"], + } + _populate_extras(image_block, block, {"type", "source"}) + yield image_block + + elif block["source"]["type"] == "url": + image_block = { + "type": "image", + "url": block["source"]["url"], + } + _populate_extras(image_block, block, {"type", "source"}) + yield image_block + + elif block["source"]["type"] == "file": + image_block = { + "type": "image", + "id": block["source"]["file_id"], + } + _populate_extras(image_block, block, {"type", "source"}) + yield image_block + + else: + yield {"type": "non_standard", "value": block} + + elif block_type in types.KNOWN_BLOCK_TYPES: + yield cast("types.ContentBlock", block) + + else: + yield {"type": "non_standard", "value": block} + + return list(_iter_blocks()) + + +def _convert_citation_to_v1(citation: dict[str, Any]) -> types.Annotation: + citation_type = citation.get("type") + + if citation_type == "web_search_result_location": + url_citation: types.Citation = { + "type": "citation", + "cited_text": citation["cited_text"], + "url": citation["url"], + } + if title := citation.get("title"): + url_citation["title"] = title + known_fields = {"type", "cited_text", "url", "title", "index", "extras"} + for key, value in citation.items(): + if key not in known_fields: + if "extras" not in url_citation: + url_citation["extras"] = {} + url_citation["extras"][key] = value + + return url_citation + + if citation_type in ( + "char_location", + "content_block_location", + "page_location", + "search_result_location", + ): + 
document_citation: types.Citation = { + "type": "citation", + "cited_text": citation["cited_text"], + } + if "document_title" in citation: + document_citation["title"] = citation["document_title"] + elif title := citation.get("title"): + document_citation["title"] = title + else: + pass + known_fields = { + "type", + "cited_text", + "document_title", + "title", + "index", + "extras", + } + for key, value in citation.items(): + if key not in known_fields: + if "extras" not in document_citation: + document_citation["extras"] = {} + document_citation["extras"][key] = value + + return document_citation + + return { + "type": "non_standard_annotation", + "value": citation, + } + + +def _convert_to_v1_from_anthropic(message: AIMessage) -> list[types.ContentBlock]: + """Convert Anthropic message content to v1 format.""" + if isinstance(message.content, str): + message.content = [{"type": "text", "text": message.content}] + + def _iter_blocks() -> Iterable[types.ContentBlock]: + for block in message.content: + if not isinstance(block, dict): + continue + block_type = block.get("type") + + if block_type == "text": + if citations := block.get("citations"): + text_block: types.TextContentBlock = { + "type": "text", + "text": block.get("text", ""), + "annotations": [_convert_citation_to_v1(a) for a in citations], + } + else: + text_block = {"type": "text", "text": block["text"]} + if "index" in block: + text_block["index"] = block["index"] + yield text_block + + elif block_type == "thinking": + reasoning_block: types.ReasoningContentBlock = { + "type": "reasoning", + "reasoning": block.get("thinking", ""), + } + if "index" in block: + reasoning_block["index"] = block["index"] + known_fields = {"type", "thinking", "index", "extras"} + for key in block: + if key not in known_fields: + if "extras" not in reasoning_block: + reasoning_block["extras"] = {} + reasoning_block["extras"][key] = block[key] + yield reasoning_block + + elif block_type == "tool_use": + if ( + isinstance(message, AIMessageChunk) + and len(message.tool_call_chunks) == 1 + ): + tool_call_chunk: types.ToolCallChunk = ( + message.tool_call_chunks[0].copy() # type: ignore[assignment] + ) + if "type" not in tool_call_chunk: + tool_call_chunk["type"] = "tool_call_chunk" + yield tool_call_chunk + elif ( + not isinstance(message, AIMessageChunk) + and len(message.tool_calls) == 1 + ): + tool_call_block = message.tool_calls[0] + if "index" in block: + tool_call_block["index"] = block["index"] + yield tool_call_block + else: + tool_call_block = { + "type": "tool_call", + "name": block.get("name", ""), + "args": block.get("input", {}), + "id": block.get("id", ""), + } + yield tool_call_block + + elif ( + block_type == "input_json_delta" + and isinstance(message, AIMessageChunk) + and len(message.tool_call_chunks) == 1 + ): + tool_call_chunk = ( + message.tool_call_chunks[0].copy() # type: ignore[assignment] + ) + if "type" not in tool_call_chunk: + tool_call_chunk["type"] = "tool_call_chunk" + yield tool_call_chunk + + elif block_type == "server_tool_use": + if block.get("name") == "web_search": + web_search_call: types.WebSearchCall = {"type": "web_search_call"} + + if query := block.get("input", {}).get("query"): + web_search_call["query"] = query + + elif block.get("input") == {} and "partial_json" in block: + try: + input_ = json.loads(block["partial_json"]) + if isinstance(input_, dict) and "query" in input_: + web_search_call["query"] = input_["query"] + except json.JSONDecodeError: + pass + + if "id" in block: + web_search_call["id"] = 
block["id"] + if "index" in block: + web_search_call["index"] = block["index"] + known_fields = {"type", "name", "input", "id", "index"} + for key, value in block.items(): + if key not in known_fields: + if "extras" not in web_search_call: + web_search_call["extras"] = {} + web_search_call["extras"][key] = value + yield web_search_call + + elif block.get("name") == "code_execution": + code_interpreter_call: types.CodeInterpreterCall = { + "type": "code_interpreter_call" + } + + if code := block.get("input", {}).get("code"): + code_interpreter_call["code"] = code + + elif block.get("input") == {} and "partial_json" in block: + try: + input_ = json.loads(block["partial_json"]) + if isinstance(input_, dict) and "code" in input_: + code_interpreter_call["code"] = input_["code"] + except json.JSONDecodeError: + pass + + if "id" in block: + code_interpreter_call["id"] = block["id"] + if "index" in block: + code_interpreter_call["index"] = block["index"] + known_fields = {"type", "name", "input", "id", "index"} + for key, value in block.items(): + if key not in known_fields: + if "extras" not in code_interpreter_call: + code_interpreter_call["extras"] = {} + code_interpreter_call["extras"][key] = value + yield code_interpreter_call + + else: + new_block: types.NonStandardContentBlock = { + "type": "non_standard", + "value": block, + } + if "index" in new_block["value"]: + new_block["index"] = new_block["value"].pop("index") + yield new_block + + elif block_type == "web_search_tool_result": + web_search_result: types.WebSearchResult = {"type": "web_search_result"} + if "tool_use_id" in block: + web_search_result["id"] = block["tool_use_id"] + if "index" in block: + web_search_result["index"] = block["index"] + + if web_search_result_content := block.get("content", []): + if "extras" not in web_search_result: + web_search_result["extras"] = {} + urls = [] + extra_content = [] + for result_content in web_search_result_content: + if isinstance(result_content, dict): + if "url" in result_content: + urls.append(result_content["url"]) + extra_content.append(result_content) + web_search_result["extras"]["content"] = extra_content + if urls: + web_search_result["urls"] = urls + yield web_search_result + + elif block_type == "code_execution_tool_result": + code_interpreter_result: types.CodeInterpreterResult = { + "type": "code_interpreter_result", + "output": [], + } + if "tool_use_id" in block: + code_interpreter_result["id"] = block["tool_use_id"] + if "index" in block: + code_interpreter_result["index"] = block["index"] + + code_interpreter_output: types.CodeInterpreterOutput = { + "type": "code_interpreter_output" + } + + code_execution_content = block.get("content", {}) + if code_execution_content.get("type") == "code_execution_result": + if "return_code" in code_execution_content: + code_interpreter_output["return_code"] = code_execution_content[ + "return_code" + ] + if "stdout" in code_execution_content: + code_interpreter_output["stdout"] = code_execution_content[ + "stdout" + ] + if stderr := code_execution_content.get("stderr"): + code_interpreter_output["stderr"] = stderr + if ( + output := code_interpreter_output.get("content") + ) and isinstance(output, list): + if "extras" not in code_interpreter_result: + code_interpreter_result["extras"] = {} + code_interpreter_result["extras"]["content"] = output + for output_block in output: + if "file_id" in output_block: + if "file_ids" not in code_interpreter_output: + code_interpreter_output["file_ids"] = [] + 
code_interpreter_output["file_ids"].append( + output_block["file_id"] + ) + code_interpreter_result["output"].append(code_interpreter_output) + + elif ( + code_execution_content.get("type") + == "code_execution_tool_result_error" + ): + if "extras" not in code_interpreter_result: + code_interpreter_result["extras"] = {} + code_interpreter_result["extras"]["error_code"] = ( + code_execution_content.get("error_code") + ) + + yield code_interpreter_result + + else: + new_block = {"type": "non_standard", "value": block} + if "index" in new_block["value"]: + new_block["index"] = new_block["value"].pop("index") + yield new_block + + return list(_iter_blocks()) + + def translate_content(message: AIMessage) -> list[types.ContentBlock]: - """Derive standard content blocks from a message with Anthropic content.""" - raise NotImplementedError + """Derive standard content blocks from a message with OpenAI content.""" + return _convert_to_v1_from_anthropic(message) def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: - """Derive standard content blocks from a message chunk with Anthropic content.""" - raise NotImplementedError + """Derive standard content blocks from a message chunk with OpenAI content.""" + return _convert_to_v1_from_anthropic(message) def _register_anthropic_translator() -> None: diff --git a/libs/core/langchain_core/messages/block_translators/amazon/bedrock.py b/libs/core/langchain_core/messages/block_translators/bedrock.py similarity index 55% rename from libs/core/langchain_core/messages/block_translators/amazon/bedrock.py rename to libs/core/langchain_core/messages/block_translators/bedrock.py index 76467152b1028..796d45336b17f 100644 --- a/libs/core/langchain_core/messages/block_translators/amazon/bedrock.py +++ b/libs/core/langchain_core/messages/block_translators/bedrock.py @@ -1,16 +1,34 @@ """Derivations of standard content blocks from Amazon (Bedrock) content.""" +import warnings + from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types +WARNED = False + -def translate_content(message: AIMessage) -> list[types.ContentBlock]: +def translate_content(message: AIMessage) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a message with Bedrock content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Bedrock." + ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError -def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a chunk with Bedrock content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Bedrock." 
+ ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError @@ -21,9 +39,7 @@ def _register_bedrock_translator() -> None: """ from langchain_core.messages.block_translators import register_translator - register_translator( - "amazon_bedrock_chat", translate_content, translate_content_chunk - ) + register_translator("bedrock", translate_content, translate_content_chunk) _register_bedrock_translator() diff --git a/libs/core/langchain_core/messages/block_translators/amazon/bedrock_converse.py b/libs/core/langchain_core/messages/block_translators/bedrock_converse.py similarity index 54% rename from libs/core/langchain_core/messages/block_translators/amazon/bedrock_converse.py rename to libs/core/langchain_core/messages/block_translators/bedrock_converse.py index 5882ef2583bc8..6249c9107a93c 100644 --- a/libs/core/langchain_core/messages/block_translators/amazon/bedrock_converse.py +++ b/libs/core/langchain_core/messages/block_translators/bedrock_converse.py @@ -1,16 +1,36 @@ """Derivations of standard content blocks from Amazon (Bedrock Converse) content.""" +import warnings + from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types +WARNED = False + -def translate_content(message: AIMessage) -> list[types.ContentBlock]: +def translate_content(message: AIMessage) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a message with Bedrock Converse content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Bedrock " + "Converse." + ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError -def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a chunk with Bedrock Converse content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Bedrock " + "Converse." 
+ ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError @@ -21,9 +41,7 @@ def _register_bedrock_converse_translator() -> None: """ from langchain_core.messages.block_translators import register_translator - register_translator( - "amazon_bedrock_converse_chat", translate_content, translate_content_chunk - ) + register_translator("bedrock_converse", translate_content, translate_content_chunk) _register_bedrock_converse_translator() diff --git a/libs/core/langchain_core/messages/block_translators/chroma.py b/libs/core/langchain_core/messages/block_translators/chroma.py deleted file mode 100644 index 652aa8d0e1b0c..0000000000000 --- a/libs/core/langchain_core/messages/block_translators/chroma.py +++ /dev/null @@ -1,27 +0,0 @@ -"""Derivations of standard content blocks from Chroma content.""" - -from langchain_core.messages import AIMessage, AIMessageChunk -from langchain_core.messages import content as types - - -def translate_content(message: AIMessage) -> list[types.ContentBlock]: - """Derive standard content blocks from a message with Chroma content.""" - raise NotImplementedError - - -def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: - """Derive standard content blocks from a message chunk with Chroma content.""" - raise NotImplementedError - - -def _register_chroma_translator() -> None: - """Register the Chroma translator with the central registry. - - Run automatically when the module is imported. - """ - from langchain_core.messages.block_translators import register_translator - - register_translator("chroma", translate_content, translate_content_chunk) - - -_register_chroma_translator() diff --git a/libs/core/langchain_core/messages/block_translators/google/__init__.py b/libs/core/langchain_core/messages/block_translators/google/__init__.py deleted file mode 100644 index 0c3f0698aa2a5..0000000000000 --- a/libs/core/langchain_core/messages/block_translators/google/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Derivations of standard content blocks from Google content.""" diff --git a/libs/core/langchain_core/messages/block_translators/google/genai.py b/libs/core/langchain_core/messages/block_translators/google_genai.py similarity index 60% rename from libs/core/langchain_core/messages/block_translators/google/genai.py rename to libs/core/langchain_core/messages/block_translators/google_genai.py index b9761f94bc44a..bd4de65c3b0ce 100644 --- a/libs/core/langchain_core/messages/block_translators/google/genai.py +++ b/libs/core/langchain_core/messages/block_translators/google_genai.py @@ -1,16 +1,34 @@ """Derivations of standard content blocks from Google (GenAI) content.""" +import warnings + from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types +WARNED = False + -def translate_content(message: AIMessage) -> list[types.ContentBlock]: +def translate_content(message: AIMessage) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a message with Google (GenAI) content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Google GenAI." 
+ ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError -def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a chunk with Google (GenAI) content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Google GenAI." + ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError diff --git a/libs/core/langchain_core/messages/block_translators/google/vertexai.py b/libs/core/langchain_core/messages/block_translators/google_vertexai.py similarity index 59% rename from libs/core/langchain_core/messages/block_translators/google/vertexai.py rename to libs/core/langchain_core/messages/block_translators/google_vertexai.py index ae51fd4065d89..e49ee384058ee 100644 --- a/libs/core/langchain_core/messages/block_translators/google/vertexai.py +++ b/libs/core/langchain_core/messages/block_translators/google_vertexai.py @@ -1,16 +1,36 @@ """Derivations of standard content blocks from Google (VertexAI) content.""" +import warnings + from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types +WARNED = False + -def translate_content(message: AIMessage) -> list[types.ContentBlock]: +def translate_content(message: AIMessage) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a message with Google (VertexAI) content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Google " + "VertexAI." + ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError -def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a chunk with Google (VertexAI) content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Google " + "VertexAI." + ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError diff --git a/libs/core/langchain_core/messages/block_translators/groq.py b/libs/core/langchain_core/messages/block_translators/groq.py index 4b01dfb017f2f..6a96b1775f429 100644 --- a/libs/core/langchain_core/messages/block_translators/groq.py +++ b/libs/core/langchain_core/messages/block_translators/groq.py @@ -1,16 +1,34 @@ """Derivations of standard content blocks from Groq content.""" +import warnings + from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types +WARNED = False + -def translate_content(message: AIMessage) -> list[types.ContentBlock]: +def translate_content(message: AIMessage) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a message with Groq content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Groq." 
+ ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError -def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a message chunk with Groq content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Groq." + ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError diff --git a/libs/core/langchain_core/messages/block_translators/langchain.py b/libs/core/langchain_core/messages/block_translators/langchain_v0.py similarity index 89% rename from libs/core/langchain_core/messages/block_translators/langchain.py rename to libs/core/langchain_core/messages/block_translators/langchain_v0.py index 4b5e4479835a3..5fde4c0fcb0d4 100644 --- a/libs/core/langchain_core/messages/block_translators/langchain.py +++ b/libs/core/langchain_core/messages/block_translators/langchain_v0.py @@ -1,4 +1,4 @@ -"""Derivations of standard content blocks from LangChain content.""" +"""Derivations of standard content blocks from LangChain v0 multimodal content.""" from typing import Any, Union, cast @@ -21,26 +21,20 @@ def _convert_v0_multimodal_input_to_v1( Updated list with v0 blocks converted to v1 format. """ converted_blocks = [] - for block in blocks: - if ( - isinstance(block, dict) - and block.get("type") == "non_standard" - and "value" in block - and isinstance(block["value"], dict) # type: ignore[typeddict-item] - ): - # We know this is a NonStandardContentBlock, so we can safely access value - value = cast("Any", block)["value"] - # Check if this looks like v0 format - if ( - value.get("type") in {"image", "audio", "file"} - and "source_type" in value - ): - converted_block = _convert_legacy_v0_content_block_to_v1(value) - converted_blocks.append(cast("types.ContentBlock", converted_block)) - else: - converted_blocks.append(block) + unpacked_blocks: list[dict[str, Any]] = [ + cast("dict[str, Any]", block) + if block.get("type") != "non_standard" + else block["value"] # type: ignore[typeddict-item] # this is only non-standard blocks + for block in blocks + ] + for block in unpacked_blocks: + if block.get("type") in {"image", "audio", "file"} and "source_type" in block: + converted_block = _convert_legacy_v0_content_block_to_v1(block) + converted_blocks.append(cast("types.ContentBlock", converted_block)) + elif block.get("type") in types.KNOWN_BLOCK_TYPES: + converted_blocks.append(cast("types.ContentBlock", block)) else: - converted_blocks.append(block) + converted_blocks.append({"type": "non_standard", "value": block}) return converted_blocks @@ -213,7 +207,7 @@ def _extract_extras(block_dict: dict, known_keys: set[str]) -> dict[str, Any]: return types.create_image_block( # Even though this is labeled as `url`, it can be base64-encoded - base64=block["image_url"]["url"], + base64=parsed["data"], mime_type=parsed["mime_type"], **all_extras, ) @@ -278,9 +272,7 @@ def _extract_extras(block_dict: dict, known_keys: set[str]) -> dict[str, Any]: ) # base64-style file block - if (block["type"] == "file") and ( - parsed := _parse_data_uri(block["file"]["file_data"]) - ): + if block["type"] == "file": known_keys = {"type", "file"} extras = _extract_extras(block, known_keys) @@ -291,11 +283,10 @@ def _extract_extras(block_dict: dict, known_keys: set[str]) -> dict[str, Any]: for key, value in 
file_extras.items(): all_extras[f"file_{key}"] = value - mime_type = parsed["mime_type"] filename = block["file"].get("filename") return types.create_file_block( base64=block["file"]["file_data"], - mime_type=mime_type, + mime_type="application/pdf", filename=filename, **all_extras, ) diff --git a/libs/core/langchain_core/messages/block_translators/ollama.py b/libs/core/langchain_core/messages/block_translators/ollama.py index a0f41ab76342d..736ecfe06513e 100644 --- a/libs/core/langchain_core/messages/block_translators/ollama.py +++ b/libs/core/langchain_core/messages/block_translators/ollama.py @@ -1,16 +1,34 @@ """Derivations of standard content blocks from Ollama content.""" +import warnings + from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types +WARNED = False + -def translate_content(message: AIMessage) -> list[types.ContentBlock]: +def translate_content(message: AIMessage) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a message with Ollama content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Ollama." + ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError -def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a message chunk with Ollama content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Ollama." + ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError diff --git a/libs/core/langchain_core/messages/block_translators/openai.py b/libs/core/langchain_core/messages/block_translators/openai.py index 029757563b44d..b11e64558aad6 100644 --- a/libs/core/langchain_core/messages/block_translators/openai.py +++ b/libs/core/langchain_core/messages/block_translators/openai.py @@ -9,7 +9,7 @@ _is_openai_data_block, ) from langchain_core.messages import content as types -from langchain_core.messages.block_translators.langchain import ( +from langchain_core.messages.block_translators.langchain_v0 import ( _convert_openai_format_to_data_block, ) @@ -52,34 +52,31 @@ def _convert_to_v1_from_chat_completions_input( from langchain_core.messages import content as types converted_blocks = [] - for block in blocks: - if ( - isinstance(block, dict) - and block.get("type") == "non_standard" - and "value" in block - and isinstance(block["value"], dict) # type: ignore[typeddict-item] - ): - # We know this is a NonStandardContentBlock, so we can safely access value - value = cast("Any", block)["value"] - # Check if this looks like OpenAI format - if value.get("type") in { - "image_url", - "input_audio", - "file", - } and _is_openai_data_block(value): - converted_block = _convert_openai_format_to_data_block(value) - # If conversion succeeded, use it; otherwise keep as non_standard - if ( - isinstance(converted_block, dict) - and converted_block.get("type") in types.KNOWN_BLOCK_TYPES - ): - converted_blocks.append(cast("types.ContentBlock", converted_block)) - else: - converted_blocks.append(block) + unpacked_blocks: list[dict[str, Any]] = [ + cast("dict[str, Any]", block) + if block.get("type") != "non_standard" + else block["value"] # type: ignore[typeddict-item] # this is only non-standard 
blocks + for block in blocks + ] + for block in unpacked_blocks: + if block.get("type") in { + "image_url", + "input_audio", + "file", + } and _is_openai_data_block(block): + converted_block = _convert_openai_format_to_data_block(block) + # If conversion succeeded, use it; otherwise keep as non_standard + if ( + isinstance(converted_block, dict) + and converted_block.get("type") in types.KNOWN_BLOCK_TYPES + ): + converted_blocks.append(cast("types.ContentBlock", converted_block)) else: - converted_blocks.append(block) + converted_blocks.append({"type": "non_standard", "value": block}) + elif block.get("type") in types.KNOWN_BLOCK_TYPES: + converted_blocks.append(cast("types.ContentBlock", block)) else: - converted_blocks.append(block) + converted_blocks.append({"type": "non_standard", "value": block}) return converted_blocks diff --git a/libs/core/langchain_core/messages/content.py b/libs/core/langchain_core/messages/content.py index 83287fb06c8b8..845c3b481cecc 100644 --- a/libs/core/langchain_core/messages/content.py +++ b/libs/core/langchain_core/messages/content.py @@ -503,12 +503,6 @@ class CodeInterpreterOutput(TypedDict): file_ids: NotRequired[list[str]] """List of file IDs generated by the code interpreter.""" - index: NotRequired[Union[int, str]] - """Index of block in aggregate response. Used during streaming.""" - - extras: NotRequired[dict[str, Any]] - """Provider-specific metadata.""" - class CodeInterpreterResult(TypedDict): """Result of a code interpreter tool call.""" @@ -886,7 +880,6 @@ class NonStandardContentBlock(TypedDict): ToolCall, ToolCallChunk, CodeInterpreterCall, - CodeInterpreterOutput, CodeInterpreterResult, WebSearchCall, WebSearchResult, @@ -918,7 +911,6 @@ class NonStandardContentBlock(TypedDict): "video", # Server-side tool calls "code_interpreter_call", - "code_interpreter_output", "code_interpreter_result", "web_search_call", "web_search_result", diff --git a/libs/core/langchain_core/utils/_merge.py b/libs/core/langchain_core/utils/_merge.py index c32b09e2e669c..7b8465e8d0256 100644 --- a/libs/core/langchain_core/utils/_merge.py +++ b/libs/core/langchain_core/utils/_merge.py @@ -116,11 +116,35 @@ def merge_lists(left: Optional[list], *others: Optional[list]) -> Optional[list] if to_merge: # TODO: Remove this once merge_dict is updated with special # handling for 'type'. 
- new_e = ( - {k: v for k, v in e.items() if k != "type"} - if "type" in e - else e - ) + if (left_type := merged[to_merge[0]].get("type")) and ( + e.get("type") == "non_standard" and "value" in e + ): + if left_type != "non_standard": + # standard + non_standard + new_e: dict[str, Any] = { + "extras": { + k: v + for k, v in e["value"].items() + if k != "type" + } + } + else: + # non_standard + non_standard + new_e = { + "value": { + k: v + for k, v in e["value"].items() + if k != "type" + } + } + if "index" in e: + new_e["index"] = e["index"] + else: + new_e = ( + {k: v for k, v in e.items() if k != "type"} + if "type" in e + else e + ) merged[to_merge[0]] = merge_dicts(merged[to_merge[0]], new_e) else: merged.append(e) diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 848fb75091a9f..22d8bc7907f5e 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -621,14 +621,14 @@ def test_extend_support_to_openai_multimodal_formats() -> None: "type": "input_audio", "input_audio": { "format": "wav", - "data": "data:audio/wav;base64,", + "data": "", }, }, { # file-base64 "type": "file", "file": { "filename": "draconomicon.pdf", - "file_data": "data:application/pdf;base64,", + "file_data": "", }, }, { # file-id @@ -643,12 +643,12 @@ def test_extend_support_to_openai_multimodal_formats() -> None: {"type": "text", "text": "Hello"}, # TextContentBlock { # AudioContentBlock "type": "audio", - "base64": "data:audio/wav;base64,", + "base64": "", "mime_type": "audio/wav", }, { # FileContentBlock "type": "file", - "base64": "data:application/pdf;base64,", + "base64": "", "mime_type": "application/pdf", "extras": {"filename": "draconomicon.pdf"}, }, diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_anthropic.py b/libs/core/tests/unit_tests/messages/block_translators/test_anthropic.py new file mode 100644 index 0000000000000..e0f65657b99ca --- /dev/null +++ b/libs/core/tests/unit_tests/messages/block_translators/test_anthropic.py @@ -0,0 +1,439 @@ +from typing import Optional + +from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage +from langchain_core.messages import content as types + + +def test_convert_to_v1_from_anthropic() -> None: + message = AIMessage( + [ + {"type": "thinking", "thinking": "foo", "signature": "foo_signature"}, + {"type": "text", "text": "Let's call a tool."}, + { + "type": "tool_use", + "id": "abc_123", + "name": "get_weather", + "input": {"location": "San Francisco"}, + }, + { + "type": "text", + "text": "It's sunny.", + "citations": [ + { + "type": "search_result_location", + "cited_text": "The weather is sunny.", + "source": "source_123", + "title": "Document Title", + "search_result_index": 1, + "start_block_index": 0, + "end_block_index": 2, + }, + {"bar": "baz"}, + ], + }, + { + "type": "server_tool_use", + "name": "web_search", + "input": {"query": "web search query"}, + "id": "srvtoolu_abc123", + }, + { + "type": "web_search_tool_result", + "tool_use_id": "srvtoolu_abc123", + "content": [ + { + "type": "web_search_result", + "title": "Page Title 1", + "url": "", + "page_age": "January 1, 2025", + "encrypted_content": "", + }, + { + "type": "web_search_result", + "title": "Page Title 2", + "url": "", + "page_age": "January 2, 2025", + "encrypted_content": "", + }, + ], + }, + { + "type": "server_tool_use", + "id": 
"srvtoolu_def456", + "name": "code_execution", + "input": {"code": "import numpy as np..."}, + }, + { + "type": "code_execution_tool_result", + "tool_use_id": "srvtoolu_def456", + "content": { + "type": "code_execution_result", + "stdout": "Mean: 5.5\nStandard deviation...", + "stderr": "", + "return_code": 0, + }, + }, + {"type": "something_else", "foo": "bar"}, + ], + response_metadata={"model_provider": "anthropic"}, + ) + expected_content: list[types.ContentBlock] = [ + { + "type": "reasoning", + "reasoning": "foo", + "extras": {"signature": "foo_signature"}, + }, + {"type": "text", "text": "Let's call a tool."}, + { + "type": "tool_call", + "id": "abc_123", + "name": "get_weather", + "args": {"location": "San Francisco"}, + }, + { + "type": "text", + "text": "It's sunny.", + "annotations": [ + { + "type": "citation", + "title": "Document Title", + "cited_text": "The weather is sunny.", + "extras": { + "source": "source_123", + "search_result_index": 1, + "start_block_index": 0, + "end_block_index": 2, + }, + }, + {"type": "non_standard_annotation", "value": {"bar": "baz"}}, + ], + }, + { + "type": "web_search_call", + "id": "srvtoolu_abc123", + "query": "web search query", + }, + { + "type": "web_search_result", + "id": "srvtoolu_abc123", + "urls": ["", ""], + "extras": { + "content": [ + { + "type": "web_search_result", + "title": "Page Title 1", + "url": "", + "page_age": "January 1, 2025", + "encrypted_content": "", + }, + { + "type": "web_search_result", + "title": "Page Title 2", + "url": "", + "page_age": "January 2, 2025", + "encrypted_content": "", + }, + ] + }, + }, + { + "type": "code_interpreter_call", + "id": "srvtoolu_def456", + "code": "import numpy as np...", + }, + { + "type": "code_interpreter_result", + "id": "srvtoolu_def456", + "output": [ + { + "type": "code_interpreter_output", + "return_code": 0, + "stdout": "Mean: 5.5\nStandard deviation...", + } + ], + }, + { + "type": "non_standard", + "value": {"type": "something_else", "foo": "bar"}, + }, + ] + assert message.content_blocks == expected_content + + # Check no mutation + assert message.content != expected_content + + +def test_convert_to_v1_from_anthropic_chunk() -> None: + chunks = [ + AIMessageChunk( + content=[{"text": "Looking ", "type": "text", "index": 0}], + response_metadata={"model_provider": "anthropic"}, + ), + AIMessageChunk( + content=[{"text": "now.", "type": "text", "index": 0}], + response_metadata={"model_provider": "anthropic"}, + ), + AIMessageChunk( + content=[ + { + "type": "tool_use", + "name": "get_weather", + "input": {}, + "id": "toolu_abc123", + "index": 1, + } + ], + tool_call_chunks=[ + { + "type": "tool_call_chunk", + "name": "get_weather", + "args": "", + "id": "toolu_abc123", + "index": 1, + } + ], + response_metadata={"model_provider": "anthropic"}, + ), + AIMessageChunk( + content=[{"type": "input_json_delta", "partial_json": "", "index": 1}], + tool_call_chunks=[ + { + "name": None, + "args": "", + "id": None, + "index": 1, + "type": "tool_call_chunk", + } + ], + response_metadata={"model_provider": "anthropic"}, + ), + AIMessageChunk( + content=[ + {"type": "input_json_delta", "partial_json": '{"loca', "index": 1} + ], + tool_call_chunks=[ + { + "name": None, + "args": '{"loca', + "id": None, + "index": 1, + "type": "tool_call_chunk", + } + ], + response_metadata={"model_provider": "anthropic"}, + ), + AIMessageChunk( + content=[ + {"type": "input_json_delta", "partial_json": 'tion": "San ', "index": 1} + ], + tool_call_chunks=[ + { + "name": None, + "args": 'tion": "San ', 
+ "id": None, + "index": 1, + "type": "tool_call_chunk", + } + ], + response_metadata={"model_provider": "anthropic"}, + ), + AIMessageChunk( + content=[ + {"type": "input_json_delta", "partial_json": 'Francisco"}', "index": 1} + ], + tool_call_chunks=[ + { + "name": None, + "args": 'Francisco"}', + "id": None, + "index": 1, + "type": "tool_call_chunk", + } + ], + response_metadata={"model_provider": "anthropic"}, + ), + ] + expected_contents: list[types.ContentBlock] = [ + {"type": "text", "text": "Looking ", "index": 0}, + {"type": "text", "text": "now.", "index": 0}, + { + "type": "tool_call_chunk", + "name": "get_weather", + "args": "", + "id": "toolu_abc123", + "index": 1, + }, + {"name": None, "args": "", "id": None, "index": 1, "type": "tool_call_chunk"}, + { + "name": None, + "args": '{"loca', + "id": None, + "index": 1, + "type": "tool_call_chunk", + }, + { + "name": None, + "args": 'tion": "San ', + "id": None, + "index": 1, + "type": "tool_call_chunk", + }, + { + "name": None, + "args": 'Francisco"}', + "id": None, + "index": 1, + "type": "tool_call_chunk", + }, + ] + for chunk, expected in zip(chunks, expected_contents): + assert chunk.content_blocks == [expected] + + full: Optional[AIMessageChunk] = None + for chunk in chunks: + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + + expected_content = [ + {"type": "text", "text": "Looking now.", "index": 0}, + { + "type": "tool_use", + "name": "get_weather", + "partial_json": '{"location": "San Francisco"}', + "input": {}, + "id": "toolu_abc123", + "index": 1, + }, + ] + assert full.content == expected_content + + expected_content_blocks = [ + {"type": "text", "text": "Looking now.", "index": 0}, + { + "type": "tool_call_chunk", + "name": "get_weather", + "args": '{"location": "San Francisco"}', + "id": "toolu_abc123", + "index": 1, + }, + ] + assert full.content_blocks == expected_content_blocks + + +def test_convert_to_v1_from_anthropic_input() -> None: + message = HumanMessage( + [ + {"type": "text", "text": "foo"}, + { + "type": "document", + "source": { + "type": "base64", + "data": "", + "media_type": "application/pdf", + }, + }, + { + "type": "document", + "source": { + "type": "url", + "url": "", + }, + }, + { + "type": "document", + "source": { + "type": "content", + "content": [ + {"type": "text", "text": "The grass is green"}, + {"type": "text", "text": "The sky is blue"}, + ], + }, + "citations": {"enabled": True}, + }, + { + "type": "document", + "source": { + "type": "text", + "data": "", + "media_type": "text/plain", + }, + }, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "", + }, + }, + { + "type": "image", + "source": { + "type": "url", + "url": "", + }, + }, + { + "type": "image", + "source": { + "type": "file", + "file_id": "", + }, + }, + { + "type": "document", + "source": {"type": "file", "file_id": ""}, + }, + ] + ) + + expected: list[types.ContentBlock] = [ + {"type": "text", "text": "foo"}, + { + "type": "file", + "base64": "", + "mime_type": "application/pdf", + }, + { + "type": "file", + "url": "", + }, + { + "type": "non_standard", + "value": { + "type": "document", + "source": { + "type": "content", + "content": [ + {"type": "text", "text": "The grass is green"}, + {"type": "text", "text": "The sky is blue"}, + ], + }, + "citations": {"enabled": True}, + }, + }, + { + "type": "text-plain", + "text": "", + "mime_type": "text/plain", + }, + { + "type": "image", + "base64": "", + "mime_type": "image/jpeg", + }, + 
{ + "type": "image", + "url": "", + }, + { + "type": "image", + "id": "", + }, + { + "type": "file", + "id": "", + }, + ] + + assert message.content_blocks == expected diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_langchain_v0.py b/libs/core/tests/unit_tests/messages/block_translators/test_langchain_v0.py new file mode 100644 index 0000000000000..c586f134075de --- /dev/null +++ b/libs/core/tests/unit_tests/messages/block_translators/test_langchain_v0.py @@ -0,0 +1,79 @@ +from langchain_core.messages import HumanMessage +from langchain_core.messages import content as types +from tests.unit_tests.language_models.chat_models.test_base import ( + _content_blocks_equal_ignore_id, +) + + +def test_convert_to_v1_from_openai_input() -> None: + message = HumanMessage( + content=[ + {"type": "text", "text": "Hello"}, + { + "type": "image", + "source_type": "url", + "url": "https://example.com/image.png", + }, + { + "type": "image", + "source_type": "base64", + "data": "", + "mime_type": "image/png", + }, + { + "type": "file", + "source_type": "url", + "url": "", + }, + { + "type": "file", + "source_type": "base64", + "data": "", + "mime_type": "application/pdf", + }, + { + "type": "audio", + "source_type": "base64", + "data": "", + "mime_type": "audio/mpeg", + }, + { + "type": "file", + "source_type": "id", + "id": "", + }, + ] + ) + + expected: list[types.ContentBlock] = [ + {"type": "text", "text": "Hello"}, + { + "type": "image", + "url": "https://example.com/image.png", + }, + { + "type": "image", + "base64": "", + "mime_type": "image/png", + }, + { + "type": "file", + "url": "", + }, + { + "type": "file", + "base64": "", + "mime_type": "application/pdf", + }, + { + "type": "audio", + "base64": "", + "mime_type": "audio/mpeg", + }, + { + "type": "file", + "file_id": "", + }, + ] + + assert _content_blocks_equal_ignore_id(message.content_blocks, expected) diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py index 3602d9eb08d03..2ed2086ea4443 100644 --- a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py +++ b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py @@ -1,41 +1,12 @@ from typing import Optional -from langchain_core.language_models.fake_chat_models import ParrotFakeChatModel -from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage from langchain_core.messages import content as types from tests.unit_tests.language_models.chat_models.test_base import ( _content_blocks_equal_ignore_id, ) -def test_v0_to_v1_content_blocks() -> None: - llm = ParrotFakeChatModel() - messages = [ - { - "role": "user", - # v0 format - "content": [ - { - "type": "image", - "source_type": "url", - "url": "https://example.com/image.png", - } - ], - } - ] - response = llm.invoke(messages) - assert len(response.content_blocks) == 1 - expected_content_blocks = [ - { - "type": "image", - "url": "https://example.com/image.png", - } - ] - assert _content_blocks_equal_ignore_id( - response.content_blocks, expected_content_blocks - ) - - def test_convert_to_v1_from_responses() -> None: message = AIMessage( [ @@ -261,3 +232,64 @@ def test_convert_to_v1_from_responses_chunk() -> None: }, ] assert full.content_blocks == expected_content_blocks + + +def test_convert_to_v1_from_openai_input() -> None: + message = HumanMessage( + content=[ + {"type": "text", "text": "Hello"}, + { + 
"type": "image_url", + "image_url": {"url": "https://example.com/image.png"}, + }, + { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."}, + }, + { + "type": "input_audio", + "input_audio": { + "format": "wav", + "data": "", + }, + }, + { + "type": "file", + "file": { + "filename": "draconomicon.pdf", + "file_data": "", + }, + }, + { + "type": "file", + "file": {"file_id": ""}, + }, + ] + ) + + expected: list[types.ContentBlock] = [ + {"type": "text", "text": "Hello"}, + { + "type": "image", + "url": "https://example.com/image.png", + }, + { + "type": "image", + "base64": "/9j/4AAQSkZJRg...", + "mime_type": "image/jpeg", + }, + { + "type": "audio", + "base64": "", + "mime_type": "audio/wav", + }, + { + "type": "file", + "base64": "", + "mime_type": "application/pdf", + "extras": {"filename": "draconomicon.pdf"}, + }, + {"type": "file", "file_id": ""}, + ] + + assert _content_blocks_equal_ignore_id(message.content_blocks, expected) diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_registration.py b/libs/core/tests/unit_tests/messages/block_translators/test_registration.py new file mode 100644 index 0000000000000..74c16d30a248a --- /dev/null +++ b/libs/core/tests/unit_tests/messages/block_translators/test_registration.py @@ -0,0 +1,29 @@ +import pkgutil +from pathlib import Path + +import pytest + +from langchain_core.messages.block_translators import PROVIDER_TRANSLATORS + + +def test_all_providers_registered() -> None: + """Test that all block translators implemented in langchain-core are registered. + + If this test fails, it is likely that a block translator is implemented but not + registered on import. Check that the provider is included in + ``langchain_core.messages.block_translators.__init__._register_translators``. + """ + package_path = ( + Path(__file__).parents[4] / "langchain_core" / "messages" / "block_translators" + ) + + for module_info in pkgutil.iter_modules([str(package_path)]): + module_name = module_info.name + + # Skip the __init__ module, any private modules, and ``langchain_v0``, which is + # only used to parse v0 multimodal inputs. 
+ if module_name.startswith("_") or module_name == "langchain_v0": + continue + + if module_name not in PROVIDER_TRANSLATORS: + pytest.fail(f"Block translator not registered: {module_name}") diff --git a/libs/core/tests/unit_tests/messages/test_ai.py b/libs/core/tests/unit_tests/messages/test_ai.py index 67b0a2dc9680c..4f623c0910c87 100644 --- a/libs/core/tests/unit_tests/messages/test_ai.py +++ b/libs/core/tests/unit_tests/messages/test_ai.py @@ -1,3 +1,7 @@ +from typing import Union, cast + +import pytest + from langchain_core.load import dumpd, load from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types @@ -310,3 +314,92 @@ def test_content_blocks() -> None: } ] assert message.content == "" + + # Non-standard + standard_content_1: list[types.ContentBlock] = [ + {"type": "non_standard", "index": 0, "value": {"foo": "bar "}} + ] + standard_content_2: list[types.ContentBlock] = [ + {"type": "non_standard", "index": 0, "value": {"foo": "baz"}} + ] + chunk_1 = AIMessageChunk( + content=cast("Union[str, list[Union[str, dict]]]", standard_content_1) + ) + chunk_2 = AIMessageChunk( + content=cast("Union[str, list[Union[str, dict]]]", standard_content_2) + ) + merged_chunk = chunk_1 + chunk_2 + assert merged_chunk.content == [ + {"type": "non_standard", "index": 0, "value": {"foo": "bar baz"}}, + ] + + # Test non-standard + non-standard + chunk_1 = AIMessageChunk( + content=[ + { + "type": "non_standard", + "index": 0, + "value": {"type": "non_standard_tool", "foo": "bar"}, + } + ] + ) + chunk_2 = AIMessageChunk( + content=[ + { + "type": "non_standard", + "index": 0, + "value": {"type": "input_json_delta", "partial_json": "a"}, + } + ] + ) + chunk_3 = AIMessageChunk( + content=[ + { + "type": "non_standard", + "index": 0, + "value": {"type": "input_json_delta", "partial_json": "b"}, + } + ] + ) + merged_chunk = chunk_1 + chunk_2 + chunk_3 + assert merged_chunk.content == [ + { + "type": "non_standard", + "index": 0, + "value": {"type": "non_standard_tool", "foo": "bar", "partial_json": "ab"}, + } + ] + + # Test standard + non-standard with same index + standard_content_1 = [ + {"type": "web_search_call", "id": "ws_123", "query": "web query", "index": 0} + ] + standard_content_2 = [{"type": "non_standard", "value": {"foo": "bar"}, "index": 0}] + chunk_1 = AIMessageChunk( + content=cast("Union[str, list[Union[str, dict]]]", standard_content_1) + ) + chunk_2 = AIMessageChunk( + content=cast("Union[str, list[Union[str, dict]]]", standard_content_2) + ) + merged_chunk = chunk_1 + chunk_2 + assert merged_chunk.content == [ + { + "type": "web_search_call", + "id": "ws_123", + "query": "web query", + "index": 0, + "extras": {"foo": "bar"}, + } + ] + + +def test_provider_warns() -> None: + # Test that major providers warn if content block standardization is not yet + # implemented. + # This test should be removed when all major providers support content block + # standardization. 
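    # (Aside: until that support lands, callers hitting this path on an affected
    # provider can silence the fallback warning themselves; a minimal sketch,
    # using only the stdlib ``warnings`` module, with an arbitrary provider tag:
    #
    #     import warnings
    #
    #     with warnings.catch_warnings():
    #         warnings.simplefilter("ignore")
    #         blocks = message.content_blocks  # best-effort standard blocks
    # )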
+ message = AIMessage("Hello.", response_metadata={"model_provider": "groq"}) + with pytest.warns(match="not yet fully supported for Groq"): + content_blocks = message.content_blocks + + assert content_blocks == [{"type": "text", "text": "Hello."}] diff --git a/libs/partners/anthropic/langchain_anthropic/_compat.py b/libs/partners/anthropic/langchain_anthropic/_compat.py new file mode 100644 index 0000000000000..3b90416232406 --- /dev/null +++ b/libs/partners/anthropic/langchain_anthropic/_compat.py @@ -0,0 +1,245 @@ +from __future__ import annotations + +import json +from typing import Any, Optional, cast + +from langchain_core.messages import content as types + + +def _convert_annotation_from_v1(annotation: types.Annotation) -> dict[str, Any]: + """Right-inverse of _convert_citation_to_v1.""" + if annotation["type"] == "non_standard_annotation": + return annotation["value"] + + if annotation["type"] == "citation": + if "url" in annotation: + # web_search_result_location + out: dict[str, Any] = {} + if cited_text := annotation.get("cited_text"): + out["cited_text"] = cited_text + if "encrypted_index" in annotation.get("extras", {}): + out["encrypted_index"] = annotation["extras"]["encrypted_index"] + if "title" in annotation: + out["title"] = annotation["title"] + out["type"] = "web_search_result_location" + if "url" in annotation: + out["url"] = annotation["url"] + + for key, value in annotation.get("extras", {}).items(): + if key not in out: + out[key] = value + + return out + + if "start_char_index" in annotation.get("extras", {}): + # char_location + out = {"type": "char_location"} + for field in ["cited_text"]: + if value := annotation.get(field): + out[field] = value + if title := annotation.get("title"): + out["document_title"] = title + + for key, value in annotation.get("extras", {}).items(): + out[key] = value + + return out + + if "search_result_index" in annotation.get("extras", {}): + # search_result_location + out = {"type": "search_result_location"} + for field in ["cited_text", "title"]: + if value := annotation.get(field): + out[field] = value + + for key, value in annotation.get("extras", {}).items(): + out[key] = value + + return out + + if "start_block_index" in annotation.get("extras", {}): + # content_block_location + out = {} + if cited_text := annotation.get("cited_text"): + out["cited_text"] = cited_text + if "document_index" in annotation.get("extras", {}): + out["document_index"] = annotation["extras"]["document_index"] + if "title" in annotation: + out["document_title"] = annotation["title"] + + for key, value in annotation.get("extras", {}).items(): + if key not in out: + out[key] = value + + out["type"] = "content_block_location" + return out + + if "start_page_number" in annotation.get("extras", {}): + # page_location + out = {"type": "page_location"} + for field in ["cited_text"]: + if value := annotation.get(field): + out[field] = value + if title := annotation.get("title"): + out["document_title"] = title + + for key, value in annotation.get("extras", {}).items(): + out[key] = value + + return out + + return cast(dict[str, Any], annotation) + + return cast(dict[str, Any], annotation) + + +def _convert_from_v1_to_anthropic( + content: list[types.ContentBlock], + tool_calls: list[types.ToolCall], + model_provider: Optional[str], +) -> list[dict[str, Any]]: + new_content: list = [] + for block in content: + if block["type"] == "text": + if model_provider == "anthropic" and "annotations" in block: + new_block: dict[str, Any] = {"type": "text"} + 
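            # (Aside: ``_convert_annotation_from_v1``, used on the next line, is
            # written as a right-inverse, so a v1 citation carrying a URL
            # round-trips back to Anthropic's ``web_search_result_location``
            # shape. Illustrative values only:
            #
            #     >>> _convert_annotation_from_v1(
            #     ...     {
            #     ...         "type": "citation",
            #     ...         "cited_text": "LangChain is a framework.",
            #     ...         "title": "Docs",
            #     ...         "url": "https://example.com",
            #     ...         "extras": {"encrypted_index": "abc"},
            #     ...     }
            #     ... )
            #     {'cited_text': 'LangChain is a framework.', 'encrypted_index': 'abc',
            #      'title': 'Docs', 'type': 'web_search_result_location',
            #      'url': 'https://example.com'}
            # )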
new_block["citations"] = [ + _convert_annotation_from_v1(a) for a in block["annotations"] + ] + if "text" in block: + new_block["text"] = block["text"] + else: + new_block = {"text": block.get("text", ""), "type": "text"} + new_content.append(new_block) + + elif block["type"] == "tool_call": + new_content.append( + { + "type": "tool_use", + "name": block.get("name", ""), + "input": block.get("args", {}), + "id": block.get("id", ""), + } + ) + + elif block["type"] == "tool_call_chunk": + if isinstance(block["args"], str): + try: + input_ = json.loads(block["args"] or "{}") + except json.JSONDecodeError: + input_ = {} + else: + input_ = block.get("args") or {} + new_content.append( + { + "type": "tool_use", + "name": block.get("name", ""), + "input": input_, + "id": block.get("id", ""), + } + ) + + elif block["type"] == "reasoning" and model_provider == "anthropic": + new_block = {} + if "reasoning" in block: + new_block["thinking"] = block["reasoning"] + new_block["type"] = "thinking" + if signature := block.get("extras", {}).get("signature"): + new_block["signature"] = signature + + new_content.append(new_block) + + elif block["type"] == "web_search_call" and model_provider == "anthropic": + new_block = {} + if "id" in block: + new_block["id"] = block["id"] + + if (query := block.get("query")) and "input" not in block: + new_block["input"] = {"query": query} + elif input_ := block.get("extras", {}).get("input"): + new_block["input"] = input_ + elif partial_json := block.get("extras", {}).get("partial_json"): + new_block["input"] = {} + new_block["partial_json"] = partial_json + else: + pass + new_block["name"] = "web_search" + new_block["type"] = "server_tool_use" + new_content.append(new_block) + + elif block["type"] == "web_search_result" and model_provider == "anthropic": + new_block = {} + if "content" in block.get("extras", {}): + new_block["content"] = block["extras"]["content"] + if "id" in block: + new_block["tool_use_id"] = block["id"] + new_block["type"] = "web_search_tool_result" + new_content.append(new_block) + + elif block["type"] == "code_interpreter_call" and model_provider == "anthropic": + new_block = {} + if "id" in block: + new_block["id"] = block["id"] + if (code := block.get("code")) and "input" not in block: + new_block["input"] = {"code": code} + elif input_ := block.get("extras", {}).get("input"): + new_block["input"] = input_ + elif partial_json := block.get("extras", {}).get("partial_json"): + new_block["input"] = {} + new_block["partial_json"] = partial_json + else: + pass + new_block["name"] = "code_execution" + new_block["type"] = "server_tool_use" + new_content.append(new_block) + + elif ( + block["type"] == "code_interpreter_result" and model_provider == "anthropic" + ): + new_block = {} + if (output := block.get("output", [])) and len(output) == 1: + code_interpreter_output = output[0] + code_execution_content = {} + if "content" in block.get("extras", {}): + code_execution_content["content"] = block["extras"]["content"] + elif (file_ids := block.get("file_ids")) and isinstance(file_ids, list): + code_execution_content["content"] = [ + {"file_id": file_id, "type": "code_execution_output"} + for file_id in file_ids + ] + else: + code_execution_content["content"] = [] + if "return_code" in code_interpreter_output: + code_execution_content["return_code"] = code_interpreter_output[ + "return_code" + ] + code_execution_content["stderr"] = code_interpreter_output.get( + "stderr", "" + ) + if "stdout" in code_interpreter_output: + code_execution_content["stdout"] 
= code_interpreter_output["stdout"] + code_execution_content["type"] = "code_execution_result" + new_block["content"] = code_execution_content + elif "error_code" in block.get("extras", {}): + code_execution_content = { + "error_code": block["extras"]["error_code"], + "type": "code_execution_tool_result_error", + } + new_block["content"] = code_execution_content + else: + pass + if "id" in block: + new_block["tool_use_id"] = block["id"] + new_block["type"] = "code_execution_tool_result" + new_content.append(new_block) + + elif ( + block["type"] == "non_standard" + and "value" in block + and model_provider == "anthropic" + ): + new_content.append(block["value"]) + else: + new_content.append(block) + + return new_content diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py index 641b630ddb35c..b038ba09ff5d6 100644 --- a/libs/partners/anthropic/langchain_anthropic/chat_models.py +++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py @@ -33,6 +33,7 @@ ToolMessage, is_data_content_block, ) +from langchain_core.messages import content as types from langchain_core.messages.ai import InputTokenDetails, UsageMetadata from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk from langchain_core.output_parsers import JsonOutputKeyToolsParser, PydanticToolsParser @@ -51,6 +52,7 @@ _get_default_async_httpx_client, _get_default_httpx_client, ) +from langchain_anthropic._compat import _convert_from_v1_to_anthropic from langchain_anthropic.output_parsers import extract_tool_calls _message_type_lookups = { @@ -212,7 +214,7 @@ def _merge_messages( def _format_data_content_block(block: dict) -> dict: """Format standard data content block to format expected by Anthropic.""" if block["type"] == "image": - if block["source_type"] == "url": + if "url" in block: if block["url"].startswith("data:"): # Data URI formatted_block = { @@ -224,16 +226,24 @@ def _format_data_content_block(block: dict) -> dict: "type": "image", "source": {"type": "url", "url": block["url"]}, } - elif block["source_type"] == "base64": + elif "base64" in block or block.get("source_type") == "base64": formatted_block = { "type": "image", "source": { "type": "base64", "media_type": block["mime_type"], - "data": block["data"], + "data": block.get("base64") or block.get("data", ""), }, } - elif block["source_type"] == "id": + elif "file_id" in block: + formatted_block = { + "type": "image", + "source": { + "type": "file", + "file_id": block["file_id"], + }, + } + elif block.get("source_type") == "id": formatted_block = { "type": "image", "source": { @@ -243,7 +253,7 @@ def _format_data_content_block(block: dict) -> dict: } else: msg = ( - "Anthropic only supports 'url' and 'base64' source_type for image " + "Anthropic only supports 'url', 'base64', or 'id' keys for image " "content blocks." 
) raise ValueError( @@ -251,7 +261,7 @@ def _format_data_content_block(block: dict) -> dict: ) elif block["type"] == "file": - if block["source_type"] == "url": + if "url" in block: formatted_block = { "type": "document", "source": { @@ -259,16 +269,16 @@ def _format_data_content_block(block: dict) -> dict: "url": block["url"], }, } - elif block["source_type"] == "base64": + elif "base64" in block or block.get("source_type") == "base64": formatted_block = { "type": "document", "source": { "type": "base64", "media_type": block.get("mime_type") or "application/pdf", - "data": block["data"], + "data": block.get("base64") or block.get("data", ""), }, } - elif block["source_type"] == "text": + elif block.get("source_type") == "text": formatted_block = { "type": "document", "source": { @@ -277,7 +287,15 @@ def _format_data_content_block(block: dict) -> dict: "data": block["text"], }, } - elif block["source_type"] == "id": + elif "file_id" in block: + formatted_block = { + "type": "document", + "source": { + "type": "file", + "file_id": block["file_id"], + }, + } + elif block.get("source_type") == "id": formatted_block = { "type": "document", "source": { @@ -285,6 +303,22 @@ def _format_data_content_block(block: dict) -> dict: "file_id": block["id"], }, } + else: + msg = ( + "Anthropic only supports 'url', 'base64', or 'id' keys for file " + "content blocks." + ) + raise ValueError(msg) + + elif block["type"] == "text-plain": + formatted_block = { + "type": "document", + "source": { + "type": "text", + "media_type": block.get("mime_type") or "text/plain", + "data": block["text"], + }, + } else: msg = f"Block of type {block['type']} is not supported." @@ -294,7 +328,10 @@ def _format_data_content_block(block: dict) -> dict: for key in ["cache_control", "citations", "title", "context"]: if key in block: formatted_block[key] = block[key] + elif (metadata := block.get("extras")) and key in metadata: + formatted_block[key] = metadata[key] elif (metadata := block.get("metadata")) and key in metadata: + # Backward compat formatted_block[key] = metadata[key] return formatted_block @@ -741,13 +778,11 @@ class Joke(BaseModel): }, { "type": "image", - "source_type": "base64", - "data": image_data, + "base64": image_data, "mime_type": "image/jpeg", }, { "type": "image", - "source_type": "url", "url": image_url, }, ], @@ -781,7 +816,6 @@ class Joke(BaseModel): }, { "type": "image", - "source_type": "id", "id": "file_abc123...", }, ], @@ -810,9 +844,8 @@ class Joke(BaseModel): "Summarize this document.", { "type": "file", - "source_type": "base64", "mime_type": "application/pdf", - "data": data, + "base64": data, }, ] ) @@ -846,7 +879,6 @@ class Joke(BaseModel): }, { "type": "file", - "source_type": "id", "id": "file_abc123...", }, ], @@ -1462,6 +1494,23 @@ def _get_request_payload( **kwargs: dict, ) -> dict: messages = self._convert_input(input_).to_messages() + + for idx, message in enumerate(messages): + # Translate v1 content + if ( + isinstance(message, AIMessage) + and message.response_metadata.get("output_version") == "v1" + ): + messages[idx] = message.model_copy( + update={ + "content": _convert_from_v1_to_anthropic( + cast(list[types.ContentBlock], message.content), + message.tool_calls, + message.response_metadata.get("model_provider"), + ) + } + ) + system, formatted_messages = _format_messages(messages) # If cache_control is provided in kwargs, add it to last message @@ -1626,6 +1675,7 @@ def _format_output(self, data: Any, **kwargs: Any) -> ChatResult: llm_output = { k: v for k, v in 
data_dict.items() if k not in ("content", "role", "type")
         }
+        response_metadata = {"model_provider": "anthropic"}
         if "model" in llm_output and "model_name" not in llm_output:
             llm_output["model_name"] = llm_output["model"]
         if (
@@ -1633,15 +1683,18 @@
             and content[0]["type"] == "text"
             and not content[0].get("citations")
         ):
-            msg = AIMessage(content=content[0]["text"])
+            msg = AIMessage(
+                content=content[0]["text"], response_metadata=response_metadata
+            )
         elif any(block["type"] == "tool_use" for block in content):
             tool_calls = extract_tool_calls(content)
             msg = AIMessage(
                 content=content,
                 tool_calls=tool_calls,
+                response_metadata=response_metadata,
             )
         else:
-            msg = AIMessage(content=content)
+            msg = AIMessage(content=content, response_metadata=response_metadata)
         msg.usage_metadata = _create_usage_metadata(data.usage)
         return ChatResult(
             generations=[ChatGeneration(message=msg)],
@@ -2363,7 +2416,7 @@ def _make_message_chunk_from_anthropic_event(
     elif event.type == "message_delta" and stream_usage:
         usage_metadata = _create_usage_metadata(event.usage)
         message_chunk = AIMessageChunk(
-            content="",
+            content="" if coerce_content_to_string else [],
             usage_metadata=usage_metadata,
             response_metadata={
                 "stop_reason": event.delta.stop_reason,
@@ -2375,6 +2428,8 @@
     else:
         pass
 
+    if message_chunk:
+        message_chunk.response_metadata["model_provider"] = "anthropic"
     return message_chunk, block_start_event
 
diff --git a/libs/partners/anthropic/tests/cassettes/test_agent_loop.yaml.gz b/libs/partners/anthropic/tests/cassettes/test_agent_loop.yaml.gz
new file mode 100644
index 0000000000000000000000000000000000000000..d53dffb02da7f2011eb20a1282a0470f96fe817b
GIT binary patch
literal 2028
[binary gzip payload omitted; recorded VCR cassette for test_agent_loop]

literal 0
HcmV?d00001
diff --git a/libs/partners/anthropic/tests/cassettes/test_agent_loop_streaming.yaml.gz b/libs/partners/anthropic/tests/cassettes/test_agent_loop_streaming.yaml.gz
new file mode 100644
index 0000000000000000000000000000000000000000..8e76e86628fef510449e73e3c8cdaef7343961f1
GIT binary patch
literal 3179
[binary gzip payload omitted; recorded VCR cassette for test_agent_loop_streaming]

literal 0
HcmV?d00001
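These gzipped cassettes back the parametrized integration tests added below. The ``@pytest.mark.default_cassette`` marker (from the pytest-recording plugin, as used later in this patch) points every parametrization of a test at one shared recording, so the ``v0`` and ``v1`` runs replay the same HTTP traffic. A minimal sketch of the pattern, with the test body elided:

    import pytest

    @pytest.mark.default_cassette("test_agent_loop.yaml.gz")
    @pytest.mark.vcr
    @pytest.mark.parametrize("output_version", ["v0", "v1"])
    def test_agent_loop(output_version: str) -> None:
        ...  # both parametrizations replay the same recorded responses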
diff --git a/libs/partners/anthropic/tests/cassettes/test_citations.yaml.gz b/libs/partners/anthropic/tests/cassettes/test_citations.yaml.gz
new file mode 100644
index 0000000000000000000000000000000000000000..c704ad451e382cecff40e25c5a842eb552961b45
GIT binary patch
literal 3388
[binary gzip payload omitted; recorded VCR cassette for test_citations]

literal 0
HcmV?d00001

diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
@@ -65,6 +66,9 @@ def test_stream() -> None:
     assert chunks_with_model_name == 1
     # check token usage is populated
     assert isinstance(full, AIMessageChunk)
+    assert len(full.content_blocks) == 1
+    assert full.content_blocks[0]["type"] == "text"
+    assert full.content_blocks[0]["text"]
     assert full.usage_metadata is not None
     assert full.usage_metadata["input_tokens"] > 0
     assert full.usage_metadata["output_tokens"] > 0
@@ -105,6 +109,9 @@ async def test_astream() -> None:
     )
     # check token usage is populated
     assert isinstance(full, AIMessageChunk)
+    assert len(full.content_blocks) == 1
+    assert full.content_blocks[0]["type"] == "text"
+    assert full.content_blocks[0]["text"]
     assert full.usage_metadata is not None
     assert full.usage_metadata["input_tokens"] > 0
     assert full.usage_metadata["output_tokens"] > 0
@@ -421,6 +428,14 @@ def test_tool_use() -> None:
     assert isinstance(tool_call["args"], dict)
     assert "location" in tool_call["args"]
 
+    content_blocks = response.content_blocks
+    assert len(content_blocks) == 2
+    assert content_blocks[0]["type"] == "text"
+    assert content_blocks[0]["text"]
+    assert content_blocks[1]["type"] == "tool_call"
+    assert content_blocks[1]["name"] == "get_weather"
+    assert content_blocks[1]["args"] == tool_call["args"]
+
     # Test streaming
     llm = ChatAnthropic(
         model="claude-3-7-sonnet-20250219",  # type: ignore[call-arg]
@@ -440,6 +455,8 @@
             first = False
         else:
             gathered = gathered + chunk  # type: ignore[assignment]
+        for block in chunk.content_blocks:
+            assert block["type"] in ("text", "tool_call_chunk")
     assert len(chunks) > 1
     assert isinstance(gathered.content, list)
     assert len(gathered.content) == 2
@@ -461,6 +478,14 @@
     assert "location" in tool_call["args"]
     assert tool_call["id"] is not None
 
+    content_blocks = gathered.content_blocks
+    assert len(content_blocks) == 2
+    assert
content_blocks[0]["type"] == "text" + assert content_blocks[0]["text"] + assert content_blocks[1]["type"] == "tool_call_chunk" + assert content_blocks[1]["name"] == "get_weather" + assert content_blocks[1]["args"] + # Testing token-efficient tools # https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use assert gathered.usage_metadata @@ -500,6 +525,13 @@ def test_builtin_tools() -> None: assert isinstance(response, AIMessage) assert response.tool_calls + content_blocks = response.content_blocks + assert len(content_blocks) == 2 + assert content_blocks[0]["type"] == "text" + assert content_blocks[0]["text"] + assert content_blocks[1]["type"] == "tool_call" + assert content_blocks[1]["name"] == "str_replace_editor" + class GenerateUsername(BaseModel): """Get a username based on someone's name and hair color.""" @@ -682,8 +714,74 @@ def test_pdf_document_input() -> None: assert len(result.content) > 0 -def test_citations() -> None: - llm = ChatAnthropic(model="claude-3-5-haiku-latest") # type: ignore[call-arg] +@pytest.mark.default_cassette("test_agent_loop.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_agent_loop(output_version: Literal["v0", "v1"]) -> None: + @tool + def get_weather(location: str) -> str: + """Get the weather for a location.""" + return "It's sunny." + + llm = ChatAnthropic(model="claude-3-5-haiku-latest", output_version=output_version) # type: ignore[call-arg] + llm_with_tools = llm.bind_tools([get_weather]) + input_message = HumanMessage("What is the weather in San Francisco, CA?") + tool_call_message = llm_with_tools.invoke([input_message]) + assert isinstance(tool_call_message, AIMessage) + tool_calls = tool_call_message.tool_calls + assert len(tool_calls) == 1 + tool_call = tool_calls[0] + tool_message = get_weather.invoke(tool_call) + assert isinstance(tool_message, ToolMessage) + response = llm_with_tools.invoke( + [ + input_message, + tool_call_message, + tool_message, + ] + ) + assert isinstance(response, AIMessage) + + +@pytest.mark.default_cassette("test_agent_loop_streaming.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_agent_loop_streaming(output_version: Literal["v0", "v1"]) -> None: + @tool + def get_weather(location: str) -> str: + """Get the weather for a location.""" + return "It's sunny." 
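    # (Aside: ``get_weather.invoke`` below is handed the ToolCall dict itself,
    # not just its args; given a ToolCall, ``invoke`` returns a ToolMessage
    # whose ``tool_call_id`` matches the request, ready to append to the
    # transcript. Abridged illustration:
    #
    #     >>> get_weather.invoke(
    #     ...     {"type": "tool_call", "name": "get_weather",
    #     ...      "args": {"location": "SF"}, "id": "call_1"}
    #     ... )
    #     ToolMessage(content="It's sunny.", ..., tool_call_id='call_1')
    # )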
+ + llm = ChatAnthropic( + model="claude-3-5-haiku-latest", + streaming=True, + output_version=output_version, # type: ignore[call-arg] + ) + llm_with_tools = llm.bind_tools([get_weather]) + input_message = HumanMessage("What is the weather in San Francisco, CA?") + tool_call_message = llm_with_tools.invoke([input_message]) + assert isinstance(tool_call_message, AIMessage) + + tool_calls = tool_call_message.tool_calls + assert len(tool_calls) == 1 + tool_call = tool_calls[0] + tool_message = get_weather.invoke(tool_call) + assert isinstance(tool_message, ToolMessage) + response = llm_with_tools.invoke( + [ + input_message, + tool_call_message, + tool_message, + ] + ) + assert isinstance(response, AIMessage) + + +@pytest.mark.default_cassette("test_citations.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_citations(output_version: Literal["v0", "v1"]) -> None: + llm = ChatAnthropic(model="claude-3-5-haiku-latest", output_version=output_version) # type: ignore[call-arg] messages = [ { "role": "user", @@ -706,7 +804,10 @@ def test_citations() -> None: response = llm.invoke(messages) assert isinstance(response, AIMessage) assert isinstance(response.content, list) - assert any("citations" in block for block in response.content) + if output_version == "v1": + assert any("annotations" in block for block in response.content) + else: + assert any("citations" in block for block in response.content) # Test streaming full: Optional[BaseMessageChunk] = None @@ -714,8 +815,11 @@ def test_citations() -> None: full = cast(BaseMessageChunk, chunk) if full is None else full + chunk assert isinstance(full, AIMessageChunk) assert isinstance(full.content, list) - assert any("citations" in block for block in full.content) assert not any("citation" in block for block in full.content) + if output_version == "v1": + assert any("annotations" in block for block in full.content) + else: + assert any("citations" in block for block in full.content) # Test pass back in next_message = { @@ -762,25 +866,77 @@ def test_thinking() -> None: _ = llm.invoke([input_message, full, next_message]) +@pytest.mark.default_cassette("test_thinking.yaml.gz") @pytest.mark.vcr -def test_redacted_thinking() -> None: +def test_thinking_v1() -> None: llm = ChatAnthropic( model="claude-3-7-sonnet-latest", # type: ignore[call-arg] max_tokens=5_000, # type: ignore[call-arg] thinking={"type": "enabled", "budget_tokens": 2_000}, + output_version="v1", + ) + + input_message = {"role": "user", "content": "Hello"} + response = llm.invoke([input_message]) + assert any("reasoning" in block for block in response.content) + for block in response.content: + assert isinstance(block, dict) + if block["type"] == "reasoning": + assert set(block.keys()) == {"type", "reasoning", "extras"} + assert block["reasoning"] and isinstance(block["reasoning"], str) + signature = block["extras"]["signature"] + assert signature and isinstance(signature, str) + + # Test streaming + full: Optional[BaseMessageChunk] = None + for chunk in llm.stream([input_message]): + full = cast(BaseMessageChunk, chunk) if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert isinstance(full.content, list) + assert any("reasoning" in block for block in full.content) + for block in full.content: + assert isinstance(block, dict) + if block["type"] == "reasoning": + assert set(block.keys()) == {"type", "reasoning", "extras", "index"} + assert block["reasoning"] and isinstance(block["reasoning"], str) + signature = 
block["extras"]["signature"] + assert signature and isinstance(signature, str) + + # Test pass back in + next_message = {"role": "user", "content": "How are you?"} + _ = llm.invoke([input_message, full, next_message]) + + +@pytest.mark.default_cassette("test_redacted_thinking.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_redacted_thinking(output_version: Literal["v0", "v1"]) -> None: + llm = ChatAnthropic( + model="claude-3-7-sonnet-latest", # type: ignore[call-arg] + max_tokens=5_000, # type: ignore[call-arg] + thinking={"type": "enabled", "budget_tokens": 2_000}, + output_version=output_version, ) query = "ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB" # noqa: E501 input_message = {"role": "user", "content": query} response = llm.invoke([input_message]) - has_reasoning = False + value = None for block in response.content: assert isinstance(block, dict) if block["type"] == "redacted_thinking": - has_reasoning = True - assert set(block.keys()) == {"type", "data"} - assert block["data"] and isinstance(block["data"], str) - assert has_reasoning + value = block + elif ( + block["type"] == "non_standard" + and block["value"]["type"] == "redacted_thinking" + ): + value = block["value"] + else: + pass + if value: + assert set(value.keys()) == {"type", "data"} + assert value["data"] and isinstance(value["data"], str) + assert value is not None # Test streaming full: Optional[BaseMessageChunk] = None @@ -788,14 +944,25 @@ def test_redacted_thinking() -> None: full = cast(BaseMessageChunk, chunk) if full is None else full + chunk assert isinstance(full, AIMessageChunk) assert isinstance(full.content, list) - stream_has_reasoning = False + value = None for block in full.content: assert isinstance(block, dict) if block["type"] == "redacted_thinking": - stream_has_reasoning = True - assert set(block.keys()) == {"type", "data", "index"} - assert block["data"] and isinstance(block["data"], str) - assert stream_has_reasoning + value = block + assert set(value.keys()) == {"type", "data", "index"} + assert "index" in block + elif ( + block["type"] == "non_standard" + and block["value"]["type"] == "redacted_thinking" + ): + value = block["value"] + assert set(value.keys()) == {"type", "data"} + assert "index" in block + else: + pass + if value: + assert value["data"] and isinstance(value["data"], str) + assert value is not None # Test pass back in next_message = {"role": "user", "content": "What?"} @@ -899,11 +1066,14 @@ class color_picker(BaseModel): llm.bind_tools([color_picker]).invoke(messages) +@pytest.mark.default_cassette("test_web_search.yaml.gz") @pytest.mark.vcr -def test_web_search() -> None: +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_web_search(output_version: Literal["v0", "v1"]) -> None: llm = ChatAnthropic( model="claude-3-5-sonnet-latest", # type: ignore[call-arg] max_tokens=1024, + output_version=output_version, ) tool = {"type": "web_search_20250305", "name": "web_search", "max_uses": 1} @@ -921,7 +1091,10 @@ def test_web_search() -> None: response = llm_with_tools.invoke([input_message]) assert all(isinstance(block, dict) for block in response.content) block_types = {block["type"] for block in response.content} # type: ignore[index] - assert block_types == {"text", "server_tool_use", "web_search_tool_result"} + if output_version == "v0": + assert block_types == {"text", "server_tool_use", "web_search_tool_result"} + else: + assert block_types == 
{"text", "web_search_call", "web_search_result"} # Test streaming full: Optional[BaseMessageChunk] = None @@ -931,7 +1104,10 @@ def test_web_search() -> None: assert isinstance(full, AIMessageChunk) assert isinstance(full.content, list) block_types = {block["type"] for block in full.content} # type: ignore[index] - assert block_types == {"text", "server_tool_use", "web_search_tool_result"} + if output_version == "v0": + assert block_types == {"text", "server_tool_use", "web_search_tool_result"} + else: + assert block_types == {"text", "web_search_call", "web_search_result"} # Test we can pass back in next_message = { @@ -943,12 +1119,15 @@ def test_web_search() -> None: ) +@pytest.mark.default_cassette("test_code_execution.yaml.gz") @pytest.mark.vcr -def test_code_execution() -> None: +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_code_execution(output_version: Literal["v0", "v1"]) -> None: llm = ChatAnthropic( model="claude-sonnet-4-20250514", # type: ignore[call-arg] betas=["code-execution-2025-05-22"], max_tokens=10_000, # type: ignore[call-arg] + output_version=output_version, ) tool = {"type": "code_execution_20250522", "name": "code_execution"} @@ -969,7 +1148,14 @@ def test_code_execution() -> None: response = llm_with_tools.invoke([input_message]) assert all(isinstance(block, dict) for block in response.content) block_types = {block["type"] for block in response.content} # type: ignore[index] - assert block_types == {"text", "server_tool_use", "code_execution_tool_result"} + if output_version == "v0": + assert block_types == {"text", "server_tool_use", "code_execution_tool_result"} + else: + assert block_types == { + "text", + "code_interpreter_call", + "code_interpreter_result", + } # Test streaming full: Optional[BaseMessageChunk] = None @@ -979,7 +1165,14 @@ def test_code_execution() -> None: assert isinstance(full, AIMessageChunk) assert isinstance(full.content, list) block_types = {block["type"] for block in full.content} # type: ignore[index] - assert block_types == {"text", "server_tool_use", "code_execution_tool_result"} + if output_version == "v0": + assert block_types == {"text", "server_tool_use", "code_execution_tool_result"} + else: + assert block_types == { + "text", + "code_interpreter_call", + "code_interpreter_result", + } # Test we can pass back in next_message = { @@ -991,8 +1184,10 @@ def test_code_execution() -> None: ) +@pytest.mark.default_cassette("test_remote_mcp.yaml.gz") @pytest.mark.vcr -def test_remote_mcp() -> None: +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_remote_mcp(output_version: Literal["v0", "v1"]) -> None: mcp_servers = [ { "type": "url", @@ -1008,6 +1203,7 @@ def test_remote_mcp() -> None: betas=["mcp-client-2025-04-04"], mcp_servers=mcp_servers, max_tokens=10_000, # type: ignore[call-arg] + output_version=output_version, ) input_message = { @@ -1025,7 +1221,10 @@ def test_remote_mcp() -> None: response = llm.invoke([input_message]) assert all(isinstance(block, dict) for block in response.content) block_types = {block["type"] for block in response.content} # type: ignore[index] - assert block_types == {"text", "mcp_tool_use", "mcp_tool_result"} + if output_version == "v0": + assert block_types == {"text", "mcp_tool_use", "mcp_tool_result"} + else: + assert block_types == {"text", "non_standard"} # Test streaming full: Optional[BaseMessageChunk] = None @@ -1036,7 +1235,10 @@ def test_remote_mcp() -> None: assert isinstance(full.content, list) assert all(isinstance(block, dict) for block in 
full.content) block_types = {block["type"] for block in full.content} # type: ignore[index] - assert block_types == {"text", "mcp_tool_use", "mcp_tool_result"} + if output_version == "v0": + assert block_types == {"text", "mcp_tool_use", "mcp_tool_result"} + else: + assert block_types == {"text", "non_standard"} # Test we can pass back in next_message = { @@ -1069,8 +1271,7 @@ def test_files_api_image(block_format: str) -> None: # standard block format block = { "type": "image", - "source_type": "id", - "id": image_file_id, + "file_id": image_file_id, } input_message = { "role": "user", @@ -1097,8 +1298,7 @@ def test_files_api_pdf(block_format: str) -> None: # standard block format block = { "type": "file", - "source_type": "id", - "id": pdf_file_id, + "file_id": pdf_file_id, } input_message = { "role": "user", @@ -1163,6 +1363,11 @@ def retrieval_tool(query: str) -> list[dict]: assert isinstance(result.content, list) assert any("citations" in block for block in result.content) + assert ( + _convert_from_v1_to_anthropic(result.content_blocks, [], "anthropic") + == result.content + ) + def test_search_result_top_level() -> None: llm = ChatAnthropic( @@ -1209,6 +1414,11 @@ def test_search_result_top_level() -> None: assert isinstance(result.content, list) assert any("citations" in block for block in result.content) + assert ( + _convert_from_v1_to_anthropic(result.content_blocks, [], "anthropic") + == result.content + ) + def test_async_shared_client() -> None: llm = ChatAnthropic(model="claude-3-5-haiku-latest") # type: ignore[call-arg] diff --git a/libs/partners/anthropic/tests/unit_tests/__snapshots__/test_standard.ambr b/libs/partners/anthropic/tests/unit_tests/__snapshots__/test_standard.ambr index b831aef469b44..5c9164caae365 100644 --- a/libs/partners/anthropic/tests/unit_tests/__snapshots__/test_standard.ambr +++ b/libs/partners/anthropic/tests/unit_tests/__snapshots__/test_standard.ambr @@ -20,6 +20,7 @@ 'max_retries': 2, 'max_tokens': 100, 'model': 'claude-3-haiku-20240307', + 'output_version': 'v0', 'stop_sequences': list([ ]), 'stream_usage': True, diff --git a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py index 382d2f774c5dc..3cf2b0e44ee27 100644 --- a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py @@ -211,6 +211,7 @@ def test__format_output() -> None: "total_tokens": 3, "input_token_details": {}, }, + response_metadata={"model_provider": "anthropic"}, ) llm = ChatAnthropic(model="test", anthropic_api_key="test") # type: ignore[call-arg, call-arg] actual = llm._format_output(anthropic_msg) @@ -241,6 +242,7 @@ def test__format_output_cached() -> None: "total_tokens": 10, "input_token_details": {"cache_creation": 3, "cache_read": 4}, }, + response_metadata={"model_provider": "anthropic"}, ) llm = ChatAnthropic(model="test", anthropic_api_key="test") # type: ignore[call-arg, call-arg] @@ -849,7 +851,7 @@ def test__format_messages_with_cache_control() -> None: assert expected_system == actual_system assert expected_messages == actual_messages - # Test standard multi-modal format + # Test standard multi-modal format (v0) messages = [ HumanMessage( [ @@ -891,6 +893,183 @@ def test__format_messages_with_cache_control() -> None: ] assert actual_messages == expected_messages + # Test standard multi-modal format (v1) + messages = [ + HumanMessage( + [ + { + "type": "text", + "text": "Summarize this document:", + }, + { + "type": 
"file", + "mime_type": "application/pdf", + "base64": "", + "extras": {"cache_control": {"type": "ephemeral"}}, + }, + ], + ), + ] + actual_system, actual_messages = _format_messages(messages) + assert actual_system is None + expected_messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Summarize this document:", + }, + { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": "", + }, + "cache_control": {"type": "ephemeral"}, + }, + ], + }, + ] + assert actual_messages == expected_messages + + # Test standard multi-modal format (v1, unpacked extras) + messages = [ + HumanMessage( + [ + { + "type": "text", + "text": "Summarize this document:", + }, + { + "type": "file", + "mime_type": "application/pdf", + "base64": "", + "cache_control": {"type": "ephemeral"}, + }, + ], + ), + ] + actual_system, actual_messages = _format_messages(messages) + assert actual_system is None + expected_messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Summarize this document:", + }, + { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": "", + }, + "cache_control": {"type": "ephemeral"}, + }, + ], + }, + ] + assert actual_messages == expected_messages + + # Also test file inputs + ## Images + for block in [ + # v1 + { + "type": "image", + "file_id": "abc123", + }, + # v0 + { + "type": "image", + "source_type": "id", + "id": "abc123", + }, + ]: + messages = [ + HumanMessage( + [ + { + "type": "text", + "text": "Summarize this image:", + }, + block, + ], + ), + ] + actual_system, actual_messages = _format_messages(messages) + assert actual_system is None + expected_messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Summarize this image:", + }, + { + "type": "image", + "source": { + "type": "file", + "file_id": "abc123", + }, + }, + ], + }, + ] + assert actual_messages == expected_messages + + ## Documents + for block in [ + # v1 + { + "type": "file", + "file_id": "abc123", + }, + # v0 + { + "type": "file", + "source_type": "id", + "id": "abc123", + }, + ]: + messages = [ + HumanMessage( + [ + { + "type": "text", + "text": "Summarize this document:", + }, + block, + ], + ), + ] + actual_system, actual_messages = _format_messages(messages) + assert actual_system is None + expected_messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Summarize this document:", + }, + { + "type": "document", + "source": { + "type": "file", + "file_id": "abc123", + }, + }, + ], + }, + ] + assert actual_messages == expected_messages + def test__format_messages_with_citations() -> None: input_messages = [ From 62d746e63096480e802208468d9c40ce00af1ea0 Mon Sep 17 00:00:00 2001 From: ccurme Date: Mon, 25 Aug 2025 10:37:41 -0300 Subject: [PATCH 45/56] feat(core): (v1) restore separate type for AIMessage.tool_calls (#32668) --- libs/core/langchain_core/messages/ai.py | 4 +- .../messages/block_translators/anthropic.py | 7 +- .../messages/block_translators/openai.py | 16 +- libs/core/langchain_core/messages/tool.py | 32 ++- .../core/tests/unit_tests/messages/test_ai.py | 10 + .../prompts/__snapshots__/test_chat.ambr | 50 +---- .../runnables/__snapshots__/test_graph.ambr | 25 +-- .../__snapshots__/test_runnable.ambr | 200 +----------------- .../langchain_anthropic/chat_models.py | 11 +- .../langchain_openai/chat_models/base.py | 14 +- .../tests/unit_tests/chat_models/test_base.py | 14 +- 11 files changed, 106 insertions(+), 277 
deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 31be4dbca4e0a..cc0e532f7c897 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -255,9 +255,9 @@ def content_blocks(self) -> list[types.ContentBlock]: "args": tool_call["args"], } if "index" in tool_call: - tool_call_block["index"] = tool_call["index"] + tool_call_block["index"] = tool_call["index"] # type: ignore[typeddict-item] if "extras" in tool_call: - tool_call_block["extras"] = tool_call["extras"] + tool_call_block["extras"] = tool_call["extras"] # type: ignore[typeddict-item] blocks.append(tool_call_block) return blocks diff --git a/libs/core/langchain_core/messages/block_translators/anthropic.py b/libs/core/langchain_core/messages/block_translators/anthropic.py index 8f0b3919fa452..655cfaa2857ef 100644 --- a/libs/core/langchain_core/messages/block_translators/anthropic.py +++ b/libs/core/langchain_core/messages/block_translators/anthropic.py @@ -237,7 +237,12 @@ def _iter_blocks() -> Iterable[types.ContentBlock]: not isinstance(message, AIMessageChunk) and len(message.tool_calls) == 1 ): - tool_call_block = message.tool_calls[0] + tool_call_block: types.ToolCall = { + "type": "tool_call", + "name": message.tool_calls[0]["name"], + "args": message.tool_calls[0]["args"], + "id": message.tool_calls[0].get("id"), + } if "index" in block: tool_call_block["index"] = block["index"] yield tool_call_block diff --git a/libs/core/langchain_core/messages/block_translators/openai.py b/libs/core/langchain_core/messages/block_translators/openai.py index b11e64558aad6..a9e5db98d9d54 100644 --- a/libs/core/langchain_core/messages/block_translators/openai.py +++ b/libs/core/langchain_core/messages/block_translators/openai.py @@ -30,7 +30,14 @@ def _convert_to_v1_from_chat_completions( content_blocks = [] for tool_call in message.tool_calls: - content_blocks.append(tool_call) + content_blocks.append( + { + "type": "tool_call", + "name": tool_call["name"], + "args": tool_call["args"], + "id": tool_call.get("id"), + } + ) return content_blocks @@ -287,7 +294,12 @@ def _iter_blocks() -> Iterable[types.ContentBlock]: elif call_id: for tool_call in message.tool_calls or []: if tool_call.get("id") == call_id: - tool_call_block = tool_call.copy() + tool_call_block = { + "type": "tool_call", + "name": tool_call["name"], + "args": tool_call["args"], + "id": tool_call.get("id"), + } break else: for invalid_tool_call in message.invalid_tool_calls or []: diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index fab0315de63ca..f55be142672b7 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -10,7 +10,6 @@ from langchain_core.messages import content as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content from langchain_core.messages.content import InvalidToolCall as InvalidToolCall -from langchain_core.messages.content import ToolCall as ToolCall from langchain_core.utils._merge import merge_dicts, merge_obj @@ -199,6 +198,37 @@ def __add__(self, other: Any) -> BaseMessageChunk: # type: ignore[override] return super().__add__(other) +class ToolCall(TypedDict): + """Represents a request to call a tool. + + Example: + + .. code-block:: python + + { + "name": "foo", + "args": {"a": 1}, + "id": "123" + } + + This represents a request to call the tool named "foo" with arguments {"a": 1} + and an identifier of "123". 
+ + """ + + name: str + """The name of the tool to be called.""" + args: dict[str, Any] + """The arguments to the tool call.""" + id: Optional[str] + """An identifier associated with the tool call. + + An identifier is needed to associate a tool call request with a tool + call result in events when multiple concurrent tool calls are made. + """ + type: NotRequired[Literal["tool_call"]] + + def tool_call( *, name: str, diff --git a/libs/core/tests/unit_tests/messages/test_ai.py b/libs/core/tests/unit_tests/messages/test_ai.py index 4f623c0910c87..a0edf0b5714bc 100644 --- a/libs/core/tests/unit_tests/messages/test_ai.py +++ b/libs/core/tests/unit_tests/messages/test_ai.py @@ -203,6 +203,16 @@ def test_add_ai_message_chunks_usage() -> None: ) +def test_init_tool_calls() -> None: + # Test we add "type" key on init + msg = AIMessage("", tool_calls=[{"name": "foo", "args": {"a": "b"}, "id": "abc"}]) + assert len(msg.tool_calls) == 1 + assert msg.tool_calls[0]["type"] == "tool_call" + + # Test we can assign without adding type key + msg.tool_calls = [{"name": "bar", "args": {"c": "d"}, "id": "def"}] + + def test_content_blocks() -> None: message = AIMessage( "", diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index 1ff3d7aec133f..7b28b1523c10d 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -1014,23 +1014,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -1042,17 +1031,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -1064,10 +1042,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -2485,23 +2462,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. 
Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -2513,17 +2479,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -2535,10 +2490,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index 4f6c54a28d452..3d9e96918bfc5 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -1417,23 +1417,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -1445,17 +1434,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -1467,10 +1445,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index 04bab565b38b3..ab07b2c2c0991 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -2959,23 +2959,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -2987,17 +2976,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -3008,10 +2986,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -4493,23 +4470,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. 
Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -4521,17 +4487,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -4542,10 +4497,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -6039,23 +5993,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -6067,17 +6010,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -6088,10 +6020,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -7441,23 +7372,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -7469,17 +7389,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -7490,10 +7399,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -9017,23 +8925,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -9045,17 +8942,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -9066,10 +8952,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -10464,23 +10349,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. 
note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -10492,17 +10366,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -10513,10 +10376,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -11959,23 +11821,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -11987,17 +11838,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -12008,10 +11848,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -13455,23 +13294,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. 
Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -13483,17 +13311,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -13504,10 +13321,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py index b038ba09ff5d6..18651fb5f2c03 100644 --- a/libs/partners/anthropic/langchain_anthropic/chat_models.py +++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py @@ -1501,11 +1501,20 @@ def _get_request_payload( isinstance(message, AIMessage) and message.response_metadata.get("output_version") == "v1" ): + tcs: list[types.ToolCall] = [ + { + "type": "tool_call", + "name": tool_call["name"], + "args": tool_call["args"], + "id": tool_call.get("id"), + } + for tool_call in message.tool_calls + ] messages[idx] = message.model_copy( update={ "content": _convert_from_v1_to_anthropic( cast(list[types.ContentBlock], message.content), - message.tool_calls, + tcs, message.response_metadata.get("model_provider"), ) } diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 4db93be82b214..47ef23801f9d9 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -64,6 +64,7 @@ convert_to_openai_data_block, is_data_content_block, ) +from langchain_core.messages import content as types from langchain_core.messages.ai import ( InputTokenDetails, OutputTokenDetails, @@ -3748,9 +3749,16 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: if isinstance(msg.get("content"), list) and all( isinstance(block, dict) for block in msg["content"] ): - msg["content"] = _convert_from_v1_to_responses( - msg["content"], lc_msg.tool_calls - ) + tcs: list[types.ToolCall] = [ + { + "type": "tool_call", + "name": tool_call["name"], + "args": tool_call["args"], + "id": tool_call.get("id"), + } + for tool_call in lc_msg.tool_calls + ] + msg["content"] = _convert_from_v1_to_responses(msg["content"], tcs) else: msg = _convert_message_to_dict(lc_msg) # Get content from non-standard content blocks diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index c68c86cef705b..083f06cc3d1b7 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -20,6 +20,7 @@ ToolCall, ToolMessage, ) +from langchain_core.messages import content as types from langchain_core.messages.ai import UsageMetadata from langchain_core.outputs import ChatGeneration, ChatResult from langchain_core.runnables import RunnableLambda @@ -2575,9 +2576,16 @@ def test_convert_from_v1_to_chat_completions( def test_convert_from_v1_to_responses( message_v1: AIMessage, expected: list[dict[str, Any]] ) -> None: - result = _convert_from_v1_to_responses( - message_v1.content_blocks, message_v1.tool_calls - ) + 
tcs: list[types.ToolCall] = [ + { + "type": "tool_call", + "name": tool_call["name"], + "args": tool_call["args"], + "id": tool_call.get("id"), + } + for tool_call in message_v1.tool_calls + ] + result = _convert_from_v1_to_responses(message_v1.content_blocks, tcs) assert result == expected # Check no mutation From 4e0fd330aa8462284e35a4c457814461a42be61f Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Mon, 25 Aug 2025 14:10:21 -0400 Subject: [PATCH 46/56] fix: update `content_blocks` property docstring --- libs/core/langchain_core/messages/base.py | 26 ++++++++++++++++------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 89008c8c42998..68d81fcb8b6dd 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -108,19 +108,29 @@ def get_lc_namespace(cls) -> list[str]: @property def content_blocks(self) -> list[types.ContentBlock]: - """Return the content as a list of standard ``ContentBlock``s. + r"""Return ``content`` as a list of standardized :class:`~langchain_core.messages.content.ContentBlock`\s. - To use this property, the corresponding chat model must support - ``message_version='v1'`` or higher: + .. important:: - .. code-block:: python + To use this property correctly, the corresponding ``ChatModel`` must support + ``message_version='v1'`` or higher (and it must be set): - from langchain.chat_models import init_chat_model - llm = init_chat_model("...", message_version="v1") + .. code-block:: python - Otherwise, does best-effort parsing to standard types. + from langchain.chat_models import init_chat_model - """ + llm = init_chat_model("...", message_version="v1") + + # or + + from langchain_openai import ChatOpenAI + llm = ChatOpenAI(model="gpt-4o", message_version="v1") + + Otherwise, the property will perform best-effort parsing to standard types, + though some content may be misinterpreted. + + .. versionadded:: 1.0.0 + + """ # noqa: E501 from langchain_core.messages import content as types from langchain_core.messages.block_translators.anthropic import ( _convert_to_v1_from_anthropic_input, From 93e89cf972611f4e44cbb06d7a4aaf774539219f Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 13:58:11 -0400 Subject: [PATCH 47/56] . --- docs/docs/how_to/index.mdx | 4 ++-- .../language_models/chat_models.py | 3 ++- libs/core/langchain_core/messages/ai.py | 3 --- .../langchain_core/outputs/chat_generation.py | 22 ++++++++++--------- libs/core/langchain_core/prompt_values.py | 8 +++++-- 5 files changed, 22 insertions(+), 18 deletions(-) diff --git a/docs/docs/how_to/index.mdx b/docs/docs/how_to/index.mdx index dad7eab3603af..5706bf002ecba 100644 --- a/docs/docs/how_to/index.mdx +++ b/docs/docs/how_to/index.mdx @@ -72,7 +72,7 @@ See [supported integrations](/docs/integrations/chat/) for details on getting st ### Example selectors -[Example Selectors](/docs/concepts/example_selectors) are responsible for selecting the correct few shot examples to pass to the prompt. +[Example Selectors](/docs/concepts/example_selectors) are responsible for selecting the correct few-shot examples to pass to the prompt.
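An aside on the first patch above: the four-line re-keying of ``.tool_calls`` into typed ``types.ToolCall`` dicts occurs three times there (the Anthropic request builder, the Responses API input builder, and the updated unit test). A shared helper could collapse that duplication. The sketch below is not part of the patch; the function name is hypothetical, and ``types`` is assumed to be ``langchain_core.messages.content`` exactly as imported in those hunks.

from langchain_core.messages import AIMessage
from langchain_core.messages import content as types


def as_v1_tool_calls(message: AIMessage) -> list[types.ToolCall]:
    # Re-key each tool call into the typed ``types.ToolCall`` shape;
    # a missing "id" becomes None instead of raising a KeyError.
    return [
        {
            "type": "tool_call",
            "name": tool_call["name"],
            "args": tool_call["args"],
            "id": tool_call.get("id"),
        }
        for tool_call in message.tool_calls
    ]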
- [How to: use example selectors](/docs/how_to/example_selectors) - [How to: select examples by length](/docs/how_to/example_selectors_length_based) @@ -168,7 +168,7 @@ See [supported integrations](/docs/integrations/vectorstores/) for details on ge Indexing is the process of keeping your vectorstore in-sync with the underlying data source. -- [How to: reindex data to keep your vectorstore in sync with the underlying data source](/docs/how_to/indexing) +- [How to: reindex data to keep your vectorstore in-sync with the underlying data source](/docs/how_to/indexing) ### Tools diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 10c7de268c078..db486eb2c594e 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -508,7 +508,7 @@ def stream( **kwargs: Any, ) -> Iterator[AIMessageChunk]: if not self._should_stream(async_api=False, **{**kwargs, "stream": True}): - # model doesn't implement streaming, so use default implementation + # Model doesn't implement streaming, so use default implementation yield cast( "AIMessageChunk", self.invoke(input, config=config, stop=stop, **kwargs), @@ -1284,6 +1284,7 @@ def _stream( run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any, ) -> Iterator[ChatGenerationChunk]: + # We expect that subclasses implement this method if they support streaming. raise NotImplementedError async def _astream( diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index cc0e532f7c897..74e726b4e1edb 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -39,7 +39,6 @@ class InputTokenDetails(TypedDict, total=False): Does *not* need to sum to full input token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -75,7 +74,6 @@ class OutputTokenDetails(TypedDict, total=False): Does *not* need to sum to full output token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -103,7 +101,6 @@ class UsageMetadata(TypedDict): This is a standard representation of token usage that is consistent across models. Example: - .. code-block:: python { diff --git a/libs/core/langchain_core/outputs/chat_generation.py b/libs/core/langchain_core/outputs/chat_generation.py index d42f2038d346f..156544db05c57 100644 --- a/libs/core/langchain_core/outputs/chat_generation.py +++ b/libs/core/langchain_core/outputs/chat_generation.py @@ -15,14 +15,14 @@ class ChatGeneration(Generation): """A single chat generation output. - A subclass of Generation that represents the response from a chat model + A subclass of ``Generation`` that represents the response from a chat model that generates chat messages. - The `message` attribute is a structured representation of the chat message. - Most of the time, the message will be of type `AIMessage`. + The ``message`` attribute is a structured representation of the chat message. + Most of the time, the message will be of type ``AIMessage``. Users working with chat models will usually access information via either - `AIMessage` (returned from runnable interfaces) or `LLMResult` (available + ``AIMessage`` (returned from runnable interfaces) or ``LLMResult`` (available via callbacks). """ @@ -31,6 +31,7 @@ class ChatGeneration(Generation): .. warning:: SHOULD NOT BE SET DIRECTLY! 
+ """ message: BaseMessage """The message output by the chat model.""" @@ -50,6 +51,7 @@ def set_text(self) -> Self: Raises: ValueError: If the message is not a string or a list. + """ text = "" if isinstance(self.message.content, str): @@ -69,9 +71,9 @@ def set_text(self) -> Self: class ChatGenerationChunk(ChatGeneration): - """ChatGeneration chunk. + """``ChatGeneration`` chunk. - ChatGeneration chunks can be concatenated with other ChatGeneration chunks. + ``ChatGeneration`` chunks can be concatenated with other ``ChatGeneration`` chunks. """ message: BaseMessageChunk @@ -83,11 +85,11 @@ class ChatGenerationChunk(ChatGeneration): def __add__( self, other: Union[ChatGenerationChunk, list[ChatGenerationChunk]] ) -> ChatGenerationChunk: - """Concatenate two ChatGenerationChunks. + """Concatenate two ``ChatGenerationChunks``. Args: - other: The other ChatGenerationChunk or list of ChatGenerationChunks to - concatenate. + other: The other ``ChatGenerationChunk`` or list of ``ChatGenerationChunk``s + to concatenate. """ if isinstance(other, ChatGenerationChunk): generation_info = merge_dicts( @@ -116,7 +118,7 @@ def __add__( def merge_chat_generation_chunks( chunks: list[ChatGenerationChunk], ) -> Union[ChatGenerationChunk, None]: - """Merge a list of ChatGenerationChunks into a single ChatGenerationChunk.""" + """Merge list of ``ChatGenerationChunk``s into a single ``ChatGenerationChunk``.""" if not chunks: return None diff --git a/libs/core/langchain_core/prompt_values.py b/libs/core/langchain_core/prompt_values.py index 5f5dd7eb6b21b..01827efcd8f70 100644 --- a/libs/core/langchain_core/prompt_values.py +++ b/libs/core/langchain_core/prompt_values.py @@ -107,8 +107,12 @@ class ImageURL(TypedDict, total=False): """Image URL.""" detail: Literal["auto", "low", "high"] - """Specifies the detail level of the image. Defaults to "auto". - Can be "auto", "low", or "high".""" + """Specifies the detail level of the image. Defaults to ``'auto'``. + Can be ``'auto'``, ``'low'``, or ``'high'``. + + This follows OpenAI's Chat Completion API's image URL format. + + """ url: str """Either a URL of the image or the base64 encoded image data.""" From 83a033995c59c38a3676c625fda6a65262505f9e Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 14:02:05 -0400 Subject: [PATCH 48/56] . --- .../prompts/__snapshots__/test_chat.ambr | 6 ----- .../runnables/__snapshots__/test_graph.ambr | 3 --- .../__snapshots__/test_runnable.ambr | 24 ------------------- 3 files changed, 33 deletions(-) diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index 7b28b1523c10d..10168ca102b27 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -692,7 +692,6 @@ Does *not* need to sum to full input token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -812,7 +811,6 @@ Does *not* need to sum to full output token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -1339,7 +1337,6 @@ This is a standard representation of token usage that is consistent across models. Example: - .. code-block:: python { @@ -2140,7 +2137,6 @@ Does *not* need to sum to full input token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -2260,7 +2256,6 @@ Does *not* need to sum to full output token count. Does *not* need to have all keys. Example: - .. 
code-block:: python { @@ -2787,7 +2782,6 @@ This is a standard representation of token usage that is consistent across models. Example: - .. code-block:: python { diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index 3d9e96918bfc5..7eb60d97b30cb 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -1095,7 +1095,6 @@ Does *not* need to sum to full input token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -1215,7 +1214,6 @@ Does *not* need to sum to full output token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -1742,7 +1740,6 @@ This is a standard representation of token usage that is consistent across models. Example: - .. code-block:: python { diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index ab07b2c2c0991..fe96061713be4 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -2640,7 +2640,6 @@ Does *not* need to sum to full input token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -2759,7 +2758,6 @@ Does *not* need to sum to full output token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -3280,7 +3278,6 @@ This is a standard representation of token usage that is consistent across models. Example: - .. code-block:: python { @@ -4132,7 +4129,6 @@ Does *not* need to sum to full input token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -4251,7 +4247,6 @@ Does *not* need to sum to full output token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -4791,7 +4786,6 @@ This is a standard representation of token usage that is consistent across models. Example: - .. code-block:: python { @@ -5655,7 +5649,6 @@ Does *not* need to sum to full input token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -5774,7 +5767,6 @@ Does *not* need to sum to full output token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -6314,7 +6306,6 @@ This is a standard representation of token usage that is consistent across models. Example: - .. code-block:: python { @@ -7053,7 +7044,6 @@ Does *not* need to sum to full input token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -7172,7 +7162,6 @@ Does *not* need to sum to full output token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -7693,7 +7682,6 @@ This is a standard representation of token usage that is consistent across models. Example: - .. code-block:: python { @@ -8587,7 +8575,6 @@ Does *not* need to sum to full input token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -8706,7 +8693,6 @@ Does *not* need to sum to full output token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -9246,7 +9232,6 @@ This is a standard representation of token usage that is consistent across models. Example: - .. code-block:: python { @@ -10030,7 +10015,6 @@ Does *not* need to sum to full input token count. Does *not* need to have all keys. Example: - .. 
code-block:: python { @@ -10149,7 +10133,6 @@ Does *not* need to sum to full output token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -10670,7 +10653,6 @@ This is a standard representation of token usage that is consistent across models. Example: - .. code-block:: python { @@ -11472,7 +11454,6 @@ Does *not* need to sum to full input token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -11591,7 +11572,6 @@ Does *not* need to sum to full output token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -12142,7 +12122,6 @@ This is a standard representation of token usage that is consistent across models. Example: - .. code-block:: python { @@ -12956,7 +12935,6 @@ Does *not* need to sum to full input token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -13075,7 +13053,6 @@ Does *not* need to sum to full output token count. Does *not* need to have all keys. Example: - .. code-block:: python { @@ -13615,7 +13592,6 @@ This is a standard representation of token usage that is consistent across models. Example: - .. code-block:: python { From 3a78f4fef9529810cc51ba15881033c88eb4e367 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 14:41:04 -0400 Subject: [PATCH 49/56] . --- libs/core/langchain_core/messages/base.py | 34 +++++++++++++++-------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 68d81fcb8b6dd..995c135de469a 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -22,6 +22,7 @@ class BaseMessage(Serializable): """Base abstract message class. Messages are the inputs and outputs of ChatModels. + """ content: Union[str, list[Union[str, dict]]] @@ -32,17 +33,18 @@ class BaseMessage(Serializable): For example, for a message from an AI, this could include tool calls as encoded by the model provider. + """ response_metadata: dict = Field(default_factory=dict) - """Response metadata. For example: response headers, logprobs, token counts, model - name.""" + """Examples: response headers, logprobs, token counts, model name.""" type: str """The type of the message. Must be a string that is unique to the message type. The purpose of this field is to allow for easy identification of the message type when deserializing messages. + """ name: Optional[str] = None @@ -52,6 +54,7 @@ class BaseMessage(Serializable): Usage of this field is optional, and whether it's used or not is up to the model implementation. + """ id: Optional[str] = Field(default=None, coerce_numbers_to_str=True) @@ -91,7 +94,7 @@ def __init__( @classmethod def is_lc_serializable(cls) -> bool: - """BaseMessage is serializable. + """``BaseMessage`` is serializable. Returns: True @@ -102,7 +105,7 @@ def is_lc_serializable(cls) -> bool: def get_lc_namespace(cls) -> list[str]: """Get the namespace of the langchain object. - Default is ["langchain", "schema", "messages"]. + Default is ``['langchain', 'schema', 'messages']``. """ return ["langchain", "schema", "messages"] @@ -170,10 +173,11 @@ def content_blocks(self) -> list[types.ContentBlock]: return blocks def text(self) -> str: - """Get the text content of the message. + """Get the text ``content`` of the message. Returns: The text content of the message. 
+ """ if isinstance(self.content, str): return self.content @@ -208,6 +212,7 @@ def pretty_repr( Returns: A pretty representation of the message. + """ title = get_msg_title_repr(self.type.title() + " Message", bold=html) # TODO: handle non-string content. @@ -227,11 +232,12 @@ def merge_content( """Merge multiple message contents. Args: - first_content: The first content. Can be a string or a list. - contents: The other contents. Can be a string or a list. + first_content: The first ``content``. Can be a string or a list. + contents: The other ``content``s. Can be a string or a list. Returns: The merged content. + """ merged: Union[str, list[Union[str, dict]]] merged = "" if first_content is None else first_content @@ -283,9 +289,10 @@ def __add__(self, other: Any) -> BaseMessageChunk: # type: ignore[override] For example, - `AIMessageChunk(content="Hello") + AIMessageChunk(content=" World")` + ``AIMessageChunk(content="Hello") + AIMessageChunk(content=" World")`` + + will give ``AIMessageChunk(content="Hello World")`` - will give `AIMessageChunk(content="Hello World")` """ if isinstance(other, BaseMessageChunk): # If both are (subclasses of) BaseMessageChunk, @@ -333,8 +340,9 @@ def message_to_dict(message: BaseMessage) -> dict: message: Message to convert. Returns: - Message as a dict. The dict will have a "type" key with the message type - and a "data" key with the message data as a dict. + Message as a dict. The dict will have a ``type`` key with the message type + and a ``data`` key with the message data as a dict. + """ return {"type": message.type, "data": message.model_dump()} @@ -343,10 +351,11 @@ def messages_to_dict(messages: Sequence[BaseMessage]) -> list[dict]: """Convert a sequence of Messages to a list of dictionaries. Args: - messages: Sequence of messages (as BaseMessages) to convert. + messages: Sequence of messages (as ``BaseMessage``s) to convert. Returns: List of messages as dicts. + """ return [message_to_dict(m) for m in messages] @@ -360,6 +369,7 @@ def get_msg_title_repr(title: str, *, bold: bool = False) -> str: Returns: The title representation. + """ padded = " " + title + " " sep_len = (80 - len(padded)) // 2 From 2dfbcc57384d4e0956dae0d98bd3136151433d67 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Mon, 18 Aug 2025 15:54:53 -0400 Subject: [PATCH 50/56] . --- libs/core/langchain_core/messages/ai.py | 50 +++--- libs/core/langchain_core/messages/base.py | 30 ++-- libs/core/langchain_core/messages/chat.py | 5 +- libs/core/langchain_core/messages/function.py | 14 +- libs/core/langchain_core/messages/human.py | 35 ++--- libs/core/langchain_core/messages/modifier.py | 1 + libs/core/langchain_core/messages/system.py | 11 +- libs/core/langchain_core/messages/tool.py | 83 ++++++---- libs/core/langchain_core/messages/utils.py | 145 +++++++++--------- 9 files changed, 189 insertions(+), 185 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 74e726b4e1edb..615d334e5f9d7 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -65,6 +65,7 @@ class InputTokenDetails(TypedDict, total=False): Since there was a cache hit, the tokens were read from the cache. More precisely, the model state given these tokens was read from the cache. + """ @@ -92,6 +93,7 @@ class OutputTokenDetails(TypedDict, total=False): Tokens generated by the model in a chain of thought process (i.e. by OpenAI's o1 models) that are not returned as part of model output. 
+ """ @@ -139,6 +141,7 @@ class UsageMetadata(TypedDict): """Breakdown of output token counts. Does *not* need to sum to full output token count. Does *not* need to have all keys. + """ @@ -150,12 +153,14 @@ class AIMessage(BaseMessage): This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. + """ example: bool = False """Use to denote that a message is part of an example conversation. At the moment, this is ignored by most models. Usage is discouraged. + """ tool_calls: list[ToolCall] = [] @@ -166,6 +171,7 @@ class AIMessage(BaseMessage): """If provided, usage metadata for a message, such as token counts. This is a standard representation of token usage that is consistent across models. + """ type: Literal["ai"] = "ai" @@ -178,35 +184,12 @@ def __init__( **kwargs: Any, ) -> None: ... - @overload - def __init__( - self, - content: Optional[Union[str, list[Union[str, dict]]]] = None, - content_blocks: Optional[list[types.ContentBlock]] = None, - **kwargs: Any, - ) -> None: ... - - def __init__( - self, - content: Optional[Union[str, list[Union[str, dict]]]] = None, - content_blocks: Optional[list[types.ContentBlock]] = None, - **kwargs: Any, - ) -> None: - """Specify ``content`` as positional arg or ``content_blocks`` for typing.""" - if content_blocks is not None: - # If there are tool calls in content_blocks, but not in tool_calls, add them - content_tool_calls = [ - block for block in content_blocks if block.get("type") == "tool_call" - ] - if content_tool_calls and "tool_calls" not in kwargs: - kwargs["tool_calls"] = content_tool_calls + Args: + content: The content of the message. + kwargs: Additional arguments to pass to the parent class. - super().__init__( - content=cast("Union[str, list[Union[str, dict]]]", content_blocks), - **kwargs, - ) - else: - super().__init__(content=content, **kwargs) + """ + super().__init__(content=content, **kwargs) @property def lc_attributes(self) -> dict: @@ -316,6 +299,7 @@ def pretty_repr(self, html: bool = False) -> str: Returns: A pretty representation of the message. + """ base = super().pretty_repr(html=html) lines = [] @@ -355,7 +339,10 @@ class AIMessageChunk(AIMessage, BaseMessageChunk): # non-chunk variant. type: Literal["AIMessageChunk"] = "AIMessageChunk" # type: ignore[assignment] """The type of the message (used for deserialization). - Defaults to "AIMessageChunk".""" + + Defaults to ``AIMessageChunk``. + + """ tool_call_chunks: list[ToolCallChunk] = [] """If provided, tool call chunks associated with the message.""" @@ -419,6 +406,7 @@ def init_tool_calls(self) -> Self: Raises: ValueError: If the tool call chunks are malformed. + """ if not self.tool_call_chunks: if self.tool_calls: @@ -632,9 +620,9 @@ def add_usage( def subtract_usage( left: Optional[UsageMetadata], right: Optional[UsageMetadata] ) -> UsageMetadata: - """Recursively subtract two UsageMetadata objects. + """Recursively subtract two ``UsageMetadata`` objects. - Token counts cannot be negative so the actual operation is max(left - right, 0). + Token counts cannot be negative so the actual operation is ``max(left - right, 0)``. Example: .. 
code-block:: python diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 995c135de469a..6e2d3800d6647 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -58,8 +58,11 @@ class BaseMessage(Serializable): """ id: Optional[str] = Field(default=None, coerce_numbers_to_str=True) - """An optional unique identifier for the message. This should ideally be - provided by the provider/model which created the message.""" + """An optional unique identifier for the message. + + This should ideally be provided by the provider/model which created the message. + + """ model_config = ConfigDict( extra="allow", @@ -72,25 +75,11 @@ def __init__( **kwargs: Any, ) -> None: ... - @overload - def __init__( - self, - content: Optional[Union[str, list[Union[str, dict]]]] = None, - content_blocks: Optional[list[types.ContentBlock]] = None, - **kwargs: Any, - ) -> None: ... + Args: + content: The string contents of the message. - def __init__( - self, - content: Optional[Union[str, list[Union[str, dict]]]] = None, - content_blocks: Optional[list[types.ContentBlock]] = None, - **kwargs: Any, - ) -> None: - """Specify ``content`` as positional arg or ``content_blocks`` for typing.""" - if content_blocks is not None: - super().__init__(content=content_blocks, **kwargs) - else: - super().__init__(content=content, **kwargs) + """ + super().__init__(content=content, **kwargs) @classmethod def is_lc_serializable(cls) -> bool: @@ -106,6 +95,7 @@ def get_lc_namespace(cls) -> list[str]: """Get the namespace of the langchain object. Default is ``['langchain', 'schema', 'messages']``. + """ return ["langchain", "schema", "messages"] diff --git a/libs/core/langchain_core/messages/chat.py b/libs/core/langchain_core/messages/chat.py index a4791423fad79..fa44425302def 100644 --- a/libs/core/langchain_core/messages/chat.py +++ b/libs/core/langchain_core/messages/chat.py @@ -30,7 +30,10 @@ class ChatMessageChunk(ChatMessage, BaseMessageChunk): # non-chunk variant. type: Literal["ChatMessageChunk"] = "ChatMessageChunk" # type: ignore[assignment] """The type of the message (used during serialization). - Defaults to "ChatMessageChunk".""" + + Defaults to ``ChatMessageChunk``. + + """ @override def __add__(self, other: Any) -> BaseMessageChunk: # type: ignore[override] diff --git a/libs/core/langchain_core/messages/function.py b/libs/core/langchain_core/messages/function.py index fc1018775b7e1..612be1dc10e33 100644 --- a/libs/core/langchain_core/messages/function.py +++ b/libs/core/langchain_core/messages/function.py @@ -15,19 +15,20 @@ class FunctionMessage(BaseMessage): """Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and - do not contain the tool_call_id field. + ``FunctionMessage`` are an older version of the ``ToolMessage`` schema, and + do not contain the ``tool_call_id`` field. - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. + """ name: str """The name of the function that was executed.""" type: Literal["function"] = "function" - """The type of the message (used for serialization). Defaults to "function".""" + """The type of the message (used for serialization). 
Defaults to ``'function'``.""" class FunctionMessageChunk(FunctionMessage, BaseMessageChunk): @@ -38,7 +39,10 @@ class FunctionMessageChunk(FunctionMessage, BaseMessageChunk): # non-chunk variant. type: Literal["FunctionMessageChunk"] = "FunctionMessageChunk" # type: ignore[assignment] """The type of the message (used for serialization). - Defaults to "FunctionMessageChunk".""" + + Defaults to ``FunctionMessageChunk``. + + """ @override def __add__(self, other: Any) -> BaseMessageChunk: # type: ignore[override] diff --git a/libs/core/langchain_core/messages/human.py b/libs/core/langchain_core/messages/human.py index a15a0f7533d11..a3ad7786c03ea 100644 --- a/libs/core/langchain_core/messages/human.py +++ b/libs/core/langchain_core/messages/human.py @@ -9,7 +9,7 @@ class HumanMessage(BaseMessage): """Message from a human. - HumanMessages are messages that are passed in from a human to the model. + ``HumanMessage``s are messages that are passed in from a human to the model. Example: @@ -37,10 +37,15 @@ class HumanMessage(BaseMessage): At the moment, this is ignored by most models. Usage is discouraged. Defaults to False. + """ type: Literal["human"] = "human" - """The type of the message (used for serialization). Defaults to "human".""" + """The type of the message (used for serialization). + + Defaults to ``'human'``. + + """ @overload def __init__( @@ -49,28 +54,12 @@ def __init__( **kwargs: Any, ) -> None: ... - @overload - def __init__( - self, - content: Optional[Union[str, list[Union[str, dict]]]] = None, - content_blocks: Optional[list[types.ContentBlock]] = None, - **kwargs: Any, - ) -> None: ... + Args: + content: The string contents of the message. + kwargs: Additional fields to pass to the message. - def __init__( - self, - content: Optional[Union[str, list[Union[str, dict]]]] = None, - content_blocks: Optional[list[types.ContentBlock]] = None, - **kwargs: Any, - ) -> None: - """Specify ``content`` as positional arg or ``content_blocks`` for typing.""" - if content_blocks is not None: - super().__init__( - content=cast("Union[str, list[Union[str, dict]]]", content_blocks), - **kwargs, - ) - else: - super().__init__(content=content, **kwargs) + """ + super().__init__(content=content, **kwargs) class HumanMessageChunk(HumanMessage, BaseMessageChunk): diff --git a/libs/core/langchain_core/messages/modifier.py b/libs/core/langchain_core/messages/modifier.py index 5f1602a4908d3..94ce8356fb397 100644 --- a/libs/core/langchain_core/messages/modifier.py +++ b/libs/core/langchain_core/messages/modifier.py @@ -24,6 +24,7 @@ def __init__( Raises: ValueError: If the 'content' field is passed in kwargs. + """ if kwargs.pop("content", None): msg = "RemoveMessage does not support 'content' field." diff --git a/libs/core/langchain_core/messages/system.py b/libs/core/langchain_core/messages/system.py index ca6589db8dbca..1ef63f83996b0 100644 --- a/libs/core/langchain_core/messages/system.py +++ b/libs/core/langchain_core/messages/system.py @@ -33,7 +33,11 @@ class SystemMessage(BaseMessage): """ type: Literal["system"] = "system" - """The type of the message (used for serialization). Defaults to "system".""" + """The type of the message (used for serialization). + + Defaults to ``'system'``. + + """ @overload def __init__( @@ -74,4 +78,7 @@ class SystemMessageChunk(SystemMessage, BaseMessageChunk): # non-chunk variant. type: Literal["SystemMessageChunk"] = "SystemMessageChunk" # type: ignore[assignment] """The type of the message (used for serialization). 
- Defaults to "SystemMessageChunk".""" + + Defaults to ``'SystemMessageChunk'``. + + """ diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index f55be142672b7..72c469c793196 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -16,19 +16,20 @@ class ToolOutputMixin: """Mixin for objects that tools can return directly. - If a custom BaseTool is invoked with a ToolCall and the output of custom code is - not an instance of ToolOutputMixin, the output will automatically be coerced to a - string and wrapped in a ToolMessage. + If a custom BaseTool is invoked with a ``ToolCall`` and the output of custom code is + not an instance of ``ToolOutputMixin``, the output will automatically be coerced to + a string and wrapped in a ``ToolMessage``. + """ class ToolMessage(BaseMessage, ToolOutputMixin): """Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result - is encoded inside the `content` field. + ``ToolMessage``s contain the result of a tool invocation. Typically, the result + is encoded inside the ``content`` field. - Example: A ToolMessage representing a result of 42 from a tool call with id + Example: A ``ToolMessage`` representing a result of ``42`` from a tool call with id .. code-block:: python @@ -37,7 +38,7 @@ class ToolMessage(BaseMessage, ToolOutputMixin): ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - Example: A ToolMessage where only part of the tool output is sent to the model + Example: A ``ToolMessage`` where only part of the tool output is sent to the model and the full output is passed in to artifact. .. versionadded:: 0.2.17 @@ -58,7 +59,7 @@ class ToolMessage(BaseMessage, ToolOutputMixin): tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -68,7 +69,11 @@ class ToolMessage(BaseMessage, ToolOutputMixin): """Tool call that this message is responding to.""" type: Literal["tool"] = "tool" - """The type of the message (used for serialization). Defaults to "tool".""" + """The type of the message (used for serialization). + + Defaults to ``'tool'``. + + """ artifact: Any = None """Artifact of the Tool execution which is not meant to be sent to the model. @@ -78,12 +83,14 @@ class ToolMessage(BaseMessage, ToolOutputMixin): output is needed in other parts of the code. .. versionadded:: 0.2.17 + """ status: Literal["success", "error"] = "success" """Status of the tool invocation. .. versionadded:: 0.2.24 + """ additional_kwargs: dict = Field(default_factory=dict, repr=False) @@ -98,6 +105,7 @@ def coerce_args(cls, values: dict) -> dict: Args: values: The model arguments. + """ content = values["content"] if isinstance(content, tuple): @@ -142,28 +150,12 @@ def __init__( **kwargs: Any, ) -> None: ... - @overload - def __init__( - self, - content: Optional[Union[str, list[Union[str, dict]]]] = None, - content_blocks: Optional[list[types.ContentBlock]] = None, - **kwargs: Any, - ) -> None: ... + Args: + content: The string contents of the message. + **kwargs: Additional fields. 
- def __init__( - self, - content: Optional[Union[str, list[Union[str, dict]]]] = None, - content_blocks: Optional[list[types.ContentBlock]] = None, - **kwargs: Any, - ) -> None: - """Specify ``content`` as positional arg or ``content_blocks`` for typing.""" - if content_blocks is not None: - super().__init__( - content=cast("Union[str, list[Union[str, dict]]]", content_blocks), - **kwargs, - ) - else: - super().__init__(content=content, **kwargs) + """ + super().__init__(content=content, **kwargs) class ToolMessageChunk(ToolMessage, BaseMessageChunk): @@ -211,8 +203,8 @@ class ToolCall(TypedDict): "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} - and an identifier of "123". + This represents a request to call the tool named ``'foo'`` with arguments + ``{"a": 1}`` and an identifier of ``'123'``. """ @@ -225,6 +217,7 @@ class ToolCall(TypedDict): An identifier is needed to associate a tool call request with a tool call result in events when multiple concurrent tool calls are made. + """ type: NotRequired[Literal["tool_call"]] @@ -241,6 +234,7 @@ def tool_call( name: The name of the tool to be called. args: The arguments to the tool call. id: An identifier associated with the tool call. + """ return ToolCall(name=name, args=args, id=id, type="tool_call") @@ -248,9 +242,9 @@ def tool_call( class ToolCallChunk(TypedDict): """A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunk``s (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. + values of ``index`` are equal and not None. Example: @@ -291,12 +285,32 @@ def tool_call_chunk( args: The arguments to the tool call. id: An identifier associated with the tool call. index: The index of the tool call in a sequence. + """ return ToolCallChunk( name=name, args=args, id=id, index=index, type="tool_call_chunk" ) +class InvalidToolCall(TypedDict): + """Allowance for errors made by LLM. + + Here we add an ``error`` key to surface errors made during generation + (e.g., invalid JSON arguments.) + + """ + + name: Optional[str] + """The name of the tool to be called.""" + args: Optional[str] + """The arguments to the tool call.""" + id: Optional[str] + """An identifier associated with the tool call.""" + error: Optional[str] + """An error message associated with the tool call.""" + type: NotRequired[Literal["invalid_tool_call"]] + + def invalid_tool_call( *, name: Optional[str] = None, @@ -311,6 +325,7 @@ def invalid_tool_call( args: The arguments to the tool call. id: An identifier associated with the tool call. error: An error message associated with the tool call. + """ return InvalidToolCall( name=name, args=args, id=id, error=error, type="invalid_tool_call" diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py index 79c1c4b66c6c9..292d7f110a4c0 100644 --- a/libs/core/langchain_core/messages/utils.py +++ b/libs/core/langchain_core/messages/utils.py @@ -5,6 +5,7 @@ * Convert messages to strings (serialization) * Convert messages from dicts to Message objects (deserialization) * Filter messages from a list of messages based on name, type or id etc. 
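The ``invalid_tool_call`` factory in the ``tool.py`` hunk above pairs with the new ``InvalidToolCall`` TypedDict. A short sketch of how a parser might use it, with purely illustrative values:

from langchain_core.messages.tool import invalid_tool_call

# Record a tool call whose arguments came back as malformed JSON,
# instead of raising and losing the model output.
bad_call = invalid_tool_call(
    name="foo",
    args='{"a": 1',  # truncated JSON, kept verbatim as a string
    id="call_123",
    error="Malformed args: unterminated JSON object",
)
assert bad_call["type"] == "invalid_tool_call"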
+ """ from __future__ import annotations @@ -89,13 +90,13 @@ def _get_type(v: Any) -> str: def get_buffer_string( messages: Sequence[BaseMessage], human_prefix: str = "Human", ai_prefix: str = "AI" ) -> str: - r"""Convert a sequence of Messages to strings and concatenate them into one string. + r"""Convert a sequence of messages to strings and concatenate them into one string. Args: messages: Messages to be converted to strings. - human_prefix: The prefix to prepend to contents of HumanMessages. + human_prefix: The prefix to prepend to contents of ``HumanMessage``s. Default is "Human". - ai_prefix: THe prefix to prepend to contents of AIMessages. Default is "AI". + ai_prefix: The prefix to prepend to contents of AIMessages. Default is ``'AI'``. Returns: A single string concatenation of all input messages. @@ -174,19 +175,20 @@ def _message_from_dict(message: dict) -> BaseMessage: def messages_from_dict(messages: Sequence[dict]) -> list[BaseMessage]: - """Convert a sequence of messages from dicts to Message objects. + """Convert a sequence of messages from dicts to ``Message`` objects. Args: messages: Sequence of messages (as dicts) to convert. Returns: list of messages (BaseMessages). + """ return [_message_from_dict(m) for m in messages] def message_chunk_to_message(chunk: BaseMessageChunk) -> BaseMessage: - """Convert a message chunk to a message. + """Convert a message chunk to a ``Message``. Args: chunk: Message chunk to convert. @@ -219,10 +221,10 @@ def _create_message_from_message_type( id: Optional[str] = None, **additional_kwargs: Any, ) -> BaseMessage: - """Create a message from a message type and content string. + """Create a message from a ``Message`` type and content string. Args: - message_type: (str) the type of the message (e.g., "human", "ai", etc.). + message_type: (str) the type of the message (e.g., ``'human'``, ``'ai'``, etc.). content: (str) the content string. name: (str) the name of the message. Default is None. tool_call_id: (str) the tool call id. Default is None. @@ -234,8 +236,9 @@ def _create_message_from_message_type( a message of the appropriate type. Raises: - ValueError: if the message type is not one of "human", "user", "ai", - "assistant", "function", "tool", "system", or "developer". + ValueError: if the message type is not one of ``'human'``, ``'user'``, ``'ai'``, + ``'assistant'``, ``'function'``, ``'tool'``, ``'system'``, or + ``'developer'``. """ kwargs: dict[str, Any] = {} if name is not None: @@ -298,15 +301,15 @@ def _create_message_from_message_type( def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage: - """Instantiate a message from a variety of message formats. + """Instantiate a ``Message`` from a variety of message formats. The message format can be one of the following: - - BaseMessagePromptTemplate - - BaseMessage - - 2-tuple of (role string, template); e.g., ("human", "{user_input}") + - ``BaseMessagePromptTemplate`` + - ``BaseMessage`` + - 2-tuple of (role string, template); e.g., (``'human'``, ``'{user_input}'``) - dict: a message dict with role and content keys - - string: shorthand for ("human", template); e.g., "{user_input}" + - string: shorthand for (``'human'``, template); e.g., ``'{user_input}'`` Args: message: a representation of a message in one of the supported formats. @@ -317,6 +320,7 @@ def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage: Raises: NotImplementedError: if the message type is not supported. ValueError: if the message dict does not contain the required keys. 
+ """ if isinstance(message, BaseMessage): message_ = message @@ -362,6 +366,7 @@ def convert_to_messages( Returns: list of messages (BaseMessages). + """ # Import here to avoid circular imports from langchain_core.prompt_values import PromptValue @@ -411,31 +416,31 @@ def filter_messages( exclude_ids: Optional[Sequence[str]] = None, exclude_tool_calls: Optional[Sequence[str] | bool] = None, ) -> list[BaseMessage]: - """Filter messages based on name, type or id. + """Filter messages based on ``name``, ``type`` or ``id``. Args: messages: Sequence Message-like objects to filter. include_names: Message names to include. Default is None. exclude_names: Messages names to exclude. Default is None. include_types: Message types to include. Can be specified as string names (e.g. - "system", "human", "ai", ...) or as BaseMessage classes (e.g. - SystemMessage, HumanMessage, AIMessage, ...). Default is None. + ``'system'``, ``'human'``, ``'ai'``, ...) or as ``BaseMessage`` classes (e.g. + ``SystemMessage``, ``HumanMessage``, ``AIMessage``, ...). Default is None. exclude_types: Message types to exclude. Can be specified as string names (e.g. - "system", "human", "ai", ...) or as BaseMessage classes (e.g. - SystemMessage, HumanMessage, AIMessage, ...). Default is None. + ``'system'``, ``'human'``, ``'ai'``, ...) or as ``BaseMessage`` classes (e.g. + ``SystemMessage``, ``HumanMessage``, ``AIMessage``, ...). Default is None. include_ids: Message IDs to include. Default is None. exclude_ids: Message IDs to exclude. Default is None. exclude_tool_calls: Tool call IDs to exclude. Default is None. Can be one of the following: - - `True`: all AIMessages with tool calls and all ToolMessages will be excluded. + - ``True``: all ``AIMessage``s with tool calls and all ``ToolMessage``s will be excluded. - a sequence of tool call IDs to exclude: - - ToolMessages with the corresponding tool call ID will be excluded. - - The `tool_calls` in the AIMessage will be updated to exclude matching tool calls. - If all tool_calls are filtered from an AIMessage, the whole message is excluded. + - ``ToolMessage``s with the corresponding tool call ID will be excluded. + - The ``tool_calls`` in the AIMessage will be updated to exclude matching tool calls. + If all ``tool_calls`` are filtered from an AIMessage, the whole message is excluded. Returns: - A list of Messages that meets at least one of the incl_* conditions and none - of the excl_* conditions. If not incl_* conditions are specified then + A list of Messages that meets at least one of the ``incl_*`` conditions and none + of the ``excl_*`` conditions. If not ``incl_*`` conditions are specified then anything that is not explicitly excluded will be included. Raises: @@ -536,13 +541,14 @@ def merge_message_runs( ) -> list[BaseMessage]: r"""Merge consecutive Messages of the same type. - **NOTE**: ToolMessages are not merged, as each has a distinct tool call id that - can't be merged. + .. note:: + ToolMessages are not merged, as each has a distinct tool call id that can't be + merged. Args: messages: Sequence Message-like objects to merge. chunk_separator: Specify the string to be inserted between message chunks. - Default is "\n". + Default is ``'\n'``. Returns: list of BaseMessages with consecutive runs of message types merged into single @@ -651,22 +657,22 @@ def trim_messages( ) -> list[BaseMessage]: r"""Trim messages to be below a token count. - trim_messages can be used to reduce the size of a chat history to a specified token - count or specified message count. 
+ ``trim_messages`` can be used to reduce the size of a chat history to a specified + token count or specified message count. In either case, if passing the trimmed chat history back into a chat model directly, the resulting chat history should usually satisfy the following properties: 1. The resulting chat history should be valid. Most chat models expect that chat - history starts with either (1) a ``HumanMessage`` or (2) a ``SystemMessage`` followed - by a ``HumanMessage``. To achieve this, set ``start_on="human"``. + history starts with either (1) a ``HumanMessage`` or (2) a ``SystemMessage`` + followed by a ``HumanMessage``. To achieve this, set ``start_on='human'``. In addition, generally a ``ToolMessage`` can only appear after an ``AIMessage`` that involved a tool call. Please see the following link for more information about messages: https://python.langchain.com/docs/concepts/#messages 2. It includes recent messages and drops old messages in the chat history. - To achieve this set the ``strategy="last"``. + To achieve this set the ``strategy='last'``. 3. Usually, the new chat history should include the ``SystemMessage`` if it was present in the original chat history since the ``SystemMessage`` includes special instructions to the chat model. The ``SystemMessage`` is almost always @@ -680,65 +686,66 @@ def trim_messages( Args: messages: Sequence of Message-like objects to trim. max_tokens: Max token count of trimmed messages. - token_counter: Function or llm for counting tokens in a BaseMessage or a list of - BaseMessage. If a BaseLanguageModel is passed in then - BaseLanguageModel.get_num_tokens_from_messages() will be used. - Set to `len` to count the number of **messages** in the chat history. + token_counter: Function or llm for counting tokens in a ``BaseMessage`` or a + list of ``BaseMessage``. If a ``BaseLanguageModel`` is passed in then + ``BaseLanguageModel.get_num_tokens_from_messages()`` will be used. + Set to ``len`` to count the number of **messages** in the chat history. .. note:: - Use `count_tokens_approximately` to get fast, approximate token counts. - This is recommended for using `trim_messages` on the hot path, where + Use ``count_tokens_approximately`` to get fast, approximate token counts. + This is recommended for using ``trim_messages`` on the hot path, where exact token counting is not necessary. strategy: Strategy for trimming. - - "first": Keep the first <= n_count tokens of the messages. - - "last": Keep the last <= n_count tokens of the messages. - Default is "last". + - ``'first'``: Keep the first ``<= n_count`` tokens of the messages. + - ``'last'``: Keep the last ``<= n_count`` tokens of the messages. + Default is ``'last'``. allow_partial: Whether to split a message if only part of the message can be - included. If ``strategy="last"`` then the last partial contents of a message - are included. If ``strategy="first"`` then the first partial contents of a + included. If ``strategy='last'`` then the last partial contents of a message + are included. If ``strategy='first'`` then the first partial contents of a message are included. Default is False. end_on: The message type to end on. If specified then every message after the - last occurrence of this type is ignored. If ``strategy=="last"`` then this + last occurrence of this type is ignored. If ``strategy='last'`` then this is done before we attempt to get the last ``max_tokens``. If - ``strategy=="first"`` then this is done after we get the first - ``max_tokens``. 
Can be specified as string names (e.g. "system", "human", - "ai", ...) or as BaseMessage classes (e.g. SystemMessage, HumanMessage, - AIMessage, ...). Can be a single type or a list of types. + ``strategy='first'`` then this is done after we get the first + ``max_tokens``. Can be specified as string names (e.g. ``'system'``, + ``'human'``, ``'ai'``, ...) or as ``BaseMessage`` classes (e.g. + ``SystemMessage``, ``HumanMessage``, ``AIMessage``, ...). Can be a single + type or a list of types. Default is None. start_on: The message type to start on. Should only be specified if - ``strategy="last"``. If specified then every message before + ``strategy='last'``. If specified then every message before the first occurrence of this type is ignored. This is done after we trim the initial messages to the last ``max_tokens``. Does not - apply to a SystemMessage at index 0 if ``include_system=True``. Can be - specified as string names (e.g. "system", "human", "ai", ...) or as - BaseMessage classes (e.g. SystemMessage, HumanMessage, AIMessage, ...). Can - be a single type or a list of types. + apply to a ``SystemMessage`` at index 0 if ``include_system=True``. Can be + specified as string names (e.g. ``'system'``, ``'human'``, ``'ai'``, ...) or + as ``BaseMessage`` classes (e.g. ``SystemMessage``, ``HumanMessage``, + ``AIMessage``, ...). Can be a single type or a list of types. Default is None. include_system: Whether to keep the SystemMessage if there is one at index 0. Should only be specified if ``strategy="last"``. Default is False. text_splitter: Function or ``langchain_text_splitters.TextSplitter`` for splitting the string contents of a message. Only used if - ``allow_partial=True``. If ``strategy="last"`` then the last split tokens - from a partial message will be included. if ``strategy=="first"`` then the + ``allow_partial=True``. If ``strategy='last'`` then the last split tokens + from a partial message will be included. if ``strategy='first'`` then the first split tokens from a partial message will be included. Token splitter assumes that separators are kept, so that split contents can be directly concatenated to recreate the original text. Defaults to splitting on newlines. Returns: - list of trimmed BaseMessages. + list of trimmed ``BaseMessage``. Raises: ValueError: if two incompatible arguments are specified or an unrecognized ``strategy`` is specified. Example: - Trim chat history based on token count, keeping the SystemMessage if - present, and ensuring that the chat history starts with a HumanMessage ( - or a SystemMessage followed by a HumanMessage). + Trim chat history based on token count, keeping the ``SystemMessage`` if + present, and ensuring that the chat history starts with a ``HumanMessage`` ( + or a ``SystemMessage`` followed by a ``HumanMessage``). .. code-block:: python @@ -787,9 +794,9 @@ def trim_messages( HumanMessage(content='what do you call a speechless parrot'), ] - Trim chat history based on the message count, keeping the SystemMessage if - present, and ensuring that the chat history starts with a HumanMessage ( - or a SystemMessage followed by a HumanMessage). + Trim chat history based on the message count, keeping the ``SystemMessage`` if + present, and ensuring that the chat history starts with a ``HumanMessage`` ( + or a ``SystemMessage`` followed by a ``HumanMessage``). trim_messages( messages, @@ -955,16 +962,16 @@ def convert_to_openai_messages( in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats. 
text_format: How to format string or text block contents: - - "string": + - ``'string'``: If a message has a string content, this is left as a string. If - a message has content blocks that are all of type 'text', these are - joined with a newline to make a single string. If a message has - content blocks and at least one isn't of type 'text', then + a message has content blocks that are all of type ``'text'``, these + are joined with a newline to make a single string. If a message has + content blocks and at least one isn't of type ``'text'``, then all blocks are left as dicts. - "block": If a message has a string content, this is turned into a list - with a single content block of type 'text'. If a message has content - blocks these are left as is. + with a single content block of type ``'text'``. If a message has + content blocks these are left as is. Returns: The return type depends on the input type: From 151483f66832666e7968879d908b76015782a99e Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Mon, 18 Aug 2025 16:34:32 -0400 Subject: [PATCH 51/56] ss --- .../__snapshots__/test_runnable.ambr | 820 +++--------------- 1 file changed, 125 insertions(+), 695 deletions(-) diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index fe96061713be4..01356251e65ce 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -97,7 +97,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=['foo, bar'])", + "repr": "FakeListChatModel(responses=['foo, bar'])", "name": "FakeListChatModel" } ], @@ -227,7 +227,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=['baz, qux'])", + "repr": "FakeListChatModel(responses=['baz, qux'])", "name": "FakeListChatModel" } ], @@ -346,7 +346,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=['foo, bar'])", + "repr": "FakeListChatModel(responses=['foo, bar'])", "name": "FakeListChatModel" }, { @@ -457,7 +457,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=['baz, qux'])", + "repr": "FakeListChatModel(responses=['baz, qux'])", "name": "FakeListChatModel" } ], @@ -483,273 +483,6 @@ RunTree(id=00000000-0000-4000-8000-000000000000, name='RunnableSequence', run_type='chain', dotted_order='20230101T000000000000Z00000000-0000-4000-8000-000000000000'), ]) # --- -# name: test_configurable_fields[schema2] - dict({ - '$defs': dict({ - 'Configurable': dict({ - 'properties': dict({ - 'llm_responses': dict({ - 'default': list([ - 'a', - ]), - 'description': 'A list of fake responses for this LLM', - 'items': dict({ - 'type': 'string', - }), - 'title': 'LLM Responses', - 'type': 'array', - }), - }), - 'title': 'Configurable', - 'type': 'object', - }), - }), - 'properties': dict({ - 'configurable': dict({ - '$ref': '#/$defs/Configurable', - }), - }), - 'title': 'RunnableConfigurableFieldsConfig', - 'type': 'object', - }) -# --- -# name: test_configurable_fields[schema3] - dict({ - '$defs': dict({ - 'Configurable': dict({ - 'properties': dict({ - 'prompt_template': dict({ - 'default': 'Hello, {name}!', - 'description': 'The prompt template for this chain', - 'title': 'Prompt Template', - 'type': 'string', - }), - }), - 'title': 'Configurable', - 'type': 
'object', - }), - }), - 'properties': dict({ - 'configurable': dict({ - '$ref': '#/$defs/Configurable', - }), - }), - 'title': 'RunnableConfigurableFieldsConfig', - 'type': 'object', - }) -# --- -# name: test_configurable_fields[schema4] - dict({ - '$defs': dict({ - 'Configurable': dict({ - 'properties': dict({ - 'llm_responses': dict({ - 'default': list([ - 'a', - ]), - 'description': 'A list of fake responses for this LLM', - 'items': dict({ - 'type': 'string', - }), - 'title': 'LLM Responses', - 'type': 'array', - }), - 'prompt_template': dict({ - 'default': 'Hello, {name}!', - 'description': 'The prompt template for this chain', - 'title': 'Prompt Template', - 'type': 'string', - }), - }), - 'title': 'Configurable', - 'type': 'object', - }), - }), - 'properties': dict({ - 'configurable': dict({ - '$ref': '#/$defs/Configurable', - }), - }), - 'title': 'RunnableSequenceConfig', - 'type': 'object', - }) -# --- -# name: test_configurable_fields[schema5] - dict({ - '$defs': dict({ - 'Configurable': dict({ - 'properties': dict({ - 'llm_responses': dict({ - 'default': list([ - 'a', - ]), - 'description': 'A list of fake responses for this LLM', - 'items': dict({ - 'type': 'string', - }), - 'title': 'LLM Responses', - 'type': 'array', - }), - 'other_responses': dict({ - 'default': list([ - 'a', - ]), - 'items': dict({ - 'type': 'string', - }), - 'title': 'Other Responses', - 'type': 'array', - }), - 'prompt_template': dict({ - 'default': 'Hello, {name}!', - 'description': 'The prompt template for this chain', - 'title': 'Prompt Template', - 'type': 'string', - }), - }), - 'title': 'Configurable', - 'type': 'object', - }), - }), - 'properties': dict({ - 'configurable': dict({ - '$ref': '#/$defs/Configurable', - }), - }), - 'title': 'RunnableSequenceConfig', - 'type': 'object', - }) -# --- -# name: test_configurable_fields_example[schema7] - dict({ - '$defs': dict({ - 'Chat_Responses': dict({ - 'title': 'Chat Responses', - }), - 'Configurable': dict({ - 'properties': dict({ - 'chat_responses': dict({ - 'default': list([ - 'hello', - 'bye', - ]), - 'items': dict({ - '$ref': '#/$defs/Chat_Responses', - }), - 'title': 'Chat Responses', - 'type': 'array', - }), - 'llm': dict({ - '$ref': '#/$defs/LLM', - 'default': 'default', - }), - 'llm_responses': dict({ - 'default': list([ - 'a', - ]), - 'description': 'A list of fake responses for this LLM', - 'items': dict({ - 'type': 'string', - }), - 'title': 'LLM Responses', - 'type': 'array', - }), - 'prompt_template': dict({ - '$ref': '#/$defs/Prompt_Template', - 'default': 'hello', - 'description': 'The prompt template for this chain', - }), - }), - 'title': 'Configurable', - 'type': 'object', - }), - 'LLM': dict({ - 'title': 'LLM', - }), - 'Prompt_Template': dict({ - 'title': 'Prompt Template', - }), - }), - 'properties': dict({ - 'configurable': dict({ - '$ref': '#/$defs/Configurable', - }), - }), - 'title': 'RunnableSequenceConfig', - 'type': 'object', - }) -# --- -# name: test_configurable_fields_prefix_keys[schema6] - dict({ - 'definitions': dict({ - 'Chat_Responses': dict({ - 'title': 'Chat Responses', - }), - 'Configurable': dict({ - 'properties': dict({ - 'chat_sleep': dict({ - 'anyOf': list([ - dict({ - 'type': 'number', - }), - dict({ - 'type': 'null', - }), - ]), - 'default': None, - 'title': 'Chat Sleep', - }), - 'llm': dict({ - '$ref': '#/definitions/LLM', - 'default': 'default', - }), - 'llm==chat/responses': dict({ - 'default': list([ - 'hello', - 'bye', - ]), - 'items': dict({ - '$ref': '#/definitions/Chat_Responses', - }), - 'title': 
'Chat Responses', - 'type': 'array', - }), - 'llm==default/responses': dict({ - 'default': list([ - 'a', - ]), - 'description': 'A list of fake responses for this LLM', - 'items': dict({ - 'type': 'string', - }), - 'title': 'LLM Responses', - 'type': 'array', - }), - 'prompt_template': dict({ - '$ref': '#/definitions/Prompt_Template', - 'default': 'hello', - 'description': 'The prompt template for this chain', - }), - }), - 'title': 'Configurable', - 'type': 'object', - }), - 'LLM': dict({ - 'title': 'LLM', - }), - 'Prompt_Template': dict({ - 'title': 'Prompt Template', - }), - }), - 'properties': dict({ - 'configurable': dict({ - '$ref': '#/definitions/Configurable', - }), - }), - 'title': 'RunnableSequenceConfig', - 'type': 'object', - }) -# --- # name: test_each ''' { @@ -1009,7 +742,7 @@ # name: test_prompt_with_chat_model ''' ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a nice assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})]) - | FakeListChatModel(output_version='v0', responses=['foo']) + | FakeListChatModel(responses=['foo']) ''' # --- # name: test_prompt_with_chat_model.1 @@ -1109,7 +842,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=['foo'])", + "repr": "FakeListChatModel(responses=['foo'])", "name": "FakeListChatModel" } }, @@ -1220,7 +953,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=['foo, bar'])", + "repr": "FakeListChatModel(responses=['foo, bar'])", "name": "FakeListChatModel" } ], @@ -1249,7 +982,7 @@ # name: test_prompt_with_chat_model_async ''' ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a nice assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})]) - | FakeListChatModel(output_version='v0', responses=['foo']) + | FakeListChatModel(responses=['foo']) ''' # --- # name: test_prompt_with_chat_model_async.1 @@ -1349,7 +1082,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=['foo'])", + "repr": "FakeListChatModel(responses=['foo'])", "name": "FakeListChatModel" } }, @@ -2334,10 +2067,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and - do not contain the tool_call_id field. + ``FunctionMessage`` are an older version of the ``ToolMessage`` schema, and + do not contain the ``tool_call_id`` field. - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -2467,7 +2200,7 @@ 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. 
+ ``HumanMessage``s are messages that are passed in from a human to the model. Example: @@ -2699,10 +2432,6 @@ ]), 'title': 'Error', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -2714,17 +2443,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'anyOf': list([ dict({ @@ -2742,10 +2460,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -2955,8 +2672,8 @@ "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} - and an identifier of "123". + This represents a request to call the tool named ``'foo'`` with arguments + ``{"a": 1}`` and an identifier of ``'123'``. ''', 'properties': dict({ 'args': dict({ @@ -2995,9 +2712,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunk``s (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. + values of ``index`` are equal and not None. Example: @@ -3075,10 +2792,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result - is encoded inside the `content` field. + ``ToolMessage``s contain the result of a tool invocation. Typically, the result + is encoded inside the ``content`` field. - Example: A ToolMessage representing a result of 42 from a tool call with id + Example: A ``ToolMessage`` representing a result of ``42`` from a tool call with id .. code-block:: python @@ -3087,7 +2804,7 @@ ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - Example: A ToolMessage where only part of the tool output is sent to the model + Example: A ``ToolMessage`` where only part of the tool output is sent to the model and the full output is passed in to artifact. .. versionadded:: 0.2.17 @@ -3108,7 +2825,7 @@ tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -3823,10 +3540,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and - do not contain the tool_call_id field. + ``FunctionMessage`` are an older version of the ``ToolMessage`` schema, and + do not contain the ``tool_call_id`` field. - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -3956,7 +3673,7 @@ 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. + ``HumanMessage``s are messages that are passed in from a human to the model. 
Example: @@ -4188,10 +3905,6 @@ ]), 'title': 'Error', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -4203,17 +3916,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'anyOf': list([ dict({ @@ -4231,10 +3933,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -4463,8 +4164,8 @@ "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} - and an identifier of "123". + This represents a request to call the tool named ``'foo'`` with arguments + ``{"a": 1}`` and an identifier of ``'123'``. ''', 'properties': dict({ 'args': dict({ @@ -4503,9 +4204,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunk``s (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. + values of ``index`` are equal and not None. Example: @@ -4583,10 +4284,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result - is encoded inside the `content` field. + ``ToolMessage``s contain the result of a tool invocation. Typically, the result + is encoded inside the ``content`` field. - Example: A ToolMessage representing a result of 42 from a tool call with id + Example: A ``ToolMessage`` representing a result of ``42`` from a tool call with id .. code-block:: python @@ -4595,7 +4296,7 @@ ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - Example: A ToolMessage where only part of the tool output is sent to the model + Example: A ``ToolMessage`` where only part of the tool output is sent to the model and the full output is passed in to artifact. .. versionadded:: 0.2.17 @@ -4616,7 +4317,7 @@ tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -5343,10 +5044,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and - do not contain the tool_call_id field. + ``FunctionMessage`` are an older version of the ``ToolMessage`` schema, and + do not contain the ``tool_call_id`` field. - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -5476,7 +5177,7 @@ 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. + ``HumanMessage``s are messages that are passed in from a human to the model. 
Example: @@ -5708,10 +5409,6 @@ ]), 'title': 'Error', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -5723,17 +5420,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'anyOf': list([ dict({ @@ -5751,10 +5437,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -5983,8 +5668,8 @@ "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} - and an identifier of "123". + This represents a request to call the tool named ``'foo'`` with arguments + ``{"a": 1}`` and an identifier of ``'123'``. ''', 'properties': dict({ 'args': dict({ @@ -6023,9 +5708,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunk``s (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. + values of ``index`` are equal and not None. Example: @@ -6103,10 +5788,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result - is encoded inside the `content` field. + ``ToolMessage``s contain the result of a tool invocation. Typically, the result + is encoded inside the ``content`` field. - Example: A ToolMessage representing a result of 42 from a tool call with id + Example: A ``ToolMessage`` representing a result of ``42`` from a tool call with id .. code-block:: python @@ -6115,7 +5800,7 @@ ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - Example: A ToolMessage where only part of the tool output is sent to the model + Example: A ``ToolMessage`` where only part of the tool output is sent to the model and the full output is passed in to artifact. .. versionadded:: 0.2.17 @@ -6136,7 +5821,7 @@ tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -6738,10 +6423,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and - do not contain the tool_call_id field. + ``FunctionMessage`` are an older version of the ``ToolMessage`` schema, and + do not contain the ``tool_call_id`` field. - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -6871,7 +6556,7 @@ 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. + ``HumanMessage``s are messages that are passed in from a human to the model. 
Example: @@ -7103,10 +6788,6 @@ ]), 'title': 'Error', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -7118,17 +6799,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'anyOf': list([ dict({ @@ -7146,10 +6816,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -7359,8 +7028,8 @@ "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} - and an identifier of "123". + This represents a request to call the tool named ``'foo'`` with arguments + ``{"a": 1}`` and an identifier of ``'123'``. ''', 'properties': dict({ 'args': dict({ @@ -7399,9 +7068,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunk``s (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. + values of ``index`` are equal and not None. Example: @@ -7479,10 +7148,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result - is encoded inside the `content` field. + ``ToolMessage``s contain the result of a tool invocation. Typically, the result + is encoded inside the ``content`` field. - Example: A ToolMessage representing a result of 42 from a tool call with id + Example: A ``ToolMessage`` representing a result of ``42`` from a tool call with id .. code-block:: python @@ -7491,7 +7160,7 @@ ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - Example: A ToolMessage where only part of the tool output is sent to the model + Example: A ``ToolMessage`` where only part of the tool output is sent to the model and the full output is passed in to artifact. .. versionadded:: 0.2.17 @@ -7512,7 +7181,7 @@ tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -8269,10 +7938,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and - do not contain the tool_call_id field. + ``FunctionMessage`` are an older version of the ``ToolMessage`` schema, and + do not contain the ``tool_call_id`` field. - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -8402,7 +8071,7 @@ 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. + ``HumanMessage``s are messages that are passed in from a human to the model. 
Example: @@ -8634,10 +8303,6 @@ ]), 'title': 'Error', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -8649,17 +8314,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'anyOf': list([ dict({ @@ -8677,10 +8331,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -8909,8 +8562,8 @@ "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} - and an identifier of "123". + This represents a request to call the tool named ``'foo'`` with arguments + ``{"a": 1}`` and an identifier of ``'123'``. ''', 'properties': dict({ 'args': dict({ @@ -8949,9 +8602,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunk``s (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. + values of ``index`` are equal and not None. Example: @@ -9029,10 +8682,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result - is encoded inside the `content` field. + ``ToolMessage``s contain the result of a tool invocation. Typically, the result + is encoded inside the ``content`` field. - Example: A ToolMessage representing a result of 42 from a tool call with id + Example: A ``ToolMessage`` representing a result of ``42`` from a tool call with id .. code-block:: python @@ -9041,7 +8694,7 @@ ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - Example: A ToolMessage where only part of the tool output is sent to the model + Example: A ``ToolMessage`` where only part of the tool output is sent to the model and the full output is passed in to artifact. .. versionadded:: 0.2.17 @@ -9062,7 +8715,7 @@ tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -9709,10 +9362,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and - do not contain the tool_call_id field. + ``FunctionMessage`` are an older version of the ``ToolMessage`` schema, and + do not contain the ``tool_call_id`` field. - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -9842,7 +9495,7 @@ 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. + ``HumanMessage``s are messages that are passed in from a human to the model. 
Example: @@ -10074,10 +9727,6 @@ ]), 'title': 'Error', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -10089,17 +9738,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'anyOf': list([ dict({ @@ -10117,10 +9755,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -10330,8 +9967,8 @@ "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} - and an identifier of "123". + This represents a request to call the tool named ``'foo'`` with arguments + ``{"a": 1}`` and an identifier of ``'123'``. ''', 'properties': dict({ 'args': dict({ @@ -10370,9 +10007,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunk``s (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. + values of ``index`` are equal and not None. Example: @@ -10450,10 +10087,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result - is encoded inside the `content` field. + ``ToolMessage``s contain the result of a tool invocation. Typically, the result + is encoded inside the ``content`` field. - Example: A ToolMessage representing a result of 42 from a tool call with id + Example: A ``ToolMessage`` representing a result of ``42`` from a tool call with id .. code-block:: python @@ -10462,7 +10099,7 @@ ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - Example: A ToolMessage where only part of the tool output is sent to the model + Example: A ``ToolMessage`` where only part of the tool output is sent to the model and the full output is passed in to artifact. .. versionadded:: 0.2.17 @@ -10483,7 +10120,7 @@ tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -11148,10 +10785,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and - do not contain the tool_call_id field. + ``FunctionMessage`` are an older version of the ``ToolMessage`` schema, and + do not contain the ``tool_call_id`` field. - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -11281,7 +10918,7 @@ 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. + ``HumanMessage``s are messages that are passed in from a human to the model. 
Example: @@ -11513,10 +11150,6 @@ ]), 'title': 'Error', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -11528,17 +11161,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'anyOf': list([ dict({ @@ -11556,10 +11178,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -11799,8 +11420,8 @@ "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} - and an identifier of "123". + This represents a request to call the tool named ``'foo'`` with arguments + ``{"a": 1}`` and an identifier of ``'123'``. ''', 'properties': dict({ 'args': dict({ @@ -11839,9 +11460,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunk``s (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. + values of ``index`` are equal and not None. Example: @@ -11919,10 +11540,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result - is encoded inside the `content` field. + ``ToolMessage``s contain the result of a tool invocation. Typically, the result + is encoded inside the ``content`` field. - Example: A ToolMessage representing a result of 42 from a tool call with id + Example: A ``ToolMessage`` representing a result of ``42`` from a tool call with id .. code-block:: python @@ -11931,7 +11552,7 @@ ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - Example: A ToolMessage where only part of the tool output is sent to the model + Example: A ``ToolMessage`` where only part of the tool output is sent to the model and the full output is passed in to artifact. .. versionadded:: 0.2.17 @@ -11952,7 +11573,7 @@ tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -12629,10 +12250,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and - do not contain the tool_call_id field. + ``FunctionMessage`` are an older version of the ``ToolMessage`` schema, and + do not contain the ``tool_call_id`` field. - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -12762,7 +12383,7 @@ 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. + ``HumanMessage``s are messages that are passed in from a human to the model. 
Example: @@ -12994,10 +12615,6 @@ ]), 'title': 'Error', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -13009,17 +12626,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'anyOf': list([ dict({ @@ -13037,10 +12643,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -13269,8 +12874,8 @@ "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} - and an identifier of "123". + This represents a request to call the tool named ``'foo'`` with arguments + ``{"a": 1}`` and an identifier of ``'123'``. ''', 'properties': dict({ 'args': dict({ @@ -13309,9 +12914,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunk``s (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. + values of ``index`` are equal and not None. Example: @@ -13389,10 +12994,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result - is encoded inside the `content` field. + ``ToolMessage``s contain the result of a tool invocation. Typically, the result + is encoded inside the ``content`` field. - Example: A ToolMessage representing a result of 42 from a tool call with id + Example: A ``ToolMessage`` representing a result of ``42`` from a tool call with id .. code-block:: python @@ -13401,7 +13006,7 @@ ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - Example: A ToolMessage where only part of the tool output is sent to the model + Example: A ``ToolMessage`` where only part of the tool output is sent to the model and the full output is passed in to artifact. .. versionadded:: 0.2.17 @@ -13422,7 +13027,7 @@ tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -13655,7 +13260,7 @@ just_to_test_lambda: RunnableLambda(...) 
} | ChatPromptTemplate(input_variables=['documents', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a nice assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['documents', 'question'], input_types={}, partial_variables={}, template='Context:\n{documents}\n\nQuestion:\n{question}'), additional_kwargs={})]) - | FakeListChatModel(output_version='v0', responses=['foo, bar']) + | FakeListChatModel(responses=['foo, bar']) | CommaSeparatedListOutputParser() ''' # --- @@ -13858,7 +13463,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=['foo, bar'])", + "repr": "FakeListChatModel(responses=['foo, bar'])", "name": "FakeListChatModel" } ], @@ -13884,7 +13489,7 @@ ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a nice assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})]) | RunnableLambda(...) | { - chat: FakeListChatModel(output_version='v0', responses=["i'm a chatbot"]), + chat: FakeListChatModel(responses=["i'm a chatbot"]), llm: FakeListLLM(responses=["i'm a textbot"]) } ''' @@ -14010,7 +13615,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=[\"i'm a chatbot\"])", + "repr": "FakeListChatModel(responses=[\"i'm a chatbot\"])", "name": "FakeListChatModel" }, "llm": { @@ -14034,178 +13639,3 @@ } ''' # --- -# name: test_seq_prompt_map - ''' - { - "lc": 1, - "type": "constructor", - "id": [ - "langchain", - "schema", - "runnable", - "RunnableSequence" - ], - "kwargs": { - "first": { - "lc": 1, - "type": "constructor", - "id": [ - "langchain", - "prompts", - "chat", - "ChatPromptTemplate" - ], - "kwargs": { - "input_variables": [ - "question" - ], - "messages": [ - { - "lc": 1, - "type": "constructor", - "id": [ - "langchain", - "prompts", - "chat", - "SystemMessagePromptTemplate" - ], - "kwargs": { - "prompt": { - "lc": 1, - "type": "constructor", - "id": [ - "langchain", - "prompts", - "prompt", - "PromptTemplate" - ], - "kwargs": { - "input_variables": [], - "template": "You are a nice assistant.", - "template_format": "f-string" - }, - "name": "PromptTemplate" - } - } - }, - { - "lc": 1, - "type": "constructor", - "id": [ - "langchain", - "prompts", - "chat", - "HumanMessagePromptTemplate" - ], - "kwargs": { - "prompt": { - "lc": 1, - "type": "constructor", - "id": [ - "langchain", - "prompts", - "prompt", - "PromptTemplate" - ], - "kwargs": { - "input_variables": [ - "question" - ], - "template": "{question}", - "template_format": "f-string" - }, - "name": "PromptTemplate" - } - } - } - ] - }, - "name": "ChatPromptTemplate" - }, - "middle": [ - { - "lc": 1, - "type": "not_implemented", - "id": [ - "langchain_core", - "runnables", - "base", - "RunnableLambda" - ], - "repr": "RunnableLambda(...)" - } - ], - "last": { - "lc": 1, - "type": "constructor", - "id": [ - "langchain", - "schema", - "runnable", - "RunnableParallel" - ], - "kwargs": { - "steps__": { - "chat": { - "lc": 1, - "type": "constructor", - "id": [ - "langchain", - "schema", - "runnable", - "RunnableBinding" - ], - 
"kwargs": { - "bound": { - "lc": 1, - "type": "not_implemented", - "id": [ - "langchain_core", - "language_models", - "fake_chat_models", - "FakeListChatModel" - ], - "repr": "FakeListChatModel(output_version='v0', responses=[\"i'm a chatbot\"])", - "name": "FakeListChatModel" - }, - "kwargs": { - "stop": [ - "Thought:" - ] - }, - "config": {} - }, - "name": "FakeListChatModel" - }, - "llm": { - "lc": 1, - "type": "not_implemented", - "id": [ - "langchain_core", - "language_models", - "fake", - "FakeListLLM" - ], - "repr": "FakeListLLM(responses=[\"i'm a textbot\"])", - "name": "FakeListLLM" - }, - "passthrough": { - "lc": 1, - "type": "not_implemented", - "id": [ - "langchain_core", - "runnables", - "base", - "RunnableLambda" - ], - "repr": "RunnableLambda(...)" - } - } - }, - "name": "RunnableParallel" - } - }, - "name": "RunnableSequence" - } - ''' -# --- From 4a1ac7f8298b21a275ab6b43b2fd5913c5c49679 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Mon, 18 Aug 2025 16:35:06 -0400 Subject: [PATCH 52/56] ss --- .../prompts/__snapshots__/test_chat.ambr | 52 +-- .../runnables/__snapshots__/test_graph.ambr | 26 +- .../__snapshots__/test_runnable.ambr | 442 ++++++++++++++++++ 3 files changed, 481 insertions(+), 39 deletions(-) diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index 10168ca102b27..80ab67312f33a 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -382,10 +382,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and - do not contain the tool_call_id field. + ``FunctionMessage`` are an older version of the ``ToolMessage`` schema, and + do not contain the ``tool_call_id`` field. - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -517,7 +517,7 @@ 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. + ``HumanMessage``s are messages that are passed in from a human to the model. Example: @@ -1010,8 +1010,8 @@ "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} - and an identifier of "123". + This represents a request to call the tool named ``'foo'`` with arguments + ``{"a": 1}`` and an identifier of ``'123'``. ''', 'properties': dict({ 'args': dict({ @@ -1051,9 +1051,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunk``s (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. + values of ``index`` are equal and not None. Example: @@ -1132,10 +1132,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result - is encoded inside the `content` field. + ``ToolMessage``s contain the result of a tool invocation. Typically, the result + is encoded inside the ``content`` field. 
- Example: A ToolMessage representing a result of 42 from a tool call with id + Example: A ``ToolMessage`` representing a result of ``42`` from a tool call with id .. code-block:: python @@ -1144,7 +1144,7 @@ ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - Example: A ToolMessage where only part of the tool output is sent to the model + Example: A ``ToolMessage`` where only part of the tool output is sent to the model and the full output is passed in to artifact. .. versionadded:: 0.2.17 @@ -1165,7 +1165,7 @@ tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -1827,10 +1827,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and - do not contain the tool_call_id field. + ``FunctionMessage`` are an older version of the ``ToolMessage`` schema, and + do not contain the ``tool_call_id`` field. - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -1962,7 +1962,7 @@ 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. + ``HumanMessage``s are messages that are passed in from a human to the model. Example: @@ -2455,8 +2455,8 @@ "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} - and an identifier of "123". + This represents a request to call the tool named ``'foo'`` with arguments + ``{"a": 1}`` and an identifier of ``'123'``. ''', 'properties': dict({ 'args': dict({ @@ -2496,9 +2496,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunk``s (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. + values of ``index`` are equal and not None. Example: @@ -2577,10 +2577,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result - is encoded inside the `content` field. + ``ToolMessage``s contain the result of a tool invocation. Typically, the result + is encoded inside the ``content`` field. - Example: A ToolMessage representing a result of 42 from a tool call with id + Example: A ``ToolMessage`` representing a result of ``42`` from a tool call with id .. code-block:: python @@ -2589,7 +2589,7 @@ ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - Example: A ToolMessage where only part of the tool output is sent to the model + Example: A ``ToolMessage`` where only part of the tool output is sent to the model and the full output is passed in to artifact. .. 
versionadded:: 0.2.17 @@ -2610,7 +2610,7 @@ tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index 7eb60d97b30cb..8ed1ab684bec0 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -785,10 +785,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and - do not contain the tool_call_id field. + ``FunctionMessage`` are an older version of the ``ToolMessage`` schema, and + do not contain the ``tool_call_id`` field. - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. ''', @@ -920,7 +920,7 @@ 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. + ``HumanMessage``s are messages that are passed in from a human to the model. Example: @@ -1413,8 +1413,8 @@ "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} - and an identifier of "123". + This represents a request to call the tool named ``'foo'`` with arguments + ``{"a": 1}`` and an identifier of ``'123'``. ''', 'properties': dict({ 'args': dict({ @@ -1454,9 +1454,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunk``s (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. + values of ``index`` are equal and not None. Example: @@ -1535,10 +1535,10 @@ 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result - is encoded inside the `content` field. + ``ToolMessage``s contain the result of a tool invocation. Typically, the result + is encoded inside the ``content`` field. - Example: A ToolMessage representing a result of 42 from a tool call with id + Example: A ``ToolMessage`` representing a result of ``42`` from a tool call with id .. code-block:: python @@ -1547,7 +1547,7 @@ ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - Example: A ToolMessage where only part of the tool output is sent to the model + Example: A ``ToolMessage`` where only part of the tool output is sent to the model and the full output is passed in to artifact. .. versionadded:: 0.2.17 @@ -1568,7 +1568,7 @@ tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the + The ``tool_call_id`` field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. 
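The ``tool_call_id`` association described in the snapshot docstrings above can be summarized with a minimal illustrative pairing; the tool name, arguments, and id here are made up:

.. code-block:: python

    from langchain_core.messages import AIMessage, ToolMessage

    # The model requests a tool call carrying a unique id...
    request = AIMessage(
        content="",
        tool_calls=[{"name": "add", "args": {"a": 1, "b": 2}, "id": "call_abc123"}],
    )

    # ...and the tool's result is passed back with the matching tool_call_id.
    result = ToolMessage(content="3", tool_call_id="call_abc123")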
''', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index 01356251e65ce..87d9a140a9a99 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -483,6 +483,273 @@ RunTree(id=00000000-0000-4000-8000-000000000000, name='RunnableSequence', run_type='chain', dotted_order='20230101T000000000000Z00000000-0000-4000-8000-000000000000'), ]) # --- +# name: test_configurable_fields[schema2] + dict({ + '$defs': dict({ + 'Configurable': dict({ + 'properties': dict({ + 'llm_responses': dict({ + 'default': list([ + 'a', + ]), + 'description': 'A list of fake responses for this LLM', + 'items': dict({ + 'type': 'string', + }), + 'title': 'LLM Responses', + 'type': 'array', + }), + }), + 'title': 'Configurable', + 'type': 'object', + }), + }), + 'properties': dict({ + 'configurable': dict({ + '$ref': '#/$defs/Configurable', + }), + }), + 'title': 'RunnableConfigurableFieldsConfig', + 'type': 'object', + }) +# --- +# name: test_configurable_fields[schema3] + dict({ + '$defs': dict({ + 'Configurable': dict({ + 'properties': dict({ + 'prompt_template': dict({ + 'default': 'Hello, {name}!', + 'description': 'The prompt template for this chain', + 'title': 'Prompt Template', + 'type': 'string', + }), + }), + 'title': 'Configurable', + 'type': 'object', + }), + }), + 'properties': dict({ + 'configurable': dict({ + '$ref': '#/$defs/Configurable', + }), + }), + 'title': 'RunnableConfigurableFieldsConfig', + 'type': 'object', + }) +# --- +# name: test_configurable_fields[schema4] + dict({ + '$defs': dict({ + 'Configurable': dict({ + 'properties': dict({ + 'llm_responses': dict({ + 'default': list([ + 'a', + ]), + 'description': 'A list of fake responses for this LLM', + 'items': dict({ + 'type': 'string', + }), + 'title': 'LLM Responses', + 'type': 'array', + }), + 'prompt_template': dict({ + 'default': 'Hello, {name}!', + 'description': 'The prompt template for this chain', + 'title': 'Prompt Template', + 'type': 'string', + }), + }), + 'title': 'Configurable', + 'type': 'object', + }), + }), + 'properties': dict({ + 'configurable': dict({ + '$ref': '#/$defs/Configurable', + }), + }), + 'title': 'RunnableSequenceConfig', + 'type': 'object', + }) +# --- +# name: test_configurable_fields[schema5] + dict({ + '$defs': dict({ + 'Configurable': dict({ + 'properties': dict({ + 'llm_responses': dict({ + 'default': list([ + 'a', + ]), + 'description': 'A list of fake responses for this LLM', + 'items': dict({ + 'type': 'string', + }), + 'title': 'LLM Responses', + 'type': 'array', + }), + 'other_responses': dict({ + 'default': list([ + 'a', + ]), + 'items': dict({ + 'type': 'string', + }), + 'title': 'Other Responses', + 'type': 'array', + }), + 'prompt_template': dict({ + 'default': 'Hello, {name}!', + 'description': 'The prompt template for this chain', + 'title': 'Prompt Template', + 'type': 'string', + }), + }), + 'title': 'Configurable', + 'type': 'object', + }), + }), + 'properties': dict({ + 'configurable': dict({ + '$ref': '#/$defs/Configurable', + }), + }), + 'title': 'RunnableSequenceConfig', + 'type': 'object', + }) +# --- +# name: test_configurable_fields_example[schema7] + dict({ + '$defs': dict({ + 'Chat_Responses': dict({ + 'title': 'Chat Responses', + }), + 'Configurable': dict({ + 'properties': dict({ + 'chat_responses': dict({ + 'default': list([ + 'hello', + 'bye', + ]), + 'items': dict({ + 
'$ref': '#/$defs/Chat_Responses', + }), + 'title': 'Chat Responses', + 'type': 'array', + }), + 'llm': dict({ + '$ref': '#/$defs/LLM', + 'default': 'default', + }), + 'llm_responses': dict({ + 'default': list([ + 'a', + ]), + 'description': 'A list of fake responses for this LLM', + 'items': dict({ + 'type': 'string', + }), + 'title': 'LLM Responses', + 'type': 'array', + }), + 'prompt_template': dict({ + '$ref': '#/$defs/Prompt_Template', + 'default': 'hello', + 'description': 'The prompt template for this chain', + }), + }), + 'title': 'Configurable', + 'type': 'object', + }), + 'LLM': dict({ + 'title': 'LLM', + }), + 'Prompt_Template': dict({ + 'title': 'Prompt Template', + }), + }), + 'properties': dict({ + 'configurable': dict({ + '$ref': '#/$defs/Configurable', + }), + }), + 'title': 'RunnableSequenceConfig', + 'type': 'object', + }) +# --- +# name: test_configurable_fields_prefix_keys[schema6] + dict({ + 'definitions': dict({ + 'Chat_Responses': dict({ + 'title': 'Chat Responses', + }), + 'Configurable': dict({ + 'properties': dict({ + 'chat_sleep': dict({ + 'anyOf': list([ + dict({ + 'type': 'number', + }), + dict({ + 'type': 'null', + }), + ]), + 'default': None, + 'title': 'Chat Sleep', + }), + 'llm': dict({ + '$ref': '#/definitions/LLM', + 'default': 'default', + }), + 'llm==chat/responses': dict({ + 'default': list([ + 'hello', + 'bye', + ]), + 'items': dict({ + '$ref': '#/definitions/Chat_Responses', + }), + 'title': 'Chat Responses', + 'type': 'array', + }), + 'llm==default/responses': dict({ + 'default': list([ + 'a', + ]), + 'description': 'A list of fake responses for this LLM', + 'items': dict({ + 'type': 'string', + }), + 'title': 'LLM Responses', + 'type': 'array', + }), + 'prompt_template': dict({ + '$ref': '#/definitions/Prompt_Template', + 'default': 'hello', + 'description': 'The prompt template for this chain', + }), + }), + 'title': 'Configurable', + 'type': 'object', + }), + 'LLM': dict({ + 'title': 'LLM', + }), + 'Prompt_Template': dict({ + 'title': 'Prompt Template', + }), + }), + 'properties': dict({ + 'configurable': dict({ + '$ref': '#/definitions/Configurable', + }), + }), + 'title': 'RunnableSequenceConfig', + 'type': 'object', + }) +# --- # name: test_each ''' { @@ -13639,3 +13906,178 @@ } ''' # --- +# name: test_seq_prompt_map + ''' + { + "lc": 1, + "type": "constructor", + "id": [ + "langchain", + "schema", + "runnable", + "RunnableSequence" + ], + "kwargs": { + "first": { + "lc": 1, + "type": "constructor", + "id": [ + "langchain", + "prompts", + "chat", + "ChatPromptTemplate" + ], + "kwargs": { + "input_variables": [ + "question" + ], + "messages": [ + { + "lc": 1, + "type": "constructor", + "id": [ + "langchain", + "prompts", + "chat", + "SystemMessagePromptTemplate" + ], + "kwargs": { + "prompt": { + "lc": 1, + "type": "constructor", + "id": [ + "langchain", + "prompts", + "prompt", + "PromptTemplate" + ], + "kwargs": { + "input_variables": [], + "template": "You are a nice assistant.", + "template_format": "f-string" + }, + "name": "PromptTemplate" + } + } + }, + { + "lc": 1, + "type": "constructor", + "id": [ + "langchain", + "prompts", + "chat", + "HumanMessagePromptTemplate" + ], + "kwargs": { + "prompt": { + "lc": 1, + "type": "constructor", + "id": [ + "langchain", + "prompts", + "prompt", + "PromptTemplate" + ], + "kwargs": { + "input_variables": [ + "question" + ], + "template": "{question}", + "template_format": "f-string" + }, + "name": "PromptTemplate" + } + } + } + ] + }, + "name": "ChatPromptTemplate" + }, + "middle": [ + { + "lc": 1, + 
"type": "not_implemented", + "id": [ + "langchain_core", + "runnables", + "base", + "RunnableLambda" + ], + "repr": "RunnableLambda(...)" + } + ], + "last": { + "lc": 1, + "type": "constructor", + "id": [ + "langchain", + "schema", + "runnable", + "RunnableParallel" + ], + "kwargs": { + "steps__": { + "chat": { + "lc": 1, + "type": "constructor", + "id": [ + "langchain", + "schema", + "runnable", + "RunnableBinding" + ], + "kwargs": { + "bound": { + "lc": 1, + "type": "not_implemented", + "id": [ + "langchain_core", + "language_models", + "fake_chat_models", + "FakeListChatModel" + ], + "repr": "FakeListChatModel(responses=[\"i'm a chatbot\"])", + "name": "FakeListChatModel" + }, + "kwargs": { + "stop": [ + "Thought:" + ] + }, + "config": {} + }, + "name": "FakeListChatModel" + }, + "llm": { + "lc": 1, + "type": "not_implemented", + "id": [ + "langchain_core", + "language_models", + "fake", + "FakeListLLM" + ], + "repr": "FakeListLLM(responses=[\"i'm a textbot\"])", + "name": "FakeListLLM" + }, + "passthrough": { + "lc": 1, + "type": "not_implemented", + "id": [ + "langchain_core", + "runnables", + "base", + "RunnableLambda" + ], + "repr": "RunnableLambda(...)" + } + } + }, + "name": "RunnableParallel" + } + }, + "name": "RunnableSequence" + } + ''' +# --- From 7b873ad2d6f70eb8d988e62bd129b85b23ff9860 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 19 Aug 2025 15:15:59 -0400 Subject: [PATCH 53/56] . --- .../langchain_core/language_models/fake_chat_models.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/libs/core/langchain_core/language_models/fake_chat_models.py b/libs/core/langchain_core/language_models/fake_chat_models.py index 3430db60f2d43..e0c49021fc861 100644 --- a/libs/core/langchain_core/language_models/fake_chat_models.py +++ b/libs/core/langchain_core/language_models/fake_chat_models.py @@ -19,7 +19,7 @@ class FakeMessagesListChatModel(BaseChatModel): - """Fake ChatModel for testing purposes.""" + """Fake ``ChatModel`` for testing purposes.""" responses: list[BaseMessage] """List of responses to **cycle** through in order.""" @@ -211,10 +211,11 @@ class GenericFakeChatModel(BaseChatModel): """Generic fake chat model that can be used to test the chat model interface. * Chat model should be usable in both sync and async tests - * Invokes on_llm_new_token to allow for testing of callback related code for new + * Invokes ``on_llm_new_token`` to allow for testing of callback related code for new tokens. * Includes logic to break messages into message chunk to facilitate testing of streaming. + """ messages: Iterator[Union[AIMessage, str]] @@ -229,6 +230,7 @@ class GenericFakeChatModel(BaseChatModel): .. warning:: Streaming is not implemented yet. We should try to implement it in the future by delegating to invoke and then breaking the resulting output into message chunks. + """ @override @@ -352,6 +354,7 @@ class ParrotFakeChatModel(BaseChatModel): """Generic fake chat model that can be used to test the chat model interface. * Chat model should be usable in both sync and async tests + """ @override From 1062ad9b8e57928aa234757188314252f89df7a6 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 19 Aug 2025 15:56:30 -0400 Subject: [PATCH 54/56] . 
---
 libs/core/tests/unit_tests/stubs.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/libs/core/tests/unit_tests/stubs.py b/libs/core/tests/unit_tests/stubs.py
index 5cd45afb41f48..57759ec9d47ff 100644
--- a/libs/core/tests/unit_tests/stubs.py
+++ b/libs/core/tests/unit_tests/stubs.py
@@ -15,34 +15,35 @@ def __eq__(self, other: object) -> bool:
 
 # The code below creates version of pydantic models
 # that will work in unit tests with AnyStr as id field
+
 # Please note that the `id` field is assigned AFTER the model is created
 # to workaround an issue with pydantic ignoring the __eq__ method on
 # subclassed strings.
 
 
 def _any_id_document(**kwargs: Any) -> Document:
-    """Create a document with an id field."""
+    """Create a `Document` with an `AnyStr` id field."""
     message = Document(**kwargs)
     message.id = AnyStr()
     return message
 
 
 def _any_id_ai_message(**kwargs: Any) -> AIMessage:
-    """Create ai message with an any id field."""
+    """Create an `AIMessage` with an `AnyStr` id field."""
     message = AIMessage(**kwargs)
     message.id = AnyStr()
     return message
 
 
 def _any_id_ai_message_chunk(**kwargs: Any) -> AIMessageChunk:
-    """Create ai message with an any id field."""
+    """Create an `AIMessageChunk` with an `AnyStr` id field."""
     message = AIMessageChunk(**kwargs)
     message.id = AnyStr()
     return message
 
 
 def _any_id_human_message(**kwargs: Any) -> HumanMessage:
-    """Create a human with an any id field."""
+    """Create a `HumanMessage` with an `AnyStr` id field."""
     message = HumanMessage(**kwargs)
     message.id = AnyStr()
     return message

From 4d19be3ec9cb57859efa23ae6976f19844b5ecaf Mon Sep 17 00:00:00 2001
From: Mason Daugherty
Date: Mon, 25 Aug 2025 14:10:55 -0400
Subject: [PATCH 55/56] .

---
 libs/core/langchain_core/messages/base.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py
index 6e2d3800d6647..49bc1121d85f3 100644
--- a/libs/core/langchain_core/messages/base.py
+++ b/libs/core/langchain_core/messages/base.py
@@ -21,8 +21,7 @@
 class BaseMessage(Serializable):
     """Base abstract message class.
 
-    Messages are the inputs and outputs of ChatModels.
-
+    Messages are the inputs and outputs of ``ChatModel``s.
     """
 
     content: Union[str, list[Union[str, dict]]]

From ee7391ba791d1cf1f66196521cafade79d938c35 Mon Sep 17 00:00:00 2001
From: Mason Daugherty
Date: Mon, 25 Aug 2025 14:22:47 -0400
Subject: [PATCH 56/56] .

---
 libs/core/langchain_core/messages/ai.py    |  8 +++++++-
 libs/core/langchain_core/messages/base.py  |  8 +++++++-
 libs/core/langchain_core/messages/human.py | 11 ++++++++---
 libs/core/langchain_core/messages/tool.py  | 12 ++++++++----
 4 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py
index 615d334e5f9d7..7efb8565cb382 100644
--- a/libs/core/langchain_core/messages/ai.py
+++ b/libs/core/langchain_core/messages/ai.py
@@ -184,10 +184,16 @@ def __init__(
         **kwargs: Any,
     ) -> None: ...
 
+    def __init__(
+        self,
+        content: Union[str, list[Union[str, dict]]],
+        **kwargs: Any,
+    ) -> None:
+        """Initialize AIMessage.
 
+        Args:
             content: The content of the message.
             kwargs: Additional arguments to pass to the parent class.
- """ super().__init__(content=content, **kwargs) diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 49bc1121d85f3..3c7199ae060a1 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -74,9 +74,15 @@ def __init__( **kwargs: Any, ) -> None: ... + def __init__( + self, + content: Union[str, list[Union[str, dict]]], + **kwargs: Any, + ) -> None: + """Initialize BaseMessage. + Args: content: The string contents of the message. - """ super().__init__(content=content, **kwargs) diff --git a/libs/core/langchain_core/messages/human.py b/libs/core/langchain_core/messages/human.py index a3ad7786c03ea..02383f91a876f 100644 --- a/libs/core/langchain_core/messages/human.py +++ b/libs/core/langchain_core/messages/human.py @@ -1,8 +1,7 @@ """Human message.""" -from typing import Any, Literal, Optional, Union, cast, overload +from typing import Any, Literal, Union, overload -from langchain_core.messages import content as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk @@ -54,10 +53,16 @@ def __init__( **kwargs: Any, ) -> None: ... + def __init__( + self, + content: Union[str, list[Union[str, dict]]], + **kwargs: Any, + ) -> None: + """Initialize HumanMessage. + Args: content: The string contents of the message. kwargs: Additional fields to pass to the message. - """ super().__init__(content=content, **kwargs) diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index 72c469c793196..37c43d99fa528 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -1,15 +1,13 @@ """Messages for tools.""" import json -from typing import Any, Literal, Optional, Union, cast, overload +from typing import Any, Literal, Optional, Union, overload from uuid import UUID from pydantic import Field, model_validator from typing_extensions import NotRequired, TypedDict, override -from langchain_core.messages import content as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content -from langchain_core.messages.content import InvalidToolCall as InvalidToolCall from langchain_core.utils._merge import merge_dicts, merge_obj @@ -150,10 +148,16 @@ def __init__( **kwargs: Any, ) -> None: ... + def __init__( + self, + content: Union[str, list[Union[str, dict]]], + **kwargs: Any, + ) -> None: + """Initialize ToolMessage. + Args: content: The string contents of the message. **kwargs: Additional fields. - """ super().__init__(content=content, **kwargs)