Skip to content

WIP: Add tests showing BedrockChatGenerator fails on multi-turn conversations with thinking enabled #2094

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions integrations/amazon_bedrock/tests/test_chat_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,57 @@ def test_live_run_with_multi_tool_calls(self, model_name, tools):
assert "paris" in final_message.text.lower()
assert "berlin" in final_message.text.lower()

@pytest.mark.skip(reason="This fails because we are missing the reasoning content in the second round of messages")
def test_live_run_with_tool_call_and_thinking(self, tools):
    """
    Live test: a two-round tool-calling conversation with Claude extended thinking enabled.

    Round 1 asks for the weather in Paris and expects the assistant to emit a tool call.
    Round 2 feeds the (mocked) tool result back and expects a plain-text answer.
    Currently skipped: the reasoning (thinking) content block is not carried into the
    second round of messages, which Bedrock rejects when thinking is enabled.
    """
    initial_messages = [ChatMessage.from_user("What's the weather like in Paris?")]
    component = AmazonBedrockChatGenerator(
        model="arn:aws:bedrock:us-east-1::inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
        tools=tools,
        generation_kwargs={
            # maxTokens must exceed the thinking budget_tokens
            "maxTokens": 8192,
            "thinking": {
                "type": "enabled",
                "budget_tokens": 1024,
            },
        },
    )
    results = component.run(messages=initial_messages)

    assert len(results["replies"]) > 0, "No replies received"

    # Find the first reply that carries a tool call (thinking models may emit
    # reasoning/text content before the toolUse block)
    tool_call_message = next((message for message in results["replies"] if message.tool_calls), None)

    assert tool_call_message is not None, "No message with tool call found"
    assert isinstance(tool_call_message, ChatMessage), "Tool message is not a ChatMessage instance"
    assert ChatMessage.is_from(tool_call_message, ChatRole.ASSISTANT), "Tool message is not from the assistant"

    tool_calls = tool_call_message.tool_calls
    assert len(tool_calls) == 1
    assert tool_calls[0].id, "Tool call does not contain value for 'id' key"
    assert tool_calls[0].tool_name == "weather"
    assert tool_calls[0].arguments["city"] == "Paris"
    assert tool_call_message.meta["finish_reason"] == "tool_use"

    # Mock the response we'd get from ToolInvoker
    tool_result_messages = [
        ChatMessage.from_tool(tool_result="22° C", origin=tool_call) for tool_call in tool_calls
    ]

    new_messages = [*initial_messages, tool_call_message, *tool_result_messages]
    results = component.run(new_messages)

    assert len(results["replies"]) == 1
    final_message = results["replies"][0]
    assert not final_message.tool_call
    assert len(final_message.text) > 0
    # Only Paris was asked about in this test; the previous "berlin" assertion was a
    # copy-paste leftover from the multi-tool-call test and would always fail here.
    assert "paris" in final_message.text.lower()

@pytest.mark.parametrize("model_name", STREAMING_TOOL_MODELS)
def test_live_run_with_multi_tool_calls_streaming(self, model_name, tools):
"""
Expand Down
69 changes: 69 additions & 0 deletions integrations/amazon_bedrock/tests/test_chat_generator_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,75 @@ def test_extract_replies_from_multi_tool_response(self, mock_boto3_session):
)
assert replies[0] == expected_message

def test_extract_replies_from_multi_tool_response_with_thinking(self, mock_boto3_session):
    """
    Parse a Bedrock Converse response whose assistant message contains a
    reasoningContent (extended-thinking) block alongside text and toolUse blocks.

    The fixture is a captured Converse API payload from a Claude 3.7 Sonnet
    inference profile with thinking enabled. _parse_completion_response is
    expected to keep the text and the tool call; the reasoning content is
    currently dropped from the resulting ChatMessage (see TODO below).
    """
    model = "arn:aws:bedrock:us-east-1::inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0"
    # Verbatim Converse API response body, including the reasoningContent block
    # that precedes the text and toolUse content blocks.
    response_body = {
        "ResponseMetadata": {
            "RequestId": "d7be81a1-5d37-40fe-936a-7c96e850cdda",
            "HTTPStatusCode": 200,
            "HTTPHeaders": {
                "date": "Tue, 15 Jul 2025 12:49:56 GMT",
                "content-type": "application/json",
                "content-length": "1107",
                "connection": "keep-alive",
                "x-amzn-requestid": "d7be81a1-5d37-40fe-936a-7c96e850cdda",
            },
            "RetryAttempts": 0,
        },
        "output": {
            "message": {
                "role": "assistant",
                "content": [
                    {
                        "reasoningContent": {
                            "reasoningText": {
                                "text": "The user wants to know the weather in Paris. I have a `weather` function "
                                "available that can provide this information. \n\nRequired parameters for "
                                "the weather function:\n- city: The city to get the weather for\n\nIn this "
                                'case, the user has clearly specified "Paris" as the city, so I have all '
                                "the required information to make the function call.",
                                "signature": "...",
                            }
                        }
                    },
                    {"text": "I'll check the current weather in Paris for you."},
                    {
                        "toolUse": {
                            "toolUseId": "tooluse_iUqy8-ypSByLK5zFkka8uA",
                            "name": "weather",
                            "input": {"city": "Paris"},
                        }
                    },
                ],
            }
        },
        "stopReason": "tool_use",
        "usage": {
            "inputTokens": 412,
            "outputTokens": 146,
            "totalTokens": 558,
            "cacheReadInputTokens": 0,
            "cacheWriteInputTokens": 0,
        },
        "metrics": {"latencyMs": 4811},
    }
    replies = _parse_completion_response(response_body, model)

    # TODO We are missing the reasoning content in the ChatMessage
    # Note: usage token names are remapped (inputTokens -> prompt_tokens, etc.)
    expected_message = ChatMessage.from_assistant(
        text="I'll check the current weather in Paris for you.",
        tool_calls=[
            ToolCall(tool_name="weather", arguments={"city": "Paris"}, id="tooluse_iUqy8-ypSByLK5zFkka8uA"),
        ],
        meta={
            "model": "arn:aws:bedrock:us-east-1::inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
            "index": 0,
            "finish_reason": "tool_use",
            "usage": {"prompt_tokens": 412, "completion_tokens": 146, "total_tokens": 558},
        },
    )
    assert replies[0] == expected_message

def test_process_streaming_response_one_tool_call(self, mock_boto3_session):
"""
Test that process_streaming_response correctly handles streaming events and accumulates responses
Expand Down
Loading