Skip to content

Commit ec10cb8

Browse files
authored
[BugFix] Fix tool call finish reason in streaming case (#9209)
Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
1 parent d11b46f commit ec10cb8

File tree

1 file changed

+15
-11
lines changed

1 file changed

+15
-11
lines changed

vllm/entrypoints/openai/serving_chat.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -538,10 +538,12 @@ async def chat_completion_stream_generator(
538538
# any tokens that were generated but previously
539539
# matched by partial json parsing
540540
# only happens if we are NOT using guided decoding
541+
auto_tools_called = False
541542
if tool_parser:
542-
index = len(
543-
tool_parser.prev_tool_call_arr) - 1 if len(
544-
tool_parser.prev_tool_call_arr) > 0 else 0
543+
auto_tools_called = len(
544+
tool_parser.prev_tool_call_arr) > 0
545+
index = len(tool_parser.prev_tool_call_arr
546+
) - 1 if auto_tools_called else 0
545547
else:
546548
index = 0
547549

@@ -576,9 +578,7 @@ async def chat_completion_stream_generator(
576578
delta=delta_message,
577579
logprobs=logprobs,
578580
finish_reason=output.finish_reason
579-
if not (tool_parser
580-
and len(tool_parser.prev_tool_call_arr))
581-
else "tool_calls",
581+
if not auto_tools_called else "tool_calls",
582582
stop_reason=output.stop_reason)
583583
chunk = ChatCompletionStreamResponse(
584584
id=request_id,
@@ -680,8 +680,10 @@ async def chat_completion_full_generator(
680680
else:
681681
logprobs = None
682682

683-
# by default, tools are not used.
684-
tools_called = False
683+
# In the OpenAI API the finish_reason is "tool_calls"
684+
# if the tool choice is auto and the model produced a tool
685+
# call. The same is not true for named function calls
686+
auto_tools_called = False
685687

686688
# if auto tools are not enabled, and a named tool choice using
687689
# outlines is not being used
@@ -703,7 +705,6 @@ async def chat_completion_full_generator(
703705
name=request.tool_choice.function.name,
704706
arguments=output.text))
705707
])
706-
tools_called = True
707708

708709
# if the request doesn't use tool choice
709710
# OR specifies to not use a tool
@@ -725,7 +726,10 @@ async def chat_completion_full_generator(
725726

726727
tool_call_info = tool_parser.extract_tool_calls(
727728
output.text, request=request)
728-
tools_called = tool_call_info.tools_called
729+
# In the OpenAI API the finish_reason is "tool_calls"
730+
# if the tool choice is auto and the model produced a tool
731+
# call. The same is not true for named function calls
732+
auto_tools_called = tool_call_info.tools_called
729733
if tool_call_info.tools_called:
730734
message = ChatMessage(role=role,
731735
content=tool_call_info.content,
@@ -748,7 +752,7 @@ async def chat_completion_full_generator(
748752
index=output.index,
749753
message=message,
750754
logprobs=logprobs,
751-
finish_reason="tool_calls" if tools_called else
755+
finish_reason="tool_calls" if auto_tools_called else
752756
output.finish_reason if output.finish_reason else "stop",
753757
stop_reason=output.stop_reason)
754758
choices.append(choice_data)

0 commit comments

Comments
 (0)