@@ -538,10 +538,12 @@ async def chat_completion_stream_generator(
538
538
# any tokens that were generated but previously
539
539
# matched by partial json parsing
540
540
# only happens if we are NOT using guided decoding
541
+ auto_tools_called = False
541
542
if tool_parser :
542
- index = len (
543
- tool_parser .prev_tool_call_arr ) - 1 if len (
544
- tool_parser .prev_tool_call_arr ) > 0 else 0
543
+ auto_tools_called = len (
544
+ tool_parser .prev_tool_call_arr ) > 0
545
+ index = len (tool_parser .prev_tool_call_arr
546
+ ) - 1 if auto_tools_called else 0
545
547
else :
546
548
index = 0
547
549
@@ -576,9 +578,7 @@ async def chat_completion_stream_generator(
576
578
delta = delta_message ,
577
579
logprobs = logprobs ,
578
580
finish_reason = output .finish_reason
579
- if not (tool_parser
580
- and len (tool_parser .prev_tool_call_arr ))
581
- else "tool_calls" ,
581
+ if not auto_tools_called else "tool_calls" ,
582
582
stop_reason = output .stop_reason )
583
583
chunk = ChatCompletionStreamResponse (
584
584
id = request_id ,
@@ -680,8 +680,10 @@ async def chat_completion_full_generator(
680
680
else :
681
681
logprobs = None
682
682
683
- # by default, tools are not used.
684
- tools_called = False
683
+ # In the OpenAI API the finish_reason is "tool_calls"
684
+ # if the tool choice is auto and the model produced a tool
685
+ # call. The same is not true for named function calls
686
+ auto_tools_called = False
685
687
686
688
# if auto tools are not enabled, and a named tool choice using
687
689
# outlines is not being used
@@ -703,7 +705,6 @@ async def chat_completion_full_generator(
703
705
name = request .tool_choice .function .name ,
704
706
arguments = output .text ))
705
707
])
706
- tools_called = True
707
708
708
709
# if the request doesn't use tool choice
709
710
# OR specifies to not use a tool
@@ -725,7 +726,10 @@ async def chat_completion_full_generator(
725
726
726
727
tool_call_info = tool_parser .extract_tool_calls (
727
728
output .text , request = request )
728
- tools_called = tool_call_info .tools_called
729
+ # In the OpenAI API the finish_reason is "tool_calls"
730
+ # if the tool choice is auto and the model produced a tool
731
+ # call. The same is not true for named function calls
732
+ auto_tools_called = tool_call_info .tools_called
729
733
if tool_call_info .tools_called :
730
734
message = ChatMessage (role = role ,
731
735
content = tool_call_info .content ,
@@ -748,7 +752,7 @@ async def chat_completion_full_generator(
748
752
index = output .index ,
749
753
message = message ,
750
754
logprobs = logprobs ,
751
- finish_reason = "tool_calls" if tools_called else
755
+ finish_reason = "tool_calls" if auto_tools_called else
752
756
output .finish_reason if output .finish_reason else "stop" ,
753
757
stop_reason = output .stop_reason )
754
758
choices .append (choice_data )
0 commit comments