Skip to content

Commit 5cc23f1

Browse files
committed
[Frontend] OpenAI Responses API supports Tool/Function calling
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
1 parent: 28b6e98 · commit: 5cc23f1

File tree

6 files changed, with 30 additions and 12 deletions.

vllm/entrypoints/openai/api_server.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1631,6 +1631,8 @@ async def init_app_state(
16311631
chat_template_content_format=args.chat_template_content_format,
16321632
return_tokens_as_token_ids=args.return_tokens_as_token_ids,
16331633
enable_auto_tools=args.enable_auto_tool_choice,
1634+
exclude_tools_when_tool_choice_none=args.
1635+
exclude_tools_when_tool_choice_none,
16341636
tool_parser=args.tool_call_parser,
16351637
reasoning_parser=args.reasoning_parser,
16361638
enable_prompt_tokens_details=args.enable_prompt_tokens_details,

vllm/entrypoints/openai/serving_engine.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,6 @@
6868
MultiModalDataDict)
6969
from vllm.outputs import PoolingRequestOutput, RequestOutput
7070
from vllm.pooling_params import PoolingParams
71-
from vllm.prompt_adapter.request import PromptAdapterRequest
7271
from vllm.reasoning import ReasoningParser, ReasoningParserManager
7372
from vllm.sampling_params import BeamSearchParams, SamplingParams
7473
from vllm.sequence import Logprob, PromptLogprobs

vllm/entrypoints/openai/serving_responses.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@ def __init__(
6060
return_tokens_as_token_ids: bool = False,
6161
reasoning_parser: str = "",
6262
enable_auto_tools: bool = False,
63+
exclude_tools_when_tool_choice_none: bool = False,
6364
tool_parser: Optional[str] = None,
6465
enable_prompt_tokens_details: bool = False,
6566
enable_force_include_usage: bool = False,
@@ -81,6 +82,9 @@ def __init__(
8182

8283
self.tool_parser = self._get_tool_parser(
8384
tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools)
85+
self.exclude_tools_when_tool_choice_none = (
86+
exclude_tools_when_tool_choice_none
87+
)
8488
self.enable_prompt_tokens_details = enable_prompt_tokens_details
8589
self.enable_force_include_usage = enable_force_include_usage
8690
self.default_sampling_params = (
@@ -138,7 +142,9 @@ async def create_responses(
138142
lora_request = self._maybe_get_adapters(request)
139143
model_name = self._get_model_name(request.model, lora_request)
140144
tokenizer = await self.engine_client.get_tokenizer(lora_request)
141-
if request.tools is None:
145+
if (request.tools is None or
146+
(request.tool_choice == "none"
147+
and self.exclude_tools_when_tool_choice_none)):
142148
tool_dicts = None
143149
else:
144150
tool_dicts = [tool.model_dump() for tool in request.tools]

vllm/entrypoints/openai/tool_parsers/glm4_moe_tool_parser.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,15 @@
66
from typing import Union
77

88
import regex as re
9-
9+
# yapf conflicts with isort for this block
10+
# yapf: disable
1011
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
1112
DeltaFunctionCall, DeltaMessage,
1213
DeltaToolCall,
1314
ExtractedToolCallInformation,
14-
FunctionCall, ToolCall)
15+
FunctionCall, ResponsesRequest,
16+
ToolCall)
17+
# yapf: enable
1518
from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
1619
ToolParser, ToolParserManager)
1720
from vllm.logger import init_logger
@@ -90,7 +93,7 @@ def _parse_arguments(self, args_text: str) -> str:
9093
def extract_tool_calls(
9194
self,
9295
model_output: str,
93-
request: ChatCompletionRequest,
96+
request: Union[ChatCompletionRequest, ResponsesRequest],
9497
) -> ExtractedToolCallInformation:
9598

9699
# sanity check; avoid unnecessary processing

vllm/entrypoints/openai/tool_parsers/hunyuan_a13b_tool_parser.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,12 +7,15 @@
77
from typing import Any, Optional, Union
88

99
import regex as re
10-
10+
# yapf conflicts with isort for this block
11+
# yapf: disable
1112
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
1213
DeltaFunctionCall, DeltaMessage,
1314
DeltaToolCall,
1415
ExtractedToolCallInformation,
15-
FunctionCall, ToolCall)
16+
FunctionCall, ResponsesRequest,
17+
ToolCall)
18+
# yapf: enable
1619
from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
1720
ToolParser, ToolParserManager)
1821
from vllm.entrypoints.openai.tool_parsers.utils import consume_space
@@ -86,8 +89,10 @@ def preprocess_model_output(
8689
return model_output, None
8790

8891
def extract_tool_calls(
89-
self, model_output: str,
90-
request: ChatCompletionRequest) -> ExtractedToolCallInformation:
92+
self,
93+
model_output: str,
94+
request: Union[ChatCompletionRequest, ResponsesRequest],
95+
) -> ExtractedToolCallInformation:
9196
"""
9297
Extract tool calls from a complete model output.
9398
"""

vllm/entrypoints/openai/tool_parsers/qwen3coder_tool_parser.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,13 +7,16 @@
77
from typing import Any, Optional, Union
88

99
import regex as re
10-
10+
# yapf conflicts with isort for this block
11+
# yapf: disable
1112
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
1213
ChatCompletionToolsParam,
1314
DeltaFunctionCall, DeltaMessage,
1415
DeltaToolCall,
1516
ExtractedToolCallInformation,
16-
FunctionCall, ToolCall)
17+
FunctionCall, ResponsesRequest,
18+
ToolCall)
19+
# yapf: enable
1720
from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
1821
ToolParser, ToolParserManager)
1922
from vllm.logger import init_logger
@@ -269,7 +272,7 @@ def _get_function_calls(self, model_output: str) -> list[str]:
269272
def extract_tool_calls(
270273
self,
271274
model_output: str,
272-
request: ChatCompletionRequest,
275+
request: Union[ChatCompletionRequest, ResponsesRequest],
273276
) -> ExtractedToolCallInformation:
274277
# Quick check to avoid unnecessary processing
275278
if self.tool_call_prefix not in model_output:

Comments (0): this commit has no comments.