[Fix] fix empty prompt_token_ids, update the parser's triggering condit… #2891

Open · wants to merge 6 commits into base: develop
Changes from 4 commits
8 changes: 6 additions & 2 deletions fastdeploy/input/ernie_processor.py
@@ -114,6 +114,8 @@ def process_request(self, request, max_model_len=None, **kwargs):
             else:
                 request.prompt_token_ids = self.messages2ids(request.to_dict())

+        if len(request.prompt_token_ids) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
         if max_model_len is not None and len(
                 request.prompt_token_ids) > max_model_len:
             request.prompt_token_ids = request.prompt_token_ids[:
@@ -170,6 +172,8 @@ def process_request_dict(self, request, max_model_len=None):
                 )
             else:
                 request['prompt_token_ids'] = self.messages2ids(request)
+        if len(request['prompt_token_ids']) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")

         # Truncate prompts that exceed the length limit
         if max_model_len is not None and len(
@@ -196,7 +200,6 @@ def process_response(self, response_dict, **kwargs):
         Returns:
             Dict: response containing text fields
         """
-
         req_id = response_dict.request_id
         token_ids = response_dict.outputs.token_ids

@@ -245,6 +248,7 @@ def process_response_dict_normal(self, response_dict, **kwargs):
         Returns:
             Dict: response containing text fields
         """
+        enable_thinking = kwargs.get("enable_thinking")
         token_ids = response_dict["outputs"]["token_ids"]
         is_end = response_dict["finished"]
         req_id = response_dict["request_id"]
@@ -254,7 +258,7 @@ def process_response_dict_normal(self, response_dict, **kwargs):
         delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
         if is_end:
             full_text = previous_texts + delta_text
-            if self.reasoning_parser:
+            if enable_thinking and self.reasoning_parser:
                 reasoning_content, text = self.reasoning_parser.extract_reasoning_content(
                     full_text, response_dict)
                 response_dict["outputs"]["text"] = text
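Taken together, the changes in this file reject requests whose tokenization yields no tokens and make reasoning extraction opt-in per request. Below is a minimal standalone sketch of that guard-then-parse flow; `finalize_output`, `extract_reasoning`, and the `<think>` tag format are illustrative assumptions, not FastDeploy APIs.

```python
from typing import List, Optional, Tuple

def extract_reasoning(full_text: str) -> Tuple[str, str]:
    # Illustrative stand-in for reasoning_parser.extract_reasoning_content:
    # split "<think>...</think>answer" into (reasoning, answer).
    if full_text.startswith("<think>") and "</think>" in full_text:
        reasoning, _, answer = full_text.partition("</think>")
        return reasoning[len("<think>"):], answer
    return "", full_text

def finalize_output(prompt_token_ids: List[int],
                    full_text: str,
                    enable_thinking: Optional[bool] = None) -> dict:
    # Guard 1: reject an empty token sequence at the input boundary,
    # instead of letting it fail later inside the engine.
    if len(prompt_token_ids) == 0:
        raise ValueError(
            "Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
    # Guard 2: only run the reasoning parser when the request opted in,
    # mirroring the new `enable_thinking and self.reasoning_parser` condition.
    if enable_thinking:
        reasoning, text = extract_reasoning(full_text)
        return {"text": text, "reasoning_content": reasoning}
    return {"text": full_text}

print(finalize_output([1, 2, 3], "<think>why</think>42", enable_thinking=True))
# {'text': '42', 'reasoning_content': 'why'}
```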
5 changes: 5 additions & 0 deletions fastdeploy/input/mm_processor/process.py
@@ -34,6 +34,7 @@
 from .utils.render_timestamp import render_frame_timestamp
 from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer
 from fastdeploy.entrypoints.chat_utils import parse_chat_messages
+from fastdeploy.utils import data_processor_logger

 IDS_TYPE_FLAG = {"text": 0, "image": 1, "video": 2, "audio": 3}

@@ -241,6 +242,8 @@ def request2ids(self, request: Dict[str, Any],tgts: List[str]=None) -> Dict[str,
                     image_message_list.append(item)

         prompt_token_ids = self.apply_chat_template(request)
+        if len(prompt_token_ids) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
         image_start_index = 0
         image_message_index = 0
         for i in range(len(prompt_token_ids)):
@@ -481,4 +484,6 @@ def apply_chat_template(self, request):
         ).replace("<|image@placeholder|>", "").replace("<|video@placeholder|>", "")
         tokens = self.tokenizer.tokenize(prompt_token_str)
         token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
+        data_processor_logger.info(
+            f"req_id:{request.get('request_id', '')} tokens:{tokens}, token_ids: {token_ids}")
         return token_ids
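For reference, the new log line renders roughly as below. This sketch uses the standard `logging` module as a stand-in for `data_processor_logger`, and the request id and token values are made up for illustration.

```python
import logging

logging.basicConfig(level=logging.INFO)
# Illustrative stand-in for fastdeploy.utils.data_processor_logger.
data_processor_logger = logging.getLogger("data_processor")

request = {"request_id": "req-001"}
tokens = ["Hello", ",", "world"]   # hypothetical tokenizer output
token_ids = [1234, 5, 6789]        # hypothetical ids

data_processor_logger.info(
    f"req_id:{request.get('request_id', '')} tokens:{tokens}, token_ids: {token_ids}")
# INFO:data_processor:req_id:req-001 tokens:['Hello', ',', 'world'], token_ids: [1234, 5, 6789]
```

One design consideration: logging the full token list at INFO level can produce very long lines for large multimodal prompts.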
8 changes: 6 additions & 2 deletions fastdeploy/input/text_processor.py
@@ -252,6 +252,8 @@ def process_request(self, request, max_model_len=None, **kwargs):
             raise ValueError(
                 f"The request should have `input_ids`, `text` or `messages`: {request}."
             )
+        if len(request.prompt_token_ids) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
         if request.get("max_tokens") is None:
             request.set("max_tokens",
                         max(1, max_model_len - len(request.prompt_token_ids)))
@@ -299,7 +301,8 @@ def process_request_dict(self, request, max_model_len=None, **kwargs):
             raise ValueError(
                 f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}"
             )
-
+        if len(request['prompt_token_ids']) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
         if request.get("max_tokens") is None:
             request["max_tokens"] = max(
                 1, max_model_len - len(request['prompt_token_ids']))
@@ -352,6 +355,7 @@ def process_response_dict_normal(self, response_dict, **kwargs):
         Returns:
             Dict: response containing text fields
         """
+        enable_thinking = kwargs.get("enable_thinking")
         token_ids = response_dict["outputs"]["token_ids"]
         is_end = response_dict["finished"]
         req_id = response_dict["request_id"]
@@ -361,7 +365,7 @@ def process_response_dict_normal(self, response_dict, **kwargs):
         delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
         if is_end:
             full_text = previous_texts + delta_text
-            if self.reasoning_parser:
+            if enable_thinking and self.reasoning_parser:
                 reasoning_content, text = self.reasoning_parser.extract_reasoning_content(
                     full_text, response_dict)
                 response_dict["outputs"]["text"] = text
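Because the flag is read with `kwargs.get("enable_thinking")`, an omitted flag yields `None` and the parser is skipped, so reasoning extraction is now strictly opt-in rather than running whenever a parser is configured. A short sketch of the resulting truth table (`should_parse_reasoning` is a hypothetical helper, not part of FastDeploy):

```python
def should_parse_reasoning(has_parser: bool, **kwargs) -> bool:
    # Mirrors the new condition: `enable_thinking and self.reasoning_parser`.
    enable_thinking = kwargs.get("enable_thinking")  # None when the flag is absent
    return bool(enable_thinking and has_parser)

assert should_parse_reasoning(True, enable_thinking=True) is True
assert should_parse_reasoning(True, enable_thinking=False) is False  # explicitly off
assert should_parse_reasoning(True) is False                         # flag omitted
assert should_parse_reasoning(False, enable_thinking=True) is False  # no parser configured
```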