From 469b1d74c47a681356bb899daf901d2d9ee654e0 Mon Sep 17 00:00:00 2001
From: luukunn <981429396@qq.com>
Date: Thu, 17 Jul 2025 13:02:58 +0800
Subject: [PATCH 1/2] [Fix] Fix empty prompt_token_ids, update the parser's
 triggering condition, add prompt log

---
 fastdeploy/input/ernie_processor.py      | 8 ++++++--
 fastdeploy/input/mm_processor/process.py | 5 +++++
 fastdeploy/input/text_processor.py       | 8 ++++++--
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/fastdeploy/input/ernie_processor.py b/fastdeploy/input/ernie_processor.py
index d4e45712bc..d14f7cfca4 100644
--- a/fastdeploy/input/ernie_processor.py
+++ b/fastdeploy/input/ernie_processor.py
@@ -114,6 +114,8 @@ def process_request(self, request, max_model_len=None, **kwargs):
             else:
                 request.prompt_token_ids = self.messages2ids(request.to_dict())
 
+        if len(request.prompt_token_ids) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
         if max_model_len is not None and len(
                 request.prompt_token_ids) > max_model_len:
             request.prompt_token_ids = request.prompt_token_ids[:
@@ -170,6 +172,8 @@ def process_request_dict(self, request, max_model_len=None):
             )
         else:
             request['prompt_token_ids'] = self.messages2ids(request)
+        if len(request['prompt_token_ids']) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
 
         # Truncate prompts that exceed the length limit
         if max_model_len is not None and len(
@@ -196,7 +200,6 @@ def process_response(self, response_dict, **kwargs):
         Returns:
             Dict: response contain text fields
         """
-
         req_id = response_dict.request_id
         token_ids = response_dict.outputs.token_ids
 
@@ -245,6 +248,7 @@ def process_response_dict_normal(self, response_dict, **kwargs):
         Returns:
             Dict: response contain text fields
         """
+        enable_thinking = kwargs.get("enable_thinking")
         token_ids = response_dict["outputs"]["token_ids"]
         is_end = response_dict["finished"]
         req_id = response_dict["request_id"]
@@ -254,7 +258,7 @@ def process_response_dict_normal(self, response_dict, **kwargs):
         delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
         if is_end:
             full_text = previous_texts + delta_text
-            if self.reasoning_parser:
+            if enable_thinking and self.reasoning_parser:
                 reasoning_content, text = self.reasoning_parser.extract_reasoning_content(
                     full_text, response_dict)
                 response_dict["outputs"]["text"] = text
diff --git a/fastdeploy/input/mm_processor/process.py b/fastdeploy/input/mm_processor/process.py
index ea556900ba..2daa23562e 100644
--- a/fastdeploy/input/mm_processor/process.py
+++ b/fastdeploy/input/mm_processor/process.py
@@ -34,6 +34,7 @@
 from .utils.render_timestamp import render_frame_timestamp
 from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer
 from fastdeploy.entrypoints.chat_utils import parse_chat_messages
+from fastdeploy.utils import data_processor_logger
 
 IDS_TYPE_FLAG = {"text": 0, "image": 1, "video": 2, "audio": 3}
 
@@ -236,6 +237,8 @@ def request2ids(self, request: Dict[str, Any]) -> Dict[str, Union[np.ndarray, Li
                     image_message_list.append(item)
 
         prompt_token_ids = self.apply_chat_template(request)
+        if len(prompt_token_ids) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
         image_start_index = 0
         image_message_index = 0
         for i in range(len(prompt_token_ids)):
@@ -452,4 +455,6 @@ def apply_chat_template(self, request):
         ).replace("<|image@placeholder|>", "").replace("<|video@placeholder|>", "")
         tokens = self.tokenizer.tokenize(prompt_token_str)
         token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
+        data_processor_logger.info(
+            f"req_id:{request.get('request_id', ''),} tokens:{tokens}, token_ids: {token_ids}")
         return token_ids
\ No newline at end of file
diff --git a/fastdeploy/input/text_processor.py b/fastdeploy/input/text_processor.py
index 9d30dee3e8..2bbc9fabe4 100644
--- a/fastdeploy/input/text_processor.py
+++ b/fastdeploy/input/text_processor.py
@@ -252,6 +252,8 @@ def process_request(self, request, max_model_len=None, **kwargs):
             raise ValueError(
                 f"The request should have `input_ids`, `text` or `messages`: {request}."
             )
+        if len(request.prompt_token_ids) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
         if request.get("max_tokens") is None:
             request.set("max_tokens",
                         max(1, max_model_len - len(request.prompt_token_ids)))
@@ -299,7 +301,8 @@ def process_request_dict(self, request, max_model_len=None, **kwargs):
             raise ValueError(
                 f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}"
             )
-
+        if len(request['prompt_token_ids']) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
         if request.get("max_tokens") is None:
             request["max_tokens"] = max(
                 1, max_model_len - len(request['prompt_token_ids']))
@@ -352,6 +355,7 @@ def process_response_dict_normal(self, response_dict, **kwargs):
         Returns:
             Dict: response contain text fields
         """
+        enable_thinking = kwargs.get("enable_thinking")
         token_ids = response_dict["outputs"]["token_ids"]
         is_end = response_dict["finished"]
         req_id = response_dict["request_id"]
@@ -361,7 +365,7 @@ def process_response_dict_normal(self, response_dict, **kwargs):
         delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
         if is_end:
             full_text = previous_texts + delta_text
-            if self.reasoning_parser:
+            if enable_thinking and self.reasoning_parser:
                 reasoning_content, text = self.reasoning_parser.extract_reasoning_content(
                     full_text, response_dict)
                 response_dict["outputs"]["text"] = text

From f68c0095671035a592aeaba73a92d359f1ec8292 Mon Sep 17 00:00:00 2001
From: luukunn <981429396@qq.com>
Date: Fri, 18 Jul 2025 13:32:21 +0800
Subject: [PATCH 2/2] [Fix] Fix empty prompt_token_ids, update the parser's
 triggering condition, add prompt log

---
 fastdeploy/input/mm_processor/process.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fastdeploy/input/mm_processor/process.py b/fastdeploy/input/mm_processor/process.py
index 04caeebde5..1ad056c008 100644
--- a/fastdeploy/input/mm_processor/process.py
+++ b/fastdeploy/input/mm_processor/process.py
@@ -485,5 +485,5 @@ def apply_chat_template(self, request):
         tokens = self.tokenizer.tokenize(prompt_token_str)
         token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
         data_processor_logger.info(
-            f"req_id:{request.get('request_id', ''),} tokens:{tokens}, token_ids: {token_ids}")
+            f"req_id:{request.get('request_id', ''),} tokens: {tokens}, token_ids: {token_ids}")
         return token_ids
\ No newline at end of file