diff --git a/fastdeploy/input/ernie_processor.py b/fastdeploy/input/ernie_processor.py
index 872b52e083..1ccf3e13fd 100644
--- a/fastdeploy/input/ernie_processor.py
+++ b/fastdeploy/input/ernie_processor.py
@@ -111,6 +111,8 @@ def process_request(self, request, max_model_len=None, **kwargs):
             else:
                 request.prompt_token_ids = self.messages2ids(request.to_dict())
 
+        if len(request.prompt_token_ids) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
         if max_model_len is not None and len(request.prompt_token_ids) > max_model_len:
             request.prompt_token_ids = request.prompt_token_ids[: max_model_len - 1]
         if request.get("max_tokens") is None:
@@ -160,7 +162,9 @@ def process_request_dict(self, request, max_model_len=None):
             req_id = request.get("request_id", None)
             data_processor_logger.info(f"req_id:{req_id}, tokens:{tokens}, token_ids: {token_ids}")
         else:
-            request["prompt_token_ids"] = self.messages2ids(request)
+            request['prompt_token_ids'] = self.messages2ids(request)
+            if len(request['prompt_token_ids']) == 0:
+                raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
 
         # truncate prompts that exceed the length limit
         if max_model_len is not None and len(request["prompt_token_ids"]) > max_model_len:
@@ -184,7 +188,6 @@ def process_response(self, response_dict, **kwargs):
         Returns:
             Dict: response contain text fields
         """
-
         req_id = response_dict.request_id
         token_ids = response_dict.outputs.token_ids
 
@@ -228,6 +231,7 @@ def process_response_dict_normal(self, response_dict, **kwargs):
         Returns:
             Dict: response contain text fields
         """
+        enable_thinking = kwargs.get("enable_thinking")
         token_ids = response_dict["outputs"]["token_ids"]
         is_end = response_dict["finished"]
         req_id = response_dict["request_id"]
@@ -237,8 +241,9 @@ def process_response_dict_normal(self, response_dict, **kwargs):
         delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
         if is_end:
             full_text = previous_texts + delta_text
-            if self.reasoning_parser:
-                reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict)
+            if enable_thinking and self.reasoning_parser:
+                reasoning_content, text = self.reasoning_parser.extract_reasoning_content(
+                    full_text, response_dict)
                 response_dict["outputs"]["text"] = text
                 response_dict["outputs"]["reasoning_content"] = reasoning_content
             else:
diff --git a/fastdeploy/input/mm_processor/process.py b/fastdeploy/input/mm_processor/process.py
index 23c2828c0f..566921a601 100644
--- a/fastdeploy/input/mm_processor/process.py
+++ b/fastdeploy/input/mm_processor/process.py
@@ -27,6 +27,7 @@
 from fastdeploy.entrypoints.chat_utils import parse_chat_messages
 from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer
+from fastdeploy.utils import data_processor_logger
 
 from .image_preprocessor.image_preprocessor_adaptive import AdaptiveImageProcessor
 from .process_video import read_frames_decord, read_video_decord
 
@@ -252,6 +253,8 @@ def request2ids(
                     image_message_list.append(item)
 
         prompt_token_ids = self.apply_chat_template(request)
+        if len(prompt_token_ids) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
         image_start_index = 0
         image_message_index = 0
         for i in range(len(prompt_token_ids)):
@@ -503,4 +506,6 @@ def apply_chat_template(self, request):
         )
         tokens = self.tokenizer.tokenize(prompt_token_str)
         token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
+        data_processor_logger.info(
+            f"req_id:{request.get('request_id', '')}, tokens: {tokens}, token_ids: {token_ids}")
         return token_ids
diff --git a/fastdeploy/input/text_processor.py b/fastdeploy/input/text_processor.py
index a60b0a7818..d4d70bbc3c 100644
--- a/fastdeploy/input/text_processor.py
+++ b/fastdeploy/input/text_processor.py
@@ -239,7 +239,11 @@ def process_request(self, request, max_model_len=None, **kwargs):
             task["enable_thinking"] = kwargs.get("enable_thinking", True)
             request.prompt_token_ids = self.messages2ids(task)
         else:
-            raise ValueError(f"The request should have `input_ids`, `text` or `messages`: {request}.")
+            raise ValueError(
+                f"The request should have `input_ids`, `text` or `messages`: {request}."
+            )
+        if len(request.prompt_token_ids) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
         if request.get("max_tokens") is None:
             request.set(
                 "max_tokens",
@@ -283,8 +287,11 @@ def process_request_dict(self, request, max_model_len=None, **kwargs):
                 raise ValueError("This model does not support chat_template.")
             request["prompt_token_ids"] = self.messages2ids(request)
         else:
-            raise ValueError(f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}")
-
+            raise ValueError(
+                f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}"
+            )
+        if len(request['prompt_token_ids']) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
         if request.get("max_tokens") is None:
             request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"]))
         if request.get("temperature") < _SAMPLING_EPS:
@@ -335,6 +342,7 @@ def process_response_dict_normal(self, response_dict, **kwargs):
         Returns:
             Dict: response contain text fields
         """
+        enable_thinking = kwargs.get("enable_thinking")
         token_ids = response_dict["outputs"]["token_ids"]
         is_end = response_dict["finished"]
         req_id = response_dict["request_id"]
@@ -344,8 +352,9 @@ def process_response_dict_normal(self, response_dict, **kwargs):
         delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
         if is_end:
             full_text = previous_texts + delta_text
-            if self.reasoning_parser:
-                reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict)
+            if enable_thinking and self.reasoning_parser:
+                reasoning_content, text = self.reasoning_parser.extract_reasoning_content(
+                    full_text, response_dict)
                 response_dict["outputs"]["text"] = text
                 response_dict["outputs"]["reasoning_content"] = reasoning_content
             else:
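
A minimal, self-contained sketch of the two behaviors these hunks introduce: an empty prompt_token_ids now fails fast with a ValueError instead of flowing into the truncation and max_tokens math, and reasoning content is only split out when the caller passes enable_thinking and a reasoning parser is configured. The helper names and the <think> tag convention below are illustrative assumptions, not FastDeploy's processor classes or reasoning parsers.

# Sketch only: the <think>-tag split stands in for whatever the configured
# reasoning_parser implements; nothing here imports FastDeploy.
from typing import List, Optional, Tuple


def validate_prompt_token_ids(prompt_token_ids: List[int]) -> List[int]:
    # Same check the patch adds to process_request / process_request_dict / request2ids.
    if len(prompt_token_ids) == 0:
        raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
    return prompt_token_ids


def split_reasoning(full_text: str, enable_thinking: Optional[bool]) -> Tuple[Optional[str], str]:
    # Same gating the patch adds to process_response_dict_normal: without
    # enable_thinking the text is returned untouched and reasoning_content stays None.
    if enable_thinking and "</think>" in full_text:
        reasoning, _, answer = full_text.partition("</think>")
        return reasoning.removeprefix("<think>").strip(), answer.strip()
    return None, full_text


if __name__ == "__main__":
    validate_prompt_token_ids([1, 2, 3])          # passes through unchanged
    try:
        validate_prompt_token_ids([])             # now rejected up front
    except ValueError as exc:
        print(exc)
    print(split_reasoning("<think>plan steps</think>The answer is 42.", enable_thinking=True))
    print(split_reasoning("<think>plan steps</think>The answer is 42.", enable_thinking=None))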