Move INFERENCE_TRUNCATED_RETURN_EOS setting from infer_utils.predict into predict_dy_insert

gongel · gongel · commit 6fc691dfc26f · 2025-05-23T11:45:10.000+08:00
diff --git a/llm/predict/predictor.py b/llm/predict/predictor.py
@@ -1338,6 +1338,8 @@ def predict_dy_insert(
         repeat_num=1,
         **kwargs
     ):
+        # NOTE(gongenlei): When an over-long output is truncated, do not return an eos_token
+        os.environ["INFERENCE_TRUNCATED_RETURN_EOS"] = "0"
         assert repeat_num >= 1
         flag_current_rank_run = self.tensor_parallel_rank == 0 or all_rank_return
         self.input_ids = []
diff --git a/paddlenlp/rl/utils/infer_utils.py b/paddlenlp/rl/utils/infer_utils.py
@@ -16,7 +16,6 @@
 
 import copy
 import inspect
-import os
 from contextlib import contextmanager
 
 import paddle
@@ -94,8 +93,7 @@ def predict(self, input_ids: paddle.Tensor = None, repeat_num=1, **kwargs):
         for row in input_ids:
             row_ids = process_row(row, remove_value=self.tokenizer.pad_token_id, remove_side="left").tolist()
             input_ids_list.append(row_ids)
-        # NOTE(gongenlei): The output of the ultra-long truncation does not return an eos_token
-        os.environ["INFERENCE_TRUNCATED_RETURN_EOS"] = "0"
+
         if self.config.dynamic_insert:
             outputs = self.predict_dy_insert(
                 input_ids=input_ids_list,