Add opt-in apply_chat_template flag (default False) to rl_dataset.py to gate chat templating of prompts (#10510)

ZHUI · web-flow · commit d6182ffa65ff · 2025-04-28T16:04:32.000+08:00
diff --git a/paddlenlp/datasets/rlhf_datasets/rl_dataset.py b/paddlenlp/datasets/rlhf_datasets/rl_dataset.py
@@ -94,9 +94,11 @@ def __init__(
         chat_template_func=None,
         splits=None,
         filter_overlong_prompts=True,
+        apply_chat_template=False,
     ):
         self.dataset_name_or_path = dataset_name_or_path
         self.tokenizer = tokenizer
+        self.apply_chat_template = apply_chat_template
 
         self.max_prompt_len = max_prompt_len
         self.filter_prompts = filter_prompts
@@ -154,7 +156,7 @@ def __getitem__(self, index: int) -> dict[str, paddle.Tensor]:
             data = {}
             raw_sample = self.rawdata[index]
             prompt = raw_sample[self.prompt_key]
-            if self.tokenizer.chat_template:
+            if self.apply_chat_template and self.tokenizer.chat_template:
                 prompt = self.tokenizer.apply_chat_template(prompt, add_generation_prompt=True, tokenize=False)
 
             data["input_ids"] = self.tokenize(text=prompt, max_length=self.max_prompt_len, truncation=True)