Skip to content

Commit d6182ff

Browse files
authored
Update rl_dataset.py with apply_chat_template (#10510)
1 parent 4821b63 commit d6182ff

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

paddlenlp/datasets/rlhf_datasets/rl_dataset.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -94,9 +94,11 @@ def __init__(
9494
chat_template_func=None,
9595
splits=None,
9696
filter_overlong_prompts=True,
97+
apply_chat_template=False,
9798
):
9899
self.dataset_name_or_path = dataset_name_or_path
99100
self.tokenizer = tokenizer
101+
self.apply_chat_template = apply_chat_template
100102

101103
self.max_prompt_len = max_prompt_len
102104
self.filter_prompts = filter_prompts
@@ -154,7 +156,7 @@ def __getitem__(self, index: int) -> dict[str, paddle.Tensor]:
154156
data = {}
155157
raw_sample = self.rawdata[index]
156158
prompt = raw_sample[self.prompt_key]
157-
if self.tokenizer.chat_template:
159+
if self.apply_chat_template and self.tokenizer.chat_template:
158160
prompt = self.tokenizer.apply_chat_template(prompt, add_generation_prompt=True, tokenize=False)
159161

160162
data["input_ids"] = self.tokenize(text=prompt, max_length=self.max_prompt_len, truncation=True)

0 commit comments

Comments
 (0)