Skip to content

Commit 6ce675e

Browse files
fix performance issue in convert_ids_to_tokens (#37773)
1 parent: 57c620b · commit: 6ce675e

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

src/transformers/tokenization_utils_fast.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -410,9 +410,11 @@ def convert_ids_to_tokens(
410 410           if isinstance(ids, int):
411 411               return self._tokenizer.id_to_token(ids)
412 412           tokens = []
    413 +         # self.all_special_ids is an @property which may be slow, so only compute it once before the loop
    414 +         ids_to_skip = set(self.all_special_ids) if skip_special_tokens else set()
413 415           for index in ids:
414 416               index = int(index)
415     -             if skip_special_tokens and index in self.all_special_ids:
    417 +             if index in ids_to_skip:
416 418                   continue
417 419               tokens.append(self._tokenizer.id_to_token(index))
418 420           return tokens

0 commit comments

Comments (0)