
Commit 5dd1ba6

Replace try_import sentencepiece with direct import sentencepiece (#1961)
* direct import sentencepiece
* direct import sentencepiece
1 parent 519f552 commit 5dd1ba6

File tree: 14 files changed (+32 / -39 lines)
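The touched tokenizers all follow the same pattern: the lazy `try_import("sentencepiece")` call inside each `__init__` is dropped in favour of a plain module-level `import sentencepiece as spm`, while the surrounding usage stays the same. A minimal before/after sketch; the class name is illustrative and not copied from any file in this commit:

```python
import sentencepiece as spm  # after this commit: a plain module-level import


class ExampleSpmTokenizer:
    """Illustrative only; not a class from this commit."""

    def __init__(self, sentencepiece_model_file):
        # Before this commit the module was resolved lazily here instead:
        #   from paddle.utils import try_import
        #   spm = try_import("sentencepiece")
        # Everything below is unchanged by the commit.
        self.sentencepiece_model_file = sentencepiece_model_file
        self.sp_model = spm.SentencePieceProcessor()
        self.sp_model.Load(sentencepiece_model_file)
```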


paddlenlp/transformers/albert/tokenizer.py

Lines changed: 2 additions & 4 deletions

@@ -18,7 +18,8 @@
 import unicodedata
 from shutil import copyfile
 
-from paddle.utils import try_import
+import sentencepiece as spm
+
 from .. import PretrainedTokenizer, BertTokenizer, AddedToken
 
 __all__ = ['AlbertTokenizer']

@@ -581,8 +582,6 @@ def __init__(self,
         self.remove_space = remove_space
         self.keep_accents = keep_accents
         self.sentencepiece_model_file = sentencepiece_model_file
-
-        spm = try_import("sentencepiece")
         self.sp_model = spm.SentencePieceProcessor()
         self.sp_model.Load(sentencepiece_model_file)
 
@@ -597,7 +596,6 @@ def __getstate__(self):
 
     def __setstate__(self, d):
        self.__dict__ = d
-        spm = try_import("sentencepiece")
        self.sp_model = spm.SentencePieceProcessor()
        self.sp_model.Load(self.sentencepiece_model_file)

paddlenlp/transformers/bart/tokenizer.py

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import itertools
+
 from paddle.utils import try_import
 from .. import GPTTokenizer, AddedToken

paddlenlp/transformers/bigbird/tokenizer.py

Lines changed: 4 additions & 3 deletions

@@ -18,8 +18,10 @@
 import six
 import re
 import numpy as np
-from paddle.utils import try_import
+
+import sentencepiece as spm
 from paddlenlp.data import Vocab
+
 from .. import PretrainedTokenizer, AddedToken
 
 __all__ = ['BigBirdTokenizer']

@@ -95,8 +97,7 @@ def __init__(self,
                "`tokenizer = BigBirdTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
                .format(sentencepiece_model_file))
        self.encoding = encoding
-        mod = try_import('sentencepiece')
-        self.sp_model = mod.SentencePieceProcessor()
+        self.sp_model = spm.SentencePieceProcessor()
        if os.path.isfile(sentencepiece_model_file):
            self.sp_model.Load(sentencepiece_model_file)
        vocab_dict = {}

paddlenlp/transformers/ernie/tokenizer.py

Lines changed: 3 additions & 3 deletions

@@ -18,7 +18,8 @@
 import six
 import shutil
 
-from paddle.utils import try_import
+import sentencepiece as spm
+
 from paddlenlp.utils.env import MODEL_HOME
 
 from .. import BasicTokenizer, PretrainedTokenizer, WordpieceTokenizer

@@ -425,8 +426,7 @@ def __init__(self,
                 cls_token="[CLS]",
                 mask_token="[MASK]",
                 **kwargs):
-        mod = try_import('sentencepiece')
-        self.sp_model = mod.SentencePieceProcessor()
+        self.sp_model = spm.SentencePieceProcessor()
        self.word_dict = word_dict
 
        self.do_lower_case = do_lower_case

paddlenlp/transformers/ernie_m/tokenizer.py

Lines changed: 2 additions & 3 deletions

@@ -14,7 +14,7 @@
 
 import os
 
-from paddle.utils import try_import
+import sentencepiece as spm
 
 from .. import PretrainedTokenizer
 from ..tokenizer_utils import _is_control, _is_whitespace

@@ -91,8 +91,7 @@ def __init__(self,
                 pad_token="[PAD]",
                 cls_token="[CLS]",
                 mask_token="[MASK]"):
-        mod = try_import('sentencepiece')
-        self.sp_model = mod.SentencePieceProcessor()
+        self.sp_model = spm.SentencePieceProcessor()
 
        self.do_lower_case = do_lower_case
        self.encoding = encoding

paddlenlp/transformers/gpt/tokenizer.py

Lines changed: 3 additions & 4 deletions

@@ -19,10 +19,9 @@
 import json
 import jieba
 import shutil
+import sentencepiece as spm
 from paddle.utils import try_import
 
-from paddlenlp.utils.log import logger
-
 from .. import PretrainedTokenizer, AddedToken
 
 __all__ = [

@@ -135,8 +134,8 @@ def __init__(
                "`tokenizer = GPTTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
                .format(model_file))
        self.max_len = max_len if max_len is not None else int(1e12)
-        mod = try_import("sentencepiece")
-        self.sp = mod.SentencePieceProcessor(model_file=model_file)
+        self.sp = spm.SentencePieceProcessor()
+        self.sp.Load(model_file)
        self.translator = str.maketrans(" \n", "\u2582\u2583")
 
        '''
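In the GPT tokenizer the one-step constructor call is replaced by construct-then-Load. As far as I can tell the two forms load the same model; the keyword form simply requires a newer sentencepiece (the `model_file=` constructor argument appeared around 0.1.91), so the sketch below treats them as interchangeable. The model path is illustrative:

```python
import sentencepiece as spm

# One step: pass the model path to the constructor (newer sentencepiece releases).
sp_a = spm.SentencePieceProcessor(model_file="spiece.model")

# Two steps: construct, then Load() -- the form this commit uses everywhere.
sp_b = spm.SentencePieceProcessor()
sp_b.Load("spiece.model")
```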

paddlenlp/transformers/layoutxlm/tokenizer.py

Lines changed: 2 additions & 2 deletions

@@ -17,9 +17,10 @@
 import itertools
 from dataclasses import dataclass, field
 from collections import OrderedDict
-from paddle.utils import try_import
 from typing import List, Optional
 
+import sentencepiece as spm
+
 from .. import PretrainedTokenizer, AddedToken
 from ..tokenizer_utils import _is_punctuation, _is_control, _is_whitespace
 

@@ -90,7 +91,6 @@ def __init__(self,
        self._unk_token = unk_token
        self._pad_token = pad_token
        self._mask_token = mask_token
-        spm = try_import("sentencepiece")
        self.sp_model = spm.SentencePieceProcessor()
        self.sp_model.Load(vocab_file)
        self.vocab_file = vocab_file

paddlenlp/transformers/mbart/tokenizer.py

Lines changed: 2 additions & 3 deletions

@@ -14,7 +14,8 @@
 
 import itertools
 from contextlib import contextmanager
-from paddle.utils import try_import
+import sentencepiece as spm
+
 from .. import PretrainedTokenizer, AddedToken
 
 __all__ = ['MBartTokenizer']

@@ -239,7 +240,6 @@ def __init__(self,
            mask_token, lstrip=True,
            rstrip=False) if isinstance(mask_token, str) else mask_token
        self._build_special_tokens_map_extended(mask_token=mask_token)
-        spm = try_import('sentencepiece')
        self.sp_model = spm.SentencePieceProcessor()
        self.sp_model.Load(str(vocab_file))
        self.fairseq_offset = 1

@@ -432,7 +432,6 @@ def __init__(self,
            mask_token, lstrip=True,
            rstrip=False) if isinstance(mask_token, str) else mask_token
        self._build_special_tokens_map_extended(mask_token=mask_token)
-        spm = try_import('sentencepiece')
        self.sp_model = spm.SentencePieceProcessor()
        self.sp_model.Load(str(vocab_file))
        self.fairseq_offset = 1

paddlenlp/transformers/reformer/tokenizer.py

Lines changed: 2 additions & 3 deletions

@@ -12,7 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from paddle.utils import try_import
+import sentencepiece as spm
+
 from ..albert.tokenizer import AlbertEnglishTokenizer
 
 __all__ = ['ReformerTokenizer']

@@ -76,8 +77,6 @@ def __init__(self,
        self.remove_space = remove_space
        self.keep_accents = keep_accents
        self.sentencepiece_model_file = sentencepiece_model_file
-
-        spm = try_import("sentencepiece")
        self.sp_model = spm.SentencePieceProcessor()
        self.sp_model.Load(sentencepiece_model_file)

paddlenlp/transformers/rembert/tokenizer.py

Lines changed: 2 additions & 4 deletions

@@ -16,10 +16,8 @@
 from shutil import copyfile
 from typing import List, Optional, Tuple
 
-try:
-    import sentencepiece as spm
-except:
-    pass
+import sentencepiece as spm
+
 from .. import PretrainedTokenizer
 
 __all__ = ['RemBertTokenizer']
