
Commit 91aaa05

Fix
1 parent c2c41a3 commit 91aaa05

198 files changed, +361 -361 lines changed


docs/zh/locale/en/LC_MESSAGES/source/paddlenlp.taskflow.utils.po

Lines changed: 2 additions & 2 deletions
@@ -276,7 +276,7 @@ msgid "word of current node."
 msgstr ""

 #: of paddlenlp.taskflow.utils.BurkhardKellerTree:1
-msgid "Implementataion of BK-Tree"
+msgid "Implementation of BK-Tree"
 msgstr ""

 #: of paddlenlp.taskflow.utils.BurkhardKellerTree.add:1
@@ -300,7 +300,7 @@ msgid "similar words."
 msgstr ""

 #: of paddlenlp.taskflow.utils.TriedTree:1
-msgid "Implementataion of TriedTree"
+msgid "Implementation of TriedTree"
 msgstr ""

 #: of paddlenlp.taskflow.utils.TriedTree.add_word:1

docs/zh/locale/en/LC_MESSAGES/source/paddlenlp.transformers.squeezebert.tokenizer.po

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@ msgid ""
 msgstr ""

 #: of paddlenlp.transformers.squeezebert.tokenizer.SqueezeBertTokenizer:11
-msgid "The special token for unkown words. Default: \"[UNK]\"."
+msgid "The special token for unknown words. Default: \"[UNK]\"."
 msgstr ""

 #: of paddlenlp.transformers.squeezebert.tokenizer.SqueezeBertTokenizer:13

llm/experimental/ernie-3.5-se/modeling.py

Lines changed: 4 additions & 4 deletions
@@ -1380,12 +1380,12 @@ def forward(
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict

         def progressive_seq(x, y):
-            globel_step = int(os.getenv("TRAINER_GLOBAL_STEP", "0"))
-            if globel_step < 500:
+            global_step = int(os.getenv("TRAINER_GLOBAL_STEP", "0"))
+            if global_step < 500:
                 return x[:, :512], y[:, :512]
-            if globel_step < 1000:
+            if global_step < 1000:
                 return x[:, :1024], y[:, :1024]
-            if globel_step < 1500:
+            if global_step < 1500:
                 return x[:, :2048], y[:, :2048]
             return x, y
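For context, the renamed `global_step` drives a progressive sequence-length schedule: early steps train on truncated sequences and the cap grows as training proceeds. A small standalone sketch of the same idea (the step thresholds and lengths mirror the hunk above; the numpy tensors and the manually set environment variable are purely illustrative):

import os

import numpy as np


def progressive_seq(x, y):
    """Truncate an (input, label) batch to a length cap that grows with the training step."""
    # TRAINER_GLOBAL_STEP is read from the environment; it defaults to 0 when unset.
    global_step = int(os.getenv("TRAINER_GLOBAL_STEP", "0"))
    if global_step < 500:
        return x[:, :512], y[:, :512]
    if global_step < 1000:
        return x[:, :1024], y[:, :1024]
    if global_step < 1500:
        return x[:, :2048], y[:, :2048]
    return x, y


# Illustrative usage with numpy arrays standing in for paddle tensors.
os.environ["TRAINER_GLOBAL_STEP"] = "750"
tokens = np.zeros((2, 4096), dtype="int64")
labels = np.zeros((2, 4096), dtype="int64")
x, y = progressive_seq(tokens, labels)
print(x.shape)  # (2, 1024) at step 750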

paddlenlp/data/blendable_dataset.py

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ def __init__(self, datasets, weights, size, share_folder, *, data_cache_path=Non
         assert sum_weights > 0.0
         weights /= sum_weights

-        # Build indicies.
+        # Build indices.
         def _build_indices():
             start_time = time.time()

paddlenlp/data/causal_dataset.py

Lines changed: 1 addition & 1 deletion
@@ -677,7 +677,7 @@ def _build_sample_idx(sizes, doc_idx, seq_length, num_epochs, tokens_per_epoch):
                 doc_offset += remaining_seq_length + doc_length - 1
                 remaining_seq_length = 0
             else:
-                # Otherwise, start from the begining of the next document.
+                # Otherwise, start from the beginning of the next document.
                 doc_idx_index += 1
                 doc_offset = 0
         # Record the sequence.
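The corrected comment lives in the loop that packs shuffled documents into fixed-length training samples: a sample keeps consuming documents and either stops partway through one (recording the offset) or, as the comment says, starts from the beginning of the next document. A simplified, self-contained sketch of that packing idea; this is not the exact `_build_sample_idx` implementation, only an illustration that reuses its variable names:

import numpy as np


def pack_documents(sizes, doc_idx, seq_length):
    """Greedily pack documents (in doc_idx order) into samples of seq_length + 1 tokens.

    Returns (doc_idx_index, doc_offset) pairs marking where each sample starts,
    playing the role of sample_idx in the real implementation.
    """
    samples = [(0, 0)]
    doc_idx_index, doc_offset = 0, 0
    total_tokens = int(np.sum(sizes[doc_idx]))
    num_samples = (total_tokens - 1) // seq_length
    while len(samples) <= num_samples and doc_idx_index < len(doc_idx):
        remaining_seq_length = seq_length + 1
        while remaining_seq_length != 0 and doc_idx_index < len(doc_idx):
            doc_length = sizes[doc_idx[doc_idx_index]] - doc_offset
            remaining_seq_length -= doc_length
            if remaining_seq_length <= 0:
                # The current document finishes the sample; resume inside it next time.
                doc_offset += remaining_seq_length + doc_length - 1
                remaining_seq_length = 0
            else:
                # Otherwise, start from the beginning of the next document.
                doc_idx_index += 1
                doc_offset = 0
        # Record the sequence.
        samples.append((doc_idx_index, doc_offset))
    return samples


sizes = np.array([300, 700, 1500])  # token counts per document (made up)
doc_idx = np.array([0, 1, 2])       # document order after shuffling (identity here)
print(pack_documents(sizes, doc_idx, seq_length=1024))  # [(0, 0), (2, 24), (2, 1048)]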

paddlenlp/data/indexed_dataset.py

Lines changed: 2 additions & 2 deletions
@@ -55,7 +55,7 @@ def get_available_dataset_impl():

 def make_dataset(path, impl, skip_warmup=False):
     if CompatibleIndexedDataset.exists(path):
-        print("Using old dataet (.npy & .npz)")
+        print("Using old dataset (.npy & .npz)")
         return CompatibleIndexedDataset(path)
     elif not IndexedDataset.exists(path):
         print(f"Dataset does not exist: {path}")
@@ -903,7 +903,7 @@ def __init__(self, path):

         self._path = path

-        # All documment ids, extend as 1-D array.
+        # All document ids, extend as 1-D array.
         self._token_ids = np.load(path + "_ids.npy", mmap_mode="r", allow_pickle=True)
         process_data = np.load(path + "_idx.npz")
         self._sizes = process_data["lens"]
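For orientation, `make_dataset` acts as a small factory: it first probes for the legacy `.npy`/`.npz` layout served by `CompatibleIndexedDataset` (hence the corrected log line) and otherwise falls back to the newer indexed formats. A hedged usage sketch; the path prefix and the `impl` value are illustrative assumptions, not taken from this commit:

from paddlenlp.data.indexed_dataset import make_dataset

# Hypothetical data prefix: the legacy layout expects "<prefix>_ids.npy" and
# "<prefix>_idx.npz" next to it, while newer formats use separate index/data files.
dataset = make_dataset("/data/corpus/openwebtext_prefix", impl="mmap", skip_warmup=True)
print(len(dataset))
print(dataset[0][:10])  # first ten token ids of the first sample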

paddlenlp/data/tokenizer.py

Lines changed: 2 additions & 2 deletions
@@ -58,7 +58,7 @@ def cut(self, sentence, cut_all=False, use_hmm=True):
         The method used to cut the text to tokens.

         Args:
-            sentence(str): The text that needs to be cuted.
+            sentence(str): The text that needs to be cut.
             cut_all(bool, optional): Whether to use the full mode. If True,
                 using full mode that gets all the possible words from the
                 sentence, which is fast but not accurate. If False, using
@@ -97,7 +97,7 @@ def encode(self, sentence, cut_all=False, use_hmm=True):
         ids using `vocab`.

         Args:
-            sentence(str): The text that needs to be cuted.
+            sentence(str): The text that needs to be cut.
             cut_all(bool, optional): Whether to use the full mode. If True,
                 using full mode that gets all the possible words from the
                 sentence, which is fast but not accurate. If False, using
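The two corrected docstrings belong to the jieba-based tokenizer in `paddlenlp.data`: `cut` returns word tokens and `encode` maps them to ids through the tokenizer's vocab. A hedged usage sketch; the tiny vocabulary built here is only for illustration, real usage would load a full vocabulary file:

from paddlenlp.data import JiebaTokenizer, Vocab

# Toy vocab for illustration only.
vocab = Vocab.build_vocab([["百度", "飞桨", "深度", "学习", "框架"]], unk_token="[UNK]")
tokenizer = JiebaTokenizer(vocab)

tokens = tokenizer.cut("百度飞桨深度学习框架")  # text -> word tokens (full mode off, HMM on by default)
ids = tokenizer.encode("百度飞桨深度学习框架")  # text -> token ids via `vocab`
print(tokens)
print(ids)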

paddlenlp/data/vocab.py

Lines changed: 5 additions & 5 deletions
@@ -40,7 +40,7 @@ class Vocab(object):
             between tokens and indices to be used. If provided, adjust the tokens
             and indices mapping according to it. If None, counter must be provided.
             Default: None.
-        unk_token (str, optional): Special token for unknow token. If no need,
+        unk_token (str, optional): Special token for unknown token. If no need,
             it also could be None. Default: None.
         pad_token (str, optional): Special token for padding token. If no need,
             it also could be None. Default: None.
@@ -214,7 +214,7 @@ def to_tokens(self, indices):
         for idx in indices:
             if not isinstance(idx, (int, np.integer)):
                 warnings.warn(
-                    "The type of `to_tokens()`'s input `indices` is not `int` which will be forcibly transfered to `int`. "
+                    "The type of `to_tokens()`'s input `indices` is not `int` which will be forcibly transferred to `int`. "
                 )
                 idx = int(idx)

@@ -382,7 +382,7 @@ def from_dict(cls, token_to_idx, unk_token=None, pad_token=None, bos_token=None,
         Args:
             token_to_idx (dict): A dict describes the mapping relationship between
                 tokens and indices.
-            unk_token (str, optional): The special token for unknow token. If
+            unk_token (str, optional): The special token for unknown token. If
                 no need, it also could be None. Default: None.
             pad_token (str, optional): The special token for padding token. If
                 no need, it also could be None. Default: None.
@@ -440,7 +440,7 @@ def build_vocab(
         **kwargs
     ):
         """
-        Builds the :class:`Vocab` accoring to given iterator and other
+        Builds the :class:`Vocab` according to given iterator and other
         information. Firstly, iterate over the `iterator` to construct a
         :class:`collections.Counter` and used to init the as :class:`Vocab`.

@@ -455,7 +455,7 @@ def build_vocab(
                 relationship between tokens and indices to be used. If provided,
                 adjust the tokens and indices mapping according to it. If None,
                 counter must be provided. Default: None.
-            unk_token (str, optional): The special token for unknow token
+            unk_token (str, optional): The special token for unknown token
                 '<unk>'. If no need, it also could be None. Default: None.
             pad_token (str, optional): The special token for padding token
                 '<pad>'. If no need, it also could be None. Default: None.
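The docstrings touched here all describe how a `Vocab` is assembled: from an iterator of tokenized text via `build_vocab`, or from an existing mapping via `from_dict`, with optional special tokens such as `unk_token` and `pad_token`. A brief hedged example of both paths (the token lists and the dict are made up):

from paddlenlp.data import Vocab

# Build from an iterator of token lists; special tokens get ids of their own.
vocab = Vocab.build_vocab(
    [["hello", "world"], ["hello", "paddle"]],
    unk_token="<unk>",
    pad_token="<pad>",
)
print(vocab.to_indices(["hello", "never-seen"]))  # unknown words fall back to <unk>'s id
print(vocab.to_tokens([0, 1]))                    # ids back to tokens

# Build from an existing token-to-index dict.
vocab2 = Vocab.from_dict({"<pad>": 0, "<unk>": 1, "hello": 2}, unk_token="<unk>", pad_token="<pad>")
print(vocab2.to_indices("hello"))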

paddlenlp/datasets/dataset.py

Lines changed: 2 additions & 2 deletions
@@ -448,7 +448,7 @@ def __iter__(self):
                 num_samples += 1
         else:
             if inspect.isgenerator(self.data):
-                warnings.warn("Reciving generator as data source, data can only be iterated once")
+                warnings.warn("Receiving generator as data source, data can only be iterated once")
             for example in self.data:
                 if (not self._filter_pipline or self._filter(self._filter_pipline)) and self._shard_filter(
                     num_samples=num_samples
@@ -580,7 +580,7 @@ def remove_if_exit(filepath):
                 lock_files.append(lock_file)
             # Must register to all procs to make the lock file can be removed
             # when any proc breaks. Otherwise, the single registered proc may
-            # not receive proper singal send by the parent proc to exit.
+            # not receive proper signal send by the parent proc to exit.
             atexit.register(lambda: remove_if_exit(lock_files))
             for split in splits:
                 filename = self._get_data(split)

paddlenlp/datasets/hf_datasets/docvqa_zh.py

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@

 _DESCRIPTION = """\
 The training set from the competition of Insurance DocVQA organized by China Pacific Insurance. \
-The submission is now closed so we split original dataset into three parts for model evluation. \
+The submission is now closed so we split original dataset into three parts for model evaluation. \
 There are 4,187 training images, 500 validation images, and 500 test images.
 """
0 commit comments