Skip to content

[Typos] Fix #10651

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llm/utils/replace_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
_ReduceMode: TypeAlias = Literal['mean', 'sum', 'none']


# TODO: this function is rewrited from paddle.nn.functional.cross_entropy,
# TODO: this function is rewritten from paddle.nn.functional.cross_entropy,
# but better to merge into only one.
def parallel_cross_entropy(
input: Tensor,
Expand Down
2 changes: 1 addition & 1 deletion paddlenlp/rl/utils/bert_padding.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def prepare_flashmask_inputs(
input_ids_rmpad, indices, *_ = unpad_input(input_ids.unsqueeze(-1), attn_mask) # input_ids_rmpad (total_nnz, ...)
input_ids_rmpad = input_ids_rmpad.transpose([1, 0])

# positon ids rmpad
# position ids rmpad
position_ids_rmpad = index_first_axis(
rearrange(position_ids.unsqueeze(-1), "b s ... -> (b s) ..."), indices
).transpose([1, 0])
Expand Down
2 changes: 1 addition & 1 deletion scripts/unit_test/ci_unit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ if [[ ${FLAGS_enable_CI} == "true" ]] || [[ ${FLAGS_enable_CE} == "true" ]];then
print_info $exit_code unittest

cd ${nlp_dir}
echo -e "\033[35m ---- Genrate Allure Report \033[0m"
echo -e "\033[35m ---- Generate Allure Report \033[0m"
unset http_proxy && unset https_proxy
cp scripts/regression/gen_allure_report.py ./
python gen_allure_report.py > /dev/null
Expand Down
2 changes: 1 addition & 1 deletion slm/applications/information_extraction/label_studio.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def _save_examples(save_dir, file_name, examples):

parser.add_argument("--label_studio_file", default="./data/label_studio.json", type=str, help="The annotation file exported from label studio platform.")
parser.add_argument("--save_dir", default="./data", type=str, help="The path of data that you wanna save.")
parser.add_argument("--negative_ratio", default=5, type=int, help="Used only for the extraction task, the ratio of positive and negative samples, number of negtive samples = negative_ratio * number of positive samples")
parser.add_argument("--negative_ratio", default=5, type=int, help="Used only for the extraction task, the ratio of positive and negative samples, number of negative samples = negative_ratio * number of positive samples")
parser.add_argument("--splits", default=[0.8, 0.1, 0.1], type=float, nargs="*", help="The ratio of samples in datasets. [0.6, 0.2, 0.2] means 60% samples used for training, 20% for evaluation and 20% for test.")
parser.add_argument("--task_type", choices=['ext', 'cls'], default="ext", type=str, help="Select task type, ext for the extraction task and cls for the classification task, defaults to ext.")
parser.add_argument("--options", default=["正向", "负向"], type=str, nargs="+", help="Used only for the classification task, the options for classification")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def compute_metrics(eval_preds):
micro_f1_score, macro_f1_score = metric.accumulate()
return {"micro_f1_score": micro_f1_score, "macro_f1_score": macro_f1_score}

# Deine the early-stopping callback.
# Define the early-stopping callback.
callbacks = [EarlyStoppingCallback(early_stopping_patience=4, early_stopping_threshold=0.0)]

# Initialize the trainer.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

def load_local_dataset(data_path, splits, label_list):
"""
Load dataset for hierachical classification from files, where
Load dataset for hierarchical classification from files, where
there is one example per line. Text and label are separated
by '\t', and multiple labels are delimited by ','.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def compute_metrics(eval_preds):
acc = metric.accumulate()
return {"accuracy": acc}

# Deine the early-stopping callback.
# Define the early-stopping callback.
callbacks = [EarlyStoppingCallback(early_stopping_patience=4, early_stopping_threshold=0.0)]

# Initialize the trainer.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def compute_metrics(eval_preds):
micro_f1_score, macro_f1_score = metric.accumulate()
return {"micro_f1_score": micro_f1_score, "macro_f1_score": macro_f1_score}

# Deine the early-stopping callback.
# Define the early-stopping callback.
callbacks = [EarlyStoppingCallback(early_stopping_patience=4, early_stopping_threshold=0.0)]

# Initialize the trainer.
Expand Down
2 changes: 1 addition & 1 deletion slm/examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ PaddleNLP provides rich application examples covering mainstream NLP task to hel
| simultaneous_translation | [同声翻译 (Simultaneous Translation)](./simultaneous_translation/) |
| machine_reading_comprehension | [阅读理解 (Machine Reading Comprehension)](./machine_reading_comprehension/) |

## NLP 拓展应用 (NLP Extented Applications)
## NLP 拓展应用 (NLP Extended Applications)

| 目录 Folder | 任务 Task |
|:---------------------|-------------------------------------------------------------------------|
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def main():
if returncode is not None:
if returncode != 0:
retry[runs[i]["ts"]] += 1
print(f"> {runs[i]['ts']} task failed, will retried, tryed {retry[runs[i]['ts']]} times.")
print(f"> {runs[i]['ts']} task failed, will retried, tried {retry[runs[i]['ts']]} times.")
output = runs[i]["ps"].communicate()[0]
for line in output.decode("utf-8").split("\n"):
print(line)
Expand Down
2 changes: 1 addition & 1 deletion slm/examples/benchmark/wiki_lambada/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def get_parser():
help="Whether to use flash attention",
)
# load autodist name files, eg: bloom-176b
parser.add_argument("--load_autodist", action="store_true", help="whether load auto-dist wieght file")
parser.add_argument("--load_autodist", action="store_true", help="whether load auto-dist weight file")

return parser

Expand Down
4 changes: 2 additions & 2 deletions slm/examples/few_shot/RGL/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def __init__(self, labels=None):
@property
def labels(self):
if not getattr(self, "_labels"):
raise ValueError("labels and label_mappings are not setted yet.")
raise ValueError("labels and label_mappings are not set yet.")
return self._labels

@labels.setter
Expand All @@ -179,7 +179,7 @@ def labels(self, labels):
@property
def label_mapping(self):
if not getattr(self, "_labels"):
raise ValueError("labels and label_mappings are not setted yet.")
raise ValueError("labels and label_mappings are not set yet.")
if not getattr(self, "_label_mapping"):
self._label_mapping = {k: i for i, k in enumerate(self._labels)}
return self._label_mapping
Expand Down
2 changes: 1 addition & 1 deletion slm/examples/few_shot/RGL/verbalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def aggregate(label_words_logits, atype="mean", ndim=2):
elif atype == "first":
return label_words_logits[..., 0, :]
else:
raise ValueError("Unsupported aggreate type {}".format(atype))
raise ValueError("Unsupported aggregate type {}".format(atype))
return label_words_logits

def normalize(self, logits):
Expand Down
2 changes: 1 addition & 1 deletion slm/examples/information_extraction/DuIE/run_duie.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ def do_train():
print("\n=====start evaluating ckpt of %d steps=====" % global_step)
precision, recall, f1 = evaluate(model, criterion, test_data_loader, eval_file_path, "eval")
print("precision: %.2f\t recall: %.2f\t f1: %.2f\t" % (100 * precision, 100 * recall, 100 * f1))
print("saving checkpoing model_%d.pdparams to %s " % (global_step, args.output_dir))
print("saving checkpoint model_%d.pdparams to %s " % (global_step, args.output_dir))
paddle.save(model.state_dict(), os.path.join(args.output_dir, "model_%d.pdparams" % global_step))
model.train() # back to train mode

Expand Down
2 changes: 1 addition & 1 deletion slm/examples/information_extraction/DuIE/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def decoding(
complex_relation_label = [8, 10, 26, 32, 46]
complex_relation_affi_label = [9, 11, 27, 28, 29, 33, 47]

# flatten predictions then retrival all valid subject id
# flatten predictions, then retrieve all valid subject ids
flatten_predictions = []
for layer_1 in predictions:
for layer_2 in layer_1:
Expand Down
2 changes: 1 addition & 1 deletion slm/examples/information_extraction/DuUIE/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def main():
"-c",
"--config",
dest="map_config",
help="Offset mapping config, maping generated sel to offset record",
help="Offset mapping config, mapping generated sel to offset record",
default="longer_first_zh",
)
parser.add_argument("--verbose", action="store_true")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ label_smooth_eps: 0.1
# select the top `beam_size * 2` beams and process the top `beam_size` alive
# and finish beams in them separately, while 'v1' would only select the top
# `beam_size` beams and mix up the alive and finish beams. 'v2' always
# searchs more and get better results, since the alive beams would
# searches more and gets better results, since the alive beams would
# always be `beam_size` while the number of alive beams in `v1` might
# decrease when meeting the end token. However, 'v2' always generates
# longer results thus might do more calculation and be slower.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ label_smooth_eps: 0.1
# select the top `beam_size * 2` beams and process the top `beam_size` alive
# and finish beams in them separately, while 'v1' would only select the top
# `beam_size` beams and mix up the alive and finish beams. 'v2' always
# searchs more and get better results, since the alive beams would
# searches more and gets better results, since the alive beams would
# always be `beam_size` while the number of alive beams in `v1` might
# decrease when meeting the end token. However, 'v2' always generates
# longer results thus might do more calculation and be slower.
Expand Down
2 changes: 1 addition & 1 deletion slm/examples/machine_translation/transformer/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def do_predict(args):

# Define model
# `TransformerGenerator` automatically chooses `FastGeneration`
# (with jit building) or the slower verison `InferTransformerModel`.
# (with jit building) or the slower version `InferTransformerModel`.
transformer = TransformerGenerator(
src_vocab_size=args.src_vocab_size,
trg_vocab_size=args.trg_vocab_size,
Expand Down
2 changes: 1 addition & 1 deletion slm/examples/model_compression/distill_lstm/small.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(
):
super(BiLSTM, self).__init__()
if embedding_name is not None:
raise ValueError("TokenEmbedding is depercated in PaddleNLP since 3.0, please set embedding_name to None ")
raise ValueError("TokenEmbedding is deprecated in PaddleNLP since 3.0, please set embedding_name to None ")
else:
self.embedder = nn.Embedding(vocab_size, embed_dim, padding_idx)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

export task=TNEWS

echo Inference of orgin FP32 model
echo Inference of origin FP32 model
for ((i=0;i<=4;i++));
do
python infer.py --task_name ${task} --model_path ../finetuning/ppminilm-6l-768h/models/${task}/1e-4_64/inference --use_trt --perf
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.

###
# This script concatenates results from previous running to generate a formated result for evaluation use
# This script concatenates results from previous running to generate a formatted result for evaluation use
###

BASE_MODEL=$1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def _read(self, filename, language):

def create_dataloader(dataset, trans_fn=None, mode="train", batch_size=1, batchify_fn=None):
"""
Creats dataloader.
Creates dataloader.

Args:
dataset(obj:`paddle.io.Dataset`): Dataset instance.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ def extract_integrated_gradient_scores(
out_handle,
)
else:
raise KeyError(f"Unkonwn interpretable mode: {args.inter_mode}")
raise KeyError(f"Unknown interpretable mode: {args.inter_mode}")

# Deal with last example
if args.language == "ch":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ def __init__(
generate random numbers. If None, the random state will be
initialized using the internal numpy seed.
char_level: a boolean identifying that we treat each character
as an independent occurence in the string
as an independent occurrence in the string
"""

if kernel is None:
Expand Down
2 changes: 1 addition & 1 deletion slm/examples/model_interpretation/task/senti/rnn/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def set_seed(seed=1000):

def create_dataloader(dataset, trans_fn=None, mode="train", batch_size=1, batchify_fn=None):
"""
Creats dataloader.
Creates dataloader.

Args:
dataset(obj:`paddle.io.Dataset`): Dataset instance.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def _read(self, filename):

def create_dataloader(dataset, trans_fn=None, mode="train", batch_size=1, batchify_fn=None):
"""
Creats dataloader.
Creates dataloader.

Args:
dataset(obj:`paddle.io.Dataset`): Dataset instance.
Expand Down Expand Up @@ -458,7 +458,7 @@ def extract_LIME_scores(

# Attention
if args.inter_mode == "attention":
# extract attention scores and write resutls to file
# extract attention scores and write results to file
extract_attention_scores(args, atts, input_ids, tokens, sub_word_id_dict, result, offset, out_handle)

# Integrated_gradient
Expand Down Expand Up @@ -496,7 +496,7 @@ def extract_LIME_scores(
)

else:
raise KeyError(f"Unkonwn interpretable mode: {args.inter_mode}")
raise KeyError(f"Unknown interpretable mode: {args.inter_mode}")

if args.inter_mode == "lime":
log.debug(np.average(np.array(lime_relative_err_total)))
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ def __init__(
generate random numbers. If None, the random state will be
initialized using the internal numpy seed.
char_level: a boolean identifying that we treat each character
as an independent occurence in the string
as an independent occurrence in the string
"""

if kernel is None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,7 @@ def LIME_error_evaluation(
)

else:
raise KeyError(f"Unkonwn interpretable mode: {args.inter_mode}")
raise KeyError(f"Unknown interpretable mode: {args.inter_mode}")

if args.inter_mode == "lime":
print(np.average(np.array(lime_relative_err_total)))
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@

def create_dataloader(dataset, trans_fn=None, mode="train", batch_size=1, batchify_fn=None):
"""
Creats dataloader.
Creates dataloader.

Args:
dataset(obj:`paddle.io.Dataset`): Dataset instance.
Expand Down
4 changes: 2 additions & 2 deletions slm/examples/model_interpretation/task/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def compute_kv(self, key, value):

def gen_cache(self, key, value=None, type=Cache):
"""
Generates cache for `forward` usage in inference accroding to arguments.
Generates cache for `forward` usage in inference according to arguments.
The generated cache is an instance of `MultiHeadAttention.Cache` or an
instance of `MultiHeadAttention.StaticCache`.

Expand Down Expand Up @@ -1063,7 +1063,7 @@ class Transformer(Layer):
Please refer to `Attention is all you need <http://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf>`_ ,
and see `TransformerEncoder` and `TransformerDecoder` for more details.

Users can configurate the model architecture with corresponding parameters.
Users can configure the model architecture with corresponding parameters.
Note the usage of `normalize_before` representing where to apply layer
normalization (in pre-process or post-process of multi-head attention or FFN),
and some transformer like models are different on this, such as
Expand Down
2 changes: 1 addition & 1 deletion slm/examples/text_graph/erniesage/models/conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def _recv_func(message):
return self_feature, neigh_feature

def forward(self, graph, term_ids, act="relu"):
"""Forward funciton of Conv layer.
"""Forward function of Conv layer.

Args:
graph (Graph): Graph object.
Expand Down
2 changes: 1 addition & 1 deletion slm/examples/text_graph/erniesage/models/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def take_final_feature(self, feature, index):
"""Gather the final feature.

Args:
feature (Tensor): the total featue tensor.
feature (Tensor): the total feature tensor.
index (Tensor): the index to gather.

Returns:
Expand Down
2 changes: 1 addition & 1 deletion slm/examples/text_matching/ernie_matching/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

```
ernie_matching/
├── deply # 部署
├── deploy # 部署
| └── python
| └── predict.py # python 预测部署示例
├── export_model.py # 动态图参数导出静态图参数脚本
Expand Down
2 changes: 1 addition & 1 deletion slm/examples/text_matching/sentence_transformers/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ PaddleNLP 提供了丰富的预训练模型,并且可以便捷地获取 Paddle

```text
sentence_transformers/
├── model.py # Sentence Transfomer 组网文件
├── model.py # Sentence Transformer 组网文件
├── README.md # 文本说明
└── train.py # 模型训练评估
```
Expand Down
2 changes: 1 addition & 1 deletion slm/examples/text_matching/simcse/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ python -u -m paddle.distributed.launch --gpus '0' \
可支持配置的参数:

* `infer_with_fc_pooler`:可选,在预测阶段计算文本 embedding 表示的时候网络前向是否会过训练阶段最后一层的 fc; 建议关闭模型效果最好。
* `dup_rate`: 可选,word reptition 的比例,默认是0.32,根据论文 Word Repetition 比例采用 0.32 效果最佳。
* `dup_rate`: 可选,word repetition 的比例,默认是0.32,根据论文 Word Repetition 比例采用 0.32 效果最佳。
* `scale`:可选,在计算 cross_entropy loss 之前对 cosine 相似度进行缩放的因子;默认为 20。
* `dropout`:可选,SimCSE 网络前向使用的 dropout 取值;默认 0.1。
* `save_dir`:可选,保存训练模型的目录;默认保存在当前目录 checkpoints 文件夹下。
Expand Down
2 changes: 1 addition & 1 deletion slm/examples/text_matching/simnet/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

def create_dataloader(dataset, trans_fn=None, mode="train", batch_size=1, batchify_fn=None):
"""
Creats dataloader.
Creates dataloader.

Args:
dataset(obj:`paddle.io.Dataset`): Dataset instance.
Expand Down
2 changes: 1 addition & 1 deletion slm/examples/text_to_knowledge/nptag/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def levenstein_distance(s1: str, s2: str) -> int:


class BurkhardKellerNode(object):
"""Node implementatation for BK-Tree. A BK-Tree node stores the information of current word, and its approximate words calculated by levenstein distance.
"""Node implementation for BK-Tree. A BK-Tree node stores the information of current word, and its approximate words calculated by levenstein distance.

Args:
word (str): word of current node.
Expand Down
4 changes: 2 additions & 2 deletions slm/examples/text_to_knowledge/termtree/termtree.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@


class TermTreeNode(object):
"""Defination of term node. All members are protected, to keep rigorism of data struct.
"""Definition of term node. All members are protected, to keep rigorism of data struct.

Args:
sid (str): term id of node.
Expand All @@ -34,7 +34,7 @@ class TermTreeNode(object):
Defaults to None.
sub_type (Optional[List[str]], optional): grouped by some term. Defaults to None.
sub_term (Optional[List[str]], optional): some lower term. Defaults to None.
data (Optional[Dict[str, Any]], optional): to sore full imformation of a term. Defaults to None.
data (Optional[Dict[str, Any]], optional): to store full information of a term. Defaults to None.

"""

Expand Down
Loading