PaddlePaddle · luotao1 · May 26, 2025 · May 24, 2025
diff --git a/llm/utils/replace_ops.py b/llm/utils/replace_ops.py
@@ -29,7 +29,7 @@
 _ReduceMode: TypeAlias = Literal['mean', 'sum', 'none']
 
 
-# TODO: this function is rewrited from paddle.nn.functional.cross_entropy,
+# TODO: this function is rewritten from paddle.nn.functional.cross_entropy,
 # but better to merge into only one.
 def parallel_cross_entropy(
     input: Tensor,

diff --git a/paddlenlp/rl/utils/bert_padding.py b/paddlenlp/rl/utils/bert_padding.py
@@ -158,7 +158,7 @@ def prepare_flashmask_inputs(
     input_ids_rmpad, indices, *_ = unpad_input(input_ids.unsqueeze(-1), attn_mask)  # input_ids_rmpad (total_nnz, ...)
     input_ids_rmpad = input_ids_rmpad.transpose([1, 0])
 
-    # positon ids rmpad
+    # position ids rmpad
     position_ids_rmpad = index_first_axis(
         rearrange(position_ids.unsqueeze(-1), "b s ... -> (b s) ..."), indices
     ).transpose([1, 0])

diff --git a/scripts/unit_test/ci_unit.sh b/scripts/unit_test/ci_unit.sh
@@ -112,7 +112,7 @@ if [[ ${FLAGS_enable_CI} == "true" ]] || [[ ${FLAGS_enable_CE} == "true" ]];then
     print_info $exit_code unittest
 
     cd ${nlp_dir}
-    echo -e "\033[35m ---- Genrate Allure Report  \033[0m"
+    echo -e "\033[35m ---- Generate Allure Report  \033[0m"
     unset http_proxy && unset https_proxy
     cp scripts/regression/gen_allure_report.py ./
     python gen_allure_report.py > /dev/null

diff --git a/slm/applications/information_extraction/label_studio.py b/slm/applications/information_extraction/label_studio.py
@@ -121,7 +121,7 @@ def _save_examples(save_dir, file_name, examples):
 
     parser.add_argument("--label_studio_file", default="./data/label_studio.json", type=str, help="The annotation file exported from label studio platform.")
     parser.add_argument("--save_dir", default="./data", type=str, help="The path of data that you wanna save.")
-    parser.add_argument("--negative_ratio", default=5, type=int, help="Used only for the extraction task, the ratio of positive and negative samples, number of negtive samples = negative_ratio * number of positive samples")
+    parser.add_argument("--negative_ratio", default=5, type=int, help="Used only for the extraction task, the ratio of positive and negative samples, number of negative samples = negative_ratio * number of positive samples")
     parser.add_argument("--splits", default=[0.8, 0.1, 0.1], type=float, nargs="*", help="The ratio of samples in datasets. [0.6, 0.2, 0.2] means 60% samples used for training, 20% for evaluation and 20% for test.")
     parser.add_argument("--task_type", choices=['ext', 'cls'], default="ext", type=str, help="Select task type, ext for the extraction task and cls for the classification task, defaults to ext.")
     parser.add_argument("--options", default=["正向", "负向"], type=str, nargs="+", help="Used only for the classification task, the options for classification")

diff --git a/slm/applications/text_classification/hierarchical/few-shot/train.py b/slm/applications/text_classification/hierarchical/few-shot/train.py
@@ -94,7 +94,7 @@ def compute_metrics(eval_preds):
         micro_f1_score, macro_f1_score = metric.accumulate()
         return {"micro_f1_score": micro_f1_score, "macro_f1_score": macro_f1_score}
 
-    # Deine the early-stopping callback.
+    # Define the early-stopping callback.
     callbacks = [EarlyStoppingCallback(early_stopping_patience=4, early_stopping_threshold=0.0)]
 
     # Initialize the trainer.

diff --git a/slm/applications/text_classification/hierarchical/few-shot/utils.py b/slm/applications/text_classification/hierarchical/few-shot/utils.py
@@ -19,7 +19,7 @@
 
 def load_local_dataset(data_path, splits, label_list):
     """
-    Load dataset for hierachical classification from files, where
+    Load dataset for hierarchical classification from files, where
     there is one example per line. Text and label are separated
     by '\t', and multiple labels are delimited by ','.
 

diff --git a/slm/applications/text_classification/multi_class/few-shot/train.py b/slm/applications/text_classification/multi_class/few-shot/train.py
@@ -93,7 +93,7 @@ def compute_metrics(eval_preds):
         acc = metric.accumulate()
         return {"accuracy": acc}
 
-    # Deine the early-stopping callback.
+    # Define the early-stopping callback.
     callbacks = [EarlyStoppingCallback(early_stopping_patience=4, early_stopping_threshold=0.0)]
 
     # Initialize the trainer.

diff --git a/slm/applications/text_classification/multi_label/few-shot/train.py b/slm/applications/text_classification/multi_label/few-shot/train.py
@@ -94,7 +94,7 @@ def compute_metrics(eval_preds):
         micro_f1_score, macro_f1_score = metric.accumulate()
         return {"micro_f1_score": micro_f1_score, "macro_f1_score": macro_f1_score}
 
-    # Deine the early-stopping callback.
+    # Define the early-stopping callback.
     callbacks = [EarlyStoppingCallback(early_stopping_patience=4, early_stopping_threshold=0.0)]
 
     # Initialize the trainer.

diff --git a/slm/examples/README.md b/slm/examples/README.md
@@ -31,7 +31,7 @@ PaddleNLP provides rich application examples covering mainstream NLP task to hel
 | simultaneous_translation      | [同声翻译 (Simultaneous Translation)](./simultaneous_translation/)           |
 | machine_reading_comprehension | [阅读理解 (Machine Reading Comprehension)](./machine_reading_comprehension/) |
 
-## NLP 拓展应用 (NLP Extented Applications)
+## NLP 拓展应用 (NLP Extended Applications)
 
 | 目录 Folder          | 任务 Task                                                               |
 |:---------------------|-------------------------------------------------------------------------|

diff --git a/slm/examples/benchmark/clue/grid_search_tools/grid_search.py b/slm/examples/benchmark/clue/grid_search_tools/grid_search.py
@@ -151,7 +151,7 @@ def main():
             if returncode is not None:
                 if returncode != 0:
                     retry[runs[i]["ts"]] += 1
-                    print(f"> {runs[i]['ts']} task failed, will retried, tryed {retry[runs[i]['ts']]} times.")
+                    print(f"> {runs[i]['ts']} task failed, will retried, tried {retry[runs[i]['ts']]} times.")
                     output = runs[i]["ps"].communicate()[0]
                     for line in output.decode("utf-8").split("\n"):
                         print(line)

diff --git a/slm/examples/benchmark/wiki_lambada/eval.py b/slm/examples/benchmark/wiki_lambada/eval.py
@@ -74,7 +74,7 @@ def get_parser():
         help="Whether to use flash attention",
     )
     # load autodist name files, eg: bloom-176b
-    parser.add_argument("--load_autodist", action="store_true", help="whether load auto-dist wieght file")
+    parser.add_argument("--load_autodist", action="store_true", help="whether load auto-dist weight file")
 
     return parser
 

diff --git a/slm/examples/few_shot/RGL/data.py b/slm/examples/few_shot/RGL/data.py
@@ -168,7 +168,7 @@ def __init__(self, labels=None):
     @property
     def labels(self):
         if not getattr(self, "_labels"):
-            raise ValueError("labels and label_mappings are not setted yet.")
+            raise ValueError("labels and label_mappings are not set yet.")
         return self._labels
 
     @labels.setter
@@ -179,7 +179,7 @@ def labels(self, labels):
     @property
     def label_mapping(self):
         if not getattr(self, "_labels"):
-            raise ValueError("labels and label_mappings are not setted yet.")
+            raise ValueError("labels and label_mappings are not set yet.")
         if not getattr(self, "_label_mapping"):
             self._label_mapping = {k: i for i, k in enumerate(self._labels)}
         return self._label_mapping

diff --git a/slm/examples/few_shot/RGL/verbalizer.py b/slm/examples/few_shot/RGL/verbalizer.py
@@ -127,7 +127,7 @@ def aggregate(label_words_logits, atype="mean", ndim=2):
             elif atype == "first":
                 return label_words_logits[..., 0, :]
             else:
-                raise ValueError("Unsupported aggreate type {}".format(atype))
+                raise ValueError("Unsupported aggregate type {}".format(atype))
         return label_words_logits
 
     def normalize(self, logits):

diff --git a/slm/examples/information_extraction/DuIE/run_duie.py b/slm/examples/information_extraction/DuIE/run_duie.py
@@ -252,7 +252,7 @@ def do_train():
                 print("\n=====start evaluating ckpt of %d steps=====" % global_step)
                 precision, recall, f1 = evaluate(model, criterion, test_data_loader, eval_file_path, "eval")
                 print("precision: %.2f\t recall: %.2f\t f1: %.2f\t" % (100 * precision, 100 * recall, 100 * f1))
-                print("saving checkpoing model_%d.pdparams to %s " % (global_step, args.output_dir))
+                print("saving checkpoint model_%d.pdparams to %s " % (global_step, args.output_dir))
                 paddle.save(model.state_dict(), os.path.join(args.output_dir, "model_%d.pdparams" % global_step))
                 model.train()  # back to train mode
 

diff --git a/slm/examples/information_extraction/DuIE/utils.py b/slm/examples/information_extraction/DuIE/utils.py
@@ -66,7 +66,7 @@ def decoding(
         complex_relation_label = [8, 10, 26, 32, 46]
         complex_relation_affi_label = [9, 11, 27, 28, 29, 33, 47]
 
-        # flatten predictions then retrival all valid subject id
+        # flatten predictions then retrieve all valid subject ids
         flatten_predictions = []
         for layer_1 in predictions:
             for layer_2 in layer_1:

diff --git a/slm/examples/information_extraction/DuUIE/inference.py b/slm/examples/information_extraction/DuUIE/inference.py
@@ -109,7 +109,7 @@ def main():
         "-c",
         "--config",
         dest="map_config",
-        help="Offset mapping config, maping generated sel to offset record",
+        help="Offset mapping config, mapping generated sel to offset record",
         default="longer_first_zh",
     )
     parser.add_argument("--verbose", action="store_true")

diff --git a/slm/examples/machine_translation/transformer/configs/transformer.base.yaml b/slm/examples/machine_translation/transformer/configs/transformer.base.yaml
@@ -66,7 +66,7 @@ label_smooth_eps: 0.1
 # select the top `beam_size * 2` beams and process the top `beam_size` alive
 # and finish beams in them separately, while 'v1' would only select the top
 # `beam_size` beams and mix up the alive and finish beams. 'v2' always
-# searchs more and get better results, since the alive beams would
+# searches more and gets better results, since the alive beams would
 # always be `beam_size` while the number of alive beams in `v1` might
 # decrease when meeting the end token. However, 'v2' always generates
 # longer results thus might do more calculation and be slower.

diff --git a/slm/examples/machine_translation/transformer/configs/transformer.big.yaml b/slm/examples/machine_translation/transformer/configs/transformer.big.yaml
@@ -66,7 +66,7 @@ label_smooth_eps: 0.1
 # select the top `beam_size * 2` beams and process the top `beam_size` alive
 # and finish beams in them separately, while 'v1' would only select the top
 # `beam_size` beams and mix up the alive and finish beams. 'v2' always
-# searchs more and get better results, since the alive beams would
+# searches more and gets better results, since the alive beams would
 # always be `beam_size` while the number of alive beams in `v1` might
 # decrease when meeting the end token. However, 'v2' always generates
 # longer results thus might do more calculation and be slower.

diff --git a/slm/examples/machine_translation/transformer/predict.py b/slm/examples/machine_translation/transformer/predict.py
@@ -126,7 +126,7 @@ def do_predict(args):
 
     # Define model
     # `TransformerGenerator` automatically chioces using `FastGeneration`
-    # (with jit building) or the slower verison `InferTransformerModel`.
+    # (with jit building) or the slower version `InferTransformerModel`.
     transformer = TransformerGenerator(
         src_vocab_size=args.src_vocab_size,
         trg_vocab_size=args.trg_vocab_size,

diff --git a/slm/examples/model_compression/distill_lstm/small.py b/slm/examples/model_compression/distill_lstm/small.py
@@ -43,7 +43,7 @@ def __init__(
     ):
         super(BiLSTM, self).__init__()
         if embedding_name is not None:
-            raise ValueError("TokenEmbedding is depercated in PaddleNLP since 3.0, please set embedding_name to None ")
+            raise ValueError("TokenEmbedding is deprecated in PaddleNLP since 3.0, please set embedding_name to None ")
         else:
             self.embedder = nn.Embedding(vocab_size, embed_dim, padding_idx)
 

diff --git a/slm/examples/model_compression/pp-minilm/deploy/python/infer_perf.sh b/slm/examples/model_compression/pp-minilm/deploy/python/infer_perf.sh
@@ -14,7 +14,7 @@
 
 export task=TNEWS
 
-echo Inference of orgin FP32 model
+echo Inference of origin FP32 model
 for ((i=0;i<=4;i++));
 do
     python infer.py  --task_name ${task} --model_path  ../finetuning/ppminilm-6l-768h/models/${task}/1e-4_64/inference  --use_trt --perf

diff --git a/slm/examples/model_interpretation/rationale_extraction/generate_evaluation_data.sh b/slm/examples/model_interpretation/rationale_extraction/generate_evaluation_data.sh
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 ###
- # This script concatenates results from previous running to generate a formated result for evaluation use
+ # This script concatenates results from previous running to generate a formatted result for evaluation use
 ### 
 
 BASE_MODEL=$1

diff --git a/slm/examples/model_interpretation/rationale_extraction/sentiment_pred.py b/slm/examples/model_interpretation/rationale_extraction/sentiment_pred.py
@@ -91,7 +91,7 @@ def _read(self, filename, language):
 
 def create_dataloader(dataset, trans_fn=None, mode="train", batch_size=1, batchify_fn=None):
     """
-    Creats dataloader.
+    Creates dataloader.
 
     Args:
         dataset(obj:`paddle.io.Dataset`): Dataset instance.

diff --git a/slm/examples/model_interpretation/task/mrc/saliency_map/rc_interpretable.py b/slm/examples/model_interpretation/task/mrc/saliency_map/rc_interpretable.py
@@ -480,7 +480,7 @@ def extract_integrated_gradient_scores(
                     out_handle,
                 )
             else:
-                raise KeyError(f"Unkonwn interpretable mode: {args.inter_mode}")
+                raise KeyError(f"Unknown interpretable mode: {args.inter_mode}")
 
         # Deal with last example
         if args.language == "ch":

diff --git a/slm/examples/model_interpretation/task/senti/LIME/lime_text.py b/slm/examples/model_interpretation/task/senti/LIME/lime_text.py
@@ -371,7 +371,7 @@ def __init__(
                 generate random numbers. If None, the random state will be
                 initialized using the internal numpy seed.
             char_level: an boolean identifying that we treat each character
-                as an independent occurence in the string
+                as an independent occurrence in the string
         """
 
         if kernel is None:

diff --git a/slm/examples/model_interpretation/task/senti/rnn/train.py b/slm/examples/model_interpretation/task/senti/rnn/train.py
@@ -50,7 +50,7 @@ def set_seed(seed=1000):
 
 def create_dataloader(dataset, trans_fn=None, mode="train", batch_size=1, batchify_fn=None):
     """
-    Creats dataloader.
+    Creates dataloader.
 
     Args:
         dataset(obj:`paddle.io.Dataset`): Dataset instance.

diff --git a/slm/examples/model_interpretation/task/senti/saliency_map/sentiment_interpretable.py b/slm/examples/model_interpretation/task/senti/saliency_map/sentiment_interpretable.py
@@ -97,7 +97,7 @@ def _read(self, filename):
 
 def create_dataloader(dataset, trans_fn=None, mode="train", batch_size=1, batchify_fn=None):
     """
-    Creats dataloader.
+    Creates dataloader.
 
     Args:
         dataset(obj:`paddle.io.Dataset`): Dataset instance.
@@ -458,7 +458,7 @@ def extract_LIME_scores(
 
             # Attention
             if args.inter_mode == "attention":
-                # extract attention scores and write resutls to file
+                # extract attention scores and write results to file
                 extract_attention_scores(args, atts, input_ids, tokens, sub_word_id_dict, result, offset, out_handle)
 
             # Integrated_gradient
@@ -496,7 +496,7 @@ def extract_LIME_scores(
                 )
 
             else:
-                raise KeyError(f"Unkonwn interpretable mode: {args.inter_mode}")
+                raise KeyError(f"Unknown interpretable mode: {args.inter_mode}")
 
         if args.inter_mode == "lime":
             log.debug(np.average(np.array(lime_relative_err_total)))
diff --git a/slm/examples/model_interpretation/task/similarity/LIME/lime_text.py b/slm/examples/model_interpretation/task/similarity/LIME/lime_text.py
@@ -362,7 +362,7 @@ def __init__(
                 generate random numbers. If None, the random state will be
                 initialized using the internal numpy seed.
             char_level: an boolean identifying that we treat each character
-                as an independent occurence in the string
+                as an independent occurrence in the string
         """
 
         if kernel is None:

diff --git a/slm/examples/model_interpretation/task/similarity/saliency_map/similarity_interpretable.py b/slm/examples/model_interpretation/task/similarity/saliency_map/similarity_interpretable.py
@@ -640,7 +640,7 @@ def LIME_error_evaluation(
                 )
 
             else:
-                raise KeyError(f"Unkonwn interpretable mode: {args.inter_mode}")
+                raise KeyError(f"Unknown interpretable mode: {args.inter_mode}")
 
         if args.inter_mode == "lime":
             print(np.average(np.array(lime_relative_err_total)))
diff --git a/slm/examples/model_interpretation/task/similarity/simnet/train.py b/slm/examples/model_interpretation/task/similarity/simnet/train.py
@@ -49,7 +49,7 @@
 
 def create_dataloader(dataset, trans_fn=None, mode="train", batch_size=1, batchify_fn=None):
     """
-    Creats dataloader.
+    Creates dataloader.
 
     Args:
         dataset(obj:`paddle.io.Dataset`): Dataset instance.

diff --git a/slm/examples/model_interpretation/task/transformer.py b/slm/examples/model_interpretation/task/transformer.py
@@ -264,7 +264,7 @@ def compute_kv(self, key, value):
 
     def gen_cache(self, key, value=None, type=Cache):
         """
-        Generates cache for `forward` usage in inference accroding to arguments.
+        Generates cache for `forward` usage in inference according to arguments.
         The generated cache is an instance of `MultiHeadAttention.Cache` or an
         instance of `MultiHeadAttention.StaticCache`.
 
@@ -1063,7 +1063,7 @@ class Transformer(Layer):
     Please refer to `Attention is all you need <http://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf>`_ ,
     and see `TransformerEncoder` and `TransformerDecoder` for more details.
 
-    Users can configurate the model architecture with corresponding parameters.
+    Users can configure the model architecture with corresponding parameters.
     Note the usage of `normalize_before` representing where to apply layer
     normalization (in pre-process or post-precess of multi-head attention or FFN),
     and some transformer like models are different on this, such as

diff --git a/slm/examples/text_graph/erniesage/models/conv.py b/slm/examples/text_graph/erniesage/models/conv.py
@@ -153,7 +153,7 @@ def _recv_func(message):
         return self_feature, neigh_feature
 
     def forward(self, graph, term_ids, act="relu"):
-        """Forward funciton of Conv layer.
+        """Forward function of Conv layer.
 
         Args:
             graph (Graph): Graph object.

diff --git a/slm/examples/text_graph/erniesage/models/encoder.py b/slm/examples/text_graph/erniesage/models/encoder.py
@@ -95,7 +95,7 @@ def take_final_feature(self, feature, index):
         """Gather the final feature.
 
         Args:
-            feature (Tensor): the total featue tensor.
+            feature (Tensor): the total feature tensor.
             index (Tensor): the index to gather.
 
         Returns:

diff --git a/slm/examples/text_matching/ernie_matching/README.md b/slm/examples/text_matching/ernie_matching/README.md
@@ -18,7 +18,7 @@
 
 ```
 ernie_matching/
-├── deply # 部署
+├── deploy # 部署
 |   └── python
 |       └── predict.py # python 预测部署示例
 ├── export_model.py # 动态图参数导出静态图参数脚本

diff --git a/slm/examples/text_matching/sentence_transformers/README.md b/slm/examples/text_matching/sentence_transformers/README.md
@@ -62,7 +62,7 @@ PaddleNLP 提供了丰富的预训练模型，并且可以便捷地获取 Paddle
 
 ```text
 sentence_transformers/
-├── model.py # Sentence Transfomer 组网文件
+├── model.py # Sentence Transformer 组网文件
 ├── README.md # 文本说明
 └── train.py # 模型训练评估
 ```

diff --git a/slm/examples/text_matching/simcse/README.md b/slm/examples/text_matching/simcse/README.md
@@ -61,7 +61,7 @@ python -u -m paddle.distributed.launch --gpus '0' \
 可支持配置的参数：
 
 * `infer_with_fc_pooler`：可选，在预测阶段计算文本 embedding 表示的时候网络前向是否会过训练阶段最后一层的 fc;  建议关闭模型效果最好。
-* `dup_rate`: 可选，word reptition 的比例，默认是0.32，根据论文 Word Repetition 比例采用 0.32 效果最佳。
+* `dup_rate`: 可选，word repetition 的比例，默认是0.32，根据论文 Word Repetition 比例采用 0.32 效果最佳。
 * `scale`：可选，在计算 cross_entropy loss 之前对 cosine 相似度进行缩放的因子；默认为 20。
 * `dropout`：可选，SimCSE 网络前向使用的 dropout 取值；默认 0.1。
 * `save_dir`：可选，保存训练模型的目录；默认保存在当前目录 checkpoints 文件夹下。

diff --git a/slm/examples/text_matching/simnet/train.py b/slm/examples/text_matching/simnet/train.py
@@ -39,7 +39,7 @@
 
 def create_dataloader(dataset, trans_fn=None, mode="train", batch_size=1, batchify_fn=None):
     """
-    Creats dataloader.
+    Creates dataloader.
 
     Args:
         dataset(obj:`paddle.io.Dataset`): Dataset instance.

diff --git a/slm/examples/text_to_knowledge/nptag/utils.py b/slm/examples/text_to_knowledge/nptag/utils.py
@@ -113,7 +113,7 @@ def levenstein_distance(s1: str, s2: str) -> int:
 
 
 class BurkhardKellerNode(object):
-    """Node implementatation for BK-Tree. A BK-Tree node stores the information of current word, and its approximate words calculated by levenstein distance.
+    """Node implementation for BK-Tree. A BK-Tree node stores the information of current word, and its approximate words calculated by Levenshtein distance.
 
     Args:
         word (str): word of current node.

diff --git a/slm/examples/text_to_knowledge/termtree/termtree.py b/slm/examples/text_to_knowledge/termtree/termtree.py
@@ -20,7 +20,7 @@
 
 
 class TermTreeNode(object):
-    """Defination of term node. All members are protected, to keep rigorism of data struct.
+    """Definition of term node. All members are protected, to keep rigorism of data struct.
 
     Args:
         sid (str): term id of node.
@@ -34,7 +34,7 @@ class TermTreeNode(object):
             Defaults to None.
         sub_type (Optional[List[str]], optional): grouped by some term. Defaults to None.
         sub_term (Optional[List[str]], optional): some lower term. Defaults to None.
-        data (Optional[Dict[str, Any]], optional): to sore full imformation of a term. Defaults to None.
+        data (Optional[Dict[str, Any]], optional): to store full information of a term. Defaults to None.
 
     """