diff --git a/llm/utils/replace_ops.py b/llm/utils/replace_ops.py
index 2e29339211d3..2c919e5b8c6f 100644
--- a/llm/utils/replace_ops.py
+++ b/llm/utils/replace_ops.py
@@ -29,7 +29,7 @@ _ReduceMode: TypeAlias = Literal['mean', 'sum', 'none']
 
-# TODO: this function is rewrited from paddle.nn.functional.cross_entropy,
+# TODO: this function is rewritten from paddle.nn.functional.cross_entropy,
 # but better to merge into only one.
 def parallel_cross_entropy(
     input: Tensor,
diff --git a/paddlenlp/rl/utils/bert_padding.py b/paddlenlp/rl/utils/bert_padding.py
index ee4ebc12acdc..741a51ed040f 100644
--- a/paddlenlp/rl/utils/bert_padding.py
+++ b/paddlenlp/rl/utils/bert_padding.py
@@ -158,7 +158,7 @@ def prepare_flashmask_inputs(
     input_ids_rmpad, indices, *_ = unpad_input(input_ids.unsqueeze(-1), attn_mask)  # input_ids_rmpad (total_nnz, ...)
     input_ids_rmpad = input_ids_rmpad.transpose([1, 0])
-    # positon ids rmpad
+    # position ids rmpad
     position_ids_rmpad = index_first_axis(
         rearrange(position_ids.unsqueeze(-1), "b s ... -> (b s) ..."), indices
     ).transpose([1, 0])
diff --git a/scripts/unit_test/ci_unit.sh b/scripts/unit_test/ci_unit.sh
index 878e35e165b2..8e3f10133c0e 100644
--- a/scripts/unit_test/ci_unit.sh
+++ b/scripts/unit_test/ci_unit.sh
@@ -112,7 +112,7 @@ if [[ ${FLAGS_enable_CI} == "true" ]] || [[ ${FLAGS_enable_CE} == "true" ]];then
     print_info $exit_code unittest
     cd ${nlp_dir}
-    echo -e "\033[35m ---- Genrate Allure Report \033[0m"
+    echo -e "\033[35m ---- Generate Allure Report \033[0m"
     unset http_proxy && unset https_proxy
     cp scripts/regression/gen_allure_report.py ./
     python gen_allure_report.py > /dev/null
diff --git a/slm/applications/information_extraction/label_studio.py b/slm/applications/information_extraction/label_studio.py
index 0f0d815f7774..9d49ccea33d7 100644
--- a/slm/applications/information_extraction/label_studio.py
+++ b/slm/applications/information_extraction/label_studio.py
@@ -121,7 +121,7 @@ def _save_examples(save_dir, file_name, examples):
     parser.add_argument("--label_studio_file", default="./data/label_studio.json", type=str, help="The annotation file exported from label studio platform.")
     parser.add_argument("--save_dir", default="./data", type=str, help="The path of data that you wanna save.")
-    parser.add_argument("--negative_ratio", default=5, type=int, help="Used only for the extraction task, the ratio of positive and negative samples, number of negtive samples = negative_ratio * number of positive samples")
+    parser.add_argument("--negative_ratio", default=5, type=int, help="Used only for the extraction task, the ratio of positive and negative samples, number of negative samples = negative_ratio * number of positive samples")
     parser.add_argument("--splits", default=[0.8, 0.1, 0.1], type=float, nargs="*", help="The ratio of samples in datasets. [0.6, 0.2, 0.2] means 60% samples used for training, 20% for evaluation and 20% for test.")
     parser.add_argument("--task_type", choices=['ext', 'cls'], default="ext", type=str, help="Select task type, ext for the extraction task and cls for the classification task, defaults to ext.")
     parser.add_argument("--options", default=["正向", "负向"], type=str, nargs="+", help="Used only for the classification task, the options for classification")
diff --git a/slm/applications/text_classification/hierarchical/few-shot/train.py b/slm/applications/text_classification/hierarchical/few-shot/train.py
index 9214edc0fabb..e0ff949e2d53 100644
--- a/slm/applications/text_classification/hierarchical/few-shot/train.py
+++ b/slm/applications/text_classification/hierarchical/few-shot/train.py
@@ -94,7 +94,7 @@ def compute_metrics(eval_preds):
         micro_f1_score, macro_f1_score = metric.accumulate()
         return {"micro_f1_score": micro_f1_score, "macro_f1_score": macro_f1_score}
 
-    # Deine the early-stopping callback.
+    # Define the early-stopping callback.
     callbacks = [EarlyStoppingCallback(early_stopping_patience=4, early_stopping_threshold=0.0)]
 
     # Initialize the trainer.
diff --git a/slm/applications/text_classification/hierarchical/few-shot/utils.py b/slm/applications/text_classification/hierarchical/few-shot/utils.py
index 2e1dc6f44756..2409e296e3af 100644
--- a/slm/applications/text_classification/hierarchical/few-shot/utils.py
+++ b/slm/applications/text_classification/hierarchical/few-shot/utils.py
@@ -19,7 +19,7 @@
 def load_local_dataset(data_path, splits, label_list):
     """
-    Load dataset for hierachical classification from files, where
+    Load dataset for hierarchical classification from files, where
     there is one example per line. Text and label are separated
     by '\t', and multiple labels are delimited by ','.
diff --git a/slm/applications/text_classification/multi_class/few-shot/train.py b/slm/applications/text_classification/multi_class/few-shot/train.py
index 2ea9bc2e9143..b6d1f85159f0 100644
--- a/slm/applications/text_classification/multi_class/few-shot/train.py
+++ b/slm/applications/text_classification/multi_class/few-shot/train.py
@@ -93,7 +93,7 @@ def compute_metrics(eval_preds):
         acc = metric.accumulate()
         return {"accuracy": acc}
 
-    # Deine the early-stopping callback.
+    # Define the early-stopping callback.
     callbacks = [EarlyStoppingCallback(early_stopping_patience=4, early_stopping_threshold=0.0)]
 
     # Initialize the trainer.
diff --git a/slm/applications/text_classification/multi_label/few-shot/train.py b/slm/applications/text_classification/multi_label/few-shot/train.py
index c2d952435843..78ff3218df2d 100644
--- a/slm/applications/text_classification/multi_label/few-shot/train.py
+++ b/slm/applications/text_classification/multi_label/few-shot/train.py
@@ -94,7 +94,7 @@ def compute_metrics(eval_preds):
         micro_f1_score, macro_f1_score = metric.accumulate()
         return {"micro_f1_score": micro_f1_score, "macro_f1_score": macro_f1_score}
 
-    # Deine the early-stopping callback.
+    # Define the early-stopping callback.
     callbacks = [EarlyStoppingCallback(early_stopping_patience=4, early_stopping_threshold=0.0)]
 
     # Initialize the trainer.
diff --git a/slm/examples/README.md b/slm/examples/README.md
index 2f0d51f0002c..94dc13c1d425 100644
--- a/slm/examples/README.md
+++ b/slm/examples/README.md
@@ -31,7 +31,7 @@ PaddleNLP provides rich application examples covering mainstream NLP task to hel
 | simultaneous_translation | [同声翻译 (Simultaneous Translation)](./simultaneous_translation/) |
 | machine_reading_comprehension | [阅读理解 (Machine Reading Comprehension)](./machine_reading_comprehension/) |
 
-## NLP 拓展应用 (NLP Extented Applications)
+## NLP 拓展应用 (NLP Extended Applications)
 
 | 目录 Folder | 任务 Task |
 |:---------------------|-------------------------------------------------------------------------|
diff --git a/slm/examples/benchmark/clue/grid_search_tools/grid_search.py b/slm/examples/benchmark/clue/grid_search_tools/grid_search.py
index 93387bf9daf7..548fefe674a3 100644
--- a/slm/examples/benchmark/clue/grid_search_tools/grid_search.py
+++ b/slm/examples/benchmark/clue/grid_search_tools/grid_search.py
@@ -151,7 +151,7 @@ def main():
         if returncode is not None:
             if returncode != 0:
                 retry[runs[i]["ts"]] += 1
-                print(f"> {runs[i]['ts']} task failed, will retried, tryed {retry[runs[i]['ts']]} times.")
+                print(f"> {runs[i]['ts']} task failed, will retry, tried {retry[runs[i]['ts']]} times.")
                 output = runs[i]["ps"].communicate()[0]
                 for line in output.decode("utf-8").split("\n"):
                     print(line)
diff --git a/slm/examples/benchmark/wiki_lambada/eval.py b/slm/examples/benchmark/wiki_lambada/eval.py
index d0bd3493909b..4ca618577ec5 100644
--- a/slm/examples/benchmark/wiki_lambada/eval.py
+++ b/slm/examples/benchmark/wiki_lambada/eval.py
@@ -74,7 +74,7 @@ def get_parser():
         help="Whether to use flash attention",
     )
     # load autodist name files, eg: bloom-176b
-    parser.add_argument("--load_autodist", action="store_true", help="whether load auto-dist wieght file")
+    parser.add_argument("--load_autodist", action="store_true", help="whether to load auto-dist weight file")
 
     return parser
diff --git a/slm/examples/few_shot/RGL/data.py b/slm/examples/few_shot/RGL/data.py
index 32efac286aad..4afec4c177f0 100644
--- a/slm/examples/few_shot/RGL/data.py
+++ b/slm/examples/few_shot/RGL/data.py
@@ -168,7 +168,7 @@ def __init__(self, labels=None):
     @property
     def labels(self):
         if not getattr(self, "_labels"):
-            raise ValueError("labels and label_mappings are not setted yet.")
+            raise ValueError("labels and label_mappings are not set yet.")
         return self._labels
 
     @labels.setter
@@ -179,7 +179,7 @@ def labels(self, labels):
     @property
     def label_mapping(self):
         if not getattr(self, "_labels"):
-            raise ValueError("labels and label_mappings are not setted yet.")
+            raise ValueError("labels and label_mappings are not set yet.")
         if not getattr(self, "_label_mapping"):
             self._label_mapping = {k: i for i, k in enumerate(self._labels)}
         return self._label_mapping
diff --git a/slm/examples/few_shot/RGL/verbalizer.py b/slm/examples/few_shot/RGL/verbalizer.py
index 0e741235dcc0..233899db0f2e 100644
--- a/slm/examples/few_shot/RGL/verbalizer.py
+++ b/slm/examples/few_shot/RGL/verbalizer.py
@@ -127,7 +127,7 @@ def aggregate(label_words_logits, atype="mean", ndim=2):
         elif atype == "first":
             return label_words_logits[..., 0, :]
         else:
-            raise ValueError("Unsupported aggreate type {}".format(atype))
+            raise ValueError("Unsupported aggregate type {}".format(atype))
         return label_words_logits
 
     def normalize(self, logits):
diff --git a/slm/examples/information_extraction/DuIE/run_duie.py b/slm/examples/information_extraction/DuIE/run_duie.py
index 94e1a227292b..61b4b54308bd 100644
--- a/slm/examples/information_extraction/DuIE/run_duie.py
+++ b/slm/examples/information_extraction/DuIE/run_duie.py
@@ -252,7 +252,7 @@ def do_train():
             print("\n=====start evaluating ckpt of %d steps=====" % global_step)
             precision, recall, f1 = evaluate(model, criterion, test_data_loader, eval_file_path, "eval")
             print("precision: %.2f\t recall: %.2f\t f1: %.2f\t" % (100 * precision, 100 * recall, 100 * f1))
-            print("saving checkpoing model_%d.pdparams to %s " % (global_step, args.output_dir))
+            print("saving checkpoint model_%d.pdparams to %s " % (global_step, args.output_dir))
             paddle.save(model.state_dict(), os.path.join(args.output_dir, "model_%d.pdparams" % global_step))
             model.train()  # back to train mode
diff --git a/slm/examples/information_extraction/DuIE/utils.py b/slm/examples/information_extraction/DuIE/utils.py
index b810043bdee8..5a62dfbb02f5 100644
--- a/slm/examples/information_extraction/DuIE/utils.py
+++ b/slm/examples/information_extraction/DuIE/utils.py
@@ -66,7 +66,7 @@ def decoding(
     complex_relation_label = [8, 10, 26, 32, 46]
     complex_relation_affi_label = [9, 11, 27, 28, 29, 33, 47]
 
-    # flatten predictions then retrival all valid subject id
+    # flatten predictions then retrieve all valid subject id
     flatten_predictions = []
     for layer_1 in predictions:
         for layer_2 in layer_1:
diff --git a/slm/examples/information_extraction/DuUIE/inference.py b/slm/examples/information_extraction/DuUIE/inference.py
index 1d70d484995c..95b74ea1f09f 100644
--- a/slm/examples/information_extraction/DuUIE/inference.py
+++ b/slm/examples/information_extraction/DuUIE/inference.py
@@ -109,7 +109,7 @@ def main():
         "-c",
         "--config",
         dest="map_config",
-        help="Offset mapping config, maping generated sel to offset record",
+        help="Offset mapping config, mapping generated sel to offset record",
         default="longer_first_zh",
     )
     parser.add_argument("--verbose", action="store_true")
diff --git a/slm/examples/machine_translation/transformer/configs/transformer.base.yaml b/slm/examples/machine_translation/transformer/configs/transformer.base.yaml
index c5837195f4bf..32c77264989d 100644
--- a/slm/examples/machine_translation/transformer/configs/transformer.base.yaml
+++ b/slm/examples/machine_translation/transformer/configs/transformer.base.yaml
@@ -66,7 +66,7 @@ label_smooth_eps: 0.1
 # select the top `beam_size * 2` beams and process the top `beam_size` alive
 # and finish beams in them separately, while 'v1' would only select the top
 # `beam_size` beams and mix up the alive and finish beams. 'v2' always
-# searchs more and get better results, since the alive beams would
+# searches more and gets better results, since the alive beams would
 # always be `beam_size` while the number of alive beams in `v1` might
 # decrease when meeting the end token. However, 'v2' always generates
 # longer results thus might do more calculation and be slower.
diff --git a/slm/examples/machine_translation/transformer/configs/transformer.big.yaml b/slm/examples/machine_translation/transformer/configs/transformer.big.yaml
index efbe3b60870d..f4d9f54eb5c4 100644
--- a/slm/examples/machine_translation/transformer/configs/transformer.big.yaml
+++ b/slm/examples/machine_translation/transformer/configs/transformer.big.yaml
@@ -66,7 +66,7 @@ label_smooth_eps: 0.1
 # select the top `beam_size * 2` beams and process the top `beam_size` alive
 # and finish beams in them separately, while 'v1' would only select the top
 # `beam_size` beams and mix up the alive and finish beams. 'v2' always
-# searchs more and get better results, since the alive beams would
+# searches more and gets better results, since the alive beams would
 # always be `beam_size` while the number of alive beams in `v1` might
 # decrease when meeting the end token. However, 'v2' always generates
 # longer results thus might do more calculation and be slower.
diff --git a/slm/examples/machine_translation/transformer/predict.py b/slm/examples/machine_translation/transformer/predict.py
index 9226da595e65..e21189934f92 100644
--- a/slm/examples/machine_translation/transformer/predict.py
+++ b/slm/examples/machine_translation/transformer/predict.py
@@ -126,7 +126,7 @@ def do_predict(args):
     # Define model
     # `TransformerGenerator` automatically chioces using `FastGeneration`
-    # (with jit building) or the slower verison `InferTransformerModel`.
+    # (with jit building) or the slower version `InferTransformerModel`.
     transformer = TransformerGenerator(
         src_vocab_size=args.src_vocab_size,
         trg_vocab_size=args.trg_vocab_size,
diff --git a/slm/examples/model_compression/distill_lstm/small.py b/slm/examples/model_compression/distill_lstm/small.py
index 4bf38ea225af..454f2928a0e4 100644
--- a/slm/examples/model_compression/distill_lstm/small.py
+++ b/slm/examples/model_compression/distill_lstm/small.py
@@ -43,7 +43,7 @@ def __init__(
     ):
         super(BiLSTM, self).__init__()
         if embedding_name is not None:
-            raise ValueError("TokenEmbedding is depercated in PaddleNLP since 3.0, please set embedding_name to None ")
+            raise ValueError("TokenEmbedding is deprecated in PaddleNLP since 3.0, please set embedding_name to None ")
         else:
             self.embedder = nn.Embedding(vocab_size, embed_dim, padding_idx)
diff --git a/slm/examples/model_compression/pp-minilm/deploy/python/infer_perf.sh b/slm/examples/model_compression/pp-minilm/deploy/python/infer_perf.sh
index c8469d9d9117..83ec5756f80a 100644
--- a/slm/examples/model_compression/pp-minilm/deploy/python/infer_perf.sh
+++ b/slm/examples/model_compression/pp-minilm/deploy/python/infer_perf.sh
@@ -14,7 +14,7 @@
 
 export task=TNEWS
 
-echo Inference of orgin FP32 model
+echo Inference of original FP32 model
 for ((i=0;i<=4;i++));
 do
     python infer.py --task_name ${task} --model_path ../finetuning/ppminilm-6l-768h/models/${task}/1e-4_64/inference --use_trt --perf
diff --git a/slm/examples/model_interpretation/rationale_extraction/generate_evaluation_data.sh b/slm/examples/model_interpretation/rationale_extraction/generate_evaluation_data.sh
index 9f1dbef6c660..9d68510870a2 100755
--- a/slm/examples/model_interpretation/rationale_extraction/generate_evaluation_data.sh
+++ b/slm/examples/model_interpretation/rationale_extraction/generate_evaluation_data.sh
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 ###
- # This script concatenates results from previous running to generate a formated result for evaluation use
+ # This script concatenates results from previous runs to generate a formatted result for evaluation use
 ###
 
 BASE_MODEL=$1
diff --git a/slm/examples/model_interpretation/rationale_extraction/sentiment_pred.py b/slm/examples/model_interpretation/rationale_extraction/sentiment_pred.py
index 4ab1397ed304..bf014ad40a59 100644
--- a/slm/examples/model_interpretation/rationale_extraction/sentiment_pred.py
+++ b/slm/examples/model_interpretation/rationale_extraction/sentiment_pred.py
@@ -91,7 +91,7 @@ def _read(self, filename, language):
 
 def create_dataloader(dataset, trans_fn=None, mode="train", batch_size=1, batchify_fn=None):
     """
-    Creats dataloader.
+    Creates dataloader.
 
     Args:
         dataset(obj:`paddle.io.Dataset`): Dataset instance.
diff --git a/slm/examples/model_interpretation/task/mrc/saliency_map/rc_interpretable.py b/slm/examples/model_interpretation/task/mrc/saliency_map/rc_interpretable.py
index 7df2bc45d51f..f7fede443a8c 100644
--- a/slm/examples/model_interpretation/task/mrc/saliency_map/rc_interpretable.py
+++ b/slm/examples/model_interpretation/task/mrc/saliency_map/rc_interpretable.py
@@ -480,7 +480,7 @@ def extract_integrated_gradient_scores(
                 out_handle,
             )
         else:
-            raise KeyError(f"Unkonwn interpretable mode: {args.inter_mode}")
+            raise KeyError(f"Unknown interpretable mode: {args.inter_mode}")
 
     # Deal with last example
     if args.language == "ch":
diff --git a/slm/examples/model_interpretation/task/senti/LIME/lime_text.py b/slm/examples/model_interpretation/task/senti/LIME/lime_text.py
index 9e987646861c..1e8ae667efc5 100644
--- a/slm/examples/model_interpretation/task/senti/LIME/lime_text.py
+++ b/slm/examples/model_interpretation/task/senti/LIME/lime_text.py
@@ -371,7 +371,7 @@ def __init__(
                 generate random numbers. If None, the random state will be
                 initialized using the internal numpy seed.
             char_level: an boolean identifying that we treat each character
-                as an independent occurence in the string
+                as an independent occurrence in the string
         """
         if kernel is None:
diff --git a/slm/examples/model_interpretation/task/senti/rnn/train.py b/slm/examples/model_interpretation/task/senti/rnn/train.py
index 0a82924751fd..cdee72908734 100644
--- a/slm/examples/model_interpretation/task/senti/rnn/train.py
+++ b/slm/examples/model_interpretation/task/senti/rnn/train.py
@@ -50,7 +50,7 @@ def set_seed(seed=1000):
 
 def create_dataloader(dataset, trans_fn=None, mode="train", batch_size=1, batchify_fn=None):
     """
-    Creats dataloader.
+    Creates dataloader.
 
     Args:
         dataset(obj:`paddle.io.Dataset`): Dataset instance.
diff --git a/slm/examples/model_interpretation/task/senti/saliency_map/sentiment_interpretable.py b/slm/examples/model_interpretation/task/senti/saliency_map/sentiment_interpretable.py
index 61afefc70ec5..23039121eb12 100644
--- a/slm/examples/model_interpretation/task/senti/saliency_map/sentiment_interpretable.py
+++ b/slm/examples/model_interpretation/task/senti/saliency_map/sentiment_interpretable.py
@@ -97,7 +97,7 @@ def _read(self, filename):
 
 def create_dataloader(dataset, trans_fn=None, mode="train", batch_size=1, batchify_fn=None):
     """
-    Creats dataloader.
+    Creates dataloader.
 
     Args:
         dataset(obj:`paddle.io.Dataset`): Dataset instance.
@@ -458,7 +458,7 @@ def extract_LIME_scores(
 
     # Attention
     if args.inter_mode == "attention":
-        # extract attention scores and write resutls to file
+        # extract attention scores and write results to file
         extract_attention_scores(args, atts, input_ids, tokens, sub_word_id_dict, result, offset, out_handle)
 
     # Integrated_gradient
@@ -496,7 +496,7 @@ def extract_LIME_scores(
         )
 
     else:
-        raise KeyError(f"Unkonwn interpretable mode: {args.inter_mode}")
+        raise KeyError(f"Unknown interpretable mode: {args.inter_mode}")
 
     if args.inter_mode == "lime":
         log.debug(np.average(np.array(lime_relative_err_total)))
diff --git a/slm/examples/model_interpretation/task/similarity/LIME/lime_text.py b/slm/examples/model_interpretation/task/similarity/LIME/lime_text.py
index 4453ef3dfea5..b3e1d86efa63 100644
--- a/slm/examples/model_interpretation/task/similarity/LIME/lime_text.py
+++ b/slm/examples/model_interpretation/task/similarity/LIME/lime_text.py
@@ -362,7 +362,7 @@ def __init__(
                 generate random numbers. If None, the random state will be
                 initialized using the internal numpy seed.
             char_level: an boolean identifying that we treat each character
-                as an independent occurence in the string
+                as an independent occurrence in the string
         """
         if kernel is None:
diff --git a/slm/examples/model_interpretation/task/similarity/saliency_map/similarity_interpretable.py b/slm/examples/model_interpretation/task/similarity/saliency_map/similarity_interpretable.py
index 730640962190..40bc8dbe7b2f 100644
--- a/slm/examples/model_interpretation/task/similarity/saliency_map/similarity_interpretable.py
+++ b/slm/examples/model_interpretation/task/similarity/saliency_map/similarity_interpretable.py
@@ -640,7 +640,7 @@ def LIME_error_evaluation(
         )
 
     else:
-        raise KeyError(f"Unkonwn interpretable mode: {args.inter_mode}")
+        raise KeyError(f"Unknown interpretable mode: {args.inter_mode}")
 
     if args.inter_mode == "lime":
         print(np.average(np.array(lime_relative_err_total)))
diff --git a/slm/examples/model_interpretation/task/similarity/simnet/train.py b/slm/examples/model_interpretation/task/similarity/simnet/train.py
index 85af86ea8be9..85e0dea5d8eb 100644
--- a/slm/examples/model_interpretation/task/similarity/simnet/train.py
+++ b/slm/examples/model_interpretation/task/similarity/simnet/train.py
@@ -49,7 +49,7 @@
 
 def create_dataloader(dataset, trans_fn=None, mode="train", batch_size=1, batchify_fn=None):
     """
-    Creats dataloader.
+    Creates dataloader.
 
     Args:
         dataset(obj:`paddle.io.Dataset`): Dataset instance.
diff --git a/slm/examples/model_interpretation/task/transformer.py b/slm/examples/model_interpretation/task/transformer.py
index 8d12b98149c8..d688d65aa58d 100644
--- a/slm/examples/model_interpretation/task/transformer.py
+++ b/slm/examples/model_interpretation/task/transformer.py
@@ -264,7 +264,7 @@ def compute_kv(self, key, value):
 
     def gen_cache(self, key, value=None, type=Cache):
         """
-        Generates cache for `forward` usage in inference accroding to arguments.
+        Generates cache for `forward` usage in inference according to arguments.
         The generated cache is an instance of `MultiHeadAttention.Cache` or an
         instance of `MultiHeadAttention.StaticCache`.
@@ -1063,7 +1063,7 @@ class Transformer(Layer):
     Please refer to `Attention is all you need `_ ,
     and see `TransformerEncoder` and `TransformerDecoder` for more details.
 
-    Users can configurate the model architecture with corresponding parameters.
+    Users can configure the model architecture with corresponding parameters.
     Note the usage of `normalize_before` representing where to apply layer
     normalization (in pre-process or post-precess of multi-head attention or FFN),
     and some transformer like models are different on this, such as
diff --git a/slm/examples/text_graph/erniesage/models/conv.py b/slm/examples/text_graph/erniesage/models/conv.py
index 4dd23df3d263..89c2339f1797 100644
--- a/slm/examples/text_graph/erniesage/models/conv.py
+++ b/slm/examples/text_graph/erniesage/models/conv.py
@@ -153,7 +153,7 @@ def _recv_func(message):
         return self_feature, neigh_feature
 
     def forward(self, graph, term_ids, act="relu"):
-        """Forward funciton of Conv layer.
+        """Forward function of Conv layer.
 
         Args:
             graph (Graph): Graph object.
diff --git a/slm/examples/text_graph/erniesage/models/encoder.py b/slm/examples/text_graph/erniesage/models/encoder.py
index 9363beb43a45..3cc982409f25 100644
--- a/slm/examples/text_graph/erniesage/models/encoder.py
+++ b/slm/examples/text_graph/erniesage/models/encoder.py
@@ -95,7 +95,7 @@ def take_final_feature(self, feature, index):
         """Gather the final feature.
 
         Args:
-            feature (Tensor): the total featue tensor.
+            feature (Tensor): the total feature tensor.
             index (Tensor): the index to gather.
 
         Returns:
diff --git a/slm/examples/text_matching/ernie_matching/README.md b/slm/examples/text_matching/ernie_matching/README.md
index 7d0f0bb86148..22ad2b6049d5 100644
--- a/slm/examples/text_matching/ernie_matching/README.md
+++ b/slm/examples/text_matching/ernie_matching/README.md
@@ -18,7 +18,7 @@
 ```
 ernie_matching/
-├── deply # 部署
+├── deploy # 部署
 |   └── python
 |       └── predict.py # python 预测部署示例
 ├── export_model.py # 动态图参数导出静态图参数脚本
diff --git a/slm/examples/text_matching/sentence_transformers/README.md b/slm/examples/text_matching/sentence_transformers/README.md
index 5f377f2341ca..37b7d2de0c95 100644
--- a/slm/examples/text_matching/sentence_transformers/README.md
+++ b/slm/examples/text_matching/sentence_transformers/README.md
@@ -62,7 +62,7 @@ PaddleNLP 提供了丰富的预训练模型,并且可以便捷地获取 Paddle
 
 ```text
 sentence_transformers/
-├── model.py # Sentence Transfomer 组网文件
+├── model.py # Sentence Transformer 组网文件
 ├── README.md # 文本说明
 └── train.py # 模型训练评估
 ```
diff --git a/slm/examples/text_matching/simcse/README.md b/slm/examples/text_matching/simcse/README.md
index 81aab499ab86..ee5995221e99 100644
--- a/slm/examples/text_matching/simcse/README.md
+++ b/slm/examples/text_matching/simcse/README.md
@@ -61,7 +61,7 @@ python -u -m paddle.distributed.launch --gpus '0' \
 可支持配置的参数:
 
 * `infer_with_fc_pooler`:可选,在预测阶段计算文本 embedding 表示的时候网络前向是否会过训练阶段最后一层的 fc; 建议关闭模型效果最好。
-* `dup_rate`: 可选,word reptition 的比例,默认是0.32,根据论文 Word Repetition 比例采用 0.32 效果最佳。
+* `dup_rate`: 可选,word repetition 的比例,默认是0.32,根据论文 Word Repetition 比例采用 0.32 效果最佳。
 * `scale`:可选,在计算 cross_entropy loss 之前对 cosine 相似度进行缩放的因子;默认为 20。
 * `dropout`:可选,SimCSE 网络前向使用的 dropout 取值;默认 0.1。
 * `save_dir`:可选,保存训练模型的目录;默认保存在当前目录 checkpoints 文件夹下。
diff --git a/slm/examples/text_matching/simnet/train.py b/slm/examples/text_matching/simnet/train.py
index afba242f58df..b8dcb7da2ae1 100644
--- a/slm/examples/text_matching/simnet/train.py
+++ b/slm/examples/text_matching/simnet/train.py
@@ -39,7 +39,7 @@
 
 def create_dataloader(dataset, trans_fn=None, mode="train", batch_size=1, batchify_fn=None):
     """
-    Creats dataloader.
+    Creates dataloader.
 
     Args:
         dataset(obj:`paddle.io.Dataset`): Dataset instance.
diff --git a/slm/examples/text_to_knowledge/nptag/utils.py b/slm/examples/text_to_knowledge/nptag/utils.py
index 275becaa33c3..0ff981aa4463 100644
--- a/slm/examples/text_to_knowledge/nptag/utils.py
+++ b/slm/examples/text_to_knowledge/nptag/utils.py
@@ -113,7 +113,7 @@ def levenstein_distance(s1: str, s2: str) -> int:
 
 class BurkhardKellerNode(object):
-    """Node implementatation for BK-Tree. A BK-Tree node stores the information of current word, and its approximate words calculated by levenstein distance.
+    """Node implementation for BK-Tree. A BK-Tree node stores the information of current word, and its approximate words calculated by levenstein distance.
 
     Args:
         word (str): word of current node.
diff --git a/slm/examples/text_to_knowledge/termtree/termtree.py b/slm/examples/text_to_knowledge/termtree/termtree.py
index 7b09795ef113..ea0db5844df6 100644
--- a/slm/examples/text_to_knowledge/termtree/termtree.py
+++ b/slm/examples/text_to_knowledge/termtree/termtree.py
@@ -20,7 +20,7 @@
 
 class TermTreeNode(object):
-    """Defination of term node. All members are protected, to keep rigorism of data struct.
+    """Definition of term node. All members are protected, to keep rigorism of data struct.
 
     Args:
         sid (str): term id of node.
@@ -34,7 +34,7 @@ class TermTreeNode(object):
         Defaults to None.
         sub_type (Optional[List[str]], optional): grouped by some term. Defaults to None.
         sub_term (Optional[List[str]], optional): some lower term. Defaults to None.
-        data (Optional[Dict[str, Any]], optional): to sore full imformation of a term. Defaults to None.
+        data (Optional[Dict[str, Any]], optional): to store full information of a term. Defaults to None.
 
     """
diff --git a/slm/examples/torch_migration/docs/ThesisReproduction_NLP.md b/slm/examples/torch_migration/docs/ThesisReproduction_NLP.md
index 98ae8c2d127a..c5fb53cac993 100644
--- a/slm/examples/torch_migration/docs/ThesisReproduction_NLP.md
+++ b/slm/examples/torch_migration/docs/ThesisReproduction_NLP.md
@@ -322,7 +322,7 @@ if __name__ == "__main__":
 * 模型在前向对齐验证时,需要调用`model.eval()`方法,保证组网中的随机量被关闭,比如 BatchNorm、Dropout 等。
 * 给定相同的输入数据,为保证可复现性,如果有随机数生成,固定相关的随机种子。
-* 输出 diff 可以使用`np.mean(np.abs(o1 - o2))`进行计算,一般小于1e-6的话,可以认为前向没有问题。如果最终输出结果 diff 较大,可以使用二分的方法进行排查,比如说 BERT,包含1个 embdding 层、12个 transformer-block 以及最后的 MLM head 层,那么完成模型组网和权重转换之后,如果模型输出没有对齐,可以尝试输出中间某一个 transformer-block 的 tensor 进行对比,如果相同,则向后进行排查;如果不同,则继续向前进行排查,以此类推,直到找到导致没有对齐的操作。
+* 输出 diff 可以使用`np.mean(np.abs(o1 - o2))`进行计算,一般小于1e-6的话,可以认为前向没有问题。如果最终输出结果 diff 较大,可以使用二分的方法进行排查,比如说 BERT,包含1个 embedding 层、12个 transformer-block 以及最后的 MLM head 层,那么完成模型组网和权重转换之后,如果模型输出没有对齐,可以尝试输出中间某一个 transformer-block 的 tensor 进行对比,如果相同,则向后进行排查;如果不同,则继续向前进行排查,以此类推,直到找到导致没有对齐的操作。
 
 **【实战】**
diff --git a/slm/pipelines/pipelines/data_handler/processor.py b/slm/pipelines/pipelines/data_handler/processor.py
index 59d6384fa45a..eafbb61510a2 100644
--- a/slm/pipelines/pipelines/data_handler/processor.py
+++ b/slm/pipelines/pipelines/data_handler/processor.py
@@ -755,7 +755,7 @@ def dataset_from_dicts(self, dicts: List[dict], indices: Optional[List[int]] = N
         # Take the dict and insert into our basket structure, this stages also adds an internal IDs
         baskets = self._fill_baskets(dicts, indices)
 
-        # Separat conversion of query
+        # Separate conversion of query
         baskets = self._convert_queries(baskets=baskets)
 
         # and context passages. When converting the context the label is also assigned.
diff --git a/slm/pipelines/pipelines/nodes/combine_documents/stuff.py b/slm/pipelines/pipelines/nodes/combine_documents/stuff.py
index f794ab777a52..4b01221db4af 100644
--- a/slm/pipelines/pipelines/nodes/combine_documents/stuff.py
+++ b/slm/pipelines/pipelines/nodes/combine_documents/stuff.py
@@ -45,7 +45,7 @@ def __init__(
         First, merge multiple documents, and then generate a multi document summary .
         Ensuring that the number of tokens for all documents does not exceed the len_str.
-        :param document_prompt: the prompt for geting and merging multiple documents
+        :param document_prompt: the prompt for getting and merging multiple documents
         :param llm_prompt: the prompt for multiple document summaries
         :param len_str: maximum document length
         :param llm: the Language Model
diff --git a/slm/pipelines/pipelines/nodes/reader/ernie_dureader.py b/slm/pipelines/pipelines/nodes/reader/ernie_dureader.py
index 2feb96919ea0..2ad3763ef1c9 100644
--- a/slm/pipelines/pipelines/nodes/reader/ernie_dureader.py
+++ b/slm/pipelines/pipelines/nodes/reader/ernie_dureader.py
@@ -779,7 +779,7 @@ def pred_to_doc_idxs(pred, passage_start_t, sample_idx):
     """
     Converts the passage level predictions to document level predictions.
    Note that on the doc level we don't have special tokens or question tokens. This means that a no answer
-    cannot be prepresented by a (0,0) qa_answer but will instead be represented by (-1, -1)
+    cannot be represented by a (0,0) qa_answer but will instead be represented by (-1, -1)
     """
     new_pred = []
     for qa_answer in pred:
diff --git a/tests/llm/test_gradio.py b/tests/llm/test_gradio.py
index d47338545733..37e96c5a3979 100644
--- a/tests/llm/test_gradio.py
+++ b/tests/llm/test_gradio.py
@@ -72,7 +72,7 @@ def available_free_port(self, exclude=None):
                 continue
             return port
 
-        raise ValueError("can not get valiable port in [8000, 8200]")
+        raise ValueError("cannot get an available port in [8000, 8200]")
 
     def wait_until_server_is_ready(self):
         while True:
diff --git a/tests/transformers/reformer/test_modeling.py b/tests/transformers/reformer/test_modeling.py
index 70439235cc61..089d07fe173d 100644
--- a/tests/transformers/reformer/test_modeling.py
+++ b/tests/transformers/reformer/test_modeling.py
@@ -248,7 +248,7 @@ def create_and_check_reformer_model_with_attn_mask(
         model = ReformerModel(config=config)
         model.eval()
 
-        # set all position encodings to zero so that postions don't matter
+        # set all position encodings to zero so that positions don't matter
         with paddle.no_grad():
             embedding = model.embeddings.position_embeddings.embedding
             embedding.weight = paddle.create_parameter(