Commit 3b0c8ff

[Typos] Fix (#10486)
1 parent af16c95 commit 3b0c8ff

File tree

86 files changed: +140 −140 lines changed


paddlenlp/data/vocab.py

Lines changed: 2 additions & 2 deletions
@@ -27,8 +27,8 @@ class Vocab(object):
     store/load functions.

     Args:
-        counter (collections.Counter, optional): A Counter intance describes
-            the tokens and their frequencies. Its keys will be indexed accroding
+        counter (collections.Counter, optional): A Counter instance describes
+            the tokens and their frequencies. Its keys will be indexed according
         to the order of frequency sorting to construct mapping relationship.
         If None, `token_to_idx` must be provided as the mapping relationship.
         Default: None.
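For context, a minimal usage sketch of the docstring above. The constructor keywords beyond `counter` and `token_to_idx` (e.g. `unk_token`) are assumed from typical PaddleNLP usage and are not part of this diff:

```python
from collections import Counter

from paddlenlp.data import Vocab

# Token frequencies drive the index order, as the docstring describes.
counter = Counter("the quick brown fox jumps over the lazy dog the".split())
vocab = Vocab(counter=counter, unk_token="[UNK]")  # or pass token_to_idx when counter is None
print(vocab.to_indices(["the", "fox"]))  # more frequent tokens get smaller (non-special) indices
```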

paddlenlp/datasets/dataset.py

Lines changed: 1 addition & 1 deletion
@@ -570,7 +570,7 @@ def remove_if_exit(filepath):
     datasets = DatasetTuple(splits)
     parallel_env = dist.ParallelEnv()
     unique_endpoints = _get_unique_endpoints(parallel_env.trainer_endpoints[:])
-    # move register hook to first and register togather
+    # move register hook to first and register together
     lock_files = []
     for split in splits:
         lock_file = os.path.join(DATA_HOME, self.__class__.__name__)

paddlenlp/experimental/autonlp/README_en.md

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@

 **The AutoNLP APIs are subjective to significant changes until formal release**

-**AutoNLP** is an experimental project by PaddleNLP to democratize NLP for everyone. Delivering a successful NLP project is not easy, as it requires deep domain knowledge. Time after time, we have seen people struggle to make NLP work on their dataset, for their projects, which is why we are building **AutoNLP**. Compared with the traditional AutoML approach of massive paid compute for State-of-the-Art model performance, we have a different philosphy:
+**AutoNLP** is an experimental project by PaddleNLP to democratize NLP for everyone. Delivering a successful NLP project is not easy, as it requires deep domain knowledge. Time after time, we have seen people struggle to make NLP work on their dataset, for their projects, which is why we are building **AutoNLP**. Compared with the traditional AutoML approach of massive paid compute for State-of-the-Art model performance, we have a different philosophy:


 1. Instead of training State-of-the-Art models on huge datasets running on huge clusters, our goal is to deliver **decent models under limited compute**. We assume our users have a few GPUs at most and want to get decent models under 8 hours on their own in-house datasets. Note that you can get this level of compute for FREE on [Baidu AI Studio](https://aistudio.baidu.com/aistudio).

paddlenlp/experimental/faster_tokenizer.py

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@ def to_vocab_buffer(vocab_dict, name):
     NOTICE: The value will be held in the cpu place.

     Args:
-        vocab_dict(dict): The value will be setted to the tensor.
+        vocab_dict(dict): The value will be set to the tensor.
             The key is token and the value is the token index.
         name(string): The name of the tensor.
     """

paddlenlp/experimental/transformers/qwen/modeling.py

Lines changed: 1 addition & 1 deletion
@@ -499,7 +499,7 @@ def forward(
         hidden_states = outputs[0]

         # if labels is None,means we need full output, instead of tensor_parallel_output
-        # tensor_parallel_output is togather with ParallelCrossEntropy
+        # tensor_parallel_output is together with ParallelCrossEntropy
         tensor_parallel_output = (
             self.config.tensor_parallel_output and labels is not None and self.config.tensor_parallel_degree > 1
         )

paddlenlp/experimental/transformers/qwen2/modeling.py

Lines changed: 1 addition & 1 deletion
@@ -1226,7 +1226,7 @@ def forward(
         hidden_states = outputs[0]

         # if labels is None,means we need full output, instead of tensor_parallel_output
-        # tensor_parallel_output is togather with ParallelCrossEntropy
+        # tensor_parallel_output is together with ParallelCrossEntropy
         tensor_parallel_output = (
             self.config.tensor_parallel_output and labels is not None and self.config.tensor_parallel_degree > 1
         )

paddlenlp/generation/logits_process.py

Lines changed: 1 addition & 1 deletion
@@ -439,7 +439,7 @@ def __init__(self, sequence_bias: Dict[Tuple[int], float]):
         self._validate_arguments()

         # Bias variables that will be populated on the first call (for retrocompatibility purposes, the vocabulary size
-        # is infered in the first usage, which inhibits initializing here)
+        # is inferred in the first usage, which inhibits initializing here)
         self.length_1_bias = None
         self.prepared_bias_variables = False

paddlenlp/layers/crf.py

Lines changed: 1 addition & 1 deletion
@@ -248,7 +248,7 @@ def __init__(self, crf):
         self.crf = crf
         if isinstance(crf, paddle.Tensor):
             raise ValueError(
-                "From paddlenlp >= 2.0.0b4, the first param of LinearChainCrfLoss shoule be a LinearChainCrf object. For input parameter 'crf.transitions', you can remove '.transitions' to 'crf'"
+                "From paddlenlp >= 2.0.0b4, the first param of LinearChainCrfLoss should be a LinearChainCrf object. For input parameter 'crf.transitions', you can remove '.transitions' to 'crf'"
             )

     def forward(self, inputs, lengths, labels, old_version_labels=None):
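The corrected error message points at the migration it expects. A minimal sketch of the two call styles (the constructor argument `num_labels` is assumed for illustration, not taken from this diff):

```python
from paddlenlp.layers import LinearChainCrf, LinearChainCrfLoss

num_labels = 5
crf = LinearChainCrf(num_labels)

# pre-2.0.0b4 style, now rejected with the ValueError above:
# loss_fn = LinearChainCrfLoss(crf.transitions)

# current style: pass the LinearChainCrf layer itself
loss_fn = LinearChainCrfLoss(crf)
```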

paddlenlp/ops/distributed/parallel.py

Lines changed: 4 additions & 4 deletions
@@ -191,8 +191,8 @@ def __init__(self, size, num_partitions=1, gather_out=True, param_attr=None, bia
         main_block = paddle.static.default_main_program().global_block()
         startup_block.vars[weight.name].is_distributed = True
         main_block.vars[weight.name].is_distributed = True
-        # set is_distributed for splited bias
-        # if a linear layer is splited by col, the bias would also be split into each rank as its weight
+        # set is_distributed for split bias
+        # if a linear layer is split by col, the bias would also be split into each rank as its weight
         if self.linear._bias_attr:
             startup_block.vars[self.linear.bias.name].is_distributed = True
             main_block.vars[self.linear.bias.name].is_distributed = True

@@ -285,8 +285,8 @@ def __init__(self, size, num_partitions=1, input_is_parallel=False, param_attr=N
         main_block = paddle.static.default_main_program().global_block()
         startup_block.vars[weight.name].is_distributed = True
         main_block.vars[weight.name].is_distributed = True
-        # set is_distributed for splited bias
-        # if a linear layer is splited by row, each rank would hold a complete bias
+        # set is_distributed for split bias
+        # if a linear layer is split by row, each rank would hold a complete bias

         if bias_attr is not False:
             self.bias = self.create_parameter(shape=[num_cols], attr=bias_attr, dtype=self._dtype, is_bias=True)
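A small numpy sketch (not the Paddle implementation) of why the two cases treat the bias differently: column-parallel splits the output dimension, so the bias is sharded with it, while row-parallel sums partial outputs and adds one complete bias.

```python
import numpy as np

x = np.random.randn(2, 4)
w = np.random.randn(4, 6)
b = np.random.randn(6)

# column parallel: split w (and b) along the output dim, concatenate partial outputs
w_cols, b_cols = np.split(w, 2, axis=1), np.split(b, 2)
y_col = np.concatenate([x @ wi + bi for wi, bi in zip(w_cols, b_cols)], axis=1)

# row parallel: split w along the input dim, sum partial outputs, add the full bias once
x_parts, w_rows = np.split(x, 2, axis=1), np.split(w, 2, axis=0)
y_row = sum(xi @ wi for xi, wi in zip(x_parts, w_rows)) + b

assert np.allclose(y_col, x @ w + b) and np.allclose(y_row, x @ w + b)
```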

paddlenlp/quantization/checkpoint_quantization_utils.py

Lines changed: 7 additions & 7 deletions
@@ -63,7 +63,7 @@ def group_wise_quant_dequant(
         tp_degree (`int`):
             Tensor parallel world size.
         use_pd (`bool`):
-            Whether to use paddle caculation. If False will use numpy.
+            Whether to use paddle calculation. If False will use numpy.
         symmetry (`bool`):
             Whether to use symmetry quantization.
     """

@@ -201,7 +201,7 @@ def cal_abs_min_max_channel(inputs, quant_axis=1):
         inputs (`numpy.array`):
             input tensor for quantization.
         quant_axis (`int`):
-            dimension where calulating inputs' abs min and max scales on.
+            dimension where calculating inputs' abs min and max scales on.
     """
     eps = 1e-8
     reduce_axis = tuple([i for i in range(len(inputs.shape)) if i != quant_axis])

@@ -227,7 +227,7 @@ def asymmetry_qdq_weight(
         quant_bits (`int`):
             Quantization bits.
         quant_axis (`int`):
-            Scales caculation axis.
+            Scales calculation axis.
         mins (`paddle.Tensor`):
             Min scales tensor in asymmetry quantization.
         maxs (`paddle.Tensor`):

@@ -239,7 +239,7 @@ def asymmetry_qdq_weight(
         tp_degree (`int`):
             Model parallel world size.
         use_pd (`bool`):
-            Whether to use paddle caculation. If False will use numpy.
+            Whether to use paddle calculation. If False will use numpy.
     """

     if mins is None:

@@ -288,7 +288,7 @@ def cal_abs_max_channel(inputs, quant_axis=1):
         inputs (`numpy.array`):
             input tensor for quantization.
         quant_axis (`int`):
-            dimension where calulating inputs' abs max scales on.
+            dimension where calculating inputs' abs max scales on.
     """
     epsilon = 1e-8
     reduce_axis = tuple([i for i in range(len(inputs.shape)) if i != quant_axis])

@@ -311,7 +311,7 @@ def qdq_weight(x, quant_bit=8, quant_axis=-1, scales=None, dequant=False, tp_ran
         quant_bits (`int`):
             Quantization bits.
         quant_axis (`int`):
-            Scales caculation axis.
+            Scales calculation axis.
         scales (`paddle.Tensor`):
             Abs max scales tensor in symmetry quantization.
         dequant (`bool`):

@@ -321,7 +321,7 @@ def qdq_weight(x, quant_bit=8, quant_axis=-1, scales=None, dequant=False, tp_ran
         tp_degree (`int`):
             Model parallel world size.
         use_pd (`bool`):
-            Whether to use paddle caculation. If False will use numpy.
+            Whether to use paddle calculation. If False will use numpy.
     """

     if scales is None:
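For reference, a self-contained numpy sketch of the symmetric abs-max quant-dequant these docstrings describe. It only illustrates what `quant_axis` and the abs-max scales mean; it is not the library's `qdq_weight` implementation:

```python
import numpy as np

def qdq_absmax(x, quant_bit=8, quant_axis=-1):
    # abs-max scale per slice along quant_axis (all other axes are reduced)
    reduce_axis = tuple(i for i in range(x.ndim) if i != quant_axis % x.ndim)
    scales = np.abs(x).max(axis=reduce_axis, keepdims=True) + 1e-8
    bnt = (1 << (quant_bit - 1)) - 1          # 127 for 8-bit symmetric quantization
    q = np.clip(np.round(x / scales * bnt), -bnt, bnt)
    return q / bnt * scales                   # dequantize back to float

w = np.random.randn(16, 8).astype("float32")
print(float(np.abs(w - qdq_absmax(w, quant_axis=-1)).max()))  # small round-trip error
```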

paddlenlp/quantization/quantization_linear.py

Lines changed: 3 additions & 3 deletions
@@ -251,7 +251,7 @@ def __init__(
         self.quant_dtype, self.quant_weight_bit = QuantMapping[self.weight_quantize_algo]
         self.state = 0

-        # PaddlePaddle dosen't support 4bit data type, one 8bit data represents two 4bit data.
+        # PaddlePaddle doesn't support 4bit data type, one 8bit data represents two 4bit data.
         # paddle.nn.quant.weight_quantize will transpose in_features and out_features.
         if self.weight_quantize_algo in [
             "weight_only_int8",

@@ -407,7 +407,7 @@ def __init__(
         if self.sequence_parallel and self.gather_output:
             raise ValueError("Sequence parallel does not support gather_output")

-        # PaddlePaddle dosen't support Int4 data type, one Int8 data represents two Int4 data.
+        # PaddlePaddle doesn't support Int4 data type, one Int8 data represents two Int4 data.
         if self.weight_quantize_algo in [
             "weight_only_int8",
             "weight_only_int4",

@@ -545,7 +545,7 @@ def __init__(
         if not self.input_is_parallel and self.sequence_parallel:
             raise ValueError("Sequence parallel only support input_is_parallel.")

-        # PaddlePaddle dosen't support Int4 data type, one Int8 data represents two Int4 data.
+        # PaddlePaddle doesn't support Int4 data type, one Int8 data represents two Int4 data.
         # paddle.nn.quant.weight_quantize will transpose in_features and out_features.
         if self.weight_quantize_algo in [
             "weight_only_int8",

paddlenlp/rl/models/ppo_model_utils.py

Lines changed: 2 additions & 2 deletions
@@ -476,11 +476,11 @@ def forward(
             vocab_size=self.config.vocab_size,
             tensor_parallel_degree=self.config.tensor_parallel_degree,
             tensor_parallel_output=self.config.tensor_parallel_output,
-            pg_loss_coeff=self.pg_loss_coeff, # donot use this
+            pg_loss_coeff=self.pg_loss_coeff, # do not use this
             clip_range_ratio=self.clip_range_ratio,
             clip_range_ratio_low=self.clip_range_ratio_low,
             clip_range_ratio_high=self.clip_range_ratio_high,
-            entropy_coeff=self.entropy_coeff, # donot support this
+            entropy_coeff=self.entropy_coeff, # do not support this
             clip_range_score=self.clip_range_score,
             kl_loss_coeff=self.kl_loss_coeff,
             loop_chunk_size=1024,

paddlenlp/rl/trainer/rl_trainer.py

Lines changed: 5 additions & 5 deletions
@@ -661,7 +661,7 @@ def get_train_step_vars(self, vars: Optional[Dict] = None) -> Dict:
             if paddle.distributed.get_world_size() > 1:
                 assert self.model is not self.model_wrapped
             self.train_step_vars = {
-                # meaningless vars can pass from outter, dummy value is enough
+                # meaningless vars can pass from outer, dummy value is enough
                 "epoch": 0, # meaningless for step training
                 "step": 0, # meaningless for step training
                 "steps_in_epoch": 100000, # meaningless for step training

@@ -705,15 +705,15 @@ def full_training_step(self, **inputs) -> paddle.Tensor:
         # trainer.train use `tr_loss` as loss var to accumulate loss.
         # NOTE: `tr_loss` in trainer.train not only accumulate mean loss for
         # steps in one `gradient_accumulation_steps`, but also accumulate for
-        # one logging intervel which may contains more than one accumulated steps.
+        # one logging interval which may contains more than one accumulated steps.
         # However, in RLTrainer we only want to use `tr_loss` to accumulate
         # mean loss for steps in a `gradient_accumulation_steps` range. As for
-        # logging intervel loss accumulation is not take into account here and
-        # should be considered in outter.
+        # logging interval loss accumulation is not take into account here and
+        # should be considered in outer.
         if loss_var is None: # the first step of current loss type
             loss_var = paddle.to_tensor(0.0)
             train_step_vars[loss_name] = loss_var
-        elif self.is_accumulation_step: # begin a new accumulation step intervel
+        elif self.is_accumulation_step: # begin a new accumulation step interval
             for name in self.loss_names:
                 train_step_vars[name] = paddle.to_tensor(0.0)
         loss_var = train_step_vars[loss_name]
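As a schematic restatement of the accumulation policy those comments describe (not the trainer code itself): per-loss accumulators are reset at the start of each `gradient_accumulation_steps` interval, while logging-interval accumulation is left to the caller.

```python
import paddle

def accumulate_step_loss(train_step_vars, loss_name, step_loss, loss_names, is_accumulation_step):
    # reset all loss accumulators when a new accumulation interval begins
    if is_accumulation_step:
        for name in loss_names:
            train_step_vars[name] = paddle.to_tensor(0.0)
    # accumulate this step's mean loss into its named accumulator
    loss_var = train_step_vars.get(loss_name, paddle.to_tensor(0.0))
    train_step_vars[loss_name] = loss_var + step_loss
    return train_step_vars[loss_name]
```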

paddlenlp/taskflow/knowledge_mining.py

Lines changed: 1 addition & 1 deletion
@@ -146,7 +146,7 @@
 class WordTagTask(Task):
     """
     This the NER(Named Entity Recognition) task that convert the raw text to entities. And the task with the `wordtag`
-    model will link the more meesage with the entity.
+    model will link the more message with the entity.
     Args:
         task(string): The name of task.
         model(string): The model name in the task.

paddlenlp/taskflow/lexical_analysis.py

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ def load_vocab(dict_path):


 class LacTask(Task):
     """
-    Lexical analysis of Chinese task to segement the chinese sentence.
+    Lexical analysis of Chinese task to segment the chinese sentence.
     Args:
         task(string): The name of task.
         model(string): The model name in the task.
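A minimal Taskflow usage sketch for this task; the "lexical_analysis" task name follows standard PaddleNLP Taskflow usage and is assumed here rather than taken from this diff:

```python
from paddlenlp import Taskflow

# Chinese word segmentation plus part-of-speech tags via the LAC task.
lac = Taskflow("lexical_analysis")
print(lac("百度是一家高科技公司"))
```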

paddlenlp/taskflow/named_entity_recognition.py

Lines changed: 1 addition & 1 deletion
@@ -74,7 +74,7 @@
 class NERWordTagTask(WordTagTask):
     """
     This the NER(Named Entity Recognition) task that convert the raw text to entities. And the task with the `wordtag`
-    model will link the more meesage with the entity.
+    model will link the more message with the entity.
     Args:
         task(string): The name of task.
         model(string): The model name in the task.

paddlenlp/trainer/auto_trainer.py

Lines changed: 1 addition & 1 deletion
@@ -109,7 +109,7 @@ def parallel_model(cls, model, training_args: AutoTrainingArguments):
             model (paddle.nn.Layer): the model to be parallelized.
             training_args (AutoTrainingArguments) : Training arguments which contain distributed information
         Returns:
-            the model after parallelize and config conatins distributed strategy
+            the model after parallelize and config contains distributed strategy
         """
         if not training_args.use_intermediate_api:
             return model, None

paddlenlp/trainer/plugins/npu_plugin.py

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ def _flatten_param_grads(optimizer, params_grads):
         g.persistable = True
         if getattr(p, "need_clip", True) is False or getattr(p, "regularizer", None) is not None:
             logger.warning(
-                f"flatten_param_grads=True will be discarded since paramter {p.name}'s need_clip is False or "
+                f"flatten_param_grads=True will be discarded since parameter {p.name}'s need_clip is False or "
                 "the regularizer is set."
             )
     return params_grads

paddlenlp/trainer/plugins/timer.py

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@


 class _Timer:
-    """Profile Timer for recording time taken by forward/ bacward/ reduce/ step."""
+    """Profile Timer for recording time taken by forward/ backward/ reduce/ step."""

     def __init__(self, name):
         self.name = name

paddlenlp/trainer/trainer_compress.py

Lines changed: 2 additions & 2 deletions
@@ -700,10 +700,10 @@ def _quant_aware_training_dynamic(self, input_dir):
     args.output_filename_prefix = "int8"

     quant_config = {
-        # It defauts to None, which means that no preprocessing is performed
+        # It defaults to None, which means that no preprocessing is performed
         # on the active value."
         "activation_preprocess_type": "PACT" if args.use_pact else None,
-        # It defauts to None, which means that no preprocessing is performed
+        # It defaults to None, which means that no preprocessing is performed
         # on weights.
         "weight_preprocess_type": "PACT" if args.use_pact else None,
         "weight_quantize_type": args.weight_quantize_type,

paddlenlp/trainer/trainer_utils.py

Lines changed: 1 addition & 1 deletion
@@ -84,7 +84,7 @@ def _get_distributed_seeds(seed: int = 1234, topo: Topology = None):
     """

     # NOTE: For parameter init seed:
-    # seed: dp/mp_undistributed_paramter/sharding is same; others is different
+    # seed: dp/mp_undistributed_parameter/sharding is same; others is different
     # For compute seed(dropout):
     # global seed: only mp group is same.
     # local seed: all groups are different

paddlenlp/trainer/utils/reshard/pp_reshard.py

Lines changed: 1 addition & 1 deletion
@@ -220,7 +220,7 @@ def layers(self):
 class PipeLineStage:
     def __init__(self):
         self._rename_mgr = LayerReNamingManager()
-        # map segement start index to segment
+        # map segment start index to segment
         self._segments = OrderedDict()
         self._layer_to_segment = OrderedDict()
         self._param_to_tname = OrderedDict()

paddlenlp/trainer/utils/sharding_io.py

Lines changed: 2 additions & 2 deletions
@@ -97,7 +97,7 @@ def filter_sharded_params(state_dict, optimizer, sharding_group):
     return filtered_state_dict


-def exclude_paramters_in_state_dict(
+def exclude_parameters_in_state_dict(
     model_state_dict, param_names_in_master_weights, sharding_group, should_save_sharding_stage1_model=True
 ):
     assert sharding_group is not None

@@ -399,7 +399,7 @@ def manipulate_state_dict_and_config(self, model_to_save, merge_tensor_parallel=
         optimzier_state_dict = self.optimizer.state_dict()
         assert "master_weights" in optimzier_state_dict
         param_names_in_master_weights = list(optimzier_state_dict["master_weights"].keys())
-        state_dict = exclude_paramters_in_state_dict(
+        state_dict = exclude_parameters_in_state_dict(
             state_dict, param_names_in_master_weights, self.sharding_group
         )
         logger.info(

paddlenlp/trainer/utils/zero_cost_checkpoint.py

Lines changed: 1 addition & 1 deletion
@@ -539,7 +539,7 @@ def update_zcc_workers(self, new_version, dynamic_objecs, static_object, global_
         self.global_step = global_step
         assert self.current_worker is None, "[ZCC manager] current_worker must be None"
         task = (ZCCTaskType.UPDATE, [self.cache_version, dynamic_objecs, static_object])
-        logger.info(f"[ZCC manager] updating zcc workers, verison: {self.cache_version}")
+        logger.info(f"[ZCC manager] updating zcc workers, version: {self.cache_version}")
         for worker in self.workers:
             worker.task_queue.put(task)
         logger.info("[ZCC manager] waiting workers update done")

paddlenlp/transformers/auto/modeling.py

Lines changed: 2 additions & 2 deletions
@@ -859,7 +859,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
         model_class = getattr(import_class, model_class_name)

         # It may return a new model class, like LlamaForCausalLMAvxInferenceModel
-        # Some model have different inference model class in deifferent execution divice
+        # Some model have different inference model class in different execution device
         # LlamaForCausalLMAvxInferenceModel is used in cpu execution device with avx instruction set
         model_class = model_class.confirm_inference_model(predictor_args=predictor_args)

@@ -933,7 +933,7 @@ def from_config(cls, config, *model_args, **kwargs):
         model_class = getattr(import_class, model_class_name)

         # It may return a new model class, like LlamaForCausalLMAvxInferenceModel
-        # Some model have different inference model class in deifferent execution divice
+        # Some model have different inference model class in different execution device
         # LlamaForCausalLMAvxInferenceModel is used in cpu execution device with avx instruction set
         model_class = model_class.confirm_inference_model(predictor_args=predictor_args)