Commit d82766b

Update general distill in ppminilm (#1520)
1 parent a16c4bc commit d82766b

5 files changed (+15, -15 lines)


examples/model_compression/pp-minilm/README.md

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ The PP-MiniLM compression scheme is built around task-agnostic knowledge distillation for pre-trained models (Task-a
 │ └── run_clue.sh         # Launch script for fine-tuning on CLUE
 │ └── run_one_search.sh   # Fine-tuning script for a single dataset
 │ └── run_all_search.sh   # Fine-tuning script for the CLUE datasets
-│ └── export_model.sh     # Script for exporting the fine-tuned model for deployment
+│ └── export_model.py     # Script for exporting the fine-tuned model for deployment
 ├── pruning               # Pruning and distillation directory
 │ └── prune.py            # Pruning and distillation script
 │ └── prune.sh            # Pruning and distillation launch script

examples/model_compression/pp-minilm/general_distill/README.md

Lines changed: 2 additions & 2 deletions
@@ -29,9 +29,9 @@ cd ..
 
 The arguments of `general_distill.py` are as follows:
 
-- `model_type` indicates the student model type; currently only 'ernie' and 'roberta' are supported.
+- `model_type` indicates the student model type; currently only 'ppminilm' and 'roberta' are supported.
 - `num_relation_heads` is the number of relation heads, typically 64 for a large-size teacher model and 48 for a base-size teacher model.
-- `teacher_model_type` indicates the teacher model type; currently only 'ernie' and 'roberta' are supported.
+- `teacher_model_type` indicates the teacher model type; currently only 'roberta' is supported.
 - `teacher_layer_index` is the teacher model layer used during distillation.
 - `student_layer_index` is the student model layer used during distillation.
 - `teacher_model_name_or_path` is the name of the teacher model, e.g. `'roberta-wwm-ext-large'`.
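
For orientation, a minimal sketch (not part of this commit) of what the renamed options select: the teacher is loaded from a pretrained RoBERTa checkpoint, while the PP-MiniLM student is built from scratch. The class names and the `roberta-wwm-ext-large` checkpoint come from the diffs in this commit; the exact constructor arguments mirror the 6-layer branch of `general_distill.py` below and should be read as an illustrative assumption.

# Illustrative sketch only: what --model_type ppminilm and
# --teacher_model_type roberta end up selecting.
from paddlenlp.transformers import RobertaModel, RobertaTokenizer
from paddlenlp.transformers import PPMiniLMModel, PPMiniLMForSequenceClassification

# Teacher: a pretrained RoBERTa checkpoint named via --teacher_model_name_or_path.
teacher = RobertaModel.from_pretrained("roberta-wwm-ext-large")
tokenizer = RobertaTokenizer.from_pretrained("roberta-wwm-ext-large")

# Student: a small PP-MiniLM backbone built from scratch (sizes assumed from the
# 6-layer branch of the updated do_train()), wrapped with a classification head.
backbone = PPMiniLMModel(
    vocab_size=tokenizer.vocab_size,
    num_hidden_layers=6,
    hidden_act="relu",
    intermediate_size=3072,
    hidden_size=768)
student = PPMiniLMForSequenceClassification(backbone)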

examples/model_compression/pp-minilm/general_distill/general_distill.py

Lines changed: 8 additions & 8 deletions
@@ -32,12 +32,12 @@
 from paddlenlp.utils.tools import TimeCostAverage
 from paddlenlp.transformers import LinearDecayWithWarmup
 from paddlenlp.transformers import RobertaModel, RobertaTokenizer
-from paddlenlp.transformers import ErnieModel, ErnieForSequenceClassification, ErnieTokenizer
+from paddlenlp.transformers import PPMiniLMModel, PPMiniLMForSequenceClassification, PPMiniLMTokenizer
 from paddlenlp.transformers.distill_utils import to_distill, calc_multi_relation_loss
 
 MODEL_CLASSES = {
     "roberta": (RobertaModel, RobertaTokenizer),
-    "ernie": (ErnieForSequenceClassification, ErnieTokenizer)
+    "ppminilm": (PPMiniLMForSequenceClassification, PPMiniLMTokenizer)
 }
 
 
@@ -47,14 +47,14 @@ def parse_args():
     # Required parameters
     parser.add_argument(
         "--model_type",
-        default="ernie",
+        default="ppminilm",
         type=str,
         required=True,
         help="Model type selected in the list: " +
         ", ".join(MODEL_CLASSES.keys()), )
     parser.add_argument(
         "--teacher_model_type",
-        default="ernie",
+        default="roberta",
         type=str,
         required=True,
         help="Model type selected in the list: " +
@@ -276,28 +276,28 @@ def do_train(args):
     # For student
     model_class, _ = MODEL_CLASSES[args.model_type]
     if args.num_layers == 6:
-        ernie = ErnieModel(
+        ppminilm = PPMiniLMModel(
             vocab_size=tokenizer.vocab_size,
             num_hidden_layers=6,
             hidden_act='relu',
             intermediate_size=3072,
             hidden_size=768)  # layer: 6
     elif args.num_layers == 4:
-        ernie = ErnieModel(
+        ppminilm = PPMiniLMModel(
             vocab_size=tokenizer.vocab_size,
             num_hidden_layers=4,
             hidden_act='relu',
             intermediate_size=1024,
             hidden_size=256,
             num_attention_heads=16)  # layer: 4
     else:
-        ernie = ErnieModel(
+        ppminilm = PPMiniLMModel(
             vocab_size=tokenizer.vocab_size,
             num_hidden_layers=2,
             hidden_act='relu',
             hidden_size=128,
             intermediate_size=512)  # layer: 2
-    student = model_class(ernie)
+    student = model_class(ppminilm)
 
     teacher = teacher_model_class.from_pretrained(
         args.teacher_model_name_or_path)

examples/model_compression/pp-minilm/general_distill/run.sh

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ cp ../../../../paddlenlp/transformers/distill_utils.py ${output_dir}/
 
 
 python3 -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" general_distill.py \
-    --model_type ernie \
+    --model_type ppminilm \
     --num_relation_heads ${numH} \
     --teacher_model_type ${teacher} \
     --teacher_layer_index ${teacher_layer_index} \

paddlenlp/transformers/distill_utils.py

Lines changed: 3 additions & 3 deletions
@@ -21,7 +21,7 @@
 from paddle.fluid.data_feeder import convert_dtype
 
 from paddlenlp.utils.log import logger
-from paddlenlp.transformers import ErnieForSequenceClassification
+from paddlenlp.transformers import PPMiniLMForSequenceClassification
 from paddlenlp.transformers import TinyBertForPretraining
 from paddlenlp.transformers import BertForSequenceClassification
 
@@ -208,15 +208,15 @@ def to_distill(self,
     if return_qkv:
         # forward function of student class should be replaced for distributed training.
         TinyBertForPretraining._forward = minilm_pretraining_forward
-        ErnieForSequenceClassification._forward = minilm_pretraining_forward
+        PPMiniLMForSequenceClassification._forward = minilm_pretraining_forward
     else:
         TinyBertForPretraining._forward = tinybert_forward
 
     def init_func(layer):
         if isinstance(layer, (MultiHeadAttention, TransformerEncoderLayer,
                               TransformerEncoder, TinyBertForPretraining,
                               BertForSequenceClassification,
-                              ErnieForSequenceClassification)):
+                              PPMiniLMForSequenceClassification)):
             layer.forward = layer._forward
         if isinstance(layer, TransformerEncoder):
             layer.return_layer_outputs = return_layer_outputs
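
To make the pattern behind this hunk concrete, here is a small standalone sketch (plain Python, not PaddleNLP code) of how `to_distill` swaps in a distillation-aware forward: the replacement function is stored on the class as `_forward`, and `init_func` later rebinds `forward` on each matching layer instance. `StudentHead` and `minilm_style_forward` are hypothetical stand-ins for `PPMiniLMForSequenceClassification` and `minilm_pretraining_forward`.

# Standalone toy illustrating the forward-swapping pattern used in to_distill().
class StudentHead:
    def forward(self, x):
        return x  # original behaviour: return only the prediction

def minilm_style_forward(self, x):
    # stand-in for minilm_pretraining_forward: also return the internals the
    # distillation loss needs (q/k/v or relation matrices in the real code)
    return x, {"extra": "tensors needed by the distillation loss"}

# Class-level swap, mirroring
#   PPMiniLMForSequenceClassification._forward = minilm_pretraining_forward
StudentHead._forward = minilm_style_forward

def init_func(layer):
    # Mirror of the isinstance check in the diff: only selected layer types
    # get the replacement bound as their active forward.
    if isinstance(layer, StudentHead):
        layer.forward = layer._forward  # bound method, as in the real init_func

head = StudentHead()
init_func(head)
print(head.forward("input_ids"))  # now also returns the extra distillation outputs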
