Skip to content

Commit 694bb50

Browse files
Support intermediate_api gpt-3 test (#9912)
* split from pr9882 * fix
1 parent 30df8b6 commit 694bb50

File tree

3 files changed

+87
-3
lines changed

3 files changed

+87
-3
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
1+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Benchmark launcher: GPT-3 13B auto-parallel pretraining (dynamic-to-static)
# using the intermediate API config, on 4 nodes / 32 cards (N4C32).
#
# `param` is a space-separated list of NAME=value assignments. It is placed in
# front of the final `bash` command so that each pair becomes an environment
# variable of run_benchmark.sh. Values must therefore not contain whitespace.
param="model_item=intermediate_api_gpt-3-13b_pretrain_dy2st "
param+="run_mode=DP4_MP2_PP4 "
param+="device_num=N4C32 "
param+="global_batch_size=128 "
param+="nnodes=4 "
param+="model_type=gpt3_13b "
# Prefix prepended to the pretrain config file name by run_benchmark.sh
# (selects intermediate_api_pretrain-<model_type>.json).
param+="intermediate_api=intermediate_api_ "

# All paths below are relative to ./tests; stop here if it is missing instead
# of running prepare/benchmark against the wrong directory.
cd ./tests || { echo "error: cannot cd into ./tests" >&2; exit 1; }
bash ./test_tipc/static/auto_parallel/gpt3/benchmark_common/prepare.sh

# ${param} expands to leading VAR=value words, which bash -c applies as the
# environment of the run_benchmark.sh invocation.
bash -c "${param} bash ./test_tipc/static/auto_parallel/gpt3/benchmark_common/run_benchmark.sh"

tests/test_tipc/static/auto_parallel/gpt3/benchmark_common/run_benchmark.sh

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,9 @@ function _set_params(){
2424
fp_item="bf16"
2525
MODEL_TYPE=${model_type:-"gpt3_13b"}
2626

27+
# for intermediate api
28+
intermediate_api=${intermediate_api:-""}
29+
2730
ip_lists=($(echo $TRAINER_INSTANCES | tr ',' ' '))
2831
master_ip=${ip_lists[0]}
2932
nnodes=${nnodes:-1}
@@ -174,17 +177,17 @@ function _train(){
174177
train_cmd="python -u -m paddle.distributed.launch --gpus=0,1,2,3,4,5,6,7 \
175178
--nnodes 1 --nproc_per_node 8 \
176179
--log_dir mylog run_pretrain_auto.py \
177-
./pretrain_config_${MODEL_TYPE}/pretrain-${MODEL_TYPE}.json"
180+
./pretrain_config_${MODEL_TYPE}/${intermediate_api}pretrain-${MODEL_TYPE}.json"
178181
;;
179182
N4C32) echo "Run with: device_num=${device_num} run_mode=${run_mode}"
180183
train_cmd="python -u -m paddle.distributed.launch --gpus=0,1,2,3,4,5,6,7 \
181184
--log_dir mylog run_pretrain_auto.py \
182-
./pretrain_config_${MODEL_TYPE}/pretrain-${MODEL_TYPE}.json"
185+
./pretrain_config_${MODEL_TYPE}/${intermediate_api}pretrain-${MODEL_TYPE}.json"
183186
;;
184187
*) echo "Run with: device_num=${device_num}, run_mode=${run_mode}"
185188
train_cmd="python -u -m paddle.distributed.launch --gpus=0,1,2,3,4,5,6,7 \
186189
--log_dir mylog run_pretrain_auto.py \
187-
./pretrain_config_${MODEL_TYPE}/pretrain-${MODEL_TYPE}.json"
190+
./pretrain_config_${MODEL_TYPE}/${intermediate_api}pretrain-${MODEL_TYPE}.json"
188191
;;
189192
esac
190193
cd ../llm/auto_parallel/gpt-3
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,55 @@
1+
{
2+
"model_name_or_path": "gpt3-13B-en",
3+
"tokenizer_name_or_path": "gpt3-13B-en",
4+
"to_static": true,
5+
"enable_auto_parallel": 1,
6+
"input_dir": "./data",
7+
"output_dir": "./checkpoints/gpt_pretrain_ckpts",
8+
"split": "949,50,1",
9+
"max_seq_length": 4096,
10+
"tensor_parallel_degree": 2,
11+
"pipeline_parallel_degree": 4,
12+
"per_device_train_batch_size": 1,
13+
"per_device_eval_batch_size": 1,
14+
"scale_loss": 1024,
15+
"learning_rate": 0.00001,
16+
"min_learning_rate": 0.000001,
17+
"max_steps": 500,
18+
"save_steps": 50000,
19+
"weight_decay": 0.01,
20+
"warmup_ratio": 0.01,
21+
"logging_steps": 5,
22+
"continue_training": 0,
23+
"dataloader_num_workers": 1,
24+
"eval_steps": 100000,
25+
"report_to": "visualdl",
26+
"disable_tqdm": true,
27+
"do_train": true,
28+
"do_eval": true,
29+
"device": "gpu",
30+
"model_type": "gpt_network",
31+
"sequence_parallel": 1,
32+
"use_flash_attention": 1,
33+
"use_fast_layer_norm": 1,
34+
"fused_linear": 1,
35+
"fuse_attention_ffn": 1,
36+
"fuse_attention_qkv": 1,
37+
"fused_linear_param_grad_add": 1,
38+
"use_fused_rope": true,
39+
"use_fused_rms_norm": true,
40+
"recompute": 0,
41+
"recompute_use_reentrant": true,
42+
"recompute_granularity": "full",
43+
"pp_recompute_interval": 1,
44+
"gradient_accumulation_steps": 32,
45+
"max_grad_norm": 1.0,
46+
"bf16": 1,
47+
"fp16_opt_level": "O2",
48+
"amp_master_grad": true,
49+
"attention_probs_dropout_prob": 0.1,
50+
"hidden_dropout_prob": 0.1,
51+
"tensor_parallel_config": "enable_mp_async_allreduce replace_with_parallel_cross_entropy",
52+
"data_parallel_config": "enable_allreduce_avg_in_gradinent_scale gradient_sync_after_accumulate",
53+
"pipeline_parallel_config": "enable_send_recv_overlap enable_split_backward",
54+
"use_intermediate_api": true
55+
}

0 commit comments

Comments
 (0)