Skip to content

Commit 8205e3d

Browse files
authored
[CI]add rl to ci (#10553)
* fix scripts for docs * fix timeout * add grpo&rf++ * fix path * update yaml * fix * fix data * add install_external_ops * fix fused_ln * fix * update cmd * fix path * fix * fix path * fix codestyle * update class name * update name
1 parent 3570aaa commit 8205e3d

File tree

4 files changed

+248
-13
lines changed

4 files changed

+248
-13
lines changed

scripts/regression/run_ci.sh

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,14 @@ nlp_build (){
8888
python setup.py bdist_wheel
8989
python -m pip install --ignore-installed dist/p****.whl
9090
}
91+
# Build and install the external fused ops (e.g. fused_ln) shipped with the
# GPT-3 example, then smoke-test that the compiled extension imports.
install_external_ops(){
    # fix: terminate the ANSI color escape with [0m (original left the
    # terminal stuck in red with a bare trailing \033)
    echo -e "\033[31m ---- Install extern_ops \033[0m"
    export PYTHONPATH=${nlp_dir}:$PYTHONPATH
    cd ${nlp_dir}/slm/model_zoo/gpt-3/external_ops
    python setup.py install
    # Verify the extension actually built and is importable.
    python -c "import fused_ln"
    cd ${nlp_dir}
}
9199
####################################
92100
# get diff case
93101
cd ${nlp_dir}
@@ -99,10 +107,12 @@ for file_name in `git diff --numstat ${AGILE_COMPILE_BRANCH} |awk '{print $NF}'`
99107
dir3=${arr_file_name[2]}
100108
dir4=${arr_file_name[3]}
101109
file_item=$dir1/$dir2/$dir3/$dir4
110+
ext="${file_name##*.}"
102111
echo "file_name:"${file_name}, "dir1:"${dir1}, "dir2:"${dir2},"dir3:"${dir3},".xx:" ${file_name##*.}
112+
echo "ext: ${file_name##*.}"
103113
if [ ! -f ${file_name} ];then # 针对pr删掉文件
104114
continue
105-
elif [[ ${file_name##*.} == "md" ]] || [[ ${file_name##*.} == "rst" ]] || [[ ${dir1} == "docs" ]];then
115+
elif [[ "$ext" == "md" || "$ext" == "rst" || "$file_name" == docs/* ]]; then
106116
continue
107117
elif [[ "${AGILE_COMPILE_BRANCH}" == "refactor-training-loop" ]];then # 针对特定分支
108118
P0case_list[${#P0case_list[*]}]=gpt
@@ -196,6 +206,8 @@ if [[ ${#P0case_list[*]} -ne 0 ]];then
196206
else
197207
echo "install_paddlenlp_ops_pr done"
198208
fi
209+
# install fused_ln
210+
install_external_ops
199211
python -c "from paddlenlp import __version__; print('paddlenlp version:', __version__)" >> ${log_path}/commit_info.txt
200212
python -c "import paddlenlp; print('paddlenlp commit:',paddlenlp.version.commit)" >> ${log_path}/commit_info.txt
201213
python -m pip list >> ${log_path}/commit_info.txt

scripts/unit_test/ci_unit.sh

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,14 @@ set_env() {
5050
export FLAGS_cudnn_deterministic=1
5151
export HF_ENDPOINT=https://hf-mirror.com
5252
export FLAGS_use_cuda_managed_memory=true
53-
export running_time=30m
53+
export running_time=40m
5454

5555
# for CE
5656
if [[ ${FLAGS_enable_CE} == "true" ]];then
5757
export CE_TEST_ENV=1
5858
export RUN_SLOW_TEST=1
5959
export PYTHONPATH=${nlp_dir}:${nlp_dir}/llm:${PYTHONPATH}
6060
export running_time=5h
61-
else
62-
continue
6361
fi
6462
}
6563

@@ -74,7 +72,7 @@ print_info() {
7472
cd ${PPNLP_HOME} && python upload.py ${PPNLP_HOME}/upload 'paddlenlp/PaddleNLP_CI/PaddleNLP-CI-Unittest-GPU'
7573
rm -rf upload/* && cd -
7674
if [ $1 -eq 124 ]; then
77-
echo "\033[32m [failed-timeout] Test case execution was terminated after exceeding the 30m limit."
75+
echo "\033[32m [failed-timeout] Test case execution was terminated after exceeding the ${running_time} min limit."
7876
fi
7977
else
8078
tail -n 1 ${log_path}/unittest.log
@@ -85,16 +83,12 @@ print_info() {
8583
get_diff_TO_case(){
8684
export FLAGS_enable_CI=false
8785
for file_name in `git diff --numstat ${AGILE_COMPILE_BRANCH} |awk '{print $NF}'`;do
88-
arr_file_name=(${file_name//// })
89-
dir1=${arr_file_name[0]}
90-
dir2=${arr_file_name[1]}
91-
dir3=${arr_file_name[2]}
92-
dir4=${arr_file_name[3]}
93-
file_item=$dir1/$dir2/$dir3/$dir4
94-
echo "file_name:"${file_name}, "dir1:"${dir1}, "dir2:"${dir2},"dir3:"${dir3},".xx:" ${file_name##*.}
86+
ext="${file_name##*.}"
87+
echo "file_name: ${file_name}, ext: ${file_name##*.}"
88+
9589
if [ ! -f ${file_name} ];then # 针对pr删掉文件
9690
continue
97-
elif [[ ${file_name##*.} == "md" ]] || [[ ${file_name##*.} == "rst" ]] || [[ ${dir1} == "docs" ]];then
91+
elif [[ "$ext" == "md" || "$ext" == "rst" || "$file_name" == docs/* ]]; then
9892
continue
9993
else
10094
FLAGS_enable_CI=true

tests/llm/test_grpo.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import os
18+
import signal
19+
import subprocess
20+
import sys
21+
import time
22+
import unittest
23+
24+
from parameterized import parameterized_class
25+
26+
from .testing_utils import LLMTest
27+
28+
29+
@parameterized_class(
    ["model_dir"],
    [["qwen"]],
)
class GRPOTest(LLMTest, unittest.TestCase):
    """CI smoke test: run 3 steps of GRPO training on Qwen2-1.5B.

    Starts a local reward server, launches ``run_rl.py`` through
    ``paddle.distributed.launch`` and asserts the run finishes cleanly.
    Heavy on I/O (downloads data, spawns subprocesses); intended for the
    GPU CI machines only.
    """

    # config_path is unused here but kept for LLMTest compatibility;
    # model_dir is injected by @parameterized_class.
    config_path: str = None
    model_dir: str = None

    def setUp(self) -> None:
        LLMTest.setUp(self)
        # Make run_rl.py and the model-specific helpers importable.
        sys.path.insert(0, "./llm/alignment/rl")
        sys.path.insert(0, self.model_dir)

    def tearDown(self) -> None:
        LLMTest.tearDown(self)

    def test_grpo(self):
        # Environment the RL run needs. fix: the original built this dict
        # but never passed it to any subprocess — now forwarded via env=.
        env_vars = {
            "PYTHONPATH": f"{os.path.abspath('./')}:{os.path.abspath('./llm')}:" + os.environ.get("PYTHONPATH", ""),
            "FLAGS_set_to_1d": "False",
            "NVIDIA_TF32_OVERRIDE": "0",
            "FLAGS_dataloader_use_file_descriptor": "False",
            "HF_DATASETS_DOWNLOAD_TIMEOUT": "1",
            "FLAGS_gemm_use_half_precision_compute_type": "False",
            "FLAGS_force_cublaslt_no_reduced_precision_reduction": "True",
            "FLAGS_mla_use_tensorcore": "0",
            "FLAGS_cascade_attention_max_partition_size": "2048",
        }
        case_env = os.environ.copy()
        case_env.update(env_vars)

        # Run from the RL example directory; cwd is restored in finally.
        repo_path = os.getcwd()
        rl_dir = os.path.join(repo_path, "llm/alignment/rl")
        os.chdir(rl_dir)

        reward_proc = None  # fix: pre-bind so finally never hits NameError
        try:
            # Download and unpack the dataset once per machine.
            if not os.path.exists("ppo-kk.tgz"):
                subprocess.run(
                    "wget -q https://paddlenlp.bj.bcebos.com/datasets/examples/ppo-kk.tgz && tar zxf ppo-kk.tgz",
                    shell=True,
                    check=True,
                )

            # Start the reward server in its own process group (os.setsid)
            # so the whole tree can be killed on teardown.
            reward_dir = os.path.join(os.getcwd(), "reward")
            reward_log = os.path.join(reward_dir, "reward_server.log")
            reward_server_script = os.path.join(reward_dir, "reward_server.py")
            with open(reward_log, "w") as log_file:
                reward_proc = subprocess.Popen(
                    [sys.executable, reward_server_script],
                    cwd=reward_dir,
                    stdout=log_file,
                    stderr=subprocess.STDOUT,
                    preexec_fn=os.setsid,
                )

            # Give the reward server time to come up.
            time.sleep(30)

            cmd = (
                "python -u -m paddle.distributed.launch "
                '--devices "$CUDA_VISIBLE_DEVICES" run_rl.py '
                "../../config/qwen/reinforce_plus_plus_argument.yaml "
                '--actor_model_name_or_path "Qwen/Qwen2-1.5B" '
                "--max_dec_len 128 "
                "--max_steps 3 "
                "--kl_coeff 0.000 "
                "--kl_loss_coeff 0.000 "
                "--use_fused_rms_norm true"
            )
            pro = subprocess.Popen(cmd, shell=True, env=case_env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, err = pro.communicate()  # also reaps the process; wait() was redundant
            print(out)
            # fix: original `pro.returncode == 0` was a bare expression with
            # no effect; assert so a failed run actually fails the test.
            assert pro.returncode == 0, f"run_rl.py exited with code {pro.returncode}"
            assert b"Error" not in out
            assert b"Error" not in err
        finally:
            # fix: restore cwd even on failure (was inside try in original),
            # then shut down the reward server's whole process group.
            os.chdir(repo_path)
            if reward_proc is not None and reward_proc.poll() is None:
                os.killpg(os.getpgid(reward_proc.pid), signal.SIGTERM)

tests/llm/test_reinforce_plus_plus.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import os
18+
import signal
19+
import subprocess
20+
import sys
21+
import time
22+
import unittest
23+
24+
from parameterized import parameterized_class
25+
26+
from .testing_utils import LLMTest
27+
28+
29+
@parameterized_class(
    ["model_dir"],
    [["qwen"]],
)
class ReinforcePlusPlusTest(LLMTest, unittest.TestCase):
    """CI smoke test: run 3 steps of REINFORCE++ training on Qwen2-1.5B.

    Starts a local reward server, launches ``run_rl.py`` with
    ``--rl_algorithm reinforce_plus_plus`` through
    ``paddle.distributed.launch`` and asserts the run finishes cleanly.
    Heavy on I/O (downloads data, spawns subprocesses); intended for the
    GPU CI machines only.
    """

    # config_path is unused here but kept for LLMTest compatibility;
    # model_dir is injected by @parameterized_class.
    config_path: str = None
    model_dir: str = None

    def setUp(self) -> None:
        LLMTest.setUp(self)
        # Make run_rl.py and the model-specific helpers importable.
        sys.path.insert(0, "./llm/alignment/rl")
        sys.path.insert(0, self.model_dir)

    def tearDown(self) -> None:
        LLMTest.tearDown(self)

    def test_reinforce_plus_plus(self):
        # Environment the RL run needs. fix: the original built this dict
        # but never passed it to any subprocess — now forwarded via env=.
        env_vars = {
            "PYTHONPATH": f"{os.path.abspath('./')}:{os.path.abspath('./llm')}:" + os.environ.get("PYTHONPATH", ""),
            "FLAGS_set_to_1d": "False",
            "NVIDIA_TF32_OVERRIDE": "0",
            "FLAGS_dataloader_use_file_descriptor": "False",
            "HF_DATASETS_DOWNLOAD_TIMEOUT": "1",
            "FLAGS_gemm_use_half_precision_compute_type": "False",
            "FLAGS_force_cublaslt_no_reduced_precision_reduction": "True",
            "FLAGS_mla_use_tensorcore": "0",
            "FLAGS_cascade_attention_max_partition_size": "2048",
        }
        case_env = os.environ.copy()
        case_env.update(env_vars)

        # Run from the RL example directory; cwd is restored in finally.
        repo_path = os.getcwd()
        rl_dir = os.path.join(repo_path, "llm/alignment/rl")
        os.chdir(rl_dir)

        reward_proc = None  # fix: pre-bind so finally never hits NameError
        try:
            # Download and unpack the dataset once per machine.
            if not os.path.exists("ppo-kk.tgz"):
                subprocess.run(
                    "wget -q https://paddlenlp.bj.bcebos.com/datasets/examples/ppo-kk.tgz && tar zxf ppo-kk.tgz",
                    shell=True,
                    check=True,
                )

            # Start the reward server in its own process group (os.setsid)
            # so the whole tree can be killed on teardown.
            reward_dir = os.path.join(os.getcwd(), "reward")
            reward_log = os.path.join(reward_dir, "reward_server.log")
            reward_server_script = os.path.join(reward_dir, "reward_server.py")
            with open(reward_log, "w") as log_file:
                reward_proc = subprocess.Popen(
                    [sys.executable, reward_server_script],
                    cwd=reward_dir,
                    stdout=log_file,
                    stderr=subprocess.STDOUT,
                    preexec_fn=os.setsid,
                )

            # Give the reward server time to come up.
            time.sleep(30)

            cmd = (
                "python -u -m paddle.distributed.launch "
                '--devices "$CUDA_VISIBLE_DEVICES" run_rl.py '
                "../../config/qwen/reinforce_plus_plus_argument.yaml "
                '--rl_algorithm "reinforce_plus_plus" '
                '--actor_model_name_or_path "Qwen/Qwen2-1.5B" '
                "--max_dec_len 128 "
                "--max_steps 3 "
                "--kl_coeff 0.000 "
                "--kl_loss_coeff 0.000 "
                "--use_fused_rms_norm true"
            )
            pro = subprocess.Popen(cmd, shell=True, env=case_env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, err = pro.communicate()  # also reaps the process; wait() was redundant
            print(out)
            # fix: original `pro.returncode == 0` was a bare expression with
            # no effect; assert so a failed run actually fails the test.
            assert pro.returncode == 0, f"run_rl.py exited with code {pro.returncode}"
            assert b"Error" not in out
            assert b"Error" not in err
        finally:
            # fix: restore cwd even on failure (was inside try in original),
            # then shut down the reward server's whole process group.
            os.chdir(repo_path)
            if reward_proc is not None and reward_proc.poll() is None:
                os.killpg(os.getpgid(reward_proc.pid), signal.SIGTERM)

0 commit comments

Comments
 (0)