
Commit a50c041

init v0.2.0
2 parents: e864a08 + 2b798c0

File tree

15 files changed (+74 / -61 lines)

README.md

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@
 [![Embark](https://img.shields.io/badge/discord-OpenRL-%237289da.svg?logo=discord)](https://discord.gg/qMbVT2qBhr)
 [![slack badge](https://img.shields.io/badge/Slack-join-blueviolet?logo=slack&amp)](https://join.slack.com/t/openrlhq/shared_invite/zt-1tqwpvthd-Eeh0IxQ~DIaGqYXoW2IUQg)

-OpenRL-v0.1.10 is updated on Oct 27, 2023
+OpenRL-v0.2.0 is updated on Dec 20, 2023

 The main branch is the latest version of OpenRL, which is under active development. If you just want to have a try with
 OpenRL, you can switch to the stable branch.

examples/envpool/envpool_wrappers.py

Lines changed: 4 additions & 5 deletions
@@ -9,8 +9,7 @@
 from packaging import version
 from stable_baselines3.common.vec_env import VecEnvWrapper as BaseWrapper
 from stable_baselines3.common.vec_env import VecMonitor
-from stable_baselines3.common.vec_env.base_vec_env import (VecEnvObs,
-                                                            VecEnvStepReturn)
+from stable_baselines3.common.vec_env.base_vec_env import VecEnvObs, VecEnvStepReturn

 is_legacy_gym = version.parse(gym.__version__) < version.parse("0.26.0")

@@ -114,9 +113,9 @@ def __init__(

         if is_wrapped_with_monitor:
             warnings.warn(
-                "The environment is already wrapped with a `Monitor` wrapper"
-                "but you are wrapping it with a `VecMonitor` wrapper, the `Monitor` statistics will be"
-                "overwritten by the `VecMonitor` ones.",
+                "The environment is already wrapped with a `Monitor` wrapperbut you are"
+                " wrapping it with a `VecMonitor` wrapper, the `Monitor` statistics"
+                " will beoverwritten by the `VecMonitor` ones.",
                 UserWarning,
             )

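Note: the reflowed warning above really does read "wrapperbut" and "will beoverwritten", because Python joins adjacent string literals verbatim, so missing trailing spaces survive the reformatting. A minimal illustration (the message text is copied from the hunk; nothing else here is from the repository):

```python
# Adjacent string literals are concatenated with no separator, so a missing
# trailing space in one literal shows up in the final message.
msg = (
    "The environment is already wrapped with a `Monitor` wrapperbut you are"
    " wrapping it with a `VecMonitor` wrapper, the `Monitor` statistics"
    " will beoverwritten by the `VecMonitor` ones."
)
print(msg)  # note the fused "wrapperbut" and "beoverwritten"
```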
examples/envpool/make_env.py

Lines changed: 7 additions & 4 deletions
@@ -5,9 +5,12 @@
 import envpool
 from gymnasium import Env

-
-from openrl.envs.vec_env import (AsyncVectorEnv, RewardWrapper,
-                                 SyncVectorEnv, VecMonitorWrapper)
+from openrl.envs.vec_env import (
+    AsyncVectorEnv,
+    RewardWrapper,
+    SyncVectorEnv,
+    VecMonitorWrapper,
+)
 from openrl.envs.vec_env.vec_info import VecInfoFactory
 from openrl.envs.wrappers.base_wrapper import BaseWrapper
 from openrl.rewards import RewardFactory

@@ -76,7 +79,7 @@ def make_envpool_envs(
     assert kwargs.get("env_type") in ["gym", "dm", "gymnasium"]
     kwargs["envpool"] = True

-    if 'env_wrappers' in kwargs:
+    if "env_wrappers" in kwargs:
         env_wrappers = kwargs.pop("env_wrappers")
     else:
         env_wrappers = []

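The second hunk only normalizes quote style; the surrounding code is the usual "pop an optional keyword argument, otherwise default" idiom. A standalone sketch of that idiom (the function name and arguments here are hypothetical, not OpenRL's API):

```python
def consume_env_wrappers(**kwargs):
    # Pop `env_wrappers` if the caller supplied it; otherwise fall back to an
    # empty list so later code can iterate over it unconditionally.
    if "env_wrappers" in kwargs:
        env_wrappers = kwargs.pop("env_wrappers")
    else:
        env_wrappers = []
    return env_wrappers, kwargs


wrappers, rest = consume_env_wrappers(env_type="gym", env_wrappers=["dummy"])
print(wrappers, rest)  # ['dummy'] {'env_type': 'gym'}
print(consume_env_wrappers(env_type="gym"))  # ([], {'env_type': 'gym'})
```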
examples/envpool/train_ppo.py

Lines changed: 2 additions & 2 deletions
@@ -16,10 +16,10 @@

 """"""
 import numpy as np
-
-from openrl.configs.config import create_config_parser
 from make_env import make
+
 from examples.envpool.envpool_wrappers import VecAdapter, VecMonitor
+from openrl.configs.config import create_config_parser
 from openrl.modules.common import PPONet as Net
 from openrl.modules.common.ppo_net import PPONet as Net
 from openrl.runners.common import PPOAgent as Agent

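For context on how these imports are typically used, OpenRL's PPO entry point follows a make-env / Net / Agent / train pattern. A hedged sketch based on OpenRL's quick-start examples (the environment id, env_num, and step budget are illustrative; this envpool example wires in VecAdapter and VecMonitor instead of the plain make):

```python
from openrl.envs.common import make
from openrl.modules.common import PPONet as Net
from openrl.runners.common import PPOAgent as Agent

env = make("CartPole-v1", env_num=9)  # vectorized training environments
net = Net(env)                        # PPO networks built from the env spaces
agent = Agent(net)                    # runner that owns the training loop
agent.train(total_time_steps=20000)   # short illustrative budget
```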
openrl/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 __TITLE__ = "openrl"
-__VERSION__ = "v0.1.10"
+__VERSION__ = "v0.2.0"
 __DESCRIPTION__ = "Distributed Deep RL Framework"
 __AUTHOR__ = "OpenRL Contributors"
 __EMAIL__ = "huangshiyu@4paradigm.com"

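After this bump the package reports the new version at runtime; a trivial check:

```python
import openrl

# Expected to print "v0.2.0" once this commit is installed.
print(openrl.__VERSION__)
```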
openrl/envs/common/build_envs.py

Lines changed: 1 addition & 1 deletion
@@ -69,4 +69,4 @@ def _make_env() -> Env:
         return _make_env

     env_fns = [create_env(env_id, env_num, need_env_id) for env_id in range(env_num)]
-    return env_fns
+    return env_fns

(The removed and added lines are textually identical; the change appears to be whitespace-only, most likely adding a newline at the end of the file.)

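The comprehension above builds one zero-argument environment factory per index, which is the shape of input vectorized envs typically consume. A self-contained sketch of that factory-list pattern (create_env here is a stand-in, not OpenRL's actual helper):

```python
from typing import Callable, Dict, List


def create_env(env_id: int, env_num: int, need_env_id: bool) -> Callable[[], Dict]:
    def _make_env() -> Dict:
        # Stand-in for constructing a real environment instance.
        return {"env_id": env_id, "env_num": env_num, "need_env_id": need_env_id}

    return _make_env


env_num = 3
env_fns: List[Callable[[], Dict]] = [
    create_env(env_id, env_num, True) for env_id in range(env_num)
]
print([fn()["env_id"] for fn in env_fns])  # [0, 1, 2]
```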
openrl/envs/common/registration.py

Lines changed: 1 addition & 1 deletion
@@ -173,4 +173,4 @@ def make(
     vec_info_class = VecInfoFactory.get_vec_info_class(vec_info_class, env)
     env = VecMonitorWrapper(vec_info_class, env)

-    return env
+    return env

(As in build_envs.py, the lines are textually identical; the change appears to be whitespace-only, most likely adding a newline at the end of the file.)

openrl/envs/nlp/daily_dialog_env.py

Lines changed: 12 additions & 11 deletions
@@ -72,16 +72,18 @@ def __init__(
         # set the observation and action space here
         self._vocab_size = self.tokenizer.vocab_size

-        self.observation_space = DictSpace({
-            "input_encoded_pt": spaces.Box(
-                low=0,
-                high=self._vocab_size,
-                shape=(self._max_text_length + self.max_steps,),
-            ),
-            "input_attention_mask_pt": spaces.Box(
-                low=0, high=1, shape=(self._max_text_length + self.max_steps,)
-            ),
-        })
+        self.observation_space = DictSpace(
+            {
+                "input_encoded_pt": spaces.Box(
+                    low=0,
+                    high=self._vocab_size,
+                    shape=(self._max_text_length + self.max_steps,),
+                ),
+                "input_attention_mask_pt": spaces.Box(
+                    low=0, high=1, shape=(self._max_text_length + self.max_steps,)
+                ),
+            }
+        )
         self.action_space = Discrete(n=self._vocab_size)
         # see https://github.com/huggingface/transformers/issues/4875 : rounding up to nearest power of 2 for better GPU efficiency

@@ -112,7 +114,6 @@ def __init__(
         self.reward_function = None

     def set_reward(self, reward_fn=None):
-
         self.reward_function = reward_fn

     def step_word(self, word: str) -> Tuple[Dict[str, torch.tensor], int, bool, dict]:

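The reformatted block defines a dictionary observation space over token ids plus an attention mask, and a discrete per-token action space. A small, self-contained sketch of the same layout using plain gymnasium spaces (the sizes are illustrative assumptions, and spaces.Dict stands in for OpenRL's DictSpace):

```python
import numpy as np
from gymnasium import spaces

max_text_length, max_steps, vocab_size = 64, 16, 50_257  # assumed sizes

observation_space = spaces.Dict(
    {
        "input_encoded_pt": spaces.Box(
            low=0, high=vocab_size, shape=(max_text_length + max_steps,), dtype=np.int64
        ),
        "input_attention_mask_pt": spaces.Box(
            low=0, high=1, shape=(max_text_length + max_steps,), dtype=np.int64
        ),
    }
)
action_space = spaces.Discrete(vocab_size)  # one action per vocabulary token

print(observation_space["input_encoded_pt"].shape)  # (80,)
print(action_space.n)  # 50257
```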
openrl/envs/nlp/fake_dialog_env.py

Lines changed: 12 additions & 10 deletions
@@ -30,16 +30,18 @@ def __init__(
         # set the observation and action space here
         self._vocab_size = 2

-        self.observation_space = DictSpace({
-            "input_encoded_pt": spaces.Box(
-                low=0,
-                high=self._vocab_size,
-                shape=(self._max_text_length + self.max_steps,),
-            ),
-            "input_attention_mask_pt": spaces.Box(
-                low=0, high=1, shape=(self._max_text_length + self.max_steps,)
-            ),
-        })
+        self.observation_space = DictSpace(
+            {
+                "input_encoded_pt": spaces.Box(
+                    low=0,
+                    high=self._vocab_size,
+                    shape=(self._max_text_length + self.max_steps,),
+                ),
+                "input_attention_mask_pt": spaces.Box(
+                    low=0, high=1, shape=(self._max_text_length + self.max_steps,)
+                ),
+            }
+        )
         self.action_space = Discrete(n=self._vocab_size)

         n = 2

openrl/envs/nlp/rewards/intent.py

Lines changed: 2 additions & 3 deletions
@@ -41,10 +41,9 @@ def __init__(
         self.use_model_parallel = False

         if intent_model == "builtin_intent":
-
             self._device = "cpu"
-            self.use_data_parallel = False
-
+            self.use_data_parallel = False
+
             from transformers import GPT2Config, GPT2LMHeadModel

             class TestTokenizer:

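The "builtin_intent" branch above falls back to a locally constructed GPT-2 on CPU instead of loading a pretrained intent model; only the import is visible in this hunk. A hedged sketch of that kind of fallback (the config sizes are illustrative assumptions, not values from the repository):

```python
from transformers import GPT2Config, GPT2LMHeadModel

# Tiny, randomly initialized GPT-2 so the reward can run on CPU without downloads.
config = GPT2Config(n_layer=2, n_head=2, n_embd=64)
model = GPT2LMHeadModel(config).to("cpu")
model.eval()

print(sum(p.numel() for p in model.parameters()))  # parameter count of the toy model
```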
openrl/envs/nlp/rewards/kl_penalty.py

Lines changed: 8 additions & 7 deletions
@@ -47,10 +47,9 @@ def __init__(

         # reference model
         if ref_model == "builtin_ref":
-
             self.device = "cpu"
-            self.use_data_parallel = False
-
+            self.use_data_parallel = False
+
             from transformers import GPT2Config, GPT2LMHeadModel

             config = GPT2Config()

@@ -146,10 +145,12 @@ def __call__(
         rew = -self._alpha * kl_div
         infos = []
         for kl in kl_div:
-            infos.append({
-                "alpha": self._alpha,
-                "kl_div": kl.mean(),
-            })
+            infos.append(
+                {
+                    "alpha": self._alpha,
+                    "kl_div": kl.mean(),
+                }
+            )
         return rew, infos

     def _prepare_inputs_for_model(

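The second hunk computes the per-sample penalty reward as rew = -alpha * kl_div and records each sample's mean KL in an info dict. A minimal, self-contained sketch of that computation with made-up numbers (shape and alpha are illustrative assumptions):

```python
import torch

alpha = 0.2  # penalty coefficient, illustrative value
kl_div = torch.tensor([[0.05, 0.10], [0.30, 0.20]])  # assumed (batch, step) KL values

rew = -alpha * kl_div  # larger divergence from the reference model -> more negative reward
infos = []
for kl in kl_div:
    infos.append(
        {
            "alpha": alpha,
            "kl_div": kl.mean(),
        }
    )

print(rew)
print(infos)
```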
openrl/envs/nlp/utils/metrics/meteor.py

Lines changed: 14 additions & 10 deletions
@@ -88,16 +88,20 @@ def _info(self):
             citation=_CITATION,
             inputs_description=_KWARGS_DESCRIPTION,
             features=[
-                datasets.Features({
-                    "predictions": datasets.Value("string", id="sequence"),
-                    "references": datasets.Sequence(
-                        datasets.Value("string", id="sequence"), id="references"
-                    ),
-                }),
-                datasets.Features({
-                    "predictions": datasets.Value("string", id="sequence"),
-                    "references": datasets.Value("string", id="sequence"),
-                }),
+                datasets.Features(
+                    {
+                        "predictions": datasets.Value("string", id="sequence"),
+                        "references": datasets.Sequence(
+                            datasets.Value("string", id="sequence"), id="references"
+                        ),
+                    }
+                ),
+                datasets.Features(
+                    {
+                        "predictions": datasets.Value("string", id="sequence"),
+                        "references": datasets.Value("string", id="sequence"),
+                    }
+                ),
             ],
             codebase_urls=[
                 "https://github.com/nltk/nltk/blob/develop/nltk/translate/meteor_score.py"

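The metric wrapper above declares two accepted input shapes (multi-reference and single-reference). The scorer pointed to by codebase_urls is NLTK's meteor_score; a hedged usage sketch, assuming a recent NLTK that expects pre-tokenized inputs and the WordNet data to be available:

```python
import nltk
from nltk.translate.meteor_score import meteor_score

nltk.download("wordnet", quiet=True)  # METEOR relies on WordNet synonym matching

prediction = "the cat sat on the mat".split()
references = ["a cat was sitting on the mat".split()]

score = meteor_score(references, prediction)  # references first, then the hypothesis
print(round(score, 3))
```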
openrl/modules/networks/policy_network_gpt.py

Lines changed: 0 additions & 1 deletion
@@ -46,7 +46,6 @@ def __init__(
         disable_drop_out: bool = True,
         extra_args=None,
     ) -> None:
-
         self.device = device
         self.use_fp16 = cfg.use_fp16
         self.use_deepspeed = cfg.use_deepspeed

openrl/modules/networks/value_network_gpt.py

Lines changed: 0 additions & 1 deletion
@@ -44,7 +44,6 @@ def __init__(
         device=torch.device("cpu"),
         extra_args=None,
     ):
-
         self.device = device

         self.use_fp16 = cfg.use_fp16

openrl/modules/utils/valuenorm.py

Lines changed: 9 additions & 3 deletions
@@ -24,9 +24,15 @@ def __init__(
         self.per_element_update = per_element_update
         self.tpdv = dict(dtype=torch.float32, device=device)

-        self.running_mean = nn.Parameter(torch.zeros(input_shape), requires_grad=False).to(**self.tpdv)
-        self.running_mean_sq = nn.Parameter(torch.zeros(input_shape), requires_grad=False).to(**self.tpdv)
-        self.debiasing_term = nn.Parameter(torch.tensor(0.0), requires_grad=False).to(**self.tpdv)
+        self.running_mean = nn.Parameter(
+            torch.zeros(input_shape), requires_grad=False
+        ).to(**self.tpdv)
+        self.running_mean_sq = nn.Parameter(
+            torch.zeros(input_shape), requires_grad=False
+        ).to(**self.tpdv)
+        self.debiasing_term = nn.Parameter(torch.tensor(0.0), requires_grad=False).to(
+            **self.tpdv
+        )

         # self.running_mean = nn.Parameter(torch.zeros(input_shape), requires_grad=False)
         # self.running_mean_sq = nn.Parameter(

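running_mean, running_mean_sq, and debiasing_term are the usual buffers of a debiased running value normalizer: exponential moving averages of the mean and squared mean, plus a bias-correction term. A hedged sketch of how such a normalizer is typically updated and applied (the update rule and beta are assumptions about the standard technique, not necessarily OpenRL's exact implementation):

```python
import torch

beta = 0.99999  # EMA decay, illustrative value

running_mean = torch.zeros(1)
running_mean_sq = torch.zeros(1)
debiasing_term = torch.zeros(())


def update(batch: torch.Tensor) -> None:
    # Exponential moving averages of mean and squared mean, plus the
    # bias-correction term that debiases the zero initialization.
    global running_mean, running_mean_sq, debiasing_term
    running_mean = beta * running_mean + (1.0 - beta) * batch.mean()
    running_mean_sq = beta * running_mean_sq + (1.0 - beta) * (batch ** 2).mean()
    debiasing_term = beta * debiasing_term + (1.0 - beta)


def normalize(x: torch.Tensor) -> torch.Tensor:
    mean = running_mean / debiasing_term.clamp(min=1e-5)
    mean_sq = running_mean_sq / debiasing_term.clamp(min=1e-5)
    var = (mean_sq - mean ** 2).clamp(min=1e-2)
    return (x - mean) / var.sqrt()


for _ in range(100):
    update(torch.randn(64) * 3.0 + 5.0)  # toy value batches with mean 5, std 3
print(normalize(torch.tensor([5.0])))    # roughly centered near 0
```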