Commit e5305cc

Merge pull request #264 from huangshiyu13/main
update test
2 parents 23cfd38 + 0707ba6

16 files changed: 230 additions, 338 deletions

.gitignore

Lines changed: 0 additions & 1 deletion
@@ -153,7 +153,6 @@ run_results/
 api_docs
 .vscode
 *.pkl
-api_docs
 *.json
 opponent_pool
 !/examples/selfplay/opponent_templates/tictactoe_opponent/info.json

examples/nlp/nlp_ppo.yaml

Lines changed: 1 addition & 0 deletions
@@ -28,5 +28,6 @@ reward_class:
   args: {
     "intent_model": "rajkumarrrk/roberta-daily-dialog-intent-classifier",
     "ref_model": "rajkumarrrk/gpt2-fine-tuned-on-daily-dialog",
+    "use_deepspeed": true,
   }
 
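
The new key is consumed by the reward components changed below. As a rough sketch of the plumbing (hypothetical; OpenRL's real reward builder may differ), each entry in args is forwarded to the constructors that accept it, which is how use_deepspeed reaches Intent and KLPenalty:

import inspect

def build_component(cls, args: dict):
    # Hypothetical helper: forward only the keys that cls.__init__
    # actually accepts, so one shared args dict can configure several
    # reward components.
    accepted = inspect.signature(cls.__init__).parameters
    kwargs = {key: value for key, value in args.items() if key in accepted}
    return cls(**kwargs)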

openrl/envs/__init__.py

Lines changed: 0 additions & 3 deletions
@@ -16,12 +16,9 @@
 
 toy_all_envs = [
     "BitFlippingEnv",
-    "FakeImageEnv",
     "IdentityEnv",
     "IdentityEnvcontinuous",
     "IdentityEnvBox",
-    "IdentityEnvMultiBinary",
-    "IdentityEnvMultiDiscrete",
     "SimpleMultiObsEnv",
     "SimpleMultiObsEnv",
 ]

openrl/envs/nlp/daily_dialog_env.py

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ def __init__(
         self.env_name = "daily_dialog"
         tokenizer_name = cfg.env.args["tokenizer_path"]
         if tokenizer_name == "builtin_BPE":
-            from tokenizers import AddedToken, Tokenizer, models
+            from tokenizers import Tokenizer, models
 
             self.tokenizer = Tokenizer(models.BPE())
 
openrl/envs/nlp/rewards/intent.py

Lines changed: 33 additions & 5 deletions
@@ -25,15 +25,42 @@ def get_eval_ds_config(offload, stage=0):
 
 
 class Intent:
-    def __init__(self, intent_model: str, intent_coeff: float = 1.0) -> None:
+    def __init__(
+        self, intent_model: str, intent_coeff: float = 1.0, use_deepspeed: bool = True
+    ) -> None:
         super().__init__()
 
         self._intent_coeff = intent_coeff
-        self.use_deepspeed = True  # TODO
+        self.use_deepspeed = use_deepspeed
+        if intent_model == "builtin_intent":
+            from transformers import GPT2Config, GPT2LMHeadModel
+
+            class TestTokenizer:
+                def __call__(
+                    self,
+                    input_texts,
+                    return_tensors="pt",
+                    truncation=True,
+                    padding=True,
+                    max_length=None,
+                ):
+                    class EncodedOutput:
+                        def __init__(self, input_ids, attention_mask):
+                            self.input_ids = input_ids
+                            self.attention_mask = attention_mask
+
+                    input_ids = torch.zeros((32), dtype=torch.long)
+                    attention_masks = torch.zeros((32), dtype=torch.long)
+                    return EncodedOutput(input_ids, attention_masks)
+
+            self._tokenizer = TestTokenizer()
+            config = GPT2Config()
+            self._model = GPT2LMHeadModel(config)
 
-        model_path = data_abs_path(intent_model)
-        self._tokenizer = AutoTokenizer.from_pretrained(intent_model)
-        self._model = AutoModelForSequenceClassification.from_pretrained(model_path)
+        else:
+            model_path = data_abs_path(intent_model)
+            self._tokenizer = AutoTokenizer.from_pretrained(intent_model)
+            self._model = AutoModelForSequenceClassification.from_pretrained(model_path)
 
         if self.use_deepspeed:
             import deepspeed
@@ -87,6 +114,7 @@ def get_input_for_classifier(prompt, generated_text):
             input_ids=encoded.input_ids.to(self._device),
             attention_mask=encoded.attention_mask.to(self._device),
         )
+
        pred_labels = torch.argmax(outputs.logits, dim=1).tolist()
 
         score = (np.array(pred_labels) == np.array(target_intents)) * 1.0
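
A hedged usage sketch of the new escape hatch (constructing Intent directly, as a test would): the magic string "builtin_intent" swaps in the TestTokenizer stub and a randomly initialized GPT-2, so nothing is downloaded from the Hugging Face Hub.

from openrl.envs.nlp.rewards.intent import Intent

# No network access: the builtin stub tokenizer and an untrained
# GPT2LMHeadModel replace the pretrained RoBERTa intent classifier.
# use_deepspeed=False additionally skips DeepSpeed initialization.
intent_reward = Intent(
    intent_model="builtin_intent",
    intent_coeff=0.5,
    use_deepspeed=False,
)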

openrl/envs/nlp/rewards/kl_penalty.py

Lines changed: 9 additions & 2 deletions
@@ -31,14 +31,21 @@ def __init__(
         action_space: gym.Space,
         ref_model: str,
         apply_model_parallel: bool = True,
+        use_deepspeed: bool = True,
     ):
         super().__init__()
-        self.use_deepspeed = True
+        self.use_deepspeed = use_deepspeed
         self.use_fp16 = True
 
         # reference model
         self._apply_model_parallel = apply_model_parallel
-        self._ref_net = AutoModelForCausalLM.from_pretrained(ref_model)
+        if ref_model == "builtin_ref":
+            from transformers import GPT2Config, GPT2LMHeadModel
+
+            config = GPT2Config()
+            self._ref_net = GPT2LMHeadModel(config)
+        else:
+            self._ref_net = AutoModelForCausalLM.from_pretrained(ref_model)
         self._ref_net = self._ref_net.eval()
         if self.use_deepspeed:
             import deepspeed
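
For intuition, a minimal sketch of the quantity a reference-model KL penalty of this kind typically computes; the exact formula lives outside this diff, so treat this as an assumption rather than KLPenalty's actual code:

import torch
import torch.nn.functional as F

def kl_penalty(logits, ref_logits, actions, coeff=0.1):
    # Per-token penalty: difference between the policy's and the frozen
    # reference model's log-probabilities at the sampled tokens.
    logp = F.log_softmax(logits, dim=-1).gather(-1, actions.unsqueeze(-1))
    ref_logp = F.log_softmax(ref_logits, dim=-1).gather(-1, actions.unsqueeze(-1))
    return -coeff * (logp - ref_logp).squeeze(-1)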

openrl/envs/nlp/rewards/meteor.py

Lines changed: 12 additions & 4 deletions
@@ -6,13 +6,21 @@
 import openrl.envs.nlp as nlp
 
 
+class VirtualMetric:
+    def compute(self, predictions: Any, references: Any) -> Dict[str, float]:
+        return {"meteor": 0.0}
+
+
 class Meteor:
-    def __init__(self, meteor_coeff: int) -> None:
+    def __init__(self, meteor_coeff: int, test: bool = False) -> None:
         super().__init__()
         self._meteor_coeff = meteor_coeff
-        self._metric = evaluate.load(
-            str(Path(nlp.__file__).parent / "utils/metrics/meteor.py")
-        )
+        if test:
+            self._metric = VirtualMetric()
+        else:
+            self._metric = evaluate.load(
+                str(Path(nlp.__file__).parent / "utils/metrics/meteor.py")
+            )
 
     def __call__(
         self,
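
Usage of the new switch is straightforward; with test=True the VirtualMetric stub stands in for the real metric, so the METEOR script is never loaded and compute() always reports 0.0:

from openrl.envs.nlp.rewards.meteor import Meteor

# Test mode: no evaluate.load(), no metric download, constant score.
meteor = Meteor(meteor_coeff=0.5, test=True)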

openrl/envs/toy_envs/__init__.py

Lines changed: 1 addition & 19 deletions
@@ -18,25 +18,12 @@
 from typing import Any
 
 from openrl.envs.toy_envs.bit_flipping_env import BitFlippingEnv
-from openrl.envs.toy_envs.identity_env import (
-    FakeImageEnv,
-    IdentityEnv,
-    IdentityEnvBox,
-    IdentityEnvcontinuous,
-    IdentityEnvMultiBinary,
-    IdentityEnvMultiDiscrete,
-)
-from openrl.envs.toy_envs.multi_input_envs import SimpleMultiObsEnv
+from openrl.envs.toy_envs.identity_env import IdentityEnv, IdentityEnvcontinuous
 
 __all__ = [
     "BitFlippingEnv",
-    "FakeImageEnv",
     "IdentityEnv",
     "IdentityEnvcontinuous",
-    "IdentityEnvBox",
-    "IdentityEnvMultiBinary",
-    "IdentityEnvMultiDiscrete",
-    "SimpleMultiObsEnv",
 ]
 
 
@@ -49,13 +36,8 @@
 
 env_dict = {
     "BitFlippingEnv": BitFlippingEnv,
-    "FakeImageEnv": FakeImageEnv,
     "IdentityEnv": IdentityEnv,
     "IdentityEnvcontinuous": IdentityEnvcontinuous,
-    "IdentityEnvBox": IdentityEnvBox,
-    "IdentityEnvMultiBinary": IdentityEnvMultiBinary,
-    "IdentityEnvMultiDiscrete": IdentityEnvMultiDiscrete,
-    "SimpleMultiObsEnv": SimpleMultiObsEnv,
 }
 
 
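
After this change only the environments still in env_dict are exposed. A hypothetical smoke test over the survivors (assuming the Gym-style API used by identity_env.py, and that every constructor works with its defaults):

from openrl.envs.toy_envs import env_dict

for name, env_cls in env_dict.items():
    env = env_cls()  # assumes default arguments exist for every env
    obs = env.reset()
    # old-style Gym step: (obs, reward, done, info)
    obs, reward, done, info = env.step(env.action_space.sample())
    print(name, reward, done)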

openrl/envs/toy_envs/identity_env.py

Lines changed: 0 additions & 111 deletions
@@ -157,114 +157,3 @@ def _get_reward(self, action: T) -> float:
 
     def render(self, mode: str = "human") -> None:
         pass
-
-
-# Not Work Yet
-class IdentityEnvBox(IdentityEnv[np.ndarray]):
-    def __init__(
-        self,
-        low: float = -1.0,
-        high: float = 1.0,
-        eps: float = 0.05,
-        ep_length: int = 100,
-    ):
-        """
-        Identity environment for testing purposes
-
-        :param low: the lower bound of the box dim
-        :param high: the upper bound of the box dim
-        :param eps: the epsilon bound for correct value
-        :param ep_length: the length of each episode in timesteps
-        """
-        space = spaces.Box(low=low, high=high, shape=(1,), dtype=np.float32)
-        super().__init__(ep_length=ep_length, space=space)
-        self.eps = eps
-
-    def step(
-        self, action: np.ndarray
-    ) -> Tuple[np.ndarray, float, bool, Dict[str, Any]]:
-        reward = self._get_reward(action)
-        self._choose_next_state()
-        self.current_step += 1
-        done = self.current_step >= self.ep_length
-        return self.state, reward, done, {}
-
-    def _get_reward(self, action: np.ndarray) -> float:
-        return (
-            1.0 if (self.state - self.eps) <= action <= (self.state + self.eps) else 0.0
-        )
-
-
-# Not Work Yet
-class IdentityEnvMultiDiscrete(IdentityEnv[np.ndarray]):
-    def __init__(self, dim: int = 1, ep_length: int = 100) -> None:
-        """
-        Identity environment for testing purposes
-
-        :param dim: the size of the dimensions you want to learn
-        :param ep_length: the length of each episode in timesteps
-        """
-        space = spaces.MultiDiscrete([dim, dim])
-        super().__init__(ep_length=ep_length, space=space)
-
-
-# Not Work Yet
-class IdentityEnvMultiBinary(IdentityEnv[np.ndarray]):
-    def __init__(self, dim: int = 1, ep_length: int = 100) -> None:
-        """
-        Identity environment for testing purposes
-
-        :param dim: the size of the dimensions you want to learn
-        :param ep_length: the length of each episode in timesteps
-        """
-        space = spaces.MultiBinary(dim)
-        super().__init__(ep_length=ep_length, space=space)
-
-
-# Not Work Yet
-class FakeImageEnv(gym.Env):
-    """
-    Fake image environment for testing purposes, it mimics Atari games.
-
-    :param action_dim: Number of discrete actions
-    :param screen_height: Height of the image
-    :param screen_width: Width of the image
-    :param n_channels: Number of color channels
-    :param discrete: Create discrete action space instead of continuous
-    :param channel_first: Put channels on first axis instead of last
-    """
-
-    def __init__(
-        self,
-        action_dim: int = 6,
-        screen_height: int = 84,
-        screen_width: int = 84,
-        n_channels: int = 1,
-        discrete: bool = True,
-        channel_first: bool = False,
-    ) -> None:
-        self.observation_shape = (screen_height, screen_width, n_channels)
-        if channel_first:
-            self.observation_shape = (n_channels, screen_height, screen_width)
-        self.observation_space = spaces.Box(
-            low=0, high=255, shape=self.observation_shape, dtype=np.uint8
-        )
-        if discrete:
-            self.action_space = spaces.Discrete(action_dim)
-        else:
-            self.action_space = spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32)
-        self.ep_length = 10
-        self.current_step = 0
-
-    def reset(self) -> np.ndarray:
-        self.current_step = 0
-        return self.observation_space.sample()
-
-    def step(self, action: Union[np.ndarray, int]):
-        reward = 0.0
-        self.current_step += 1
-        done = self.current_step >= self.ep_length
-        return self.observation_space.sample(), reward, done, {}
-
-    def render(self, mode: str = "human") -> None:
-        pass
