
Commit 401569b: add snake environment

Parent: 65f50d9

35 files changed: 1040 additions and 324 deletions.

Gallery.md

Lines changed: 15 additions & 14 deletions

The environment table (in the section inviting users to contribute their own training examples and demos) is re-aligned and gains a Snake row; every other row carries over unchanged. The resulting table:

<div align="center">

| Environment/Demo | Tags | Refs |
|:----------------:|:----:|:----:|
| [MuJoCo](https://github.com/deepmind/mujoco)<br> <img width="300px" height="auto" src="./docs/images/mujoco.png"> | ![continuous](https://img.shields.io/badge/-continuous-green) | [code](./examples/mujoco/) |
| [CartPole](https://gymnasium.farama.org/environments/classic_control/cart_pole/)<br> <img width="300px" height="auto" src="./docs/images/cartpole.png"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | [code](./examples/cartpole/) |
| [MPE: Simple Spread](https://pettingzoo.farama.org/environments/mpe/simple_spread/)<br> <img width="300px" height="auto" src="./docs/images/simple_spread_trained.gif"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![MARL](https://img.shields.io/badge/-MARL-yellow) | [code](./examples/mpe/) |
| [StarCraft II](https://github.com/oxwhirl/smac)<br> <img width="300px" height="auto" src="./docs/images/smac.png"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![MARL](https://img.shields.io/badge/-MARL-yellow) | [code](./examples/smac/) |
| [Chat Bot](https://openrl-docs.readthedocs.io/en/latest/quick_start/train_nlp.html)<br> <img width="300px" height="auto" src="./docs/images/chat.gif"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![NLP](https://img.shields.io/badge/-NLP-green) ![Transformer](https://img.shields.io/badge/-Transformer-blue) | [code](./examples/nlp/) |
| [Atari Pong](https://gymnasium.farama.org/environments/atari/pong/)<br> <img width="300px" height="auto" src="./docs/images/pong.png"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![image](https://img.shields.io/badge/-image-red) | [code](./examples/atari/) |
| [PettingZoo: Tic-Tac-Toe](https://pettingzoo.farama.org/environments/classic/tictactoe/)<br> <img width="300px" height="auto" src="./docs/images/tic-tac-toe.jpeg"> | ![selfplay](https://img.shields.io/badge/-selfplay-blue) ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | [code](./examples/selfplay/) |
| [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)<br> <img width="300px" height="auto" src="https://shimmy.farama.org/_images/dm_locomotion.png"> | ![continuous](https://img.shields.io/badge/-continuous-green) | [code](./examples/dm_control/) |
| [Omniverse Isaac Gym](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs)<br> <img width="300px" height="auto" src="https://user-images.githubusercontent.com/34286328/171454189-6afafbff-bb61-4aac-b518-24646007cb9f.gif"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | [code](./examples/isaac/) |
| [Snake](http://www.jidiai.cn/env_detail?envid=1)<br> <img width="300px" height="auto" src="./docs/images/snakes_1v1.gif"> | ![selfplay](https://img.shields.io/badge/-selfplay-blue) ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | [code](./examples/snake/) |
| [GridWorld](./examples/gridworld/)<br> <img width="300px" height="auto" src="./docs/images/gridworld.jpg"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | [code](./examples/gridworld/) |
| [Super Mario Bros](https://github.com/Kautenja/gym-super-mario-bros)<br> <img width="300px" height="auto" src="https://user-images.githubusercontent.com/2184469/40948820-3d15e5c2-6830-11e8-81d4-ecfaffee0a14.png"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![image](https://img.shields.io/badge/-image-red) | [code](./examples/super_mario/) |
| [Gym Retro](https://github.com/openai/retro)<br> <img width="300px" height="auto" src="./docs/images/gym-retro.jpg"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![image](https://img.shields.io/badge/-image-red) | [code](./examples/retro/) |

</div>

README.md

Lines changed: 2 additions & 1 deletion

Snake joins the "Environments currently supported by OpenRL" list:

```diff
@@ -104,7 +104,8 @@
 - [Atari](https://gymnasium.farama.org/environments/atari/)
 - [StarCraft II](https://github.com/oxwhirl/smac)
 - [Omniverse Isaac Gym](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs)
-- [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)
+- [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)
+- [Snake](http://www.jidiai.cn/env_detail?envid=1)
 - [GridWorld](./examples/gridworld/)
 - [Super Mario Bros](https://github.com/Kautenja/gym-super-mario-bros)
 - [Gym Retro](https://github.com/openai/retro)
```
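For reference, a minimal sketch of instantiating the newly registered environment, mirroring the pattern in `examples/snake/test_env.py` from this same commit (`ConvertObs` is a local helper shipped in `examples/snake/wrappers.py`, and `env_num=2` is an arbitrary choice):

```python
import numpy as np

from openrl.envs.common import make
from openrl.selfplay.wrappers.random_opponent_wrapper import RandomOpponentWrapper
from wrappers import ConvertObs  # local helper in examples/snake/

# "snakes_1v1" is registered by this commit; the opponent snake is driven
# by RandomOpponentWrapper, so the env can be stepped like a single-agent env.
env = make(
    "snakes_1v1",
    opponent_wrappers=[RandomOpponentWrapper],
    env_wrappers=[ConvertObs],
    env_num=2,
)

obs, info = env.reset()
done = False
while not np.any(done):
    obs, reward, done, info = env.step(env.random_action())
```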

README_zh.md

Lines changed: 2 additions & 1 deletion

The same Snake entry is added to the Chinese README's list ("Environments currently supported by OpenRL; for more details, please refer to [Gallery](Gallery.md)"):

```diff
@@ -86,7 +86,8 @@
 - [Atari](https://gymnasium.farama.org/environments/atari/)
 - [StarCraft II](https://github.com/oxwhirl/smac)
 - [Omniverse Isaac Gym](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs)
-- [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)
+- [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)
+- [Snake](http://www.jidiai.cn/env_detail?envid=1)
 - [GridWorld](./examples/gridworld/)
 - [Super Mario Bros](https://github.com/Kautenja/gym-super-mario-bros)
 - [Gym Retro](https://github.com/openai/retro)
```

docs/images/snakes_1v1.gif

New binary file, 108 KB: the 1v1 snake animation referenced by the new Gallery row.

examples/dm_control/train_ppo.py

Lines changed: 1 addition & 2 deletions

The two separate imports from `extra_wrappers` are consolidated into one line:

```diff
@@ -4,10 +4,9 @@
 from openrl.configs.config import create_config_parser
 from openrl.envs.common import make
 from openrl.envs.wrappers.base_wrapper import BaseWrapper
-from openrl.envs.wrappers.extra_wrappers import GIFWrapper
+from openrl.envs.wrappers.extra_wrappers import FrameSkip, GIFWrapper
 from openrl.modules.common import PPONet as Net
 from openrl.runners.common import PPOAgent as Agent
-from openrl.envs.wrappers.extra_wrappers import FrameSkip

 env_name = "dm_control/cartpole-balance-v0"
 # env_name = "dm_control/walker-walk-v0"
```
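Both wrapper classes are presumably handed to `make` through `env_wrappers`, the same mechanism `examples/snake/test_env.py` uses for `ConvertObs`. A hypothetical sketch; the environment name comes from this file, while the wrapper arguments and `env_num` are assumptions about defaults:

```python
from openrl.envs.common import make
from openrl.envs.wrappers.extra_wrappers import FrameSkip, GIFWrapper

# Hypothetical: wrapper classes passed via env_wrappers, mirroring the
# env_wrappers=[ConvertObs] pattern elsewhere in this commit; whether
# FrameSkip or GIFWrapper need extra constructor arguments is not shown here.
env = make(
    "dm_control/cartpole-balance-v0",
    env_num=4,
    env_wrappers=[FrameSkip, GIFWrapper],
)
```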

examples/snake/README.md

Lines changed: 7 additions & 0 deletions

A Usage section and a link to the original environment source are added:

````diff
@@ -1,10 +1,17 @@
 This is the example for the snake game.
 
+## Usage
+
+```bash
+python train_selfplay.py
+```
+
 ## Submit to JiDi
 
 Submission site: http://www.jidiai.cn/env_detail?envid=1.
 
 Snake scenarios: [here](https://github.com/jidiai/ai_lib/blob/7a6986f0cb543994277103dbf605e9575d59edd6/env/config.json#L94)
+Original Snake environment: [here](https://github.com/jidiai/ai_lib/blob/master/env/snakes.py)
````
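For context on what a JiDi submission looks like, this commit also bundles a random baseline under `examples/snake/submissions/random_agent/`. Its entry point has the shape below; this is a sketch reconstructed from the visible diff context, the loop structure is inferred, and `sample_single_dim` is a helper defined in that file:

```python
def my_controller(observation, action_space, is_act_continuous):
    # JiDi calls this once per turn; return one sub-action per
    # action-space dimension.
    joint_action = []
    for i in range(len(action_space)):
        # The random baseline samples uniformly; a trained submission
        # would query its policy here instead.
        player = sample_single_dim(action_space[i], is_act_continuous)
        joint_action.append(player)
    return joint_action
```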

examples/snake/selfplay.yaml

Lines changed: 3 additions & 0 deletions (new file)

```yaml
seed: 0
callbacks:
  - id: "ProgressBarCallback"
```
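The training script consumes this file through OpenRL's config parser, exactly as `train_selfplay.py` below does:

```python
from openrl.configs.config import create_config_parser

# Same call train_selfplay.py makes: the seed and callback list from
# selfplay.yaml are parsed into cfg, which is later passed to make() and Net().
cfg_parser = create_config_parser()
cfg = cfg_parser.parse_args(["--config", "selfplay.yaml"])
```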

examples/snake/submissions/random_agent/submission.py

Lines changed: 0 additions & 1 deletion

Only a trailing blank line after the final `return` of `my_controller` is removed:

```diff
@@ -27,4 +27,3 @@ def my_controller(observation, action_space, is_act_continuous):
         player = sample_single_dim(action_space[i], is_act_continuous)
         joint_action.append(player)
     return joint_action
-
```

examples/snake/test_env.py

Lines changed: 85 additions & 10 deletions

The inline smoke test is refactored into three functions covering the raw environment, the PettingZoo AEC wrapper, and the vectorized self-play environment; `__main__` runs the last of these. The file after the change:

```python
# limitations under the License.

""""""
import time

import numpy as np
from wrappers import ConvertObs

from openrl.envs.snake.snake import SnakeEatBeans
from openrl.envs.snake.snake_pettingzoo import SnakeEatBeansAECEnv
from openrl.selfplay.wrappers.random_opponent_wrapper import RandomOpponentWrapper


def test_raw_env():
    # Step the raw two-snake environment with random one-hot actions
    # until either snake is done.
    env = SnakeEatBeans()

    obs, info = env.reset()

    done = False
    while not np.any(done):
        a1 = np.zeros(4)
        a1[env.action_space.sample()] = 1
        a2 = np.zeros(4)
        a2[env.action_space.sample()] = 1
        obs, reward, done, info = env.step([a1, a2])
        print("obs:", obs)
        print("reward:", reward)
        print("done:", done)
        print("info:", info)


def test_aec_env():
    # Drive the PettingZoo AEC wrapper with random actions for up to
    # 20 steps, collecting one frame per round into a GIF.
    from PIL import Image

    img_list = []
    env = SnakeEatBeansAECEnv(render_mode="rgb_array")
    env.reset(seed=0)
    img = env.render()
    img_list.append(img)
    step = 0
    for player_name in env.agent_iter():
        if step > 20:
            break
        observation, reward, termination, truncation, info = env.last()
        if termination or truncation:
            break
        action = env.action_space(player_name).sample()
        env.step(action)
        img = env.render()
        if player_name == "player_0":
            img_list.append(img)
        step += 1
    print("Total steps: {}".format(step))

    save_path = "test.gif"
    img_list = [Image.fromarray(img) for img in img_list]
    img_list[0].save(save_path, save_all=True, append_images=img_list[1:], duration=500)


def test_vec_env():
    # Run the registered vectorized env against a random opponent.
    from openrl.envs.common import make

    env = make(
        "snakes_1v1",
        opponent_wrappers=[
            RandomOpponentWrapper,
        ],
        env_wrappers=[ConvertObs],
        render_mode="group_human",
        env_num=2,
    )
    obs, info = env.reset()
    step = 0
    done = False
    while not np.any(done):
        action = env.random_action()
        obs, reward, done, info = env.step(action)
        time.sleep(0.3)
        step += 1
    print("Total steps: {}".format(step))


if __name__ == "__main__":
    test_vec_env()
```
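Since the three functions follow pytest naming conventions, `pytest -s test_env.py` should run all of them (assuming pytest is installed), while `python test_env.py` executes only `test_vec_env`.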

examples/snake/train_selfplay.py

Lines changed: 87 additions & 0 deletions (new file)

The new training script fits a PPO agent against a random opponent, saves it, then reloads it and reports a win rate over five deterministic evaluation episodes:

```python
import numpy as np
import torch
from wrappers import ConvertObs

from openrl.configs.config import create_config_parser
from openrl.envs.common import make
from openrl.modules.common import PPONet as Net
from openrl.runners.common import PPOAgent as Agent
from openrl.selfplay.wrappers.random_opponent_wrapper import RandomOpponentWrapper


def train():
    cfg_parser = create_config_parser()
    cfg = cfg_parser.parse_args(["--config", "selfplay.yaml"])

    # Create the environment
    env_num = 10
    render_mode = None
    env = make(
        "snakes_1v1",
        render_mode=render_mode,
        env_num=env_num,
        asynchronous=True,
        opponent_wrappers=[RandomOpponentWrapper],
        env_wrappers=[ConvertObs],
        cfg=cfg,
    )
    # Create the neural network
    net = Net(env, cfg=cfg, device="cuda" if torch.cuda.is_available() else "cpu")
    # Create the agent
    agent = Agent(net)
    # Begin training
    agent.train(total_time_steps=100000)
    env.close()
    agent.save("./selfplay_agent/")
    return agent


def evaluation():
    print("Evaluation...")
    env_num = 1
    env = make(
        "snakes_1v1",
        env_num=env_num,
        asynchronous=True,
        opponent_wrappers=[RandomOpponentWrapper],
        env_wrappers=[ConvertObs],
        auto_reset=False,
    )

    cfg_parser = create_config_parser()
    cfg = cfg_parser.parse_args()
    net = Net(env, cfg=cfg, device="cuda" if torch.cuda.is_available() else "cpu")

    agent = Agent(net)

    agent.load("./selfplay_agent/")
    agent.set_env(env)
    env.reset(seed=0)

    total_reward = 0.0
    ep_num = 5
    for ep_now in range(ep_num):
        obs, info = env.reset()
        done = False
        step = 0

        while not np.any(done):
            # Predict the next action from the current observation
            action, _ = agent.act(obs, info, deterministic=True)
            obs, r, done, info = env.step(action)
            step += 1

            if np.any(done):
                # Count the episode as a win when the mean final reward is positive
                total_reward += np.mean(r) > 0
                print(f"{ep_now}/{ep_num}: reward: {np.mean(r)}")
    print(f"win rate: {total_reward/ep_num}")
    env.close()
    print("Evaluation finished.")


if __name__ == "__main__":
    train()
    evaluation()
```
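As the example README notes, `python train_selfplay.py` runs training and evaluation back to back. The win-rate arithmetic is straightforward: with `ep_num = 5`, winning three of the five evaluation episodes leaves `total_reward` at 3.0, so the script prints a win rate of 0.6.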
