OpenRL-Lab
diff --git a/‎examples/smac/README.md
Lines changed: 4 additions & 1 deletion b/‎examples/smac/README.md
Lines changed: 4 additions & 1 deletion
diff --git a/‎examples/snake/README.md
Lines changed: 10 additions & 0 deletions b/‎examples/snake/README.md
Lines changed: 10 additions & 0 deletions
diff --git a/‎examples/snake/submissions/random_agent/submission.py
Lines changed: 30 additions & 0 deletions b/‎examples/snake/submissions/random_agent/submission.py
Lines changed: 30 additions & 0 deletions
diff --git a/‎examples/snake/test_env.py
Lines changed: 32 additions & 0 deletions b/‎examples/snake/test_env.py
Lines changed: 32 additions & 0 deletions
diff --git a/‎openrl/envs/snake/__init__.py
Lines changed: 17 additions & 0 deletions b/‎openrl/envs/snake/__init__.py
Lines changed: 17 additions & 0 deletions
diff --git a/‎openrl/envs/snake/common.py
Lines changed: 184 additions & 0 deletions b/‎openrl/envs/snake/common.py
Lines changed: 184 additions & 0 deletions
diff --git a/‎openrl/envs/snake/discrete.py
Lines changed: 31 additions & 0 deletions b/‎openrl/envs/snake/discrete.py
Lines changed: 31 additions & 0 deletions
diff --git a/‎openrl/envs/snake/game.py
Lines changed: 47 additions & 0 deletions b/‎openrl/envs/snake/game.py
Lines changed: 47 additions & 0 deletions
@@ -11,4 +11,7 @@ Installation guide for Linux:
 
 Train SMAC with [MAPPO](https://arxiv.org/abs/2103.01955) algorithm:
 
-`python train_ppo.py --config smac_ppo.yaml`
+`python train_ppo.py --config smac_ppo.yaml`
+
+## Render replay on Mac
+
@@ -0,0 +1,10 @@
+
+This is the example for the snake game.
+
+
+## Submit to JiDi
+
+Submission site: http://www.jidiai.cn/env_detail?envid=1.
+
+Snake scenarios: [here](https://github.com/jidiai/ai_lib/blob/7a6986f0cb543994277103dbf605e9575d59edd6/env/config.json#L94)
+
@@ -0,0 +1,30 @@
+# -*- coding:utf-8  -*-
+def sample_single_dim(action_space_list_each, is_act_continuous):
+    """Draw one random action from a single action space.
+
+    Args:
+        action_space_list_each: Space object exposing ``sample()``. For the
+            discrete case its class must be named ``Discrete`` (needs ``n``)
+            or ``MultiDiscreteParticle`` (needs ``low`` and ``high``).
+        is_act_continuous: When true, the raw ``sample()`` result is returned.
+
+    Returns:
+        The raw sample for continuous spaces; otherwise a one-hot list
+        (``Discrete``) or the concatenation of one one-hot list per
+        dimension (``MultiDiscreteParticle``).
+
+    Raises:
+        NotImplementedError: If the action is discrete and the space class is
+            neither ``Discrete`` nor ``MultiDiscreteParticle``.
+    """
+    if is_act_continuous:
+        return action_space_list_each.sample()
+
+    # Dispatch on the class name so no space class has to be imported here.
+    space_type = action_space_list_each.__class__.__name__
+
+    if space_type == "Discrete":
+        each = [0] * action_space_list_each.n
+        idx = action_space_list_each.sample()
+        each[idx] = 1
+        return each
+
+    if space_type == "MultiDiscreteParticle":
+        each = []
+        # Number of choices per dimension.
+        nvec = action_space_list_each.high - action_space_list_each.low + 1
+        sample_indexes = action_space_list_each.sample()
+        for i, dim in enumerate(nvec):
+            new_action = [0] * dim
+            # NOTE(review): the sample is used directly as an index, which
+            # presumably assumes ``low`` is 0 -- confirm against the space.
+            new_action[sample_indexes[i]] = 1
+            each.extend(new_action)
+        return each
+
+    # Previously this fell through to ``return each`` with ``each`` unbound.
+    raise NotImplementedError(
+        "unsupported discrete action space: {}".format(space_type)
+    )
+
+
+def my_controller(observation, action_space, is_act_continuous):
+    """Return a random joint action, one entry per element of ``action_space``.
+
+    ``observation`` is accepted for interface compatibility but is not used:
+    every entry is drawn by ``sample_single_dim`` from the matching space.
+    """
+    return [
+        sample_single_dim(action_space[i], is_act_continuous)
+        for i in range(len(action_space))
+    ]
+
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright 2023 The OpenRL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+""""""
+import numpy as np
+from openrl.envs.snake.snake import SnakeEatBeans
+
+# Smoke test: drive the snake environment with random one-hot actions until
+# any entry of ``done`` becomes truthy, printing every transition.
+env = SnakeEatBeans()
+
+obs, info = env.reset()
+
+done = False
+while not np.any(done):
+    # Two one-hot vectors of length 4 are built per step
+    # (presumably one per controlled snake and one slot per move direction).
+    joint_action = []
+    for _ in range(2):
+        one_hot = np.zeros(4)
+        one_hot[env.action_space.sample()] = 1
+        joint_action.append(one_hot)
+    obs, reward, done, info = env.step(joint_action)
+    print("obs:", obs, reward, "\ndone:", done, info)
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright 2023 The OpenRL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+""""""
@@ -0,0 +1,184 @@
+import numpy as np
+import sys
+import os
+
+class HiddenPrints:
+    """Context manager that silences ``print`` by pointing ``sys.stdout`` at ``os.devnull``.
+
+    The stream that was active on entry is restored on exit, whether or not
+    the body raised. Exceptions are never suppressed.
+    """
+
+    def __enter__(self):
+        self._original_stdout = sys.stdout
+        # Keep our own handle so exit closes exactly the file opened here,
+        # even if the body rebinds sys.stdout to something else.
+        self._devnull = open(os.devnull, 'w')
+        sys.stdout = self._devnull
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Restore first so stdout is usable again even if close() fails.
+        sys.stdout = self._original_stdout
+        self._devnull.close()
+
+class Board:
+    """Territory map on which all snakes spread outward from their heads.
+
+    Every call to ``step`` grows each snake's frontier (``self.open``) by one
+    ring of wrap-around neighbours and records which snake claims each newly
+    reached cell. Cell values in ``self.board`` encode:
+
+    * ``blank_sign`` (``-snakes_count``): not reached yet;
+    * ``key`` (written in ``__init__``): cell occupied by the body of snake ``key``;
+    * ``state * snakes_count + key``: claimed by snake ``key`` at step ``state``;
+    * ``-state * snakes_count + 1`` (incremented per extra claimant): a cell
+      contested by equally long snakes, tracked in ``self.controversy``.
+
+    Each snake object must expose ``head``, ``pos``, ``len`` and a mutable
+    ``claimed_count``.
+    """
+
+    def __init__(self, board_height, board_width, snakes, beans_positions, teams):
+        """Build the initial board.
+
+        Args:
+            board_height: Number of rows.
+            board_width: Number of columns.
+            snakes: Mapping from snake key to snake object. ``step`` also
+                looks snakes up by ``0 .. len(snakes) - 1``.
+            beans_positions: Bean coordinates; stored, but not drawn on the
+                board (the drawing code is commented out).
+            teams: Stored as ``self.teams``; not used inside this class.
+        """
+        # print('create board, beans_position: ', beans_positions)
+        self.height = board_height
+        self.width = board_width
+        self.snakes = snakes
+        self.snakes_count = len(snakes)
+        self.beans_positions = beans_positions
+        self.blank_sign = -self.snakes_count
+        self.bean_sign = -self.snakes_count + 1
+        self.board = np.zeros((board_height, board_width), dtype=int) + self.blank_sign
+        self.open = dict()
+        for key, snake in self.snakes.items():
+            self.open[key] = [snake.head]  # state 0 open list, heads, ready to spread
+            # see [A* Pathfinding (E01: algorithm explanation)](https://www.youtube.com/watch?v=-L-WgKMFuhE)
+            for x, y in snake.pos:
+                self.board[x][y] = key  # obstacles, e.g. 0, 1, 2, 3, 4, 5
+        # for x, y in beans_positions:
+        #     self.board[x][y] = self.bean_sign  # beans
+
+        self.state = 0
+        self.controversy = dict()
+        self.teams = teams
+
+        # print('initial board')
+        # print(self.board)
+
+    def step(self):  # delay: prevent rear-end collision
+        """Spread every snake's frontier by one cell and update the claims.
+
+        Increments ``self.state``, frees the cell of each snake's
+        ``state``-th segment counted from the tail, then expands every open
+        cell to its four wrap-around neighbours. When two snakes reach a cell
+        in the same state the shorter snake wins; equally long snakes share
+        it, and each is credited ``1 / number_of_claimants`` at the end.
+        Mutates ``self.board``, ``self.open``, ``self.controversy`` and each
+        snake's ``claimed_count``; returns nothing.
+        """
+        new_open = {key: [] for key in self.snakes.keys()}
+        self.state += 1  # update state
+        # if self.state > delay:
+        #     for key, snake in self.snakes.items():   # drop tail
+        #         if snake.len >= self.state:
+        #             self.board[snake.pos[-(self.state - delay)][0]][snake.pos[-(self.state - delay)][1]] \
+        #                 = self.blank_sign
+        for key, snake in self.snakes.items():
+            if snake.len >= self.state:
+                self.board[snake.pos[-self.state][0]][snake.pos[-self.state][1]] = self.blank_sign  # drop tail
+        for key, value in self.open.items():  # value: e.g. [[8, 3], [6, 3], [7, 4]]
+            # cells just vacated by the other snakes' tails in this state
+            others_tail_pos = [self.snakes[_].pos[-self.state]
+                               if self.snakes[_].len >= self.state else []
+                               for _ in set(range(self.snakes_count)) - {key}]
+            for x, y in value:
+                # print('start to spread snake {} on grid ({}, {})'.format(key, x, y))
+                for x_, y_ in [((x + 1) % self.height, y),  # down
+                               ((x - 1) % self.height, y),  # up
+                               (x, (y + 1) % self.width),  # right
+                               (x, (y - 1) % self.width)]:  # left
+                    sign = self.board[x_][y_]
+                    idx = sign % self.snakes_count  # which snake, e.g. 0, 1, 2, 3, 4, 5 / number of claims
+                    state = sign // self.snakes_count  # manhattan distance to snake who claim the point or its negative
+                    if sign == self.blank_sign:  # grid in initial state
+                        if [x_, y_] in others_tail_pos:
+                            # print('do not spread other snakes tail, in case of rear-end collision')
+                            continue  # do not spread other snakes' tail, in case of rear-end collision
+                        self.board[x_][y_] = self.state * self.snakes_count + key
+                        self.snakes[key].claimed_count += 1
+                        new_open[key].append([x_, y_])
+
+                    elif key != idx and self.state == state:
+                        # second claim, init controversy, change grid value from + to -
+                        # print(
+                        #     '\tgird ({}, {}) in the same state claimed by different snakes '
+                        #     'with sign {}, idx {} and state {}'.format(
+                        #         x_, y_, sign, idx, state))
+                        if self.snakes[idx].len > self.snakes[key].len:  # shorter snake claim the controversial grid
+                            # print('\t\tsnake {} is shorter than snake {}'.format(key, idx))
+                            self.snakes[idx].claimed_count -= 1
+                            new_open[idx].remove([x_, y_])
+                            self.board[x_][y_] = self.state * self.snakes_count + key
+                            self.snakes[key].claimed_count += 1
+                            new_open[key].append([x_, y_])
+                        elif self.snakes[idx].len == self.snakes[key].len:  # controversial claim
+                            # print(
+                            #     '\t\tcontroversy! first claimed by snake {}, then claimed by snake {}'.format(idx, key))
+                            self.controversy[(x_, y_)] = {'state': self.state,
+                                                          'length': self.snakes[idx].len,
+                                                          'indexes': [idx, key]}
+                            # first claim by snake idx, then claim by snake key
+                            self.board[x_][y_] = -self.state * self.snakes_count + 1
+                            # if + 2, not enough for all snakes claim one grid!!
+                            self.snakes[idx].claimed_count -= 1  # controversy, no snake claim this grid!!
+                            new_open[key].append([x_, y_])
+                        else:  # (self.snakes[idx].len < self.snakes[key].len)
+                            pass  # longer snake do not claim the controversial grid
+
+                    elif (x_, y_) in self.controversy \
+                            and key not in self.controversy[(x_, y_)]['indexes'] \
+                            and self.state + state == 0:  # third claim or more
+                        # print('snake {} meets third or more claim in grid ({}, {})'.format(key, x_, y_))
+                        controversy = self.controversy[(x_, y_)]
+                        # pprint.pprint(controversy)
+                        if controversy['length'] > self.snakes[key].len:  # shortest snake claim grid, do 4 things
+                            # print('\t\tsnake {} is shortest'.format(key))
+                            indexes_count = len(controversy['indexes'])
+                            for i in controversy['indexes']:
+                                self.snakes[i].claimed_count -= 1 / indexes_count  # update claimed_count !
+                                new_open[i].remove([x_, y_])
+                            del self.controversy[(x_, y_)]
+                            self.board[x_][y_] = self.state * self.snakes_count + key
+                            self.snakes[key].claimed_count += 1
+                            new_open[key].append([x_, y_])
+                        elif controversy['length'] == self.snakes[key].len:  # controversial claim
+                            # print('\t\tcontroversy! multi claimed by snake {}'.format(key))
+                            self.controversy[(x_, y_)]['indexes'].append(key)
+                            self.board[x_][y_] += 1
+                            new_open[key].append([x_, y_])
+                        else:  # (controversy['length'] < self.snakes[key].len)
+                            pass  # longer snake do not claim the controversial grid
+                    else:
+                        pass  # do nothing with lower state grids
+
+        self.open = new_open  # update open
+        # update controversial snakes' claimed_count (in fraction) in the end
+        # NOTE(review): this iterates every entry still in self.controversy,
+        # including ones created in earlier states -- confirm that crediting
+        # them again on each step is intended.
+        for _, d in self.controversy.items():
+            controversial_snake_count = len(d['indexes'])  # number of controversial snakes
+            for idx in d['indexes']:
+                self.snakes[idx].claimed_count += 1 / controversial_snake_count
+
+
+class SnakePos:
+    """Body, heading and legal moves of one snake on a wrap-around board.
+
+    Directions and actions share one encoding: 0 up, 1 down, 2 left, 3 right.
+    ``legal_action`` lists the three moves that do not reverse into the body,
+    ordered left, straight, right relative to the heading, and
+    ``legal_position`` holds the matching target cells.
+    """
+
+    def __init__(self, snake_positions, board_height, board_width, beans_positions):
+        """Derive the heading and legal moves from the first two segments.
+
+        Args:
+            snake_positions: List of ``[row, col]`` cells, head first. The
+                list is kept by reference and mutated by ``step``.
+            board_height: Number of rows (rows wrap around).
+            board_width: Number of columns (columns wrap around).
+            beans_positions: Collection of bean cells, consulted by ``step``.
+
+        Raises:
+            AssertionError: If the head is not adjacent (with wrap-around) to
+                the second segment.
+        """
+        self.pos = snake_positions  # [[2, 9], [2, 8], [2, 7]]
+        self.len = len(snake_positions)  # >= 3
+        self.head = snake_positions[0]
+        self.beans_positions = beans_positions
+        self.claimed_count = 0
+
+        # Offset from the second segment to the head, modulo the board size.
+        displace = [(self.head[0] - snake_positions[1][0]) % board_height,
+                    (self.head[1] - snake_positions[1][1]) % board_width]
+        # print('creat snake, pos: ', self.pos, 'displace:', displace)
+        if displace == [board_height - 1, 0]:  # all action are ordered by left, up, right, relative to the body
+            self.dir = 0  # up
+            self.legal_action = [2, 0, 3]
+        elif displace == [1, 0]:
+            self.dir = 1  # down
+            self.legal_action = [3, 1, 2]
+        elif displace == [0, board_width - 1]:
+            self.dir = 2  # left
+            self.legal_action = [1, 2, 0]
+        elif displace == [0, 1]:
+            self.dir = 3  # right
+            self.legal_action = [0, 3, 1]
+        else:
+            # Explicit raise: a bare ``assert`` would vanish under ``python -O``.
+            raise AssertionError('snake positions error')
+        # Neighbour cells indexed by action id: up, down, left, right.
+        positions = [[(self.head[0] - 1) % board_height, self.head[1]],
+                     [(self.head[0] + 1) % board_height, self.head[1]],
+                     [self.head[0], (self.head[1] - 1) % board_width],
+                     [self.head[0], (self.head[1] + 1) % board_width]]
+        self.legal_position = [positions[_] for _ in self.legal_action]
+
+    def get_action(self, position):
+        """Return the action id that moves the head onto ``position``.
+
+        Raises:
+            AssertionError: If ``position`` is not one of ``legal_position``.
+        """
+        if position not in self.legal_position:
+            raise AssertionError('the start and end points do not match')
+        idx = self.legal_position.index(position)
+        return self.legal_action[idx]  # 0, 1, 2, 3: up, down, left, right
+
+    def step(self, legal_input):
+        """Move the head one cell, growing by one if the new cell holds a bean.
+
+        Args:
+            legal_input: Either a target cell from ``legal_position`` or an
+                action id from ``legal_action``.
+
+        Raises:
+            AssertionError: If ``legal_input`` is neither.
+
+        Note:
+            Only ``head``, ``pos`` and ``len`` are updated; ``dir``,
+            ``legal_action`` and ``legal_position`` keep their old values.
+        """
+        if legal_input in self.legal_position:
+            position = legal_input
+        elif legal_input in self.legal_action:
+            idx = self.legal_action.index(legal_input)
+            position = self.legal_position[idx]
+        else:
+            raise AssertionError('illegal snake move')
+        self.head = position
+        self.pos.insert(0, position)
+        if position in self.beans_positions:  # eat a bean
+            self.len += 1
+        else:  # do not eat a bean
+            self.pos.pop()
@@ -0,0 +1,31 @@
+import numpy as np
+from .space import Space
+
+
+class Discrete(Space):
+    r"""Space made of the integers :math:`\{ 0, 1, \dots, n-1 \}`.
+
+    Example::
+        >>> Discrete(2)
+    """
+
+    def __init__(self, n):
+        """Create a space with ``n`` elements; ``n`` must be non-negative."""
+        assert n >= 0
+        self.n = n
+        super().__init__((), np.int64)
+
+    def sample(self):
+        """Draw one element using the space's ``np_random`` generator."""
+        return self.np_random.randint(self.n)
+
+    def contains(self, x):
+        """Return whether ``x`` is an int or 0-d integer array within ``[0, n)``."""
+        if isinstance(x, int):
+            value = x
+        elif (isinstance(x, (np.generic, np.ndarray))
+                and x.dtype.char in np.typecodes['AllInteger']
+                and x.shape == ()):
+            # NumPy integer scalar or zero-dimensional integer array.
+            value = int(x)
+        else:
+            return False
+        return 0 <= value < self.n
+
+    def __repr__(self):
+        return "Discrete(%d)" % self.n
+
+    def __eq__(self, other):
+        """Two ``Discrete`` spaces are equal when they have the same ``n``."""
+        if not isinstance(other, Discrete):
+            return False
+        return self.n == other.n
@@ -0,0 +1,47 @@
+# -*- coding:utf-8  -*-
+# Author: zruizhi
+# Created: 2020/7/10 10:24 AM
+# Description:
+from abc import ABC, abstractmethod
+
+
+class Game(ABC):
+    """Abstract base class describing the interface of a game environment.
+
+    Subclasses must implement ``is_terminal``, ``step`` and ``reset``. The
+    other hooks raise ``NotImplementedError`` unless overridden, except
+    ``get_render_data``, which returns its argument unchanged.
+    """
+
+    def __init__(self, n_player, is_obs_continuous, is_act_continuous, game_name, agent_nums, obs_type):
+        """Record the game's configuration and clear its runtime state."""
+        # Configuration supplied by the caller.
+        self.game_name = game_name
+        self.n_player = n_player
+        self.agent_nums = agent_nums
+        self.obs_type = obs_type
+        self.is_obs_continuous = is_obs_continuous
+        self.is_act_continuous = is_act_continuous
+        # Runtime state, unset until a subclass fills it in.
+        self.current_state = None
+        self.all_observes = None
+
+    def get_config(self, player_id):
+        """Hook for per-player configuration; not implemented here."""
+        raise NotImplementedError()
+
+    def get_render_data(self, current_state):
+        """Return the data to render; by default the state itself."""
+        return current_state
+
+    def set_current_state(self, current_state):
+        """Hook for replacing the current state; not implemented here."""
+        raise NotImplementedError()
+
+    @abstractmethod
+    def is_terminal(self):
+        """Abstract: must be implemented by subclasses."""
+        raise NotImplementedError()
+
+    def get_next_state(self, all_action):
+        """Hook for computing the next state; not implemented here."""
+        raise NotImplementedError()
+
+    def get_reward(self, all_action):
+        """Hook for computing rewards; not implemented here."""
+        raise NotImplementedError()
+
+    @abstractmethod
+    def step(self, all_action):
+        """Abstract: must be implemented by subclasses."""
+        raise NotImplementedError()
+
+    @abstractmethod
+    def reset(self):
+        """Abstract: must be implemented by subclasses."""
+        raise NotImplementedError()
+
+    def set_action_space(self):
+        """Hook for building the action space; not implemented here."""
+        raise NotImplementedError()