From e7f4a8a0a12e65328b3f03a5904dae574a69b7e0 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Tue, 15 Jun 2021 23:47:31 +0530 Subject: [PATCH 01/28] add StatsBase package --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index ea30e6b..5e5a68c 100644 --- a/Project.toml +++ b/Project.toml @@ -12,6 +12,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" ReinforcementLearningBase = "e575027e-6cd6-5018-9292-cdc6200d2b44" Requires = "ae029012-a4dd-5104-9daa-d747884805df" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] Crayons = "4.0" From fd659a730bd7ca5efa0508a2392cc23ee12717a0 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Tue, 15 Jun 2021 23:50:24 +0530 Subject: [PATCH 02/28] add SingleRoomUndirectedBatch --- src/envs/envs.jl | 1 + src/envs/single_room_undirected_batch.jl | 200 +++++++++++++++++++++++ 2 files changed, 201 insertions(+) create mode 100644 src/envs/single_room_undirected_batch.jl diff --git a/src/envs/envs.jl b/src/envs/envs.jl index 730d069..840955c 100644 --- a/src/envs/envs.jl +++ b/src/envs/envs.jl @@ -42,3 +42,4 @@ include("snake.jl") include("catcher.jl") include("transport.jl") include("collect_gems_undirected_multi_agent.jl") +include("single_room_undirected_batch.jl") diff --git a/src/envs/single_room_undirected_batch.jl b/src/envs/single_room_undirected_batch.jl new file mode 100644 index 0000000..0e768d2 --- /dev/null +++ b/src/envs/single_room_undirected_batch.jl @@ -0,0 +1,200 @@ +module ModuleSingleRoomUndirectedBatch + +import Crayons +import ..GridWorlds as GW +import Random +import ReinforcementLearningBase as RLBase +import StatsBase as SB + +const MOVE_UP = 1 +const MOVE_DOWN = 2 +const MOVE_LEFT = 3 +const MOVE_RIGHT = 4 + +const AGENT = 1 +const WALL = 2 +const GOAL = 3 + +const DUMMY_CHARACTER = '⋅' +const CHARACTERS = ('☻', '█', '♥') +const FOREGROUND_COLORS = (:light_red, :white, :light_red) + +function move(action::Integer, i, j) + if action == MOVE_UP + return i - 1, j + elseif action == MOVE_DOWN + return i + 1, j + elseif action == MOVE_LEFT + return i, j - 1 + elseif action == MOVE_RIGHT + return i, j + 1 + end +end + +struct SingleRoomUndirectedBatch{I, R, RNG} <: GW.AbstractGridWorld + tile_map::BitArray{4} + agent_position::Array{I, 2} + reward::Array{R, 1} + rng::Array{RNG, 1} + done::BitArray{1} + terminal_reward::R + goal_position::Array{I, 2} +end + +function SingleRoomUndirectedBatch(; I = Int32, R = Float32, num_envs = 2, height = 8, width = 8, rng = [Random.MersenneTwister() for i in 1:num_envs]) + tile_map = BitArray(undef, num_envs, 3, height, width) + agent_position = Array{I}(undef, num_envs, 2) + reward = Array{R}(undef, num_envs) + done = BitArray(undef, num_envs) + goal_position = Array{I}(undef, num_envs, 2) + terminal_reward = one(R) + + inner_area = CartesianIndices((2 : height - 1, 2 : width - 1)) + + for env_id in 1:num_envs + tile_map[env_id, :, :, :] .= false + tile_map[env_id, WALL, 1, :] .= true + tile_map[env_id, WALL, height, :] .= true + tile_map[env_id, WALL, :, 1] .= true + tile_map[env_id, WALL, :, width] .= true + + random_positions = SB.sample(rng[env_id], inner_area, 2, replace = false) + + agent_position[env_id, 1] = random_positions[1][1] + agent_position[env_id, 2] = random_positions[1][2] + tile_map[env_id, AGENT, random_positions[1]] = true + + goal_position[env_id, 1] = random_positions[2][1] + goal_position[env_id, 2] = random_positions[2][2] + tile_map[env_id, GOAL, random_positions[2]] = true + + reward[env_id] = zero(R) + done[env_id] = false + end + + env = SingleRoomUndirectedBatch(tile_map, agent_position, reward, rng, done, terminal_reward, goal_position) + + RLBase.reset!(env) + + return env +end + +RLBase.state_space(env::SingleRoomUndirectedBatch, ::RLBase.InternalState, ::RLBase.DefaultPlayer) = nothing +RLBase.state(env::SingleRoomUndirectedBatch, ::RLBase.InternalState, ::RLBase.DefaultPlayer) = copy(env.tile_map) + +RLBase.action_space(env::SingleRoomUndirectedBatch, player::RLBase.DefaultPlayer) = (MOVE_UP, MOVE_DOWN, MOVE_LEFT, MOVE_RIGHT) +RLBase.reward(env::SingleRoomUndirectedBatch, ::RLBase.DefaultPlayer) = env.reward +RLBase.is_terminated(env::SingleRoomUndirectedBatch) = env.done + +function RLBase.reset!(env::SingleRoomUndirectedBatch{I, R}) where {I, R} + tile_map = env.tile_map + agent_position = env.agent_position + goal_position = env.goal_position + reward = env.reward + done = env.done + rng = env.rng + + num_envs = size(tile_map, 1) + inner_area = CartesianIndices((2 : size(tile_map, 3) - 1, 2 : size(tile_map, 4) - 1)) + + for env_id in 1:num_envs + tile_map[env_id, AGENT, agent_position[env_id, 1], agent_position[env_id, 2]] = false + tile_map[env_id, GOAL, goal_position[env_id, 1], goal_position[env_id, 2]] = false + + random_positions = SB.sample(rng[env_id], inner_area, 2, replace = false) + + agent_position[env_id, 1] = random_positions[1][1] + agent_position[env_id, 2] = random_positions[1][2] + tile_map[env_id, AGENT, random_positions[1]] = true + + goal_position[env_id, 1] = random_positions[2][1] + goal_position[env_id, 2] = random_positions[2][2] + tile_map[env_id, GOAL, random_positions[2]] = true + + reward[env_id] = zero(R) + done[env_id] = false + end + + return nothing +end + +function (env::SingleRoomUndirectedBatch{I, R})(action::Vector) where {I, R} + tile_map = env.tile_map + agent_position = env.agent_position + goal_position = env.goal_position + reward = env.reward + done = env.done + rng = env.rng + terminal_reward = env.terminal_reward + + num_envs = size(tile_map, 1) + + for env_id in 1:num_envs + current_position_i = agent_position[env_id, 1] + current_position_j = agent_position[env_id, 2] + next_position_i, next_position_j = move(action[env_id], current_position_i, current_position_j) + + if !tile_map[env_id, WALL, next_position_i, next_position_j] + tile_map[env_id, AGENT, current_position_i, current_position_j] = false + agent_position[env_id, 1] = next_position_i + agent_position[env_id, 2] = next_position_j + tile_map[env_id, AGENT, next_position_i, next_position_j] = true + end + + new_current_position_i = agent_position[env_id, 1] + new_current_position_j = agent_position[env_id, 2] + + if tile_map[env_id, GOAL, new_current_position_i, new_current_position_j] + done[env_id] = true + reward[env_id] = terminal_reward + else + done[env_id] = false + reward[env_id] = zero(R) + end + end + + return nothing +end + +function Base.show(io::IO, ::MIME"text/plain", env::SingleRoomUndirectedBatch) + tile_map = env.tile_map + reward = env.reward + done = env.done + + num_envs, num_objects, height, width = size(tile_map) + + print(io, "objects = ") + for i in 1 : length(CHARACTERS) + print(io, Crayons.Crayon(foreground = FOREGROUND_COLORS[i]), CHARACTERS[i], Crayons.Crayon(reset = true)) + if i < length(CHARACTERS) + print(io, ", ") + else + print(io, "\n") + end + end + println(io, "dummy character = ", DUMMY_CHARACTER) + + for env_id in 1:num_envs + println(io) + println(io, "env_id = ", env_id) + for i in 1:height + for j in 1:width + idx = findfirst(@view tile_map[env_id, :, i, j]) + if isnothing(idx) + print(io, DUMMY_CHARACTER) + else + print(io, Crayons.Crayon(foreground = FOREGROUND_COLORS[idx]), CHARACTERS[idx], Crayons.Crayon(reset = true)) + end + end + + println(io) + end + + println(io, "reward = ", reward[env_id]) + println(io, "done = ", done[env_id]) + end + + return nothing +end + +end # module From 0b8c1cba4e13d2dacb79fa0c30301b405c73d42a Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Wed, 16 Jun 2021 15:26:37 +0530 Subject: [PATCH 03/28] add playability to SingleRoomUndirectedBatch --- src/GridWorlds.jl | 1 + src/envs/single_room_undirected_batch.jl | 75 ++++++++++++++++++++++++ src/play.jl | 45 ++++++++++++++ 3 files changed, 121 insertions(+) create mode 100644 src/play.jl diff --git a/src/GridWorlds.jl b/src/GridWorlds.jl index d77d239..7cb077b 100644 --- a/src/GridWorlds.jl +++ b/src/GridWorlds.jl @@ -19,6 +19,7 @@ include("actions.jl") include("objects.jl") include("grid_world_base.jl") include("abstract_grid_world.jl") +include("play.jl") include("envs/envs.jl") include("textual_rendering.jl") diff --git a/src/envs/single_room_undirected_batch.jl b/src/envs/single_room_undirected_batch.jl index 0e768d2..e0b5744 100644 --- a/src/envs/single_room_undirected_batch.jl +++ b/src/envs/single_room_undirected_batch.jl @@ -2,7 +2,9 @@ module ModuleSingleRoomUndirectedBatch import Crayons import ..GridWorlds as GW +import ..Play import Random +import REPL import ReinforcementLearningBase as RLBase import StatsBase as SB @@ -197,4 +199,77 @@ function Base.show(io::IO, ::MIME"text/plain", env::SingleRoomUndirectedBatch) return nothing end +get_string_key_bindings(env::GW.AbstractGridWorld) = """Key bindings: + 'q': quit + 'r': RLBase.reset!(env) + 'w': MOVE_UP + 's': MOVE_DOWN + 'a': MOVE_LEFT + 'd': MOVE_RIGHT + """ + +function play!(terminal::REPL.Terminals.UnixTerminal, env::SingleRoomUndirectedBatch; file_name::Union{Nothing, AbstractString} = nothing) + REPL.Terminals.raw!(terminal, true) + + terminal_out = terminal.out_stream + terminal_in = terminal.in_stream + file = Play.open_maybe(file_name) + + Play.write_io1_maybe_io2(terminal_out, file, Play.CLEAR_SCREEN) + Play.write_io1_maybe_io2(terminal_out, file, Play.MOVE_CURSOR_TO_ORIGIN) + Play.write_io1_maybe_io2(terminal_out, file, Play.HIDE_CURSOR) + + num_envs = size(env.tile_map, 1) + chars = Array{Char}(undef, num_envs) + + action_chars = ('w', 's', 'a', 'd') + + char_to_action = Dict('w' => MOVE_UP, + 's' => MOVE_DOWN, + 'a' => MOVE_LEFT, + 'd' => MOVE_RIGHT, + ) + + action = Array{Int}(undef, num_envs) + + try + while true + Play.write_io1_maybe_io2(terminal_out, file, get_string_key_bindings(env)) + Play.show_io1_maybe_io2(terminal_out, file, MIME("text/plain"), env) + + for i in 1:num_envs + chars[i] = read(terminal_in, Char) + end + + Play.write_io1_maybe_io2(terminal_out, file, Play.EMPTY_SCREEN) + + if 'q' in chars + Play.write_io1_maybe_io2(terminal_out, file, Play.SHOW_CURSOR) + Play.close_maybe(file) + REPL.Terminals.raw!(terminal, false) + return nothing + elseif 'r' in chars + RLBase.reset!(env) + elseif all(char -> char in action_chars, chars) + for i in 1:num_envs + action[i] = char_to_action[chars[i]] + end + env(action) + else + @warn "No procedure exists for this character sequence: $chars" + end + + Play.write_io1_maybe_io2(terminal_out, file, "Last character sequence = $(chars)\n") + end + finally + Play.write_io1_maybe_io2(terminal_out, file, Play.SHOW_CURSOR) + Play.close_maybe(file) + REPL.Terminals.raw!(terminal, false) + end + + return nothing +end + +play!(env::SingleRoomUndirectedBatch; file_name = nothing) = play!(REPL.TerminalMenus.terminal, env, file_name = file_name) + end # module diff --git a/src/play.jl b/src/play.jl new file mode 100644 index 0000000..03525e4 --- /dev/null +++ b/src/play.jl @@ -0,0 +1,45 @@ +module Play + +import REPL + +const ESC = Char(0x1B) +const HIDE_CURSOR = ESC * "[?25l" +const SHOW_CURSOR = ESC * "[?25h" +const CLEAR_SCREEN = ESC * "[2J" +const MOVE_CURSOR_TO_ORIGIN = ESC * "[H" +const CLEAR_SCREEN_BEFORE_CURSOR = ESC * "[1J" +const EMPTY_SCREEN = CLEAR_SCREEN_BEFORE_CURSOR * MOVE_CURSOR_TO_ORIGIN + +open_maybe(file_name::AbstractString) = open(file_name, "w") +open_maybe(::Nothing) = nothing + +close_maybe(io::IO) = close(io) +close_maybe(io::Nothing) = nothing + +write_maybe(io::IO, content) = write(io, content) +write_maybe(io::Nothing, content) = 0 +write_io1_maybe_io2(io1::IO, io2::Union{Nothing, IO}, content) = write(io1, content) + write_maybe(io2, content) + +show_maybe(io::IO, mime::MIME, content) = show(io, mime, content) +show_maybe(io::Nothing, mime::MIME, content) = nothing +function show_io1_maybe_io2(io1::IO, io2::Union{Nothing, IO}, mime::MIME, content) + show(io1, mime, content) + show_maybe(io2, mime, content) +end + +function replay(terminal::REPL.Terminals.UnixTerminal, file_name::AbstractString, frame_rate) + terminal_out = terminal.out_stream + delimiter = get_string_empty_screen() + frames = split(read(file_name, String), delimiter) + for frame in frames + write(terminal_out, frame) + sleep(1 / frame_rate) + write(terminal_out, delimiter) + end + + return nothing +end + +replay(file_name; frame_rate = 2) = replay(REPL.TerminalMenus.terminal, file_name, frame_rate) + +end # module From 1bb2641de889682927be76ed9cb6db61a5bfa010 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Wed, 16 Jun 2021 15:33:37 +0530 Subject: [PATCH 04/28] add keyword force to RLBase.reset! method --- src/envs/single_room_undirected_batch.jl | 28 +++++++++++++----------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/envs/single_room_undirected_batch.jl b/src/envs/single_room_undirected_batch.jl index e0b5744..757a4b6 100644 --- a/src/envs/single_room_undirected_batch.jl +++ b/src/envs/single_room_undirected_batch.jl @@ -76,7 +76,7 @@ function SingleRoomUndirectedBatch(; I = Int32, R = Float32, num_envs = 2, heigh env = SingleRoomUndirectedBatch(tile_map, agent_position, reward, rng, done, terminal_reward, goal_position) - RLBase.reset!(env) + RLBase.reset!(env, force = true) return env end @@ -88,7 +88,7 @@ RLBase.action_space(env::SingleRoomUndirectedBatch, player::RLBase.DefaultPlayer RLBase.reward(env::SingleRoomUndirectedBatch, ::RLBase.DefaultPlayer) = env.reward RLBase.is_terminated(env::SingleRoomUndirectedBatch) = env.done -function RLBase.reset!(env::SingleRoomUndirectedBatch{I, R}) where {I, R} +function RLBase.reset!(env::SingleRoomUndirectedBatch{I, R}; force = false) where {I, R} tile_map = env.tile_map agent_position = env.agent_position goal_position = env.goal_position @@ -100,21 +100,23 @@ function RLBase.reset!(env::SingleRoomUndirectedBatch{I, R}) where {I, R} inner_area = CartesianIndices((2 : size(tile_map, 3) - 1, 2 : size(tile_map, 4) - 1)) for env_id in 1:num_envs - tile_map[env_id, AGENT, agent_position[env_id, 1], agent_position[env_id, 2]] = false - tile_map[env_id, GOAL, goal_position[env_id, 1], goal_position[env_id, 2]] = false + if force || done[env_id] + tile_map[env_id, AGENT, agent_position[env_id, 1], agent_position[env_id, 2]] = false + tile_map[env_id, GOAL, goal_position[env_id, 1], goal_position[env_id, 2]] = false - random_positions = SB.sample(rng[env_id], inner_area, 2, replace = false) + random_positions = SB.sample(rng[env_id], inner_area, 2, replace = false) - agent_position[env_id, 1] = random_positions[1][1] - agent_position[env_id, 2] = random_positions[1][2] - tile_map[env_id, AGENT, random_positions[1]] = true + agent_position[env_id, 1] = random_positions[1][1] + agent_position[env_id, 2] = random_positions[1][2] + tile_map[env_id, AGENT, random_positions[1]] = true - goal_position[env_id, 1] = random_positions[2][1] - goal_position[env_id, 2] = random_positions[2][2] - tile_map[env_id, GOAL, random_positions[2]] = true + goal_position[env_id, 1] = random_positions[2][1] + goal_position[env_id, 2] = random_positions[2][2] + tile_map[env_id, GOAL, random_positions[2]] = true - reward[env_id] = zero(R) - done[env_id] = false + reward[env_id] = zero(R) + done[env_id] = false + end end return nothing From 0d115af2b6e2e1535e65caea3e34b851f828ad94 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Wed, 16 Jun 2021 15:54:42 +0530 Subject: [PATCH 05/28] fix replay method --- src/play.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/play.jl b/src/play.jl index 03525e4..5096f3f 100644 --- a/src/play.jl +++ b/src/play.jl @@ -29,7 +29,7 @@ end function replay(terminal::REPL.Terminals.UnixTerminal, file_name::AbstractString, frame_rate) terminal_out = terminal.out_stream - delimiter = get_string_empty_screen() + delimiter = EMPTY_SCREEN frames = split(read(file_name, String), delimiter) for frame in frames write(terminal_out, frame) From d39fae711f0c14bbed04b3887d276b3a8cdf1242 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Wed, 16 Jun 2021 15:54:53 +0530 Subject: [PATCH 06/28] write characters to terminal out while playing --- src/envs/single_room_undirected_batch.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/envs/single_room_undirected_batch.jl b/src/envs/single_room_undirected_batch.jl index 757a4b6..f297d60 100644 --- a/src/envs/single_room_undirected_batch.jl +++ b/src/envs/single_room_undirected_batch.jl @@ -240,7 +240,9 @@ function play!(terminal::REPL.Terminals.UnixTerminal, env::SingleRoomUndirectedB Play.show_io1_maybe_io2(terminal_out, file, MIME("text/plain"), env) for i in 1:num_envs - chars[i] = read(terminal_in, Char) + c = read(terminal_in, Char) + chars[i] = c + write(terminal_out, c) end Play.write_io1_maybe_io2(terminal_out, file, Play.EMPTY_SCREEN) From 20d103f99139147aca88d662c8b4f96cc7c6867c Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Wed, 16 Jun 2021 15:55:40 +0530 Subject: [PATCH 07/28] ignore scratchpad.jl --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 622cef2..aea776a 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,5 @@ Manifest.toml # vim temporary files *~ *.swp + +/src/scratchpad.jl From 4c16b209464f6f418661f7bd0b22c64843d236fc Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Wed, 16 Jun 2021 23:31:35 +0530 Subject: [PATCH 08/28] add tests for SingleRoomUndirectedBatch --- test/runtests.jl | 109 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 76 insertions(+), 33 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index b6d8879..bfc7f1c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -30,6 +30,8 @@ ENVS = [GW.EmptyRoomDirected, GW.CollectGemsUndirectedMultiAgent, ] +BATCH_ENVS = [GW.ModuleSingleRoomUndirectedBatch.SingleRoomUndirectedBatch] + const MAX_STEPS = 3000 const NUM_RESETS = 3 @@ -60,45 +62,86 @@ get_terminal_returns(env::GW.Catcher) = env.terminal_reward:env.ball_reward:MAX_ get_terminal_returns(env::GW.TransportDirected) = (GW.get_terminal_reward(env),) get_terminal_returns(env::GW.TransportUndirected) = (GW.get_terminal_reward(env),) +get_terminal_returns(env::GW.ModuleSingleRoomUndirectedBatch.SingleRoomUndirectedBatch) = (env.terminal_reward,) + Test.@testset "GridWorlds.jl" begin - for Env in ENVS - Test.@testset "$(Env)" begin - T = Float32 - env = Env(T = T) - for _ in 1:NUM_RESETS - RLBase.reset!(env) - Test.@test RLBase.reward(env) == zero(T) - Test.@test RLBase.is_terminated(env) == false - - total_reward = zero(T) - for i in 1:MAX_STEPS - action = rand(RLBase.action_space(env)) - env(action) - total_reward += RLBase.reward(env) - - - if Env == GW.CollectGemsUndirectedMultiAgent - for i in 1:GW.get_num_agents(env) - agent_pos = env.agent_pos[i] - Test.@test 1 ≤ agent_pos[1] ≤ GW.get_height(env) - Test.@test 1 ≤ agent_pos[2] ≤ GW.get_width(env) - end - else - Test.@test 1 ≤ GW.get_agent_pos(env)[1] ≤ GW.get_height(env) - Test.@test 1 ≤ GW.get_agent_pos(env)[2] ≤ GW.get_width(env) - end + Test.@testset "Single Environments" begin + for Env in ENVS + Test.@testset "$(Env)" begin + T = Float32 + env = Env(T = T) + for _ in 1:NUM_RESETS + RLBase.reset!(env) + Test.@test RLBase.reward(env) == zero(T) + Test.@test RLBase.is_terminated(env) == false + + total_reward = zero(T) + for i in 1:MAX_STEPS + action = rand(RLBase.action_space(env)) + env(action) + total_reward += RLBase.reward(env) - if RLBase.is_terminated(env) - if Env == GW.Snake - Test.@test (total_reward in get_terminal_returns_win(env) || total_reward in get_terminal_returns_lose(env)) + + if Env == GW.CollectGemsUndirectedMultiAgent + for i in 1:GW.get_num_agents(env) + agent_pos = env.agent_pos[i] + Test.@test 1 ≤ agent_pos[1] ≤ GW.get_height(env) + Test.@test 1 ≤ agent_pos[2] ≤ GW.get_width(env) + end else - Test.@test total_reward in get_terminal_returns(env) + Test.@test 1 ≤ GW.get_agent_pos(env)[1] ≤ GW.get_height(env) + Test.@test 1 ≤ GW.get_agent_pos(env)[2] ≤ GW.get_width(env) + end + + if RLBase.is_terminated(env) + if Env == GW.Snake + Test.@test (total_reward in get_terminal_returns_win(env) || total_reward in get_terminal_returns_lose(env)) + else + Test.@test total_reward in get_terminal_returns(env) + end + break + end + + if i == MAX_STEPS + @info "$Env not terminated after MAX_STEPS = $MAX_STEPS" end - break end + end + end + end + end + + Test.@testset "Batch Environments" begin + for Env in BATCH_ENVS + Test.@testset "$(Env)" begin + num_envs = 1 + R = Float32 + I = Int32 + env = Env(I = I, R = R, num_envs = num_envs) + height = size(env.tile_map, 3) + width = size(env.tile_map, 4) + for _ in 1:NUM_RESETS + RLBase.reset!(env) + Test.@test RLBase.reward(env) == zeros(R, num_envs) + Test.@test RLBase.is_terminated(env) == falses(num_envs) - if i == MAX_STEPS - @info "$Env not terminated after MAX_STEPS = $MAX_STEPS" + total_reward = zeros(R, num_envs) + for i in 1:MAX_STEPS + action = [rand(RLBase.action_space(env)) for _ in 1:num_envs] + env(action) + total_reward .+= RLBase.reward(env) + + Test.@test 1 ≤ env.agent_position[1, 1] ≤ height + Test.@test 1 ≤ env.agent_position[1, 2] ≤ width + + if RLBase.is_terminated(env)[1] + Test.@test total_reward[1] in get_terminal_returns(env) + break + end + + if i == MAX_STEPS + @info "$Env not terminated after MAX_STEPS = $MAX_STEPS" + end end end end From 342deb070bef9fbbdf25fea18209c449b24b82c0 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Thu, 17 Jun 2021 23:36:12 +0530 Subject: [PATCH 09/28] set state style to internal state, copy reward & done --- src/envs/single_room_undirected_batch.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/envs/single_room_undirected_batch.jl b/src/envs/single_room_undirected_batch.jl index f297d60..cefef9a 100644 --- a/src/envs/single_room_undirected_batch.jl +++ b/src/envs/single_room_undirected_batch.jl @@ -81,12 +81,13 @@ function SingleRoomUndirectedBatch(; I = Int32, R = Float32, num_envs = 2, heigh return env end +RLBase.StateStyle(env::SingleRoomUndirectedBatch) = RLBase.InternalState{Any}() RLBase.state_space(env::SingleRoomUndirectedBatch, ::RLBase.InternalState, ::RLBase.DefaultPlayer) = nothing RLBase.state(env::SingleRoomUndirectedBatch, ::RLBase.InternalState, ::RLBase.DefaultPlayer) = copy(env.tile_map) RLBase.action_space(env::SingleRoomUndirectedBatch, player::RLBase.DefaultPlayer) = (MOVE_UP, MOVE_DOWN, MOVE_LEFT, MOVE_RIGHT) -RLBase.reward(env::SingleRoomUndirectedBatch, ::RLBase.DefaultPlayer) = env.reward -RLBase.is_terminated(env::SingleRoomUndirectedBatch) = env.done +RLBase.reward(env::SingleRoomUndirectedBatch, ::RLBase.DefaultPlayer) = copy(env.reward) +RLBase.is_terminated(env::SingleRoomUndirectedBatch) = copy(env.done) function RLBase.reset!(env::SingleRoomUndirectedBatch{I, R}; force = false) where {I, R} tile_map = env.tile_map From 4926c997388ff87c53f7b6ced95c8686f3c7f3ae Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Thu, 17 Jun 2021 23:37:30 +0530 Subject: [PATCH 10/28] add benchmark_multi_threaded.jl --- benchmark/benchmark_multi_threaded.jl | 143 ++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 benchmark/benchmark_multi_threaded.jl diff --git a/benchmark/benchmark_multi_threaded.jl b/benchmark/benchmark_multi_threaded.jl new file mode 100644 index 0000000..6698210 --- /dev/null +++ b/benchmark/benchmark_multi_threaded.jl @@ -0,0 +1,143 @@ +import GridWorlds as GW +import ReinforcementLearningBase as RLBase +import BenchmarkTools as BT +import Dates + +const STEPS_PER_RESET = 100 +const NUM_RESETS = 100 +const NUM_ENVS = 64 + +const information = Dict() + +ENVS = [GW.ModuleSingleRoomUndirectedBatch.SingleRoomUndirectedBatch] + +function run_random_policy!(env, num_resets, steps_per_reset) + num_envs = size(env.tile_map, 1) + action = Array{eltype(RLBase.action_space(env))}(undef, num_envs) + for _ in 1:num_resets + RLBase.reset!(env, force = true) + for _ in 1:steps_per_reset + state = RLBase.state(env) + for i in 1:num_envs + action[i] = rand(RLBase.action_space(env)) + end + env(action) + is_terminated = RLBase.is_terminated(env) + reward = RLBase.reward(env) + end + end + + return nothing +end + +function format_benchmark(str::String) + l = split(str, "\n") + deleteat!(l, (1, 4, 9)) + return strip.(l) +end + +function write_benchmarks(information, file) + io = open(file, "w") + + write(io, "Date: " * Dates.format(Dates.now(), "yyyy_mm_dd_HH_MM_SS") * "\n") + write(io, "# List of Environments\n") + + for Env in ENVS + name = Env.body.body.body.name.name + write(io, " 1. [$(String(name))](#$(lowercase(String(name))))\n") + end + + write(io, "\n") + write(io, "# Benchmarks\n\n") + + for Env in ENVS + name = Env.body.body.body.name.name + env_benchmark = information[name] + + write(io, "# $(String(name))\n\n") + + write(io, "#### Run uniformly random policy, NUM_RESETS = $(NUM_RESETS), STEPS_PER_RESET = $(STEPS_PER_RESET), TOTAL_STEPS = $(NUM_RESETS * STEPS_PER_RESET)\n\n") + for line in format_benchmark(repr("text/plain", env_benchmark[:run_random_policy])) + write(io, line * "\n\n") + end + + write(io, "#### $(String(Symbol(Env)))()\n\n") + for line in format_benchmark(repr("text/plain", env_benchmark[:instantiation])) + write(io, line * "\n\n") + end + + write(io, "#### RLBase.reset!(env)\n\n") + for line in format_benchmark(repr("text/plain", env_benchmark[:reset!])) + write(io, line * "\n\n") + end + + write(io, "#### RLBase.state(env)\n\n") + for line in format_benchmark(repr("text/plain", env_benchmark[:state])) + write(io, line * "\n\n") + end + + write(io, "#### RLBase.action_space(env)\n\n") + for line in format_benchmark(repr("text/plain", env_benchmark[:action_space])) + write(io, line * "\n\n") + end + + write(io, "#### RLBase.is_terminated(env)\n\n") + for line in format_benchmark(repr("text/plain", env_benchmark[:is_terminated])) + write(io, line * "\n\n") + end + + write(io, "#### RLBase.reward(env)\n\n") + for line in format_benchmark(repr("text/plain", env_benchmark[:reward])) + write(io, line * "\n\n") + end + + for action in keys(env_benchmark[:action_info]) + write(io, "#### env($action)\n\n") + for line in format_benchmark(repr("text/plain", env_benchmark[:action_info][action])) + write(io, line * "\n\n") + end + end + + end + + close(io) +end + +# compile everything once +for Env in ENVS + env = Env(num_envs = NUM_ENVS) + run_random_policy!(env, NUM_RESETS, STEPS_PER_RESET) +end + +@info "First run (for compilation) is complete" + +for Env in ENVS + + env = Env(num_envs = NUM_ENVS) + + env_benchmark = Dict() + + env_benchmark[:run_random_policy] = BT.@benchmark run_random_policy!($(Ref(env))[], $(Ref(NUM_RESETS))[], $(Ref(STEPS_PER_RESET))[]) + + env_benchmark[:instantiation] = BT.@benchmark $(Ref(Env))[](num_envs = $(NUM_ENVS)[]) + + env_benchmark[:reset!] = BT.@benchmark RLBase.reset!($(Ref(env))[], force = true) + env_benchmark[:state] = BT.@benchmark RLBase.state($(Ref(env))[]) + env_benchmark[:action_space] = BT.@benchmark RLBase.action_space($(Ref(env))[]) + env_benchmark[:is_terminated] = BT.@benchmark RLBase.is_terminated($(Ref(env))[]) + env_benchmark[:reward] = BT.@benchmark RLBase.reward($(Ref(env))[]) + + action_info = Dict() + for action in RLBase.action_space(env) + actions = fill(action, NUM_ENVS) + action_info[Symbol(action)] = BT.@benchmark $(Ref(env))[]($(Ref(actions))[]) + end + env_benchmark[:action_info] = action_info + + name = Env.body.body.body.name.name + information[name] = env_benchmark + + @info "$(name) benchmark complete" +end + +write_benchmarks(information, "benchmark_multi_threaded.md") From 7824551941d08eebd3a9187823d4c55a4f027ee9 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Thu, 17 Jun 2021 23:45:04 +0530 Subject: [PATCH 11/28] print NUM_ENVS --- benchmark/benchmark_multi_threaded.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/benchmark_multi_threaded.jl b/benchmark/benchmark_multi_threaded.jl index 6698210..7c5c28d 100644 --- a/benchmark/benchmark_multi_threaded.jl +++ b/benchmark/benchmark_multi_threaded.jl @@ -56,7 +56,7 @@ function write_benchmarks(information, file) write(io, "# $(String(name))\n\n") - write(io, "#### Run uniformly random policy, NUM_RESETS = $(NUM_RESETS), STEPS_PER_RESET = $(STEPS_PER_RESET), TOTAL_STEPS = $(NUM_RESETS * STEPS_PER_RESET)\n\n") + write(io, "#### Run uniformly random policy, NUM_ENVS = $(NUM_ENVS), NUM_RESETS = $(NUM_RESETS), STEPS_PER_RESET = $(STEPS_PER_RESET), TOTAL_STEPS = $(NUM_RESETS * STEPS_PER_RESET)\n\n") for line in format_benchmark(repr("text/plain", env_benchmark[:run_random_policy])) write(io, line * "\n\n") end From d711bccf95b42fcab7423c8b882a259dc955b850 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Mon, 21 Jun 2021 17:56:36 +0530 Subject: [PATCH 12/28] move num_envs to the last dimension --- src/envs/single_room_undirected_batch.jl | 76 ++++++++++++------------ 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/src/envs/single_room_undirected_batch.jl b/src/envs/single_room_undirected_batch.jl index cefef9a..a3676b8 100644 --- a/src/envs/single_room_undirected_batch.jl +++ b/src/envs/single_room_undirected_batch.jl @@ -44,31 +44,31 @@ struct SingleRoomUndirectedBatch{I, R, RNG} <: GW.AbstractGridWorld end function SingleRoomUndirectedBatch(; I = Int32, R = Float32, num_envs = 2, height = 8, width = 8, rng = [Random.MersenneTwister() for i in 1:num_envs]) - tile_map = BitArray(undef, num_envs, 3, height, width) - agent_position = Array{I}(undef, num_envs, 2) + tile_map = BitArray(undef, 3, height, width, num_envs) + agent_position = Array{I}(undef, 2, num_envs) reward = Array{R}(undef, num_envs) done = BitArray(undef, num_envs) - goal_position = Array{I}(undef, num_envs, 2) + goal_position = Array{I}(undef, 2, num_envs) terminal_reward = one(R) inner_area = CartesianIndices((2 : height - 1, 2 : width - 1)) for env_id in 1:num_envs - tile_map[env_id, :, :, :] .= false - tile_map[env_id, WALL, 1, :] .= true - tile_map[env_id, WALL, height, :] .= true - tile_map[env_id, WALL, :, 1] .= true - tile_map[env_id, WALL, :, width] .= true + tile_map[:, :, :, env_id] .= false + tile_map[WALL, 1, :, env_id] .= true + tile_map[WALL, height, :, env_id] .= true + tile_map[WALL, :, 1, env_id] .= true + tile_map[WALL, :, width, env_id] .= true random_positions = SB.sample(rng[env_id], inner_area, 2, replace = false) - agent_position[env_id, 1] = random_positions[1][1] - agent_position[env_id, 2] = random_positions[1][2] - tile_map[env_id, AGENT, random_positions[1]] = true + agent_position[1, env_id] = random_positions[1][1] + agent_position[2, env_id] = random_positions[1][2] + tile_map[AGENT, random_positions[1], env_id] = true - goal_position[env_id, 1] = random_positions[2][1] - goal_position[env_id, 2] = random_positions[2][2] - tile_map[env_id, GOAL, random_positions[2]] = true + goal_position[1, env_id] = random_positions[2][1] + goal_position[2, env_id] = random_positions[2][2] + tile_map[GOAL, random_positions[2], env_id] = true reward[env_id] = zero(R) done[env_id] = false @@ -97,23 +97,23 @@ function RLBase.reset!(env::SingleRoomUndirectedBatch{I, R}; force = false) wher done = env.done rng = env.rng - num_envs = size(tile_map, 1) - inner_area = CartesianIndices((2 : size(tile_map, 3) - 1, 2 : size(tile_map, 4) - 1)) + num_objects, height, width, num_envs = size(tile_map) + inner_area = CartesianIndices((2 : height - 1, 2 : width - 1)) for env_id in 1:num_envs if force || done[env_id] - tile_map[env_id, AGENT, agent_position[env_id, 1], agent_position[env_id, 2]] = false - tile_map[env_id, GOAL, goal_position[env_id, 1], goal_position[env_id, 2]] = false + tile_map[AGENT, agent_position[1, env_id], agent_position[2, env_id], env_id] = false + tile_map[GOAL, goal_position[1, env_id], goal_position[2, env_id], env_id] = false random_positions = SB.sample(rng[env_id], inner_area, 2, replace = false) - agent_position[env_id, 1] = random_positions[1][1] - agent_position[env_id, 2] = random_positions[1][2] - tile_map[env_id, AGENT, random_positions[1]] = true + agent_position[1, env_id] = random_positions[1][1] + agent_position[2, env_id] = random_positions[1][2] + tile_map[AGENT, random_positions[1], env_id] = true - goal_position[env_id, 1] = random_positions[2][1] - goal_position[env_id, 2] = random_positions[2][2] - tile_map[env_id, GOAL, random_positions[2]] = true + goal_position[1, env_id] = random_positions[2][1] + goal_position[2, env_id] = random_positions[2][2] + tile_map[GOAL, random_positions[2], env_id] = true reward[env_id] = zero(R) done[env_id] = false @@ -132,24 +132,24 @@ function (env::SingleRoomUndirectedBatch{I, R})(action::Vector) where {I, R} rng = env.rng terminal_reward = env.terminal_reward - num_envs = size(tile_map, 1) + num_envs = size(tile_map, 4) for env_id in 1:num_envs - current_position_i = agent_position[env_id, 1] - current_position_j = agent_position[env_id, 2] + current_position_i = agent_position[1, env_id] + current_position_j = agent_position[2, env_id] next_position_i, next_position_j = move(action[env_id], current_position_i, current_position_j) - if !tile_map[env_id, WALL, next_position_i, next_position_j] - tile_map[env_id, AGENT, current_position_i, current_position_j] = false - agent_position[env_id, 1] = next_position_i - agent_position[env_id, 2] = next_position_j - tile_map[env_id, AGENT, next_position_i, next_position_j] = true + if !tile_map[WALL, next_position_i, next_position_j, env_id] + tile_map[AGENT, current_position_i, current_position_j, env_id] = false + agent_position[1, env_id] = next_position_i + agent_position[2, env_id] = next_position_j + tile_map[AGENT, next_position_i, next_position_j, env_id] = true end - new_current_position_i = agent_position[env_id, 1] - new_current_position_j = agent_position[env_id, 2] + new_current_position_i = agent_position[1, env_id] + new_current_position_j = agent_position[2, env_id] - if tile_map[env_id, GOAL, new_current_position_i, new_current_position_j] + if tile_map[GOAL, new_current_position_i, new_current_position_j, env_id] done[env_id] = true reward[env_id] = terminal_reward else @@ -166,7 +166,7 @@ function Base.show(io::IO, ::MIME"text/plain", env::SingleRoomUndirectedBatch) reward = env.reward done = env.done - num_envs, num_objects, height, width = size(tile_map) + num_objects, height, width, num_envs = size(tile_map) print(io, "objects = ") for i in 1 : length(CHARACTERS) @@ -184,7 +184,7 @@ function Base.show(io::IO, ::MIME"text/plain", env::SingleRoomUndirectedBatch) println(io, "env_id = ", env_id) for i in 1:height for j in 1:width - idx = findfirst(@view tile_map[env_id, :, i, j]) + idx = findfirst(@view tile_map[:, i, j, env_id]) if isnothing(idx) print(io, DUMMY_CHARACTER) else @@ -222,7 +222,7 @@ function play!(terminal::REPL.Terminals.UnixTerminal, env::SingleRoomUndirectedB Play.write_io1_maybe_io2(terminal_out, file, Play.MOVE_CURSOR_TO_ORIGIN) Play.write_io1_maybe_io2(terminal_out, file, Play.HIDE_CURSOR) - num_envs = size(env.tile_map, 1) + num_envs = size(env.tile_map, 4) chars = Array{Char}(undef, num_envs) action_chars = ('w', 's', 'a', 'd') From 2d68a0ad4c7d63ee73ce0e3d1122dad301aeb608 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Mon, 21 Jun 2021 18:07:06 +0530 Subject: [PATCH 13/28] update tests for batch envs --- test/runtests.jl | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index bfc7f1c..6419b15 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -114,12 +114,12 @@ Test.@testset "GridWorlds.jl" begin Test.@testset "Batch Environments" begin for Env in BATCH_ENVS Test.@testset "$(Env)" begin - num_envs = 1 + num_envs = 2 R = Float32 I = Int32 env = Env(I = I, R = R, num_envs = num_envs) - height = size(env.tile_map, 3) - width = size(env.tile_map, 4) + height = size(env.tile_map, 2) + width = size(env.tile_map, 3) for _ in 1:NUM_RESETS RLBase.reset!(env) Test.@test RLBase.reward(env) == zeros(R, num_envs) @@ -131,15 +131,19 @@ Test.@testset "GridWorlds.jl" begin env(action) total_reward .+= RLBase.reward(env) - Test.@test 1 ≤ env.agent_position[1, 1] ≤ height - Test.@test 1 ≤ env.agent_position[1, 2] ≤ width + for env_id in 1:num_envs + Test.@test 1 ≤ env.agent_position[1, env_id] ≤ height + Test.@test 1 ≤ env.agent_position[2, env_id] ≤ width + end - if RLBase.is_terminated(env)[1] - Test.@test total_reward[1] in get_terminal_returns(env) - break + for env_id in 1:num_envs + if RLBase.is_terminated(env)[env_id] + Test.@test total_reward[env_id] in get_terminal_returns(env) + total_reward[env_id] = zero(total_reward[env_id]) + end end - if i == MAX_STEPS + if i == MAX_STEPS && !any(RLBase.is_terminated(env)) @info "$Env not terminated after MAX_STEPS = $MAX_STEPS" end end From 3dbd497b0f5ce21c32a6d3bc3a526c96dd260835 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Mon, 21 Jun 2021 18:08:24 +0530 Subject: [PATCH 14/28] don't copy tile_map, reward, and done in RLBase API --- src/envs/single_room_undirected_batch.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/envs/single_room_undirected_batch.jl b/src/envs/single_room_undirected_batch.jl index a3676b8..73655f1 100644 --- a/src/envs/single_room_undirected_batch.jl +++ b/src/envs/single_room_undirected_batch.jl @@ -83,11 +83,11 @@ end RLBase.StateStyle(env::SingleRoomUndirectedBatch) = RLBase.InternalState{Any}() RLBase.state_space(env::SingleRoomUndirectedBatch, ::RLBase.InternalState, ::RLBase.DefaultPlayer) = nothing -RLBase.state(env::SingleRoomUndirectedBatch, ::RLBase.InternalState, ::RLBase.DefaultPlayer) = copy(env.tile_map) +RLBase.state(env::SingleRoomUndirectedBatch, ::RLBase.InternalState, ::RLBase.DefaultPlayer) = env.tile_map RLBase.action_space(env::SingleRoomUndirectedBatch, player::RLBase.DefaultPlayer) = (MOVE_UP, MOVE_DOWN, MOVE_LEFT, MOVE_RIGHT) -RLBase.reward(env::SingleRoomUndirectedBatch, ::RLBase.DefaultPlayer) = copy(env.reward) -RLBase.is_terminated(env::SingleRoomUndirectedBatch) = copy(env.done) +RLBase.reward(env::SingleRoomUndirectedBatch, ::RLBase.DefaultPlayer) = env.reward +RLBase.is_terminated(env::SingleRoomUndirectedBatch) = env.done function RLBase.reset!(env::SingleRoomUndirectedBatch{I, R}; force = false) where {I, R} tile_map = env.tile_map From 8ec31a06bccc8b06a324bc299090ca42f2d5c913 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Mon, 21 Jun 2021 18:10:56 +0530 Subject: [PATCH 15/28] remove unnecessary RLBase.DefaultPlayer --- src/envs/single_room_undirected_batch.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/envs/single_room_undirected_batch.jl b/src/envs/single_room_undirected_batch.jl index 73655f1..eb8f6dc 100644 --- a/src/envs/single_room_undirected_batch.jl +++ b/src/envs/single_room_undirected_batch.jl @@ -82,11 +82,11 @@ function SingleRoomUndirectedBatch(; I = Int32, R = Float32, num_envs = 2, heigh end RLBase.StateStyle(env::SingleRoomUndirectedBatch) = RLBase.InternalState{Any}() -RLBase.state_space(env::SingleRoomUndirectedBatch, ::RLBase.InternalState, ::RLBase.DefaultPlayer) = nothing -RLBase.state(env::SingleRoomUndirectedBatch, ::RLBase.InternalState, ::RLBase.DefaultPlayer) = env.tile_map +RLBase.state_space(env::SingleRoomUndirectedBatch, ::RLBase.InternalState) = nothing +RLBase.state(env::SingleRoomUndirectedBatch, ::RLBase.InternalState) = env.tile_map -RLBase.action_space(env::SingleRoomUndirectedBatch, player::RLBase.DefaultPlayer) = (MOVE_UP, MOVE_DOWN, MOVE_LEFT, MOVE_RIGHT) -RLBase.reward(env::SingleRoomUndirectedBatch, ::RLBase.DefaultPlayer) = env.reward +RLBase.action_space(env::SingleRoomUndirectedBatch) = (MOVE_UP, MOVE_DOWN, MOVE_LEFT, MOVE_RIGHT) +RLBase.reward(env::SingleRoomUndirectedBatch) = env.reward RLBase.is_terminated(env::SingleRoomUndirectedBatch) = env.done function RLBase.reset!(env::SingleRoomUndirectedBatch{I, R}; force = false) where {I, R} From 73406ea5b1c829578b6bd0ef76fb3ba7bafa1038 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Mon, 21 Jun 2021 18:12:52 +0530 Subject: [PATCH 16/28] rename benchmark_multi_threaded.jl to benchmark_batch.jl --- benchmark/{benchmark_multi_threaded.jl => benchmark_batch.jl} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename benchmark/{benchmark_multi_threaded.jl => benchmark_batch.jl} (100%) diff --git a/benchmark/benchmark_multi_threaded.jl b/benchmark/benchmark_batch.jl similarity index 100% rename from benchmark/benchmark_multi_threaded.jl rename to benchmark/benchmark_batch.jl From 45bf86aa31e5fadfa4a115d9f6ab72ad116ab455 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Mon, 21 Jun 2021 18:23:59 +0530 Subject: [PATCH 17/28] fix and cleanup benchmark_batch --- benchmark/benchmark_batch.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmark/benchmark_batch.jl b/benchmark/benchmark_batch.jl index 7c5c28d..d95b4ec 100644 --- a/benchmark/benchmark_batch.jl +++ b/benchmark/benchmark_batch.jl @@ -12,7 +12,7 @@ const information = Dict() ENVS = [GW.ModuleSingleRoomUndirectedBatch.SingleRoomUndirectedBatch] function run_random_policy!(env, num_resets, steps_per_reset) - num_envs = size(env.tile_map, 1) + num_envs = size(env.tile_map, 4) action = Array{eltype(RLBase.action_space(env))}(undef, num_envs) for _ in 1:num_resets RLBase.reset!(env, force = true) @@ -32,7 +32,7 @@ end function format_benchmark(str::String) l = split(str, "\n") - deleteat!(l, (1, 4, 9)) + deleteat!(l, (1, 3, 4, 5, 7, 8, 9, 10, 11)) return strip.(l) end @@ -140,4 +140,4 @@ for Env in ENVS @info "$(name) benchmark complete" end -write_benchmarks(information, "benchmark_multi_threaded.md") +write_benchmarks(information, "benchmark_batch.md") From ed2b37bfc2094c1e4efa5da9d3260d0df48cefc2 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Mon, 21 Jun 2021 18:29:14 +0530 Subject: [PATCH 18/28] make move function type stable (huge improvement in performance) --- src/envs/single_room_undirected_batch.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/envs/single_room_undirected_batch.jl b/src/envs/single_room_undirected_batch.jl index eb8f6dc..966d792 100644 --- a/src/envs/single_room_undirected_batch.jl +++ b/src/envs/single_room_undirected_batch.jl @@ -30,6 +30,8 @@ function move(action::Integer, i, j) return i, j - 1 elseif action == MOVE_RIGHT return i, j + 1 + else + return i, j end end From 2ceedadbcec2d280ec24bc9cc8b176c243b2edf8 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Mon, 21 Jun 2021 19:38:54 +0530 Subject: [PATCH 19/28] add function sample_two_positions_without_replacement --- src/envs/single_room_undirected_batch.jl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/envs/single_room_undirected_batch.jl b/src/envs/single_room_undirected_batch.jl index 966d792..4b657f2 100644 --- a/src/envs/single_room_undirected_batch.jl +++ b/src/envs/single_room_undirected_batch.jl @@ -6,6 +6,7 @@ import ..Play import Random import REPL import ReinforcementLearningBase as RLBase +import StaticArrays as SA import StatsBase as SB const MOVE_UP = 1 @@ -21,6 +22,17 @@ const DUMMY_CHARACTER = '⋅' const CHARACTERS = ('☻', '█', '♥') const FOREGROUND_COLORS = (:light_red, :white, :light_red) +function sample_two_positions_without_replacement(rng, region) + position1 = rand(rng, region) + position2 = rand(rng, region) + + while position1 == position2 + position2 = rand(rng, region) + end + + return position1, position2 +end + function move(action::Integer, i, j) if action == MOVE_UP return i - 1, j @@ -62,7 +74,7 @@ function SingleRoomUndirectedBatch(; I = Int32, R = Float32, num_envs = 2, heigh tile_map[WALL, :, 1, env_id] .= true tile_map[WALL, :, width, env_id] .= true - random_positions = SB.sample(rng[env_id], inner_area, 2, replace = false) + random_positions = sample_two_positions_without_replacement(rng[env_id], inner_area) agent_position[1, env_id] = random_positions[1][1] agent_position[2, env_id] = random_positions[1][2] @@ -107,7 +119,7 @@ function RLBase.reset!(env::SingleRoomUndirectedBatch{I, R}; force = false) wher tile_map[AGENT, agent_position[1, env_id], agent_position[2, env_id], env_id] = false tile_map[GOAL, goal_position[1, env_id], goal_position[2, env_id], env_id] = false - random_positions = SB.sample(rng[env_id], inner_area, 2, replace = false) + random_positions = sample_two_positions_without_replacement(rng[env_id], inner_area) agent_position[1, env_id] = random_positions[1][1] agent_position[2, env_id] = random_positions[1][2] From 24269558ea71a344f29870f4d8b3681fae0797a7 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Thu, 24 Jun 2021 13:30:36 +0530 Subject: [PATCH 20/28] add DataStructures package in benchmarking code --- benchmark/Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmark/Project.toml b/benchmark/Project.toml index 2f876ff..dd589b6 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -1,5 +1,6 @@ [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" GridWorlds = "e15a9946-cd7f-4d03-83e2-6c30bacb0043" Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" From cda573ee96a0c49289005de4d4f6d19ab41ea020 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Thu, 24 Jun 2021 13:31:45 +0530 Subject: [PATCH 21/28] add ACTION_NAMES in ModuleSingleRoomUndirectedBatch --- src/envs/single_room_undirected_batch.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/envs/single_room_undirected_batch.jl b/src/envs/single_room_undirected_batch.jl index 4b657f2..c7ed553 100644 --- a/src/envs/single_room_undirected_batch.jl +++ b/src/envs/single_room_undirected_batch.jl @@ -13,6 +13,7 @@ const MOVE_UP = 1 const MOVE_DOWN = 2 const MOVE_LEFT = 3 const MOVE_RIGHT = 4 +const ACTION_NAMES = (:MOVE_UP, :MOVE_DOWN, :MOVE_LEFT, :MOVE_RIGHT) const AGENT = 1 const WALL = 2 From 26d1231738a203959981612d73315a99f3445c90 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Thu, 24 Jun 2021 13:32:22 +0530 Subject: [PATCH 22/28] refactor benchmark_batch.jl --- benchmark/benchmark_batch.jl | 162 +++++++++++++++++------------------ 1 file changed, 78 insertions(+), 84 deletions(-) diff --git a/benchmark/benchmark_batch.jl b/benchmark/benchmark_batch.jl index d95b4ec..1b395ea 100644 --- a/benchmark/benchmark_batch.jl +++ b/benchmark/benchmark_batch.jl @@ -1,14 +1,14 @@ -import GridWorlds as GW -import ReinforcementLearningBase as RLBase import BenchmarkTools as BT +import DataStructures as DS import Dates +import GridWorlds as GW +import ReinforcementLearningBase as RLBase +import Statistics const STEPS_PER_RESET = 100 const NUM_RESETS = 100 const NUM_ENVS = 64 -const information = Dict() - ENVS = [GW.ModuleSingleRoomUndirectedBatch.SingleRoomUndirectedBatch] function run_random_policy!(env, num_resets, steps_per_reset) @@ -30,114 +30,108 @@ function run_random_policy!(env, num_resets, steps_per_reset) return nothing end -function format_benchmark(str::String) - l = split(str, "\n") - deleteat!(l, (1, 3, 4, 5, 7, 8, 9, 10, 11)) - return strip.(l) +function compile_envs(Envs) + for Env in Envs + env = Env(num_envs = NUM_ENVS) + run_random_policy!(env, NUM_RESETS, STEPS_PER_RESET) + end + + @info "Compiled and ran all environments" + + return nothing end -function write_benchmarks(information, file) - io = open(file, "w") +function benchmark_batch_env(Env, num_resets, steps_per_reset, num_envs) + benchmark = DS.OrderedDict() - write(io, "Date: " * Dates.format(Dates.now(), "yyyy_mm_dd_HH_MM_SS") * "\n") - write(io, "# List of Environments\n") + parent_module = parentmodule(Env) - for Env in ENVS - name = Env.body.body.body.name.name - write(io, " 1. [$(String(name))](#$(lowercase(String(name))))\n") - end + env = Env(num_envs = num_envs) - write(io, "\n") - write(io, "# Benchmarks\n\n") + benchmark[:random_policy] = BT.@benchmark run_random_policy!($(Ref(env))[], $(Ref(num_resets))[], $(Ref(steps_per_reset))[]) + benchmark[:reset] = BT.@benchmark RLBase.reset!($(Ref(env))[], force = true) + benchmark[:state] = BT.@benchmark RLBase.state($(Ref(env))[]) - for Env in ENVS - name = Env.body.body.body.name.name - env_benchmark = information[name] + for action in RLBase.action_space(env) + action_name = parent_module.ACTION_NAMES[action] + batch_action = fill(action, NUM_ENVS) + benchmark[action_name] = BT.@benchmark $(Ref(env))[]($(Ref(batch_action))[]) + end - write(io, "# $(String(name))\n\n") + benchmark[:action_space] = BT.@benchmark RLBase.action_space($(Ref(env))[]) + benchmark[:is_terminated] = BT.@benchmark RLBase.is_terminated($(Ref(env))[]) + benchmark[:reward] = BT.@benchmark RLBase.reward($(Ref(env))[]) - write(io, "#### Run uniformly random policy, NUM_ENVS = $(NUM_ENVS), NUM_RESETS = $(NUM_RESETS), STEPS_PER_RESET = $(STEPS_PER_RESET), TOTAL_STEPS = $(NUM_RESETS * STEPS_PER_RESET)\n\n") - for line in format_benchmark(repr("text/plain", env_benchmark[:run_random_policy])) - write(io, line * "\n\n") - end + @info "$(nameof(Env)) benchmarked" - write(io, "#### $(String(Symbol(Env)))()\n\n") - for line in format_benchmark(repr("text/plain", env_benchmark[:instantiation])) - write(io, line * "\n\n") - end + return benchmark +end - write(io, "#### RLBase.reset!(env)\n\n") - for line in format_benchmark(repr("text/plain", env_benchmark[:reset!])) - write(io, line * "\n\n") - end +function benchmark_batch_envs(Envs, num_resets, steps_per_reset, num_envs) + benchmarks = DS.OrderedDict() - write(io, "#### RLBase.state(env)\n\n") - for line in format_benchmark(repr("text/plain", env_benchmark[:state])) - write(io, line * "\n\n") - end + for Env in Envs + benchmarks[nameof(Env)] = benchmark_batch_env(Env, num_resets, steps_per_reset, num_envs) + end - write(io, "#### RLBase.action_space(env)\n\n") - for line in format_benchmark(repr("text/plain", env_benchmark[:action_space])) - write(io, line * "\n\n") - end + @info "All benchmarks complete" - write(io, "#### RLBase.is_terminated(env)\n\n") - for line in format_benchmark(repr("text/plain", env_benchmark[:is_terminated])) - write(io, line * "\n\n") - end + return benchmarks +end - write(io, "#### RLBase.reward(env)\n\n") - for line in format_benchmark(repr("text/plain", env_benchmark[:reward])) - write(io, line * "\n\n") - end +function get_summary(trial::BT.Trial) + median_trial = BT.median(trial) + memory = BT.prettymemory(median_trial.memory) + median_time = BT.prettytime(median_trial.time) + return memory, median_time +end - for action in keys(env_benchmark[:action_info]) - write(io, "#### env($action)\n\n") - for line in format_benchmark(repr("text/plain", env_benchmark[:action_info][action])) - write(io, line * "\n\n") - end - end +function get_table(benchmark) + title = "|" + separator = "|" + data = "|" + for key in keys(benchmark) + title = title * String(key) * "|" + separator = separator * ":---:|" + memory, median_time = get_summary(benchmark[key]) + data = data * "$(memory)
$(median_time)|" end - close(io) -end - -# compile everything once -for Env in ENVS - env = Env(num_envs = NUM_ENVS) - run_random_policy!(env, NUM_RESETS, STEPS_PER_RESET) + return title, separator, data end -@info "First run (for compilation) is complete" +function generate_benchmark_file_batch_envs(Envs, num_resets, steps_per_reset, num_envs, file_name = nothing) + date = Dates.format(Dates.now(), "yyyy_mm_dd_HH_MM_SS") -for Env in ENVS + if isnothing(file_name) + file_name = date * ".md" + end - env = Env(num_envs = NUM_ENVS) + io = open(file_name, "w") - env_benchmark = Dict() + benchmarks = benchmark_batch_envs(Envs, num_resets, steps_per_reset, num_envs) - env_benchmark[:run_random_policy] = BT.@benchmark run_random_policy!($(Ref(env))[], $(Ref(NUM_RESETS))[], $(Ref(STEPS_PER_RESET))[]) + println(io, "Date: $(date)") + println(io, "## List of Environments") - env_benchmark[:instantiation] = BT.@benchmark $(Ref(Env))[](num_envs = $(NUM_ENVS)[]) + for Env in ENVS + name_string = String(nameof(Env)) + println(io, " 1. [$(name_string)](#$(lowercase(name_string)))") + end - env_benchmark[:reset!] = BT.@benchmark RLBase.reset!($(Ref(env))[], force = true) - env_benchmark[:state] = BT.@benchmark RLBase.state($(Ref(env))[]) - env_benchmark[:action_space] = BT.@benchmark RLBase.action_space($(Ref(env))[]) - env_benchmark[:is_terminated] = BT.@benchmark RLBase.is_terminated($(Ref(env))[]) - env_benchmark[:reward] = BT.@benchmark RLBase.reward($(Ref(env))[]) + println(io) - action_info = Dict() - for action in RLBase.action_space(env) - actions = fill(action, NUM_ENVS) - action_info[Symbol(action)] = BT.@benchmark $(Ref(env))[]($(Ref(actions))[]) + for key in keys(benchmarks) + println(io, "### " * String(key)) + title, separator, data = get_table(benchmarks[key]) + println(io, title) + println(io, separator) + println(io, data) + println(io) end - env_benchmark[:action_info] = action_info - name = Env.body.body.body.name.name - information[name] = env_benchmark + close(io) - @info "$(name) benchmark complete" + return nothing end - -write_benchmarks(information, "benchmark_batch.md") From bb048ccf351307fcccc3edcc4a4af6ebee5b41df Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Thu, 24 Jun 2021 13:33:06 +0530 Subject: [PATCH 23/28] rename benchmark_batch.jl to benchmark_utils.jl --- benchmark/{benchmark_batch.jl => benchmark_utils.jl} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename benchmark/{benchmark_batch.jl => benchmark_utils.jl} (100%) diff --git a/benchmark/benchmark_batch.jl b/benchmark/benchmark_utils.jl similarity index 100% rename from benchmark/benchmark_batch.jl rename to benchmark/benchmark_utils.jl From e8854e23c4b8f85cac0293f76cd4d3e7274c2eaa Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Thu, 24 Jun 2021 14:29:06 +0530 Subject: [PATCH 24/28] add SingleRoomUndirected --- src/envs/envs.jl | 1 + src/envs/single_room_undirected.jl | 267 +++++++++++++++++++++++++++++ 2 files changed, 268 insertions(+) create mode 100644 src/envs/single_room_undirected.jl diff --git a/src/envs/envs.jl b/src/envs/envs.jl index 840955c..c9efa9e 100644 --- a/src/envs/envs.jl +++ b/src/envs/envs.jl @@ -43,3 +43,4 @@ include("catcher.jl") include("transport.jl") include("collect_gems_undirected_multi_agent.jl") include("single_room_undirected_batch.jl") +include("single_room_undirected.jl") diff --git a/src/envs/single_room_undirected.jl b/src/envs/single_room_undirected.jl new file mode 100644 index 0000000..fb8be6c --- /dev/null +++ b/src/envs/single_room_undirected.jl @@ -0,0 +1,267 @@ +module ModuleSingleRoomUndirected + +import Crayons +import ..GridWorlds as GW +import ..Play +import Random +import REPL +import ReinforcementLearningBase as RLBase +import StaticArrays as SA +import StatsBase as SB + +const MOVE_UP = 1 +const MOVE_DOWN = 2 +const MOVE_LEFT = 3 +const MOVE_RIGHT = 4 +const ACTION_NAMES = (:MOVE_UP, :MOVE_DOWN, :MOVE_LEFT, :MOVE_RIGHT) + +const AGENT = 1 +const WALL = 2 +const GOAL = 3 + +const DUMMY_CHARACTER = '⋅' +const CHARACTERS = ('☻', '█', '♥') +const FOREGROUND_COLORS = (:light_red, :white, :light_red) + +function sample_two_positions_without_replacement(rng, region) + position1 = rand(rng, region) + position2 = rand(rng, region) + + while position1 == position2 + position2 = rand(rng, region) + end + + return position1, position2 +end + +function move(action::Integer, i, j) + if action == MOVE_UP + return i - 1, j + elseif action == MOVE_DOWN + return i + 1, j + elseif action == MOVE_LEFT + return i, j - 1 + elseif action == MOVE_RIGHT + return i, j + 1 + else + return i, j + end +end + +struct SingleRoomUndirected{I, R, RNG} <: GW.AbstractGridWorld + tile_map::BitArray{3} + agent_position::SA.MVector{2, I} + reward::Ref{R} + rng::RNG + done::Ref{Bool} + terminal_reward::R + goal_position::SA.MVector{2, I} +end + +function SingleRoomUndirected(; I = Int32, R = Float32, height = 8, width = 8, rng = Random.MersenneTwister()) + tile_map = BitArray(undef, 3, height, width) + agent_position = SA.MVector{2, I}(undef) + reward = Ref{R}() + done = Ref{Bool}() + goal_position = SA.MVector{2, I}(undef) + terminal_reward = one(R) + + inner_area = CartesianIndices((2 : height - 1, 2 : width - 1)) + + tile_map[:, :, :] .= false + tile_map[WALL, 1, :] .= true + tile_map[WALL, height, :] .= true + tile_map[WALL, :, 1] .= true + tile_map[WALL, :, width] .= true + + random_positions = sample_two_positions_without_replacement(rng, inner_area) + + agent_position[1] = random_positions[1][1] + agent_position[2] = random_positions[1][2] + tile_map[AGENT, random_positions[1]] = true + + goal_position[1] = random_positions[2][1] + goal_position[2] = random_positions[2][2] + tile_map[GOAL, random_positions[2]] = true + + reward[] = zero(R) + done[] = false + + env = SingleRoomUndirected(tile_map, agent_position, reward, rng, done, terminal_reward, goal_position) + + RLBase.reset!(env) + + return env +end + +RLBase.StateStyle(env::SingleRoomUndirected) = RLBase.InternalState{Any}() +RLBase.state_space(env::SingleRoomUndirected, ::RLBase.InternalState) = nothing +RLBase.state(env::SingleRoomUndirected, ::RLBase.InternalState) = env.tile_map + +RLBase.action_space(env::SingleRoomUndirected) = (MOVE_UP, MOVE_DOWN, MOVE_LEFT, MOVE_RIGHT) +RLBase.reward(env::SingleRoomUndirected) = env.reward[] +RLBase.is_terminated(env::SingleRoomUndirected) = env.done[] + +function RLBase.reset!(env::SingleRoomUndirected{I, R}) where {I, R} + tile_map = env.tile_map + agent_position = env.agent_position + goal_position = env.goal_position + reward = env.reward + done = env.done + rng = env.rng + + num_objects, height, width = size(tile_map) + inner_area = CartesianIndices((2 : height - 1, 2 : width - 1)) + + tile_map[AGENT, agent_position...] = false + tile_map[GOAL, goal_position...] = false + + random_positions = sample_two_positions_without_replacement(rng, inner_area) + + agent_position[1] = random_positions[1][1] + agent_position[2] = random_positions[1][2] + tile_map[AGENT, random_positions[1]] = true + + goal_position[1] = random_positions[2][1] + goal_position[2] = random_positions[2][2] + tile_map[GOAL, random_positions[2]] = true + + reward[] = zero(R) + done[] = false + + return nothing +end + +function (env::SingleRoomUndirected{I, R})(action) where {I, R} + tile_map = env.tile_map + agent_position = env.agent_position + goal_position = env.goal_position + reward = env.reward + done = env.done + rng = env.rng + terminal_reward = env.terminal_reward + + current_position_i = agent_position[1] + current_position_j = agent_position[2] + next_position_i, next_position_j = move(action, current_position_i, current_position_j) + + if !tile_map[WALL, next_position_i, next_position_j] + tile_map[AGENT, current_position_i, current_position_j] = false + agent_position[1] = next_position_i + agent_position[2] = next_position_j + tile_map[AGENT, next_position_i, next_position_j] = true + end + + if tile_map[GOAL, agent_position...] + reward[] = terminal_reward + done[] = true + else + reward[] = zero(R) + done[] = false + end + + return nothing +end + +function Base.show(io::IO, ::MIME"text/plain", env::SingleRoomUndirected) + tile_map = env.tile_map + reward = env.reward + done = env.done + + num_objects, height, width = size(tile_map) + + print(io, "objects = ") + for i in 1 : length(CHARACTERS) + print(io, Crayons.Crayon(foreground = FOREGROUND_COLORS[i]), CHARACTERS[i], Crayons.Crayon(reset = true)) + if i < length(CHARACTERS) + print(io, ", ") + else + print(io, "\n") + end + end + println(io, "dummy character = ", DUMMY_CHARACTER) + + println(io) + for i in 1:height + for j in 1:width + idx = findfirst(@view tile_map[:, i, j]) + if isnothing(idx) + print(io, DUMMY_CHARACTER) + else + print(io, Crayons.Crayon(foreground = FOREGROUND_COLORS[idx]), CHARACTERS[idx], Crayons.Crayon(reset = true)) + end + end + + println(io) + end + + println(io, "reward = ", reward[]) + println(io, "done = ", done[]) + + return nothing +end + +get_string_key_bindings(env::SingleRoomUndirected) = """Key bindings: + 'q': quit + 'r': RLBase.reset!(env) + 'w': MOVE_UP + 's': MOVE_DOWN + 'a': MOVE_LEFT + 'd': MOVE_RIGHT + """ + +function play!(terminal::REPL.Terminals.UnixTerminal, env::SingleRoomUndirected; file_name::Union{Nothing, AbstractString} = nothing) + REPL.Terminals.raw!(terminal, true) + + terminal_out = terminal.out_stream + terminal_in = terminal.in_stream + file = Play.open_maybe(file_name) + + Play.write_io1_maybe_io2(terminal_out, file, Play.CLEAR_SCREEN) + Play.write_io1_maybe_io2(terminal_out, file, Play.MOVE_CURSOR_TO_ORIGIN) + Play.write_io1_maybe_io2(terminal_out, file, Play.HIDE_CURSOR) + + action_chars = ('w', 's', 'a', 'd') + + char_to_action = Dict('w' => MOVE_UP, + 's' => MOVE_DOWN, + 'a' => MOVE_LEFT, + 'd' => MOVE_RIGHT, + ) + + try + while true + Play.write_io1_maybe_io2(terminal_out, file, get_string_key_bindings(env)) + Play.show_io1_maybe_io2(terminal_out, file, MIME("text/plain"), env) + + char = read(terminal_in, Char) + + Play.write_io1_maybe_io2(terminal_out, file, Play.EMPTY_SCREEN) + + if char == 'q' + Play.write_io1_maybe_io2(terminal_out, file, Play.SHOW_CURSOR) + Play.close_maybe(file) + REPL.Terminals.raw!(terminal, false) + return nothing + elseif char == 'r' + RLBase.reset!(env) + elseif char in action_chars + env(char_to_action[char]) + else + @warn "No procedure exists for this character: $char" + end + + Play.write_io1_maybe_io2(terminal_out, file, "Last character = $(char)\n") + end + finally + Play.write_io1_maybe_io2(terminal_out, file, Play.SHOW_CURSOR) + Play.close_maybe(file) + REPL.Terminals.raw!(terminal, false) + end + + return nothing +end + +play!(env::SingleRoomUndirected; file_name = nothing) = play!(REPL.TerminalMenus.terminal, env, file_name = file_name) + +end # module From 9f3b1bde252b881207a13b21a77ddc64deab22f0 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Thu, 24 Jun 2021 14:44:11 +0530 Subject: [PATCH 25/28] ignore generated benchmark files --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index aea776a..24cb5fb 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,5 @@ Manifest.toml *.swp /src/scratchpad.jl + +/benchmark/20* From b73095834f20ff3ada203090999af3cf747ad0ee Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Thu, 24 Jun 2021 15:24:26 +0530 Subject: [PATCH 26/28] add benchmarking for non-batch envs --- benchmark/benchmark_utils.jl | 116 ++++++++++++++++++++++++++++++----- 1 file changed, 101 insertions(+), 15 deletions(-) diff --git a/benchmark/benchmark_utils.jl b/benchmark/benchmark_utils.jl index 1b395ea..5b03da2 100644 --- a/benchmark/benchmark_utils.jl +++ b/benchmark/benchmark_utils.jl @@ -9,9 +9,25 @@ const STEPS_PER_RESET = 100 const NUM_RESETS = 100 const NUM_ENVS = 64 -ENVS = [GW.ModuleSingleRoomUndirectedBatch.SingleRoomUndirectedBatch] +ENVS = [GW.ModuleSingleRoomUndirected.SingleRoomUndirected] +BATCH_ENVS = [GW.ModuleSingleRoomUndirectedBatch.SingleRoomUndirectedBatch] -function run_random_policy!(env, num_resets, steps_per_reset) +function run_random_policy_env!(env, num_resets, steps_per_reset) + for _ in 1:num_resets + RLBase.reset!(env) + for _ in 1:steps_per_reset + state = RLBase.state(env) + action = rand(RLBase.action_space(env)) + env(action) + is_terminated = RLBase.is_terminated(env) + reward = RLBase.reward(env) + end + end + + return nothing +end + +function run_random_policy_batch_env!(env, num_resets, steps_per_reset) num_envs = size(env.tile_map, 4) action = Array{eltype(RLBase.action_space(env))}(undef, num_envs) for _ in 1:num_resets @@ -30,15 +46,40 @@ function run_random_policy!(env, num_resets, steps_per_reset) return nothing end -function compile_envs(Envs) - for Env in Envs - env = Env(num_envs = NUM_ENVS) - run_random_policy!(env, NUM_RESETS, STEPS_PER_RESET) +# function compile_envs(Envs, num_resets, steps_per_reset) + # for Env in Envs + # env = Env() + # run_random_policy!(env, num_resets, steps_per_reset) + # end + + # @info "Compiled and ran all environments" + + # return nothing +# end + +function benchmark_env(Env, num_resets, steps_per_reset) + benchmark = DS.OrderedDict() + + parent_module = parentmodule(Env) + + env = Env() + + benchmark[:random_policy] = BT.@benchmark run_random_policy_env!($(Ref(env))[], $(Ref(num_resets))[], $(Ref(steps_per_reset))[]) + benchmark[:reset] = BT.@benchmark RLBase.reset!($(Ref(env))[]) + benchmark[:state] = BT.@benchmark RLBase.state($(Ref(env))[]) + + for action in RLBase.action_space(env) + action_name = parent_module.ACTION_NAMES[action] + benchmark[action_name] = BT.@benchmark $(Ref(env))[]($(Ref(action))[]) end - @info "Compiled and ran all environments" + benchmark[:action_space] = BT.@benchmark RLBase.action_space($(Ref(env))[]) + benchmark[:is_terminated] = BT.@benchmark RLBase.is_terminated($(Ref(env))[]) + benchmark[:reward] = BT.@benchmark RLBase.reward($(Ref(env))[]) - return nothing + @info "$(nameof(Env)) benchmarked" + + return benchmark end function benchmark_batch_env(Env, num_resets, steps_per_reset, num_envs) @@ -48,7 +89,7 @@ function benchmark_batch_env(Env, num_resets, steps_per_reset, num_envs) env = Env(num_envs = num_envs) - benchmark[:random_policy] = BT.@benchmark run_random_policy!($(Ref(env))[], $(Ref(num_resets))[], $(Ref(steps_per_reset))[]) + benchmark[:random_policy] = BT.@benchmark run_random_policy_batch_env!($(Ref(env))[], $(Ref(num_resets))[], $(Ref(steps_per_reset))[]) benchmark[:reset] = BT.@benchmark RLBase.reset!($(Ref(env))[], force = true) benchmark[:state] = BT.@benchmark RLBase.state($(Ref(env))[]) @@ -67,6 +108,18 @@ function benchmark_batch_env(Env, num_resets, steps_per_reset, num_envs) return benchmark end +function benchmark_envs(Envs, num_resets, steps_per_reset) + benchmarks = DS.OrderedDict() + + for Env in Envs + benchmarks[nameof(Env)] = benchmark_env(Env, num_resets, steps_per_reset) + end + + @info "benchmark_envs complete" + + return benchmarks +end + function benchmark_batch_envs(Envs, num_resets, steps_per_reset, num_envs) benchmarks = DS.OrderedDict() @@ -74,7 +127,7 @@ function benchmark_batch_envs(Envs, num_resets, steps_per_reset, num_envs) benchmarks[nameof(Env)] = benchmark_batch_env(Env, num_resets, steps_per_reset, num_envs) end - @info "All benchmarks complete" + @info "benchmark_batch_envs complete" return benchmarks end @@ -101,7 +154,7 @@ function get_table(benchmark) return title, separator, data end -function generate_benchmark_file_batch_envs(Envs, num_resets, steps_per_reset, num_envs, file_name = nothing) +function generate_benchmark_file(benchmarks; file_name = nothing) date = Dates.format(Dates.now(), "yyyy_mm_dd_HH_MM_SS") if isnothing(file_name) @@ -110,13 +163,11 @@ function generate_benchmark_file_batch_envs(Envs, num_resets, steps_per_reset, n io = open(file_name, "w") - benchmarks = benchmark_batch_envs(Envs, num_resets, steps_per_reset, num_envs) - println(io, "Date: $(date)") println(io, "## List of Environments") - for Env in ENVS - name_string = String(nameof(Env)) + for key in keys(benchmarks) + name_string = String(key) println(io, " 1. [$(name_string)](#$(lowercase(name_string)))") end @@ -135,3 +186,38 @@ function generate_benchmark_file_batch_envs(Envs, num_resets, steps_per_reset, n return nothing end + +# function generate_benchmark_file_batch_envs(Envs, num_resets, steps_per_reset, num_envs; file_name = nothing) + # date = Dates.format(Dates.now(), "yyyy_mm_dd_HH_MM_SS") + + # if isnothing(file_name) + # file_name = date * ".md" + # end + + # io = open(file_name, "w") + + # benchmarks = benchmark_batch_envs(Envs, num_resets, steps_per_reset, num_envs) + + # println(io, "Date: $(date)") + # println(io, "## List of Environments") + + # for Env in Envs + # name_string = String(nameof(Env)) + # println(io, " 1. [$(name_string)](#$(lowercase(name_string)))") + # end + + # println(io) + + # for key in keys(benchmarks) + # println(io, "### " * String(key)) + # title, separator, data = get_table(benchmarks[key]) + # println(io, title) + # println(io, separator) + # println(io, data) + # println(io) + # end + + # close(io) + + # return nothing +# end From e2f313fe87ab8fb1901e1a8e94001439d9bb0c83 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Thu, 24 Jun 2021 15:55:27 +0530 Subject: [PATCH 27/28] make SingleRoomUndirected mutable and improve performance --- src/envs/single_room_undirected.jl | 95 +++++++++++------------------- 1 file changed, 35 insertions(+), 60 deletions(-) diff --git a/src/envs/single_room_undirected.jl b/src/envs/single_room_undirected.jl index fb8be6c..7be382f 100644 --- a/src/envs/single_room_undirected.jl +++ b/src/envs/single_room_undirected.jl @@ -48,23 +48,18 @@ function move(action::Integer, i, j) end end -struct SingleRoomUndirected{I, R, RNG} <: GW.AbstractGridWorld +mutable struct SingleRoomUndirected{R, RNG} <: GW.AbstractGridWorld tile_map::BitArray{3} - agent_position::SA.MVector{2, I} - reward::Ref{R} + agent_position::CartesianIndex{2} + reward::R rng::RNG - done::Ref{Bool} + done::Bool terminal_reward::R - goal_position::SA.MVector{2, I} + goal_position::CartesianIndex{2} end -function SingleRoomUndirected(; I = Int32, R = Float32, height = 8, width = 8, rng = Random.MersenneTwister()) +function SingleRoomUndirected(; R = Float32, height = 8, width = 8, rng = Random.MersenneTwister()) tile_map = BitArray(undef, 3, height, width) - agent_position = SA.MVector{2, I}(undef) - reward = Ref{R}() - done = Ref{Bool}() - goal_position = SA.MVector{2, I}(undef) - terminal_reward = one(R) inner_area = CartesianIndices((2 : height - 1, 2 : width - 1)) @@ -74,18 +69,14 @@ function SingleRoomUndirected(; I = Int32, R = Float32, height = 8, width = 8, r tile_map[WALL, :, 1] .= true tile_map[WALL, :, width] .= true - random_positions = sample_two_positions_without_replacement(rng, inner_area) - - agent_position[1] = random_positions[1][1] - agent_position[2] = random_positions[1][2] - tile_map[AGENT, random_positions[1]] = true + agent_position, goal_position = sample_two_positions_without_replacement(rng, inner_area) - goal_position[1] = random_positions[2][1] - goal_position[2] = random_positions[2][2] - tile_map[GOAL, random_positions[2]] = true + tile_map[AGENT, agent_position] = true + tile_map[GOAL, goal_position] = true - reward[] = zero(R) - done[] = false + reward = zero(R) + done = false + terminal_reward = one(R) env = SingleRoomUndirected(tile_map, agent_position, reward, rng, done, terminal_reward, goal_position) @@ -102,62 +93,48 @@ RLBase.action_space(env::SingleRoomUndirected) = (MOVE_UP, MOVE_DOWN, MOVE_LEFT, RLBase.reward(env::SingleRoomUndirected) = env.reward[] RLBase.is_terminated(env::SingleRoomUndirected) = env.done[] -function RLBase.reset!(env::SingleRoomUndirected{I, R}) where {I, R} +function RLBase.reset!(env::SingleRoomUndirected{R}) where {R} tile_map = env.tile_map - agent_position = env.agent_position - goal_position = env.goal_position - reward = env.reward - done = env.done rng = env.rng num_objects, height, width = size(tile_map) inner_area = CartesianIndices((2 : height - 1, 2 : width - 1)) - tile_map[AGENT, agent_position...] = false - tile_map[GOAL, goal_position...] = false + tile_map[AGENT, env.agent_position] = false + tile_map[GOAL, env.goal_position] = false - random_positions = sample_two_positions_without_replacement(rng, inner_area) + new_agent_position, new_goal_position = sample_two_positions_without_replacement(rng, inner_area) - agent_position[1] = random_positions[1][1] - agent_position[2] = random_positions[1][2] - tile_map[AGENT, random_positions[1]] = true + env.agent_position = new_agent_position + tile_map[AGENT, new_agent_position] = true - goal_position[1] = random_positions[2][1] - goal_position[2] = random_positions[2][2] - tile_map[GOAL, random_positions[2]] = true + env.goal_position = new_goal_position + tile_map[GOAL, new_goal_position] = true - reward[] = zero(R) - done[] = false + env.reward = zero(R) + env.done = false return nothing end -function (env::SingleRoomUndirected{I, R})(action) where {I, R} +function (env::SingleRoomUndirected{R})(action) where {R} tile_map = env.tile_map agent_position = env.agent_position - goal_position = env.goal_position - reward = env.reward - done = env.done - rng = env.rng - terminal_reward = env.terminal_reward - current_position_i = agent_position[1] - current_position_j = agent_position[2] - next_position_i, next_position_j = move(action, current_position_i, current_position_j) + new_agent_position = CartesianIndex(move(action, agent_position.I...)) - if !tile_map[WALL, next_position_i, next_position_j] - tile_map[AGENT, current_position_i, current_position_j] = false - agent_position[1] = next_position_i - agent_position[2] = next_position_j - tile_map[AGENT, next_position_i, next_position_j] = true + if !tile_map[WALL, new_agent_position] + tile_map[AGENT, agent_position] = false + env.agent_position = new_agent_position + tile_map[AGENT, new_agent_position] = true end - if tile_map[GOAL, agent_position...] - reward[] = terminal_reward - done[] = true + if tile_map[GOAL, env.agent_position] + env.reward = env.terminal_reward + done = true else - reward[] = zero(R) - done[] = false + env.reward = zero(R) + done = false end return nothing @@ -165,8 +142,6 @@ end function Base.show(io::IO, ::MIME"text/plain", env::SingleRoomUndirected) tile_map = env.tile_map - reward = env.reward - done = env.done num_objects, height, width = size(tile_map) @@ -195,8 +170,8 @@ function Base.show(io::IO, ::MIME"text/plain", env::SingleRoomUndirected) println(io) end - println(io, "reward = ", reward[]) - println(io, "done = ", done[]) + println(io, "reward = ", env.reward) + println(io, "done = ", env.done) return nothing end From 6d114c66ee57fec610b539d3a2e5d146b99dfaa0 Mon Sep 17 00:00:00 2001 From: Siddharth Bhatia Date: Thu, 24 Jun 2021 16:05:09 +0530 Subject: [PATCH 28/28] remove constants NUM_RESETS, STEPS_PER_EPISODE, NUM_ENVS --- benchmark/benchmark_utils.jl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/benchmark/benchmark_utils.jl b/benchmark/benchmark_utils.jl index 5b03da2..8e21b2d 100644 --- a/benchmark/benchmark_utils.jl +++ b/benchmark/benchmark_utils.jl @@ -5,10 +5,6 @@ import GridWorlds as GW import ReinforcementLearningBase as RLBase import Statistics -const STEPS_PER_RESET = 100 -const NUM_RESETS = 100 -const NUM_ENVS = 64 - ENVS = [GW.ModuleSingleRoomUndirected.SingleRoomUndirected] BATCH_ENVS = [GW.ModuleSingleRoomUndirectedBatch.SingleRoomUndirectedBatch] @@ -95,7 +91,7 @@ function benchmark_batch_env(Env, num_resets, steps_per_reset, num_envs) for action in RLBase.action_space(env) action_name = parent_module.ACTION_NAMES[action] - batch_action = fill(action, NUM_ENVS) + batch_action = fill(action, num_envs) benchmark[action_name] = BT.@benchmark $(Ref(env))[]($(Ref(batch_action))[]) end