Skip to content

Commit b54a0b0

Browse files
Fix Timer bug (#915)
* Fix variable naming
* Bug fix version bump
* Add debug test
* Add import statement
1 parent 038b59e commit b54a0b0

File tree

4 files changed

+59
-39
lines changed

4 files changed

+59
-39
lines changed

src/ReinforcementLearningCore/Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "ReinforcementLearningCore"
22
uuid = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6"
3-
version = "0.11.1"
3+
version = "0.11.2"
44

55
[deps]
66
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"

src/ReinforcementLearningCore/src/core/run.jl

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -88,37 +88,37 @@ function _run(policy::AbstractPolicy,
8888
is_stop = false
8989
while !is_stop
9090
# NOTE: @timeit_debug statements are used for debug logging
91-
@timeit_debug to "reset!" reset!(env)
92-
@timeit_debug to "push!(policy) PreEpisodeStage" push!(policy, PreEpisodeStage(), env)
93-
@timeit_debug to "optimise! PreEpisodeStage" optimise!(policy, PreEpisodeStage())
94-
@timeit_debug to "push!(hook) PreEpisodeStage" push!(hook, PreEpisodeStage(), policy, env)
91+
@timeit_debug timer "reset!" reset!(env)
92+
@timeit_debug timer "push!(policy) PreEpisodeStage" push!(policy, PreEpisodeStage(), env)
93+
@timeit_debug timer "optimise! PreEpisodeStage" optimise!(policy, PreEpisodeStage())
94+
@timeit_debug timer "push!(hook) PreEpisodeStage" push!(hook, PreEpisodeStage(), policy, env)
9595

9696

9797
while !reset_condition(policy, env) # one episode
98-
@timeit_debug to "push!(policy) PreActStage" push!(policy, PreActStage(), env)
99-
@timeit_debug to "optimise! PreActStage" optimise!(policy, PreActStage())
100-
@timeit_debug to "push!(hook) PreActStage" push!(hook, PreActStage(), policy, env)
98+
@timeit_debug timer "push!(policy) PreActStage" push!(policy, PreActStage(), env)
99+
@timeit_debug timer "optimise! PreActStage" optimise!(policy, PreActStage())
100+
@timeit_debug timer "push!(hook) PreActStage" push!(hook, PreActStage(), policy, env)
101101

102-
action = @timeit_debug to "plan!" RLBase.plan!(policy, env)
103-
@timeit_debug to "act!" act!(env, action)
102+
action = @timeit_debug timer "plan!" RLBase.plan!(policy, env)
103+
@timeit_debug timer "act!" act!(env, action)
104104

105-
@timeit_debug to "push!(policy) PostActStage" push!(policy, PostActStage(), env)
106-
@timeit_debug to "optimise! PostActStage" optimise!(policy, PostActStage())
107-
@timeit_debug to "push!(hook) PostActStage" push!(hook, PostActStage(), policy, env)
105+
@timeit_debug timer "push!(policy) PostActStage" push!(policy, PostActStage(), env)
106+
@timeit_debug timer "optimise! PostActStage" optimise!(policy, PostActStage())
107+
@timeit_debug timer "push!(hook) PostActStage" push!(hook, PostActStage(), policy, env)
108108

109109
if check_stop(stop_condition, policy, env)
110110
is_stop = true
111-
@timeit_debug to "push!(policy) PreActStage" push!(policy, PreActStage(), env)
112-
@timeit_debug to "optimise! PreActStage" optimise!(policy, PreActStage())
113-
@timeit_debug to "push!(hook) PreActStage" push!(hook, PreActStage(), policy, env)
114-
@timeit_debug to "plan!" RLBase.plan!(policy, env) # let the policy see the last observation
111+
@timeit_debug timer "push!(policy) PreActStage" push!(policy, PreActStage(), env)
112+
@timeit_debug timer "optimise! PreActStage" optimise!(policy, PreActStage())
113+
@timeit_debug timer "push!(hook) PreActStage" push!(hook, PreActStage(), policy, env)
114+
@timeit_debug timer "plan!" RLBase.plan!(policy, env) # let the policy see the last observation
115115
break
116116
end
117117
end # end of an episode
118118

119-
@timeit_debug to "push!(policy) PostEpisodeStage" push!(policy, PostEpisodeStage(), env) # let the policy see the last observation
120-
@timeit_debug to "optimise! PostEpisodeStage" optimise!(policy, PostEpisodeStage())
121-
@timeit_debug to "push!(hook) PostEpisodeStage" push!(hook, PostEpisodeStage(), policy, env)
119+
@timeit_debug timer "push!(policy) PostEpisodeStage" push!(policy, PostEpisodeStage(), env) # let the policy see the last observation
120+
@timeit_debug timer "optimise! PostEpisodeStage" optimise!(policy, PostEpisodeStage())
121+
@timeit_debug timer "push!(hook) PostEpisodeStage" push!(hook, PostEpisodeStage(), policy, env)
122122

123123
end
124124
push!(policy, PostExperimentStage(), env)

src/ReinforcementLearningCore/src/policies/agent/multi_agent.jl

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -109,34 +109,34 @@ function Base.run(
109109
is_stop = false
110110
while !is_stop
111111
# NOTE: @timeit_debug statements are for debug logging
112-
@timeit_debug to "reset!" reset!(env)
113-
@timeit_debug to "push!(policy) PreEpisodeStage" push!(multiagent_policy, PreEpisodeStage(), env)
114-
@timeit_debug to "optimise! PreEpisodeStage" optimise!(multiagent_policy, PreEpisodeStage())
115-
@timeit_debug to "push!(hook) PreEpisodeStage" push!(multiagent_hook, PreEpisodeStage(), multiagent_policy, env)
112+
@timeit_debug timer "reset!" reset!(env)
113+
@timeit_debug timer "push!(policy) PreEpisodeStage" push!(multiagent_policy, PreEpisodeStage(), env)
114+
@timeit_debug timer "optimise! PreEpisodeStage" optimise!(multiagent_policy, PreEpisodeStage())
115+
@timeit_debug timer "push!(hook) PreEpisodeStage" push!(multiagent_hook, PreEpisodeStage(), multiagent_policy, env)
116116

117117
while !(reset_condition(multiagent_policy, env) || is_stop) # one episode
118118
for player in CurrentPlayerIterator(env)
119119
policy = multiagent_policy[player] # Select appropriate policy
120120
hook = multiagent_hook[player] # Select appropriate hook
121-
@timeit_debug to "push!(policy) PreActStage" push!(policy, PreActStage(), env)
122-
@timeit_debug to "optimise! PreActStage" optimise!(policy, PreActStage())
123-
@timeit_debug to "push!(hook) PreActStage" push!(hook, PreActStage(), policy, env)
121+
@timeit_debug timer "push!(policy) PreActStage" push!(policy, PreActStage(), env)
122+
@timeit_debug timer "optimise! PreActStage" optimise!(policy, PreActStage())
123+
@timeit_debug timer "push!(hook) PreActStage" push!(hook, PreActStage(), policy, env)
124124

125-
action = @timeit_debug to "plan!" RLBase.plan!(policy, env)
126-
@timeit_debug to "act!" act!(env, action)
125+
action = @timeit_debug timer "plan!" RLBase.plan!(policy, env)
126+
@timeit_debug timer "act!" act!(env, action)
127127

128128

129129

130-
@timeit_debug to "push!(policy) PostActStage" push!(policy, PostActStage(), env)
131-
@timeit_debug to "optimise! PostActStage" optimise!(policy, PostActStage())
132-
@timeit_debug to "push!(hook) PostActStage" push!(hook, PostActStage(), policy, env)
130+
@timeit_debug timer "push!(policy) PostActStage" push!(policy, PostActStage(), env)
131+
@timeit_debug timer "optimise! PostActStage" optimise!(policy, PostActStage())
132+
@timeit_debug timer "push!(hook) PostActStage" push!(hook, PostActStage(), policy, env)
133133

134134
if check_stop(stop_condition, policy, env)
135135
is_stop = true
136-
@timeit_debug to "push!(policy) PreActStage" push!(multiagent_policy, PreActStage(), env)
137-
@timeit_debug to "optimise! PreActStage" optimise!(multiagent_policy, PreActStage())
138-
@timeit_debug to "push!(hook) PreActStage" push!(multiagent_hook, PreActStage(), policy, env)
139-
@timeit_debug to "plan!" RLBase.plan!(multiagent_policy, env) # let the policy see the last observation
136+
@timeit_debug timer "push!(policy) PreActStage" push!(multiagent_policy, PreActStage(), env)
137+
@timeit_debug timer "optimise! PreActStage" optimise!(multiagent_policy, PreActStage())
138+
@timeit_debug timer "push!(hook) PreActStage" push!(multiagent_hook, PreActStage(), policy, env)
139+
@timeit_debug timer "plan!" RLBase.plan!(multiagent_policy, env) # let the policy see the last observation
140140
break
141141
end
142142

@@ -146,9 +146,9 @@ function Base.run(
146146
end
147147
end # end of an episode
148148

149-
@timeit_debug to "push!(policy) PostEpisodeStage" push!(multiagent_policy, PostEpisodeStage(), env) # let the policy see the last observation
150-
@timeit_debug to "optimise! PostEpisodeStage" optimise!(multiagent_policy, PostEpisodeStage())
151-
@timeit_debug to "push!(hook) PostEpisodeStage" push!(multiagent_hook, PostEpisodeStage(), multiagent_policy, env)
149+
@timeit_debug timer "push!(policy) PostEpisodeStage" push!(multiagent_policy, PostEpisodeStage(), env) # let the policy see the last observation
150+
@timeit_debug timer "optimise! PostEpisodeStage" optimise!(multiagent_policy, PostEpisodeStage())
151+
@timeit_debug timer "push!(hook) PostEpisodeStage" push!(multiagent_hook, PostEpisodeStage(), multiagent_policy, env)
152152
end
153153
push!(multiagent_policy, PostExperimentStage(), env)
154154
push!(multiagent_hook, PostExperimentStage(), multiagent_policy, env)

src/ReinforcementLearningCore/test/core/base.jl

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using ReinforcementLearningCore: SRT
22
using ReinforcementLearningBase
3+
using TimerOutputs
34

45
@testset "core" begin
56
@testset "simple workflow" begin
@@ -54,4 +55,23 @@ using ReinforcementLearningBase
5455
@test sum(hook[]) == length(agent.trajectory.container)
5556
end
5657
end
58+
59+
# Test for the debug-timing path: enables TimerOutputs debug timings for
# RLCore, runs a short single-agent experiment, and checks that `RLCore.timer` exists
# with the expected type.
@testset "Debug Timer" begin
60+
# Turn on debug timings for the RLCore module before running the experiment.
# NOTE(review): presumably this activates the `@timeit_debug timer ...` calls in the run loop — confirm.
RLCore.TimerOutputs.enable_debug_timings(RLCore)
61+
62+
env = RandomWalk1D()
63+
# Agent built from a random policy over the env's legal actions, a trajectory, and an SRT cache.
agent = Agent(
64+
RandomPolicy(legal_action_space(env)),
65+
Trajectory(
66+
CircularArraySARTTraces(; capacity = 1_000),
67+
BatchSampler(1),
68+
InsertSampleRatioController(n_inserted = -1),
69+
),
70+
SRT{Any, Any, Any}(),
71+
)
72+
stop_condition = StopAfterStep(123; is_show_progress=false)
73+
hook = StepsPerEpisode()
74+
# The run itself is the main check: it must complete without throwing while debug timings are enabled.
run(agent, env, stop_condition, hook)
75+
# Only asserts the type of the module-level timer; recorded timings are not inspected.
@test RLCore.timer isa TimerOutputs.TimerOutput
76+
end
5777
end

0 commit comments

Comments
 (0)