Skip to content

Commit 91d80e9

Browse files
Add performance testing run loop (#914)
1 parent 72d6766 commit 91d80e9

File tree

5 files changed

+50
-38
lines changed

5 files changed

+50
-38
lines changed

docs/src/tips.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,8 @@ dependency, remember to update both `docs/Project.toml` and
2727
All the cells after the `#+ tangle=true` line in `Your_Experiment.jl` will be extracted into the
2828
`ReinforcementLearningExperiments` package automatically. This feature is
2929
supported by [Weave.jl](https://weavejl.mpastell.com/stable/usage/#tangle).
30+
31+
## How to enable debug timings for experiment runs?
32+
33+
Call `RLCore.TimerOutputs.enable_debug_timings(RLCore)` to start collecting the default timings for hooks, policies and optimization steps during experiment runs. To reset the timer, call `RLCore.TimerOutputs.reset_timer!(RLCore.timer)`. To show the timer results, display `RLCore.timer` (for example, by evaluating it in the REPL).
34+

src/ReinforcementLearningCore/Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ ReinforcementLearningBase = "e575027e-6cd6-5018-9292-cdc6200d2b44"
2222
ReinforcementLearningTrajectories = "6486599b-a3cd-4e92-a99a-2cea90cc8c3c"
2323
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
2424
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
25+
TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
2526
UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
2627

2728
[compat]

src/ReinforcementLearningCore/src/ReinforcementLearningCore.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
module ReinforcementLearningCore
22

3+
using TimerOutputs
34
using ReinforcementLearningBase
45
using Reexport
56

@@ -14,4 +15,7 @@ include("core/core.jl")
1415
include("policies/policies.jl")
1516
include("utils/utils.jl")
1617

18+
# Global timer for TimerOutputs.jl
19+
const timer = TimerOutput()
20+
1721
end # module

src/ReinforcementLearningCore/src/core/run.jl

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -87,37 +87,38 @@ function _run(policy::AbstractPolicy,
8787
push!(policy, PreExperimentStage(), env)
8888
is_stop = false
8989
while !is_stop
90-
reset!(env)
91-
push!(policy, PreEpisodeStage(), env)
92-
optimise!(policy, PreEpisodeStage())
93-
push!(hook, PreEpisodeStage(), policy, env)
90+
# NOTE: @timeit_debug only collects timings after `TimerOutputs.enable_debug_timings(ReinforcementLearningCore)` has been called
91+
@timeit_debug to "reset!" reset!(env)
92+
@timeit_debug to "push!(policy) PreEpisodeStage" push!(policy, PreEpisodeStage(), env)
93+
@timeit_debug to "optimise! PreEpisodeStage" optimise!(policy, PreEpisodeStage())
94+
@timeit_debug to "push!(hook) PreEpisodeStage" push!(hook, PreEpisodeStage(), policy, env)
9495

9596

9697
while !reset_condition(policy, env) # one episode
97-
push!(policy, PreActStage(), env)
98-
optimise!(policy, PreActStage())
99-
push!(hook, PreActStage(), policy, env)
98+
@timeit_debug to "push!(policy) PreActStage" push!(policy, PreActStage(), env)
99+
@timeit_debug to "optimise! PreActStage" optimise!(policy, PreActStage())
100+
@timeit_debug to "push!(hook) PreActStage" push!(hook, PreActStage(), policy, env)
100101

101-
action = RLBase.plan!(policy, env)
102-
act!(env, action)
102+
action = @timeit_debug to "plan!" RLBase.plan!(policy, env)
103+
@timeit_debug to "act!" act!(env, action)
103104

104-
push!(policy, PostActStage(), env)
105-
optimise!(policy, PostActStage())
106-
push!(hook, PostActStage(), policy, env)
105+
@timeit_debug to "push!(policy) PostActStage" push!(policy, PostActStage(), env)
106+
@timeit_debug to "optimise! PostActStage" optimise!(policy, PostActStage())
107+
@timeit_debug to "push!(hook) PostActStage" push!(hook, PostActStage(), policy, env)
107108

108109
if check_stop(stop_condition, policy, env)
109110
is_stop = true
110-
push!(policy, PreActStage(), env)
111-
optimise!(policy, PreActStage())
112-
push!(hook, PreActStage(), policy, env)
113-
RLBase.plan!(policy, env) # let the policy see the last observation
111+
@timeit_debug to "push!(policy) PreActStage" push!(policy, PreActStage(), env)
112+
@timeit_debug to "optimise! PreActStage" optimise!(policy, PreActStage())
113+
@timeit_debug to "push!(hook) PreActStage" push!(hook, PreActStage(), policy, env)
114+
@timeit_debug to "plan!" RLBase.plan!(policy, env) # let the policy see the last observation
114115
break
115116
end
116117
end # end of an episode
117118

118-
push!(policy, PostEpisodeStage(), env) # let the policy see the last observation
119-
optimise!(policy, PostEpisodeStage())
120-
push!(hook, PostEpisodeStage(), policy, env)
119+
@timeit_debug to "push!(policy) PostEpisodeStage" push!(policy, PostEpisodeStage(), env) # let the policy see the last observation
120+
@timeit_debug to "optimise! PostEpisodeStage" optimise!(policy, PostEpisodeStage())
121+
@timeit_debug to "push!(hook) PostEpisodeStage" push!(hook, PostEpisodeStage(), policy, env)
121122

122123
end
123124
push!(policy, PostExperimentStage(), env)

src/ReinforcementLearningCore/src/policies/agent/multi_agent.jl

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -108,34 +108,35 @@ function Base.run(
108108
push!(multiagent_policy, PreExperimentStage(), env)
109109
is_stop = false
110110
while !is_stop
111-
reset!(env)
112-
push!(multiagent_policy, PreEpisodeStage(), env)
113-
optimise!(multiagent_policy, PreEpisodeStage())
114-
push!(multiagent_hook, PreEpisodeStage(), multiagent_policy, env)
111+
# NOTE: @timeit_debug only collects timings after `TimerOutputs.enable_debug_timings(ReinforcementLearningCore)` has been called
112+
@timeit_debug to "reset!" reset!(env)
113+
@timeit_debug to "push!(policy) PreEpisodeStage" push!(multiagent_policy, PreEpisodeStage(), env)
114+
@timeit_debug to "optimise! PreEpisodeStage" optimise!(multiagent_policy, PreEpisodeStage())
115+
@timeit_debug to "push!(hook) PreEpisodeStage" push!(multiagent_hook, PreEpisodeStage(), multiagent_policy, env)
115116

116117
while !(reset_condition(multiagent_policy, env) || is_stop) # one episode
117118
for player in CurrentPlayerIterator(env)
118119
policy = multiagent_policy[player] # Select appropriate policy
119120
hook = multiagent_hook[player] # Select appropriate hook
120-
push!(policy, PreActStage(), env)
121-
optimise!(policy, PreActStage())
122-
push!(hook, PreActStage(), policy, env)
121+
@timeit_debug to "push!(policy) PreActStage" push!(policy, PreActStage(), env)
122+
@timeit_debug to "optimise! PreActStage" optimise!(policy, PreActStage())
123+
@timeit_debug to "push!(hook) PreActStage" push!(hook, PreActStage(), policy, env)
123124

124-
action = RLBase.plan!(policy, env)
125-
act!(env, action)
125+
action = @timeit_debug to "plan!" RLBase.plan!(policy, env)
126+
@timeit_debug to "act!" act!(env, action)
126127

127128

128129

129-
push!(policy, PostActStage(), env)
130-
optimise!(policy, PostActStage())
131-
push!(hook, PostActStage(), policy, env)
130+
@timeit_debug to "push!(policy) PostActStage" push!(policy, PostActStage(), env)
131+
@timeit_debug to "optimise! PostActStage" optimise!(policy, PostActStage())
132+
@timeit_debug to "push!(hook) PostActStage" push!(hook, PostActStage(), policy, env)
132133

133134
if check_stop(stop_condition, policy, env)
134135
is_stop = true
135-
push!(multiagent_policy, PreActStage(), env)
136-
optimise!(multiagent_policy, PreActStage())
137-
push!(multiagent_hook, PreActStage(), policy, env)
138-
RLBase.plan!(multiagent_policy, env) # let the policy see the last observation
136+
@timeit_debug to "push!(policy) PreActStage" push!(multiagent_policy, PreActStage(), env)
137+
@timeit_debug to "optimise! PreActStage" optimise!(multiagent_policy, PreActStage())
138+
@timeit_debug to "push!(hook) PreActStage" push!(multiagent_hook, PreActStage(), policy, env)
139+
@timeit_debug to "plan!" RLBase.plan!(multiagent_policy, env) # let the policy see the last observation
139140
break
140141
end
141142

@@ -145,9 +146,9 @@ function Base.run(
145146
end
146147
end # end of an episode
147148

148-
push!(multiagent_policy, PostEpisodeStage(), env) # let the policy see the last observation
149-
optimise!(multiagent_policy, PostEpisodeStage())
150-
push!(multiagent_hook, PostEpisodeStage(), multiagent_policy, env)
149+
@timeit_debug to "push!(policy) PostEpisodeStage" push!(multiagent_policy, PostEpisodeStage(), env) # let the policy see the last observation
150+
@timeit_debug to "optimise! PostEpisodeStage" optimise!(multiagent_policy, PostEpisodeStage())
151+
@timeit_debug to "push!(hook) PostEpisodeStage" push!(multiagent_hook, PostEpisodeStage(), multiagent_policy, env)
151152
end
152153
push!(multiagent_policy, PostExperimentStage(), env)
153154
push!(multiagent_hook, PostExperimentStage(), multiagent_policy, env)

0 commit comments

Comments
 (0)