Add performance testing run loop (#914)

jeremiahpslewis · web-flow · commit 91d80e97af49 · 2023-06-29T16:35:53.000+02:00
diff --git a/docs/src/tips.md b/docs/src/tips.md
@@ -27,3 +27,8 @@ dependency, remember to update both `docs/Project.toml` and
     All the cells after the `#+ tangle=true` line in `Your_Experment.jl` will be extracted into the
     `ReinforcementLearningExperiments` package automatically. This feature is
     supported by [Weave.jl](https://weavejl.mpastell.com/stable/usage/#tangle).
+
+## How to enable debug timings for experiment runs?
+
+Call `RLCore.TimerOutputs.enable_debug_timings(RLCore)` to start recording timings for hooks, policies and optimization steps during experiment runs. To reset the timer, call `RLCore.TimerOutputs.reset_timer!(RLCore.timer)`. To show the collected timings, display `RLCore.timer` (for example, by evaluating it in the REPL).
+
diff --git a/src/ReinforcementLearningCore/Project.toml b/src/ReinforcementLearningCore/Project.toml
@@ -22,6 +22,7 @@ ReinforcementLearningBase = "e575027e-6cd6-5018-9292-cdc6200d2b44"
 ReinforcementLearningTrajectories = "6486599b-a3cd-4e92-a99a-2cea90cc8c3c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
 UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
 
 [compat]
diff --git a/src/ReinforcementLearningCore/src/ReinforcementLearningCore.jl b/src/ReinforcementLearningCore/src/ReinforcementLearningCore.jl
@@ -1,5 +1,6 @@
 module ReinforcementLearningCore
 
+using TimerOutputs
 using ReinforcementLearningBase
 using Reexport
 
@@ -14,4 +15,7 @@ include("core/core.jl")
 include("policies/policies.jl")
 include("utils/utils.jl")
 
+# Global timer for TimerOutputs.jl
+const timer = TimerOutput()
+
 end # module
diff --git a/src/ReinforcementLearningCore/src/core/run.jl b/src/ReinforcementLearningCore/src/core/run.jl
@@ -87,37 +87,38 @@ function _run(policy::AbstractPolicy,
     push!(policy, PreExperimentStage(), env)
     is_stop = false
     while !is_stop
-        reset!(env)
-        push!(policy, PreEpisodeStage(), env)
-        optimise!(policy, PreEpisodeStage())
-        push!(hook, PreEpisodeStage(), policy, env)
+        # NOTE: @timeit_debug records into the module-level `timer` only after debug timings are enabled for this module; otherwise it just runs the wrapped call
+        @timeit_debug timer "reset!"                            reset!(env)
+        @timeit_debug timer "push!(policy) PreEpisodeStage"     push!(policy, PreEpisodeStage(), env)
+        @timeit_debug timer "optimise! PreEpisodeStage"         optimise!(policy, PreEpisodeStage())
+        @timeit_debug timer "push!(hook) PreEpisodeStage"       push!(hook, PreEpisodeStage(), policy, env)
 
 
         while !reset_condition(policy, env) # one episode
-            push!(policy, PreActStage(), env)
-            optimise!(policy, PreActStage())
-            push!(hook, PreActStage(), policy, env)
+            @timeit_debug timer "push!(policy) PreActStage"     push!(policy, PreActStage(), env)
+            @timeit_debug timer "optimise! PreActStage"         optimise!(policy, PreActStage())
+            @timeit_debug timer "push!(hook) PreActStage"       push!(hook, PreActStage(), policy, env)
 
-            action = RLBase.plan!(policy, env)
-            act!(env, action)
+            action = @timeit_debug timer "plan!"                RLBase.plan!(policy, env)
+            @timeit_debug timer "act!"                          act!(env, action)
 
-            push!(policy, PostActStage(), env)
-            optimise!(policy, PostActStage())
-            push!(hook, PostActStage(), policy, env)
+            @timeit_debug timer "push!(policy) PostActStage"    push!(policy, PostActStage(), env)
+            @timeit_debug timer "optimise! PostActStage"        optimise!(policy, PostActStage())
+            @timeit_debug timer "push!(hook) PostActStage"      push!(hook, PostActStage(), policy, env)
 
             if check_stop(stop_condition, policy, env)
                 is_stop = true
-                push!(policy, PreActStage(), env)
-                optimise!(policy, PreActStage())
-                push!(hook, PreActStage(), policy, env)
-                RLBase.plan!(policy, env)  # let the policy see the last observation
+                @timeit_debug timer "push!(policy) PreActStage"   push!(policy, PreActStage(), env)
+                @timeit_debug timer "optimise! PreActStage"       optimise!(policy, PreActStage())
+                @timeit_debug timer "push!(hook) PreActStage"     push!(hook, PreActStage(), policy, env)
+                @timeit_debug timer "plan!"                       RLBase.plan!(policy, env)  # let the policy see the last observation
                 break
             end
         end # end of an episode
 
-        push!(policy, PostEpisodeStage(), env)  # let the policy see the last observation
-        optimise!(policy, PostEpisodeStage())
-        push!(hook, PostEpisodeStage(), policy, env)
+        @timeit_debug timer "push!(policy) PostEpisodeStage"      push!(policy, PostEpisodeStage(), env)  # let the policy see the last observation
+        @timeit_debug timer "optimise! PostEpisodeStage"          optimise!(policy, PostEpisodeStage())
+        @timeit_debug timer "push!(hook) PostEpisodeStage"        push!(hook, PostEpisodeStage(), policy, env)
 
     end
     push!(policy, PostExperimentStage(), env)
diff --git a/src/ReinforcementLearningCore/src/policies/agent/multi_agent.jl b/src/ReinforcementLearningCore/src/policies/agent/multi_agent.jl
@@ -108,34 +108,35 @@ function Base.run(
     push!(multiagent_policy, PreExperimentStage(), env)
     is_stop = false
     while !is_stop
-        reset!(env)
-        push!(multiagent_policy, PreEpisodeStage(), env)
-        optimise!(multiagent_policy, PreEpisodeStage())
-        push!(multiagent_hook, PreEpisodeStage(), multiagent_policy, env)
+        # NOTE: @timeit_debug records into the module-level `timer` only after debug timings are enabled for this module; otherwise it just runs the wrapped call
+        @timeit_debug timer "reset!"                             reset!(env)
+        @timeit_debug timer "push!(policy) PreEpisodeStage"      push!(multiagent_policy, PreEpisodeStage(), env)
+        @timeit_debug timer "optimise! PreEpisodeStage"          optimise!(multiagent_policy, PreEpisodeStage())
+        @timeit_debug timer "push!(hook) PreEpisodeStage"        push!(multiagent_hook, PreEpisodeStage(), multiagent_policy, env)
 
         while !(reset_condition(multiagent_policy, env) || is_stop) # one episode
             for player in CurrentPlayerIterator(env)
                 policy = multiagent_policy[player] # Select appropriate policy
                 hook = multiagent_hook[player] # Select appropriate hook
-                push!(policy, PreActStage(), env)
-                optimise!(policy, PreActStage())
-                push!(hook, PreActStage(), policy, env)
+                @timeit_debug timer "push!(policy) PreActStage"    push!(policy, PreActStage(), env)
+                @timeit_debug timer "optimise! PreActStage"        optimise!(policy, PreActStage())
+                @timeit_debug timer "push!(hook) PreActStage"      push!(hook, PreActStage(), policy, env)
                 
-                action = RLBase.plan!(policy, env)
-                act!(env, action)
+                action = @timeit_debug timer "plan!"               RLBase.plan!(policy, env)
+                @timeit_debug timer "act!" act!(env, action)
 
                 
 
-                push!(policy, PostActStage(), env)
-                optimise!(policy, PostActStage())
-                push!(hook, PostActStage(), policy, env)
+                @timeit_debug timer "push!(policy) PostActStage"     push!(policy, PostActStage(), env)
+                @timeit_debug timer "optimise! PostActStage"         optimise!(policy, PostActStage())
+                @timeit_debug timer "push!(hook) PostActStage"       push!(hook, PostActStage(), policy, env)
 
                 if check_stop(stop_condition, policy, env)
                     is_stop = true
-                    push!(multiagent_policy, PreActStage(), env)
-                    optimise!(multiagent_policy, PreActStage())
-                    push!(multiagent_hook, PreActStage(), policy, env)
-                    RLBase.plan!(multiagent_policy, env)  # let the policy see the last observation
+                    @timeit_debug timer "push!(policy) PreActStage"  push!(multiagent_policy, PreActStage(), env)
+                    @timeit_debug timer "optimise! PreActStage"      optimise!(multiagent_policy, PreActStage())
+                    @timeit_debug timer "push!(hook) PreActStage"    push!(multiagent_hook, PreActStage(), policy, env)
+                    @timeit_debug timer "plan!"                      RLBase.plan!(multiagent_policy, env)  # let the policy see the last observation
                     break
                 end
 
@@ -145,9 +146,9 @@ function Base.run(
             end
         end # end of an episode
 
-        push!(multiagent_policy, PostEpisodeStage(), env)  # let the policy see the last observation
-        optimise!(multiagent_policy, PostEpisodeStage())
-        push!(multiagent_hook, PostEpisodeStage(), multiagent_policy, env)
+        @timeit_debug timer "push!(policy) PostEpisodeStage"         push!(multiagent_policy, PostEpisodeStage(), env)  # let the policy see the last observation
+        @timeit_debug timer "optimise! PostEpisodeStage"             optimise!(multiagent_policy, PostEpisodeStage())
+        @timeit_debug timer "push!(hook) PostEpisodeStage"           push!(multiagent_hook, PostEpisodeStage(), multiagent_policy, env)
     end
     push!(multiagent_policy, PostExperimentStage(), env)
     push!(multiagent_hook, PostExperimentStage(), multiagent_policy, env)