Fix Timer bug (#915)

jeremiahpslewis · web-flow · commit b54a0b0e66e2 · 2023-06-30T15:35:59.000+02:00
* Fix variable naming

* Bug fix version bump

* Add debug test

* Add import statement
diff --git a/src/ReinforcementLearningCore/Project.toml b/src/ReinforcementLearningCore/Project.toml
@@ -1,6 +1,6 @@
 name = "ReinforcementLearningCore"
 uuid = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6"
-version = "0.11.1"
+version = "0.11.2"
 
 [deps]
 AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
diff --git a/src/ReinforcementLearningCore/src/core/run.jl b/src/ReinforcementLearningCore/src/core/run.jl
@@ -88,37 +88,37 @@ function _run(policy::AbstractPolicy,
     is_stop = false
     while !is_stop
         # NOTE: @timeit_debug statements are used for debug logging
-        @timeit_debug to "reset!"                            reset!(env)
-        @timeit_debug to "push!(policy) PreEpisodeStage"     push!(policy, PreEpisodeStage(), env)
-        @timeit_debug to "optimise! PreEpisodeStage"         optimise!(policy, PreEpisodeStage())
-        @timeit_debug to "push!(hook) PreEpisodeStage"       push!(hook, PreEpisodeStage(), policy, env)
+        @timeit_debug timer "reset!"                            reset!(env)
+        @timeit_debug timer "push!(policy) PreEpisodeStage"     push!(policy, PreEpisodeStage(), env)
+        @timeit_debug timer "optimise! PreEpisodeStage"         optimise!(policy, PreEpisodeStage())
+        @timeit_debug timer "push!(hook) PreEpisodeStage"       push!(hook, PreEpisodeStage(), policy, env)
 
 
         while !reset_condition(policy, env) # one episode
-            @timeit_debug to "push!(policy) PreActStage"     push!(policy, PreActStage(), env)
-            @timeit_debug to "optimise! PreActStage"         optimise!(policy, PreActStage())
-            @timeit_debug to "push!(hook) PreActStage"       push!(hook, PreActStage(), policy, env)
+            @timeit_debug timer "push!(policy) PreActStage"     push!(policy, PreActStage(), env)
+            @timeit_debug timer "optimise! PreActStage"         optimise!(policy, PreActStage())
+            @timeit_debug timer "push!(hook) PreActStage"       push!(hook, PreActStage(), policy, env)
 
-            action = @timeit_debug to "plan!"                RLBase.plan!(policy, env)
-            @timeit_debug to "act!"                          act!(env, action)
+            action = @timeit_debug timer "plan!"                RLBase.plan!(policy, env)
+            @timeit_debug timer "act!"                          act!(env, action)
 
-            @timeit_debug to "push!(policy) PostActStage"    push!(policy, PostActStage(), env)
-            @timeit_debug to "optimise! PostActStage"        optimise!(policy, PostActStage())
-            @timeit_debug to "push!(hook) PostActStage"      push!(hook, PostActStage(), policy, env)
+            @timeit_debug timer "push!(policy) PostActStage"    push!(policy, PostActStage(), env)
+            @timeit_debug timer "optimise! PostActStage"        optimise!(policy, PostActStage())
+            @timeit_debug timer "push!(hook) PostActStage"      push!(hook, PostActStage(), policy, env)
 
             if check_stop(stop_condition, policy, env)
                 is_stop = true
-                @timeit_debug to "push!(policy) PreActStage"   push!(policy, PreActStage(), env)
-                @timeit_debug to "optimise! PreActStage"       optimise!(policy, PreActStage())
-                @timeit_debug to "push!(hook) PreActStage"     push!(hook, PreActStage(), policy, env)
-                @timeit_debug to "plan!"                       RLBase.plan!(policy, env)  # let the policy see the last observation
+                @timeit_debug timer "push!(policy) PreActStage"   push!(policy, PreActStage(), env)
+                @timeit_debug timer "optimise! PreActStage"       optimise!(policy, PreActStage())
+                @timeit_debug timer "push!(hook) PreActStage"     push!(hook, PreActStage(), policy, env)
+                @timeit_debug timer "plan!"                       RLBase.plan!(policy, env)  # let the policy see the last observation
                 break
             end
         end # end of an episode
 
-        @timeit_debug to "push!(policy) PostEpisodeStage"      push!(policy, PostEpisodeStage(), env)  # let the policy see the last observation
-        @timeit_debug to "optimise! PostEpisodeStage"          optimise!(policy, PostEpisodeStage())
-        @timeit_debug to "push!(hook) PostEpisodeStage"        push!(hook, PostEpisodeStage(), policy, env)
+        @timeit_debug timer "push!(policy) PostEpisodeStage"      push!(policy, PostEpisodeStage(), env)  # let the policy see the last observation
+        @timeit_debug timer "optimise! PostEpisodeStage"          optimise!(policy, PostEpisodeStage())
+        @timeit_debug timer "push!(hook) PostEpisodeStage"        push!(hook, PostEpisodeStage(), policy, env)
 
     end
     push!(policy, PostExperimentStage(), env)
diff --git a/src/ReinforcementLearningCore/src/policies/agent/multi_agent.jl b/src/ReinforcementLearningCore/src/policies/agent/multi_agent.jl
@@ -109,34 +109,34 @@ function Base.run(
     is_stop = false
     while !is_stop
         # NOTE: @timeit_debug statements are for debug logging
-        @timeit_debug to "reset!"                             reset!(env)
-        @timeit_debug to "push!(policy) PreEpisodeStage"      push!(multiagent_policy, PreEpisodeStage(), env)
-        @timeit_debug to "optimise! PreEpisodeStage"          optimise!(multiagent_policy, PreEpisodeStage())
-        @timeit_debug to "push!(hook) PreEpisodeStage"        push!(multiagent_hook, PreEpisodeStage(), multiagent_policy, env)
+        @timeit_debug timer "reset!"                             reset!(env)
+        @timeit_debug timer "push!(policy) PreEpisodeStage"      push!(multiagent_policy, PreEpisodeStage(), env)
+        @timeit_debug timer "optimise! PreEpisodeStage"          optimise!(multiagent_policy, PreEpisodeStage())
+        @timeit_debug timer "push!(hook) PreEpisodeStage"        push!(multiagent_hook, PreEpisodeStage(), multiagent_policy, env)
 
         while !(reset_condition(multiagent_policy, env) || is_stop) # one episode
             for player in CurrentPlayerIterator(env)
                 policy = multiagent_policy[player] # Select appropriate policy
                 hook = multiagent_hook[player] # Select appropriate hook
-                @timeit_debug to "push!(policy) PreActStage"    push!(policy, PreActStage(), env)
-                @timeit_debug to "optimise! PreActStage"        optimise!(policy, PreActStage())
-                @timeit_debug to "push!(hook) PreActStage"      push!(hook, PreActStage(), policy, env)
+                @timeit_debug timer "push!(policy) PreActStage"    push!(policy, PreActStage(), env)
+                @timeit_debug timer "optimise! PreActStage"        optimise!(policy, PreActStage())
+                @timeit_debug timer "push!(hook) PreActStage"      push!(hook, PreActStage(), policy, env)
                 
-                action = @timeit_debug to "plan!"               RLBase.plan!(policy, env)
-                @timeit_debug to "act!" act!(env, action)
+                action = @timeit_debug timer "plan!"               RLBase.plan!(policy, env)
+                @timeit_debug timer "act!" act!(env, action)
 
                 
 
-                @timeit_debug to "push!(policy) PostActStage"     push!(policy, PostActStage(), env)
-                @timeit_debug to "optimise! PostActStage"         optimise!(policy, PostActStage())
-                @timeit_debug to "push!(hook) PostActStage"       push!(hook, PostActStage(), policy, env)
+                @timeit_debug timer "push!(policy) PostActStage"     push!(policy, PostActStage(), env)
+                @timeit_debug timer "optimise! PostActStage"         optimise!(policy, PostActStage())
+                @timeit_debug timer "push!(hook) PostActStage"       push!(hook, PostActStage(), policy, env)
 
                 if check_stop(stop_condition, policy, env)
                     is_stop = true
-                    @timeit_debug to "push!(policy) PreActStage"  push!(multiagent_policy, PreActStage(), env)
-                    @timeit_debug to "optimise! PreActStage"      optimise!(multiagent_policy, PreActStage())
-                    @timeit_debug to "push!(hook) PreActStage"    push!(multiagent_hook, PreActStage(), policy, env)
-                    @timeit_debug to "plan!"                      RLBase.plan!(multiagent_policy, env)  # let the policy see the last observation
+                    @timeit_debug timer "push!(policy) PreActStage"  push!(multiagent_policy, PreActStage(), env)
+                    @timeit_debug timer "optimise! PreActStage"      optimise!(multiagent_policy, PreActStage())
+                    @timeit_debug timer "push!(hook) PreActStage"    push!(multiagent_hook, PreActStage(), policy, env)
+                    @timeit_debug timer "plan!"                      RLBase.plan!(multiagent_policy, env)  # let the policy see the last observation
                     break
                 end
 
@@ -146,9 +146,9 @@ function Base.run(
             end
         end # end of an episode
 
-        @timeit_debug to "push!(policy) PostEpisodeStage"         push!(multiagent_policy, PostEpisodeStage(), env)  # let the policy see the last observation
-        @timeit_debug to "optimise! PostEpisodeStage"             optimise!(multiagent_policy, PostEpisodeStage())
-        @timeit_debug to "push!(hook) PostEpisodeStage"           push!(multiagent_hook, PostEpisodeStage(), multiagent_policy, env)
+        @timeit_debug timer "push!(policy) PostEpisodeStage"         push!(multiagent_policy, PostEpisodeStage(), env)  # let the policy see the last observation
+        @timeit_debug timer "optimise! PostEpisodeStage"             optimise!(multiagent_policy, PostEpisodeStage())
+        @timeit_debug timer "push!(hook) PostEpisodeStage"           push!(multiagent_hook, PostEpisodeStage(), multiagent_policy, env)
     end
     push!(multiagent_policy, PostExperimentStage(), env)
     push!(multiagent_hook, PostExperimentStage(), multiagent_policy, env)
diff --git a/src/ReinforcementLearningCore/test/core/base.jl b/src/ReinforcementLearningCore/test/core/base.jl
@@ -1,5 +1,6 @@
 using ReinforcementLearningCore: SRT
 using ReinforcementLearningBase
+using TimerOutputs
 
 @testset "core" begin
     @testset "simple workflow" begin
@@ -54,4 +55,23 @@ using ReinforcementLearningBase
             @test sum(hook[]) == length(agent.trajectory.container)
         end        
     end
+
+    @testset "Debug Timer" begin  # Smoke test: do a short run with TimerOutputs debug timings switched on
+        RLCore.TimerOutputs.enable_debug_timings(RLCore)  # NOTE(review): not disabled again inside this testset — confirm later tests are unaffected
+
+        env = RandomWalk1D()
+        agent = Agent(
+            RandomPolicy(legal_action_space(env)),
+            Trajectory(
+                CircularArraySARTTraces(; capacity = 1_000),
+                BatchSampler(1),
+                InsertSampleRatioController(n_inserted = -1),
+            ),
+            SRT{Any, Any, Any}(),
+        )            
+        stop_condition = StopAfterStep(123; is_show_progress=false)
+        hook = StepsPerEpisode()
+        run(agent, env, stop_condition, hook)  # presumably reaches the run loop whose @timeit_debug statements reference `timer` — verify dispatch
+        @test RLCore.timer isa TimerOutputs.TimerOutput  # the `timer` binding in RLCore must be a TimerOutput
+    end
 end