Add monitor keyword argument parameter (#304)

araffin · web-flow · commit 0c5becd3142a · 2022-10-24T15:39:14.000+02:00
* Allow custom monitor kwargs

* Add test and doc

* Update changelog

* Fix test

* Update README

* Allow `python -m rl_zoo3.cli` to be called directly
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,11 +1,13 @@
-## Release 1.7.0a0 (WIP)
+## Release 1.7.0a1 (WIP)
 
 ### Breaking Changes
 
 ### New Features
 - Specifying custom policies in yaml file is now supported (@Rick-v-E)
+- Added ``monitor_kwargs`` parameter
 
 ### Bug fixes
+- Allow `python -m rl_zoo3.cli` to be called directly
 
 ### Documentation
 
diff --git a/README.md b/README.md
@@ -296,6 +296,23 @@ env_wrapper:
 
 Note that you can easily specify parameters too.
 
+By default, the environment is wrapped with a `Monitor` wrapper to record episode statistics.
+You can pass arguments to it using the `monitor_kwargs` parameter, for example to log additional data.
+That data *must* be present in the info dictionary at the last step of each episode.
+
+For instance, for recording success with goal envs (e.g. `FetchReach-v1`):
+
+```yaml
+monitor_kwargs: dict(info_keywords=('is_success',))
+```
+
+or recording final x position with `Ant-v3`:
+```yaml
+monitor_kwargs: dict(info_keywords=('x_position',))
+```
+
+Note: for known `GoalEnv` environments such as `FetchReach`, `info_keywords=('is_success',)` is already the default.
+
 ## VecEnvWrapper
 
 You can specify which `VecEnvWrapper` to use in the config, the same way as for env wrappers (see above), using the `vec_env_wrapper` key:
diff --git a/rl_zoo3/cli.py b/rl_zoo3/cli.py
@@ -20,3 +20,7 @@ def main():
     if script_name not in known_scripts.keys():
         raise ValueError(f"The script {script_name} is unknown, please use one of {known_scripts.keys()}")
     known_scripts[script_name]()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/rl_zoo3/exp_manager.py b/rl_zoo3/exp_manager.py
@@ -135,6 +135,7 @@ def __init__(
         self.n_envs = 1  # it will be updated when reading hyperparams
         self.n_actions = None  # For DDPG/TD3 action noise objects
         self._hyperparams = {}
+        self.monitor_kwargs = {}
 
         self.trained_agent = trained_agent
         self.continue_training = trained_agent.endswith(".zip") and os.path.isfile(trained_agent)
@@ -381,6 +382,14 @@ def _preprocess_hyperparams(
             if kwargs_key in hyperparams.keys() and isinstance(hyperparams[kwargs_key], str):
                 hyperparams[kwargs_key] = eval(hyperparams[kwargs_key])
 
+        # Preprocess monitor kwargs
+        if "monitor_kwargs" in hyperparams.keys():
+            self.monitor_kwargs = hyperparams["monitor_kwargs"]
+            # Convert str to python code
+            if isinstance(self.monitor_kwargs, str):
+                self.monitor_kwargs = eval(self.monitor_kwargs)
+            del hyperparams["monitor_kwargs"]
+
         # Delete keys so the dict can be pass to the model constructor
         if "n_envs" in hyperparams.keys():
             del hyperparams["n_envs"]
@@ -550,14 +559,14 @@ def create_envs(self, n_envs: int, eval_env: bool = False, no_log: bool = False)
         # Do not log eval env (issue with writing the same file)
         log_dir = None if eval_env or no_log else self.save_path
 
-        monitor_kwargs = {}
         # Special case for GoalEnvs: log success rate too
         if (
             "Neck" in self.env_name.gym_id
             or self.is_robotics_env(self.env_name.gym_id)
             or "parking-v0" in self.env_name.gym_id
+            and len(self.monitor_kwargs) == 0  # do not overwrite custom kwargs
         ):
-            monitor_kwargs = dict(info_keywords=("is_success",))
+            self.monitor_kwargs = dict(info_keywords=("is_success",))
 
         # On most env, SubprocVecEnv does not help and is quite memory hungry
         # therefore we use DummyVecEnv by default
@@ -570,7 +579,7 @@ def create_envs(self, n_envs: int, eval_env: bool = False, no_log: bool = False)
             wrapper_class=self.env_wrapper,
             vec_env_cls=self.vec_env_class,
             vec_env_kwargs=self.vec_env_kwargs,
-            monitor_kwargs=monitor_kwargs,
+            monitor_kwargs=self.monitor_kwargs,
         )
 
         if self.vec_env_wrapper is not None:
diff --git a/rl_zoo3/version.txt b/rl_zoo3/version.txt
@@ -1 +1 @@
-1.7.0a0
+1.7.0a1
diff --git a/tests/test_train.py b/tests/test_train.py
@@ -96,6 +96,8 @@ def test_parallel_train(tmp_path):
         "--log-folder",
         tmp_path,
         "-params",
+        # Test custom argument for the monitor too
+        "monitor_kwargs:'dict(info_keywords=(\"TimeLimit.truncated\",))'",
         "callback:'rl_zoo3.callbacks.ParallelTrainCallback'",
     ]
 

Original file line number	Diff line number	Diff line change
`@@ -96,6 +96,8 @@ def test_parallel_train(tmp_path):`
`96`	`96`	`"--log-folder",`
`97`	`97`	`tmp_path,`
`98`	`98`	`"-params",`
	`99`	`+ # Test custom argument for the monitor too`
	`100`	`+ "monitor_kwargs:'dict(info_keywords=(\"TimeLimit.truncated\",))'",`
`99`	`101`	`"callback:'rl_zoo3.callbacks.ParallelTrainCallback'",`
`100`	`102`	`]`
`101`	`103`