
Commit 3692ace

ooctipus and kellyguo11 authored
Support the SB3 wrapper to pre-process the env's image obs space to trigger SB3's natively supported CNN creation pipeline (#2812)
# Description

This PR modifies the SB3 wrapper so that it uses SB3's natively supported encoder creation on a properly defined composite observation space. SB3's automatic CNN encoding applies to an observation term when:

1. the term's observation space has a grayscale, RGB, or RGBD shape;
2. the agent config has the normalized flag set, in which case the data is expected to be channel-first and pre-normalized; or
3. the agent config does not have the normalized flag set, in which case the space is expected to have min=0, max=255, and dtype=uint8.

This PR makes the SB3 wrapper adjust each image observation term to meet either condition 2 or condition 3 by inspecting the space's min and max, so that SB3's CNN creation pipeline is applied automatically (see the sketch after the checklist below).

## Type of change

- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there

Co-authored-by: Kelly Guo <kellyg@nvidia.com>
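As a rough illustration of those conditions (not part of the change; the 64×64 RGB shapes are hypothetical), the two image-space layouts SB3 accepts can be expressed with `gymnasium` spaces and checked with SB3's `is_image_space` helper:

```python
# Hypothetical 64x64 RGB shapes; gymnasium is assumed (as used by SB3 2.x and Isaac Lab).
import gymnasium as gym
import numpy as np
from stable_baselines3.common.preprocessing import is_image_space

# Condition 3: raw image term -- uint8 values in [0, 255] (channel-last here).
raw_rgb = gym.spaces.Box(low=0, high=255, shape=(64, 64, 3), dtype=np.uint8)
print(is_image_space(raw_rgb))  # True: SB3 will normalize and transpose this itself

# Condition 2: pre-normalized image term -- channel-first float values in [-1, 1].
norm_rgb = gym.spaces.Box(low=-1.0, high=1.0, shape=(3, 64, 64), dtype=np.float32)
print(is_image_space(norm_rgb))                         # False: dtype/bounds check fails
print(is_image_space(norm_rgb, normalized_image=True))  # True: dtype/bounds checks are relaxed
```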
1 parent d02d3b8 · commit 3692ace

File tree

3 files changed · +69 −13 lines changed


source/isaaclab_rl/config/extension.toml
Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.1.7"
+version = "0.1.8"
 
 # Description
 title = "Isaac Lab RL"

source/isaaclab_rl/docs/CHANGELOG.rst
Lines changed: 10 additions & 0 deletions

@@ -1,6 +1,16 @@
 Changelog
 ---------
 
+0.1.8 (2025-06-29)
+~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Support configuring the SB3 VecEnv wrapper with composite observation spaces properly so that the CNN creation
+  pipelines natively supported by SB3 can be triggered automatically.
+
+
 0.1.7 (2025-06-30)
 ~~~~~~~~~~~~~~~~~~
 
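As a usage note (a hedged sketch, not taken from the commit): once the wrapper exposes a Dict observation space whose image terms satisfy SB3's checks, `"MultiInputPolicy"` builds the CNN encoder (SB3's `CombinedExtractor` with `NatureCNN`) without extra configuration. `DummyImageEnv` below is a hypothetical stand-in for a wrapped Isaac Lab environment.

```python
# Hedged, self-contained sketch: a toy Dict-observation env with an image term in the
# [0, 255] uint8 layout, trained with SB3's MultiInputPolicy (CNN created automatically).
import gymnasium as gym
import numpy as np
from stable_baselines3 import PPO


class DummyImageEnv(gym.Env):
    def __init__(self):
        self.observation_space = gym.spaces.Dict(
            {
                "rgb": gym.spaces.Box(0, 255, (64, 64, 3), np.uint8),      # image term (condition 3)
                "state": gym.spaces.Box(-np.inf, np.inf, (8,), np.float32),  # flat vector term
            }
        )
        self.action_space = gym.spaces.Box(-1.0, 1.0, (2,), np.float32)

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)
        return self.observation_space.sample(), {}

    def step(self, action):
        # No task logic: zero reward, never terminates -- enough to exercise the CNN pipeline.
        return self.observation_space.sample(), 0.0, False, False, {}


agent = PPO("MultiInputPolicy", DummyImageEnv(), n_steps=32, batch_size=32, verbose=0)
agent.learn(total_timesteps=64)
```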

source/isaaclab_rl/isaaclab_rl/sb3.py
Lines changed: 58 additions & 12 deletions
@@ -25,6 +25,7 @@
 import warnings
 from typing import Any
 
+from stable_baselines3.common.preprocessing import is_image_space, is_image_space_channels_first
 from stable_baselines3.common.utils import constant_fn
 from stable_baselines3.common.vec_env.base_vec_env import VecEnv, VecEnvObs, VecEnvStepReturn
 
@@ -156,17 +157,8 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, fast_variant: bool = Tr
         self.num_envs = self.unwrapped.num_envs
         self.sim_device = self.unwrapped.device
         self.render_mode = self.unwrapped.render_mode
-
-        # obtain gym spaces
-        # note: stable-baselines3 does not like when we have unbounded action space so
-        # we set it to some high value here. Maybe this is not general but something to think about.
-        observation_space = self.unwrapped.single_observation_space["policy"]
-        action_space = self.unwrapped.single_action_space
-        if isinstance(action_space, gym.spaces.Box) and not action_space.is_bounded("both"):
-            action_space = gym.spaces.Box(low=-100, high=100, shape=action_space.shape)
-
-        # initialize vec-env
-        VecEnv.__init__(self, self.num_envs, observation_space, action_space)
+        self.observation_processors = {}
+        self._process_spaces()
         # add buffer for logging episodic information
         self._ep_rew_buf = np.zeros(self.num_envs)
         self._ep_len_buf = np.zeros(self.num_envs)
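The constructor now defers space handling to `_process_spaces()` (shown in the next hunk), which relies on the two helpers imported above to decide whether a term is an image and whether it is already channel-first. A minimal stand-alone sketch of what those helpers report, and of the HWC → CHW transpose the wrapper registers, using hypothetical 64×64 shapes:

```python
# Hypothetical shapes, for illustration only.
import gymnasium as gym
import numpy as np
import torch
from stable_baselines3.common.preprocessing import is_image_space_channels_first

hwc_space = gym.spaces.Box(0, 255, (64, 64, 3), np.uint8)  # channel-last (H, W, C)
chw_space = gym.spaces.Box(0, 255, (3, 64, 64), np.uint8)  # channel-first (C, H, W)
print(is_image_space_channels_first(hwc_space))  # False
print(is_image_space_channels_first(chw_space))  # True

# Batched observation from the env: (N, H, W, C) -> (N, C, H, W),
# done in torch (and on the GPU, if that is where the tensor lives).
batch = torch.zeros(4, 64, 64, 3)
print(batch.permute(0, 3, 1, 2).shape)  # torch.Size([4, 3, 64, 64])
```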
@@ -303,14 +295,68 @@ def get_images(self): # noqa: D102
     Helper functions.
     """
 
+    def _process_spaces(self):
+        # process observation space
+        observation_space = self.unwrapped.single_observation_space["policy"]
+        if isinstance(observation_space, gym.spaces.Dict):
+            for obs_key, obs_space in observation_space.spaces.items():
+                processors: list[callable[[torch.Tensor], Any]] = []
+                # assume normalized; if not, it won't pass is_image_space, which checks [0, 255].
+                # image-like spaces that have the right shape but are not scaled will be rescaled later.
+                if is_image_space(obs_space, check_channels=True, normalized_image=True):
+                    actually_normalized = np.all(obs_space.low == -1.0) and np.all(obs_space.high == 1.0)
+                    if not actually_normalized:
+                        if np.any(obs_space.low != 0) or np.any(obs_space.high != 255):
+                            raise ValueError(
+                                "Your image observation is not normalized in the environment, and it will not be"
+                                " normalized by sb3 unless its min is 0 and its max is 255."
+                            )
+                        # sb3 will handle normalization and transpose, but sb3 expects uint8 images
+                        if obs_space.dtype != np.uint8:
+                            processors.append(lambda obs: obs.to(torch.uint8))
+                        observation_space.spaces[obs_key] = gym.spaces.Box(0, 255, obs_space.shape, np.uint8)
+                    else:
+                        # sb3 will NOT handle the normalization here. sb3 would handle the transpose, but its
+                        # transpose applies to all image terms, which may be non-ideal; moreover, doing it in
+                        # torch on the GPU is faster than sb3 transposing in numpy on the CPU.
+                        if not is_image_space_channels_first(obs_space):
+
+                            def tranp(img: torch.Tensor) -> torch.Tensor:
+                                return img.permute(2, 0, 1) if len(img.shape) == 3 else img.permute(0, 3, 1, 2)
+
+                            processors.append(tranp)
+                            h, w, c = obs_space.shape
+                            observation_space.spaces[obs_key] = gym.spaces.Box(-1.0, 1.0, (c, h, w), obs_space.dtype)
+
+                def chained_processor(obs: torch.Tensor, procs=processors) -> Any:
+                    for proc in procs:
+                        obs = proc(obs)
+                    return obs
+
+                # add processor to the dictionary
+                if len(processors) > 0:
+                    self.observation_processors[obs_key] = chained_processor
+
+        # obtain gym spaces
+        # note: stable-baselines3 does not like when we have unbounded action space so
+        # we set it to some high value here. Maybe this is not general but something to think about.
+        action_space = self.unwrapped.single_action_space
+        if isinstance(action_space, gym.spaces.Box) and not action_space.is_bounded("both"):
+            action_space = gym.spaces.Box(low=-100, high=100, shape=action_space.shape)
+
+        # initialize vec-env
+        VecEnv.__init__(self, self.num_envs, observation_space, action_space)
+
     def _process_obs(self, obs_dict: torch.Tensor | dict[str, torch.Tensor]) -> np.ndarray | dict[str, np.ndarray]:
         """Convert observations into NumPy data type."""
         # Sb3 doesn't support asymmetric observation spaces, so we only use "policy"
         obs = obs_dict["policy"]
         # note: ManagerBasedRLEnv uses torch backend (by default).
         if isinstance(obs, dict):
             for key, value in obs.items():
-                obs[key] = value.detach().cpu().numpy()
+                if key in self.observation_processors:
+                    obs[key] = self.observation_processors[key](value)
+                obs[key] = obs[key].detach().cpu().numpy()
         elif isinstance(obs, torch.Tensor):
             obs = obs.detach().cpu().numpy()
         else:
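Two details in the new code are easy to miss: `chained_processor` binds the per-key processor list through a default argument (`procs=processors`), so each observation key keeps its own chain instead of every closure sharing the last list built in the loop, and `_process_obs` applies the chain while the data is still a torch tensor, before the numpy conversion. A small self-contained sketch of that pattern, with hypothetical keys and shapes (not taken from the diff):

```python
# Stand-alone sketch of the per-key processor-chain pattern used above.
import torch

processors_by_key = {}
for key, needs_permute in [("rgb", True), ("depth", False)]:
    procs = []
    if needs_permute:
        procs.append(lambda obs: obs.permute(0, 3, 1, 2))  # (N, H, W, C) -> (N, C, H, W)

    # Binding `procs` as a default argument freezes the list per key; a plain closure over
    # the loop variable would make every key reuse whichever list was built last.
    def chained(obs: torch.Tensor, procs=procs) -> torch.Tensor:
        for proc in procs:
            obs = proc(obs)
        return obs

    if procs:
        processors_by_key[key] = chained

obs = {"rgb": torch.zeros(4, 64, 64, 3), "depth": torch.zeros(4, 64, 64, 1)}
for key, value in obs.items():
    if key in processors_by_key:
        value = processors_by_key[key](value)
    obs[key] = value.detach().cpu().numpy()  # torch -> numpy, as in _process_obs

print(obs["rgb"].shape)    # (4, 3, 64, 64): transposed by its registered processor
print(obs["depth"].shape)  # (4, 64, 64, 1): no processor registered, left unchanged
```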
