
Commit 3692ace

ooctipus and kellyguo11 authored
Support the SB3 wrapper to pre-process the env's image obs space to trigger SB3's natively supported CNN creation pipeline (#2812)
# Description

This PR modifies the SB3 wrapper so that it uses SB3's natively supported encoder creation on a properly defined composite observation space. SB3's automatic CNN encoding applies to an observation term when:

1. the term's observation space has a grayscale, RGB, or RGBD shape;
2. the agent config has the normalized flag set, in which case the data is expected to be channel-first and pre-normalized; or
3. the agent config does not have the normalized flag set, in which case the space is expected to have min=0, max=255, and dtype=uint8.

This PR makes the SB3 wrapper adjust each image observation term to meet either condition 2 or condition 3 by inspecting the space's min and max, so that SB3's CNN creation pipeline is applied automatically (see the sketch after the checklist below).

## Type of change

- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there

Co-authored-by: Kelly Guo <kellyg@nvidia.com>
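As a rough illustration of those conditions (not part of the change; the 64×64 RGB shapes are hypothetical), the two image-space layouts SB3 accepts can be expressed with `gymnasium` spaces and checked with SB3's `is_image_space` helper:

```python
# Hypothetical 64x64 RGB shapes; gymnasium is assumed (as used by SB3 2.x and Isaac Lab).
import gymnasium as gym
import numpy as np
from stable_baselines3.common.preprocessing import is_image_space

# Condition 3: raw image term -- uint8 values in [0, 255] (channel-last here).
raw_rgb = gym.spaces.Box(low=0, high=255, shape=(64, 64, 3), dtype=np.uint8)
print(is_image_space(raw_rgb))  # True: SB3 will normalize and transpose this itself

# Condition 2: pre-normalized image term -- channel-first float values in [-1, 1].
norm_rgb = gym.spaces.Box(low=-1.0, high=1.0, shape=(3, 64, 64), dtype=np.float32)
print(is_image_space(norm_rgb))                         # False: dtype/bounds check fails
print(is_image_space(norm_rgb, normalized_image=True))  # True: dtype/bounds checks are relaxed
```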
1 parent d02d3b8 · commit 3692ace

File tree

3 files changed · +69 −13 lines changed


source/isaaclab_rl/config/extension.toml
Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.1.7"
+version = "0.1.8"
 
 # Description
 title = "Isaac Lab RL"

source/isaaclab_rl/docs/CHANGELOG.rst
Lines changed: 10 additions & 0 deletions

@@ -1,6 +1,16 @@
 Changelog
 ---------
 
+0.1.8 (2025-06-29)
+~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Support configuring the SB3 VecEnv wrapper with composite observation spaces properly so that the CNN creation
+  pipelines natively supported by SB3 can be triggered automatically.
+
+
 0.1.7 (2025-06-30)
 ~~~~~~~~~~~~~~~~~~
 
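As a usage note (a hedged sketch, not taken from the commit): once the wrapper exposes a Dict observation space whose image terms satisfy SB3's checks, `"MultiInputPolicy"` builds the CNN encoder (SB3's `CombinedExtractor` with `NatureCNN`) without extra configuration. `DummyImageEnv` below is a hypothetical stand-in for a wrapped Isaac Lab environment.

```python
# Hedged, self-contained sketch: a toy Dict-observation env with an image term in the
# [0, 255] uint8 layout, trained with SB3's MultiInputPolicy (CNN created automatically).
import gymnasium as gym
import numpy as np
from stable_baselines3 import PPO


class DummyImageEnv(gym.Env):
    def __init__(self):
        self.observation_space = gym.spaces.Dict(
            {
                "rgb": gym.spaces.Box(0, 255, (64, 64, 3), np.uint8),      # image term (condition 3)
                "state": gym.spaces.Box(-np.inf, np.inf, (8,), np.float32),  # flat vector term
            }
        )
        self.action_space = gym.spaces.Box(-1.0, 1.0, (2,), np.float32)

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)
        return self.observation_space.sample(), {}

    def step(self, action):
        # No task logic: zero reward, never terminates -- enough to exercise the CNN pipeline.
        return self.observation_space.sample(), 0.0, False, False, {}


agent = PPO("MultiInputPolicy", DummyImageEnv(), n_steps=32, batch_size=32, verbose=0)
agent.learn(total_timesteps=64)
```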

source/isaaclab_rl/isaaclab_rl/sb3.py
Lines changed: 58 additions & 12 deletions
@@ -25,6 +25,7 @@
 import warnings
 from typing import Any
 
+from stable_baselines3.common.preprocessing import is_image_space, is_image_space_channels_first
 from stable_baselines3.common.utils import constant_fn
 from stable_baselines3.common.vec_env.base_vec_env import VecEnv, VecEnvObs, VecEnvStepReturn
 
@@ -156,17 +157,8 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, fast_variant: bool = Tr
         self.num_envs = self.unwrapped.num_envs
         self.sim_device = self.unwrapped.device
         self.render_mode = self.unwrapped.render_mode
-
-        # obtain gym spaces
-        # note: stable-baselines3 does not like when we have unbounded action space so
-        # we set it to some high value here. Maybe this is not general but something to think about.
-        observation_space = self.unwrapped.single_observation_space["policy"]
-        action_space = self.unwrapped.single_action_space
-        if isinstance(action_space, gym.spaces.Box) and not action_space.is_bounded("both"):
-            action_space = gym.spaces.Box(low=-100, high=100, shape=action_space.shape)
-
-        # initialize vec-env
-        VecEnv.__init__(self, self.num_envs, observation_space, action_space)
+        self.observation_processors = {}
+        self._process_spaces()
         # add buffer for logging episodic information
         self._ep_rew_buf = np.zeros(self.num_envs)
         self._ep_len_buf = np.zeros(self.num_envs)
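The constructor now defers space handling to `_process_spaces()` (shown in the next hunk), which relies on the two helpers imported above to decide whether a term is an image and whether it is already channel-first. A minimal stand-alone sketch of what those helpers report, and of the HWC → CHW transpose the wrapper registers, using hypothetical 64×64 shapes:

```python
# Hypothetical shapes, for illustration only.
import gymnasium as gym
import numpy as np
import torch
from stable_baselines3.common.preprocessing import is_image_space_channels_first

hwc_space = gym.spaces.Box(0, 255, (64, 64, 3), np.uint8)  # channel-last (H, W, C)
chw_space = gym.spaces.Box(0, 255, (3, 64, 64), np.uint8)  # channel-first (C, H, W)
print(is_image_space_channels_first(hwc_space))  # False
print(is_image_space_channels_first(chw_space))  # True

# Batched observation from the env: (N, H, W, C) -> (N, C, H, W),
# done in torch (and on the GPU, if that is where the tensor lives).
batch = torch.zeros(4, 64, 64, 3)
print(batch.permute(0, 3, 1, 2).shape)  # torch.Size([4, 3, 64, 64])
```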
@@ -303,14 +295,68 @@ def get_images(self): # noqa: D102
     Helper functions.
     """
 
+    def _process_spaces(self):
+        # process observation space
+        observation_space = self.unwrapped.single_observation_space["policy"]
+        if isinstance(observation_space, gym.spaces.Dict):
+            for obs_key, obs_space in observation_space.spaces.items():
+                processors: list[callable[[torch.Tensor], Any]] = []
+                # assume normalized; if not, it won't pass is_image_space, which checks [0, 255].
+                # image-like spaces that have the right shape but are not scaled will be rescaled later.
+                if is_image_space(obs_space, check_channels=True, normalized_image=True):
+                    actually_normalized = np.all(obs_space.low == -1.0) and np.all(obs_space.high == 1.0)
+                    if not actually_normalized:
+                        if np.any(obs_space.low != 0) or np.any(obs_space.high != 255):
+                            raise ValueError(
+                                "Your image observation is not normalized in the environment, and it will not be"
+                                " normalized by sb3 unless its min is 0 and its max is 255."
+                            )
+                        # sb3 will handle normalization and transpose, but sb3 expects uint8 images
+                        if obs_space.dtype != np.uint8:
+                            processors.append(lambda obs: obs.to(torch.uint8))
+                        observation_space.spaces[obs_key] = gym.spaces.Box(0, 255, obs_space.shape, np.uint8)
+                    else:
+                        # sb3 will NOT handle the normalization here. sb3 would handle the transpose, but its
+                        # transpose applies to all image terms, which may be non-ideal; moreover, doing it in
+                        # torch on the GPU is faster than sb3 transposing in numpy on the CPU.
+                        if not is_image_space_channels_first(obs_space):
+
+                            def tranp(img: torch.Tensor) -> torch.Tensor:
+                                return img.permute(2, 0, 1) if len(img.shape) == 3 else img.permute(0, 3, 1, 2)
+
+                            processors.append(tranp)
+                            h, w, c = obs_space.shape
+                            observation_space.spaces[obs_key] = gym.spaces.Box(-1.0, 1.0, (c, h, w), obs_space.dtype)
+
+                def chained_processor(obs: torch.Tensor, procs=processors) -> Any:
+                    for proc in procs:
+                        obs = proc(obs)
+                    return obs
+
+                # add processor to the dictionary
+                if len(processors) > 0:
+                    self.observation_processors[obs_key] = chained_processor
+
+        # obtain gym spaces
+        # note: stable-baselines3 does not like when we have unbounded action space so
+        # we set it to some high value here. Maybe this is not general but something to think about.
+        action_space = self.unwrapped.single_action_space
+        if isinstance(action_space, gym.spaces.Box) and not action_space.is_bounded("both"):
+            action_space = gym.spaces.Box(low=-100, high=100, shape=action_space.shape)
+
+        # initialize vec-env
+        VecEnv.__init__(self, self.num_envs, observation_space, action_space)
+
     def _process_obs(self, obs_dict: torch.Tensor | dict[str, torch.Tensor]) -> np.ndarray | dict[str, np.ndarray]:
         """Convert observations into NumPy data type."""
         # Sb3 doesn't support asymmetric observation spaces, so we only use "policy"
         obs = obs_dict["policy"]
         # note: ManagerBasedRLEnv uses torch backend (by default).
         if isinstance(obs, dict):
             for key, value in obs.items():
-                obs[key] = value.detach().cpu().numpy()
+                if key in self.observation_processors:
+                    obs[key] = self.observation_processors[key](value)
+                obs[key] = obs[key].detach().cpu().numpy()
         elif isinstance(obs, torch.Tensor):
             obs = obs.detach().cpu().numpy()
         else:
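Two details in the new code are easy to miss: `chained_processor` binds the per-key processor list through a default argument (`procs=processors`), so each observation key keeps its own chain instead of every closure sharing the last list built in the loop, and `_process_obs` applies the chain while the data is still a torch tensor, before the numpy conversion. A small self-contained sketch of that pattern, with hypothetical keys and shapes (not taken from the diff):

```python
# Stand-alone sketch of the per-key processor-chain pattern used above.
import torch

processors_by_key = {}
for key, needs_permute in [("rgb", True), ("depth", False)]:
    procs = []
    if needs_permute:
        procs.append(lambda obs: obs.permute(0, 3, 1, 2))  # (N, H, W, C) -> (N, C, H, W)

    # Binding `procs` as a default argument freezes the list per key; a plain closure over
    # the loop variable would make every key reuse whichever list was built last.
    def chained(obs: torch.Tensor, procs=procs) -> torch.Tensor:
        for proc in procs:
            obs = proc(obs)
        return obs

    if procs:
        processors_by_key[key] = chained

obs = {"rgb": torch.zeros(4, 64, 64, 3), "depth": torch.zeros(4, 64, 64, 1)}
for key, value in obs.items():
    if key in processors_by_key:
        value = processors_by_key[key](value)
    obs[key] = value.detach().cpu().numpy()  # torch -> numpy, as in _process_obs

print(obs["rgb"].shape)    # (4, 3, 64, 64): transposed by its registered processor
print(obs["depth"].shape)  # (4, 64, 64, 1): no processor registered, left unchanged
```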
