Skip to content

Support SB3 wrapper pre-processing of the env's image observation space to trigger SB3's natively supported CNN creation pipeline #2812

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion source/isaaclab_rl/config/extension.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

# Note: Semantic Versioning is used: https://semver.org/
version = "0.1.6"
version = "0.1.7"

# Description
title = "Isaac Lab RL"
Expand Down
12 changes: 12 additions & 0 deletions source/isaaclab_rl/docs/CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
Changelog
---------


0.1.7 (2025-06-29)
~~~~~~~~~~~~~~~~~~

Added
^^^^^

* Added support in the SB3 VecEnv wrapper for properly configuring composite observation spaces, so that the CNN
  creation pipelines natively supported by SB3 are triggered automatically.



0.1.6 (2025-06-26)
~~~~~~~~~~~~~~~~~~

Expand Down
69 changes: 57 additions & 12 deletions source/isaaclab_rl/isaaclab_rl/sb3.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import warnings
from typing import Any

from stable_baselines3.common.preprocessing import is_image_space, is_image_space_channels_first
from stable_baselines3.common.utils import constant_fn
from stable_baselines3.common.vec_env.base_vec_env import VecEnv, VecEnvObs, VecEnvStepReturn

Expand Down Expand Up @@ -156,17 +157,8 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, fast_variant: bool = Tr
self.num_envs = self.unwrapped.num_envs
self.sim_device = self.unwrapped.device
self.render_mode = self.unwrapped.render_mode

# obtain gym spaces
# note: stable-baselines3 does not like when we have unbounded action space so
# we set it to some high value here. Maybe this is not general but something to think about.
observation_space = self.unwrapped.single_observation_space["policy"]
action_space = self.unwrapped.single_action_space
if isinstance(action_space, gym.spaces.Box) and not action_space.is_bounded("both"):
action_space = gym.spaces.Box(low=-100, high=100, shape=action_space.shape)

# initialize vec-env
VecEnv.__init__(self, self.num_envs, observation_space, action_space)
self.observation_processors = {}
self._process_spaces()
# add buffer for logging episodic information
self._ep_rew_buf = np.zeros(self.num_envs)
self._ep_len_buf = np.zeros(self.num_envs)
Expand Down Expand Up @@ -303,14 +295,67 @@ def get_images(self): # noqa: D102
Helper functions.
"""

def _process_spaces(self):
    """Configure the gym spaces and register per-term observation processors.

    For a dict observation space, each image-like term is adapted to what stable-baselines3
    expects so that its CNN feature-extractor pipeline is triggered automatically:

    * Unnormalized ``[0, 255]`` images are re-declared as ``uint8`` boxes (SB3 then handles
      normalization and transposition itself); a cast-to-uint8 processor is registered if needed.
    * Normalized ``[-1, 1]`` images are transposed to channel-first on the torch side (GPU),
      which is faster than SB3's numpy/CPU transpose, and the space is re-declared accordingly.

    The action space is clamped to a bounded box if unbounded, and ``VecEnv.__init__`` is
    called with the resulting spaces.

    Raises:
        ValueError: If an image term is neither normalized to ``[-1, 1]`` nor in ``[0, 255]``,
            since SB3 would not normalize it correctly.
    """
    # process observation space
    observation_space = self.unwrapped.single_observation_space["policy"]
    if isinstance(observation_space, gym.spaces.Dict):
        for obs_key, obs_space in observation_space.spaces.items():
            # chain of torch-side transforms to apply to this observation term
            processors = []
            # assume normalized; if not, it won't pass is_image_space, which checks [0-255].
            # for image-like spaces with the right shape but unscaled values, we handle scaling below
            if is_image_space(obs_space, check_channels=True, normalized_image=True):
                actually_normalized = np.all(obs_space.low == -1.0) and np.all(obs_space.high == 1.0)
                if not actually_normalized:
                    if np.any(obs_space.low != 0) or np.any(obs_space.high != 255):
                        raise ValueError(
                            "Your image observation is not normalized in environment, and will not be"
                            " normalized by sb3 if its min is not 0 and max is not 255."
                        )
                    # sb3 will handle normalization and transpose, but sb3 expects uint8 images
                    if obs_space.dtype != np.uint8:
                        processors.append(lambda obs: obs.to(torch.uint8))
                    observation_space.spaces[obs_key] = gym.spaces.Box(0, 255, obs_space.shape, np.uint8)
                else:
                    # sb3 will NOT handle the normalization here, while sb3 would transpose; its
                    # transpose applies to all image terms and may be non-ideal. Moreover, doing the
                    # transpose in torch on the GPU is faster than sb3 transposing in numpy on the CPU.
                    if not is_image_space_channels_first(obs_space):

                        def transpose_channels(img: torch.Tensor) -> torch.Tensor:
                            # (H, W, C) -> (C, H, W), with or without a leading batch dimension
                            return img.permute(2, 0, 1) if len(img.shape) == 3 else img.permute(0, 3, 1, 2)

                        processors.append(transpose_channels)
                        h, w, c = obs_space.shape
                        observation_space.spaces[obs_key] = gym.spaces.Box(-1.0, 1.0, (c, h, w), obs_space.dtype)

            def chained_processor(obs: torch.Tensor, procs=processors) -> Any:
                # bind procs as a default argument to avoid the late-binding closure pitfall
                for proc in procs:
                    obs = proc(obs)
                return obs

            # add processor to the dictionary
            if len(processors) > 0:
                self.observation_processors[obs_key] = chained_processor

    # obtain gym spaces
    # note: stable-baselines3 does not like when we have unbounded action space so
    # we set it to some high value here. Maybe this is not general but something to think about.
    action_space = self.unwrapped.single_action_space
    if isinstance(action_space, gym.spaces.Box) and not action_space.is_bounded("both"):
        action_space = gym.spaces.Box(low=-100, high=100, shape=action_space.shape)

    # initialize vec-env
    VecEnv.__init__(self, self.num_envs, observation_space, action_space)

def _process_obs(self, obs_dict: torch.Tensor | dict[str, torch.Tensor]) -> np.ndarray | dict[str, np.ndarray]:
"""Convert observations into NumPy data type."""
# Sb3 doesn't support asymmetric observation spaces, so we only use "policy"
obs = obs_dict["policy"]
# note: ManagerBasedRLEnv uses torch backend (by default).
if isinstance(obs, dict):
for key, value in obs.items():
obs[key] = value.detach().cpu().numpy()
if key in self.observation_processors:
obs[key] = self.observation_processors[key](value)
obs[key] = obs[key].detach().cpu().numpy()
elif isinstance(obs, torch.Tensor):
obs = obs.detach().cpu().numpy()
else:
Expand Down
Loading