Add render flag to environment step() API

kellyguo11 · kellyguo11 · commit 4dd21ae39a07 · 2026-04-21T23:55:33.000Z
Expose a render: bool = True parameter on the step() method of all
environment base classes (ManagerBasedEnv, ManagerBasedRLEnv,
DirectRLEnv, DirectMARLEnv) and the MARL utility wrappers.

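When render=False is passed, rendering is skipped for that step:
- GUI / RTX sensor renders inside the decimation loop (all four base classes)
- Post-reset re-renders for RTX sensors (ManagerBasedRLEnv and DirectRLEnv);
  these are now gated on the same per-step `render and sim.is_rendering` check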

Physics simulation continues normally regardless of the flag.
This allows user workflows that do not need rendering every step
(e.g. headless RL training, fast rollouts) to opt out per-step.

Also adds unit tests for render=False and mixed render-flag stepping,
parametrized over ManagerBasedEnv, ManagerBasedRLEnv and DirectRLEnv.
diff --git a/source/isaaclab/config/extension.toml b/source/isaaclab/config/extension.toml
@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "4.6.8"
+version = "4.6.9"
 
 # Description
 title = "Isaac Lab framework for Robot Learning"
diff --git a/source/isaaclab/docs/CHANGELOG.rst b/source/isaaclab/docs/CHANGELOG.rst
@@ -1,6 +1,20 @@
 Changelog
 ---------
 
+4.6.9 (2026-04-21)
+~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Added ``render`` flag to the ``step()`` method of all environment base classes
+  (:class:`~isaaclab.envs.ManagerBasedEnv`, :class:`~isaaclab.envs.ManagerBasedRLEnv`,
+  :class:`~isaaclab.envs.DirectRLEnv`, :class:`~isaaclab.envs.DirectMARLEnv`) and of the MARL
+  utility wrappers in ``isaaclab.envs.utils.marl``. Passing ``render=False`` skips rendering for
+  that step (GUI updates, RTX sensor rendering, post-reset re-renders) while physics simulation
+  continues normally. Defaults to ``True`` for full backward compatibility.
+
+
 4.6.8 (2026-04-21)
 ~~~~~~~~~~~~~~~~~~
 
diff --git a/source/isaaclab/isaaclab/envs/direct_marl_env.py b/source/isaaclab/isaaclab/envs/direct_marl_env.py
@@ -373,7 +373,7 @@ def reset(
         # return observations
         return self.obs_dict, self.extras
 
-    def step(self, actions: dict[AgentID, ActionType]) -> EnvStepReturn:
+    def step(self, actions: dict[AgentID, ActionType], render: bool = True) -> EnvStepReturn:
         """Execute one time-step of the environment's dynamics.
 
         The environment steps forward at a fixed time-step, while the physics simulation is decimated at a
@@ -394,6 +394,8 @@ def step(self, actions: dict[AgentID, ActionType]) -> EnvStepReturn:
         Args:
             actions: The actions to apply on the environment (keyed by the agent ID).
                 Shape of individual tensors is (num_envs, action_dim).
+            render: Whether to render during this step. When False, all rendering calls
+                (GUI updates, RTX sensor rendering) are skipped. Defaults to True.
 
         Returns:
             A tuple containing the observations, rewards, resets (terminated and truncated) and
@@ -411,7 +413,7 @@ def step(self, actions: dict[AgentID, ActionType]) -> EnvStepReturn:
 
         # check if we need to do rendering within the physics loop
         # note: uses cached property to avoid settings lookup every step
-        is_rendering = self.sim.is_rendering
+        is_rendering = render and self.sim.is_rendering
 
         # perform physics stepping
         for _ in range(self.cfg.decimation):
diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env.py b/source/isaaclab/isaaclab/envs/direct_rl_env.py
@@ -367,7 +367,7 @@ def reset(self, seed: int | None = None, options: dict[str, Any] | None = None)
         # return observations
         return self._get_observations(), self.extras
 
-    def step(self, action: torch.Tensor) -> VecEnvStepReturn:
+    def step(self, action: torch.Tensor, render: bool = True) -> VecEnvStepReturn:
         """Execute one time-step of the environment's dynamics.
 
         The environment steps forward at a fixed time-step, while the physics simulation is decimated at a
@@ -387,6 +387,8 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
 
         Args:
             action: The actions to apply on the environment. Shape is (num_envs, action_dim).
+            render: Whether to render during this step. When False, all rendering calls
+                (GUI updates, RTX sensor rendering) are skipped. Defaults to True.
 
         Returns:
             A tuple containing the observations, rewards, resets (terminated and truncated) and extras.
@@ -401,7 +403,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
 
         # check if we need to do rendering within the physics loop
         # note: uses cached property to avoid settings lookup every step
-        is_rendering = self.sim.is_rendering
+        is_rendering = render and self.sim.is_rendering
 
         # perform physics stepping
         for _ in range(self.cfg.decimation):
@@ -434,7 +436,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
         if len(reset_env_ids) > 0:
             self._reset_idx(reset_env_ids)
             # if sensors are added to the scene, make sure we render to reflect changes in reset
-            if self.has_rtx_sensors and self.cfg.num_rerenders_on_reset > 0:
+            if is_rendering and self.has_rtx_sensors and self.cfg.num_rerenders_on_reset > 0:
                 for _ in range(self.cfg.num_rerenders_on_reset):
                     self.sim.render()
 
diff --git a/source/isaaclab/isaaclab/envs/manager_based_env.py b/source/isaaclab/isaaclab/envs/manager_based_env.py
@@ -485,7 +485,7 @@ def reset_to(
         # return observations
         return self.obs_buf, self.extras
 
-    def step(self, action: torch.Tensor) -> tuple[VecEnvObs, dict]:
+    def step(self, action: torch.Tensor, render: bool = True) -> tuple[VecEnvObs, dict]:
         """Execute one time-step of the environment's dynamics.
 
         The environment steps forward at a fixed time-step, while the physics simulation is
@@ -496,6 +496,8 @@ def step(self, action: torch.Tensor) -> tuple[VecEnvObs, dict]:
 
         Args:
             action: The actions to apply on the environment. Shape is (num_envs, action_dim).
+            render: Whether to render during this step. When False, all rendering calls
+                (GUI updates, RTX sensor rendering) are skipped. Defaults to True.
 
         Returns:
             A tuple containing the observations and extras.
@@ -507,7 +509,7 @@ def step(self, action: torch.Tensor) -> tuple[VecEnvObs, dict]:
 
         # check if we need to do rendering within the physics loop
         # note: uses cached property to avoid settings lookup every step
-        is_rendering = self.sim.is_rendering
+        is_rendering = render and self.sim.is_rendering
 
         # perform physics stepping
         for _ in range(self.cfg.decimation):
diff --git a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py
@@ -158,7 +158,7 @@ def setup_manager_visualizers(self):
     Operations - MDP
     """
 
-    def step(self, action: torch.Tensor) -> VecEnvStepReturn:
+    def step(self, action: torch.Tensor, render: bool = True) -> VecEnvStepReturn:
         """Execute one time-step of the environment's dynamics and reset terminated environments.
 
         Unlike the :class:`ManagerBasedEnv.step` class, the function performs the following operations:
@@ -173,6 +173,8 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
 
         Args:
             action: The actions to apply on the environment. Shape is (num_envs, action_dim).
+            render: Whether to render during this step. When False, all rendering calls
+                (GUI updates, RTX sensor rendering) are skipped. Defaults to True.
 
         Returns:
             A tuple containing the observations, rewards, resets (terminated and truncated) and extras.
@@ -184,7 +186,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
 
         # check if we need to do rendering within the physics loop
         # note: uses cached property to avoid settings lookup every step
-        is_rendering = self.sim.is_rendering
+        is_rendering = render and self.sim.is_rendering
 
         # perform physics stepping
         for _ in range(self.cfg.decimation):
@@ -229,7 +231,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
             self._reset_idx(reset_env_ids)
 
             # if sensors are added to the scene, make sure we render to reflect changes in reset
-            if self.has_rtx_sensors and self.cfg.num_rerenders_on_reset > 0:
+            if is_rendering and self.has_rtx_sensors and self.cfg.num_rerenders_on_reset > 0:
                 for _ in range(self.cfg.num_rerenders_on_reset):
                     self.sim.render()
 
diff --git a/source/isaaclab/isaaclab/envs/utils/marl.py b/source/isaaclab/isaaclab/envs/utils/marl.py
@@ -97,7 +97,7 @@ def reset(self, seed: int | None = None, options: dict[str, Any] | None = None)
 
             return obs, extras
 
-        def step(self, action: torch.Tensor) -> VecEnvStepReturn:
+        def step(self, action: torch.Tensor, render: bool = True) -> VecEnvStepReturn:
             # split single-agent actions to build the multi-agent ones
             # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
             index = 0
@@ -108,7 +108,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
                 index += delta
 
             # step the environment
-            obs, rewards, terminated, time_outs, extras = self.env.step(_actions)
+            obs, rewards, terminated, time_outs, extras = self.env.step(_actions, render=render)
 
             # use environment state as observation
             if self._state_as_observation:
@@ -233,7 +233,7 @@ def reset(
 
             return obs, extras
 
-        def step(self, actions: dict[AgentID, ActionType]) -> EnvStepReturn:
+        def step(self, actions: dict[AgentID, ActionType], render: bool = True) -> EnvStepReturn:
             # split agent actions to build the multi-agent ones
             # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
             index = 0
@@ -244,7 +244,7 @@ def step(self, actions: dict[AgentID, ActionType]) -> EnvStepReturn:
                 index += delta
 
             # step the environment
-            obs, rewards, terminated, time_outs, extras = self.env.step(_actions)
+            obs, rewards, terminated, time_outs, extras = self.env.step(_actions, render=render)
 
             # use environment state as observation
             if self._state_as_observation:
diff --git a/source/isaaclab/test/envs/test_env_rendering_logic.py b/source/isaaclab/test/envs/test_env_rendering_logic.py
@@ -239,3 +239,165 @@ def wrapped_step(dt):
         else:
             # If env creation failed, still clear the singleton
             SimulationContext.clear_instance()
+
+
+@pytest.mark.parametrize("env_type", ["manager_based_env", "manager_based_rl_env", "direct_rl_env"])
+def test_env_render_false_skips_rendering(env_type, physics_callback, render_callback):
+    """Test that passing render=False to step() skips all rendering while physics continues."""
+    physics_cb, get_physics_stats = physics_callback
+    render_cb, get_render_stats = render_callback
+
+    env = None
+    physics_handle = None
+    original_step = None
+    viz = None
+
+    try:
+        # create a new stage
+        sim_utils.create_new_stage()
+
+        # create environment with render_interval=1 so rendering would happen every physics step
+        if env_type == "manager_based_env":
+            env = create_manager_based_env(render_interval=1)
+        elif env_type == "manager_based_rl_env":
+            env = create_manager_based_rl_env(render_interval=1)
+        else:
+            env = create_direct_rl_env(render_interval=1)
+
+        # enable the flag to render the environment
+        env.sim.set_setting("/isaaclab/render/rtx_sensors", True)
+
+        # disable the app from shutting down when the environment is closed
+        env.sim._app_control_on_stop_handle = None  # type: ignore
+
+        # Reset to initialize visualizers
+        env.reset()
+
+        # Ensure the default Kit visualizer is active for rendering callbacks.
+        assert isinstance(env.sim.visualizers[0], KitVisualizer)
+
+        # add physics callback
+        physics_handle = env.sim.physics_manager.register_callback(
+            physics_cb, IsaacEvents.POST_PHYSICS_STEP, name="physics_step"
+        )
+
+        # Wrap visualizer step to track render calls
+        viz = env.sim.visualizers[0]
+        original_step = viz.step
+        render_dt = env.cfg.sim.dt * env.cfg.sim.render_interval
+
+        def wrapped_step(dt):
+            original_step(dt)
+            render_cb(render_dt)
+
+        viz.step = wrapped_step
+
+        # create a zero action tensor for stepping the environment
+        actions = torch.zeros((env.num_envs, 0), device=env.device)
+
+        # Step with render=False for several steps
+        for i in range(10):
+            env.step(action=actions, render=False)
+
+            # Physics should still advance normally
+            _, num_physics_steps = get_physics_stats()
+            assert num_physics_steps == (i + 1) * env.cfg.decimation, "Physics steps mismatch with render=False"
+
+            # No rendering should have occurred
+            _, num_render_steps = get_render_stats()
+            assert num_render_steps == 0, f"Expected 0 render steps with render=False, got {num_render_steps}"
+
+    finally:
+        if viz is not None and original_step is not None:
+            viz.step = original_step
+        if physics_handle is not None:
+            physics_handle.deregister()
+        if env is not None:
+            env.close()
+        else:
+            SimulationContext.clear_instance()
+
+
+@pytest.mark.parametrize("env_type", ["manager_based_env", "manager_based_rl_env", "direct_rl_env"])
+def test_env_render_flag_mixed_steps(env_type, physics_callback, render_callback):
+    """Test that render flag can be toggled between steps and rendering counts are correct."""
+    physics_cb, get_physics_stats = physics_callback
+    render_cb, get_render_stats = render_callback
+
+    env = None
+    physics_handle = None
+    original_step = None
+    viz = None
+
+    try:
+        # create a new stage
+        sim_utils.create_new_stage()
+
+        # create environment with render_interval=1 so every decimation step renders
+        if env_type == "manager_based_env":
+            env = create_manager_based_env(render_interval=1)
+        elif env_type == "manager_based_rl_env":
+            env = create_manager_based_rl_env(render_interval=1)
+        else:
+            env = create_direct_rl_env(render_interval=1)
+
+        # enable the flag to render the environment
+        env.sim.set_setting("/isaaclab/render/rtx_sensors", True)
+
+        # disable the app from shutting down when the environment is closed
+        env.sim._app_control_on_stop_handle = None  # type: ignore
+
+        # Reset to initialize visualizers
+        env.reset()
+
+        # Ensure the default Kit visualizer is active for rendering callbacks.
+        assert isinstance(env.sim.visualizers[0], KitVisualizer)
+
+        # add physics callback
+        physics_handle = env.sim.physics_manager.register_callback(
+            physics_cb, IsaacEvents.POST_PHYSICS_STEP, name="physics_step"
+        )
+
+        # Wrap visualizer step to track render calls
+        viz = env.sim.visualizers[0]
+        original_step = viz.step
+        render_dt = env.cfg.sim.dt * env.cfg.sim.render_interval
+
+        def wrapped_step(dt):
+            original_step(dt)
+            render_cb(render_dt)
+
+        viz.step = wrapped_step
+
+        # create a zero action tensor for stepping the environment
+        actions = torch.zeros((env.num_envs, 0), device=env.device)
+
+        expected_render_steps = 0
+
+        # Step 5 times with render=True, then 5 with render=False
+        for i in range(10):
+            should_render = i < 5
+            env.step(action=actions, render=should_render)
+
+            # Physics always advances
+            _, num_physics_steps = get_physics_stats()
+            assert num_physics_steps == (i + 1) * env.cfg.decimation, "Physics steps mismatch in mixed test"
+
+            # Rendering only happens in the first 5 steps
+            if should_render:
+                expected_render_steps += env.cfg.decimation  # render_interval=1, so renders every decimation step
+
+            _, num_render_steps = get_render_stats()
+            assert num_render_steps == expected_render_steps, (
+                f"Render steps mismatch at step {i}: expected {expected_render_steps}, got {num_render_steps}"
+            )
+
+    finally:
+        if viz is not None and original_step is not None:
+            viz.step = original_step
+        if physics_handle is not None:
+            physics_handle.deregister()
+        if env is not None:
+            env.close()
+        else:
+            SimulationContext.clear_instance()