diff --git a/source/isaaclab/isaaclab/utils/dict.py b/source/isaaclab/isaaclab/utils/dict.py index a6a40f06775..527fa3ef156 100644 --- a/source/isaaclab/isaaclab/utils/dict.py +++ b/source/isaaclab/isaaclab/utils/dict.py @@ -267,6 +267,8 @@ def replace_slices_with_strings(data: dict) -> dict: """ if isinstance(data, dict): return {k: replace_slices_with_strings(v) for k, v in data.items()} + elif isinstance(data, list): + return [replace_slices_with_strings(v) for v in data] elif isinstance(data, slice): return f"slice({data.start},{data.stop},{data.step})" else: @@ -284,6 +286,8 @@ def replace_strings_with_slices(data: dict) -> dict: """ if isinstance(data, dict): return {k: replace_strings_with_slices(v) for k, v in data.items()} + elif isinstance(data, list): + return [replace_strings_with_slices(v) for v in data] elif isinstance(data, str) and data.startswith("slice("): return string_to_slice(data) else: diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/stack/config/franka/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/stack/config/franka/__init__.py index 1ac5daa1bc3..666f254162b 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/stack/config/franka/__init__.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/stack/config/franka/__init__.py @@ -29,6 +29,7 @@ entry_point="isaaclab.envs:ManagerBasedRLEnv", kwargs={ "env_cfg_entry_point": stack_joint_pos_env_cfg.FrankaCubeStackEnvCfg, + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:StackCubePPORunnerCfg", }, disable_env_checker=True, ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/stack/config/franka/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/stack/config/franka/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 00000000000..b9e1fa2702f --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/stack/config/franka/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,37 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class StackCubePPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 24 + max_iterations = 1500 + save_interval = 50 + experiment_name = "franka_stack" + empirical_normalization = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[256, 128, 64], + critic_hidden_dims=[256, 128, 64], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.006, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-4, + schedule="adaptive", + gamma=0.98, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/stack/stack_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/stack/stack_env_cfg.py index 23348675ac0..b6acb1402e3 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/stack/stack_env_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/stack/stack_env_cfg.py @@ -90,7 +90,7 @@ class PolicyCfg(ObsGroup): def __post_init__(self): self.enable_corruption = False - self.concatenate_terms = False + self.concatenate_terms = True @configclass class RGBCameraPolicyCfg(ObsGroup):