diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/wato_hand.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/wato_hand.py index c4c4e16..290f56d 100644 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/wato_hand.py +++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/wato_hand.py @@ -12,11 +12,13 @@ if _WATO_HAND_DIR not in sys.path: sys.path.insert(0, _WATO_HAND_DIR) -from wato_hand_cfg import WATO_HAND_CFG # noqa: E402 +from wato_hand_cfg import JOINT_POS_LIMITS, WATO_HAND_CFG # noqa: E402 # Partial grasp pose for in-hand cube reorientation (~50% of flex range). -# Finger splay at ~50% of ±8.6 deg limit (0.075 rad) so spread is visible at reset. -_INHAND_SPREAD_RAD = 0.075 +# MCP_A splay limit is ±27 deg (expanded in JOINT_POS_LIMITS); keep default at mid-spread. +_MCP_A_SPREAD_MAX = JOINT_POS_LIMITS["MCP_A_1"][1] +_INHAND_SPREAD_RAD = 0.5 * _MCP_A_SPREAD_MAX +INHAND_SPREAD_RAD = _INHAND_SPREAD_RAD _INHAND_GRASP_JOINT_POS = { "circumduction": 0.0, @@ -47,13 +49,13 @@ def _quat_y_deg(deg: float) -> tuple[float, float, float, float]: return (math.cos(half), 0.0, math.sin(half), 0.0) -# Wato URDF at identity: fingers extend +Y. Palm-up tuned at +160 deg Y. +# In-hand task needs palm facing up (+Z). URDF identity has fingers along +Y, so rotate +# about world Y until the palm points upward. ±160 deg are near-flips; +160 was tuned +# with INHAND_CUBE_POS so the cube lands in the palm at reset. _INHAND_ROT_Y_POS_160 = _quat_y_deg(160.0) -_INHAND_ROT_Y_NEG_160 = _quat_y_deg(-160.0) _INHAND_PALM_UP_ROT = _INHAND_ROT_Y_POS_160 -# DexCube spawn in env frame, paired with _INHAND_PALM_UP_ROT (palm center ~ y=0.08, z=0.54). -INHAND_CUBE_POS = (0.0, 0.08, 0.54) +INHAND_CUBE_POS = (-0.01, 0.09, 0.5) INHAND_WATO_HAND_CFG = WATO_HAND_CFG.replace( spawn=WATO_HAND_CFG.spawn.replace( @@ -68,7 +70,7 @@ def _quat_y_deg(deg: float) -> tuple[float, float, float, float]: max_contact_impulse=1e32, ), articulation_props=sim_utils.ArticulationRootPropertiesCfg( - enabled_self_collisions=True, + enabled_self_collisions=False, solver_position_iteration_count=8, solver_velocity_iteration_count=0, sleep_threshold=0.005, diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/TRAINING_LOG.md b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/TRAINING_LOG.md new file mode 100644 index 0000000..480304a --- /dev/null +++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/TRAINING_LOG.md @@ -0,0 +1,97 @@ +# In-Hand Cube Reorientation — Training Log + +## Task +20-DOF Wato hand, in-hand cube reorientation (Isaac-Repose-Cube-WatoHand-v0). +Palm-up orientation, cube spawned at `(-0.01, 0.09, 0.5)` in the palm. + +--- + +## What Was Tried + +### 1. MCP_A abduction range expansion (WORKS — kept) +**Problem:** USD bakes ±8.6° abduction limits; fingers could barely splay, making reorientation geometrically impossible. +**Fix:** Expanded `_MCP_A_LIMIT` in `wato_hand_cfg.py` from ±8.6° → ±27° (AllegroHand level). Added `expand_abduction_limits` startup event in `wato_env_cfg.py` that calls `apply_wato_hand_joint_limits()` to push new limits into PhysX at runtime, overriding the USD-baked values. +**Result:** Fingers now have meaningful splay range. Self-collision was already disabled so no issues. + +--- + +### 2. Reward shaping — object holding (WORKS — kept) +**Problem:** Cube dropped in ~30 steps (1 sec); no gradient for the policy to learn to grip. Episode too short for any useful signal. +**Fixes applied (cumulative):** +- `track_pos_l2 weight=-3.0` (continuous penalty for cube drifting from goal position) +- `object_away_penalty weight=-5.0` (terminal penalty on drop) +- `object_held_bonus weight=2.0, threshold=0.10m` — dense binary +1/step when cube is within 10 cm of goal. **This was the key fix.** + +**Result:** Episode length doubled from ~60 → 105-119 steps. `object_out_of_reach` rate halved from 1.5-2.1 → 0.6-0.8 per batch. + +--- + +### 3. Stagnation termination fix (WORKS — kept) +**Problem:** `orientation_error_threshold=0.12` in stagnation but `orientation_success_threshold=0.4`. Episodes achieving error 0.12-0.4 rad (better than needed!) were simultaneously getting success credit AND counting toward stagnation — ending good episodes at 90 steps. +**Fix:** Raised stagnation threshold to 0.5 rad (above success threshold) and extended `stagnant_steps` 90 → 150. +**Result:** Longer episodes, less premature termination. + +--- + +### 4. PPO config tuning (PARTIAL — kept) +- `entropy_coef: 0.002 → 0.0001` — stopped the optimizer from rewarding randomness. `action_noise_std` stopped climbing from 1.02 → 1.05. +- `num_steps_per_env: 24 → 48` — each rollout now covers ~44% of a typical episode; better return estimates for the critic. +- `success_bonus weight: 250 → 50` — original weight caused VF loss to spike to 74-88 as policy occasionally scored success. Reducing to 50 kept VF loss in 26-55 range. + +--- + +### 5. Angular velocity toward goal reward (FAILED — disabled) +**Idea:** Reward the component of object angular velocity aligned with the goal direction. `object_ang_vel_toward_goal` with weight=0.1. +**Problem:** Reward was negative (-0.003) because cube's random spin was anti-aligned with the goal on average. This taught the policy to *suppress* all rotation (hold rigidly) to avoid the penalty. +**Fix attempt:** Clamped to 0, increased weight to 0.5. Value became +0.05 but this was purely mechanical (5x weight + clamping removed negatives), not actual policy improvement. Orientation error unchanged. +**Outcome:** Disabled. The cube barely moves and there is insufficient angular velocity signal to reward. + +--- + +### 6. EMA alpha reduction 0.95 → 0.5 (FAILED — reverted) +**Idea:** alpha=0.95 gives ~1.5 Hz effective finger bandwidth. Reducing to 0.5 gives ~15 Hz, potentially allowing faster rotation torques. +**Result:** `action_rate_l2` did NOT increase (policy didn't use the extra bandwidth). `orientation_error` unchanged. `object_out_of_reach` slightly worse. Reverted to 0.95. + +--- + +### 7. Z-axis curriculum (WORKS — current) +**Problem:** Full 3D random orientation goal is too hard to explore from scratch. Policy never discovered how to rotate the cube despite holding it. +**Fix:** `rotation_axes = ["z"]` in `wato_env_cfg.py` — goals are now only rotations about the palm normal (z-axis). The cube just needs to spin in the plane of the palm. +**Result:** Immediate improvement: +- `track_orientation_inv_l2`: 0.24 → 0.88-1.05 (near 4x) +- `orientation_error`: 2.2-2.4 → 1.78-2.07 (trending down) +- `consecutive_success`: 0.03 → 0.20-0.27 +- `time_out` episodes appearing for first time + +**Note on oscillation:** Orientation error oscillates (~1.8-2.1) rather than trending smoothly because `update_goal_on_success=True` resamples a new random z-goal when error < 0.4, immediately resetting error to ~1.5. The mean oscillates around `0.25 × ~0.2 + 0.75 × ~2.3 ≈ 1.85`. + +--- + +## Current State (iter ~600, ~7.4M steps) + +| Metric | Start | Current | +|---|---|---| +| Episode length | ~30 steps | ~120-137 steps | +| `object_out_of_reach` rate | 5-7 | 0.5-0.9 | +| `orientation_error` | 2.2-2.4 rad (random) | 1.82-1.95 rad | +| `consecutive_success` | ~0 | 0.20-0.27 | +| `track_orientation_inv_l2` | ~0.25 | ~0.75-1.05 | + +**Bottleneck:** Plateaued at iter ~400. Policy learns z-rotation intermittently (occasional good episodes) but hasn't converged to a consistent strategy. Root cause is sample efficiency: 256 envs × 48 steps = 12,288 samples/gradient step. Standard IsaacLab in-hand setups use 8,192+ envs = 393k samples/step. + +--- + +## Next Steps + +1. **Make USD instanceable** — convert `wato_hand/urdf/hand_urdf.urdf` to instanceable format using: + ```bash + /home/hy/IsaacLab/isaaclab.sh -p /home/hy/IsaacLab/scripts/tools/convert_instanceable.py \ + /home/hy/Desktop/humanoid/autonomy/simulation/Humanoid_Wato/wato_hand/urdf/ \ + /home/hy/Desktop/humanoid/autonomy/simulation/Humanoid_Wato/wato_hand/urdf_instanceable/ \ + --make-instanceable --conversion-type urdf + ``` + Then update `_HAND_USD_PATH` in `wato_hand_cfg.py` to point to the new USD and change `replicate_physics = False` → `True` in `wato_env_cfg.py`. This allows scaling beyond 256 envs without OOM. +2. **Increase num_envs** — after instanceable conversion, try `--num_envs 1024+`. Standard IsaacLab in-hand setups use 8192 envs; even 1024 gives 4x better gradient quality. +3. **Longer training** — z-axis skill may require 50k+ iterations (~15 hours overnight) to converge at 256 envs. +4. **Curriculum expansion** — once `orientation_error` consistently < 1.5 and `consecutive_success` > 0.4 on z-axis, expand back to `rotation_axes = ["x", "y"]` for full 3D reorientation. +5. **Re-enable ang_vel reward** (with clamp, weight ~0.3) once holding is fully stable — provides rotation direction signal once the policy has enough bandwidth to act on it. diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/__init__.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/__init__.py index ec0d90b..e69de29 100644 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/__init__.py +++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/__init__.py @@ -1,14 +0,0 @@ -# Copyright (c) 2022-2025, The Isaac Lab Project Developers. -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause - -"""In-hand object reorientation environment. - -These environments are based on the `dexterous cube manipulation`_ environments -provided in IsaacGymEnvs repository from NVIDIA. However, they contain certain -modifications and additional features. - -.. _dexterous cube manipulation: https://github.com/NVIDIA-Omniverse/IsaacGymEnvs/blob/main/isaacgymenvs/tasks/allegro_hand.py - -""" diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/__init__.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/__init__.py deleted file mode 100644 index e63efd6..0000000 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/__init__.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2022-2025, The Isaac Lab Project Developers. -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause - -import gymnasium as gym - -from . import agents - -## -# Register Gym environments. -## - -## -# Full kinematic state observations. -## - -gym.register( - id="Isaac-Repose-Cube-Allegro-v0", - entry_point="isaaclab.envs:ManagerBasedRLEnv", - disable_env_checker=True, - kwargs={ - "env_cfg_entry_point": f"{__name__}.allegro_env_cfg:AllegroCubeEnvCfg", - "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AllegroCubePPORunnerCfg", - "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", - "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", - }, -) - -gym.register( - id="Isaac-Repose-Cube-Allegro-Play-v0", - entry_point="isaaclab.envs:ManagerBasedRLEnv", - disable_env_checker=True, - kwargs={ - "env_cfg_entry_point": f"{__name__}.allegro_env_cfg:AllegroCubeEnvCfg_PLAY", - "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AllegroCubePPORunnerCfg", - "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", - "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", - }, -) - -## -# Kinematic state observations without velocity information. -## - -gym.register( - id="Isaac-Repose-Cube-Allegro-NoVelObs-v0", - entry_point="isaaclab.envs:ManagerBasedRLEnv", - disable_env_checker=True, - kwargs={ - "env_cfg_entry_point": f"{__name__}.allegro_env_cfg:AllegroCubeNoVelObsEnvCfg", - "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AllegroCubeNoVelObsPPORunnerCfg", - "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", - "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", - }, -) - -gym.register( - id="Isaac-Repose-Cube-Allegro-NoVelObs-Play-v0", - entry_point="isaaclab.envs:ManagerBasedRLEnv", - disable_env_checker=True, - kwargs={ - "env_cfg_entry_point": f"{__name__}.allegro_env_cfg:AllegroCubeNoVelObsEnvCfg_PLAY", - "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AllegroCubeNoVelObsPPORunnerCfg", - "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", - "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", - }, -) diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/__init__.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/__init__.py deleted file mode 100644 index e75ca2b..0000000 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) 2022-2025, The Isaac Lab Project Developers. -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/rl_games_ppo_cfg.yaml b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/rl_games_ppo_cfg.yaml deleted file mode 100644 index 24b4540..0000000 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/rl_games_ppo_cfg.yaml +++ /dev/null @@ -1,85 +0,0 @@ -params: - seed: 42 - - # environment wrapper clipping - env: - clip_observations: 5.0 - clip_actions: 1.0 - - algo: - name: a2c_continuous - - model: - name: continuous_a2c_logstd - - network: - name: actor_critic - separate: False - - space: - continuous: - mu_activation: None - sigma_activation: None - mu_init: - name: default - sigma_init: - name: const_initializer - val: 0 - fixed_sigma: True - - mlp: - units: [512, 256, 128] - activation: elu - d2rl: False - - initializer: - name: default - regularizer: - name: None - - load_checkpoint: False - load_path: '' - - config: - name: allegro_cube - env_name: rlgpu - device: 'cuda:0' - device_name: 'cuda:0' - multi_gpu: False - ppo: True - mixed_precision: False - normalize_input: True - normalize_value: True - value_bootstrap: True - num_actors: -1 # configured from the script (based on num_envs) - reward_shaper: - scale_value: 0.1 - normalize_advantage: True - gamma: 0.998 - tau: 0.95 - learning_rate: 5e-4 - lr_schedule: adaptive - schedule_type: standard - kl_threshold: 0.016 - score_to_win: 100000 - max_epochs: 5000 - save_best_after: 500 - save_frequency: 200 - print_stats: True - grad_norm: 1.0 - entropy_coef: 0.002 - truncate_grads: True - e_clip: 0.2 - horizon_length: 24 - minibatch_size: 16384 # 32768 - mini_epochs: 5 - critic_coef: 4 - clip_value: True - seq_length: 4 - bounds_loss_coef: 0.0005 - - player: - #render: True - deterministic: True - games_num: 100000 - print_stats: True diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/rsl_rl_ppo_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/rsl_rl_ppo_cfg.py deleted file mode 100644 index 4a25aea..0000000 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/rsl_rl_ppo_cfg.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) 2022-2025, The Isaac Lab Project Developers. -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause - -from isaaclab.utils import configclass - -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg - - -@configclass -class AllegroCubePPORunnerCfg(RslRlOnPolicyRunnerCfg): - num_steps_per_env = 24 - max_iterations = 5000 - save_interval = 50 - experiment_name = "allegro_cube" - empirical_normalization = True - policy = RslRlPpoActorCriticCfg( - init_noise_std=1.0, - actor_hidden_dims=[512, 256, 128], - critic_hidden_dims=[512, 256, 128], - activation="elu", - ) - algorithm = RslRlPpoAlgorithmCfg( - value_loss_coef=1.0, - use_clipped_value_loss=True, - clip_param=0.2, - entropy_coef=0.002, - num_learning_epochs=5, - num_mini_batches=4, - learning_rate=0.001, - schedule="adaptive", - gamma=0.998, - lam=0.95, - desired_kl=0.01, - max_grad_norm=1.0, - ) - - -@configclass -class AllegroCubeNoVelObsPPORunnerCfg(AllegroCubePPORunnerCfg): - experiment_name = "allegro_cube_no_vel_obs" diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/skrl_ppo_cfg.yaml b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/skrl_ppo_cfg.yaml deleted file mode 100644 index a776337..0000000 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/skrl_ppo_cfg.yaml +++ /dev/null @@ -1,80 +0,0 @@ -seed: 42 - - -# Models are instantiated using skrl's model instantiator utility -# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html -models: - separate: False - policy: # see gaussian_model parameters - class: GaussianMixin - clip_actions: False - clip_log_std: True - min_log_std: -20.0 - max_log_std: 2.0 - initial_log_std: 0.0 - network: - - name: net - input: STATES - layers: [512, 256, 128] - activations: elu - output: ACTIONS - value: # see deterministic_model parameters - class: DeterministicMixin - clip_actions: False - network: - - name: net - input: STATES - layers: [512, 256, 128] - activations: elu - output: ONE - - -# Rollout memory -# https://skrl.readthedocs.io/en/latest/api/memories/random.html -memory: - class: RandomMemory - memory_size: -1 # automatically determined (same as agent:rollouts) - - -# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG) -# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html -agent: - class: PPO - rollouts: 24 - learning_epochs: 5 - mini_batches: 12 - discount_factor: 0.998 - lambda: 0.95 - learning_rate: 5.0e-04 - learning_rate_scheduler: KLAdaptiveLR - learning_rate_scheduler_kwargs: - kl_threshold: 0.016 - state_preprocessor: RunningStandardScaler - state_preprocessor_kwargs: null - value_preprocessor: RunningStandardScaler - value_preprocessor_kwargs: null - random_timesteps: 0 - learning_starts: 0 - grad_norm_clip: 1.0 - ratio_clip: 0.2 - value_clip: 0.2 - clip_predicted_values: True - entropy_loss_scale: 0.002 - value_loss_scale: 2.0 - kl_threshold: 0.0 - rewards_shaper_scale: 0.1 - time_limit_bootstrap: False - # logging and checkpoint - experiment: - directory: "allegro_cube" - experiment_name: "" - write_interval: auto - checkpoint_interval: auto - - -# Sequential trainer -# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html -trainer: - class: SequentialTrainer - timesteps: 120000 - environment_info: log diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/allegro_env_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/allegro_env_cfg.py deleted file mode 100644 index d55e595..0000000 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/allegro_env_cfg.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2022-2025, The Isaac Lab Project Developers. -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause - -from isaaclab.utils import configclass - -import isaaclab_tasks.manager_based.manipulation.inhand.inhand_env_cfg as inhand_env_cfg - -## -# Pre-defined configs -## -from isaaclab_assets import ALLEGRO_HAND_CFG # isort: skip - - -@configclass -class AllegroCubeEnvCfg(inhand_env_cfg.InHandObjectEnvCfg): - def __post_init__(self): - # post init of parent - super().__post_init__() - - # switch robot to allegro hand - self.scene.robot = ALLEGRO_HAND_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") - - -@configclass -class AllegroCubeEnvCfg_PLAY(AllegroCubeEnvCfg): - def __post_init__(self): - # post init of parent - super().__post_init__() - # make a smaller scene for play - self.scene.num_envs = 50 - # disable randomization for play - self.observations.policy.enable_corruption = False - # remove termination due to timeouts - self.terminations.time_out = None - - -## -# Environment configuration with no velocity observations. -## - - -@configclass -class AllegroCubeNoVelObsEnvCfg(AllegroCubeEnvCfg): - def __post_init__(self): - # post init of parent - super().__post_init__() - - # switch observation group to no velocity group - self.observations.policy = inhand_env_cfg.ObservationsCfg.NoVelocityKinematicObsGroupCfg() - - -@configclass -class AllegroCubeNoVelObsEnvCfg_PLAY(AllegroCubeNoVelObsEnvCfg): - def __post_init__(self): - # post init of parent - super().__post_init__() - # make a smaller scene for play - self.scene.num_envs = 50 - # disable randomization for play - self.observations.policy.enable_corruption = False - # remove termination due to timeouts - self.terminations.time_out = None diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/wato_hand/agents/rsl_rl_ppo_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/wato_hand/agents/rsl_rl_ppo_cfg.py index 8b2cfa1..48ab3fb 100644 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/wato_hand/agents/rsl_rl_ppo_cfg.py +++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/wato_hand/agents/rsl_rl_ppo_cfg.py @@ -4,7 +4,7 @@ @configclass class WatoHandCubePPORunnerCfg(RslRlOnPolicyRunnerCfg): - num_steps_per_env = 24 + num_steps_per_env = 48 max_iterations = 5000 save_interval = 50 experiment_name = "wato_hand_cube" @@ -19,7 +19,7 @@ class WatoHandCubePPORunnerCfg(RslRlOnPolicyRunnerCfg): value_loss_coef=1.0, use_clipped_value_loss=True, clip_param=0.2, - entropy_coef=0.002, + entropy_coef=0.0001, num_learning_epochs=5, num_mini_batches=4, learning_rate=0.001, diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/wato_hand/wato_env_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/wato_hand/wato_env_cfg.py index 5c58136..0d25448 100644 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/wato_hand/wato_env_cfg.py +++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/wato_hand/wato_env_cfg.py @@ -1,9 +1,12 @@ +from isaaclab.managers import EventTermCfg as EventTerm, RewardTermCfg as RewTerm, SceneEntityCfg from isaaclab.utils import configclass import HumanoidRLPackage.HumanoidRLSetup.tasks.inhand.inhand_env_cfg as inhand_env_cfg +import HumanoidRLPackage.HumanoidRLSetup.tasks.inhand.mdp as inhand_mdp from HumanoidRLPackage.HumanoidRLSetup.modelCfg.wato_hand import ( INHAND_WATO_HAND_CFG, INHAND_CUBE_POS, + INHAND_SPREAD_RAD, ) @@ -20,8 +23,13 @@ def __post_init__(self): self.scene.num_envs = 256 self.scene.robot = INHAND_WATO_HAND_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + # Push expanded MCP_A limits (±27 deg) into PhysX, overriding the ±8.6 deg baked in the USD. + self.events.expand_abduction_limits = EventTerm( + func=inhand_mdp.apply_wato_hand_joint_limits, + mode="startup", + ) # Paired with _INHAND_PALM_UP_ROT in modelCfg/wato_hand.py. - self.scene.object.spawn.scale = (0.6, 0.6, 0.6) + self.scene.object.spawn.scale = (0.8, 0.8, 0.8) self.scene.object.init_state.pos = INHAND_CUBE_POS self.scene.object.init_state.rot = (1.0, 0.0, 0.0, 0.0) @@ -29,10 +37,8 @@ def __post_init__(self): # shape mismatch in EMAJointPositionToLimitsAction.reset() on this Isaac Lab version. # Replace ".*" position_range: overlapping patterns duplicate joint ids (20 -> 24) and crash reset. _grasp_scale = [0.2, 0.2] - # Compensates for use_default_offset (+0.075 rad): [1.5, 0.5] maps - # soft limits [-0.15, +0.15] to sample range [-0.15, +0.15] after offset, - # giving uniform coverage of the full abduction range at reset. - _splay_scale = [1.5, 0.5] + # Full abduction range at reset (±27 deg limit with use_default_offset on _INHAND_SPREAD_RAD default). + _splay_scale = [1.0, 1.0] self.events.reset_robot_joints.params["position_range"] = { "circumduction": _grasp_scale, "MCP_A_thumb": _grasp_scale, @@ -56,6 +62,22 @@ def __post_init__(self): "DIP_4": _grasp_scale, } + # Curriculum: start with rotation about z-axis (palm normal) only. + # Full 3D random orientation is too hard to explore from scratch; z-axis + # rotation (spinning in the palm plane) is the most natural motion for this hand. + # Once orientation_error shows a downward trend, expand back to ["x", "y"]. + self.commands.object_pose.rotation_axes = ["z"] + + # Small bonus for MCP_A velocity + spread deflection. + self.rewards.spread_activity = RewTerm( + func=inhand_mdp.mcp_a_spread_activity, + weight=0.03, + params={ + "asset_cfg": SceneEntityCfg("robot"), + "spread_limit": INHAND_SPREAD_RAD * 2.0, + }, + ) + @configclass class WatoHandCubeEnvCfg_PLAY(WatoHandCubeEnvCfg): @@ -63,9 +85,9 @@ def __post_init__(self): super().__post_init__() self.scene.num_envs = 16 self.observations.policy.enable_corruption = False - self.terminations.time_out = None - self.commands.object_pose.debug_vis = True - # Goal-orientation marker offset (green DexCube) — not the physical object pose. + # Keep time_out so play episodes reset instead of clamping forever. + self.episode_length_s = 10.0 + # Nudge goal marker aside so it does not cover the physical cube. self.commands.object_pose.marker_pos_offset = (-0.10, 0.0, 0.12) @@ -82,7 +104,7 @@ def __post_init__(self): super().__post_init__() self.scene.num_envs = 16 self.observations.policy.enable_corruption = False - self.terminations.time_out = None - self.commands.object_pose.debug_vis = True - # Goal-orientation marker offset (green DexCube) — not the physical object pose. + # Keep time_out so play episodes reset instead of clamping forever. + self.episode_length_s = 10.0 + # Nudge goal marker aside so it does not cover the physical cube. self.commands.object_pose.marker_pos_offset = (-0.10, 0.0, 0.12) diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/inhand_env_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/inhand_env_cfg.py index ca99801..defe3ae 100644 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/inhand_env_cfg.py +++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/inhand_env_cfg.py @@ -1,8 +1,3 @@ -# Copyright (c) 2022-2025, The Isaac Lab Project Developers. -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause - from __future__ import annotations from dataclasses import MISSING @@ -23,21 +18,14 @@ from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR from isaaclab.utils.noise import AdditiveGaussianNoiseCfg as Gnoise -import isaaclab_tasks.manager_based.manipulation.inhand.mdp as mdp +import HumanoidRLPackage.HumanoidRLSetup.tasks.inhand.mdp as mdp -## -# Scene definition -## @configclass class InHandObjectSceneCfg(InteractiveSceneCfg): - """Configuration for a scene with an object and a dexterous hand.""" - - # robots robot: ArticulationCfg = MISSING - # objects object: RigidObjectCfg = RigidObjectCfg( prim_path="{ENV_REGEX_NS}/object", spawn=sim_utils.UsdFileCfg( @@ -57,7 +45,6 @@ class InHandObjectSceneCfg(InteractiveSceneCfg): init_state=RigidObjectCfg.InitialStateCfg(pos=(0.0, -0.19, 0.56), rot=(1.0, 0.0, 0.0, 0.0)), ) - # lights light = AssetBaseCfg( prim_path="/World/light", spawn=sim_utils.DistantLightCfg(color=(0.95, 0.95, 0.95), intensity=1000.0), @@ -69,24 +56,12 @@ class InHandObjectSceneCfg(InteractiveSceneCfg): ) -## -# MDP settings -## - @configclass class CommandsCfg: """Command specifications for the MDP.""" - object_pose = mdp.InHandReOrientationCommandCfg( - asset_name="object", - init_pos_offset=(0.0, 0.0, -0.04), - update_goal_on_success=True, - orientation_success_threshold=0.1, - make_quat_unique=False, - marker_pos_offset=(-0.2, -0.06, 0.08), - debug_vis=True, - ) + object_pose = mdp.InHandReOrientationCommandCfg() @configclass @@ -179,7 +154,7 @@ class EventCfg: func=mdp.randomize_rigid_body_material, mode="startup", params={ - "asset_cfg": SceneEntityCfg("robot", body_names=".*"), + "asset_cfg": SceneEntityCfg("robot"), "static_friction_range": (0.7, 1.3), "dynamic_friction_range": (0.7, 1.3), "restitution_range": (0.0, 0.0), @@ -190,7 +165,7 @@ class EventCfg: func=mdp.randomize_rigid_body_mass, mode="startup", params={ - "asset_cfg": SceneEntityCfg("robot", body_names=".*"), + "asset_cfg": SceneEntityCfg("robot"), "mass_distribution_params": (0.95, 1.05), "operation": "scale", }, @@ -200,7 +175,7 @@ class EventCfg: mode="startup", params={ "asset_cfg": SceneEntityCfg("robot", joint_names=".*"), - "stiffness_distribution_params": (0.3, 3.0), # default: 3.0 + "stiffness_distribution_params": (0.75, 1.5), # default: 3.0 "damping_distribution_params": (0.75, 1.5), # default: 0.1 "operation": "scale", "distribution": "log_uniform", @@ -212,7 +187,7 @@ class EventCfg: func=mdp.randomize_rigid_body_material, mode="startup", params={ - "asset_cfg": SceneEntityCfg("object", body_names=".*"), + "asset_cfg": SceneEntityCfg("object"), "static_friction_range": (0.7, 1.3), "dynamic_friction_range": (0.7, 1.3), "restitution_range": (0.0, 0.0), @@ -236,7 +211,7 @@ class EventCfg: params={ "pose_range": {"x": [-0.01, 0.01], "y": [-0.01, 0.01], "z": [-0.01, 0.01]}, "velocity_range": {}, - "asset_cfg": SceneEntityCfg("object", body_names=".*"), + "asset_cfg": SceneEntityCfg("object"), }, ) reset_robot_joints = EventTerm( @@ -256,19 +231,19 @@ class RewardsCfg: """Reward terms for the MDP.""" # -- task - # track_pos_l2 = RewTerm( - # func=mdp.track_pos_l2, - # weight=-10.0, - # params={"object_cfg": SceneEntityCfg("object"), "command_name": "object_pose"}, - # ) + track_pos_l2 = RewTerm( + func=mdp.track_pos_l2, + weight=-3.0, + params={"object_cfg": SceneEntityCfg("object"), "command_name": "object_pose"}, + ) track_orientation_inv_l2 = RewTerm( func=mdp.track_orientation_inv_l2, - weight=1.0, + weight=5.0, params={"object_cfg": SceneEntityCfg("object"), "rot_eps": 0.1, "command_name": "object_pose"}, ) success_bonus = RewTerm( func=mdp.success_bonus, - weight=250.0, + weight=50.0, params={"object_cfg": SceneEntityCfg("object"), "command_name": "object_pose"}, ) @@ -277,13 +252,29 @@ class RewardsCfg: action_l2 = RewTerm(func=mdp.action_l2, weight=-0.0001) action_rate_l2 = RewTerm(func=mdp.action_rate_l2, weight=-0.01) - # -- optional penalties (these are disabled by default) - # object_away_penalty = RewTerm( - # func=mdp.is_terminated_term, - # weight=-0.0, - # params={"term_keys": "object_out_of_reach"}, + # Dense per-step bonus: +1 per step the cube stays within 0.10 m of the goal position. + # At weight=2.0 this is worth ~+120/episode when held vs 0 when dropped, giving a far + # stronger holding gradient than the continuous L2 penalty alone. + object_held_bonus = RewTerm( + func=mdp.object_held_bonus, + weight=2.0, + params={"object_cfg": SceneEntityCfg("object"), "command_name": "object_pose", "hold_threshold": 0.10}, + ) + + # Rotation reward disabled — re-enable once alpha=0.5 is verified to allow cube rotation. + # object_ang_vel_toward_goal = RewTerm( + # func=mdp.object_ang_vel_toward_goal, + # weight=0.5, + # params={"object_cfg": SceneEntityCfg("object"), "command_name": "object_pose"}, # ) + # Penalty for dropping the object — critical for early training. + object_away_penalty = RewTerm( + func=mdp.is_terminated_term, + weight=-5.0, + params={"term_keys": "object_out_of_reach"}, + ) + @configclass class TerminationsCfg: @@ -297,23 +288,23 @@ class TerminationsCfg: object_out_of_reach = DoneTerm(func=mdp.object_away_from_robot, params={"threshold": 0.3}) - # object_out_of_reach = DoneTerm( - # func=mdp.object_away_from_goal, params={"threshold": 0.24, "command_name": "object_pose"} - # ) - - -## -# Environment configuration -## + # End episode when goal orientation is not reached for too long. + # Threshold kept above orientation_success_threshold (0.4) so episodes that have + # achieved loose success (error < 0.4) don't simultaneously count as stagnant. + orientation_stagnation = DoneTerm( + func=mdp.orientation_stagnation, + params={ + "command_name": "object_pose", + "orientation_error_threshold": 0.5, + "stagnant_steps": 150, + }, + ) @configclass class InHandObjectEnvCfg(ManagerBasedRLEnvCfg): - """Configuration for the in hand reorientation environment.""" - - # Scene settings scene: InHandObjectSceneCfg = InHandObjectSceneCfg(num_envs=8192, env_spacing=0.6) - # Simulation settings + sim: SimulationCfg = SimulationCfg( physics_material=RigidBodyMaterialCfg( static_friction=1.0, @@ -325,11 +316,11 @@ class InHandObjectEnvCfg(ManagerBasedRLEnvCfg): gpu_max_rigid_patch_count=2**23, ), ) - # Basic settings + observations: ObservationsCfg = ObservationsCfg() actions: ActionsCfg = ActionsCfg() commands: CommandsCfg = CommandsCfg() - # MDP settings + rewards: RewardsCfg = RewardsCfg() terminations: TerminationsCfg = TerminationsCfg() events: EventCfg = EventCfg() @@ -344,3 +335,6 @@ def __post_init__(self): self.sim.render_interval = self.decimation # change viewer settings self.viewer.eye = (2.0, 2.0, 2.0) + +# PYTHONPATH=$(pwd) /home/hy/IsaacLab/isaaclab.sh -p HumanoidRLPackage/rsl_rl_scripts/train.py --task=Isaac-Repose-Cube-WatoHand-v0 --headless +# PYTHONPATH=$(pwd) /home/hy/IsaacLab/isaaclab.sh -p HumanoidRLPackage/rsl_rl_scripts/play.py --task=Isaac-Repose-Cube-WatoHand-Play-v0 --num_envs=1 \ No newline at end of file diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/commands/commands_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/commands/commands_cfg.py index 0969448..720b72e 100644 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/commands/commands_cfg.py +++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/commands/commands_cfg.py @@ -3,8 +3,6 @@ # # SPDX-License-Identifier: BSD-3-Clause -from dataclasses import MISSING - import isaaclab.sim as sim_utils from isaaclab.managers import CommandTermCfg from isaaclab.markers import VisualizationMarkersCfg @@ -24,36 +22,32 @@ class InHandReOrientationCommandCfg(CommandTermCfg): class_type: type = InHandReOrientationCommand resampling_time_range: tuple[float, float] = (1e6, 1e6) # no resampling based on time - asset_name: str = MISSING - """Name of the asset in the environment for which the commands are generated.""" + asset_name: str = "object" + """Scene asset that receives orientation goals (the manipuland rigid body).""" - init_pos_offset: tuple[float, float, float] = (0.0, 0.0, 0.0) - """Position offset of the asset from its default position. + init_pos_offset: tuple[float, float, float] = (0.0, 0.0, -0.04) + """Offset added to the object's default spawn position to form the goal position command. - This is used to account for the offset typically present in the object's default position - so that the object is spawned at a height above the robot's palm. When the position command - is generated, the object's default position is used as the reference and the offset specified - is added to it to get the desired position of the object. + Does not move the physical cube at reset — only shifts the position target used by + ``track_pos_l2`` and the goal-marker anchor (before ``marker_pos_offset``). """ - make_quat_unique: bool = MISSING - """Whether to make the quaternion unique or not. + make_quat_unique: bool = False + """Whether sampled goal quaternions are forced to have a positive real part.""" - If True, the quaternion is made unique by ensuring the real part is positive. - """ + orientation_success_threshold: float = 0.4 + """Orientation error (rad) below which the goal counts as reached.""" - orientation_success_threshold: float = MISSING - """Threshold for the orientation error to consider the goal orientation to be reached.""" + update_goal_on_success: bool = True + """Resample goal orientation when the current goal is reached.""" - update_goal_on_success: bool = MISSING - """Whether to update the goal orientation when the goal orientation is reached.""" + rotation_axes: list[str] = ["x", "y"] + """World axes used when sampling random goal orientations.""" - marker_pos_offset: tuple[float, float, float] = (0.0, 0.0, 0.0) - """Position offset of the marker from the object's desired position. + marker_pos_offset: tuple[float, float, float] = (-0.2, -0.06, 0.08) + """Extra offset for the goal-orientation debug marker only (not the physical cube).""" - This is useful to position the marker at a height above the object's desired position. - Otherwise, the marker may occlude the object in the visualization. - """ + debug_vis: bool = True goal_pose_visualizer_cfg: VisualizationMarkersCfg = VisualizationMarkersCfg( prim_path="/Visuals/Command/goal_marker", @@ -64,4 +58,4 @@ class InHandReOrientationCommandCfg(CommandTermCfg): ), }, ) - """The configuration for the goal pose visualization marker. Defaults to a DexCube marker.""" + """Goal-orientation debug marker (DexCube visual only).""" \ No newline at end of file diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/commands/orientation_command.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/commands/orientation_command.py index 72cad39..f086605 100644 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/commands/orientation_command.py +++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/commands/orientation_command.py @@ -104,14 +104,15 @@ def _update_metrics(self): self.metrics["consecutive_success"] += successes.float() def _resample_command(self, env_ids: Sequence[int]): - # sample new orientation targets - rand_floats = 2.0 * torch.rand((len(env_ids), 2), device=self.device) - 1.0 - # rotate randomly about x-axis and then y-axis - quat = math_utils.quat_mul( - math_utils.quat_from_angle_axis(rand_floats[:, 0] * torch.pi, self._X_UNIT_VEC[env_ids]), - math_utils.quat_from_angle_axis(rand_floats[:, 1] * torch.pi, self._Y_UNIT_VEC[env_ids]), - ) - # make sure the quaternion real-part is always positive + _axis_map = {"x": self._X_UNIT_VEC, "y": self._Y_UNIT_VEC, "z": self._Z_UNIT_VEC} + axes = self.cfg.rotation_axes + rand_floats = 2.0 * torch.rand((len(env_ids), len(axes)), device=self.device) - 1.0 + quat = math_utils.quat_from_angle_axis(rand_floats[:, 0] * torch.pi, _axis_map[axes[0]][env_ids]) + for i, ax in enumerate(axes[1:], start=1): + quat = math_utils.quat_mul( + quat, + math_utils.quat_from_angle_axis(rand_floats[:, i] * torch.pi, _axis_map[ax][env_ids]), + ) self.quat_command_w[env_ids] = math_utils.quat_unique(quat) if self.cfg.make_quat_unique else quat def _update_command(self): diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/events.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/events.py index 8c52d80..f2c4258 100644 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/events.py +++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/events.py @@ -15,6 +15,12 @@ from isaaclab.managers import EventTermCfg, ManagerTermBase, SceneEntityCfg from isaaclab.utils.math import sample_uniform +import sys, os as _os +_WATO_HAND_DIR = _os.path.abspath(_os.path.join(_os.path.dirname(__file__), "../../../../../../wato_hand")) +if _WATO_HAND_DIR not in sys.path: + sys.path.insert(0, _WATO_HAND_DIR) +from wato_hand_cfg import apply_joint_limits as _apply_joint_limits # noqa: E402 + if TYPE_CHECKING: from isaaclab.envs import ManagerBasedEnv @@ -182,3 +188,17 @@ def __call__( # set into the physics simulation self._asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids) + + +def apply_wato_hand_joint_limits( + env: "ManagerBasedEnv", + env_ids: torch.Tensor, + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +) -> None: + """Push expanded JOINT_POS_LIMITS from wato_hand_cfg into PhysX at startup. + + The USD has the original hardware limits baked in (±8.6 deg for MCP_A). + This overwrites them so the RL policy can explore a larger abduction range. + """ + robot: Articulation = env.scene[asset_cfg.name] + _apply_joint_limits(robot) diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/rewards.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/rewards.py index 012e095..e295673 100644 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/rewards.py +++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/rewards.py @@ -9,10 +9,12 @@ from typing import TYPE_CHECKING import isaaclab.utils.math as math_utils -from isaaclab.assets import RigidObject +from isaaclab.assets import Articulation, RigidObject from isaaclab.envs import ManagerBasedRLEnv from isaaclab.managers import SceneEntityCfg +_MCP_A_JOINT_NAMES = ("MCP_A_1", "MCP_A_2", "MCP_A_3", "MCP_A_4") + if TYPE_CHECKING: from .commands import InHandReOrientationCommand @@ -94,3 +96,67 @@ def track_orientation_inv_l2( dtheta = math_utils.quat_error_magnitude(asset.data.root_quat_w, goal_quat_w) return 1.0 / (dtheta + rot_eps) + + +def object_ang_vel_toward_goal( + env: ManagerBasedRLEnv, + command_name: str, + object_cfg: SceneEntityCfg = SceneEntityCfg("object"), +) -> torch.Tensor: + """Reward the component of object angular velocity that reduces orientation error. + + Positive when the cube is spinning toward the goal orientation, negative when + spinning away. Gives an immediate per-step gradient for rotation attempts rather + than waiting for the orientation error metric to actually change. + """ + asset: RigidObject = env.scene[object_cfg.name] + command_term: "InHandReOrientationCommand" = env.command_manager.get_term(command_name) + + goal_quat_w = command_term.command[:, 3:7] + curr_quat_w = asset.data.root_quat_w + ang_vel_w = asset.data.root_ang_vel_w # (N, 3) + + # q_err = q_goal * conj(q_curr): quaternion to rotate from current → goal. + # Its imaginary part gives the world-frame axis one should rotate about, scaled + # by sin(θ/2) — large and well-conditioned at the ~2 rad errors seen early in training. + q_err = math_utils.quat_mul(goal_quat_w, math_utils.quat_conjugate(curr_quat_w)) + err_axis_w = q_err[:, 1:] # (N, 3) + + # Clamp to zero: only reward spinning *toward* goal, never penalise spinning away. + # Without the clamp, negative values teach the policy to suppress all rotation. + return torch.sum(ang_vel_w * err_axis_w, dim=-1).clamp(min=0.0) + + +def object_held_bonus( + env: ManagerBasedRLEnv, + command_name: str, + object_cfg: SceneEntityCfg = SceneEntityCfg("object"), + hold_threshold: float = 0.10, +) -> torch.Tensor: + """Per-step bonus (1.0) when object is within hold_threshold of the goal position. + + Provides a dense holding signal that is zero when the cube has drifted away and 1.0 + when it is within reach — much stronger gradient than the continuous L2 penalty. + """ + asset: RigidObject = env.scene[object_cfg.name] + command_term: "InHandReOrientationCommand" = env.command_manager.get_term(command_name) + goal_pos_e = command_term.command[:, :3] + object_pos_e = asset.data.root_pos_w - env.scene.env_origins + dist = torch.norm(goal_pos_e - object_pos_e, p=2, dim=-1) + return (dist < hold_threshold).float() + + +def mcp_a_spread_activity( + env: ManagerBasedRLEnv, + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + spread_limit: float = 0.15, +) -> torch.Tensor: + """Reward finger abduction motion and deflection (encourages MCP_A spread use).""" + robot: Articulation = env.scene[asset_cfg.name] + spread_ids = [] + for name in _MCP_A_JOINT_NAMES: + spread_ids.extend(robot.find_joints(name)[0]) + + spread_vel = torch.linalg.norm(robot.data.joint_vel[:, spread_ids], dim=1) + spread_pos = torch.mean(torch.abs(robot.data.joint_pos[:, spread_ids]), dim=1) / spread_limit + return spread_vel + 0.5 * spread_pos diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/terminations.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/terminations.py index f50ffec..b860ae9 100644 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/terminations.py +++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/terminations.py @@ -5,9 +5,13 @@ """Functions specific to the in-hand dexterous manipulation environments.""" -import torch +from __future__ import annotations + from typing import TYPE_CHECKING +import torch + +from isaaclab.assets import Articulation, RigidObject from isaaclab.envs import ManagerBasedRLEnv from isaaclab.managers import SceneEntityCfg @@ -81,3 +85,43 @@ def object_away_from_robot( dist = torch.norm(robot.data.root_pos_w - object.data.root_pos_w, dim=1) return dist > threshold + + +def orientation_stagnation( + env: ManagerBasedRLEnv, + command_name: str, + orientation_error_threshold: float, + stagnant_steps: int, + object_ang_vel_threshold: float | None = None, + joint_vel_threshold: float | None = None, + object_cfg: SceneEntityCfg = SceneEntityCfg("object"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +) -> torch.Tensor: + """Terminate when orientation error stays above threshold for too many steps. + + Velocity gates are optional. Clamped grasps often keep joint velocities above a + tight threshold due to actuator tracking, so orientation-only detection is the default. + """ + if not hasattr(env, "_orientation_stagnant_counter"): + env._orientation_stagnant_counter = torch.zeros(env.num_envs, device=env.device, dtype=torch.int32) + + counter = env._orientation_stagnant_counter + # episode_length_buf is incremented before termination compute; value 1 means a fresh episode. + counter[env.episode_length_buf == 1] = 0 + + command_term: InHandReOrientationCommand = env.command_manager.get_term(command_name) + is_stagnant = command_term.metrics["orientation_error"] > orientation_error_threshold + + if object_ang_vel_threshold is not None: + object_asset: RigidObject = env.scene[object_cfg.name] + object_ang_vel = torch.linalg.norm(object_asset.data.root_ang_vel_w, dim=1) + is_stagnant = is_stagnant & (object_ang_vel < object_ang_vel_threshold) + + if joint_vel_threshold is not None: + robot: Articulation = env.scene[robot_cfg.name] + joint_vel = torch.linalg.norm(robot.data.joint_vel, dim=1) + is_stagnant = is_stagnant & (joint_vel < joint_vel_threshold) + + counter[:] = torch.where(is_stagnant, counter + 1, torch.zeros_like(counter)) + + return counter >= stagnant_steps diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/wato_hand_cube/2026-06-12_02-31-31/model_200.pt b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/wato_hand_cube/2026-06-12_02-31-31/model_200.pt deleted file mode 100644 index d34c2d9..0000000 Binary files a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/wato_hand_cube/2026-06-12_02-31-31/model_200.pt and /dev/null differ diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/wato_hand_cube/2026-06-13_01-12-13/model_700.pt b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/wato_hand_cube/2026-06-13_01-12-13/model_700.pt new file mode 100644 index 0000000..a0fc9b3 Binary files /dev/null and b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/wato_hand_cube/2026-06-13_01-12-13/model_700.pt differ diff --git a/autonomy/simulation/Humanoid_Wato/wato_hand/splay_demo.py b/autonomy/simulation/Humanoid_Wato/wato_hand/splay_demo.py new file mode 100644 index 0000000..85efde1 --- /dev/null +++ b/autonomy/simulation/Humanoid_Wato/wato_hand/splay_demo.py @@ -0,0 +1,141 @@ +"""Sweep MCP_A abduction joints (4 fingers + thumb) so spread motion is easy to see. + +Run from this directory (omit --headless to watch in the GUI): + + /home/hy/IsaacLab/isaaclab.sh -p splay_demo.py + +Finger splay (MCP_A_1..4): ±0.15 rad (±8.594 deg). +Thumb abduction (MCP_A_thumb): 0 to 2.0 rad (~114.6 deg). +""" + +from __future__ import annotations + +import argparse +import math +import os +import sys + +from isaaclab.app import AppLauncher + +parser = argparse.ArgumentParser(description="Wato hand MCP_A splay demo.") +parser.add_argument("--freq_hz", type=float, default=1.0, help="Splay sweep frequency in Hz.") +AppLauncher.add_app_launcher_args(parser) +args_cli = parser.parse_args() + +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +import torch + +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, AssetBaseCfg +from isaaclab.scene import InteractiveScene, InteractiveSceneCfg +from isaaclab.utils import configclass + +from wato_hand_cfg import JOINT_POS_LIMITS, apply_joint_limits + +# Same hand asset/physics as in-hand RL (self-collision off, palm-up, etc.). +_HUMANOID_RL_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "HumanoidRL")) +if _HUMANOID_RL_ROOT not in sys.path: + sys.path.insert(0, _HUMANOID_RL_ROOT) +from HumanoidRLPackage.HumanoidRLSetup.modelCfg.wato_hand import INHAND_WATO_HAND_CFG # noqa: E402 + +# Finger splay ±8.594 deg; thumb abduction 0..114.592 deg (each uses its own limits). +SPREAD_JOINTS = ["MCP_A_1", "MCP_A_2", "MCP_A_3", "MCP_A_4", "MCP_A_thumb"] + +# Neutral demo pose: only MCP_A joints move; others pinned at 0. +_MCP_A_THUMB_MID = 0.5 * (JOINT_POS_LIMITS["MCP_A_thumb"][0] + JOINT_POS_LIMITS["MCP_A_thumb"][1]) +_NEUTRAL_JOINT_POS = {name: 0.0 for name in [ + "circumduction", "MCP_A_thumb", "PIP_thumb", "DIP_thumb", + "MCP_A_1", "MCP_A_2", "MCP_A_3", "MCP_A_4", + "MCP_1", "PIP_1", "DIP_1", "MCP_2", "PIP_2", "DIP_2", + "MCP_3", "PIP_3", "DIP_3", "MCP_4", "PIP_4", "DIP_4", +]} +_NEUTRAL_JOINT_POS["MCP_A_thumb"] = _MCP_A_THUMB_MID + + +@configclass +class SplayDemoSceneCfg(InteractiveSceneCfg): + ground = AssetBaseCfg( + prim_path="/World/defaultGroundPlane", + spawn=sim_utils.GroundPlaneCfg(), + ) + light = AssetBaseCfg( + prim_path="/World/Light", + spawn=sim_utils.DomeLightCfg(intensity=2500.0, color=(0.85, 0.85, 0.85)), + ) + robot = INHAND_WATO_HAND_CFG.replace( + prim_path="{ENV_REGEX_NS}/Robot", + init_state=ArticulationCfg.InitialStateCfg( + pos=INHAND_WATO_HAND_CFG.init_state.pos, + rot=INHAND_WATO_HAND_CFG.init_state.rot, + joint_pos=_NEUTRAL_JOINT_POS, + ), + ) + + +def _sweep_angle(sim_time_s: float, freq_hz: float, lo: float, hi: float) -> float: + """Sine sweep between joint-specific limits.""" + mid = 0.5 * (lo + hi) + amp = 0.5 * (hi - lo) + return mid + amp * math.sin(2.0 * math.pi * freq_hz * sim_time_s) + + +def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene, freq_hz: float) -> None: + robot = scene["robot"] + sim_dt = sim.get_physics_dt() + sim_time_s = 0.0 + print_every = int(1.0 / sim_dt) + + spread_ids = [robot.find_joints(name)[0][0] for name in SPREAD_JOINTS] + spread_limits = [JOINT_POS_LIMITS[name] for name in SPREAD_JOINTS] + fixed_ids = [i for i in range(robot.num_joints) if i not in spread_ids] + + # Pin every non-spread joint at 0 rad. + fixed_pos = torch.zeros((robot.num_instances, len(fixed_ids)), device=robot.device) + spread_target = torch.zeros((robot.num_instances, len(spread_ids)), device=robot.device) + + print(f"[INFO] Moving only: {SPREAD_JOINTS}") + print("[INFO] All other joints pinned at 0 rad") + for name, (lo, hi) in zip(SPREAD_JOINTS, spread_limits): + print(f"[INFO] {name}: {math.degrees(lo):.1f} deg to {math.degrees(hi):.1f} deg") + print(f"[INFO] Sweep frequency: {freq_hz:.2f} Hz") + + step = 0 + while simulation_app.is_running(): + for i, (lo, hi) in enumerate(spread_limits): + spread_target[:, i] = _sweep_angle(sim_time_s, freq_hz, lo, hi) + + robot.set_joint_position_target(fixed_pos, joint_ids=fixed_ids) + robot.set_joint_position_target(spread_target, joint_ids=spread_ids) + scene.write_data_to_sim() + sim.step() + scene.update(sim_dt) + + if step % print_every == 0: + finger = spread_target[0, 0].item() + thumb = spread_target[0, -1].item() + print( + f"[splay] t={sim_time_s:5.1f}s MCP_A_1={math.degrees(finger):+.1f} deg" + f" MCP_A_thumb={math.degrees(thumb):+.1f} deg" + ) + + sim_time_s += sim_dt + step += 1 + + +def main() -> None: + sim_cfg = sim_utils.SimulationCfg(dt=1.0 / 120.0, device=args_cli.device) + sim = sim_utils.SimulationContext(sim_cfg) + sim.set_camera_view(eye=(0.35, 0.35, 0.75), target=(0.0, 0.08, 0.54)) + + scene = InteractiveScene(SplayDemoSceneCfg(num_envs=1, env_spacing=2.0)) + sim.reset() + apply_joint_limits(scene["robot"]) + print("[INFO] Splay demo running. MCP_A_1..4 + MCP_A_thumb move; all other joints stay at 0.") + run_simulator(sim, scene, args_cli.freq_hz) + + +if __name__ == "__main__": + main() + simulation_app.close() diff --git a/autonomy/simulation/Humanoid_Wato/wato_hand/wato_hand_cfg.py b/autonomy/simulation/Humanoid_Wato/wato_hand/wato_hand_cfg.py index 71904e0..a8a16ec 100644 --- a/autonomy/simulation/Humanoid_Wato/wato_hand/wato_hand_cfg.py +++ b/autonomy/simulation/Humanoid_Wato/wato_hand/wato_hand_cfg.py @@ -26,7 +26,7 @@ def _deg(degrees: float) -> float: # --- Joint limits from Physics Inspector (degrees -> rad) -------------------- -_MCP_A_LIMIT = (_deg(-8.594), _deg(8.594)) +_MCP_A_LIMIT = (_deg(-27.0), _deg(27.0)) # expanded to AllegroHand range; self-collision is off _MCP_FLEX_LIMIT = (0.0, _deg(85.944)) _PIP_FLEX_LIMIT = (_deg(-85.944), 0.0) _DIP_FLEX_LIMIT = (0.0, _deg(85.944))