diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/wato_hand.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/wato_hand.py
index c4c4e16..290f56d 100644
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/wato_hand.py
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/wato_hand.py
@@ -12,11 +12,13 @@
 if _WATO_HAND_DIR not in sys.path:
     sys.path.insert(0, _WATO_HAND_DIR)
 
-from wato_hand_cfg import WATO_HAND_CFG  # noqa: E402
+from wato_hand_cfg import JOINT_POS_LIMITS, WATO_HAND_CFG  # noqa: E402
 
 # Partial grasp pose for in-hand cube reorientation (~50% of flex range).
-# Finger splay at ~50% of ±8.6 deg limit (0.075 rad) so spread is visible at reset.
-_INHAND_SPREAD_RAD = 0.075
+# MCP_A splay limit is ±27 deg (expanded in JOINT_POS_LIMITS); keep default at mid-spread.
+_MCP_A_SPREAD_MAX = JOINT_POS_LIMITS["MCP_A_1"][1]
+_INHAND_SPREAD_RAD = 0.5 * _MCP_A_SPREAD_MAX
+INHAND_SPREAD_RAD = _INHAND_SPREAD_RAD
 
 _INHAND_GRASP_JOINT_POS = {
     "circumduction": 0.0,
@@ -47,13 +49,13 @@ def _quat_y_deg(deg: float) -> tuple[float, float, float, float]:
     return (math.cos(half), 0.0, math.sin(half), 0.0)
 
 
-# Wato URDF at identity: fingers extend +Y. Palm-up tuned at +160 deg Y.
+# In-hand task needs palm facing up (+Z). URDF identity has fingers along +Y, so rotate
+# about world Y until the palm points upward. ±160 deg are near-flips; +160 was tuned
+# with INHAND_CUBE_POS so the cube lands in the palm at reset.
 _INHAND_ROT_Y_POS_160 = _quat_y_deg(160.0)
-_INHAND_ROT_Y_NEG_160 = _quat_y_deg(-160.0)
 _INHAND_PALM_UP_ROT = _INHAND_ROT_Y_POS_160
 
-# DexCube spawn in env frame, paired with _INHAND_PALM_UP_ROT (palm center ~ y=0.08, z=0.54).
-INHAND_CUBE_POS = (0.0, 0.08, 0.54)
+INHAND_CUBE_POS = (-0.01, 0.09, 0.5)
 
 INHAND_WATO_HAND_CFG = WATO_HAND_CFG.replace(
     spawn=WATO_HAND_CFG.spawn.replace(
@@ -68,7 +70,7 @@ def _quat_y_deg(deg: float) -> tuple[float, float, float, float]:
             max_contact_impulse=1e32,
         ),
         articulation_props=sim_utils.ArticulationRootPropertiesCfg(
-            enabled_self_collisions=True,
+            enabled_self_collisions=False,
             solver_position_iteration_count=8,
             solver_velocity_iteration_count=0,
             sleep_threshold=0.005,
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/TRAINING_LOG.md b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/TRAINING_LOG.md
new file mode 100644
index 0000000..480304a
--- /dev/null
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/TRAINING_LOG.md
@@ -0,0 +1,97 @@
+# In-Hand Cube Reorientation — Training Log
+
+## Task
+20-DOF Wato hand, in-hand cube reorientation (Isaac-Repose-Cube-WatoHand-v0).
+Palm-up orientation, cube spawned at `(-0.01, 0.09, 0.5)` in the palm.
+
+---
+
+## What Was Tried
+
+### 1. MCP_A abduction range expansion (WORKS — kept)
+**Problem:** USD bakes ±8.6° abduction limits; fingers could barely splay, making reorientation geometrically impossible.  
+**Fix:** Expanded `_MCP_A_LIMIT` in `wato_hand_cfg.py` from ±8.6° → ±27° (AllegroHand level). Added `expand_abduction_limits` startup event in `wato_env_cfg.py` that calls `apply_wato_hand_joint_limits()` to push new limits into PhysX at runtime, overriding the USD-baked values.  
+**Result:** Fingers now have meaningful splay range. Self-collision was already disabled so no issues.
+
+---
+
+### 2. Reward shaping — object holding (WORKS — kept)
+**Problem:** Cube dropped in ~30 steps (1 sec); no gradient for the policy to learn to grip. Episode too short for any useful signal.  
+**Fixes applied (cumulative):**
+- `track_pos_l2 weight=-3.0` (continuous penalty for cube drifting from goal position)
+- `object_away_penalty weight=-5.0` (terminal penalty on drop)
+- `object_held_bonus weight=2.0, threshold=0.10m` — dense binary +1/step when cube is within 10 cm of goal. **This was the key fix.**
+
+**Result:** Episode length doubled from ~60 → 105-119 steps. `object_out_of_reach` rate halved from 1.5-2.1 → 0.6-0.8 per batch.
+
+---
+
+### 3. Stagnation termination fix (WORKS — kept)
+**Problem:** `orientation_error_threshold=0.12` in stagnation but `orientation_success_threshold=0.4`. Episodes achieving error 0.12-0.4 rad (better than needed!) were simultaneously getting success credit AND counting toward stagnation — ending good episodes at 90 steps.  
+**Fix:** Raised stagnation threshold to 0.5 rad (above success threshold) and extended `stagnant_steps` 90 → 150.  
+**Result:** Longer episodes, less premature termination.
+
+---
+
+### 4. PPO config tuning (PARTIAL — kept)
+- `entropy_coef: 0.002 → 0.0001` — stopped the optimizer from rewarding randomness. `action_noise_std` stopped climbing from 1.02 → 1.05.
+- `num_steps_per_env: 24 → 48` — each rollout now covers ~44% of a typical episode; better return estimates for the critic.
+- `success_bonus weight: 250 → 50` — original weight caused VF loss to spike to 74-88 as policy occasionally scored success. Reducing to 50 kept VF loss in 26-55 range.
+
+---
+
+### 5. Angular velocity toward goal reward (FAILED — disabled)
+**Idea:** Reward the component of object angular velocity aligned with the goal direction. `object_ang_vel_toward_goal` with weight=0.1.  
+**Problem:** Reward was negative (-0.003) because cube's random spin was anti-aligned with the goal on average. This taught the policy to *suppress* all rotation (hold rigidly) to avoid the penalty.  
+**Fix attempt:** Clamped to 0, increased weight to 0.5. Value became +0.05 but this was purely mechanical (5x weight + clamping removed negatives), not actual policy improvement. Orientation error unchanged.  
+**Outcome:** Disabled. The cube barely moves and there is insufficient angular velocity signal to reward.
+
+---
+
+### 6. EMA alpha reduction 0.95 → 0.5 (FAILED — reverted)
+**Idea:** alpha=0.95 gives ~1.5 Hz effective finger bandwidth. Reducing to 0.5 gives ~15 Hz, potentially allowing faster rotation torques.  
+**Result:** `action_rate_l2` did NOT increase (policy didn't use the extra bandwidth). `orientation_error` unchanged. `object_out_of_reach` slightly worse. Reverted to 0.95.
+
+---
+
+### 7. Z-axis curriculum (WORKS — current)
+**Problem:** Full 3D random orientation goal is too hard to explore from scratch. Policy never discovered how to rotate the cube despite holding it.  
+**Fix:** `rotation_axes = ["z"]` in `wato_env_cfg.py` — goals are now only rotations about the palm normal (z-axis). The cube just needs to spin in the plane of the palm.  
+**Result:** Immediate improvement:
+- `track_orientation_inv_l2`: 0.24 → 0.88-1.05 (near 4x)
+- `orientation_error`: 2.2-2.4 → 1.78-2.07 (trending down)
+- `consecutive_success`: 0.03 → 0.20-0.27
+- `time_out` episodes appearing for first time
+
+**Note on oscillation:** Orientation error oscillates (~1.8-2.1) rather than trending smoothly because `update_goal_on_success=True` resamples a new random z-goal when error < 0.4, immediately resetting error to ~1.5. The mean oscillates around `0.25 × ~0.2 + 0.75 × ~2.3 ≈ 1.85`.
+
+---
+
+## Current State (iter ~600, ~7.4M steps)
+
+| Metric | Start | Current |
+|---|---|---|
+| Episode length | ~30 steps | ~120-137 steps |
+| `object_out_of_reach` rate | 5-7 | 0.5-0.9 |
+| `orientation_error` | 2.2-2.4 rad (random) | 1.82-1.95 rad |
+| `consecutive_success` | ~0 | 0.20-0.27 |
+| `track_orientation_inv_l2` | ~0.25 | ~0.75-1.05 |
+
+**Bottleneck:** Plateaued at iter ~400. Policy learns z-rotation intermittently (occasional good episodes) but hasn't converged to a consistent strategy. Root cause is sample efficiency: 256 envs × 48 steps = 12,288 samples/gradient step. Standard IsaacLab in-hand setups use 8,192+ envs = 393k samples/step.
+
+---
+
+## Next Steps
+
+1. **Make USD instanceable** — convert `wato_hand/urdf/hand_urdf.urdf` to instanceable format using:
+   ```bash
+   /home/hy/IsaacLab/isaaclab.sh -p /home/hy/IsaacLab/scripts/tools/convert_instanceable.py \
+     /home/hy/Desktop/humanoid/autonomy/simulation/Humanoid_Wato/wato_hand/urdf/ \
+     /home/hy/Desktop/humanoid/autonomy/simulation/Humanoid_Wato/wato_hand/urdf_instanceable/ \
+     --make-instanceable --conversion-type urdf
+   ```
+   Then update `_HAND_USD_PATH` in `wato_hand_cfg.py` to point to the new USD and change `replicate_physics = False` → `True` in `wato_env_cfg.py`. This allows scaling beyond 256 envs without OOM.
+2. **Increase num_envs** — after instanceable conversion, try `--num_envs 1024+`. Standard IsaacLab in-hand setups use 8192 envs; even 1024 gives 4x better gradient quality.
+3. **Longer training** — z-axis skill may require 50k+ iterations (~15 hours overnight) to converge at 256 envs.
+4. **Curriculum expansion** — once `orientation_error` consistently < 1.5 and `consecutive_success` > 0.4 on z-axis, expand back to `rotation_axes = ["x", "y"]` for full 3D reorientation.
+5. **Re-enable ang_vel reward** (with clamp, weight ~0.3) once holding is fully stable — provides rotation direction signal once the policy has enough bandwidth to act on it.
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/__init__.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/__init__.py
index ec0d90b..e69de29 100644
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/__init__.py
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/__init__.py
@@ -1,14 +0,0 @@
-# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-
-"""In-hand object reorientation environment.
-
-These environments are based on the `dexterous cube manipulation`_ environments
-provided in IsaacGymEnvs repository from NVIDIA. However, they contain certain
-modifications and additional features.
-
-.. _dexterous cube manipulation: https://github.com/NVIDIA-Omniverse/IsaacGymEnvs/blob/main/isaacgymenvs/tasks/allegro_hand.py
-
-"""
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/__init__.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/__init__.py
deleted file mode 100644
index e63efd6..0000000
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/__init__.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-
-import gymnasium as gym
-
-from . import agents
-
-##
-# Register Gym environments.
-##
-
-##
-# Full kinematic state observations.
-##
-
-gym.register(
-    id="Isaac-Repose-Cube-Allegro-v0",
-    entry_point="isaaclab.envs:ManagerBasedRLEnv",
-    disable_env_checker=True,
-    kwargs={
-        "env_cfg_entry_point": f"{__name__}.allegro_env_cfg:AllegroCubeEnvCfg",
-        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AllegroCubePPORunnerCfg",
-        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
-        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
-    },
-)
-
-gym.register(
-    id="Isaac-Repose-Cube-Allegro-Play-v0",
-    entry_point="isaaclab.envs:ManagerBasedRLEnv",
-    disable_env_checker=True,
-    kwargs={
-        "env_cfg_entry_point": f"{__name__}.allegro_env_cfg:AllegroCubeEnvCfg_PLAY",
-        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AllegroCubePPORunnerCfg",
-        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
-        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
-    },
-)
-
-##
-# Kinematic state observations without velocity information.
-##
-
-gym.register(
-    id="Isaac-Repose-Cube-Allegro-NoVelObs-v0",
-    entry_point="isaaclab.envs:ManagerBasedRLEnv",
-    disable_env_checker=True,
-    kwargs={
-        "env_cfg_entry_point": f"{__name__}.allegro_env_cfg:AllegroCubeNoVelObsEnvCfg",
-        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AllegroCubeNoVelObsPPORunnerCfg",
-        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
-        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
-    },
-)
-
-gym.register(
-    id="Isaac-Repose-Cube-Allegro-NoVelObs-Play-v0",
-    entry_point="isaaclab.envs:ManagerBasedRLEnv",
-    disable_env_checker=True,
-    kwargs={
-        "env_cfg_entry_point": f"{__name__}.allegro_env_cfg:AllegroCubeNoVelObsEnvCfg_PLAY",
-        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AllegroCubeNoVelObsPPORunnerCfg",
-        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
-        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
-    },
-)
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/__init__.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/__init__.py
deleted file mode 100644
index e75ca2b..0000000
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/rl_games_ppo_cfg.yaml b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/rl_games_ppo_cfg.yaml
deleted file mode 100644
index 24b4540..0000000
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/rl_games_ppo_cfg.yaml
+++ /dev/null
@@ -1,85 +0,0 @@
-params:
-  seed: 42
-
-  # environment wrapper clipping
-  env:
-    clip_observations: 5.0
-    clip_actions: 1.0
-
-  algo:
-    name: a2c_continuous
-
-  model:
-    name: continuous_a2c_logstd
-
-  network:
-    name: actor_critic
-    separate: False
-
-    space:
-      continuous:
-        mu_activation: None
-        sigma_activation: None
-        mu_init:
-          name: default
-        sigma_init:
-          name: const_initializer
-          val: 0
-        fixed_sigma: True
-
-    mlp:
-      units: [512, 256, 128]
-      activation: elu
-      d2rl: False
-
-      initializer:
-        name: default
-      regularizer:
-        name: None
-
-  load_checkpoint: False
-  load_path: ''
-
-  config:
-    name: allegro_cube
-    env_name: rlgpu
-    device: 'cuda:0'
-    device_name: 'cuda:0'
-    multi_gpu: False
-    ppo: True
-    mixed_precision: False
-    normalize_input: True
-    normalize_value: True
-    value_bootstrap: True
-    num_actors: -1  # configured from the script (based on num_envs)
-    reward_shaper:
-      scale_value: 0.1
-    normalize_advantage: True
-    gamma: 0.998
-    tau: 0.95
-    learning_rate: 5e-4
-    lr_schedule: adaptive
-    schedule_type: standard
-    kl_threshold: 0.016
-    score_to_win: 100000
-    max_epochs: 5000
-    save_best_after: 500
-    save_frequency: 200
-    print_stats: True
-    grad_norm: 1.0
-    entropy_coef: 0.002
-    truncate_grads: True
-    e_clip: 0.2
-    horizon_length: 24
-    minibatch_size: 16384  # 32768
-    mini_epochs: 5
-    critic_coef: 4
-    clip_value: True
-    seq_length: 4
-    bounds_loss_coef: 0.0005
-
-    player:
-      #render: True
-      deterministic: True
-      games_num: 100000
-      print_stats: True
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/rsl_rl_ppo_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/rsl_rl_ppo_cfg.py
deleted file mode 100644
index 4a25aea..0000000
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/rsl_rl_ppo_cfg.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-
-from isaaclab.utils import configclass
-
-from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
-
-
-@configclass
-class AllegroCubePPORunnerCfg(RslRlOnPolicyRunnerCfg):
-    num_steps_per_env = 24
-    max_iterations = 5000
-    save_interval = 50
-    experiment_name = "allegro_cube"
-    empirical_normalization = True
-    policy = RslRlPpoActorCriticCfg(
-        init_noise_std=1.0,
-        actor_hidden_dims=[512, 256, 128],
-        critic_hidden_dims=[512, 256, 128],
-        activation="elu",
-    )
-    algorithm = RslRlPpoAlgorithmCfg(
-        value_loss_coef=1.0,
-        use_clipped_value_loss=True,
-        clip_param=0.2,
-        entropy_coef=0.002,
-        num_learning_epochs=5,
-        num_mini_batches=4,
-        learning_rate=0.001,
-        schedule="adaptive",
-        gamma=0.998,
-        lam=0.95,
-        desired_kl=0.01,
-        max_grad_norm=1.0,
-    )
-
-
-@configclass
-class AllegroCubeNoVelObsPPORunnerCfg(AllegroCubePPORunnerCfg):
-    experiment_name = "allegro_cube_no_vel_obs"
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/skrl_ppo_cfg.yaml b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/skrl_ppo_cfg.yaml
deleted file mode 100644
index a776337..0000000
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/agents/skrl_ppo_cfg.yaml
+++ /dev/null
@@ -1,80 +0,0 @@
-seed: 42
-
-
-# Models are instantiated using skrl's model instantiator utility
-# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
-models:
-  separate: False
-  policy:  # see gaussian_model parameters
-    class: GaussianMixin
-    clip_actions: False
-    clip_log_std: True
-    min_log_std: -20.0
-    max_log_std: 2.0
-    initial_log_std: 0.0
-    network:
-      - name: net
-        input: STATES
-        layers: [512, 256, 128]
-        activations: elu
-    output: ACTIONS
-  value:  # see deterministic_model parameters
-    class: DeterministicMixin
-    clip_actions: False
-    network:
-      - name: net
-        input: STATES
-        layers: [512, 256, 128]
-        activations: elu
-    output: ONE
-
-
-# Rollout memory
-# https://skrl.readthedocs.io/en/latest/api/memories/random.html
-memory:
-  class: RandomMemory
-  memory_size: -1  # automatically determined (same as agent:rollouts)
-
-
-# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
-# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
-agent:
-  class: PPO
-  rollouts: 24
-  learning_epochs: 5
-  mini_batches: 12
-  discount_factor: 0.998
-  lambda: 0.95
-  learning_rate: 5.0e-04
-  learning_rate_scheduler: KLAdaptiveLR
-  learning_rate_scheduler_kwargs:
-    kl_threshold: 0.016
-  state_preprocessor: RunningStandardScaler
-  state_preprocessor_kwargs: null
-  value_preprocessor: RunningStandardScaler
-  value_preprocessor_kwargs: null
-  random_timesteps: 0
-  learning_starts: 0
-  grad_norm_clip: 1.0
-  ratio_clip: 0.2
-  value_clip: 0.2
-  clip_predicted_values: True
-  entropy_loss_scale: 0.002
-  value_loss_scale: 2.0
-  kl_threshold: 0.0
-  rewards_shaper_scale: 0.1
-  time_limit_bootstrap: False
-  # logging and checkpoint
-  experiment:
-    directory: "allegro_cube"
-    experiment_name: ""
-    write_interval: auto
-    checkpoint_interval: auto
-
-
-# Sequential trainer
-# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
-trainer:
-  class: SequentialTrainer
-  timesteps: 120000
-  environment_info: log
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/allegro_env_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/allegro_env_cfg.py
deleted file mode 100644
index d55e595..0000000
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/allegro_hand/allegro_env_cfg.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-
-from isaaclab.utils import configclass
-
-import isaaclab_tasks.manager_based.manipulation.inhand.inhand_env_cfg as inhand_env_cfg
-
-##
-# Pre-defined configs
-##
-from isaaclab_assets import ALLEGRO_HAND_CFG  # isort: skip
-
-
-@configclass
-class AllegroCubeEnvCfg(inhand_env_cfg.InHandObjectEnvCfg):
-    def __post_init__(self):
-        # post init of parent
-        super().__post_init__()
-
-        # switch robot to allegro hand
-        self.scene.robot = ALLEGRO_HAND_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
-
-
-@configclass
-class AllegroCubeEnvCfg_PLAY(AllegroCubeEnvCfg):
-    def __post_init__(self):
-        # post init of parent
-        super().__post_init__()
-        # make a smaller scene for play
-        self.scene.num_envs = 50
-        # disable randomization for play
-        self.observations.policy.enable_corruption = False
-        # remove termination due to timeouts
-        self.terminations.time_out = None
-
-
-##
-# Environment configuration with no velocity observations.
-##
-
-
-@configclass
-class AllegroCubeNoVelObsEnvCfg(AllegroCubeEnvCfg):
-    def __post_init__(self):
-        # post init of parent
-        super().__post_init__()
-
-        # switch observation group to no velocity group
-        self.observations.policy = inhand_env_cfg.ObservationsCfg.NoVelocityKinematicObsGroupCfg()
-
-
-@configclass
-class AllegroCubeNoVelObsEnvCfg_PLAY(AllegroCubeNoVelObsEnvCfg):
-    def __post_init__(self):
-        # post init of parent
-        super().__post_init__()
-        # make a smaller scene for play
-        self.scene.num_envs = 50
-        # disable randomization for play
-        self.observations.policy.enable_corruption = False
-        # remove termination due to timeouts
-        self.terminations.time_out = None
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/wato_hand/agents/rsl_rl_ppo_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/wato_hand/agents/rsl_rl_ppo_cfg.py
index 8b2cfa1..48ab3fb 100644
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/wato_hand/agents/rsl_rl_ppo_cfg.py
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/wato_hand/agents/rsl_rl_ppo_cfg.py
@@ -4,7 +4,7 @@
 
 @configclass
 class WatoHandCubePPORunnerCfg(RslRlOnPolicyRunnerCfg):
-    num_steps_per_env = 24
+    num_steps_per_env = 48
     max_iterations = 5000
     save_interval = 50
     experiment_name = "wato_hand_cube"
@@ -19,7 +19,7 @@ class WatoHandCubePPORunnerCfg(RslRlOnPolicyRunnerCfg):
         value_loss_coef=1.0,
         use_clipped_value_loss=True,
         clip_param=0.2,
-        entropy_coef=0.002,
+        entropy_coef=0.0001,
         num_learning_epochs=5,
         num_mini_batches=4,
         learning_rate=0.001,
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/wato_hand/wato_env_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/wato_hand/wato_env_cfg.py
index 5c58136..0d25448 100644
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/wato_hand/wato_env_cfg.py
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/config/wato_hand/wato_env_cfg.py
@@ -1,9 +1,12 @@
+from isaaclab.managers import EventTermCfg as EventTerm, RewardTermCfg as RewTerm, SceneEntityCfg
 from isaaclab.utils import configclass
 
 import HumanoidRLPackage.HumanoidRLSetup.tasks.inhand.inhand_env_cfg as inhand_env_cfg
+import HumanoidRLPackage.HumanoidRLSetup.tasks.inhand.mdp as inhand_mdp
 from HumanoidRLPackage.HumanoidRLSetup.modelCfg.wato_hand import (
     INHAND_WATO_HAND_CFG,
     INHAND_CUBE_POS,
+    INHAND_SPREAD_RAD,
 )
 
 
@@ -20,8 +23,13 @@ def __post_init__(self):
         self.scene.num_envs = 256
 
         self.scene.robot = INHAND_WATO_HAND_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+        # Push expanded MCP_A limits (±27 deg) into PhysX, overriding the ±8.6 deg baked in the USD.
+        self.events.expand_abduction_limits = EventTerm(
+            func=inhand_mdp.apply_wato_hand_joint_limits,
+            mode="startup",
+        )
         # Paired with _INHAND_PALM_UP_ROT in modelCfg/wato_hand.py.
-        self.scene.object.spawn.scale = (0.6, 0.6, 0.6)
+        self.scene.object.spawn.scale = (0.8, 0.8, 0.8)
         self.scene.object.init_state.pos = INHAND_CUBE_POS
         self.scene.object.init_state.rot = (1.0, 0.0, 0.0, 0.0)
 
@@ -29,10 +37,8 @@ def __post_init__(self):
         # shape mismatch in EMAJointPositionToLimitsAction.reset() on this Isaac Lab version.
         # Replace ".*" position_range: overlapping patterns duplicate joint ids (20 -> 24) and crash reset.
         _grasp_scale = [0.2, 0.2]
-        # Compensates for use_default_offset (+0.075 rad): [1.5, 0.5] maps
-        # soft limits [-0.15, +0.15] to sample range [-0.15, +0.15] after offset,
-        # giving uniform coverage of the full abduction range at reset.
-        _splay_scale = [1.5, 0.5]
+        # Full abduction range at reset (±27 deg limit with use_default_offset on _INHAND_SPREAD_RAD default).
+        _splay_scale = [1.0, 1.0]
         self.events.reset_robot_joints.params["position_range"] = {
             "circumduction": _grasp_scale,
             "MCP_A_thumb": _grasp_scale,
@@ -56,6 +62,22 @@ def __post_init__(self):
             "DIP_4": _grasp_scale,
         }
 
+        # Curriculum: start with rotation about z-axis (palm normal) only.
+        # Full 3D random orientation is too hard to explore from scratch; z-axis
+        # rotation (spinning in the palm plane) is the most natural motion for this hand.
+        # Once orientation_error shows a downward trend, expand back to ["x", "y"].
+        self.commands.object_pose.rotation_axes = ["z"]
+
+        # Small bonus for MCP_A velocity + spread deflection.
+        self.rewards.spread_activity = RewTerm(
+            func=inhand_mdp.mcp_a_spread_activity,
+            weight=0.03,
+            params={
+                "asset_cfg": SceneEntityCfg("robot"),
+                "spread_limit": INHAND_SPREAD_RAD * 2.0,
+            },
+        )
+
 
 @configclass
 class WatoHandCubeEnvCfg_PLAY(WatoHandCubeEnvCfg):
@@ -63,9 +85,9 @@ def __post_init__(self):
         super().__post_init__()
         self.scene.num_envs = 16
         self.observations.policy.enable_corruption = False
-        self.terminations.time_out = None
-        self.commands.object_pose.debug_vis = True
-        # Goal-orientation marker offset (green DexCube) — not the physical object pose.
+        # Keep time_out so play episodes reset instead of clamping forever.
+        self.episode_length_s = 10.0
+        # Nudge goal marker aside so it does not cover the physical cube.
         self.commands.object_pose.marker_pos_offset = (-0.10, 0.0, 0.12)
 
 
@@ -82,7 +104,7 @@ def __post_init__(self):
         super().__post_init__()
         self.scene.num_envs = 16
         self.observations.policy.enable_corruption = False
-        self.terminations.time_out = None
-        self.commands.object_pose.debug_vis = True
-        # Goal-orientation marker offset (green DexCube) — not the physical object pose.
+        # Keep time_out so play episodes reset instead of clamping forever.
+        self.episode_length_s = 10.0
+        # Nudge goal marker aside so it does not cover the physical cube.
         self.commands.object_pose.marker_pos_offset = (-0.10, 0.0, 0.12)
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/inhand_env_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/inhand_env_cfg.py
index ca99801..defe3ae 100644
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/inhand_env_cfg.py
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/inhand_env_cfg.py
@@ -1,8 +1,3 @@
-# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-
 from __future__ import annotations
 
 from dataclasses import MISSING
@@ -23,21 +18,14 @@
 from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
 from isaaclab.utils.noise import AdditiveGaussianNoiseCfg as Gnoise
 
-import isaaclab_tasks.manager_based.manipulation.inhand.mdp as mdp
+import HumanoidRLPackage.HumanoidRLSetup.tasks.inhand.mdp as mdp
 
-##
-# Scene definition
-##
 
 
 @configclass
 class InHandObjectSceneCfg(InteractiveSceneCfg):
-    """Configuration for a scene with an object and a dexterous hand."""
-
-    # robots
     robot: ArticulationCfg = MISSING
 
-    # objects
     object: RigidObjectCfg = RigidObjectCfg(
         prim_path="{ENV_REGEX_NS}/object",
         spawn=sim_utils.UsdFileCfg(
@@ -57,7 +45,6 @@ class InHandObjectSceneCfg(InteractiveSceneCfg):
         init_state=RigidObjectCfg.InitialStateCfg(pos=(0.0, -0.19, 0.56), rot=(1.0, 0.0, 0.0, 0.0)),
     )
 
-    # lights
     light = AssetBaseCfg(
         prim_path="/World/light",
         spawn=sim_utils.DistantLightCfg(color=(0.95, 0.95, 0.95), intensity=1000.0),
@@ -69,24 +56,12 @@ class InHandObjectSceneCfg(InteractiveSceneCfg):
     )
 
 
-##
-# MDP settings
-##
-
 
 @configclass
 class CommandsCfg:
     """Command specifications for the MDP."""
 
-    object_pose = mdp.InHandReOrientationCommandCfg(
-        asset_name="object",
-        init_pos_offset=(0.0, 0.0, -0.04),
-        update_goal_on_success=True,
-        orientation_success_threshold=0.1,
-        make_quat_unique=False,
-        marker_pos_offset=(-0.2, -0.06, 0.08),
-        debug_vis=True,
-    )
+    object_pose = mdp.InHandReOrientationCommandCfg()
 
 
 @configclass
@@ -179,7 +154,7 @@ class EventCfg:
         func=mdp.randomize_rigid_body_material,
         mode="startup",
         params={
-            "asset_cfg": SceneEntityCfg("robot", body_names=".*"),
+            "asset_cfg": SceneEntityCfg("robot"),
             "static_friction_range": (0.7, 1.3),
             "dynamic_friction_range": (0.7, 1.3),
             "restitution_range": (0.0, 0.0),
@@ -190,7 +165,7 @@ class EventCfg:
         func=mdp.randomize_rigid_body_mass,
         mode="startup",
         params={
-            "asset_cfg": SceneEntityCfg("robot", body_names=".*"),
+            "asset_cfg": SceneEntityCfg("robot"),
             "mass_distribution_params": (0.95, 1.05),
             "operation": "scale",
         },
@@ -200,7 +175,7 @@ class EventCfg:
         mode="startup",
         params={
             "asset_cfg": SceneEntityCfg("robot", joint_names=".*"),
-            "stiffness_distribution_params": (0.3, 3.0),  # default: 3.0
+            "stiffness_distribution_params": (0.75, 1.5),  # default: 3.0
             "damping_distribution_params": (0.75, 1.5),  # default: 0.1
             "operation": "scale",
             "distribution": "log_uniform",
@@ -212,7 +187,7 @@ class EventCfg:
         func=mdp.randomize_rigid_body_material,
         mode="startup",
         params={
-            "asset_cfg": SceneEntityCfg("object", body_names=".*"),
+            "asset_cfg": SceneEntityCfg("object"),
             "static_friction_range": (0.7, 1.3),
             "dynamic_friction_range": (0.7, 1.3),
             "restitution_range": (0.0, 0.0),
@@ -236,7 +211,7 @@ class EventCfg:
         params={
             "pose_range": {"x": [-0.01, 0.01], "y": [-0.01, 0.01], "z": [-0.01, 0.01]},
             "velocity_range": {},
-            "asset_cfg": SceneEntityCfg("object", body_names=".*"),
+            "asset_cfg": SceneEntityCfg("object"),
         },
     )
     reset_robot_joints = EventTerm(
@@ -256,19 +231,19 @@ class RewardsCfg:
     """Reward terms for the MDP."""
 
     # -- task
-    # track_pos_l2 = RewTerm(
-    #     func=mdp.track_pos_l2,
-    #     weight=-10.0,
-    #     params={"object_cfg": SceneEntityCfg("object"), "command_name": "object_pose"},
-    # )
+    track_pos_l2 = RewTerm(
+        func=mdp.track_pos_l2,
+        weight=-3.0,
+        params={"object_cfg": SceneEntityCfg("object"), "command_name": "object_pose"},
+    )
     track_orientation_inv_l2 = RewTerm(
         func=mdp.track_orientation_inv_l2,
-        weight=1.0,
+        weight=5.0,
         params={"object_cfg": SceneEntityCfg("object"), "rot_eps": 0.1, "command_name": "object_pose"},
     )
     success_bonus = RewTerm(
         func=mdp.success_bonus,
-        weight=250.0,
+        weight=50.0,
         params={"object_cfg": SceneEntityCfg("object"), "command_name": "object_pose"},
     )
 
@@ -277,13 +252,29 @@ class RewardsCfg:
     action_l2 = RewTerm(func=mdp.action_l2, weight=-0.0001)
     action_rate_l2 = RewTerm(func=mdp.action_rate_l2, weight=-0.01)
 
-    # -- optional penalties (these are disabled by default)
-    # object_away_penalty = RewTerm(
-    #     func=mdp.is_terminated_term,
-    #     weight=-0.0,
-    #     params={"term_keys": "object_out_of_reach"},
+    # Dense per-step bonus: +1 per step the cube stays within 0.10 m of the goal position.
+    # At weight=2.0 this is worth ~+120/episode when held vs 0 when dropped, giving a far
+    # stronger holding gradient than the continuous L2 penalty alone.
+    object_held_bonus = RewTerm(
+        func=mdp.object_held_bonus,
+        weight=2.0,
+        params={"object_cfg": SceneEntityCfg("object"), "command_name": "object_pose", "hold_threshold": 0.10},
+    )
+
+    # Rotation reward disabled — re-enable once alpha=0.5 is verified to allow cube rotation.
+    # object_ang_vel_toward_goal = RewTerm(
+    #     func=mdp.object_ang_vel_toward_goal,
+    #     weight=0.5,
+    #     params={"object_cfg": SceneEntityCfg("object"), "command_name": "object_pose"},
     # )
 
+    # Penalty for dropping the object — critical for early training.
+    object_away_penalty = RewTerm(
+        func=mdp.is_terminated_term,
+        weight=-5.0,
+        params={"term_keys": "object_out_of_reach"},
+    )
+
 
 @configclass
 class TerminationsCfg:
@@ -297,23 +288,23 @@ class TerminationsCfg:
 
     object_out_of_reach = DoneTerm(func=mdp.object_away_from_robot, params={"threshold": 0.3})
 
-    # object_out_of_reach = DoneTerm(
-    #     func=mdp.object_away_from_goal, params={"threshold": 0.24, "command_name": "object_pose"}
-    # )
-
-
-##
-# Environment configuration
-##
+    # End episode when goal orientation is not reached for too long.
+    # Threshold kept above orientation_success_threshold (0.4) so episodes that have
+    # achieved loose success (error < 0.4) don't simultaneously count as stagnant.
+    orientation_stagnation = DoneTerm(
+        func=mdp.orientation_stagnation,
+        params={
+            "command_name": "object_pose",
+            "orientation_error_threshold": 0.5,
+            "stagnant_steps": 150,
+        },
+    )
 
 
 @configclass
 class InHandObjectEnvCfg(ManagerBasedRLEnvCfg):
-    """Configuration for the in hand reorientation environment."""
-
-    # Scene settings
     scene: InHandObjectSceneCfg = InHandObjectSceneCfg(num_envs=8192, env_spacing=0.6)
-    # Simulation settings
+
     sim: SimulationCfg = SimulationCfg(
         physics_material=RigidBodyMaterialCfg(
             static_friction=1.0,
@@ -325,11 +316,11 @@ class InHandObjectEnvCfg(ManagerBasedRLEnvCfg):
             gpu_max_rigid_patch_count=2**23,
         ),
     )
-    # Basic settings
+
     observations: ObservationsCfg = ObservationsCfg()
     actions: ActionsCfg = ActionsCfg()
     commands: CommandsCfg = CommandsCfg()
-    # MDP settings
+
     rewards: RewardsCfg = RewardsCfg()
     terminations: TerminationsCfg = TerminationsCfg()
     events: EventCfg = EventCfg()
@@ -344,3 +335,6 @@ def __post_init__(self):
         self.sim.render_interval = self.decimation
         # change viewer settings
         self.viewer.eye = (2.0, 2.0, 2.0)
+
+# PYTHONPATH=$(pwd) /home/hy/IsaacLab/isaaclab.sh -p HumanoidRLPackage/rsl_rl_scripts/train.py --task=Isaac-Repose-Cube-WatoHand-v0 --headless
+# PYTHONPATH=$(pwd) /home/hy/IsaacLab/isaaclab.sh -p HumanoidRLPackage/rsl_rl_scripts/play.py --task=Isaac-Repose-Cube-WatoHand-Play-v0 --num_envs=1
\ No newline at end of file
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/commands/commands_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/commands/commands_cfg.py
index 0969448..720b72e 100644
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/commands/commands_cfg.py
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/commands/commands_cfg.py
@@ -3,8 +3,6 @@
 #
 # SPDX-License-Identifier: BSD-3-Clause
 
-from dataclasses import MISSING
-
 import isaaclab.sim as sim_utils
 from isaaclab.managers import CommandTermCfg
 from isaaclab.markers import VisualizationMarkersCfg
@@ -24,36 +22,32 @@ class InHandReOrientationCommandCfg(CommandTermCfg):
     class_type: type = InHandReOrientationCommand
     resampling_time_range: tuple[float, float] = (1e6, 1e6)  # no resampling based on time
 
-    asset_name: str = MISSING
-    """Name of the asset in the environment for which the commands are generated."""
+    asset_name: str = "object"
+    """Scene asset that receives orientation goals (the manipuland rigid body)."""
 
-    init_pos_offset: tuple[float, float, float] = (0.0, 0.0, 0.0)
-    """Position offset of the asset from its default position.
+    init_pos_offset: tuple[float, float, float] = (0.0, 0.0, -0.04)
+    """Offset added to the object's default spawn position to form the goal position command.
 
-    This is used to account for the offset typically present in the object's default position
-    so that the object is spawned at a height above the robot's palm. When the position command
-    is generated, the object's default position is used as the reference and the offset specified
-    is added to it to get the desired position of the object.
+    Does not move the physical cube at reset — only shifts the position target used by
+    ``track_pos_l2`` and the goal-marker anchor (before ``marker_pos_offset``).
     """
 
-    make_quat_unique: bool = MISSING
-    """Whether to make the quaternion unique or not.
+    make_quat_unique: bool = False
+    """Whether sampled goal quaternions are forced to have a positive real part."""
 
-    If True, the quaternion is made unique by ensuring the real part is positive.
-    """
+    orientation_success_threshold: float = 0.4
+    """Orientation error (rad) below which the goal counts as reached."""
 
-    orientation_success_threshold: float = MISSING
-    """Threshold for the orientation error to consider the goal orientation to be reached."""
+    update_goal_on_success: bool = True
+    """Resample goal orientation when the current goal is reached."""
 
-    update_goal_on_success: bool = MISSING
-    """Whether to update the goal orientation when the goal orientation is reached."""
+    rotation_axes: list[str] = ["x", "y"]
+    """World axes used when sampling random goal orientations."""
 
-    marker_pos_offset: tuple[float, float, float] = (0.0, 0.0, 0.0)
-    """Position offset of the marker from the object's desired position.
+    marker_pos_offset: tuple[float, float, float] = (-0.2, -0.06, 0.08)
+    """Extra offset for the goal-orientation debug marker only (not the physical cube)."""
 
-    This is useful to position the marker at a height above the object's desired position.
-    Otherwise, the marker may occlude the object in the visualization.
-    """
+    debug_vis: bool = True
 
     goal_pose_visualizer_cfg: VisualizationMarkersCfg = VisualizationMarkersCfg(
         prim_path="/Visuals/Command/goal_marker",
@@ -64,4 +58,4 @@ class InHandReOrientationCommandCfg(CommandTermCfg):
             ),
         },
     )
-    """The configuration for the goal pose visualization marker. Defaults to a DexCube marker."""
+    """Goal-orientation debug marker (DexCube visual only)."""
\ No newline at end of file
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/commands/orientation_command.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/commands/orientation_command.py
index 72cad39..f086605 100644
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/commands/orientation_command.py
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/commands/orientation_command.py
@@ -104,14 +104,15 @@ def _update_metrics(self):
         self.metrics["consecutive_success"] += successes.float()
 
     def _resample_command(self, env_ids: Sequence[int]):
-        # sample new orientation targets
-        rand_floats = 2.0 * torch.rand((len(env_ids), 2), device=self.device) - 1.0
-        # rotate randomly about x-axis and then y-axis
-        quat = math_utils.quat_mul(
-            math_utils.quat_from_angle_axis(rand_floats[:, 0] * torch.pi, self._X_UNIT_VEC[env_ids]),
-            math_utils.quat_from_angle_axis(rand_floats[:, 1] * torch.pi, self._Y_UNIT_VEC[env_ids]),
-        )
-        # make sure the quaternion real-part is always positive
+        _axis_map = {"x": self._X_UNIT_VEC, "y": self._Y_UNIT_VEC, "z": self._Z_UNIT_VEC}
+        axes = self.cfg.rotation_axes
+        rand_floats = 2.0 * torch.rand((len(env_ids), len(axes)), device=self.device) - 1.0
+        quat = math_utils.quat_from_angle_axis(rand_floats[:, 0] * torch.pi, _axis_map[axes[0]][env_ids])
+        for i, ax in enumerate(axes[1:], start=1):
+            quat = math_utils.quat_mul(
+                quat,
+                math_utils.quat_from_angle_axis(rand_floats[:, i] * torch.pi, _axis_map[ax][env_ids]),
+            )
         self.quat_command_w[env_ids] = math_utils.quat_unique(quat) if self.cfg.make_quat_unique else quat
 
     def _update_command(self):
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/events.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/events.py
index 8c52d80..f2c4258 100644
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/events.py
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/events.py
@@ -15,6 +15,12 @@
 from isaaclab.managers import EventTermCfg, ManagerTermBase, SceneEntityCfg
 from isaaclab.utils.math import sample_uniform
 
+import sys, os as _os
+_WATO_HAND_DIR = _os.path.abspath(_os.path.join(_os.path.dirname(__file__), "../../../../../../wato_hand"))
+if _WATO_HAND_DIR not in sys.path:
+    sys.path.insert(0, _WATO_HAND_DIR)
+from wato_hand_cfg import apply_joint_limits as _apply_joint_limits  # noqa: E402
+
 if TYPE_CHECKING:
     from isaaclab.envs import ManagerBasedEnv
 
@@ -182,3 +188,17 @@ def __call__(
 
         # set into the physics simulation
         self._asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids)
+
+
+def apply_wato_hand_joint_limits(
+    env: "ManagerBasedEnv",
+    env_ids: torch.Tensor,
+    asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+) -> None:
+    """Push expanded JOINT_POS_LIMITS from wato_hand_cfg into PhysX at startup.
+
+    The USD has the original hardware limits baked in (±8.6 deg for MCP_A).
+    This overwrites them so the RL policy can explore a larger abduction range.
+    """
+    robot: Articulation = env.scene[asset_cfg.name]
+    _apply_joint_limits(robot)
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/rewards.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/rewards.py
index 012e095..e295673 100644
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/rewards.py
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/rewards.py
@@ -9,10 +9,12 @@
 from typing import TYPE_CHECKING
 
 import isaaclab.utils.math as math_utils
-from isaaclab.assets import RigidObject
+from isaaclab.assets import Articulation, RigidObject
 from isaaclab.envs import ManagerBasedRLEnv
 from isaaclab.managers import SceneEntityCfg
 
+_MCP_A_JOINT_NAMES = ("MCP_A_1", "MCP_A_2", "MCP_A_3", "MCP_A_4")
+
 if TYPE_CHECKING:
     from .commands import InHandReOrientationCommand
 
@@ -94,3 +96,67 @@ def track_orientation_inv_l2(
     dtheta = math_utils.quat_error_magnitude(asset.data.root_quat_w, goal_quat_w)
 
     return 1.0 / (dtheta + rot_eps)
+
+
+def object_ang_vel_toward_goal(
+    env: ManagerBasedRLEnv,
+    command_name: str,
+    object_cfg: SceneEntityCfg = SceneEntityCfg("object"),
+) -> torch.Tensor:
+    """Reward the component of object angular velocity that reduces orientation error.
+
+    Positive when the cube is spinning toward the goal orientation, negative when
+    spinning away. Gives an immediate per-step gradient for rotation attempts rather
+    than waiting for the orientation error metric to actually change.
+    """
+    asset: RigidObject = env.scene[object_cfg.name]
+    command_term: "InHandReOrientationCommand" = env.command_manager.get_term(command_name)
+
+    goal_quat_w = command_term.command[:, 3:7]
+    curr_quat_w = asset.data.root_quat_w
+    ang_vel_w = asset.data.root_ang_vel_w  # (N, 3)
+
+    # q_err = q_goal * conj(q_curr): quaternion to rotate from current → goal.
+    # Its imaginary part gives the world-frame axis one should rotate about, scaled
+    # by sin(θ/2) — large and well-conditioned at the ~2 rad errors seen early in training.
+    q_err = math_utils.quat_mul(goal_quat_w, math_utils.quat_conjugate(curr_quat_w))
+    err_axis_w = q_err[:, 1:]  # (N, 3)
+
+    # Clamp to zero: only reward spinning *toward* goal, never penalise spinning away.
+    # Without the clamp, negative values teach the policy to suppress all rotation.
+    return torch.sum(ang_vel_w * err_axis_w, dim=-1).clamp(min=0.0)
+
+
+def object_held_bonus(
+    env: ManagerBasedRLEnv,
+    command_name: str,
+    object_cfg: SceneEntityCfg = SceneEntityCfg("object"),
+    hold_threshold: float = 0.10,
+) -> torch.Tensor:
+    """Per-step bonus (1.0) when object is within hold_threshold of the goal position.
+
+    Provides a dense holding signal that is zero when the cube has drifted away and 1.0
+    when it is within reach — much stronger gradient than the continuous L2 penalty.
+    """
+    asset: RigidObject = env.scene[object_cfg.name]
+    command_term: "InHandReOrientationCommand" = env.command_manager.get_term(command_name)
+    goal_pos_e = command_term.command[:, :3]
+    object_pos_e = asset.data.root_pos_w - env.scene.env_origins
+    dist = torch.norm(goal_pos_e - object_pos_e, p=2, dim=-1)
+    return (dist < hold_threshold).float()
+
+
+def mcp_a_spread_activity(
+    env: ManagerBasedRLEnv,
+    asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+    spread_limit: float = 0.15,
+) -> torch.Tensor:
+    """Reward finger abduction motion and deflection (encourages MCP_A spread use)."""
+    robot: Articulation = env.scene[asset_cfg.name]
+    spread_ids = []
+    for name in _MCP_A_JOINT_NAMES:
+        spread_ids.extend(robot.find_joints(name)[0])
+
+    spread_vel = torch.linalg.norm(robot.data.joint_vel[:, spread_ids], dim=1)
+    spread_pos = torch.mean(torch.abs(robot.data.joint_pos[:, spread_ids]), dim=1) / spread_limit
+    return spread_vel + 0.5 * spread_pos
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/terminations.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/terminations.py
index f50ffec..b860ae9 100644
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/terminations.py
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/inhand/mdp/terminations.py
@@ -5,9 +5,13 @@
 
 """Functions specific to the in-hand dexterous manipulation environments."""
 
-import torch
+from __future__ import annotations
+
 from typing import TYPE_CHECKING
 
+import torch
+
+from isaaclab.assets import Articulation, RigidObject
 from isaaclab.envs import ManagerBasedRLEnv
 from isaaclab.managers import SceneEntityCfg
 
@@ -81,3 +85,43 @@ def object_away_from_robot(
     dist = torch.norm(robot.data.root_pos_w - object.data.root_pos_w, dim=1)
 
     return dist > threshold
+
+
+def orientation_stagnation(
+    env: ManagerBasedRLEnv,
+    command_name: str,
+    orientation_error_threshold: float,
+    stagnant_steps: int,
+    object_ang_vel_threshold: float | None = None,
+    joint_vel_threshold: float | None = None,
+    object_cfg: SceneEntityCfg = SceneEntityCfg("object"),
+    robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+) -> torch.Tensor:
+    """Terminate when orientation error stays above threshold for too many steps.
+
+    Velocity gates are optional. Clamped grasps often keep joint velocities above a
+    tight threshold due to actuator tracking, so orientation-only detection is the default.
+    """
+    if not hasattr(env, "_orientation_stagnant_counter"):
+        env._orientation_stagnant_counter = torch.zeros(env.num_envs, device=env.device, dtype=torch.int32)
+
+    counter = env._orientation_stagnant_counter
+    # episode_length_buf is incremented before termination compute; value 1 means a fresh episode.
+    counter[env.episode_length_buf == 1] = 0
+
+    command_term: InHandReOrientationCommand = env.command_manager.get_term(command_name)
+    is_stagnant = command_term.metrics["orientation_error"] > orientation_error_threshold
+
+    if object_ang_vel_threshold is not None:
+        object_asset: RigidObject = env.scene[object_cfg.name]
+        object_ang_vel = torch.linalg.norm(object_asset.data.root_ang_vel_w, dim=1)
+        is_stagnant = is_stagnant & (object_ang_vel < object_ang_vel_threshold)
+
+    if joint_vel_threshold is not None:
+        robot: Articulation = env.scene[robot_cfg.name]
+        joint_vel = torch.linalg.norm(robot.data.joint_vel, dim=1)
+        is_stagnant = is_stagnant & (joint_vel < joint_vel_threshold)
+
+    counter[:] = torch.where(is_stagnant, counter + 1, torch.zeros_like(counter))
+
+    return counter >= stagnant_steps
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/wato_hand_cube/2026-06-12_02-31-31/model_200.pt b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/wato_hand_cube/2026-06-12_02-31-31/model_200.pt
deleted file mode 100644
index d34c2d9..0000000
Binary files a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/wato_hand_cube/2026-06-12_02-31-31/model_200.pt and /dev/null differ
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/wato_hand_cube/2026-06-13_01-12-13/model_700.pt b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/wato_hand_cube/2026-06-13_01-12-13/model_700.pt
new file mode 100644
index 0000000..a0fc9b3
Binary files /dev/null and b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/wato_hand_cube/2026-06-13_01-12-13/model_700.pt differ
diff --git a/autonomy/simulation/Humanoid_Wato/wato_hand/splay_demo.py b/autonomy/simulation/Humanoid_Wato/wato_hand/splay_demo.py
new file mode 100644
index 0000000..85efde1
--- /dev/null
+++ b/autonomy/simulation/Humanoid_Wato/wato_hand/splay_demo.py
@@ -0,0 +1,141 @@
+"""Sweep MCP_A abduction joints (4 fingers + thumb) so spread motion is easy to see.
+
+Run from this directory (omit --headless to watch in the GUI):
+
+    /home/hy/IsaacLab/isaaclab.sh -p splay_demo.py
+
+Finger splay (MCP_A_1..4): ±0.15 rad (±8.594 deg).
+Thumb abduction (MCP_A_thumb): 0 to 2.0 rad (~114.6 deg).
+"""
+
+from __future__ import annotations
+
+import argparse
+import math
+import os
+import sys
+
+from isaaclab.app import AppLauncher
+
+parser = argparse.ArgumentParser(description="Wato hand MCP_A splay demo.")
+parser.add_argument("--freq_hz", type=float, default=1.0, help="Splay sweep frequency in Hz.")
+AppLauncher.add_app_launcher_args(parser)
+args_cli = parser.parse_args()
+
+app_launcher = AppLauncher(args_cli)
+simulation_app = app_launcher.app
+
+import torch
+
+import isaaclab.sim as sim_utils
+from isaaclab.assets import ArticulationCfg, AssetBaseCfg
+from isaaclab.scene import InteractiveScene, InteractiveSceneCfg
+from isaaclab.utils import configclass
+
+from wato_hand_cfg import JOINT_POS_LIMITS, apply_joint_limits
+
+# Same hand asset/physics as in-hand RL (self-collision off, palm-up, etc.).
+_HUMANOID_RL_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "HumanoidRL"))
+if _HUMANOID_RL_ROOT not in sys.path:
+    sys.path.insert(0, _HUMANOID_RL_ROOT)
+from HumanoidRLPackage.HumanoidRLSetup.modelCfg.wato_hand import INHAND_WATO_HAND_CFG  # noqa: E402
+
+# Finger splay ±8.594 deg; thumb abduction 0..114.592 deg (each uses its own limits).
+SPREAD_JOINTS = ["MCP_A_1", "MCP_A_2", "MCP_A_3", "MCP_A_4", "MCP_A_thumb"]
+
+# Neutral demo pose: only MCP_A joints move; others pinned at 0.
+_MCP_A_THUMB_MID = 0.5 * (JOINT_POS_LIMITS["MCP_A_thumb"][0] + JOINT_POS_LIMITS["MCP_A_thumb"][1])
+_NEUTRAL_JOINT_POS = {name: 0.0 for name in [
+    "circumduction", "MCP_A_thumb", "PIP_thumb", "DIP_thumb",
+    "MCP_A_1", "MCP_A_2", "MCP_A_3", "MCP_A_4",
+    "MCP_1", "PIP_1", "DIP_1", "MCP_2", "PIP_2", "DIP_2",
+    "MCP_3", "PIP_3", "DIP_3", "MCP_4", "PIP_4", "DIP_4",
+]}
+_NEUTRAL_JOINT_POS["MCP_A_thumb"] = _MCP_A_THUMB_MID
+
+
+@configclass
+class SplayDemoSceneCfg(InteractiveSceneCfg):
+    ground = AssetBaseCfg(
+        prim_path="/World/defaultGroundPlane",
+        spawn=sim_utils.GroundPlaneCfg(),
+    )
+    light = AssetBaseCfg(
+        prim_path="/World/Light",
+        spawn=sim_utils.DomeLightCfg(intensity=2500.0, color=(0.85, 0.85, 0.85)),
+    )
+    robot = INHAND_WATO_HAND_CFG.replace(
+        prim_path="{ENV_REGEX_NS}/Robot",
+        init_state=ArticulationCfg.InitialStateCfg(
+            pos=INHAND_WATO_HAND_CFG.init_state.pos,
+            rot=INHAND_WATO_HAND_CFG.init_state.rot,
+            joint_pos=_NEUTRAL_JOINT_POS,
+        ),
+    )
+
+
+def _sweep_angle(sim_time_s: float, freq_hz: float, lo: float, hi: float) -> float:
+    """Sine sweep between joint-specific limits."""
+    mid = 0.5 * (lo + hi)
+    amp = 0.5 * (hi - lo)
+    return mid + amp * math.sin(2.0 * math.pi * freq_hz * sim_time_s)
+
+
+def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene, freq_hz: float) -> None:
+    robot = scene["robot"]
+    sim_dt = sim.get_physics_dt()
+    sim_time_s = 0.0
+    print_every = int(1.0 / sim_dt)
+
+    spread_ids = [robot.find_joints(name)[0][0] for name in SPREAD_JOINTS]
+    spread_limits = [JOINT_POS_LIMITS[name] for name in SPREAD_JOINTS]
+    fixed_ids = [i for i in range(robot.num_joints) if i not in spread_ids]
+
+    # Pin every non-spread joint at 0 rad.
+    fixed_pos = torch.zeros((robot.num_instances, len(fixed_ids)), device=robot.device)
+    spread_target = torch.zeros((robot.num_instances, len(spread_ids)), device=robot.device)
+
+    print(f"[INFO] Moving only: {SPREAD_JOINTS}")
+    print("[INFO] All other joints pinned at 0 rad")
+    for name, (lo, hi) in zip(SPREAD_JOINTS, spread_limits):
+        print(f"[INFO]   {name}: {math.degrees(lo):.1f} deg to {math.degrees(hi):.1f} deg")
+    print(f"[INFO] Sweep frequency: {freq_hz:.2f} Hz")
+
+    step = 0
+    while simulation_app.is_running():
+        for i, (lo, hi) in enumerate(spread_limits):
+            spread_target[:, i] = _sweep_angle(sim_time_s, freq_hz, lo, hi)
+
+        robot.set_joint_position_target(fixed_pos, joint_ids=fixed_ids)
+        robot.set_joint_position_target(spread_target, joint_ids=spread_ids)
+        scene.write_data_to_sim()
+        sim.step()
+        scene.update(sim_dt)
+
+        if step % print_every == 0:
+            finger = spread_target[0, 0].item()
+            thumb = spread_target[0, -1].item()
+            print(
+                f"[splay] t={sim_time_s:5.1f}s  MCP_A_1={math.degrees(finger):+.1f} deg"
+                f"  MCP_A_thumb={math.degrees(thumb):+.1f} deg"
+            )
+
+        sim_time_s += sim_dt
+        step += 1
+
+
+def main() -> None:
+    sim_cfg = sim_utils.SimulationCfg(dt=1.0 / 120.0, device=args_cli.device)
+    sim = sim_utils.SimulationContext(sim_cfg)
+    sim.set_camera_view(eye=(0.35, 0.35, 0.75), target=(0.0, 0.08, 0.54))
+
+    scene = InteractiveScene(SplayDemoSceneCfg(num_envs=1, env_spacing=2.0))
+    sim.reset()
+    apply_joint_limits(scene["robot"])
+    print("[INFO] Splay demo running. MCP_A_1..4 + MCP_A_thumb move; all other joints stay at 0.")
+    run_simulator(sim, scene, args_cli.freq_hz)
+
+
+if __name__ == "__main__":
+    main()
+    simulation_app.close()
diff --git a/autonomy/simulation/Humanoid_Wato/wato_hand/wato_hand_cfg.py b/autonomy/simulation/Humanoid_Wato/wato_hand/wato_hand_cfg.py
index 71904e0..a8a16ec 100644
--- a/autonomy/simulation/Humanoid_Wato/wato_hand/wato_hand_cfg.py
+++ b/autonomy/simulation/Humanoid_Wato/wato_hand/wato_hand_cfg.py
@@ -26,7 +26,7 @@ def _deg(degrees: float) -> float:
 
 
 # --- Joint limits from Physics Inspector (degrees -> rad) --------------------
-_MCP_A_LIMIT = (_deg(-8.594), _deg(8.594))
+_MCP_A_LIMIT = (_deg(-27.0), _deg(27.0))  # expanded to AllegroHand range; self-collision is off
 _MCP_FLEX_LIMIT = (0.0, _deg(85.944))
 _PIP_FLEX_LIMIT = (_deg(-85.944), 0.0)
 _DIP_FLEX_LIMIT = (0.0, _deg(85.944))