gr00t-WholeBodyControl/decoupled_wbc/control/policy/g1_decoupled_whole_body_pol...


								import time as time_module

								from typing import Optional


								import numpy as np

								from pinocchio import rpy


								from decoupled_wbc.control.base.policy import Policy

								from decoupled_wbc.control.main.constants import DEFAULT_NAV_CMD


								class G1DecoupledWholeBodyPolicy(Policy):
								    """Whole-body policy for the G1 robot that combines two sub-policies.

								    An upper-body policy and a lower-body RL-based policy are wrapped behind a
								    single ``Policy`` interface: ``set_goal`` splits an incoming goal between
								    them and ``get_action`` merges their outputs into one joint-position vector.

								    While teleop commands drive the lower body, a watchdog in ``get_action``
								    injects a goal without teleop commands if no goal has arrived for
								    ``TELEOP_GOAL_TIMEOUT_S`` seconds.
								    """

								    # Seconds without a new goal (in teleop mode) before the safe goal is injected.
								    TELEOP_GOAL_TIMEOUT_S = 1.0
								    # The injected safe goal targets this many seconds after the current time.
								    SAFE_GOAL_HORIZON_S = 0.1
								    # The injected safe goal sets the garbage-collection time this far in the past.
								    SAFE_GOAL_GC_WINDOW_S = 1.0

								    # Goal keys forwarded to the upper-body policy. "navigate_cmd" and
								    # "base_height_command" are routed here as well: get_action reads them back
								    # from the upper-body action and hands them to the lower-body policy.
								    _UPPER_BODY_GOAL_KEYS = (
								        "target_upper_body_pose",
								        "base_height_command",
								        "target_time",
								        "interpolation_garbage_collection_time",
								        "navigate_cmd",
								    )
								    # Goal keys forwarded directly to the lower-body policy.
								    _LOWER_BODY_GOAL_KEYS = (
								        "toggle_stand_command",
								        "toggle_policy_action",
								    )

								    def __init__(
								        self,
								        robot_model,
								        lower_body_policy: Policy,
								        upper_body_policy: Policy,
								    ):
								        """Store the robot model and the two sub-policies.

								        Args:
								            robot_model: Model queried for joint-group indices, the number of
								                DoFs and forward kinematics.
								            lower_body_policy: Policy producing the lower-body part of the action.
								            upper_body_policy: Policy producing the upper-body part of the action.
								        """
								        self.robot_model = robot_model
								        self.lower_body_policy = lower_body_policy
								        self.upper_body_policy = upper_body_policy
								        # Monotonic timestamp of the latest set_goal call (watchdog reference).
								        self.last_goal_time = time_module.monotonic()
								        # True while the latest goal carried teleop commands for the lower body.
								        self.is_in_teleop_mode = False

								    def set_observation(self, observation):
								        """Forward the observation to the lower-body policy.

								        Args:
								            observation: Observation passed through unchanged.
								        """
								        # Upper body policy is open loop (just interpolation), so it does not
								        # need the observation.
								        self.lower_body_policy.set_observation(observation)

								    def set_goal(self, goal):
								        """Split ``goal`` between the upper- and lower-body policies.

								        Args:
								            goal: Command from the planners. Recognized keys:
								                "target_upper_body_pose": Target pose for the upper body policy.
								                "target_time": Target goal time.
								                "interpolation_garbage_collection_time": Waypoints earlier
								                    than this time are removed.
								                "navigate_cmd": Target navigation velocities; routed through
								                    the upper-body policy and passed to the lower-body policy
								                    in ``get_action``.
								                "base_height_command": Target base height; routed the same
								                    way as "navigate_cmd".
								                "toggle_stand_command", "toggle_policy_action": Forwarded
								                    to the lower-body policy.
								        """
								        # Update goal timestamp for timeout safety.
								        self.last_goal_time = time_module.monotonic()

								        upper_body_goal = {
								            key: goal[key] for key in self._UPPER_BODY_GOAL_KEYS if key in goal
								        }
								        lower_body_goal = {
								            key: goal[key] for key in self._LOWER_BODY_GOAL_KEYS if key in goal
								        }

								        # Always ensure navigate_cmd is present to prevent interpolation from
								        # old dangerous values: inject the default command so the
								        # interpolation goes to stop.
								        if "navigate_cmd" not in goal:
								            if "target_time" in goal and isinstance(goal["target_time"], list):
								                # One independent array per waypoint; repeating a single
								                # array object would alias every entry to the same buffer.
								                upper_body_goal["navigate_cmd"] = [
								                    np.array(DEFAULT_NAV_CMD) for _ in goal["target_time"]
								                ]
								            else:
								                upper_body_goal["navigate_cmd"] = np.array(DEFAULT_NAV_CMD)

								        # Teleop mode is active only when this goal carries teleop commands;
								        # the flag also arms/disarms the timeout check in get_action.
								        has_teleop_commands = ("navigate_cmd" in goal) or ("base_height_command" in goal)
								        self.is_in_teleop_mode = has_teleop_commands
								        self.lower_body_policy.set_use_teleop_policy_cmd(has_teleop_commands)

								        self.upper_body_policy.set_goal(upper_body_goal)
								        self.lower_body_policy.set_goal(lower_body_goal)

								    def get_action(self, time: Optional[float] = None):
								        """Compute the combined whole-body joint target.

								        Args:
								            time: Timestamp forwarded unchanged (possibly ``None``) to both
								                sub-policies. For the timeout check, ``time.monotonic()`` is
								                used when this is ``None``.
								                NOTE(review): ``last_goal_time`` is always monotonic, so a
								                caller-supplied ``time`` presumably uses the same clock - confirm.

								        Returns:
								            A dict ``{"q": q}`` where ``q`` is an array of length
								            ``robot_model.num_dofs``. A dict with the same array is also
								            stored in ``self.last_action``.
								        """
								        current_time = time if time is not None else time_module.monotonic()

								        # Safety timeout: Only apply when in teleop mode (communication loss dangerous)
								        # When in keyboard mode, no timeout needed (user controls directly)
								        if self.is_in_teleop_mode:
								            time_since_goal = current_time - self.last_goal_time
								            if time_since_goal > self.TELEOP_GOAL_TIMEOUT_S:
								                print(
								                    f"SAFETY: Teleop mode timeout after {time_since_goal:.1f}s, injecting safe goal"
								                )
								                # The safe goal has no teleop keys, so set_goal injects the
								                # default navigate_cmd and resets is_in_teleop_mode to False.
								                safe_goal = {
								                    "target_time": current_time + self.SAFE_GOAL_HORIZON_S,
								                    "interpolation_garbage_collection_time": current_time
								                    - self.SAFE_GOAL_GC_WINDOW_S,
								                }
								                self.set_goal(safe_goal)

								        # Get indices for groups
								        lower_body_indices = self.robot_model.get_joint_group_indices("lower_body")
								        upper_body_indices = self.robot_model.get_joint_group_indices("upper_body")

								        # Initialize full configuration with zeros
								        q = np.zeros(self.robot_model.num_dofs)

								        upper_body_action = self.upper_body_policy.get_action(time)
								        q[upper_body_indices] = upper_body_action["target_upper_body_pose"]
								        q_arms = q[self.robot_model.get_joint_group_indices("arms")]
								        base_height_command = upper_body_action.get("base_height_command", None)
								        interpolated_navigate_cmd = upper_body_action.get("navigate_cmd", None)

								        # Compute torso orientation relative to waist, to pass to lower body policy
								        self.robot_model.cache_forward_kinematics(q, auto_clip=False)
								        torso_orientation = self.robot_model.frame_placement("torso_link").rotation
								        waist_orientation = self.robot_model.frame_placement("pelvis").rotation
								        # Extract yaw from the rotation matrix and build a yaw-only rotation,
								        # so the result keeps the torso's roll/pitch relative to that frame.
								        waist_yaw = np.arctan2(waist_orientation[1, 0], waist_orientation[0, 0])
								        waist_yaw_only_rotation = rpy.rpyToMatrix(0, 0, waist_yaw)
								        yaw_only_waist_from_torso = waist_yaw_only_rotation.T @ torso_orientation
								        torso_orientation_rpy = rpy.matrixToRpy(yaw_only_waist_from_torso)

								        lower_body_action = self.lower_body_policy.get_action(
								            time, q_arms, base_height_command, torso_orientation_rpy, interpolated_navigate_cmd
								        )

								        # Written after the upper body on purpose: if pelvis is both in upper
								        # and lower body, lower body policy takes preference.
								        # Takes the first entry of "body_action" and its leading
								        # len(lower_body_indices) values (legs + waist).
								        q[lower_body_indices] = lower_body_action["body_action"][0][
								            : len(lower_body_indices)
								        ]

								        self.last_action = {"q": q}

								        return {"q": q}

								    def handle_keyboard_button(self, key):
								        """Forward a key press to the lower body's keyboard handler.

								        The handler of ``lower_body_policy.locomotion_policy`` is preferred;
								        when that attribute or its handler is missing, the lower-body policy's
								        own ``handle_keyboard_button`` is used instead.

								        Args:
								            key: Key identifier passed through unchanged.
								        """
								        # Resolve the handler before calling it, so an AttributeError raised
								        # inside a handler propagates instead of being mistaken for a missing
								        # handler (which would also dispatch the key a second time).
								        locomotion_policy = getattr(self.lower_body_policy, "locomotion_policy", None)
								        handler = getattr(locomotion_policy, "handle_keyboard_button", None)
								        if handler is None:
								            handler = self.lower_body_policy.handle_keyboard_button
								        handler(key)

								    def activate_policy(self):
								        """Activate the policy by sending the "]" key to the keyboard handler."""
								        self.handle_keyboard_button("]")