import time as time_module from typing import Optional import numpy as np from pinocchio import rpy from decoupled_wbc.control.base.policy import Policy from decoupled_wbc.control.main.constants import DEFAULT_NAV_CMD class G1DecoupledWholeBodyPolicy(Policy): """ This class implements a whole-body policy for the G1 robot by combining an upper-body policy and a lower-body RL-based policy. It is designed to work with the G1 robot's specific configuration and control requirements. """ def __init__( self, robot_model, lower_body_policy: Policy, upper_body_policy: Policy, ): self.robot_model = robot_model self.lower_body_policy = lower_body_policy self.upper_body_policy = upper_body_policy self.last_goal_time = time_module.monotonic() self.is_in_teleop_mode = False # Track if lower body is in teleop mode def set_observation(self, observation): # Upper body policy is open loop (just interpolation), so we don't need to set the observation self.lower_body_policy.set_observation(observation) def set_goal(self, goal): """ Set the goal for both upper and lower body policies. Args: goal: Command from the planners goal["target_upper_body_pose"]: Target pose for the upper body policy goal["target_time"]: Target goal time goal["interpolation_garbage_collection_time"]: Waypoints earlier than this time are removed goal["navigate_cmd"]: Target navigation velocities for the lower body policy goal["base_height_command"]: Target base height for both upper and lower body policies """ # Update goal timestamp for timeout safety self.last_goal_time = time_module.monotonic() upper_body_goal = {} lower_body_goal = {} # Upper body goal keys upper_body_keys = [ "target_upper_body_pose", "base_height_command", "target_time", "interpolation_garbage_collection_time", "navigate_cmd", ] for key in upper_body_keys: if key in goal: upper_body_goal[key] = goal[key] # Always ensure navigate_cmd is present to prevent interpolation from old dangerous values if "navigate_cmd" not in goal: # Safety: Inject safe default navigate_cmd to ensure interpolation goes to stop if "target_time" in goal and isinstance(goal["target_time"], list): upper_body_goal["navigate_cmd"] = [np.array(DEFAULT_NAV_CMD)] * len( goal["target_time"] ) else: upper_body_goal["navigate_cmd"] = np.array(DEFAULT_NAV_CMD) # Set teleop policy command flag has_teleop_commands = ("navigate_cmd" in goal) or ("base_height_command" in goal) self.is_in_teleop_mode = has_teleop_commands # Track teleop state for timeout safety self.lower_body_policy.set_use_teleop_policy_cmd(has_teleop_commands) # Lower body goal keys lower_body_keys = [ "toggle_stand_command", "toggle_policy_action", ] for key in lower_body_keys: if key in goal: lower_body_goal[key] = goal[key] self.upper_body_policy.set_goal(upper_body_goal) self.lower_body_policy.set_goal(lower_body_goal) def get_action(self, time: Optional[float] = None): current_time = time if time is not None else time_module.monotonic() # Safety timeout: Only apply when in teleop mode (communication loss dangerous) # When in keyboard mode, no timeout needed (user controls directly) if self.is_in_teleop_mode: time_since_goal = current_time - self.last_goal_time if time_since_goal > 1.0: # 1 second timeout print( f"SAFETY: Teleop mode timeout after {time_since_goal:.1f}s, injecting safe goal" ) # Inject safe goal to trigger all safety mechanisms (gear_wbc reset + interpolation reset) safe_goal = { "target_time": current_time + 0.1, "interpolation_garbage_collection_time": current_time - 1.0, } self.set_goal( safe_goal ) # This will reset is_in_teleop_mode to False and trigger all safety # Get indices for groups lower_body_indices = self.robot_model.get_joint_group_indices("lower_body") upper_body_indices = self.robot_model.get_joint_group_indices("upper_body") # Initialize full configuration with zeros q = np.zeros(self.robot_model.num_dofs) upper_body_action = self.upper_body_policy.get_action(time) q[upper_body_indices] = upper_body_action["target_upper_body_pose"] q_arms = q[self.robot_model.get_joint_group_indices("arms")] base_height_command = upper_body_action.get("base_height_command", None) interpolated_navigate_cmd = upper_body_action.get("navigate_cmd", None) # Compute torso orientation relative to waist, to pass to lower body policy self.robot_model.cache_forward_kinematics(q, auto_clip=False) torso_orientation = self.robot_model.frame_placement("torso_link").rotation waist_orientation = self.robot_model.frame_placement("pelvis").rotation # Extract yaw from rotation matrix and create a rotation with only yaw # The rotation property is a 3x3 numpy array waist_yaw = np.arctan2(waist_orientation[1, 0], waist_orientation[0, 0]) # Create a rotation matrix with only yaw using Pinocchio's rpy functions waist_yaw_only_rotation = rpy.rpyToMatrix(0, 0, waist_yaw) yaw_only_waist_from_torso = waist_yaw_only_rotation.T @ torso_orientation torso_orientation_rpy = rpy.matrixToRpy(yaw_only_waist_from_torso) lower_body_action = self.lower_body_policy.get_action( time, q_arms, base_height_command, torso_orientation_rpy, interpolated_navigate_cmd ) # If pelvis is both in upper and lower body, lower body policy takes preference q[lower_body_indices] = lower_body_action["body_action"][0][ : len(lower_body_indices) ] # lower body (legs + waist) self.last_action = {"q": q} return {"q": q} def handle_keyboard_button(self, key): try: self.lower_body_policy.locomotion_policy.handle_keyboard_button(key) except AttributeError: # Only catch AttributeError, let other exceptions propagate self.lower_body_policy.handle_keyboard_button(key) def activate_policy(self): self.handle_keyboard_button("]")