Merge pull request #5 from NVlabs/ry/fix_lang

[fix] fix lang key for sync G1 env
4 months ago · ccee365ac5
15 changed files with 472 additions and 257 deletions
--- a/gr00t_wbc/control/envs/robocasa/async_env_server.py
+++ b/gr00t_wbc/control/envs/robocasa/async_env_server.py
@ -8,7 +8,9 @@ import numpy as np
 import rclpy
 from gr00t_wbc.control.envs.g1.sim.image_publish_utils import ImagePublishProcess
 from gr00t_wbc.control.envs.robocasa.utils.robocasa_env import Gr00tLocomanipRoboCasaEnv  # noqa: F401
 from gr00t_wbc.control.envs.robocasa.utils.robocasa_env import (
    Gr00tLocomanipRoboCasaEnv,
 )  # noqa: F401
 from gr00t_wbc.control.robot_model.robot_model import RobotModel
 from gr00t_wbc.control.utils.keyboard_dispatcher import KeyboardListenerSubscriber
--- a/gr00t_wbc/control/envs/robocasa/sync_env.py
+++ b/gr00t_wbc/control/envs/robocasa/sync_env.py
@ -10,11 +10,14 @@ from robosuite.environments.robot_env import RobotEnv
 from scipy.spatial.transform import Rotation as R
 from gr00t_wbc.control.envs.g1.utils.joint_safety import JointSafetyMonitor
 from gr00t_wbc.control.envs.robocasa.utils.controller_utils import update_robosuite_controller_configs
 from gr00t_wbc.control.envs.robocasa.utils.controller_utils import (
    update_robosuite_controller_configs,
 )
 from gr00t_wbc.control.envs.robocasa.utils.robocasa_env import (  # noqa: F401
    ALLOWED_LANGUAGE_CHARSET,
    Gr00tLocomanipRoboCasaEnv,
 )
 from gr00t_wbc.control.envs.robocasa.utils.sim_utils import change_simulation_timestep
 from gr00t_wbc.control.robot_model.instantiation import get_robot_type_and_model
 from gr00t_wbc.control.utils.n1_utils import (
    prepare_gym_space_for_eval,
@ -36,6 +39,8 @@ class SyncEnv(gym.Env):
            robot_name, enable_waist_ik=kwargs.pop("enable_waist", False)
        )
        change_simulation_timestep(kwargs.get("sim_freq", 1 / 0.005))
        env_kwargs = {
            "onscreen": kwargs.get("onscreen", True),
            "offscreen": kwargs.get("offscreen", False),
@ -183,7 +188,7 @@ class SyncEnv(gym.Env):
        # Add state keys for model input
        obs = prepare_observation_for_eval(self.robot_model, obs)
        obs["language.language_instruction"] = raw_obs["language.language_instruction"]
        obs["annotation.human.task_description"] = raw_obs["language.language_instruction"]
        if hasattr(self.base_env, "get_privileged_obs_keys"):
            for key in self.base_env.get_privileged_obs_keys():
@ -205,7 +210,7 @@ class SyncEnv(gym.Env):
    def get_observation(self):
        return self.base_env._get_observations()  # assumes base env is robosuite
    def get_step_info(self) -> Dict[str, any]:
    def get_step_info(self) -> Tuple[Dict[str, any], float, bool, bool, Dict[str, any]]:
        return (
            self.observe(),
            self.cache["reward"],
@ -319,7 +324,7 @@ class SyncEnv(gym.Env):
        obs_space = prepare_gym_space_for_eval(self.robot_model, obs_space)
        obs_space["language.language_instruction"] = gym.spaces.Text(
        obs_space["annotation.human.task_description"] = gym.spaces.Text(
            max_length=256, charset=ALLOWED_LANGUAGE_CHARSET
        )
--- a/gr00t_wbc/control/main/teleop/configs/configs.py
+++ b/gr00t_wbc/control/main/teleop/configs/configs.py
@ -77,8 +77,7 @@ class BaseConfig(ArgsConfigTemplate):
    """Version of the whole body controller."""
    wbc_model_path: str = (
        "policy/GR00T-WholeBodyControl-Balance.onnx,"
        "policy/GR00T-WholeBodyControl-Walk.onnx"
        "policy/GR00T-WholeBodyControl-Balance.onnx," "policy/GR00T-WholeBodyControl-Walk.onnx"
    )
    """Path to WBC model file (relative to gr00t_wbc/sim2mujoco/resources/robots/g1)"""
    """gear_wbc model path: policy/GR00T-WholeBodyControl-Balance.onnx,policy/GR00T-WholeBodyControl-Walk.onnx"""
--- a/gr00t_wbc/control/policy/lerobot_replay_policy.py
+++ b/gr00t_wbc/control/policy/lerobot_replay_policy.py
@ -3,7 +3,11 @@ import time
 import pandas as pd
 from gr00t_wbc.control.base.policy import Policy
 from gr00t_wbc.control.main.constants import DEFAULT_BASE_HEIGHT, DEFAULT_NAV_CMD, DEFAULT_WRIST_POSE
 from gr00t_wbc.control.main.constants import (
    DEFAULT_BASE_HEIGHT,
    DEFAULT_NAV_CMD,
    DEFAULT_WRIST_POSE,
 )
 from gr00t_wbc.control.robot_model.robot_model import RobotModel
 from gr00t_wbc.data.viz.rerun_viz import RerunViz
--- a/gr00t_wbc/control/teleop/gui/main.py
+++ b/gr00t_wbc/control/teleop/gui/main.py
@ -29,9 +29,7 @@ import gr00t_wbc
 signal.signal(signal.SIGINT, signal.SIG_DFL)
 GR00T_TELEOP_DATA_ROOT = os.path.join(
    os.path.dirname(gr00t_wbc.__file__), "./external/teleop/data"
 )
 GR00T_TELEOP_DATA_ROOT = os.path.join(os.path.dirname(gr00t_wbc.__file__), "./external/teleop/data")
 class MainWindow(QMainWindow):
--- a/gr00t_wbc/control/teleop/solver/hand/instantiation/g1_hand_ik_instantiation.py
+++ b/gr00t_wbc/control/teleop/solver/hand/instantiation/g1_hand_ik_instantiation.py
@ -1,4 +1,6 @@
 from gr00t_wbc.control.teleop.solver.hand.g1_gripper_ik_solver import G1GripperInverseKinematicsSolver
 from gr00t_wbc.control.teleop.solver.hand.g1_gripper_ik_solver import (
    G1GripperInverseKinematicsSolver,
 )
 # initialize hand ik solvers for g1 robot
--- a/gr00t_wbc/control/teleop/teleop_streamer.py
+++ b/gr00t_wbc/control/teleop/teleop_streamer.py
@ -45,7 +45,9 @@ class TeleopStreamer:
                self.body_streamer = IphoneStreamer()
                self.body_streamer.start_streaming()
            elif body_control_device == "leapmotion":
                from gr00t_wbc.control.teleop.streamers.leapmotion_streamer import LeapMotionStreamer
                from gr00t_wbc.control.teleop.streamers.leapmotion_streamer import (
                    LeapMotionStreamer,
                )
                self.body_streamer = LeapMotionStreamer()
                self.body_streamer.start_streaming()
--- a/gr00t_wbc/control/utils/sync_sim_utils.py
+++ b/gr00t_wbc/control/utils/sync_sim_utils.py
@ -503,6 +503,7 @@ def get_env(config: SyncSimDataCollectionConfig, **kwargs) -> SyncEnv:
            "enable_waist": config.enable_waist,
            "enable_gravity_compensation": config.enable_gravity_compensation,
            "gravity_compensation_joints": config.gravity_compensation_joints,
            "sim_freq": config.sim_frequency,
        }
    )
    if robot_type == "g1":
--- a/gr00t_wbc/dexmg/gr00trobocasa/robocasa/init.py
+++ b/gr00t_wbc/dexmg/gr00trobocasa/robocasa/init.py
@ -26,12 +26,18 @@ from robocasa.environments.locomanipulation.locomanip_basic import (
    LMNavPickBottleShelf,
    LMPickBottleShelfLow,
    LMNavPickBottleShelfLow,
    LMPnPBottleToPlate,
    LMPnPAppleToPlate,
 )
 from robocasa.environments.locomanipulation.locomanip_pnp import (
    LMBottlePnP,
    LMBoxPnP,
 )
 from robocasa.environments.locomanipulation.locomanip_dc import (
    LMNavPickBottleDC,
    LMPnPAppleToPlateDC,
 )
 # from robosuite.controllers import ALL_CONTROLLERS, load_controller_config
 from robosuite.controllers import ALL_PART_CONTROLLERS, load_composite_controller_config
--- a/gr00t_wbc/dexmg/gr00trobocasa/robocasa/environments/locomanipulation/locomanip_basic.py
+++ b/gr00t_wbc/dexmg/gr00trobocasa/robocasa/environments/locomanipulation/locomanip_basic.py
@ -574,3 +574,160 @@ class LMNavPickBottleShelfLow(LMNavPickBottleShelf):
            )
        )
        return [self.shelf, self.bottle]
 class LMPnPBottleToPlate(LMPnPBottle):
    def _get_objects(self) -> list[SceneObject]:
        super()._get_objects()
        self.plate = SceneObject(
            ObjectConfig(
                name="plate",
                mjcf_path="objects/omniverse/locomanip/plate_1/model.xml",
                scale=1.0,
                static=True,
                sampler_config=SamplingConfig(
                    x_range=np.array([-0.2 - 0.08, -0.2 + 0.04]),
                    y_range=np.array([-0.08, 0.08]),
                    rotation=np.array([-np.pi, np.pi]),
                    reference=ReferenceConfig(self.table_target),
                ),
            )
        )
        return [self.table, self.table_target, self.bottle, self.plate]
    def _get_success_criteria(self) -> SuccessCriteria:
        return AllCriteria(
            IsUpright(self.bottle, symmetric=True), IsInContact(self.bottle, self.plate)
        )
    def _get_instruction(self) -> str:
        return "Pick up the bottle and place it on the plate."
    def get_object(self):
        return dict(
            bottle=dict(obj_name=self.bottle.mj_obj.root_body, obj_type="body"),
            plate=dict(obj_name=self.plate.mj_obj.root_body, obj_type="body"),
        )
    def get_subtask_term_signals(self):
        obj_z = self.sim.data.body_xpos[self.obj_body_id(self.bottle.mj_obj.name)][2]
        target_table_pos = self.sim.data.body_xpos[self.obj_body_id(self.table_target.mj_obj.name)]
        target_table_z = target_table_pos[2] + self.table_target.mj_obj.top_offset[2]
        return dict(obj_off_table=int(obj_z - target_table_z > self.LIFT_OFFSET))
    @staticmethod
    def task_config():
        task = DexMGConfigHelper.AttrDict()
        task.task_spec_0.subtask_1 = dict(
            object_ref="bottle",
            subtask_term_signal="obj_off_table",
            subtask_term_offset_range=(5, 10),
            selection_strategy="random",
            selection_strategy_kwargs=None,
            action_noise=0.05,
            num_interpolation_steps=5,
            num_fixed_steps=0,
            apply_noise_during_interpolation=False,
        )
        # Second subtask for placing on plate
        task.task_spec_0.subtask_2 = dict(
            object_ref="plate",
            subtask_term_signal=None,
            subtask_term_offset_range=None,
            selection_strategy="random",
            selection_strategy_kwargs=None,
            action_noise=0.05,
            num_interpolation_steps=5,
            num_fixed_steps=0,
            apply_noise_during_interpolation=False,
        )
        task.task_spec_1.subtask_1 = dict(
            object_ref=None,
            subtask_term_signal=None,
            subtask_term_offset_range=None,
            selection_strategy="random",
            selection_strategy_kwargs=None,
            action_noise=0.05,
            num_interpolation_steps=5,
            num_fixed_steps=0,
            apply_noise_during_interpolation=False,
        )
        return task.to_dict()
 class LMPnPAppleToPlate(LMPnPBottleToPlate):
    def _get_objects(self) -> list[SceneObject]:
        super()._get_objects()
        self.apple = SceneObject(
            ObjectConfig(
                name="apple",
                mjcf_path="objects/omniverse/locomanip/apple_0/model.xml",
                static=False,
                scale=1.0,
                sampler_config=SamplingConfig(
                    x_range=np.array([-0.08, 0.04]),
                    y_range=np.array([-0.08, 0.08]),
                    rotation=np.array([-np.pi, np.pi]),
                    reference_pos=np.array([0.4, 0, self.table.mj_obj.top_offset[2]]),
                ),
                rgba=(0.85, 0.1, 0.1, 1.0),
            )
        )
        return [self.table, self.table_target, self.apple, self.plate]
    def _get_success_criteria(self) -> SuccessCriteria:
        return IsInContact(self.apple, self.plate)
    def _get_instruction(self) -> str:
        return "pick up the apple, walk left and place the apple on the plate."
    def get_object(self):
        return dict(
            apple=dict(obj_name=self.apple.mj_obj.root_body, obj_type="body"),
            plate=dict(obj_name=self.plate.mj_obj.root_body, obj_type="body"),
        )
    def get_subtask_term_signals(self):
        obj_z = self.sim.data.body_xpos[self.obj_body_id(self.apple.mj_obj.name)][2]
        target_table_pos = self.sim.data.body_xpos[self.obj_body_id(self.table_target.mj_obj.name)]
        target_table_z = target_table_pos[2] + self.table_target.mj_obj.top_offset[2]
        return dict(obj_off_table=int(obj_z - target_table_z > self.LIFT_OFFSET))
    @staticmethod
    def task_config():
        task = DexMGConfigHelper.AttrDict()
        task.task_spec_0.subtask_1 = dict(
            object_ref="apple",
            subtask_term_signal="obj_off_table",
            subtask_term_offset_range=(5, 10),
            selection_strategy="random",
            selection_strategy_kwargs=None,
            action_noise=0.05,
            num_interpolation_steps=5,
            num_fixed_steps=0,
            apply_noise_during_interpolation=False,
        )
        # Second subtask for placing on plate
        task.task_spec_0.subtask_2 = dict(
            object_ref="plate",
            subtask_term_signal=None,
            subtask_term_offset_range=None,
            selection_strategy="random",
            selection_strategy_kwargs=None,
            action_noise=0.05,
            num_interpolation_steps=5,
            num_fixed_steps=0,
            apply_noise_during_interpolation=False,
        )
        task.task_spec_1.subtask_1 = dict(
            object_ref=None,
            subtask_term_signal=None,
            subtask_term_offset_range=None,
            selection_strategy="random",
            selection_strategy_kwargs=None,
            action_noise=0.05,
            num_interpolation_steps=5,
            num_fixed_steps=0,
            apply_noise_during_interpolation=False,
        )
        return task.to_dict()
--- a/gr00t_wbc/dexmg/gr00trobocasa/robocasa/environments/locomanipulation/locomanip_dc.py
+++ b/gr00t_wbc/dexmg/gr00trobocasa/robocasa/environments/locomanipulation/locomanip_dc.py
@ -0,0 +1,15 @@
 from robocasa import (
    LMNavPickBottle,
    LMPnPAppleToPlate,
 )
 from robocasa.models.scenes.lab_arena import LabArena
 class LabEnvMixin:
    MUJOCO_ARENA_CLS = LabArena
 class LMNavPickBottleDC(LabEnvMixin, LMNavPickBottle): ...
 class LMPnPAppleToPlateDC(LabEnvMixin, LMPnPAppleToPlate): ...
--- a/gr00t_wbc/sim2mujoco/scripts/run_mujoco_gear_wbc.py
+++ b/gr00t_wbc/sim2mujoco/scripts/run_mujoco_gear_wbc.py
@ -1,21 +1,16 @@
 import sys
 import time
 import collections
 import yaml
 import torch
 import numpy as np
 import sys
 import os
 import threading
 import time
 import collections
 import yaml
 import torch
 import numpy as np
 import mujoco
 import mujoco.viewer
 import numpy as np
 import onnxruntime as ort
 import threading
 from pynput import keyboard as pkb
 import os
 import torch
 import yaml
 class GearWbcController:
    def __init__(self, config_path):
@ -24,22 +19,22 @@ class GearWbcController:
        self.config = self.load_config(os.path.join(self.CONFIG_PATH, "g1_gear_wbc.yaml"))
        self.control_dict = {
            'loco_cmd': self.config['cmd_init'],
            "height_cmd": self.config['height_cmd'],
            "rpy_cmd": self.config.get('rpy_cmd', [0.0, 0.0, 0.0]),
            "freq_cmd": self.config.get('freq_cmd', 1.5)
            "loco_cmd": self.config["cmd_init"],
            "height_cmd": self.config["height_cmd"],
            "rpy_cmd": self.config.get("rpy_cmd", [0.0, 0.0, 0.0]),
            "freq_cmd": self.config.get("freq_cmd", 1.5),
        }
        self.model = mujoco.MjModel.from_xml_path(self.config['xml_path'])
        self.model = mujoco.MjModel.from_xml_path(self.config["xml_path"])
        self.data = mujoco.MjData(self.model)
        self.model.opt.timestep = self.config['simulation_dt']
        self.model.opt.timestep = self.config["simulation_dt"]
        self.n_joints = self.data.qpos.shape[0] - 7
        self.torso_index = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "torso_link")
        self.base_index = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "pelvis")
        self.action = np.zeros(self.config['num_actions'], dtype=np.float32)
        self.target_dof_pos = self.config['default_angles'].copy()
        self.policy = self.load_onnx_policy(self.config['policy_path'])
        self.walk_policy = self.load_onnx_policy(self.config['walk_policy_path'])
        self.action = np.zeros(self.config["num_actions"], dtype=np.float32)
        self.target_dof_pos = self.config["default_angles"].copy()
        self.policy = self.load_onnx_policy(self.config["policy_path"])
        self.walk_policy = self.load_onnx_policy(self.config["walk_policy_path"])
        self.gait_indices = torch.zeros((1), dtype=torch.float32)
        self.counter = 0
        self.just_started = 0.0
@ -50,15 +45,15 @@ class GearWbcController:
            self.data, self.config, self.action, self.control_dict, self.n_joints
        )
        self.obs_history = collections.deque(
            [np.zeros(self.single_obs_dim, dtype=np.float32)] * self.config['obs_history_len'],
            maxlen=self.config['obs_history_len']
            [np.zeros(self.single_obs_dim, dtype=np.float32)] * self.config["obs_history_len"],
            maxlen=self.config["obs_history_len"],
        )
        self.obs = np.zeros(self.config['num_obs'], dtype=np.float32)
        self.obs = np.zeros(self.config["num_obs"], dtype=np.float32)
        self.keyboard_listener(self.control_dict, self.config)
    def keyboard_listener(self, control_dict, config):
        """Listen to key press events and update cmd and height_cmd"""
        def on_press(key):
            try:
                k = key.char
@ -66,58 +61,60 @@ class GearWbcController:
                return  # Special keys ignored
            with self.cmd_lock:
                if k == 'w':
                    control_dict['loco_cmd'][0] += 0.1
                elif k == 's':
                    control_dict['loco_cmd'][0] -= 0.1
                elif k == 'a':
                    control_dict['loco_cmd'][1] += 0.1
                elif k == 'd':
                    control_dict['loco_cmd'][1] -= 0.1
                elif k == 'q':
                    control_dict['loco_cmd'][2] += 0.1
                elif k == 'e':
                    control_dict['loco_cmd'][2] -= 0.1
                elif k == 'z':
                    control_dict['loco_cmd'][:] = config['cmd_init']
                    control_dict["height_cmd"] = config['height_cmd']
                    control_dict['rpy_cmd'][:] = config['rpy_cmd']
                    control_dict['freq_cmd'] = config['freq_cmd']
                elif k == '1':
                if k == "w":
                    control_dict["loco_cmd"][0] += 0.1
                elif k == "s":
                    control_dict["loco_cmd"][0] -= 0.1
                elif k == "a":
                    control_dict["loco_cmd"][1] += 0.1
                elif k == "d":
                    control_dict["loco_cmd"][1] -= 0.1
                elif k == "q":
                    control_dict["loco_cmd"][2] += 0.1
                elif k == "e":
                    control_dict["loco_cmd"][2] -= 0.1
                elif k == "z":
                    control_dict["loco_cmd"][:] = config["cmd_init"]
                    control_dict["height_cmd"] = config["height_cmd"]
                    control_dict["rpy_cmd"][:] = config["rpy_cmd"]
                    control_dict["freq_cmd"] = config["freq_cmd"]
                elif k == "1":
                    control_dict["height_cmd"] += 0.05
                elif k == '2':
                elif k == "2":
                    control_dict["height_cmd"] -= 0.05
                elif k == '3':
                    control_dict['rpy_cmd'][0] += 0.2
                elif k == '4':
                    control_dict['rpy_cmd'][0] -= 0.2
                elif k == '5':
                    control_dict['rpy_cmd'][1] += 0.2
                elif k == '6':
                    control_dict['rpy_cmd'][1] -= 0.2
                elif k == '7':
                    control_dict['rpy_cmd'][2] += 0.2
                elif k == '8':
                    control_dict['rpy_cmd'][2] -= 0.2
                elif k == 'm':
                    control_dict['freq_cmd'] += 0.1
                elif k == 'n':
                    control_dict['freq_cmd'] -= 0.1
                print(f"Current Commands: loco_cmd = {control_dict['loco_cmd']}, height_cmd = {control_dict['height_cmd']}, rpy_cmd = {control_dict['rpy_cmd']}, freq_cmd = {control_dict['freq_cmd']}")
                elif k == "3":
                    control_dict["rpy_cmd"][0] += 0.2
                elif k == "4":
                    control_dict["rpy_cmd"][0] -= 0.2
                elif k == "5":
                    control_dict["rpy_cmd"][1] += 0.2
                elif k == "6":
                    control_dict["rpy_cmd"][1] -= 0.2
                elif k == "7":
                    control_dict["rpy_cmd"][2] += 0.2
                elif k == "8":
                    control_dict["rpy_cmd"][2] -= 0.2
                elif k == "m":
                    control_dict["freq_cmd"] += 0.1
                elif k == "n":
                    control_dict["freq_cmd"] -= 0.1
                print(
                    f"Current Commands: loco_cmd = {control_dict['loco_cmd']}, height_cmd = {control_dict['height_cmd']}, rpy_cmd = {control_dict['rpy_cmd']}, freq_cmd = {control_dict['freq_cmd']}"
                )
        listener = pkb.Listener(on_press=on_press)
        listener.daemon = True
        listener.start()
    def load_config(self, config_path):
        with open(config_path, 'r') as f:
        with open(config_path, "r") as f:
            config = yaml.safe_load(f)
        for path_key in ['policy_path', 'xml_path', 'walk_policy_path']:
        for path_key in ["policy_path", "xml_path", "walk_policy_path"]:
            config[path_key] = os.path.join(CONFIG_PATH, config[path_key])
        array_keys = ['kps', 'kds', 'default_angles', 'cmd_scale', 'cmd_init']
        array_keys = ["kps", "kds", "default_angles", "cmd_scale", "cmd_init"]
        for key in array_keys:
            config[key] = np.array(config[key], dtype=np.float32)
@ -129,17 +126,19 @@ class GearWbcController:
    def quat_rotate_inverse(self, q, v):
        w, x, y, z = q
        q_conj = np.array([w, -x, -y, -z])
        return np.array([
            v[0] * (q_conj[0]**2 + q_conj[1]**2 - q_conj[2]**2 - q_conj[3]**2) +
            v[1] * 2 * (q_conj[1]*q_conj[2] - q_conj[0]*q_conj[3]) +
            v[2] * 2 * (q_conj[1]*q_conj[3] + q_conj[0]*q_conj[2]),
            v[0] * 2 * (q_conj[1]*q_conj[2] + q_conj[0]*q_conj[3]) +
            v[1] * (q_conj[0]**2 - q_conj[1]**2 + q_conj[2]**2 - q_conj[3]**2) +
            v[2] * 2 * (q_conj[2]*q_conj[3] - q_conj[0]*q_conj[1]),
            v[0] * 2 * (q_conj[1]*q_conj[3] - q_conj[0]*q_conj[2]) +
            v[1] * 2 * (q_conj[2]*q_conj[3] + q_conj[0]*q_conj[1]) +
            v[2] * (q_conj[0]**2 - q_conj[1]**2 - q_conj[2]**2 + q_conj[3]**2)
        ])
        return np.array(
            [
                v[0] * (q_conj[0] ** 2 + q_conj[1] ** 2 - q_conj[2] ** 2 - q_conj[3] ** 2)
                + v[1] * 2 * (q_conj[1] * q_conj[2] - q_conj[0] * q_conj[3])
                + v[2] * 2 * (q_conj[1] * q_conj[3] + q_conj[0] * q_conj[2]),
                v[0] * 2 * (q_conj[1] * q_conj[2] + q_conj[0] * q_conj[3])
                + v[1] * (q_conj[0] ** 2 - q_conj[1] ** 2 + q_conj[2] ** 2 - q_conj[3] ** 2)
                + v[2] * 2 * (q_conj[2] * q_conj[3] - q_conj[0] * q_conj[1]),
                v[0] * 2 * (q_conj[1] * q_conj[3] - q_conj[0] * q_conj[2])
                + v[1] * 2 * (q_conj[2] * q_conj[3] + q_conj[0] * q_conj[1])
                + v[2] * (q_conj[0] ** 2 - q_conj[1] ** 2 - q_conj[2] ** 2 + q_conj[3] ** 2),
            ]
        )
    def get_gravity_orientation(self, quat):
        gravity_vec = np.array([0.0, 0.0, -1.0])
@ -147,10 +146,10 @@ class GearWbcController:
    def compute_observation(self, d, config, action, control_dict, n_joints):
        command = np.zeros(7, dtype=np.float32)
        command[:3] = control_dict['loco_cmd'][:3] * config['cmd_scale']
        command[3] = control_dict['height_cmd']
        command[:3] = control_dict["loco_cmd"][:3] * config["cmd_scale"]
        command[3] = control_dict["height_cmd"]
        # command[4] = control_dict['freq_cmd']
        command[4:7] = control_dict['rpy_cmd']
        command[4:7] = control_dict["rpy_cmd"]
        # # gait indice
        # is_static = np.linalg.norm(command[:3]) < 0.1
@ -192,7 +191,6 @@ class GearWbcController:
        # # Clock signal
        # clock = [torch.sin(2 * np.pi * fi) for fi in gait_pair]
        # for i, (clk, frozen_mask_attr) in enumerate(
        #     zip(clock, ['frozen_FL', 'frozen_FR'])
        # ):
@ -206,23 +204,25 @@ class GearWbcController:
        #     clock[i] = clk
        # self.clock_inputs = torch.stack(clock).unsqueeze(0)
        qj = d.qpos[7:7+n_joints].copy()
        dqj = d.qvel[6:6+n_joints].copy()
        qj = d.qpos[7 : 7 + n_joints].copy()
        dqj = d.qvel[6 : 6 + n_joints].copy()
        quat = d.qpos[3:7].copy()
        omega = d.qvel[3:6].copy()
        # omega = self.data.xmat[self.base_index].reshape(3, 3).T @ self.data.cvel[self.base_index][3:6]
        padded_defaults = np.zeros(n_joints, dtype=np.float32)
        L = min(len(config['default_angles']), n_joints)
        padded_defaults[:L] = config['default_angles'][:L]
        L = min(len(config["default_angles"]), n_joints)
        padded_defaults[:L] = config["default_angles"][:L]
        qj_scaled = (qj - padded_defaults) * config['dof_pos_scale']
        dqj_scaled = dqj * config['dof_vel_scale']
        qj_scaled = (qj - padded_defaults) * config["dof_pos_scale"]
        dqj_scaled = dqj * config["dof_vel_scale"]
        gravity_orientation = self.get_gravity_orientation(quat)
        omega_scaled = omega * config['ang_vel_scale']
        omega_scaled = omega * config["ang_vel_scale"]
        torso_quat = self.data.xquat[self.torso_index]
        torso_omega = self.data.xmat[self.torso_index].reshape(3, 3).T @ self.data.cvel[self.torso_index][3:6]
        torso_omega_scaled = torso_omega * config['ang_vel_scale']
        torso_omega = (
            self.data.xmat[self.torso_index].reshape(3, 3).T @ self.data.cvel[self.torso_index][3:6]
        )
        torso_omega_scaled = torso_omega * config["ang_vel_scale"]
        torso_gravity_orientation = self.get_gravity_orientation(torso_quat)
        single_obs_dim = 86
@ -232,19 +232,21 @@ class GearWbcController:
        single_obs[10:13] = gravity_orientation
        # single_obs[14:17] = 0.#torso_omega_scaled
        # single_obs[17:20] = 0.#torso_gravity_orientation
        single_obs[13:13+n_joints] = qj_scaled
        single_obs[13+n_joints:13+2*n_joints] = dqj_scaled
        single_obs[13+2*n_joints:13+2*n_joints+15] = action
        single_obs[13 : 13 + n_joints] = qj_scaled
        single_obs[13 + n_joints : 13 + 2 * n_joints] = dqj_scaled
        single_obs[13 + 2 * n_joints : 13 + 2 * n_joints + 15] = action
        # single_obs[20+2*n_joints+15:20+2*n_joints+15+2] = self.clock_inputs.cpu().numpy().reshape(2)
        return single_obs, single_obs_dim
    def load_onnx_policy(self, path):
        model = ort.InferenceSession(path)
        def run_inference(input_tensor):
            ort_inputs = {model.get_inputs()[0].name: input_tensor.cpu().numpy()}
            ort_outs = model.run(None, ort_inputs)
            return torch.tensor(ort_outs[0], device="cuda:0")
        return run_inference
    def run(self):
@ -253,55 +255,61 @@ class GearWbcController:
        with mujoco.viewer.launch_passive(self.model, self.data) as viewer:
            start = time.time()
            while viewer.is_running() and time.time() - start < self.config['simulation_duration']:
            while viewer.is_running() and time.time() - start < self.config["simulation_duration"]:
                step_start = time.time()
                leg_tau = self.pd_control(
                    self.target_dof_pos,
                    self.data.qpos[7:7+self.config['num_actions']],
                    self.config['kps'],
                    np.zeros_like(self.config['kps']),
                    self.data.qvel[6:6+self.config['num_actions']],
                    self.config['kds']
                    self.data.qpos[7 : 7 + self.config["num_actions"]],
                    self.config["kps"],
                    np.zeros_like(self.config["kps"]),
                    self.data.qvel[6 : 6 + self.config["num_actions"]],
                    self.config["kds"],
                )
                self.data.ctrl[:self.config['num_actions']] = leg_tau
                self.data.ctrl[: self.config["num_actions"]] = leg_tau
                if self.n_joints > self.config['num_actions']:
                if self.n_joints > self.config["num_actions"]:
                    arm_tau = self.pd_control(
                        np.zeros(self.n_joints - self.config['num_actions'], dtype=np.float32),
                        self.data.qpos[7+self.config['num_actions']:7+self.n_joints],
                        np.full(self.n_joints - self.config['num_actions'], 100.0),
                        np.zeros(self.n_joints - self.config['num_actions']),
                        self.data.qvel[6+self.config['num_actions']:6+self.n_joints],
                        np.full(self.n_joints - self.config['num_actions'], 0.5)
                        np.zeros(self.n_joints - self.config["num_actions"], dtype=np.float32),
                        self.data.qpos[7 + self.config["num_actions"] : 7 + self.n_joints],
                        np.full(self.n_joints - self.config["num_actions"], 100.0),
                        np.zeros(self.n_joints - self.config["num_actions"]),
                        self.data.qvel[6 + self.config["num_actions"] : 6 + self.n_joints],
                        np.full(self.n_joints - self.config["num_actions"], 0.5),
                    )
                    self.data.ctrl[self.config['num_actions']:] = arm_tau
                    self.data.ctrl[self.config["num_actions"] :] = arm_tau
                mujoco.mj_step(self.model, self.data)
                self.counter += 1
                if self.counter % self.config['control_decimation'] == 0:
                if self.counter % self.config["control_decimation"] == 0:
                    with self.cmd_lock:
                        current_cmd = self.control_dict
                    single_obs, _ = self.compute_observation(self.data, self.config, self.action, current_cmd,  self.n_joints)
                    single_obs, _ = self.compute_observation(
                        self.data, self.config, self.action, current_cmd, self.n_joints
                    )
                    self.obs_history.append(single_obs)
                    for i, hist_obs in enumerate(self.obs_history):
                        self.obs[i * self.single_obs_dim:(i + 1) * self.single_obs_dim] = hist_obs
                        self.obs[i * self.single_obs_dim : (i + 1) * self.single_obs_dim] = hist_obs
                    obs_tensor = torch.from_numpy(self.obs).unsqueeze(0)
                    if (np.linalg.norm(np.array(current_cmd['loco_cmd']))) <= 0.05:
                    if (np.linalg.norm(np.array(current_cmd["loco_cmd"]))) <= 0.05:
                        self.action = self.policy(obs_tensor).cpu().detach().numpy().squeeze()
                    else:
                        self.action = self.walk_policy(obs_tensor).cpu().detach().numpy().squeeze()
                    self.target_dof_pos = self.action * self.config['action_scale'] + self.config['default_angles']
                    self.target_dof_pos = (
                        self.action * self.config["action_scale"] + self.config["default_angles"]
                    )
                viewer.sync()
                # time.sleep(max(0, self.model.opt.timestep - (time.time() - step_start)))
 if __name__ == "__main__":
    CONFIG_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "resources", "robots", "g1")
    CONFIG_PATH = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "resources", "robots", "g1"
    )
    controller = GearWbcController(CONFIG_PATH)
    controller.run()
--- a/gr00t_wbc/sim2mujoco/scripts/run_mujoco_gear_wbc_gait.py
+++ b/gr00t_wbc/sim2mujoco/scripts/run_mujoco_gear_wbc_gait.py
@ -1,21 +1,16 @@
 import sys
 import time
 import collections
 import yaml
 import torch
 import numpy as np
 import sys
 import os
 import threading
 import time
 import collections
 import yaml
 import torch
 import numpy as np
 import mujoco
 import mujoco.viewer
 import numpy as np
 import onnxruntime as ort
 import threading
 from pynput import keyboard as pkb
 import os
 import torch
 import yaml
 class GearWbcController:
    def __init__(self, config_path):
@ -24,21 +19,21 @@ class GearWbcController:
        self.config = self.load_config(os.path.join(self.CONFIG_PATH, "g1_gear_wbc.yaml"))
        self.control_dict = {
            'loco_cmd': self.config['cmd_init'],
            "height_cmd": self.config['height_cmd'],
            "rpy_cmd": self.config.get('rpy_cmd', [0.0, 0.0, 0.0]),
            "freq_cmd": self.config.get('freq_cmd', 1.5)
            "loco_cmd": self.config["cmd_init"],
            "height_cmd": self.config["height_cmd"],
            "rpy_cmd": self.config.get("rpy_cmd", [0.0, 0.0, 0.0]),
            "freq_cmd": self.config.get("freq_cmd", 1.5),
        }
        self.model = mujoco.MjModel.from_xml_path(self.config['xml_path'])
        self.model = mujoco.MjModel.from_xml_path(self.config["xml_path"])
        self.data = mujoco.MjData(self.model)
        self.model.opt.timestep = self.config['simulation_dt']
        self.model.opt.timestep = self.config["simulation_dt"]
        self.n_joints = self.data.qpos.shape[0] - 7
        self.torso_index = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "torso_link")
        self.base_index = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "pelvis")
        self.action = np.zeros(self.config['num_actions'], dtype=np.float32)
        self.target_dof_pos = self.config['default_angles'].copy()
        self.policy = self.load_onnx_policy(self.config['policy_path'])
        self.action = np.zeros(self.config["num_actions"], dtype=np.float32)
        self.target_dof_pos = self.config["default_angles"].copy()
        self.policy = self.load_onnx_policy(self.config["policy_path"])
        self.gait_indices = torch.zeros((1), dtype=torch.float32)
        self.counter = 0
        self.just_started = 0.0
@ -49,15 +44,15 @@ class GearWbcController:
            self.data, self.config, self.action, self.control_dict, self.n_joints
        )
        self.obs_history = collections.deque(
            [np.zeros(self.single_obs_dim, dtype=np.float32)] * self.config['obs_history_len'],
            maxlen=self.config['obs_history_len']
            [np.zeros(self.single_obs_dim, dtype=np.float32)] * self.config["obs_history_len"],
            maxlen=self.config["obs_history_len"],
        )
        self.obs = np.zeros(self.config['num_obs'], dtype=np.float32)
        self.obs = np.zeros(self.config["num_obs"], dtype=np.float32)
        self.keyboard_listener(self.control_dict, self.config)
    def keyboard_listener(self, control_dict, config):
        """Listen to key press events and update cmd and height_cmd"""
        def on_press(key):
            try:
                k = key.char
@ -65,58 +60,60 @@ class GearWbcController:
                return  # Special keys ignored
            with self.cmd_lock:
                if k == 'w':
                    control_dict['loco_cmd'][0] += 0.2
                elif k == 's':
                    control_dict['loco_cmd'][0] -= 0.2
                elif k == 'a':
                    control_dict['loco_cmd'][1] += 0.5
                elif k == 'd':
                    control_dict['loco_cmd'][1] -= 0.5
                elif k == 'q':
                    control_dict['loco_cmd'][2] += 0.5
                elif k == 'e':
                    control_dict['loco_cmd'][2] -= 0.5
                elif k == 'z':
                    control_dict['loco_cmd'][:] = config['cmd_init']
                    control_dict["height_cmd"] = config['height_cmd']
                    control_dict['rpy_cmd'][:] = config['rpy_cmd']
                    control_dict['freq_cmd'] = config['freq_cmd']
                elif k == '1':
                if k == "w":
                    control_dict["loco_cmd"][0] += 0.2
                elif k == "s":
                    control_dict["loco_cmd"][0] -= 0.2
                elif k == "a":
                    control_dict["loco_cmd"][1] += 0.5
                elif k == "d":
                    control_dict["loco_cmd"][1] -= 0.5
                elif k == "q":
                    control_dict["loco_cmd"][2] += 0.5
                elif k == "e":
                    control_dict["loco_cmd"][2] -= 0.5
                elif k == "z":
                    control_dict["loco_cmd"][:] = config["cmd_init"]
                    control_dict["height_cmd"] = config["height_cmd"]
                    control_dict["rpy_cmd"][:] = config["rpy_cmd"]
                    control_dict["freq_cmd"] = config["freq_cmd"]
                elif k == "1":
                    control_dict["height_cmd"] += 0.05
                elif k == '2':
                elif k == "2":
                    control_dict["height_cmd"] -= 0.05
                elif k == '3':
                    control_dict['rpy_cmd'][0] += 0.2
                elif k == '4':
                    control_dict['rpy_cmd'][0] -= 0.2
                elif k == '5':
                    control_dict['rpy_cmd'][1] += 0.2
                elif k == '6':
                    control_dict['rpy_cmd'][1] -= 0.2
                elif k == '7':
                    control_dict['rpy_cmd'][2] += 0.2
                elif k == '8':
                    control_dict['rpy_cmd'][2] -= 0.2
                elif k == 'm':
                    control_dict['freq_cmd'] += 0.1
                elif k == 'n':
                    control_dict['freq_cmd'] -= 0.1
                print(f"Current Commands: loco_cmd = {control_dict['loco_cmd']}, height_cmd = {control_dict['height_cmd']}, rpy_cmd = {control_dict['rpy_cmd']}, freq_cmd = {control_dict['freq_cmd']}")
                elif k == "3":
                    control_dict["rpy_cmd"][0] += 0.2
                elif k == "4":
                    control_dict["rpy_cmd"][0] -= 0.2
                elif k == "5":
                    control_dict["rpy_cmd"][1] += 0.2
                elif k == "6":
                    control_dict["rpy_cmd"][1] -= 0.2
                elif k == "7":
                    control_dict["rpy_cmd"][2] += 0.2
                elif k == "8":
                    control_dict["rpy_cmd"][2] -= 0.2
                elif k == "m":
                    control_dict["freq_cmd"] += 0.1
                elif k == "n":
                    control_dict["freq_cmd"] -= 0.1
                print(
                    f"Current Commands: loco_cmd = {control_dict['loco_cmd']}, height_cmd = {control_dict['height_cmd']}, rpy_cmd = {control_dict['rpy_cmd']}, freq_cmd = {control_dict['freq_cmd']}"
                )
        listener = pkb.Listener(on_press=on_press)
        listener.daemon = True
        listener.start()
    def load_config(self, config_path):
        with open(config_path, 'r') as f:
        with open(config_path, "r") as f:
            config = yaml.safe_load(f)
        for path_key in ['policy_path', 'xml_path']:
        for path_key in ["policy_path", "xml_path"]:
            config[path_key] = os.path.join(CONFIG_PATH, config[path_key])
        array_keys = ['kps', 'kds', 'default_angles', 'cmd_scale', 'cmd_init']
        array_keys = ["kps", "kds", "default_angles", "cmd_scale", "cmd_init"]
        for key in array_keys:
            config[key] = np.array(config[key], dtype=np.float32)
@ -128,17 +125,19 @@ class GearWbcController:
    def quat_rotate_inverse(self, q, v):
        w, x, y, z = q
        q_conj = np.array([w, -x, -y, -z])
        return np.array([
            v[0] * (q_conj[0]**2 + q_conj[1]**2 - q_conj[2]**2 - q_conj[3]**2) +
            v[1] * 2 * (q_conj[1]*q_conj[2] - q_conj[0]*q_conj[3]) +
            v[2] * 2 * (q_conj[1]*q_conj[3] + q_conj[0]*q_conj[2]),
            v[0] * 2 * (q_conj[1]*q_conj[2] + q_conj[0]*q_conj[3]) +
            v[1] * (q_conj[0]**2 - q_conj[1]**2 + q_conj[2]**2 - q_conj[3]**2) +
            v[2] * 2 * (q_conj[2]*q_conj[3] - q_conj[0]*q_conj[1]),
            v[0] * 2 * (q_conj[1]*q_conj[3] - q_conj[0]*q_conj[2]) +
            v[1] * 2 * (q_conj[2]*q_conj[3] + q_conj[0]*q_conj[1]) +
            v[2] * (q_conj[0]**2 - q_conj[1]**2 - q_conj[2]**2 + q_conj[3]**2)
        ])
        return np.array(
            [
                v[0] * (q_conj[0] ** 2 + q_conj[1] ** 2 - q_conj[2] ** 2 - q_conj[3] ** 2)
                + v[1] * 2 * (q_conj[1] * q_conj[2] - q_conj[0] * q_conj[3])
                + v[2] * 2 * (q_conj[1] * q_conj[3] + q_conj[0] * q_conj[2]),
                v[0] * 2 * (q_conj[1] * q_conj[2] + q_conj[0] * q_conj[3])
                + v[1] * (q_conj[0] ** 2 - q_conj[1] ** 2 + q_conj[2] ** 2 - q_conj[3] ** 2)
                + v[2] * 2 * (q_conj[2] * q_conj[3] - q_conj[0] * q_conj[1]),
                v[0] * 2 * (q_conj[1] * q_conj[3] - q_conj[0] * q_conj[2])
                + v[1] * 2 * (q_conj[2] * q_conj[3] + q_conj[0] * q_conj[1])
                + v[2] * (q_conj[0] ** 2 - q_conj[1] ** 2 - q_conj[2] ** 2 + q_conj[3] ** 2),
            ]
        )
    def get_gravity_orientation(self, quat):
        gravity_vec = np.array([0.0, 0.0, -1.0])
@ -146,10 +145,10 @@ class GearWbcController:
    def compute_observation(self, d, config, action, control_dict, n_joints):
        command = np.zeros(8, dtype=np.float32)
        command[:3] = control_dict['loco_cmd'][:3] * config['cmd_scale']
        command[3] = control_dict['height_cmd']
        command[4] = control_dict['freq_cmd']
        command[5:8] = control_dict['rpy_cmd']
        command[:3] = control_dict["loco_cmd"][:3] * config["cmd_scale"]
        command[3] = control_dict["height_cmd"]
        command[4] = control_dict["freq_cmd"]
        command[5:8] = control_dict["rpy_cmd"]
        # gait indice
        is_static = np.linalg.norm(command[:3]) < 0.1
@ -191,10 +190,7 @@ class GearWbcController:
        # Clock signal
        clock = [torch.sin(2 * np.pi * fi) for fi in gait_pair]
        for i, (clk, frozen_mask_attr) in enumerate(
            zip(clock, ['frozen_FL', 'frozen_FR'])
        ):
        for i, (clk, frozen_mask_attr) in enumerate(zip(clock, ["frozen_FL", "frozen_FR"])):
            frozen_mask = getattr(self, frozen_mask_attr)
            # Freeze condition: static and at sin peak
            if is_static and (not frozen_mask) and clk.item() > 0.98:
@ -205,23 +201,25 @@ class GearWbcController:
            clock[i] = clk
        self.clock_inputs = torch.stack(clock).unsqueeze(0)
        qj = d.qpos[7:7+n_joints].copy()
        dqj = d.qvel[6:6+n_joints].copy()
        qj = d.qpos[7 : 7 + n_joints].copy()
        dqj = d.qvel[6 : 6 + n_joints].copy()
        quat = d.qpos[3:7].copy()
        omega = d.qvel[3:6].copy()
        # omega = self.data.xmat[self.base_index].reshape(3, 3).T @ self.data.cvel[self.base_index][3:6]
        padded_defaults = np.zeros(n_joints, dtype=np.float32)
        L = min(len(config['default_angles']), n_joints)
        padded_defaults[:L] = config['default_angles'][:L]
        L = min(len(config["default_angles"]), n_joints)
        padded_defaults[:L] = config["default_angles"][:L]
        qj_scaled = (qj - padded_defaults) * config['dof_pos_scale']
        dqj_scaled = dqj * config['dof_vel_scale']
        qj_scaled = (qj - padded_defaults) * config["dof_pos_scale"]
        dqj_scaled = dqj * config["dof_vel_scale"]
        gravity_orientation = self.get_gravity_orientation(quat)
        omega_scaled = omega * config['ang_vel_scale']
        omega_scaled = omega * config["ang_vel_scale"]
        torso_quat = self.data.xquat[self.torso_index]
        torso_omega = self.data.xmat[self.torso_index].reshape(3, 3).T @ self.data.cvel[self.torso_index][3:6]
        torso_omega_scaled = torso_omega * config['ang_vel_scale']
        torso_omega = (
            self.data.xmat[self.torso_index].reshape(3, 3).T @ self.data.cvel[self.torso_index][3:6]
        )
        torso_omega_scaled = torso_omega * config["ang_vel_scale"]
        torso_gravity_orientation = self.get_gravity_orientation(torso_quat)
        single_obs_dim = 95
@ -229,21 +227,25 @@ class GearWbcController:
        single_obs[0:8] = command[:8]
        single_obs[8:11] = omega_scaled
        single_obs[11:14] = gravity_orientation
        single_obs[14:17] = 0.#torso_omega_scaled
        single_obs[17:20] = 0.#torso_gravity_orientation
        single_obs[20:20+n_joints] = qj_scaled
        single_obs[20+n_joints:20+2*n_joints] = dqj_scaled
        single_obs[20+2*n_joints:20+2*n_joints+15] = action
        single_obs[20+2*n_joints+15:20+2*n_joints+15+2] = self.clock_inputs.cpu().numpy().reshape(2)
        single_obs[14:17] = 0.0  # torso_omega_scaled
        single_obs[17:20] = 0.0  # torso_gravity_orientation
        single_obs[20 : 20 + n_joints] = qj_scaled
        single_obs[20 + n_joints : 20 + 2 * n_joints] = dqj_scaled
        single_obs[20 + 2 * n_joints : 20 + 2 * n_joints + 15] = action
        single_obs[20 + 2 * n_joints + 15 : 20 + 2 * n_joints + 15 + 2] = (
            self.clock_inputs.cpu().numpy().reshape(2)
        )
        return single_obs, single_obs_dim
    def load_onnx_policy(self, path):
        model = ort.InferenceSession(path)
        def run_inference(input_tensor):
            ort_inputs = {model.get_inputs()[0].name: input_tensor.cpu().numpy()}
            ort_outs = model.run(None, ort_inputs)
            return torch.tensor(ort_outs[0], device="cuda:0")
        return run_inference
    def run(self):
@ -252,52 +254,58 @@ class GearWbcController:
        with mujoco.viewer.launch_passive(self.model, self.data) as viewer:
            start = time.time()
            while viewer.is_running() and time.time() - start < self.config['simulation_duration']:
            while viewer.is_running() and time.time() - start < self.config["simulation_duration"]:
                step_start = time.time()
                leg_tau = self.pd_control(
                    self.target_dof_pos,
                    self.data.qpos[7:7+self.config['num_actions']],
                    self.config['kps'],
                    np.zeros_like(self.config['kps']),
                    self.data.qvel[6:6+self.config['num_actions']],
                    self.config['kds']
                    self.data.qpos[7 : 7 + self.config["num_actions"]],
                    self.config["kps"],
                    np.zeros_like(self.config["kps"]),
                    self.data.qvel[6 : 6 + self.config["num_actions"]],
                    self.config["kds"],
                )
                self.data.ctrl[:self.config['num_actions']] = leg_tau
                self.data.ctrl[: self.config["num_actions"]] = leg_tau
                if self.n_joints > self.config['num_actions']:
                if self.n_joints > self.config["num_actions"]:
                    arm_tau = self.pd_control(
                        np.zeros(self.n_joints - self.config['num_actions'], dtype=np.float32),
                        self.data.qpos[7+self.config['num_actions']:7+self.n_joints],
                        np.full(self.n_joints - self.config['num_actions'], 100.0),
                        np.zeros(self.n_joints - self.config['num_actions']),
                        self.data.qvel[6+self.config['num_actions']:6+self.n_joints],
                        np.full(self.n_joints - self.config['num_actions'], 0.5)
                        np.zeros(self.n_joints - self.config["num_actions"], dtype=np.float32),
                        self.data.qpos[7 + self.config["num_actions"] : 7 + self.n_joints],
                        np.full(self.n_joints - self.config["num_actions"], 100.0),
                        np.zeros(self.n_joints - self.config["num_actions"]),
                        self.data.qvel[6 + self.config["num_actions"] : 6 + self.n_joints],
                        np.full(self.n_joints - self.config["num_actions"], 0.5),
                    )
                    self.data.ctrl[self.config['num_actions']:] = arm_tau
                    self.data.ctrl[self.config["num_actions"] :] = arm_tau
                mujoco.mj_step(self.model, self.data)
                self.counter += 1
                if self.counter % self.config['control_decimation'] == 0:
                if self.counter % self.config["control_decimation"] == 0:
                    with self.cmd_lock:
                        current_cmd = self.control_dict
                    single_obs, _ = self.compute_observation(self.data, self.config, self.action, current_cmd,  self.n_joints)
                    single_obs, _ = self.compute_observation(
                        self.data, self.config, self.action, current_cmd, self.n_joints
                    )
                    self.obs_history.append(single_obs)
                    for i, hist_obs in enumerate(self.obs_history):
                        self.obs[i * self.single_obs_dim:(i + 1) * self.single_obs_dim] = hist_obs
                        self.obs[i * self.single_obs_dim : (i + 1) * self.single_obs_dim] = hist_obs
                    obs_tensor = torch.from_numpy(self.obs).unsqueeze(0)
                    self.action = self.policy(obs_tensor).cpu().detach().numpy().squeeze()
                    self.target_dof_pos = self.action * self.config['action_scale'] + self.config['default_angles']
                    self.target_dof_pos = (
                        self.action * self.config["action_scale"] + self.config["default_angles"]
                    )
                viewer.sync()
                # time.sleep(max(0, self.model.opt.timestep - (time.time() - step_start)))
 if __name__ == "__main__":
    CONFIG_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "resources", "robots", "g1")
    CONFIG_PATH = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "resources", "robots", "g1"
    )
    controller = GearWbcController(CONFIG_PATH)
    controller.run()
--- a/tests/control/main/teleop/test_g1_control_loop.py
+++ b/tests/control/main/teleop/test_g1_control_loop.py
@ -12,7 +12,11 @@ import rclpy
 from scipy.spatial.transform import Rotation as R
 from std_msgs.msg import String as RosStringMsg
 from gr00t_wbc.control.main.constants import CONTROL_GOAL_TOPIC, KEYBOARD_INPUT_TOPIC, STATE_TOPIC_NAME
 from gr00t_wbc.control.main.constants import (
    CONTROL_GOAL_TOPIC,
    KEYBOARD_INPUT_TOPIC,
    STATE_TOPIC_NAME,
 )
 from gr00t_wbc.control.utils.ros_utils import ROSMsgPublisher, ROSMsgSubscriber
 from gr00t_wbc.control.utils.term_color_constants import GREEN_BOLD, RESET, YELLOW_BOLD
 from gr00t_wbc.data.viz.rerun_viz import RerunViz
--- a/tests/control/visualization/test_meshcat_visualizer_env.py
+++ b/tests/control/visualization/test_meshcat_visualizer_env.py
@ -5,7 +5,9 @@ import numpy as np
 import pytest
 from gr00t_wbc.control.robot_model import RobotModel
 from gr00t_wbc.control.robot_model.supplemental_info.g1.g1_supplemental_info import G1SupplementalInfo
 from gr00t_wbc.control.robot_model.supplemental_info.g1.g1_supplemental_info import (
    G1SupplementalInfo,
 )
@pytest.fixture
@ -17,7 +19,9 @@ def env_fixture():
    from gr00t_wbc.control.visualization.meshcat_visualizer_env import MeshcatVisualizerEnv
    root_dir = pathlib.Path(__file__).parent.parent.parent.parent
    urdf_path = str(root_dir / "gr00t_wbc/control/robot_model/model_data/g1/g1_29dof_with_hand.urdf")
    urdf_path = str(
        root_dir / "gr00t_wbc/control/robot_model/model_data/g1/g1_29dof_with_hand.urdf"
    )
    asset_path = str(root_dir / "gr00t_wbc/control/robot_model/model_data/g1")
    robot_config = {
        "asset_path": asset_path,