Browse Source

Initial commit: Native Quest 3 teleop app with body tracking

Godot 4.3 project for Quest 3 that replaces the browser-based WebXR
teleoperation with a native app using XR_FB_body_tracking (70 joints).
Key advantage: chest-relative wrist tracking eliminates head-position
subtraction artifacts and decouples body rotation from arm control.

Godot app: body_tracker.gd, teleop_client.gd, webcam_display.gd
Robot server: teleop_server.py (WebSocket, replaces Vuer ~900→~250 lines)
Drop-in wrapper: native_tv_wrapper.py (compatible with teleop_hand_and_arm.py)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
master
Joe DiPrima 1 month ago
commit
321ae3c322
  1. 26
      .gitignore
  2. 81
      Main.gd
  3. 29
      Main.tscn
  4. 79
      SETUP.md
  5. 36
      android/AndroidManifest.xml
  6. 47
      export_presets.cfg
  7. 5
      icon.svg
  8. 3
      openxr_action_map.tres
  9. 47
      project.godot
  10. 17
      scenes/webcam_quad.tscn
  11. 202
      scripts/body_tracker.gd
  12. 179
      scripts/teleop_client.gd
  13. 87
      scripts/webcam_display.gd
  14. 6
      server/__init__.py
  15. 330
      server/native_tv_wrapper.py
  16. 4
      server/requirements.txt
  17. 315
      server/teleop_server.py

26
.gitignore

@ -0,0 +1,26 @@
# Godot
.godot/
*.import
android/build/
build/
*.apk
# OS
.DS_Store
Thumbs.db
*.swp
*~
# IDE
.vscode/
.idea/
# Python
__pycache__/
*.pyc
*.pyo
.venv/
venv/
# Export
*.aab

81
Main.gd

@ -0,0 +1,81 @@
extends Node3D
## Main entry point for G1 Teleop Quest 3 app.
## Initializes the OpenXR session with passthrough and wires the body tracker
## to the WebSocket teleop client.

@onready var body_tracker: Node = $BodyTracker
@onready var teleop_client: Node = $TeleopClient
@onready var xr_origin: XROrigin3D = $XROrigin3D
@onready var xr_camera: XRCamera3D = $XROrigin3D/XRCamera3D
@onready var webcam_quad: MeshInstance3D = $XROrigin3D/XRCamera3D/WebcamQuad

var xr_interface: XRInterface
var xr_is_focused: bool = false


func _ready() -> void:
    # Initialize OpenXR interface
    xr_interface = XRServer.find_interface("OpenXR")
    if xr_interface == null:
        printerr("[Main] OpenXR interface not found. Is the plugin enabled?")
        get_tree().quit()
        return
    # FIX: connect lifecycle signals unconditionally. The original connected
    # them only when OpenXR was not yet initialized, so after a hot start
    # (interface already initialized) xr_is_focused was never updated.
    xr_interface.connect("session_begun", _on_openxr_session_begun)
    xr_interface.connect("session_focussed", _on_openxr_focused)  # "focussed" is Godot's spelling
    xr_interface.connect("session_stopping", _on_openxr_stopping)
    if xr_interface.is_initialized():
        print("[Main] OpenXR already initialized")
        _on_openxr_ready()
    else:
        if not xr_interface.initialize():
            printerr("[Main] Failed to initialize OpenXR")
            get_tree().quit()
            return
        print("[Main] OpenXR initialized, waiting for session")
    # Enable XR on the viewport
    get_viewport().use_xr = true
    # Wire body tracker output to teleop client
    body_tracker.tracking_data_ready.connect(teleop_client._on_tracking_data)
    # Wire webcam frames from teleop client to webcam display
    teleop_client.webcam_frame_received.connect(webcam_quad._on_webcam_frame)


func _on_openxr_session_begun() -> void:
    print("[Main] OpenXR session begun")
    _on_openxr_ready()


func _on_openxr_ready() -> void:
    # Enable passthrough (Quest 3 mixed reality)
    _enable_passthrough()


func _on_openxr_focused() -> void:
    xr_is_focused = true
    print("[Main] OpenXR session focused")


func _on_openxr_stopping() -> void:
    xr_is_focused = false
    print("[Main] OpenXR session stopping")


## Request the alpha-blend environment blend mode so the real world shows
## through the rendered scene (Quest 3 passthrough mixed reality).
func _enable_passthrough() -> void:
    var openxr = xr_interface as OpenXRInterface
    if openxr:
        var modes = openxr.get_supported_environment_blend_modes()
        print("[Main] Supported blend modes: ", modes)
        # Use the enum constant rather than a magic number — the original
        # comments contradicted each other (claimed alpha blend was both 3 and 2).
        if XRInterface.XR_ENV_BLEND_MODE_ALPHA_BLEND in modes:
            openxr.set_environment_blend_mode(XRInterface.XR_ENV_BLEND_MODE_ALPHA_BLEND)
            print("[Main] Passthrough enabled (alpha blend)")
            # Clear background to transparent so passthrough is visible behind content.
            get_viewport().transparent_bg = true
            RenderingServer.set_default_clear_color(Color(0, 0, 0, 0))
        else:
            print("[Main] Alpha blend not supported, using opaque mode")

29
Main.tscn

@ -0,0 +1,29 @@
[gd_scene load_steps=6 format=3 uid="uid://main_scene"]
[ext_resource type="Script" path="res://Main.gd" id="1"]
[ext_resource type="Script" path="res://scripts/body_tracker.gd" id="2"]
[ext_resource type="Script" path="res://scripts/teleop_client.gd" id="3"]
[ext_resource type="Script" path="res://scripts/webcam_display.gd" id="4"]
[ext_resource type="PackedScene" path="res://scenes/webcam_quad.tscn" id="5"]
[node name="Main" type="Node3D"]
script = ExtResource("1")
[node name="XROrigin3D" type="XROrigin3D" parent="."]
[node name="XRCamera3D" type="XRCamera3D" parent="XROrigin3D"]
[node name="LeftController" type="XRController3D" parent="XROrigin3D"]
tracker = &"left_hand"
[node name="RightController" type="XRController3D" parent="XROrigin3D"]
tracker = &"right_hand"
[node name="BodyTracker" type="Node" parent="."]
script = ExtResource("2")
[node name="TeleopClient" type="Node" parent="."]
script = ExtResource("3")
[node name="WebcamQuad" parent="XROrigin3D/XRCamera3D" instance=ExtResource("5")]
transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, -0.3, -1.5)

79
SETUP.md

@ -0,0 +1,79 @@
# G1 Teleop — Quest 3 Native App Setup
## Prerequisites
- **Godot 4.3+** (download from https://godotengine.org)
- **Android SDK + NDK** (install via Android Studio or standalone)
- **Meta Quest Developer Hub** (for sideloading) or `adb`
- Quest 3 in **developer mode**
## Step 1: Install Meta OpenXR Vendors Plugin
1. Open project in Godot: `godot --editor --path C:\git\g1-teleop`
2. Go to **AssetLib** tab (top center)
3. Search for "Godot OpenXR Vendors"
4. Download and install (includes Meta Quest support)
5. Enable the plugin: **Project → Project Settings → Plugins → godotopenxrvendors** → Enable
6. Restart Godot when prompted
## Step 2: Configure Android Export
1. Go to **Editor → Editor Settings → Export → Android**
2. Set the path to your Android SDK
3. Set Java SDK path
4. Go to **Project → Export → Quest 3**
5. Download Android export templates if prompted (Editor → Manage Export Templates → Download)
## Step 3: Enable Body Tracking
Body tracking is configured via:
- `export_presets.cfg`: `xr_features/hand_tracking=2` (required)
- `android/AndroidManifest.xml`: body tracking features and metadata
The Meta OpenXR Vendors plugin v4.1.1+ exposes `XRBodyTracker` in GDScript.
## Step 4: Build and Sideload
1. Connect Quest 3 via USB (or Wi-Fi ADB)
2. In Godot: **Project → Export → Quest 3 → Export Project** (or one-click deploy)
3. APK will be at `build/g1-teleop.apk`
4. Sideload: `adb install build/g1-teleop.apk`
## Step 5: Robot Server
On the robot (or dev machine for testing):
```bash
cd server/
pip install -r requirements.txt
python teleop_server.py --port 8765
```
## Step 6: Configure App
Edit `scripts/teleop_client.gd` to set the robot's IP:
```gdscript
@export var server_host: String = "10.0.0.64" # Robot IP
@export var server_port: int = 8765
```
Alternatively, set these exported properties in the Godot editor's Inspector before building the APK (they are `@export` variables, so no code edit is required).
## Network
- Quest 3 and robot must be on the same network
- No SSL required (raw WebSocket)
- Default port: 8765
- Firewall: allow TCP 8765 on the robot
## Troubleshooting
### Body tracking not activating
- Ensure Quest 3 system software is up to date
- Check Settings → Movement Tracking → Body Tracking is enabled
- Ensure the Meta OpenXR Vendors plugin version is ≥ 4.1.1
### WebSocket connection fails
- Verify robot IP is correct and reachable: `ping 10.0.0.64`
- Check server is running: `ss -tlnp | grep 8765`
- Check firewall: `sudo ufw allow 8765/tcp`

36
android/AndroidManifest.xml

@ -0,0 +1,36 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Additional manifest entries for Quest 3 body tracking.
These are merged into the final AndroidManifest.xml by Godot's build system
when using the Meta OpenXR Vendors plugin.
Key extensions enabled:
- XR_FB_body_tracking (70 joints including chest)
- XR_META_body_tracking_full_body (v4.1.1+)
- XR_EXT_hand_tracking (fallback, if body tracking unavailable)
- Passthrough (mixed reality)
-->
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
<!-- Required permissions -->
<uses-permission android:name="android.permission.INTERNET" />
<uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
<uses-permission android:name="android.permission.ACCESS_WIFI_STATE" />
<!-- Meta Quest features -->
<uses-feature android:name="com.oculus.feature.PASSTHROUGH" android:required="true" />
<uses-feature android:name="oculus.software.body_tracking" android:required="true" />
<uses-feature android:name="oculus.software.handtracking" android:required="false" />
<application>
<!-- Body tracking metadata -->
<meta-data android:name="com.oculus.supportedDevices" android:value="quest3|quest3s|questpro" />
<meta-data android:name="com.oculus.intent.category.VR" android:value="vr_only" />
<!-- Enable body tracking API -->
<meta-data android:name="com.oculus.bodytracking.enabled" android:value="true" />
<!-- Request full body tracking (includes upper body by default) -->
<meta-data android:name="com.oculus.bodytracking.full_body" android:value="true" />
</application>
</manifest>

47
export_presets.cfg

@ -0,0 +1,47 @@
[preset.0]
name="Quest 3"
platform="Android"
runnable=true
dedicated_server=false
custom_features=""
export_filter="all_resources"
include_filter=""
exclude_filter=""
export_path="build/g1-teleop.apk"
encryption_include_filters=""
encryption_exclude_filters=""
encrypt_pck=false
encrypt_directory=false
[preset.0.options]
custom_template/debug=""
custom_template/release=""
gradle_build/use_gradle_build=true
gradle_build/export_format=0
gradle_build/min_sdk="29"
gradle_build/target_sdk="32"
architectures/arm64-v8a=true
architectures/armeabi-v7a=false
architectures/x86=false
architectures/x86_64=false
version/code=1
version/name="1.0.0"
package/unique_name="org.opentesla.g1teleop"
package/name="G1 Teleop"
package/signed=true
package/classify_as_game=false
package/retain_data_on_uninstall=false
package/exclude_from_recents=false
launcher_icons/main_192x192=""
launcher_icons/adaptive_foreground_432x432=""
launcher_icons/adaptive_background_432x432=""
graphics/opengl_debug=false
xr_features/xr_mode=1
xr_features/hand_tracking=2
xr_features/hand_tracking_frequency=1
xr_features/passthrough=2
permissions/internet=true
permissions/access_network_state=true
permissions/access_wifi_state=true

5
icon.svg

@ -0,0 +1,5 @@
<svg xmlns="http://www.w3.org/2000/svg" width="128" height="128" viewBox="0 0 128 128">
<rect width="128" height="128" rx="16" fill="#1a1a2e"/>
<text x="64" y="72" font-family="Arial, sans-serif" font-size="48" font-weight="bold" fill="#e94560" text-anchor="middle">G1</text>
<text x="64" y="100" font-family="Arial, sans-serif" font-size="16" fill="#0f3460" text-anchor="middle">TELEOP</text>
</svg>

3
openxr_action_map.tres

@ -0,0 +1,3 @@
[gd_resource type="OpenXRActionMap" format=3 uid="uid://openxr_actions"]
[resource]

47
project.godot

@ -0,0 +1,47 @@
; Engine configuration file.
; It's best edited using the editor UI and not directly,
; since the parameters that go here are not all obvious.
;
; Format:
; [section] is a section
; param=value ; assigned a value
; param=value ; assigned a value
config_version=5
[application]
config/name="G1 Teleop"
config/description="Native Quest 3 teleoperation app for Unitree G1 humanoid robot with body tracking"
run/main_scene="res://Main.tscn"
config/features=PackedStringArray("4.3", "Mobile")
config/icon="res://icon.svg"
[autoload]
[display]
window/size/viewport_width=1920
window/size/viewport_height=1920
[rendering]
renderer/rendering_method="mobile"
renderer/rendering_method.mobile="mobile"
textures/vram_compression/import_etc2_astc=true
[xr]
openxr/enabled=true
openxr/default_action_map="res://openxr_action_map.tres"
openxr/form_factor=0
openxr/view_configuration=1
openxr/reference_space=1
openxr/environment_blend_mode=0
openxr/foveation_level=3
openxr/foveation_dynamic=true
shaders/enabled=true
[editor_plugins]
enabled=PackedStringArray("res://addons/godotopenxrvendors/plugin.cfg")

17
scenes/webcam_quad.tscn

@ -0,0 +1,17 @@
[gd_scene load_steps=4 format=3 uid="uid://webcam_quad"]
[ext_resource type="Script" path="res://scripts/webcam_display.gd" id="1"]
[sub_resource type="QuadMesh" id="QuadMesh_1"]
size = Vector2(0.8, 0.6)
[sub_resource type="StandardMaterial3D" id="StandardMaterial3D_1"]
albedo_color = Color(1, 1, 1, 1)
shading_mode = 0
disable_ambient_light = true
disable_fog = true
[node name="WebcamQuad" type="MeshInstance3D"]
mesh = SubResource("QuadMesh_1")
material_override = SubResource("StandardMaterial3D_1")
script = ExtResource("1")

202
scripts/body_tracker.gd

@ -0,0 +1,202 @@
extends Node
## Reads XR_FB_body_tracking joints each frame via Godot's XRBodyTracker.
## Computes chest-relative wrist positions and emits tracking data.
##
## Meta body tracking provides 70 joints. We use:
##   CHEST (5)             - torso orientation (solves body rotation problem)
##   HEAD (7)              - head pose
##   LEFT_HAND_WRIST (12)  - left wrist
##   RIGHT_HAND_WRIST (17) - right wrist
##   Hand joints (18-69)   - finger positions
##
## Output poses are in Godot's coordinate system (Y-up, -Z forward).
## The server handles conversion to robot conventions.

## Emitted every Nth frame (see send_every_n_frames) with a tracking data
## Dictionary ready to be serialized and sent by TeleopClient.
signal tracking_data_ready(data: Dictionary)

## Joint indices from XR_FB_body_tracking
## Reference: Meta OpenXR body tracking extension
## NOTE(review): index 0 is presumably the root joint (unused here) — confirm
## against the XR_FB_body_tracking enum of the installed vendors plugin.
const JOINT_HIPS := 1
const JOINT_SPINE_LOWER := 2
const JOINT_SPINE_MIDDLE := 3
const JOINT_SPINE_UPPER := 4
const JOINT_CHEST := 5
const JOINT_NECK := 6
const JOINT_HEAD := 7
const JOINT_LEFT_SHOULDER := 8
const JOINT_LEFT_SCAPULA := 9
const JOINT_LEFT_ARM_UPPER := 10
const JOINT_LEFT_ARM_LOWER := 11
const JOINT_LEFT_HAND_WRIST := 12
const JOINT_RIGHT_SHOULDER := 13
const JOINT_RIGHT_SCAPULA := 14
const JOINT_RIGHT_ARM_UPPER := 15
const JOINT_RIGHT_ARM_LOWER := 16
const JOINT_RIGHT_HAND_WRIST := 17
# Hand joints start at index 18 for left hand, 43 for right hand
# 25 joints per hand (same layout as XR_EXT_hand_tracking)
const JOINT_LEFT_HAND_START := 18
const JOINT_RIGHT_HAND_START := 43
const HAND_JOINT_COUNT := 25

## Total body joint count
const BODY_JOINT_COUNT := 70

## Tracking state
# Tracker path looked up on XRServer each frame; must match what the OpenXR
# vendors plugin registers.
var body_tracker_name: StringName = &"/user/body_tracker"
# True while valid body-tracking data is being received (used to log transitions once).
var is_tracking: bool = false
# Frame counter for the send-rate divider below.
var frames_since_last_send: int = 0

## Send rate control: target ~30 Hz (every other frame at 72 Hz)
@export var send_every_n_frames: int = 2

## Debug logging
@export var debug_log: bool = false
# Counts emitted packets between debug prints (~60 packets ≈ 2 s at 30 Hz).
var _log_counter: int = 0


func _ready() -> void:
    print("[BodyTracker] Initialized, waiting for body tracking data...")
## Per-frame pipeline: rate-limit, fetch the body tracker, read key joints,
## build the tracking packet, and emit it for TeleopClient to send.
func _process(_delta: float) -> void:
    # Rate divider: only build/send a packet every Nth rendered frame.
    frames_since_last_send += 1
    if frames_since_last_send < send_every_n_frames:
        return
    frames_since_last_send = 0
    var tracker := XRServer.get_tracker(body_tracker_name) as XRBodyTracker
    if tracker == null:
        # Tracker not registered (or wrong type) — log the transition once.
        if is_tracking:
            print("[BodyTracker] Lost body tracking")
        is_tracking = false
        return
    if not tracker.get_has_tracking_data():
        if is_tracking:
            print("[BodyTracker] Body tracking data unavailable")
        is_tracking = false
        return
    if not is_tracking:
        print("[BodyTracker] Body tracking active!")
        is_tracking = true
    # Read key joint poses
    var chest_xform := tracker.get_joint_transform(JOINT_CHEST)
    var head_xform := tracker.get_joint_transform(JOINT_HEAD)
    var left_wrist_xform := tracker.get_joint_transform(JOINT_LEFT_HAND_WRIST)
    var right_wrist_xform := tracker.get_joint_transform(JOINT_RIGHT_HAND_WRIST)
    # Compute chest-relative wrist poses: chest^-1 * wrist.
    # This is the key advantage: eliminates head-position subtraction artifacts
    # and decouples body rotation from arm control.
    var chest_inv := chest_xform.affine_inverse()
    var left_wrist_rel := chest_inv * left_wrist_xform
    var right_wrist_rel := chest_inv * right_wrist_xform
    # Build tracking data packet
    var data := {}
    data["type"] = "tracking"
    data["timestamp"] = Time.get_ticks_msec()
    # Head pose (world space) - 7 floats: pos(3) + quat(4)
    data["head"] = _xform_to_pose7(head_xform)
    # Chest pose (world space) - 7 floats: pos(3) + quat(4)
    data["chest"] = _xform_to_pose7(chest_xform)
    # Chest-relative wrist poses as 4x4 matrices (16 floats each, column-major)
    # These replace the world-space wrist poses; no head subtraction needed
    data["left_wrist"] = _xform_to_mat16(left_wrist_rel)
    data["right_wrist"] = _xform_to_mat16(right_wrist_rel)
    # Hand joint positions (25 joints per hand, relative to wrist)
    var left_hand_pos := _get_hand_positions(tracker, JOINT_LEFT_HAND_START, left_wrist_xform)
    var right_hand_pos := _get_hand_positions(tracker, JOINT_RIGHT_HAND_START, right_wrist_xform)
    data["left_hand_pos"] = left_hand_pos
    data["right_hand_pos"] = right_hand_pos
    # Hand joint rotations (25 joints per hand, relative to wrist)
    var left_hand_rot := _get_hand_rotations(tracker, JOINT_LEFT_HAND_START, left_wrist_xform)
    var right_hand_rot := _get_hand_rotations(tracker, JOINT_RIGHT_HAND_START, right_wrist_xform)
    data["left_hand_rot"] = left_hand_rot
    data["right_hand_rot"] = right_hand_rot
    # Debug logging every ~2 seconds (60 packets at ~30 Hz; counter only
    # advances while debug_log is enabled)
    if debug_log:
        _log_counter += 1
        if _log_counter >= 60:
            _log_counter = 0
            print("[BodyTracker] chest=", chest_xform.origin,
                    " head=", head_xform.origin,
                    " L_wrist_rel=", left_wrist_rel.origin,
                    " R_wrist_rel=", right_wrist_rel.origin)
    tracking_data_ready.emit(data)
## Flatten a Transform3D into a 7-float pose array: [x, y, z, qx, qy, qz, qw].
func _xform_to_pose7(xform: Transform3D) -> Array:
    var q := xform.basis.get_rotation_quaternion()
    var p := xform.origin
    return [p.x, p.y, p.z, q.x, q.y, q.z, q.w]
## Flatten a Transform3D into a 16-float column-major 4x4 matrix:
## [m00, m10, m20, 0, m01, m11, m21, 0, m02, m12, m22, 0, tx, ty, tz, 1].
## Column-major matches NumPy's Fortran ('F') order used by the existing
## teleop code on the server side.
func _xform_to_mat16(xform: Transform3D) -> Array:
    # Basis.x / .y / .z are the rotation's column vectors; emit each as a
    # homogeneous column, then the translation column.
    var out := []
    for axis in [xform.basis.x, xform.basis.y, xform.basis.z]:
        out.append_array([axis.x, axis.y, axis.z, 0.0])
    var t := xform.origin
    out.append_array([t.x, t.y, t.z, 1.0])
    return out
## Collect the 25 hand joint positions relative to the wrist as a flat
## Array of 75 floats ([x, y, z] per joint, in joint-index order).
func _get_hand_positions(tracker: XRBodyTracker, start_idx: int, wrist_xform: Transform3D) -> Array:
    var to_wrist := wrist_xform.affine_inverse()
    var out := []
    for joint in range(HAND_JOINT_COUNT):
        var local := (to_wrist * tracker.get_joint_transform(start_idx + joint)).origin
        out.append_array([local.x, local.y, local.z])
    return out
## Collect the 25 hand joint rotations relative to the wrist as a flat
## Array of 225 floats (one column-major 3x3 rotation matrix per joint).
func _get_hand_rotations(tracker: XRBodyTracker, start_idx: int, wrist_xform: Transform3D) -> Array:
    var wrist_to_joint := wrist_xform.basis.inverse()
    var out := []
    for joint in range(HAND_JOINT_COUNT):
        var rel := wrist_to_joint * tracker.get_joint_transform(start_idx + joint).basis
        # Basis.x / .y / .z are the columns; append each column's components.
        for col in [rel.x, rel.y, rel.z]:
            out.append_array([col.x, col.y, col.z])
    return out

179
scripts/teleop_client.gd

@ -0,0 +1,179 @@
extends Node
## WebSocket client that connects to teleop_server.py on the robot.
## Sends body tracking data as JSON, receives JPEG webcam frames as binary.
##
## Protocol:
##   Client → Server: JSON text messages with tracking data
##   Server → Client: Binary messages with JPEG webcam frames
##
## No SSL required — raw WebSocket over local network.

## Emitted when a JPEG webcam frame is received from the server.
signal webcam_frame_received(jpeg_bytes: PackedByteArray)
## Emitted when connection state changes.
signal connection_state_changed(connected: bool)

## Server address — robot's IP and teleop_server.py port
@export var server_host: String = "10.0.0.64"
@export var server_port: int = 8765
@export var auto_connect: bool = true

## Reconnection settings
@export var reconnect_delay_sec: float = 2.0
@export var max_reconnect_attempts: int = 0 # 0 = unlimited

## Connection state
var ws := WebSocketPeer.new()
# FIX: renamed from `is_connected` — a script member named `is_connected`
# shadows Object.is_connected() and is rejected by the GDScript parser.
var connected: bool = false
var _reconnect_timer: float = 0.0
var _reconnect_attempts: int = 0
var _was_connected: bool = false
# NOTE: the original also declared a `_pending_data` buffer that was flushed
# but never filled (tracking data is perishable and is dropped when offline);
# the dead buffer has been removed.

## Stats
var messages_sent: int = 0
var messages_received: int = 0
var bytes_sent: int = 0
var bytes_received: int = 0
var last_send_time: int = 0
var last_receive_time: int = 0


func _ready() -> void:
    if auto_connect:
        connect_to_server()


func _process(delta: float) -> void:
    # WebSocketPeer requires explicit polling every frame.
    ws.poll()
    var state := ws.get_ready_state()
    match state:
        WebSocketPeer.STATE_OPEN:
            if not connected:
                connected = true
                _was_connected = true
                _reconnect_attempts = 0
                print("[TeleopClient] Connected to ws://%s:%d" % [server_host, server_port])
                connection_state_changed.emit(true)
            # Process incoming messages
            while ws.get_available_packet_count() > 0:
                var packet := ws.get_packet()
                bytes_received += packet.size()
                messages_received += 1
                last_receive_time = Time.get_ticks_msec()
                # Check if this is a binary (JPEG) or text message
                if ws.was_string_packet():
                    _handle_text_message(packet.get_string_from_utf8())
                else:
                    # Binary = JPEG webcam frame
                    webcam_frame_received.emit(packet)
        WebSocketPeer.STATE_CLOSING:
            pass # Wait for close to complete
        WebSocketPeer.STATE_CLOSED:
            # FIX: only announce the disconnect on the transition. The original
            # condition (`is_connected or _was_connected`) kept printing and
            # re-emitting connection_state_changed(false) every frame while
            # closed, because _was_connected stays true for reconnection.
            if connected:
                var code := ws.get_close_code()
                var reason := ws.get_close_reason()
                print("[TeleopClient] Disconnected (code=%d, reason=%s)" % [code, reason])
                connected = false
                connection_state_changed.emit(false)
            # Auto-reconnect
            if auto_connect and _was_connected:
                _reconnect_timer -= delta
                if _reconnect_timer <= 0:
                    _reconnect_timer = reconnect_delay_sec
                    _attempt_reconnect()
        WebSocketPeer.STATE_CONNECTING:
            pass # Still connecting


## Initiate the WebSocket handshake; completion is observed in _process().
func connect_to_server() -> void:
    var url := "ws://%s:%d" % [server_host, server_port]
    print("[TeleopClient] Connecting to %s..." % url)
    var err := ws.connect_to_url(url)
    if err != OK:
        printerr("[TeleopClient] Failed to initiate connection: ", err)


## Close the connection and disable auto-reconnect.
func disconnect_from_server() -> void:
    _was_connected = false
    ws.close()


func _attempt_reconnect() -> void:
    if max_reconnect_attempts > 0:
        _reconnect_attempts += 1
        if _reconnect_attempts > max_reconnect_attempts:
            print("[TeleopClient] Max reconnect attempts reached, giving up")
            _was_connected = false
            return
        print("[TeleopClient] Reconnect attempt %d/%d..." % [_reconnect_attempts, max_reconnect_attempts])
    else:
        _reconnect_attempts += 1
        if _reconnect_attempts % 5 == 1: # Log every 5th attempt to avoid spam
            print("[TeleopClient] Reconnect attempt %d..." % _reconnect_attempts)
    connect_to_server()


## Called by body_tracker via signal when new tracking data is ready.
func _on_tracking_data(data: Dictionary) -> void:
    if connected:
        _send_json(data)
    # Don't buffer if not connected — tracking data is perishable


func _send_json(data: Dictionary) -> void:
    var json_str := JSON.stringify(data)
    var err := ws.send_text(json_str)
    if err == OK:
        messages_sent += 1
        # FIX: count the UTF-8 bytes actually sent on the wire;
        # String.length() counts characters, not bytes.
        bytes_sent += json_str.to_utf8_buffer().size()
        last_send_time = Time.get_ticks_msec()
    else:
        printerr("[TeleopClient] Failed to send: ", err)


func _handle_text_message(text: String) -> void:
    # Server may send JSON status/config messages
    var json := JSON.new()
    var err := json.parse(text)
    if err != OK:
        printerr("[TeleopClient] Invalid JSON from server: ", text.left(100))
        return
    var data: Dictionary = json.data
    var msg_type: String = data.get("type", "")
    match msg_type:
        "config":
            print("[TeleopClient] Server config: ", data)
        "status":
            print("[TeleopClient] Server status: ", data.get("message", ""))
        _:
            print("[TeleopClient] Unknown message type: ", msg_type)


## Get connection statistics as a dictionary
func get_stats() -> Dictionary:
    return {
        "connected": connected,
        "messages_sent": messages_sent,
        "messages_received": messages_received,
        "bytes_sent": bytes_sent,
        "bytes_received": bytes_received,
        "reconnect_attempts": _reconnect_attempts,
    }

87
scripts/webcam_display.gd

@ -0,0 +1,87 @@
extends MeshInstance3D
## Displays JPEG webcam frames received from the robot on a quad mesh.
## The quad is positioned in front of the user's view (child of XRCamera3D).
##
## Frames arrive as raw JPEG bytes via TeleopClient's webcam_frame_received signal.

## Albedo color shown before any frame arrives (dark translucent placeholder).
@export var default_color := Color(0.1, 0.1, 0.1, 0.8)

## State
var _texture: ImageTexture
var _material: StandardMaterial3D
var _frame_count: int = 0
var _last_frame_time: int = 0
var _fps: float = 0.0
var _fps_update_timer: float = 0.0
var _has_received_frame: bool = false


func _ready() -> void:
    # Reuse the material set in the scene, or create one on the fly.
    _material = material_override as StandardMaterial3D
    if _material == null:
        _material = StandardMaterial3D.new()
        material_override = _material
    # Unshaded so scene lighting never tints the webcam feed.
    _material.shading_mode = BaseMaterial3D.SHADING_MODE_UNSHADED
    _material.albedo_color = default_color
    _material.transparency = BaseMaterial3D.TRANSPARENCY_ALPHA
    # Placeholder texture; replaced when the first frame decodes.
    _texture = ImageTexture.new()
    print("[WebcamDisplay] Ready, waiting for frames...")


func _process(delta: float) -> void:
    # Recompute the FPS estimate roughly once per second.
    _fps_update_timer += delta
    if _fps_update_timer < 1.0:
        return
    _fps = _frame_count / _fps_update_timer
    _frame_count = 0
    _fps_update_timer = 0.0


## Called when a JPEG webcam frame is received from the server.
## Connected via signal from TeleopClient in Main.gd.
func _on_webcam_frame(jpeg_bytes: PackedByteArray) -> void:
    if jpeg_bytes.size() < 2:
        return
    var decoded := Image.new()
    var err := decoded.load_jpg_from_buffer(jpeg_bytes)
    if err != OK:
        # Only log while the per-second frame counter is zero, to avoid spam.
        if _frame_count == 0:
            printerr("[WebcamDisplay] Failed to decode JPEG frame (size=%d, err=%d)" % [jpeg_bytes.size(), err])
        return
    _apply_frame(decoded)
    _frame_count += 1
    _last_frame_time = Time.get_ticks_msec()
    if not _has_received_frame:
        _has_received_frame = true
        print("[WebcamDisplay] First frame received!")


## Upload a decoded image, re-creating the GPU texture when dimensions change.
func _apply_frame(decoded: Image) -> void:
    var reusable := (
        _texture.get_image() != null
        and _texture.get_width() == decoded.get_width()
        and _texture.get_height() == decoded.get_height()
    )
    if reusable:
        _texture.update(decoded)
    else:
        _texture = ImageTexture.create_from_image(decoded)
        _material.albedo_texture = _texture
        _material.albedo_color = Color.WHITE # Full brightness once we have a real image
        print("[WebcamDisplay] Texture created: %dx%d" % [decoded.get_width(), decoded.get_height()])


## Get display FPS
func get_fps() -> float:
    return _fps


## Check if frames are being received (stale after 2 s without a frame).
func is_receiving() -> bool:
    return _has_received_frame and (Time.get_ticks_msec() - _last_frame_time) < 2000

6
server/__init__.py

@ -0,0 +1,6 @@
"""
G1 Teleop Server Quest 3 native app backend.
"""
from .teleop_server import TeleopServer
from .native_tv_wrapper import NativeTeleWrapper, TeleData

330
server/native_tv_wrapper.py

@ -0,0 +1,330 @@
"""
Native TeleVuerWrapper drop-in replacement for TeleVuerWrapper that uses
the Quest 3 native app via TeleopServer instead of Vuer/browser.
Returns TeleData in the same format as the original TeleVuerWrapper, but:
- Wrist poses arrive chest-relative from body tracking (no head subtraction needed)
- Chest orientation is available for body rotation decoupling
- No SSL, no browser, no Vuer dependency
The coordinate transform pipeline is preserved:
1. Quest 3 sends poses in Godot's coordinate system (Y-up, -Z forward)
2. This wrapper converts to robot convention (X-forward, Y-left, Z-up)
3. Applies Unitree arm URDF convention transforms
4. Applies head-to-waist offset (same as original)
Usage:
# Instead of:
# tv_wrapper = TeleVuerWrapper(use_hand_tracking=True, ...)
# Use:
# tv_wrapper = NativeTeleWrapper(port=8765)
# tv_wrapper.start() # Starts WebSocket server in background
"""
import numpy as np
import time
import threading
from multiprocessing import Array, Value
from dataclasses import dataclass, field
from typing import Optional
from teleop_server import TeleopServer
# --- Coordinate Transform Constants ---
# Godot coordinate system: Y-up, -Z forward, X-right
# This is equivalent to OpenXR convention.
# Robot convention: X-forward, Y-left, Z-up
# Basis change: OpenXR (Y-up, -Z fwd) → Robot (Z-up, X fwd)
# T_OPENXR_ROBOT below is the transpose (= inverse, pure rotation) of
# T_ROBOT_OPENXR; poses are re-expressed in the robot frame via the
# similarity transform T_ROBOT_OPENXR @ pose @ T_OPENXR_ROBOT (see get_tele_data).
T_ROBOT_OPENXR = np.array([[ 0, 0, -1, 0],
                           [-1, 0,  0, 0],
                           [ 0, 1,  0, 0],
                           [ 0, 0,  0, 1]], dtype=np.float64)
T_OPENXR_ROBOT = np.array([[ 0, -1, 0, 0],
                           [ 0,  0, 1, 0],
                           [-1,  0, 0, 0],
                           [ 0,  0, 0, 1]], dtype=np.float64)
# Rotation-only (3x3) views of the same basis changes.
R_ROBOT_OPENXR = T_ROBOT_OPENXR[:3, :3]
R_OPENXR_ROBOT = T_OPENXR_ROBOT[:3, :3]
# Arm initial pose convention transforms — right-multiplied onto the wrist
# poses (see get_tele_data) to match the Unitree arm URDF frames.
T_TO_UNITREE_HUMANOID_LEFT_ARM = np.array([[1, 0,  0, 0],
                                           [0, 0, -1, 0],
                                           [0, 1,  0, 0],
                                           [0, 0,  0, 1]], dtype=np.float64)
T_TO_UNITREE_HUMANOID_RIGHT_ARM = np.array([[1,  0, 0, 0],
                                            [0,  0, 1, 0],
                                            [0, -1, 0, 0],
                                            [0,  0, 0, 1]], dtype=np.float64)
# Hand convention transform
T_TO_UNITREE_HAND = np.array([[ 0,  0, 1, 0],
                              [-1,  0, 0, 0],
                              [ 0, -1, 0, 0],
                              [ 0,  0, 0, 1]], dtype=np.float64)
# Default poses (fallback when no data). These are expressed in the raw
# incoming (OpenXR/Godot, Y-up) frame, since they substitute for the raw
# matrices before the basis change — note the Y components near standing
# head/wrist heights.
CONST_HEAD_POSE = np.array([[1, 0, 0, 0],
                            [0, 1, 0, 1.5],
                            [0, 0, 1, -0.2],
                            [0, 0, 0, 1]], dtype=np.float64)
CONST_LEFT_ARM_POSE = np.array([[1, 0, 0, -0.15],
                                [0, 1, 0, 1.13],
                                [0, 0, 1, -0.3],
                                [0, 0, 0, 1]], dtype=np.float64)
CONST_RIGHT_ARM_POSE = np.array([[1, 0, 0, 0.15],
                                 [0, 1, 0, 1.13],
                                 [0, 0, 1, -0.3],
                                 [0, 0, 0, 1]], dtype=np.float64)
def fast_mat_inv(mat):
    """Fast SE(3) inverse using rotation transpose.

    For a rigid transform ``[R t; 0 1]`` the inverse is ``[R^T -R^T t; 0 1]``,
    which avoids a general 4x4 matrix inversion.
    """
    rot_t = mat[:3, :3].T
    inv = np.eye(4)
    inv[:3, :3] = rot_t
    inv[:3, 3] = -rot_t @ mat[:3, 3]
    return inv
def safe_mat_update(prev_mat, mat):
    """Validate *mat*; fall back to *prev_mat* when it is unusable.

    Returns ``(mat, True)`` if *mat* has a finite, non-(near-)zero
    determinant, otherwise ``(prev_mat, False)``. Guards against singular
    or NaN matrices arriving off the wire.
    """
    det = np.linalg.det(mat)
    usable = np.isfinite(det) and not np.isclose(det, 0.0, atol=1e-6)
    if usable:
        return mat, True
    return prev_mat, False
@dataclass
class TeleData:
    """Matches the TeleData from tv_wrapper.py exactly."""
    head_pose: np.ndarray  # (4,4) SE(3) pose of head
    left_wrist_pose: np.ndarray  # (4,4) SE(3) pose of left wrist (IK frame)
    right_wrist_pose: np.ndarray  # (4,4) SE(3) pose of right wrist (IK frame)
    # Optional fields default to None until the corresponding data arrives.
    chest_pose: Optional[np.ndarray] = None  # (4,4) SE(3) NEW: chest orientation
    # Hand tracking
    left_hand_pos: Optional[np.ndarray] = None  # (25,3) 3D positions of left hand joints
    right_hand_pos: Optional[np.ndarray] = None  # (25,3) 3D positions of right hand joints
    left_hand_rot: Optional[np.ndarray] = None  # (25,3,3) rotation matrices
    right_hand_rot: Optional[np.ndarray] = None  # (25,3,3) rotation matrices
    # Hand state (computed from finger positions)
    # NOTE(review): the 10.0 pinchValue defaults presumably mean "fully open"
    # in whatever unit the original tv_wrapper.py uses — confirm there.
    left_hand_pinch: bool = False
    left_hand_pinchValue: float = 10.0
    left_hand_squeeze: bool = False
    left_hand_squeezeValue: float = 0.0
    right_hand_pinch: bool = False
    right_hand_pinchValue: float = 10.0
    right_hand_squeeze: bool = False
    right_hand_squeezeValue: float = 0.0
class NativeTeleWrapper:
"""Drop-in replacement for TeleVuerWrapper using Quest 3 native app.
The key difference: wrist poses from body tracking are chest-relative,
so the head position subtraction in the original TeleVuerWrapper is
unnecessary. Instead, we use chest-relative data directly.
"""
def __init__(self, port: int = 8765, host: str = "0.0.0.0",
             chest_to_waist_x: float = 0.15,
             chest_to_waist_z: float = 0.28):
    """
    Args:
        port: WebSocket server port
        host: WebSocket bind address
        chest_to_waist_x: Forward offset from chest to IK solver origin (meters).
            Original head-to-waist was 0.15; chest is slightly forward
            of head, so this may need tuning.
        chest_to_waist_z: Vertical offset from chest to IK solver origin (meters).
            Original head-to-waist was 0.45; chest-to-waist is shorter
            (~0.25-0.30). Default 0.28 is a starting estimate.
    """
    # Owns the WebSocket server; not listening until start() is called.
    self.server = TeleopServer(host=host, port=port)
    # Background thread handle; set by start(), None until then.
    self._server_thread = None
    self._chest_to_waist_x = chest_to_waist_x
    self._chest_to_waist_z = chest_to_waist_z
def start(self):
"""Start the WebSocket server in a background thread."""
self._server_thread = self.server.start_in_thread()
def get_tele_data(self) -> TeleData:
"""Get processed motion data, transformed to robot convention.
Returns TeleData compatible with teleop_hand_and_arm.py.
"""
# Read raw poses from shared memory (column-major 4x4)
with self.server.head_pose_shared.get_lock():
head_raw = np.array(self.server.head_pose_shared[:]).reshape(4, 4, order='F')
with self.server.chest_pose_shared.get_lock():
chest_raw = np.array(self.server.chest_pose_shared[:]).reshape(4, 4, order='F')
with self.server.left_arm_pose_shared.get_lock():
left_wrist_raw = np.array(self.server.left_arm_pose_shared[:]).reshape(4, 4, order='F')
with self.server.right_arm_pose_shared.get_lock():
right_wrist_raw = np.array(self.server.right_arm_pose_shared[:]).reshape(4, 4, order='F')
# Validate matrices
head_raw, head_valid = safe_mat_update(CONST_HEAD_POSE, head_raw)
left_wrist_raw, left_valid = safe_mat_update(CONST_LEFT_ARM_POSE, left_wrist_raw)
right_wrist_raw, right_valid = safe_mat_update(CONST_RIGHT_ARM_POSE, right_wrist_raw)
# --- Transform from Godot/OpenXR convention to Robot convention ---
# Head: basis change (world-space pose)
Brobot_world_head = T_ROBOT_OPENXR @ head_raw @ T_OPENXR_ROBOT
# Chest: basis change (world-space pose)
Brobot_world_chest = T_ROBOT_OPENXR @ chest_raw @ T_OPENXR_ROBOT
# Wrist poses: these are CHEST-RELATIVE from body tracking.
# Apply basis change to chest-relative poses.
left_Brobot = T_ROBOT_OPENXR @ left_wrist_raw @ T_OPENXR_ROBOT
right_Brobot = T_ROBOT_OPENXR @ right_wrist_raw @ T_OPENXR_ROBOT
# Apply Unitree arm URDF convention transforms
left_IPunitree = left_Brobot @ (T_TO_UNITREE_HUMANOID_LEFT_ARM if left_valid else np.eye(4))
right_IPunitree = right_Brobot @ (T_TO_UNITREE_HUMANOID_RIGHT_ARM if right_valid else np.eye(4))
# CHEST-TO-WAIST offset: the IK solver origin is near the waist.
# Body tracking gives us chest-relative poses. The chest is lower
# than the head, so the vertical offset is smaller than the original
# head-to-waist offset (0.45). Configurable via constructor args.
left_wrist_final = left_IPunitree.copy()
right_wrist_final = right_IPunitree.copy()
left_wrist_final[0, 3] += self._chest_to_waist_x # x (forward)
right_wrist_final[0, 3] += self._chest_to_waist_x
left_wrist_final[2, 3] += self._chest_to_waist_z # z (up)
right_wrist_final[2, 3] += self._chest_to_waist_z
# --- Hand positions ---
left_hand_pos = np.zeros((25, 3))
right_hand_pos = np.zeros((25, 3))
if left_valid and right_valid:
with self.server.left_hand_position_shared.get_lock():
raw_left = np.array(self.server.left_hand_position_shared[:])
with self.server.right_hand_position_shared.get_lock():
raw_right = np.array(self.server.right_hand_position_shared[:])
if np.any(raw_left != 0):
# Hand positions are wrist-relative from Quest 3, in OpenXR coords.
# Must apply basis change (OpenXR→Robot) before hand convention transform.
# Derivation: original code does T_TO_UNITREE_HAND @ inv(arm_robot) @ (T_ROBOT_OPENXR @ world_pos)
# which simplifies to T_TO_UNITREE_HAND @ T_ROBOT_OPENXR @ wrist_relative_openxr
left_pos_25x3 = raw_left.reshape(25, 3)
left_hom = np.concatenate([left_pos_25x3.T, np.ones((1, 25))])
left_hand_pos = (T_TO_UNITREE_HAND @ T_ROBOT_OPENXR @ left_hom)[0:3, :].T
if np.any(raw_right != 0):
right_pos_25x3 = raw_right.reshape(25, 3)
right_hom = np.concatenate([right_pos_25x3.T, np.ones((1, 25))])
right_hand_pos = (T_TO_UNITREE_HAND @ T_ROBOT_OPENXR @ right_hom)[0:3, :].T
# --- Hand rotations ---
left_hand_rot = None
right_hand_rot = None
# TODO: Transform hand rotations if needed for dexterous hand control
# --- Pinch/squeeze from finger distances ---
# Compute pinch from thumb-tip to index-tip distance
left_pinch_val, left_pinch = _compute_pinch(left_hand_pos)
right_pinch_val, right_pinch = _compute_pinch(right_hand_pos)
left_squeeze_val, left_squeeze = _compute_squeeze(left_hand_pos)
right_squeeze_val, right_squeeze = _compute_squeeze(right_hand_pos)
return TeleData(
head_pose=Brobot_world_head,
left_wrist_pose=left_wrist_final,
right_wrist_pose=right_wrist_final,
chest_pose=Brobot_world_chest,
left_hand_pos=left_hand_pos,
right_hand_pos=right_hand_pos,
left_hand_rot=left_hand_rot,
right_hand_rot=right_hand_rot,
left_hand_pinch=left_pinch,
left_hand_pinchValue=left_pinch_val,
left_hand_squeeze=left_squeeze,
left_hand_squeezeValue=left_squeeze_val,
right_hand_pinch=right_pinch,
right_hand_pinchValue=right_pinch_val,
right_hand_squeeze=right_squeeze,
right_hand_squeezeValue=right_squeeze_val,
)
def render_to_xr(self, img):
"""Send a webcam frame to the Quest 3 app.
Args:
img: BGR numpy array (OpenCV format). Will be JPEG-encoded and sent.
"""
import cv2
# Encode as JPEG
_, jpeg_buf = cv2.imencode('.jpg', img, [cv2.IMWRITE_JPEG_QUALITY, 70])
self.server.set_webcam_frame(jpeg_buf.tobytes())
def close(self):
"""Shutdown the server."""
# The server thread is daemonic, it will stop when the process exits.
pass
@property
def has_client(self) -> bool:
"""Check if a Quest 3 client is connected."""
return len(self.server._clients) > 0
@property
def last_data_time(self) -> float:
"""Timestamp of last received tracking data."""
with self.server.last_data_time_shared.get_lock():
return self.server.last_data_time_shared.value
def _compute_pinch(hand_pos_25x3: np.ndarray) -> tuple:
"""Compute pinch state from thumb tip (4) and index tip (8) distance.
Returns (pinchValue, is_pinching).
pinchValue: ~15.0 (open) 0.0 (pinched), scaled to match Vuer convention * 100.
"""
if np.all(hand_pos_25x3 == 0):
return 10.0, False
# OpenXR hand joint indices: thumb tip = 4, index tip = 8
thumb_tip = hand_pos_25x3[4]
index_tip = hand_pos_25x3[8]
dist = np.linalg.norm(thumb_tip - index_tip)
# Map distance to pinch value (meters → normalized)
# Typical range: 0.0 (touching) to 0.10 (fully open)
pinch_value = min(dist * 150.0, 15.0) # Scale to ~0-15 range
is_pinching = dist < 0.025 # 2.5cm threshold
return pinch_value * 100.0, is_pinching # Match TeleVuerWrapper scaling
def _compute_squeeze(hand_pos_25x3: np.ndarray) -> tuple:
"""Compute squeeze (fist) state from average finger curl.
Returns (squeezeValue, is_squeezing).
squeezeValue: 0.0 (open) 1.0 (fist).
"""
if np.all(hand_pos_25x3 == 0):
return 0.0, False
# Measure distance from fingertips to palm center
# Joint indices: palm=0, index_tip=8, middle_tip=12, ring_tip=16, pinky_tip=20
palm = hand_pos_25x3[0]
tips = hand_pos_25x3[[8, 12, 16, 20]]
dists = np.linalg.norm(tips - palm, axis=1)
avg_dist = np.mean(dists)
# Map: ~0.02m (fist) → 1.0, ~0.10m (open) → 0.0
squeeze_value = np.clip(1.0 - (avg_dist - 0.02) / 0.08, 0.0, 1.0)
is_squeezing = squeeze_value > 0.7
return squeeze_value, is_squeezing

4
server/requirements.txt

@ -0,0 +1,4 @@
# Robot-side server dependencies
websockets>=12.0
numpy>=1.24
opencv-python>=4.8

315
server/teleop_server.py

@ -0,0 +1,315 @@
#!/usr/bin/env python3
"""
Lightweight WebSocket server for Quest 3 native teleop app.
Replaces Vuer (~900 lines) with ~200 lines.
Receives body tracking data (JSON) from the Quest 3 native app,
writes poses to shared memory arrays compatible with teleop_hand_and_arm.py,
and sends webcam JPEG frames back to the app.
Protocol:
Client → Server: JSON text messages with tracking data
Server → Client: Binary messages with JPEG webcam frames
Server → Client: JSON text messages for status/config
Shared Memory Format (matches TeleVuer/TeleVuerWrapper interface):
head_pose_shared: Array('d', 16) — 4x4 SE(3), column-major ('F' order)
left_arm_pose_shared: Array('d', 16) — 4x4 SE(3), column-major ('F' order)
right_arm_pose_shared: Array('d', 16) — 4x4 SE(3), column-major ('F' order)
last_data_time_shared: Value('d') — Unix timestamp of last received data
chest_pose_shared: Array('d', 16) — 4x4 SE(3), NEW: chest orientation
Hand tracking (if enabled):
left_hand_position_shared: Array('d', 75) 25×3 positions
right_hand_position_shared: Array('d', 75) 25×3 positions
left_hand_orientation_shared: Array('d', 225) 25×3×3 rotations (col-major)
right_hand_orientation_shared: Array('d', 225) 25×3×3 rotations (col-major)
Usage:
# Standalone (for testing):
python teleop_server.py --port 8765
# Integrated with teleop_hand_and_arm.py:
# Import TeleopServer and pass shared memory arrays
"""
import asyncio
import json
import time
import logging
import argparse
import threading
import numpy as np
from multiprocessing import Array, Value
try:
import websockets
import websockets.asyncio.server
except ImportError:
print("ERROR: 'websockets' package required. Install with: pip install websockets")
raise
logger = logging.getLogger("teleop_server")
class TeleopServer:
    """WebSocket server that bridges Quest 3 native app to teleop shared memory."""

    def __init__(self, host="0.0.0.0", port=8765,
                 head_pose_shared=None,
                 left_arm_pose_shared=None,
                 right_arm_pose_shared=None,
                 last_data_time_shared=None,
                 chest_pose_shared=None,
                 left_hand_position_shared=None,
                 right_hand_position_shared=None,
                 left_hand_orientation_shared=None,
                 right_hand_orientation_shared=None,
                 left_hand_pinch_shared=None,
                 left_hand_squeeze_shared=None,
                 right_hand_pinch_shared=None,
                 right_hand_squeeze_shared=None):
        """All *_shared arguments are multiprocessing Array/Value objects.

        Any argument left as None is created locally (standalone mode), so the
        server can run without an external teleop process.
        """
        self.host = host
        self.port = port
        # Create shared memory if not provided (standalone mode)
        self.head_pose_shared = head_pose_shared or Array('d', 16, lock=True)
        self.left_arm_pose_shared = left_arm_pose_shared or Array('d', 16, lock=True)
        self.right_arm_pose_shared = right_arm_pose_shared or Array('d', 16, lock=True)
        self.last_data_time_shared = last_data_time_shared or Value('d', 0.0, lock=True)
        self.chest_pose_shared = chest_pose_shared or Array('d', 16, lock=True)
        # Hand tracking arrays (25 joints: 25x3 positions, 25x3x3 rotations)
        self.left_hand_position_shared = left_hand_position_shared or Array('d', 75, lock=True)
        self.right_hand_position_shared = right_hand_position_shared or Array('d', 75, lock=True)
        self.left_hand_orientation_shared = left_hand_orientation_shared or Array('d', 225, lock=True)
        self.right_hand_orientation_shared = right_hand_orientation_shared or Array('d', 225, lock=True)
        # Pinch/squeeze (computed from finger positions); may stay None —
        # nothing in this class writes them.
        self.left_hand_pinch_shared = left_hand_pinch_shared
        self.left_hand_squeeze_shared = left_hand_squeeze_shared
        self.right_hand_pinch_shared = right_hand_pinch_shared
        self.right_hand_squeeze_shared = right_hand_squeeze_shared
        # Initialize with identity matrices
        identity_flat = np.eye(4).flatten(order='F').tolist()
        with self.head_pose_shared.get_lock():
            self.head_pose_shared[:] = identity_flat
        with self.left_arm_pose_shared.get_lock():
            self.left_arm_pose_shared[:] = identity_flat
        with self.right_arm_pose_shared.get_lock():
            self.right_arm_pose_shared[:] = identity_flat
        with self.chest_pose_shared.get_lock():
            self.chest_pose_shared[:] = identity_flat
        # Webcam frame (set by external code, sent to clients)
        self._webcam_frame = None
        self._webcam_lock = threading.Lock()
        # NOTE(review): this Event is created outside any running event loop
        # and is never awaited (_send_webcam_loop polls on a timer instead) —
        # looks like dead code; confirm before relying on it.
        self._webcam_event = asyncio.Event()
        # Connection tracking
        self._clients = set()
        self._message_count = 0
        self._last_log_time = 0

    def set_webcam_frame(self, jpeg_bytes: bytes):
        """Called by external code (e.g., ThreadedWebcam) to provide a new JPEG frame."""
        with self._webcam_lock:
            self._webcam_frame = jpeg_bytes
        # Signal the async send loop
        try:
            self._webcam_event.set()
        except RuntimeError:
            pass  # Event loop not running yet

    async def _handle_client(self, websocket):
        """Handle a single WebSocket client connection.

        Pushes an initial config message, spawns a per-client webcam sender
        task, then consumes tracking messages until the client disconnects.
        """
        remote = websocket.remote_address
        logger.info(f"Client connected: {remote}")
        self._clients.add(websocket)
        # Send initial config
        await websocket.send(json.dumps({
            "type": "config",
            "version": "1.0",
            "body_tracking": True,
        }))
        # Start webcam sender task for this client
        webcam_task = asyncio.create_task(self._send_webcam_loop(websocket))
        try:
            async for message in websocket:
                if isinstance(message, str):
                    self._handle_tracking_message(message)
                # Binary messages from client are ignored
        except websockets.exceptions.ConnectionClosed:
            logger.info(f"Client disconnected: {remote}")
        finally:
            # Always detach the client and tear down its webcam sender.
            self._clients.discard(websocket)
            webcam_task.cancel()
            try:
                await webcam_task
            except asyncio.CancelledError:
                pass

    def _handle_tracking_message(self, message: str):
        """Parse tracking JSON and write to shared memory.

        Each field is optional and length-checked; malformed or missing
        fields leave the previous shared-memory values untouched.
        """
        try:
            data = json.loads(message)
        except json.JSONDecodeError:
            logger.warning("Invalid JSON received")
            return
        msg_type = data.get("type")
        if msg_type != "tracking":
            return
        self._message_count += 1
        now = time.time()
        # Rate-limited logging (every 5 seconds)
        if now - self._last_log_time > 5.0:
            logger.info(f"Tracking messages received: {self._message_count}")
            self._last_log_time = now
        # Update timestamp
        with self.last_data_time_shared.get_lock():
            self.last_data_time_shared.value = now
        # Head pose: 7 floats [x, y, z, qx, qy, qz, qw] → 4x4 column-major
        head = data.get("head")
        if head and len(head) == 7:
            mat = _pose7_to_mat16(head)
            with self.head_pose_shared.get_lock():
                self.head_pose_shared[:] = mat
        # Chest pose: 7 floats → 4x4 column-major
        chest = data.get("chest")
        if chest and len(chest) == 7:
            mat = _pose7_to_mat16(chest)
            with self.chest_pose_shared.get_lock():
                self.chest_pose_shared[:] = mat
        # Left wrist: 16 floats (already column-major 4x4)
        left_wrist = data.get("left_wrist")
        if left_wrist and len(left_wrist) == 16:
            with self.left_arm_pose_shared.get_lock():
                self.left_arm_pose_shared[:] = left_wrist
        # Right wrist: 16 floats (already column-major 4x4)
        right_wrist = data.get("right_wrist")
        if right_wrist and len(right_wrist) == 16:
            with self.right_arm_pose_shared.get_lock():
                self.right_arm_pose_shared[:] = right_wrist
        # Hand positions: 75 floats each (25 joints × 3)
        left_hand_pos = data.get("left_hand_pos")
        if left_hand_pos and len(left_hand_pos) == 75:
            with self.left_hand_position_shared.get_lock():
                self.left_hand_position_shared[:] = left_hand_pos
        right_hand_pos = data.get("right_hand_pos")
        if right_hand_pos and len(right_hand_pos) == 75:
            with self.right_hand_position_shared.get_lock():
                self.right_hand_position_shared[:] = right_hand_pos
        # Hand rotations: 225 floats each (25 joints × 9)
        left_hand_rot = data.get("left_hand_rot")
        if left_hand_rot and len(left_hand_rot) == 225:
            with self.left_hand_orientation_shared.get_lock():
                self.left_hand_orientation_shared[:] = left_hand_rot
        right_hand_rot = data.get("right_hand_rot")
        if right_hand_rot and len(right_hand_rot) == 225:
            with self.right_hand_orientation_shared.get_lock():
                self.right_hand_orientation_shared[:] = right_hand_rot

    async def _send_webcam_loop(self, websocket):
        """Send webcam JPEG frames to a client at ~15 fps.

        Polls the latest frame on a fixed timer; the same frame may be sent
        more than once if no newer frame has arrived.
        """
        interval = 1.0 / 15.0
        while True:
            await asyncio.sleep(interval)
            # Copy the reference under the lock; send outside it.
            with self._webcam_lock:
                frame = self._webcam_frame
            if frame is not None:
                try:
                    await websocket.send(frame)
                except websockets.exceptions.ConnectionClosed:
                    break

    async def serve(self):
        """Start the WebSocket server. Runs forever (until cancelled)."""
        logger.info(f"Starting teleop server on ws://{self.host}:{self.port}")
        async with websockets.asyncio.server.serve(
            self._handle_client,
            self.host,
            self.port,
            max_size=2**20,  # 1 MB max message size
            ping_interval=20,
            ping_timeout=20,
        ) as server:
            logger.info(f"Teleop server running on ws://{self.host}:{self.port}")
            await asyncio.Future()  # Run forever

    def start_in_thread(self):
        """Start the server in a background thread. Returns the thread."""
        def _run():
            # Each thread needs its own event loop; asyncio.run creates one.
            asyncio.run(self.serve())
        thread = threading.Thread(target=_run, daemon=True)
        thread.start()
        logger.info("Teleop server started in background thread")
        return thread
def _pose7_to_mat16(pose7):
"""Convert [x, y, z, qx, qy, qz, qw] to 16-float column-major 4x4 matrix.
Compatible with NumPy reshape(4,4, order='F').
"""
x, y, z, qx, qy, qz, qw = pose7
# Quaternion to rotation matrix
xx = qx * qx; yy = qy * qy; zz = qz * qz
xy = qx * qy; xz = qx * qz; yz = qy * qz
wx = qw * qx; wy = qw * qy; wz = qw * qz
r00 = 1 - 2 * (yy + zz)
r01 = 2 * (xy - wz)
r02 = 2 * (xz + wy)
r10 = 2 * (xy + wz)
r11 = 1 - 2 * (xx + zz)
r12 = 2 * (yz - wx)
r20 = 2 * (xz - wy)
r21 = 2 * (yz + wx)
r22 = 1 - 2 * (xx + yy)
# Column-major (Fortran order): column 0, column 1, column 2, column 3
return [
r00, r10, r20, 0.0, # Column 0
r01, r11, r21, 0.0, # Column 1
r02, r12, r22, 0.0, # Column 2
x, y, z, 1.0, # Column 3
]
# --- Standalone mode ---
def main():
    """CLI entry point: parse arguments and run a standalone TeleopServer."""
    arg_parser = argparse.ArgumentParser(description="Teleop WebSocket server for Quest 3 native app")
    arg_parser.add_argument("--host", default="0.0.0.0", help="Bind address (default: 0.0.0.0)")
    arg_parser.add_argument("--port", type=int, default=8765, help="WebSocket port (default: 8765)")
    arg_parser.add_argument("--verbose", action="store_true", help="Enable debug logging")
    opts = arg_parser.parse_args()
    # --verbose switches the root logger to DEBUG; INFO otherwise.
    log_level = logging.DEBUG if opts.verbose else logging.INFO
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s [%(name)s] %(levelname)s: %(message)s"
    )
    # Blocks until interrupted; serve() runs forever.
    asyncio.run(TeleopServer(host=opts.host, port=opts.port).serve())


if __name__ == "__main__":
    main()
Loading…
Cancel
Save