Browse Source

Add triple camera view: head + left/right wrist cameras

Server: reads all 3 cameras from Isaac Sim SHM, encodes JPEG
(head Q85, wrists Q70), sends with 1-byte camera ID prefix.
Head at ~10fps, wrist cameras at ~5fps for bandwidth control.

Godot: teleop_client parses camera ID byte and emits per-camera
signals. Three webcam quads arranged in a row (left|head|right)
in front of the user in AR mode.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
master
melancholytron 3 weeks ago
parent
commit
cce63b9f65
  1. 56
      Main.gd
  2. 8
      Main.tscn
  3. 22
      scripts/teleop_client.gd
  4. 86
      server/retarget_bridge.py
  5. 49
      server/teleop_server.py

56
Main.gd

@ -10,7 +10,9 @@ enum Phase { CONFIG, AR }
@onready var teleop_client: Node = $TeleopClient
@onready var xr_origin: XROrigin3D = $XROrigin3D
@onready var xr_camera: XRCamera3D = $XROrigin3D/XRCamera3D
@onready var webcam_quad: MeshInstance3D = $XROrigin3D/WebcamQuad
@onready var webcam_quad_head: MeshInstance3D = $XROrigin3D/WebcamQuadHead
@onready var webcam_quad_left: MeshInstance3D = $XROrigin3D/WebcamQuadLeft
@onready var webcam_quad_right: MeshInstance3D = $XROrigin3D/WebcamQuadRight
@onready var start_screen: Node3D = $XROrigin3D/StartScreen
@onready var left_controller: XRController3D = $XROrigin3D/LeftController
@onready var right_controller: XRController3D = $XROrigin3D/RightController
@ -52,8 +54,10 @@ const GAZE_BALL_BASE_COLORS: Array = [
func _ready() -> void:
# Hide webcam quad and start screen until positioned
webcam_quad.visible = false
# Hide webcam quads and start screen until positioned
webcam_quad_head.visible = false
webcam_quad_left.visible = false
webcam_quad_right.visible = false
start_screen.visible = false
# Initialize OpenXR interface
@ -92,8 +96,10 @@ func _ready() -> void:
# Connect teleop client connection state to start screen
teleop_client.connection_state_changed.connect(_on_connection_state_changed)
# Wire webcam frames (can happen anytime we're connected)
teleop_client.webcam_frame_received.connect(webcam_quad._on_webcam_frame)
# Wire webcam frames — 3 cameras routed to 3 quads
teleop_client.webcam_frame_head.connect(webcam_quad_head._on_webcam_frame)
teleop_client.webcam_frame_left.connect(webcam_quad_left._on_webcam_frame)
teleop_client.webcam_frame_right.connect(webcam_quad_right._on_webcam_frame)
# Setup VR pointer with references to controllers and XR origin
vr_pointer.setup(xr_origin, xr_camera, left_controller, right_controller)
@ -219,7 +225,7 @@ func _on_pose_recentered() -> void:
if current_phase == Phase.CONFIG:
_position_panel_in_front_of_user(hmd)
elif current_phase == Phase.AR:
_position_quad_in_front_of_user(webcam_quad, hmd, 1.2, -0.3)
_position_camera_row(hmd)
_create_gaze_balls(hmd)
@ -258,10 +264,12 @@ func _on_launch_ar_requested() -> void:
if not body_tracker.tracking_data_ready.is_connected(teleop_client._on_tracking_data):
body_tracker.tracking_data_ready.connect(teleop_client._on_tracking_data)
# Position webcam quad in front of user (stationary)
# Position webcam quads in front of user — row: left | head | right
var hmd := XRServer.get_hmd_transform()
_position_quad_in_front_of_user(webcam_quad, hmd, 1.2, -0.3)
webcam_quad.visible = true
_position_camera_row(hmd)
webcam_quad_head.visible = true
webcam_quad_left.visible = true
webcam_quad_right.visible = true
# Hide start screen and G1 models
start_screen.hide_screen()
@ -318,6 +326,32 @@ func _position_quad_in_front_of_user(quad: Node3D, hmd: Transform3D, distance: f
quad.rotate_y(PI)
func _position_camera_row(hmd: Transform3D) -> void:
	## Position the 3 camera quads in a row in front of the user: left | head | right.
	##
	## hmd: HMD transform used as the reference. The row is centred 1.2 m along
	## the HMD's horizontal forward direction and 0.3 m below the HMD origin.
	## Each quad is then turned towards the HMD origin.

	# Flatten the view direction onto the horizontal plane so that looking up or
	# down does not move the row vertically.
	var forward := -hmd.basis.z
	forward.y = 0
	if forward.length() < 0.01:
		# The flattened vector is (almost) zero, i.e. the HMD -Z axis is nearly
		# vertical; fall back to -Z.
		forward = Vector3(0, 0, -1)
	forward = forward.normalized()

	# Horizontal axis pointing to the user's right; negative offsets go left.
	var right_dir := forward.cross(Vector3.UP).normalized()

	var center := hmd.origin + forward * 1.2
	center.y = hmd.origin.y - 0.3

	var spacing := 0.85  # distance between quad centers

	for quad_data in [
		[webcam_quad_left, -spacing],
		[webcam_quad_head, 0.0],
		[webcam_quad_right, spacing],
	]:
		var quad: Node3D = quad_data[0]
		var offset: float = quad_data[1]
		quad.global_position = center + right_dir * offset
		# look_at() points the quad's -Z axis at the HMD. Because the quad sits
		# below the HMD this includes an upward pitch.
		quad.look_at(hmd.origin, Vector3.UP)
		# Half-turn so the opposite side faces the user (presumably the quad's
		# visible face is +Z - confirm against the WebcamQuad scene).
		# The turn is made about the quad's OWN Y axis: rotate_y() turns about
		# the parent's Y axis, which mirrors the pitch set by look_at() and
		# leaves the quad tilted away from the user.
		quad.rotate_object_local(Vector3.UP, PI)
func _create_gaze_balls(hmd: Transform3D) -> void:
var forward := -hmd.basis.z
forward.y = 0
@ -471,7 +505,9 @@ func _exit_ar_mode() -> void:
_gaze_timers[i] = 0.0
if _gaze_laser:
_gaze_laser.visible = false
webcam_quad.visible = false
webcam_quad_head.visible = false
webcam_quad_left.visible = false
webcam_quad_right.visible = false
# Show start screen again, reposition in front of user
var hmd := XRServer.get_hmd_transform()

8
Main.tscn

@ -14,9 +14,15 @@ script = ExtResource("1")
[node name="XRCamera3D" type="XRCamera3D" parent="XROrigin3D"]
[node name="WebcamQuad" parent="XROrigin3D" instance=ExtResource("5")]
[node name="WebcamQuadHead" parent="XROrigin3D" instance=ExtResource("5")]
transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, -0.3, -1.5)
[node name="WebcamQuadLeft" parent="XROrigin3D" instance=ExtResource("5")]
transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, -0.85, -0.3, -1.5)
[node name="WebcamQuadRight" parent="XROrigin3D" instance=ExtResource("5")]
transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0.85, -0.3, -1.5)
[node name="StartScreen" parent="XROrigin3D" instance=ExtResource("6")]
transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1.5, -1.5)

22
scripts/teleop_client.gd

@ -9,7 +9,11 @@ extends Node
## No SSL required — raw WebSocket over local network.
## Emitted when a JPEG webcam frame is received from the server.
signal webcam_frame_received(jpeg_bytes: PackedByteArray)
## Camera IDs: 0=head, 1=left_wrist, 2=right_wrist
signal webcam_frame_received(jpeg_bytes: PackedByteArray) ## Legacy (head only)
signal webcam_frame_head(jpeg_bytes: PackedByteArray)
signal webcam_frame_left(jpeg_bytes: PackedByteArray)
signal webcam_frame_right(jpeg_bytes: PackedByteArray)
## Emitted when connection state changes.
signal connection_state_changed(connected: bool)
@ -70,8 +74,20 @@ func _process(delta: float) -> void:
if ws.was_string_packet():
_handle_text_message(packet.get_string_from_utf8())
else:
# Binary = JPEG webcam frame
webcam_frame_received.emit(packet)
# Binary = 1 byte camera_id + JPEG data
if packet.size() > 1:
var cam_id := packet[0]
var jpeg_data := packet.slice(1)
match cam_id:
0:
webcam_frame_head.emit(jpeg_data)
webcam_frame_received.emit(jpeg_data)
1:
webcam_frame_left.emit(jpeg_data)
2:
webcam_frame_right.emit(jpeg_data)
_:
webcam_frame_received.emit(packet)
# Send any pending tracking data
for data in _pending_data:

86
server/retarget_bridge.py

@ -607,7 +607,11 @@ def retarget_hands(left_retargeting, right_retargeting,
# ---------------------------------------------------------------------------
def start_camera_relay(server, zmq_port=55555):
"""Read camera from Isaac Sim shared memory and relay JPEG frames to WebSocket clients."""
"""Read all cameras from Isaac Sim shared memory and relay JPEG frames to WebSocket clients.
Camera IDs: 0=head, 1=left_wrist, 2=right_wrist
Head at Q85, wrist cameras at Q70 for bandwidth control.
"""
import cv2
sys.path.insert(0, os.path.expanduser("~/git/unitree_sim_isaaclab"))
from tools.shared_memory_utils import MultiImageReader
@ -620,45 +624,67 @@ def start_camera_relay(server, zmq_port=55555):
except Exception:
pass
# SHM name -> (camera_id, jpeg_quality)
CAMERAS = [
("head", 0, 85),
("left", 1, 70),
("right", 2, 70),
]
def _relay_loop():
reader = MultiImageReader()
# Try camera names in priority order
camera_names = ["head", "left", "right"]
active_camera = None
frame_count = 0
last_ts = 0
last_ts = {"head": 0, "left": 0, "right": 0}
frame_counts = {"head": 0, "left": 0, "right": 0}
active_cameras = set()
logger.info("Camera relay: waiting for Isaac Sim shared memory...")
while active_camera is None:
for name in camera_names:
# Wait for at least one camera
while not active_cameras:
for name, cam_id, quality in CAMERAS:
img = reader.read_single_image(name)
if img is not None:
active_camera = name
# Untrack all opened SHMs so killing the bridge won't destroy them
for shm_name in list(reader.shms.keys()):
_untrack_shm(shm_name)
logger.info(f"Camera relay: using '{name}' camera ({img.shape[1]}x{img.shape[0]})")
break
if active_camera is None:
active_cameras.add(name)
if not active_cameras:
time.sleep(1.0)
# Untrack all opened SHMs
for shm_name in list(reader.shms.keys()):
_untrack_shm(shm_name)
logger.info(f"Camera relay: active cameras: {sorted(active_cameras)}")
while True:
try:
img = reader.read_single_image(active_camera)
if img is not None:
cur_ts = reader.last_timestamps.get(active_camera, 0)
if cur_ts > last_ts:
last_ts = cur_ts
h, w = img.shape[:2]
ok, buf = cv2.imencode(".jpg", img, [cv2.IMWRITE_JPEG_QUALITY, 85])
if ok:
server.set_webcam_frame(buf.tobytes())
frame_count += 1
if frame_count == 1:
logger.info(f"Camera relay: first frame {w}x{h}, "
f"{len(buf)} bytes")
elif frame_count % 300 == 0:
logger.info(f"Camera relay: {frame_count} frames sent")
for name, cam_id, quality in CAMERAS:
if name not in active_cameras:
# Periodically retry missing cameras
if frame_counts["head"] % 150 == 0:
img = reader.read_single_image(name)
if img is not None:
active_cameras.add(name)
for sn in list(reader.shms.keys()):
_untrack_shm(sn)
logger.info(f"Camera relay: '{name}' camera now available")
continue
img = reader.read_single_image(name)
if img is not None:
cur_ts = reader.last_timestamps.get(name, 0)
if cur_ts > last_ts[name]:
last_ts[name] = cur_ts
ok, buf = cv2.imencode(".jpg", img, [cv2.IMWRITE_JPEG_QUALITY, quality])
if ok:
server.set_webcam_frame(buf.tobytes(), camera_id=cam_id)
frame_counts[name] += 1
total = sum(frame_counts.values())
if total == 1:
h, w = img.shape[:2]
logger.info(f"Camera relay: first frame from '{name}' "
f"{w}x{h}, {len(buf)} bytes")
elif total % 900 == 0:
logger.info(f"Camera relay: frames sent "
f"head={frame_counts['head']} "
f"left={frame_counts['left']} "
f"right={frame_counts['right']}")
except Exception as e:
logger.error(f"Camera relay error: {e}")
time.sleep(1.0 / 15.0)

49
server/teleop_server.py

@ -107,8 +107,9 @@ class TeleopServer:
with self.chest_pose_shared.get_lock():
self.chest_pose_shared[:] = identity_flat
# Webcam frame (set by external code, sent to clients)
self._webcam_frame = None
# Camera frames: dict of camera_id -> jpeg_bytes (set by external code)
# Camera IDs: 0=head, 1=left_wrist, 2=right_wrist
self._camera_frames = {}
self._webcam_lock = threading.Lock()
self._webcam_event = asyncio.Event()
@ -117,11 +118,15 @@ class TeleopServer:
self._message_count = 0
self._last_log_time = 0
def set_webcam_frame(self, jpeg_bytes: bytes):
"""Called by external code (e.g., ThreadedWebcam) to provide a new JPEG frame."""
def set_webcam_frame(self, jpeg_bytes: bytes, camera_id: int = 0):
"""Called by external code to provide a new JPEG frame for a camera.
Args:
jpeg_bytes: JPEG-encoded image data
camera_id: 0=head, 1=left_wrist, 2=right_wrist
"""
with self._webcam_lock:
self._webcam_frame = jpeg_bytes
# Signal the async send loop
self._camera_frames[camera_id] = jpeg_bytes
try:
self._webcam_event.set()
except RuntimeError:
@ -237,20 +242,32 @@ class TeleopServer:
self.body_joints_shared[:] = body_joints
async def _send_webcam_loop(self, websocket):
"""Send webcam JPEG frames to a client at ~10 fps."""
"""Send camera JPEG frames to a client.
Head camera (~10fps), wrist cameras (~5fps).
Each binary message: 1 byte camera_id + JPEG data.
"""
interval = 1.0 / 10.0
cycle = 0
while True:
await asyncio.sleep(interval)
with self._webcam_lock:
frame = self._webcam_frame
if frame is not None:
try:
await websocket.send(frame)
except websockets.exceptions.ConnectionClosed:
break
except Exception as e:
logger.warning(f"Webcam send error: {e}")
break
frames = dict(self._camera_frames)
# Head every cycle, wrist cameras every other cycle
cam_ids = [0] # always send head
if cycle % 2 == 0:
cam_ids.extend([1, 2])
cycle += 1
for cam_id in cam_ids:
frame = frames.get(cam_id)
if frame is not None:
try:
await websocket.send(bytes([cam_id]) + frame)
except websockets.exceptions.ConnectionClosed:
return
except Exception as e:
logger.warning(f"Webcam send error (cam {cam_id}): {e}")
return
async def serve(self):
"""Start the WebSocket server."""

Loading…
Cancel
Save