Route three Isaac Sim cameras (head, left wrist, right wrist) to three quads.

Server side: retarget_bridge.py reads every camera listed in CAMERAS from
shared memory and hands each JPEG to TeleopServer.set_webcam_frame(...,
camera_id=...). teleop_server.py keeps the latest frame per camera id and
sends each one as a binary message of 1 byte camera_id + JPEG data (head on
every cycle, wrist cameras on every other cycle).

Client side: teleop_client.gd reads the leading id byte and emits one signal
per camera; Main.gd connects those signals to WebcamQuadHead / WebcamQuadLeft /
WebcamQuadRight and lays the three quads out in a row in front of the user.

Review changes relative to the submitted patch:
  * Restored the unified-diff line structure. The submitted file had every
    newline collapsed, so leading whitespace here is reconstructed (tabs in
    .gd files, 4 spaces in .py files). If a context line does not match the
    target file, apply with `git apply --ignore-whitespace`.
  * server/retarget_bridge.py: the retry for cameras that are not available
    yet was gated on `frame_counts["head"] % 150 == 0`, which is true on
    every pass while the head camera has sent no frames (or is stalled on a
    multiple of 150). The retry is now paced by a dedicated loop counter,
    i.e. once every 150 relay passes. The second hunk header of that file is
    updated accordingly (+624,67 -> +624,69).

diff --git a/Main.gd b/Main.gd
index 4af0d4a..6094c71 100644
--- a/Main.gd
+++ b/Main.gd
@@ -10,7 +10,9 @@ enum Phase { CONFIG, AR }
 @onready var teleop_client: Node = $TeleopClient
 @onready var xr_origin: XROrigin3D = $XROrigin3D
 @onready var xr_camera: XRCamera3D = $XROrigin3D/XRCamera3D
-@onready var webcam_quad: MeshInstance3D = $XROrigin3D/WebcamQuad
+@onready var webcam_quad_head: MeshInstance3D = $XROrigin3D/WebcamQuadHead
+@onready var webcam_quad_left: MeshInstance3D = $XROrigin3D/WebcamQuadLeft
+@onready var webcam_quad_right: MeshInstance3D = $XROrigin3D/WebcamQuadRight
 @onready var start_screen: Node3D = $XROrigin3D/StartScreen
 @onready var left_controller: XRController3D = $XROrigin3D/LeftController
 @onready var right_controller: XRController3D = $XROrigin3D/RightController
@@ -52,8 +54,10 @@ const GAZE_BALL_BASE_COLORS: Array = [
 
 
 func _ready() -> void:
-	# Hide webcam quad and start screen until positioned
-	webcam_quad.visible = false
+	# Hide webcam quads and start screen until positioned
+	webcam_quad_head.visible = false
+	webcam_quad_left.visible = false
+	webcam_quad_right.visible = false
 	start_screen.visible = false
 
 	# Initialize OpenXR interface
@@ -92,8 +96,10 @@ func _ready() -> void:
 	# Connect teleop client connection state to start screen
 	teleop_client.connection_state_changed.connect(_on_connection_state_changed)
 
-	# Wire webcam frames (can happen anytime we're connected)
-	teleop_client.webcam_frame_received.connect(webcam_quad._on_webcam_frame)
+	# Wire webcam frames — 3 cameras routed to 3 quads
+	teleop_client.webcam_frame_head.connect(webcam_quad_head._on_webcam_frame)
+	teleop_client.webcam_frame_left.connect(webcam_quad_left._on_webcam_frame)
+	teleop_client.webcam_frame_right.connect(webcam_quad_right._on_webcam_frame)
 
 	# Setup VR pointer with references to controllers and XR origin
 	vr_pointer.setup(xr_origin, xr_camera, left_controller, right_controller)
@@ -219,7 +225,7 @@ func _on_pose_recentered() -> void:
 	if current_phase == Phase.CONFIG:
 		_position_panel_in_front_of_user(hmd)
 	elif current_phase == Phase.AR:
-		_position_quad_in_front_of_user(webcam_quad, hmd, 1.2, -0.3)
+		_position_camera_row(hmd)
 		_create_gaze_balls(hmd)
 
 
@@ -258,10 +264,12 @@ func _on_launch_ar_requested() -> void:
 	if not body_tracker.tracking_data_ready.is_connected(teleop_client._on_tracking_data):
 		body_tracker.tracking_data_ready.connect(teleop_client._on_tracking_data)
 
-	# Position webcam quad in front of user (stationary)
+	# Position webcam quads in front of user — row: left | head | right
 	var hmd := XRServer.get_hmd_transform()
-	_position_quad_in_front_of_user(webcam_quad, hmd, 1.2, -0.3)
-	webcam_quad.visible = true
+	_position_camera_row(hmd)
+	webcam_quad_head.visible = true
+	webcam_quad_left.visible = true
+	webcam_quad_right.visible = true
 
 	# Hide start screen and G1 models
 	start_screen.hide_screen()
@@ -318,6 +326,32 @@ func _position_quad_in_front_of_user(quad: Node3D, hmd: Transform3D, distance: f
 	quad.rotate_y(PI)
 
 
+func _position_camera_row(hmd: Transform3D) -> void:
+	## Position 3 camera quads in a row: left | head | right
+	var forward := -hmd.basis.z
+	forward.y = 0
+	if forward.length() < 0.01:
+		forward = Vector3(0, 0, -1)
+	forward = forward.normalized()
+	var right_dir := forward.cross(Vector3.UP).normalized()
+
+	var center := hmd.origin + forward * 1.2
+	center.y = hmd.origin.y - 0.3
+
+	var spacing := 0.85  # distance between quad centers
+
+	for quad_data in [
+		[webcam_quad_left, -spacing],
+		[webcam_quad_head, 0.0],
+		[webcam_quad_right, spacing],
+	]:
+		var quad: Node3D = quad_data[0]
+		var offset: float = quad_data[1]
+		quad.global_position = center + right_dir * offset
+		quad.look_at(hmd.origin, Vector3.UP)
+		quad.rotate_y(PI)
+
+
 func _create_gaze_balls(hmd: Transform3D) -> void:
 	var forward := -hmd.basis.z
 	forward.y = 0
@@ -471,7 +505,9 @@ func _exit_ar_mode() -> void:
 		_gaze_timers[i] = 0.0
 	if _gaze_laser:
 		_gaze_laser.visible = false
-	webcam_quad.visible = false
+	webcam_quad_head.visible = false
+	webcam_quad_left.visible = false
+	webcam_quad_right.visible = false
 
 	# Show start screen again, reposition in front of user
 	var hmd := XRServer.get_hmd_transform()
diff --git a/Main.tscn b/Main.tscn
index 33a6ccd..550f380 100644
--- a/Main.tscn
+++ b/Main.tscn
@@ -14,9 +14,15 @@ script = ExtResource("1")
 
 [node name="XRCamera3D" type="XRCamera3D" parent="XROrigin3D"]
 
-[node name="WebcamQuad" parent="XROrigin3D" instance=ExtResource("5")]
+[node name="WebcamQuadHead" parent="XROrigin3D" instance=ExtResource("5")]
 transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, -0.3, -1.5)
 
+[node name="WebcamQuadLeft" parent="XROrigin3D" instance=ExtResource("5")]
+transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, -0.85, -0.3, -1.5)
+
+[node name="WebcamQuadRight" parent="XROrigin3D" instance=ExtResource("5")]
+transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0.85, -0.3, -1.5)
+
 [node name="StartScreen" parent="XROrigin3D" instance=ExtResource("6")]
 transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1.5, -1.5)
 
diff --git a/scripts/teleop_client.gd b/scripts/teleop_client.gd
index e2bfb34..8b0fc47 100644
--- a/scripts/teleop_client.gd
+++ b/scripts/teleop_client.gd
@@ -9,7 +9,11 @@ extends Node
 ## No SSL required — raw WebSocket over local network.
 
 ## Emitted when a JPEG webcam frame is received from the server.
-signal webcam_frame_received(jpeg_bytes: PackedByteArray)
+## Camera IDs: 0=head, 1=left_wrist, 2=right_wrist
+signal webcam_frame_received(jpeg_bytes: PackedByteArray)  ## Legacy (head only)
+signal webcam_frame_head(jpeg_bytes: PackedByteArray)
+signal webcam_frame_left(jpeg_bytes: PackedByteArray)
+signal webcam_frame_right(jpeg_bytes: PackedByteArray)
 
 ## Emitted when connection state changes.
 signal connection_state_changed(connected: bool)
@@ -70,8 +74,20 @@ func _process(delta: float) -> void:
 			if ws.was_string_packet():
 				_handle_text_message(packet.get_string_from_utf8())
 			else:
-				# Binary = JPEG webcam frame
-				webcam_frame_received.emit(packet)
+				# Binary = 1 byte camera_id + JPEG data
+				if packet.size() > 1:
+					var cam_id := packet[0]
+					var jpeg_data := packet.slice(1)
+					match cam_id:
+						0:
+							webcam_frame_head.emit(jpeg_data)
+							webcam_frame_received.emit(jpeg_data)
+						1:
+							webcam_frame_left.emit(jpeg_data)
+						2:
+							webcam_frame_right.emit(jpeg_data)
+						_:
+							webcam_frame_received.emit(packet)
 
 		# Send any pending tracking data
 		for data in _pending_data:
diff --git a/server/retarget_bridge.py b/server/retarget_bridge.py
index 1b28459..3448a99 100644
--- a/server/retarget_bridge.py
+++ b/server/retarget_bridge.py
@@ -607,7 +607,11 @@ def retarget_hands(left_retargeting, right_retargeting,
 # ---------------------------------------------------------------------------
 
 def start_camera_relay(server, zmq_port=55555):
-    """Read camera from Isaac Sim shared memory and relay JPEG frames to WebSocket clients."""
+    """Read all cameras from Isaac Sim shared memory and relay JPEG frames to WebSocket clients.
+
+    Camera IDs: 0=head, 1=left_wrist, 2=right_wrist
+    Head at Q85, wrist cameras at Q70 for bandwidth control.
+    """
     import cv2
     sys.path.insert(0, os.path.expanduser("~/git/unitree_sim_isaaclab"))
     from tools.shared_memory_utils import MultiImageReader
@@ -620,45 +624,69 @@ def start_camera_relay(server, zmq_port=55555):
         except Exception:
             pass
 
+    # SHM name -> (camera_id, jpeg_quality)
+    CAMERAS = [
+        ("head", 0, 85),
+        ("left", 1, 70),
+        ("right", 2, 70),
+    ]
+
     def _relay_loop():
         reader = MultiImageReader()
-        # Try camera names in priority order
-        camera_names = ["head", "left", "right"]
-        active_camera = None
-        frame_count = 0
-        last_ts = 0
+        last_ts = {"head": 0, "left": 0, "right": 0}
+        frame_counts = {"head": 0, "left": 0, "right": 0}
+        active_cameras = set()
+        loop_count = 0  # relay passes so far; paces the missing-camera retry
 
         logger.info("Camera relay: waiting for Isaac Sim shared memory...")
-        while active_camera is None:
-            for name in camera_names:
+        # Wait for at least one camera
+        while not active_cameras:
+            for name, cam_id, quality in CAMERAS:
                 img = reader.read_single_image(name)
                 if img is not None:
-                    active_camera = name
-                    # Untrack all opened SHMs so killing the bridge won't destroy them
-                    for shm_name in list(reader.shms.keys()):
-                        _untrack_shm(shm_name)
-                    logger.info(f"Camera relay: using '{name}' camera ({img.shape[1]}x{img.shape[0]})")
-                    break
-            if active_camera is None:
+                    active_cameras.add(name)
+            if not active_cameras:
                 time.sleep(1.0)
 
+        # Untrack all opened SHMs
+        for shm_name in list(reader.shms.keys()):
+            _untrack_shm(shm_name)
+        logger.info(f"Camera relay: active cameras: {sorted(active_cameras)}")
+
         while True:
             try:
-                img = reader.read_single_image(active_camera)
-                if img is not None:
-                    cur_ts = reader.last_timestamps.get(active_camera, 0)
-                    if cur_ts > last_ts:
-                        last_ts = cur_ts
-                        h, w = img.shape[:2]
-                        ok, buf = cv2.imencode(".jpg", img, [cv2.IMWRITE_JPEG_QUALITY, 85])
-                        if ok:
-                            server.set_webcam_frame(buf.tobytes())
-                            frame_count += 1
-                            if frame_count == 1:
-                                logger.info(f"Camera relay: first frame {w}x{h}, "
-                                            f"{len(buf)} bytes")
-                            elif frame_count % 300 == 0:
-                                logger.info(f"Camera relay: {frame_count} frames sent")
+                loop_count += 1
+                for name, cam_id, quality in CAMERAS:
+                    if name not in active_cameras:
+                        # Retry missing cameras once every 150 passes (independent of head frame count)
+                        if loop_count % 150 == 0:
+                            img = reader.read_single_image(name)
+                            if img is not None:
+                                active_cameras.add(name)
+                                for sn in list(reader.shms.keys()):
+                                    _untrack_shm(sn)
+                                logger.info(f"Camera relay: '{name}' camera now available")
+                        continue
+
+                    img = reader.read_single_image(name)
+                    if img is not None:
+                        cur_ts = reader.last_timestamps.get(name, 0)
+                        if cur_ts > last_ts[name]:
+                            last_ts[name] = cur_ts
+                            ok, buf = cv2.imencode(".jpg", img, [cv2.IMWRITE_JPEG_QUALITY, quality])
+                            if ok:
+                                server.set_webcam_frame(buf.tobytes(), camera_id=cam_id)
+                                frame_counts[name] += 1
+                                total = sum(frame_counts.values())
+                                if total == 1:
+                                    h, w = img.shape[:2]
+                                    logger.info(f"Camera relay: first frame from '{name}' "
+                                                f"{w}x{h}, {len(buf)} bytes")
+                                elif total % 900 == 0:
+                                    logger.info(f"Camera relay: frames sent "
+                                                f"head={frame_counts['head']} "
+                                                f"left={frame_counts['left']} "
+                                                f"right={frame_counts['right']}")
             except Exception as e:
                 logger.error(f"Camera relay error: {e}")
             time.sleep(1.0 / 15.0)
diff --git a/server/teleop_server.py b/server/teleop_server.py
index d40241f..4e2b315 100644
--- a/server/teleop_server.py
+++ b/server/teleop_server.py
@@ -107,8 +107,9 @@ class TeleopServer:
         with self.chest_pose_shared.get_lock():
             self.chest_pose_shared[:] = identity_flat
 
-        # Webcam frame (set by external code, sent to clients)
-        self._webcam_frame = None
+        # Camera frames: dict of camera_id -> jpeg_bytes (set by external code)
+        # Camera IDs: 0=head, 1=left_wrist, 2=right_wrist
+        self._camera_frames = {}
         self._webcam_lock = threading.Lock()
         self._webcam_event = asyncio.Event()
 
@@ -117,11 +118,15 @@ class TeleopServer:
         self._message_count = 0
         self._last_log_time = 0
 
-    def set_webcam_frame(self, jpeg_bytes: bytes):
-        """Called by external code (e.g., ThreadedWebcam) to provide a new JPEG frame."""
+    def set_webcam_frame(self, jpeg_bytes: bytes, camera_id: int = 0):
+        """Called by external code to provide a new JPEG frame for a camera.
+
+        Args:
+            jpeg_bytes: JPEG-encoded image data
+            camera_id: 0=head, 1=left_wrist, 2=right_wrist
+        """
         with self._webcam_lock:
-            self._webcam_frame = jpeg_bytes
-        # Signal the async send loop
+            self._camera_frames[camera_id] = jpeg_bytes
         try:
             self._webcam_event.set()
         except RuntimeError:
@@ -237,20 +242,32 @@ class TeleopServer:
                 self.body_joints_shared[:] = body_joints
 
     async def _send_webcam_loop(self, websocket):
-        """Send webcam JPEG frames to a client at ~10 fps."""
+        """Send camera JPEG frames to a client.
+
+        Head camera (~10fps), wrist cameras (~5fps).
+        Each binary message: 1 byte camera_id + JPEG data.
+        """
         interval = 1.0 / 10.0
+        cycle = 0
         while True:
             await asyncio.sleep(interval)
             with self._webcam_lock:
-                frame = self._webcam_frame
-            if frame is not None:
-                try:
-                    await websocket.send(frame)
-                except websockets.exceptions.ConnectionClosed:
-                    break
-                except Exception as e:
-                    logger.warning(f"Webcam send error: {e}")
-                    break
+                frames = dict(self._camera_frames)
+            # Head every cycle, wrist cameras every other cycle
+            cam_ids = [0]  # always send head
+            if cycle % 2 == 0:
+                cam_ids.extend([1, 2])
+            cycle += 1
+            for cam_id in cam_ids:
+                frame = frames.get(cam_id)
+                if frame is not None:
+                    try:
+                        await websocket.send(bytes([cam_id]) + frame)
+                    except websockets.exceptions.ConnectionClosed:
+                        return
+                    except Exception as e:
+                        logger.warning(f"Webcam send error (cam {cam_id}): {e}")
+                        return
 
     async def serve(self):
         """Start the WebSocket server."""