From b29793c765555b2160115a615d66ef9859658652 Mon Sep 17 00:00:00 2001 From: Joe DiPrima Date: Thu, 19 Feb 2026 12:26:40 -0600 Subject: [PATCH] Fix webcam bandwidth choking arm tracking: threaded capture, 480p, 15fps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ThreadedWebcam class: cv2.VideoCapture.read() in background thread, never blocks control loop - Default resolution 720p→480p (~2.5x less data) - New --webcam-fps arg (default 15) caps send rate, matched to Vuer display_fps - CAP_PROP_BUFFERSIZE=1 to always get latest frame - JPEG quality 80→50 in televuer (~40% smaller frames) - Combined ~8-10x bandwidth reduction, DDS arm data no longer starved Co-Authored-By: Claude Opus 4.6 --- teleop/teleop_hand_and_arm.py | 92 +++++++++++++++++++++++++---------- teleop/televuer | 2 +- 2 files changed, 68 insertions(+), 26 deletions(-) diff --git a/teleop/teleop_hand_and_arm.py b/teleop/teleop_hand_and_arm.py index 3c01eab..9de8b64 100644 --- a/teleop/teleop_hand_and_arm.py +++ b/teleop/teleop_hand_and_arm.py @@ -95,6 +95,47 @@ def scale_rotation(R, alpha): rv = rotation_to_rotvec(R) return rotvec_to_rotation(rv * alpha) +class ThreadedWebcam: + """Non-blocking webcam reader. 
Captures frames in a background thread + so cv2.VideoCapture.read() never blocks the control loop.""" + def __init__(self, device_index, width, height): + import cv2 + self.cap = cv2.VideoCapture(device_index) + if not self.cap.isOpened(): + raise RuntimeError(f"Cannot open /dev/video{device_index}") + self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, width) + self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height) + # Minimize internal buffer so we always get the latest frame + self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) + self.actual_w = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + self.actual_h = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + self._frame = None + self._lock = threading.Lock() + self._stop = threading.Event() + self._thread = threading.Thread(target=self._capture_loop, daemon=True) + self._thread.start() + + def _capture_loop(self): + while not self._stop.is_set(): + ret, frame = self.cap.read() + if ret: + with self._lock: + self._frame = frame + + def read(self): + """Return (True, frame) or (False, None). Never blocks.""" + with self._lock: + if self._frame is not None: + frame = self._frame + self._frame = None # consume it + return True, frame + return False, None + + def release(self): + self._stop.set() + self._thread.join(timeout=2.0) + self.cap.release() + # Previous button state for edge detection _r3_prev_buttons = {} @@ -161,8 +202,10 @@ if __name__ == '__main__': # webcam parser.add_argument('--webcam', type=int, default=None, help='USB webcam device index (e.g. 0 for /dev/video0). 
Bypasses teleimager.') - parser.add_argument('--webcam-res', type=str, default='720p', choices=['480p', '720p', '1080p'], - help='Webcam resolution (default: 720p)') + parser.add_argument('--webcam-res', type=str, default='480p', choices=['480p', '720p', '1080p'], + help='Webcam resolution (default: 480p)') + parser.add_argument('--webcam-fps', type=int, default=15, + help='Max webcam send rate in fps (default: 15, saves bandwidth for arm tracking)') # record mode and task info parser.add_argument('--record', action = 'store_true', help = 'Enable data recording mode') parser.add_argument('--task-dir', type = str, default = './utils/data/', help = 'path to save data') @@ -210,23 +253,17 @@ if __name__ == '__main__': # image source: USB webcam or teleimager webcam_cap = None img_client = None + webcam_last_send = 0.0 + webcam_send_interval = 1.0 / args.webcam_fps # Cap video fps to save bandwidth for DDS if args.webcam is not None: - import cv2 as _cv2 - webcam_cap = _cv2.VideoCapture(args.webcam) - if not webcam_cap.isOpened(): - logger_mp.error(f"[webcam] Cannot open /dev/video{args.webcam}") - exit(1) res_map = {'480p': (640, 480), '720p': (1280, 720), '1080p': (1920, 1080)} cam_w, cam_h = res_map[args.webcam_res] - webcam_cap.set(_cv2.CAP_PROP_FRAME_WIDTH, cam_w) - webcam_cap.set(_cv2.CAP_PROP_FRAME_HEIGHT, cam_h) - actual_w = int(webcam_cap.get(_cv2.CAP_PROP_FRAME_WIDTH)) - actual_h = int(webcam_cap.get(_cv2.CAP_PROP_FRAME_HEIGHT)) - logger_mp.info(f"[webcam] Opened /dev/video{args.webcam} at {actual_w}x{actual_h}") + webcam_cap = ThreadedWebcam(args.webcam, cam_w, cam_h) + logger_mp.info(f"[webcam] Opened /dev/video{args.webcam} at {webcam_cap.actual_w}x{webcam_cap.actual_h} (threaded, {args.webcam_fps}fps cap)") camera_config = { 'head_camera': { 'enable_zmq': True, 'enable_webrtc': False, - 'binocular': False, 'image_shape': (actual_h, actual_w), + 'binocular': False, 'image_shape': (webcam_cap.actual_h, webcam_cap.actual_w), 'fps': 30, 'webrtc_port': 0, }, 
'left_wrist_camera': {'enable_zmq': False}, @@ -240,12 +277,12 @@ if __name__ == '__main__': logger_mp.debug(f"Camera config: {camera_config}") # televuer_wrapper: obtain hand pose data from the XR device and transmit the robot's head camera image to the XR device. - tv_wrapper = TeleVuerWrapper(use_hand_tracking=args.input_mode == "hand", + # Match display_fps to webcam send rate to avoid re-sending stale frames + vuer_display_fps = float(args.webcam_fps) if args.webcam is not None else 30.0 + tv_wrapper = TeleVuerWrapper(use_hand_tracking=args.input_mode == "hand", binocular=camera_config['head_camera']['binocular'], img_shape=camera_config['head_camera']['image_shape'], - # maybe should decrease fps for better performance? - # https://github.com/unitreerobotics/xr_teleoperate/issues/172 - # display_fps=camera_config['head_camera']['fps'] ? args.frequency? 30.0? + display_fps=vuer_display_fps, display_mode=args.display_mode, zmq=camera_config['head_camera']['enable_zmq'], webrtc=camera_config['head_camera']['enable_webrtc'], @@ -382,10 +419,13 @@ if __name__ == '__main__': logger_mp.info("[R3] A pressed → START tracking") _r3_prev_buttons = r3_btns if xr_need_local_img: + now = time.time() if webcam_cap is not None: - ret, frame = webcam_cap.read() - if ret: - tv_wrapper.render_to_xr(frame) + if now - webcam_last_send >= webcam_send_interval: + ret, frame = webcam_cap.read() + if ret: + tv_wrapper.render_to_xr(frame) + webcam_last_send = now elif camera_config['head_camera']['enable_zmq']: head_img = img_client.get_head_frame() tv_wrapper.render_to_xr(head_img) @@ -502,12 +542,14 @@ if __name__ == '__main__': time.sleep(sleep_time) continue - # get image + # get image (webcam: non-blocking, rate-limited to save bandwidth) if webcam_cap is not None: - ret, webcam_frame = webcam_cap.read() - if ret: - head_img = webcam_frame - tv_wrapper.render_to_xr(webcam_frame) + if current_time - webcam_last_send >= webcam_send_interval: + ret, webcam_frame = webcam_cap.read() 
+ if ret: + head_img = webcam_frame + tv_wrapper.render_to_xr(webcam_frame) + webcam_last_send = current_time else: if camera_config['head_camera']['enable_zmq']: if args.record or xr_need_local_img: diff --git a/teleop/televuer b/teleop/televuer index 0ea96e5..2c89896 160000 --- a/teleop/televuer +++ b/teleop/televuer @@ -1 +1 @@ -Subproject commit 0ea96e5f51a1248036d902dcd9af2bb8a1eeebcb +Subproject commit 2c89896ab141288e8151ff0bf74f7bd1e274a819