Record3D 获取iphone RGBD 和 pose

最近准备复现Universal Manipulation Interface 遥操，在选择相机时，考虑到先快速验证下视觉模块，刚好手里有台 iphone 15 pro，这不巧了嘛，iphone自带LiDAR，有RGB图像、深度图、pose，刚好满足数据采集的需要。

1.环境搭建

APP：Record3D

https://record3d.app/

PC端环境构建：record3d

https://github.com/marek-simonik/record3d

这里因为需要快速的功能验证，选择Python进行部署：

2.源码和实际使用

Record3D APP设置：
需要APP购买USB的使用权限，然后这里设置为USB模式
回到Record界面
将手机和电脑进行连接
运行下面的程序，会显示找到了设备，APP上点击红色的启动按钮，PC端可以实时取流并显示。
官方demo-main.py

python 复制代码

import numpy as np
from record3d import Record3DStream
import cv2
from threading import Event


class DemoApp:
    def __init__(self):
        self.event = Event()
        self.session = None
        self.DEVICE_TYPE__TRUEDEPTH = 0
        self.DEVICE_TYPE__LIDAR = 1

    def on_new_frame(self):
        """
        This method is called from non-main thread, therefore cannot be used for presenting UI.
        """
        self.event.set()  # Notify the main thread to stop waiting and process new frame.

    def on_stream_stopped(self):
        print('Stream stopped')

    def connect_to_device(self, dev_idx):
        print('Searching for devices')
        devs = Record3DStream.get_connected_devices()
        print('{} device(s) found'.format(len(devs)))
        for dev in devs:
            print('\tID: {}\n\tUDID: {}\n'.format(dev.product_id, dev.udid))

        if len(devs) <= dev_idx:
            raise RuntimeError('Cannot connect to device #{}, try different index.'
                               .format(dev_idx))

        dev = devs[dev_idx]
        self.session = Record3DStream()
        self.session.on_new_frame = self.on_new_frame
        self.session.on_stream_stopped = self.on_stream_stopped
        self.session.connect(dev)  # Initiate connection and start capturing

    def get_intrinsic_mat_from_coeffs(self, coeffs):
        return np.array([[coeffs.fx,         0, coeffs.tx],
                         [        0, coeffs.fy, coeffs.ty],
                         [        0,         0,         1]])

    def start_processing_stream(self):
        while True:
            self.event.wait()  # Wait for new frame to arrive

            # Copy the newly arrived RGBD frame
            depth = self.session.get_depth_frame()
            rgb = self.session.get_rgb_frame()
            confidence = self.session.get_confidence_frame()
            intrinsic_mat = self.get_intrinsic_mat_from_coeffs(self.session.get_intrinsic_mat())
            camera_pose = self.session.get_camera_pose()  # Quaternion + world position (accessible via camera_pose.[qx|qy|qz|qw|tx|ty|tz])

            print(intrinsic_mat)

            # You can now e.g. create point cloud by projecting the depth map using the intrinsic matrix.

            # Postprocess it
            if self.session.get_device_type() == self.DEVICE_TYPE__TRUEDEPTH:
                depth = cv2.flip(depth, 1)
                rgb = cv2.flip(rgb, 1)

            rgb = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

            # Show the RGBD Stream
            cv2.imshow('RGB', rgb)
            cv2.imshow('Depth', depth)

            if confidence.shape[0] > 0 and confidence.shape[1] > 0:
                cv2.imshow('Confidence', confidence * 100)

            cv2.waitKey(1)

            self.event.clear()


if __name__ == '__main__':
    app = DemoApp()
    app.connect_to_device(dev_idx=0)
    app.start_processing_stream()

增加获取camera_pose，实时输出pose并记录存储到文件中。

python 复制代码

import numpy as np
from record3d import Record3DStream
import cv2
from threading import Event
import time
import math
import csv
import os


class DemoApp:
    def __init__(self):
        self.event = Event()
        self.session = None
        self.DEVICE_TYPE__TRUEDEPTH = 0
        self.DEVICE_TYPE__LIDAR = 1
        # 初始化CSV文件相关
        self.csv_file = None
        self.csv_writer = None
        self.init_csv_file()

    def init_csv_file(self):
        """初始化CSV文件，写入表头"""
        # 生成带时间戳的文件名，避免重复
        timestamp = time.strftime("%Y%m%d_%H%M%S", time.localtime())
        self.csv_file_path = f"camera_pose_data_{timestamp}.csv"
        
        # 打开文件并写入表头
        self.csv_file = open(self.csv_file_path, 'w', newline='', encoding='utf-8')
        self.csv_writer = csv.writer(self.csv_file)
        # 表头：时间戳，x，y，z，rx，ry，rz
        self.csv_writer.writerow(['timestamp', 'x', 'y', 'z', 'rx', 'ry', 'rz'])
        print(f"相机位姿数据将保存到: {self.csv_file_path}")

    def quaternion_to_euler(self, qx, qy, qz, qw):
        """
        四元数转欧拉角（单位：弧度）
        四元数格式：qx, qy, qz, qw
        欧拉角顺序：roll(x), pitch(y), yaw(z)
        """
        # 计算roll (x轴旋转)
        sinr_cosp = 2 * (qw * qx + qy * qz)
        cosr_cosp = 1 - 2 * (qx * qx + qy * qy)
        roll = math.atan2(sinr_cosp, cosr_cosp)

        # 计算pitch (y轴旋转)
        sinp = 2 * (qw * qy - qz * qx)
        # 限制pitch范围在[-π/2, π/2]
        pitch = math.asin(max(min(sinp, 1), -1))

        # 计算yaw (z轴旋转)
        siny_cosp = 2 * (qw * qz + qx * qy)
        cosy_cosp = 1 - 2 * (qy * qy + qz * qz)
        yaw = math.atan2(siny_cosp, cosy_cosp)

        return roll, pitch, yaw

    def on_new_frame(self):
        """
        This method is called from non-main thread, therefore cannot be used for presenting UI.
        """
        self.event.set()  # Notify the main thread to stop waiting and process new frame.

    def on_stream_stopped(self):
        print('Stream stopped')

    def connect_to_device(self, dev_idx):
        print('Searching for devices')
        devs = Record3DStream.get_connected_devices()
        print('{} device(s) found'.format(len(devs)))
        for dev in devs:
            print('\tID: {}\n\tUDID: {}\n'.format(dev.product_id, dev.udid))

        if len(devs) <= dev_idx:
            raise RuntimeError('Cannot connect to device #{}, try different index.'
                               .format(dev_idx))

        dev = devs[dev_idx]
        self.session = Record3DStream()
        self.session.on_new_frame = self.on_new_frame
        self.session.on_stream_stopped = self.on_stream_stopped
        self.session.connect(dev)  # Initiate connection and start capturing

    def get_intrinsic_mat_from_coeffs(self, coeffs):
        return np.array([[coeffs.fx,         0, coeffs.tx],
                         [        0, coeffs.fy, coeffs.ty],
                         [        0,         0,         1]])

    def start_processing_stream(self):
        try:
            while True:
                self.event.wait()  # Wait for new frame to arrive

                # 获取当前时间戳（毫秒级）
                current_timestamp = int(time.time() * 1000)

                # Copy the newly arrived RGBD frame
                depth = self.session.get_depth_frame()
                rgb = self.session.get_rgb_frame()
                confidence = self.session.get_confidence_frame()
                intrinsic_mat = self.get_intrinsic_mat_from_coeffs(self.session.get_intrinsic_mat())
                camera_pose = self.session.get_camera_pose()  # Quaternion + world position (accessible via camera_pose.[qx|qy|qz|qw|tx|ty|tz])

                print("="*50)
                print("Intrinsic Matrix (内参矩阵):")
                print(intrinsic_mat)

                # 初始化数据变量
                tx = ty = tz = rx = ry = rz = None

                # 获取并输出相机位姿信息
                if camera_pose is not None:
                    # 提取四元数（姿态）
                    qx = camera_pose.qx
                    qy = camera_pose.qy
                    qz = camera_pose.qz
                    qw = camera_pose.qw
                    
                    # 提取世界坐标（位置）
                    tx = camera_pose.tx
                    ty = camera_pose.ty
                    tz = camera_pose.tz
                    
                    # 四元数转欧拉角
                    rx, ry, rz = self.quaternion_to_euler(qx, qy, qz, qw)
                        
                    # 输出位姿信息
                    print("\nCamera Pose (相机位姿):")
                    print(f"Quaternion (四元数) - qx: {qx:.6f}, qy: {qy:.6f}, qz: {qz:.6f}, qw: {qw:.6f}")
                    print(f"World Position (世界坐标) - tx: {tx:.6f}m, ty: {ty:.6f}m, tz: {tz:.6f}m")
                    print(f"Euler Angles (欧拉角/弧度) - rx: {rx:.6f}, ry: {ry:.6f}, rz: {rz:.6f}")
                    print(f"Euler Angles (欧拉角/角度) - rx: {math.degrees(rx):.2f}°, ry: {math.degrees(ry):.2f}°, rz: {math.degrees(rz):.2f}°")
                else:
                    print("\nCamera Pose: Not available (相机位姿不可用)")

                # 保存数据到CSV文件（仅当有有效位姿数据时）
                if all(v is not None for v in [tx, ty, tz, rx, ry, rz]):
                    self.csv_writer.writerow([
                        current_timestamp,
                        round(tx, 6),
                        round(ty, 6),
                        round(tz, 6),
                        round(rx, 6),
                        round(ry, 6),
                        round(rz, 6)
                    ])
                    # 立即刷新缓冲区，确保数据实时写入
                    self.csv_file.flush()
                    os.fsync(self.csv_file.fileno())
                else:
                    self.csv_writer.writerow([current_timestamp, None, None])
                    # 立即刷新缓冲区，确保数据实时写入
                    self.csv_file.flush()
                    os.fsync(self.csv_file.fileno())

                # You can now e.g. create point cloud by projecting the depth map using the intrinsic matrix.

                # Postprocess it
                if self.session.get_device_type() == self.DEVICE_TYPE__TRUEDEPTH:
                    depth = cv2.flip(depth, 1)
                    rgb = cv2.flip(rgb, 1)

                rgb = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

                # Show the RGBD Stream
                cv2.imshow('RGB', rgb)
                cv2.imshow('Depth', depth)

                if confidence.shape[0] > 0 and confidence.shape[1] > 0:
                    cv2.imshow('Confidence', confidence * 100)

                cv2.waitKey(1)

                self.event.clear()
        finally:
            # 清理资源
            cv2.destroyAllWindows()
            if self.csv_file:
                self.csv_file.close()
                print(f"程序退出，数据文件已关闭: {self.csv_file_path}")


if __name__ == '__main__':
    app = DemoApp()
    try:
        app.connect_to_device(dev_idx=0)
        app.start_processing_stream()
    except Exception as e:
        print(f"程序异常: {e}")

        if app.csv_file:
            app.csv_file.close()
        cv2.destroyAllWindows()

Record3D 获取iphone RGBD 和 pose

目录

1.环境搭建

2.源码和实际使用