6. Gemini相机+yoloseg+foundationpose环境搭建及应用

内容主要是获取目标位姿，步骤如下：

1.获取相机的数据，然后通过相机内参将像素坐标转换为相机坐标；

2.yolov8的分割模型获取抓取物体的烟码；

3.把深度图的深度值和分割的彩图值结合构成3维数据；

4.foundationpose对3维数据找位姿。

1.环境搭建

项目用conda搭建环境。

系统：ubuntu22.04 ROS2

相机环境搭建参考https://blog.csdn.net/weixin_71719718/article/details/160549145?spm=1011.2124.3001.6209。

1.1 分割环境和open3d环境的搭建

终端运行以下代码即可：

复制代码

numpy==1.24.0
opencv-python
open3d 
pyyaml
scikit-learn==1.2.2
ultralytics==8.2.100
numpy==1.24.0

分割代码如下：

复制代码

import rclpy
from rclpy.node import Node
from sensor_msgs.msg import Image, CameraInfo
import cv2
import numpy as np
from ultralytics import YOLO

def imgmsg_to_cv2(img_msg):
    if img_msg.encoding == "16UC1":
        return np.frombuffer(img_msg.data, dtype=np.uint16).reshape(img_msg.height, img_msg.width)
    img = np.frombuffer(img_msg.data, dtype=np.uint8).reshape(img_msg.height, img_msg.width, -1)
    return img[..., ::-1] if img_msg.encoding == "rgb8" else img

class CameraAlignmentNode(Node):
    def __init__(self):
        super().__init__('camera_info_node')

        # ✅ 绝对正确的顺序：消息类型, 话题名, 回调函数, 队列深度
        self.color_sub = self.create_subscription(
            Image, '/camera/color/image_raw', self.color_callback, 10)
        self.depth_sub = self.create_subscription(
            Image, '/camera/depth/image_raw', self.depth_callback, 10)
        self.info_sub = self.create_subscription(
            CameraInfo, '/camera/color/camera_info', self.info_cb, 10)

        self.color = None
        self.depth = None
        self.K = None
        self.model = YOLO("best_v3.pt")

    def info_cb(self, msg):
        self.K = np.array(msg.k).reshape(3,3)
        self.get_logger().info(f"✅ fx={msg.k[0]:.1f}, fy={msg.k[4]:.1f}")
        self.info_sub.destroy()

    def color_callback(self, msg):
        self.color = imgmsg_to_cv2(msg)

    def depth_callback(self, msg):
        self.depth = imgmsg_to_cv2(msg)
        if self.color is not None:
            self.process()

    def process(self):
        h, w = self.color.shape[:2]
        depth = cv2.resize(self.depth, (w, h), interpolation=cv2.INTER_NEAREST)

        # YOLO分割
        res = self.model(self.color, conf=0.5, verbose=False)
        mask = np.zeros((h, w), dtype=np.uint8)
        if res[0].masks is not None:
            for m in res[0].masks.data:
                m = cv2.resize(m.cpu().numpy(), (w, h))
                mask = np.clip(mask + m*255, 0,255).astype(np.uint8)

        # 深度过滤
        seg_depth = depth[mask > 0]
        seg_depth = seg_depth[(seg_depth > 0) & (seg_depth <= 1500)]
        if seg_depth.size > 0:
            print(f"均值深度: {np.mean(seg_depth):.1f}mm")

        # 叠加显示
        overlay = self.color.copy()
        color_mask = np.zeros_like(overlay)
        color_mask[mask > 0] = (0,255,0)
        seg_img = cv2.addWeighted(overlay, 0.7, color_mask, 0.3, 0)

        cv2.imshow("Color + Segmentation", seg_img)
        cv2.waitKey(1)

def main(args=None):
    rclpy.init(args=args)
    node = CameraAlignmentNode()
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass
    finally:
        cv2.destroyAllWindows()
        node.destroy_node()
        rclpy.shutdown()

if __name__ == '__main__':
    main()

1.2 foundationpose环境搭建

编译

编译如下：

复制代码

cd ~/ros2_ws
colcon build --packages-select camera_info_get
source install/setup.bash
ros2 run camera_info_get get_camera_info