系统:ubuntu22.04 ROS2
NVIDIA-SMI 535.309.01 Driver Version: 535.309.01 CUDA Version: 12.2
由于ros2环境是自带的python10,而foundationpose环境是python11,所以该demo用两个环境实现功能,ros2负责获取相机参数以及分割目标物体,foundationpose接收数据后获取目标物体的位姿态,中间由socket进行发送信息和接收信息。
1.最简单的代码
1.1 ros2获取相机参数,处理数据
代码如下:
#!/usr/bin/env python3
import rclpy
from rclpy.node import Node
from sensor_msgs.msg import Image, CameraInfo
from cv_bridge import CvBridge
import cv2
import numpy as np
from ultralytics import YOLO
import socket
import json
import base64
# -------- Socket 配置 --------
HOST = '127.0.0.1'
PORT = 12345
class CameraSenderNode(Node):
def __init__(self):
super().__init__('ros2_camera_sender')
self.bridge = CvBridge()
self.rgb_img = None
self.depth_img = None
self.K = None
# YOLO
self.yolo = YOLO("/home/wyq/ros2_ws/best_v3.pt")
self.yolo.to("cpu")
# Socket Server
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self.sock.bind((HOST, PORT))
self.sock.listen(1)
self.conn = None
self.get_logger().info(f"TCP Server listening {HOST}:{PORT}")
# 订阅
self.create_subscription(Image, '/camera/color/image_raw', self.rgb_cb, 10)
self.create_subscription(Image, '/camera/depth/image_raw', self.depth_cb, 10)
self.create_subscription(CameraInfo, '/camera/color/camera_info', self.caminfo_cb, 10)
# 后台接受连接
import threading
threading.Thread(target=self.accept_conn, daemon=True).start()
def accept_conn(self):
while True:
self.conn, addr = self.sock.accept()
self.get_logger().info(f"FP Connected: {addr}")
def caminfo_cb(self, msg):
self.K = np.array(msg.k).reshape(3,3)
def rgb_cb(self, msg):
self.rgb_img = self.bridge.imgmsg_to_cv2(msg, 'bgr8')
def depth_cb(self, msg):
# 转cv2
dtype = np.uint16 if msg.encoding == "16UC1" else np.uint8
depth_data = np.ndarray(
shape=(msg.height, msg.width, 1), dtype=dtype, buffer=msg.data
)
self.depth_img = depth_data
self.process()
def process(self):
if self.rgb_img is None or self.depth_img is None or self.K is None:
return
rgb = self.rgb_img
depth_raw = self.depth_img
h, w = rgb.shape[:2]
# 深度对齐 + 归一化
depth_aligned = cv2.resize(depth_raw, (w, h), interpolation=cv2.INTER_NEAREST)
depth = depth_aligned.astype(np.float32) / 1000.0
depth = np.clip(depth, 0.1, 3.0)
depth[np.isnan(depth)] = 1.0
depth[np.isinf(depth)] = 1.0
# YOLO
res = self.yolo.predict(rgb, conf=0.6, verbose=False)
mask = np.zeros((h,w), dtype=np.uint8)
if len(res)>0 and res[0].masks is not None:
m = res[0].masks.data[0].cpu().numpy()
m = cv2.resize(m, (w,h))
mask = (m>0.5).astype(np.uint8)
# 掩码简单提纯(保持你逻辑)
kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (7,7))
mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel_close, iterations=2)
# 打包发送(json+base64)
def b64(arr):
return base64.b64encode(arr.tobytes()).decode('utf-8')
data = {
'rgb_b64': b64(rgb),
'rgb_shape': list(rgb.shape),
'depth_b64': b64(depth),
'depth_shape': list(depth.shape),
'mask_b64': b64(mask),
'mask_shape': list(mask.shape),
'K': self.K.tolist()
}
if self.conn:
try:
self.conn.sendall((json.dumps(data)+'\n').encode('utf-8'))
except Exception as e:
self.get_logger().error(f"Send err: {e}")
self.conn = None
# 可视化
overlay = cv2.addWeighted(rgb, 0.7, cv2.cvtColor(mask*255,cv2.COLOR_GRAY2BGR), 0.3,0)
cv2.imshow("overlay", overlay)
cv2.waitKey(1)
def main(args=None):
rclpy.init(args=args)
node = CameraSenderNode()
rclpy.spin(node)
cv2.destroyAllWindows()
rclpy.shutdown()
if __name__ == '__main__':
main()
1.2 foundationpose接收数据,获取目标物的位姿态
代码如下:
#!/usr/bin/env python3
import socket
import json
import base64
import numpy as np
import cv2
import trimesh
import sys
# -------- 路径 + FoundationPose --------
sys.path.insert(0, "/home/wyq/ros2_ws/FoundationPose")
from estimater import FoundationPose
# -------- Socket --------
HOST = '127.0.0.1'
PORT = 12345
# -------- 初始化FP --------
mesh = trimesh.load("/home/wyq/ros2_ws/phones.obj")
model_pts = mesh.vertices.astype(np.float32)
model_normals = mesh.vertex_normals.astype(np.float32)
fp = FoundationPose(
model_pts=model_pts,
model_normals=model_normals,
mesh=mesh,
symmetry_tfs=None,
debug=0
)
initialized = False
def from_b64(s, shape):
return np.frombuffer(base64.b64decode(s), dtype=np.uint8).reshape(shape)
def main():
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
while True:
try:
sock.connect((HOST, PORT))
print("Connected to ROS2 sender")
break
except Exception as e:
print("Waiting ROS2...", e)
import time
time.sleep(2)
buf = b''
global initialized
while True:
data = sock.recv(65536)
if not data:
break
buf += data
while b'\n' in buf:
line, buf = buf.split(b'\n', 1)
try:
d = json.loads(line.decode('utf-8'))
# 解包
rgb = from_b64(d['rgb_b64'], d['rgb_shape'])
depth = np.frombuffer(base64.b64decode(d['depth_b64']), dtype=np.float32).reshape(d['depth_shape'])
mask = from_b64(d['mask_b64'], d['mask_shape'])
K = np.array(d['K'])
# FP
if not initialized:
print("⚡ FP init...")
pose = fp.register(K=K, rgb=rgb, depth=depth, ob_mask=mask, iteration=2)
initialized = True
print("✅ Init done, tracking...")
else:
pose = fp.track_one(rgb=rgb, depth=depth, K=K, iteration=2)
print("\n🎯 Pose:\n", pose)
except Exception as e:
print("ERR:", e)
if __name__ == '__main__':
main()
代码放在foundationpose的根目录。
2.环境搭建
两个代码用了两个环境,可以分别搭建环境,分别验证环境搭建是否成功。
2.1 Ros2及分割算法环境搭建
RTX 4060 支持 CUDA 12.x,最稳组合:
- Python:3.10(兼容 ROS2 Humble)
- PyTorch:2.4.x / 2.5.x
- CUDA:12.1(cu121)
创建 conda 环境
conda create -n seg_pose python=3.10 -y
conda activate seg_pose
安装 PyTorchCUDA 12.x(12.0/12.1/12.2)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
验证是否成功(必须做)
python -c "import torch; print('torch版本:', torch.__version__)"
python -c "print('CUDA可用:', torch.cuda.is_available())"
python -c "print('GPU:', torch.cuda.get_device_name(0))"
python -c "print('torch使用的CUDA:', torch.version.cuda)"
安装分割
# 分割模型(YOLO)
pip install ultralytics opencv-python numpy pillow
# 降级numpy
pip install numpy==1.24.4
#验证ultralytics是否可用
python -c "from ultralytics import YOLO; print('✅ YOLO OK')"
最后下载需要的模型后运行代码即可。
2.2 foundationpose环境安装
这里按照官网在本地安装,也可以用docker安装,可以看官网https://github.com/NVlabs/FoundationPose。
创建 conda 环境
conda env create -f environment.yml
conda activate foundationpose
安装prtorch
python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
安装PyTorch3D and NVDiffRast
export CUDA_HOME=/usr/local/cuda # or e.g. /usr/local/cuda-12.8
export PATH="$CUDA_HOME/bin:$PATH"
python -m pip install --no-build-isolation "git+https://github.com/facebookresearch/pytorch3d.git"
python -m pip install --no-build-isolation "git+https://github.com/NVlabs/nvdiffrast.git"
也可以先下载zip文件,再安装。
再安装python依赖的其他库
python -m pip install -r requirements.txt
bash build_all_conda.sh