Calling a YOLOv11 ONNX model from C++
- Result images
- 1. Calling the model from Python
- 2. Exporting the ONNX model
- 3. Calling the ONNX model from Python
- 4. Calling the ONNX model from C++
- 5. Detection on a video stream
- Follow-up
Result images
1. Calling the model from Python
This post only covers how to call the trained YOLOv11 model; for everything else, see the many other YOLOv11 blog posts.
Model download:
YOLO11 models and source code download
Model invocation:
Create a new Python file inside the downloaded Python project:
python
from ultralytics import YOLO
# Load the model
#model = YOLO("yolo11n.pt")
model = YOLO("yolo11n-seg.pt")
results = model("cat.jpg")
results[0].show()
Different models give different outputs: some do object detection, others instance segmentation.
2. Exporting the ONNX model
Exporting an ONNX model only takes one extra line added to the code above (see the various blog posts for the detailed export parameters). After running it, an .onnx model file appears in the same folder; that model can then be called from Python or C++.
python
from ultralytics import YOLO
# Load the model
model = YOLO("yolo11n.pt")
# results = model("cat.jpg")
# results[0].show()
path = model.export(format="onnx", dynamic=False, opset=12)
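Before moving on, it can be worth sanity-checking the exported file with onnxruntime. The snippet below is a minimal sketch (it assumes the exported file is named `yolo11n.onnx` and that the `onnxruntime` package is installed); it just prints the input and output names and shapes, which should match what the Python and C++ code later expects (roughly 1x3x640x640 in and 1x84x8400 out for the detection model).
python
import onnxruntime as ort

# Load the exported model on CPU only, just to inspect its interface
session = ort.InferenceSession("yolo11n.onnx", providers=["CPUExecutionProvider"])

# Print every input and output tensor with its name, shape and element type
for inp in session.get_inputs():
    print("input :", inp.name, inp.shape, inp.type)
for out in session.get_outputs():
    print("output:", out.name, out.shape, out.type)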
3. Calling the ONNX model from Python
Calling the detection model
python
# Ultralytics YOLO 🚀, AGPL-3.0 license
import argparse
import cv2
import numpy as np
import onnxruntime as ort
# Class-id to class-name mapping, defined at module level as a dict
CLASS_NAMES = {
0: 'person',
1: 'bicycle',
2: 'car',
3: 'motorcycle',
4: 'airplane',
5: 'bus',
6: 'train',
7: 'truck',
8: 'boat',
9: 'traffic light',
10: 'fire hydrant',
11: 'stop sign',
12: 'parking meter',
13: 'bench',
14: 'bird',
15: 'cat',
16: 'dog',
17: 'horse',
18: 'sheep',
19: 'cow',
20: 'elephant',
21: 'bear',
22: 'zebra',
23: 'giraffe',
24: 'backpack',
25: 'umbrella',
26: 'handbag',
27: 'tie',
28: 'suitcase',
29: 'frisbee',
30: 'skis',
31: 'snowboard',
32: 'sports ball',
33: 'kite',
34: 'baseball bat',
35: 'baseball glove',
36: 'skateboard',
37: 'surfboard',
38: 'tennis racket',
39: 'bottle',
40: 'wine glass',
41: 'cup',
42: 'fork',
43: 'knife',
44: 'spoon',
45: 'bowl',
46: 'banana',
47: 'apple',
48: 'sandwich',
49: 'orange',
50: 'broccoli',
51: 'carrot',
52: 'hot dog',
53: 'pizza',
54: 'donut',
55: 'cake',
56: 'chair',
57: 'couch',
58: 'potted plant',
59: 'bed',
60: 'dining table',
61: 'toilet',
62: 'tv',
63: 'laptop',
64: 'mouse',
65: 'remote',
66: 'keyboard',
67: 'cell phone',
68: 'microwave',
69: 'oven',
70: 'toaster',
71: 'sink',
72: 'refrigerator',
73: 'book',
74: 'clock',
75: 'vase',
76: 'scissors',
77: 'teddy bear',
78: 'hair drier',
79: 'toothbrush',
    # more classes can be added here...
}
class YOLO11:
    """YOLO11 object-detection wrapper handling inference and visualization."""
    def __init__(self, onnx_model, input_image, confidence_thres, iou_thres):
        """
        Initialize a YOLO11 instance.
        Args:
            onnx_model: path to the ONNX model.
            input_image: path to the input image.
            confidence_thres: confidence threshold used to filter detections.
            iou_thres: IoU threshold for non-maximum suppression (NMS).
        """
        self.onnx_model = onnx_model
        self.input_image = input_image
        self.confidence_thres = confidence_thres
        self.iou_thres = iou_thres
        # Load the class names
        self.classes = CLASS_NAMES
        # Generate a color palette with one color per class
        self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))
def preprocess(self):
"""
对输入图像进行预处理,以便进行推理。
返回:
image_data: 经过预处理的图像数据,准备进行推理。
"""
# 使用 OpenCV 读取输入图像
self.img = cv2.imread(self.input_image)
# 获取输入图像的高度和宽度
self.img_height, self.img_width = self.img.shape[:2]
# 将图像颜色空间从 BGR 转换为 RGB
img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)
        # Letterbox padding: keep the aspect ratio and pad to the model's input size
        img, self.ratio, (self.dw, self.dh) = self.letterbox(img, new_shape=(self.input_height, self.input_width))
# 通过除以 255.0 来归一化图像数据
image_data = np.array(img) / 255.0
# 将图像的通道维度移到第一维
image_data = np.transpose(image_data, (2, 0, 1)) # 通道优先
# 扩展图像数据的维度,以匹配模型输入的形状
image_data = np.expand_dims(image_data, axis=0).astype(np.float32)
# 返回预处理后的图像数据
return image_data
def letterbox(self, img, new_shape=(640, 640), color=(114, 114, 114), auto=False, scaleFill=False, scaleup=True):
"""
将图像进行 letterbox 填充,保持纵横比不变,并缩放到指定尺寸。
"""
        shape = img.shape[:2]  # current image shape as (height, width)
        print(f"Original image shape: {shape}")
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# 计算缩放比例
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) # 选择宽高中最小的缩放比
if not scaleup: # 仅缩小,不放大
r = min(r, 1.0)
# 缩放后的未填充尺寸
new_unpad = (int(round(shape[1] * r)), int(round(shape[0] * r)))
# 计算需要的填充
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # 计算填充的尺寸
dw /= 2 # padding 均分
dh /= 2
# 缩放图像
if shape[::-1] != new_unpad: # 如果当前图像尺寸不等于 new_unpad,则缩放
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
# 为图像添加边框以达到目标尺寸
top, bottom = int(round(dh)), int(round(dh))
left, right = int(round(dw)), int(round(dw))
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
print(f"Final letterboxed image shape: {img.shape}")
return img, (r, r), (dw, dh)
def postprocess(self, input_image, output):
"""
对模型输出进行后处理,以提取边界框、分数和类别 ID。
参数:
input_image (numpy.ndarray): 输入图像。
output (numpy.ndarray): 模型的输出。
返回:
numpy.ndarray: 包含检测结果的输入图像。
"""
# 转置并压缩输出,以匹配预期形状
outputs = np.transpose(np.squeeze(output[0]))
rows = outputs.shape[0]
boxes, scores, class_ids = [], [], []
        # The letterbox scale (self.ratio) and padding (self.dw, self.dh) computed in
        # preprocess() are used below to map the boxes back to the original image
for i in range(rows):
classes_scores = outputs[i][4:]
max_score = np.amax(classes_scores)
if max_score >= self.confidence_thres:
class_id = np.argmax(classes_scores)
x, y, w, h = outputs[i][0], outputs[i][1], outputs[i][2], outputs[i][3]
# 将框调整到原始图像尺寸,考虑缩放和填充
x -= self.dw # 移除填充
y -= self.dh
x /= self.ratio[0] # 缩放回原图
y /= self.ratio[1]
w /= self.ratio[0]
h /= self.ratio[1]
left = int(x - w / 2)
top = int(y - h / 2)
width = int(w)
height = int(h)
boxes.append([left, top, width, height])
scores.append(max_score)
class_ids.append(class_id)
indices = cv2.dnn.NMSBoxes(boxes, scores, self.confidence_thres, self.iou_thres)
for i in indices:
box = boxes[i]
score = scores[i]
class_id = class_ids[i]
self.draw_detections(input_image, box, score, class_id)
return input_image
def draw_detections(self, img, box, score, class_id):
"""
在输入图像上绘制检测到的边界框和标签。
参数:
img: 用于绘制检测结果的输入图像。
box: 检测到的边界框。
score: 对应的检测分数。
class_id: 检测到的目标类别 ID。
返回:
None
"""
# 提取边界框的坐标
x1, y1, w, h = box
# 获取类别对应的颜色
color = self.color_palette[class_id]
# 在图像上绘制边界框
cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)
# 创建包含类别名和分数的标签文本
label = f"{self.classes[class_id]}: {score:.2f}"
# 计算标签文本的尺寸
(label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
# 计算标签文本的位置
label_x = x1
label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
# 绘制填充的矩形作为标签文本的背景
cv2.rectangle(img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color,
cv2.FILLED)
# 在图像上绘制标签文本
cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
    def main(self):
        # Create an inference session from the ONNX model, using GPU if available, otherwise CPU
        session = ort.InferenceSession(
            self.onnx_model,
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"] if ort.get_device() == "GPU" else [
                "CPUExecutionProvider"],
        )
        # Print basic model information
        print("YOLO11 🚀 object detection with ONNXRuntime")
        print("Model name:", self.onnx_model)
        # Read the model input shape (NCHW: index 2 is height, index 3 is width)
        model_inputs = session.get_inputs()
        input_shape = model_inputs[0].shape
        self.input_height = input_shape[2]
        self.input_width = input_shape[3]
        print(f"Model input size: width = {self.input_width}, height = {self.input_height}")
        # Preprocess the image to the size the model expects (e.g. 640x640)
        img_data = self.preprocess()
        print("Preprocessing done")
        # Run inference on the preprocessed image
        outputs = session.run(None, {model_inputs[0].name: img_data})
        # Post-process the outputs and return the annotated image
        return self.postprocess(self.img, outputs)
if __name__ == "__main__":
    # Command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, default="yolo11n.onnx", help="Path to the ONNX detection model.")
    parser.add_argument("--img", type=str, default=r"2222.jpg", help="Path to the input image.")
    parser.add_argument("--conf-thres", type=float, default=0.5, help="Confidence threshold")
    parser.add_argument("--iou-thres", type=float, default=0.45, help="NMS IoU threshold")
    args = parser.parse_args()
    # Create a YOLO11 instance with the given arguments
    detection = YOLO11(args.model, args.img, args.conf_thres, args.iou_thres)
    # Run detection and get the annotated image
    output_image = detection.main()
    # Save the result
    cv2.imwrite("det_result_picture.jpg", output_image)
    print("Result saved as det_result_picture.jpg")
Calling the segmentation model
python
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
YOLO11 分割模型 ONNXRuntime
功能1: 支持不用尺寸图像的输入
功能2: 支持可视化分割结果
"""
import argparse
import cv2
import numpy as np
import onnxruntime as ort
# Class-id to class-name mapping, defined at module level as a dict
CLASS_NAMES = {
0: 'person',
1: 'bicycle',
2: 'car',
3: 'motorcycle',
4: 'airplane',
5: 'bus',
6: 'train',
7: 'truck',
8: 'boat',
9: 'traffic light',
10: 'fire hydrant',
11: 'stop sign',
12: 'parking meter',
13: 'bench',
14: 'bird',
15: 'cat',
16: 'dog',
17: 'horse',
18: 'sheep',
19: 'cow',
20: 'elephant',
21: 'bear',
22: 'zebra',
23: 'giraffe',
24: 'backpack',
25: 'umbrella',
26: 'handbag',
27: 'tie',
28: 'suitcase',
29: 'frisbee',
30: 'skis',
31: 'snowboard',
32: 'sports ball',
33: 'kite',
34: 'baseball bat',
35: 'baseball glove',
36: 'skateboard',
37: 'surfboard',
38: 'tennis racket',
39: 'bottle',
40: 'wine glass',
41: 'cup',
42: 'fork',
43: 'knife',
44: 'spoon',
45: 'bowl',
46: 'banana',
47: 'apple',
48: 'sandwich',
49: 'orange',
50: 'broccoli',
51: 'carrot',
52: 'hot dog',
53: 'pizza',
54: 'donut',
55: 'cake',
56: 'chair',
57: 'couch',
58: 'potted plant',
59: 'bed',
60: 'dining table',
61: 'toilet',
62: 'tv',
63: 'laptop',
64: 'mouse',
65: 'remote',
66: 'keyboard',
67: 'cell phone',
68: 'microwave',
69: 'oven',
70: 'toaster',
71: 'sink',
72: 'refrigerator',
73: 'book',
74: 'clock',
75: 'vase',
76: 'scissors',
77: 'teddy bear',
78: 'hair drier',
79: 'toothbrush',
    # more classes can be added here...
}
# Per-class drawing colors, in OpenCV's BGR order (they are passed straight to cv2 drawing calls)
CLASS_COLORS = {
    0: (255, 0, 0),    # class 0 drawn in blue
    1: (255, 0, 255)   # class 1 drawn in magenta
    # colors for other classes can be added here...
}
class YOLO11Seg:
    def __init__(self, onnx_model):
        # Create the ONNX Runtime inference session, using GPU if available, otherwise CPU
        self.session = ort.InferenceSession(
            onnx_model,
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
            if ort.get_device() == "GPU"
            else ["CPUExecutionProvider"],
        )
        # Pick the Numpy dtype matching the model (FP32 or FP16)
        self.ndtype = np.half if self.session.get_inputs()[0].type == "tensor(float16)" else np.single
        # Read the model input height and width (YOLO11-seg has a single input)
        self.model_height, self.model_width = [x.shape for x in self.session.get_inputs()][0][-2:]
        # Print basic model information
        print("YOLO11 🚀 instance segmentation with ONNXRuntime")
        print("Model name:", onnx_model)
        print(f"Model input size: width = {self.model_width}, height = {self.model_height}")
        # Load the class names
        self.classes = CLASS_NAMES
        # Load the per-class colors
        self.class_colors = CLASS_COLORS
    def get_color_for_class(self, class_id):
        return self.class_colors.get(class_id, (255, 255, 0))  # fall back to cyan (BGR) if the class has no assigned color
def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45, nm=32):
"""
完整的推理流程:预处理 -> 推理 -> 后处理
Args:
im0 (Numpy.ndarray): 原始输入图像
conf_threshold (float): 置信度阈值
iou_threshold (float): NMS 中的 IoU 阈值
nm (int): 掩膜数量
Returns:
boxes (List): 边界框列表
segments (List): 分割区域列表
masks (np.ndarray): [N, H, W] 输出掩膜
"""
# 图像预处理
im, ratio, (pad_w, pad_h) = self.preprocess(im0)
# ONNX 推理
preds = self.session.run(None, {self.session.get_inputs()[0].name: im})
# 后处理
boxes, segments, masks = self.postprocess(
preds,
im0=im0,
ratio=ratio,
pad_w=pad_w,
pad_h=pad_h,
conf_threshold=conf_threshold,
iou_threshold=iou_threshold,
nm=nm,
)
return boxes, segments, masks
def preprocess(self, img):
"""
图像预处理
Args:
img (Numpy.ndarray): 输入图像
Returns:
img_process (Numpy.ndarray): 处理后的图像
ratio (tuple): 宽高比例
pad_w (float): 宽度的填充
pad_h (float): 高度的填充
"""
# 调整输入图像大小并使用 letterbox 填充
shape = img.shape[:2] # 原始图像大小
new_shape = (self.model_height, self.model_width)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
ratio = r, r
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # 填充宽高
if shape[::-1] != new_unpad: # 调整图像大小
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))
left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))
# 转换:HWC -> CHW -> BGR 转 RGB -> 除以 255 -> contiguous -> 添加维度
img = np.ascontiguousarray(np.einsum("HWC->CHW", img)[::-1], dtype=self.ndtype) / 255.0
img_process = img[None] if len(img.shape) == 3 else img
return img_process, ratio, (pad_w, pad_h)
def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold, nm=32):
"""
推理后的结果后处理
Args:
preds (Numpy.ndarray): 来自 ONNX 的推理结果
im0 (Numpy.ndarray): [h, w, c] 原始输入图像
ratio (tuple): 宽高比例
pad_w (float): 宽度的填充
pad_h (float): 高度的填充
conf_threshold (float): 置信度阈值
iou_threshold (float): IoU 阈值
nm (int): 掩膜数量
Returns:
boxes (List): 边界框列表
segments (List): 分割区域列表
masks (np.ndarray): 掩膜数组
"""
x, protos = preds[0], preds[1] # 获取模型的两个输出:预测和原型
# 转换维度
x = np.einsum("bcn->bnc", x)
# 置信度过滤
x = x[np.amax(x[..., 4:-nm], axis=-1) > conf_threshold]
# 合并边界框、置信度、类别和掩膜
x = np.c_[x[..., :4], np.amax(x[..., 4:-nm], axis=-1), np.argmax(x[..., 4:-nm], axis=-1), x[..., -nm:]]
# NMS 过滤
x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)]
# 解析并返回结果
if len(x) > 0:
# 边界框格式转换:从 cxcywh -> xyxy
x[..., [0, 1]] -= x[..., [2, 3]] / 2
x[..., [2, 3]] += x[..., [0, 1]]
# 缩放边界框,使其与原始图像尺寸匹配
x[..., :4] -= [pad_w, pad_h, pad_w, pad_h]
x[..., :4] /= min(ratio)
# 限制边界框在图像边界内
x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1])
x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0])
# 处理掩膜
masks = self.process_mask(protos[0], x[:, 6:], x[:, :4], im0.shape)
# 将掩膜转换为分割区域
segments = self.masks2segments(masks)
return x[..., :6], segments, masks # 返回边界框、分割区域和掩膜
else:
return [], [], []
@staticmethod
def masks2segments(masks):
"""
将掩膜转换为分割区域
Args:
masks (numpy.ndarray): 模型输出的掩膜,形状为 (n, h, w)
Returns:
segments (List): 分割区域的列表
"""
segments = []
for x in masks.astype("uint8"):
c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[0] # 找到轮廓
if c:
c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
else:
c = np.zeros((0, 2)) # 如果没有找到分割区域,返回空数组
segments.append(c.astype("float32"))
return segments
@staticmethod
def crop_mask(masks, boxes):
"""
裁剪掩膜,使其与边界框对齐
Args:
masks (Numpy.ndarray): [n, h, w] 掩膜数组
boxes (Numpy.ndarray): [n, 4] 边界框
Returns:
(Numpy.ndarray): 裁剪后的掩膜
"""
n, h, w = masks.shape
x1, y1, x2, y2 = np.split(boxes[:, :, None], 4, 1)
r = np.arange(w, dtype=x1.dtype)[None, None, :]
c = np.arange(h, dtype=x1.dtype)[None, :, None]
return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
def process_mask(self, protos, masks_in, bboxes, im0_shape):
"""
处理模型输出的掩膜
Args:
protos (numpy.ndarray): [mask_dim, mask_h, mask_w] 掩膜原型
masks_in (numpy.ndarray): [n, mask_dim] 掩膜数量
bboxes (numpy.ndarray): 缩放到原始图像尺寸的边界框
im0_shape (tuple): 原始输入图像的尺寸 (h,w,c)
Returns:
(numpy.ndarray): 处理后的掩膜
"""
c, mh, mw = protos.shape
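        # Each detection carries `nm` mask coefficients; multiplying them with the prototype
        # masks (protos, [nm, mh, mw] flattened to [nm, mh*mw]) yields one low-resolution mask
        # per detection, which is then upscaled to the original image and cropped to its box below.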
masks = np.matmul(masks_in, protos.reshape((c, -1))).reshape((-1, mh, mw)).transpose(1, 2, 0) # HWN
masks = np.ascontiguousarray(masks)
masks = self.scale_mask(masks, im0_shape) # 将掩膜从 P3 尺寸缩放到原始输入图像大小
masks = np.einsum("HWN -> NHW", masks) # HWN -> NHW
masks = self.crop_mask(masks, bboxes) # 裁剪掩膜
return np.greater(masks, 0.5) # 返回二值化后的掩膜
@staticmethod
def scale_mask(masks, im0_shape, ratio_pad=None):
"""
将掩膜缩放至原始图像大小
Args:
masks (np.ndarray): 缩放和填充后的掩膜
im0_shape (tuple): 原始图像大小
ratio_pad (tuple): 填充与原始图像的比例
Returns:
masks (np.ndarray): 缩放后的掩膜
"""
im1_shape = masks.shape[:2]
if ratio_pad is None: # 计算比例
gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # 比例
pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # 填充
else:
pad = ratio_pad[1]
# 计算掩膜的边界
top, left = int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1)) # y, x
bottom, right = int(round(im1_shape[0] - pad[1] + 0.1)), int(round(im1_shape[1] - pad[0] + 0.1))
if len(masks.shape) < 2:
raise ValueError(f'"len of masks shape" 应该是 2 或 3,但得到 {len(masks.shape)}')
masks = masks[top:bottom, left:right]
masks = cv2.resize(
masks, (im0_shape[1], im0_shape[0]), interpolation=cv2.INTER_LINEAR
) # 使用 INTER_LINEAR 插值调整大小
if len(masks.shape) == 2:
masks = masks[:, :, None]
return masks
def draw_and_visualize(self, im, bboxes, segments, vis=False, save=True):
"""
绘制和可视化结果
Args:
im (np.ndarray): 原始图像,形状为 [h, w, c]
bboxes (numpy.ndarray): [n, 4],n 是边界框数量
segments (List): 分割区域的列表
vis (bool): 是否使用 OpenCV 显示图像
save (bool): 是否保存带注释的图像
Returns:
None
"""
# 创建图像副本
im_canvas = im.copy()
for (*box, conf, cls_), segment in zip(bboxes, segments):
# 获取类别对应的颜色
color = self.get_color_for_class(int(cls_))
# 绘制轮廓和填充掩膜
# cv2.polylines(im, np.int32([segment]), True, (255, 255, 255), 2) # 绘制白色边框
cv2.fillPoly(im_canvas, np.int32([segment]), color) # 使用类别对应的颜色填充多边形
# 绘制边界框
cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, 1, cv2.LINE_AA)
# 在图像上绘制类别名称和置信度
cv2.putText(im, f"{self.classes[cls_]}: {conf:.3f}", (int(box[0]), int(box[1] - 9)),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1, cv2.LINE_AA)
# 将图像和绘制的多边形混合
im = cv2.addWeighted(im_canvas, 0.3, im, 0.7, 0)
# 显示图像
if vis:
cv2.imshow("seg_result_picture", im)
cv2.waitKey(0)
cv2.destroyAllWindows()
# 保存图像
if save:
cv2.imwrite("seg_result_picture.jpg", im)
if __name__ == "__main__":
    # Command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, default=r"yolo11n-seg.onnx", help="Path to the ONNX segmentation model")
    parser.add_argument("--source", type=str,
                        default=r"cat.jpg",
                        help="Path to the input image")
    parser.add_argument("--conf", type=float, default=0.6, help="Confidence threshold")
    parser.add_argument("--iou", type=float, default=0.45, help="NMS IoU threshold")
    args = parser.parse_args()
    # Load the model
    model = YOLO11Seg(args.model)
    # Read the image with OpenCV
    img = cv2.imread(args.source)
    # Run inference
    boxes, segments, _ = model(img, conf_threshold=args.conf, iou_threshold=args.iou)
    # If anything was detected, draw the boxes and segmentation masks
    if len(boxes) > 0:
        model.draw_and_visualize(img, boxes, segments, vis=False, save=True)
4. Calling the ONNX model from C++
cpp
#include <onnxruntime_cxx_api.h>
#include <opencv2/opencv.hpp>
#include <fstream>
using namespace cv;
using namespace std;
int main(int argc, char** argv)
{
cv::Mat frame = cv::imread("cat.jpg", 1);
std::string onnxpath = "yolo11m.onnx";
//step2:load labels
std::vector<std::string> labels;
std::ifstream inputFile("coco.names");
if (inputFile.is_open())
{
std::string classLine;
while (std::getline(inputFile, classLine))
labels.push_back(classLine);
inputFile.close();
}
//step-3:load onnx model
int ih = frame.rows;
int iw = frame.cols;
std::wstring modelPath = std::wstring(onnxpath.begin(), onnxpath.end());
    Ort::SessionOptions session_options = Ort::SessionOptions();
Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "yolov11");
std::cout << "onnxruntime inference try to use GPU Device" << std::endl;
Ort::Session session_(env, modelPath.c_str(), session_options);
std::vector<std::string> input_node_names;
std::vector<std::string> output_node_names;
size_t numInputNodes = session_.GetInputCount();
size_t numOutputNodes = session_.GetOutputCount();
Ort::AllocatorWithDefaultOptions allocator;
input_node_names.reserve(numInputNodes);
int input_w = 0;
int input_h = 0;
for (int i = 0; i < numInputNodes; i++) {
//onnx newest version-1.14
auto input_name = session_.GetInputNameAllocated(i, allocator);
input_node_names.push_back(input_name.get());
//onnx old version-1.8
//input_node_names.push_back(session_.GetInputName(i, allocator));
Ort::TypeInfo input_type_info = session_.GetInputTypeInfo(i);
auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
auto input_dims = input_tensor_info.GetShape();
input_w = input_dims[3];
input_h = input_dims[2];
std::cout << "input format: NxCxHxW = " << input_dims[0] << "x" << input_dims[1] << "x" << input_dims[2] << "x" << input_dims[3] << std::endl;
}
//step-4:get output parameter
int output_h = 0;
int output_w = 0;
Ort::TypeInfo output_type_info = session_.GetOutputTypeInfo(0);
auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
auto output_dims = output_tensor_info.GetShape();
output_h = output_dims[1];
output_w = output_dims[2];
std::cout << "output format : HxW = " << output_dims[1] << "x" << output_dims[2] << std::endl;
for (int i = 0; i < numOutputNodes; i++)
{
//onnx newest version-1.14
auto out_name = session_.GetOutputNameAllocated(i, allocator);
output_node_names.push_back(out_name.get());
//onnx old version-1.8
//output_node_names.push_back(session_.GetOutputName(i, allocator));
}
std::cout << "input: " << input_node_names[0] << " output: " << output_node_names[0] << std::endl;
//step-5:get infer result
int64 start = cv::getTickCount();
int w = frame.cols;
int h = frame.rows;
int _max = std::max(h, w);
cv::Mat image = cv::Mat::zeros(cv::Size(_max, _max), CV_8UC3);
cv::Rect roi(0, 0, w, h);
frame.copyTo(image(roi));
    // scale factors that map from the network input size back to the padded square image
float x_factor = image.cols / static_cast<float>(input_w);
float y_factor = image.rows / static_cast<float>(input_h);
cv::Mat blob = cv::dnn::blobFromImage(image, 1 / 255.0, cv::Size(input_w, input_h), cv::Scalar(0, 0, 0), true, false);
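    // blobFromImage does the whole preprocessing in one call: scales pixel values by 1/255,
    // resizes the square canvas to input_w x input_h, swaps BGR to RGB (swapRB = true) and
    // returns the data in NCHW layout, which is what the exported model expects.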
size_t tpixels = input_h * input_w * 3;
std::array<int64_t, 4> input_shape_info{ 1, 3, input_h, input_w };
// set input data and inference
auto allocator_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
Ort::Value input_tensor_ = Ort::Value::CreateTensor<float>(allocator_info, blob.ptr<float>(), tpixels, input_shape_info.data(), input_shape_info.size());
const std::array<const char*, 1> inputNames = { input_node_names[0].c_str() };
const std::array<const char*, 1> outNames = { output_node_names[0].c_str() };
std::vector<Ort::Value> ort_outputs;
try {
ort_outputs = session_.Run(Ort::RunOptions{ nullptr }, inputNames.data(), &input_tensor_, 1, outNames.data(), outNames.size());
}
    catch (const std::exception& e) {
        std::cout << e.what() << std::endl;
        return -1;
    }
// output data
const float* pdata = ort_outputs[0].GetTensorMutableData<float>();
cv::Mat dout(output_h, output_w, CV_32F, (float*)pdata);
cv::Mat det_output = dout.t(); // 8400x84
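    // The raw output is 1x84x8400: 8400 candidate boxes, each described by 4 box values
    // (cx, cy, w, h) followed by 80 class scores. After transposing, row i is
    // [cx, cy, w, h, score_0 ... score_79], so colRange(4, 84) below selects the class scores.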
// post-process
std::vector<cv::Rect> boxes;
std::vector<int> classIds;
std::vector<float> confidences;
for (int i = 0; i < det_output.rows; i++) {
cv::Mat classes_scores = det_output.row(i).colRange(4, 84);
cv::Point classIdPoint;
double score;
minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);
//between 0~1
if (score > 0.25)
{
float cx = det_output.at<float>(i, 0);
float cy = det_output.at<float>(i, 1);
float ow = det_output.at<float>(i, 2);
float oh = det_output.at<float>(i, 3);
int x = static_cast<int>((cx - 0.5 * ow) * x_factor);
int y = static_cast<int>((cy - 0.5 * oh) * y_factor);
int width = static_cast<int>(ow * x_factor);
int height = static_cast<int>(oh * y_factor);
cv::Rect box;
box.x = x;
box.y = y;
box.width = width;
box.height = height;
boxes.push_back(box);
classIds.push_back(classIdPoint.x);
confidences.push_back(score);
}
}
// NMS
std::vector<int> indexes;
cv::dnn::NMSBoxes(boxes, confidences, 0.25, 0.45, indexes);
for (size_t i = 0; i < indexes.size(); i++) {
int index = indexes[i];
int idx = classIds[index];
cv::rectangle(frame, boxes[index], cv::Scalar(0, 0, 255), 2, 8);
cv::rectangle(frame, cv::Point(boxes[index].tl().x, boxes[index].tl().y - 20),
cv::Point(boxes[index].br().x, boxes[index].tl().y), cv::Scalar(0, 255, 255), -1);
        std::string classString = labels[idx] + ' ' + std::to_string(confidences[index]).substr(0, 4);
putText(frame, classString, cv::Point(boxes[index].tl().x, boxes[index].tl().y), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 0), 2, 8);
cv::imshow("YOLOv11 onnxrunning", frame);
}
    // calculate FPS and render it
float t = (cv::getTickCount() - start) / static_cast<float>(cv::getTickFrequency());
putText(frame, cv::format("FPS: %.2f", 1.0 / t), cv::Point(20, 40), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 0), 2, 8);
cv::imshow("YOLOv11 onnxrunning", frame);
cv::imwrite("result.jpg", frame);
cv::waitKey(0);
session_options.release();
session_.release();
return 0;
}
5. Detection on a video stream
cpp
#include <onnxruntime_cxx_api.h>
#include <opencv2/opencv.hpp>
#include <fstream>
using namespace cv;
using namespace std;
int main(int argc, char** argv)
{
int c = 0;
int frameRate = 10;
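    // Only every frameRate-th frame is pushed through the network (see the c % frameRate
    // check below); the other frames are skipped to keep playback responsive.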
Mat frame;
namedWindow("video-demo", WINDOW_AUTOSIZE);
VideoCapture capture;
    // open the video file
    capture.open("b.mp4");
if (!capture.isOpened()) {
printf("could not load video data...\n");
return -1;
}
    int frames = capture.get(CAP_PROP_FRAME_COUNT); // total number of frames (one frame = one image)
    double fps = capture.get(CAP_PROP_FPS);          // frame rate of the video
    // frame width and height
    Size size = Size(capture.get(CAP_PROP_FRAME_WIDTH), capture.get(CAP_PROP_FRAME_HEIGHT));
cout << frames << endl;
cout << fps << endl;
cout << size << endl;
//cv::Mat frame = cv::imread("2222.jpg",1);
std::string onnxpath = "yolo11n-seg.onnx";
//step2:load labels
std::vector<std::string> labels;
std::ifstream inputFile("coco.names");
if (inputFile.is_open())
{
std::string classLine;
while (std::getline(inputFile, classLine))
labels.push_back(classLine);
inputFile.close();
}
//step-3:load onnx model
std::wstring modelPath = std::wstring(onnxpath.begin(), onnxpath.end());
    Ort::SessionOptions session_options = Ort::SessionOptions();
Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "yolov11");
std::cout << "onnxruntime inference try to use GPU Device" << std::endl;
Ort::Session session_(env, modelPath.c_str(), session_options);
std::vector<std::string> input_node_names;
std::vector<std::string> output_node_names;
size_t numInputNodes = session_.GetInputCount();
size_t numOutputNodes = session_.GetOutputCount();
Ort::AllocatorWithDefaultOptions allocator;
input_node_names.reserve(numInputNodes);
int input_w = 0;
int input_h = 0;
for (int i = 0; i < numInputNodes; i++) {
//onnx newest version-1.14
auto input_name = session_.GetInputNameAllocated(i, allocator);
input_node_names.push_back(input_name.get());
//onnx old version-1.8
//input_node_names.push_back(session_.GetInputName(i, allocator));
Ort::TypeInfo input_type_info = session_.GetInputTypeInfo(i);
auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
auto input_dims = input_tensor_info.GetShape();
input_w = input_dims[3];
input_h = input_dims[2];
std::cout << "input format: NxCxHxW = " << input_dims[0] << "x" << input_dims[1] << "x" << input_dims[2] << "x" << input_dims[3] << std::endl;
}
//step-4:get output parameter
int output_h = 0;
int output_w = 0;
Ort::TypeInfo output_type_info = session_.GetOutputTypeInfo(0);
auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
auto output_dims = output_tensor_info.GetShape();
output_h = output_dims[1];
output_w = output_dims[2];
std::cout << "output format : HxW = " << output_dims[1] << "x" << output_dims[2] << std::endl;
for (int i = 0; i < numOutputNodes; i++)
{
//onnx newest version-1.14
auto out_name = session_.GetOutputNameAllocated(i, allocator);
output_node_names.push_back(out_name.get());
//onnx old version-1.8
//output_node_names.push_back(session_.GetOutputName(i, allocator));
}
std::cout << "input: " << input_node_names[0] << " output: " << output_node_names[0] << std::endl;
    for (;;)
    {
        // process the video frame by frame
        capture >> frame;
        if (frame.empty()) break; // stop when the video ends
        if (c % frameRate == 0)
{
//step-5:get infer result
int64 start = cv::getTickCount();
int w = frame.cols;
int h = frame.rows;
            if (w > 0 && h > 0)
{
int _max = std::max(h, w);
cv::Mat image = cv::Mat::zeros(cv::Size(_max, _max), CV_8UC3);
cv::Rect roi(0, 0, w, h);
frame.copyTo(image(roi));
                // scale factors that map from the network input size back to the padded square image
float x_factor = image.cols / static_cast<float>(input_w);
float y_factor = image.rows / static_cast<float>(input_h);
cv::Mat blob = cv::dnn::blobFromImage(image, 1 / 255.0, cv::Size(input_w, input_h), cv::Scalar(0, 0, 0), true, false);
size_t tpixels = input_h * input_w * 3;
std::array<int64_t, 4> input_shape_info{ 1, 3, input_h, input_w };
// set input data and inference
auto allocator_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
Ort::Value input_tensor_ = Ort::Value::CreateTensor<float>(allocator_info, blob.ptr<float>(), tpixels, input_shape_info.data(), input_shape_info.size());
const std::array<const char*, 1> inputNames = { input_node_names[0].c_str() };
const std::array<const char*, 1> outNames = { output_node_names[0].c_str() };
std::vector<Ort::Value> ort_outputs;
try {
ort_outputs = session_.Run(Ort::RunOptions{ nullptr }, inputNames.data(), &input_tensor_, 1, outNames.data(), outNames.size());
}
                catch (const std::exception& e) {
                    std::cout << e.what() << std::endl;
                }
// output data
const float* pdata = ort_outputs[0].GetTensorMutableData<float>();
cv::Mat dout(output_h, output_w, CV_32F, (float*)pdata);
cv::Mat det_output = dout.t(); // 8400x84
// post-process
std::vector<cv::Rect> boxes;
std::vector<int> classIds;
std::vector<float> confidences;
for (int i = 0; i < det_output.rows; i++) {
cv::Mat classes_scores = det_output.row(i).colRange(4, 84);
cv::Point classIdPoint;
double score;
minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);
//between 0~1
if (score > 0.25)
{
float cx = det_output.at<float>(i, 0);
float cy = det_output.at<float>(i, 1);
float ow = det_output.at<float>(i, 2);
float oh = det_output.at<float>(i, 3);
int x = static_cast<int>((cx - 0.5 * ow) * x_factor);
int y = static_cast<int>((cy - 0.5 * oh) * y_factor);
int width = static_cast<int>(ow * x_factor);
int height = static_cast<int>(oh * y_factor);
cv::Rect box;
box.x = x;
box.y = y;
box.width = width;
box.height = height;
boxes.push_back(box);
classIds.push_back(classIdPoint.x);
confidences.push_back(score);
}
}
// NMS
std::vector<int> indexes;
cv::dnn::NMSBoxes(boxes, confidences, 0.25, 0.45, indexes);
for (size_t i = 0; i < indexes.size(); i++) {
int index = indexes[i];
int idx = classIds[index];
cv::rectangle(frame, boxes[index], cv::Scalar(0, 0, 255), 2, 8);
cv::rectangle(frame, cv::Point(boxes[index].tl().x, boxes[index].tl().y - 20),
cv::Point(boxes[index].br().x, boxes[index].tl().y), cv::Scalar(0, 255, 255), -1);
                    std::string classString = labels[idx] + ' ' + std::to_string(confidences[index]).substr(0, 4);
putText(frame, classString, cv::Point(boxes[index].tl().x, boxes[index].tl().y), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 0), 2, 8);
//cv::imshow("YOLOv11 onnxrunning", frame);
}
float t = (cv::getTickCount() - start) / static_cast<float>(cv::getTickFrequency());
putText(frame, cv::format("FPS: %.2f", 1.0 / t), cv::Point(20, 40), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 0), 2, 8);
imshow("video-demo", frame);
                // press any key during playback to quit
if (waitKey(33) >= 0) break;
}
}
c++;
}
    // release resources
capture.release();
session_options.release();
session_.release();
return 0;
}
Follow-up
Calling this from Qt is, of course, just as straightforward!