YOLOv11导出onnx模型并使用C++调用

引言

YOLO（You Only Look Once）系列是目标检测领域中非常流行的一组算法，因其快速的推理速度和良好的检测精度而闻名。随着版本的不断迭代，YOLOv11引入了多项改进，使其成为处理复杂场景下物体检测任务的强大工具。然而，将YOLOv11应用于生产环境中，尤其是跨平台部署时，我们需要将其导出为一个通用的格式，如ONNX（Open Neural Network Exchange），以便在不同的框架和语言中使用。本文将介绍如何将YOLOv11导出为ONNX模型，并演示如何使用C++调用该模型进行推理。

使用python导出onnx模型

一、安装Ultralytics库

bash 复制代码

pip install ultralytics

二、导出YOLOv11的onnx模型

ultralytics库安装成功后，执行以下代码：

python 复制代码

from ultralytics import YOLO

# Weights file to convert
weights = "yolo11n.pt"
# Build the model from the weights, then write it out in ONNX format
YOLO(weights).export(format="onnx")

执行成功后会生成一个yolo11n.onnx文件，该文件就是YOLOv11的ONNX格式模型。

使用C++调用onnx模型

一、在VS中安装onnxruntime

在VS的"工具"菜单中找到NuGet包管理器，搜索onnxruntime（包名为Microsoft.ML.OnnxRuntime），然后安装即可。

二、配置好OpenCV环境

在OpenCV官网下载：https://opencv.org/releases/

下载完成后双击安装包进行解压，然后进入build文件夹，可以看到以下目录：

在VS中点击【项目】->【属性】->【VC++目录】添加外部包含目录和库目录

点击【链接器】->【输入】，添加附加依赖项：

三、编写C++代码

下面是一个简单的C++代码示例，展示了如何加载ONNX模型并进行推理：

需要把yolo11n.onnx文件和coco.names文件复制到C++项目目录中（程序通过相对路径读取这两个文件）。

cpp 复制代码

#include <algorithm>
#include <array>
#include <exception>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <onnxruntime_cxx_api.h>
#include <opencv2/opencv.hpp>

using namespace cv;
using namespace std;


int main(int argc, char** argv)
{
	//cv::Mat frame = cv::imread("test3.png", 1);
	std::string onnxpath = "yolo11n.onnx";

	//载入标签
	std::vector<std::string> labels;
	std::ifstream inputFile("coco.names");
	if (inputFile.is_open())
	{
		std::string classLine;
		while (std::getline(inputFile, classLine))
			labels.push_back(classLine);
		inputFile.close();
	}

	// 打开摄像头
	cv::VideoCapture capture(0, CAP_DSHOW);
	capture.set(cv::CAP_PROP_FRAME_WIDTH, 640);//宽度 
	capture.set(cv::CAP_PROP_FRAME_HEIGHT, 480);//高度
	capture.set(cv::CAP_PROP_FPS, 30);//帧率 帧/秒
	if (!capture.isOpened())
	{
		std::cout << "无法打开摄像头" << std::endl;
		return -1;
	}

	while (true) {
		cv::Mat frame;
		capture >> frame; // 读取视频帧

		//step-3:load onnx model
		int ih = frame.rows;
		int iw = frame.cols;
		std::wstring modelPath = std::wstring(onnxpath.begin(), onnxpath.end());
		Ort::SessionOptions session_options = Ort::SessionOptions(); 
		Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "yolov11n"); // 创建ONNX Runtime的环境

		//std::cout << "onnxruntime inference try to use GPU Device" << std::endl;
		Ort::Session session_(env, modelPath.c_str(), session_options);// 创建 ONNX Runtime 会话

		std::vector<std::string> input_node_names;
		std::vector<std::string> output_node_names;

		size_t numInputNodes = session_.GetInputCount(); // 获取 ONNX 模型的输入节点数量
		size_t numOutputNodes = session_.GetOutputCount(); // 获取 ONNX 模型的输出节点数量
		Ort::AllocatorWithDefaultOptions allocator;  //创建一个默认的内存分配器对象，用于分配内存
		input_node_names.reserve(numInputNodes); //为 input_node_names 向量预留足够的空间，以避免在添加元素时频繁重新分配内存

		int input_w = 0;
		int input_h = 0;
		for (int i = 0; i < numInputNodes; i++) {
			//onnx newest version-1.14
			auto input_name = session_.GetInputNameAllocated(i, allocator); //获取第 i 个输入节点的名称，并分配内存。
			input_node_names.push_back(input_name.get());


			Ort::TypeInfo input_type_info = session_.GetInputTypeInfo(i); // 获取第 i 个输入节点的类型信息
			auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo(); //从类型信息中获取张量的类型和形状信息
			auto input_dims = input_tensor_info.GetShape(); // 获取输入张量的形状
			input_w = input_dims[3]; // 从输入张量的形状中提取宽度（即第 4 维）
			input_h = input_dims[2]; // 从输入张量的形状中提取高度（即第 3 维）
			//std::cout << "input format: NxCxHxW = " << input_dims[0] << "x" << input_dims[1] << "x" << input_dims[2] << "x" << input_dims[3] << std::endl;
		}
	

		//step-4:get output parameter
		int output_h = 0;
		int output_w = 0;
		// 获取输出节点的类型和形状信息
		Ort::TypeInfo output_type_info = session_.GetOutputTypeInfo(0); 
		auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
		auto output_dims = output_tensor_info.GetShape(); 
		output_h = output_dims[1];
		output_w = output_dims[2];
		//std::cout << "output format : HxW = " << output_dims[1] << "x" << output_dims[2] << std::endl;
		for (int i = 0; i < numOutputNodes; i++)
		{
			//onnx newest version-1.14
			auto out_name = session_.GetOutputNameAllocated(i, allocator);
			output_node_names.push_back(out_name.get());
		}
		

		//step-5:get infer result
		int64 start = cv::getTickCount();
		int w = frame.cols;
		int h = frame.rows;
		int _max = std::max(h, w);
		cv::Mat image = cv::Mat::zeros(cv::Size(_max, _max), CV_8UC3);
		cv::Rect roi(0, 0, w, h);
		frame.copyTo(image(roi));

		// fix bug, boxes consistence!
		float x_factor = image.cols / static_cast<float>(input_w);
		float y_factor = image.rows / static_cast<float>(input_h);

		cv::Mat blob = cv::dnn::blobFromImage(image, 1 / 255.0, cv::Size(input_w, input_h), cv::Scalar(0, 0, 0), true, false);
		// 输入图像的总像素数
		size_t tpixels = input_h * input_w * 3;
		// 定义输入张量的形状
		std::array<int64_t, 4> input_shape_info{ 1, 3, input_h, input_w };

		// set input data and inference
		auto allocator_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); // 创建一个内存分配器信息对象，用于指定 ONNX Runtime 的内存分配方式
		// 创建一个 ONNX Runtime 的输入张量对象
		Ort::Value input_tensor_ = Ort::Value::CreateTensor<float>(allocator_info, blob.ptr<float>(), tpixels, input_shape_info.data(), input_shape_info.size()); 
		const std::array<const char*, 1> inputNames = { input_node_names[0].c_str() }; // 定义输入节点的名称
		const std::array<const char*, 1> outNames = { output_node_names[0].c_str() }; // 定义输出节点的名称。
		std::vector<Ort::Value> ort_outputs; // 定义一个向量，用于存储 ONNX Runtime 的输出张量
		try {
			// 执行推理
			ort_outputs = session_.Run(Ort::RunOptions{ nullptr }, inputNames.data(), &input_tensor_, 1, outNames.data(), outNames.size());
		}
		catch (std::exception e) {
			std::cout << e.what() << std::endl;
		}
		/*********** 后处理 **************/
		// output data
		const float* pdata = ort_outputs[0].GetTensorMutableData<float>(); // 获取 ONNX Runtime 推理输出的数据指针
		cv::Mat dout(output_h, output_w, CV_32F, (float*)pdata); // 将推理输出数据转换为 OpenCV 的 cv::Mat 对象
		// 对输出数据进行转置操作
		cv::Mat det_output = dout.t(); // 8400x84

		// post-process
		std::vector<cv::Rect> boxes;
		std::vector<int> classIds;
		std::vector<float> confidences;

		for (int i = 0; i < det_output.rows; i++) {
			cv::Mat classes_scores = det_output.row(i).colRange(4, 84);
			cv::Point classIdPoint;
			double score;
			// 找到类别分数的最大值及其位置
			minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);

			//between 0～1
			if (score > 0.25)
			{
				float cx = det_output.at<float>(i, 0);
				float cy = det_output.at<float>(i, 1);
				float ow = det_output.at<float>(i, 2);
				float oh = det_output.at<float>(i, 3);
				int x = static_cast<int>((cx - 0.5 * ow) * x_factor);
				int y = static_cast<int>((cy - 0.5 * oh) * y_factor);
				int width = static_cast<int>(ow * x_factor);
				int height = static_cast<int>(oh * y_factor);
				cv::Rect box;
				box.x = x;
				box.y = y;
				box.width = width;
				box.height = height;

				boxes.push_back(box);
				classIds.push_back(classIdPoint.x);
				confidences.push_back(score);
			}
		}
		
		// NMS 对检测框进行非极大值抑制（NMS）
		std::vector<int> indexes;
		cv::dnn::NMSBoxes(boxes, confidences, 0.25, 0.45, indexes);
		
		for (size_t i = 0; i < indexes.size(); i++) {
			int index = indexes[i]; // 表示预测结果的索引
			int idx = classIds[index]; // 获取类别索引
			cv::rectangle(frame, boxes[index], cv::Scalar(0, 0, 255), 2, 8);
			cv::rectangle(frame, cv::Point(boxes[index].tl().x, boxes[index].tl().y - 20),
				cv::Point(boxes[index].br().x, boxes[index].tl().y), cv::Scalar(0, 255, 255), -1);

			
			std::string classString = labels[idx] + ' ' + std::to_string(confidences[index]).substr(0, 4);
			putText(frame, classString, cv::Point(boxes[index].tl().x, boxes[index].tl().y), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 0), 2, 8);
			
			
		}
		
		
		
		//calculate FPS render it
		float t = (cv::getTickCount() - start) / static_cast<float>(cv::getTickFrequency());
		
		putText(frame, cv::format("FPS: %.2f", 1.0 / t), cv::Point(20, 40), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 0), 2, 8);
		cv::imshow("YOLOv11 onnxrunning", frame);
		
		
		// 按下ESC键退出循环
		session_options.release();
		session_.release();

		if (cv::waitKey(1) == 27)
			break;
		
	}
	

	
	return 0;
}