opencv+ONNX模型的推理

如前文所述,我们已经编译出带dnn模块的opencv。使用简易版的opencv虽然也可以加载ONNX模型,但是无法利用GPU进行并行计算,导致推理速度比较慢。所以既然已经有了RTX4060Ti的环境,为什么不使用并行计算呢?

opencv加载的模型通常是ONNX模型,而使用yolov8训练自己的数据集得到的模型为.pt格式,所以通常需要将.pt转为ONNX模型。

(这里为什么使用yolov8?)因为我发现使用yolov5训练的模型opencv无法加载,或者报加载错误。而使用yolov8训练完成之后,直接导出的ONNX模型可以直接使用,原因暂时没找到,可能为版本问题。(待更新)

当然也可以使用ONNX Runtime的API加载ONNX模型,ONNX Runtime的API个人感觉比较通用,但是这里我没使用。

这里我们的版本

PyTorch版本: 2.6.0+cu126

torchvision版本: 0.21.0+cu126

Python版本: 3.10.0 | packaged by conda-forge | (default, Nov 20 2021, 02:18:13) [MSC v.1916 64 bit (AMD64)]

pytorch去官网下载

https://pytorch.org/

我们的环境是cuda12.6

选择对应的版本

Torchvision 是 PyTorch 深度学习框架的一个重要组成部分,专门用于处理计算机视觉任务。它提供了一系列工具和预训练模型,以帮助开发者在图像处理和视觉识别领域中更有效地工作。

https://mirror.nju.edu.cn/pytorch/whl/cu126/torchvision/

torchvision-0.21.0+cu126-cp310-cp310-win_amd64.whl这个版本

导出模型python代码

下载yolov8代码,安装所需环境

python 复制代码
import ultralytics
from ultralytics import YOLO

# Print the installed ultralytics version so the export can be reproduced.
print(ultralytics.__version__)

# Load our own trained weights (trained starting from the YOLOv8s checkpoint).
model = YOLO('best.pt')

# Export to ONNX with a static input shape and a pinned opset version.
export_options = {'format': 'onnx', 'dynamic': False, 'opset': 12}
model.export(**export_options)

C++推理

inference.h

cpp 复制代码
#ifndef INFERENCE_H
#define INFERENCE_H

// Cpp native
#include <fstream>
#include <vector>
#include <string>
#include <random>

// OpenCV / DNN / Inference
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
/// One object reported by Inference::runInference after non-maximum suppression.
struct Detection
{
	int class_id = 0;          ///< Index of the winning class; also used to look up className.
	std::string className;     ///< Human-readable label for class_id.
	float confidence = 0.0f;   ///< Score that was fed to non-maximum suppression.
	cv::Scalar color;          ///< Drawing colour assigned to this detection.
	cv::Rect box;              ///< Bounding box as (left, top, width, height).
};

/// Loads an ONNX detection model through OpenCV's dnn module and decodes its
/// YOLO-style output tensor into Detection records.
class Inference
{
public:
	/// @param onnxModelPath   Path of the ONNX file to load.
	/// @param modelInputShape Width/height the network input blob is resized to.
	/// @param classesTxtFile  Path of a text file with one class name per line;
	///                        it is stored but not read by the constructor.
	/// @param runWithCuda     true selects the CUDA backend/target, false the CPU.
	Inference(const std::string& onnxModelPath, const cv::Size& modelInputShape = { 640, 640 }, const std::string& classesTxtFile = "", const bool& runWithCuda = true);

	/// Runs the network on one image and returns the detections that survive
	/// score filtering and non-maximum suppression.
	std::vector<Detection> runInference(const cv::Mat& input);

private:
	/// Fills `classes` from the lines of the file at `classesPath`.
	void loadClassesFromFile();

	/// Reads the network from `modelPath` and selects the backend/target
	/// according to `cudaEnabled`.
	void loadOnnxNetwork();

	/// Places `source` in the top-left corner of a square, zero-filled canvas.
	cv::Mat formatToSquare(const cv::Mat& source);

	std::string modelPath;
	std::string classesPath;
	bool cudaEnabled = false;

	// Reference list of the 80 COCO class names, kept for models trained on COCO:
	//std::vector<std::string> classes{ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" };

	// Class names of the custom single-class model used in this example.
	std::vector<std::string> classes = { "fire" };
	cv::Size2f modelShape;

	float modelConfidenceThreshold = 0.25f;   // minimum objectness for yolov5-layout rows
	float modelScoreThreshold = 0.45f;        // minimum best-class score; also the NMS score threshold
	float modelNMSThreshold = 0.50f;          // overlap threshold passed to NMS

	// When true and the model input is square, images are padded to a square first.
	bool letterBoxForSquare{ true };

	cv::dnn::Net net;
};

#endif // INFERENCE_H

inference.cpp

cpp 复制代码
#include "inference.h"

/// Stores the configuration and loads the ONNX network.
///
/// @param onnxModelPath   Path of the ONNX file to load.
/// @param modelInputShape Width/height of the network input, kept as floats.
/// @param classesTxtFile  Path of a class-name file; stored but not read here.
/// @param runWithCuda     Whether the CUDA backend should be requested.
Inference::Inference(const std::string& onnxModelPath, const cv::Size& modelInputShape, const std::string& classesTxtFile, const bool& runWithCuda)
	: modelPath(onnxModelPath)
	, classesPath(classesTxtFile)
	, cudaEnabled(runWithCuda)
	, modelShape(static_cast<float>(modelInputShape.width), static_cast<float>(modelInputShape.height))
{
	// The class names are hard-coded for this example, so loadClassesFromFile()
	// is deliberately not called.
	loadOnnxNetwork();
}

namespace
{
	/// Converts the centre-format box stored in the first four values of a
	/// prediction row (cx, cy, w, h) into a top-left anchored rectangle,
	/// scaling it by the given horizontal and vertical factors.
	cv::Rect decodeCenterBox(const float* row, float xFactor, float yFactor)
	{
		const float cx = row[0];
		const float cy = row[1];
		const float w = row[2];
		const float h = row[3];

		const int left = static_cast<int>((cx - 0.5 * w) * xFactor);
		const int top = static_cast<int>((cy - 0.5 * h) * yFactor);
		const int width = static_cast<int>(w * xFactor);
		const int height = static_cast<int>(h * yFactor);

		return cv::Rect(left, top, width, height);
	}
}

/// Runs the network on one image and decodes the raw output into detections.
///
/// Supports both output layouts handled by this file:
///   - yolov5: (batch, rows, 5 + classes) with an objectness value at index 4
///   - yolov8: (batch, 4 + classes, rows), transposed here to one row per box
///
/// @param input Image to analyse. An empty image yields an empty result.
/// @return Detections that passed the score thresholds and non-maximum
///         suppression. Boxes are expressed in the pixel coordinates of the
///         (optionally square-padded) input image.
std::vector<Detection> Inference::runInference(const cv::Mat& input)
{
	// A failed cv::imread hands us an empty Mat; report "nothing found"
	// instead of tripping an OpenCV assertion further down.
	if (input.empty())
		return {};

	cv::Mat modelInput = input;
	if (letterBoxForSquare && modelShape.width == modelShape.height)
		modelInput = formatToSquare(modelInput);

	cv::Mat blob;
	cv::dnn::blobFromImage(modelInput, blob, 1.0 / 255.0, modelShape, cv::Scalar(), true, false);
	net.setInput(blob);

	std::vector<cv::Mat> outputs;
	net.forward(outputs, net.getUnconnectedOutLayersNames());

	// The decoding below indexes size[1] and size[2] of the first output.
	if (outputs.empty() || outputs[0].dims < 3)
		return {};

	int rows = outputs[0].size[1];
	int dimensions = outputs[0].size[2];

	bool yolov8 = false;
	// yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
	// yolov8 has an output of shape (batchSize, 84,  8400) (Num classes + box[x,y,w,h])
	if (dimensions > rows) // shape[2] larger than shape[1] means the yolov8 layout
	{
		yolov8 = true;
		rows = outputs[0].size[2];
		dimensions = outputs[0].size[1];

		// Bring the tensor into "one prediction per row" form.
		outputs[0] = outputs[0].reshape(1, dimensions);
		cv::transpose(outputs[0], outputs[0]);
	}

	// Class scores start after the box (and, for yolov5, the objectness value).
	const int scoreOffset = yolov8 ? 4 : 5;
	const int scoresInRow = dimensions - scoreOffset;
	const int knownClasses = static_cast<int>(classes.size());
	// Never look at more scores than the row holds, nor at more classes than
	// we have names for; this keeps both the tensor read and the later
	// classes[] lookup in bounds.
	const int classCount = knownClasses < scoresInRow ? knownClasses : scoresInRow;
	if (classCount <= 0)
		return {};

	const float x_factor = modelInput.cols / modelShape.width;
	const float y_factor = modelInput.rows / modelShape.height;

	std::vector<int> class_ids;
	std::vector<float> confidences;
	std::vector<cv::Rect> boxes;

	float* row = reinterpret_cast<float*>(outputs[0].data);
	for (int i = 0; i < rows; ++i, row += dimensions)
	{
		// yolov5 rows carry an objectness value that gates everything else.
		if (!yolov8 && !(row[4] >= modelConfidenceThreshold))
			continue;

		cv::Mat scores(1, classCount, CV_32FC1, row + scoreOffset);
		cv::Point bestClass;
		double bestScore = 0.0;
		cv::minMaxLoc(scores, nullptr, &bestScore, nullptr, &bestClass);

		if (!(bestScore > modelScoreThreshold))
			continue;

		// yolov8 ranks by the class score, yolov5 by the objectness value.
		confidences.push_back(yolov8 ? static_cast<float>(bestScore) : row[4]);
		class_ids.push_back(bestClass.x);
		boxes.push_back(decodeCenterBox(row, x_factor, y_factor));
	}

	std::vector<int> nms_result;
	cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);

	// One generator per call is enough; seeding a new one per detection is
	// needlessly expensive.
	std::mt19937 gen(std::random_device{}());
	std::uniform_int_distribution<int> dis(100, 255);

	std::vector<Detection> detections;
	detections.reserve(nms_result.size());
	for (const int idx : nms_result)
	{
		Detection result;
		result.class_id = class_ids[idx];
		result.confidence = confidences[idx];

		// Random, reasonably bright colour for drawing this detection.
		const int c0 = dis(gen);
		const int c1 = dis(gen);
		const int c2 = dis(gen);
		result.color = cv::Scalar(c0, c1, c2);

		result.className = classes[result.class_id];
		result.box = boxes[idx];

		detections.push_back(result);
	}

	return detections;
}

/// Replaces the class-name list with the lines of the file at `classesPath`.
///
/// Each line is one class name, in class-index order. If the file cannot be
/// opened or contains no names, the existing list is left untouched.
void Inference::loadClassesFromFile()
{
	std::ifstream inputFile(classesPath);
	if (!inputFile.is_open())
		return;

	std::vector<std::string> loaded;
	std::string classLine;
	while (std::getline(inputFile, classLine))
	{
		// Tolerate CRLF files read on platforms that do not translate line ends.
		if (!classLine.empty() && classLine.back() == '\r')
			classLine.pop_back();
		loaded.push_back(classLine);
	}

	// Trailing blank lines are not class names.
	while (!loaded.empty() && loaded.back().empty())
		loaded.pop_back();

	// Replace rather than append: class ids index this list from zero, so
	// keeping the built-in defaults in front would shift every name.
	if (!loaded.empty())
		classes.swap(loaded);
}

/// Reads the ONNX model at `modelPath` into `net` and selects the compute
/// backend/target according to `cudaEnabled`.
void Inference::loadOnnxNetwork()
{
	net = cv::dnn::readNetFromONNX(modelPath);

	if (!cudaEnabled)
	{
		std::cout << "\nRunning on CPU" << std::endl;
		// Use the default CPU backend. Per the original author's note, asking
		// for DNN_BACKEND_OPENCV explicitly may introduce a TBB dependency.
		net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
		net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
		return;
	}

	std::cout << "\nRunning on CUDA" << std::endl;
	net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
	net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
}

/// Pads an image to a square by placing it in the top-left corner of a
/// zero-filled canvas whose side equals the longer image edge.
///
/// @param source Image to pad; it is not modified.
/// @return A new square image with the same element type as `source`.
cv::Mat Inference::formatToSquare(const cv::Mat& source)
{
	const int side = source.cols > source.rows ? source.cols : source.rows;

	// The canvas must share the source's type. With a mismatched type,
	// copyTo() reallocates the temporary ROI header instead of writing into
	// the canvas, which would silently leave the result all black.
	cv::Mat result = cv::Mat::zeros(side, side, source.type());
	source.copyTo(result(cv::Rect(0, 0, source.cols, source.rows)));
	return result;
}

main.cpp

cpp 复制代码
#include <iostream>
#include <vector>
#include <opencv2/opencv.hpp>
#include "inference.h"
//#include <onnxruntime_cxx_api.h>
#include <iostream>
#include <windows.h>
using namespace std;
using namespace cv;

int main(int argc, char** argv)
{

	// 动态加载 cuBLAS DLL
	HMODULE hCuBLAS = LoadLibrary(TEXT("C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.6\\bin\\cublasLt64_12.dll"));
	if (hCuBLAS == NULL) {
		cout << "无法加载 cuBLAS DLL" << endl;
		return -1;
	}


	bool runOnGPU = true;

	// 1. 设置你的onnx模型
	// Note that in this example the classes are hard-coded and 'classes.txt' is a place holder.
	Inference  inf("D:/vsworksapce/yolo/x64/Debug/models/best.onnx", cv::Size(640, 640), "classes.txt", runOnGPU); // classes.txt 可以缺失

	// 2. 设置你的输入图片
	std::vector<std::string> imageNames;
	imageNames.push_back("D:/vsworksapce/yolo/x64/Debug/images/fire520.jpg");
	//imageNames.push_back("zidane.jpg");

	for (int i = 0; i < imageNames.size(); ++i)
	{
		cv::Mat frame = cv::imread(imageNames[i]);

		// Inference starts here...
		std::vector<Detection> output = inf.runInference(frame);

		int detections = output.size();
		std::cout << "Number of detections:" << detections << std::endl;

		// feiyull
		// 这里需要resize下,否则结果不对
		//cv::resize(frame, frame, cv::Size(640, 640));

		for (int i = 0; i < detections; ++i)
		{
			Detection detection = output[i];

			cv::Rect box = detection.box;
			cv::Scalar color = detection.color;

			// Detection box
			cv::rectangle(frame, box, color, 2);

			// Detection box text
			std::string classString = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
			cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
			cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);

			cv::rectangle(frame, textBox, color, cv::FILLED);
			cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
		}
		cv::imshow("Inference", frame);
		cv::waitKey(0);
		cv::destroyAllWindows();
	}
}

//std::vector<float> preprocessImage(const cv::Mat& img, const std::vector<int64_t>& inputDims) {
//	int width = inputDims[3];
//	int height = inputDims[2];
//
//	// 调整大小
//	cv::Mat resizedImg;
//	cv::resize(img, resizedImg, cv::Size(width, height));
//
//	// 归一化到 [0, 1]
//	resizedImg.convertTo(resizedImg, CV_32F, 1.0 / 255.0);
//
//	// HWC -> CHW 格式转换
//	std::vector<cv::Mat> channels(3);
//	cv::split(resizedImg, channels);
//
//	std::vector<float> inputData(inputDims[1] * inputDims[2] * inputDims[3]);
//	auto dataPtr = inputData.data();
//
//	for (int i = 0; i < 3; ++i) {
//		std::memcpy(dataPtr, channels[i].data, sizeof(float) * channels[i].total());
//		dataPtr += channels[i].total();
//	}
//
//	return inputData;
//}
// 解析YOLOv8输出并绘制边界框
//void drawPredictions(cv::Mat& image, const std::vector<float>& output, float conf_threshold, float nms_threshold, int num_classes = 80) {
//	// 假设每个预测包含 [x_center, y_center, width, height, confidence, class_scores...]
//	int num_predictions = output.size() / (5 + num_classes);
//	std::vector<cv::Rect> boxes;
//	std::vector<float> confidences;
//	std::vector<int> classIds;
//
//	for (int i = 0; i < num_predictions; ++i) {
//		float confidence = output[i * (5 + num_classes) + 4];
//		if (confidence < conf_threshold)
//			continue;
//
//		// 获取最大类别分数及其索引
//		int classId = 0;
//		float max_class_score = 0.0f;
//		for (int j = 0; j < num_classes; ++j) {
//			float score = output[i * (5 + num_classes) + 5 + j];
//			if (score > max_class_score) {
//				max_class_score = score;
//				classId = j;
//			}
//		}
//
//		// 确保最终得分(置信度*类别分数)超过阈值
//		float final_confidence = confidence * max_class_score;
//		if (final_confidence < conf_threshold)
//			continue;
//
//		confidences.push_back(final_confidence);
//		classIds.push_back(classId);
//
//		float x_center = output[i * (5 + num_classes)] * image.cols;
//		float y_center = output[i * (5 + num_classes) + 1] * image.rows;
//		float width = output[i * (5 + num_classes) + 2] * image.cols;
//		float height = output[i * (5 + num_classes) + 3] * image.rows;
//
//		int left = static_cast<int>(x_center - width / 2);
//		int top = static_cast<int>(y_center - height / 2);
//
//		boxes.emplace_back(left, top, static_cast<int>(width), static_cast<int>(height));
//	}
//
//	std::vector<int> indices;
//	cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, indices);
//
//	for (int idx : indices) {
//		cv::Rect box = boxes[idx];
//		cv::rectangle(image, box.tl(), box.br(), cv::Scalar(0, 255, 0), 2);
//		std::string label = cv::format("Class %d: %.2f", classIds[idx], confidences[idx]);
//		int baseLine = 0;
//		cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
//		cv::rectangle(image, cv::Point(box.x, box.y - labelSize.height),
//			cv::Point(box.x + labelSize.width, box.y + baseLine),
//			cv::Scalar(255, 255, 255), cv::FILLED);
//		cv::putText(image, label, cv::Point(box.x, box.y), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar());
//	}
//}
//int main() {
//	// 创建环境
//	Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
//
//	// 创建会话选项
//	Ort::SessionOptions session_options;
//
//	// ONNX 模型路径
//	const wchar_t* model_path = L"D:/vsworksapce/yolo/x64/Debug/models/yolov8sbak.onnx";
//
//	// 创建会话
//	Ort::Session session(env, model_path, session_options);
//
//	// 打印模型输入输出信息
//	size_t num_inputs = session.GetInputCount();
//	size_t num_outputs = session.GetOutputCount();
//
//	// 使用 GetInputNameAllocated 和 GetOutputNameAllocated 获取输入输出名字
//	for (size_t i = 0; i < num_inputs; i++) {
//		Ort::AllocatedStringPtr input_name = session.GetInputNameAllocated(i, Ort::AllocatorWithDefaultOptions());
//		std::cout << "Input Name: " << input_name.get() << std::endl;
//	}
//
//	for (size_t i = 0; i < num_outputs; i++) {
//		Ort::AllocatedStringPtr output_name = session.GetOutputNameAllocated(i, Ort::AllocatorWithDefaultOptions());
//		std::cout << "Output Name: " << output_name.get() << std::endl;
//	}
//
//	// 读取输入图像
//	cv::Mat img = cv::imread("D:/vsworksapce/yolo/x64/Debug/images/bus.jpg");
//	if (img.empty()) {
//		std::cerr << "Could not open or find the image!" << std::endl;
//		return -1;
//	}
//
//	// 获取输入维度
//	auto input_node_dims = session.GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
//	auto inputData = preprocessImage(img, input_node_dims);
//
//	// 创建输入张量
//	Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
//	Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info,
//		inputData.data(),
//		inputData.size(),
//		input_node_dims.data(),
//		input_node_dims.size());
//
//	// 推理
//	std::vector<const char*> input_names(num_inputs);
//	std::vector<const char*> output_names(num_outputs);
//
//	for (size_t i = 0; i < num_inputs; i++) {
//		input_names[i] = session.GetInputNameAllocated(i, Ort::AllocatorWithDefaultOptions()).get();
//	}
//	for (size_t i = 0; i < num_outputs; i++) {
//		output_names[i] = session.GetOutputNameAllocated(i, Ort::AllocatorWithDefaultOptions()).get();
//	}
//
//	auto output_tensors = session.Run(Ort::RunOptions{ nullptr },
//		input_names.data(),
//		&input_tensor,
//		num_inputs,
//		output_names.data(),
//		num_outputs);
//
//	// 处理输出...
//	// 处理输出...
//	for (size_t i = 0; i < num_outputs; i++) {
//		auto& output_tensor = output_tensors[i]; // 使用引用而不是复制
//		float* floatValues = output_tensor.GetTensorMutableData<float>();
//		size_t length = output_tensor.GetTensorTypeAndShapeInfo().GetElementCount();
//
//		std::cout << "Output tensor " << i << ": " << length << " elements" << std::endl;
//		// 这里可以添加你的后处理逻辑
//	}
//
//	// 处理解析输出...
//	for (size_t i = 0; i < output_tensors.size(); ++i) {
//		float* floatValues = output_tensors[i].GetTensorMutableData<float>();
//		std::vector<float> output_vector(floatValues, floatValues + output_tensors[i].GetTensorTypeAndShapeInfo().GetElementCount());
//
//		// 绘制预测结果
//		drawPredictions(img, output_vector, 0.5, 0.4); // 设置置信度阈值和NMS阈值
//	}
//
//
//	// 显示带有预测结果的图像
//	cv::imshow("Detection Results", img);
//	cv::waitKey(0);
//	return 0;
//}

环境C++17

相关推荐
Sunhen_Qiletian5 分钟前
深度学习之模型的部署、web框架 服务端及客户端案例
人工智能·深度学习
分享牛6 分钟前
下一代BPMN
人工智能·语言模型·流程图
田里的水稻28 分钟前
FA_规划和控制(PC)-规律路图法(PRM)
人工智能·算法·机器学习·机器人·自动驾驶
AI周红伟29 分钟前
周红伟:具身机器人大爆炸了,机器人时代来临
大数据·人工智能·机器人·大模型·智能体·seedance
weixin_4462608530 分钟前
[特殊字符] 学习大型语言模型的实用指南 - 《Hands-On Large Language Models》
人工智能
yuezhilangniao36 分钟前
AI智能体AI开发「核心概念」速查手册
人工智能
LaughingZhu1 小时前
Product Hunt 每日热榜 | 2026-02-15
人工智能·经验分享·深度学习·神经网络·产品运营
带娃的IT创业者1 小时前
解密OpenClaw系列10-OpenClaw系统要求
人工智能·macos·ios·objective-c·ai智能体·智能体开发·openclaw
志栋智能1 小时前
AI驱动的数据库自动化巡检:捍卫数据王国的“智能中枢”
大数据·运维·数据库·人工智能·云原生·自动化
黑巧克力可减脂1 小时前
Vibe Coding技术方案选型:循道而行,择善而用——从古典智慧看AI编程范式的选型之道
人工智能·语言模型·软件工程·ai编程