opencv+ONNX模型的推理

如前文所述,我们已经编译出带dnn模块的opencv。如果使用简易版的opencv,虽然也可以加载ONNX模型,但是无法利用GPU进行并行计算,导致推理速度比较慢。既然已经有RTX 4060 Ti的环境了,为什么不使用并行计算呢?

opencv加载的模型通常是ONNX模型,而使用yolov8训练自己的数据集得到的模型为.pt,所以通常需要将.pt转为ONNX模型。

(这里为什么使用yolov8?)因为我发现使用yolov5训练的模型opencv无法加载,或者报加载错误。而使用yolov8训练完成之后,直接导出的ONNX模型可以直接使用,原因暂时没找到,可能为版本问题。(待更新)

当然也可以使用ONNX Runtime的API加载ONNX模型,ONNX Runtime的API个人感觉比较通用,但是这里我没有使用。

这里我们的版本

PyTorch版本: 2.6.0+cu126

torchvision版本: 0.21.0+cu126

Python版本: 3.10.0 | packaged by conda-forge | (default, Nov 20 2021, 02:18:13) [MSC v.1916 64 bit (AMD64)]

pytorch去官网下载

https://pytorch.org/

我们的环境是cuda12.6

选择对应的版本

Torchvision 是 PyTorch 深度学习框架的一个重要组成部分,专门用于处理计算机视觉任务。它提供了一系列工具和预训练模型,以帮助开发者在图像处理和视觉识别领域中更有效地工作。

https://mirror.nju.edu.cn/pytorch/whl/cu126/torchvision/

torchvision-0.21.0+cu126-cp310-cp310-win_amd64.whl这个版本

导出模型python代码

下载yolov8代码,安装所需环境

python 复制代码
import ultralytics
from ultralytics import YOLO

# Print the installed ultralytics version for reference.
print(ultralytics.__version__)

# Load the custom-trained weights (per the original note, trained starting
# from the YOLOv8s pretrained model).
model = YOLO('best.pt')

# Export to ONNX with dynamic shapes disabled and a pinned opset version.
export_options = {'format': 'onnx', 'dynamic': False, 'opset': 12}
model.export(**export_options)

C++推理

inference.h

cpp 复制代码
#ifndef INFERENCE_H
#define INFERENCE_H

// Cpp native
#include <fstream>
#include <vector>
#include <string>
#include <random>

// OpenCV / DNN / Inference
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
/// One detected object as returned by Inference::runInference.
struct Detection
{
	int class_id = 0;          ///< Index of the best-scoring class.
	std::string className;     ///< Name looked up from the class list for class_id.
	float confidence = 0.0f;   ///< Score kept for this detection.
	cv::Scalar color;          ///< Drawing colour (runInference assigns a random one).
	cv::Rect box;              ///< Bounding box of the detection.
};

/// Loads an ONNX model through OpenCV's DNN module and converts the raw
/// network output into a list of Detection objects.
class Inference
{
public:
	/// Stores the settings and loads the network.
	/// @param onnxModelPath   Path of the ONNX model file.
	/// @param modelInputShape Size the image is resized to when the blob is built.
	/// @param classesTxtFile  Path of a class-name text file (stored; the
	///                        constructor itself does not read it).
	/// @param runWithCuda     true selects the CUDA backend/target, false the CPU.
	Inference(const std::string& onnxModelPath, const cv::Size& modelInputShape = { 640, 640 }, const std::string& classesTxtFile = "", const bool& runWithCuda = true);

	/// Runs the network on one image and returns the detections kept by NMS.
	std::vector<Detection> runInference(const cv::Mat& input);

private:
	/// Reads class names, one per line, from classesPath.
	void loadClassesFromFile();
	/// Reads the model from modelPath and selects the backend/target.
	void loadOnnxNetwork();
	/// Pads an image with zeros to a square, keeping it in the top-left corner.
	cv::Mat formatToSquare(const cv::Mat& source);

	std::string modelPath;
	std::string classesPath;
	bool cudaEnabled = false;

	// Class names of the 80-class COCO model, kept for reference:
	//std::vector<std::string> classes{ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" };

	// Hard-coded class list of the custom single-class model.
	std::vector<std::string> classes = { "fire" };
	cv::Size2f modelShape;

	float modelConfidenceThreshold = 0.25f; // minimum yolov5 box confidence
	float modelScoreThreshold = 0.45f;      // minimum class score; also passed to NMS
	float modelNMSThreshold = 0.50f;        // overlap threshold passed to NMS

	// When true and the model input is square, images are padded to a square first.
	bool letterBoxForSquare = true;

	cv::dnn::Net net;
};

#endif // INFERENCE_H

inference.cpp

cpp 复制代码
#include "inference.h"

/// Stores the configuration and immediately loads the ONNX network.
/// The class-name file path is only stored: the class list is hard-coded in
/// this example, so loadClassesFromFile() is intentionally not called.
Inference::Inference(const std::string& onnxModelPath, const cv::Size& modelInputShape, const std::string& classesTxtFile, const bool& runWithCuda)
	: modelPath(onnxModelPath)
	, classesPath(classesTxtFile)
	, cudaEnabled(runWithCuda)
	, modelShape(static_cast<float>(modelInputShape.width), static_cast<float>(modelInputShape.height))
{
	loadOnnxNetwork();
}

/// Converts the centre-based box stored at the start of a prediction row
/// (x, y, w, h) into a top-left based rectangle scaled by the given factors.
static cv::Rect scaleBoxToImage(const float* row, float xFactor, float yFactor)
{
	const float x = row[0];
	const float y = row[1];
	const float w = row[2];
	const float h = row[3];

	const int left = int((x - 0.5 * w) * xFactor);
	const int top = int((y - 0.5 * h) * yFactor);
	const int width = int(w * xFactor);
	const int height = int(h * yFactor);

	return cv::Rect(left, top, width, height);
}

/// Runs the network on one image and returns the detections that survive
/// non-maximum suppression.
///
/// @param input Image to analyse. An empty matrix (for example a failed
///              cv::imread) yields an empty result instead of reaching OpenCV.
/// @return One Detection per box kept by NMS, with class id, class name,
///         confidence, a random drawing colour and the scaled box.
std::vector<Detection> Inference::runInference(const cv::Mat& input)
{
	if (input.empty())
		return {};

	cv::Mat modelInput = input;
	if (letterBoxForSquare && modelShape.width == modelShape.height)
		modelInput = formatToSquare(modelInput);

	cv::Mat blob;
	cv::dnn::blobFromImage(modelInput, blob, 1.0 / 255.0, modelShape, cv::Scalar(), true, false);
	net.setInput(blob);

	std::vector<cv::Mat> outputs;
	net.forward(outputs, net.getUnconnectedOutLayersNames());

	// size[1] and size[2] are read below, so a 3-D output is required.
	if (outputs.empty() || outputs[0].dims < 3)
		return {};

	int rows = outputs[0].size[1];
	int dimensions = outputs[0].size[2];

	bool yolov8 = false;
	// yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
	// yolov8 has an output of shape (batchSize, 84,  8400) (Num classes + box[x,y,w,h])
	if (dimensions > rows) // shape[2] larger than shape[1] is treated as yolov8
	{
		yolov8 = true;
		rows = outputs[0].size[2];
		dimensions = outputs[0].size[1];

		// Bring the data into one-prediction-per-row layout, like yolov5.
		outputs[0] = outputs[0].reshape(1, dimensions);
		cv::transpose(outputs[0], outputs[0]);
	}
	float* data = reinterpret_cast<float*>(outputs[0].data);

	// Class scores start after the box (and, for yolov5, the confidence).
	const int scoreOffset = yolov8 ? 4 : 5;
	// Never look at more scores than a row actually holds, even if the class
	// list is longer than the model's class count.
	const int scoresPerRow = dimensions - scoreOffset;
	const int knownClasses = static_cast<int>(classes.size());
	const int numClasses = knownClasses < scoresPerRow ? knownClasses : scoresPerRow;
	if (numClasses <= 0)
		return {};

	const float x_factor = modelInput.cols / modelShape.width;
	const float y_factor = modelInput.rows / modelShape.height;

	std::vector<int> class_ids;
	std::vector<float> confidences;
	std::vector<cv::Rect> boxes;

	for (int i = 0; i < rows; ++i, data += dimensions)
	{
		// Only yolov5 rows carry a box confidence that gates the row.
		if (!yolov8 && !(data[4] >= modelConfidenceThreshold))
			continue;

		cv::Mat scores(1, numClasses, CV_32FC1, data + scoreOffset);
		cv::Point classId;
		double maxClassScore = 0.0;
		cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);

		if (!(maxClassScore > modelScoreThreshold))
			continue;

		// yolov8 keeps the best class score, yolov5 keeps the box confidence.
		confidences.push_back(yolov8 ? static_cast<float>(maxClassScore) : data[4]);
		class_ids.push_back(classId.x);
		boxes.push_back(scaleBoxToImage(data, x_factor, y_factor));
	}

	std::vector<int> nms_result;
	cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);

	// One generator for the whole call; seeding per detection is unnecessary.
	std::random_device rd;
	std::mt19937 gen(rd());
	std::uniform_int_distribution<int> dis(100, 255);

	std::vector<Detection> detections;
	detections.reserve(nms_result.size());
	for (const int idx : nms_result)
	{
		Detection result;
		result.class_id = class_ids[idx];
		result.confidence = confidences[idx];
		result.color = cv::Scalar(dis(gen),
			dis(gen),
			dis(gen));
		result.className = classes[result.class_id];
		result.box = boxes[idx];

		detections.push_back(result);
	}

	return detections;
}

/// Replaces the class list with the names found in classesPath, one per line.
///
/// A file that cannot be opened, or that contains no names, leaves the
/// current list untouched. Names replace the list rather than being appended,
/// so that line N of the file corresponds to class index N.
void Inference::loadClassesFromFile()
{
	std::ifstream inputFile(classesPath);
	if (!inputFile.is_open())
		return;

	std::vector<std::string> loaded;
	std::string classLine;
	while (std::getline(inputFile, classLine))
	{
		// Drop the carriage return left over from CRLF line endings.
		if (!classLine.empty() && classLine.back() == '\r')
			classLine.pop_back();
		if (!classLine.empty())
			loaded.push_back(classLine);
	}

	if (!loaded.empty())
		classes.swap(loaded);
}

/// Reads the ONNX model from modelPath and selects where it runs: the CUDA
/// backend and target when cudaEnabled is set, otherwise the CPU target.
void Inference::loadOnnxNetwork()
{
	// An earlier variant of this function requested DNN_BACKEND_OPENCV for the
	// CPU case. Original author's note: the default backend is used instead to
	// avoid a TBB dependency that DNN_BACKEND_OPENCV may pull in.
	net = cv::dnn::readNetFromONNX(modelPath);

	if (!cudaEnabled)
	{
		std::cout << "\nRunning on CPU" << std::endl;
		net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
		net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
		return;
	}

	std::cout << "\nRunning on CUDA" << std::endl;
	net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
	net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
}

/// Pads an image to a square whose side is the larger of its width and height.
///
/// The source is copied into the top-left corner of a zero-filled canvas.
/// The canvas takes the source's own element type instead of a fixed
/// CV_8UC3, so the copy stays a plain same-type copy for any input type.
///
/// @param source Image to pad.
/// @return The new square image.
cv::Mat Inference::formatToSquare(const cv::Mat& source)
{
	const int side = MAX(source.cols, source.rows);
	cv::Mat result = cv::Mat::zeros(side, side, source.type());
	source.copyTo(result(cv::Rect(0, 0, source.cols, source.rows)));
	return result;
}

main.cpp

cpp 复制代码
#include <iostream>
#include <vector>
#include <opencv2/opencv.hpp>
#include "inference.h"
//#include <onnxruntime_cxx_api.h>
#include <iostream>
#include <windows.h>
using namespace std;
using namespace cv;

int main(int argc, char** argv)
{

	// 动态加载 cuBLAS DLL
	HMODULE hCuBLAS = LoadLibrary(TEXT("C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.6\\bin\\cublasLt64_12.dll"));
	if (hCuBLAS == NULL) {
		cout << "无法加载 cuBLAS DLL" << endl;
		return -1;
	}


	bool runOnGPU = true;

	// 1. 设置你的onnx模型
	// Note that in this example the classes are hard-coded and 'classes.txt' is a place holder.
	Inference  inf("D:/vsworksapce/yolo/x64/Debug/models/best.onnx", cv::Size(640, 640), "classes.txt", runOnGPU); // classes.txt 可以缺失

	// 2. 设置你的输入图片
	std::vector<std::string> imageNames;
	imageNames.push_back("D:/vsworksapce/yolo/x64/Debug/images/fire520.jpg");
	//imageNames.push_back("zidane.jpg");

	for (int i = 0; i < imageNames.size(); ++i)
	{
		cv::Mat frame = cv::imread(imageNames[i]);

		// Inference starts here...
		std::vector<Detection> output = inf.runInference(frame);

		int detections = output.size();
		std::cout << "Number of detections:" << detections << std::endl;

		// feiyull
		// 这里需要resize下,否则结果不对
		//cv::resize(frame, frame, cv::Size(640, 640));

		for (int i = 0; i < detections; ++i)
		{
			Detection detection = output[i];

			cv::Rect box = detection.box;
			cv::Scalar color = detection.color;

			// Detection box
			cv::rectangle(frame, box, color, 2);

			// Detection box text
			std::string classString = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
			cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
			cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);

			cv::rectangle(frame, textBox, color, cv::FILLED);
			cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
		}
		cv::imshow("Inference", frame);
		cv::waitKey(0);
		cv::destroyAllWindows();
	}
}

//std::vector<float> preprocessImage(const cv::Mat& img, const std::vector<int64_t>& inputDims) {
//	int width = inputDims[3];
//	int height = inputDims[2];
//
//	// 调整大小
//	cv::Mat resizedImg;
//	cv::resize(img, resizedImg, cv::Size(width, height));
//
//	// 归一化到 [0, 1]
//	resizedImg.convertTo(resizedImg, CV_32F, 1.0 / 255.0);
//
//	// HWC -> CHW 格式转换
//	std::vector<cv::Mat> channels(3);
//	cv::split(resizedImg, channels);
//
//	std::vector<float> inputData(inputDims[1] * inputDims[2] * inputDims[3]);
//	auto dataPtr = inputData.data();
//
//	for (int i = 0; i < 3; ++i) {
//		std::memcpy(dataPtr, channels[i].data, sizeof(float) * channels[i].total());
//		dataPtr += channels[i].total();
//	}
//
//	return inputData;
//}
//// Parse the YOLOv8 output and draw bounding boxes
//void drawPredictions(cv::Mat& image, const std::vector<float>& output, float conf_threshold, float nms_threshold, int num_classes = 80) {
//	// 假设每个预测包含 [x_center, y_center, width, height, confidence, class_scores...]
//	int num_predictions = output.size() / (5 + num_classes);
//	std::vector<cv::Rect> boxes;
//	std::vector<float> confidences;
//	std::vector<int> classIds;
//
//	for (int i = 0; i < num_predictions; ++i) {
//		float confidence = output[i * (5 + num_classes) + 4];
//		if (confidence < conf_threshold)
//			continue;
//
//		// 获取最大类别分数及其索引
//		int classId = 0;
//		float max_class_score = 0.0f;
//		for (int j = 0; j < num_classes; ++j) {
//			float score = output[i * (5 + num_classes) + 5 + j];
//			if (score > max_class_score) {
//				max_class_score = score;
//				classId = j;
//			}
//		}
//
//		// 确保最终得分(置信度*类别分数)超过阈值
//		float final_confidence = confidence * max_class_score;
//		if (final_confidence < conf_threshold)
//			continue;
//
//		confidences.push_back(final_confidence);
//		classIds.push_back(classId);
//
//		float x_center = output[i * (5 + num_classes)] * image.cols;
//		float y_center = output[i * (5 + num_classes) + 1] * image.rows;
//		float width = output[i * (5 + num_classes) + 2] * image.cols;
//		float height = output[i * (5 + num_classes) + 3] * image.rows;
//
//		int left = static_cast<int>(x_center - width / 2);
//		int top = static_cast<int>(y_center - height / 2);
//
//		boxes.emplace_back(left, top, static_cast<int>(width), static_cast<int>(height));
//	}
//
//	std::vector<int> indices;
//	cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, indices);
//
//	for (int idx : indices) {
//		cv::Rect box = boxes[idx];
//		cv::rectangle(image, box.tl(), box.br(), cv::Scalar(0, 255, 0), 2);
//		std::string label = cv::format("Class %d: %.2f", classIds[idx], confidences[idx]);
//		int baseLine = 0;
//		cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
//		cv::rectangle(image, cv::Point(box.x, box.y - labelSize.height),
//			cv::Point(box.x + labelSize.width, box.y + baseLine),
//			cv::Scalar(255, 255, 255), cv::FILLED);
//		cv::putText(image, label, cv::Point(box.x, box.y), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar());
//	}
//}
//int main() {
//	// 创建环境
//	Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
//
//	// 创建会话选项
//	Ort::SessionOptions session_options;
//
//	// ONNX 模型路径
//	const wchar_t* model_path = L"D:/vsworksapce/yolo/x64/Debug/models/yolov8sbak.onnx";
//
//	// 创建会话
//	Ort::Session session(env, model_path, session_options);
//
//	// 打印模型输入输出信息
//	size_t num_inputs = session.GetInputCount();
//	size_t num_outputs = session.GetOutputCount();
//
//	// 使用 GetInputNameAllocated 和 GetOutputNameAllocated 获取输入输出名字
//	for (size_t i = 0; i < num_inputs; i++) {
//		Ort::AllocatedStringPtr input_name = session.GetInputNameAllocated(i, Ort::AllocatorWithDefaultOptions());
//		std::cout << "Input Name: " << input_name.get() << std::endl;
//	}
//
//	for (size_t i = 0; i < num_outputs; i++) {
//		Ort::AllocatedStringPtr output_name = session.GetOutputNameAllocated(i, Ort::AllocatorWithDefaultOptions());
//		std::cout << "Output Name: " << output_name.get() << std::endl;
//	}
//
//	// 读取输入图像
//	cv::Mat img = cv::imread("D:/vsworksapce/yolo/x64/Debug/images/bus.jpg");
//	if (img.empty()) {
//		std::cerr << "Could not open or find the image!" << std::endl;
//		return -1;
//	}
//
//	// 获取输入维度
//	auto input_node_dims = session.GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
//	auto inputData = preprocessImage(img, input_node_dims);
//
//	// 创建输入张量
//	Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
//	Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info,
//		inputData.data(),
//		inputData.size(),
//		input_node_dims.data(),
//		input_node_dims.size());
//
//	// 推理
//	std::vector<const char*> input_names(num_inputs);
//	std::vector<const char*> output_names(num_outputs);
//
//	for (size_t i = 0; i < num_inputs; i++) {
//		input_names[i] = session.GetInputNameAllocated(i, Ort::AllocatorWithDefaultOptions()).get();
//	}
//	for (size_t i = 0; i < num_outputs; i++) {
//		output_names[i] = session.GetOutputNameAllocated(i, Ort::AllocatorWithDefaultOptions()).get();
//	}
//
//	auto output_tensors = session.Run(Ort::RunOptions{ nullptr },
//		input_names.data(),
//		&input_tensor,
//		num_inputs,
//		output_names.data(),
//		num_outputs);
//
//	// 处理输出...
//	// Process the outputs...
//	for (size_t i = 0; i < num_outputs; i++) {
//		auto& output_tensor = output_tensors[i]; // 使用引用而不是复制
//		float* floatValues = output_tensor.GetTensorMutableData<float>();
//		size_t length = output_tensor.GetTensorTypeAndShapeInfo().GetElementCount();
//
//		std::cout << "Output tensor " << i << ": " << length << " elements" << std::endl;
//		// 这里可以添加你的后处理逻辑
//	}
//
//	// 处理解析输出...
//	for (size_t i = 0; i < output_tensors.size(); ++i) {
//		float* floatValues = output_tensors[i].GetTensorMutableData<float>();
//		std::vector<float> output_vector(floatValues, floatValues + output_tensors[i].GetTensorTypeAndShapeInfo().GetElementCount());
//
//		// 绘制预测结果
//		drawPredictions(img, output_vector, 0.5, 0.4); // 设置置信度阈值和NMS阈值
//	}
//
//
//	// 显示带有预测结果的图像
//	cv::imshow("Detection Results", img);
//	cv::waitKey(0);
//	return 0;
//}

环境C++17

相关推荐
墨利昂8 小时前
词向量:自然语言处理技术体系的核心基石
人工智能·自然语言处理
格林威8 小时前
可见光工业相机半导体制造领域中的应用
图像处理·人工智能·数码相机·计算机视觉·视觉检测·制造·工业相机
星期天要睡觉9 小时前
计算机视觉(opencv)——基于 MediaPipe 人体姿态检测
人工智能·opencv·计算机视觉
资讯全球9 小时前
2025机器人自动化打磨抛光设备及汽车零件打磨新技术10月应用解析
人工智能·机器人·自动化
数智前线9 小时前
京东零售的AI野心:为每个商家打造自己的“AI战队”
人工智能
Cl_rown去掉l变成C9 小时前
第N7周打卡:调用Gensim库训练Word2Vec模型
人工智能·自然语言处理·word2vec
腾讯云开发者10 小时前
腾讯云TVP走进美的,共探智能制造新范式
人工智能
一水鉴天10 小时前
整体设计 逻辑系统程序 之34七层网络的中台架构设计及链路对应讨论(含 CFR 规则与理 / 事代理界定)
人工智能·算法·公共逻辑
我星期八休息10 小时前
C++智能指针全面解析:原理、使用场景与最佳实践
java·大数据·开发语言·jvm·c++·人工智能·python
ECT-OS-JiuHuaShan10 小时前
《元推理框架技术白皮书》,人工智能领域的“杂交水稻“
人工智能·aigc·学习方法·量子计算·空间计算