使用c++推理yolov8的目标检测模型

文章目录

依赖库
导出onnx
cpp推理代码

依赖库

opencv4.7及以上

导出onnx

python 复制代码

yolo export model=./runs/detect/train2/weights/best.pt imgsz=640 format=onnx opset=12

cpp推理代码

cpp 复制代码

/*
yolov8目标检测推理
*/
#include<opencv.hpp>
#include<iostream>
#include<fstream>
#include<filesystem>
#include<random>


// Pads an image onto a square, zero-filled canvas (top-left aligned); defined after main().
cv::Mat modify_image_size(const cv::Mat& img);


int main()
{
	namespace fs = std::filesystem;

	const char* onnx_file{ "./best.onnx" };
	constexpr int input_size[2]{ 640, 640 };
	double confidence_threshold = 0.5;
	double iou_threshold = 0.7;
	constexpr bool cuda_enabled{ false };
	std::vector<std::string> classesNames{ "back" };

	const std::string testPath("./027.png");
	// 分离出文件名
	std::string filename;
	if (std::filesystem::exists(std::filesystem::path(testPath)) == false)
	{
		std::cout << testPath << " is not exists." << std::endl;
		return -1;
	}
	else
	{
		filename = std::filesystem::path(testPath).filename().string();
		std::cout << "filename = " << filename << std::endl;
	}

	const std::string result_dir{ "./predictResult" };

	auto net = cv::dnn::readNetFromONNX(onnx_file);
	if (net.empty())
	{
		std::cerr << "Error: there are no layers in the network: " << onnx_file << std::endl;
		return -1;
	}
	if (cuda_enabled) {
		net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
		net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
	}
	else {
		net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
		net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
	}
	std::cout << "read model complete." << std::endl;

	if (!fs::exists(result_dir))
	{
		fs::create_directories(result_dir);
	}
#pragma region 预测
	cv::Mat frame = cv::imread(testPath, cv::IMREAD_COLOR);
	if (frame.empty())
	{
		std::cerr << "Warning: unable to load image: " << testPath << std::endl;
		return -1;
	}

	auto tstart = std::chrono::high_resolution_clock::now();
	cv::Mat bgr = modify_image_size(frame);
	std::cout << "bgr.size=" << bgr.size << std::endl;
#if 0
	cv::imshow("w", bgr);
	cv::waitKey();
#endif

	cv::Mat blob;
	cv::dnn::blobFromImage(bgr, blob, 1.0 / 255.0, cv::Size(input_size[1], input_size[0]), cv::Scalar(), true, false);
	net.setInput(blob);

	std::vector<cv::Mat> outputs;
	net.forward(outputs, net.getUnconnectedOutLayersNames());
	std::cout << "outputs.size = " << outputs.size() << std::endl;

	cv::Mat result = outputs[0];
	std::cout << "out0.size=" << result.size << std::endl;
#pragma endregion

	cv::Mat output0 = cv::Mat(outputs[0].size[1], outputs[0].size[2], CV_32FC1, outputs[0].data).t();
	std::cout << "output0.size=" << output0.size << std::endl;

#pragma region 后处理
	std::vector<int> class_ids;
	std::vector<float> confidences;
	std::vector<cv::Rect> boxes;

	float scalex = frame.cols * 1.f / input_size[1]; // note: image_preprocess function
	float scaley = frame.rows * 1.f / input_size[0];
	auto scale = (scalex > scaley) ? scalex : scaley;

	const float* data = (float*)output0.data;
	for (auto i = 0; i < output0.rows; ++i)
	{
		// 第5位是概率
		cv::Mat scores(1, classesNames.size(), CV_32FC1, (float*)data + 4);
		
		cv::Point class_id;		
		double max_class_score;

		//std::cout << scores << std::endl;
		cv::minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
		//std::cout << max_class_score << ", " << class_id << std::endl;

		if (max_class_score > confidence_threshold) {
			confidences.emplace_back(max_class_score);
			class_ids.emplace_back(class_id.x);
			
			float x = data[0];
			float y = data[1];
			float w = data[2];
			float h = data[3];

			int left = std::max(0, std::min(int((x - 0.5 * w) * scale), frame.cols));
			int top = std::max(0, std::min(int((y - 0.5 * h) * scale), frame.rows));
			int width = std::max(0, std::min(int(w * scale), frame.cols - left));
			int height = std::max(0, std::min(int(h * scale), frame.rows - top));
			boxes.emplace_back(cv::Rect(left, top, width, height));
		}

		data += output0.cols;
	}
	std::vector<int> nms_result;
	cv::dnn::NMSBoxes(boxes, confidences, confidence_threshold, iou_threshold, nms_result);
	std::cout << nms_result.size() << std::endl;
#pragma endregion

	std::vector<int> ids;
	std::vector<float> confs;
	std::vector<cv::Rect> rects;
	for (size_t i = 0; i < nms_result.size(); ++i)
	{
		auto index = nms_result[i];

		ids.emplace_back(class_ids[index]);
		confs.emplace_back(confidences[index]);
		boxes[index] = boxes[index] & cv::Rect(0, 0, frame.cols, frame.rows);

		rects.emplace_back(boxes[index]);
	}

#pragma region 画图
	std::random_device rd;
	std::mt19937 gen(rd());
	std::uniform_int_distribution<int> dis(100, 255);
	std::vector<cv::Scalar> colors;
	for (auto i = 0; i < classesNames.size(); ++i)
	{
		colors.emplace_back(cv::Scalar(dis(gen), dis(gen), dis(gen)));
	}

	for (auto i = 0; i < ids.size(); ++i) 
	{
		cv::rectangle(frame, boxes[i], colors[ids[i]], 2);
		//cv::Rect rect = boxes[i]; // xywh

		std::string class_string = classesNames[ids[i]] + ' ' + std::to_string(confidences[i]).substr(0, 4);
		cv::Size text_size = cv::getTextSize(class_string, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
		cv::Rect text_box(boxes[i].x, boxes[i].y - 40, text_size.width + 10, text_size.height + 20);

		cv::rectangle(frame, text_box, colors[ids[i]], cv::FILLED);
		cv::putText(frame, class_string, cv::Point(boxes[i].x + 5, boxes[i].y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
	}

#if 1
	cv::imshow("Inference", frame);
	cv::waitKey(-1);
#endif
#pragma endregion

	return 0;
}


/**
 * Pads an image onto a square, zero-filled canvas.
 *
 * The canvas side equals the larger of the image's width and height, and the
 * image is copied into the top-left corner, so pixel coordinates of the
 * original content are unchanged.
 *
 * @param img Source image; it is not modified.
 * @return A new square matrix of the same type as img, or an empty matrix
 *         when img is empty.
 */
cv::Mat modify_image_size(const cv::Mat& img)
{
	if (img.empty())
	{
		return cv::Mat();
	}

	const int side = std::max(img.rows, img.cols);
	// Allocate the canvas with the source type: copyTo only writes into the
	// ROI when size and type match, otherwise it reallocates the temporary
	// ROI header and the canvas stays all zeros.
	cv::Mat canvas = cv::Mat::zeros(side, side, img.type());
	img.copyTo(canvas(cv::Rect(0, 0, img.cols, img.rows)));

	return canvas;
}