基于OpenCV的YOLOv5图片检测

利用OpenCV的DNN模块加载onnx模型文件进行图片检测。

1、使用yolov5工程代码，调用export.py导出onnx模型。

2、下载opencv版本，https://opencv.org/releases/

使用opencv版本4.5.3或以上，本文使用的opencv4.6.0

3、使用vc2015编写调用代码。

复制代码

// dnnUseOnnx.cpp : Defines the entry point for the console application.

#include <fstream>
#include <iostream>
#include <string>
#include <map>
#include <opencv2/opencv.hpp>

// One detection emitted by YOLOv5Detector::detect after non-maximum suppression.
struct DetectResult
{
	int classId;   // column index of the best-scoring class within the per-class scores
	float score;   // objectness multiplied by the best class score
	cv::Rect box;  // bounding box scaled by the padded-image / network-input ratio
};

// Thin wrapper around an OpenCV DNN network loaded from a YOLOv5 ONNX export.
//
// Usage: call initConfig() once to load the model and choose the backend,
// then call detect() for each image.
class YOLOv5Detector
{
public:
	// Loads the ONNX model at `onnxpath` and stores the network input size
	// (`iw` x `ih`) and the class-score threshold. `bIsEnableCuda` selects the
	// CUDA backend/target instead of the default OpenCV CPU one.
	void initConfig(std::string onnxpath, int iw, int ih, float threshold, bool bIsEnableCuda);

	// Runs inference on `frame`, appends the surviving detections to `result`
	// and draws the boxes plus timing information onto `frame`.
	void detect(cv::Mat& frame, std::vector<DetectResult>& result);

private:
	int input_w = 640;              // network input width in pixels
	int input_h = 640;              // network input height in pixels
	cv::dnn::Net net;               // loaded network; empty until initConfig succeeds
	// Minimum best-class score for a candidate to be kept. This must be a
	// floating-point type: stored as int, 0.25 truncated to 0 and the
	// threshold silently stopped filtering anything.
	float threshold_score = 0.25f;
};

void YOLOv5Detector::initConfig(std::string onnxpath, int iw, int ih, float threshold, bool bIsEnableCuda)
{
	this->input_w = iw;
	this->input_h = ih;
	this->threshold_score = threshold;
	try
	{
		this->net = cv::dnn::readNetFromONNX(onnxpath);

		//依据情况选定是否使用CUDA
		if (bIsEnableCuda)
		{
			std::cout << "Attempty to use CUDA\n";
			net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
			net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA_FP16);
		}
		else
		{
			std::cout << "Running on CPU\n";
			net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
			net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
		}
	}
	catch (cv::Exception & e) {
		printf("exception %s\n", e.err.c_str());
	}
}

// Runs YOLOv5 inference on one image.
//
// frame   - input image; modified in place: a single-channel image is
//           converted to BGR, and detection boxes, label backgrounds and a
//           timing overlay are drawn onto it.
// results - surviving detections are appended (the vector is not cleared).
//
// An empty frame or an output blob of unexpected shape is reported on
// std::cerr and the function returns without touching `results`.
// cv::Exception thrown by OpenCV (for example when no model is loaded)
// propagates to the caller.
void YOLOv5Detector::detect(cv::Mat& frame, std::vector<DetectResult>& results)
{
	// Fixed thresholds used by this pipeline. The objectness threshold stays
	// a double so the comparison below is exactly the one used before.
	const double kObjectnessThreshold = 0.45;
	const float kNmsScoreThreshold = 0.25f;
	const float kNmsIouThreshold = 0.45f;

	if (frame.empty())
	{
		std::cerr << "YOLOv5Detector::detect: input image is empty\n";
		return;
	}

	// Pre-processing: pad the image to a square (top-left aligned, black
	// padding) so the resize to the network input keeps the aspect ratio.
	if (frame.channels() == 1)
	{
		cv::cvtColor(frame, frame, cv::COLOR_GRAY2BGR);
	}
	const int w = frame.cols;
	const int h = frame.rows;
	const int side = std::max(h, w);
	cv::Mat image = cv::Mat::zeros(cv::Size(side, side), CV_8UC3);
	frame.copyTo(image(cv::Rect(0, 0, w, h)));

	// Network coordinates are relative to the configured input size, so the
	// scale back to image pixels must use input_w / input_h rather than a
	// hard-coded 640.
	const float x_factor = image.cols / static_cast<float>(this->input_w);
	const float y_factor = image.rows / static_cast<float>(this->input_h);

	cv::Mat blob = cv::dnn::blobFromImage(image, 1 / 255.0, cv::Size(this->input_w, this->input_h), cv::Scalar(0, 0, 0),
		true, false);
	this->net.setInput(blob);
	// Output layer name: check it with Netron; it is usually "output0" or "output".
	cv::Mat preds = this->net.forward("output0");

	// For a Mat with more than two dimensions rows/cols are -1; the extents
	// have to be read from `size` instead.
	if (preds.dims != 3 || preds.size[2] <= 5)
	{
		std::cerr << "YOLOv5Detector::detect: unexpected output shape (dims=" << preds.dims << ")\n";
		return;
	}
	printf("output：%d,%d,%d\n", preds.size[0], preds.size[1], preds.size[2]);// e.g. output：1,25200,85

	// Layout of the output (e.g. 1 x 25200 x 85): one row per candidate box.
	// Columns 0-3 are centre x, centre y, width, height; column 4 is the
	// objectness confidence; the remaining columns are per-class scores whose
	// arg-max column index is the class id.
	cv::Mat det_output(preds.size[1], preds.size[2], CV_32F, preds.ptr<float>());

	std::vector<cv::Rect> boxes;
	std::vector<int> classIds;
	std::vector<float> confidences;
	for (int i = 0; i < det_output.rows; i++)
	{
		const float* row = det_output.ptr<float>(i);
		const float confidence = row[4];
		if (confidence < kObjectnessThreshold)
		{
			continue;
		}

		// The class scores span from column 5 to the LAST column; the end of
		// the range is the total column count, not the number of classes.
		cv::Mat classes_scores = det_output.row(i).colRange(5, det_output.cols);
		cv::Point classIdPoint;
		double score;
		cv::minMaxLoc(classes_scores, nullptr, &score, nullptr, &classIdPoint);

		// Best class score, between 0 and 1.
		if (score > this->threshold_score)
		{
			const float cx = row[0];
			const float cy = row[1];
			const float ow = row[2];
			const float oh = row[3];

			// Convert centre/size in network coordinates to a top-left
			// rectangle in image pixels.
			cv::Rect box;
			box.x = static_cast<int>((cx - 0.5 * ow) * x_factor);
			box.y = static_cast<int>((cy - 0.5 * oh) * y_factor);
			box.width = static_cast<int>(ow * x_factor);
			box.height = static_cast<int>(oh * y_factor);

			boxes.push_back(box);
			classIds.push_back(classIdPoint.x);
			confidences.push_back(static_cast<float>(score * confidence));
		}
	}

	// Non-maximum suppression over all candidates.
	std::vector<int> indexes;
	cv::dnn::NMSBoxes(boxes, confidences, kNmsScoreThreshold, kNmsIouThreshold, indexes);
	for (size_t i = 0; i < indexes.size(); i++)
	{
		const int index = indexes[i];

		DetectResult dr;
		dr.box = boxes[index];
		dr.classId = classIds[index];
		dr.score = confidences[index];

		// Box outline plus a filled strip above it that serves as label background.
		cv::rectangle(frame, boxes[index], cv::Scalar(0, 0, 255), 2, 8);
		cv::rectangle(frame, cv::Point(boxes[index].tl().x, boxes[index].tl().y - 20),
			cv::Point(boxes[index].br().x, boxes[index].tl().y), cv::Scalar(0, 255, 255), -1);
		results.push_back(dr);
	}

	// Overlay the inference time reported by the network's performance profile.
	std::ostringstream ss;
	std::vector<double> layersTimings;
	const double freq = cv::getTickFrequency() / 1000.0;
	const double time = net.getPerfProfile(layersTimings) / freq;
	ss << "FPS: " << 1000 / time << " ; time : " << time << " ms";
	cv::putText(frame, ss.str(), cv::Point(20, 40), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 0), 2, 8);
}

// Class-id -> label table (ids 0..79) used to caption detections.
// NOTE(review): the names look like the COCO labels of the stock YOLOv5
// models; replace this table when a custom-trained model is used.
std::map<int, std::string> classNames = {
	{ 0, "person" },{ 1, "bicycle" },{ 2, "car" },{ 3, "motorcycle" },{ 4, "airplane" },{ 5, "bus" },{ 6, "train" },{ 7, "truck" },{ 8, "boat" },{ 9, "traffic light" },
	{ 10, "fire hydrant" },{ 11, "stop sign" },{ 12, "parking meter" },{ 13, "bench" },{ 14, "bird" },{ 15, "cat" },{ 16, "dog" },{ 17, "horse" },{ 18, "sheep" },{ 19, "cow" },
	{ 20, "elephant" },{ 21, "bear" },{ 22, "zebra" },{ 23, "giraffe" },{ 24, "backpack" },{ 25, "umbrella" },{ 26, "handbag" },{ 27, "tie" },{ 28, "suitcase" },{ 29, "frisbee" },
	{ 30, "skis" },{ 31, "snowboard" },{ 32, "sports ball" },{ 33, "kite" },{ 34, "baseball bat" },{ 35, "baseball glove" },{ 36, "skateboard" },{ 37, "surfboard" },{ 38, "tennis racket" },{ 39, "bottle" },
	{ 40, "wine glass" },{ 41, "cup" },{ 42, "fork" },{ 43, "knife" },{ 44, "spoon" },{ 45, "bowl" },{ 46, "banana" },{ 47, "apple" },{ 48, "sandwich" },{ 49, "orange" },
	{ 50, "broccoli" },{ 51, "carrot" },{ 52, "hot dog" },{ 53, "pizza" },{ 54, "donut" },{ 55, "cake" },{ 56, "chair" },{ 57, "couch" },{ 58, "potted plant" },{ 59, "bed" },
	{ 60, "dining table" },{ 61, "toilet" },{ 62, "tv" },{ 63, "laptop" },{ 64, "mouse" },{ 65, "remote" },{ 66, "keyboard" },{ 67, "cell phone" },{ 68, "microwave" },{ 69, "oven" },
	{ 70, "toaster" },{ 71, "sink" },{ 72, "refrigerator" },{ 73, "book" },{ 74, "clock" },{ 75, "vase" },{ 76, "scissors" },{ 77, "teddy bear" },{ 78, "hair drier" },{ 79, "toothbrush" }
};

int main(int argc, char* argv[])
{
	std::shared_ptr<YOLOv5Detector> detector = std::make_shared<YOLOv5Detector>();
	detector->initConfig(R"(D:\python-project\yolov5\yolov5s.onnx)", 640, 640, 0.25f, false);

	cv::Mat frame = cv::imread(R"(D:\python-project\yolov5\data\images\bus.jpg)");

	std::vector<DetectResult> results;
	detector->detect(frame, results);
	for (DetectResult& dr : results)
	{
		cv::Rect box = dr.box;
		cv::putText(frame, classNames[dr.classId]+ " "+ std::to_string(dr.score), cv::Point(box.tl().x, box.tl().y - 10), cv::FONT_HERSHEY_SIMPLEX,
			.5, cv::Scalar(0, 0, 0));
	}
	cv::imshow("OpenCV-DNN-yolov5", frame);
	cv::waitKey();
	results.clear();
}

运行效果：

注意事项：

1）、readNetFromONNX加载onnx模型出错。 interp_mode != "asymmetric"，这个错误信息表明你在使用OpenCV的readNetFromONNX函数加载ONNX模型时，模型中的某些节点的插值模式（interp_mode）不是"asymmetric"。

解决方法：使用opencv版本4.5.3或以上，本文使用的opencv4.6.0

2）、检测到的目标比yolov5工程detect.py推理出来的目标少。

解决方法：colRange的结束列要取输出的总列数（5 + 类别数），而不是类别数本身，即 det_output.row(i).colRange(5, preds.size[2])。

复制代码

cv::Mat classes_scores = det_output.row(i).colRange(5, preds.size[2]);