目标检测:yolov7算法在RK3588上部署

YOLOv7是YOLO（You Only Look Once）系列实时目标检测算法的重要里程碑，由WongKinYiu团队在2022年提出。作为单阶段目标检测器的杰出代表，YOLOv7在保持高速推理的同时，显著提升了检测精度，成为工业界和学术界广泛应用的先进模型。

核心架构创新

YOLOv7采用精心设计的网络架构，引入扩展高效层聚合网络（E-ELAN），通过分组卷积、特征重排列等策略优化特征提取能力。该结构在保持梯度路径完整性的同时，增强了不同特征层的信息融合效率。模型还采用了基于串联的模型缩放方法，能够智能平衡计算复杂度与特征表达能力。

训练策略突破

YOLOv7提出了一系列创新的训练技术：

计划重参数化卷积：将训练时的多分支结构与推理时的单一分支巧妙结合，提升性能而不增加推理时间
辅助头训练：在中间层添加辅助检测头，加强梯度传播和特征学习
标签分配策略：改进的软标签分配方法，提供更精确的监督信号

性能表现卓越

在COCO数据集测试中，YOLOv7展现出色性能：在30FPS实时推理条件下达到56.8% AP，超越同规模YOLO版本和Transformer-based检测器。模型提供多种规格（YOLOv7-tiny到YOLOv7-W6），满足从移动端到服务器端的多样化部署需求。

应用优势明显

YOLOv7支持目标检测、实例分割、姿态估计等多任务，具有出色的工程实用性。其优化的计算效率使模型在边缘设备上也能流畅运行，为自动驾驶、视频监控、工业质检等领域提供了可靠的视觉解决方案。开源社区的活跃维护和持续优化，进一步巩固了YOLOv7在实时目标检测领域的领先地位。

总体而言，YOLOv7通过架构创新与训练策略的完美结合，在速度-精度权衡方面达到了新的高度，为实际应用场景提供了强有力的技术支撑。

github网址:

WongKinYiu/yolov7: Implementation of paper - YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors (github.com)https://github.com/WongKinYiu/yolov7?tab=readme-ov-file

onnx转RKNN

复制代码

import cv2
import numpy as np

from rknn.api import RKNN
import os

if __name__ == '__main__':

    isQUANT = True  # True 

    platform = 'rk3588'
    NAME = "yolov7"
    ONNX_DIR = "./"
    OUT_DIR = "./"
    Width = 640
    Height = 640
    MODEL_PATH =  ONNX_DIR + NAME + '.onnx'
    RKNN_NAME = NAME
    if isQUANT:
        RKNN_NAME = NAME + '_quant'
    else:
        RKNN_NAME = NAME + '_FP16'
    
    NEED_BUILD_MODEL = True
    DATASET = './dataset.txt'
    im_file = '/home/zdl/rknn_project/rtdetrv2/d1/t_15_00083.bmp'

    # Create RKNN object
    rknn = RKNN(verbose=False,verbose_file="./outputlog.txt")


    RKNN_MODEL_PATH = './{}/{}.rknn'.format(OUT_DIR,RKNN_NAME)
    if NEED_BUILD_MODEL:
        
        # rknn.config(mean_values=[[0,0,0]], std_values=[[1,1,1]], target_platform=platform,disable_rules=['fuse_two_gather'] )
        rknn.config(mean_values=[[0,0,0]], std_values=[[255.0,255.0,255.0]],target_platform=platform)
        # rknn.config(mean_values=[[0]], std_values=[[255]],target_platform=platform)
        # Load model
        print('--> Loading model')
        ret = rknn.load_onnx(MODEL_PATH,outputs=["onnx::Reshape_491","onnx::Reshape_505","onnx::Reshape_519"])
        # ret = rknn.load_onnx(MODEL_PATH)
        if ret != 0:
            print('load model failed!')
            exit(ret)
        print('done')

        # Build model
        print('--> Building model')
        ret = rknn.build(do_quantization=isQUANT, dataset=DATASET)
        # rknn.accuracy_analysis(inputs=["./errors/2.jpg","./errors/3.jpg"],
        #                             output_dir='snapshot')

        # ret = rknn.accuracy_analysis(inputs=[im_file],output_dir='snapshot')
        if ret != 0:
            print('build model failed.')
            exit(ret)
        print('done')

        # Export rknn model
        if not os.path.exists(OUT_DIR):
            os.mkdir(OUT_DIR)
        print('--> Export RKNN model: {}'.format(RKNN_MODEL_PATH))
        ret = rknn.export_rknn(RKNN_MODEL_PATH)
        if ret != 0:
            print('Export rknn model failed.')
            exit(ret)
        print('done')
    else:
        ret = rknn.load_rknn(RKNN_MODEL_PATH)

    rknn.release()

cpp

复制代码

//
// Created by zdl on 25-1-20.
//
#include "YoloV5.h"


inline static int clamp(float val, int min, int max) {
    return val > min ? (val < max ? val : max) : min;
}

static float sigmoid(float x) { return 1.0 / (1.0 + expf(-x)); }

static float unsigmoid(float y) { return -1.0 * logf((1.0 / y) - 1.0); }

inline static int32_t __clip(float val, float min, float max)
{
    float f = val <= min ? min : (val >= max ? max : val);
    return f;
}

static int8_t qnt_f32_to_affine(float f32, int32_t zp, float scale)
{
    float dst_val = (f32 / scale) + zp;
    int8_t res = (int8_t)__clip(dst_val, -128, 127);
    return res;
}

static float deqnt_affine_to_f32(int8_t qnt, int32_t zp, float scale) { return ((float)qnt - (float)zp) * scale; }

int V5::proprecessV5(std::vector<rknn_output> output, std::vector<float> out_scales,
                            std::vector<int32_t> out_zps,
                            std::vector<cv::Rect> &boxes,
                            std::vector<float> &confidences,
                            std::vector<int> &classids,
                            std::vector<int> &anchorf,
                            float confThres)
{
    int num_output = output.size();
    int anchorfuture = 0;
    for (int n = 0; n < num_output; n++) {
        int grid_w = modelInputSize.width / strides[n]; // 宽
        int grid_h = modelInputSize.height / strides[n]; // 高
        int grid_len = grid_w * grid_h;
        int8_t *data = (int8_t*)output[n].buf;
//        std::cout << "size:  " << output[n].size << std::endl;

        float  thres      = unsigmoid(confThres);
        int8_t thres_i8   = qnt_f32_to_affine(thres, out_zps[n], out_scales[n]);
        for (int a = 0; a < 3; a++) {
            for (int i = 0; i < grid_h; i++) {
                for (int j = 0; j < grid_w; j++) {
                    int8_t box_confidence = data[(PROP_BOX_SIZE * a + 4) * grid_len + i * grid_w + j];
                    if (box_confidence >= thres_i8) {
                        int     offset = (PROP_BOX_SIZE * a) * grid_len + i * grid_w + j;
                        int8_t* in_ptr = data + offset;
                        float   box_x  = sigmoid(deqnt_affine_to_f32(*in_ptr, out_zps[n], out_scales[n])) * 2.0 - 0.5;
                        float   box_y  = sigmoid(deqnt_affine_to_f32(in_ptr[grid_len], out_zps[n], out_scales[n])) * 2.0 - 0.5;
                        float   box_w  = sigmoid(deqnt_affine_to_f32(in_ptr[2 * grid_len], out_zps[n], out_scales[n])) * 2.0;
                        float   box_h  = sigmoid(deqnt_affine_to_f32(in_ptr[3 * grid_len], out_zps[n], out_scales[n])) * 2.0;
                        box_x          = (box_x + j) * (float)strides[n];
                        box_y          = (box_y + i) * (float)strides[n];
                        box_w          = box_w * box_w * (float)anchors.at(strides[n])[a][0];
                        box_h          = box_h * box_h * (float)anchors.at(strides[n])[a][1];
                        box_x -= (box_w / 2.0);
                        box_y -= (box_h / 2.0);

                        int8_t maxClassProbs = in_ptr[5 * grid_len];
                        int    maxClassId    = 0;
                        for (int k = 1; k < DetNumLabel; ++k) {
                            int8_t prob = in_ptr[(5 + k) * grid_len];
                            if (prob > maxClassProbs) {
                                maxClassId    = k;
                                maxClassProbs = prob;
                            }
                        }
                        if (maxClassProbs>thres_i8){
                            confidences.push_back(sigmoid(deqnt_affine_to_f32(maxClassProbs, out_zps[n], out_scales[n]))* sigmoid(deqnt_affine_to_f32(box_confidence, out_zps[n], out_scales[n])));
                            classids.push_back(maxClassId);
                            cv::Rect rect;
                            rect.x = box_x;
                            rect.y = box_y;
                            rect.width = box_w;
                            rect.height = box_h;
//                            boxes.push_back(rect);
                            boxes.push_back(cv::Rect(box_x, box_y, (int)((box_w)), (int)((box_h))));  // 记录框
                            anchorf.push_back(n);
                        }
                    }
                }
            }
        }
    }

    return 0;
}


cv::Mat V5::letterboxYolo(cv::Mat src, int &x_pad, int &y_pad, float &scale,int &channel)
{

    if(channel == 1)
    {
        cv::cvtColor(src,src,cv::COLOR_BGR2GRAY);
    }

    int input_w = modelInputSize.width;  // 模型输入的宽
    int input_h = modelInputSize.height; // 模型输入的高

    int w, h, x, y;
    float r_w = static_cast<float>(input_w) / src.cols;
    float r_h = static_cast<float>(input_h) / src.rows;

    if (r_h > r_w) {
        w = input_w;
        h = std::round(r_w * src.rows);  // 避免浮点数误差
        x = 0;
        y = (input_h - h) / 2;
        scale = r_w;
    } else {
        w = std::round(r_h * src.cols);
        h = input_h;
        x = (input_w - w) / 2;
        y = 0;
        scale = r_h;
    }

    x_pad = x;
    y_pad = y;

    cv::Mat re, out;

    if (src.channels() == 3) {
        re = cv::Mat(h, w, CV_8UC3);
        cv::resize(src, re, re.size(), 0, 0, cv::INTER_LINEAR);
        out = cv::Mat(input_h, input_w, CV_8UC3, cv::Scalar(114, 114, 114));
    } else if (src.channels() == 1) {
        re = cv::Mat(h, w, CV_8UC1);
        cv::resize(src, re, re.size(), 0, 0, cv::INTER_LINEAR);
        out = cv::Mat(input_h, input_w, CV_8UC1, cv::Scalar(114));
    } else {
        return cv::Mat(); // 返回空矩阵，避免错误
    }

    // 将缩放后的图像复制到填充后的图像
    re.copyTo(out(cv::Rect(x, y, re.cols, re.rows)));
    return out;
}


cv::Rect V5::get_rect(cv::Mat &img, cv::Rect bbox)
{
    float l, r, t, b;
    float r_w = modelInputSize.width / (img.cols * 1.0);
    float r_h = modelInputSize.height / (img.rows * 1.0);

    if (r_h > r_w) {
        l = bbox.x;
        r = bbox.x + bbox.width;
        t = bbox.y - (modelInputSize.height - r_w * img.rows) / 2;
        b = bbox.y + bbox.height - (modelInputSize.height - r_w * img.rows) / 2;

        l = l / r_w;
        r = r / r_w;
        t = t / r_w;
        b = b / r_w;
    } else {
        l = bbox.x - (modelInputSize.width - r_h * img.cols) / 2;
        r = bbox.x + bbox.width - (modelInputSize.width - r_h * img.cols) / 2;
        t = bbox.y;
        b = bbox.y + bbox.height;

        l = l / r_h;
        r = r / r_h;
        t = t / r_h;
        b = b / r_h;
        l = l < 0 ? 0 : l;
        t = r < 0 ? 0 : t;
    }

    return cv::Rect(round(l), round(t), round(r - l), round(b - t));
}

结果展示: