python环境的yolov11.rknn物体检测

1.首先是我手里生成的一个yolo11的.rknn模型:

2.比对一下yolov5的模型:

2.1 yolov5模型的后期处理:

python 复制代码
    # Run inference on the input batch `img2`, declared to the runtime as NHWC.
    outputs = rknn.inference(inputs=[img2], data_format=['nhwc'])
    # Persist the first three raw outputs as .npy files.
    np.save('./onnx_yolov5_0.npy', outputs[0])
    np.save('./onnx_yolov5_1.npy', outputs[1])
    np.save('./onnx_yolov5_2.npy', outputs[2])
    print('done')

    # post process
    input0_data = outputs[0]
    input1_data = outputs[1]
    input2_data = outputs[2]

    # Regroup every output as (3, -1, <last two axes>).
    # NOTE(review): presumably the 3 groups are the anchors of each head and
    # the last two axes are the grid height/width - confirm against the model.
    input0_data = input0_data.reshape([3, -1]+list(input0_data.shape[-2:]))
    input1_data = input1_data.reshape([3, -1]+list(input1_data.shape[-2:]))
    input2_data = input2_data.reshape([3, -1]+list(input2_data.shape[-2:]))

    # Move the last two axes to the front: (3, C, A, B) -> (A, B, 3, C).
    input_data = list()
    input_data.append(np.transpose(input0_data, (2, 3, 0, 1)))
    input_data.append(np.transpose(input1_data, (2, 3, 0, 1)))
    input_data.append(np.transpose(input2_data, (2, 3, 0, 1)))

然后:

python 复制代码
def yolov5_post_process(input_data):
    """Decode and threshold every detection head (excerpt).

    Pairs each element of ``input_data`` with one anchor mask, runs
    ``process`` followed by ``filter_boxes`` on it and collects the three
    results in per-head lists.

    NOTE(review): this excerpt ends after the loop and has no return
    statement, so as shown it returns ``None``.
    """
    # Each mask lists the indices into `anchors` that belong to one head.
    masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
               [59, 119], [116, 90], [156, 198], [373, 326]]

    boxes, classes, scores = [], [], []
    for input, mask in zip(input_data, masks):
        # process() returns (box, box_confidence, box_class_probs) ...
        b, c, s = process(input, mask, anchors)
        # ... which filter_boxes() turns into (boxes, classes, scores).
        b, c, s = filter_boxes(b, c, s)
        boxes.append(b)
        classes.append(c)
        scores.append(s)

然后:

python 复制代码
def process(input, mask, anchors):
    """Decode one YOLOv5 head into boxes, objectness and class scores.

    Args:
        input: Array indexed as ``[grid_row, grid_col, anchor, channel]``.
            Channels 0-1 are the xy offsets, 2-3 the wh factors, 4 the
            objectness and 5+ the per-class scores.  The values are used
            as-is; no activation is applied here.
        mask: Indices into ``anchors`` selecting the anchors of this head.
            Its length must match the anchor axis of ``input``.
        anchors: Sequence of two-element anchor sizes.

    Returns:
        Tuple ``(box, box_confidence, box_class_probs)`` where ``box`` holds
        ``[x, y, w, h]`` on the last axis, ``box_confidence`` keeps a
        trailing axis of length 1 and ``box_class_probs`` is the class slice.
    """
    head_anchors = np.asarray([anchors[i] for i in mask])
    grid_h, grid_w = map(int, input.shape[0:2])

    box_confidence = np.expand_dims(input[..., 4], axis=-1)
    box_class_probs = input[..., 5:]

    # Column/row index of every cell, shaped (grid_h, grid_w, 1, 2) so that
    # it broadcasts over any number of anchors.  The previous np.tile based
    # construction used the wrong repeat counts and a fixed repeat(3), so it
    # only worked for square maps with exactly three anchors.
    col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
    grid = np.stack((col, row), axis=-1)[:, :, np.newaxis, :]

    box_xy = input[..., :2] * 2 - 0.5
    box_xy += grid
    # NOTE(review): one stride derived from grid_h is applied to both axes,
    # which assumes a square network input - confirm before feeding
    # rectangular inputs.
    box_xy *= int(IMG_SIZE / grid_h)

    box_wh = (input[..., 2:4] * 2) ** 2
    box_wh = box_wh * head_anchors

    box = np.concatenate((box_xy, box_wh), axis=-1)

    return box, box_confidence, box_class_probs

3.修改1 - 基于昨天在宿主机上成功执行的onnx代码:

python 复制代码
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import os
import sys
from math import exp
import cv2
import numpy as np

# Make the current working directory importable.
ROOT = os.getcwd()
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

# Pre-built RKNN model and the sample image used by the __main__ block.
RKNN_MODEL = r'/home/firefly/app/models/sim_moonpie-640-640_rk3588.rknn'
IMG_PATH = '/home/firefly/app/images/cake26.jpg'
QUANTIZE_ON = False

# Class-name table, indexed by class id.
# NOTE(review): this list has 81 entries and 'moonpie' appears both as the
# first and as the last one; class_num below is derived from its length.
CLASSES = ['moonpie', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
         'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
         'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
         'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
         'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
         'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
         'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
         'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
         'hair drier', 'toothbrush', 'moonpie']

# Flat list of (x, y) cell centres, filled by YOLOV11DetectObj.GenerateMeshgrid.
meshgrid = []

class_num = len(CLASSES)
# Number of detection heads; strides[i] and mapSize[i] describe head i.
headNum = 3
strides = [8, 16, 32]
mapSize = [[80, 80], [40, 40], [20, 20]]
# IoU above which a lower-scored box of the same class is suppressed.
nmsThresh = 0.45
# Minimum class score (after sigmoid) for a cell to yield a detection.
objectThresh = 0.5

# Network input size; detections are rescaled from it to the source image.
input_imgH = 640
input_imgW = 640

from rknn.api import RKNN 
def rk3588_detect(model, pic, classes):
    """Run a single inference with a pre-built ``.rknn`` model on an RK3588.

    Args:
        model: Path of the ``.rknn`` file to load.
        pic: Input array handed to ``rknn.inference`` as the only input; it
            is declared to the runtime with ``data_format=['nhwc']``.
        classes: Unused; kept so existing callers keep working.

    Returns:
        Whatever ``rknn.inference`` returns for this input.

    Raises:
        RuntimeError: If loading the model or initialising the runtime
            reports a non-zero status.
    """
    rknn = RKNN(verbose=True)
    try:
        # No rknn.config() call here: the model is loaded in its already
        # built form.
        ret = rknn.load_rknn(path=model)
        if ret != 0:
            raise RuntimeError(f'load_rknn failed for {model!r} (ret={ret})')

        ret = rknn.init_runtime(target="rk3588", core_mask=RKNN.NPU_CORE_AUTO)
        if ret != 0:
            raise RuntimeError(f'init_runtime failed (ret={ret})')

        return rknn.inference(inputs=[pic], data_format=['nhwc'])
    finally:
        # Always give the runtime handle back, also on failure.
        rknn.release()

class DetectBox:
    """Plain mutable record for one detection.

    Stores the class id, its score and the corner coordinates
    ``xmin, ymin, xmax, ymax`` exactly as they are passed in.
    """

    def __init__(self, classId, score, xmin, ymin, xmax, ymax):
        self.classId, self.score = classId, score
        self.xmin, self.ymin = xmin, ymin
        self.xmax, self.ymax = xmax, ymax

class YOLOV11DetectObj:
    """YOLOv11 detection pipeline: preprocess, inference, decode, NMS, draw.

    The methods read the module-level configuration (``headNum``,
    ``mapSize``, ``strides``, ``class_num``, ``objectThresh``,
    ``nmsThresh``, ``input_imgH``, ``input_imgW``) at call time.
    """

    def __init__(self):
        pass

    def GenerateMeshgrid(self):
        """Rebuild the module-level ``meshgrid`` list of cell centres.

        For every head, cells are visited row by row and the pair
        ``(col + 0.5, row + 0.5)`` is appended.  The list is cleared first so
        that calling this more than once does not keep growing it.
        """
        meshgrid.clear()
        for index in range(headNum):
            grid_h, grid_w = mapSize[index]
            for i in range(grid_h):
                for j in range(grid_w):
                    meshgrid.append(j + 0.5)
                    meshgrid.append(i + 0.5)

    def IOU(self, xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2):
        """Return the intersection-over-union of two corner-format boxes.

        Returns ``0.0`` when the union area is not positive (for example two
        zero-area boxes) instead of dividing by zero.
        """
        inner_w = max(min(xmax1, xmax2) - max(xmin1, xmin2), 0)
        inner_h = max(min(ymax1, ymax2) - max(ymin1, ymin2), 0)
        inner_area = inner_w * inner_h

        area1 = (xmax1 - xmin1) * (ymax1 - ymin1)
        area2 = (xmax2 - xmin2) * (ymax2 - ymin2)
        total = area1 + area2 - inner_area

        if total <= 0:
            return 0.0
        return inner_area / total

    def NMS(self, detectResult):
        """Greedy per-class non-maximum suppression.

        Args:
            detectResult: Iterable of objects exposing ``classId``, ``score``,
                ``xmin``, ``ymin``, ``xmax`` and ``ymax``.  Entries whose
                ``classId`` is ``-1`` are ignored.

        Returns:
            The kept boxes, ordered by descending score.  The input objects
            are not modified.
        """
        ordered = sorted(detectResult, key=lambda box: box.score, reverse=True)
        # Track suppression locally rather than overwriting classId.
        suppressed = [box.classId == -1 for box in ordered]

        predBoxs = []
        for i, keeper in enumerate(ordered):
            if suppressed[i]:
                continue
            predBoxs.append(keeper)
            for j in range(i + 1, len(ordered)):
                other = ordered[j]
                if suppressed[j] or other.classId != keeper.classId:
                    continue
                # Must go through self: IOU is a method, not a module function.
                iou = self.IOU(keeper.xmin, keeper.ymin, keeper.xmax, keeper.ymax,
                               other.xmin, other.ymin, other.xmax, other.ymax)
                if iou > nmsThresh:
                    suppressed[j] = True
        return predBoxs

    def sigmoid(self, x):
        """Return the logistic function of ``x`` without overflowing."""
        if x >= 0:
            return 1 / (1 + exp(-x))
        # For negative x, exp(-x) can overflow; exp(x) only underflows to 0.
        z = exp(x)
        return z / (1 + z)

    def postprocess(self, out, img_h, img_w):
        """Decode the raw head outputs into NMS-filtered detections.

        Args:
            out: Sequence of arrays; for head ``i`` element ``2 * i`` holds
                the box regression (4 sides x 16 bins per cell) and element
                ``2 * i + 1`` the class logits (``class_num`` per cell), both
                laid out channel-major over the ``mapSize[i]`` grid.
            img_h: Height of the source image the boxes are mapped to.
            img_w: Width of the source image the boxes are mapped to.

        Returns:
            List of ``DetectBox`` kept by ``NMS``, in source-image pixels and
            clipped to the image.

        Raises:
            ValueError: If fewer than ``2 * headNum`` outputs are given or an
                output does not hold the number of values implied by
                ``mapSize`` and ``class_num``.
        """
        print('postprocess ... ')
        for item in out:
            print(item.shape)

        if len(out) < headNum * 2:
            raise ValueError(
                f'expected at least {headNum * 2} outputs, got {len(out)}')

        scale_h = img_h / input_imgH
        scale_w = img_w / input_imgW
        bins = np.arange(16, dtype=np.float64)

        detectResult = []
        for index in range(headNum):
            grid_h, grid_w = mapSize[index]
            stride = strides[index]
            cells = grid_h * grid_w

            # float64 copies: the caller's arrays are never modified.
            reg = np.asarray(out[index * 2 + 0], dtype=np.float64).reshape(-1)
            cls = np.asarray(out[index * 2 + 1], dtype=np.float64).reshape(-1)
            if reg.size != 4 * 16 * cells:
                raise ValueError(
                    f'head {index}: box output has {reg.size} values, '
                    f'expected {4 * 16 * cells}')
            if cls.size != class_num * cells:
                # Typically the class list does not match the exported model.
                raise ValueError(
                    f'head {index}: class output has {cls.size} values, '
                    f'expected {class_num * cells} '
                    f'({class_num} classes on a {grid_h}x{grid_w} grid)')
            reg = reg.reshape(4, 16, grid_h, grid_w)
            cls = cls.reshape(class_num, grid_h, grid_w)

            # Best class per cell (first maximum wins), then a stable sigmoid.
            best_cls = cls.argmax(axis=0)
            best_logit = cls.max(axis=0)
            decay = np.exp(-np.abs(best_logit))
            score = np.where(best_logit >= 0,
                             1.0 / (1.0 + decay),
                             decay / (1.0 + decay))

            # Row-major order, i.e. the order of the nested h/w loops.
            rows, cols = np.nonzero(score > objectThresh)
            if rows.size == 0:
                continue

            # Softmax over the 16 bins of each side, then the expected bin
            # index is the distance from the cell centre (in grid units).
            logits = reg[:, :, rows, cols]
            logits = logits - logits.max(axis=1, keepdims=True)
            weights = np.exp(logits)
            weights /= weights.sum(axis=1, keepdims=True)
            dist = (weights * bins[np.newaxis, :, np.newaxis]).sum(axis=1)

            cx = cols + 0.5
            cy = rows + 0.5
            xmin = np.maximum((cx - dist[0]) * stride * scale_w, 0)
            ymin = np.maximum((cy - dist[1]) * stride * scale_h, 0)
            xmax = np.minimum((cx + dist[2]) * stride * scale_w, img_w)
            ymax = np.minimum((cy + dist[3]) * stride * scale_h, img_h)

            picked = zip(best_cls[rows, cols].tolist(),
                         score[rows, cols].tolist(),
                         xmin.tolist(), ymin.tolist(),
                         xmax.tolist(), ymax.tolist())
            for cls_id, cls_score, x0, y0, x1, y1 in picked:
                detectResult.append(DetectBox(cls_id, cls_score, x0, y0, x1, y1))

        # NMS
        print('detectResult:', len(detectResult))
        predBox = self.NMS(detectResult)

        return predBox

    def precess_image(self, img_src, resize_w, resize_h):
        """Resize a BGR image and return it as float32 RGB scaled by 1/255.

        NOTE(review): whether the loaded model expects inputs that are
        already divided by 255 depends on how it was built - confirm.
        """
        print(f'{type(img_src)}')
        image = cv2.resize(img_src, (resize_w, resize_h), interpolation=cv2.INTER_LINEAR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image.astype(np.float32)
        image /= 255.0

        return image

    def detect(self, img_path):
        """Detect objects in ``img_path`` and write the annotated image.

        Reads the image, runs ``rk3588_detect`` on the preprocessed copy,
        decodes the outputs and saves the result to
        ``./test_onnx_result.jpg``.  Prints a message and returns ``None``
        when the image cannot be read or inference yields no outputs.
        """
        self.GenerateMeshgrid()

        orig = cv2.imread(img_path)
        if orig is None:
            print(f"无法读取图像: {img_path}")
            return
        img_h, img_w = orig.shape[:2]
        image = self.precess_image(orig, input_imgW, input_imgH)

        # rk3588_detect declares the input as NHWC, so keep the HWC layout
        # and only add the batch axis (the former CHW transpose belonged to
        # the ONNX runtime version of this script).
        image = np.expand_dims(image, axis=0)

        pred_results = rk3588_detect(RKNN_MODEL, image, CLASSES)
        if pred_results is None:
            print('inference returned no outputs')
            return

        predbox = self.postprocess(list(pred_results), img_h, img_w)

        print('obj num is :', len(predbox))

        for box in predbox:
            xmin = int(box.xmin)
            ymin = int(box.ymin)
            xmax = int(box.xmax)
            ymax = int(box.ymax)

            cv2.rectangle(orig, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
            title = CLASSES[box.classId] + "%.2f" % box.score
            cv2.putText(orig, title, (xmin, ymin), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA)

        cv2.imwrite('./test_onnx_result.jpg', orig)


if __name__ == '__main__':
    # Script entry point: run the detector once on the configured sample image.
    print('This is main ....')
    img_path = IMG_PATH
    obj = YOLOV11DetectObj()
    obj.detect(img_path)

输出不对:

firefly@firefly:~/app/test$ python3 ./detect_rk3588.py

This is main ....

<class 'numpy.ndarray'>

I rknn-toolkit2 version: 2.3.0

I target set by user is: rk3588

postprocess ...

(1, 64, 80, 80)

(1, 81, 80, 80)

(1, 64, 40, 40)

(1, 81, 40, 40)

(1, 64, 20, 20)

(1, 81, 20, 20)

detectResult: 0

obj num is : 0

4.修改2 - 基于yolov5.rknn代码

4.1 似乎有如下映射关系:

似乎:yolo11.output5 == yolo5.output2,

yolo11.output3 == yolo5.output1,

yolo11.output1 == yolo5.output0

python 复制代码
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import os
import sys
import urllib
import traceback
import time
import numpy as np
import cv2
from rknn.api import RKNN

# Make the current working directory importable.
ROOT = os.getcwd()
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

# Model from https://github.com/airockchip/rknn_model_zoo  yolov11->selftrained. yolo11s?
# ONNX_MODEL is the conversion source, RKNN_MODEL the exported/loaded model.
ONNX_MODEL = r'/home/firefly/app/models/yolo11_selfgen.onnx'
RKNN_MODEL = r'/home/firefly/app/models/new_moonpie_yolo11_640x640.rknn'
IMG_PATH = r'/home/firefly/app/images/cake26.jpg'
# Passed to rknn.build(do_quantization=...); DATASET is the build dataset file.
QUANTIZE_ON = True
DATASET=r'./dataset.txt'

# Class-name table, indexed by class id (80 entries, 'moonpie' is id 0).
CLASSES = ['moonpie', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
         'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
         'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
         'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
         'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
         'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
         'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
         'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
         'hair drier', 'toothbrush']

meshgrid = []

class_num = len(CLASSES)
headNum = 3
strides = [8, 16, 32]
mapSize = [[80, 80], [40, 40], [20, 20]]
# Network input size; IMG_SIZE is what process() uses to derive the stride.
input_imgH = 640
input_imgW = 640
IMG_SIZE = input_imgH
# NOTE(review): second assignment of QUANTIZE_ON (same value as above).
QUANTIZE_ON = True

# Score threshold used by filter_boxes and IoU threshold used by nms_boxes.
OBJ_THRESH = 0.25
NMS_THRESH = 0.45

def xywh2xyxy(x):
    """Convert rows of ``[x, y, w, h]`` into ``[x1, y1, x2, y2]``.

    ``x`` and ``y`` are treated as the box centre.  A converted copy is
    returned; the argument is left untouched.
    """
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2

    corners = np.copy(x)
    corners[:, 0] = x[:, 0] - half_w  # left
    corners[:, 1] = x[:, 1] - half_h  # top
    corners[:, 2] = x[:, 0] + half_w  # right
    corners[:, 3] = x[:, 1] + half_h  # bottom
    return corners


def process(input, mask, anchors):
    """Decode one YOLOv5 head into boxes, objectness and class scores.

    Args:
        input: Array indexed as ``[grid_row, grid_col, anchor, channel]``.
            Channels 0-1 are the xy offsets, 2-3 the wh factors, 4 the
            objectness and 5+ the per-class scores.  The values are used
            as-is; no activation is applied here.
        mask: Indices into ``anchors`` selecting the anchors of this head.
            Its length must match the anchor axis of ``input``.
        anchors: Sequence of two-element anchor sizes.

    Returns:
        Tuple ``(box, box_confidence, box_class_probs)`` where ``box`` holds
        ``[x, y, w, h]`` on the last axis, ``box_confidence`` keeps a
        trailing axis of length 1 and ``box_class_probs`` is the class slice.
    """
    head_anchors = np.asarray([anchors[i] for i in mask])
    grid_h, grid_w = map(int, input.shape[0:2])

    box_confidence = np.expand_dims(input[..., 4], axis=-1)
    box_class_probs = input[..., 5:]

    # Column/row index of every cell, shaped (grid_h, grid_w, 1, 2) so that
    # it broadcasts over any number of anchors.  The previous np.tile based
    # construction used the wrong repeat counts and a fixed repeat(3), so it
    # only worked for square maps with exactly three anchors.
    col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
    grid = np.stack((col, row), axis=-1)[:, :, np.newaxis, :]

    box_xy = input[..., :2] * 2 - 0.5
    box_xy += grid
    # NOTE(review): one stride derived from grid_h is applied to both axes,
    # which assumes a square network input - confirm before feeding
    # rectangular inputs.
    box_xy *= int(IMG_SIZE / grid_h)

    box_wh = (input[..., 2:4] * 2) ** 2
    box_wh = box_wh * head_anchors

    box = np.concatenate((box_xy, box_wh), axis=-1)

    return box, box_confidence, box_class_probs


def filter_boxes(boxes, box_confidences, box_class_probs):
    """Keep only boxes whose objectness and best class score reach OBJ_THRESH.

    The thresholding is done in two passes: first on the box confidence,
    then on the highest class probability of the survivors.

    # Arguments
        boxes: ndarray, reshaped to rows of 4 values.
        box_confidences: ndarray, one confidence per box.
        box_class_probs: ndarray, class probabilities on the last axis.

    # Returns
        boxes: ndarray, the boxes that passed both thresholds.
        classes: ndarray, index of the best class of each kept box.
        scores: ndarray, best class probability times box confidence.
    """
    num_classes = box_class_probs.shape[-1]
    flat_boxes = boxes.reshape(-1, 4)
    flat_conf = box_confidences.reshape(-1)
    flat_probs = box_class_probs.reshape(-1, num_classes)

    # Pass 1: objectness.
    confident = flat_conf >= OBJ_THRESH
    flat_boxes = flat_boxes[confident]
    flat_conf = flat_conf[confident]
    flat_probs = flat_probs[confident]

    # Pass 2: best class probability.
    best_prob = np.max(flat_probs, axis=-1)
    best_class = np.argmax(flat_probs, axis=-1)
    likely = best_prob >= OBJ_THRESH

    combined = best_prob * flat_conf
    return flat_boxes[likely], best_class[likely], combined[likely]


def nms_boxes(boxes, scores):
    """Greedy non-maximum suppression over corner-format boxes.

    Repeatedly keeps the highest-scored remaining box and drops every other
    remaining box whose overlap ratio with it exceeds NMS_THRESH.

    # Arguments
        boxes: ndarray with rows ``[x1, y1, x2, y2]``.
        scores: ndarray, one score per box.

    # Returns
        keep: ndarray with the indices of the boxes that were kept.
    """
    left = boxes[:, 0]
    top = boxes[:, 1]
    widths = boxes[:, 2] - left
    heights = boxes[:, 3] - top
    right = left + widths
    bottom = top + heights
    areas = widths * heights

    remaining = scores.argsort()[::-1]
    keep = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        keep.append(best)

        # Intersection of the best box with every other remaining box.
        overlap_w = np.maximum(
            0.0,
            np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]) + 0.00001)
        overlap_h = np.maximum(
            0.0,
            np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]) + 0.00001)
        inter = overlap_w * overlap_h

        ovr = inter / (areas[best] + areas[rest] - inter)
        remaining = rest[ovr <= NMS_THRESH]
    return np.array(keep)


def yolov5_post_process(input_data):
    """Turn the three YOLOv5 head tensors into final detections.

    Each head is decoded with ``process`` and thresholded with
    ``filter_boxes``; the merged boxes are converted to corner format and
    ``nms_boxes`` is then applied separately for every class.

    Returns:
        ``(boxes, classes, scores)`` as concatenated arrays, or
        ``(None, None, None)`` when nothing survives.
    """
    masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
               [59, 119], [116, 90], [156, 198], [373, 326]]

    # One (boxes, classes, scores) triple per head.
    per_head = [
        filter_boxes(*process(head, mask, anchors))
        for head, mask in zip(input_data, masks)
    ]

    boxes = xywh2xyxy(np.concatenate([triple[0] for triple in per_head]))
    classes = np.concatenate([triple[1] for triple in per_head])
    scores = np.concatenate([triple[2] for triple in per_head])

    nboxes, nclasses, nscores = [], [], []
    for class_id in set(classes):
        members = np.where(classes == class_id)
        class_boxes = boxes[members]
        class_ids = classes[members]
        class_scores = scores[members]

        keep = nms_boxes(class_boxes, class_scores)

        nboxes.append(class_boxes[keep])
        nclasses.append(class_ids[keep])
        nscores.append(class_scores[keep])

    if not nclasses and not nscores:
        return None, None, None

    return np.concatenate(nboxes), np.concatenate(nclasses), np.concatenate(nscores)


def draw(image, boxes, scores, classes):
    """Draw every detection on ``image`` and print a result table.

    # Argument:
        image: image that is drawn on in place.
        boxes: iterable of 4-value boxes; the first pair is used as one
            rectangle corner and the second pair as the opposite corner.
        scores: iterable of scores, parallel to ``boxes``.
        classes: iterable of indices into ``CLASSES``, parallel to ``boxes``.
    """
    print("{:^12} {:^12}  {}".format('class', 'score', 'xmin, ymin, xmax, ymax'))
    print('-' * 50)
    for box, score, cl in zip(boxes, scores, classes):
        x1, y1, x2, y2 = (int(v) for v in box)
        label = CLASSES[cl]

        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(image, '{0} {1:.2f}'.format(label, score),
                    (x1, y1 - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 2)

        print("{:^12} {:^12.3f} [{:>4}, {:>4}, {:>4}, {:>4}]".format(label, score, x1, y1, x2, y2))

def letterbox(im, new_shape=(640, 640), color=(0, 0, 0)):
    """Resize ``im`` keeping its aspect ratio and pad it to ``new_shape``.

    Args:
        im: Image array whose first two axes are height and width.
        new_shape: Target ``(height, width)``, or a single int for a square.
        color: Border colour used for the padding.

    Returns:
        ``(image, (r, r), (dw, dh))``: the padded image, the scale factor
        (repeated for both axes) and the per-side padding in x and y.
    """
    src_h, src_w = im.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Single scale factor (new / old) that makes the image fit.
    r = min(dst_h / src_h, dst_w / src_w)
    new_unpad = (int(round(src_w * r)), int(round(src_h * r)))  # (width, height)

    # Leftover space, split evenly between the two sides.
    dw = (dst_w - new_unpad[0]) / 2
    dh = (dst_h - new_unpad[1]) / 2

    if (src_w, src_h) != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

    # The +/-0.1 nudges decide which side gets the extra pixel of odd padding.
    top = int(round(dh - 0.1))
    bottom = int(round(dh + 0.1))
    left = int(round(dw - 0.1))
    right = int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return im, (r, r), (dw, dh)

def detect_on_ubuntu(onnx_model_path=ONNX_MODEL, rknn_path=RKNN_MODEL, image_path=IMG_PATH, classes=CLASSES):
    """Convert an ONNX model to RKNN, run it once and save the detections.

    Steps: configure, load the ONNX file, build, export to ``rknn_path``,
    initialise the runtime (no target is passed), run one image and feed
    outputs 1, 3 and 5 - zero-padded to 255 channels - through
    ``yolov5_post_process``.  Several ``.npy`` debug dumps are written to the
    working directory and the annotated image goes to ``result.jpg``.

    Args:
        onnx_model_path: ONNX model to convert.
        rknn_path: Where the exported ``.rknn`` file is written.
        image_path: Image to run detection on.
        classes: Class names; their count is the channel count expected in
            each of the three used outputs.

    Raises:
        SystemExit: With the toolkit's return code when a toolkit step fails.
        FileNotFoundError: If the image cannot be read.
        ValueError: If a used output does not have ``len(classes)`` channels.
    """
    # Create RKNN object
    rknn = RKNN(verbose=True)
    try:
        # pre-process config
        print('--> Config model')
        rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform='rk3588')
        print('done')

        # Load ONNX model
        print('--> Loading model')
        ret = rknn.load_onnx(model=onnx_model_path)
        if ret != 0:
            print('Load model failed!')
            sys.exit(ret)
        print('done')

        # Build model
        print('--> Building model')
        ret = rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET)
        if ret != 0:
            print('Build model failed!')
            sys.exit(ret)
        print('done')

        # Export RKNN model
        print('--> Export rknn model')
        ret = rknn.export_rknn(rknn_path)
        if ret != 0:
            print('Export rknn model failed!')
            sys.exit(ret)
        print('done')

        # Init runtime environment
        print('--> Init runtime environment')
        ret = rknn.init_runtime()
        if ret != 0:
            print('Init runtime environment failed!')
            sys.exit(ret)
        print('done')

        # Set inputs
        img = cv2.imread(image_path)
        if img is None:
            raise FileNotFoundError(f'cannot read image: {image_path}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

        # Inference
        print('--> Running model')
        img2 = np.expand_dims(img, 0)
        np.save('./raw_yolov11_in.npy', [img2])
        outputs = rknn.inference(inputs=[img2], data_format=['nhwc'])
        raw_heads = [outputs[1], outputs[3], outputs[5]]
        for idx, head in enumerate(raw_heads):
            np.save(f'./onnx_yolov11_{idx}_raw.npy', head)
        print('done')

        # post process: zero-pad the channel axis up to 255 so the tensors
        # can be regrouped as 3 x 85 like a YOLOv5 head.
        num_classes = len(classes)
        padded_heads = []
        for idx, head in enumerate(raw_heads):
            if head.shape[1] != num_classes:
                # A class list that disagrees with the model would otherwise
                # only surface as an obscure reshape error further down.
                raise ValueError(
                    f'output {2 * idx + 1} has {head.shape[1]} channels but '
                    f'{num_classes} classes were given')
            pad_shape = (head.shape[0], 255 - num_classes) + tuple(head.shape[2:])
            padding = np.zeros(pad_shape, dtype=np.float32)
            padded_heads.append(np.concatenate((head, padding), axis=1))
        for idx, padded in enumerate(padded_heads):
            np.save(f'./onnx_yolov11_{idx}.npy', padded)

        input_data = []
        for padded in padded_heads:
            grouped = padded.reshape([3, -1] + list(padded.shape[-2:]))
            input_data.append(np.transpose(grouped, (2, 3, 0, 1)))

        # Separate names so the `classes` argument is not overwritten.
        boxes, det_classes, scores = yolov5_post_process(input_data)

        img_1 = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        if boxes is not None:
            draw(img_1, boxes, scores, det_classes)
            cv2.imwrite('result.jpg', img_1)
            print('Save results to result.jpg!')
    finally:
        # Release the toolkit handle on every exit path.
        rknn.release()

def precess_image(img_src, resize_w, resize_h):
    """Load an image file and return it as float32 RGB scaled by 1/255.

    Args:
        img_src: Path of the image file to read.
        resize_w: Target width in pixels.
        resize_h: Target height in pixels.

    Returns:
        The resized image as a float32 array (height, width, channel order
        RGB), or ``None`` when the file cannot be read.
    """
    orig = cv2.imread(img_src)
    if orig is None:
        # The message used to reference an undefined name (img_path), which
        # turned a missing file into a NameError.
        print(f"无法读取图像: {img_src}")
        return None
    print(f'{type(orig)}')
    image = cv2.resize(orig, (resize_w, resize_h), interpolation=cv2.INTER_LINEAR)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.astype(np.float32)
    image /= 255.0

    return image

def detect_on_rk3588(rknn_path=RKNN_MODEL, image_path=IMG_PATH, classes=CLASSES):
    """Run a pre-built ``.rknn`` model on an RK3588 and save the detections.

    Loads the model, initialises the runtime for target ``rk3588``, runs one
    image and feeds outputs 1, 3 and 5 - zero-padded to 255 channels -
    through ``yolov5_post_process``.  Several ``.npy`` debug dumps are written
    to the working directory and the annotated image goes to
    ``rknn_detect_result.jpg``.

    Args:
        rknn_path: ``.rknn`` model to load.
        image_path: Image to run detection on.
        classes: Class names; their count is the channel count expected in
            each of the three used outputs.

    Raises:
        SystemExit: With the toolkit's return code when a toolkit step fails.
        FileNotFoundError: If the image cannot be read.
        ValueError: If a used output does not have ``len(classes)`` channels.
    """
    # Create RKNN object
    rknn = RKNN(verbose=True)
    try:
        # pre-process config
        print('--> Config model')
        rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform='rk3588')
        print('done')

        # Load RKNN model
        ret = rknn.load_rknn(rknn_path)
        if ret != 0:
            print('Load rknn model failed!')
            sys.exit(ret)
        print('done')

        # Init runtime environment
        print('--> Init runtime environment')
        ret = rknn.init_runtime(target="rk3588", core_mask=RKNN.NPU_CORE_AUTO)
        if ret != 0:
            print('Init runtime environment failed!')
            sys.exit(ret)
        print('done')

        # NOTE(review): precess_image already divides by 255 while the config
        # above also declares std 255 - confirm the model is not normalising
        # the input twice.
        img2 = precess_image(image_path, input_imgW, input_imgH)
        if img2 is None:
            raise FileNotFoundError(f'cannot read image: {image_path}')
        # Add the batch axis so the input is 4-D like in detect_on_ubuntu.
        img2 = np.expand_dims(img2, 0)
        np.save('./raw_yolov11_in.npy', [img2])
        outputs = rknn.inference(inputs=[img2], data_format=['nhwc'])
        # These three outputs were never bound to the input*_data names the
        # padding code used, so the function failed right after inference.
        raw_heads = [outputs[1], outputs[3], outputs[5]]
        for idx, head in enumerate(raw_heads):
            np.save(f'./raw_yolov11_{idx}.npy', head)
        print('done')

        # Zero-pad the channel axis up to 255 so the tensors can be regrouped
        # as 3 x 85 like a YOLOv5 head.
        num_classes = len(classes)
        padded_heads = []
        for idx, head in enumerate(raw_heads):
            if head.shape[1] != num_classes:
                raise ValueError(
                    f'output {2 * idx + 1} has {head.shape[1]} channels but '
                    f'{num_classes} classes were given')
            pad_shape = (head.shape[0], 255 - num_classes) + tuple(head.shape[2:])
            padding = np.zeros(pad_shape, dtype=np.float32)
            padded_heads.append(np.concatenate((head, padding), axis=1))
        for idx, padded in enumerate(padded_heads):
            np.save(f'./modified_yolov11_{idx}.npy', padded)

        input_data = []
        for padded in padded_heads:
            grouped = padded.reshape([3, -1] + list(padded.shape[-2:]))
            input_data.append(np.transpose(grouped, (2, 3, 0, 1)))

        # Separate names so the `classes` argument is not overwritten.
        boxes, det_classes, scores = yolov5_post_process(input_data)

        if boxes is not None:
            # Boxes are in network-input pixels, so draw on a copy resized to
            # that size.  imread already yields BGR, which is what imwrite
            # expects, so no colour conversion is needed.
            canvas = cv2.resize(cv2.imread(image_path), (input_imgW, input_imgH))
            draw(canvas, boxes, scores, det_classes)
            cv2.imwrite('rknn_detect_result.jpg', canvas)
            print('Save results to rknn_detect_result.jpg!')
        else:
            print('target can not be found!')
    finally:
        # Release the toolkit handle on every exit path.
        rknn.release()


if __name__ == '__main__':
    # Default entry point: convert the ONNX model and run it via the toolkit.
    detect_on_ubuntu(ONNX_MODEL, RKNN_MODEL, IMG_PATH, CLASSES)
    # Alternative entry point (disabled): run the pre-built .rknn model.
    #detect_on_rk3588(RKNN_MODEL, IMG_PATH, CLASSES)

5.结论:

最终通过仔细核对yolov5, yolo11, onnx几个模型的输入输出参数,发现了这样的事情:

  • yolov11 的输出参数有6个,大概率,按照上面的结论是,1,3,5相当于原来的输出参数0,1,2
  • yolov11的输出的6个参数,第二维尺寸,现在不是255(-1)个,而是len(classes)个。
  • 我的问题在于我在训练时把classes设置为81,而在导出.rknn时,仍然导出为80.所以,结果就是onnx模式访问正常,而.rknn方式访问错误。
  • yolo detect代码中的.save是瑞芯微的那些同志在调试接口时留下的一些调试语句,它们不必存在。
  • rknn代码的输入输出参数建议u8化;目前在simulation(仿真)环境中传递的是float32。这个修改完成后,速度应该会快不少。
  • 还有,yolo detect的matrix pack in pack out时,效率很低,他还在进行float转换。这个我没有仔细看代码,理论上,onnx的识别信息析取是更快的。

还在测试。如果确认最终稿的结论成立,我会在这个帖子里留下标记。上面的两段代码没有大问题,我在测试成功后会更新,现在的代码就是对的。

相关推荐
CDN3605 分钟前
高防服务器被攻击后 IP 被封?黑洞解封与清洗策略设置
运维·服务器·tcp/ip
2401_8274999910 分钟前
python项目实战07-DeepSeek调用测试(本地部署)
linux·运维·服务器
longxibo11 分钟前
【Ubuntu datasophon1.2.1 二开之九:验证离线数据入湖】
大数据·linux·运维·ubuntu
rainy雨11 分钟前
精益班组建设通过标准化作业解决现场管理混乱难题,推动精益班组建设落地
大数据·运维·数据挖掘·数据分析·精益工程
似水এ᭄往昔15 分钟前
【Linux】--命令行参数和环境变量
linux·运维·服务器
linux修理工15 分钟前
在Debian上安装桌面环境并启用远程登录
linux·运维·服务器
靠沿21 分钟前
【递归、搜索与回溯算法】专题三——穷举vs暴搜vs深搜vs回溯vs剪枝
算法·机器学习·剪枝
Linux猿22 分钟前
汽车牌照数据集 YOLO 目标检测 | 可下载
yolo·目标检测·目标检测数据集·yolo目标检测·yolo目标检测数据集·汽车牌照数据集
不愿透露姓名的大鹏24 分钟前
Oracle Undo空间爆满急救指南(含在线切换+更优方案+避坑指南)
linux·运维·数据库·oracle
艾莉丝努力练剑27 分钟前
【Linux线程】Linux系统多线程(三):Linux线程 VS 进程,线程控制
java·linux·运维·服务器·c++·学习·ubuntu