YOLOV11-OBB之ONNX转RKNN并跑在模拟器上

接着上一篇：ONNX 模型已经导出，接下来要把它转换成 RKNN。下面给出将 ONNX 转为 RKNN（不量化）并直接在模拟器上推理测试的完整代码，全是干货。
Python 代码如下：
import argparse
import math
import json
import cv2
import numpy as np
from shapely.geometry import Polygon
from rknn.api import RKNN

# Category names, looked up by the class id stored on each detection.
# NOTE(review): these look like the 15 DOTA aerial-imagery categories —
# confirm the order against the training configuration of the model.
CLASSES = [
    'plane',
    'ship',
    'storage tank',
    'baseball diamond',
    'tennis court',
    'basketball court',
    'ground track field',
    'harbor',
    'bridge',
    'large vehicle',
    'small vehicle',
    'helicopter',
    'roundabout',
    'soccer ball field',
    'swimming pool',
]

class DetectBox:
    """One detection: class id, score, un-rotated box extents and a rotation angle.

    ``xmin``/``ymin``/``xmax``/``ymax`` describe the box before rotation;
    ``angle`` is the rotation (in radians) applied about the box centre when
    the corner points are generated.
    """

    # Fixed attribute set: instances carry no per-object __dict__.
    __slots__ = ('classId', 'score', 'xmin', 'ymin', 'xmax', 'ymax', 'angle')

    def __init__(self, classId, score, xmin, ymin, xmax, ymax, angle):
        self.classId, self.score = classId, score
        self.xmin, self.ymin = xmin, ymin
        self.xmax, self.ymax = xmax, ymax
        self.angle = angle

def letterbox_resize(image, size, bg_color):
    """Scale an image to fit inside ``size`` and pad the remainder.

    The aspect ratio is preserved; the resized image is centred on a canvas
    filled with ``bg_color``.

    Args:
        image: A 3-channel image array of shape (h, w, 3), or a path string
            that is loaded with ``cv2.imread``.
        size: ``(target_w, target_h)`` of the output canvas.
        bg_color: Fill value for the padding (passed to ``np.full``).

    Returns:
        ``(canvas, scale, offset_x, offset_y)`` where ``canvas`` is a uint8
        array of shape (target_h, target_w, 3), ``scale`` is the resize factor
        applied to the source image, and the offsets are the top-left position
        of the resized image inside the canvas.

    Raises:
        ValueError: If ``image`` is a path that cannot be read.
    """
    if isinstance(image, str):
        path = image
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None rather than raising.
        if image is None:
            raise ValueError(f"Failed to read image: {path}")
    target_w, target_h = size
    h, w, _ = image.shape
    scale = min(target_w / w, target_h / h)
    # Keep at least one pixel per side: for extreme aspect ratios the floor
    # could otherwise reach 0, which cv2.resize rejects.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    img_resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)
    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
    offset_x = (target_w - new_w) // 2
    offset_y = (target_h - new_h) // 2
    canvas[offset_y:offset_y+new_h, offset_x:offset_x+new_w] = img_resized
    return canvas, scale, offset_x, offset_y

def rotate_rectangle(x1, y1, x2, y2, angle_rad):
    """Return the four corners of a box after rotating it about its centre.

    Args:
        x1, y1, x2, y2: Opposite corners of the un-rotated box.
        angle_rad: Rotation angle in radians.

    Returns:
        Array of shape (4, 2) with the rotated corners, starting from the
        rotated image of ``(x1, y1)`` and following the order
        (x1, y1), (x2, y1), (x2, y2), (x1, y2).
    """
    center = ((x1 + x2) / 2, (y1 + y2) / 2)
    pts = np.array([(x1, y1), (x2, y1), (x2, y2), (x1, y2)], dtype=np.float32)
    # Move the box so its centre sits at the origin before rotating.
    pts -= center
    c, s = math.cos(angle_rad), math.sin(angle_rad)
    rotation = np.array([[c, -s], [s, c]])
    # Row-vector form of ``rotation @ v`` for every corner, then move back.
    return pts @ rotation.T + center

def intersection(poly1, poly2):
    """Return the intersection-over-union of two quadrilaterals.

    Despite the name, the result is the IoU ratio, not the raw overlap area.

    Args:
        poly1, poly2: Eight coordinates each (anything ``np.array`` can
            reshape to (4, 2)), giving the four corner points.

    Returns:
        IoU as a float; 0.0 when either polygon is invalid or the union area
        is not positive.
    """
    shape_a = Polygon(np.array(poly1).reshape(4, 2))
    shape_b = Polygon(np.array(poly2).reshape(4, 2))
    if not (shape_a.is_valid and shape_b.is_valid):
        return 0.0
    overlap = shape_a.intersection(shape_b).area
    total = shape_a.area + shape_b.area - overlap
    if total > 0:
        return overlap / total
    return 0.0

def nms_rotated(detections, iou_thresh=0.4):
    """Greedy per-class non-maximum suppression for rotated boxes.

    Detections are visited from highest to lowest score. Each surviving
    detection suppresses every lower-ranked detection of the same class whose
    rotated-box IoU with it is greater than ``iou_thresh``.

    The input list is left untouched (it is neither re-ordered nor emptied).

    Args:
        detections: Sequence of objects exposing ``classId``, ``score``,
            ``xmin``, ``ymin``, ``xmax``, ``ymax`` and ``angle``.
        iou_thresh: IoU above which a same-class detection is dropped.

    Returns:
        List of kept detections, ordered by descending score.
    """
    if not detections:
        return []

    # Work on a sorted copy so the caller's list is not mutated.
    ordered = sorted(detections, key=lambda d: d.score, reverse=True)
    suppressed = [False] * len(ordered)
    corner_cache = {}

    def corners_of(i):
        # Rotated corners are built at most once per detection, and only for
        # detections that actually take part in an IoU comparison.
        if i not in corner_cache:
            d = ordered[i]
            corner_cache[i] = rotate_rectangle(
                d.xmin, d.ymin, d.xmax, d.ymax, d.angle).reshape(-1)
        return corner_cache[i]

    keep = []
    for i, best in enumerate(ordered):
        if suppressed[i]:
            continue
        keep.append(best)
        for j in range(i + 1, len(ordered)):
            # Boxes of other classes never suppress each other.
            if suppressed[j] or ordered[j].classId != best.classId:
                continue
            if intersection(corners_of(i), corners_of(j)) > iou_thresh:
                suppressed[j] = True
    return keep

def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def softmax(x, axis=-1):
    """Return the softmax of ``x`` along ``axis``.

    The per-slice maximum is subtracted before exponentiating so that large
    inputs do not overflow; this does not change the result.
    """
    shifted = x - np.max(x, axis=axis, keepdims=True)
    weights = np.exp(shifted)
    normaliser = np.sum(weights, axis=axis, keepdims=True)
    return weights / normaliser

def process_rknn_style(feature_map, stride, angle_offset, angle_flat, model_w, model_h, scale_w, scale_h, conf_thresh):
    """Decode one detection-head feature map into ``DetectBox`` objects.

    The first 64 channels of ``feature_map`` are read as 4 groups of 16 bin
    logits per grid cell; each group is softmax-ed and reduced to its expected
    bin index. The remaining channels are class logits, passed through a
    sigmoid. Every (class, cell) score that reaches ``conf_thresh`` yields one
    detection.

    Args:
        feature_map: Array of shape (1, 64 + num_class_channels, H, W).
        stride: Multiplier that maps grid units to model-input pixels.
        angle_offset: Index of this feature map's first cell inside
            ``angle_flat``.
        angle_flat: Array of shape (1, total_cells) holding one angle value
            per grid cell. NOTE(review): the ``(value - 0.25) * pi`` mapping
            below presumably expects values already squashed to [0, 1] —
            confirm against the exported model.
        model_w, model_h: Unused; kept so existing callers keep working.
        scale_w, scale_h: Extra factors applied to x and y coordinates.
        conf_thresh: Minimum class score for a detection to be emitted.
            Scores that are NaN are not emitted.

    Returns:
        List of ``DetectBox`` whose extents describe the un-rotated box and
        whose ``angle`` is the rotation in radians.
    """
    _, _, H, W = feature_map.shape
    N = H * W
    class_num = len(CLASSES)
    xywh = feature_map[:, :64, :, :].reshape(1, 64, -1)
    conf = sigmoid(feature_map[:, 64:, :, :]).reshape(-1)
    # Bin indices used to turn each 16-way distribution into an expectation;
    # identical for every detection, so built once.
    bins = np.arange(16, dtype=np.float32).reshape(1, 1, 16, 1)

    # Threshold all (class, cell) scores in one vectorised pass and only loop
    # over the survivors instead of visiting every score from Python.
    candidates = np.flatnonzero(conf[:class_num * N] >= conf_thresh).tolist()

    detections = []
    for idx in candidates:
        # conf is laid out class-major: idx = c * N + h_idx * W + w_idx.
        w_idx = idx % W
        h_idx = (idx % N) // W
        c = idx // N

        xywh_vec = xywh[0, :, h_idx * W + w_idx]
        xywh_vec = xywh_vec.reshape(1, 4, 16, 1)
        xywh_soft = softmax(xywh_vec, axis=2)
        xywh_decoded = np.sum(bins * xywh_soft, axis=2, keepdims=True).reshape(-1)
        # Sum of the two value pairs gives the box size; half their
        # difference gives the centre offset from the cell (before rotation).
        xywh_add = xywh_decoded[:2] + xywh_decoded[2:]
        xywh_sub = (xywh_decoded[2:] - xywh_decoded[:2]) / 2.0

        angle_val = angle_flat[0, angle_offset + h_idx * W + w_idx]
        angle_rad = (angle_val - 0.25) * math.pi
        cos_a = math.cos(angle_rad)
        sin_a = math.sin(angle_rad)

        # Rotate the centre offset by the predicted angle, then add the cell
        # centre (cell index + 0.5) to get the box centre in grid units.
        tx = xywh_sub[0] * cos_a - xywh_sub[1] * sin_a
        ty = xywh_sub[0] * sin_a + xywh_sub[1] * cos_a
        cx_pred = tx + w_idx + 0.5
        cy_pred = ty + h_idx + 0.5
        w_pred = xywh_add[0]
        h_pred = xywh_add[1]

        # Grid units -> model-input pixels.
        cx_pred *= stride
        cy_pred *= stride
        w_pred *= stride
        h_pred *= stride

        xmin = (cx_pred - w_pred/2) * scale_w
        ymin = (cy_pred - h_pred/2) * scale_h
        xmax = (cx_pred + w_pred/2) * scale_w
        ymax = (cy_pred + h_pred/2) * scale_h

        detections.append(DetectBox(c, conf[idx], xmin, ymin, xmax, ymax, angle_rad))
    return detections

def parse_outputs(outputs):
    """Split raw model outputs into the angle tensor and three feature maps.

    Outputs are recognised purely by shape:
      * (1, *, 8400) or (1, 8400)  -> angle tensor, flattened to (1, -1);
        if several outputs match, the last one wins.
      * (1, 79, H, W)              -> detection feature map.

    Args:
        outputs: Iterable of arrays returned by the model.

    Returns:
        ``(angle_raw, feat_maps_sorted, strides, offsets)`` where the feature
        maps are sorted by descending height, ``strides`` holds 8/16/32 for
        heights 80/40/20, and ``offsets`` holds each map's starting index in
        the flattened angle tensor.

    Raises:
        ValueError: If no angle tensor or not exactly three feature maps are
            found, or a feature map has a height other than 80, 40 or 20.
    """
    stride_by_size = {80: 8, 40: 16, 20: 32}

    angle_raw = None
    feat_maps = []
    for tensor in outputs:
        dims = tensor.shape
        rank = len(dims)
        looks_like_angle = (
            (rank == 3 and dims[0] == 1 and dims[2] == 8400)
            or (rank == 2 and dims[0] == 1 and dims[1] == 8400)
        )
        if looks_like_angle:
            angle_raw = tensor.reshape(1, -1)
        elif rank == 4 and dims[0] == 1 and dims[1] == 79:
            feat_maps.append(tensor)

    if angle_raw is None or len(feat_maps) != 3:
        raise ValueError("无法自动识别角度输出或特征图，请检查模型输出形状")

    # Largest grid first, so the offsets follow the 80 -> 40 -> 20 order.
    feat_maps_sorted = sorted(feat_maps, key=lambda fm: fm.shape[2], reverse=True)

    strides, offsets = [], []
    running = 0
    for fm in feat_maps_sorted:
        size = fm.shape[2]
        if size not in stride_by_size:
            raise ValueError(f"未知的特征图尺寸: {size}")
        strides.append(stride_by_size[size])
        offsets.append(running)
        # Cell count is taken as size * size, i.e. the grid is treated as square.
        running += size * size
    return angle_raw, feat_maps_sorted, strides, offsets

def convert_and_test(onnx_path, rknn_path, img_path, out_img_path, out_json_path, target_size=(640,640), conf_thresh=0.3, nms_thresh=0.4, target_platform='rk3588'):
    """Convert an ONNX model to RKNN and run one test image through it.

    Steps:
      1. Load the ONNX model and build an RKNN model without quantization.
      2. Export the ``.rknn`` file (a failed export is reported but does not
         stop the test).
      3. Initialise the runtime on the freshly built RKNN object (simulator),
         run inference on ``img_path``, decode and NMS the detections.
      4. Write an annotated image and a JSON file with normalised corners.

    The RKNN object is released on every exit path, including early returns
    and exceptions.

    Args:
        onnx_path: Input ONNX model path.
        rknn_path: Output RKNN model path; ``rknn_path + ".log"`` receives the
            verbose log.
        img_path: Test image path.
        out_img_path: Where the annotated image is written.
        out_json_path: Where the detections JSON is written.
        target_size: ``(width, height)`` of the model input.
        conf_thresh: Minimum class score for a detection.
        nms_thresh: IoU threshold for rotated NMS.
        target_platform: RKNN target platform string.

    Returns:
        None. Failures are reported with ``print`` and end the function early.
    """
    rknn = RKNN(verbose=True, verbose_file=rknn_path + ".log")
    try:
        # Mean 0 / std 1: the runtime applies no extra normalisation; the
        # image is converted to RGB float32 in [0, 1] further down.
        print("--> Config model")
        rknn.config(target_platform=target_platform, mean_values=[[0,0,0]], std_values=[[1,1,1]])
        print("Config model done")

        # Load the ONNX model.
        print("--> Loading ONNX model")
        ret = rknn.load_onnx(model=onnx_path)
        if ret != 0:
            print("Load ONNX model failed!")
            return
        print("done")

        # Build the RKNN model (no quantization).
        print("--> Building RKNN model (no quantization)")
        ret = rknn.build(do_quantization=False)
        if ret != 0:
            print("Build model failed!")
            return
        print("done")

        # Export the RKNN file (optional: carry on with the test on failure).
        print("--> Export RKNN model")
        ret = rknn.export_rknn(rknn_path)
        if ret != 0:
            print("Export RKNN model failed!")
        else:
            print(f"RKNN model saved to {rknn_path}")

        # Initialise the simulator runtime directly on the built object.
        print("--> Init runtime (simulator)")
        ret = rknn.init_runtime()
        if ret != 0:
            print("Init runtime failed!")
            return
        print("done")

        # Read and pre-process the image.
        img_orig = cv2.imread(img_path)
        if img_orig is None:
            print(f"Failed to read image: {img_path}")
            return
        orig_h, orig_w = img_orig.shape[:2]
        target_w, target_h = target_size
        img_padded, scale, offset_x, offset_y = letterbox_resize(img_orig, (target_w, target_h), 114)
        img_rgb = cv2.cvtColor(img_padded, cv2.COLOR_BGR2RGB)
        img_float = img_rgb.astype(np.float32) / 255.0
        img_nchw = np.transpose(img_float, (2, 0, 1))[np.newaxis, ...]  # shape: (1,3,H,W)

        # Inference.
        print("--> Running inference")
        outputs = rknn.inference(inputs=[img_nchw], data_format='nchw')
        print("Inference done")

        # Decode every feature map, then suppress overlapping boxes.
        angle_raw, feat_maps_sorted, strides, offsets = parse_outputs(outputs)
        all_dets = []
        for fm, stride, offset in zip(feat_maps_sorted, strides, offsets):
            H, W = fm.shape[2], fm.shape[3]
            dets = process_rknn_style(fm, stride, offset, angle_raw, W, H, 1.0, 1.0, conf_thresh)
            all_dets.extend(dets)

        keep = nms_rotated(all_dets, nms_thresh)
        print(f"Detected {len(keep)} objects after NMS")

        # Draw results on a copy of the original image.
        img_draw = img_orig.copy()
        normalized_detections = []
        for box in keep:
            # Undo the letterbox: remove the padding offset, then the resize.
            xmin_orig = (box.xmin - offset_x) / scale
            ymin_orig = (box.ymin - offset_y) / scale
            xmax_orig = (box.xmax - offset_x) / scale
            ymax_orig = (box.ymax - offset_y) / scale
            pts = rotate_rectangle(xmin_orig, ymin_orig, xmax_orig, ymax_orig, box.angle)
            # Corners are stored relative to the original image size.
            pts_norm = pts / np.array([orig_w, orig_h], dtype=np.float32)
            normalized_detections.append({
                "class_id": int(box.classId),
                "class_name": CLASSES[box.classId],
                "score": float(box.score),
                "corners": pts_norm.tolist()
            })
            # Explicit int32 rather than the platform-dependent ``int``.
            cv2.polylines(img_draw, [np.array(pts, dtype=np.int32)], True, (0,255,0), 2)
            label = f"{CLASSES[box.classId]} {box.score:.2f}"
            cv2.putText(img_draw, label, (int(xmin_orig), int(ymin_orig)-5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1)

        # Save results.
        with open(out_json_path, 'w', encoding='utf-8') as f:
            json.dump(normalized_detections, f, indent=2)
        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(out_img_path, img_draw):
            print(f"Failed to write image: {out_img_path}")
            return
        print(f"Results saved to {out_img_path} and {out_json_path}")
    finally:
        # Release the RKNN object on every path out of the function.
        rknn.release()

if __name__ == '__main__':
    # Command-line entry point: collect the options and run one
    # convert-and-test pass.
    cli = argparse.ArgumentParser(
        description='ONNX to RKNN conversion and simulator test (no quantization)')

    # (flag, add_argument keyword arguments), registered in this order.
    option_specs = (
        ('--onnx', dict(type=str, required=True, help='Input ONNX model path')),
        ('--rknn', dict(type=str, default='model.rknn', help='Output RKNN model path')),
        ('--img', dict(type=str, default='test.jpg', help='Test image path')),
        ('--out', dict(type=str, default='result.jpg', help='Output image path')),
        ('--out_json', dict(type=str, default='detections.json', help='Output JSON path')),
        ('--target_size', dict(type=int, nargs=2, default=[640,640],
                               help='Model input size (width height)')),
        ('--conf_thresh', dict(type=float, default=0.3, help='Confidence threshold')),
        ('--nms_thresh', dict(type=float, default=0.4, help='NMS IoU threshold')),
        ('--platform', dict(type=str, default='rk3588',
                            help='Target platform: rk3588/rk3568/rk3566/rk3562')),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    opts = cli.parse_args()

    convert_and_test(
        opts.onnx,
        opts.rknn,
        opts.img,
        opts.out,
        opts.out_json,
        tuple(opts.target_size),
        opts.conf_thresh,
        opts.nms_thresh,
        opts.platform,
    )