06_yolox_s.onnx的推理验证

不使用官方的推理代码，而是另写一份独立的 Python 推理脚本，如下：
import cv2
import numpy as np
import onnxruntime as ort

# -------------------- 工具函数 --------------------
def sigmoid(x):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-x))``.

    The exponential is only ever evaluated on ``-|x|``, so large-magnitude
    negative inputs cannot overflow ``np.exp`` (the naive form emits a
    RuntimeWarning there, e.g. for float32 logits below roughly -89).

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        Sigmoid values with the same shape as ``x``; a NumPy scalar when
        ``x`` is a scalar.
    """
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1]: it may underflow to 0 but never overflows.
    z = np.exp(-np.abs(x))
    # x >= 0 uses 1 / (1 + e^-x); x < 0 uses the algebraically equal
    # e^x / (1 + e^x). Both branches are safe to evaluate everywhere.
    out = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with () turns a 0-d result back into a scalar and is a no-op
    # for arrays with one or more dimensions.
    return out[()]

def preprocess(img, input_size=(640, 640)):
    """Letterbox an image into a fixed-size float32 NCHW tensor.

    The image is resized with its aspect ratio preserved, pasted into the
    top-left corner of a canvas filled with the value 114, has its channel
    axis reversed, and is given a leading batch axis. Pixel values are not
    normalised.

    Args:
        img: Image array of shape (H, W, 3).
        input_size: ``(height, width)`` of the padded canvas.

    Returns:
        Tuple ``(tensor, scale)`` where ``tensor`` has shape
        ``(1, 3, input_size[0], input_size[1])`` and dtype float32, and
        ``scale`` is the resize factor that was applied to the image.
    """
    target_h, target_w = input_size[0], input_size[1]
    src_h, src_w = img.shape[:2]

    # The smaller ratio guarantees the resized image fits inside the canvas.
    scale = min(target_h / src_h, target_w / src_w)
    new_size = (int(src_w * scale), int(src_h * scale))  # cv2 wants (w, h)
    resized = cv2.resize(img, new_size)

    canvas = np.full((target_h, target_w, 3), 114, dtype=np.uint8)
    canvas[:resized.shape[0], :resized.shape[1]] = resized

    rgb = canvas[:, :, ::-1]      # reverse channels: BGR -> RGB
    chw = rgb.transpose(2, 0, 1)  # HWC -> CHW
    tensor = np.expand_dims(chw, axis=0).astype(np.float32)

    return tensor, scale

def nms(boxes, scores, iou_thr=0.5):
    """Greedy non-maximum suppression over axis-aligned boxes.

    Boxes are visited in descending score order. Each visited box is kept,
    and every remaining box whose IoU with it is not strictly below
    ``iou_thr`` is discarded.

    Args:
        boxes: Array of shape (N, 4) with ``x1, y1, x2, y2`` per row.
        scores: Array of N scores, one per box.
        iou_thr: IoU threshold; a remaining box survives only while its IoU
            with the currently selected box is below this value.

    Returns:
        List of kept indices into ``boxes``, highest score first.
    """
    left, top, right, bottom = boxes.T
    box_area = (right - left) * (bottom - top)
    ranking = scores.argsort()[::-1]

    selected = []
    while ranking.size > 0:
        best, rest = ranking[0], ranking[1:]
        selected.append(best)

        # Corners of the intersection between the best box and each
        # remaining candidate.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])

        # Clamp at zero so disjoint boxes contribute no overlap.
        inter_w = np.maximum(0, inter_right - inter_left)
        inter_h = np.maximum(0, inter_bottom - inter_top)
        overlap = inter_w * inter_h
        iou = overlap / (box_area[best] + box_area[rest] - overlap)

        # Keep only the candidates that overlap the best box little enough.
        ranking = rest[iou < iou_thr]

    return selected

# -------------------- decode --------------------
def decode_9outputs(outputs, input_size=(640, 640)):
    """Decode nine raw head tensors into boxes and per-class scores.

    ``outputs`` is read as three consecutive ``(reg, obj, cls)`` triples, one
    per stride in ``(8, 16, 32)``. Each tensor is flattened over its spatial
    axes, the levels are concatenated, and the regression values are turned
    into corner-format boxes using a per-level cell grid. Only batch index 0
    is returned.

    Args:
        outputs: Sequence of at least 9 arrays ordered
            ``[reg_p3, obj_p3, cls_p3, ...]``. Each reg tensor must be 4-D
            ``(B, C, H, W)``; its first four channels are used as
            ``x, y, w, h``.
        input_size: ``(height, width)`` of the network input, used to size
            the grid of each level as ``input_size // stride``.

    Returns:
        Tuple ``(boxes, scores)``: ``boxes`` has shape (N, 4) as
        ``x1, y1, x2, y2`` in network-input pixels; ``scores`` has shape
        (N, num_class) and equals ``sigmoid(obj) * sigmoid(cls)``.
    """
    level_strides = [8, 16, 32]

    # Explicit indexing (rather than slicing) so that a too-short `outputs`
    # fails immediately.
    heads = [
        (outputs[3 * k], outputs[3 * k + 1], outputs[3 * k + 2])
        for k in range(len(level_strides))
    ]

    # Flatten every level from (B, C, H, W) to (B, H*W, C).
    reg_parts, obj_parts, cls_parts = [], [], []
    for reg_map, obj_map, cls_map in heads:
        batch, channels, _, _ = reg_map.shape
        reg_parts.append(
            reg_map.reshape(batch, channels, -1).transpose(0, 2, 1)
        )
        obj_parts.append(obj_map.reshape(batch, 1, -1).transpose(0, 2, 1))
        cls_parts.append(
            cls_map.reshape(batch, cls_map.shape[1], -1).transpose(0, 2, 1)
        )

    raw_boxes = np.concatenate(reg_parts, axis=1)             # B, sum(HW), 4
    objectness = sigmoid(np.concatenate(obj_parts, axis=1))   # B, sum(HW), 1
    class_prob = sigmoid(np.concatenate(cls_parts, axis=1))   # B, sum(HW), nc
    scores = objectness * class_prob

    # Per-level (x, y) cell indices in row-major order, plus the stride of
    # each cell, concatenated in the same level order as the predictions.
    cell_parts, stride_parts = [], []
    for stride in level_strides:
        rows = input_size[0] // stride
        cols = input_size[1] // stride
        col_idx, row_idx = np.meshgrid(np.arange(cols), np.arange(rows))
        cells = np.stack((col_idx, row_idx), axis=-1).reshape(-1, 2)
        cell_parts.append(cells)
        stride_parts.append(np.full((cells.shape[0], 1), stride))
    cell_xy = np.concatenate(cell_parts, axis=0)
    cell_stride = np.concatenate(stride_parts, axis=0)

    first = raw_boxes[0]
    # Centre = (predicted offset + cell index) * stride; size = exp(pred) * stride.
    centers = (first[:, :2] + cell_xy) * cell_stride
    half_extent = np.exp(first[:, 2:4]) * cell_stride / 2
    boxes = np.concatenate(
        [centers - half_extent, centers + half_extent], axis=-1
    )  # [N, 4]

    return boxes, scores[0]

# -------------------- 推理 --------------------
def infer_yolox_onnx(onnx_path, image_path, conf_thresh=0.3, iou_thresh=0.5):
    """Run the 9-output YOLOX ONNX model on one image and draw detections.

    Side effects: writes one ``output_{i}_raw.txt`` file per model output
    (flattened, untransposed values), writes the annotated image to
    ``result.jpg``, and prints progress messages.

    Args:
        onnx_path: Path to the ONNX model; its input must be named "images".
        image_path: Path of the image to run detection on.
        conf_thresh: Minimum best-class score for a box to be kept.
        iou_thresh: IoU threshold passed to ``nms``.

    Raises:
        FileNotFoundError: If ``image_path`` cannot be read by OpenCV.
    """
    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising; fail
    # here with a clear message rather than with an AttributeError later in
    # preprocess().
    if img is None:
        raise FileNotFoundError(
            f"Cannot read image (missing or undecodable): {image_path}"
        )
    input_data, scale = preprocess(img)

    session = ort.InferenceSession(onnx_path)
    outputs = session.run(None, {"images": input_data})

    # Dump every raw output for offline comparison. The tensor is written
    # as-is (no transpose), flattened to one value per line.
    for i, out in enumerate(outputs):
        save_path = f"output_{i}_raw.txt"
        np.savetxt(save_path, out.reshape(-1), fmt="%.6f")
        print(f"Saved: {save_path}, shape={out.shape}")

    boxes, scores = decode_9outputs(outputs, input_size=input_data.shape[-2:])

    # Best class and its score for every candidate box.
    cls_ids = scores.argmax(axis=1)
    conf = scores.max(axis=1)

    mask = conf > conf_thresh
    boxes = boxes[mask]
    conf = conf[mask]
    cls_ids = cls_ids[mask]

    keep = nms(boxes, conf, iou_thr=iou_thresh)

    for i in keep:
        # Undo the letterbox resize to map back to original image pixels.
        x1, y1, x2, y2 = boxes[i] / scale
        cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
        cv2.putText(img, f"{cls_ids[i]}:{conf[i]:.2f}", (int(x1), int(y1)-5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

    cv2.imwrite("result.jpg", img)
    print("Done -> result.jpg")

# -------------------- 运行 --------------------
if __name__ == "__main__":
    # Script entry point: run the demo model on the bundled sample image.
    infer_yolox_onnx(
        onnx_path="yolox_9out.onnx",
        image_path="dog_640.jpg",
    )
推理结果