不使用官方的推理代码,而是自行编写一份推理代码,如下:
python
import cv2
import numpy as np
import onnxruntime as ort
# -------------------- 工具函数 --------------------
def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

    The exponential is only ever taken of a non-positive value, so logits of
    large magnitude cannot overflow. The naive form overflows ``exp`` for
    strongly negative inputs and emits a RuntimeWarning.

    Args:
        x: Scalar or array-like of logits.

    Returns:
        numpy.ndarray with the same shape as ``x`` and values in [0, 1].
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it can underflow to 0 but never
    # overflow.
    z = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically equal.
    return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
def preprocess(img, input_size=(640, 640)):
    """Letterbox a BGR image into a float32 NCHW network input.

    The image is resized with its aspect ratio preserved so that it fits
    inside ``input_size``, pasted into the top-left corner of a canvas filled
    with the value 114, converted BGR -> RGB and rearranged to
    ``(1, 3, H, W)``. Pixel values are left in the 0..255 range (no
    normalisation is applied).

    Args:
        img: H x W x 3 image in BGR channel order (e.g. from ``cv2.imread``).
        input_size: ``(height, width)`` of the network input.

    Returns:
        ``(blob, scale)`` where ``blob`` is a C-contiguous float32 array of
        shape ``(1, 3, input_size[0], input_size[1])`` and ``scale`` is the
        resize factor; divide network-space coordinates by ``scale`` to map
        them back to the original image.

    Raises:
        ValueError: If ``img`` is ``None`` or contains no pixels.
    """
    if img is None or img.size == 0:
        raise ValueError("preprocess() received an empty image")

    target_h, target_w = input_size
    src_h, src_w = img.shape[:2]
    scale = min(target_h / src_h, target_w / src_w)

    # Clamp to one pixel so that extreme aspect ratios never request a
    # zero-sized output from cv2.resize.
    new_w = max(1, int(src_w * scale))
    new_h = max(1, int(src_h * scale))
    resized = cv2.resize(img, (new_w, new_h))

    canvas = np.full((target_h, target_w, 3), 114, dtype=np.uint8)
    canvas[:new_h, :new_w] = resized

    # BGR -> RGB, then HWC -> CHW; both are strided views of the canvas.
    chw = canvas[:, :, ::-1].transpose(2, 0, 1)
    # Materialise a C-contiguous NCHW buffer for the inference runtime.
    blob = np.ascontiguousarray(chw[np.newaxis], dtype=np.float32)
    return blob, scale
def nms(boxes, scores, iou_thr=0.5):
    """Greedy, class-agnostic non-maximum suppression.

    Boxes are visited in descending score order; each visited box is kept
    and every remaining box whose IoU with it is ``>= iou_thr`` is dropped.

    Args:
        boxes: ``(N, 4)`` array of ``[x1, y1, x2, y2]`` corners.
        scores: ``(N,)`` array of confidences, one per box.
        iou_thr: IoU at or above which a lower-scored box is suppressed.

    Returns:
        List of indices into ``boxes`` for the kept boxes, highest score
        first. Empty when ``boxes`` is empty.
    """
    x1, y1, x2, y2 = boxes.T
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(best)

        # Intersection rectangle between the best box and every other one.
        inter_w = np.maximum(
            0, np.minimum(x2[best], x2[rest]) - np.maximum(x1[best], x1[rest])
        )
        inter_h = np.maximum(
            0, np.minimum(y2[best], y2[rest]) - np.maximum(y1[best], y1[rest])
        )
        inter = inter_w * inter_h

        union = areas[best] + areas[rest] - inter
        # Zero-area boxes give union == 0; floor the denominator so 0/0 does
        # not become NaN (which would warn and silently suppress the box).
        iou = inter / np.maximum(union, 1e-9)

        order = rest[iou < iou_thr]
    return keep
# -------------------- decode --------------------
def decode_9outputs(outputs, input_size=(640, 640)):
    """Decode nine raw detection-head tensors into boxes and class scores.

    ``outputs`` holds three tensors per pyramid level, ordered
    ``[reg, obj, cls]`` for strides 8, 16 and 32 in turn. Only batch
    element 0 is decoded.

    Args:
        outputs: Sequence of at least 9 NCHW arrays; per level
            ``reg`` is ``(B, 4, H, W)``, ``obj`` is ``(B, 1, H, W)`` and
            ``cls`` is ``(B, num_class, H, W)``. ``obj`` and ``cls`` are
            treated as logits (a sigmoid is applied here).
        input_size: ``(height, width)`` of the network input. Each level is
            expected to have spatial size ``input_size // stride``.

    Returns:
        ``(boxes, scores)`` where ``boxes`` is ``(N, 4)`` with
        ``[x1, y1, x2, y2]`` in network-input pixels and ``scores`` is
        ``(N, num_class)`` holding ``sigmoid(obj) * sigmoid(cls)``; ``N`` is
        the total number of grid cells over all levels.

    Raises:
        ValueError: If fewer than 9 tensors are given or a level's spatial
            size does not match ``input_size``.
    """
    level_strides = (8, 16, 32)
    needed = 3 * len(level_strides)
    if len(outputs) < needed:
        raise ValueError(
            f"decode_9outputs expects {needed} output tensors, got {len(outputs)}"
        )

    reg_parts, obj_parts, cls_parts = [], [], []
    grid_parts, stride_parts = [], []
    for level, stride in enumerate(level_strides):
        reg, obj, cls = outputs[3 * level:3 * level + 3]
        batch, reg_ch, feat_h, feat_w = reg.shape

        expected_hw = (input_size[0] // stride, input_size[1] // stride)
        if (feat_h, feat_w) != expected_hw:
            raise ValueError(
                f"stride {stride}: feature map is {(feat_h, feat_w)}, "
                f"expected {expected_hw} for input_size {tuple(input_size)}"
            )

        # NCHW -> (B, H*W, C) so that the levels can be stacked along axis 1.
        reg_parts.append(reg.reshape(batch, reg_ch, -1).transpose(0, 2, 1))
        obj_parts.append(obj.reshape(batch, 1, -1).transpose(0, 2, 1))
        cls_parts.append(
            cls.reshape(batch, cls.shape[1], -1).transpose(0, 2, 1)
        )

        # (x, y) index of every cell, row-major to match the flattened H*W
        # axis produced by the reshape above.
        xv, yv = np.meshgrid(np.arange(feat_w), np.arange(feat_h))
        grid = np.stack((xv, yv), axis=-1).reshape(-1, 2)
        grid_parts.append(grid)
        stride_parts.append(np.full((grid.shape[0], 1), stride))

    reg_all = np.concatenate(reg_parts, axis=1)   # B, sum(HW), 4
    obj_all = np.concatenate(obj_parts, axis=1)   # B, sum(HW), 1
    cls_all = np.concatenate(cls_parts, axis=1)   # B, sum(HW), num_class
    scores = sigmoid(obj_all) * sigmoid(cls_all)  # B, sum(HW), num_class

    grids = np.concatenate(grid_parts, axis=0)           # sum(HW), 2
    cell_strides = np.concatenate(stride_parts, axis=0)  # sum(HW), 1

    # Centre = (offset + cell index) * stride, with no half-cell shift;
    # size = exp(raw) * stride.
    centers = (reg_all[0, :, :2] + grids) * cell_strides
    sizes = np.exp(reg_all[0, :, 2:4]) * cell_strides

    boxes = np.concatenate(
        [centers - sizes / 2, centers + sizes / 2], axis=-1
    )  # N, 4
    return boxes, scores[0]
# -------------------- 推理 --------------------
def infer_yolox_onnx(onnx_path, image_path, conf_thresh=0.3, iou_thresh=0.5):
    """Run the 9-output ONNX detector on one image and save a visualisation.

    Side effects (all in the current working directory):
      * ``output_{i}_raw.txt`` -- every raw model output, flattened in its
        original layout, one value per line;
      * ``result.jpg`` -- the input image with the kept boxes drawn on it.
    Progress is printed to stdout.

    Args:
        onnx_path: Path to the ONNX model file.
        image_path: Path to the image to run detection on.
        conf_thresh: Minimum best-class score for a box to be considered.
        iou_thresh: IoU threshold passed to ``nms``.

    Raises:
        OSError: If the image is missing or cannot be decoded.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f"Cannot read image (missing or unreadable): {image_path}")
    input_data, scale = preprocess(img)

    session = ort.InferenceSession(onnx_path)
    # Query the model for its input name rather than assuming "images".
    input_name = session.get_inputs()[0].name
    outputs = session.run(None, {input_name: input_data})

    # Dump the raw outputs for offline comparison: no transpose is applied,
    # each tensor is simply flattened.
    for i, out in enumerate(outputs):
        save_path = f"output_{i}_raw.txt"
        np.savetxt(save_path, out.reshape(-1), fmt="%.6f")
        print(f"Saved: {save_path}, shape={out.shape}")

    boxes, scores = decode_9outputs(outputs, input_size=input_data.shape[-2:])

    # Keep the best class per cell, then drop low-confidence cells.
    cls_ids = scores.argmax(axis=1)
    conf = scores.max(axis=1)
    mask = conf > conf_thresh
    boxes, conf, cls_ids = boxes[mask], conf[mask], cls_ids[mask]

    for i in nms(boxes, conf, iou_thr=iou_thresh):
        # Undo the letterbox resize to get original-image pixel coordinates.
        x1, y1, x2, y2 = (int(v) for v in boxes[i] / scale)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(img, f"{cls_ids[i]}:{conf[i]:.2f}", (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

    cv2.imwrite("result.jpg", img)
    print("Done -> result.jpg")
# -------------------- 运行 --------------------
if __name__ == "__main__":
    # Script entry point: run the demo model on the sample image.
    infer_yolox_onnx(onnx_path="yolox_9out.onnx", image_path="dog_640.jpg")
推理结果
