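# python
# (The line above and this one replace copy-button residue from the source web page; not part of the program.)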
import os
import cv2
import numpy as np
from class_type import CLASSES
# Object-confidence threshold and non-maximum-suppression (NMS) threshold.
# OBJ_THRESH is the minimum (class score * box confidence) kept by filter_boxes;
# NMS_THRESH is the maximum overlap ratio a box may have with a kept box in nms_boxes.
OBJ_THRESH = 0.25
NMS_THRESH = 0.45
# Used by box_process to derive the per-cell stride (IMG_SIZE // grid size);
# presumably the model input size in pixels -- TODO confirm axis order.
IMG_SIZE = (640, 640)
def filter_boxes(boxes, box_confidences, box_class_probs, obj_thresh=None):
    """Keep the candidate boxes whose best class score passes the threshold.

    The score of a candidate is ``max(class probabilities) * box confidence``.

    Args:
        boxes: Array indexed by candidate along its first axis (the caller in
            this file passes an ``(N, 4)`` array).
        box_confidences: Per-candidate confidence; flattened with
            ``reshape(-1)`` so ``(N,)`` and ``(N, 1)`` are both accepted.
        box_class_probs: Per-candidate class probabilities; the maximum and
            arg-maximum are taken along the last axis.
        obj_thresh: Minimum score to keep a candidate. ``None`` (the default)
            uses the module-level ``OBJ_THRESH``.

    Returns:
        Tuple ``(boxes, classes, scores)`` restricted to the kept candidates,
        where ``classes`` holds the index of the best class for each one.
    """
    if obj_thresh is None:
        obj_thresh = OBJ_THRESH

    box_confidences = box_confidences.reshape(-1)
    class_max_score = np.max(box_class_probs, axis=-1)
    classes = np.argmax(box_class_probs, axis=-1)

    # Compute the combined score once and reuse it for both the mask and the output.
    scores = class_max_score * box_confidences
    keep = scores >= obj_thresh
    return boxes[keep], classes[keep], scores[keep]
def nms_boxes(boxes, scores, nms_thresh=None):
    """Greedy non-maximum suppression over corner-format boxes.

    Boxes are visited from highest to lowest score. Each visited box is kept
    and every remaining box whose overlap ratio (intersection / union) with it
    exceeds the threshold is discarded.

    Args:
        boxes: ``(N, 4)`` array with columns ``x1, y1, x2, y2``.
        scores: ``(N,)`` array of scores matching ``boxes``.
        nms_thresh: Maximum allowed overlap ratio with an already kept box.
            ``None`` (the default) uses the module-level ``NMS_THRESH``.

    Returns:
        Integer index array into ``boxes`` of the kept boxes, ordered by
        descending score. It is an empty *integer* array for empty input so
        that it can always be used directly as an index.
    """
    if nms_thresh is None:
        nms_thresh = NMS_THRESH

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)

    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        best = order[0]
        rest = order[1:]
        keep.append(best)

        # Intersection rectangle between the best box and every remaining box.
        inter_x1 = np.maximum(x1[best], x1[rest])
        inter_y1 = np.maximum(y1[best], y1[rest])
        inter_x2 = np.minimum(x2[best], x2[rest])
        inter_y2 = np.minimum(y2[best], y2[rest])

        # The small epsilon is carried over from the original implementation.
        inter_w = np.maximum(0.0, inter_x2 - inter_x1 + 0.00001)
        inter_h = np.maximum(0.0, inter_y2 - inter_y1 + 0.00001)
        inter = inter_w * inter_h

        overlap = inter / (areas[best] + areas[rest] - inter)
        order = rest[overlap <= nms_thresh]

    # Explicit integer dtype: np.array([]) would be float64, which NumPy
    # rejects as an index array.
    return np.array(keep, dtype=np.intp)
# def dfl(position):
# # Refines the model's bounding-box regression prediction (described by the original author as an enhanced loss function)
# import torch
# x = torch.tensor(position)
# n, c, h, w = x.shape
# p_num = 4
# mc = c // p_num
# y = x.reshape(n, p_num, mc, h, w)
# y = y.softmax(2)
# acc_metrix = torch.tensor(range(mc)).float().reshape(1, 1, mc, 1, 1)
# y = (y * acc_metrix).sum(2)
# return y.numpy()
#############################################################
### torch is not required (NumPy-only implementation below)
def dfl(position):
    """Collapse binned box-regression output into one value per box side.

    The channel axis is split into 4 groups of ``c // 4`` bins. A softmax is
    taken over the bins of each group, and the result is the softmax-weighted
    sum of the bin indices ``0 .. c // 4 - 1``.

    Args:
        position: 4-D array whose shape unpacks as ``(n, c, h, w)``.

    Returns:
        Array of shape ``(n, 4, h, w)``.
    """
    batch, channels, height, width = position.shape
    groups = 4
    bins = channels // groups

    grouped = position.reshape(batch, groups, bins, height, width)
    probs = softmax(grouped, 2)

    # Bin indices shaped to broadcast along the bin axis (axis 2).
    bin_index = np.arange(bins).reshape(1, 1, bins, 1, 1)
    return (probs * bin_index).sum(2)
def softmax(data, dim):
    """Numerically stable softmax of ``data`` along axis ``dim``.

    Args:
        data: NumPy array.
        dim: Axis along which the values are normalised to sum to 1.

    Returns:
        Array with the same shape as ``data``.
    """
    # Subtracting the per-slice maximum keeps np.exp from overflowing.
    # keepdims=True lets broadcasting line the maximum up with ``data``, so no
    # explicit repeat (and no full-size temporary) is needed.
    shifted = data - np.max(data, axis=dim, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=dim, keepdims=True)
#############################################################
def box_process(position):
    """Convert one branch's box regression output into corner coordinates.

    Each grid cell's centre is ``cell index + 0.5``. The first two channels of
    the ``dfl`` output are subtracted from it and the last two are added to it.
    Both results are then multiplied by the stride derived from ``IMG_SIZE``
    and the grid size.

    Args:
        position: 4-D array; axes 2 and 3 are read as ``grid_h`` and
            ``grid_w``, and the array is passed to ``dfl`` unchanged.

    Returns:
        Array of shape ``(n, 4, grid_h, grid_w)`` holding the two scaled
        corner points concatenated along axis 1.
    """
    grid_h, grid_w = position.shape[2:4]

    # Column (x) and row (y) index of every grid cell, stacked on axis 1.
    xs, ys = np.meshgrid(np.arange(0, grid_w), np.arange(0, grid_h))
    cells = np.concatenate(
        (xs.reshape(1, 1, grid_h, grid_w), ys.reshape(1, 1, grid_h, grid_w)),
        axis=1,
    )

    # NOTE(review): channel 0 (column index) is scaled by IMG_SIZE[1] // grid_h
    # and channel 1 (row index) by IMG_SIZE[0] // grid_w. The two are equal for
    # the square 640x640 setting -- confirm axis order before using a
    # non-square IMG_SIZE.
    stride = np.array([IMG_SIZE[1] // grid_h, IMG_SIZE[0] // grid_w]).reshape(1, 2, 1, 1)

    offsets = dfl(position)
    centres = cells + 0.5
    first_corner = (centres - offsets[:, 0:2, :, :]) * stride
    second_corner = (centres + offsets[:, 2:4, :, :]) * stride
    return np.concatenate((first_corner, second_corner), axis=1)
def yolov8_post_process(input_data):
    """Turn raw model outputs into final boxes, class indices and scores.

    ``input_data`` is treated as 3 equally sized branches. Within each branch
    the first tensor is the box regression output (decoded by ``box_process``)
    and the second holds the per-class confidences. Any further tensors in a
    branch are ignored. Candidates are filtered with ``filter_boxes`` and then
    de-duplicated per class with ``nms_boxes``.

    Args:
        input_data: Sequence of 4-D arrays with at least 2 tensors per branch
            (that is, ``len(input_data) >= 6``).

    Returns:
        ``(boxes, classes, scores)`` as concatenated arrays, or
        ``(None, None, None)`` when no detection survives.

    Raises:
        ValueError: If fewer than 2 tensors per branch are supplied.
    """
    branch_count = 3  # the outputs are split into three branches
    per_branch = len(input_data) // branch_count
    if per_branch < 2:
        # With fewer than 2 tensors per branch the box/class indices below
        # would overlap between branches and silently read the wrong tensors.
        raise ValueError(
            "expected at least {} output tensors ({} branches x 2), got {}".format(
                branch_count * 2, branch_count, len(input_data)
            )
        )

    boxes, classes_conf, scores = [], [], []
    for branch in range(branch_count):
        box_out = input_data[per_branch * branch]
        cls_out = input_data[per_branch * branch + 1]
        boxes.append(box_process(box_out))
        classes_conf.append(cls_out)
        # Box confidence is fixed to 1 for every candidate, so the final score
        # is just the best class confidence.
        scores.append(np.ones_like(cls_out[:, :1, :, :], dtype=np.float32))

    def sp_flatten(_in):
        # Move the channel axis last, then flatten to (num_candidates, channels).
        ch = _in.shape[1]
        return _in.transpose(0, 2, 3, 1).reshape(-1, ch)

    # Flatten each branch and join all branches into one candidate list.
    boxes = np.concatenate([sp_flatten(_v) for _v in boxes])
    classes_conf = np.concatenate([sp_flatten(_v) for _v in classes_conf])
    scores = np.concatenate([sp_flatten(_v) for _v in scores])

    boxes, classes, scores = filter_boxes(boxes, scores, classes_conf)

    # Run NMS separately for every class that still has candidates.
    nboxes, nclasses, nscores = [], [], []
    for class_id in set(classes):
        inds = np.where(classes == class_id)
        class_boxes = boxes[inds]
        class_ids = classes[inds]
        class_scores = scores[inds]
        keep = nms_boxes(class_boxes, class_scores)
        if len(keep) != 0:
            nboxes.append(class_boxes[keep])
            nclasses.append(class_ids[keep])
            nscores.append(class_scores[keep])

    if not nclasses and not nscores:
        return None, None, None

    boxes = np.concatenate(nboxes)
    classes = np.concatenate(nclasses)
    scores = np.concatenate(nscores)
    return boxes, classes, scores
def draw(image, boxes, scores, classes):
    """Draw the detections on ``image`` and print them as a table.

    For every detection a green rectangle and a ``"<class name> <score>"``
    label are drawn with OpenCV, and one table row is printed to stdout.

    Args:
        image: Image passed to ``cv2.rectangle`` / ``cv2.putText``.
        boxes: Iterable of 4-element boxes, read as ``xmin, ymin, xmax, ymax``
            and truncated to ``int``.
        scores: Iterable of scores, parallel to ``boxes``.
        classes: Iterable of indices into ``CLASSES``, parallel to ``boxes``.

    Returns:
        The same ``image`` object that was passed in.
    """
    print("{:^12} {:^12} {}".format('class', 'score', 'xmin, ymin, xmax, ymax'))
    print('-' * 50)

    green = (0, 255, 0)
    for box, score, cl in zip(boxes, scores, classes):
        xmin, ymin, xmax, ymax = (int(coord) for coord in box)
        name = CLASSES[cl]

        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), green, 2)
        # The label is drawn 6 pixels above the box's first corner.
        cv2.putText(image, '{0} {1:.2f}'.format(name, score),
                    (xmin, ymin - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.6, green, 2)
        print("{:^12} {:^12.3f} [{:>4}, {:>4}, {:>4}, {:>4}]".format(name, score, xmin, ymin, xmax, ymax))

    return image