本文不生产技术,只做技术的搬运工!
前言
最近有项目需要在华为设备上部署一系列模型,作者学习了一些相关知识,在这里进行分享记录,有需要的朋友自取。
环境配置
由于作者拿到服务器时,已经配置好了基础环境,这里就不对CANN及其他相关驱动的安装进行介绍了。本章仅介绍conda环境的配置,以下是作者使用python推理时的环境,主要内容为python3.9、opencv、aclruntime、ais-bench,其中aclruntime和ais-bench需要手动下载whl文件安装,下载地址:ais-bench_workload/tool/ais_bench/README.md · Ascend/tools - Gitee.com (https://gitee.com/ascend/tools/blob/master/ais-bench_workload/tool/ais_bench/README.md)
bash
_libgcc_mutex 0.1 main defaults
_openmp_mutex 5.1 51_gnu defaults
aclruntime 0.0.2 pypi_0 pypi
ais-bench 0.0.2 pypi_0 pypi
attrs 25.3.0 pypi_0 pypi
bzip2 1.0.8 h998d150_6 defaults
ca-certificates 2025.2.25 hd43f75c_0 defaults
expat 2.7.1 h419075a_0 defaults
filelock 3.18.0 pypi_0 pypi
fsspec 2025.7.0 pypi_0 pypi
jinja2 3.1.6 pypi_0 pypi
ld_impl_linux-aarch64 2.40 h48e3ba3_0 defaults
libffi 3.4.4 h419075a_1 defaults
libgcc-ng 11.2.0 h1234567_1 defaults
libgomp 11.2.0 h1234567_1 defaults
libstdcxx-ng 11.2.0 h1234567_1 defaults
libxcb 1.17.0 hf66535e_0 defaults
markupsafe 3.0.2 pypi_0 pypi
mpmath 1.3.0 pypi_0 pypi
ncurses 6.4 h419075a_0 defaults
networkx 3.2.1 pypi_0 pypi
numpy 2.0.2 pypi_0 pypi
opencv-python 4.12.0.88 pypi_0 pypi
openssl 3.0.16 h998d150_0 defaults
pillow 11.3.0 pypi_0 pypi
pip 25.1 pyhc872135_2 defaults
pthread-stubs 0.3 hfd63f10_1 defaults
python 3.9.23 h89e7a61_0 defaults
readline 8.2 h998d150_0 defaults
setuptools 78.1.1 py39hd43f75c_0 defaults
sqlite 3.50.2 h998d150_1 defaults
sympy 1.13.1 pypi_0 pypi
tk 8.6.14 hb5ae6a8_1 defaults
torch 2.5.1 pypi_0 pypi
torch-npu 2.5.1 pypi_0 pypi
torchvision 0.20.1 pypi_0 pypi
tqdm 4.67.1 pypi_0 pypi
typing-extensions 4.14.1 pypi_0 pypi
tzdata 2025b h04d1e81_0 defaults
wheel 0.45.1 py39hd43f75c_0 defaults
xorg-libx11 1.8.12 hf66535e_1 defaults
xorg-libxau 1.0.12 hf66535e_0 defaults
xorg-libxdmcp 1.1.5 hf66535e_0 defaults
xorg-xorgproto 2024.1 h998d150_1 defaults
xz 5.6.4 h998d150_1 defaults
zlib 1.2.13 h998d150_1 defaults
模型转换
这里作者主要使用bs1和bs8,大家可以根据各自的需求确定
bash
# Load the Ascend toolkit environment (presumably what puts `atc` on PATH - confirm on your install).
source /usr/local/Ascend/ascend-toolkit/set_env.sh
# Convert the ONNX model to an offline model. The first dimension of --input_shape is the
# batch size (1 here; rebuild with 8 for the bs8 model). --insert_op_conf is optional and
# embeds the AIPP preprocessing described in the accompanying text.
atc --model=./models/pcb_comV2_20230329.onnx --framework=5 --output=./models/pcb_comV2_20230329_aipp --input_format=NCHW --input_shape="images:1,3,608,608" --soc_version=Ascend310P3 --insert_op_conf=./aipp.cfg
这里需要注意的是aipp.cfg是一个可选项,也可以不加,其作用是将预处理以算子的形式放入模型中用来加速的,其配置方法参考:
文档中心:https://developer.huawei.com/consumer/cn/doc/hiai-Guides/aipp-configuration-file-description-0000001184089142 。作者这里由于输入图像的尺寸不固定,无法将padding、resize放入模型,仅对归一化做了优化处理;如果你的应用场景中输入图像是固定尺寸的,可以将padding、resize也放入模型以加速推理。以下是作者的aipp.cfg文件:
bash
# AIPP configuration; per the accompanying text it only moves normalization into the
# model (padding/resize stay on the host because input sizes vary).
aipp_op{
aipp_mode:static
input_format : RGB888_U8
# Size of the image handed to AIPP; must match the model input used at conversion time.
src_image_size_w : 608
src_image_size_h : 608
csc_switch : false
# NOTE(review): presumably swaps the R and B channels so BGR frames from OpenCV reach
# the network as RGB - confirm against the AIPP documentation.
rbuv_swap_switch : true
mean_chn_0: 0
mean_chn_1: 0
mean_chn_2: 0
# 0.00392156... equals 1/255; with zero means this presumably scales uint8 pixels
# to [0, 1] - confirm the exact formula in the AIPP documentation.
var_reci_chn_0: 0.0039215686274509803921568627451
var_reci_chn_1: 0.0039215686274509803921568627451
var_reci_chn_2: 0.0039215686274509803921568627451
}
推理代码
检测代码修改自:基于昇腾310B4的YOLOv8目标检测推理_昇腾310b模型测试-CSDN博客
单batch_size非aipp推理
分类:EfficientNet-Multi-Head
这里的预处理代码需要根据自己训练代码中的预处理方式复现,作者这里使用的模型是多个分类头,如果是单个分类头需要修改后处理代码,不会的也可以私信作者寻求帮助。
python
from PIL import Image
import numpy as np
from ais_bench.infer.interface import InferSession
def sigmoid(x):
    """Apply the logistic function element-wise without overflowing.

    Args:
        x: Scalar or array-like of logits.

    Returns:
        ``1 / (1 + exp(-x))`` with the same shape as ``x`` (a NumPy scalar
        when ``x`` is a scalar).
    """
    x = np.asarray(x)
    # exp() only ever receives non-positive arguments here, so very negative
    # logits cannot overflow the way a direct exp(-x) does.
    z = np.exp(-np.abs(x))
    out = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with an empty tuple unwraps a 0-d result into a scalar and
    # leaves real arrays untouched.
    return out[()]
def resize(image, size=224):
    """Scale a PIL image to a ``size`` x ``size`` square with bilinear resampling."""
    target = (size, size)
    return image.resize(target, Image.BILINEAR)
def center_crop(image, output_size=(224, 224)):
    """Cut a centered window out of a PIL image.

    Args:
        image: Object exposing ``size`` as (width, height) and ``crop(box)``.
        output_size: Window size as (height, width).

    Returns:
        Whatever ``image.crop`` returns for the centered box.
    """
    src_w, src_h = image.size
    crop_h, crop_w = output_size
    left = int(round((src_w - crop_w) / 2.))
    top = int(round((src_h - crop_h) / 2.))
    box = (left, top, left + crop_w, top + crop_h)
    return image.crop(box)
def to_tensor(image):
    """Convert an image to a float32 NumPy array with values divided by 255."""
    pixels = np.array(image, dtype=np.float32)
    return pixels / 255.0
def normalize(image, mean=(0., 0., 0.), std=(1., 1., 1.)):
    """Standardize an image channel-wise as ``(image - mean) / std``.

    Args:
        image: Array whose last axis holds the channels.
        mean: Per-channel means (any sequence; broadcast over ``image``).
        std: Per-channel standard deviations (any sequence).

    Returns:
        The standardized array.
    """
    # Immutable tuple defaults avoid the shared-mutable-default pitfall;
    # lists passed by callers are still accepted.
    mean = np.asarray(mean)
    std = np.asarray(std)
    return (image - mean) / std
def val_transform(image):
    """Run the validation-time preprocessing chain on one PIL image.

    Steps: resize, center crop, scale to [0, 1], normalize, reorder HWC to
    CHW and add a leading batch axis. Returns a float32 array.
    """
    cropped = center_crop(resize(image))
    scaled = normalize(to_tensor(cropped), mean=[0., 0., 0.], std=[1., 1., 1.])
    # Channels first, then a batch axis in front.
    chw = np.transpose(scaled, (2, 0, 1))
    batched = chw[np.newaxis, ...]
    return batched.astype(np.float32)
if __name__ == '__main__':
    model_path = "/data/classify/models/structure-0.82-0.88-0.8-0.82-bs1.om"
    image_path = "/data/test_img/test.jpg"
    session = InferSession(device_id=0, model_path=model_path)
    # Force three channels: grayscale, palette or RGBA files would otherwise
    # yield a tensor whose channel count does not match the model input.
    image = Image.open(image_path).convert('RGB')
    image = val_transform(image)
    print(image.shape)
    print(type(image))
    # infer() takes a list with one array per model input.
    outputs = session.infer([image])
    print(len(outputs))
    # One output per classification head: report its best class and score.
    for output in outputs:
        output = sigmoid(output)
        print(np.argmax(output), np.max(output))
检测:Yolov8
python
# -*- coding: utf-8 -*-
import cv2
import numpy as np
from ais_bench.infer.interface import InferSession
import time
# Class-id -> label table. The multi-class variant is kept for reference:
# CLASSES = {
#     0: 'dfbl', 1: 'cp', 2: 'ddh', 3: 'aqd', 4: 'ry', 5: 'rt'
# }
CLASSES = {0: 'cp'}
# Minimum class score for a prediction to be kept
CONFIDENCE = 0.01
# IoU threshold used by non-maximum suppression
IOU = 0.45
# One random colour per class, used when drawing boxes
colors = np.random.uniform(low=0, high=255, size=(len(CLASSES), 3))
def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """Draw one detection (box plus "<name> <score>" tag) onto ``img`` in place.

    Args:
        img: Image to draw on.
        class_id: Key into ``CLASSES`` / row of ``colors``.
        confidence: Score shown with two decimals.
        x, y: Top-left corner of the box.
        x_plus_w, y_plus_h: Bottom-right corner of the box.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    box_color = colors[class_id]
    caption = f"{CLASSES[class_id]} {confidence:.2f}"
    # Box outline
    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), box_color, 2)
    (text_w, text_h), _ = cv2.getTextSize(caption, font, 0.5, 1)
    # Put the tag above the box when there is room, otherwise just inside it.
    text_y = y - 10
    if text_y <= text_h:
        text_y = y + 10
    # Filled background behind the caption
    cv2.rectangle(img, (x, text_y - text_h),
                  (x + text_w, text_y + text_h), box_color, cv2.FILLED)
    cv2.putText(img, caption, (x, text_y), font,
                0.5, (0, 0, 0), 1, cv2.LINE_AA)
def preprocess(img, imgsz=640, swap_rb=True):
    """Letterbox an image into a normalized 1x3xHxW float32 tensor.

    Args:
        img: HxWx3 uint8 image, BGR as returned by ``cv2.imread``.
        imgsz: Side length of the square model input.
        swap_rb: Reverse the channel order (BGR -> RGB) before building the
            tensor, matching the ``swapRB=True`` of the ``blobFromImage``
            call this function replaced. Pass ``False`` for a model trained
            on BGR input.

    Returns:
        C-contiguous float32 array of shape (1, 3, imgsz, imgsz) in [0, 1];
        the image sits in the top-left corner, the rest is zero padding.
    """
    h, w = img.shape[:2]
    r = imgsz / max(h, w)
    # round() instead of int(): float error in w * r could otherwise leave
    # the long side one pixel short of imgsz; max(1, ...) guards extreme
    # aspect ratios from collapsing to a zero-sized resize.
    new_w = max(1, round(w * r))
    new_h = max(1, round(h * r))
    resized_img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    # Pad to imgsz x imgsz, keeping the aspect ratio
    padded_img = np.zeros((imgsz, imgsz, 3), dtype=np.uint8)
    padded_img[:new_h, :new_w] = resized_img
    if swap_rb:
        padded_img = padded_img[:, :, ::-1]
    # HWC -> CHW, scale to [0, 1]
    tensor = padded_img.transpose(2, 0, 1).astype(np.float32) / 255.0
    # transpose/astype keep the HWC memory layout; hand the runtime a
    # plain C-contiguous buffer with the batch axis in front.
    return np.ascontiguousarray(np.expand_dims(tensor, axis=0))
def main(session, original_image):
    """Run detection on one image and draw the boxes that survive NMS.

    Args:
        session: Inference session exposing ``infer(list_of_arrays)``.
        original_image: HxWx3 uint8 image; boxes are drawn onto it in place.

    Returns:
        Tuple ``(original_image, detections)`` where ``detections`` is a
        list of dicts with keys ``class_id``, ``class_name``,
        ``confidence``, ``box`` ([x, y, w, h] in original pixels) and
        ``scale``.
    """
    height, width, _ = original_image.shape
    # Pad to a square canvas so one scale factor maps both axes back.
    length = max(height, width)
    image = np.zeros((length, length, 3), np.uint8)
    image[0:height, 0:width] = original_image
    scale = length / 640

    img_tensor = preprocess(image)
    start_infer = time.time()
    outputs = session.infer([img_tensor])
    end_infer = time.time()
    print("推理时间:", end_infer - start_infer)

    # Rows are indexed as [cx, cy, w, h, class scores...]; the model used
    # here was exported with the anchor axis first, so no transpose is done.
    preds = np.squeeze(outputs)
    class_scores = preds[:, 4:]
    best_class = np.argmax(class_scores, axis=1)
    best_score = class_scores.max(axis=1)
    keep = best_score >= CONFIDENCE
    cx, cy, w, h = preds[keep, :4].T
    # Centre format -> top-left format, scaled back to original pixels.
    boxes = (np.stack([cx - w / 2, cy - h / 2, w, h], axis=1) * scale).tolist()
    scores = best_score[keep].tolist()
    class_ids = best_class[keep].tolist()

    # Non-maximum suppression; skipped when nothing passed the threshold.
    # reshape(-1) copes with OpenCV versions that return an Nx1 index array.
    if boxes:
        kept = np.asarray(
            cv2.dnn.NMSBoxes(boxes, scores, CONFIDENCE, IOU, 0.5)).reshape(-1)
    else:
        kept = []

    detections = []
    for index in map(int, kept):
        box = boxes[index]
        detections.append({
            "class_id": class_ids[index],
            "class_name": CLASSES[class_ids[index]],
            "confidence": scores[index],
            "box": box,
            "scale": scale,
        })
        draw_bounding_box(
            original_image,
            class_ids[index],
            scores[index],
            round(box[0]),
            round(box[1]),
            round(box[0] + box[2]),
            round(box[1] + box[3]),
        )
    return original_image, detections
if __name__ == "__main__":
    model_path = "/data/detect/models/illegal-bs1.om"
    # Create the inference session
    session = InferSession(device_id=0, model_path=model_path)
    # Single-image inference
    input_image_path = "/data/test_img/test.jpg"
    image = cv2.imread(input_image_path)
    # cv2.imread returns None instead of raising on a missing/corrupt file.
    if image is None:
        raise FileNotFoundError(f"cannot read image: {input_image_path}")
    draw_image, _ = main(session, image)
    # The result is written to disk; no window is opened, so the former
    # waitKey/destroyAllWindows calls had nothing to act on and were dropped.
    cv2.imwrite("/data/test_img/test-result.jpg", draw_image)
车牌识别:LPRNet
python
import numpy as np
from ais_bench.infer.interface import InferSession
import cv2
import time
# Recognizable characters in model output order; decode() treats the final
# entry ('-') as the blank symbol.
CHARS = list(
    "京沪津渝冀晋蒙辽吉黑"
    "苏浙皖闽赣鲁豫鄂湘粤"
    "桂琼川贵云藏陕甘青宁"
    "新学港澳警使领应急挂"
    "临0123456789"
    "ABCDEFGHJK"
    "LMNPQRSTUV"
    "WXYZ-"
)
def load_image(file, img_size):
    """Read an image file into a normalized, C-contiguous CHW float32 array.

    Args:
        file: Path of the image file.
        img_size: Target size passed to ``cv2.resize`` as (width, height).

    Returns:
        Array of shape (3, height, width) holding ``(pixel - 127.5) * 0.007843``.
    """
    # Decode from raw bytes rather than cv2.imread(file); presumably to cope
    # with paths imread cannot open - TODO confirm.
    raw = np.fromfile(file, dtype=np.uint8)
    bgr = cv2.imdecode(raw, cv2.IMREAD_COLOR)
    resized = cv2.resize(bgr, img_size)
    # NOTE: per the original author, the source framework does not need this conversion.
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    scaled = (rgb.astype('float32') - 127.5) * 0.007843
    # HWC -> CHW in a contiguous buffer
    return np.ascontiguousarray(scaled.transpose(2, 0, 1))
def decode(preds):
    """Greedy CTC decoding of plate predictions.

    Args:
        preds: Array indexed as ``preds[sample, class, step]``.

    Returns:
        Tuple ``(labels, pred_labels)``: the decoded strings and, per sample,
        the kept class indices (repeats collapsed, blank removed).
    """
    blank = len(CHARS) - 1
    pred_labels = []
    for sample in preds:
        # Best class at every time step
        best_path = np.argmax(sample, axis=0)
        kept = []
        previous = -1
        for idx in best_path:
            # Keep a symbol only if it is neither blank nor a repeat.
            if idx != blank and idx != previous:
                kept.append(idx)
            previous = idx
        pred_labels.append(kept)
    labels = ["".join(CHARS[k] for k in seq) for seq in pred_labels]
    return labels, pred_labels
def pred_deal(prebs):
    """Greedy CTC decoding that returns class indices only.

    Args:
        prebs: Array indexed as ``prebs[sample, class, step]``.

    Returns:
        One list of class indices per sample, with consecutive repeats
        collapsed and the blank (last ``CHARS`` index) removed. A sample
        with zero time steps yields an empty list instead of raising.
    """
    blank = len(CHARS) - 1
    preb_labels = []
    for preb in prebs:
        # Best class per time step in one call instead of a column loop.
        best_path = np.argmax(preb, axis=0)
        kept = []
        # -1 never equals a class index, so the first symbol is judged only
        # on whether it is blank; no special-casing of element 0 is needed.
        previous = -1
        for c in best_path:
            if c != blank and c != previous:
                kept.append(c)
            previous = c
        preb_labels.append(kept)
    return preb_labels
if __name__ == '__main__':
    image_path = "/data/test_img/test_license.jpg"
    model_path = "/data/recognition/models/licplate-bs1.om"
    image_size = (94, 24)
    session = InferSession(device_id=0, model_path=model_path)
    # Preprocess one plate crop and add the batch axis.
    plate = np.array(load_image(image_path, image_size))
    batch = np.expand_dims(plate, axis=0)
    # For a batch-8 model: inputs = np.concatenate([batch] * 8, axis=0)
    pred = session.infer([batch])
    print(pred)
    labels, pred_labels = decode(pred[0])
    print(labels)
多batch_size非aipp推理
分类:EfficientNet-Multi-Head
作者这里使用的是一张图复制8份,实际情况需要自行修改读取代码
python
from PIL import Image
import numpy as np
import os
from ais_bench.infer.interface import InferSession
# ---------- 后处理 ----------
def sigmoid(x):
    """Apply the logistic function element-wise without overflowing.

    Args:
        x: Scalar or array-like of logits.

    Returns:
        ``1 / (1 + exp(-x))`` with the same shape as ``x`` (a NumPy scalar
        when ``x`` is a scalar).
    """
    x = np.asarray(x)
    # exp() only ever receives non-positive arguments here, so very negative
    # logits cannot overflow the way a direct exp(-x) does.
    z = np.exp(-np.abs(x))
    out = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with an empty tuple unwraps a 0-d result into a scalar and
    # leaves real arrays untouched.
    return out[()]
# ---------- 预处理 ----------
def resize(image, size=224):
    """Scale a PIL image to a ``size`` x ``size`` square with bilinear resampling."""
    target = (size, size)
    return image.resize(target, Image.BILINEAR)
def center_crop(image, output_size=(224, 224)):
    """Cut a centered window out of a PIL image.

    Args:
        image: Object exposing ``size`` as (width, height) and ``crop(box)``.
        output_size: Window size as (height, width).

    Returns:
        Whatever ``image.crop`` returns for the centered box.
    """
    src_w, src_h = image.size
    crop_h, crop_w = output_size
    left = int(round((src_w - crop_w) / 2.))
    top = int(round((src_h - crop_h) / 2.))
    box = (left, top, left + crop_w, top + crop_h)
    return image.crop(box)
def to_tensor(image):
    """Convert an image to a float32 NumPy array with values divided by 255."""
    pixels = np.array(image, dtype=np.float32)
    return pixels / 255.0
def normalize(image, mean=(0., 0., 0.), std=(1., 1., 1.)):
    """Standardize an image channel-wise as ``(image - mean) / std``.

    Args:
        image: Array whose last axis holds the channels.
        mean: Per-channel means (any sequence; broadcast over ``image``).
        std: Per-channel standard deviations (any sequence).

    Returns:
        The standardized array.
    """
    # Immutable tuple defaults avoid the shared-mutable-default pitfall;
    # lists passed by callers are still accepted.
    mean = np.asarray(mean)
    std = np.asarray(std)
    return (image - mean) / std
def val_transform(image):
    """Run the validation-time preprocessing chain on one PIL image.

    Steps: resize, center crop, scale to [0, 1], normalize, reorder HWC to
    CHW and add a leading batch axis. Returns a float32 array.
    """
    cropped = center_crop(resize(image))
    scaled = normalize(to_tensor(cropped), mean=[0., 0., 0.], std=[1., 1., 1.])
    # Channels first, then a batch axis in front.
    chw = np.transpose(scaled, (2, 0, 1))
    batched = chw[np.newaxis, ...]
    return batched.astype(np.float32)
# ---------- 主流程 ----------
if __name__ == '__main__':
    model_path = "/data/classify/models/structure-0.82-0.88-0.8-0.82.om"
    image_path = "/data/test_img/test.jpg"
    bs = 8  # batch size the model was converted with

    # 1. Create the inference session
    session = InferSession(device_id=0, model_path=model_path)

    # 2. Load and preprocess the picture (one image repeated for the demo)
    pil_img = Image.open(image_path).convert('RGB')
    tensor = val_transform(pil_img)

    # 3. Stack bs copies along the batch axis
    batch_input = np.concatenate([tensor for _ in range(bs)], axis=0)
    print(batch_input.shape)

    # 4. Inference; infer() expects a list of input arrays
    outputs = session.infer([batch_input])

    # 5. Regroup head-major outputs into one list of heads per sample
    print("Number of outputs:", len(outputs))
    per_sample = [[head[i] for head in outputs] for i in range(bs)]
    for i, heads in enumerate(per_sample):
        for j, logits in enumerate(heads):
            output = sigmoid(logits)
            print("batch:",i,"head:",j,"reslut:",np.argmax(output),"conf:",np.max(output))
检测:Yolov8
python
# -*- coding: utf-8 -*-
import cv2
import numpy as np
from ais_bench.infer.interface import InferSession
import time
# Class-id -> label table. The single-class variant is kept for reference:
# CLASSES = {
#     0: 'cp'
# }
CLASSES = {
    0: 'dfbl',
    1: 'cp',
    2: 'ddh',
    3: 'aqd',
    4: 'ry',
    5: 'rt',
}
# Minimum class score for a prediction to be kept
CONFIDENCE = 0.01
# IoU threshold used by non-maximum suppression
IOU = 0.45
# One random colour per class, used when drawing boxes
colors = np.random.uniform(low=0, high=255, size=(len(CLASSES), 3))
def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """Draw one detection (box plus "<name> <score>" tag) onto ``img`` in place.

    Args:
        img: Image to draw on.
        class_id: Key into ``CLASSES`` / row of ``colors``.
        confidence: Score shown with two decimals.
        x, y: Top-left corner of the box.
        x_plus_w, y_plus_h: Bottom-right corner of the box.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    box_color = colors[class_id]
    caption = f"{CLASSES[class_id]} {confidence:.2f}"
    # Box outline
    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), box_color, 2)
    (text_w, text_h), _ = cv2.getTextSize(caption, font, 0.5, 1)
    # Put the tag above the box when there is room, otherwise just inside it.
    text_y = y - 10
    if text_y <= text_h:
        text_y = y + 10
    # Filled background behind the caption
    cv2.rectangle(img, (x, text_y - text_h),
                  (x + text_w, text_y + text_h), box_color, cv2.FILLED)
    cv2.putText(img, caption, (x, text_y), font,
                0.5, (0, 0, 0), 1, cv2.LINE_AA)
def batch_preprocess(images, imgsz=640, swap_rb=True):
    """Letterbox a list of images into one normalized NCHW float32 batch.

    Args:
        images: List of HxWx3 uint8 images (BGR as returned by
            ``cv2.imread``); its length is the batch size.
        imgsz: Side length of the square model input.
        swap_rb: Reverse the channel order (BGR -> RGB) before building the
            tensor. Pass ``False`` for a model trained on BGR input.

    Returns:
        Tuple ``(batch_tensor, scales)``: a C-contiguous float32 array of
        shape (len(images), 3, imgsz, imgsz) in [0, 1], and per image the
        factor that maps model-input coordinates back to original pixels.
    """
    # Pre-allocating gives the right shape even for an empty list.
    batch = np.zeros((len(images), 3, imgsz, imgsz), dtype=np.float32)
    scales = []
    for k, img in enumerate(images):
        h, w = img.shape[:2]
        length = max(h, w)
        scales.append(length / imgsz)
        r = imgsz / length
        # round() instead of int(): float error in w * r could otherwise
        # leave the long side one pixel short of imgsz.
        new_w = max(1, round(w * r))
        new_h = max(1, round(h * r))
        resized_img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
        # Pad to imgsz x imgsz; the image sits in the top-left corner.
        padded_img = np.zeros((imgsz, imgsz, 3), dtype=np.uint8)
        padded_img[:new_h, :new_w] = resized_img
        if swap_rb:
            padded_img = padded_img[:, :, ::-1]
        # HWC -> CHW, scale to [0, 1]
        batch[k] = padded_img.transpose(2, 0, 1).astype(np.float32) / 255.0
    return batch, scales
def process_output(output, original_image, scale):
    """Decode one image's raw predictions, run NMS and draw the kept boxes.

    Args:
        output: 2-D array indexed as ``output[anchor]`` =
            [cx, cy, w, h, class scores...], i.e. anchors on the first
            axis. A standard (4 + classes, anchors) export must be
            transposed before calling.
        original_image: Image the boxes refer to; it is copied, not modified.
        scale: Factor mapping model-input coordinates to original pixels.

    Returns:
        Tuple ``(processed_image, detections)`` where ``detections`` is a
        list of dicts with keys ``class_id``, ``class_name``,
        ``confidence``, ``box`` ([x, y, w, h]) and ``scale``.
    """
    height, width = original_image.shape[:2]
    output = np.asarray(output)

    # Best class and its score for every anchor, in one pass.
    class_scores = output[:, 4:]
    best_class = np.argmax(class_scores, axis=1)
    best_score = class_scores.max(axis=1)
    keep = best_score >= CONFIDENCE
    cx, cy, w, h = output[keep, :4].T
    # Centre format -> top-left format, scaled back to original pixels.
    boxes = (np.stack([cx - w / 2, cy - h / 2, w, h], axis=1) * scale).tolist()
    scores = best_score[keep].tolist()
    class_ids = best_class[keep].tolist()

    # Non-maximum suppression; skipped when nothing passed the threshold.
    # reshape(-1) copes with OpenCV versions that return an Nx1 index array.
    if boxes:
        kept = np.asarray(
            cv2.dnn.NMSBoxes(boxes, scores, CONFIDENCE, IOU, 0.5)).reshape(-1)
    else:
        kept = []

    detections = []
    processed_image = original_image.copy()
    for index in map(int, kept):
        box = boxes[index]
        detections.append({
            "class_id": class_ids[index],
            "class_name": CLASSES[class_ids[index]],
            "confidence": scores[index],
            "box": box,
            "scale": scale,
        })
        # Clamp the drawn rectangle to the image bounds.
        draw_bounding_box(
            processed_image,
            class_ids[index],
            scores[index],
            max(0, round(box[0])),
            max(0, round(box[1])),
            min(width, round(box[0] + box[2])),
            min(height, round(box[1] + box[3])),
        )
    return processed_image, detections
def main_batch(session, original_image, batch_size=8):
    """Run batched detection on ``batch_size`` copies of one image.

    Args:
        session: Inference session exposing ``infer(list_of_arrays)``.
        original_image: Image that is duplicated to fill the batch.
        batch_size: Number of copies to send through the model.

    Returns:
        Tuple ``(processed_images, all_detections)`` with one annotated
        image and one detection list per batch entry.
    """
    # Demo input: the same picture repeated batch_size times
    image_list = [original_image.copy() for _ in range(batch_size)]
    batch_tensor, scales = batch_preprocess(image_list)

    # Timed inference
    start_infer = time.time()
    outputs = session.infer([batch_tensor])
    end_infer = time.time()
    print(f"批量推理时间 ({batch_size}张):", end_infer - start_infer)

    # Post-process every batch entry separately
    processed_images, all_detections = [], []
    for i, (src, factor) in enumerate(zip(image_list, scales)):
        annotated, found = process_output(outputs[0][i], src, factor)
        processed_images.append(annotated)
        all_detections.append(found)
    return processed_images, all_detections
if __name__ == "__main__":
    model_path = "/data/detect/models/illegal.om"
    # Create the inference session
    session = InferSession(device_id=0, model_path=model_path)
    # Load the test picture
    input_image_path = "/data/test_img/test_illegal.jpg"
    image = cv2.imread(input_image_path)
    # cv2.imread returns None instead of raising on a missing/corrupt file.
    if image is None:
        raise FileNotFoundError(f"cannot read image: {input_image_path}")
    # Batched run (batch_size=8)
    batch_size = 8
    start_total = time.time()
    processed_images, _ = main_batch(session, image, batch_size)
    end_total = time.time()
    # This interval spans preprocessing, inference and post-processing, so
    # it is reported as total time rather than preprocessing time.
    print(f"批量处理总时间 ({batch_size}张):", end_total - start_total)
    # Save the annotated images
    for i, img in enumerate(processed_images):
        output_path = f"/data/test_img/test_illegall-result_{i}.jpg"
        cv2.imwrite(output_path, img)
        print(f"保存结果: {output_path}")
车牌识别:LPRNet
python
import numpy as np
from ais_bench.infer.interface import InferSession
import cv2
import time
# Recognizable characters in model output order; decode() treats the final
# entry ('-') as the blank symbol.
CHARS = list(
    "京沪津渝冀晋蒙辽吉黑"
    "苏浙皖闽赣鲁豫鄂湘粤"
    "桂琼川贵云藏陕甘青宁"
    "新学港澳警使领应急挂"
    "临0123456789"
    "ABCDEFGHJK"
    "LMNPQRSTUV"
    "WXYZ-"
)
def load_image(file, img_size):
    """Read an image file into a normalized, C-contiguous CHW float32 array.

    Args:
        file: Path of the image file.
        img_size: Target size passed to ``cv2.resize`` as (width, height).

    Returns:
        Array of shape (3, height, width) holding ``(pixel - 127.5) * 0.007843``.
    """
    # Decode from raw bytes rather than cv2.imread(file); presumably to cope
    # with paths imread cannot open - TODO confirm.
    raw = np.fromfile(file, dtype=np.uint8)
    bgr = cv2.imdecode(raw, cv2.IMREAD_COLOR)
    resized = cv2.resize(bgr, img_size)
    # NOTE: per the original author, the source framework does not have this conversion.
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    scaled = (rgb.astype('float32') - 127.5) * 0.007843
    # HWC -> CHW in a contiguous buffer
    return np.ascontiguousarray(scaled.transpose(2, 0, 1))
def decode(preds):
    """Greedy CTC decoding of plate predictions.

    Args:
        preds: Array indexed as ``preds[sample, class, step]``.

    Returns:
        Tuple ``(labels, pred_labels)``: the decoded strings and, per sample,
        the kept class indices (repeats collapsed, blank removed).
    """
    blank = len(CHARS) - 1
    pred_labels = []
    for sample in preds:
        # Best class at every time step
        best_path = np.argmax(sample, axis=0)
        kept = []
        previous = -1
        for idx in best_path:
            # Keep a symbol only if it is neither blank nor a repeat.
            if idx != blank and idx != previous:
                kept.append(idx)
            previous = idx
        pred_labels.append(kept)
    labels = ["".join(CHARS[k] for k in seq) for seq in pred_labels]
    return labels, pred_labels
def pred_deal(prebs):
    """Greedy CTC decoding that returns class indices only.

    Args:
        prebs: Array indexed as ``prebs[sample, class, step]``.

    Returns:
        One list of class indices per sample, with consecutive repeats
        collapsed and the blank (last ``CHARS`` index) removed. A sample
        with zero time steps yields an empty list instead of raising.
    """
    blank = len(CHARS) - 1
    preb_labels = []
    for preb in prebs:
        # Best class per time step in one call instead of a column loop.
        best_path = np.argmax(preb, axis=0)
        kept = []
        # -1 never equals a class index, so the first symbol is judged only
        # on whether it is blank; no special-casing of element 0 is needed.
        previous = -1
        for c in best_path:
            if c != blank and c != previous:
                kept.append(c)
            previous = c
        preb_labels.append(kept)
    return preb_labels
if __name__ == '__main__':
    image_path = "/data/test_img/test_license_2.jpg"
    model_path = "/data/recognition/models/licplate.om"
    image_size = (94, 24)
    session = InferSession(device_id=0, model_path=model_path)
    # Preprocess one plate crop and add the batch axis.
    plate = np.array(load_image(image_path, image_size))
    single = np.expand_dims(plate, axis=0)
    # Demo batch: the same crop repeated eight times
    inputs = np.concatenate([single for _ in range(8)], axis=0)
    pred = session.infer([inputs])
    labels, pred_labels = decode(pred[0])
    print(labels)
多batch_size带aipp推理
这里作者仅对检测模型进行了测试,具体预处理代码如何修改要根据自己的aipp文件附带的功能决定,不要盲目照抄。
检测:Yolov8
python
# -*- coding: utf-8 -*-
import cv2
import numpy as np
from ais_bench.infer.interface import InferSession
import time
# Class-id -> label table. The single-class variant is kept for reference:
# CLASSES = {
#     0: 'cp'
# }
CLASSES = {
    0: 'dfbl',
    1: 'cp',
    2: 'ddh',
    3: 'aqd',
    4: 'ry',
    5: 'rt',
}
# Minimum class score for a prediction to be kept
CONFIDENCE = 0.01
# IoU threshold used by non-maximum suppression
IOU = 0.45
# One random colour per class, used when drawing boxes
colors = np.random.uniform(low=0, high=255, size=(len(CLASSES), 3))
def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """Draw one detection (box plus "<name> <score>" tag) onto ``img`` in place.

    Args:
        img: Image to draw on.
        class_id: Key into ``CLASSES`` / row of ``colors``.
        confidence: Score shown with two decimals.
        x, y: Top-left corner of the box.
        x_plus_w, y_plus_h: Bottom-right corner of the box.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    box_color = colors[class_id]
    caption = f"{CLASSES[class_id]} {confidence:.2f}"
    # Box outline
    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), box_color, 2)
    (text_w, text_h), _ = cv2.getTextSize(caption, font, 0.5, 1)
    # Put the tag above the box when there is room, otherwise just inside it.
    text_y = y - 10
    if text_y <= text_h:
        text_y = y + 10
    # Filled background behind the caption
    cv2.rectangle(img, (x, text_y - text_h),
                  (x + text_w, text_y + text_h), box_color, cv2.FILLED)
    cv2.putText(img, caption, (x, text_y), font,
                0.5, (0, 0, 0), 1, cv2.LINE_AA)
def batch_preprocess(images, imgsz=640):
    """Letterbox a list of images into one uint8 batch for an AIPP model.

    Only resizing and padding happen here; no channel reordering, layout
    change or normalization is applied on the host (the accompanying text
    delegates normalization to the AIPP operator inside the model).

    Args:
        images: List of HxWx3 uint8 images; its length is the batch size.
        imgsz: Side length of the square model input.

    Returns:
        Tuple ``(batch_tensor, scales)``: a uint8 array of shape
        (len(images), imgsz, imgsz, 3) in HWC layout, and per image the
        factor that maps model-input coordinates back to original pixels.
    """
    # Pre-allocating gives the right shape even for an empty list.
    batch = np.zeros((len(images), imgsz, imgsz, 3), dtype=np.uint8)
    scales = []
    for k, img in enumerate(images):
        h, w = img.shape[:2]
        length = max(h, w)
        scales.append(length / imgsz)
        r = imgsz / length
        # round() instead of int(): float error in w * r could otherwise
        # leave the long side one pixel short of imgsz.
        new_w = max(1, round(w * r))
        new_h = max(1, round(h * r))
        resized_img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
        # The image sits in the top-left corner; the rest stays zero padding.
        batch[k, :new_h, :new_w] = resized_img
    return batch, scales
def process_output(output, original_image, scale):
    """Decode one image's raw predictions, run NMS and draw the kept boxes.

    Args:
        output: 2-D array indexed as ``output[anchor]`` =
            [cx, cy, w, h, class scores...], i.e. anchors on the first
            axis. A standard (4 + classes, anchors) export must be
            transposed before calling.
        original_image: Image the boxes refer to; it is copied, not modified.
        scale: Factor mapping model-input coordinates to original pixels.

    Returns:
        Tuple ``(processed_image, detections)`` where ``detections`` is a
        list of dicts with keys ``class_id``, ``class_name``,
        ``confidence``, ``box`` ([x, y, w, h]) and ``scale``.
    """
    height, width = original_image.shape[:2]
    output = np.asarray(output)

    # Best class and its score for every anchor, in one pass.
    class_scores = output[:, 4:]
    best_class = np.argmax(class_scores, axis=1)
    best_score = class_scores.max(axis=1)
    keep = best_score >= CONFIDENCE
    cx, cy, w, h = output[keep, :4].T
    # Centre format -> top-left format, scaled back to original pixels.
    boxes = (np.stack([cx - w / 2, cy - h / 2, w, h], axis=1) * scale).tolist()
    scores = best_score[keep].tolist()
    class_ids = best_class[keep].tolist()

    # Non-maximum suppression; skipped when nothing passed the threshold.
    # reshape(-1) copes with OpenCV versions that return an Nx1 index array.
    if boxes:
        kept = np.asarray(
            cv2.dnn.NMSBoxes(boxes, scores, CONFIDENCE, IOU, 0.5)).reshape(-1)
    else:
        kept = []

    detections = []
    processed_image = original_image.copy()
    for index in map(int, kept):
        box = boxes[index]
        detections.append({
            "class_id": class_ids[index],
            "class_name": CLASSES[class_ids[index]],
            "confidence": scores[index],
            "box": box,
            "scale": scale,
        })
        # Clamp the drawn rectangle to the image bounds.
        draw_bounding_box(
            processed_image,
            class_ids[index],
            scores[index],
            max(0, round(box[0])),
            max(0, round(box[1])),
            min(width, round(box[0] + box[2])),
            min(height, round(box[1] + box[3])),
        )
    return processed_image, detections
def main_batch(session, original_image, batch_size=8):
    """Run batched detection on ``batch_size`` copies of one image.

    Args:
        session: Inference session exposing ``infer(list_of_arrays)``.
        original_image: Image that is duplicated to fill the batch.
        batch_size: Number of copies to send through the model.

    Returns:
        Tuple ``(processed_images, all_detections)`` with one annotated
        image and one detection list per batch entry.
    """
    # Demo input: the same picture repeated batch_size times
    image_list = [original_image.copy() for _ in range(batch_size)]
    batch_tensor, scales = batch_preprocess(image_list)

    # Timed inference
    start_infer = time.time()
    outputs = session.infer([batch_tensor])
    end_infer = time.time()
    print(f"批量推理时间 ({batch_size}张):", end_infer - start_infer)

    # Post-process every batch entry separately
    processed_images, all_detections = [], []
    for i, (src, factor) in enumerate(zip(image_list, scales)):
        annotated, found = process_output(outputs[0][i], src, factor)
        processed_images.append(annotated)
        all_detections.append(found)
    return processed_images, all_detections
if __name__ == "__main__":
    model_path = "/data/detect/models/illegal_aipp-back.om"
    # Create the inference session
    session = InferSession(device_id=0, model_path=model_path)
    # Show what the converted model reports as its inputs
    model_inputs = session.get_inputs()
    print(model_inputs)
    # Load the test picture
    input_image_path = "/data/test_img/test_illegal.jpg"
    image = cv2.imread(input_image_path)
    # cv2.imread returns None instead of raising on a missing/corrupt file.
    if image is None:
        raise FileNotFoundError(f"cannot read image: {input_image_path}")
    # Batched run (batch_size=8)
    batch_size = 8
    start_total = time.time()
    processed_images, _ = main_batch(session, image, batch_size)
    end_total = time.time()
    # This interval spans preprocessing, inference and post-processing, so
    # it is reported as total time rather than preprocessing time.
    print(f"批量处理总时间 ({batch_size}张):", end_total - start_total)
    # Save the annotated images
    for i, img in enumerate(processed_images):
        output_path = f"/data/test_img/test_illegall-result_{i}.jpg"
        cv2.imwrite(output_path, img)
        print(f"保存结果: {output_path}")