The system consists of two parts, i.e., two models: vehicle detection, vehicle-type classification, and license-plate detection are handled by a single YOLOv5-based model, while license-plate recognition is handled by the PP-OCRv3 model from PaddleOCR.
Vehicle detection dataset preparation:
The dataset for vehicle detection, vehicle-type classification, and plate detection was built mainly from COCO: I sampled 15,000 COCO images and filtered 500 car images out of them, crawled another 500 truck images from the internet, and picked 500 plate images from CCPD. Annotation was done with labelImg.
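For the COCO part, candidate car images can be filtered programmatically. A minimal sketch with pycocotools (a hypothetical helper, not the exact script used here; the annotation path is an example):
from pycocotools.coco import COCO

coco = COCO("annotations/instances_train2017.json")
car_cat_ids = coco.getCatIds(catNms=["car"])
img_ids = coco.getImgIds(catIds=car_cat_ids)[:500]  # keep 500 car images
for info in coco.loadImgs(img_ids):
    print(info["file_name"])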
Vehicle detection model training:
python3 train.py --img 640 --epochs 100 --data ./data/car_truck_plate.yaml --weights yolov5s.pt
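The dataset config car_truck_plate.yaml follows the standard YOLOv5 format; a sketch of what it should contain (paths are examples; the class order must match the labels):
path: ./datasets/car_truck_plate
train: images/train
val: images/val
nc: 3
names: ['car', 'truck', 'plate']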
Vehicle detection model testing:
python3 detect.py --weights ./runs/train/exp11/weights/best.pt --source ./datasets/car_truck_plate/images/train/000000030198.jpg
Converting the vehicle detection model to ONNX:
python export.py --weights runs/train/exp11/weights/best.pt --simplify --device 0 --half
The command above converts the model from .pt to ONNX, and also to FP16 half precision.
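Before feeding the ONNX file to ATC, it can be sanity-checked with onnxruntime (a quick sketch; since the model was exported with --half, the dummy input is FP16):
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("runs/train/exp11/weights/best.onnx")
inp = sess.get_inputs()[0]
print(inp.name, inp.shape)  # expect images: [1, 3, 640, 640]
x = np.random.rand(1, 3, 640, 640).astype(np.float16)
outs = sess.run(None, {inp.name: x})
print([o.shape for o in outs])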
Converting the vehicle detection model to OM:
atc --input_shape="images:1,3,640,640" --out_nodes="/model.24/Transpose:0;/model.24/Transpose_1:0;/model.24/Transpose_2:0" --output_type=FP32 --input_format=NCHW --output="./yolov5_add_bs1_fp16" --soc_version=Ascend310P3 --framework=5 --model="./best.onnx" --insert_op_conf=./insert_op.cfg
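insert_op.cfg describes the static AIPP preprocessing fused into the OM model. A sketch of what such a config typically looks like for this pipeline (the CSC matrix is the standard YUV-to-RGB one from the Ascend docs; verify every value against your own preprocessing):
aipp_op {
    aipp_mode : static
    related_input_rank : 0
    input_format : YUV420SP_U8
    src_image_size_w : 640
    src_image_size_h : 640
    csc_switch : true
    rbuv_swap_switch : false
    matrix_r0c0 : 256
    matrix_r0c1 : 0
    matrix_r0c2 : 359
    matrix_r1c0 : 256
    matrix_r1c1 : -88
    matrix_r1c2 : -183
    matrix_r2c0 : 256
    matrix_r2c1 : 454
    matrix_r2c2 : 0
    input_bias_0 : 0
    input_bias_1 : 128
    input_bias_2 : 128
    var_reci_chn_0 : 0.0039216
    var_reci_chn_1 : 0.0039216
    var_reci_chn_2 : 0.0039216
}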
An issue found during vehicle detection:
While testing, I found that the implementation of the preproc function in utils.py of Huawei's demo was still buggy, so I fixed it myself:
def preproc(img, img_size, swap=(2, 0, 1)):
    """Resize the input image with unchanged aspect ratio and centered gray padding."""
    # GRAY is a padding constant defined elsewhere in utils.py (114 in the YOLOv5 convention)
    if len(img.shape) == 3:
        padding_image = np.ones((img_size[0], img_size[1], 3), dtype=np.uint8) * GRAY
    else:
        padding_image = np.ones(img_size, dtype=np.uint8) * GRAY
    ratio = min(img_size[0] / img.shape[0], img_size[1] / img.shape[1])
    resized_img = cv2.resize(
        img,
        (int(img.shape[1] * ratio), int(img.shape[0] * ratio)),
        interpolation=cv2.INTER_AREA,
    ).astype(np.uint8)
    # paste the resized image into the center of the padded canvas
    top = int((img_size[0] - resized_img.shape[0]) / 2.0)
    left = int((img_size[1] - resized_img.shape[1]) / 2.0)
    padding_image[top:top + resized_img.shape[0], left:left + resized_img.shape[1]] = resized_img
    return padding_image, ratio
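Because the padding is centered, the mapping of boxes back to the original image must subtract the top/left offsets before dividing by the ratio. A sketch of what the matching inverse has to do (the actual utils.scale_coords used below may differ in details):
def scale_coords_centered(from_shape, xyxy, to_shape):
    # from_shape: (h, w) of the padded model input; to_shape: (h, w) of the original image
    ratio = min(from_shape[0] / to_shape[0], from_shape[1] / to_shape[1])
    pad_x = (from_shape[1] - to_shape[1] * ratio) / 2.0
    pad_y = (from_shape[0] - to_shape[0] * ratio) / 2.0
    xyxy = np.asarray(xyxy, dtype=np.float32)
    xyxy[[0, 2]] = ((xyxy[[0, 2]] - pad_x) / ratio).clip(0, to_shape[1])
    xyxy[[1, 3]] = ((xyxy[[1, 3]] - pad_y) / ratio).clip(0, to_shape[0])
    return xyxy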
Vehicle detection inference code:
import os
import json
import cv2
import time
import numpy as np
from StreamManagerApi import StreamManagerApi, MxDataInput
import sys
from os.path import abspath, dirname

# make the local helper modules importable regardless of the working directory
current_path = dirname(abspath(__file__))
sys.path.insert(0, current_path)
from plots import box_label, colors
from utils import scale_coords, xyxy2xywh, is_legal, preproc

names = ["car", "truck", "plate"]


class YOLOV5(object):
    def __init__(self, pipeline_path="./YOLOV5/pipeline/car_plate.pipeline"):
        # init stream manager
        self.streamManagerApi = StreamManagerApi()
        ret = self.streamManagerApi.InitManager()
        if ret != 0:
            print("Failed to init Stream manager, ret=%s" % str(ret))
            exit()
        # create streams by pipeline config file
        with open(pipeline_path, 'rb') as f:
            pipelineStr = f.read()
        ret = self.streamManagerApi.CreateMultipleStreams(pipelineStr)
        if ret != 0:
            print("Failed to create Stream, ret=%s" % str(ret))
            exit()

    def process(self, image):
        # construct the input of the stream: letterbox to 640x640, then JPEG-encode
        dataInput = MxDataInput()
        input_shape = (640, 640)
        pre_img = preproc(image, input_shape)[0]
        pre_img = np.ascontiguousarray(pre_img)
        image_bytes = cv2.imencode('.jpg', pre_img)[1].tobytes()
        dataInput.data = image_bytes
        # send the data to the stream identified by STREAMNAME
        STREAMNAME = b'classification+detection'
        INPLUGINID = 0
        uniqueId = self.streamManagerApi.SendDataWithUniqueId(STREAMNAME, INPLUGINID, dataInput)
        if uniqueId < 0:
            print("Failed to send data to stream.")
            exit()
        # obtain the inference result by specifying streamName and uniqueId
        inferResult = self.streamManagerApi.GetResultWithUniqueId(STREAMNAME, uniqueId, 10000)
        if inferResult.errorCode != 0:
            print("GetResultWithUniqueId error. errorCode=%d, errorMsg=%s" % (
                inferResult.errorCode, inferResult.data.decode()))
            exit()
        results = json.loads(inferResult.data.decode())
        gn = np.array(image.shape)[[1, 0, 1, 0]]  # normalization gain [w, h, w, h]
        for num, info in enumerate(results['MxpiObject']):
            xyxy = [int(info['x0']), int(info['y0']), int(info['x1']), int(info['y1'])]
            # map boxes from the 640x640 letterboxed input back to the original image
            xyxy = scale_coords(pre_img.shape[:2], np.array(xyxy), image.shape[:2])
            xywh = (xyxy2xywh(xyxy.reshape(1, 4)) / gn).reshape(-1).tolist()  # normalized xywh
            results['MxpiObject'][num]["x0"] = int(xyxy[0])
            results['MxpiObject'][num]["y0"] = int(xyxy[1])
            results['MxpiObject'][num]["x1"] = int(xyxy[2])
            results['MxpiObject'][num]["y1"] = int(xyxy[3])
        return results

    def __del__(self):
        # destroy streams
        try:
            self.streamManagerApi.DestroyAllStreams()
        except Exception as e:
            print(e)

    def draw(self, image, results):
        # draw the detection results onto the image
        for info in results['MxpiObject']:
            xyxy = [int(info['x0']), int(info['y0']), int(info['x1']), int(info['y1'])]
            classVec = info["classVec"]
            label = f'{classVec[0]["className"]} {classVec[0]["confidence"]:.4f}'
            save_img = box_label(image, xyxy, label, color=colors[names.index(classVec[0]["className"])])
        return save_img


if __name__ == '__main__':
    # read the test image and run 20 timed inferences
    ORI_IMG_PATH = "./test_images/11.jpg"
    image = cv2.imread(ORI_IMG_PATH, 1)
    yolov5 = YOLOV5()
    for i in range(20):
        t1 = time.time()
        results = yolov5.process(image)
        t2 = time.time()
        print("time", t2 - t1)
    print(results)
    save_img = yolov5.draw(image, results)
    cv2.imwrite('./result.jpg', save_img)
Vehicle detection test results:
License plate recognition training environment setup:
pip3 install paddlepaddle-gpu==2.5.2 -i https://pypi.tuna.tsinghua.edu.cn/simple
git clone https://github.com/PaddlePaddle/PaddleOCR.git
cd PaddleOCR && pip3 install -r requirements.txt
pip3 install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117
License plate recognition dataset preparation:
The dataset merges CCPD2019 and CCPD2020: CCPD2019 covers the traditional blue plates, and CCPD2020 the green new-energy plates. This yields 311,343 training images, 35,974 validation images, and 15,286 test images. Training is fairly time-consuming: 2000 epochs took about 10 days on a single A40, although the result at 200 epochs is already acceptable.
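CCPD needs no separate annotation files because every label is encoded in the filename. As I understand the naming convention (worth verifying against the CCPD README), the '-'-separated fields are: area ratio, tilt angles, bounding box, the four plate vertices (in RB_LB_LT_RT order, which is why the code below reorders them), the plate character indices, brightness, and blurriness. A typical name looks like 025-95_113-154&383_386&473-386&473_177&454_154&383_363&402-0_0_22_27_27_33_16-37-15.jpg.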
The data directory structure is as follows:
The dataset-building code is shown below; it crops out the plate regions and produces both the plate detection labels (det.txt) and the plate recognition labels (rec.txt).
import cv2
import os
import json
from tqdm import tqdm
import numpy as np

# CCPD character tables: province abbreviations, the plate's second
# character (letters only), and the remaining characters (letters + digits)
provinces = ["皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤", "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁", "新", "警", "学", "O"]
alphabets = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'O']
ads = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'O']


def make_label(img_dir, save_gt_folder, phase):
    crop_img_save_dir = os.path.join(save_gt_folder, phase, 'crop_imgs')
    os.makedirs(crop_img_save_dir, exist_ok=True)
    f_det = open(os.path.join(save_gt_folder, phase, 'det.txt'), 'w', encoding='utf-8')
    f_rec = open(os.path.join(save_gt_folder, phase, 'rec.txt'), 'w', encoding='utf-8')
    i = 0
    for filename in tqdm(os.listdir(os.path.join(img_dir, phase))):
        str_list = filename.split('-')
        if len(str_list) < 5:
            continue
        coord_list = str_list[3].split('_')  # the four plate vertices
        txt_list = str_list[4].split('_')    # the plate character indices
        boxes = []
        for coord in coord_list:
            boxes.append([int(x) for x in coord.split("&")])
        # CCPD stores vertices as RB, LB, LT, RT; reorder to LT, RT, RB, LB
        boxes = [boxes[2], boxes[3], boxes[0], boxes[1]]
        lp_number = provinces[int(txt_list[0])] + alphabets[int(txt_list[1])] + ''.join([ads[int(x)] for x in txt_list[2:]])
        # detection label: image path + polygon + transcription
        det_info = [{'points': boxes, 'transcription': lp_number}]
        f_det.write('{}\t{}\n'.format(os.path.join(phase, filename), json.dumps(det_info, ensure_ascii=False)))
        # recognition label: crop the plate via perspective transform and save it
        boxes = np.float32(boxes)
        img = cv2.imread(os.path.join(img_dir, phase, filename))
        crop_img = get_rotate_crop_image(img, boxes)
        crop_img_save_filename = '{}_{}.jpg'.format(i, '_'.join(txt_list))
        crop_img_save_path = os.path.join(crop_img_save_dir, crop_img_save_filename)
        cv2.imwrite(crop_img_save_path, crop_img)
        f_rec.write('{}/crop_imgs/{}\t{}\n'.format(phase, crop_img_save_filename, lp_number))
        i += 1
    f_det.close()
    f_rec.close()


def get_rotate_crop_image(img, points):
    # warp the quadrilateral plate region into an axis-aligned rectangle
    assert len(points) == 4, "shape of points must be 4*2"
    img_crop_width = int(
        max(
            np.linalg.norm(points[0] - points[1]),
            np.linalg.norm(points[2] - points[3])))
    img_crop_height = int(
        max(
            np.linalg.norm(points[0] - points[3]),
            np.linalg.norm(points[1] - points[2])))
    pts_std = np.float32([[0, 0], [img_crop_width, 0],
                          [img_crop_width, img_crop_height],
                          [0, img_crop_height]])
    M = cv2.getPerspectiveTransform(points, pts_std)
    dst_img = cv2.warpPerspective(
        img,
        M, (img_crop_width, img_crop_height),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC)
    dst_img_height, dst_img_width = dst_img.shape[0:2]
    # rotate near-vertical crops back to horizontal
    if dst_img_height * 1.0 / dst_img_width >= 1.5:
        dst_img = np.rot90(dst_img)
    return dst_img


img_dir = './CCPD2019'
save_gt_folder = './PPOCR'
for phase in ['train', 'val', 'test']:
    make_label(img_dir, save_gt_folder, phase)
License plate recognition model training:
First, starting from configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml in the PaddleOCR repository, derive the config file for our model and place it in the dataset directory CCPD20192020.
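A sketch of the fields changed relative to en_PP-OCRv3_rec.yml (paths and values are examples, not the exact config used here):
Global:
  epoch_num: 2000
  save_model_dir: ./output/v3_plate
  character_dict_path: ./CCPD20192020/plate_dict.txt  # must cover provinces, letters and digits
  max_text_length: 8  # Chinese plates have 7-8 characters
Train:
  dataset:
    data_dir: ./CCPD20192020/PPOCR
    label_file_list: [./CCPD20192020/PPOCR/train/rec.txt]
Eval:
  dataset:
    data_dir: ./CCPD20192020/PPOCR
    label_file_list: [./CCPD20192020/PPOCR/val/rec.txt]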
Download the English PP-OCRv3 pretrained model:
wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_train.tar
Extract the model parameters:
cd pretrain_models
tar -xf en_PP-OCRv3_rec_train.tar && rm -rf en_PP-OCRv3_rec_train.tar
Launch training:
python3 tools/train.py -c ./CCPD20192020/en_PP-OCRv3_rec.yml -o Global.pretrained_model=./pretrain_models/en_PP-OCRv3_rec_train/best_accuracy
Converting the trained plate recognition model to an inference model:
Of the files Paddle saves, .pdmodel stores the model structure (the network graph), .pdparams stores the trained network parameters, and .pdopt stores the optimizer state used to resume training.
Model conversion:
python3 tools/export_model.py -c ./CCPD20192020/en_PP-OCRv3_rec.yml -o Global.pretrained_model=./output/v3_plate/best_model/model Global.save_inference_dir=./inference/rec_crnn/
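This writes the inference model into ./inference/rec_crnn/, which should contain inference.pdmodel, inference.pdiparams, and inference.pdiparams.info; the first two are what paddle2onnx consumes below.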
Environment setup for plate recognition (CRNN) on Atlas:
pip3 install paddle2onnx==1.0.5 -i https://pypi.tuna.tsinghua.edu.cn/simple
pip3 install paddlepaddle==2.5.2 -i https://pypi.tuna.tsinghua.edu.cn/simple
pip3 install protobuf==3.20.3 -i https://pypi.tuna.tsinghua.edu.cn/simple
pip3 install textdistance -i https://pypi.tuna.tsinghua.edu.cn/simple
Converting the plate recognition model from Paddle to ONNX:
paddle2onnx --model_dir ./ch_ppocr_server_v3.0_rec_infer/ --model_filename inference.pdmodel --params_filename inference.pdiparams --save_file ./ch_ppocr_server_v3.0_rec_infer/ch_ppocr_server_v3.0_rec_infer.onnx --opset_version 11 --enable_onnx_checker True
Converting the plate recognition model from ONNX to OM:
atc --model=ch_ppocr_server_v3.0_rec_infer.onnx --framework=5 --input_format=NCHW --input_shape="x:1,3,48,144" --output=ch_ppocr_server_v3.0_rec_infer_bs1 --soc_version=Ascend310P3 --log=error --insert_op_conf=./aipp.cfg
Writing the plate recognition config files:
# cfg/crnn.cfg: CLASS_NUM=69 is the number of character classes; OBJECT_NUM=18 is the model's output sequence length (the 144-pixel input width divided by the backbone's horizontal stride of 8, as I understand it).
CLASS_NUM=69
OBJECT_NUM=18
WITH_ARGMAX=false
# crnn_keys.txt lists all 69 plate character classes: 31 province abbreviations, the special characters 警 and 学, 25 letters (I is excluded), and 10 digits. The first entry of the file is an empty line representing the CTC blank (not visible in the listing below).
皖
沪
津
渝
冀
晋
蒙
辽
吉
黑
苏
浙
京
闽
赣
鲁
豫
鄂
湘
粤
桂
琼
川
贵
云
藏
陕
甘
青
宁
新
警
学
A
B
C
D
E
F
G
H
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
0
1
2
3
4
5
6
7
8
9
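For intuition, the post-processor's CTC decoding with this key file works roughly as follows (a sketch, not the SDK's actual implementation; keys[0] is the blank):
def ctc_greedy_decode(probs, keys):
    # probs: (OBJECT_NUM, CLASS_NUM) per-timestep class scores; keys: the 69 characters
    ids = probs.argmax(axis=1)
    out, prev = [], -1
    for i in ids:
        if i != prev and i != 0:  # collapse repeats, drop blanks
            out.append(keys[i])
        prev = i
    return ''.join(out)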
License plate recognition inference code:
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import MxpiDataType_pb2 as MxpiDataType
from StreamManagerApi import StreamManagerApi, MxDataInput, StringVector


def cv2AddChineseText(img, texts, positions, textColor=(255, 255, 255), textSize=30,
                      fonts="./ChineseOCR/fonts/simsun.ttc"):
    # OpenCV cannot render Chinese text, so draw with PIL and convert back
    if isinstance(img, np.ndarray):  # convert an OpenCV image to PIL
        img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(img)
    fontStyle = ImageFont.truetype(fonts, textSize, encoding="utf-8")
    for text, position in zip(texts, positions):
        draw.text(position, text, textColor, font=fontStyle)
    # convert back to the OpenCV format
    return cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)


class CRNN(object):
    def __init__(self, pipeline_path="./ChineseOCR/pipeline/chineseocr.pipeline"):
        # init stream manager
        self.stream_manager_api = StreamManagerApi()
        ret = self.stream_manager_api.InitManager()
        if ret != 0:
            print("Failed to init Stream manager, ret=%s" % str(ret))
            exit()
        # create streams by pipeline config file
        with open(pipeline_path, 'rb') as f:
            pipelineStr = f.read()
        ret = self.stream_manager_api.CreateMultipleStreams(pipelineStr)
        if ret != 0:
            print("Failed to create Stream, ret=%s" % str(ret))
            exit()

    def process_plate(self, image):
        # construct the input of the stream: resize to the model's 144x48 input
        INPLUGIN_ID = 0
        data_input = MxDataInput()
        width, height = 144, 48
        resized_image = cv2.resize(
            image,
            (width, height),
            interpolation=cv2.INTER_AREA,
        ).astype(np.uint8)
        image_bytes = cv2.imencode('.jpg', resized_image)[1].tobytes()
        data_input.data = image_bytes
        STREAMNAME = b'chineseocr'
        unique_id = self.stream_manager_api.SendData(STREAMNAME, b'appsrc0', data_input)
        if unique_id < 0:
            print("Failed to send data to stream.")
            exit()
        # fetch the text post-processor's protobuf output
        key_vec = StringVector()
        key_vec.push_back(b'mxpi_textgenerationpostprocessor0')
        infer_result = self.stream_manager_api.GetProtobuf(STREAMNAME, INPLUGIN_ID, key_vec)
        if infer_result.size() == 0:
            print("infer_result is null")
            exit()
        if infer_result[0].errorCode != 0:
            print("GetProtobuf error. errorCode=%d" % (infer_result[0].errorCode))
            exit()
        result = MxpiDataType.MxpiTextsInfoList()
        result.ParseFromString(infer_result[0].messageBuf)
        # strip the list-repr wrapper ("['...']") around the recognized text
        content = str(result.textsInfoVec[0].text)[2:-2]
        return content

    def process_box_plate(self, image, boxes):
        # boxes: list of [x0, y0, x1, y1]; recognize the plate inside each box
        plates = []
        for box in boxes:
            x0, y0, x1, y1 = box
            plate_image = image[y0:y1, x0:x1, :]
            plate = self.process_plate(plate_image)
            plates.append(plate)
        return plates

    def __del__(self):
        # destroy streams
        try:
            self.stream_manager_api.DestroyAllStreams()
        except Exception as e:
            print(e)

    def draw(self, image, boxes, plates):
        # draw each recognized plate string below its box
        texts, positions = [], []
        for box, plate in zip(boxes, plates):
            texts.append(plate)
            positions.append([box[0], box[3]])
        ploted_image = cv2AddChineseText(image, texts, positions, textColor=(255, 255, 255),
                                         textSize=30, fonts="./ChineseOCR/fonts/simsun.ttc")
        return ploted_image


if __name__ == '__main__':
    crnn = CRNN()
    # recognize a cropped plate image directly
    img_path = "./test_images/10225_0_0_6_30_29_28_26.jpg"
    image = cv2.imread(img_path, 1)
    result = crnn.process_plate(image)
    print(result)
    # recognize a plate given its box inside a full image, then draw the result
    img_path = "./test_images/car.jpg"
    boxes = [[222, 523, 394, 571]]
    image = cv2.imread(img_path, 1)
    plates = crnn.process_box_plate(image, boxes)
    print(plates)
    ploted_image = crnn.draw(image, boxes, plates)
    cv2.imwrite("drawed.jpg", ploted_image)
License plate recognition test:
Combined vehicle detection + plate recognition code:
import cv2
from YOLOV5.yolov5 import YOLOV5
from ChineseOCR.CRNN import CRNN


class CAR_PLATE(object):
    def __init__(self, yolov5_pipeline_path="./YOLOV5/pipeline/car_plate.pipeline",
                 crnn_pipeline_path="./ChineseOCR/pipeline/chineseocr.pipeline"):
        self.yolov5 = YOLOV5(pipeline_path=yolov5_pipeline_path)
        self.crnn = CRNN(pipeline_path=crnn_pipeline_path)

    def process(self, image, isdraw=True):
        # detect cars, trucks and plates first
        results = self.yolov5.process(image)
        if isdraw:
            ploted_image = self.yolov5.draw(image, results)
        # collect the plate boxes (classId 2) and recognize them
        boxes = []
        indexes = []
        for i, obj in enumerate(results["MxpiObject"]):
            if int(obj["classVec"][0]["classId"]) == 2:  # plate
                box = [obj["x0"], obj["y0"], obj["x1"], obj["y1"]]
                boxes.append(box)
                indexes.append(i)
        plates = self.crnn.process_box_plate(image, boxes)
        if isdraw:
            ploted_image = self.crnn.draw(ploted_image, boxes, plates)
        # attach the recognized plate string to the matching detection
        for i, plate in zip(indexes, plates):
            results["MxpiObject"][i]["plate"] = plate
        if isdraw:
            return results, ploted_image
        else:
            return results

    def __del__(self):
        pass


if __name__ == '__main__':
    car_plate = CAR_PLATE()
    img_path = "./test_images/car.jpg"
    image = cv2.imread(img_path, 1)
    results, ploted_image = car_plate.process(image)
    cv2.imwrite("drawed.jpg", ploted_image)
One remaining issue in the combined vehicle detection + plate recognition pipeline:
An error is raised during memory release when __del__ is called. My guess is that with multiple models coexisting, Huawei's SDK uses the same underlying variable for the stream resources, so after the first object releases it, the second release dereferences a now-dangling pointer and fails.
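A workaround worth trying (an assumption on my part, not verified against the SDK internals) is to share a single StreamManagerApi between the two models and guard destruction so DestroyAllStreams runs only once:
class SharedStreamManager:
    # hypothetical helper: one StreamManagerApi for all models, destroyed once
    _api = None

    @classmethod
    def get(cls):
        if cls._api is None:
            from StreamManagerApi import StreamManagerApi
            api = StreamManagerApi()
            if api.InitManager() != 0:
                raise RuntimeError("Failed to init Stream manager")
            cls._api = api
        return cls._api

    @classmethod
    def destroy(cls):
        if cls._api is not None:
            cls._api.DestroyAllStreams()
            cls._api = None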
Combined vehicle detection + plate recognition test:
References: