1、准备工作
目标检测数据的标注分为两种格式:
- xml 格式(VOC):标记语言文件,记录目标框左上角(xmin, ymin)和右下角(xmax, ymax)的像素坐标
- txt 格式(YOLO):文本文件,每行一个目标,依次为 类别索引 + 中心点坐标 x,y + 宽高 w,h,其中坐标和宽高都是相对于图像宽高的比例(0~1)
需要准备的数据有:
其中images为图像数据,labels为txt文本信息
注意:这里仅支持 jpg 格式的图像!!!
需要转换数据格式,可以参考这章:PYTHON 自动化办公:更改图片后缀_改变文件夹里面图片后缀名的pytorch代码-CSDN博客
classes.txt 文件需要自行准备(每行一个类别名称,行的顺序对应 txt 标签中的类别索引,从 0 开始);没有的话,只能先把标注框可视化出来,看看每个类别索引对应的目标是什么:
关于目标检测任务中,YOLO(txt格式)标注文件的可视化_yolo标注可视化-CSDN博客
2、代码
如下:
python
import os
from PIL import Image
import shutil
from tqdm import tqdm
def _yolo_fields_to_voc_box(fields, img_w, img_h):
    """Convert one YOLO label row into VOC pixel corner coordinates.

    Args:
        fields: Whitespace-split label row: the class index followed by the
            normalized x-center, y-center, width and height.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        A tuple ``(xmin, ymin, xmax, ymax)`` of ints clamped to the image.

    Raises:
        ValueError: If the row has fewer than five fields or a field is not
            a number.
    """
    center_x, center_y, bbox_w, bbox_h = (float(v) for v in fields[1:5])
    # Compute each corner in normalized space and round once; rounding the
    # center and the size separately and then truncating can shift a corner.
    xmin = round((center_x - bbox_w / 2) * img_w)
    ymin = round((center_y - bbox_h / 2) * img_h)
    xmax = round((center_x + bbox_w / 2) * img_w)
    ymax = round((center_y + bbox_h / 2) * img_h)
    # Boxes touching the border may land slightly outside the image after
    # rounding; keep them inside so consumers never see negative coordinates.
    return (
        min(max(xmin, 0), img_w),
        min(max(ymin, 0), img_h),
        min(max(xmax, 0), img_w),
        min(max(ymax, 0), img_h),
    )


def main(IMG, LAB, CLS):
    """Convert a YOLO (txt) dataset into a VOC2007-style (xml) dataset.

    Every ``.jpg`` image in ``IMG`` is copied to
    ``VOCdevkit/VOC2007/JPEGImages`` and an annotation with the same stem is
    written to ``VOCdevkit/VOC2007/Annotations``.  An existing ``VOCdevkit``
    directory in the current working directory is deleted first.

    Args:
        IMG: Directory holding the ``.jpg`` images.
        LAB: Directory holding the YOLO ``.txt`` labels (same stems as the
            images).  An image without a label file gets an annotation with
            no objects.
        CLS: Sequence of class names indexed by the YOLO class index.

    Raises:
        IndexError: If a label refers to a class index outside ``CLS``.
        ValueError: If a label row is malformed.
    """
    # Function-scope import: the file's top-level imports do not include it.
    from xml.sax.saxutils import escape

    jpeg_dir = 'VOCdevkit/VOC2007/JPEGImages'
    anno_dir = 'VOCdevkit/VOC2007/Annotations'

    # Only real .jpg files; anything else in the folder (labels, caches,
    # hidden files) used to be treated as an image and crash the copy.
    image_files = sorted(
        name for name in os.listdir(IMG)
        if os.path.splitext(name)[1].lower() == '.jpg'
    )

    if os.path.exists('VOCdevkit'):
        shutil.rmtree('VOCdevkit')
    os.makedirs(jpeg_dir)
    os.makedirs(anno_dir)

    for file_name in tqdm(image_files):
        stem = os.path.splitext(file_name)[0]
        src_image = os.path.join(IMG, file_name)

        # Read the size first (and close the handle) so an unreadable image
        # fails before any output for it is produced.
        with Image.open(src_image) as image:
            img_w, img_h = image.size
        shutil.copy(src_image, jpeg_dir)

        label_path = os.path.join(LAB, stem + '.txt')
        if os.path.exists(label_path):
            with open(label_path, 'r', encoding='utf-8') as f:
                label_lines = f.read().splitlines()
        else:
            # No label file: treat the image as containing no objects.
            label_lines = []

        parts = [
            '<annotation>\n',
            ' <folder>folder</folder>\n',
            f' <filename>{escape(file_name)}</filename>\n',
            ' <source>\n',
            ' <database>Unknown</database>\n',
            ' </source>\n',
            ' <size>\n',
            f' <width>{img_w}</width>\n',
            f' <height>{img_h}</height>\n',
            ' <depth>3</depth>\n',
            ' </size>\n',
            ' <segmented>0</segmented>\n',
        ]

        for each_line in label_lines:
            # split() without an argument tolerates tabs and repeated spaces.
            fields = each_line.split()
            if not fields:
                continue  # skip blank lines (e.g. a trailing newline)
            class_name = escape(str(CLS[int(fields[0])]))
            xmin, ymin, xmax, ymax = _yolo_fields_to_voc_box(
                fields, img_w, img_h)
            parts.extend([
                ' <object>\n',
                f' <name>{class_name}</name>\n',
                ' <pose>Unspecified</pose>\n',
                ' <truncated>0</truncated>\n',
                ' <difficult>0</difficult>\n',
                ' <bndbox>\n',
                f' <xmin>{xmin}</xmin>\n',
                f' <ymin>{ymin}</ymin>\n',
                f' <xmax>{xmax}</xmax>\n',
                f' <ymax>{ymax}</ymax>\n',
                ' </bndbox>\n',
                ' </object>\n',
            ])
        parts.append('</annotation>')

        # XML without a declaration is UTF-8 by definition, so write UTF-8
        # explicitly instead of relying on the platform's locale encoding.
        xml_path = os.path.join(anno_dir, stem + '.xml')
        with open(xml_path, 'w', encoding='utf-8') as xml_file:
            xml_file.write(''.join(parts))
if __name__ == '__main__':
    # Images must be in jpg format.
    yoloImages = 'yolodata/images'
    yoloLabels = 'yolodata/labels'
    classTxt = 'classes.txt'
    # One class name per line; the line order is the YOLO class index.
    # The context manager closes the file, which the bare open() never did.
    with open(classTxt, 'r') as class_file:
        txt = class_file.read().splitlines()
    print(txt)
    main(IMG=yoloImages, LAB=yoloLabels, CLS=txt)
效果如下:
3、其他代码
可视化代码可以参考:
python
from lxml import etree
import cv2
# 读取 xml 文件信息,并返回字典形式
def parse_xml_to_dict(xml):
    """Recursively turn an XML element into nested dictionaries.

    Args:
        xml: An element exposing ``tag``, ``text``, ``len()`` and iteration
            over its children.

    Returns:
        ``{tag: text}`` for an element without children, otherwise
        ``{tag: {child_tag: child_value, ...}}``.  Children tagged
        ``object`` are collected into a list because an annotation may hold
        several of them; for any other repeated tag the last one wins.
    """
    # Leaf element: nothing to recurse into, return its text directly.
    if not len(xml):
        return {xml.tag: xml.text}

    children = {}
    for node in xml:
        value = parse_xml_to_dict(node)[node.tag]
        if node.tag == 'object':
            children.setdefault(node.tag, []).append(value)
        else:
            children[node.tag] = value
    return {xml.tag: children}
# xml 标注文件的可视化
def xmlShow(img, xml, save=True):
    """Draw the boxes of a VOC XML annotation on its image and display it.

    Args:
        img: Path of the image file.
        xml: Path of the VOC XML annotation file for that image.
        save: When true, also write the annotated image to ``result.png``
            in the current working directory.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img``.
    """
    image = cv2.imread(img)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f'Cannot read image: {img}')

    with open(xml, 'rb') as fid:
        raw = fid.read()
    try:
        # Parsing the raw bytes lets the parser honour an encoding
        # declaration and otherwise assume UTF-8, the XML default.
        root = etree.fromstring(raw)
    except etree.XMLSyntaxError:
        # Fallback for files saved in a Chinese locale encoding or holding
        # stray illegal bytes: decode leniently, dropping undecodable bytes.
        root = etree.fromstring(raw.decode('gb18030', errors='ignore'))
    data = parse_xml_to_dict(root)["annotation"]

    # An annotation without any <object> has no 'object' key at all.
    ob = []
    for obj in data.get('object', []):
        bbox = obj['bndbox']
        ob.append([
            str(obj['name']),
            int(bbox['xmin']),
            int(bbox['ymin']),
            int(bbox['xmax']),
            int(bbox['ymax']),
        ])

    # Draw every box with its class name.
    for name, x1, y1, x2, y2 in ob:
        cv2.rectangle(image, (x1, y1), (x2, y2), color=(255, 0, 0), thickness=2)
        # Keep the label inside the canvas when the box touches the top edge.
        text_y = max(y1 - 10, 12)
        cv2.putText(image, name, (x1, text_y), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.5, thickness=1, color=(0, 0, 255))

    if save:
        cv2.imwrite('result.png', image)

    # Show the result and wait for a key press before closing the window.
    cv2.imshow('test', image)
    cv2.waitKey()
    cv2.destroyAllWindows()
if __name__ == "__main__":
    import os
    import random

    # Build the paths with os.path.join: the previous backslash literal only
    # resolved on Windows.
    voc_root = os.path.join('VOCdevkit', 'VOC2007')
    img_path = os.path.join(voc_root, 'JPEGImages')  # image folder
    xml_path = os.path.join(voc_root, 'Annotations')  # annotation folder

    image = [i for i in os.listdir(img_path) if i.lower().endswith('.jpg')]
    if not image:
        # random.choice / randint would fail with an obscure error here.
        raise SystemExit(f'No .jpg images found in {img_path}')

    # Pick one random image and derive its annotation from the file stem, so
    # folder names containing "JPEGImages" or ".jpg" cannot corrupt the path.
    file_name = random.choice(image)
    image_path = os.path.join(img_path, file_name)
    labels_path = os.path.join(xml_path, os.path.splitext(file_name)[0] + '.xml')
    xmlShow(img=image_path, xml=labels_path, save=True)
至于如何根据 xml 标注文件生成类别标签的 json 字典文件,可以参考: