需求:
在对数据集做数据增强时,经常需要对图像进行水平翻转;图像水平翻转后,对应xml标注文件中目标框的坐标也必须同步调整,否则标注会与图像内容不一致。
解决方法:
使用Python + OpenCV + xml.etree.ElementTree,对图像做水平翻转并同步调整xml标注文件中的坐标(翻转后 xmin = 图像宽度 - 原xmax,xmax = 图像宽度 - 原xmin,ymin、ymax保持不变)。代码如下:
python
import cv2
import os
import glob
import xml.etree.ElementTree as et
def _add_text_child(parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    child = et.SubElement(parent, tag)
    child.text = text
    return child


def _build_flipped_annotation(source_root, flipped_filename, image_width,
                              folder_name, annotation_path):
    """Build the ``<annotation>`` element describing the mirrored image.

    Width/height and every object's name and box are read from
    *source_root*; all other fields are written with fixed values.
    """
    root = et.Element("annotation")
    _add_text_child(root, "folder", folder_name)
    _add_text_child(root, "filename", flipped_filename)
    _add_text_child(root, "path", annotation_path)

    source = et.SubElement(root, "source")
    _add_text_child(source, "database", "Unknown")

    size = et.SubElement(root, "size")
    _add_text_child(size, "width", source_root.find(".//width").text)
    _add_text_child(size, "height", source_root.find(".//height").text)
    _add_text_child(size, "depth", "3")

    _add_text_child(root, "segmented", "0")

    for source_object in source_root.findall(".//object"):
        xmin_text = source_object.find(".//xmin").text
        ymin_text = source_object.find(".//ymin").text
        xmax_text = source_object.find(".//xmax").text
        ymax_text = source_object.find(".//ymax").text

        new_object = et.SubElement(root, "object")
        _add_text_child(new_object, "name", source_object.find("name").text)
        _add_text_child(new_object, "pose", "Unspecified")
        _add_text_child(new_object, "truncated", "0")
        _add_text_child(new_object, "difficult", "0")

        new_box = et.SubElement(new_object, "bndbox")
        # A horizontal mirror swaps the left and right edges: the old right
        # edge becomes the new left edge and vice versa. y is unaffected.
        _add_text_child(new_box, "xmin", str(image_width - int(xmax_text)))
        _add_text_child(new_box, "ymin", ymin_text)
        _add_text_child(new_box, "xmax", str(image_width - int(xmin_text)))
        _add_text_child(new_box, "ymax", ymax_text)

    return root


def flip_images(source_dir, folder_name="images",
                annotation_path="/home/mapgoo/test"):
    """Horizontally flip every ``*.jpg`` in *source_dir* with its annotation.

    For each ``<name>.jpg`` a sibling ``<name>.xml`` annotation is read.
    The function writes ``<name>_flip.jpg`` (the mirrored image) and
    ``<name>_flip.xml`` (a newly built annotation with mirrored boxes)
    next to the originals, then prints the image path and its index.

    Args:
        source_dir: Directory scanned (non-recursively) for ``*.jpg`` files.
        folder_name: Text written to ``<folder>`` in each new annotation.
        annotation_path: Text written to ``<path>`` in each new annotation.

    Notes:
        * An image that cannot be read, or whose annotation file is missing
          or not well-formed XML, is skipped with a printed message and no
          output is written for it.
        * The new annotation is rebuilt from scratch: only width, height and
          each object's name and box come from the source annotation.
        * ``<filename>`` is the name of the flipped image actually written.
        * ``*.jpg`` also matches earlier ``*_flip.jpg`` outputs, so running
          this twice on the same directory flips those outputs again.
    """
    image_paths = glob.glob(os.path.join(source_dir, "*.jpg"))
    for index, image_path in enumerate(image_paths):
        # Split off the real extension; str.replace(".jpg", ...) would also
        # rewrite a ".jpg" that happens to appear in a directory name.
        stem = os.path.splitext(image_path)[0]
        xml_path = stem + ".xml"
        flipped_image_path = stem + "_flip.jpg"
        flipped_xml_path = stem + "_flip.xml"

        image = cv2.imread(image_path)
        if image is None:  # cv2.imread signals failure by returning None
            print("skip (cannot read image):", image_path)
            continue

        try:
            source_root = et.parse(xml_path).getroot()
        except (OSError, et.ParseError) as err:
            print("skip (cannot read annotation):", xml_path, err)
            continue

        # Build the annotation before writing anything so a bad annotation
        # cannot leave a flipped image behind without its XML.
        flipped_root = _build_flipped_annotation(
            source_root,
            os.path.basename(flipped_image_path),
            image.shape[1],  # pixel width of the image that is being flipped
            folder_name,
            annotation_path,
        )

        cv2.imwrite(flipped_image_path, cv2.flip(image, 1))
        et.ElementTree(flipped_root).write(flipped_xml_path, encoding="utf-8")
        print(image_path, index)
if __name__ == "__main__":
    # Script entry point: process the images found in this fixed directory.
    flip_images(source_dir="/home/Desktop/test")