python
import xml.etree.ElementTree as ET
from xml.dom import minidom
# Demo: read fields from an existing annotation XML, then write a new one.

# Parse the sample annotation file and keep its root element.
tree = ET.parse("./xml_3/z_20240827_001.xml")
root = tree.getroot()

# Print the text of <size>/<width>.
print(root.find("size").find("width").text)

# An annotation may contain several <object> elements; print the <name>
# text of each of them.
for obj in root.findall('object'):
    print(obj.find("name").text)

# Build a new <annotation> tree from scratch.
root = ET.Element("annotation")
ET.SubElement(root, "folder").text = "f"
# First-level child; deeper levels are created the same way, only the
# parent passed to SubElement changes.
size_node = ET.SubElement(root, "size")
ET.SubElement(size_node, "width").text = "需要写入的内容"

# Save the tree as pretty-printed text.
with open("./name_i.xml", 'w', encoding='utf-8') as xml_file:
    # Serialize to UTF-8 bytes, re-parse with minidom so the document can
    # be pretty-printed; ``indent`` is the string used per nesting level.
    xml_file.write(
        minidom.parseString(ET.tostring(root, 'utf-8')).toprettyxml(indent=" ")
    )
xml 标注文件的拆分与合并
目的是方便检测单类别标注的正确性
例子:将一个 xml 文件按类别拆分为多个 xml 文件
python
import xml.etree.ElementTree as ET
import os
from xml.dom import minidom
# Every class name expected in the annotations; each one gets its own
# output sub-folder.
list_class = ['A1', 'A2', 'A3']
# Folder containing the source XML annotation files (absolute path).
path_lab = "E:/lab/"
# Folder that receives the per-class XML files (absolute path).
save_xml_path = "E:/lab/ce/"

_HEADER_TAGS = ("folder", "filename", "path")
_SIZE_TAGS = ("width", "height", "depth")
_BOX_TAGS = ("xmin", "ymin", "xmax", "ymax")


def _read_header(root):
    """Return the header and size texts of an <annotation> as a dict.

    Tags that are absent map to None instead of raising.
    """
    size = root.find("size")
    header = {tag: root.findtext(tag) for tag in _HEADER_TAGS}
    for tag in _SIZE_TAGS:
        header[tag] = None if size is None else size.findtext(tag)
    return header


def _group_boxes(root, classes, source):
    """Group the <object> boxes of ``root`` by class name.

    Returns a dict mapping every name in ``classes`` to a list of
    ``(name, xmin, ymin, xmax, ymax)`` text tuples.  Objects whose class is
    not listed, or whose <bndbox> is missing or incomplete, are reported on
    stdout and skipped; ``source`` is only used in that message.
    """
    grouped = {name: [] for name in classes}
    for obj in root.findall("object"):
        name = obj.findtext("name")
        bndbox = obj.find("bndbox")
        coords = () if bndbox is None else tuple(
            bndbox.findtext(tag) for tag in _BOX_TAGS
        )
        if name not in grouped or bndbox is None or None in coords:
            print(f"skip object {name!r} in {source}: "
                  "unknown class or incomplete bndbox")
            continue
        grouped[name].append((name,) + coords)
    return grouped


def _build_annotation(header, boxes):
    """Build an <annotation> element from header texts and box tuples."""
    root = ET.Element("annotation")
    for tag in _HEADER_TAGS:
        ET.SubElement(root, tag).text = header[tag]
    size = ET.SubElement(root, "size")
    for tag in _SIZE_TAGS:
        ET.SubElement(size, tag).text = header[tag]
    for name, *coords in boxes:
        obj = ET.SubElement(root, "object")
        ET.SubElement(obj, "name").text = str(name)
        bndbox = ET.SubElement(obj, "bndbox")
        for tag, value in zip(_BOX_TAGS, coords):
            ET.SubElement(bndbox, tag).text = str(value)
    return root


def _write_pretty(root, path):
    """Write ``root`` to ``path`` as pretty-printed UTF-8 text."""
    # Serialize first so a failure cannot leave a truncated file behind.
    # The bytes are re-parsed with minidom only to get pretty-printing;
    # ``indent`` is the string used per nesting level.
    text = minidom.parseString(ET.tostring(root, 'utf-8')).toprettyxml(indent=" ")
    with open(path, 'w', encoding='utf-8') as xml_file:
        xml_file.write(text)


def split_xml_by_class(xml_path, save_dir, classes):
    """Split one annotation file into one file per class that occurs in it.

    For every class in ``classes`` that has at least one box in
    ``xml_path``, a file with the same base name is written to
    ``save_dir/<class>/`` containing the original header and only that
    class's boxes.

    Returns the list of paths written.
    """
    root = ET.parse(xml_path).getroot()
    header = _read_header(root)
    grouped = _group_boxes(root, classes, xml_path)

    written = []
    for name in classes:
        boxes = grouped[name]
        if not boxes:
            # Nothing of this class in the file: no output for it.
            continue
        class_dir = os.path.join(save_dir, name)
        os.makedirs(class_dir, exist_ok=True)
        out_path = os.path.join(class_dir, os.path.basename(xml_path))
        print(out_path)
        _write_pretty(_build_annotation(header, boxes), out_path)
        written.append(out_path)
    return written


def split_all(label_dir, save_dir, classes):
    """Split every ``*.xml`` file directly inside ``label_dir`` by class.

    One sub-folder per class is created under ``save_dir`` (including
    missing parents).  Sub-directories and non-XML files in ``label_dir``
    are ignored, so ``save_dir`` may live inside ``label_dir``.

    Returns the list of all paths written.
    """
    for name in classes:
        os.makedirs(os.path.join(save_dir, name), exist_ok=True)

    written = []
    for xml_name in sorted(os.listdir(label_dir)):
        xml_path = os.path.join(label_dir, xml_name)
        if not (os.path.isfile(xml_path) and xml_name.lower().endswith(".xml")):
            continue
        written.extend(split_xml_by_class(xml_path, save_dir, classes))
    return written


if __name__ == "__main__":
    split_all(path_lab, save_xml_path, list_class)
将拆分的xml合并为1个
python
import xml.etree.ElementTree as ET
import os
from xml.dom import minidom
# The class list must be the same one that was used when splitting.
list_class = ['A1', 'A2', 'A3']
# Classes that must not be written into the merged file.
skip_class = ["b1", "b2"]
# Folder containing the annotated images.
path_img = "E:/zyk_lab/ce/img/"
# Root folder of the split XML files (one sub-folder per class).
path_lab = "E:/zyk_lab/ce/ce/"
# Folder that receives the merged XML files.
path_lab_save = "E:/zyk_lab/ce/lab_ce/"

_HEADER_TAGS = ("folder", "filename", "path")
_SIZE_TAGS = ("width", "height", "depth")
_BOX_TAGS = ("xmin", "ymin", "xmax", "ymax")


def _write_pretty(root, path):
    """Write ``root`` to ``path`` as pretty-printed UTF-8 text."""
    # Serialize first so a failure cannot leave a truncated file behind.
    # The bytes are re-parsed with minidom only to get pretty-printing;
    # ``indent`` is the string used per nesting level.
    text = minidom.parseString(ET.tostring(root, 'utf-8')).toprettyxml(indent=" ")
    with open(path, 'w', encoding='utf-8') as xml_file:
        xml_file.write(text)


def merge_class_xmls(img_name, split_dir, classes, skip=()):
    """Merge the per-class files ``<split_dir>/*/<img_name>.xml`` into one tree.

    Header and size texts are taken from the matching files (a later file
    overrides an earlier one only for tags it actually contains).  Boxes
    are emitted grouped by class, in the order of ``classes``; classes not
    listed there are kept and appended afterwards.  Classes named in
    ``skip`` are left out.  Objects without a name or with an incomplete
    <bndbox> are reported on stdout and skipped.

    Returns the new <annotation> element.  When no matching file exists the
    element has empty header fields and no <object> children.
    """
    # Header values live in their own dict, separate from the per-class box
    # lists, so iterating over the classes never touches header entries.
    header = dict.fromkeys(_HEADER_TAGS + _SIZE_TAGS)
    grouped = {name: [] for name in classes}

    for entry in sorted(os.listdir(split_dir)):
        # Look the file up directly instead of listing every class folder;
        # entries that are not directories simply never match.
        xml_path = os.path.join(split_dir, entry, img_name + ".xml")
        if not os.path.isfile(xml_path):
            continue
        root = ET.parse(xml_path).getroot()
        size = root.find("size")
        for tag in _HEADER_TAGS:
            value = root.findtext(tag)
            if value is not None:
                header[tag] = value
        for tag in _SIZE_TAGS:
            value = None if size is None else size.findtext(tag)
            if value is not None:
                header[tag] = value
        for obj in root.findall("object"):
            name = obj.findtext("name")
            bndbox = obj.find("bndbox")
            coords = () if bndbox is None else tuple(
                bndbox.findtext(tag) for tag in _BOX_TAGS
            )
            if name is None or bndbox is None or None in coords:
                print(f"skip malformed object in {xml_path}")
                continue
            grouped.setdefault(name, []).append((name,) + coords)

    merged = ET.Element("annotation")
    for tag in _HEADER_TAGS:
        ET.SubElement(merged, tag).text = header[tag]
    size_out = ET.SubElement(merged, "size")
    for tag in _SIZE_TAGS:
        ET.SubElement(size_out, tag).text = header[tag]
    for name, boxes in grouped.items():
        if name in skip:
            continue
        for box in boxes:
            obj_out = ET.SubElement(merged, "object")
            ET.SubElement(obj_out, "name").text = str(box[0])
            bndbox_out = ET.SubElement(obj_out, "bndbox")
            for tag, value in zip(_BOX_TAGS, box[1:]):
                ET.SubElement(bndbox_out, tag).text = str(value)
    return merged


def merge_all(img_dir, split_dir, save_dir, classes, skip=()):
    """Write one merged annotation per image file found in ``img_dir``.

    The image's base name (extension removed) selects the per-class files
    to merge; the result is written to ``save_dir/<base name>.xml``.
    ``save_dir`` is created if needed.  A file is written for every image,
    even when no per-class annotation exists for it.

    Returns the list of paths written.
    """
    os.makedirs(save_dir, exist_ok=True)
    written = []
    for img_file in sorted(os.listdir(img_dir)):
        if not os.path.isfile(os.path.join(img_dir, img_file)):
            continue
        img_name = os.path.splitext(img_file)[0]
        merged = merge_class_xmls(img_name, split_dir, classes, skip)
        out_path = os.path.join(save_dir, img_name + ".xml")
        print(out_path)
        _write_pretty(merged, out_path)
        written.append(out_path)
    return written


if __name__ == "__main__":
    merge_all(path_img, path_lab, path_lab_save, list_class, skip_class)