使用Python将xml标注文件转换为coco json格式

文章目录

前言
一、读取xml文件
二、获取文件路径模块
三、XML转COCO JSON模块
四、主程序
总结
附：完整代码

前言

在计算机视觉领域，特别是目标检测任务中，不同的数据集采用了不同的标注格式。Pascal VOC数据集使用XML文件进行目标检测的标注，而Microsoft COCO数据集则采用JSON格式。为了方便模型训练，我们经常需要将XML格式的数据转换成JSON格式。本文将详细介绍如何用Python实现这一转换，并将代码分为几个模块进行讲解。

一、读取xml文件

XML文件读取模块，该模块负责解析XML文件，并从中提取有用的信息，如类别标签、边界框坐标以及图像尺寸等。

python 复制代码

def read_xml(xml_root):
    """Parse one Pascal VOC style ``.xml`` annotation file.

    :param xml_root: path to a single annotation file. A path whose extension
        is not ``.xml`` (case-insensitive) is ignored and yields empty lists.
    :return: dict with the keys
        ``'cat'``     - list of object class names,
        ``'bboxes'``  - list of ``[x1, y1, x2, y2]`` integer boxes,
        ``'box_wh'``  - list of ``[w, h]`` computed as ``x2 - x1 + 1`` and
        ``y2 - y1 + 1``,
        ``'img_whd'`` - ``[width, height, depth]`` as the raw text of the
        ``<size>`` children (``None`` for a missing child, and an empty list
        when the file has no ``<size>`` node at all).
    """
    dict_info = {'cat': [], 'bboxes': [], 'box_wh': [], 'img_whd': []}
    if os.path.splitext(xml_root)[-1].lower() != '.xml':
        return dict_info

    root = ET.parse(xml_root).getroot()

    size = root.find('size')
    if size is not None:
        # findtext returns None instead of raising when e.g. <depth> is absent.
        dict_info['img_whd'] = [size.findtext(tag) for tag in ('width', 'height', 'depth')]

    for obj in root.findall('object'):
        name = obj.findtext('name')
        bbox = obj.find('bndbox')
        if not name or bbox is None:
            continue  # malformed object: nothing usable to export
        try:
            # Some annotation tools write coordinates as floats ("10.0"),
            # which a bare int() rejects, so go through float() first.
            x1, y1, x2, y2 = [int(round(float(bbox.findtext(tag))))
                              for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        except (TypeError, ValueError, OverflowError):
            continue  # missing or non-numeric coordinate

        dict_info['cat'].append(str(name))
        dict_info['bboxes'].append([x1, y1, x2, y2])
        dict_info['box_wh'].append([x2 - x1 + 1, y2 - y1 + 1])

    return dict_info

二、获取文件路径模块

该模块遍历指定路径下的所有文件，筛选出符合特定后缀的文件路径和名称。

代码如下（示例）：

python 复制代码

def get_path_name(file_path,format='.jpg'):
    """Recursively collect the files under ``file_path`` whose name ends with ``format``.

    :param file_path: directory to walk.
    :param format: file-name suffix to keep, e.g. ``'.jpg'`` or ``'.xml'``.
    :return: ``(paths, names)`` - two parallel lists holding the full path and
        the bare file name of every match.
    """
    matched_paths = []
    matched_names = []
    for folder, _, entries in os.walk(file_path):
        for entry in entries:
            if not entry.endswith(format):
                continue
            full_path = os.path.join(folder, entry)
            matched_paths.append(full_path)
            matched_names.append(os.path.basename(full_path))
    return matched_paths, matched_names

三、XML转COCO JSON模块

该模块负责将XML文件转换为COCO JSON格式。

代码如下（示例）：

python 复制代码

def xml2cocojson(xml_root,  out_dir=None, assign_label=None,json_name=None,img_root=None):
    """Convert every Pascal VOC ``.xml`` file under ``xml_root`` into one COCO JSON file.

    :param xml_root: directory searched recursively for ``.xml`` annotations.
    :param out_dir: directory the JSON file is written to; defaults to
        ``'out_dir'`` and is created when missing.
    :param assign_label: optional list of class names, e.g. ``['pedes', 'bus']``.
        When given, category ids follow this order and objects of any other
        class are skipped; when empty/None the classes are collected from the
        XML files in order of first appearance.
    :param json_name: output file name; defaults to ``'coco_data_format.json'``.
    :param img_root: optional directory searched recursively for ``.jpg``
        images. When an image matching an annotation is found and readable,
        its real height/width override the values stored in the XML.
    :return: the COCO dictionary that was written to disk.
    """
    xml_root_lst, xml_names_lst = get_path_name(xml_root, format='.xml')

    json_name = json_name if json_name is not None else 'coco_data_format.json'
    out_dir = out_dir if out_dir else 'out_dir'
    os.makedirs(out_dir, exist_ok=True)
    out_dir_json = os.path.join(out_dir, json_name)

    # Image file name -> full path; stays empty unless img_root is supplied.
    img_path_by_name = {}
    if img_root:
        import cv2  # imported lazily: only needed when sizes come from images
        img_root_lst, img_name_lst = get_path_name(img_root, format='.jpg')
        for path, name in zip(img_root_lst, img_name_lst):
            img_path_by_name.setdefault(name, path)  # first match wins

    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}

    image_id = 10000000
    anation_id = 10000000
    # Work on a copy so the caller's list is never aliased.
    label_lst = list(assign_label) if assign_label else []
    # Class name -> 1-based category id (first occurrence wins, like list.index).
    label_to_id = {}
    for idx, name in enumerate(label_lst):
        label_to_id.setdefault(name, idx + 1)

    info = {'vaild_img': 0, 'invaild_img': 0}
    for xml_path, xml_name in zip(tqdm(xml_root_lst), xml_names_lst):
        xml_info = read_xml(xml_path)

        # With an explicit label list, keep only the requested classes;
        # otherwise keep everything and discover classes on the fly.
        kept = [(cat, box) for cat, box in zip(xml_info['cat'], xml_info['bboxes'])
                if not assign_label or cat in label_to_id]
        if not kept:
            info['invaild_img'] += 1
            continue

        img_name = os.path.splitext(xml_name)[0] + '.jpg'

        img_w = img_h = None
        img_path = img_path_by_name.get(img_name)
        if img_path is not None:
            img = cv2.imread(img_path)
            if img is not None:  # imread returns None for unreadable files
                img_h, img_w = img.shape[:2]  # shape is (rows, cols[, channels])
        if img_w is None:
            img_w, img_h = int(xml_info['img_whd'][0]), int(xml_info['img_whd'][1])

        image_id += 1
        json_dict['images'].append(
            {'file_name': img_name, 'height': img_h, 'width': img_w, 'id': image_id})
        info['vaild_img'] += 1

        for cat, b in kept:
            if cat not in label_to_id:  # only reachable in auto-discovery mode
                label_lst.append(cat)
                label_to_id[cat] = len(label_lst)

            xmin, ymin = b[0], b[1]
            obj_width, obj_height = b[2] - b[0], b[3] - b[1]

            anation_id += 1
            json_dict['annotations'].append({
                'area': obj_width * obj_height, 'iscrowd': 0, 'image_id': image_id,
                'bbox': [xmin, ymin, obj_width, obj_height],
                'category_id': label_to_id[cat],  # ids start at 1
                'id': anation_id, 'ignore': 0,
                'segmentation': []})

    for cid, cate in enumerate(label_lst):  # category ids start at 1
        json_dict['categories'].append({'supercategory': 'FWW', 'id': cid + 1, 'name': cate})

    with open(out_dir_json, 'w') as f:
        json.dump(json_dict, f, indent=4)

    print('saving json path:{}\n info:{}\ncategory list: {}'.format(out_dir_json,info,label_lst))
    return json_dict

四、主程序

这是主函数部分，它调用上面定义的函数来执行XML到COCO JSON的转换。

python 复制代码

from tqdm import tqdm
import os
import json
import xml.etree.ElementTree as ET


if __name__ == '__main__':
    # Walk the dataset directory for VOC XML files and write the COCO JSON.
    # No label list is passed, so categories are collected from the XML files.
    dataset_dir = '/extend/Data'
    labels = None
    xml2cocojson(dataset_dir, assign_label=labels)

总结

通过上述步骤，我们可以将VOC XML格式的标注文件转换为COCO JSON格式，从而方便后续的模型训练。这个工具可以根据需要进一步扩展功能，例如支持多线程处理或更复杂的图像数据集。

附：完整代码

python 复制代码

from tqdm import tqdm
import os
import json
import xml.etree.ElementTree as ET

def read_xml(xml_root):
    """Parse one Pascal VOC style ``.xml`` annotation file.

    :param xml_root: path to a single annotation file. A path whose extension
        is not ``.xml`` (case-insensitive) is ignored and yields empty lists.
    :return: dict with the keys
        ``'cat'``     - list of object class names,
        ``'bboxes'``  - list of ``[x1, y1, x2, y2]`` integer boxes,
        ``'box_wh'``  - list of ``[w, h]`` computed as ``x2 - x1 + 1`` and
        ``y2 - y1 + 1``,
        ``'img_whd'`` - ``[width, height, depth]`` as the raw text of the
        ``<size>`` children (``None`` for a missing child, and an empty list
        when the file has no ``<size>`` node at all).
    """
    dict_info = {'cat': [], 'bboxes': [], 'box_wh': [], 'img_whd': []}
    if os.path.splitext(xml_root)[-1].lower() != '.xml':
        return dict_info

    root = ET.parse(xml_root).getroot()

    size = root.find('size')
    if size is not None:
        # findtext returns None instead of raising when e.g. <depth> is absent.
        dict_info['img_whd'] = [size.findtext(tag) for tag in ('width', 'height', 'depth')]

    for obj in root.findall('object'):
        name = obj.findtext('name')
        bbox = obj.find('bndbox')
        if not name or bbox is None:
            continue  # malformed object: nothing usable to export
        try:
            # Some annotation tools write coordinates as floats ("10.0"),
            # which a bare int() rejects, so go through float() first.
            x1, y1, x2, y2 = [int(round(float(bbox.findtext(tag))))
                              for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        except (TypeError, ValueError, OverflowError):
            continue  # missing or non-numeric coordinate

        dict_info['cat'].append(str(name))
        dict_info['bboxes'].append([x1, y1, x2, y2])
        dict_info['box_wh'].append([x2 - x1 + 1, y2 - y1 + 1])

    return dict_info

def get_path_name(file_path,format='.jpg'):
    """Recursively collect the files under ``file_path`` whose name ends with ``format``.

    :param file_path: directory to walk.
    :param format: file-name suffix to keep, e.g. ``'.jpg'`` or ``'.xml'``.
    :return: ``(paths, names)`` - two parallel lists holding the full path and
        the bare file name of every match.
    """
    matched_paths = []
    matched_names = []
    for folder, _, entries in os.walk(file_path):
        for entry in entries:
            if not entry.endswith(format):
                continue
            full_path = os.path.join(folder, entry)
            matched_paths.append(full_path)
            matched_names.append(os.path.basename(full_path))
    return matched_paths, matched_names

def xml2cocojson(xml_root,  out_dir=None, assign_label=None,json_name=None,img_root=None):
    """Convert every Pascal VOC ``.xml`` file under ``xml_root`` into one COCO JSON file.

    :param xml_root: directory searched recursively for ``.xml`` annotations.
    :param out_dir: directory the JSON file is written to; defaults to
        ``'out_dir'`` and is created when missing.
    :param assign_label: optional list of class names, e.g. ``['pedes', 'bus']``.
        When given, category ids follow this order and objects of any other
        class are skipped; when empty/None the classes are collected from the
        XML files in order of first appearance.
    :param json_name: output file name; defaults to ``'coco_data_format.json'``.
    :param img_root: optional directory searched recursively for ``.jpg``
        images. When an image matching an annotation is found and readable,
        its real height/width override the values stored in the XML.
    :return: the COCO dictionary that was written to disk.
    """
    xml_root_lst, xml_names_lst = get_path_name(xml_root, format='.xml')

    json_name = json_name if json_name is not None else 'coco_data_format.json'
    out_dir = out_dir if out_dir else 'out_dir'
    os.makedirs(out_dir, exist_ok=True)
    out_dir_json = os.path.join(out_dir, json_name)

    # Image file name -> full path; stays empty unless img_root is supplied.
    img_path_by_name = {}
    if img_root:
        import cv2  # imported lazily: only needed when sizes come from images
        img_root_lst, img_name_lst = get_path_name(img_root, format='.jpg')
        for path, name in zip(img_root_lst, img_name_lst):
            img_path_by_name.setdefault(name, path)  # first match wins

    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}

    image_id = 10000000
    anation_id = 10000000
    # Work on a copy so the caller's list is never aliased.
    label_lst = list(assign_label) if assign_label else []
    # Class name -> 1-based category id (first occurrence wins, like list.index).
    label_to_id = {}
    for idx, name in enumerate(label_lst):
        label_to_id.setdefault(name, idx + 1)

    info = {'vaild_img': 0, 'invaild_img': 0}
    for xml_path, xml_name in zip(tqdm(xml_root_lst), xml_names_lst):
        xml_info = read_xml(xml_path)

        # With an explicit label list, keep only the requested classes;
        # otherwise keep everything and discover classes on the fly.
        kept = [(cat, box) for cat, box in zip(xml_info['cat'], xml_info['bboxes'])
                if not assign_label or cat in label_to_id]
        if not kept:
            info['invaild_img'] += 1
            continue

        img_name = os.path.splitext(xml_name)[0] + '.jpg'

        img_w = img_h = None
        img_path = img_path_by_name.get(img_name)
        if img_path is not None:
            img = cv2.imread(img_path)
            if img is not None:  # imread returns None for unreadable files
                img_h, img_w = img.shape[:2]  # shape is (rows, cols[, channels])
        if img_w is None:
            img_w, img_h = int(xml_info['img_whd'][0]), int(xml_info['img_whd'][1])

        image_id += 1
        json_dict['images'].append(
            {'file_name': img_name, 'height': img_h, 'width': img_w, 'id': image_id})
        info['vaild_img'] += 1

        for cat, b in kept:
            if cat not in label_to_id:  # only reachable in auto-discovery mode
                label_lst.append(cat)
                label_to_id[cat] = len(label_lst)

            xmin, ymin = b[0], b[1]
            obj_width, obj_height = b[2] - b[0], b[3] - b[1]

            anation_id += 1
            json_dict['annotations'].append({
                'area': obj_width * obj_height, 'iscrowd': 0, 'image_id': image_id,
                'bbox': [xmin, ymin, obj_width, obj_height],
                'category_id': label_to_id[cat],  # ids start at 1
                'id': anation_id, 'ignore': 0,
                'segmentation': []})

    for cid, cate in enumerate(label_lst):  # category ids start at 1
        json_dict['categories'].append({'supercategory': 'FWW', 'id': cid + 1, 'name': cate})

    with open(out_dir_json, 'w') as f:
        json.dump(json_dict, f, indent=4)

    print('saving json path:{}\n info:{}\ncategory list: {}'.format(out_dir_json,info,label_lst))
    return json_dict


if __name__ == '__main__':
    # Walk the dataset directory for VOC XML files and write the COCO JSON.
    # No label list is passed, so categories are collected from the XML files.
    dataset_dir = '/extend/Data'
    labels = None
    xml2cocojson(dataset_dir, assign_label=labels)