划分VOC数据集,以及转换为划分后的COCO数据集格式

1.VOC数据集

LabelImg是一款广泛应用于图像标注的开源工具,主要用于构建目标检测模型所需的数据集。Visual Object Classes(VOC)数据集作为一种常见的目标检测数据集,通过labelimg工具在图像中标注边界框和类别标签,为训练模型提供了必要的注解信息。VOC数据集源于对PASCAL挑战赛的贡献,涵盖多个物体类别,成为目标检测领域的重要基准之一,推动着算法性能的不断提升。

使用labelimg标注或者其他VOC标注工具标注后,会得到两个文件夹,如下:

bash 复制代码
Annotations    ------->>>  存放.xml标注信息文件
JPEGImages     ------->>>  存放图片文件

2.划分VOC数据集

如下代码是按照训练集:验证集 = 8:2来划分的,会找出没有对应.xml的图片文件,且划分的时候支持JPEGImages文件夹下有如下图片格式:

python 复制代码
['.jpg', '.png', '.gif', '.bmp', '.tiff', '.jpeg', '.webp', '.svg', '.psd', '.cr2', '.nef', '.dng']

整体代码为:

python 复制代码
import os
import random

image_extensions = ['.jpg', '.png', '.gif', '.bmp', '.tiff', '.jpeg', '.webp', '.svg', '.psd', '.cr2', '.nef', '.dng']


def split_voc_dataset(dataset_dir, train_ratio, val_ratio):
    if not (0 < train_ratio + val_ratio <= 1):
        print("Invalid ratio values. They should sum up to 1.")
        return

    annotations_dir = os.path.join(dataset_dir, 'Annotations')
    images_dir = os.path.join(dataset_dir, 'JPEGImages')
    output_dir = os.path.join(dataset_dir, 'ImageSets/Main')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dict_info = dict()
    # List all the image files in the JPEGImages directory
    for file in os.listdir(images_dir):
        if any(ext in file for ext in image_extensions):
            jpg_files, endwith = os.path.splitext(file)
            dict_info[jpg_files] = endwith

    # List all the XML files in the Annotations directory
    xml_files = [file for file in os.listdir(annotations_dir) if file.endswith('.xml')]
    random.shuffle(xml_files)

    num_samples = len(xml_files)
    num_train = int(num_samples * train_ratio)
    num_val = int(num_samples * val_ratio)

    train_xml_files = xml_files[:num_train]
    val_xml_files = xml_files[num_train:num_train + num_val]

    with open(os.path.join(output_dir, 'train_list.txt'), 'w') as train_file:
        for xml_file in train_xml_files:
            image_name = os.path.splitext(xml_file)[0]
            if image_name in dict_info:
                image_path = os.path.join('JPEGImages', image_name + dict_info[image_name])
                annotation_path = os.path.join('Annotations', xml_file)
                train_file.write(f'{image_path} {annotation_path}\n')
            else:
                print(f"没有找到图片 {os.path.join(images_dir, image_name)}")

    with open(os.path.join(output_dir, 'val_list.txt'), 'w') as val_file:
        for xml_file in val_xml_files:
            image_name = os.path.splitext(xml_file)[0]
            if image_name in dict_info:
                image_path = os.path.join('JPEGImages', image_name + dict_info[image_name])
                annotation_path = os.path.join('Annotations', xml_file)
                val_file.write(f'{image_path} {annotation_path}\n')
            else:
                print(f"没有找到图片 {os.path.join(images_dir, image_name)}")

    labels = set()
    for xml_file in xml_files:
        annotation_path = os.path.join(annotations_dir, xml_file)
        with open(annotation_path, 'r') as f:
            lines = f.readlines()
            for line in lines:
                if '<name>' in line:
                    label = line.strip().replace('<name>', '').replace('</name>', '')
                    labels.add(label)

    with open(os.path.join(output_dir, 'labels.txt'), 'w') as labels_file:
        for label in labels:
            labels_file.write(f'{label}\n')


if __name__ == "__main__":
    dataset_dir = 'BirdNest/'
    train_ratio = 0.8  # Adjust the train-validation split ratio as needed
    val_ratio = 0.2
    split_voc_dataset(dataset_dir, train_ratio, val_ratio)

划分好后的截图:

3.VOC转COCO格式

目前很多框架大多支持的是COCO格式,因为存放与使用起来方便,采用了json文件来代替xml文件。

python 复制代码
import json
import os
from xml.etree import ElementTree as ET


def parse_xml(dataset_dir, xml_file):
    xml_path = os.path.join(dataset_dir, xml_file)
    tree = ET.parse(xml_path)
    root = tree.getroot()

    objects = root.findall('object')
    annotations = []

    for obj in objects:
        bbox = obj.find('bndbox')
        xmin = int(bbox.find('xmin').text)
        ymin = int(bbox.find('ymin').text)
        xmax = int(bbox.find('xmax').text)
        ymax = int(bbox.find('ymax').text)

        # Extract label from XML annotation
        label = obj.find('name').text
        if not label:
            print(f"Label not found in XML annotation. Skipping annotation.")
            continue

        annotations.append({
            'xmin': xmin,
            'ymin': ymin,
            'xmax': xmax,
            'ymax': ymax,
            'label': label
        })

    return annotations


def convert_to_coco_format(image_list_file, annotations_dir, output_json_file, dataset_dir):
    images = []
    annotations = []
    categories = []

    # Load labels
    with open(os.path.join(os.path.dirname(image_list_file), 'labels.txt'), 'r') as labels_file:
        label_lines = labels_file.readlines()
        categories = [{'id': i + 1, 'name': label.strip()} for i, label in enumerate(label_lines)]

    # Load image list file
    with open(image_list_file, 'r') as image_list:
        image_lines = image_list.readlines()
        for i, line in enumerate(image_lines):
            image_path, annotation_path = line.strip().split(' ')
            image_id = i + 1
            image_filename = os.path.basename(image_path)

            images.append({
                'id': image_id,
                'file_name': image_filename,
                'height': 0,  # You need to fill in the actual height of the image
                'width': 0,  # You need to fill in the actual width of the image
                'license': None,
                'flickr_url': None,
                'coco_url': None,
                'date_captured': None
            })

            # Load annotations from XML files
            xml_annotations = parse_xml(dataset_dir, annotation_path)
            for xml_annotation in xml_annotations:
                label = xml_annotation['label']
                category_id = next((cat['id'] for cat in categories if cat['name'] == label), None)
                if category_id is None:
                    print(f"Label '{label}' not found in categories. Skipping annotation.")
                    continue

                bbox = {
                    'xmin': xml_annotation['xmin'],
                    'ymin': xml_annotation['ymin'],
                    'xmax': xml_annotation['xmax'],
                    'ymax': xml_annotation['ymax']
                }

                annotations.append({
                    'id': len(annotations) + 1,
                    'image_id': image_id,
                    'category_id': category_id,
                    'bbox': [bbox['xmin'], bbox['ymin'], bbox['xmax'] - bbox['xmin'], bbox['ymax'] - bbox['ymin']],
                    'area': (bbox['xmax'] - bbox['xmin']) * (bbox['ymax'] - bbox['ymin']),
                    'segmentation': [],
                    'iscrowd': 0
                })

    coco_data = {
        'images': images,
        'annotations': annotations,
        'categories': categories
    }

    with open(output_json_file, 'w') as json_file:
        json.dump(coco_data, json_file, indent=4)


if __name__ == "__main__":
    # 根据需要调整路径
    dataset_dir = 'BirdNest/'
    image_sets_dir = 'BirdNest/ImageSets/Main/'
    train_list_file = os.path.join(image_sets_dir, 'train_list.txt')
    val_list_file = os.path.join(image_sets_dir, 'val_list.txt')
    output_train_json_file = os.path.join(dataset_dir, 'train_coco.json')
    output_val_json_file = os.path.join(dataset_dir, 'val_coco.json')

    convert_to_coco_format(train_list_file, image_sets_dir, output_train_json_file, dataset_dir)
    convert_to_coco_format(val_list_file, image_sets_dir, output_val_json_file, dataset_dir)
    print("The json file has been successfully generated!!!")

转COCO格式成功截图:

相关推荐
TY-202512 小时前
【CV 目标检测】Fast RCNN模型①——与R-CNN区别
人工智能·目标检测·目标跟踪·cnn
mozun20202 天前
《量子雷达》第4章 量子雷达的检测与估计 预习2025.8.14
目标检测·量子计算·量子雷达·光子·量子技术·检测估计
楚韵天工2 天前
基于多分类的工业异常声检测及应用
人工智能·深度学习·神经网络·目标检测·机器学习·分类·数据挖掘
老艾的AI世界2 天前
AI去、穿、换装软件下载,无内容限制,偷偷收藏
图像处理·人工智能·深度学习·神经网络·目标检测·机器学习·ai·换装·虚拟试衣·ai换装·一键换装
思通数据2 天前
AI视频监控:重构安防行业智能化新生态
人工智能·安全·目标检测·机器学习·计算机视觉·重构·数据挖掘
钓了猫的鱼儿3 天前
无人机航拍数据集|第14期 无人机水体污染目标检测YOLO数据集3000张yolov11/yolov8/yolov5可训练
yolo·目标检测·猫脸码客·yolo数据集·无人机航拍数据集·无人机水体污染目标检测
Debroon3 天前
CV 医学影像分类、分割、目标检测,之【血细胞分类】项目拆解
目标检测·分类·数据挖掘
知来者逆3 天前
VLMs开发——基于Qwen2.5-VL 实现视觉语言模型在目标检测中的层级结构与实现方法
目标检测·计算机视觉·目标跟踪·语言模型·多模态·vlms·qwen2.5-vl
Debroon4 天前
CV 医学影像分类、分割、目标检测,之【肝脏分割】项目拆解
目标检测·分类·数据挖掘
Debroon4 天前
CV 医学影像分类、分割、目标检测,之【腹腔多器官语义分割】项目拆解
目标检测·分类·数据挖掘