VOC格式数据集转换为YOLO格式

将VOC格式数据集转换为YOLO格式需要处理XML标注文件和图像文件，并将边界框坐标从绝对坐标转换为归一化坐标。以下是健壮的Python代码实现，支持主要的图像格式（如 .jpg, .png, .jpeg 等），并考虑了各种可能的异常情况。

VOC格式说明

标注文件 ：每个图像对应一个 .xml 文件，包含图像尺寸和边界框的绝对坐标。
图像文件 ：通常与标注文件同名，格式为 .jpg、.png 等。
类别文件 ：标准VOC数据集本身并不包含 classes.txt；本文的转换脚本要求在数据集根目录下额外提供一个 classes.txt 文件，每行一个类别名称，行号（从0开始）即为YOLO的类别ID。

YOLO格式说明

标注文件 ：每个图像对应一个 .txt 文件，每行格式为 class_id x_center y_center width height，其中坐标和尺寸是归一化的（相对于图像宽度和高度）。
图像文件 ：与标注文件同名，格式为 .jpg、.png 等。
类别文件 ：classes.txt，每行一个类别名称。

转换代码

python 复制代码

import os
import shutil
import xml.etree.ElementTree as ET

from tqdm import tqdm

_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp')


def _read_image_size(root):
    """Return ``(width, height)`` from the ``<size>`` element, or ``None`` if unusable."""
    try:
        width = int(float(root.findtext('size/width')))
        height = int(float(root.findtext('size/height')))
    except (TypeError, ValueError, OverflowError):
        # Missing element (None), empty/garbled text, or a non-finite number.
        return None
    if width <= 0 or height <= 0:
        # A zero dimension would cause a division by zero during normalisation.
        return None
    return width, height


def _object_to_yolo_line(obj, category_to_id, width, height):
    """Return the YOLO label line for one ``<object>``, or ``None`` to skip it."""
    class_name = (obj.findtext('name') or '').strip()
    if class_name not in category_to_id:
        print(f"警告：类别 {class_name} 不在 classes.txt 中，跳过该对象。")
        return None

    bbox = obj.find('bndbox')
    try:
        x_min, y_min, x_max, y_max = (
            float(bbox.findtext(tag)) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
    except (AttributeError, TypeError, ValueError):
        # No <bndbox>, a missing corner, or non-numeric text.
        print(f"警告：类别 {class_name} 的边界框缺失或无效，跳过该对象。")
        return None

    # Tolerate swapped corners, then clamp to the image so every normalised
    # value stays inside [0, 1].
    x_lo, x_hi = sorted((x_min, x_max))
    y_lo, y_hi = sorted((y_min, y_max))
    x_lo, x_hi = max(0.0, x_lo), min(float(width), x_hi)
    y_lo, y_hi = max(0.0, y_lo), min(float(height), y_hi)
    if x_hi <= x_lo or y_hi <= y_lo:
        print(f"警告：类别 {class_name} 的边界框面积为零或位于图像之外，跳过该对象。")
        return None

    x_center = (x_lo + x_hi) / 2 / width
    y_center = (y_lo + y_hi) / 2 / height
    bbox_width = (x_hi - x_lo) / width
    bbox_height = (y_hi - y_lo) / height
    class_id = category_to_id[class_name]
    return f"{class_id} {x_center:.6f} {y_center:.6f} {bbox_width:.6f} {bbox_height:.6f}\n"


def voc_to_yolo(voc_dir, output_dir):
    """
    Convert a VOC-style dataset into the YOLO directory layout.

    For every ``*.xml`` file in ``Annotations`` the matching image is copied to
    ``<output_dir>/images`` and a label file with one
    ``class_id x_center y_center width height`` line per object (all values
    normalised by the image size) is written to ``<output_dir>/labels``.
    The class list is also written to ``<output_dir>/classes.txt``.

    Annotations that cannot be used (unparsable XML, missing or non-positive
    image size, no matching image) are reported and skipped; so are objects
    with an unknown class or an invalid bounding box. Boxes are clamped to the
    image bounds before normalisation.

    :param voc_dir: Root of the VOC dataset; must contain ``Annotations``,
        ``JPEGImages`` and ``classes.txt`` (one class name per line, blank
        lines ignored; the line order defines the class ids).
    :param output_dir: Destination directory for the YOLO dataset; created if
        it does not exist.
    :raises FileNotFoundError: If ``Annotations``, ``JPEGImages`` or
        ``classes.txt`` is missing under ``voc_dir``.
    """
    # The progress bar is cosmetic: fall back to plain iteration when tqdm
    # cannot be imported so the conversion itself still works.
    try:
        from tqdm import tqdm as progress
    except ImportError:
        def progress(iterable):
            return iterable

    annotations_dir = os.path.join(voc_dir, 'Annotations')
    images_dir = os.path.join(voc_dir, 'JPEGImages')
    classes_file = os.path.join(voc_dir, 'classes.txt')

    if not os.path.exists(annotations_dir):
        raise FileNotFoundError("Annotations 文件夹不存在！")
    if not os.path.exists(images_dir):
        raise FileNotFoundError("JPEGImages 文件夹不存在！")
    if not os.path.exists(classes_file):
        raise FileNotFoundError("classes.txt 文件不存在！")

    os.makedirs(output_dir, exist_ok=True)
    labels_dir = os.path.join(output_dir, 'labels')
    images_output_dir = os.path.join(output_dir, 'images')
    os.makedirs(labels_dir, exist_ok=True)
    os.makedirs(images_output_dir, exist_ok=True)

    # utf-8-sig accepts files with or without a BOM; blank lines (typically a
    # trailing newline) must not become a class of their own.
    with open(classes_file, 'r', encoding='utf-8-sig') as f:
        categories = [line.strip() for line in f if line.strip()]
    category_to_id = {name: i for i, name in enumerate(categories)}

    # Ship the class list with the converted dataset so that line number and
    # class id stay in sync. Never overwrite the input file itself.
    classes_output = os.path.join(output_dir, 'classes.txt')
    if os.path.abspath(classes_output) != os.path.abspath(classes_file):
        with open(classes_output, 'w', encoding='utf-8') as f:
            f.writelines(f"{name}\n" for name in categories)

    # Index images by stem once so that an annotation whose <filename> is
    # missing or carries the wrong extension can still find its image.
    images_by_stem = {}
    for name in sorted(os.listdir(images_dir)):
        stem, ext = os.path.splitext(name)
        if ext.lower() in _IMAGE_EXTENSIONS:
            images_by_stem.setdefault(stem, name)

    for xml_file in progress(sorted(os.listdir(annotations_dir))):
        if not xml_file.lower().endswith('.xml'):
            continue

        xml_path = os.path.join(annotations_dir, xml_file)
        try:
            root = ET.parse(xml_path).getroot()
        except ET.ParseError as exc:
            print(f"警告：标注文件 {xml_file} 解析失败（{exc}），跳过该标注文件。")
            continue

        size = _read_image_size(root)
        if size is None:
            print(f"警告：标注文件 {xml_file} 缺少有效的图像尺寸，跳过该标注文件。")
            continue
        width, height = size

        # Prefer the name declared in the XML; otherwise fall back to an image
        # that shares the annotation's stem.
        declared_name = root.findtext('filename')
        if declared_name and os.path.isfile(os.path.join(images_dir, declared_name)):
            image_name = declared_name
        else:
            image_name = images_by_stem.get(os.path.splitext(xml_file)[0])
        if image_name is None:
            print(f"警告：图像文件 {declared_name} 不存在，跳过该标注文件。")
            continue

        # Existing copies are left untouched so reruns are cheap.
        image_output_path = os.path.join(images_output_dir, image_name)
        if not os.path.exists(image_output_path):
            shutil.copy(os.path.join(images_dir, image_name), image_output_path)

        label_lines = []
        for obj in root.findall('object'):
            line = _object_to_yolo_line(obj, category_to_id, width, height)
            if line is not None:
                label_lines.append(line)

        # An empty label file is still written: it marks an image that has no
        # usable objects.
        label_name = os.path.splitext(image_name)[0] + '.txt'
        with open(os.path.join(labels_dir, label_name), 'w', encoding='utf-8') as f:
            f.writelines(label_lines)

    print(f"转换完成！YOLO格式数据集已保存到: {output_dir}")


# Example usage: replace the placeholder paths before running this file as a script.
voc_dir = "path/to/voc_dataset"  # root directory of the VOC dataset
output_dir = "path/to/yolo_dataset"  # destination directory for the YOLO dataset

if __name__ == "__main__":
    # Guarded so that importing this module does not start a conversion.
    voc_to_yolo(voc_dir, output_dir)

代码说明

路径检查 ：确保 Annotations、JPEGImages 文件夹和 classes.txt 文件存在。
类别映射 ：从 classes.txt 读取类别名称，并映射到YOLO格式的类别ID。
XML解析 ：解析每个 .xml 文件，提取图像尺寸和边界框的绝对坐标。
坐标转换：将绝对坐标转换为归一化坐标。
文件复制：将图像文件复制到输出目录。
标注写入 ：将YOLO格式的标注写入 .txt 文件。

示例目录结构

输入（VOC格式）

复制代码

voc_dataset/
├── Annotations/
│   ├── image1.xml
│   ├── image2.xml
├── JPEGImages/
│   ├── image1.jpg
│   ├── image2.jpg
└── classes.txt

输出（YOLO格式）

复制代码

yolo_dataset/
├── images/
│   ├── image1.jpg
│   ├── image2.jpg
├── labels/
│   ├── image1.txt
│   ├── image2.txt
└── classes.txt

注意事项

图像格式支持 ：图像文件名取自XML标注中的 <filename> 字段，因此只要该字段与 JPEGImages 中的实际文件名一致，.jpg、.png、.jpeg 等常见图像格式均可处理。
类别一致性 ：确保 classes.txt 中的类别名称与XML文件中的类别名称一致。
异常处理 ：代码会跳过不存在的图像文件或未在 classes.txt 中定义的类别。
图像复制：如果输出目录中已存在同名图像文件，则不会重复复制。

运行代码后，生成的YOLO格式数据集可以直接用于训练YOLO模型。