VOC XML 旋转框转换为 YOLO OBB 格式

VOC XML 旋转框转换为 YOLO OBB 格式:
输入：VOC XML (cx, cy, w, h, angle)
输出：YOLO OBB (class_id x1 y1 x2 y2 x3 y3 x4 y4) - 归一化坐标

示例如下:

输入：VOC XML (cx, cy, w, h, angle)

输出：YOLO OBB (class_id x1 y1 x2 y2 x3 y3 x4 y4) - 归一化坐标

代码如下:

python 复制代码

"""
VOC XML 旋转框 转换为 YOLO OBB 格式
输入：VOC XML (cx, cy, w, h, angle)
输出：YOLO OBB (class_id x1 y1 x2 y2 x3 y3 x4 y4) - 归一化坐标
"""

import os
import xml.etree.ElementTree as ET
import numpy as np
from pathlib import Path


def rotated_box_to_poly(cx, cy, w, h, angle):
    """
    Convert a rotated box (center, size, angle) into its four corner points.

    Args:
        cx, cy: Center of the box.
        w, h: Full width and height of the box.
        angle: Rotation angle in radians (fed directly to np.cos / np.sin).

    Returns:
        A (4, 2) NumPy array of (x, y) corners. Rows correspond to the
        unrotated offsets (-w/2, -h/2), (w/2, -h/2), (w/2, h/2), (-w/2, h/2)
        -- i.e. top-left, top-right, bottom-right, bottom-left when y points
        down as in image coordinates -- each rotated about the center.
    """
    half_w, half_h = w / 2, h / 2

    # Corner offsets relative to the center, before any rotation.
    offsets = np.array([
        (-half_w, -half_h),
        (half_w, -half_h),
        (half_w, half_h),
        (-half_w, half_h),
    ])

    c, s = np.cos(angle), np.sin(angle)
    rotation = np.array([(c, -s), (s, c)])

    # Points are stored as row vectors, so rotate by right-multiplying with
    # the transposed matrix, then translate to the box center.
    return offsets @ rotation.T + np.array([cx, cy])


def parse_xml(xml_path):
    """
    Read a VOC-style XML annotation file that stores rotated boxes.

    Args:
        xml_path: Source passed to ``ET.parse``.

    Returns:
        (image_width, image_height, objects), where ``objects`` is a list of
        dicts with the keys 'class_name', 'cx', 'cy', 'w', 'h' and 'angle'.
        <object> elements that have no <robndbox> child are skipped.
    """
    root = ET.parse(xml_path).getroot()

    # Image dimensions live under <size><width>/<height>.
    size_node = root.find('size')
    img_width = int(size_node.find('width').text)
    img_height = int(size_node.find('height').text)

    box_fields = ('cx', 'cy', 'w', 'h', 'angle')
    objects = []
    for node in root.findall('object'):
        class_name = node.find('name').text

        box = node.find('robndbox')
        if box is None:
            # Not a rotated-box annotation; nothing to collect.
            continue

        record = {'class_name': class_name}
        for field in box_fields:
            record[field] = float(box.find(field).text)
        objects.append(record)

    return img_width, img_height, objects


def convert_xml_to_yolo_obb(xml_path, output_path, class_mapping):
    """
    Convert one VOC XML annotation file into a YOLO OBB label file.

    Each output line has the form ``class_id x1 y1 x2 y2 x3 y3 x4 y4``; the
    corner coordinates are divided by the image width/height and clipped to
    the [0, 1] range.

    Args:
        xml_path: Path of the XML annotation to read.
        output_path: Path of the .txt label file to write.
        class_mapping: Dict mapping class name to class id. Objects whose
            class name is not in the mapping are skipped with a warning.

    Returns:
        (success, object_count): ``(True, n)`` where n is the number of label
        lines written, or ``(False, 0)`` if any step failed. Failures are
        printed rather than raised so a batch run can continue.
    """
    try:
        img_width, img_height, objects = parse_xml(xml_path)

        # A zero (or negative) image size would turn the normalization below
        # into inf/nan and silently write unusable labels, so report the file
        # as failed instead.
        if img_width <= 0 or img_height <= 0:
            raise ValueError(f"图像尺寸无效: {img_width}x{img_height}")

        yolo_lines = []
        for obj in objects:
            class_name = obj['class_name']
            if class_name not in class_mapping:
                print(f"警告: 未知类别 '{class_name}' 在文件 {xml_path}")
                continue

            class_id = class_mapping[class_name]

            # Expand (cx, cy, w, h, angle) into the four corner points.
            corners = rotated_box_to_poly(
                obj['cx'], obj['cy'],
                obj['w'], obj['h'],
                obj['angle'],
            )

            # Normalize x by image width and y by image height.
            corners[:, 0] /= img_width
            corners[:, 1] /= img_height

            # Keep every coordinate inside [0, 1].
            # NOTE(review): clipping corners independently can distort a box
            # that crosses the image border -- confirm this is acceptable.
            corners = np.clip(corners, 0, 1)

            # Flatten row by row to get x1 y1 x2 y2 x3 y3 x4 y4.
            coords = " ".join(f"{value:.6f}" for value in corners.ravel())
            yolo_lines.append(f"{class_id} {coords}")

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(yolo_lines))

        return True, len(yolo_lines)

    except Exception as e:
        # Per-file boundary: report and let the caller count the failure.
        print(f"错误: 处理文件 {xml_path} 时出错: {str(e)}")
        return False, 0


def batch_convert(xml_dir, output_dir, class_mapping):
    """
    Convert every ``*.xml`` file directly inside a directory.

    Args:
        xml_dir: Directory containing the XML annotation files.
        output_dir: Directory for the generated .txt files (created if
            it does not exist).
        class_mapping: Dict mapping class name to class id, forwarded to
            ``convert_xml_to_yolo_obb``.

    Progress is printed every 100 files and a summary is printed at the end.
    """
    os.makedirs(output_dir, exist_ok=True)

    xml_files = list(Path(xml_dir).glob('*.xml'))
    total_files = len(xml_files)
    divider = "-" * 60

    print(f"找到 {total_files} 个 XML 文件")
    print("开始转换...")
    print(divider)

    success_count = 0
    total_objects = 0

    for index, xml_path in enumerate(xml_files, start=1):
        # Same base name as the XML file, with a .txt extension.
        output_path = os.path.join(output_dir, f"{xml_path.stem}.txt")

        ok, written = convert_xml_to_yolo_obb(
            str(xml_path), output_path, class_mapping
        )
        if ok:
            success_count += 1
            total_objects += written

        # Report progress once per 100 processed files.
        if index % 100 == 0:
            percent = index * 100 // total_files
            print(f"进度: {index}/{total_files} ({percent}%)")

    print(divider)
    print("转换完成!")
    print(f"成功: {success_count}/{total_files} 个文件")
    print(f"总目标数: {total_objects}")
    print(f"输出目录: {output_dir}")


if __name__ == "__main__":
    # ===== Configuration =====

    # Directory containing the input XML files
    XML_DIR = r"Annotations_VOC"

    # Directory for the generated YOLO OBB label files
    OUTPUT_DIR = r"OBB_YOLO"

    # Class mapping: class name -> class id
    # Define the classes according to your own dataset
    CLASS_MAPPING = {
        'airport': 0,
        # Add further classes here if needed, e.g.
        # 'ship': 1,
        # 'vehicle': 2,
    }

    # ===== Run the conversion =====

    print("=" * 60)
    print("VOC XML 转 YOLO OBB 格式转换工具")
    print("=" * 60)
    print(f"输入目录: {XML_DIR}")
    print(f"输出目录: {OUTPUT_DIR}")
    print(f"类别映射: {CLASS_MAPPING}")
    print("=" * 60)
    print()

    # Stop early if the input directory is missing.
    if not os.path.exists(XML_DIR):
        print(f"错误: 输入目录不存在: {XML_DIR}")
        # The bare exit() helper is injected by the site module and may be
        # unavailable (e.g. python -S); SystemExit always works.
        raise SystemExit(1)

    # Run the batch conversion
    batch_convert(XML_DIR, OUTPUT_DIR, CLASS_MAPPING)

    print()
    print("=" * 60)
    print("转换完成！")
    print()

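需要修改的地方如下: 运行前请根据自己的数据集修改脚本末尾 `if __name__ == "__main__":` 中的三个配置项——XML_DIR（输入 XML 文件目录）、OUTPUT_DIR（输出 YOLO OBB 标注目录）和 CLASS_MAPPING（类别名称到类别 ID 的映射）。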

当然可以 👍 我来帮你系统地讲清楚"旋转目标检测（Rotated Object Detection）"这个研究方向，包括它的背景、核心问题、主流方法和应用场景——这部分内容非常适合写进论文或开题报告。

旋转目标检测的一些资料:

一、研究背景

在传统的目标检测（Object Detection）任务中，主流算法（如 YOLO、Faster R-CNN、DETR 等）通常假设目标是水平矩形（Horizontal Bounding Box, HBB），即目标的边界框与图像坐标轴平行。然而，在遥感图像、SAR 图像、工业检测、文本检测、港口船舶检测、电力巡检、航空影像等任务中，目标往往存在任意角度的旋转。

例如：

遥感图像中的飞机、船舶倾斜角各异；
SAR 图像中的车辆和建筑由于成像机制而存在旋转；
工业场景中零部件或烟雾目标方向不固定。

这就导致水平检测框出现大量背景冗余和目标重叠，难以准确描述目标的方向与位置。

因此，**旋转目标检测（Rotated Object Detection, ROD）**应运而生。

二、核心问题

旋转目标检测的关键是：

如何精确表示目标的旋转姿态，并在回归与匹配阶段稳定学习。

1. 旋转框表示方式

目前常见的旋转框（Rotated Bounding Box, RBox）表示方式有：

五参数表示法 (x, y, w, h, θ) ：
其中 (x, y) 为框中心坐标，w、h 为宽高，θ 为旋转角（通常取 -90°~90° 或 0°~180°）。
四顶点坐标法 (x1, y1, x2, y2, x3, y3, x4, y4) ：
精确但计算复杂，难以直接回归。
基于向量或方向编码的方法（如极坐标表示）用于缓解角度不连续问题。

2. 角度不连续性问题（Angle Periodicity Problem）

由于角度具有周期性（例如 θ=179° 与 θ=-181° 相差 360°，表示同一方向），在角度定义域的边界处，几何上几乎重合的两个框其角度数值却可能相差很大，因此直接回归角度会使损失在边界处发生突变，导致训练不稳定。

主流解决思路：

使用 sin-cos 编码（将角度映射为连续空间）。
将角度离散化为分类问题（如 CSL、DCL）。
利用平滑损失函数或对称框设计。

三、主流算法发展脉络

旋转目标检测的发展大致分为三类方法：

（1）基于水平框扩展的旋转检测

代表：R²CNN、RRPN、RoI Transformer

思路：在原有水平检测框基础上，增加角度预测或旋转 RoI 操作。

优点：对现有检测框架改动较小。

缺点：旋转框回归复杂、角度不稳定。

（2）基于锚框（Anchor-based）的方法

代表：Rotated RetinaNet、Gliding Vertex、CSL (Circular Smooth Label)

思路：为每个锚框增加角度参数，或采用平滑的角度回归机制。

优点：检测精度较高。

缺点：锚框设计复杂，计算量大。

（3）基于中心点或无锚框（Anchor-free）的方法

代表：PolarDet、FCOSR、YOLO-R、Rotated-DETR

思路：直接预测中心点与旋转参数，避免大量锚框匹配。

优点：结构简洁、速度快、易于迁移。

缺点：小目标与密集目标容易重叠。

四、损失函数与匹配策略

旋转检测中常见的损失函数包括：

Smooth L1 或 IoU-based Loss（如 Rotated IoU, SkewIoU, KFIoU, GIoU、SIoU 等）；
Angle Loss（用于稳定角度回归，如 CSL、Gaussian Circular Label）；
Rotated IoU/DIoU/GIoU Loss 计算更加复杂，需考虑四边形交并比。

匹配阶段通常采用 Rotated IoU 计算真实框与预测框的重叠度，以选择正负样本。

五、应用场景

旋转目标检测在多个高价值领域有显著应用：

场景	示例	特点
遥感图像	飞机、船舶、建筑检测	多角度目标密集分布
SAR图像	车辆、舰船识别	背景复杂、噪声多
工业检测	零部件、缺陷方向检测	形状不规则
文本检测	自然场景文本检测	倾斜文本常见
电梯监控	电瓶车、人物检测	角度多变，空间狭窄

六、当前挑战与研究方向

角度不连续与边界模糊问题
旋转框与水平框的匹配困难
复杂场景下的小目标检测与重叠抑制
如何与 Transformer、动态卷积、大核网络结合
轻量化与边缘设备部署（例如基于 LubanCat-4 平台的应用）

七、总结

旋转目标检测通过引入旋转框回归与角度预测机制，解决了目标任意方向分布的定位难题。未来研究趋势是：

Anchor-free + Transformer + IoU-based loss 的融合；
轻量化结构设计（适配边缘计算平台）；
自适应角度建模与语义方向感知特征提取。

VOC XML 旋转框 转换为 YOLO OBB 格式