目标检测之YOLOV11自定义数据预处理——从原始标注到YOLO-OBB格式转换与验证

前言

从前面的文章《目标检测之YOLOV11的环境搭建》可知，现在发布的yolo版本，与其说是一个好的算法，不如说是一个平台：不但集成了检测、分类、分割等一系列方法，还涵盖了算法的完整流程，包括训练、验证、预测以及导出。这让我想起之前用yolo obb进行虾苗计数时的种种：当时obb还没有集成在ultralytics平台中，配置源码的过程比较复杂；而现在的版本中已经内置了obb方法，因此我想用yolo obb把之前的虾苗检测重新做一次。

之前做过的例子请参考：
目标检测之YoloV5+旋转目标

一、背景与问题引入

在使用YOLOv11进行目标检测（尤其是旋转框检测，OBB）时，数据格式的适配是关键步骤。原始的标注格式是：x1 y1 x2 y2 x3 y3 x4 y4 class_name difficulty，而YOLO-OBB要求的输入格式是：class_index x1 y1 x2 y2 x3 y3 x4 y4，其中x1 y1 x2 y2 x3 y3 x4 y4是由目标四个角点确定的边界框，坐标需归一化到0和1之间，因此需要做一个简单的转换。当然，这样做的前提是我已经对原始数据完成了标注。

二、核心步骤：原始标注→YOLO-OBB格式转换

1. 原始标注分析

原始标注格式为：x1 y1 x2 y2 x3 y3 x4 y4 class_name difficulty（8个真实坐标+类别名+难度），在我的标签txt文件中是这样的：

bash 复制代码

947.1667987930293 472.63887625737107 519.8770434482032 545.9859254267041 503.1668012069706 448.6389237426289 930.4565565517969 375.2918745732959 shrimp 0

YOLO-OBB要求的格式为：class_index x1_norm y1_norm x2_norm y2_norm x3_norm y3_norm x4_norm y4_norm（类别索引+8个归一化坐标）。

2. 转换代码解析

核心逻辑如下：

python:media/a/data4t/DL/yolov11/convert_to_yolo_obb.py 复制代码

# Key step 1: read the image to get its size (needed for normalization)
img = cv2.imread(image_path)
img_width, img_height = img.shape[1], img.shape[0]

# Key step 2: normalize the coordinates (x_real / img_width, y_real / img_height)
normalized_coords = []
for i in range(0, 8, 2):
    x_real = coords[i]
    y_real = coords[i+1]
    x_norm = x_real / img_width
    y_norm = y_real / img_height
    normalized_coords.extend([x_norm, y_norm])

# Key step 3: build the YOLO-format line (class_index is fixed to 0 because the data has a single class)
yolo_line = "0 " + " ".join(map(str, normalized_coords))

具体代码如下：

python 复制代码

import os
import cv2
from pathlib import Path

def _to_yolo_obb_line(line: str, img_width: int, img_height: int):
    """Convert one raw annotation line to a YOLO-OBB line, or return None if it is malformed.

    The raw line must hold exactly 10 whitespace-separated fields:
    8 pixel coordinates, a class name and a difficulty flag.
    """
    parts = line.split()
    if len(parts) != 10:
        return None
    try:
        coords = [float(value) for value in parts[:8]]
    except ValueError:
        # A non-numeric coordinate must not abort the whole batch.
        return None

    normalized_coords = []
    for x_real, y_real in zip(coords[0::2], coords[1::2]):
        # YOLO-OBB expects coordinates in [0, 1]; corners of a rotated box can
        # fall slightly outside the image, so clamp after normalizing.
        normalized_coords.append(min(max(x_real / img_width, 0.0), 1.0))
        normalized_coords.append(min(max(y_real / img_height, 0.0), 1.0))

    # The class index is fixed to 0: the class name and difficulty are ignored.
    return "0 " + " ".join(map(str, normalized_coords))


def convert_label(
    original_label_dir: str,
    image_dir: str,
    output_dir: str,
    image_exts: tuple = (".jpg", ".jpeg", ".png", ".bmp"),
) -> None:
    """Convert raw quadrilateral annotations to YOLO-OBB label files.

    Every ``*.txt`` file in ``original_label_dir`` is read line by line. Input
    lines have the form ``x1 y1 x2 y2 x3 y3 x4 y4 class_name difficulty`` in
    pixel coordinates; output lines have the form ``0 x1 y1 x2 y2 x3 y3 x4 y4``
    with each coordinate divided by the image width/height and clamped to
    [0, 1]. Label files whose image is missing or unreadable are skipped with
    a warning, and so are malformed lines.

    Args:
        original_label_dir: Directory containing the raw annotation .txt files.
        image_dir: Directory containing the images; an image shares its file
            stem with the annotation file.
        output_dir: Directory that receives the YOLO-OBB label files; created
            if it does not exist.
        image_exts: Image extensions to try, in order, when locating the image
            that belongs to an annotation file.
    """
    os.makedirs(output_dir, exist_ok=True)

    for txt_path in Path(original_label_dir).glob("*.txt"):
        # Locate the image that shares the annotation file's stem.
        candidates = [os.path.join(image_dir, txt_path.stem + ext) for ext in image_exts]
        image_path = next((p for p in candidates if os.path.exists(p)), None)
        if image_path is None:
            missing = candidates[0] if candidates else os.path.join(image_dir, txt_path.stem)
            print(f"警告：图片 {missing} 不存在，跳过标注文件 {txt_path}")
            continue

        # The image is only needed for its size, which drives the normalization.
        img = cv2.imread(image_path)
        if img is None:
            print(f"警告：无法读取图片 {image_path}，跳过标注文件 {txt_path}")
            continue
        img_height, img_width = img.shape[:2]  # shape is (height, width, channels)

        yolo_lines = []
        # Only the numeric fields are used, so undecodable bytes in the class
        # name are replaced instead of failing the whole file.
        with open(txt_path, "r", encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()
                if not line:  # skip blank lines
                    continue
                yolo_line = _to_yolo_obb_line(line, img_width, img_height)
                if yolo_line is None:
                    print(f"警告：无效标注行 {line}，格式应为8个坐标+类别+难度")
                    continue
                yolo_lines.append(yolo_line)

        # The output file keeps the name of the annotation file.
        output_path = os.path.join(output_dir, txt_path.name)
        with open(output_path, "w", encoding="utf-8") as f:
            f.write("\n".join(yolo_lines))
        print(f"转换完成：{txt_path.name} -> {output_path}")


def main() -> None:
    """Run the conversion using the directory settings below."""
    # -------------------------- Paths to be edited by the user --------------------------
    # All three paths are relative to the working directory. The image and
    # output directories were previously anchored at the filesystem root,
    # where creating the output directory normally fails for lack of permission.
    original_label_dir = "labelTxt"  # directory of the raw annotation .txt files
    image_dir = "images"  # directory of the matching images
    output_dir = "labels"  # output directory for the YOLO-format labels
    # -------------------------------------------------------------------------------------

    convert_label(original_label_dir, image_dir, output_dir)


if __name__ == "__main__":
    main()

输出示例：

bash 复制代码

0 0.5551974201600406 0.36924912207607113 0.3047344920563911 0.4265515042396125 0.2949395083276498 0.3504991591739288 0.5454024364312995 0.29319677701038743

三、验证：可视化转换后的标注

为确保转换正确，使用draw_obb_bbox.py在原图上绘制四边形框。

1. 可视化脚本`draw_obb_bbox.py`解析

核心逻辑：

python:media/a/data4t/DL/yolov11/draw_obb_bbox.py 复制代码

# Key step 1: read the normalized coordinates and convert them to pixel coordinates
points = []
for i in range(0, 8, 2):
    x_norm, y_norm = coords[i], coords[i+1]
    x_px = int(round(x_norm * w))
    y_px = int(round(y_norm * h))
    points.append((x_px, y_px))

# Key step 2: draw the closed quadrilateral with OpenCV
pts = np.array(points, dtype=np.int32).reshape((-1, 1, 2))
cv2.polylines(img, [pts], isClosed=True, color=(0, 255, 0), thickness=2)

详细代码：

python 复制代码

import cv2
import numpy as np
import os

def draw_obb_bbox(label_path: str, image_path: str, output_path: str) -> None:
    """Draw the quadrilaterals of a YOLO-OBB label file onto an image and save it.

    Each label line has the form ``class_id x1 y1 x2 y2 x3 y3 x4 y4`` with
    normalized coordinates; they are scaled by the image width/height, rounded
    to integer pixels and drawn as closed green polygons.

    Args:
        label_path: Path of the label file (.txt).
        image_path: Path of the source image (.jpg/.png, ...).
        output_path: Path of the result image (.jpg/.png, ...).

    Raises:
        FileNotFoundError: The label file or the image does not exist.
        ValueError: The image cannot be decoded, or a label line is malformed.
        OSError: The result image cannot be written.
    """
    # Fail early with a clear message when an input is missing.
    if not os.path.exists(label_path):
        raise FileNotFoundError(f"标注文件不存在: {label_path}")
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"图片文件不存在: {image_path}")

    # cv2.imread signals a decode failure by returning None, not by raising.
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"无法读取图片: {image_path}")
    h, w = img.shape[:2]  # shape is (height, width, channels)

    # Read the label file, dropping blank lines.
    with open(label_path, "r", encoding="utf-8") as f:
        annotations = [line.strip() for line in f if line.strip()]

    for ann in annotations:
        parts = ann.split()
        if len(parts) != 9:
            raise ValueError(f"无效标注行: {ann}\n格式应为: class_id x1 y1 x2 y2 x3 y3 x4 y4")

        int(parts[0])  # validation only: raises ValueError if the class id is not an integer
        coords = [float(value) for value in parts[1:9]]  # normalized coordinates

        # Scale to pixel coordinates, rounding to the nearest integer.
        points = [
            (int(round(x_norm * w)), int(round(y_norm * h)))
            for x_norm, y_norm in zip(coords[0::2], coords[1::2])
        ]

        # Closed quadrilateral, green, line width 2.
        pts = np.array(points, dtype=np.int32).reshape((-1, 1, 2))
        cv2.polylines(img, [pts], isClosed=True, color=(0, 255, 0), thickness=2)

    # Create the output directory only when the path has one:
    # os.makedirs("") raises FileNotFoundError for a bare file name.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(output_path, img):
        raise OSError(f"结果图保存失败: {output_path}")
    print(f"成功绘制，结果保存至: {output_path}")


def main() -> None:
    """Entry point: set the three paths below, then draw and report any failure."""
    # -------------------------- Paths to be edited by the user --------------------------
    label_file, image_file, output_file = (
        "240298772856875_0.txt",
        "240298772856875_0.jpg",
        "240298772856875_0_vis.jpg",
    )
    # -------------------------------------------------------------------------------------

    try:
        draw_obb_bbox(label_file, image_file, output_file)
    except Exception as err:
        # Top-level boundary: print the failure reason instead of a traceback.
        print(f"绘制失败: {err}")


if __name__ == "__main__":
    main()

2. 运行验证

修改main()函数中的路径参数后执行：

bash 复制代码

python draw_obb_bbox.py

输出结果保存在main()中output_file指定的路径（示例中为当前目录下的240298772856875_0_vis.jpg），可直接查看标注是否与原图目标对齐。

四、总结

通过convert_to_yolo_obb.py和draw_obb_bbox.py，我们完成了从原始四边形标注到YOLO-OBB格式的转换与验证。下一步可将处理后的数据放到指定目录，启动YOLOv11-OBB训练。

目标检测之YOLOV11自定义数据预处理——从原始标注到YOLO-OBB格式转换与验证

前言

一、背景与问题引入

二、核心步骤：原始标注→YOLO-OBB格式转换

1. 原始标注分析

2. 转换代码解析

三、验证：可视化转换后的标注

1. 可视化脚本draw_obb_bbox.py解析

2. 运行验证

四、总结

1. 可视化脚本`draw_obb_bbox.py`解析