CCPD 车牌数据集提取标注，并转为标准 YOLO 格式

目录

介绍

转换代码

下载地址：

介绍

CCPD 是一个大型的、多样化的、经过仔细标注的中国城市车牌开源数据集。CCPD 数据集主要分为 CCPD2019 数据集和 CCPD2020（CCPD-Green） 数据集。CCPD2019 数据集车牌类型仅有普通车牌（蓝色车牌），CCPD2020 数据集车牌类型仅有新能源车牌（绿色车牌）。

具体介绍可以看看这篇：

[深度学习] CCPD车牌数据集介绍_ccpd数据集-CSDN博客https://blog.csdn.net/LuohenYJ/article/details/117752120

下载地址在文末，只要数据集的直接下滑

下载后是这样的：

可以看到，该数据集包含的场景还是很丰富的，并且数据量也特多，cpdd_base 里甚至有 20w 数据，每个图片名字的文件就是该图片的标注（除了 ccpd_np 文件夹），例如下面这个文件：

01-86_91-298&341_449&414-458&394_308&410_304&357_454&341-0_0_14_28_24_26_29-124-24

我们按照 "-" 分割这个字符串，得到如下结果：

|-------------------------------------|-------------------------------------------|
| 01 | 编号 |
| 86_91 | 车牌水平偏转和垂直偏转角 |
| 298&341_449&414 | 车牌左上角坐标和右下角坐标（目标识别矩形框） |
| 458&394_308&410_304&357_454&341 | 车牌的四个角点，按照顺序分别为右下、左下、左上、右上（yolo-pose姿态识别） |
| 0_0_14_28_24_26_29 | 车牌号码 |
| 124 | 亮度 |
| 24 | 模糊度 |

转换代码

知道字符串各个含义以及连接规则后，则可以通过字符分割字符串，并提取需要的信息，并将

信息批量保存至 txt 文件即可，我这里为大家提供两段代码，一段提取车牌目标识别（仅矩形框），一段提取车牌姿态（矩形框+4个角点）。

车牌目标识别提取：

python 复制代码

import os
import cv2


def parse_bbox_from_filename(filename):
    """
    从文件名中解析出车牌目标矩形框的左上角、右下角坐标。
    """
    parts = filename.split('-')

    # 解析矩形框坐标
    bbox_part = parts[2].split('_')
    x1, y1 = map(int, bbox_part[0].split('&'))  # 左上角
    x2, y2 = map(int, bbox_part[1].split('&'))  # 右下角

    return (x1, y1), (x2, y2)


def get_bbox(x_min, y_min, x_max, y_max, img_w, img_h):
    """
    根据给定的最小最大坐标计算YOLO格式的bbox（中心点坐标、宽度、高度）。
    """
    x_center = round((x_min + x_max) / 2 / img_w, 6)
    y_center = round((y_min + y_max) / 2 / img_h, 6)
    width = round((x_max - x_min) / img_w, 6)
    height = round((y_max - y_min) / img_h, 6)

    return x_center, y_center, width, height


if __name__ == '__main__':
    images_dir = r'D:\tcy_works\data\CCPD2019\ccpd_base'  # 替换为你自己的图像路径
    save_dir = r'D:\tcy_works\data\CCPD_deal\CCPD2019\ccpd_base'  # 替换为你想保存结果的路径

    os.makedirs(os.path.join(save_dir, "labels"), exist_ok=True)

    image_files = [os.path.join(images_dir, f) for f in os.listdir(images_dir) if f.lower().endswith(('.jpg', '.png'))]

    for image_path in image_files:
        filename = os.path.basename(image_path)

        try:
            (x_min, y_min), (x_max, y_max) = parse_bbox_from_filename(filename)
        except Exception as e:
            print(f"解析文件名失败: {filename}, 错误: {e}")
            continue

        img = cv2.imread(image_path)
        if img is None:
            print(f"读取图像失败: {image_path}")
            continue

        height, width = img.shape[:2]

        # 计算 bounding box
        x_center, y_center, w_bbox, h_bbox = get_bbox(x_min, y_min, x_max, y_max, width, height)

        # 构建 YOLO 格式标注（仅 bbox）
        class_id = 0  # 假设类别ID为0
        label_line = [class_id, x_center, y_center, w_bbox, h_bbox]
        label_line = list(map(str, label_line))

        # 保存 label 文件
        label_file = os.path.join(save_dir, "labels", os.path.splitext(filename)[0] + ".txt")
        with open(label_file, 'w') as f:
            f.write(' '.join(label_line) + '\n')

        print(f"已处理图像: {filename}")

    print("所有图像处理完成！")

车牌姿态提取：

python 复制代码

import os
import cv2


def parse_points_from_filename(filename):
    """
    从文件名中解析出车牌目标矩形框的左上角、右下角坐标以及车牌四个角点坐标。
    """
    parts = filename.split('-')

    # 解析矩形框坐标
    bbox_part = parts[2].split('_')
    x1, y1 = map(int, bbox_part[0].split('&'))  # 左上角
    x2, y2 = map(int, bbox_part[1].split('&'))  # 右下角

    # 解析角点坐标：parts[3] 如 '458&394_308&410_304&357_454&341'
    points = []
    for point in parts[3].split('_'):
        x, y = map(int, point.split('&'))
        points.append((x, y))

    return (x1, y1), (x2, y2), points


def normalize_points(points, img_w, img_h):
    """
    将点坐标归一化到 [0, 1] 范围内，并保留 6 位小数。
    返回列表格式：[x1, y1, vis1, x2, y2, vis2, ..., xn, yn, visn]
    """
    norm_points = []
    for x, y in points:
        x_norm = round(x / img_w, 6)
        y_norm = round(y / img_h, 6)
        visibility = 2.000000  # YOLO-Pose 中表示该关键点始终可见
        norm_points.extend([x_norm, y_norm, visibility])
    return norm_points


def get_bbox(x_min, y_min, x_max, y_max, img_w, img_h):
    """
    根据给定的最小最大坐标计算YOLO格式的bbox（中心点坐标、宽度、高度）。
    """
    x_center = round((x_min + x_max) / 2 / img_w, 6)
    y_center = round((y_min + y_max) / 2 / img_h, 6)
    width = round((x_max - x_min) / img_w, 6)
    height = round((y_max - y_min) / img_h, 6)

    return x_center, y_center, width, height


if __name__ == '__main__':
    images_dir = r'D:\tcy_works\data\CCPD2019\ccpd_base'  # 替换为你自己的图像路径
    save_dir = r'D:\tcy_works\data\CCPD_deal\CCPD2019\ccpd_base'  # 替换为你想保存结果的路径

    os.makedirs(os.path.join(save_dir, "labels"), exist_ok=True)

    image_files = [os.path.join(images_dir, f) for f in os.listdir(images_dir) if f.lower().endswith(('.jpg', '.png'))]

    for image_path in image_files:
        filename = os.path.basename(image_path)

        try:
            (x_min, y_min), (x_max, y_max), points = parse_points_from_filename(filename)
        except Exception as e:
            print(f"解析文件名失败: {filename}, 错误: {e}")
            continue

        img = cv2.imread(image_path)
        if img is None:
            print(f"读取图像失败: {image_path}")
            continue

        height, width = img.shape[:2]

        # 计算 bounding box
        x_center, y_center, w_bbox, h_bbox = get_bbox(x_min, y_min, x_max, y_max, width, height)

        # 归一化角点坐标 + 添加可见性（每个角点加一个 2.000000）
        norm_points = normalize_points(points, width, height)

        # 构建 YOLO-Pose 格式标注
        class_id = 0  # 假设类别ID为0
        label_line = [class_id, x_center, y_center, w_bbox, h_bbox] + norm_points
        label_line = list(map(lambda x: f"{x:.6f}", label_line))  # 统一保留6位小数

        # 保存 label 文件
        label_file = os.path.join(save_dir, "labels", os.path.splitext(filename)[0] + ".txt")
        with open(label_file, 'w') as f:
            f.write(' '.join(label_line) + '\n')

        print(f"已处理图像: {filename}")

    print("所有图像处理完成！")

下载地址：

百度网盘（密码：ol3j）https://pan.baidu.com/share/init?surl=JSpc9BZXFlPkXxRK4qUCyw

drive.google.comhttps://drive.google.com/file/d/1m8w1kFxnCEiqz_-t2vTcgrgqNIv986PR/view?usp=sharing

感谢您的观看！！！