如何从 YOLO 训练常用的图像数据集（如 COCO）中提取某个类别的图像

比如，我只想从数据集中取出含手机的图像及其标注框用于自己的训练，而不必自己一张一张地标注，方法如下：

复制代码

# -*- coding: utf-8 -*-
import json
import os
import shutil
from pathlib import Path
from tqdm import tqdm
import cv2
from collections import defaultdict

DEBUG = True  # Keep enabled while debugging

# Which COCO 2017 split to process: "val" or "train".
kind = "val"
if kind == "val":
    ann_file = r"F:\coco_pictures\annotations_trainval2017\annotations\instances_val2017.json"
    img_dir = r"F:\coco_pictures\cocoval2017\val2017"
    out_dir = r"E:\picture\coco\cellphone_val2017"                     # raw images are copied here
    out_dir_result = r"E:\picture\coco\cellphone_val2017_box"          # images with boxes drawn
    out_dir_result_txt = r"E:\picture\coco\cellphone_val2017_box_txt"  # one coordinate txt per image
elif kind == "train":
    # ===== Edit these paths as needed =====
    ann_file = r"F:\coco_pictures\annotations_trainval2017\annotations\instances_train2017.json"
    img_dir = r"F:\coco_pictures\coco_train2017\train2017"
    out_dir = r"E:\picture\coco\cellphone_train2017"                     # raw images are copied here
    out_dir_result = r"E:\picture\coco\cellphone_train2017_box"          # images with boxes drawn
    out_dir_result_txt = r"E:\picture\coco\cellphone_train2017_box_txt"  # one coordinate txt per image
    # ======================================
else:
    # Without this branch an unsupported value would only surface further
    # down as a NameError on out_dir, which hides the real cause.
    raise ValueError(f"unsupported kind: {kind!r} (expected 'val' or 'train')")

# Create all output directories up front; already-existing ones are fine.
for _out_path in (out_dir, out_dir_result, out_dir_result_txt):
    Path(_out_path).mkdir(parents=True, exist_ok=True)

# 1) Load the COCO annotation JSON.
with open(ann_file, "r", encoding="utf-8") as fp:
    coco = json.load(fp)

# 2) Look up the id of the "cell phone" category (name compared
#    case-insensitively); the first match wins.
cellphone_id = next(
    (
        int(entry["id"])
        for entry in coco["categories"]
        if entry["name"].lower() == "cell phone"
    ),
    None,
)
if cellphone_id is None:
    raise RuntimeError("未在 categories 中找到 'cell phone'")

print("cell phone category_id =", cellphone_id)

# 3) Index the image metadata records by integer image id.
id2img = {}
for record in coco["images"]:
    id2img[int(record["id"])] = record

# 4) Group every cell-phone bounding box under the image it belongs to.
imgid_to_bboxes = defaultdict(list)
phone_annotations = (
    entry
    for entry in coco["annotations"]
    if int(entry.get("category_id", -1)) == cellphone_id
)
for entry in phone_annotations:
    owner_id = int(entry["image_id"])
    imgid_to_bboxes[owner_id].append(entry["bbox"])  # COCO box: [x, y, w, h] (float)

# 5) The ids collected here drive the per-image loop below, which copies the
#    raw image, saves a copy with boxes drawn, and writes the coordinate txt.
cellphone_img_ids = list(imgid_to_bboxes)
print(f"共 {len(cellphone_img_ids)} 张图含有手机标注。")

def _clip_bbox_to_xyxy(bbox, width, height):
    """Convert a COCO [x, y, w, h] box to integer corners clipped to the image.

    Returns an (x1, y1, x2, y2) tuple, or None when the clipped box has no
    positive width or height.
    """
    x, y, w, h = bbox
    x1 = max(0, min(int(round(x)), width - 1))
    y1 = max(0, min(int(round(y)), height - 1))
    x2 = max(0, min(int(round(x + w)), width - 1))
    y2 = max(0, min(int(round(y + h)), height - 1))
    if x2 <= x1 or y2 <= y1:
        return None
    return x1, y1, x2, y2


# Drawing constants. The label text never changes, so its pixel size is
# measured once here instead of once per box.
_LABEL = "cell phone"
_FONT = cv2.FONT_HERSHEY_SIMPLEX
_GREEN = (0, 255, 0)
(_label_w, _label_h), _ = cv2.getTextSize(_LABEL, _FONT, 0.6, 2)

# Skip / failure counters, reported after the loop so that problems such as
# a wrong img_dir do not pass unnoticed.
n_no_meta = 0
n_missing = 0
n_unreadable = 0
n_write_failed = 0

for img_id in tqdm(cellphone_img_ids):
    meta = id2img.get(img_id)
    if not meta:
        n_no_meta += 1
        continue
    file_name = meta["file_name"]
    src = os.path.join(img_dir, file_name)
    dst_img_copy = os.path.join(out_dir, file_name)
    dst_boxed = os.path.join(out_dir_result, file_name)
    dst_txt = os.path.join(out_dir_result_txt, Path(file_name).stem + ".txt")

    # Copy the raw image (an already-copied file is left untouched).
    if not os.path.exists(src):
        n_missing += 1
        continue
    if not os.path.exists(dst_img_copy):
        Path(os.path.dirname(dst_img_copy)).mkdir(parents=True, exist_ok=True)
        shutil.copy(src, dst_img_copy)

    # Decode the image; cv2.imread returns None instead of raising on failure.
    im = cv2.imread(src)
    if im is None:
        n_unreadable += 1
        continue
    H, W = im.shape[:2]

    # Draw every valid box and remember its clipped corners for the txt file.
    clipped_boxes_xyxy = []
    for bbox in imgid_to_bboxes[img_id]:
        corners = _clip_bbox_to_xyxy(bbox, W, H)
        if corners is None:
            continue
        clipped_boxes_xyxy.append(corners)
        x1, y1, x2, y2 = corners

        # Box outline plus a filled label tag; the tag sits above the box and
        # is pushed down to the top edge when there is no room above it.
        cv2.rectangle(im, (x1, y1), (x2, y2), _GREEN, thickness=2)
        ty1 = max(0, y1 - _label_h - 6)
        cv2.rectangle(im, (x1, ty1), (x1 + _label_w + 6, ty1 + _label_h + 4),
                      _GREEN, thickness=-1)
        cv2.putText(im, _LABEL, (x1 + 3, ty1 + _label_h + 1),
                    _FONT, 0.6, (0, 0, 0), thickness=2, lineType=cv2.LINE_AA)

    # Save the annotated image. cv2.imwrite reports failure through its
    # return value, so check it rather than assuming success.
    Path(os.path.dirname(dst_boxed)).mkdir(parents=True, exist_ok=True)
    if not cv2.imwrite(dst_boxed, im, [cv2.IMWRITE_JPEG_QUALITY, 95]):
        n_write_failed += 1

    # Save the coordinates, one box per line: x1 y1 x2 y2.
    # An empty file is written when no box survived clipping so that the txt
    # list stays aligned with the image list.
    with open(dst_txt, "w", encoding="utf-8") as ftxt:
        ftxt.writelines(
            f"{x1} {y1} {x2} {y2}\n" for (x1, y1, x2, y2) in clipped_boxes_xyxy
        )

if n_no_meta or n_missing or n_unreadable or n_write_failed:
    print(
        f"警告：缺少元信息 {n_no_meta} 张，原图不存在 {n_missing} 张，"
        f"无法读取 {n_unreadable} 张，带框图写入失败 {n_write_failed} 张。"
    )

print(f"完成！原图复制到：{out_dir}")
print(f"完成！带框结果图输出到：{out_dir_result}")
print(f"完成！坐标TXT输出到：{out_dir_result_txt}（每行：x1 y1 x2 y2）")

如何降低误报（提升精确率）的实操建议

门限分级
- 提高 cell phone 类的专属置信度阈值 conf_thr（类别特定阈值）；或对小目标额外提高阈值（当框的面积或边长小于设定值时，在 conf_thr 基础上再加 Δ）。
后处理约束
- 最小/最大面积过滤（相对图像面积），去除远处噪点或过大异常框。
- 纵横比过滤：手机通常纵横比在一定范围（含壳/角度会变化，可设宽松区间）。
- 类别共现/上下文：要求手机与手、人，或桌面/键盘/屏幕等场景共现；否则降权或拒绝（可用多任务检测、语义分割或场景分类辅助判断）。
两段式判别（Cascade）
- 检测器先负责召回，再用一个轻量分类器或 Patch 质量判别器做二次筛选（比如 MobileNet/RepVGG 小模型）：对候选框裁剪后复判，可显著减少误报（FP）。
困难负样本挖掘（Hard Negative Mining）
- 用本脚本导出的误报图集做"负样本再训练"：将误报区域打负标签或加"非手机"对比样本，继续微调。
数据增强针对性
- 加入与手机外观相似的物体（遥控器、移动电源、充电器、书本边角、黑色矩形图案、广告牌等）作为负样本；
- 光照/模糊/噪声/尺度/角度增强，降低模型将"黑色高对比矩形"错当手机的概率。
NMS与多尺度
- 合理调 iou（NMS）与 conf；对于密集小目标，试 diou-nms / soft-nms；
- 多尺度测试（或训练）以稳住不同距离下的外观差异。
模型层面
- 选择更合适的输入分辨率（手机通常是小目标，适当提高分辨率有助于减少误报和漏检）；
- 若部署在 OpenVINO/RKNN 等，注意量化误差：用代表性数据做校准，并检查感兴趣类上的 mAP 变化。