【无标题】 - 技术栈

MMDetection 框架使用

太久没用有点忘了，记录一下

环境篇

环境篇没啥好说的，主要是有几个包要注意一下。

在原先的环境上多了这几个环境包

powershell 复制代码

# Create a dedicated conda environment and switch to it, so that the pip/mim
# installs below land in "mmdet" rather than in the currently active environment.
conda create -n mmdet python=3.9 anaconda
conda activate mmdet
pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 -i https://pypi.tuna.tsinghua.edu.cn/simple
# OpenMMLab package manager, then the MMEngine / MMCV dependencies.
pip install -U openmim
mim install mmengine
mim install "mmcv>=2.0.0"
# Fetch the MMDetection sources and install them in editable mode.
git clone https://github.com/open-mmlab/mmdetection.git
cd mmdetection
pip install -v -e .
# NOTE(review): the next line replaces the torch 2.1.0 / torchvision 0.16.0 /
# torchaudio 2.1.0 install made above with 2.0.0 / 0.15.1 / 2.0.1 -- confirm which
# pair is intended and keep only one of the two torch install lines.
pip install torch==2.0.0 torchvision==0.15.1 torchaudio==2.0.1 -i https://pypi.mirrors.ustc.edu.cn/simple
# Pinned numpy / OpenCV versions.
pip install numpy==1.26.4 -i https://pypi.mirrors.ustc.edu.cn/simple
pip install opencv-python==4.8.1.78 opencv-python-headless==4.8.1.78 -i https://pypi.tuna.tsinghua.edu.cn/simple

要注意的是有几个包会冲突，实测下来上面这几个版本（numpy 和 opencv-python）能完美兼容。

脚本转换

yolo2coco

python 复制代码

# -*- coding: utf-8 -*-
# 声明文件编码为utf-8

import os  # 导入os模块，用于操作文件和目录
import cv2  # 导入OpenCV库，用于图像处理
import json  # 导入json模块，用于处理JSON格式数据
from tqdm import tqdm  # 导入tqdm模块，用于显示进度条
import argparse  # 导入argparse模块，用于解析命令行参数

# Category names. yolo2coco() uses each name's position in this list as its COCO
# category id, and writes a label line's leading class index as ``category_id``.
classes = ['ship']  # currently a single category: 'ship'

# Command-line options for the converter.
# NOTE: the arguments are parsed here, at module level, so parsing happens as soon
# as this file is executed or imported.
parser = argparse.ArgumentParser()
parser.add_argument('--image_path', default=r'/root/dataset/images/train', type=str, help="path of images")  # directory containing the images
parser.add_argument('--label_path', default=r'/root/dataset/labels/train', type=str, help="path of labels .txt")  # directory containing the YOLO .txt label files
parser.add_argument('--save_path', type=str, default='/root/dataset/coco-labels/train/train.json', help="if not split the dataset, give a path to a json file")  # output JSON file path
arg = parser.parse_args()  # parsed options, passed to yolo2coco() by the main guard

def yolo2coco(arg):
    """Convert a YOLO-format detection dataset into one COCO-format JSON file.

    For every file in ``arg.image_path`` that OpenCV can read and that has a
    same-named ``.txt`` file in ``arg.label_path``, one COCO ``images`` entry is
    written, plus one ``annotations`` entry per valid label line. Images that
    cannot be read, or that have no label file, are skipped.

    Args:
        arg: Namespace-like object with the attributes
            ``image_path`` (image directory),
            ``label_path`` (directory of YOLO ``.txt`` label files) and
            ``save_path`` (output JSON file; its directory is created if needed).

    Raises:
        AssertionError: if ``image_path`` or ``label_path`` does not exist.

    Notes:
        * Each label line must be ``class x_center y_center width height`` with
          the four box values normalised to [0, 1]; malformed lines are reported
          and skipped.
        * The image ``id`` (and each annotation's ``image_id``) is the file name
          without extension, i.e. a string.
          NOTE(review): COCO tooling conventionally uses integer ids -- confirm
          that downstream consumers accept string ids.
        * Category ids are the 0-based positions in the module-level ``classes``.
    """
    print("Loading data from ", arg.image_path, arg.label_path)

    # Both input directories must exist before anything is read.
    assert os.path.exists(arg.image_path), f"Image path {arg.image_path} does not exist"
    assert os.path.exists(arg.label_path), f"Label path {arg.label_path} does not exist"

    images_dir = arg.image_path
    labels_dir = arg.label_path

    # Sorted so that image order and annotation ids are reproducible; the order
    # returned by os.listdir() is arbitrary.
    file_names = sorted(os.listdir(images_dir))

    dataset = {'categories': [], 'annotations': [], 'images': []}
    for i, cls in enumerate(classes):
        dataset['categories'].append({'id': i, 'name': cls, 'supercategory': 'mark'})

    ann_id_cnt = 0  # running annotation id, unique across the whole dataset

    for file_name in tqdm(file_names):
        # splitext() keeps names without a dot intact, unlike slicing at rfind(".").
        stem = os.path.splitext(file_name)[0]
        image_file = os.path.join(images_dir, file_name)
        label_file = os.path.join(labels_dir, f'{stem}.txt')

        try:
            im = cv2.imread(image_file)
            # cv2.imread() returns None for unreadable files, so this line also
            # acts as the "not an image" check via the except branch below.
            img_h, img_w = im.shape[:2]
        except Exception as e:
            print(f'{image_file} read error.\nerror:{e}')
            continue

        # Images without a label file are left out of the dataset entirely.
        if not os.path.exists(label_file):
            continue

        dataset['images'].append({
            'file_name': file_name,
            'id': stem,
            'width': img_w,
            'height': img_h
        })

        with open(label_file, 'r') as fr:
            label_lines = fr.readlines()

        for line in label_lines:
            label = line.strip().split()

            # Blank lines are allowed (e.g. label files of negative samples).
            if len(label) == 0:
                continue

            # A YOLO box line has exactly five fields: class x y w h.
            if len(label) != 5:
                print(f"Warning: Invalid label format in {os.path.basename(label_file)}: {label}")
                continue

            try:
                category_id = int(label[0])  # class index
                x = float(label[1])  # normalised box-centre x
                y = float(label[2])  # normalised box-centre y
                w = float(label[3])  # normalised box width
                h = float(label[4])  # normalised box height
            except ValueError as e:
                print(f"Warning: Invalid label values in {os.path.basename(label_file)}: {label}, error: {e}")
                continue

            # Normalised centre/size -> absolute corner coordinates in pixels.
            x1 = (x - w / 2) * img_w
            y1 = (y - h / 2) * img_h
            x2 = (x + w / 2) * img_w
            y2 = (y + h / 2) * img_h

            # COCO boxes are [x, y, width, height]; negative extents are clamped to 0.
            box_w = max(0, x2 - x1)
            box_h = max(0, y2 - y1)

            dataset['annotations'].append({
                'area': box_w * box_h,
                'bbox': [x1, y1, box_w, box_h],
                'category_id': category_id,
                'id': ann_id_cnt,
                'image_id': stem,
                'iscrowd': 0,
                # Rectangle polygon built from the four box corners.
                'segmentation': [[x1, y1, x2, y1, x2, y2, x1, y2]]
            })
            ann_id_cnt += 1

    # Create the output directory if needed, then write the JSON file.
    folder = os.path.dirname(arg.save_path)
    if folder:
        os.makedirs(folder, exist_ok=True)

    with open(arg.save_path, 'w') as f:
        json.dump(dataset, f)
    print('Save annotation to {}'.format(arg.save_path))

if __name__ == "__main__":
    yolo2coco(arg)  # script entry point: run the conversion with the parsed command-line options

改路径就行

训练

powershell 复制代码

 # Launch training with the given config file (run from the mmdetection directory).
 python tools/train.py <your-config-file>

验证

powershell 复制代码

 # Run the test script with a config and a weights file; --out takes the path of the pickle file to save.
 python tools/test.py <your-config-file> <your-model-weights-file> --out <save-pickle-path>

mmdet2yolo

python 复制代码

import os
import torch
import cv2
import math
import tqdm
import argparse
import json
import pickle
import numpy as np
from prettytable import PrettyTable

def clip_boxes(boxes, shape):
    """Clip xyxy boxes in place so they lie inside an image.

    Args:
        boxes: torch.Tensor or numpy array whose last axis starts with
            (x1, y1, x2, y2); it is modified in place.
        shape: image shape, indexed as ``shape[0]`` = height, ``shape[1]`` = width.

    Returns:
        None.
    """
    max_y, max_x = shape[0], shape[1]

    if not isinstance(boxes, torch.Tensor):
        # numpy path: clip both x columns together, then both y columns.
        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, max_x)
        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, max_y)
        return

    # torch path: clamp each coordinate column in place.
    for column, upper in ((0, max_x), (1, max_y), (2, max_x), (3, max_y)):
        boxes[..., column].clamp_(0, upper)

def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
    """Map xyxy boxes from ``img1_shape`` coordinates back to ``img0_shape``.

    The padding offset is subtracted, the result is divided by the resize gain,
    and the boxes are clipped to ``img0_shape``. ``boxes`` is modified in place
    and also returned.

    Args:
        img1_shape: (height, width) of the image the boxes currently refer to.
        boxes: array/tensor whose last axis starts with (x1, y1, x2, y2).
        img0_shape: (height, width) of the target image.
        ratio_pad: optional ``(ratio, pad)`` pair; when given, ``ratio[0]`` is
            used as the gain and ``pad`` as the (x, y) padding instead of
            deriving them from the two shapes.

    Returns:
        The same ``boxes`` object, rescaled and clipped.
    """
    if ratio_pad is not None:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]
    else:
        # gain = old / new, using the tighter of the two axes.
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad_x = (img1_shape[1] - img0_shape[1] * gain) / 2
        pad_y = (img1_shape[0] - img0_shape[0] * gain) / 2
        pad = pad_x, pad_y

    boxes[..., [0, 2]] -= pad[0]  # remove horizontal padding
    boxes[..., [1, 3]] -= pad[1]  # remove vertical padding
    boxes[..., :4] /= gain
    clip_boxes(boxes, img0_shape)
    return boxes

def box_iou(box1, box2, eps=1e-7):
    """
    Pairwise intersection-over-union between two sets of (x1, y1, x2, y2) boxes.

    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
        eps (float): added to the union to avoid division by zero
    Returns:
        Tensor[N, M] of IoU values, computed in float32.
    Based on https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    first = box1.float().unsqueeze(1)   # [N, 1, 4]
    second = box2.float().unsqueeze(0)  # [1, M, 4]
    first_min, first_max = first.chunk(2, 2)
    second_min, second_max = second.chunk(2, 2)

    # Overlap extent per axis, floored at zero for disjoint boxes.
    overlap = (torch.min(first_max, second_max) - torch.max(first_min, second_min)).clamp_(0)
    inter = overlap.prod(2)

    area_first = (first_max - first_min).prod(2)
    area_second = (second_max - second_min).prod(2)
    return inter / (area_first + area_second - inter + eps)

def process_batch(detections, labels, iouv):
    """
    Build the per-threshold "correct detection" matrix for one image.

    Arguments:
        detections (array[N, 6]), x1, y1, x2, y2, conf, class
        labels (array[M, 5]), class, x1, y1, x2, y2
        iouv (Tensor[T]), IoU thresholds
    Returns:
        correct (bool Tensor[N, T]) on ``iouv.device``; entry [n, t] is True when
        detection n is kept as a match for some label at threshold t.
    """
    n_det, n_thr = detections.shape[0], iouv.shape[0]
    correct = np.zeros((n_det, n_thr)).astype(bool)

    iou = box_iou(labels[:, 1:], detections[:, :4])      # [M, N]
    same_class = labels[:, 0:1] == detections[:, 5]      # [M, N]

    for level, threshold in enumerate(iouv):
        # Candidate (label, detection) pairs: IoU reaches the threshold and classes agree.
        label_idx, det_idx = torch.where((iou >= threshold) & same_class)
        n_pairs = label_idx.shape[0]
        if not n_pairs:
            continue

        pair_iou = iou[label_idx, det_idx][:, None]
        # Columns: [label index, detection index, iou]
        matches = torch.cat((torch.stack((label_idx, det_idx), 1), pair_iou), 1).cpu().numpy()

        if n_pairs > 1:
            # Order by IoU (highest first), then keep the first row seen for each
            # detection, then the first row seen for each label.
            by_iou = matches[:, 2].argsort()[::-1]
            matches = matches[by_iou]
            first_per_det = np.unique(matches[:, 1], return_index=True)[1]
            matches = matches[first_per_det]
            first_per_label = np.unique(matches[:, 0], return_index=True)[1]
            matches = matches[first_per_label]

        correct[matches[:, 1].astype(int), level] = True

    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)

def smooth(y, f=0.05):
    """Smooth a 1-D array with a box (moving-average) filter.

    The window covers roughly a fraction ``f`` of ``len(y)``. The input is padded
    on both sides with its first/last value so that the output has the same
    length as ``y``.

    Args:
        y: non-empty 1-D numpy array.
        f: fraction of ``len(y)`` used to size the filter window.

    Returns:
        1-D numpy array of the same length as ``y``.
    """
    nf = round(len(y) * f * 2) // 2 + 1  # number of filter elements
    # The window must be odd: with an even window the nf // 2 padding per side
    # makes the 'valid' convolution return len(y) + 1 samples.
    if nf % 2 == 0:
        nf += 1
    p = np.ones(nf // 2)  # ones padding
    yp = np.concatenate((p * y[0], y, p * y[-1]), 0)  # y padded with its edge values
    return np.convolve(yp, np.ones(nf) / nf, mode='valid')  # y-smoothed

def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16, prefix=''):
    """Compute per-class precision, recall, F1 and average precision.

    Args:
        tp: array [n_pred, n_iou]; truthy where a prediction is correct at an IoU level.
        conf: array [n_pred] of prediction confidences.
        pred_cls: array [n_pred] of predicted classes.
        target_cls: array [n_target] of ground-truth classes.
        plot: when True, the interpolated precision curve of the first IoU level
            is collected per class (it is not returned).
        save_dir, names, prefix: accepted but not used in this function.
        eps: small constant guarding the divisions.

    Returns:
        Tuple ``(tp, fp, p, r, f1, ap, classes)``: true/false positive counts,
        precision, recall and F1 per class at the confidence that maximises the
        smoothed mean F1, the AP matrix [n_class, n_iou], and the class ids
        found in ``target_cls`` as ints.
    """
    # Order predictions by decreasing confidence.
    order = np.argsort(-conf)
    tp, conf, pred_cls = tp[order], conf[order], pred_cls[order]

    # Classes present in the ground truth, with their label counts.
    unique_classes, nt = np.unique(target_cls, return_counts=True)
    nc = unique_classes.shape[0]

    px = np.linspace(0, 1, 1000)  # confidence grid the curves are sampled on
    py = []                       # precision curves (only filled when plot=True)
    ap = np.zeros((nc, tp.shape[1]))
    p = np.zeros((nc, 1000))
    r = np.zeros((nc, 1000))

    for ci, c in enumerate(unique_classes):
        selected = pred_cls == c
        n_l = nt[ci]          # number of labels of this class
        n_p = selected.sum()  # number of predictions of this class
        if n_p == 0 or n_l == 0:
            continue

        cls_tp = tp[selected]
        cls_conf = conf[selected]

        # Cumulative false / true positives down the confidence ranking.
        fpc = (1 - cls_tp).cumsum(0)
        tpc = cls_tp.cumsum(0)

        # Recall curve, sampled on px (negated because confidences decrease).
        recall = tpc / (n_l + eps)
        r[ci] = np.interp(-px, -cls_conf, recall[:, 0], left=0)

        # Precision curve, sampled the same way.
        precision = tpc / (tpc + fpc)
        p[ci] = np.interp(-px, -cls_conf, precision[:, 0], left=1)

        # AP for every IoU level.
        for j in range(tp.shape[1]):
            ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
            if plot and j == 0:
                py.append(np.interp(px, mrec, mpre))

    # F1 is the harmonic mean of precision and recall.
    f1 = 2 * p * r / (p + r + eps)

    # Report metrics at the confidence index with the best smoothed mean F1.
    best = smooth(f1.mean(0), 0.1).argmax()
    p, r, f1 = p[:, best], r[:, best], f1[:, best]
    tp = (r * nt).round()
    fp = (tp / (p + eps) - tp).round()
    return tp, fp, p, r, f1, ap, unique_classes.astype(int)

def compute_ap(recall, precision, method='interp'):
    """Compute the average precision, given the recall and precision curves.

    Args:
        recall: 1-D array of recall values.
        precision: 1-D array of precision values, same length as ``recall``.
        method: ``'interp'`` (default) integrates the precision envelope sampled
            at 101 evenly spaced recall points; ``'continuous'`` sums the area
            under the envelope at the points where recall changes.

    Returns:
        Tuple ``(ap, mpre, mrec)``: the average precision, the precision envelope
        and the recall values, both including the sentinel end points.

    Raises:
        ValueError: if ``method`` is neither ``'interp'`` nor ``'continuous'``.
    """
    # Append sentinel values to beginning and end
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([1.0], precision, [0.0]))

    # Precision envelope: running maximum taken from the right-hand side
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

    if method == 'interp':
        x = np.linspace(0, 1, 101)  # 101-point interp (COCO)
        # np.trapz is deprecated in NumPy 2.x in favour of np.trapezoid; use the
        # new name when it exists and fall back to the old one otherwise.
        integrate = getattr(np, 'trapezoid', None) or np.trapz
        ap = integrate(np.interp(x, mrec, mpre), x)
    elif method == 'continuous':
        i = np.where(mrec[1:] != mrec[:-1])[0]  # points where x axis (recall) changes
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])  # area under curve
    else:
        raise ValueError(f"method must be 'interp' or 'continuous', got {method!r}")

    return ap, mpre, mrec

def parse_opt():
    """Parse the evaluation script's command-line options.

    Unrecognised arguments are ignored rather than rejected.

    Returns:
        argparse.Namespace with ``label_coco``, ``pred_coco``, ``iou``, ``conf``
        and ``img_size``.
    """
    # (flag, type, default, help) for every supported option.
    option_specs = (
        ('--label_coco', str, '/root/dataset/coco-labels/test/test.json', 'label coco path'),
        ('--pred_coco', str, '/root/program/mmdetection-main/tood.pkl', 'pred coco path'),
        ('--iou', float, 0.7, 'iou threshold'),
        ('--conf', float, 0.001, 'conf threshold'),
        # Size assumed for images that do not appear in the label file.
        ('--img_size', int, 640, 'default image size for unlabeled images'),
    )

    parser = argparse.ArgumentParser()
    for flag, value_type, default_value, help_text in option_specs:
        parser.add_argument(flag, type=value_type, default=default_value, help=help_text)

    known, _ignored = parser.parse_known_args()
    return known

if __name__ == '__main__':
    # Script entry point: score detector predictions against COCO-format ground
    # truth and print precision, recall, mAP50 and mAP50-95 overall and per class.
    # Predictions are read either from a COCO-style results JSON (a list of dicts
    # with image_id / bbox / score / category_id) or from a pickle holding a list
    # of dicts with 'img_path' and 'pred_instances' (bboxes / scores / labels).
    opt = parse_opt()

    iouv = torch.linspace(0.5, 0.95, 10)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()
    stats = []  # per image: (correct, conf, pred_cls, target_cls)

    # Load the ground-truth annotations.
    label_coco_json_path, pred_coco_json_path = opt.label_coco, opt.pred_coco
    with open(label_coco_json_path) as f:
        label = json.load(f)

    # Index the labelled images:
    #   image_id_hw_dict       image id -> [height, width]
    #   image_name_to_id_dict  file name without extension -> image id
    image_id_hw_dict = {}
    image_name_to_id_dict = {}
    for data in label['images']:
        image_id = data['id']
        image_id_hw_dict[image_id] = [data['height'], data['width']]
        image_file_name = os.path.splitext(os.path.basename(data['file_name']))[0]
        image_name_to_id_dict[image_file_name] = image_id

    # Ground-truth boxes per image as [class, x1, y1, x2, y2]. Every listed image
    # gets an entry, so images without annotations (negative samples) keep [].
    label_id_dict = {img_id: [] for img_id in image_id_hw_dict}
    for data in tqdm.tqdm(label['annotations'], desc='Process label...'):
        img_id = data['image_id']
        category_id = data['category_id']
        x_min, y_min, w, h = data['bbox'][:4]
        x_max, y_max = x_min + w, y_min + h
        # setdefault: an annotation whose image is missing from 'images' must not
        # abort the run with a KeyError.
        label_id_dict.setdefault(img_id, []).append(
            np.array([int(category_id), x_min, y_min, x_max, y_max]))

    # Predictions per image as [x1, y1, x2, y2, score, class].
    pred_id_dict = {}
    if pred_coco_json_path.endswith('json'):
        with open(pred_coco_json_path) as f:
            pred = json.load(f)
        for data in tqdm.tqdm(pred, desc='Process pred...'):
            img_id = data['image_id']
            score = data['score']
            category_id = data['category_id']
            x_min, y_min, w, h = data['bbox'][:4]
            x_max, y_max = x_min + w, y_min + h
            pred_id_dict.setdefault(img_id, []).append(
                np.array([x_min, y_min, x_max, y_max, float(score), int(category_id)]))
    else:
        # SECURITY: pickle.load() can execute arbitrary code while loading; only
        # open prediction files that you produced yourself.
        with open(pred_coco_json_path, 'rb') as f:
            pred = pickle.load(f)
        for data in tqdm.tqdm(pred, desc='Process pred...'):
            # Predictions are tied to ground truth through the image file name
            # (without extension).
            image_file_name = os.path.splitext(os.path.basename(data['img_path']))[0]
            if image_file_name in image_name_to_id_dict:
                img_id = image_name_to_id_dict[image_file_name]
            else:
                # Image absent from the label file: give it a synthetic id, the
                # default size and an empty label list, so that all of its
                # predictions count as false positives.
                img_id = f"unlabeled_{image_file_name}"
                image_id_hw_dict.setdefault(img_id, [opt.img_size, opt.img_size])
                label_id_dict.setdefault(img_id, [])

            image_preds = pred_id_dict.setdefault(img_id, [])

            # NOTE(review): 'labels' are compared directly with the ground-truth
            # category_id values -- confirm both use the same numbering.
            instances = data['pred_instances']
            for bbox, score, category_id in zip(instances['bboxes'], instances['scores'], instances['labels']):
                x_min, y_min, x_max, y_max = bbox.cpu().detach().numpy()
                image_preds.append(np.array([x_min, y_min, x_max, y_max, float(score), int(category_id)]))

    # Every image that has an image entry, labels, or predictions.
    all_image_ids = set(image_id_hw_dict) | set(label_id_dict) | set(pred_id_dict)

    total_images = len(all_image_ids)
    negative_samples = 0
    positive_samples = 0

    for image_id in tqdm.tqdm(all_image_ids, desc="Cal mAP..."):
        # 1. Ground truth for this image.
        label_data = np.array(label_id_dict.get(image_id, []))
        nl = label_data.shape[0]

        if nl == 0:
            negative_samples += 1
        else:
            positive_samples += 1

        # 2. Predictions for this image.
        if image_id in pred_id_dict:
            pred_data = torch.from_numpy(np.array(pred_id_dict[image_id]))
        else:
            pred_data = torch.empty((0, 6))
        npr = pred_data.shape[0]

        # All-False until matched; stays that way for images without labels, so
        # every prediction on a negative sample is a false positive.
        correct = torch.zeros(npr, niou, dtype=torch.bool)

        # 3. No predictions: labelled objects are all missed (they still count
        #    as targets); an unlabelled image contributes nothing.
        if npr == 0:
            if nl:
                stats.append((correct, *torch.zeros((2, 0)), torch.from_numpy(label_data[:, 0])))
            continue

        # 4. With labels, match predictions to ground truth by IoU and class.
        if nl:
            correct = process_batch(pred_data, torch.from_numpy(label_data), iouv)

        # 5. Record (correct, conf, pred_cls, target_cls) for this image.
        target_cls = torch.from_numpy(label_data[:, 0]) if nl > 0 else torch.tensor([], dtype=torch.int64)
        stats.append((correct, pred_data[:, 4], pred_data[:, 5], target_cls))

    # Dataset summary; the ratio is guarded so an empty dataset cannot divide by zero.
    negative_ratio = negative_samples / total_images * 100 if total_images else 0.0
    print(f"\n========== 数据集统计 ==========")
    print(f"总图片数: {total_images}")
    print(f"正样本数（有标注）: {positive_samples}")
    print(f"负样本数（无标注）: {negative_samples}")
    print(f"负样本比例: {negative_ratio:.2f}%")
    print(f"================================\n")

    # Compute and report the metrics.
    if len(stats) == 0:
        print("警告：无有效数据用于计算mAP")
    else:
        # Concatenate the per-image tuples into four flat arrays.
        stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)]
        if len(stats) >= 4 and stats[0].size > 0:
            tp, fp, p, r, f1, ap, ap_class = ap_per_class(*stats)
            print(f'precision: {p}')
            print(f'recall: {r}')
            print(f'mAP@0.5: {ap[:, 0]}')

            table = PrettyTable()
            table.title = "Metrics"
            table.field_names = ["Classes", 'Precision', 'Recall', 'mAP50', 'mAP50-95']
            table.add_row(['all', f'{np.mean(p):.3f}', f'{np.mean(r):.3f}', f'{np.mean(ap[:, 0]):.3f}', f'{np.mean(ap):.3f}'])

            # Metric rows follow ap_class (the class ids present in the ground
            # truth), so look each category up by id instead of by its position
            # in the category list.
            row_of_class = {int(class_id): row for row, class_id in enumerate(ap_class)}
            for category in label['categories']:
                cls_name = category['name']
                row = row_of_class.get(int(category['id']))
                if row is not None:
                    table.add_row([cls_name, f'{p[row]:.3f}', f'{r[row]:.3f}', f'{ap[row, 0]:.3f}', f'{ap[row, :].mean():.3f}'])
                else:
                    # Category with no ground-truth instances in this dataset.
                    table.add_row([cls_name, '0.000', '0.000', '0.000', '0.000'])
            print(table)
        else:
            print("警告：统计数据不足，无法计算mAP")