DETR Object Detection

Based on the official DETR source code, here is a bounding-box visualization script (adapted for the NWPU-VHR-10 dataset).

Notes:

1. If you are training on your own dataset, set num_classes to the number of object categories in your dataset + 1 (see the short sketch after this list).

2. Define CLASSES and COLORS, with one color per category.

3. Replace the paths in the code with your own paths.
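
For reference, a minimal sketch of what the "+1" in note 1 means. It assumes the official DETR build, where the model itself appends one extra "no-object" logit on top of num_classes, and the common convention that annotation labels start at 1 (so index 0 goes unused); the names below are illustrative only.

python

# Sketch only (assumption: DETR defines its classification head as
# nn.Linear(hidden_dim, num_classes + 1), the extra slot being the "no-object" class).
NUM_CATEGORIES = 10                     # NWPU-VHR-10 has 10 object categories
num_classes = NUM_CATEGORIES + 1        # value to pass when building DETR, as in note 1
num_logits_per_query = num_classes + 1  # 12 scores per query: indices 1..10 are the
                                        # categories, 0 is unused, 11 is "no object"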

Reference article:

https://blog.csdn.net/qq_45836365/article/details/128252220

python
import glob
import math
import argparse
import numpy as np
from models.detr import DETR
from models.backbone import Backbone, build_backbone
from models.transformer import build_transformer
from PIL import Image
import cv2
import requests
import matplotlib.pyplot as plt
import torch
from torch import nn
from torchvision.models import resnet50
import torchvision.transforms as T
import torchvision.models as models

torch.set_grad_enabled(False)
import os


def get_args_parser():
    parser = argparse.ArgumentParser('Set transformer detector', add_help=False)
    parser.add_argument('--lr', default=1e-4, type=float)
    parser.add_argument('--lr_backbone', default=1e-5, type=float)
    parser.add_argument('--batch_size', default=2, type=int)
    parser.add_argument('--weight_decay', default=1e-4, type=float)
    parser.add_argument('--epochs', default=300, type=int)
    parser.add_argument('--lr_drop', default=200, type=int)
    parser.add_argument('--clip_max_norm', default=0.1, type=float, help='gradient clipping max norm')
    # Model parameters
    parser.add_argument('--frozen_weights', type=str, default=None,
                        help="Path to the pretrained model. If set, only the mask head will be trained")  # * Backbone
    parser.add_argument('--backbone', default='resnet50', type=str, help="Name of the convolutional backbone to use")
    parser.add_argument('--dilation', action='store_true',
                        help="If true, we replace stride with dilation in the last convolutional block (DC5)")
    parser.add_argument('--position_embedding', default='sine', type=str, choices=('sine', 'learned'),
                        help="Type of positional embedding to use on top of the image features")
    # * Transformer
    parser.add_argument('--enc_layers', default=6, type=int, help="Number of encoding layers in the transformer")
    parser.add_argument('--dec_layers', default=6, type=int, help="Number of decoding layers in the transformer")
    parser.add_argument('--dim_feedforward', default=2048, type=int,
                        help="Intermediate size of the feedforward layers in the transformer blocks")
    parser.add_argument('--hidden_dim', default=256, type=int,
                        help="Size of the embeddings (dimension of the transformer)")
    parser.add_argument('--dropout', default=0.1, type=float, help="Dropout applied in the transformer")
    parser.add_argument('--nheads', default=8, type=int,
                        help="Number of attention heads inside the transformer's attentions")
    parser.add_argument('--num_queries', default=100, type=int, help="Number of query slots")
    parser.add_argument('--pre_norm', action='store_true')
    # * Segmentation
    parser.add_argument('--masks', action='store_true', help="Train segmentation head if the flag is provided")
    # Loss
    parser.add_argument('--no_aux_loss', dest='aux_loss', action='store_false',
                        help="Disables auxiliary decoding losses (loss at each layer)")  # * Matcher
    parser.add_argument('--set_cost_class', default=1, type=float, help="Class coefficient in the matching cost")
    parser.add_argument('--set_cost_bbox', default=5, type=float, help="L1 box coefficient in the matching cost")
    parser.add_argument('--set_cost_giou', default=2, type=float,
                        help="giou box coefficient in the matching cost")  # * Loss coefficients
    parser.add_argument('--mask_loss_coef', default=1, type=float)
    parser.add_argument('--dice_loss_coef', default=1, type=float)
    parser.add_argument('--bbox_loss_coef', default=5, type=float)
    parser.add_argument('--giou_loss_coef', default=2, type=float)
    parser.add_argument('--eos_coef', default=0.1, type=float,
                        help="Relative classification weight of the no-object class")
    # dataset parameters
    parser.add_argument('--dataset_file', default='coco')
    parser.add_argument('--coco_path', type=str)
    parser.add_argument('--coco_panoptic_path', type=str)
    parser.add_argument('--remove_difficult', action='store_true')
    parser.add_argument('--output_dir', default='', help='path where to save, empty for no saving')
    parser.add_argument('--device', default='cuda', help='device to use for training / testing')
    parser.add_argument('--seed', default=42, type=int)
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start_epoch', default=0, type=int, metavar='N', help='start epoch')
    parser.add_argument('--eval', action='store_true')
    parser.add_argument('--num_workers', default=2, type=int)
    # distributed training parameters
    parser.add_argument('--world_size', default=1, type=int, help='number of distributed processes')
    parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training')
    return parser


CLASSES = ['airplane', 'ship', 'storage tank', 'baseball diamond', 'tennis court', 'basketball court',
           'ground track field', 'harbor', 'bridge', 'vehicle']
COLORS = [(120, 120, 120), (180, 120, 120), (6, 230, 230), (80, 50, 50),
          (4, 200, 3), (120, 120, 80), (140, 140, 140), (204, 5, 255),
          (230, 230, 230), (4, 250, 7)]
transform_input = T.Compose([
    T.Resize(800),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


def box_cxcywh_to_xyxy(x):
    x_c, y_c, w, h = x.unbind(1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=1)


def rescale_bboxes(out_bbox, size):
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    b = b * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32)
    return b


def plot_results(img, prob, boxes, save_path):
    # img is an RGB numpy array (converted from the PIL image by the caller)
    lw = max(round(sum(img.shape) / 2 * 0.003), 2)  # line width scaled to image size
    tf = max(lw - 1, 1)                             # font thickness
    for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
        cl = p.argmax()                             # predicted class index (labels start at 1 in this setup)
        color = COLORS[cl - 1]
        text = f'{CLASSES[cl - 1]}:{p[cl]:0.2f}'
        cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), color, thickness=lw,
                      lineType=cv2.LINE_AA)
        # filled label background above the box, then the class name and score in white
        w, h = cv2.getTextSize(text, 0, fontScale=lw / 3, thickness=tf)[0]
        cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmin) + w, int(ymin) - h - 3), color, -1,
                      cv2.LINE_AA)
        cv2.putText(img, text, (int(xmin), int(ymin) - 2), 0, lw / 3, (255, 255, 255), thickness=tf,
                    lineType=cv2.LINE_AA)
    Image.fromarray(img).save(save_path)


parser = argparse.ArgumentParser('DETR training and evaluation script', parents=[get_args_parser()])
args = parser.parse_args()
backbone = build_backbone(args)
transformer = build_transformer(args)
# num_classes = number of dataset categories + 1 (NWPU-VHR-10 has 10 categories)
model = DETR(backbone=backbone, transformer=transformer, num_classes=11, num_queries=100)
model_path = '/home/admin1/pywork/xuebing_pywork/detr-main/outs/checkpoint0299.pth'  # trained checkpoint (.pth) to visualize with
model_data = torch.load(model_path, map_location='cpu')['model']
model.load_state_dict(model_data)
model.eval()

img_dir = '/home/admin1/pywork/xuebing_pywork/mmdetection-main/data/coco/val2017'  # directory of images to run inference on
paths = os.listdir(img_dir)
for path in paths:
    img_path = os.path.join(img_dir, path)
    if os.path.splitext(path)[1] == ".png":
        im = cv2.imread(img_path)
        im = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
    else:
        im = Image.open(img_path).convert('RGB')
    # mean-std normalize the input image (batch size: 1)
    img = transform_input(im).unsqueeze(0)
    # propagate through the model
    outputs = model(img)
    # keep only predictions with confidence above 0.9
    probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]
    keep = probas.max(-1).values > 0.9
    # convert boxes from [0, 1] relative coordinates to absolute image coordinates
    bboxes_scaled = rescale_bboxes(outputs['pred_boxes'][0, keep], im.size)
    # where to save the visualization result
    img_save_path = '/home/admin1/pywork/xuebing_pywork/detr-main/infer_results/' + \
                    os.path.splitext(os.path.split(path)[1])[0] + '.jpg'
    ori_img = np.array(im)
    plot_results(ori_img, probas[keep], bboxes_scaled, img_save_path)
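
If the checkpoint and num_classes disagree, load_state_dict raises a shape-mismatch error. Below is a minimal sanity check you can run before loading, assuming the standard DETR layout in which the classification head is stored under the key 'class_embed.weight' with shape [num_classes + 1, hidden_dim]; model_path is the checkpoint path used above.

python

# Optional sanity check (assumption: DETR stores its classification head as
# 'class_embed.weight' with num_classes + 1 output rows).
ckpt = torch.load(model_path, map_location='cpu')['model']
num_logits = ckpt['class_embed.weight'].shape[0]
print(f'checkpoint predicts {num_logits} logits per query, '
      f'i.e. it was trained with num_classes = {num_logits - 1}')
# For NWPU-VHR-10 (10 categories, num_classes = 11) this should print 12 logits.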