DETR Object Detection

Based on the official DETR source code, here is a bounding-box visualization script (adapted for the NWPU-VHR-10 dataset).

Notes:

1. If you are using your own dataset, set the num_classes argument to the number of object categories in your dataset + 1 (see the sketch after this list).

2. Define CLASSES and COLORS, with one color per category.

3. Change the paths in the code to your own paths.
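
For example, the three notes fit together as follows (a minimal sketch with hypothetical names, not code from the script below):

python
# Hypothetical custom dataset (illustrative names and values only).
MY_CLASSES = ['cat', 'dog', 'bird']  # note 2: your categories
MY_COLORS = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]  # note 2: one color per category
num_classes = len(MY_CLASSES) + 1  # note 1: +1 for DETR's "no object" class
assert len(MY_COLORS) == len(MY_CLASSES)  # note 2: counts must match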

Reference article:

https://blog.csdn.net/qq_45836365/article/details/128252220

python
import argparse
import os

import cv2
import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image

from models.detr import DETR
from models.backbone import build_backbone
from models.transformer import build_transformer

# Inference only: autograd is never needed, so disable it globally.
torch.set_grad_enabled(False)


def get_args_parser():
    parser = argparse.ArgumentParser('Set transformer detector', add_help=False)
    parser.add_argument('--lr', default=1e-4, type=float)
    parser.add_argument('--lr_backbone', default=1e-5, type=float)
    parser.add_argument('--batch_size', default=2, type=int)
    parser.add_argument('--weight_decay', default=1e-4, type=float)
    parser.add_argument('--epochs', default=300, type=int)
    parser.add_argument('--lr_drop', default=200, type=int)
    parser.add_argument('--clip_max_norm', default=0.1, type=float, help='gradient clipping max norm')
    # Model parameters
    parser.add_argument('--frozen_weights', type=str, default=None,
                        help="Path to the pretrained model. If set, only the mask head will be trained")
    # * Backbone
    parser.add_argument('--backbone', default='resnet50', type=str, help="Name of the convolutional backbone to use")
    parser.add_argument('--dilation', action='store_true',
                        help="If true, we replace stride with dilation in the last convolutional block (DC5)")
    parser.add_argument('--position_embedding', default='sine', type=str, choices=('sine', 'learned'),
                        help="Type of positional embedding to use on top of the image features")
    # * Transformer
    parser.add_argument('--enc_layers', default=6, type=int, help="Number of encoding layers in the transformer")
    parser.add_argument('--dec_layers', default=6, type=int, help="Number of decoding layers in the transformer")
    parser.add_argument('--dim_feedforward', default=2048, type=int,
                        help="Intermediate size of the feedforward layers in the transformer blocks")
    parser.add_argument('--hidden_dim', default=256, type=int,
                        help="Size of the embeddings (dimension of the transformer)")
    parser.add_argument('--dropout', default=0.1, type=float, help="Dropout applied in the transformer")
    parser.add_argument('--nheads', default=8, type=int,
                        help="Number of attention heads inside the transformer's attentions")
    parser.add_argument('--num_queries', default=100, type=int, help="Number of query slots")
    parser.add_argument('--pre_norm', action='store_true')
    # * Segmentation
    parser.add_argument('--masks', action='store_true', help="Train segmentation head if the flag is provided")
    # Loss
    parser.add_argument('--no_aux_loss', dest='aux_loss', action='store_false',
                        help="Disables auxiliary decoding losses (loss at each layer)")
    # * Matcher
    parser.add_argument('--set_cost_class', default=1, type=float, help="Class coefficient in the matching cost")
    parser.add_argument('--set_cost_bbox', default=5, type=float, help="L1 box coefficient in the matching cost")
    parser.add_argument('--set_cost_giou', default=2, type=float,
                        help="giou box coefficient in the matching cost")
    # * Loss coefficients
    parser.add_argument('--mask_loss_coef', default=1, type=float)
    parser.add_argument('--dice_loss_coef', default=1, type=float)
    parser.add_argument('--bbox_loss_coef', default=5, type=float)
    parser.add_argument('--giou_loss_coef', default=2, type=float)
    parser.add_argument('--eos_coef', default=0.1, type=float,
                        help="Relative classification weight of the no-object class")
    # dataset parameters
    parser.add_argument('--dataset_file', default='coco')
    parser.add_argument('--coco_path', type=str)
    parser.add_argument('--coco_panoptic_path', type=str)
    parser.add_argument('--remove_difficult', action='store_true')
    parser.add_argument('--output_dir', default='', help='path where to save, empty for no saving')
    parser.add_argument('--device', default='cuda', help='device to use for training / testing')
    parser.add_argument('--seed', default=42, type=int)
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start_epoch', default=0, type=int, metavar='N', help='start epoch')
    parser.add_argument('--eval', action='store_true')
    parser.add_argument('--num_workers', default=2, type=int)
    # distributed training parameters
    parser.add_argument('--world_size', default=1, type=int, help='number of distributed processes')
    parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training')
    return parser


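# Only the architecture arguments above (backbone, position embedding,
# transformer sizes) are consumed at inference time; the training flags are
# kept so the official build_backbone / build_transformer helpers accept
# args unchanged. Below: the 10 NWPU-VHR-10 categories, one RGB color each.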
CLASSES = ['airplane', 'ship', 'storage tank', 'baseball diamond', 'tennis court', 'basketball court',
           'ground track field', 'harbor', 'bridge', 'vehicle']
COLORS = [(120, 120, 120), (180, 120, 120), (6, 230, 230), (80, 50, 50),
          (4, 200, 3), (120, 120, 80), (140, 140, 140), (204, 5, 255),
          (230, 230, 230), (4, 250, 7)]
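# Preprocessing follows the official DETR demo: resize the shorter side to 800
# and apply ImageNet mean/std normalization.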
transform_input = T.Compose([
    T.Resize(800),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


def box_cxcywh_to_xyxy(x):
    # Convert (center_x, center_y, width, height) boxes to (x0, y0, x1, y1).
    # Example: (0.5, 0.5, 0.2, 0.4) -> (0.4, 0.3, 0.6, 0.7).
    x_c, y_c, w, h = x.unbind(1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=1)


def rescale_bboxes(out_bbox, size):
    # Scale normalized (x0, y0, x1, y1) boxes to absolute pixel coordinates.
    # Example: (0.4, 0.3, 0.6, 0.7) on a 1000x600 image -> (400, 180, 600, 420).
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    b = b * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32)
    return b


def plot_results(img, prob, boxes, save_path):
    # img is an RGB numpy array; boxes are absolute (x0, y0, x1, y1) coordinates.
    # Line width and font thickness scale with the image size.
    lw = max(round(sum(img.shape) / 2 * 0.003), 2)
    tf = max(lw - 1, 1)
    for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
        c1 = p.argmax().item()
        color = COLORS[c1 - 1]  # category ids start at 1, hence the -1 offset
        text = f'{CLASSES[c1 - 1]}:{p[c1]:0.2f}'
        cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), color, thickness=lw,
                      lineType=cv2.LINE_AA)
        # Filled label background with white "class:score" text above the box.
        w, h = cv2.getTextSize(text, 0, fontScale=lw / 3, thickness=tf)[0]
        cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmin) + w, int(ymin) - h - 3), color, -1,
                      cv2.LINE_AA)
        cv2.putText(img, text, (int(xmin), int(ymin) - 2), 0, lw / 3, (255, 255, 255), thickness=tf,
                    lineType=cv2.LINE_AA)
    Image.fromarray(img).save(save_path)


parser = argparse.ArgumentParser('DETR training and evaluation script', parents=[get_args_parser()])
args = parser.parse_args()
backbone = build_backbone(args)
transformer = build_transformer(args)
# NWPU-VHR-10 has 10 categories, so num_classes = 10 + 1 (see note 1 above).
model = DETR(backbone=backbone, transformer=transformer, num_classes=11, num_queries=100)
model_path = '/home/admin1/pywork/xuebing_pywork/detr-main/outs/checkpoint0299.pth'  # trained checkpoint used for inference
model_data = torch.load(model_path, map_location='cpu')['model']  # map to CPU; this script runs inference on CPU
model.load_state_dict(model_data)
model.eval()

img_dir = '/home/admin1/pywork/xuebing_pywork/mmdetection-main/data/coco/val2017'  # directory with images to run inference on
for path in os.listdir(img_dir):
    img_path = os.path.join(img_dir, path)
    if os.path.splitext(path)[1] == ".png":
        # OpenCV reads BGR; convert to RGB before wrapping as a PIL image.
        im = cv2.imread(img_path)
        im = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
    else:
        im = Image.open(img_path).convert('RGB')
    # mean-std normalize the input image (batch size: 1)
    img = transform_input(im).unsqueeze(0)
    # propagate through the model
    outputs = model(img)
    # keep only predictions with 0.9+ confidence; the last logit is the no-object class
    probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]
    keep = probas.max(-1).values > 0.9
    # convert boxes from [0, 1] to absolute image coordinates
    bboxes_scaled = rescale_bboxes(outputs['pred_boxes'][0, keep], im.size)
    # path where the annotated result is saved
    img_save_path = '/home/admin1/pywork/xuebing_pywork/detr-main/infer_results/' + \
                    os.path.splitext(os.path.split(path)[1])[0] + '.jpg'
    ori_img = np.array(im)
    plot_results(ori_img, probas[keep], bboxes_scaled, img_save_path)
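
The script above runs inference on CPU. A minimal sketch of GPU inference, assuming CUDA is available and reusing model, transform_input, and rescale_bboxes from the script (the save path is a placeholder):

python
# Sketch only: move the model and each input to the GPU, then bring the kept
# boxes back to the CPU before rescale_bboxes (it multiplies by a CPU tensor).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
img = transform_input(im).unsqueeze(0).to(device)
outputs = model(img)
probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]
keep = probas.max(-1).values > 0.9
bboxes_scaled = rescale_bboxes(outputs['pred_boxes'][0, keep].cpu(), im.size)
plot_results(np.array(im), probas[keep].cpu(), bboxes_scaled, 'result.jpg')  # placeholder path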