DETR 目标检测

DETR 目标检测

根据DETR官方源代码,写一个打框可可视化脚本(适用于NWPU-VHR-10数据集)

注意:

1、如果是自己的数据集,修改num_classes参数值为自己的数据种类类别 + 1

2、定义CLASSES和COLORS,每个类别对应一个颜色即可。

3、修改代码中的路径为自己的路径

可参考文章

https://blog.csdn.net/qq_45836365/article/details/128252220

javascript 复制代码
import glob
import math
import argparse
import numpy as np
from models.detr import DETR
from models.backbone import Backbone, build_backbone
from models.transformer import build_transformer
from PIL import Image
import cv2
import requests
import matplotlib.pyplot as plt
import torch
from torch import nn
from torchvision.models import resnet50
import torchvision.transforms as T
import torchvision.models as models

torch.set_grad_enabled(False)
import os


def get_args_parser():
    parser = argparse.ArgumentParser('Set transformer detector', add_help=False)
    parser.add_argument('--lr', default=1e-4, type=float)
    parser.add_argument('--lr_backbone', default=1e-5, type=float)
    parser.add_argument('--batch_size', default=2, type=int)
    parser.add_argument('--weight_decay', default=1e-4, type=float)
    parser.add_argument('--epochs', default=300, type=int)
    parser.add_argument('--lr_drop', default=200, type=int)
    parser.add_argument('--clip_max_norm', default=0.1, type=float, help='gradient clipping max norm')
    # Model parameters
    parser.add_argument('--frozen_weights', type=str, default=None,
                        help="Path to the pretrained model. If set, only the mask head will be trained")  # * Backbone
    parser.add_argument('--backbone', default='resnet50', type=str, help="Name of the convolutional backbone to use")
    parser.add_argument('--dilation', action='store_true',
                        help="If true, we replace stride with dilation in the last convolutional block (DC5)")
    parser.add_argument('--position_embedding', default='sine', type=str, choices=('sine', 'learned'),
                        help="Type of positional embedding to use on top of the image features")
    # * Transformer
    parser.add_argument('--enc_layers', default=6, type=int, help="Number of encoding layers in the transformer")
    parser.add_argument('--dec_layers', default=6, type=int, help="Number of decoding layers in the transformer")
    parser.add_argument('--dim_feedforward', default=2048, type=int,
                        help="Intermediate size of the feedforward layers in the transformer blocks")
    parser.add_argument('--hidden_dim', default=256, type=int,
                        help="Size of the embeddings (dimension of the transformer)")
    parser.add_argument('--dropout', default=0.1, type=float, help="Dropout applied in the transformer")
    parser.add_argument('--nheads', default=8, type=int,
                        help="Number of attention heads inside the transformer's attentions")
    parser.add_argument('--num_queries', default=100, type=int, help="Number of query slots")
    parser.add_argument('--pre_norm', action='store_true')
    # * Segmentation
    parser.add_argument('--masks', action='store_true', help="Train segmentation head if the flag is provided")
    # Loss
    parser.add_argument('--no_aux_loss', dest='aux_loss', action='store_false',
                        help="Disables auxiliary decoding losses (loss at each layer)")  # * Matcher
    parser.add_argument('--set_cost_class', default=1, type=float, help="Class coefficient in the matching cost")
    parser.add_argument('--set_cost_bbox', default=5, type=float, help="L1 box coefficient in the matching cost")
    parser.add_argument('--set_cost_giou', default=2, type=float,
                        help="giou box coefficient in the matching cost")  # * Loss coefficients
    parser.add_argument('--mask_loss_coef', default=1, type=float)
    parser.add_argument('--dice_loss_coef', default=1, type=float)
    parser.add_argument('--bbox_loss_coef', default=5, type=float)
    parser.add_argument('--giou_loss_coef', default=2, type=float)
    parser.add_argument('--eos_coef', default=0.1, type=float,
                        help="Relative classification weight of the no-object class")
    # dataset parameters
    parser.add_argument('--dataset_file', default='coco')
    parser.add_argument('--coco_path', type=str)
    parser.add_argument('--coco_panoptic_path', type=str)
    parser.add_argument('--remove_difficult', action='store_true')
    parser.add_argument('--output_dir', default='', help='path where to save, empty for no saving')
    parser.add_argument('--device', default='cuda', help='device to use for training / testing')
    parser.add_argument('--seed', default=42, type=int)
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start_epoch', default=0, type=int, metavar='N', help='start epoch')
    parser.add_argument('--eval', action='store_true')
    parser.add_argument('--num_workers', default=2, type=int)
    # distributed training parameters
    parser.add_argument('--world_size', default=1, type=int, help='number of distributed processes')
    parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training')
    return parser


CLASSES = ['airplane', 'ship', 'storage tank', 'baseball diamond', 'tennis court', 'basketball court',
           'ground track field', 'harbor', 'bridge', 'vehicle']
COLORS = [(120, 120, 120), (180, 120, 120), (6, 230, 230), (80, 50, 50),

          (4, 200, 3), (120, 120, 80), (140, 140, 140), (204, 5, 255),

          (230, 230, 230), (4, 250, 7),
          ]
transform_input = T.Compose([
    T.Resize(800),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


def box_cxcywh_to_xyxy(x):
    x_c, y_c, w, h = x.unbind(1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=1)


def rescale_bboxes(out_bbox, size):
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    b = b * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32)
    return b


def plot_results(pil_img, prob, boxes, save_path):
    lw = max(round(sum(pil_img.shape) / 2 * 0.003), 2)
    tf = max(lw - 1, 1)
    colors = COLORS
    for p, (xmin, ymin, xmax, ymax), c in zip(prob, boxes.tolist(), colors):
        c1 = p.argmax()
        text = f'{CLASSES[c1 - 1]}:{p[c1]:0.2f}'
        cv2.rectangle(pil_img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), colors[c1 - 1], thickness=lw,
                      lineType=cv2.LINE_AA)
        if text:
            tf = max(lw - 1, 1)
            w, h = cv2.getTextSize(text, 0, fontScale=lw / 3, thickness=tf)[0]
            cv2.rectangle(pil_img, (int(xmin), int(ymin)), (int(xmin) + w, int(ymin) - h - 3), colors[c1 - 1], -1,
                          cv2.LINE_AA)
            cv2.putText(pil_img, text, (int(xmin), int(ymin) - 2), 0, lw / 3, (255, 255, 255), thickness=tf,
                        lineType=cv2.LINE_AA)
    Image.fromarray(ori_img).save(save_path)


parser = argparse.ArgumentParser('DETR training and evaluation script', parents=[get_args_parser()])
args = parser.parse_args()
backbone = build_backbone(args)
transform = build_transformer(args)
model = DETR(backbone=backbone, transformer=transform, num_classes=11, num_queries=100)
model_path = '/home/admin1/pywork/xuebing_pywork/detr-main/outs/checkpoint0299.pth'  # 保存的预训练好的模型pth文件,用于验证
model_data = torch.load(model_path)['model']
model.load_state_dict(model_data)
model.eval()

paths = os.listdir('/home/admin1/pywork/xuebing_pywork/mmdetection-main/data/coco/val2017')  # 待验证的图片路径
for path in paths:
    if os.path.splitext(path)[1] == ".png":
        im = cv2.imread(path)
        im = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
    else:
        im = Image.open('/home/admin1/pywork/xuebing_pywork/mmdetection-main/data/coco/val2017' + '/' + path)
        # mean-std normalize the input image (batch-size: 1)
        img = transform_input(im).unsqueeze(0)
    # propagate through the model
    outputs = model(img)
    # keep only predictions with 0.9+ confidence
    probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]
    keep = probas.max(-1).values > 0.9
    # convert boxes from [0; 1] to image scales
    bboxes_scaled = rescale_bboxes(outputs['pred_boxes'][0, keep], im.size)
    # 保存验证结果地址
    img_save_path = '/home/admin1/pywork/xuebing_pywork/detr-main/infer_results/' + \
                    os.path.splitext(os.path.split(path)[1])[0] + '.jpg'
    ori_img = np.array(im)
    plot_results(ori_img, probas[keep], bboxes_scaled, img_save_path)
相关推荐
HackTorjan2 小时前
2026年5月29日:全球首个通用人工智能操作系统正式发布,开启人机协同新纪元
人工智能
刘大猫.3 小时前
智造短剧新引擎:火山引擎上线「火山剧创 1.0」,制作效率提升 80%
人工智能·ai·chatgpt·机器人·大模型·火山引擎·短剧新引擎
红尘散仙3 小时前
我把终端小说阅读器接上了 AI Agent:TRNovel 现在能用 skill 生成书源了
人工智能·后端·rust
雅菲奥朗3 小时前
企业级 AI 自动化|OpenClaw 龙虾实战与认证
运维·人工智能·自动化·openclaw
HIT_Weston3 小时前
99、【Agent】【OpenCode】task 工具提示词(Slash command)(一)
人工智能·agent·opencode
25 Hz3 小时前
Mind 爱好者时空表征刊 第24期 | 时间结构学习、空间对时间表征的补偿、事件内部的时间扭曲……
人工智能
心中有国也有家3 小时前
GE图引擎深度解析——CANN的计算图优化与执行引擎
人工智能·pytorch·python·学习·numpy
海兰4 小时前
【文字三国志:第一篇】天命重构,大语言模型(LLM)动态生成文言风格的叙事文本的文字游戏
人工智能·游戏·语言模型
cxr8284 小时前
高分子复合材料 AI 逆向设计合——验证闭环、决策优化与中试放大
人工智能·材料逆向设计合成
litble4 小时前
如何速成LLM以伪装成一个AI研究者(6)——LoRA,Adapter,P-tuning,量化,QLoRA
人工智能·lora·量化·peft·qlora·高效微调