@tda4 I am developing a semantic segmentation model for drivable-area (FSD) segmentation in autonomous driving; it is trained on the bird's-eye-view images the vehicle uses. I have been given a task: compute the model's FSD IoU within 3, 4, and 5 metres. "3 metres" means 3 metres in the real world mapped onto the input image: the original training images are 1088x960, where 1 pixel corresponds to 1 cm in the real world, and the model's input and output size is 544x480. The validation code and the FSD test set already exist, so I can easily compute the model's metrics on the FSD test set with @tda4/test_custom_tda2.py. But how do I compute the metrics within 3/4/5 metres? The 3/4/5 metres are measured from the image centre, i.e. a 3/4/5-metre square drawn in the real world. Please keep the code changes as small as possible, and answer in Chinese.
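The only geometric assumption the change relies on is the metre-to-pixel conversion: at the original 1088x960 resolution 1 pixel is 1 cm, so at the 544x480 network resolution 1 pixel covers 2 cm, and an m-metre square centred on the image becomes a square of roughly m*100/2 pixels per side. A minimal illustrative sketch of that arithmetic (not part of test_custom_tda2.py):

scale = 544 / 1088        # = 480 / 960 = 0.5, original resolution -> network resolution
for m in (3, 4, 5):
    print(m, 'm ->', int(m * 100 * scale), 'px per side')   # 150 / 200 / 250 px

The modified tda4/test_custom_tda2.py: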
import argparse
import json
import os
from pathlib import Path
from threading import Thread
import numpy as np
import torch
import yaml
from tqdm import tqdm
import tda2imgprocess
from models.experimental import *
from utils.datasets import create_dataloader
from utils.general import coco80_to_coco91_class, check_dataset, check_file, check_img_size, check_requirements, \
box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, set_logging, increment_path, colorstr
from utils.metrics import ap_per_class, ConfusionMatrix, batch_pix_accuracy,batch_pix_accuracy_class, batch_intersection_union,batch_pix_accuracy_class_tda2 # 后两个新增分割
from utils.plots import plot_images, output_to_target, plot_study_txt
from utils.torch_utils import select_device, time_synchronized
import torch.nn.functional as F
import cv2
from models.yolo import Model
from utils.loss import ComputeLoss, SegmentationLosses, SegFocalLoss, OhemCELoss, ProbOhemCrossEntropy2d,PoseLoss
import SegmentationDataset
"""
test_custom.py differs from test.py only in the data loader, which is switched from Cityscapes to Custom.
In the new train/test flow (loader mode "testval"), the long side of the validation images can be resized to base-size before being fed to the network while the masks stay at the original resolution; the code below automatically bilinearly interpolates the network output back to the original size before computing metrics.
Example invocation:
python test.py --data cityscapes_det.yaml --segdata ./data/citys --weights ./best.pt --img-size 640 --base-size 640
i.e. two extra arguments, --segdata and --base-size, compared with the original yolov5.
"""
Cityscapes_COLORMAP = [
[128, 64, 128], [0, 0, 0], [244, 35, 232], [0, 0, 192], [70, 70, 70], [102, 102, 0], [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], [0, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 70], [0, 60, 100], [0, 80, 100], [0, 0, 230], [119, 11, 32],
]
def label2image(pred, COLORMAP=Cityscapes_COLORMAP):
colormap = np.array(COLORMAP, dtype='uint8')
X = pred.astype('int32')
return colormap[X, :]
# Compute IoU over a square region centred at the image centre with a real-world side length (intended for FSD by default)
def compute_center_square_iou(pred_logits, target, meters=(3, 4, 5), orig_hw=(1088, 960), fsd_class=1):
"""
pred_logits: [B, C, H, W], raw network output (before argmax)
target: [B, H, W], segmentation labels
meters: real-world side lengths (in metres) to evaluate
orig_hw: original image size (H, W), at which 1 pixel = 1 cm
fsd_class: class id of the FSD foreground (default 1)
"""
h, w = target.shape[1], target.shape[2]
# Convert the real-world distance (cm) to a pixel length at the current scale, assuming the input was resized proportionally from orig_hw to the current (h, w)
ratio_h = h / float(orig_hw[0]) # scale factor from the original image to the current input size (height)
ratio_w = w / float(orig_hw[1]) # scale factor from the original image to the current input size (width)
pred_cls = pred_logits.argmax(1) # predicted class per pixel
# Pixels labelled 255 (ignore label) are excluded from the IoU
valid_mask = torch.ones_like(target, dtype=torch.bool)
if (target == 255).any():
valid_mask = target != 255
inters, unions = {}, {}
center_h, center_w = h // 2, w // 2
for m in meters:
side_h = int(m * 100 * ratio_h) # pixel side length for m metres along the height (metres -> centimetres -> pixels)
side_w = int(m * 100 * ratio_w) # pixel side length for m metres along the width
half_h = side_h // 2
half_w = side_w // 2
top = max(0, center_h - half_h)
bottom = min(h, center_h + half_h)
left = max(0, center_w - half_w)
right = min(w, center_w + half_w)
roi_mask = torch.zeros_like(target, dtype=torch.bool)
roi_mask[:, top:bottom, left:right] = True
roi_mask = roi_mask & valid_mask
pred_fsd = pred_cls == fsd_class # pixels predicted as FSD
gt_fsd = target == fsd_class # pixels labelled as FSD
inter = ((pred_fsd & gt_fsd) & roi_mask).sum().item()
union = (((pred_fsd | gt_fsd) & roi_mask)).sum().item()
inters[m] = inter
unions[m] = union
return inters, unions
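# Hedged sanity check (added for illustration, not part of the original script): verifies
# compute_center_square_iou on synthetic tensors. Sizes assume the 544x480 network output
# described above; a prediction identical to the label should give IoU = 1.0 at every distance.
def _check_center_square_iou():
    b, c, h, w = 1, 2, 544, 480
    target = torch.zeros(b, h, w, dtype=torch.long)
    target[:, 100:400, 100:400] = 1                     # a block of FSD ground truth
    logits = torch.zeros(b, c, h, w)
    logits[:, 1][target == 1] = 1.0                     # make argmax reproduce the label exactly
    inters, unions = compute_center_square_iou(logits, target, meters=(3, 4, 5))
    for m in (3, 4, 5):
        print(m, 'm IoU:', inters[m] / (np.spacing(1) + unions[m]))  # expect 1.0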
def seg_validation_tda2(model, n_segcls, valloader, device, epoch,half_precision=True):
# Fast test during the training
meters = (3, 4, 5) # side lengths (in metres) of the centred squares used for the local IoU
def eval_batch(model, image, target, half,imagepath):
outputs = model(image)
outputs = gather(outputs, 0, dim=0)
if n_segcls == 2:
pred = outputs[1][1] # index 1 is the segmentation output (original code)
pred_ori = outputs[1][1]
if n_segcls == 6:
pred = outputs[1][0] # original code
pred_ori = outputs[1][0]
if n_segcls == 6:
print("image.shape:",image.shape)
img_draw_rm=SegmentationDataset.tensor2img(image[0])
img_draw_rm.save(f"/ai/ypli/input_rm/{epoch}_{i}_rm_input.png")
pred_draw_rm=torch.argmax(pred[0], 0)
SegmentationDataset.draw_segmentation_mask(img_draw_rm, pred_draw_rm, f"/ai/ypli/tmp/{epoch}_val_rm_pred.png")
else:
print("image.shape:",image.shape)
img_draw_fsd=SegmentationDataset.tensor2img(image[0])
img_draw_fsd.save(f"/ai/ypli/input_rm/{epoch}_{i}_rm_input.png")
pred_draw_fsd=torch.argmax(pred[0], 0)
SegmentationDataset.draw_segmentation_mask(img_draw_fsd, pred_draw_fsd, f"/ai/ypli/tmp/{epoch}_val_fsd_pred.png")
target = target.to(device, non_blocking=True)
pred = F.interpolate(pred, (target.shape[1], target.shape[2]), mode='bilinear', align_corners=True)
maskpred = F.interpolate(pred, (target.shape[1], target.shape[2]), mode='bilinear', align_corners=True)[0]
mask = label2image(maskpred.max(axis=0)[1].cpu().numpy(), Cityscapes_COLORMAP)[:, :, ::-1] # for visualization
correct, labeled,total_pixel = batch_pix_accuracy_class(pred, target,n_segcls)
correct, labeled,total_pixel = batch_pix_accuracy_class_tda2(pred.data, target,n_segcls,imagepath)#
inter, union = batch_intersection_union(pred.data, target, n_segcls)#
center_inter, center_union = compute_center_square_iou(pred.data, target, meters=meters, orig_hw=(1088, 960), fsd_class=1) # intersection/union of FSD inside the centred 3/4/5 m squares
return correct, labeled, inter, union,total_pixel,mask,center_inter,center_union
from copy import deepcopy
model2=deepcopy(model)
half = device.type != 'cpu' and half_precision # half precision only supported on CUDA
if half:#lyp
model2.half()
model2.eval()
total_inter, total_union, total_correct, total_label = 0, 0, 0, 0
total_c = []
tbar = tqdm(valloader, desc='\r')
numtime = 1
region_inter = {m: 0 for m in meters} # accumulated intersection pixels per distance
region_union = {m: 0 for m in meters} # accumulated union pixels per distance
for i, (image, target,imagepath) in enumerate(tbar):
from PIL import Image, ImageDraw
import random
img=Image.open(imagepath[0]).convert('RGB').resize((480, 544))
randnum=str(random.randint(1, 1000))
print(target.shape)#[8, 544, 480]
SegmentationDataset.draw_segmentation_mask(img, target[0], f"/ai/ypli/res/res1{randnum}.png") # known issue: this visualization is wrong
img2=SegmentationDataset.tensor2img(image[0])
img2.save("/ai/ypli/res/tmp.jpg")
print(image[0].shape)#[3, 544, 480]
print(type(image[0]))#torch.Size([3, 544, 480])
# img_draw_rm.save(f"/ai/ypli/input_rm/{epoch}_{i}_rm_input.png")
SegmentationDataset.draw_segmentation_mask(img2, target[0], f"/ai/ypli/res/res2{randnum}.png") # with the original tda4 loading, visualization 2 is correct and 1 is wrong
image = image.to(device, non_blocking=True)
image = image.half() if half else image.float()
print(image.shape)
with torch.no_grad():
correct, labeled, inter, union ,total_pixel,mask,center_inter,center_union= eval_batch(model2, image, target, half,imagepath)
# save visualizations of the test-set images
imagename = (image[0].cpu().numpy().transpose(1,2,0) * 255).astype('uint8')
imagename = (image[0].cpu().numpy().transpose(1,2,0) * 255).astype('uint8')
tda2imgprocess.save_mask(mask,imagepath)
dst_fsd = cv2.addWeighted(mask, 0.4, imagename, 0.6, 0)
cv2.imwrite('/ai/ypli/res/' + str(i) + '.jpg', imagename)
cv2.imwrite('/ai/ypli/res/' + str(i) + '.jpg', dst_fsd)
# save visualizations of the test-set images
print('success')
exit() # debug early-exit: stops after the first batch, so the accumulated metrics below (including the 3/4/5 m IoU) are only produced if this line is removed
total_correct += correct
total_label += labeled
total_inter += inter
total_union += union
total_c += total_pixel
for m in meters: # accumulate this batch's intersection/union inside each centred square
region_inter[m] += center_inter[m]
region_union[m] += center_union[m]
pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label)
total_c = np.array(total_c)
pixAcc_class = 1.0 * total_c[:,0] / (np.spacing(1) + total_c[:,1])
IoU = 1.0 * total_inter / (np.spacing(1) + total_union)
numtime += 1
mIoU = IoU.mean()
tbar.set_description(
'pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU))
if i >= len(tbar) - 1:
print('mIoU= ',str(mIoU))
for j in range(len(IoU)):
print("class:" + str(j) + "result------" +str(IoU[j]) + "--ACC----" + str(pixAcc_class[j]))
if len(IoU) == 14:
tbar.set_description(
'Acc: %.2f, mIoU: %.2f, 0: %.2f, 1: %.2f, 2: %.2f, 3: %.2f, 4: %.2f, 5: %.2f, 6: %.2f, 7: %.2f, 8: %.2f, 9: %.2f, 10: %.2f, 11: %.2f, 12: %.2f, 13: %.2f ' % (pixAcc, mIoU, IoU[0], IoU[1], IoU[2], IoU[3], IoU[4] \
, IoU[5], IoU[6], IoU[7], IoU[8], IoU[9], IoU[10], IoU[11], IoU[12], IoU[13]))
tbar.set_description(
'Acc: %.2f, mIoU: %.2f, 0: %.2f, 1: %.2f, 2: %.2f, 3: %.2f, 4: %.2f, 5: %.2f, 6: %.2f, 7: %.2f, 8: %.2f, 9: %.2f, 10: %.2f, 11: %.2f, 12: %.2f, 13: %.2f ' % (pixAcc, mIoU, IoU[0], IoU[1], IoU[2], IoU[3], IoU[4] \
, IoU[5], IoU[6], IoU[7], IoU[8], IoU[9], IoU[10], IoU[11], IoU[12], IoU[13]))
tbar.set_description(
'Acc: %.2f, mIoU: %.2f, 0: %.2f, 1: %.2f, 2: %.2f, 3: %.2f, 4: %.2f, 5: %.2f, 6: %.2f ' % (pixAcc, mIoU, IoU[0], IoU[1], IoU[2], IoU[3], IoU[4] \
, IoU[5], IoU[6]))
if len(IoU) == 2:
tbar.set_description(
'pixAcc: %.3f, mIoU: %.3f, class0: %.3f, class1: %.3f' % (pixAcc, mIoU, IoU[0], IoU[1]))
for m in meters:
region_iou = region_inter[m] / (np.spacing(1) + region_union[m]) # avoid division by zero
print(f'FSD IoU within {m} m: {region_iou:.4f}') # report the IoU inside the centred square
return mIoU,IoU
def seg_validation(model, n_segcls, valloader, device, half_precision=True):
# Fast test during the training
meters = (3, 4, 5) # side lengths (in metres) of the centred squares used for the local IoU
def eval_batch(model, image, target, half,imagepath):
outputs = model(image)
outputs = gather(outputs, 0, dim=0)
if n_segcls == 2:
pred = outputs[1][1] # index 1 is the segmentation output (original code)
pred_ori = outputs[1][1]
if n_segcls == 6:
pred = outputs[1][0] # original code
pred_ori = outputs[1][0]
# everything up to this point is correct
return None, None, None, None,None,pred_ori # debug early return: the metric computation below in this function is skipped
print(type(pred))
print(type(target))
print(pred.shape)
print(target.shape)
exit()
target = target.to(device, non_blocking=True)
print(pred.shape) # torch.Size([64, 6, 544, 480])
print(target.shape) # torch.Size([64, 544, 480])
print(type(target)) # torch.Size([64, 544, 480])
exit(0)
pred = F.interpolate(pred, (target.shape[1], target.shape[2]), mode='bilinear', align_corners=True)
maskpred = F.interpolate(pred, (target.shape[1], target.shape[2]), mode='bilinear', align_corners=True)[0]
mask = label2image(maskpred.max(axis=0)[1].cpu().numpy(), Cityscapes_COLORMAP)[:, :, ::-1] # for visualization
correct, labeled,total_pixel = batch_pix_accuracy_class(pred, target,n_segcls)
correct, labeled,total_pixel = batch_pix_accuracy_class_tda2(pred.data, target,n_segcls,imagepath)#
inter, union = batch_intersection_union(pred.data, target, n_segcls)#
center_inter, center_union = compute_center_square_iou(pred.data, target, meters=meters, orig_hw=(1088, 960), fsd_class=1) # intersection/union of FSD inside the centred 3/4/5 m squares
return correct, labeled, inter, union,total_pixel,mask,center_inter,center_union
from copy import deepcopy
model2=deepcopy(model)
half = device.type != 'cpu' and half_precision # half precision only supported on CUDA
if half:#lyp
model2.half()
model2.eval()
total_inter, total_union, total_correct, total_label = 0, 0, 0, 0
total_c = []
tbar = tqdm(valloader, desc='\r')
numtime = 1
region_inter = {m: 0 for m in meters} # accumulated intersection pixels per distance
region_union = {m: 0 for m in meters} # accumulated union pixels per distance
for i, (image, target,imagepath) in enumerate(tbar):
from PIL import Image, ImageDraw
import random
img=Image.open(imagepath[0]).convert('RGB').resize((480, 544))
randnum=str(random.randint(1, 1000))
SegmentationDataset.draw_segmentation_mask(img, target, f"/ai/ypli/res/res{randnum}.png")
exit() # debug early-exit: stops evaluation after the first batch
image = image.to(device, non_blocking=True)
image = image.half() if half else image.float()
print(image.shape)
with torch.no_grad():
correct, labeled, inter, union ,total_pixel,mask,center_inter,center_union= eval_batch(model2, image, target, half,imagepath)
# save visualizations of the test-set images
imagename = (image[0].cpu().numpy().transpose(1,2,0) * 255).astype('uint8')
imagename = (image[0].cpu().numpy().transpose(1,2,0) * 255).astype('uint8')
tda2imgprocess.save_mask(mask,imagepath)
dst_fsd = cv2.addWeighted(mask, 0.4, imagename, 0.6, 0)
cv2.imwrite('/ai/ypli/res/' + str(i) + '.jpg', imagename)
cv2.imwrite('/ai/ypli/res/' + str(i) + '.jpg', dst_fsd)
# save visualizations of the test-set images
print('success')
exit() # debug early-exit: stops after the first batch, so the accumulated metrics below (including the 3/4/5 m IoU) are only produced if this line is removed
total_correct += correct
total_label += labeled
total_inter += inter
total_union += union
total_c += total_pixel
for m in meters: # accumulate this batch's intersection/union inside each centred square
region_inter[m] += center_inter[m]
region_union[m] += center_union[m]
pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label)
total_c = np.array(total_c)
pixAcc_class = 1.0 * total_c[:,0] / (np.spacing(1) + total_c[:,1])
IoU = 1.0 * total_inter / (np.spacing(1) + total_union)
numtime += 1
mIoU = IoU.mean()
tbar.set_description(
'pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU))
if i >= len(tbar) - 1:
print('mIoU= ',str(mIoU))
for j in range(len(IoU)):
print("class:" + str(j) + "result------" +str(IoU[j]) + "--ACC----" + str(pixAcc_class[j]))
if len(IoU) == 14:
tbar.set_description(
'Acc: %.2f, mIoU: %.2f, 0: %.2f, 1: %.2f, 2: %.2f, 3: %.2f, 4: %.2f, 5: %.2f, 6: %.2f, 7: %.2f, 8: %.2f, 9: %.2f, 10: %.2f, 11: %.2f, 12: %.2f, 13: %.2f ' % (pixAcc, mIoU, IoU[0], IoU[1], IoU[2], IoU[3], IoU[4] \
, IoU[5], IoU[6], IoU[7], IoU[8], IoU[9], IoU[10], IoU[11], IoU[12], IoU[13]))
tbar.set_description(
'Acc: %.2f, mIoU: %.2f, 0: %.2f, 1: %.2f, 2: %.2f, 3: %.2f, 4: %.2f, 5: %.2f, 6: %.2f, 7: %.2f, 8: %.2f, 9: %.2f, 10: %.2f, 11: %.2f, 12: %.2f, 13: %.2f ' % (pixAcc, mIoU, IoU[0], IoU[1], IoU[2], IoU[3], IoU[4] \
, IoU[5], IoU[6], IoU[7], IoU[8], IoU[9], IoU[10], IoU[11], IoU[12], IoU[13]))
tbar.set_description(
'Acc: %.2f, mIoU: %.2f, 0: %.2f, 1: %.2f, 2: %.2f, 3: %.2f, 4: %.2f, 5: %.2f, 6: %.2f ' % (pixAcc, mIoU, IoU[0], IoU[1], IoU[2], IoU[3], IoU[4] \
, IoU[5], IoU[6]))
if len(IoU) == 2:
tbar.set_description(
'pixAcc: %.3f, mIoU: %.3f, class0: %.3f, class1: %.3f' % (pixAcc, mIoU, IoU[0], IoU[1]))
for m in meters:
region_iou = region_inter[m] / (np.spacing(1) + region_union[m]) # avoid division by zero
print(f'FSD IoU within {m} m: {region_iou:.4f}') # report the IoU inside the centred square
return mIoU,IoU
def segtest(weights, root="data/citys", batch_size=16, half_precision=True, n_segcls=19, base_size=2048): # tests at the original resolution; size misalignment is not handled, image sizes should be multiples of 32
device = select_device(opt.device, batch_size=batch_size)
model = attempt_load_tda4(weights, map_location=device) # load FP32 model
testvalloader = SegmentationDataset.get_rm_loader_tda2(root, batch_size=batch_size, split="val", mode="val", workers=8, base_size=base_size) #get_custom_loader
testvalloader = SegmentationDataset.get_rm_loader(root, batch_size=batch_size, split="val", mode="val", workers=8, base_size=base_size) #get_custom_loader
seg_validation(model, n_segcls, testvalloader, device, half_precision)
def segtest_fsd(weights, root="data/citys", batch_size=16, half_precision=True, n_segcls=19, base_size=2048): # tests at the original resolution; size misalignment is not handled, image sizes should be multiples of 32
device = select_device(opt.device, batch_size=batch_size)
model = attempt_load_tda4(weights, map_location=device) # load FP32 model
testvalloader = SegmentationDataset.get_custom_loader_tda2(root, batch_size=batch_size, split="val", mode="val", workers=8, base_size=base_size) #get_custom_loader
testvalloader = SegmentationDataset.get_custom_loader(root, batch_size=batch_size, split="val", mode="val", workers=8, base_size=base_size) #get_custom_loader
seg_validation(model, n_segcls, testvalloader, device, half_precision)
def test(data,
weights=None,
batch_size=64,
imgsz=640,
conf_thres=0.001,
iou_thres=0.6, # for NMS
save_json=False,
single_cls=False,
augment=False,
verbose=False,
model=None,
dataloader=None,
save_dir=Path(''), # for saving images
save_txt=False, # for auto-labelling
save_hybrid=False, # for hybrid auto-labelling
save_conf=False, # save auto-label confidences
plots=False,
wandb_logger=None,
compute_loss=False,
half_precision=True,
is_coco=False):
# Initialize/load model and set device
training = model is not None
if training: # called by train.py
device = next(model.parameters()).device # get model device
else: # called directly
set_logging()
device = select_device(opt.device, batch_size=batch_size)
# Directories
save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
model = attempt_load(weights, map_location=device) # load FP32 model
model = Model(opt.cfg, ch=3, nc=3).to('cpu') # create
gs = max(int(model.stride.max()), 32) # grid size (max stride)
imgsz = check_img_size(imgsz, s=gs) # check img_size
# Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
if device.type != 'cpu' and torch.cuda.device_count() > 1:
model = nn.DataParallel(model)
# Half
half = device.type != 'cpu' and half_precision # half precision only supported on CUDA
half = False
if half:
model.half()
# Configure
model.eval()
if isinstance(data, str):
is_coco = data.endswith('coco.yaml')
with open(data) as f:
data = yaml.load(f, Loader=yaml.SafeLoader)
check_dataset(data) # check
nc = 1 if single_cls else int(data['nc']) # number of classes
iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
niou = iouv.numel()
# Logging
log_imgs = 0
if wandb_logger and wandb_logger.wandb:
log_imgs = min(wandb_logger.log_imgs, 100)
# Dataloader
if not training:
if device.type != 'cpu':
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
task = opt.task if opt.task in ('train', 'val', 'test') else 'val' # path to train/val/test images
dataloader = create_dataloader(data[task], imgsz, batch_size, gs, opt, pad=0.5, rect=True,
prefix=colorstr(f'{task}: '))[0]
#/ai/DataSets/OD_FSD_zh/psd_v2.0/data/txt_dataset/ann_zh/test_img_list_v5.txt
#data[task]
#/ai/DataSets/OD_FSD_zh/psd_v2.0/data/83_psd_20240723_占用属性优化/alldata/test_img_list.txt
seen = 0
confusion_matrix = ConfusionMatrix(nc=nc)
names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
coco91class = coco80_to_coco91_class()
s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
loss = torch.zeros(3, device=device)
jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
numtotal = 0
numocc,numvip,numwoman,numdisabled,numcharging = 0,0,0,0,0
for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
img = img.to(device, non_blocking=True)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
targets = targets.to(device)
nb, _, height, width = img.shape # batch size, channels, height, width
with torch.no_grad():
# Run model
t = time_synchronized()
out, train_out = model(img, augment=augment)[0] # inference and training outputs; modified: with the new model, output [0] is the detection head
t0 += time_synchronized() - t
# Compute loss
if compute_loss:
# Hyperparameters
with open(opt.hyp) as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
model.hyp = hyp
model.gr = 1.0
compute_loss = PoseLoss(model)
loss += compute_loss([x.float() for x in train_out], targets)[1][:3] # box, obj, cls
# Run NMS
targets[:, 2:6] *= torch.Tensor([width, height, width, height]).to(device) # to pixels
lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling
t = time_synchronized()
out = non_max_suppression(out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb, multi_label=False)
t1 += time_synchronized() - t
# Statistics per image
for si, pred in enumerate(out):
perd_slot = pred[:, 14:]
pred = torch.cat((pred[:, :5], pred[:, 13:14]), 1)
labels = targets[targets[:, 0] == si, 1:]
nl = len(labels)
tcls = labels[:, 0].tolist() if nl else [] # target class
tocc = labels[:, 13].tolist() if nl else []
tvip = labels[:, 14].tolist() if nl else []
twoman = labels[:, 15].tolist() if nl else []
tdisable = labels[:, 16].tolist() if nl else []
tcharging = labels[:, 17].tolist() if nl else []
path = Path(paths[si])
seen += 1
if len(pred) == 0:
if nl:
stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
continue
# Predictions
predn = pred.clone()
scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1]) # native-space pred
# Append to text file
if save_txt:
gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh
for *xyxy, conf, cls in predn.tolist():
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f:
f.write(('%g ' * len(line)).rstrip() % line + '\n')
# W&B logging - Media Panel Plots
if len(wandb_images) < log_imgs and wandb_logger.current_epoch > 0: # Check for test operation
if wandb_logger.current_epoch % wandb_logger.bbox_interval == 0:
box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
"class_id": int(cls),
"box_caption": "%s %.3f" % (names[cls], conf),
"scores": {"class_score": conf},
"domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space
wandb_images.append(wandb_logger.wandb.Image(img[si], boxes=boxes, caption=path.name))
wandb_logger.log_training_progress(predn, path, names) if wandb_logger and wandb_logger.wandb_run else None
# Append to pycocotools JSON dictionary
if save_json:
# [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
image_id = int(path.stem) if path.stem.isnumeric() else path.stem
box = xyxy2xywh(predn[:, :4]) # xywh
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
for p, b in zip(pred.tolist(), box.tolist()):
jdict.append({'image_id': image_id,
'category_id': coco91class[int(p[5])] if is_coco else int(p[5]),
'bbox': [round(x, 3) for x in b],
'score': round(p[4], 5)})
# Assign all predictions as incorrect
correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
if nl:
detected = [] # target indices
tcls_tensor = labels[:, 0]
# target boxes
tbox = xywh2xyxy(labels[:, 1:5])
scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1]) # native-space labels
if plots:
confusion_matrix.process_batch(predn, torch.cat((labels[:, 0:1], tbox), 1))
# Per target class
for cls in torch.unique(tcls_tensor):
ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # prediction indices
pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) # target indices
# Search for detections
if pi.shape[0]:
# Prediction to target ious
ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1) # best ious, indices
# Append detections
detected_set = set()
for j in (ious > iouv[0]).nonzero(as_tuple=False):
d = ti[i[j]] # detected target
if d.item() not in detected_set:
detected_set.add(d.item())
detected.append(d)
correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn
if len(detected) == nl: # all targets already located in image
break
# Append statistics (correct, conf, pcls, tcls)
stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
# Plot images
if plots and batch_i < 3:
f = save_dir / f'test_batch{batch_i}_labels.jpg' # labels
Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
f = save_dir / f'test_batch{batch_i}_pred.jpg' # predictions
Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start()
# Compute statistics
stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
if len(stats) and stats[0].any():
p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, AP@0.5:0.95
mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
else:
nt = torch.zeros(1)
# Print results
pf = '%20s' + '%12i' * 2 + '%12.3g' * 4 # print format
print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
# Print results per class
if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
for i, c in enumerate(ap_class):
print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
# Print speeds
t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple
if not training:
print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
# Plots
if plots:
confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
if wandb_logger and wandb_logger.wandb:
val_batches = [wandb_logger.wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))]
wandb_logger.log({"Validation": val_batches})
if wandb_images:
wandb_logger.log({"Bounding Box Debugger/Images": wandb_images})
# Save JSON
if save_json and len(jdict):
w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights
anno_json = '../coco/annotations/instances_val2017.json' # annotations json
pred_json = str(save_dir / f"{w}_predictions.json") # predictions json
print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
with open(pred_json, 'w') as f:
json.dump(jdict, f)
try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
anno = COCO(anno_json) # init annotations api
pred = anno.loadRes(pred_json) # init predictions api
eval = COCOeval(anno, pred, 'bbox')
if is_coco:
eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files] # image IDs to evaluate
eval.evaluate()
eval.accumulate()
eval.summarize()
map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5)
except Exception as e:
print(f'pycocotools unable to run: {e}')
# Return results
model.float() # for training
if not training:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
print(f"Results saved to {save_dir}{s}")
maps = np.zeros(nc) + map
for i, c in enumerate(ap_class):
maps[c] = ap[i]
return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
if __name__ == '__main__':
parser = argparse.ArgumentParser(prog='test.py')
parser.add_argument('--cfg', type=str, default='models/yolov5s_custom_seg.yaml', help='model.yaml path')#
parser.add_argument('--hyp', type=str, default='data/hyp.scratch.yaml', help='hyperparameters path')
#runs/train/exp52/weights/exp52_last_114_v2.0.21_20241129.pt
#runs/train/exp4/weights/best.pt
#./TTERMSegnetV30_train_epoch_52.pth
"/ai/zhdata/lyp/multiyolov5_point_v2/runs/train/tda2-v17-tda2pretrain-tda4loadervaler/weights/last.pt"
parser.add_argument('--weights', nargs='+', type=str, default="/ai/zhdata/lyp/multiyolov5_point_v2/runs/train/222/weights/lyp_last_53.pt", help='model.pt path(s)') #'runs/train/exp51/weights/last.pt'
parser.add_argument('--data', type=str, default='data/custom.yaml', help='*.data path')
path = ["/ai/DataSets/TopViewMultiTaskPerc_xmlin/freeSpace/annotations/tmp_zh/val_v41_cpp_20250903_FSD.txt"]
path = ["/ai/ypli/multiyolov5_point_v2/data/tda2/train_fsd_1.txt"]
path = ["/ai/ypli/multiyolov5_point_v2/data/tda2/test_fsd.txt"]
path = ['/ai/DataSets/TopViewMultiTaskPerc_xmlin/freeSpace/annotations/ann_zh/val_zh_v23_20250312_gt.txt']
path_rm = ['/ai/DataSets/TopViewMultiTaskPerc_xmlin/roadmarking/annotations/ann_zh/val_v24_fullLabel0103_gt_v1.txt']
path_rm = ["/ai/ypli/multiyolov5_point_v2/data/txt/val_v30_20250305_gt_64.txt"]
path_rm = ["./1img.txt"]
path_rm = ["/ai/zhdata/lyp/jiansudai/100jiansudai.txt"]
path_rm = ["/ai/ypli/multiyolov5_point_v2/data/tda2/train_rm_1.txt"]
path_rm = ["/ai/ypli/multiyolov5_point_v2/data/tda2/test_rm.txt"]
path_rm = ['/ai/DataSets/TopViewMultiTaskPerc_xmlin/roadmarking/annotations/20240411_生态园and保定对向路沿/2task_rm_20240405_wuluhong/2task_rm_20240405_wuluhong_gt.txt']
parser.add_argument('--segdata', default=path, help='root path of segmentation data')#type=list,
parser.add_argument('--segdata_rm', default=path_rm, help='root path of segmentation data')#type=list,
parser.add_argument('--batch-size', type=int, default=1, help='size of each image batch')
parser.add_argument('--img-size', type=int, default=544, help='inference size (pixels)')
parser.add_argument('--base-size', type=int, default=544, help='long side of segtest image you want to input network')
parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
parser.add_argument('--task', default='val', help='train, val, test, speed or study')
parser.add_argument('--device', default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--verbose', action='store_true', help='report mAP by class')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt')
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
parser.add_argument('--project', default='runs/test', help='save to project/name')
parser.add_argument('--name', default='exp', help='save to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
opt = parser.parse_args()
opt.save_json |= opt.data.endswith('coco.yaml')
opt.data = check_file(opt.data) # check file
print(opt)
check_requirements()
if opt.task in ('train', 'val', 'test'): # run normally
print(1111)
test(opt.data,
opt.weights,
opt.batch_size,
opt.img_size,
opt.conf_thres,
opt.iou_thres,
opt.save_json,
opt.single_cls,
opt.augment,
opt.verbose,
save_txt=opt.save_txt | opt.save_hybrid,
save_hybrid=opt.save_hybrid,
save_conf=opt.save_conf,
)
elif opt.task == 'speed': # speed benchmarks
for w in opt.weights:
test(opt.data, w, opt.batch_size, opt.img_size, 0.25, 0.45, save_json=False, plots=False)
elif opt.task == 'study': # run over a range of settings and save/plot
# python test.py --task study --data coco.yaml --iou 0.7 --weights yolov5s.pt yolov5m.pt yolov5l.pt yolov5x.pt
x = list(range(256, 1536 + 128, 128)) # x axis (image sizes)
for w in opt.weights:
f = f'study_{Path(opt.data).stem}_{Path(w).stem}.txt' # filename to save to
y = [] # y axis
for i in x: # img-size
print(f'\nRunning {f} point {i}...')
r, _, t = test(opt.data, w, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json,
plots=False)
y.append(r + t) # results and times
np.savetxt(f, y, fmt='%10.4g') # save
os.system('zip -r study.zip study_*.txt')
plot_study_txt(x=x) # plot
batch_size=64 #opt.batch_size
segtest_fsd(root=opt.segdata, weights=opt.weights, batch_size=batch_size, n_segcls=2, base_size=opt.base_size) # 19 for cityscapes
segtest(root=opt.segdata_rm, weights=opt.weights, batch_size=batch_size, n_segcls=6, base_size=opt.base_size) # 19 for cityscapes
Summary of changes (kept as small as possible, all inside tda4/test_custom_tda2.py):
- Added compute_center_square_iou: under the assumption that the original size is 1088x960 with 1 px = 1 cm, it maps the 3/4/5 m squares onto the current 544x480 scale and computes the FSD (class 1 by default) intersection and union inside the centred region.
- Introduced meters=(3, 4, 5) in seg_validation_tda2 and seg_validation, accumulated the intersection/union for each distance, and print "FSD IoU within 3/4/5 m" when evaluation finishes. The existing full-image mIoU/IoU logic is unchanged.
Key code:
test_custom_tda2.py, lines 115-156
# Compute IoU over a square region centred at the image centre with a real-world side length
def compute_center_square_iou(pred_logits, target, meters=(3, 4, 5), orig_hw=(1088, 960), fsd_class=1):
...
for m in meters:
...
inter = ((pred_fsd & gt_fsd) & roi_mask).sum().item()
union = (((pred_fsd | gt_fsd) & roi_mask)).sum().item()
inters[m] = inter
unions[m] = union
return inters, unions
How to use:
- Run test_custom_tda2.py for FSD validation exactly as before (e.g. python test_custom_tda2.py ...). The console output will additionally contain:
- FSD IoU within 3 m: ...
- FSD IoU within 4 m: ...
- FSD IoU within 5 m: ...
- The FSD foreground class defaults to 1; if your labels use a different id, change fsd_class where the function is called.
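One plausible invocation, built only from the flags already defined in the script's argparse section (the weight and list-file paths below are placeholders, not real paths); alternatively, simply edit the path defaults in the __main__ block as the script currently does:

python tda4/test_custom_tda2.py --data data/custom.yaml --weights <fsd_weights.pt> --segdata <fsd_val_list.txt> --img-size 544 --base-size 544 --device 0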
Walk me through in detail, with reference to the code, how you implemented this.
Implementation approach (a small set of changes inside tda4/test_custom_tda2.py):
- New helper compute_center_square_iou. It assumes the original annotations are 1088x960 with 1 pixel = 1 cm, while the current input/output is 544x480, so ratio_h = h / 1088 and ratio_w = w / 960 map 3/4/5 m (metres -> centimetres -> pixels) onto the current scale. A square ROI of that side length is cropped around the image centre, and the prediction/label intersection and union are computed inside the ROI only. The FSD foreground id defaults to 1 and can be changed via the fsd_class parameter; a worked example follows this list.
- In the two validation functions seg_validation_tda2 and seg_validation:
- Added the meters = (3, 4, 5) configuration.
- Each batch additionally returns the centre-region center_inter/center_union.
- The per-distance intersections and unions are accumulated inside the loop.
- When evaluation finishes, the FSD IoU inside the 3/4/5 m centre squares is printed; the existing full-image mIoU/IoU output is unchanged.
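For concreteness, the mapping works out as follows at the 544x480 output size (illustrative arithmetic only, mirroring what compute_center_square_iou does internally):

h, w = 544, 480
ratio_h, ratio_w = h / 1088, w / 960      # both 0.5 under the 1 px = 1 cm assumption
center_h, center_w = h // 2, w // 2       # (272, 240)
for m in (3, 4, 5):
    half = int(m * 100 * ratio_h) // 2    # 75 / 100 / 125 px
    print(m, (center_h - half, center_h + half), (center_w - half, center_w + half))
# 3 m -> rows 197:347, cols 165:315;  4 m -> rows 172:372, cols 140:340;  5 m -> rows 147:397, cols 115:365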
Key code snippets:
test_custom_tda2.py, lines 115-156
# Compute IoU over a square region centred at the image centre with a real-world side length
def compute_center_square_iou(pred_logits, target, meters=(3, 4, 5), orig_hw=(1088, 960), fsd_class=1):
h, w = target.shape[1], target.shape[2]
ratio_h = h / float(orig_hw[0])
ratio_w = w / float(orig_hw[1])
pred_cls = pred_logits.argmax(1)
valid_mask = target != 255 if (target == 255).any() else torch.ones_like(target, dtype=torch.bool)
center_h, center_w = h // 2, w // 2
inters, unions = {}, {}
for m in meters:
side_h = int(m * 100 * ratio_h)
side_w = int(m * 100 * ratio_w)
half_h, half_w = side_h // 2, side_w // 2
top, bottom = max(0, center_h - half_h), min(h, center_h + half_h)
left, right = max(0, center_w - half_w), min(w, center_w + half_w)
roi_mask = torch.zeros_like(target, dtype=torch.bool)
roi_mask[:, top:bottom, left:right] = True
roi_mask &= valid_mask
pred_fsd = pred_cls == fsd_class
gt_fsd = target == fsd_class
inter = ((pred_fsd & gt_fsd) & roi_mask).sum().item()
union = (((pred_fsd | gt_fsd) & roi_mask)).sum().item()
inters[m], unions[m] = inter, union
return inters, unions
test_custom_tda2.py, lines 193-236
def seg_validation_tda2(...):
meters = (3, 4, 5)
def eval_batch(...):
...
correct, labeled,total_pixel = batch_pix_accuracy_class_tda2(...)
inter, union = batch_intersection_union(...)
center_inter, center_union = compute_center_square_iou(pred.data, target, meters=meters, orig_hw=(1088, 960), fsd_class=1)
return correct, labeled, inter, union,total_pixel,mask,center_inter,center_union
...
region_inter = {m: 0 for m in meters}; region_union = {m: 0 for m in meters}
...
correct, labeled, inter, union ,total_pixel,mask,center_inter,center_union = eval_batch(...)
...
for m in meters:
region_inter[m] += center_inter[m]; region_union[m] += center_union[m]
...
for m in meters:
region_iou = region_inter[m] / (np.spacing(1) + region_union[m])
print(f'FSD IoU within {m} m: {region_iou:.4f}')
The same logic (meters, the accumulation, and the final print) was also added to seg_validation. The way you run the script is unchanged: execute the existing validation script, and the additional FSD IoU for the 3/4/5 m centre regions will appear in the console output.