@tda4 I am developing a semantic segmentation model for drivable-area (FSD) segmentation in autonomous driving; it is trained on the bird's-eye-view images the vehicle uses. I have been given a task: compute the model's FSD IoU within 3, 4, and 5 metres. "3 metres" means 3 metres in the real world mapped onto the input image: the original training images are 1088x960, where 1 pixel corresponds to 1 cm in the real world, and the model's input and output size is 544x480. The validation code and the FSD test set already exist, so I can easily compute the model's metrics on the FSD test set with @tda4/test_custom_tda2.py. But how do I compute the metrics within 3/4/5 metres? The 3/4/5 metres are measured from the image centre, i.e. a 3/4/5-metre square drawn in the real world. Please keep the code changes as small as possible, and answer in Chinese.
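The only geometric assumption the change relies on is the metre-to-pixel conversion: at the original 1088x960 resolution 1 pixel is 1 cm, so at the 544x480 network resolution 1 pixel covers 2 cm, and an m-metre square centred on the image becomes a square of roughly m*100/2 pixels per side. A minimal illustrative sketch of that arithmetic (not part of test_custom_tda2.py):

scale = 544 / 1088        # = 480 / 960 = 0.5, original resolution -> network resolution
for m in (3, 4, 5):
    print(m, 'm ->', int(m * 100 * scale), 'px per side')   # 150 / 200 / 250 px

The modified tda4/test_custom_tda2.py: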
import argparse
import json
import os
from pathlib import Path
from threading import Thread
import numpy as np
import torch
import yaml
from tqdm import tqdm
import tda2imgprocess
from models.experimental import *
from utils.datasets import create_dataloader
from utils.general import coco80_to_coco91_class, check_dataset, check_file, check_img_size, check_requirements, \
box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, set_logging, increment_path, colorstr
from utils.metrics import ap_per_class, ConfusionMatrix, batch_pix_accuracy,batch_pix_accuracy_class, batch_intersection_union,batch_pix_accuracy_class_tda2 # 后两个新增分割
from utils.plots import plot_images, output_to_target, plot_study_txt
from utils.torch_utils import select_device, time_synchronized
import torch.nn.functional as F
import cv2
from models.yolo import Model
from utils.loss import ComputeLoss, SegmentationLosses, SegFocalLoss, OhemCELoss, ProbOhemCrossEntropy2d,PoseLoss
import SegmentationDataset
"""
test_custom.py differs from test.py only in the data loader, which is switched from Cityscapes to Custom.
In the new train/test flow (loader mode "testval"), the long side of the validation images can be resized to base-size before being fed to the network while the masks stay at the original resolution; the code below automatically bilinearly interpolates the network output back to the original size before computing metrics.
Example invocation:
python test.py --data cityscapes_det.yaml --segdata ./data/citys --weights ./best.pt --img-size 640 --base-size 640
i.e. two extra arguments, --segdata and --base-size, compared with the original yolov5.
"""
Cityscapes_COLORMAP = [
[128, 64, 128], [0, 0, 0], [244, 35, 232], [0, 0, 192], [70, 70, 70], [102, 102, 0], [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], [0, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 70], [0, 60, 100], [0, 80, 100], [0, 0, 230], [119, 11, 32],
]
def label2image(pred, COLORMAP=Cityscapes_COLORMAP):
colormap = np.array(COLORMAP, dtype='uint8')
X = pred.astype('int32')
return colormap[X, :]
# Compute IoU over a square region centred at the image centre with a real-world side length (intended for FSD by default)
def compute_center_square_iou(pred_logits, target, meters=(3, 4, 5), orig_hw=(1088, 960), fsd_class=1):
"""
pred_logits: [B, C, H, W], raw network output (before argmax)
target: [B, H, W], segmentation labels
meters: real-world side lengths (in metres) to evaluate
orig_hw: original image size (H, W), at which 1 pixel = 1 cm
fsd_class: class id of the FSD foreground (default 1)
"""
h, w = target.shape[1], target.shape[2]
# Convert the real-world distance (cm) to a pixel length at the current scale, assuming the input was resized proportionally from orig_hw to the current (h, w)
ratio_h = h / float(orig_hw[0]) # scale factor from the original image to the current input size (height)
ratio_w = w / float(orig_hw[1]) # scale factor from the original image to the current input size (width)
pred_cls = pred_logits.argmax(1) # predicted class per pixel
# Pixels labelled 255 (ignore label) are excluded from the IoU
valid_mask = torch.ones_like(target, dtype=torch.bool)
if (target == 255).any():
valid_mask = target != 255
inters, unions = {}, {}
center_h, center_w = h // 2, w // 2
for m in meters:
side_h = int(m * 100 * ratio_h) # pixel side length for m metres along the height (metres -> centimetres -> pixels)
side_w = int(m * 100 * ratio_w) # pixel side length for m metres along the width
half_h = side_h // 2
half_w = side_w // 2
top = max(0, center_h - half_h)
bottom = min(h, center_h + half_h)
left = max(0, center_w - half_w)
right = min(w, center_w + half_w)
roi_mask = torch.zeros_like(target, dtype=torch.bool)
roi_mask[:, top:bottom, left:right] = True
roi_mask = roi_mask & valid_mask
pred_fsd = pred_cls == fsd_class # pixels predicted as FSD
gt_fsd = target == fsd_class # pixels labelled as FSD
inter = ((pred_fsd & gt_fsd) & roi_mask).sum().item()
union = (((pred_fsd | gt_fsd) & roi_mask)).sum().item()
inters[m] = inter
unions[m] = union
return inters, unions
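# Hedged sanity check (added for illustration, not part of the original script): verifies
# compute_center_square_iou on synthetic tensors. Sizes assume the 544x480 network output
# described above; a prediction identical to the label should give IoU = 1.0 at every distance.
def _check_center_square_iou():
    b, c, h, w = 1, 2, 544, 480
    target = torch.zeros(b, h, w, dtype=torch.long)
    target[:, 100:400, 100:400] = 1                     # a block of FSD ground truth
    logits = torch.zeros(b, c, h, w)
    logits[:, 1][target == 1] = 1.0                     # make argmax reproduce the label exactly
    inters, unions = compute_center_square_iou(logits, target, meters=(3, 4, 5))
    for m in (3, 4, 5):
        print(m, 'm IoU:', inters[m] / (np.spacing(1) + unions[m]))  # expect 1.0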
def seg_validation_tda2(model, n_segcls, valloader, device, epoch,half_precision=True):
# Fast test during the training
meters = (3, 4, 5) # side lengths (in metres) of the centred squares used for the local IoU
def eval_batch(model, image, target, half,imagepath):
outputs = model(image)
outputs = gather(outputs, 0, dim=0)
if n_segcls == 2:
pred = outputs[1][1] # index 1 is the segmentation output (original code)
pred_ori = outputs[1][1]
if n_segcls == 6:
pred = outputs[1][0] # original code
pred_ori = outputs[1][0]
if n_segcls == 6:
print("image.shape:",image.shape)
img_draw_rm=SegmentationDataset.tensor2img(image[0])
img_draw_rm.save(f"/ai/ypli/input_rm/{epoch}_{i}_rm_input.png")
pred_draw_rm=torch.argmax(pred[0], 0)
SegmentationDataset.draw_segmentation_mask(img_draw_rm, pred_draw_rm, f"/ai/ypli/tmp/{epoch}_val_rm_pred.png")
else:
print("image.shape:",image.shape)
img_draw_fsd=SegmentationDataset.tensor2img(image[0])
img_draw_fsd.save(f"/ai/ypli/input_rm/{epoch}_{i}_rm_input.png")
pred_draw_fsd=torch.argmax(pred[0], 0)
SegmentationDataset.draw_segmentation_mask(img_draw_fsd, pred_draw_fsd, f"/ai/ypli/tmp/{epoch}_val_fsd_pred.png")
target = target.to(device, non_blocking=True)
pred = F.interpolate(pred, (target.shape[1], target.shape[2]), mode='bilinear', align_corners=True)
maskpred = F.interpolate(pred, (target.shape[1], target.shape[2]), mode='bilinear', align_corners=True)[0]
mask = label2image(maskpred.max(axis=0)[1].cpu().numpy(), Cityscapes_COLORMAP)[:, :, ::-1] # for visualization
correct, labeled,total_pixel = batch_pix_accuracy_class(pred, target,n_segcls)
correct, labeled,total_pixel = batch_pix_accuracy_class_tda2(pred.data, target,n_segcls,imagepath)#
inter, union = batch_intersection_union(pred.data, target, n_segcls)#
center_inter, center_union = compute_center_square_iou(pred.data, target, meters=meters, orig_hw=(1088, 960), fsd_class=1) # intersection/union of FSD inside the centred 3/4/5 m squares
return correct, labeled, inter, union,total_pixel,mask,center_inter,center_union
from copy import deepcopy
model2=deepcopy(model)
half = device.type != 'cpu' and half_precision # half precision only supported on CUDA
if half:#lyp
model2.half()
model2.eval()
total_inter, total_union, total_correct, total_label = 0, 0, 0, 0
total_c = []
tbar = tqdm(valloader, desc='\r')
numtime = 1
region_inter = {m: 0 for m in meters} # accumulated intersection pixels per distance
region_union = {m: 0 for m in meters} # accumulated union pixels per distance
for i, (image, target,imagepath) in enumerate(tbar):
from PIL import Image, ImageDraw
import random
img=Image.open(imagepath[0]).convert('RGB').resize((480, 544))
randnum=str(random.randint(1, 1000))
print(target.shape)#[8, 544, 480]
SegmentationDataset.draw_segmentation_mask(img, target[0], f"/ai/ypli/res/res1{randnum}.png") # known issue: this visualization is wrong
img2=SegmentationDataset.tensor2img(image[0])
img2.save("/ai/ypli/res/tmp.jpg")
print(image[0].shape)#[3, 544, 480]
print(type(image[0]))#torch.Size([3, 544, 480])
# img_draw_rm.save(f"/ai/ypli/input_rm/{epoch}_{i}_rm_input.png")
SegmentationDataset.draw_segmentation_mask(img2, target[0], f"/ai/ypli/res/res2{randnum}.png") # with the original tda4 loading, visualization 2 is correct and 1 is wrong
image = image.to(device, non_blocking=True)
image = image.half() if half else image.float()
print(image.shape)
with torch.no_grad():
correct, labeled, inter, union ,total_pixel,mask,center_inter,center_union= eval_batch(model2, image, target, half,imagepath)
# save visualizations of the test-set images
imagename = (image[0].cpu().numpy().transpose(1,2,0) * 255).astype('uint8')
imagename = (image[0].cpu().numpy().transpose(1,2,0) * 255).astype('uint8')
tda2imgprocess.save_mask(mask,imagepath)
dst_fsd = cv2.addWeighted(mask, 0.4, imagename, 0.6, 0)
cv2.imwrite('/ai/ypli/res/' + str(i) + '.jpg', imagename)
cv2.imwrite('/ai/ypli/res/' + str(i) + '.jpg', dst_fsd)
# save visualizations of the test-set images
print('success')
exit() # debug early-exit: stops after the first batch, so the accumulated metrics below (including the 3/4/5 m IoU) are only produced if this line is removed
total_correct += correct
total_label += labeled
total_inter += inter
total_union += union
total_c += total_pixel
for m in meters: # accumulate this batch's intersection/union inside each centred square
region_inter[m] += center_inter[m]
region_union[m] += center_union[m]
pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label)
total_c = np.array(total_c)
pixAcc_class = 1.0 * total_c[:,0] / (np.spacing(1) + total_c[:,1])
IoU = 1.0 * total_inter / (np.spacing(1) + total_union)
numtime += 1
mIoU = IoU.mean()
tbar.set_description(
'pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU))
if i >= len(tbar) - 1:
print('mIoU= ',str(mIoU))
for j in range(len(IoU)):
print("class:" + str(j) + "result------" +str(IoU[j]) + "--ACC----" + str(pixAcc_class[j]))
if len(IoU) == 14:
tbar.set_description(
'Acc: %.2f, mIoU: %.2f, 0: %.2f, 1: %.2f, 2: %.2f, 3: %.2f, 4: %.2f, 5: %.2f, 6: %.2f, 7: %.2f, 8: %.2f, 9: %.2f, 10: %.2f, 11: %.2f, 12: %.2f, 13: %.2f ' % (pixAcc, mIoU, IoU[0], IoU[1], IoU[2], IoU[3], IoU[4] \
, IoU[5], IoU[6], IoU[7], IoU[8], IoU[9], IoU[10], IoU[11], IoU[12], IoU[13]))
tbar.set_description(
'Acc: %.2f, mIoU: %.2f, 0: %.2f, 1: %.2f, 2: %.2f, 3: %.2f, 4: %.2f, 5: %.2f, 6: %.2f, 7: %.2f, 8: %.2f, 9: %.2f, 10: %.2f, 11: %.2f, 12: %.2f, 13: %.2f ' % (pixAcc, mIoU, IoU[0], IoU[1], IoU[2], IoU[3], IoU[4] \
, IoU[5], IoU[6], IoU[7], IoU[8], IoU[9], IoU[10], IoU[11], IoU[12], IoU[13]))
tbar.set_description(
'Acc: %.2f, mIoU: %.2f, 0: %.2f, 1: %.2f, 2: %.2f, 3: %.2f, 4: %.2f, 5: %.2f, 6: %.2f ' % (pixAcc, mIoU, IoU[0], IoU[1], IoU[2], IoU[3], IoU[4] \
, IoU[5], IoU[6]))
if len(IoU) == 2:
tbar.set_description(
'pixAcc: %.3f, mIoU: %.3f, class0: %.3f, class1: %.3f' % (pixAcc, mIoU, IoU[0], IoU[1]))
for m in meters:
region_iou = region_inter[m] / (np.spacing(1) + region_union[m]) # avoid division by zero
print(f'FSD IoU within {m} m: {region_iou:.4f}') # report the IoU inside the centred square
return mIoU,IoU
def seg_validation(model, n_segcls, valloader, device, half_precision=True):
# Fast test during the training
meters = (3, 4, 5) # side lengths (in metres) of the centred squares used for the local IoU
def eval_batch(model, image, target, half,imagepath):
outputs = model(image)
outputs = gather(outputs, 0, dim=0)
if n_segcls == 2:
pred = outputs[1][1] # index 1 is the segmentation output (original code)
pred_ori = outputs[1][1]
if n_segcls == 6:
pred = outputs[1][0] # original code
pred_ori = outputs[1][0]
# everything up to this point is correct
return None, None, None, None,None,pred_ori # debug early return: the metric computation below in this function is skipped
print(type(pred))
print(type(target))
print(pred.shape)
print(target.shape)
exit()
target = target.to(device, non_blocking=True)
print(pred.shape) # torch.Size([64, 6, 544, 480])
print(target.shape) # torch.Size([64, 544, 480])
print(type(target)) # torch.Size([64, 544, 480])
exit(0)
pred = F.interpolate(pred, (target.shape[1], target.shape[2]), mode='bilinear', align_corners=True)
maskpred = F.interpolate(pred, (target.shape[1], target.shape[2]), mode='bilinear', align_corners=True)[0]
mask = label2image(maskpred.max(axis=0)[1].cpu().numpy(), Cityscapes_COLORMAP)[:, :, ::-1] # for visualization
correct, labeled,total_pixel = batch_pix_accuracy_class(pred, target,n_segcls)
correct, labeled,total_pixel = batch_pix_accuracy_class_tda2(pred.data, target,n_segcls,imagepath)#
inter, union = batch_intersection_union(pred.data, target, n_segcls)#
center_inter, center_union = compute_center_square_iou(pred.data, target, meters=meters, orig_hw=(1088, 960), fsd_class=1) # intersection/union of FSD inside the centred 3/4/5 m squares
return correct, labeled, inter, union,total_pixel,mask,center_inter,center_union
from copy import deepcopy
model2=deepcopy(model)
half = device.type != 'cpu' and half_precision # half precision only supported on CUDA
if half:#lyp
model2.half()
model2.eval()
total_inter, total_union, total_correct, total_label = 0, 0, 0, 0
total_c = []
tbar = tqdm(valloader, desc='\r')
numtime = 1
region_inter = {m: 0 for m in meters} # accumulated intersection pixels per distance
region_union = {m: 0 for m in meters} # accumulated union pixels per distance
for i, (image, target,imagepath) in enumerate(tbar):
from PIL import Image, ImageDraw
import random
img=Image.open(imagepath[0]).convert('RGB').resize((480, 544))
randnum=str(random.randint(1, 1000))
SegmentationDataset.draw_segmentation_mask(img, target, f"/ai/ypli/res/res{randnum}.png")
exit() # debug early-exit: stops evaluation after the first batch
image = image.to(device, non_blocking=True)
image = image.half() if half else image.float()
print(image.shape)
with torch.no_grad():
correct, labeled, inter, union ,total_pixel,mask,center_inter,center_union= eval_batch(model2, image, target, half,imagepath)
# save visualizations of the test-set images
imagename = (image[0].cpu().numpy().transpose(1,2,0) * 255).astype('uint8')
imagename = (image[0].cpu().numpy().transpose(1,2,0) * 255).astype('uint8')
tda2imgprocess.save_mask(mask,imagepath)
dst_fsd = cv2.addWeighted(mask, 0.4, imagename, 0.6, 0)
cv2.imwrite('/ai/ypli/res/' + str(i) + '.jpg', imagename)
cv2.imwrite('/ai/ypli/res/' + str(i) + '.jpg', dst_fsd)
# save visualizations of the test-set images
print('success')
exit() # debug early-exit: stops after the first batch, so the accumulated metrics below (including the 3/4/5 m IoU) are only produced if this line is removed
total_correct += correct
total_label += labeled
total_inter += inter
total_union += union
total_c += total_pixel
for m in meters: # accumulate this batch's intersection/union inside each centred square
region_inter[m] += center_inter[m]
region_union[m] += center_union[m]
pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label)
total_c = np.array(total_c)
pixAcc_class = 1.0 * total_c[:,0] / (np.spacing(1) + total_c[:,1])
IoU = 1.0 * total_inter / (np.spacing(1) + total_union)
numtime += 1
mIoU = IoU.mean()
tbar.set_description(
'pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU))
if i >= len(tbar) - 1:
print('mIoU= ',str(mIoU))
for j in range(len(IoU)):
print("class:" + str(j) + "result------" +str(IoU[j]) + "--ACC----" + str(pixAcc_class[j]))
if len(IoU) == 14:
tbar.set_description(
'Acc: %.2f, mIoU: %.2f, 0: %.2f, 1: %.2f, 2: %.2f, 3: %.2f, 4: %.2f, 5: %.2f, 6: %.2f, 7: %.2f, 8: %.2f, 9: %.2f, 10: %.2f, 11: %.2f, 12: %.2f, 13: %.2f ' % (pixAcc, mIoU, IoU[0], IoU[1], IoU[2], IoU[3], IoU[4] \
, IoU[5], IoU[6], IoU[7], IoU[8], IoU[9], IoU[10], IoU[11], IoU[12], IoU[13]))
tbar.set_description(
'Acc: %.2f, mIoU: %.2f, 0: %.2f, 1: %.2f, 2: %.2f, 3: %.2f, 4: %.2f, 5: %.2f, 6: %.2f, 7: %.2f, 8: %.2f, 9: %.2f, 10: %.2f, 11: %.2f, 12: %.2f, 13: %.2f ' % (pixAcc, mIoU, IoU[0], IoU[1], IoU[2], IoU[3], IoU[4] \
, IoU[5], IoU[6], IoU[7], IoU[8], IoU[9], IoU[10], IoU[11], IoU[12], IoU[13]))
tbar.set_description(
'Acc: %.2f, mIoU: %.2f, 0: %.2f, 1: %.2f, 2: %.2f, 3: %.2f, 4: %.2f, 5: %.2f, 6: %.2f ' % (pixAcc, mIoU, IoU[0], IoU[1], IoU[2], IoU[3], IoU[4] \
, IoU[5], IoU[6]))
if len(IoU) == 2:
tbar.set_description(
'pixAcc: %.3f, mIoU: %.3f, class0: %.3f, class1: %.3f' % (pixAcc, mIoU, IoU[0], IoU[1]))
for m in meters:
region_iou = region_inter[m] / (np.spacing(1) + region_union[m]) # avoid division by zero
print(f'FSD IoU within {m} m: {region_iou:.4f}') # report the IoU inside the centred square
return mIoU,IoU
def segtest(weights, root="data/citys", batch_size=16, half_precision=True, n_segcls=19, base_size=2048): # tests at the original resolution; size misalignment is not handled, image sizes should be multiples of 32
device = select_device(opt.device, batch_size=batch_size)
model = attempt_load_tda4(weights, map_location=device) # load FP32 model
testvalloader = SegmentationDataset.get_rm_loader_tda2(root, batch_size=batch_size, split="val", mode="val", workers=8, base_size=base_size) #get_custom_loader
testvalloader = SegmentationDataset.get_rm_loader(root, batch_size=batch_size, split="val", mode="val", workers=8, base_size=base_size) #get_custom_loader
seg_validation(model, n_segcls, testvalloader, device, half_precision)
def segtest_fsd(weights, root="data/citys", batch_size=16, half_precision=True, n_segcls=19, base_size=2048): # tests at the original resolution; size misalignment is not handled, image sizes should be multiples of 32
device = select_device(opt.device, batch_size=batch_size)
model = attempt_load_tda4(weights, map_location=device) # load FP32 model
testvalloader = SegmentationDataset.get_custom_loader_tda2(root, batch_size=batch_size, split="val", mode="val", workers=8, base_size=base_size) #get_custom_loader
testvalloader = SegmentationDataset.get_custom_loader(root, batch_size=batch_size, split="val", mode="val", workers=8, base_size=base_size) #get_custom_loader
seg_validation(model, n_segcls, testvalloader, device, half_precision)
def test(data,
weights=None,
batch_size=64,
imgsz=640,
conf_thres=0.001,
iou_thres=0.6, # for NMS
save_json=False,
single_cls=False,
augment=False,
verbose=False,
model=None,
dataloader=None,
save_dir=Path(''), # for saving images
save_txt=False, # for auto-labelling
save_hybrid=False, # for hybrid auto-labelling
save_conf=False, # save auto-label confidences
plots=False,
wandb_logger=None,
compute_loss=False,
half_precision=True,
is_coco=False):
# Initialize/load model and set device
training = model is not None
if training: # called by train.py
device = next(model.parameters()).device # get model device
else: # called directly
set_logging()
device = select_device(opt.device, batch_size=batch_size)
# Directories
save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
model = attempt_load(weights, map_location=device) # load FP32 model
model = Model(opt.cfg, ch=3, nc=3).to('cpu') # create
gs = max(int(model.stride.max()), 32) # grid size (max stride)
imgsz = check_img_size(imgsz, s=gs) # check img_size
# Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
if device.type != 'cpu' and torch.cuda.device_count() > 1:
model = nn.DataParallel(model)
# Half
half = device.type != 'cpu' and half_precision # half precision only supported on CUDA
half = False
if half:
model.half()
# Configure
model.eval()
if isinstance(data, str):
is_coco = data.endswith('coco.yaml')
with open(data) as f:
data = yaml.load(f, Loader=yaml.SafeLoader)
check_dataset(data) # check
nc = 1 if single_cls else int(data['nc']) # number of classes
iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
niou = iouv.numel()
# Logging
log_imgs = 0
if wandb_logger and wandb_logger.wandb:
log_imgs = min(wandb_logger.log_imgs, 100)
# Dataloader
if not training:
if device.type != 'cpu':
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
task = opt.task if opt.task in ('train', 'val', 'test') else 'val' # path to train/val/test images
dataloader = create_dataloader(data[task], imgsz, batch_size, gs, opt, pad=0.5, rect=True,
prefix=colorstr(f'{task}: '))[0]
#/ai/DataSets/OD_FSD_zh/psd_v2.0/data/txt_dataset/ann_zh/test_img_list_v5.txt
#data[task]
#/ai/DataSets/OD_FSD_zh/psd_v2.0/data/83_psd_20240723_占用属性优化/alldata/test_img_list.txt
seen = 0
confusion_matrix = ConfusionMatrix(nc=nc)
names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
coco91class = coco80_to_coco91_class()
s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
loss = torch.zeros(3, device=device)
jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
numtotal = 0
numocc,numvip,numwoman,numdisabled,numcharging = 0,0,0,0,0
for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
img = img.to(device, non_blocking=True)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
targets = targets.to(device)
nb, _, height, width = img.shape # batch size, channels, height, width
with torch.no_grad():
# Run model
t = time_synchronized()
out, train_out = model(img, augment=augment)[0] # inference and training outputs; modified: with the new model, output [0] is the detection head
t0 += time_synchronized() - t
# Compute loss
if compute_loss:
# Hyperparameters
with open(opt.hyp) as f:
hyp = yaml.load(f, Loader=yaml.SafeLoader) # load hyps
model.hyp = hyp
model.gr = 1.0
compute_loss = PoseLoss(model)
loss += compute_loss([x.float() for x in train_out], targets)[1][:3] # box, obj, cls
# Run NMS
targets[:, 2:6] *= torch.Tensor([width, height, width, height]).to(device) # to pixels
lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling
t = time_synchronized()
out = non_max_suppression(out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb, multi_label=False)
t1 += time_synchronized() - t
# Statistics per image
for si, pred in enumerate(out):
perd_slot = pred[:, 14:]
pred = torch.cat((pred[:, :5], pred[:, 13:14]), 1)
labels = targets[targets[:, 0] == si, 1:]
nl = len(labels)
tcls = labels[:, 0].tolist() if nl else [] # target class
tocc = labels[:, 13].tolist() if nl else []
tvip = labels[:, 14].tolist() if nl else []
twoman = labels[:, 15].tolist() if nl else []
tdisable = labels[:, 16].tolist() if nl else []
tcharging = labels[:, 17].tolist() if nl else []
path = Path(paths[si])
seen += 1
if len(pred) == 0:
if nl:
stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
continue
# Predictions
predn = pred.clone()
scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1]) # native-space pred
# Append to text file
if save_txt:
gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh
for *xyxy, conf, cls in predn.tolist():
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f:
f.write(('%g ' * len(line)).rstrip() % line + '\n')
# W&B logging - Media Panel Plots
if len(wandb_images) < log_imgs and wandb_logger.current_epoch > 0: # Check for test operation
if wandb_logger.current_epoch % wandb_logger.bbox_interval == 0:
box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
"class_id": int(cls),
"box_caption": "%s %.3f" % (names[cls], conf),
"scores": {"class_score": conf},
"domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space
wandb_images.append(wandb_logger.wandb.Image(img[si], boxes=boxes, caption=path.name))
wandb_logger.log_training_progress(predn, path, names) if wandb_logger and wandb_logger.wandb_run else None
# Append to pycocotools JSON dictionary
if save_json:
# [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
image_id = int(path.stem) if path.stem.isnumeric() else path.stem
box = xyxy2xywh(predn[:, :4]) # xywh
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
for p, b in zip(pred.tolist(), box.tolist()):
jdict.append({'image_id': image_id,
'category_id': coco91class[int(p[5])] if is_coco else int(p[5]),
'bbox': [round(x, 3) for x in b],
'score': round(p[4], 5)})
# Assign all predictions as incorrect
correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
if nl:
detected = [] # target indices
tcls_tensor = labels[:, 0]
# target boxes
tbox = xywh2xyxy(labels[:, 1:5])
scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1]) # native-space labels
if plots:
confusion_matrix.process_batch(predn, torch.cat((labels[:, 0:1], tbox), 1))
# Per target class
for cls in torch.unique(tcls_tensor):
ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # prediction indices
pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) # target indices
# Search for detections
if pi.shape[0]:
# Prediction to target ious
ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1) # best ious, indices
# Append detections
detected_set = set()
for j in (ious > iouv[0]).nonzero(as_tuple=False):
d = ti[i[j]] # detected target
if d.item() not in detected_set:
detected_set.add(d.item())
detected.append(d)
correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn
if len(detected) == nl: # all targets already located in image
break
# Append statistics (correct, conf, pcls, tcls)
stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
# Plot images
if plots and batch_i < 3:
f = save_dir / f'test_batch{batch_i}_labels.jpg' # labels
Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
f = save_dir / f'test_batch{batch_i}_pred.jpg' # predictions
Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start()
# Compute statistics
stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
if len(stats) and stats[0].any():
p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, AP@0.5:0.95
mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
else:
nt = torch.zeros(1)
# Print results
pf = '%20s' + '%12i' * 2 + '%12.3g' * 4 # print format
print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
# Print results per class
if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
for i, c in enumerate(ap_class):
print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
# Print speeds
t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple
if not training:
print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
# Plots
if plots:
confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
if wandb_logger and wandb_logger.wandb:
val_batches = [wandb_logger.wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))]
wandb_logger.log({"Validation": val_batches})
if wandb_images:
wandb_logger.log({"Bounding Box Debugger/Images": wandb_images})
# Save JSON
if save_json and len(jdict):
w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights
anno_json = '../coco/annotations/instances_val2017.json' # annotations json
pred_json = str(save_dir / f"{w}_predictions.json") # predictions json
print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
with open(pred_json, 'w') as f:
json.dump(jdict, f)
try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
anno = COCO(anno_json) # init annotations api
pred = anno.loadRes(pred_json) # init predictions api
eval = COCOeval(anno, pred, 'bbox')
if is_coco:
eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files] # image IDs to evaluate
eval.evaluate()
eval.accumulate()
eval.summarize()
map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5)
except Exception as e:
print(f'pycocotools unable to run: {e}')
# Return results
model.float() # for training
if not training:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
print(f"Results saved to {save_dir}{s}")
maps = np.zeros(nc) + map
for i, c in enumerate(ap_class):
maps[c] = ap[i]
return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
if __name__ == '__main__':
parser = argparse.ArgumentParser(prog='test.py')
parser.add_argument('--cfg', type=str, default='models/yolov5s_custom_seg.yaml', help='model.yaml path')#
parser.add_argument('--hyp', type=str, default='data/hyp.scratch.yaml', help='hyperparameters path')
#runs/train/exp52/weights/exp52_last_114_v2.0.21_20241129.pt
#runs/train/exp4/weights/best.pt
#./TTERMSegnetV30_train_epoch_52.pth
"/ai/zhdata/lyp/multiyolov5_point_v2/runs/train/tda2-v17-tda2pretrain-tda4loadervaler/weights/last.pt"
parser.add_argument('--weights', nargs='+', type=str, default="/ai/zhdata/lyp/multiyolov5_point_v2/runs/train/222/weights/lyp_last_53.pt", help='model.pt path(s)') #'runs/train/exp51/weights/last.pt'
parser.add_argument('--data', type=str, default='data/custom.yaml', help='*.data path')
path = ["/ai/DataSets/TopViewMultiTaskPerc_xmlin/freeSpace/annotations/tmp_zh/val_v41_cpp_20250903_FSD.txt"]
path = ["/ai/ypli/multiyolov5_point_v2/data/tda2/train_fsd_1.txt"]
path = ["/ai/ypli/multiyolov5_point_v2/data/tda2/test_fsd.txt"]
path = ['/ai/DataSets/TopViewMultiTaskPerc_xmlin/freeSpace/annotations/ann_zh/val_zh_v23_20250312_gt.txt']
path_rm = ['/ai/DataSets/TopViewMultiTaskPerc_xmlin/roadmarking/annotations/ann_zh/val_v24_fullLabel0103_gt_v1.txt']
path_rm = ["/ai/ypli/multiyolov5_point_v2/data/txt/val_v30_20250305_gt_64.txt"]
path_rm = ["./1img.txt"]
path_rm = ["/ai/zhdata/lyp/jiansudai/100jiansudai.txt"]
path_rm = ["/ai/ypli/multiyolov5_point_v2/data/tda2/train_rm_1.txt"]
path_rm = ["/ai/ypli/multiyolov5_point_v2/data/tda2/test_rm.txt"]
path_rm = ['/ai/DataSets/TopViewMultiTaskPerc_xmlin/roadmarking/annotations/20240411_生态园and保定对向路沿/2task_rm_20240405_wuluhong/2task_rm_20240405_wuluhong_gt.txt']
parser.add_argument('--segdata', default=path, help='root path of segmentation data')#type=list,
parser.add_argument('--segdata_rm', default=path_rm, help='root path of segmentation data')#type=list,
parser.add_argument('--batch-size', type=int, default=1, help='size of each image batch')
parser.add_argument('--img-size', type=int, default=544, help='inference size (pixels)')
parser.add_argument('--base-size', type=int, default=544, help='long side of segtest image you want to input network')
parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
parser.add_argument('--task', default='val', help='train, val, test, speed or study')
parser.add_argument('--device', default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--verbose', action='store_true', help='report mAP by class')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt')
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
parser.add_argument('--project', default='runs/test', help='save to project/name')
parser.add_argument('--name', default='exp', help='save to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
opt = parser.parse_args()
opt.save_json |= opt.data.endswith('coco.yaml')
opt.data = check_file(opt.data) # check file
print(opt)
check_requirements()
if opt.task in ('train', 'val', 'test'): # run normally
print(1111)
test(opt.data,
opt.weights,
opt.batch_size,
opt.img_size,
opt.conf_thres,
opt.iou_thres,
opt.save_json,
opt.single_cls,
opt.augment,
opt.verbose,
save_txt=opt.save_txt | opt.save_hybrid,
save_hybrid=opt.save_hybrid,
save_conf=opt.save_conf,
)
elif opt.task == 'speed': # speed benchmarks
for w in opt.weights:
test(opt.data, w, opt.batch_size, opt.img_size, 0.25, 0.45, save_json=False, plots=False)
elif opt.task == 'study': # run over a range of settings and save/plot
# python test.py --task study --data coco.yaml --iou 0.7 --weights yolov5s.pt yolov5m.pt yolov5l.pt yolov5x.pt
x = list(range(256, 1536 + 128, 128)) # x axis (image sizes)
for w in opt.weights:
f = f'study_{Path(opt.data).stem}_{Path(w).stem}.txt' # filename to save to
y = [] # y axis
for i in x: # img-size
print(f'\nRunning {f} point {i}...')
r, _, t = test(opt.data, w, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json,
plots=False)
y.append(r + t) # results and times
np.savetxt(f, y, fmt='%10.4g') # save
os.system('zip -r study.zip study_*.txt')
plot_study_txt(x=x) # plot
batch_size=64 #opt.batch_size
segtest_fsd(root=opt.segdata, weights=opt.weights, batch_size=batch_size, n_segcls=2, base_size=opt.base_size) # 19 for cityscapes
segtest(root=opt.segdata_rm, weights=opt.weights, batch_size=batch_size, n_segcls=6, base_size=opt.base_size) # 19 for cityscapes
Summary of changes (kept as small as possible, all inside tda4/test_custom_tda2.py):
- Added compute_center_square_iou: under the assumption that the original size is 1088x960 with 1 px = 1 cm, it maps the 3/4/5 m squares onto the current 544x480 scale and computes the FSD (class 1 by default) intersection and union inside the centred region.
- Introduced meters=(3, 4, 5) in seg_validation_tda2 and seg_validation, accumulated the intersection/union for each distance, and print "FSD IoU within 3/4/5 m" when evaluation finishes. The existing full-image mIoU/IoU logic is unchanged.
Key code:
test_custom_tda2.py, lines 115-156
# Compute IoU over a square region centred at the image centre with a real-world side length
def compute_center_square_iou(pred_logits, target, meters=(3, 4, 5), orig_hw=(1088, 960), fsd_class=1):
...
for m in meters:
...
inter = ((pred_fsd & gt_fsd) & roi_mask).sum().item()
union = (((pred_fsd | gt_fsd) & roi_mask)).sum().item()
inters[m] = inter
unions[m] = union
return inters, unions
How to use:
- Run test_custom_tda2.py for FSD validation exactly as before (e.g. python test_custom_tda2.py ...). The console output will additionally contain:
- FSD IoU within 3 m: ...
- FSD IoU within 4 m: ...
- FSD IoU within 5 m: ...
- The FSD foreground class defaults to 1; if your labels use a different id, change fsd_class where the function is called.
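One plausible invocation, built only from the flags already defined in the script's argparse section (the weight and list-file paths below are placeholders, not real paths); alternatively, simply edit the path defaults in the __main__ block as the script currently does:

python tda4/test_custom_tda2.py --data data/custom.yaml --weights <fsd_weights.pt> --segdata <fsd_val_list.txt> --img-size 544 --base-size 544 --device 0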
Walk me through in detail, with reference to the code, how you implemented this.
Implementation approach (a small set of changes inside tda4/test_custom_tda2.py):
- New helper compute_center_square_iou. It assumes the original annotations are 1088x960 with 1 pixel = 1 cm, while the current input/output is 544x480, so ratio_h = h / 1088 and ratio_w = w / 960 map 3/4/5 m (metres -> centimetres -> pixels) onto the current scale. A square ROI of that side length is cropped around the image centre, and the prediction/label intersection and union are computed inside the ROI only. The FSD foreground id defaults to 1 and can be changed via the fsd_class parameter; a worked example follows this list.
- In the two validation functions seg_validation_tda2 and seg_validation:
- Added the meters = (3, 4, 5) configuration.
- Each batch additionally returns the centre-region center_inter/center_union.
- The per-distance intersections and unions are accumulated inside the loop.
- When evaluation finishes, the FSD IoU inside the 3/4/5 m centre squares is printed; the existing full-image mIoU/IoU output is unchanged.
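For concreteness, the mapping works out as follows at the 544x480 output size (illustrative arithmetic only, mirroring what compute_center_square_iou does internally):

h, w = 544, 480
ratio_h, ratio_w = h / 1088, w / 960      # both 0.5 under the 1 px = 1 cm assumption
center_h, center_w = h // 2, w // 2       # (272, 240)
for m in (3, 4, 5):
    half = int(m * 100 * ratio_h) // 2    # 75 / 100 / 125 px
    print(m, (center_h - half, center_h + half), (center_w - half, center_w + half))
# 3 m -> rows 197:347, cols 165:315;  4 m -> rows 172:372, cols 140:340;  5 m -> rows 147:397, cols 115:365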
Key code snippets:
test_custom_tda2.py, lines 115-156
# Compute IoU over a square region centred at the image centre with a real-world side length
def compute_center_square_iou(pred_logits, target, meters=(3, 4, 5), orig_hw=(1088, 960), fsd_class=1):
h, w = target.shape[1], target.shape[2]
ratio_h = h / float(orig_hw[0])
ratio_w = w / float(orig_hw[1])
pred_cls = pred_logits.argmax(1)
valid_mask = target != 255 if (target == 255).any() else torch.ones_like(target, dtype=torch.bool)
center_h, center_w = h // 2, w // 2
inters, unions = {}, {}
for m in meters:
side_h = int(m * 100 * ratio_h)
side_w = int(m * 100 * ratio_w)
half_h, half_w = side_h // 2, side_w // 2
top, bottom = max(0, center_h - half_h), min(h, center_h + half_h)
left, right = max(0, center_w - half_w), min(w, center_w + half_w)
roi_mask = torch.zeros_like(target, dtype=torch.bool)
roi_mask[:, top:bottom, left:right] = True
roi_mask &= valid_mask
pred_fsd = pred_cls == fsd_class
gt_fsd = target == fsd_class
inter = ((pred_fsd & gt_fsd) & roi_mask).sum().item()
union = (((pred_fsd | gt_fsd) & roi_mask)).sum().item()
inters[m], unions[m] = inter, union
return inters, unions
test_custom_tda2.py, lines 193-236
def seg_validation_tda2(...):
meters = (3, 4, 5)
def eval_batch(...):
...
correct, labeled,total_pixel = batch_pix_accuracy_class_tda2(...)
inter, union = batch_intersection_union(...)
center_inter, center_union = compute_center_square_iou(pred.data, target, meters=meters, orig_hw=(1088, 960), fsd_class=1)
return correct, labeled, inter, union,total_pixel,mask,center_inter,center_union
...
region_inter = {m: 0 for m in meters}; region_union = {m: 0 for m in meters}
...
correct, labeled, inter, union ,total_pixel,mask,center_inter,center_union = eval_batch(...)
...
for m in meters:
region_inter[m] += center_inter[m]; region_union[m] += center_union[m]
...
for m in meters:
region_iou = region_inter[m] / (np.spacing(1) + region_union[m])
print(f'FSD IoU within {m} m: {region_iou:.4f}')
The same logic (meters, the accumulation, and the final print) was also added to seg_validation. The way you run the script is unchanged: execute the existing validation script, and the additional FSD IoU for the 3/4/5 m centre regions will appear in the console output.