Yolov11实战使用教程
- YOLO11介绍
- YOLOv11使用
  - 1.下载项目
  - [2. 准备数据集](<#2. 准备数据集>)
    - [2.1. 使用公开数据集](<#2.1. 使用公开数据集>)
    - [2.2. 使用自定义数据集](<#2.2. 使用自定义数据集>)
  - [3. 训练&推理](<#3. 训练&推理>)
  - [4. 模型转化](<#4. 模型转化>)
YOLO11介绍
YOLO11是由Ultralytics团队于2024年9月30日发布的,它是YOLO(You Only Look Once)系列中的最新成员。YOLO11在之前版本的YOLO基础上引入了新功能和改进,以进一步提高性能和灵活性。这使得YOLO11成为目标检测和跟踪、实例分割、图像分类和姿态估计等多种计算机视觉任务的理想选择。
注:具体对网络的讲解,请见其他博主吧~
开源地址:https://github.com/ultralytics/ultralytics
YOLOv11使用
1.下载项目
- 打开yolo官网:https://github.com/ultralytics/ultralytics
- 下载代码:
```cmd
git clone https://github.com/ultralytics/ultralytics.git
```
- 下载一个预训练模型
2. 准备数据集
2.1. 使用公开数据集
https://github.com/ultralytics/ultralytics/tree/main/docs/en/datasets
这里面存储yolo支持的所有公开数据的data.yaml,下载数据集后,可以直接使用。
2.2. 使用自定义数据集
- 使用labelImg进行数据标注,注意选择yolo格式,可以标注目标检测和分割【如果不会labelImg,请见其他博主~】
- 标注完,需要注意images和labels的存放位置,images和labels可以放在同一文件夹中,也可以分别存放在images/和labels/
- [可选] 可以生成**数据集.txt**,里面存储图像的绝对路径,每行一个,例如:
/path/to/dataset/A/images/1.jpg
/path/to/dataset/A/images/2.jpg
...
4. 准备data.yaml
```yaml
path: /path/to/dataset/ # 数据集的根目录,里面存储train/val/test/所有数据集
train:
  - train/A/A.txt # yaml中可以直接放txt
  - train/B/images/ # 也可以放图像的文件夹路径,yolo代码会默认labels路径在train/B/labels/,如果没有该文件夹,代码默认图像没有标注(即为负样本)
val:
  - val/A/A.txt
test:
  - test/A/A.txt
names:
  0: 0对应标签名称
  1: 1对应标签名称
  # ...
```
3. 训练&推理
官网已经提供了每种任务的训练/验证/推理代码:
https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models/yolo
下面博主提供自己的版本 【以目标检测为例】
博主风格:
- 会在一次模型训练后,连带着推理一起做,保存在同一个文件夹中
- 在推理后,会做数据分析:
2.1. 每个类别的漏报、误报的指标计算
2.2. 每个类别的高宽散点图
python
from ultralytics import YOLO
import os
import shutil
import argparse
import numpy as np
import torch
from collections import defaultdict
import matplotlib.pyplot as plt
import os
import cvzone
# os.environ['CUDA_VISIBLE_DEVICES']='1'
# Module-level accumulators appended to by YOLOEvaluator.evaluate_single_image
# when analyze_pred is enabled; test() returns them and main() plots the first.
analyze_pred_wh=[]  # one [w, h] entry per predicted box
analyze_pred_y2=[]  # one y2 coordinate per predicted box
class YOLOEvaluator:
    """Score YOLO detections against YOLO-format ground-truth label files.

    Per class, the evaluator collects one TP/FP flag and one confidence per
    kept prediction plus prediction / ground-truth counts. Per image it also
    produces a single image-level verdict (see ``evaluate_single_image``).
    """

    # Fallback class-id -> name mapping used by ``draw_bbox``.
    DEFAULT_NAMES = {0: 'smoke', 1: 'fire'}

    def __init__(self, num_classes, conf_threshold=0.001, iou_threshold=0.6):
        """Store the class count and the confidence / IoU thresholds."""
        self.num_classes = num_classes
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        # Per-instance copies of the width/height and y2 statistics that are
        # also appended to the module-level accumulators.
        self.analyze_pred_wh = []
        self.analyze_pred_y2 = []

    def parse_yolo_txt(self, txt_path, img_width, img_height):
        """Parse a YOLO-format label file into pixel-space corner boxes.

        Every non-blank line is read as ``class x_center y_center width
        height``; the four coordinates are multiplied by the image size.

        Args:
            txt_path: path of the label file; a missing file means "no boxes".
            img_width: image width used to scale x values.
            img_height: image height used to scale y values.

        Returns:
            ``np.ndarray`` of rows ``[class_id, x1, y1, x2, y2]``, or an
            empty ``(0, 5)`` array when there is no file or no box.
        """
        boxes = []
        if os.path.exists(txt_path):
            with open(txt_path, 'r') as f:
                for line in f:
                    parts = line.split()
                    if not parts:
                        continue
                    class_id = int(parts[0])
                    x_center = float(parts[1]) * img_width
                    y_center = float(parts[2]) * img_height
                    width = float(parts[3]) * img_width
                    height = float(parts[4]) * img_height
                    # Convert centre/size to corner format.
                    boxes.append([
                        class_id,
                        x_center - width / 2,
                        y_center - height / 2,
                        x_center + width / 2,
                        y_center + height / 2,
                    ])
        return np.array(boxes) if boxes else np.empty((0, 5))

    def draw_bbox(self, boxes, img, attribute='gt', name_dic=None):
        """Draw labelled boxes on ``img`` with cvzone and return the image.

        Args:
            boxes: rows of ``[class_id, x1, y1, x2, y2]`` when ``attribute``
                is ``'gt'``, or ``[class_id, x1, y1, x2, y2, conf]`` when it
                is ``'pred'``.
            img: image handed to the cvzone drawing calls.
            attribute: ``'gt'`` or ``'pred'``; selects colour and label text.
            name_dic: class-id -> class-name mapping; ``None`` selects
                ``DEFAULT_NAMES``.

        Raises:
            ValueError: if ``attribute`` is neither ``'gt'`` nor ``'pred'``.
        """
        if attribute not in ('gt', 'pred'):
            raise ValueError(f"attribute must be 'gt' or 'pred', got {attribute!r}")
        if name_dic is None:
            name_dic = self.DEFAULT_NAMES

        for box in boxes:
            if attribute == 'gt':
                class_id, x1, y1, x2, y2 = box
                conf = None
            else:
                class_id, x1, y1, x2, y2, conf = box
            x1, y1, x2, y2, class_id = int(x1), int(y1), int(x2), int(y2), int(class_id)
            w, h = x2 - x1, y2 - y1
            currentClass = name_dic[class_id]  # class name shown next to the box

            # Keep the label inside the image near the top-left corner.
            text_pos = (max(0, x1 + 10), max(35, y1 - 10))
            if attribute == 'gt':
                cvzone.putTextRect(img, f"{currentClass} ", text_pos, 1, 1,
                                   (0, 255, 255), colorR=(0, 0, 0))
                # Colour tuples are BGR.
                cvzone.cornerRect(img, (x1, y1, w, h), l=3, colorR=(0, 0, 255), t=2, rt=2)
            else:
                cvzone.putTextRect(img, f"{currentClass} {conf}", text_pos, 1, 1,
                                   (0, 255, 255), colorR=(0, 0, 0))
                cvzone.cornerRect(img, (x1, y1, w, h), l=30, colorR=(0, 255, 0), t=5)
        return img

    def box_iou(self, box1, box2):
        """Return the IoU of two ``[x1, y1, x2, y2]`` boxes (0.0 if disjoint)."""
        x1_max = max(box1[0], box2[0])
        y1_max = max(box1[1], box2[1])
        x2_min = min(box1[2], box2[2])
        y2_min = min(box1[3], box2[3])
        if x2_min <= x1_max or y2_min <= y1_max:
            return 0.0
        intersection = (x2_min - x1_max) * (y2_min - y1_max)
        area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
        area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union = area1 + area2 - intersection
        return intersection / union if union > 0 else 0.0

    def compute_ap(self, recall, precision):
        """Return the area under the precision envelope of a PR curve.

        Args:
            recall: recall values ordered by descending confidence.
            precision: precision values aligned with ``recall``.
        """
        # Add the end points of the curve.
        mrec = np.concatenate(([0.], recall, [1.]))
        mpre = np.concatenate(([1.], precision, [0.]))
        # Make precision monotonically non-increasing (running max from the right).
        mpre = np.maximum.accumulate(mpre[::-1])[::-1]
        # Sum rectangle areas wherever recall changes.
        i = np.where(mrec[1:] != mrec[:-1])[0]
        return np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])

    def evaluate_single_image(self, pred_boxes, gt_txt_path, img_width, img_height,
                              save_img, save_path, name_dic, img=None, analyze_pred=False):
        """Match one image's predictions to its ground truth.

        Predictions below ``conf_threshold`` are dropped, the rest are
        processed in descending confidence and greedily matched to the
        best-overlapping unmatched ground-truth box of the same class.

        Args:
            pred_boxes: rows of ``[class_id, x1, y1, x2, y2, conf]``.
            gt_txt_path: YOLO-format label file of the image.
            img_width: image width in pixels.
            img_height: image height in pixels.
            save_img: when true, draw and write every image except clean
                negatives to ``save_path``.
            save_path: output path used with ``cv2.imwrite``.
            name_dic: class-id -> name mapping used for drawing.
            img: image to draw on; needed when ``save_img`` is true.
            analyze_pred: when true, record width/height and y2 of every
                kept prediction.

        Returns:
            ``(obj_results, img_results)``. ``obj_results`` maps class id to
            ``{'tp', 'conf', 'pred_count', 'gt_count'}``. ``img_results`` has
            exactly one key set to 1. NOTE: the key names do not follow the
            textbook confusion-matrix meaning; they are kept because callers
            read them:
              * ``'TN'`` - image has ground truth and at least one match (hit)
              * ``'FP'`` - image has ground truth but no match (miss)
              * ``'FN'`` - image has no ground truth but has predictions
                (false alarm)
              * ``'TP'`` - image has neither ground truth nor predictions
        """
        global analyze_pred_wh, analyze_pred_y2
        gt_boxes = self.parse_yolo_txt(gt_txt_path, img_width, img_height)
        obj_results = defaultdict(lambda: {'tp': [],
                                           'conf': [],
                                           'pred_count': 0,
                                           'gt_count': 0,
                                           })
        img_results = {'TP': 0, 'FP': 0, 'FN': 0, 'TN': 0}

        # Count ground-truth boxes per class.
        for gt_box in gt_boxes:
            obj_results[int(gt_box[0])]['gt_count'] += 1

        # Drop low-confidence predictions once, so matching, the image-level
        # verdict, drawing and statistics all see the same set; then order by
        # confidence (index 5) so the strongest prediction claims a box first.
        pred_boxes = sorted(
            (box for box in pred_boxes if box[5] >= self.conf_threshold),
            key=lambda box: float(box[5]),
            reverse=True,
        )

        gt_matched = [False] * len(gt_boxes)
        any_hit = False
        for pred_box in pred_boxes:
            pred_class = int(pred_box[0])
            class_stats = obj_results[pred_class]
            class_stats['pred_count'] += 1
            class_stats['conf'].append(pred_box[5])

            # Find the best unmatched ground-truth box of the same class.
            best_iou = 0
            best_gt_idx = -1
            for gt_idx, gt_box in enumerate(gt_boxes):
                if int(gt_box[0]) != pred_class or gt_matched[gt_idx]:
                    continue
                iou = self.box_iou(pred_box[1:5], gt_box[1:5])
                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = gt_idx

            if best_gt_idx != -1 and best_iou >= self.iou_threshold:
                class_stats['tp'].append(1)
                gt_matched[best_gt_idx] = True
                any_hit = True
            else:
                class_stats['tp'].append(0)

        # Image-level verdict (see the key legend in the docstring).
        if len(gt_boxes) > 0:
            img_results['TN' if any_hit else 'FP'] = 1
        else:
            img_results['FN' if pred_boxes else 'TP'] = 1

        # Save every image except a clean negative (no GT, no prediction).
        if save_img and img_results['TP'] != 1:
            img = self.draw_bbox(gt_boxes, img, attribute='gt', name_dic=name_dic)
            img = self.draw_bbox(pred_boxes, img, attribute='pred', name_dic=name_dic)
            cv2.imwrite(save_path, img)

        # Collect the size distribution of the kept predictions.
        if analyze_pred:
            for _, x1, y1, x2, y2, _ in pred_boxes:
                wh = [x2 - x1, y2 - y1]
                analyze_pred_wh.append(wh)
                analyze_pred_y2.append(y2)
                self.analyze_pred_wh.append(wh)
                self.analyze_pred_y2.append(y2)
        return obj_results, img_results

    def compute_metrics(self, all_results):
        """Aggregate per-class statistics into summary metrics.

        Args:
            all_results: mapping class id -> ``{'tp', 'conf', 'pred_count',
                'gt_count'}`` accumulated over all images.

        Returns:
            dict with ``mAP50``, ``mAP50_95``, ``precision``, ``recall`` and
            the per-class lists ``class_aps_50``, ``class_aps_50_95``,
            ``class_precisions``, ``class_recalls``.

        Notes:
            * The AP reported as ``mAP50`` is computed from TP flags that
              were decided with this evaluator's ``iou_threshold``.
            * ``mAP50_95`` is NOT re-matched per IoU threshold; it is a
              simplified estimate that scales the AP by ``0.5 / iou``.
        """
        aps_50 = []
        aps_50_95 = []
        precisions = []
        recalls = []
        # IoU grid 0.50, 0.55, ..., 0.95 used by the simplified estimate.
        iou_grid = np.arange(0.5, 1.0, 0.05)

        for class_id in range(self.num_classes):
            has_preds = class_id in all_results and len(all_results[class_id]['tp']) > 0
            if not has_preds:
                # A class without predictions scores zero everywhere.
                aps_50.append(0)
                aps_50_95.append(0)
                precisions.append(0)
                recalls.append(0)
                continue

            tp = np.array(all_results[class_id]['tp'])
            conf = np.array(all_results[class_id]['conf'])
            gt_count = all_results[class_id]['gt_count']

            # Order TP flags by descending confidence.
            tp = tp[np.argsort(-conf)]
            tp_cumsum = np.cumsum(tp)
            fp_cumsum = np.cumsum(1 - tp)
            # Running precision / recall; the epsilon avoids division by zero.
            precision = tp_cumsum / (tp_cumsum + fp_cumsum + 1e-16)
            recall = tp_cumsum / (gt_count + 1e-16)

            ap_50 = self.compute_ap(recall, precision)
            aps_50.append(ap_50)
            # Simplified AP@0.5:0.95 (see Notes).
            aps_50_95.append(np.mean([ap_50 * (0.5 / iou_thresh) for iou_thresh in iou_grid]))

            # Final operating point = values after the last prediction.
            precisions.append(precision[-1])
            recalls.append(recall[-1])

        return {
            'mAP50': np.mean(aps_50),
            'mAP50_95': np.mean(aps_50_95),
            'precision': np.mean(precisions),
            'recall': np.mean(recalls),
            'class_aps_50': aps_50,
            'class_aps_50_95': aps_50_95,
            'class_precisions': precisions,
            'class_recalls': recalls
        }
def bboxes_wh_scatter(analyze_pred_wh,data_path,colors):
    """Save a width-vs-height scatter plot of the predicted boxes.

    Args:
        analyze_pred_wh: sequence of ``[w, h]`` pairs, one per predicted box.
        data_path: directory in which ``bboxes_hw.jpg`` is written.
        colors: sequence of 0-255 colour triples; only the first is used.
    """
    fig = plt.figure(figsize=(20, 20))
    # Split the pairs into the two plot axes.
    widths = [dim[0] for dim in analyze_pred_wh]
    heights = [dim[1] for dim in analyze_pred_wh]
    # matplotlib expects colour components in the 0-1 range.
    color = colors[0]
    norm_color = [color[0]/255, color[1]/255, color[2]/255]
    plt.xticks(range(0, 2561, 10))
    plt.yticks(range(0, 1441, 10))
    plt.scatter(widths, heights, label='smoke', color=norm_color, alpha=0.6, edgecolors='w')
    plt.title('Image Dimensions by Category')
    plt.xlabel('Width')
    plt.ylabel('Height')
    plt.legend(title='Categories', loc='center left', bbox_to_anchor=(1, 0.5))
    plt.grid(True, linestyle='--', alpha=0.7)
    # Keep the plot content from being clipped.
    plt.tight_layout()
    filename = os.path.join(data_path, 'bboxes_hw.jpg')
    # Let savefig derive the format from the extension; forcing format='png'
    # wrote PNG data into a file named .jpg.
    plt.savefig(filename, dpi=300)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print('每个类别的高宽散点图绘制成功!')
import cv2
from tqdm import tqdm
# 批量评估多张图片
def evaluate_yolo_dataset(model, image_paths, gt_txt_paths,save_img=False,save_root='',
                          conf_threshold=0.3,iou_threshold=0.3,analyze_pred=False,
                          num_classes=2):
    """Run the model on every image and accumulate evaluation statistics.

    Args:
        model: callable used as ``model(img_path, verbose=False)``; each
            returned result must expose ``names`` and ``boxes``.
        image_paths: image files to evaluate.
        gt_txt_paths: YOLO-format label files, aligned with ``image_paths``.
        save_img: forwarded to the evaluator; enables saving annotated images.
        save_root: directory in which annotated images are written.
        conf_threshold: confidence threshold passed to ``YOLOEvaluator``.
        iou_threshold: IoU threshold passed to ``YOLOEvaluator``.
        analyze_pred: forwarded to the evaluator; enables box-size statistics.
        num_classes: number of classes passed to ``YOLOEvaluator``.

    Returns:
        ``(metrics, Business_dic)``: the dict from
        ``YOLOEvaluator.compute_metrics`` and the summed image-level
        ``'TP'/'FP'/'FN'/'TN'`` counters.

    Raises:
        FileNotFoundError: if an image cannot be read by ``cv2.imread``.
    """
    evaluator = YOLOEvaluator(num_classes=num_classes,conf_threshold=conf_threshold,
                              iou_threshold=iou_threshold)
    all_results = defaultdict(lambda: {'tp': [],
                                       'conf': [],
                                       'pred_count': 0,
                                       'gt_count': 0,
                                       })
    Business_dic = {'TP': 0, 'FP': 0, 'FN': 0, 'TN': 0}

    for img_path, gt_path in tqdm(zip(image_paths, gt_txt_paths), total=len(image_paths)):
        # Read the image first: its size is needed to scale the labels, and an
        # unreadable file should fail with a clear message.
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"cannot read image: {img_path}")
        height, width = img.shape[:2]

        results = model(img_path, verbose=False)

        # Flatten the predictions into [class_id, x1, y1, x2, y2, conf] rows.
        pred_boxes = []
        # Fallback so name_dic is bound even if the model returns no result;
        # presumably the model exposes ``names`` - verify for custom callables.
        name_dic = getattr(model, 'names', {})
        for result in results:
            name_dic = result.names
            boxes = result.boxes
            if boxes is None:
                continue
            # One device-to-host transfer per tensor instead of one per box.
            xyxy = boxes.xyxy.cpu().numpy()
            confs = boxes.conf.cpu().numpy()
            classes = boxes.cls.cpu().numpy()
            for (x1, y1, x2, y2), conf, class_id in zip(xyxy, confs, classes):
                pred_boxes.append([class_id, x1, y1, x2, y2, conf])

        save_path = os.path.join(save_root, os.path.basename(img_path))
        obj_results, img_results = evaluator.evaluate_single_image(
            pred_boxes, gt_path, width, height, save_img, save_path, name_dic, img,
            analyze_pred=analyze_pred
        )

        # Merge the per-image statistics into the dataset totals.
        for class_id, class_results in obj_results.items():
            merged = all_results[class_id]
            merged['tp'].extend(class_results['tp'])
            merged['conf'].extend(class_results['conf'])
            merged['pred_count'] += class_results['pred_count']
            merged['gt_count'] += class_results['gt_count']
        for key in Business_dic:
            Business_dic[key] += img_results[key]

    metrics = evaluator.compute_metrics(all_results)
    return metrics, Business_dic
def test(model_path,test_txt_path,save_root,test_if_save=False,args=None):
    """Evaluate a trained model on the images listed in a txt file.

    Args:
        model_path: weights file loaded with ``YOLO``.
        test_txt_path: text file with one image path per line.
        save_root: directory for annotated images (created when saving).
        test_if_save: whether annotated images are written.
        args: optional namespace; only ``args.analyze_pred`` is read and it
            is treated as ``False`` when absent.

    Returns:
        The module-level ``(analyze_pred_wh, analyze_pred_y2)`` accumulators,
        or ``None`` when the txt file or the model file does not exist.
    """
    if test_if_save:
        os.makedirs(save_root,exist_ok=True)
    if not os.path.exists(test_txt_path):
        print("测试txt文件不存在!",test_txt_path)
        return
    if not os.path.exists(model_path):
        print("model文件不存在!",model_path)
        return

    model = YOLO(model_path,task='detect',verbose=False)

    image_paths,gt_txt_paths=[],[]
    with open(test_txt_path, 'r', encoding='utf-8') as f:
        for line in f:
            image_path = line.strip()
            if not image_path:
                # Blank lines would otherwise become empty image paths.
                continue
            # Swap whatever image extension is used (.jpg, .png, ...) for .txt
            # and map the images/ directory onto labels/.
            txt_path = os.path.splitext(image_path)[0] + '.txt'
            txt_path = txt_path.replace('/images/','/labels/')
            gt_txt_paths.append(txt_path)
            image_paths.append(image_path)

    batch_metrics,Business_dic= evaluate_yolo_dataset(
        model=model,
        image_paths=image_paths,
        gt_txt_paths=gt_txt_paths,
        save_img=test_if_save,
        save_root=save_root,
        conf_threshold=0.2,
        iou_threshold=0.2,
        # args defaults to None, so read the flag defensively.
        analyze_pred=bool(getattr(args, 'analyze_pred', False))
    )

    def ratio(numerator, denominator):
        """Return the rounded quotient, or 0 when the denominator is 0."""
        return round(numerator / denominator, 3) if denominator != 0 else 0

    # Image-level counters as filled by evaluate_yolo_dataset:
    #   TN = image with ground truth that was hit, FP = image with ground truth
    #   that was missed, FN = clean image with a false alarm, TP = clean image
    #   without any prediction.
    hit = Business_dic['TN']
    missed = Business_dic['FP']
    false_alarm = Business_dic['FN']
    clean = Business_dic['TP']
    total = hit + missed + false_alarm + clean

    wubao = ratio(false_alarm, clean + false_alarm)   # false-alarm rate
    fpr = ratio(missed, missed + hit)                 # miss rate
    mingzhong = ratio(hit, missed + hit)              # hit rate
    acc = ratio(clean + hit, total)                   # image-level accuracy

    print("\n批量评估结果:")
    print(f"mAP@0.5: {batch_metrics['mAP50']:.4f}")
    print(f"mAP@0.5:0.95: {batch_metrics['mAP50_95']:.4f}")
    print(f"Precision: {batch_metrics['precision']:.4f}")
    print(f"漏报率: {fpr*100:.2f}% [{Business_dic['FP']}/{(Business_dic['FP']+Business_dic['TN'])}]")
    print(f"误报率: {wubao*100:.2f}% [{Business_dic['FN']}/{(Business_dic['TP']+Business_dic['FN'])}]")
    print(f"命中率: {mingzhong*100:.2f}% [{Business_dic['TN']}/{(Business_dic['FP']+Business_dic['TN'])}]")
    print(f"准确率: {acc*100:.2f}% [{(Business_dic['TP']+Business_dic['TN'])}/{(Business_dic['FP']+Business_dic['TN']+Business_dic['TP']+Business_dic['FN'])}]")
    return analyze_pred_wh,analyze_pred_y2
def train(project_root,args):
    """Train (or resume) a detection model and keep a copy of its data.yaml.

    Args:
        project_root: directory that holds the data yaml, ``yolo11s.pt`` and
            ``ultralytics/cfg/default.yaml``; run outputs are placed below it.
        args: namespace providing ``project``, ``project_name``,
            ``data_yaml_name``, ``epochs``, ``imgsz``, ``batch``, ``device``
            and ``resume``.

    Returns:
        Whatever ``model.train`` returns.
    """
    run_dir = os.path.join(project_root,args.project,args.project_name)
    data_yaml = os.path.join(project_root,args.data_yaml_name)

    # Keep a copy of the dataset config next to the run outputs.
    os.makedirs(run_dir, exist_ok=True)
    saved_yaml = os.path.join(run_dir,'data.yaml')
    shutil.copy(data_yaml, saved_yaml)
    print('+++++++++++保存data.yaml+++++++++++++++')
    print(saved_yaml)

    # Resume only when a checkpoint actually exists; on a first run there is
    # no last.pt yet, so fall back to the pretrained weights.
    last_ckpt = os.path.join(run_dir,'weights','last.pt')
    if args.resume and os.path.exists(last_ckpt):
        resume = last_ckpt
        model = YOLO(last_ckpt,task='detect')
    else:
        if args.resume:
            print(f'未找到断点模型 {last_ckpt},改为从预训练模型开始训练')
        resume = False
        model = YOLO(os.path.join(project_root,"yolo11s.pt"),task='detect')

    train_results = model.train(
        data=data_yaml,          # dataset configuration file
        epochs=args.epochs,      # number of training epochs
        imgsz=args.imgsz,        # training image size
        batch=args.batch,
        device=args.device,      # e.g. 'cpu', 0, [0, 1, 2, 3]
        task='detect',
        # Anchor the run directory on project_root so it matches run_dir above
        # regardless of the current working directory.
        project=os.path.join(project_root,args.project),
        name=args.project_name,
        exist_ok=True,
        resume=resume,
        cfg=os.path.join(project_root,'ultralytics/cfg/default.yaml')
    )
    return train_results
def draw_bbox(boxes,img,attribute='gt',name_dic=None):
    """Draw labelled boxes on ``img`` with cvzone and return the image.

    Args:
        boxes: rows of ``[class_id, x1, y1, x2, y2]`` when ``attribute`` is
            ``'gt'``, or ``[class_id, x1, y1, x2, y2, conf]`` when it is
            ``'pred'``.
        img: image handed to the cvzone drawing calls.
        attribute: ``'gt'`` or ``'pred'``; selects colour and label text.
        name_dic: class-id -> class-name mapping; ``None`` selects
            ``{0: 'smoke', 1: 'fire'}``.

    Raises:
        ValueError: if ``attribute`` is neither ``'gt'`` nor ``'pred'``.
    """
    if attribute not in ('gt', 'pred'):
        raise ValueError(f"attribute must be 'gt' or 'pred', got {attribute!r}")
    if name_dic is None:
        # Built per call so no dict object is shared between invocations.
        name_dic = {0: 'smoke', 1: 'fire'}

    for box in boxes:
        if attribute == 'gt':
            class_id, x1, y1, x2, y2 = box
            conf = None
        else:
            class_id, x1, y1, x2, y2, conf = box
        x1, y1, x2, y2, class_id = int(x1), int(y1), int(x2), int(y2), int(class_id)
        w, h = x2 - x1, y2 - y1
        currentClass = name_dic[class_id]  # class name shown next to the box

        # Keep the label inside the image near the top-left corner.
        text_pos = (max(0, x1 + 10), max(35, y1 - 10))
        if attribute == 'gt':
            cvzone.putTextRect(img, f"{currentClass} ", text_pos, 1, 1,
                               (0, 255, 255), colorR=(0, 0, 0))
            # Colour tuples are BGR.
            cvzone.cornerRect(img, (x1, y1, w, h), l=3, colorR=(0, 0, 255), t=2, rt=2)
        else:
            cvzone.putTextRect(img, f"{currentClass} {conf}", text_pos, 1, 1,
                               (0, 255, 255), colorR=(0, 0, 0))
            cvzone.cornerRect(img, (x1, y1, w, h), l=30, colorR=(0, 255, 0), t=5)
    return img
# Absolute path of the project root; main() passes it to train() and joins the
# run directory, weight files and result folders onto it.
project_root='/path/project/ultralytics-main/'
def main(args):
    """Train the model, evaluate it on every test list, optionally plot sizes.

    Args:
        args: parsed command-line namespace. The test lists are read from
            ``test_txt_path_list`` or, if absent, ``test_txt_name``; the save
            flag from ``test_if_save`` or, if absent, ``test_if_save_jpg``.
    """
    # Train first; evaluation below uses the best weights of this run.
    train(project_root,args)

    run_dir = os.path.join(project_root,args.project,args.project_name)
    # Accept either attribute spelling so the function works with the
    # argument names the parser actually defines.
    test_txt_paths = getattr(args, 'test_txt_path_list', None) or getattr(args, 'test_txt_name', [])
    test_if_save = getattr(args, 'test_if_save', getattr(args, 'test_if_save_jpg', False))

    for test_txt_path in test_txt_paths:
        print(f'=====测试txt文件:{test_txt_path}=====')
        list_name = os.path.splitext(os.path.basename(test_txt_path))[0]
        test(
            model_path=os.path.join(run_dir,'weights','best.pt'),
            test_txt_path=test_txt_path,
            save_root=os.path.join(run_dir,'result',list_name),
            test_if_save=test_if_save,
            args=args
        )

    # Width/height scatter plot of the predicted boxes.
    if args.analyze_pred:
        import random
        colors = [(random.randint(0,255),random.randint(0,255),random.randint(0,255)) for _ in range(2)]
        bboxes_wh_scatter(analyze_pred_wh=analyze_pred_wh,data_path=run_dir,colors=colors)
def _str2bool(value):
    """Parse a command-line boolean; plain ``bool('False')`` would be True."""
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', '1', 'yes', 'y', 't'):
        return True
    if text in ('false', '0', 'no', 'n', 'f'):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def _parse_batch(value):
    """Parse ``--batch`` as an int, or as a float for fractional values."""
    try:
        return int(value)
    except ValueError:
        return float(value)


def _parse_device(value):
    """Parse ``--device``: '0' -> 0, '0,1' -> [0, 1], 'cpu' -> 'cpu'."""
    text = str(value).strip()
    if ',' in text:
        return [int(part) for part in text.split(',') if part.strip()]
    try:
        return int(text)
    except ValueError:
        return text


def Parser():
    """Build the command-line parser and return the parsed arguments.

    Returns:
        ``argparse.Namespace`` with the declared options plus two aliases,
        ``test_txt_path_list`` (= ``test_txt_name``) and ``test_if_save``
        (= ``test_if_save_jpg``).
    """
    parser = argparse.ArgumentParser(description='训练&测试模型')
    parser.add_argument('--project', type=str, default='项目名称',help="")
    parser.add_argument('--project_name',type=str, default='本次模型训练版本',help="")
    parser.add_argument('--data_yaml_name', type=str, default='data.yaml',help="")
    parser.add_argument('--epochs', type=int, default=150,help="")
    # The default is fractional, so the option must accept floats as well.
    parser.add_argument('--batch', type=_parse_batch, default=0.95,help="")
    # A single device id, a comma-separated list, or a name such as 'cpu';
    # the default -1 is passed through unchanged.
    parser.add_argument('--device', type=_parse_device, default=-1,help="")
    # Training / inference image size.
    parser.add_argument('--imgsz', type=int, default=1280,help="")
    # Test lists evaluated right after training; one or more paths.
    parser.add_argument('--test_txt_name', type=str, nargs='+', default=[
                        '/path/to/test/C/C.txt',
                        '/path/to/test/D/D.txt'
                        ],help="")
    # Whether to save the annotated test images.
    parser.add_argument('--test_if_save_jpg', type=_str2bool, default=True,help="")
    # Whether to save txt annotations for the saved test images.
    parser.add_argument('--test_if_save_txt', type=_str2bool, default=True,help="")
    # Whether to continue training from the last checkpoint.
    parser.add_argument('--resume', type=_str2bool, default=True,help="")
    # Whether to analyse the size distribution of predicted boxes.
    parser.add_argument('--analyze_pred', type=_str2bool, default=False,help="")
    args = parser.parse_args()
    # Aliases under the attribute names that main() reads.
    args.test_txt_path_list = args.test_txt_name
    args.test_if_save = args.test_if_save_jpg
    return args
if __name__ == "__main__":
    # Script entry point: parse the command line and hand the result to main().
    main(Parser())
4. 模型转化
训练模型的最终目标是将其部署到实际应用中。Ultralytics YOLO 中的导出模式提供了多种选项,可将您训练好的模型导出为不同格式,使其能够在各种平台和设备上部署。
官网教程以及参数详解请见:https://docs.ultralytics.com/zh/modes/export/#arguments
- 导出到 ONNX 或 OpenVINO,CPU 速度提升高达 3 倍。
- 导出到 TensorRT,GPU 速度提升高达 5 倍。
在转化过程中可以设置half,int8参数来对模型进行量化
python
from ultralytics import YOLO
# Load the trained checkpoint that is going to be exported.
model = YOLO('/path/project/model/best.pt')

# First export: TensorRT engine with a fixed 1280 input, batch size 1,
# graph simplification and half precision enabled, dynamic shapes disabled.
tensorrt_options = dict(format='engine', imgsz=1280, simplify=True,
                        half=True, dynamic=False, batch=1)
model.export(**tensorrt_options)

# Second export: ONNX with the default export settings.
model.export(format="onnx")