
目标检测:YOLO系列(YOLOv3 ~ YOLOv8)
一、YOLO核心思想
1.1 统一检测框架
python
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import warnings
warnings.filterwarnings('ignore')
def _draw_pipeline(ax, title, stages, color, radius=0.1):
    """Draw a left-to-right pipeline of labelled circles joined by arrows.

    Args:
        ax: Matplotlib axes to draw on; its frame and ticks are hidden.
        title: Title shown above the panel.
        stages: Sequence of ``(label, x, y)`` tuples in data coordinates,
            ordered left to right.
        color: Fill colour shared by all stage circles.
        radius: Circle radius in data coordinates.
    """
    ax.axis('off')
    ax.set_title(title, fontsize=11)
    for label, x, y in stages:
        # facecolor/edgecolor are passed separately: a `color=` argument
        # overrides `ec=`, so the black outline would otherwise be lost.
        ax.add_patch(plt.Circle((x, y), radius,
                                facecolor=color, edgecolor='black'))
        ax.text(x, y, label, ha='center', va='center', fontsize=8)
    # One arrow per pair of neighbouring stages, running from rim to rim.
    for (_, x0, y0), (_, x1, y1) in zip(stages, stages[1:]):
        ax.annotate('', xy=(x1 - radius, y1), xytext=(x0 + radius, y0),
                    arrowprops=dict(arrowstyle='->', lw=1))


print("=" * 60)
print("YOLO:You Only Look Once")
print("=" * 60)

# Side-by-side comparison: two-stage R-CNN pipeline vs single-stage YOLO.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

_draw_pipeline(
    axes[0],
    'R-CNN系列:两阶段检测',
    [
        ("候选区域\n生成", 0.2, 0.5),
        ("特征提取\n+分类", 0.5, 0.5),
        ("后处理\nNMS", 0.8, 0.5),
    ],
    'lightcoral',
)
_draw_pipeline(
    axes[1],
    'YOLO:单阶段检测',
    [
        ("输入图像", 0.2, 0.5),
        ("CNN\n一次性预测", 0.5, 0.5),
        ("检测结果\n边界框+类别", 0.8, 0.5),
    ],
    'lightgreen',
)

plt.tight_layout()
plt.show()

print("\n💡 YOLO核心思想:")
print(" 将检测视为回归问题,一次性预测所有边界框和类别概率")
print(" - 单阶段: 不需要候选区域生成")
print(" - 速度快: 45 FPS (YOLOv1)")
print(" - 端到端: 统一训练")
二、网格与Anchor Box
2.1 网格划分
python
def grid_cells(grid_size=7):
    """Visualise the YOLO grid and summarise what each cell predicts.

    The left panel draws a ``grid_size`` x ``grid_size`` grid over the unit
    square, plots three sample object centres and outlines in red the cell
    that contains each centre. The right panel shows a fixed text summary
    of the per-cell outputs.

    Args:
        grid_size: Number of cells per side (S). Defaults to 7, the value
            used in the text panel's worked example.

    Raises:
        ValueError: If ``grid_size`` is smaller than 1.
    """
    if grid_size < 1:
        raise ValueError("grid_size must be at least 1")

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: grid lines plus the cells responsible for each object.
    ax1 = axes[0]
    ax1.set_title(f'{grid_size}×{grid_size}网格划分', fontsize=12)
    for i in range(grid_size + 1):
        ax1.axhline(y=i / grid_size, color='gray', linestyle='-', alpha=0.5)
        ax1.axvline(x=i / grid_size, color='gray', linestyle='-', alpha=0.5)

    # Sample object centres in normalised image coordinates.
    objects = [(0.3, 0.4), (0.6, 0.7), (0.8, 0.3)]
    for cx, cy in objects:
        ax1.plot(cx, cy, 'ro', markersize=8)
        # Clamp so a centre lying exactly on the right/top border still
        # maps to the last cell instead of an index outside the grid.
        col = min(int(cx * grid_size), grid_size - 1)
        row = min(int(cy * grid_size), grid_size - 1)
        ax1.add_patch(Rectangle((col / grid_size, row / grid_size),
                                1 / grid_size, 1 / grid_size,
                                linewidth=2, edgecolor='red',
                                facecolor='none'))
    ax1.set_xlim(0, 1)
    ax1.set_ylim(0, 1)
    ax1.set_aspect('equal')

    # Right panel: text description of the per-cell prediction vector.
    ax2 = axes[1]
    ax2.axis('off')
    ax2.set_title('每个网格预测', fontsize=12)
    grid_content = """
每个网格预测:
1. 边界框 (B个):
- x, y: 中心坐标(相对于网格)
- w, h: 宽高(相对于整图)
- confidence: 置信度
2. 类别概率 (C个):
- P(class_i | object)
输出张量: S × S × (B*5 + C)
例: S=7, B=2, C=20 → 7×7×30
"""
    ax2.text(0.05, 0.95, grid_content, transform=ax2.transAxes,
             fontsize=10, verticalalignment='top', fontfamily='monospace')

    plt.suptitle('YOLO网格划分与预测', fontsize=14)
    plt.tight_layout()
    plt.show()


grid_cells()
2.2 Anchor Box可视化
python
def anchor_boxes_yolo():
    """Visualise anchor boxes and summarise how YOLO chooses them.

    Prints a section banner, then shows a two-panel figure: three nested
    example anchors sharing one centre on the left, and a text summary of
    the anchor design rules on the right.
    """
    print("\n" + "=" * 60)
    print("Anchor Box在YOLO中")
    print("=" * 60)

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: anchors of three sizes drawn around a common centre.
    ax1 = axes[0]
    ax1.set_title('Anchor Box示例', fontsize=12)
    anchors = [
        (0.1, 0.15, '小物体'),
        (0.2, 0.25, '中等物体'),
        (0.3, 0.4, '大物体'),
    ]
    center = (0.5, 0.5)
    colors = ['blue', 'green', 'red']
    for (w, h, label), color in zip(anchors, colors):
        ax1.add_patch(Rectangle((center[0] - w / 2, center[1] - h / 2), w, h,
                                facecolor='none', edgecolor=color,
                                linewidth=2))
        # Label sits just outside the top-right corner of its box.
        ax1.text(center[0] + w / 2 + 0.02, center[1] + h / 2, label,
                 color=color, fontsize=9)
    ax1.plot(center[0], center[1], 'ko', markersize=8)
    ax1.set_xlim(0, 1)
    ax1.set_ylim(0, 1)
    ax1.set_aspect('equal')
    ax1.grid(True, alpha=0.3)

    # Right panel: design notes. The listed sizes are the nine COCO anchor
    # clusters from the YOLOv3 paper, three per detection scale.
    ax2 = axes[1]
    ax2.axis('off')
    ax2.set_title('Anchor Box设计', fontsize=12)
    anchor_text = """
📐 Anchor Box设计原则:
1. 使用K-Means聚类
- 在训练集上聚类边界框尺寸
- 距离度量: d(box, centroid) = 1 - IoU
2. YOLOv2使用5个Anchor, YOLOv3使用9个 (每个尺度3个)
- 小: (10×13), (16×30), (33×23)
- 中: (30×61), (62×45), (59×119)
- 大: (116×90), (156×198), (373×326)
3. 每个网格预测多个Anchor
- 选择IoU最大的Anchor负责预测
- 不同Anchor负责不同尺寸的物体
"""
    ax2.text(0.05, 0.95, anchor_text, transform=ax2.transAxes,
             fontsize=10, verticalalignment='top', fontfamily='monospace')

    plt.tight_layout()
    plt.show()


anchor_boxes_yolo()
三、YOLO损失函数
3.1 损失函数组成
python
def yolo_loss():
    """Print and plot the three components of the YOLO loss.

    Prints a banner and the overall formula, then shows a three-panel
    figure with one text panel each for the localisation, confidence and
    classification terms.
    """
    print("\n" + "=" * 60)
    print("YOLO损失函数")
    print("=" * 60)

    print("\n📐 YOLO损失函数组成:")
    print(" L = L_coord + L_conf + L_class")

    coord_formula = """
L_coord = λ_coord Σ (x - x̂)² + (y - ŷ)²
+ λ_coord Σ (√w - √ŵ)² + (√h - √ĥ)²
特点:
• 只计算有物体的网格
• λ_coord = 5 (增大定位权重)
• 使用平方根处理宽高差异
"""
    conf_formula = """
L_conf = Σ (C - Ĉ)²
+ λ_noobj Σ (C - Ĉ)²
特点:
• 有物体: 正常计算
• 无物体: λ_noobj = 0.5 (降低权重)
• C = IoU(预测框, 真实框)
"""
    class_formula = """
L_class = Σ (p_i(c) - p̂_i(c))²
特点:
• 只计算有物体的网格
• 使用MSE(YOLOv1)
• 后续版本改用交叉熵
"""

    # One (title, body) pair per panel, in left-to-right order.
    panels = [
        ('定位损失 (L_coord)', coord_formula),
        ('置信度损失 (L_conf)', conf_formula),
        ('分类损失 (L_class)', class_formula),
    ]

    fig, axes = plt.subplots(1, 3, figsize=(14, 4))
    for ax, (title, formula) in zip(axes, panels):
        ax.axis('off')
        ax.set_title(title, fontsize=10)
        ax.text(0.05, 0.95, formula, transform=ax.transAxes,
                fontsize=8, verticalalignment='top', fontfamily='monospace')

    plt.suptitle('YOLO损失函数组成', fontsize=12)
    plt.tight_layout()
    plt.show()


yolo_loss()
四、YOLO版本演进
4.1 YOLOv3核心改进
python
def yolo_v3():
    """Plot a schematic of YOLOv3's multi-scale heads and list its changes.

    Prints a banner, draws input -> Darknet-53 -> three detection scales,
    then prints the list of YOLOv3 improvements.
    """
    print("\n" + "=" * 60)
    print("YOLOv3:多尺度预测")
    print("=" * 60)

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.axis('off')

    # Input image box.
    input_box = plt.Rectangle((0.05, 0.7), 0.1, 0.12,
                              facecolor='lightgray', ec='black')
    ax.add_patch(input_box)
    ax.text(0.1, 0.76, '输入\n416×416', ha='center', va='center', fontsize=8)

    # Backbone box, fed by the input.
    backbone = plt.Rectangle((0.25, 0.7), 0.2, 0.12,
                             facecolor='lightblue', ec='black')
    ax.add_patch(backbone)
    ax.text(0.35, 0.76, 'Darknet-53\n(特征提取)',
            ha='center', va='center', fontsize=8)
    ax.annotate('', xy=(0.25, 0.76), xytext=(0.15, 0.76),
                arrowprops=dict(arrowstyle='->', lw=1))

    # Detection scales for a 416×416 input. The coarse 13×13 map is the
    # one used for large objects and the fine 52×52 map for small objects.
    scales = [
        (0.55, 0.7, '大物体\n预测', '13×13', 'lightgreen'),
        (0.55, 0.45, '中物体\n预测', '26×26', 'lightyellow'),
        (0.55, 0.2, '小物体\n预测', '52×52', 'lightcoral'),
    ]
    for x, y, label, size, color in scales:
        box = plt.Rectangle((x, y), 0.15, 0.12,
                            facecolor=color, ec='black')
        ax.add_patch(box)
        ax.text(x + 0.075, y + 0.06, f'{label}\n{size}',
                ha='center', va='center', fontsize=7)
        # Curved connector from the backbone's right edge to this head.
        ax.annotate('', xy=(x, y + 0.06), xytext=(0.45, 0.76),
                    arrowprops=dict(arrowstyle='->', lw=1,
                                    connectionstyle='arc3,rad=-0.1'))

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title('YOLOv3架构:多尺度特征金字塔', fontsize=12)
    plt.tight_layout()
    plt.show()

    print("\n📊 YOLOv3改进点:")
    print(" 1. 多尺度预测: 3个尺度(13×13, 26×26, 52×52)")
    print(" 2. 残差连接: Darknet-53借鉴ResNet")
    print(" 3. 9个Anchor: 每个尺度3个")
    print(" 4. 二元交叉熵: 替代MSE做分类")
    print(" 5. 特征金字塔: FPN结构融合多尺度特征")


yolo_v3()
4.2 YOLOv4-v8演进
python
def yolo_evolution():
    """Print a YOLOv1–v8 comparison and plot the releases on a timeline.

    Prints a banner and one summary line per version, draws a horizontal
    timeline with one marker per version positioned by release year, then
    prints the main directions of evolution.
    """
    print("\n" + "=" * 60)
    print("YOLO版本演进")
    print("=" * 60)

    # Per-version facts; insertion order is chronological.
    versions = {
        'YOLOv1': {
            '年份': 2016,
            '特点': '开创性工作,统一检测框架',
            '创新': '网格划分、端到端回归',
        },
        'YOLOv2': {
            '年份': 2017,
            '特点': 'Anchor Box、多尺度训练',
            '创新': 'BatchNorm、高分辨率分类器',
        },
        'YOLOv3': {
            '年份': 2018,
            '特点': '多尺度预测、Darknet-53',
            '创新': 'FPN、残差连接',
        },
        'YOLOv4': {
            '年份': 2020,
            '特点': 'Mish激活、CSPNet、PANet',
            '创新': 'Mosaic增强、CIoU损失',
        },
        'YOLOv5': {
            '年份': 2020,
            '特点': 'PyTorch实现、易用性',
            '创新': 'Focus层、自适应Anchor',
        },
        'YOLOv6': {
            '年份': 2022,
            '特点': '重参数化、EfficientRep',
            '创新': 'SimSPPF、Decoupled Head',
        },
        'YOLOv7': {
            '年份': 2022,
            '特点': 'E-ELAN、辅助头训练',
            '创新': '扩展高效层聚合网络',
        },
        'YOLOv8': {
            '年份': 2023,
            '特点': '无Anchor、Decoupled Head',
            '创新': 'TaskAlignedAssigner、C2f模块',
        },
    }

    # Text comparison table, so the feature/innovation fields are shown.
    for name, info in versions.items():
        print(f" {name} ({info['年份']}): {info['特点']} | 创新: {info['创新']}")

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.axis('off')

    # Horizontal timeline; x is the number of years since 2015.
    timeline_y = 0.5
    ax.plot([0.5, 8.5], [timeline_y, timeline_y], 'k-', linewidth=2)

    releases_in_year = {}
    for name, info in versions.items():
        year = info['年份']
        x = year - 2015
        # Versions released in the same year alternate above/below the
        # line so their markers and labels do not overlap.
        already = releases_in_year.get(year, 0)
        releases_in_year[year] = already + 1
        side = 1 if already % 2 == 0 else -1
        y = timeline_y + side * 0.2

        ax.plot([x, x], [timeline_y, y], color='gray', linewidth=1)
        # Markers are sized in points, so they stay round even though the
        # x and y data ranges differ.
        ax.plot(x, y, 'o', markersize=12,
                markerfacecolor='lightblue', markeredgecolor='black')
        # Labels go on the side of the marker facing away from the line.
        ax.text(x, y + side * 0.11, name, ha='center', va='center',
                fontsize=9, fontweight='bold')
        ax.text(x, y + side * 0.06, f"{year}", ha='center', va='center',
                fontsize=8)

    ax.set_xlim(0.5, 8.5)
    ax.set_ylim(0, 1)
    ax.set_title('YOLO版本演进时间线', fontsize=14)
    plt.tight_layout()
    plt.show()

    print("\n📊 主要演进方向:")
    print(" 1. 骨干网络: Darknet → CSPNet → EfficientRep")
    print(" 2. 检测头: Coupled → Decoupled")
    print(" 3. Anchor: 固定Anchor → 自适应 → 无Anchor")
    print(" 4. 损失函数: MSE → CIOU + DFL")
    print(" 5. 数据增强: 基础 → Mosaic + MixUp")


yolo_evolution()
4.3 YOLOv8核心改进
python
def yolo_v8():
    """Plot a schematic of the YOLOv8 architecture and list its changes.

    Prints a banner, draws backbone -> neck -> decoupled head with the
    head's two output branches, then prints the key improvements.
    """
    print("\n" + "=" * 60)
    print("YOLOv8:最新进展")
    print("=" * 60)

    fig, ax = plt.subplots(figsize=(12, 8))
    ax.axis('off')

    # Backbone.
    backbone = plt.Rectangle((0.1, 0.7), 0.2, 0.15,
                             facecolor='lightblue', ec='black')
    ax.add_patch(backbone)
    ax.text(0.2, 0.775, 'CSPDarknet\n(C2f模块)',
            ha='center', va='center', fontsize=8)

    # Neck, fed by the backbone.
    neck = plt.Rectangle((0.4, 0.7), 0.2, 0.15,
                         facecolor='lightgreen', ec='black')
    ax.add_patch(neck)
    ax.text(0.5, 0.775, 'PAN-FPN\n(特征金字塔)',
            ha='center', va='center', fontsize=8)
    ax.annotate('', xy=(0.4, 0.775), xytext=(0.3, 0.775),
                arrowprops=dict(arrowstyle='->', lw=1))

    # Detection head, fed by the neck.
    head = plt.Rectangle((0.7, 0.7), 0.2, 0.15,
                         facecolor='lightcoral', ec='black')
    ax.add_patch(head)
    ax.text(0.8, 0.775, 'Decoupled Head\n(解耦头)',
            ha='center', va='center', fontsize=8)
    ax.annotate('', xy=(0.7, 0.775), xytext=(0.6, 0.775),
                arrowprops=dict(arrowstyle='->', lw=1))

    # The decoupled head has two branches (classification and box
    # regression) and no separate IoU/objectness branch, so both outputs
    # hang off the head rather than off the backbone or neck.
    head_bottom = (0.8, 0.7)
    outputs = [
        (0.66, 0.45, '分类分支', 'lightyellow'),
        (0.90, 0.45, '回归分支\n(CIoU+DFL)', 'lightyellow'),
    ]
    for x, y, label, color in outputs:
        box = plt.Rectangle((x - 0.08, y - 0.05), 0.16, 0.1,
                            facecolor=color, ec='black')
        ax.add_patch(box)
        ax.text(x, y, label, ha='center', va='center', fontsize=8)
        ax.annotate('', xy=(x, y + 0.05), xytext=head_bottom,
                    arrowprops=dict(arrowstyle='->', lw=1))

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title('YOLOv8架构', fontsize=12)
    plt.tight_layout()
    plt.show()

    print("\n📊 YOLOv8关键改进:")
    print(" 1. 无Anchor: TaskAlignedAssigner")
    print(" 2. 解耦头: 分类和回归分离")
    print(" 3. C2f模块: 更轻量的CSP结构")
    print(" 4. DFL损失: Distribution Focal Loss")
    print(" 5. 多任务: 同一框架支持检测、分割、分类、姿态估计")


yolo_v8()
五、代码实现示例
5.1 YOLOv8推理
python
def yolo_inference():
    """Print a banner followed by a sample YOLOv8 inference script.

    The sample is only printed, never executed, so this function needs
    neither ``ultralytics`` nor ``cv2`` to be installed.
    """
    print("\n" + "=" * 60)
    print("YOLOv8推理代码")
    print("=" * 60)

    # The sample is kept flush-left inside the literal so it prints as
    # directly runnable Python. Its webcam loop stops when no frame is
    # returned and releases the camera and window on exit.
    code = """
# YOLOv8推理示例
from ultralytics import YOLO
import cv2
# 加载模型
model = YOLO('yolov8n.pt')  # nano版本
# 预测
results = model('image.jpg')
# 处理结果
for result in results:
    boxes = result.boxes
    for box in boxes:
        # 边界框坐标
        x1, y1, x2, y2 = box.xyxy[0].tolist()
        # 置信度
        confidence = box.conf[0].item()
        # 类别
        class_id = int(box.cls[0].item())
        class_name = model.names[class_id]
        print(f"{class_name}: {confidence:.2f}, box=[{x1:.0f}, {y1:.0f}, {x2:.0f}, {y2:.0f}]")
# 实时检测
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    results = model(frame)
    annotated_frame = results[0].plot()
    cv2.imshow('YOLOv8', annotated_frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
"""
    print(code)


yolo_inference()
5.2 YOLOv8训练
python
def yolo_training():
    """Print a banner followed by a sample YOLOv8 training script.

    The sample is only printed, never executed, so this function does not
    need ``ultralytics`` to be installed.
    """
    print("\n" + "=" * 60)
    print("YOLOv8训练代码")
    print("=" * 60)

    # Kept flush-left inside the literal so the printed script keeps
    # conventional indentation for the keyword arguments.
    code = """
# YOLOv8训练示例
from ultralytics import YOLO
# 加载预训练模型
model = YOLO('yolov8n.pt')
# 训练
results = model.train(
    data='coco128.yaml',  # 数据集配置
    epochs=100,  # 训练轮数
    imgsz=640,  # 输入图像尺寸
    batch=16,  # 批次大小
    device=0,  # GPU设备
    workers=8,  # 数据加载线程
    lr0=0.01,  # 初始学习率
    weight_decay=0.0005,  # 权重衰减
    momentum=0.937,  # 动量
    augment=True,  # 数据增强
    exist_ok=True,  # 覆盖现有结果
)
# 验证
metrics = model.val()
print(f"mAP50-95: {metrics.box.map:.4f}")
# 导出模型
model.export(format='onnx')  # 导出ONNX
"""
    print(code)


yolo_training()
六、总结
| 版本 | 核心创新 | 特点 |
|---|---|---|
| YOLOv1 | 统一检测框架 | 网格划分、端到端 |
| YOLOv2 | Anchor Box | 多尺度训练、BatchNorm |
| YOLOv3 | 多尺度预测 | FPN、Darknet-53 |
| YOLOv4 | 结构优化 | Mosaic、CSPNet、CIoU |
| YOLOv5 | PyTorch实现 | 易用性、自适应Anchor |
| YOLOv6 | 重参数化骨干 | EfficientRep、SimSPPF、解耦头 |
| YOLOv7 | E-ELAN | 扩展高效层聚合网络、辅助头训练 |
| YOLOv8 | 无Anchor设计 | 解耦头、DFL损失 |
核心概念回顾:
- 网格: 将图像划分为S×S个网格,每个网格负责检测中心落在该网格的物体
- Anchor Box: 预定义的先验框,聚类得到,多尺度多比例
- 损失函数: 定位损失 + 置信度损失 + 分类损失
选择建议:
- 实时性要求高 → YOLOv8-nano/s
- 精度要求高 → YOLOv8-l/x
- 移动端部署 → YOLOv8-nano
- 研究学习 → YOLOv3(经典)