04-进阶方向： 01-计算机视觉（CV）——目标检测（YOLO系列）

目标检测：YOLO系列（YOLOv3 ~ YOLOv8）

一、YOLO核心思想

1.1 统一检测框架

Python 代码：

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import warnings
warnings.filterwarnings('ignore')

banner = "=" * 60
print(banner)
print("YOLO：You Only Look Once")
print(banner)

# Compare the two-stage R-CNN pipeline (left panel) with single-stage YOLO (right panel).
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# One entry per panel: (panel title, node fill colour, [(node label, x, y), ...]).
pipelines = [
    (
        'R-CNN系列：两阶段检测',
        'lightcoral',
        [
            ("候选区域\n生成", 0.2, 0.5),
            ("特征提取\n+分类", 0.5, 0.5),
            ("后处理\nNMS", 0.8, 0.5),
        ],
    ),
    (
        'YOLO：单阶段检测',
        'lightgreen',
        [
            ("输入图像", 0.2, 0.5),
            ("CNN\n一次性预测", 0.5, 0.5),
            ("检测结果\n边界框+类别", 0.8, 0.5),
        ],
    ),
]

for panel, (title, fill, nodes) in zip(axes, pipelines):
    panel.axis('off')
    panel.set_title(title, fontsize=11)

    # Every pipeline stage is a filled circle with its label centred on it.
    for label, x, y in nodes:
        panel.add_patch(plt.Circle((x, y), 0.1, color=fill, ec='black'))
        panel.text(x, y, label, ha='center', va='center', fontsize=8)

    # Arrows bridge the gaps between neighbouring circles (radius 0.1).
    for tail_x, head_x in ((0.3, 0.4), (0.6, 0.7)):
        panel.annotate('', xy=(head_x, 0.5), xytext=(tail_x, 0.5),
                       arrowprops=dict(arrowstyle='->', lw=1))

plt.tight_layout()
plt.show()

# Key takeaways, printed after the figure is shown.
for line in (
    "\n💡 YOLO核心思想:",
    "   将检测视为回归问题，一次性预测所有边界框和类别概率",
    "   - 单阶段: 不需要候选区域生成",
    "   - 速度快: 45 FPS (YOLOv1)",
    "   - 端到端: 统一训练",
):
    print(line)

二、网格与Anchor Box

2.1 网格划分

Python 代码：

def grid_cells():
    """Show a 7x7 YOLO grid with sample objects next to a per-cell prediction summary.

    The left panel draws the grid lines, plots three sample object centres and
    outlines in red the cell that each centre falls into. The right panel shows
    explanatory text. The figure is displayed with ``plt.show()``; nothing is
    returned.
    """
    cells_per_side = 7

    fig, (grid_ax, info_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # ---- left panel: the grid itself ----
    grid_ax.set_title('7×7网格划分', fontsize=12)

    # cells_per_side + 1 lines are needed to close the last row / column.
    for k in range(cells_per_side + 1):
        grid_ax.axhline(y=k / cells_per_side, color='gray', linestyle='-', alpha=0.5)
        grid_ax.axvline(x=k / cells_per_side, color='gray', linestyle='-', alpha=0.5)

    # Sample object centres in normalised image coordinates.
    for cx, cy in [(0.3, 0.4), (0.6, 0.7), (0.8, 0.3)]:
        grid_ax.plot(cx, cy, 'ro', markersize=8)
        # Truncating the scaled coordinate gives the index of the owning cell.
        col = int(cx * cells_per_side)
        row = int(cy * cells_per_side)
        owner = Rectangle(
            (col / cells_per_side, row / cells_per_side),
            1 / cells_per_side,
            1 / cells_per_side,
            linewidth=2,
            edgecolor='red',
            facecolor='none',
        )
        grid_ax.add_patch(owner)

    grid_ax.set_xlim(0, 1)
    grid_ax.set_ylim(0, 1)
    grid_ax.set_aspect('equal')

    # ---- right panel: what each cell predicts ----
    info_ax.axis('off')
    info_ax.set_title('每个网格预测', fontsize=12)

    # Joined with newlines; the leading/trailing entries keep the original
    # surrounding line breaks of the text block.
    summary_lines = [
        "",
        "    每个网格预测:",
        "    ",
        "    1. 边界框 (B个):",
        "       - x, y: 中心坐标(相对于网格)",
        "       - w, h: 宽高(相对于整图)",
        "       - confidence: 置信度",
        "    ",
        "    2. 类别概率 (C个):",
        "       - P(class_i | object)",
        "    ",
        "    输出张量: S × S × (B*5 + C)",
        "    ",
        "    例: S=7, B=2, C=20 → 7×7×30",
        "    ",
    ]
    info_ax.text(
        0.05,
        0.95,
        "\n".join(summary_lines),
        transform=info_ax.transAxes,
        fontsize=10,
        verticalalignment='top',
        fontfamily='monospace',
    )

    plt.suptitle('YOLO网格划分与预测', fontsize=14)
    plt.tight_layout()
    plt.show()

grid_cells()

2.2 Anchor Box可视化

Python 代码：

def anchor_boxes_yolo():
    """Visualise anchor boxes of three sizes and summarise how YOLO designs them.

    The left panel draws three concentric example anchors around a common
    centre. The right panel shows design notes. The notes list the anchor
    counts as 5 for YOLOv2 and 9 for YOLOv3 (three per prediction scale), with
    the nine YOLOv3 COCO priors grouped by scale. The figure is displayed with
    ``plt.show()``; nothing is returned.
    """

    print("\n" + "=" * 60)
    print("Anchor Box在YOLO中")
    print("=" * 60)

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: anchors of different scales sharing one centre point.
    ax1 = axes[0]
    ax1.set_title('Anchor Box示例', fontsize=12)

    # (width, height, label) in normalised axes units.
    anchors = [
        (0.1, 0.15, '小物体'),
        (0.2, 0.25, '中等物体'),
        (0.3, 0.4, '大物体')
    ]

    center = (0.5, 0.5)
    colors = ['blue', 'green', 'red']

    for (w, h, label), color in zip(anchors, colors):
        # Rectangle takes its lower-left corner, so shift by half the size.
        rect = Rectangle((center[0]-w/2, center[1]-h/2), w, h,
                        facecolor='none', edgecolor=color, linewidth=2)
        ax1.add_patch(rect)
        # Label sits just outside the top-right corner of its box.
        ax1.text(center[0]+w/2+0.02, center[1]+h/2, label, color=color, fontsize=9)

    ax1.plot(center[0], center[1], 'ko', markersize=8)
    ax1.set_xlim(0, 1)
    ax1.set_ylim(0, 1)
    ax1.set_aspect('equal')
    ax1.grid(True, alpha=0.3)

    # Right panel: design notes.
    ax2 = axes[1]
    ax2.axis('off')
    ax2.set_title('Anchor Box设计', fontsize=12)

    anchor_text = """
    📐 Anchor Box设计原则:

    1. 使用K-Means聚类
       - 在训练集上聚类边界框尺寸
       - 距离度量: d(box, centroid) = 1 - IoU

    2. YOLOv2使用5个Anchor，YOLOv3使用9个Anchor
       (YOLOv3在COCO上的聚类结果，每个尺度3个)
       - 小: (10×13), (16×30), (33×23)
       - 中: (30×61), (62×45), (59×119)
       - 大: (116×90), (156×198), (373×326)

    3. 每个网格预测多个Anchor
       - 选择IoU最大的Anchor负责预测
       - 不同Anchor负责不同尺寸的物体
    """

    ax2.text(0.05, 0.95, anchor_text, transform=ax2.transAxes,
            fontsize=10, verticalalignment='top', fontfamily='monospace')

    plt.tight_layout()
    plt.show()

anchor_boxes_yolo()

三、YOLO损失函数

3.1 损失函数组成

Python 代码：

def yolo_loss():
    """Print the YOLO loss decomposition and show one text panel per loss term.

    The three panels cover the localisation, confidence and classification
    terms. The figure is displayed with ``plt.show()``; nothing is returned.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("YOLO损失函数")
    print(rule)

    print("\n📐 YOLO损失函数组成:")
    print("   L = L_coord + L_conf + L_class")

    # One (title, text lines) pair per panel, left to right. Each text is
    # joined with newlines; the empty first entry and the whitespace-only last
    # entry keep the surrounding line breaks of the displayed block.
    panels = [
        (
            '定位损失 (L_coord)',
            [
                "",
                "    L_coord = λ_coord Σ (x - x̂)² + (y - ŷ)²",
                "            + λ_coord Σ (√w - √ŵ)² + (√h - √ĥ)²",
                "    ",
                "    特点:",
                "    • 只计算有物体的网格",
                "    • λ_coord = 5 (增大定位权重)",
                "    • 使用平方根处理宽高差异",
                "    ",
            ],
        ),
        (
            '置信度损失 (L_conf)',
            [
                "",
                "    L_conf = Σ (C - Ĉ)²",
                "            + λ_noobj Σ (C - Ĉ)²",
                "    ",
                "    特点:",
                "    • 有物体: 正常计算",
                "    • 无物体: λ_noobj = 0.5 (降低权重)",
                "    • C = IoU(预测框, 真实框)",
                "    ",
            ],
        ),
        (
            '分类损失 (L_class)',
            [
                "",
                "    L_class = Σ (p_i(c) - p̂_i(c))²",
                "    ",
                "    特点:",
                "    • 只计算有物体的网格",
                "    • 使用MSE（YOLOv1）",
                "    • 后续版本改用交叉熵",
                "    ",
            ],
        ),
    ]

    fig, axes = plt.subplots(1, 3, figsize=(14, 4))

    for panel, (title, text_lines) in zip(axes, panels):
        panel.axis('off')
        panel.set_title(title, fontsize=10)
        panel.text(
            0.05,
            0.95,
            "\n".join(text_lines),
            transform=panel.transAxes,
            fontsize=8,
            verticalalignment='top',
            fontfamily='monospace',
        )

    plt.suptitle('YOLO损失函数组成', fontsize=12)
    plt.tight_layout()
    plt.show()

yolo_loss()

四、YOLO版本演进

4.1 YOLOv3核心改进

Python 代码：

def yolo_v3():
    """Draw a schematic of YOLOv3's three-scale prediction and print its key changes.

    The diagram shows input -> Darknet-53 -> three prediction heads. The
    highest-resolution map (52×52) is labelled for small objects and the
    coarsest map (13×13) for large objects, since finer grids resolve smaller
    targets. The figure is displayed with ``plt.show()``; nothing is returned.
    """

    print("\n" + "=" * 60)
    print("YOLOv3：多尺度预测")
    print("=" * 60)

    # Architecture sketch.
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.axis('off')

    # Input image box.
    input_box = plt.Rectangle((0.05, 0.7), 0.1, 0.12,
                              facecolor='lightgray', ec='black')
    ax.add_patch(input_box)
    ax.text(0.1, 0.76, '输入\n416×416', ha='center', va='center', fontsize=8)

    # Darknet-53 backbone box, fed by the input.
    backbone = plt.Rectangle((0.25, 0.7), 0.2, 0.12,
                             facecolor='lightblue', ec='black')
    ax.add_patch(backbone)
    ax.text(0.35, 0.76, 'Darknet-53\n(特征提取)', ha='center', va='center', fontsize=8)
    ax.annotate('', xy=(0.25, 0.76), xytext=(0.15, 0.76),
               arrowprops=dict(arrowstyle='->', lw=1))

    # Prediction heads: (x, y, label, feature-map size, colour).
    # Fine grid -> small objects, coarse grid -> large objects.
    scales = [
        (0.55, 0.7, '小物体\n预测', '52×52', 'lightgreen'),
        (0.55, 0.45, '中物体\n预测', '26×26', 'lightyellow'),
        (0.55, 0.2, '大物体\n预测', '13×13', 'lightcoral'),
    ]

    for x, y, label, size, color in scales:
        box = plt.Rectangle((x, y), 0.15, 0.12,
                            facecolor=color, ec='black')
        ax.add_patch(box)
        ax.text(x+0.075, y+0.06, f'{label}\n{size}', ha='center', va='center', fontsize=7)

        # Curved arrow from the backbone's right edge to this head's left edge.
        ax.annotate('', xy=(x, y+0.06), xytext=(0.45, 0.76),
                   arrowprops=dict(arrowstyle='->', lw=1, connectionstyle='arc3,rad=-0.1'))

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title('YOLOv3架构：多尺度特征金字塔', fontsize=12)

    plt.tight_layout()
    plt.show()

    print("\n📊 YOLOv3改进点:")
    print("   1. 多尺度预测: 3个尺度(13×13大物体, 26×26中物体, 52×52小物体)")
    print("   2. 残差连接: Darknet-53借鉴ResNet")
    print("   3. 9个Anchor: 每个尺度3个")
    print("   4. 二元交叉熵: 替代MSE做分类")
    print("   5. 特征金字塔: FPN结构融合多尺度特征")

yolo_v3()

4.2 YOLOv4-v8演进

Python 代码：

def yolo_evolution():
    """Plot a YOLOv1-YOLOv8 release timeline and print a per-version summary.

    Each version is drawn as a round marker joined to a horizontal time axis
    by a short stem, positioned at ``year - 2015`` on the x axis. Markers
    alternate above and below the axis so that versions released in the same
    year (v4/v5, v6/v7) do not overlap, and every marker stays inside the
    plotted area. The per-version feature table is printed to stdout. The
    figure is displayed with ``plt.show()``; nothing is returned.
    """

    print("\n" + "=" * 60)
    print("YOLO版本演进")
    print("=" * 60)

    # Version comparison table (insertion order = release order).
    versions = {
        'YOLOv1': {
            '年份': 2016,
            '特点': '开创性工作，统一检测框架',
            '创新': '网格划分、端到端回归'
        },
        'YOLOv2': {
            '年份': 2017,
            '特点': 'Anchor Box、多尺度训练',
            '创新': 'BatchNorm、高分辨率分类器'
        },
        'YOLOv3': {
            '年份': 2018,
            '特点': '多尺度预测、Darknet-53',
            '创新': 'FPN、残差连接'
        },
        'YOLOv4': {
            '年份': 2020,
            '特点': 'Mish激活、CSPNet、PANet',
            '创新': 'Mosaic增强、CIoU损失'
        },
        'YOLOv5': {
            '年份': 2020,
            '特点': 'PyTorch实现、易用性',
            '创新': 'Focus层、自适应Anchor'
        },
        'YOLOv6': {
            '年份': 2022,
            '特点': '重参数化、EfficientRep',
            '创新': 'SimSPPF、Decoupled Head'
        },
        'YOLOv7': {
            '年份': 2022,
            '特点': 'E-ELAN、辅助头训练',
            '创新': '扩展高效层聚合网络'
        },
        'YOLOv8': {
            '年份': 2023,
            '特点': '无Anchor、Decoupled Head',
            '创新': 'TaskAlignedAssigner、C2f模块'
        }
    }

    base_year = 2015    # x coordinate is "years since 2015"
    timeline_y = 0.5    # height of the horizontal time axis
    stem = 0.22         # distance of each marker from the time axis

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.axis('off')

    # Time axis first, so stems and markers are drawn on top of it.
    ax.plot([0.5, 8.5], [timeline_y, timeline_y], 'k-', linewidth=2)

    for idx, (name, info) in enumerate(versions.items()):
        x = info['年份'] - base_year
        # Alternate sides: +1 places the marker above the axis, -1 below.
        side = 1 if idx % 2 == 0 else -1
        node_y = timeline_y + side * stem

        # Stem linking the marker to the time axis.
        ax.plot([x, x], [timeline_y, node_y], color='gray', linewidth=1)
        # Marker sizes are in points, so the node stays round even though the
        # x and y data ranges differ.
        ax.plot(x, node_y, 'o', markersize=14,
                markerfacecolor='lightblue', markeredgecolor='black')

        # Labels are stacked outward, away from the time axis.
        ax.text(x, node_y + side * 0.06, name,
                ha='center', va='center', fontsize=9, fontweight='bold')
        ax.text(x, node_y + side * 0.12, f"{info['年份']}",
                ha='center', va='center', fontsize=8)

    # Leave a margin on the right so the 2023 node is not on the border.
    ax.set_xlim(0, 9)
    ax.set_ylim(0, 1)
    ax.set_title('YOLO版本演进时间线', fontsize=14)

    plt.tight_layout()
    plt.show()

    # Print the feature table, which the figure does not show.
    print("\n📋 版本对比:")
    for name, info in versions.items():
        print(f"   {name} ({info['年份']}): {info['特点']} | 创新: {info['创新']}")

    print("\n📊 主要演进方向:")
    print("   1. 骨干网络: Darknet → CSPNet → EfficientRep")
    print("   2. 检测头: Coupled → Decoupled")
    print("   3. Anchor: 固定Anchor → 自适应 → 无Anchor")
    print("   4. 损失函数: MSE → CIOU + DFL")
    print("   5. 数据增强: 基础 → Mosaic + MixUp")

yolo_evolution()

4.3 YOLOv8核心改进

Python 代码：

def yolo_v8():
    """Draw a backbone -> neck -> head schematic of YOLOv8 and print its key changes.

    The decoupled head is drawn with two output branches, classification and
    box regression, both fed from the head box. No IoU/objectness branch is
    drawn: the YOLOv8 detection head has none (see the Ultralytics YOLOv8
    documentation). The figure is displayed with ``plt.show()``; nothing is
    returned.
    """

    print("\n" + "=" * 60)
    print("YOLOv8：最新进展")
    print("=" * 60)

    # Architecture sketch.
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.axis('off')

    # Backbone.
    backbone = plt.Rectangle((0.1, 0.7), 0.2, 0.15,
                             facecolor='lightblue', ec='black')
    ax.add_patch(backbone)
    ax.text(0.2, 0.775, 'CSPDarknet\n(C2f模块)', ha='center', va='center', fontsize=8)

    # Neck, fed by the backbone.
    neck = plt.Rectangle((0.4, 0.7), 0.2, 0.15,
                         facecolor='lightgreen', ec='black')
    ax.add_patch(neck)
    ax.text(0.5, 0.775, 'PAN-FPN\n(特征金字塔)', ha='center', va='center', fontsize=8)
    ax.annotate('', xy=(0.4, 0.775), xytext=(0.3, 0.775),
               arrowprops=dict(arrowstyle='->', lw=1))

    # Detection head, fed by the neck.
    head = plt.Rectangle((0.7, 0.7), 0.2, 0.15,
                         facecolor='lightcoral', ec='black')
    ax.add_patch(head)
    ax.text(0.8, 0.775, 'Decoupled Head\n(解耦头)', ha='center', va='center', fontsize=8)
    ax.annotate('', xy=(0.7, 0.775), xytext=(0.6, 0.775),
               arrowprops=dict(arrowstyle='->', lw=1))

    # Head outputs: (centre x, centre y, label, colour), placed on either
    # side of the head's centre line (x = 0.8).
    head_bottom = (0.8, 0.7)
    outputs = [
        (0.69, 0.45, '分类分支', 'lightyellow'),
        (0.91, 0.45, '回归分支', 'lightyellow'),
    ]

    for x, y, label, color in outputs:
        box = plt.Rectangle((x-0.08, y-0.05), 0.16, 0.1,
                            facecolor=color, ec='black')
        ax.add_patch(box)
        ax.text(x, y, label, ha='center', va='center', fontsize=8)
        # Both branches originate from the bottom of the head box.
        ax.annotate('', xy=(x, y+0.05), xytext=head_bottom,
                   arrowprops=dict(arrowstyle='->', lw=1))

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title('YOLOv8架构', fontsize=12)

    plt.tight_layout()
    plt.show()

    print("\n📊 YOLOv8关键改进:")
    print("   1. 无Anchor: TaskAlignedAssigner")
    print("   2. 解耦头: 分类和回归分离")
    print("   3. C2f模块: 更轻量的CSP结构")
    print("   4. DFL损失: Distribution Focal Loss")
    print("   5. 取消objectness分支: 检测头仅输出分类与回归")

yolo_v8()

五、代码实现示例

5.1 YOLOv8推理

Python 代码：

def yolo_inference():
    """Print a ready-to-copy YOLOv8 inference snippet (single image and webcam).

    Nothing is executed here: the snippet is emitted as text only, so this
    function does not depend on ``ultralytics`` or ``cv2``.

    The printed webcam loop stops when a frame cannot be read, and releases
    the capture device and closes the preview window in a ``finally`` block.
    The class id is converted to ``int`` before it is used to index
    ``model.names``.
    """

    print("\n" + "=" * 60)
    print("YOLOv8推理代码")
    print("=" * 60)

    # The sample's comments stay in Chinese to match the tutorial text.
    code = """
# YOLOv8推理示例
from ultralytics import YOLO
import cv2

# 加载模型
model = YOLO('yolov8n.pt')  # nano版本

# 预测
results = model('image.jpg')

# 处理结果
for result in results:
    boxes = result.boxes
    for box in boxes:
        # 边界框坐标
        x1, y1, x2, y2 = box.xyxy[0].tolist()
        # 置信度
        confidence = box.conf[0].item()
        # 类别（cls是浮点张量，先转为int再查表）
        class_id = int(box.cls[0].item())
        class_name = model.names[class_id]

        print(f"{class_name}: {confidence:.2f}, box=[{x1:.0f}, {y1:.0f}, {x2:.0f}, {y2:.0f}]")

# 实时检测
cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:  # 读取失败（摄像头断开/视频结束）时退出
            break
        results = model(frame)
        annotated_frame = results[0].plot()
        cv2.imshow('YOLOv8', annotated_frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # 无论以何种方式退出，都释放摄像头并关闭窗口
    cap.release()
    cv2.destroyAllWindows()
"""

    print(code)

yolo_inference()

5.2 YOLOv8训练

Python 代码：

def yolo_training():
    """Print a ready-to-copy YOLOv8 training / validation / export snippet.

    Nothing is trained here: the snippet is emitted as text only, so this
    function does not depend on ``ultralytics``.
    """
    rule = "=" * 60

    # The snippet, one entry per output line. The empty first and last
    # entries produce the blank line before and after the code.
    snippet_lines = (
        "",
        "# YOLOv8训练示例",
        "from ultralytics import YOLO",
        "",
        "# 加载预训练模型",
        "model = YOLO('yolov8n.pt')",
        "",
        "# 训练",
        "results = model.train(",
        "    data='coco128.yaml',   # 数据集配置",
        "    epochs=100,            # 训练轮数",
        "    imgsz=640,             # 输入图像尺寸",
        "    batch=16,              # 批次大小",
        "    device=0,              # GPU设备",
        "    workers=8,             # 数据加载线程",
        "    lr0=0.01,              # 初始学习率",
        "    weight_decay=0.0005,   # 权重衰减",
        "    momentum=0.937,        # 动量",
        "    augment=True,          # 数据增强",
        "    exist_ok=True          # 覆盖现有结果",
        ")",
        "",
        "# 验证",
        "metrics = model.val()",
        'print(f"mAP50-95: {metrics.box.map:.4f}")',
        "",
        "# 导出模型",
        "model.export(format='onnx')  # 导出ONNX",
        "",
    )

    # Header block and snippet in a single write, one item per line.
    print("\n" + rule, "YOLOv8训练代码", rule, "\n".join(snippet_lines), sep="\n")

yolo_training()

六、总结

| 版本 | 核心创新 | 特点 |
| --- | --- | --- |
| YOLOv1 | 统一检测框架 | 网格划分、端到端 |
| YOLOv2 | Anchor Box | 多尺度训练、BatchNorm |
| YOLOv3 | 多尺度预测 | FPN、Darknet-53 |
| YOLOv4 | 结构优化 | Mosaic、CSPNet、CIoU |
| YOLOv5 | PyTorch实现 | 易用性、自适应Anchor |
| YOLOv8 | 无Anchor设计 | 解耦头、DFL损失 |

核心概念回顾：

- 网格: 将图像划分为S×S个网格，每个网格负责检测中心落在该网格的物体
- Anchor Box: 预定义的先验框，聚类得到，多尺度多比例
- 损失函数: 定位损失 + 置信度损失 + 分类损失

选择建议：

- 实时性要求高 → YOLOv8-nano/s
- 精度要求高 → YOLOv8-l/x
- 移动端部署 → YOLOv8-nano
- 研究学习 → YOLOv3（经典）