
# 目标检测:SSD(Single Shot MultiBox Detector)
## 一、SSD核心思想
### 1.1 SSD vs 其他检测器
python
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import warnings
warnings.filterwarnings('ignore')
# Section banner.
print("=" * 60)
print("SSD:单阶段多尺度检测器")
print("=" * 60)

# One panel per detector family: Faster R-CNN, YOLO and SSD.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
ax1, ax2, ax3 = axes

# Panel 1 - Faster R-CNN: three pipeline stages chained left to right.
ax1.axis('off')
ax1.set_title('Faster R-CNN\n(两阶段)', fontsize=10)
stages = [("RPN", 0.3), ("ROI Pooling", 0.6), ("分类+回归", 0.9)]
for stage_name, stage_x in stages:
    node = plt.Circle((stage_x, 0.5), 0.1, color='lightcoral', ec='black')
    ax1.add_patch(node)
    ax1.text(stage_x, 0.5, stage_name, ha='center', va='center', fontsize=7)
# Arrows between consecutive stages, given as (tail_x, head_x).
for tail_x, head_x in ((0.4, 0.5), (0.7, 0.8)):
    ax1.annotate('', xy=(head_x, 0.5), xytext=(tail_x, 0.5),
                 arrowprops=dict(arrowstyle='->', lw=1))

# Panel 2 - YOLO: a single node, one forward pass.
ax2.axis('off')
ax2.set_title('YOLO\n(单阶段,单尺度)', fontsize=10)
node = plt.Circle((0.5, 0.5), 0.15, color='lightgreen', ec='black')
ax2.add_patch(node)
ax2.text(0.5, 0.5, 'CNN\n一次性预测', ha='center', va='center', fontsize=8)

# Panel 3 - SSD: one prediction node per feature-map scale.
ax3.axis('off')
ax3.set_title('SSD\n(单阶段,多尺度)', fontsize=10)
scales = [(0.2, 0.7, '大物体\n预测', 'lightblue'),
          (0.4, 0.5, '中物体\n预测', 'lightgreen'),
          (0.6, 0.3, '小物体\n预测', 'lightyellow')]
for node_x, node_y, caption, fill in scales:
    node = plt.Circle((node_x, node_y), 0.08, color=fill, ec='black')
    ax3.add_patch(node)
    ax3.text(node_x, node_y, caption, ha='center', va='center', fontsize=6)
# Three arrows that all end at the panel centre.
for tail in ((0.28, 0.65), (0.48, 0.45), (0.68, 0.25)):
    ax3.annotate('', xy=(0.5, 0.5), xytext=tail,
                 arrowprops=dict(arrowstyle='->', lw=1))

plt.suptitle('SSD vs 其他检测器', fontsize=14)
plt.tight_layout()
plt.show()

# Key ideas, printed one line at a time.
for line in (
    "\n💡 SSD核心思想:",
    " 1. 单阶段: 直接预测边界框和类别,无需候选区域",
    " 2. 多尺度: 使用不同大小的特征图检测不同尺寸的物体",
    " 3. 默认框: 在每个特征图位置预定义多个默认框",
    " 4. 速度快: 同时达到高精度和高速度",
):
    print(line)
## 二、多尺度特征图
### 2.1 特征金字塔
python
def multi_scale_features():
    """Draw the six SSD feature-map grids and print a short explanation.

    A 2x3 figure is created; each panel shows an N x N grid on the unit
    square for N in 38, 19, 10, 5, 3, 1. Returns None; the figure is shown
    with ``plt.show()`` and the notes are written to stdout.
    """
    print("\n" + "=" * 60)
    print("多尺度特征图")
    print("=" * 60)

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    # (panel title, description, cells per side), listed in row-major panel
    # order so they can be paired directly with ``axes.flat``.
    grid_specs = [
        ('38×38\n(大特征图)', '检测小物体', 38),
        ('19×19\n(中等特征图)', '检测中等物体', 19),
        ('10×10\n(小特征图)', '检测大物体', 10),
        ('5×5', '', 5),
        ('3×3', '', 3),
        ('1×1', '', 1),
    ]

    for ax, (title, desc, size) in zip(axes.flat, grid_specs):
        # Light grey grid lines at every cell boundary.
        for k in range(size):
            ax.axhline(y=k/size, color='gray', linestyle='-', alpha=0.3)
            ax.axvline(x=k/size, color='gray', linestyle='-', alpha=0.3)

        # One outlined square per grid cell.
        step = 1/size
        for r in range(size):
            for c in range(size):
                cell = Rectangle((c*step, r*step), step, step,
                                 facecolor='none', edgecolor='blue', alpha=0.5)
                ax.add_patch(cell)

        ax.set_title(f'{title}\n{desc}', fontsize=10)
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.set_aspect('equal')
        ax.axis('off')

    plt.suptitle('SSD多尺度特征图', fontsize=14)
    plt.tight_layout()
    plt.show()

    for line in (
        "\n📊 特征图与感受野:",
        " - 浅层特征图(大尺寸): 感受野小 → 检测小物体",
        " - 深层特征图(小尺寸): 感受野大 → 检测大物体",
        "\n SSD使用6个尺度的特征图:",
        " 38×38 → 19×19 → 10×10 → 5×5 → 3×3 → 1×1",
    ):
        print(line)


multi_scale_features()
### 2.2 特征图上的检测
python
def feature_map_detection():
    """Draw every SSD feature map next to its detection head.

    Each row shows a gridded, coloured box for one feature map, a grey
    "detection head" box, and an arrow from the former to the latter.
    All six maps are drawn (38, 19, 10, 5, 3 and 1 cells per side) so the
    figure agrees with the six-scale description printed elsewhere in this
    file. Returns None; the figure is shown and a summary is printed.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.axis('off')

    # Rows are 0.15 tall and stacked without gaps from y=0.95 down to y=0.05.
    row_height = 0.15
    levels = [
        {'size': 38, 'y': 0.8, 'color': 'lightblue', 'label': 'Conv4_3\n38×38'},
        {'size': 19, 'y': 0.65, 'color': 'lightgreen', 'label': 'Conv7\n19×19'},
        {'size': 10, 'y': 0.5, 'color': 'lightyellow', 'label': 'Conv8_2\n10×10'},
        {'size': 5, 'y': 0.35, 'color': 'lightcoral', 'label': 'Conv9_2\n5×5'},
        {'size': 3, 'y': 0.2, 'color': 'lightpink', 'label': 'Conv10_2\n3×3'},
        {'size': 1, 'y': 0.05, 'color': 'lavender', 'label': 'Conv11_2\n1×1'},
    ]

    for level in levels:
        size = level['size']
        y = level['y']
        color = level['color']
        label = level['label']
        mid_y = y + row_height / 2

        # Grid lines inside the feature-map box. xmin/xmax and ymin/ymax are
        # axes fractions, which coincide with data coordinates here because
        # both axis limits are set to (0, 1) below.
        for i in range(size):
            ax.axhline(y=y + i/size*row_height, xmin=0.1, xmax=0.4,
                       color='gray', alpha=0.3, linewidth=0.5)
            ax.axvline(x=0.1 + i/size*0.3, ymin=y, ymax=y+row_height,
                       color='gray', alpha=0.3, linewidth=0.5)

        # Coloured feature-map box with its layer name and resolution.
        rect = Rectangle((0.1, y), 0.3, row_height,
                         facecolor=color, ec='black', alpha=0.7)
        ax.add_patch(rect)
        ax.text(0.25, mid_y, label, ha='center', va='center', fontsize=8)

        # Detection head box for this level.
        head_rect = Rectangle((0.55, y), 0.15, row_height,
                              facecolor='lightgray', ec='black')
        ax.add_patch(head_rect)
        ax.text(0.625, mid_y, '检测头\n(分类+回归)', ha='center', va='center', fontsize=7)

        # Arrow from the feature map to its head.
        ax.annotate('', xy=(0.55, mid_y), xytext=(0.4, mid_y),
                    arrowprops=dict(arrowstyle='->', lw=1))

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title('SSD多尺度特征图检测', fontsize=14)
    plt.tight_layout()
    plt.show()

    print("\n📐 每个特征图位置的预测:")
    print(" 对于每个特征图上的每个位置,预测:")
    print(" - 默认框偏移: (cx, cy, w, h)")
    print(" - 类别分数: (c1, c2, ..., ck)")
    print("\n 总预测数 = Σ (feature_size² × num_default_boxes)")


feature_map_detection()
## 三、默认框(Default Box)
### 3.1 默认框设计
python
def default_boxes():
    """Illustrate default boxes of several scales and aspect ratios.

    The left panel overlays nine boxes (three scales x three ratios) around
    a common centre; the right panel shows a text summary of the default-box
    parameters. Returns None; the figure is shown and box-count statistics
    are printed.
    """
    print("\n" + "=" * 60)
    print("默认框(Default Box)")
    print("=" * 60)

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    box_ax, info_ax = axes

    # Left panel: boxes that share a centre; colour encodes the scale.
    box_ax.set_title('默认框设计', fontsize=12)
    box_scales = [0.1, 0.2, 0.3]
    aspect_ratios = [0.5, 1.0, 2.0]
    edge_colors = ['blue', 'green', 'red']
    cx, cy = (0.5, 0.5)
    for box_scale, edge in zip(box_scales, edge_colors):
        for ar in aspect_ratios:
            # Width and height are chosen so that w * h == scale ** 2 and
            # w / h == ratio.
            root = np.sqrt(ar)
            w = box_scale * root
            h = box_scale / root
            outline = Rectangle((cx-w/2, cy-h/2), w, h,
                                facecolor='none', edgecolor=edge, linewidth=1.5)
            box_ax.add_patch(outline)
    box_ax.plot(cx, cy, 'ko', markersize=8)
    box_ax.set_xlim(0, 1)
    box_ax.set_ylim(0, 1)
    box_ax.set_aspect('equal')
    box_ax.grid(True, alpha=0.3)

    # Right panel: parameter summary rendered as monospace text.
    info_ax.axis('off')
    info_ax.set_title('默认框参数', fontsize=12)
    summary = """
📐 默认框参数:
1. 尺度 (Scale):
s_k = s_min + (s_max - s_min) * (k-1)/(m-1)
其中 s_min=0.2, s_max=0.9
2. 比例 (Aspect Ratio):
a_r ∈ {1, 2, 3, 1/2, 1/3}
3. 每个默认框预测:
- 边界框偏移 (Δcx, Δcy, Δw, Δh)
- 类别分数 (C个类别)
4. 总默认框数:
Σ (feature_size² × num_boxes)
"""
    info_ax.text(0.05, 0.95, summary, transform=info_ax.transAxes,
                 fontsize=10, verticalalignment='top', fontfamily='monospace')

    plt.suptitle('SSD默认框设计', fontsize=14)
    plt.tight_layout()
    plt.show()

    for line in (
        "\n📊 默认框数量统计:",
        " 特征图尺寸: 38×38, 19×19, 10×10, 5×5, 3×3, 1×1",
        " 每位置默认框数: 4, 6, 6, 6, 4, 4",
        " 总默认框数: 38²×4 + 19²×6 + 10²×6 + 5²×6 + 3²×4 + 1²×4 = 8732",
    ):
        print(line)


default_boxes()
## 四、SSD网络结构
### 4.1 完整架构
python
def ssd_architecture():
    """Draw a block diagram of the SSD network and print its components.

    The diagram contains the base-network box, the extra-layers box, six
    detection-head boxes and an NMS box. Arrows run base -> extra layers,
    into every detection head, and from every detection head to the NMS box.
    Returns None; the figure is shown and a component list is printed.
    """
    print("\n" + "=" * 60)
    print("SSD网络架构")
    print("=" * 60)

    fig, ax = plt.subplots(figsize=(14, 10))
    ax.axis('off')

    # Base network (VGG16).
    base = Rectangle((0.05, 0.8), 0.2, 0.12,
                     facecolor='lightblue', ec='black')
    ax.add_patch(base)
    ax.text(0.15, 0.86, 'VGG16\n(基础网络)', ha='center', va='center', fontsize=8)

    # Extra convolution layers that yield the additional scales.
    extra = Rectangle((0.35, 0.8), 0.2, 0.12,
                      facecolor='lightgreen', ec='black')
    ax.add_patch(extra)
    ax.text(0.45, 0.86, '额外卷积层\n(多尺度特征)', ha='center', va='center', fontsize=8)
    ax.annotate('', xy=(0.35, 0.86), xytext=(0.25, 0.86),
                arrowprops=dict(arrowstyle='->', lw=1))

    # Detection heads: (x, y, label, fill colour) of each box's lower-left corner.
    detection_heads = [
        (0.65, 0.8, 'Conv4_3\n(38×38)', 'lightyellow'),
        (0.65, 0.65, 'Conv7\n(19×19)', 'lightyellow'),
        (0.65, 0.5, 'Conv8_2\n(10×10)', 'lightyellow'),
        (0.65, 0.35, 'Conv9_2\n(5×5)', 'lightyellow'),
        (0.65, 0.2, 'Conv10_2\n(3×3)', 'lightyellow'),
        (0.65, 0.05, 'Conv11_2\n(1×1)', 'lightyellow'),
    ]
    for x, y, label, color in detection_heads:
        box = Rectangle((x, y), 0.15, 0.1,
                        facecolor=color, ec='black')
        ax.add_patch(box)
        ax.text(x+0.075, y+0.05, label, ha='center', va='center', fontsize=7)
        # Incoming arrow, ending at the middle of the head's left edge.
        ax.annotate('', xy=(x, y+0.05), xytext=(0.55, y+0.05),
                    arrowprops=dict(arrowstyle='->', lw=1))

    # NMS post-processing box.
    nms = Rectangle((0.05, 0.05), 0.2, 0.12,
                    facecolor='lightcoral', ec='black')
    ax.add_patch(nms)
    ax.text(0.15, 0.11, 'NMS\n(非极大值抑制)', ha='center', va='center', fontsize=8)

    # Every detection head feeds NMS. annotate() draws from xytext to xy, so
    # the tail sits on the head and the tip on the middle of the NMS box's
    # right edge. The tail is placed slightly below the incoming arrow
    # (y+0.02 vs y+0.05) so the two do not overlap.
    nms_entry = (0.25, 0.11)
    for x, y, _, _ in detection_heads:
        ax.annotate('', xy=nms_entry, xytext=(x, y+0.02),
                    arrowprops=dict(arrowstyle='->', lw=1, connectionstyle='arc3,rad=0.3'))

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title('SSD网络架构', fontsize=14)
    plt.tight_layout()
    plt.show()

    print("\n📊 SSD网络组成:")
    print(" 1. 基础网络: VGG16(截断)")
    print(" 2. 额外卷积层: 产生多尺度特征图")
    print(" 3. 检测头: 每个特征图上的卷积预测")
    print(" 4. NMS: 后处理去除重复框")


ssd_architecture()
## 五、训练与损失函数
### 5.1 匹配策略
python
def matching_strategy():
    """Illustrate how default boxes are matched to a ground-truth box.

    A red ground-truth rectangle is drawn together with five default boxes.
    Each default box carries an illustrative IoU value (a literal in the
    list below, not computed from the geometry). Boxes whose IoU exceeds the
    threshold are drawn in green and labelled; the rest are drawn in blue.
    Returns None; the figure is shown and the matching rules are printed.
    """
    print("\n" + "=" * 60)
    print("默认框匹配策略")
    print("=" * 60)

    fig, ax = plt.subplots(figsize=(10, 8))

    # Ground-truth box.
    gt_rect = Rectangle((0.3, 0.4), 0.4, 0.3,
                        facecolor='none', edgecolor='red', linewidth=2)
    ax.add_patch(gt_rect)
    ax.text(0.5, 0.55, '真实框', ha='center', va='center', fontsize=10, color='red')

    # Positive/negative is decided by this single threshold so that colour,
    # label and the "IoU > 0.5" rule in the title always agree (a box at
    # exactly 0.5 is a negative).
    iou_threshold = 0.5

    # Default boxes as (centre x, centre y, width, height, illustrative IoU).
    default_boxes_list = [
        (0.2, 0.35, 0.15, 0.2, 0.3),
        (0.4, 0.45, 0.2, 0.25, 0.7),
        (0.6, 0.5, 0.25, 0.2, 0.65),
        (0.35, 0.3, 0.1, 0.15, 0.2),
        (0.55, 0.6, 0.2, 0.3, 0.5),
    ]
    for x, y, w, h, iou in default_boxes_list:
        is_positive = iou > iou_threshold
        color = 'green' if is_positive else 'blue'
        rect = Rectangle((x-w/2, y-h/2), w, h,
                         facecolor='none', edgecolor=color, linewidth=1.5, alpha=0.7)
        ax.add_patch(rect)
        if is_positive:
            ax.text(x, y, f'IoU={iou}', ha='center', va='center', fontsize=8, color='green')

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_aspect('equal')
    ax.set_title('默认框匹配(IoU > 0.5为正样本)', fontsize=12)
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    print("\n📐 匹配策略:")
    print(" 1. 每个真实框匹配IoU最大的默认框")
    print(" 2. 默认框与任一真实框IoU > 0.5则匹配")
    print(" 3. 未匹配的默认框为负样本")
    print(" 4. 正负样本比例1:3平衡")


matching_strategy()
### 5.2 损失函数
python
def ssd_loss():
    """Show two text panels: the SSD loss formula and hard negative mining.

    Both panels are axes with the frame turned off that display a
    monospace text block. Returns None; the figure is shown with
    ``plt.show()``.
    """
    print("\n" + "=" * 60)
    print("SSD损失函数")
    print("=" * 60)

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    # Text for the left panel: composition of the loss.
    loss_text = """
📐 SSD总损失:
L(x, c, l, g) = 1/N [L_conf(x, c) + α L_loc(x, l, g)]
其中:
• N: 正样本数量
• L_conf: 置信度损失(分类)
• L_loc: 定位损失(回归)
• α: 权重系数(通常α=1)
定位损失 (Smooth L1):
L_loc = Σ smooth_L1(l_i^m - g_j^m)
置信度损失 (Softmax):
L_conf = -log(softmax(c_i^p))
"""

    # Text for the right panel: balancing positives and negatives.
    hard_mining_text = """
🔧 难例挖掘 (Hard Negative Mining):
问题: 负样本远多于正样本
解决方案:
1. 按置信度损失排序所有负样本
2. 选择损失最大的负样本
3. 保持正负样本比例 1:3
作用:
• 平衡正负样本
• 加速收敛
• 提高检测精度
"""

    panels = (
        ('损失函数组成', loss_text),
        ('难例挖掘', hard_mining_text),
    )
    for panel, (heading, body) in zip(axes, panels):
        panel.axis('off')
        panel.set_title(heading, fontsize=12)
        panel.text(0.05, 0.95, body, transform=panel.transAxes,
                   fontsize=10, verticalalignment='top', fontfamily='monospace')

    plt.tight_layout()
    plt.show()


ssd_loss()
## 六、代码实现示例
### 6.1 SSD检测头
python
def ssd_detection_head():
    """Print a PyTorch sketch of the SSD detection head and SSD module.

    The sketch is held in a string and only printed; nothing in it is
    executed here, so torch is not required to call this function. The
    printed ``SSD`` class refers to helper methods (``_vgg_layers``,
    ``_extra_layers``, ``_generate_default_boxes``) that the sketch does
    not define. Returns None.
    """
    print("\n" + "=" * 60)
    print("SSD检测头代码")
    print("=" * 60)
    # The snippet is printed verbatim, so its indentation must be valid
    # Python on its own. SSDDetectionHead stores num_classes in __init__
    # because forward() reads self.num_classes when reshaping the scores.
    code = """
import torch
import torch.nn as nn
import torch.nn.functional as F


class SSDDetectionHead(nn.Module):
    def __init__(self, in_channels, num_classes, num_anchors):
        super(SSDDetectionHead, self).__init__()
        self.num_classes = num_classes
        # 分类分支
        self.cls_head = nn.Conv2d(in_channels, num_anchors * num_classes,
                                  kernel_size=3, padding=1)
        # 回归分支
        self.loc_head = nn.Conv2d(in_channels, num_anchors * 4,
                                  kernel_size=3, padding=1)

    def forward(self, x):
        batch_size = x.size(0)
        # 分类预测
        cls_pred = self.cls_head(x)
        cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous()
        cls_pred = cls_pred.view(batch_size, -1, self.num_classes)
        # 位置预测
        loc_pred = self.loc_head(x)
        loc_pred = loc_pred.permute(0, 2, 3, 1).contiguous()
        loc_pred = loc_pred.view(batch_size, -1, 4)
        return cls_pred, loc_pred


class SSD(nn.Module):
    def __init__(self, num_classes=21):
        super(SSD, self).__init__()
        self.num_classes = num_classes
        # 基础网络 VGG16
        self.base = self._vgg_layers()
        # 额外层
        self.extras = self._extra_layers()
        # 检测头配置
        self.detection_heads = nn.ModuleList([
            SSDDetectionHead(512, num_classes, 4),   # conv4_3
            SSDDetectionHead(1024, num_classes, 6),  # conv7
            SSDDetectionHead(512, num_classes, 6),   # conv8_2
            SSDDetectionHead(256, num_classes, 6),   # conv9_2
            SSDDetectionHead(256, num_classes, 4),   # conv10_2
            SSDDetectionHead(256, num_classes, 4),   # conv11_2
        ])
        # 默认框配置
        self.default_boxes = self._generate_default_boxes()

    def forward(self, x):
        sources = []
        # 提取基础网络特征
        for layer in self.base:
            x = layer(x)
            if layer == self.base[23]:  # conv4_3
                sources.append(x)
        # 提取额外层特征
        for layer in self.extras:
            x = layer(x)
            sources.append(x)
        # 多尺度检测
        cls_preds = []
        loc_preds = []
        for source, head in zip(sources, self.detection_heads):
            cls, loc = head(source)
            cls_preds.append(cls)
            loc_preds.append(loc)
        cls_preds = torch.cat(cls_preds, dim=1)
        loc_preds = torch.cat(loc_preds, dim=1)
        return cls_preds, loc_preds
"""
    print(code)


ssd_detection_head()
### 6.2 默认框生成
python
def default_boxes_generation():
    """Print an example implementation of SSD default-box generation.

    The example is held in a string and only printed; it is not executed
    here, so torch is not required to call this function. Returns None.
    """
    print("\n" + "=" * 60)
    print("默认框生成")
    print("=" * 60)
    # The outer literal uses ''' because the snippet itself contains a
    # \"\"\"-delimited docstring; delimiting both with the same quotes ends
    # the outer string early and makes the file a syntax error. The snippet
    # is printed verbatim, so it carries its own indentation and the
    # imports for the np / torch names it uses.
    code = '''
import numpy as np
import torch


def generate_default_boxes(feature_maps, aspect_ratios, scales):
    """生成默认框"""
    default_boxes = []
    for k, (feature_size, ratios, scale) in enumerate(zip(feature_maps, aspect_ratios, scales)):
        # 特征图上的每个位置
        for i in range(feature_size):
            for j in range(feature_size):
                # 中心坐标
                cx = (j + 0.5) / feature_size
                cy = (i + 0.5) / feature_size
                # 尺度
                s_k = scale
                s_k_plus = scales[min(k+1, len(scales)-1)]
                # 添加第一个默认框
                default_boxes.append([cx, cy, s_k, s_k])
                # 添加第二个默认框
                default_boxes.append([cx, cy, np.sqrt(s_k * s_k_plus), np.sqrt(s_k * s_k_plus)])
                # 添加比例框
                for ratio in ratios:
                    w = s_k * np.sqrt(ratio)
                    h = s_k / np.sqrt(ratio)
                    default_boxes.append([cx, cy, w, h])
                    if ratio != 1:
                        default_boxes.append([cx, cy, h, w])
    return torch.tensor(default_boxes)


# 配置
feature_maps = [38, 19, 10, 5, 3, 1]
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
scales = [0.1, 0.2, 0.375, 0.55, 0.725, 0.9]
default_boxes = generate_default_boxes(feature_maps, aspect_ratios, scales)
print(f"总默认框数: {len(default_boxes)}")
'''
    print(code)


default_boxes_generation()
## 七、总结
| 组件 | 作用 | 关键参数 |
|---|---|---|
| 基础网络 | 特征提取 | VGG16 |
| 额外层 | 多尺度特征 | 卷积层 |
| 检测头 | 分类+回归 | 3×3卷积 |
| 默认框 | 先验框 | 尺度、比例 |
| NMS | 去重 | IoU阈值 |
SSD vs YOLO对比:
| 特性 | SSD | YOLO |
|---|---|---|
| 多尺度检测 | ✅ 是 | YOLOv3后支持 |
| 默认框 | 密集采样 | Anchor Box |
| 速度 | 快 | 更快 |
| 小物体检测 | 较好 | 一般 |
核心要点:
- 多尺度特征图是关键创新
- 默认框密集覆盖不同尺寸物体
- 端到端训练,单阶段检测
- 速度与精度的良好平衡