一、搭建模型
1.1 基础卷积单元与核心分解模块搭建
Inception v3 的一大特点是将每个常规卷积层都标配了批归一化(BatchNorm)和激活函数(ReLU)。同时,它不再使用单一的 Inception 块,而是设计了 A、B、C 三种不同内部结构的特征提取块。
python
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
class BasicConv2d(nn.Module):
    """Basic convolution unit: Conv2d -> BatchNorm2d -> ReLU.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution.
        **kwargs: Forwarded unchanged to ``nn.Conv2d`` (``kernel_size``,
            ``stride``, ``padding``, ...).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        # bias=False: the BatchNorm2d that follows has its own learnable
        # shift (affine by default), so a conv bias would be redundant.
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply convolution, batch normalisation and ReLU, in that order."""
        x = self.conv(x)
        x = self.bn(x)
        return self.relu(x)
class InceptionA(nn.Module):
    """Inception-A block, used in the shallow stage of the network.

    Four parallel branches are concatenated along the channel axis:

    * ``branch1x1``: 1x1 conv -> 64 channels
    * ``branch5x5``: 1x1 conv (48) -> 5x5 conv -> 64 channels
    * ``branch3x3db``: 1x1 conv (64) -> 3x3 conv (96) -> 3x3 conv -> 96 channels
    * ``branch_pool``: 3x3 average pool -> 1x1 conv -> ``pool_features`` channels

    Only the ``branch3x3db`` branch is the "two stacked 3x3" replacement for
    a 5x5 convolution; ``branch5x5`` still uses a real 5x5 kernel.

    All branches keep the spatial size, so the output has
    ``224 + pool_features`` channels at the input resolution.

    Args:
        in_channels: Number of channels of the input feature map.
        pool_features: Output channels of the pooling branch.
    """

    def __init__(self, in_channels, pool_features):
        super().__init__()
        self.branch1x1 = BasicConv2d(in_channels, 64, kernel_size=1)

        self.branch5x5_1 = BasicConv2d(in_channels, 48, kernel_size=1)
        self.branch5x5_2 = BasicConv2d(48, 64, kernel_size=5, padding=2)

        self.branch3x3db_1 = BasicConv2d(in_channels, 64, kernel_size=1)
        self.branch3x3db_2 = BasicConv2d(64, 96, kernel_size=3, padding=1)
        self.branch3x3db_3 = BasicConv2d(96, 96, kernel_size=3, padding=1)

        self.branch_pool = nn.Sequential(
            nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
            BasicConv2d(in_channels, pool_features, kernel_size=1),
        )

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate them on dim 1."""
        out_1x1 = self.branch1x1(x)

        out_5x5 = self.branch5x5_1(x)
        out_5x5 = self.branch5x5_2(out_5x5)

        out_3x3db = self.branch3x3db_1(x)
        out_3x3db = self.branch3x3db_2(out_3x3db)
        out_3x3db = self.branch3x3db_3(out_3x3db)

        out_pool = self.branch_pool(x)

        return torch.cat([out_1x1, out_5x5, out_3x3db, out_pool], 1)
class InceptionB(nn.Module):
    """Inception-B block built from asymmetric 1x7 / 7x1 convolutions.

    Four parallel branches, each ending in 192 channels, are concatenated
    along the channel axis, so the output always has 768 channels at the
    input resolution:

    * ``branch1x1``: 1x1 conv
    * ``branch7x7``: 1x1 -> 1x7 -> 7x1
    * ``branch7x7dbl``: 1x1 -> 7x1 -> 1x7 -> 7x1 -> 1x7
    * ``branch_pool``: 3x3 average pool -> 1x1 conv

    Args:
        in_channels: Number of channels of the input feature map.
        channels_7x7: Width of the intermediate layers inside the two
            7x7 branches.
    """

    def __init__(self, in_channels, channels_7x7):
        super().__init__()
        self.branch1x1 = BasicConv2d(in_channels, 192, kernel_size=1)

        c7 = channels_7x7
        self.branch7x7_1 = BasicConv2d(in_channels, c7, kernel_size=1)
        self.branch7x7_2 = BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3))
        self.branch7x7_3 = BasicConv2d(c7, 192, kernel_size=(7, 1), padding=(3, 0))

        self.branch7x7dbl_1 = BasicConv2d(in_channels, c7, kernel_size=1)
        self.branch7x7dbl_2 = BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0))
        self.branch7x7dbl_3 = BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3))
        self.branch7x7dbl_4 = BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0))
        self.branch7x7dbl_5 = BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3))

        self.branch_pool = nn.Sequential(
            nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
            BasicConv2d(in_channels, 192, kernel_size=1),
        )

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate them on dim 1."""
        out_1x1 = self.branch1x1(x)

        out_7x7 = self.branch7x7_1(x)
        out_7x7 = self.branch7x7_2(out_7x7)
        out_7x7 = self.branch7x7_3(out_7x7)

        out_7x7dbl = self.branch7x7dbl_1(x)
        out_7x7dbl = self.branch7x7dbl_2(out_7x7dbl)
        out_7x7dbl = self.branch7x7dbl_3(out_7x7dbl)
        out_7x7dbl = self.branch7x7dbl_4(out_7x7dbl)
        out_7x7dbl = self.branch7x7dbl_5(out_7x7dbl)

        out_pool = self.branch_pool(x)

        return torch.cat([out_1x1, out_7x7, out_7x7dbl, out_pool], 1)
class InceptionC(nn.Module):
    """Inception-C block with parallel 1x3 / 3x1 splits, used in the deep stage.

    Four branches are concatenated along the channel axis:

    * ``branch1x1``: 1x1 conv -> 320 channels
    * ``branch3x3``: 1x1 conv (384), then a 1x3 and a 3x1 conv applied in
      parallel to the same tensor and concatenated -> 768 channels
    * ``branch3x3dbl``: 1x1 conv (448) -> 3x3 conv (384), then the same
      parallel 1x3 / 3x1 split -> 768 channels
    * ``branch_pool``: 3x3 average pool -> 1x1 conv -> 192 channels

    The output therefore always has 2048 channels at the input resolution.

    Args:
        in_channels: Number of channels of the input feature map.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.branch1x1 = BasicConv2d(in_channels, 320, kernel_size=1)

        self.branch3x3_1 = BasicConv2d(in_channels, 384, kernel_size=1)
        self.branch3x3_2a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1))
        self.branch3x3_2b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0))

        self.branch3x3dbl_1 = BasicConv2d(in_channels, 448, kernel_size=1)
        self.branch3x3dbl_2 = BasicConv2d(448, 384, kernel_size=3, padding=1)
        self.branch3x3dbl_3a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1))
        self.branch3x3dbl_3b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0))

        self.branch_pool = nn.Sequential(
            nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
            BasicConv2d(in_channels, 192, kernel_size=1),
        )

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate them on dim 1."""
        out_1x1 = self.branch1x1(x)

        # Single 3x3 path: shared 1x1 reduction, then the two 1-D kernels
        # side by side on the same tensor.
        stem_3x3 = self.branch3x3_1(x)
        out_3x3 = torch.cat(
            [self.branch3x3_2a(stem_3x3), self.branch3x3_2b(stem_3x3)], 1
        )

        # Double 3x3 path: 1x1 -> 3x3, then the same side-by-side split.
        stem_dbl = self.branch3x3dbl_1(x)
        stem_dbl = self.branch3x3dbl_2(stem_dbl)
        out_3x3dbl = torch.cat(
            [self.branch3x3dbl_3a(stem_dbl), self.branch3x3dbl_3b(stem_dbl)], 1
        )

        out_pool = self.branch_pool(x)

        return torch.cat([out_1x1, out_3x3, out_3x3dbl, out_pool], 1)
1.2 尺寸缩减(Reduction)与辅助分类模块搭建
为了避免下采样时产生表征瓶颈(representational bottleneck),v3 引入了专门的 Reduction 模块:用并行的步长为 2 的卷积分支与池化分支,在特征图尺寸减半的同时扩充通道数。同时,为了加速中层梯度的回传,模型提供了辅助分类器(AuxLogits)。
python
class ReductionA(nn.Module):
    """Grid-reduction block: halves the spatial size and widens the channels.

    Three parallel stride-2 branches (no padding) are concatenated along the
    channel axis:

    * ``branch3x3``: 3x3 conv, stride 2 -> ``k`` channels
    * ``branch3x3dbl``: 1x1 conv (``l``) -> 3x3 conv (``m``) -> 3x3 conv,
      stride 2 -> ``n`` channels
    * ``branch_pool``: 3x3 max pool, stride 2 -> ``in_channels`` channels

    The output has ``k + n + in_channels`` channels and a spatial size of
    ``floor((H - 3) / 2) + 1`` per dimension.

    Args:
        in_channels: Number of channels of the input feature map.
        k: Output channels of the single 3x3 branch.
        l: Width of the 1x1 reduction in the double 3x3 branch.
        m: Width of the middle 3x3 conv in the double 3x3 branch.
        n: Output channels of the double 3x3 branch.
    """

    def __init__(self, in_channels, k, l, m, n):
        super().__init__()
        self.branch3x3 = BasicConv2d(in_channels, k, kernel_size=3, stride=2)

        self.branch3x3dbl_1 = BasicConv2d(in_channels, l, kernel_size=1)
        self.branch3x3dbl_2 = BasicConv2d(l, m, kernel_size=3, padding=1)
        self.branch3x3dbl_3 = BasicConv2d(m, n, kernel_size=3, stride=2)

        self.branch_pool = nn.MaxPool2d(kernel_size=3, stride=2)

    def forward(self, x):
        """Run the three down-sampling branches and concatenate them on dim 1."""
        out_3x3 = self.branch3x3(x)

        out_3x3dbl = self.branch3x3dbl_1(x)
        out_3x3dbl = self.branch3x3dbl_2(out_3x3dbl)
        out_3x3dbl = self.branch3x3dbl_3(out_3x3dbl)

        out_pool = self.branch_pool(x)

        return torch.cat([out_3x3, out_3x3dbl, out_pool], 1)
class InceptionAux(nn.Module):
    """Auxiliary classifier head (AuxLogits).

    Pipeline: 5x5 average pool (stride 3) -> 1x1 conv (128) -> 5x5 conv (768)
    -> global average pool -> flatten -> linear layer to ``num_classes``.

    The unpadded 5x5 convolution needs at least a 5x5 map after the pooling
    step; a 17x17 input becomes 5x5 after pooling and 1x1 after ``conv1``.

    Args:
        in_channels: Number of channels of the input feature map.
        num_classes: Size of the output logits vector.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.conv0 = BasicConv2d(in_channels, 128, kernel_size=1)
        self.conv1 = BasicConv2d(128, 768, kernel_size=5)
        # Plain attribute recording an intended init std-dev; no code visible
        # in this file reads it, so it has no effect on the weights here.
        self.conv1.stddev = 0.01
        self.fc = nn.Linear(768, num_classes)
        # Same remark as for conv1.stddev above.
        self.fc.stddev = 0.001

    def forward(self, x):
        """Return auxiliary logits of shape ``(batch, num_classes)``."""
        x = F.avg_pool2d(x, kernel_size=5, stride=3)
        x = self.conv0(x)
        x = self.conv1(x)
        # Collapse whatever spatial extent is left to 1x1 before the linear layer.
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = torch.flatten(x, 1)
        return self.fc(x)
1.3 组装完整Inception v3网络与模拟运行测试
现在将前面准备的所有"积木块"按照结构图组装成最终的完整模型。Inception v3 的标准图像输入尺寸为 299x299,这与 v1 版本的 224x224 有明显的不同。
python
class InceptionV3(nn.Module):
    """Inception v3 classifier assembled from the blocks defined in this file.

    Channel flow: stem (3 -> 192) -> 3x InceptionA (256, 288, 288)
    -> ReductionA (768) -> 4x InceptionB (768) -> ReductionA (1280)
    -> 2x InceptionC (2048) -> global average pool -> dropout -> linear.

    Args:
        num_classes: Size of the output logits vector.
        aux_logits: When true, an ``InceptionAux`` head is attached to the
            output of ``Mixed_6e``.

    ``forward`` returns ``(logits, aux_logits)`` when the module is in
    training mode and ``aux_logits`` is enabled, and ``logits`` alone
    otherwise.
    """

    def __init__(self, num_classes=1000, aux_logits=True):
        super().__init__()
        self.aux_logits = aux_logits

        # Stem: plain convolutions before the Inception blocks.
        self.Conv2d_1a_3x3 = BasicConv2d(3, 32, kernel_size=3, stride=2)
        self.Conv2d_2a_3x3 = BasicConv2d(32, 32, kernel_size=3)
        self.Conv2d_2b_3x3 = BasicConv2d(32, 64, kernel_size=3, padding=1)
        self.Conv2d_3b_1x1 = BasicConv2d(64, 80, kernel_size=1)
        self.Conv2d_4a_3x3 = BasicConv2d(80, 192, kernel_size=3)

        # Stage 1: stacked InceptionA blocks (output 224 + pool_features).
        self.Mixed_5b = InceptionA(192, pool_features=32)
        self.Mixed_5c = InceptionA(256, pool_features=64)
        self.Mixed_5d = InceptionA(288, pool_features=64)

        # Reduction 1: 384 + 96 + 288 = 768 channels.
        self.Mixed_6a = ReductionA(288, k=384, l=64, m=96, n=96)

        # Stage 2: stacked InceptionB blocks (always 768 channels out).
        self.Mixed_6b = InceptionB(768, channels_7x7=128)
        self.Mixed_6c = InceptionB(768, channels_7x7=160)
        self.Mixed_6d = InceptionB(768, channels_7x7=160)
        self.Mixed_6e = InceptionB(768, channels_7x7=192)

        # Auxiliary classifier taps the Mixed_6e output.
        if aux_logits:
            self.AuxLogits = InceptionAux(768, num_classes)

        # Reduction 2 (simplified: reuses ReductionA for the transition).
        # ReductionA emits k + n + in_channels channels, so k + n must be 512
        # to produce the 1280 channels Mixed_7b expects. The previous
        # k=192, n=256 gave 1216 and made the forward pass fail. 320 / 192
        # mirrors the branch widths of torchvision's reduction block here.
        self.Mixed_7a = ReductionA(768, k=320, l=192, m=192, n=192)

        # Stage 3: stacked InceptionC blocks (always 2048 channels out).
        self.Mixed_7b = InceptionC(1280)
        self.Mixed_7c = InceptionC(2048)

        # Main classification head.
        self.fc = nn.Linear(2048, num_classes)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Image batch with 3 channels; the tutorial feeds 299x299 inputs.

        Returns:
            ``(logits, aux_logits)`` in training mode with the auxiliary head
            enabled, otherwise ``logits`` only.
        """
        # Stem
        x = self.Conv2d_1a_3x3(x)
        x = self.Conv2d_2a_3x3(x)
        x = self.Conv2d_2b_3x3(x)
        x = F.max_pool2d(x, kernel_size=3, stride=2)
        x = self.Conv2d_3b_1x1(x)
        x = self.Conv2d_4a_3x3(x)
        x = F.max_pool2d(x, kernel_size=3, stride=2)

        # InceptionA stage
        x = self.Mixed_5b(x)
        x = self.Mixed_5c(x)
        x = self.Mixed_5d(x)

        # Reduction + InceptionB stage
        x = self.Mixed_6a(x)
        x = self.Mixed_6b(x)
        x = self.Mixed_6c(x)
        x = self.Mixed_6d(x)
        x = self.Mixed_6e(x)

        # The auxiliary branch is only evaluated while training.
        use_aux = self.training and self.aux_logits
        aux = self.AuxLogits(x) if use_aux else None

        # Reduction + InceptionC stage
        x = self.Mixed_7a(x)
        x = self.Mixed_7b(x)
        x = self.Mixed_7c(x)

        # Global average pool, dropout (default p, active only in training),
        # then the linear head.
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = F.dropout(x, training=self.training)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        if use_aux:
            return x, aux
        return x
# Smoke test: build the network and print a per-layer summary for a
# 3x299x299 input.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Four output classes, matching the weather-recognition example; the
# auxiliary head is left out for this structural check.
model = InceptionV3(num_classes=4, aux_logits=False)
model = model.to(device)
print("模拟前向传播,输出模型层级详情")
summary(model, input_size=(3, 299, 299))
二、实战-完成天气识别案例
2.1 数据加载与预处理 (适配 v3 尺寸)
python
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt
import os
import warnings
warnings.filterwarnings("ignore")
# 1. Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"当前计算节点已接入: {device}")
data_dir = './data'

# 2. Preprocessing for Inception v3: inputs are resized to 299x299 and
#    normalised with the commonly used ImageNet channel statistics.
#    `transform` (training) adds a random horizontal flip; `eval_transform`
#    is the same pipeline without augmentation so evaluation is deterministic.
transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
eval_transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# 3. Load the dataset and split it 8:2.
#    Two views of the same folder are opened so the held-out samples can be
#    read without augmentation. Only dataset-opening errors are caught here;
#    anything else propagates with its traceback.
try:
    full_ds = datasets.ImageFolder(root=data_dir, transform=transform)
    eval_ds = datasets.ImageFolder(root=data_dir, transform=eval_transform)
except (OSError, RuntimeError) as e:
    print(f"找不到天气文件夹,错误信息: {e}")
    # Exit with a non-zero status so callers can tell the run failed.
    raise SystemExit(1)

train_size = int(0.8 * len(full_ds))
test_size = len(full_ds) - train_size
# A fixed seed keeps the train/test membership identical across runs.
split_rng = torch.Generator().manual_seed(42)
train_ds, held_out = random_split(
    full_ds, [train_size, test_size], generator=split_rng
)
# Same held-out indices, but served through the augmentation-free view.
test_ds = torch.utils.data.Subset(eval_ds, held_out.indices)

train_dl = DataLoader(train_ds, batch_size=16, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=16, shuffle=False)
num_classes = len(full_ds.classes)
print(f"天气数据已成功载入,共 {num_classes} 种天气: {full_ds.classes}")
2.2 带有辅助分支的特制训练循环
为节省手打几百行网络结构的精力,实际工程中通常直接调用 PyTorch 官方封装好的 Inception v3 模型结构,只需要修改最后的分类数量即可。
python
from torchvision import models
# Instantiate torchvision's Inception v3 without pretrained weights, keeping
# the auxiliary classifier so training can use both outputs.
# NOTE(review): newer torchvision releases replace `pretrained=` with
# `weights=` — confirm against the installed version before changing it.
model = models.inception_v3(aux_logits=True, pretrained=False)

# Replace the main and the auxiliary heads so both emit `num_classes` logits.
model.fc = nn.Linear(
    in_features=model.fc.in_features,
    out_features=num_classes,
)
model.AuxLogits.fc = nn.Linear(
    in_features=model.AuxLogits.fc.in_features,
    out_features=num_classes,
)
model = model.to(device)

# Loss and optimiser: cross-entropy, and Adam with a learning rate of 1e-4.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)
def train_v3(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Works with models that return ``(logits, aux_logits)`` in training mode
    as well as models that return a single logits tensor (for example when
    the auxiliary head is disabled).

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` batches.
        model: Module to train; it is switched to training mode.
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar loss.
        optimizer: Optimizer over the model's parameters.

    Returns:
        Mean combined loss per batch as a float, or ``0.0`` if the
        dataloader yielded no batches.
    """
    model.train()

    # Send batches to wherever the model lives rather than relying on a
    # module-level global staying in sync with it.
    first_param = next(model.parameters(), None)
    target_device = (
        first_param.device if first_param is not None else torch.device('cpu')
    )

    running_loss = 0.0
    num_batches = 0
    for x, y in dataloader:
        x, y = x.to(target_device), y.to(target_device)

        result = model(x)
        if isinstance(result, tuple):
            outputs, aux_outputs = result
        else:
            outputs, aux_outputs = result, None

        loss = loss_fn(outputs, y)
        if aux_outputs is not None:
            # Total loss = main loss + 0.4 * auxiliary loss; the 0.4 weight
            # is the value chosen by the original code.
            loss = loss + 0.4 * loss_fn(aux_outputs, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    return running_loss / num_batches if num_batches else 0.0
# 专属测试函数
def test_v3(dataloader, model, loss_fn):
model.eval() # 开启验证模式
test_loss, correct = 0, 0
with torch.no_grad():
for x, y in dataloader:
x, y = x.to(device), y.to(device)
# 验证模式下,v3 自动关闭辅助分支,只返回一个预测结果
pred = model(x)
test_loss += loss_fn(pred, y).item()
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
return test_loss / len(dataloader), correct / len(dataloader.dataset)
2.3 启动引擎与结果可视化
python
print("开始天气识别训练")
epochs = 20
train_acc_hist, test_acc_hist = [], []

for epoch in range(epochs):
    train_v3(train_dl, model, loss_fn, optimizer)
    # Re-score both splits in evaluation mode; only the accuracy (second
    # item of the returned pair) is kept.
    _, train_acc = test_v3(train_dl, model, loss_fn)
    _, test_acc = test_v3(test_dl, model, loss_fn)
    train_acc_hist.append(train_acc)
    test_acc_hist.append(test_acc)
    print(f"Epoch {epoch+1:02d}/{epochs} | 训练集准确率: {train_acc:.1%} | 验证集准确率: {test_acc:.1%}")
print("所有天气观测任务计算完毕")

# Plot the accuracy curves. The x-axis is 1-based so the points line up with
# the epoch numbers printed in the log above.
epoch_axis = range(1, len(train_acc_hist) + 1)
plt.figure(figsize=(8, 5))
plt.plot(epoch_axis, train_acc_hist, label='Train Accuracy', marker='o')
plt.plot(epoch_axis, test_acc_hist, label='Test Accuracy', marker='x')
plt.title('Inception v3 - Weather Classification Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
plt.show()

三、总结
3.1 核心改良:大卷积核的"非对称分解"(Factorized Convolutions)
在 Inception v1 中,网络通过组合 3×3 和 5×5 的卷积来实现多尺度观测。但在 v3 中,研究人员发现大尺寸卷积核的计算冗余过高,并提出了两种极其精妙的拆解手段:
对称分解 (5×5→2×3×3):在 Inception-A 中,一个 5×5 的卷积可以被两个串联的 3×3 卷积代替(上文代码中的 branch3x3db 分支就是这种结构,而 branch5x5 分支仍保留了原始的 5×5 卷积)。两者的感受野完全一致,但单个卷积核的参数量从 5×5=25 降到了 2×(3×3)=18,减少了 28%。
非对称分解 (n×n→1×n+n×1):在 Inception-B 和 C 中,更激进地将一个 n×n 卷积拆解为 1×n 与 n×1 两个一维卷积:Inception-B 把 7×7 拆成串联的 1×7 和 7×1,Inception-C 则把 3×3 拆成并行的 1×3 和 3×1 后再拼接。通过在空间维度上做一维分解,不仅使网络对横向和纵向的边界纹理(如天气识别中的条状雨丝、地平线云层)更加敏感,还使计算开销进一步压缩。
3.2 模块的多样性定制(针对不同层级的 A / B / C 设计)
上周我们使用同一种 Inception 模块堆叠到底,而 v3 做到了因地制宜:
Inception-A:安置在网络的低层(输入尺寸为 35×35 左右时),主要职责是用常规小卷积密集捕获小局部的图像元素。
Inception-B:安置在网络的中层(输入尺寸降为 17×17 时),开始运用非对称的 1×7 与 7×1 卷积来拉长观测纵深,提取长宽跨度大的语义信息。
Inception-C:安置在网络的高层(输入尺寸极小,仅为 8×8 左右时),由于特征已经极度抽象,各个分支内部采用平行的 1×3 和 3×1 进行二次拓宽,实现特征表达能力的最后冲刺。
3.3 在本期"天气识别案例"中的实战考核
在这周的天气识别任务中,Inception v3 有着天然的优势:
对天空大背景的多尺度捕捉:云层的漫延通常范围很大(需要大感受野),而垂直的落雨或局部地面的雾气往往属于细长型的几何特征。Inception-B 和 C 模块中的非对称长条形卷积 (1×7,1×3),可以完美契合这种气象特征的天然方向性。
输入尺寸升级带来的清晰度优势:输入从 224 提升到 299,意味着图像可以保留更加细微的气象颗粒纹理(例如细微的冰雹、轻微的薄雾层)。需要注意,在编写数据预处理(transforms)时,一定要把 transforms.Resize 的目标参数改成 (299, 299)。否则以 224×224 的输入为例,训练模式下特征图到达辅助分类器时,经过平均池化只剩 3×3,小于其 5×5 卷积核,会因尺寸不匹配而引发崩溃。