一、前期准备
python
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt
import os
import warnings
warnings.filterwarnings("ignore")
# Select the compute device: CUDA when available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"当前使用的计算设备是: {device}")

# Root folder of the dataset (read by ImageFolder below).
data_dir = './data'

# Per-channel mean/std used for normalisation (the widely used ImageNet values).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training-time preprocessing: resize, random horizontal flip (augmentation
# against overfitting), tensor conversion and normalisation.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Evaluation-time preprocessing: same pipeline WITHOUT the random flip, so the
# held-out metrics are deterministic and not computed on augmented images.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Load the monkeypox dataset and split it 80/20 into train and test subsets.
try:
    full_ds = datasets.ImageFolder(root=data_dir, transform=transform)
    train_size = int(0.8 * len(full_ds))
    test_size = len(full_ds) - train_size
    train_ds, test_ds = random_split(full_ds, [train_size, test_size])

    # Re-point the held-out indices at an un-augmented view of the same
    # folder; both ImageFolder instances scan the same directory, so the
    # sample indices refer to the same files.
    eval_ds = datasets.ImageFolder(root=data_dir, transform=eval_transform)
    test_ds = torch.utils.data.Subset(eval_ds, test_ds.indices)

    train_dl = DataLoader(train_ds, batch_size=16, shuffle=True)
    test_dl = DataLoader(test_ds, batch_size=16, shuffle=False)
    num_classes = len(full_ds.classes)
    print(f"数据加载成功,一共有 {num_classes} 个类别: {full_ds.classes}")
except Exception as e:
    # Top-level boundary: report the problem and stop with a non-zero exit
    # status (the `exit()` helper is a `site` convenience and returned 0).
    print(f"未能正确加载文件夹 '{data_dir}',错误信息: {e}")
    raise SystemExit(1) from e
二、 搭建 Inception v1 主干网络
python
# Basic convolution unit (Conv + BN + ReLU)
class BasicConv2d(nn.Module):
    """Convolution followed by batch normalisation and ReLU.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution.
        **kwargs: Forwarded unchanged to ``nn.Conv2d`` (e.g. ``kernel_size``,
            ``stride``, ``padding``).
    """

    def __init__(self, in_channels: int, out_channels: int, **kwargs) -> None:
        super().__init__()
        # The convolution has no bias term; BatchNorm2d follows it directly.
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply conv -> batch norm -> ReLU to ``x`` and return the result."""
        return self.relu(self.bn(self.conv(x)))
# Core multi-scale parallel feature extraction module (Inception block)
class Inception_block(nn.Module):
    """Four parallel branches whose outputs are concatenated along channels.

    Args:
        in_channels: Channels of the input feature map.
        out_1x1: Output channels of the plain 1x1 branch.
        red_3x3: Channels after the 1x1 reduction in front of the 3x3 conv.
        out_3x3: Output channels of the 3x3 branch.
        red_5x5: Channels after the 1x1 reduction in front of the 5x5 conv.
        out_5x5: Output channels of the 5x5 branch.
        out_1x1pool: Output channels of the pooling branch.

    The output has ``out_1x1 + out_3x3 + out_5x5 + out_1x1pool`` channels.
    All branches use stride 1 with size-preserving padding, so their outputs
    can be concatenated.
    """

    def __init__(
        self,
        in_channels: int,
        out_1x1: int,
        red_3x3: int,
        out_3x3: int,
        red_5x5: int,
        out_5x5: int,
        out_1x1pool: int,
    ) -> None:
        super().__init__()
        # Branch 1: 1x1 convolution
        self.branch1 = BasicConv2d(in_channels, out_1x1, kernel_size=1)
        # Branch 2: 1x1 reduction -> 3x3 convolution
        self.branch2 = nn.Sequential(
            BasicConv2d(in_channels, red_3x3, kernel_size=1),
            BasicConv2d(red_3x3, out_3x3, kernel_size=3, padding=1),
        )
        # Branch 3: 1x1 reduction -> 5x5 convolution
        self.branch3 = nn.Sequential(
            BasicConv2d(in_channels, red_5x5, kernel_size=1),
            BasicConv2d(red_5x5, out_5x5, kernel_size=5, padding=2),
        )
        # Branch 4: 3x3 max pooling -> 1x1 convolution
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            BasicConv2d(in_channels, out_1x1pool, kernel_size=1),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run all four branches on ``x`` and concatenate them on dim 1."""
        b1 = self.branch1(x)
        b2 = self.branch2(x)
        b3 = self.branch3(x)
        b4 = self.branch4(x)
        # Concatenate along the channel dimension
        return torch.cat([b1, b2, b3, b4], dim=1)
# Assemble the complete Inception v1 network
class InceptionV1_Classifier(nn.Module):
    """Inception v1 style image classifier.

    Args:
        num_classes: Size of the final linear layer's output.
        dropout: Drop probability used before the final linear layer. The
            default (0.4) is the value that used to be hard-coded.

    ``forward`` takes a batch of 3-channel images and returns raw class
    scores (logits) with ``num_classes`` entries per sample.
    """

    def __init__(self, num_classes: int, dropout: float = 0.4) -> None:
        super().__init__()
        # Shallow feature extraction
        self.stem = nn.Sequential(
            BasicConv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            BasicConv2d(64, 64, kernel_size=1),
            BasicConv2d(64, 192, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )
        # Stacked Inception stages
        self.inception3a = Inception_block(192, 64, 96, 128, 16, 32, 32)  # -> 256 channels
        self.inception3b = Inception_block(256, 128, 128, 192, 32, 96, 64)  # -> 480 channels
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.inception4a = Inception_block(480, 192, 96, 208, 16, 48, 64)  # -> 512 channels
        self.inception4b = Inception_block(512, 160, 112, 224, 24, 64, 64)  # -> 512 channels
        self.inception4c = Inception_block(512, 128, 128, 256, 24, 64, 64)  # -> 512 channels
        self.inception4d = Inception_block(512, 112, 144, 288, 32, 64, 64)  # -> 528 channels
        self.inception4e = Inception_block(528, 256, 160, 320, 32, 128, 128)  # -> 832 channels
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.inception5a = Inception_block(832, 256, 160, 320, 32, 128, 128)  # -> 832 channels
        self.inception5b = Inception_block(832, 384, 192, 384, 48, 128, 128)  # -> 1024 channels
        # Global average pooling and the final classification output
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Dropout(dropout),
            nn.Flatten(),
            nn.Linear(1024, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits for the image batch ``x``."""
        x = self.stem(x)
        # Apply the stages strictly in order; each one feeds the next.
        stages = (
            self.inception3a, self.inception3b, self.maxpool3,
            self.inception4a, self.inception4b, self.inception4c,
            self.inception4d, self.inception4e, self.maxpool4,
            self.inception5a, self.inception5b,
        )
        for stage in stages:
            x = stage(x)
        return self.classifier(x)
# Build the network, then move it onto the selected device.
model = InceptionV1_Classifier(num_classes=num_classes)
model = model.to(device)
三、 模型训练与测试阶段
python
# Cross-entropy loss as the training objective.
loss_fn = nn.CrossEntropyLoss()
# Adam optimizer with a small learning rate for fine-grained updates.
optimizer = optim.Adam(params=model.parameters(), lr=3e-4)
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` tensor batches.
        model: Module to optimise; switched to training mode.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a
            scalar tensor that supports ``backward()``.
        optimizer: Optimizer holding ``model``'s parameters.

    Returns:
        None. The model parameters are updated in place.
    """
    model.train()
    # Move each batch to wherever the model's parameters live, instead of
    # relying on a module-level ``device`` variable being in sync.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None
    for x, y in dataloader:
        if target_device is not None:
            x, y = x.to(target_device), y.to(target_device)
        pred = model(x)
        loss = loss_fn(pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` without updating it.

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` tensor batches.
        model: Module to evaluate; switched to evaluation mode.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a
            scalar tensor.

    Returns:
        ``(mean_batch_loss, accuracy)``: the batch losses averaged over the
        number of batches, and the fraction of samples whose arg-max
        prediction equals the target. ``(0.0, 0.0)`` when no batch is
        yielded.
    """
    model.eval()
    # Move each batch to wherever the model's parameters live, instead of
    # relying on a module-level ``device`` variable being in sync.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    loss_sum, correct, seen, batches = 0.0, 0, 0, 0
    with torch.no_grad():
        for x, y in dataloader:
            if target_device is not None:
                x, y = x.to(target_device), y.to(target_device)
            pred = model(x)
            loss_sum += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).sum().item()
            # Count the samples actually evaluated, so the accuracy stays
            # right with drop_last / custom samplers.
            seen += y.size(0)
            batches += 1

    if batches == 0:
        return 0.0, 0.0
    accuracy = correct / seen if seen else 0.0
    return loss_sum / batches, accuracy


# Despite its name this is an evaluation helper, not a unit test; keep test
# runners that collect "test*" callables from picking it up.
test.__test__ = False
print("开始训练基于 Inception v1 的猴痘识别模型...")
epochs = 20
# Per-epoch metric histories (accuracy and loss for both splits).
train_acc_hist, test_acc_hist = [], []
train_loss_hist, test_loss_hist = [], []
for epoch in range(epochs):
    # One optimisation pass, then measure both splits in evaluation mode.
    train(train_dl, model, loss_fn, optimizer)
    train_loss, train_acc = test(train_dl, model, loss_fn)
    test_loss, test_acc = test(test_dl, model, loss_fn)
    train_acc_hist.append(train_acc)
    test_acc_hist.append(test_acc)
    train_loss_hist.append(train_loss)
    test_loss_hist.append(test_loss)
    print(f"Epoch {epoch+1:02d}/{epochs}: Train Acc {train_acc:.1%}, Test Acc {test_acc:.1%}")
print("猴痘识别模型训练完成")
四、结果可视化
python
# Draw the accuracy and loss histories side by side in one figure.
plt.figure(figsize=(12, 4))
_panels = (
    ('Accuracy Curve', train_acc_hist, test_acc_hist),
    ('Loss Curve', train_loss_hist, test_loss_hist),
)
for _slot, (_title, _train_curve, _test_curve) in enumerate(_panels, start=1):
    plt.subplot(1, 2, _slot)
    plt.plot(_train_curve, label='Train')
    plt.plot(_test_curve, label='Test')
    plt.legend()
    plt.title(_title)
plt.show()

五、 本周课程核心知识点总结
5.1 Inception v1 相关知识
如果把传统的神经网络比作一个只能单线程工作、死磕到底的修理工,那Inception v1就是一个拥有不同工具、能同时开工的专家团队。
多尺度并行观测
在这个网络里,最关键的结构叫"Inception模块"。当一张图片传到这个模块时,它不会只用一种眼光去看,而是同时分出四条路:
用 1x1 的小视野去盯像素级的细节;
用 3x3 的中等视野去看局部的纹理;
用 5x5 的大视野去观察整体的轮廓;
再加一条 3x3 最大池化分支(池化后接 1x1 卷积)去提取最显眼的特征。
最后,它把这四种不同视野看到的结果拼接在一起(torch.cat)。
1x1 卷积
如果同时开动那么多大视野的观察,普通的电脑是会被庞大的计算量撑爆的。Inception极其聪明地在大视野观察前,加入了一个 1x1 的卷积层。它的作用就像是"压缩包",在保留关键信息的前提下,把厚厚的数据通道变薄(降维),极大减少了计算量。
5.2 Inception v1在本期猴痘病识别实验中的运用
这周的任务是让 AI 区分出健康的皮肤和感染了猴痘的皮肤。Inception v1 的特性,简直是为这种医学图像量身定制的:
猴痘在皮肤上的表现是非常复杂的。如果用传统的单一网络,很容易顾此失彼(看见了小疹子就忽略了整体泛红,看清了大溃烂就漏掉了边缘的水泡)。而 Inception 模块里 1x1、3x3、5x5 齐上阵的并行结构,让网络能够同时捕捉微小的独立疱疹和成片的大面积感染区,大大提高了诊断的准确率。
在网络的最后,Inception 抛弃了传统臃肿的全连接层,使用了"全局平均池化(Global Average Pooling)"的结构,把前面提取出的所有病理特征浓缩,最后只通过一个极小的分类器(输出维度等于我们数据集中设定的类别数,比如健康/猴痘两类),输出各类别的得分(logits,经 softmax 归一化后即为最终的诊断概率)。既精准,又不容易因为过度学习某几张图片而产生"死板"的偏见(防止过拟合)。