Knowledge Review
- Data augmentation
- How to write a CNN model definition
- Batch normalization: normalizes the distribution of activations within each batch; commonly used for image data
- Feature maps: only the outputs of convolution operations are called feature maps
- Learning-rate scheduler: directly adjusts the base learning rate held by the optimizer (see the sketch below)
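
To illustrate the last bullet, here is a minimal, self-contained sketch (a toy SGD optimizer on a throwaway parameter, not part of the homework code) showing how `StepLR` rewrites the learning rate stored inside the optimizer:

```python
import torch

# Toy example: one parameter, SGD, and a StepLR that halves the lr every 2 epochs.
param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([param], lr=0.1)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)

for epoch in range(6):
    optimizer.step()    # normally preceded by a forward/backward pass
    scheduler.step()    # rewrites optimizer.param_groups[0]['lr']
    print(epoch, optimizer.param_groups[0]['lr'])
# prints 0.1, 0.05, 0.05, 0.025, 0.025, 0.0125
```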
Homework:
```python
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
# 1. Basic training parameters
batch_size = 64
epochs = 20
lr = 0.01
weight_decay = 5e-4  # weight decay, helps prevent overfitting
# 2. Configurable CNN architecture (edit these to change the network structure)
conv_channels = [32, 64, 128]  # output channels of each conv block; change the length to change the block count, the values to change the widths
kernel_size = 3  # convolution kernel size
padding = 1      # convolution padding
pool_kernel = 2  # pooling kernel size
dropout_p = 0.5  # dropout probability
fc_hidden = 512  # hidden units in the fully connected layer
# 3. Optimizer choice: "SGD" or "AdamW"
optimizer_type = "SGD"
# 4. Scheduler choice: "StepLR" / "ReduceLROnPlateau" / "CosineAnnealingLR" / "ExponentialLR"
scheduler_type = "ReduceLROnPlateau"
# Parameters for each scheduler (entries for schedulers you don't select are simply ignored)
scheduler_params = {
    "StepLR": {"step_size": 5, "gamma": 0.5},                            # lr x0.5 every 5 epochs
    "ReduceLROnPlateau": {"mode": "min", "factor": 0.5, "patience": 2},  # lr x0.5 if val loss stalls for 2 epochs
    "CosineAnnealingLR": {"T_max": epochs},                              # cosine annealing, period = total epochs
    "ExponentialLR": {"gamma": 0.95}                                     # lr x0.95 per epoch
}
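# (The entry matching scheduler_type is unpacked with ** when the scheduler is
#  constructed below, e.g. scheduler_params["StepLR"] turns into
#  StepLR(optimizer, step_size=5, gamma=0.5).)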
# ======================================================================
# Matplotlib font setup (originally for Chinese plot labels; harmless to keep)
plt.rcParams["font.family"] = ["SimHei", "WenQuanYi Zen Hei", "Heiti TC"]
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
# Use the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
print(f"Config: optimizer={optimizer_type} | scheduler={scheduler_type} | conv channels={conv_channels} | dropout={dropout_p}")
# 1. Data preprocessing (augmentation policy kept fixed so comparisons stay fair)
train_transform = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
transforms.RandomRotation(15),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])
test_transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])
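# Note: the Normalize mean/std above are the commonly used per-channel
# statistics of the CIFAR-10 training set.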
# 2. Load the CIFAR-10 dataset
train_dataset = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=train_transform
)
test_dataset = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=test_transform
)
# 3. Create the data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
classes = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# 4. Configurable CNN (the feature dimension is computed automatically, so
#    changing conv_channels requires no manual edits to the FC layers)
class ConfigurableCNN(nn.Module):
    def __init__(self, in_channels=3, num_classes=10, conv_channels=(32, 64, 128),
                 kernel_size=3, padding=1, pool_kernel=2, dropout_p=0.5, fc_hidden=512):
        super().__init__()
        self.conv_layers = nn.Sequential()  # container for conv layers, built dynamically
        self.bn_layers = nn.Sequential()    # container for batch-norm layers
        self.pool = nn.MaxPool2d(kernel_size=pool_kernel, stride=pool_kernel)
        self.relu = nn.ReLU(inplace=True)   # in-place activation saves memory
        self.dropout = nn.Dropout(p=dropout_p)
        # Build the conv blocks dynamically (one block per entry in conv_channels)
        prev_channels = in_channels
        for i, curr_channels in enumerate(conv_channels):
            # conv layer: prev_channels -> curr_channels; padding=1 with a 3x3
            # kernel keeps the spatial size unchanged
            self.conv_layers.add_module(f'conv{i+1}', nn.Conv2d(
                prev_channels, curr_channels, kernel_size, padding=padding
            ))
            # batch-norm layer
            self.bn_layers.add_module(f'bn{i+1}', nn.BatchNorm2d(curr_channels))
            prev_channels = curr_channels  # carry the channel count forward
        # Compute the flattened feature dimension automatically (no hard-coding,
        # so any conv_channels/pooling configuration works)
        with torch.no_grad():
            dummy_input = torch.randn(1, in_channels, 32, 32)  # dummy input (1, 3, 32, 32)
            conv_output = self._forward_conv(dummy_input)
            self.flatten_dim = conv_output.numel()  # total size after flattening
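        # Example: with conv_channels=[32, 64, 128], three 2x2 poolings shrink
        # the 32x32 input 32 -> 16 -> 8 -> 4, so flatten_dim = 128 * 4 * 4 = 2048.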
        # Fully connected layers (the classifier head)
        self.fc_layers = nn.Sequential(
            nn.Linear(self.flatten_dim, fc_hidden),
            nn.ReLU(inplace=True),
            self.dropout,
            nn.Linear(fc_hidden, num_classes)
        )
    # Conv-only forward pass (used both to size the classifier and in forward())
    def _forward_conv(self, x):
        for conv, bn in zip(self.conv_layers, self.bn_layers):
            x = self.relu(bn(conv(x)))  # conv -> BN -> activation (the standard ordering, which tends to work better than activation -> BN)
            x = self.pool(x)            # pool after each conv block; spatial size halves
        return x
    def forward(self, x):
        x = self._forward_conv(x)  # conv feature extraction
        x = x.view(x.size(0), -1)  # flatten: [batch, C, H, W] -> [batch, C*H*W]
        x = self.fc_layers(x)      # classify with the FC head
        return x
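# Optional sanity check (hypothetical snippet, not part of the homework):
#   _m = ConfigurableCNN()
#   print(_m(torch.randn(2, 3, 32, 32)).shape)  # expected: torch.Size([2, 10])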
# 5. Instantiate the model, loss function, optimizer, and scheduler
# Build the configurable CNN
model = ConfigurableCNN(
conv_channels=conv_channels,
kernel_size=kernel_size,
padding=padding,
pool_kernel=pool_kernel,
dropout_p=dropout_p,
fc_hidden=fc_hidden
).to(device)
# Loss function (cross-entropy, the standard choice for CIFAR-10 classification)
criterion = nn.CrossEntropyLoss()
# Optimizer (SGD or AdamW, both with weight decay)
if optimizer_type == "SGD":
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
elif optimizer_type == "AdamW":
optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
else:
    raise ValueError(f"Unsupported optimizer: {optimizer_type}; choose SGD or AdamW")
# Scheduler (four common schedulers supported, parameters matched automatically)
if scheduler_type == "StepLR":
scheduler = optim.lr_scheduler.StepLR(optimizer, **scheduler_params["StepLR"])
elif scheduler_type == "ReduceLROnPlateau":
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, **scheduler_params["ReduceLROnPlateau"])
elif scheduler_type == "CosineAnnealingLR":
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, **scheduler_params["CosineAnnealingLR"])
elif scheduler_type == "ExponentialLR":
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, **scheduler_params["ExponentialLR"])
else:
    raise ValueError(f"Unsupported scheduler: {scheduler_type}; choose StepLR / ReduceLROnPlateau / CosineAnnealingLR / ExponentialLR")
# 6. Train the model (records per-iteration loss plus per-epoch accuracy/loss; works with all four schedulers)
def train(model, train_loader, test_loader, criterion, optimizer, scheduler, device, epochs):
    model.train()
    # Training history
    all_iter_losses = []     # loss of every batch
    iter_indices = []        # global batch index
    train_acc_history = []   # training accuracy per epoch
    test_acc_history = []    # test accuracy per epoch
    train_loss_history = []  # mean training loss per epoch
    test_loss_history = []   # mean test loss per epoch
for epoch in range(epochs):
running_loss = 0.0
correct = 0
total = 0
        # Training pass
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()             # clear gradients
            output = model(data)              # forward pass
            loss = criterion(output, target)  # compute loss
            loss.backward()                   # backward pass
            optimizer.step()                  # update parameters
            # Record the loss and global index of this batch
            all_iter_losses.append(loss.item())
            iter_indices.append(epoch * len(train_loader) + batch_idx + 1)
            # Accumulate training accuracy and loss
            running_loss += loss.item()
            _, predicted = output.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()
            # Log every 100 batches
            if (batch_idx + 1) % 100 == 0:
                print(f'Epoch: {epoch+1}/{epochs} | Batch: {batch_idx+1}/{len(train_loader)} '
                      f'| batch loss: {loss.item():.4f} | running mean loss: {running_loss/(batch_idx+1):.4f}')
        # Training metrics for this epoch
epoch_train_loss = running_loss / len(train_loader)
epoch_train_acc = 100. * correct / total
train_acc_history.append(epoch_train_acc)
train_loss_history.append(epoch_train_loss)
        # Evaluation pass (eval mode disables dropout and freezes BN statistics)
        model.eval()
        test_loss = 0
        correct_test = 0
        total_test = 0
        with torch.no_grad():  # no gradients needed; saves memory and time
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += criterion(output, target).item()
_, predicted = output.max(1)
total_test += target.size(0)
correct_test += predicted.eq(target).sum().item()
        # Test metrics for this epoch
        epoch_test_loss = test_loss / len(test_loader)
        epoch_test_acc = 100. * correct_test / total_test
        test_acc_history.append(epoch_test_acc)
        test_loss_history.append(epoch_test_loss)
        # Scheduler step (ReduceLROnPlateau needs the validation loss; the others step unconditionally)
        if scheduler_type == "ReduceLROnPlateau":
            scheduler.step(epoch_test_loss)  # adaptive decay driven by validation loss
        else:
            scheduler.step()  # fixed-policy decay: StepLR / CosineAnnealingLR / ExponentialLR
        # Per-epoch summary
        print(f'\nEpoch {epoch+1}/{epochs} done | train acc: {epoch_train_acc:.2f}% | test acc: {epoch_test_acc:.2f}%')
        print(f'train loss: {epoch_train_loss:.4f} | test loss: {epoch_test_loss:.4f}\n')
        model.train()  # back to training mode
    # Plot the training curves
    plot_iter_losses(all_iter_losses, iter_indices)
    plot_epoch_metrics(train_acc_history, test_acc_history, train_loss_history, test_loss_history)
    return epoch_test_acc  # final-epoch test accuracy
# 7. Plot the per-iteration loss curve
def plot_iter_losses(losses, indices):
    plt.figure(figsize=(12, 4))
    plt.plot(indices, losses, 'b-', alpha=0.6, label='per-batch loss')
    # Overlay a moving average to make the trend easier to read
    window_size = 50
    if len(losses) >= window_size:
        smooth_loss = np.convolve(losses, np.ones(window_size)/window_size, mode='valid')
        smooth_indices = indices[window_size-1:]
        plt.plot(smooth_indices, smooth_loss, 'r-', linewidth=2, label=f'{window_size}-step moving average')
    plt.xlabel('Iteration (batch index)')
    plt.ylabel('Loss')
    plt.title(f'Training loss (scheduler: {scheduler_type} | conv channels: {conv_channels})')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()
# 8. Plot per-epoch accuracy and loss curves
def plot_epoch_metrics(train_acc, test_acc, train_loss, test_loss):
    epochs = range(1, len(train_acc) + 1)
    plt.figure(figsize=(14, 5))
    # Accuracy curves
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_acc, 'b-', linewidth=2, label='train accuracy')
    plt.plot(epochs, test_acc, 'r-', linewidth=2, label='test accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.title('Train/test accuracy')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.ylim(0, 100)  # accuracy ranges from 0 to 100%
    # Loss curves
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_loss, 'b-', linewidth=2, label='train loss')
    plt.plot(epochs, test_loss, 'r-', linewidth=2, label='test loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Train/test loss')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.suptitle(f'Training summary (optimizer: {optimizer_type} | scheduler: {scheduler_type})', fontsize=14)
    plt.tight_layout()
    plt.show()
# 9. Run training and evaluation
if __name__ == '__main__':
    print("="*50)
    print("Starting CIFAR-10 training...")
    print("="*50)
    final_accuracy = train(model, train_loader, test_loader, criterion, optimizer, scheduler, device, epochs)
    print("="*50)
    print(f"Training complete! Final test accuracy: {final_accuracy:.2f}%")
    print("="*50)
    # Optionally save the model
    # torch.save(model.state_dict(), f'cifar10_cnn_{scheduler_type}_{conv_channels}.pth')
    # print(f"Model saved as: cifar10_cnn_{scheduler_type}_{conv_channels}.pth")
```
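
If the commented-out `torch.save` above is enabled, the checkpoint can be restored later by rebuilding the same architecture and loading the weights into it. A minimal sketch, assuming the definitions above are available and using a hypothetical file name in place of whatever was actually saved:

```python
# Rebuild an identical model, then load the saved weights into it.
# 'cifar10_cnn.pth' is a placeholder; use the file name produced by torch.save.
model = ConfigurableCNN(conv_channels=[32, 64, 128]).to(device)
model.load_state_dict(torch.load('cifar10_cnn.pth', map_location=device))
model.eval()  # switch to inference mode before evaluating
```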