Knowledge Review
- Data augmentation
- How to write a CNN model definition
- Batch normalization: normalizes the distribution of activations within each batch; commonly used for image data
- Feature maps: only the outputs of convolution operations are called feature maps
- Learning-rate scheduler: directly adjusts the base learning rate held by the optimizer (see the sketch below)
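
To illustrate the last bullet, here is a minimal, self-contained sketch (a toy SGD optimizer on a throwaway parameter, not part of the homework code) showing how `StepLR` rewrites the learning rate stored inside the optimizer:

```python
import torch

# Toy example: one parameter, SGD, and a StepLR that halves the lr every 2 epochs.
param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([param], lr=0.1)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)

for epoch in range(6):
    optimizer.step()    # normally preceded by a forward/backward pass
    scheduler.step()    # rewrites optimizer.param_groups[0]['lr']
    print(epoch, optimizer.param_groups[0]['lr'])
# prints 0.1, 0.05, 0.05, 0.025, 0.025, 0.0125
```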
Homework:
```python
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
# 1. Basic training parameters
batch_size = 64
epochs = 20
lr = 0.01
weight_decay = 5e-4  # weight decay, helps prevent overfitting
# 2. Configurable CNN architecture (edit these to change the network structure)
conv_channels = [32, 64, 128]  # output channels of each conv block; change the length to change the block count, the values to change the widths
kernel_size = 3  # convolution kernel size
padding = 1      # convolution padding
pool_kernel = 2  # pooling kernel size
dropout_p = 0.5  # dropout probability
fc_hidden = 512  # hidden units in the fully connected layer
# 3. Optimizer choice: "SGD" or "AdamW"
optimizer_type = "SGD"
# 4. Scheduler choice: "StepLR" / "ReduceLROnPlateau" / "CosineAnnealingLR" / "ExponentialLR"
scheduler_type = "ReduceLROnPlateau"
# Parameters for each scheduler (entries for schedulers you don't select are simply ignored)
scheduler_params = {
    "StepLR": {"step_size": 5, "gamma": 0.5},                            # lr x0.5 every 5 epochs
    "ReduceLROnPlateau": {"mode": "min", "factor": 0.5, "patience": 2},  # lr x0.5 if val loss stalls for 2 epochs
    "CosineAnnealingLR": {"T_max": epochs},                              # cosine annealing, period = total epochs
    "ExponentialLR": {"gamma": 0.95}                                     # lr x0.95 per epoch
}
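# (The entry matching scheduler_type is unpacked with ** when the scheduler is
#  constructed below, e.g. scheduler_params["StepLR"] turns into
#  StepLR(optimizer, step_size=5, gamma=0.5).)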
# ======================================================================
# Matplotlib font setup (originally for Chinese plot labels; harmless to keep)
plt.rcParams["font.family"] = ["SimHei", "WenQuanYi Zen Hei", "Heiti TC"]
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
# Use the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
print(f"Config: optimizer={optimizer_type} | scheduler={scheduler_type} | conv channels={conv_channels} | dropout={dropout_p}")
# 1. Data preprocessing (augmentation policy kept fixed so comparisons stay fair)
train_transform = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
transforms.RandomRotation(15),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])
test_transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])
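# Note: the Normalize mean/std above are the commonly used per-channel
# statistics of the CIFAR-10 training set.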
# 2. Load the CIFAR-10 dataset
train_dataset = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=train_transform
)
test_dataset = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=test_transform
)
# 3. Create the data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
classes = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# 4. Configurable CNN (the feature dimension is computed automatically, so
#    changing conv_channels requires no manual edits to the FC layers)
class ConfigurableCNN(nn.Module):
    def __init__(self, in_channels=3, num_classes=10, conv_channels=(32, 64, 128),
                 kernel_size=3, padding=1, pool_kernel=2, dropout_p=0.5, fc_hidden=512):
        super().__init__()
        self.conv_layers = nn.Sequential()  # container for conv layers, built dynamically
        self.bn_layers = nn.Sequential()    # container for batch-norm layers
        self.pool = nn.MaxPool2d(kernel_size=pool_kernel, stride=pool_kernel)
        self.relu = nn.ReLU(inplace=True)   # in-place activation saves memory
        self.dropout = nn.Dropout(p=dropout_p)
        # Build the conv blocks dynamically (one block per entry in conv_channels)
        prev_channels = in_channels
        for i, curr_channels in enumerate(conv_channels):
            # conv layer: prev_channels -> curr_channels; padding=1 with a 3x3
            # kernel keeps the spatial size unchanged
            self.conv_layers.add_module(f'conv{i+1}', nn.Conv2d(
                prev_channels, curr_channels, kernel_size, padding=padding
            ))
            # batch-norm layer
            self.bn_layers.add_module(f'bn{i+1}', nn.BatchNorm2d(curr_channels))
            prev_channels = curr_channels  # carry the channel count forward
        # Compute the flattened feature dimension automatically (no hard-coding,
        # so any conv_channels/pooling configuration works)
        with torch.no_grad():
            dummy_input = torch.randn(1, in_channels, 32, 32)  # dummy input (1, 3, 32, 32)
            conv_output = self._forward_conv(dummy_input)
            self.flatten_dim = conv_output.numel()  # total size after flattening
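        # Example: with conv_channels=[32, 64, 128], three 2x2 poolings shrink
        # the 32x32 input 32 -> 16 -> 8 -> 4, so flatten_dim = 128 * 4 * 4 = 2048.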
        # Fully connected layers (the classifier head)
        self.fc_layers = nn.Sequential(
            nn.Linear(self.flatten_dim, fc_hidden),
            nn.ReLU(inplace=True),
            self.dropout,
            nn.Linear(fc_hidden, num_classes)
        )
    # Conv-only forward pass (used both to size the classifier and in forward())
    def _forward_conv(self, x):
        for conv, bn in zip(self.conv_layers, self.bn_layers):
            x = self.relu(bn(conv(x)))  # conv -> BN -> activation (the standard ordering, which tends to work better than activation -> BN)
            x = self.pool(x)            # pool after each conv block; spatial size halves
        return x
    def forward(self, x):
        x = self._forward_conv(x)  # conv feature extraction
        x = x.view(x.size(0), -1)  # flatten: [batch, C, H, W] -> [batch, C*H*W]
        x = self.fc_layers(x)      # classify with the FC head
        return x
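# Optional sanity check (hypothetical snippet, not part of the homework):
#   _m = ConfigurableCNN()
#   print(_m(torch.randn(2, 3, 32, 32)).shape)  # expected: torch.Size([2, 10])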
# 5. Instantiate the model, loss function, optimizer, and scheduler
# Build the configurable CNN
model = ConfigurableCNN(
conv_channels=conv_channels,
kernel_size=kernel_size,
padding=padding,
pool_kernel=pool_kernel,
dropout_p=dropout_p,
fc_hidden=fc_hidden
).to(device)
# Loss function (cross-entropy, the standard choice for CIFAR-10 classification)
criterion = nn.CrossEntropyLoss()
# Optimizer (SGD or AdamW, both with weight decay)
if optimizer_type == "SGD":
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
elif optimizer_type == "AdamW":
optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
else:
    raise ValueError(f"Unsupported optimizer: {optimizer_type}; choose SGD or AdamW")
# Scheduler (four common schedulers supported, parameters matched automatically)
if scheduler_type == "StepLR":
scheduler = optim.lr_scheduler.StepLR(optimizer, **scheduler_params["StepLR"])
elif scheduler_type == "ReduceLROnPlateau":
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, **scheduler_params["ReduceLROnPlateau"])
elif scheduler_type == "CosineAnnealingLR":
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, **scheduler_params["CosineAnnealingLR"])
elif scheduler_type == "ExponentialLR":
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, **scheduler_params["ExponentialLR"])
else:
    raise ValueError(f"Unsupported scheduler: {scheduler_type}; choose StepLR / ReduceLROnPlateau / CosineAnnealingLR / ExponentialLR")
# 6. Train the model (records per-iteration loss plus per-epoch accuracy/loss; works with all four schedulers)
def train(model, train_loader, test_loader, criterion, optimizer, scheduler, device, epochs):
    model.train()
    # Training history
    all_iter_losses = []     # loss of every batch
    iter_indices = []        # global batch index
    train_acc_history = []   # training accuracy per epoch
    test_acc_history = []    # test accuracy per epoch
    train_loss_history = []  # mean training loss per epoch
    test_loss_history = []   # mean test loss per epoch
for epoch in range(epochs):
running_loss = 0.0
correct = 0
total = 0
        # Training pass
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()             # clear gradients
            output = model(data)              # forward pass
            loss = criterion(output, target)  # compute loss
            loss.backward()                   # backward pass
            optimizer.step()                  # update parameters
            # Record the loss and global index of this batch
            all_iter_losses.append(loss.item())
            iter_indices.append(epoch * len(train_loader) + batch_idx + 1)
            # Accumulate training accuracy and loss
            running_loss += loss.item()
            _, predicted = output.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()
            # Log every 100 batches
            if (batch_idx + 1) % 100 == 0:
                print(f'Epoch: {epoch+1}/{epochs} | Batch: {batch_idx+1}/{len(train_loader)} '
                      f'| batch loss: {loss.item():.4f} | running mean loss: {running_loss/(batch_idx+1):.4f}')
        # Training metrics for this epoch
epoch_train_loss = running_loss / len(train_loader)
epoch_train_acc = 100. * correct / total
train_acc_history.append(epoch_train_acc)
train_loss_history.append(epoch_train_loss)
        # Evaluation pass (eval mode disables dropout and freezes BN statistics)
        model.eval()
        test_loss = 0
        correct_test = 0
        total_test = 0
        with torch.no_grad():  # no gradients needed; saves memory and time
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += criterion(output, target).item()
_, predicted = output.max(1)
total_test += target.size(0)
correct_test += predicted.eq(target).sum().item()
        # Test metrics for this epoch
        epoch_test_loss = test_loss / len(test_loader)
        epoch_test_acc = 100. * correct_test / total_test
        test_acc_history.append(epoch_test_acc)
        test_loss_history.append(epoch_test_loss)
        # Scheduler step (ReduceLROnPlateau needs the validation loss; the others step unconditionally)
        if scheduler_type == "ReduceLROnPlateau":
            scheduler.step(epoch_test_loss)  # adaptive decay driven by validation loss
        else:
            scheduler.step()  # fixed-policy decay: StepLR / CosineAnnealingLR / ExponentialLR
        # Per-epoch summary
        print(f'\nEpoch {epoch+1}/{epochs} done | train acc: {epoch_train_acc:.2f}% | test acc: {epoch_test_acc:.2f}%')
        print(f'train loss: {epoch_train_loss:.4f} | test loss: {epoch_test_loss:.4f}\n')
        model.train()  # back to training mode
    # Plot the training curves
    plot_iter_losses(all_iter_losses, iter_indices)
    plot_epoch_metrics(train_acc_history, test_acc_history, train_loss_history, test_loss_history)
    return epoch_test_acc  # final-epoch test accuracy
# 7. Plot the per-iteration loss curve
def plot_iter_losses(losses, indices):
    plt.figure(figsize=(12, 4))
    plt.plot(indices, losses, 'b-', alpha=0.6, label='per-batch loss')
    # Overlay a moving average to make the trend easier to read
    window_size = 50
    if len(losses) >= window_size:
        smooth_loss = np.convolve(losses, np.ones(window_size)/window_size, mode='valid')
        smooth_indices = indices[window_size-1:]
        plt.plot(smooth_indices, smooth_loss, 'r-', linewidth=2, label=f'{window_size}-step moving average')
    plt.xlabel('Iteration (batch index)')
    plt.ylabel('Loss')
    plt.title(f'Training loss (scheduler: {scheduler_type} | conv channels: {conv_channels})')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()
# 8. Plot per-epoch accuracy and loss curves
def plot_epoch_metrics(train_acc, test_acc, train_loss, test_loss):
    epochs = range(1, len(train_acc) + 1)
    plt.figure(figsize=(14, 5))
    # Accuracy curves
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_acc, 'b-', linewidth=2, label='train accuracy')
    plt.plot(epochs, test_acc, 'r-', linewidth=2, label='test accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.title('Train/test accuracy')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.ylim(0, 100)  # accuracy ranges from 0 to 100%
    # Loss curves
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_loss, 'b-', linewidth=2, label='train loss')
    plt.plot(epochs, test_loss, 'r-', linewidth=2, label='test loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Train/test loss')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.suptitle(f'Training summary (optimizer: {optimizer_type} | scheduler: {scheduler_type})', fontsize=14)
    plt.tight_layout()
    plt.show()
# 9. Run training and evaluation
if __name__ == '__main__':
    print("="*50)
    print("Starting CIFAR-10 training...")
    print("="*50)
    final_accuracy = train(model, train_loader, test_loader, criterion, optimizer, scheduler, device, epochs)
    print("="*50)
    print(f"Training complete! Final test accuracy: {final_accuracy:.2f}%")
    print("="*50)
    # Optionally save the model
    # torch.save(model.state_dict(), f'cifar10_cnn_{scheduler_type}_{conv_channels}.pth')
    # print(f"Model saved as: cifar10_cnn_{scheduler_type}_{conv_channels}.pth")
```
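
If the commented-out `torch.save` above is enabled, the checkpoint can be restored later by rebuilding the same architecture and loading the weights into it. A minimal sketch, assuming the definitions above are available and using a hypothetical file name in place of whatever was actually saved:

```python
# Rebuild an identical model, then load the saved weights into it.
# 'cifar10_cnn.pth' is a placeholder; use the file name produced by torch.save.
model = ConfigurableCNN(conv_channels=[32, 64, 128]).to(device)
model.load_state_dict(torch.load('cifar10_cnn.pth', map_location=device))
model.eval()  # switch to inference mode before evaluating
```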