DAY 40: A Simple CNN

Knowledge review:

  1. Data augmentation (a minimal sketch follows this list)

  2. How to define a convolutional neural network

  3. Batch normalization: normalizes the distribution within a batch; commonly used for image data

  4. Feature maps: only the outputs of convolution operations are called feature maps

  5. Learning-rate schedulers: directly adjust the optimizer's base learning rate during training
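
Item 1 mentions data augmentation, but the script below applies only normalization. Here is a minimal sketch of a typical CIFAR-10 train-time augmentation pipeline; the specific transforms are illustrative choices, not part of the original script:

python
from torchvision import transforms

# Train-time augmentation for 32x32 CIFAR-10 images (illustrative choices).
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),   # random shifts via padded cropping
    transforms.RandomHorizontalFlip(),      # mirror the image half the time
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
# The test-time transform should stay deterministic: ToTensor + Normalize only.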

A typical convolution pipeline looks like this (a minimal shape-tracing sketch follows the list):

  • Input → Conv layer → BatchNorm layer (optional) → Activation → Pooling layer → next layer

  • Flatten → Dense (with optional Dropout) → Dense (output)
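
A minimal sketch tracing tensor shapes through one such block; the module choices mirror the first block of CNN_Base below:

python
import torch
import torch.nn as nn

# One conv block in the order used throughout this post: Conv -> BN -> ReLU -> Pool.
block = nn.Sequential(
    nn.Conv2d(3, 32, kernel_size=3, padding=1),  # 3x32x32 -> 32x32x32 (padding keeps H, W)
    nn.BatchNorm2d(32),                          # shape unchanged
    nn.ReLU(),                                   # shape unchanged
    nn.MaxPool2d(2),                             # 32x32x32 -> 32x16x16 (H, W halved)
)

x = torch.randn(1, 3, 32, 32)  # one fake CIFAR-10 image as a batch of size 1
print(block(x).shape)          # torch.Size([1, 32, 16, 16])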

Homework: manually try different schedulers and CNN architectures, and observe how training differs. (A small helper for logging the learning rate per epoch, handy for this comparison, follows the script.)

python
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# ---------------------- 1. Data preparation ----------------------
# Preprocessing (CIFAR-10)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))  # canonical CIFAR-10 mean/std
])

# Load the datasets
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Device selection
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
epochs = 20  # same number of epochs for every run, for a fair comparison


# ---------------------- 2. Define the CNN variants ----------------------
# Baseline CNN (control group)
class CNN_Base(nn.Module):
    def __init__(self):
        super(CNN_Base, self).__init__()
        # Conv block 1
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2)
        # Conv block 2
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2)
        # Conv block 3
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(2)
        # Fully connected layers
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.pool1(self.relu1(self.bn1(self.conv1(x))))
        x = self.pool2(self.relu2(self.bn2(self.conv2(x))))
        x = self.pool3(self.relu3(self.bn3(self.conv3(x))))
        x = x.view(-1, 128 * 4 * 4)
        x = self.dropout(self.relu3(self.fc1(x)))
        x = self.fc2(x)
        return x


# Variant 1: lightweight CNN (fewer channels, simpler structure)
class CNN_Light(nn.Module):
    def __init__(self):
        super(CNN_Light, self).__init__()
        # Conv block 1 (channels halved)
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2)
        # Conv block 2 (one conv block removed)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2)
        # Fully connected layers (units halved)
        self.fc1 = nn.Linear(32 * 8 * 8, 256)  # feature size: 32×8×8 (one fewer pooling step)
        self.dropout = nn.Dropout(0.3)  # lower dropout rate
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = self.pool1(self.relu1(self.bn1(self.conv1(x))))  # shape: 16×16×16
        x = self.pool2(self.relu2(self.bn2(self.conv2(x))))  # shape: 32×8×8
        x = x.view(-1, 32 * 8 * 8)
        x = self.dropout(self.relu1(self.fc1(x)))
        x = self.fc2(x)
        return x


# Variant 2: deeper CNN (extra conv block, more channels)
class CNN_Deep(nn.Module):
    def __init__(self):
        super(CNN_Deep, self).__init__()
        # Conv block 1
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2)
        # Conv block 2
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2)
        # Conv block 3
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(2)
        # New conv block 4
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.relu4 = nn.ReLU()
        self.pool4 = nn.MaxPool2d(2)  # extra pooling step
        # Fully connected layers (adapted to the new feature size)
        self.fc1 = nn.Linear(256 * 2 * 2, 512)  # feature size: 256×2×2 (one extra pooling step)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.pool1(self.relu1(self.bn1(self.conv1(x))))
        x = self.pool2(self.relu2(self.bn2(self.conv2(x))))
        x = self.pool3(self.relu3(self.bn3(self.conv3(x))))
        x = self.pool4(self.relu4(self.bn4(self.conv4(x))))  # pass through the new conv block
        x = x.view(-1, 256 * 2 * 2)
        x = self.dropout(self.relu3(self.fc1(x)))
        x = self.fc2(x)
        return x


# ---------------------- 3. Training function ----------------------
def train(model, train_loader, test_loader, criterion, optimizer, scheduler, device, epochs):
    all_iter_losses = []
    iter_indices = []
    train_acc_history = []
    test_acc_history = []
    train_loss_history = []
    test_loss_history = []

    for epoch in range(epochs):
        model.train()  # re-enable train mode each epoch (the test phase below switches to eval mode)
        running_loss = 0.0
        correct = 0
        total = 0

        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # Record the loss
            iter_loss = loss.item()
            all_iter_losses.append(iter_loss)
            iter_indices.append(epoch * len(train_loader) + batch_idx + 1)

            # Accumulate training metrics
            running_loss += iter_loss
            _, predicted = output.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()

            if (batch_idx + 1) % 100 == 0:
                print(f'Epoch: {epoch+1}/{epochs} | Batch: {batch_idx+1}/{len(train_loader)} '
                      f'| batch loss: {iter_loss:.4f} | running avg loss: {running_loss/(batch_idx+1):.4f}')

        # Per-epoch training metrics
        epoch_train_loss = running_loss / len(train_loader)
        epoch_train_acc = 100. * correct / total
        train_acc_history.append(epoch_train_acc)
        train_loss_history.append(epoch_train_loss)

        # Test phase
        model.eval()
        test_loss = 0
        correct_test = 0
        total_test = 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                test_loss += criterion(output, target).item()
                _, predicted = output.max(1)
                total_test += target.size(0)
                correct_test += predicted.eq(target).sum().item()
        epoch_test_loss = test_loss / len(test_loader)
        epoch_test_acc = 100. * correct_test / total_test
        test_acc_history.append(epoch_test_acc)
        test_loss_history.append(epoch_test_loss)

        # Update the learning rate. ReduceLROnPlateau needs the metric it monitors;
        # StepLR and CosineAnnealingLR must be stepped without an argument.
        if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(epoch_test_loss)
        else:
            scheduler.step()

        print(f'Epoch {epoch+1}/{epochs} done | train acc: {epoch_train_acc:.2f}% | test acc: {epoch_test_acc:.2f}%')

    # Plots
    plot_iter_losses(all_iter_losses, iter_indices)
    plot_epoch_metrics(train_acc_history, test_acc_history, train_loss_history, test_loss_history)
    return epoch_test_acc


# ---------------------- 4. Plotting helpers ----------------------
def plot_iter_losses(losses, indices):
    plt.figure(figsize=(10, 4))
    plt.plot(indices, losses, 'b-', alpha=0.7, label='Iteration Loss')
    plt.xlabel('Iteration (batch index)')
    plt.ylabel('Loss')
    plt.title('Training loss per iteration')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

def plot_epoch_metrics(train_acc, test_acc, train_loss, test_loss):
    epochs = range(1, len(train_acc) + 1)
    plt.figure(figsize=(12, 4))
    # Accuracy
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_acc, 'b-', label='Train accuracy')
    plt.plot(epochs, test_acc, 'r-', label='Test accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.title('Train and test accuracy')
    plt.legend()
    plt.grid(True)
    # Loss
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_loss, 'b-', label='Train loss')
    plt.plot(epochs, test_loss, 'r-', label='Test loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Train and test loss')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


# ---------------------- 5. Comparison experiments ----------------------
criterion = nn.CrossEntropyLoss()  # shared loss function

# Experiment 1: baseline CNN + ReduceLROnPlateau (control group)
print("\n===== Experiment 1: baseline CNN + ReduceLROnPlateau =====")
model1 = CNN_Base().to(device)
optimizer1 = optim.Adam(model1.parameters(), lr=0.001)
scheduler1 = optim.lr_scheduler.ReduceLROnPlateau(optimizer1, mode='min', patience=3, factor=0.5)
acc1 = train(model1, train_loader, test_loader, criterion, optimizer1, scheduler1, device, epochs)


# Experiment 2: baseline CNN + StepLR (scheduler changed only)
print("\n===== Experiment 2: baseline CNN + StepLR =====")
model2 = CNN_Base().to(device)
optimizer2 = optim.Adam(model2.parameters(), lr=0.001)
scheduler2 = optim.lr_scheduler.StepLR(optimizer2, step_size=5, gamma=0.5)  # halve the LR every 5 epochs
acc2 = train(model2, train_loader, test_loader, criterion, optimizer2, scheduler2, device, epochs)


# Experiment 3: lightweight CNN + ReduceLROnPlateau (architecture changed only)
print("\n===== Experiment 3: lightweight CNN + ReduceLROnPlateau =====")
model3 = CNN_Light().to(device)
optimizer3 = optim.Adam(model3.parameters(), lr=0.001)
scheduler3 = optim.lr_scheduler.ReduceLROnPlateau(optimizer3, mode='min', patience=3, factor=0.5)
acc3 = train(model3, train_loader, test_loader, criterion, optimizer3, scheduler3, device, epochs)


# Experiment 4: deeper CNN + CosineAnnealingLR (architecture + scheduler changed)
print("\n===== Experiment 4: deeper CNN + CosineAnnealingLR =====")
model4 = CNN_Deep().to(device)
optimizer4 = optim.Adam(model4.parameters(), lr=0.001)
scheduler4 = optim.lr_scheduler.CosineAnnealingLR(optimizer4, T_max=10)  # anneal over a 10-epoch cosine half-cycle
acc4 = train(model4, train_loader, test_loader, criterion, optimizer4, scheduler4, device, epochs)


# Print the final comparison
print("\n===== Final test accuracy of all experiments =====")
print(f"Experiment 1 (baseline CNN + ReduceLROnPlateau): {acc1:.2f}%")
print(f"Experiment 2 (baseline CNN + StepLR): {acc2:.2f}%")
print(f"Experiment 3 (lightweight CNN + ReduceLROnPlateau): {acc3:.2f}%")
print(f"Experiment 4 (deeper CNN + CosineAnnealingLR): {acc4:.2f}%")

@浙大疏锦行
