DAY 44: Pretrained Models + the CBAM Module

Knowledge review:

  1. ResNet structure analysis

  2. Where to place the CBAM module

  3. Training strategies for pretrained models

a. Differential learning rates (a minimal sketch follows this list)

b. Three-stage fine-tuning
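
For point 3a, here is a minimal sketch of differential learning rates via PyTorch optimizer parameter groups; the layer names (features, classifier) match torchvision's vgg16, and the learning-rate values are illustrative assumptions, not tuned settings:

python
import torch.optim as optim
import torchvision

model = torchvision.models.vgg16(weights="IMAGENET1K_V1")  # use pretrained=True on torchvision < 0.13

# Pretrained backbone gets a small learning rate; the task-specific head a larger one
optimizer = optim.Adam([
    {"params": model.features.parameters(), "lr": 1e-5},    # pretrained conv layers: small lr
    {"params": model.classifier.parameters(), "lr": 1e-3},  # classifier head: larger lr
])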

Assignments:

  1. Build a solid understanding of the resnet18 model structure (a quick inspection snippet follows this list)

  2. Try applying the staged fine-tuning strategy to vgg16+cbam (full implementation below)
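
For assignment 1, a quick way to study the resnet18 layout (a sketch; the stage names and channel counts below are the standard torchvision resnet18):

python
import torchvision

# weights=None suffices for structure inspection (no download needed)
resnet18 = torchvision.models.resnet18(weights=None)

# Top-level children: conv1, bn1, relu, maxpool, layer1-layer4, avgpool, fc
for name, module in resnet18.named_children():
    print(name, "->", type(module).__name__)

# Output channels per residual stage, i.e. the natural CBAM insertion points:
# layer1 -> 64, layer2 -> 128, layer3 -> 256, layer4 -> 512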

python
# ===================== 1. Environment setup and imports =====================
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import time

# Matplotlib font settings (SimHei supports CJK labels)
plt.rcParams["font.family"] = ["SimHei"]
plt.rcParams['axes.unicode_minus'] = False

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ===================== 2. CBAM module definition (standard implementation) =====================
class ChannelAttention(nn.Module):
    def __init__(self, in_channels, ratio=16):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(in_channels, in_channels // ratio, bias=False),
            nn.ReLU(),
            nn.Linear(in_channels // ratio, in_channels, bias=False)
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        b, c, h, w = x.shape
        avg_out = self.fc(self.avg_pool(x).view(b, c))
        max_out = self.fc(self.max_pool(x).view(b, c))
        attention = self.sigmoid(avg_out + max_out).view(b, c, 1, 1)
        return x * attention

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super().__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=kernel_size//2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        pool_out = torch.cat([avg_out, max_out], dim=1)
        attention = self.conv(pool_out)
        return x * self.sigmoid(attention)

class CBAM(nn.Module):
    def __init__(self, in_channels, ratio=16, kernel_size=7):
        super().__init__()
        self.channel_attn = ChannelAttention(in_channels, ratio)
        self.spatial_attn = SpatialAttention(kernel_size)

    def forward(self, x):
        x = self.channel_attn(x)
        x = self.spatial_attn(x)
        return x
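
# Quick sanity check (a sketch): CBAM is shape-preserving, so it can be inserted
# after any conv block without changing downstream dimensions, e.g.:
#   _x = torch.randn(2, 64, 32, 32)
#   assert CBAM(64)(_x).shape == _x.shape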

# ===================== 3. Building VGG16+CBAM =====================
class VGG16_CBAM(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        # Load pretrained VGG16 (classifier head replaced below);
        # on torchvision < 0.13 use vgg16(pretrained=True) instead
        vgg16_pretrained = torchvision.models.vgg16(weights="IMAGENET1K_V1")
        self.features = vgg16_pretrained.features  # VGG16 feature extractor

        # Insert CBAM after each of VGG16's 5 conv blocks
        # VGG16 features layout: block1(0-4), block2(5-9), block3(10-16), block4(17-23), block5(24-30)
        self.cbam1 = CBAM(in_channels=64)   # block1 outputs 64 channels
        self.cbam2 = CBAM(in_channels=128)  # block2 outputs 128 channels
        self.cbam3 = CBAM(in_channels=256)  # block3 outputs 256 channels
        self.cbam4 = CBAM(in_channels=512)  # block4 outputs 512 channels
        self.cbam5 = CBAM(in_channels=512)  # block5 outputs 512 channels

        # Replace VGG16's classifier head (adapted to CIFAR10's 10 classes)
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),  # with 224x224 input, features output is 512x7x7
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        # Forward pass: each VGG16 conv block followed by its CBAM
        x = self.features[:5](x)    # block1
        x = self.cbam1(x)
        x = self.features[5:10](x)  # block2
        x = self.cbam2(x)
        x = self.features[10:17](x) # block3
        x = self.cbam3(x)
        x = self.features[17:24](x) # block4
        x = self.cbam4(x)
        x = self.features[24:](x)   # block5
        x = self.cbam5(x)
        
        # Flatten + classifier head
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
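
# Shape walkthrough for a 224x224 input (standard VGG16 downsampling; CBAM is shape-preserving):
#   block1 -> (B, 64, 112, 112)   block2 -> (B, 128, 56, 56)   block3 -> (B, 256, 28, 28)
#   block4 -> (B, 512, 14, 14)    block5 -> (B, 512, 7, 7)     flatten -> 512 * 7 * 7 = 25088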

# ===================== 4. Staged fine-tuning core functions =====================
def set_trainable_layers(model, trainable_parts):
    """
    Freeze all layers, then unfreeze only those whose names contain
    one of the keywords in trainable_parts.
    """
    print(f"\n---> Unfreezing and making trainable: {trainable_parts}")
    for name, param in model.named_parameters():
        param.requires_grad = False  # freeze everything first
        for part in trainable_parts:
            if part in name:
                param.requires_grad = True
                break

def train_staged_finetuning(model, criterion, train_loader, test_loader, device, epochs):
    optimizer = None
    # Initialize history records
    all_iter_losses, iter_indices = [], []
    train_acc_history, test_acc_history = [], []
    train_loss_history, test_loss_history = [], []

    for epoch in range(1, epochs + 1):
        epoch_start_time = time.time()
        
        # --- Stage-wise adjustment of frozen layers and learning rate ---
        if epoch == 1:
            print("\n" + "="*50 + "\n🚀 **Stage 1: train the CBAM modules and classifier head**\n" + "="*50)
            # Unfreeze CBAM (cbam1-cbam5) and the classifier head
            set_trainable_layers(model, ["cbam", "classifier"])
            optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)
        elif epoch == 6:
            print("\n" + "="*50 + "\n✈️ **Stage 2: unfreeze VGG16's top conv block (features[24:])**\n" + "="*50)
            # Unfreeze CBAM + classifier + VGG16 block5 (features indices 24-30)
            set_trainable_layers(model, ["cbam", "classifier"] + [f"features.{i}" for i in range(24, 31)])
            optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4)
        elif epoch == 21:
            print("\n" + "="*50 + "\n🛰️ **Stage 3: unfreeze all layers for global fine-tuning**\n" + "="*50)
            # Unfreeze everything
            for param in model.parameters():
                param.requires_grad = True
            optimizer = optim.Adam(model.parameters(), lr=1e-5)
        
        # --- Training loop ---
        model.train()
        running_loss, correct, total = 0.0, 0, 0
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            
            # Record per-iteration loss
            iter_loss = loss.item()
            all_iter_losses.append(iter_loss)
            iter_indices.append((epoch - 1) * len(train_loader) + batch_idx + 1)
            
            running_loss += iter_loss
            _, predicted = output.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()
            
            # Print every 100 batches
            if (batch_idx + 1) % 100 == 0:
                print(f'Epoch: {epoch}/{epochs} | Batch: {batch_idx+1}/{len(train_loader)} '
                      f'| Batch loss: {iter_loss:.4f} | Running avg loss: {running_loss/(batch_idx+1):.4f}')
        
        # Epoch-level training metrics
        epoch_train_loss = running_loss / len(train_loader)
        epoch_train_acc = 100. * correct / total
        train_loss_history.append(epoch_train_loss)
        train_acc_history.append(epoch_train_acc)

        # --- Evaluation loop ---
        model.eval()
        test_loss, correct_test, total_test = 0, 0, 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                test_loss += criterion(output, target).item()
                _, predicted = output.max(1)
                total_test += target.size(0)
                correct_test += predicted.eq(target).sum().item()
        
        # Epoch-level test metrics
        epoch_test_loss = test_loss / len(test_loader)
        epoch_test_acc = 100. * correct_test / total_test
        test_loss_history.append(epoch_test_loss)
        test_acc_history.append(epoch_test_acc)
        
        # Print epoch summary
        print(f'Epoch {epoch}/{epochs} done | Time: {time.time() - epoch_start_time:.2f}s | Train acc: {epoch_train_acc:.2f}% | Test acc: {epoch_test_acc:.2f}%')
    
    # Plot results after training
    print("\nTraining complete! Plotting results...")
    plot_iter_losses(all_iter_losses, iter_indices)
    plot_epoch_metrics(train_acc_history, test_acc_history, train_loss_history, test_loss_history)
    
    return epoch_test_acc

# ===================== 5. Visualization functions =====================
def plot_iter_losses(losses, indices):
    plt.figure(figsize=(10, 4))
    plt.plot(indices, losses, 'b-', alpha=0.7, label='Iteration Loss')
    plt.xlabel('Iteration (batch index)')
    plt.ylabel('Loss')
    plt.title('Training loss per iteration')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

def plot_epoch_metrics(train_acc, test_acc, train_loss, test_loss):
    epochs = range(1, len(train_acc) + 1)
    plt.figure(figsize=(12, 4))
    
    # Accuracy curves
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_acc, 'b-', label='Train accuracy')
    plt.plot(epochs, test_acc, 'r-', label='Test accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.title('Train vs. test accuracy')
    plt.legend()
    plt.grid(True)
    
    # Loss curves
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_loss, 'b-', label='Train loss')
    plt.plot(epochs, test_loss, 'r-', label='Test loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Train vs. test loss')
    plt.legend()
    plt.grid(True)
    
    plt.tight_layout()
    plt.show()

# ===================== 6. Data loading (CIFAR10) =====================
# Preprocessing (matches VGG16's input requirements)
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),  # VGG16 expects 224x224 input
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(224, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # ImageNet normalization
])

transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load the CIFAR10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

# ===================== 7. Run training =====================
if __name__ == "__main__":
    # Initialize the model
    model = VGG16_CBAM(num_classes=10).to(device)
    criterion = nn.CrossEntropyLoss()
    epochs = 50

    # Start staged fine-tuning
    print("Training VGG16+CBAM with the staged fine-tuning strategy...")
    final_accuracy = train_staged_finetuning(model, criterion, train_loader, test_loader, device, epochs)
    print(f"Training complete! Final test accuracy: {final_accuracy:.2f}%")

    # Save the model (optional)
    # torch.save(model.state_dict(), 'vgg16_cbam_finetuned.pth')
    # print("Model saved as: vgg16_cbam_finetuned.pth")

Review Day

Assignment: on day 43 you trained a simple CNN on a dataset of your own choosing; now try applying what we covered over the past few days to push its accuracy further.

# Neural Network Tuning Guide

Knowledge review:

  1. Random seeds (a sketch covering seeding and weight initialization follows this list)

  2. Initialization of internal parameters (weights)

  3. A guide to tuning neural networks

a. How parameters are categorized

b. The order in which to tune them

c. Practical tips for tuning each group of parameters
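
For points 1 and 2, a minimal sketch; the seed value and the init recipe (Kaiming for conv, Xavier for linear) are common illustrative choices, not prescriptions:

python
import random
import numpy as np
import torch
import torch.nn as nn

def set_seed(seed=42):
    """Pin all common sources of randomness for reproducible runs."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True  # reproducibility at some speed cost
    torch.backends.cudnn.benchmark = False

def init_weights(m):
    """One common recipe: Kaiming init for conv layers, Xavier for linear layers."""
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.Linear):
        nn.init.xavier_normal_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)

# Usage: set_seed(42), then model.apply(init_weights) before training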

Assignment: for the simple CNN from day 41, see whether the tuning guide can push its accuracy further.
