Python6.18打卡（day50）

预训练模型+CBAM模块

resnet结构解析
CBAM放置位置的思考
针对预训练模型的训练策略
1. 差异化学习率
2. 三阶段微调

cbam定义与预处理

python 复制代码

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
 
# Channel attention
class ChannelAttention(nn.Module):
    """Channel-attention branch of CBAM.

    Each channel is squeezed to a single value twice (global average pooling
    and global max pooling). Both descriptors go through the same two-layer
    bottleneck MLP, the results are summed, and a sigmoid turns the sum into
    one weight per channel that rescales the input feature map.
    """

    def __init__(self, in_channels, ratio=16):
        """
        Args:
            in_channels: Number of channels of the incoming feature map.
            ratio: Reduction factor of the bottleneck MLP (default 16).
        """
        super().__init__()
        # `in_channels // ratio` is 0 whenever in_channels < ratio, which would
        # leave the bottleneck with no hidden units at all; keep at least one.
        hidden_channels = max(1, in_channels // ratio)

        # Global average pooling: one mean value per channel.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # Global max pooling: one peak value per channel.
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        # Shared MLP applied to both pooled descriptors:
        # reduce -> ReLU -> expand back to `in_channels`.
        self.fc = nn.Sequential(
            nn.Linear(in_channels, hidden_channels, bias=False),  # reduction
            nn.ReLU(),
            nn.Linear(hidden_channels, in_channels, bias=False),  # expansion
        )
        # Squashes the summed MLP outputs into (0, 1) channel weights.
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Rescale every channel of `x` by its learned attention weight.

        Args:
            x: Feature map of shape [batch_size, channels, height, width].

        Returns:
            Tensor with the same shape as `x`.
        """
        b, c, h, w = x.shape
        # Pooled maps are [b, c, 1, 1]; flatten to [b, c] for the linear layers.
        avg_out = self.fc(self.avg_pool(x).view(b, c))
        max_out = self.fc(self.max_pool(x).view(b, c))
        # Sum both branches, squash, and reshape to [b, c, 1, 1] so the
        # multiplication below broadcasts over height and width.
        attention = self.sigmoid(avg_out + max_out).view(b, c, 1, 1)
        return x * attention
 
## Spatial attention module
class SpatialAttention(nn.Module):
    """Spatial-attention branch of CBAM.

    The input is collapsed along the channel axis into a mean map and a max
    map; a single bias-free convolution over the two stacked maps followed by
    a sigmoid yields one weight per spatial location.
    """

    def __init__(self, kernel_size=7):
        """
        Args:
            kernel_size: Side length of the convolution kernel (default 7).
        """
        super().__init__()
        # Half-kernel padding keeps H and W unchanged for odd kernel sizes.
        same_padding = kernel_size // 2
        self.conv = nn.Conv2d(
            in_channels=2,
            out_channels=1,
            kernel_size=kernel_size,
            padding=same_padding,
            bias=False,
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return `x` scaled location-wise by the attention map."""
        channel_mean = x.mean(dim=1, keepdim=True)        # (B, 1, H, W)
        channel_max = x.max(dim=1, keepdim=True).values   # (B, 1, H, W)
        descriptor = torch.cat((channel_mean, channel_max), dim=1)  # (B, 2, H, W)
        spatial_weights = self.sigmoid(self.conv(descriptor))
        # Broadcasts the single-channel weight map over all channels of x.
        return x * spatial_weights
 
## CBAM module
class CBAM(nn.Module):
    """Convolutional Block Attention Module: channel attention, then spatial attention.

    Args:
        in_channels: Channel count of the feature map being refined.
        ratio: Reduction ratio forwarded to the channel-attention MLP.
        kernel_size: Kernel size forwarded to the spatial-attention convolution.
    """

    def __init__(self, in_channels, ratio=16, kernel_size=7):
        super().__init__()
        self.channel_attn = ChannelAttention(in_channels=in_channels, ratio=ratio)
        self.spatial_attn = SpatialAttention(kernel_size=kernel_size)

    def forward(self, x):
        """Apply channel attention first and spatial attention second."""
        return self.spatial_attn(self.channel_attn(x))
    
 
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
 
# Matplotlib setup: use a CJK-capable font and keep the minus sign renderable
plt.rcParams.update({
    "font.family": ["SimHei"],
    "axes.unicode_minus": False,
})

# Pick the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"使用设备: {device}")

# Training pipeline: random augmentation, then tensor conversion and normalisation
train_transform = transforms.Compose([
    transforms.RandomCrop(size=32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomRotation(degrees=15),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

# Test pipeline: no augmentation, same normalisation as training
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

# CIFAR-10 datasets (only the training split triggers a download) and their loaders
train_dataset = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=train_transform
)
test_dataset = datasets.CIFAR10(
    root='./data', train=False, transform=test_transform
)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)
 
import torch
import torchvision.models as models
from torchinfo import summary #之前的内容说了，推荐用他来可视化模型结构，信息最全
 
# Load the pretrained ResNet18 and put it in inference mode (eval() returns the module)
model = models.resnet18(pretrained=True).eval()

# Print the layer-by-layer structure and parameter counts for one 224x224 RGB image
summary(model, input_size=(1, 3, 224, 224))

python 复制代码

==========================================================================================
Layer (type:depth-idx)                   Output Shape              Param #
==========================================================================================
ResNet                                   [1, 1000]                 --
├─Conv2d: 1-1                            [1, 64, 112, 112]         9,408
├─BatchNorm2d: 1-2                       [1, 64, 112, 112]         128
├─ReLU: 1-3                              [1, 64, 112, 112]         --
├─MaxPool2d: 1-4                         [1, 64, 56, 56]           --
├─Sequential: 1-5                        [1, 64, 56, 56]           --
│    └─BasicBlock: 2-1                   [1, 64, 56, 56]           --
│    │    └─Conv2d: 3-1                  [1, 64, 56, 56]           36,864
│    │    └─BatchNorm2d: 3-2             [1, 64, 56, 56]           128
│    │    └─ReLU: 3-3                    [1, 64, 56, 56]           --
│    │    └─Conv2d: 3-4                  [1, 64, 56, 56]           36,864
│    │    └─BatchNorm2d: 3-5             [1, 64, 56, 56]           128
│    │    └─ReLU: 3-6                    [1, 64, 56, 56]           --
│    └─BasicBlock: 2-2                   [1, 64, 56, 56]           --
│    │    └─Conv2d: 3-7                  [1, 64, 56, 56]           36,864
│    │    └─BatchNorm2d: 3-8             [1, 64, 56, 56]           128
│    │    └─ReLU: 3-9                    [1, 64, 56, 56]           --
│    │    └─Conv2d: 3-10                 [1, 64, 56, 56]           36,864
│    │    └─BatchNorm2d: 3-11            [1, 64, 56, 56]           128
│    │    └─ReLU: 3-12                   [1, 64, 56, 56]           --
├─Sequential: 1-6                        [1, 128, 28, 28]          --
│    └─BasicBlock: 2-3                   [1, 128, 28, 28]          --
│    │    └─Conv2d: 3-13                 [1, 128, 28, 28]          73,728
│    │    └─BatchNorm2d: 3-14            [1, 128, 28, 28]          256
│    │    └─ReLU: 3-15                   [1, 128, 28, 28]          --
│    │    └─Conv2d: 3-16                 [1, 128, 28, 28]          147,456
│    │    └─BatchNorm2d: 3-17            [1, 128, 28, 28]          256
│    │    └─Sequential: 3-18             [1, 128, 28, 28]          8,448
│    │    └─ReLU: 3-19                   [1, 128, 28, 28]          --
│    └─BasicBlock: 2-4                   [1, 128, 28, 28]          --
│    │    └─Conv2d: 3-20                 [1, 128, 28, 28]          147,456
│    │    └─BatchNorm2d: 3-21            [1, 128, 28, 28]          256
│    │    └─ReLU: 3-22                   [1, 128, 28, 28]          --
│    │    └─Conv2d: 3-23                 [1, 128, 28, 28]          147,456
│    │    └─BatchNorm2d: 3-24            [1, 128, 28, 28]          256
│    │    └─ReLU: 3-25                   [1, 128, 28, 28]          --
├─Sequential: 1-7                        [1, 256, 14, 14]          --
│    └─BasicBlock: 2-5                   [1, 256, 14, 14]          --
│    │    └─Conv2d: 3-26                 [1, 256, 14, 14]          294,912
│    │    └─BatchNorm2d: 3-27            [1, 256, 14, 14]          512
│    │    └─ReLU: 3-28                   [1, 256, 14, 14]          --
│    │    └─Conv2d: 3-29                 [1, 256, 14, 14]          589,824
│    │    └─BatchNorm2d: 3-30            [1, 256, 14, 14]          512
│    │    └─Sequential: 3-31             [1, 256, 14, 14]          33,280
│    │    └─ReLU: 3-32                   [1, 256, 14, 14]          --
│    └─BasicBlock: 2-6                   [1, 256, 14, 14]          --
│    │    └─Conv2d: 3-33                 [1, 256, 14, 14]          589,824
│    │    └─BatchNorm2d: 3-34            [1, 256, 14, 14]          512
│    │    └─ReLU: 3-35                   [1, 256, 14, 14]          --
│    │    └─Conv2d: 3-36                 [1, 256, 14, 14]          589,824
│    │    └─BatchNorm2d: 3-37            [1, 256, 14, 14]          512
│    │    └─ReLU: 3-38                   [1, 256, 14, 14]          --
├─Sequential: 1-8                        [1, 512, 7, 7]            --
│    └─BasicBlock: 2-7                   [1, 512, 7, 7]            --
│    │    └─Conv2d: 3-39                 [1, 512, 7, 7]            1,179,648
│    │    └─BatchNorm2d: 3-40            [1, 512, 7, 7]            1,024
│    │    └─ReLU: 3-41                   [1, 512, 7, 7]            --
│    │    └─Conv2d: 3-42                 [1, 512, 7, 7]            2,359,296
│    │    └─BatchNorm2d: 3-43            [1, 512, 7, 7]            1,024
│    │    └─Sequential: 3-44             [1, 512, 7, 7]            132,096
│    │    └─ReLU: 3-45                   [1, 512, 7, 7]            --
│    └─BasicBlock: 2-8                   [1, 512, 7, 7]            --
│    │    └─Conv2d: 3-46                 [1, 512, 7, 7]            2,359,296
│    │    └─BatchNorm2d: 3-47            [1, 512, 7, 7]            1,024
│    │    └─ReLU: 3-48                   [1, 512, 7, 7]            --
│    │    └─Conv2d: 3-49                 [1, 512, 7, 7]            2,359,296
│    │    └─BatchNorm2d: 3-50            [1, 512, 7, 7]            1,024
│    │    └─ReLU: 3-51                   [1, 512, 7, 7]            --
├─AdaptiveAvgPool2d: 1-9                 [1, 512, 1, 1]            --
├─Linear: 1-10                           [1, 1000]                 513,000
==========================================================================================
Total params: 11,689,512
Trainable params: 11,689,512
Non-trainable params: 0
Total mult-adds (G): 1.81
==========================================================================================
Input size (MB): 0.60
Forward/backward pass size (MB): 39.75
Params size (MB): 46.76
Estimated Total Size (MB): 87.11
==========================================================================================

从上面的结构可以看出，ResNet18 的主干由 4 个 layer（每个 layer 包含 2 个 BasicBlock）依次堆叠而成，layer 与 layer 之间传递的只是特征图。所以完全可以在不破坏其核心结构的情况下，将CBAM模块无缝地"注入"到预训练的ResNet中。这样做的逻辑是：

保留原始结构：原始的残差块负责提取核心特征。
增强特征：紧随其后的CBAM模块对这些提取出的特征进行"精炼"，告诉模型应该"关注什么"（what - 通道注意力）和"在哪里关注"（where - 空间注意力）。
不破坏预训练权重：原始残差块的预训练权重得以完整保留，我们只是在其后增加了一个新的、需要从头学习的模块。

自定义ResNet18模型

python 复制代码

import torch
import torch.nn as nn
from torchvision import models
 
# Custom ResNet18 with a CBAM module inserted after every residual stage
class ResNet18_CBAM(nn.Module):
    """ResNet18 backbone with one CBAM block after each of layer1..layer4.

    The stem is adapted to small inputs: `conv1` is replaced by a new 3x3,
    stride-1 convolution and `maxpool` by an identity (the forward pass does
    not call `maxpool` at all). The classifier head is replaced by a new
    linear layer with `num_classes` outputs.

    Args:
        num_classes: Number of output classes.
        pretrained: Passed through to `models.resnet18`.
        cbam_ratio: Reduction ratio used by every CBAM block.
        cbam_kernel: Spatial-attention kernel size used by every CBAM block.
    """

    def __init__(self, num_classes=10, pretrained=True, cbam_ratio=16, cbam_kernel=7):
        super().__init__()
        backbone = models.resnet18(pretrained=pretrained)

        # Replace the stem so 32x32 inputs (CIFAR-10) are not downsampled early.
        backbone.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        backbone.maxpool = nn.Identity()
        self.backbone = backbone

        # One CBAM per residual stage, registered as cbam_layer1 .. cbam_layer4.
        for stage_idx, width in enumerate((64, 128, 256, 512), start=1):
            setattr(
                self,
                f"cbam_layer{stage_idx}",
                CBAM(in_channels=width, ratio=cbam_ratio, kernel_size=cbam_kernel),
            )

        # New classification head.
        self.backbone.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        """Run stem -> (residual stage + CBAM) x 4 -> global pooling -> classifier.

        Shape comments below are for a 32x32 input.
        """
        net = self.backbone
        x = net.relu(net.bn1(net.conv1(x)))  # [B, 64, 32, 32]

        # Stage outputs: [B,64,32,32] -> [B,128,16,16] -> [B,256,8,8] -> [B,512,4,4]
        for stage_idx in range(1, 5):
            x = getattr(net, f"layer{stage_idx}")(x)
            x = getattr(self, f"cbam_layer{stage_idx}")(x)

        x = net.avgpool(x)        # [B, 512, 1, 1]
        x = torch.flatten(x, 1)   # [B, 512]
        return net.fc(x)          # [B, num_classes]
    
# Build the model on the selected device, then the loss, optimizer and LR scheduler
model = ResNet18_CBAM()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# Halve the learning rate after 3 epochs without improvement of the monitored (minimised) metric
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

python 复制代码

===============================================================================================
Layer (type:depth-idx)                        Output Shape              Param #
===============================================================================================
ResNet18_CBAM                                 [1, 10]                   --
├─ResNet: 1-9                                 --                        (recursive)
│    └─Conv2d: 2-1                            [1, 64, 32, 32]           1,728
│    └─BatchNorm2d: 2-2                       [1, 64, 32, 32]           128
│    └─ReLU: 2-3                              [1, 64, 32, 32]           --
│    └─Sequential: 2-4                        [1, 64, 32, 32]           --
│    │    └─BasicBlock: 3-1                   [1, 64, 32, 32]           73,984
│    │    └─BasicBlock: 3-2                   [1, 64, 32, 32]           73,984
├─CBAM: 1-2                                   [1, 64, 32, 32]           --
│    └─ChannelAttention: 2-5                  [1, 64, 32, 32]           --
│    │    └─AdaptiveAvgPool2d: 3-3            [1, 64, 1, 1]             --
│    │    └─Sequential: 3-4                   [1, 64]                   512
│    │    └─AdaptiveMaxPool2d: 3-5            [1, 64, 1, 1]             --
│    │    └─Sequential: 3-6                   [1, 64]                   (recursive)
│    │    └─Sigmoid: 3-7                      [1, 64]                   --
│    └─SpatialAttention: 2-6                  [1, 64, 32, 32]           --
│    │    └─Conv2d: 3-8                       [1, 1, 32, 32]            98
│    │    └─Sigmoid: 3-9                      [1, 1, 32, 32]            --
├─ResNet: 1-9                                 --                        (recursive)
│    └─Sequential: 2-7                        [1, 128, 16, 16]          --
│    │    └─BasicBlock: 3-10                  [1, 128, 16, 16]          230,144
│    │    └─BasicBlock: 3-11                  [1, 128, 16, 16]          295,424
├─CBAM: 1-4                                   [1, 128, 16, 16]          --
│    └─ChannelAttention: 2-8                  [1, 128, 16, 16]          --
│    │    └─AdaptiveAvgPool2d: 3-12           [1, 128, 1, 1]            --
│    │    └─Sequential: 3-13                  [1, 128]                  2,048
│    │    └─AdaptiveMaxPool2d: 3-14           [1, 128, 1, 1]            --
│    │    └─Sequential: 3-15                  [1, 128]                  (recursive)
│    │    └─Sigmoid: 3-16                     [1, 128]                  --
│    └─SpatialAttention: 2-9                  [1, 128, 16, 16]          --
│    │    └─Conv2d: 3-17                      [1, 1, 16, 16]            98
│    │    └─Sigmoid: 3-18                     [1, 1, 16, 16]            --
├─ResNet: 1-9                                 --                        (recursive)
│    └─Sequential: 2-10                       [1, 256, 8, 8]            --
│    │    └─BasicBlock: 3-19                  [1, 256, 8, 8]            919,040
│    │    └─BasicBlock: 3-20                  [1, 256, 8, 8]            1,180,672
├─CBAM: 1-6                                   [1, 256, 8, 8]            --
│    └─ChannelAttention: 2-11                 [1, 256, 8, 8]            --
│    │    └─AdaptiveAvgPool2d: 3-21           [1, 256, 1, 1]            --
│    │    └─Sequential: 3-22                  [1, 256]                  8,192
│    │    └─AdaptiveMaxPool2d: 3-23           [1, 256, 1, 1]            --
│    │    └─Sequential: 3-24                  [1, 256]                  (recursive)
│    │    └─Sigmoid: 3-25                     [1, 256]                  --
│    └─SpatialAttention: 2-12                 [1, 256, 8, 8]            --
│    │    └─Conv2d: 3-26                      [1, 1, 8, 8]              98
│    │    └─Sigmoid: 3-27                     [1, 1, 8, 8]              --
├─ResNet: 1-9                                 --                        (recursive)
│    └─Sequential: 2-13                       [1, 512, 4, 4]            --
│    │    └─BasicBlock: 3-28                  [1, 512, 4, 4]            3,673,088
│    │    └─BasicBlock: 3-29                  [1, 512, 4, 4]            4,720,640
├─CBAM: 1-8                                   [1, 512, 4, 4]            --
│    └─ChannelAttention: 2-14                 [1, 512, 4, 4]            --
│    │    └─AdaptiveAvgPool2d: 3-30           [1, 512, 1, 1]            --
│    │    └─Sequential: 3-31                  [1, 512]                  32,768
│    │    └─AdaptiveMaxPool2d: 3-32           [1, 512, 1, 1]            --
│    │    └─Sequential: 3-33                  [1, 512]                  (recursive)
│    │    └─Sigmoid: 3-34                     [1, 512]                  --
│    └─SpatialAttention: 2-15                 [1, 512, 4, 4]            --
│    │    └─Conv2d: 3-35                      [1, 1, 4, 4]              98
│    │    └─Sigmoid: 3-36                     [1, 1, 4, 4]              --
├─ResNet: 1-9                                 --                        (recursive)
│    └─AdaptiveAvgPool2d: 2-16                [1, 512, 1, 1]            --
│    └─Linear: 2-17                           [1, 10]                   5,130
===============================================================================================
Total params: 11,217,874
Trainable params: 11,217,874
Non-trainable params: 0
Total mult-adds (M): 555.65
===============================================================================================
Input size (MB): 0.01
Forward/backward pass size (MB): 9.86
Params size (MB): 44.87
Estimated Total Size (MB): 54.74
===============================================================================================

可以看到，CBAM 模块被加在了每个 layer（残差块组）的后面。

模型训练加评估

三阶段式解冻与微调 (Progressive Unfreezing)

阶段一 (Epoch 1-5): 预热"实习生"

解冻部分: 仅解冻分类头 (`fc`) 和所有 `CBAM` 模块。

冻结部分: 冻结 ResNet18 的所有主干卷积层 (`conv1`, `bn1`, `layer1` 至 `layer4`)。注意：模型中的 `conv1` 已被替换为新建的 3x3 卷积，并不带预训练权重，冻结意味着它在前两个阶段会一直保持随机初始化的状态。

目标: 先利用强大的预训练特征，让模型快速学习新任务的分类边界，同时让注意力模块找到初步的关注点。

学习率: `1e-3` (使用较高学习率加速收敛)。

阶段二 (Epoch 6-20): 唤醒"高层专家"

解冻部分: 在上一阶段的基础上，额外解冻高层语义相关的卷积层 (`layer3`, `layer4`)。

冻结部分: 底层特征提取层 (`conv1`, `bn1`, `layer1`, `layer2`) 仍然冻结。

目标: 释放模型的高层特征提取能力，使其适应新任务的抽象概念 (例如"鸟的轮廓"比"一条边"更抽象)。

学习率: `1e-4` (降低学习率，避免新解冻的层因梯度过大而破坏其宝贵的预训练权重)。

阶段三 (Epoch 21-50): 全员协同微调

解冻部分: 解冻模型的所有层，进行端到端微调。

冻结部分: 无。

目标: 让模型的底层特征 (如边缘、纹理) 也与新任务进行对齐，做最后的精细化调整，提升整体性能。

学习率: `1e-5` (使用最低的学习率，在整个模型上缓慢、稳定地进行全局优化)。

python 复制代码

import time
 
# ======================================================================
# 4. Training routine combining the staged strategy with verbose logging
# ======================================================================
def set_trainable_layers(model, trainable_parts):
    """Freeze every parameter except those matching `trainable_parts`.

    A parameter stays trainable when its dotted name (as reported by
    `model.named_parameters()`) contains at least one of the given
    substrings; all other parameters get `requires_grad = False`.

    Args:
        model: Module whose parameters are (un)frozen in place.
        trainable_parts: Iterable of name fragments to keep trainable.
    """
    print(f"\n---> 解冻以下部分并设为可训练: {trainable_parts}")
    for param_name, parameter in model.named_parameters():
        parameter.requires_grad = any(
            fragment in param_name for fragment in trainable_parts
        )
 
def _start_stage(model, epoch):
    """Apply the freezing policy of the stage that begins at `epoch`.

    Returns a fresh Adam optimizer over the currently trainable parameters,
    or None when no stage starts at this epoch.
    """
    if epoch == 1:
        print("\n" + "="*50 + "\n🚀 **阶段 1：训练注意力模块和分类头**\n" + "="*50)
        # NOTE(review): ResNet18_CBAM replaces backbone.conv1 with a freshly
        # initialised layer, yet it stays frozen until stage 3; consider
        # adding "backbone.conv1" to the trainable parts of stages 1 and 2.
        set_trainable_layers(model, ["cbam", "backbone.fc"])
        lr = 1e-3
    elif epoch == 6:
        print("\n" + "="*50 + "\n✈️ **阶段 2：解冻高层卷积层 (layer3, layer4)**\n" + "="*50)
        set_trainable_layers(model, ["cbam", "backbone.fc", "backbone.layer3", "backbone.layer4"])
        lr = 1e-4
    elif epoch == 21:
        print("\n" + "="*50 + "\n🛰️ **阶段 3：解冻所有层，进行全局微调**\n" + "="*50)
        for param in model.parameters():
            param.requires_grad = True
        lr = 1e-5
    else:
        return None
    # A new optimizer is built per stage, so Adam's moment estimates restart.
    return optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)


def _train_one_epoch(model, criterion, optimizer, train_loader, device, epoch, epochs):
    """Run one training epoch; return (per-batch losses, mean loss, accuracy %)."""
    # NOTE(review): train() also puts the frozen backbone's BatchNorm layers in
    # training mode, so their running statistics keep updating during
    # stages 1 and 2 -- confirm this is intended.
    model.train()
    batch_losses = []
    running_loss, correct, total = 0.0, 0, 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        iter_loss = loss.item()
        batch_losses.append(iter_loss)
        running_loss += iter_loss

        _, predicted = output.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()

        # Progress line every 100 batches.
        if (batch_idx + 1) % 100 == 0:
            print(f'Epoch: {epoch}/{epochs} | Batch: {batch_idx+1}/{len(train_loader)} '
                  f'| 单Batch损失: {iter_loss:.4f} | 累计平均损失: {running_loss/(batch_idx+1):.4f}')

    return batch_losses, running_loss / len(train_loader), 100. * correct / total


def _evaluate(model, criterion, data_loader, device):
    """Return (mean per-batch loss, accuracy %) of `model` on `data_loader`."""
    model.eval()
    loss_sum, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss_sum += criterion(output, target).item()
            _, predicted = output.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()
    return loss_sum / len(data_loader), 100. * correct / total


def train_staged_finetuning(model, criterion, train_loader, test_loader, device, epochs):
    """Fine-tune `model` with a three-stage progressive-unfreezing schedule.

    Stage 1 starts at epoch 1 (CBAM modules and classifier head, lr 1e-3),
    stage 2 at epoch 6 (additionally backbone layer3/layer4, lr 1e-4) and
    stage 3 at epoch 21 (all parameters, lr 1e-5). After every epoch the model
    is evaluated on `test_loader`; when training ends, the per-iteration loss
    curve and the per-epoch accuracy/loss curves are plotted.

    Args:
        model: Network to train; parameter names are matched against "cbam",
            "backbone.fc", "backbone.layer3" and "backbone.layer4".
        criterion: Loss function called as `criterion(output, target)`.
        train_loader: Iterable of (data, target) training batches.
        test_loader: Iterable of (data, target) evaluation batches.
        device: Device the batches are moved to.
        epochs: Total number of epochs to run (must be >= 1).

    Returns:
        Test accuracy in percent after the last epoch.

    Raises:
        ValueError: If `epochs` is smaller than 1.
    """
    # Without at least one epoch there is no optimizer and no accuracy to
    # return; fail early instead of crashing on an unbound local at the end.
    if epochs < 1:
        raise ValueError(f"epochs must be >= 1, got {epochs}")

    optimizer = None

    # History buffers consumed by the plotting helpers.
    all_iter_losses, iter_indices = [], []
    train_acc_history, test_acc_history = [], []
    train_loss_history, test_loss_history = [], []

    for epoch in range(1, epochs + 1):
        epoch_start_time = time.time()

        # --- Switch freezing policy / learning rate at stage boundaries ---
        stage_optimizer = _start_stage(model, epoch)
        if stage_optimizer is not None:
            optimizer = stage_optimizer

        # --- Training ---
        batch_losses, epoch_train_loss, epoch_train_acc = _train_one_epoch(
            model, criterion, optimizer, train_loader, device, epoch, epochs
        )
        # Global 1-based iteration numbers for this epoch's batches.
        first_iter = (epoch - 1) * len(train_loader) + 1
        all_iter_losses.extend(batch_losses)
        iter_indices.extend(range(first_iter, first_iter + len(batch_losses)))
        train_loss_history.append(epoch_train_loss)
        train_acc_history.append(epoch_train_acc)

        # --- Evaluation ---
        epoch_test_loss, epoch_test_acc = _evaluate(model, criterion, test_loader, device)
        test_loss_history.append(epoch_test_loss)
        test_acc_history.append(epoch_test_acc)

        print(f'Epoch {epoch}/{epochs} 完成 | 耗时: {time.time() - epoch_start_time:.2f}s | 训练准确率: {epoch_train_acc:.2f}% | 测试准确率: {epoch_test_acc:.2f}%')

    # Plot the collected curves once training has finished.
    print("\n训练完成! 开始绘制结果图表...")
    plot_iter_losses(all_iter_losses, iter_indices)
    plot_epoch_metrics(train_acc_history, test_acc_history, train_loss_history, test_loss_history)

    return epoch_test_acc
 
# ======================================================================
# 5. Plotting helpers
# ======================================================================
def plot_iter_losses(losses, indices):
    """Plot the training loss of every iteration.

    Args:
        losses: Loss value of each training batch.
        indices: Global iteration number of each entry in `losses` (x axis).
    """
    plt.figure(figsize=(10, 4))
    plt.plot(indices, losses, 'b-', label='Iteration Loss', alpha=0.7)
    plt.title('每个 Iteration 的训练损失')
    plt.xlabel('Iteration（Batch序号）')
    plt.ylabel('损失值')
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()
 
def plot_epoch_metrics(train_acc, test_acc, train_loss, test_loss):
    """Plot per-epoch accuracy (left panel) and loss (right panel).

    Args:
        train_acc: Training accuracy per epoch, in percent.
        test_acc: Test accuracy per epoch, in percent.
        train_loss: Mean training loss per epoch.
        test_loss: Mean test loss per epoch.
    """
    epoch_axis = range(1, len(train_acc) + 1)
    # (subplot position, train series, test series, train label, test label, y label, title)
    panels = (
        (1, train_acc, test_acc, '训练准确率', '测试准确率', '准确率 (%)', '训练和测试准确率'),
        (2, train_loss, test_loss, '训练损失', '测试损失', '损失值', '训练和测试损失'),
    )

    plt.figure(figsize=(12, 4))
    for position, train_series, test_series, train_label, test_label, y_label, title in panels:
        plt.subplot(1, 2, position)
        plt.plot(epoch_axis, train_series, 'b-', label=train_label)
        plt.plot(epoch_axis, test_series, 'r-', label=test_label)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()
        plt.grid(True)
    plt.tight_layout()
    plt.show()
 
# ======================================================================
# 6. Run training
# ======================================================================
# Fresh model, loss and epoch budget for the staged fine-tuning run
model = ResNet18_CBAM().to(device)
criterion = nn.CrossEntropyLoss()
epochs = 50

print("开始使用带分阶段微调策略的ResNet18+CBAM模型进行训练...")
final_accuracy = train_staged_finetuning(
    model=model,
    criterion=criterion,
    train_loader=train_loader,
    test_loader=test_loader,
    device=device,
    epochs=epochs,
)
print(f"训练完成！最终测试准确率: {final_accuracy:.2f}%")
 
# torch.save(model.state_dict(), 'resnet18_cbam_finetuned.pth')
# print("模型已保存为: resnet18_cbam_finetuned.pth")

@浙大疏锦行