import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader
import os
import matplotlib.pyplot as plt
import numpy as np
import time
# ============================
# Matplotlib setup and pretrained-model cache path
# ============================
plt.rcParams["font.family"] = ["SimHei"]  # CJK-capable font (Windows); harmless for English labels
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly with this font
os.environ['TORCH_HOME'] = r'E:\PyStudy'
print(f"Pretrained weights will be cached under: {os.path.join(os.environ['TORCH_HOME'], 'hub', 'checkpoints')}")
# Create the directory if it does not exist
if not os.path.exists(os.environ['TORCH_HOME']):
    os.makedirs(os.environ['TORCH_HOME'])
    print(f"Created directory: {os.environ['TORCH_HOME']}")
# ============================
# CBAM module definitions
# ============================
class ChannelAttention(nn.Module):
    """Channel attention: a shared MLP over global avg- and max-pooled features."""
    def __init__(self, in_channels, ratio=16):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        # Shared MLP implemented as 1x1 convolutions with a bottleneck of `ratio`
        self.fc = nn.Sequential(
            nn.Conv2d(in_channels, in_channels // ratio, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(in_channels // ratio, in_channels, 1, bias=False)
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = self.fc(self.avg_pool(x))
        max_out = self.fc(self.max_pool(x))
        out = avg_out + max_out
        return self.sigmoid(out)
class SpatialAttention(nn.Module):
    """Spatial attention: a 7x7 conv over the channel-wise mean and max maps."""
    def __init__(self, kernel_size=7):
        super().__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=kernel_size // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        out = torch.cat([avg_out, max_out], dim=1)
        out = self.conv(out)
        return self.sigmoid(out)
class CBAM(nn.Module):
    """CBAM: channel attention followed by spatial attention (Woo et al., 2018)."""
    def __init__(self, in_channels, ratio=16, kernel_size=7):
        super().__init__()
        self.channel_attention = ChannelAttention(in_channels, ratio)
        self.spatial_attention = SpatialAttention(kernel_size)

    def forward(self, x):
        out = x * self.channel_attention(x)
        out = out * self.spatial_attention(out)
        return out
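# Quick sanity check (a minimal sketch, not part of the training pipeline):
# CBAM only rescales activations, so its output shape must equal its input
# shape. This helper is hypothetical and is not called anywhere in the script.
def _demo_cbam_shapes():
    x = torch.randn(2, 64, 32, 32)    # (batch, channels, H, W)
    out = CBAM(in_channels=64)(x)
    assert out.shape == x.shape       # both attention maps broadcast over x
    print(f"CBAM output shape: {tuple(out.shape)}")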
# ============================
# VGG16 + CBAM model
# ============================
class VGG16_CBAM(nn.Module):
    def __init__(self, num_classes=10, pretrained=True, cbam_ratio=16, cbam_kernel=7):
        super().__init__()
        if pretrained:
            print(f"Downloading VGG16 pretrained weights to: {os.environ['TORCH_HOME']}")
            print("The file is about 528 MB; please be patient...")
        # Load VGG16; torchvision >= 0.13 uses the `weights` API instead of
        # the deprecated `pretrained` flag
        weights = models.VGG16_Weights.IMAGENET1K_V1 if pretrained else None
        vgg = models.vgg16(weights=weights)
        # Keep VGG16's convolutional feature extractor (the classifier is replaced below)
        self.features = vgg.features
        # Insert a CBAM block after each of VGG16's five pooling stages
        self.cbam_layer1 = CBAM(in_channels=64, ratio=cbam_ratio, kernel_size=cbam_kernel)
        self.cbam_layer2 = CBAM(in_channels=128, ratio=cbam_ratio, kernel_size=cbam_kernel)
        self.cbam_layer3 = CBAM(in_channels=256, ratio=cbam_ratio, kernel_size=cbam_kernel)
        self.cbam_layer4 = CBAM(in_channels=512, ratio=cbam_ratio, kernel_size=cbam_kernel)
        self.cbam_layer5 = CBAM(in_channels=512, ratio=cbam_ratio, kernel_size=cbam_kernel)
        # Adaptive pooling so the classifier sees a fixed 7x7 map regardless of input size
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # New classification head
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes)
        )
        # Adjust for small input sizes
        self._modify_for_small_input()
    def _modify_for_small_input(self):
        """Verify VGG16 is usable as-is for small inputs (e.g. 32x32).

        VGG16's first conv is already 3x3 / stride 1 / padding 1 and every
        pool is already 2x2 / stride 2, so no layers need replacing here;
        swapping them for freshly initialized copies would only discard the
        pretrained weights.
        """
        for layer in self.features:
            if isinstance(layer, nn.MaxPool2d):
                assert layer.kernel_size == 2 and layer.stride == 2
    def forward(self, x):
        # torchvision VGG16 feature indices: the pools sit at 4, 9, 16, 23, 30
        x = self.features[0:4](x)    # block 1 convs
        x = self.features[4](x)      # pool 1 -> 64 channels
        x = self.cbam_layer1(x)
        x = self.features[5:9](x)    # block 2 convs
        x = self.features[9](x)      # pool 2 -> 128 channels
        x = self.cbam_layer2(x)
        x = self.features[10:16](x)  # block 3 convs
        x = self.features[16](x)     # pool 3 -> 256 channels
        x = self.cbam_layer3(x)
        x = self.features[17:23](x)  # block 4 convs
        x = self.features[23](x)     # pool 4 -> 512 channels
        x = self.cbam_layer4(x)
        x = self.features[24:30](x)  # block 5 convs
        x = self.features[30](x)     # pool 5 -> 512 channels
        x = self.cbam_layer5(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
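# Smoke test (a minimal sketch; `pretrained=False` avoids the 528 MB download).
# A CIFAR-10-sized batch should come out as (batch, num_classes) logits.
# This helper is hypothetical and is not called anywhere in the script.
def _demo_vgg16_cbam_forward():
    model = VGG16_CBAM(num_classes=10, pretrained=False)
    model.eval()
    with torch.no_grad():
        logits = model(torch.randn(2, 3, 32, 32))
    assert logits.shape == (2, 10)
    print(f"Logits shape: {tuple(logits.shape)}")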
# ============================
# Simplified VGG16_CBAM
# ============================
class VGG16_CBAM_Simple(nn.Module):
    def __init__(self, num_classes=10, pretrained=True):
        super().__init__()
        if pretrained:
            print(f"Downloading VGG16 pretrained weights to: {os.environ['TORCH_HOME']}")
        weights = models.VGG16_Weights.IMAGENET1K_V1 if pretrained else None
        vgg = models.vgg16(weights=weights)
        # Keep only the first two VGG blocks (indices 0-9, ending at pool 2, 128 channels)
        self.features = nn.Sequential(*list(vgg.features.children())[:10])
        self.cbam = CBAM(in_channels=128)
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(128, 256),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.cbam(x)
        x = self.classifier(x)
        return x
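# Optional comparison (a minimal sketch, not called anywhere): the simplified
# variant drops VGG blocks 3-5 and the 4096-wide head, so its parameter count
# should come out far below the full model's.
def _demo_param_counts():
    full = VGG16_CBAM(num_classes=10, pretrained=False)
    simple = VGG16_CBAM_Simple(num_classes=10, pretrained=False)
    for name, m in [("full", full), ("simple", simple)]:
        print(f"{name}: {sum(p.numel() for p in m.parameters()):,} parameters")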
# ============================
# Training functions (updated version, no tqdm required)
# ============================
def train_epoch(model, dataloader, criterion, optimizer, device):
    """Train for one epoch."""
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    iteration_losses = []   # loss of every iteration
    iteration_indices = []  # iteration index within this epoch
    print(f"Training... ({len(dataloader)} batches)")
    for batch_idx, (inputs, targets) in enumerate(dataloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        # Record the per-iteration loss
        iteration_losses.append(loss.item())
        iteration_indices.append(batch_idx + 1)
        # Print progress every 10 batches
        if (batch_idx + 1) % 10 == 0 or (batch_idx + 1) == len(dataloader):
            current_acc = 100. * correct / total
            avg_loss = running_loss / (batch_idx + 1)
            print(f"  Batch [{batch_idx+1}/{len(dataloader)}] - "
                  f"Loss: {loss.item():.4f} | "
                  f"Avg Loss: {avg_loss:.4f} | "
                  f"Acc: {current_acc:.2f}%")
    epoch_loss = running_loss / len(dataloader)
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc, iteration_losses, iteration_indices
def validate(model, dataloader, criterion, device):
    """Evaluate on the validation set."""
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    print(f"Validating... ({len(dataloader)} batches)")
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(dataloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            # Print progress every 5 batches
            if (batch_idx + 1) % 5 == 0 or (batch_idx + 1) == len(dataloader):
                current_acc = 100. * correct / total
                print(f"  Batch [{batch_idx+1}/{len(dataloader)}] - "
                      f"Acc: {current_acc:.2f}%")
    epoch_loss = running_loss / len(dataloader)
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc
# ============================
# Fine-tuning function (updated version, 50 epochs, no tqdm required)
# ============================
def fine_tune_vgg16_cbam(model, train_loader, val_loader, num_epochs=50, lr=0.001, device='cuda'):
    """Fine-tune the VGG16+CBAM model."""
    model = model.to(device)
    # Loss function
    criterion = nn.CrossEntropyLoss()
    # Optimizer: different learning rates for different parts of the model.
    # The pretrained backbone gets 0.1x the base lr; the CBAM blocks and the
    # freshly initialized classifier train at the full base lr.
    optimizer = optim.SGD([
        {'params': model.features.parameters(), 'lr': lr * 0.1},
        {'params': model.cbam_layer1.parameters()},
        {'params': model.cbam_layer2.parameters()},
        {'params': model.cbam_layer3.parameters()},
        {'params': model.cbam_layer4.parameters()},
        {'params': model.cbam_layer5.parameters()},
        {'params': model.classifier.parameters(), 'lr': lr}
    ], lr=lr, momentum=0.9, weight_decay=5e-4)
    # LR scheduler (note: the deprecated `verbose` argument is omitted)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', patience=5, factor=0.5
    )
    # Training history
    history = {
        'train_loss': [], 'train_acc': [],
        'val_loss': [], 'val_acc': [],
        'iteration_losses': [], 'iteration_indices': [],
        'learning_rates': []
    }
    best_acc = 0.0
    start_time = time.time()
    print(f"Starting training for {num_epochs} epochs...")
    print("=" * 70)
    for epoch in range(num_epochs):
        epoch_start_time = time.time()
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        print("-" * 50)
        # Train
        train_loss, train_acc, iter_losses, iter_indices = train_epoch(
            model, train_loader, criterion, optimizer, device
        )
        # Validate
        val_loss, val_acc = validate(model, val_loader, criterion, device)
        # Update the learning rate
        scheduler.step(val_loss)
        # Record the learning rate (group 0 is the backbone, i.e. 0.1x the head lr)
        current_lr = optimizer.param_groups[0]['lr']
        history['learning_rates'].append(current_lr)
        # Save history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        # Log per-iteration losses against a global iteration counter so that
        # the x-axis in plot_iter_losses stays monotonic across epochs
        offset = len(history['iteration_losses'])
        history['iteration_losses'].extend(iter_losses)
        history['iteration_indices'].extend(range(offset + 1, offset + len(iter_losses) + 1))
        # Epoch wall-clock time
        epoch_time = time.time() - epoch_start_time
        # Print results
        print(f"\nEpoch {epoch+1} results:")
        print(f"  ├── Train loss: {train_loss:.4f}")
        print(f"  ├── Train accuracy: {train_acc:.2f}%")
        print(f"  ├── Val loss: {val_loss:.4f}")
        print(f"  ├── Val accuracy: {val_acc:.2f}%")
        print(f"  ├── Learning rate: {current_lr:.6f}")
        print(f"  └── Time: {epoch_time:.1f}s")
        # Save the best model so far
        if val_acc > best_acc:
            best_acc = val_acc
            save_path = os.path.join(os.environ['TORCH_HOME'], 'vgg16_cbam_best.pth')
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_acc': train_acc,
                'val_acc': val_acc,
                'history': history
            }, save_path)
            print(f"  ✓ Best model saved to: {save_path}")
        print("=" * 70)
    total_time = time.time() - start_time
    print("\nTraining finished!")
    print(f"Total time: {total_time:.1f}s ({total_time/60:.1f} min)")
    print(f"Average time per epoch: {total_time/num_epochs:.1f}s")
    print(f"Best validation accuracy: {best_acc:.2f}%")
    return history
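# Reloading the best checkpoint saved above (a minimal sketch, assuming the
# key layout used in fine_tune_vgg16_cbam; not called anywhere in this script).
def load_best_checkpoint(model, device='cpu'):
    ckpt_path = os.path.join(os.environ['TORCH_HOME'], 'vgg16_cbam_best.pth')
    checkpoint = torch.load(ckpt_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    print(f"Restored epoch {checkpoint['epoch'] + 1}, val acc {checkpoint['val_acc']:.2f}%")
    return model, checkpoint['history']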
# ============================
# Plotting functions
# ============================
def plot_iter_losses(losses, indices):
    """Plot the per-iteration training loss."""
    plt.figure(figsize=(12, 5))
    # Moving average with a window of 50
    window_size = 50
    if len(losses) > window_size:
        moving_avg = np.convolve(losses, np.ones(window_size) / window_size, mode='valid')
        moving_indices = indices[window_size-1:]
        plt.subplot(1, 2, 1)
        plt.plot(indices, losses, 'b-', alpha=0.3, label='Raw loss', linewidth=0.5)
        plt.plot(moving_indices, moving_avg, 'r-', label=f'{window_size}-step moving average', linewidth=2)
        plt.xlabel('Iteration (batch index)')
        plt.ylabel('Loss')
        plt.title('Training loss per iteration')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.subplot(1, 2, 2)
        plt.plot(moving_indices, moving_avg, 'r-', linewidth=2)
        plt.xlabel('Iteration (batch index)')
        plt.ylabel('Loss')
        plt.title('Moving average of the loss')
        plt.grid(True, alpha=0.3)
    else:
        plt.plot(indices, losses, 'b-', alpha=0.7, label='Iteration loss')
        plt.xlabel('Iteration (batch index)')
        plt.ylabel('Loss')
        plt.title('Training loss per iteration')
        plt.legend()
        plt.grid(True)
    plt.tight_layout()
    plt.show()
def plot_epoch_metrics(train_acc, test_acc, train_loss, test_loss):
    """Plot per-epoch training and test metrics."""
    epochs = range(1, len(train_acc) + 1)
    plt.figure(figsize=(15, 5))
    # Accuracy subplot
    plt.subplot(1, 3, 1)
    plt.plot(epochs, train_acc, 'b-', linewidth=2, label='Train accuracy')
    plt.plot(epochs, test_acc, 'r-', linewidth=2, label='Test accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.title('Train and test accuracy')
    plt.legend()
    plt.grid(True, alpha=0.3)
    # Loss subplot
    plt.subplot(1, 3, 2)
    plt.plot(epochs, train_loss, 'b-', linewidth=2, label='Train loss')
    plt.plot(epochs, test_loss, 'r-', linewidth=2, label='Test loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Train and test loss')
    plt.legend()
    plt.grid(True, alpha=0.3)
    # Train/test accuracy gap (a rough overfitting indicator)
    plt.subplot(1, 3, 3)
    accuracy_gap = [train - test for train, test in zip(train_acc, test_acc)]
    plt.plot(epochs, accuracy_gap, 'g-', linewidth=2)
    plt.axhline(y=0, color='r', linestyle='--', alpha=0.5)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy gap (%)')
    plt.title('Train-test accuracy gap')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()
def plot_learning_rate(learning_rates):
    """Plot the learning-rate schedule."""
    plt.figure(figsize=(10, 4))
    epochs = range(1, len(learning_rates) + 1)
    plt.plot(epochs, learning_rates, 'b-o', linewidth=2, markersize=6)
    plt.xlabel('Epoch')
    plt.ylabel('Learning rate')
    plt.title('Learning-rate schedule')
    plt.yscale('log')  # log scale makes the step changes easier to see
    plt.grid(True, alpha=0.3)
    # Annotate the first point, the last point, and every point where the lr changed
    for i, (epoch, lr) in enumerate(zip(epochs, learning_rates)):
        if i == 0 or i == len(learning_rates) - 1 or lr != learning_rates[i-1]:
            plt.annotate(f'{lr:.2e}', xy=(epoch, lr), xytext=(5, 5),
                         textcoords='offset points', fontsize=9)
    plt.tight_layout()
    plt.show()
def plot_training_summary(history):
    """Plot the full training summary."""
    # 1. Epoch-level metrics
    plot_epoch_metrics(
        history['train_acc'],
        history['val_acc'],
        history['train_loss'],
        history['val_loss']
    )
    # 2. Iteration-level loss
    plot_iter_losses(history['iteration_losses'], history['iteration_indices'])
    # 3. Learning-rate schedule
    if 'learning_rates' in history and len(history['learning_rates']) > 0:
        plot_learning_rate(history['learning_rates'])
    # 4. Final statistics
    print("\n" + "=" * 60)
    print("Training summary:")
    print("=" * 60)
    print(f"Final train accuracy: {history['train_acc'][-1]:.2f}%")
    print(f"Final validation accuracy: {history['val_acc'][-1]:.2f}%")
    best_val = max(history['val_acc'])
    print(f"Best validation accuracy: {best_val:.2f}% (epoch {history['val_acc'].index(best_val) + 1})")
    print(f"Final train loss: {history['train_loss'][-1]:.4f}")
    print(f"Final validation loss: {history['val_loss'][-1]:.4f}")
# ============================
# Main program: the full training pipeline
# ============================
if __name__ == "__main__":
    # Select the device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    # ============================
    # Data preprocessing and loading
    # ============================
    print("\nPreparing data loaders...")
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.RandomRotation(15),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))  # CIFAR-10 stats
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])
    # Load the datasets
    train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
    test_dataset = datasets.CIFAR10(root='./data', train=False, transform=test_transform)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
    print(f"Training set size: {len(train_dataset)}")
    print(f"Test set size: {len(test_dataset)}")
    print(f"Training batches: {len(train_loader)}")
    print(f"Test batches: {len(test_loader)}")
    # ============================
    # Create the model
    # ============================
    print("\nCreating the VGG16_CBAM model...")
    # True for the simplified variant, False for the full model (note:
    # fine_tune_vgg16_cbam expects the full model's parameter groups)
    use_simple_model = False
    if use_simple_model:
        model = VGG16_CBAM_Simple(num_classes=10, pretrained=True)
    else:
        model = VGG16_CBAM(num_classes=10, pretrained=True)
    # Count parameters
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")
    # ============================
    # Start training
    # ============================
    print("\n" + "=" * 60)
    print("Starting VGG16+CBAM training")
    print("=" * 60)
    history = fine_tune_vgg16_cbam(
        model=model,
        train_loader=train_loader,
        val_loader=test_loader,  # use the test set as the validation set
        num_epochs=50,
        lr=0.001,
        device=device
    )
    # ============================
    # Plot the training results
    # ============================
    print("\nPlotting training results...")
    plot_training_summary(history)
    # # ============================
    # # Save the final model
    # # ============================
    # final_model_path = os.path.join(os.environ['TORCH_HOME'], 'vgg16_cbam_final.pth')
    # torch.save({
    #     'model_state_dict': model.state_dict(),
    #     'history': history,
    #     'config': {
    #         'num_classes': 10,
    #         'pretrained': True,
    #         'simple_version': use_simple_model
    #     }
    # }, final_model_path)
    # print(f"\nFinal model saved to: {final_model_path}")