Day 58 Python Study

@浙大疏锦行

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR

# Font configuration (SimHei is a CJK font, kept from the original setup)
plt.rcParams["font.family"] = ["SimHei"]
plt.rcParams['axes.unicode_minus'] = False  # fix minus-sign rendering with this font

# 1. Data preprocessing with augmentation for training
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),          # random crop with 4px padding
    transforms.RandomHorizontalFlip(p=0.5),        # random horizontal flip
    transforms.RandomRotation(15),                 # random rotation up to ±15°
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),  # color jitter
    transforms.ToTensor(),                         # convert to tensor
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))  # per-channel CIFAR-10 mean/std
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
])

# 2. Load the CIFAR-10 dataset
train_dataset = datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_train
)

test_dataset = datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_test
)

# 3. Create data loaders (with a larger batch size)
batch_size = 128  # increased batch size
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
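
# Optional sanity check: each batch is [batch_size, 3, 32, 32], which flattens
# to 3*32*32 = 3072 features — the input size used by the MLP below.
# (Hypothetical inspection snippet, not part of the original script.)
# images, labels = next(iter(train_loader))
# print(images.shape, labels.shape)  # torch.Size([128, 3, 32, 32]) torch.Size([128])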

# 4. Improved MLP model (adds batch normalization and more layers)
class ImprovedMLP(nn.Module):
    def __init__(self, hidden_layers=(1024, 512, 256), dropout_rate=0.3):
        super(ImprovedMLP, self).__init__()
        self.flatten = nn.Flatten()
        
        layers = []
        input_size = 3072  # 3*32*32
        
        # Build the hidden layers dynamically
        for hidden_size in hidden_layers:
            layers.append(nn.Linear(input_size, hidden_size))
            layers.append(nn.BatchNorm1d(hidden_size))  # batch normalization
            layers.append(nn.ReLU(inplace=True))
            layers.append(nn.Dropout(dropout_rate))
            input_size = hidden_size
        
        # Output layer
        layers.append(nn.Linear(input_size, 10))
        
        self.network = nn.Sequential(*layers)
        
    def forward(self, x):
        return self.network(self.flatten(x))
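

# The model selection below also mentions MLPWithResidual, which the original
# source never defines. A minimal sketch of what such a variant could look like
# (an assumption added for completeness, not the author's actual implementation):
class MLPWithResidual(nn.Module):
    def __init__(self, hidden_size=1024, num_blocks=3, dropout_rate=0.3):
        super().__init__()
        self.flatten = nn.Flatten()
        self.input_proj = nn.Linear(3072, hidden_size)  # 3*32*32 -> hidden width
        # Equal-width blocks so the skip connection x + block(x) is shape-compatible
        self.blocks = nn.ModuleList([
            nn.Sequential(
                nn.Linear(hidden_size, hidden_size),
                nn.BatchNorm1d(hidden_size),
                nn.ReLU(inplace=True),
                nn.Dropout(dropout_rate),
            )
            for _ in range(num_blocks)
        ])
        self.head = nn.Linear(hidden_size, 10)

    def forward(self, x):
        x = torch.relu(self.input_proj(self.flatten(x)))
        for block in self.blocks:
            x = x + block(x)  # residual (skip) connection
        return self.head(x)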

      
# Check whether a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Initialize the model (choose one of the two)
model = ImprovedMLP(hidden_layers=[2048, 1024, 512, 256], dropout_rate=0.4)
# model = MLPWithResidual()

model = model.to(device)

# Parameter statistics
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

# 5. Cross-entropy loss with label smoothing
class LabelSmoothingCrossEntropy(nn.Module):
    def __init__(self, smoothing=0.1):
        super(LabelSmoothingCrossEntropy, self).__init__()
        self.smoothing = smoothing
        
    def forward(self, x, target):
        log_probs = torch.nn.functional.log_softmax(x, dim=-1)
        nll_loss = -log_probs.gather(dim=-1, index=target.unsqueeze(1))
        nll_loss = nll_loss.squeeze(1)
        smooth_loss = -log_probs.mean(dim=-1)
        loss = (1 - self.smoothing) * nll_loss + self.smoothing * smooth_loss
        return loss.mean()
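
# Side note: since PyTorch 1.10, nn.CrossEntropyLoss accepts a label_smoothing
# argument, and the class above should match it numerically. A quick check
# (a verification sketch, not part of the original training flow):
# _logits = torch.randn(8, 10)
# _targets = torch.randint(0, 10, (8,))
# a = LabelSmoothingCrossEntropy(smoothing=0.1)(_logits, _targets)
# b = nn.CrossEntropyLoss(label_smoothing=0.1)(_logits, _targets)
# torch.testing.assert_close(a, b)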

# Choose the loss function
criterion = LabelSmoothingCrossEntropy(smoothing=0.1)  # or use nn.CrossEntropyLoss()

# Optimizer configuration
optimizer = optim.AdamW(model.parameters(),
                       lr=0.001,
                       weight_decay=1e-4,  # decoupled weight decay
                       betas=(0.9, 0.999))

# Learning-rate scheduler
scheduler = CosineAnnealingLR(optimizer, T_max=50, eta_min=1e-6)
# scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3, verbose=True)
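
# Optional: preview the cosine schedule before training (hypothetical snippet;
# uses a throwaway optimizer so the real one is untouched). With T_max=50 and
# one scheduler.step() per epoch, the LR decays from 1e-3 toward 1e-6 over 50 epochs:
# _opt = optim.AdamW([torch.zeros(1, requires_grad=True)], lr=0.001)
# _sched = CosineAnnealingLR(_opt, T_max=50, eta_min=1e-6)
# lrs = [_opt.param_groups[0]['lr']]
# for _ in range(50):
#     _sched.step()
#     lrs.append(_opt.param_groups[0]['lr'])
# plt.plot(lrs); plt.xlabel('Epoch'); plt.ylabel('LR'); plt.show()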

# 6. Enhanced training function
def train(model, train_loader, test_loader, criterion, optimizer, scheduler, device, epochs):
    
    train_losses = []
    test_losses = []
    train_accuracies = []
    test_accuracies = []
    all_iter_losses = []
    iter_indices = []
    
    best_accuracy = 0.0
    
    # Mixed-precision training if a GPU is available; the GradScaler is created
    # once, outside the epoch loop, so its scale state carries across epochs
    scaler = torch.cuda.amp.GradScaler() if device.type == 'cuda' else None
    
    for epoch in range(epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            
            optimizer.zero_grad()
            
            if scaler:
                # Mixed-precision forward and backward pass
                with torch.cuda.amp.autocast():
                    output = model(data)
                    loss = criterion(output, target)
                
                scaler.scale(loss).backward()
                # Clip on unscaled gradients, before the optimizer step
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                scaler.step(optimizer)
                scaler.update()
            else:
                output = model(data)
                loss = criterion(output, target)
                loss.backward()
                # Gradient clipping, before the optimizer step
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
            
            # Record the per-iteration loss
            iter_loss = loss.item()
            all_iter_losses.append(iter_loss)
            iter_indices.append(epoch * len(train_loader) + batch_idx + 1)
            
            running_loss += iter_loss
            _, predicted = output.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()
            
            if (batch_idx + 1) % 50 == 0:
                acc = 100. * correct / total
                print(f'Epoch: {epoch+1}/{epochs} | Batch: {batch_idx+1}/{len(train_loader)} '
                      f'| Loss: {iter_loss:.4f} | Acc: {acc:.2f}%')
        
        # Aggregate training statistics
        epoch_train_loss = running_loss / len(train_loader)
        epoch_train_acc = 100. * correct / total
        train_losses.append(epoch_train_loss)
        train_accuracies.append(epoch_train_acc)
        
        # Evaluation phase
        model.eval()
        test_loss = 0
        correct_test = 0
        total_test = 0
        
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                test_loss += criterion(output, target).item()
                _, predicted = output.max(1)
                total_test += target.size(0)
                correct_test += predicted.eq(target).sum().item()
        
        epoch_test_loss = test_loss / len(test_loader)
        epoch_test_acc = 100. * correct_test / total_test
        test_losses.append(epoch_test_loss)
        test_accuracies.append(epoch_test_acc)
        
        # Update the learning rate
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(epoch_test_acc)
        else:
            scheduler.step()
        
        # Save the best model so far
        if epoch_test_acc > best_accuracy:
            best_accuracy = epoch_test_acc
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'accuracy': best_accuracy,
            }, 'best_model.pth')
        
        # Print the epoch summary
        current_lr = optimizer.param_groups[0]['lr']
        print(f'Epoch {epoch+1}/{epochs} done | '
              f'Train loss: {epoch_train_loss:.4f} | Train acc: {epoch_train_acc:.2f}% | '
              f'Test loss: {epoch_test_loss:.4f} | Test acc: {epoch_test_acc:.2f}% | '
              f'LR: {current_lr:.6f}')
        print('-' * 80)
    
    # Plot the results
    plot_results(train_losses, test_losses, train_accuracies, test_accuracies, all_iter_losses, iter_indices)
    
    return best_accuracy

# 7. Plot combined results
def plot_results(train_losses, test_losses, train_accs, test_accs, iter_losses, iter_indices):
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    
    # 1. Per-iteration training loss
    axes[0, 0].plot(iter_indices, iter_losses, 'b-', alpha=0.5, linewidth=0.5)
    axes[0, 0].set_xlabel('Iteration (batch index)')
    axes[0, 0].set_ylabel('Loss')
    axes[0, 0].set_title('Training Loss per Iteration')
    axes[0, 0].grid(True, alpha=0.3)
    
    # 2. Per-epoch training and test loss
    epochs = range(1, len(train_losses) + 1)
    axes[0, 1].plot(epochs, train_losses, 'b-', label='Train loss')
    axes[0, 1].plot(epochs, test_losses, 'r-', label='Test loss')
    axes[0, 1].set_xlabel('Epoch')
    axes[0, 1].set_ylabel('Loss')
    axes[0, 1].set_title('Training and Test Loss')
    axes[0, 1].legend()
    axes[0, 1].grid(True, alpha=0.3)
    
    # 3. Per-epoch training and test accuracy
    axes[1, 0].plot(epochs, train_accs, 'b-', label='Train accuracy')
    axes[1, 0].plot(epochs, test_accs, 'r-', label='Test accuracy')
    axes[1, 0].set_xlabel('Epoch')
    axes[1, 0].set_ylabel('Accuracy (%)')
    axes[1, 0].set_title('Training and Test Accuracy')
    axes[1, 0].legend()
    axes[1, 0].grid(True, alpha=0.3)
    
    # 4. Moving average of the iteration loss
    window_size = 100
    smoothed_losses = np.convolve(iter_losses, np.ones(window_size)/window_size, mode='valid')
    smoothed_indices = iter_indices[window_size-1:]
    
    axes[1, 1].plot(smoothed_indices, smoothed_losses, 'g-', linewidth=2)
    axes[1, 1].set_xlabel('Iteration (batch index)')
    axes[1, 1].set_ylabel('Loss (moving average)')
    axes[1, 1].set_title(f'Training Loss ({window_size}-iteration moving average)')
    axes[1, 1].grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()


# 8. Single-fold training helper (used by the k-fold mode sketched at the end)
def train_single_fold(model, train_loader, val_loader, criterion, optimizer, scheduler, device, epochs, fold_num):
    best_acc = 0.0
    
    for epoch in range(epochs):
        model.train()
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
        
        scheduler.step()
        
        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for data, target in val_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                _, predicted = output.max(1)
                total += target.size(0)
                correct += predicted.eq(target).sum().item()
        
        val_acc = 100. * correct / total
        if val_acc > best_acc:
            best_acc = val_acc
        
        print(f'Fold {fold_num} | Epoch {epoch+1}/{epochs} | Val accuracy: {val_acc:.2f}%')
    
    return best_acc

# 9. Main entry point
if __name__ == "__main__":
    # Choose the training mode
    training_mode = "normal"  # options: "normal", "kfold"
    
    if training_mode == "normal":
        # Standard training run
        epochs = 50
        print("Starting training...")
        best_accuracy = train(model, train_loader, test_loader, criterion, optimizer, scheduler, device, epochs)
        print(f"Training finished! Best test accuracy: {best_accuracy:.2f}%")
        
        # Load the best checkpoint and evaluate it
        checkpoint = torch.load('best_model.pth', map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()
        
        # Final evaluation
        correct = 0
        total = 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                _, predicted = output.max(1)
                total += target.size(0)
                correct += predicted.eq(target).sum().item()
        
        final_acc = 100. * correct / total
        print(f"最终测试准确率: {final_acc:.2f}%")