周报5.31

本周复现了AlexNet网络结构

特征提取层 (self.features) 由 5 个卷积层 + 3 个最大池化层组成

分类器 (self.classifier) 由 3 个全连接层组成

Python 代码(alexnet.py):
class AlexNet(nn.Module):
    """AlexNet-style CNN: five convolutions followed by three linear layers.

    This is the single-stream variant: every convolution is an ordinary
    (ungrouped) ``nn.Conv2d`` and no local response normalization is applied.

    Input is a tensor of shape ``(N, 3, H, W)``. A 224x224 input yields a
    256x6x6 feature map directly; for other sizes the feature map is
    adaptively average-pooled to 6x6 so the classifier's fixed
    ``256 * 6 * 6`` input width always matches. Inputs must still be large
    enough to survive the three stride-2 poolings (roughly 63x63 or larger).

    Args:
        num_classes: Number of output logits per sample.
    """

    def __init__(self, num_classes: int = 1000) -> None:
        super().__init__()

        self.features = nn.Sequential(
            # Conv1: 11x11 kernel, stride 4, 3 input channels
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            # Conv2: 5x5 kernel (ungrouped; no per-GPU channel split here)
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            # Conv3: 3x3 kernel
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),

            # Conv4: 3x3 kernel
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),

            # Conv5: 3x3 kernel
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        # Parameter-free, so state_dict keys are unchanged. It is an identity
        # when the feature map is already 6x6 (e.g. 224x224 inputs) and keeps
        # the flattened width fixed for every other input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        self.classifier = nn.Sequential(
            # FC6
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),

            # FC7
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),

            # FC8: raw logits, no softmax
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape ``(N, num_classes)`` for input ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        # Keep the batch dimension, flatten channels and spatial dims.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

AlexNet 模型训练脚本,用于在 CIFAR-10 数据集上训练 AlexNet 模型。

脚本流程:加载 CIFAR-10 数据集(50,000 张训练图片 + 10,000 张验证图片,验证集即官方测试集 `train=False`),将图片缩放到 224×224 并应用随机水平翻转作为数据增强;创建 AlexNet 模型并移动到 GPU/CPU 设备;训练 30 个 Epoch,每个 Epoch 包含:前向传播 → 计算损失 → 反向传播 → 参数更新;每个 Epoch 结束后在验证集上评估准确率,并将验证准确率最高的模型权重保存到文件 `./checkpoints/alexnet_best.pth`。

Python 代码(训练脚本):
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import os
import time
from alexnet import AlexNet


def _train_one_epoch(model, loader, criterion, optimizer, device, epoch, num_epochs):
    """Run one optimisation pass over ``loader``.

    Prints running loss/accuracy every 100 batches.

    Returns:
        ``(mean_loss, accuracy_percent)`` over all samples seen in the epoch.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    seen = 0
    num_batches = len(loader)

    for batch_idx, (inputs, targets) in enumerate(loader):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # The criterion built in main() returns the batch mean; weight it by
        # the batch size so a short final batch is not over-represented.
        batch_n = targets.size(0)
        loss_sum += loss.item() * batch_n
        seen += batch_n
        _, predicted = outputs.max(1)
        correct += predicted.eq(targets).sum().item()

        if batch_idx % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx}/{num_batches}], '
                  f'Loss: {loss_sum/seen:.4f}, Acc: {100.*correct/seen:.2f}%')

    return loss_sum / seen, 100. * correct / seen


def _evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without tracking gradients.

    Returns:
        ``(mean_loss, accuracy_percent)`` over all samples in ``loader``.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0

    with torch.no_grad():
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # Same per-sample weighting as in training (see above).
            batch_n = targets.size(0)
            loss_sum += loss.item() * batch_n
            seen += batch_n
            _, predicted = outputs.max(1)
            correct += predicted.eq(targets).sum().item()

    return loss_sum / seen, 100. * correct / seen


def main():
    """Train AlexNet on CIFAR-10 and save the best-scoring weights.

    Downloads CIFAR-10 into ``./data`` if needed, trains for 30 epochs with
    SGD and a StepLR schedule, evaluates on the ``train=False`` split after
    every epoch, and writes the state dict with the highest accuracy on that
    split to ``./checkpoints/alexnet_best.pth``. Progress is reported on
    stdout.
    """
    # Run configuration. The log lines below are built from these values so
    # they cannot drift from what is actually used.
    data_root = './data'
    checkpoint_dir = './checkpoints'
    checkpoint_path = f'{checkpoint_dir}/alexnet_best.pth'
    num_classes = 10
    batch_size = 64
    num_workers = 2
    lr = 0.01
    momentum = 0.9
    weight_decay = 5e-4
    step_size = 10
    gamma = 0.1
    num_epochs = 30

    banner = "=" * 80
    print(banner)
    print("AlexNet Training Started")
    print(banner)
    print(f"Start Time: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    if torch.cuda.is_available():
        print(f"GPU Name: {torch.cuda.get_device_name(0)}")
        print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")

    # CIFAR-10 images are upscaled to the 224x224 resolution the network's
    # classifier was sized for; only the training split gets the random flip.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    transform_train = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    transform_val = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])

    trainset = datasets.CIFAR10(root=data_root, train=True, download=True, transform=transform_train)
    valset = datasets.CIFAR10(root=data_root, train=False, download=True, transform=transform_val)

    print("\nDataset: CIFAR-10")
    print(f"Training samples: {len(trainset)}")
    print(f"Validation samples: {len(valset)}")
    print(f"Number of classes: {num_classes}")

    train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(valset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    model = AlexNet(num_classes=num_classes).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

    os.makedirs(checkpoint_dir, exist_ok=True)

    print("\nModel: AlexNet")
    print(f"Number of parameters: {sum(p.numel() for p in model.parameters()):,}")
    print(f"Optimizer: SGD (lr={lr}, momentum={momentum}, weight_decay={weight_decay})")
    print(f"Learning rate scheduler: StepLR (step_size={step_size}, gamma={gamma})")
    print(f"Batch size: {batch_size}")
    print(f"Number of epochs: {num_epochs}")
    print(f"Checkpoint directory: {checkpoint_dir}")
    print(banner)
    print("Starting training...\n")

    best_acc = 0.0
    training_start_time = time.time()

    for epoch in range(num_epochs):
        epoch_start = time.time()
        train_loss, train_acc = _train_one_epoch(
            model, train_loader, criterion, optimizer, device, epoch, num_epochs
        )
        # Reported epoch time covers the training pass only, not evaluation.
        epoch_time = time.time() - epoch_start

        val_loss, val_acc = _evaluate(model, val_loader, criterion, device)

        print(f'\nEpoch [{epoch+1}/{num_epochs}] Summary:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
        print(f'Epoch Time: {epoch_time:.2f}s\n')

        # Only a strict improvement overwrites the checkpoint.
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), checkpoint_path)
            print(f'Best model saved with accuracy: {best_acc:.2f}%\n')

        scheduler.step()

    total_training_time = time.time() - training_start_time
    print(banner)
    print("Training Finished")
    print(banner)
    print(f"End Time: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}")
    print(f"Total Training Time: {total_training_time/60:.2f} minutes")
    print(f"Total Training Time: {total_training_time/3600:.2f} hours")
    print(f"Best Validation Accuracy: {best_acc:.2f}%")
    print(f"Best Model Saved: {checkpoint_path}")
    print(banner)


if __name__ == '__main__':
    main()

AlexNet 模型推理脚本,用于评估训练好的模型在 CIFAR-10 测试集上的性能。

加载训练好的模型权重 alexnet_best.pth,加载 CIFAR-10 测试集(10,000 张图片),对每张图片进行前向推理;计算 Top-1 准确率(预测概率最高的类别是否正确)和 Top-5 准确率(真实标签是否在前 5 个预测中),并输出各类别的准确率和错误率。

相关推荐
それども1 小时前
怎么理解TCP的状态
java·网络·网络协议·tcp/ip·dubbo
其实防守也摸鱼1 小时前
告别单个变量,用列表和字典批量管理你的 Python 数据
开发语言·网络·软件测试·python·web安全·数据结构,编程教程
欧神附体1231 小时前
计算机网络之专业名词中英文解释(第一弹)
网络
ylscode1 小时前
Pentest Swarm AI:开源群体智能架构如何重构自主渗透测试的边界
网络·安全·安全威胁分析
weixin_429630262 小时前
3.51 Centra-Net:一种跨场景的集中式视觉定位网络
网络
江华森2 小时前
Linux 内核调优 + TCP/IP 协议栈深度解析 + 低延迟网络优化
linux·网络·tcp/ip
范范@2 小时前
Python进阶 网络编程笔记-多进程
网络·笔记·python
很懒的程序员雄3 小时前
华为eNSP静态路由实验教学
网络
哇嘎呀3 小时前
BGP邻居建立路由发布实验
网络