周报5.31

本周复现了AlexNet网络结构

特征提取层 (self.features) 由 5 个卷积层 + 3 个最大池化层组成

分类器 (self.classifier) 由 3 个全连接层组成

python 复制代码
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    The network is a stack of five convolutions (with ReLU and three max-pool
    stages) followed by three fully connected layers. It expects 3-channel
    image batches shaped ``(N, 3, H, W)``. A 224x224 input yields a 6x6
    feature map after ``self.features``; other sizes are pooled to 6x6 by
    ``self.avgpool`` so the classifier always receives ``256 * 6 * 6`` values.

    Args:
        num_classes: Number of output logits produced by the last layer.
    """

    def __init__(self, num_classes=1000):
        super().__init__()

        self.features = nn.Sequential(
            # Conv1: 11x11 kernel, stride 4, 3 input channels -> 96 maps
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            # Conv2: 5x5 kernel. This is a plain (ungrouped) convolution; the
            # two-GPU grouped split (groups=2) is NOT used here.
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            # Conv3: 3x3 kernel
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),

            # Conv4: 3x3 kernel
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),

            # Conv5: 3x3 kernel
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        # Parameter-free pooling to a fixed 6x6 grid. For 224x224 inputs the
        # feature map is already 6x6, so this is a no-op there; for other
        # input sizes it keeps the flattened size equal to what FC6 expects.
        # It adds no entries to state_dict, so existing checkpoints still load.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        self.classifier = nn.Sequential(
            # FC6
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),

            # FC7
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),

            # FC8: raw logits, no softmax (pair with a logits-based loss)
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape ``(N, num_classes)`` for batch ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        # Flatten everything except the batch dimension.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

AlexNet 模型训练脚本,用于在 CIFAR-10 数据集上训练 AlexNet 模型。

加载 CIFAR-10 数据集(50,000 张训练图片 + 10,000 张验证图片,验证集即官方测试集),将图片缩放到 224×224,并对训练集应用随机水平翻转作为数据增强;创建 AlexNet 模型并移动到 GPU/CPU 设备。训练循环共 30 个 Epoch,每个 Epoch 包含:前向传播 → 计算损失 → 反向传播 → 参数更新;每个 Epoch 结束后在验证集上评估准确率,并将验证准确率最高的模型权重保存到文件。

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import os
import time
from alexnet import AlexNet


def _run_epoch(model, loader, criterion, device, optimizer=None, progress_label=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    When ``optimizer`` is given the model is put in training mode and updated
    after every batch; otherwise the model is put in eval mode and gradients
    are disabled.

    Args:
        model: Network mapping an input batch to per-class logits.
        loader: Iterable of ``(inputs, targets)`` batches with a ``len()``.
        criterion: Loss callable; assumed to return the mean loss of the batch
            (true for ``nn.CrossEntropyLoss()`` with default arguments).
        device: Device the batches are moved to before the forward pass.
        optimizer: Optimizer to step, or ``None`` for evaluation.
        progress_label: If set, running statistics prefixed with this label
            are printed every 100 batches.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    seen = 0
    num_batches = len(loader)

    with torch.set_grad_enabled(training):
        for batch_idx, (inputs, targets) in enumerate(loader):
            inputs, targets = inputs.to(device), targets.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            if training:
                loss.backward()
                optimizer.step()

            # Weight the batch-mean loss by the batch size so that a short
            # final batch does not count as much as a full one.
            batch_count = targets.size(0)
            loss_sum += loss.item() * batch_count
            _, predicted = outputs.max(1)
            correct += predicted.eq(targets).sum().item()
            seen += batch_count

            if progress_label is not None and batch_idx % 100 == 0:
                print(f'{progress_label}, Batch [{batch_idx}/{num_batches}], '
                      f'Loss: {loss_sum / seen:.4f}, Acc: {100. * correct / seen:.2f}%')

    if seen == 0:
        raise ValueError("data loader produced no samples")
    return loss_sum / seen, 100. * correct / seen


def main():
    """Train AlexNet on CIFAR-10 and keep the best-scoring weights.

    Downloads CIFAR-10 into ``./data`` if needed, trains with SGD and a StepLR
    schedule, evaluates on the ``train=False`` split after every epoch, and
    writes the state_dict with the highest evaluation accuracy to
    ``./checkpoints/alexnet_best.pth``. Progress is reported via ``print``.
    """
    # Hyperparameters are defined once and reused in both the setup code and
    # the log lines, so the printed configuration cannot drift from reality.
    batch_size = 64
    num_epochs = 30
    lr = 0.01
    momentum = 0.9
    weight_decay = 5e-4
    lr_step_size = 10
    lr_gamma = 0.1
    num_classes = 10
    checkpoint_dir = './checkpoints'
    best_model_path = f'{checkpoint_dir}/alexnet_best.pth'
    banner = "=" * 80

    print(banner)
    print("AlexNet Training Started")
    print(banner)
    print(f"Start Time: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    if torch.cuda.is_available():
        print(f"GPU Name: {torch.cuda.get_device_name(0)}")
        print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")

    # CIFAR-10 images are upscaled to 224x224; only the training pipeline
    # applies random augmentation.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    transform_train = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    transform_val = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])

    trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
    valset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_val)

    print("\nDataset: CIFAR-10")
    print(f"Training samples: {len(trainset)}")
    print(f"Validation samples: {len(valset)}")
    print(f"Number of classes: {num_classes}")

    train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(valset, batch_size=batch_size, shuffle=False, num_workers=2)
    model = AlexNet(num_classes=num_classes).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=lr_step_size, gamma=lr_gamma)

    os.makedirs(checkpoint_dir, exist_ok=True)

    print("\nModel: AlexNet")
    print(f"Number of parameters: {sum(p.numel() for p in model.parameters()):,}")
    print(f"Optimizer: SGD (lr={lr}, momentum={momentum}, weight_decay={weight_decay})")
    print(f"Learning rate scheduler: StepLR (step_size={lr_step_size}, gamma={lr_gamma})")
    print(f"Batch size: {batch_size}")
    print(f"Number of epochs: {num_epochs}")
    print(f"Checkpoint directory: {checkpoint_dir}")
    print(banner)
    print("Starting training...\n")

    best_acc = 0.0
    training_start_time = time.time()

    for epoch in range(num_epochs):
        epoch_label = f'Epoch [{epoch+1}/{num_epochs}]'

        # The reported epoch time covers the training pass only.
        epoch_start = time.time()
        train_loss, train_acc = _run_epoch(
            model, train_loader, criterion, device,
            optimizer=optimizer, progress_label=epoch_label,
        )
        epoch_time = time.time() - epoch_start

        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

        print(f'\n{epoch_label} Summary:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
        print(f'Epoch Time: {epoch_time:.2f}s\n')

        # Overwrite the single checkpoint whenever evaluation accuracy improves.
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), best_model_path)
            print(f'Best model saved with accuracy: {best_acc:.2f}%\n')

        scheduler.step()

    total_training_time = time.time() - training_start_time
    print(banner)
    print("Training Finished")
    print(banner)
    print(f"End Time: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}")
    print(f"Total Training Time: {total_training_time/60:.2f} minutes")
    print(f"Total Training Time: {total_training_time/3600:.2f} hours")
    print(f"Best Validation Accuracy: {best_acc:.2f}%")
    print(f"Best Model Saved: {best_model_path}")
    print(banner)


# Start training only when this file is executed as a script, not when it is
# imported. The guard also matters because main() builds DataLoaders with
# num_workers=2: on platforms that start worker processes by spawning, each
# worker re-imports this module and must not re-run the training entry point.
if __name__ == '__main__':
    main()

AlexNet 模型推理脚本,用于评估训练好的模型在 CIFAR-10 测试集上的性能。

加载训练好的模型权重 alexnet_best.pth,加载 CIFAR-10 测试集(10,000 张图片),对每张图片进行前向推理;计算 Top-1 准确率(预测概率最高的类别是否正确)和 Top-5 准确率(真实标签是否在概率最高的前 5 个预测中),并输出各类别的准确率和错误率。

相关推荐
网络研究院5 天前
2026年网络安全
网络·安全·法律·法规·趋势·发展
酣大智5 天前
ARP代理--工作原理
运维·网络·arp·arp代理
treesforest5 天前
AI安全系统如何识别异常访问?IP风险识别正在成为关键能力
网络·人工智能·tcp/ip·安全·web安全
shushangyun_5 天前
2026年快消品B2B系统推荐:支持终端门店订货、促销政策自动化的工具?
java·运维·网络·数据库·人工智能·spring·自动化
2601_961845155 天前
粉笔行测题库|系统班|刷题
网络·百度·微信·微信公众平台·facebook·新浪微博
程序猿阿伟5 天前
《Chrome离线扩展安装的底层逻辑与场景落地指南》
服务器·网络·chrome
InHand云飞小白5 天前
无人值守站点网络困境?工业级路由器IR315破解连接难题
网络·物联网·4g·工业路由器·4g路由器·iiot·蜂窝路由器
森G5 天前
75、服务器源码解析---------云视频服务项目
linux·服务器·网络·c++·qt
江华森5 天前
TCP/IP 协议栈实战 — 7 个实验详解
网络·tcp/ip·智能路由器
酉鬼女又兒5 天前
零基础入门计算机网络运输层:端到端通信核心作用、端口号分类规则、复用分用工作机制及UDP与TCP协议全方位对比详解
网络·网络协议·tcp/ip·计算机网络·考研·udp·php