Day 52 神经网络调参指南

@浙大疏锦行

推荐的调参顺序
  1. 设置随机种子
  2. 选择合适的权重初始化
  3. 调整学习率和调度器
  4. 添加正则化
  5. 配置早停
python 复制代码
import torch
import torch.nn as nn

# 定义一个简单的线性模型
# 输入2个维度,输出1个维度
class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        # 线性层:y = w1*x1 + w2*x2 + b
        self.linear = nn.Linear(2, 1)
        
    def forward(self, x):
        return self.linear(x)

# 创建模型实例(每次运行权重不同)
model = SimpleNet()

# 查看模型参数
print("模型参数(未设置种子,每次运行可能不同):")
for name, param in model.named_parameters():
    print(f"{name}: {param.data}")
python 复制代码
import torch
import torch.nn as nn

# 演示梯度消失问题
def demo_gradient_problem():
    """展示权重初始化对梯度的影响"""
    
    # 定义深层网络
    class DeepNet(nn.Module):
        def __init__(self, init_scale):
            super().__init__()
            layers = []
            for _ in range(10):  # 10层网络
                linear = nn.Linear(100, 100, bias=False)
                # 使用指定的初始化尺度
                nn.init.normal_(linear.weight, mean=0, std=init_scale)
                layers.append(linear)
                layers.append(nn.Tanh())
            self.net = nn.Sequential(*layers)
            
        def forward(self, x):
            return self.net(x)
    
    # 测试不同初始化尺度
    for scale in [0.01, 0.1, 1.0]:
        model = DeepNet(scale)
        x = torch.randn(1, 100)
        output = model(x)
        print(f"初始化std={scale}: 输出均值={output.mean().item():.6f}, 输出std={output.std().item():.6f}")

demo_gradient_problem()
python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

# 演示不同学习率调度器
def demo_lr_schedulers():
    """展示不同学习率调度器的效果"""
    
    epochs = 100
    initial_lr = 0.1
    
    schedulers = {
        'StepLR': lambda opt: optim.lr_scheduler.StepLR(opt, step_size=30, gamma=0.1),
        'ExponentialLR': lambda opt: optim.lr_scheduler.ExponentialLR(opt, gamma=0.95),
        'CosineAnnealingLR': lambda opt: optim.lr_scheduler.CosineAnnealingLR(opt, T_max=100),
        'ReduceLROnPlateau': lambda opt: optim.lr_scheduler.ReduceLROnPlateau(opt, mode='min', patience=10)
    }
    
    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
    axes = axes.flatten()
    
    for idx, (name, scheduler_fn) in enumerate(schedulers.items()):
        # 创建简单模型和优化器
        model = nn.Linear(10, 1)
        optimizer = optim.SGD(model.parameters(), lr=initial_lr)
        scheduler = scheduler_fn(optimizer)
        
        lrs = []
        for epoch in range(epochs):
            lrs.append(optimizer.param_groups[0]['lr'])
            if name == 'ReduceLROnPlateau':
                scheduler.step(1.0)  # 需要传入loss值
            else:
                scheduler.step()
        
        axes[idx].plot(lrs)
        axes[idx].set_title(name)
        axes[idx].set_xlabel('Epoch')
        axes[idx].set_ylabel('Learning Rate')
        axes[idx].grid(True)
    
    plt.tight_layout()
    plt.show()

demo_lr_schedulers()
python 复制代码
import torch.optim as optim

# L2正则化(权重衰减)在PyTorch中通过weight_decay参数实现
model = nn.Linear(100, 10)

# 创建带L2正则化的优化器
optimizer = optim.Adam(
    model.parameters(), 
    lr=0.001, 
    weight_decay=1e-4  # L2正则化系数
)

print(f"优化器配置: {optimizer}")
print(f"L2正则化系数: {optimizer.param_groups[0]['weight_decay']}")
python 复制代码
class EarlyStopping:
    """
    早停类:监控验证集性能,在性能不再提升时停止训练
    """
    def __init__(self, patience=7, min_delta=0, verbose=True):
        """
        Args:
            patience: 允许多少个epoch性能不提升
            min_delta: 性能提升的最小阈值
            verbose: 是否打印信息
        """
        self.patience = patience
        self.min_delta = min_delta
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.best_model_state = None
        
    def __call__(self, val_loss, model):
        score = -val_loss  # 转换为越大越好
        
        if self.best_score is None:
            # 第一次调用
            self.best_score = score
            self.save_checkpoint(model)
        elif score < self.best_score + self.min_delta:
            # 性能没有提升
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter}/{self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # 性能提升
            self.best_score = score
            self.save_checkpoint(model)
            self.counter = 0
            
    def save_checkpoint(self, model):
        """保存模型状态"""
        self.best_model_state = model.state_dict().copy()
        if self.verbose:
            print(f'验证损失下降,保存模型...')
            
    def load_best_model(self, model):
        """加载最佳模型"""
        model.load_state_dict(self.best_model_state)
        return model

# 使用示例
print("早停策略使用示例:")
print("""
early_stopping = EarlyStopping(patience=7)

for epoch in range(epochs):
    train_loss = train(model, train_loader)
    val_loss = validate(model, val_loader)
    
    early_stopping(val_loss, model)
    
    if early_stopping.early_stop:
        print("Early stopping triggered!")
        break

# 加载最佳模型
model = early_stopping.load_best_model(model)
""")
相关推荐
Dxy123931021620 分钟前
Python路径算法简介
开发语言·python·算法
躺平的赶海人29 分钟前
python opencv实现相机内参标定之安装OpenCv
python·opencv·计算机视觉
满满和米兜29 分钟前
【Java基础】-I/O-字符流
java·开发语言·python
echome88841 分钟前
Python 装饰器详解:从入门到精通的 7 个实用案例
开发语言·python
子木HAPPY阳VIP42 分钟前
【无标题】
java·python·mysql
2501_921649491 小时前
低延迟量化交易数据 API:从架构设计到性能优化的完整实践指南
python·websocket·金融·量化
无心水1 小时前
2、5分钟上手|PyPDF2 快速提取PDF文本
java·linux·分布式·后端·python·架构·pdf
代码的乐趣1 小时前
支持selenium的chrome driver更新到147.0.7727.56
chrome·python·selenium
码上实战1 小时前
到底Java 适不适合做 AI 呢?
java·人工智能·后端·python·ai
reasonsummer1 小时前
【教学类-160-02】20260409 AI视频培训-练习2“豆包AI视频《小班-抢玩具》+豆包图片风格:手办”
python·音视频·ai视频·豆包·通义万相