@浙大疏锦行
Recommended order for hyperparameter tuning
- Set a random seed
- Choose an appropriate weight initialization
- Tune the learning rate and its scheduler
- Add regularization
- Configure early stopping
```python
import torch
import torch.nn as nn

# Define a simple linear model:
# 2 input features, 1 output
class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        # Linear layer: y = w1*x1 + w2*x2 + b
        self.linear = nn.Linear(2, 1)

    def forward(self, x):
        return self.linear(x)

# Create a model instance (weights differ on every run)
model = SimpleNet()

# Inspect the model parameters
print("Model parameters (no seed set, may differ between runs):")
for name, param in model.named_parameters():
    print(f"{name}: {param.data}")
```
```python
import torch
import torch.nn as nn

# Demonstrate the vanishing/exploding signal problem
def demo_gradient_problem():
    """Show how the weight-initialization scale affects signal propagation
    through a deep network (a proxy for vanishing/exploding gradients)."""
    # Define a deep network
    class DeepNet(nn.Module):
        def __init__(self, init_scale):
            super().__init__()
            layers = []
            for _ in range(10):  # 10-layer network
                linear = nn.Linear(100, 100, bias=False)
                # Initialize weights at the given scale
                nn.init.normal_(linear.weight, mean=0, std=init_scale)
                layers.append(linear)
                layers.append(nn.Tanh())
            self.net = nn.Sequential(*layers)

        def forward(self, x):
            return self.net(x)

    # Try several initialization scales
    for scale in [0.01, 0.1, 1.0]:
        model = DeepNet(scale)
        x = torch.randn(1, 100)
        output = model(x)
        print(f"init std={scale}: output mean={output.mean().item():.6f}, "
              f"output std={output.std().item():.6f}")

demo_gradient_problem()
```
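A common remedy is a variance-preserving scheme such as Xavier (Glorot) initialization, available in PyTorch as `nn.init.xavier_normal_`. A minimal sketch applying it to the same 10-layer tanh stack, assuming the same layer sizes as above:

```python
import torch
import torch.nn as nn

# Same 10-layer tanh stack, but with Xavier initialization, which
# scales each layer's std so activation variance stays stable in depth
layers = []
for _ in range(10):
    linear = nn.Linear(100, 100, bias=False)
    nn.init.xavier_normal_(linear.weight)
    layers.append(linear)
    layers.append(nn.Tanh())
net = nn.Sequential(*layers)

x = torch.randn(1, 100)
output = net(x)
print(f"Xavier init: output mean={output.mean().item():.6f}, "
      f"output std={output.std().item():.6f}")
```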
```python
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

# Compare several learning-rate schedulers
def demo_lr_schedulers():
    """Plot how each scheduler changes the learning rate over training."""
    epochs = 100
    initial_lr = 0.1

    schedulers = {
        'StepLR': lambda opt: optim.lr_scheduler.StepLR(opt, step_size=30, gamma=0.1),
        'ExponentialLR': lambda opt: optim.lr_scheduler.ExponentialLR(opt, gamma=0.95),
        'CosineAnnealingLR': lambda opt: optim.lr_scheduler.CosineAnnealingLR(opt, T_max=100),
        'ReduceLROnPlateau': lambda opt: optim.lr_scheduler.ReduceLROnPlateau(opt, mode='min', patience=10)
    }

    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
    axes = axes.flatten()

    for idx, (name, scheduler_fn) in enumerate(schedulers.items()):
        # Create a throwaway model and optimizer
        model = nn.Linear(10, 1)
        optimizer = optim.SGD(model.parameters(), lr=initial_lr)
        scheduler = scheduler_fn(optimizer)

        lrs = []
        for epoch in range(epochs):
            lrs.append(optimizer.param_groups[0]['lr'])
            if name == 'ReduceLROnPlateau':
                # Needs a validation loss; a constant value never "improves",
                # so the LR is cut after `patience` epochs
                scheduler.step(1.0)
            else:
                scheduler.step()

        axes[idx].plot(lrs)
        axes[idx].set_title(name)
        axes[idx].set_xlabel('Epoch')
        axes[idx].set_ylabel('Learning Rate')
        axes[idx].grid(True)

    plt.tight_layout()
    plt.show()

demo_lr_schedulers()
```
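In an actual training loop, the scheduler is stepped once per epoch, after the optimizer update. A minimal sketch on synthetic full-batch data (`train_x`/`train_y` are placeholders, not from the original):

```python
import torch
import torch.nn as nn
import torch.optim as optim

torch.manual_seed(0)
train_x = torch.randn(64, 10)  # placeholder synthetic data
train_y = torch.randn(64, 1)

model = nn.Linear(10, 1)
optimizer = optim.SGD(model.parameters(), lr=0.1)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
criterion = nn.MSELoss()

for epoch in range(100):
    optimizer.zero_grad()
    loss = criterion(model(train_x), train_y)
    loss.backward()
    optimizer.step()   # update the weights first...
    scheduler.step()   # ...then advance the schedule
```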
```python
import torch.nn as nn
import torch.optim as optim

# In PyTorch, L2 regularization (weight decay) is enabled through the
# optimizer's weight_decay argument
model = nn.Linear(100, 10)

# Create an optimizer with L2 regularization
optimizer = optim.Adam(
    model.parameters(),
    lr=0.001,
    weight_decay=1e-4  # L2 regularization coefficient
)

print(f"Optimizer config: {optimizer}")
print(f"L2 regularization coefficient: {optimizer.param_groups[0]['weight_decay']}")
```
```python
import copy

class EarlyStopping:
    """
    Early stopping: monitor validation performance and stop training
    when it no longer improves.
    """
    def __init__(self, patience=7, min_delta=0, verbose=True):
        """
        Args:
            patience: how many epochs without improvement to tolerate
            min_delta: minimum change that counts as an improvement
            verbose: whether to print progress messages
        """
        self.patience = patience
        self.min_delta = min_delta
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.best_model_state = None

    def __call__(self, val_loss, model):
        score = -val_loss  # negate so that higher is better

        if self.best_score is None:
            # First call
            self.best_score = score
            self.save_checkpoint(model)
        elif score < self.best_score + self.min_delta:
            # No (sufficient) improvement
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter}/{self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # Improvement
            self.best_score = score
            self.save_checkpoint(model)
            self.counter = 0

    def save_checkpoint(self, model):
        """Save the model state (deep copy, so later training can't mutate it)."""
        self.best_model_state = copy.deepcopy(model.state_dict())
        if self.verbose:
            print('Validation loss decreased, saving model...')

    def load_best_model(self, model):
        """Restore the best model state."""
        model.load_state_dict(self.best_model_state)
        return model

# Usage example
print("Early-stopping usage example:")
print("""
early_stopping = EarlyStopping(patience=7)
for epoch in range(epochs):
    train_loss = train(model, train_loader)
    val_loss = validate(model, val_loader)

    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        print("Early stopping triggered!")
        break

# Load the best model
model = early_stopping.load_best_model(model)
""")
```