4.权重衰减(weight decay)

4.1 手动实现权重衰减

python
import torch
from torch import nn
from torch.utils.data import TensorDataset,DataLoader
import matplotlib.pyplot as plt
def synthetic_data(w, b, num_inputs):
    """Generate examples from a noisy linear model y = Xw + b + eps.

    Args:
        w: weight column vector of shape (d, 1).
        b: scalar bias.
        num_inputs: number of examples to generate (despite the name, this
            is the row count, not the feature count — features come from w).

    Returns:
        (features, labels): features of shape (num_inputs, d), labels with
        Gaussian noise of std 0.1 added.
    """
    features = torch.normal(0, 1, size=(num_inputs, w.shape[0]))
    labels = torch.matmul(features, w) + b
    labels = labels + torch.normal(0, 0.1, size=labels.shape)
    return features, labels
def load_array(data, batch_size, is_train=True):
    """Wrap a tuple of tensors in a minibatch DataLoader (shuffled when training)."""
    return DataLoader(TensorDataset(*data), batch_size=batch_size, shuffle=is_train)
def init_params(num_inputs):
    """Return [w, b]: standard-normal weights (num_inputs, 1) and a zero bias,
    both leaf tensors tracking gradients."""
    weight = torch.normal(0, 1, size=(num_inputs, 1), requires_grad=True)
    bias = torch.zeros(1, requires_grad=True)
    return [weight, bias]
def l2_penalty(w):
    """Half the squared L2 norm of w: (1/2) * sum(w_i^2)."""
    return 0.5 * (w ** 2).sum()

def linear_reg(X, w, b):
    """Affine map: X @ w + b (b broadcasts over the batch)."""
    return X @ w + b
def mse_loss(y_hat, y):
    """Element-wise half squared error; no reduction (same shape as inputs)."""
    return (y_hat - y) ** 2 * 0.5
def sgd(params, lr, batch_size):
    """Minibatch SGD update: p <- p - lr * p.grad / batch_size, then zero grads.

    Args:
        params: list of leaf tensors with populated .grad.
        lr: learning rate.
        batch_size: minibatch size used to average the summed gradient.

    Fixes the original loop variable shadowing (`for params in params`), and
    replaces the legacy `.data` mutation with an in-place update under
    torch.no_grad(), which is the supported way to step outside autograd.
    """
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()
def evaluate_loss(net, data_iter, loss):
    """Average per-element loss of `net` over all batches of `data_iter`.

    Args:
        net: callable mapping a feature batch X to predictions.
        data_iter: iterable of (X, y) batches.
        loss: per-element loss (no reduction); its output is summed here.

    Returns:
        float: total loss divided by the total number of label elements.

    Improvement: evaluation now runs under torch.no_grad() so no autograd
    graph is built (the original leaked graph memory during evaluation).
    """
    total_loss, total_samples = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            l = loss(net(X), y)
            total_loss += l.sum().item()
            total_samples += y.numel()
    return total_loss / total_samples
# --- Experiment setup: deliberately overparameterized ---------------------
# Only 20 training examples but 200 input features, so the model overfits
# badly unless regularized; the test set (100 examples) exposes this.
n_train,n_test,num_inputs,batch_size=20,100,200,5
# Ground truth: all weights 0.01, bias 0.05.
true_w,true_b=torch.ones((num_inputs,1))*0.01,0.05
train_data=synthetic_data(true_w,true_b,n_train)
test_data=synthetic_data(true_w,true_b,n_test)
train_iter=load_array(train_data,batch_size)
test_iter=load_array(test_data,batch_size,is_train=False)
# Fresh trainable parameters and a closure net over them.
w,b=init_params(num_inputs)
net=lambda X:linear_reg(X,w,b)
loss=mse_loss
# lambd is the L2 regularization strength applied manually below.
num_epochs,lr,lambd=10,0.05,3
#animator=SimpleAnimator()
for epoch in range(num_epochs):
    for X,y in train_iter:
        # loss(...) is per-element (shape (batch, 1)); adding the scalar
        # penalty broadcasts it onto every element, so after .sum() the
        # penalty is scaled by batch_size — sgd's division by batch_size
        # then restores the intended lambd weighting on the penalty.
        l=loss(net(X),y)+lambd*l2_penalty(w)
        l.sum().backward()
        # sgd also zeroes the gradients after stepping.
        sgd([w,b],lr,batch_size)
    if (epoch+1)%5==0:
        train_loss=evaluate_loss(net,train_iter,loss)
        test_loss=evaluate_loss(net,test_iter,loss)
        #animator.add(epoch+1,train_loss,test_loss)
        print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f},test Loss: {test_loss:.4f}")
# Smaller ||w|| indicates the penalty shrank the weights as intended.
print('w的L2范数是:', torch.norm(w).item())
plt.show()

4.2 简单实现权重衰减

python
import torch
from torch import nn
from torch.utils.data import TensorDataset,DataLoader
import matplotlib.pyplot as plt
def synthetic_data(w, b, num_inputs):
    """Draw `num_inputs` random examples from a noisy linear model.

    Features are standard normal of shape (num_inputs, d) where d = w.shape[0];
    labels are Xw + b plus Gaussian noise with std 0.1. (Despite its name,
    `num_inputs` is the example count, not the feature count.)
    """
    X = torch.normal(0, 1, size=(num_inputs, w.shape[0]))
    clean = X @ w + b
    return X, clean + torch.normal(0, 0.1, size=clean.shape)
def load_array(data, batch_size, is_train=True):
    """Build a DataLoader over a tuple of aligned tensors.

    Shuffles each epoch when is_train is True; iterates in order otherwise.
    """
    dataset = TensorDataset(*data)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=is_train)
    return loader
def init_params(num_inputs):
    """Create trainable parameters: N(0,1) weights of shape (num_inputs, 1)
    and a zero bias, both with gradient tracking enabled."""
    w = torch.normal(0, 1, size=(num_inputs, 1))
    w.requires_grad_(True)
    b = torch.zeros(1)
    b.requires_grad_(True)
    return [w, b]
def l2_penalty(w):
    """Return (1/2) * ||w||^2, the classic L2 weight-decay penalty.
    (Unused in this script: the optimizer's weight_decay handles it instead.)"""
    return torch.sum(w * w) / 2
def linear_reg(X, w, b):
    """Linear model prediction: matmul of the batch with w, bias added."""
    projected = torch.matmul(X, w)
    return projected + b
def mse_loss(y_hat, y):
    """Half the SUM of squared errors over the whole batch (a scalar).
    Note: sum reduction, not mean — callers must account for batch size."""
    err = y_hat - y
    return torch.sum(err ** 2) / 2
def evaluate_loss(net, data_iter, loss):
    """Average loss per label element of `net` over `data_iter`.

    Args:
        net: callable mapping a feature batch X to predictions.
        data_iter: iterable of (X, y) batches.
        loss: batch loss returning a scalar SUM over the batch (this is what
            the sibling mse_loss in this script does).

    Returns:
        float: total summed loss divided by the total number of label elements.

    Bug fix: the original did `total_loss += l.item() * y.shape[0]`, but since
    `loss` already sums over the batch, multiplying by the batch size inflated
    the reported average by a factor of ~batch_size. The batch sum is now
    accumulated directly. Also wrapped in torch.no_grad() so evaluation does
    not build an autograd graph.
    """
    total_loss, total_samples = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            l = loss(net(X), y)
            total_loss += l.item()  # l is already the sum over this batch
            total_samples += y.numel()
    return total_loss / total_samples
# --- Experiment setup: same overparameterized regime as the manual version --
# 20 training examples, 200 features: overfits without regularization.
n_train,n_test,num_inputs,batch_size=20,100,200,5
true_w,true_b=torch.ones((num_inputs,1))*0.01,0.05
train_data=synthetic_data(true_w,true_b,n_train)
test_data=synthetic_data(true_w,true_b,n_test)
train_iter=load_array(train_data,batch_size)
test_iter=load_array(test_data,batch_size,is_train=False)
w,b=init_params(num_inputs)
net=lambda X:linear_reg(X,w,b)
loss=mse_loss
# NOTE: lambd is defined but unused in this script — L2 regularization is
# applied by the optimizer's weight_decay=0.001 argument below instead.
num_epochs,lr,lambd=100,0.001,3
# weight_decay adds lambda*w to the gradient, i.e. built-in L2 regularization.
# NOTE(review): here it decays the bias b as well, since both tensors share
# one parameter group; the manual version penalized only w.
optimizer=torch.optim.SGD([w,b],lr=lr,weight_decay=0.001)
#animator=SimpleAnimator()
for epoch in range(num_epochs):
    for X,y in train_iter:
        # Standard PyTorch loop: clear grads, forward, backward, step.
        optimizer.zero_grad()
        l=loss(net(X),y)
        # mse_loss already returns a scalar (sum reduction), so no .sum() here.
        l.backward()
        #sgd([w,b],lr,batch_size)
        optimizer.step() 
    if (epoch+1)%5==0:
        train_loss=evaluate_loss(net,train_iter,loss)
        test_loss=evaluate_loss(net,test_iter,loss)
        #animator.add(epoch+1,train_loss,test_loss)
        print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f},test Loss: {test_loss:.4f}")
# Report the final weight norm to show the shrinking effect of weight decay.
print('w的L2范数是:', torch.norm(w).item())
plt.show()
相关推荐
慕ゞ笙9 分钟前
2025年Ubuntu24.04系统安装以及深度学习环境配置
人工智能·深度学习
量化Mike14 分钟前
【python报错】解决卸载Python时报错问题:No Python installation was detected
开发语言·python
2501_9417987326 分钟前
Python高性能网络爬虫实战:异步IO与多线程结合代码解析
开发语言·python
java1234_小锋26 分钟前
基于Python深度学习的车辆车牌识别系统(PyTorch2卷积神经网络CNN+OpenCV4实现)视频教程 - 车牌矩阵定位
python·深度学习·cnn·车牌识别
linzeyang40 分钟前
Advent of Code 2025 挑战全手写代码 Day 1 - 秘密入口
python·github
阿郎_20111 小时前
python自动化脚本-下载小说
python·selenium·网络爬虫
ERP老兵-冷溪虎山1 小时前
Python/JS/Go/Java同步学习(第五十篇半)四语言“path路径详解“对照表: 看完这篇定位文件就通透了(附源码/截图/参数表/避坑指南)
java·javascript·python·golang·中医编程·编程四语言同步学·path路径详解
Data_agent1 小时前
1688获得1688公司档案信息API,python请求示例
开发语言·数据库·python
Christo33 小时前
ICML-2019《Optimal Transport for structured data with application on graphs》
人工智能·算法·机器学习·数据挖掘
vx_vxbs663 小时前
【SSM高校普法系统】(免费领源码+演示录像)|可做计算机毕设Java、Python、PHP、小程序APP、C#、爬虫大数据、单片机、文案
android·java·python·mysql·小程序·php·idea