9. 优化器

9.1 优化器

① 损失函数调用backward方法,就可以调用损失函数的反向传播方法,就可以求出我们需要调节的梯度,我们就可以利用我们的优化器就可以根据梯度对参数进行调整,达到整体误差降低的目的。

② 梯度要清零,如果梯度不清零会导致梯度累加。

9.2 神经网络优化一轮

python 复制代码
import torch
import torchvision
from torch import nn 
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("./dataset",train=False,transform=torchvision.transforms.ToTensor(),download=True)       
dataloader = DataLoader(dataset, batch_size=64,drop_last=True)

class Tudui(nn.Module):
    def __init__(self):
        super(Tudui, self).__init__()        
        self.model1 = Sequential(
            Conv2d(3,32,5,padding=2),
            MaxPool2d(2),
            Conv2d(32,32,5,padding=2),
            MaxPool2d(2),
            Conv2d(32,64,5,padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024,64),
            Linear(64,10)
        )
        
    def forward(self, x):
        x = self.model1(x)
        return x
    
loss = nn.CrossEntropyLoss() # 交叉熵    
tudui = Tudui()
optim = torch.optim.SGD(tudui.parameters(),lr=0.01)   # 随机梯度下降优化器
for data in dataloader:
    imgs, targets = data
    outputs = tudui(imgs)
    result_loss = loss(outputs, targets) # 计算实际输出与目标输出的差距
    optim.zero_grad()  # 梯度清零
    result_loss.backward() # 反向传播,计算损失函数的梯度
    optim.step()   # 根据梯度,对网络的参数进行调优
    print(result_loss) # 对数据只看了一遍,只看了一轮,所以loss下降不大

结果:

复制代码
Files already downloaded and verified
tensor(2.2978, grad_fn=<NllLossBackward0>)
tensor(2.2988, grad_fn=<NllLossBackward0>)
tensor(2.3163, grad_fn=<NllLossBackward0>)
tensor(2.3253, grad_fn=<NllLossBackward0>)
tensor(2.2952, grad_fn=<NllLossBackward0>)
tensor(2.3066, grad_fn=<NllLossBackward0>)
tensor(2.3085, grad_fn=<NllLossBackward0>)
tensor(2.3106, grad_fn=<NllLossBackward0>)
tensor(2.2960, grad_fn=<NllLossBackward0>)
tensor(2.3053, grad_fn=<NllLossBackward0>)
tensor(2.2892, grad_fn=<NllLossBackward0>)
tensor(2.3090, grad_fn=<NllLossBackward0>)
tensor(2.2956, grad_fn=<NllLossBackward0>)
tensor(2.3041, grad_fn=<NllLossBackward0>)
tensor(2.3012, grad_fn=<NllLossBackward0>)
tensor(2.3043, grad_fn=<NllLossBackward0>)
tensor(2.2760, grad_fn=<NllLossBackward0>)
tensor(2.3051, grad_fn=<NllLossBackward0>)
tensor(2.2951, grad_fn=<NllLossBackward0>)
tensor(2.3168, grad_fn=<NllLossBackward0>)
tensor(2.3140, grad_fn=<NllLossBackward0>)
tensor(2.3096, grad_fn=<NllLossBackward0>)
tensor(2.2945, grad_fn=<NllLossBackward0>)
tensor(2.3115, grad_fn=<NllLossBackward0>)
tensor(2.2987, grad_fn=<NllLossBackward0>)
tensor(2.3029, grad_fn=<NllLossBackward0>)
tensor(2.3096, grad_fn=<NllLossBackward0>)
tensor(2.3064, grad_fn=<NllLossBackward0>)
tensor(2.3161, grad_fn=<NllLossBackward0>)
tensor(2.3129, grad_fn=<NllLossBackward0>)
tensor(2.2903, grad_fn=<NllLossBackward0>)
tensor(2.3043, grad_fn=<NllLossBackward0>)
tensor(2.3034, grad_fn=<NllLossBackward0>)
tensor(2.3169, grad_fn=<NllLossBackward0>)
tensor(2.3090, grad_fn=<NllLossBackward0>)
tensor(2.3039, grad_fn=<NllLossBackward0>)
tensor(2.3019, grad_fn=<NllLossBackward0>)
tensor(2.3071, grad_fn=<NllLossBackward0>)
tensor(2.3018, grad_fn=<NllLossBackward0>)
tensor(2.3083, grad_fn=<NllLossBackward0>)
tensor(2.2994, grad_fn=<NllLossBackward0>)
tensor(2.2909, grad_fn=<NllLossBackward0>)
tensor(2.3130, grad_fn=<NllLossBackward0>)
tensor(2.2993, grad_fn=<NllLossBackward0>)
tensor(2.2906, grad_fn=<NllLossBackward0>)
tensor(2.3084, grad_fn=<NllLossBackward0>)
tensor(2.3123, grad_fn=<NllLossBackward0>)
tensor(2.2931, grad_fn=<NllLossBackward0>)
tensor(2.3059, grad_fn=<NllLossBackward0>)
tensor(2.3117, grad_fn=<NllLossBackward0>)
tensor(2.2975, grad_fn=<NllLossBackward0>)
tensor(2.3109, grad_fn=<NllLossBackward0>)
tensor(2.3029, grad_fn=<NllLossBackward0>)
tensor(2.3020, grad_fn=<NllLossBackward0>)
tensor(2.3022, grad_fn=<NllLossBackward0>)
tensor(2.3005, grad_fn=<NllLossBackward0>)
tensor(2.2920, grad_fn=<NllLossBackward0>)
tensor(2.3016, grad_fn=<NllLossBackward0>)
tensor(2.3053, grad_fn=<NllLossBackward0>)
tensor(2.3082, grad_fn=<NllLossBackward0>)
tensor(2.3011, grad_fn=<NllLossBackward0>)
tensor(2.3040, grad_fn=<NllLossBackward0>)
tensor(2.3130, grad_fn=<NllLossBackward0>)
tensor(2.2981, grad_fn=<NllLossBackward0>)
tensor(2.2977, grad_fn=<NllLossBackward0>)
tensor(2.2994, grad_fn=<NllLossBackward0>)
tensor(2.3075, grad_fn=<NllLossBackward0>)
tensor(2.3016, grad_fn=<NllLossBackward0>)
tensor(2.2966, grad_fn=<NllLossBackward0>)
tensor(2.3015, grad_fn=<NllLossBackward0>)
tensor(2.3000, grad_fn=<NllLossBackward0>)
tensor(2.2953, grad_fn=<NllLossBackward0>)
tensor(2.2958, grad_fn=<NllLossBackward0>)
tensor(2.2977, grad_fn=<NllLossBackward0>)
tensor(2.2928, grad_fn=<NllLossBackward0>)
tensor(2.2989, grad_fn=<NllLossBackward0>)
tensor(2.2968, grad_fn=<NllLossBackward0>)
tensor(2.2982, grad_fn=<NllLossBackward0>)
tensor(2.2912, grad_fn=<NllLossBackward0>)
tensor(2.3005, grad_fn=<NllLossBackward0>)
tensor(2.2909, grad_fn=<NllLossBackward0>)
tensor(2.2940, grad_fn=<NllLossBackward0>)
tensor(2.2959, grad_fn=<NllLossBackward0>)
tensor(2.2993, grad_fn=<NllLossBackward0>)
tensor(2.2933, grad_fn=<NllLossBackward0>)
tensor(2.2951, grad_fn=<NllLossBackward0>)
tensor(2.2824, grad_fn=<NllLossBackward0>)
tensor(2.2987, grad_fn=<NllLossBackward0>)
tensor(2.2961, grad_fn=<NllLossBackward0>)
tensor(2.2914, grad_fn=<NllLossBackward0>)
tensor(2.3025, grad_fn=<NllLossBackward0>)
tensor(2.2895, grad_fn=<NllLossBackward0>)
tensor(2.2943, grad_fn=<NllLossBackward0>)
tensor(2.2974, grad_fn=<NllLossBackward0>)
tensor(2.2977, grad_fn=<NllLossBackward0>)
tensor(2.3069, grad_fn=<NllLossBackward0>)
tensor(2.2972, grad_fn=<NllLossBackward0>)
tensor(2.2979, grad_fn=<NllLossBackward0>)
tensor(2.2932, grad_fn=<NllLossBackward0>)
tensor(2.2940, grad_fn=<NllLossBackward0>)
tensor(2.3014, grad_fn=<NllLossBackward0>)
tensor(2.2958, grad_fn=<NllLossBackward0>)
tensor(2.3013, grad_fn=<NllLossBackward0>)
tensor(2.2953, grad_fn=<NllLossBackward0>)
tensor(2.2951, grad_fn=<NllLossBackward0>)
tensor(2.3116, grad_fn=<NllLossBackward0>)
tensor(2.2916, grad_fn=<NllLossBackward0>)
tensor(2.2871, grad_fn=<NllLossBackward0>)
tensor(2.2975, grad_fn=<NllLossBackward0>)
tensor(2.2950, grad_fn=<NllLossBackward0>)
tensor(2.3039, grad_fn=<NllLossBackward0>)
tensor(2.2901, grad_fn=<NllLossBackward0>)
tensor(2.2950, grad_fn=<NllLossBackward0>)
tensor(2.2958, grad_fn=<NllLossBackward0>)
tensor(2.2893, grad_fn=<NllLossBackward0>)
tensor(2.2917, grad_fn=<NllLossBackward0>)
tensor(2.3001, grad_fn=<NllLossBackward0>)
tensor(2.2988, grad_fn=<NllLossBackward0>)
tensor(2.3069, grad_fn=<NllLossBackward0>)
tensor(2.3083, grad_fn=<NllLossBackward0>)
tensor(2.2841, grad_fn=<NllLossBackward0>)
tensor(2.2932, grad_fn=<NllLossBackward0>)
tensor(2.2857, grad_fn=<NllLossBackward0>)
tensor(2.2971, grad_fn=<NllLossBackward0>)
tensor(2.2999, grad_fn=<NllLossBackward0>)
tensor(2.2911, grad_fn=<NllLossBackward0>)
tensor(2.2977, grad_fn=<NllLossBackward0>)
tensor(2.3027, grad_fn=<NllLossBackward0>)
tensor(2.2940, grad_fn=<NllLossBackward0>)
tensor(2.2939, grad_fn=<NllLossBackward0>)
tensor(2.2950, grad_fn=<NllLossBackward0>)
tensor(2.2951, grad_fn=<NllLossBackward0>)
tensor(2.3000, grad_fn=<NllLossBackward0>)
tensor(2.2935, grad_fn=<NllLossBackward0>)
tensor(2.2817, grad_fn=<NllLossBackward0>)
tensor(2.2977, grad_fn=<NllLossBackward0>)
tensor(2.3067, grad_fn=<NllLossBackward0>)
tensor(2.2742, grad_fn=<NllLossBackward0>)
tensor(2.2964, grad_fn=<NllLossBackward0>)
tensor(2.2927, grad_fn=<NllLossBackward0>)
tensor(2.2941, grad_fn=<NllLossBackward0>)
tensor(2.3003, grad_fn=<NllLossBackward0>)
tensor(2.2965, grad_fn=<NllLossBackward0>)
tensor(2.2908, grad_fn=<NllLossBackward0>)
tensor(2.2885, grad_fn=<NllLossBackward0>)
tensor(2.2984, grad_fn=<NllLossBackward0>)
tensor(2.3009, grad_fn=<NllLossBackward0>)
tensor(2.2931, grad_fn=<NllLossBackward0>)
tensor(2.2856, grad_fn=<NllLossBackward0>)
tensor(2.2907, grad_fn=<NllLossBackward0>)
tensor(2.2938, grad_fn=<NllLossBackward0>)
tensor(2.2880, grad_fn=<NllLossBackward0>)
tensor(2.2975, grad_fn=<NllLossBackward0>)
tensor(2.2922, grad_fn=<NllLossBackward0>)
tensor(2.2966, grad_fn=<NllLossBackward0>)
tensor(2.2804, grad_fn=<NllLossBackward0>)

9.3 神经网络优化多轮

python 复制代码
import torch
import torchvision
from torch import nn 
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("./dataset",train=False,transform=torchvision.transforms.ToTensor(),download=True)       
dataloader = DataLoader(dataset, batch_size=64,drop_last=True)

class Tudui(nn.Module):
    def __init__(self):
        super(Tudui, self).__init__()        
        self.model1 = Sequential(
            Conv2d(3,32,5,padding=2),
            MaxPool2d(2),
            Conv2d(32,32,5,padding=2),
            MaxPool2d(2),
            Conv2d(32,64,5,padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024,64),
            Linear(64,10)
        )
        
    def forward(self, x):
        x = self.model1(x)
        return x
    
loss = nn.CrossEntropyLoss() # 交叉熵    
tudui = Tudui()
optim = torch.optim.SGD(tudui.parameters(),lr=0.01)   # 随机梯度下降优化器
for epoch in range(20):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        result_loss = loss(outputs, targets) # 计算实际输出与目标输出的差距
        optim.zero_grad()  # 梯度清零
        result_loss.backward() # 反向传播,计算损失函数的梯度
        optim.step()   # 根据梯度,对网络的参数进行调优
        running_loss = running_loss + result_loss
    print(running_loss) # 对这一轮所有误差的总和

结果:

复制代码
Files already downloaded and verified
tensor(358.1069, grad_fn=<AddBackward0>)
tensor(353.8411, grad_fn=<AddBackward0>)
tensor(337.3790, grad_fn=<AddBackward0>)
tensor(317.3237, grad_fn=<AddBackward0>)
tensor(307.6762, grad_fn=<AddBackward0>)
tensor(298.2425, grad_fn=<AddBackward0>)
tensor(289.7010, grad_fn=<AddBackward0>)
tensor(282.7116, grad_fn=<AddBackward0>)
tensor(275.8972, grad_fn=<AddBackward0>)
tensor(269.5961, grad_fn=<AddBackward0>)
tensor(263.8480, grad_fn=<AddBackward0>)
tensor(258.5006, grad_fn=<AddBackward0>)
tensor(253.4671, grad_fn=<AddBackward0>)
tensor(248.7994, grad_fn=<AddBackward0>)
tensor(244.4917, grad_fn=<AddBackward0>)
tensor(240.5728, grad_fn=<AddBackward0>)
tensor(236.9719, grad_fn=<AddBackward0>)
tensor(233.6264, grad_fn=<AddBackward0>)
tensor(230.4298, grad_fn=<AddBackward0>)
tensor(227.3427, grad_fn=<AddBackward0>)

9.4 神经网络学习率优化

python 复制代码
import torch
import torchvision
from torch import nn 
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("./dataset",train=False,transform=torchvision.transforms.ToTensor(),download=True)       
dataloader = DataLoader(dataset, batch_size=64,drop_last=True)

class Tudui(nn.Module):
    def __init__(self):
        super(Tudui, self).__init__()        
        self.model1 = Sequential(
            Conv2d(3,32,5,padding=2),
            MaxPool2d(2),
            Conv2d(32,32,5,padding=2),
            MaxPool2d(2),
            Conv2d(32,64,5,padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024,64),
            Linear(64,10)
        )
        
    def forward(self, x):
        x = self.model1(x)
        return x
    
loss = nn.CrossEntropyLoss() # 交叉熵    
tudui = Tudui()
optim = torch.optim.SGD(tudui.parameters(),lr=0.01)   # 随机梯度下降优化器
scheduler = torch.optim.lr_scheduler.StepLR(optim, step_size=5, gamma=0.1) # 每过 step_size 更新一次优化器,更新是学习率为原来的学习率的的 0.1 倍    
for epoch in range(20):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        result_loss = loss(outputs, targets) # 计算实际输出与目标输出的差距
        optim.zero_grad()  # 梯度清零
        result_loss.backward() # 反向传播,计算损失函数的梯度
        optim.step()   # 根据梯度,对网络的参数进行调优
        scheduler.step() # 学习率太小了,所以20个轮次后,相当于没走多少
        running_loss = running_loss + result_loss
    print(running_loss) # 对这一轮所有误差的总和

结果:

复制代码
Files already downloaded and verified
tensor(359.4722, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
tensor(359.4630, grad_fn=<AddBackward0>)
相关推荐
xin_nai2 分钟前
LeetCode热题100(Java)(5)普通数组
算法·leetcode·职场和发展
数据皮皮侠AI3 分钟前
中国城市可再生能源数据集(2005-2021)|顶刊 Sci Data 11 种能源面板
大数据·人工智能·笔记·能源·1024程序员节
G31135422737 分钟前
如何用 QClaw 龙虾做一个规律作息健康助理 Agent
大数据·人工智能·ai·云计算
幂律智能8 分钟前
零售行业合同管理数智化转型解决方案
大数据·人工智能·零售
旺财矿工9 分钟前
零基础搭建 OpenClaw 2.6.6 Win11 本地化运行环境
人工智能·openclaw·小龙虾·龙虾·openclaw安装包
九成宫11 分钟前
动手学深度学习PyTorch版初步安装过程
人工智能·pytorch·深度学习
Traving Yu11 分钟前
Prompt提示词工程
人工智能·prompt
NOCSAH12 分钟前
统好AI CRM功能解析:智能录入与跟进
人工智能
旖-旎12 分钟前
深搜练习(组合)(5)
c++·算法·深度优先·力扣
He少年13 分钟前
【AI 辅助编程做设备数据采集:一个真实项目的迭代复盘(OpenSpec 驱动)】
人工智能