Neural Network学习笔记4

完整的模型训练套路

train.py

Python 代码:
import torch
import torchvision
from torch.utils.data import DataLoader
# 引入自定义的网络模型
from torch.utils.tensorboard import SummaryWriter

from model import *

# Load the CIFAR-10 train/test splits from ./dataset_transform. download=True
# lets torchvision fetch the archive when it is not there yet, and ToTensor()
# converts every image to a tensor.
train_data = torchvision.datasets.CIFAR10(
    root="dataset_transform",
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
test_data = torchvision.datasets.CIFAR10(
    root="dataset_transform",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# Number of samples in each split; the test count is later used as the
# denominator when the accuracy is reported.
train_data_size, test_data_size = len(train_data), len(test_data)
print("训练数据集的长度为:{}".format(train_data_size))
print("测试数据集的长度为:{}".format(test_data_size))

# Wrap both splits in DataLoaders that yield batches of 64 samples.
train_dataloader = DataLoader(dataset=train_data, batch_size=64)
test_dataloader = DataLoader(dataset=test_data, batch_size=64)

# 搭建神经网络:
# 一般情况下我们会把网络放到单独的python文件里,通常命名为model.py,然后在本文件头部引入就可以了
# class Zrf(nn.Module):
#     def __init__(self):
#         super(Zrf, self).__init__()
#         # Sequential 序列
#         self.model = Sequential(
#             # padding=2 是根据输入输出的H,W计算出来的
#             Conv2d(3, 32, 5, 1, padding=2), 输入通道,输出通道,卷积核尺寸,步长,padding要用公式算
#             MaxPool2d(2),
#             Conv2d(32, 32, 5, 1, padding=2),
#             MaxPool2d(2),
#             Conv2d(32, 64, 5, 1, padding=2),
#             MaxPool2d(2),
#             Flatten(),
#             Linear(1024, 64),
#             Linear(64, 10)
#         )
#
#     def forward(self, x):
#         x = self.model(x)
#         return x

# Network and loss function.
zrf = Zrf()
loss_fn = nn.CrossEntropyLoss()

# Plain SGD over all model parameters; 1e-2 is the same value as 0.01.
learning_rate = 1e-2
optimizer = torch.optim.SGD(params=zrf.parameters(), lr=learning_rate)

# Bookkeeping for the run.
epoch = 10            # number of passes over the training set
total_train_step = 0  # optimizer steps taken so far
total_test_step = 0   # evaluation rounds completed so far

# TensorBoard event files are written to ../log_train.
writer = SummaryWriter(log_dir="../log_train")

for i in range(epoch):
    print("--------第 {} 轮训练开始--------".format(i+1))

    # ---- Training phase ----
    # train() switches layers such as Dropout/BatchNorm to training behaviour;
    # this network has none, so the call is optional here but harmless.
    zrf.train()
    for data in train_dataloader:
        imgs, targets = data
        outputs = zrf(imgs)
        loss = loss_fn(outputs, targets)

        # Clear gradients left over from the previous step before computing
        # new ones, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        # Report every 100 optimizer steps to keep the console/log readable.
        if total_train_step % 100 == 0:
            print("训练次数:{},Loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # ---- Evaluation phase ----
    # After each epoch the model is scored on the test set. No parameters are
    # updated here, so gradient tracking is disabled.
    zrf.eval()
    total_test_loss = 0  # sum of the per-batch losses over the test set
    total_accuracy = 0   # number of correctly classified test samples
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            outputs = zrf(imgs)
            loss = loss_fn(outputs, targets)

            # argmax(1) picks the highest-scoring class for every sample;
            # .item() turns the match count into a plain Python int so the
            # accuracy below prints as a number rather than a tensor repr.
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy = total_accuracy + accuracy.item()

            total_test_loss = total_test_loss + loss.item()
    # str.format must be called on the string (".format", not ",format"),
    # otherwise the "{}" placeholder is printed literally.
    print("整体测试集上的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率:{}".format(total_accuracy/test_data_size))
    total_test_step = total_test_step + 1
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy/test_data_size, total_test_step)

    # Save the whole module after every epoch. Saving only the weights is the
    # alternative: torch.save(zrf.state_dict(), "zrf_{}.pth".format(i))
    torch.save(zrf, "zrf_{}.pth".format(i))
    print("模型已保存")
writer.close()

model.py

Python 代码:
import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential

# 搭建神经网络

class Zrf(nn.Module):
    """Small convolutional classifier producing 10 scores per image.

    Three Conv2d + MaxPool2d stages are followed by Flatten and two Linear
    layers. The first Linear layer expects 1024 features, which matches
    3-channel 32x32 inputs (64 channels * 4 * 4 after three 2x poolings).
    """

    def __init__(self):
        super().__init__()
        # With a 5x5 kernel and stride 1, padding=2 keeps H and W unchanged,
        # so only the pooling layers shrink the feature maps.
        layers = [
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10),
        ]
        # Stored as a single Sequential under the attribute name `model`.
        self.model = Sequential(*layers)

    def forward(self, x):
        """Run `x` through the layer stack and return the class scores."""
        return self.model(x)

if __name__ == '__main__':
    # Quick sanity check of the network: feed a dummy batch and print the
    # shape of the result.
    zrf = Zrf()
    # 64 samples, 3 channels, 32x32 pixels. Named `sample_batch` rather than
    # `input` so the builtin input() is not shadowed.
    sample_batch = torch.ones((64, 3, 32, 32))
    output = zrf(sample_batch)
    print(output.shape)

关于正确率计算的一点说明

Python 代码:
import torch

# Demonstrates how argmax along each dimension is used to count correct
# predictions.
outputs = torch.tensor([[0.1, 0.2],
                        [0.3, 0.4]])
print(outputs.argmax(1)) # dim=1: look along each row, one index per row
# tensor([1, 1]) -> the row maxima are 0.2 and 0.4
print(outputs.argmax(0)) # dim=0: look down each column, one index per column
# tensor([1, 1]) -> the column maxima are 0.3 and 0.4
# outputs = torch.tensor([[0.1, 0.2],
#                         [0.05, 0.4]])
# print(outputs.argmax(0))
# # tensor([0, 1]) -> the column maxima are 0.1 and 0.4
preds = outputs.argmax(1)
targets = torch.tensor([0, 1])
# preds is [1, 1] and targets is [0, 1]: only the second entry matches,
# so the sum of the element-wise comparison is tensor(1).
print((preds == targets).sum())

利用GPU进行训练(train_gpu)

train_gpu.py

第一种GPU训练方法

Python 代码:
# 对模型,数据(输入、标注),损失函数的后面,加 .cuda()

import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import time


# Load the CIFAR-10 train/test splits from ./dataset_transform. download=True
# lets torchvision fetch the archive when it is not there yet, and ToTensor()
# converts every image to a tensor.
train_data = torchvision.datasets.CIFAR10(
    root="dataset_transform",
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
test_data = torchvision.datasets.CIFAR10(
    root="dataset_transform",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# Number of samples in each split; the test count is later used as the
# denominator when the accuracy is reported.
train_data_size, test_data_size = len(train_data), len(test_data)
print("训练数据集的长度为:{}".format(train_data_size))
print("测试数据集的长度为:{}".format(test_data_size))

# Both splits are served in batches of 64 samples.
train_dataloader = DataLoader(dataset=train_data, batch_size=64)
test_dataloader = DataLoader(dataset=test_data, batch_size=64)


class Zrf(nn.Module):
    """Small convolutional classifier producing 10 scores per image.

    Three Conv2d + MaxPool2d stages are followed by Flatten and two Linear
    layers. The first Linear layer expects 1024 features, which matches
    3-channel 32x32 inputs (64 channels * 4 * 4 after three 2x poolings).
    """

    def __init__(self):
        super().__init__()
        # With a 5x5 kernel and stride 1, padding=2 keeps H and W unchanged,
        # so only the pooling layers shrink the feature maps.
        layers = [
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10),
        ]
        # Stored as a single Sequential under the attribute name `model`.
        self.model = Sequential(*layers)

    def forward(self, x):
        """Run `x` through the layer stack and return the class scores."""
        return self.model(x)

# Network and loss function.
zrf = Zrf()
loss_fn = nn.CrossEntropyLoss()

# GPU method 1: call .cuda() on the model and the loss function (and, inside
# the loops, on every batch) whenever a CUDA device is available.
if torch.cuda.is_available():
    zrf = zrf.cuda()
    loss_fn = loss_fn.cuda()

# Plain SGD over all model parameters.
learning_rate = 1e-2
optimizer = torch.optim.SGD(params=zrf.parameters(), lr=learning_rate)

# Bookkeeping for the run.
epoch = 10            # number of passes over the training set
total_train_step = 0  # optimizer steps taken so far
total_test_step = 0   # evaluation rounds completed so far

# TensorBoard event files are written to ../log_train.
writer = SummaryWriter(log_dir="../log_train")

# Reference point for the elapsed-time printout in the training loop.
start_time = time.time()

# CUDA availability does not change during the run, so query it once instead
# of once per batch.
use_cuda = torch.cuda.is_available()

for i in range(epoch):
    print("--------第 {} 轮训练开始--------".format(i+1))

    # ---- Training phase ----
    zrf.train()
    for data in train_dataloader:
        imgs, targets = data
        # GPU method 1: .cuda() returns a new tensor, so the result must be
        # assigned back.
        if use_cuda:
            imgs = imgs.cuda()
            targets = targets.cuda()
        outputs = zrf(imgs)
        loss = loss_fn(outputs, targets)

        # Clear stale gradients, back-propagate, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        # Report every 100 optimizer steps.
        if total_train_step % 100 == 0:
            end_time = time.time()
            # Seconds elapsed since start_time was taken.
            print(end_time - start_time)
            print("训练次数:{},Loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # ---- Evaluation phase ----
    # No parameters are updated here, so gradient tracking is disabled.
    zrf.eval()
    total_test_loss = 0  # sum of the per-batch losses over the test set
    total_accuracy = 0   # number of correctly classified test samples
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            if use_cuda:
                imgs = imgs.cuda()
                targets = targets.cuda()
            outputs = zrf(imgs)
            loss = loss_fn(outputs, targets)
            # argmax(1) picks the highest-scoring class for every sample;
            # .item() turns the match count into a plain Python int so the
            # accuracy below prints as a number rather than a tensor repr.
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy = total_accuracy + accuracy.item()
            total_test_loss = total_test_loss + loss.item()
    # str.format must be called on the string (".format", not ",format"),
    # otherwise the "{}" placeholder is printed literally.
    print("整体测试集上的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率:{}".format(total_accuracy/test_data_size))
    total_test_step = total_test_step + 1
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy/test_data_size, total_test_step)

    # Save the whole module after every epoch.
    torch.save(zrf, "zrf_{}.pth".format(i))
    print("模型已保存")
writer.close()

第二种GPU训练方法

Python 代码:
# .to(device)
# device = torch.device("cpu")
# torch.device("cuda")
# torch.device("cuda:0")
# torch.device("cuda:1")


import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import time

# Pick the device to train on: CUDA when it is available, otherwise the CPU.
# A fixed choice would be torch.device("cpu") or torch.device("cuda").
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


# Load the CIFAR-10 train/test splits from ./dataset_transform. download=True
# lets torchvision fetch the archive when it is not there yet, and ToTensor()
# converts every image to a tensor.
train_data = torchvision.datasets.CIFAR10(
    root="dataset_transform",
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
test_data = torchvision.datasets.CIFAR10(
    root="dataset_transform",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# Number of samples in each split; the test count is later used as the
# denominator when the accuracy is reported.
train_data_size, test_data_size = len(train_data), len(test_data)
print("训练数据集的长度为:{}".format(train_data_size))
print("测试数据集的长度为:{}".format(test_data_size))

# Both splits are served in batches of 64 samples.
train_dataloader = DataLoader(dataset=train_data, batch_size=64)
test_dataloader = DataLoader(dataset=test_data, batch_size=64)


class Zrf(nn.Module):
    """Small convolutional classifier producing 10 scores per image.

    Three Conv2d + MaxPool2d stages are followed by Flatten and two Linear
    layers. The first Linear layer expects 1024 features, which matches
    3-channel 32x32 inputs (64 channels * 4 * 4 after three 2x poolings).
    """

    def __init__(self):
        super().__init__()
        # With a 5x5 kernel and stride 1, padding=2 keeps H and W unchanged,
        # so only the pooling layers shrink the feature maps.
        layers = [
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10),
        ]
        # Stored as a single Sequential under the attribute name `model`.
        self.model = Sequential(*layers)

    def forward(self, x):
        """Run `x` through the layer stack and return the class scores."""
        return self.model(x)

# Network and loss function.
zrf = Zrf()
loss_fn = nn.CrossEntropyLoss()

# GPU method 2: move both modules to the chosen device. nn.Module.to() moves
# the module itself, so assigning the result back is optional here
# (zrf = zrf.to(device) works just as well). Tensors are different: their
# .to() result has to be assigned, as done inside the loops.
zrf.to(device)
loss_fn.to(device)

# Plain SGD over all model parameters.
learning_rate = 1e-2
optimizer = torch.optim.SGD(params=zrf.parameters(), lr=learning_rate)

# Bookkeeping for the run.
epoch = 10            # number of passes over the training set
total_train_step = 0  # optimizer steps taken so far
total_test_step = 0   # evaluation rounds completed so far

# TensorBoard event files are written to ../log_train.
writer = SummaryWriter(log_dir="../log_train")

# Reference point for the elapsed-time printout in the training loop.
start_time = time.time()

for i in range(epoch):
    print("--------第 {} 轮训练开始--------".format(i+1))

    # ---- Training phase ----
    zrf.train()
    for data in train_dataloader:
        imgs, targets = data
        # GPU method 2: Tensor.to() returns a new tensor, so the result must
        # be assigned back.
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = zrf(imgs)
        loss = loss_fn(outputs, targets)

        # Clear stale gradients, back-propagate, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        # Report every 100 optimizer steps.
        if total_train_step % 100 == 0:
            end_time = time.time()
            # Seconds elapsed since start_time was taken.
            print(end_time - start_time)
            print("训练次数:{},Loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # ---- Evaluation phase ----
    # No parameters are updated here, so gradient tracking is disabled.
    zrf.eval()
    total_test_loss = 0  # sum of the per-batch losses over the test set
    total_accuracy = 0   # number of correctly classified test samples
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = zrf(imgs)
            loss = loss_fn(outputs, targets)
            # argmax(1) picks the highest-scoring class for every sample;
            # .item() turns the match count into a plain Python int so the
            # accuracy below prints as a number rather than a tensor repr.
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy = total_accuracy + accuracy.item()
            total_test_loss = total_test_loss + loss.item()
    # str.format must be called on the string (".format", not ",format"),
    # otherwise the "{}" placeholder is printed literally.
    print("整体测试集上的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率:{}".format(total_accuracy/test_data_size))
    total_test_step = total_test_step + 1
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy/test_data_size, total_test_step)

    # Save the whole module after every epoch.
    torch.save(zrf, "zrf_{}.pth".format(i))
    print("模型已保存")
writer.close()

利用GPU训练前一百次(前100个batch)所用的时间: 4.680064678192139 秒

没有GPU(仅用CPU)训练前一百次所用的时间: 6.723153114318848 秒

完整的模型验证套路

(测试、demo)利用已经训练好的模型,给它提供输入,查看模型的输出

相关推荐
kinl201831 分钟前
cs2385_note1 (lec6-lec8)
笔记
FatHonor42 分钟前
【golang学习之旅】使用VScode安装配置Go开发环境
vscode·学习·golang
Edward111111111 小时前
3月24 内部类
学习
我不是程序猿儿1 小时前
【嵌入式】适合 STM32 初学者BootLoader 入门学习心得
linux·stm32·单片机·嵌入式硬件·学习
中屹指纹浏览器2 小时前
2026住宅IP网络环境下指纹浏览器稳定性优化与工程实践
经验分享·笔记
液态不合群2 小时前
一文学习 Spring 声明式事务源码全流程总结
java·学习·spring
云边散步2 小时前
godot2D游戏教程系列二(20)
笔记·学习·音视频
CyanMind2 小时前
IsaacLab 训练范式探索(二):从“上帝视角”到实机落地的蒸馏学习
学习
吃个糖糖2 小时前
Open3D学习点云读取与显示
学习
DANGAOGAO2 小时前
Transformer学习
深度学习·学习·transformer