深度学习:从图片数据到模型训练(十分类)

环境准备

首先,我们需要安装并导入所需的库:

复制代码
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from collections import Counter

接着,定义一些超参数:

复制代码
BATCHSIZE=100
DOWNLOAD_MNIST=False
EPOCHS=20
LR=0.001

数据加载与预处理

我们使用CIFAR-10数据集,并进行预处理:

复制代码
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

模型定义

我们定义了三个模型:CNNNet、LeNet和VGG。

复制代码
class CNNNet(nn.Module):
    def __init__(self):
        super(CNNNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=36, kernel_size=3, stride=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(1296, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x=self.pool1(F.relu(self.conv1(x)))
        x=self.pool2(F.relu(self.conv2(x)))
        x=x.view(-1, 36*6*6)
        x=F.relu(self.fc2(F.relu(self.fc1(x))))
        return x

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1  = nn.Linear(16*5*5, 120)
        self.fc2  = nn.Linear(120, 84)
        self.fc3  = nn.Linear(84, 10)

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 36, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.aap=nn.AdaptiveAvgPool2d(1)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(36, 10)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.aap(x)
        x = x.view(x.shape[0], -1)
        x = self.fc3(x)
        return x

cfg = {
    'VGG16':[64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19':[64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 'M', 512, 512, 'M'],
}

class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == "M":
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                            nn.BatchNorm2d(x),
                            nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)

模型训练与评估

我们将三个模型放在一个列表中,然后进行训练和评估。

复制代码
# 把3个网络模型放在一个列表里
nlps=[net1.to(device),net2.to(device),net3.to(device)]

optimizer=torch.optim.Adam([{"params":nlp.parameters()} for nlp in nlps],lr=LR)

loss_function=nn.CrossEntropyLoss()

for ep in range(EPOCHS):
    for img,label in trainloader:
        img,label=img.to(device),label.to(device)
        optimizer.zero_grad()#10个网络清除梯度
        for nlp in nlps:
            nlp.train()
            out=nlp(img)
            loss=loss_function(out,label)
            loss.backward()#网络们获得梯度
            optimizer.step()

pre=[]
vote_correct=[0 for i in range(len(nlps))]
for img,label in testloader:
    img,label=img.to(device),label.to(device)
    for i, mlp in enumerate(nlps):
        mlp.eval()
        out=mlp(img)
        _,prediction=torch.max(out,1) #按行取最大值
        pre_num=prediction.cpu().numpy()
        nlps_correct[i]+=(pre_num==label.cpu().numpy()).sum()
        pre.append(pre_num)
    arr=np.array(pre)
    pre.clear()
    result=[Counter(arr[:,i].most_common(1)[0][0] for i in range(BATCHSIZE)]
    vote_correct=(result == label.cpu().numpy()).sum()
    print("epoch:" + str(ep)+"集成模型的正确率"+str(vote_correct/len(testloader)))

模型集成

在测试阶段,我们对每个模型的预测结果进行投票,选择出现次数最多的类别作为最终预测结果。

复制代码
for idx, correct in enumerate(nlps_correct):
    print("模型"+str(idx)+"的正确率为:"+str(correct/len(testloader)))

通过模型集成,我们可以看到VGG16模型在20次迭代中的正确率从58.39%提升到89.24%,显示了模型集成在提升分类性能方面的有效性。

相关推荐
递归不收敛5 小时前
大语言模型(LLM)入门笔记:嵌入向量与位置信息
人工智能·笔记·语言模型
之墨_5 小时前
【大语言模型】—— 自注意力机制及其变体(交叉注意力、因果注意力、多头注意力)的代码实现
人工智能·语言模型·自然语言处理
2301_821919926 小时前
深度学习(四)
pytorch·深度学习
从孑开始6 小时前
ManySpeech.MoonshineAsr 使用指南
人工智能·ai·c#·.net·私有化部署·语音识别·onnx·asr·moonshine
涛涛讲AI6 小时前
一段音频多段字幕,让音频能够流畅自然对应字幕 AI生成视频,扣子生成剪映视频草稿
人工智能·音视频·语音识别
可触的未来,发芽的智生6 小时前
新奇特:黑猫警长的纳米世界,忆阻器与神经网络的智慧
javascript·人工智能·python·神经网络·架构
WWZZ20257 小时前
快速上手大模型:机器学习2(一元线性回归、代价函数、梯度下降法)
人工智能·算法·机器学习·计算机视觉·机器人·大模型·slam
孤狼灬笑7 小时前
深度学习经典分类(算法分析与案例)
rnn·深度学习·算法·cnn·生成模型·fnn
AKAMAI7 小时前
数据孤岛破局之战 :跨业务分析的难题攻坚
运维·人工智能·云计算