深度学习入门：softmax回归的总结

1.加载数据集

python 复制代码

import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l

# Configure how d2l renders figures (presumably SVG output in notebooks -- confirm in the d2l docs).
d2l.use_svg_display()

# Number of worker processes used when building the data loaders.
def get_dataloader_workers():
    """Return the number of worker processes used to read the data."""
    num_workers = 4
    return num_workers

# Build the Fashion-MNIST training and test data loaders.
def load_data_fashion_mnist(batch_size, resize=None):
    """Download Fashion-MNIST into ``./data`` and return its two loaders.

    Args:
        batch_size: Number of samples per mini-batch for both loaders.
        resize: Optional size handed to ``transforms.Resize``; when truthy,
            the resize step runs before the conversion to a tensor.

    Returns:
        A ``(train_loader, test_loader)`` tuple. Only the training loader
        shuffles its samples.
    """
    # Resize (if requested) must come first, ToTensor always comes last.
    steps = [transforms.Resize(resize)] if resize else []
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    def fetch(is_train):
        return torchvision.datasets.FashionMNIST(
            root='./data', train=is_train, transform=pipeline, download=True)

    train_set = fetch(True)
    test_set = fetch(False)

    train_loader = data.DataLoader(
        train_set, batch_size, shuffle=True,
        num_workers=get_dataloader_workers())
    test_loader = data.DataLoader(
        test_set, batch_size, shuffle=False,
        num_workers=get_dataloader_workers())
    return train_loader, test_loader

# Build the training and test loaders with the helper defined in this file.
batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size)

# Sanity check: print the shape of element 0 of one training batch.
print(next(iter(train_iter))[0].shape)

2.定义权重和偏置

python 复制代码

# Model dimensions: each sample is a flat vector of 784 values, mapped to 10 outputs.
num_inputs = 784
num_outputs = 10

# Weights drawn from N(0, 0.01^2), bias initialised to zero; both track gradients.
W = torch.normal(
    mean=0,
    std=0.01,
    size=(num_inputs, num_outputs),
    requires_grad=True,
)
b = torch.zeros(num_outputs, requires_grad=True)

3.对矩阵分别沿维度0（按列求和，纵向压缩）和维度1（按行求和，横向压缩）求和，并用keepdim=True保留被压缩的维度

python 复制代码

# A 2x3 example matrix for demonstrating axis-wise sums.
X = torch.tensor([[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0]])
# Sum over dim 0 and over dim 1, keeping the reduced axis in both results.
X.sum(dim=0, keepdim=True), X.sum(dim=1, keepdim=True)

4.定义softmax函数，获得概率

python 复制代码

def softmax(X):
    """Turn each row of scores into a probability distribution.

    Args:
        X: 2-D tensor of scores with at least one column; every row is
            normalised independently (along dim 1).

    Returns:
        A tensor of the same shape whose entries are non-negative and whose
        rows each sum to 1.
    """
    # Subtracting the per-row maximum does not change the mathematical
    # result, but it keeps exp() from overflowing to inf (which would turn
    # the ratio below into nan) when the scores are large.
    shifted = X - X.max(dim=1, keepdim=True).values
    X_exp = torch.exp(shifted)
    partition = X_exp.sum(dim=1, keepdim=True)
    return X_exp / partition

5.用随机生成的矩阵验证softmax：每个元素变为非负的概率，且每行概率之和为1

python 复制代码

# Sample a 2x5 matrix from a standard normal distribution and pass it through softmax.
X = torch.normal(mean=0, std=1, size=(2, 5))
X_prob = softmax(X)
# Notebook-style display: the softmax output and its sums along dim 1.
X_prob, X_prob.sum(dim=1)

6.定义net函数：先把输入X重塑为二维矩阵，使其列数等于权重矩阵W的行数（784），以便做矩阵乘法；再计算XW+b并经过softmax，得到每个类别的预测概率

python 复制代码

def net(X):
    """Forward pass of the model, using the module-level ``W`` and ``b``.

    Args:
        X: Input batch; it is reshaped to ``(-1, W.shape[0])`` so that every
            sample becomes one flat row.

    Returns:
        The result of ``softmax`` applied to ``X_flat @ W + b``.
    """
    flat = X.reshape((-1, W.shape[0]))
    logits = torch.matmul(flat, W) + b
    return softmax(logits)

7.定义交叉熵损失函数：模型给真实类别的预测概率越大，损失越小；预测概率越小，损失越大

python 复制代码

def cross_entropy(y_hat, y):
    """Return the cross-entropy loss of every sample.

    Args:
        y_hat: Tensor of predicted probabilities, indexed as [sample, class].
        y: Integer tensor holding the true class index of each sample.

    Returns:
        1-D tensor containing ``-log`` of the probability that ``y_hat``
        assigns to each sample's true class.
    """
    # Advanced indexing pairs row i with column y[i].
    picked = y_hat[range(len(y_hat)), y]
    return -torch.log(picked)


# Worked example: two samples over three classes, with true classes 0 and 2.
y = torch.tensor([0, 2])
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
cross_entropy(y_hat, y)

8.定义accuracy函数，返回预测正确的个数

python 复制代码

def accuracy(y_hat, y):
    """Count how many predictions match the labels.

    Args:
        y_hat: Either a 2-D tensor of per-class scores with more than one
            column (reduced with argmax over axis 1), or a tensor that
            already holds predicted class indices.
        y: Tensor of true class indices.

    Returns:
        The number of correct predictions, as a Python float.
    """
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        # Index of the largest score in every row.
        y_hat = y_hat.argmax(axis=1)
    # Cast to the label dtype so the element-wise comparison is like-for-like.
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())


# Worked example: two samples over three classes; one of the two is predicted correctly.
y = torch.tensor([0, 2])
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
accuracy(y_hat, y) / len(y)

9.定义evaluate_accuracy函数，返回模型在指定数据集上的分类准确率

python 复制代码

def evaluate_accuracy(net, data_iter):
    """Compute the classification accuracy of ``net`` over a data iterator.

    Args:
        net: Callable mapping a batch ``X`` to predictions. If it is a
            ``torch.nn.Module`` it is switched to evaluation mode (and left
            in that mode).
        data_iter: Iterable yielding ``(X, y)`` batches.

    Returns:
        Number of correctly classified samples divided by the total number
        of samples.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()  # switch to evaluation mode
    # metric[0] accumulates correct predictions, metric[1] the sample count.
    metric = d2l.Accumulator(2)
    # Evaluation never back-propagates, so skip building the autograd graph.
    with torch.no_grad():
        for X, y in data_iter:
            # accuracy(net(X), y): correct predictions in this batch;
            # y.numel(): number of samples in this batch.
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]

10.train_epoch_ch3函数，返回平均损失和训练准确率

python 复制代码

def train_epoch_ch3(net, train_iter, loss, updater):
    """Train ``net`` for one pass over ``train_iter``.

    Args:
        net: Callable mapping a batch ``X`` to predictions. If it is a
            ``torch.nn.Module`` it is switched to training mode.
        train_iter: Iterable yielding ``(X, y)`` batches.
        loss: Callable invoked as ``loss(y_hat, y)``.
        updater: Either a ``torch.optim.Optimizer`` or a callable invoked as
            ``updater(batch_size)`` after the gradients have been computed.

    Returns:
        A tuple ``(average loss per sample, training accuracy)``.
    """
    if isinstance(net, torch.nn.Module):
        net.train()
    # Running totals: summed loss, correct predictions, samples seen.
    metric = d2l.Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            updater.zero_grad()  # clear stale gradients
            l.backward()         # back-propagate to compute gradients
            updater.step()       # update the parameters
            # float(l) needs a single-element loss; scaling by len(y) treats
            # it as the batch mean -- NOTE(review): confirm the loss reduction.
            metric.add(float(l) * len(y), accuracy(y_hat, y), y.numel())
        else:
            # Custom updater: back-propagate the summed loss and pass the
            # batch size to the updater.
            l.sum().backward()
            updater(X.shape[0])
            metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # total loss / samples = average loss; correct / samples = accuracy.
    return metric[0] / metric[2], metric[1] / metric[2]

11.train_ch3函数，将训练损失，训练准确率和测试准确率画到图上

python 复制代码

def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train ``net`` for ``num_epochs`` epochs and plot the progress.

    After every epoch the training loss, training accuracy and test accuracy
    are added to an animated plot.

    Args:
        net: Model passed through to ``train_epoch_ch3`` and
            ``evaluate_accuracy``.
        train_iter: Iterable of training ``(X, y)`` batches.
        test_iter: Iterable of test ``(X, y)`` batches.
        loss: Loss callable passed to ``train_epoch_ch3``.
        num_epochs: Number of passes over ``train_iter``.
        updater: Optimizer or update callable passed to ``train_epoch_ch3``.

    Returns:
        None.
    """
    animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                            legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        # Accuracy on the test set after this epoch.
        test_acc = evaluate_accuracy(net, test_iter)
        # Plot (train loss, train acc, test acc) at x = epoch + 1.
        animator.add(epoch + 1, train_metrics + (test_acc,))

12.predict_ch3函数，把预测的数字转化为文本标签，拼接在真实标签的下一行

python 复制代码

def predict_ch3(net, test_iter, n=6):
    """Show images from the first test batch with true and predicted labels.

    Args:
        net: Callable mapping a batch ``X`` to per-class scores.
        test_iter: Iterable yielding ``(X, y)`` batches; only the first batch
            is used.
        n: Maximum number of images to display (fewer are shown when the
            first batch holds fewer samples).

    Raises:
        ValueError: If ``test_iter`` yields no batches.
    """
    first_batch = next(iter(test_iter), None)
    if first_batch is None:
        raise ValueError('test_iter yielded no batches')
    X, y = first_batch
    # The batch may hold fewer than n samples; clamp so the reshape below
    # cannot fail on a size mismatch.
    n = min(n, len(X))
    # Convert numeric labels to text labels.
    trues = d2l.get_fashion_mnist_labels(y)
    # Predicted class = index of the largest score in each row.
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    # Title layout: true label on the first line, prediction on the second.
    titles = [true + '\n' + pred for true, pred in zip(trues, preds)]
    # Show only the first n images in one row; images are reshaped to 28x28.
    d2l.show_images(X[0:n].reshape((n, 28, 28)), 1, n, titles=titles[0:n])


predict_ch3(net, test_iter)