LeNet网络的实现
python
import torch
from torch import nn
from d2l import torch as d2l
# Side length of the square, single-channel input image.
x = 28
# LeNet-style stack: two conv + average-pool stages followed by three
# fully connected layers producing 10 outputs.
net = nn.Sequential(
    # 1 -> 6 channels; the 5x5 kernel with padding 2 keeps the size at x.
    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),
    # 2x2 pooling with stride 2 halves the size: x // 2.
    nn.AvgPool2d(kernel_size=2, stride=2),
    # 6 -> 16 channels; the unpadded 5x5 kernel trims 4 pixels: x // 2 - 4.
    nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),
    # Second pooling halves the size again: x // 4 - 2.
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    # in_features must be an int, so use floor division; `x / 4` would yield
    # a float and make nn.Linear fail when allocating its weight.
    nn.Linear(16 * (x // 4 - 2) * (x // 4 - 2), 120), nn.Sigmoid(),
    nn.Linear(120, 84), nn.Sigmoid(),
    nn.Linear(84, 10))
输入图像是单通道、大小为 x×x 的图像(下文中的除法均为整除)。
- 卷积层。
  输入 1 个通道,输出 6 个通道,卷积核大小 5×5,填充 2,步幅 1,因此输出图像大小不变:x × x。
- 平均汇聚层。
  核大小 2×2,步幅 2,因此输出图像大小减半:(x/2) × (x/2)。
- 卷积层。
  输入 6 通道,输出 16 通道,核大小 5×5,输出图像边长减 4:(x/2 - 4) × (x/2 - 4)。
- 平均汇聚层。
  核大小 2×2,步幅 2,输出大小减半:(x/4 - 2) × (x/4 - 2)。
- 全连接层。
  输入大小: 16 × (x/4 - 2) × (x/4 - 2)
  输出大小: 10
测试函数
python
def evaluate_accuracy_gpu(net, data_iter, device=None):
    """Compute the accuracy of ``net`` over every batch in ``data_iter``.

    Args:
        net: Model to evaluate. If it is an ``nn.Module`` it is switched to
            evaluation mode (and left in that mode).
        data_iter: Iterable yielding ``(X, y)`` batches, where ``X`` is a
            tensor or a list of tensors.
        device: Device the batches are moved to. When ``None`` and ``net`` is
            an ``nn.Module``, the device of the model's first parameter is
            used.

    Returns:
        The accumulated ``d2l.accuracy`` results divided by the total number
        of label elements seen.
    """
    if isinstance(net, nn.Module):
        net.eval()
        # Compare against None explicitly: a falsy but valid device such as
        # the integer ordinal 0 must not be overridden.
        if device is None:
            # Use the device of the first parameter so that the data ends up
            # on the same device as the model.
            device = next(iter(net.parameters())).device
    # Two running sums: d2l.accuracy results (presumably the number of
    # correct predictions) and the number of label elements.
    metric = d2l.Accumulator(2)
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(X, list):
                X = [part.to(device) for part in X]
            else:
                X = X.to(device)
            y = y.to(device)
            metric.add(d2l.accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]
训练和测试
python
def train_ch6(net, train_iter, test_iter, num_epochs, lr, device):
    """Train ``net`` with SGD and cross-entropy loss, plotting the progress.

    Args:
        net: Model to train; its ``nn.Linear`` and ``nn.Conv2d`` weights are
            re-initialised with Xavier-uniform values and it is moved to
            ``device``.
        train_iter: Sized iterable yielding ``(X, y)`` training batches.
        test_iter: Iterable of batches passed to ``evaluate_accuracy_gpu``
            after every epoch.
        num_epochs: Number of passes over ``train_iter``.
        lr: Learning rate handed to ``torch.optim.SGD``.
        device: Device on which the model and the batches are placed.

    Prints the final training loss, training accuracy, test accuracy and the
    measured throughput; returns nothing.
    """
    # Initialise the weights.
    def init_weights(m):
        # Exact type match on purpose: subclasses of these layers are left
        # untouched.
        if type(m) in (nn.Linear, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)

    net.apply(init_weights)
    print(' training on ', device)
    net.to(device)
    # Optimizer.
    optimizer = torch.optim.SGD(net.parameters(), lr)
    # Loss function.
    loss = nn.CrossEntropyLoss()
    animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs],
                            legend=['train loss', 'train acc', 'test acc'])
    timer, num_batches = d2l.Timer(), len(train_iter)
    # Plot roughly five times per epoch. Clamp the interval to 1 so that
    # fewer than five batches does not lead to a modulo-by-zero error.
    plot_every = max(1, num_batches // 5)
    for epoch in range(num_epochs):
        # Running sums: loss * batch size, d2l.accuracy results, examples.
        metric = d2l.Accumulator(3)
        net.train()
        for i, (X, y) in enumerate(train_iter):
            timer.start()
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            timer.stop()
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
            if (i + 1) % plot_every == 0 or i == num_batches - 1:
                animator.add(epoch + (i + 1) / num_batches,
                             (train_l, train_acc, None))
        # evaluate_accuracy_gpu switches the model to eval mode; net.train()
        # at the top of the next epoch switches it back.
        test_acc = evaluate_accuracy_gpu(net, test_iter)
        animator.add(epoch + 1, (None, None, test_acc))
    # Adjacent f-strings are concatenated without a separator, so the spaces
    # before 'test_acc' values and 'on' have to be written explicitly.
    print(f'loss {train_l:.3f},train_acc {train_acc:.3f} , '
          f'test_acc {test_acc:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec '
          f'on {str(device)}')