文章目录
- [1 卷积](#1 卷积)
- [2 反/逆卷积](#2 反/逆卷积)
- [3 MaxUnpool / ConvTranspose](#3 MaxUnpool / ConvTranspose)
- [4 encoder-decoder](#4 encoder-decoder)
- [5 可视化](#5 可视化)
学习参考来自:
1 卷积
输入
卷积核
步长为 1,卷起来形式如下
输出的每个结果和输入的 9 个数值有关系
更直观的写成如下展开的矩阵乘形式
填零和 stride 与 kernel size 有关
2 反/逆卷积
相比逆卷积 (Deconvolution),转置卷积 (Transposed Convolution) 是一个更为合适的叫法
上述过程反过来,输入的一个数值与输出的 9 个数值有关
把原来的 $W$ 转置一下即可实现该功能,当然转置后的 $W^T$ 也是需要去学习更新的
矩阵乘可以看到,输入的每个值影响到了输出的 9 个值
3 MaxUnpool / ConvTranspose
搞个代码简单的看看效果
py
"maxpool"
# Import torch up front so this snippet runs on its own.
import torch
import torch.nn as nn

# 2x2 max pooling with stride 2. return_indices=True makes the layer also
# return the flattened position of every maximum, which MaxUnpool2d needs.
m = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, return_indices=True)
input_data = torch.tensor([[[
    [1, 2, 8, 7],
    [3, 4, 6, 5],
    [9, 10, 16, 15],
    [13, 14, 12, 11],
]]], dtype=torch.float32)
print(input_data.shape)  # torch.Size([1, 1, 4, 4])
out, indices = m(input_data)
print(out, "\n", indices)
output
py
tensor([[[[ 4., 8.],
[14., 16.]]]])
tensor([[[[ 5, 2],
[13, 10]]]])
py
"maxuppooling"
# Undo the pooling: every pooled value is written back to the position stored
# in `indices`; all remaining positions of the output are zero.
n = nn.MaxUnpool2d(2, stride=2)
out = n(out, indices, output_size=input_data.shape)
print(out)
output
py
tensor([[[[ 0., 0., 8., 0.],
[ 0., 4., 0., 0.],
[ 0., 0., 16., 0.],
[ 0., 14., 0., 0.]]]])
在使用 MaxUnpool 的时候要特别注意,需要在 MaxPool 的时候设置 return_indices=True 并保存返回的 indices,否则 MaxUnpool 缺少必需的 indices 参数会报错
下面看看其在网络中的简单应用
py
import torch.nn as nn
import torch
"MaxUnpool"
class ConvDAE(nn.Module):
    """Small convolutional auto-encoder built on MaxUnpool2d and ConvTranspose2d.

    The shape comments assume an input of size ``batch x 3 x 32 x 32``.
    """

    def __init__(self):
        super().__init__()

        # Encoder: batch x 3 x 32 x 32 -> batch x 16 x 16 x 16.
        # The final pooling layer returns (values, indices), so the whole
        # Sequential returns that tuple as well.
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3,
                      stride=1, padding=1),  # batch x 16 x 32 x 32
            nn.ReLU(),
            nn.BatchNorm2d(num_features=16),
            nn.MaxPool2d(kernel_size=2, stride=2, return_indices=True),
        )

        # Inverse of the encoder's max pooling.
        self.unpool = nn.MaxUnpool2d(kernel_size=2, stride=2, padding=0)

        # Decoder: the stride-2 transposed convolution doubles the spatial size.
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(in_channels=16, out_channels=16, kernel_size=3,
                               stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(num_features=16),
            nn.ConvTranspose2d(in_channels=16, out_channels=3, kernel_size=3,
                               stride=1, padding=1, output_padding=0),
            nn.ReLU(),
        )

    def forward(self, x):
        """Encode ``x``, unpool with the saved indices, then decode."""
        pooled, pool_idx = self.encoder(x)        # torch.Size([1, 16, 16, 16])
        restored = self.unpool(pooled, pool_idx)  # torch.Size([1, 16, 32, 32])
        return self.decoder(restored)             # torch.Size([1, 3, 64, 64])
if __name__ == "__main__":
    # Smoke test: push one random 3 x 32 x 32 image through the network.
    DAE = ConvDAE()
    x = torch.randn(1, 3, 32, 32)
    DAE(x)
网络结构比较简单,encoder 降低图片分辨率至 1/2,通道数由 3 变为 16
unpool 反 max pooling 恢复图片分辨率
decoder 反卷积提升图片分辨率
4 encoder-decoder
再看一个稍微复杂的 encoder-decoder 结构
py
class autoencoder(nn.Module):
    """1-D convolutional auto-encoder with a 30-value bottleneck.

    Encoder: two Conv1d + BatchNorm1d + ReLU + MaxPool1d stages followed by
    two linear layers.  Decoder: the mirrored stack, using MaxUnpool1d (fed
    with the pooling indices saved by the encoder) and ConvTranspose1d.
    The shape comments assume a single-channel 28 x 28 input, i.e. 784 samples
    once flattened.
    """

    def __init__(self):
        super().__init__()

        # ---------------- encoder ----------------
        # Stage 1: (1, 784) -> conv (32, 784) -> pool (32, 262)
        self.encode1 = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=32, kernel_size=25,
                      stride=1, padding=12),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=3, padding=1,
                         return_indices=True),
        )
        # Stage 2: (32, 262) -> conv (64, 262) -> pool (64, 88)
        self.encode2 = nn.Sequential(
            nn.Conv1d(in_channels=32, out_channels=64, kernel_size=25,
                      stride=1, padding=12),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=3, padding=1,
                         return_indices=True),
        )
        # Bottleneck: 64 * 88 features -> 1024 -> 30
        self.encode3 = nn.Sequential(
            nn.Linear(in_features=88 * 64, out_features=1024),
            nn.Linear(in_features=1024, out_features=30),
        )

        # ---------------- decoder ----------------
        # Unpooling layers mirror the two pooling layers above.
        self.unpooling1 = nn.MaxUnpool1d(kernel_size=3, stride=3, padding=1)  # (64, 88) -> (64, 262)
        self.unpooling2 = nn.MaxUnpool1d(kernel_size=3, stride=3, padding=1)  # (32, 262) -> (32, 784)
        # Stage 1: (64, 262) -> (32, 262)
        self.decode1 = nn.Sequential(
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.ConvTranspose1d(in_channels=64, out_channels=32, kernel_size=25,
                               stride=1, padding=12),
        )
        # Stage 2: (32, 784) -> (1, 784)
        self.decode2 = nn.Sequential(
            nn.ReLU(),
            nn.BatchNorm1d(32),
            nn.ConvTranspose1d(in_channels=32, out_channels=1, kernel_size=25,
                               stride=1, padding=12),
        )
        # Expand the 30-value code back to 64 * 88 features.
        self.decode3 = nn.Sequential(
            nn.Linear(in_features=30, out_features=1024),
            nn.Linear(in_features=1024, out_features=88 * 64),
        )

    def forward(self, x):
        """Flatten ``x``, encode it to 30 values and decode it back."""
        batch = x.size(0)

        # ---- encode ----
        signal = x.view(batch, 1, -1)              # flatten image: torch.Size([1, 1, 784])
        feat, idx_stage1 = self.encode1(signal)    # torch.Size([1, 32, 262])
        feat, idx_stage2 = self.encode2(feat)      # torch.Size([1, 64, 88])
        flat = feat.view(batch, -1)                # torch.Size([1, 5632])
        code = self.encode3(flat)                  # torch.Size([1, 30])

        # ---- decode ----
        feat = self.decode3(code)                  # torch.Size([1, 5632])
        feat = feat.view(batch, 64, 88)            # torch.Size([1, 64, 88])
        feat = self.unpooling1(feat, idx_stage2)   # torch.Size([1, 64, 262])
        feat = self.decode1(feat)                  # torch.Size([1, 32, 262])
        feat = self.unpooling2(feat, idx_stage1)   # torch.Size([1, 32, 784])
        return self.decode2(feat)                  # torch.Size([1, 1, 784])
if __name__ == "__main__":
    # Smoke test on one random single-channel 28 x 28 image.
    x = torch.randn((1, 1, 28, 28))
    # Give the instance its own name: assigning it to `autoencoder` would
    # overwrite the class, so no second model could be constructed afterwards.
    model = autoencoder()
    model(x)
结构草图如下所示
主要展示的是 nn.ConvTranspose 与 nn.MaxUnpool 的运用,使用 nn.MaxUnpool 时要记得传入对应 MaxPool 返回的 indices
应用主要是 1d,2d 同理可以拓展
5 可视化
简单的实验,输入 MNIST 原始图片,conv+max pooling 下采样,maxunpooling+transposed conv 回原图,看看效果
载入相关库,载入数据集
py
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np
import cv2
import matplotlib.pyplot as plt
# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Training hyper-parameters
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# MNIST train / test splits stored under the current directory.
# Only the training split requests a download.
train_dataset = torchvision.datasets.MNIST(
    root='./', train=True, transform=transforms.ToTensor(), download=True
)
test_dataset = torchvision.datasets.MNIST(
    root='./', train=False, transform=transforms.ToTensor()
)

# Mini-batch loaders; only the training data is shuffled
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False
)
图像可视化的前期工作
py
def imshow(img):
    """Draw a channel-first image tensor with matplotlib.

    Args:
        img: tensor laid out as (channels, height, width), for example the
            grid produced by ``torchvision.utils.make_grid``.
    """
    # Tensor.numpy() raises for tensors that require grad or live on the GPU,
    # so drop the autograd history and copy to host memory first.
    npimg = img.detach().cpu().numpy()
    # matplotlib expects channels last: (C, H, W) -> (H, W, C)
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
搭建神经网络,及其初始化
py
# Network definition
class CNNMNIST(nn.Module):
    """Minimal conv -> max-pool -> max-unpool -> transposed-conv auto-encoder.

    The shape comments assume MNIST batches of 100 images, i.e. an input of
    size (100, 1, 28, 28).
    """

    def __init__(self):
        super(CNNMNIST, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=0)
        # return_indices=True: the pooling layer also returns the positions of
        # the maxima, which MaxUnpool2d needs in order to invert it.
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, return_indices=True)
        self.unpool1 = nn.MaxUnpool2d(kernel_size=2, stride=2, padding=0)
        self.unconv1 = nn.ConvTranspose2d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        """Return the reconstruction of ``x`` (same shape as ``x``)."""
        # encode
        conv_out = self.conv1(x)                # torch.Size([100, 1, 26, 26])
        pooled, indices = self.pool1(conv_out)  # torch.Size([100, 1, 13, 13])
        # decode
        # output_size is the pre-pooling shape, so the unpooled map matches
        # conv_out exactly even when pooling rounded an odd size down.
        out = self.unpool1(pooled, indices, output_size=conv_out.size())  # torch.Size([100, 1, 26, 26])
        out = self.unconv1(out)                 # torch.Size([100, 1, 28, 28])
        return out
# Instantiate the network, then move it to the configured device
model = CNNMNIST()
model = model.to(device)
print(model)
output
py
CNNMNIST(
(conv1): Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
(pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(unpool1): MaxUnpool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0))
(unconv1): ConvTranspose2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
)
网络训练与保存
py
# Loss and optimizer: mean squared reconstruction error, minimized with Adam
criterion = nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training
model.train()
total_step = len(train_loader)
for epoch in range(num_epochs):
    # Labels are ignored: the network is trained to reproduce its own input
    for step, (images, _) in enumerate(train_loader, start=1):
        # Move the batch to the configured device
        images = images.to(device)

        # Forward pass; the reconstruction target is the input itself
        outputs = model(images)
        loss = criterion(outputs, images)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss of the current batch every 100 steps
        if step % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                epoch + 1, num_epochs, step, total_step, loss.item()))

# Save the whole model object (not only its state_dict)
torch.save(model, "model.pkl")
output
py
Epoch [1/5], Step [100/600], Loss: 0.0764
Epoch [1/5], Step [200/600], Loss: 0.0549
Epoch [1/5], Step [300/600], Loss: 0.0457
Epoch [1/5], Step [400/600], Loss: 0.0468
Epoch [1/5], Step [500/600], Loss: 0.0443
Epoch [1/5], Step [600/600], Loss: 0.0452
Epoch [2/5], Step [100/600], Loss: 0.0445
Epoch [2/5], Step [200/600], Loss: 0.0427
Epoch [2/5], Step [300/600], Loss: 0.0407
Epoch [2/5], Step [400/600], Loss: 0.0432
Epoch [2/5], Step [500/600], Loss: 0.0414
Epoch [2/5], Step [600/600], Loss: 0.0413
Epoch [3/5], Step [100/600], Loss: 0.0415
Epoch [3/5], Step [200/600], Loss: 0.0420
Epoch [3/5], Step [300/600], Loss: 0.0425
Epoch [3/5], Step [400/600], Loss: 0.0413
Epoch [3/5], Step [500/600], Loss: 0.0416
Epoch [3/5], Step [600/600], Loss: 0.0414
Epoch [4/5], Step [100/600], Loss: 0.0401
Epoch [4/5], Step [200/600], Loss: 0.0409
Epoch [4/5], Step [300/600], Loss: 0.0418
Epoch [4/5], Step [400/600], Loss: 0.0412
Epoch [4/5], Step [500/600], Loss: 0.0407
Epoch [4/5], Step [600/600], Loss: 0.0405
Epoch [5/5], Step [100/600], Loss: 0.0411
Epoch [5/5], Step [200/600], Loss: 0.0412
Epoch [5/5], Step [300/600], Loss: 0.0406
Epoch [5/5], Step [400/600], Loss: 0.0407
Epoch [5/5], Step [500/600], Loss: 0.0409
Epoch [5/5], Step [600/600], Loss: 0.0401
模型载入,可视化结果
py
"load model"
# NOTE: "model.pkl" holds a fully pickled module, so torch.load unpickles
# arbitrary objects -- only load files you created yourself. Recent PyTorch
# releases default to weights_only=True and need weights_only=False passed
# explicitly to read this kind of file.
model = torch.load("model.pkl")
model.eval()  # inference only from here on

# Visualize one batch of inputs followed by its reconstructions
dataiter = iter(train_loader)
# Use the builtin next(): the iterator's .next() method no longer exists in
# current PyTorch releases.
images, labels = next(dataiter)
imshow(torchvision.utils.make_grid(images, nrow=10))
plt.show()

images = images.to(device)
# Forward pass without autograd tracking, so the result can be converted to a
# NumPy array for plotting.
with torch.no_grad():
    outputs = model(images)
imshow(torchvision.utils.make_grid(outputs.cpu(), nrow=10))
plt.show()
MNIST 多图的可视化,可以借鉴借鉴,核心代码为 torchvision.utils.make_grid
部分输入
部分输出
换成纯卷积(用 stride=2 的卷积代替 max pooling,用 stride=2 的转置卷积代替 max unpooling)后,重建图像的失真更小
py
class CNNMNIST(nn.Module):
    """Pooling-free auto-encoder made only of (transposed) convolutions.

    A stride-2 convolution does the down-sampling and a stride-2 transposed
    convolution does the up-sampling.  The shape comments assume an input of
    size (100, 1, 28, 28).
    """

    def __init__(self):
        super().__init__()
        # encoder
        self.conv1 = nn.Conv2d(1, 1, kernel_size=3, stride=1, padding=0)
        self.conv2 = nn.Conv2d(1, 1, kernel_size=2, stride=2, padding=0)
        # decoder
        self.unconv1 = nn.ConvTranspose2d(1, 1, kernel_size=2, stride=2, padding=0)
        self.unconv2 = nn.ConvTranspose2d(1, 1, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        """Return the reconstruction of ``x``."""
        # encode
        feat = self.conv1(x)          # torch.Size([100, 1, 26, 26])
        code = self.conv2(feat)       # torch.Size([100, 1, 13, 13])
        # decode
        upsampled = self.unconv1(code)  # torch.Size([100, 1, 26, 26])
        return self.unconv2(upsampled)  # torch.Size([100, 1, 28, 28])
输入
输出