DCGAN
This code defines a Deep Convolutional Generative Adversarial Network (DCGAN) for generating images of cars.
python
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from torchvision import datasets
from torchvision import transforms
from torchvision.utils import save_image
import numpy as np
import datetime
import os, sys
import glob
from PIL import Image
from matplotlib.pyplot import imshow, imsave
%matplotlib inline
# Name identifying this model.
MODEL_NAME = 'DCGAN'

# Image size as (height, width, channels).
IMAGE_DIM = (32, 32, 3)

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")
def get_sample_image(G, n_noise, n_samples=10, device=None):
    """
    Generate a small batch of images from the generator for inspection.

    Nothing is written to disk; the images are returned to the caller.

    Args:
        G: generator, called as ``G(z)`` with ``z`` of shape
            ``(n_samples, n_noise)``.
        n_noise (int): length of each latent noise vector.
        n_samples (int): number of images to generate (default 10).
        device: device the noise is created on; defaults to the
            module-level ``DEVICE``.

    Returns:
        numpy.ndarray: array of shape ``(n_samples, H, W, C)`` holding
        ``(G(z) + 1) / 2``.
    """
    if device is None:
        device = DEVICE
    z = torch.randn(n_samples, n_noise).to(device)
    # Sampling only: no gradients are needed.
    with torch.no_grad():
        y_hat = G(z)
    # Keep the spatial size the generator actually produced. The previous
    # hard-coded view(10, 3, 28, 28) failed for any output that is not 28x28
    # (IMAGE_DIM is 32x32). Only a non-4D output still needs a reshape.
    if y_hat.dim() != 4:
        height, width, channels = IMAGE_DIM
        y_hat = y_hat.view(n_samples, channels, height, width)
    # (N, C, H, W) -> (N, H, W, C), the layout image viewers expect.
    y_hat = y_hat.permute(0, 2, 3, 1)
    result = (y_hat.detach().cpu().numpy() + 1) / 2.
    return result


class Discriminator(nn.Module):
    """
    Convolutional discriminator that outputs one score in [0, 1] per image.

    Four stride-2 convolutions are followed by global average pooling, so
    the input height and width are not fixed by the architecture.

    Args:
        in_channel (int): number of channels of the input images.
        num_classes (int): unused; kept so existing callers keep working.
    """

    def __init__(self, in_channel=1, num_classes=1):
        super(Discriminator, self).__init__()
        self.conv = nn.Sequential(
            # Each 3x3 / stride 2 / padding 1 conv halves H and W (rounded
            # up). For a 32x32 input: 32 -> 16
            nn.Conv2d(in_channel, 512, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2),
            # 16 -> 8
            nn.Conv2d(512, 256, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2),
            # 8 -> 4
            nn.Conv2d(256, 128, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),
            # 4 -> 2
            nn.Conv2d(128, 128, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),
            # Global average: (N, 128, h, w) -> (N, 128, 1, 1)
            nn.AdaptiveAvgPool2d(1),
        )
        self.fc = nn.Sequential(
            # 128 pooled features -> 1 score
            nn.Linear(128, 1),
            nn.Sigmoid(),
        )

    def forward(self, x, y=None):
        """
        Score a batch of images.

        Args:
            x: image batch of shape ``(N, in_channel, H, W)``.
            y: unused; kept so existing callers keep working.

        Returns:
            Tensor of shape ``(N, 1)`` with sigmoid outputs.
        """
        y_ = self.conv(x)
        # Flatten (N, 128, 1, 1) -> (N, 128) for the linear layer.
        y_ = y_.view(y_.size(0), -1)
        y_ = self.fc(y_)
        return y_
class Generator(nn.Module):
    """
    Convolutional generator mapping a noise vector to an image.

    A linear layer projects the noise to a 512-channel feature map of size
    ``(IMAGE_DIM[0] // 16, IMAGE_DIM[1] // 16)``. Four stride-2 transposed
    convolutions then upsample it by a factor of 16, and the final ``Tanh``
    bounds the output to [-1, 1].

    Args:
        out_channel (int): number of channels of the generated images.
        input_size (int): length of the input noise vector.
        num_classes (int): unused; kept so existing callers keep working.
    """

    def __init__(self, out_channel=1, input_size=100, num_classes=784):
        super(Generator, self).__init__()
        height, width = IMAGE_DIM[0], IMAGE_DIM[1]
        # Both sides must be divisible by 16: the network upsamples by 2**4,
        # so any other size would silently produce the wrong output shape.
        assert height % 2**4 == 0 and width % 2**4 == 0, 'Should be divided 16'
        self.init_dim = (height // 2**4, width // 2**4)
        self.fc = nn.Sequential(
            nn.Linear(input_size, self.init_dim[0]*self.init_dim[1]*512),
            nn.ReLU(),
        )
        self.conv = nn.Sequential(
            # Same-size 3x3 conv on the initial feature map.
            nn.Conv2d(512, 512, 3, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            # x2
            nn.ConvTranspose2d(512, 256, 4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            # x2
            nn.ConvTranspose2d(256, 128, 4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            # x2
            nn.ConvTranspose2d(128, 128, 4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            # x2
            nn.ConvTranspose2d(128, out_channel, 4, stride=2, padding=1, bias=False),
            nn.Tanh(),
        )

    def forward(self, x, y=None):
        """
        Generate images from noise.

        Args:
            x: noise batch; flattened to ``(N, input_size)`` before use.
            y: unused; kept so existing callers keep working.

        Returns:
            Tensor of shape ``(N, out_channel, IMAGE_DIM[0], IMAGE_DIM[1])``.
        """
        x = x.view(x.size(0), -1)
        y_ = self.fc(x)
        # Reshape the flat projection into a 512-channel feature map.
        y_ = y_.view(y_.size(0), 512, self.init_dim[0], self.init_dim[1])
        y_ = self.conv(y_)
        return y_
class CARS(Dataset):
    '''
    CARS Dataset (images only, no labels)
    You should download this dataset from below url.
    url: https://ai.stanford.edu/~jkrause/cars/car_dataset.html
    '''

    # Image numbers left out of the dataset. Presumably these are the
    # grayscale images of cars_train -- TODO confirm against the data.
    _GRAY_IDS = (266, 1085, 2176, 3048, 3439, 3469, 3539, 4577, 4848, 5177,
                 5502, 5713, 6947, 7383, 7693, 7774, 8137, 8144)

    def __init__(self, data_path, transform=None):
        '''
        Args:
            data_path (str): path to dataset (directory holding the .jpg files)
            transform (callable, optional): applied to each PIL image in
                __getitem__; when None the PIL image is returned as is
        '''
        self.data_path = data_path
        self.transform = transform
        # Filter with a set instead of list.remove(): a listed file that is
        # not present (e.g. a partial download) no longer raises ValueError.
        excluded = {os.path.join(data_path, '{:05d}.jpg'.format(num))
                    for num in self._GRAY_IDS}
        self.fpaths = sorted(
            path for path in glob.glob(os.path.join(data_path, '*.jpg'))
            if path not in excluded
        )

    def __getitem__(self, idx):
        '''
        Load image number idx as RGB and apply the transform, if any.
        '''
        # The context manager closes the file; convert() returns a loaded
        # copy, so the image stays usable afterwards. Forcing RGB keeps the
        # channel count at 3 for every image.
        with Image.open(self.fpaths[idx]) as raw:
            img = raw.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img

    def __len__(self):
        '''
        Number of images in the dataset.
        '''
        return len(self.fpaths)
# Build both networks with the channel count taken from IMAGE_DIM and move
# them to the selected device.
D = Discriminator(in_channel=IMAGE_DIM[2]).to(DEVICE)
G = Generator(out_channel=IMAGE_DIM[2]).to(DEVICE)

# Checkpoint restore is disabled.
# NOTE(review): load_state_dict expects a state dict (e.g. the result of
# torch.load), not a file path -- confirm before enabling these lines.
# D.load_state_dict('D_dc.pkl')
# G.load_state_dict('G_dc.pkl')

# Preprocessing: resize to (height, width), convert to a tensor, then
# normalize every channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(IMAGE_DIM[:2]),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
    ]
)

dataset = CARS(transform=transform, data_path='/home/yangyangii/git/cars_train')
逐行解释代码
python
这段代码定义了一个DCGAN:包括获取样本图像的函数`get_sample_image`、判别器`Discriminator`、生成器`Generator`、汽车数据集类`CARS`,以及模型实例化和图像预处理(`transform`)的设置。
`MODEL_NAME`是DCGAN模型的名称,`DEVICE`是判断是否使用cuda的设备。`IMAGE_DIM`是图像的维度。
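`get_sample_image`函数用于生成10个样本图像(文档字符串写的是100个,但代码实际只采样10个,而且函数本身并不保存文件)。首先从标准正态分布采样形状为`(10, n_noise)`的随机张量`z`,并将其发送到设备上。然后将`z`通过生成器`G`得到生成的图像`y_hat`,将其reshape为(10, 3, 28, 28)的形状,并把维度重新排列为(10, 28, 28, 3)。注意:这里写死的28×28与`IMAGE_DIM = (32, 32, 3)`不一致,对32×32的生成器输出执行`view`会因元素数量不匹配而报错。最后把生成的图像转换为numpy数组,并通过`(x+1)/2`把取值从[-1, 1]映射到[0, 1]后返回。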
`Discriminator`类是一个卷积判别器模型(文档字符串称其用于MNIST数据集,但本文件中以`in_channel=IMAGE_DIM[-1]`即3个通道实例化,用于汽车图像)。该模型包含几个卷积层和全连接层。卷积层部分使用了`nn.Conv2d`进行卷积操作,`nn.BatchNorm2d`进行批归一化操作,`nn.LeakyReLU`进行LeakyReLU激活操作,`nn.AdaptiveAvgPool2d`进行自适应平均池化操作。全连接层部分使用了`nn.Linear`进行线性变换操作,`nn.Sigmoid`进行Sigmoid激活操作。
`forward`函数是判别器模型的前向传播方法。输入`x`经过卷积层和reshape操作后,经过全连接层得到输出`y_`。参数`y`在函数体中并未使用;无论是否传入`y`,输出都会经过全连接部分末尾的Sigmoid(而不是softmax)映射到(0,1)之间。
最后返回结果。