- 🍨 本文为🔗365天深度学习训练营 中的学习记录博客
- 🍖 原作者:K同学啊
本周任务:
根据GAN、CGAN、SGAN及它们的框架图,写出ACGAN代码。
框架图
从图中可以看到,ACGAN的前半部分类似于CGAN,后半部分类似于SGAN,因此,代码前半部分模仿CGAN,后半部分模仿SGAN
配置代码
python
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from torchvision.utils import save_image
from torchvision.utils import make_grid
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary
import matplotlib.pyplot as plt
import datetime
import argparse
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_name)

# Mini-batch size handed to the training DataLoader.
batch_size = 128
这里先定义基本常量,作用相当于parser = argparse.ArgumentParser()
python
class Args:
    """Plain namespace of hyper-parameters (stand-in for an argparse result).

    All values are class attributes, so ``Args().name`` and ``Args.name``
    both work.
    """

    n_epochs = 200          # number of passes over the training set
    batch_size = 64
    lr = 0.0002             # Adam learning rate
    b1 = 0.5                # Adam beta1
    b2 = 0.999              # Adam beta2
    n_cpu = 8
    latent_dim = 100        # size of the generator's noise vector
    img_size = 128          # depends on the image type
    channels = 3            # depends on the image type
    num_classes = 3         # read by the discriminator head and the training loop
    sample_interval = 400   # save a sample grid every N batches

    def __repr__(self):
        """Return ``Args(name=value, ...)`` so ``print(opt)`` is informative."""
        names = [key for key in vars(type(self)) if not key.startswith("_")]
        fields = ", ".join(f"{key}={getattr(self, key)!r}" for key in names)
        return f"{type(self).__name__}({fields})"


opt = Args()
print(opt)
python
# Preprocessing pipeline: resize, convert to a tensor, then normalise each of
# the three channels with mean 0.5 and std 0.5.
train_transform = transforms.Compose(
    [
        transforms.Resize(128),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

# ImageFolder derives the integer class label from each sub-directory.
train_dataset = datasets.ImageFolder("F:/365data/G3/rps/", transform=train_transform)

# Shuffled mini-batches loaded by six worker processes.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=6,
)
python
def show_images(images):
    """Plot a batch of images as a single grid (22 images per row).

    Args:
        images: tensor of images in the layout accepted by ``make_grid``.
    """
    _, ax = plt.subplots(figsize=(20, 20))
    ax.set_xticks([])
    ax.set_yticks([])
    # normalize=True rescales the grid into [0, 1]; the training images are
    # normalised to [-1, 1] and imshow would otherwise clip the negative half.
    # .cpu() lets the function accept tensors that live on the GPU.
    grid = make_grid(images.detach().cpu(), nrow=22, normalize=True)
    # make_grid returns (C, H, W); imshow expects (H, W, C).
    ax.imshow(grid.permute(1, 2, 0))
def show_batch(dl):
    """Display only the first batch yielded by ``dl``; do nothing if it is empty."""
    first = next(iter(dl), None)
    if first is not None:
        batch_images, _ = first
        show_images(batch_images)
python
# Number of label classes, and the sizes of the noise vector and label embedding.
n_classes = 3
latent_dim = embedding_dim = 100

# Image layout (channels, height, width) and its flattened element count.
image_shape = (3, 128, 128)
image_dim = int(np.prod(image_shape))
python
def weights_init(m):
    """Initialise one layer in place; intended for ``module.apply(weights_init)``.

    * Layers whose class name contains ``Conv``: weights ~ N(0, 0.02).
    * Layers whose class name contains ``BatchNorm``: weights ~ N(1, 0.02),
      bias set to zero.
    * Every other layer is left untouched.

    Layers without a learnable ``weight``/``bias`` (for example
    ``BatchNorm2d(affine=False)``) are skipped instead of raising.

    Args:
        m: the layer to initialise.
    """
    classname = m.__class__.__name__
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)

    if 'Conv' in classname:
        if weight is not None:
            torch.nn.init.normal_(weight, 0.0, 0.02)
    elif 'BatchNorm' in classname:
        if weight is not None:
            torch.nn.init.normal_(weight, 1.0, 0.02)
        if bias is not None:
            torch.nn.init.zeros_(bias)
python
class Generator(nn.Module):
    """Label-conditioned, DCGAN-style generator.

    Reads the module-level ``n_classes``, ``embedding_dim`` and ``latent_dim``.
    The label is embedded and reshaped into one 4x4 plane, the noise vector is
    projected to 512 planes of 4x4, and the 513 stacked planes are upsampled by
    five stride-2 transposed convolutions (4 -> 8 -> 16 -> 32 -> 64 -> 128)
    into a 3-channel image squashed to [-1, 1] by Tanh.
    """

    def __init__(self):
        super().__init__()

        def up_block(c_in, c_out):
            # ConvTranspose2d with kernel 4, stride 2, padding 1 doubles H and W.
            return [
                nn.ConvTranspose2d(c_in, c_out, 4, 2, 1, bias=False),
                nn.BatchNorm2d(c_out, momentum=0.1, eps=0.8),
                nn.ReLU(True),
            ]

        # Label branch: class index -> embedding -> 16 values (one 4x4 plane).
        self.label_conditioned_generator = nn.Sequential(
            nn.Embedding(n_classes, embedding_dim),
            nn.Linear(embedding_dim, 16),
        )

        # Noise branch: latent vector -> 512 * 4 * 4 activations.
        self.latent = nn.Sequential(
            nn.Linear(latent_dim, 4 * 4 * 512),
            nn.LeakyReLU(0.2, inplace=True),
        )

        # Upsampling trunk; the input has 512 noise planes + 1 label plane.
        base = 64
        self.model = nn.Sequential(
            *up_block(512 + 1, base * 8),    # 4x4   -> 8x8
            *up_block(base * 8, base * 4),   # 8x8   -> 16x16
            *up_block(base * 4, base * 2),   # 16x16 -> 32x32
            *up_block(base * 2, base),       # 32x32 -> 64x64
            nn.ConvTranspose2d(base, 3, 4, 2, 1, bias=False),  # 64x64 -> 128x128
            nn.Tanh(),
        )

    def forward(self, inputs):
        """Generate images from a ``(noise_vector, label)`` pair.

        Args:
            inputs: 2-tuple ``(noise_vector, label)``; the noise is fed to the
                latent projection and the label to the embedding branch.

        Returns:
            The output of the upsampling trunk for the concatenated planes.
        """
        noise_vector, label = inputs
        # One 4x4 plane per sample carrying the label information.
        label_plane = self.label_conditioned_generator(label).view(-1, 1, 4, 4)
        # 512 planes of 4x4 per sample derived from the noise.
        latent_maps = self.latent(noise_vector).view(-1, 512, 4, 4)
        # Stack along the channel axis and upsample to an image.
        return self.model(torch.cat((latent_maps, label_plane), dim=1))
python
# Build the generator on the selected device and re-initialise its layers;
# both Module.to and Module.apply return the module itself.
generator = Generator().to(device).apply(weights_init)
print(generator)
python
# Rebinds `summary` to torchinfo's implementation, replacing the one imported
# from torchsummary at the top of the file; later summary() calls use this one.
from torchinfo import summary
# Print the layer / parameter-count table of the generator.
summary(generator)
python
=================================================================
Layer (type:depth-idx) Param #
=================================================================
Generator --
├─Sequential: 1-1 --
│ └─Embedding: 2-1 300
│ └─Linear: 2-2 1,616
├─Sequential: 1-2 --
│ └─Linear: 2-3 827,392
│ └─LeakyReLU: 2-4 --
├─Sequential: 1-3 --
│ └─ConvTranspose2d: 2-5 4,202,496
│ └─BatchNorm2d: 2-6 1,024
│ └─ReLU: 2-7 --
│ └─ConvTranspose2d: 2-8 2,097,152
│ └─BatchNorm2d: 2-9 512
│ └─ReLU: 2-10 --
│ └─ConvTranspose2d: 2-11 524,288
│ └─BatchNorm2d: 2-12 256
│ └─ReLU: 2-13 --
│ └─ConvTranspose2d: 2-14 131,072
│ └─BatchNorm2d: 2-15 128
│ └─ReLU: 2-16 --
│ └─ConvTranspose2d: 2-17 3,072
│ └─Tanh: 2-18 --
=================================================================
Total params: 7,789,308
Trainable params: 7,789,308
Non-trainable params: 0
=================================================================
以上基本上是CGAN代码的前半部分,到生成器代码为止,以下为SGAN代码的部分
python
class Discriminator(nn.Module):
    """SGAN-style discriminator with a real/fake head and a class head.

    Args:
        in_channels: number of channels of the input images.
        img_size: side length of the (square) input images. Defaults to
            ``opt.img_size`` when omitted.
        num_classes: number of real classes. Defaults to the module-level
            ``n_classes`` when omitted. The class head emits
            ``num_classes + 1`` scores; the extra index is the "fake" class.
    """

    def __init__(self, in_channels=3, img_size=None, num_classes=None):
        super().__init__()
        if img_size is None:
            img_size = opt.img_size
        if num_classes is None:
            num_classes = n_classes

        def discriminator_block(in_filters, out_filters, bn=True):
            """Returns layers of each discriminator block"""
            block = [
                nn.Conv2d(in_filters, out_filters, 3, 2, 1),
                nn.LeakyReLU(0.2, inplace=True),
                nn.Dropout2d(0.25),
            ]
            if bn:
                # The second positional argument of BatchNorm2d is eps; it is
                # spelled out here so the 0.8 is not mistaken for momentum.
                block.append(nn.BatchNorm2d(out_filters, eps=0.8))
            return block

        self.conv_blocks = nn.Sequential(
            *discriminator_block(in_channels, 16, bn=False),
            *discriminator_block(16, 32),
            *discriminator_block(32, 64),
            *discriminator_block(64, 128),
        )

        # Each conv (kernel 3, stride 2, padding 1) maps a side of s to
        # ceil(s / 2); apply that once per block so sizes that are not a
        # multiple of 16 still yield the right flattened width.
        ds_size = img_size
        for _ in range(4):
            ds_size = (ds_size + 1) // 2

        # Output layers
        self.adv_layer = nn.Sequential(nn.Linear(128 * ds_size ** 2, 1), nn.Sigmoid())
        # The class head returns raw logits: CrossEntropyLoss applies
        # log-softmax itself, so a Softmax here would be applied twice.
        self.aux_layer = nn.Sequential(nn.Linear(128 * ds_size ** 2, num_classes + 1))

    def forward(self, img):
        """Score a batch of images.

        Args:
            img: batch of images with ``in_channels`` channels.

        Returns:
            ``(validity, label)``: ``validity`` is the sigmoid real/fake
            probability with one column per sample; ``label`` holds the
            ``num_classes + 1`` unnormalised class logits per sample.
        """
        out = self.conv_blocks(img)
        out = out.view(out.shape[0], -1)
        validity = self.adv_layer(out)
        label = self.aux_layer(out)
        return validity, label
python
# Build the discriminator on the selected device and re-initialise its layers;
# both Module.to and Module.apply return the module itself.
discriminator = Discriminator().to(device).apply(weights_init)
print(discriminator)
python
# Print the layer / parameter-count table of the discriminator.
summary(discriminator)
python
# Binary cross-entropy scores the real/fake output of the discriminator.
adversarial_loss = torch.nn.BCELoss()
# Cross-entropy scores the discriminator's class output.
auxiliary_loss = nn.CrossEntropyLoss()
def generator_loss(fake_output, label):
    """Return the adversarial (BCE) loss of ``fake_output`` against ``label``."""
    return adversarial_loss(fake_output, label)
def discriminator_loss(output, label):
    """Return the adversarial (BCE) loss of ``output`` against ``label``."""
    return adversarial_loss(output, label)
# Pick the legacy tensor constructors from torch.cuda when a GPU is
# available, and from torch itself otherwise.
cuda = torch.cuda.is_available()
_tensor_namespace = torch.cuda if cuda else torch
FloatTensor = _tensor_namespace.FloatTensor
LongTensor = _tensor_namespace.LongTensor
python
# Both networks use Adam with the same learning rate and beta coefficients.
learning_rate = 0.0002
_adam_betas = (0.5, 0.999)
optimizer_G = optim.Adam(generator.parameters(), lr=learning_rate, betas=_adam_betas)
optimizer_D = optim.Adam(discriminator.parameters(), lr=learning_rate, betas=_adam_betas)
python
import os

# save_image does not create missing directories, so make sure the sample
# folder exists before the first grid is written.
os.makedirs("images", exist_ok=True)

for epoch in range(opt.n_epochs):
    for i, (imgs, labels) in enumerate(train_loader):
        # Size of this batch; kept in its own name so the global `batch_size`
        # used to build the DataLoader is not overwritten.
        n_samples = imgs.shape[0]

        # Adversarial ground truths: 1 = real, 0 = fake.
        valid = torch.ones(n_samples, 1, device=device)
        fake = torch.zeros(n_samples, 1, device=device)
        # Generated images are assigned the extra class index `n_classes`
        # (`opt` defines no `num_classes` attribute).
        fake_aux_gt = torch.full((n_samples,), n_classes, dtype=torch.long, device=device)

        # Configure input
        real_imgs = imgs.to(device=device, dtype=torch.float32)
        labels = labels.to(device=device, dtype=torch.long)

        # -----------------
        #  Train Generator
        # -----------------
        optimizer_G.zero_grad()

        # Sample noise; the generator is conditioned on the real batch labels.
        z = torch.randn(n_samples, opt.latent_dim, device=device)

        # Generate a batch of images
        gen_imgs = generator((z, labels))

        # Loss measures generator's ability to fool the discriminator
        validity, _ = discriminator(gen_imgs)
        g_loss = adversarial_loss(validity, valid)
        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------
        optimizer_D.zero_grad()

        # Loss for real images: real/fake term plus class term.
        real_pred, real_aux = discriminator(real_imgs)
        d_real_loss = (adversarial_loss(real_pred, valid) + auxiliary_loss(real_aux, labels)) / 2

        # Loss for fake images; detach so no gradient reaches the generator.
        fake_pred, fake_aux = discriminator(gen_imgs.detach())
        d_fake_loss = (adversarial_loss(fake_pred, fake) + auxiliary_loss(fake_aux, fake_aux_gt)) / 2

        # Total discriminator loss
        d_loss = (d_real_loss + d_fake_loss) / 2

        # Classification accuracy over the real and fake halves together.
        with torch.no_grad():
            pred = torch.cat([real_aux, fake_aux], dim=0).argmax(dim=1)
            gt = torch.cat([labels, fake_aux_gt], dim=0)
            d_acc = (pred == gt).float().mean().item()

        d_loss.backward()
        optimizer_D.step()

        batches_done = epoch * len(train_loader) + i
        if batches_done % opt.sample_interval == 0:
            save_image(gen_imgs.detach()[:25], "images/%d.png" % batches_done, nrow=5, normalize=True)

    # Log once per epoch, using the metrics of the last processed batch.
    print(
        "[Epoch %d/%d] [Batch %d/%d] [D loss: %f, acc: %d%%] [G loss: %f]"
        % (epoch, opt.n_epochs, i, len(train_loader), d_loss.item(), 100 * d_acc, g_loss.item())
    )
运行过程
python
e:\anaconda3\envs\PGPU\lib\site-packages\torch\nn\modules\container.py:139: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
input = module(input)
[Epoch 0/50] [Batch 19/20] [D loss: 1.455980, acc: 40%] [G loss: 0.490490]
[Epoch 1/50] [Batch 19/20] [D loss: 1.222127, acc: 72%] [G loss: 0.681366]
[Epoch 2/50] [Batch 19/20] [D loss: 1.224287, acc: 63%] [G loss: 0.916321]
[Epoch 3/50] [Batch 19/20] [D loss: 1.111225, acc: 70%] [G loss: 1.007028]
[Epoch 4/50] [Batch 19/20] [D loss: 1.184606, acc: 75%] [G loss: 0.696607]
[Epoch 5/50] [Batch 19/20] [D loss: 1.352154, acc: 55%] [G loss: 0.747507]
[Epoch 6/50] [Batch 19/20] [D loss: 1.403305, acc: 52%] [G loss: 0.869919]
[Epoch 7/50] [Batch 19/20] [D loss: 1.311451, acc: 50%] [G loss: 0.880048]
[Epoch 8/50] [Batch 19/20] [D loss: 1.413715, acc: 50%] [G loss: 0.674482]
[Epoch 9/50] [Batch 19/20] [D loss: 1.326531, acc: 54%] [G loss: 0.609503]
[Epoch 10/50] [Batch 19/20] [D loss: 1.449468, acc: 48%] [G loss: 0.620321]
[Epoch 11/50] [Batch 19/20] [D loss: 1.367987, acc: 53%] [G loss: 0.717428]
[Epoch 12/50] [Batch 19/20] [D loss: 1.286323, acc: 55%] [G loss: 0.748294]
[Epoch 13/50] [Batch 19/20] [D loss: 1.374772, acc: 51%] [G loss: 0.849943]
[Epoch 14/50] [Batch 19/20] [D loss: 1.303872, acc: 55%] [G loss: 0.887458]
[Epoch 15/50] [Batch 19/20] [D loss: 1.338245, acc: 59%] [G loss: 0.566128]
[Epoch 16/50] [Batch 19/20] [D loss: 1.386614, acc: 59%] [G loss: 0.737729]
[Epoch 17/50] [Batch 19/20] [D loss: 1.378518, acc: 55%] [G loss: 0.559435]
[Epoch 18/50] [Batch 19/20] [D loss: 1.421224, acc: 53%] [G loss: 0.639280]
[Epoch 19/50] [Batch 19/20] [D loss: 1.314460, acc: 54%] [G loss: 0.695454]
[Epoch 20/50] [Batch 19/20] [D loss: 1.279016, acc: 56%] [G loss: 0.810150]
[Epoch 21/50] [Batch 19/20] [D loss: 1.364004, acc: 53%] [G loss: 0.736294]
[Epoch 22/50] [Batch 19/20] [D loss: 1.364638, acc: 52%] [G loss: 0.990328]
[Epoch 23/50] [Batch 19/20] [D loss: 1.322828, acc: 53%] [G loss: 0.731904]
[Epoch 24/50] [Batch 19/20] [D loss: 1.317570, acc: 50%] [G loss: 0.839391]
[Epoch 25/50] [Batch 19/20] [D loss: 1.330042, acc: 55%] [G loss: 0.755845]
[Epoch 26/50] [Batch 19/20] [D loss: 1.354234, acc: 55%] [G loss: 0.652750]
[Epoch 27/50] [Batch 19/20] [D loss: 1.383858, acc: 55%] [G loss: 0.677340]
[Epoch 28/50] [Batch 19/20] [D loss: 1.384538, acc: 52%] [G loss: 0.621817]
[Epoch 29/50] [Batch 19/20] [D loss: 1.314232, acc: 54%] [G loss: 0.783550]
[Epoch 30/50] [Batch 19/20] [D loss: 1.328900, acc: 54%] [G loss: 0.709978]
[Epoch 31/50] [Batch 19/20] [D loss: 1.326728, acc: 54%] [G loss: 0.804180]
[Epoch 32/50] [Batch 19/20] [D loss: 1.346232, acc: 52%] [G loss: 0.775322]
[Epoch 33/50] [Batch 19/20] [D loss: 1.290386, acc: 56%] [G loss: 0.939839]
[Epoch 34/50] [Batch 19/20] [D loss: 1.395943, acc: 50%] [G loss: 0.582599]
[Epoch 35/50] [Batch 19/20] [D loss: 1.394045, acc: 52%] [G loss: 0.716685]
[Epoch 36/50] [Batch 19/20] [D loss: 1.391289, acc: 51%] [G loss: 0.747493]
[Epoch 37/50] [Batch 19/20] [D loss: 1.369082, acc: 50%] [G loss: 0.719075]
[Epoch 38/50] [Batch 19/20] [D loss: 1.401712, acc: 53%] [G loss: 0.645679]
[Epoch 39/50] [Batch 19/20] [D loss: 1.279735, acc: 57%] [G loss: 0.710965]
[Epoch 40/50] [Batch 19/20] [D loss: 1.363157, acc: 56%] [G loss: 0.589386]
[Epoch 41/50] [Batch 19/20] [D loss: 1.334075, acc: 53%] [G loss: 0.774654]
[Epoch 42/50] [Batch 19/20] [D loss: 1.358592, acc: 51%] [G loss: 0.726460]
[Epoch 43/50] [Batch 19/20] [D loss: 1.389814, acc: 50%] [G loss: 0.703020]
[Epoch 44/50] [Batch 19/20] [D loss: 1.363462, acc: 53%] [G loss: 0.691942]
[Epoch 45/50] [Batch 19/20] [D loss: 1.362092, acc: 55%] [G loss: 0.727146]
[Epoch 46/50] [Batch 19/20] [D loss: 1.360469, acc: 53%] [G loss: 0.696875]
[Epoch 47/50] [Batch 19/20] [D loss: 1.385563, acc: 52%] [G loss: 0.661834]
[Epoch 48/50] [Batch 19/20] [D loss: 1.376729, acc: 50%] [G loss: 0.753325]
[Epoch 49/50] [Batch 19/20] [D loss: 1.370506, acc: 51%] [G loss: 0.687326]
总结
- 从框架图上看,ACGAN就是由CGAN和SGAN结合而来
- 因此结合的代码也可以成功运行
- 所以,ACGAN的输入为随机噪声z加上条件信息C;最终输出为"真(真1、真2……)或假"的判别结果,也就是SGAN中的分类器功能。