INTRODUCTION
GANs有一个非常大的问题,就是训练的结果非常不稳定,很有可能训练出来的模型生成的是一个乱七八糟的东西。
GANs have been known to be unstable to train, often resulting in generators that produce nonsensical outputs
这篇论文的贡献
- 提出并评估了一系列的在卷积对抗生成神经网络(Convolutional GANs)上的限制。为了使得他们能更稳定地训练。我们称这种网络为深度卷积神经网络(DCGANs)
We propose and evaluate a set of constraintson the architectural topology of Convolutional GANs that make them stable to train in most settings. We name this class of architectures Deep Convolutional GANs (DCGAN)
- 我们使用训练好的判别器来做图片的分类任务,相比去其他的非监督学习算法更具有竞争力的效果。
We use the trained discriminators for image classification tasks, showing competitive performance with other unsupervised algorithms.
- 我们设想这个过滤器(filter)通过GANs来学习。并且,从经验上表现出,这个过滤器有学习到如何去画一个特定的物体。
We visualize the filters learnt by GANs and empirically show that specific filters have learned to draw specific objects
- 我们展示了这个生成器有非常有趣的向量计算属性,允许简单的在很多有语义的生成样本进行操作。
Weshowthatthegeneratorshaveinterestingvectorarithmeticpropertiesallowingforeasy manipulation of many semantic qualities of generated samples.
Approach and Model Architecture
历史上尝试去增强GANs通过CNNs去建模这个图片可以说是非常不成功。
这个想法从LAPGANs的作者去发展
我们也遇到了很多困难,在尝试用CNN的结构来提高GANs的过程中。
然而,在扩展了这个模型探索之后,我们认识到了一族结构。这种结构,使得可以在一些数据集上的稳定地训练结果。训练得到更好的结果,并且更深的生成模型。
- 首先是,用给定步长的卷积的卷积网络代替所有的确定空间池化函数,允许它学习它自己空间的样本生成器和判别器。
- 第二是,在卷积特征的顶部,向着排除全相连的层的趋势。比较好的一个结果手就是用全局的平均池化优化了图的分类模型。但是会发现,虽然会增加稳定性,但是会降低收敛的速度。适中的方法是: GANs第一层,是一个高斯噪声的分布Z,作为输入,但是结果需要被reshape转成一个4维的矩阵。用来作为卷积栈的开始工作。
- 第二中,所对应图交待了生成器的过程。提到了,这被称之为(错误的叫法,逆卷积的过程),(正确的叫法,分数步长的卷积,fractional-strided convolutions)。判别器,的话最后一个卷积层的结果需要给sigmoid函数。
- 第三,Batch Nuormalization,使得学习通过正则化之后的输入有0的均值和1的方差的正态分布。这是为了避免不好的初始化问题,然后让梯度进入到更深层次的模型中。但是需要避免batchnorm应用到生成器的输出层和判别器的输出层。直接的将batchnorm应用到所有层,会导致震荡或者是模型的不稳定。
- ReLU应用在生成器,但是最后一个层需要用Tanh函数。我们观察到,使用绑定的激活函数让模型更快地使得训练中的分布的颜色空间饱和。
- 在判别器上,我们发现LeakyReLU表现的更好。这篇文章基于原始的GANs,但是做了下面的这些修改
具体的修改细节
pytorch实现
下面代码的输出结果其实不是很好
原因是我的EPOCH设置为了1。如果你设置为更大的数值效果会好很多。
原因是,这个速度实在是太慢了。
而且,由于是把所有的特征都放在一起,没有说有一个分类的操作,就直接开始来训练了。怪不得清华小哥们提出了TripleGANs的思路!
那么多不是一个类的东西来混在一起来特征寻找,效果肯定是非常糟糕啦!
但是下面的代码还是具有研究意义的(因为只需要在数据集上做一下操作,就可以变成一个正常的东西了。。这个效率实在是太低)。我会再改写一个只有一个生成目标的DCGANs。
import torch
import torch.nn as nn
import torchvision
import torch.utils.data as Data
import matplotlib.pyplot as plt
# Hyper Parameters
EPOCH = 1 # 训练整批数据多少次
BATCH_SIZE = 50
LR = 0.0002 # 学习率
DOWNLOAD_MNIST = True # 已经下载好的话,会自动跳过的
len_Z = 100 # random input.channal for Generator
g_hidden_channal = 64
d_hidden_channal = 64
image_channal = 1 # mnist数据为黑白的只有一维
# Mnist 手写数字
train_data = torchvision.datasets.MNIST(
root='./mnist/', # 保存或者提取位置
train=True, # this is training data
transform=torchvision.transforms.ToTensor(), # 转换 PIL.Image or numpy.ndarray 成
# torch.FloatTensor (C x H x W), 训练的时候 normalize 成 [0.0, 1.0] 区间
download=DOWNLOAD_MNIST, # 没下载就下载, 下载了就不用再下了
)
test_data = torchvision.datasets.MNIST(
root='./mnist/',
train=False
)
# 训练集丢BATCH_SIZE个, 图片大小为28*28
train_loader = Data.DataLoader(
dataset=train_data,
batch_size=BATCH_SIZE,
shuffle=True # 是否打乱顺序
)
class Generator(nn.Module):
def __init__(self, len_Z, hidden_channal, output_channal):
super(Generator, self).__init__()
self.layer1 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=len_Z,
out_channels=hidden_channal * 4,
kernel_size=4,
),
nn.BatchNorm2d(hidden_channal * 4),
nn.ReLU()
)
# [BATCH, hidden_channal * 4 , 4, 4]
self.layer2 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=hidden_channal * 4,
out_channels=hidden_channal * 2,
kernel_size=3, # 保证生成图像大小为28
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal * 2),
nn.ReLU()
)
#
self.layer3 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=hidden_channal * 2,
out_channels=hidden_channal,
kernel_size=4,
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal),
nn.ReLU()
)
self.layer4 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=hidden_channal,
out_channels=output_channal,
kernel_size=4,
stride=2,
padding=1
),
nn.Tanh()
)
def forward(self, x):
# [50, 100, 1, 1]
out = self.layer1(x)
# [50, 256, 4, 4]
# print(out.shape)
out = self.layer2(out)
# [50, 128, 7, 7]
# print(out.shape)
out = self.layer3(out)
# [50, 64, 14, 14]
# print(out.shape)
out = self.layer4(out)
# print(out.shape)
# [50, 1, 28, 28]
return out
# # Test Generator
# G = Generator(len_Z, g_hidden_channal, image_channal)
# data = torch.randn((BATCH_SIZE, len_Z, 1, 1))
# print(G(data).shape)
class Discriminator(nn.Module):
def __init__(self, input_channal, hidden_channal):
super(Discriminator, self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(
in_channels=input_channal,
out_channels=hidden_channal,
kernel_size=4,
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal),
nn.LeakyReLU(0.2)
)
self.layer2 = nn.Sequential(
nn.Conv2d(
in_channels=hidden_channal,
out_channels=hidden_channal * 2,
kernel_size=4,
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal * 2),
nn.LeakyReLU(0.2)
)
self.layer3 = nn.Sequential(
nn.Conv2d(
in_channels=hidden_channal * 2,
out_channels=hidden_channal * 4,
kernel_size=3,
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal * 4),
nn.LeakyReLU(0.2)
)
self.layer4 = nn.Sequential(
nn.Conv2d(
in_channels=hidden_channal * 4,
out_channels=1,
kernel_size=4,
stride=1,
padding=0
),
nn.Sigmoid()
)
# [BATCH, 1, 1, 1]
def forward(self, x):
# print(x.shape)
out = self.layer1(x)
# print(out.shape)
out = self.layer2(out)
# print(out.shape)
out = self.layer3(out)
# print(out.shape)
out = self.layer4(out)
return out
# # Test Discriminator
# D = Discriminator(1, d_hidden_channal)
# data = torch.randn((BATCH_SIZE, 1, 28, 28))
# print(D(data).shape)
G = Generator(len_Z, g_hidden_channal, image_channal)
D = Discriminator(image_channal, g_hidden_channal)
# loss & optimizer
criterion = nn.BCELoss()
optimD = torch.optim.Adam(D.parameters(), lr=LR)
optimG = torch.optim.Adam(G.parameters(), lr=LR)
label_Real = torch.FloatTensor(BATCH_SIZE).data.fill_(1)
label_Fake = torch.FloatTensor(BATCH_SIZE).data.fill_(0)
for epoch in range(EPOCH):
for step, (images, imagesLabel) in enumerate(train_loader):
G_ideas = torch.randn((BATCH_SIZE, len_Z, 1, 1))
G_paintings = G(G_ideas)
prob_artist0 = D(images) # D try to increase this prob
prob_artist1 = D(G_paintings)
p0 = torch.squeeze(prob_artist0)
p1 = torch.squeeze(prob_artist1)
errD_real = criterion(p0, label_Real)
errD_fake = criterion(p1, label_Fake)
# errD_fake.backward()
errD = errD_fake + errD_real
errG = criterion(p1, label_Real)
optimD.zero_grad()
errD.backward(retain_graph=True)
optimD.step()
optimG.zero_grad()
errG.backward(retain_graph=True)
optimG.step()
if (step+1) % 100 == 0:
picture = torch.squeeze(G_paintings[0]).detach().numpy()
plt.imshow(picture, cmap=plt.cm.gray_r)
plt.show()
可以选定特定的数字的DCGANs
根据了我写了另外一篇文章
- MNIST选取特定数值的训练集
- 利用上面的代码,加上上面链接中的代码。结合一下,就是下面的版本了
生成器的进化过程:
import torch
import torch.nn as nn
import torchvision
import torch.utils.data as Data
import matplotlib.pyplot as plt
# Hyper Parameters
EPOCH = 10 # 训练整批数据多少次
BATCH_SIZE = 50
LR = 0.0002 # 学习率
DOWNLOAD_MNIST = False # 已经下载好的话,会自动跳过的
len_Z = 100 # random input.channal for Generator
g_hidden_channal = 64
d_hidden_channal = 64
image_channal = 1 # mnist数据为黑白的只有一维
# Mnist 手写数字
class myMNIST(torchvision.datasets.MNIST):
def __init__(self, root, train=True, transform=None, target_transform=None, download=False, targetNum=None):
super(myMNIST, self).__init__(
root,
train=train,
transform=transform,
target_transform=target_transform,
download=download)
if targetNum:
self.train_data = self.train_data[self.train_labels == targetNum]
self.train_data = self.train_data[:int(self.__len__() / BATCH_SIZE) * BATCH_SIZE]
self.train_labels = self.train_labels[self.train_labels == targetNum][
:int(self.__len__() / BATCH_SIZE) * BATCH_SIZE]
def __len__(self):
if self.train:
return self.train_data.shape[0]
else:
return 10000
train_data = myMNIST(
root='./mnist/', # 保存或者提取位置
train=True, # this is training data
transform=torchvision.transforms.ToTensor(), # 转换 PIL.Image or numpy.ndarray 成
# torch.FloatTensor (C x H x W), 训练的时候 normalize 成 [0.0, 1.0] 区间
download=DOWNLOAD_MNIST, # 没下载就下载, 下载了就不用再下了
targetNum=1
)
print(len(train_data))
# print(train_data.shape)
# 训练集丢BATCH_SIZE个, 图片大小为28*28
train_loader = Data.DataLoader(
dataset=train_data,
batch_size=BATCH_SIZE,
shuffle=True # 是否打乱顺序
)
class Generator(nn.Module):
def __init__(self, len_Z, hidden_channal, output_channal):
super(Generator, self).__init__()
self.layer1 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=len_Z,
out_channels=hidden_channal * 4,
kernel_size=4,
),
nn.BatchNorm2d(hidden_channal * 4),
nn.ReLU()
)
# [BATCH, hidden_channal * 4 , 4, 4]
self.layer2 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=hidden_channal * 4,
out_channels=hidden_channal * 2,
kernel_size=3, # 保证生成图像大小为28
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal * 2),
nn.ReLU()
)
#
self.layer3 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=hidden_channal * 2,
out_channels=hidden_channal,
kernel_size=4,
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal),
nn.ReLU()
)
self.layer4 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=hidden_channal,
out_channels=output_channal,
kernel_size=4,
stride=2,
padding=1
),
nn.Tanh()
)
def forward(self, x):
# [50, 100, 1, 1]
out = self.layer1(x)
# [50, 256, 4, 4]
# print(out.shape)
out = self.layer2(out)
# [50, 128, 7, 7]
# print(out.shape)
out = self.layer3(out)
# [50, 64, 14, 14]
# print(out.shape)
out = self.layer4(out)
# print(out.shape)
# [50, 1, 28, 28]
return out
# # Test Generator
# G = Generator(len_Z, g_hidden_channal, image_channal)
# data = torch.randn((BATCH_SIZE, len_Z, 1, 1))
# print(G(data).shape)
class Discriminator(nn.Module):
def __init__(self, input_channal, hidden_channal):
super(Discriminator, self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(
in_channels=input_channal,
out_channels=hidden_channal,
kernel_size=4,
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal),
nn.LeakyReLU(0.2)
)
self.layer2 = nn.Sequential(
nn.Conv2d(
in_channels=hidden_channal,
out_channels=hidden_channal * 2,
kernel_size=4,
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal * 2),
nn.LeakyReLU(0.2)
)
self.layer3 = nn.Sequential(
nn.Conv2d(
in_channels=hidden_channal * 2,
out_channels=hidden_channal * 4,
kernel_size=3,
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal * 4),
nn.LeakyReLU(0.2)
)
self.layer4 = nn.Sequential(
nn.Conv2d(
in_channels=hidden_channal * 4,
out_channels=1,
kernel_size=4,
stride=1,
padding=0
),
nn.Sigmoid()
)
# [BATCH, 1, 1, 1]
def forward(self, x):
# print(x.shape)
out = self.layer1(x)
# print(out.shape)
out = self.layer2(out)
# print(out.shape)
out = self.layer3(out)
# print(out.shape)
out = self.layer4(out)
return out
# # Test Discriminator
# D = Discriminator(1, d_hidden_channal)
# data = torch.randn((BATCH_SIZE, 1, 28, 28))
# print(D(data).shape)
G = Generator(len_Z, g_hidden_channal, image_channal)
D = Discriminator(image_channal, g_hidden_channal)
# loss & optimizer
criterion = nn.BCELoss()
optimD = torch.optim.Adam(D.parameters(), lr=LR)
optimG = torch.optim.Adam(G.parameters(), lr=LR)
label_Real = torch.FloatTensor(BATCH_SIZE).data.fill_(1)
label_Fake = torch.FloatTensor(BATCH_SIZE).data.fill_(0)
for epoch in range(EPOCH):
for step, (images, imagesLabel) in enumerate(train_loader):
G_ideas = torch.randn((BATCH_SIZE, len_Z, 1, 1))
G_paintings = G(G_ideas)
prob_artist0 = D(images) # D try to increase this prob
prob_artist1 = D(G_paintings)
p0 = torch.squeeze(prob_artist0)
p1 = torch.squeeze(prob_artist1)
errD_real = criterion(p0, label_Real)
errD_fake = criterion(p1, label_Fake)
# errD_fake.backward()
errD = errD_fake + errD_real
errG = criterion(p1, label_Real)
optimD.zero_grad()
errD.backward(retain_graph=True)
optimD.step()
optimG.zero_grad()
errG.backward(retain_graph=True)
optimG.step()
picture = torch.squeeze(G_paintings[0]).detach().numpy()
plt.imshow(picture, cmap=plt.cm.gray_r)
plt.show()