CGAN条件生成式对抗网络
论文摘要
- 提出了一个基于生成对抗网络的条件生成式模型;
- 在原模型基础上,会输入额外的数据作为条件;
- 在原模型基础上,对生成器和判别器都进行了修改;
- 在MNIST数据集上,新模型可以生成以数字类别标签为条件的手写数字图像;
- 新模型还可以用来做多模态学习,可以生成输入图像相关的描述标签;
1.CGAN研究背景
![在这里插入图片描述](https://img-blog.csdnimg.cn/20210412104107189.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80NDA1Njk0OA==,size_16,color_FFFFFF,t_70)
2.研究意义
这里值得注意的是,可以参考《Attentive Normalization for Conditional Image Generation》这篇较为前沿的论文,它也是CGAN的一种改进形式。
3. 模型总览
4. 价值函数
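这里相对于原始GAN的价值函数,CGAN可以说就是多了一个y的条件输入:在Generator以及Discriminator上都多了一个y标签作为输入。因此,生成器的输入是噪声和标签,输出还是生成图;判别器的输入是生成图、真实图以及标签,输出还是真和假。这只是表面,真正的本质看一下下面这个损失函数就能明白:
$$\min_G \max_D V(D, G) = \mathbb{E}_{x \sim p_{data}(x)}[\log D(x \mid y)] + \mathbb{E}_{z \sim p_z(z)}[\log(1 - D(G(z \mid y)))]$$
可以看到,无论是D还是G,它们的概率表达都变成了条件概率公式。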
也就是说,这个目标函数是在输入为特定标签y的时候成立的,对于不同的标签,可以理解为有不同的目标函数。也就可以将CGAN理解为一个包含了所有n个类别的生成器集合。这样,通过更改标签,我们可以得到自己需要的生成图像,也可以用同一个生成网络生成不同的目标。
5. 实验结果
6.最后话不多说上代码,这里仅展示MNIST部分的代码
- 首先是models部分
import torch.nn as nn
import torch.nn.functional as F
import torch
##############################
# U-NET
##############################
class UNetDown(nn.Module):
    """Encoder stage: strided convolution, optional batch norm, LeakyReLU, optional dropout.

    Args:
        in_size: Number of input channels.
        out_size: Number of output channels.
        normalize: When true, a ``BatchNorm2d`` layer follows the convolution.
        dropout: Dropout probability; a falsy value (default ``0.0``) adds no
            dropout layer.
    """

    def __init__(self, in_size, out_size, normalize=True, dropout=0.0):
        super().__init__()
        # Kernel 4 / stride 2 / padding 1 halves each spatial side (floor).
        layers = [nn.Conv2d(in_size, out_size, 4, stride=2, padding=1, bias=False)]
        if normalize:
            # NOTE(review): the second positional argument of BatchNorm2d is
            # ``eps``, not ``momentum``. It is written as a keyword here so the
            # value actually in effect is visible; confirm whether a momentum
            # of 0.8 was what the author intended.
            layers.append(nn.BatchNorm2d(out_size, eps=0.8))
        layers.append(nn.LeakyReLU(0.2))
        if dropout:
            layers.append(nn.Dropout(dropout))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stage and return the downsampled feature map."""
        return self.model(x)
class UNetUp(nn.Module):
    """Decoder stage: transposed convolution, batch norm, ReLU, optional dropout,
    followed by concatenation with a skip connection.

    Args:
        in_size: Number of input channels.
        out_size: Number of channels produced before the skip tensor is
            concatenated.
        dropout: Dropout probability; a falsy value (default ``0.0``) adds no
            dropout layer.
    """

    def __init__(self, in_size, out_size, dropout=0.0):
        super().__init__()
        layers = [
            # Kernel 4 / stride 2 / padding 1 doubles each spatial side.
            nn.ConvTranspose2d(in_size, out_size, 4, stride=2, padding=1, bias=False),
            # NOTE(review): 0.8 lands on ``eps`` (second positional argument of
            # BatchNorm2d), not on ``momentum``; kept as-is, confirm intent.
            nn.BatchNorm2d(out_size, eps=0.8),
            nn.ReLU(inplace=True),
        ]
        if dropout:
            layers.append(nn.Dropout(dropout))
        self.model = nn.Sequential(*layers)

    def forward(self, x, skip_input):
        """Upsample ``x`` and concatenate ``skip_input`` along the channel axis.

        ``skip_input`` must match the upsampled tensor in every dimension
        except dim 1, because the two are joined with ``torch.cat(..., 1)``.
        """
        upsampled = self.model(x)
        return torch.cat((upsampled, skip_input), 1)
class Generator(nn.Module):
    """U-Net generator conditioned on a low-resolution copy of the image.

    Six ``UNetDown`` stages encode the input and five ``UNetUp`` stages decode
    it, each decoder stage being concatenated with the matching encoder
    output. The low-resolution image is injected after the second encoder
    stage.

    Args:
        input_shape: ``(channels, height, width)``; only ``channels`` is used
            here.
    """

    def __init__(self, input_shape):
        super().__init__()
        channels, _, _ = input_shape

        self.down1 = UNetDown(channels, 64, normalize=False)
        self.down2 = UNetDown(64, 128)
        # down3 receives down2's 128 channels plus the low-res image channels.
        self.down3 = UNetDown(128 + channels, 256, dropout=0.5)
        self.down4 = UNetDown(256, 512, dropout=0.5)
        self.down5 = UNetDown(512, 512, dropout=0.5)
        self.down6 = UNetDown(512, 512, dropout=0.5)

        # Input widths below are the previous stage's output plus its skip.
        self.up1 = UNetUp(512, 512, dropout=0.5)
        self.up2 = UNetUp(1024, 512, dropout=0.5)
        self.up3 = UNetUp(1024, 256, dropout=0.5)
        self.up4 = UNetUp(512, 128)
        # up4's skip is the low-res-augmented d2, hence the extra ``channels``.
        self.up5 = UNetUp(256 + channels, 64)

        self.final = nn.Sequential(
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, channels, 3, 1, 1),
            nn.Tanh(),
        )

    def forward(self, x, x_lr):
        """Generate an image from ``x`` conditioned on ``x_lr``.

        Args:
            x: Input batch with ``channels`` channels.
            x_lr: Low-resolution batch with ``channels`` channels. It is
                concatenated with the output of the second downsampling
                stage, so its spatial size must be a quarter of ``x``'s.

        Returns:
            Tanh-activated tensor with ``channels`` channels.
        """
        # U-Net generator with skip connections from encoder to decoder
        d1 = self.down1(x)
        d2 = self.down2(d1)
        d2 = torch.cat((d2, x_lr), 1)
        d3 = self.down3(d2)
        d4 = self.down4(d3)
        d5 = self.down5(d4)
        d6 = self.down6(d5)

        u1 = self.up1(d6, d5)
        u2 = self.up2(u1, d4)
        u3 = self.up3(u2, d3)
        u4 = self.up4(u3, d2)
        u5 = self.up5(u4, d1)
        return self.final(u5)
class Discriminator(nn.Module):
    """Fully convolutional discriminator producing a map of patch scores (PatchGAN).

    ``output_shape`` is the ``(1, height, width)`` of the score map that
    ``forward`` returns for a single image of ``input_shape``; callers use it
    to build target tensors for the loss.

    Args:
        input_shape: ``(channels, height, width)`` of the input images.
    """

    def __init__(self, input_shape):
        super().__init__()
        channels, height, width = input_shape

        # Each of the three stride-2 convolutions (kernel 3, padding 1) maps a
        # side of n pixels to ceil(n / 2), so the score map side is
        # ceil(n / 8). Flooring would under-report sizes that are not
        # multiples of 8 (28 -> 4, not 3) and make the targets built from
        # ``output_shape`` disagree with the real output.
        patch_h = int(-(-height // 8))
        patch_w = int(-(-width // 8))
        self.output_shape = (1, patch_h, patch_w)

        def discriminator_block(in_filters, out_filters, stride, normalize):
            """Returns layers of each discriminator block"""
            block = [nn.Conv2d(in_filters, out_filters, 3, stride, 1)]
            if normalize:
                block.append(nn.InstanceNorm2d(out_filters))
            block.append(nn.LeakyReLU(0.2, inplace=True))
            return block

        # (out_filters, stride, normalize) for each block, in order.
        stages = [(64, 2, False), (128, 2, True), (256, 2, True), (512, 1, True)]
        layers = []
        in_filters = channels
        for out_filters, stride, normalize in stages:
            layers.extend(discriminator_block(in_filters, out_filters, stride, normalize))
            in_filters = out_filters

        # Final stride-1 convolution collapses the features to one score channel.
        layers.append(nn.Conv2d(in_filters, 1, 3, 1, 1))
        self.model = nn.Sequential(*layers)

    def forward(self, img):
        """Return the patch score map for ``img``."""
        return self.model(img)
- 其次是我们的CGAN主函数模型
import argparse
import os
import numpy as np
import math
import torchvision.transforms as transforms
from torchvision.utils import save_image
from PIL import Image
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable
from datasets import *
from models import *
import torch.nn as nn
import torch.nn.functional as F
import torch
# Output directory for the sample grids written during training.
os.makedirs("images", exist_ok=True)

# ----- Command-line options -----
parser = argparse.ArgumentParser()
parser.add_argument("--n_epochs", type=int, default=200, help="number of epochs of training")
parser.add_argument("--batch_size", type=int, default=8, help="size of the batches")
parser.add_argument("--dataset_name", type=str, default="img_align_celeba", help="name of the dataset")
parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate")
parser.add_argument("--b1", type=float, default=0.5, help="adam: decay of first order momentum of gradient")
# b2 is Adam's second-moment coefficient; the help text previously said "first order".
parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of second order momentum of gradient")
parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation")
parser.add_argument("--latent_dim", type=int, default=100, help="dimensionality of the latent space")
parser.add_argument("--img_size", type=int, default=128, help="size of each image dimension")
parser.add_argument("--mask_size", type=int, default=32, help="size of random mask")
parser.add_argument("--channels", type=int, default=3, help="number of image channels")
parser.add_argument("--sample_interval", type=int, default=500, help="interval between image sampling")
opt = parser.parse_args()
print(opt)

cuda = torch.cuda.is_available()

input_shape = (opt.channels, opt.img_size, opt.img_size)

# Loss function
adversarial_loss = torch.nn.MSELoss()

# Initialize generator and discriminator
generator = Generator(input_shape)
discriminator = Discriminator(input_shape)

if cuda:
    generator.cuda()
    discriminator.cuda()
    adversarial_loss.cuda()

# Initialize weights
# NOTE(review): weights_init_normal is not defined in the visible part of this
# file; presumably it arrives through one of the star imports -- confirm.
generator.apply(weights_init_normal)
discriminator.apply(weights_init_normal)

# Dataset loader: full-resolution images and a copy at a quarter of the side
# length, which is what the generator takes as its low-resolution input.
transforms_ = [
    transforms.Resize((opt.img_size, opt.img_size), Image.BICUBIC),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
transforms_lr = [
    transforms.Resize((opt.img_size // 4, opt.img_size // 4), Image.BICUBIC),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
dataloader = DataLoader(
    ImageDataset("../../data/%s" % opt.dataset_name, transforms_x=transforms_, transforms_lr=transforms_lr),
    batch_size=opt.batch_size,
    shuffle=True,
    num_workers=opt.n_cpu,
)

# Optimizers
optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))

# Tensor constructor matching the device the models live on.
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
def apply_random_mask(imgs, mask_size=None):
    """Return a copy of ``imgs`` with one random square per image set to -1.

    Args:
        imgs: Batch tensor indexed as ``(batch, channel, row, column)``.
        mask_size: Side length of the masked square. Defaults to
            ``opt.mask_size`` when omitted.

    Returns:
        A new tensor; ``imgs`` itself is left untouched.

    Raises:
        ValueError: If the mask does not fit inside the image.
    """
    if mask_size is None:
        mask_size = opt.mask_size
    height, width = imgs.shape[-2:]
    if mask_size > height or mask_size > width:
        raise ValueError(
            "mask_size %d does not fit in images of size %dx%d" % (mask_size, height, width)
        )

    # randint's upper bound is exclusive, so "+ 1" makes the last valid
    # top-left corner reachable (mask flush with the bottom/right edge) and
    # keeps mask_size == image size from raising.
    upper = [height - mask_size + 1, width - mask_size + 1]
    corners = np.random.randint(0, upper, (imgs.shape[0], 2))

    masked_imgs = imgs.clone()
    for i, (top, left) in enumerate(corners):
        top, left = int(top), int(left)
        masked_imgs[i, :, top:top + mask_size, left:left + mask_size] = -1
    return masked_imgs
def save_sample(saved_samples):
    """Inpaint the stored samples and write a comparison grid to ``images/``.

    Args:
        saved_samples: Dict with tensors under the keys ``"masked"``,
            ``"lowres"`` and ``"imgs"``.

    The file name is taken from the module-level ``batches_done``, which the
    training loop assigns before calling this function.
    """
    # Generate inpainted image. No autograd graph is needed for sampling; the
    # generator's train/eval mode is deliberately left as it is.
    with torch.no_grad():
        gen_imgs = generator(saved_samples["masked"], saved_samples["lowres"])

    # Save sample: concatenating on dim -2 stacks masked input, generated
    # output and original image on top of each other for every sample.
    sample = torch.cat((saved_samples["masked"].data, gen_imgs.data, saved_samples["imgs"].data), -2)
    save_image(sample, "images/%d.png" % batches_done, nrow=5, normalize=True)
# Fixed set of samples (up to ten, one taken from each of the first batches)
# that is re-rendered at every sampling interval.
saved_samples = {}

for epoch in range(opt.n_epochs):
    for i, batch in enumerate(dataloader):
        imgs = batch["x"]
        imgs_lr = batch["x_lr"]

        # Masking happens before the tensors are converted for the GPU.
        masked_imgs = apply_random_mask(imgs)

        # Adversarial ground truths, shaped like the discriminator's output.
        # Plain tensors suffice: wrapping in autograd.Variable is a no-op on
        # the PyTorch versions that provide ``.item()``.
        valid = Tensor(imgs.shape[0], *discriminator.output_shape).fill_(1.0)
        fake = Tensor(imgs.shape[0], *discriminator.output_shape).fill_(0.0)

        if cuda:
            imgs = imgs.type(Tensor)
            imgs_lr = imgs_lr.type(Tensor)
            masked_imgs = masked_imgs.type(Tensor)

        real_imgs = imgs

        # -----------------
        #  Train Generator
        # -----------------

        optimizer_G.zero_grad()

        # Generate a batch of images
        gen_imgs = generator(masked_imgs, imgs_lr)

        # Loss measures generator's ability to fool the discriminator
        g_loss = adversarial_loss(discriminator(gen_imgs), valid)

        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------

        optimizer_D.zero_grad()

        # Measure discriminator's ability to classify real from generated
        # samples; detach() keeps this step from back-propagating into the
        # generator.
        real_loss = adversarial_loss(discriminator(real_imgs), valid)
        fake_loss = adversarial_loss(discriminator(gen_imgs.detach()), fake)
        d_loss = 0.5 * (real_loss + fake_loss)

        d_loss.backward()
        optimizer_D.step()

        print(
            "[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]"
            % (epoch, opt.n_epochs, i, len(dataloader), d_loss.item(), g_loss.item())
        )

        # Save first ten samples: the first image of each batch until ten
        # have been collected.
        if not saved_samples:
            saved_samples["imgs"] = real_imgs[:1].clone()
            saved_samples["masked"] = masked_imgs[:1].clone()
            saved_samples["lowres"] = imgs_lr[:1].clone()
        elif saved_samples["imgs"].size(0) < 10:
            saved_samples["imgs"] = torch.cat((saved_samples["imgs"], real_imgs[:1]), 0)
            saved_samples["masked"] = torch.cat((saved_samples["masked"], masked_imgs[:1]), 0)
            saved_samples["lowres"] = torch.cat((saved_samples["lowres"], imgs_lr[:1]), 0)

        # save_sample() reads this module-level counter for its file name.
        batches_done = epoch * len(dataloader) + i
        if batches_done % opt.sample_interval == 0:
            save_sample(saved_samples)