文章目录
cycleGAN原理
一般的unsupervised conditional generation的处理办法
参考一下,信息量很大
cycleGAN处理unsupervised conditional generation的办法:
比较正常的思路:
给一个domain X的image_x, 通过NN后变成domain Y的图像,问题就解决了,但是NN学习需要目标啊,目标就是输出是不是domain Y,还有就是和image_x像不像,也就是上图中的real or not , match or not,这个实际上可以加入GAN的framework,在保证像的同时,让domain 越来越match。
前面提到过一种思路:使用vgg的features来保证图片像,使用features内部之间的内积和表示图片风格,一种伪DL的实现方法:https://blog.csdn.net/weixin_40759186/article/details/87804316
我们一般的GAN的Discriminator都是一个目标,怎么实现real or not && match or not?real or not通过reconstruct error实现,match or not通过训练一个domain classifier实现。
cycleGAN的思路:
real or not通过reconstruct error实现,match or not通过训练一个domain classifier实现。
match or not通过训练一个domain classifier实现:generator 生成的fake data和样本的real data训练domain classifier,可以得到两个Discriminator: D_x和D_y。
real or not通过reconstruct error实现:x_domainx —>generator 生成的fake data: x_domainy—>generator 生成的reconstruct x^_domainx,比较x^_domainx和x_domainx。
cycleGAN实现:
Discriminator的结构:
class Discriminator(nn.Module):
    """Discriminator for one image domain.

    Accepts an RGB image and produces a single-channel score used by the
    LSGAN losses (closer to 1 = judged real, closer to 0 = judged fake).
    Relies on the project's `conv` helper, defined elsewhere in the file.
    """

    def __init__(self, conv_dim=64):
        super(Discriminator, self).__init__()
        # Downsampling stack; channel depth doubles at each step.
        # The first layer deliberately skips batch norm (DCGAN convention).
        # Spatial-size comments assume each `conv` halves H and W — TODO confirm
        # against the helper's definition.
        self.conv1 = conv(3, conv_dim, 4, batch_norm=False)  # depth 64
        self.conv2 = conv(conv_dim, conv_dim*2, 4)           # (32, 32, 128)
        self.conv3 = conv(conv_dim*2, conv_dim*4, 4)         # (16, 16, 256)
        self.conv4 = conv(conv_dim*4, conv_dim*8, 4)         # (8, 8, 512)
        # Classification layer: one output channel, stride 1, no batch norm.
        self.conv5 = conv(conv_dim*8, 1, 4, stride=1, batch_norm=False)

    def forward(self, x):
        # ReLU after every conv layer except the final classification layer.
        h = x
        for layer in (self.conv1, self.conv2, self.conv3, self.conv4):
            h = F.relu(layer(h))
        return self.conv5(h)
Discriminator的Loss:
这个就是平常的GAN的Discriminator(LSGAN形式):让真实样本的输出接近1,生成样本的输出接近0,即real loss低、fake loss也低
def real_mse_loss(D_out):
    """LSGAN real-side loss: mean squared distance of D's scores from 1."""
    return ((D_out - 1) ** 2).mean()
def fake_mse_loss(D_out):
    """LSGAN fake-side loss: mean squared distance of D's scores from 0."""
    return (D_out ** 2).mean()
## First: D_X, real and fake loss components ##
# NOTE: this is an excerpt of the D_X step from training_loop further below;
# it relies on d_x_optimizer, D_X, G_YtoX, images_X and images_Y being
# defined elsewhere, and omits the backward()/step() calls shown there.
# Train with real images
d_x_optimizer.zero_grad()
# 1. Compute the discriminator losses on real images
out_x = D_X(images_X)
D_X_real_loss = real_mse_loss(out_x)
# Train with fake images
# 2. Generate fake images that look like domain X based on real images in domain Y
fake_X = G_YtoX(images_Y)
# 3. Compute the fake loss for D_X
out_x = D_X(fake_X)
D_X_fake_loss = fake_mse_loss(out_x)
# 4. Compute the total loss and perform backprop
d_x_loss = D_X_real_loss + D_X_fake_loss
Generator的结构:
Generator的结构图:
这个一个典型的autoencoder结构
Residual Block:
# residual block class
class ResidualBlock(nn.Module):
    """Residual block: out = x + F(x).

    F(x) is two same-shape conv + batch-norm layers (via the project's
    `conv` helper, defined elsewhere). Keeping input and output shapes
    identical lets these blocks be stacked freely in the generator's
    bottleneck to learn the domain-to-domain transformation.
    """

    def __init__(self, conv_dim):
        super(ResidualBlock, self).__init__()
        # Two 3x3 convolutions with batch norm form the residual function;
        # stride=1, padding=1 preserves the spatial size.
        self.conv_layer1 = conv(in_channels=conv_dim, out_channels=conv_dim,
                                kernel_size=3, stride=1, padding=1, batch_norm=True)
        self.conv_layer2 = conv(in_channels=conv_dim, out_channels=conv_dim,
                                kernel_size=3, stride=1, padding=1, batch_norm=True)

    def forward(self, x):
        # ReLU only after the first conv; the skip connection adds x back in.
        residual = F.relu(self.conv_layer1(x))
        return x + self.conv_layer2(residual)
Autoencoder实现:
class CycleGenerator(nn.Module):
    """Encoder -> residual bottleneck -> decoder generator for CycleGAN.

    Takes an RGB image and returns a transformed RGB image with values
    squashed to [-1, 1] by the final tanh (matching the scaled inputs).
    Uses the project's `conv`/`deconv` helpers defined elsewhere in the file.
    """

    def __init__(self, conv_dim=64, n_res_blocks=6):
        super(CycleGenerator, self).__init__()
        # 1. Encoder: three downsampling conv layers, depth 3 -> conv_dim*4.
        self.conv1 = conv(3, conv_dim, 4)
        self.conv2 = conv(conv_dim, conv_dim*2, 4)
        self.conv3 = conv(conv_dim*2, conv_dim*4, 4)
        # 2. Transformation: a stack of residual blocks at constant depth.
        res_layers = [ResidualBlock(conv_dim*4) for _ in range(n_res_blocks)]
        self.res_blocks = nn.Sequential(*res_layers)
        # 3. Decoder: two transpose convs, then a final layer back to 3
        #    channels; no batch norm on the output layer.
        self.deconv1 = deconv(conv_dim*4, conv_dim*2, 4)
        self.deconv2 = deconv(conv_dim*2, conv_dim, 4)
        self.deconv3 = deconv(conv_dim, 3, 4, batch_norm=False)

    def forward(self, x):
        """Given an image x, returns a transformed image."""
        out = F.relu(self.conv1(x))
        out = F.relu(self.conv2(out))
        out = F.relu(self.conv3(out))
        out = self.res_blocks(out)
        out = F.relu(self.deconv1(out))
        out = F.relu(self.deconv2(out))
        # torch.tanh instead of F.tanh: nn.functional.tanh is deprecated and
        # removed in recent PyTorch releases; behavior is identical.
        out = torch.tanh(self.deconv3(out))
        return out
# helper deconv function
def deconv(in_channels, out_channels, kernel_size, stride=2, padding=1, batch_norm=True):
    """Creates a transpose convolutional layer, with optional batch normalization.
    """
    # Transpose conv first (bias omitted: batch norm has its own shift term),
    # then an optional BatchNorm2d, wrapped together in a Sequential.
    modules = [nn.ConvTranspose2d(in_channels, out_channels, kernel_size,
                                  stride, padding, bias=False)]
    if batch_norm:
        modules.append(nn.BatchNorm2d(out_channels))
    return nn.Sequential(*modules)
Generator的Loss:
注意这里是一个cycle之后再去更新G1和G2的梯度
def real_mse_loss(D_out):
    """LSGAN objective for real samples: penalize D's output for being far from 1."""
    gap = D_out - 1
    return torch.mean(gap * gap)
def fake_mse_loss(D_out):
    """LSGAN objective for fake samples: penalize D's output for being far from 0."""
    return torch.mean(D_out * D_out)
def cycle_consistency_loss(real_im, reconstructed_im, lambda_weight):
    """Weighted L1 distance between a real image and its cycle reconstruction.

    lambda_weight scales the reconstruction term relative to the adversarial
    losses in the total generator objective.
    """
    l1 = (real_im - reconstructed_im).abs().mean()
    return lambda_weight * l1
# =========================================
# TRAIN THE GENERATORS
# =========================================
# NOTE: this is an excerpt of the generator step from training_loop further
# below; both generators share one optimizer, so a single backward pass
# through the full cycle updates G_XtoY and G_YtoX together.
## First: generate fake X images and reconstructed Y images ##
g_optimizer.zero_grad()
# 1. Generate fake images that look like domain X based on real images in domain Y
fake_X = G_YtoX(images_Y)
# 2. Compute the generator loss based on domain X
out_x = D_X(fake_X)
g_YtoX_loss = real_mse_loss(out_x)
# 3. Create a reconstructed y
# 4. Compute the cycle consistency loss (the reconstruction loss)
reconstructed_Y = G_XtoY(fake_X)
reconstructed_y_loss = cycle_consistency_loss(images_Y, reconstructed_Y, lambda_weight=10)
## Second: generate fake Y images and reconstructed X images ##
# 1. Generate fake images that look like domain Y based on real images in domain X
fake_Y = G_XtoY(images_X)
# 2. Compute the generator loss based on domain Y
out_y = D_Y(fake_Y)
g_XtoY_loss = real_mse_loss(out_y)
# 3. Create a reconstructed x
# 4. Compute the cycle consistency loss (the reconstruction loss)
reconstructed_X = G_YtoX(fake_Y)
reconstructed_x_loss = cycle_consistency_loss(images_X, reconstructed_X, lambda_weight=10)
# 5. Add up all generator and reconstructed losses and perform backprop
g_total_loss = g_YtoX_loss + g_XtoY_loss + reconstructed_y_loss + reconstructed_x_loss
整体结构:
需要训练2个Discriminators和2个Generators:
有人可能要问训练一个Generator可不可以? X->G->Y, Y->G->X,真把Generator当作万能的?
def create_model(g_conv_dim=64, d_conv_dim=64, n_res_blocks=6):
    """Builds the generators and discriminators.

    Returns the tuple (G_XtoY, G_YtoX, D_X, D_Y) — one generator per
    translation direction and one discriminator per domain — moved to the
    GPU when one is available.
    """
    # One generator per direction, one discriminator per domain.
    G_XtoY = CycleGenerator(conv_dim=g_conv_dim, n_res_blocks=n_res_blocks)
    G_YtoX = CycleGenerator(conv_dim=g_conv_dim, n_res_blocks=n_res_blocks)
    D_X = Discriminator(conv_dim=d_conv_dim)
    D_Y = Discriminator(conv_dim=d_conv_dim)
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        for model in (G_XtoY, G_YtoX, D_X, D_Y):
            model.to(device)
        print('Models moved to GPU.')
    else:
        print('Only CPU available.')
    return G_XtoY, G_YtoX, D_X, D_Y
优化器:
看到没G_XtoY和G_YtoX是在一个cycle更新的
import torch.optim as optim
# hyperparams for Adam optimizer
# NOTE: assumes G_XtoY, G_YtoX, D_X, D_Y already exist (from create_model above)
lr=0.0002
beta1=0.5
beta2=0.999 # default value
# Both generators share ONE parameter list, so G_XtoY and G_YtoX are updated
# together by a single optimizer step from the combined cycle loss.
g_params = list(G_XtoY.parameters()) + list(G_YtoX.parameters()) # Get generator parameters
# Create optimizers for the generators and discriminators
# (each discriminator gets its own independent optimizer)
g_optimizer = optim.Adam(g_params, lr, [beta1, beta2])
d_x_optimizer = optim.Adam(D_X.parameters(), lr, [beta1, beta2])
d_y_optimizer = optim.Adam(D_Y.parameters(), lr, [beta1, beta2])
训练过程:
先训练D_X,先训练D_Y,再训练G_XtoY和G_YtoX
def training_loop(dataloader_X, dataloader_Y, test_dataloader_X, test_dataloader_Y,
n_epochs=1000):
# Full CycleGAN training loop: each "epoch" here processes ONE batch, then
# alternates a D_X step, a D_Y step, and one joint step for both generators.
# NOTE(review): the indentation was lost in extraction — logically everything
# from `images_X, _ = ...` down runs inside the for-loop body.
print_every=10
# keep track of losses over time
losses = []
test_iter_X = iter(test_dataloader_X)
test_iter_Y = iter(test_dataloader_Y)
# Get some fixed data from domains X and Y for sampling. These are images that are held
# constant throughout training, that allow us to inspect the model's performance.
# NOTE(review): `.next()` is Python-2 iterator style; on Python 3 this should
# be `next(test_iter_X)` — confirm against the PyTorch version in use.
fixed_X = test_iter_X.next()[0]
fixed_Y = test_iter_Y.next()[0]
fixed_X = scale(fixed_X) # make sure to scale to a range -1 to 1
fixed_Y = scale(fixed_Y)
# batches per epoch
iter_X = iter(dataloader_X)
iter_Y = iter(dataloader_Y)
batches_per_epoch = min(len(iter_X), len(iter_Y))
for epoch in range(1, n_epochs+1):
# Reset iterators for each epoch
# (the reset fires whenever epoch is a multiple of batches_per_epoch, so the
# iterators are refreshed before they would be exhausted)
if epoch % batches_per_epoch == 0:
iter_X = iter(dataloader_X)
iter_Y = iter(dataloader_Y)
images_X, _ = iter_X.next()
images_X = scale(images_X) # make sure to scale to a range -1 to 1
images_Y, _ = iter_Y.next()
images_Y = scale(images_Y)
# move images to GPU if available (otherwise stay on CPU)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
images_X = images_X.to(device)
images_Y = images_Y.to(device)
# ============================================
# TRAIN THE DISCRIMINATORS
# ============================================
## First: D_X, real and fake loss components ##
# Train with real images
d_x_optimizer.zero_grad()
# 1. Compute the discriminator losses on real images
out_x = D_X(images_X)
D_X_real_loss = real_mse_loss(out_x)
# Train with fake images
# 2. Generate fake images that look like domain X based on real images in domain Y
fake_X = G_YtoX(images_Y)
# 3. Compute the fake loss for D_X
out_x = D_X(fake_X)
D_X_fake_loss = fake_mse_loss(out_x)
# 4. Compute the total loss and perform backprop
d_x_loss = D_X_real_loss + D_X_fake_loss
d_x_loss.backward()
d_x_optimizer.step()
## Second: D_Y, real and fake loss components ##
# Train with real images
d_y_optimizer.zero_grad()
# 1. Compute the discriminator losses on real images
out_y = D_Y(images_Y)
D_Y_real_loss = real_mse_loss(out_y)
# Train with fake images
# 2. Generate fake images that look like domain Y based on real images in domain X
fake_Y = G_XtoY(images_X)
# 3. Compute the fake loss for D_Y
out_y = D_Y(fake_Y)
D_Y_fake_loss = fake_mse_loss(out_y)
# 4. Compute the total loss and perform backprop
d_y_loss = D_Y_real_loss + D_Y_fake_loss
d_y_loss.backward()
d_y_optimizer.step()
# =========================================
# TRAIN THE GENERATORS
# =========================================
# Both generators share g_optimizer, so one backward pass through the full
# cycle (Y->X->Y and X->Y->X) updates G_XtoY and G_YtoX together.
## First: generate fake X images and reconstructed Y images ##
g_optimizer.zero_grad()
# 1. Generate fake images that look like domain X based on real images in domain Y
fake_X = G_YtoX(images_Y)
# 2. Compute the generator loss based on domain X
out_x = D_X(fake_X)
g_YtoX_loss = real_mse_loss(out_x)
# 3. Create a reconstructed y
# 4. Compute the cycle consistency loss (the reconstruction loss)
reconstructed_Y = G_XtoY(fake_X)
reconstructed_y_loss = cycle_consistency_loss(images_Y, reconstructed_Y, lambda_weight=10)
## Second: generate fake Y images and reconstructed X images ##
# 1. Generate fake images that look like domain Y based on real images in domain X
fake_Y = G_XtoY(images_X)
# 2. Compute the generator loss based on domain Y
out_y = D_Y(fake_Y)
g_XtoY_loss = real_mse_loss(out_y)
# 3. Create a reconstructed x
# 4. Compute the cycle consistency loss (the reconstruction loss)
reconstructed_X = G_YtoX(fake_Y)
reconstructed_x_loss = cycle_consistency_loss(images_X, reconstructed_X, lambda_weight=10)
# 5. Add up all generator and reconstructed losses and perform backprop
g_total_loss = g_YtoX_loss + g_XtoY_loss + reconstructed_y_loss + reconstructed_x_loss
g_total_loss.backward()
g_optimizer.step()
# NOTE(review): print_every and losses are set up but never used in the
# visible lines — the logging/sampling code presumably continues past this
# excerpt.