用PaddlePaddle实现LSGAN
LSGAN
近几年来 GAN 十分火热。自 Goodfellow 在 2014 年发表开山之作 Generative Adversarial Nets [1] 以来,生成式对抗网络一直备受机器学习领域的关注,这种两人零和博弈的思想十分有趣,充分体现了数学的美感。从 GAN 到 WGAN [2] 的优化,再到本文介绍的 LSGAN [3],再到最近很火的 BigGAN [4],可以说生成式对抗网络的魅力无穷,而且它的用处也非常奇妙,如今还被用于在没有负样本的情况下训练分类器,例如 AnoGAN [5]。
LSGAN 这篇经典的论文主要工作是把交叉熵损失函数换做了最小二乘损失函数,这样做作者认为改善了传统 GAN 的两个问题,即传统 GAN 生成的图片质量不高,而且训练过程十分不稳定。LSGAN 试图使用不同的距离度量来构建一个更加稳定而且收敛更快的,生成质量高的对抗网络。
传统 GAN 的损失函数:
$$\min_G \max_D V_{GAN}(D,G) = E_{x\sim p_{data}(x)}[\log D(x)] + E_{z\sim p_z(z)}[\log (1-D(G(z)))]$$
LSGAN 的损失函数:
$$\min_D V_{LSGAN}(D) = \frac{1}{2} E_{x\sim p_{data}(x)}[(D(x)-b)^2] + \frac{1}{2} E_{z\sim p_z(z)}[(D(G(z))-a)^2]$$
$$\min_G V_{LSGAN}(G) = \frac{1}{2} E_{z\sim p_z(z)}[(D(G(z))-c)^2]$$
其中 $G$ 为生成器(Generator),$D$ 为判别器(Discriminator),$z$ 为噪音,它可以服从均匀分布或者高斯分布,$p_{data}(x)$ 为真实数据 $x$ 服从的概率分布,$p_z(z)$ 为 $z$ 服从的概率分布。$E_{x\sim p_{data}(x)}$ 为期望值,$E_{z\sim p_z(z)}$ 同为期望值。$a$、$b$ 分别是判别器对生成数据和真实数据的目标标签,$c$ 是生成器希望判别器对生成数据给出的输出值(本文代码中取 $a=0$,$b=c=1$)。
相关论文
Generative Adversarial Networks (GAN)
Wasserstein GAN (WGAN)
Least Squares Generative Adversarial Networks (LSGAN)
Large Scale GAN Training for High Fidelity Natural Image Synthesis (BigGAN)
Unsupervised Anomaly Detection with Generative Adversarial Networks to Guide Marker Discovery (AnoGAN)
相关博客
LSGAN:最小二乘生成对抗网络
生成对抗网络GAN系列(四)--- LSGAN
下载安装命令
## CPU版本安装命令
pip install -f https://paddlepaddle.org.cn/pip/oschina/cpu paddlepaddle
## GPU版本安装命令
pip install -f https://paddlepaddle.org.cn/pip/oschina/gpu paddlepaddle-gpu
注意
本项目代码需要使用GPU环境来运行:
并且检查相关参数设置, 例如use_gpu, fluid.CUDAPlace(0)等处是否设置正确.
In[1]
# Decompress the MNIST archives in data/data65 (gzip: -d decompress, -f overwrite, -k keep the .gz)
! cd data/data65 && gzip -dfk t10k-labels-idx1-ubyte.gz
! cd data/data65 && gzip -dfk t10k-images-idx3-ubyte.gz
! cd data/data65 && gzip -dfk train-labels-idx1-ubyte.gz
! cd data/data65 && gzip -dfk train-images-idx3-ubyte.gz
In[2]
%matplotlib inline
import os
import numpy as np
from time import clock
from struct import unpack
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import paddle
import paddle.fluid as fluid
In[3]
# 一些参数
config = {
    # Shape of one noise sample fed to the generator.
    'input_shape': (100, 1, 1),
    # Shape of one MNIST image (channels, rows, columns).
    'image_shape': (1, 28, 28),
    # Number of samples per training batch.
    'batch_size': 128,
    # Total number of training epochs.
    'num_epochs': 11,
    # Whether to run on the GPU.
    'use_gpu': True,
    # Learning rate, shared by the generator and the discriminator.
    'lr': 0.0001,
    # Directory the frozen inference model is written to.
    'model_save_dir': 'model',
}
In[4]
# 统一的 logger 配置
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
logger = None


def init_log_config():
    """
    Configure the root logger to write to the console and to ``logs/train.log``.

    Rebinds the module-level ``logger`` to the root logger, sets its level to
    INFO and installs exactly two handlers that share one format: a stream
    handler and a file handler that truncates ``<cwd>/logs/train.log`` (the
    ``logs`` directory is created when it is missing).

    Handlers already attached to the root logger are removed and closed first,
    so calling this function again (for example when the notebook cell is
    re-run) neither duplicates output nor leaks the previously opened log file.

    :return: None
    """
    global logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    log_path = os.path.join(os.getcwd(), 'logs')
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    log_name = os.path.join(log_path, 'train.log')

    # Close the discarded handlers explicitly: merely dropping them from the
    # list would leave an earlier FileHandler's file descriptor open.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    formatter = logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s")

    sh = logging.StreamHandler()
    sh.setFormatter(formatter)

    fh = logging.FileHandler(log_name, mode='w')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)

    logger.addHandler(sh)
    logger.addHandler(fh)


init_log_config()
In[5]
# 数据集读取代码
def readDataset(filename):
    '''
    Read a decompressed MNIST IDX file and return its contents as an array.

    The magic number in the header selects the layout: 2051 marks an image
    file, anything else is read as a label file. For image files the
    module-level ``config['image_shape']`` is updated to ``(1, rows, cols)``
    as a side effect.

    @param filename: path of the decompressed dataset file
    @return: integer array of shape (count, 1, rows, cols) for images,
             or (count,) for labels
    @raise ValueError: the file holds fewer payload bytes than its header announces
    @raise struct.error: the header itself is truncated
    '''
    print("Reading %s" % filename)
    # The context manager closes the file even when parsing fails.
    with open(filename, "rb") as f:
        magic, count = unpack(">LL", f.read(8))
        if magic == 2051:
            # The two header fields after the count are the per-image dimensions.
            rows, cols = unpack(">LL", f.read(8))
            print(" Image size: [%d, %d]" % (rows, cols))
            config['image_shape'] = (1, rows, cols)
            shape = (count, 1, rows, cols)
            expected = count * rows * cols
        else:
            shape = (count,)
            expected = count
        # One bulk read instead of one f.read(1) call per byte.
        payload = f.read(expected)

    if len(payload) != expected:
        raise ValueError("%s is truncated: expected %d data bytes, got %d"
                         % (filename, expected, len(payload)))
    print("")
    # Every value is a single unsigned byte; widen to the default integer
    # dtype, which is what building the array from Python ints produced.
    return np.frombuffer(payload, dtype=np.uint8).reshape(shape).astype(int)
# Only the training-set images are needed
train_images = readDataset('data/data65/train-images-idx3-ubyte')
# train_labels = readDataset('data/data65/train-labels-idx1-ubyte')
# test_images = readDataset('data/data65/t10k-images-idx3-ubyte')
# test_labels = readDataset('data/data65/t10k-labels-idx1-ubyte')
In[6]
# 定义两个 reader
def data_reader(data, shape):
    '''
    Build a reader creator that yields normalised float32 images.

    @param data: iterable of arrays, one per image
    @param shape: shape every yielded sample is reshaped to
    @return: zero-argument function returning a fresh generator over ``data``;
             each sample is computed as (pixel - 127.0) / 127.0
    '''
    def reader():
        for sample in data:
            pixels = sample.reshape(shape).astype('float32')
            yield (pixels - 127.0) / 127.0

    return reader
def random_reader(shape):
    '''
    Build a reader creator that yields an endless stream of Gaussian noise.

    @param shape: shape of every noise sample
    @return: zero-argument function returning a generator that never ends;
             each item is a float32 array drawn from N(0, 1) with NumPy's global RNG
    '''
    def reader():
        while True:
            noise = np.random.normal(0.0, 1.0, shape)
            yield noise.astype('float32')

    return reader
# Seed NumPy's global RNG from fresh OS entropy. Calling seed() without an
# argument avoids time.clock(), which measured CPU time (nearly identical at
# the start of every run) and was removed from the standard library in Python 3.8.
np.random.seed()
# Shuffled MNIST images, batched, used as the discriminator's real input
mnist_reader = paddle.batch(
    paddle.reader.shuffle(data_reader(train_images, config['image_shape']), 1024),
    batch_size=config['batch_size'])
# Batched random noise, used as the generator's input
input_reader = paddle.batch(random_reader(config['input_shape']), batch_size=config['batch_size'])
In[7]
# 图像绘制代码
def show_image_grid(images, epoch=None):
    '''
    Draw up to 64 images on an 8x8 grid and display the figure.

    @param images: sequence of images; ``image[0]`` of every entry is drawn,
                   so entries are expected to be laid out as (1, H, W)
    @param epoch: epoch number shown in the figure title; the title is
                  omitted when this is None
    '''
    fig = plt.figure(figsize=(5, 5))
    # Without this guard the default argument renders the literal title "Epoch None".
    if epoch is not None:
        fig.suptitle("Epoch {}".format(epoch))
    gs = plt.GridSpec(8, 8)
    gs.update(wspace=0.05, hspace=0.05)
    # Only the first 64 images fit on the grid; extra ones are ignored.
    for i, image in enumerate(images[:64]):
        ax = plt.subplot(gs[i])
        ax.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        ax.imshow(image[0], cmap='gray')
    plt.show()
In[8]
# 网络结构定义
def generator(z, name="G"):
    ''' Generator network: maps a batch of noise ``z`` to a batch of 1x28x28 images in tanh range. '''
    # Names created inside the guard are prefixed with ``name + '_'``.
    with fluid.unique_name.guard(name + '_'):
        hidden = fluid.layers.fc(input=z, size=1024)
        hidden = fluid.layers.fc(hidden, size=128 * 7 * 7)
        hidden = fluid.layers.batch_norm(hidden, act='tanh')
        feature = fluid.layers.reshape(hidden, shape=(-1, 128, 7, 7))
        # Two upsampling stages: a 5x5 convolution emits four times the target
        # channel count, then a reshape trades those channels for a doubled
        # height and width (7 -> 14 -> 28).
        for channels, side in ((64, 14), (32, 28)):
            feature = fluid.layers.conv2d(feature, num_filters=4 * channels,
                                          filter_size=5, stride=1,
                                          padding=2, act='tanh')
            feature = fluid.layers.reshape(feature, shape=(-1, channels, side, side))
        image = fluid.layers.conv2d(feature, num_filters=1,
                                    filter_size=5, stride=1,
                                    padding=2, act='tanh')
    return image
def discriminator(image, name="D"):
    ''' Discriminator network: maps a batch of images to one raw (un-activated) score per sample. '''
    # Names created inside the guard are prefixed with ``name + '_'``.
    with fluid.unique_name.guard(name + '_'):
        # Two stride-2 convolutions; the flatten below expects 64 maps of 7x7.
        x = fluid.layers.conv2d(input=image, num_filters=32, filter_size=6, stride=2, padding=2)
        x = fluid.layers.leaky_relu(x)
        x = fluid.layers.conv2d(x, num_filters=64, filter_size=6, stride=2, padding=2)
        x = fluid.layers.batch_norm(x)
        x = fluid.layers.leaky_relu(x)
        # Flatten, then a hidden fully connected layer with batch norm.
        x = fluid.layers.reshape(x, shape=(-1, 64 * 7 * 7))
        x = fluid.layers.fc(x, size=512)
        x = fluid.layers.batch_norm(x)
        x = fluid.layers.leaky_relu(x)
        # The final layer has no activation.
        score = fluid.layers.fc(x, size=1)
    return score
In[9]
# LSGAN 结构与 Program 构建
def get_params(program, prefix):
    """Return the names of the program's global-block parameters that start with ``prefix``."""
    names = (param.name for param in program.global_block().all_parameters())
    return [param_name for param_name in names if param_name.startswith(prefix)]
# Program that trains the generator
G_program = fluid.Program()
with fluid.program_guard(G_program):
    z = fluid.layers.data(name='z', shape=config['input_shape'])
    # Generate sample images with generator G
    G_sample = generator(z)
    # Clone for inference now, before discriminator, loss and optimizer ops are added
    infer_program = G_program.clone(for_test=True)
    # Score the generated samples with discriminator D
    D_fake = discriminator(G_sample)
    ones = fluid.layers.fill_constant_batch_size_like(
        z, shape=[-1, 1], dtype='float32', value=1)
    # Generator least-squares loss: half the mean squared error between D(G(z)) and 1
    G_squared_error = fluid.layers.square_error_cost(D_fake, ones)
    G_loss = fluid.layers.mean(G_squared_error) / 2.
    # Only parameters whose names start with "G" are updated by this program
    G_params = get_params(G_program, "G")
    G_optimizer = fluid.optimizer.Adam(learning_rate=config['lr'])
    G_optimizer.minimize(G_loss, parameter_list=G_params)
# Program that trains the discriminator on real images
D_R_program = fluid.Program()
with fluid.program_guard(D_R_program):
    real = fluid.layers.data(name='img', shape=config['image_shape'])
    # Score the real samples with discriminator D
    D_real = discriminator(real)
    # Discriminator least-squares loss on real data: half the mean of (D(x) - 1)^2
    D_real_squared = fluid.layers.square(D_real - 1.)
    D_real_loss = fluid.layers.mean(D_real_squared) / 2.
    # Only parameters whose names start with "D" are updated by this program
    D_real_params = get_params(D_R_program, "D")
    D_optimizer = fluid.optimizer.Adam(learning_rate=config['lr'])
    D_optimizer.minimize(D_real_loss, parameter_list=D_real_params)
# Program that trains the discriminator on generated images
D_F_program = fluid.Program()
with fluid.program_guard(D_F_program):
    z = fluid.layers.data(name='z', shape=config['input_shape'])
    # Generate sample images with generator G
    G_sample = generator(z)
    # Score the generated samples with discriminator D
    D_fake = discriminator(G_sample)
    # Discriminator least-squares loss on generated data: half the mean of D(G(z))^2
    D_fake_squared = fluid.layers.square(D_fake)
    D_fake_loss = fluid.layers.mean(D_fake_squared) / 2.
    # Only parameters whose names start with "D" are updated, so G stays fixed here
    D_fake_params = get_params(D_F_program, "D")
    D_optimizer = fluid.optimizer.Adam(learning_rate=config['lr'])
    D_optimizer.minimize(D_fake_loss, parameter_list=D_fake_params)
In[10]
# 训练代码
def train():
    """
    Train the LSGAN for ``config['num_epochs']`` epochs.

    For every batch of real images the three programs run in a fixed order:
    discriminator on generated samples, discriminator on real samples, then
    the generator. Running mean losses of the current epoch are logged every
    50 batches, and a grid of freshly generated samples is shown after each epoch.
    """
    use_cuda = config['use_gpu'] and fluid.core.is_compiled_with_cuda()
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(program=fluid.default_startup_program())

    def noise_feed():
        # A new noise batch is drawn for every program run.
        return {'z': np.array(next(input_reader()))}

    for epoch in range(config['num_epochs']):
        d_fake_history, d_real_history, g_history = [], [], []
        for step, real_batch in enumerate(mnist_reader()):
            fake_out = exe.run(D_F_program, fetch_list=[D_fake_loss], feed=noise_feed())
            d_fake_history.append(np.mean(fake_out))

            real_out = exe.run(D_R_program, fetch_list=[D_real_loss],
                               feed={'img': np.array(real_batch)})
            d_real_history.append(np.mean(real_out))

            gen_out = exe.run(G_program, fetch_list=[G_loss], feed=noise_feed())
            g_history.append(np.mean(gen_out))

            if step % 50 == 0:
                logger.info("Epoch %d batch %d d_fake %.6f d_real %.6f g %.6f",
                            epoch, step, np.mean(d_fake_history),
                            np.mean(d_real_history), np.mean(g_history))

        # Visualise the generator's progress once per epoch.
        samples = exe.run(infer_program, fetch_list=[G_sample], feed=noise_feed())
        show_image_grid(samples[0], epoch)
    logger.info('Train end.')


if __name__ == '__main__':
    train()
In[11]
# 模型固化代码
def freeze_model():
    """ Save the generator as an inference model under ``config['model_save_dir']``. """
    # Rebuild the generator to obtain the output variable handed to the exporter.
    noise = fluid.layers.data(name='z', shape=config['input_shape'], dtype='float32')
    generated = generator(noise)
    # Export ``infer_program`` with 'z' as its only feed, using a CPU executor.
    cpu_exe = fluid.Executor(fluid.CPUPlace())
    fluid.io.save_inference_model(config['model_save_dir'], ['z'], generated, cpu_exe, infer_program)


freeze_model()
In[12]
# 模型预测代码
def infer():
    """
    Load the frozen generator and display one grid of 64 generated images.

    The model is read from ``config['model_save_dir']``, fed a single batch of
    64 random noise samples, and the output is drawn by ``show_image_grid``
    with the title epoch fixed to 0.
    """
    # Same device rule as train(): fall back to the CPU when this Paddle build
    # has no CUDA support instead of failing on CUDAPlace.
    use_cuda = config['use_gpu'] and fluid.core.is_compiled_with_cuda()
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    # Load the previously frozen model
    [inference_program, feed_target_names, fetch_list] = fluid.io.load_inference_model(
        dirname=config['model_save_dir'], executor=exe)
    # Reader producing one batch of 64 noise samples for prediction
    infer_reader = paddle.batch(random_reader(config['input_shape']), batch_size=64, drop_last=False)
    noise_batch = np.array(next(infer_reader()))
    result = exe.run(inference_program, fetch_list=fetch_list,
                     feed={feed_target_names[0]: noise_batch})
    show_image_grid(result[0], 0)


infer()