GANS学习

代码:

"""
Deep Convolutional GANs
本章中,将构建一个深度卷积生成对抗网络。简称:DCGAN。DCGAN论文发表于2015年,论文地址:[论文链接](https://arxiv.org/pdf/1511.06434.pdf).
我们将在[Street View House Numbers](http://ufldl.stanford.edu/housenumbers/) (SVHN)数据集基础上训练DCGAN。\
该数据集来源于谷歌街景中房屋门牌数字(RGB图片)。 SVHN相比MNIST,彩色的,且种类更丰富。
故此,我们需要一个更深且更强大的网络:使用卷积层。且非常有必要使用批归一化(batch normalization)。
相比之前 gan_mnist网络,唯一区别也在此(其他操作基本相同)。
"""
import pickle as pkl
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from scipy.io import loadmat
from urllib.request import urlretrieve
from os.path import isfile, isdir
from tqdm import tqdm
import matplotlib as mpl

# Set the plotting font so that Chinese text in figures is not garbled.
mpl.rcParams['font.sans-serif'] = [u'simHei']
mpl.rcParams['axes.unicode_minus'] = False

# Missing dataset files are downloaded further below, but the target
# directory itself is not created here: it must already exist.

data_dir = 'data/'
if not isdir(data_dir):
    raise Exception("Data directory doesn't exist!")

class DLProgress(tqdm):
    """tqdm progress bar with a hook that can be passed to ``urlretrieve``."""

    # Block number reported on the previous hook call.
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        """Advance the bar by the amount transferred since the last call.

        :param block_num: Number of blocks transferred so far
        :param block_size: Size of one block
        :param total_size: Total size of the download; stored as the bar total
        """
        self.total = total_size
        new_blocks = block_num - self.last_block
        self.update(new_blocks * block_size)
        self.last_block = block_num

# Download whichever of the two SVHN .mat files is not yet present in data_dir.
for _mat_name, _mat_url, _bar_title in (
        ('train_32x32.mat',
         'http://ufldl.stanford.edu/housenumbers/train_32x32.mat',
         'SVHN Training Set'),
        ('test_32x32.mat',
         'http://ufldl.stanford.edu/housenumbers/test_32x32.mat',
         'SVHN Testing Set'),
):
    _mat_path = data_dir + _mat_name
    if isfile(_mat_path):
        continue
    with DLProgress(unit='B', unit_scale=True, miniters=1, desc=_bar_title) as pbar:
        urlretrieve(_mat_url, _mat_path, pbar.hook)

# loadmat() loads a MATLAB file.
# Original author's note: the image arrays have shape (32, 32, 3, 73257) for
# the training file and (32, 32, 3, 26032) for the test file.
trainset = loadmat(data_dir + 'train_32x32.mat')
testset = loadmat(data_dir + 'test_32x32.mat')


# 展示一些图片
def show_svhn_images():
    """Show a 6x6 grid of randomly picked images from the training set."""
    # Draw 36 random sample indices along the last axis of trainset['X'].
    n_available = trainset['X'].shape[3]
    picks = np.random.randint(0, n_available, size=36)

    fig, axes = plt.subplots(6, 6, sharex=True, sharey=True, figsize=(5, 5))
    for sample_idx, axis in zip(picks, axes.flatten()):
        axis.imshow(trainset['X'][:, :, :, sample_idx], aspect='equal')
        # Hide tick marks and labels so only the images are visible.
        axis.xaxis.set_visible(False)
        axis.yaxis.set_visible(False)

    plt.subplots_adjust(wspace=0, hspace=0)
    plt.show()

# todo 数据做预处理。一、缩放图片值域范围到:(-1,1),因为生成器输出值也是这个范围。
#      二、对数据集做了拆分:测试、验证集。
def scale(x, feature_range=(-1, 1)):
    """Rescale pixel values into ``feature_range``.

    The input is first mapped to (0, 1) using the array's own minimum as the
    lower bound and the fixed value 255 as the upper bound, then stretched
    and shifted into ``feature_range``.

    :param x: Array of pixel values (anything supporting ``.min()`` and
        element-wise arithmetic)
    :param feature_range: ``(low, high)`` target range
    :return: The rescaled array
    """
    # Scale to (0, 1). Note the lower bound is data-dependent (x.min()).
    x = ((x - x.min()) / (255 - x.min()))

    # Scale to feature_range. Local names avoid shadowing the builtins
    # ``min`` and ``max``.
    range_low, range_high = feature_range
    x = x * (range_high - range_low) + range_low
    return x

class Dataset:
    """Holds the train / validation / test splits and yields training batches.

    The original test set is cut in two: the first part stays the test set,
    the remainder becomes the validation set. Image arrays are stored with
    the sample axis first.
    """

    def __init__(self, train, test, val_frac=0.5, shuffle=False, scale_func=None):
        """Split the data and move the sample axis to the front.

        :param train: Mapping with keys ``'X'`` (images) and ``'y'`` (labels)
        :param test: Mapping with the same keys, split into test / validation
        :param val_frac: Fraction of ``test`` that goes to the validation set
        :param shuffle: Whether ``batches`` reshuffles the training data
        :param scale_func: Callable applied to each image batch; defaults to
            the module-level ``scale``
        """
        # Index at which the original test set is cut.
        cut = int(len(test['y']) * (1 - val_frac))

        test_images = test['X']
        test_labels = test['y']
        self.test_x = test_images[:, :, :, :cut]
        self.valid_x = test_images[:, :, :, cut:]
        self.test_y = test_labels[:cut]
        self.valid_y = test_labels[cut:]
        self.train_x = train['X']
        self.train_y = train['y']

        # np.rollaxis(a, 3) rolls axis 3 (the sample axis) back to position 0
        # while the remaining axes keep their relative order.
        self.train_x = np.rollaxis(self.train_x, 3)
        self.valid_x = np.rollaxis(self.valid_x, 3)
        self.test_x = np.rollaxis(self.test_x, 3)

        self.scaler = scale if scale_func is None else scale_func
        self.shuffle = shuffle

    def batches(self, batch_size):
        """Yield ``(scaled_images, labels)`` batches over the training set.

        When shuffling is enabled the stored training arrays are reordered
        (and kept in that order) before the first batch is produced. The
        final batch may be shorter than ``batch_size``.

        :param batch_size: Maximum number of samples per batch
        """
        if self.shuffle:
            # Shuffle an index vector and apply it to images and labels alike
            # so that they stay aligned.
            order = np.arange(len(self.train_x))
            np.random.shuffle(order)
            self.train_x = self.train_x[order]
            self.train_y = self.train_y[order]

        n_samples = len(self.train_y)
        for start in range(0, n_samples, batch_size):
            window = slice(start, start + batch_size)
            images = self.train_x[window]
            labels = self.train_y[window]
            yield self.scaler(images), labels

# todo-Inputs占位符
def model_inputs(real_dim, z_dim):
    """Create the input placeholders of the GAN.

    :param real_dim: Dimensions of one real image, e.g. ``(32, 32, 3)``
    :param z_dim: Length of the noise vector fed to the generator
    :return: A tuple of (real image placeholder, noise placeholder); both have
        an unspecified leading batch dimension
    """
    inputs_real = tf.placeholder(tf.float32, (None, *real_dim), name='input_real')
    inputs_z = tf.placeholder(tf.float32, (None, z_dim), name='input_z')

    return inputs_real, inputs_z

# todo-构建生成器网络
"""
构建生成网络。输入是:噪音向量 `z`。同样,网络输出是:$tanh$输出, 输出尺寸是:32x32x3(匹配SVHN图片尺寸)。
- 不同之处:使用卷积层生成新图片。
       第一层是全连接层,并重塑为:图片。比如尺寸:4x4x1024。然后使用批归一化( batch normalization)再接leaky ReLU。
       第二层是:转置卷积层(一般在前面层基础上:深度减半,宽和高加倍)。并再次使用批归一化( batch normalization)再接leakyReLU。
       对每一层的步骤总结起来就是: 转置卷积 > 批归一化 > Lrelu。
       可以一直堆栈,直到得到最终的尺寸: 32x32x3。
**特别注意**  就SVHN数据集而言,我们最终要得到的形状是:32x32x3.
API:
- tf.layers.conv2d_transpose(inputs,filters,kernel_size,strides=(1, 1),padding='valid')  
  - 参数:inputs:input tensor.
  - filters: Integer 输出的通道数==滤波器的个数
  - kernel_size:卷积核大小 
  - strides:步幅
  
- tf.layers.batch_normalization(inputs,axis=-1,momentum=0.99,training=False)
  - 参数:axis: Integer, 沿着哪一轴做归一化 (默认是最后1轴). 
  - training:  a Python boolean.决定是否是训练模式(训练时候是True,推理时候是False)
         ****特别注意****   when training, the moving_mean and moving_variance need to be updated.
  By default the update ops are placed in `tf.GraphKeys.UPDATE_OPS`, so they
  need to be added as a dependency to the `train_op`. Also, be sure to add
  any batch_normalization ops before getting the update_ops collection.
  Otherwise, update_ops will be empty, and training/inference will not work
  properly.
"""
def generator(z, output_dim, reuse=False, alpha=0.2, training=True):
    """Build the generator network: noise vector in, tanh image out.

    :param z: Noise input tensor
    :param output_dim: Number of channels of the generated image
    :param reuse: Whether to reuse the variables of the 'generator' scope
    :param alpha: Slope of the leaky ReLU for negative inputs
    :param training: Passed to every batch-normalization layer
    :return: The tanh-activated output of the last transposed convolution
    """
    def leaky_relu(tensor):
        return tf.maximum(alpha * tensor, tensor)

    with tf.variable_scope('generator', reuse=reuse):
        # Fully connected projection, reshaped into a 4x4x512 feature map.
        net = tf.layers.dense(z, 4 * 4 * 512)
        net = tf.reshape(net, (-1, 4, 4, 512))
        net = tf.layers.batch_normalization(net, training=training)
        net = leaky_relu(net)

        # Transposed-convolution stack: transposed conv > batch norm > leaky
        # ReLU, giving 8x8x256 and then 16x16x128.
        for depth in (256, 128):
            net = tf.layers.conv2d_transpose(net, depth, 5, strides=2, padding='same')
            net = tf.layers.batch_normalization(net, training=training)
            net = leaky_relu(net)

        # Output layer (32x32x3 for SVHN); no batch norm, tanh activation.
        logits = tf.layers.conv2d_transpose(net, output_dim, 5, strides=2, padding='same')
        return tf.tanh(logits)

# todo-创建辨别器网络
"""
创建辨别网络:就是基本的CNN分类器。输入图片维度为:32x32x3 。几层Conv2d-FC-得到输出logits--sigmoid激活。
关于卷积层的深度建议:第一层的滤波器可以从16, 32, 64 开始,然后在下一层中成倍增长即可。注意在 DCGAN 论文中, 
下采样(downsampling)只使用了卷积层,没有用池化层。 

除了第一层卷积层和输出层,均需要使用函数 `tf.layers.batch_normalization`做批归一化。归纳起来就是:Conv2d> BN>LReLU。

**注意**在本项目中,批归一化层使用的是该批次的统计量 (等于将`training`参数设置为 `True`)。因为在本项目中,辨别器用途主要是帮助训练生成器。
但,如果在其他项目中需要使用辨别器做推理/预测,那么需要恰当的设置`training`参数。
"""
def discriminator(x, reuse=False, alpha=0.2):
    """Build the discriminator network: a strided-convolution classifier.

    Batch normalization always runs with ``training=True`` here, i.e. it
    uses the statistics of the current batch.

    :param x: Image tensor (32x32x3 in this script)
    :param reuse: Whether to reuse the variables of the 'discriminator' scope
    :param alpha: Slope of the leaky ReLU for negative inputs
    :return: A tuple of (sigmoid output, logits)
    """
    def leaky_relu(tensor):
        return tf.maximum(alpha * tensor, tensor)

    with tf.variable_scope('discriminator', reuse=reuse):
        # First convolution (-> 16x16x64) deliberately has no batch norm.
        hidden = tf.layers.conv2d(x, 64, 5, strides=2, padding='same')
        hidden = leaky_relu(hidden)

        # Conv > batch norm > leaky ReLU, giving 8x8x128 and then 4x4x256.
        for depth in (128, 256):
            hidden = tf.layers.conv2d(hidden, depth, 5, strides=2, padding='same')
            hidden = tf.layers.batch_normalization(hidden, training=True)
            hidden = leaky_relu(hidden)

        # Flatten and map to a single logit per image.
        flat = tf.reshape(hidden, (-1, 4 * 4 * 256))
        logits = tf.layers.dense(flat, 1)
        return tf.sigmoid(logits), logits

# todo-计算模型损失
def model_loss(input_real, input_z, output_dim, alpha=0.2):
    """
    Get the loss for the discriminator and generator
    :param input_real: Images from the real dataset
    :param input_z: Z input
    :param output_dim: The number of channels in the output image
    :param alpha: Leaky ReLU slope passed to both networks
    :return: A tuple of (discriminator loss, generator loss)
    """
    g_model = generator(input_z, output_dim, alpha=alpha)  # generated images
    # discriminator() returns (sigmoid output, logits); only the logits are
    # used below because sigmoid_cross_entropy_with_logits applies the
    # sigmoid itself. Feeding it the sigmoid output would squash twice.
    _, d_logits_real = discriminator(input_real, alpha=alpha)
    _, d_logits_fake = discriminator(g_model, reuse=True, alpha=alpha)

    # Discriminator: real images should be classified 1, generated images 0.
    d_loss_real = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_real, labels=tf.ones_like(d_logits_real)))
    d_loss_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake, labels=tf.zeros_like(d_logits_fake)))
    # Generator: wants the discriminator to classify generated images as 1.
    g_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake, labels=tf.ones_like(d_logits_fake)))

    d_loss = d_loss_real + d_loss_fake
    return d_loss, g_loss

# todo-优化器
"""
训练opt需要包裹在 `with tf.control_dependencies`模块中,以便批归一化层可以更新moving_mean和moving_variance。

- tf.control_dependencies(control_inputs)
  - with g.control_dependencies([a, b, c]):
  - `d` and `e` will only run after `a`, `b`, and `c` have executed.
    - d = ...
    - e = ...
"""

def model_opt(d_loss, g_loss, learning_rate, beta1):
    """
    Get optimization operations
    :param d_loss: Discriminator loss Tensor
    :param g_loss: Generator loss Tensor
    :param learning_rate: Learning rate passed to both Adam optimizers
    :param beta1: The exponential decay rate for the 1st moment in the optimizer
    :return: A tuple of (discriminator training operation,
        generator training operation, list of generator trainable variables)
    """
    # Sort the trainable variables into the two networks by scope prefix.
    d_vars, g_vars = [], []
    for var in tf.trainable_variables():
        if var.name.startswith('discriminator'):
            d_vars.append(var)
        if var.name.startswith('generator'):
            g_vars.append(var)

    # Both training ops depend on every op in the UPDATE_OPS collection, so
    # the batch-norm moving statistics are updated before each step.
    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(pending_updates):
        d_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=beta1)
        d_train_opt = d_optimizer.minimize(d_loss, var_list=d_vars)
        g_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=beta1)
        g_train_opt = g_optimizer.minimize(g_loss, var_list=g_vars)

    return d_train_opt, g_train_opt, g_vars

# Hyperparameters. GANs are extremely sensitive to these values, so try
# different settings.
real_size = (32, 32, 3)
z_size = 100
learning_rate = 0.0002
batch_size = 128
epochs = 25
alpha = 0.2
beta1 = 0.5
# Build the model: placeholders, losses and training ops are all created
# inside a dedicated graph object.
tf.reset_default_graph()
graph = tf.Graph()
with graph.as_default():
    input_real, input_z = model_inputs(real_size, z_size)
    d_loss, g_loss = model_loss(input_real, input_z, real_size[2], alpha=alpha)
    d_opt, g_opt, g_vars = model_opt(d_loss, g_loss, learning_rate, beta1)

# 定义一个可视化函数,在训练过程中可视化。
def view_samples(epoch, samples, nrows, ncols, figsize=(5, 5)):
    """Show one stored set of generated images as a grid.

    :param epoch: Index into ``samples`` selecting which set to display
    :param samples: Sequence of image batches
    :param nrows: Number of grid rows
    :param ncols: Number of grid columns
    :param figsize: Figure size passed to ``plt.subplots``
    :return: A tuple of (figure, axes array)
    """
    fig, axes = plt.subplots(figsize=figsize, nrows=nrows, ncols=ncols,
                             sharey=True, sharex=True)
    for ax, img in zip(axes.flatten(), samples[epoch]):
        ax.axis('off')
        # Stretch each image to the full 0..255 range for display. A constant
        # image has zero span; show it as black instead of dividing by zero.
        value_span = img.max() - img.min()
        if value_span > 0:
            img = ((img - img.min()) * 255 / value_span).astype(np.uint8)
        else:
            img = np.zeros_like(img, dtype=np.uint8)
        # 'box-forced' is only accepted by older Matplotlib releases; newer
        # ones reject it with ValueError and take plain 'box' instead.
        try:
            ax.set_adjustable('box-forced')
        except ValueError:
            ax.set_adjustable('box')
        ax.imshow(img, aspect='equal')

    plt.subplots_adjust(wspace=0, hspace=0)
    plt.show()
    return fig, axes


"""
定义了一个训练网络的函数。注意到在`generator`中,生成了一些图片并可视化,为此将 `training`设置为 `False`,保证批归一化层使用的全局统计量,
而不是批统计量。
同时,我们在生成器的opt中设置了`net.input_real`占位符,生成器优化实际不需要该数据,但因我们使用 `tf.control_dependencies`模块创建了 
`model_opt`,否则会报错。
"""

def train(dataset, epochs, batch_size, print_every=10,
          show_every=100, figsize=(5, 5)):
    """Train the GAN, periodically printing losses and previewing samples.

    Uses the module-level graph, placeholders, losses and training ops. After
    training, the generator variables are saved to
    ``./checkpoints/generator.ckpt`` and all preview samples are pickled to
    ``samples.pkl``.

    :param dataset: Object whose ``batches(batch_size)`` yields (images, labels)
    :param epochs: Number of passes over the training data
    :param batch_size: Batch size for both real images and noise vectors
    :param print_every: Print and record the losses every this many steps
    :param show_every: Generate and display preview images every this many steps
    :param figsize: Figure size of the preview grid
    :return: A tuple of (list of (d_loss, g_loss) pairs, list of sample batches)
    """
    import os

    saver = tf.train.Saver(var_list=g_vars)
    # Fixed noise so that successive previews are comparable.
    sample_z = np.random.uniform(-1, 1, size=(72, z_size))

    samples, losses = [], []
    steps = 0

    # Make sure the checkpoint directory exists up front, so a missing
    # directory cannot cost a finished training run at save time.
    os.makedirs('./checkpoints', exist_ok=True)

    with tf.Session(graph=graph) as sess:
        # Build the preview op once. Calling generator() inside the loop
        # would add a fresh copy of its ops to the graph on every preview.
        # training=False makes batch norm use the moving statistics.
        sample_op = generator(input_z, real_size[2], reuse=True, training=False)

        sess.run(tf.global_variables_initializer())
        for e in range(epochs):
            for x, _ in dataset.batches(batch_size):
                steps += 1

                # Fresh random noise for the generator.
                batch_z = np.random.uniform(-1, 1, size=(batch_size, z_size))
                feed = {input_real: x, input_z: batch_z}

                # Run the optimizers. g_opt is fed input_real too because both
                # training ops were created under the same control
                # dependencies (see model_opt).
                _ = sess.run(d_opt, feed_dict=feed)
                _ = sess.run(g_opt, feed_dict=feed)

                if steps % print_every == 0:
                    # Fetch both losses in a single run and print them.
                    train_loss_d, train_loss_g = sess.run([d_loss, g_loss], feed_dict=feed)

                    print("Epoch {}/{}...".format(e + 1, epochs),
                          "Discriminator Loss: {:.4f}...".format(train_loss_d),
                          "Generator Loss: {:.4f}".format(train_loss_g))
                    # Record the losses.
                    losses.append((train_loss_d, train_loss_g))

                if steps % show_every == 0:
                    plt.ion()
                    gen_samples = sess.run(sample_op, feed_dict={input_z: sample_z})
                    samples.append(gen_samples)
                    _ = view_samples(-1, samples, 6, 12, figsize=figsize)
                    plt.pause(3)
                    plt.close()

        saver.save(sess, './checkpoints/generator.ckpt')

    with open('samples.pkl', 'wb') as f:
        pkl.dump(samples, f)
    return losses, samples

def show_train_losses(losses):
    """Plot the recorded discriminator and generator losses in one figure.

    :param losses: Sequence of (discriminator loss, generator loss) pairs
    """
    fig, ax = plt.subplots()
    # Transpose so that row 0 holds discriminator losses, row 1 generator losses.
    curves = np.array(losses).T
    plt.plot(curves[0], label='Discriminator', alpha=0.5)
    plt.plot(curves[1], label='Generator', alpha=0.5)
    plt.title("Training Losses")
    plt.legend()
    plt.show()

# todo-新生成一些噪音,传入保存的生产网络中,并生成全新的图片。
def generator_for_show():
    """Restore the saved generator and display 16 freshly generated images."""
    saver = tf.train.Saver(var_list=g_vars)
    with tf.Session(graph=graph) as sess:
        latest = tf.train.latest_checkpoint('checkpoints')
        saver.restore(sess, latest)

        # New random noise, one vector per image to generate.
        noise = np.random.uniform(-1, 1, size=(16, z_size))
        # NOTE(review): training=True makes batch norm use batch statistics.
        # The saver only covers g_vars (trainable generator variables), so the
        # moving averages are presumably not in the checkpoint; confirm
        # before switching this to training=False.
        sampler = generator(input_z, real_size[2], reuse=True, alpha=0.2, training=True)
        images = sess.run(sampler, feed_dict={input_z: noise})
    _ = view_samples(-1, [images], 4, 4)


if __name__=='__main__':
    # The full pipeline (preview data, train, plot losses and samples) is
    # kept below but currently commented out.
    # show_svhn_images()
    # Build the dataset and train the network
    # dataset = Dataset(trainset, testset)
    # losses, samples = train(dataset, epochs, batch_size, figsize=(10, 5))
    # show_train_losses(losses)
    # _ = view_samples(-1, samples, 6, 12, figsize=(10, 5))
    # Only sample from a previously saved generator checkpoint.
    generator_for_show()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值