tensorflow(神经网络)学习笔记(六)对抗神经网络(DCGAN)实战(学习笔记)

利用MNIST数据集中的手写数字图像训练DCGAN,让神经网络学会生成手写数字图片。

#%%
"""
1. Data provider
    a. Image data
    b. random vector
2. Build compute graph
    a. generator
    b. discriminator
    c. DCGAN
        connect d and g
        define loss
        define train_op
3. training process
"""

import os
import sys
import tensorflow as tf
from tensorflow import logging
from tensorflow import gfile
import pprint
import pickle
import numpy as np
import random
import math
from PIL import Image
from tensorflow.examples.tutorials.mnist import input_data

# Build the paths with os.path.join instead of hard-coded '\\' separators so
# the same relative locations are used on every platform (on Windows the
# resulting strings are unchanged).
# NOTE: `minst` (sic) is kept because later code refers to this name.
minst = input_data.read_data_sets(
    os.path.join('.', 'deep_learn', 'text2image', 'MNIST_data'))

# Directory that receives the sampled image grids written during training.
output_dir = os.path.join('.', 'deep_learn', 'text2image', 'local_run')
if not gfile.Exists(output_dir):
    gfile.MakeDirs(output_dir)
    
def get_default_params():
    """Return the default hyper-parameters as a ``tf.contrib.training.HParams``.

    Fields:
        z_dim: length of the random input vector.
        init_conv_size: side length of the generator's first feature map.
        g_channels: channel counts used by the generator layers.
        d_channels: channel counts used by the discriminator layers.
        batch_size: number of examples per training step.
        learning_rate, beta1: settings passed to the Adam optimizers.
        img_size: side length of the images.
    """
    defaults = dict(
        z_dim=100,
        init_conv_size=4,
        g_channels=[128, 64, 32, 1],
        d_channels=[32, 64, 128, 256],
        batch_size=128,
        learning_rate=0.002,
        beta1=0.5,
        img_size=32,
    )
    return tf.contrib.training.HParams(**defaults)
# Module-wide hyper-parameters used by the data provider, the model and the
# training loop below.
hps = get_default_params()
# Quick sanity output: configured image size and the shape of the training
# image array.
print(hps.img_size)
print(minst.train.images.shape)
#%%
class MnistData:
    """Serve shuffled mini-batches of resized MNIST images and noise vectors.

    Every image is paired with a standard-normal noise vector drawn once at
    construction time. Images and noise vectors are always permuted with the
    same index permutation, so the pairing survives every reshuffle.
    """

    def __init__(self, mnist_train, z_dim, img_size):
        """Prepare the image array and the matching noise vectors.

        Args:
            mnist_train: array of flattened 28x28 images (784 values per
                row); the values are expected to lie in [0, 1].
            z_dim: length of each random noise vector.
            img_size: side length the images are resized to.
        """
        self._data = mnist_train
        self._example_num = len(self._data)
        # One random vector per example.
        self._z_data = np.random.standard_normal((self._example_num, z_dim))
        # Start index of the next batch.
        self._indicator = 0
        self._resize_mnist_img(img_size)
        self._random_shuffle()

    def _random_shuffle(self):
        """Apply one shared random permutation to images and noise vectors."""
        p = np.random.permutation(self._example_num)
        self._z_data = self._z_data[p]
        self._data = self._data[p]

    def _resize_mnist_img(self, img_size):
        """Resize every image from [28, 28] to [img_size, img_size].

        Steps per image: numpy array -> PIL image -> resize -> numpy array.
        Afterwards ``self._data`` has shape
        [example_num, img_size, img_size, 1] and values scaled to [-1, 1].
        """
        # The source data is in [0, 1]; convert it to 8-bit pixel values.
        data = np.asarray(self._data * 255, np.uint8)
        # [example_num, 784] -> [example_num, 28, 28]
        data = data.reshape((self._example_num, 28, 28))
        new_data = []
        for i in range(self._example_num):
            img = Image.fromarray(data[i])            # numpy -> PIL image
            img = img.resize((img_size, img_size))    # enlarge the picture
            img = np.asarray(img)                     # PIL image -> numpy
            # Add the trailing channel axis: [img_size, img_size, 1].
            new_data.append(img.reshape((img_size, img_size, 1)))
        # [example_num, img_size, img_size, 1]
        new_data = np.asarray(new_data, dtype=np.float32)
        # Normalise [0, 255] -> [-1, 1].
        self._data = new_data / 127.5 - 1

    def next_batch(self, batch_size):
        """Return the next ``batch_size`` images and their noise vectors.

        When fewer than ``batch_size`` examples remain, the data is
        reshuffled and the batch is taken from the start again.

        Args:
            batch_size: number of examples to return.

        Returns:
            Tuple ``(batch_data, batch_z)`` of equally long slices.

        Raises:
            AssertionError: if ``batch_size`` exceeds the number of examples.
        """
        end_indicator = self._indicator + batch_size
        if end_indicator > self._example_num:
            self._random_shuffle()
            self._indicator = 0
            end_indicator = self._indicator + batch_size
        # A batch that ends exactly on the last example is valid, so the
        # bound is inclusive (the previous strict '<' rejected that batch).
        assert end_indicator <= self._example_num, (
            'batch_size must not exceed the number of examples')

        batch_data = self._data[self._indicator: end_indicator]
        batch_z = self._z_data[self._indicator: end_indicator]
        self._indicator = end_indicator
        return batch_data, batch_z

# Data provider shared by the training loop below.
mnist_data = MnistData(minst.train.images, hps.z_dim, hps.img_size)
# Smoke test: fetch one small batch. Note that this advances the provider's
# internal position by 5 examples before training starts.
batch_data,batch_z = mnist_data.next_batch(5)

#%%
def conv2d_transpose(inputs, out_channel, name,
                     training, with_bn_relu=True):
    """Apply a 5x5, stride-2, 'same'-padded transposed convolution.

    Args:
        inputs: input tensor.
        out_channel: number of output channels.
        name: variable scope under which the layer is created.
        training: forwarded to batch normalization.
        with_bn_relu: when True the result goes through batch normalization
            and ReLU. Pass False for the last generator layer, whose raw
            output is fed to tanh instead.

    Returns:
        The layer output tensor.
    """
    with tf.variable_scope(name):
        upsampled = tf.layers.conv2d_transpose(
            inputs, out_channel, [5, 5], strides=(2, 2), padding='same')
        if not with_bn_relu:
            return upsampled
        normalized = tf.layers.batch_normalization(upsampled,
                                                   training=training)
        return tf.nn.relu(normalized)

def conv2d(inputs, out_channel, name, training):
    """Apply a 5x5, stride-2 convolution, batch norm and leaky ReLU.

    Args:
        inputs: input tensor.
        out_channel: number of output channels.
        name: variable scope under which the layer is created.
        training: forwarded to batch normalization.

    Returns:
        The activated tensor; its op is named 'outputs' inside the scope.
    """
    with tf.variable_scope(name):
        # Convolution step.
        features = tf.layers.conv2d(
            inputs, out_channel, [5, 5], strides=(2, 2), padding='same')
        normalized = tf.layers.batch_normalization(features,
                                                   training=training)
        # Leaky ReLU with slope 0.2 on the negative side: max(x, 0.2 * x).
        return tf.maximum(normalized, normalized * 0.2, name='outputs')

class Generator:
    """Generator of the GAN: maps random vectors to images."""

    def __init__(self, channels, init_conv_size):
        """Store the layer configuration.

        Args:
            channels: channel counts; the first entry sizes the initial
                feature map, each later entry adds one transposed conv.
            init_conv_size: side length of the initial feature map.
        """
        self._channels = channels
        self._init_conv_size = init_conv_size
        # Becomes True after the first call so later calls reuse variables.
        self._reuse = False

    def __call__(self, inputs, training):
        """Build the generator ops for `inputs` and return the image tensor.

        Also refreshes ``self.variables`` with the trainable variables
        collected under the 'generator' scope.
        """
        # Accept any input that can be converted into a tensor.
        inputs = tf.convert_to_tensor(inputs)
        side = self._init_conv_size
        first_channels = self._channels[0]
        last_index = len(self._channels) - 1

        with tf.variable_scope('generator', reuse=self._reuse):
            # First DCGAN stage:
            #   random vector -> fully connected layer of size
            #   channels[0] * init_conv_size ** 2
            #   -> reshape to [init_conv_size, init_conv_size, channels[0]]
            with tf.variable_scope('inputs_conv'):
                fc = tf.layers.dense(inputs, first_channels * side * side)
                conv0 = tf.reshape(fc, [-1, side, side, first_channels])
                bn0 = tf.layers.batch_normalization(conv0, training=training)
                relu0 = tf.nn.relu(bn0)

            net = relu0
            for index in range(1, len(self._channels)):
                # Every transposed conv except the last gets BN + ReLU; the
                # last one is followed by tanh below.
                is_last = index == last_index
                net = conv2d_transpose(
                    net,
                    self._channels[index],
                    "deconv-{}".format(index),
                    training,
                    not is_last
                )

            with tf.variable_scope('generate_imgs'):
                # Image values lie in [-1, 1].
                imgs = tf.tanh(net, name='imgs')

        self._reuse = True
        self.variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES,
            scope='generator'
        )
        return imgs

class Discriminator:
    """Discriminator of the GAN: maps images to two-class logits."""

    def __init__(self, channels):
        """Store the channel count of each convolution layer."""
        self._channels = channels
        # Becomes True after the first call so later calls reuse variables.
        self._reuse = False

    def __call__(self, inputs, training):
        """Build the discriminator ops for `inputs` and return the logits.

        Also refreshes ``self.variables`` with the trainable variables
        collected under the 'discriminator' scope.
        """
        net = tf.convert_to_tensor(inputs, dtype=tf.float32)

        with tf.variable_scope('discriminator', reuse=self._reuse):
            # One strided conv block per configured channel count.
            for index, out_channel in enumerate(self._channels):
                net = conv2d(net,
                             out_channel,
                             'conv-{}'.format(index),
                             training)
            with tf.variable_scope('fc'):
                flat = tf.layers.flatten(net)
                logits = tf.layers.dense(flat, 2, name='logits')

        self._reuse = True
        self.variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES,
            scope='discriminator'
        )
        return logits

class DCGAN:
    """DCGAN implementation: wires a Generator and a Discriminator together."""

    def __init__(self, hps):
        """Read the model configuration from the hyper-parameter object.

        Args:
            hps: object exposing g_channels, d_channels, batch_size,
                init_conv_size, z_dim and img_size.
        """
        g_channels = hps.g_channels
        d_channels = hps.d_channels

        self._batch_size = hps.batch_size
        self._init_conv_size = hps.init_conv_size
        self._z_dim = hps.z_dim
        self._img_size = hps.img_size

        self._generator = Generator(g_channels, self._init_conv_size)
        self._discriminator = Discriminator(d_channels)

    def build(self):
        """Build the whole compute graph.

        Returns:
            Tuple ``(z_placeholder, img_placeholder, generated_imgs, loss)``
            where ``loss`` is a dict with the total generator loss under
            'g' and the total discriminator loss under 'd'.
        """
        # Random input vectors: [batch_size, z_dim].
        self._z_placeholder = tf.placeholder(
            tf.float32, (self._batch_size, self._z_dim)
        )
        # Real images: [batch_size, img_size, img_size, 1].
        self._img_placeholer = tf.placeholder(
            tf.float32,
            (self._batch_size, self._img_size, self._img_size, 1)
        )
        # Fake images produced from the random vectors.
        generated_imgs = self._generator(
            self._z_placeholder, training=True
        )

        # The discriminator is applied to both fake and real images; the
        # second call reuses the variables created by the first.
        fake_img_logits = self._discriminator(
            generated_imgs, training=True
        )
        real_img_logits = self._discriminator(
            self._img_placeholer, training=True
        )

        # Generator and discriminator are improved with different losses.
        # Class 1 means "real", class 0 means "fake".
        # G loss: fake images should be classified as real.
        loss_on_fake_to_real = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tf.ones([self._batch_size], dtype=tf.int64),
                logits=fake_img_logits
            )
        )
        # D loss, part 1: fake images should be classified as fake.
        loss_on_fake_to_fake = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tf.zeros([self._batch_size], dtype=tf.int64),
                logits=fake_img_logits
            )
        )
        # D loss, part 2: real images should be classified as real.
        loss_on_real_to_real = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tf.ones([self._batch_size], dtype=tf.int64),
                logits=real_img_logits
            )
        )
        tf.add_to_collection('g_losses', loss_on_fake_to_real)
        tf.add_to_collection('d_losses', loss_on_fake_to_fake)
        tf.add_to_collection('d_losses', loss_on_real_to_real)

        loss = {
            'g': tf.add_n(tf.get_collection('g_losses'),
                          name='total_g_loss'),
            'd': tf.add_n(tf.get_collection('d_losses'),
                          name='total_d_loss')
        }
        return (self._z_placeholder, self._img_placeholer, generated_imgs, loss)

    def build_train_op(self, losses, learning_rate, beta1):
        """Build the train op; must be called after ``build``.

        Args:
            losses: dict with the generator loss under 'g' and the
                discriminator loss under 'd', as returned by ``build``.
            learning_rate: Adam learning rate used by both optimizers.
            beta1: Adam beta1 used by both optimizers.

        Returns:
            A no-op named 'train' that depends on one generator step, one
            discriminator step and the batch-normalization update ops.
        """
        g_opt = tf.train.AdamOptimizer(
            learning_rate=learning_rate,
            beta1=beta1
        )
        d_opt = tf.train.AdamOptimizer(
            learning_rate=learning_rate,
            beta1=beta1
        )

        # Each optimizer only updates the variables of its own network.
        g_opt_op = g_opt.minimize(losses['g'], var_list=self._generator.variables)
        d_opt_op = d_opt.minimize(losses['d'], var_list=self._discriminator.variables)

        # tf.layers.batch_normalization registers its moving-average updates
        # in UPDATE_OPS; they only run if the train op depends on them.
        bn_update_ops = (
            tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope='generator')
            + tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope='discriminator')
        )

        # Both optimizer steps are triggered by the same session.run call
        # (they are not alternated across separate runs).
        with tf.control_dependencies([g_opt_op, d_opt_op] + bn_update_ops):
            return tf.no_op(name='train')
        
# Build the model graph once at module level; the training loop below feeds
# the two placeholders and fetches the generated images and losses.
dcgan = DCGAN(hps)
# NOTE: the placeholder names are misspelled (sic) but are referenced with
# exactly this spelling by the training loop.
z_placehodler, img_plcaehodler, generated_imgs, losses = dcgan.build()
train_op = dcgan.build_train_op(losses, hps.learning_rate, hps.beta1)
        
#%%
def combine_imgs(batch_imgs, img_size, rows=8, cols=16):
    """Tile the first ``rows * cols`` images of a batch into one big picture.

    Args:
        batch_imgs: array indexed by image, each entry reshapeable to
            [img_size, img_size] (e.g. [batch_size, img_size, img_size, 1]).
        img_size: side length of a single image.
        rows: number of image rows in the grid.
        cols: number of image columns in the grid.

    Returns:
        A PIL image of [rows * img_size, cols * img_size] pixels.
    """
    def to_pixels(index):
        # Drop the channel axis, then shift by 1 and scale by 127.5 so that
        # values in [-1, 1] land in [0, 255].
        tile = batch_imgs[index].reshape((img_size, img_size))
        return (tile + 1) * 127.5

    # Join each grid row horizontally, then stack the rows vertically.
    strips = [
        np.hstack([to_pixels(cols * r + c) for c in range(cols)])
        for r in range(rows)
    ]
    canvas = np.asarray(np.vstack(strips), np.uint8)
    return Image.fromarray(canvas)

# Created after the whole graph exists so it covers every variable.
init_op = tf.global_variables_initializer()
train_steps = 10000

with tf.Session() as sess:
    sess.run(init_op)
    for step in range(train_steps):
        batch_imgs, batch_z = mnist_data.next_batch(hps.batch_size)

        # Dump picture grids on every 50th step (counted from 1).
        should_sample = (step+1) % 50 == 0
        fetches = [train_op, losses['g'], losses['d']]
        if should_sample:
            fetches.append(generated_imgs)

        feed_dict = {
            z_placehodler: batch_z,
            img_plcaehodler: batch_imgs
        }
        output_values = sess.run(fetches, feed_dict=feed_dict)

        # Index 0 is the train op's result, which carries no value.
        g_loss_val, d_loss_val = output_values[1], output_values[2]
        logging.info('step: %4d, g_loss: %4.3f, d_loss: %4.3f' % (step, g_loss_val, d_loss_val))

        if not should_sample:
            continue

        # Save the generated grid next to the real ("gt") grid of this step.
        gen_imgs_val = output_values[3]
        gen_img_path = os.path.join(output_dir, '%05d-gen.jpg' % (step + 1))
        gt_img_path = os.path.join(output_dir, '%05d-gt.jpg' % (step + 1))
        gen_img = combine_imgs(gen_imgs_val, hps.img_size)
        gt_img = combine_imgs(batch_imgs, hps.img_size)
        gen_img.save(gen_img_path)
        gt_img.save(gt_img_path)

#%%




  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值