# Train a DCGAN on the MNIST handwritten-digit images so that the network learns to generate handwritten digits.
#%%
"""
1.Data provider
a. Image data
b. random vector
2. Build compute graph
a. generator
b. discriminator
c. DCGAN
connect d and g
define loss
define train_op
3. training process
"""
import os
import sys
import tensorflow as tf
from tensorflow import logging
from tensorflow import gfile
import pprint
import pickle
import numpy as np
import random
import math
from PIL import Image
from tensorflow.examples.tutorials.mnist import input_data
# MNIST dataset object, read from a local directory (Windows-style relative path).
minst = input_data.read_data_sets(r'.\deep_learn\text2image\MNIST_data')
# Directory used for this run's output files.
output_dir = r'.\deep_learn\text2image\local_run'
# Create the output directory if it does not exist yet.
if not gfile.Exists(output_dir):
    gfile.MakeDirs(output_dir)
def get_default_params():
    """Return the default DCGAN hyper-parameters as an ``HParams`` object."""
    defaults = dict(
        z_dim=100,                      # length of the random input vector
        init_conv_size=4,               # side of the generator's first feature map
        g_channels=[128, 64, 32, 1],    # generator channels, first entry feeds the reshape
        d_channels=[32, 64, 128, 256],  # discriminator channels, one per conv layer
        batch_size=128,
        learning_rate=0.002,
        beta1=0.5,                      # passed to the Adam optimizers as beta1
        img_size=32,                    # images are resized to img_size x img_size
    )
    return tf.contrib.training.HParams(**defaults)
# Module-level hyper-parameters, followed by two sanity-check prints:
# the configured image size and the shape of the MNIST training images.
hps = get_default_params()
print(hps.img_size)
print(minst.train.images.shape)
#%%
class MnistData:
    """Batch provider that pairs resized MNIST images with random z vectors.

    Images are resized to ``img_size`` x ``img_size``, given a trailing
    channel axis and rescaled to [-1, 1]. One standard-normal vector of
    length ``z_dim`` is drawn per image; images and vectors are always
    shuffled with the same permutation.
    """

    def __init__(self, mnist_train, z_dim, img_size):
        """Prepare the images and draw the random vectors.

        Args:
            mnist_train: Array of flattened 28x28 images, one row per
                example. Pixel values are expected to lie in [0, 1].
            z_dim: Length of each random input vector.
            img_size: Side length the images are resized to.
        """
        self._data = mnist_train
        self._example_num = len(self._data)
        # One random vector per example.
        self._z_data = np.random.standard_normal((self._example_num, z_dim))
        # Index of the first example of the next batch.
        self._indicator = 0
        self._resize_mnist_img(img_size)
        self._random_shuffle()

    def _random_shuffle(self):
        """Shuffle images and z vectors with one shared permutation."""
        p = np.random.permutation(self._example_num)
        self._z_data = self._z_data[p]
        self._data = self._data[p]

    def _resize_mnist_img(self, img_size):
        """Resize every image from 28x28 to ``img_size`` x ``img_size``.

        Each image goes numpy -> PIL image -> resize -> numpy. Afterwards
        ``self._data`` is a float32 array of shape
        [example_num, img_size, img_size, 1] scaled to [-1, 1].
        """
        # Source pixels are in [0, 1]; convert to 8-bit values for PIL.
        data = np.asarray(self._data * 255, np.uint8)
        # [example_num, 784] -> [example_num, 28, 28]
        data = data.reshape((self._example_num, 28, 28))
        resized = [
            np.asarray(
                Image.fromarray(img).resize((img_size, img_size))
            ).reshape((img_size, img_size, 1))
            for img in data
        ]
        # [example_num, img_size, img_size, 1]
        new_data = np.asarray(resized, dtype=np.float32)
        # [0, 255] -> [-1, 1]
        self._data = new_data / 127.5 - 1

    def next_batch(self, batch_size):
        """Return the next ``batch_size`` images and their z vectors.

        When fewer than ``batch_size`` examples remain, the data is
        reshuffled and reading restarts from the beginning (the unread tail
        of the previous pass is skipped).

        Args:
            batch_size: Number of examples to return; must not exceed the
                number of examples held by the provider.

        Returns:
            Tuple ``(batch_data, batch_z)``.

        Raises:
            AssertionError: If ``batch_size`` exceeds the number of examples.
        """
        end_indicator = self._indicator + batch_size
        if end_indicator > self._example_num:
            self._random_shuffle()
            self._indicator = 0
            end_indicator = batch_size
        # `<=`, not `<`: a batch that ends exactly on the last example is valid.
        assert end_indicator <= self._example_num, (
            'batch_size %d exceeds the number of examples %d'
            % (batch_size, self._example_num))
        batch_data = self._data[self._indicator: end_indicator]
        batch_z = self._z_data[self._indicator: end_indicator]
        self._indicator = end_indicator
        return batch_data, batch_z
# Module-level data provider built from the MNIST training images.
mnist_data = MnistData(minst.train.images, hps.z_dim, hps.img_size)
# Pull one small batch as a smoke test; this advances the provider's cursor by 5.
batch_data,batch_z = mnist_data.next_batch(5)
#%%
def conv2d_transpose(inputs, out_channel, name,
                     training, with_bn_relu=True):
    """Apply a 5x5, stride-2, 'same'-padded transposed convolution.

    Args:
        inputs: Input tensor.
        out_channel: Number of output channels.
        name: Variable scope the layer is created in.
        training: Forwarded to batch normalization as ``training``.
        with_bn_relu: When true, follow the transposed convolution with
            batch normalization and ReLU. The generator's last layer passes
            False because its output goes to tanh instead.

    Returns:
        The output tensor of the layer.
    """
    with tf.variable_scope(name):
        upsampled = tf.layers.conv2d_transpose(
            inputs, out_channel, [5, 5], strides=(2, 2), padding='same')
        if not with_bn_relu:
            return upsampled
        normalized = tf.layers.batch_normalization(upsampled,
                                                   training=training)
        return tf.nn.relu(normalized)
def conv2d(inputs, out_channel, name, training):
    """Apply a 5x5, stride-2 convolution, batch norm and leaky ReLU.

    Args:
        inputs: Input tensor.
        out_channel: Number of output channels.
        name: Variable scope the layer is created in.
        training: Forwarded to batch normalization as ``training``.

    Returns:
        The activated output tensor, named ``outputs`` inside the scope.
    """
    with tf.variable_scope(name):
        features = tf.layers.conv2d(
            inputs, out_channel, [5, 5], strides=(2, 2), padding='same')
        normalized = tf.layers.batch_normalization(features,
                                                   training=training)
        # Leaky ReLU with slope 0.2: max(x, 0.2 * x).
        return tf.maximum(normalized, normalized * 0.2, name='outputs')
class Generator:
    """Generator of the GAN: random vector -> image in [-1, 1]."""

    def __init__(self, channels, init_conv_size):
        """Remember the layer layout.

        Args:
            channels: Channel counts; the first entry is used for the
                reshaped dense output, each following entry for one
                transposed convolution.
            init_conv_size: Side length of the first feature map.
        """
        self._channels = channels
        self._init_conv_size = init_conv_size
        # Becomes True after the first call so later calls share variables.
        self._reuse = False

    def __call__(self, inputs, training):
        """Build the generator graph on ``inputs`` and return the images.

        Args:
            inputs: Random vectors; anything convertible to a tensor.
            training: Forwarded to the batch-normalization layers.

        Returns:
            The tanh-activated image tensor (values in [-1, 1]).
        """
        z = tf.convert_to_tensor(inputs)
        side = self._init_conv_size
        first_channels = self._channels[0]
        last_stage = len(self._channels) - 1

        with tf.variable_scope('generator', reuse=self._reuse):
            # Step 1: random vector -> dense -> reshape to
            # [side, side, channels[0]] -> batch norm -> ReLU.
            with tf.variable_scope('inputs_conv'):
                dense = tf.layers.dense(z, first_channels * side * side)
                feature_map = tf.reshape(
                    dense, [-1, side, side, first_channels])
                feature_map = tf.layers.batch_normalization(
                    feature_map, training=training)
                feature_map = tf.nn.relu(feature_map)

            # Step 2: one transposed convolution per remaining channel entry.
            # The last one skips batch norm + ReLU because tanh follows it.
            for stage, out_channel in enumerate(self._channels[1:], start=1):
                feature_map = conv2d_transpose(
                    feature_map,
                    out_channel,
                    "deconv-{}".format(stage),
                    training,
                    stage != last_stage,
                )

            with tf.variable_scope('generate_imgs'):
                # Output value range: [-1, 1].
                imgs = tf.tanh(feature_map, name='imgs')

        self._reuse = True
        # Trainable variables under the 'generator' scope, for the optimizer.
        self.variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
        return imgs
class Discriminator:
    """Discriminator of the GAN: image -> two-class logits."""

    def __init__(self, channels):
        """Remember the channel count of each convolution layer.

        Args:
            channels: Output channels, one entry per convolution layer.
        """
        self._channels = channels
        # Becomes True after the first call so later calls share variables.
        self._reuse = False

    def __call__(self, inputs, training):
        """Build the discriminator graph on ``inputs`` and return the logits.

        Args:
            inputs: Images; converted to a float32 tensor.
            training: Forwarded to the batch-normalization layers.

        Returns:
            Logits tensor with two outputs per example.
        """
        features = tf.convert_to_tensor(inputs, dtype=tf.float32)
        with tf.variable_scope('discriminator', reuse=self._reuse):
            # Stack of stride-2 convolutions.
            for layer_idx, out_channel in enumerate(self._channels):
                features = conv2d(features,
                                  out_channel,
                                  'conv-{}'.format(layer_idx),
                                  training)
            # Flatten and project onto the two classes.
            with tf.variable_scope('fc'):
                flat = tf.layers.flatten(features)
                logits = tf.layers.dense(flat, 2, name='logits')

        self._reuse = True
        # Trainable variables under the 'discriminator' scope, for the optimizer.
        self.variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')
        return logits
class DCGAN:
    """DCGAN wiring: generator, discriminator, losses and train op."""

    def __init__(self, hps):
        """Store the hyper-parameters and create the two sub-networks.

        Args:
            hps: Object exposing ``g_channels``, ``d_channels``,
                ``batch_size``, ``init_conv_size``, ``z_dim`` and
                ``img_size``.
        """
        g_channels = hps.g_channels
        d_channels = hps.d_channels
        self._batch_size = hps.batch_size
        self._init_conv_size = hps.init_conv_size
        self._z_dim = hps.z_dim
        self._img_size = hps.img_size
        self._generator = Generator(g_channels, self._init_conv_size)
        self._discriminator = Discriminator(d_channels)

    @staticmethod
    def _mean_cross_entropy(labels, logits):
        """Return the batch-mean sparse softmax cross-entropy."""
        return tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=labels, logits=logits))

    def build(self):
        """Build the whole compute graph.

        Returns:
            Tuple ``(z_placeholder, img_placeholder, generated_imgs, loss)``
            where ``loss`` is a dict with the total generator loss under
            ``'g'`` and the total discriminator loss under ``'d'``.
        """
        self._z_placeholder = tf.placeholder(
            tf.float32, (self._batch_size, self._z_dim))
        # Real images.
        self._img_placeholer = tf.placeholder(
            tf.float32,
            (self._batch_size, self._img_size, self._img_size, 1))

        # Fake images, and the discriminator's view of fake and real ones.
        generated_imgs = self._generator(
            self._z_placeholder, training=True)
        fake_img_logits = self._discriminator(
            generated_imgs, training=True)
        real_img_logits = self._discriminator(
            self._img_placeholer, training=True)

        # Class 1 means "real", class 0 means "fake".
        real_labels = tf.ones([self._batch_size], dtype=tf.int64)
        fake_labels = tf.zeros([self._batch_size], dtype=tf.int64)

        # Generator loss: fake images should be classified as real.
        loss_on_fake_to_real = self._mean_cross_entropy(
            real_labels, fake_img_logits)
        # Discriminator losses: fake -> fake and real -> real.
        loss_on_fake_to_fake = self._mean_cross_entropy(
            fake_labels, fake_img_logits)
        loss_on_real_to_real = self._mean_cross_entropy(
            real_labels, real_img_logits)

        g_losses = [loss_on_fake_to_real]
        d_losses = [loss_on_fake_to_fake, loss_on_real_to_real]
        # Still register the terms in the graph collections for anyone who
        # reads them, but sum the local lists: the collections are global to
        # the graph, so summing them would also pick up terms left by an
        # earlier build() call or by unrelated code.
        for term in g_losses:
            tf.add_to_collection('g_losses', term)
        for term in d_losses:
            tf.add_to_collection('d_losses', term)

        loss = {
            'g': tf.add_n(g_losses, name='total_g_loss'),
            'd': tf.add_n(d_losses, name='total_d_loss'),
        }
        return (self._z_placeholder, self._img_placeholer,
                generated_imgs, loss)

    def build_train_op(self, losses, learning_rate, beta1):
        """Build the train op; must be called after ``build``.

        Args:
            losses: Dict with the generator loss under ``'g'`` and the
                discriminator loss under ``'d'``.
            learning_rate: Learning rate for both Adam optimizers.
            beta1: ``beta1`` for both Adam optimizers.

        Returns:
            A no-op named ``train`` that depends on both optimizer steps and
            on the batch-normalization update ops.
        """
        g_opt = tf.train.AdamOptimizer(
            learning_rate=learning_rate, beta1=beta1)
        d_opt = tf.train.AdamOptimizer(
            learning_rate=learning_rate, beta1=beta1)
        # Each optimizer only touches its own network's variables.
        g_opt_op = g_opt.minimize(
            losses['g'], var_list=self._generator.variables)
        d_opt_op = d_opt.minimize(
            losses['d'], var_list=self._discriminator.variables)
        # tf.layers.batch_normalization places its moving-average updates in
        # UPDATE_OPS; they only run if something depends on them.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # Both optimizer steps run in the same session step; no order is
        # enforced between them.
        with tf.control_dependencies([g_opt_op, d_opt_op] + update_ops):
            return tf.no_op(name='train')
# Build the graph once at module level: placeholders, generated images,
# the loss dict, and the train op that uses those losses.
dcgan = DCGAN(hps)
z_placehodler, img_plcaehodler, generated_imgs, losses = dcgan.build()
train_op = dcgan.build_train_op(losses, hps.learning_rate, hps.beta1)
#%%
def combine_imgs(batch_imgs, img_size, rows=8, cols=16):
    """Tile the first ``rows * cols`` images of a batch into one picture.

    Args:
        batch_imgs: Indexable batch of arrays, each reshapeable to
            [img_size, img_size], with values in [-1, 1].
        img_size: Side length of every small image.
        rows: Number of image rows in the grid.
        cols: Number of image columns in the grid.

    Returns:
        A PIL image of size [rows * img_size, cols * img_size] holding the
        images in row-major order.

    Raises:
        IndexError: If the batch holds fewer than ``rows * cols`` images.
    """
    grid_rows = []
    for row in range(rows):
        tiles = [
            batch_imgs[cols * row + col].reshape((img_size, img_size))
            for col in range(cols)
        ]
        # Join one row of images side by side.
        grid_rows.append(np.hstack(tiles))
    # Stack the rows on top of each other.
    grid = np.vstack(grid_rows)
    # Map [-1, 1] back to [0, 255]. Round instead of truncating so that
    # float error does not turn e.g. 254.99998 into 254, and clip so values
    # slightly outside the range cannot wrap around in the uint8 cast.
    pixels = np.clip(np.rint((grid + 1) * 127.5), 0, 255).astype(np.uint8)
    return Image.fromarray(pixels)
# Training script: initialise all variables, then run `train_steps` steps,
# logging both losses every step and saving image grids every 50th step.
init_op = tf.global_variables_initializer()
train_steps = 10000
with tf.Session() as sess:
    sess.run(init_op)
    for step in range(train_steps):
        batch_imgs, batch_z = mnist_data.next_batch(hps.batch_size)
        fetches = [train_op, losses['g'], losses['d']]
        # Only fetch the generated images on the steps where they are saved.
        should_sample = (step+1) % 50 == 0
        if should_sample:
            fetches += [generated_imgs]
        output_values = sess.run(fetches,
                                 feed_dict={
                                     z_placehodler: batch_z,
                                     img_plcaehodler: batch_imgs
                                 })
        _, g_loss_val, d_loss_val = output_values[0:3]
        # NOTE(review): `logging` here is `tensorflow.logging`; its default
        # verbosity may hide INFO messages — confirm whether
        # `logging.set_verbosity(logging.INFO)` is needed for these to show.
        logging.info('step: %4d, g_loss: %4.3f, d_loss: %4.3f' % (step, g_loss_val, d_loss_val))
        if should_sample:
            gen_imgs_val = output_values[3]
            # File names use the 1-based step, while the log line above uses
            # the 0-based one.
            gen_img_path = os.path.join(output_dir, '%05d-gen.jpg' % (step + 1))
            gt_img_path = os.path.join(output_dir, '%05d-gt.jpg' % (step + 1))
            # Generated images and the real batch, each tiled into one picture
            # (the default 8 x 16 grid uses 128 images per picture).
            gen_img = combine_imgs(gen_imgs_val, hps.img_size)
            gt_img = combine_imgs(batch_imgs, hps.img_size)
            gen_img.save(gen_img_path)
            gt_img.save(gt_img_path)
#%%