VAE(Variational Autoencoder)和 GAN之间的区别:
写论文时以为会用到VAEs(Variational Autoencoders),但后来发现并没有用到。下面分享一下我最近了解到的GAN和VAEs之间的区别:
第一:两者都可以用来生成图像,
第二:VAEs由编码器和解码器组成。编码器用来提取图像的特征,并将特征传递给解码器;解码器根据编码器给出的特征生成具体的图像。其损失函数包括两部分:第一部分是重构损失,衡量解码器生成的分布与真实图像分布的接近程度;第二部分是KL散度。
第三:GAN通过生成模型和判别模型之间的动态对抗,将噪声转化成图像,其训练过程具有对抗性。
总结: 也就是两者都能用来生成图像,但是两者生成图像的方式不一样。最后分享一下自己利用VAEs生成fashion-mnist的代码使用mlp做的 不是卷积操作
# -*- coding: utf-8 -*-
# @Time : 2019/2/3 20:52
# @Author : YYLin
# @Email : 854280599@qq.com
# @File : vaes.py
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from tensorflow.examples.tutorials.mnist import input_data
# Load fashion-MNIST (flattened 784-pixel images); one_hot labels are loaded
# but never used below — training is fully unsupervised.
mnist = input_data.read_data_sets('../Dataset/fashion-mnist', one_hot=True)
batch_size = 64
noise = 100  # dimensionality of the latent code z
X_dim = mnist.train.images.shape[1]  # 784 for 28x28 images
y_dim = mnist.train.labels.shape[1]  # label width; unused in this script
hide_unit = 128  # hidden-layer width shared by encoder and decoder
lr = 0.001  # learning rate (matches Adam's default)
def save_image(samples):
    """Plot flattened 28x28 samples on a 4x4 grid and return the figure.

    Only the first 16 samples are drawn: the GridSpec has 16 cells, and the
    original code indexed `gs[i]` for every sample in the batch, which raises
    IndexError as soon as i >= 16 (the training loop passes a 64-sample batch).
    """
    fig = plt.figure(figsize=(4, 4))
    gs = gridspec.GridSpec(4, 4)
    gs.update(wspace=0.05, hspace=0.05)
    # Cap at the grid capacity so an oversized batch no longer crashes.
    for i, sample in enumerate(samples[:16]):
        ax = plt.subplot(gs[i])
        plt.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        plt.imshow(sample.reshape(28, 28), cmap='Greys_r')
    return fig
def xavier_init(size):
    """Xavier-style initial weights: N(0, stddev) with stddev = sqrt(2 / fan_in)."""
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)
# Graph inputs: X carries real flattened images, z carries latent codes
# sampled externally (used for pure generation).
X = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, noise])
# Encoder parameters: one shared hidden layer (W1/b1) feeding two output
# heads — W2/b2 produces the posterior mean, W3/b3 the log-variance.
encoder_W1 = tf.Variable(xavier_init([X_dim, hide_unit]))
encoder_b1 = tf.Variable(tf.zeros(shape=[hide_unit]))
encoder_W2 = tf.Variable(xavier_init([hide_unit, noise]))
encoder_b2 = tf.Variable(tf.zeros(shape=[noise]))
encoder_W3 = tf.Variable(xavier_init([hide_unit, noise]))
encoder_b3 = tf.Variable(tf.zeros(shape=[noise]))
def encoder(X):
    """Map a batch of flat images to Gaussian posterior parameters (mean, log-variance)."""
    hidden = tf.nn.relu(tf.matmul(X, encoder_W1) + encoder_b1)
    z_mean = tf.matmul(hidden, encoder_W2) + encoder_b2
    z_logvar = tf.matmul(hidden, encoder_W3) + encoder_b3
    return z_mean, z_logvar
def sample_data(z1, z_kl):
    """Reparameterization trick: z = mean + sigma * eps, eps ~ N(0, I).

    `z1` is the posterior mean and `z_kl` its log-variance, so
    exp(z_kl / 2) is the standard deviation.
    """
    eps = tf.random_normal(shape=tf.shape(z1))
    sigma = tf.exp(z_kl / 2)
    return z1 + sigma * eps
# Decoder parameters: latent code -> hidden layer -> pixel logits.
decoder_W1 = tf.Variable(xavier_init([noise, hide_unit]))
decoder_b1 = tf.Variable(tf.zeros(shape=[hide_unit]))
decoder_W2 = tf.Variable(xavier_init([hide_unit, X_dim]))
decoder_b2 = tf.Variable(tf.zeros(shape=[X_dim]))
def decoder(z):
    """Map latent codes back to pixel space.

    Returns (prob, logits): sigmoid pixel probabilities for visualization,
    and the raw logits for the numerically stable cross-entropy loss.
    (A leftover debug print of the hidden tensor was removed — it fired on
    every graph construction and carried no runtime information.)
    """
    hidden = tf.nn.relu(tf.matmul(z, decoder_W1) + decoder_b1)
    logits = tf.matmul(hidden, decoder_W2) + decoder_b2
    prob = tf.nn.sigmoid(logits)
    return prob, logits
# Core path: encode real images, sample a latent code, decode it back.
z1, z_log_kl = encoder(X)
z_sample = sample_data(z1, z_log_kl)
_, logits = decoder(z_sample)
# Generation path: decode latent codes fed directly via the z placeholder.
G_data, _ = decoder(z)
# Reconstruction term: per-pixel sigmoid cross-entropy against the input,
# summed over pixels (axis 1).
image_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=X), 1)
# Closed-form KL(q(z|x) || N(0, I)) for a diagonal Gaussian posterior.
kl_loss = 0.5 * tf.reduce_sum(tf.exp(z_log_kl) + z1**2 - 1. - z_log_kl, 1)
vae_loss = tf.reduce_mean(image_loss + kl_loss)
# Pass the declared `lr` explicitly instead of relying on Adam's implicit
# default; both are 0.001, so behavior is unchanged but the hyperparameter
# defined at the top of the file is no longer dead.
solver = tf.train.AdamOptimizer(learning_rate=lr).minimize(vae_loss)
# TF1-style execution: build-then-run inside an explicit session.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Output directory for the periodically saved sample grids.
if not os.path.exists('fashion-mnist/'):
    os.makedirs('fashion-mnist/')
i = 0  # running index for saved image filenames
for sample_i in range(1000000):
    # Labels are discarded — training is unsupervised.
    X_sample, _ = mnist.train.next_batch(batch_size)
    _, loss = sess.run([solver, vae_loss], feed_dict={X: X_sample})
    # Every 1000 steps: log the loss, then decode a batch of random latent
    # codes and save the resulting sample grid to disk.
    if sample_i % 1000 == 0:
        print('Iter: {}'.format(sample_i))
        print('Loss: {:.4}'. format(loss))
        print()
        samples = sess.run(G_data, feed_dict={z: np.random.randn(batch_size, noise)})
        fig = save_image(samples)
        plt.savefig('fashion-mnist/{}.png'.format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)
生成图像结果示例1:
生成图像结果示例2: