# 【深度学习】【python】vae变分自编码器实现 中文注释版

“你的代码很不错,不过下一秒就是我的了.”

• python3.5
• tensorflow 1.4
• pytorch 0.2.0

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Variational autoencoder (VAE) trained on MNIST, implemented with TensorFlow 1.x.

reference: https://jmetzen.github.io/2015-11-27/vae.html
"""
import sys
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Seed both NumPy and TensorFlow with the same value so that runs can be reproduced.
_RANDOM_SEED = 2017
np.random.seed(_RANDOM_SEED)
tf.set_random_seed(_RANDOM_SEED)

class VAE(object):
    """A simple variational autoencoder built with the TensorFlow 1.x graph API.

    Constructing an instance builds the whole graph in the default graph and
    exposes these attributes:

    * ``x``               -- input placeholder, shape ``[batch_size, input_dim]``.
    * ``z_mean``          -- encoder output: mean of the latent Gaussian.
    * ``z_log_sigma_sq``  -- encoder output: log-variance of the latent Gaussian.
    * ``z``               -- latent sample ``z_mean + sigma * eps``; it can also be
                             fed directly to decode arbitrary latent points.
    * ``x_reconstr_mean`` -- decoder output: Bernoulli mean of the reconstruction.
    * ``cost``            -- batch mean of (reconstruction loss + KL divergence).
    * ``lr``              -- non-trainable learning-rate variable.
    * ``train_op``        -- Adam update step minimising ``cost``.
    """

    def __init__(self, input_dim=784, z_dim=50, batch_size=100,
                 encoder_hidden_size=(500, 500), decoder_hidden_size=(500, 500),
                 act_fn=tf.nn.softplus):
        """Store the hyper-parameters and build the graph.

        Args:
            input_dim: int, dimensionality of one input vector.
            z_dim: int, dimensionality of the latent space.
            batch_size: int, fixed batch size baked into the placeholder shape.
            encoder_hidden_size: list or tuple, units of each encoder hidden layer.
            decoder_hidden_size: list or tuple, units of each decoder hidden layer.
            act_fn: activation function applied after every hidden layer.
        """
        # Tuple defaults are used instead of lists so that the defaults are
        # not mutable objects shared between instances.
        self.input_dim = input_dim
        self.z_dim = z_dim
        self.batch_size = batch_size
        self.encoder_hidden_size = encoder_hidden_size
        self.decoder_hidden_size = decoder_hidden_size
        self.act_fn = act_fn

        self._build_model()

    def _build_model(self):
        """Create the placeholder, encoder, sampler, decoder, loss and train op."""
        # Input placeholder.
        self.x = tf.placeholder(tf.float32, shape=[self.batch_size, self.input_dim])
        # Encoder: mean and log-variance of the approximate posterior.
        self.z_mean, self.z_log_sigma_sq = self._encoder(self.x)
        # Standard-normal noise for the reparameterisation trick.
        eps = tf.random_normal([self.batch_size, self.z_dim], mean=0.0, stddev=1.0)
        # z = mean + sigma * eps, with sigma = exp(0.5 * log(sigma^2)).
        # This statement was missing from the file although self.z is used below.
        self.z = self.z_mean + tf.exp(0.5 * self.z_log_sigma_sq) * eps

        # Decoder: Bernoulli mean of the reconstructed input.
        self.x_reconstr_mean = self._decoder(self.z)

        with tf.name_scope("loss"):
            # Reconstruction loss: Bernoulli cross entropy, summed over the
            # input dimensions. The 1e-10 keeps log() away from zero.
            reconstr_loss = -tf.reduce_sum(
                self.x * tf.log(1e-10 + self.x_reconstr_mean)
                + (1.0 - self.x) * tf.log(1e-10 + 1.0 - self.x_reconstr_mean),
                axis=1)
            # Latent loss: KL divergence between the posterior and N(0, I).
            latent_loss = -0.5 * tf.reduce_sum(
                1.0 + self.z_log_sigma_sq - tf.square(self.z_mean)
                - tf.exp(self.z_log_sigma_sq),
                axis=1)
            # Average over the batch.
            self.cost = tf.reduce_mean(reconstr_loss + latent_loss)

        # Learning rate is a variable so that callers can re-assign it.
        self.lr = tf.Variable(0.001, trainable=False)
        # Parameters to optimise (every trainable variable in the graph).
        train_vars = tf.trainable_variables()
        # Training op. The original statement was missing from the file;
        # NOTE(review): Adam is assumed here, following the referenced tutorial.
        self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(
            self.cost, var_list=train_vars)

    # Backward-compatible alias for the original (misspelled) method name.
    _bulid_model = _build_model

    def _hidden_layers(self, h, n_in, layer_sizes):
        """Apply a stack of fully connected layers under the current variable scope.

        Layers are named ``h0``, ``h1``, ... Returns the final activation and
        its width. With an empty ``layer_sizes`` the input is returned as is.
        """
        for i, n_out in enumerate(layer_sizes):
            w, b = self._get_vars(n_in, n_out, name="h{0}".format(i))
            h = self.act_fn(tf.nn.xw_plus_b(h, w, b))
            # The next layer's input width is this layer's output width.
            n_in = n_out
        return h, n_in

    def _encoder(self, x, name="encoder"):
        """Map the input ``x`` to ``(z_mean, z_log_sigma_sq)``."""
        with tf.variable_scope(name):
            h, n_in = self._hidden_layers(x, self.input_dim, self.encoder_hidden_size)
            # Linear head for the latent mean.
            w, b = self._get_vars(n_in, self.z_dim, name="out_mean")
            z_mean = tf.nn.xw_plus_b(h, w, b)
            # Linear head for the latent log-variance.
            w, b = self._get_vars(n_in, self.z_dim, name="out_log_sigma")
            z_log_sigma_sq = tf.nn.xw_plus_b(h, w, b)
            return z_mean, z_log_sigma_sq

    def _decoder(self, z, name="decoder"):
        """Map the latent code ``z`` to the Bernoulli mean of the reconstruction."""
        with tf.variable_scope(name):
            # The decoder input width is the latent dimensionality.
            h, n_in = self._hidden_layers(z, self.z_dim, self.decoder_hidden_size)
            w, b = self._get_vars(n_in, self.input_dim, name="out_mean")
            # Sigmoid squashes the output into (0, 1) as a Bernoulli mean.
            x_reconstr_mean = tf.nn.sigmoid(tf.nn.xw_plus_b(h, w, b))
            return x_reconstr_mean

    def _get_vars(self, n_in, n_out, name=""):
        """Create (or fetch) the weight matrix and bias vector of one layer.

        The weight ``w`` has shape ``[n_in, n_out]`` with Xavier initialisation;
        the bias ``b`` has shape ``[n_out]`` and is initialised to 0.1.
        """
        with tf.variable_scope(name):
            w = tf.get_variable("w", [n_in, n_out],
                                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.get_variable("b", [n_out], initializer=tf.constant_initializer(0.1))
            return w, b

if __name__ == "__main__":
    # Train the VAE on MNIST, then save a reconstruction figure and a figure
    # of digits decoded from a regular grid over the 2-D latent space.

    # Imports needed only by the script entry point.
    import os
    from tensorflow.examples.tutorials.mnist import input_data

    # Training settings.
    n_epochs = 30        # number of passes over the training set
    lr = 0.001           # learning rate
    batch_size = 100     # samples per batch
    display_every = 1    # save a checkpoint and print every N epochs

    # Directory of the running script; all outputs are written beneath it.
    path = sys.path[0]
    model_dir = os.path.join(path, "model")
    results_dir = os.path.join(path, "results")
    # Create the output directories up front so that saving cannot fail on a
    # missing parent directory.
    for out_dir in (model_dir, results_dir):
        os.makedirs(out_dir, exist_ok=True)

    # Load the MNIST data. The loading statement was missing from the file.
    # NOTE(review): the "MNIST_data" directory name is an assumption -- confirm.
    mnist = input_data.read_data_sets(os.path.join(path, "MNIST_data"))

    with tf.Session() as sess:
        # Build the model; z_dim=2 so the latent space can be drawn as a grid.
        vae = VAE(input_dim=784, z_dim=2, batch_size=batch_size,
                  encoder_hidden_size=[500, 500],
                  decoder_hidden_size=[500, 500], act_fn=tf.nn.softplus)
        # Initialise all variables.
        sess.run(tf.global_variables_initializer())
        # Apply the configured learning rate (it was previously declared but unused).
        sess.run(tf.assign(vae.lr, lr))
        # Checkpoint handler.
        saver = tf.train.Saver()
        checkpoint_file = os.path.join(model_dir, "model.ckpt")

        print("Start training...")
        # Number of full batches per epoch.
        total_batch = mnist.train.num_examples // batch_size
        for epoch in range(n_epochs):
            # Running mean of the cost over this epoch.
            avg_cost = 0.0
            for _ in range(total_batch):
                # Labels are not needed for unsupervised training.
                batch_xs, _labels = mnist.train.next_batch(batch_size)
                # One optimisation step; also fetch the cost for reporting.
                c, _ = sess.run([vae.cost, vae.train_op], feed_dict={vae.x: batch_xs})
                avg_cost += c / total_batch
            if epoch % display_every == 0:
                # Save a checkpoint and report progress.
                saver.save(sess, checkpoint_file)
                print("\tEpoch {0}, cost {1}".format(epoch, avg_cost))

        # ---- Reconstruction of test samples ----
        x_sample, _labels = mnist.test.next_batch(batch_size)
        # Encode and decode the test batch.
        x_reconstr = sess.run(vae.x_reconstr_mean, feed_dict={vae.x: x_sample})
        plt.figure(figsize=(8, 12))
        # Show up to five input/reconstruction pairs side by side.
        for i in range(min(5, batch_size)):
            # Left column: the original test image.
            plt.subplot(5, 2, 2 * i + 1)
            plt.imshow(np.reshape(x_sample[i], (28, 28)), vmin=0, vmax=1, cmap="gray")
            plt.title("Test input")
            plt.colorbar()
            # Right column: the model's reconstruction.
            plt.subplot(5, 2, 2 * i + 2)
            plt.imshow(np.reshape(x_reconstr[i], (28, 28)), vmin=0, vmax=1, cmap="gray")
            plt.title("Reconstruction")
            plt.colorbar()
        plt.tight_layout()
        plt.savefig(os.path.join(results_dir, "img_epoch{0}.jpg".format(n_epochs)))
        plt.show()

        # ---- Decode a regular grid of latent points ----
        nx, ny = 20, 20
        grid_x = np.linspace(-3, 3, nx)
        grid_y = np.linspace(-3, 3, ny)
        # meshgrid returns (ny, nx) coordinate matrices; flattened row-major,
        # point n = row * nx + col has latent coordinates (grid_x[col], grid_y[row]).
        grid_x, grid_y = np.meshgrid(grid_x, grid_y)
        zs = np.concatenate((np.reshape(grid_x, [-1, 1]),
                             np.reshape(grid_y, [-1, 1])), axis=1)

        n_grid = nx * ny
        xs_recon = np.zeros((n_grid, 28 * 28))
        # Decode the grid in chunks of batch_size. The graph has a fixed batch
        # dimension, so a short final chunk is zero-padded and the padding
        # rows are discarded afterwards.
        for start in range(0, n_grid, batch_size):
            z_mu = zs[start:start + batch_size, :]
            n_valid = z_mu.shape[0]
            if n_valid < batch_size:
                padding = np.zeros((batch_size - n_valid, z_mu.shape[1]))
                z_mu = np.concatenate((z_mu, padding), axis=0)
            # Feeding vae.z bypasses the encoder and decodes the given points.
            x_mean = sess.run(vae.x_reconstr_mean, feed_dict={vae.z: z_mu})
            xs_recon[start:start + n_valid] = x_mean[:n_valid]

        # Tile the decoded 28x28 digits into a single canvas image.
        canvas = np.zeros((28 * ny, 28 * nx))
        n = 0
        # Rows follow the y grid and columns the x grid, matching the
        # flattening order of zs. Row 0 is drawn at the bottom of the canvas.
        for i in range(ny):
            for j in range(nx):
                canvas[(ny - i - 1) * 28:(ny - i) * 28, j * 28:(j + 1) * 28] = \
                    xs_recon[n].reshape(28, 28)
                n += 1

        # Draw the canvas on an 8x10 inch figure.
        plt.figure(figsize=(8, 10))
        plt.imshow(canvas, origin="upper", vmin=0, vmax=1, interpolation='none', cmap='gray')
        plt.tight_layout()
        plt.savefig(os.path.join(results_dir, "rand_img_epoch{0}.jpg".format(n_epochs)))
        plt.show()