tf中的Auto Encoder(VAE)
1. Auto Encoder(AE)
基本Auto Encoder
Auto Encoder(自编码器)应用于无监督学习。监督学习,即训练数据含有标签,一般用于分类和预测任务;无监督学习,即训练数据不含有标签,一般用于聚类和数据生成任务。自编码器的基本结构如下所示,其中encoder的输出可以是降维也可以是升维的(一般降维),维度较低的中间层称为neck层,neck层的输出可以看作聚类的结果。
Dropout AutoEncoders
Dropout即在训练时随机"遗忘"(丢弃)一部分神经元及其连接线,被丢弃的部分不参与本次前向计算与权值更新;在测试时不做遗忘,使用全部连接。
Adversarial AutoEncoders
Adversarial AutoEncoders基本思想:在原有的AutoEncoders上加入了一个判决器,来判决输出的分布与原始分布或规定分布是否一致(相近程度),不一致则判定错误 Adversarial AutoEncoders基本结构: Adversarial AutoEncoders的损失函数:
$$l_i(\theta,\phi) = -\sum_{z \sim q_\theta (z\mid x_i)}\left[\log p_{\phi}(x_i\mid z)\right]+KL\left(q_\theta(z\mid x_i)\,\|\,p(z)\right)$$
其中后部分为KL散度,用于衡量p和q的重叠程度(相似度),重叠部分越多,其值越小:
$$KL(p\,\|\,q) = \int_{-\infty}^{+\infty}p(x)\log\frac{p(x)}{q(x)}\,dx$$
Adversarial AutoEncoders的损失函数的前半部分理解为:编码器输出的编码结果解码后尽可能逼近原数据;后半部分理解为:输出分布尽可能逼近原分布或规定分布。
2. Variational Auto Encoder(VAE)
Auto Encoder存在的问题:编码器的输出是一个分布,直接从这个分布采样的操作不可求导,梯度无法通过采样步骤反向传播。VAE通过重参数化(reparameterization)解决这一问题:
Auto Encoder编码器的输出是 $z\sim N(\mu,\sigma^2)$;
VAE编码器的输出是 $z = \mu +\sigma \odot\epsilon$,其中 $\epsilon\sim N(0,1)$。这样随机性只来自 $\epsilon$,$z$ 对 $\mu$ 和 $\sigma$ 可以求导。
VAE的基本结构:与AE相比,neck层(VAE中维度较低的层和正态分布参数层统称为neck层)内部的连接方式不同。对VAE所学得的编码器的输出做不同的采样(其中维度较小的层对红色和绿色的层采样),可以生成不同的数据(生成不同类别的数据),而不是像AE那样只能生成一一对应的数据(原始输入数据)。
3. AE实战
任务:利用Auto Encoder实现对fashion_mnist数据集生成 代码:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow. keras import Sequential, layers
from PIL import Image
from matplotlib import pyplot as plt
import os
os. environ[ 'TF_CPP_MIN_LOG_LEVEL' ] = '2'
def save_images(ims, name):
    """Tile up to 100 grayscale images into a 10x10 sheet and save it.

    Images are placed on a 280x280 canvas in 28-pixel steps, filling each
    column top-to-bottom before moving one column to the right (the same
    order as the original nested loops). Cells with no image stay black.

    Args:
        ims: Indexable, sized collection of 2-D uint8 arrays; presumably
            28x28 each, matching the 28-pixel grid step. Only the first
            100 entries are used.
        name: Destination path handed to ``Image.save``. Missing parent
            directories are created.
    """
    tile, per_side = 28, 10
    sheet = Image.new('L', (tile * per_side, tile * per_side))

    # Cap at the number of images actually supplied so that a short batch
    # no longer raises IndexError.
    count = min(len(ims), per_side * per_side)
    for index in range(count):
        # index // 10 selects the x offset, index % 10 the y offset.
        col, row = divmod(index, per_side)
        im = Image.fromarray(ims[index], mode='L')
        sheet.paste(im, (col * tile, row * tile))

    # Saving into a directory that does not exist fails, so create it first.
    out_dir = os.path.dirname(name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    sheet.save(name)
# --- Hyperparameters -------------------------------------------------------
h_dim = 20        # output width of the encoder's last Dense layer
batch_size = 256
lr = 1e-3         # learning rate given to the Adam optimizer

# --- Data ------------------------------------------------------------------
# Fashion-MNIST images are scaled from [0, 255] to [0, 1]; the labels are
# loaded but not used by the pipelines below.
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
x_train = x_train.astype(np.float32) / 255.
x_test = x_test.astype(np.float32) / 255.

# Training batches are shuffled with a buffer of five batches; test batches
# keep the dataset order.
train_db = (
    tf.data.Dataset.from_tensor_slices(x_train)
    .shuffle(batch_size * 5)
    .batch(batch_size)
)
test_db = tf.data.Dataset.from_tensor_slices(x_test).batch(batch_size)
class AE(keras.Model):
    """Dense autoencoder with a 256 -> 128 -> h_dim encoder and a mirrored decoder.

    The decoder's last layer has 28 * 28 units and no activation, so the
    model returns raw values (the caller applies the sigmoid / logits loss).
    """

    def __init__(self):
        super().__init__()
        relu = tf.nn.relu

        # Encoder: compress the input down to h_dim features.
        encoder_layers = [
            layers.Dense(256, activation=relu),
            layers.Dense(128, activation=relu),
            layers.Dense(h_dim),
        ]
        self.encoder = Sequential(encoder_layers)

        # Decoder: expand the h_dim features back to 28 * 28 values.
        decoder_layers = [
            layers.Dense(128, activation=relu),
            layers.Dense(256, activation=relu),
            layers.Dense(28 * 28),
        ]
        self.decoder = Sequential(decoder_layers)

    def call(self, inputs, training=None):
        """Encode ``inputs`` and return the decoder output for that code."""
        return self.decoder(self.encoder(inputs))
# Build the autoencoder for flattened 28 * 28 inputs and print its layers.
model = AE()
model.build(input_shape=(None, 28 * 28))
model.summary()
# `lr=` is a deprecated alias that newer Keras releases reject;
# `learning_rate=` is the supported keyword.
optimizer = tf.optimizers.Adam(learning_rate=lr)

for epoch in range(60):
    # ---- one pass over the training set --------------------------------
    for step, x in enumerate(train_db):
        x = tf.reshape(x, [-1, 784])
        with tf.GradientTape() as tape:
            x_output = model(x)
            # The model emits logits, hence from_logits=True.
            loss = tf.losses.binary_crossentropy(x, x_output, from_logits=True)
            loss = tf.reduce_mean(loss)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        if step % 100 == 0:
            print(epoch, step, float(loss))

    # ---- per-epoch snapshot on the first test batch --------------------
    x = next(iter(test_db))
    output = model(tf.reshape(x, [-1, 784]))
    x_hat = tf.sigmoid(output)
    x_hat = tf.reshape(x_hat, [-1, 28, 28])

    # The original code concatenated x and x_hat and then immediately
    # overwrote the result; that dead computation is dropped here.
    originals = (x.numpy() * 255.).astype(np.uint8)
    save_images(originals, 'ae_original_picture/epoch_%d.png' % epoch)

    reconstructions = (x_hat.numpy() * 255.).astype(np.uint8)
    save_images(reconstructions, 'ae_generate_picture/epoch_%d.png' % epoch)
4. VAE实战
任务:利用Variational Auto Encoder实现对fashion_mnist数据集生成 代码:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow. keras import Sequential, layers
from PIL import Image
from matplotlib import pyplot as plt
import os
os. environ[ 'TF_CPP_MIN_LOG_LEVEL' ] = '2'
def save_images(ims, name):
    """Tile up to 100 grayscale images into a 10x10 sheet and save it.

    Images are placed on a 280x280 canvas in 28-pixel steps, filling each
    column top-to-bottom before moving one column to the right (the same
    order as the original nested loops). Cells with no image stay black.

    Args:
        ims: Indexable, sized collection of 2-D uint8 arrays; presumably
            28x28 each, matching the 28-pixel grid step. Only the first
            100 entries are used.
        name: Destination path handed to ``Image.save``. Missing parent
            directories are created.
    """
    tile, per_side = 28, 10
    sheet = Image.new('L', (tile * per_side, tile * per_side))

    # Cap at the number of images actually supplied so that a short batch
    # no longer raises IndexError.
    count = min(len(ims), per_side * per_side)
    for index in range(count):
        # index // 10 selects the x offset, index % 10 the y offset.
        col, row = divmod(index, per_side)
        im = Image.fromarray(ims[index], mode='L')
        sheet.paste(im, (col * tile, row * tile))

    # Saving into a directory that does not exist fails, so create it first.
    out_dir = os.path.dirname(name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    sheet.save(name)
# --- Hyperparameters -------------------------------------------------------
h_dim = 20        # kept from the AE script
z_dim = 10        # size of the latent vector (mean / log-variance heads)
batch_size = 256
lr = 1e-3         # learning rate given to the Adam optimizer

# --- Data ------------------------------------------------------------------
# Fashion-MNIST images are scaled from [0, 255] to [0, 1]; the labels are
# loaded but not used by the pipelines below.
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
x_train = x_train.astype(np.float32) / 255.
x_test = x_test.astype(np.float32) / 255.

# Training batches are shuffled with a buffer of five batches; test batches
# keep the dataset order.
train_db = (
    tf.data.Dataset.from_tensor_slices(x_train)
    .shuffle(batch_size * 5)
    .batch(batch_size)
)
test_db = tf.data.Dataset.from_tensor_slices(x_test).batch(batch_size)
class VAE(keras.Model):
    """Variational autoencoder built from five Dense layers.

    The encoder maps the input to a mean and a log-variance vector of size
    ``z_dim``; a latent sample is drawn with the reparameterization trick
    and decoded to 784 raw output values (no final activation).
    """

    def __init__(self):
        super(VAE, self).__init__()
        # Encoder: shared hidden layer followed by two heads.
        self.fc1 = layers.Dense(128)
        self.fc2 = layers.Dense(z_dim)  # mean head
        self.fc3 = layers.Dense(z_dim)  # log-variance head
        # Decoder.
        self.fc4 = layers.Dense(128)
        self.fc5 = layers.Dense(784)

    def encoder(self, x):
        """Return ``(mu, log_var)`` for input ``x``."""
        h = tf.nn.relu(self.fc1(x))
        mu = self.fc2(h)
        log_var = self.fc3(h)
        return mu, log_var

    def decoder(self, z):
        """Map latent vectors ``z`` to 784 output values (pre-sigmoid)."""
        out = tf.nn.relu(self.fc4(z))
        out = self.fc5(out)
        return out

    def reparameterize(self, mu, log_var):
        """Sample ``z = mu + std * eps`` with ``eps ~ N(0, 1)``.

        Keeping the randomness in ``eps`` lets gradients flow to ``mu`` and
        ``log_var``.
        """
        # Use the dynamic shape: the static `log_var.shape` contains None
        # when the batch size is unknown, which tf.random.normal rejects.
        eps = tf.random.normal(tf.shape(log_var))
        # exp(0.5 * log_var) equals sqrt(exp(log_var)) but does not overflow
        # for large log_var the way the intermediate exp(log_var) can.
        std = tf.exp(0.5 * log_var)
        return mu + std * eps

    def call(self, inputs, training=None):
        """Return ``(x_hat, mu, log_var)`` for a batch of inputs."""
        mu, log_var = self.encoder(inputs)
        z = self.reparameterize(mu, log_var)
        x_hat = self.decoder(z)
        return x_hat, mu, log_var
# Build the VAE for flattened 28 * 28 inputs and print its layers.
model = VAE()
# NOTE(review): a concrete batch size is used for build(); presumably this
# is because the sampling step needs a fully known shape -- confirm before
# switching to a None batch dimension.
model.build(input_shape=(4, 28 * 28))
model.summary()
# `lr=` is a deprecated alias that newer Keras releases reject;
# `learning_rate=` is the supported keyword.
optimizer = tf.optimizers.Adam(learning_rate=lr)

for epoch in range(100):
    # ---- one pass over the training set --------------------------------
    for step, x in enumerate(train_db):
        x = tf.reshape(x, [-1, 784])
        with tf.GradientTape() as tape:
            x_output, mu, log_var = model(x)
            # Reconstruction term: per-pixel cross-entropy on the logits,
            # summed over pixels and averaged over the batch.
            loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=x, logits=x_output)
            loss0 = tf.reduce_sum(loss) / x.shape[0]
            # KL divergence between N(mu, exp(log_var)) and N(0, 1),
            # summed over latent dimensions and averaged over the batch.
            kl_div = -0.5 * (log_var + 1 - mu ** 2 - tf.exp(log_var))
            kl_div = tf.reduce_sum(kl_div) / x.shape[0]
            loss = loss0 + 1. * kl_div
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        if step % 100 == 0:
            print(epoch, step, 'kl_div:', float(kl_div), 'loss0', float(loss0), 'loss', float(loss))

    # ---- per-epoch snapshot: reconstruct the first test batch -----------
    x = next(iter(test_db))
    output, _, _ = model(tf.reshape(x, [-1, 784]))
    x_hat = tf.sigmoid(output)
    x_hat = tf.reshape(x_hat, [-1, 28, 28]).numpy() * 255.
    x_hat = x_hat.astype(np.uint8)
    save_images(x_hat, 'vae_mapping_picture/epoch_%d.png' % epoch)

    # ---- per-epoch snapshot: decode latent vectors drawn from N(0, 1) ---
    z = tf.random.normal((batch_size, z_dim))
    out_put0 = model.decoder(z)
    x_hat0 = tf.sigmoid(out_put0)
    x_hat0 = tf.reshape(x_hat0, [-1, 28, 28]).numpy() * 255.
    x_hat0 = x_hat0.astype(np.uint8)
    save_images(x_hat0, 'vae_sample_picture/epoch_%d.png' % epoch)