Variational Autoencoder (VAE) and a TensorFlow Implementation

Basic Concepts

1. A variational autoencoder is an unsupervised learning model.
2. Its main use is to generate data.
3. The network structure of a VAE:
[Figure: VAE network architecture (encoder → latent code z → decoder)]
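
As background (this derivation is not spelled out in the original post), a VAE is trained by maximizing the evidence lower bound (ELBO), which balances a reconstruction term against a KL regularizer:

$$\mathcal{L}(\theta,\phi;x)=\mathbb{E}_{q_\phi(z|x)}\left[\log p_\theta(x|z)\right]-D_{kl}\big(q_\phi(z|x)\,\|\,N(0,I)\big)$$

The loss built in sections 3.7 and 3.8 below is the negative of this bound: binary cross-entropy for the reconstruction term and the closed-form KL divergence for the regularizer.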

TensorFlow Implementation

Using a VAE to reconstruct and generate Fashion-MNIST images.
1. Import the libraries:

import os
import tensorflow as tf
from tensorflow import keras
from PIL import Image
from matplotlib import pyplot as plt
from tensorflow.keras import Sequential, layers
import numpy as np

2. Load the dataset:

batchsz = 256    # batch size (defined here so this snippet runs on its own)

# Load the dataset; an autoencoder is unsupervised, so the labels are discarded
(x_train, _), (x_test, _) = keras.datasets.fashion_mnist.load_data()
x_train, x_test = x_train.astype(np.float32) / 255., x_test.astype(np.float32) / 255.
train_db = tf.data.Dataset.from_tensor_slices(x_train)
train_db = train_db.shuffle(batchsz * 5).batch(batchsz)
test_db = tf.data.Dataset.from_tensor_slices(x_test)
test_db = test_db.batch(batchsz)
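
A quick sanity check (not in the original post) that the input pipeline produces batches of the expected shape and value range:

# Expected output: (256, 28, 28) float32, with pixel values in [0, 1]
for x in train_db.take(1):
    print(x.shape, x.dtype, float(tf.reduce_min(x)), float(tf.reduce_max(x)))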

3. Build the model:
3.1 Network modules

# Encoder network
self.vae_encoder = layers.Dense(self.units)
# Mean network
self.vae_mean = layers.Dense(self.z_dim)          # predicts the latent mean
# Log-variance network (one value per latent dimension, so same size as the mean)
self.vae_variance = layers.Dense(self.z_dim)      # predicts the latent log-variance

# Decoder network
self.vae_decoder = layers.Dense(self.units)
# Output network
self.vae_out = layers.Dense(784)

3.2 Encoder forward pass

def encoder(self, x):
    h = tf.nn.relu(self.vae_encoder(x))
    # latent mean
    mu = self.vae_mean(h)
    # latent log-variance (predicting the log keeps the value unconstrained in sign)
    log_var = self.vae_variance(h)

    return mu, log_var

3.3 Decoder forward pass

def decoder(self, z):
	out = tf.nn.relu(self.vae_decoder(z))
	out = self.vae_out(out)
	
	return out

3.4 Reparameterization

def reparameterize(self, mu, log_var):
    eps = tf.random.normal(log_var.shape)

    std = tf.exp(log_var)         # undo the log to get the variance
    std = std**0.5                # square root to get the standard deviation

    z = mu + std * eps
    return z
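
A compact, equivalent formulation that is often seen elsewhere computes the standard deviation directly as exp(0.5 * log_var); this is only a rewrite of the two lines above, not a change in behaviour:

def reparameterize(self, mu, log_var):
    # exp(0.5 * log_var) = sqrt(exp(log_var)) = standard deviation
    std = tf.exp(0.5 * log_var)
    eps = tf.random.normal(log_var.shape)
    return mu + std * eps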

3.5 Main network (forward pass)

def call(self, inputs):
    mu, log_var = self.encoder(inputs)
    # reparameterization trick: the core of the VAE
    z = self.reparameterize(mu, log_var)
    # reconstruct with the decoder
    x_hat = self.decoder(z)

    # Besides the different forward pass, a variational autoencoder has an extra
    # constraint that pushes (mu, var) towards a standard normal distribution,
    # so mu and log_var are returned as well.
    return x_hat, mu, log_var

3.6 Model instantiation

z_dim = 10     # latent dimension
lr = 1e-4      # learning rate

model = VAE(z_dim, units=128)
model.build(input_shape=(128, 784))
optimizer = keras.optimizers.Adam(learning_rate=lr)
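
A quick shape check (not in the original post) confirms that a dummy batch flows through the model as expected:

# Push a fake batch of flattened 28x28 images through the model
dummy = tf.zeros([4, 784])
x_hat, mu, log_var = model(dummy)
print(x_hat.shape, mu.shape, log_var.shape)   # expected: (4, 784) (4, 10) (4, 10)
model.summary()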

3.7 Loss function

# Treat each pixel as an independent binary classification problem
rec_loss = tf.losses.binary_crossentropy(x, x_hat, from_logits=True)
rec_loss = tf.reduce_mean(rec_loss)
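
The snippet above averages the cross-entropy over pixels. A commonly used alternative (not what this post does) sums over the 784 pixels of each image and then averages over the batch, which increases the weight of the reconstruction term relative to the KL term:

# Alternative scaling (sketch): sum the per-pixel loss over each image, then average over the batch
rec_loss_sum = tf.nn.sigmoid_cross_entropy_with_logits(labels=x, logits=x_hat)
rec_loss_sum = tf.reduce_sum(rec_loss_sum) / x.shape[0]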

3.8 Computing the KL divergence
The KL divergence between the posterior N(μ, σ²) and the standard normal N(0, 1) is:
$$D_{kl}\big(N(\mu,\sigma^2)\,\|\,N(0,1)\big)=\frac{1}{2}\left(-\log\sigma^2+\mu^2+\sigma^2-1\right)$$

kl_div = -0.5 * (log_var + 1 - mu**2 - tf.exp(log_var))   # element-wise closed-form KL term
# reduce_mean already averages; the extra division by batchsz further down-weights the KL term
kl_div = tf.reduce_mean(kl_div) / batchsz
loss = rec_loss + 1. * kl_div                              # KL weight of 1
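
As a sanity check (not part of the original code), the closed-form expression can be compared against a Monte Carlo estimate of the KL divergence for a single Gaussian; the two values should agree closely:

import math

mu_v, log_var_v = 0.5, tf.math.log(2.0)          # example: N(0.5, 2) vs N(0, 1)
analytic = -0.5 * (log_var_v + 1 - mu_v**2 - tf.exp(log_var_v))

# Monte Carlo estimate: E_q[log q(z) - log p(z)] with z ~ N(mu, sigma^2)
z = tf.random.normal([100000]) * tf.exp(0.5 * log_var_v) + mu_v
log_q = -0.5 * (math.log(2 * math.pi) + log_var_v + (z - mu_v)**2 / tf.exp(log_var_v))
log_p = -0.5 * (math.log(2 * math.pi) + z**2)
print(float(analytic), float(tf.reduce_mean(log_q - log_p)))   # the two should be close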

Complete Code

import os
import tensorflow as tf
from tensorflow import keras
from PIL import Image
from matplotlib import pyplot as plt
from tensorflow.keras import Sequential, layers
import numpy as np

tf.random.set_seed(2322)
np.random.seed(23422)

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
assert  tf.__version__.startswith('2.')

# Stitch num x num images into one big image and save it
def save_images(img, name, num):
    new_im = Image.new('L', (28*num, 28*num))
    index = 0
    for i in range(0, 28*num, 28):
        for j in range(0, 28*num, 28):
            im = img[index]
            im = Image.fromarray(im, mode='L')
            new_im.paste(im, (i, j))
            index += 1

    new_im.save(name)

# Hyperparameters
batchsz = 256
lr = 1e-4

# Load the dataset; an autoencoder is unsupervised, so the labels are discarded
(x_train, _), (x_test, _) = keras.datasets.fashion_mnist.load_data()
x_train, x_test = x_train.astype(np.float32) / 255., x_test.astype(np.float32) / 255.
train_db = tf.data.Dataset.from_tensor_slices(x_train)
train_db = train_db.shuffle(batchsz * 5).batch(batchsz)
test_db = tf.data.Dataset.from_tensor_slices(x_test)
test_db = test_db.batch(batchsz)

# Build the model
z_dim = 10
class VAE(keras.Model):
    def __init__(self,z_dim,units=256):
        super(VAE, self).__init__()
        self.z_dim = z_dim
        self.units = units
        # Encoder network
        self.vae_encoder = layers.Dense(self.units)
        # Mean network
        self.vae_mean = layers.Dense(self.z_dim)          # predicts the latent mean
        # Log-variance network (one value per latent dimension, so same size as the mean)
        self.vae_variance = layers.Dense(self.z_dim)      # predicts the latent log-variance

        # Decoder network
        self.vae_decoder = layers.Dense(self.units)
        # Output network
        self.vae_out = layers.Dense(784)

    # Encoder forward pass
    def encoder(self, x):
        h = tf.nn.relu(self.vae_encoder(x))
        # latent mean
        mu = self.vae_mean(h)
        # latent log-variance
        log_var = self.vae_variance(h)

        return mu, log_var

    # Decoder forward pass
    def decoder(self, z):
        out = tf.nn.relu(self.vae_decoder(z))
        out = self.vae_out(out)

        return out

    def reparameterize(self, mu, log_var):
        eps = tf.random.normal(log_var.shape)

        std = tf.exp(log_var)         # undo the log to get the variance
        std = std**0.5                # square root to get the standard deviation

        z = mu + std * eps
        return z

    def call(self, inputs):
        mu, log_var = self.encoder(inputs)
        # reparameterization trick: the core of the VAE
        z = self.reparameterize(mu, log_var)
        # reconstruct with the decoder
        x_hat = self.decoder(z)

        # Besides the different forward pass, a variational autoencoder has an extra
        # constraint that pushes (mu, var) towards a standard normal distribution,
        # so mu and log_var are returned as well.
        return x_hat, mu, log_var

model = VAE(z_dim, units=128)
model.build(input_shape=(128, 784))
optimizer = keras.optimizers.Adam(learning_rate=lr)

epochs = 30
for epoch in range(epochs):

    for step, x in enumerate(train_db):

        x = tf.reshape(x, [-1, 784])
        with tf.GradientTape() as tape:
            # forward pass: reconstruction logits plus latent mean and log-variance
            x_hat, mu, log_var = model(x)

            # Treat each pixel as an independent binary classification problem
            rec_loss = tf.losses.binary_crossentropy(x, x_hat, from_logits=True)
            rec_loss = tf.reduce_mean(rec_loss)

            # KL divergence between N(mu, var) and the standard normal N(0, 1)
            # reference: https://stats.stackexchange.com/questions/7440/kl-divergence-between-two-univariate-gaussians
            kl_div = -0.5 * (log_var + 1 - mu**2 - tf.exp(log_var))
            # reduce_mean already averages; the extra division by batchsz further down-weights the KL term
            kl_div = tf.reduce_mean(kl_div) / batchsz
            loss = rec_loss + 1. * kl_div

        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if step % 100 ==0:
            print('\repoch: %3d, step:%4d, kl_div: %5f, rec_loss:%9f' %(epoch, step, float(kl_div), float(rec_loss)),end="")

    num_pic = 9
    # evaluation 1: sample latent codes directly from the standard normal prior
    z = tf.random.normal((batchsz, z_dim))                              # a batch of latent vectors from N(0, I)
    logits = model.decoder(z)                                           # decode them into image logits
    x_hat = tf.sigmoid(logits)
    x_hat = tf.reshape(x_hat, [-1, 28, 28]).numpy() * 255.
    x_hat = x_hat.astype(np.uint8)                                      # standard 8-bit image format
    save_images(x_hat, 'd:\\vae_images\\sampled_epoch%d.png' % epoch, num_pic)       # images generated from prior samples

    # evaluation 2: normal forward pass, i.e. reconstruct images from the test set
    x = next(iter(test_db))
    x = tf.reshape(x, [-1, 784])
    x_hat_logits, _, _ = model(x)                        # the forward pass also returns mu, log_var
    x_hat = tf.sigmoid(x_hat_logits)
    x_hat = tf.reshape(x_hat, [-1, 28, 28]).numpy() * 255.
    x_hat = x_hat.astype(np.uint8)                       # standard 8-bit image format
    # print(x_hat.shape)
    save_images(x_hat, 'd:\\vae_images\\rec_epoch%d.png' % epoch, num_pic)
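
One practical note: save_images writes into d:\vae_images, so that directory must exist before training starts. A small guard (not in the original post) can be placed next to the hyperparameter definitions:

import os

save_dir = 'd:\\vae_images'             # the path the code above writes to
os.makedirs(save_dir, exist_ok=True)    # create it if it does not already exist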

