VAE Learning
The principle of the model architecture is illustrated in the figure below:
The loss function consists of two parts:
Minimize 1:
\frac{1}{2}\sum_{k}\left(\hat{x}_{k}-x_{k}\right)^{2}
That is, the output should be made as close to the input as possible (the reconstruction error).
Minimize 2:
c=\sum_{i=1}^{3}\left(e^{\sigma_{i}}-\left(1+\sigma_{i}\right)+\left(m_{i}\right)^{2}\right)
This is a constraint term that prevents the weight assigned to the noise from shrinking toward zero.
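To see why, note that the first two terms of this constraint reach their minimum at \sigma_{i}=0:
\frac{\partial}{\partial \sigma_{i}}\left(e^{\sigma_{i}}-\left(1+\sigma_{i}\right)\right)=e^{\sigma_{i}}-1=0\;\Longrightarrow\;\sigma_{i}=0
so the noise weight e^{\sigma_{i}} is pushed toward 1 rather than 0, while \left(m_{i}\right)^{2} acts as an L2 penalty on the mean codes.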
First, the input is fed into an NN Encoder, which produces two sets of codes: the mean codes (m_{1},m_{2},m_{3}) and the variance codes (\sigma_{1},\sigma_{2},\sigma_{3}). The variance codes assign weights to the noise codes (e_{1},e_{2},e_{3}); the mean codes m are then added to the weighted noise to obtain the new codes (c_{1},c_{2},c_{3}), which are fed into the NN Decoder. The Decoder learns to minimize the reconstruction error.
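Written out explicitly (with e^{\sigma_{i}} as the noise weight, which matches the constraint term above; the exact weighting convention is assumed rather than stated in the text):
c_{i}=e^{\sigma_{i}}\,e_{i}+m_{i},\quad e_{i}\sim\mathcal{N}(0,1)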
import torch
import visdom
from torch.utils.data import DataLoader
from torch import nn, optim
from torchvision import transforms, datasets
class VAE(nn.Module):
def __init__(self):
super(VAE, self).__init__()
# [b, 784] => [b, 20]
# u: [b, 10]
# sigma: [b, 10]
self.encoder = nn.Sequential(
nn.Linear(784, 256),
nn.ReLU(),
nn.Linear(256, 64),
nn.ReLU(),
nn.Linear(64, 20),
nn.ReLU()
)
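# note: the trailing ReLU constrains mu and sigma (split out below) to be non-negative;
# many VAE implementations drop it or predict a log-variance instead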
# [b, 20] => [b, 784]
self.decoder = nn.Sequential(
nn.Linear(10, 64),
nn.ReLU(),
nn.Linear(64, 256),
nn.ReLU(),
nn.Linear(256, 784),
nn.Sigmoid()
)
self.criteon = nn.MSELoss()
def forward(self, x):
"""
:param x: [b, 1, 28, 28]
:return:
"""
batchsz = x.size(0)
# flatten
x = x.view(batchsz, 784)
# the encoder outputs the mean and sigma concatenated, shape [b, 20]
h_ = self.encoder(x)
# [b, 20] => [b, 10] and [b, 10]: split into mean and sigma
mu, sigma = h_.chunk(2, dim=1)
# reparameterization trick for the sampling step
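# h = mu + sigma * eps with eps ~ N(0, 1): moving the randomness into eps
# keeps the sample differentiable with respect to mu and sigma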
h = mu + sigma * torch.randn_like(sigma)
# decoder: predicting result
x_hat = self.decoder(h)
x_hat = x_hat.view(batchsz, 1, 28, 28)
# KL divergence, i.e., Minimize 2
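# closed-form KL(N(mu, sigma^2) || N(0, 1)) summed over the batch and averaged per pixel;
# here sigma is used directly as the standard deviation, and 1e-8 avoids log(0)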
kld = 0.5 * torch.sum(torch.pow(mu, 2) + torch.pow(sigma, 2) - torch.log(1e-8 + torch.pow(sigma, 2)) - 1) / (batchsz*28*28)
return x_hat, kld
def main():
mnist_train = datasets.MNIST('mnist', True,
transform=transforms.Compose([transforms.ToTensor()]), download=True)
mnist_train = DataLoader(mnist_train, batch_size=32, shuffle=True)
mnist_test = datasets.MNIST('mnist', False,
transform=transforms.Compose([transforms.ToTensor()]), download=True)
mnist_test = DataLoader(mnist_test, batch_size=32, shuffle=True)
x, _ = next(iter(mnist_train))
print('x:', x.shape)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = VAE().to(device)
criteon = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
print(model)
viz = visdom.Visdom()
for epoch in range(1000):
for batchidx, (x, _) in enumerate(mnist_train):
# [b, 1, 28, 28]
x = x.to(device)
x_hat, kld = model(x)
# reconstruction error, i.e., Minimize 1
loss = criteon(x_hat, x)
if kld is not None:
elbo = - loss - 1.0 * kld
loss = - elbo  # total loss
# backprop
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(epoch, 'loss:', loss.item(), 'kld:', kld.item())
x, _ = next(iter(mnist_test))
x = x.to(device)
with torch.no_grad():
x_hat, kld = model(x)
viz.images(x, nrow=8, win='x', opts=dict(title='x'))
viz.images(x_hat, nrow=8, win='x_hat', opts=dict(title='x_hat'))
if __name__ == '__main__':
    main()
If the visdom server is not started before training, the script will raise a connection error. Start it first by running:
python -m visdom.server
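After training, new digits can be generated by sampling the latent code directly. The following is a minimal sketch, not part of the original script, that reuses model, device, and viz from the code above; the latent dimension 10 matches the decoder's input size:
with torch.no_grad():
    z = torch.randn(8, 10).to(device)       # 8 latent codes sampled from N(0, I)
    samples = model.decoder(z)              # [8, 784], values in (0, 1) from the Sigmoid
    samples = samples.view(-1, 1, 28, 28)   # reshape back to image format
    viz.images(samples, nrow=8, win='samples', opts=dict(title='samples'))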