Below are the AE and VAE model definitions, followed by the main function used for training.
AE
import torch
from torch import nn
class AE(nn.Module):
    """Fully-connected auto-encoder for flattened 28x28 MNIST images.

    Input/output shape: [b, 1, 28, 28]; latent code shape: [b, 20].
    """

    def __init__(self):
        super(AE, self).__init__()
        # Encoder: [b, 784] -> [b, 20]  (MNIST images are 28*28 = 784 pixels)
        self.encoder = nn.Sequential(
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 20),
            nn.ReLU(),
        )
        # Decoder: [b, 20] -> [b, 784]
        self.decoder = nn.Sequential(
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 256),
            nn.ReLU(),
            nn.Linear(256, 784),
            nn.Sigmoid(),  # constrain reconstructed pixel values to [0, 1]
        )

    def forward(self, x):
        """Encode then reconstruct a batch of images.

        Args:
            x: tensor of shape [b, 1, 28, 28].

        Returns:
            Reconstruction with the same shape as ``x``.
        """
        batchsz = x.size(0)
        # Flatten image -> vector before the fully-connected stack.
        x = x.view(batchsz, 784)
        x = self.encoder(x)
        x = self.decoder(x)
        # Restore the flattened vector back to image shape.
        x = x.view(batchsz, 1, 28, 28)
        return x
VAE
import torch
from torch import nn
import numpy as np
class VAE(nn.Module):
    """Variational auto-encoder for flattened 28x28 MNIST images.

    The encoder maps [b, 784] -> [b, 20]; those 20 units are split into a
    mean ``mu`` [b, 10] and a ``sigma`` [b, 10]. The decoder maps a sampled
    latent [b, 10] back to a [b, 784] reconstruction.

    NOTE(review): the trailing ReLU on the encoder clamps both mu and sigma
    to be non-negative, which is unusual for a VAE (mu is normally
    unconstrained and nets usually predict log-variance). Kept as-is to
    preserve the original architecture.
    """

    def __init__(self):
        super(VAE, self).__init__()
        # Encoder: [b, 784] -> [b, 20] (mean and sigma, 10 units each)
        self.encoder = nn.Sequential(
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 20),
            nn.ReLU(),
        )
        # Decoder: latent [b, 10] -> [b, 784]
        self.decoder = nn.Sequential(
            nn.Linear(10, 64),
            nn.ReLU(),
            nn.Linear(64, 256),
            nn.ReLU(),
            nn.Linear(256, 784),
            nn.Sigmoid(),  # constrain reconstructed pixel values to [0, 1]
        )

    def forward(self, x):
        """Reconstruct ``x`` and return (reconstruction, KL divergence).

        Args:
            x: tensor of shape [b, 1, 28, 28].

        Returns:
            Tuple ``(x_hat, kld)`` where ``x_hat`` has shape
            [b, 1, 28, 28] and ``kld`` is the KL term averaged over every
            element of the batch (scalar tensor).
        """
        batchsz = x.size(0)
        # Flatten image -> vector before the fully-connected stack.
        x = x.view(batchsz, 784)
        # Encoder output holds mean and sigma concatenated: [b, 20].
        h = self.encoder(x)
        mu, sigma = h.chunk(2, dim=1)  # split into two [b, 10] halves
        # Reparameterization trick: z = mu + sigma * eps with eps ~ N(0, 1),
        # keeping the sampling step differentiable w.r.t. mu and sigma.
        # BUG FIX: the original used torch.rand_like (uniform on [0, 1));
        # the trick requires standard-normal noise, i.e. torch.randn_like.
        h = mu + sigma * torch.randn_like(sigma)
        x = self.decoder(h)
        # Restore the flattened vector back to image shape.
        x = x.view(batchsz, 1, 28, 28)
        # KL(N(mu, sigma^2) || N(0, 1)) summed over the batch, then averaged
        # per element (divide by batchsz * 1 * 28 * 28). The 1e-8 keeps the
        # log finite when sigma == 0.
        kld = 0.5 * torch.sum(
            torch.pow(mu, 2) + torch.pow(sigma, 2)
            - torch.log(1e-8 + torch.pow(sigma, 2)) - 1
        ) / np.prod(x.shape)
        return x, kld
main function (training script)
import torch
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from auto_encoder import AE
from torch import nn, optim
import visdom
from varians_auto_encoder import VAE
# VAE reconstructions look worse early in training and improve later; MNIST is simple enough that the VAE shows no clear advantage over the plain AE.
def main():
    """Train the (V)AE on MNIST and visualize reconstructions via visdom."""
    mnist_train = datasets.MNIST('mnist', True, transform=transforms.Compose([
        transforms.ToTensor()
    ]), download=True)
    mnist_train = DataLoader(mnist_train, batch_size=32, shuffle=True)

    # BUG FIX: the original passed train=True here too, so the "test"
    # loader actually iterated the training split.
    mnist_test = datasets.MNIST('mnist', False, transform=transforms.Compose([
        transforms.ToTensor()
    ]), download=True)
    mnist_test = DataLoader(mnist_test, batch_size=32, shuffle=True)

    # Unsupervised training: the labels are discarded.
    # BUG FIX: iterator.next() is Python 2 only; Python 3 uses next(it).
    x, _ = next(iter(mnist_train))
    print("x:", x.shape)

    # Fall back to CPU when CUDA is unavailable (original hard-coded 'cuda').
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # model = AE().to(device)
    model = VAE().to(device)
    criteon = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    print(model)

    viz = visdom.Visdom()
    for epoch in range(1000):
        for batchidx, (x, _) in enumerate(mnist_train):
            x = x.to(device)
            x_hat, kld = model(x)
            loss = criteon(x_hat, x)
            if kld is not None:
                # Maximize the ELBO (KL weighted by 1.08, beta-VAE style);
                # minimizing -elbo = recon loss + 1.08 * kld.
                elbo = -loss - 1.08 * kld
                loss = -elbo
            # back propagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print(epoch, 'loss:', loss.item(), 'kld', kld.item())

        # Visualize reconstructions on a held-out batch once per epoch.
        x, _ = next(iter(mnist_test))
        x = x.to(device)
        with torch.no_grad():
            x_hat, kld = model(x)
        viz.images(x, nrow=8, win='x', opts=dict(title='x'))
        viz.images(x_hat, nrow=8, win='x_hat', opts=dict(title='x_hat'))


if __name__ == '__main__':
    main()