beta-VAE (β-VAE) Implementation Walkthrough
Code source: GitHub
Reference paper: beta-VAE: Learning Basic Visual Concepts with a Constrained Variational Framework
The main program is listed below. It runs on a GPU and uses the PyTorch framework.
The main program defines the command-line arguments used by the rest of the code and sets up the runtime environment, so everything can be seen at a glance and later modification and debugging stay straightforward.
import argparse
import numpy as np
import torch
from solver import Solver
from utils import str2bool
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True
def main(args):
    seed = args.seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)

    net = Solver(args)

    if args.train:
        net.train()
    else:
        net.traverse()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Beta-VAE')
parser.add_argument('--train', default=True, type=str2bool, help='train or traverse')
parser.add_argument('--seed', default=1, type=int, help='random seed')
parser.add_argument('--cuda', default=True, type=str2bool, help='enable cuda')
parser.add_argument('--max_iter', default=1e6, type=float, help='maximum training iteration')
parser.add_argument('--batch_size', default=64, type=int, help='batch size')
parser.add_argument('--z_dim', default=10, type=int, help='dimension of the representation z')
parser.add_argument('--beta', default=4, type=float, help='beta parameter for KL-term in original beta-VAE')
parser.add_argument('--objective', default='H', type=str, help='beta-vae objective proposed in Higgins et al. or Burgess et al. H/B')
parser.add_argument('--model', default='H', type=str, help='model proposed in Higgins et al. or Burgess et al. H/B')
parser.add_argument('--gamma', default=1000, type=float, help='gamma parameter for KL-term in understanding beta-VAE')
parser.add_argument('--C_max', default=25, type=float, help='capacity parameter(C) of bottleneck channel')
parser.add_argument('--C_stop_iter', default=1e5, type=float, help='when to stop increasing the capacity')
parser.add_argument('--lr', default=1e-4, type=float, help='learning rate')
parser.add_argument('--beta1', default=0.9, type=float, help='Adam optimizer beta1')
parser.add_argument('--beta2', default=0.999, type=float, help='Adam optimizer beta2')
parser.add_argument('--dset_dir', default='data', type=str, help='dataset directory')
parser.add_argument('--dataset', default='CelebA', type=str, help='dataset name')
parser.add_argument('--image_size', default=64, type=int, help='image size. now only (64,64) is supported')
parser.add_argument('--num_workers', default=2, type=int, help='dataloader num_workers')
parser.add_argument('--viz_on', default=True, type=str2bool, help='enable visdom visualization')
parser.add_argument('--viz_name', default='main', type=str, help='visdom env name')
parser.add_argument('--viz_port', default=8097, type=str, help='visdom port number')
parser.add_argument('--save_output', default=True, type=str2bool, help='save traverse images and gif')
parser.add_argument('--output_dir', default='outputs', type=str, help='output directory')
parser.add_argument('--gather_step', default=1000, type=int, help='numer of iterations after which data is gathered for visdom')
parser.add_argument('--display_step', default=10000, type=int, help='number of iterations after which loss data is printed and visdom is updated')
parser.add_argument('--save_step', default=10000, type=int, help='number of iterations after which a checkpoint is saved')
parser.add_argument('--ckpt_dir', default='checkpoints', type=str, help='checkpoint directory')
parser.add_argument('--ckpt_name', default='last', type=str, help='load previous checkpoint. insert checkpoint filename')
args = parser.parse_args()
main(args)
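The --objective, --beta, --gamma, --C_max and --C_stop_iter arguments correspond to the two training objectives referenced in the help strings. The following is only a minimal sketch of how these two losses are defined in the papers by Higgins et al. ('H') and Burgess et al. ('B'); it is an illustration based on the formulas, not the repository's solver code, and the Bernoulli reconstruction term is an assumed choice of likelihood.

import torch
import torch.nn.functional as F

def kl_divergence(mu, logvar):
    # KL( N(mu, sigma^2) || N(0, I) ), summed over latent dimensions, averaged over the batch
    return (-0.5 * (1 + logvar - mu.pow(2) - logvar.exp()).sum(1)).mean()

def beta_vae_loss(x, x_recon, mu, logvar, global_iter,
                  objective='H', beta=4.0, gamma=1000.0, C_max=25.0, C_stop_iter=1e5):
    # Illustrative Bernoulli reconstruction term, averaged over the batch.
    recon_loss = F.binary_cross_entropy_with_logits(x_recon, x, reduction='sum') / x.size(0)
    total_kld = kl_divergence(mu, logvar)

    if objective == 'H':
        # Higgins et al.: L = reconstruction + beta * KL
        return recon_loss + beta * total_kld
    else:
        # Burgess et al.: anneal the target capacity C linearly from 0 to C_max
        # over C_stop_iter steps, then penalize |KL - C| with weight gamma.
        C = min(C_max, C_max / C_stop_iter * global_iter)
        return recon_loss + gamma * (total_kld - C).abs()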
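The str2bool helper imported from utils is not listed in this post. A minimal sketch of what such an argparse type converter usually looks like is given below; this is an assumption for completeness, and the repository's utils.str2bool may differ in detail.

import argparse

def str2bool(v):
    # Parse common string spellings of a boolean command-line flag.
    # Assumed implementation, shown only so the argparse calls above are self-contained.
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')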
Dataset
This program mainly uses the CelebA dataset; the other supported datasets serve as test data for checking the model's ability to generalize. CelebA is an open dataset released by the Chinese University of Hong Kong, containing 202,599 images of 10,177 celebrity identities, all annotated with attribute labels, which makes it a very convenient dataset for face-related training.
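Before the loading module itself, note how CelebA has to be laid out on disk: the loader below is built on torchvision's ImageFolder, which scans the subdirectories of the given root, so the aligned face images are typically placed in a single subfolder. The snippet below is only a quick sanity check under that assumed layout (the folder names are an example, not something fixed by the code).

from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder

# Assumed on-disk layout (example folder names):
#   data/CelebA/img_align_celeba/000001.jpg, 000002.jpg, ...
# ImageFolder requires at least one subdirectory, which it treats as a class folder.
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])
dset = ImageFolder(root='data/CelebA', transform=transform)
loader = DataLoader(dset, batch_size=64, shuffle=True)
images, _ = next(iter(loader))  # plain ImageFolder also returns a (dummy) class label
print(images.shape)             # expected: torch.Size([64, 3, 64, 64])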
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
from torchvision import transforms
def is_power_of_2(num):
    return ((num & (num - 1)) == 0) and num != 0
class CustomImageFolder(ImageFolder):
    def __init__(self, root, transform=None):
        super(CustomImageFolder, self).__init__(root, transform)

    def __getitem__(self, index):
        # Return only the image, dropping the class label ImageFolder would normally return.
        path = self.imgs[index][0]
        img = self.loader(path)
        if self.transform is not None:
            img = self.transform(img)

        return img


class CustomTensorDataset(Dataset):
    def __init__(self, data_tensor):
        self.data_tensor = data_tensor

    def __getitem__(self, index):
        return self.data_tensor[index]

    def __len__(self):
        return self.data_tensor.size(0)
def return_data(args):
    name = args.dataset
    dset_dir = args.dset_dir
    batch_size = args.batch_size
    num_workers = args.num_workers
    image_size = args.image_size
    assert image_size == 64, 'currently only image size of 64 is supported'

    if name.lower() == '3dchairs':
        root = os.path.join(dset_dir, '3DChairs')
        transform = transforms.Compose([
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(),])
        train_kwargs = {'root': root, 'transform': transform}
        dset = CustomImageFolder
    elif name.lower() == 'celeba':
        root = os.path.join(dset_dir, 'CelebA')
        transform = transforms.Compose([
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(),])
        train_kwargs = {'root': root, 'transform': transform}
        dset = CustomImageFolder
    elif name.lower() == 'dsprites':
        root = os.path.join(dset_dir, 'dsprites-dataset/dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz')
        if not os.path.exists(root):
            import subprocess
            print('Now download dsprites-dataset')
            subprocess.call(['./download_dsprites.sh'])
            print('Finished')
        data = np.load(root, encoding='bytes')
        # dSprites ships as a single npz archive; add a channel dimension so its shape matches the image datasets.
        data = torch.from_numpy(data['imgs']).unsqueeze(1).float()
        train_kwargs = {'data_tensor': data}
        dset = CustomTensorDataset
    else:
        raise NotImplementedError

    train_data = dset(**train_kwargs)
    train_loader = DataLoader(train_data,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=num_workers,
                              pin_memory=True,
                              drop_last=True