I recently reproduced a paper that uses a VAE with a ResNet18 encoder and a residual decoder. This post is a quick record of the work.
This was my first time writing this kind of model, so here are a few pitfalls I fell into:
1. I used plain SGD as the optimizer at first, and the loss exploded on the second batch. Switching to Adam solved the problem (my guess: with no momentum set, the gradient oscillations were too large).
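A minimal sketch of that swap (the model variable and learning rate here are placeholders, not the exact values from my run):

# before: momentum-free SGD, loss blew up on the second batch
# optimizer = optim.SGD(model.parameters(), lr=1e-3)
# after: Adam's per-parameter adaptive step sizes damped the oscillation
optimizer = optim.Adam(model.parameters(), lr=1e-3)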
The full code is as follows:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision import transforms, utils
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
import os
from PIL import Image
import numpy as np
from torchvision.datasets import ImageFolder
import torchvision
# Replace deconvolution (transposed convolution) with upsampling followed by convolution
class ResizeConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, scale_factor, mode='nearest'):
        super().__init__()
        self.scale_factor = scale_factor
        self.mode = mode
        # padding=1 preserves spatial size only for kernel_size=3
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=1)

    def forward(self, x):
        # upsample first, then convolve
        x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
        x = self.conv(x)
        return x
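# Quick shape check (my own sanity test, not part of the paper's code):
# with a 3x3 kernel and padding=1, only scale_factor changes H and W.
# >>> up = ResizeConv2d(64, 32, kernel_size=3, scale_factor=2)
# >>> up(torch.randn(1, 64, 8, 8)).shape  # torch.Size([1, 32, 16, 16])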
class ResNet18Enc(nn.Module):
    def __init__(self, z_dim=32):
        super(ResNet18Enc, self).__init__()
        self.z_dim = z_dim
        # reuse a pretrained ResNet18 as the encoder backbone
        self.ResNet18 = models.resnet18(pretrained=True)
        self.num_feature = self.ResNet18.fc.in_features
        # replace the classification head with a 2*z_dim output: mu and logvar
        self.ResNet18.fc = nn.Linear(self.num_feature, 2 * self.z_dim)

    def forward(self, x):
        x = self.ResNet18(x)
        # first half of the output is mu, second half is logvar
        mu = x[:, :self.z_dim]
        logvar = x[:, self.z_dim:]
        return mu, logvar
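# Hedged sketch: the encoder's (mu, logvar) pair is meant for the standard VAE
# reparameterization trick. The actual sampling step lives in the full VAE class
# (not shown in this excerpt); it would do something equivalent to this helper:
def reparameterize(mu, logvar):
    std = torch.exp(0.5 * logvar)  # logvar is log(sigma^2), so std = exp(logvar / 2)
    eps = torch.randn_like(std)    # eps ~ N(0, I)
    return mu + eps * std          # z ~ N(mu, sigma^2), differentiable w.r.t. mu, logvar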
class BasicBlockDec(nn.Module):
    def __init__(self, in_planes, stride=1):