(一)DCGAN实战

一、DCGAN论文要点
  1. 通过跨步卷积层(例如:stride= 2)代替池化层(例如:最大池化层)
  2. 取消全连接隐藏层;论文指出全局平均池化能提高模型稳定性,但会降低收敛速度,因此采用折中方案:判别器最后一个卷积层的输出展平后直接接入单个sigmoid输出
  3. 生成器和判别器都使用BatchNormalization做归一化(论文指出:生成器的输出层和判别器的输入层不使用BN,否则会导致样本振荡和模型不稳定)
  4. 生成器使用转置卷积层,激活函数使用ReLU函数,但最后一层输出层使用Tanh激活函数;判别器使用卷积层,激活函数使用LeakyReLU, the slope of the leak was set to 0.2 in all models.
  5. All weights were initialized from a zero-centered Normal distribution with standard deviation 0.02. (所有权重均从零中心正态分布初始化,标准偏差为0.02),Adam优化器的beta1值设置为0.5,学习率设置为0.0002,所有模型均采用小批量随机梯度下降(SGD)训练,小批量大小(mini-batch size)为128。
二、Generator架构

Generator
隐向量z是100维,在实际代码输入中z.shape = [batch_size, 100, 1, 1],然后通过Project and reshape 成 4×4×1024的形状(PS:在很多DCGAN的代码示例中我都没看到有这一步的操作,最初我在代码中把这一层也用了转置卷积层,也就是说把shape从[batch_size, 100, 1, 1] -->[batch_size, 1024, 4, 4],但是模型不收敛,所以还是参考了官方文档的示例)

数据集:CelebA 提取码:1997

三、完整代码(详细注释)
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torchvision.utils as utils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
from torch.utils.tensorboard import SummaryWriter

# Command-line style configuration for the DCGAN training script.
parser = argparse.ArgumentParser()
parser.add_argument('--dataroot', default='../dataset/celeba', help='root path for dataset')
parser.add_argument('--workers', type=int, default=2, help='number of data loading workers')
parser.add_argument('--batch_size', type=int, default=128, help='input batch size')
parser.add_argument('--image_size', type=int, default=64, help='Spatial size of training images')
parser.add_argument('--nc', type=int, default=3, help='Number of channels in the training images')
parser.add_argument('--ngf', type=int, default=64, help='Size of feature maps in generator')
parser.add_argument('--nz', type=int, default=100, help='Size of z latent vector')
parser.add_argument('--ndf', type=int, default=64, help='Size of feature maps in discriminator')
parser.add_argument('--epochs', type=int, default=5, help='number of training epochs')
parser.add_argument('--lr', type=float, default=0.0002, help='learning rate')
parser.add_argument('--beta1', type=float, default=0.5, help='Beta1 hyperparam for Adam optimizers')
# '--ngpu' follows the double-dash style of every other option; the original
# single-dash spelling '-ngpu' stays as an alias so existing invocations keep working.
parser.add_argument('--ngpu', '-ngpu', type=int, default=1, help='number of gpu')

# Passing an explicit empty list makes argparse ignore sys.argv, so every
# option takes its default value.
args = parser.parse_args(args=[])
print(args)
# Set random seed for reproducibility
manualseed = 999
# manualseed = random.randint(1, 10000)  # use if you want new results
print("Random Seed: ", manualseed)
random.seed(manualseed)
torch.manual_seed(manualseed)
# We can use an image folder dataset the way we have it setup.
# Create the dataset: resize + center-crop to image_size, then scale each
# channel with mean 0.5 / std 0.5.
dataset = datasets.ImageFolder(
    root=args.dataroot,
    transform=transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]),
)
# Create the dataloader
dataloader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.workers,
)
# Decide which device we want to run on.
# The device string must not contain whitespace: 'cuda: 0' is rejected by
# torch.device as an invalid device string, 'cuda:0' is the valid spelling.
device = torch.device('cuda:0' if (torch.cuda.is_available() and args.ngpu > 0) else 'cpu')
# Plot some training images
real_batch = next(iter(dataloader))
plt.figure(figsize=(8, 8))
plt.axis('off')
plt.title('Training Images')
# real_batch[0].shape, real_batch[1].shape # (torch.Size([128, 3, 64, 64]), torch.Size([128]))
# Manual alternative to normalize=True below (undo the 0.5/0.5 normalization by hand):
# real_batch[0] = real_batch[0]/2 + 0.5
# image = utils.make_grid(real_batch[0][:64])
# image = np.transpose(image, (1,2,0))
# plt.imshow(image)

# Show the first 64 images as one grid; make_grid returns channels-first, imshow wants channels-last.
preview_grid = utils.make_grid(real_batch[0].to(device)[:64], padding=2, normalize=True).cpu()
plt.imshow(np.transpose(preview_grid, (1, 2, 0)))

# Script-level hyper-parameters read by the model definitions below.
ngpu = int(args.ngpu)
nc = int(args.nc)
ngf = int(args.ngf)
ndf = int(args.ndf)
nz = int(args.nz)
# 参数初始化:从 DCGAN 论文中,作者指定所有模型权重均应从均值= 0,stdev = 0.02 的正态分布中随机初始化。
# custom weights initialization called on netG and netD
def weights_init(m):
    # m作为一个形参,原则上可以传递很多的内容,为了实现多实参传递,每一个moudle要给出自己的name. 所以这句话就是返回m的名字。
    class_name = m.__class__.__name__ 
    if class_name.find('Conv') != -1: #find()函数,实现查找classname中是否含有conv字符,没有返回-1;有返回0.
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif class_name.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0.0)

# ======================
#      Generator
#=======================
class Generator(nn.Module):
    """DCGAN generator: maps a latent vector to an image with values in [-1, 1].

    Five transposed convolutions upsample a ``[b, latent_dim, 1, 1]`` input to
    ``[b, image_channels, 64, 64]``. Every layer except the last is followed
    by batch norm and ReLU; the last one is followed by Tanh.

    Args:
        ngpu: Number of GPUs requested; only stored on the instance.
        latent_dim: Channels of the latent input. Defaults to the script-level
            ``nz`` when omitted.
        feature_maps: Base channel width. Defaults to the script-level ``ngf``
            when omitted.
        image_channels: Channels of the generated image. Defaults to the
            script-level ``nc`` when omitted.
    """

    def __init__(self, ngpu, latent_dim=None, feature_maps=None, image_channels=None):
        super().__init__()
        self.ngpu = ngpu
        # Fall back to the script-level hyper-parameters so that the original
        # call ``Generator(ngpu)`` keeps working unchanged.
        latent_dim = nz if latent_dim is None else latent_dim
        feature_maps = ngf if feature_maps is None else feature_maps
        image_channels = nc if image_channels is None else image_channels

        self.generator_layers = nn.Sequential(
            # Layer 1: [b, latent_dim, 1, 1] --> [b, feature_maps*8, 4, 4]
            nn.ConvTranspose2d(latent_dim, feature_maps * 8,
                               kernel_size=4, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(feature_maps * 8),
            nn.ReLU(True),
            # Layer 2: --> [b, feature_maps*4, 8, 8]
            nn.ConvTranspose2d(feature_maps * 8, feature_maps * 4,
                               kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(feature_maps * 4),
            nn.ReLU(True),
            # Layer 3: --> [b, feature_maps*2, 16, 16]
            nn.ConvTranspose2d(feature_maps * 4, feature_maps * 2,
                               kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(feature_maps * 2),
            nn.ReLU(True),
            # Layer 4: --> [b, feature_maps, 32, 32]
            nn.ConvTranspose2d(feature_maps * 2, feature_maps,
                               kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(feature_maps),
            nn.ReLU(True),
            # Layer 5: --> [b, image_channels, 64, 64]; no batch norm on the output layer
            nn.ConvTranspose2d(feature_maps, image_channels,
                               kernel_size=4, stride=2, padding=1, bias=False),
            nn.Tanh(),
        )

    def forward(self, input):
        """Run the latent batch through the layer stack and return the images."""
        return self.generator_layers(input)
# Build the generator and move it onto the selected device.
netG = Generator(ngpu)
netG = netG.to(device)
# Wrap it in DataParallel only when more than one CUDA device was requested.
if ngpu > 1 and device.type == 'cuda':
    netG = nn.DataParallel(netG, device_ids=list(range(ngpu)))
# Re-initialise every sub-module through weights_init
# (convolution weights: mean=0, stdev=0.02).
netG.apply(weights_init)
# Show the resulting architecture.
print(netG)

# ======================
#    Discriminator
#=======================
class Discriminator(nn.Module):
    """DCGAN discriminator: scores a 64x64 image with a probability in [0, 1].

    Four stride-2 convolutions (each followed by batch norm and
    LeakyReLU(0.2)) reduce ``[b, image_channels, 64, 64]`` to
    ``[b, feature_maps*8, 4, 4]``; a final 4x4 convolution plus Sigmoid turns
    that into ``[b, 1, 1, 1]``.

    Args:
        ngpu: Number of GPUs requested; only stored on the instance.
        image_channels: Channels of the input image. Defaults to the
            script-level ``nc`` when omitted.
        feature_maps: Base channel width. Defaults to the script-level ``ndf``
            when omitted.
    """

    def __init__(self, ngpu, image_channels=None, feature_maps=None):
        super().__init__()
        self.ngpu = ngpu
        # Fall back to the script-level hyper-parameters so that the original
        # call ``Discriminator(ngpu)`` keeps working unchanged.
        image_channels = nc if image_channels is None else image_channels
        feature_maps = ndf if feature_maps is None else feature_maps

        self.main = nn.Sequential(
            # input: [image_channels, 64, 64]
            nn.Conv2d(image_channels, feature_maps, kernel_size=4, stride=2, padding=1, bias=False),
            # NOTE(review): the DCGAN paper does not apply batch norm to the
            # discriminator input layer; this layer is kept so that existing
            # state_dict keys stay valid.
            nn.BatchNorm2d(feature_maps),
            nn.LeakyReLU(0.2, inplace=True),
            # state: [feature_maps, 32, 32]
            nn.Conv2d(feature_maps, feature_maps * 2, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(feature_maps * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state: [feature_maps*2, 16, 16]
            nn.Conv2d(feature_maps * 2, feature_maps * 4, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(feature_maps * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state: [feature_maps*4, 8, 8]
            nn.Conv2d(feature_maps * 4, feature_maps * 8, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(feature_maps * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state: [feature_maps*8, 4, 4]; the 4x4 kernel collapses it to [1, 1, 1]
            nn.Conv2d(feature_maps * 8, 1, kernel_size=4, stride=1, padding=0, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, input):
        """Return the per-image real/fake probability for a batch of images."""
        return self.main(input)
    
# Build the discriminator and move it onto the selected device.
netD = Discriminator(ngpu)
netD = netD.to(device)

# Wrap it in DataParallel only when more than one CUDA device was requested.
if ngpu > 1 and device.type == 'cuda':
    netD = nn.DataParallel(netD, device_ids=list(range(ngpu)))

# Re-initialise every sub-module through weights_init
# (convolution weights: mean=0, stdev=0.02).
netD.apply(weights_init)

# Show the resulting architecture.
print(netD)

# TensorBoard writer used for visualisation
writer = SummaryWriter('runs/DCGAN')

# Initialize BCELoss function
criterion = nn.BCELoss()
# Create batch of latent vectors that we will use to visualize
#  the progression of the generator
fixed_noised = torch.randn(64, nz, 1, 1, device=device)
# Establish convention for real and fake labels during training.
# Floats (not ints) so that label tensors built from them match the float
# output of the discriminator, as BCELoss requires.
real_label = 1.
fake_label = 0.
# Setup Adam optimizers for both G and D.
# optimizerG must own the generator's parameters; it was previously built
# from netD.parameters(), so the generator was never updated.
optimizerG = optim.Adam(netG.parameters(), lr=args.lr, betas=(args.beta1, 0.999))
optimizerD = optim.Adam(netD.parameters(), lr=args.lr, betas=(args.beta1, 0.999))

# Training Loop

# Lists to keep track of progress
img_list = []
G_losses = []
D_losses = []
# Running sums of the per-step losses, kept as plain floats so that no
# autograd history is accumulated across iterations.
g_losses = 0.0
d_losses = 0.0

# Neither save_image nor torch.save creates missing directories.
os.makedirs('Picture', exist_ok=True)
os.makedirs('Model', exist_ok=True)

print("Starting Training Loop...")
# For each epoch
for epoch in range(args.epochs):
    # For each batch in the dataloader (index starts at 0)
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        ## Train with all-real batch
        netD.zero_grad()
        # Format batch
        real_batch = data[0].to(device)
        b_size = real_batch.size(0)
        # An explicit float dtype is required: BCELoss needs the target to
        # have the same dtype as the (float) discriminator output.
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        # Forward pass real batch through D
        output = netD(real_batch).view(-1)
        # Calculate loss on all-real batch
        errD_real = criterion(output, label)
        # Calculate gradients for D in backward pass
        errD_real.backward()
        # Mean discriminator score on the real batch, for logging only
        D_x = output.mean().item()

        ## Train with all-fake batch
        # Generate batch of latent vectors; b_size (not batch_size) because
        # the last batch of an epoch may be smaller.
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        # Generate fake image batch with G
        fake = netG(noise)
        # Reuse the label tensor, now filled with the fake label
        label.fill_(fake_label)
        # Classify all fake batch with D. detach() cuts the graph at the
        # generator output, so this backward pass only produces gradients
        # for the discriminator.
        output = netD(fake.detach()).view(-1)
        # Calculate D's loss on the all-fake batch
        errD_fake = criterion(output, label)
        # Calculate the gradients for this batch (accumulated with the real-batch gradients)
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        # Total discriminator loss, for reporting
        errD = errD_real + errD_fake
        # Update D
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        # Fake images are labelled as real for the generator's objective
        label.fill_(real_label)
        # D was just updated, so run the fake batch through it again
        output = netD(fake).view(-1)
        errG = criterion(output, label)
        errG.backward()
        D_G_z2 = output.mean().item()
        optimizerG.step()

        loss_d = errD.item()
        loss_g = errG.item()

        # Output training stats
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD_to_True: %.4f\tD_to_False/D_to_G: %.4f / %.4f'
                  % (epoch, args.epochs, i, len(dataloader),
                     loss_d, loss_g, D_x, D_G_z1, D_G_z2))
        # Save Losses for plotting later
        G_losses.append(loss_g)
        D_losses.append(loss_d)
        g_losses += loss_g
        d_losses += loss_d
        if i % 300 == 0:
            with torch.no_grad():
                # Keep the samples on the CPU so they can be plotted later
                # and do not pin GPU memory.
                samples = netG(fixed_noised).detach().cpu()
            # The generator ends in Tanh (values in [-1, 1]); normalize=True
            # rescales them into the displayable range instead of clipping
            # everything below 0.
            utils.save_image(samples, 'Picture/fake_epoch_{:01d}_step_{:03d}.png'.format(epoch, i),
                             normalize=True)
            img_list.append(utils.make_grid(samples, padding=2, normalize=True))
            # The running sums span all epochs, so average over the total
            # number of steps taken so far (never zero, unlike `i`).
            global_step = epoch * len(dataloader) + i
            steps_done = global_step + 1
            writer.add_scalar('Discriminator Loss', d_losses / steps_done, global_step)
            writer.add_scalar('Generator Loss', g_losses / steps_done, global_step)
    # Checkpoint after every epoch; only the parameters are saved
    torch.save(netG.state_dict(), 'Model/netG_epoch_%d.pth' % (epoch))
    torch.save(netD.state_dict(), 'Model/netD_epoch_%d.pth' % (epoch))

# Animated replay of the sample grids collected during training
fig = plt.figure(figsize=(8, 8))
plt.axis("off")
# Convert every grid to a host NumPy array first: NumPy cannot read tensors
# that live on a CUDA device. make_grid is channels-first, imshow wants
# channels-last, hence the transpose.
ims = [
    [plt.imshow(np.transpose(grid.detach().cpu().numpy(), (1, 2, 0)), animated=True)]
    for grid in img_list
]
ani = animation.ArtistAnimation(fig, ims, interval=1000, repeat_delay=1000, blit=True)

# Renders inline when this is the last expression of a notebook cell
HTML(ani.to_jshtml())
    
# Log both network graphs to TensorBoard.
dataiter = iter(dataloader)
# Each batch is [images, labels]; the discriminator takes only the images,
# and they must be on the same device as the model.
images, _ = next(dataiter)
images = images.to(device)
with writer as w:
    w.add_graph(netG, fixed_noised)
    w.add_graph(netD, images)
四、遇到的问题及解决
  1. 第一次接触argparse模块:链接: 学习博客.
  2. argparse模块报错,类似于下列的错误:
# Minimal reproduction of the argparse error discussed in this section.
import argparse
parser = argparse.ArgumentParser()
# "echo" is a positional argument.
parser.add_argument("echo",help="echo the string you use here")
# parse_args() without arguments parses sys.argv.
# NOTE(review): presumably this is the call that fails when run inside a notebook — confirm.
args = parser.parse_args()
print(args.echo)

报错信息:
在这里插入图片描述
解决:在Jupyter等交互环境中,parse_args()默认解析sys.argv,其中包含内核自身的启动参数,因此会报错;改成args = parser.parse_args(args=[])即可(注意:这样所有参数都取默认值,只适用于没有必填位置参数的解析器)

import torch
import torch.nn as nn

# Global average pooling demo: a [2, 2, 3, 3] input is reduced to
# [2, 2, 1, 1], i.e. one mean value per channel.
x = torch.randn(2, 2, 3, 3)
print(x)
model = nn.Sequential(
    # Output size 1 (not 2) is what yields the torch.Size([2, 2, 1, 1])
    # result, and is the value used for nn.AdaptiveAvgPool2d in the discriminator.
    nn.AdaptiveAvgPool2d(1)
)
y = model(x)
y.shape, y

输出结果:
tensor([[[[-1.4929,  0.8155, -1.2013],
          [ 0.2924, -0.4006,  0.6900],
          [ 0.4264,  1.1288, -0.7075]],

         [[ 0.5070, -1.2403, -1.7126],
          [-0.2472,  0.9000,  0.5351],
          [-0.2368, -0.3022, -0.4225]]],


        [[[ 1.6018, -2.8020,  0.2903],
          [-1.2826, -0.5864, -2.8019],
          [-0.2695, -0.7365,  1.0316]],

         [[-0.5240, -0.4855, -0.7353],
          [ 1.0181, -0.5583, -0.6875],
          [-0.9585,  0.2850,  0.4453]]]])
torch.Size([2, 2, 1, 1])
由于它还是四维,所以在判别器中使用了nn.AdaptiveAvgPool2d(1)之后,还要加
一个nn.Flatten()层,将数据打平后再送入最后的全连接分类层

pytorch的nn.AdaptiveAvgPool2d的参数为int或tuple,表示输出的shape的H*W,int表示H和W相等。

  1. Pytorch torch.Tensor.detach()方法的用法:假如A网络输出了一个Tensor类型的变量a, a要作为输入传入到B网络中,如果我想通过损失函数反向传播修改B网络的参数,但是不想修改A网络的参数,这个时候就可以使用detach()方法
  2. torch.fill_()函数的使用
# label.fill_(fake_label)  # overwrites every element of the tensor with the given value
# fill_ works in place and returns the same tensor, so the call can be chained.
a = torch.randn(2, 3).fill_(1)
a
输出:
tensor([[1., 1., 1.],
        [1., 1., 1.]])
  1. 动画显示学习:参考链接
五、按照论文的generator和discriminator架构
# ======================
#      Generator
#=======================
class Generator(nn.Module):
    """Generator with the channel widths of the DCGAN paper (1024-512-256-128).

    Five transposed convolutions upsample a ``[b, latent_dim, 1, 1]`` input to
    ``[b, image_channels, 64, 64]``. Every layer except the last is followed
    by batch norm and ReLU; the last one is followed by Tanh.

    Args:
        ngpu: Number of GPUs requested; only stored on the instance.
        latent_dim: Channels of the latent input. Defaults to the script-level
            ``nz`` when omitted.
        feature_maps: Base channel width; the first layer outputs 16 times
            this value. Defaults to the script-level ``ngf`` when omitted.
        image_channels: Channels of the generated image. Defaults to the
            script-level ``nc`` when omitted.
    """

    def __init__(self, ngpu, latent_dim=None, feature_maps=None, image_channels=None):
        super().__init__()
        self.ngpu = ngpu
        # Fall back to the script-level hyper-parameters so that the original
        # call ``Generator(ngpu)`` keeps working unchanged.
        latent_dim = nz if latent_dim is None else latent_dim
        feature_maps = ngf if feature_maps is None else feature_maps
        image_channels = nc if image_channels is None else image_channels

        self.generator_layers = nn.Sequential(
            # Layer 1: [b, latent_dim, 1, 1] --> [b, feature_maps*16, 4, 4]
            nn.ConvTranspose2d(latent_dim, feature_maps * 16,
                               kernel_size=4, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(feature_maps * 16),
            nn.ReLU(True),
            # Layer 2: --> [b, feature_maps*8, 8, 8]
            nn.ConvTranspose2d(feature_maps * 16, feature_maps * 8,
                               kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(feature_maps * 8),
            nn.ReLU(True),
            # Layer 3: --> [b, feature_maps*4, 16, 16]
            nn.ConvTranspose2d(feature_maps * 8, feature_maps * 4,
                               kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(feature_maps * 4),
            nn.ReLU(True),
            # Layer 4: --> [b, feature_maps*2, 32, 32]
            nn.ConvTranspose2d(feature_maps * 4, feature_maps * 2,
                               kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(feature_maps * 2),
            nn.ReLU(True),
            # Layer 5: --> [b, image_channels, 64, 64]; no batch norm on the output layer
            nn.ConvTranspose2d(feature_maps * 2, image_channels,
                               kernel_size=4, stride=2, padding=1, bias=False),
            nn.Tanh(),
        )

    def forward(self, input):
        """Run the latent batch through the layer stack and return the images."""
        return self.generator_layers(input)

# ======================
#    Discriminator
#=======================
class Discriminator(nn.Module):
    """Discriminator variant ending in global average pooling + a linear layer.

    Four stride-2 convolutions (each followed by batch norm and
    LeakyReLU(0.2)) reduce ``[b, image_channels, 64, 64]`` to
    ``[b, feature_maps*8, 4, 4]``. The result is average-pooled to 1x1,
    flattened and mapped by a linear layer plus Sigmoid to ``[b, 1]``.

    Args:
        ngpu: Number of GPUs requested; only stored on the instance.
        image_channels: Channels of the input image. Defaults to the
            script-level ``nc`` when omitted.
        feature_maps: Base channel width. Defaults to the script-level ``ndf``
            when omitted.
    """

    def __init__(self, ngpu, image_channels=None, feature_maps=None):
        super().__init__()
        self.ngpu = ngpu
        # Fall back to the script-level hyper-parameters so that the original
        # call ``Discriminator(ngpu)`` keeps working unchanged.
        image_channels = nc if image_channels is None else image_channels
        feature_maps = ndf if feature_maps is None else feature_maps

        self.main = nn.Sequential(
            # input: [image_channels, 64, 64]
            nn.Conv2d(image_channels, feature_maps, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(feature_maps),
            nn.LeakyReLU(0.2, inplace=True),
            # state: [feature_maps, 32, 32]
            nn.Conv2d(feature_maps, feature_maps * 2, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(feature_maps * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state: [feature_maps*2, 16, 16]
            nn.Conv2d(feature_maps * 2, feature_maps * 4, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(feature_maps * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state: [feature_maps*4, 8, 8]
            nn.Conv2d(feature_maps * 4, feature_maps * 8, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(feature_maps * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state: [feature_maps*8, 4, 4]
            nn.AdaptiveAvgPool2d(1),  # --> [b, feature_maps*8, 1, 1]
            nn.Flatten(),             # --> [b, feature_maps*8]
            # in_features follows the last convolution's width instead of a
            # hard-coded 512, which only matched feature_maps == 64.
            nn.Linear(in_features=feature_maps * 8, out_features=1),  # --> [b, 1]
            nn.Sigmoid(),
        )

    def forward(self, input):
        """Return the per-image real/fake probability for a batch of images."""
        return self.main(input)

但是我使用上面的架构,模型不会收敛,具体原因我还不清楚(一个可能的因素:DCGAN论文提到全局平均池化虽然能提高稳定性,但会降低收敛速度)。

  • 3
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值