基于pytorch实现DCGAN以及代码详解

最新推荐文章于 2024-02-22 07:24:03 发布
FelixWang0515
最新推荐文章于 2024-02-22 07:24:03 发布
阅读量3.6k
点赞数 5
分类专栏：深度学习文章标签： GAN
深度学习专栏收录该内容
2 篇文章 1 订阅
订阅专栏
代码详细解释直接附在具体的代码模块
# -*- coding: utf-8 -*-
from __future__ import print_function
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML

# Fix the random seed so that repeated runs are reproducible.
# For different results on every run, draw the seed at random instead,
# e.g. with random.randint(1, 10000).
manualSeed = 999
random.seed(manualSeed)
torch.manual_seed(manualSeed)
print("Random Seed: ", manualSeed)


# ---------------------------------------------------------------------------
# Data settings
# ---------------------------------------------------------------------------
# Root directory of the dataset.
dataroot = "/home/jsx/wjq/celeba/"
# Number of DataLoader workers used to load the data.
workers = 4
# Number of images per batch (128 in the DCGAN paper).
batch_size = 128
# Spatial resolution of the training images (64x64 by default). The generator
# and discriminator architectures must be changed if another size is used.
image_size = 64
# Number of colour channels of the input images; 3 for RGB images.
nc = 3

# ---------------------------------------------------------------------------
# Model settings
# ---------------------------------------------------------------------------
# Length of the latent vector z fed to the generator.
nz = 100
# Base depth (channel count) of the generator feature maps.
ngf = 64
# Base depth (channel count) of the discriminator feature maps.
ndf = 64

# ---------------------------------------------------------------------------
# Optimisation settings
# ---------------------------------------------------------------------------
# Number of training epochs.
num_epochs = 10
# Learning rate (0.0002 in the DCGAN paper).
lr = 0.0002
# beta1 hyper-parameter of the Adam optimizers (0.5 in the DCGAN paper).
beta1 = 0.5
# Number of GPUs to use; 0 selects CPU mode.
ngpu = 1


# Build the training set from an image-folder layout rooted at `dataroot`.
# Preprocessing steps, applied in order:
#   1. Resize     - given a single int, the shorter image side is scaled to
#                   `image_size` (a (h, w) pair would force an exact size).
#   2. CenterCrop - given a single int, crops a centred
#                   image_size x image_size square.
#   3. ToTensor   - converts the image to a tensor.
#   4. Normalize  - per channel (x - mean) / std with mean = std = 0.5.
dataset = dset.ImageFolder(
    root=dataroot,
    transform=transforms.Compose([
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]),
)

# Shuffled mini-batch loader on top of the dataset.
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
)

# Run on the first CUDA device when CUDA is available and at least one GPU
# was requested; otherwise fall back to the CPU.
if torch.cuda.is_available() and ngpu > 0:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

print("Using gpu?",torch.cuda.is_available())

# Pull a single batch (iter() creates the iterator, next() takes its first
# element) and arrange its first 64 images into one grid image.
real_batch = next(iter(dataloader))
preview_grid = vutils.make_grid(
    real_batch[0].to(device)[:64], padding=2, normalize=True
).cpu()

# Show the grid; the axes are transposed from (C, H, W) to (H, W, C),
# which is the layout imshow expects.
plt.figure(figsize=(8,8))
plt.axis("off")
plt.title("Training Images")
plt.imshow(np.transpose(preview_grid, (1, 2, 0)))

def weights_init(m):
    """Re-initialise a single module in place following the DCGAN scheme.

    Meant to be handed to ``nn.Module.apply``, which calls it once for every
    submodule. The module kind is decided from its class name:

    * name contains ``Conv``: weights are drawn from a normal distribution
      with mean 0.0 and standard deviation 0.02;
    * name contains ``BatchNorm``: weights are drawn from a normal
      distribution with mean 1.0 and standard deviation 0.02, and the bias
      is set to 0;
    * any other module is left untouched.

    Matching modules whose ``weight`` or ``bias`` is missing or ``None``
    (for example ``BatchNorm2d(affine=False)``) are skipped instead of
    raising ``AttributeError``.

    Args:
        m: the module to initialise.
    """
    classname = m.__class__.__name__
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)
    if 'Conv' in classname:
        if weight is not None:
            # normal_(tensor, mean, std)
            nn.init.normal_(weight.data, 0.0, 0.02)
    elif 'BatchNorm' in classname:
        if weight is not None:
            nn.init.normal_(weight.data, 1.0, 0.02)
        if bias is not None:
            nn.init.constant_(bias.data, 0)


class Generator(nn.Module):
    """DCGAN generator: maps a latent vector z to an image.

    The architecture is read from module-level settings when an instance is
    built: ``nz`` is the number of latent channels, ``ngf`` scales the channel
    count of the intermediate feature maps and ``nc`` is the number of output
    image channels.

    For a latent input of shape (N, nz, 1, 1) the feature maps evolve as
        (ngf*8) x 4 x 4 -> (ngf*4) x 8 x 8 -> (ngf*2) x 16 x 16
        -> (ngf) x 32 x 32 -> (nc) x 64 x 64
    and the final Tanh bounds the output to [-1, 1].
    """

    def __init__(self, ngpu):
        """Build the layer stack.

        Args:
            ngpu: number of GPUs; stored on the instance, not used by
                ``forward``.
        """
        super(Generator, self).__init__()
        self.ngpu = ngpu

        # (in_channels, out_channels, stride, padding) of each transposed
        # convolution that is followed by batch norm and ReLU. Kernel size is
        # always 4. The first stage expands 1x1 to 4x4; every later stage
        # doubles the spatial size.
        hidden_stages = (
            (nz, ngf * 8, 1, 0),
            (ngf * 8, ngf * 4, 2, 1),
            (ngf * 4, ngf * 2, 2, 1),
            (ngf * 2, ngf, 2, 1),
        )

        layers = []
        for in_ch, out_ch, stride, padding in hidden_stages:
            layers.append(
                nn.ConvTranspose2d(in_ch, out_ch, 4, stride, padding, bias=False)
            )
            # Batch norm normalises each channel with the statistics of the
            # current mini-batch.
            layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.ReLU(True))

        # Output stage: (ngf) x 32 x 32 -> (nc) x 64 x 64, no batch norm.
        layers.append(nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False))
        layers.append(nn.Tanh())

        # Sequential container that runs the layers in the order given.
        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Run the latent batch ``input`` through the layer stack."""
        return self.main(input)

# Create the generator and move its parameters and buffers to `device`.
netG = Generator(ngpu)
netG = netG.to(device)

# When running on CUDA with more than one GPU requested, wrap the model in
# DataParallel over devices 0 .. ngpu-1. An explicit list such as [0, 2, 3]
# would select specific cards instead.
if device.type == 'cuda' and ngpu > 1:
    netG = nn.DataParallel(netG, device_ids=list(range(ngpu)))

# Re-initialise every submodule with weights_init (normal distribution with
# standard deviation 0.02).
netG.apply(weights_init)

# Print the model structure.
print(netG)

class Discriminator(nn.Module):
    """DCGAN discriminator: a binary classifier over images.

    Takes an image batch and returns, per image, a sigmoid score that is read
    as the probability of the image being real. Following the DCGAN design,
    down-sampling is done by strided convolutions instead of pooling layers
    (so the network learns its own down-sampling), and LeakyReLU is used as
    the activation.

    The architecture is read from module-level settings when an instance is
    built: ``nc`` is the number of input image channels and ``ndf`` scales the
    channel count of the feature maps.

    For an input of shape (N, nc, 64, 64) the feature maps evolve as
        (ndf) x 32 x 32 -> (ndf*2) x 16 x 16 -> (ndf*4) x 8 x 8
        -> (ndf*8) x 4 x 4 -> 1 x 1 x 1
    """

    def __init__(self, ngpu):
        """Build the layer stack.

        Args:
            ngpu: number of GPUs; stored on the instance, not used by
                ``forward``.
        """
        super(Discriminator, self).__init__()
        self.ngpu = ngpu

        # Input stage: (nc) x 64 x 64 -> (ndf) x 32 x 32, no batch norm.
        layers = [
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
        ]

        # Three stages that halve the spatial size and double the channel
        # count: convolution -> batch norm -> LeakyReLU.
        for in_ch in (ndf, ndf * 2, ndf * 4):
            out_ch = in_ch * 2
            layers.append(nn.Conv2d(in_ch, out_ch, 4, 2, 1, bias=False))
            layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.LeakyReLU(0.2, inplace=True))

        # Output stage: (ndf*8) x 4 x 4 -> one score per image.
        layers.append(nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False))
        layers.append(nn.Sigmoid())

        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Run the image batch ``input`` through the layer stack."""
        return self.main(input)


######################################################################
# As with the generator: create the discriminator, apply the
# ``weights_init`` function, and print the model's structure.
#

# Create the discriminator and move its parameters and buffers to `device`.
netD = Discriminator(ngpu)
netD = netD.to(device)

# Handle multi-GPU if desired: on CUDA with more than one GPU requested,
# wrap the model in DataParallel over devices 0 .. ngpu-1.
if device.type == 'cuda' and ngpu > 1:
    netD = nn.DataParallel(netD, device_ids=list(range(ngpu)))

# Re-initialise every submodule with weights_init (normal distribution with
# standard deviation 0.02).
netD.apply(weights_init)

# Print the model structure.
print(netD)


# Loss function: binary cross entropy (BCELoss).
criterion = nn.BCELoss()

# Fixed batch of 64 latent vectors, shape (64, nz, 1, 1), drawn from a
# standard normal distribution. It is reused throughout training to
# visualise how the generator's output evolves.
fixed_noise = torch.randn(64, nz, 1, 1, device=device)

# Label convention used during training: real images -> 1, fakes -> 0.
real_label, fake_label = 1, 0

# One Adam optimizer per network (D first, then G). Each receives the
# parameters to optimise (net.parameters()), the learning rate and Adam's
# beta coefficients.
optimizerD, optimizerG = (
    optim.Adam(net.parameters(), lr=lr, betas=(beta1, 0.999))
    for net in (netD, netG)
)

# Training-phase bookkeeping.
img_list, G_losses, D_losses = [], [], []  # image grids and per-iteration losses
iters = 0                                  # iterations completed so far

print("Starting Training Loop...")
# Each epoch is one complete pass over the dataloader.
for epoch in range(num_epochs):
    # For each batch in the dataloader
    for i, data in enumerate(dataloader, 0):

        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        ## Train with all-real batch
        netD.zero_grad()   # reset D's gradients before accumulating new ones
        # Format batch: data[0] is the image batch; it is created on the CPU
        # by the dataloader, so move it to the training device.
        real_cpu = data[0].to(device)
        b_size = real_cpu.size(0)  # dimension 0 is the batch dimension
        # Target vector filled with the "real" label. The dtype is given
        # explicitly: with an integer fill value torch.full infers an integer
        # dtype on current PyTorch releases, and BCELoss needs a float target
        # matching D's float output.
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        # Forward pass of the real batch through D (calls forward implicitly);
        # .view(-1) flattens the result to 1-D so it lines up with `label`.
        output = netD(real_cpu).view(-1)
        # Loss on the all-real batch; output and label both have b_size entries.
        errD_real = criterion(output, label)
        # Backward pass: accumulates gradients in D; the weights themselves
        # only change in optimizerD.step() below.
        errD_real.backward()
        # Mean of D's output on the real batch, as a Python number (logging).
        D_x = output.mean().item()

        ## Train with all-fake batch
        # Generate batch of latent vectors
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        # Generate fake image batch with G
        fake = netG(noise)
        # Re-use the label tensor, now filled with the "fake" label.
        label.fill_(fake_label)
        # .detach() cuts `fake` out of the autograd graph so that this
        # backward pass computes gradients for D only and does not flow
        # back into G.
        output = netD(fake.detach()).view(-1)
        # Calculate D's loss on the all-fake batch
        errD_fake = criterion(output, label)
        # Calculate the gradients for this batch (added to those from the
        # all-real batch)
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        # Total discriminator loss, kept for reporting
        errD = errD_real + errD_fake
        # Update D
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        label.fill_(real_label)  # fake labels are real for generator cost
        # Since we just updated D, perform another forward pass of all-fake batch through D
        output = netD(fake).view(-1)
        # Calculate G's loss based on this output
        errG = criterion(output, label)
        # Calculate gradients for G
        errG.backward()
        D_G_z2 = output.mean().item()
        # Update G
        optimizerG.step()

        # Output training stats
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, num_epochs, i, len(dataloader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

        # Save Losses for plotting later
        G_losses.append(errG.item())
        D_losses.append(errD.item())

        # Check how the generator is doing by saving G's output on fixed_noise:
        # every 500 iterations and on the very last batch of the last epoch.
        if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):
            with torch.no_grad():  # no gradient tracking needed for this forward pass
                fake = netG(fixed_noise).detach().cpu()  # copy the result to host memory
            img_list.append(vutils.make_grid(fake, padding=2, normalize=True))

        iters += 1


# Results: generator and discriminator loss for every training iteration.
plt.figure(figsize=(10,5))
plt.title("Generator and Discriminator Loss During Training")
for curve_label, curve_values in (("G", G_losses), ("D", D_losses)):
    plt.plot(curve_values, label=curve_label)
plt.xlabel("iterations")
plt.ylabel("Loss")
plt.legend()
plt.show()


# **Visualization of G's progression**

# Grab a batch of real images from the dataloader and arrange the first 64
# of them into one grid image.
real_batch = next(iter(dataloader))
real_grid = vutils.make_grid(
    real_batch[0].to(device)[:64], padding=5, normalize=True
).cpu()

# Side-by-side comparison: real images on the left, the most recently saved
# grid of generated images on the right. Each grid is transposed from
# (C, H, W) to (H, W, C) for imshow.
plt.figure(figsize=(15,15))
panels = (("Real Images", real_grid), ("Fake Images", img_list[-1]))
for panel_index, (panel_title, panel_grid) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_index)
    plt.axis("off")
    plt.title(panel_title)
    plt.imshow(np.transpose(panel_grid, (1, 2, 0)))
plt.show()