dcgan-mnist ----pytorch版代码全注释

最新推荐文章于 2024-06-28 22:48:25 发布

sher lock

最新推荐文章于 2024-06-28 22:48:25 发布

阅读量357

点赞数 1

分类专栏： pytorch

本文链接：https://blog.csdn.net/weixin_45564209/article/details/119788851

版权

pytorch 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

这是一个使用PyTorch实现的深度卷积生成对抗网络（DCGAN）在MNIST数据集上的训练代码。代码中包含了模型定义、训练过程、损失函数计算、优化器设置以及生成图片的可视化。主要涉及了生成器和判别器的网络结构，以及训练过程中真伪样本的判别和损失函数的计算。

摘要由CSDN通过智能技术生成

github地址

main.py

import argparse
import logging
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.autograd import Variable
from torchvision.utils import save_image


from utils import get_data_loader, generate_images, save_gif
from model import Generator,Discriminator


def build_arg_parser():
    """Create the command-line parser for the DCGAN MNIST training script.

    Every setting is an optional ``--flag`` with a default value, so the
    script can be launched without any arguments.

    Returns:
        argparse.ArgumentParser: parser whose parsed namespace exposes
        ``num_epochs``, ``ndf``, ``ngf``, ``nz``, ``d_lr``, ``g_lr``, ``nc``,
        ``batch_size``, ``num_test_samples``, ``output_path``, ``fps`` and
        ``use_fixed``.
    """
    parser = argparse.ArgumentParser(description='DCGANS MNIST')

    # NOTE: the leading "--" matters. Without it argparse registers a required
    # positional argument, ignores the default, and keeps the dash in the
    # attribute name (so ``opt.num_epochs`` would not exist).
    parser.add_argument('--num-epochs', type=int, default=100, help='Number of training epochs')
    parser.add_argument('--ndf', type=int, default=32, help='Number of features to be used in Discriminator network')
    parser.add_argument('--ngf', type=int, default=32, help='Number of features to be used in Generator network')
    parser.add_argument('--nz', type=int, default=100, help='Size of the noise')
    parser.add_argument('--d-lr', type=float, default=0.0002, help='Learning rate for the discriminator')
    parser.add_argument('--g-lr', type=float, default=0.0002, help='Learning rate for the generator')
    parser.add_argument('--nc', type=int, default=1, help='Number of input channels. Ex: for grayscale images: 1 and RGB images: 3 ')
    parser.add_argument('--batch-size', type=int, default=128, help='Batch size')
    parser.add_argument('--num-test-samples', type=int, default=16, help='Number of samples to visualize')
    parser.add_argument('--output-path', type=str, default='./results/', help='Path to save the images')
    parser.add_argument('--fps', type=int, default=5, help='frames-per-second value for the gif')
    # Flag: True when present on the command line, False otherwise.
    parser.add_argument('--use-fixed', action='store_true', help='Boolean to use fixed noise or not')
    return parser


# Run the training only when this file is executed directly as a script.
if __name__ == '__main__':

    # Parse and echo the run configuration.
    opt = build_arg_parser().parse_args()
    print(opt)

    # Training data loader (defined in utils).
    train_loader = get_data_loader(opt.batch_size)

    # Use CUDA when it is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("Using", device)

    # Define Discriminator and Generator architectures.
    # Generator takes (nc, nz, ngf); Discriminator takes (nc, ndf).
    netG = Generator(opt.nc, opt.nz, opt.ngf).to(device)
    netD = Discriminator(opt.nc, opt.ndf).to(device)

    # Binary cross-entropy between discriminator outputs and real/fake labels.
    criterion = nn.BCELoss()

    # One Adam optimizer per network, each with its own learning rate.
    optimizerD = optim.Adam(netD.parameters(), lr=opt.d_lr)
    optimizerG = optim.Adam(netG.parameters(), lr=opt.g_lr)

    # Target values for real and generated samples (floats, as BCELoss
    # requires floating-point targets).
    real_label = 1.0
    fake_label = 0.0
    # Number of batches per epoch, used only for progress reporting.
    num_batches = len(train_loader)

    # Noise reused every epoch for visualisation. Its channel count must be
    # opt.nz so that it matches the generator's input, whatever --nz is.
    fixed_noise = torch.randn(opt.num_test_samples, opt.nz, 1, 1, device=device)

    for epoch in range(opt.num_epochs):
        # The dataset labels are not needed for GAN training.
        for i, (real_images, _) in enumerate(train_loader):

            # Actual batch size (the last batch of an epoch may be smaller).
            bs = real_images.shape[0]

            ##############################
            #   Training discriminator   #
            ##############################

            # Reset the discriminator gradients.
            netD.zero_grad()
            real_images = real_images.to(device)
            # Float labels are created directly; torch.full with an integer
            # fill value would otherwise produce an integer tensor that
            # BCELoss rejects.
            label = torch.full((bs,), real_label, dtype=torch.float32, device=device)
            # Discriminator loss on real images (target: real_label).
            output = netD(real_images)
            lossD_real = criterion(output, label)
            lossD_real.backward()
            # Mean discriminator output on real images, for logging.
            D_x = output.mean().item()

            # Generate a batch of fake images from fresh noise.
            noise = torch.randn(bs, opt.nz, 1, 1, device=device)
            fake_images = netG(noise)
            label.fill_(fake_label)
            # detach() keeps this backward pass from reaching the generator.
            output = netD(fake_images.detach())
            lossD_fake = criterion(output, label)
            lossD_fake.backward()
            # Mean discriminator output on fakes before the D update, for logging.
            D_G_z1 = output.mean().item()
            # Total discriminator loss; only used for logging because both
            # parts have already been back-propagated.
            lossD = lossD_real + lossD_fake

            # update weights
            optimizerD.step()

            ##########################
            #   Training generator   #
            ##########################

            # Reset the generator gradients.
            netG.zero_grad()
            # The generator is trained to make the discriminator answer "real".
            label.fill_(real_label)
            # No detach() here: gradients must flow back into the generator.
            output = netD(fake_images)
            lossG = criterion(output, label)
            lossG.backward()
            # Mean discriminator output on fakes after the D update, for logging.
            D_G_z2 = output.mean().item()
            # update weights
            optimizerG.step()

            if (i+1)%100 == 0:
                print('Epoch [{}/{}], step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}, D(x): {:.2f}, Discriminator - D(G(x)): {:.2f}, Generator - D(G(x)): {:.2f}'.format(epoch+1, opt.num_epochs, 
                                                            i+1, num_batches, lossD.item(), lossG.item(), D_x, D_G_z1, D_G_z2))
        # Switch to evaluation mode while sampling images for this epoch.
        netG.eval()
        generate_images(epoch, opt.output_path, fixed_noise, opt.num_test_samples, netG, device, use_fixed=opt.use_fixed)
        # Back to training mode for the next epoch.
        netG.train()

    # Save gif:
    save_gif(opt.output_path, opt.fps, fixed_noise=opt.use_fixed)

model.py

import torch.nn as nn
import torch.nn.functional as F



class Generator(nn.Module):
    """Transposed-convolution generator mapping a noise tensor to an image.

    The layer stack is three ConvTranspose2d + BatchNorm2d + ReLU stages
    followed by a final ConvTranspose2d and Tanh, so outputs lie in [-1, 1].
    For an input of shape (N, nz, 1, 1) the spatial size grows
    1 -> 4 -> 7 -> 14 -> 28.

    Args:
        nc: Number of channels of the generated image.
        nz: Number of channels of the input noise.
        ngf: Base number of generator feature maps.
    """

    def __init__(self, nc, nz, ngf):
        super(Generator, self).__init__()

        wide, mid = ngf * 4, ngf * 2
        stages = [
            # (N, nz, 1, 1) -> (N, 4*ngf, 4, 4)
            nn.ConvTranspose2d(nz, wide, kernel_size=4, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(wide),
            nn.ReLU(inplace=True),
            # -> (N, 2*ngf, 7, 7)
            nn.ConvTranspose2d(wide, mid, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(mid),
            nn.ReLU(inplace=True),
            # -> (N, ngf, 14, 14)
            nn.ConvTranspose2d(mid, ngf, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(inplace=True),
            # -> (N, nc, 28, 28), squashed into [-1, 1]
            nn.ConvTranspose2d(ngf, nc, kernel_size=4, stride=2, padding=1, bias=False),
            nn.Tanh(),
        ]
        self.network = nn.Sequential(*stages)

    def forward(self, input):
        """Run the noise tensor ``input`` through the layer stack."""
        return self.network(input)

class Discriminator(nn.Module):
    """Convolutional discriminator producing one probability per image.

    Three strided convolutions with LeakyReLU(0.2) activations (BatchNorm2d
    on the second and third) are followed by a 4x4 convolution and a Sigmoid.
    For 28x28 inputs the spatial size shrinks 28 -> 14 -> 7 -> 4 -> 1.

    Args:
        nc: Number of channels of the input image.
        ndf: Base number of discriminator feature maps.
    """

    def __init__(self, nc, ndf):
        super().__init__()

        mid, wide = ndf * 2, ndf * 4
        stages = [
            # kernel 4 / stride 2 / padding 1 halves width and height.
            nn.Conv2d(nc, ndf, kernel_size=4, stride=2, padding=1, bias=False),
            # 0.2 is the negative slope; inplace=True overwrites the input tensor.
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(ndf, mid, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(mid),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(mid, wide, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(wide),
            nn.LeakyReLU(0.2, inplace=True),

            # Collapse the remaining 4x4 map into a single score.
            nn.Conv2d(wide, 1, kernel_size=4, stride=1, padding=0, bias=False),
            nn.Sigmoid(),
        ]
        self.network = nn.Sequential(*stages)

    def forward(self, input):
        """Return a 1-D tensor of sigmoid scores for the batch ``input``."""
        scores = self.network(input)
        # Flatten to a column, then drop the trailing dimension.
        column = scores.view(-1, 1)
        return column.squeeze(1)

utils.py

import torch
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
import math
import itertools
import imageio
import natsort
from glob import glob

from torchvision.datasets.mnist import MNIST

def get_data_loader(batch_size):
    """Build a shuffled DataLoader over the MNIST training split.

    The dataset is stored under ``./mnist_data/`` and is requested with
    ``download=True``. Each image is converted to a tensor and normalised
    with mean 0.1307 and std 0.3081.

    Args:
        batch_size: Number of samples per batch.

    Returns:
        A ``torch.utils.data.DataLoader`` over the MNIST training set.
    """
    # NOTE(review): with these statistics the normalised pixels are not
    # confined to [-1, 1], while the Generator in model.py ends in Tanh --
    # confirm this normalisation is intended for GAN training.
    preprocessing_steps = [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307, ), std=(0.3081, )),
    ]
    preprocessing = transforms.Compose(preprocessing_steps)

    mnist_train = datasets.MNIST(
        root='./mnist_data/',
        train=True,
        transform=preprocessing,
        download=True,
    )

    return torch.utils.data.DataLoader(
        dataset=mnist_train,
        batch_size=batch_size,
        shuffle=True,
    )

def generate_images(epoch, path, fixed_noise, num_test_samples, netG, device, use_fixed=False):
    """Render a square grid of generator samples and save it as a PNG.

    The image is written to ``<path>fixed_noise/Epoch_<epoch+1>.png`` when
    ``use_fixed`` is true and to ``<path>variable_noise/Epoch_<epoch+1>.png``
    otherwise; the directory is created if it does not exist.

    Args:
        epoch: Zero-based epoch index (the file name uses ``epoch + 1``).
        path: Base output directory. The sub-directory is appended by plain
            string concatenation, so it should end with a path separator.
        fixed_noise: Noise tensor fed to the generator when ``use_fixed`` is
            true. Its second dimension also sets the latent size of freshly
            sampled noise (100 is used when it is ``None``).
        num_test_samples: Number of samples to draw; the grid side is
            ``int(sqrt(num_test_samples))`` and surplus samples are not shown.
        netG: Generator module; called as ``netG(noise)``.
        device: Device on which fresh noise is created.
        use_fixed: Use ``fixed_noise`` instead of sampling new noise.

    Raises:
        ValueError: If ``num_test_samples`` is smaller than 1.
    """
    import os  # local import: only needed to create the output directory

    if num_test_samples < 1:
        raise ValueError('num_test_samples must be at least 1')

    # Side length of the square grid of sub-plots.
    size_figure_grid = int(math.sqrt(num_test_samples))

    if use_fixed:
        noise = fixed_noise
        path += 'fixed_noise/'
        title = 'Fixed Noise'
    else:
        # Match the latent size of the caller's noise instead of assuming 100.
        nz = fixed_noise.shape[1] if fixed_noise is not None else 100
        noise = torch.randn(num_test_samples, nz, 1, 1, device=device)
        path += 'variable_noise/'
        title = 'Variable Noise'

    # Pure inference: no autograd graph is needed for visualisation.
    with torch.no_grad():
        generated_fake_images = netG(noise)

    # savefig does not create missing directories.
    os.makedirs(path, exist_ok=True)

    # squeeze=False keeps ``ax`` two-dimensional even for a 1x1 grid.
    fig, ax = plt.subplots(size_figure_grid, size_figure_grid, figsize=(6,6), squeeze=False)
    for i, j in itertools.product(range(size_figure_grid), range(size_figure_grid)):
        # Hide the axes of every cell.
        ax[i,j].get_xaxis().set_visible(False)
        ax[i,j].get_yaxis().set_visible(False)

    # Never index past the grid or past the number of generated images.
    num_shown = min(size_figure_grid * size_figure_grid, generated_fake_images.shape[0])
    for k in range(num_shown):
        # Row/column derived from the real grid width, not a hard-coded 4.
        i, j = divmod(k, size_figure_grid)
        ax[i,j].cla()
        # reshape(28, 28) assumes a single-channel 28x28 image per sample.
        ax[i,j].imshow(generated_fake_images[k].detach().cpu().numpy().reshape(28,28), cmap='Greys')

    label = 'Epoch_{}'.format(epoch+1)
    fig.text(0.5, 0.04, label, ha='center')
    fig.suptitle(title)
    fig.savefig(path+label+'.png')
    # Release the figure; otherwise one figure per epoch stays in memory.
    plt.close(fig)

def save_gif(path, fps, fixed_noise=False):
    """Assemble the saved PNG frames of a run into ``animated.gif``.

    Frames are read from ``<path>fixed_noise/`` when ``fixed_noise`` equals
    ``True`` and from ``<path>variable_noise/`` otherwise; the GIF is written
    into that same directory.

    Args:
        path: Base output directory; the sub-directory is appended by string
            concatenation.
        fps: Frames-per-second value passed to ``imageio.mimsave``.
        fixed_noise: Selects which sub-directory is used.
    """
    # The explicit comparison with True is kept so that the selection stays
    # exactly as before for non-bool arguments.
    subdir = 'fixed_noise/' if fixed_noise == True else 'variable_noise/'
    frame_dir = path + subdir

    # Collect every PNG in the directory, ordered with natsort.natsorted.
    frame_files = natsort.natsorted(glob(frame_dir + '*.png'))

    # Load each frame and write them out as a single animated GIF.
    frames = [imageio.imread(frame_file) for frame_file in frame_files]
    imageio.mimsave(frame_dir + 'animated.gif', frames, fps=fps)