main.py
import argparse
import logging
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.autograd import Variable
from torchvision.utils import save_image
from utils import get_data_loader, generate_images, save_gif
from model import Generator,Discriminator
def build_arg_parser():
    """Build the command-line parser for the DCGAN MNIST training script.

    Every option is an optional ``--flag`` with a default, so the script can
    be started without any arguments.

    Returns:
        argparse.ArgumentParser: parser whose namespace exposes
        ``num_epochs``, ``ndf``, ``ngf``, ``nz``, ``d_lr``, ``g_lr``, ``nc``,
        ``batch_size``, ``num_test_samples``, ``output_path``, ``fps`` and
        ``use_fixed``.
    """
    parser = argparse.ArgumentParser(description='DCGANS MNIST')
    # --num-epochs, --ndf and --nz need the "--" prefix. Without it argparse
    # registers them as required positionals (the default is never used) and
    # stores 'num-epochs' under a dest that cannot be read as opt.num_epochs.
    parser.add_argument('--num-epochs', type=int, default=100, help='Number of training epochs')
    parser.add_argument('--ndf', type=int, default=32, help='Number of features to be used in Discriminator network')
    parser.add_argument('--ngf', type=int, default=32, help='Number of features to be used in Generator network')
    parser.add_argument('--nz', type=int, default=100, help='Size of the noise')
    parser.add_argument('--d-lr', type=float, default=0.0002, help='Learning rate for the discriminator')
    parser.add_argument('--g-lr', type=float, default=0.0002, help='Learning rate for the generator')
    parser.add_argument('--nc', type=int, default=1, help='Number of input channels. Ex: for grayscale images: 1 and RGB images: 3 ')
    parser.add_argument('--batch-size', type=int, default=128, help='Batch size')
    parser.add_argument('--num-test-samples', type=int, default=16, help='Number of samples to visualize')
    parser.add_argument('--output-path', type=str, default='./results/', help='Path to save the images')
    parser.add_argument('--fps', type=int, default=5, help='frames-per-second value for the gif')
    # store_true: the value is True when the flag is present, False otherwise.
    parser.add_argument('--use-fixed', action='store_true', help='Boolean to use fixed noise or not')
    return parser


def main():
    """Train the generator/discriminator pair and save sample images.

    Parses the command line, builds the data loader, both networks, the BCE
    criterion and one Adam optimizer per network, then alternates a
    discriminator step and a generator step for every batch. After each epoch
    a grid of generated samples is written via ``generate_images``; after the
    last epoch ``save_gif`` assembles the saved images into a GIF.
    """
    opt = build_arg_parser().parse_args()
    print(opt)

    train_loader = get_data_loader(opt.batch_size)

    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("Using", device)

    # Define Discriminator and Generator architectures.
    netG = Generator(opt.nc, opt.nz, opt.ngf).to(device)
    netD = Discriminator(opt.nc, opt.ndf).to(device)

    # Binary cross-entropy between discriminator outputs and 0/1 targets.
    criterion = nn.BCELoss()

    # One Adam optimizer per network, each with its own learning rate.
    optimizerD = optim.Adam(netD.parameters(), lr=opt.d_lr)
    optimizerG = optim.Adam(netG.parameters(), lr=opt.g_lr)

    # Targets are floats because BCELoss compares them with float outputs.
    real_label = 1.0
    fake_label = 0.0

    num_batches = len(train_loader)

    # Noise reused for every per-epoch sample grid. Its channel count must
    # follow --nz (it was hard-coded to 100), otherwise the generator
    # rejects it whenever a different noise size is requested.
    fixed_noise = torch.randn(opt.num_test_samples, opt.nz, 1, 1, device=device)

    for epoch in range(opt.num_epochs):
        for i, (real_images, _) in enumerate(train_loader):
            # Size everything from the batch actually received rather than
            # from opt.batch_size (the loader may yield a smaller last batch).
            bs = real_images.shape[0]

            ##############################
            #   Training discriminator   #
            ##############################
            netD.zero_grad()
            real_images = real_images.to(device)

            # Real batch, target = real_label.
            output = netD(real_images)
            # Build the target directly in the output's dtype instead of
            # creating an integer tensor and casting both tensors afterwards.
            label = torch.full((bs,), real_label, dtype=output.dtype, device=device)
            lossD_real = criterion(output, label)
            lossD_real.backward()
            D_x = output.mean().item()

            # Fake batch, target = fake_label.
            noise = torch.randn(bs, opt.nz, 1, 1, device=device)
            fake_images = netG(noise)
            label.fill_(fake_label)
            # detach() stops this backward pass from reaching the generator.
            output = netD(fake_images.detach())
            lossD_fake = criterion(output, label)
            lossD_fake.backward()
            D_G_z1 = output.mean().item()

            # Reported discriminator loss is the sum of both halves; the
            # gradients were already accumulated by the two backward() calls.
            lossD = lossD_real + lossD_fake
            optimizerD.step()

            ##########################
            #   Training generator   #
            ##########################
            netG.zero_grad()
            # The generator is scored against the "real" target.
            label.fill_(real_label)
            output = netD(fake_images)
            lossG = criterion(output, label)
            lossG.backward()
            D_G_z2 = output.mean().item()
            optimizerG.step()

            if (i+1) % 100 == 0:
                print('Epoch [{}/{}], step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}, D(x): {:.2f}, Discriminator - D(G(x)): {:.2f}, Generator - D(G(x)): {:.2f}'.format(
                    epoch+1, opt.num_epochs, i+1, num_batches,
                    lossD.item(), lossG.item(), D_x, D_G_z1, D_G_z2))

        # Switch to evaluation mode while sampling, then back to training.
        netG.eval()
        with torch.no_grad():
            generate_images(epoch, opt.output_path, fixed_noise, opt.num_test_samples, netG, device, use_fixed=opt.use_fixed)
        netG.train()

    # Save gif:
    save_gif(opt.output_path, opt.fps, fixed_noise=opt.use_fixed)


# Run the training only when this file is executed directly.
if __name__ == '__main__':
    main()
model.py
import torch.nn as nn
import torch.nn.functional as F
class Generator(nn.Module):
    """Generator: a stack of transposed convolutions ending in Tanh.

    With a noise input of spatial size 1x1 the four transposed convolutions
    produce feature maps of size 4x4, 7x7, 14x14 and finally 28x28.

    Args:
        nc: number of channels of the generated image.
        nz: number of channels of the input noise tensor.
        ngf: base number of feature maps; hidden layers use ngf*4, ngf*2, ngf.
    """

    def __init__(self, nc, nz, ngf):
        super().__init__()
        # Layer order and the attribute name `network` are kept as they were
        # so state_dict keys stay the same.
        self.network = nn.Sequential(
            # nz x 1 x 1 -> (ngf*4) x 4 x 4
            nn.ConvTranspose2d(nz, ngf * 4, kernel_size=4, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(inplace=True),

            # (ngf*4) x 4 x 4 -> (ngf*2) x 7 x 7
            nn.ConvTranspose2d(ngf * 4, ngf * 2, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(inplace=True),

            # (ngf*2) x 7 x 7 -> ngf x 14 x 14
            nn.ConvTranspose2d(ngf * 2, ngf, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(inplace=True),

            # ngf x 14 x 14 -> nc x 28 x 28, squashed into [-1, 1] by Tanh
            nn.ConvTranspose2d(ngf, nc, kernel_size=4, stride=2, padding=1, bias=False),
            nn.Tanh(),
        )

    def forward(self, input):
        """Map a noise batch to a batch of generated images."""
        return self.network(input)
class Discriminator(nn.Module):
    """Discriminator: strided convolutions ending in a Sigmoid score.

    For a 28x28 input the spatial size goes 28 -> 14 -> 7 -> 4 -> 1, leaving
    one value in (0, 1) per image.

    Args:
        nc: number of channels of the input image.
        ndf: base number of feature maps; hidden layers use ndf, ndf*2, ndf*4.
    """

    def __init__(self, nc, ndf):
        # Zero-argument super(), consistent with Generator.
        super().__init__()
        # nn.Sequential applies the layers in the order listed.
        self.network = nn.Sequential(
            # kernel 4, stride 2, padding 1 halves width and height.
            nn.Conv2d(nc, ndf, kernel_size=4, stride=2, padding=1, bias=False),
            # Negative slope 0.2; inplace=True overwrites the input tensor.
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(ndf, ndf * 2, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),

            # kernel 3 here so that a 7x7 map becomes 4x4.
            nn.Conv2d(ndf * 2, ndf * 4, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),

            # Collapse the remaining 4x4 map to a single value per image.
            nn.Conv2d(ndf * 4, 1, kernel_size=4, stride=1, padding=0, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, input):
        """Return a 1-D tensor holding one score per element of the batch."""
        output = self.network(input)
        # Flatten to 1-D; same result as view(-1, 1).squeeze(1).
        return output.view(-1)
utils.py
import torch
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
import math
import itertools
import imageio
import natsort
from glob import glob
from torchvision.datasets.mnist import MNIST
def get_data_loader(batch_size, mean=(0.5,), std=(0.5,)):
    """Return a shuffled DataLoader over the MNIST training split.

    Images are converted to tensors and normalised with ``mean``/``std``.
    The defaults map ToTensor's [0, 1] range onto [-1, 1], which is the range
    the Tanh-terminated generator in this project can produce. The dataset
    statistics (0.1307, 0.3081) used previously stretch real pixels to about
    [-0.42, 2.82], values the generator can never reach; pass them explicitly
    if that normalisation is wanted.

    Args:
        batch_size: number of samples per batch.
        mean: per-channel mean passed to ``transforms.Normalize``.
        std: per-channel standard deviation passed to ``transforms.Normalize``.

    Returns:
        torch.utils.data.DataLoader yielding ``(images, labels)`` batches.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ])

    # The dataset is downloaded into ./mnist_data/ when it is not there yet.
    train_dataset = datasets.MNIST(root='./mnist_data/', train=True, transform=transform, download=True)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    return train_loader
def generate_images(epoch, path, fixed_noise, num_test_samples, netG, device, use_fixed=False):
    """Render a square grid of generated samples and save it as a PNG.

    The image is written to ``path + 'fixed_noise/'`` or
    ``path + 'variable_noise/'`` under the name ``Epoch_<epoch+1>.png``; the
    directory is created when missing.

    Args:
        epoch: zero-based epoch index, used for the caption and file name.
        path: output directory prefix; the sub-directory name is appended by
            plain string concatenation.
        fixed_noise: noise batch used when ``use_fixed`` is true; its channel
            count also sizes the freshly drawn noise otherwise.
        num_test_samples: number of samples to draw; the grid side is
            ``int(sqrt(num_test_samples))`` and surplus samples are skipped.
        netG: generator network.
        device: device on which fresh noise is created.
        use_fixed: use ``fixed_noise`` instead of drawing new noise.
    """
    import os  # local import: not among this module's top-level imports

    size_figure_grid = int(math.sqrt(num_test_samples))

    if use_fixed:
        noise = fixed_noise
        path += 'fixed_noise/'
        title = 'Fixed Noise'
    else:
        # Follow the noise size in use rather than assuming 100 channels.
        nz = fixed_noise.shape[1] if fixed_noise is not None else 100
        noise = torch.randn(num_test_samples, nz, 1, 1, device=device)
        path += 'variable_noise/'
        title = 'Variable Noise'

    # Sampling only: no autograd graph is needed.
    with torch.no_grad():
        generated_fake_images = netG(noise)

    # savefig does not create missing directories.
    os.makedirs(path, exist_ok=True)

    # squeeze=False keeps `ax` two-dimensional even for a 1x1 grid.
    fig, ax = plt.subplots(size_figure_grid, size_figure_grid, figsize=(6, 6), squeeze=False)
    try:
        # Hide the axes of every cell.
        for i, j in itertools.product(range(size_figure_grid), range(size_figure_grid)):
            ax[i, j].get_xaxis().set_visible(False)
            ax[i, j].get_yaxis().set_visible(False)

        # Never index past the grid or past the generated batch.
        shown = min(num_test_samples, size_figure_grid * size_figure_grid, len(generated_fake_images))
        for k in range(shown):
            # Row/column come from the actual grid side (was hard-coded 4).
            i, j = divmod(k, size_figure_grid)
            ax[i, j].cla()
            ax[i, j].imshow(generated_fake_images[k].detach().cpu().numpy().reshape(28, 28), cmap='Greys')

        label = 'Epoch_{}'.format(epoch+1)
        fig.text(0.5, 0.04, label, ha='center')
        fig.suptitle(title)
        fig.savefig(path+label+'.png')
    finally:
        # One figure is created per call; close it so figures do not pile up.
        plt.close(fig)
def save_gif(path, fps, fixed_noise=False):
    """Assemble the saved PNG frames of one run into ``animated.gif``.

    Args:
        path: output directory prefix; ``'fixed_noise/'`` or
            ``'variable_noise/'`` is appended by string concatenation.
        fps: frames-per-second value forwarded to ``imageio.mimsave``.
        fixed_noise: selects the ``fixed_noise/`` sub-directory when truthy,
            ``variable_noise/`` otherwise.
    """
    # Truthiness test instead of comparing against True.
    path += 'fixed_noise/' if fixed_noise else 'variable_noise/'

    # Collect every PNG in the directory, sorted in natural order.
    images = natsort.natsorted(glob(path + '*.png'))

    # Read each frame in order.
    gif = [imageio.imread(image) for image in images]

    # Write the animation next to the frames.
    imageio.mimsave(path+'animated.gif', gif, fps=fps)