目 录
前言
GitHub:https://github.com/wuhuah/DCGAN
使用pytorch框架,不追求过多的技巧。不单流程走完,而是遵循学习的一般思路,按层次不断完善。预计设计2个py文件,一个存参数,一个训练和测试。后期应该会上传GitHub。
1、明确搭建流程
大纲先列出来
# 导入必要的库(边写边导入)
# 数据集准备
# 数据集加载器
# 搭建生成器
# 搭建判别器
# 权重初始化
# 实例化、权重初始化
# 优化器
# 损失函数
# 训练使用GPU
# 训练
# 测试
2、搭出大概轮廓
不求炫酷的trick,不求能跑通,先写出大纲下的每一块。
# 导入必要的库(边写边导入)
import os
import argparse
import numpy as np
import random
#网络搭建需要的库
import torch
from torch.autograd import Variable
import torch.nn as nn
#准备数据集所需的库
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torchvision.utils import save_image
# Use GPU & CUDA parallelism when available.
# device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# BUG FIX: torch.cuda.is_available is a *function*; without the call
# parentheses the bound method object is always truthy, so `cuda` would be
# True even on CPU-only machines and torch.cuda.FloatTensor would fail.
cuda = True if torch.cuda.is_available() else False
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor  # CUDA or plain CPU float tensors

# Dataset (note: ImageFolder expects a *folder* path, not a file path).
dataset = datasets.ImageFolder(
    root='../',
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
)

# Data loader.  shuffle=True so every epoch sees the images in a new order;
# without it the discriminator is fed identical batch sequences each epoch.
data_loader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=32,
    shuffle=True,
    num_workers=6
)
# Build the generator (first rough draft; refined in later sections).
class generator(nn.Module):
    """Draft generator: five 3x3 stride-1 convolutions with BatchNorm.

    Maps a (N, 3, H, W) noise tensor to a (N, 3, H, W) output.
    NOTE(review): there are no non-linear activations yet and no upsampling;
    this is intentionally a skeleton that later sections rework.
    """

    def __init__(self):
        super(generator, self).__init__()
        self.main = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.Conv2d(in_channels=64, out_channels=3, kernel_size=3, stride=1, padding=1),
            # BUG FIX: the last conv outputs 3 channels, so this BatchNorm must
            # have num_features=3 — the original num_features=64 raised a
            # runtime error on the first forward pass.
            nn.BatchNorm2d(num_features=3)
        )

    def forward(self, z):
        """Run the (N, 3, H, W) tensor z through the conv stack."""
        out = self.main(z)
        return out
# Build the discriminator (first rough draft, fully convolutional).
class discriminator(nn.Module):
    """Draft critic: three 3x3 convolutions with BatchNorm, ending in a
    per-pixel Sigmoid, so the output keeps the input's spatial size."""

    def __init__(self):
        super(discriminator, self).__init__()
        layers = [
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.Conv2d(in_channels=64, out_channels=3, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=3),
            nn.Sigmoid(),
        ]
        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Return per-pixel 'real' probabilities for a (N, 3, H, W) batch."""
        return self.main(input)
# Weight initialisation (DCGAN-style), applied to every sub-module via model.apply(...).
def weights_init_normal(m):# m is one sub-module of the network
    classname = m.__class__.__name__ # class name of the sub-module, e.g. 'Conv2d' or 'BatchNorm2d'
    if classname.find('Conv') != -1:# str.find returns the index of the match, or -1 when 'Conv' does not occur
        nn.init.normal_(m.weight.data, 0.0, 0.02)# conv weights ~ N(0, 0.02), per the DCGAN paper
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)# BatchNorm scale (gamma) ~ N(1, 0.02)
        nn.init.constant_(m.bias.data, 0) # BatchNorm shift (beta) fixed to the constant 0
# Instantiate the networks and initialise their weights.
generator = generator()          # rebinds the name: `generator` is now the instance
discriminator = discriminator()
if cuda:
    generator.cuda()
    # BUG FIX: the original called `discriminator()` here — a forward pass
    # with no input — instead of moving the model to the GPU.
    discriminator.cuda()
generator.apply(weights_init_normal)
discriminator.apply(weights_init_normal)
#print(generator,discriminator)

# Optimizers (Adam with the usual (0.9, 0.999) betas).
optimizer_g = torch.optim.Adam(generator.parameters(), lr=0.001, betas=(0.9, 0.999))
optimizer_d = torch.optim.Adam(discriminator.parameters(), lr=0.001, betas=(0.9, 0.999))

# Loss function (binary cross entropy).
loss = nn.BCELoss()
if cuda:
    loss.cuda()

# BUG FIX: the draft noise line `torch.randn(size=32, 100,1,1,device)` was a
# SyntaxError (positional args after a keyword arg) and the label constants
# were unused; both are dropped here (section 3 comments them out as well).

# ROBUSTNESS FIX: create the output folders before the first save at
# batches_done == 0.
os.makedirs('images', exist_ok=True)
os.makedirs('models', exist_ok=True)

# Training.
print(f'开始训练......')
n_epochs = 5
for epoch in range(n_epochs):
    # BUG FIX: the loader yields (images, labels) tuples; unpack them,
    # otherwise `imgs` is a list with no .shape attribute.
    for i, (imgs, _) in enumerate(data_loader):
        # Targets: 1.0 for real images, 0.0 for generated ones.
        valid = Variable(Tensor(imgs.shape[0], 1).fill_(1.0), requires_grad=False)
        fake = Variable(Tensor(imgs.shape[0], 1).fill_(0.0), requires_grad=False)
        # Real batch converted to the chosen tensor type.
        real_imgs = Variable(imgs.type(Tensor))

        # ---- Train the generator ----
        generator.zero_grad()
        # BUG FIX: wrap the numpy noise in Tensor(...) (Variable needs a
        # tensor) and use imgs.shape[0] (batch size), not imgs[0].
        z = Variable(Tensor(np.random.normal(0, 1, (imgs.shape[0], 100))))
        gent_imgs = generator(z)
        g_loss = loss(discriminator(gent_imgs), valid)
        g_loss.backward()
        optimizer_g.step()

        # ---- Train the discriminator ----
        discriminator.zero_grad()
        real_loss = loss(discriminator(real_imgs), valid)
        # BUG FIX: detach the generated images so backward() does not try to
        # traverse the already-freed generator graph a second time.
        fake_loss = loss(discriminator(gent_imgs.detach()), fake)
        # BUG FIX: average the two *losses* (the original added real_imgs,
        # the image tensor, to fake_loss).
        d_loss = (real_loss + fake_loss) / 2
        d_loss.backward()
        optimizer_d.step()

        # BUG FIX: print the losses as floats — %d truncated them to integers.
        print('[Epoch: %d/%d, Batch: %d/%d, d_loss: %.4f, g_loss: %.4f]'
              % (epoch, n_epochs, i, len(data_loader), d_loss.item(), g_loss.item())
              )
        batches_done = epoch * len(data_loader) + i
        if batches_done % 500 == 0:
            save_image(gent_imgs.data[:25], 'images/%d.jpg' % batches_done)
            torch.save(generator.state_dict(), 'models/g_%d.pth' % batches_done)
2.1. nn.BCELoss(x,y)交叉熵损失函数的公式
网络中套用该公式的具体过程为(注意D(a)判断的是a为真的概率是多少):
2.2. 注意
- cuda是并行计算的框架,如果有cuda时,才可以将网络和损失函数放在cuda下运行,例如:
if cuda:
generator.cuda()
discriminator.cuda()
loss.cuda()
-
使用cuda时,需要将网络的噪声和图像的输入、噪声和图像的标签都转换为cuda下的张量。
-
Variable自动求梯度时,里边的内容应该为张量,而不是矩阵。
2.3. 参考
- pytorch方法测试——损失函数(BCELoss)
- detach
- torch.autograd中的Variable类参照深度学习之PyTorch实战计算机视觉
- Adam优化器中的beta值参考斋藤康毅的深度学习入门
- torch.FloatTensor()
- np.random一系列(np.random.normal()、np.random.randint、np.random.randn、np.random.rand)
- pytorch .item() 的用法
- pytorch-保存和加载模型
3、调通程序
# 导入必要的库(边写边导入)
import os
import argparse
import numpy as np
import random
#网络搭建需要的库
import torch
from torch.autograd import Variable
import torch.nn as nn
#准备数据集所需的库
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torchvision.utils import save_image
# Use GPU & CUDA parallelism when available.
# device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# BUG FIX: torch.cuda.is_available is a function; without the call
# parentheses the bound method is always truthy, so `cuda` would be True on
# CPU-only machines and torch.cuda.FloatTensor would fail.
cuda = True if torch.cuda.is_available() else False
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor  # CUDA or plain CPU float tensors

# Dataset (note: ImageFolder expects a *folder* path, not a file path).
dataset = datasets.ImageFolder(
    root='../',
    transform=transforms.Compose([
        transforms.Resize(64),
        transforms.CenterCrop(64),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
)

# Data loader.  shuffle=True so each epoch sees the images in a new order.
data_loader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=64,
    shuffle=True,
    num_workers=6
)
# Build the generator: a Linear layer projects the 100-d noise to a
# 64x64x64 volume, which a stack of 3x3 convolutions refines into a
# 3-channel 64x64 image.
class generator(nn.Module):

    def __init__(self):
        super(generator, self).__init__()
        # Project z (length 100) to 64**3 values, later reshaped to (64, 64, 64).
        self.l1 = nn.Sequential(nn.Linear(100, 64 ** 3))
        conv_stack = []
        for _ in range(4):
            conv_stack.append(nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1))
            conv_stack.append(nn.BatchNorm2d(64))
        # Final projection down to 3 image channels (no BatchNorm here).
        conv_stack.append(nn.Conv2d(64, 3, kernel_size=3, stride=1, padding=1))
        self.main = nn.Sequential(*conv_stack)

    def forward(self, z):
        """z: (N, 100) noise -> (N, 3, 64, 64) image tensor."""
        projected = self.l1(z)
        volume = projected.view(projected.shape[0], 64, 64, 64)
        return self.main(volume)
# Build the discriminator: a conv feature extractor followed by a Linear
# head that maps the flattened 3x64x64 features to one probability.
class discriminator(nn.Module):

    def __init__(self):
        super(discriminator, self).__init__()
        self.main = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.Conv2d(in_channels=64, out_channels=3, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=3),
        )
        self.l1 = nn.Sequential(
            nn.Linear(3 * 64 * 64, 1),
            nn.Sigmoid(),
        )

    def forward(self, input):
        """input: (N, 3, 64, 64) -> (N, 1) probability of being real."""
        features = self.main(input)
        flattened = features.view(features.shape[0], -1)
        return self.l1(flattened)
# Weight initialisation (DCGAN-style), applied to every sub-module via model.apply(...).
def weights_init_normal(m):# m is one sub-module of the network
    classname = m.__class__.__name__ # class name of the sub-module, e.g. 'Conv2d' or 'BatchNorm2d'
    if classname.find('Conv') != -1:# str.find returns the index of the match, or -1 when 'Conv' does not occur
        nn.init.normal_(m.weight.data, 0.0, 0.02)# conv weights ~ N(0, 0.02), per the DCGAN paper
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)# BatchNorm scale (gamma) ~ N(1, 0.02)
        nn.init.constant_(m.bias.data, 0) # BatchNorm shift (beta) fixed to the constant 0
# Instantiate the networks and initialise their weights.
generator = generator()          # rebinds the name: `generator` is now the instance
discriminator = discriminator()
if cuda:
    generator.cuda()
    discriminator.cuda()
generator.apply(weights_init_normal)
discriminator.apply(weights_init_normal)
#print(generator,discriminator)
# Optimizers.
optimizer_g = torch.optim.Adam(generator.parameters(), lr=0.001, betas=(0.9, 0.999))
optimizer_d = torch.optim.Adam(discriminator.parameters(), lr=0.001, betas=(0.9, 0.999))
# Loss function (binary cross entropy).
loss = nn.BCELoss()
if cuda:
    loss.cuda()
# Draft noise / label constants, kept commented out as in the original:
#noise = torch.randn(size=32, 100,1,1,device)
#real_lable = 1
#fake_lable = 0
# ROBUSTNESS FIX: create the output folders up front so the very first save
# (batches_done == 0) cannot fail with a missing directory.
os.makedirs('images', exist_ok=True)
os.makedirs('models', exist_ok=True)
# Training.
print(f'开始训练......')
n_epochs = 5
for epoch in range(n_epochs):
    for i, (imgs, _) in enumerate(data_loader):  # unpack (images, labels) so imgs has .shape
        # Targets: 1.0 for real images, 0.0 for generated ones.
        valid = Variable(Tensor(imgs.shape[0], 1).fill_(1.0), requires_grad=False)
        fake = Variable(Tensor(imgs.shape[0], 1).fill_(0.0), requires_grad=False)
        # Real batch converted to the chosen tensor type.
        real_imgs = Variable(imgs.type(Tensor))
        # ---- Train the generator ----
        generator.zero_grad()
        # Variable needs a tensor, not a raw numpy array.
        z = Variable(Tensor(np.random.normal(0, 1, (imgs.shape[0], 100))))
        gent_imgs = generator(z)
        g_loss = loss(discriminator(gent_imgs), valid)
        g_loss.backward()
        optimizer_g.step()
        # ---- Train the discriminator ----
        discriminator.zero_grad()
        real_loss = loss(discriminator(real_imgs), valid)
        # detach() stops backward() from revisiting G's already-freed graph.
        fake_loss = loss(discriminator(gent_imgs.detach()), fake)
        d_loss = (real_loss + fake_loss) / 2
        d_loss.backward()
        optimizer_d.step()
        # BUG FIX: print the losses as floats — %d truncated them to integers
        # (this is the open issue listed in section 3.3).
        print('[Epoch: %d/%d, Batch: %d/%d, d_loss: %.4f, g_loss: %.4f]'
              % (epoch, n_epochs, i, len(data_loader), d_loss.item(), g_loss.item())
              )
        batches_done = epoch * len(data_loader) + i
        if batches_done % 500 == 0:
            save_image(gent_imgs.data[:25], 'images/%d.jpg' % batches_done)
            torch.save(generator.state_dict(), 'models/g_%d.pth' % batches_done)
            torch.save(discriminator.state_dict(), 'models/d_%d.pth' % batches_done)
3.1. 运行结果
可以看到,程序已经可以运行了,只是运行的比较垃圾:
GPU使用情况,利用率还是比较稳定的,没有周期性变化:
3.2. 调试中,需要注意的主要几点如下
- 很明显,创建网络类时,Module和Sequential要注意首字母大写
- 卷积层和全连接层之间的连接,通常要通过.view()来转换tensor的形状。
- 若不用gent_imgs.detach(),会报错RuntimeError: Trying to backward through the graph a second time, but the saved intermediate results have already been freed. Specify retain_graph=True when calling backward the first time.
- 改bug时,要经常用print(‘1’)来看程序是否执行到指定位置,print(a.shape)来查看矩阵或张量形状等。
- torch.randn()生成指定形状的,满足标准正态分布的tensor。torch中生成的基本上都是tensor类型。
3.3. 优化方向
- 我是参照这个代码写的GAN的训练次序:pytorch-gan,但是仔细观察会发现D和G的训练并没有严格按照Goodfellow论文的要求,而是直接G和D交替训练。后续改进可以参考pytorch给出的官方参考文档,对训练进行优化。
- 并且网络结构我也是随便搭了几层,没有考虑DCGAN的文章trick,优化时需要改进。
- 代码风格不行,整个项目一个文件就下来了。
- g_loss和d_loss都是整数值。
- 打印模型及训练日志。
3.4. 参考
- CNN中.view()和.shape()用法总结
- python 多线程报错An attempt has been made to start a new process before the current process has finished
- 解决AttributeError: ‘list‘ object has no attribute ‘shape‘
- TypeError: img should be PIL Image. Got <class ‘torch.Tensor‘>
- pytorch .detach() .detach_() 和 .data用于切断反向传播
- pytorch——weights_init(m)
4、优化程序
这部分,我们将一个程序分为2个,分别是参数、训练和测试。并且按照论文要求调整网络结构。
优化程序1
- 完成了网络结构的完全复现。
- 完成了网络参数的完全复现。
- Goodfellow的GAN训练策略的完全复现。
- 欠缺:生成器损失函数在学习早期和后期的分别操作。
- 欠缺:是否可以使用变化学习率等。
- 生成器损失下降慢,D(X)和D(G(Z))难以从两个方向接近0.5
- 当开始下一个epoch时,结果变得很糟糕。
训练程序:
cd GAN
python DCGAN.py --train
# 导入必要的库(边写边导入)
import os
import argparse
from parser import args
import numpy as np
import random
#网络搭建需要的库
import torch
from torch.autograd import Variable
import torch.nn as nn
#准备数据集所需的库
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torchvision.utils import save_image
# Decide whether CUDA is available and which tensor type to use.
# device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# BUG FIX: torch.cuda.is_available is a function; without the call
# parentheses the bound method is always truthy, so `cuda` would be True on
# CPU-only machines and torch.cuda.FloatTensor would fail.
cuda = True if torch.cuda.is_available() else False
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor  # CUDA or plain CPU float tensors

# Dataset (note: ImageFolder expects a *folder* path, not a file path).
dataset = datasets.ImageFolder(
    root=args.dataset_path,
    transform=transforms.Compose([
        transforms.Resize(args.input_size),
        # CONSISTENCY FIX: crop to args.input_size (default 64) instead of the
        # hard-coded 64, so a non-default --input_size stays coherent.
        transforms.CenterCrop(args.input_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
)

# Data loader.  shuffle=True so each epoch sees the images in a new order.
data_loader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.num_workers
)
# Build the DCGAN generator: project the latent vector, then upsample 16x
# with four stride-2 transposed convolutions to an (input_channels, input_size,
# input_size) image in [-1, 1].
class generator(nn.Module):
    def __init__(self):
        super(generator, self).__init__()
        self.init_size = args.input_size // 16  # spatial size before upsampling (64 // 16 = 4)
        self.l1 = nn.Sequential(nn.Linear(args.latent_dim, (args.gf * 8) * (self.init_size ** 2)))
        self.main = nn.Sequential(
            nn.BatchNorm2d(num_features=args.gf * 8),  # normalises the channel dimension
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels=args.gf * 8, out_channels=args.gf * 4, kernel_size=4, stride=2, padding=1),
            # BUG FIX: the BatchNorm sizes below were hard-coded (512/256/128),
            # which only matches the default --gf 128; any other value crashed
            # at runtime.  They now track args.gf like the conv layers do.
            nn.BatchNorm2d(num_features=args.gf * 4),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels=args.gf * 4, out_channels=args.gf * 2, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(num_features=args.gf * 2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels=args.gf * 2, out_channels=args.gf, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(num_features=args.gf),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels=args.gf, out_channels=args.input_channels, kernel_size=4, stride=2, padding=1),
            # no BatchNorm on the output layer
            nn.Tanh()
        )

    def forward(self, z):
        """z: (N, latent_dim) -> (N, input_channels, input_size, input_size)."""
        out = self.l1(z)
        out = out.view(out.shape[0], args.gf * 8, self.init_size, self.init_size)  # (N, C, H, W)
        out = self.main(out)
        return out
# Build the DCGAN discriminator: four stride-2 convolutions halve the spatial
# size 64 -> 32 -> 16 -> 8 -> 4, a final 4x4 conv maps to one value, and
# Sigmoid turns it into a probability.
class discriminator(nn.Module):
    def __init__(self):
        super(discriminator, self).__init__()
        self.main = nn.Sequential(
            # input is (nc) x 64 x 64
            nn.Conv2d(args.input_channels, args.df, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (args.df) x 32 x 32
            nn.Conv2d(args.df, args.df * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(args.df * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (args.df*2) x 16 x 16
            nn.Conv2d(args.df * 2, args.df * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(args.df * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (args.df*4) x 8 x 8
            nn.Conv2d(args.df * 4, args.df * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(args.df * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (args.df*8) x 4 x 4
            nn.Conv2d(args.df * 8, 1, 4, 1, 0, bias=False),
            # state size. 1 x 1 x 1
            nn.Sigmoid()
        )
        # NOTE(review): self.l1 is never used in forward() — it allocates
        # 128*input_size*input_size dead parameters that are still initialised
        # and written into every checkpoint.  Removing it would change the
        # state_dict layout of already-saved models, so it is only flagged here.
        self.l1 = nn.Sequential(
            nn.Linear(128 * args.input_size * args.input_size, 1),
            nn.Sigmoid()
        )
    def forward(self, input):
        # Flatten the (N, 1, 1, 1) conv output to (N, 1) probabilities.
        out = self.main(input)
        out = out.view(out.shape[0], -1)
        return out
# DCGAN weight initialisation, applied to every sub-module via model.apply(...).
def weights_init_normal(m):
    """Initialise conv weights ~ N(0, 0.02); BatchNorm gamma ~ N(1, 0.02), beta = 0."""
    classname = m.__class__.__name__  # e.g. 'Conv2d', 'ConvTranspose2d', 'BatchNorm2d'
    if classname.find('Conv') != -1:  # str.find returns the match index, or -1 if absent
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        # BUG FIX: the DCGAN recipe (and sections 2-3 of this file) centre the
        # BatchNorm scale around 1.0, not 0.0 — a zero-mean gamma nearly zeroes
        # every feature map at the start of training.
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)  # beta fixed at 0
# Instantiate the networks and apply the DCGAN weight initialisation.
generator = generator()          # rebinds the name: `generator` is now the instance
discriminator = discriminator()
generator.apply(weights_init_normal)
discriminator.apply(weights_init_normal)
#print(generator,discriminator)
# Optimizers (learning rates and betas come from the parser defaults).
optimizer_g = torch.optim.Adam(generator.parameters(), args.g_lr, betas=(args.beta1, args.beta2))
optimizer_d = torch.optim.Adam(discriminator.parameters(), args.d_lr, betas=(args.beta1, args.beta2))
# Loss function (binary cross entropy).
loss = nn.BCELoss()
# Move the models and the loss to the GPU when CUDA is available.
# NOTE(review): .cuda() is called *after* the optimizers were built; Module.cuda()
# moves parameters in place so this appears to work, but building the optimizers
# after the move is the conventional order — confirm before relying on it.
if cuda:
    generator.cuda()
    discriminator.cuda()
    loss.cuda()
# Training loop (run with --train).
if args.train:
    print(f'Starting Training Loop...')
    for epoch in range(args.n_epochs):
        # ImageFolder yields (images, labels); unpack so imgs is a tensor with .shape
        for i,(imgs,_) in enumerate(data_loader):
            # Targets: 1.0 for real images, 0.0 for generated ones.
            valid = Variable(Tensor(imgs.shape[0], 1).fill_(1.0), requires_grad=False)
            fake = Variable(Tensor(imgs.shape[0], 1).fill_(0.0), requires_grad=False)
            # Real batch converted to the chosen tensor type (CPU/GPU float).
            real_imgs = Variable(imgs.type(Tensor))
            # Latent noise z ~ N(0, 1); Variable needs a tensor, not a numpy array.
            z = Variable(Tensor(np.random.normal(0,1,(imgs.shape[0],args.latent_dim))))
            gent_imgs = generator(z)
            # ---- Train the discriminator k_disc times per batch ----
            # NOTE(review): the same real batch and the same gent_imgs are reused
            # for every k; Goodfellow's Algorithm 1 samples fresh minibatches per
            # inner step — confirm before running with k_disc > 1.
            for k in range(args.k_disc):
                discriminator.zero_grad()
                discriminator_real_imgs = discriminator(real_imgs)
                d_x = discriminator_real_imgs.mean().item()      # average D(x), for logging
                # detach() cuts the graph back into G: only D's params get gradients here
                discriminator_gent_imgs = discriminator(gent_imgs.detach())
                d_g_z1 = discriminator_gent_imgs.mean().item()   # average D(G(z)) before D's update
                real_loss = loss(discriminator_real_imgs,valid)
                real_loss.backward()# accumulate gradients from the real half
                fake_loss = loss(discriminator_gent_imgs,fake)
                fake_loss.backward()# ...and from the fake half
                d_loss = real_loss + fake_loss# combined value, kept for logging only
                #d_loss.backward()
                optimizer_d.step()# apply the accumulated gradients
            # ---- Train the generator once per batch ----
            #if (epoch*len(data_loader)+i+1) <= args.early:
            generator.zero_grad()
            discriminator_gent_imgs = discriminator(gent_imgs)
            d_g_z2 = discriminator_gent_imgs.mean().item()       # average D(G(z)) after D's update
            g_loss = loss(discriminator_gent_imgs,valid)         # fakes labelled 'real': the non-saturating G objective
            g_loss.backward()
            optimizer_g.step()
            '''
            # 后期训练
            else:
                if (epoch*len(data_loader)+i+1) == args.early:
                    print(f'转换生成器损失函数')
                generator.zero_grad()
                discriminator_gent_imgs = discriminator(gent_imgs)
                d_g_z2 = discriminator_gent_imgs.mean().item()
                g_loss = (-1)*loss(discriminator_gent_imgs,valid)# 有问题
                g_loss.backward()
                optimizer_g.step()
            '''
            print('[Epoch: %d/%d, Batch: %d/%d, g_loss: %.4f, d_loss: %.4f, D(x): %.4f, D(G(z)): %.4f/%.4f]'
                %(epoch, args.n_epochs, i, len(data_loader), g_loss.item(), d_loss.item(), d_x, d_g_z1, d_g_z2)
            )
            # Save sample images and checkpoints every 500 batches.
            batches_done = epoch*len(data_loader) + i
            if batches_done % 500 == 0:
                save_image(gent_imgs.data[:24],'images/%d.jpg'%batches_done,)
                torch.save(generator.state_dict(),args.models_path+'g_%d.pth'%batches_done)
                torch.save(discriminator.state_dict(),args.models_path+'d_%d.pth'%batches_done)
    print('finished!')
# Testing (not yet implemented in this version).
#if args.test:
结果:
conv2d的计算公式:
ConvTranspose2d的计算公式:
参考
- GAN生成图像数据、数据增强
- ZC-argparse库的基本使用
- PyTorch中nn.ReLU() 和 nn.ReLU(inplace=True)区别第一个值是x<0时的斜率,第二个值请参考文章。
- 神经网络中数据以这样的格式流通[batch_size,chanell,height,width]
- Pytorch中 nn.BatchNorm2d() (官方文档)以后要学会看官方文档了呀铁汁!!!
- python定义函数时,形参前加*和**的用法。
- Conv2d和ConvTranspose2d计算的区别,坑死我了!!!
优化程序2
- 完成了训练加载预训练模型的部分。
- 完成了测试代码的编写。
- 优化后期学习g的损失函数(待完成)
训练程序:
cd GAN
python DCGAN.py --train
加载预训练模型训练程序:
cd GAN
python DCGAN.py --train --pre_train
测试程序:
python DCGAN.py --test
# 导入必要的库(边写边导入)
import cv2
import os
import argparse
from parser import args
import numpy as np
import random
#网络搭建需要的库
import torch
from torch.autograd import Variable
import torch.nn as nn
#准备数据集所需的库
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torchvision.utils import save_image
# Decide whether CUDA is available and which tensor type to use.
# device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# BUG FIX: torch.cuda.is_available is a function; without the call
# parentheses the bound method is always truthy, so `cuda` would be True on
# CPU-only machines and torch.cuda.FloatTensor would fail.
cuda = True if torch.cuda.is_available() else False
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor  # CUDA or plain CPU float tensors

# Dataset (note: ImageFolder expects a *folder* path, not a file path).
dataset = datasets.ImageFolder(
    root=args.dataset_path,
    transform=transforms.Compose([
        transforms.Resize(args.input_size),
        # CONSISTENCY FIX: crop to args.input_size (default 64) instead of the
        # hard-coded 64, so a non-default --input_size stays coherent.
        transforms.CenterCrop(args.input_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
)

# Data loader.  shuffle=True so each epoch sees the images in a new order.
data_loader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.num_workers
)
# Build the DCGAN generator: project the latent vector, then upsample 16x
# with four stride-2 transposed convolutions to an (input_channels, input_size,
# input_size) image in [-1, 1].
class generator(nn.Module):
    def __init__(self):
        super(generator, self).__init__()
        self.init_size = args.input_size // 16  # spatial size before upsampling (64 // 16 = 4)
        self.l1 = nn.Sequential(nn.Linear(args.latent_dim, (args.gf * 8) * (self.init_size ** 2)))
        self.main = nn.Sequential(
            nn.BatchNorm2d(num_features=args.gf * 8),  # normalises the channel dimension
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels=args.gf * 8, out_channels=args.gf * 4, kernel_size=4, stride=2, padding=1),
            # BUG FIX: the BatchNorm sizes below were hard-coded (512/256/128),
            # which only matches the default --gf 128; any other value crashed
            # at runtime.  They now track args.gf like the conv layers do.
            nn.BatchNorm2d(num_features=args.gf * 4),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels=args.gf * 4, out_channels=args.gf * 2, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(num_features=args.gf * 2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels=args.gf * 2, out_channels=args.gf, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(num_features=args.gf),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels=args.gf, out_channels=args.input_channels, kernel_size=4, stride=2, padding=1),
            # no BatchNorm on the output layer
            nn.Tanh()
        )

    def forward(self, z):
        """z: (N, latent_dim) -> (N, input_channels, input_size, input_size)."""
        out = self.l1(z)
        out = out.view(out.shape[0], args.gf * 8, self.init_size, self.init_size)  # (N, C, H, W)
        out = self.main(out)
        return out
# Build the DCGAN discriminator: four stride-2 convolutions halve the spatial
# size 64 -> 32 -> 16 -> 8 -> 4, a final 4x4 conv maps to one value, and
# Sigmoid turns it into a probability.
class discriminator(nn.Module):
    def __init__(self):
        super(discriminator, self).__init__()
        self.main = nn.Sequential(
            # input is (nc) x 64 x 64
            nn.Conv2d(args.input_channels, args.df, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (args.df) x 32 x 32
            nn.Conv2d(args.df, args.df * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(args.df * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (args.df*2) x 16 x 16
            nn.Conv2d(args.df * 2, args.df * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(args.df * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (args.df*4) x 8 x 8
            nn.Conv2d(args.df * 4, args.df * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(args.df * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (args.df*8) x 4 x 4
            nn.Conv2d(args.df * 8, 1, 4, 1, 0, bias=False),
            # state size. 1 x 1 x 1
            nn.Sigmoid()
        )
        # NOTE(review): self.l1 is never used in forward() — it allocates
        # 128*input_size*input_size dead parameters that are still initialised
        # and written into every checkpoint.  Removing it would change the
        # state_dict layout of the saved g_9500/d_9500 models, so it is only
        # flagged here.
        self.l1 = nn.Sequential(
            nn.Linear(128 * args.input_size * args.input_size, 1),
            nn.Sigmoid()
        )
    def forward(self, input):
        # Flatten the (N, 1, 1, 1) conv output to (N, 1) probabilities.
        out = self.main(input)
        out = out.view(out.shape[0], -1)
        return out
# DCGAN weight initialisation, applied to every sub-module via model.apply(...).
def weights_init_normal(m):
    """Initialise conv weights ~ N(0, 0.02); BatchNorm gamma ~ N(1, 0.02), beta = 0."""
    classname = m.__class__.__name__  # e.g. 'Conv2d', 'ConvTranspose2d', 'BatchNorm2d'
    if classname.find('Conv') != -1:  # str.find returns the match index, or -1 if absent
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        # BUG FIX: the DCGAN recipe (and sections 2-3 of this file) centre the
        # BatchNorm scale around 1.0, not 0.0 — a zero-mean gamma nearly zeroes
        # every feature map at the start of training.
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)  # beta fixed at 0
# Instantiate the networks; either load pretrained weights (--pre_train) or
# apply the DCGAN weight initialisation.
generator = generator()          # rebinds the name: `generator` is now the instance
discriminator = discriminator()
if args.pre_train:
    # NOTE(review): torch.load has no map_location here; loading a GPU-saved
    # checkpoint on a CPU-only machine would fail — confirm/add map_location.
    generator.load_state_dict(torch.load(args.g_model_path))
    discriminator.load_state_dict(torch.load(args.d_model_path))
else:
    generator.apply(weights_init_normal)
    discriminator.apply(weights_init_normal)
#print(generator,discriminator)
# Optimizers (learning rates and betas come from the parser defaults).
optimizer_g = torch.optim.Adam(generator.parameters(), args.g_lr, betas=(args.beta1, args.beta2))
optimizer_d = torch.optim.Adam(discriminator.parameters(), args.d_lr, betas=(args.beta1, args.beta2))
# Loss function (binary cross entropy).
loss = nn.BCELoss()
# Move the models and the loss to the GPU when CUDA is available.
if cuda:
    generator.cuda()
    discriminator.cuda()
    loss.cuda()
# Training loop (run with --train).
if args.train:
    print(f'Starting Training Loop...')
    for epoch in range(args.n_epochs):
        # ImageFolder yields (images, labels); unpack so imgs is a tensor with .shape
        for i,(imgs,_) in enumerate(data_loader):
            # Targets: 1.0 for real images, 0.0 for generated ones.
            valid = Variable(Tensor(imgs.shape[0], 1).fill_(1.0), requires_grad=False)
            fake = Variable(Tensor(imgs.shape[0], 1).fill_(0.0), requires_grad=False)
            # Real batch converted to the chosen tensor type (CPU/GPU float).
            real_imgs = Variable(imgs.type(Tensor),)
            z = Variable(Tensor(np.random.normal(0,1,(imgs.shape[0],args.latent_dim))))
            # Variable only wraps tensors (not numpy arrays), hence Tensor(...) above.
            gent_imgs = generator(z)
            # ---- Train the discriminator k_disc times per batch ----
            # NOTE(review): the same real batch and the same gent_imgs are reused
            # for every k; Goodfellow's Algorithm 1 samples fresh minibatches per
            # inner step — confirm before running with k_disc > 1.
            for k in range(args.k_disc):
                discriminator.zero_grad()
                discriminator_real_imgs = discriminator(real_imgs)
                d_x = discriminator_real_imgs.mean().item()      # average D(x), for logging
                # detach() cuts the graph back into G: only D's params get gradients here
                discriminator_gent_imgs = discriminator(gent_imgs.detach())
                d_g_z1 = discriminator_gent_imgs.mean().item()   # average D(G(z)) before D's update
                real_loss = loss(discriminator_real_imgs,valid)
                real_loss.backward()# accumulate gradients from the real half
                fake_loss = loss(discriminator_gent_imgs,fake)
                fake_loss.backward()# ...and from the fake half
                d_loss = real_loss + fake_loss# combined value, kept for logging only
                #d_loss.backward()
                optimizer_d.step()# apply the accumulated gradients
            # ---- Train the generator once per batch ----
            #if (epoch*len(data_loader)+i+1) <= args.early:
            generator.zero_grad()
            discriminator_gent_imgs = discriminator(gent_imgs)
            d_g_z2 = discriminator_gent_imgs.mean().item()       # average D(G(z)) after D's update
            g_loss = loss(discriminator_gent_imgs,valid)         # fakes labelled 'real': the non-saturating G objective
            g_loss.backward()
            optimizer_g.step()
            '''
            # 后期训练
            else:
                if (epoch*len(data_loader)+i+1) == args.early:
                    print(f'转换生成器损失函数')
                generator.zero_grad()
                discriminator_gent_imgs = discriminator(gent_imgs)
                d_g_z2 = discriminator_gent_imgs.mean().item()
                g_loss = (-1)*loss(discriminator_gent_imgs,valid)# 有问题
                g_loss.backward()
                optimizer_g.step()
            '''
            print('[Epoch: %d/%d, Batch: %d/%d, g_loss: %.4f, d_loss: %.4f, D(x): %.4f, D(G(z)): %.4f/%.4f]'
                %(epoch, args.n_epochs, i, len(data_loader), g_loss.item(), d_loss.item(), d_x, d_g_z1, d_g_z2)
            )
            # Save sample images and checkpoints every 500 batches.
            batches_done = epoch*len(data_loader) + i
            if batches_done % 500 == 0:
                save_image(gent_imgs.data[:24],'images/%d.jpg'%batches_done,)
                torch.save(generator.state_dict(),args.models_path+'g_%d.pth'%batches_done)
                torch.save(discriminator.state_dict(),args.models_path+'d_%d.pth'%batches_done)
    print('finished!')
# Testing (run with --test): load a trained generator and write args.n_test
# generated samples to ./results as PNG files.
if args.test:
    z = Tensor(np.random.normal(0, 1, (args.n_test, args.latent_dim)))
    g_model = generator  # `generator` was rebound to the instance above
    if cuda:
        g_model.cuda()
    # ROBUSTNESS FIX: map_location lets a GPU-trained checkpoint load on a
    # CPU-only machine instead of crashing.
    g_model.load_state_dict(
        torch.load(args.g_model_path, map_location='cuda' if cuda else 'cpu'),
        strict=True)
    g_model.eval()  # inference mode: BatchNorm uses its running statistics
    # g_model = g_model.to(device)
    print('g_Model path {:s}. \nTesting...'.format(args.g_model_path))
    os.makedirs('./results', exist_ok=True)  # hoisted out of the per-image loop
    with torch.no_grad():
        # BUG FIX: the generator ends in Tanh, so its output lies in [-1, 1]
        # (training images were normalised with mean/std 0.5).  Map back to
        # [0, 1] before clamping — clamping the raw Tanh output discarded the
        # entire negative half of the value range.
        output = g_model(z).data.squeeze().float().cpu()
        output = ((output + 1.0) / 2.0).clamp_(0, 1).numpy()
        leng = output.shape[0]
        for i in range(leng):
            output1 = output[i]
            # CHW -> HWC and RGB -> BGR channel order for cv2.imwrite
            output1 = np.transpose(output1[[2, 1, 0], :, :], (1, 2, 0))
            output1 = (output1 * 255.0).round()
            cv2.imwrite('results/%d_rlt.png' % i, output1)
参数文件:
# Command-line arguments shared by the DCGAN script (imported as `from parser import args`).
# NOTE(review): this module is apparently named `parser`, which shadows a former
# stdlib module name, and it calls parse_args() and makedirs() at import time —
# confirm that is intended before importing it from other tools.
import argparse
import os
parser = argparse.ArgumentParser(description='arguments')
parser.add_argument('--batch_size', type=int, default=128, help='')
parser.add_argument('--g_lr', type=float, default=0.0002, help='lr of generator')
parser.add_argument('--d_lr', type=float, default=0.0002, help='lr of discriminator')
parser.add_argument('--dataset_path', type=str, default='../', help='the folder path of dataset')
parser.add_argument('--input_size', type=int, default=64, help='')
parser.add_argument('--input_channels', type=int, default=3, help='')
parser.add_argument('--gf', type=int, default=128, help='')  # base channel width of the generator
parser.add_argument('--df', type=int, default=128, help='')  # base channel width of the discriminator
parser.add_argument('--num_workers', type=int, default=8, help='')
parser.add_argument('--latent_dim', type=int, default=100, help='length of noise')
parser.add_argument('--beta1', type=float, default=0.5, help='')  # Adam beta1
parser.add_argument('--beta2', type=float, default=0.999, help='')  # Adam beta2
parser.add_argument('--n_epochs', type=int, default=5, help='')
os.makedirs('./models/',exist_ok=True)  # side effect at import: checkpoint folder
parser.add_argument('--models_path', type=str, default='models/', help='')
os.makedirs('./images/',exist_ok=True)  # side effect at import: sample-image folder
parser.add_argument('--images_path', type=str, default='images/', help='Intermediate generated image')
parser.add_argument('--train',action='store_true')
parser.add_argument('--test',action='store_true')
parser.add_argument('--k_disc',type=int, help="the number of discriminator's training per batch size", default=1)
parser.add_argument('--early',type=int, help="the number of batch size in early", default=2)
parser.add_argument('--pre_train',action='store_true',help='for train')
parser.add_argument('--g_model_path',type=str, default='./models/g_9500.pth')
parser.add_argument('--d_model_path',type=str, default='./models/d_9500.pth')
parser.add_argument('--n_test',type=int,default=10,help='the number of test images')
args = parser.parse_args()
- 指定GPU训练
CUDA_VISIBLE_DEVICES=0 \
- 训练时指定model.train()和model.eval(),非常重要。
参考
- PyTorch加载预训练模型(所以用vgg19计算感知损失时,直接用API调就完事了)
- model.train()和model.eval()用法和区别
- TypeError: forward() missing 1 required positional argument: ‘x‘
5、总结
这篇文章从2021.4.5写到了2021.4.8,今天算是有个小结尾了。几个遗憾就是:1. 虽严格按照论文要求搭建,也完全按照GAN的训练流程,但是效果仍不太好。2. GAN中G在后期的损失函数还没调好。3. 一个新的epoch开始时网络开始变差。
但是通过搭整个网络,还是学到了很多东西。这篇文章还是有多读的必要的。
参考文献
- PyTorch官方参考文档中文版
- PyTorch_API文档
- GAN网络
- DCGAN
- pytorch——weights_init(m)