GAN 网络原理如下:
MNIST 手写数字实战:
import torch
import torchvision
from torchvision import transforms
from torchvision.utils import save_image
from torch import nn
from torch.autograd import Variable
from torch import optim
import os
# Preprocessing: convert each image to a tensor, then normalise the single
# channel with mean 0.5 and std 0.5 (i.e. (x - 0.5) / 0.5).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# MNIST training split; fetched into ./data on first use.
mnist = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

# Shuffled mini-batches of 100 images.
dataloader = torch.utils.data.DataLoader(
    dataset=mnist,
    batch_size=100,
    shuffle=True,
)
class Dnet(nn.Module):
    """Discriminator: scores an image with a sigmoid output per sample.

    Architecture: two Conv2d -> LeakyReLU -> MaxPool2d stages followed by a
    two-layer fully connected head ending in a sigmoid. The shape comments
    below assume 1x28x28 inputs, which is what the first Linear layer
    (12 * 8 * 8 input features) requires.
    """

    def __init__(self):
        super(Dnet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 6, 3, padding=2),  # (batch, 6, 30, 30)
            nn.LeakyReLU(0.2, True),
            nn.MaxPool2d(2, stride=2),  # (batch, 6, 15, 15)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 12, 3, padding=2),  # (batch, 12, 17, 17)
            nn.LeakyReLU(0.2, True),
            nn.MaxPool2d(2, stride=2),  # (batch, 12, 8, 8)
        )
        self.fc = nn.Sequential(
            nn.Linear(12 * 8 * 8, 1024),
            nn.LeakyReLU(0.2, True),
            nn.Linear(1024, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Run the discriminator.

        Args:
            x: image batch laid out as (batch, channel=1, height=28, width=28).

        Returns:
            Tensor of shape (batch,) holding the sigmoid output per sample.
        """
        x = self.conv1(x)
        x = self.conv2(x)
        # Flatten the second conv stage's output to one row per sample.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        # Drop the trailing singleton dim: (batch, 1) -> (batch,).
        x = x.squeeze(-1)
        return x
class Gnet(nn.Module):
    """Generator: maps a 128-dim noise vector to a 1x28x28 image.

    A Linear layer expands the noise to 784 values that are reshaped to a
    1x28x28 map, then passed through BatchNorm/ReLU and three 3x3
    convolutions. Despite the ``downsample*`` attribute names, every
    convolution uses stride 1 and padding 1, so the spatial size stays
    28x28. The names are kept so existing ``state_dict`` keys stay valid.
    The final Tanh bounds the output to [-1, 1].
    """

    def __init__(self):
        super(Gnet, self).__init__()
        self.fc = nn.Linear(128, 784)  # (batch, 784); reshaped in forward()
        self.br = nn.Sequential(
            nn.BatchNorm2d(1),
            nn.ReLU(True),
        )
        self.downsample1 = nn.Sequential(
            nn.Conv2d(1, 12, 3, stride=1, padding=1),  # (batch, 12, 28, 28)
            nn.BatchNorm2d(12),
            nn.ReLU(True),
        )
        self.downsample2 = nn.Sequential(
            nn.Conv2d(12, 6, 3, stride=1, padding=1),  # (batch, 6, 28, 28)
            nn.BatchNorm2d(6),
            nn.ReLU(True),
        )
        self.downsample3 = nn.Sequential(
            nn.Conv2d(6, 1, 3, stride=1, padding=1),  # (batch, 1, 28, 28)
            nn.Tanh(),
        )

    def forward(self, x):
        """Generate images from noise.

        Args:
            x: noise tensor of shape (batch, 128).

        Returns:
            Tensor of shape (batch, 1, 28, 28) with values in [-1, 1].
        """
        x = self.fc(x)  # (batch, 784)
        x = x.view(x.size(0), 1, 28, 28)  # (batch, 1, 28, 28)
        x = self.br(x)  # (batch, 1, 28, 28)
        x = self.downsample1(x)  # (batch, 12, 28, 28)
        x = self.downsample2(x)  # (batch, 6, 28, 28)
        x = self.downsample3(x)  # (batch, 1, 28, 28)
        return x
def to_img(x):
    """Map generator output from [-1, 1] to a batch of [0, 1] images.

    Args:
        x: tensor whose element count is a multiple of 784 (28 * 28).

    Returns:
        Tensor of shape (N, 1, 28, 28) with values clamped to [0, 1].
    """
    y = (x + 1) * 0.5
    y = y.clamp(0, 1)
    # reshape (rather than view) so non-contiguous inputs are accepted too.
    y = y.reshape(-1, 1, 28, 28)
    return y
class Net:
    """Training harness bundling the discriminator, generator, loss and optimizers.

    ``forward`` caches the discriminator outputs needed for the
    discriminator loss; ``backward`` performs one discriminator update
    followed by one generator update.
    """

    def __init__(self, device=None):
        """Build both networks and their optimizers.

        Args:
            device: device to place the networks on. Defaults to CUDA when
                available, otherwise CPU.
        """
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(device)
        self.dnet = Dnet().to(self.device)
        self.gnet = Gnet().to(self.device)
        self.Loss = nn.BCELoss()
        self.d_optimizer = optim.Adam(self.dnet.parameters(), lr=0.0002)
        self.g_optimizer = optim.Adam(self.gnet.parameters(), lr=0.0002)

    def forward(self, real_x, fack_x):
        """Compute discriminator outputs for real and generated samples.

        Args:
            real_x: batch of real samples fed to the discriminator.
            fack_x: noise batch fed to the generator ("fack" is the
                original spelling of "fake", kept for caller compatibility).

        Stores ``self.real_d_out`` (scores for real samples) and
        ``self.g_d_out`` (scores for generated samples).
        """
        # Discriminator verdict on the real samples.
        self.real_d_out = self.dnet(real_x)
        # Generated samples only serve as discriminator inputs here, so no
        # generator graph is needed for this pass.
        with torch.no_grad():
            g_out = self.gnet(fack_x)
        # Use self.dnet (not a module-level ``net``) so the class works
        # regardless of what the caller names its instance.
        self.g_d_out = self.dnet(g_out)

    def backward(self, pos_y, nega_y, fack_xs):
        """Run one discriminator step, then one generator step.

        Must be called after ``forward``, which provides ``self.real_d_out``
        and ``self.g_d_out``.

        Args:
            pos_y: target labels for "real" (ones), shape matching the
                discriminator output.
            nega_y: target labels for "fake" (zeros), same shape.
            fack_xs: fresh noise batch used for the generator update.

        Stores ``self.d_loss``, ``self.g_loss``, ``self.fack_g_out`` and
        ``self.fack_g_d_out``.
        """
        # --- Discriminator: push real samples towards 1, fakes towards 0.
        d_out_loss = self.Loss(self.real_d_out, pos_y)
        g_d_loss = self.Loss(self.g_d_out, nega_y)
        self.d_loss = d_out_loss + g_d_loss
        self.d_optimizer.zero_grad()
        # The generator step below builds its own graph, so this one does
        # not need to be retained.
        self.d_loss.backward()
        self.d_optimizer.step()

        # --- Generator: make the discriminator label fresh fakes as real.
        self.fack_g_out = self.gnet(fack_xs)
        self.fack_g_d_out = self.dnet(self.fack_g_out)
        self.g_loss = self.Loss(self.fack_g_d_out, pos_y)
        self.g_optimizer.zero_grad()
        # This also accumulates gradients in the discriminator; they are
        # cleared by d_optimizer.zero_grad() on the next call.
        self.g_loss.backward()
        self.g_optimizer.step()
if __name__ == '__main__':
    # Output directory for the generated sample grids.
    os.makedirs('img', exist_ok=True)

    net = Net()
    # Create inputs on whatever device the networks were placed on.
    device = next(net.dnet.parameters()).device

    for i in range(100):
        for x, _ in dataloader:
            # Size noise and labels from the actual batch so a different
            # batch size (or a short final batch) still works.
            batch_size = x.size(0)
            real_x = x.to(device)  # real samples
            fack_x = torch.randn(batch_size, 128, device=device)  # noise for the D step
            pos_y = torch.ones(batch_size, device=device)  # labels for real samples
            nega_y = torch.zeros(batch_size, device=device)  # labels for fake samples
            fack_xs = torch.randn(batch_size, 128, device=device)  # noise for the G step

            net.forward(real_x, fack_x)
            net.backward(pos_y, nega_y, fack_xs)

        # NOTE(review): the original indentation was lost; reporting and
        # saving are placed once per epoch because the output filename is
        # keyed by the epoch index -- confirm against the original post.
        img = to_img(net.fack_g_out.detach())
        # Soft score, not a thresholded accuracy: average of the mean
        # discriminator output on real samples and one minus its mean
        # output on generated samples (last batch of the epoch).
        D_Accuracy = ((net.real_d_out.mean() + 1 - net.fack_g_d_out.mean()) / 2).item()
        print(net.d_loss.item(), net.g_loss.item(), D_Accuracy)
        save_image(img, './img/fake_images-{}.png'.format(i + 1))