环境: Win10 + Anaconda + pytorch
参考:https://www.leiphone.com/news/201702/eIGiQzuGeuAaH22e.html
(原题目:如何用不到50行代码训练GAN)
准备工作:
1.Tensor & Variable
刚开始接触torch,对里面有什么还一无所知,参考了这篇博文:http://blog.csdn.net/victoriaw/article/details/72673110
PyTorch中的Tensor本质上和numpy数组是一样的:Tensor是一个n维数组,并且PyTorch定义了关于Tensor的很多操作。并且Tensor和numpy数组一样,本身并不涉及深度学习、计算图和梯度的概念,它们都是通用的科学计算工具。但是和numpy不同的是,Torch可以利用GPU来加速数值计算。
如果用numpy或者Tensor来实现神经网络,需要手动写出前向过程和反向过程。对于简单的网络,反向过程中的导数容易求得,但是随着网络深度以及网络复杂度的增加,求出梯度的解析表达式是非常困难的。
PyTorch的包autograd提供了自动求导的功能。当使用autograd时,定义的前向网络会生成一个计算图:每个节点是一个Tensor,边表示由输入Tensor到输出Tensor的函数。沿着计算图的反向传播可以很容易地计算出梯度。
在实现的时候,用到了Variable对象。Variable对Tensor对象进行封装,只需要Variable::data即可取出Tensor,并且Variable还封装了该Tensor的梯度Variable::grad(是个Variable对象)。现在用Variable作为计算图的节点,则通过反向传播自动求得的导数就保存在Variable对象中了。
Variable提供了和Tensor一样的API,即能在Tensor上执行的操作也可以在Variable上执行。
2.torch.nn
首先有pytorch的官方文档可以参考:http://pytorch.org/docs/master/nn.html
其次中文的话还有这个:http://blog.csdn.net/xmdxcsj/article/details/49314297
哇哇,又找到一个中文文档!传送门:https://ptorch.com/docs/1/torch-nn(可以偷懒了嘿嘿嘿)
Parameter。Variable的子类。
Module。是神经网络的基本组成部分,作为一个抽象类,可以通过定义成员函数实现不同的神经网络结构,nn.*都是Module的子类。
卷积层。
class torch.nn.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
class torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
3.Optimizer
用到了一个optim.Adam()的函数。主要是用到了Adam算法,本质上是带有动量项的RMSprop,它利用梯度的一阶矩估计和二阶矩估计动态调整每个参数的学习率。它的优点主要在于经过偏置校正后,每一次迭代学习率都有个确定范围,使得参数比较平稳。
代码:
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 20 11:02:52 2017
50行代码实现GAN!
https://www.leiphone.com/news/201702/eIGiQzuGeuAaH22e.html
@author: Luoluoluoplus7
"""
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import matplotlib.pyplot as plt
from pylab import *
#R: sampler for the "real" data distribution — a Gaussian ("bell curve").
#Takes the mean and standard deviation as input.
#(Note: "bell curve" here simply means the normal distribution.)
def get_distribution_sampler(mu,sigma):
    """Return a sampler fn: n -> 1*n Tensor of N(mu, sigma) draws."""
    def sample(n):
        # One row of n Gaussian samples, shape [1, n].
        return torch.Tensor(np.random.normal(mu, sigma, (1, n)))
    return sample
#I: sampler for the generator's input noise — uniform distribution.
def get_generator_input_sampler():
    """Return a sampler fn: (m, n) -> m*n Tensor of Uniform[0, 1) noise."""
    def sample(m, n):
        # Uniform noise, shape [m, n].
        return torch.rand(m, n)
    return sample
#G: the generator — 2 hidden layers, 3 linear maps, ELU activation.
class Generator(nn.Module):
    """Maps input noise to samples meant to mimic the real distribution.

    Architecture: Linear -> ELU -> Linear -> Sigmoid -> Linear.
    No activation on the output: the generator emits unbounded real values.
    """
    def __init__(self, input_size, hidden_size, output_size):
        super(Generator, self).__init__()
        self.map1 = nn.Linear(input_size, hidden_size)
        self.map2 = nn.Linear(hidden_size, hidden_size)
        self.map3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.elu(self.map1(x))
        # torch.sigmoid replaces the deprecated F.sigmoid.
        x = torch.sigmoid(self.map2(x))
        return self.map3(x)
#D: the discriminator — 2 hidden layers, 3 linear maps.
class Discriminator(nn.Module):
    """Scores an input batch with the probability that it is real.

    Bug fix: the original returned the raw linear output of map3, but the
    training loop feeds this into nn.BCELoss, which requires values in
    [0, 1] — the final sigmoid (present in the reference implementation)
    was missing, and map2 used sigmoid instead of ELU. Restored the
    reference architecture: Linear -> ELU -> Linear -> ELU -> Linear -> Sigmoid.
    """
    def __init__(self, input_size, hidden_size, output_size):
        super(Discriminator, self).__init__()
        self.map1 = nn.Linear(input_size, hidden_size)
        self.map2 = nn.Linear(hidden_size, hidden_size)
        self.map3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.elu(self.map1(x))
        x = F.elu(self.map2(x))
        # Sigmoid squashes the score into [0, 1] as BCELoss requires.
        return torch.sigmoid(self.map3(x))
#Flattens a tensor's values into a plain Python list (used for logging).
def extract(v):
    """Return every element of *v* as a flat Python list of numbers."""
    # detach() drops the autograd graph; flatten().tolist() replaces the
    # original's deprecated Storage round-trip (v.data.storage().tolist()),
    # returning the same flat list for contiguous tensors.
    return v.detach().flatten().tolist()
#Summarize a list of samples as [mean, standard deviation].
def stats(d):
    """Return [mean, std] of the values in *d*."""
    values = np.asarray(d)
    return [values.mean(), values.std()]
##Appends each element's deviation from its row mean as extra features for D.
def decorate_with_diffs(input_data, exponent):
    """Return [input_data, (input_data - row_mean) ** exponent] concatenated
    along dim 1, doubling the feature width.

    Bug fix: the original broadcast only the FIRST row's mean
    (mean.tolist()[0][0]) across every row, so with more than one row the
    deviations were computed against the wrong mean. Broadcasting the
    [rows, 1] mean directly handles any number of rows and is identical
    for the 1-row batches this script uses.
    """
    # Detach, matching the original's gradient-free mean computation.
    mean = torch.mean(input_data.detach(), 1, True)  # shape [rows, 1]
    diffs = torch.pow(input_data - mean, exponent)
    return torch.cat([input_data, diffs], 1)
#Data params: the "real" distribution D must learn is N(4, 1.25).
data_mean = 4
data_stddev = 1.25
#Model params:
g_input_size = 1      # generator input: 1-D noise per sample
g_hidden_size = 50    # generator hidden-layer width
g_output_size = 1     # generator emits one value per sample
d_input_size = 100    # discriminator sees a whole 100-sample minibatch as one row
d_hidden_size = 50    # discriminator hidden-layer width
d_output_size = 1     # single real-vs-fake score
minibatch_size = d_input_size
d_learning_rate = 2e-4
g_learning_rate = 2e-4
optim_betas = ( 0.9 , 0.999)  # Adam's (beta1, beta2)
num_epochs = 3000
print_interval = 500
d_steps = 1  # discriminator updates per epoch
g_steps = 1  # generator updates per epoch
# ### Uncomment only one of these
# preprocess transforms data before D sees it; d_input_func maps the raw
# minibatch width to D's input width.
(name, preprocess, d_input_func) = ("Raw data", lambda data: data, lambda x: x)
#(name, preprocess, d_input_func) = ("Data and variancesz",lambda data: decorate_with_diffs(data, 2.0), lambda x: x * 2)
print("Using data [%s]" % (name))
d_sampler = get_distribution_sampler(data_mean,data_stddev) #real-data sampler, yields [1*n]
gi_sampler = get_generator_input_sampler() #noise sampler, yields [m*n]
G = Generator(input_size=g_input_size,
hidden_size=g_hidden_size,output_size=g_output_size)
D = Discriminator(input_size=d_input_size,
hidden_size=d_hidden_size,output_size=d_output_size)
#Binary cross-entropy — exactly the loss in the original GAN objective.
criterion = nn.BCELoss() # Binary cross entropy: http://pytorch.org/docs/nn.html#bceloss
#Optimizers: Adam for both networks.
d_optimizer = optim.Adam(D.parameters(),lr=d_learning_rate,betas=optim_betas)
g_optimizer = optim.Adam(G.parameters(),lr=g_learning_rate,betas=optim_betas)
#Train:
error_r = np.zeros(0)  # per-epoch D loss on real data (for plotting)
error_f = np.zeros(0)  # per-epoch D loss on fake data (for plotting)
for epoch in range(num_epochs):
    for d_index in range(d_steps):
        D.zero_grad()
        # 1A: Train D on a real minibatch; target label 1 = "real".
        d_real_data = Variable(d_sampler(d_input_size)) #[1*100]
        d_real_decision = D(preprocess(d_real_data)) #Discriminator on real data
        d_real_error = criterion(d_real_decision,Variable(torch.ones(1))) #[1*1]
        d_real_error.backward()
        # 1B: Train D on a generated minibatch; target label 0 = "fake".
        d_gen_input =Variable(gi_sampler(minibatch_size,g_input_size)) #[minibatch_size*g_input_size]/[100*1]
        d_fake_data = G(d_gen_input)
        # .t() turns the [100*1] generator output into the [1*100] row D expects.
        d_fake_decision = D(preprocess(d_fake_data.t())) #Discriminator on fake data
        d_fake_error = criterion(d_fake_decision, Variable(torch.zeros(1))) # zeros = fake
        d_fake_error.backward()
        # Apply gradients accumulated by BOTH backward() calls above.
        d_optimizer.step() # Only optimizes D's parameters; changes based on stored gradients from backward()
    for g_index in range(g_steps):
        # 2: Train G so that D labels its output as real.
        G.zero_grad()
        gen_input = Variable(gi_sampler(minibatch_size, g_input_size)) #[minibatch_size*g_input_size]/[100*1]
        g_fake_data = G(gen_input)
        dg_fake_decision = D(preprocess(g_fake_data.t()))
        g_error = criterion(dg_fake_decision, Variable(torch.ones(1))) # we want to fool, so pretend it's all genuine
        g_error.backward()
        g_optimizer.step() # Only optimizes G's parameters
    if epoch % print_interval == 0:
        print("%s: D: %s/%s G: %s (Real: %s, Fake: %s) " % (epoch,
            extract(d_real_error)[0],
            extract(d_fake_error)[0],
            extract(g_error)[0],
            stats(extract(d_real_data)),
            stats(extract(d_fake_data))))
    # Record this epoch's D losses for the plots below.
    error_r = np.append(error_r,d_real_error.data.numpy())
    error_f = np.append(error_f,d_fake_error.data.numpy())
#Plot D's per-epoch loss on real data (left) and fake data (right).
#Fix: index by len(error_r)/len(error_f) instead of the hard-coded 3000,
#which broke whenever num_epochs was changed.
plt.subplot(1,2,1)
plt.scatter(range(len(error_r)),error_r)
plt.subplot(1,2,2)
plt.scatter(range(len(error_f)),error_f)
plt.show()
因为只训练了3000个epoch,所以结果还不是太好。