利用Pytorch自动求导手写卷积网络
思路:
1、mnist数据处理
2、卷积层、全连接层、柔性最大层构建
3、前向传播,反向传播、权重及偏置更新
4、正确率验证
Mnist手写数据集处理
import torch as tch
import torchvision.datasets.mnist as mnist
def main(data_dir='D:/mnist'):
    """Load raw MNIST idx files and return ``(train_data, test_data)``.

    data_dir: directory containing the four idx-format MNIST files
        (defaults to the original hard-coded path for compatibility).
    Each returned element is a ``[image, label]`` pair where ``image`` is a
    1x784 double tensor scaled to [0, 1] and ``label`` is a double tensor.
    """
    train_set = (
        mnist.read_image_file(data_dir + '/train-images.idx3-ubyte'),
        mnist.read_label_file(data_dir + '/train-labels.idx1-ubyte'),
    )
    test_set = (
        mnist.read_image_file(data_dir + '/t10k-images.idx3-ubyte'),
        mnist.read_label_file(data_dir + '/t10k-labels.idx1-ubyte'),
    )
    # Pair each image with its label; flatten 28x28 -> 1x784 and
    # normalise pixel values from [0, 255] to [0, 1].
    train_data = [[(a.view(1, 784).double()) / 255, b.double()]
                  for (a, b) in zip(train_set[0], train_set[1])]
    test_data = [[(a.view(1, 784).double()) / 255, b.double()]
                 for (a, b) in zip(test_set[0], test_set[1])]
    return (train_data, test_data)
# Guard the entry point: this module is imported by the network script
# (as `mnist_load_test`), which calls main() itself — without the guard the
# whole dataset would be loaded a second time on import, result discarded.
if __name__ == "__main__":
    main()
网络
import torch as tch
import torch.nn.functional as F
import numpy as np
import torchvision.datasets.mnist as mnist
from torch.autograd import Variable as Var
import cv2 as cv
import mnist_load_test as load
import random
import sys
class Network(object):
    """Feed-forward network trained with mini-batch SGD via autograd.

    Each layer object must expose ``forward(inpt)`` (storing the result in
    ``.output``), ``w``/``b`` Variables with gradients, and — for the last
    layer — ``cost_fn(target)``.
    """
    def __init__(self, layers):
        # Ordered list of layer objects; depth is cached for the loops below.
        self.layers = list(layers)
        self.n_layer = len(self.layers)

    def SGD(self, train_data, test_data, mini_batch_size, epoch, eta, lamda, eta_decay):
        """Train with stochastic gradient descent and report test accuracy.

        train_data / test_data: lists of ``[x, y]`` pairs.
        eta: learning rate, decayed each epoch by ``eta_decay**((i+1)/10)``.
        lamda: regularisation strength — currently unused in update()
               (the regularisation term is commented out by design).
        """
        self.n_train = len(train_data)
        self.n_test = len(test_data)
        self.eta = eta
        self.loss_record = 0  # running loss total for the current epoch
        for i in range(epoch):
            n_accuracy = 0  # correct predictions on test_data this epoch
            self.loss_record = 0
            random.shuffle(train_data)  # reshuffle before splitting
            # Split the shuffled training list into mini-batches.
            batches = [train_data[k:k + mini_batch_size]
                       for k in range(0, self.n_train, mini_batch_size)]
            for n_progress, batch in enumerate(batches):
                print("{0}/{1}".format(n_progress, len(batches)))
                self.update(batch, mini_batch_size, lamda)
            for (x, y) in test_data:  # evaluate accuracy
                self.layers[0].forward(x)
                for j in range(1, self.n_layer):
                    self.layers[j].forward(self.layers[j - 1].output)
                if tch.argmax(self.layers[-1].output) == y:
                    n_accuracy += 1
            print('Epoch:{0} Accuracy:{1}/{2}\nLoss_record:{3}\neta:{4}\n'
                  .format(i, n_accuracy, self.n_test, self.loss_record / self.n_train, self.eta))
            self.eta *= np.power(eta_decay, (i + 1) / 10)

    def update(self, batch, mini_batch_size, lamda):
        """Forward/backward each sample in *batch* and step weights/biases."""
        for (x, y) in batch:
            # Forward pass through all layers.
            self.layers[0].forward(x)
            for i in range(1, self.n_layer):
                self.layers[i].forward(self.layers[i - 1].output)
            loss = self.layers[-1].cost_fn(convert(y))
            # .item() detaches the scalar: accumulating the loss *tensor*
            # would keep every sample's computation graph alive for the
            # whole epoch and leak memory.
            self.loss_record += loss.item()
            loss.backward()  # backward pass
            for i in range(self.n_layer):  # gradient step per layer
                delta_w = self.layers[i].w.grad
                delta_b = self.layers[i].b.grad
                # .data keeps the parameter update out of the autograd graph.
                self.layers[i].w.data -= (self.eta / mini_batch_size * delta_w)
                self.layers[i].b.data -= (self.eta / mini_batch_size * delta_b)
                self.layers[i].w.grad.zero_()
                self.layers[i].b.grad.zero_()
class FullyConnectedLayer(object):
    """Fully connected layer with sigmoid activation and quadratic cost."""
    def __init__(self, n_in, n_out):
        self.n_in = n_in    # number of input units
        self.n_out = n_out  # number of output units
        # Gaussian init scaled by 1/sqrt(n_in) to keep pre-activations small.
        self.w = Var(tch.from_numpy(np.random.randn(n_in, n_out) / np.sqrt(n_in)))
        self.w.requires_grad = True  # enable autograd for the weights
        self.w.retain_grad()         # keep grad even if it becomes a non-leaf
        self.b = Var(tch.from_numpy(np.random.randn(1, n_out)))
        self.b.requires_grad = True
        self.b.retain_grad()

    def forward(self, inpt):
        """Store sigmoid(inpt @ w + b) in ``self.output``."""
        self.inpt = inpt.view(1, self.n_in)  # flatten to a 1 x n_in row
        self.output = self.activation_fn(tch.mm(self.inpt, self.w) + self.b)

    def activation_fn(self, z):
        # torch.sigmoid is numerically stable for large |z|, unlike the
        # manual 1/(1+exp(-z)) which can overflow in exp().
        return tch.sigmoid(z)

    def cost_fn(self, y):
        """Quadratic cost (sum of squared errors / 2) against target y."""
        return (((y - self.output) ** 2) / 2).sum()
class SoftmaxLayer(object):
    """Softmax output layer with log-likelihood cost."""
    def __init__(self, n_in, n_out):
        self.n_in = n_in
        self.n_out = n_out
        # Same scaled Gaussian init as the fully connected layer.
        self.w = Var(tch.from_numpy(np.random.randn(n_in, n_out) / np.sqrt(n_in)))
        self.w.requires_grad = True
        self.w.retain_grad()
        self.b = Var(tch.from_numpy(np.random.randn(1, n_out)))
        self.b.requires_grad = True
        self.b.retain_grad()

    def forward(self, inpt):
        """Store softmax(inpt @ w + b) in ``self.output``."""
        self.inpt = inpt.view(1, self.n_in)
        self.output = self.activation_fn(tch.mm(self.inpt, self.w) + self.b)

    def activation_fn(self, z):
        # Softmax is shift-invariant, so subtracting max(z) gives the same
        # result while preventing exp() overflow for large logits.
        e = tch.exp(z - z.max())
        return e / e.sum()

    def cost_fn(self, y):
        """Negative log-likelihood; the natural pairing for softmax output."""
        return -(y * tch.log(self.output)).sum()
class ConvPoolLayer(object):  # convolution + max-pooling layer
    def __init__(self,image,kernel):
        # image: Image_shape with .h/.w (input height/width);
        # kernel: Kernel_shape with .s (kernel size), .d (depth/#filters),
        # .p (pooling window size).
        self.image=image#input image geometry
        self.kernel=kernel#kernel / pooling configuration
        #weights shaped (depth, kernel_size, kernel_size)
        self.w=Var(tch.randn(size=(kernel.d,kernel.s,kernel.s),dtype=tch.double)/np.sqrt(image.h*image.w))
        self.w.requires_grad=True#enable autograd
        self.w.retain_grad()#keep grad on non-leaf nodes
        #bias shaped (depth, 1), one per filter
        self.b=Var(tch.randn(size=(kernel.d,1),dtype=tch.double))
        self.b.requires_grad=True
        self.b.retain_grad()
    def forward(self,inpt):#forward: valid convolution + sigmoid, then max pooling
        #size of the "valid" convolution output (no padding, stride 1)
        self.conv_h=self.image.h-self.kernel.s+1
        self.conv_w=self.image.w-self.kernel.s+1
        #convolution output buffer, filled element-wise below
        self.conv=tch.zeros(self.kernel.d,int(self.conv_h),int(self.conv_w),dtype=tch.double)
        #compute pooled output size (ceil division by pool size)
        self.GetPoolSize()
        #pooled output buffer
        self.output=tch.zeros(self.kernel.d,int(self.pool_h),int(self.pool_w),dtype=tch.double)
        #reshape the flat input back to (h, w)
        self.inpt=inpt.view(self.image.h,self.image.w)
        #naive convolution: triple loop over rows, columns and filters
        #(author notes this should be vectorised — known bottleneck)
        for i in range(self.conv_h):
            for j in range(self.conv_w):
                for dep in range(self.kernel.d):
                    self.conv[dep,i,j]=self.activation_fn(tch.mul\
                        (self.inpt[i:i+self.kernel.s,j:j+self.kernel.s],
                        self.w[dep]).sum()+self.b[dep])
        #zero-pad right/bottom so the map divides evenly into pooling windows
        #(safe with max pooling here: sigmoid outputs are positive, so a
        #padded 0 never wins)
        self.conv=F.pad(self.conv,(0,int(self.pool_w*self.kernel.p-self.conv_w),
                        0,int(self.pool_h*self.kernel.p-self.conv_h)),"constant",0)
        #max pooling over non-overlapping p x p windows
        for i in range(self.pool_h):
            for j in range(self.pool_w):
                for dep in range(self.kernel.d):
                    self.output[dep,i,j]=self.conv[dep,i*self.kernel.p:i*self.kernel.p+self.kernel.p,
                        j*self.kernel.p:j*self.kernel.p+self.kernel.p].max()
    #pooled output size: ceil(conv_size / pool_size), per dimension
    def GetPoolSize(self):
        h=self.conv_h%self.kernel.p
        w=self.conv_w%self.kernel.p
        self.pool_h=int(self.conv_h/self.kernel.p) if (h==0)\
            else int(self.conv_h//self.kernel.p)+1
        self.pool_w=int(self.conv_w/self.kernel.p) if (w==0)\
            else int(self.conv_w//self.kernel.p)+1
    def activation_fn(self,z):#sigmoid activation
        return 1/(1+tch.exp(-z))
def convert(number):
    """Return the label *number* (0-9) as a 1x10 one-hot double tensor.

    Accepts a Python int or a scalar tensor (as produced by the loader).
    Raises IndexError for labels outside 0-9 instead of silently returning
    an all-zero vector.
    """
    y = tch.zeros(1, 10, dtype=tch.double)
    # Direct one-hot indexing replaces the original scan over all 10 slots.
    y[0, int(number)] = 1
    return y
class Image_shape(object):
    """Geometry of an input image: channel count, height and width."""
    def __init__(self, channels, height, width):
        # Short attribute names match how ConvPoolLayer reads them.
        self.c, self.h, self.w = channels, height, width
class Kernel_shape(object):
    """Convolution kernel configuration: size, depth, pooling window."""
    def __init__(self, size, depth, pool_size):
        # Short attribute names match how ConvPoolLayer reads them.
        self.s, self.d, self.p = size, depth, pool_size
def main():
    """Build the conv + softmax network and train it on MNIST subsets."""
    train_data, test_data = load.main()  # preprocessed [image, label] pairs
    # Hyper-parameters.
    eta = 0.5             # learning rate
    mini_batch_size = 10  # samples per mini-batch
    epoch = 30            # epochs per round
    lamda = 0.05          # regularisation strength (unused by update())
    eta_decay = 0.96      # learning-rate decay base
    # NOTE(review): with the conv layer the naive Python loops are too slow
    # to train on the full set (author reports 96.55% with a 784-30-10
    # fully connected net), so each round trains/evaluates on a freshly
    # shuffled small subset.
    net = Network([ConvPoolLayer(Image_shape(1, 28, 28), Kernel_shape(5, 1, 2)),
                   SoftmaxLayer(1 * 12 * 12, 10)])
    for i in range(10):
        print('round {0}:'.format(i))
        random.shuffle(train_data)
        random.shuffle(test_data)
        net.SGD(train_data[0:1000], test_data[0:100], mini_batch_size, epoch, eta, lamda, eta_decay)
# Guard the entry point so importing this module does not start training.
if __name__ == "__main__":
    main()
遇到的问题:
pytorch计算图:
1.tensor定义时需设置requires_grad=True,允许求导
2.计算图中间的非叶子节点不保留梯度(grad),可调用tensor.retain_grad()进行保留
3.计算图求导后,用于求导的子图会释放(free),需设置tensor.backward(retain_graph=True)保留图(由于loss每次更新,网络中无需用到)
4.当权重与偏置为tensor,在用grad更新后,会在计算图中形成环(个人理解)。需将tensor类型改为Variable类型,用Variable.data更新数值,将数值与图分离
卷积池化层:
1.卷积运算需三重循环(长、宽、维度)遍历图片,前向传播速度慢,应该可以用矩阵运算代替遍历(待优化)
2.加入卷积层后,学习速度明显下降并且占用较大内存(但可在较小的样本上训练并有效果),将权重、偏置及每层输出加入cuda效果不明显(待优化)
3.loss.backward()对最大值池化max()函数进行自动求导未验证是否正确