import sys, os
sys.path.append(os.pardir)
from common.layers import *  # the network's layers are kept in an ordered dict (p. 155)
from common.gradient import numerical_gradient  # numerical gradient
from collections import OrderedDict
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):  # sizes of the input, hidden and output layers
        self.params = {}  # dict holding the network's parameters
        self.params['w1'] = weight_init_std * \
            np.random.randn(input_size, hidden_size)  # weights initialized from a Gaussian
        self.params['b1'] = np.zeros(hidden_size)  # biases initialized to 0
        self.params['w2'] = weight_init_std * \
            np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
        # build the layers
        self.layers = OrderedDict()  # an ordered dict remembers the order elements were added
        self.layers['Affine1'] = Affine(self.params['w1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['w2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()
        # so forward propagation just calls each layer's forward() in insertion
        # order, and backward propagation walks the same layers in reverse
    def predict(self, x):  # forward inference on input x
        for layer in self.layers.values():  # values() yields the layers in insertion order
            x = layer.forward(x)  # each layer's forward pass
        return x
    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)  # loss function
    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:  # convert one-hot labels to class indices
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy
    def numerical_gradient(self, x, t):  # gradients by numerical differentiation
        loss_w = lambda w: self.loss(x, t)  # evaluates the loss; the global numerical_gradient differentiates it w.r.t. each parameter
        grads = {}  # the gradients to return
        grads['w1'] = numerical_gradient(loss_w, self.params['w1'])
        grads['b1'] = numerical_gradient(loss_w, self.params['b1'])
        grads['w2'] = numerical_gradient(loss_w, self.params['w2'])
        grads['b2'] = numerical_gradient(loss_w, self.params['b2'])
        return grads
    def gradient(self, x, t):
        self.loss(x, t)  # forward pass
        dout = 1
        dout = self.lastLayer.backward(dout)  # backward pass of SoftmaxWithLoss, giving y - t (a NumPy array)
        layers = list(self.layers.values())  # list of the layers
        layers.reverse()  # reversed order for backpropagation
        for layer in layers:
            dout = layer.backward(dout)  # each layer's backward pass
        grads = {}  # the gradients to return
        grads['w1'] = self.layers['Affine1'].dw  # gradients used for the parameter update
        grads['b1'] = self.layers['Affine1'].db
        grads['w2'] = self.layers['Affine2'].dw
        grads['b2'] = self.layers['Affine2'].db
        return grads
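# Gradient check: a minimal sketch comparing the backprop gradients against the
# numerical ones on a tiny random batch (sizes shrunk here so the numerical
# pass stays fast; real use would draw an MNIST mini-batch instead).
check_net = TwoLayerNet(input_size=4, hidden_size=3, output_size=2)
x_check = np.random.rand(5, 4)
t_check = np.eye(2)[np.random.choice(2, 5)]  # 5 random one-hot labels
grad_num = check_net.numerical_gradient(x_check, t_check)
grad_bp = check_net.gradient(x_check, t_check)
for key in grad_num.keys():
    print(key, np.average(np.abs(grad_bp[key] - grad_num[key])))  # each diff should be ~0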
# ------------------------------------------------------------------------------
# SGD. When the loss surface is anisotropic, SGD is very inefficient, because
# the gradient direction does not point toward the minimum.
class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr
    def update(self, params, grads):  # parameters and their gradients
        for key in params.keys():
            params[key] -= self.lr * grads[key]
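# Training-loop sketch: every optimizer below exposes the same
# update(params, grads) interface, so they all plug into the same loop
# (network and the mini-batch source are placeholders here):
# optimizer = SGD(lr=0.01)
# for i in range(10000):
#     x_batch, t_batch = ...  # draw a mini-batch
#     grads = network.gradient(x_batch, t_batch)
#     optimizer.update(network.params, grads)  # in-place parameter update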
# Momentum: each update carries an accumulated velocity, so gradient components
# that keep pointing the same way (e.g. along the valley floor) accelerate,
# reaching the optimum faster and speeding up convergence.
class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None
    def update(self, params, grads):
        if self.v is None:
            self.v = {}
            for key, val in params.items():
                self.v[key] = np.zeros_like(val)
        for key in params.keys():
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]
# AdaGrad: adapts the effective learning rate per parameter as training progresses.
class AdaGrad:
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None
    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)
        for key in params.keys():
            self.h[key] += grads[key] * grads[key]  # accumulate squared gradients
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)  # sqrt(h) in the denominator: parameters with large past gradients get a smaller learning rate
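# Toy demo of AdaGrad's shrinking step size on f(x) = x**2 (gradient 2x):
# the squared gradients accumulated in h make each step smaller than the last.
toy_params, toy_grads = {'x': np.array([10.0])}, {}
toy_opt = AdaGrad(lr=1.0)
for _ in range(5):
    toy_grads['x'] = 2 * toy_params['x']
    toy_opt.update(toy_params, toy_grads)
    print(toy_params['x'])  # e.g. 9.0, 8.33, ... with ever smaller decrements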
# Weight initialization: Xavier initialization uses a standard deviation of
# 1/sqrt(n) for a layer with n input nodes (suited to sigmoid/tanh activations).
w = np.random.randn(node_num, node_num) / np.sqrt(node_num)
# If the activation function is ReLU, use a standard deviation of sqrt(2/n) instead (He initialization).
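# Quick sketch comparing the activation spread after one ReLU layer under the
# two initializations (node_num fixed to 100 here; the printed std illustrates
# why sqrt(2/n) is preferred with ReLU):
n = 100
acts_in = np.random.randn(1000, n)
for name, std in (('xavier', np.sqrt(1.0 / n)), ('he', np.sqrt(2.0 / n))):
    acts_out = np.maximum(0, np.dot(acts_in, np.random.randn(n, n) * std))
    print(name, acts_out.std())  # He init keeps the spread from shrinking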
# Batch Normalization: normalizes the activations flowing between layers, so
# learning depends far less on the weight initialization.
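# A minimal Batch Normalization forward sketch (training-time path only; a
# full layer also keeps running statistics for test time and learns gamma and beta):
def batchnorm_forward(x, gamma, beta, eps=1e-7):
    mu = x.mean(axis=0)                    # per-feature mean over the batch
    var = x.var(axis=0)                    # per-feature variance
    x_hat = (x - mu) / np.sqrt(var + eps)  # normalize to zero mean, unit variance
    return gamma * x_hat + beta            # learnable scale and shift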
# Dropout suppresses overfitting: it narrows the gap between training and test accuracy.
class Dropout:
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None
    def forward(self, x, train_flg=True):
        if train_flg:
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio  # True where the unit is kept
            return x * self.mask
        else:
            return x * (1.0 - self.dropout_ratio)  # at test time, scale by the kept ratio
    def backward(self, dout):
        return dout * self.mask  # gradients flow only through the kept units
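# Usage sketch: pass train_flg=True while training (random mask) and False at
# test time, where every unit fires but outputs are scaled by the kept ratio:
drop = Dropout(dropout_ratio=0.5)
x_demo = np.random.randn(2, 4)
print(drop.forward(x_demo, train_flg=True))   # roughly half the units zeroed
print(drop.forward(x_demo, train_flg=False))  # everything kept, scaled by 0.5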
# ------------------------------------------------------------------------------
# Convolutional neural networks: fully connected layers ignore the shape of the
# input data, while a CNN preserves it.
# Padding is mainly used to adjust the size of the output data.
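# Worked example of the output-size formula OH = (H + 2*P - FH) / S + 1:
# a 28x28 input with a 5x5 filter, pad 0, stride 1 gives (28 + 0 - 5)/1 + 1 = 24,
# while pad 2 preserves the size: (28 + 4 - 5)/1 + 1 = 28.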
class Convolution:
    def __init__(self, w, b, stride=1, pad=0):
        self.w = w
        self.b = b
        self.stride = stride  # stride
        self.pad = pad        # padding
    def forward(self, x):
        FN, C, FH, FW = self.w.shape  # FN filters, C channels, each FH x FW
        N, C, H, W = x.shape  # shape of the input data
        out_h = int(1 + (H + 2 * self.pad - FH) / self.stride)  # output height
        out_w = int(1 + (W + 2 * self.pad - FW) / self.stride)  # output width
        col = im2col(x, FH, FW, self.stride, self.pad)  # im2col unrolls the input into a 2-D array
        col_w = self.w.reshape(FN, -1).T  # unroll the filters; reshape's -1 infers that dimension so the total element count stays the same
        out = np.dot(col, col_w) + self.b  # one big matrix product instead of nested loops
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)  # transpose reorders the axes back to (N, C, H, W)
        return out
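# Shape-check sketch for the layer above (relies on im2col from the book's
# common utilities, just as Convolution.forward does):
conv_demo = Convolution(np.random.rand(30, 1, 5, 5), np.zeros(30))  # 30 filters of 1x5x5
print(conv_demo.forward(np.random.rand(10, 1, 28, 28)).shape)       # -> (10, 30, 24, 24)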
# Pooling layer
class Pooling:
    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride  # stride
        self.pad = pad        # padding
    def forward(self, x):
        N, C, H, W = x.shape  # shape of the input data
        out_h = int(1 + (H - self.pool_h) / self.stride)  # output height
        out_w = int(1 + (W - self.pool_w) / self.stride)  # output width
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)  # unroll into a 2-D array
        col = col.reshape(-1, self.pool_h * self.pool_w)  # one row per pooling window
        out = np.max(col, axis=1)  # max pooling: take the maximum of each window
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)  # reorder the axes back to (N, C, H, W)
        return out
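# Shape-check sketch: 2x2 max pooling with stride 2 halves the spatial dims:
pool_demo = Pooling(pool_h=2, pool_w=2, stride=2)
print(pool_demo.forward(np.random.rand(10, 30, 24, 24)).shape)  # -> (10, 30, 12, 12)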
# Full CNN implementation
class SimpleConvNet:
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):  # hyperparameters
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2 * filter_pad) / \
            filter_stride + 1  # spatial size after the conv layer
        pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))  # flattened size after 2x2 pooling
        self.params = {}  # dict holding the network's parameters
        self.params['w1'] = weight_init_std * \
            np.random.randn(filter_num, input_dim[0], filter_size, filter_size)  # conv weights from a Gaussian
        self.params['b1'] = np.zeros(filter_num)  # biases initialized to 0
        self.params['w2'] = weight_init_std * \
            np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['w3'] = weight_init_std * \
            np.random.randn(hidden_size, output_size)  # weight_init_std is the std of the initial weights
        self.params['b3'] = np.zeros(output_size)
        # w1/b1 belong to the conv layer, w2/b2 to the fully connected layer
        # after pooling, w3/b3 to the output layer
        self.layers = OrderedDict()  # remembers the order elements were added
        self.layers['Conv1'] = Convolution(self.params['w1'],
                                           self.params['b1'],
                                           conv_param['stride'],
                                           conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['w2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['w3'], self.params['b3'])
        self.lastLayer = SoftmaxWithLoss()
    def predict(self, x):  # forward inference on input x
        for layer in self.layers.values():  # the layers in insertion order
            x = layer.forward(x)  # each layer's forward pass
        return x
    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)  # loss function
    def gradient(self, x, t):
        self.loss(x, t)  # forward pass
        dout = 1
        dout = self.lastLayer.backward(dout)  # backward pass of SoftmaxWithLoss, giving y - t (a NumPy array)
        layers = list(self.layers.values())  # list of the layers
        layers.reverse()  # reversed order for backpropagation
        for layer in layers:
            dout = layer.backward(dout)  # each layer's backward pass
        grads = {}  # the gradients to return
        grads['w1'] = self.layers['Conv1'].dw  # gradients used for the parameter update
        grads['b1'] = self.layers['Conv1'].db
        grads['w2'] = self.layers['Affine1'].dw
        grads['b2'] = self.layers['Affine1'].db
        grads['w3'] = self.layers['Affine2'].dw
        grads['b3'] = self.layers['Affine2'].db
        return grads
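# Training sketch for SimpleConvNet (the data source is a placeholder; with the
# book's loader one would call load_mnist(flatten=False) to keep the 1x28x28 shape):
# network = SimpleConvNet(input_dim=(1, 28, 28),
#                         conv_param={'filter_num': 30, 'filter_size': 5,
#                                     'pad': 0, 'stride': 1},
#                         hidden_size=100, output_size=10)
# optimizer = AdaGrad(lr=0.01)
# for i in range(1000):
#     x_batch, t_batch = ...  # mini-batch of images (N, 1, 28, 28) and labels
#     grads = network.gradient(x_batch, t_batch)
#     optimizer.update(network.params, grads)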