Backpropagation derives the gradient analytically, but because a neural network's computation graph is complex, the derivation is easy to get wrong. Numerical differentiation is far slower, but its result is essentially guaranteed to be correct. So we compute the gradient both ways and inspect the difference between them to verify that the backpropagation implementation is correct; this procedure is called gradient checking.
# GradientCheck.py
# Two-layer network with one hidden layer, architecture: 784-50-10.
# Gradient check: compare the gradients computed by numerical
# differentiation against those computed by backpropagation,
# to verify that the backpropagation gradients are correct.
from dataset.mnist import load_mnist
import numpy as np
from TwoLayerNet import TwoLayerNet
import time

start = time.perf_counter()  # time.clock() was removed in Python 3.8

# Load the data
(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)
net = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
x_batch = x_train[:3]
t_batch = t_train[:3]  # elements from index 0 up to (but not including) 3, i.e. the first 3 samples suffice for the check

grad_numerical = net.numerical_gradient(x_batch, t_batch)
grad_backpro = net.gradient(x_batch, t_batch)

# Mean absolute error between the two gradients for W1, b1, W2, b2
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backpro[key] - grad_numerical[key]))
    print(key + ':' + str(diff))

end = time.perf_counter()
print('Running Time: %s Seconds' % (end - start))
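
If backpropagation is implemented correctly, the printed differences are tiny, typically on the order of 1e-10 with float64 weights; a value such as 1e-2 would signal a bug. Note that the mean absolute error above depends on the scale of the gradients, so a scale-invariant relative error is a common refinement. A minimal sketch (the helper name relative_error is my own, not part of these scripts):

import numpy as np

def relative_error(a, b, eps=1e-8):
    # |a - b| / (|a| + |b| + eps): near 0 when the two gradients agree,
    # near 1 when they disagree; eps guards against division by zero.
    return np.average(np.abs(a - b) / (np.abs(a) + np.abs(b) + eps))

# e.g. relative_error(grad_backpro[key], grad_numerical[key])
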
# TwoLayerNet.py
# Two-layer network with one hidden layer.
# Gradients are computed by backpropagation.
import numpy as np
from collections import OrderedDict
# Ordered dict: the network's layers must be stored in an ordered dict
# so that the forward and backward passes can traverse them in sequence.
from BackPropagation import *
# Imports the classes defining the Affine, ReLU and SoftmaxWithLoss layers.
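
# Illustration only (not part of the original file): a minimal sketch of how
# an OrderedDict of layers drives the two passes. It assumes the Affine, Relu
# and SoftmaxWithLoss classes from BackPropagation.py expose forward/backward
# methods with the signatures used below.
def _ordered_layers_demo(x, t, W1, b1, W2, b2):
    layers = OrderedDict()
    layers['Affine1'] = Affine(W1, b1)
    layers['Relu1'] = Relu()
    layers['Affine2'] = Affine(W2, b2)
    last_layer = SoftmaxWithLoss()
    for layer in layers.values():  # forward pass, in insertion order
        x = layer.forward(x)
    loss = last_layer.forward(x, t)
    dout = last_layer.backward(1)  # backward pass starts at the loss layer
    for layer in reversed(list(layers.values())):  # then in reverse order
        dout = layer.backward(dout)
    return loss
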
# Implementation of the numerical gradient; the class's numerical-gradient
# method calls this function.
def numerical_gradient(f, x):
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)
    # Iterate over every element of x, whatever its shape
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + h
        fxh1 = f(x)  # f(x+h)
        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2 * h)  # central difference
        x[idx] = tmp_val  # restore the original value
        it.iternext()
    return grad
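
# Quick self-test sketch (my own illustration, not part of the original file):
# for f(x) = x0**2 + x1**2 the analytic gradient at (3.0, 4.0) is (6.0, 8.0),
# which the central difference above reproduces almost exactly.
def _numerical_gradient_demo():
    grad = numerical_gradient(lambda v: np.sum(v ** 2), np.array([3.0, 4.0]))
    print(grad)  # -> approximately [6. 8.]
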