Basic network optimization
Optimizations added (the key formulas are summarized right below this list):
1. Quadratic cost function → cross-entropy cost function
2. Weight initialization: standard Gaussian → Gaussian with standard deviation 1/sqrt(n)
3. L2 regularization added
4. Recording of cost and accuracy during training added
5. Saving and loading the network to/from local disk added
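For reference, the formulas behind items 1 and 3, in the notation of Nielsen's book (a: output activation, y: desired output, n: training-set size, m: mini-batch size, \eta: learning rate, \lambda: regularization parameter):

C = -\frac{1}{n}\sum_x \left[ y \ln a + (1-y)\ln(1-a) \right]

C = C_0 + \frac{\lambda}{2n}\sum_w w^2, \qquad
w \rightarrow \Big(1-\frac{\eta\lambda}{n}\Big)\,w - \frac{\eta}{m}\sum_x \frac{\partial C_x}{\partial w}

The (1-\eta\lambda/n) factor appears verbatim in update_mini_batch below.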
import json
import random
import sys
import mnist_loader
import numpy as np
class QuadraticCost(object):  # quadratic cost and its error δ with respect to z
    @staticmethod
    def fn(a, y):
        return 0.5*np.linalg.norm(a-y)**2
    @staticmethod
    def delta(z, a, y):
        return (a-y) * sigmoid_prime(z)
class CrossEntropyCost(object):  # cross-entropy cost and its error δ with respect to z
    @staticmethod
    def fn(a, y):
        return np.sum(np.nan_to_num(-y*np.log(a)-(1-y)*np.log(1-a)))
    @staticmethod
    def delta(z, a, y):
        return (a-y)
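# Note: unlike the quadratic cost, the cross-entropy delta carries no
# sigmoid_prime(z) factor -- the sigma'(z) term cancels in the derivation,
# so learning does not slow down when the output neuron saturates.
# np.nan_to_num guards against 0*log(0) producing nan when a reaches 0 or 1.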
class Network(object):
    def __init__(self, sizes, cost=CrossEntropyCost):  # initialize the network
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.default_weight_initializer()
        self.cost = cost
    def default_weight_initializer(self):  # Gaussian weights with standard deviation 1/sqrt(n_in)
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        self.weights = [np.random.randn(y, x)/np.sqrt(x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]
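    # Dividing by sqrt(x), the number of inputs to each neuron, keeps the
    # weighted input z = w.a + b roughly unit-variance at the start of
    # training, so neurons are less likely to begin saturated.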
    def large_weight_initializer(self):  # old initializer: standard-Gaussian weights
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]
    def feedforward(self, a):  # compute the output-layer activation
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a
    def SGD(self, training_data, epochs, mini_batch_size, eta,
            lmbda=0.0,  # lmbda is the L2 regularization parameter
            evaluation_data=None,  # evaluation data
            monitor_evaluation_cost=False,  # flag: record the total cost on the evaluation data
            monitor_evaluation_accuracy=False,  # flag: record the accuracy on the evaluation data
            monitor_training_cost=False,  # flag: record the total cost on the training data
            monitor_training_accuracy=False):  # flag: record the accuracy on the training data
        if evaluation_data: n_data = len(evaluation_data)  # length of the evaluation data
        n = len(training_data)  # length of the training data
        evaluation_cost, evaluation_accuracy = [], []
        training_cost, training_accuracy = [], []
        for j in range(epochs):  # one pass per epoch
            random.shuffle(training_data)  # shuffle the training data
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]  # split the training data into mini-batches
            for mini_batch in mini_batches:  # iterate over the mini-batches
                self.update_mini_batch(
                    mini_batch, eta, lmbda, len(training_data))  # update the weights and biases
            print("Epoch %s training complete" % j)
            # the monitoring records follow
            if monitor_training_cost:  # cost on the training data
                cost = self.total_cost(training_data, lmbda)
                training_cost.append(cost)  # store the total training cost of each epoch
                print("Cost on training data: {}".format(cost))
            if monitor_training_accuracy:  # accuracy on the training data
                accuracy = self.accuracy(training_data, convert=True)
                training_accuracy.append(accuracy)  # store the number of correct results of each epoch
                print("Accuracy on training data: {} / {}".format(
                    accuracy, n))
            if monitor_evaluation_cost:  # cost on the evaluation data
                cost = self.total_cost(evaluation_data, lmbda, convert=True)
                evaluation_cost.append(cost)  # store the total evaluation cost of each epoch
                print("Cost on evaluation data: {}".format(cost))
            if monitor_evaluation_accuracy:  # accuracy on the evaluation data
                accuracy = self.accuracy(evaluation_data)
                evaluation_accuracy.append(accuracy)  # store the number of correct results
                print("Accuracy on evaluation data: {} / {}".format(
                    accuracy, n_data))  # reuse the stored value rather than recomputing it
            print()
        return evaluation_cost, evaluation_accuracy, \
            training_cost, training_accuracy
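    # A hypothetical call collecting all four monitoring curves (the names
    # net/training_data/validation_data and lmbda=5.0 are example values,
    # not part of the original script):
    # curves = net.SGD(training_data, 30, 10, 0.5, lmbda=5.0,
    #                  evaluation_data=validation_data,
    #                  monitor_evaluation_cost=True,
    #                  monitor_evaluation_accuracy=True,
    #                  monitor_training_cost=True,
    #                  monitor_training_accuracy=True)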
    def update_mini_batch(self, mini_batch, eta, lmbda, n):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]  # accumulators for the gradients
        for x, y in mini_batch:  # iterate over the examples in the mini-batch
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)  # per-example gradients via backpropagation
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]  # accumulate the gradients
        self.weights = [(1-eta*(lmbda/n))*w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]  # (1-eta*(lmbda/n))*w: L2-regularized weight update
        self.biases = [b-(eta/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]  # bias update
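    # The (1 - eta*lmbda/n) factor shrinks every weight toward zero on each
    # step ("weight decay"), which is exactly the effect of the L2 term in
    # the cost; the biases are deliberately left unregularized.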
    def backprop(self, x, y):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]  # storage for the network's gradients
        activation = x  # activation of the current layer
        activations = [x]  # store the activations of every layer
        zs = []  # store the weighted inputs z
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b  # weighted input z of each layer
            zs.append(z)  # store the weighted input z of each layer
            activation = sigmoid(z)  # activation of the current layer
            activations.append(activation)  # store the activation of the current layer
        delta = (self.cost).delta(zs[-1], activations[-1], y)  # error δ of the output layer (backpropagation equation 1)
        nabla_b[-1] = delta  # gradient of the cost with respect to the output-layer biases = δ
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())  # gradient with respect to the output-layer weights (the regularization term is applied when the weights are updated)
        for i in range(2, self.num_layers):  # gradients for the remaining layers, back to front
            z = zs[-i]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-i+1].transpose(), delta) * sp  # backpropagation equation 2
            nabla_b[-i] = delta  # gradient with respect to the biases (backpropagation equation 3)
            nabla_w[-i] = np.dot(delta, activations[-i-1].transpose())  # gradient with respect to the weights (backpropagation equation 4)
        return (nabla_b, nabla_w)
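    # The four backpropagation equations referenced in the comments above,
    # in matrix form (L is the output layer, * is the elementwise product):
    #   BP1: delta^L = dC/da^L * sigma'(z^L)   -- computed by cost.delta
    #   BP2: delta^l = ((w^{l+1})^T delta^{l+1}) * sigma'(z^l)
    #   BP3: dC/db^l = delta^l
    #   BP4: dC/dw^l = delta^l . (a^{l-1})^T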
    def accuracy(self, data, convert=False):
        if convert:  # training data: y is a (10, 1) one-hot vector
            results = [(np.argmax(self.feedforward(x)), np.argmax(y))  # actual output vs. desired output
                       for (x, y) in data]
        else:  # evaluation data: y is already a digit
            results = [(np.argmax(self.feedforward(x)), y)
                       for (x, y) in data]
        return sum(int(x == y) for (x, y) in results)  # number of correct results
    def total_cost(self, data, lmbda, convert=False):  # total cost, accumulated over every example
        cost = 0.0
        for x, y in data:
            a = self.feedforward(x)  # a is the output-layer activation
            if convert: y = vectorized_result(y)  # convert the desired output (the evaluation data stores it in a different format from the training data)
            cost += self.cost.fn(a, y)/len(data)  # accumulate the per-example cost (divided by len)
        cost += 0.5*(lmbda/len(data))*sum(
            np.linalg.norm(w)**2 for w in self.weights)  # add the regularization term: C = C0 + L2
        return cost
    def save(self, filename):  # save the current network to disk
        data = {"sizes": self.sizes,
                "weights": [w.tolist() for w in self.weights],
                "biases": [b.tolist() for b in self.biases],
                "cost": str(self.cost.__name__)}
        with open(filename, "w") as f:
            json.dump(data, f)
def load(filename):  # load a saved network from disk (a module-level function, not a method)
    with open(filename, "r") as f:
        data = json.load(f)
    cost = getattr(sys.modules[__name__], data["cost"])
    net = Network(data["sizes"], cost=cost)
    net.weights = [np.array(w) for w in data["weights"]]
    net.biases = [np.array(b) for b in data["biases"]]
    return net
def vectorized_result(j):  # convert a digit label into a (10, 1) one-hot array
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e
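# e.g. vectorized_result(2) returns a (10, 1) column with 1.0 in row 2 and
# zeros elsewhere, matching the one-hot labels used in the training data.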
def sigmoid(z):  # logistic function
    return 1.0/(1.0+np.exp(-z))
def sigmoid_prime(z):  # derivative of the logistic function
    return sigmoid(z)*(1-sigmoid(z))
def main():
    training_data, validation, test_data = mnist_loader.load_data_wrapper()
    net = Network([784, 30, 10], cost=CrossEntropyCost)  # CrossEntropyCost is the cost-function class
    net.large_weight_initializer()  # revert to the old standard-Gaussian initializer for comparison (the improved 1/sqrt(n) default was already set in __init__; drop this line to keep the new default)
    net.SGD(list(training_data), 30, 10, 0.5,
            evaluation_data=list(test_data),
            monitor_evaluation_accuracy=True)
if __name__ == "__main__":
    main()
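A minimal sketch of the save/load round trip; the file name "net.json" is an arbitrary example, not part of the script above:

net = Network([784, 30, 10], cost=CrossEntropyCost)
net.save("net.json")     # serialize sizes, weights, biases and the cost class name to JSON
net2 = load("net.json")  # rebuild an identical Network from the file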
References:
Michael A. Nielsen, "Neural Networks and Deep Learning", Determination Press, 2015