Neural Networks and Deep Learning Study Notes 003: Annotated Code for Basic Network Improvements

Basic Network Improvements

Improvements added in this version (summarized in the formulas below):
1. Quadratic cost function → cross-entropy cost function
2. Weight initialization: standard Gaussian → Gaussian with standard deviation 1/sqrt(n)
3. L2 regularization added
4. Recording of cost and accuracy during training added
5. Saving and loading the network to and from disk added
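
In Nielsen's notation (n is the training-set size, m the mini-batch size, and n_in the number of input connections to a neuron), the three changes to the learning rule implemented by the code below are:

C = -\frac{1}{n} \sum_x \sum_j \left[ y_j \ln a_j + (1 - y_j) \ln(1 - a_j) \right]    (cross-entropy cost)

w \sim \mathcal{N}(0,\, 1/n_{\mathrm{in}})    (initialization with standard deviation 1/\sqrt{n_{\mathrm{in}}})

w \to \left(1 - \frac{\eta\lambda}{n}\right) w - \frac{\eta}{m} \sum_x \frac{\partial C_x}{\partial w}    (L2-regularized weight update)

The full annotated program: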

import json
import random
import sys
import mnist_loader
import numpy as np
class QuadraticCost(object):    # quadratic cost and its output-layer error delta w.r.t. z
    @staticmethod
    def fn(a, y):
        return 0.5*np.linalg.norm(a-y)**2
    @staticmethod
    def delta(z, a, y):
        return (a-y) * sigmoid_prime(z)
class CrossEntropyCost(object):    # cross-entropy cost and its output-layer error delta w.r.t. z
    @staticmethod
    def fn(a, y):
        return np.sum(np.nan_to_num(-y*np.log(a)-(1-y)*np.log(1-a)))    # nan_to_num guards the 0*log(0) case
    @staticmethod
    def delta(z, a, y):
        return (a-y)    # the sigmoid_prime(z) factor cancels for the cross-entropy cost
class Network(object):
    def __init__(self, sizes, cost=CrossEntropyCost):   # initialize the network
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.default_weight_initializer()
        self.cost=cost
    def default_weight_initializer(self):    # weights drawn from a Gaussian with standard deviation 1/sqrt(n_in)
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        self.weights = [np.random.randn(y, x)/np.sqrt(x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]
    def large_weight_initializer(self):    # weights drawn from a standard Gaussian (the old scheme, kept for comparison)
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]
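    # Why divide by sqrt(n_in): with standard-Gaussian weights, a neuron with n_in
    # inputs has a weighted input z whose standard deviation grows like sqrt(n_in),
    # which saturates the sigmoid and slows learning; the scaled initializer keeps
    # z roughly standard normal (see Nielsen, ch. 3).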
    def feedforward(self, a):   # compute the network's output activations
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a
    def SGD(self, training_data, epochs, mini_batch_size, eta,
            lmbda=0.0,        # lmbda is the L2 regularization parameter
            evaluation_data=None,   # evaluation data
            monitor_evaluation_cost=False,  # flag: record the total cost on the evaluation data
            monitor_evaluation_accuracy=False,  # flag: record the accuracy on the evaluation data
            monitor_training_cost=False,    # flag: record the total cost on the training data
            monitor_training_accuracy=False):   # flag: record the accuracy on the training data
        if evaluation_data: n_data = len(evaluation_data)   # size of the evaluation data
        n = len(training_data)              # size of the training data
        evaluation_cost, evaluation_accuracy = [], []
        training_cost, training_accuracy = [], []
        for j in range(epochs):     # one pass over the training data per epoch
            random.shuffle(training_data)       # shuffle the training data
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]      # split the training data into mini-batches
            for mini_batch in mini_batches:     # iterate over the mini-batches
                self.update_mini_batch(
                    mini_batch, eta, lmbda, len(training_data))     # update the weights and biases
            print("Epoch %s training complete" % j)
            # record the requested metrics for this epoch
            if monitor_training_cost:    # total cost on the training data
                cost = self.total_cost(training_data, lmbda)
                training_cost.append(cost)      # store this epoch's training cost
                print("Cost on training data: {}".format(cost))
            if monitor_training_accuracy:       # accuracy on the training data
                accuracy = self.accuracy(training_data, convert=True)
                training_accuracy.append(accuracy)      # store this epoch's correct-classification count
                print("Accuracy on training data: {} / {}".format(
                    accuracy, n))
            if monitor_evaluation_cost:     # total cost on the evaluation data
                cost = self.total_cost(evaluation_data, lmbda, convert=True)
                evaluation_cost.append(cost)    # store this epoch's evaluation cost
                print("Cost on evaluation data: {}".format(cost))
            if monitor_evaluation_accuracy:     # accuracy on the evaluation data
                accuracy = self.accuracy(evaluation_data)
                evaluation_accuracy.append(accuracy)    # store this epoch's correct-classification count
                print("Accuracy on evaluation data: {} / {}".format(
                    accuracy, n_data))      # reuses accuracy instead of running feedforward a second time
            print()
        return evaluation_cost, evaluation_accuracy, \
            training_cost, training_accuracy
    def update_mini_batch(self, mini_batch, eta, lmbda, n):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]     # accumulators for the summed gradients
        for x, y in mini_batch:     # iterate over the examples in this mini-batch
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)  # per-example gradients via backpropagation
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]   # accumulate the gradients
        self.weights = [(1-eta*(lmbda/n))*w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]    # (1-eta*(lmbda/n))*w is the L2 weight-decay factor
        self.biases = [b-(eta/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]      # bias update (no regularization on biases)
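    # For reference, the four backpropagation formulas that the comments in
    # backprop refer to (BP1-BP4, in the numbering used in Nielsen's book):
    #   BP1: delta_L = dC/da_L (*) sigmoid'(z_L)              output-layer error
    #   BP2: delta_l = (W_{l+1}^T . delta_{l+1}) (*) sigmoid'(z_l)
    #   BP3: dC/db_l = delta_l
    #   BP4: dC/dw_l = delta_l . a_{l-1}^T
    # where (*) is elementwise multiplication and . is a matrix product.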
    def backprop(self, x, y):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]     # per-example gradients
        activation = x      # activation of the current layer
        activations = [x]   # store the activations of every layer
        zs = []             # store the weighted inputs z of every layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b         # weighted input of this layer
            zs.append(z)
            activation = sigmoid(z)             # activation of this layer
            activations.append(activation)
        delta = (self.cost).delta(zs[-1], activations[-1], y)   # output-layer error (BP1)
        nabla_b[-1] = delta         # gradient w.r.t. b at the output layer (BP3)
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())    # gradient w.r.t. w at the output layer (BP4); the regularization term is applied later, in update_mini_batch
        for i in range(2, self.num_layers):     # propagate the error backwards through the remaining layers
            z = zs[-i]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-i+1].transpose(), delta) * sp     # error of this layer (BP2)
            nabla_b[-i] = delta         # gradient w.r.t. b for this layer (BP3)
            nabla_w[-i] = np.dot(delta, activations[-i-1].transpose())     # gradient w.r.t. w for this layer (BP4)
        return (nabla_b, nabla_w)
    def accuracy(self, data, convert=False):
        if convert:     # training data: y is a one-hot (10, 1) vector
            results = [(np.argmax(self.feedforward(x)), np.argmax(y))   # predicted vs. expected digit
                       for (x, y) in data]
        else:           # evaluation data: y is an integer label
            results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in data]
        return sum(int(x == y) for (x, y) in results)   # number of correct classifications
    def total_cost(self, data, lmbda, convert=False):   # total cost accumulated over the whole data set
        cost = 0.0
        for x, y in data:
            a = self.feedforward(x)     # output-layer activation
            if convert: y = vectorized_result(y)    # evaluation data stores y as an integer, unlike the training data, so convert it
            cost += self.cost.fn(a, y)/len(data)    # average the per-example cost (hence the /len(data))
        cost += 0.5*(lmbda/len(data))*sum(
            np.linalg.norm(w)**2 for w in self.weights)     # add the L2 regularization term: C = C0 + (lmbda/2n)*sum(w**2)
        return cost
    def save(self, filename):   # save the current network to disk
        data = {"sizes": self.sizes,
                "weights": [w.tolist() for w in self.weights],
                "biases": [b.tolist() for b in self.biases],
                "cost": str(self.cost.__name__)}
        with open(filename, "w") as f:
            json.dump(data, f)
def load(filename):     # load a saved network from disk
    with open(filename, "r") as f:
        data = json.load(f)
    cost = getattr(sys.modules[__name__], data["cost"])     # look the cost class up by name
    net = Network(data["sizes"], cost=cost)
    net.weights = [np.array(w) for w in data["weights"]]
    net.biases = [np.array(b) for b in data["biases"]]
    return net
def vectorized_result(j):   # convert an integer label into a one-hot (10, 1) array
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e
def sigmoid(z):     # the sigmoid (logistic) function
    return 1.0/(1.0+np.exp(-z))
def sigmoid_prime(z):   # derivative of the sigmoid function
    return sigmoid(z)*(1-sigmoid(z))
def main():
    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
    net = Network([784, 30, 10], cost=CrossEntropyCost)     # CrossEntropyCost is the cost-function class
    net.large_weight_initializer()      # old-style weight initialization, for comparison
    net.SGD(list(training_data), 30, 10, 0.5,
            evaluation_data=list(test_data),
            monitor_evaluation_accuracy=True)
if __name__ == "__main__":
    main()
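
As a quick sketch of how the new monitoring and save/load features fit together (the filename trained_net.json, the lmbda value, and the shortened epoch count are arbitrary choices for this illustration, not part of the program above):

training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
training_data, test_data = list(training_data), list(test_data)
net = Network([784, 30, 10], cost=CrossEntropyCost)     # uses the 1/sqrt(n_in) initializer by default
net.SGD(training_data, 5, 10, 0.5, lmbda=5.0,           # lmbda=5.0 is an illustrative L2 setting
        evaluation_data=test_data,
        monitor_evaluation_accuracy=True)
net.save("trained_net.json")        # serialize sizes, weights, biases, and the cost-class name
net2 = load("trained_net.json")     # rebuild an identical network from the JSON file
print(net2.accuracy(test_data), "/", len(test_data))    # should match the last epoch's accuracy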

References:
Michael A. Nielsen, "Neural Networks and Deep Learning", Determination Press, 2015.
