nndl: Handwritten Digit Recognition

Understanding backpropagation
Stochastic gradient descent
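The figure that originally appeared here showed the mini-batch update rule. As a sketch, assuming the standard equations from the nndl book (m is the mini-batch size, \eta the learning rate, and C_{X_j} the cost on the j-th sampled training example):

    w_k \rightarrow w_k' = w_k - \frac{\eta}{m} \sum_j \frac{\partial C_{X_j}}{\partial w_k}
    b_l \rightarrow b_l' = b_l - \frac{\eta}{m} \sum_j \frac{\partial C_{X_j}}{\partial b_l}

This averaging over the mini-batch followed by a single update is exactly what update_mini_batch in the code below performs.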
Computing the gradient: backpropagation
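The figure here showed the four backpropagation equations. Again a sketch of the standard nndl equations (\delta^l is the error at layer l, \odot the elementwise product, \sigma the sigmoid):

    \delta^L = \nabla_a C \odot \sigma'(z^L)
    \delta^l = ((w^{l+1})^T \delta^{l+1}) \odot \sigma'(z^l)
    \partial C / \partial b^l_j = \delta^l_j
    \partial C / \partial w^l_{jk} = a^{l-1}_k \delta^l_j

The backprop method below computes \delta^L first, then walks backwards through the layers with the second equation, and fills in nabla_b and nabla_w using the last two.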

import random
import mnist_loader
import numpy as np


class Network(object):
    '''A feedforward neural network.

    sizes: number of neurons in each layer, e.g. [2, 3, 1]
    '''
    def __init__(self, sizes):
        # number of layers in the network
        self.num_layers = len(sizes)
        # number of neurons in each layer
        self.sizes = sizes
        # biases for layers 2..L: one Gaussian-initialized column vector of shape (y, 1) per layer
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        # weights for layers 2..L: one Gaussian-initialized matrix of shape (y, x) per layer,
        # where y is the number of neurons in that layer and x the number in the previous layer
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        '''Return the output of the network for a given input.

        :param a: input data, a column vector of shape (n, 1), where n is the number of neurons in the input layer
        :return: the network's output
        '''
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        '''Train the network using mini-batch stochastic gradient descent.

        :param training_data: list of tuples (x, y), where x is an input and y the desired output
        :param epochs: number of epochs, i.e. how many full passes over the training data to make
        :param mini_batch_size: size of each mini-batch
        :param eta: learning rate
        :param test_data: test data (optional); if provided, the network is evaluated after each epoch
        :return:
        '''
        if test_data:
            n_test = len(test_data)
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)
            # split the shuffled data into consecutive mini-batches
            mini_batches = [training_data[k:k + mini_batch_size]
                            for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                # take one gradient-descent step per mini-batch
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(j, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(j))

    def update_mini_batch(self, mini_batch, eta):
        '''Apply one gradient-descent step using a single mini-batch.

        :param mini_batch: list of tuples (x, y) forming the mini-batch
        :param eta: learning rate
        :return:
        '''
        # gradient accumulators, initialized to zero with the same shapes as the biases and weights
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        # update the weights and biases using the gradient averaged over the mini-batch
        self.weights = [w - (eta / len(mini_batch)) * nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - (eta / len(mini_batch)) * nb for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        '''Backpropagation: compute the gradient of the cost for a single training example.

        :param x: input activation (column vector)
        :param y: desired output
        :return: tuple (nabla_b, nabla_w) of layer-by-layer gradients
        '''
        # the gradients have the same shapes as the biases and weights
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # current activation, starting from the input
        activation = x
        activations = [x]  # list to store all the activations, layer by layer
        zs = []  # list to store all the z vectors, layer by layer
        # forward pass: store every z vector and activation, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # output-layer error: gradient of the quadratic cost w.r.t. the activation,
        # times the derivative of the activation w.r.t. z = w.a + b
        delta = self.cost_derivative(activations[-1], y) * \
                sigmoid_prime(zs[-1])  # derivative of the activation function
        # the gradient w.r.t. b is just the error delta
        nabla_b[-1] = delta
        # the gradient w.r.t. w also multiplies by the previous layer's activation
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # the output layer is done; propagate the error back through the remaining num_layers - 2 layers
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        """Return the number of test inputs for which the neural
        network outputs the correct result. Note that the neural
        network's output is assumed to be the index of whichever
        neuron in the final layer has the highest activation."""
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        """Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        return (output_activations - y)


def sigmoid(z):
    """The sigmoid function."""
    return 1.0 / (1.0 + np.exp(-z))


def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z) * (1 - sigmoid(z))

if __name__ == '__main__':
    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
    net = Network([784, 30, 10])
    net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
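A note on running this under Python 3: some ports of the book's mnist_loader return zip objects from load_data_wrapper, in which case len() and random.shuffle() inside SGD will fail. If that matches the loader you are using (an assumption about your environment, not part of the original code), a minimal workaround is to materialize the data into lists first:

    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
    # wrap in list() in case the loader returns iterators rather than lists
    training_data = list(training_data)
    test_data = list(test_data)
    net = Network([784, 30, 10])
    net.SGD(training_data, 30, 10, 3.0, test_data=test_data)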