Layer Parameter Dimensions in a Neural Network (14) --- Deep Learning

Having covered gradient descent, backpropagation, and loss functions for neural networks, we now follow Michael Nielsen's code to build our own network and implement gradient descent and backpropagation ourselves.

1) Loading the MNIST dataset:

import cPickle
import gzip

# Third-party libraries
import numpy as np

def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images.  This is a
    numpy ndarray with 50,000 entries.  Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries.  Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``test_data`` is similar, except it contains only 10,000
    images.  (The pickle file also holds a 10,000-image validation set,
    which this version simply discards.)

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.
    """
    f = gzip.open('../data/mnist.pkl.gz', 'rb')
    # The pickle file stores a 3-tuple; this version discards the
    # validation set and keeps only the training and test sets.
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()
    return (training_data, test_data)

def load_data_wrapper():
    """Return a tuple containing ``(training_data, validation_data,
    test_data)``. Based on ``load_data``, but the format is more
    convenient for use in our implementation of neural networks.

    In particular, ``training_data`` is a list containing 50,000
    2-tuples ``(x, y)``.  ``x`` is a 784-dimensional numpy.ndarray
    containing the input image.  ``y`` is a 10-dimensional
    numpy.ndarray representing the unit vector corresponding to the
    correct digit for ``x``.

    ``test_data`` is a list containing 10,000 2-tuples ``(x, y)``.
    Here ``x`` is a 784-dimensional numpy.ndarray containing the
    input image, and ``y`` is the corresponding classification, i.e.,
    the digit value (an integer) for ``x``.

    Obviously, this means we're using slightly different formats for
    the training data and the test data.  These formats
    turn out to be the most convenient for use in our neural network
    code."""
    tr_d, te_d = load_data()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    training_data = zip(training_inputs, training_results)
    #validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    #validation_data = zip(validation_inputs, va_d[1])
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = zip(test_inputs, te_d[1])
    return (training_data, test_data)

def vectorized_result(j):
    """Return a 10-dimensional unit vector with a 1.0 in the jth
    position and zeroes elsewhere.  This is used to convert a digit
    (0...9) into a corresponding desired output from the neural
    network."""
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e
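
A minimal sanity check of the loader (a sketch, assuming the code above is saved as mnist_loader.py and mnist.pkl.gz sits at ../data/ as in the hard-coded path):

import mnist_loader

training_data, test_data = mnist_loader.load_data_wrapper()
print(len(training_data), len(test_data))   # 50,000 training and 10,000 test examples
x, y = training_data[0]
print(x.shape, y.shape)                     # (784, 1) column image, (10, 1) one-hot label
tx, ty = test_data[0]
print(tx.shape, ty)                         # (784, 1) column image, integer label in 0..9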

2) Implementing the neural network and gradient descent:

# -*- coding: utf-8 -*-
import random
import numpy as np
import mnist_loader
# Example dimensions for sizes=[3,4,3]:
#   weights: [(4,3), (3,4)]
#   biases:  [(4,1), (3,1)]
#   activations a and weighted inputs z per layer: [(4,1), (3,1)]

def sigmoid(z):
    """The sigmoid activation function."""
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))

class Network(object):
    def __init__(self,sizes):
        self.num_layers=len(sizes)
        self.sizes=sizes
        # Gaussian initialization (mean 0, std 1), as in Nielsen's network.py.
        # Uniform np.random.rand would give only positive weights and tends to
        # saturate the sigmoids on 784-dimensional inputs.
        self.weights=[np.random.randn(j,i) for i,j in zip(sizes[:-1],sizes[1:])]
        self.biases=[np.random.randn(j,1) for j in sizes[1:]]

    def feedforward(self,a):
        for w,b in zip(self.weights,self.biases):
            a=sigmoid(np.dot(w,a)+b)
        return a
    def SGD(self,training_data,epochs,mini_batch_size,eta,test_data=None):
        if test_data:
            n_test=len(test_data)
        n=len(training_data)

        for j in xrange(epochs):
            random.shuffle(training_data)
            mini_batches=[training_data[k:k+mini_batch_size]
                for k in xrange(0,n,mini_batch_size)]
            #print(mini_batches)
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch,eta)

            if test_data:
                print("Epoch:{0},acc:{1}/{2}".format(j,self.evaluate(test_data),n_test))
            else:
                print("finish")
        #return (self.weights,self.biases)

    def update_mini_batch(self,mini_batch,eta):
        deriv_w_mb=[np.zeros(w.shape) for w in self.weights]
        deriv_b_mb=[np.zeros(b.shape) for b in self.biases]
        for x,y in mini_batch:
            deriv_w,deriv_b=self.backprop(x,y)

            deriv_w_mb=[w+dw for w,dw in zip(deriv_w_mb,deriv_w)]
            deriv_b_mb=[b+db for b,db in zip(deriv_b_mb,deriv_b)]

        self.weights=[w-(eta/len(mini_batch))*nw 
            for w,nw in zip(self.weights,deriv_w_mb)]
        self.biases=[b-(eta/len(mini_batch))*nb
            for b,nb in zip(self.biases,deriv_b_mb)]

    def backprop(self,x,y):
        deriv_b=[np.zeros(b.shape) for b in self.biases]
        deriv_w=[np.zeros(w.shape) for w in self.weights]

        a=x
        a_arr=[x]
        z_arr=[]
        for w,b in zip(self.weights,self.biases):
            #print(np.dot(w,a).shape,b.shape)
            z=np.dot(w,a)+b
            z_arr.append(z)
            a=sigmoid(z)
            a_arr.append(a)

        # Output-layer error for the quadratic cost: delta = (a - y) * sigmoid'(z).
        delta=(a_arr[-1]-y)*sigmoid_prime(z_arr[-1])

        deriv_b[-1]=delta
        deriv_w[-1]=np.dot(delta,np.transpose(a_arr[-2]))

        # Propagate the error backwards through the hidden layers.
        for l in xrange(2,self.num_layers):
            z=z_arr[-l]
            delta=np.dot(np.transpose(self.weights[-l+1]),delta)*sigmoid_prime(z)
            #print(delta.shape,a_arr[-l-1].shape)
            deriv_b[-l]=delta
            deriv_w[-l]=np.dot(delta,np.transpose(a_arr[-l-1]))

        return (deriv_w,deriv_b)
    def evaluate(self,test_data):
        test_results=[(np.argmax(self.feedforward(x)),y) 
            for (x,y) in test_data]
        return sum(int(x==y) for (x,y) in test_results)

if __name__=="__main__":
    sizes=[784,30,10]
    training_data,test_data=mnist_loader.load_data_wrapper()
    print(training_data[0][0].shape,training_data[0][1].shape)
    net=Network(sizes)
    net.SGD(training_data,10,20000,0.2,test_data)
    '''
    #toy example: each x and y is given in (number of layer nodes, 1) column-list format
    sizes=[3,4,5,6,7,3]
    training_data=[([[1],[2],[3]],[[1],[0],[0]]),
        ([[4],[5],[6]],[[0],[1],[0]]),
        ([[7],[8],[9]],[[0],[0],[1]])]
    test_data=[([[10],[11],[12]],0),
                ([[13],[14],[15]],1),
                ([[16],[17],[18]],2)]
    #def SGD(self,training_data,epochs,mini_batch_size,eta,test_data=None):
    net=Network(sizes)
    w,b=net.SGD(training_data,5,1,0.002,test_data)

    '''
    '''x=[1,2,3]
    x=np.expand_dims(x,1)
    y=np.random.rand(3)
    y=np.expand_dims(y,1)
    z=[(x,y)]
    net1=Network(sizes)
    (w,b)=net1.backprop(z[0][0],z[0][1])

    print(w[0].shape,w[1].shape)
    print(w[0],w[1])
    print(b[0].shape,b[1].shape)
    print(b[0],b[1])'''

As shown in the comments at the top of the script, for a 3-layer network with sizes=[3,4,3] the input to the first layer has shape (3,1), the first weight matrix has shape (4,3), the input to the second layer has shape (4,1), the second weight matrix has shape (3,4), and the output of the last layer has shape (3,1). That is how this implementation lays out its dimensions; a quick check follows below.
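
A minimal sketch of that check, assuming the Network class above is available in the same session:

import numpy as np

net = Network([3, 4, 3])
print([w.shape for w in net.weights])   # [(4, 3), (3, 4)]
print([b.shape for b in net.biases])    # [(4, 1), (3, 1)]
a0 = np.ones((3, 1))                    # a (3, 1) column input
print(net.feedforward(a0).shape)        # (3, 1) output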

PS: note that we deliberately do not feed the first layer an array of shape (3,). Using (3,1) column vectors simply makes the partial derivatives easy to compute with ordinary matrix operations. A (3,) input is not impossible; it just makes the bookkeeping considerably more awkward.

PS: if we call np.dot on two 1-D ndarrays of shapes (3,) and (4,), NumPy raises a shape-mismatch error.
With such 1-D arrays the matrix algebra does not go through, so neither the matrix of weight gradients for w nor the bias gradients for b can be formed. Pay attention, therefore, to the dimensions of the data fed into the network and of the weights; see the sketch below.
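
To make this concrete, here is a small sketch with hypothetical 1-D arrays a (shape (3,)) and delta (shape (4,)):

import numpy as np

a = np.array([1.0, 2.0, 3.0])            # shape (3,)
delta = np.array([0.1, 0.2, 0.3, 0.4])   # shape (4,)
# np.dot(delta, a) raises ValueError: 1-D arrays of lengths 4 and 3 are not aligned,
# so the (4, 3) matrix of weight gradients cannot be formed this way.

a_col = a.reshape(3, 1)                  # use column vectors instead
delta_col = delta.reshape(4, 1)
print(np.dot(delta_col, a_col.T).shape)  # (4, 3), exactly the shape of the weight matrix

This is why deriv_w[-l]=np.dot(delta,np.transpose(a_arr[-l-1])) in backprop works: activations and deltas are kept as column vectors throughout.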
