Layer Parameter Dimensions of Neural Networks (14) --- "Deep Learning"

We have already covered gradient descent, backpropagation, loss functions, and so on for neural networks. Now, following Michael Nielsen's code, we implement our own network construction, gradient descent, and backpropagation.

1) Reading the MNIST dataset:

import cPickle
import gzip

# Third-party libraries
import numpy as np

def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images.  This is a
    numpy ndarray with 50,000 entries.  Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries.  Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.
    """
    f = gzip.open('../data/mnist.pkl.gz', 'rb')
    # the pickle file actually stores three tuples; we discard the validation set
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()
    return (training_data, test_data)

def load_data_wrapper():
    """Return a tuple containing ``(training_data, validation_data,
    test_data)``. Based on ``load_data``, but the format is more
    convenient for use in our implementation of neural networks.

    In particular, ``training_data`` is a list containing 50,000
    2-tuples ``(x, y)``.  ``x`` is a 784-dimensional numpy.ndarray
    containing the input image.  ``y`` is a 10-dimensional
    numpy.ndarray representing the unit vector corresponding to the
    correct digit for ``x``.

    ``test_data`` is a list containing 10,000 2-tuples ``(x, y)``.
    Here ``x`` is a 784-dimensional numpy.ndarray containing the input
    image, and ``y`` is the corresponding classification, i.e., the
    digit value (an integer) corresponding to ``x``.

    Obviously, this means we're using slightly different formats for
    the training data and the test data.  These formats turn out to be
    the most convenient for use in our neural network code."""
    tr_d, te_d = load_data()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    training_data = zip(training_inputs, training_results)
    #validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    #validation_data = zip(validation_inputs, va_d[1])
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = zip(test_inputs, te_d[1])
    return (training_data, test_data)

def vectorized_result(j):
    """Return a 10-dimensional unit vector with a 1.0 in the jth
    position and zeroes elsewhere.  This is used to convert a digit
    (0...9) into a corresponding desired output from the neural
    network."""
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e
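
As a quick sanity check, a minimal sketch of how the wrapper is used (assuming the code above is saved as mnist_loader.py and mnist.pkl.gz sits at ../data/ as in load_data):

import mnist_loader

training_data, test_data = mnist_loader.load_data_wrapper()
print(len(training_data), len(test_data))    # 50000 10000
x, y = training_data[0]
print(x.shape, y.shape)                      # (784, 1) (10, 1)
tx, ty = test_data[0]
print(tx.shape, ty)                          # (784, 1) and an integer digit label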

2) Implementing the neural network and gradient descent:

#-*-coding=utf-8-*-
import numpy as np
import random
import mnist_loader
# Shapes for a small example with sizes=[3,4,3]:
#   weights w: [(4,3), (3,4)]
#   biases  b: [(4,1), (3,1)]
#   activations a / weighted inputs z per layer: [(4,1), (3,1)]

def sigmoid(z):
    """The sigmoid activation function."""
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))

class Network(object):
    def __init__(self,sizes):
        self.num_layers=len(sizes)
        self.sizes=sizes
        # Gaussian initialization as in Nielsen's code: weights[l] has shape
        # (sizes[l+1],sizes[l]) and biases[l] has shape (sizes[l+1],1)
        self.weights=[np.random.randn(j,i) for i,j in zip(sizes[:-1],sizes[1:])]
        self.biases=[np.random.randn(j,1) for j in sizes[1:]]

    def feedforward(self,a):
        for w,b in zip(self.weights,self.biases):
            a=sigmoid(np.dot(w,a)+b)
        return a
    def SGD(self,training_data,epochs,mini_batch_size,eta,test_data=None):
        if test_data:
            n_test=len(test_data)
        n=len(training_data)

        for j in xrange(epochs):
            random.shuffle(training_data)
            mini_batches=[training_data[k:k+mini_batch_size]
                for k in xrange(0,n,mini_batch_size)]
            #print(mini_batches)
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch,eta)

            if test_data:
                print("Epoch:{0},acc:{1}/{2}".format(j,self.evaluate(test_data),n_test))
            else:
                print("finish")
        #return (self.weights,self.biases)

    def update_mini_batch(self,mini_batch,eta):
        deriv_w_mb=[np.zeros(w.shape) for w in self.weights]
        deriv_b_mb=[np.zeros(b.shape) for b in self.biases]
        for x,y in mini_batch:
            deriv_w,deriv_b=self.backprop(x,y)

            deriv_w_mb=[w+dw for w,dw in zip(deriv_w_mb,deriv_w)]
            deriv_b_mb=[b+db for b,db in zip(deriv_b_mb,deriv_b)]

        self.weights=[w-(eta/len(mini_batch))*nw 
            for w,nw in zip(self.weights,deriv_w_mb)]
        self.biases=[b-(eta/len(mini_batch))*nb
            for b,nb in zip(self.biases,deriv_b_mb)]

    def backprop(self,x,y):
        deriv_b=[np.zeros(b.shape) for b in self.biases]
        deriv_w=[np.zeros(w.shape) for w in self.weights]

        # forward pass: store every activation a and weighted input z, layer by layer
        a=x
        a_arr=[x]
        z_arr=[]
        for w,b in zip(self.weights,self.biases):
            z=np.dot(w,a)+b
            z_arr.append(z)
            a=sigmoid(z)
            a_arr.append(a)

        # output-layer error for the quadratic cost: delta = (a_L - y) * sigma'(z_L)
        delta=(a_arr[-1]-y)*sigmoid_prime(z_arr[-1])
        deriv_b[-1]=delta
        deriv_w[-1]=np.dot(delta,np.transpose(a_arr[-2]))

        # propagate the error backwards through the remaining layers
        for l in xrange(2,self.num_layers):
            z=z_arr[-l]
            delta=np.dot(np.transpose(self.weights[-l+1]),delta)*sigmoid_prime(z)
            deriv_b[-l]=delta
            deriv_w[-l]=np.dot(delta,np.transpose(a_arr[-l-1]))

        return (deriv_w,deriv_b)
    def evaluate(self,test_data):
        test_results=[(np.argmax(self.feedforward(x)),y) 
            for (x,y) in test_data]
        return sum(int(x==y) for (x,y) in test_results)

if __name__=="__main__":
    sizes=[784,30,10]
    training_data,test_data=mnist_loader.load_data_wrapper()
    print(training_data[0][0].shape,training_data[0][1].shape)
    net=Network(sizes)
    net.SGD(training_data,10,20000,0.2,test_data)
    '''
    #the model (len(n1_node)*1) format to start with
    sizes=[3,4,5,6,7,3]
    training_data=[([[1],[2],[3]],[[1],[0],[0]]),
        ([[4],[5],[6]],[[0],[1],[0]]),
        ([[7],[8],[9]],[[0],[0],[1]])]
    test_data=[([[10],[11],[12]],0),
                ([[13],[14],[15]],1),
                ([[16],[17],[18]],2)]
    #def SGD(self,training_data,epochs,mini_batch_size,eta,test_data=None):
    net=Network(sizes)
    w,b=net.SGD(training_data,5,1,0.002,test_data)

    '''
    '''x=[1,2,3]
    x=np.expand_dims(x,1)
    y=np.random.rand(3)
    y=np.expand_dims(y,1)
    z=[(x,y)]
    net1=Network(sizes)
    (w,b)=net1.backprop(z[0][0],z[0][1])

    print(w[0].shape,w[1].shape)
    print(w[0],w[1])
    print(b[0].shape,b[1].shape)
    print(b[0],b[1])'''

Taking sizes=[3,4,3] as an example, we build a 3-layer network: the input to the first layer has shape (3,1) and the first layer of weights has shape (4,3); the input to the second layer is then (4,1) and the second layer of weights is (3,4); the third layer's output is (3,1). This is how our implementation lays out the dimensions.

ps: Note that we did not make the first layer's input a (3,) array. The column shape (3,1) is used only so that the partial derivatives are easy to compute, since everything reduces to plain matrix products. It does not mean a (3,) input is impossible; it can be done, it is just more awkward to compute with.
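
As a quick check of these dimensions, a minimal sketch using the Network class defined above (the random input is just an illustrative placeholder):

net = Network([3, 4, 3])
print([w.shape for w in net.weights])    # [(4, 3), (3, 4)]
print([b.shape for b in net.biases])     # [(4, 1), (3, 1)]

x = np.random.randn(3, 1)                # a (3, 1) column-vector input
print(net.feedforward(x).shape)          # (3, 1)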

ps: what happens if we call np.dot on two ndarrays of shapes (3,) and (4,)?
These shapes cannot be multiplied as matrices: np.dot raises a shape-mismatch error, so the partial-derivative matrices for the weights w and the biases b cannot be computed this way. So pay attention to the dimensions of both the data fed into the network and the weights.
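
A small sketch of the difference (the array values are arbitrary, only the shapes matter):

a = np.array([1.0, 2.0, 3.0])             # shape (3,)
b = np.array([1.0, 2.0, 3.0, 4.0])        # shape (4,)
try:
    np.dot(a, b)
except ValueError as e:
    print(e)                               # shapes (3,) and (4,) not aligned

# with explicit 2-D shapes the same kind of product is well defined
w = np.random.randn(4, 3)                  # weight matrix, shape (4, 3)
x = a.reshape(3, 1)                        # column vector, shape (3, 1)
print(np.dot(w, x).shape)                  # (4, 1)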
