Dataset source
- The public MNIST handwritten-digit dataset
git clone https://github.com/mnielsen/neural-networks-and-deep-learning.git
Reading the MNIST data
- Tutorial referenced for the data reading: https://blog.csdn.net/panrenlong/article/details/81736754
- Usage of Python's struct module
| Function | Returns | Description |
| --- | --- | --- |
| pack(fmt, v1, v2, …) | bytes | Packs the values into a byte string according to the given format fmt and returns that string. |
| pack_into(fmt, buffer, offset, v1, v2, …) | None | Packs the values according to fmt and writes the resulting bytes into buffer starting at offset (buffer must be writable, e.g. one created with the array module). |
| unpack(fmt, string) | tuple | Parses the byte string according to fmt and returns the parsed values. |
| unpack_from(fmt, buffer, offset) | tuple | Parses the buffer starting at offset according to fmt and returns the parsed values. |
| calcsize(fmt) | int | Returns how many bytes the given format fmt occupies; note the alignment rules. |

Byte-order characters for the format string:

| Character | Byte order |
| --- | --- |
| @ | native (native size and alignment) |
| = | native (standard size) |
| < | little-endian |
| > | big-endian |
| ! | network (= big-endian) |
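To make these concrete, here is a minimal round-trip sketch (the example values match the MNIST image-file header used below):

import struct
buf = struct.pack('>IIII', 2051, 60000, 28, 28)  # four big-endian unsigned ints
print(struct.calcsize('>IIII'))                  # 16 (bytes)
print(struct.unpack('>IIII', buf))               # (2051, 60000, 28, 28)
print(struct.unpack_from('>II', buf, 8))         # (28, 28), parsed from offset 8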
Example code
index = 0
magic, numImages, numRows, numColumns = struct.unpack_from('>IIII', data, index)
index += struct.calcsize('>IIII')
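The IDX file format fixes these header values, so a sanity check is cheap: the magic number is 2051 (0x00000803) for image files and 2049 (0x00000801) for label files. A minimal check to add right after unpacking:

assert magic == 2051, 'not an IDX3 image file'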
- Fixing the problem that printing a bytes object directly shows the corresponding ASCII characters
def print_hex(data):
    # str.join() concatenates the elements of a sequence into one string, separated by the given string
    hex_strs = [hex(int(i)) for i in data]
    print(" ".join(hex_strs))
An alternative version:
print(" ".join(['%02X'% i for i in data]))
- Data-reading code
data = open("train-images.idx3-ubyte", 'rb').read() index = 0 magic, numImages , numRows , numColumns = struct.unpack_from('>IIII' , data , index) index += struct.calcsize('>IIII') #img = np.array([int('%d'% i) for i in data[16:800]]).reshape(28,28) #print(img) 直接读取图片 img = struct.unpack_from('784B',data,index) index += struct.calcsize('784B'); img = np.array(img).reshape(28,28) print(img) # 用struct读取图片 fig = plt.figure() #输出一个 plotwindow = fig.add_subplot(111) plt.imshow(img,cmap='gray') plt.show()
- Reading the images and labels together
# -*- coding: utf-8 -*-
import numpy as np
import gzip
import struct
import matplotlib.pyplot as plt

def read_data(train_path, test_path):
    """
    Read the MNIST dataset.
    ``return`` a list of tuples: (image, label)
    """
    data = open(train_path, 'rb').read()
    data2 = open(test_path, 'rb').read()
    index = 0
    index2 = 8
    ret_data = []
    magic, num, width, height = struct.unpack_from('>IIII', data, index)
    index += struct.calcsize('>IIII')
    for i in range(10):  # read only the first 10 images for this demo
        img = struct.unpack_from('%dB' % (height * width), data, index)
        label = struct.unpack_from('B', data2, index2)
        index += struct.calcsize('%dB' % (height * width))
        index2 += 1
        ret_data.append((np.array(img).reshape(width, height), label))
    return ret_data

data = read_data("MachineLearning/Digital recognition/DataSet/train-images.idx3-ubyte", \
                 "MachineLearning/Digital recognition/DataSet/train-labels.idx1-ubyte")
fig = plt.figure()
for i in range(0, 9):
    plt.subplot(251 + i)
    plt.title('%d' % data[i][1])
    plt.imshow(data[i][0], cmap='gray_r')
plt.show()
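Note that index2 starts at 8 because the label file carries its own 8-byte header (a magic number and the label count). Instead of hard-coding the offset, the header can be parsed the same way as the image header; a small sketch:

magic2, num_labels = struct.unpack_from('>II', data2, 0)
index2 = struct.calcsize('>II')  # 8
assert magic2 == 2049, 'not an IDX1 label file'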
Neural network training
# -*- coding: utf-8 -*-
import random
import struct
import numpy as np
import matplotlib.pyplot as plt
class Network(object):
def __init__(self, sizes):
"""``sizes``每一层的神经元数量(For example, if the listwas [2, 3, 1]
then it would be a three-layer network, with thefirst layer containing
2 neurons, the second layer 3 neurons,and the third layer 1 neuron)
``biases``weights: (0,1)区间的随机数 """
self.num_layers = len(sizes)
self.sizes = sizes
self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
self.weights = [np.random.randn(y, x)
for x, y in zip(sizes[:-1], sizes[1:])]
def feedforward(self, a):
""" ``a``输入``Return``神经网络的输出 """
for b, w in zip(self.biases, self.weights):
a = sigmoid(np.dot(w, a)+b)
return a
    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
"""用mini-batch stochasticgradient descent训练神经网络.
`training_data为元祖`(x, y)表示输入和输出.
`epoch表示迭代次数,.
`mini_batch_size`表示取样块的大小"""
if test_data: n_test = len(test_data)
n = len(training_data)
for j in range(epochs):
random.shuffle(training_data)
            mini_batches = [training_data[k:k+mini_batch_size]
                            for k in range(0, n, mini_batch_size)]
for mini_batch in mini_batches:
self.update_mini_batch(mini_batch, eta)
if test_data:
print( "迭代次数 {0}: {1} / {2}".format(j, self.evaluate(test_data), n_test) )
else:
print( "迭代次数 {0} complete".format(j) )
def update_mini_batch(self, mini_batch, eta):
"""应用梯度下降更新神经网络,使用反向传播对单个mini batch.
``mini_batch`` 为元祖``(x, y)``,和``eta``是learning rate."""
nabla_b = [np.zeros(b.shape) for b in self.biases]
nabla_w = [np.zeros(w.shape) for w in self.weights]
for x, y in mini_batch:
delta_nabla_b, delta_nabla_w = self.backprop(x, y)
nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
self.weights = [w-(eta/len(mini_batch))*nw
for w, nw in zip(self.weights, nabla_w)]
self.biases = [b-(eta/len(mini_batch))*nb
for b, nb in zip(self.biases, nabla_b)]
def backprop(self, x, y):
"""Return:``(nabla_b, nabla_w)`` representing the
gradient for the cost function C_x. ``nabla_b`` and
``nabla_w`` are layer-by-layer lists of numpy arrays, similar
to ``self.biases`` and ``self.weights``."""
nabla_b = [np.zeros(b.shape) for b in self.biases]
nabla_w = [np.zeros(w.shape) for w in self.weights]
# feedforward
activation = x
activations = [x] # list to store all the activations, layer by layer
zs = [] # list to store all the z vectors, layer by layer
for b, w in zip(self.biases, self.weights):
z = np.dot(w, activation)+b
zs.append(z)
activation = sigmoid(z)
activations.append(activation)
# backward pass
delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
nabla_b[-1] = delta
nabla_w[-1] = np.dot(delta, activations[-2].transpose())
for l in range(2, self.num_layers):
z = zs[-l]
sp = sigmoid_prime(z)
delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
nabla_b[-l] = delta
nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
return (nabla_b, nabla_w)
def evaluate(self, test_data):
"""Return the number of test inputs for which the neural
network outputs the correct result. Note that the neural
network's output is assumed to be the index of whichever
neuron in the final layer has the highest activation."""
test_results = [(np.argmax(self.feedforward(x)), y) for (x, y) in test_data]
return sum(int(x == y) for (x, y) in test_results)
    def cost_derivative(self, output_activations, y):
        r"""Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        # y is an integer label here, so subtracting 1 at index y computes
        # (output_activations - e_y), where e_y is the one-hot target vector
        output_activations[y] -= 1
        return output_activations
#### Miscellaneous functions
def sigmoid(z):
"""sigmoid函数"""
return 1.0/(1.0+np.exp(-z))
def sigmoid_prime(z):
"""sigmoid函数的导数"""
return sigmoid(z)*(1-sigmoid(z))
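As a quick sanity check of the shapes flowing through feedforward (a minimal usage sketch, assuming the class above is in scope):

net = Network([784, 30, 10])
out = net.feedforward(np.random.randn(784, 1))
print(out.shape)  # (10, 1): one activation per output neuron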
Driver code
# -*- coding: utf-8 -*-
import numpy as np
import gzip
import struct
import matplotlib.pyplot as plt
import network
def read_data(train_path,test_path):
"""
    Read the MNIST dataset.
    ``return`` a list of [image, label] pairs
"""
data = open(train_path,'rb').read()
data2 = open(test_path,'rb').read()
index = 0
    index2 = 8  # skip the label file's 8-byte header (magic number and count)
ret_data = []
magic, num, width, height = struct.unpack_from('>IIII',data,index)
index += struct.calcsize('>IIII')
for i in range(num):
img = struct.unpack_from('%dB'%(height*width),data,index)
        label = struct.unpack_from('B', data2, index2)
index += struct.calcsize('%dB'%(height*width))
index2 += 1
        ret_data.append([np.array(img).reshape(width * height, 1), int(label[0])])
return ret_data
if __name__ == "__main__":
training_data = read_data("MachineLearning/Digital recognition/DataSet/train-images.idx3-ubyte", \
"MachineLearning/Digital recognition/DataSet/train-labels.idx1-ubyte")
test_data = read_data('MachineLearning/Digital recognition/DataSet/t10k-images.idx3-ubyte',\
"MachineLearning/Digital recognition/DataSet/t10k-labels.idx1-ubyte")
net = network.Network([784, 30, 10])
net.SGD(training_data, 20, 10, 3.0, test_data=test_data)
# plt.figure()
# for i in range(0,1):
# plt.subplot((251 + i))
# plt.title('%d'%test_data[i][1])
# plt.imshow(test_data[i][0].reshape(28,28),cmap='gray_r')
# plt.show()
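One practical tweak that is not in the original code: the pixels are fed in as raw 0-255 values, which drives the sigmoid deep into saturation. Scaling the inputs to [0, 1] inside read_data usually speeds up convergence; a one-line variant of the append:

ret_data.append([np.array(img).reshape(width * height, 1) / 255.0, int(label[0])])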
References
- [1] Michael Nielsen, Neural Networks and Deep Learning (《神经网络与深度学习》)