Dataset source
- The public MNIST handwritten-digit dataset
git clone https://github.com/mnielsen/neural-networks-and-deep-learning.git
Reading the MNIST data
- Tutorial referenced for the data reading: https://blog.csdn.net/panrenlong/article/details/81736754
- Usage of Python's struct module
| Function | Returns | Description |
| --- | --- | --- |
| pack(fmt, v1, v2, …) | bytes | Packs the values into a byte string according to the given format fmt and returns that string. |
| pack_into(fmt, buffer, offset, v1, v2, …) | None | Packs the values according to fmt and writes the resulting bytes into buffer starting at offset (buffer must be writable, e.g. one created with the array module). |
| unpack(fmt, string) | tuple | Parses the byte string according to fmt and returns the parsed values. |
| unpack_from(fmt, buffer, offset) | tuple | Parses the buffer starting at offset according to fmt and returns the parsed values. |
| calcsize(fmt) | int | Returns how many bytes the given format fmt occupies; note the alignment rules. |

Byte-order characters for the format string:

| Character | Byte order |
| --- | --- |
| @ | native (native size and alignment) |
| = | native (standard size) |
| < | little-endian |
| > | big-endian |
| ! | network (= big-endian) |
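To make these concrete, here is a minimal round-trip sketch (the example values match the MNIST image-file header used below):

import struct
buf = struct.pack('>IIII', 2051, 60000, 28, 28)  # four big-endian unsigned ints
print(struct.calcsize('>IIII'))                  # 16 (bytes)
print(struct.unpack('>IIII', buf))               # (2051, 60000, 28, 28)
print(struct.unpack_from('>II', buf, 8))         # (28, 28), parsed from offset 8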
Example code
index = 0
magic, numImages, numRows, numColumns = struct.unpack_from('>IIII', data, index)
index += struct.calcsize('>IIII')
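The IDX file format fixes these header values, so a sanity check is cheap: the magic number is 2051 (0x00000803) for image files and 2049 (0x00000801) for label files. A minimal check to add right after unpacking:

assert magic == 2051, 'not an IDX3 image file'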
- Fixing the problem that printing a bytes object directly shows the corresponding ASCII characters
def print_hex(data):
    # str.join() concatenates the elements of a sequence into one string, separated by the given string
    hex_strs = [hex(int(i)) for i in data]
    print(" ".join(hex_strs))
An alternative version:
print(" ".join(['%02X'% i for i in data]))
- Data-reading code
data = open("train-images.idx3-ubyte", 'rb').read() index = 0 magic, numImages , numRows , numColumns = struct.unpack_from('>IIII' , data , index) index += struct.calcsize('>IIII') #img = np.array([int('%d'% i) for i in data[16:800]]).reshape(28,28) #print(img) 直接读取图片 img = struct.unpack_from('784B',data,index) index += struct.calcsize('784B'); img = np.array(img).reshape(28,28) print(img) # 用struct读取图片 fig = plt.figure() #输出一个 plotwindow = fig.add_subplot(111) plt.imshow(img,cmap='gray') plt.show()
- Reading the images and labels together
# -*- coding: utf-8 -*-
import numpy as np
import gzip
import struct
import matplotlib.pyplot as plt

def read_data(train_path, test_path):
    """
    Read the MNIST dataset.
    ``return`` a list of tuples: (image, label)
    """
    data = open(train_path, 'rb').read()
    data2 = open(test_path, 'rb').read()
    index = 0
    index2 = 8
    ret_data = []
    magic, num, width, height = struct.unpack_from('>IIII', data, index)
    index += struct.calcsize('>IIII')
    for i in range(10):  # read only the first 10 images for this demo
        img = struct.unpack_from('%dB' % (height * width), data, index)
        label = struct.unpack_from('B', data2, index2)
        index += struct.calcsize('%dB' % (height * width))
        index2 += 1
        ret_data.append((np.array(img).reshape(width, height), label))
    return ret_data

data = read_data("MachineLearning/Digital recognition/DataSet/train-images.idx3-ubyte", \
                 "MachineLearning/Digital recognition/DataSet/train-labels.idx1-ubyte")
fig = plt.figure()
for i in range(0, 9):
    plt.subplot(251 + i)
    plt.title('%d' % data[i][1])
    plt.imshow(data[i][0], cmap='gray_r')
plt.show()
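Note that index2 starts at 8 because the label file carries its own 8-byte header (a magic number and the label count). Instead of hard-coding the offset, the header can be parsed the same way as the image header; a small sketch:

magic2, num_labels = struct.unpack_from('>II', data2, 0)
index2 = struct.calcsize('>II')  # 8
assert magic2 == 2049, 'not an IDX1 label file'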
Neural network training
# -*- coding: utf-8 -*-
import random
import struct
import numpy as np
import matplotlib.pyplot as plt
class Network(object):
def __init__(self, sizes):
"""``sizes``每一层的神经元数量(For example, if the listwas [2, 3, 1]
then it would be a three-layer network, with thefirst layer containing
2 neurons, the second layer 3 neurons,and the third layer 1 neuron)
``biases``weights: (0,1)区间的随机数 """
self.num_layers = len(sizes)
self.sizes = sizes
self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
self.weights = [np.random.randn(y, x)
for x, y in zip(sizes[:-1], sizes[1:])]
def feedforward(self, a):
""" ``a``输入``Return``神经网络的输出 """
for b, w in zip(self.biases, self.weights):
a = sigmoid(np.dot(w, a)+b)
return a
    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
"""用mini-batch stochasticgradient descent训练神经网络.
`training_data为元祖`(x, y)表示输入和输出.
`epoch表示迭代次数,.
`mini_batch_size`表示取样块的大小"""
if test_data: n_test = len(test_data)
n = len(training_data)
for j in range(epochs):
random.shuffle(training_data)
            mini_batches = [training_data[k:k+mini_batch_size]
                            for k in range(0, n, mini_batch_size)]
for mini_batch in mini_batches:
self.update_mini_batch(mini_batch, eta)
if test_data:
print( "迭代次数 {0}: {1} / {2}".format(j, self.evaluate(test_data), n_test) )
else:
print( "迭代次数 {0} complete".format(j) )
def update_mini_batch(self, mini_batch, eta):
"""应用梯度下降更新神经网络,使用反向传播对单个mini batch.
``mini_batch`` 为元祖``(x, y)``,和``eta``是learning rate."""
nabla_b = [np.zeros(b.shape) for b in self.biases]
nabla_w = [np.zeros(w.shape) for w in self.weights]
for x, y in mini_batch:
delta_nabla_b, delta_nabla_w = self.backprop(x, y)
nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
self.weights = [w-(eta/len(mini_batch))*nw
for w, nw in zip(self.weights, nabla_w)]
self.biases = [b-(eta/len(mini_batch))*nb
for b, nb in zip(self.biases, nabla_b)]
def backprop(self, x, y):
"""Return:``(nabla_b, nabla_w)`` representing the
gradient for the cost function C_x. ``nabla_b`` and
``nabla_w`` are layer-by-layer lists of numpy arrays, similar
to ``self.biases`` and ``self.weights``."""
nabla_b = [np.zeros(b.shape) for b in self.biases]
nabla_w = [np.zeros(w.shape) for w in self.weights]
# feedforward
activation = x
activations = [x] # list to store all the activations, layer by layer
zs = [] # list to store all the z vectors, layer by layer
for b, w in zip(self.biases, self.weights):
z = np.dot(w, activation)+b
zs.append(z)
activation = sigmoid(z)
activations.append(activation)
# backward pass
delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
nabla_b[-1] = delta
nabla_w[-1] = np.dot(delta, activations[-2].transpose())
for l in range(2, self.num_layers):
z = zs[-l]
sp = sigmoid_prime(z)
delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
nabla_b[-l] = delta
nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
return (nabla_b, nabla_w)
def evaluate(self, test_data):
"""Return the number of test inputs for which the neural
network outputs the correct result. Note that the neural
network's output is assumed to be the index of whichever
neuron in the final layer has the highest activation."""
test_results = [(np.argmax(self.feedforward(x)), y) for (x, y) in test_data]
return sum(int(x == y) for (x, y) in test_results)
    def cost_derivative(self, output_activations, y):
        r"""Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        # y is an integer label here, so subtracting 1 at index y computes
        # (output_activations - e_y), where e_y is the one-hot target vector
        output_activations[y] -= 1
        return output_activations
#### Miscellaneous functions
def sigmoid(z):
"""sigmoid函数"""
return 1.0/(1.0+np.exp(-z))
def sigmoid_prime(z):
"""sigmoid函数的导数"""
return sigmoid(z)*(1-sigmoid(z))
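As a quick sanity check of the shapes flowing through feedforward (a minimal usage sketch, assuming the class above is in scope):

net = Network([784, 30, 10])
out = net.feedforward(np.random.randn(784, 1))
print(out.shape)  # (10, 1): one activation per output neuron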
Driver code
# -*- coding: utf-8 -*-
import numpy as np
import gzip
import struct
import matplotlib.pyplot as plt
import network
def read_data(train_path,test_path):
"""
    Read the MNIST dataset.
    ``return`` a list of [image, label] pairs
"""
data = open(train_path,'rb').read()
data2 = open(test_path,'rb').read()
index = 0
    index2 = 8  # skip the label file's 8-byte header (magic number and count)
ret_data = []
magic, num, width, height = struct.unpack_from('>IIII',data,index)
index += struct.calcsize('>IIII')
for i in range(num):
img = struct.unpack_from('%dB'%(height*width),data,index)
        label = struct.unpack_from('B', data2, index2)
index += struct.calcsize('%dB'%(height*width))
index2 += 1
        ret_data.append([np.array(img).reshape(width * height, 1), int(label[0])])
return ret_data
if __name__ == "__main__":
training_data = read_data("MachineLearning/Digital recognition/DataSet/train-images.idx3-ubyte", \
"MachineLearning/Digital recognition/DataSet/train-labels.idx1-ubyte")
test_data = read_data('MachineLearning/Digital recognition/DataSet/t10k-images.idx3-ubyte',\
"MachineLearning/Digital recognition/DataSet/t10k-labels.idx1-ubyte")
net = network.Network([784, 30, 10])
net.SGD(training_data, 20, 10, 3.0, test_data=test_data)
# plt.figure()
# for i in range(0,1):
# plt.subplot((251 + i))
# plt.title('%d'%test_data[i][1])
# plt.imshow(test_data[i][0].reshape(28,28),cmap='gray_r')
# plt.show()
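One practical tweak that is not in the original code: the pixels are fed in as raw 0-255 values, which drives the sigmoid deep into saturation. Scaling the inputs to [0, 1] inside read_data usually speeds up convergence; a one-line variant of the append:

ret_data.append([np.array(img).reshape(width * height, 1) / 255.0, int(label[0])])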
References
- [1] Michael Nielsen, Neural Networks and Deep Learning (《神经网络与深度学习》)