import random #产生随机数
import numpy as np
# 神经网络类
class Network(object):
    """Fully connected feed-forward network trained by mini-batch SGD.

    Every non-input layer applies the module-level ``sigmoid`` activation.
    Gradients are obtained by backpropagation using ``cost_derivative``
    (output minus target, i.e. the gradient of ``0.5 * ||a - y||**2``).
    """

    def __init__(self, sizes):
        """Create a network with randomly initialised weights and biases.

        Args:
            sizes: Number of neurons per layer, input layer first,
                e.g. ``Network([2, 3, 1])``.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # One (y, 1) bias column per non-input layer, standard-normal draws.
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        # weights[i] has shape (size of layer i+1, size of layer i).
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Propagate ``a`` through every layer and return the final output.

        ``a`` must have as many rows as the input layer has neurons so that
        ``np.dot(w, a)`` is defined for the first weight matrix.
        """
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            training_data: Iterable of ``(x, y)`` training pairs.
            epochs: Number of passes over the training data.
            mini_batch_size: Number of samples per mini-batch (> 0).
            eta: Learning rate.
            test_data: Optional iterable of ``(x, label)`` pairs; when it is
                non-empty the number of correct predictions is printed after
                every epoch.

        Raises:
            ValueError: If ``mini_batch_size`` is not positive.
        """
        # A negative step would make range() yield no batches at all, so the
        # network would silently never be trained.
        if mini_batch_size <= 0:
            raise ValueError("mini_batch_size must be a positive integer")
        # Work on a private list: this accepts one-shot iterables such as
        # zip objects and keeps the shuffle from reordering the caller's data.
        training_data = list(training_data)
        if test_data is not None:
            test_data = list(test_data)
        n_test = len(test_data) if test_data else 0
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)
            # Consecutive slices of the shuffled data; the last one may be
            # shorter than mini_batch_size.
            mini_batches = [training_data[k:k + mini_batch_size]
                            for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print("Epoch{0}:{1}/{2}".format(
                    j, self.evaluate(test_data), n_test))
            else:
                print("Epoch{0} complete".format(j))

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step computed from ``mini_batch``.

        Args:
            mini_batch: Sequence of ``(x, y)`` pairs.
            eta: Learning rate; the summed gradient is scaled by
                ``eta / len(mini_batch)``.
        """
        # An empty batch carries no gradient; returning early also avoids
        # dividing by zero below.
        if len(mini_batch) == 0:
            return
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            # Per-sample gradients, accumulated layer by layer.
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        step = eta / len(mini_batch)
        self.weights = [w - step * nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - step * nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return the cost gradient for a single training pair.

        Returns:
            A tuple ``(nabla_b, nabla_w)`` of lists of arrays with the same
            shapes as ``self.biases`` and ``self.weights``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # Forward pass: remember every weighted input z and activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # Backward pass: error of the output layer first.
        delta = (self.cost_derivative(activations[-1], y)
                 * sigmoid_prime(zs[-1]))
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # ``layer`` counts from the end: 2 is the second-to-last layer, and
        # so on, which lets negative indices walk the lists backwards.
        for layer in range(2, self.num_layers):
            sp = sigmoid_prime(zs[-layer])
            delta = np.dot(self.weights[-layer + 1].transpose(), delta) * sp
            nabla_b[-layer] = delta
            nabla_w[-layer] = np.dot(delta,
                                     activations[-layer - 1].transpose())
        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        """Return how many ``(x, label)`` pairs are classified correctly.

        The prediction is the index of the largest output activation.
        """
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(predicted == expected)
                   for (predicted, expected) in test_results)

    def cost_derivative(self, output_activations, y):
        r"""Return the partial derivatives \partial C_x / \partial a
        of the cost with respect to the output activations."""
        return (output_activations - y)
#激励函数
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The value is computed from ``exp(-|z|)`` so that large-magnitude
    inputs cannot overflow the exponential.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], whereas exp(-z) overflows for very
    # negative z.
    e = np.exp(-np.abs(z))
    # For z >= 0: 1 / (1 + exp(-z)); for z < 0 the equivalent form
    # exp(z) / (1 + exp(z)).
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with () converts a 0-d result back to a NumPy scalar and
    # leaves arrays of one or more dimensions unchanged.
    return out[()]
def sigmoid_prime(z):
    """Return the derivative of the sigmoid function evaluated at ``z``.

    Uses the identity ``sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z))``.
    """
    # Evaluate the sigmoid once and reuse it for both factors.
    s = sigmoid(z)
    return s * (1 - s)
二,mnist_loader.py
# -*- coding: utf-8 -*-
# from __future__ import print_function,division
import pickle
import gzip
import numpy as np
#从数据集中载入数据
def load_data(path='../MNIST_data/MNIST_data/mnist.pkl.gz'):
    """Load the gzip-compressed pickle holding the three dataset splits.

    Args:
        path: Location of the ``.pkl.gz`` file. Defaults to the path the
            original script used.

    Returns:
        The tuple ``(training_data, validation_data, test_data)`` exactly as
        stored in the pickle.
    """
    # NOTE: unpickling can execute arbitrary code; only open trusted files.
    # The context manager closes the file even if unpickling raises.
    with gzip.open(path, 'rb') as f:
        training_data, validation_data, test_data = pickle.load(
            f, encoding='bytes')
    return training_data, validation_data, test_data
#改编数据集的格式
def load_data_wrapper():
    """Return the dataset as lists of ``(input, label)`` pairs.

    Each input is reshaped to a ``(784, 1)`` column. Training labels are
    converted with ``vectorized_label``; validation and test labels are
    passed through unchanged.

    Returns:
        A tuple ``(training_data, validation_data, test_data)`` of lists.
    """
    tr_d, va_d, te_d = load_data()
    # Training set: column inputs paired with vectorised labels.
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_labels = [vectorized_label(y) for y in tr_d[1]]
    training_data = list(zip(training_inputs, training_labels))
    # Validation set: column inputs paired with the stored labels.
    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    validation_data = list(zip(validation_inputs, va_d[1]))
    # Test set: column inputs paired with the stored labels.
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = list(zip(test_inputs, te_d[1]))
    return (training_data, validation_data, test_data)
def vectorized_label(j, num_classes=10):
    """Return a one-hot column vector with 1.0 at index ``j``.

    Args:
        j: Index of the entry to set.
        num_classes: Number of rows in the vector (default 10).

    Returns:
        A ``(num_classes, 1)`` float array that is zero everywhere except
        at row ``j``.
    """
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e
三,demo.py
import mnist_loader
import network
# Load the training, validation and test sets as lists of (input, label)
# pairs. For a quick sanity check, print len(training_data) and the shapes
# of training_data[0][0] (the input x) and training_data[0][1] (the label y).
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

# 784 input neurons, one hidden layer of 30 neurons, 10 output neurons.
# Other layouts to experiment with: [784, 100, 10] and [784, 50, 60, 10].
net = network.Network([784, 30, 10])

# Train for 30 epochs with mini-batches of 10 samples and learning rate 3.0,
# reporting the score on the test set after every epoch.
net.SGD(
    training_data,
    epochs=30,
    mini_batch_size=10,
    eta=3.0,
    test_data=test_data,
)
四,运行结果