在上篇博客中,我们学习了mnist数据集下载以及导入的方法,今天尝试用别人训练好的网络权重参数,进行前向网络的搭建,不使用TensorFlow等框架,来体会神经网络各层之间的联系
神经网络权重参数下载链接:(mnist.pkl)https://mp.csdn.net/console/Editorial/14121869
来自《深度学习入门基于python的理论与实现》一书的附赠资源
上一篇博客传送门https://blog.csdn.net/weixin_43872532/article/details/112501990
神经网络的推理处理
神经网络输入层共有784(28*28)个神经元,输出层共有10(0~9共10个取值)个神经元。此外,这个网络共有2个隐藏层,第一个隐藏层有50个神经元,第二个隐藏层有100个神经元。其中50和100可以设置成任意值,此处选用书籍提供的权重参数形状(50, 100)
首先定义三个函数get_data(),init_network(),predict()
def get_data():
    """Load MNIST and return only the normalized, flattened test split."""
    # Training split is unused here, so discard it.
    (_, _), (x_test, t_test) = load_mnist(
        normalize=True, flatten=True, one_hot_label=False)
    return x_test, t_test
def init_network():
    """Load the pretrained weight dict from the pickled sample file.

    Returns a dict with keys 'W1'..'W3' and 'b1'..'b3'.

    NOTE(review): pickle.load can execute arbitrary code -- only load
    weight files obtained from a trusted source.
    """
    # Raw string: "E:\D..." as a plain literal contains the invalid
    # escape sequences \D and \s (a SyntaxWarning in Python 3.12+).
    with open(r"E:\DeepLearning\sample_weight.pkl", 'rb') as f:
        network = pickle.load(f)
    return network
def predict(network, x):
    """Forward pass 784 -> 50 -> 100 -> 10: sigmoid hidden layers, softmax output."""
    W1, W2, W3 = (network[k] for k in ('W1', 'W2', 'W3'))
    b1, b2, b3 = (network[k] for k in ('b1', 'b2', 'b3'))

    z1 = sigmoid(np.dot(x, W1) + b1)
    z2 = sigmoid(np.dot(z1, W2) + b2)
    return softmax(np.dot(z2, W3) + b3)
其中用到的函数在functions.py中有定义(见文末):
init_network()会读入保存在pickle文件sample_weight.pkl中的权重参数
predict()创建了一个简单的前向神经网络。
接着评价这个网络的识别精度:
x, t = get_data()
network = init_network()
print(network['W3'].shape)

# Count samples whose most probable predicted class matches the label.
accuracy_cnt = sum(
    1 for xi, ti in zip(x, t)
    if np.argmax(predict(network, xi)) == ti
)
print("Accuracy:" + str(float(accuracy_cnt) / len(x)))
这就实现了一个简单的神经网络了,整体代码如下:
# coding: utf-8
import sys, os
import numpy as np
import pickle
from load_mnist import load_mnist
from functions import sigmoid, softmax
def get_data():
    """Load MNIST and return only the normalized, flattened test split."""
    # Training split is unused here, so discard it.
    (_, _), (x_test, t_test) = load_mnist(
        normalize=True, flatten=True, one_hot_label=False)
    return x_test, t_test
def init_network():
    """Load the pretrained weight dict from the pickled sample file.

    Returns a dict with keys 'W1'..'W3' and 'b1'..'b3'.

    NOTE(review): pickle.load can execute arbitrary code -- only load
    weight files obtained from a trusted source.
    """
    # Raw string: "E:\D..." as a plain literal contains the invalid
    # escape sequences \D and \s (a SyntaxWarning in Python 3.12+).
    with open(r"E:\DeepLearning\sample_weight.pkl", 'rb') as f:
        network = pickle.load(f)
    return network
def predict(network, x):
    """Forward pass 784 -> 50 -> 100 -> 10: sigmoid hidden layers, softmax output."""
    W1, W2, W3 = (network[k] for k in ('W1', 'W2', 'W3'))
    b1, b2, b3 = (network[k] for k in ('b1', 'b2', 'b3'))

    z1 = sigmoid(np.dot(x, W1) + b1)
    z2 = sigmoid(np.dot(z1, W2) + b2)
    return softmax(np.dot(z2, W3) + b3)
x, t = get_data()
network = init_network()
print(network['W3'].shape)

# Count samples whose most probable predicted class matches the label.
accuracy_cnt = sum(
    1 for xi, ti in zip(x, t)
    if np.argmax(predict(network, xi)) == ti
)
print("Accuracy:" + str(float(accuracy_cnt) / len(x)))
附件
functions.py
# coding: utf-8
import numpy as np
def identity_function(x):
    """Identity activation: pass the input through unchanged."""
    return x
def step_function(x):
    """Heaviside step activation: 1 where x > 0, else 0.

    Fix: ``np.int`` was deprecated in NumPy 1.20 and removed in 1.24
    (raises AttributeError); the builtin ``int`` is the correct dtype.
    """
    return np.array(x > 0, dtype=int)
def sigmoid(x):
    """Logistic sigmoid activation: 1 / (1 + e^(-x))."""
    exp_neg = np.exp(-x)
    return 1.0 / (1.0 + exp_neg)
def sigmoid_grad(x):
    """Derivative of the sigmoid: s(x) * (1 - s(x))."""
    s = sigmoid(x)
    return s * (1.0 - s)
def relu(x):
    """Rectified linear unit: elementwise max(x, 0)."""
    return np.maximum(x, 0)
def relu_grad(x):
    """Gradient of ReLU: 1 where x >= 0, else 0.

    Fix: the original called ``np.zeros(x)``, which interprets the array
    as a *shape* argument and raises TypeError for float arrays;
    ``np.zeros_like(x)`` allocates zeros matching x's shape and dtype.
    """
    grad = np.zeros_like(x)
    grad[x >= 0] = 1
    return grad
def softmax(x):
    """Numerically stable softmax.

    1-D input: softmax over the whole vector.
    2-D input: row-wise softmax (one distribution per sample).
    The per-row/global max is subtracted first to avoid exp overflow.
    """
    if x.ndim == 2:
        shifted = x - x.max(axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / exps.sum(axis=1, keepdims=True)

    exps = np.exp(x - x.max())  # 溢出对策 (overflow guard)
    return exps / exps.sum()
def mean_squared_error(y, t):
    """均方误差: half the sum of squared differences between y and t."""
    diff = y - t
    return 0.5 * np.sum(diff * diff)
def cross_entropy_error(y, t):
    """Cross-entropy loss averaged over the batch.

    y: predicted probabilities, shape (classes,) or (batch, classes).
    t: labels, either class indices or one-hot rows matching y.
    1e-7 is added before the log to avoid log(0).
    """
    # Promote a single sample to a batch of one.
    if y.ndim == 1:
        y = y.reshape(1, -1)
        t = t.reshape(1, -1)

    # One-hot labels have as many entries as y; convert to index form.
    if t.size == y.size:
        t = t.argmax(axis=1)

    n = y.shape[0]
    picked = y[np.arange(n), t]  # probability assigned to the true class
    return -np.mean(np.log(picked + 1e-7))
def softmax_loss(X, t):
    """Softmax activation followed by cross-entropy loss."""
    return cross_entropy_error(softmax(X), t)