1、单层神经网络
1、网络结构
本次测试使用的是mnist手写体识别
用来训练的数据的维度是x_train = (784, batch_size), y_train = (10, batch_size)。
用来测试的数据的维度是x_test = (784, batch_size), y_test = (10, batch_size)。
用来实验的网络结构是:784 维输入直接全连接到 10 个 sigmoid 输出节点(没有隐藏层)。
所以参数的维数定义为
$$weight^1 = (10, 784)$$
$$biases = (10, 1)$$
2、前向传播维度
$$z = weight^1 \cdot X + biases = (10, batch\_size)$$
$$a = sigmoid(z) = (10, batch\_size)$$
$$loss\_function = -y\log A - (1-y)\log(1-A)$$
3 、反向传播
$$dw = \frac{\partial z}{\partial w} \cdot \frac{\partial A}{\partial z} \cdot \frac{\partial L}{\partial A}$$
其中
$$\frac{\partial L}{\partial A} = -\frac{y}{A} + \frac{1-y}{1-A}$$
$$\frac{\partial A}{\partial z} = g(z)(1-g(z)) = A \cdot (1-A)$$
两式相乘得到 $\frac{\partial L}{\partial z} = A - y$,因此
$$dw = (A - y) \cdot x^T = (10, 784)$$
$$db = (A - y) = (10, batch\_size)$$
对 batch 维求平均后,$db$ 的维度与 $biases$ 一致,为 $(10, 1)$。
4、代码
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import pandas as pd
# Load the MNIST data sets from ./data with one-hot encoded labels.
# NOTE(review): the tensorflow.examples.tutorials package is presumably only
# available in TensorFlow 1.x -- confirm the installed version.
mnist = input_data.read_data_sets("./data", one_hot=True)
# x_train -> [784, m], y_train -> [10, m]
class Propagation():
    """Single-layer sigmoid network trained with batch gradient descent.

    Data is laid out column-wise: ``x_train`` is (features, m) and ``y_train``
    is (classes, m) with one-hot labels, where ``m`` is the number of samples.
    """

    def __init__(self, x_train, y_train):
        # Network structure, read from the data (784 inputs / 10 outputs for MNIST)
        self.input_node = x_train.shape[0]
        self.output_node = y_train.shape[0]
        # Parameters to optimise
        self.learning_rate = 0.8
        self.weight = None
        self.bias = None
        # Number of input samples
        self.m = x_train.shape[1]
        # Training inputs and labels
        self.x, self.y = x_train, y_train

    @property
    def b(self):
        """Alias of ``bias``, kept for code that still uses the old ``b`` name."""
        return self.bias

    @b.setter
    def b(self, value):
        self.bias = value

    def init_parameter(self):
        """Initialise the parameters and return ``(weight, bias)``.

        ``weight`` is drawn from a normal distribution (mean 0, scale 0.5) with
        shape (output_node, input_node); ``bias`` is a zero column vector of
        shape (output_node, 1).
        """
        self.weight = np.random.normal(0, 0.5, (self.output_node, self.input_node))
        self.bias = np.zeros((self.output_node, 1))
        return self.weight, self.bias

    def propagation(self):
        """Run one forward pass, one backward pass and one gradient step.

        Returns:
            tuple: ``(loss, weight, bias)`` -- the cross-entropy loss measured
            before the update, followed by the updated parameters.
        """
        z = np.dot(self.weight, self.x) + self.bias
        A = 1 / (1 + np.exp(-z))
        # Keep the activations strictly inside (0, 1) for the logarithms only;
        # a saturated sigmoid would otherwise turn the loss into inf/nan.
        A_safe = np.clip(A, 1e-12, 1 - 1e-12)
        loss_function = (1 / self.m) * np.sum(
            -self.y * np.log(A_safe) - (1 - self.y) * np.log(1 - A_safe))
        dz = A - self.y
        dw = (1 / self.m) * np.dot(dz, self.x.T)
        # Sum over the sample axis only, so every output unit gets its own
        # bias gradient instead of one scalar shared by all of them.
        db = (1 / self.m) * np.sum(dz, axis=1, keepdims=True)
        self.weight = self.weight - self.learning_rate * dw
        self.bias = self.bias - self.learning_rate * db
        return loss_function, self.weight, self.bias
class Accuracy(object):
    """Score a single-layer sigmoid network on a labelled data set.

    ``x_test`` is (features, m), ``y_test`` is (classes, m) with one-hot
    labels; ``weight`` and ``bias`` are the trained parameters.
    """

    def __init__(self, x_test, y_test, weight, bias):
        self.weight_test = weight
        self.bias_test = bias
        self.x = x_test
        self.y = y_test

    def accuracy_compute(self):
        """Return the fraction of output entries that match the labels.

        Every sigmoid output is thresholded at 0.5 and compared with the
        corresponding label entry. Note that this is an element-wise score
        over all (class, sample) entries, not an argmax class accuracy.
        """
        z = np.dot(self.weight_test, self.x) + self.bias_test
        A = 1 / (1 + np.exp(-z))
        # Vectorised thresholding replaces the per-element Python loops
        y_predict = (A >= 0.5).astype(float)
        # Every sample has the same number of outputs, so the mean of the
        # per-sample means equals the mean over all entries.
        return np.mean(y_predict == self.y)
import os

# Train on one fixed mini-batch of 100 images; evaluate on the whole test set.
# The arrays are transposed so that every column is one sample.
x_train, y_train = mnist.train.next_batch(100)
x_train = x_train.T
y_train = y_train.T
x_test = mnist.test.images.T
y_test = mnist.test.labels.T

# Create the output directory up front: to_csv does not create it, and a
# missing directory would otherwise only fail after the whole training run.
output_path = "./storage/data.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)

pro = Propagation(x_train, y_train)
# init_parameter stores the parameters on the instance itself
pro.init_parameter()

loss_list = []
acc_train_means = []
acc_test_means = []
iteration = []
# Start training
for i in range(30000):
    loss, weight, bias = pro.propagation()
    # Record loss and accuracy every 500 iterations
    if i % 500 == 0:
        acc_test = Accuracy(x_test, y_test, weight, bias)
        acc_train = Accuracy(x_train, y_train, weight, bias)
        acc_test_storage = acc_test.accuracy_compute()
        acc_train_storage = acc_train.accuracy_compute()
        print("经过" + str(i) + "次迭代" + "在训练集上的损失是" + str(loss))
        print("经过%d次迭代,在训练集上的精度是%.5f" % (i, acc_train_storage))
        print("经过%d次迭代,在测试集上的精度是%.5f" % (i, acc_test_storage))
        loss_list.append(loss)
        acc_train_means.append(acc_train_storage)
        acc_test_means.append(acc_test_storage)
        iteration.append(i)

# Save the recorded curves for the plotting script
save_scv = {"iteration": iteration,
            "acc_train": acc_train_means,
            "acc_validate": acc_test_means,
            "loss_train": loss_list}
dataframe = pd.DataFrame(save_scv)
dataframe.to_csv(output_path, index=False, sep=',')
6、结果分析
模型在训练集上拥有很高的精度,但在测试集上却表现平平。
出现这种低偏差、高方差的情况,考虑是过拟合或者参数调整不到位。
可以加上正则化,或者调整学习率,此处就不再测试。
2、双层网络
1、代码
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import pandas as pd
# Load the MNIST data sets from ./data with one-hot encoded labels.
# NOTE(review): the tensorflow.examples.tutorials package is presumably only
# available in TensorFlow 1.x -- confirm the installed version.
mnist = input_data.read_data_sets("./data", one_hot=True)
class Propagation(object):
    """Two-layer network (tanh hidden layer, sigmoid output) trained with
    batch gradient descent.

    Data is laid out column-wise: ``x`` is (features, m) and ``y`` is
    (classes, m) with one-hot labels, where ``m`` is the number of samples.
    """

    def __init__(self, x, y):
        # Inputs and labels
        self.x = x
        self.y = y
        # Network structure; input/output sizes are read from the data
        # (784 and 10 for MNIST)
        self.input_node = x.shape[0]
        self.hidden_layer = 500
        self.output_node = y.shape[0]
        # Learning rate
        self.learning_rate = 0.006  # 0.0045
        # Parameters
        self.weights1 = None
        self.weights2 = None
        self.bias1 = None
        self.bias2 = None
        # Activations cached by forward_propagation for back_propagation
        self.a1 = None
        self.a2 = None
        # Number of input samples
        self.m = x.shape[1]

    def init_parameter(self):
        """Initialise the parameters; return (weights1, bias1, weights2, bias2).

        Weights are drawn from a normal distribution (mean 0, scale 0.5);
        biases start as zero column vectors.
        """
        self.weights1 = np.random.normal(0, 0.5, (self.hidden_layer, self.input_node))
        self.bias1 = np.zeros((self.hidden_layer, 1))
        self.weights2 = np.random.normal(0, 0.5, (self.output_node, self.hidden_layer))
        self.bias2 = np.zeros((self.output_node, 1))
        return self.weights1, self.bias1, self.weights2, self.bias2

    def forward_propagation(self):
        """Run the forward pass, cache ``a1``/``a2`` and return the loss."""
        # First layer. np.tanh stays finite for large |z1|, where the explicit
        # exp-based formula evaluates to inf / inf = nan.
        z1 = np.dot(self.weights1, self.x) + self.bias1
        self.a1 = np.tanh(z1)
        # Second layer
        z2 = np.dot(self.weights2, self.a1) + self.bias2
        self.a2 = 1 / (1 + np.exp(-z2))
        # Cross-entropy loss; the clip (used for the logarithms only) keeps a
        # saturated sigmoid from producing log(0).
        a2_safe = np.clip(self.a2, 1e-12, 1 - 1e-12)
        loss = (1 / self.m) * np.sum(
            -self.y * np.log(a2_safe) - (1 - self.y) * np.log(1 - a2_safe))
        return loss

    def back_propagation(self):
        """Apply one gradient-descent step using the cached activations.

        Must be called after ``forward_propagation``. Returns the updated
        ``(weights1, bias1, weights2, bias2)``.
        """
        dz2 = self.a2 - self.y
        dw2 = (1 / self.m) * np.dot(dz2, self.a1.T)
        db2 = (1 / self.m) * np.sum(dz2, axis=1, keepdims=True)
        # The tanh derivative is an element-wise factor. It must not be passed
        # as the third argument of np.dot: that argument is the `out` buffer,
        # so the derivative would be overwritten instead of applied.
        dz1 = np.dot(self.weights2.T, dz2) * (1 - np.square(self.a1))
        dw1 = (1 / self.m) * np.dot(dz1, self.x.T)
        db1 = (1 / self.m) * np.sum(dz1, axis=1, keepdims=True)
        self.weights2 = self.weights2 - self.learning_rate * dw2
        self.bias2 = self.bias2 - self.learning_rate * db2
        self.weights1 = self.weights1 - self.learning_rate * dw1
        self.bias1 = self.bias1 - self.learning_rate * db1
        return self.weights1, self.bias1, self.weights2, self.bias2
class Accuracy(object):
    """Score a two-layer (tanh hidden, sigmoid output) network on labelled data.

    ``x_test`` is (features, m), ``y_test`` is (classes, m) with one-hot
    labels; the remaining arguments are the trained parameters of both layers.
    """

    def __init__(self, x_test, y_test, weight1, bias1, weight2, bias2):
        self.weight1_test = weight1
        self.bias1_test = bias1
        self.weight2_test = weight2
        self.bias2_test = bias2
        self.x = x_test
        self.y = y_test

    def accuracy_compute(self):
        """Return the fraction of output entries that match the labels.

        Every sigmoid output is thresholded at 0.5 and compared with the
        corresponding label entry. Note that this is an element-wise score
        over all (class, sample) entries, not an argmax class accuracy.
        """
        # First layer. np.tanh stays finite for large |z1|, where the explicit
        # exp-based formula evaluates to inf / inf = nan.
        z1 = np.dot(self.weight1_test, self.x) + self.bias1_test
        a1 = np.tanh(z1)
        # Second layer
        z2 = np.dot(self.weight2_test, a1) + self.bias2_test
        A = 1 / (1 + np.exp(-z2))
        # Vectorised thresholding replaces the per-element Python loops
        y_predict = (A >= 0.5).astype(float)
        # Every sample has the same number of outputs, so the mean of the
        # per-sample means equals the mean over all entries.
        return np.mean(y_predict == self.y)
import os

# Train on one fixed mini-batch of 100 images; evaluate on the whole test set.
# The arrays are transposed so that every column is one sample.
x_train, y_train = mnist.train.next_batch(100)
x_train = x_train.T
y_train = y_train.T
x_test = mnist.test.images.T
y_test = mnist.test.labels.T

# Create the output directory up front: to_csv does not create it, and a
# missing directory would otherwise only fail after the whole training run.
output_path = "./storage/2_layer.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)

loss_list = []
acc_train_means = []
acc_test_means = []
iteration = []

pro = Propagation(x_train, y_train)
# init_parameter stores the parameters on the instance itself
pro.init_parameter()

for i in range(30000):
    loss_value = pro.forward_propagation()
    weight1, bias1, weight2, bias2 = pro.back_propagation()
    # Record loss and accuracy every 500 iterations
    if i % 500 == 0:
        acc_train = Accuracy(x_train, y_train, weight1, bias1, weight2, bias2)
        acc_test = Accuracy(x_test, y_test, weight1, bias1, weight2, bias2)
        acc_train_storage = acc_train.accuracy_compute()
        acc_test_storage = acc_test.accuracy_compute()
        print("经过" + str(i) + "次迭代" + "在训练集上的损失是" + str(loss_value))
        print("经过%d次迭代,在训练集上的精度是%.5f" % (i, acc_train_storage))
        print("经过%d次迭代,在测试集上的精度是%.5f" % (i, acc_test_storage))
        loss_list.append(loss_value)
        acc_train_means.append(acc_train_storage)
        acc_test_means.append(acc_test_storage)
        iteration.append(i)

# Save the recorded curves for the plotting script
save_scv = {"iteration": iteration,
            "acc_train": acc_train_means,
            "acc_validate": acc_test_means,
            "loss_train": loss_list}
dataframe = pd.DataFrame(save_scv)
dataframe.to_csv(output_path, index=False, sep=',')
2、结果
3、加入正则化
1、代码
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import pandas as pd
# Load the MNIST data sets from ./data with one-hot encoded labels.
# NOTE(review): the tensorflow.examples.tutorials package is presumably only
# available in TensorFlow 1.x -- confirm the installed version.
mnist = input_data.read_data_sets("./data", one_hot=True)
class Propagation(object):
    """Two-layer network (tanh hidden layer, sigmoid output) with L2 weight
    regularisation, trained with batch gradient descent.

    Data is laid out column-wise: ``x`` is (features, m) and ``y`` is
    (classes, m) with one-hot labels, where ``m`` is the number of samples.
    """

    def __init__(self, x, y):
        # Inputs and labels
        self.x = x
        self.y = y
        # Network structure; input/output sizes are read from the data
        # (784 and 10 for MNIST)
        self.input_node = x.shape[0]
        self.hidden_layer = 500
        self.output_node = y.shape[0]
        # Learning rate
        self.learning_rate = 0.05  # 0.0045
        # Parameters
        self.weights1 = None
        self.weights2 = None
        self.bias1 = None
        self.bias2 = None
        # Activations cached by forward_propagation for back_propagation
        self.a1 = None
        self.a2 = None
        # Number of input samples
        self.m = x.shape[1]
        # L2 regularisation strength (the attribute keeps its original
        # spelling so existing code that reads it keeps working)
        self.regularaztion = 0.8  # 0.5 # 0.005

    def init_parameter(self):
        """Initialise the parameters; return (weights1, bias1, weights2, bias2).

        Weights are drawn from a normal distribution (mean 0, scale 0.5);
        biases start as zero column vectors.
        """
        self.weights1 = np.random.normal(0, 0.5, (self.hidden_layer, self.input_node))
        self.bias1 = np.zeros((self.hidden_layer, 1))
        self.weights2 = np.random.normal(0, 0.5, (self.output_node, self.hidden_layer))
        self.bias2 = np.zeros((self.output_node, 1))
        return self.weights1, self.bias1, self.weights2, self.bias2

    def forward_propagation(self):
        """Run the forward pass, cache ``a1``/``a2`` and return the loss.

        The loss is the cross-entropy plus an L2 penalty on both weight
        matrices (biases are not penalised).
        """
        # First layer. np.tanh stays finite for large |z1|, where the explicit
        # exp-based formula evaluates to inf / inf = nan.
        z1 = np.dot(self.weights1, self.x) + self.bias1
        self.a1 = np.tanh(z1)
        # Second layer
        z2 = np.dot(self.weights2, self.a1) + self.bias2
        self.a2 = 1 / (1 + np.exp(-z2))
        # The clip (used for the logarithms only) keeps a saturated sigmoid
        # from producing log(0).
        a2_safe = np.clip(self.a2, 1e-12, 1 - 1e-12)
        cross_entropy = (1 / self.m) * np.sum(
            -self.y * np.log(a2_safe) - (1 - self.y) * np.log(1 - a2_safe))
        penalty = (self.regularaztion / (2 * self.m)) * (
            np.sum(np.square(self.weights1)) + np.sum(np.square(self.weights2)))
        loss = cross_entropy + penalty
        return loss

    def back_propagation(self):
        """Apply one gradient-descent step using the cached activations.

        Must be called after ``forward_propagation``. Returns the updated
        ``(weights1, bias1, weights2, bias2)``.
        """
        decay = self.regularaztion / self.m
        dz2 = self.a2 - self.y
        dw2 = (1 / self.m) * np.dot(dz2, self.a1.T) + decay * self.weights2
        db2 = (1 / self.m) * np.sum(dz2, axis=1, keepdims=True)
        # The tanh derivative is an element-wise factor. It must not be passed
        # as the third argument of np.dot: that argument is the `out` buffer,
        # so the derivative would be overwritten instead of applied.
        dz1 = np.dot(self.weights2.T, dz2) * (1 - np.square(self.a1))
        dw1 = (1 / self.m) * np.dot(dz1, self.x.T) + decay * self.weights1
        db1 = (1 / self.m) * np.sum(dz1, axis=1, keepdims=True)
        self.weights2 = self.weights2 - self.learning_rate * dw2
        self.bias2 = self.bias2 - self.learning_rate * db2
        self.weights1 = self.weights1 - self.learning_rate * dw1
        self.bias1 = self.bias1 - self.learning_rate * db1
        return self.weights1, self.bias1, self.weights2, self.bias2
class Accuracy(object):
    """Score a two-layer (tanh hidden, sigmoid output) network on labelled data.

    ``x_test`` is (features, m), ``y_test`` is (classes, m) with one-hot
    labels; the remaining arguments are the trained parameters of both layers.
    """

    def __init__(self, x_test, y_test, weight1, bias1, weight2, bias2):
        self.weight1_test = weight1
        self.bias1_test = bias1
        self.weight2_test = weight2
        self.bias2_test = bias2
        self.x = x_test
        self.y = y_test

    def accuracy_compute(self):
        """Return the fraction of output entries that match the labels.

        Every sigmoid output is thresholded at 0.5 and compared with the
        corresponding label entry. Note that this is an element-wise score
        over all (class, sample) entries, not an argmax class accuracy.
        """
        # First layer. np.tanh stays finite for large |z1|, where the explicit
        # exp-based formula evaluates to inf / inf = nan.
        z1 = np.dot(self.weight1_test, self.x) + self.bias1_test
        a1 = np.tanh(z1)
        # Second layer
        z2 = np.dot(self.weight2_test, a1) + self.bias2_test
        A = 1 / (1 + np.exp(-z2))
        # Vectorised thresholding replaces the per-element Python loops
        y_predict = (A >= 0.5).astype(float)
        # Every sample has the same number of outputs, so the mean of the
        # per-sample means equals the mean over all entries.
        return np.mean(y_predict == self.y)
import os

# Train on one fixed mini-batch of 100 images; evaluate on the whole test set.
# The arrays are transposed so that every column is one sample.
x_train, y_train = mnist.train.next_batch(100)
x_train = x_train.T
y_train = y_train.T
x_test = mnist.test.images.T
y_test = mnist.test.labels.T

# Create the output directory up front: to_csv does not create it, and a
# missing directory would otherwise only fail after the whole training run.
# NOTE(review): this is the same file name the non-regularised two-layer
# script writes to -- confirm that overwriting it is intended.
output_path = "./storage/2_layer.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)

loss_list = []
acc_train_means = []
acc_test_means = []
iteration = []

pro = Propagation(x_train, y_train)
# init_parameter stores the parameters on the instance itself
pro.init_parameter()

for i in range(30000):
    loss_value = pro.forward_propagation()
    weight1, bias1, weight2, bias2 = pro.back_propagation()
    # Record loss and accuracy every 500 iterations
    if i % 500 == 0:
        acc_train = Accuracy(x_train, y_train, weight1, bias1, weight2, bias2)
        acc_test = Accuracy(x_test, y_test, weight1, bias1, weight2, bias2)
        acc_train_storage = acc_train.accuracy_compute()
        acc_test_storage = acc_test.accuracy_compute()
        print("经过" + str(i) + "次迭代" + "在训练集上的损失是" + str(loss_value))
        print("经过%d次迭代,在训练集上的精度是%.5f" % (i, acc_train_storage))
        print("经过%d次迭代,在测试集上的精度是%.5f" % (i, acc_test_storage))
        loss_list.append(loss_value)
        acc_train_means.append(acc_train_storage)
        acc_test_means.append(acc_test_storage)
        iteration.append(i)

# Save the recorded curves for the plotting script
save_scv = {"iteration": iteration,
            "acc_train": acc_train_means,
            "acc_validate": acc_test_means,
            "loss_train": loss_list}
dataframe = pd.DataFrame(save_scv)
dataframe.to_csv(output_path, index=False, sep=',')
2、结果
4、画图代码
import pandas as pd
from mpl_toolkits.axes_grid1 import host_subplot
import matplotlib.pyplot as plt
# Plot the recorded accuracy curves (left axis) and loss curve (right axis).
# data = pd.read_csv('./bp_csv/bp.csv')
data = pd.read_csv('./bp_csv/2_layer.csv')  # read_csv already returns a DataFrame
print(data.head())
print(len(data['iteration'].tolist()))
print(len(data['acc_train'].tolist()))
print(len(data['acc_validate'].tolist()))
print(len(data['loss_train'].tolist()))

x = data['iteration'].tolist()
y_train = data['acc_train'].tolist()
y_validation = data['acc_validate'].tolist()
y_loss = data['loss_train'].tolist()

# 111 = 1 row, 1 column, first subplot; 110 would ask for subplot index 0,
# which is outside the valid 1..rows*cols range.
host = host_subplot(111)
# Second y axis sharing the same x axis, used for the loss
par = host.twinx()

host.set_xlabel("iteration_step")
host.set_ylabel("accuracy")
par.set_ylabel("loss_value")
host.set_title('Accuracy loss curve')

line1, = host.plot(x, y_train, "b-", label="acc_train")
line2, = host.plot(x, y_validation, "r-", label="acc_validate")
line3, = par.plot(x, y_loss, "g-", label="loss_value")

leg = plt.legend(loc='center right', fancybox=True, shadow=True)

# Colour each legend entry like its curve
for text, line in zip(leg.get_texts(), (line1, line2, line3)):
    text.set_color(line.get_color())
# The accuracy axis carries two curves; its label keeps the colour of the
# second one, which is what the original pair of assignments ended up with.
host.yaxis.get_label().set_color(line2.get_color())
par.yaxis.get_label().set_color(line3.get_color())

plt.show()