简单神经网络-CSDN博客

本文链接：https://blog.csdn.net/wewinwe/article/details/100930349

简单神经网络传播

1、单层神经网络
2、双层网络
- 1、代码
- 2、结果
3、加入正则化
- 1、代码
- 2、结果
4、画图代码

1、单层神经网络

1、网络结构

本次测试使用的是mnist手写体识别
用来训练的数据的维度是x_train = (784, batch_size), y_train = (10, batch_size)。
用来测试的数据的维度是x_test = (784, batch_size), y_test = (10, batch_size)。
用来玩的网络结构是
在这里插入图片描述
所以参数的维数定义为 $weight^1 = (10, 784)$，$biases = (10, 1)$

2、前向传播维度

$z = weight^1 \cdot X + biases = (10, batch\_size)$，$a = sigmoid(z) = (10, batch\_size)$，$loss\_function = -y \log A - (1 - y) \log (1 - A)$

3 、反向传播

$\frac{\partial L}{\partial w} = \frac{\partial L}{\partial A} \cdot \frac{\partial A}{\partial z} \cdot \frac{\partial z}{\partial w}$，其中 $\frac{\partial L}{\partial A} = -\frac{y}{A} + \frac{1-y}{1-A}$，$\frac{\partial A}{\partial z} = g(z)(1-g(z)) = A (1-A)$，因此 $dw = (A - y) \cdot x^T = (10, 784)$，$db = (A - y) = (10, batch\_size)$（对 batch 维求和后为 $(10, 1)$，与 $biases$ 的维度一致）

4、代码

from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import pandas as pd
mnist = input_data.read_data_sets("./data", one_hot=True)


# x_train -> [784, m], y_train -> [10, m]
class Propagation():
    """Single-layer sigmoid classifier trained with full-batch gradient descent.

    Data is laid out column-wise: ``x_train`` has shape ``(features, m)`` and
    ``y_train`` has shape ``(classes, m)``, where ``m`` is the number of
    samples.  The loss is the element-wise binary cross-entropy between the
    sigmoid outputs and ``y_train``, averaged over the ``m`` samples.
    """

    def __init__(self, x_train, y_train):
        """Store the training data and derive the layer sizes from it.

        Args:
            x_train: array of shape ``(features, m)``.
            y_train: array of shape ``(classes, m)``.
        """
        # Network structure is read from the data instead of being hard-coded
        # (this yields 784 inputs / 10 outputs for [784, m] / [10, m] arrays).
        self.input_node = x_train.shape[0]
        self.output_node = y_train.shape[0]
        # Optimisation hyper-parameter.
        self.learning_rate = 0.8
        # Trainable parameters; created by init_parameter().
        self.weight = None
        self.b = None
        # ``bias`` mirrors ``b`` so both attribute names expose the same value.
        self.bias = None
        # Number of training samples (columns).
        self.m = x_train.shape[1]
        self.x, self.y = x_train, y_train

    def init_parameter(self):
        """Create the initial parameters.

        Returns:
            ``(weight, b)`` where ``weight`` has shape ``(classes, features)``
            drawn from N(0, 0.5) and ``b`` is a zero column of shape
            ``(classes, 1)``.
        """
        self.weight = np.random.normal(0, 0.5, (self.output_node, self.input_node))
        self.b = np.zeros((self.output_node, 1))
        self.bias = self.b
        return self.weight, self.b

    def propagation(self):
        """Run one forward pass, one backward pass and one parameter update.

        Returns:
            ``(loss, weight, b)``: the loss measured *before* the update and
            the parameters *after* the update.
        """
        if self.weight is None or self.b is None:
            self.init_parameter()

        # Forward pass.
        z = np.dot(self.weight, self.x) + self.b
        A = 1 / (1 + np.exp(-z))

        # Clip only for the loss so that a saturated sigmoid (exactly 0 or 1)
        # cannot produce 0 * log(0) = NaN; the gradients use the raw A.
        eps = 1e-12
        A_safe = np.clip(A, eps, 1 - eps)
        loss_function = (1 / self.m) * np.sum(
            -self.y * np.log(A_safe) - (1 - self.y) * np.log(1 - A_safe))

        # Backward pass: for sigmoid + cross-entropy, dL/dz = A - y.
        dz = A - self.y
        dw = (1 / self.m) * np.dot(dz, self.x.T)
        # Sum over the sample axis only, so every output unit keeps its own
        # bias gradient of shape (classes, 1).
        db = (1 / self.m) * np.sum(dz, axis=1, keepdims=True)

        # Gradient-descent update.
        self.weight = self.weight - self.learning_rate * dw
        self.b = self.b - self.learning_rate * db
        self.bias = self.b
        return loss_function, self.weight, self.b


class Accuracy(object):
    """Score a single-layer sigmoid model on a labelled data set.

    The score is the fraction of individual output units whose thresholded
    activation (``>= 0.5`` -> 1, else 0) equals the corresponding label
    entry, averaged over all units and all samples.  It is an element-wise
    agreement rate, not an arg-max classification accuracy.
    """

    def __init__(self, x_test, y_test, weight, bias):
        """Keep references to the data and to the parameters to evaluate.

        Args:
            x_test: inputs, one sample per column.
            y_test: labels with the same number of columns as ``x_test``.
            weight: weight matrix applied as ``weight @ x_test``.
            bias: bias added to the pre-activation (scalar or column vector).
        """
        self.weight_test = weight
        self.bias_test = bias
        self.x = x_test
        self.y = y_test

    def accuracy_compute(self):
        """Return the mean element-wise agreement between prediction and label.

        Raises:
            ValueError: if the data set contains no samples.
        """
        if self.x.shape[1] == 0:
            raise ValueError("cannot compute accuracy on an empty data set")

        z = np.dot(self.weight_test, self.x) + self.bias_test
        A = 1 / (1 + np.exp(-z))
        # Vectorised thresholding replaces the per-element Python loops.
        y_predict = (A >= 0.5).astype(float)
        # All columns have the same number of rows, so the mean of the
        # per-sample means equals the mean over the whole matrix.
        return np.mean(y_predict == self.y)


import os

# NOTE: a single mini-batch of 100 training images is drawn once and reused
# for every iteration below; the full test split is used for evaluation.
x_train, y_train = mnist.train.next_batch(100)
# Transpose so that every column is one sample.
x_train = x_train.T
y_train = y_train.T
x_test = mnist.test.images.T
y_test = mnist.test.labels.T

pro = Propagation(x_train, y_train)
# Keep the initial parameters on the instance; nothing is written onto the
# Propagation class itself.
pro.weight, pro.bias = pro.init_parameter()

# Make sure the output directory exists before the first CSV write.
os.makedirs("./storage", exist_ok=True)

loss_list = []
acc_train_means = []
acc_test_means = []
iteration = []
# Training loop: one full-batch gradient step per iteration.
for i in range(30000):
    loss, weight, bias = pro.propagation()
    if i % 500 == 0:
        # Evaluate the freshly updated parameters on both splits.
        acc_test = Accuracy(x_test, y_test, weight, bias)
        acc_train = Accuracy(x_train, y_train, weight, bias)
        acc_test_storage = acc_test.accuracy_compute()
        acc_train_storage = acc_train.accuracy_compute()
        print("经过" + str(i) + "次迭代" + "在训练集上的损失是" + str(loss))
        print("经过%d次迭代，在训练集上的精度是%.5f" % (i, acc_train_storage))
        print("经过%d次迭代，在测试集上的精度是%.5f" % (i, acc_test_storage))

        loss_list.append(loss)
        acc_train_means.append(acc_train_storage)
        acc_test_means.append(acc_test_storage)
        iteration.append(i)

        # The CSV is rewritten at every checkpoint, so an interrupted run
        # still leaves the history collected so far on disk.
        save_scv = {"iteration": iteration,
                    "acc_train": acc_train_means,
                    "acc_validate": acc_test_means,
                    "loss_train": loss_list}
        dataframe = pd.DataFrame(save_scv)
        dataframe.to_csv("./storage/data.csv", index=False, sep=',')

5、结果分析

在这里插入图片描述
模型在训练集上拥有很高的精度，但在测试集上却表现平平。
出现这种低偏差、高方差的情况，考虑是过拟合或者参数调整不到位。
可以加上正则化，或者调整学习率，此处就不再测试。

2、双层网络

1、代码

from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import pandas as pd

mnist = input_data.read_data_sets("./data", one_hot=True)


class Propagation(object):
    """Two-layer network (tanh hidden layer, sigmoid output) trained with
    full-batch gradient descent.

    Data is laid out column-wise: ``x`` has shape ``(features, m)`` and ``y``
    has shape ``(classes, m)``, where ``m`` is the number of samples.
    """

    def __init__(self, x, y):
        """Store the training data and set up sizes and hyper-parameters.

        Args:
            x: inputs of shape ``(features, m)``.
            y: labels of shape ``(classes, m)``.
        """
        # Inputs and targets.
        self.x = x
        self.y = y
        # Network structure; input/output sizes are read from the data
        # (784 / 10 for [784, m] / [10, m] arrays).
        self.input_node = x.shape[0]
        self.hidden_layer = 500
        self.output_node = y.shape[0]
        # Learning rate (alternative value noted by the author: 0.0045).
        self.learning_rate = 0.006
        # Trainable parameters; created by init_parameter().
        self.weights1 = None
        self.weights2 = None
        self.bias1 = None
        self.bias2 = None
        # Number of training samples (columns).
        self.m = x.shape[1]

    def init_parameter(self):
        """Create the initial parameters.

        Returns:
            ``(weights1, bias1, weights2, bias2)``; weights are drawn from
            N(0, 0.5), biases are zero column vectors.
        """
        self.weights1 = np.random.normal(0, 0.5, (self.hidden_layer, self.input_node))
        self.bias1 = np.zeros((self.hidden_layer, 1))
        self.weights2 = np.random.normal(0, 0.5, (self.output_node, self.hidden_layer))
        self.bias2 = np.zeros((self.output_node, 1))
        return self.weights1, self.bias1, self.weights2, self.bias2

    def forward_propagation(self):
        """Run the forward pass and cache the activations for back-prop.

        Returns:
            The cross-entropy loss averaged over the ``m`` samples.
        """
        # Hidden layer.  np.tanh is used instead of the explicit
        # (e^z - e^-z) / (e^z + e^-z) quotient, which turns into inf / inf
        # = NaN once |z| is large enough for exp() to overflow.
        z1 = np.dot(self.weights1, self.x) + self.bias1
        self.a1 = np.tanh(z1)

        # Output layer.
        z2 = np.dot(self.weights2, self.a1) + self.bias2
        self.a2 = 1 / (1 + np.exp(-z2))

        # Clip only for the loss so that a saturated sigmoid (exactly 0 or 1)
        # cannot produce 0 * log(0) = NaN; back-prop uses the raw a2.
        eps = 1e-12
        a2_safe = np.clip(self.a2, eps, 1 - eps)
        loss = (1 / self.m) * np.sum(
            -self.y * np.log(a2_safe) - (1 - self.y) * np.log(1 - a2_safe))
        return loss

    def back_propagation(self):
        """Compute the gradients from the cached activations and apply one
        gradient-descent step.  ``forward_propagation`` must be called first.

        Returns:
            The updated ``(weights1, bias1, weights2, bias2)``.
        """
        # Output layer: for sigmoid + cross-entropy, dL/dz2 = a2 - y.
        dz2 = self.a2 - self.y
        dw2 = (1 / self.m) * np.dot(dz2, self.a1.T)
        db2 = (1 / self.m) * np.sum(dz2, axis=1, keepdims=True)
        # Hidden layer: back-propagated error times tanh'(z1) = 1 - a1^2.
        # The factor must be multiplied element-wise; passed as a third
        # positional argument to np.dot it is treated as the ``out`` buffer
        # and the derivative is silently dropped.
        dz1 = np.dot(self.weights2.T, dz2) * (1 - np.square(self.a1))
        dw1 = (1 / self.m) * np.dot(dz1, self.x.T)
        db1 = (1 / self.m) * np.sum(dz1, axis=1, keepdims=True)

        # Gradient-descent update (all gradients were computed from the
        # pre-update parameters above).
        self.weights2 = self.weights2 - self.learning_rate * dw2
        self.bias2 = self.bias2 - self.learning_rate * db2
        self.weights1 = self.weights1 - self.learning_rate * dw1
        self.bias1 = self.bias1 - self.learning_rate * db1
        return self.weights1, self.bias1, self.weights2, self.bias2


class Accuracy(object):
    """Score a two-layer (tanh hidden, sigmoid output) model on labelled data.

    The score is the fraction of individual output units whose thresholded
    activation (``>= 0.5`` -> 1, else 0) equals the corresponding label
    entry, averaged over all units and all samples.  It is an element-wise
    agreement rate, not an arg-max classification accuracy.
    """

    def __init__(self, x_test, y_test, weight1, bias1, weight2, bias2):
        """Keep references to the data and to the parameters to evaluate.

        Args:
            x_test: inputs, one sample per column.
            y_test: labels with the same number of columns as ``x_test``.
            weight1, bias1: hidden-layer parameters.
            weight2, bias2: output-layer parameters.
        """
        self.weight1_test = weight1
        self.bias1_test = bias1
        self.weight2_test = weight2
        self.bias2_test = bias2
        self.x = x_test
        self.y = y_test

    def accuracy_compute(self):
        """Return the mean element-wise agreement between prediction and label.

        Raises:
            ValueError: if the data set contains no samples.
        """
        if self.x.shape[1] == 0:
            raise ValueError("cannot compute accuracy on an empty data set")

        # Hidden layer; np.tanh avoids the inf / inf = NaN that the explicit
        # exponential quotient produces for large |z1|.
        z1 = np.dot(self.weight1_test, self.x) + self.bias1_test
        a1 = np.tanh(z1)

        # Output layer.
        z2 = np.dot(self.weight2_test, a1) + self.bias2_test
        A = 1 / (1 + np.exp(-z2))

        # Vectorised thresholding replaces the per-element Python loops.
        y_predict = (A >= 0.5).astype(float)
        # All columns have the same number of rows, so the mean of the
        # per-sample means equals the mean over the whole matrix.
        return np.mean(y_predict == self.y)


import os

# NOTE: a single mini-batch of 100 training images is drawn once and reused
# for every iteration below; the full test split is used for evaluation.
x_train, y_train = mnist.train.next_batch(100)
# Transpose so that every column is one sample.
x_train = x_train.T
y_train = y_train.T
x_test = mnist.test.images.T
y_test = mnist.test.labels.T

# Create the output directory up front so that a missing folder is found
# before the training run rather than after it.
os.makedirs("./storage", exist_ok=True)

loss_list = []
acc_train_means = []
acc_test_means = []
iteration = []

pro = Propagation(x_train, y_train)
# init_parameter() stores the parameters on the instance itself.
pro.init_parameter()
for i in range(30000):
    loss_value = pro.forward_propagation()
    weight1, bias1, weight2, bias2 = pro.back_propagation()
    if i % 500 == 0:
        # Evaluate the freshly updated parameters on both splits.
        acc_train = Accuracy(x_train, y_train, weight1, bias1, weight2, bias2)
        acc_test = Accuracy(x_test, y_test, weight1, bias1, weight2, bias2)

        acc_train_storage = acc_train.accuracy_compute()
        acc_test_storage = acc_test.accuracy_compute()

        print("经过" + str(i) + "次迭代" + "在训练集上的损失是" + str(loss_value))
        print("经过%d次迭代，在训练集上的精度是%.5f" % (i, acc_train_storage))
        print("经过%d次迭代，在测试集上的精度是%.5f" % (i, acc_test_storage))

        loss_list.append(loss_value)
        acc_train_means.append(acc_train_storage)
        acc_test_means.append(acc_test_storage)
        iteration.append(i)

# Persist the collected history once training has finished.
save_scv = {"iteration": iteration,
            "acc_train": acc_train_means,
            "acc_validate": acc_test_means,
            "loss_train": loss_list}
dataframe = pd.DataFrame(save_scv)
dataframe.to_csv("./storage/2_layer.csv", index=False, sep=',')

2、结果

在这里插入图片描述

3、加入正则化

1、代码

from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import pandas as pd

mnist = input_data.read_data_sets("./data", one_hot=True)


class Propagation(object):
    """Two-layer network (tanh hidden layer, sigmoid output) with L2 weight
    regularisation, trained with full-batch gradient descent.

    Data is laid out column-wise: ``x`` has shape ``(features, m)`` and ``y``
    has shape ``(classes, m)``, where ``m`` is the number of samples.
    """

    def __init__(self, x, y):
        """Store the training data and set up sizes and hyper-parameters.

        Args:
            x: inputs of shape ``(features, m)``.
            y: labels of shape ``(classes, m)``.
        """
        # Inputs and targets.
        self.x = x
        self.y = y
        # Network structure; input/output sizes are read from the data
        # (784 / 10 for [784, m] / [10, m] arrays).
        self.input_node = x.shape[0]
        self.hidden_layer = 500
        self.output_node = y.shape[0]
        # Learning rate (alternative value noted by the author: 0.0045).
        self.learning_rate = 0.05
        # Trainable parameters; created by init_parameter().
        self.weights1 = None
        self.weights2 = None
        self.bias1 = None
        self.bias2 = None
        # Number of training samples (columns).
        self.m = x.shape[1]
        # L2 regularisation strength (alternatives noted by the author:
        # 0.5, 0.005).  The attribute name keeps its original spelling.
        self.regularaztion = 0.8

    def init_parameter(self):
        """Create the initial parameters.

        Returns:
            ``(weights1, bias1, weights2, bias2)``; weights are drawn from
            N(0, 0.5), biases are zero column vectors.
        """
        self.weights1 = np.random.normal(0, 0.5, (self.hidden_layer, self.input_node))
        self.bias1 = np.zeros((self.hidden_layer, 1))
        self.weights2 = np.random.normal(0, 0.5, (self.output_node, self.hidden_layer))
        self.bias2 = np.zeros((self.output_node, 1))
        return self.weights1, self.bias1, self.weights2, self.bias2

    def forward_propagation(self):
        """Run the forward pass and cache the activations for back-prop.

        Returns:
            Cross-entropy loss averaged over the ``m`` samples plus the L2
            penalty ``lambda / (2m) * (||W1||^2 + ||W2||^2)``.
        """
        # Hidden layer.  np.tanh is used instead of the explicit
        # (e^z - e^-z) / (e^z + e^-z) quotient, which turns into inf / inf
        # = NaN once |z| is large enough for exp() to overflow.
        z1 = np.dot(self.weights1, self.x) + self.bias1
        self.a1 = np.tanh(z1)

        # Output layer.
        z2 = np.dot(self.weights2, self.a1) + self.bias2
        self.a2 = 1 / (1 + np.exp(-z2))

        # Clip only for the loss so that a saturated sigmoid (exactly 0 or 1)
        # cannot produce 0 * log(0) = NaN; back-prop uses the raw a2.
        eps = 1e-12
        a2_safe = np.clip(self.a2, eps, 1 - eps)
        data_loss = (1 / self.m) * np.sum(
            -self.y * np.log(a2_safe) - (1 - self.y) * np.log(1 - a2_safe))
        penalty_scale = self.regularaztion / (2 * self.m)
        loss = data_loss + \
            penalty_scale * np.sum(np.square(self.weights1)) + \
            penalty_scale * np.sum(np.square(self.weights2))
        return loss

    def back_propagation(self):
        """Compute the gradients from the cached activations and apply one
        gradient-descent step.  ``forward_propagation`` must be called first.

        Returns:
            The updated ``(weights1, bias1, weights2, bias2)``.
        """
        decay = self.regularaztion / self.m
        # Output layer: for sigmoid + cross-entropy, dL/dz2 = a2 - y.
        dz2 = self.a2 - self.y
        dw2 = (1 / self.m) * np.dot(dz2, self.a1.T) + decay * self.weights2
        db2 = (1 / self.m) * np.sum(dz2, axis=1, keepdims=True)
        # Hidden layer: back-propagated error times tanh'(z1) = 1 - a1^2.
        # The factor must be multiplied element-wise; passed as a third
        # positional argument to np.dot it is treated as the ``out`` buffer
        # and the derivative is silently dropped.
        dz1 = np.dot(self.weights2.T, dz2) * (1 - np.square(self.a1))
        dw1 = (1 / self.m) * np.dot(dz1, self.x.T) + decay * self.weights1
        db1 = (1 / self.m) * np.sum(dz1, axis=1, keepdims=True)

        # Gradient-descent update (all gradients were computed from the
        # pre-update parameters above).
        self.weights2 = self.weights2 - self.learning_rate * dw2
        self.bias2 = self.bias2 - self.learning_rate * db2
        self.weights1 = self.weights1 - self.learning_rate * dw1
        self.bias1 = self.bias1 - self.learning_rate * db1
        return self.weights1, self.bias1, self.weights2, self.bias2


class Accuracy(object):
    """Score a two-layer (tanh hidden, sigmoid output) model on labelled data.

    The score is the fraction of individual output units whose thresholded
    activation (``>= 0.5`` -> 1, else 0) equals the corresponding label
    entry, averaged over all units and all samples.  It is an element-wise
    agreement rate, not an arg-max classification accuracy.
    """

    def __init__(self, x_test, y_test, weight1, bias1, weight2, bias2):
        """Keep references to the data and to the parameters to evaluate.

        Args:
            x_test: inputs, one sample per column.
            y_test: labels with the same number of columns as ``x_test``.
            weight1, bias1: hidden-layer parameters.
            weight2, bias2: output-layer parameters.
        """
        self.weight1_test = weight1
        self.bias1_test = bias1
        self.weight2_test = weight2
        self.bias2_test = bias2
        self.x = x_test
        self.y = y_test

    def accuracy_compute(self):
        """Return the mean element-wise agreement between prediction and label.

        Raises:
            ValueError: if the data set contains no samples.
        """
        if self.x.shape[1] == 0:
            raise ValueError("cannot compute accuracy on an empty data set")

        # Hidden layer; np.tanh avoids the inf / inf = NaN that the explicit
        # exponential quotient produces for large |z1|.
        hidden_pre = np.dot(self.weight1_test, self.x) + self.bias1_test
        hidden_act = np.tanh(hidden_pre)

        # Output layer.
        out_pre = np.dot(self.weight2_test, hidden_act) + self.bias2_test
        out_act = 1 / (1 + np.exp(-out_pre))

        # Vectorised thresholding replaces the per-element Python loops.
        y_predict = (out_act >= 0.5).astype(float)
        # All columns have the same number of rows, so the mean of the
        # per-sample means equals the mean over the whole matrix.
        return np.mean(y_predict == self.y)


import os

# NOTE: a single mini-batch of 100 training images is drawn once and reused
# for every iteration below; the full test split is used for evaluation.
x_train, y_train = mnist.train.next_batch(100)
# Transpose so that every column is one sample.
x_train = x_train.T
y_train = y_train.T
x_test = mnist.test.images.T
y_test = mnist.test.labels.T

# Create the output directory up front so that a missing folder is found
# before the training run rather than after it.
os.makedirs("./storage", exist_ok=True)

loss_list = []
acc_train_means = []
acc_test_means = []
iteration = []

pro = Propagation(x_train, y_train)
# init_parameter() stores the parameters on the instance itself.
pro.init_parameter()
for i in range(30000):
    loss_value = pro.forward_propagation()
    weight1, bias1, weight2, bias2 = pro.back_propagation()
    if i % 500 == 0:
        # Evaluate the freshly updated parameters on both splits.
        acc_train = Accuracy(x_train, y_train, weight1, bias1, weight2, bias2)
        acc_test = Accuracy(x_test, y_test, weight1, bias1, weight2, bias2)

        acc_train_storage = acc_train.accuracy_compute()
        acc_test_storage = acc_test.accuracy_compute()

        print("经过" + str(i) + "次迭代" + "在训练集上的损失是" + str(loss_value))
        print("经过%d次迭代，在训练集上的精度是%.5f" % (i, acc_train_storage))
        print("经过%d次迭代，在测试集上的精度是%.5f" % (i, acc_test_storage))

        loss_list.append(loss_value)
        acc_train_means.append(acc_train_storage)
        acc_test_means.append(acc_test_storage)
        iteration.append(i)

# Persist the collected history once training has finished.
# NOTE(review): this is the same path the un-regularised two-layer script
# writes to, so running both overwrites the earlier results - confirm intended.
save_scv = {"iteration": iteration,
            "acc_train": acc_train_means,
            "acc_validate": acc_test_means,
            "loss_train": loss_list}
dataframe = pd.DataFrame(save_scv)
dataframe.to_csv("./storage/2_layer.csv", index=False, sep=',')

2、结果

在这里插入图片描述

4、画图代码

import pandas as pd
from mpl_toolkits.axes_grid1 import host_subplot
import matplotlib.pyplot as plt

# Load the training history written by one of the training scripts.
# Alternative input left by the author: './bp_csv/bp.csv'.
# NOTE(review): the training scripts write to './storage/...', while this
# script reads from './bp_csv/...' - confirm the file is copied there first.
data = pd.read_csv('./bp_csv/2_layer.csv')
print(data.head())
# Print the length of every column as a quick sanity check.
for column in ('iteration', 'acc_train', 'acc_validate', 'loss_train'):
    print(len(data[column].tolist()))
x = data['iteration'].tolist()
y_train = data['acc_train'].tolist()
y_validation = data['acc_validate'].tolist()
y_loss = data['loss_train'].tolist()

# One subplot in a 1x1 grid.  The three-digit shorthand is
# (rows, cols, index) with a 1-based index, so it must be 111; 110 asks for
# index 0, which is out of range.
host = host_subplot(111)

# Second y-axis sharing the same x-axis, used for the loss curve.
par = host.twinx()

host.set_xlabel("iteration_step")
host.set_ylabel("accuracy")
par.set_ylabel("loss_value")
host.set_title('Accuracy loss curve')

line1, = host.plot(x, y_train, "b-", label="acc_train")
line2, = host.plot(x, y_validation, "r-", label="acc_validate")
line3, = par.plot(x, y_loss, "g-", label="loss_value")

leg = plt.legend(loc='center right', fancybox=True, shadow=True)

# Colour every legend entry like the curve it describes.
legend_texts = leg.get_texts()
for text, line in zip(legend_texts, (line1, line2, line3)):
    text.set_color(line.get_color())

# Axis labels: the accuracy axis ends up in the colour of the validation
# curve (the original set it to line1's colour and then immediately
# overwrote it with line2's); the loss axis uses the loss curve's colour.
host.yaxis.get_label().set_color(line2.get_color())
par.yaxis.get_label().set_color(line3.get_color())

plt.show()