Implementing a BP Neural Network in Python
Preface: The BP (back-propagation) neural network is the foundation for understanding how neural networks work, and implementing it by hand is a quick way to get started and build real understanding. I am posting my hand-written BP network here for discussion; corrections and suggestions from more experienced readers are very welcome.
1. Assignment Requirements
- Write two general-purpose three-layer feed-forward neural-network back-propagation programs: one that updates the weights in batch mode, and one that updates them sample by sample. The hidden-layer activation function is the hyperbolic tangent, the output-layer activation function is the sigmoid, and the objective function is the squared-error criterion (the corresponding update rules are sketched just below).
- Feed the given data into the designed BP network.
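For reference, these requirements correspond to the standard three-layer back-propagation update rules, sketched here in the notation of Pattern Classification (\eta denotes the learning rate, f the activation function):

J(w) = \frac{1}{2}\sum_{k}(t_k - z_k)^2 \qquad \text{(squared-error criterion)}
\delta_k = (t_k - z_k)\,f'(net_k) \qquad \text{(output-layer sensitivity, } f = \text{sigmoid)}
\delta_j = f'(net_j)\sum_{k} w_{kj}\,\delta_k \qquad \text{(hidden-layer sensitivity, } f = \tanh)
\Delta w_{kj} = \eta\,\delta_k\,y_j, \qquad \Delta w_{ji} = \eta\,\delta_j\,x_i \qquad \text{(weight updates)}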
2. Python Implementation
"""
三层反向传播神经网络
批处理更新权重
不同的梯度更新步长对训练的影响
默认参数
rate= 0.001 0.004 0.006
node = 6
step = 5000
"""
import numpy as np
from numpy import dot, exp, ones, zeros_like, ones_like, random
import matplotlib.pyplot as plt
# Given data: three classes; for each class, the first 7 samples are used for
# training and the last 3 for testing
class1 = np.array([[1.58, 2.32, -5.8], [0.67, 1.58, -4.78], [1.04, 1.01, -3.63],
                   [-1.49, 2.18, -3.39], [-0.41, 1.21, -4.73], [1.39, 3.16, 2.87],
                   [1.20, 1.40, -1.89], [-0.92, 1.44, -3.22], [0.45, 1.33, -4.38],
                   [-0.76, 0.84, -1.96]], dtype=float).reshape(-1, 3)
label1 = np.zeros_like(class1)
label1[:, 0] = ones(len(label1), dtype=float)  # one-hot label for class 1
class1 = np.hstack((class1, label1))
ext = np.ones((len(class1), 1))  # bias column of ones (augmented input)
class1 = np.hstack((ext, class1))
class2 = np.array([[0.21, 0.03, -2.21], [0.37, 0.28, -1.8], [0.18, 1.22, 0.16],
                   [-0.24, 0.93, -1.01], [-1.18, 0.39, -0.39], [0.74, 0.96, -1.16],
                   [-0.38, 1.94, -0.48], [0.02, 0.72, -0.17], [0.44, 1.31, -0.14],
                   [0.46, 1.49, 0.68]]).reshape(-1, 3)
label2 = zeros_like(class2)
label2[:, 1] = ones(len(label2), dtype=float)  # one-hot label for class 2
class2 = np.hstack((ext, class2, label2))
class3 = np.array([[-1.54, 1.17, 0.64], [5.41, 3.45, -1.33], [1.55, 0.99, 2.69],
                   [1.86, 3.19, 1.51], [1.68, 1.79, -0.87], [3.51, -0.22, -1.39],
                   [1.40, -0.44, -0.92], [0.44, 0.83, 1.97], [0.25, 0.68, -0.99],
                   [0.66, -0.45, 0.08]]).reshape(-1, 3)
label3 = zeros_like(class3)
label3[:, 2] = ones(len(label3), dtype=float)  # one-hot label for class 3
class3 = np.hstack((ext, class3, label3))
#all_class = np.vstack((class1, class2, class3))
train_data = np.vstack((class1[:7], class2[:7], class3[:7]))
test_data = np.vstack((class1[7:], class2[7:], class3[7:]))
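# Quick sanity check on the layout (added here for illustration, not part of
# the assignment): each row is [bias, x1, x2, x3, t1, t2, t3]
assert train_data.shape == (21, 7)  # 7 training samples x 3 classes, 7 columns
assert test_data.shape == (9, 7)    # 3 test samples x 3 classes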
# Activation functions and their derivatives
def tan_h(x):
    return np.tanh(x)

# derivative of tanh: 1 - tanh(x)^2
def diff_tan_h(x):
    return 1.0 - np.tanh(x) ** 2

# sigmoid
def sigmoid(x):
    return 1.0 / (1 + exp(-x))

# derivative of sigmoid: sigmoid(x) * (1 - sigmoid(x))
def diff_sigmoid(x):
    out = sigmoid(x)
    return out * (1 - out)

# linear (identity) function
def linear(x):
    return x

# derivative of the linear function: identically 1
def diff_linear(x):
    return ones_like(x)
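# Quick numerical check of the derivatives above via central finite
# differences (an added check for illustration; eps is an arbitrary small step)
eps = 1e-6
for f, df in [(tan_h, diff_tan_h), (sigmoid, diff_sigmoid), (linear, diff_linear)]:
    numeric = (f(0.3 + eps) - f(0.3 - eps)) / (2 * eps)
    assert abs(numeric - df(0.3)) < 1e-6, f.__name__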
# Network definition
class NN:
    # n_i, n_h, n_o: numbers of input, hidden and output nodes
    def __init__(self, n_i, n_h, n_o):
        self.n_i = n_i  # a bias node could still be added here
        self.n_h = n_h
        self.n_o = n_o
        # per-layer buffers holding the weighted sums (the "net" values)
        self.data_i = ones(self.n_i)      # d
        self.data_net_h = ones(self.n_h)  # n_h
        self.data_net_o = ones(self.n_o)  # c
        # y and z in the book's notation: hidden and output activations
        self.data_y = ones(self.n_h)
        self.data_z = ones(self.n_o)
        self.f0_net_k = ones(self.n_o)
        self.delta_k = ones(self.n_o)
        # weight matrices, initialized uniformly at random in [0, 1)
        self.wi = random.random((self.n_h, self.n_i))  # input-to-hidden, n_h x d
        self.wo = random.random((self.n_h, self.n_o))  # hidden-to-output, n_h x c
        # gradient accumulators for batch updates, starting at zero
        self.delta_wi_temp = zeros_like(self.wi)
        self.delta_wo_temp = zeros_like(self.wo)
    def calculate_output(self, input):  # forward pass for a single sample
        # input layer
        self.data_i = np.ravel(input)                # (d,)
        # input -> hidden
        self.data_net_h = dot(self.wi, self.data_i)  # (n_h x d)(d,) -> (n_h,)
        self.data_y = tan_h(self.data_net_h)
        # hidden -> output
        self.data_net_o = dot(self.data_y, self.wo)  # (n_h,)(n_h x c) -> (c,)
        self.data_z = sigmoid(self.data_net_o)
        return self.data_z
    def BP(self, target, update_flag, rate_1, rate_2):
        # output-layer sensitivity: delta_k = (t_k - z_k) * f'(net_k)
        error_t_k = target - self.data_z
        self.f0_net_k = diff_sigmoid(self.data_net_o)
        self.delta_k = self.f0_net_k * error_t_k
        delta_wo = dot(self.data_y.reshape(-1, 1), self.delta_k.reshape(1, -1))
        # hidden-layer sensitivity: epsilon_j = f'(net_j) * sum_k w_jk * delta_k
        epsilon = dot(self.wo, self.delta_k) * diff_tan_h(self.data_net_h)
        delta_wi = dot(epsilon.reshape(-1, 1), self.data_i.reshape(1, -1))
        # accumulate the gradients for the batch update
        self.delta_wo_temp = self.delta_wo_temp + delta_wo
        self.delta_wi_temp = self.delta_wi_temp + delta_wi
        if update_flag == 1:
            # apply the accumulated updates (learning rate applied once), then reset
            self.wo = self.wo + rate_2 * self.delta_wo_temp
            self.wi = self.wi + rate_1 * self.delta_wi_temp
            self.delta_wo_temp = zeros_like(self.wo)
            self.delta_wi_temp = zeros_like(self.wi)
        # squared-error criterion J = 0.5 * ||t - z||^2
        error = 0.5 * dot(target - self.data_z, target - self.data_z)
        return error
    def train(self, patterns, input_data, rate_1, rate_2):  # patterns: training set; input_data: test set
        error_set = []
        acc_set = []
        step = 0
        sample_len = len(patterns)
        rate_temp = 0
        for m in range(5000):
            step += 1
            update_flag = 0
            sample_num = 0
            for p in patterns:  # one pass over the training set
                sample_num += 1
                inputs = p[1:4]  # columns 1-3 are the features (column 0 is the unused bias)
                targets = p[4:]  # the last three columns are the one-hot label
                if sample_num == sample_len:
                    update_flag = 1  # batch mode: apply the update after the last sample
                self.calculate_output(inputs)  # forward pass
                error = self.BP(targets, update_flag, rate_1, rate_2)
            rate = self.test(input_data)
            rate_temp = rate_temp + rate
            if step % 100 == 0:
                error_set.append(error)
                print("error", error, "acc:", rate)
            if step % 10 == 0:
                rate_temp = rate_temp / 10  # average accuracy over the last 10 epochs
                acc_set.append(rate_temp)
                rate_temp = 0
        return error_set, acc_set
    def test(self, input_data):
        ok = 0  # number of correctly classified samples
        for p in input_data:  # test set
            inputs = p[1:4]  # columns 1-3 are the features
            targets = p[4:]  # the last three columns are the label
            output = self.calculate_output(inputs)  # forward pass
            out_class = np.argmax(output)  # predicted class = index of the largest output
            if targets[out_class] == 1:
                ok = ok + 1
        rate = ok / len(input_data)
        return rate
    def plot_plot(self, error_set0, error_set1, error_set2):
        set_len = len(error_set1)
        plt.plot(range(set_len), error_set0, range(set_len), error_set1, '-', range(set_len), error_set2, '--')
        plt.legend(['rate=0.001', 'rate=0.004', 'rate=0.006'], loc='best')
        plt.title("ErrorCurve")
        plt.show()
def Run(test_data=test_data):
    pat = train_data
    rate_1 = 0.001
    rate_2 = 0.004
    rate_3 = 0.006
    # build three networks (input / hidden / output nodes) and train each
    # with a different learning rate
    n0 = NN(3, 6, 3)
    error_set0, acc0 = n0.train(pat, test_data, rate_1, rate_1)
    n1 = NN(3, 6, 3)
    error_set1, acc1 = n1.train(pat, test_data, rate_2, rate_2)
    n2 = NN(3, 6, 3)
    error_set2, acc2 = n2.train(pat, test_data, rate_3, rate_3)
    n2.plot_plot(error_set0, error_set1, error_set2)


if __name__ == '__main__':
    Run()
# Liu Yaohua 2019.11.21 in UCAS
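The assignment also calls for a single-sample (online) version. Rather than a second full program, here is a minimal sketch of how the class above can be driven in online mode: passing update_flag = 1 to BP on every sample makes each pattern apply its own weight update immediately (the learning rate 0.004 and the 5000 epochs are reused from above purely for illustration):

# single-sample (online) updates: every call to BP applies its update at once
n = NN(3, 6, 3)
for epoch in range(5000):
    for p in train_data:
        n.calculate_output(p[1:4])
        n.BP(p[4:], 1, 0.004, 0.004)  # update_flag=1 -> immediate per-sample update
print("test accuracy:", n.test(test_data))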
3. Results
(1) The error curves obtained with the different learning rates (rate) are shown below.
4. References
Duda, R. O., Hart, P. E., & Stork, D. G., Pattern Classification, 2nd ed.
----------------------------------------
As a supplement, here is a minimal experiment that uses PyTorch to observe the same effect, instead of hand-writing the BP as above.
"""
反向传播测试
现象:
最后b(a)和a非常接近
"""
import torch
import torch.nn as nn

a = torch.randn((1, 1, 4, 5))  # (batch, channels, height, width)
print(a)
# a 3x3 convolution whose kernel will be trained to reproduce its input
b = nn.Conv2d(1, 1, kernel_size=3, stride=1, bias=False, padding=1)
# print(b(a))
opt = torch.optim.SGD(b.parameters(), lr=0.001)
cri = nn.MSELoss()
for i in range(8000):
    opt.zero_grad()
    loss = cri(b(a), a)  # fit b(a) to a under MSE
    loss.backward()
    opt.step()
print("para:\n", b.state_dict())
print(b(a))
Final output:
tensor([[[[-2.1294, -0.6980, 0.0754, 2.0101, -1.0723],
[ 0.5540, 1.9507, -0.6204, -0.3294, -0.0558],
[ 1.1823, 0.9731, -0.3350, -0.2357, 0.9396],
[ 2.2166, -1.2478, -0.2418, -0.9258, -0.2936]]]])
para:
OrderedDict([('weight', tensor([[[[-2.6675e-03, -5.2032e-04, -4.5360e-03],
[ 3.9354e-04, 9.9799e-01, -2.0927e-03],
[ 7.5304e-05, 1.1001e-03, -6.6399e-03]]]]))])
tensor([[[[-2.1360, -0.6913, 0.0724, 2.0083, -1.0694],
[ 0.5479, 1.9574, -0.6237, -0.3317, -0.0596],
[ 1.1795, 0.9731, -0.3311, -0.2343, 0.9382],
[ 2.2097, -1.2460, -0.2412, -0.9267, -0.2933]]]],
grad_fn=<MkldnnConvolutionBackward>)
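The learned kernel is essentially an identity filter (the center weight is about 1, the rest are near 0), which is why b(a) reproduces a so closely. To quantify the remaining gap, a line like the following could be appended after training (a small sketch, not part of the run above):

print((b(a) - a).abs().max())  # maximum absolute deviation from the input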