python基于BP实现数据分类（鸢尾花数据集）

HardToNoBug

已于 2022-11-05 11:30:49 修改

阅读量1.3k

点赞数 2

文章标签： python sklearn

于 2022-11-05 11:30:32 首次发布

本文链接：https://blog.csdn.net/christopHERE/article/details/127701566

版权

由于最近课程较多，在此就不再讲解具体的原理和实现了，详情大家可以参考这篇文章，其中的推导过程写得非常详细。

鸢尾花数据集

起初笔者自己实现时，发现程序中有大量的 for 循环语句，十分复杂。
在这里插入图片描述

但仔细思考后发现，由于层与层之间是全连接的，神经网络的传递过程其实十分像矩阵相乘，所以之后便优化了代码，改为直接使用矩阵乘法，速度和性能都好了不少。

完整代码

import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.preprocessing import MinMaxScaler
np.set_printoptions(suppress=True)


class BP:
    # b_Size隐藏层神经元个数
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from sklearn import model_selection
np.set_printoptions(suppress=True)


class BP:
    """Back-propagation classifier with one sigmoid hidden layer.

    The network is trained one sample at a time (online gradient descent).
    Both layers compute ``sigmoid(input * weights - threshold)``.

    Attributes are kept as ``np.matrix`` objects so that ``*`` means matrix
    multiplication:
        x, y    training inputs (n x m) and one-hot targets (n x kinds)
        v, w    input->hidden (m x b_size) and hidden->output (b_size x kinds)
                weights
        Gamma   hidden-layer thresholds (1 x b_size)
        Theta   output-layer thresholds (1 x kinds)
        alpha   learning rate
    """

    # b_Size: number of neurons in the hidden layer
    def __init__(self, x, y,  Kinds, b_Size=10, alpha=0.1) -> None:
        """Store the training data and randomly initialise all parameters.

        Args:
            x: training inputs, one sample per row (n x m).
            y: one-hot targets, one sample per row (n x Kinds).
            Kinds: number of output classes.
            b_Size: number of hidden neurons.
            alpha: learning rate used by every update in ``fit``.

        Raises:
            ValueError: if ``x`` and ``y`` have different row counts or ``y``
                does not have ``Kinds`` columns.
        """
        self.x = np.matrix(x)  # inputs, n x m
        self.y = np.matrix(y)  # targets, n x kinds
        if self.x.shape[0] != self.y.shape[0]:
            raise ValueError("x and y must have the same number of rows")
        if self.y.shape[1] != Kinds:
            raise ValueError("y must have one column per class (Kinds)")
        self.b_size = b_Size
        # The feature count is read from the converted matrix (not the raw
        # argument) so nested lists are accepted as well as arrays.
        # Random draws happen in the order v, w, Gamma, Theta.
        self.v = np.matrix(np.random.rand(
            self.x.shape[1], b_Size))  # input -> hidden weights
        self.w = np.matrix(np.random.rand(b_Size, Kinds))  # hidden -> output weights
        self.alpha = alpha
        self.kinds = Kinds
        self.Gamma = np.matrix(np.random.rand(1, self.b_size))  # hidden thresholds
        self.Theta = np.matrix(np.random.rand(1, self.kinds))  # output thresholds

    @staticmethod
    def __sigmoid(x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1/(1 + np.exp(-x))

    def _forward(self, x):
        """Run one or more row samples through the net; return (hidden, output)."""
        # input layer --> hidden layer
        hidden = self.__sigmoid(np.dot(x, self.v) - self.Gamma)
        # hidden layer --> output layer
        output = self.__sigmoid(np.dot(hidden, self.w) - self.Theta)
        return hidden, output

    def fit(self, N):
        """Perform ``N`` single-sample training steps.

        Samples are visited in row order and the index wraps back to the
        first row after the last one, so ``N`` counts update steps rather
        than passes over the data.

        Raises:
            ValueError: if at least one step is requested but there is no
                training data.
        """
        n_samples = self.x.shape[0]
        step = 0
        while step < N:
            if n_samples == 0:
                raise ValueError("cannot fit on an empty training set")
            i = step % n_samples
            sample = self.x[i]

            # forward pass
            b, out = self._forward(sample)

            # backward pass: gradient terms of the output and hidden layers.
            # Both are computed before any parameter is modified.
            g = np.multiply(np.multiply(out, (1 - out)), self.y[i] - out)
            e = np.multiply(np.multiply(b, (1 - b)), (self.w*g.T).T)

            # Parameter updates. Each weight update is an outer product
            # (column vector * row vector), replacing the per-element loops.
            self.v += self.alpha * (sample.T * e)
            self.Gamma = self.Gamma - self.alpha*e
            self.w += self.alpha * (b.T * g)
            self.Theta = self.Theta - self.alpha*g

            step += 1

    def predict(self, x, pre=None):
        """Append the predicted class index of every row of ``x`` to ``pre``.

        Args:
            x: samples to classify, one per row.
            pre: list that receives the predictions; a new list is created
                when omitted.

        Returns:
            The list holding the predictions (``pre`` itself when given).
        """
        if pre is None:
            pre = []
        samples = np.matrix(x)
        if samples.shape[0] == 0:
            return pre
        _, out = self._forward(samples)
        # argmax along each row; the first maximum wins on ties
        pre.extend(np.asarray(out.argmax(axis=1)).ravel())
        return pre

    def show(self, cm, labels_name, title):
        """Print the accuracy and plot the row-normalised confusion matrix.

        Args:
            cm: square confusion matrix of counts.
            labels_name: tick labels, one per class.
            title: title of the figure.
        """
        cm = np.asarray(cm)
        total = np.sum(cm)
        # The diagonal holds the correctly classified counts, for any number
        # of classes.
        accuracy = np.trace(cm) / total if total else 0.0
        print("accuracy:{:.2%}".format(accuracy))
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Normalise each row; rows without samples are left as zeros
        # instead of producing 0/0.
        cm = cm.astype('float') / np.where(row_totals == 0, 1, row_totals)
        plt.imshow(cm, interpolation='nearest')    # draw the matrix as an image
        plt.title(title)    # figure title
        plt.colorbar()
        num_local = np.arange(len(labels_name))
        plt.xticks(num_local, labels_name, rotation=90)    # class labels on the x axis
        plt.yticks(num_local, labels_name)    # class labels on the y axis
        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.show()


def clear(list):
    """Encode iris species names as class indices and one-hot rows.

    Each item of ``list`` is indexable and carries the species name at
    position 0. 'Iris-setosa' maps to class 0, 'Iris-versicolor' to class 1,
    and every other value to class 2.

    Returns:
        A tuple ``(label, result)``: ``label`` is an array of class indices
        and ``result`` an array of the matching one-hot rows of width 3.
    """
    # NOTE: the parameter shadows the builtin ``list``; the name is kept
    # because callers may pass it by keyword.
    ordered_names = ('Iris-setosa', 'Iris-versicolor')
    fallback = len(ordered_names)  # class 2 for anything not named above

    # Compare against the names in order, first match wins.
    codes = [
        next((pos for pos, name in enumerate(ordered_names) if row[0] == name),
             fallback)
        for row in list
    ]
    one_hot = [[int(pos == code) for pos in range(3)] for code in codes]
    return np.array(codes), np.array(one_hot)


# Driver script: load the iris spreadsheet, train a BP network on part of it
# and plot the confusion matrix for the held-out part.

# Column names applied to the loaded sheet: four measurements plus the species.
Labels_name = ['sepal_length', 'sepal_width',
               'petal_length', 'petal_width', 'species']
# NOTE(review): absolute path on the author's machine; adjust before running
# elsewhere.
# NOTE(review): header=1 selects the row at index 1 as the header row, so rows
# above it are presumably not read as data. Confirm against the workbook layout.
iris = pd.read_excel(
    '/Users/christcheng/Code/python/DataBase/iris_data.xlsx', header=1, names=Labels_name)
x = np.array(iris.iloc[:, 0:4])  # first four columns: the features
y = np.array(iris.iloc[:, 4:5])  # fifth column kept 2-D: one species name per row
# clear() returns (class indices, one-hot rows); y is rebound to the one-hot rows.
label, y = clear(y)
# 40% of the samples are held out for testing. No random_state is passed, so
# the split is not seeded here and results can differ between runs.
x_train, x_test, y_train, y_test, label_train, label_test = model_selection.train_test_split(
    x, y, label, test_size=0.4)
# Three output classes; hidden-layer size and learning rate use BP's defaults.
test = BP(x_train, y_train, 3)

test.fit(10000)  # 10000 single-sample update steps
pre = []  # filled in place by predict() with one class index per test row
test.predict(x_test, pre)
# True class indices against predicted ones.
cm = confusion_matrix(label_test, np.array(pre))
title = "Iris Prediction"
test.show(cm, ['setosa', 'versicolor', 'virginica'], title)