【实战 python】 第5章 神经网络 -- 感知机、误差逆传播算法

理论知识:笔记(五)机器学习(周志华)第5章 神经网络

一、感知机

算法实现

#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Author  : Cabbage
# @project : ML
# @FileName: preceptron.py
# @Blog    : https://blog.csdn.net/lzbmc
'''
train_binary.csv是MNIST数据集将十分类的数据改为二分类的数据。
参考博客:https://blog.csdn.net/wds2006sdo/article/details/51923546(数据下载)
理论参考:统计学习方法(李航)
感知机是二分类的线性分类模型,属于判别模型
'''

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
def loadDataSet(filename):
    """Load a CSV dataset whose first column is the 0/1 label.

    A leading bias column of ones is prepended to the feature matrix so
    that weights[0] acts as the intercept term.

    :param filename: path to the CSV file (header row is skipped by pandas)
    :return: (feature matrix with bias column, label vector)
    """
    table = pd.read_csv(filename).values
    labels = table[:, 0]
    features = table[:, 1:]
    n_samples = features.shape[0]
    # insert x0 = 1 at column 0 of every row (axis=1 -> column insert)
    xArr = np.insert(features, 0, values=np.ones(n_samples), axis=1)
    return xArr, labels

def sign(z):
    """Unit-step activation: class 1 when the net input z >= 0, else 0."""
    return 1 if z >= 0 else 0

# 随机梯度下降
def train(x, y):
    """Perceptron training: one sequential pass of the update rule.

    Dataset labels are 0/1; they are mapped to -1/+1 as in Li Hang's
    "Statistical Learning Methods" before applying the update.

    :param x: (m, n) feature matrix (bias column included)
    :param y: (m,) labels in {0, 1}
    :return: learned weight vector of length n
    """
    learning_rate = 0.01
    _, n_features = x.shape
    weights = np.ones(n_features)
    for sample, label in zip(x, y):
        target = 2 * label - 1              # map {0, 1} -> {-1, +1}
        activation = np.dot(weights, sample)
        if target * activation <= 0:        # misclassified -> nudge weights
            weights = weights + learning_rate * target * sample
    return weights

# 改进的随机梯度下降
def stocGradDescent(x, y, numIter=150):
    """Improved stochastic gradient descent for the perceptron.

    Improvements over train():
      * the learning rate alpha decays with the epoch/step count, with a
        0.001 floor so it never reaches zero;
      * within each epoch the samples are visited in random order
        without replacement (indices are drawn from a shrinking pool).

    Bug fixed: the original drew randIndex (and deleted it from the
    pool) but then updated with x[i] / y[i], so the random sampling had
    no effect at all. The update now uses the randomly chosen sample.

    :param x: (m, n) feature matrix (bias column included)
    :param y: (m,) labels in {0, 1}
    :param numIter: number of epochs over the data set
    :return: learned weight vector of length n
    """
    m, n = x.shape
    weights = np.ones(n)
    for it in range(numIter):
        dataIndex = list(range(m))  # indices not yet used this epoch
        for i in range(m):
            # len(dataIndex) shrinks as indices are consumed
            randIndex = int(np.random.uniform(0, len(dataIndex)))
            idx = dataIndex[randIndex]          # the actual sample to use
            alpha = 4 / (1 + it + i) + 0.001    # decaying learning rate
            z = np.dot(weights, x[idx])
            yi = 2 * y[idx] - 1                 # map {0, 1} -> {-1, +1}
            if yi * z <= 0:                     # misclassified -> update
                weights = weights + alpha * yi * x[idx]
            del dataIndex[randIndex]            # sample without replacement
    return weights

def predict(inputX, weights):
    """Classify every row of inputX with the learned weights.

    :param inputX: iterable of feature rows (bias column included)
    :param weights: weight vector produced by train()/stocGradDescent()
    :return: list of 0/1 predictions, one per row
    """
    return [sign(np.dot(weights, row)) for row in inputX]


if __name__ == '__main__':
    # Demo: train and evaluate both perceptron variants on the
    # binarized MNIST data (train_binary.csv, label in column 0).
    xArr, labels = loadDataSet('train_binary.csv')
    x_train, x_test, y_train, y_test = train_test_split(xArr, labels,  test_size=0.33, random_state=0)

    # Plain sequential gradient descent (single pass over the data)
    weights = train(x_train, y_train)
    # print(weights)
    test_pred = predict(x_test, weights)
    score = accuracy_score(test_pred, y_test)
    print(score)

    # Improved SGD: decaying learning rate + random sample order
    weights = stocGradDescent(x_train, y_train)
    test_pred = predict(x_test, weights)
    score = accuracy_score(test_pred, y_test)
    print(score)

sklearn实现

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import Perceptron
# Load the data: column 0 is the 0/1 label, the rest are the features.
raw_data = pd.read_csv('./train_binary.csv')
data = raw_data.values
features = data[:, 1:]
labels = data[:, 0]
# Hold out 33% of the samples for testing.
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.33,
                                                                            random_state=0)
clf = Perceptron(max_iter=2000)  # define the model
clf.fit(train_features, train_labels)  # train
test_predict = clf.predict(test_features)  # predict
score = accuracy_score(test_labels, test_predict)  # accuracy
print(score)  # 0.986002886002886

# learned weight matrix
weight = clf.coef_
# intercept (bias) of the separating hyperplane
print(clf.intercept_)

二、误差逆传播算法(5.5)

  • 包括标准BP和累积BP算法
  • 课后题5.5 试编程实现标准BP算法和累积BP算法,在西瓜数据集3.0上分别用这两个算法训练一下单隐层网络,进行比较。
#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Author  : Cabbage
# @Time1    : 2019/9/7 20:05
# @project : ML
# @FileName: BP.py
# @Last modified time:
# @Blog    : https://blog.csdn.net/lzbmc

import numpy as np
from numpy import *
from sklearn.metrics import accuracy_score

def loadDataSet():
    """Return the watermelon data set 3.0 (Zhou, "Machine Learning", ch. 4/5).

    :return: X, ndarray (17, 8) — six categorical attributes encoded as
             1/2/3 plus two continuous attributes, one row per sample;
             Y, ndarray (17, 1) — binary label, 1 for the first 8
             samples and 0 for the last 9.
    """
    rows = [
        [2, 3, 3, 2, 1, 2, 3, 3, 3, 2, 1, 1, 2, 1, 3, 1, 2],
        [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 1, 2, 2, 2, 1, 1],
        [2, 3, 2, 3, 2, 2, 2, 2, 3, 1, 1, 2, 2, 3, 2, 2, 3],
        [3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 1, 1, 2, 2, 3, 1, 2],
        [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 1, 1, 2, 3, 2],
        [1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1],
        [0.697, 0.774, 0.634, 0.668, 0.556, 0.403, 0.481, 0.437,
         0.666, 0.243, 0.245, 0.343, 0.639, 0.657, 0.360, 0.593, 0.719],
        [0.460, 0.376, 0.264, 0.318, 0.215, 0.237, 0.149, 0.211,
         0.091, 0.267, 0.057, 0.099, 0.161, 0.198, 0.370, 0.042, 0.103],
    ]
    # one attribute per row above -> transpose to one sample per row
    X = np.array(rows, dtype=float).T          # (17, 8)
    Y = np.array([[1]] * 8 + [[0]] * 9)        # (17, 1)
    return X, Y


def sigmoid(inX):
    """Logistic activation 1 / (1 + e^{-x}); elementwise on arrays."""
    return 1.0 / (1.0 + np.exp(-inX))

def standardBP(NumHiddenLayer):
    '''
    Standard BP: parameters are updated after every single training
    example (unlike accumulated BP, which updates once per full pass).
    NumHiddenLayer: number of neurons in the single hidden layer.
    Single-hidden-layer network; the output layer has one neuron here.
    Returns the learned parameters V, gamma, W, theta.
    '''
    # load data and initialize parameters
    X, y = loadDataSet()  # (17, 8), (17, 1): one label per sample
    m, n = X.shape
    L = y.shape[1]

    V = random.rand(n, NumHiddenLayer)  # input->hidden weights (n, q); same as random.random((X.shape[1], NumHiddenLayer))
    gamma = random.rand(1, NumHiddenLayer)  # hidden-layer thresholds (1, q)
    W = random.rand(NumHiddenLayer, L)   # hidden->output weights (q, L)
    theta = random.rand(1, L)  # output-layer thresholds (1, L)
    # training hyper-parameters
    learn_rate = 0.1  # learning rate
    error = 0.001  # stop once the mean per-sample error drops below this
    maxTrainNum = 1000000  # cap on the number of single-sample update steps
    trainNum = 0  # update steps taken so far (counted per sample, not per epoch)

    flag = 1
    while flag:
        sumE = 0
        for i in range(m):
            b = sigmoid(np.dot(X[i], V) - gamma)     # hidden-layer output (1, q)
            pred_y = sigmoid(np.dot(b, W) - theta)  # network output (1, L)
            Ei = 1/2 * sum((pred_y - y[i]) ** 2)  # squared error of sample i
            sumE += Ei
            if trainNum > maxTrainNum:
                flag = 0
                break
            trainNum += 1
            # gradient terms: g for the output layer, e for the hidden layer
            # (see Zhou, "Machine Learning", section 5.3)
            g = pred_y * (1 - pred_y) * (y[i] - pred_y)  # (1, L)
            e = b * (1 - b) * ((np.dot(W, g.T)).T)   # W*g.T is (q,1); transposed -> (1,q)
            # per-sample parameter updates; e was computed with the old W
            W += learn_rate * np.dot(b.T, g)
            theta -= learn_rate * g
            V += learn_rate * np.dot(X[i].reshape((n, 1)), e)  # (n, q)
            gamma -= learn_rate * e
        if sumE/m < error:
            flag = 0
            break
    # report training statistics
    print('标准误差逆传播法训练次数: ' + str(trainNum))
    print(sumE/m)
    # print('V:', V)
    # print('W:', W)
    # print('gamma:', gamma)
    # print('theta:', theta)
    return V, gamma, W, theta

def accumuBP(NumHiddenLayer):
    '''
    Accumulated (batch) BP: minimizes the accumulated error over the
    whole training set, updating the parameters once per full pass.

    Bug fixed: gamma and theta were initialized with one row PER SAMPLE
    ((m, q) and (m, L)) — i.e. every training example got its own
    thresholds, and the returned parameters could only be applied to
    inputs with exactly m rows. Thresholds are per-neuron, so they are
    now (1, q) and (1, L); they broadcast over the batch and their
    gradients are summed over all samples.

    :param NumHiddenLayer: number of neurons in the single hidden layer
    :return: V, gamma, W, theta (learned weights and thresholds)
    '''
    # load data and initialize parameters
    X, y = loadDataSet()  # (17, 8), (17, 1): one label per sample
    m, n = X.shape
    L = y.shape[1]

    V = random.rand(n, NumHiddenLayer)      # input->hidden weights (n, q)
    gamma = random.rand(1, NumHiddenLayer)  # hidden thresholds (1, q), broadcast over samples
    W = random.rand(NumHiddenLayer, L)      # hidden->output weights (q, L)
    theta = random.rand(1, L)               # output thresholds (1, L)
    # training hyper-parameters
    learn_rate = 0.1  # learning rate
    error = 0.001  # stop once the mean squared error drops below this
    maxTrainNum = 1000000  # cap on the number of epochs
    trainNum = 0  # epochs completed
    E = 0
    while True:
        b = sigmoid(np.dot(X, V) - gamma)       # hidden-layer output (m, q)
        pred_y = sigmoid(np.dot(b, W) - theta)  # network output (m, L)
        E = np.sum((pred_y - y) ** 2) / (2 * m)  # mean squared error over the batch
        if E < error or trainNum > maxTrainNum:
            break
        trainNum += 1
        # gradient terms: g for the output layer, e for the hidden layer
        g = pred_y * (1 - pred_y) * (y - pred_y)  # (m, L)
        e = b * (1 - b) * np.dot(g, W.T)          # (m, q)
        # batch update: matrix products / axis-0 sums accumulate every
        # sample's contribution in one step
        W += learn_rate * np.dot(b.T, g)
        theta -= learn_rate * g.sum(axis=0, keepdims=True)
        V += learn_rate * np.dot(X.T, e)
        gamma -= learn_rate * e.sum(axis=0, keepdims=True)
    # report training statistics
    print('累计误差逆传播法训练次数: ' + str(trainNum))
    print(E)
    return V, gamma, W, theta

def predict(inputX, V, gamma, W, theta):
    """Forward-pass inputX through the trained single-hidden-layer net
    and threshold the output at 0.5.

    :param inputX: (m, n) feature matrix
    :param V, gamma: input->hidden weights and hidden thresholds
    :param W, theta: hidden->output weights and output thresholds
    :return: list of 0/1 predictions, one per input row
    """
    hidden = sigmoid(np.dot(inputX, V) - gamma)   # (m, q)
    output = sigmoid(np.dot(hidden, W) - theta)   # (m, L)
    return [1 if row > 0.5 else 0 for row in output]

if __name__ == '__main__':
    X, y = loadDataSet()

    # Standard BP with 5 hidden neurons, evaluated on the training set.
    V, gamma, W, theta = standardBP(5)
    predList = predict(X, V, gamma, W, theta)
    score = accuracy_score(predList, y)
    print(predList, '\t', score)

    # Accumulated BP with the same hidden-layer size, for comparison.
    V, gamma, W, theta = accumuBP(5)
    predList = predict(X, V, gamma, W, theta)
    score = accuracy_score(predList, y)
    print(predList, '\t', score)

结果

  • 比较:
    • 标准BP和累积BP算法,类似于随机梯度下降和标准梯度下降
    • 要想达到同样的累积误差极小值,标准BP需要迭代更多的次数
    • 累积BP算法,它在读取整个训练集D一遍后,即进行了一轮(one epoch)学习之后,才对参数进行更新,参数更新的频率低得多。但是,在很多任务中,累积误差在下降到一定程度后,进一步下降会非常缓慢。这时标准BP算法能更快地获得较好的解,尤其是训练集D非常大的时候。

代码参考:
https://blog.csdn.net/weixin_41056428/article/details/84498623
https://blog.csdn.net/qdbszsj/article/details/79110888

  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值