一、感知机
算法实现
#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Author : Cabbage
# @project : ML
# @FileName: preceptron.py
# @Blog : https://blog.csdn.net/lzbmc
'''
train_binary.csv是MNIST数据集将十分类的数据改为二分类的数据。
参考博客:https://blog.csdn.net/wds2006sdo/article/details/51923546(数据下载)
理论参考:统计学习方法(李航)
感知机是二分类的线性分类模型,属于判别模型
'''
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
def loadDataSet(filename):
    """Load a CSV whose first column is the 0/1 label and the rest are
    features, then prepend a bias column of ones (x0) to the feature matrix.

    Returns (samples, labels): samples is (m, n+1) with column 0 all ones.
    """
    values = pd.read_csv(filename).values
    labels = values[:, 0]
    samples = values[:, 1:]
    # Bias term x0 = 1 for every sample, inserted as the first column.
    bias = np.ones(samples.shape[0])
    samples = np.insert(samples, 0, values=bias, axis=1)
    return samples, labels
def sign(z):
    """Threshold activation: 1 when z >= 0, otherwise 0."""
    return 1 if z >= 0 else 0
# Stochastic gradient descent (one pass over the data)
def train(x, y):
    """Train a perceptron with one SGD pass over (x, y).

    x: (m, n) feature array (bias column already included);
    y: (m,) labels in {0, 1}, remapped to {-1, +1} internally to match
    the formulation in "Statistical Learning Methods" (Li Hang).
    Returns the (n,) weight vector.
    """
    alpha = 0.01
    m, n = x.shape          # m samples, n features
    weights = np.ones(n)
    for idx in range(m):
        sample = x[idx]
        target = 2 * y[idx] - 1           # map {0, 1} -> {-1, +1}
        margin = target * np.dot(weights, sample)
        if margin <= 0:                   # misclassified: perceptron update
            weights = weights + alpha * target * sample
    return weights
# Improved stochastic gradient descent
def stocGradDescent(x, y, numIter=150):
    """Perceptron trained with improved SGD over numIter epochs.

    Improvements over train():
      * the learning rate alpha decays as training progresses;
      * each epoch visits the samples in random order without replacement.

    Bug fix: the original drew randIndex (and deleted it from dataIndex)
    but still updated with x[i]/y[i], so the random sampling had no effect.
    The chosen random sample is now actually used for the update.

    x: (m, n) feature array (bias column included); y: (m,) labels in {0, 1}.
    Returns the (n,) weight vector (ndarray).
    """
    m, n = x.shape                       # m samples, n features
    weights = np.ones(n)
    for it in range(numIter):
        dataIndex = list(range(m))       # sample indices not yet visited this epoch
        for i in range(m):
            # Pick one of the remaining samples at random (len shrinks each step).
            randIndex = int(np.random.uniform(0, len(dataIndex)))
            chosen = dataIndex[randIndex]
            # Decaying learning rate; +0.001 keeps it strictly positive.
            alpha = 4 / (1 + it + i) + 0.001
            z = np.dot(weights, x[chosen])
            yi = 2 * y[chosen] - 1       # map {0, 1} labels to {-1, +1}
            if yi * z <= 0:              # misclassified -> perceptron update
                weights = weights + alpha * yi * x[chosen]
            del dataIndex[randIndex]
    return weights
def predict(inputX, weights):
    """Classify each row of inputX with the learned perceptron weights.

    Returns a list of 0/1 predictions: 1 when w . x >= 0, else 0.
    """
    return [1 if np.dot(weights, row) >= 0 else 0 for row in inputX]
if __name__ == '__main__':
    # Load the binary-label MNIST data and hold out a third for evaluation.
    features, targets = loadDataSet('train_binary.csv')
    x_train, x_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.33, random_state=0)

    # Plain stochastic gradient descent (single pass over the training data).
    weights = train(x_train, y_train)
    print(accuracy_score(predict(x_test, weights), y_test))

    # Improved SGD: decaying learning rate, random sample order per epoch.
    weights = stocGradDescent(x_train, y_train)
    print(accuracy_score(predict(x_test, weights), y_test))
sklearn实现
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import Perceptron

# Load the data: column 0 is the 0/1 label, the remaining columns are features.
values = pd.read_csv('./train_binary.csv').values
labels = values[:, 0]
features = values[:, 1:]

# Hold out a third of the samples for evaluation.
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.33, random_state=0)

# Fit a linear perceptron and score it on the held-out split.
clf = Perceptron(max_iter=2000)
clf.fit(train_features, train_labels)
test_predict = clf.predict(test_features)
score = accuracy_score(test_labels, test_predict)
print(score)  # 0.986002886002886

# Learned weight matrix of the separating hyperplane.
weight = clf.coef_
# Intercept (bias) of the hyperplane.
print(clf.intercept_)
二、误差逆传播算法(5.5)
- 包括标准BP和累积BP算法
- 课后题5.5 试编程实现标准BP算法和累积BP算法,在西瓜数据集3.0上分别用这两个算法训练一下单隐层网络,进行比较。
#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Author : Cabbage
# @Time1 : 2019/9/7 20:05
# @project : ML
# @FileName: BP.py
# @Last modified time:
# @Blog : https://blog.csdn.net/lzbmc
import numpy as np
from numpy import *
from sklearn.metrics import accuracy_score
def loadDataSet():
    """Return the watermelon 3.0 data set.

    X: (17, 8) float ndarray — six categorical attributes coded 1-3 plus
       density and sugar content; one row per sample.
    Y: (17, 1) int ndarray of binary labels (1 = good melon, 0 = bad).

    Rewritten with plain ndarrays: numpy.matrix (np.mat) is deprecated and
    no longer recommended by NumPy.
    """
    # One row per attribute, one column per sample; transposed below.
    attributes = [
        [2, 3, 3, 2, 1, 2, 3, 3, 3, 2, 1, 1, 2, 1, 3, 1, 2],
        [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 1, 2, 2, 2, 1, 1],
        [2, 3, 2, 3, 2, 2, 2, 2, 3, 1, 1, 2, 2, 3, 2, 2, 3],
        [3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 1, 1, 2, 2, 3, 1, 2],
        [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 1, 1, 2, 3, 2],
        [1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1],
        [0.697, 0.774, 0.634, 0.668, 0.556, 0.403, 0.481, 0.437, 0.666,
         0.243, 0.245, 0.343, 0.639, 0.657, 0.360, 0.593, 0.719],
        [0.460, 0.376, 0.264, 0.318, 0.215, 0.237, 0.149, 0.211, 0.091,
         0.267, 0.057, 0.099, 0.161, 0.198, 0.370, 0.042, 0.103],
    ]
    X = np.array(attributes).T            # (17, 8)
    Y = np.array([[1]] * 8 + [[0]] * 9)   # (17, 1): 8 positive, 9 negative
    return X, Y
def sigmoid(inX):
    """Element-wise logistic function 1 / (1 + e^-x)."""
    return 1.0 / (1.0 + np.exp(-inX))
def standardBP(NumHiddenLayer):
    '''
    Standard (per-sample) error back-propagation for a single-hidden-layer net.
    Parameters are updated after every individual training sample.
    NumHiddenLayer: number of hidden-layer neurons (q below).
    The output layer has one neuron for this data set (L = 1).
    Returns (V, gamma, W, theta): input->hidden weights (n, q), hidden
    thresholds (1, q), hidden->output weights (q, L), output thresholds (1, L).
    '''
    # Initialise data and parameters
    X, y = loadDataSet()  # (17, 8), (17, 1): one label per sample
    m, n = X.shape
    L = y.shape[1]
    V = random.rand(n, NumHiddenLayer)  # (n, q) input-to-hidden weights
    gamma = random.rand(1, NumHiddenLayer)  # (1, q) hidden-layer thresholds
    W = random.rand(NumHiddenLayer, L)  # (q, L) hidden-to-output weights
    theta = random.rand(1, L)  # (1, L) output-layer thresholds
    # Training hyper-parameters
    learn_rate = 0.1  # learning rate
    error = 0.001  # stop when the mean squared error drops below this
    maxTrainNum = 1000000  # cap on the number of per-sample updates
    trainNum = 0  # per-sample update counter
    flag = 1
    while flag:
        sumE = 0
        for i in range(m):
            # Forward pass for sample i
            b = sigmoid(np.dot(X[i], V) - gamma)  # (1, q) hidden activations
            pred_y = sigmoid(np.dot(b, W) - theta)  # (1, L) network output
            Ei = 1/2 * sum((pred_y - y[i]) ** 2)  # squared error of sample i
            sumE += Ei
            if trainNum > maxTrainNum:
                flag = 0
                break
            trainNum += 1
            # Gradients: g for the output layer, e for the hidden layer
            g = pred_y * (1 - pred_y) * (y[i] - pred_y)  # (1, L)
            e = b * (1 - b) * ((np.dot(W, g.T)).T)  # W*g.T is (q, 1); .T -> (1, q)
            # Parameter updates from this single sample
            W += learn_rate * np.dot(b.T, g)
            theta -= learn_rate * g
            V += learn_rate * np.dot(X[i].reshape((n, 1)), e)  # (n, q)
            gamma -= learn_rate * e
        if sumE/m < error:  # mean error over the epoch is small enough -> stop
            flag = 0
            break
    # Report training statistics
    print('标准误差逆传播法训练次数: ' + str(trainNum))
    print(sumE/m)
    # print('V:', V)
    # print('W:', W)
    # print('gamma:', gamma)
    # print('theta:', theta)
    return V, gamma, W, theta
def accumuBP(NumHiddenLayer):
    '''
    Accumulated BP: minimises the accumulated error — parameters are updated
    once per full pass over the whole training set.
    NumHiddenLayer: number of hidden-layer neurons (q below).
    Returns (V, gamma, W, theta): input->hidden weights (n, q), hidden
    thresholds (1, q), hidden->output weights (q, L), output thresholds (1, L).

    Bug fix: gamma/theta were initialised with shapes (m, q)/(m, L) — one
    threshold per *sample* — which made the trained network unusable on any
    input with a different number of rows. They are now (1, q)/(1, L) as in
    the book, and their gradients are summed over all samples.
    '''
    # Initialise data and parameters
    X, y = loadDataSet()  # (17, 8), (17, 1): one label per sample
    m, n = X.shape
    L = y.shape[1]
    V = random.rand(n, NumHiddenLayer)      # (n, q) input-to-hidden weights
    gamma = random.rand(1, NumHiddenLayer)  # (1, q) hidden-layer thresholds
    W = random.rand(NumHiddenLayer, L)      # (q, L) hidden-to-output weights
    theta = random.rand(1, L)               # (1, L) output-layer thresholds
    # Training hyper-parameters
    learn_rate = 0.1       # learning rate
    error = 0.001          # stop when the mean squared error drops below this
    maxTrainNum = 1000000  # cap on the number of epochs
    trainNum = 0           # epoch counter
    E = 0
    flag = 1
    while flag:
        # Forward pass over the whole data set (thresholds broadcast over rows)
        b = sigmoid(np.dot(X, V) - gamma)       # (m, q) hidden activations
        pred_y = sigmoid(np.dot(b, W) - theta)  # (m, L) network outputs
        E = 1/m * sum(1 / 2 * sum((pred_y - y) ** 2))  # mean squared error
        if E < error or trainNum > maxTrainNum:
            flag = 0
            break
        trainNum += 1
        # Gradients: g for the output layer, e for the hidden layer
        g = pred_y * (1 - pred_y) * (y - pred_y)  # (m, L)
        e = b * (1 - b) * ((np.dot(W, g.T)).T)    # (m, q)
        # Batch updates: matrix products / sums accumulate over all samples
        W += learn_rate * np.dot(b.T, g)
        theta -= learn_rate * g.sum(axis=0, keepdims=True)
        V += learn_rate * np.dot(X.T, e)          # (n, q)
        gamma -= learn_rate * e.sum(axis=0, keepdims=True)
    # Report training statistics
    print('累计误差逆传播法训练次数: ' + str(trainNum))
    # print('V:', V)
    # print('W:', W)
    # print('gamma:', gamma)
    # print('theta:', theta)
    print(E)
    return V, gamma, W, theta
def predict(inputX, V, gamma, W, theta):
    """Forward pass through the trained single-hidden-layer network.

    inputX: (m, n) samples; V/gamma are the input->hidden weights and
    thresholds, W/theta the hidden->output ones.
    Returns a list of 0/1 predictions, thresholding the output at 0.5.
    """
    hidden = 1.0 / (1 + np.exp(-(np.dot(inputX, V) - gamma)))   # (m, q)
    output = 1.0 / (1 + np.exp(-(np.dot(hidden, W) - theta)))   # (m, L)
    return [1 if row > 0.5 else 0 for row in output]
if __name__ == '__main__':
    X, y = loadDataSet()

    # Standard BP: parameters updated after every single sample.
    V, gamma, W, theta = standardBP(5)
    predList = predict(X, V, gamma, W, theta)
    print(predList, '\t', accuracy_score(predList, y))

    # Accumulated BP: parameters updated once per pass over the whole set.
    V, gamma, W, theta = accumuBP(5)
    predList = predict(X, V, gamma, W, theta)
    print(predList, '\t', accuracy_score(predList, y))
- 比较:
- 标准BP和累积BP算法,类似于随机梯度下降和标准梯度下降
- 要想达到同样的累积误差极小值,标准BP需要迭代更多的次数
- 累积BP算法,它在读取整个训练集D一遍后,即进行了一轮(one epoch)学习之后,才对参数进行更新,参数更新的频率低得多。但是,在很多任务中,累积误差在下降到一定程度后,进一步下降会非常缓慢。这时标准BP算法能更快地获得较好的解,尤其是训练集D非常大的时候。
代码参考:
https://blog.csdn.net/weixin_41056428/article/details/84498623
https://blog.csdn.net/qdbszsj/article/details/79110888