习题5.5: 试编程实现标准BP算法和累积BP算法,在西瓜数据集3.0上分别用这两个算法训练一个单隐层网络,并进行比较
算法的主要思想来自周志华《机器学习》上讲BP算法的部分,实现了书上介绍的标准BP算法和累积BP算法,对于西瓜数据集3.0,已经把文字部分的取值变为离散的数字了
如果要求解异或问题,把下面的代码注释取消即可
x = np.mat( '1,1,2,2;\
1,2,1,2\
').T
x = np.array(x)
y=np.mat('0,1,1,0')
y = np.array(y).T
后来通过numpy的矩阵操作,使得代码量大大简化,并且运行时间也比这一版快不少(另有一个基于numpy矩阵运算的高级版本可供参考)。
#!/usr/bin/python
#-*- coding:utf-8 -*-
############################
#File Name: bp-watermelon3.py
#Author: No One
#E-mail: 1130395634@qq.com
#Created Time: 2017-02-23 13:30:35
############################
import numpy as np
import math
# Watermelon dataset 3.0 from Zhou Zhihua, "Machine Learning" (table 4.3).
# Each ';'-separated row below is one attribute over the 17 samples; the
# first six attributes are categorical values already mapped to small
# integers, the last two (density, sugar content) are continuous.  The
# table is transposed so that each ROW of x is one sample: x.shape == (17, 8).
# np.mat/np.matrix is deprecated, so build a plain ndarray from the literal.
_x_raw = ('2,3,3,2,1,2,3,3,3,2,1,1,2,1,3,1,2;'
          '1,1,1,1,1,2,2,2,2,3,3,1,2,2,2,1,1;'
          '2,3,2,3,2,2,2,2,3,1,1,2,2,3,2,2,3;'
          '3,3,3,3,3,3,2,3,2,3,1,1,2,2,3,1,2;'
          '1,1,1,1,1,2,2,2,2,3,3,3,1,1,2,3,2;'
          '1,1,1,1,1,2,2,1,1,2,1,2,1,1,2,1,1;'
          '0.697,0.774,0.634,0.668,0.556,0.403,0.481,0.437,0.666,0.243,0.245,0.343,0.639,0.657,0.360,0.593,0.719;'
          '0.460,0.376,0.264,0.318,0.215,0.237,0.149,0.211,0.091,0.267,0.057,0.099,0.161,0.198,0.370,0.042,0.103')
x = np.array([[float(v) for v in row.split(',')]
              for row in _x_raw.split(';')]).T
# Labels: 1 = good melon, 0 = bad melon; column vector, y.shape == (17, 1).
y = np.array([[int(v)] for v in
              '1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0'.split(',')])
'''
Replace x and y with the following to train on the XOR problem instead:
x = np.array([[1.0, 1.0], [1.0, 2.0], [2.0, 1.0], [2.0, 2.0]])
y = np.array([[0], [1], [1], [0]])
'''
xrow, xcol = x.shape
yrow, ycol = y.shape
# Single-argument print calls behave identically under Python 2 and 3.
print('x: %s %s' % (x.shape, x))
print('y: %s %s' % (y.shape, y))
class BP:
    """Single-hidden-layer feed-forward network trained with BP.

    Follows the notation of Zhou Zhihua, "Machine Learning", ch. 5:
    v holds the input->hidden weights, w the hidden->output weights,
    theta0/theta1 the hidden/output thresholds, b the hidden-layer
    outputs and yo the network outputs for every training sample.
    Subclasses supply the actual training loop (updateParam / train).
    """

    def __init__(self, n_input, n_hidden_layer, n_output, learn_rate, error, n_max_train, value):
        # Network sizes.
        self.n_input = n_input
        self.n_hidden_layer = n_hidden_layer
        self.n_output = n_output
        # Training hyper-parameters.
        self.learn_rate = learn_rate
        self.error = error              # loss threshold used by the stop test
        self.n_max_train = n_max_train  # hard cap on parameter updates
        # Weights and thresholds, randomly initialised in (0, 1).
        self.v = np.random.random((self.n_input, self.n_hidden_layer))
        self.w = np.random.random((self.n_hidden_layer, self.n_output))
        self.theta0 = np.random.random(self.n_hidden_layer)
        self.theta1 = np.random.random(self.n_output)
        # Per-sample hidden outputs (b) and network outputs (yo); sized by init().
        self.b = []
        self.yo = []
        self.x = 0
        self.y = 0
        self.lossAll = []
        self.lossAverage = 0
        self.nRight = 0
        # value[j] = [threshold, label-if-below, label-if-above] for output j,
        # used only when reporting classification accuracy.
        self.value = value

    def printParam(self):
        """Dump the current weights and thresholds."""
        print('printParam')
        print('---------------')
        print('     v: %s' % (self.v,))
        print('     w: %s' % (self.w,))
        print('theta0: %s' % (self.theta0,))
        print('theta1: %s' % (self.theta1,))
        print('---------------')

    def init(self, x, y):
        """Bind the training set and allocate the b / yo buffers."""
        self.x = x
        self.y = y
        n_samples = len(x)
        self.b = [[0] * self.n_hidden_layer for _ in range(n_samples)]
        self.yo = [[0] * self.n_output for _ in range(n_samples)]

    def printResult(self):
        """Report per-sample loss, outputs and classification accuracy."""
        print('printResult')
        self.calculateLossAll()
        print('lossAll: %s' % (self.lossAll,))
        print('lossAverage: %s' % (self.lossAverage,))
        self.nRight = 0
        for k in range(len(self.x)):
            print('%s ---- %s' % (self.y[k], self.yo[k]))
            # Count the sample as correct unless some output falls on the
            # wrong side of its decision threshold.
            self.nRight += 1
            for j in range(self.n_output):
                if self.yo[k][j] > self.value[j][0] and self.y[k][j] != self.value[j][2]:
                    self.nRight -= 1
                    break
                if self.yo[k][j] < self.value[j][0] and self.y[k][j] != self.value[j][1]:
                    self.nRight -= 1
                    break
        print('right rate: %d/%d' % (self.nRight, len(self.x)))

    def printProgress(self):
        """Print the current outputs for all samples."""
        print('yo: %s' % (self.yo,))

    def calculateLoss(self, y, yo):
        """Squared error between target y and output yo for one sample."""
        return sum((y[j] - yo[j]) ** 2 for j in range(self.n_output))

    def calculateLossAll(self):
        """Recompute every sample's loss and the average loss."""
        self.lossAll = [self.calculateLoss(self.y[k], self.yo[k])
                        for k in range(len(self.x))]
        self.lossAverage = sum(self.lossAll) / len(self.x)

    def calculateOutput(self, x, k):
        """Forward pass for sample x; results stored at slot k of b and yo."""
        for h in range(self.n_hidden_layer):
            alpha = sum(self.v[i][h] * x[i] for i in range(self.n_input))
            self.b[k][h] = sigmoid(alpha - self.theta0[h])
        for j in range(self.n_output):
            beta = sum(self.w[h][j] * self.b[k][h]
                       for h in range(self.n_hidden_layer))
            self.yo[k][j] = sigmoid(beta - self.theta1[j])
def sigmoid(x):
    """Logistic activation: maps any real (or ndarray) x into (0, 1)."""
    return 1.0 / (1.0 + np.exp(-x))
class BPStandard(BP):
    """Standard (per-sample) BP: parameters are updated after every single
    training example, as in figure 5.8 of the book."""

    def updateParam(self, k):
        """One gradient step driven by sample k (book eqs. 5.10-5.14)."""
        # g[j]: gradient term of output unit j.
        g = [self.yo[k][j] * (1 - self.yo[k][j]) * (self.y[k][j] - self.yo[k][j])
             for j in range(self.n_output)]
        # e[h]: gradient term of hidden unit h, back-propagated through w.
        e = []
        for h in range(self.n_hidden_layer):
            acc = 0
            for j in range(self.n_output):
                acc += self.b[k][h] * (1.0 - self.b[k][h]) * self.w[h][j] * g[j]
            e.append(acc)
        # Weight and threshold updates (thresholds move opposite to weights).
        for h in range(self.n_hidden_layer):
            for j in range(self.n_output):
                self.w[h][j] += self.learn_rate * g[j] * self.b[k][h]
        for j in range(self.n_output):
            self.theta1[j] -= self.learn_rate * g[j]
        for i in range(self.n_input):
            for h in range(self.n_hidden_layer):
                self.v[i][h] += self.learn_rate * e[h] * self.x[k][i]
        for h in range(self.n_hidden_layer):
            self.theta0[h] -= self.learn_rate * e[h]

    def train(self, x, y):
        """Train until the average loss stays below self.error for 100
        consecutive samples, or more than n_max_train updates are made."""
        print('train neural networks')
        self.init(x, y)
        self.printParam()
        loss_prev = 0
        print('train begin:')
        n_train = 0
        nr = 0
        while 1:
            for k in range(len(x)):
                n_train += 1
                self.calculateOutput(x[k], k)
                self.calculateLossAll()
                loss = self.lossAverage
                # NOTE(review): loss_prev is never updated, so this test is
                # effectively "lossAverage < self.error", not a delta check.
                if abs(loss_prev - loss) < self.error:
                    nr += 1
                    if nr >= 100:  # stop only after 100 consecutive hits
                        break
                else:
                    nr = 0
                    self.updateParam(k)
                if n_train % 10000 == 0:
                    # Refresh all outputs before reporting progress.
                    for kk in range(len(x)):
                        self.calculateOutput(x[kk], kk)
                    self.printProgress()
            if n_train > self.n_max_train or nr >= 100:
                break
        print('train end')
        self.printParam()
        self.printResult()
        print('train count: %s' % (n_train,))
class BPAll(BP):
    """Accumulated BP: gradients are summed over the whole training set and
    the parameters are updated once per epoch."""

    def updateParam(self):
        """One epoch-level gradient step accumulated over every sample."""
        # g[k][j]: output-layer gradient of sample k, output unit j.
        g = [[self.yo[k][j] * (1 - self.yo[k][j]) * (self.y[k][j] - self.yo[k][j])
              for j in range(self.n_output)]
             for k in range(len(self.x))]
        # e[k][h]: hidden-layer gradient of sample k, hidden unit h.
        e = []
        for k in range(len(self.x)):
            ek = []
            for h in range(self.n_hidden_layer):
                acc = 0
                for j in range(self.n_output):
                    acc += self.b[k][h] * (1.0 - self.b[k][h]) * self.w[h][j] * g[k][j]
                ek.append(acc)
            e.append(ek)
        # Apply the summed updates (thresholds move opposite to weights).
        for h in range(self.n_hidden_layer):
            for j in range(self.n_output):
                for k in range(len(self.x)):
                    self.w[h][j] += self.learn_rate * g[k][j] * self.b[k][h]
        for j in range(self.n_output):
            for k in range(len(self.x)):
                self.theta1[j] -= self.learn_rate * g[k][j]
        for i in range(self.n_input):
            for h in range(self.n_hidden_layer):
                for k in range(len(self.x)):
                    self.v[i][h] += self.learn_rate * e[k][h] * self.x[k][i]
        for h in range(self.n_hidden_layer):
            for k in range(len(self.x)):
                self.theta0[h] -= self.learn_rate * e[k][h]

    def train(self, x, y):
        """Train epoch by epoch until the average loss stays below
        self.error for 100 consecutive epochs, or n_max_train is reached."""
        print('train neural networks')
        self.init(x, y)
        loss_prev = 0
        print('train begin:')
        n_train = 0
        self.printParam()
        nr = 0
        while 1:
            n_train += 1
            for k in range(len(x)):
                self.calculateOutput(x[k], k)
            self.calculateLossAll()
            loss = self.lossAverage
            # NOTE(review): loss_prev is never updated, so this is really
            # "lossAverage < self.error", not a convergence-delta check.
            if abs(loss - loss_prev) < self.error:
                nr += 1
                if nr >= 100:  # need 100 consecutive hits before stopping
                    break
            else:
                nr = 0
                self.updateParam()
            # Bug fix: the original accumulated-BP loop never honoured
            # n_max_train (unlike BPStandard), so a non-converging run
            # would spin forever.  Cap it the same way here.
            if n_train > self.n_max_train:
                break
            if n_train % 10000 == 0:
                self.printProgress()
        print('train end')
        self.printParam()
        self.printResult()
        print('train count: %s' % (n_train,))
if __name__ == '__main__':
    # Network/training configuration, in constructor order: attribute
    # count, number of hidden neurons, number of outputs, learning rate,
    # error threshold, maximum number of updates, and for every output a
    # [threshold, low-label, high-label] triple used for the accuracy
    # report.
    config = (xcol, 10, ycol, 0.1, 0.005, 1000000, [[0.5, 0, 1]])
    bps = BPStandard(*config)
    bpa = BPAll(*config)
    bpa.train(x, y)
    #bps.train(x, y)
算法输出:
train end
printParam # 学习到的参数
---------------
v: [[ 0.52727992 0.53390963 0.92160647 3.67952132 0.16630974 1.6822452
0.93664275 0.24462431 0.93997841 0.53243003]
[ 0.24240478 0.63022721 0.6001551 -4.62150461 0.03211749 -0.57931296
0.4101665 0.14450949 0.95041148 0.69107199]
[ 0.67834334 0.07274142 0.27501118 -3.00794936 0.81412982 -4.0613038
0.90506147 0.30416225 0.02774325 0.64938027]
[ 0.29030722 0.12477934 0.61165559 3.46040825 0.70166332 1.243593
0.1714465 0.8168826 0.77291002 -0.00569992]
[ 0.14111054 0.30277532 -0.11134744 -1.95607166 0.66018174 -0.14093981
0.79980225 -0.15311372 0.29560204 -0.0566586 ]
[ 0.0875982 0.12740192 0.66973479 3.95411433 0.09406343 2.06225472
0.51891833 0.78969425 0.48966896 0.94956282]
[ 0.55161796 0.54138592 0.1186167 -1.28220958 0.15578557 -2.70826766
0.5800524 0.9019849 0.22191171 0.51698389]
[ 0.5127042 0.77296113 0.91880206 0.21883632 0.10012541 2.64687966
0.95227393 0.17313574 0.5399895 0.93380628]]
w: [[-0.42702855]
[-0.28844243]
[-0.13435568]
[ 8.49719176]
[-0.15660901]
[-7.16285254]
[-0.68307245]
[ 0.1653864 ]
[-0.80135337]
[-0.2704846 ]]
theta0: [ 0.82961074 0.1514226 0.85650088 1.73185284 0.04683857 2.19624902
0.39374242 0.38117421 0.11448397 0.36201341]
theta1: [ 1.8630305]
---------------
printResult # 学习结果,包括误差和输出
lossAll: [0.00079828078092570589, 0.00031748657549017826, 0.0066439549574497199, 0.00045874897478553612, 0.00066582058549037061, 0.013766194221762165, 0.013431130054205837, 0.0045185364810388338, 0.00033557823373299748, 3.4455951467673449e-05, 0.00011105276760085924, 0.00020955014960734856, 0.0026002742727130983, 0.00014025779841836633, 0.039653612186256237, 0.00015206485095047901, 0.0010814531659795953]
lossAverage: 0.00499520305929
[1] ---- [0.97174613688491951]
[1] ---- [0.982181847023606]
[1] ---- [0.9184895408094782]
[1] ---- [0.97858157394238465]
[1] ---- [0.97419650051852713]
[1] ---- [0.88267057393065385]
[1] ---- [0.88410724762002657]
[1] ---- [0.93277993989113939]
[0] ---- [0.018318794549123517]
[0] ---- [0.0058699192045268776]
[0] ---- [0.010538157694818352]
[0] ---- [0.014475847111908462]
[0] ---- [0.050992884530227336]
[0] ---- [0.011843048527231758]
[0] ---- [0.19913214754593553]
[0] ---- [0.012331457778806162]
[0] ---- [0.032885455234489235]
right rate: 17/17
train count: 4914