Python Implementation of the BP Algorithm from Zhou Zhihua's "Machine Learning"


Exercise 5.5: Implement the standard BP algorithm and the accumulated BP algorithm, use each of them to train a single-hidden-layer network on watermelon dataset 3.0, and compare the two.

The main idea follows the section of Zhou Zhihua's "Machine Learning" that covers the BP algorithm; both the standard BP algorithm and the accumulated BP algorithm described there are implemented. For watermelon dataset 3.0, the textual attribute values have already been mapped to discrete integers.
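For reference, these are the per-example update rules from the book that the code below implements (g_j is the output-layer gradient term, e_h the hidden-layer term, η the learning rate; in the code, theta1 holds the output thresholds θ_j and theta0 the hidden thresholds γ_h):

\begin{aligned}
g_j &= \hat{y}_j\,(1-\hat{y}_j)\,(y_j-\hat{y}_j) \\
e_h &= b_h\,(1-b_h)\sum_{j} w_{hj}\, g_j \\
\Delta w_{hj} &= \eta\, g_j\, b_h, &\quad \Delta\theta_j &= -\eta\, g_j \\
\Delta v_{ih} &= \eta\, e_h\, x_i, &\quad \Delta\gamma_h &= -\eta\, e_h
\end{aligned}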

To solve the XOR problem instead, just uncomment the following block (it appears commented out in the full listing below):

x = np.mat( '1,1,2,2;\
             1,2,1,2\
             ').T
x = np.array(x)
y = np.mat('0,1,1,0')
y = np.array(y).T

Later I rewrote this with numpy matrix operations, which greatly shortened the code and also made it run quite a bit faster than this version (click here for the advanced version).
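As a rough idea of what that matrix version looks like, here is a minimal sketch of one accumulated-BP update written with numpy broadcasting (my own illustration using the same parameter shapes as the class below, not the code behind the link):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def accumulated_bp_step(x, y, v, w, theta0, theta1, lr=0.1):
    # forward pass over the whole training set at once
    b = sigmoid(x.dot(v) - theta0)     # hidden activations, shape (n, n_hidden)
    yo = sigmoid(b.dot(w) - theta1)    # outputs, shape (n, n_output)
    # gradient terms (the same g and e as in the loop version)
    g = yo * (1 - yo) * (y - yo)
    e = b * (1 - b) * g.dot(w.T)
    # updates accumulated over all examples
    w += lr * b.T.dot(g)
    theta1 -= lr * g.sum(axis=0)
    v += lr * x.T.dot(e)
    theta0 -= lr * e.sum(axis=0)
    return ((y - yo) ** 2).sum(axis=1).mean()  # average per-example loss

Each array expression here replaces one of the triple loops in BPAll.updateParam below.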

#!/usr/bin/python  
#-*- coding:utf-8 -*-  
############################  
#File Name: bp-watermelon3.py
#Author: No One  
#E-mail: 1130395634@qq.com  
#Created Time: 2017-02-23 13:30:35
############################

import numpy as np
# watermelon data set 3.0: one row per attribute (six categorical attributes,
# already discretized, plus the two real-valued ones: density and sugar content);
# transposed so that each row of x is one sample
x = np.mat( '2,3,3,2,1,2,3,3,3,2,1,1,2,1,3,1,2;\
            1,1,1,1,1,2,2,2,2,3,3,1,2,2,2,1,1;\
            2,3,2,3,2,2,2,2,3,1,1,2,2,3,2,2,3;\
            3,3,3,3,3,3,2,3,2,3,1,1,2,2,3,1,2;\
            1,1,1,1,1,2,2,2,2,3,3,3,1,1,2,3,2;\
            1,1,1,1,1,2,2,1,1,2,1,2,1,1,2,1,1;\
            0.697,0.774,0.634,0.668,0.556,0.403,0.481,0.437,0.666,0.243,0.245,0.343,0.639,0.657,0.360,0.593,0.719;\
            0.460,0.376,0.264,0.318,0.215,0.237,0.149,0.211,0.091,0.267,0.057,0.099,0.161,0.198,0.370,0.042,0.103\
            ').T
x = np.array(x)
y = np.mat('1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0')  # 1 = good melon, 0 = bad melon
y = np.array(y).T
'''
x = np.mat( '1,1,2,2;\
             1,2,1,2\
             ').T
x = np.array(x)
y = np.mat('0,1,1,0')
y = np.array(y).T
'''
xrow, xcol = x.shape
yrow, ycol = y.shape
print 'x: ', x.shape, x
print 'y: ', y.shape, y

class BP:
    def __init__(self, n_input, n_hidden_layer, n_output, learn_rate, error, n_max_train, value):
        self.n_input = n_input
        self.n_hidden_layer = n_hidden_layer
        self.n_output = n_output
        self.learn_rate = learn_rate
        self.error = error
        self.n_max_train = n_max_train

        self.v = np.random.random((self.n_input, self.n_hidden_layer))   # input -> hidden weights
        self.w = np.random.random((self.n_hidden_layer, self.n_output))  # hidden -> output weights
        self.theta0 = np.random.random(self.n_hidden_layer)              # hidden-layer thresholds (gamma in the book)
        self.theta1 = np.random.random(self.n_output)                    # output-layer thresholds (theta in the book)
        self.b = []   # hidden-layer activations, one row per example
        self.yo = []  # network outputs, one row per example
        self.x = 0
        self.y = 0
        self.lossAll = []
        self.lossAverage = 0
        self.nRight = 0
        self.value = value

    def printParam(self):
        print 'printParam'
        print '---------------'
        print '     v: ', self.v
        print '     w: ', self.w
        print 'theta0: ', self.theta0
        print 'theta1: ', self.theta1
        print '---------------'

    def init(self, x, y):
        #print 'init'
        nx = len(x)
        ny = len(y)
        self.x = x
        self.y = y
        self.b = []
        self.yo = []
        for k in range(nx):
            tmp = []
            for h in range(self.n_hidden_layer):
                tmp.append(0)
            self.b.append(tmp)
            tmp = []
            for j in range(self.n_output):
                tmp.append(0)
            self.yo.append(tmp)

    def printResult(self):
        print 'printResult'
        self.calculateLossAll()
        print 'lossAll: ', self.lossAll
        print 'lossAverage: ', self.lossAverage
        self.nRight = 0
        for k in range(len(self.x)):
            print self.y[k], '----', self.yo[k]
            self.nRight += 1
            for j in range(self.n_output):
                # value[j] = [threshold, label below threshold, label above threshold]
                if(self.yo[k][j] > self.value[j][0] and self.y[k][j] != self.value[j][2]):
                    self.nRight -= 1
                    break
                if(self.yo[k][j] < self.value[j][0] and self.y[k][j] != self.value[j][1]):
                    self.nRight -= 1
                    break
        print 'right rate: %d/%d'%(self.nRight, len(self.x))

    def printProgress(self):
        print 'yo: ', self.yo

    def calculateLoss(self, y, yo):
        #print 'calculateLoss'
        loss = 0
        for j in range(self.n_output):
            loss += (y[j] - yo[j])**2
        return loss

    def calculateLossAll(self):
        self.lossAll = []
        for k in range(len(self.x)):
            loss = self.calculateLoss(self.y[k], self.yo[k])
            self.lossAll.append(loss)

        self.lossAverage = sum(self.lossAll) / len(self.x)

    def calculateOutput(self, x, k):
        #print 'calculateOutput'
        for h in range(self.n_hidden_layer):
            tmp = 0
            for i in range(self.n_input):
                tmp += self.v[i][h] * x[i]
            self.b[k][h] = sigmoid(tmp - self.theta0[h])

        for j in range(self.n_output):
            tmp = 0
            for h in range(self.n_hidden_layer):
                tmp += self.w[h][j] * self.b[k][h]
            self.yo[k][j] = sigmoid(tmp - self.theta1[j])
        #print 'yo of x[k]', self.yo[k]
        #print ' b of x[k]', self.b[k]

        #print ' b:', self.b
        #print 'yo:', self.yo

def sigmoid(x):
    # logistic activation; its derivative s*(1-s) is what the g and e terms use
    return 1.0 / (1.0 + np.exp(-1.0 * x))

class BPStandard(BP):
    '''
        Standard BP updates the parameters once after every training example.
    '''

    def updateParam(self, k):
        #print 'updateParam: ', k
        g = []
        #print ' y: ', self.y
        #print 'yo: ', self.yo
        #print ' b: ', self.b
        for j in range(self.n_output):
            tmp = self.yo[k][j] * (1 - self.yo[k][j]) * (self.y[k][j] - self.yo[k][j])
            g.append(tmp)
        e = []
        for h in range(self.n_hidden_layer):
            tmp = 0
            for j in range(self.n_output):
                tmp += self.b[k][h] * (1.0 - self.b[k][h]) * self.w[h][j] * g[j]
            e.append(tmp)
        #print ' g: ', g
        #print ' e: ', e

        for h in range(self.n_hidden_layer):
            for j in range(self.n_output):
                self.w[h][j] += self.learn_rate * g[j] * self.b[k][h]
        for j in range(self.n_output):
            self.theta1[j] -= self.learn_rate * g[j]
        for i in range(self.n_input):
            for h in range(self.n_hidden_layer):
                self.v[i][h] += self.learn_rate * e[h] * self.x[k][i]
        for h in range(self.n_hidden_layer):
            self.theta0[h] -= self.learn_rate * e[h]


    def train(self, x, y):
        print 'train neural networks'
        self.init(x, y)
        self.printParam()
        print 'train begin:'
        n_train = 0
        nr = 0
        while 1:
            for k in range(len(x)):
                n_train += 1
                self.calculateOutput(x[k], k)
                #loss = self.calculateLoss(y[k], self.yo[k])
                self.calculateLossAll()
                loss = self.lossAverage
                #print 'k, y, yo, loss', k, y[k], self.yo[k], loss
                if loss < self.error:
                    nr += 1
                    if nr >= 100: # stop only after the target is met 100 times in a row
                        break
                else:
                    nr = 0
                    self.updateParam(k)

                if n_train % 10000 == 0:
                    for m in range(len(x)):  # avoid shadowing the outer loop variable k
                        self.calculateOutput(x[m], m)
                    self.printProgress()

            if n_train > self.n_max_train or nr >= 100:
                break

        print 'train end'
        self.printParam()
        self.printResult()
        print 'train count: ', n_train

class BPAll(BP):
    '''
        Accumulated BP updates the parameters once per pass over the whole training set.
    '''
    def updateParam(self): 
        #print 'updateParam: ', k
        g = []
        #print ' y: ', self.y
        #print 'yo: ', self.yo
        #print ' b: ', self.b
        for k in range(len(self.x)):
            gk = []
            for j in range(self.n_output):
                tmp = self.yo[k][j] * (1 - self.yo[k][j]) * (self.y[k][j] - self.yo[k][j])
                gk.append(tmp)
            g.append(gk)

        e = []
        for k in range(len(self.x)):
            ek = []
            for h in range(self.n_hidden_layer):
                tmp = 0
                for j in range(self.n_output):
                    tmp += self.b[k][h] * (1.0 - self.b[k][h]) * self.w[h][j] * g[k][j]
                ek.append(tmp)
            e.append(ek)

        #print ' g: ', g
        #print ' e: ', e

        for h in range(self.n_hidden_layer):
            for j in range(self.n_output):
                for k in range(len(self.x)):
                    self.w[h][j] += self.learn_rate * g[k][j] * self.b[k][h]
        for j in range(self.n_output):
            for k in range(len(self.x)):
                self.theta1[j] -= self.learn_rate * g[k][j]

        for i in range(self.n_input):
            for h in range(self.n_hidden_layer):
                for k in range(len(self.x)):
                    self.v[i][h] += self.learn_rate * e[k][h] * self.x[k][i]
        for h in range(self.n_hidden_layer):
            for k in range(len(self.x)):
                self.theta0[h] -= self.learn_rate * e[k][h]



    def train(self, x, y):
        print 'train neural networks'
        self.init(x, y)
        print 'train begin:'
        n_train = 0
        self.printParam()
        nr = 0
        while 1:
            n_train += 1

            for k in range(len(x)):
               self.calculateOutput(x[k], k)

            self.calculateLossAll()
            loss = self.lossAverage
            if loss < self.error:
                nr += 1
                # stop only after the target is met 100 times in a row
                if nr >= 100:
                    break
            else:
                nr = 0
                self.updateParam()
            if n_train > self.n_max_train:  # cap the number of passes
                break
            if n_train % 10000 == 0:
                self.printProgress()
        print 'train end'
        self.printParam()
        self.printResult()
        print 'train count: ', n_train

if __name__ == '__main__':
    # The arguments are: number of input attributes, number of hidden neurons,
    # number of outputs, learning rate, target error, maximum number of training
    # iterations, and the value spec of each output (used to compute the accuracy)
    n_input = xcol
    n_hidden_layer = 10 
    n_output = ycol
    learn_rate = 0.1
    error = 0.005
    n_max_train = 1000000
    value = [[0.5, 0, 1]]  # one output: threshold 0.5, below -> label 0, above -> label 1

    bps = BPStandard(n_input, n_hidden_layer, n_output, learn_rate, error, n_max_train, value)
    bpa = BPAll(n_input, n_hidden_layer, n_output, learn_rate, error, n_max_train, value)
    bpa.train(x, y)
    #bps.train(x, y)  # uncomment to train with standard BP instead

Algorithm output:

train end
printParam # the learned parameters
---------------
     v:  [[ 0.52727992  0.53390963  0.92160647  3.67952132  0.16630974  1.6822452
   0.93664275  0.24462431  0.93997841  0.53243003]
 [ 0.24240478  0.63022721  0.6001551  -4.62150461  0.03211749 -0.57931296
   0.4101665   0.14450949  0.95041148  0.69107199]
 [ 0.67834334  0.07274142  0.27501118 -3.00794936  0.81412982 -4.0613038
   0.90506147  0.30416225  0.02774325  0.64938027]
 [ 0.29030722  0.12477934  0.61165559  3.46040825  0.70166332  1.243593
   0.1714465   0.8168826   0.77291002 -0.00569992]
 [ 0.14111054  0.30277532 -0.11134744 -1.95607166  0.66018174 -0.14093981
   0.79980225 -0.15311372  0.29560204 -0.0566586 ]
 [ 0.0875982   0.12740192  0.66973479  3.95411433  0.09406343  2.06225472
   0.51891833  0.78969425  0.48966896  0.94956282]
 [ 0.55161796  0.54138592  0.1186167  -1.28220958  0.15578557 -2.70826766
   0.5800524   0.9019849   0.22191171  0.51698389]
 [ 0.5127042   0.77296113  0.91880206  0.21883632  0.10012541  2.64687966
   0.95227393  0.17313574  0.5399895   0.93380628]]
     w:  [[-0.42702855]
 [-0.28844243]
 [-0.13435568]
 [ 8.49719176]
 [-0.15660901]
 [-7.16285254]
 [-0.68307245]
 [ 0.1653864 ]
 [-0.80135337]
 [-0.2704846 ]]
theta0:  [ 0.82961074  0.1514226   0.85650088  1.73185284  0.04683857  2.19624902
  0.39374242  0.38117421  0.11448397  0.36201341]
theta1:  [ 1.8630305]
---------------
printResult # learning results: per-example losses and outputs
lossAll:  [0.00079828078092570589, 0.00031748657549017826, 0.0066439549574497199, 0.00045874897478553612, 0.00066582058549037061, 0.013766194221762165, 0.013431130054205837, 0.0045185364810388338, 0.00033557823373299748, 3.4455951467673449e-05, 0.00011105276760085924, 0.00020955014960734856, 0.0026002742727130983, 0.00014025779841836633, 0.039653612186256237, 0.00015206485095047901, 0.0010814531659795953]
lossAverage:  0.00499520305929
[1] ---- [0.97174613688491951]
[1] ---- [0.982181847023606]
[1] ---- [0.9184895408094782]
[1] ---- [0.97858157394238465]
[1] ---- [0.97419650051852713]
[1] ---- [0.88267057393065385]
[1] ---- [0.88410724762002657]
[1] ---- [0.93277993989113939]
[0] ---- [0.018318794549123517]
[0] ---- [0.0058699192045268776]
[0] ---- [0.010538157694818352]
[0] ---- [0.014475847111908462]
[0] ---- [0.050992884530227336]
[0] ---- [0.011843048527231758]
[0] ---- [0.19913214754593553]
[0] ---- [0.012331457778806162]
[0] ---- [0.032885455234489235]
right rate: 17/17
train count:  4914
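On the comparison the exercise asks for: standard BP updates the parameters after every single training example, so consecutive updates can partly cancel each other and far more update steps are typically needed; accumulated BP reads the entire training set once per update and minimizes the accumulated error directly, which makes each update smoother but often slows progress in the later stages of training (this matches the book's discussion following the algorithm). To see both on this data set, swap the bpa.train(x, y) call and the commented-out bps.train(x, y) call in main.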

