SVM支持向量机总结(不包括高维核函数等)

原创 2013年12月02日 17:18:26
SVM支持向量机总结

本文将会讲到:

1.从原问题到最终结果主要公式展示

2.个人编写的python代码及结果演示

------------------------------------------------------------------------------------

1.从原问题到最终结果主要公式展示









2.个人编写的python代码及结果演示

(1)训练集

trainDataSet.txt

3.542485  1.977398 -1
3.018896 2.556416   -1
7.551510 -1.580030 1
2.114999 -0.004466 -1
8.127113 1.274372 1
7.108772 -0.986906 1
8.610639 2.046708 1
2.326297 0.265213 -1
3.634009 1.730537 -1
0.341367 -0.894998 -1
3.125951 0.293251 -1
2.123252 -0.783563 -1
0.887835 -2.797792 -1
7.139979 -2.329896 1
1.696414 -1.212496 -1
8.117032 0.623493 1
8.497162 -0.266649 1
4.658191 3.507396 -1
8.197181 1.545132 1
1.208047 0.213100 -1
1.928486 -0.321870 -1
2.175808 -0.014527 -1
7.886608 0.461755 1
3.223038 -0.552392 -1
3.628502 2.190585 -1
7.407860 -0.121961 1
7.286357 0.251077 1
2.301095 -0.533988 -1
-0.232542 -0.547690 -1
3.457096 -0.082216 -1
3.023938 -0.057392 -1
8.015003 0.885325 1
8.991748 0.923154 1
7.916831 -1.781735 1
7.616862 -0.217958 1
2.450939 0.744967 -1
7.270337 -2.507834 1
1.749721 -0.961902 -1
1.803111 -0.176349 -1
8.804461 3.044301 1
1.231257 -0.568573 -1
2.074915 1.410550 -1
-0.743036 -1.736103 -1
3.536555 3.964960 -1
8.410143 0.025606 1
7.382988 -0.478764 1
6.960661 -0.245353 1
8.234460 0.701868 1
8.168618 -0.903835 1
1.534187 -0.622492 -1
9.229518 2.066088 1
7.886242 0.191813 1
2.893743 -1.643468 -1
1.870457 -1.040420 -1
5.286862 -2.358286 1
6.080573 0.418886 1
2.544314 1.714165 -1
6.016004 -3.753712 1
0.926310 -0.564359 -1
0.870296 -0.109952 -1
2.369345 1.375695 -1
1.363782 -0.254082 -1
7.279460 -0.189572 1
1.896005 0.515080 -1
8.102154 -0.603875 1
2.529893 0.662657 -1
1.963874 -0.365233 -1
8.132048 0.785914 1
8.245938 0.372366 1
6.543888 0.433164 1
-0.236713 -5.766721 -1
8.112593 0.295839 1
9.803425 1.495167 1
1.497407 -0.552916 -1
1.336267 -1.632889 -1
9.205805 -0.586480 1
1.966279 -1.840439 -1
8.398012 1.584918 1
7.239953 -1.764292 1
7.556201 0.241185 1
9.015509 0.345019 1
8.266085 -0.230977 1
8.545620 2.788799 1
9.295969 1.346332 1
2.404234 0.570278 -1
2.037772 0.021919 -1
1.727631 -0.453143 -1
1.979395 -0.050773 -1
8.092288 -1.372433 1
1.667645 0.239204 -1
9.854303 1.365116 1
7.921057 -1.327587 1
8.500757 1.492372 1
1.339746 -0.291183 -1
3.107511 0.758367 -1
2.609525 .902979 -1
3.263585 1.367898 -1
2.912122 -0.202359 -1
1.731786 0.589096 -1
2.387003 1.573131 -1

-------------------------------------------------
(2)python代码
import math,random,numpy
import matplotlib.pyplot as plt
from numpy import *
'''
read training data set
'''
def loadTrainDataSet(fileName):
    """Read a two-feature training set from a plain-text file.

    Every non-blank line is expected to hold three numbers separated by
    whitespace (tabs or any run of spaces): ``x1 x2 label``.

    Args:
        fileName: path of the text file to read.

    Returns:
        (data, label): ``data`` is a list of ``[x1, x2]`` float pairs and
        ``label`` the list of float labels, both in file order.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError, IndexError: if a non-blank line has fewer than three
            fields or a field is not a number.
    """
    data = []
    label = []
    # The context manager closes the file even when a line fails to parse.
    with open(fileName) as fileread:
        for line in fileread:
            # split() without arguments accepts tabs as well as repeated
            # spaces, so both tab- and space-separated files load.
            words = line.split()
            if not words:
                continue  # tolerate blank lines, e.g. at the end of the file
            data.append([float(words[0]), float(words[1])])
            label.append(float(words[2]))
    return data, label




'''
clip the updated alpha_j into the interval [L, H]
'''
def clipAlpha(alpha,H,L):
    """Clamp ``alpha`` to the box bounds.

    The upper bound ``H`` is applied first and the lower bound ``L``
    second, so ``L`` wins if the two bounds are inconsistent (H < L).
    """
    capped = H if alpha > H else alpha
    return L if capped < L else capped


'''
select a random index j that is different from index i
'''
def selectJrand(i,m):
    """Return a random index ``j`` with ``0 <= j < m`` and ``j != i``.

    Args:
        i: index that must not be returned.
        m: number of samples; candidates are drawn from ``range(m)``.

    Returns:
        An int index different from ``i``.

    Raises:
        ValueError: if ``range(m)`` holds no index other than ``i``; the
            rejection loop below could never terminate in that case.
    """
    if m < 1 or (m == 1 and i == 0):
        raise ValueError('selectJrand needs an index other than i in range(m)')
    j = i #we want to select any J not equal to i
    # Redraw as well when j >= m: uniform() may return its upper end point
    # through floating-point rounding, which would be an out-of-range index.
    while j == i or j >= m:
        j = int(random.uniform(0,m))
    return j


def selectJ(i,m):
    """Return the index following ``i``, wrapping to 0 from the last index.

    Any ``i`` that is not below ``m - 1`` maps to 0.
    """
    return i + 1 if i < m - 1 else 0
'''
SMO Algorithm
'''
def simpleSMO_heuristicSearch(dataX,labelY,C,limit,maxIter):
    """Train a linear soft-margin SVM with a simplified SMO loop.

    Each pass visits every sample i. When alpha_i fails the KKT check
    below, a partner j is drawn with selectJrand and the pair
    (alpha_i, alpha_j) is optimised analytically. Exactly ``maxIter``
    passes are run; there is no early stop. After each pass the value of
    the dual objective is printed.

    Args:
        dataX: sequence of m feature vectors of equal length.
        labelY: sequence of m labels; the L/H bounds below assume the
            labels are +1 / -1.
        C: upper bound of the box constraint 0 <= alpha <= C.
        limit: a change of alpha_j smaller than this is discarded.
        maxIter: number of full passes over the training set.

    Returns:
        (b, alphas): the bias term and a list with one multiplier per
        sample, in input order.
    """
    X = numpy.asarray(dataX, dtype=float)
    Y = numpy.asarray(labelY, dtype=float)
    m = len(Y)
    alphas = numpy.zeros(m)
    b = 0.0
    # A pair update needs two distinct samples. With fewer, selectJrand has
    # no partner to return, so such inputs simply keep every alpha at zero.
    sweep = range(m) if m > 1 else ()
    itercnt = 0
    while itercnt < maxIter:
        for i in sweep:
            # w = sum_k alpha_k * y_k * x_k.  The alphas do not change
            # between the two error evaluations, so one w serves Ei and Ej.
            w = numpy.dot(alphas * Y, X)
            # Ei = f(x_i) - y_i with f(x) = w.x + b
            Ei = numpy.dot(w, X[i]) + b - Y[i]
            yE = Y[i] * Ei
            # check out the example who violates the KKT condition
            # (exact comparisons, no tolerance is applied here)
            violatesKKT = ((0 < alphas[i] < C and yE != 0)
                           or (alphas[i] == C and yE > 0)
                           or (alphas[i] == 0 and yE < 0))
            if not violatesKKT:
                continue
            j = selectJrand(i,m)
            Ej = numpy.dot(w, X[j]) + b - Y[j]
            alphaIold = alphas[i]
            alphaJold = alphas[j]
            # Feasible segment [L, H] for alpha_j on the constraint line.
            if Y[i] != Y[j]:
                L = max(0, alphaJold - alphaIold)
                H = min(C, C + alphaJold - alphaIold)
            else:
                L = max(0, alphaJold + alphaIold - C)
                H = min(C, alphaJold + alphaIold)
            if L == H:
                continue
            Kii = numpy.dot(X[i], X[i])
            Kjj = numpy.dot(X[j], X[j])
            Kij = numpy.dot(X[i], X[j])
            # n = |x_i - x_j|^2; no usable minimum along the line if n <= 0
            n = float(Kii + Kjj - 2*Kij)
            if n <= 0:
                continue
            alphaJnew = clipAlpha(alphaJold + Y[j]*(Ei - Ej)/n, H, L)
            if abs(alphaJnew - alphaJold) < limit:
                # Negligible step: skip the pair WITHOUT storing alphaJnew,
                # otherwise sum(alpha*y) = 0 would no longer hold.
                continue
            alphas[j] = alphaJnew
            #update alphai by some alphaj
            alphas[i] = alphaIold + Y[i]*Y[j]*(alphaJold - alphaJnew)
            #------------------------- update b --------------------#
            deltaI = alphas[i] - alphaIold
            deltaJ = alphas[j] - alphaJold
            b1 = b - Ei - Y[i]*deltaI*Kii - Y[j]*deltaJ*Kij
            b2 = b - Ej - Y[i]*deltaI*Kij - Y[j]*deltaJ*Kjj
            if 0 < alphas[i] < C:
                b = b1
            elif 0 < alphas[j] < C:
                b = b2
            else:
                b = (b1+b2)/2.0
        #----------- W(Alpha) ----------#
        # Dual objective in its minimisation form,
        #   0.5 * sum_pq a_p a_q y_p y_q <x_p, x_q> - sum_p a_p.
        # The double sum equals |w|^2, so the cross terms are included
        # without an O(m^2) loop.
        w = numpy.dot(alphas * Y, X)
        objective = 0.5*numpy.dot(w, w) - alphas.sum()
        # Single-argument print() gives the same text on Python 2 and 3.
        print('---------------- max W(alphas)-------------')
        print('W(alphas)= %s \n' % objective)
        itercnt += 1
    return b, alphas.tolist()
        








if __name__ == '__main__':
    # Demo driver: train on trainDataSet.txt, then plot the samples and the
    # separating line W[0]*x1 + W[1]*x2 + bias = 0.
    dataX, labelY = loadTrainDataSet("trainDataSet.txt")
    #------------ simpleSMO_heuristic search -----------#
    bias, alphass = simpleSMO_heuristicSearch(dataX,labelY,0.1,0.0001,100)
    #----------------- line show -----------------------#
    # W = sum_i alpha_i * y_i * x_i, accumulated in an explicit numpy array
    # rather than relying on "list += ndarray" turning a list into an array.
    W = numpy.zeros(2)
    for point, y, alpha in zip(dataX, labelY, alphass):
        W = W + numpy.dot(y*alpha, point)
    # The line can only be written as x2 = slope*x1 + intercept if W[1] != 0.
    hasSlope = W[1] != 0
    if hasSlope:
        xx = [-2,1,5,10]
        xy = [(-W[0]*x - bias)/W[1] for x in xx]
        plt.plot(xx,xy)
    for point, y, alpha in zip(dataX, labelY, alphass):
        if alpha != 0:
            # non-zero multiplier: drawn as a star
            plt.plot(point[0],point[1],'*')
        elif y == 1.0:
            plt.plot(point[0],point[1],'o')
        elif y == -1.0:
            plt.plot(point[0],point[1],'or')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.title('trainDataSet')
    if hasSlope:
        # Single-argument print() gives the same text on Python 2 and 3.
        print('the line function is : F(x)= %s X  %s \n' % (-W[0]/W[1], -bias/W[1]))
    plt.show()

    

(3)实验结果:下面给出C取不同值时的分类结果(C越大,对误分类样本的容忍度越小)

      C = 1.0 时,图像:


C = 1000.0时



C = 0.001时


C =0.1

当 C=1.0 时,max W(alphas)的变化情况,以及最终得到的直线方程

---------------- max W(alphas)-------------
W(alphas)= 0.145449910692 


---------------- max W(alphas)-------------
W(alphas)= 0.109034120556 


---------------- max W(alphas)-------------
W(alphas)= 0.117310017424 


---------------- max W(alphas)-------------
W(alphas)= 0.117310017424 


---------------- max W(alphas)-------------
W(alphas)= 0.208741283826 


---------------- max W(alphas)-------------
W(alphas)= 0.145774558452 


---------------- max W(alphas)-------------
W(alphas)= 0.214635282019 


---------------- max W(alphas)-------------
W(alphas)= 0.214406342912 


---------------- max W(alphas)-------------
W(alphas)= 0.215661425503 


---------------- max W(alphas)-------------
W(alphas)= 0.262514693433 


---------------- max W(alphas)-------------
W(alphas)= 0.262514693433 


---------------- max W(alphas)-------------
W(alphas)= 0.298964958248 


---------------- max W(alphas)-------------
W(alphas)= 0.298931357619 


---------------- max W(alphas)-------------
W(alphas)= 0.285197304204 


---------------- max W(alphas)-------------
W(alphas)= 0.152757945319 


---------------- max W(alphas)-------------
W(alphas)= 0.710549717934 


---------------- max W(alphas)-------------
W(alphas)= 0.710549717934 


---------------- max W(alphas)-------------
W(alphas)= 0.710770567073 


---------------- max W(alphas)-------------
W(alphas)= 0.916464185695 


---------------- max W(alphas)-------------
W(alphas)= 0.916464185695 


---------------- max W(alphas)-------------
W(alphas)= 0.916065417441 


---------------- max W(alphas)-------------
W(alphas)= 0.897747262982 


---------------- max W(alphas)-------------
W(alphas)= 0.897747262982 


---------------- max W(alphas)-------------
W(alphas)= 0.887148444406 


---------------- max W(alphas)-------------
W(alphas)= 0.894184257071 


---------------- max W(alphas)-------------
W(alphas)= 0.88167577001 


---------------- max W(alphas)-------------
W(alphas)= 0.88167577001 


---------------- max W(alphas)-------------
W(alphas)= 0.88167577001 


---------------- max W(alphas)-------------
W(alphas)= 0.88167577001 


---------------- max W(alphas)-------------
W(alphas)= 0.88167577001 


---------------- max W(alphas)-------------
W(alphas)= 0.88167577001 


---------------- max W(alphas)-------------
W(alphas)= 0.88167577001 


---------------- max W(alphas)-------------
W(alphas)= 0.88167577001 


---------------- max W(alphas)-------------
W(alphas)= 0.847130393883 


---------------- max W(alphas)-------------
W(alphas)= 0.847130393883 


---------------- max W(alphas)-------------
W(alphas)= 0.847130393883 


---------------- max W(alphas)-------------
W(alphas)= 0.847130393883 


---------------- max W(alphas)-------------
W(alphas)= 0.847130393883 


---------------- max W(alphas)-------------
W(alphas)= 0.97437607653 


---------------- max W(alphas)-------------
W(alphas)= 0.97437607653 


---------------- max W(alphas)-------------
W(alphas)= 0.975068782717 


---------------- max W(alphas)-------------
W(alphas)= 0.975068782717 


---------------- max W(alphas)-------------
W(alphas)= 1.05602850898 


---------------- max W(alphas)-------------
W(alphas)= 1.05602850898 


---------------- max W(alphas)-------------
W(alphas)= 1.05602850898 


---------------- max W(alphas)-------------
W(alphas)= 1.09935581252 


---------------- max W(alphas)-------------
W(alphas)= 1.1000959207 


---------------- max W(alphas)-------------
W(alphas)= 1.1020184241 


---------------- max W(alphas)-------------
W(alphas)= 1.37252176298 


---------------- max W(alphas)-------------
W(alphas)= 1.37318113788 


---------------- max W(alphas)-------------
W(alphas)= 1.30420424719 


---------------- max W(alphas)-------------
W(alphas)= 1.27448105315 


---------------- max W(alphas)-------------
W(alphas)= 1.27448105315 


---------------- max W(alphas)-------------
W(alphas)= 1.29659804543 


---------------- max W(alphas)-------------
W(alphas)= 1.29659804543 


---------------- max W(alphas)-------------
W(alphas)= 1.45357178013 


---------------- max W(alphas)-------------
W(alphas)= 1.44283554071 


---------------- max W(alphas)-------------
W(alphas)= 1.42846910637 


---------------- max W(alphas)-------------
W(alphas)= 1.42846910637 


---------------- max W(alphas)-------------
W(alphas)= 1.42846910637 


---------------- max W(alphas)-------------
W(alphas)= 1.42846910637 


---------------- max W(alphas)-------------
W(alphas)= 1.42846910637 


---------------- max W(alphas)-------------
W(alphas)= 1.42846910637 


---------------- max W(alphas)-------------
W(alphas)= 1.42846910637 


---------------- max W(alphas)-------------
W(alphas)= 1.44174164661 


---------------- max W(alphas)-------------
W(alphas)= 1.44174164661 


---------------- max W(alphas)-------------
W(alphas)= 1.44174164661 


---------------- max W(alphas)-------------
W(alphas)= 1.5255097852 


---------------- max W(alphas)-------------
W(alphas)= 1.52799651754 


---------------- max W(alphas)-------------
W(alphas)= 1.52799651754 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.66410118437 


---------------- max W(alphas)-------------
W(alphas)= 1.713308663 


the line function is : F(x) = 2.93694985569 X  -  13.7908490406 

相关文章推荐

理解支持向量机(二)核函数

由之前对核函数的定义(见统计学习方法定义7.6): 设χ是输入空间(欧氏空间或离散集合),Η为特征空间(希尔伯特空间),如果存在一个从χ到Η的映射 φ(x): χ→Η 使得对所有的x,z∈χ...

支持向量机系列---为什么要引入核函数

前面的算法是针对线性可分的情况,当我们的样本线性不可分的时候怎么办呢?如果我们可以把线性不可分的问题转变成线性可分的问题该有多好!生活就是这么的美妙,我们可以将样本通过一个映射函数交它从原始空间投射到...

支持向量机SVM核函数的选择(七)

支持向量机是建立在统计学习理论基础之上的新一代机器学习算法,支持向量机的优势主要体现在解决线性不可分问题,它通过引入核函数,巧妙地解决了在高维空间中的内积运算,从而很好地解决了非线性分类问题。要构造出...

核函数与支持向量机入门

原文博客传送门:核函数与支持向量机入门。 理解支持向量机(Support Vector Machine, SVM)的角度很多。从分类问题入手,由最小化训练错误导出限制条件下的凸优化问题的解,进...

机器学习----支持向量机(核函数)

讲SVM扩展到非线性可分领域

解密SVM系列(一):关于拉格朗日乘子法和KKT条件

写在之前 支持向量机(SVM),一个神秘而众知的名字,在其出来就受到了莫大的追捧,号称最优秀的分类算法之一,以其简单的理论构造了复杂的算法,又以其简单的用法实现了复杂的问题,不得不说确实完美。 ...
  • on2way
  • on2way
  • 2015年08月17日 18:53
  • 17948

解密SVM系列(二):SVM的理论基础

上节我们探讨了关于拉格朗日乘子和KKT条件,这为后面SVM求解奠定基础,本节希望通俗的细说一下原理部分。一个简单的二分类问题如下图: 我们希望找到一个决策面使得两类分开,这个决策面一般表示就是W...
  • on2way
  • on2way
  • 2015年08月17日 19:14
  • 7175

解密SVM系列(三):SMO算法原理与实战求解

上节我们讨论到解SVM问题最终演化为求下列带约束条件的问题: minW(α)=12(∑i,j=1Nαiyiαjyjxi∗xj)−∑i=1Nαis.t.0≤αi≤C∑i=1Nαiyi=0min\qua...
  • on2way
  • on2way
  • 2015年08月17日 19:39
  • 9614

机器学习的时代来临,人类应该做点什么?

机器学习的时代来临,人类应该做点什么? 文章摘要: 近年来,机器学习技术得到飞速的发展,虽然很多人怀疑其有效性,或者害怕未来可能会影响到人类的发展。但是目前还不用过度的担心,而且国外已很多创业...

支持向量机通俗导论(理解SVM的三层境界)

作者:July、pluskid;致谢:白石。 出处:结构之法算法之道blog。 前言 第一层、了解SVM 1.0、什么是支持向量机SVM 1.1、线性分类 1.2、线性分类的一个例子 ...
内容举报
返回顶部
收藏助手
不良信息举报
您举报文章:SVM支持向量机总结(不包括高维核函数等)
举报原因:
原因补充:

(最多只允许输入30个字)