Today I finished writing the code for an SVM (support vector machine).
The code follows 《机器学习实战》 (Machine Learning in Action), but I felt its logic wasn't very clear, so I reworked it with reference to Li Hang's book.
The data is the dataset from the support vector machine chapter of 《机器学习实战》. Here is the code, shared for everyone:
# coding=utf8
import sys
import os
import numpy as np
from numpy import *
import matplotlib.pylab as plt

reload(sys)
sys.setdefaultencoding('utf-8')
os.chdir(r'D:\Study\ML\MLAction\Ch06')


class SmoOpt:
    """Holds the data that the SMO optimization keeps reusing."""
    def __init__(self, X, y, C, tol):
        # input parameters
        self.X = X
        self.y = y
        self.C = C
        self.tol = tol
        # model variables
        self.alpha = None
        self.b = None
        # cached data
        self.K = None
        self.E = None


def cal_predict(x_index, opt):
    """
    Raw prediction g(x_i) for a training sample (not the final sign() decision).
    :param x_index: index of the training sample
    :param opt: SmoOpt instance
    :return: g(x_i)
    """
    y_pred = np.sum(opt.alpha * opt.y * opt.K[:, x_index]) + opt.b
    return y_pred


def calcEk(opt, k):
    """Error E_k = g(x_k) - y_k for training sample k."""
    fXk = np.sum(opt.alpha * opt.y * opt.K[:, k]) + opt.b
    Ek = fXk - opt.y[k]
    return Ek


def updateEk(opt, k):
    opt.E[k] = calcEk(opt, k)


def updateE(opt):
    for i in range(opt.X.shape[0]):
        opt.E[i] = calcEk(opt, i)


def check_KKT(alpha_index, opt):
    """
    Check whether the variable alpha_i satisfies the KKT conditions (within tol).
    :param alpha_index: index of the variable to check
    :param opt: SmoOpt instance
    :return: bool
    """
    is_KKT = False
    alpha = opt.alpha[alpha_index]
    C = opt.C
    tol = opt.tol
    y_true = opt.y[alpha_index]
    y_pred = cal_predict(alpha_index, opt)
    if alpha == 0 and y_true * y_pred - 1 >= tol:
        is_KKT = True
    elif alpha == C and y_true * y_pred - 1 <= -tol:
        is_KKT = True
    elif 0 < alpha < C and -tol <= y_true * y_pred - 1 <= tol:
        is_KKT = True
    # Alternative check used in Machine Learning in Action:
    # is_KKT1 = True
    # if ((opt.y[alpha_index]*opt.E[alpha_index] < -opt.tol) and (opt.alpha[alpha_index] < opt.C)) \
    #         or ((opt.y[alpha_index]*opt.E[alpha_index] > opt.tol) and (opt.alpha[alpha_index] > 0)):
    #     is_KKT1 = False
    return is_KKT


# region kernel functions
def rbf(X, Xi, kernel_opt):
    X_sum = np.sum(np.power(X - Xi, 2), axis=1)
    K = np.exp(-0.5 * X_sum / kernel_opt ** 2)
    return K


def poly(X, Xi, kernel_opt):
    K = (np.dot(X, Xi) + 1) ** kernel_opt
    return K


def linear(X, Xi, kernel_opt):
    K = np.dot(X, Xi)
    return K
# endregion


def smo(X, y, C=1, tol=1e-6, kernel=rbf, kernel_opt=1):
    """
    SMO algorithm: find the Lagrange multipliers alpha and the intercept b of the
    separating hyperplane.
    :param X: m*n array
    :param y: 1*m array of labels (+1/-1)
    :param C: penalty parameter
    :param tol: tolerance
    :param kernel: kernel function
    :param kernel_opt: kernel parameter
    :return: alpha, b
    """
    # input check
    if X is None or y is None:
        return None, None
    # initialization
    m, n = X.shape
    opt = SmoOpt(X, y, C, tol)
    opt.alpha = np.zeros(m)
    opt.b = 0.0
    # kernel matrix
    K = np.zeros((m, m))
    for i in range(m):
        K[i] = kernel(opt.X, X[i], kernel_opt)
    opt.K = K
    # error cache
    E = np.zeros(m)
    for i in range(m):
        E[i] = calcEk(opt, i)
    opt.E = E
    # iteratively update alpha
    while True:
        is_changed = False  # whether any variable was changed in this pass
        # region pick the two variables alphai, alphaj to update
        # First variable alphai: look for a point that violates the KKT conditions,
        # scanning the points on the margin (0 < alpha < C) first, then all the others;
        # if no violator is found the algorithm terminates.
        alpha_0C = (opt.alpha > 0) * (opt.alpha < opt.C)
        alpha_index = np.argsort(~alpha_0C)  # margin points first, then the rest
        for besti in alpha_index:
            bestj = -1
            if check_KKT(besti, opt):
                continue
            # Second variable alphaj: the point whose error differs most from Ei
            # (maximize |Ei - Ej|), so that the step is large enough.
            # A faster heuristic (if Ei > 0 take the smallest Ej, otherwise the largest,
            # then fall back to the margin points and to all points, and finally switch
            # to another alphai) is kept below for reference:
            # if opt.E[besti] > 0:
            #     j = np.argmin(opt.E)
            # else:
            #     j = np.argmax(opt.E)
            # if besti != j:
            #     bestj = j
            # # scan the margin points and the other points
            # if bestj == -1:
            #     for j in alpha_index:
            #         if besti != j:
            #             bestj = j
            #             break
            # # still nothing found: switch to another alphai
            # if bestj == -1:
            #     continue
            maxE_diff = 0
            for j in alpha_index:
                if besti == j:
                    continue
                E_diff = np.abs(opt.E[besti] - opt.E[j])
                if E_diff > maxE_diff:
                    bestj = j
                    maxE_diff = E_diff
            if bestj == -1:  # no usable second variable, switch to another alphai
                continue
            # endregion
            # region update alphai and alphaj
            eta = opt.K[besti, besti] + opt.K[bestj, bestj] - 2 * opt.K[besti, bestj]
            if eta <= 0:  # no positive curvature along this direction, skip this pair
                continue
            alphaj = opt.alpha[bestj] + opt.y[bestj] * (opt.E[besti] - opt.E[bestj]) / eta
            if opt.y[besti] != opt.y[bestj]:
                L = np.max([0, opt.alpha[bestj] - opt.alpha[besti]])
                H = np.min([opt.C, opt.C + opt.alpha[bestj] - opt.alpha[besti]])
            else:
                L = np.max([0, opt.alpha[bestj] + opt.alpha[besti] - opt.C])
                H = np.min([opt.C, opt.alpha[bestj] + opt.alpha[besti]])
            if alphaj > H:
                alphaj = H
            elif alphaj < L:
                alphaj = L
            if np.abs(alphaj - opt.alpha[bestj]) < tol:
                continue  # alphaj did not move far enough, look for another pair
            alphai = opt.alpha[besti] + opt.y[besti] * opt.y[bestj] * (opt.alpha[bestj] - alphaj)
            # endregion
            # region update b
            b1_new = -opt.E[besti] - opt.y[besti] * opt.K[besti, besti] * (alphai - opt.alpha[besti]) \
                     - opt.y[bestj] * opt.K[bestj, besti] * (alphaj - opt.alpha[bestj]) + opt.b
            b2_new = -opt.E[bestj] - opt.y[besti] * opt.K[besti, bestj] * (alphai - opt.alpha[besti]) \
                     - opt.y[bestj] * opt.K[bestj, bestj] * (alphaj - opt.alpha[bestj]) + opt.b
            if 0 < alphai < opt.C:
                b = b1_new
            elif 0 < alphaj < opt.C:
                b = b2_new
            else:
                b = (b1_new + b2_new) / 2.0
            opt.b = b
            opt.alpha[besti] = alphai
            opt.alpha[bestj] = alphaj
            # endregion
            # region update the error cache
            updateE(opt)
            # opt.E[besti] = calcEk(opt, besti)
            # opt.E[bestj] = calcEk(opt, bestj)
            # endregion
            is_changed = True
        # no variable was changed in this pass: terminate
        if not is_changed:
            break
    return opt.alpha, opt.b


def loadDataSet(fileName):
    dataMat = []
    labelMat = []
    fr = open(fileName)
    for line in fr.readlines():
        lineArr = line.strip().split('\t')
        dataMat.append([float(lineArr[0]), float(lineArr[1])])
        labelMat.append(float(lineArr[2]))
    return np.array(dataMat), np.array(labelMat)


def calcWs(alphas, dataArr, classLabels):
    X = mat(dataArr)
    labelMat = mat(classLabels).transpose()
    m, n = shape(X)
    w = zeros((n, 1))
    for i in range(m):
        w += multiply(alphas[i] * labelMat[i], X[i, :].T)
    return w


def svm_test():
    k1 = 0.5
    kernel = linear
    dataArr, labelArr = loadDataSet('testSet.txt')
    # dataArr, labelArr = loadDataSet('testSetRBF.txt')
    alphas, b = smo(dataArr, labelArr, 200, 0.0001, kernel, k1)  # C=200 important
    svInd = nonzero(alphas > 0)
    sVs = dataArr[svInd]  # matrix of only the support vectors
    labelSV = labelArr[svInd]
    print 'there are %d Support Vectors' % shape(sVs)[0]
    m, n = shape(dataArr)
    errorCount = 0
    for k in range(m):
        kernelEval = kernel(sVs, dataArr[k], k1)
        predict = np.sum(kernelEval * labelSV * alphas[svInd]) + b
        if sign(predict) != sign(labelArr[k]):
            errorCount += 1
    print "the training error rate is: %f" % (float(errorCount) / m)
    w = np.asarray(calcWs(alphas, dataArr, labelArr)).flatten()
    print(w)
    plt.scatter(dataArr[:, 0], dataArr[:, 1], c=labelArr)
    x = arange(2, 8, 0.5)
    y = (-w[0] * x - b) / w[1]
    plt.plot(x, y)
    plt.show()
    # dataArr, labelArr = loadDataSet('testSetRBF2.txt')
    # errorCount = 0
    # m, n = shape(dataArr)
    # for i in range(m):
    #     kernelEval = kernel(sVs, dataArr[i], k1)
    #     predict = np.sum(kernelEval * labelSV * alphas[svInd]) + b
    #     if sign(predict) != sign(labelArr[i]):
    #         errorCount += 1
    # print "the test error rate is: %f" % (float(errorCount) / m)


svm_test()
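To make the result easier to apply to new samples, here is a minimal sketch of a decision function built from the alpha and b that smo returns. It reuses the kernel functions defined above; the helper name predict_one and the example call are my own additions for illustration, not code from the book.

def predict_one(x, X, y, alphas, b, kernel=rbf, kernel_opt=1):
    """Evaluate f(x) = sum_i alpha_i * y_i * K(x_i, x) + b for one new sample x."""
    sv = alphas > 0                             # only support vectors contribute
    kernelEval = kernel(X[sv], x, kernel_opt)   # K(x_i, x) for every support vector
    return np.sum(kernelEval * y[sv] * alphas[sv]) + b

# Hypothetical usage after training on the same data as svm_test():
# alphas, b = smo(dataArr, labelArr, 200, 0.0001, linear, 0.5)
# label = sign(predict_one(dataArr[0], dataArr, labelArr, alphas, b, linear, 0.5))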