SVM Parameter Tuning in Python: A Small SVM Example

This post walks through the workings and implementation of SMO (Sequential Minimal Optimization), the optimization algorithm at the heart of SVM training. It covers everything from data preprocessing to the core SMO routine, including loading the data, selecting random alpha pairs, adjusting alpha values, and updating the error cache, showing how an SVM iteratively converges on the best separating hyperplane. The complete SMO function is provided, along with helpers such as kernelTrans (kernel transformation) and calcWs (weight computation).
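As a mental model for everything below: a trained SVM classifies a point x by the sign of f(x) = Σᵢ αᵢyᵢ⟨xᵢ, x⟩ + b, where the αᵢ are the Lagrange multipliers that SMO optimizes. A minimal NumPy sketch of that decision rule (the helper name and shapes here are illustrative, not part of the code below):

import numpy as np

def svm_predict(alphas, labels, X, b, x):
    # f(x) = sum_i alpha_i * y_i * <x_i, x> + b; classify by its sign.
    # alphas, labels: shape (m,); X: shape (m, n); x: shape (n,)
    fx = float(np.sum(alphas * labels * (X @ x)) + b)
    return 1.0 if fx >= 0 else -1.0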

from numpy import *
from time import sleep

# Load a tab-separated dataset: two feature columns plus a class label per line.
def loadDataSet(fileName):
    dataMat = []; labelMat = []
    fr = open(fileName)
    for line in fr.readlines():
        lineArr = line.strip().split('\t')
        dataMat.append([float(lineArr[0]), float(lineArr[1])])
        labelMat.append(float(lineArr[2]))
    return dataMat, labelMat
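Given how loadDataSet parses each line, testSet.txt must hold two tab-separated feature values followed by a ±1 label per row; a hypothetical file would look like this (the numbers are only illustrative):

3.542485	1.977398	-1
3.018896	2.556416	-1
7.551510	-1.580030	1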

# i is the index of the first alpha; m is the total number of alphas.
# The function keeps picking at random until the result differs from i.
def selectJrand(i, m):
    j = i  # we want to select any j not equal to i
    while (j == i):
        j = int(random.uniform(0, m))
    return j

# Clip alpha values that are greater than H or less than L.
def clipAlpha(aj, H, L):
    if aj > H:
        aj = H
    if L > aj:
        aj = L
    return aj
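clipAlpha simply projects a candidate alpha back into the box [L, H]; a quick check:

print(clipAlpha(1.3, 1.0, 0.0))   # 1.0 (clipped down to H)
print(clipAlpha(-0.2, 1.0, 0.0))  # 0.0 (clipped up to L)
print(clipAlpha(0.5, 1.0, 0.0))   # 0.5 (already inside the box)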

'''
Create an alpha vector and initialize it to all zeros
While the number of iterations is less than the maximum (outer loop):
    For every data vector in the dataset (inner loop):
        If the data vector can be optimized:
            Randomly select another data vector
            Optimize the two vectors together
            If neither vector can be optimized, exit the inner loop
    If no vectors were optimized, increment the iteration count and continue
'''

def smoSimple(dataMatIn, classLabels, C, toler, maxIter):
    dataMatrix = mat(dataMatIn); labelMat = mat(classLabels).transpose()
    b = 0; m, n = shape(dataMatrix)
    alphas = mat(zeros((m, 1)))
    iter = 0
    while (iter < maxIter):
        alphaPairsChanged = 0
        for i in range(m):
            fXi = float(multiply(alphas, labelMat).T * (dataMatrix * dataMatrix[i, :].T)) + b
            Ei = fXi - float(labelMat[i])
            if ((labelMat[i]*Ei < -toler) and (alphas[i] < C)) or ((labelMat[i]*Ei > toler) and (alphas[i] > 0)):
                j = selectJrand(i, m)
                fXj = float(multiply(alphas, labelMat).T * (dataMatrix * dataMatrix[j, :].T)) + b
                Ej = fXj - float(labelMat[j])
                alphaIold = alphas[i].copy(); alphaJold = alphas[j].copy()
                if (labelMat[i] != labelMat[j]):
                    L = max(0, alphas[j] - alphas[i])
                    H = min(C, C + alphas[j] - alphas[i])
                else:
                    L = max(0, alphas[j] + alphas[i] - C)
                    H = min(C, alphas[j] + alphas[i])
                if L == H: print("L==H"); continue
                eta = 2.0 * dataMatrix[i, :]*dataMatrix[j, :].T - dataMatrix[i, :]*dataMatrix[i, :].T - dataMatrix[j, :]*dataMatrix[j, :].T
                if eta >= 0: print("eta>=0"); continue
                alphas[j] -= labelMat[j]*(Ei - Ej)/eta
                alphas[j] = clipAlpha(alphas[j], H, L)
                if (abs(alphas[j] - alphaJold) < 0.00001): print("j not moving enough"); continue
                alphas[i] += labelMat[j]*labelMat[i]*(alphaJold - alphas[j])  # update i by the same amount as j
                # the update is in the opposite direction
                b1 = b - Ei - labelMat[i]*(alphas[i]-alphaIold)*dataMatrix[i, :]*dataMatrix[i, :].T - labelMat[j]*(alphas[j]-alphaJold)*dataMatrix[i, :]*dataMatrix[j, :].T
                b2 = b - Ej - labelMat[i]*(alphas[i]-alphaIold)*dataMatrix[i, :]*dataMatrix[j, :].T - labelMat[j]*(alphas[j]-alphaJold)*dataMatrix[j, :]*dataMatrix[j, :].T
                if (0 < alphas[i]) and (C > alphas[i]): b = b1
                elif (0 < alphas[j]) and (C > alphas[j]): b = b2
                else: b = (b1 + b2)/2.0
                alphaPairsChanged += 1
                print("iter: %d i:%d, pairs changed %d" % (iter, i, alphaPairsChanged))
        if (alphaPairsChanged == 0): iter += 1
        else: iter = 0
        print("iteration number: %d" % iter)
    return b, alphas
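A typical way to exercise smoSimple, mirroring the smoP call at the bottom of this post (same C=0.6, toler=0.001, maxIter=40; results vary between runs because selectJrand picks j at random):

dataArr, labelArr = loadDataSet('testSet.txt')
b, alphas = smoSimple(dataArr, labelArr, 0.6, 0.001, 40)
# the support vectors are exactly the points with alpha > 0
for i in range(len(dataArr)):
    if alphas[i] > 0.0:
        print(dataArr[i], labelArr[i])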

def kernelTrans(X, A, kTup):  # calc the kernel or transform data to a higher-dimensional space
    m, n = shape(X)
    K = mat(zeros((m, 1)))
    if kTup[0] == 'lin': K = X * A.T  # linear kernel
    elif kTup[0] == 'rbf':
        for j in range(m):
            deltaRow = X[j, :] - A
            K[j] = deltaRow * deltaRow.T
        K = exp(K / (-1 * kTup[1]**2))  # divide in NumPy is element-wise, not matrix-wise as in MATLAB
    else: raise NameError('Houston We Have a Problem -- That Kernel is not recognized')
    return K
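In the 'rbf' branch, kTup[1] plays the role of σ in the Gaussian kernel K(a, b) = exp(-‖a−b‖²/σ²); note this code divides by σ² rather than the more common 2σ². A quick sanity check on one kernel column, with made-up points:

X = mat([[1.0, 2.0], [2.0, 0.0], [0.0, 1.0]])
col = kernelTrans(X, X[0, :], ('rbf', 1.3))
print(col)  # 3x1 column; entry j is K(X[j], X[0]), and K(X[0], X[0]) == 1.0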

# Support functions for the complete (full Platt) SMO
class optStruct:
    def __init__(self, dataMatIn, classLabels, C, toler, kTup):  # initialize the structure with the parameters
        self.X = dataMatIn
        self.labelMat = classLabels
        self.C = C
        self.tol = toler
        self.m = shape(dataMatIn)[0]
        self.alphas = mat(zeros((self.m, 1)))
        self.b = 0
        self.eCache = mat(zeros((self.m, 2)))  # first column is a valid flag
        self.K = mat(zeros((self.m, self.m)))
        for i in range(self.m):
            self.K[:, i] = kernelTrans(self.X, self.X[i, :], kTup)
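optStruct precomputes the full m×m kernel matrix up front, so innerL can simply index oS.K[i, j] instead of recomputing dot products; the trade-off is O(m²) memory. A toy instantiation with a linear kernel (values invented):

oS = optStruct(mat([[1.0, 2.0], [2.0, 0.0]]), mat([1.0, -1.0]).transpose(), 0.6, 0.001, ('lin', 0))
print(oS.K)  # 2x2 Gram matrix: K[i, j] = <x_i, x_j>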

# Error cache: E_k = f(x_k) - y_k for data point k
def calcEk(oS, k):
    fXk = float(multiply(oS.alphas, oS.labelMat).T * oS.K[:, k] + oS.b)
    Ek = fXk - float(oS.labelMat[k])
    return Ek

# Heuristic used in the inner loop
def selectJ(i, oS, Ei):  # this is the second-choice heuristic; it also calculates Ej
    maxK = -1; maxDeltaE = 0; Ej = 0
    oS.eCache[i] = [1, Ei]  # set valid; choose the alpha that gives the maximum delta E
    validEcacheList = nonzero(oS.eCache[:, 0].A)[0]
    if (len(validEcacheList)) > 1:
        for k in validEcacheList:  # loop through valid Ecache values and find the one that maximizes delta E
            if k == i: continue  # don't calc for i, waste of time
            Ek = calcEk(oS, k)
            deltaE = abs(Ei - Ek)
            # choose the j with the largest step size
            if (deltaE > maxDeltaE):
                maxK = k; maxDeltaE = deltaE; Ej = Ek
        return maxK, Ej
    else:  # in this case (first time around) we don't have any valid eCache values
        j = selectJrand(i, oS.m)
        Ej = calcEk(oS, j)
    return j, Ej
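The step taken on alpha_j in innerL is proportional to (Ei − Ej)/eta, so selectJ greedily picks the cached error farthest from Ei to get the biggest move. The selection logic boils down to this (hypothetical cached errors):

Ei = 0.4
cached = {1: -0.3, 2: 0.1, 3: 0.35}
j = max(cached, key=lambda k: abs(Ei - cached[k]))
print(j)  # 1, since |0.4 - (-0.3)| = 0.7 is the largest gap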

def updateEk(oS, k):  # after any alpha has changed, update the new value in the cache
    Ek = calcEk(oS, k)
    oS.eCache[k] = [1, Ek]

def innerL(i, oS):
    Ei = calcEk(oS, i)
    if ((oS.labelMat[i]*Ei < -oS.tol) and (oS.alphas[i] < oS.C)) or ((oS.labelMat[i]*Ei > oS.tol) and (oS.alphas[i] > 0)):
        j, Ej = selectJ(i, oS, Ei)  # this has been changed from selectJrand
        alphaIold = oS.alphas[i].copy(); alphaJold = oS.alphas[j].copy()
        if (oS.labelMat[i] != oS.labelMat[j]):
            L = max(0, oS.alphas[j] - oS.alphas[i])
            H = min(oS.C, oS.C + oS.alphas[j] - oS.alphas[i])
        else:
            L = max(0, oS.alphas[j] + oS.alphas[i] - oS.C)
            H = min(oS.C, oS.alphas[j] + oS.alphas[i])
        if L == H: print("L==H"); return 0
        eta = 2.0 * oS.K[i, j] - oS.K[i, i] - oS.K[j, j]  # changed for kernel
        if eta >= 0: print("eta>=0"); return 0
        oS.alphas[j] -= oS.labelMat[j]*(Ei - Ej)/eta
        oS.alphas[j] = clipAlpha(oS.alphas[j], H, L)
        updateEk(oS, j)  # added this for the Ecache
        if (abs(oS.alphas[j] - alphaJold) < 0.00001): print("j not moving enough"); return 0
        oS.alphas[i] += oS.labelMat[j]*oS.labelMat[i]*(alphaJold - oS.alphas[j])  # update i by the same amount as j
        updateEk(oS, i)  # added this for the Ecache; the update is in the opposite direction
        b1 = oS.b - Ei - oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.K[i, i] - oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.K[i, j]
        b2 = oS.b - Ej - oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.K[i, j] - oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.K[j, j]
        if (0 < oS.alphas[i]) and (oS.C > oS.alphas[i]): oS.b = b1
        elif (0 < oS.alphas[j]) and (oS.C > oS.alphas[j]): oS.b = b2
        else: oS.b = (b1 + b2)/2.0
        return 1
    else: return 0
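The L and H bounds inside innerL come from holding alpha_i*y_i + alpha_j*y_j constant while keeping both alphas in [0, C]: when the labels differ, the difference alpha_j − alpha_i is conserved; when they agree, the sum is. A small numeric illustration (values invented):

C = 0.6
ai, aj = 0.5, 0.2              # current alphas, labels assumed different
L, H = max(0, aj - ai), min(C, C + aj - ai)
print(L, H)                    # 0 0.3: the new alpha_j must land in [0, 0.3]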

def smoP(dataMatIn, classLabels, C, toler, maxIter, kTup=('lin', 0)):  # full Platt SMO
    oS = optStruct(mat(dataMatIn), mat(classLabels).transpose(), C, toler, kTup)
    iter = 0
    entireSet = True; alphaPairsChanged = 0
    while (iter < maxIter) and ((alphaPairsChanged > 0) or (entireSet)):
        alphaPairsChanged = 0
        if entireSet:  # go over all
            for i in range(oS.m):
                alphaPairsChanged += innerL(i, oS)
                print("fullSet, iter: %d i:%d, pairs changed %d" % (iter, i, alphaPairsChanged))
            iter += 1
        else:  # go over non-bound (railed) alphas
            nonBoundIs = nonzero((oS.alphas.A > 0) * (oS.alphas.A < oS.C))[0]
            for i in nonBoundIs:
                alphaPairsChanged += innerL(i, oS)
                print("non-bound, iter: %d i:%d, pairs changed %d" % (iter, i, alphaPairsChanged))
            iter += 1
        if entireSet: entireSet = False  # toggle entire set loop
        elif (alphaPairsChanged == 0): entireSet = True
        print("iteration number: %d" % iter)
    return oS.b, oS.alphas
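smoP takes the kernel as a tuple, so switching from the default linear kernel to the Gaussian one only changes the call; a plausible RBF run (C=200, toler=0.0001, maxIter=10000 and sigma=1.3 are example settings, not tuned values):

dataArr, labelArr = loadDataSet('testSet.txt')
b, alphas = smoP(dataArr, labelArr, 200, 0.0001, 10000, ('rbf', 1.3))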

def calcWs(alphas, dataArr, classLabels):
    X = mat(dataArr); labelMat = mat(classLabels).transpose()
    m, n = shape(X)
    w = zeros((n, 1))
    for i in range(m):
        w += multiply(alphas[i]*labelMat[i], X[i, :].T)
    return w

dataArr, labelArr = loadDataSet('testSet.txt')
b, alphas = smoP(dataArr, labelArr, 0.6, 0.001, 40)
ws = calcWs(alphas, dataArr, labelArr)
print(ws)
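With ws and b in hand, any point is classified by the sign of x·w + b; checking the first training point against its stored label:

datMat = mat(dataArr)
print(datMat[0] * mat(ws) + b)  # the sign of this value is the predicted class
print(labelArr[0])              # should agree with the sign above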
