from numpy import *
from time import sleep
def loadDataSet(fileName):
    """Load a tab-separated data file into feature and label lists.

    Each line must contain two feature values followed by a class label.

    Args:
        fileName: path to the tab-delimited text file.

    Returns:
        (dataMat, labelMat): parallel lists — dataMat is a list of
        [x1, x2] float pairs, labelMat a list of float labels.
    """
    dataMat = []
    labelMat = []
    # 'with' guarantees the file handle is closed; the original never closed it.
    with open(fileName) as fr:
        for line in fr.readlines():
            lineArr = line.strip().split('\t')
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(float(lineArr[2]))
    return dataMat, labelMat
# i is the index of the first alpha; m is the total number of alphas.
# Keeps drawing random indices until one different from i comes up.
def selectJrand(i, m):
    """Return a random index j in [0, m) with j != i."""
    j = i
    while j == i:
        j = int(random.uniform(0, m))
    return j
# Clamp an alpha that drifted above H or below L back into range.
def clipAlpha(aj, H, L):
    """Return aj clamped to the interval [L, H] (upper bound applied first)."""
    if aj > H:
        aj = H
    if aj < L:
        aj = L
    return aj
'''
Simplified SMO pseudocode:
    create an alpha vector and initialize it to zeros
    while the iteration count is below the maximum (outer loop):
        for every data vector in the data set (inner loop):
            if this data vector can be optimized:
                randomly select another data vector
                optimize the two vectors together
                if the pair cannot be optimized, move on to the next vector
        if no vectors were optimized, increment the iteration count and repeat
'''
def smoSimple(dataMatIn, classLabels, C, toler, maxIter):
    """Simplified Platt SMO for training a linear SVM.

    Args:
        dataMatIn:   list of m feature vectors (each length n).
        classLabels: list of m class labels (+1.0 / -1.0).
        C:           slack penalty constant bounding the alphas.
        toler:       tolerance used in the KKT-violation test.
        maxIter:     number of consecutive passes without any alpha
                     change required before stopping.

    Returns:
        (b, alphas): the scalar intercept and the m x 1 alpha matrix.
    """
    dataMatrix = mat(dataMatIn)
    labelMat = mat(classLabels).transpose()
    b = 0
    m, n = shape(dataMatrix)
    alphas = mat(zeros((m, 1)))
    # Renamed from 'iter' to avoid shadowing the builtin: counts
    # consecutive full passes in which no alpha pair changed.
    passCount = 0
    while passCount < maxIter:
        alphaPairsChanged = 0
        for i in range(m):
            # Prediction error on sample i: f(x_i) - y_i.
            fXi = float(multiply(alphas, labelMat).T * (dataMatrix * dataMatrix[i, :].T)) + b
            Ei = fXi - float(labelMat[i])
            # Optimize only alphas that violate the KKT conditions by more than toler.
            if ((labelMat[i] * Ei < -toler) and (alphas[i] < C)) or \
               ((labelMat[i] * Ei > toler) and (alphas[i] > 0)):
                j = selectJrand(i, m)
                fXj = float(multiply(alphas, labelMat).T * (dataMatrix * dataMatrix[j, :].T)) + b
                Ej = fXj - float(labelMat[j])
                alphaIold = alphas[i].copy()
                alphaJold = alphas[j].copy()
                # Bounds keeping alpha_j in [0, C] on the equality-constraint line.
                if labelMat[i] != labelMat[j]:
                    L = max(0, alphas[j] - alphas[i])
                    H = min(C, C + alphas[j] - alphas[i])
                else:
                    L = max(0, alphas[j] + alphas[i] - C)
                    H = min(C, alphas[j] + alphas[i])
                if L == H:
                    print("L==H")
                    continue
                # eta is the (negated) second derivative along the constraint;
                # it must be negative for the step below to be a maximizer.
                eta = 2.0 * dataMatrix[i, :] * dataMatrix[j, :].T \
                    - dataMatrix[i, :] * dataMatrix[i, :].T \
                    - dataMatrix[j, :] * dataMatrix[j, :].T
                if eta >= 0:
                    print("eta>=0")
                    continue
                alphas[j] -= labelMat[j] * (Ei - Ej) / eta
                alphas[j] = clipAlpha(alphas[j], H, L)
                if abs(alphas[j] - alphaJold) < 0.00001:
                    print("j not moving enough")
                    continue
                # Update i by the same amount as j, in the opposite direction.
                alphas[i] += labelMat[j] * labelMat[i] * (alphaJold - alphas[j])
                b1 = b - Ei - labelMat[i] * (alphas[i] - alphaIold) * dataMatrix[i, :] * dataMatrix[i, :].T \
                     - labelMat[j] * (alphas[j] - alphaJold) * dataMatrix[i, :] * dataMatrix[j, :].T
                b2 = b - Ej - labelMat[i] * (alphas[i] - alphaIold) * dataMatrix[i, :] * dataMatrix[j, :].T \
                     - labelMat[j] * (alphas[j] - alphaJold) * dataMatrix[j, :] * dataMatrix[j, :].T
                if (0 < alphas[i]) and (C > alphas[i]):
                    b = b1
                elif (0 < alphas[j]) and (C > alphas[j]):
                    b = b2
                else:
                    b = (b1 + b2) / 2.0
                alphaPairsChanged += 1
                print("iter: %d i:%d, pairs changed %d" % (passCount, i, alphaPairsChanged))
        # Only count a pass toward termination if nothing moved.
        if alphaPairsChanged == 0:
            passCount += 1
        else:
            passCount = 0
        print("iteration number: %d" % passCount)
    return b, alphas
def kernelTrans(X, A, kTup):
    """Compute one kernel column: K(x_j, A) for every row x_j of X.

    Args:
        X:    m x n matrix of samples.
        A:    1 x n row vector (one sample).
        kTup: kernel spec tuple — ('lin', 0) for the linear kernel, or
              ('rbf', sigma) for a Gaussian radial-basis kernel.

    Returns:
        m x 1 matrix of kernel evaluations.

    Raises:
        NameError: if the kernel name in kTup[0] is not recognized.
    """
    m, n = shape(X)
    K = mat(zeros((m, 1)))
    if kTup[0] == 'lin':
        K = X * A.T  # linear kernel: plain inner products
    elif kTup[0] == 'rbf':
        for j in range(m):
            deltaRow = X[j, :] - A
            K[j] = deltaRow * deltaRow.T
        # NumPy '/' is element-wise, unlike MATLAB's matrix division.
        K = exp(K / (-1 * kTup[1] ** 2))
    else:
        # Implicit string concatenation; the original used a backslash
        # continuation *inside* the literal, embedding raw indentation
        # whitespace into the error message.
        raise NameError('Houston We Have a Problem -- '
                        'That Kernel is not recognized')
    return K
# Support structure for the full Platt SMO.
class optStruct:
    """Bundles the training data and mutable SMO state so the helper
    functions (calcEk, selectJ, updateEk, innerL) can share it."""

    def __init__(self, dataMatIn, classLabels, C, toler, kTup):
        """Initialize the structure with the training parameters.

        Args:
            dataMatIn:   m x n data matrix.
            classLabels: m x 1 label column vector.
            C:           slack penalty constant.
            toler:       KKT tolerance.
            kTup:        kernel spec tuple passed to kernelTrans.
        """
        self.X = dataMatIn
        self.labelMat = classLabels
        self.C = C
        self.tol = toler
        self.m = shape(dataMatIn)[0]
        self.alphas = mat(zeros((self.m, 1)))
        self.b = 0
        # Error cache: column 0 is a valid flag, column 1 the cached E value.
        self.eCache = mat(zeros((self.m, 2)))
        # Precompute the full m x m kernel matrix, one column per sample.
        self.K = mat(zeros((self.m, self.m)))
        for i in range(self.m):
            self.K[:, i] = kernelTrans(self.X, self.X[i, :], kTup)
# Error computation used by the cache machinery.
def calcEk(oS, k):
    """Return the prediction error E_k = f(x_k) - y_k for sample k,
    using the precomputed kernel column oS.K[:, k]."""
    fXk = float(multiply(oS.alphas, oS.labelMat).T * oS.K[:, k] + oS.b)
    return fXk - float(oS.labelMat[k])
# Inner-loop heuristic for choosing the second alpha.
def selectJ(i, oS, Ei):
    """Choose the second alpha index j (and its error Ej).

    Picks, among the valid error-cache entries, the j that maximizes
    |Ei - Ej| (biggest step size). On the first pass, when the cache
    holds no other valid entries, falls back to a random choice.
    """
    maxK = -1
    maxDeltaE = 0
    Ej = 0
    oS.eCache[i] = [1, Ei]  # mark Ei as valid in the cache
    validEcacheList = nonzero(oS.eCache[:, 0].A)[0]
    if len(validEcacheList) > 1:
        # Scan valid cache entries for the one maximizing delta E.
        for k in validEcacheList:
            if k == i:
                continue  # comparing i with itself is pointless
            Ek = calcEk(oS, k)
            deltaE = abs(Ei - Ek)
            if deltaE > maxDeltaE:
                maxK, maxDeltaE, Ej = k, deltaE, Ek
        return maxK, Ej
    # First time around: no other valid cache entries yet.
    j = selectJrand(i, oS.m)
    Ej = calcEk(oS, j)
    return j, Ej
def updateEk(oS, k):
    """Recompute E_k and store it (flagged valid) in the error cache.

    Called after any alpha changes so cached errors stay current.
    """
    oS.eCache[k] = [1, calcEk(oS, k)]
def innerL(i, oS):
    """Attempt one SMO optimization step starting from alpha i.

    Uses the selectJ heuristic (not random choice) for the second
    alpha and the precomputed kernel matrix oS.K.

    Returns:
        1 if the pair (alpha_i, alpha_j) was changed, otherwise 0.
    """
    Ei = calcEk(oS, i)
    # Guard clause: only proceed if alpha_i violates KKT within tolerance.
    kktViolated = (((oS.labelMat[i] * Ei < -oS.tol) and (oS.alphas[i] < oS.C)) or
                   ((oS.labelMat[i] * Ei > oS.tol) and (oS.alphas[i] > 0)))
    if not kktViolated:
        return 0
    j, Ej = selectJ(i, oS, Ei)  # heuristic second choice, replaces selectJrand
    alphaIold = oS.alphas[i].copy()
    alphaJold = oS.alphas[j].copy()
    # Feasible range for the new alpha_j on the equality-constraint line.
    if oS.labelMat[i] != oS.labelMat[j]:
        L = max(0, oS.alphas[j] - oS.alphas[i])
        H = min(oS.C, oS.C + oS.alphas[j] - oS.alphas[i])
    else:
        L = max(0, oS.alphas[j] + oS.alphas[i] - oS.C)
        H = min(oS.C, oS.alphas[j] + oS.alphas[i])
    if L == H:
        print("L==H")
        return 0
    # Curvature along the constraint, expressed through kernel entries.
    eta = 2.0 * oS.K[i, j] - oS.K[i, i] - oS.K[j, j]
    if eta >= 0:
        print("eta>=0")
        return 0
    oS.alphas[j] -= oS.labelMat[j] * (Ei - Ej) / eta
    oS.alphas[j] = clipAlpha(oS.alphas[j], H, L)
    updateEk(oS, j)  # keep the error cache current
    if abs(oS.alphas[j] - alphaJold) < 0.00001:
        print("j not moving enough")
        return 0
    # Move alpha_i by the same amount as j, in the opposite direction.
    oS.alphas[i] += oS.labelMat[j] * oS.labelMat[i] * (alphaJold - oS.alphas[j])
    updateEk(oS, i)
    b1 = (oS.b - Ei
          - oS.labelMat[i] * (oS.alphas[i] - alphaIold) * oS.K[i, i]
          - oS.labelMat[j] * (oS.alphas[j] - alphaJold) * oS.K[i, j])
    b2 = (oS.b - Ej
          - oS.labelMat[i] * (oS.alphas[i] - alphaIold) * oS.K[i, j]
          - oS.labelMat[j] * (oS.alphas[j] - alphaJold) * oS.K[j, j])
    if (0 < oS.alphas[i]) and (oS.C > oS.alphas[i]):
        oS.b = b1
    elif (0 < oS.alphas[j]) and (oS.C > oS.alphas[j]):
        oS.b = b2
    else:
        oS.b = (b1 + b2) / 2.0
    return 1
def smoP(dataMatIn, classLabels, C, toler, maxIter, kTup=('lin', 0)):
    """Full Platt SMO outer loop.

    Alternates between full passes over all samples and passes over
    only the non-bound alphas (0 < alpha < C), stopping when maxIter
    iterations elapse or no alpha pair changes during a cycle.

    Args:
        dataMatIn:   list of m feature vectors.
        classLabels: list of m class labels (+1.0 / -1.0).
        C:           slack penalty constant.
        toler:       KKT tolerance.
        maxIter:     maximum number of outer-loop iterations.
        kTup:        kernel spec tuple forwarded to optStruct.

    Returns:
        (b, alphas): intercept and m x 1 alpha matrix.
    """
    oS = optStruct(mat(dataMatIn), mat(classLabels).transpose(), C, toler, kTup)
    # Renamed from 'iter' to avoid shadowing the builtin.
    iterNum = 0
    entireSet = True
    alphaPairsChanged = 0
    while (iterNum < maxIter) and ((alphaPairsChanged > 0) or entireSet):
        alphaPairsChanged = 0
        if entireSet:
            # Full pass over every sample.
            for i in range(oS.m):
                alphaPairsChanged += innerL(i, oS)
                print("fullSet, iter: %d i:%d, pairs changed %d" % (iterNum, i, alphaPairsChanged))
            iterNum += 1
        else:
            # Pass over non-bound alphas only (not railed at 0 or C).
            nonBoundIs = nonzero((oS.alphas.A > 0) * (oS.alphas.A < C))[0]
            for i in nonBoundIs:
                alphaPairsChanged += innerL(i, oS)
                print("non-bound, iter: %d i:%d, pairs changed %d" % (iterNum, i, alphaPairsChanged))
            iterNum += 1
        # Toggle between the full-set and non-bound scans.
        if entireSet:
            entireSet = False
        elif alphaPairsChanged == 0:
            entireSet = True
        print("iteration number: %d" % iterNum)
    return oS.b, oS.alphas
def calcWs(alphas, dataArr, classLabels):
    """Recover the separating hyperplane's weight vector.

    Computes w = sum_i alpha_i * y_i * x_i over all training samples.

    Returns:
        n x 1 NumPy array of weights.
    """
    X = mat(dataArr)
    labelMat = mat(classLabels).transpose()
    m, n = shape(X)
    w = zeros((n, 1))
    for i in range(m):
        w += multiply(alphas[i] * labelMat[i], X[i, :].T)
    return w
if __name__ == '__main__':
    # Demo driver: train on the book's sample data set and print the
    # recovered weight vector. Guarded so importing this module does
    # not trigger training (the original ran at import time).
    dataArr, labelArr = loadDataSet('testSet.txt')
    b, alphas = smoP(dataArr, labelArr, 0.6, 0.001, 40)
    ws = calcWs(alphas, dataArr, labelArr)
    print(ws)