# 1. 代码
# 代码如下(示例):
#Python3.8
#@Time :2020/10/8
#@Software:Pycharm
#@Author :xuhaoyu
#@Filename:PlattSMO.py
from numpy import *
class optStruct:
    """Shared state for one run of the Platt SMO optimiser.

    Attributes:
        X: Training samples as passed in by the caller (one row per sample).
        labelMatrix: Class labels as passed in by the caller.
        constantC: Upper bound applied to every alpha.
        tolerance: Tolerance used when checking the KKT conditions.
        m: Number of rows in ``X``.
        alphas: (m, 1) matrix of Lagrange multipliers, initialised to zero.
        b: Bias term, initialised to 0.
        eCache: (m, 2) matrix; other functions in this file store rows of
            the form ``[validFlag, error]`` in it.
    """

    # The constructor must be spelled ``__init__`` (double underscores);
    # with ``_init_`` Python never calls it and the attributes are missing.
    def __init__(self, dataMatrixInput, classLabels, constantC, tolerance):
        self.X = dataMatrixInput
        self.labelMatrix = classLabels
        self.constantC = constantC
        self.tolerance = tolerance
        self.m = shape(dataMatrixInput)[0]
        # ``asmatrix`` is used instead of the ``mat`` alias, which newer
        # NumPy releases no longer export.
        self.alphas = asmatrix(zeros((self.m, 1)))
        self.b = 0
        self.eCache = asmatrix(zeros((self.m, 2)))
def selectJrand(alphaIndex, numberOfAlpha):
    """Draw a random integer index that is different from ``alphaIndex``.

    Candidates are sampled as ``int(random.uniform(0, numberOfAlpha))`` and
    re-drawn until one differs from ``alphaIndex``.

    Args:
        alphaIndex: Index that must not be returned.
        numberOfAlpha: Upper limit passed to the uniform sampler.

    Returns:
        The first sampled index that is not equal to ``alphaIndex``.
    """
    while True:
        candidate = int(random.uniform(0, numberOfAlpha))
        if candidate != alphaIndex:
            return candidate
# Clip alpha to the range [lowValue, highValue]: a value above highValue becomes highValue, and a value below lowValue becomes lowValue.
def clipAlpha(alpha, highValue, lowValue):
    """Clamp ``alpha`` from above by ``highValue``, then from below by ``lowValue``.

    Args:
        alpha: Value to clamp.
        highValue: Upper limit, applied first.
        lowValue: Lower limit, applied second (so it wins if the limits cross).

    Returns:
        The clamped value.
    """
    capped = highValue if alpha > highValue else alpha
    return lowValue if capped < lowValue else capped
def loadDataSet(fileName):
    """Read a tab-separated sample file with two features and one label per line.

    Each non-blank line must contain at least three tab-separated fields:
    the first two are parsed as the feature values and the third as the
    class label.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A tuple ``(dataMatrix, labelMatrix)`` where ``dataMatrix`` is a list
        of ``[feature0, feature1]`` float pairs and ``labelMatrix`` is the
        list of float labels, both in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``float``.
        IndexError: If a non-blank line has fewer than three fields.
    """
    dataMatrix = []   # feature rows
    labelMatrix = []  # class labels
    # The context manager guarantees the handle is closed even on a parse error.
    with open(fileName) as fileOpen:
        for line in fileOpen:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (for example a trailing newline at end of file).
                continue
            lineArray = stripped.split('\t')
            dataMatrix.append([float(lineArray[0]), float(lineArray[1])])
            labelMatrix.append(float(lineArray[2]))
    return dataMatrix, labelMatrix
def calcEk(oS, k):
    """Return the prediction error of the current model on sample ``k``.

    The decision value is computed with plain dot products on ``oS.X``:
    ``f(x_k) = sum_i(alpha_i * y_i * <x_i, x_k>) + b``; the error is
    ``E_k = f(x_k) - y_k``.

    Args:
        oS: Optimisation state providing ``alphas``, ``labelMatrix``, ``X``
            and ``b``. Assumes ``alphas`` and ``labelMatrix`` are (m, 1)
            matrices and ``X`` is an (m, n) matrix.
        k: Row index of the sample to evaluate.

    Returns:
        The error ``E_k`` as a plain Python float.
    """
    # The attribute is ``labelMatrix`` (the original read a non-existent ``labelMat``).
    weightedLabels = multiply(oS.alphas, oS.labelMatrix)
    # (1 x m) * (m x 1) collapses to one element; ``.item()`` extracts it
    # without relying on implicit matrix-to-float conversion.
    decision = (weightedLabels.T * (oS.X * oS.X[k, :].T)).item()
    # ``oS.b`` may be a plain number or a single-element matrix.
    bias = asarray(oS.b).item()
    label = asarray(oS.labelMatrix[k]).item()
    return float(decision + bias - label)
def selectJ(i, oS, Ei):
    """Choose the second multiplier index ``j`` to optimise together with ``i``.

    The error ``Ei`` is first stored in the cache as valid. If more than one
    cache row is flagged valid, the index whose freshly computed error
    differs most from ``Ei`` is chosen; otherwise a random index other than
    ``i`` is used.

    Args:
        i: Index of the first multiplier.
        oS: Optimisation state; ``oS.eCache`` is updated in place.
        Ei: Prediction error for sample ``i``.

    Returns:
        A tuple ``(j, Ej)``: the selected index and its prediction error.
    """
    oS.eCache[i] = [1, Ei]
    # Row indices whose "valid" flag (column 0) is non-zero.
    validEcacheList = nonzero(oS.eCache[:, 0].A)[0]
    if len(validEcacheList) > 1:
        maxK = -1
        maxDeltaE = 0
        Ej = 0
        for k in validEcacheList:
            if k == i:
                continue
            Ek = calcEk(oS, k)
            deltaE = abs(Ei - Ek)
            if deltaE > maxDeltaE:
                maxK = k
                # Record the new best gap; the original assigned the
                # variable to itself, so every later candidate "won".
                maxDeltaE = deltaE
                Ej = Ek
        if maxK >= 0:
            # The original returned None here, which broke unpacking in the caller.
            return maxK, Ej
        # No candidate had a positive gap: fall through to a random pick
        # rather than returning the sentinel index -1.
    j = selectJrand(i, oS.m)
    Ej = calcEk(oS, j)
    return j, Ej
def updateEk(oS, k):
    """Recompute the error of sample ``k`` and store it in ``oS.eCache``.

    The cache row is written as ``[1, error]``; the leading 1 is the flag
    that ``selectJ`` treats as "valid".
    """
    oS.eCache[k] = [1, calcEk(oS, k)]
def clipAlpha(alpha, highValue, lowValue):
    """Limit ``alpha`` to at most ``highValue`` and then to at least ``lowValue``.

    NOTE: this repeats an identical ``clipAlpha`` defined earlier in this
    file; being later, this definition is the one bound to the name afterwards.

    Args:
        alpha: Value to limit.
        highValue: Upper limit, checked first.
        lowValue: Lower limit, checked on the result of the first step.

    Returns:
        The limited value.
    """
    if alpha > highValue:
        # After capping, the lower limit is still checked against the cap.
        return lowValue if highValue < lowValue else highValue
    if alpha < lowValue:
        return lowValue
    return alpha
def innerL(i, oS):
    """Try to optimise the alpha pair ``(i, j)`` for a given first index ``i``.

    If sample ``i`` violates the KKT conditions by more than
    ``oS.tolerance``, a partner ``j`` is chosen with ``selectJ`` and both
    multipliers and the bias ``oS.b`` are updated in place, following the
    standard SMO pair update with a linear kernel on ``oS.X``.

    Args:
        i: Index of the first multiplier.
        oS: Optimisation state (``X``, ``labelMatrix``, ``alphas``, ``b``,
            ``constantC``, ``tolerance``, ``eCache``); modified in place.

    Returns:
        1 if the pair was changed, 0 otherwise.
    """
    Ei = calcEk(oS, i)
    yi = oS.labelMatrix[i]

    # KKT violation test. Both clauses compare y_i * E_i against the
    # tolerance; the original dropped ``* Ei`` from the second clause.
    violatesKkt = (
        ((yi * Ei < -oS.tolerance) and (oS.alphas[i] < oS.constantC))
        or ((yi * Ei > oS.tolerance) and (oS.alphas[i] > 0))
    )
    if not violatesKkt:
        return 0

    j, Ej = selectJ(i, oS, Ei)
    yj = oS.labelMatrix[j]
    # Copies are required: indexing a matrix row yields a view that would
    # change together with ``oS.alphas``.
    alphaIold = oS.alphas[i].copy()
    alphaJold = oS.alphas[j].copy()

    # Box bounds for the new alpha_j. ``L`` must be set in both branches;
    # the original left it undefined when the labels differ.
    if yi != yj:
        L = max(0, oS.alphas[j] - oS.alphas[i])
        H = min(oS.constantC, oS.constantC + oS.alphas[j] - oS.alphas[i])
    else:
        L = max(0, oS.alphas[j] + oS.alphas[i] - oS.constantC)
        H = min(oS.constantC, oS.alphas[j] + oS.alphas[i])
    if L == H:
        print('L == H')
        return 0

    # Linear-kernel inner products used by eta and by both bias candidates.
    kII = oS.X[i, :] * oS.X[i, :].T
    kIJ = oS.X[i, :] * oS.X[j, :].T
    kJJ = oS.X[j, :] * oS.X[j, :].T

    eta = 2.0 * kIJ - kII - kJJ
    if eta >= 0:
        print("eta >= 0 ")
        return 0

    oS.alphas[j] -= yj * (Ei - Ej) / eta
    oS.alphas[j] = clipAlpha(oS.alphas[j], H, L)
    updateEk(oS, j)
    if abs(oS.alphas[j] - alphaJold) < 0.0001:
        print("j not moving enough")
        return 0

    # alpha_i moves by the same amount as alpha_j in the opposite direction,
    # so the change is measured from the OLD alpha_j (the original used the
    # old alpha_i and the undefined name ``os``).
    oS.alphas[i] += yj * yi * (alphaJold - oS.alphas[j])
    updateEk(oS, i)

    deltaI = oS.alphas[i] - alphaIold
    deltaJ = oS.alphas[j] - alphaJold
    # b1 is derived from sample i (uses Ei, K_ii, K_ij);
    # b2 is derived from sample j (uses Ej, K_ij, K_jj).
    b1 = oS.b - Ei - yi * deltaI * kII - yj * deltaJ * kIJ
    b2 = oS.b - Ej - yi * deltaI * kIJ - yj * deltaJ * kJJ

    if (0 < oS.alphas[i]) and (oS.constantC > oS.alphas[i]):
        oS.b = b1
    elif (0 < oS.alphas[j]) and (oS.constantC > oS.alphas[j]):
        oS.b = b2
    else:
        oS.b = (b1 + b2) / 2.0
    return 1
def smoP(dataMatrixInput, classLabels, constantC, tolerance, maxIter, kTup=('lin', 0)):
    """Train with the full Platt SMO outer loop.

    The loop alternates between a sweep over every sample and sweeps over
    the non-bound samples only (``0 < alpha < constantC``). It stops after
    ``maxIter`` sweeps, or when a full sweep changes no alpha pair.

    Args:
        dataMatrixInput: Samples, one row per sample (converted to a matrix).
        classLabels: One label per sample (converted to a column matrix).
        constantC: Upper bound for every alpha.
        tolerance: KKT tolerance forwarded to the optimisation state.
        maxIter: Maximum number of sweeps.
        kTup: Kernel description. It is accepted for interface compatibility
            but is not used by this function: it is not forwarded to
            ``optStruct``.

    Returns:
        A tuple ``(b, alphas)`` taken from the optimisation state.
    """
    # ``asmatrix`` replaces the ``mat`` alias, which newer NumPy releases
    # no longer export.
    oS = optStruct(asmatrix(dataMatrixInput), asmatrix(classLabels).transpose(),
                   constantC, tolerance)
    iterCount = 0  # renamed from ``iter`` to avoid shadowing the builtin
    entireSet = True
    alphaPairsChanged = 0
    while (iterCount < maxIter) and ((alphaPairsChanged > 0) or entireSet):
        alphaPairsChanged = 0
        if entireSet:
            for i in range(oS.m):
                alphaPairsChanged += innerL(i, oS)
                print("fullSet,iter:{} i:{},pairs changed {}".format(iterCount, i, alphaPairsChanged))
            iterCount += 1
        else:
            # Indices of alphas strictly between the bounds; the original
            # read the non-existent attribute ``oS.alpha``.
            nonBoundIs = nonzero((oS.alphas.A > 0) * (oS.alphas.A < constantC))[0]
            for i in nonBoundIs:
                alphaPairsChanged += innerL(i, oS)
                print("non-bound,iter:{} i:{},pairs changed {}".format(iterCount, i, alphaPairsChanged))
            iterCount += 1
        # After a full sweep switch to non-bound sweeps; return to a full
        # sweep once a non-bound sweep makes no progress.
        if entireSet:
            entireSet = False
        elif alphaPairsChanged == 0:
            entireSet = True
        print("iteration number:{}".format(iterCount))
    return oS.b, oS.alphas
# Module-level run: load the samples from 'testSet.txt' and train with
# smoP using C = 0.6, tolerance = 0.001 and at most 40 sweeps.
dataArr, labelArr = loadDataSet(fileName='testSet.txt')
b, alphas = smoP(
    dataArr,
    labelArr,
    constantC=0.6,
    tolerance=0.001,
    maxIter=40,
)
#核转换函数
def kernelTrans(X, A, kTup):
    """Evaluate the kernel between every row of ``X`` and the single row ``A``.

    Args:
        X: (m, n) sample matrix.
        A: (1, n) row vector.
        kTup: Kernel description. ``kTup[0]`` is ``'lin'`` for the linear
            kernel or ``'rbf'`` for the Gaussian kernel; for ``'rbf'``,
            ``kTup[1]`` is the width sigma.

    Returns:
        An (m, 1) matrix whose row ``j`` is ``K(X[j], A)``.

    Raises:
        NameError: If ``kTup[0]`` is not a recognised kernel name.
    """
    X = asmatrix(X)
    A = asmatrix(A)
    kernelName = kTup[0]
    if kernelName == 'lin':
        return X * A.T
    if kernelName == 'rbf':
        # Squared Euclidean distance of every row of X to A, as an (m, 1) column.
        deltas = X - A
        squaredDistance = multiply(deltas, deltas).sum(axis=1)
        # Gaussian kernel exp(-||x - a||^2 / sigma^2): the width is SQUARED
        # (the original multiplied it by 2 instead).
        return exp(squaredDistance / (-1 * kTup[1] ** 2))
    raise NameError('Houston We have a problem that kernel is not recognized')
class optStruct:
    """Shared SMO optimisation state with a precomputed kernel matrix.

    Attributes:
        X: Training samples as passed in by the caller (one row per sample).
        labelMatrix: Class labels as passed in by the caller.
        constantC: Upper bound applied to every alpha.
        tolerance: Tolerance used when checking the KKT conditions.
        m: Number of rows in ``X``.
        alphas: (m, 1) matrix of Lagrange multipliers, initialised to zero.
        b: Bias term, initialised to 0.
        eCache: (m, 2) matrix; other functions in this file store rows of
            the form ``[validFlag, error]`` in it.
        k: (m, m) kernel matrix; column ``i`` holds
            ``kernelTrans(X, X[i, :], kTup)``.
    """

    # ``__init__`` needs double underscores to be called on construction.
    # ``kTup`` defaults to the linear kernel so four-argument calls keep working.
    def __init__(self, dataMatrixInput, classLabels, constantC, tolerance, kTup=('lin', 0)):
        self.X = dataMatrixInput
        # Spelled ``labelMatrix`` to match the attribute the rest of the
        # file reads (the original stored it as ``lableMatrix``).
        self.labelMatrix = classLabels
        self.constantC = constantC
        self.tolerance = tolerance
        self.m = shape(dataMatrixInput)[0]
        # ``asmatrix`` replaces the ``mat`` alias, which newer NumPy
        # releases no longer export.
        self.alphas = asmatrix(zeros((self.m, 1)))
        self.b = 0
        self.eCache = asmatrix(zeros((self.m, 2)))
        self.k = asmatrix(zeros((self.m, self.m)))
        for i in range(self.m):
            self.k[:, i] = kernelTrans(self.X, self.X[i, :], kTup)
#径向基测试函数,虽然我不懂啥意思但是先跑一遍代码再说,以后慢慢理解。
def testRbf(k1 = 1.3):
dataArr,labelArr = loadDataSet('testSetRBF.txt')
b,alphas = smoP(dataArr,labelArr,200,0.0001,10000,('rbf',k1))
dataMatrix = mat(dataArr)
labelMatrix = mat(labelArr)