简版SMO:
from numpy import *
import time
import matplotlib.pyplot as plt
def loaddata(filename):
datamat=[];labelmat=[]
fr=open(filename)
for line in fr.readlines():
linearr=line.strip().split('\t')
datamat.append([float(linearr[0]),float(linearr[1])])
labelmat.append(float(linearr[2]))
return datamat,labelmat
def selectrand(i,m):
j=i
while(j==i):
j=int(random.uniform(0,m))
return j
def clipalpha(aj,H,L):
if(aj<L):
aj=L
if(aj>H):
aj=H
return aj
def smoSimple(dataMatIn, classLabels, C, toler, maxIter):
dataMatrix = mat(dataMatIn); labelMat = mat(classLabels).transpose()
b = 0; m,n = shape(dataMatrix)
alphas = mat(zeros((m,1)))
iter = 0
while (iter < maxIter):
alphaPairsChanged = 0
for i in range(m):
fXi = float(multiply(alphas,labelMat).T*(dataMatrix*dataMatrix[i,:].T)) + b
Ei = fXi - float(labelMat[i])#if checks if an example violates KKT conditions
if ((labelMat[i]*Ei < -toler) and (alphas[i] < C)) or ((labelMat[i]*Ei > toler) and (alphas[i] > 0)):
j = selectrand(i,m)
fXj = float(multiply(alphas,labelMat).T*(dataMatrix*dataMatrix[j,:].T)) + b
Ej = fXj - float(labelMat[j])
alphaIold = alphas[i].copy(); alphaJold = alphas[j].copy()
if (labelMat[i] != labelMat[j]):
L = max(0, alphas[j] - alphas[i])
H = min(C, C + alphas[j] - alphas[i])
else:
L = max(0, alphas[j] + alphas[i] - C)
H = min(C, alphas[j] + alphas[i])
if L==H: print ("L==H"); continue
eta = 2.0 * dataMatrix[i,:]*dataMatrix[j,:].T - dataMatrix[i,:]*dataMatrix[i,:].T - dataMatrix[j,:]*dataMatrix[j,:].T
if eta >= 0: print ("eta>=0"); continue
alphas[j] -= labelMat[j]*(Ei - Ej)/eta
alphas[j] = clipalpha(alphas[j],H,L)
if (abs(alphas[j] - alphaJold) < 0.00001): print ("j not moving enough"); continue
alphas[i] += labelMat[j]*labelMat[i]*(alphaJold - alphas[j])#update i by the same amount as j
#the update is in the oppostie direction
b1 = b - Ei- labelMat[i]*(alphas[i]-alphaIold)*dataMatrix[i,:]*dataMatrix[i,:].T - labelMat[j]*(alphas[j]-alphaJold)*dataMatrix[i,:]*dataMatrix[j,:].T
b2 = b - Ej- labelMat[i]*(alphas[i]-alphaIold)*dataMatrix[i,:]*dataMatrix[j,:].T - labelMat[j]*(alphas[j]-alphaJold)*dataMatrix[j,:]*dataMatrix[j,:].T
if (0 < alphas[i]) and (C > alphas[i]): b = b1
elif (0 < alphas[j]) and (C > alphas[j]): b = b2
else: b = (b1 + b2)/2.0
alphaPairsChanged += 1
print ("iter: %d i:%d, pairs changed %d" % (iter,i,alphaPairsChanged))
if (alphaPairsChanged == 0):
iter += 1
else: iter = 0
print ("iteration number: %d" % iter)
return b,alphas
def calcws(alphas,data,label):
x=mat(data)
label=mat(label).transpose()
m,n=shape(x)
w=zeros((n,1))
for i in range(m):
w+=multiply(alphas[i]*label[i],x[i,:].T)
return w
data,label=loaddata(r"C:\Users\huashuo111\Desktop\python\machinelearninginaction\Ch06\testSet.txt")
data=mat(data)
label=mat(label)
b,alphas=smoSimple(data,label,0.6,0.001,40)
'''
w=mat(zeros((100,2)))
w=multiply(alphas,label.transpose())
w=multiply(w,data)
W=mat(zeros((1,2)))
W=sum(w,axis=0)
x=[]
y=[]
X=[]
Y=[]
for i in range(100):
if label[0,i]==1:
x.append(data[i,0])
y.append(data[i,1])
else:
X.append(data[i, 0])
Y.append(data[i, 1])
plt.scatter(x,y,s=20,c='red')
plt.scatter(X,Y,s=20,c='green')
xx=arange(0,10,0.1)
w1=W[0,0]
w2=W[0,1]
yy=(-b-w1*xx)/w2
A=[]
B=[]
for i in range(100):
A.append(xx[i])
B.append((yy[0,i]))
plt.plot(A,B)
plt.show()
'''
完整版SMO(速度提升约10倍)
import numpy as np
from numpy import *
import matplotlib.pyplot as plt
import time
def loadDataSet(filename):
dataMat=[];labelMat=[]
fr=open(filename)
for line in fr.readlines():
lineArr=line.strip().split('\t')
dataMat.append([float(lineArr[0]),float(lineArr[1])])
labelMat.append(float(lineArr[2]))
return dataMat,labelMat
def selectJrand(i,m):
j=i
while(j==i):
j=int(random.uniform(0,m))
return j
def clipAlphas(aj,H,L):
if aj>H:
aj=H
if aj<L:
aj=L
return aj
class optStruct:
def __init__(self,dataMatIn,classLabels,C,toler):
self.X=dataMatIn
self.labelMat=classLabels
self.C=C
self.tol=toler
self.m=shape(dataMatIn)[0]
self.alphas=mat(zeros((self.m,1)))
self.b=0
self.eCache=mat(zeros((self.m,2)))
def calcEk(oS,k):
fXk=float(multiply(oS.alphas,oS.labelMat).T*(oS.X*oS.X[k,:].T))+oS.b
Ek=fXk-float(oS.labelMat[k])
return Ek
def selectJ(i,oS,Ei):
maxK=-1;maxDeltaE=0;Ej=0
oS.eCache[i]=[1,Ei]
validEcacheList=nonzero(oS.eCache[:,0].A)[0]
if len(validEcacheList)>1:
for k in validEcacheList:
if k==i:continue
Ek=calcEk(oS,k)
deltaE=abs(Ei-Ek)
if(deltaE>maxDeltaE):
maxK=k;
maxDeltaE=deltaE;
Ej=Ek;
return maxK,Ej
else:
j=selectJrand(i,oS.m)
Ej=calcEk(oS,j)
return j,Ej
def updataEk(oS,k):
Ek=calcEk(oS,k)
oS.eCache[k]=[1,Ek]
def innerL(i,oS):
Ei=calcEk(oS,i)
if ((oS.labelMat[i]*Ei<-oS.tol)and(oS.alphas[i]<oS.C))or\
((oS.labelMat[i]*Ei>oS.tol)and(oS.alphas[i]>0)):
j,Ej=selectJ(i,oS,Ei)
alphaIold=oS.alphas[i].copy();alphaJold=oS.alphas[j].copy()
if(oS.labelMat[i]!=oS.labelMat[j]):
L=max(0,oS.alphas[j]-oS.alphas[i])
H=min(oS.C,oS.C+oS.alphas[j]-oS.alphas[i])
else:
L=max(0,oS.alphas[j]+oS.alphas[i]-oS.C)
H=min(oS.C,oS.C+oS.alphas[j]+oS.alphas[i])
if L==H:print("L==H");return 0
eta=2.0*oS.X[i,:]*oS.X[j,:].T-oS.X[i,:]*oS.X[i,:].T-oS.X[j,:]*oS.X[j,:].T
if eta>=0:print("eta>=0");return 0
oS.alphas[j]-=oS.labelMat[j]*(Ei-Ej)/eta
oS.alphas[j]=clipAlphas(oS.alphas[j],H,L)
updataEk(oS,j)
if(abs(oS.alphas[j]-alphaJold)<0.00001):
print("j not moving enough");return 0
oS.alphas[i]+=oS.labelMat[j]*oS.labelMat[i]*(alphaJold-oS.alphas[j])
updataEk(oS,i)
b1=oS.b-Ei-oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.X[i,:]*oS.X[i,:].T-\
oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.X[j,:]*oS.X[j,:].T
b2=oS.b-Ej-oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.X[i,:]*oS.X[i,:].T-\
oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.X[j,:]*oS.X[j,:].T
if(0<oS.alphas[i])and(oS.C>oS.alphas[i]):oS.b=b1
elif (0<oS.alphas[j])and(oS.C>oS.alphas[j]):oS.b=b2
else:oS.b=(b1+b2)/2
return 1
else:
return 0
def smoP(dataMatIn,classLabels,C,toler,maxIter):
oS=optStruct(mat(dataMatIn),mat(classLabels).transpose(),C,toler)
iter=0
entireSet=True;alphaPairsChanged=0
while(iter<maxIter)and((alphaPairsChanged>0)or(entireSet)):
alphaPairsChanged=0
if entireSet:
for i in range(oS.m):
alphaPairsChanged+=innerL(i,oS)
print("fullSet,iter:%d i:%d,pairs changed %d"%(iter,i,alphaPairsChanged))
iter+=1
else:
nonBoundIs=nonzero((oS.alphas.A>0)*(oS.alphas.A<oS.C))[0]
for i in nonBoundIs:
alphaPairsChanged+=innerL(i,oS)
print("non-bound,iter:%d i:%d,pairs changed %d"%(iter,i,alphaPairsChanged))
iter+=1
if entireSet:entireSet=False
elif(alphaPairsChanged==0):entireSet=True
print("iteration number:%d"%iter)
return oS.b,oS.alphas
def calWs(alphas,dataArr,classLabels):
X=mat(dataArr);labelMat=mat(classLabels).transpose()
m,n=shape(X)
w=zeros((n,1))
for i in range(m):
w+=multiply(alphas[i]*labelMat[i],X[i,:].T)
return w
def plotfig_SVM(xMat,yMat,ws,b,alphas):
xMat = mat(xMat)
yMat = mat(yMat)
b = array(b)[0] #b原来是矩阵,先转为数组类型后其数组大小为(1,1),所以后面加[0],变为(1,)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xMat[:,0].flatten().A[0],xMat[:,1].flatten().A[0]) #a.flatten()就是把a降到一维,默认是按横的方向降,矩阵.A(等效于矩阵.getA())变成了数组
x = arange(-1.0,10.0,0.1) #x最大值,最小值根据原数据集dataArr[:,0]的大小而定
y =(-b-ws[0][0]*x)/ws[1][0] #根据x.w + b = 0 得到,其式子展开为w0.x1 + w1.x2 + b = 0,x2就是y值
ax.plot(x,y)
for i in range(100): #找到支持向量,并在图中标红
if alphas[i]>0.0:
ax.plot(xMat[i,0],xMat[i,1],'ro')
plt.show()
dataArr,labelArr=loadDataSet(r'C:/Users/huashuo111/Desktop/python/machinelearninginaction/Ch06/testSet.txt')
b,alphas=smoP(dataArr,labelArr,0.6,0.001,40)
ws=calWs(alphas,dataArr,labelArr)
plotfig_SVM(dataArr,labelArr,ws,b,alphas)
![](//img-blog.csdn.net/20180321172304423)
核函数:
import numpy as np
from numpy import *
import matplotlib.pyplot as plt
import time
def loadDataSet(filename):
dataMat=[];labelMat=[]
fr=open(filename)
for line in fr.readlines():
lineArr=line.strip().split('\t')
dataMat.append([float(lineArr[0]),float(lineArr[1])])
labelMat.append(float(lineArr[2]))
return dataMat,labelMat
def selectJrand(i,m):
j=i
while(j==i):
j=int(random.uniform(0,m))
return j
def clipAlphas(aj,H,L):
if aj>H:
aj=H
if aj<L:
aj=L
return aj
class optStruct:
def __init__(self,dataMatIn,classLabels,C,toler,kTup):
self.X=dataMatIn
self.labelMat=classLabels
self.C=C
self.tol=toler
self.m=shape(dataMatIn)[0]
self.alphas=mat(zeros((self.m,1)))
self.b=0
self.eCache=mat(zeros((self.m,2)))
self.K=mat(zeros((self.m,self.m)))
for i in range(self.m):
self.K[:,i]=kernelTrans(self.X,self.X[i,:],kTup)
def calcEk(oS,k):
fXk=float(multiply(oS.alphas,oS.labelMat).T*oS.K[:,k])+oS.b
Ek=fXk-float(oS.labelMat[k])
return Ek
def selectJ(i,oS,Ei):
maxK=-1;maxDeltaE=0;Ej=0
oS.eCache[i]=[1,Ei]
validEcacheList=nonzero(oS.eCache[:,0].A)[0]
if len(validEcacheList)>1:
for k in validEcacheList:
if k==i:continue
Ek=calcEk(oS,k)
deltaE=abs(Ei-Ek)
if(deltaE>maxDeltaE):
maxK=k;
maxDeltaE=deltaE;
Ej=Ek;
return maxK,Ej
else:
j=selectJrand(i,oS.m)
Ej=calcEk(oS,j)
return j,Ej
def updataEk(oS,k):
Ek=calcEk(oS,k)
oS.eCache[k]=[1,Ek]
def innerL(i,oS):
Ei=calcEk(oS,i)
if ((oS.labelMat[i]*Ei<-oS.tol)and(oS.alphas[i]<oS.C))or\
((oS.labelMat[i]*Ei>oS.tol)and(oS.alphas[i]>0)):
j,Ej=selectJ(i,oS,Ei)
alphaIold=oS.alphas[i].copy();alphaJold=oS.alphas[j].copy()
if(oS.labelMat[i]!=oS.labelMat[j]):
L=max(0,oS.alphas[j]-oS.alphas[i])
H=min(oS.C,oS.C+oS.alphas[j]-oS.alphas[i])
else:
L=max(0,oS.alphas[j]+oS.alphas[i]-oS.C)
H=min(oS.C,oS.C+oS.alphas[j]+oS.alphas[i])
if L==H:print("L==H");return 0
eta=2.0*oS.K[i,j]-oS.K[i,i]-oS.K[j,j]
if eta>=0:print("eta>=0");return 0
oS.alphas[j]-=oS.labelMat[j]*(Ei-Ej)/eta
oS.alphas[j]=clipAlphas(oS.alphas[j],H,L)
updataEk(oS,j)
if(abs(oS.alphas[j]-alphaJold)<0.00001):
print("j not moving enough");return 0
oS.alphas[i]+=oS.labelMat[j]*oS.labelMat[i]*(alphaJold-oS.alphas[j])
updataEk(oS,i)
b1=oS.b-Ei-oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.K[i,i]-\
oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.K[j,j]
b2=oS.b-Ej-oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.K[i,i]-\
oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.K[j,j]
if(0<oS.alphas[i])and(oS.C>oS.alphas[i]):oS.b=b1
elif (0<oS.alphas[j])and(oS.C>oS.alphas[j]):oS.b=b2
else:oS.b=(b1+b2)/2
return 1
else:
return 0
def smoP(dataMatIn,classLabels,C,toler,maxIter,kTup=('lin',0)):
oS=optStruct(mat(dataMatIn),mat(classLabels).transpose(),C,toler,kTup)
iter=0
entireSet=True;alphaPairsChanged=0
while(iter<maxIter)and((alphaPairsChanged>0)or(entireSet)):
alphaPairsChanged=0
if entireSet:
for i in range(oS.m):
alphaPairsChanged+=innerL(i,oS)
print("fullSet,iter:%d i:%d,pairs changed %d"%(iter,i,alphaPairsChanged))
iter+=1
else:
nonBoundIs=nonzero((oS.alphas.A>0)*(oS.alphas.A<oS.C))[0]
for i in nonBoundIs:
alphaPairsChanged+=innerL(i,oS)
print("non-bound,iter:%d i:%d,pairs changed %d"%(iter,i,alphaPairsChanged))
iter+=1
if entireSet:entireSet=False
elif(alphaPairsChanged==0):entireSet=True
print("iteration number:%d"%iter)
return oS.b,oS.alphas
def kernelTrans(X,A,kTup):
m,n=shape(X)
K=mat(zeros((m,1)))
if kTup[0]=='lin':
K=X*A.T
elif kTup[0]=='rbf':
for j in range(m):
deltaRow=X[j,:]-A
K[j]=deltaRow*deltaRow.T
K=exp(-K/(kTup[1]**2))
else:raise NameError("The Kernel is not recognized!")
return K
def testRbf(k1=1.3):
dataArr,labelArr=loadDataSet(r'C:/Users/huashuo111/Desktop/python/machinelearninginaction/Ch06/testSetRBF.txt')
b,alphas=smoP(dataArr,labelArr,200,0.0001,10000,('rbf',k1))
dataMat=mat(dataArr)
labelMat=mat(labelArr).transpose()
svInd=nonzero(alphas.A>0)[0]
sVs=dataMat[svInd]
labelSV=labelMat[svInd];print("there are %d support vectors" % shape(sVs)[0])
m,n=shape(dataMat)
errorCount=0
for i in range(m):
kernelEval=kernelTrans(sVs,dataMat[i,:],('rbf',k1))
predict=kernelEval.T*multiply(labelSV,alphas[svInd])+b
if sign(predict)!=sign(labelArr[i]):errorCount+=1
print("the training error rate is: %f" %(float(errorCount)/m))
dataArr,labelArr=loadDataSet(r'C:/Users/huashuo111/Desktop/python/machinelearninginaction/Ch06/testSetRBF2.txt')
dataMat=mat(dataArr)
labelMat=mat(labelArr).transpose()
errorCount=0
m,n=shape(labelMat)
for i in range(m):
kernelEval=kernelTrans(sVs,dataMat[i,:],('rbf',k1))
predict=kernelEval.T*multiply(labelSV,alphas[svInd])+b
if sign(predict)!=sign(labelArr[i]):errorCount+=1
print("the test error rate is: %f" %(float(errorCount)/m))
def calWs(alphas,dataArr,classLabels):
X=mat(dataArr);labelMat=mat(classLabels).transpose()
m,n=shape(X)
w=zeros((n,1))
for i in range(m):
w+=multiply(alphas[i]*labelMat[i],X[i,:].T)
return w
def plotfig_SVM(xMat,yMat,ws,b,alphas):
xMat = mat(xMat)
yMat = mat(yMat)
b = array(b)[0] #b原来是矩阵,先转为数组类型后其数组大小为(1,1),所以后面加[0],变为(1,)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xMat[:,0].flatten().A[0],xMat[:,1].flatten().A[0]) #a.flatten()就是把a降到一维,默认是按横的方向降,矩阵.A(等效于矩阵.getA())变成了数组
x = arange(-1.0,10.0,0.1) #x最大值,最小值根据原数据集dataArr[:,0]的大小而定
y =(-b-ws[0][0]*x)/ws[1][0] #根据x.w + b = 0 得到,其式子展开为w0.x1 + w1.x2 + b = 0,x2就是y值
ax.plot(x,y)
for i in range(100): #找到支持向量,并在图中标红
if alphas[i]>0.0:
ax.plot(xMat[i,0],xMat[i,1],'ro')
plt.show()
testRbf()