优化后的SMO算法:
步骤1:先“扫描”所有乘子,把第一个违反KKT条件的作为更新对象,令为a1;
步骤2:在所有不违反KKT条件的乘子中,选择使|E1 −E2|最大的a2进行更新,使得能最大限度增大目标函数的值(类似于梯度下降。此外,求出来的E代表函数ui对输入xi的预测值与真实输出类标记yi之差)。
最后,每次更新完两个乘子的优化后,都需要再重新计算b,及对应的Ei值。
————————————————
版权声明:本文为CSDN博主「v_JULY_v」的原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/v_JULY_v/article/details/7624837
MySVM——smop
# -*- coding: utf-8 -*-
import random
import numpy as np
import matplotlib.pyplot as plt
# Load the training set from a tab-separated file.
def loadDataSet(filename):
    """Read a tab-separated data file: each line is "x1<TAB>x2<TAB>label".

    Returns (dataMat, labelMat): a list of [x1, x2] float feature pairs
    and a list of float class labels (expected to be +1.0 / -1.0).
    """
    dataMat = []
    labelMat = []
    # 'with' guarantees the handle is closed; the original opened the
    # file and never closed it (resource leak).
    with open(filename) as fr:
        for line in fr:
            lineArr = line.strip().split('\t')
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(float(lineArr[2]))
    return dataMat, labelMat
# Data structure holding everything the SMO loop mutates.
class optStruct:
    """Working state for SMO: samples, labels, hyper-parameters, the
    Lagrange multipliers (alphas), the bias b and the error cache."""

    def __init__(self, dataMatIn, classLabels, C, toler, kTup):
        self.X = dataMatIn               # (m, n) training samples
        self.labelMat = classLabels      # (m, 1) class labels
        self.C = C                       # box-constraint penalty parameter
        self.tol = toler                 # KKT violation tolerance
        self.b = 0                       # bias of the decision function
        self.m = np.shape(dataMatIn)[0]  # number of samples (rows)
        # One multiplier per sample, all starting at zero.
        self.alphas = np.mat(np.zeros((self.m, 1)))
        # Error cache: column 0 is a validity flag, column 1 the cached Ek.
        self.eCache = np.mat(np.zeros((self.m, 2)))
# Helper 1: random choice of the second multiplier's index.
def selectJrand(i, m):
    """Return a uniformly random index j in [0, m) with j != i."""
    while True:
        j = int(random.uniform(0, m))
        if j != i:
            return j
# Helper 2: clamp the new alpha_j into its feasible box.
def ClipAlpha(aj, H, L):
    """Clamp aj into [L, H] (upper bound applied first, as in the
    sequential if-chain it replaces)."""
    return max(L, min(H, aj))
# Prediction error of the current model on one sample.
def calcEk(oS, k):
    """E_k = f(x_k) - y_k under the linear kernel, where
    f(x) = sum_i alpha_i * y_i * <x_i, x> + b."""
    coeff = np.multiply(oS.alphas, oS.labelMat)           # alpha_i * y_i, (m, 1)
    fxk = float(coeff.T * (oS.X * oS.X[k, :].T) + oS.b)   # model output for x_k
    return fxk - float(oS.labelMat[k])
# Heuristic choice of the second alpha: maximize the step size |Ei - Ej|.
def selectJ(i, oS, Ei):
    """Pick j for the joint update of (alpha_i, alpha_j).

    Among samples whose error is cached as valid, return the k that
    maximizes |Ei - Ek|; when the cache holds no other valid entry,
    fall back to a random index.  Returns (j, Ej).
    """
    oS.eCache[i] = [1, Ei]  # mark Ei itself valid in the cache
    validEcachelist = np.nonzero(oS.eCache[:, 0].A)[0]  # indices with flag != 0
    if len(validEcachelist) <= 1:
        # Nothing else cached yet: choose j at random.
        j = selectJrand(i, oS.m)
        return j, calcEk(oS, j)
    maxK, maxDeltaE, Ej = -1, 0, 0
    for k in validEcachelist:
        if k == i:
            continue
        Ek = calcEk(oS, k)
        deltaE = abs(Ei - Ek)
        if deltaE > maxDeltaE:
            maxK, maxDeltaE, Ej = k, deltaE, Ek
    return maxK, Ej
# Refresh the cached error for sample k.
def updateEk(oS, k):
    """Recompute E_k and store it in the cache, flagged as valid."""
    oS.eCache[k] = [1, calcEk(oS, k)]
# One SMO optimization step; updates b and reports whether the alpha pair changed.
def innerL(i,oS):
    """Attempt one SMO step starting from alpha_i.

    If alpha_i violates the KKT conditions (within oS.tol), choose a
    partner j via the max-|Ei-Ej| heuristic, jointly optimize the pair,
    refresh the error cache and recompute the bias b.
    Returns 1 if a pair of alphas changed, otherwise 0.
    """
    Ei = calcEk(oS, i)
    # KKT violation check: y_i*E_i < -tol with alpha_i < C, or
    # y_i*E_i > tol with alpha_i > 0.
    if ((oS.labelMat[i]*Ei < -oS.tol) and (oS.alphas[i] < oS.C)) or\
        ((oS.labelMat[i]*Ei > oS.tol) and (oS.alphas[i] > 0)):
        j,Ej = selectJ(i, oS, Ei) # partner with the largest |Ei - Ej|
        alphaIold = oS.alphas[i].copy()
        alphaJold = oS.alphas[j].copy()
        # Feasible interval [L, H] for alpha_j under the box constraint
        # and the linear equality constraint.
        if (oS.labelMat[i] != oS.labelMat[j]):
            L = max(0, oS.alphas[j] - oS.alphas[i])
            H = min(oS.C, oS.C + oS.alphas[j] - oS.alphas[i])
        else:
            L = max(0, oS.alphas[j] + oS.alphas[i] - oS.C)
            H = min(oS.C, oS.alphas[j] + oS.alphas[i])
        if L==H:
            print("L==H")
            return 0
        # eta = 2*K12 - K11 - K22; must be negative for a valid maximum step.
        eta = 2.0 * oS.X[i,:]*oS.X[j,:].T-oS.X[i,:]*oS.X[i,:].T-oS.X[j,:]*oS.X[j,:].T
        if eta >= 0:
            print("eta>=0")
            return 0
        oS.alphas[j] -= oS.labelMat[j]*(Ei - Ej)/eta
        oS.alphas[j] = ClipAlpha(oS.alphas[j],H,L)
        updateEk(oS, j)
        # Give up if alpha_j barely moved.
        if (abs(oS.alphas[j] - alphaJold) < oS.tol):
            print("j not moving enough")
            return 0
        # Move alpha_i by the same amount in the opposite direction.
        oS.alphas[i] += oS.labelMat[j]*oS.labelMat[i]*(alphaJold - oS.alphas[j])
        updateEk(oS, i)
        # Candidate biases from each updated alpha.
        b1 = oS.b - Ei- oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.X[i,:]*oS.X[i,:].T - oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.X[i,:]*oS.X[j,:].T
        b2 = oS.b - Ej- oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.X[i,:]*oS.X[j,:].T - oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.X[j,:]*oS.X[j,:].T
        # Prefer the bias from an alpha strictly inside (0, C); otherwise average.
        if (0 < oS.alphas[i] < oS.C):
            oS.b = b1
        elif (0 < oS.alphas[j] < oS.C):
            oS.b = b2
        else:
            oS.b = (b1 + b2)/2.0
        return 1
    else:
        return 0
# Recover the primal weight vector from the trained alphas.
def calcWs(dataMat, labelMat, alphas):
    """Compute w = sum_i alpha_i * y_i * x_i.

    Generalized: the original hard-coded 2 features via
    np.tile(..., (1, 2)); this version works for any feature count.
    Returns w as a nested list of shape (n, 1).
    """
    alphas = np.array(alphas)
    dataMat = np.array(dataMat)
    labelMat = np.array(labelMat)
    n = dataMat.shape[1]  # feature count instead of the hard-coded 2
    # Broadcast y over the feature columns, weight every sample by its
    # label, then fold in the alphas: w = (y .* X)^T @ alphas -> (n, 1).
    w = np.dot((np.tile(labelMat.reshape(1, -1).T, (1, n)) * dataMat).T, alphas)
    return w.tolist()  # ndarray -> plain nested list
def smoP(dataMatIn, classLabels, C, toler, maxIter,kTup=('lin', 0)):
    """Platt's full SMO outer loop (linear-kernel version).

    Alternates between a sweep over every sample and a sweep over only
    the non-bound samples (0 < alpha < C), stopping when maxIter
    iterations elapse or a full-set sweep changes no alpha pair.
    Returns (b, alphas).
    """
    oS = optStruct(np.mat(dataMatIn),np.mat(classLabels).transpose(),C,toler, kTup)
    iter = 0
    entireSet = True        # start with a pass over the whole training set
    alphaPairsChanged = 0
    while (iter < maxIter) and ((alphaPairsChanged > 0) or (entireSet)):
        alphaPairsChanged = 0
        if entireSet:
            # Full sweep: try every sample as the first alpha.
            for i in range(oS.m):
                alphaPairsChanged += innerL(i,oS)
                print("fullSet, iter: %d i:%d, pairs changed %d" % (iter,i,alphaPairsChanged))
            iter += 1
        else:
            # Restricted sweep over the non-bound alphas only.
            nonBoundIs = np.nonzero((oS.alphas.A > 0) * (oS.alphas.A < C))[0]
            for i in nonBoundIs:
                alphaPairsChanged += innerL(i,oS)
                print("non-bound, iter: %d i:%d, pairs changed %d" % (iter,i,alphaPairsChanged))
            iter += 1
        # Toggle between sweep modes: after a full sweep go non-bound;
        # return to a full sweep once the non-bound pass changes nothing.
        if entireSet:
            entireSet = False
        elif (alphaPairsChanged == 0):
            entireSet = True
        print("iteration number: %d" % iter)
    return oS.b,oS.alphas
# Plot the data, the learned separating line, and the support vectors.
def showClassifer(dataMat, labelMat, alphas,w ,b):
    """Scatter-plot the 2-D training points, draw the line w.x + b = 0,
    and circle the support vectors.

    NOTE(review): the literal 0.6 below appears to be the training value
    of C (alphas at the bound are treated as inside-the-margin points);
    keep it in sync with the C passed to smoP — confirm with the caller.
    """
    data_plus = []
    data_minus = []
    # Split the raw points by class label (positive vs. negative class).
    for i in range(len(dataMat)):
        if labelMat[i] > 0:
            data_plus.append(dataMat[i])
        else:
            data_minus.append(dataMat[i])
    data_plus_np = np.array(data_plus)
    data_minus_np = np.array(data_minus)
    plt.scatter(np.transpose(data_plus_np)[0],np.transpose(data_plus_np)[1],s = 30, alpha=0.7)
    plt.scatter(np.transpose(data_minus_np)[0], np.transpose(data_minus_np)[1], s=30, alpha=0.7)
    # Draw the separating line across the x-range of the data.
    x1 = max(dataMat)[0]
    x2 = min(dataMat)[0]
    a1, a2 = w
    b = float(b)
    a1 = float(a1[0])
    a2 = float(a2[0])
    # Solve a1*x + a2*y + b = 0 for y at the two x extremes.
    y1, y2 = (-b - a1 * x1) / a2,(-b - a1 * x2) / a2
    plt.plot([x1,x2],[y1,y2])
    # Circle support vectors (0 < |alpha| < 0.6) in red and points at
    # the bound (|alpha| == 0.6) in black.
    for i, alpha in enumerate(alphas):
        if 0.6 > abs(alpha) > 0:
            x, y = dataMat[i]
            plt.scatter([x],[y],s=150,c='none',alpha=0.7,linewidths=1.5,edgecolors='red')
        if 0.6 == abs(alpha):
            x, y = dataMat[i]
            plt.scatter([x],[y],s=150,c='none',alpha=0.7,linewidths=1.5,edgecolors='black')
    plt.show()
Mytest
# -*- coding: utf-8 -*-#
# Driver script: train the linear-kernel SMO SVM on a small hand-made
# 2-D dataset, recover w, and plot the separating line.
import smoP as svm
# 15 two-dimensional training points with their +1/-1 labels.
x=[[1,8],[3,20],[1,15],[3,35],[5,35],[4,40],[7,80],[6,49],[1.5,25],[3.5,45],[4.5,50],[6.5,15],[5.5,20],[5.8,74],[2.5,5]]
y=[1,1,-1,-1,1,-1,-1,1,-1,-1,-1,1,1,-1,1]
# Train with C=0.6, tolerance=0.001, at most 40 outer iterations.
b,alphas = svm.smoP(x,y,0.6,0.001,40)
w = svm.calcWs(x,y,alphas)
svm.showClassifer(x,y,alphas, w, b)
太难了,扣脚
一直有个疑惑,为啥每次迭代有时候会自己找到几个“算法合理”的最优,一定要迭代到40才是最优,所以每次还要重新计算。
核函数
对松弛变量的理解:
1、取值存在一个最优解,当松弛变量太大,支持向量太少,也就是说我们利用了很少的点去决策,显然结果不好,正如上面体现的那样,测试集的错误在上升,当松弛变量太小,支持向量太多,也就是我们基本利用了所有样本点。
2、最重要的还是要通过调试找到RBF最佳的超参数值。(引用https://blog.csdn.net/weixin_42001089/article/details/83420109)
# -*- coding: utf-8 -*-
import random
import numpy as np
import matplotlib.pyplot as plt
# Load the training set from a tab-separated file.
def loadDataSet(filename):
    """Read a tab-separated data file: each line is "x1<TAB>x2<TAB>label".

    Returns (dataMat, labelMat): a list of [x1, x2] float feature pairs
    and a list of float class labels (expected to be +1.0 / -1.0).
    """
    dataMat = []
    labelMat = []
    # 'with' guarantees the handle is closed; the original opened the
    # file and never closed it (resource leak).
    with open(filename) as fr:
        for line in fr:
            lineArr = line.strip().split('\t')
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(float(lineArr[2]))
    return dataMat, labelMat
# SMO working state, kernelized: also precomputes the (m, m) kernel matrix.
class optStruct:
    """Holds samples, labels, hyper-parameters, the alphas, the bias b,
    the error cache, and the precomputed kernel matrix K."""

    def __init__(self, dataMatIn, classLabels, C, toler, kTup):
        self.X = dataMatIn               # (m, n) training samples
        self.labelMat = classLabels      # (m, 1) class labels
        self.C = C                       # box-constraint penalty parameter
        self.tol = toler                 # KKT violation tolerance
        self.b = 0                       # bias of the decision function
        self.m = np.shape(dataMatIn)[0]  # number of samples (rows)
        self.alphas = np.mat(np.zeros((self.m, 1)))
        # Error cache: column 0 is a validity flag, column 1 the cached Ek.
        self.eCache = np.mat(np.zeros((self.m, 2)))
        # Precompute the full kernel matrix, one column per sample.
        self.K = np.mat(np.zeros((self.m, self.m)))
        for col in range(self.m):
            self.K[:, col] = kernelTrans(self.X, self.X[col, :], kTup)
# Helper 1: random choice of the second multiplier's index.
def selectJrand(i, m):
    """Return a uniformly random index j in [0, m) with j != i."""
    while True:
        j = int(random.uniform(0, m))
        if j != i:
            return j
# Helper 2: clamp the new alpha_j into its feasible box.
def ClipAlpha(aj, H, L):
    """Clamp aj into [L, H] (upper bound applied first, as in the
    sequential if-chain it replaces)."""
    return max(L, min(H, aj))
# Kernel evaluation: one column k(x_j, A) for every row x_j of X.
def kernelTrans(X, A, kTup):
    """Return the (m, 1) column of kernel values between each row of X
    and the single sample A.

    kTup = ('lin', _)     -> linear kernel  X @ A.T
    kTup = ('rbf', sigma) -> Gaussian kernel exp(-||x_j - A||^2 / sigma^2)
    Any other kernel name raises NameError.
    """
    m, n = np.shape(X)
    K = np.mat(np.zeros((m, 1)))
    kind = kTup[0]
    if kind == 'lin':
        return X * A.T
    if kind == 'rbf':
        for row in range(m):
            diff = X[row, :] - A
            K[row] = diff * diff.T        # squared Euclidean distance
        return np.exp(K / (-1 * kTup[1] ** 2))
    raise NameError('Houston We Have a Problem -- That Kernel is not recognized')
# Prediction error of the current model on one sample.
def calcEk(oS, k):
    """E_k = f(x_k) - y_k where f(x_k) = sum_i alpha_i*y_i*K(x_i, x_k) + b.

    Bug fix: use the precomputed kernel column oS.K[:, k] instead of the
    raw linear dot product oS.X * oS.X[k, :].T.  innerL and the bias
    updates in this kernelized version already work through oS.K, so
    with a non-linear kernel (rbf) the old code computed errors under a
    different (linear) kernel than the one being trained.
    """
    fxk = float(np.multiply(oS.alphas, oS.labelMat).T * oS.K[:, k] + oS.b)
    return fxk - float(oS.labelMat[k])
# Heuristic choice of the second alpha: maximize the step size |Ei - Ej|.
def selectJ(i, oS, Ei):
    """Pick j for the joint update of (alpha_i, alpha_j).

    Among samples whose error is cached as valid, return the k that
    maximizes |Ei - Ek|; when the cache holds no other valid entry,
    fall back to a random index.  Returns (j, Ej).
    """
    oS.eCache[i] = [1, Ei]  # mark Ei itself valid in the cache
    validEcachelist = np.nonzero(oS.eCache[:, 0].A)[0]  # indices with flag != 0
    if len(validEcachelist) <= 1:
        # Nothing else cached yet: choose j at random.
        j = selectJrand(i, oS.m)
        return j, calcEk(oS, j)
    maxK, maxDeltaE, Ej = -1, 0, 0
    for k in validEcachelist:
        if k == i:
            continue
        Ek = calcEk(oS, k)
        deltaE = abs(Ei - Ek)
        if deltaE > maxDeltaE:
            maxK, maxDeltaE, Ej = k, deltaE, Ek
    return maxK, Ej
# Refresh the cached error for sample k.
def updateEk(oS, k):
    """Recompute E_k and store it in the cache, flagged as valid.

    (The original file defined this identical function twice
    back-to-back; the redundant duplicate has been removed.)
    """
    Ek = calcEk(oS, k)
    oS.eCache[k] = [1, Ek]
# One SMO optimization step (kernelized); updates b and reports pair change.
def innerL(i,oS):
    """Attempt one SMO step starting from alpha_i, using the precomputed
    kernel matrix oS.K.

    If alpha_i violates the KKT conditions (within oS.tol), choose a
    partner j via the max-|Ei-Ej| heuristic, jointly optimize the pair,
    refresh the error cache and recompute the bias b.
    Returns 1 if a pair of alphas changed, otherwise 0.
    """
    Ei = calcEk(oS, i)
    # KKT violation check: y_i*E_i < -tol with alpha_i < C, or
    # y_i*E_i > tol with alpha_i > 0.
    if ((oS.labelMat[i]*Ei < -oS.tol) and (oS.alphas[i] < oS.C)) or\
        ((oS.labelMat[i]*Ei > oS.tol) and (oS.alphas[i] > 0)):
        j,Ej = selectJ(i, oS, Ei) # partner with the largest |Ei - Ej|
        alphaIold = oS.alphas[i].copy()
        alphaJold = oS.alphas[j].copy()
        # Feasible interval [L, H] for alpha_j under the box constraint
        # and the linear equality constraint.
        if (oS.labelMat[i] != oS.labelMat[j]):
            L = max(0, oS.alphas[j] - oS.alphas[i])
            H = min(oS.C, oS.C + oS.alphas[j] - oS.alphas[i])
        else:
            L = max(0, oS.alphas[j] + oS.alphas[i] - oS.C)
            H = min(oS.C, oS.alphas[j] + oS.alphas[i])
        if L==H:
            print("L==H")
            return 0
        # eta = 2*K_ij - K_ii - K_jj; must be negative for a valid step.
        eta = 2.0 * oS.K[i,j]-oS.K[i,i]-oS.K[j,j]
        if eta >= 0:
            print("eta>=0")
            return 0
        oS.alphas[j] -= oS.labelMat[j]*(Ei - Ej)/eta
        oS.alphas[j] = ClipAlpha(oS.alphas[j],H,L)
        updateEk(oS, j)
        # Give up if alpha_j barely moved.
        if (abs(oS.alphas[j] - alphaJold) < oS.tol):
            print("j not moving enough")
            return 0
        # Move alpha_i by the same amount in the opposite direction.
        oS.alphas[i] += oS.labelMat[j]*oS.labelMat[i]*(alphaJold - oS.alphas[j])
        updateEk(oS, i)
        # Candidate biases from each updated alpha (kernel entries).
        b1 = oS.b - Ei- oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.K[i,i] - oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.K[i,j]
        b2 = oS.b - Ej- oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.K[i,j]- oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.K[j,j]
        # Prefer the bias from an alpha strictly inside (0, C); otherwise average.
        if (0 < oS.alphas[i] < oS.C):
            oS.b = b1
        elif (0 < oS.alphas[j] < oS.C):
            oS.b = b2
        else:
            oS.b = (b1 + b2)/2.0
        return 1
    else:
        return 0
# Kernelized SMO outer loop.
def smoK(dataMatIn, classLabels, C, toler, maxIter,kTup=('lin', 0)):
    """Platt's full SMO outer loop (kernel version; kTup selects the kernel).

    Alternates between a sweep over every sample and a sweep over only
    the non-bound samples (0 < alpha < C), stopping when maxIter
    iterations elapse or a full-set sweep changes no alpha pair.
    Returns (b, alphas).
    """
    oS = optStruct(np.mat(dataMatIn),np.mat(classLabels).transpose(),C,toler, kTup)
    iter = 0
    entireSet = True        # start with a pass over the whole training set
    alphaPairsChanged = 0
    while (iter < maxIter) and ((alphaPairsChanged > 0) or (entireSet)):
        alphaPairsChanged = 0
        if entireSet:
            # Full sweep: try every sample as the first alpha.
            for i in range(oS.m):
                alphaPairsChanged += innerL(i,oS)
                print("fullSet, iter: %d i:%d, pairs changed %d" % (iter,i,alphaPairsChanged))
            iter += 1
        else:
            # Restricted sweep over the non-bound alphas only.
            nonBoundIs = np.nonzero((oS.alphas.A > 0) * (oS.alphas.A < C))[0]
            for i in nonBoundIs:
                alphaPairsChanged += innerL(i,oS)
                print("non-bound, iter: %d i:%d, pairs changed %d" % (iter,i,alphaPairsChanged))
            iter += 1
        # Toggle between sweep modes: after a full sweep go non-bound;
        # return to a full sweep once the non-bound pass changes nothing.
        if entireSet:
            entireSet = False
        elif (alphaPairsChanged == 0):
            entireSet = True
        print("iteration number: %d" % iter)
    return oS.b,oS.alphas
# Train an RBF-kernel SVM on the training file and report the error
# rate on both the training set and the test set.
def testRbf(data_train, data_test, k1=5):
    """Train on data_train with an RBF kernel of width k1, then report
    the error rate on the training file and on data_test.

    Bug fixes relative to the original:
    * The kernel width is now the SAME for training and every later
      evaluation (the original trained with ('rbf', 5) but evaluated
      with ('rbf', 1.3) and ('rbf', 0.1), which is inconsistent).
    * The test loop now compares predictions against the TEST labels
      (it used the training labels labelArr[i]).
    * The test loop now increments errorCount_test (it incremented
      errorCount, so the printed test error rate was always 0).

    Returns (dataArr, labelArr, alphas) for the training set.
    """
    dataArr, labelArr = loadDataSet(data_train)
    b, alphas = smoK(dataArr, labelArr, 200, 0.0001, 10000, ('rbf', k1))
    datMat = np.mat(dataArr)
    labelMat = np.mat(labelArr).transpose()
    # Only support vectors (alpha > 0) contribute to the decision function.
    svInd = np.nonzero(alphas.A > 0)[0]
    sVs = datMat[svInd]
    labelSV = labelMat[svInd]
    print("there are %d Support Vectors" % np.shape(sVs)[0])
    m, n = np.shape(datMat)
    errorCount = 0
    for i in range(m):
        kernelEval = kernelTrans(sVs, datMat[i, :], ('rbf', k1))
        predict = kernelEval.T * np.multiply(labelSV, alphas[svInd]) + b
        if np.sign(predict) != np.sign(labelArr[i]):
            errorCount += 1
    print("the training error rate is: %f" % (float(errorCount) / m))
    # --- evaluation on the held-out test file ---
    dataArr_test, labelArr_test = loadDataSet(data_test)
    datMat_test = np.mat(dataArr_test)
    m, n = np.shape(datMat_test)
    errorCount_test = 0
    for i in range(m):
        kernelEval = kernelTrans(sVs, datMat_test[i, :], ('rbf', k1))
        predict = kernelEval.T * np.multiply(labelSV, alphas[svInd]) + b
        if np.sign(predict) != np.sign(labelArr_test[i]):
            errorCount_test += 1
    print("the testing error rate is: %f" % (float(errorCount_test) / m))
    return dataArr, labelArr, alphas
# Plot the data and circle the support vectors (no separating line —
# the kernelized boundary is generally non-linear).
def showClassifer(dataMat, labelMat, alphas):
    """Scatter-plot the 2-D training points by class and circle every
    point whose alpha is non-zero (the support vectors) in red."""
    data_plus = []
    data_minus = []
    # Split the raw points by class label (positive vs. negative class).
    for i in range(len(dataMat)):
        if labelMat[i] > 0:
            data_plus.append(dataMat[i])
        else:
            data_minus.append(dataMat[i])
    data_plus_np = np.array(data_plus)
    data_minus_np = np.array(data_minus)
    plt.scatter(np.transpose(data_plus_np)[0],np.transpose(data_plus_np)[1],s = 30, alpha=0.7)
    plt.scatter(np.transpose(data_minus_np)[0], np.transpose(data_minus_np)[1], s=30, alpha=0.7)
    # Circle the support vectors: any sample with alpha > 0.
    for i, alpha in enumerate(alphas):
        if abs(alpha) > 0:
            x, y = dataMat[i]
            plt.scatter([x],[y],s=150,c='none',alpha=0.7,linewidths=1.5,edgecolors='red')
    plt.show()
# -*- coding: utf-8 -*-
# Driver script: train the RBF-kernel SVM from files on disk, report
# train/test error rates, and plot the support vectors.
import smoK as svm
# NOTE(review): hard-coded absolute Windows paths — adjust per machine.
traindata = 'C:\\Users\\Lenovo\\Desktop\\Train_data.txt'
testdata = 'C:\\Users\\Lenovo\\Desktop\\Test_data.txt'
TraindataArr, TrainlabelArr, alphas = svm.testRbf(traindata,testdata)
svm.showClassifer(TraindataArr,TrainlabelArr,alphas)