6 支持向量机(Support vector machines)
6.4 Platt SMO算法(Speeding up optimization with the full Platt SMO)
Platt SMO算法优化部分(更新 α \mathbf{\alpha} α)与简化版SMO一致,区别在于优化过程中Platt SMO算法采用启发式算法进行 α \alpha α对- ( α i , α j ) (\alpha_i, \alpha_j) (αi,αj)的选取。
- 选取 α i \alpha_i αi
无界
α
\alpha
α:
0
<
α
<
C
0 \lt \alpha \lt C
0<α<C,即
α
\alpha
α还未到达边界处。
在单次传递整个数据集和单次传递非绑定阿尔法之间进行此更改。
- 选取 α j \alpha_j αj
选取能够使步长最大的 α j \alpha_j αj
计算全局误差 E j E_j Ej,然后从中选取能够使步长最大的 α j \alpha_j αj,即 E i − E j E_i - E_j Ei−Ej
# Listing 6.3 Support functions for full Platt SMO
class optStruct(object):
def __init__(self, dataMatIn, classLabels, C, toler):
self.X = dataMatIn
self.labelMat = classLabels
self.C = C
self.tol = toler
self.m = dataMatIn.shape[0]
self.alphas = np.matrix(np.zeros((self.m, 1)))
self.b = 0
# error cache
self.eCache = np.matrix(np.zeros((self.m, 2)))
def calcEk(oS, k):
fXk = np.multiply(oS.alphas, oS.labelMat).T * \
(oS.X * oS.X[k, :].T) + oS.b
Ek = fXk - oS.labelMat[k]
return Ek
# inner-loop heuristic
def selectJ(i, oS, Ei):
maxK = -1
maxDeltaE = 0
Ej = 0
oS.eCache[i] = [1, Ei]
validEcacheList = np.nonzero(oS.eCache[:, 0].A)[0]
if len(validEcacheList) > 1:
for k in validEcacheList:
if k == i:
continue
Ek = calcEk(oS, k)
deltaE = abs(Ei - Ek)
# choose j for maximum step size
if deltaE > maxDeltaE:
maxK = k
maxDeltaE = deltaE
Ej = Ek
return maxK, Ej
else:
j = selectJrand(i, oS.m)
Ej = calcEk(oS, j)
return j, Ej
def updateEk(oS, k):
Ek = calcEk(oS, k)
oS.eCache[k] = [1, Ek]
# Listing 6.4 Full Platt SMO optimization routine
def innerL(i, oS):
Ei = calcEk(oS, i)
if ((oS.labelMat[i] * Ei < -oS.tol) and (oS.alphas[i] < oS.C)) or \
((oS.labelMat[i] * Ei > oS.tol) and (oS.alphas[i] > 0)):
# second-choice heuristic
j, Ej = selectJ(i, oS, Ei)
alphaIold = oS.alphas[i].copy()
alphaJold = oS.alphas[j].copy()
if (oS.labelMat[i] != oS.labelMat[j]):
L = max(0, oS.alphas[j] - oS.alphas[i])
H = min(oS.C, oS.C + oS.alphas[j] - oS.alphas[i])
else:
L = max(0, oS.alphas[j] + oS.alphas[i] - oS.C)
H = min(oS.C, oS.alphas[j] + oS.alphas[i])
if L == H:
print("L == H")
return 0
eta = 2.0 * oS.X[i, :] * oS.X[j, :].T - oS.X[i, :] * oS.X[i, :].T - \
oS.X[j, :] * oS.X[j, :].T
if eta >= 0:
print("eta >= 0")
return 0
oS.alphas[j] -= oS.labelMat[j] * (Ei - Ej) / eta
oS.alphas[j] = clipAlpha(oS.alphas[j], H, L)
# updates ecache
updateEk(oS, j)
if abs(oS.alphas[j] - alphaJold) < 0.00001:
print("j not moving enough")
return 0
oS.alphas[i] += oS.labelMat[j] * oS.labelMat[i] * \
(alphaJold - oS.alphas[j])
# updates ecache
updateEk(oS, i)
b1 = oS.b - Ei - \
oS.labelMat[i] * (oS.alphas[i] - alphaIold) * \
oS.X[i, :] * oS.X[i, :].T - \
oS.labelMat[j] * (oS.alphas[j] - alphaJold) * \
oS.X[i, :] * oS.X[j, :].T
b2 = oS.b - Ej - \
oS.labelMat[i] * (oS.alphas[i] - alphaIold) * \
oS.X[i, :] * oS.X[j, :].T - \
oS.labelMat[j] * (oS.alphas[j] - alphaJold) * \
oS.X[j, :] * oS.X[j, :].T
if (0 < oS.alphas[i]) and (oS.C > oS.alphas[i]):
oS.b = b1
elif (0 < oS.alphas[j]) and (oS.C > oS.alphas[j]):
oS.b = b2
else:
oS.b = (b1 + b2) / 2
return 1
else:
return 0
# Listing 6.5 Full Platt SMO outer loop
def smoP(dataMatIn, classLabels, C, toler, maxIter, kTup=("lin", 0)):
oS = optStruct(np.matrix(dataMatIn),
np.matrix(classLabels).transpose(),
C, toler)
iter = 0
entireSet = True
alphaPairsChanged = 0
while (iter < maxIter) and ((alphaPairsChanged > 0) or entireSet):
alphaPairsChanged = 0
if entireSet:
# go over all values
for i in range(oS.m):
alphaPairsChanged += innerL(i, oS)
print("fullSet, iter: {}, i: {}, pairs changed: {}" \
.format(iter, i, alphaPairsChanged))
iter += 1
else:
# go over non-bound values
nonBoundIs = np.nonzero((oS.alphas.A > 0) * (oS.alphas.A < C))[0]
for i in nonBoundIs:
alphaPairsChanged += innerL(i, oS)
print("non-bound, iter: {}, i: {}, pairs changed: {}" \
.format(iter, i, alphaPairsChanged))
iter += 1
if entireSet:
entireSet = False
elif alphaPairsChanged == 0:
entireSet = True
print("iteration number: {} \n ---".format(iter))
return oS.b, oS.alphas
while循环退出条件:
(1)迭代次数达到最大;或者(2)遍历训练集,但没有 α \alpha α对被修改。
SVM训练目标:(1)所有样本的余量(margin)至少为1;(2)余量尽可能大。 C C C的作用是在二者之间取得平衡,当 C C C很大时,分类器试图用分隔超平面(separating hyperplane)将所有样本正确分类。
dataArr, labelArr = loadDataSet("testSet.txt")
b, alphas = smoP(dataArr, labelArr, 0.6, 0.001, 40)
L == H
L == H
L == H
j not moving enough
L == H
L == H
L == H
L == H
L == H
L == H
j not moving enough
L == H
L == H
j not moving enough
L == H
L == H
L == H
L == H
L == H
j not moving enough
fullSet, iter: 0, i: 99, pairs changed: 6
iteration number: 1
---
j not moving enough
non-bound, iter: 1, i: 0, pairs changed: 0
j not moving enough
non-bound, iter: 1, i: 3, pairs changed: 0
j not moving enough
non-bound, iter: 1, i: 4, pairs changed: 0
j not moving enough
non-bound, iter: 1, i: 17, pairs changed: 0
j not moving enough
non-bound, iter: 1, i: 18, pairs changed: 0
j not moving enough
non-bound, iter: 1, i: 25, pairs changed: 0
j not moving enough
non-bound, iter: 1, i: 46, pairs changed: 0
non-bound, iter: 1, i: 55, pairs changed: 0
non-bound, iter: 1, i: 94, pairs changed: 0
iteration number: 2
---
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L == H
j not moving enough
j not moving enough
L == H
j not moving enough
j not moving enough
L == H
L == H
j not moving enough
fullSet, iter: 2, i: 99, pairs changed: 0
iteration number: 3
---
print(b)
# support vectors
support_vector_indices = np.nonzero(alphas)[0]
support_vector_alphas = alphas[support_vector_indices].getA()
print(support_vector_indices)
print(support_vector_alphas)
print(support_vector_alphas.shape)
support_vector_data = np.array([dataArr[idx] for idx in support_vector_indices])
support_vector_labels = np.array([labelArr[idx] for idx in support_vector_indices])
for idx in support_vector_indices:
print(dataArr[idx], labelArr[idx])
weights = np.zeros(support_vector_data.shape[1])
for idx in range(len(support_vector_indices)):
weights += support_vector_alphas[idx] * support_vector_labels[idx] * \
support_vector_data[idx, :]
print(weights)
print(weights.shape)
[[-2.89901748]]
[ 0 3 4 17 18 25 46 55 94]
[[0.06961952]
[0.0169055 ]
[0.0169055 ]
[0.0272699 ]
[0.04522972]
[0.0272699 ]
[0.0243898 ]
[0.06140181]
[0.06140181]]
(9, 1)
[3.542485, 1.977398] -1.0
[2.114999, -0.004466] -1.0
[8.127113, 1.274372] 1.0
[4.658191, 3.507396] -1.0
[8.197181, 1.545132] 1.0
[7.40786, -0.121961] 1.0
[6.960661, -0.245353] 1.0
[6.080573, 0.418886] 1.0
[3.107511, 0.758367] -1.0
[ 0.65307162 -0.17196128]
(2,)
xcord3 = []
ycord3 = []
for idx in support_vector_indices:
xcord3.append(dataArr[idx][0])
ycord3.append(dataArr[idx][1])
x_max = max(max(xcord1, xcord2))
x_min = min(min(xcord1, xcord2))
y_max = max(max(ycord1, ycord2))
y_min = min(min(ycord1, ycord2))
x = np.linspace(x_min, x_max)
y = (- (weights[0] * x + b) / weights[1]).getA().flatten()
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xcord1, ycord1, s=30, c="red", marker="s", label="Class 0")
ax.scatter(xcord2, ycord2, s=30, c="green", label="Class 1")
ax.scatter(xcord3, ycord3, s=120, marker="o", c="", edgecolors="blue")
ax.plot(x, y)
ax.set_xlabel("$x_1$")
ax.set_ylabel("$x_2$")
ax.set_xlim(x_min - (x_max + x_min) / 5, x_max + (x_max + x_min) / 5)
ax.set_ylim(y_min - (y_max + y_min), y_max + (y_max + y_min))
ax.legend()
plt.show()
- SVM系数:
f ( x ) = w T x + b = ∑ i − 1 m α i y ( i ) < x ( i ) , x > + b ↓ w = ∑ i − 1 m α i y ( i ) x ( i ) \begin{aligned} f \left( \mathbf{x} \right) = \mathbf{w}^{\mathrm{T}} \mathbf{x} + b = & \sum_{i - 1}^{m} \alpha_i y^{(i)} \left< \mathbf{x}^{(i)},\mathbf{x} \right> + b \\ \downarrow & \\ \mathbf{w} = & \sum_{i - 1}^{m} \alpha_i y^{(i)} \mathbf{x}^{(i)} \end{aligned} f(x)=wTx+b=↓w=i−1∑mαiy(i)⟨x(i),x⟩+bi−1∑mαiy(i)x(i)
def calcWs(alphas, dataArr, classLabels):
X = np.matrix(dataArr)
labelMat = np.matrix(classLabels).transpose()
m, n = X.shape
w = np.zeros((n, 1))
for i in range(m):
w += np.multiply(alphas[i] * labelMat[i], X[i, :].T)
return w
ws = calcWs(alphas, dataArr, labelArr)
print(ws)
[[ 0.65307162]
[-0.17196128]]
datMat = np.matrix(dataArr)
print("P0: ")
print(datMat[0] * np.matrix(ws) + b)
print(labelArr[0])
print("---")
print("P1: ")
print(datMat[1] * np.matrix(ws) + b)
print(labelArr[1])
print("---")
print("P2: ")
print(datMat[2] * np.matrix(ws) + b)
print(labelArr[2])
print("---")
P0:
[[-0.92555695]]
-1.0
---
P1:
[[-1.36706674]]
-1.0
---
P2:
[[2.30436336]]
1.0
---
6.5 核:处理复杂数据(Using kernels for more complex data)
6.5.1 利用核将数据映射到高维
当数据线性不可分时,可以尝试利用核(kernel)函数,将其从低维(lower-dimensional)特征空间(feature space)映射到高维特征空间(higher-dimensional)。
由于SVM优化是通过内积(inner products)运算实现的。我们利用核技巧,即用核函数(kernel trick,kernel substation)运算替换内积运算,将数据映射到高维特征空间。
将核函数 K ( x , y ) K \left( \mathbf{x, y} \right) K(x,y)替换内积运算,SVM对偶优化问题表示为:
max
α
W
(
α
)
=
∑
i
=
1
m
α
i
−
1
2
∑
i
,
j
=
1
m
α
i
α
j
y
(
i
)
y
(
j
)
K
(
x
(
i
)
,
x
(
j
)
)
\max_{\mathbf{\alpha}}W\left(\mathbf{\alpha} \right) = \sum^{m}_{i=1} \alpha_i - \frac{1}{2} \sum^{m}_{i,j=1} \alpha_i \alpha_j y ^{\left(i\right)} y^{\left(j\right)} K \left( \mathbf{x}^{\left(i\right)}, \mathbf{x}^{\left(j\right)} \right)
αmaxW(α)=i=1∑mαi−21i,j=1∑mαiαjy(i)y(j)K(x(i),x(j))
s.t.
\text{s.t.}
s.t.
C
≥
α
i
≥
0
∑
i
=
1
m
α
i
y
(
i
)
=
0
\begin{aligned} C \geq \alpha_i \geq & 0 \\ \sum^{m}_{i=1} \alpha_i y ^{\left(i\right)} = & 0 \end{aligned}
C≥αi≥i=1∑mαiy(i)=00
SVM推理表达式为:
f ( x ) = ∑ i − 1 m α i y ( i ) K ( x ( i ) , x ) + b f \left( \mathbf{x} \right) = \sum_{i - 1}^{m} \alpha_i y^{(i)} K \left( \mathbf{x}^{(i)},\mathbf{x} \right) + b f(x)=i−1∑mαiy(i)K(x(i),x)+b
6.5.2 径向偏置核函数
径向偏置函数(radial basis function,应该是径向基函数(adial basis function)):
- 输入:矢量
- 输出:标量
输出标量与输入向量的距离相关。
高斯核:
k ( x , y ) = exp ( − ∥ x − y ∥ 2 2 σ 2 ) k \left( \mathbf{x, y} \right) = \exp \left( \frac{- \left \| \mathbf{x - y} \right\|^{2}}{2 \sigma^{2} } \right) k(x,y)=exp(2σ2−∥x−y∥2)
高斯核将数据映射到无穷维空间(高维空间)中。
- 径向基函数(radial basis function, RBF)是一个实值函数 ϕ \phi ϕ,它的值只取决于输入向量到原点(或其它中心 c \mathbf {c} c)的距离:
ϕ
(
x
)
=
ϕ
(
∥
x
∥
)
\phi \left( \mathbf{x} \right) = \phi \left( \left\|\mathbf {x} \right\| \right)
ϕ(x)=ϕ(∥x∥)
ϕ
(
x
,
c
)
=
ϕ
(
∥
x
−
c
∥
)
\phi \left( \mathbf{x, c} \right) = \phi \left( \left\|\mathbf {x - c} \right\| \right)
ϕ(x,c)=ϕ(∥x−c∥)
任何满足该条件的函数都是径向基函数,范数通常取欧几里德距离。
r r r: r = ∥ x − x i ∥ r = \left\| \mathbf {x} - \mathbf {x}_i \right\| r=∥x−xi∥
ε \varepsilon ε:临界半径(critical radius)倒数
-
常用径向基函数类型:
- 高斯核(Gaussian):
ϕ ( r ) = exp ( − ( ε r ) 2 ) \phi \left( r \right) = \exp \left( - \left( \varepsilon r \right)^2 \right) ϕ(r)=exp(−(εr)2)
- 多元二次核(Multiquadric):
ϕ ( r ) = 1 + ( ε r ) 2 \phi \left(r\right) = \sqrt {1 + \left( \varepsilon r \right)^{2}} ϕ(r)=1+(εr)2
- 逆二次核(Inverse quadratic):
ϕ ( r ) = 1 1 + ( ε r ) 2 \phi \left(r\right)={\dfrac {1}{1+\left(\varepsilon r\right)^{2}}} ϕ(r)=1+(εr)21
- 逆多元二次核(Inverse multiquadric):
ϕ ( r ) = 1 1 + ( ε r ) 2 \phi \left(r\right)={\dfrac {1}{\sqrt {1+\left(\varepsilon r\right)^{2}}}} ϕ(r)=1+(εr)21
- 多重调和样条核(Polyharmonic spline):
ϕ ( r ) = r k , k = 1 , 3 , 5 , ⋯ ϕ ( r ) = r k ln ( r ) , k = 2 , 4 , 6 , ⋯ \begin{aligned} \phi \left( r \right) &= r^{k}, & k & = 1,3,5, \cdots \\ \phi \left( r \right) &= r^{k} \ln \left( r \right), & k & = 2,4,6, \cdots \end{aligned} ϕ(r)ϕ(r)=rk,=rkln(r),kk=1,3,5,⋯=2,4,6,⋯
- 薄板样条核(Thin plate spline):
ϕ ( r ) = r 2 ln ( r ) \phi \left( r \right) = r^{2} \ln \left( r \right) ϕ(r)=r2ln(r)
# Listing 6.6 Kernel transformation function
def kernelTrans(X, A, kTup):
m, n = X.shape
K = np.matrix(np.zeros((m, 1)))
if kTup[0] == "lin":
K = X * A.T
elif kTup[0] == "rbf":
for j in range(m):
deltaRow = X[j, :] - A
K[j] = deltaRow * deltaRow.T
# element-wise division
K = np.exp(K / (-1 * kTup[1]**2))
else:
raise NameError("Houston We Have a Problem -- That Kernel is not recognized")
return K
class optStruct:
def __init__(self, dataMatIn, classLabels, C, toler, kTup):
self.X = dataMatIn
self.labelMat = classLabels
self.C = C
self.tol = toler
self.m = dataMatIn.shape[0]
self.alphas = np.matrix(np.zeros((self.m, 1)))
self.b = 0
self.eCache = np.matrix(np.zeros((self.m, 2)))
self.K = np.matrix(np.zeros((self.m, self.m)))
for i in range(self.m):
self.K[:, i] = kernelTrans(self.X, self.X[i, :], kTup)
# Listing 6.7 Changes to innerL() and calcEk() needed to user kernels
def innerL(i, oS):
Ei = calcEk(oS, i)
if ((oS.labelMat[i] * Ei < -oS.tol) and (oS.alphas[i] < oS.C)) or \
((oS.labelMat[i] * Ei > oS.tol) and (oS.alphas[i] > 0)):
# second-choice heuristic
j, Ej = selectJ(i, oS, Ei)
alphaIold = oS.alphas[i].copy()
alphaJold = oS.alphas[j].copy()
if (oS.labelMat[i] != oS.labelMat[j]):
L = max(0, oS.alphas[j] - oS.alphas[i])
H = min(oS.C, oS.C + oS.alphas[j] - oS.alphas[i])
else:
L = max(0, oS.alphas[j] + oS.alphas[i] - oS.C)
H = min(oS.C, oS.alphas[j] + oS.alphas[i])
if L == H:
print("L == H")
return 0
eta = 2.0 * oS.K[i, j] - oS.K[i, i] - oS.K[j, j]
if eta >= 0:
print("eta >= 0")
return 0
oS.alphas[j] -= oS.labelMat[j] * (Ei - Ej) / eta
oS.alphas[j] = clipAlpha(oS.alphas[j], H, L)
# updates ecache
updateEk(oS, j)
if abs(oS.alphas[j] - alphaJold) < 0.00001:
print("j not moving enough")
return 0
oS.alphas[i] += oS.labelMat[j] * oS.labelMat[i] * \
(alphaJold - oS.alphas[j])
# updates ecache
updateEk(oS, i)
b1 = oS.b - Ei - \
oS.labelMat[i] * (oS.alphas[i] - alphaIold) * oS.K[i, i] - \
oS.labelMat[j] * (oS.alphas[j] - alphaJold) * oS.K[i, j]
b2 = oS.b - Ej - \
oS.labelMat[i] * (oS.alphas[i] - alphaIold) * oS.K[i, j] - \
oS.labelMat[j] * (oS.alphas[j] - alphaJold) * oS.K[j, j]
if (0 < oS.alphas[i]) and (oS.C > oS.alphas[i]):
oS.b = b1
elif (0 < oS.alphas[j]) and (oS.C > oS.alphas[j]):
oS.b = b2
else:
oS.b = (b1 + b2) / 2
return 1
else:
return 0
def calcEk(oS, k):
fXk = np.multiply(oS.alphas, oS.labelMat).T * \
oS.K[:, k] + oS.b
Ek = fXk - oS.labelMat[k]
return Ek
def smoP(dataMatIn, classLabels, C, toler, maxIter, kTup=("lin", 0)):
oS = optStruct(np.matrix(dataMatIn),
np.matrix(classLabels).transpose(),
C, toler, kTup)
iter = 0
entireSet = True
alphaPairsChanged = 0
while (iter < maxIter) and ((alphaPairsChanged > 0) or entireSet):
alphaPairsChanged = 0
if entireSet:
# go over all values
for i in range(oS.m):
alphaPairsChanged += innerL(i, oS)
print("fullSet, iter: {}, i: {}, pairs changed: {}" \
.format(iter, i, alphaPairsChanged))
iter += 1
else:
# go over non-bound values
nonBoundIs = np.nonzero((oS.alphas.A > 0) * (oS.alphas.A < C))[0]
for i in nonBoundIs:
alphaPairsChanged += innerL(i, oS)
print("non-bound, iter: {}, i: {}, pairs changed: {}" \
.format(iter, i, alphaPairsChanged))
iter += 1
if entireSet:
entireSet = False
elif alphaPairsChanged == 0:
entireSet = True
print("iteration number: {} \n ---".format(iter))
return oS.b, oS.alphas
6.5.3 核测试
参数 σ \sigma σ需要人工确定
# Listing 6.8 Radial bias test function for classifying with a kernel
def testRbf(kl=1.3):
dataArr, labelArr = loadDataSet("testSetRBF.txt")
# training
b, alphas = smoP(dataArr, labelArr, 200, 0.0001, 10000, ("rbf", kl))
datMat = np.matrix(dataArr)
labelMat = np.matrix(labelArr).transpose()
svInd = np.nonzero(alphas.A > 0)[0]
# create matrix of support vectors
sVs = datMat[svInd]
labelSV = labelMat[svInd]
print("there are {} support vectors".format(sVs.shape[0]))
# training error rate
m, n = datMat.shape
errorCount = 0
for i in range(m):
kernelEval = kernelTrans(sVs, datMat[i, :], ("rbf", kl))
predict = kernelEval.T * np.multiply(labelSV, alphas[svInd]) + b
if np.sign(predict) != np.sign(labelArr[i]):
errorCount += 1
print("the training error rate is: {}".format(errorCount / m))
# plot
xcord1 = []
ycord1 = []
xcord2 = []
ycord2 = []
for i in range(m):
if int(labelArr[i]) == 1:
xcord1.append(dataArr[i][0])
ycord1.append(dataArr[i][1])
else:
xcord2.append(dataArr[i][0])
ycord2.append(dataArr[i][1])
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xcord1, ycord1, s=30, c="red", marker="s", label="Class 0")
ax.scatter(xcord2, ycord2, s=30, c="green", label="Class 1")
ax.scatter(sVs[:, 0].A, sVs[:, 1].A, s=120, marker="o", c="", edgecolors="blue", label="svs")
ax.set_xlabel("$x_1$")
ax.set_ylabel("$x_2$")
ax.legend()
plt.show()
# test error rate
dataArr, labelArr = loadDataSet("testSetRBF2.txt")
errorCount = 0
datMat = np.matrix(dataArr)
labelMat = np.matrix(labelArr).transpose()
m, n = datMat.shape
for i in range(m):
kernelEval = kernelTrans(sVs, datMat[i, :], ("rbf", kl))
predict = kernelEval.T * np.multiply(labelSV, alphas[svInd]) + b
if np.sign(predict) != np.sign(labelArr[i]):
errorCount += 1
print("the test error rate is: {}".format(errorCount / m))
testRbf()
L == H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L == H
j not moving enough
j not moving enough
L == H
j not moving enough
j not moving enough
j not moving enough
L == H
L == H
L == H
fullSet, iter: 0, i: 99, pairs changed: 30
iteration number: 1
---
non-bound, iter: 1, i: 0, pairs changed: 1
j not moving enough
non-bound, iter: 1, i: 1, pairs changed: 1
non-bound, iter: 1, i: 2, pairs changed: 2
j not moving enough
non-bound, iter: 1, i: 3, pairs changed: 2
j not moving enough
non-bound, iter: 1, i: 6, pairs changed: 2
j not moving enough
non-bound, iter: 1, i: 8, pairs changed: 2
j not moving enough
non-bound, iter: 1, i: 10, pairs changed: 2
j not moving enough
non-bound, iter: 1, i: 11, pairs changed: 2
j not moving enough
non-bound, iter: 1, i: 13, pairs changed: 2
j not moving enough
non-bound, iter: 1, i: 14, pairs changed: 2
non-bound, iter: 1, i: 15, pairs changed: 3
j not moving enough
non-bound, iter: 1, i: 16, pairs changed: 3
j not moving enough
non-bound, iter: 1, i: 18, pairs changed: 3
j not moving enough
non-bound, iter: 1, i: 19, pairs changed: 3
j not moving enough
non-bound, iter: 1, i: 21, pairs changed: 3
j not moving enough
non-bound, iter: 1, i: 27, pairs changed: 3
j not moving enough
non-bound, iter: 1, i: 30, pairs changed: 3
j not moving enough
...
L == H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L == H
j not moving enough
j not moving enough
L == H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
fullSet, iter: 6, i: 99, pairs changed: 0
iteration number: 7
---
there are 88 support vectors
the training error rate is: 0.0
the test error rate is: 0.08
6.6 例:回顾手写数字分类(Example: revisiting handwriting classification)
例子:用SVM识别手写数字
收集:提供文本文件。
准备:由二进制图像创建向量
分析:可视化
训练:径向偏置核使用不同的内核和不同的设置,运行SMO算法
测试:计算错误率
使用:
# Listing 6.9 Support vector machine handwriting recognition
import os
def img2vector(filename):
returnVect = []
with open(filename) as fr:
for lineStr in fr.readlines():
returnVect.extend([int(bit) for bit in lineStr[0 : 32]])
returnVect = np.array(returnVect, ndmin=2)
return returnVect
def loadImages(dirName):
hwLabels = []
trainingFileList = os.listdir(dirName)
m = len(trainingFileList)
trainingMat = np.zeros((m, 1024))
for i in range(m):
fileNameStr = trainingFileList[i]
fileStr = fileNameStr.split('.')[0]
classNumStr = int(fileStr.split('_')[0])
if classNumStr == 9:
hwLabels.append(-1)
else:
hwLabels.append(1)
trainingMat[i, :] = img2vector("{}/{}".format(dirName, fileNameStr))
return trainingMat, hwLabels
def testDigits(kTup=("rbf", 10)):
# training
dataArr, labelArr = loadImages("./trainingDigits")
b, alphas = smoP(dataArr, labelArr, 200, 0.0001, 10000, kTup)
datMat = np.matrix(dataArr)
labelMat = np.matrix(labelArr).transpose()
svInd = np.nonzero(alphas.A > 0)[0]
sVs = datMat[svInd]
labelSV = labelMat[svInd]
print("there are {} Support Vectors".format(sVs.shape[0]))
# training error rate
m, n = datMat.shape
errorCount = 0
for i in range(m):
kernelEval = kernelTrans(sVs, datMat[i, :], kTup)
predict = kernelEval.T * np.multiply(labelSV, alphas[svInd]) + b
if np.sign(predict) != np.sign(labelArr[i]):
errorCount += 1
print("the training error rate is: {}".format(errorCount / m))
# test error rate
dataArr, labelArr = loadImages("./testDigits")
datMat = np.matrix(dataArr)
labelMat = np.matrix(labelArr).transpose()
m, n = datMat.shape
for i in range(m):
kernelEval = kernelTrans(sVs, datMat[i, :], kTup)
predict = kernelEval.T * np.multiply(labelSV, alphas[svInd]) + b
if np.sign(predict) != np.sign(labelArr[i]):
errorCount += 1
print("the test error rate is: {}".format(errorCount / m))
testDigits(("rbf", 20))
there are 157 Support Vectors
the training error rate is: 0.0
the test error rate is: 0.012684989429175475
testDigits(("rbf", 10))
there are 298 Support Vectors
the training error rate is: 0.0
the test error rate is: 0.006342494714587738
注意:SVM是二元分类器,且标签值只能为 ± 1 \pm 1 ±1,因此本例中将手写数字识别任务修改为数字9和非9数字分类,并将9的标签设置为-1,其余设置为1。
多分类SVM参考"A Comparison of Methods for Multiclass Support Vector Machines" by C. W. Hus et al。