【Example Overview】The support vector machine (SVM) was introduced by Vapnik and Chervonenkis in 1964. It developed rapidly during the 1990s, spawning a series of improved and extended algorithms, and has been widely applied in fields such as face recognition, text classification, handwritten character recognition, and bioinformatics.
【Core Code】
import numpy as np


class SMO(object):
    """Linear SVM trained with the SMO (Sequential Minimal Optimization) algorithm."""

    def __init__(self, C=100, toler=0.001, maxIter=10000):
        self.C = C              # soft-margin penalty parameter
        self.tol = toler        # tolerance used in the KKT-violation check
        self.maxIter = maxIter  # maximum number of outer-loop passes

    def fit(self, X, y):
        self.X, self.y = X, y
        self.n_samples = len(X)
        self.alphas = np.zeros(self.n_samples, dtype=float)  # Lagrange multipliers
        self.b = 0.                                          # bias term
        self.Error = np.zeros_like(self.alphas)              # error cache E_i = f(x_i) - y_i
        self.iterNum = 0
        iterNum = 0
        examineAll = True
        alphaChanged = 0
        # Outer loop: alternate between full passes over the training set and
        # passes over the non-bound samples (0 < alpha < C) only.
        while iterNum < self.maxIter and (alphaChanged > 0 or examineAll):
            alphaChanged = 0
            if examineAll:
                for i in range(len(self.X)):
                    alphaChanged += self._innerLoop(i)
                iterNum += 1
                examineAll = False
            else:
                nonBoundInd = np.nonzero((self.alphas > 0) * (self.alphas < self.C))[0]
                for i in nonBoundInd:
                    alphaChanged += self._innerLoop(i)
                iterNum += 1
                # No non-bound alpha moved: go back to a full pass.
                if alphaChanged == 0:
                    examineAll = True
        self.iterNum = iterNum
        return self
    def _innerLoop(self, i):
        Ei = self.updateError(i)
        # Optimize the pair (i, j) only if alpha_i violates the KKT conditions beyond tol.
        if (((Ei * self.y[i] < -self.tol) and (self.alphas[i] < self.C)) or
                ((Ei * self.y[i] > self.tol) and (self.alphas[i] > 0))):
            j = self.selectJ(i)
            Ej = self.Error[j]
            alphaIold, alphaJold = self.alphas[i], self.alphas[j]
            # Bounds L and H keep the updated alpha_i in [0, C] while preserving
            # the equality constraint y_i*alpha_i + y_j*alpha_j = const.
            if self.y[i] != self.y[j]:
                L = max(0, alphaIold - alphaJold)
                H = min(self.C, self.C + alphaIold - alphaJold)
            else:
                L = max(0, alphaJold + alphaIold - self.C)
                H = min(self.C, alphaJold + alphaIold)
            if H == L:
                return 0
            Kii, Kij, Kjj = (self.K(self.X[i], self.X[i]), self.K(self.X[i], self.X[j]),
                             self.K(self.X[j], self.X[j]))
            eta = Kii + Kjj - 2 * Kij  # second derivative of the objective along the constraint
            if eta <= 0:
                return 0
            # Unconstrained optimum for alpha_i, then clip it to [L, H].
            self.alphas[i] += self.y[i] * (Ej - Ei) / eta
            if self.alphas[i] <= L:
                self.alphas[i] = L
            elif self.alphas[i] >= H:
                self.alphas[i] = H
            if np.abs(self.alphas[i] - alphaIold) < 1.e-10:
                return 0
            # Move alpha_j in the opposite direction to keep the linear constraint.
            self.alphas[j] += self.y[j] * self.y[i] * (alphaIold - self.alphas[i])
            # Recompute the bias from whichever updated multiplier is non-bound.
            b0 = (self.b - Ej - self.y[j] * Kjj * (self.alphas[j] - alphaJold) -
                  self.y[i] * Kij * (self.alphas[i] - alphaIold))
            b1 = (self.b - Ei - self.y[j] * Kij * (self.alphas[j] - alphaJold) -
                  self.y[i] * Kii * (self.alphas[i] - alphaIold))
            if 0 < self.alphas[j] < self.C:
                self.b = b0
            elif 0 < self.alphas[i] < self.C:
                self.b = b1
            else:
                self.b = (b0 + b1) / 2
            return 1
        else:
            return 0
    def selectJ(self, i):
        # Second-choice heuristic: pick the j that maximizes |E_i - E_j|;
        # if no cached errors are available yet, pick j at random.
        j = 0
        maxDeltaE = -1.
        priorIndices = np.nonzero(self.Error)[0]
        if len(priorIndices) > 1:
            for k in priorIndices:
                if k == i:
                    continue
                Ek = self.updateError(k)
                deltaE = np.abs(Ek - self.Error[i])
                if deltaE > maxDeltaE:
                    j, maxDeltaE = k, deltaE
            return j
        else:
            j = np.random.choice([k for k in range(self.n_samples) if k != i])
            self.updateError(j)
            return j
    def updateError(self, i):
        # Decision value f(x_i) = sum_j alpha_j * y_j * K(x_i, x_j) + b; cache E_i = f(x_i) - y_i.
        fxi = np.sum(self.alphas * self.y * np.array([self.K(self.X[i], self.X[j])
                                                      for j in range(self.n_samples)])) + self.b
        self.Error[i] = fxi - self.y[i]
        return self.Error[i]

    def K(self, Xi, Xj):
        # Linear kernel: plain dot product.
        return np.sum(Xi * Xj)

    def predict(self, testX):
        num = len(testX)
        y_pred = np.ones(num, dtype=int)
        for i in range(num):
            fxi = np.sum(self.alphas * self.y * np.array([self.K(testX[i], self.X[j])
                                                          for j in range(self.n_samples)])) + self.b
            if fxi < 0:
                y_pred[i] = -1
        return y_pred