前言
- 因为篇幅太长被迫分成两篇,“书”接上回——从零逐步实现SVM(含公式推导)上
SMO算法推导结果
g ( x ) = ∑ i = 1 m α ( i ) y ( i ) K ( x ( i ) , x ) + b E i = g ( x ( i ) ) − y ( i ) = ( ∑ j = 1 m α ( j ) y ( j ) K ( x ( j ) , x ( i ) ) + b ) − y ( i ) η = K 11 + K 22 − K 12 g(x) = \sum_{i=1}^m\alpha^{(i)}y^{(i)}K(x^{(i)},x) + b\\ E_i = g(x^{(i)}) - y^{(i)} = \left(\sum_{j=1}^m\alpha^{(j)}y^{(j)}K(x^{(j)},x^{(i)}) + b\right) - y^{(i)}\\ \eta = K_{11} + K_{22} - K_{12} g(x)=i=1∑mα(i)y(i)K(x(i),x)+bEi=g(x(i))−y(i)=(j=1∑mα(j)y(j)K(x(j),x(i))+b)−y(i)η=K11+K22−K12
α n e w , u n c ( 2 ) = α o l d ( 2 ) + y ( 2 ) ( E 1 − E 2 ) η \alpha_{new,unc}^{(2)} = \alpha_{old}^{(2)} + \frac{y^{(2)}(E_1 - E_2)}{\eta} αnew,unc(2)=αold(2)+ηy(2)(E1−E2)
- 若 y ( 1 ) ≠ y 2 y^{(1)} \neq y^{2} y(1)=y2:
L = m a x ( 0 , − ς ) = m a x ( 0 , α o l d ( 2 ) − α o l d ( 1 ) ) H = m i n ( C , C − ς ) = m i n ( C , C + α o l d ( 2 ) − α o l d ( 1 ) ) L = max(0,-\varsigma) = max(0,\alpha_{old}^{(2)} - \alpha_{old}^{(1)})\\ H = min(C,C-\varsigma) = min(C, C + \alpha_{old}^{(2)} - \alpha_{old}^{(1)}) L=max(0,−ς)=max(0,αold(2)−αold(1))H=min(C,C−ς)=min(C,C+αold(2)−αold(1))
- 若 y ( 1 ) = y ( 2 ) y^{(1)} = y^{(2)} y(1)=y(2):
L = m a x ( 0 , ς − C ) = m a x ( 0 , α o l d ( 2 ) + α o l d ( 1 ) − C ) H = m i n ( C , ς ) = m i n ( C , α o l d ( 2 ) + α o l d ( 1 ) ) L = max(0,\varsigma-C) = max(0,\alpha^{(2)}_{old} + \alpha_{old}^{(1)} - C)\\ H = min(C, \varsigma) = min(C, \alpha^{(2)}_{old} + \alpha_{old}^{(1)}) L=max(0,ς−C)=max(0,αold(2)+αold(1)−C)H=min(C,ς)=min(C,αold(2)+αold(1))
α n e w ( 2 ) = { H , α n e w , u n c ( 2 ) > H α n e w , u n c ( 2 ) , L ≤ α n e w , u n c ( 2 ) ≤ H L , α n e w , u n c ( 2 ) < L \alpha^{(2)}_{new} =\begin{cases}H,\ \ \alpha_{new,unc}^{(2)} > H\\\alpha^{(2)}_{new,unc},\ L \leq \alpha_{new,unc}^{(2)} \leq H\\L,\alpha_{new,unc}^{(2)} < L\end{cases} αnew(2)=⎩ ⎨ ⎧H, αnew,unc(2)>Hαnew,unc(2), L≤αnew,unc(2)≤HL,αnew,unc(2)<L
α n e w ( 1 ) = α o l d ( 1 ) + y ( 1 ) y ( 2 ) ( α o l d ( 2 ) − α n e w ( 2 ) ) b n e w ( 1 ) = − E 1 − y ( 1 ) K 11 ( α n e w ( 1 ) − α o l d ( 1 ) ) − y ( 2 ) K 21 ( α n e w ( 2 ) − α o l d ( 2 ) ) + b o l d b n e w ( 2 ) = − E 2 − y ( 1 ) K 12 ( α n e w ( 1 ) − α o l d ( 1 ) ) − y ( 2 ) K 22 ( α n e w ( 2 ) − α o l d ( 2 ) ) + b o l d \alpha_{new}^{(1)} = \alpha_{old}^{(1)} + y^{(1)}y^{(2)}(\alpha_{old}^{(2)} - \alpha_{new}^{(2)})\\ b_{new}^{(1)} = -E_1-y^{(1)}K_{11}(\alpha_{new}^{(1)} - \alpha_{old}^{(1)}) - y^{(2)}K_{21}(\alpha_{new}^{(2)} - \alpha_{old}^{(2)}) + b_{old}\\ b_{new}^{(2)} = -E_2-y^{(1)}K_{12}(\alpha_{new}^{(1)} - \alpha_{old}^{(1)}) - y^{(2)}K_{22}(\alpha_{new}^{(2)} - \alpha_{old}^{(2)}) + b_{old} αnew(1)=αold(1)+y(1)y(2)(αold(2)−αnew(2))bnew(1)=−E1−y(1)K11(αnew(1)−αold(1))−y(2)K21(αnew(2)−αold(2))+boldbnew(2)=−E2−y(1)K12(αnew(1)−αold(1))−y(2)K22(αnew(2)−αold(2))+bold
- 若 0 < α n e w ( 1 ) < C 0 < \alpha_{new}^{(1)} < C 0<αnew(1)<C:
b = b n e w ( 1 ) b = b_{new}^{(1)} b=bnew(1)
- 若 0 < α n e w ( 1 ) < C 0 < \alpha_{new}^{(1)} < C 0<αnew(1)<C:
b = b n e w ( 2 ) b = b_{new}^{(2)} b=bnew(2)
- 其他情况:
b n e w = b n e w ( 1 ) + b n e w ( 2 ) 2 b_{new} = \frac{b_{new}^{(1)} + b_{new}^{(2)}}{2} bnew=2bnew(1)+bnew(2)
Python实现线性支持向量机
- 线性支持向量机中目标函数:
m i n α 1 2 ∑ i = 1 m ∑ j = 1 m α ( i ) α ( j ) y ( i ) y ( j ) ( x ( i ) ⋅ x ( j ) ) − ∑ i = 1 m α ( i ) s . t . { ∑ j = 1 m α ( i ) y ( i ) = 0 0 ≤ α ( i ) ≤ C , i = 1 , 2 , . . . , m \mathop{min}\limits_{\alpha}\frac{1}{2}\sum_{i=1}^m\sum_{j=1}^m\alpha^{(i)}\alpha^{(j)}y^{(i)}y^{(j)}(x^{(i)} \cdot x^{(j)}) - \sum_{i=1}^m\alpha^{(i)}\\ s.t.\ \begin{cases} \sum\limits_{j=1}^{m}\alpha^{(i)}y^{(i)} = 0\\ 0 \leq \alpha^{(i)} \leq C,\ i=1,2,...,m\\ \end{cases} αmin21i=1∑mj=1∑mα(i)α(j)y(i)y(j)(x(i)⋅x(j))−i=1∑mα(i)s.t. ⎩ ⎨ ⎧j=1∑mα(i)y(i)=00≤α(i)≤C, i=1,2,...,m
-
内积 ( x ( i ) ⋅ x ( j ) ) (x^{(i)} \cdot x^{(j)}) (x(i)⋅x(j))可以看成一个线性的核函数,依然可以使用上面推导出来的公式进行计算
-
导入必要包
import numpy as np
import pandas as pd
import statsmodels.api as sm
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing, make_regression
np.random.seed(42)
- 导入数据函数
def load_data(data_path):
df = pd.read_csv(data_path, header=None, sep="\t")
row_num, col_num = df.shape
columns=[]
for i in range(1, col_num):
columns.append('x'+str(i))
columns.append('y')
df.columns = columns
return df
- 随机选择2个样本(作为 α ( 1 ) \alpha^{(1)} α(1), α ( 2 ) \alpha^{(2)} α(2))
def select_alpha(i, m):
temp_list = [a for a in range(m) if a != i]
return np.random.choice(temp_list)
- 对未剪辑的 α ( i ) \alpha^{(i)} α(i)进行剪辑:
def clip_alpha(a_j, H, L):
if a_j > H:
a_j = H
elif L > a_j:
a_j = L
return a_j
- 训练线性支持向量机,得到 α \alpha α值和 b b b值
def smo_simple(df, C, toler, steps):
# 取特征 -> 变为np.array -> 变为np.mat
data = np.mat(df.iloc[:,:-1].values)
# 取标签 -> 变为np.array -> 变为np.mat
label = np.mat(df.iloc[:,-1].values.reshape(-1,1))
# 初始化b
b = 0
# 初始化alpha矩阵
row_num, col_num = data.shape
alpha = np.mat(np.zeros((row_num, 1)))
step = 0
while(step < steps):
alpha_change = 0
for i in range(row_num):
gxi = float(np.multiply(alpha, label).T * (data * data[i,:].T)) + b
ei = gxi - float(label[i])
if ((label[i] * ei < -toler) and (alpha[i] < C)) or (label[i] * ei > toler) and (alpha[i] > 0):
j = select_alpha(i, row_num)
gxj = float(np.multiply(alpha, label).T * (data * data[j,:].T)) + b
ej = gxj - float(label[j])
# 保存以前的alpha值
alpha_i_old = alpha[i].copy()
alpha_j_old = alpha[j].copy()
if label[i] != label[j]:
L = max(0, alpha_j_old - alpha_i_old)
H = min(C, C + alpha_j_old - alpha_i_old)
else:
L = max(0, alpha_j_old + alpha_i_old - C)
H = min(C, alpha_j_old + alpha_i_old)
eta = 2 * data[i,:] * data[j,:].T - data[i,:] * data[i,:].T - data[j,:] * data[j,:].T
alpha[j] = alpha[j] - label[j] * (ei - ej)/eta
alpha[j] = clip_alpha(alpha[j], H, L)
alpha[i] = alpha[i] + label[i] * label[j] * (alpha_j_old - alpha[j])
b1 = b - ei - label[i] * (data[i,:] * data[i,:].T) * (alpha[i] - alpha_i_old) - label[j] * (data[j,:] * data[i,:].T) * (alpha[j] - alpha_j_old)
b2 = b - ej - label[i] * (data[i,:] * data[j,:].T) * (alpha[i] - alpha_i_old) - label[j] * (data[j,:] * data[j,:].T) * (alpha[j] - alpha_j_old)
if (0 < alpha[i]) and (C > alpha[i]):
b = b1
elif (0 < alpha[j]) and (C > alpha[j]):
b = b2
else:
b = (b1 + b2)/2.0
alpha_change = alpha_change + 1
if (alpha_change == 0):
step = step + 1
else:
step = 0
return b, alpha
- 计算 w w w值
def calculate_w(alpha, df):
# 取特征 -> 变为np.array -> 变为np.mat
data = np.mat(df.iloc[:,:-1].values)
# 取标签 -> 变为np.array -> 变为np.mat
label = np.mat(df.iloc[:,-1].values.reshape(-1,1))
row_num, col_num = data.shape
# 初始化w值
w = np.zeros((col_num,1))
for i in range(row_num):
w = w + np.multiply(alpha[i] * label[i], data[i,:].T)
return w
- 调用函数
data = load_data('/kaggle/input/studyml/svm1.txt')
b, alpha = smo_simple(data, 0.6, 0.001, 50)
w = calculate_w(alpha, data)
print(b)
print(w)
# 打印输出:[[-3.83574561]]
#[[ 0.81406087]
# [-0.27265396]]
Python实现非线性支持向量机
- 非线性支持向量机只需要将上面计算 ( x ( i ) ⋅ x ( j ) ) (x^{(i)} \cdot x^{(j)}) (x(i)⋅x(j))的地方换成核函数计算方法就行,这里以高斯核函数为例
- 高斯核函数计算函数
def kernel_trans(data, a, k_tup):
row_num, col_num = data.shape
k = np.mat(np.zeros((row_num, 1)))
if k_tup[0] == 'lin':
k = data * a.T
elif k_tup[0] == 'rbf':
for j in range(row_num):
data_temp = data[j, :] - a
k[j] = data_temp * data_temp.T
k = np.exp(k/(-2 * k_tup[1] ** 2))
return k
- 训练SVM函数
def smo_simple(df, C, toler, steps):
# 取特征 -> 变为np.array -> 变为np.mat
data = np.mat(df.iloc[:,:-1].values)
# 取标签 -> 变为np.array -> 变为np.mat
label = np.mat(df.iloc[:,-1].values.reshape(-1,1))
# 初始化b
b = 0
# 初始化alpha矩阵
row_num, col_num = data.shape
alpha = np.mat(np.zeros((row_num, 1)))
step = 0
while(step < steps):
alpha_change = 0
for i in range(row_num):
gxi = float(np.multiply(alpha, label).T * kernel_trans(data, data[i,:], ['rbf', 0.05])) + b
ei = gxi - float(label[i])
if ((label[i] * ei < -toler) and (alpha[i] < C)) or (label[i] * ei > toler) and (alpha[i] > 0):
j = select_alpha(i, row_num)
gxj = float(np.multiply(alpha, label).T * kernel_trans(data, data[j,:], ['rbf', 0.05])) + b
ej = gxj - float(label[j])
# 保存以前的alpha值
alpha_i_old = alpha[i].copy()
alpha_j_old = alpha[j].copy()
if label[i] != label[j]:
L = max(0, alpha_j_old - alpha_i_old)
H = min(C, C + alpha_j_old - alpha_i_old)
else:
L = max(0, alpha_j_old + alpha_i_old - C)
H = min(C, alpha_j_old + alpha_i_old)
eta = 2 * data[i,:] * data[j,:].T - data[i,:] * data[i,:].T - data[j,:] * data[j,:].T
alpha[j] = alpha[j] - label[j] * (ei - ej)/eta
alpha[j] = clip_alpha(alpha[j], H, L)
alpha[i] = alpha[i] + label[i] * label[j] * (alpha_j_old - alpha[j])
b1 = b - ei - label[i] * (data[i,:] * data[i,:].T) * (alpha[i] - alpha_i_old) - label[j] * (data[j,:] * data[i,:].T) * (alpha[j] - alpha_j_old)
b2 = b - ej - label[i] * (data[i,:] * data[j,:].T) * (alpha[i] - alpha_i_old) - label[j] * (data[j,:] * data[j,:].T) * (alpha[j] - alpha_j_old)
if (0 < alpha[i]) and (C > alpha[i]):
b = b1
elif (0 < alpha[j]) and (C > alpha[j]):
b = b2
else:
b = (b1 + b2)/2.0
alpha_change = alpha_change + 1
if (alpha_change == 0):
step = step + 1
else:
step = 0
return b, alpha
- 调用函数
b, alpha = smo_simple(data, 0.6, 0.001, 50)
w = calculate_w(alpha, data)
print(b)
print(w)