从零逐步实现SVM(含公式推导)下

前言

SMO算法推导结果

$$g(x) = \sum_{i=1}^m \alpha^{(i)} y^{(i)} K(x^{(i)}, x) + b$$

$$E_i = g(x^{(i)}) - y^{(i)} = \left(\sum_{j=1}^m \alpha^{(j)} y^{(j)} K(x^{(j)}, x^{(i)}) + b\right) - y^{(i)}$$

$$\eta = K_{11} + K_{22} - 2K_{12}$$

$$\alpha_{new,unc}^{(2)} = \alpha_{old}^{(2)} + \frac{y^{(2)}(E_1 - E_2)}{\eta}$$

  • 当 $y^{(1)} \neq y^{(2)}$ 时:

$$L = \max(0, -\varsigma) = \max(0,\ \alpha_{old}^{(2)} - \alpha_{old}^{(1)})$$

$$H = \min(C, C - \varsigma) = \min(C,\ C + \alpha_{old}^{(2)} - \alpha_{old}^{(1)})$$

  • 当 $y^{(1)} = y^{(2)}$ 时:

$$L = \max(0, \varsigma - C) = \max(0,\ \alpha_{old}^{(2)} + \alpha_{old}^{(1)} - C)$$

$$H = \min(C, \varsigma) = \min(C,\ \alpha_{old}^{(2)} + \alpha_{old}^{(1)})$$

$$\alpha_{new}^{(2)} = \begin{cases} H, & \alpha_{new,unc}^{(2)} > H \\ \alpha_{new,unc}^{(2)}, & L \leq \alpha_{new,unc}^{(2)} \leq H \\ L, & \alpha_{new,unc}^{(2)} < L \end{cases}$$

$$\alpha_{new}^{(1)} = \alpha_{old}^{(1)} + y^{(1)} y^{(2)} (\alpha_{old}^{(2)} - \alpha_{new}^{(2)})$$

$$b_{new}^{(1)} = -E_1 - y^{(1)} K_{11} (\alpha_{new}^{(1)} - \alpha_{old}^{(1)}) - y^{(2)} K_{21} (\alpha_{new}^{(2)} - \alpha_{old}^{(2)}) + b_{old}$$

$$b_{new}^{(2)} = -E_2 - y^{(1)} K_{12} (\alpha_{new}^{(1)} - \alpha_{old}^{(1)}) - y^{(2)} K_{22} (\alpha_{new}^{(2)} - \alpha_{old}^{(2)}) + b_{old}$$

  • 当 $0 < \alpha_{new}^{(1)} < C$ 时:

b = b n e w ( 1 ) b = b_{new}^{(1)} b=bnew(1)

  • 当 $0 < \alpha_{new}^{(2)} < C$ 时:

b = b n e w ( 2 ) b = b_{new}^{(2)} b=bnew(2)

  • 其他情况:

$$b_{new} = \frac{b_{new}^{(1)} + b_{new}^{(2)}}{2}$$

Python实现线性支持向量机

  • 线性支持向量机中目标函数:

$$\min_{\alpha} \frac{1}{2} \sum_{i=1}^m \sum_{j=1}^m \alpha^{(i)} \alpha^{(j)} y^{(i)} y^{(j)} (x^{(i)} \cdot x^{(j)}) - \sum_{i=1}^m \alpha^{(i)}$$

$$s.t.\ \begin{cases} \sum\limits_{i=1}^{m} \alpha^{(i)} y^{(i)} = 0 \\ 0 \leq \alpha^{(i)} \leq C,\ i = 1, 2, ..., m \end{cases}$$

  • 内积 $(x^{(i)} \cdot x^{(j)})$ 可以看成一个线性的核函数,依然可以使用上面推导出来的公式进行计算

  • 导入必要包

import numpy as np
import pandas as pd
import statsmodels.api as sm
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing, make_regression

np.random.seed(42)
  • 导入数据函数
def load_data(data_path):
    df = pd.read_csv(data_path, header=None, sep="\t")
    row_num, col_num = df.shape
    columns=[]
    for i in range(1, col_num):
        columns.append('x'+str(i))
    columns.append('y')
    df.columns = columns
    return df
  • 随机选择第二个样本下标(作为 $\alpha^{(2)}$,与 $\alpha^{(1)}$ 配对)
def select_alpha(i, m):
    temp_list = [a for a in range(m) if a != i]
    return np.random.choice(temp_list)
  • 对未剪辑的 $\alpha^{(2)}$ 进行剪辑:
def clip_alpha(a_j, H, L):
    if a_j > H:
        a_j = H
    elif L > a_j:
        a_j = L
    return a_j
  • 训练线性支持向量机,得到 $\alpha$ 值和 $b$ 值
def smo_simple(df, C, toler, steps):
    # 取特征 -> 变为np.array -> 变为np.mat
    data = np.mat(df.iloc[:,:-1].values)
    # 取标签 -> 变为np.array -> 变为np.mat
    label = np.mat(df.iloc[:,-1].values.reshape(-1,1))
    
    # 初始化b
    b = 0
    # 初始化alpha矩阵
    row_num, col_num = data.shape
    alpha = np.mat(np.zeros((row_num, 1)))
    
    step = 0
    while(step < steps):
        alpha_change = 0
        for i in range(row_num):
            gxi = float(np.multiply(alpha, label).T * (data * data[i,:].T)) + b
            ei = gxi - float(label[i])
            if ((label[i] * ei < -toler) and (alpha[i] < C)) or (label[i] * ei > toler) and (alpha[i] > 0):
                j = select_alpha(i, row_num)
                gxj = float(np.multiply(alpha, label).T * (data * data[j,:].T)) + b
                ej = gxj - float(label[j])
                
                # 保存以前的alpha值
                alpha_i_old = alpha[i].copy()
                alpha_j_old = alpha[j].copy()
                
                if label[i] != label[j]:
                    L = max(0, alpha_j_old - alpha_i_old)
                    H = min(C, C + alpha_j_old - alpha_i_old)
                else:
                    L = max(0, alpha_j_old + alpha_i_old - C)
                    H = min(C, alpha_j_old + alpha_i_old)
                
                eta = 2 * data[i,:] * data[j,:].T - data[i,:] * data[i,:].T - data[j,:] * data[j,:].T
                
                alpha[j] = alpha[j] - label[j] * (ei - ej)/eta
                alpha[j] = clip_alpha(alpha[j], H, L)
                
                alpha[i] = alpha[i] + label[i] * label[j] * (alpha_j_old - alpha[j])
                b1 = b - ei - label[i] * (data[i,:] * data[i,:].T) * (alpha[i] - alpha_i_old) - label[j] * (data[j,:] * data[i,:].T) * (alpha[j] - alpha_j_old)
                b2 = b - ej - label[i] * (data[i,:] * data[j,:].T) * (alpha[i] - alpha_i_old) - label[j] * (data[j,:] * data[j,:].T) * (alpha[j] - alpha_j_old)
                
                if (0 < alpha[i]) and (C > alpha[i]):
                    b = b1
                elif (0 < alpha[j]) and (C > alpha[j]):
                    b = b2
                else:
                    b = (b1 + b2)/2.0
                
                alpha_change = alpha_change + 1
                
        if (alpha_change == 0):
            step = step + 1
        else:
            step = 0
    return b, alpha
  • 计算 $w$ 值
def calculate_w(alpha, df):
    # 取特征 -> 变为np.array -> 变为np.mat
    data = np.mat(df.iloc[:,:-1].values)
    # 取标签 -> 变为np.array -> 变为np.mat
    label = np.mat(df.iloc[:,-1].values.reshape(-1,1))
    
    row_num, col_num = data.shape
    
    # 初始化w值
    w = np.zeros((col_num,1))
    for i in range(row_num):
        w = w + np.multiply(alpha[i] * label[i], data[i,:].T)
    return w
  • 调用函数
data = load_data('/kaggle/input/studyml/svm1.txt')
b, alpha = smo_simple(data, 0.6, 0.001, 50)
w = calculate_w(alpha, data)
print(b)
print(w)
# 打印输出:[[-3.83574561]]
#[[ 0.81406087]
# [-0.27265396]]

Python实现非线性支持向量机

  • 非线性支持向量机只需要将上面计算 ( x ( i ) ⋅ x ( j ) ) (x^{(i)} \cdot x^{(j)}) (x(i)x(j))的地方换成核函数计算方法就行,这里以高斯核函数为例
  • 高斯核函数计算函数
def kernel_trans(data, a, k_tup):
    row_num, col_num = data.shape
    k = np.mat(np.zeros((row_num, 1)))
    if k_tup[0] == 'lin':
        k = data * a.T
    elif k_tup[0] == 'rbf':
        for j in range(row_num):
            data_temp = data[j, :] - a
            k[j] = data_temp * data_temp.T
        k = np.exp(k/(-2 * k_tup[1] ** 2))
    return k
  • 训练SVM函数
def smo_simple(df, C, toler, steps):
    # 取特征 -> 变为np.array -> 变为np.mat
    data = np.mat(df.iloc[:,:-1].values)
    # 取标签 -> 变为np.array -> 变为np.mat
    label = np.mat(df.iloc[:,-1].values.reshape(-1,1))
    
    # 初始化b
    b = 0
    # 初始化alpha矩阵
    row_num, col_num = data.shape
    alpha = np.mat(np.zeros((row_num, 1)))
    
    step = 0
    while(step < steps):
        alpha_change = 0
        for i in range(row_num):
            gxi = float(np.multiply(alpha, label).T * kernel_trans(data, data[i,:], ['rbf', 0.05])) + b
            ei = gxi - float(label[i])
            if ((label[i] * ei < -toler) and (alpha[i] < C)) or (label[i] * ei > toler) and (alpha[i] > 0):
                j = select_alpha(i, row_num)
                gxj = float(np.multiply(alpha, label).T * kernel_trans(data, data[j,:], ['rbf', 0.05])) + b
                ej = gxj - float(label[j])
                
                # 保存以前的alpha值
                alpha_i_old = alpha[i].copy()
                alpha_j_old = alpha[j].copy()
                
                if label[i] != label[j]:
                    L = max(0, alpha_j_old - alpha_i_old)
                    H = min(C, C + alpha_j_old - alpha_i_old)
                else:
                    L = max(0, alpha_j_old + alpha_i_old - C)
                    H = min(C, alpha_j_old + alpha_i_old)
                
                eta = 2 * data[i,:] * data[j,:].T - data[i,:] * data[i,:].T - data[j,:] * data[j,:].T
                
                alpha[j] = alpha[j] - label[j] * (ei - ej)/eta
                alpha[j] = clip_alpha(alpha[j], H, L)
                
                alpha[i] = alpha[i] + label[i] * label[j] * (alpha_j_old - alpha[j])
                b1 = b - ei - label[i] * (data[i,:] * data[i,:].T) * (alpha[i] - alpha_i_old) - label[j] * (data[j,:] * data[i,:].T) * (alpha[j] - alpha_j_old)
                b2 = b - ej - label[i] * (data[i,:] * data[j,:].T) * (alpha[i] - alpha_i_old) - label[j] * (data[j,:] * data[j,:].T) * (alpha[j] - alpha_j_old)
                
                if (0 < alpha[i]) and (C > alpha[i]):
                    b = b1
                elif (0 < alpha[j]) and (C > alpha[j]):
                    b = b2
                else:
                    b = (b1 + b2)/2.0
                
                alpha_change = alpha_change + 1
                
        if (alpha_change == 0):
            step = step + 1
        else:
            step = 0
    return b, alpha
  • 调用函数
b, alpha = smo_simple(data, 0.6, 0.001, 50)
w = calculate_w(alpha, data)
print(b)
print(w)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

羽星_s

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值