# 支持向量机

### Demo实践

#### Step 1 库函数导入

#基础函数库
import numpy as np
#导入画图库
import matplotlib.pyplot as plt
import seaborn as sns
# Import the support vector machine (SVM) module
from sklearn  import svm


#### Step 2 构建数据集并进行模型训练

# Demo: classify a tiny hand-built dataset with a linear SVM.
# Six 2-D samples: the first three carry label 0, the last three label 1.
x_fearures = np.array([
    [-1, -2], [-2, -1], [-3, -2],
    [1, 3], [2, 1], [3, 2],
])
y_label = np.array([0] * 3 + [1] * 3)
# Build a support vector classifier with a linear kernel and fit it
# on the dataset above (fit() returns the estimator itself).
svc = svm.SVC(kernel='linear').fit(x_fearures, y_label)


#### Step 3 模型参数查看

# Show the weight vector w learned by the linear SVM.
# (The messages previously said "Logistic Regression", but the model is an SVC.)
print('the weight of SVM:', svc.coef_)
# Show the intercept w0 learned by the linear SVM.
print('the intercept(w0) of SVM:', svc.intercept_)


#### Step 4 模型预测

# Predict labels for the training samples with the fitted SVM and print them.
y_train_pred = svc.predict(x_fearures)
print('The predction result:', y_train_pred)


#### Step 5 模型可视化

# Visualise the samples together with the learned decision boundary.
plt.figure()
# The boundary satisfies w[0]*x + w[1]*y + w0 = 0, i.e. y = a*x - w0/w[1].
x_range = np.linspace(-3, 3)
w = svc.coef_[0]
a = -w[0] / w[1]
y_3 = a * x_range - svc.intercept_[0] / w[1]
# Samples coloured by label first, boundary line drawn on top.
plt.scatter(
    x_fearures[:, 0],
    x_fearures[:, 1],
    c=y_label,
    s=50,
    cmap='viridis',
)
plt.plot(x_range, y_3, '-c')
plt.show()


### 支持向量机的介绍

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets.samples_generator import make_blobs
%matplotlib inline
#画图
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, s=60, cmap=plt.cm.Paired)


# Two hand-picked candidate separating lines over the same two clusters.
x_fit = np.linspace(0, 3)
y_1 = 1 * x_fit + 0.8    # candidate 1: slope 1, intercept 0.8
y_2 = -0.3 * x_fit + 3   # candidate 2: slope -0.3, intercept 3
# Scatter the samples, then draw candidate 1 (cyan) and candidate 2 (black).
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
plt.plot(x_fit, y_1, '-c')
plt.plot(x_fit, y_2, '-k')


# Scatter the two clusters.
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
# Add one extra point at (3, 2.8) with a fixed colour. No cmap is passed:
# a colormap is ignored when `c` is a literal colour, and matplotlib warns.
plt.scatter([3], [2.8], c='#cccc00', marker='<', s=100)
x_fit = np.linspace(0, 3)
# Draw the two candidate separating lines.
y_1 = 1 * x_fit + 0.8
plt.plot(x_fit, y_1, '-c')
y_2 = -0.3 * x_fit + 3
plt.plot(x_fit, y_2, '-k')


# Compare the two candidate lines together with a band around each of them.
x_fit = np.linspace(0, 3)
y_1 = 1 * x_fit + 0.8
y_2 = -0.3 * x_fit + 3
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
# Candidate 1 (cyan) with a grey band of half-width 0.6.
plt.plot(x_fit, y_1, '-c')
plt.fill_between(
    x_fit, y_1 - 0.6, y_1 + 0.6,
    edgecolor='none', color='#AAAAAA', alpha=0.4,
)
# Candidate 2 (black) with a grey band of half-width 0.4.
plt.plot(x_fit, y_2, '-k')
plt.fill_between(
    x_fit, y_2 - 0.4, y_2 + 0.4,
    edgecolor='none', color='#AAAAAA', alpha=0.4,
)


**

# Scatter the two clusters.
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
# Define the x grid here so this cell does not depend on a variable
# left over from a previously executed cell.
x_fit = np.linspace(0, 3)
# Draw the candidate line y = x + 0.8.
y_1 = 1 * x_fit + 0.8
plt.plot(x_fit, y_1, '-c')
# Shade a band of half-width 0.6 around the line.
plt.fill_between(x_fit, y_1 - 0.6, y_1 + 0.6, edgecolor='none', color='#AAAAAA', alpha=0.4)


from sklearn.svm import SVC
# Create a linear-kernel support vector classifier and train it on (X, y);
# fit() hands back the fitted estimator itself.
clf = SVC(kernel='linear').fit(X, y)


# Decision boundary of the fitted linear SVM, written as y = a*x - w0/w[1].
w = clf.coef_[0]
a = -w[0] / w[1]
y_3 = a * x_fit - clf.intercept_[0] / w[1]
# Margin edges: lines parallel to the boundary through the first and the
# last support vector reported by the model.
b_down = clf.support_vectors_[0]
b_up = clf.support_vectors_[-1]
y_down = a * x_fit + b_down[1] - a * b_down[0]
y_up = a * x_fit + b_up[1] - a * b_up[0]
# Scatter the samples.
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
# Boundary line.
plt.plot(x_fit, y_3, '-c')
# Shaded region between the two margin edges.
plt.fill_between(
    x_fit, y_down, y_up,
    edgecolor='none', color='#AAAAAA', alpha=0.4,
)
# Ring the support vectors with hollow blue-edged markers.
plt.scatter(
    clf.support_vectors_[:, 0],
    clf.support_vectors_[:, 1],
    edgecolor='b',
    s=80,
    facecolors='none',
)


### 软间隔

# Same two-cluster generator, but with a larger spread (cluster_std=0.9).
X, y = make_blobs(
    n_samples=60,
    centers=2,
    random_state=0,
    cluster_std=0.9,
)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)


# Soft-margin SVM with penalty C=1 on the wider clusters (cluster_std=0.9).
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.9)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
clf = SVC(C=1, kernel='linear')
clf.fit(X, y)
# Define the x grid locally (same range as the C=0.2 example) instead of
# relying on a value left over from an earlier cell.
x_fit = np.linspace(-1.5, 4)
# Decision boundary: w[0]*x + w[1]*y + w0 = 0  ->  y = a*x - w0/w[1].
w = clf.coef_[0]
a = -w[0] / w[1]
y_3 = a * x_fit - clf.intercept_[0] / w[1]
# Margin edges are where the decision function equals -1 and +1, i.e. the
# boundary shifted vertically by 1/|w[1]|. With a soft margin an arbitrary
# support vector may lie inside the margin, so it cannot be used to locate
# the edges.
margin = 1 / abs(w[1])
y_down = y_3 - margin
y_up = y_3 + margin
# NOTE: the samples are not regenerated with cluster_std=0.4 here; doing so
# overlaid a dataset the model was never trained on.
# Boundary line.
plt.plot(x_fit, y_3, '-c')
# Shaded margin.
plt.fill_between(x_fit, y_down, y_up, edgecolor='none', color='#AAAAAA', alpha=0.4)
# Ring the support vectors.
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], edgecolor='b',
            s=80, facecolors='none')


# Soft-margin SVM with a smaller penalty C=0.2 on the same wider clusters.
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.9)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
clf = SVC(C=0.2, kernel='linear')
clf.fit(X, y)
x_fit = np.linspace(-1.5, 4)
# Decision boundary: w[0]*x + w[1]*y + w0 = 0  ->  y = a*x - w0/w[1].
w = clf.coef_[0]
a = -w[0] / w[1]
y_3 = a * x_fit - clf.intercept_[0] / w[1]
# Margin edges are where the decision function equals -1 and +1, i.e. the
# boundary shifted vertically by 1/|w[1]|. This replaces the hard-coded
# support-vector indices (10 and 1), which depend on the fitted model.
margin = 1 / abs(w[1])
y_down = y_3 - margin
y_up = y_3 + margin
# NOTE: the samples are not regenerated with cluster_std=0.4 here; doing so
# overlaid a dataset the model was never trained on.
# Boundary line.
plt.plot(x_fit, y_3, '-c')
# Shaded margin.
plt.fill_between(x_fit, y_down, y_up, edgecolor='none', color='#AAAAAA', alpha=0.4)
# Ring the support vectors.
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], edgecolor='b',
            s=80, facecolors='none')


### 超平面

# make_circles is exported from sklearn.datasets; the private
# sklearn.datasets.samples_generator path is gone in current scikit-learn.
from sklearn.datasets import make_circles
# Two concentric rings of samples.
X, y = make_circles(100, factor=.1, noise=.1, random_state=2019)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
clf = SVC(kernel='linear').fit(X, y)
# Boundary of the linear SVM, evaluated on the 1-D grid x_fit.
# (The line was previously computed and plotted against the whole sample
# matrix X, leaving x_fit unused.)
x_fit = np.linspace(-1.5, 1.5)
w = clf.coef_[0]
a = -w[0] / w[1]
y_3 = a * x_fit - clf.intercept_[0] / w[1]
plt.plot(x_fit, y_3, '-c')


# Fit an RBF-kernel SVM on the concentric-ring data and draw its
# decision-function contours.
X, y = make_circles(100, factor=.1, noise=.1, random_state=2019)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
clf = SVC(kernel='rbf')
clf.fit(X, y)
ax = plt.gca()
# Evaluation grid over [-1, 1] x [-1, 1]. Separate names are used so the
# label vector y is not overwritten.
grid_x = np.linspace(-1, 1)
grid_y = np.linspace(-1, 1)
x_1, y_1 = np.meshgrid(grid_x, grid_y)
# Evaluate the decision function on every grid point in one call and
# reshape to the meshgrid layout, so P[i, j] belongs to
# (x_1[i, j], y_1[i, j]). The earlier element-wise loop filled P transposed.
grid_points = np.column_stack([x_1.ravel(), y_1.ravel()])
P = clf.decision_function(grid_points).reshape(x_1.shape)
# Solid line at level 0, dashed lines at levels -1 and 0.9.
ax.contour(x_1, y_1, P, colors='k', levels=[-1, 0, 0.9], alpha=0.5,
           linestyles=['--', '-', '--'])
# Ring the support vectors.
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], edgecolor='b',
            s=80, facecolors='none');


### 思考与讨论

1.什么是支持向量

$$\begin{aligned} &w^Tx_i+b\geq +1,\quad y_i=+1\\ &w^Tx_i+b\leq -1,\quad y_i=-1 \end{aligned}$$

$$\gamma=\frac{2}{||w||}$$

# Load the required packages, installing them first if they are missing.
if (!require("ISLR")) { install.packages("ISLR") }
if (!require("e1071")) { install.packages("e1071") }
set.seed(0)
# Seven 2-D points: the first four belong to class 1, the last three to class 0.
DF <- data.frame(
  x1 = c(3, 2, 4, 1, 2, 4, 4),
  x2 = c(4, 2, 4, 4, 1, 3, 1),
  y = as.factor(c(rep(1, 4), rep(0, 3)))
)
colors <- c(rep('red', 4), rep('blue', 3))
# Plot all points.
plot(DF$x1, DF$x2, col = colors, pch = 19, cex = 2.0, xlab = 'X_1', ylab = 'X_2', main = '')
grid()
# Slope of the hyperplane.
slope <- (3.5 - 1.5) / (4 - 2)
# Intercept of the hyperplane.
intercept <- -2 * slope + 1.5
# Upper margin line: same slope, shifted intercept.
slope_m_upper <- slope
intercept_m_upper <- -2 * slope_m_upper + 2
# Lower margin line: same slope, shifted intercept.
slope_m_lower <- slope
intercept_m_lower <- -2 * slope_m_lower + 1
# Redraw the points.
plot(DF$x1, DF$x2, col = colors, pch = 19, cex = 2.0, xlab = 'X_1', ylab = 'X_2', main = '')
grid()
# Hyperplane (solid black) and the two margin lines (dashed grey).
abline(a = intercept, b = slope, col = 'black')
abline(a = intercept_m_upper, b = slope_m_upper, col = 'gray', lty = 2, lwd = 2)
abline(a = intercept_m_lower, b = slope_m_lower, col = 'gray', lty = 2, lwd = 2)
grid()


2.支持向量机的推导

$$\begin{aligned} &\max_{w,b}\ \frac{2}{||w||}\\ &s.t.\quad y_i(w^Tx_i+b)\geq 1,\quad i=1,2,\dots,m \end{aligned}$$

$$\begin{aligned} &\min_{w,b}\ \frac12||w||^2\\ &s.t.\quad y_i(w^Tx_i+b)\geq 1,\quad i=1,2,\dots,m \end{aligned}$$

3.SVM的损失函数
SVM的损失函数为
①合页损失(hinge loss)，损失函数定义如下：
$$L_i=\sum_{j\neq y_i}\max(0,\ s_j-s_{y_i}+\Delta)$$

$$L_i=\max(0,-7-13+10)+\max(0,11-13+10)=8$$

② 指数损失(exponential loss)：$l_{exp}(z)=\exp(-z)$
③ 对率损失(logistic loss)：$l_{log}(z)=\log(1+\exp(-z))$

$$f(x)=\mathrm{sgn}\left[Pr(Y=+1|x)-\frac12\right]$$

4.SVM的核函数有哪些，核函数的作用是什么

① 线性核 $k(x,x')=\langle x,x'\rangle$
② 多项式核 $k(x,x')=(1+\langle x,x'\rangle)^d$
③ 径向基核 $k(x,x')=\exp(-\gamma||x-x'||^2)$
④ 高斯核 $k(x,x')=\exp\left(-\frac{||x-x'||^2}{2\sigma^2}\right)$
⑤ 神经网络核 $k(x,x')=\tanh(k_1\langle x,x'\rangle+k_2)$

5.硬间隔和软间隔

$$y_i(w^Tx_i+b)\ge 1$$

$$\min_{w,b}\ \frac12||w||^2+C\sum_{i=1}^m l_{0/1}\big(y_i(w^Tx_i+b)-1\big)$$

$$l_{0/1}(z)=\begin{cases} 1, & \text{if } z<0\\ 0, & \text{otherwise} \end{cases}$$

$$\min_{w,b}\ \frac12||w||^2+C\sum_{i=1}^m \max\big(0,\ 1-y_i(w^Tx_i+b)\big)$$

$$\min_{w,b,\xi_i}\ \frac12||w||^2+C\sum_{i=1}^m\xi_i$$

$$\begin{aligned} s.t.\ &y_i(w^Tx_i+b)\ge 1-\xi_i\\ &\xi_i\ge 0,\ i=1,2,\dots,m \end{aligned}$$

6.SVM可以做多分类吗，怎么做

①直接法：直接在目标函数上进行修改，将多个分类面的参数求解合并到一个最优化问题中，通过求解该最优化问题“一次性”实现多分类。这种方法看似简单，但其计算复杂度比较高，实现起来比较困难，只适合用于小型问题中
②间接法：通过组合多个二分类器来实现多分类器的构造，常见的方法有一对一和一对多两种。
1)一对一为在任意两类样本之间设计一个SVM，当对一个未知样本进行分类时，得票最多的类别为该未知样本的类别；
2)一对多为训练时依次把某个类别的样本归为一类，其他剩余的归为另一类，分类时将未知样本分类为具有最大分类函数值的那一类。
7.SVM可以做回归吗，怎么做

8.SVM的对偶问题，为什么要把原问题转化为对偶问题

9.KKT限制条件有哪些

$$L_p=\frac12||w||^2+C\sum_{i=1}^n\xi_i-\sum_{i=1}^n\alpha_i\left[y_i(w^Tx_i+w_0)-(1-\xi_i)\right]-\sum_{i=1}^n\mu_i\xi_i$$

$$\begin{aligned} &w=\sum_{i=1}^n\alpha_iy_ix_i,\quad 0=\sum_{i=1}^n\alpha_iy_i,\quad \alpha_i=C-\mu_i\\ &\alpha_i\left[y_i(w^Tx_i+w_0)-(1-\xi_i)\right]=0\\ &\mu_i\xi_i=0\\ &y_i(w^Tx_i+w_0)\ge 1-\xi_i,\ \xi_i\ge 0\\ &\alpha_i\ge 0,\ \mu_i\ge 0 \end{aligned}$$

$$\begin{aligned} &\alpha_i=0 &&\Rightarrow\quad y_ig(x_i)\ge 1\\ &0<\alpha_i<C &&\Rightarrow\quad y_ig(x_i)=1\\ &\alpha_i=C &&\Rightarrow\quad y_ig(x_i)\le 1 \end{aligned}$$

10.假如你在训练SVM时发现模型过拟合了，应该怎样调参数？

11.LDA与SVM什么条件下等价？

LDA是从多分类出发提出的，其求解有不同方法，但都是一个优化的结果。有的教材是贝叶斯定理，本质上是最大化后验概率，周志华书上是一个二次规划思想。在二分类情形与SVM很相近，但由于优化目标函数不同，结果通常是不同的。两个优化问题的解系数成比例时，二者的分类结果就是等价的。

06-01 108万+
08-23 17万+

04-11 7万+
07-05 5万+
06-30 3万+
09-28 8万+
03-28 12万+
03-31 4万+