支持向量机（SVM）_svmc越大-CSDN博客

本文链接：https://blog.csdn.net/weixin_44400573/article/details/96340024

一、什么是支持向量机

逻辑回归根据损失函数来求决策边界

在这里插入图片描述

最大化Margin

在这里插入图片描述

二、Max(margin)

从点到直线的距离推广到点到平面的距离

在这里插入图片描述

三、Soft Margin SVM

在这里插入图片描述

C 越大，容错空间越小，模型越接近硬间隔（Hard Margin）
C 越小，容错空间越大，间隔越“软”（Soft Margin）

四、实际使用SVM

和 KNN 一样，使用 SVM 之前要先对数据做标准化处理。

原因是 SVM 的间隔（margin）是基于距离计算的，特征的量纲不一致会直接影响结果。

4.1 导入相关模块和数据集

import numpy as np 
import matplotlib.pyplot as plt
from sklearn import datasets
# Load the iris dataset and pull out the feature matrix and labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Keep only the samples labelled 0 or 1, and only the first two features,
# so the problem is binary and can be drawn in 2-D.
X, y = X[y < 2, :2], y[y < 2]

4.2 绘制原始数据散点图

# Scatter the raw (unscaled) samples: class 0 in red, class 1 in blue.
plt.figure(figsize=(10, 10))
for label, colour in ((0, 'red'), (1, 'blue')):
    plt.scatter(X[y == label, 0], X[y == label, 1], color=colour)
plt.show()

在这里插入图片描述

4.3 对数据进行标准化处理

from sklearn.preprocessing import StandardScaler

# Fit the scaler on X, then transform X with the fitted scaler.
standardscaler = StandardScaler()
X_standard = standardscaler.fit(X).transform(X)

4.4 C = 1e9 训练模型

from sklearn.svm import LinearSVC

# Train a linear SVM on the standardized data with a very large C (1e9),
# i.e. with almost no tolerance for margin violations.
svc = LinearSVC(C=1e9)
svc.fit(X_standard,y)

在这里插入图片描述

4.5 绘制决策边界

def plot_decision_boundary(model, axis):
    """Shade the regions of the plane that ``model`` assigns to each class.

    Args:
        model: Fitted classifier exposing ``predict`` for 2-feature inputs.
        axis: Plot limits as ``[x_min, x_max, y_min, y_max]``.
    """
    from matplotlib.colors import ListedColormap

    # Evenly spaced grid over the requested window, about 100 samples per
    # unit length along each axis.
    x0_ticks = np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100))
    x1_ticks = np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100))
    X0, X1 = np.meshgrid(x0_ticks, x1_ticks)

    # One row per grid point: column 0 holds x0, column 1 holds x1.
    X_grid_matrix = np.c_[X0.ravel(), X1.ravel()]

    # Classify every grid point, then fold the labels back into grid shape.
    y_predict_matrix = model.predict(X_grid_matrix).reshape(X0.shape)

    my_colormap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])

    # contourf has no ``linewidth`` keyword (passing one only triggers an
    # "unused kwargs" warning), so only the colormap is supplied.
    plt.contourf(X0, X1, y_predict_matrix, cmap=my_colormap)

# Decision regions of the C=1e9 model with the standardized samples on top.
plt.figure(figsize=(10, 10))
plot_decision_boundary(svc, axis=[-3, 3, -3, 3])
for label in (0, 1):
    plt.scatter(X_standard[y == label, 0], X_standard[y == label, 1])
plt.show()

在这里插入图片描述

4.6 C=0.01

# Train a second linear SVM with a small C (0.01), which tolerates more
# margin violations.
svc2 = LinearSVC(C=0.01)
svc2.fit(X_standard,y)

在这里插入图片描述

# Decision regions of the C=0.01 model with the standardized samples on top.
plt.figure(figsize=(10, 10))
plot_decision_boundary(svc2, axis=[-3, 3, -3, 3])
for label in (0, 1):
    plt.scatter(X_standard[y == label, 0], X_standard[y == label, 1])
plt.show()

在这里插入图片描述

svc.coef_ # two input features, hence two coefficients

在这里插入图片描述

svc.intercept_ # the intercept (bias) term

在这里插入图片描述

4.7 绘制Soft Margin SVM 决策边界

def plot_svc_decision_boundary(model, axis):
    """Shade a linear SVM's class regions and draw its two margin lines.

    Args:
        model: Fitted linear classifier exposing ``predict``, ``coef_`` and
            ``intercept_`` for 2-feature inputs (e.g. ``LinearSVC``).
        axis: Plot limits as ``[x_min, x_max, y_min, y_max]``.

    Note:
        The margin lines are solved for x1, so ``model.coef_[0][1]`` must be
        non-zero; a perfectly vertical boundary is not handled.
    """
    from matplotlib.colors import ListedColormap

    # Evenly spaced grid over the requested window, about 100 samples per
    # unit length along each axis.
    x0_ticks = np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100))
    x1_ticks = np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100))
    X0, X1 = np.meshgrid(x0_ticks, x1_ticks)

    # One row per grid point: column 0 holds x0, column 1 holds x1.
    X_grid_matrix = np.c_[X0.ravel(), X1.ravel()]

    # Classify every grid point, then fold the labels back into grid shape.
    y_predict_matrix = model.predict(X_grid_matrix).reshape(X0.shape)

    my_colormap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])

    # contourf has no ``linewidth`` keyword (passing one only triggers an
    # "unused kwargs" warning), so only the colormap is supplied.
    plt.contourf(X0, X1, y_predict_matrix, cmap=my_colormap)

    w = model.coef_[0]
    b = model.intercept_[0]

    # Decision boundary: w0*x0 + w1*x1 + b = 0  =>  x1 = (-w0*x0 - b) / w1.
    # The margins are the parallel lines where w0*x0 + w1*x1 + b is +1 / -1.
    plot_x = np.linspace(axis[0], axis[1], 200)
    up_y = -w[0] / w[1] * plot_x - b / w[1] + 1 / w[1]
    down_y = -w[0] / w[1] * plot_x - b / w[1] - 1 / w[1]

    # Draw only the part of each margin line inside the vertical plot limits.
    up_index = (up_y >= axis[2]) & (up_y <= axis[3])
    down_index = (down_y >= axis[2]) & (down_y <= axis[3])
    plt.plot(plot_x[up_index], up_y[up_index], color="black")
    plt.plot(plot_x[down_index], down_y[down_index], color="black")

4.7.1 C=1e9

# Regions and margin lines of the C=1e9 model, with the samples on top.
plt.figure(figsize=(10, 10))
plot_svc_decision_boundary(svc, axis=[-3, 3, -3, 3])
for label in (0, 1):
    plt.scatter(X_standard[y == label, 0], X_standard[y == label, 1])
plt.show()

在这里插入图片描述

4.7.2 C=0.01

# Regions and margin lines of the C=0.01 model, with the samples on top.
plt.figure(figsize=(10, 10))
plot_svc_decision_boundary(svc2, axis=[-3, 3, -3, 3])
for label in (0, 1):
    plt.scatter(X_standard[y == label, 0], X_standard[y == label, 1])
plt.show()

在这里插入图片描述

五、SVM中使用多项式特征

5.1 导入数据

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

# Generate the make_moons toy dataset with its default arguments.
X,y = datasets.make_moons()

在这里插入图片描述

# One scatter call per class so each class gets its own default colour.
for label in (0, 1):
    plt.scatter(X[y == label, 0], X[y == label, 1])
plt.show()

在这里插入图片描述

# Regenerate the moons with noise=0.15 and a fixed random_state.
X, y = datasets.make_moons(noise=0.15, random_state=666)

# One scatter call per class so each class gets its own default colour.
for label in (0, 1):
    plt.scatter(X[y == label, 0], X[y == label, 1])
plt.show()

在这里插入图片描述

5.2 使用多项式

from sklearn.preprocessing import PolynomialFeatures,StandardScaler
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline

def PolynomialSVC(degree, C=1.0):
    """Build a pipeline: polynomial features -> standardization -> LinearSVC.

    Args:
        degree: Degree passed to ``PolynomialFeatures``.
        C: Value passed to ``LinearSVC`` as ``C``.

    Returns:
        The unfitted ``Pipeline``.
    """
    steps = [
        ('poly', PolynomialFeatures(degree=degree)),
        ('std_scaler', StandardScaler()),
        ('linearSVC', LinearSVC(C=C)),
    ]
    return Pipeline(steps)

# Fit the degree-3 polynomial-feature linear SVM on the moons data.
poly_svc = PolynomialSVC(degree=3)
poly_svc.fit(X,y)

在这里插入图片描述

def plot_decision_boundary(model, axis):
    """Shade the regions of the plane that ``model`` assigns to each class.

    Args:
        model: Fitted classifier exposing ``predict`` for 2-feature inputs.
        axis: Plot limits as ``[x_min, x_max, y_min, y_max]``.
    """
    from matplotlib.colors import ListedColormap

    # Evenly spaced grid over the requested window, about 100 samples per
    # unit length along each axis.
    x0_ticks = np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100))
    x1_ticks = np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100))
    X0, X1 = np.meshgrid(x0_ticks, x1_ticks)

    # One row per grid point: column 0 holds x0, column 1 holds x1.
    X_grid_matrix = np.c_[X0.ravel(), X1.ravel()]

    # Classify every grid point, then fold the labels back into grid shape.
    y_predict_matrix = model.predict(X_grid_matrix).reshape(X0.shape)

    my_colormap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])

    # contourf has no ``linewidth`` keyword (passing one only triggers an
    # "unused kwargs" warning), so only the colormap is supplied.
    plt.contourf(X0, X1, y_predict_matrix, cmap=my_colormap)

# Decision regions of the polynomial-feature SVM with the moons on top.
plt.figure(figsize=(10, 10))
plot_decision_boundary(poly_svc, axis=[-1.5, 2.5, -1, 1.5])
for label in (0, 1):
    plt.scatter(X[y == label, 0], X[y == label, 1])
plt.show()

在这里插入图片描述

5.3 使用多项式核函数的SVM

from sklearn.svm import SVC

def PolynomialKernelSVC(degree, C=1.0):
    """Build a pipeline: standardization -> SVC with a polynomial kernel.

    Args:
        degree: Degree passed to ``SVC`` for the ``"poly"`` kernel.
        C: Value passed to ``SVC`` as ``C``.

    Returns:
        The unfitted ``Pipeline``.
    """
    steps = [
        ('std_scaler', StandardScaler()),
        ('kernelSVC', SVC(kernel="poly", degree=degree, C=C)),
    ]
    return Pipeline(steps)

# Fit the degree-3 polynomial-kernel SVM on the moons data.
poly_kernel_svc = PolynomialKernelSVC(degree=3)
poly_kernel_svc.fit(X,y)

在这里插入图片描述

# Decision regions of the polynomial-kernel SVM with the moons on top.
plt.figure(figsize=(10, 10))
plot_decision_boundary(poly_kernel_svc, axis=[-1.5, 2.5, -1, 1.5])
for label in (0, 1):
    plt.scatter(X[y == label, 0], X[y == label, 1])
plt.show()

在这里插入图片描述

六、什么是核函数

在这里插入图片描述

七、高斯核函数

在这里插入图片描述

正态分布- 高斯函数

在这里插入图片描述

每一个样本点映射到一个无穷维的特征空间

在这里插入图片描述

7.1 高斯核实现

import numpy as np
import matplotlib.pyplot as plt

# Nine integer samples on a line, from -4 to 4 inclusive.
x = np.arange(-4, 5)
# Label 1 for samples inside the closed interval [-2, 2], otherwise 0.
y = ((x >= -2) & (x <= 2)).astype('int')
y

在这里插入图片描述

# Draw both classes along a horizontal line at height 0.
for label in (0, 1):
    members = x[y == label]
    plt.scatter(members, [0] * len(members))
plt.show()

在这里插入图片描述

def gaussian(x, l, gamma=1.0):
    """Return the Gaussian kernel value between ``x`` and the landmark ``l``.

    Computes ``exp(-gamma * (x - l) ** 2)``. Because it is built from NumPy
    operations it works element-wise when ``x`` is an array.

    Args:
        x: Scalar or NumPy array of sample values.
        l: Landmark the distance is measured from.
        gamma: Coefficient multiplying the squared distance. Defaults to
            1.0, so existing two-argument calls behave as before.

    Returns:
        The kernel value(s), with the same shape as ``x``.
    """
    return np.exp(-gamma * (x - l) ** 2)

# Landmarks for the two Gaussian features.
l1, l2 = -1, 1

# Map every 1-D sample to 2-D, one Gaussian feature per landmark. gaussian()
# works element-wise on arrays, so both columns are computed in one call each
# instead of filling the matrix entry by entry in a Python loop.
X_new = np.column_stack((gaussian(x, l1), gaussian(x, l2)))

plt.scatter(X_new[y == 0, 0], X_new[y == 0, 1], color="red")
plt.scatter(X_new[y == 1, 0], X_new[y == 1, 1], color="blue")
plt.show()

在这里插入图片描述

7.2 高斯函数

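$\sigma$ 越大，正态分布的图像越扁平
$\sigma$ 越小，正态分布的图像越尖锐

高斯核函数 $K(x, y) = e^{-\gamma \lVert x - y \rVert^2}$ 中的 $\gamma$ 相当于 $\frac{1}{2\sigma^2}$，因此它的作用与 $\sigma$ 相反：$\gamma$ 越大，图像越尖锐；$\gamma$ 越小，图像越扁平。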

八、scikit-learn中的RBF核

8.1 导入数据

# Generate the moons dataset with noise=0.15 and a fixed random_state.
X, y = datasets.make_moons(noise=0.15, random_state=666)

# One scatter call per class so each class gets its own default colour.
for label in (0, 1):
    plt.scatter(X[y == label, 0], X[y == label, 1])
plt.show()

在这里插入图片描述

8.2 Pipeline

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

def RBFKernelSVC(gamma=1.0):
    """Build a pipeline: standardization -> SVC with an RBF kernel.

    Args:
        gamma: Value passed to ``SVC`` as ``gamma``.

    Returns:
        The unfitted ``Pipeline``.
    """
    steps = [
        ('std_scaler', StandardScaler()),
        ('svc', SVC(kernel='rbf', gamma=gamma)),
    ]
    return Pipeline(steps)

# Fit the RBF-kernel pipeline with gamma=1.0 on the moons data.
# This rebinds the name `svc` to the new pipeline.
svc = RBFKernelSVC(gamma=1.0)
svc.fit(X,y)

在这里插入图片描述

8.3 绘制决策边界

def plot_decision_boundary(model, axis):
    """Shade the regions of the plane that ``model`` assigns to each class.

    Args:
        model: Fitted classifier exposing ``predict`` for 2-feature inputs.
        axis: Plot limits as ``[x_min, x_max, y_min, y_max]``.
    """
    from matplotlib.colors import ListedColormap

    # Evenly spaced grid over the requested window, about 100 samples per
    # unit length along each axis.
    x0_ticks = np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100))
    x1_ticks = np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100))
    X0, X1 = np.meshgrid(x0_ticks, x1_ticks)

    # One row per grid point: column 0 holds x0, column 1 holds x1.
    X_grid_matrix = np.c_[X0.ravel(), X1.ravel()]

    # Classify every grid point, then fold the labels back into grid shape.
    y_predict_matrix = model.predict(X_grid_matrix).reshape(X0.shape)

    my_colormap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])

    # contourf has no ``linewidth`` keyword (passing one only triggers an
    # "unused kwargs" warning), so only the colormap is supplied.
    plt.contourf(X0, X1, y_predict_matrix, cmap=my_colormap)

8.4 gamma = 1.0

# Decision regions of the gamma=1.0 RBF model with the moons on top.
plt.figure(figsize=(10, 10))
plot_decision_boundary(svc, axis=[-1.5, 2.5, -1, 1.5])
for label in (0, 1):
    plt.scatter(X[y == label, 0], X[y == label, 1])
plt.show()

在这里插入图片描述

8.5 gamma = 100

# Refit the RBF pipeline with gamma=100 and plot its decision regions.
svc_gamma100 = RBFKernelSVC(gamma=100)
svc_gamma100.fit(X, y)

plt.figure(figsize=(10, 10))
plot_decision_boundary(svc_gamma100, axis=[-1.5, 2.5, -1, 1.5])
for label in (0, 1):
    plt.scatter(X[y == label, 0], X[y == label, 1])
plt.show()

在这里插入图片描述

8.6 gamma = 10

# Refit the RBF pipeline with gamma=10 and plot its decision regions.
svc_gamma10 = RBFKernelSVC(gamma=10)
svc_gamma10.fit(X, y)

plt.figure(figsize=(10, 10))
plot_decision_boundary(svc_gamma10, axis=[-1.5, 2.5, -1, 1.5])
for label in (0, 1):
    plt.scatter(X[y == label, 0], X[y == label, 1])
plt.show()

在这里插入图片描述

8.7 gamma = 0.5

# Refit the RBF pipeline with gamma=0.5 and plot its decision regions.
svc_gamma05 = RBFKernelSVC(gamma=0.5)
svc_gamma05.fit(X, y)

plt.figure(figsize=(10, 10))
plot_decision_boundary(svc_gamma05, axis=[-1.5, 2.5, -1, 1.5])
for label in (0, 1):
    plt.scatter(X[y == label, 0], X[y == label, 1])
plt.show()

在这里插入图片描述

8.8 gamma = 0.1

# Refit the RBF pipeline with gamma=0.1 and plot its decision regions.
svc_gamma01 = RBFKernelSVC(gamma=0.1)
svc_gamma01.fit(X, y)

plt.figure(figsize=(10, 10))
plot_decision_boundary(svc_gamma01, axis=[-1.5, 2.5, -1, 1.5])
for label in (0, 1):
    plt.scatter(X[y == label, 0], X[y == label, 1])
plt.show()

在这里插入图片描述

九、SVM解决回归问题

在这里插入图片描述

SVM 解决回归问题的思路：落在 margin 范围内的样本点越多越好（margin 的宽度由超参数 epsilon 决定）。

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVR,SVR
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the installed version still provides it before running.
boston = datasets.load_boston()
X, y = boston.data, boston.target

# Split into train and test sets; the fixed random_state keeps the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

def StandardLinearSVR(epsilon=0.1):
    """Build a pipeline: standardization -> LinearSVR.

    Args:
        epsilon: Value passed to ``LinearSVR`` as ``epsilon``.

    Returns:
        The unfitted ``Pipeline``.
    """
    steps = [
        ('std_scaler', StandardScaler()),
        ('linearSVR', LinearSVR(epsilon=epsilon)),
    ]
    return Pipeline(steps)

# Fit the standardized linear SVR (default epsilon) on the training split.
svr = StandardLinearSVR()
svr.fit(X_train,y_train)

在这里插入图片描述