1. SVM (Support Vector Machine)
Find the optimal decision boundary: the one farthest from the nearest samples of both classes.
Those nearest samples of the two classes are called the support vectors.
2. Hard Margin
Solve for the distance d from a point to the boundary, d = |w^T x + b| / ||w||; maximizing the margin then reduces, after the derivation, to the optimization problem below.
Regularization: the soft margin adds slack variables zeta_i with penalty weight C, allowing some samples to violate the margin.
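Stated explicitly (these are the standard formulations; the derivation itself is not reproduced in these notes):

\min_{w,b}\ \frac{1}{2}\lVert w \rVert^2 \quad \text{s.t. } y^{(i)}\big(w^{\top}x^{(i)} + b\big) \ge 1 \qquad \text{(Hard Margin)}

\min_{w,b,\zeta}\ \frac{1}{2}\lVert w \rVert^2 + C\sum_{i=1}^{m}\zeta_i \quad \text{s.t. } y^{(i)}\big(w^{\top}x^{(i)} + b\big) \ge 1 - \zeta_i,\ \zeta_i \ge 0 \qquad \text{(Soft Margin)}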
Using SVM
The larger C is, the smaller the tolerance for margin violations (closer to a hard margin).
The smaller C is, the larger the tolerance (closer to a soft margin).
The data must be standardized first; since SVM is distance-based, features on different scales would badly skew the decision boundary.
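The code below assumes x_train and y_train already exist. A minimal preparation sketch (an assumption on my part: the iris dataset restricted to classes 0/1 and its first two features, a common choice for this demo; left unscaled here only so that the plot axes used later, [3, 9, 1, 5], match the raw feature ranges):
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

iris = datasets.load_iris()
x = iris.data
y = iris.target
x_train = x[y < 2, :2]  # first two features, classes 0 and 1 only
y_train = y[y < 2]
# per the note above, StandardScaler would normally be applied here,
# in which case the plot axes below must be adjusted accordingly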
from sklearn.svm import LinearSVC  # the linear SVM classifier
svc_hard = LinearSVC(C=1e9)  # a very large C approximates a hard margin
svc_hard.fit(x_train, y_train)
def plot_svc_decision_boundary(model, axis):  # plot the decision regions plus the two margin lines
    x0, x1 = np.meshgrid(
        np.linspace(axis[0], axis[1], int((axis[1]-axis[0])*100)).reshape(-1, 1),
        np.linspace(axis[2], axis[3], int((axis[3]-axis[2])*100)).reshape(-1, 1),
    )
    X_new = np.c_[x0.ravel(), x1.ravel()]
    y_predict = model.predict(X_new)
    zz = y_predict.reshape(x0.shape)

    from matplotlib.colors import ListedColormap
    custom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])
    plt.contourf(x0, x1, zz, cmap=custom_cmap)

    w = model.coef_[0]
    b = model.intercept_[0]
    # decision boundary: w0*x0 + w1*x1 + b = 0
    # => x1 = -w0/w1 * x0 - b/w1
    # margin lines:      w0*x0 + w1*x1 + b = +1 / -1
    plot_x = np.linspace(axis[0], axis[1], 200)
    up_y = -w[0]/w[1] * plot_x - b/w[1] + 1/w[1]
    down_y = -w[0]/w[1] * plot_x - b/w[1] - 1/w[1]
    up_index = (up_y >= axis[2]) & (up_y <= axis[3])
    down_index = (down_y >= axis[2]) & (down_y <= axis[3])
    plt.plot(plot_x[up_index], up_y[up_index], color='black')  # upper margin line in black
    plt.plot(plot_x[down_index], down_y[down_index], color='black')  # lower margin line in black
plot_svc_decision_boundary(svc_hard, axis=[3, 9, 1, 5])  # draw the regions and margin lines
plt.scatter(x_train[y_train==0, 0], x_train[y_train==0, 1])
plt.scatter(x_train[y_train==1, 0], x_train[y_train==1, 1])
plt.show()
svc_soft = LinearSVC(C=0.01)  # a small C widens the tolerance for violations, giving better generalization
svc_soft.fit(x_train, y_train)
plot_svc_decision_boundary(svc_soft, axis=[3,8,1,5])
plt.scatter(x_train[y_train==0, 0], x_train[y_train==0, 1])
plt.scatter(x_train[y_train==1, 0], x_train[y_train==1, 1])
plt.show()
Polynomial SVM
# generate the data
from sklearn.svm import LinearSVC  # LinearSVC again, the same linear SVM as above
from sklearn.datasets import make_moons
x, y = make_moons(noise=0.15)
from sklearn.preprocessing import PolynomialFeatures  # building blocks for a polynomial SVM
from sklearn.pipeline import Pipeline

def Polynomialsvm(degree, C=1):
    return Pipeline([
        ('poly', PolynomialFeatures(degree=degree)),  # a StandardScaler step is often added after this, per the note above
        ('svm', LinearSVC(C=C))
    ])
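The call below uses plot_decision_boundary, a generic region-plotting helper: the margin-line function above reads model.coef_ and model.intercept_, which a Pipeline does not expose. A minimal sketch of that helper, mirroring the region-filling half of the earlier function:
def plot_decision_boundary(model, axis):
    x0, x1 = np.meshgrid(
        np.linspace(axis[0], axis[1], int((axis[1]-axis[0])*100)).reshape(-1, 1),
        np.linspace(axis[2], axis[3], int((axis[3]-axis[2])*100)).reshape(-1, 1),
    )
    X_new = np.c_[x0.ravel(), x1.ravel()]
    zz = model.predict(X_new).reshape(x0.shape)  # predicted class for each grid point
    from matplotlib.colors import ListedColormap
    custom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])
    plt.contourf(x0, x1, zz, cmap=custom_cmap)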
# plot the resulting decision regions
svm = Polynomialsvm(3, 0.1)
svm.fit(x, y)
plot_decision_boundary(svm, axis=[-2,3,-0.8,1.5])
plt.scatter(x[y==0, 0], x[y==0, 1])
plt.scatter(x[y==1, 0], x[y==1, 1])
plt.show()
SVM with a Polynomial Kernel
from sklearn.svm import SVC  # SVC supports kernel functions
svm_kernel = SVC(kernel='poly', degree=3, C=1)  # polynomial kernel with degree=3, C=1
svm_kernel.fit(x, y)
# compared with the PolynomialFeatures + LinearSVC version, the boundary here comes out straighter
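To see that comparison visually, the kernel model can be drawn with the same generic helper and the same axes as before:
plot_decision_boundary(svm_kernel, axis=[-2, 3, -0.8, 1.5])
plt.scatter(x[y==0, 0], x[y==0, 1])
plt.scatter(x[y==1, 0], x[y==1, 1])
plt.show()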
Gaussian Kernel (RBF Kernel)
Maps every sample point into an infinite-dimensional feature space.
The idea is to lift the data to a higher dimension: one-dimensional data that is not linearly separable becomes separable once mapped to two dimensions, as the sketch below illustrates.
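A small illustration of that intuition (my own sketch, not part of the original notes): 1-D points with the positive class in the middle cannot be split by a single threshold, but mapping each x to two Gaussian features exp(-gamma*(x-l)**2) around landmarks l = -1 and l = 1 makes the classes linearly separable:
import numpy as np
import matplotlib.pyplot as plt

x1d = np.arange(-4, 5, 1)  # nine one-dimensional points
y1d = np.array((x1d >= -2) & (x1d <= 2), dtype=int)  # middle points are class 1

def gaussian(x, l, gamma=1.0):
    return np.exp(-gamma * (x - l) ** 2)

x_lift = np.column_stack([gaussian(x1d, -1), gaussian(x1d, 1)])  # 2-D lift
plt.scatter(x_lift[y1d == 0, 0], x_lift[y1d == 0, 1])
plt.scatter(x_lift[y1d == 1, 0], x_lift[y1d == 1, 1])
plt.show()  # the two classes are now separable by a straight line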
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
def svc_kernal(gamma):
    return Pipeline([
        ('standard', StandardScaler()),
        ('svc', SVC(kernel='rbf', gamma=gamma))  # kernel='rbf' selects the Gaussian kernel; gamma is passed through
    ])
svc = svc_kernal(gamma=1)
svc.fit(x, y)
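gamma controls the width of each Gaussian bump: large values fit the training points very tightly (overfitting), while small values flatten the boundary toward linear (underfitting). A quick illustration, with values chosen only for demonstration:
svc_over = svc_kernal(gamma=100)  # very narrow bumps: boundary hugs individual points (overfits)
svc_over.fit(x, y)
plot_decision_boundary(svc_over, axis=[-2, 3, -0.8, 1.5])
plt.scatter(x[y==0, 0], x[y==0, 1])
plt.scatter(x[y==1, 0], x[y==1, 1])
plt.show()

svc_under = svc_kernal(gamma=0.1)  # very wide bumps: boundary close to a straight line (underfits)
svc_under.fit(x, y)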
Solving Regression Problems with SVM
Idea: the opposite of the classification setting. Here we want the band between the upper and lower boundaries to contain as many points as possible; the hyperparameter epsilon specifies the width of that band.
from sklearn.svm import LinearSVR  # linear SVM regressor
from sklearn.svm import SVR  # kernelized counterpart, accepts kernel=... like SVC
lin_svr = LinearSVR(epsilon=0.1)  # epsilon sets the width of the insensitive band around the fit
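The notes stop here; a minimal sketch of how lin_svr would actually be trained and scored (the synthetic dataset and the standard_linear_svr helper below are my own illustrative choices, not from the original):
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVR
import numpy as np

# illustrative synthetic data; any regression dataset would do
rng = np.random.default_rng(666)
X = rng.uniform(-3, 3, size=(200, 1))
y_reg = 0.5 * X[:, 0] + 1 + rng.normal(0, 0.2, size=200)
X_tr, X_te, y_tr, y_te = train_test_split(X, y_reg)

def standard_linear_svr(epsilon=0.1):
    return Pipeline([
        ('std', StandardScaler()),  # standardize, per the note at the top
        ('svr', LinearSVR(epsilon=epsilon))
    ])

lin_svr = standard_linear_svr()
lin_svr.fit(X_tr, y_tr)
print(lin_svr.score(X_te, y_te))  # R^2 on the test set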