from sklearn.datasets import make_blobs
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
# Build a small, well-separated two-class toy data set and plot it.
X, y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.6)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap="rainbow")
plt.xticks([])
plt.yticks([])
# `ax` must be bound before its limits are read, and the limits must be read
# while the scatter figure is still the current one, i.e. before plt.show().
ax = plt.gca()
# Minimum and maximum of both coordinate axes of the plot.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
plt.show()
# 30 evenly spaced values between the minimum and maximum of each axis.
axisx = np.linspace(xlim[0], xlim[1], 30)
axisy = np.linspace(ylim[0], ylim[1], 30)
axisx
array([-0.7425579 , -0.60066997, -0.45878204, -0.31689411, -0.17500618,
-0.03311826, 0.10876967, 0.2506576 , 0.39254553, 0.53443346,
0.67632139, 0.81820931, 0.96009724, 1.10198517, 1.2438731 ,
1.38576103, 1.52764896, 1.66953689, 1.81142481, 1.95331274,
2.09520067, 2.2370886 , 2.37897653, 2.52086446, 2.66275238,
2.80464031, 2.94652824, 3.08841617, 3.2303041 , 3.37219203])
# Turn the two 1-D coordinate vectors into 2-D coordinate matrices by
# broadcasting them against each other, which yields
# len(axisx) * len(axisy) grid points. With indexing="ij" the first output
# varies along rows (x values) and the second along columns (y values).
axisx, axisy = np.meshgrid(axisx, axisy, indexing="ij")
axisx  # these 2-D arrays are used as the X and Y inputs of contour
array([[-0.7425579 , -0.7425579 , -0.7425579 , ..., -0.7425579 ,
-0.7425579 , -0.7425579 ],
[-0.7425579 , -0.7425579 , -0.7425579 , ..., -0.7425579 ,
-0.7425579 , -0.7425579 ],
[-0.7425579 , -0.7425579 , -0.7425579 , ..., -0.7425579 ,
-0.7425579 , -0.7425579 ],
...,
[ 3.37219203, 3.37219203, 3.37219203, ..., 3.37219203,
3.37219203, 3.37219203],
[ 3.37219203, 3.37219203, 3.37219203, ..., 3.37219203,
3.37219203, 3.37219203],
[ 3.37219203, 3.37219203, 3.37219203, ..., 3.37219203,
3.37219203, 3.37219203]])
# ravel() flattens each coordinate matrix to 1-D; vstack stacks the two flat
# arrays as rows, and the transpose turns them into one (x, y) pair per row.
xy = np.vstack([axisx.ravel(), axisy.ravel()]).T
# xy is the finished grid: a dense set of points spread over the whole canvas.
# Plot it to see what meshgrid and vstack produced. No cmap is passed here:
# without a `c` array there is nothing for a colormap to map.
plt.scatter(xy[:, 0], xy[:, 1], s=1)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap="rainbow")
ax = plt.gca()
# Fit the model; fitting is what determines the decision boundary.
clf = SVC(kernel="linear").fit(X, y)
# decision_function returns one signed score per input sample (the tutorial
# reads it as the sample's distance to the decision boundary). The scores are
# reshaped to the grid's shape because contour needs Z to match X and Y.
Z = clf.decision_function(xy).reshape(axisx.shape)
# Draw the decision boundary (Z == 0, solid) and the two parallel margin
# lines (Z == -1 and Z == 1, dashed) as three contour lines.
ax.contour(axisx, axisy, Z, colors="k", levels=[-1, 0, 1], alpha=0.5,
           linestyles=["--", "-", "--"])
ax.set_xlim(xlim)
ax.set_ylim(ylim)
# Z holds the decision-function value of every grid point, and the `levels`
# given to contour are values on that same scale. Try it with a single sample.
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap="rainbow")
# Highlight sample 10 in a fixed colour; a colormap has no effect when `c` is
# a literal colour, so none is passed.
plt.scatter(X[10, 0], X[10, 1], c="black", s=50)
clf.decision_function(X[10].reshape(1, 2))
结果:
array([-3.33917354])
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap="rainbow")
ax = plt.gca()
# Use the decision value of sample 10 itself as the contour level, so the
# line still passes through that sample if the data or the fitted model
# changes (a pasted number is only valid for one particular run).
sample_level = float(clf.decision_function(X[10].reshape(1, 2))[0])
ax.contour(axisx, axisy, Z, colors="k", levels=[sample_level], alpha=0.5,
           linestyles=["--"])
#将上述过程包装成函数
def plot_svc_decision_function(model, ax=None):
    """Draw a fitted model's decision boundary and margin lines on an axes.

    The current x/y limits of the axes are sampled on a 30 x 30 grid, the
    model's ``decision_function`` is evaluated on every grid point, and the
    contour lines at the values -1, 0 and 1 are drawn (dashed, solid, dashed).

    Args:
        model: Fitted estimator exposing ``decision_function`` for 2-feature
            input of shape (n_points, 2).
        ax: Axes to draw on. Defaults to the current axes (``plt.gca()``).

    Returns:
        None. The contour lines are added to ``ax`` in place.
    """
    if ax is None:
        ax = plt.gca()
    # Remember the limits so they can be restored after drawing.
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    x = np.linspace(xlim[0], xlim[1], 30)
    y = np.linspace(ylim[0], ylim[1], 30)
    # meshgrid(y, x) returns (Y, X) with X varying along rows and Y along
    # columns; both have shape (len(x), len(y)).
    Y, X = np.meshgrid(y, x)
    # One (x, y) pair per row, in the same order as X.ravel().
    xy = np.vstack([X.ravel(), Y.ravel()]).T
    # contour requires the values to have the same shape as the grid.
    P = model.decision_function(xy).reshape(X.shape)

    ax.contour(X, Y, P, colors="k", levels=[-1, 0, 1], alpha=0.5,
               linestyles=["--", "-", "--"])
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
# Refit the linear SVM and overlay its decision boundary on the scatter plot.
clf = SVC(kernel="linear")
clf.fit(X, y)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap="rainbow")
plot_svc_decision_function(clf)
# Predicted labels and accuracy on the same data the model was fitted on.
clf.predict(X)
clf.score(X, y)
# The support vectors found by the fit.
print(clf.support_vectors_)
# Number of support vectors in each class.
clf.n_support_
结果:
array([1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1,
1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1,
0, 1, 1, 0, 1, 0])
1.0
[[0.44359863 3.11530945]
[2.33812285 3.43116792]
[2.06156753 1.96918596]]
array([2, 1], dtype=int32)
使用环形数据
from sklearn.datasets import make_circles

# Two concentric rings. No random_state is given, so every call to
# make_circles draws a different noisy sample.
X, y = make_circles(100, factor=0.1, noise=.1)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap="rainbow")
plt.show()

# Fit the linear SVM on the very same sample that was just plotted. Calling
# make_circles again here would fit the model on different points.
clf = SVC(kernel="linear").fit(X, y)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap="rainbow")
plot_svc_decision_function(clf)
很明显,现在线性SVM已经不适合于我们的状况了,我们无法找出一条直线来划分我们的数据集,让直线的两边分别是两种类别。这个时候,我们可以在原本的X和y的基础上添加一个维度r,把数据变成三维,然后可视化这个数据,来看看添加维度让我们的数据如何变化。
# Third feature: exp(-(x0**2 + x1**2)), computed per sample; it only depends
# on the sample's squared distance from the origin.
r = np.exp(-(X ** 2).sum(axis=1))
# 100 evenly spaced values spanning the range of r.
rlim = np.linspace(r.min(), r.max(), 100)
from mpl_toolkits import mplot3d
def plot_3D(elev=30, azim=30, X=X, y=y):
    """Show the 2-D samples lifted into 3-D by the radial feature r.

    Args:
        elev: Elevation angle passed to ``view_init``.
        azim: Azimuth angle passed to ``view_init``.
        X: Sample matrix whose first two columns are plotted on the x and y
            axes. Defaults to the module-level ``X`` at definition time.
        y: Per-sample values used to colour the points. Defaults to the
            module-level ``y`` at definition time.
    """
    # Derive r from the X that was actually passed in, so the third axis
    # always matches the plotted samples instead of relying on a global r.
    r = np.exp(-(X ** 2).sum(1))
    ax = plt.subplot(projection="3d")
    ax.scatter3D(X[:, 0], X[:, 1], r, c=y, s=50, cmap='rainbow')
    ax.view_init(elev=elev, azim=azim)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("r")
    plt.show()
from ipywidgets import interact,fixed
# Each keyword must name a parameter of plot_3D. The azimuth range has to be
# passed as `azim` to take effect; `azip` matches no parameter of plot_3D.
# X and y are wrapped in fixed() so no widget is created for them.
interact(plot_3D, elev=[0, 30], azim=(-180, 180), X=fixed(X), y=fixed(y))
plt.show()
此时我们的数据在三维空间中,我们的超平面就是一个二维平面。很明显,我们可以用一个平面将两类数据隔开,这个平面就是我们的决策边界了。我们刚才做的,计算r,并将r作为数据的第三维度来将数据升维的过程,被称为“核变换”,即是将数据投影到高维空间中,以寻找能够将数据完美分割的超平面,即是说寻找能够让数据线性可分的高维空间。为了详细解释这个过程,我们需要引入SVM中的核心概念:核函数。
# Same data, now with the RBF kernel instead of the linear one.
clf = SVC(kernel="rbf")
clf.fit(X, y)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap="rainbow")
plot_svc_decision_function(clf)
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import svm
from sklearn.datasets import make_circles , make_moons, make_blobs,make_classification
n_samples = 100

# Four synthetic two-class data sets, each returned as an (X, y) pair.
moons = make_moons(n_samples=n_samples, noise=0.2, random_state=0)
circles = make_circles(n_samples=n_samples, noise=0.2, factor=0.5,
                       random_state=1)
blobs = make_blobs(n_samples=n_samples, centers=2, random_state=5)
classification = make_classification(n_samples=n_samples, n_features=2,
                                     n_informative=2, n_redundant=0,
                                     random_state=5)
datasets = [moons, circles, blobs, classification]

# Kernel names compared on every data set.
Kernel = ["linear", "poly", "rbf", "sigmoid"]
# What do the four data sets look like? One figure per data set.
for X, Y in datasets:
    plt.figure(figsize=(5, 4))
    plt.scatter(X[:, 0], X[:, 1], c=Y, s=50, cmap="rainbow")
[*enumerate(datasets)]
# Each item unpacks as index, (X, Y):
# (index, (feature matrix X, label array Y)).
结果:
[(0,
(array([[-1.09443462e-02, 9.89784876e-01],
[ 1.96749886e+00, -1.10921978e-01],
[ 9.18923151e-01, -7.87831621e-03],
[-1.97813183e-02, 3.67422878e-02],
[ 8.97047211e-01, -5.26043067e-01],
[ 2.05087697e+00, 4.82966687e-01],
[ 5.52592656e-01, 5.10008493e-01],
[ 9.36108682e-01, -6.67176177e-01],
[-8.57905150e-03, 3.44030710e-01],
[ 1.79962867e+00, 3.22578165e-01],
[-1.79739813e-01, 5.12417381e-01],
[ 1.96928635e+00, -1.84060982e-01],
[ 1.40757108e+00, -6.55885144e-01],
[ 1.04103920e+00, 1.04537944e+00],
[ 6.11861752e-01, 5.09315861e-01],
[-3.59476500e-01, 1.05930036e+00],
[ 2.54029695e-01, 1.15116524e+00],
[ 2.13555501e-01, 8.82321641e-01],
[-3.30880800e-01, 8.04221145e-01],
[ 1.06603845e+00, 5.32174106e-01],
[-7.06988363e-01, 5.63246401e-01],
[ 4.02928450e-01, -1.95330382e-01],
[ 7.83527128e-01, 5.65637444e-01],
[ 7.29264348e-01, -4.64258931e-01],
[-7.61131674e-01, -1.74321350e-03],
[-8.30401440e-01, -2.33952062e-01],
[ 7.12873757e-01, 3.33441281e-01],
[ 2.12091446e+00, 1.51388354e-01],
[ 1.76738365e+00, -1.38842428e-02],
[ 1.35429861e+00, -2.35239859e-01],
[ 3.82226943e-01, -1.29870625e-01],
[ 1.15255238e+00, -8.36624186e-01],
[ 1.85603425e+00, -2.25641253e-02],
[ 4.78053620e-01, -3.54658215e-01],
[ 4.65065876e-02, 5.22966374e-01],
[-1.68749515e-01, 9.97161466e-01],
[ 2.17677252e-01, 9.71890153e-01],
[ 1.45168696e-01, 2.06362619e-01],
[-6.04440255e-02, 4.86891449e-02],
[ 1.00652060e+00, -5.83659180e-01],
[ 1.34599608e+00, -8.74713518e-03],
[ 5.07344926e-01, -3.11872588e-01],
[-8.84426881e-01, 1.75672048e-01],
[-1.00353955e+00, 2.54679349e-01],
[ 1.00682339e+00, 3.36434579e-01],
[ 8.11581056e-01, 1.19684303e+00],
[ 6.05383054e-01, 1.34346598e+00],
[-5.25267589e-01, 6.67755643e-01],
[-9.36918623e-01, 3.24010896e-01],
[ 8.32721148e-01, 2.07541427e-01],
[ 1.56011397e+00, -1.61052076e-03],
[-2.00343863e-01, -1.71769945e-01],
[ 8.14368163e-01, 2.98144383e-01],
[-5.33016793e-01, 7.25851388e-01],
[ 1.39949996e-01, 5.16100416e-01],
[ 1.30241869e-01, 2.73900710e-01],
[ 6.05976627e-01, 8.71416086e-01],
[-3.55599199e-01, 4.28344752e-01],
[ 1.80905518e-01, 1.21324092e+00],
[-6.86271500e-02, 4.98563121e-01],
[ 6.91482442e-01, 7.02335678e-01],
[-3.83113433e-01, 9.66746318e-01],
[ 2.98656366e-01, -1.83495206e-01],
[ 1.17897990e-01, -2.31064511e-01],
[ 9.04410734e-01, -6.86183692e-01],
[ 1.27108202e+00, -3.39556126e-01],
[-2.52941845e-01, 9.36590815e-01],
[ 1.58149755e+00, -5.26620862e-01],
[ 7.04126938e-01, 6.45019632e-01],
[ 2.05387806e+00, -4.99221849e-01],
[ 2.78958975e-01, 8.79248341e-01],
[-7.28199738e-01, 9.21967277e-01],
[-9.21538389e-01, 4.83269613e-02],
[ 2.01257720e+00, 2.06208601e-01],
[ 2.09649727e+00, 4.53952338e-01],
[ 4.55121438e-01, -5.98476065e-01],
[ 3.25942701e-01, 1.06336046e+00],
[ 1.80917678e+00, 3.67632943e-01],
[ 7.05843148e-01, -4.60516884e-01],
[ 1.24497910e+00, -4.89751662e-01],
[-1.02501239e-01, 1.16337954e+00],
[-6.13951804e-01, 9.35134524e-01],
[ 1.32828610e+00, -2.76080239e-01],
[-9.10782155e-01, 4.00675696e-01],
[-8.86192869e-01, -1.04843093e-01],
[ 1.59251994e-01, -3.50710293e-02],
[ 1.74397698e-01, -4.22503039e-02],
[ 7.28423493e-01, 4.19640376e-01],
[ 1.70911154e+00, -1.84104334e-01],
[ 5.76918298e-01, -2.31102480e-01],
[ 1.68451790e+00, -3.48465285e-01],
[-7.71915511e-01, 6.67699774e-01],
[ 2.75312996e-01, 8.48999530e-01],
[ 1.42394089e+00, -6.57814970e-01],
[ 1.05961304e+00, 5.80782652e-01],
[ 1.79348845e+00, 4.94760237e-01],
[ 3.24902726e-01, -5.63880560e-01],
[ 7.60779589e-01, -3.73787587e-01],
[-1.20959690e+00, 3.24700620e-01],
[-1.18089701e+00, 2.27800799e-01]]),
array([0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1,
0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0,
1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0]))),
(1,
(array([[-0.38289117, -0.09084004],
[-0.02096229, -0.47787416],
[-0.39611596, -1.28942694],
[-0.61813018, -0.06383715],
[ 0.70347838, -0.18703837],
[-0.45970463, 0.69477465],
[-0.45091682, -0.71570524],
[-0.45562004, -0.13406016],
[-0.26513904, 0.40812871],
[-0.15474648, 0.41406973],
[ 0.231206 , -0.53275899],
[ 0.15623875, -0.8678088 ],
[ 0.51647541, 0.48940995],
[ 0.68707007, -0.02334129],
[ 0.54759869, -0.16482373],
[-0.32179572, -0.80553536],
[-1.07478639, -0.733362 ],
[ 0.76758455, -0.43498783],
[-0.47475234, -0.33813186],
[ 0.15420656, 1.07306032],
[ 0.65916696, 0.20773634],
[-0.77795003, 0.1326555 ],
[ 0.47025454, -0.31228748],
[-0.04246799, 0.26555446],
[-0.72405954, 0.48807185],
[-0.36960005, -1.06514028],
[ 0.17833327, -0.49718972],
[-0.93927864, -0.41951638],
[ 0.50914152, -0.70977467],
[-0.05569852, -0.82162607],
[-0.11214579, 0.72197044],
[ 0.80463921, -0.15221296],
[ 0.08261487, -0.11749021],
[ 0.20349541, -0.37396789],
[ 0.13864693, -0.23905642],
[ 0.32785307, -1.00769037],
[ 0.88944061, -0.39117628],
[-0.05837947, 0.28487039],
[-1.0673653 , 0.2204006 ],
[-0.60071345, -0.69545189],
[-0.03972324, -0.40936056],
[ 0.39742085, 0.20621162],
[-0.36941154, 0.0129811 ],
[ 0.03573703, 0.46666229],
[-0.56814999, -0.41288419],
[ 0.41047299, -0.73640868],
[ 0.88249707, -0.69004404],
[ 0.06579822, -0.50458395],
[-0.75737223, -0.0724028 ],
[ 0.18316966, 0.08722007],
[ 0.67248314, -0.41892665],
[ 0.25898723, 0.39688645],
[-1.1312983 , 0.4810614 ],
[ 1.0592844 , 0.64490287],
[ 0.41019663, 0.38790198],
[ 0.95142029, -0.04089983],
[-0.60492988, 0.43950906],
[ 0.23314762, -0.81785711],
[ 0.91067331, 0.30702075],
[-0.45026472, -0.03724104],
[-0.81396121, -0.64733959],
[-0.23191338, 0.50533992],
[-0.59760983, 0.28023168],
[ 0.73960166, -0.84270281],
[ 0.57294659, -0.31198928],
[ 0.24821133, -0.54784509],
[ 0.52127802, 0.94108005],
[ 0.33973198, 0.10609978],
[ 1.05339036, -0.02197593],
[ 0.01327466, -0.63379502],
[ 0.2422589 , 0.49032064],
[-0.89266612, 0.6345076 ],
[ 0.1672566 , 0.23548462],
[-0.05611705, 0.38834099],
[ 0.84695486, 0.81435811],
[ 0.29976195, -0.07943031],
[-0.1404762 , 0.72486032],
[-0.05482024, 0.18417328],
[-0.24643884, -0.43283337],
[-0.23460645, 0.6409442 ],
[-1.13184893, -0.61964942],
[-0.92413821, -0.45302089],
[ 0.2225745 , 0.77052597],
[-0.69453765, 0.53014147],
[-1.0362509 , 0.77339965],
[ 0.51880585, 0.30152232],
[-0.77429541, 0.02553767],
[ 0.71468326, 0.56869015],
[-0.33875274, 0.46826063],
[-0.34749244, 0.13441418],
[ 1.12980796, 0.04281936],
[-0.38308979, 0.79116812],
[-0.07425141, 0.2184625 ],
[-0.44945202, -0.05722266],
[ 0.85783288, 0.63778888],
[-0.47486203, -0.22498112],
[ 0.12627243, 0.86978412],
[-0.64736458, -0.36342437],
[ 0.47440459, 1.01101585],
[-0.38565772, -0.81031183]]),
array([1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1,
1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0,
1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1,
0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0]))),
(2,
(array([[-5.66730056, 9.6747529 ],
[-5.02967294, 8.6596218 ],
[-6.46936898, 6.82300947],
[-6.75290119, 7.20976961],
[-4.14673856, 7.63590025],
[-5.38927072, 7.95803989],
[-4.64344634, 7.55830615],
[-4.46345027, 8.83731915],
[-4.68748196, 7.21252795],
[-5.84140615, 8.13223642],
[-5.36728768, 9.39035577],
[-5.95944934, 8.31654712],
[-4.4207936 , 7.90908652],
[-6.90411213, 8.20050911],
[-4.81440963, 8.19155371],
[-4.95550633, 8.99006291],
[-4.76708326, 6.78307449],
[-3.12936539, 7.16255399],
[-6.43201576, 6.99213819],
[-7.12970024, 7.7990026 ],
[-7.61935775, 9.00251464],
[-4.40874557, 9.27197713],
[-4.41862884, 6.91289058],
[-5.8659896 , 6.93691471],
[-6.93064951, 8.76263877],
[-5.56001672, 8.89406765],
[-3.47162189, 7.76156545],
[-6.51197348, 8.920786 ],
[-5.89647284, 7.31403178],
[-5.04948453, 8.09078804],
[-5.65507444, 8.71871991],
[-6.16662202, 7.25381173],
[-3.73877184, 6.98761474],
[-7.4653154 , 9.24937097],
[-5.9958405 , 8.38682543],
[-5.80084772, 6.59052267],
[-6.43266806, 8.78589697],
[-5.64701218, 8.97617842],
[-6.27060303, 7.19945832],
[-7.22492511, 6.71446709],
[-7.55929193, 8.67586662],
[-5.92496887, 5.98552041],
[-6.37587295, 8.88947751],
[-7.43787746, 8.95340227],
[-6.15353935, 8.20288407],
[-6.54074446, 6.55779297],
[-4.42173579, 8.81517442],
[-5.61244473, 7.66386378],
[-5.81966167, 7.88508551],
[-4.90351711, 7.53945295],
[-5.39791168, 8.47356295],
[-6.86121319, 9.27910763],
[-6.32089689, 6.7034829 ],
[-7.64676819, 7.62431237],
[-5.1184643 , 9.83531364],
[-5.36709327, 7.66612429],
[-6.23402642, 6.12310003],
[-5.06657892, 7.91513345],
[-5.56633149, 7.31357851],
[-5.50099233, 7.05133525],
[-4.54171545, 8.47599756],
[-8.20123871, 7.20493971],
[-6.15849651, 7.17122642],
[-5.61579958, 9.51942024],
[-4.76771396, 7.58541058],
[-6.00874331, 8.14802833],
[-4.37107585, 7.3410528 ],
[-6.13764981, 8.56685089],
[-5.36247649, 8.7494947 ],
[-5.45052674, 8.99712724],
[-7.09800301, 9.09917142],
[-4.90935504, 7.7337076 ],
[-5.40795882, 10.61018377],
[-4.41488335, 8.97908848],
[-5.45939839, 7.7700846 ],
[-4.93810069, 10.01217222],
[-6.00556657, 6.93252594],
[-4.99200386, 7.42740444],
[-7.07131614, 8.05949363],
[-5.29052417, 8.70660951],
[-6.27313606, 7.5595934 ],
[-4.56369675, 8.12706739],
[-3.79200136, 9.08201602],
[-6.71208551, 10.89302579],
[-7.34687609, 8.35527284],
[-4.65333349, 7.73756566],
[-5.63928794, 6.72181978],
[-5.37253335, 7.08477617],
[-5.36599885, 7.22067391],
[-8.41982454, 8.20401253],
[-3.63234608, 8.33751606],
[-4.12716808, 9.83742304],
[-6.75503763, 7.07126671],
[-5.55684774, 7.30871568],
[-5.91896553, 8.01811773],
[-7.14524008, 8.4087608 ],
[-6.72483849, 6.0175721 ],
[-6.24123774, 6.95029361],
[-6.87090971, 6.72508089],
[-6.27460923, 7.61738757]]),
array([0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0,
0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0,
0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1,
0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,
1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1]))),
(3,
(array([[-9.33292655e-01, -1.27122049e+00],
[-7.66789307e-01, 1.77992147e+00],
[ 9.03096345e-01, 1.44981703e+00],
[-1.58918963e+00, -8.32751056e-02],
[ 9.28016429e-01, -1.23398104e+00],
[ 1.35985803e+00, 1.51790366e+00],
[ 5.64146897e-01, 8.34197749e-01],
[ 3.38467944e-01, -1.54519884e+00],
[-1.21399342e+00, 4.89499469e-01],
[-3.51523809e-01, 1.05604000e+00],
[ 1.42192482e+00, 1.30870092e+00],
[ 1.18748491e+00, 9.31400904e-01],
[ 8.19270631e-01, 1.48514069e+00],
[-1.59251151e+00, 4.85137234e-02],
[ 1.85219944e-01, 1.53870339e+00],
[ 1.02943234e+00, -1.81742875e+00],
[ 1.65055723e+00, -1.19890744e+00],
[-3.69716837e-01, 1.67597179e+00],
[-1.77851943e+00, 4.23377975e-01],
[-4.81676607e-01, -2.04460722e+00],
[ 2.19893663e+00, 2.99254258e-01],
[ 4.16115190e-01, -7.85493949e-01],
[-2.74842332e+00, 2.23314001e-01],
[ 6.72127536e-01, -1.39095875e+00],
[ 8.39858504e-01, 1.55542390e+00],
[ 1.12529264e+00, 2.74200610e-01],
[-1.22108822e+00, -4.41750034e-01],
[ 2.78739455e+00, -1.61112841e-03],
[ 3.53203076e-01, 1.12240129e-01],
[ 3.10603047e-01, -1.81672854e+00],
[ 1.86080686e+00, -6.41503310e-01],
[ 8.94746914e-01, -7.35762193e-01],
[-1.07707343e+00, -9.91615820e-01],
[-9.57932339e-01, -9.56397414e-01],
[-1.96082646e-01, -7.15486741e-01],
[ 3.23109232e+00, -6.33780279e-01],
[-1.38817524e+00, -2.48688592e-01],
[-1.06976424e+00, 9.66055142e-01],
[ 2.28064080e-01, -1.01741538e+00],
[ 5.71846228e-01, 1.17039597e+00],
[-9.47886334e-01, -1.09494698e+00],
[-2.28879834e+00, 9.91856748e-01],
[ 2.79871886e-01, 7.12484148e-01],
[-1.14485872e+00, -7.50568271e-01],
[-8.08416962e-01, -1.35525390e+00],
[ 1.15874630e+00, 7.35684201e-02],
[-1.27191176e+00, 6.74231628e-01],
[-6.68826500e-01, 1.40599404e+00],
[-2.33048236e+00, 7.96103665e-01],
[-1.27620109e+00, 6.92027964e-01],
[-9.52783841e-01, -1.08481428e+00],
[ 9.24379467e-02, 1.44550825e+00],
[ 3.32723963e-01, -1.06577791e+00],
[-1.02970093e+00, -8.31163579e-01],
[ 1.30603973e+00, 4.14757047e-01],
[-1.71750956e+00, 5.00547184e-01],
[-1.99566063e+00, 1.00972464e+00],
[ 7.13394018e-01, 1.88985067e+00],
[-1.79392778e+00, 1.63824076e+00],
[ 5.19077120e-01, -5.54142502e-01],
[ 1.41713525e+00, -1.13359774e+00],
[-8.48825636e-01, -1.14486125e+00],
[ 1.81154534e+00, 4.97164230e-01],
[-2.14823656e+00, 7.33412925e-01],
[ 1.06731305e+00, 1.63059220e+00],
[-1.73590855e+00, 3.20189254e-01],
[ 1.02467738e+00, -1.25475559e+00],
[ 2.21517183e+00, -5.96284790e-01],
[ 1.14752935e+00, 8.57570034e-01],
[-1.55245701e+00, -3.36171152e-02],
[ 2.55716114e+00, -2.99789421e-01],
[ 7.94490859e-01, 8.74280300e-01],
[-1.16336461e+00, 2.32337605e-01],
[-3.94536291e-01, 1.19313340e+00],
[-1.13456945e+00, -7.49047339e-01],
[ 1.00666441e+00, -1.55511830e+00],
[-2.34109735e+00, 4.52143651e-01],
[-7.90107843e-01, -1.40539861e+00],
[-2.49600849e-02, 1.01556610e+00],
[ 1.88879091e+00, -1.27159355e+00],
[ 2.01763813e+00, -6.80769118e-01],
[-7.21544129e-01, -1.56319001e+00],
[ 3.28795018e-01, 9.88507261e-01],
[-2.39486480e+00, 8.87560308e-01],
[-9.37827394e-01, 8.55915239e-01],
[ 5.93709424e-01, -8.83762427e-01],
[ 2.68917440e-01, 1.76442408e+00],
[-1.61918481e+00, 4.36660291e-01],
[-2.25592991e-01, -2.72499883e+00],
[ 2.81655475e-01, 6.83987133e-01],
[-8.51055761e-01, -1.22979364e+00],
[ 1.74848466e+00, 1.17389035e+00],
[ 1.12440472e+00, 5.85181570e-01],
[-5.00014133e-01, 1.58890117e+00],
[ 7.13425522e-01, 1.11017445e+00],
[ 5.70065432e-01, 1.58754104e+00],
[-1.10825112e+00, -5.84147570e-01],
[-1.04162242e+00, -8.80162451e-01],
[ 7.51042801e-01, -1.50398321e+00],
[ 8.50404935e-01, -7.86309791e-01]]),
array([0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1,
0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0,
0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0,
1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0,
0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1])))]
# One row per data set; one column for the raw data plus one per kernel.
nrows = len(datasets)
ncols = len(Kernel) + 1
fig, axes = plt.subplots(nrows, ncols, figsize=(20, 16))

# Outer loop: iterate over the data sets (one grid row each).
for ds_cnt, (X, Y) in enumerate(datasets):
    # First column of the row: the distribution of the raw data.
    ax = axes[ds_cnt, 0]
    if ds_cnt == 0:
        ax.set_title("Input data")
    ax.scatter(X[:, 0], X[:, 1], c=Y, zorder=10, cmap=plt.cm.Paired,
               edgecolors='k')
    ax.set_xticks(())
    ax.set_yticks(())

    # Inner loop: iterate over the kernels, filling in the classification
    # results one by one starting from the second column.
    for est_idx, kernel in enumerate(Kernel):
        # Position of this subplot in the grid.
        ax = axes[ds_cnt, est_idx + 1]

        # Fit the model; the score is the accuracy on the training data.
        clf = svm.SVC(kernel=kernel, gamma=2).fit(X, Y)
        score = clf.score(X, Y)

        # Scatter plot of the data itself.
        ax.scatter(X[:, 0], X[:, 1], c=Y, zorder=10, cmap=plt.cm.Paired,
                   edgecolors='k')
        # Mark the support vectors with larger, unfilled circles.
        ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
                   s=50, facecolors='none', zorder=10, edgecolors='k')

        # Plotting area: the data range padded by 0.5 on every side.
        x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5

        # np.mgrid combines the np.linspace + np.meshgrid steps used earlier
        # and builds the grid in one go, written as [start:stop:step]. When
        # the step is a complex number such as 200j, its magnitude is the
        # number of points to create and the stop value is included.
        XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
        # np.c_ stacks the flattened coordinates as two columns, giving one
        # (x, y) pair per row; the scores are reshaped back to the grid.
        Z = clf.decision_function(
            np.c_[XX.ravel(), YY.ravel()]
        ).reshape(XX.shape)

        # Colour the two sides of the decision boundary differently.
        ax.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
        # Contour lines: decision boundary (solid) and margins (dashed).
        ax.contour(XX, YY, Z, colors=['k', 'k', 'k'],
                   linestyles=['--', '-', '--'], levels=[-1, 0, 1])

        # Hide the axis ticks.
        ax.set_xticks(())
        ax.set_yticks(())

        # Column titles go on the first row only.
        if ds_cnt == 0:
            ax.set_title(kernel)

        # Print the score on every subplot.
        ax.text(
            0.95, 0.06, ('%.2f' % score).lstrip('0'),
            size=15,
            # White rounded box as background for the score.
            bbox=dict(boxstyle='round', alpha=0.8, facecolor='white'),
            # Position is given in the subplot's own axes coordinates.
            transform=ax.transAxes,
            # Align the text to the right of the anchor point.
            horizontalalignment='right',
        )

plt.tight_layout()
plt.show()
结果:
可以观察到,线性核函数和多项式核函数在非线性数据上表现会浮动,如果数据相对线性可分,则表现不错,如果是像环形数据那样彻底不可分的,则表现糟糕。在线性数据集上,线性核函数和多项式核函数即便有扰动项也可以表现不错,可见多项式核函数虽然也可以处理非线性情况,但更偏向于线性的功能。
Sigmoid核函数就比较尴尬了,它在非线性数据上强于两个线性核函数,但效果明显不如rbf,它在线性数据上完全比不上线性的核函数们,对扰动项的抵抗也比较弱,所以它功能比较弱小,很少被用到。
rbf,高斯径向基核函数基本在任何数据集上都表现不错,属于比较万能的核函数。我个人的经验是,无论如何先试试看高斯径向基核函数,它适用于核转换到很高的空间的情况,在各种情况下往往效果都很不错,如果rbf效果不好,那我们再试试看其他的核函数。另外,多项式核函数多被用于图像处理之中。