SVM练习

最新推荐文章于 2024-03-13 15:36:43 发布

潘诺西亚的火山

最新推荐文章于 2024-03-13 15:36:43 发布

阅读量806

点赞数

文章标签：机器学习

本文链接：https://blog.csdn.net/helldoger/article/details/107399369

版权

from sklearn.datasets import make_blobs
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np

# Toy binary problem: 50 samples drawn from two Gaussian blobs (fixed seed).
X,y = make_blobs(n_samples=50, centers=2, random_state=0,cluster_std=0.6)
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")# colour each point by its label using the "rainbow" colormap
plt.xticks([])  # hide the tick marks on both axes
plt.yticks([])
plt.show()

# Start from a scatter plot of the samples
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
ax = plt.gca() # get the current axes; a new one is created if none exists

在这里插入图片描述

# Read the current limits of the two axes
xlim = ax.get_xlim()
ylim = ax.get_ylim()

# 30 evenly spaced values between the lower and upper limit of each axis
axisx = np.linspace(xlim[0],xlim[1],30)
axisy = np.linspace(ylim[0],ylim[1],30)

axisy,axisx = np.meshgrid(axisy,axisx)
# The two 2-D arrays produced here are used as the X and Y arguments of contour.
# meshgrid broadcasts the two 1-D vectors against each other, giving the
# x-coordinate and y-coordinate of every one of the len(axisx) * len(axisy) grid points.

xy = np.vstack([axisx.ravel(), axisy.ravel()]).T
# ravel() flattens each array to 1-D; vstack stacks the two flattened arrays as rows,
# and .T turns the result into one (x, y) row per grid point.
# xy is therefore the full grid: a dense set of points covering the whole canvas.

# No `c` data is supplied, so a colormap would be ignored (and matplotlib warns
# about it); the grid is drawn in the default colour instead.
plt.scatter(xy[:,0],xy[:,1],s=1)

# Small example showing what meshgrid and vstack do
a = np.array([1,2,3])
b = np.array([7,8])
# How many coordinates do we get by pairing every element of a with every element of b?
# Six: (1,7),(2,7),(3,7),(1,8),(2,8),(3,8)

v1,v2 = np.meshgrid(a,b)

v1

v2

v = np.vstack([v1.ravel(), v2.ravel()]).T

在这里插入图片描述

# Fit a linear SVC; fitting determines the decision boundary
clf = SVC(kernel = "linear").fit(X,y)# fit the model on the blob data
Z = clf.decision_function(xy).reshape(axisx.shape)
# decision_function returns one decision value per input sample (its signed
# position relative to the decision boundary).
# The values are reshaped to axisx's shape because contour requires Z to have
# the same structure as its X and Y arguments.

# Start from a scatter plot of the samples
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
ax = plt.gca() # get the current axes; a new one is created if none exists
# Draw the decision boundary and the two hyperplanes parallel to it
ax.contour(axisx,axisy,Z
           ,colors="k"
           ,levels=[-1,0,1] # three contour lines: Z == -1, Z == 0 and Z == 1
           ,alpha=0.5# transparency
           ,linestyles=["--","-","--"])

ax.set_xlim(xlim)# restore the x-axis limits captured earlier
ax.set_ylim(ylim)

(-0.43660961990940284, 5.772756283035797)

在这里插入图片描述

# Z holds the decision value of each input point, and the `levels` argument of
# contour is expressed in those same decision values.
# Try it with a single sample: highlight sample 10 in black.
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
# A literal colour is given, so no colormap applies; passing cmap here would be
# ignored (and triggers a warning in recent matplotlib).
plt.scatter(X[10,0],X[10,1],c="black",s=50)

<matplotlib.collections.PathCollection at 0x2462e456828>

在这里插入图片描述

# Decision value of sample 10. Keep the computed value rather than pasting a
# number copied from an earlier run, so the contour still passes through the
# sample if the data or the fitted model changes.
point_score = clf.decision_function(X[10].reshape(1,2))
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
ax = plt.gca()
# A single contour line at exactly that decision value
ax.contour(axisx,axisy,Z
            ,colors="k"
            ,levels=[point_score[0]]
            ,alpha=0.5
            ,linestyles=["--"])

<matplotlib.contour.QuadContourSet at 0x2462e4b46d8>

在这里插入图片描述

#将上述过程包装成函数：
def plot_svc_decision_function(model,ax=None):
    """Draw the contour lines of a fitted model's decision function.

    model: any object exposing ``decision_function(points)`` for an
        ``(n_points, 2)`` array of coordinates.
    ax: axes to draw on; the current matplotlib axes are used when omitted.

    The decision function is evaluated on a 30 x 30 grid spanning the axes'
    current limits. Contours are drawn at -1, 0 and 1 (dashed, solid, dashed)
    and the original limits are restored afterwards.
    """
    target = plt.gca() if ax is None else ax
    x_bounds = target.get_xlim()
    y_bounds = target.get_ylim()

    xs = np.linspace(x_bounds[0], x_bounds[1], 30)
    ys = np.linspace(y_bounds[0], y_bounds[1], 30)
    # Swapped unpacking: grid_x varies along rows, grid_y along columns.
    grid_y, grid_x = np.meshgrid(ys, xs)
    # One (x, y) row per grid point.
    points = np.vstack([grid_x.ravel(), grid_y.ravel()]).T
    values = model.decision_function(points).reshape(grid_x.shape)

    target.contour(
        grid_x, grid_y, values,
        colors="k",
        levels=[-1,0,1],
        alpha=0.5,
        linestyles=["--","-","--"],
    )
    # contour may change the view, so put the original limits back.
    target.set_xlim(x_bounds)
    target.set_ylim(y_bounds)
 
# With the helper, the whole plotting procedure becomes:
clf = SVC(kernel = "linear").fit(X,y)
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
plot_svc_decision_function(clf)

在这里插入图片描述

clf.predict(X)
# Classify the samples in X using the fitted decision boundary; one label per sample

clf.score(X,y)
# Mean accuracy on the given data and labels

clf.support_vectors_
# Coordinates of the support vectors

clf.n_support_# expected output here: array([2, 1])
# Number of support vectors in each class

array([2, 1])

from sklearn.datasets import make_circles
# Concentric-circles data set: 100 noisy samples, inner circle at 0.1 of the outer radius.
# No random_state is passed, so the samples differ from run to run.
X,y = make_circles(100, factor=0.1, noise=.1)

X.shape

y.shape

plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
plt.show()

在这里插入图片描述

# Fit a linear SVC on the circles data and draw its boundary and margins.
clf = SVC(kernel = "linear").fit(X,y)
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
plot_svc_decision_function(clf)
# Training accuracy of the linear model (the run recorded with this code printed 0.68)
clf.score(X,y)

0.68

在这里插入图片描述

# Define a new dimension r computed from X: exp(-(x0^2 + x1^2)) for each sample
r = np.exp(-(X**2).sum(1))

# 100 evenly spaced values spanning the range of r
# NOTE(review): rlim is not used anywhere in the code visible here.
rlim = np.linspace(min(r),max(r),100)
 
from mpl_toolkits import mplot3d
 
# Helper that draws the samples as a 3-D scatter plot
# elev: elevation (up/down rotation) of the viewpoint
# azim: azimuth (horizontal rotation) of the viewpoint
def plot_3D(elev=30,azim=30,X=X,y=y):
    """Scatter the samples in 3-D, using the radial feature r as the height.

    elev, azim: viewing angles forwarded to ``ax.view_init``.
    X: 2-column array of sample coordinates (defaults to the module-level X
        at definition time).
    y: labels used to colour the points.
    """
    # Derive the height from the X that is actually plotted instead of reading
    # a module-level r, which can silently disagree with X (different length
    # or stale values) when the function is called with other data.
    r = np.exp(-(X**2).sum(1))
    ax = plt.subplot(projection="3d")
    ax.scatter3D(X[:,0],X[:,1],r,c=y,s=50,cmap='rainbow')
    ax.view_init(elev=elev,azim=azim)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("r")
    plt.show()
    
plot_3D()

在这里插入图片描述

#如果放到jupyter notebook中运行
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
 
from sklearn.datasets import make_circles
# Fresh concentric-circles sample (no random_state, so it differs from run to run)
X,y = make_circles(100, factor=0.1, noise=.1)
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
 
def plot_svc_decision_function(model,ax=None):
    """Draw the contour lines of a fitted model's decision function.

    model: any object exposing ``decision_function(points)`` for an
        ``(n_points, 2)`` array of coordinates.
    ax: axes to draw on; the current matplotlib axes are used when omitted.

    The decision function is evaluated on a 30 x 30 grid spanning the axes'
    current limits. Contours are drawn at -1, 0 and 1 (dashed, solid, dashed)
    and the original limits are restored afterwards.
    """
    target = plt.gca() if ax is None else ax
    x_bounds = target.get_xlim()
    y_bounds = target.get_ylim()

    xs = np.linspace(x_bounds[0], x_bounds[1], 30)
    ys = np.linspace(y_bounds[0], y_bounds[1], 30)
    # Swapped unpacking: grid_x varies along rows, grid_y along columns.
    grid_y, grid_x = np.meshgrid(ys, xs)
    # One (x, y) row per grid point.
    points = np.vstack([grid_x.ravel(), grid_y.ravel()]).T
    values = model.decision_function(points).reshape(grid_x.shape)

    target.contour(
        grid_x, grid_y, values,
        colors="k",
        levels=[-1,0,1],
        alpha=0.5,
        linestyles=["--","-","--"],
    )
    # contour may change the view, so put the original limits back.
    target.set_xlim(x_bounds)
    target.set_ylim(y_bounds)
 
# Linear SVC on the circles data, drawn with its boundary and margins
clf = SVC(kernel = "linear").fit(X,y)
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
plot_svc_decision_function(clf)

# New dimension computed from X: exp(-(x0^2 + x1^2)) for each sample
r = np.exp(-(X**2).sum(1))

# The third argument of linspace is the NUMBER of samples and must be an
# integer; the previous float 0.2 triggered a DeprecationWarning on old NumPy
# and is rejected by current releases. 100 samples over the range of r.
rlim = np.linspace(min(r),max(r),100)
 
from mpl_toolkits import mplot3d
 
def plot_3D(elev=30,azim=30,X=X,y=y):
    """Scatter the samples in 3-D, using the radial feature r as the height.

    elev, azim: viewing angles forwarded to ``ax.view_init``.
    X: 2-column array of sample coordinates (defaults to the module-level X
        at definition time).
    y: labels used to colour the points.
    """
    # Derive the height from the X that is actually plotted instead of reading
    # a module-level r, which can silently disagree with X (different length
    # or stale values) when the function is called with other data.
    r = np.exp(-(X**2).sum(1))
    ax = plt.subplot(projection="3d")
    ax.scatter3D(X[:,0],X[:,1],r,c=y,s=50,cmap='rainbow')
    ax.view_init(elev=elev,azim=azim)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("r")
    plt.show()
 
from ipywidgets import interact,fixed
# The keyword names given to interact are forwarded to plot_3D, so they must
# match its parameters: the azimuth slider is `azim` (it was misspelled `azip`,
# which plot_3D does not accept). X and y are held fixed (no widget).
interact(plot_3D,elev=[0,30,60,90],azim=(-180,180),X=fixed(X),y=fixed(y))
plt.show()

f:\Anaconda3\lib\site-packages\ipykernel_launcher.py:32: DeprecationWarning: object of type <class 'float'> cannot be safely interpreted as an integer.

在这里插入图片描述

interactive(children=(Dropdown(description='elev', index=1, options=(0, 30, 60, 90), value=30), IntSlider(valu…

# Same data, now with the RBF kernel.
# NOTE(review): gamma is left at the library default, which depends on the
# installed scikit-learn version (see the FutureWarning recorded below).
clf = SVC(kernel = "rbf").fit(X,y)
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
plot_svc_decision_function(clf)

f:\Anaconda3\lib\site-packages\sklearn\svm\base.py:196: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-HqjpYKFo-1594926610385)(output_13_1.png)]

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import svm#from sklearn.svm import SVC  两者都可以
from sklearn.datasets import make_circles, make_moons, make_blobs,make_classification

n_samples = 100

# Four synthetic two-feature, two-class data sets used to compare the kernels
datasets = [
    make_moons(n_samples=n_samples, noise=0.2, random_state=0),
    make_circles(n_samples=n_samples, noise=0.2, factor=0.5, random_state=1),
    make_blobs(n_samples=n_samples, centers=2, random_state=5),# clustered (blob) data
    make_classification(n_samples=n_samples,n_features = 2,n_informative=2,n_redundant=0, random_state=5)
                # n_features: total number of features; n_informative: informative features;
                # n_redundant: redundant features
    ]

Kernel = ["linear","poly","rbf","sigmoid"]

# What do the four data sets look like? One figure per data set.
# Note: this loop rebinds the module-level name X (and introduces Y).
for X,Y in datasets:
    plt.figure(figsize=(5,4))
    plt.scatter(X[:,0],X[:,1],c=Y,s=50,cmap="rainbow")

在这里插入图片描述

# One row per data set; one column for the raw data plus one per kernel
nrows=len(datasets)
ncols=len(Kernel) + 1

# Empty grid of subplots (filled in by the plotting loop further below)
fig, axes = plt.subplots(nrows, ncols,figsize=(20,16))

在这里插入图片描述

[*enumerate(datasets)] == list(enumerate(datasets))# enumerate, map and zip objects can all be unpacked with * like this
# Each element has the form (index, (X, Y)): the index, then the feature matrix X and the labels Y

True

# One row per data set; one column for the raw data plus one per kernel
nrows=len(datasets)
ncols=len(Kernel) + 1

fig, axes = plt.subplots(nrows, ncols,figsize=(20,16))

# Outer loop: iterate over the data sets
for ds_cnt, (X,Y) in enumerate(datasets):
    
    # First column of the figure: distribution of the raw data
    ax = axes[ds_cnt, 0]
    if ds_cnt == 0:
        ax.set_title("Input data")
    ax.scatter(X[:, 0], X[:, 1], c=Y, zorder=10, cmap=plt.cm.Paired,edgecolors='k')
    ax.set_xticks(())
    ax.set_yticks(())
    
    # Inner loop: iterate over the kernels,
    # filling in the classification results from the second column onwards
    for est_idx, kernel in enumerate(Kernel):
        
        # Select the subplot for this (data set, kernel) pair
        ax = axes[ds_cnt, est_idx + 1]
        
        # Fit the model and record its accuracy on the same data it was trained on
        clf = svm.SVC(kernel=kernel, gamma=2).fit(X, Y)
        score = clf.score(X, Y)
        
        # Scatter plot of the data itself
        ax.scatter(X[:, 0], X[:, 1], c=Y
                   ,zorder=10
                   ,cmap=plt.cm.Paired,edgecolors='k')
        # Mark the support vectors with larger hollow circles
        ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=50,
                    facecolors='none', zorder=10, edgecolors='k')# facecolors='none': transparent fill
        
        # Plotting range for the decision boundary: data range padded by 0.5
        x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
        
        # np.mgrid combines what np.linspace and np.meshgrid were used for earlier:
        # it builds the grid directly from the minimum and maximum values.
        # The index has the form [start:stop:step];
        # when the step is a complex number (e.g. 200j), its magnitude is the number of
        # points to create between start and stop, and the stop value is included.
        XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
        # np.c_ pairs the flattened coordinates column-wise (same role as vstack(...).T earlier)
        Z = clf.decision_function(np.c_[XX.ravel(), YY.ravel()]).reshape(XX.shape)
        # Fill the two sides of the decision boundary with different colours
        ax.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
        # Draw the contour lines at -1, 0 and 1.
        # If none of these levels lies inside the range of Z, matplotlib emits
        # "No contour levels were found within the data range".
        ax.contour(XX, YY, Z, colors=['k', 'k', 'k'], linestyles=['--', '-', '--'],
                    levels=[-1, 0, 1])
        
        # Hide the axis ticks
        ax.set_xticks(())
        ax.set_yticks(())
        
        # Put the kernel name as a title above the first row only
        if ds_cnt == 0:
            ax.set_title(kernel)
            
        # Add the accuracy score to each subplot (leading zero stripped, e.g. ".95")
        ax.text(0.95, 0.06, ('%.2f' % score).lstrip('0')
                , size=15
                , bbox=dict(boxstyle='round', alpha=0.8, facecolor='white')
                    # white rounded box behind the score
                , transform=ax.transAxes # position is given in this subplot's axes coordinates
                , horizontalalignment='right' # horizontal alignment of the text at that position
               )

plt.tight_layout()
plt.show()

f:\Anaconda3\lib\site-packages\matplotlib\contour.py:1230: UserWarning: No contour levels were found within the data range.
  warnings.warn("No contour levels were found"

在这里插入图片描述

# load_breast_cancer was called below without ever being imported (NameError).
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from time import time
import datetime

# The stray `X = StandardScaler().fit_transform(X)` that used to sit here scaled
# a leftover X from the previous section and was overwritten two lines later,
# so it has been removed; standardization is applied to this data set further on.
data = load_breast_cancer()
X = data.data
y = data.target

X.shape
np.unique(y)

# Quick look at the first two features, coloured by label
plt.scatter(X[:,0],X[:,1],c=y)
plt.show()

# 70% / 30% train/test split with a fixed seed
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X,y,test_size=0.3,random_state=420)

Kernel = ["linear","poly","rbf","sigmoid"]

# Fit each kernel on the unscaled data and report test accuracy and elapsed time.
# NOTE(review): no output was recorded for this cell and the next one drops
# "poly" — presumably the default-degree poly kernel is very slow on unscaled
# data; confirm before running.
for kernel in Kernel:
    time0 = time()
    clf= SVC(kernel = kernel
             , gamma="auto"
            # , degree = 1
             , cache_size=10000# kernel cache size in MB (library default is 200 MB)
            ).fit(Xtrain,Ytrain)
    print("The accuracy under kernel %s is %f" % (kernel,clf.score(Xtest,Ytest)))
    print(time()-time0)

# Same comparison without the "poly" kernel
Kernel = ["linear","rbf","sigmoid"]

for kernel in Kernel:
    time0 = time()
    clf= SVC(kernel = kernel
             , gamma="auto"
            # , degree = 1
             , cache_size=5000
            ).fit(Xtrain,Ytrain)
    # Test-set accuracy, then wall-clock seconds spent fitting and scoring
    print("The accuracy under kernel %s is %f" % (kernel,clf.score(Xtest,Ytest)))
    print(time()-time0)

The accuracy under kernel linear is 0.929825
0.795527458190918
The accuracy under kernel rbf is 0.596491
0.06104254722595215
The accuracy under kernel sigmoid is 0.596491
0.008005142211914062

# All four kernels again, this time with degree=1
Kernel = ["linear","poly","rbf","sigmoid"]

for kernel in Kernel:
    time0 = time()
    clf= SVC(kernel = kernel
             , gamma="auto"
             , degree = 1
             , cache_size=5000
            ).fit(Xtrain,Ytrain)
    # Test-set accuracy, then wall-clock seconds spent fitting and scoring
    print("The accuracy under kernel %s is %f" % (kernel,clf.score(Xtest,Ytest)))
    print(time()-time0)

The accuracy under kernel linear is 0.929825
0.8025338649749756
The accuracy under kernel poly is 0.923977
0.14710068702697754
The accuracy under kernel rbf is 0.596491
0.06003713607788086
The accuracy under kernel sigmoid is 0.596491
0.011008739471435547

import pandas as pd
data = pd.DataFrame(X)
data.describe([0.01,0.05,0.1,0.25,0.5,0.75,0.9,0.99]).T# descriptive statistics, one row per feature
# The mean and std columns show that the features are on very different scales.
# Compare the 1% quantile with the minimum and the 90% quantile with the maximum
# to judge whether a feature is roughly symmetric or skewed: a large gap indicates
# skew, towards whichever side has the larger gap.
# The large-valued features turn out to be skewed,
# so the data should be standardized before fitting.

	count	mean	std	min	1%	5%	10%	25%	50%	75%	90%	99%	max
0	569.0	14.127292	3.524049	6.981000	8.458360	9.529200	10.260000	11.700000	13.370000	15.780000	19.530000	24.371600	28.11000
1	569.0	19.289649	4.301036	9.710000	10.930400	13.088000	14.078000	16.170000	18.840000	21.800000	24.992000	30.652000	39.28000
2	569.0	91.969033	24.298981	43.790000	53.827600	60.496000	65.830000	75.170000	86.240000	104.100000	129.100000	165.724000	188.50000
3	569.0	654.889104	351.914129	143.500000	215.664000	275.780000	321.600000	420.300000	551.100000	782.700000	1177.400000	1786.600000	2501.00000
4	569.0	0.096360	0.014064	0.052630	0.068654	0.075042	0.079654	0.086370	0.095870	0.105300	0.114820	0.132888	0.16340
5	569.0	0.104341	0.052813	0.019380	0.033351	0.040660	0.049700	0.064920	0.092630	0.130400	0.175460	0.277192	0.34540
6	569.0	0.088799	0.079720	0.000000	0.000000	0.004983	0.013686	0.029560	0.061540	0.130700	0.203040	0.351688	0.42680
7	569.0	0.048919	0.038803	0.000000	0.000000	0.005621	0.011158	0.020310	0.033500	0.074000	0.100420	0.164208	0.20120
8	569.0	0.181162	0.027414	0.106000	0.129508	0.141500	0.149580	0.161900	0.179200	0.195700	0.214940	0.259564	0.30400
9	569.0	0.062798	0.007060	0.049960	0.051504	0.053926	0.055338	0.057700	0.061540	0.066120	0.072266	0.085438	0.09744
10	569.0	0.405172	0.277313	0.111500	0.119740	0.160100	0.183080	0.232400	0.324200	0.478900	0.748880	1.291320	2.87300
11	569.0	1.216853	0.551648	0.360200	0.410548	0.540140	0.640400	0.833900	1.108000	1.474000	1.909400	2.915440	4.88500
12	569.0	2.866059	2.021855	0.757000	0.953248	1.132800	1.280200	1.606000	2.287000	3.357000	5.123200	9.690040	21.98000
13	569.0	40.337079	45.491006	6.802000	8.514440	11.360000	13.160000	17.850000	24.530000	45.190000	91.314000	177.684000	542.20000
14	569.0	0.007041	0.003003	0.001713	0.003058	0.003690	0.004224	0.005169	0.006380	0.008146	0.010410	0.017258	0.03113
15	569.0	0.025478	0.017908	0.002252	0.004705	0.007892	0.009169	0.013080	0.020450	0.032450	0.047602	0.089872	0.13540
16	569.0	0.031894	0.030186	0.000000	0.000000	0.003253	0.007726	0.015090	0.025890	0.042050	0.058520	0.122292	0.39600
17	569.0	0.011796	0.006170	0.000000	0.000000	0.003831	0.005493	0.007638	0.010930	0.014710	0.018688	0.031194	0.05279
18	569.0	0.020542	0.008266	0.007882	0.010547	0.011758	0.013012	0.015160	0.018730	0.023480	0.030120	0.052208	0.07895
19	569.0	0.003795	0.002646	0.000895	0.001114	0.001522	0.001710	0.002248	0.003187	0.004558	0.006185	0.012650	0.02984
20	569.0	16.269190	4.833242	7.930000	9.207600	10.534000	11.234000	13.010000	14.970000	18.790000	23.682000	30.762800	36.04000
21	569.0	25.677223	6.146258	12.020000	15.200800	16.574000	17.800000	21.080000	25.410000	29.720000	33.646000	41.802400	49.54000
22	569.0	107.261213	33.602542	50.410000	58.270400	67.856000	72.178000	84.110000	97.660000	125.400000	157.740000	208.304000	251.20000
23	569.0	880.583128	569.356993	185.200000	256.192000	331.060000	384.720000	515.300000	686.500000	1084.000000	1673.000000	2918.160000	4254.00000
24	569.0	0.132369	0.022832	0.071170	0.087910	0.095734	0.102960	0.116600	0.131300	0.146000	0.161480	0.188908	0.22260
25	569.0	0.254265	0.157336	0.027290	0.050094	0.071196	0.093676	0.147200	0.211900	0.339100	0.447840	0.778644	1.05800
26	569.0	0.272188	0.208624	0.000000	0.000000	0.018360	0.045652	0.114500	0.226700	0.382900	0.571320	0.902380	1.25200
27	569.0	0.114606	0.065732	0.000000	0.000000	0.024286	0.038460	0.064930	0.099930	0.161400	0.208940	0.269216	0.29100
28	569.0	0.290076	0.061867	0.156500	0.176028	0.212700	0.226120	0.250400	0.282200	0.317900	0.360080	0.486908	0.66380
29	569.0	0.083946	0.018061	0.055040	0.058580	0.062558	0.065792	0.071460	0.080040	0.092080	0.106320	0.140628	0.20750

from sklearn.preprocessing import StandardScaler
X = StandardScaler().fit_transform(X)# standardize every feature to zero mean and unit variance
# NOTE(review): the scaler is fitted on the full data set before the train/test
# split, so test-set statistics influence the scaling.
data = pd.DataFrame(X)
data.describe([0.01,0.05,0.1,0.25,0.5,0.75,0.9,0.99]).T# means are now ~0 and standard deviations ~1

	count	mean	std	min	1%	5%	10%	25%	50%	75%	90%	99%	max
0	569.0	-3.162867e-15	1.00088	-2.029648	-1.610057	-1.305923	-1.098366	-0.689385	-0.215082	0.469393	1.534446	2.909529	3.971288
1	569.0	-6.530609e-15	1.00088	-2.229249	-1.945253	-1.443165	-1.212786	-0.725963	-0.104636	0.584176	1.326975	2.644095	4.651889
2	569.0	-7.078891e-16	1.00088	-1.984504	-1.571053	-1.296381	-1.076672	-0.691956	-0.235980	0.499677	1.529432	3.037982	3.976130
3	569.0	-8.799835e-16	1.00088	-1.454443	-1.249201	-1.078225	-0.947908	-0.667195	-0.295187	0.363507	1.486075	3.218702	5.250529
4	569.0	6.132177e-15	1.00088	-3.112085	-1.971730	-1.517125	-1.188910	-0.710963	-0.034891	0.636199	1.313694	2.599511	4.770911
5	569.0	-1.120369e-15	1.00088	-1.610136	-1.345369	-1.206849	-1.035527	-0.747086	-0.221940	0.493857	1.347811	3.275782	4.568425
6	569.0	-4.421380e-16	1.00088	-1.114873	-1.114873	-1.052316	-0.943046	-0.743748	-0.342240	0.526062	1.434288	3.300560	4.243589
7	569.0	9.732500e-16	1.00088	-1.261820	-1.261820	-1.116837	-0.974010	-0.737944	-0.397721	0.646935	1.328412	2.973759	3.927930
8	569.0	-1.971670e-15	1.00088	-2.744117	-1.885853	-1.448032	-1.153036	-0.703240	-0.071627	0.530779	1.233221	2.862418	4.484751
9	569.0	-1.453631e-15	1.00088	-1.819865	-1.600987	-1.257643	-1.057477	-0.722639	-0.178279	0.470983	1.342243	3.209454	4.910919
10	569.0	-9.076415e-16	1.00088	-1.059924	-1.030184	-0.884517	-0.801577	-0.623571	-0.292245	0.266100	1.240514	3.198294	8.906909
11	569.0	-8.853492e-16	1.00088	-1.554264	-1.462915	-1.227791	-1.045885	-0.694809	-0.197498	0.466552	1.256518	3.081820	6.655279
12	569.0	1.773674e-15	1.00088	-1.044049	-0.946900	-0.858016	-0.785049	-0.623768	-0.286652	0.243031	1.117354	3.378079	9.461986
13	569.0	-8.291551e-16	1.00088	-0.737829	-0.700152	-0.637545	-0.597942	-0.494754	-0.347783	0.106773	1.121579	3.021867	11.041842
14	569.0	-7.541809e-16	1.00088	-1.776065	-1.327593	-1.116972	-0.939031	-0.624018	-0.220335	0.368355	1.123053	3.405812	8.029999
15	569.0	-3.921877e-16	1.00088	-1.298098	-1.160988	-0.982870	-0.911510	-0.692926	-0.281020	0.389654	1.236492	3.598943	6.143482
16	569.0	7.917900e-16	1.00088	-1.057501	-1.057501	-0.949654	-0.801336	-0.557161	-0.199065	0.336752	0.882848	2.997338	12.072680
17	569.0	-2.739461e-16	1.00088	-1.913447	-1.913447	-1.292055	-1.022462	-0.674490	-0.140496	0.472657	1.117927	3.146456	6.649601
18	569.0	-3.108234e-16	1.00088	-1.532890	-1.210240	-1.063590	-0.911757	-0.651681	-0.219430	0.355692	1.159654	3.834036	7.071917
19	569.0	-3.366766e-16	1.00088	-1.096968	-1.014237	-0.859880	-0.788466	-0.585118	-0.229940	0.288642	0.904208	3.349301	9.851593
20	569.0	-2.333224e-15	1.00088	-1.726901	-1.462332	-1.187658	-1.042700	-0.674921	-0.269040	0.522016	1.535063	3.001373	4.094189
21	569.0	1.763674e-15	1.00088	-2.223994	-1.706020	-1.482403	-1.282757	-0.748629	-0.043516	0.658341	1.297666	2.625885	3.885905
22	569.0	-1.198026e-15	1.00088	-1.693361	-1.459232	-1.173717	-1.044983	-0.689578	-0.285980	0.540279	1.503553	3.009644	4.287337
23	569.0	5.049661e-16	1.00088	-1.222423	-1.097625	-0.966014	-0.871684	-0.642136	-0.341181	0.357589	1.393000	3.581882	5.930172
24	569.0	-5.213170e-15	1.00088	-2.682695	-1.948882	-1.605910	-1.289152	-0.691230	-0.046843	0.597545	1.276124	2.478455	3.955374
25	569.0	-2.174788e-15	1.00088	-1.443878	-1.298811	-1.164575	-1.021571	-0.681083	-0.269501	0.539669	1.231407	3.335783	5.112877
26	569.0	6.856456e-16	1.00088	-1.305831	-1.305831	-1.217748	-1.086814	-0.756514	-0.218232	0.531141	1.435090	3.023359	4.700669
27	569.0	-1.412656e-16	1.00088	-1.745063	-1.745063	-1.375270	-1.159448	-0.756400	-0.223469	0.712510	1.436382	2.354181	2.685877
28	569.0	-2.289567e-15	1.00088	-2.160960	-1.845039	-1.251767	-1.034661	-0.641864	-0.127409	0.450138	1.132518	3.184317	6.046041
29	569.0	2.575171e-15	1.00088	-1.601839	-1.405690	-1.185223	-1.006009	-0.691912	-0.216444	0.450762	1.239884	3.141089	6.846856

# Re-split the standardized data (same split ratio and seed as before)
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X,y,test_size=0.3,random_state=420)

Kernel = ["linear","poly","rbf","sigmoid"]

for kernel in Kernel:
    time0 = time()
    clf= SVC(kernel = kernel
             , gamma="auto"
             , degree = 1
             , cache_size=5000
            ).fit(Xtrain,Ytrain)
    # Test-set accuracy, then wall-clock seconds spent fitting and scoring
    print("The accuracy under kernel %s is %f" % (kernel,clf.score(Xtest,Ytest)))
    print(time()-time0)

The accuracy under kernel linear is 0.976608
0.01501321792602539
The accuracy under kernel poly is 0.964912
0.006003141403198242
The accuracy under kernel rbf is 0.970760
0.011005401611328125
The accuracy under kernel sigmoid is 0.953216
0.0060024261474609375

# Sweep gamma for the RBF kernel and record the test accuracy of each value
score = []
gamma_range = np.logspace(-10, 1, 50) # 50 values evenly spaced on a log scale from 1e-10 to 1e1
for i in gamma_range:
    clf = SVC(kernel="rbf",gamma = i,cache_size=5000).fit(Xtrain,Ytrain)
    score.append(clf.score(Xtest,Ytest))
    
# Best accuracy and the (first) gamma that achieves it
print(max(score), gamma_range[score.index(max(score))])
plt.plot(gamma_range,score)
plt.show()

0.9766081871345029 0.012067926406393264

在这里插入图片描述

from sklearn.model_selection import StratifiedShuffleSplit# splitter used for the cross-validated grid search
from sklearn.model_selection import GridSearchCV# grid search with cross-validation

time0 = time()

# Candidate values for the two hyper-parameters of the polynomial kernel
gamma_range = np.logspace(-10,1,20)
coef0_range = np.linspace(0,5,10)

param_grid = dict(gamma = gamma_range
                  ,coef0 = coef0_range)
# 5 stratified random splits, each holding out 30% of the data for testing
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.3, random_state=420)
# The SVC(...) call was never closed, which made param_grid and cv arguments of
# SVC and left the GridSearchCV parenthesis unbalanced (SyntaxError). The
# estimator is now closed first; param_grid and cv belong to GridSearchCV.
grid = GridSearchCV(SVC(kernel = "poly",degree=1,cache_size=5000)
                    ,param_grid=param_grid
                    ,cv=cv)
grid.fit(X, y)

print("The best parameters are %s with a score of %0.5f" % (grid.best_params_, 
grid.best_score_))
print(time()-time0)

The best parameters are {'coef0': 0.0, 'gamma': 0.18329807108324375} with a score of 0.96959
13.360332727432251

# Tune C for the linear kernel
score = []
C_range = np.linspace(0.01,30,50)
for i in C_range:
    clf = SVC(kernel="linear",C=i,cache_size=5000).fit(Xtrain,Ytrain)
    score.append(clf.score(Xtest,Ytest))
# Best test accuracy and the (first) C that achieves it
print(max(score), C_range[score.index(max(score))])
plt.plot(C_range,score)
plt.show()

# Switch to the RBF kernel
# NOTE(review): gamma is hard-coded here and differs from the best gamma
# printed by the gamma sweep recorded earlier — confirm which value is intended.
score = []
C_range = np.linspace(0.01,30,50)
for i in C_range:
    clf = SVC(kernel="rbf",C=i,gamma = 0.012742749857031322,cache_size=5000).fit(Xtrain,Ytrain)
    score.append(clf.score(Xtest,Ytest))
    
print(max(score), C_range[score.index(max(score))])
plt.plot(C_range,score)
plt.show()

# Refine the search on a narrower range of C (5 to 7)
score = []
C_range = np.linspace(5,7,50)
for i in C_range:
    clf = SVC(kernel="rbf",C=i,gamma = 
0.012742749857031322,cache_size=5000).fit(Xtrain,Ytrain)
    score.append(clf.score(Xtest,Ytest))
    
print(max(score), C_range[score.index(max(score))])
plt.plot(C_range,score)
plt.show()

0.9766081871345029 1.2340816326530613

在这里插入图片描述

潘诺西亚的火山

关注

0
点赞
踩
11

收藏

觉得还不错? 一键收藏
打赏
0
评论
SVM练习

from sklearn.datasets import make_blobsfrom sklearn.svm import SVCimport matplotlib.pyplot as pltimport numpy as npX,y = make_blobs(n_samples=50, centers=2, random_state=0,cluster_std=0.6)plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")#rainbow彩虹色
复制链接

扫一扫