SVM练习

from sklearn.datasets import make_blobs
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
# Generate 50 samples around 2 centers (fixed seed, cluster_std=0.6) and plot them.
X,y = make_blobs(n_samples=50, centers=2, random_state=0,cluster_std=0.6)
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")# colour the points by label with the "rainbow" colormap
plt.xticks([])
plt.yticks([])
plt.show()

# A scatter plot has to exist first
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
ax = plt.gca() # get the current Axes; a new one is created if none exists

在这里插入图片描述

# Read the (min, max) limits of both axes of the current plot
xlim = ax.get_xlim()
ylim = ax.get_ylim()
 
# Build 30 evenly spaced values between the minimum and maximum of each axis
axisx = np.linspace(xlim[0],xlim[1],30)
axisy = np.linspace(ylim[0],ylim[1],30)
 
axisy,axisx = np.meshgrid(axisy,axisx)
# The two 2-D arrays created here are used as the X and Y arguments of contour().
# meshgrid turns the two 1-D vectors into coordinate matrices: it broadcasts them
# against each other, yielding the x- and y-coordinate of all len(x) * len(y) grid points.
 
xy = np.vstack([axisx.ravel(), axisy.ravel()]).T
# ravel() flattens an array to 1-D; vstack stacks equally shaped 1-D arrays row by row.
# xy is the finished grid: a dense set of points covering the whole canvas, one (x, y) per row.
 
plt.scatter(xy[:,0],xy[:,1],s=1,cmap="rainbow")
 
# A small example to understand what meshgrid and vstack do
a = np.array([1,2,3])
b = np.array([7,8])
# Pairing every element of a with every element of b: how many coordinates result?
# Six of them: (1,7),(2,7),(3,7),(1,8),(2,8),(3,8)
 
v1,v2 = np.meshgrid(a,b)
 
# Bare expressions: in a notebook these display the two coordinate matrices
v1
 
v2
 
# One (a, b) pair per row
v = np.vstack([v1.ravel(), v2.ravel()]).T

在这里插入图片描述

# Build the model; fit() computes the decision boundary
clf = SVC(kernel = "linear").fit(X,y)# fit a linear-kernel SVC on the blobs
Z = clf.decision_function(xy).reshape(axisx.shape)
# decision_function is the key interface: for every input sample it returns the signed
# decision value (the distance to the decision boundary, up to scaling).
# The values are reshaped to the shape of axisx because contour() requires Z to have
# the same structure as X and Y.

# A scatter plot has to exist first
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
ax = plt.gca() # get the current Axes; a new one is created if none exists
# Draw the decision boundary and the two hyperplanes parallel to it
ax.contour(axisx,axisy,Z
           ,colors="k"
           ,levels=[-1,0,1] # three contour lines: Z = -1, Z = 0 and Z = 1
           ,alpha=0.5# transparency
           ,linestyles=["--","-","--"])
 
ax.set_xlim(xlim)# restore the x-axis limits read earlier
ax.set_ylim(ylim)



(-0.43660961990940284, 5.772756283035797)

在这里插入图片描述

# Remember what Z is: the decision value ("distance" to the decision boundary) of each
# input point. The `levels` argument of contour() selects values of exactly that quantity.
# Try it with a single point: sample 10 is drawn again in black.
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
plt.scatter(X[10,0],X[10,1],c="black",s=50,cmap="rainbow")
<matplotlib.collections.PathCollection at 0x2462e456828>

在这里插入图片描述

# Decision value of sample 10, passed as a 1 x 2 array
clf.decision_function(X[10].reshape(1,2))
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
ax = plt.gca()
# Draw one dashed contour at the hard-coded level below.
# NOTE(review): presumably this is the value displayed by the decision_function call
# above in the author's run — confirm when re-running.
ax.contour(axisx,axisy,Z
            ,colors="k"
            ,levels=[-3.33917354]
            ,alpha=0.5
            ,linestyles=["--"])
<matplotlib.contour.QuadContourSet at 0x2462e4b46d8>

在这里插入图片描述

# Wrap the steps above into a reusable function:
def plot_svc_decision_function(model,ax=None):
    """Draw a model's decision boundary and margin lines on a 2-D plot.

    Args:
        model: Object exposing ``decision_function`` for an ``(n, 2)`` array
            of points (for example a fitted ``SVC``).
        ax: Axes to draw on; the current pyplot Axes is used when ``None``.

    Contours are drawn at decision values -1, 0 and 1, and the axis limits
    read on entry are set again afterwards.
    """
    axes = plt.gca() if ax is None else ax
    x_bounds = axes.get_xlim()
    y_bounds = axes.get_ylim()

    # 30 x 30 grid over the visible area; "ij" indexing keeps rows along x.
    grid_x, grid_y = np.meshgrid(
        np.linspace(x_bounds[0], x_bounds[1], 30),
        np.linspace(y_bounds[0], y_bounds[1], 30),
        indexing="ij",
    )
    points = np.column_stack((grid_x.ravel(), grid_y.ravel()))
    values = model.decision_function(points).reshape(grid_x.shape)

    axes.contour(
        grid_x,
        grid_y,
        values,
        colors="k",
        levels=[-1, 0, 1],
        alpha=0.5,
        linestyles=["--", "-", "--"],
    )
    axes.set_xlim(x_bounds)
    axes.set_ylim(y_bounds)
 
# With the helper, the whole plotting procedure becomes:
clf = SVC(kernel = "linear").fit(X,y)
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
plot_svc_decision_function(clf)

在这里插入图片描述

clf.predict(X)
# Classify the samples in X according to the decision boundary; the result has length n_samples
 
clf.score(X,y)
# Mean accuracy on the given data and labels
 
clf.support_vectors_
# Coordinates of the support vectors
 
clf.n_support_#array([2, 1])
# Number of support vectors for each class
array([2, 1])
from sklearn.datasets import make_circles
# 100 noisy samples on two concentric circles (inner/outer scale factor 0.1)
X,y = make_circles(100, factor=0.1, noise=.1)
 
# Bare expressions: in a notebook these display the array shapes
X.shape
 
y.shape
 
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
plt.show()

在这里插入图片描述

# Fit a linear-kernel SVC on the circles data, draw its boundary and
# evaluate the accuracy on the training data itself
clf = SVC(kernel = "linear").fit(X,y)
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
plot_svc_decision_function(clf)
clf.score(X,y)
0.68

在这里插入图片描述

# Define a new dimension r computed from X: r = exp(-(x1^2 + x2^2)) for every sample
r = np.exp(-(X**2).sum(1))
 
# 100 evenly spaced values spanning the range of r
rlim = np.linspace(min(r),max(r),100)
 
from mpl_toolkits import mplot3d
 
# Define a function that draws the data in three dimensions.
# elev: elevation angle of the view (rotation up/down)
# azim: azimuth angle of the view (rotation in the horizontal plane)
def plot_3D(elev=30,azim=30,X=X,y=y):
    """Show a 3-D scatter of X with r = exp(-(x1^2 + x2^2)) as the third axis.

    Args:
        elev: Elevation angle passed to ``view_init``.
        azim: Azimuth angle passed to ``view_init``.
        X: Array whose first two columns are plotted on the x and y axes.
            Defaults to the module-level X at definition time.
        y: Labels used to colour the points.
    """
    # Derive r from the X actually being plotted rather than reading the
    # module-level r, so the heights always correspond to the given points.
    r = np.exp(-(X**2).sum(1))
    ax = plt.subplot(projection="3d")
    ax.scatter3D(X[:,0],X[:,1],r,c=y,s=50,cmap='rainbow')
    ax.view_init(elev=elev,azim=azim)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("r")
    plt.show()
    
plot_3D()

在这里插入图片描述

# The same workflow when run as a cell in a Jupyter notebook
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
 
from sklearn.datasets import make_circles
# 100 noisy samples on two concentric circles (inner/outer scale factor 0.1)
X,y = make_circles(100, factor=0.1, noise=.1)
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
 
def plot_svc_decision_function(model,ax=None):
    """Draw a model's decision boundary and margin lines on a 2-D plot.

    Args:
        model: Object exposing ``decision_function`` for an ``(n, 2)`` array
            of points (for example a fitted ``SVC``).
        ax: Axes to draw on; the current pyplot Axes is used when ``None``.

    Contours are drawn at decision values -1, 0 and 1, and the axis limits
    read on entry are set again afterwards.
    """
    axes = plt.gca() if ax is None else ax
    x_bounds = axes.get_xlim()
    y_bounds = axes.get_ylim()

    # 30 x 30 grid over the visible area; "ij" indexing keeps rows along x.
    grid_x, grid_y = np.meshgrid(
        np.linspace(x_bounds[0], x_bounds[1], 30),
        np.linspace(y_bounds[0], y_bounds[1], 30),
        indexing="ij",
    )
    points = np.column_stack((grid_x.ravel(), grid_y.ravel()))
    values = model.decision_function(points).reshape(grid_x.shape)

    axes.contour(
        grid_x,
        grid_y,
        values,
        colors="k",
        levels=[-1, 0, 1],
        alpha=0.5,
        linestyles=["--", "-", "--"],
    )
    axes.set_xlim(x_bounds)
    axes.set_ylim(y_bounds)
 
# Fit a linear-kernel SVC on the circles data and draw its decision boundary
clf = SVC(kernel = "linear").fit(X,y)
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
plot_svc_decision_function(clf)

# New dimension r computed from X: r = exp(-(x1^2 + x2^2)) for every sample
r = np.exp(-(X**2).sum(1))

# The third argument of linspace is the number of samples and must be an
# integer; the previous value 0.2 is rejected by NumPy. Use 100 points, as the
# earlier version of this cell does.
rlim = np.linspace(min(r),max(r),100)
 
from mpl_toolkits import mplot3d
 
def plot_3D(elev=30,azim=30,X=X,y=y):
    """Show a 3-D scatter of X with r = exp(-(x1^2 + x2^2)) as the third axis.

    Args:
        elev: Elevation angle passed to ``view_init``.
        azim: Azimuth angle passed to ``view_init``.
        X: Array whose first two columns are plotted on the x and y axes.
            Defaults to the module-level X at definition time.
        y: Labels used to colour the points.
    """
    # Derive r from the X actually being plotted rather than reading the
    # module-level r, so the heights always correspond to the given points.
    r = np.exp(-(X**2).sum(1))
    ax = plt.subplot(projection="3d")
    ax.scatter3D(X[:,0],X[:,1],r,c=y,s=50,cmap='rainbow')
    ax.view_init(elev=elev,azim=azim)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("r")
    plt.show()
 
from ipywidgets import interact,fixed
# Keyword names must match plot_3D's parameters. The azimuth parameter is
# `azim`; the previous spelling `azip` matched no parameter, so the
# (-180, 180) range never reached the azimuth control.
interact(plot_3D,elev=[0,30,60,90],azim=(-180,180),X=fixed(X),y=fixed(y))
plt.show()
f:\Anaconda3\lib\site-packages\ipykernel_launcher.py:32: DeprecationWarning: object of type <class 'float'> cannot be safely interpreted as an integer.

在这里插入图片描述

interactive(children=(Dropdown(description='elev', index=1, options=(0, 30, 60, 90), value=30), IntSlider(valu…
# Refit with the RBF kernel on the circles data and draw the resulting decision boundary
clf = SVC(kernel = "rbf").fit(X,y)
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
plot_svc_decision_function(clf)

f:\Anaconda3\lib\site-packages\sklearn\svm\base.py:196: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-HqjpYKFo-1594926610385)(output_13_1.png)]

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import svm#from sklearn.svm import SVC  两者都可以
from sklearn.datasets import make_circles, make_moons, make_blobs,make_classification

n_samples = 100
 
# Four synthetic two-class datasets, each an (X, Y) pair
datasets = [
    make_moons(n_samples=n_samples, noise=0.2, random_state=0),
    make_circles(n_samples=n_samples, noise=0.2, factor=0.5, random_state=1),
    make_blobs(n_samples=n_samples, centers=2, random_state=5),# clustered (blob) data
    make_classification(n_samples=n_samples,n_features = 2,n_informative=2,n_redundant=0, random_state=5)
                # n_features: total number of features, n_informative: number of informative features, n_redundant: number of redundant features
    ]
 
# The kernels to compare
Kernel = ["linear","poly","rbf","sigmoid"]
 
# What do the four datasets look like? One figure per dataset.
for X,Y in datasets:
    plt.figure(figsize=(5,4))
    plt.scatter(X[:,0],X[:,1],c=Y,s=50,cmap="rainbow")

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

# Subplot grid: one row per dataset, one column for the raw data plus one per kernel
nrows=len(datasets)
ncols=len(Kernel) + 1
 
fig, axes = plt.subplots(nrows, ncols,figsize=(20,16))

在这里插入图片描述

[*enumerate(datasets)] == list(enumerate(datasets))# enumerate, map and zip can all be unpacked this way
# Each element has the form (index, (X, Y)): the position plus the (feature matrix X, labels Y) pair
True
# Subplot grid: one row per dataset, one column for the raw data plus one per kernel
nrows=len(datasets)
ncols=len(Kernel) + 1
 
fig, axes = plt.subplots(nrows, ncols,figsize=(20,16))

# Outer loop: iterate over the datasets
for ds_cnt, (X,Y) in enumerate(datasets):
    
    # The first column of each row shows the distribution of the raw data
    ax = axes[ds_cnt, 0]
    if ds_cnt == 0:
        ax.set_title("Input data")
    ax.scatter(X[:, 0], X[:, 1], c=Y, zorder=10, cmap=plt.cm.Paired,edgecolors='k')
    ax.set_xticks(())
    ax.set_yticks(())
    
    # Inner loop: iterate over the kernels,
    # filling in the classification results from the second column onwards
    for est_idx, kernel in enumerate(Kernel):
        
        # Select the subplot for this dataset/kernel combination
        ax = axes[ds_cnt, est_idx + 1]
        
        # Fit the model and score it on the same data
        clf = svm.SVC(kernel=kernel, gamma=2).fit(X, Y)
        score = clf.score(X, Y)
        
        # Scatter plot of the data itself
        ax.scatter(X[:, 0], X[:, 1], c=Y
                   ,zorder=10
                   ,cmap=plt.cm.Paired,edgecolors='k')
        # Mark the support vectors
        ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=50,
                    facecolors='none', zorder=10, edgecolors='k')# facecolors='none': unfilled markers
        
        # Extent of the area in which the decision boundary is drawn (data range padded by 0.5)
        x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
        
        # np.mgrid combines the np.linspace + np.meshgrid steps used earlier:
        # it builds the grid in one go from the minimum and maximum values.
        # The index form is [start:stop:step].
        # If the step is a complex number (e.g. 200j), its magnitude is the number of
        # points created between start and stop, and stop is included.
        XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
        # np.c_ joins the flattened arrays as columns, playing the role np.vstack(...).T played earlier
        Z = clf.decision_function(np.c_[XX.ravel(), YY.ravel()]).reshape(XX.shape)
        # Colour the two sides of the decision boundary
        ax.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
        # Draw the contour lines (margins and decision boundary)
        ax.contour(XX, YY, Z, colors=['k', 'k', 'k'], linestyles=['--', '-', '--'],
                    levels=[-1, 0, 1])
        
        # Hide the axis ticks
        ax.set_xticks(())
        ax.set_yticks(())
        
        # Put the kernel name as a title above the first row only
        if ds_cnt == 0:
            ax.set_title(kernel)
            
        # Add the classification score to every subplot
        ax.text(0.95, 0.06, ('%.2f' % score).lstrip('0')
                , size=15
                , bbox=dict(boxstyle='round', alpha=0.8, facecolor='white')
                    # white rounded box as background for the score
                , transform=ax.transAxes # position is given in the coordinates of this subplot's axes
                , horizontalalignment='right' # horizontal alignment of the text relative to the position
               )
 
plt.tight_layout()
plt.show()
f:\Anaconda3\lib\site-packages\matplotlib\contour.py:1230: UserWarning: No contour levels were found within the data range.
  warnings.warn("No contour levels were found"

在这里插入图片描述

from sklearn.preprocessing import StandardScaler
X = StandardScaler().fit_transform(X)# standardise each feature to zero mean and unit variance (X is replaced below once the breast-cancer data is loaded)
# load_breast_cancer is called below, so it has to be imported here
from sklearn.datasets import load_breast_cancer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from time import time
import datetime
 
# Load the breast-cancer dataset: X is the feature matrix, y the class labels
data = load_breast_cancer()
X = data.data
y = data.target
 
# Bare expressions: in a notebook these display the shape and the distinct labels
X.shape
np.unique(y)
 
# Scatter of the first two features, coloured by label
plt.scatter(X[:,0],X[:,1],c=y)
plt.show()
 
# Hold out 30% of the samples as the test set (fixed seed)
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X,y,test_size=0.3,random_state=420)
 
# Compare accuracy and wall-clock time of each kernel on the unscaled data
Kernel = ["linear","poly","rbf","sigmoid"]
 
for kernel in Kernel:
    time0 = time()
    clf= SVC(kernel = kernel
             , gamma="auto"
            # , degree = 1
             , cache_size=10000# size of the kernel cache in MB (the default is 200 MB)
            ).fit(Xtrain,Ytrain)
    print("The accuracy under kernel %s is %f" % (kernel,clf.score(Xtest,Ytest)))
    print(time()-time0)
# Same comparison without the "poly" kernel.
# NOTE(review): no output is shown for the loop above; presumably "poly" without a
# degree limit did not finish on the unscaled data — confirm.
Kernel = ["linear","rbf","sigmoid"]
 
for kernel in Kernel:
    time0 = time()
    clf= SVC(kernel = kernel
             , gamma="auto"
            # , degree = 1
             , cache_size=5000
            ).fit(Xtrain,Ytrain)
    print("The accuracy under kernel %s is %f" % (kernel,clf.score(Xtest,Ytest)))
    print(time()-time0)

The accuracy under kernel linear is 0.929825
0.795527458190918
The accuracy under kernel rbf is 0.596491
0.06104254722595215
The accuracy under kernel sigmoid is 0.596491
0.008005142211914062
# Repeat the comparison with all four kernels, now with degree=1
# (degree is only used by the "poly" kernel)
Kernel = ["linear","poly","rbf","sigmoid"]
 
for kernel in Kernel:
    time0 = time()
    clf= SVC(kernel = kernel
             , gamma="auto"
             , degree = 1
             , cache_size=5000
            ).fit(Xtrain,Ytrain)
    print("The accuracy under kernel %s is %f" % (kernel,clf.score(Xtest,Ytest)))
    print(time()-time0)
The accuracy under kernel linear is 0.929825
0.8025338649749756
The accuracy under kernel poly is 0.923977
0.14710068702697754
The accuracy under kernel rbf is 0.596491
0.06003713607788086
The accuracy under kernel sigmoid is 0.596491
0.011008739471435547
import pandas as pd
data = pd.DataFrame(X)
data.describe([0.01,0.05,0.1,0.25,0.5,0.75,0.9,0.99]).T# descriptive statistics, one row per feature
# The mean and std columns show that the features are on very different scales.
# Compare the 1% quantile with the minimum and the 90% quantile with the maximum to judge
# whether a feature looks normal or skewed: a large gap means the distribution is skewed,
# towards whichever side has the larger gap.
# The features with large values turn out to be skewed,
# so the data needs to be standardised at this point.
| feature | count | mean | std | min | 1% | 5% | 10% | 25% | 50% | 75% | 90% | 99% | max |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 569.0 | 14.127292 | 3.524049 | 6.981000 | 8.458360 | 9.529200 | 10.260000 | 11.700000 | 13.370000 | 15.780000 | 19.530000 | 24.371600 | 28.11000 |
| 1 | 569.0 | 19.289649 | 4.301036 | 9.710000 | 10.930400 | 13.088000 | 14.078000 | 16.170000 | 18.840000 | 21.800000 | 24.992000 | 30.652000 | 39.28000 |
| 2 | 569.0 | 91.969033 | 24.298981 | 43.790000 | 53.827600 | 60.496000 | 65.830000 | 75.170000 | 86.240000 | 104.100000 | 129.100000 | 165.724000 | 188.50000 |
| 3 | 569.0 | 654.889104 | 351.914129 | 143.500000 | 215.664000 | 275.780000 | 321.600000 | 420.300000 | 551.100000 | 782.700000 | 1177.400000 | 1786.600000 | 2501.00000 |
| 4 | 569.0 | 0.096360 | 0.014064 | 0.052630 | 0.068654 | 0.075042 | 0.079654 | 0.086370 | 0.095870 | 0.105300 | 0.114820 | 0.132888 | 0.16340 |
| 5 | 569.0 | 0.104341 | 0.052813 | 0.019380 | 0.033351 | 0.040660 | 0.049700 | 0.064920 | 0.092630 | 0.130400 | 0.175460 | 0.277192 | 0.34540 |
| 6 | 569.0 | 0.088799 | 0.079720 | 0.000000 | 0.000000 | 0.004983 | 0.013686 | 0.029560 | 0.061540 | 0.130700 | 0.203040 | 0.351688 | 0.42680 |
| 7 | 569.0 | 0.048919 | 0.038803 | 0.000000 | 0.000000 | 0.005621 | 0.011158 | 0.020310 | 0.033500 | 0.074000 | 0.100420 | 0.164208 | 0.20120 |
| 8 | 569.0 | 0.181162 | 0.027414 | 0.106000 | 0.129508 | 0.141500 | 0.149580 | 0.161900 | 0.179200 | 0.195700 | 0.214940 | 0.259564 | 0.30400 |
| 9 | 569.0 | 0.062798 | 0.007060 | 0.049960 | 0.051504 | 0.053926 | 0.055338 | 0.057700 | 0.061540 | 0.066120 | 0.072266 | 0.085438 | 0.09744 |
| 10 | 569.0 | 0.405172 | 0.277313 | 0.111500 | 0.119740 | 0.160100 | 0.183080 | 0.232400 | 0.324200 | 0.478900 | 0.748880 | 1.291320 | 2.87300 |
| 11 | 569.0 | 1.216853 | 0.551648 | 0.360200 | 0.410548 | 0.540140 | 0.640400 | 0.833900 | 1.108000 | 1.474000 | 1.909400 | 2.915440 | 4.88500 |
| 12 | 569.0 | 2.866059 | 2.021855 | 0.757000 | 0.953248 | 1.132800 | 1.280200 | 1.606000 | 2.287000 | 3.357000 | 5.123200 | 9.690040 | 21.98000 |
| 13 | 569.0 | 40.337079 | 45.491006 | 6.802000 | 8.514440 | 11.360000 | 13.160000 | 17.850000 | 24.530000 | 45.190000 | 91.314000 | 177.684000 | 542.20000 |
| 14 | 569.0 | 0.007041 | 0.003003 | 0.001713 | 0.003058 | 0.003690 | 0.004224 | 0.005169 | 0.006380 | 0.008146 | 0.010410 | 0.017258 | 0.03113 |
| 15 | 569.0 | 0.025478 | 0.017908 | 0.002252 | 0.004705 | 0.007892 | 0.009169 | 0.013080 | 0.020450 | 0.032450 | 0.047602 | 0.089872 | 0.13540 |
| 16 | 569.0 | 0.031894 | 0.030186 | 0.000000 | 0.000000 | 0.003253 | 0.007726 | 0.015090 | 0.025890 | 0.042050 | 0.058520 | 0.122292 | 0.39600 |
| 17 | 569.0 | 0.011796 | 0.006170 | 0.000000 | 0.000000 | 0.003831 | 0.005493 | 0.007638 | 0.010930 | 0.014710 | 0.018688 | 0.031194 | 0.05279 |
| 18 | 569.0 | 0.020542 | 0.008266 | 0.007882 | 0.010547 | 0.011758 | 0.013012 | 0.015160 | 0.018730 | 0.023480 | 0.030120 | 0.052208 | 0.07895 |
| 19 | 569.0 | 0.003795 | 0.002646 | 0.000895 | 0.001114 | 0.001522 | 0.001710 | 0.002248 | 0.003187 | 0.004558 | 0.006185 | 0.012650 | 0.02984 |
| 20 | 569.0 | 16.269190 | 4.833242 | 7.930000 | 9.207600 | 10.534000 | 11.234000 | 13.010000 | 14.970000 | 18.790000 | 23.682000 | 30.762800 | 36.04000 |
| 21 | 569.0 | 25.677223 | 6.146258 | 12.020000 | 15.200800 | 16.574000 | 17.800000 | 21.080000 | 25.410000 | 29.720000 | 33.646000 | 41.802400 | 49.54000 |
| 22 | 569.0 | 107.261213 | 33.602542 | 50.410000 | 58.270400 | 67.856000 | 72.178000 | 84.110000 | 97.660000 | 125.400000 | 157.740000 | 208.304000 | 251.20000 |
| 23 | 569.0 | 880.583128 | 569.356993 | 185.200000 | 256.192000 | 331.060000 | 384.720000 | 515.300000 | 686.500000 | 1084.000000 | 1673.000000 | 2918.160000 | 4254.00000 |
| 24 | 569.0 | 0.132369 | 0.022832 | 0.071170 | 0.087910 | 0.095734 | 0.102960 | 0.116600 | 0.131300 | 0.146000 | 0.161480 | 0.188908 | 0.22260 |
| 25 | 569.0 | 0.254265 | 0.157336 | 0.027290 | 0.050094 | 0.071196 | 0.093676 | 0.147200 | 0.211900 | 0.339100 | 0.447840 | 0.778644 | 1.05800 |
| 26 | 569.0 | 0.272188 | 0.208624 | 0.000000 | 0.000000 | 0.018360 | 0.045652 | 0.114500 | 0.226700 | 0.382900 | 0.571320 | 0.902380 | 1.25200 |
| 27 | 569.0 | 0.114606 | 0.065732 | 0.000000 | 0.000000 | 0.024286 | 0.038460 | 0.064930 | 0.099930 | 0.161400 | 0.208940 | 0.269216 | 0.29100 |
| 28 | 569.0 | 0.290076 | 0.061867 | 0.156500 | 0.176028 | 0.212700 | 0.226120 | 0.250400 | 0.282200 | 0.317900 | 0.360080 | 0.486908 | 0.66380 |
| 29 | 569.0 | 0.083946 | 0.018061 | 0.055040 | 0.058580 | 0.062558 | 0.065792 | 0.071460 | 0.080040 | 0.092080 | 0.106320 | 0.140628 | 0.20750 |
from sklearn.preprocessing import StandardScaler
X = StandardScaler().fit_transform(X)# standardise each feature to zero mean and unit variance
data = pd.DataFrame(X)
data.describe([0.01,0.05,0.1,0.25,0.5,0.75,0.9,0.99]).T# the means are now all close to 0 and the standard deviations close to 1
| feature | count | mean | std | min | 1% | 5% | 10% | 25% | 50% | 75% | 90% | 99% | max |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 569.0 | -3.162867e-15 | 1.00088 | -2.029648 | -1.610057 | -1.305923 | -1.098366 | -0.689385 | -0.215082 | 0.469393 | 1.534446 | 2.909529 | 3.971288 |
| 1 | 569.0 | -6.530609e-15 | 1.00088 | -2.229249 | -1.945253 | -1.443165 | -1.212786 | -0.725963 | -0.104636 | 0.584176 | 1.326975 | 2.644095 | 4.651889 |
| 2 | 569.0 | -7.078891e-16 | 1.00088 | -1.984504 | -1.571053 | -1.296381 | -1.076672 | -0.691956 | -0.235980 | 0.499677 | 1.529432 | 3.037982 | 3.976130 |
| 3 | 569.0 | -8.799835e-16 | 1.00088 | -1.454443 | -1.249201 | -1.078225 | -0.947908 | -0.667195 | -0.295187 | 0.363507 | 1.486075 | 3.218702 | 5.250529 |
| 4 | 569.0 | 6.132177e-15 | 1.00088 | -3.112085 | -1.971730 | -1.517125 | -1.188910 | -0.710963 | -0.034891 | 0.636199 | 1.313694 | 2.599511 | 4.770911 |
| 5 | 569.0 | -1.120369e-15 | 1.00088 | -1.610136 | -1.345369 | -1.206849 | -1.035527 | -0.747086 | -0.221940 | 0.493857 | 1.347811 | 3.275782 | 4.568425 |
| 6 | 569.0 | -4.421380e-16 | 1.00088 | -1.114873 | -1.114873 | -1.052316 | -0.943046 | -0.743748 | -0.342240 | 0.526062 | 1.434288 | 3.300560 | 4.243589 |
| 7 | 569.0 | 9.732500e-16 | 1.00088 | -1.261820 | -1.261820 | -1.116837 | -0.974010 | -0.737944 | -0.397721 | 0.646935 | 1.328412 | 2.973759 | 3.927930 |
| 8 | 569.0 | -1.971670e-15 | 1.00088 | -2.744117 | -1.885853 | -1.448032 | -1.153036 | -0.703240 | -0.071627 | 0.530779 | 1.233221 | 2.862418 | 4.484751 |
| 9 | 569.0 | -1.453631e-15 | 1.00088 | -1.819865 | -1.600987 | -1.257643 | -1.057477 | -0.722639 | -0.178279 | 0.470983 | 1.342243 | 3.209454 | 4.910919 |
| 10 | 569.0 | -9.076415e-16 | 1.00088 | -1.059924 | -1.030184 | -0.884517 | -0.801577 | -0.623571 | -0.292245 | 0.266100 | 1.240514 | 3.198294 | 8.906909 |
| 11 | 569.0 | -8.853492e-16 | 1.00088 | -1.554264 | -1.462915 | -1.227791 | -1.045885 | -0.694809 | -0.197498 | 0.466552 | 1.256518 | 3.081820 | 6.655279 |
| 12 | 569.0 | 1.773674e-15 | 1.00088 | -1.044049 | -0.946900 | -0.858016 | -0.785049 | -0.623768 | -0.286652 | 0.243031 | 1.117354 | 3.378079 | 9.461986 |
| 13 | 569.0 | -8.291551e-16 | 1.00088 | -0.737829 | -0.700152 | -0.637545 | -0.597942 | -0.494754 | -0.347783 | 0.106773 | 1.121579 | 3.021867 | 11.041842 |
| 14 | 569.0 | -7.541809e-16 | 1.00088 | -1.776065 | -1.327593 | -1.116972 | -0.939031 | -0.624018 | -0.220335 | 0.368355 | 1.123053 | 3.405812 | 8.029999 |
| 15 | 569.0 | -3.921877e-16 | 1.00088 | -1.298098 | -1.160988 | -0.982870 | -0.911510 | -0.692926 | -0.281020 | 0.389654 | 1.236492 | 3.598943 | 6.143482 |
| 16 | 569.0 | 7.917900e-16 | 1.00088 | -1.057501 | -1.057501 | -0.949654 | -0.801336 | -0.557161 | -0.199065 | 0.336752 | 0.882848 | 2.997338 | 12.072680 |
| 17 | 569.0 | -2.739461e-16 | 1.00088 | -1.913447 | -1.913447 | -1.292055 | -1.022462 | -0.674490 | -0.140496 | 0.472657 | 1.117927 | 3.146456 | 6.649601 |
| 18 | 569.0 | -3.108234e-16 | 1.00088 | -1.532890 | -1.210240 | -1.063590 | -0.911757 | -0.651681 | -0.219430 | 0.355692 | 1.159654 | 3.834036 | 7.071917 |
| 19 | 569.0 | -3.366766e-16 | 1.00088 | -1.096968 | -1.014237 | -0.859880 | -0.788466 | -0.585118 | -0.229940 | 0.288642 | 0.904208 | 3.349301 | 9.851593 |
| 20 | 569.0 | -2.333224e-15 | 1.00088 | -1.726901 | -1.462332 | -1.187658 | -1.042700 | -0.674921 | -0.269040 | 0.522016 | 1.535063 | 3.001373 | 4.094189 |
| 21 | 569.0 | 1.763674e-15 | 1.00088 | -2.223994 | -1.706020 | -1.482403 | -1.282757 | -0.748629 | -0.043516 | 0.658341 | 1.297666 | 2.625885 | 3.885905 |
| 22 | 569.0 | -1.198026e-15 | 1.00088 | -1.693361 | -1.459232 | -1.173717 | -1.044983 | -0.689578 | -0.285980 | 0.540279 | 1.503553 | 3.009644 | 4.287337 |
| 23 | 569.0 | 5.049661e-16 | 1.00088 | -1.222423 | -1.097625 | -0.966014 | -0.871684 | -0.642136 | -0.341181 | 0.357589 | 1.393000 | 3.581882 | 5.930172 |
| 24 | 569.0 | -5.213170e-15 | 1.00088 | -2.682695 | -1.948882 | -1.605910 | -1.289152 | -0.691230 | -0.046843 | 0.597545 | 1.276124 | 2.478455 | 3.955374 |
| 25 | 569.0 | -2.174788e-15 | 1.00088 | -1.443878 | -1.298811 | -1.164575 | -1.021571 | -0.681083 | -0.269501 | 0.539669 | 1.231407 | 3.335783 | 5.112877 |
| 26 | 569.0 | 6.856456e-16 | 1.00088 | -1.305831 | -1.305831 | -1.217748 | -1.086814 | -0.756514 | -0.218232 | 0.531141 | 1.435090 | 3.023359 | 4.700669 |
| 27 | 569.0 | -1.412656e-16 | 1.00088 | -1.745063 | -1.745063 | -1.375270 | -1.159448 | -0.756400 | -0.223469 | 0.712510 | 1.436382 | 2.354181 | 2.685877 |
| 28 | 569.0 | -2.289567e-15 | 1.00088 | -2.160960 | -1.845039 | -1.251767 | -1.034661 | -0.641864 | -0.127409 | 0.450138 | 1.132518 | 3.184317 | 6.046041 |
| 29 | 569.0 | 2.575171e-15 | 1.00088 | -1.601839 | -1.405690 | -1.185223 | -1.006009 | -0.691912 | -0.216444 | 0.450762 | 1.239884 | 3.141089 | 6.846856 |
# Split again (same 30% test size and seed) now that X has been standardised,
# then repeat the kernel comparison
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X,y,test_size=0.3,random_state=420)
 
Kernel = ["linear","poly","rbf","sigmoid"]
 
for kernel in Kernel:
    time0 = time()
    clf= SVC(kernel = kernel
             , gamma="auto"
             , degree = 1
             , cache_size=5000
            ).fit(Xtrain,Ytrain)
    print("The accuracy under kernel %s is %f" % (kernel,clf.score(Xtest,Ytest)))
    print(time()-time0)
The accuracy under kernel linear is 0.976608
0.01501321792602539
The accuracy under kernel poly is 0.964912
0.006003141403198242
The accuracy under kernel rbf is 0.970760
0.011005401611328125
The accuracy under kernel sigmoid is 0.953216
0.0060024261474609375
# Tune gamma for the RBF kernel: test accuracy for each candidate value
score = []
gamma_range = np.logspace(-10, 1, 50) # 50 values evenly spaced on a log scale from 1e-10 to 1e1
for i in gamma_range:
    clf = SVC(kernel="rbf",gamma = i,cache_size=5000).fit(Xtrain,Ytrain)
    score.append(clf.score(Xtest,Ytest))
    
# Best accuracy and the gamma that first achieves it
print(max(score), gamma_range[score.index(max(score))])
plt.plot(gamma_range,score)
plt.show()

0.9766081871345029 0.012067926406393264

在这里插入图片描述

from sklearn.model_selection import StratifiedShuffleSplit# provides the cross-validation splits for the grid search
from sklearn.model_selection import GridSearchCV# grid search with cross-validation

time0 = time()

# Candidate values for the two parameters of the polynomial kernel
gamma_range = np.logspace(-10,1,20)
coef0_range = np.linspace(0,5,10)

param_grid = dict(gamma = gamma_range
                  ,coef0 = coef0_range)
# 5 stratified random splits, each holding out 30% of the data as the test part
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.3, random_state=420)
# The SVC(...) call has to be closed before param_grid and cv: they are
# arguments of GridSearchCV, not of the estimator.
grid = GridSearchCV(SVC(kernel = "poly",degree=1,cache_size=5000)
                    ,param_grid=param_grid
                    ,cv=cv)
grid.fit(X, y)

print("The best parameters are %s with a score of %0.5f" % (grid.best_params_, 
grid.best_score_))
print(time()-time0)

The best parameters are {'coef0': 0.0, 'gamma': 0.18329807108324375} with a score of 0.96959
13.360332727432251
# Tune C for the linear kernel: test accuracy for each candidate value
score = []
C_range = np.linspace(0.01,30,50)
for i in C_range:
    clf = SVC(kernel="linear",C=i,cache_size=5000).fit(Xtrain,Ytrain)
    score.append(clf.score(Xtest,Ytest))
# Best accuracy and the C that first achieves it
print(max(score), C_range[score.index(max(score))])
plt.plot(C_range,score)
plt.show()
 
# Switch to the RBF kernel, with gamma fixed at the value below
score = []
C_range = np.linspace(0.01,30,50)
for i in C_range:
    clf = SVC(kernel="rbf",C=i,gamma = 0.012742749857031322,cache_size=5000).fit(Xtrain,Ytrain)
    score.append(clf.score(Xtest,Ytest))
    
print(max(score), C_range[score.index(max(score))])
plt.plot(C_range,score)
plt.show()
 
# Refine the search: 50 values of C between 5 and 7
score = []
C_range = np.linspace(5,7,50)
for i in C_range:
    clf = SVC(kernel="rbf",C=i,gamma = 
0.012742749857031322,cache_size=5000).fit(Xtrain,Ytrain)
    score.append(clf.score(Xtest,Ytest))
    
print(max(score), C_range[score.index(max(score))])
plt.plot(C_range,score)
plt.show()
0.9766081871345029 1.2340816326530613

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述


  • 0
    点赞
  • 11
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

潘诺西亚的火山

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值