

import numpy as np                
from matplotlib import colors     
from sklearn import svm            
from sklearn.svm import SVC
from sklearn import model_selection
import matplotlib.pyplot as plt
import matplotlib as mpl
# numpy:python第三方库,用于科学计算
# matplotlib:python第三方库,用于进行可视化
# sklearn:python的重要机器学习库,其中封装了大量的机器学习算法,如:分类、回归、降维以及聚类
# Load the iris data set
from sklearn import datasets
IrisDS = datasets.load_iris()
IrisDS.keys()# names of the fields the data set object contains

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

print(IrisDS.target)# class label of every sample
print(IrisDS.target_names)# setosa, versicolor, virginica
print(IrisDS.feature_names)# sepal length, sepal width, petal length, petal width

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
2 2]
['setosa' 'versicolor' 'virginica']
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']

# X: feature matrix of the iris samples; y: their class labels, flattened to 1-D.
X, y = IrisDS.data, IrisDS.target.ravel()

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

# Scatter plot of the first two features (sepal length, sepal width),
# one colour/marker combination per class.
X = IrisDS.data[:, :2]
plt.figure()  # start a fresh figure so this plot never shares axes with another one
plt.scatter(X[y == 0, 0], X[y == 0, 1], color = "red", marker = "o")
plt.scatter(X[y == 1, 0], X[y == 1, 1], color = "green", marker = "+")
plt.scatter(X[y == 2, 0], X[y == 2, 1], color = "blue", marker = "x")
plt.xlabel('sepal length', fontsize=20)
plt.ylabel('sepal width', fontsize=20)
plt.title('Iris names', fontsize=30)
# In this plot class 0 is clearly separated from classes 1 and 2,
# whereas classes 1 and 2 are not clearly separated from each other.

# Scatter plot of the last two features (petal length, petal width),
# one colour/marker combination per class.
X = IrisDS.data[:, 2:]
plt.figure()  # separate figure: without it a script run draws on top of the previous plot
plt.scatter(X[y == 0, 0], X[y == 0, 1], color = "red", marker = "o")
plt.scatter(X[y == 1, 0], X[y == 1, 1], color = "green", marker = "+")
plt.scatter(X[y == 2, 0], X[y == 2, 1], color = "blue", marker = "x")
plt.xlabel('petal length', fontsize=20)
plt.ylabel('petal width', fontsize=20)
plt.title('Iris names', fontsize=30)
# With these two features classes 1 and 2 are separated more clearly.

# Use only the first two features and hold out part of the samples for testing.
X = IrisDS.data[:, :2]
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X,                 # feature matrix to split
    y,                 # labels to split
    test_size=0.3,     # fraction of the samples placed in the test split
    random_state=666,  # fixed seed so the same split is produced on every run
)


# Linear-kernel support vector classifier (kernel="rbf" would select the Gaussian kernel).
clf = SVC(
    kernel='linear',
    C=0.5,                          # penalty coefficient of the error term
    decision_function_shape='ovr',  # shape of the decision function output
)




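decision_function_shape='ovr'时,为one v rest(一对其余),即一个类别与其他类别进行划分,decision_function返回形状为(n_samples, n_classes)的结果

decision_function_shape='ovo'时,为one v one(一对一),即将类别两两之间进行划分,用二分类的方法模拟多分类的结果,decision_function返回形状为(n_samples, n_classes * (n_classes - 1) / 2)的结果

注意:SVC在内部训练多分类模型时始终采用one v one策略,该参数只影响decision_function输出的形状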


clf.fit(X_train,         # training feature matrix; fit() trains the model on the given data
        y_train) # training labels (y was already flattened to 1-D with ravel() before the split)

SVC(C=0.5, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
kernel='linear', max_iter=-1, probability=False, random_state=None,
shrinking=True, tol=0.001, verbose=False)

#**************并判断a b是否相等,计算acc的均值*************
def show_accuracy(a, b, tip):
    """Print the fraction of positions at which ``a`` and ``b`` agree.

    Args:
        a: Predicted labels. Any array-like (NumPy array, list, tuple);
            it is flattened before the comparison.
        b: Reference labels. Any array-like; it is flattened as well.
        tip: Text printed in front of the accuracy to identify the data.
    """
    # np.asarray lets plain Python sequences through; they have no .ravel().
    predicted = np.asarray(a).ravel()
    expected = np.asarray(b).ravel()
    acc = predicted == expected
    # The mean of the boolean match vector is the share of correct entries.
    print('%s Accuracy:%.3f' %(tip, np.mean(acc)))
def print_accuracy(clf,X_train,y_train,X_test,y_test):
    """Report how a fitted classifier performs on the training and test splits.

    Prints ``clf.score`` for both splits, then the element-wise agreement
    between ``clf.predict`` and the true labels (via ``show_accuracy``), and
    finally the ``decision_function`` values of the training samples.

    Args:
        clf: Fitted estimator providing ``score``, ``predict`` and
            ``decision_function``.
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.
    """
    partitions = ((X_train, y_train), (X_test, y_test))
    score_formats = ('trianing prediction:%.3f', 'test data prediction:%.3f')
    tips = ('traing data', 'testing data')
    # Accuracy as computed by the estimator itself, training split first.
    for fmt, (features, labels) in zip(score_formats, partitions):
        print(fmt % clf.score(features, labels))
    # Same figure recomputed by comparing predictions with the true labels.
    for tip, (features, labels) in zip(tips, partitions):
        show_accuracy(clf.predict(features), labels, tip)
    print('decision_function:\n', clf.decision_function(X_train))
# 4.模型评估

trianing prediction:0.810
test data prediction:0.844
traing data Accuracy:0.810
testing data Accuracy:0.844
[[ 2.24176755 0.92702147 -0.23412894]
[ 2.24916188 1.02282685 -0.25069592]
[ 2.24913534 1.18072983 -0.26852325]
[-0.22801325 2.21503397 1.08591481]
[ 2.22219679 1.07848846 -0.23254615]
[ 2.26559108 1.05769394 -0.26835508]
[-0.24571429 1.16016773 2.21756881]
[-0.18208562 2.19433483 0.94578951]
[-0.26338487 1.19115919 2.23589225]
[-0.22799941 2.19713689 1.13920689]
[ 2.24914861 1.12869781 -0.2606873 ]
[ 2.26088234 0.95791224 -0.25853289]
[-0.26338181 1.17897018 2.24088955]
[-0.26339402 1.21838843 2.21703115]
[-0.24573344 2.2082772 1.177549 ]
[-0.25253301 2.20904406 1.19691591]
[ 2.26090854 0.79362325 -0.22136024]
[-0.27168355 1.23241794 2.22593771]
[-0.25834953 1.20070183 2.2196521 ]
[ 2.26088889 0.88672142 -0.251763 ]
[-0.27820679 1.21345476 2.25519801]
[ 2.24175709 1.02751195 -0.24396579]
[ 2.26086269 1.16380895 -0.2734443 ]
[-0.20126251 2.18381767 1.0758292 ]
[-0.26339097 1.21054992 2.2240803 ]
[-0.18209989 2.2042964 0.90836286]
[-0.22803401 2.23451249 0.94271257]
[-0.27166932 1.18128615 2.25421945]
[-0.20121896 1.10945303 2.16946679]
[-0.25834602 1.19015485 2.22639633]
[ 2.20895193 0.90205515 -0.18609026]
[-0.26779185 1.20241523 2.23916383]
[-0.25251264 1.14353533 2.23436566]
[ 2.22216596 1.17061385 -0.25096773]
[ 2.25546254 0.96215829 -0.25306101]
[-0.21613225 2.18491254 1.12477163]
[ 2.19197353 1.17318527 -0.23641878]
[-0.22799248 2.18599149 1.15808091]
[-0.24571908 1.17536085 2.2096238 ]
[ 2.25546632 0.92050031 -0.24931963]
[-0.15635462 2.18157875 0.92052599]
[ 2.24916188 1.02282685 -0.25069592]
[ 2.26088889 0.88672142 -0.251763 ]
[ 2.2554474 1.12661068 -0.26519807]
[ 2.02388433 1.17809175 -0.18347847]
[-0.26779453 1.21128925 2.23397297]
[-0.24573344 2.2082772 1.177549 ]
[ 2.25545497 1.06499253 -0.25962584]
[ 2.22218908 1.11008682 -0.23788316]
[-0.22801325 2.21503397 1.08591481]
[-0.12010226 2.18043395 0.87283094]
[-0.25835304 1.20980158 2.21199992]
[-0.25251264 1.14353533 2.23436566]
[-0.28348322 1.19690126 2.26976248]
[-0.26778917 1.19214953 2.24383925]
[ 2.22217367 1.1544472 -0.24702388]
[ 1.02358531 2.22485653 -0.22747952]
[-0.23770144 2.22931038 1.07315373]
[-0.06544097 2.2315345 0.77667218]
[-0.22803401 2.23451249 0.94271257]
[-0.28349097 1.2343885 2.25615266]
[-0.25834602 1.19015485 2.22639633]
[-0.22802017 2.22232741 1.04655067]
[-0.27169066 2.24694385 1.20256387]
[-0.26338181 1.17897018 2.24088955]
[ 2.22217367 1.1544472 -0.24702388]
[-0.27168355 1.23241794 2.22593771]
[ 2.24914861 1.12869781 -0.2606873 ]
[-0.24573823 2.21639897 1.16281166]
[-0.23769573 2.22293205 1.10597133]
[-0.26778381 1.16589002 2.25192311]
[ 2.25543983 1.16521508 -0.26998701]
[-0.2809759 1.19597668 2.26585316]
[-0.26337876 1.1644952 2.24539928]
[ 2.2329238 1.03205777 -0.23604225]
[-0.28096221 0.98283592 2.28140512]
[-0.26339097 1.21054992 2.2240803 ]
[-0.15637416 2.19337501 0.88918741]
[ 2.26090199 0.81446637 -0.23373436]
[ 2.22216596 1.17061385 -0.25096773]
[-0.25252894 1.19982821 2.20651372]
[ 2.26558535 1.12230604 -0.27272474]
[ 2.24917955 0.86662487 -0.23213886]
[ 2.24175186 1.07191456 -0.24818723]
[-0.26778917 1.19214953 2.24383925]
[ 2.24917955 0.86662487 -0.23213886]
[-0.29600029 1.2195135 2.28571454]
[-0.20129515 2.21363671 0.93115481]
[-0.27167169 1.19312617 2.25058773]
[ 2.19203584 1.044919 -0.20078645]
[ 2.25545497 1.06499253 -0.25962584]
[ 2.19202338 1.08473985 -0.20986548]
[-0.27510305 0.97327043 2.27597656]
[-0.27166932 1.18128615 2.25421945]
[-0.22802709 2.22877376 0.99263184]
[ 2.23291751 1.07524337 -0.24102457]
[ 2.26088889 0.88672142 -0.251763 ]
[-0.23767861 2.19804597 1.16838741]
[ 2.22224301 0.85156324 -0.18166318]
[-0.21621789 2.25197679 0.81474919]
[-0.23766148 1.15867552 2.20391555]
[-0.22799248 2.18599149 1.15808091]
[-0.15631552 2.15081134 1.01791149]
[-0.27169066 2.24694385 1.20256387]
[-0.29149929 1.21756313 2.27858734]]

def draw(clf, X):
    """Plot the decision regions of a fitted two-feature classifier.

    The regions predicted by ``clf`` are drawn as a coloured background, the
    samples in ``X`` are drawn on top in the matching class colours, and the
    held-out samples are marked with yellow crosses.

    Besides its arguments the function reads two module-level names:
    ``y`` (class labels of the rows of ``X``) and ``X_test`` (test samples).

    Args:
        clf: Fitted classifier providing ``decision_function`` and
            ``predict`` for two-column input.
        X: Array whose columns 0 and 1 hold the two plotted features.
    """
    iris_feature = 'sepal length', 'sepal width', 'petal length', 'petal width'
    # Plot limits: observed range of each of the two features.
    X1_min, X1_max = X[:, 0].min(), X[:, 0].max()
    X2_min, X2_max = X[:, 1].min(), X[:, 1].max()
    # An imaginary step (200j) makes mgrid return 200 evenly spaced points
    # per axis, with both end points included.
    X1, X2 = np.mgrid[X1_min:X1_max:200j, X2_min:X2_max:200j]
    # One row per grid point: shape (40000, 2).
    grid_test = np.stack((X1.flat, X2.flat), axis=1)
    print('grid_test:\n', grid_test)
    # Decision function values of every grid point.
    z = clf.decision_function(grid_test)
    print('the distance to decision plane:\n', z)
    # Predicted class of every grid point.
    grid_hat = clf.predict(grid_test)
    print('grid_hat:\n', grid_hat)
    # Back to the grid layout so it can be drawn as a mesh.
    grid_hat = grid_hat.reshape(X1.shape)
    # Both colormaps list the classes in the same order (green, red, blue) so
    # that a sample has the same hue as the region of its own class.
    cm_light = mpl.colors.ListedColormap(['#A0FFA0', '#FFA0A0', '#A0A0FF'])
    cm_dark = mpl.colors.ListedColormap(['g', 'r', 'b'])
    plt.pcolormesh(X1, X2, grid_hat, cmap=cm_light)                                   # background: predicted regions
    plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(y), edgecolor='k', s=50, cmap=cm_dark) # all samples
    plt.scatter(X_test[:, 0], X_test[:, 1], s=200, facecolor='yellow', zorder=10, marker='+')       # test samples
    plt.xlabel(iris_feature[0], fontsize=20)
    plt.ylabel(iris_feature[1], fontsize=20)
    plt.xlim(X1_min, X1_max)
    plt.ylim(X2_min, X2_max)
    plt.title('svm in iris data classification', fontsize=30)
# Own figure for the decision-region plot, so that a script run does not draw
# it on top of the scatter plot created earlier.
plt.figure()
draw(clf, X)

[[4.3 2. ]
[4.3 2.0120603]
[4.3 2.0241206]

[7.9 4.3758794]
[7.9 4.3879397]
[7.9 4.4 ]]
the distance to decision plane:
[[ 2.0944395 1.24962478 -0.25718445]
[ 2.10087749 1.24911059 -0.25743298]
[ 2.10697759 1.24859004 -0.25767989]

[-0.25958006 0.81714649 2.27520775]
[-0.258944 0.81551291 2.27506211]
[-0.25829687 0.81391442 2.27491574]]
[0 0 0 ... 2 2 2]


我们可以看到,仅依靠萼片长度和萼片宽度作为两种特征进行模型训练,在Iris-versicolor(红点所示) 与Iris-virginica(蓝点所示) 之间并不能达到很好的分类效果。


# Repeat the experiment with the last two features (petal length, petal width).
X2 = IrisDS.data[:, 2:]
X2_train, X2_test, y2_train, y2_test = model_selection.train_test_split(
    X2,                # feature matrix to split
    y,                 # labels to split
    test_size=0.3,     # fraction of the samples placed in the test split
    random_state=666,  # fixed seed so the same split is produced on every run
)
# Refit the same classifier object on the petal-feature training split.
clf.fit(X2_train, y2_train)
# 模型评估

def draw2(clf, X):
    """Plot the decision regions of a classifier fitted on the petal features.

    The regions predicted by ``clf`` are drawn as a coloured background, the
    samples in ``X`` are drawn on top in the matching class colours, and the
    held-out samples are marked with yellow crosses. The axes are labelled
    with the petal feature names.

    Besides its arguments the function reads two module-level names:
    ``y`` (class labels of the rows of ``X``) and ``X2_test`` (test samples).

    Args:
        clf: Fitted classifier providing ``decision_function`` and
            ``predict`` for two-column input.
        X: Array whose columns 0 and 1 hold the two plotted features.
    """
    iris_feature = 'sepal length', 'sepal width', 'petal length', 'petal width'
    # Plot limits: observed range of each of the two features.
    X1_min, X1_max = X[:, 0].min(), X[:, 0].max()
    X2_min, X2_max = X[:, 1].min(), X[:, 1].max()
    # An imaginary step (200j) makes mgrid return 200 evenly spaced points
    # per axis, with both end points included.
    X1, X2 = np.mgrid[X1_min:X1_max:200j, X2_min:X2_max:200j]
    # One row per grid point: shape (40000, 2).
    grid_test = np.stack((X1.flat, X2.flat), axis=1)
    print('grid_test:\n', grid_test)
    # Decision function values of every grid point.
    z = clf.decision_function(grid_test)
    print('the distance to decision plane:\n', z)
    # Predicted class of every grid point.
    grid_hat = clf.predict(grid_test)
    print('grid_hat:\n', grid_hat)
    # Back to the grid layout so it can be drawn as a mesh.
    grid_hat = grid_hat.reshape(X1.shape)
    # Both colormaps list the classes in the same order (green, red, blue) so
    # that a sample has the same hue as the region of its own class.
    cm_light = mpl.colors.ListedColormap(['#A0FFA0', '#FFA0A0', '#A0A0FF'])
    cm_dark = mpl.colors.ListedColormap(['g', 'r', 'b'])
    plt.pcolormesh(X1, X2, grid_hat, cmap=cm_light)                                   # background: predicted regions
    plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(y), edgecolor='k', s=50, cmap=cm_dark) # all samples
    plt.scatter(X2_test[:, 0], X2_test[:, 1], s=200, facecolor='yellow', zorder=10, marker='+')       # test samples
    plt.xlabel(iris_feature[2], fontsize=20)
    plt.ylabel(iris_feature[3], fontsize=20)
    plt.xlim(X1_min, X1_max)
    plt.ylim(X2_min, X2_max)
    plt.title('svm in iris data classification', fontsize=30)
# Own figure for the petal-feature decision regions, so that a script run does
# not draw them on top of an earlier plot.
plt.figure()
draw2(clf, X2)

trianing prediction:0.943
test data prediction:1.000
traing data Accuracy:0.943
testing data Accuracy:1.000
[[ 2.24362371 1.28361548 -0.29794835]
[ 2.23061362 1.27887155 -0.2934875 ]
[ 2.24740564 1.28382225 -0.29865349]
[-0.2213609 2.26153293 0.7917178 ]
[ 2.24740564 1.28382225 -0.29865349]
[ 2.24567081 1.28277849 -0.2978571 ]
[-0.27331154 1.2289873 2.2340986 ]
[-0.23928582 2.25791001 0.84442627]
[-0.24890569 2.25442852 0.92782896]
[-0.25374962 2.25118093 1.03859904]
[ 2.23949355 1.28340697 -0.29721394]
[ 2.24907313 1.28482379 -0.2994149 ]
[-0.28342049 1.19991991 2.26898089]
[-0.267625 1.23886446 2.2023279 ]
[-0.25554678 2.25789294 0.96080111]
[-0.26543616 1.23960821 2.1916633 ]
[ 2.23975698 1.28146477 -0.29624954]
[-0.2645 2.24884888 1.15762803]
[-0.24343388 2.25742911 0.86464884]
[ 2.218707 1.28054477 -0.29279416]
[-0.2609059 2.25227962 1.1095909 ]
[ 2.24567081 1.28277849 -0.2978571 ]
[ 2.25836994 1.28540963 -0.30128857]
[-0.27160494 1.23734099 2.21947056]
[-0.28163771 1.21900185 2.25979041]
[-0.26955532 1.23078498 2.22132474]
[-0.19980279 2.26635185 0.76244683]
[-0.2645 2.24884888 1.15762803]
[-0.28342049 1.19991991 2.26898089]
[-0.28468464 1.2108068 2.26837313]
[ 2.23268453 1.28212612 -0.2955462 ]
[-0.25393917 2.25594891 0.96722749]
[-0.28515413 1.1903638 2.27367226]
[ 2.24907313 1.28482379 -0.2994149 ]
[ 2.24740564 1.28382225 -0.29865349]
[-0.26543616 1.23960821 2.1916633 ]
[ 2.23949355 1.28340697 -0.29721394]
[-0.267625 1.23886446 2.2023279 ]
[-0.27827471 1.23414172 2.24308847]
[ 2.23975698 1.28146477 -0.29624954]
[-0.23720366 2.26020985 0.82608301]
[ 2.24928559 1.28299227 -0.29856584]
[ 2.24173113 1.28256289 -0.29711886]
[ 2.24740564 1.28382225 -0.29865349]
[-0.25374962 2.25118093 1.03859904]
[-0.25674019 2.25061007 1.08128556]
[-0.22972456 2.25885375 0.81427237]
[ 2.23949355 1.28340697 -0.29721394]
[ 2.22779157 1.27984166 -0.29360258]
[-0.2724702 1.23333831 2.22728665]
[-0.23473644 2.25838485 0.82795204]
[-0.26885496 2.2476325 1.18714687]
[-0.28291642 1.21789318 2.26277225]
[-0.28117854 1.23955547 2.24645382]
[-0.27728629 1.21209913 2.25392827]
[ 2.24543966 1.28462531 -0.29874069]
[-0.25656379 2.24513823 1.12000008]
[-0.27149041 1.22989395 2.22809789]
[-0.15238573 2.26782614 0.74516554]
[-0.21152837 2.26402605 0.7751424 ]
[-0.28240207 1.23159873 2.25523768]
[-0.27827471 1.23414172 2.24308847]
[-0.17966903 2.26709719 0.75294939]
[-0.26674861 2.24824504 1.17374095]
[-0.25951415 2.25003122 1.11317169]
[ 2.24362371 1.28361548 -0.29794835]
[-0.27059686 1.23417123 2.22040526]
[ 2.25088159 1.28402731 -0.29933107]
[-0.26979845 1.24445628 2.1997843 ]
[-0.22719182 2.26109726 0.80139918]
[-0.28406067 1.2055949 2.26867844]
[ 2.25088159 1.28402731 -0.29933107]
[-0.28529301 1.21561007 2.26806492]
[-0.28633303 1.20279351 2.27315024]
[ 2.24362371 1.28361548 -0.29794835]
[-0.28992794 1.21556847 2.27634391]
[-0.28224596 1.21327807 2.26313232]
[-0.20328131 2.26067211 0.77688055]
[ 2.24740564 1.28382225 -0.29865349]
[ 2.24567081 1.28277849 -0.2978571 ]
[-0.27972287 1.2333083 2.24753751]
[ 2.25088159 1.28402731 -0.29933107]
[ 2.2302961 1.28100881 -0.29459876]
[ 2.23525417 1.28123779 -0.29544212]
[-0.26873 1.24174564 2.20106833]
[ 2.24762657 1.28191284 -0.29776537]
[-0.29253443 1.2119667 2.28144179]
[-0.23928582 2.25791001 0.84442627]
[-0.28029246 1.22008943 2.25654544]
[ 2.25088159 1.28402731 -0.29933107]
[ 2.2244556 1.28277082 -0.29481567]
[ 2.23496473 1.28319671 -0.2964484 ]
[-0.293287 1.19829851 2.28470839]
[-0.27887531 1.22115651 2.25300089]
[-0.2183969 2.26362019 0.78235336]
[ 2.24362371 1.28361548 -0.29794835]
[ 2.23949355 1.28340697 -0.29721394]
[-0.25823744 2.25282774 1.07658137]
[ 2.25408726 1.28423068 -0.29998268]
[-0.17966903 2.26709719 0.75294939]
[-0.25374962 2.25118093 1.03859904]
[-0.25536483 2.2533685 1.03214612]
[-0.26543616 1.23960821 2.1916633 ]
[-0.22173577 2.26520183 0.7813886 ]
[-0.2864652 1.22416896 2.26743966]]
[[1. 0.1 ]
[1. 0.1120603]
[1. 0.1241206]

[6.9 2.4758794]
[6.9 2.4879397]
[6.9 2.5 ]]
the distance to decision plane:
[[ 2.26102916 1.28560177 -0.30186795]
[ 2.26088366 1.28548692 -0.30179047]
[ 2.26073757 1.28537151 -0.3017126 ]

[-0.29572193 1.16905095 2.29078122]
[-0.29576118 1.16768234 2.29092211]
[-0.29580035 1.16629074 2.29106207]]
[0 0 0 ... 2 2 2]


