import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
from scipy.io import loadmat
from sklearn import svm
# Load the first example dataset from its MATLAB .mat file.
mat = loadmat('./data/ex6data1.mat')
print(mat.keys())
# Output recorded by the author:
# dict_keys(['__header__', '__version__', '__globals__', 'X', 'y'])
X, y = mat['X'], mat['y']
# 定义显示数据图片的方法
def plotData(X, y):
    """Scatter-plot a two-feature dataset, coloring each point by its label.

    A new 8x5 inch figure is opened on every call, so callers do not need
    to create one beforehand.

    Args:
        X: 2-D array; column 0 is drawn on the x-axis and column 1 on the
            y-axis.
        y: label array exposing ``flatten()``; its flattened values drive
            the point colors (presumably one label per row of X).
    """
    plt.figure(figsize=(8, 5))
    points = plt.scatter(X[:, 0], X[:, 1], c=y.flatten(), cmap='rainbow')
    plt.xlabel('X1')
    plt.ylabel('X2')
    # A bare plt.legend() has no labelled artists to show here; build the
    # legend entries from the scatter's own color mapping instead.
    plt.legend(*points.legend_elements(), title='y')
# Draw the raw dataset and display the figure.
plotData(X, y)
plt.show()
# flatten()函数用法
# flatten是numpy.ndarray的一个方法(numpy.ndarray.flatten),返回一个一维数组。
# flatten只适用于numpy对象,即array或者mat,普通的list列表不适用!
# a.flatten():a是个数组,a.flatten()就是把a降到一维,默认是按行的方向降。
# a.flatten().A:a是个矩阵,降维后还是个矩阵,矩阵.A(等效于矩阵.getA())变成了数组。
# 定义显示结果图像的方法
def plotBoundary(clf, X, margin=0.1):
    """Draw the decision boundary of a fitted classifier on the current axes.

    The classifier is evaluated on a 500x500 grid that covers the data in X
    plus some padding, and the predicted labels are drawn with
    ``plt.contour``.

    Args:
        clf: fitted classifier exposing ``predict`` on an (n, 2) array.
        X: 2-D array whose first two columns define the plotted region.
        margin: padding added on every side, as a fraction of the data
            range along that axis.
    """
    x_lo, x_hi = X[:, 0].min(), X[:, 0].max()
    y_lo, y_hi = X[:, 1].min(), X[:, 1].max()
    # Pad additively: scaling min/max by a constant factor shrinks the
    # window when a minimum is positive or a maximum is negative, which
    # would cut data points out of the evaluated region.
    x_pad = (x_hi - x_lo) * margin
    y_pad = (y_hi - y_lo) * margin
    xx, yy = np.meshgrid(
        np.linspace(x_lo - x_pad, x_hi + x_pad, 500),
        np.linspace(y_lo - y_pad, y_hi + y_pad, 500),
    )
    # Predict a label for every grid node, then restore the grid shape so
    # contour() can trace where the predicted label changes.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contour(xx, yy, Z)
# Build one linear-kernel SVM per regularization strength C and fit each on
# dataset 1. C is passed by keyword because SVC's constructor parameters are
# keyword-only in current scikit-learn releases.
models = [svm.SVC(C=C, kernel='linear') for C in [1, 50, 100]]
# The values returned by fit() are what the plotting code later uses as the
# trained classifiers; labels are flattened to 1-D for fit().
clfs = [model.fit(X, y.ravel()) for model in models]
# predict:返回的是一个大小为n的一维数组,一维数组中的第i个值为模型对第i个预测样本预测的标签。
# contour(Z) 创建一个包含矩阵Z的等值线的等高线图,其中Z包含X-Y平面上的高度值。
# contour(X,Y,Z) 指定Z中各值的X和Y坐标。
# Small demonstration of the built-in zip().
a = [1, 2, 3]
b = [4, 5, 6]
c = [4, 5, 6, 7, 8]

# zip() pairs elements up into tuples and stops at the shortest input.
a_b_zip = zip(a, b)
# Show the type of the object zip() returns (it is not a list).
print("type of a_b_zip is %s" % type(a_b_zip))
# Materialize the zip object into a list so the pairs can be printed.
a_b_zip = [*a_b_zip]
print(a_b_zip)
print("------------------------------------------")
# Inputs of unequal length: the extra elements of c are dropped.
a_c_zip = [*zip(a, c)]
print(a_c_zip)
# 显示图片
# Plot dataset 1 with the decision boundary of each fitted linear SVM, one
# figure per value of C.
titles = ['SVM Decision Boundary with C = {} (Example Dataset 1)'.format(C)
          for C in [1, 50, 100]]
for model, title in zip(clfs, titles):
    # plotData opens its own figure, so creating another one here would
    # only leave an empty window behind.
    plotData(X, y)
    plotBoundary(model, X)
    plt.title(title)
# Display all figures once every boundary has been drawn.
plt.show()
# 非线性
# Non-linear case: fit an RBF-kernel SVM on the second example dataset.
mat = loadmat('./data/ex6data2.mat')
X2, y2 = mat['X'], mat['y']

# Kernel width sigma expressed as the gamma value handed to SVC:
# gamma = sigma**-2 / 2.
sigma = 0.1
gamma = 0.5 * np.power(sigma, -2.)

clf = svm.SVC(kernel='rbf', gamma=gamma, C=1)
# Labels are flattened to 1-D before fitting.
modle = clf.fit(X2, y2.ravel())

# Draw the data first, then the learned boundary on the same figure.
plotData(X2, y2)
plotBoundary(modle, X2)
plt.show()
# 寻找最合适参数
# Grid-search C and sigma for an RBF-kernel SVM on dataset 3: train on
# (X3, y3) and score every candidate on the validation split (Xval, yval).
mat3 = loadmat('data/ex6data3.mat')
X3, y3 = mat3['X'], mat3['y']
Xval, yval = mat3['Xval'], mat3['yval']
plotData(X3, y3)

Cvalues = (0.01, 0.03, 0.1, 0.3, 1., 3., 10., 30.)
sigmavalues = Cvalues  # the same candidate values are tried for sigma
best_pair, best_score = (0, 0), 0
for C in Cvalues:
    for sigma in sigmavalues:
        # Convert the kernel width sigma into SVC's gamma parameter.
        gamma = np.power(sigma, -2.) / 2
        model = svm.SVC(C=C, kernel='rbf', gamma=gamma)
        model.fit(X3, y3.flatten())
        # Flatten the validation labels as well, matching the 1-D labels
        # used for fitting.
        this_score = model.score(Xval, yval.flatten())
        # Strict '>' keeps the first pair that reaches the best score.
        if this_score > best_score:
            best_score = this_score
            best_pair = (C, sigma)
print('best_pair={}, best_score={}'.format(best_pair, best_score))
# Output recorded by the author: best_pair=(1.0, 0.1), best_score=0.965
# Refit on dataset 3 with the (C, sigma) pair selected by the search, taken
# from best_pair rather than from hard-coded copies of its printed result,
# so the final model cannot drift out of sync with the search.
best_C, best_sigma = best_pair
model = svm.SVC(C=best_C, kernel='rbf', gamma=np.power(best_sigma, -2.) / 2)
model.fit(X3, y3.flatten())
# Draw the data first, then the boundary of the selected model on top of it.
plotData(X3, y3)
plotBoundary(model, X3)
plt.show()