分类算法之支持向量机:SVM(应用篇)

起步

示例一

from sklearn import svm

# Example 1: fit a linear SVM on three 2-D points.
points = [[2, 0], [1, 1], [2, 3]]
labels = [0, 0, 1]

model = svm.SVC(kernel='linear')
model.fit(points, labels)

# Inspect the fitted model.
print(model)  # output: SVC(C=1.0, class_weight=None, ...)

# The support vectors themselves.
print(model.support_vectors_)  # [[ 1. 1.], [ 2. 3.]]

# Indices of the support vectors within the training set.
print(model.support_)  # output: [1, 2]

# Number of support vectors found for each class.
print(model.n_support_)  # output: [1, 1]

# Predict the class of a new point.
print(model.predict([[2, 2]]))  # output: [1]

示例二

# coding: utf-8

import numpy as np
import matplotlib.pyplot as plt  # pylab is deprecated; use the pyplot interface
from sklearn import svm

np.random.seed(0)  # fixed seed so the random data set is reproducible

# Build a linearly separable data set: 20 points shifted to (-2, -2)
# and 20 points shifted to (+2, +2), with class labels 0 and 1.
X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]]
Y = [0] * 20 + [1] * 20

# Fit a linear-kernel SVM.
clf = svm.SVC(kernel='linear')
clf.fit(X, Y)

# The separating hyperplane w.x + b = 0, rewritten as a line y = a*x + c.
w = clf.coef_[0]
a = -w[0] / w[1]  # slope
xx = np.linspace(-5, 5)  # x values over [-5, 5] used to draw the lines
yy = a * xx - (clf.intercept_[0]) / w[1]

# The margin boundaries are parallel lines through support vectors.
b = clf.support_vectors_[0]  # a support vector from the first class
yy_down = a * xx + (b[1] - a * b[0])
b = clf.support_vectors_[-1]  # a support vector from the second class
yy_up = a * xx + (b[1] - a * b[0])

# Draw the separating line, the two margins, the support vectors
# (hollow circles) and the data points colored by class.
plt.plot(xx, yy, 'k-')
plt.plot(xx, yy_down, 'k--')
plt.plot(xx, yy_up, 'k--')
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
            s=80, facecolors='none')
plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)
plt.axis('tight')
plt.show()

示例三:人脸识别

# Download (on first use) the Labeled Faces in the Wild data set,
# keeping only people with at least 70 images, scaled to 40% size.
from sklearn.datasets import fetch_lfw_people

lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

人脸数据集基本信息

from sklearn.datasets import fetch_lfw_people

lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

n_samples, h, w = lfw_people.images.shape  # shape of the image set; h and w are reused later for plotting

# Feature matrix (flattened pixels) and target labels.
X = lfw_people.data

Y = lfw_people.target

n_features = X.shape[1]  # number of features, i.e. the dimensionality (pixels per image)

target_names = lfw_people.target_names  # the names of the people in the data set, as an array

n_classes = target_names.shape[0]

print("===== 数据集中信息 =====")

print("数据个数(n_samples):", n_samples)  # output: 1288

print("特征个数,维度(n_features):", n_features)  # output: 1850 (50x37 pixels; "1859" in the original text looks like a typo)

print("结果集类别个数(n_classes):", n_classes)  # output: 7

拆分训练集和测试集

from sklearn.model_selection import train_test_split

# Hold out 25% of the samples as the test set.
# NOTE(review): no random_state is given, so the split differs between runs.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25)

特征降维处理

import time  # needed for the timings below; missing from the original text

from sklearn.decomposition import PCA

# Reduce the high-dimensional pixel features to 150 principal components;
# whiten=True rescales the components to unit variance.
n_components = 150

t0 = time.time()
pca = PCA(n_components=n_components, whiten=True).fit(X_train)
print("pca done %0.3fs" % (time.time() - t0))

# Project both the training and test sets into the PCA space.
t0 = time.time()
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
print("data set to pca done %0.3fs" % (time.time() - t0))

提取特征点

# Reshape each principal component back into an h x w image (an "eigenface").
eigenfaces = pca.components_.reshape((n_components, h, w))

构造 SVM 分类器

import time  # needed for the timing below; missing from the original text

from sklearn import svm
from sklearn.model_selection import GridSearchCV

# Grid-search over the RBF kernel's penalty C and kernel coefficient gamma.
t0 = time.time()

param_grid = {
    "C": [1e3, 5e3, 1e4, 1e5],
    "gamma": [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
}

# class_weight='balanced' compensates for the unequal number of images per person.
clf = GridSearchCV(svm.SVC(kernel='rbf', class_weight='balanced'), param_grid=param_grid)
clf.fit(X_train_pca, Y_train)
print("fit done %0.3fs" % (time.time() - t0))

# The best estimator found over the parameter grid.
print(clf.best_estimator_)

预测

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Predict on the PCA-transformed test set and report quality metrics.
# (The original text assigned t0 = time.time() here but never used it,
# and `time` was never imported; the dead assignment is removed.)
y_pred = clf.predict(X_test_pca)

# Per-class precision/recall/F1, labelled with the people's names.
print(classification_report(Y_test, y_pred, target_names=target_names))

# Confusion matrix with one row/column per class.
print(confusion_matrix(Y_test, y_pred, labels=range(n_classes)))

precision recall f1-score support

Ariel Sharon 0.78 0.70 0.74 20

Colin Powell 0.79 0.84 0.82 76

Donald Rumsfeld 0.81 0.71 0.76 31

George W Bush 0.85 0.91 0.88 125

Gerhard Schroeder 0.78 0.75 0.77 24

Hugo Chavez 0.94 0.83 0.88 18

Tony Blair 0.91 0.75 0.82 28

avg / total 0.83 0.83 0.83 322

[[ 14 3 0 2 1 0 0]

[ 2 64 1 9 0 0 0]

[ 2 2 22 5 0 0 0]

[ 0 8 3 114 0 0 0]

[ 0 1 1 2 18 1 1]

[ 0 1 0 0 1 15 1]

[ 0 2 0 2 3 0 21]]

测试结果可视化

import matplotlib.pyplot as plt


def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
    """Helper function to plot a gallery of portraits.

    Draws the first n_row * n_col images in a grid, each reshaped to
    h x w grayscale and captioned with the matching entry of `titles`.
    (Indentation restored: the pasted original lost all leading
    whitespace and would not parse.)
    """
    plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    for i in range(n_row * n_col):
        plt.subplot(n_row, n_col, i + 1)
        # Flatten pixel vectors back into h x w grayscale images.
        plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
        plt.title(titles[i], size=12)
        # Hide the axis ticks.
        plt.xticks(())
        plt.yticks(())

def title(y_pred, y_test, target_names, i):
    """Build a 'predicted vs. true' caption for sample i.

    Uses only the last word of each person's name (e.g. "Bush").
    (Indentation restored: the pasted original lost all leading
    whitespace and would not parse.)
    """
    pred_name = target_names[y_pred[i]].rsplit(' ', 1)[-1]
    true_name = target_names[y_test[i]].rsplit(' ', 1)[-1]
    return 'predicted: %s\ntrue: %s' % (pred_name, true_name)

# Captions ("predicted vs. true") for every test image.
# (Continuation alignment restored: the comprehension was split across
# lines with its indentation stripped in the pasted original.)
prediction_titles = [title(y_pred, Y_test, target_names, i)
                     for i in range(y_pred.shape[0])]

plot_gallery(X_test, prediction_titles, h, w)

# plot the gallery of the most significative eigenfaces
eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])]
plot_gallery(eigenfaces, eigenface_titles, h, w)

plt.show()

总结

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值