使用SVM识别手写数字

今晚加班到快通宵了,困得快睁不开眼了,女上司很关心,问我要不要吃宵夜。我没好气地说,宵夜就算了,能让我睡一觉就行了。女上司红着脸说了句讨厌啊,然后坐在我身边不动,好像距离我很近,搞得我很紧张,难道她发现我的程序出了bug?
在这里插入图片描述

import matplotlib as mpl
import matplotlib.pyplot as plt

from sklearn import datasets, svm, metrics
## Configure matplotlib so the Chinese text in the figure titles further down
## is not rendered as garbled characters: select the SimHei font and turn off
## axes.unicode_minus so the minus sign is drawn with a plain ASCII hyphen.
mpl.rcParams['font.sans-serif'] = [u'SimHei']
mpl.rcParams['axes.unicode_minus'] = False

## Load the handwritten-digit image dataset.
## NOTE: the font setup and this load used to be repeated three times in a
## row; one pass produces exactly the same state, so the copies were dropped.
digits = datasets.load_digits()
# Bare expression: only has a visible effect in an interactive / notebook
# session, where it echoes the loaded dataset.
digits

{'DESCR': "Optical Recognition of Handwritten Digits Data Set\n===================================================\n\nNotes\n-----\nData Set Characteristics:\n :Number of Instances: 5620\n :Number of Attributes: 64\n :Attribute Information: 8x8 image of integer pixels in the range 0..16.\n :Missing Attribute Values: None\n :Creator: E. Alpaydin (alpaydin '@' boun.edu.tr)\n :Date: July; 1998\n\nThis is a copy of the test set of the UCI ML hand-written digits datasets\nhttp://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits\n\nThe data set contains images of hand-written digits: 10 classes where\neach class refers to a digit.\n\nPreprocessing programs made available by NIST were used to extract\nnormalized bitmaps of handwritten digits from a preprinted form. From a\ntotal of 43 people, 30 contributed to the training set and different 13\nto the test set. 32x32 bitmaps are divided into nonoverlapping blocks of\n4x4 and the number of on pixels are counted in each block. This generates\nan input matrix of 8x8 where each element is an integer in the range\n0..16. This reduces dimensionality and gives invariance to small\ndistortions.\n\nFor info on NIST preprocessing routines, see M. D. Garris, J. L. Blue, G.\nT. Candela, D. L. Dimmick, J. Geist, P. J. Grother, S. A. Janet, and C.\nL. Wilson, NIST Form-Based Handprint Recognition System, NISTIR 5469,\n1994.\n\nReferences\n----------\n - C. Kaynak (1995) Methods of Combining Multiple Classifiers and Their\n Applications to Handwritten Digit Recognition, MSc Thesis, Institute of\n Graduate Studies in Science and Engineering, Bogazici University.\n - E. Alpaydin, C. Kaynak (1998) Cascading Classifiers, Kybernetika.\n - Ken Tang and Ponnuthurai N. Suganthan and Xi Yao and A. Kai Qin.\n Linear dimensionalityreduction using relevance weighted LDA. School of\n Electrical and Electronic Engineering Nanyang Technological University.\n 2005.\n - Claudio Gentile. A New Approximate Maximal Margin Classification\n Algorithm. NIPS. 2000.\n",
 'data': array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
    [ 0.,  0.,  0., ..., 10.,  0.,  0.],
    [ 0.,  0.,  0., ..., 16.,  9.,  0.],
    ...,
    [ 0.,  0.,  1., ...,  6.,  0.,  0.],
    [ 0.,  0.,  2., ..., 12.,  0.,  0.],
    [ 0.,  0., 10., ..., 12.,  1.,  0.]]),
 'images': array([[[ 0.,  0.,  5., ...,  1.,  0.,  0.],
     [ 0.,  0., 13., ..., 15.,  5.,  0.],
     [ 0.,  3., 15., ..., 11.,  8.,  0.],
     ...,
     [ 0.,  4., 11., ..., 12.,  7.,  0.],
     [ 0.,  2., 14., ..., 12.,  0.,  0.],
     [ 0.,  0.,  6., ...,  0.,  0.,  0.]],

    [[ 0.,  0.,  0., ...,  5.,  0.,  0.],
     [ 0.,  0.,  0., ...,  9.,  0.,  0.],
     [ 0.,  0.,  3., ...,  6.,  0.,  0.],
     ...,
     [ 0.,  0.,  1., ...,  6.,  0.,  0.],
     [ 0.,  0.,  1., ...,  6.,  0.,  0.],
     [ 0.,  0.,  0., ..., 10.,  0.,  0.]],

    [[ 0.,  0.,  0., ..., 12.,  0.,  0.],
     [ 0.,  0.,  3., ..., 14.,  0.,  0.],
     [ 0.,  0.,  8., ..., 16.,  0.,  0.],
     ...,
     [ 0.,  9., 16., ...,  0.,  0.,  0.],
     [ 0.,  3., 13., ..., 11.,  5.,  0.],
     [ 0.,  0.,  0., ..., 16.,  9.,  0.]],

    ...,

    [[ 0.,  0.,  1., ...,  1.,  0.,  0.],
     [ 0.,  0., 13., ...,  2.,  1.,  0.],
     [ 0.,  0., 16., ..., 16.,  5.,  0.],
     ...,
     [ 0.,  0., 16., ..., 15.,  0.,  0.],
     [ 0.,  0., 15., ..., 16.,  0.,  0.],
     [ 0.,  0.,  2., ...,  6.,  0.,  0.]],

    [[ 0.,  0.,  2., ...,  0.,  0.,  0.],
     [ 0.,  0., 14., ..., 15.,  1.,  0.],
     [ 0.,  4., 16., ..., 16.,  7.,  0.],
     ...,
     [ 0.,  0.,  0., ..., 16.,  2.,  0.],
     [ 0.,  0.,  4., ..., 16.,  2.,  0.],
     [ 0.,  0.,  5., ..., 12.,  0.,  0.]],

    [[ 0.,  0., 10., ...,  1.,  0.,  0.],
     [ 0.,  2., 16., ...,  1.,  0.,  0.],
     [ 0.,  0., 15., ..., 15.,  0.,  0.],
     ...,
     [ 0.,  4., 16., ..., 16.,  6.,  0.],
     [ 0.,  8., 16., ..., 16.,  8.,  0.],
     [ 0.,  1.,  8., ..., 12.,  1.,  0.]]]),

 'target': array([0, 1, 2, ..., 8, 9, 8]),
 'target_names': array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])}

## Count the samples and flatten each image into one feature vector.
## All images must have the same size / pixel layout so that every flattened
## vector ends up with the same length.
n_samples = digits.images.shape[0]
data = digits.images.reshape(n_samples, -1)
# Bare expression: in a notebook this echoes the (samples, features) shape.
data.shape

(1797, 64)

## Build the model: a support vector classifier with gamma=0.001. The kernel
## is left at its default (RBF, per the original author's note).
## A commented-out alternative in the original swapped in k-nearest neighbours:
##   from sklearn.neighbors import KNeighborsClassifier
##   classifier = KNeighborsClassifier(n_neighbors=9, weights='distance')
classifier = svm.SVC(gamma=0.001)

## Train on the first half of the samples; the second half is kept back for
## testing.
train_end = n_samples // 2
train_features = data[:train_end]
train_labels = digits.target[:train_end]
classifier.fit(train_features, train_labels)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

## Evaluate on the second half of the samples (the half not used for
## training): collect the true labels and the model's predictions.
test_start = n_samples // 2
expected = digits.target[test_start:]  # ground truth (y_test)
predicted = classifier.predict(data[test_start:])  # model output (y_predicted)

## Classification report for the held-out half.
print("分类器%s的分类效果:\n%s\n"
      % (classifier, metrics.classification_report(expected, predicted)))

## Confusion matrix for the same expected / predicted labels.
print("混淆矩阵为:\n%s" % metrics.confusion_matrix(expected, predicted))

## Overall accuracy. It is computed from the predictions already in hand;
## classifier.score() would yield the same number but would first run
## predict() over the entire test half a second time.
print("score_svm:\n%f" % metrics.accuracy_score(expected, predicted))

分类器SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)的分类效果:
         precision    recall  f1-score   support

      0       1.00      0.99      0.99        88
      1       0.99      0.97      0.98        91
      2       0.99      0.99      0.99        86
      3       0.98      0.87      0.92        91
      4       0.99      0.96      0.97        92
      5       0.95      0.97      0.96        91
      6       0.99      0.99      0.99        91
      7       0.96      0.99      0.97        89
      8       0.94      1.00      0.97        88
      9       0.93      0.98      0.95        92

avg / total   0.97      0.97      0.97       899

混淆矩阵为:
[[87  0  0  0  1  0  0  0  0  0]
 [ 0 88  1  0  0  0  0  0  1  1]
 [ 0  0 85  1  0  0  0  0  0  0]
 [ 0  0  0 79  0  3  0  4  5  0]
 [ 0  0  0  0 88  0  0  0  0  4]
 [ 0  0  0  0  0 88  1  0  0  2]
 [ 0  1  0  0  0  0 90  0  0  0]
 [ 0  0  0  0  0  1  0 88  0  0]
 [ 0  0  0  0  0  0  0  0 88  0]
 [ 0  0  0  1  0  1  0  0  0 90]]
score_svm:
0.968854

## Show sample predictions in a 2 x 5 grid: up to five misclassified digits
## on the top row, up to five correctly classified digits on the bottom row.
plt.figure(facecolor='gray', figsize=(12, 5))

# Images of the test half, plus boolean masks separating wrong from right
# predictions (each mask is computed once and reused for all three arrays).
test_images = digits.images[n_samples // 2:]
wrong = expected != predicted
right = expected == predicted

# Each entry: (mask, number of the first subplot in that row, hide the axes?)
# Only the top row hides its axes; the bottom row leaves them visible, as in
# the original.
grid_rows = (
    (wrong, 1, True),
    (right, 6, False),
)
for mask, first_slot, hide_axes in grid_rows:
    # Take the image, true label and predicted label of the first five hits.
    samples = zip(test_images[mask][:5], expected[mask][:5], predicted[mask][:5])
    for offset, (img, actual, guess) in enumerate(samples):
        plt.subplot(2, 5, first_slot + offset)
        if hide_axes:
            plt.axis('off')
        # Draw the pixel matrix with the reversed-gray colormap, no smoothing.
        plt.imshow(img, cmap=plt.cm.gray_r, interpolation='nearest')
        # Title reads "predicted/actual".
        plt.title(u'预测值/实际值:%i/%i' % (guess, actual))

plt.subplots_adjust(left=.04, bottom=.02, right=.97, top=.94,
                    wspace=.09, hspace=.2)
plt.show()

在这里插入图片描述

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Mr Robot

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值