【MNIST数据集SVM不同核函数分类实验】

一开始,我是从网上下载的MNIST数据集,然后再读取的

代码如下:

from sklearn.svm import SVC
import numpy as np
from time import time
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from struct import unpack
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt

def readimage(path):
    """Load an uncompressed IDX3-ubyte image file as flattened image rows.

    Args:
        path: Path to the IDX3 file (for example the MNIST
            ``train-images.idx3-ubyte`` file).

    Returns:
        A ``uint8`` array of shape ``(num, rows * cols)`` holding one
        flattened image per row.

    Raises:
        ValueError: If the magic number is not 2051 (IDX3, unsigned byte)
            or the pixel payload does not match the header dimensions.
    """
    with open(path, 'rb') as f:
        # The header is four big-endian uint32 values.
        magic, num, rows, cols = unpack('>4I', f.read(16))
        if magic != 2051:
            raise ValueError(
                'not an IDX3-ubyte image file (magic=%d): %s' % (magic, path))
        # Use the dimensions from the header instead of assuming 28x28;
        # reshape raises ValueError when the payload size disagrees.
        img = np.fromfile(f, dtype=np.uint8).reshape(num, rows * cols)
    return img


def readlabel(path):
    """Load an uncompressed IDX1-ubyte label file.

    Args:
        path: Path to the IDX1 file (for example the MNIST
            ``train-labels.idx1-ubyte`` file).

    Returns:
        A 1-D ``uint8`` array with one label per item.

    Raises:
        ValueError: If the magic number is not 2049 (IDX1, unsigned byte)
            or the number of labels read differs from the header count.
    """
    with open(path, 'rb') as f:
        # The header is two big-endian uint32 values.
        magic, num = unpack('>2I', f.read(8))
        if magic != 2049:
            raise ValueError(
                'not an IDX1-ubyte label file (magic=%d): %s' % (magic, path))
        lab = np.fromfile(f, dtype=np.uint8)
    # Catch truncated or mismatched files instead of returning bad labels.
    if lab.size != num:
        raise ValueError(
            'expected %d labels but read %d: %s' % (num, lab.size, path))
    return lab


def main():
    """Train an SVM on the MNIST IDX files and report test accuracy.

    Reads the four IDX files under ``MNIST/``, fits an ``SVC`` wrapped in
    ``GridSearchCV``, then prints the training time, the overall test
    accuracy, and per-digit correct/total counts.
    """
    # Pixels are stored as 0-255. SVMs are not scale invariant, so rescale
    # the features to [0, 1] before fitting; unscaled inputs make the
    # solver converge far more slowly.
    train_data = readimage("MNIST/train-images.idx3-ubyte") / 255.0
    train_label = readlabel("MNIST/train-labels.idx1-ubyte")
    test_data = readimage("MNIST/t10k-images.idx3-ubyte") / 255.0
    test_label = readlabel("MNIST/t10k-labels.idx1-ubyte")

    svc = SVC()
    # Change 'kernel' here to try other kernels. Even a single-combination
    # grid is cross-validated and then refit by GridSearchCV, so the model
    # is trained several times, not once.
    parameters = {'kernel': ['linear'], 'C': [1]}

    print("Train...")
    clf = GridSearchCV(svc, parameters, n_jobs=-1)
    start = time()
    clf.fit(train_data, train_label)
    t = time() - start
    minutes, seconds = divmod(t, 60)
    print('Train:%dmin%.3fsec' % (minutes, seconds))

    prediction = clf.predict(test_data)
    # accuracy_score expects (y_true, y_pred).
    print("accuracy: ", accuracy_score(test_label, prediction))

    # Per-digit totals, and per-digit counts of correctly classified samples.
    sumall = np.bincount(test_label, minlength=10).tolist()
    accurate = np.bincount(
        test_label[prediction == test_label], minlength=10).tolist()
    print("分类正确的:", accurate)
    print("总的测试标签:", sumall)


# Run the experiment only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

 选择不同的核函数就在 parameters字典的kernel项修改即可。

但是!!!我发现这个训练一直在进行,训练了一个多小时都没结果,CPU疯狂转……

后来,改变思路,直接导入sklearn库里自带的手写数字数据集(load_digits;注意它并不是真正的MNIST,而是一个只有1797个样本、每个样本为8×8图像的小型数据集),再用train_test_split划分训练集和测试集后再训练。

代码如下:

from sklearn.datasets import load_digits
from sklearn.svm import SVC
import numpy as np
from time import time
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Compare SVC kernels on scikit-learn's bundled digits data set.
# NOTE: load_digits() is NOT MNIST. It is a small set of 1797 samples of
# 8x8 images (64 features), which is why training finishes in seconds.
digits = load_digits()
x_train, x_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.25, random_state=40)

kernels = ['linear', 'rbf', 'poly', 'sigmoid']  # kernels under comparison
models = []
t_score = []   # training time per kernel, in seconds
print('train......')
for kernel in kernels:
    model = SVC(C=1, kernel=kernel)
    start = time()
    model.fit(x_train, y_train)
    t_score.append(time() - start)
    models.append(model)
print('training done.')

print('testing')
y_score1 = []  # accuracy on the training split
y_score2 = []  # accuracy on the held-out test split
for model in models:
    y_score1.append(accuracy_score(y_train, model.predict(x_train)))
    y_score2.append(accuracy_score(y_test, model.predict(x_test)))
print('testing done.')

x_tmp = list(range(len(kernels)))
tick_labels = ['%s-SVM' % kernel for kernel in kernels]

# Select a CJK-capable font before any text is created so that the Chinese
# labels render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.figure(facecolor='w', figsize=(12, 6))

# Left panel: train/test accuracy per kernel.
plt.subplot(121)
plt.plot(x_tmp, y_score1, 'r-', lw=2, label=u'训练集准确率')
plt.plot(x_tmp, y_score2, 'g-', lw=2, label=u'测试集准确率')
plt.xlim(-0.3, 3.3)
plt.ylim(min(y_score1 + y_score2) * 0.9, max(y_score1 + y_score2) * 1.1)
plt.legend(loc='lower left')
plt.title(u'模型预测准确率', fontsize=13)
plt.xticks(x_tmp, tick_labels, rotation=0)
plt.grid(True)

# Right panel: training time per kernel.
plt.subplot(122)
plt.plot(x_tmp, t_score, 'b-', lw=2, label=u'模型训练时间')
plt.title(u'模型训练耗时', fontsize=13)
plt.xticks(x_tmp, tick_labels, rotation=0)
plt.xlim(-0.3, 3.3)
plt.grid(True)

# The title names the data set actually used (sklearn digits, not MNIST).
plt.suptitle(u'sklearn digits数据SVM分类器不同内核函数模型比较', fontsize=16)
plt.show()

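这时,再训练,巨快!!!用不了几秒,结果就出来了。当时我也解释不太清其中的原因,反正这样做了之后就快了。(其实原因主要在数据规模:load_digits只有1797个样本、每个样本64维(8×8),而真正的MNIST训练集有60000个样本、每个样本784维(28×28);SVC的训练时间随样本数至少呈二次方增长,而且前一种做法里GridSearchCV还要做交叉验证、重复训练多次,所以会慢得多。)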

 最后我想说:6

  • 1
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值