# coding=utf-8
"""
author: lei
function: recognize MNIST handwritten digits with an SVM
"""
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
def to_plot(n, images=None):
    """Display the n-th sample as a 28x28 image.

    Args:
        n: Row index of the sample to show.
        images: Optional source of flattened images, either a pandas
            DataFrame or a 2-D array whose rows reshape to 28x28. When
            omitted, the module-level ``train_image`` is used, as before.
    """
    if images is None:
        images = train_image
    # ``train_image`` starts out as a DataFrame but the script later rebinds
    # it to an ndarray during normalization, so both containers are handled.
    if hasattr(images, "iloc"):
        pixels = images.iloc[n, :].values
    else:
        pixels = np.asarray(images)[n]
    plt.imshow(pixels.reshape(28, 28))
    plt.axis("off")  # hide the axes
    plt.show()
# Feature dimensionality reduction and model training
def n_componets_analysis(n, x_train, y_train, x_test, y_test):
    """Reduce the features with PCA, train an SVC, and report its accuracy.

    Args:
        n: Value forwarded to ``PCA(n_components=...)``.
        x_train: Training features.
        y_train: Training labels.
        x_test: Test features.
        y_test: Test labels.

    Returns:
        The score of the fitted SVC on the PCA-transformed test set.
    """
    began = time.time()

    # Fit the projection on the training data only, then apply it to both sets.
    reducer = PCA(n_components=n)
    print("特征降维,传递的参数为:{}".format(n))
    reducer.fit(x_train)
    train_reduced, test_reduced = (
        reducer.transform(part) for part in (x_train, x_test)
    )

    # Train a default-configured SVC on the reduced features.
    print("开始使用svc进行训练")
    classifier = svm.SVC()
    classifier.fit(train_reduced, y_train)
    score = classifier.score(test_reduced, y_test)

    # Elapsed wall-clock time is truncated to whole seconds for the report.
    elapsed = int(time.time() - began)
    print("准确率是:{},消耗时间是:{}".format(score, elapsed))
    return score
if __name__ == "__main__":
    # 1. Load the data (author's note: shape is (42000, 785)).
    train = pd.read_csv("./data/train.csv")

    # 2. Separate features and target: column 0 holds the label, the
    #    remaining columns hold the image pixels.
    train_image = train.iloc[:, 1:]
    train_label = train.iloc[:, 0]

    # Optional preview of individual samples.
    # to_plot(0)
    # to_plot(100)
    # to_plot(40)

    # 3. Normalize the feature values by dividing by 255 and switch both
    #    features and labels to plain arrays.
    train_image = train_image.values / 255
    train_label = train_label.values

    # 4. Split into training and test sets
    #    (author's note: (33600, 784) and (8400, 784)).
    x_train, x_test, y_train, y_test = train_test_split(
        train_image, train_label, train_size=0.8, random_state=0
    )

    # 5. Dimensionality reduction and model training.
    # Candidate n_components values for the (currently disabled) sweep below,
    # which runs PCA + SVC once per candidate to find a reasonable setting.
    n_s = np.linspace(0.7, 0.85, num=5)
    accuracy = []
    # for n in n_s:
    #     tmp = n_componets_analysis(n, x_train, y_train, x_test, y_test)
    #     accuracy.append(tmp)
    # # Visualize accuracy against the candidate values
    # plt.plot(n_s, np.array(accuracy), "r")
    # plt.show()

    # Based on that plot the author settled on n_components=0.8.
    pca = PCA(n_components=0.8)
    pca.fit(x_train)
    # Report the number of retained columns (author's note: 43). The fitted
    # count lives in ``n_components_``; ``n_components`` is only the
    # constructor argument (0.8).
    print(pca.n_components_)
    x_train_pca = pca.transform(x_train)
    x_test_pca = pca.transform(x_test)
    # Author's note: (33600, 43) (8400, 43)
    print(x_train_pca.shape, x_test_pca.shape)

    # Train the chosen model and print its accuracy on the test set
    # (author's recorded result: 0.979047619047619).
    ss1 = svm.SVC()
    ss1.fit(x_train_pca, y_train)
    print(ss1.score(x_test_pca, y_test))