# coding=utf-8
"""
author: lei
function: train a model on the MNIST dataset and use it to make predictions
"""
import os
import random

import joblib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
class Mnist(object):
    """Train and apply a PCA + SVC digit classifier on MNIST data stored as CSV.

    ``train`` fits a PCA and an SVC on the training CSV and saves both with
    joblib; ``test`` reloads the saved models to classify one row of the
    test CSV.
    """

    # Where the fitted models are persisted; shared by train(), test() and
    # data_reduction() so the save and load locations cannot drift apart.
    PCA_PATH = "./model/pca.pkl"
    SVC_PATH = "./model/mnist_svc.pkl"

    def __init__(self, train_path, test_path):
        """Store the CSV locations; no file is read at construction time.

        Args:
            train_path: Path of the training CSV. ``train`` uses column 0 as
                the label and the remaining columns as features.
            test_path: Path of the test CSV. ``test`` uses every column of a
                row as features and reshapes them to 28x28.
        """
        self.train_path = train_path
        self.test_path = test_path

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will contain *path* if it is missing."""
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    def data_reduction(self, data):
        """Fit a PCA on *data*, save it to ``PCA_PATH`` and return the projection.

        Args:
            data: 2-D array of samples to fit the PCA on.

        Returns:
            The PCA-transformed version of *data*.
        """
        # A float n_components asks scikit-learn to keep as many components
        # as are needed to explain that fraction of the variance.
        pca = PCA(0.85)
        reduced = pca.fit_transform(data)
        # joblib.dump does not create missing directories.
        self._ensure_parent_dir(self.PCA_PATH)
        joblib.dump(pca, self.PCA_PATH)
        return reduced

    def to_plot(self, num):
        """Show *num* as an image with the axes hidden."""
        plt.imshow(num)
        plt.axis("off")  # hide the axes
        plt.show()

    def test(self, n):
        """Display row *n* of the test CSV and print its predicted label.

        Requires the PCA and SVC files written by ``train`` to exist.

        Args:
            n: Positional index of the row in the test CSV to classify.
        """
        pca = joblib.load(self.PCA_PATH)
        frame = pd.read_csv(self.test_path)
        pixels = frame.iloc[n, :].values
        # Show the digit that is about to be classified.
        self.to_plot(pixels.reshape(28, 28))
        # Scale the same way train() does; wrapping in a list turns the single
        # row into the 2-D input that transform() expects.
        sample = [pixels / 255]
        sample_pca = pca.transform(sample)
        svm = joblib.load(self.SVC_PATH)
        pre = svm.predict(sample_pca)
        print(pre[0])

    def train(self):
        """Fit PCA + SVC on the training CSV, print the score and save the SVC.

        The PCA is saved by ``data_reduction``; the SVC is saved to
        ``SVC_PATH``. The printed value is the SVC score on a 20% split.
        """
        frame = pd.read_csv(self.train_path)
        x = frame.iloc[:, 1:].values
        y = frame.iloc[:, 0].values
        # Scale the feature values by 1/255.
        x = x / 255
        # NOTE(review): the PCA is fitted on all rows before the split below,
        # so the held-out rows influence the projection and the printed score
        # may be slightly optimistic. Left as-is to keep the saved models
        # unchanged.
        pca_x = self.data_reduction(x)
        x_train, x_test, y_train, y_test = train_test_split(
            pca_x, y, test_size=0.2
        )
        svc = SVC()
        svc.fit(x_train, y_train)
        # The original author recorded a score of 0.9804761904761905 here.
        print(svc.score(x_test, y_test))
        # Persist the classifier for test().
        self._ensure_parent_dir(self.SVC_PATH)
        joblib.dump(svc, self.SVC_PATH)
# Script entry point: classify one randomly chosen row of the test CSV.
if __name__ == "__main__":
    mn = Mnist("./data/train.csv", "./data/test.csv")
    # Uncomment on the first run: test() loads the model files that train()
    # writes under ./model/.
    # mn.train()
    mn.test(random.randint(1, 1000))