SVM实现数字识别器
使用SVM的API识别图片中手写的数字。
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
# Load the data set
train = pd.read_csv("./number.csv")
# Separate target and features: the first column is taken as the label,
# every remaining column as a feature
train_label = train.iloc[:, 0]
train_image = train.iloc[:, 1:]
# Display a single sample as an image
def to_plot(n):
    """Show row ``n`` of the module-level ``train_image`` as a 28x28 image.

    Args:
        n: Zero-based row position of the sample to display.

    The row is selected with ``.iloc``, so ``train_image`` has to be a
    pandas object at the time of the call, and the row has to hold
    exactly 28 * 28 values for the reshape to succeed.
    """
    # Select the requested row; previously row 0 was always drawn and the
    # ``n`` argument was silently ignored.
    num = train_image.iloc[n].values.reshape(28, 28)
    plt.imshow(num)
    plt.axis("off")  # hide the axes so only the image is shown
    plt.show()
to_plot(n=40)
# Basic preprocessing
# Switch to plain NumPy arrays and normalise the features by dividing by 255
train_label = train_label.values
train_image = train_image.values / 255
# Split into training and validation parts (80% training, fixed random seed)
x_train, x_val, y_train, y_val = train_test_split(
    train_image,
    train_label,
    train_size=0.8,
    random_state=0,
)
# Dimensionality reduction and model training
# PCA is run with several settings to settle on the final model
def n_components_analysis(n, x_train, y_train, x_val, y_val):
    """Reduce the data with PCA, train an SVC on it and score the result.

    Args:
        n: Value passed to ``PCA(n_components=...)``.
        x_train: Training features; PCA and the classifier are fitted on these.
        y_train: Training labels.
        x_val: Validation features, projected with the PCA fitted on ``x_train``.
        y_val: Validation labels used for scoring.

    Returns:
        The value returned by ``SVC.score`` on the reduced validation set.
    """
    # Start the wall-clock timer
    started_at = time.time()

    # PCA is fitted on the training data only
    reducer = PCA(n_components=n)
    print("特征降维,传递的参数为:{}".format(n))
    reducer.fit(x_train)

    # Project the training and validation sets with the same fitted PCA
    train_reduced, val_reduced = (
        reducer.transform(part) for part in (x_train, x_val)
    )

    # Train an SVC with default parameters on the reduced training set
    print("开始训练")
    classifier = svm.SVC().fit(train_reduced, y_train)

    # Score on the reduced validation set
    accuracy = classifier.score(val_reduced, y_val)

    # Stop the timer; the elapsed time is reported in whole seconds
    elapsed_seconds = int(time.time() - started_at)
    print("准确率是:{},消耗时间是:{}s".format(accuracy, elapsed_seconds))
    return accuracy
# Try several n_components values to find a reasonable one:
# five evenly spaced candidates from 0.70 to 0.85
n_s = np.linspace(0.70, 0.85, num=5)
accuracy = [
    n_components_analysis(candidate, x_train, y_train, x_val, y_val)
    for candidate in n_s
]
# Plot the validation accuracy against the candidate values (red line)
plt.plot(n_s, np.array(accuracy), "r")
plt.show()
# Fit the chosen PCA (n_components=0.8) on the training data
pca = PCA(n_components=0.8).fit(x_train)
print(pca.n_components_)
# Project both sets with the fitted PCA and report the resulting shapes
x_train_pca, x_val_pca = pca.transform(x_train), pca.transform(x_val)
print(x_train_pca.shape, x_val_pca.shape)
# Train the SVC on the reduced data and print its validation accuracy
ss1 = svm.SVC().fit(x_train_pca, y_train)
print(ss1.score(x_val_pca, y_val))