机器学习之KNN交叉验证实例

最新推荐文章于 2024-11-06 21:12:53 发布

qq_38404903

最新推荐文章于 2024-11-06 21:12:53 发布

阅读量90

点赞数

文章标签：机器学习 人工智能

本文链接：https://blog.csdn.net/qq_38404903/article/details/133973987

版权

实例之数字识别

import matplotlib.pyplot as plt
import scipy.ndimage as ndimage
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
# 1. Load the digit images from disk.
# Quick manual check of a single sample (the 10th image of digit 3):
# img_arr=plt.imread('C:/Users/lenovo/Desktop/python_use/digist/3/3_10.bmp')
# print(img_arr.shape)
# plt.imshow(img_arr)
# plt.show()
feature = []  # pixel array of every image (10 digits x 500 images each)
target = []  # label of every image (the digit it shows), aligned with `feature`
# Every digit lives in its own folder, so walk the folders in the outer loop
# and the files of each folder in the inner loop.
for i in range(10):  # i: folder name, which is also the digit label
    for j in range(1, 501):  # j: 1-based index that is part of the file name
        # Files are stored as '<digit>/<digit>_<index>.bmp'.
        img_path = f'C:/Users/lenovo/Desktop/python_use/digist/{i}/{i}_{j}.bmp'
        img_arr = plt.imread(img_path)
        feature.append(img_arr)
        target.append(i)
# print(feature)
# print(target)
# `feature` and `target` are still plain lists here; convert them to arrays.
feature = np.array(feature)
target = np.array(target)
# Inspect the dimensions: the estimator is trained on a 2-D feature matrix.
print(feature.shape)
print(target.shape)
# Flatten every image into one row. The sample count is taken from the array
# and the row length is inferred (-1), so nothing has to be kept in sync with
# the loop bounds or the image size by hand.
feature = feature.reshape(feature.shape[0], -1)
print(feature.shape)
# The samples are images, so no hand-crafted features are extracted; the raw
# pixel values are used directly.
x_train, x_test, y_train, y_test = train_test_split(
    feature,
    target,
    train_size=0.8,
    random_state=2525,
)
# Search for the best n_neighbors: score every candidate k with 5-fold
# cross-validation on the training split and keep the one with the highest
# mean score.
ks = np.arange(1, 100, 3)
scores = np.array([
    cross_val_score(
        KNeighborsClassifier(n_neighbors=k), x_train, y_train, cv=5
    ).mean()
    for k in ks
])
best_k = ks[np.argmax(scores)]
print('模型的最优参数：' + str(best_k))
# Optional learning curve (mean CV score against k):
# plt.plot(ks,scores)
# plt.xlabel('k')
# plt.ylabel('score')
# plt.show()

# # Plug the best parameter into the model and compare predictions with the
# # true labels of the held-out split.
# knn = KNeighborsClassifier(n_neighbors=1)
# knn.fit(x_train,y_train)
# print('模型识别的结果:', knn.predict(x_test))
# print('真实的结果：', y_test)

# Classify two digits cropped out of a separate photo.
test_n = plt.imread('C:/Users/lenovo/Desktop/python_use/123.jpg')
# plt.imshow(test_n)
# plt.show()
print(test_n.shape)
# Hand-picked crop windows (rows, columns) around the two digits in the photo.
zero_test = test_n[5:140, 180:290]
eight_test = test_n[450:580, 180:290]
# plt.imshow(zero_test)
# plt.show()
# plt.imshow(eight_test)
# plt.show()
# The crops do not have the same size as the training images yet.
print(zero_test.shape, eight_test.shape)
# Shrink the crops with scipy.ndimage.zoom. It takes the image array and one
# zoom factor per axis, where factor = target size / current size; e.g. going
# from (58, 50) to (5, 5) needs (5/58, 5/50). The factors are derived from the
# actual crop shapes so they stay correct if the crop windows change.
zero_test_zoom = ndimage.zoom(
    zero_test, zoom=(28 / zero_test.shape[0], 28 / zero_test.shape[1])
)
eight_test_zoom = ndimage.zoom(
    eight_test, zoom=(28 / eight_test.shape[0], 28 / eight_test.shape[1])
)
print(zero_test_zoom.shape, eight_test_zoom.shape)
# plt.imshow(X=zero_test_zoom)
# plt.show()
# plt.imshow(X=eight_test_zoom)
# plt.show()
knn_test = KNeighborsClassifier(n_neighbors=best_k)
# Fit on the training split: best_k was selected on it, and the held-out
# split must stay unseen by the model (it is also four times smaller).
knn_test.fit(X=x_train, y=y_train)
# predict() expects a 2-D array, so flatten each image into a single row.
n1 = knn_test.predict(zero_test_zoom.reshape(1, -1))
print(n1)
n2 = knn_test.predict(eight_test_zoom.reshape(1, -1))
print(n1, n2)

结果是：

(5000, 28, 28)
(5000,)
(5000, 784)
模型的最优参数：1
(604, 500)
(135, 110) (130, 110)
(28, 28) (28, 28)
[0]
[0] [8]

Process finished with exit code 0

qq_38404903

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫