基于 KNN 算法识别手写数字

最新推荐文章于 2024-06-20 21:22:09 发布

骆清波

最新推荐文章于 2024-06-20 21:22:09 发布

阅读量433

点赞数

本文链接：https://blog.csdn.net/qq_41710056/article/details/80294501

版权

#coding:gb2312
import numpy as np
from sklearn import neighbors
import csv


def showimg(line):
    """Parse one labelled CSV row into a label and a flat pixel vector.

    The first comma-separated field is taken as the label; the next 784
    fields are the pixel values of a 28x28 image flattened row by row.
    Any fields after the 784th pixel are ignored.

    Args:
        line: One text line of the training CSV, e.g. ``"5,0,0,...,0\\n"``.

    Returns:
        A tuple ``(num, returnVect)``. ``num`` is the label exactly as it
        appears in the row (a string, not converted to int). ``returnVect``
        is a float NumPy array of shape ``(1, 784)`` with the pixel values.

    Raises:
        ValueError: If the row has fewer than 784 pixel fields, or if a
            pixel field is not an integer literal.
    """
    fields = line.split(',')
    num = fields[0]
    pixels = fields[1:785]
    if len(pixels) != 784:
        raise ValueError(
            "expected a label followed by 784 pixel values, got "
            + str(len(pixels)) + " pixel values"
        )
    # int() tolerates surrounding whitespace, so the newline that trails
    # the last field of a line read from a file is harmless.
    returnVect = np.array([int(value) for value in pixels], dtype=float)
    return num, returnVect.reshape(1, 784)

def showtestimg(line):
    """Parse one unlabelled CSV row into a flat pixel vector.

    The first 784 comma-separated fields are the pixel values of a 28x28
    image flattened row by row. Any fields after the 784th are ignored.

    Args:
        line: One text line of the test CSV, e.g. ``"0,0,...,0\\n"``.

    Returns:
        A float NumPy array of shape ``(1, 784)`` with the pixel values.

    Raises:
        ValueError: If the row has fewer than 784 fields, or if a field is
            not an integer literal.
    """
    pixels = line.split(',')[:784]
    if len(pixels) != 784:
        raise ValueError(
            "expected 784 pixel values, got " + str(len(pixels))
        )
    # int() tolerates surrounding whitespace, so the newline that trails
    # the last field of a line read from a file is harmless.
    returnVect = np.array([int(value) for value in pixels], dtype=float)
    return returnVect.reshape(1, 784)

def readtest(filePath='D:\\data\\test.csv'):
    """Load the unlabelled test CSV into a sample matrix.

    The first line of the file is skipped (treated as a header) and blank
    lines are ignored. Every remaining line is parsed with
    ``showtestimg`` and stored as one row of the result. A progress
    message with the 1-based file line number is printed for each row.

    Args:
        filePath: Path of the CSV file to read. Defaults to the location
            the script has always used.

    Returns:
        A float NumPy array of shape ``(rows, 784)``, where ``rows`` is the
        number of data lines actually present in the file.
    """
    # The context manager guarantees the handle is closed even when a row
    # fails to parse.
    with open(filePath) as df:
        lines = df.readlines()

    # Keep the original file line number with each row so the progress
    # output still refers to positions in the file.
    rows = [
        (line_no, text)
        for line_no, text in enumerate(lines, start=1)
        if line_no != 1 and text.strip()
    ]

    # Size the matrix from the data rather than a fixed row count, so a
    # shorter file yields no phantom all-zero samples and a longer one does
    # not overflow the buffer.
    testingMat = np.zeros((len(rows), 784))
    for row_index, (line_no, text) in enumerate(rows):
        print("第" + str(line_no) + "行")
        testingMat[row_index, :] = showtestimg(text)
    return testingMat

if __name__ == "__main__":
    # End-to-end run: load the labelled training CSV, fit a k-nearest-
    # neighbours classifier with default settings, predict the test set and
    # write the predictions as an (ImageId, Label) CSV.
    knn = neighbors.KNeighborsClassifier()

    filePath = 'D:\\data\\train.csv'
    # The context manager guarantees the handle is closed after reading.
    with open(filePath) as df:
        lines = df.readlines()

    # Skip the first line (header) and blank lines, keeping each row's
    # 1-based file line number for the progress output.
    rows = [
        (line_no, text)
        for line_no, text in enumerate(lines, start=1)
        if line_no != 1 and text.strip()
    ]

    # Initialise the sample matrix (M x 784) from the actual number of
    # data rows so it always matches the number of labels passed to fit().
    trainingMat = np.zeros((len(rows), 784))
    label = []
    for row_index, (line_no, text) in enumerate(rows):
        print("第" + str(line_no) + "行")
        num, vect = showimg(text)
        label.append(num)
        trainingMat[row_index, :] = vect

    print("开始训练模型")
    knn.fit(trainingMat, label)
    print("读入测试数据")
    testingMat = readtest()
    print("开始预测")
    results = knn.predict(testingMat)
    print(len(results))
    print("写入文件")
    with open("D:/data/result.csv", "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        # Write the column names first.
        writer.writerow(["ImageId", "Label"])
        # Image ids are 1-based positions in the test set.
        for image_id, predicted in enumerate(results, start=1):
            writer.writerow([image_id, predicted])