KNN实现手写数字(自己的)识别

KNN实现手写数字(自己的)识别

项目文件

(原文此处为"项目文件"一节的配图,图片未随文本保留)

代码展示

import numpy as npy
import os
from collections import  Counter
import operator as opt
from matplotlib import pyplot as plt

#欧氏距离
def knn(k,test_data,train_data,labels):
    """Classify one sample by brute-force k-nearest-neighbours (Euclidean).

    Args:
        k: number of nearest training rows that get a vote.
        test_data: the sample to classify; it is replicated with ``tile`` so
            that it lines up with every row of ``train_data``.
        train_data: array whose rows are the training samples.
        labels: sequence indexable by training-row number.

    Returns:
        The label with the most votes among the ``k`` closest rows. When
        several labels share the top count, the one that received its first
        vote earliest (i.e. from the closest row) is returned.

    Raises:
        IndexError: if ``k`` is not positive (no votes are cast) or exceeds
            the number of training rows.
    """
    sample_count = train_data.shape[0]
    # Stack copies of the query so the subtraction is row-for-row.
    stacked_query = npy.tile(test_data, (sample_count, 1))
    offsets = stacked_query - train_data
    distances = (offsets ** 2).sum(axis=1) ** 0.5
    ranking = distances.argsort()

    ballots = Counter()
    for position in range(k):
        ballots[labels[ranking[position]]] += 1
    # most_common(1) keeps the first-inserted label on ties and yields an
    # empty list (hence IndexError below) when no votes were cast.
    return ballots.most_common(1)[0][0]


#曼哈顿距离
def Knn_MANHD(k, test_data, train_data, labels):
    """Classify one sample by brute-force k-nearest-neighbours (Manhattan).

    Args:
        k: number of nearest training rows that get a vote.
        test_data: the sample to classify; replicated with ``tile`` to match
            the number of rows in ``train_data``.
        train_data: array whose rows are the training samples.
        labels: sequence indexable by training-row number.

    Returns:
        The most-voted label among the ``k`` rows with the smallest sum of
        absolute coordinate differences; ties go to the label that was voted
        for first.

    Raises:
        IndexError: if ``k`` is not positive or exceeds the number of rows.
    """
    row_total = train_data.shape[0]
    stacked_query = npy.tile(test_data, (row_total, 1))
    # Manhattan (L1) distance: sum of absolute per-coordinate differences.
    city_block = npy.absolute(stacked_query - train_data).sum(axis=1)
    nearest_first = city_block.argsort()

    tally = {}
    for position in range(k):
        label = labels[nearest_first[position]]
        tally.setdefault(label, 0)
        tally[label] += 1

    # Stable sort: among equal counts the earliest-inserted label stays first.
    ranked = sorted(tally.items(), key=lambda entry: entry[1], reverse=True)
    winner = ranked[0]
    return winner[0]

#闵氏距离p=3
def Knn_MD(k, test_data, train_data, labels, p=3):
    """Classify one sample by brute-force k-nearest-neighbours (Minkowski).

    The distance between the query and a training row is
    ``(sum(|difference| ** p)) ** (1 / p)``. The default ``p=3`` reproduces
    the previously hard-coded behaviour; ``p=1`` gives the Manhattan distance
    and ``p=2`` the Euclidean distance.

    Args:
        k: number of nearest training rows that get a vote.
        test_data: the sample to classify; replicated with ``tile`` to match
            the number of rows in ``train_data``.
        train_data: array whose rows are the training samples.
        labels: sequence indexable by training-row number.
        p: order of the Minkowski distance; must be positive.

    Returns:
        The most-voted label among the ``k`` closest rows; ties go to the
        label that was voted for first.

    Raises:
        ValueError: if ``p`` is not positive.
        IndexError: if ``k`` is not positive or exceeds the number of rows.
    """
    if p <= 0:
        raise ValueError("p must be positive")

    train_data_size = train_data.shape[0]
    # Replicate the query once per training row, then take |difference|.
    dif = abs(npy.tile(test_data, (train_data_size, 1)) - train_data)
    powered_sum = (dif ** p).sum(axis=1)  # sum of p-th powers per row
    distance = powered_sum ** (1 / p)  # p-th root -> Minkowski distance
    sort_distance = distance.argsort()

    count = {}
    for i in range(k):
        vote = labels[sort_distance[i]]
        count[vote] = count.get(vote, 0) + 1
    # Order labels by number of votes, largest first (stable on ties).
    sort_count = sorted(count.items(), key=opt.itemgetter(1), reverse=True)
    return sort_count[0][0]

#余弦相似度
def cosine_similarity(x, y, norm=False):
    """Return the cosine of the angle between the vectors ``x`` and ``y``.

    Args:
        x: indexable sequence of numbers; its length drives the iteration.
        y: indexable sequence of numbers, read at the same indices as ``x``.
        norm: when true, map the result from [-1, 1] onto [0, 1] via
            ``0.5 * cos + 0.5``.

    Returns:
        The (optionally rescaled) cosine similarity. No special handling is
        applied when either vector has zero length, so the division is by
        zero in that case.
    """
    rows = []
    for idx in range(len(x)):
        a, b = x[idx], y[idx]
        # Per-coordinate contributions to x.y, x.x and y.y respectively.
        rows.append([a * b, a * a, b * b])
    table = npy.array(rows)

    dot_xy = sum(table[:, 0])
    length_x = npy.sqrt(sum(table[:, 1]))
    length_y = npy.sqrt(sum(table[:, 2]))
    similarity = dot_xy / (length_x * length_y)

    if norm:
        return 0.5 * similarity + 0.5
    return similarity

def Knn_COS(k, test_data, train_data, labels):
    """Classify one sample by k-nearest-neighbours under cosine similarity.

    Cosine similarity is a *similarity*: larger means closer. The training
    rows are therefore ranked from most to least similar before the first
    ``k`` vote. (Ranking in ascending order, as a distance would be, selects
    the k least similar rows.)

    The cosine is computed for all training rows at once with array
    operations rather than one Python-level call per row.

    Args:
        k: number of most-similar training rows that get a vote.
        test_data: the sample to classify (one vector).
        train_data: 2-D array-like whose rows are the training samples.
        labels: sequence indexable by training-row number.

    Returns:
        The most-voted label among the ``k`` most similar rows; ties go to
        the label that was voted for first (i.e. by the more similar row).

    Raises:
        IndexError: if ``k`` is not positive or exceeds the number of rows.
    """
    query = npy.asarray(test_data, dtype=float)
    samples = npy.asarray(train_data, dtype=float)

    dots = samples.dot(query)
    scale = npy.sqrt((samples ** 2).sum(axis=1)) * npy.sqrt((query ** 2).sum())

    # A zero-length vector has no direction, so its cosine is undefined.
    # Such rows keep -inf and therefore rank behind every real similarity
    # instead of producing NaN and a division warning.
    similarity = npy.full(dots.shape, -npy.inf)
    npy.divide(dots, scale, out=similarity, where=scale > 0)

    # Negate so that argsort (ascending) yields most-similar first; a stable
    # sort keeps equal similarities in training-row order.
    sort_distance = npy.argsort(-similarity, kind="stable")

    count = {}
    for i in range(k):
        vote = labels[sort_distance[i]]
        count[vote] = count.get(vote, 0) + 1
    # Order labels by number of votes, largest first (stable on ties).
    sort_count = sorted(count.items(), key=opt.itemgetter(1), reverse=True)
    return sort_count[0][0]

#权值优化
def ED_weight_optimize(k, test_data, train_data, labels):
    """Classify one sample by rank-weighted k-nearest-neighbours (Euclidean).

    The closest training row votes with weight ``k``, the next with
    ``k - 1``, and so on down to ``1`` for the k-th closest row.

    The weights are kept as exact integers. Dividing them all by the same
    constant ``1 + 2 + ... + k`` cannot change which label wins, whereas
    rounding each normalised weight to two decimals can: it creates or breaks
    ties, and for large ``k`` it rounds the smallest weights down to zero.

    Args:
        k: number of nearest training rows that get a vote.
        test_data: the sample to classify; replicated with ``tile`` to match
            the number of rows in ``train_data``.
        train_data: array whose rows are the training samples.
        labels: sequence indexable by training-row number.

    Returns:
        The label with the largest total weight; ties go to the label that
        was voted for first (i.e. by the closer row).

    Raises:
        IndexError: if ``k`` is not positive or exceeds the number of rows.
    """
    train_data_size = train_data.shape[0]
    # Replicate the query once per training row so the subtraction lines up.
    dif = npy.tile(test_data, (train_data_size, 1)) - train_data
    distance = (dif ** 2).sum(axis=1) ** 0.5  # Euclidean distance per row
    sort_distance = distance.argsort()

    count = {}
    for i in range(k):
        vote = labels[sort_distance[i]]
        # Rank weight: k for the nearest row, 1 for the k-th nearest.
        count[vote] = count.get(vote, 0) + (k - i)
    # Order labels by total weight, largest first (stable on ties).
    sort_count = sorted(count.items(), key=opt.itemgetter(1), reverse=True)
    return sort_count[0][0]



def datatoarray(fname):
    """Read a 32x32 character image from ``fname`` as a flat list of ints.

    The first 32 characters of each of the first 32 lines are converted with
    ``int`` and appended row by row, giving 1024 values.

    Args:
        fname: path of the text file to read.

    Returns:
        A list of 1024 integers in row-major order.

    Raises:
        IndexError: if one of the first 32 lines has fewer than 32 characters.
        ValueError: if a character in the 32x32 region is not a digit.
    """
    arr = []
    # The context manager closes the file even if parsing fails part-way.
    with open(fname) as fh:
        for _ in range(32):
            thisline = fh.readline()
            for j in range(32):
                arr.append(int(thisline[j]))
    return arr

def seplabel(fname):
    """Return the integer label encoded at the start of a file name.

    The label is the text before the first ``.``, cut again at the first
    ``_``, converted with ``int`` (e.g. ``"3_12.txt"`` -> ``3``).

    Raises:
        ValueError: if that leading text is not a valid integer.
    """
    stem, _, _ = fname.partition(".")
    prefix = stem.partition("_")[0]
    return int(prefix)

def traindata():
    """Load the training set from the ``./traindata`` directory.

    Every directory entry is treated as one sample: its label comes from
    ``seplabel`` applied to the file name and its pixels from
    ``datatoarray`` applied to the file.

    Returns:
        A tuple ``(samples, labels)`` where ``samples`` is an array with one
        row per file and 1024 columns, and ``labels`` is a list holding the
        label of each row, in the order ``os.listdir`` returned the files.
    """
    file_names = os.listdir("./traindata")
    # One row per file; 1024 columns = the 32*32 pixels of one image.
    samples = npy.zeros((len(file_names), 1024))
    targets = []
    for row, file_name in enumerate(file_names):
        targets.append(seplabel(file_name))
        samples[row] = datatoarray("./traindata/" + file_name)
    return samples, targets
def datatest():
    """Evaluate each KNN variant for k = 1..16 and plot accuracy against k.

    The training set (``traindata()``) and every file in ``./testdata`` are
    loaded once up front; they do not change between runs, so re-reading them
    for every (method, k) pair only repeats disk work.

    For each method and each k, one line is printed with the method index,
    k, the number of correct predictions, the number of test files and the
    accuracy. The accuracies are stored in a 5x16 table that is printed at
    the end together with the matching table of k values, and one labelled
    curve per evaluated method is shown with matplotlib.

    Method index 3 (cosine similarity) is not evaluated: it has no entry in
    the dispatch table, so its row in the accuracy table stays at zero and
    no curve is drawn for it. Scoring it anyway would reuse the last
    prediction of the previous method and report a meaningless accuracy.

    Raises:
        ZeroDivisionError: if ``./testdata`` contains no files.
    """
    # (row index in the result table, legend label, classifier).
    methods = [
        (0, "knn", knn),
        (1, "Knn_MANHD", Knn_MANHD),
        (2, "Knn_MD", Knn_MD),
        (4, "ED_weight_optimize", ED_weight_optimize),
    ]
    method_rows = 5
    max_k = 16

    plt.subplot(111)
    plt.ylim(0.85, 1)

    # Every row of x holds the k values 1..max_k used on the horizontal axis.
    x = npy.tile(npy.arange(1, max_k + 1, dtype=float), (method_rows, 1))
    correct = npy.zeros((method_rows, max_k))

    trainarr, labels = traindata()
    testlist = os.listdir("./testdata")
    tnum = len(testlist)
    # Parse each test file once: (expected label, flattened pixels).
    samples = [
        (seplabel(thisname), datatoarray("./testdata/" + thisname))
        for thisname in testlist
    ]

    for k, title, classify in methods:
        for j in range(1, max_k + 1):
            num = 0
            for thislabel, testarr in samples:
                rknn = classify(k=j, test_data=testarr, train_data=trainarr, labels=labels)
                if thislabel == rknn:
                    num += 1
            print(k , "   ", j , "   正确率   ", num,"/",tnum,"   ",num/tnum)
            correct[k][j - 1] = num / tnum

        plt.plot(x[k], correct[k], label=title)

    print(x)
    print(correct)
    plt.legend()
    plt.show()




# Script entry point: run the experiment only when the file is executed
# directly, so importing the module to reuse its functions has no side effects.
if __name__ == "__main__":
    datatest()


  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值