kaggle-图像识别-getstart-knn

这篇博客介绍了如何使用KNN算法进行Kaggle上的图像识别任务。首先,博主导入了必要的库,然后加载并处理了训练集和测试集数据,将像素值归一化。接着,定义了一个classify0函数来执行KNN分类,并实现了保存预测结果到CSV文件的功能。最后,博主通过调用handwritingClassTest函数对一小部分数据进行了测试。
摘要由CSDN通过智能技术生成
import pandas as pd
from pandas import DataFrame, Series
import numpy as np
import matplotlib.pyplot as plt
import operator
from os import listdir
from csv import *
import csv


# Scratch notes on range():
# for i in range(32):
#     print(i)            prints 0-31
# for i in range(2, 30, 1):
#     print(i)            prints 2-29

# Read the CSV files and convert the resulting tables to plain NumPy arrays.
img_train = pd.read_csv('train.csv')
img_test = pd.read_csv('test.csv')
img_train = np.array(img_train)
img_test = np.array(img_test)

# Author's notes on the training table: img_train.shape[0] printed 42000
# (header row not counted) and img_train.shape[1] printed 785.
#
# Column 0 of the training table is the label and the remaining columns are
# the pixel values; every column of the test table is a pixel value.
# Slicing whole columns replaces the former row-by-row Python copy loops.
# Both feature matrices and the label vector are float64, exactly as the
# zero-initialised arrays they replace.
trainLabel = img_train[:, 0].astype(np.float64)
trainSet = img_train[:, 1:].astype(np.float64)
testSet = img_test.astype(np.float64)
# Placeholder only: nothing in this script fills in test labels.
testLabel = np.zeros(img_test.shape[0])

# An earlier variant stored each sample as a (28, 28, 1) image instead of a
# flat vector; trainSet.reshape(-1, 28, 28, 1) gives that layout if needed.

# Normalise pixel values by dividing by 255.
trainSet /= 255
testSet /= 255
# Labels stay float; use trainLabel.astype(int) if integer labels are wanted.

def classify0(inX, dataSet, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: Feature vector of the sample to classify; it must broadcast
            against the rows of ``dataSet``.
        dataSet: 2-D array of training samples, one sample per row.
        labels: Sequence of training labels; ``labels[i]`` is the label of
            ``dataSet[i]``.
        k: Number of nearest neighbours that vote. A value larger than the
            number of training rows is clamped to that number.

    Returns:
        The label with the most votes. When several labels tie, the one whose
        first vote came from the closest neighbour wins.

    Raises:
        ValueError: If ``dataSet`` has no rows or ``k`` is smaller than 1.
    """
    sample_count = dataSet.shape[0]
    # Never ask for more neighbours than there are training samples.
    neighbour_count = min(k, sample_count)
    if neighbour_count < 1:
        raise ValueError(
            "classify0 needs at least one training sample and k >= 1 "
            "(got %d samples, k=%r)" % (sample_count, k)
        )

    # Broadcasting subtracts every training row from inX; np.tile is not needed.
    diffs = inX - dataSet
    distances = (diffs ** 2).sum(axis=1) ** 0.5

    # Indices of the training rows, closest first, cut to the voters.
    nearest = distances.argsort()[:neighbour_count]

    votes = {}
    for idx in nearest:
        label = labels[idx]
        votes[label] = votes.get(label, 0) + 1

    # max() returns the first key holding the highest count, and the dict
    # keeps insertion order, so ties go to the label seen closest.
    return max(votes, key=votes.get)

def saveResult(result):
    """Write the predictions to ``answer.csv``, one value per row.

    The file is opened for writing in the current working directory, so any
    existing content is replaced. It is created even when ``result`` is
    empty, in which case nothing is written to it.

    Args:
        result: Iterable of predicted values; each becomes a one-column row.
    """
    with open('answer.csv', 'w', newline='') as out_file:
        rows = [[value] for value in result]
        if rows:
            csv.writer(out_file).writerows(rows)

def handwritingClassTest(testSet, trainSet, trainLabel, k=5):
    """Classify every test sample with KNN and save the predictions.

    Each row of ``testSet`` is passed to ``classify0`` together with the
    training data. Every prediction is printed as it is produced, and the
    complete list is handed to ``saveResult`` at the end.

    No error rate is computed: no ground-truth labels for the test samples
    are passed in.

    Args:
        testSet: 2-D array of samples to classify, one per row.
        trainSet: 2-D array of training samples, one per row.
        trainLabel: Labels of the training samples, aligned with ``trainSet``.
        k: Number of neighbours used for the vote (defaults to 5, the value
            that used to be hard-coded).
    """
    resultList = []
    for sample in testSet:
        classifierResult = classify0(sample, trainSet, trainLabel, k)
        resultList.append(classifierResult)
        print(classifierResult)
    saveResult(resultList)

# Larger run kept for reference (first 20000 training samples):
#handwritingClassTest(testSet, trainSet[0:20000], trainLabel[0:20000], 5)
# Test targets: 1000 5000 10000 20000 (presumably the training-subset sizes tried)
# Quick run: classify the test set against only the first 10 training samples.
handwritingClassTest(testSet, trainSet[0:10], trainLabel[0:10])


  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值