实验目的
使用Python实现K近邻
实验原理
(1)计算测试数据与各个训练数据之间的距离;
(2)按照距离的递增关系进行排序;
(3)选取距离最小的K个点;
(4)确定前K个点所在类别出现的频率;
(5)返回前K个点中出现频率最高的类别作为测试数据的预测分类。
(1) 鸢尾花识别。
import numpy as np
import pandas as pd
def getdata(path, encoding='gbk'):
    """Load a CSV file and return its contents as a NumPy array.

    Args:
        path: Location of the CSV file, passed straight to
            ``pandas.read_csv`` (which, by default, treats the first line
            as the header rather than as data).
        encoding: Text encoding used to decode the file. Defaults to
            ``'gbk'``, the value that was previously hard-coded, so
            existing callers are unaffected.

    Returns:
        A ``numpy.ndarray`` built from the parsed DataFrame, one row per
        CSV data row.
    """
    frame = pd.read_csv(path, encoding=encoding)
    return np.array(frame)
def getlength(data,target,k):
    """Return the row indices of the ``k`` samples closest to ``target``.

    Distance is the squared Euclidean distance between ``target`` and every
    column of a row except the last one (the last column is skipped here,
    exactly as the original per-row ``i[:-1]`` slice did).

    Args:
        data: 2-D array-like; each row holds feature values followed by one
            trailing column that is excluded from the distance.
        target: 1-D array-like with one value per feature column.
        k: Number of nearest rows to select.

    Returns:
        A list of row indices ordered from nearest to farthest. Ties keep
        the lower row index first. The list has ``min(k, number of rows)``
        entries, and is empty when ``k <= 0`` or ``data`` has no rows.
    """
    rows = np.asarray(data)
    if k <= 0 or rows.size == 0:
        return []

    features = rows[:, :-1].astype(float)
    sq_dist = np.sum((features - target) ** 2, axis=1)

    # A stable sort replaces the old "overwrite the minimum with the current
    # maximum" trick, which re-selected already-picked rows as soon as every
    # remaining distance equalled the maximum (e.g. whenever k reached the
    # number of rows).
    nearest = np.argsort(sq_dist, kind='stable')[:k]
    return nearest.tolist()
def KNNs(data,indexList):
    """Return the majority class label among the selected rows.

    Each index in ``indexList`` casts one vote for the label stored in the
    last column of ``data[index]``.

    Args:
        data: Indexable collection of rows whose last element is the label.
        indexList: Indices of the rows that are allowed to vote.

    Returns:
        The label with the most votes. Labels 0, 1 and 2 are pre-registered
        in that order, so a tie between them resolves to the smallest one
        and an empty ``indexList`` yields 0, matching the previous
        behaviour. Any other label is registered when first seen instead of
        raising ``KeyError``.
    """
    votes = {0: 0, 1: 0, 2: 0}
    for idx in indexList:
        label = data[idx][-1]
        votes[label] = votes.get(label, 0) + 1

    # max() keeps the first maximal key in insertion order, which is the
    # same winner the original strict ``>`` scan produced.
    return max(votes, key=votes.get)
if __name__=='__main__':
    # Class index -> species name printed for the prediction.
    # ('virginica' was previously misspelled as 'virginnica'.)
    dic = {0: 'setosa', 1: 'versicolor', 2: 'virginica'}
    path='iris_training.csv'
    data= getdata(path)
    # Fixed query sample to classify.
    # NOTE(review): assumes each training row holds four feature values
    # followed by the class index -- confirm against iris_training.csv.
    target=np.array([5,3,1,0.5])

    # Validate each entry inside the loop so that one bad input re-prompts
    # instead of ending the session (the old outer try/except printed
    # "please re-enter" and then exited).
    while True:
        try:
            k = input("请输入k(退出请输入quit):")
        except EOFError:
            # Input stream closed: nothing more to read, stop quietly.
            break
        if k=='quit':
            print("退出成功")
            break
        try:
            k=int(k)
        except ValueError:
            print("输入出错,请重新输入")
            continue
        # k must pick at least one neighbour and cannot exceed the number
        # of training rows.
        if not 1 <= k <= len(data):
            print("输入出错,请重新输入")
            continue
        indexList=getlength(data,target,k)
        n=KNNs(data,indexList)
        # Fall back to the raw label if it has no registered species name.
        print(dic.get(n, n))