import numpy as np
import math
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
class KNN:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    The training set is a 2-D array in which every row holds the feature
    values followed by the class label in the last column.
    """

    def __init__(self, k, dataSet):
        """Store the neighbour count and the labelled training set.

        Args:
            k: Number of nearest neighbours that vote on each prediction.
            dataSet: 2-D array-like; each row is the feature values followed
                by the class label in the last column.
        """
        self.k = k
        # np.asarray returns an ndarray input unchanged and additionally
        # lets nested lists be used as a training set.
        self.dataSet = np.asarray(dataSet)
        self.length = len(self.dataSet)

    def distance(self, data):
        """Return the Euclidean distance from *data* to every training row.

        Args:
            data: 1-D array-like of feature values (no label column).

        Returns:
            1-D float array with one distance per training row, in training
            set order.
        """
        if self.length == 0:
            return np.zeros(0)
        # Drop the label column, then reduce over the feature axis so all
        # rows are handled in a single vectorized pass.
        diff = self.dataSet[:, :-1] - np.asarray(data)
        return np.sqrt(np.sum(np.square(diff), axis=1))

    def knn(self, newData):
        """Classify every row of *newData* by majority vote of its neighbours.

        Args:
            newData: 2-D array-like of feature rows (no label column).

        Returns:
            1-D float array holding the predicted label of each row. When
            several labels tie for the most votes, the smallest label wins.

        Raises:
            ValueError: If ``k`` is smaller than 1 or the training set is
                empty, because no neighbour could vote.
        """
        if self.k < 1 or self.length == 0:
            raise ValueError(
                "knn requires k >= 1 and a non-empty training set"
            )
        queries = np.asarray(newData)
        train_labels = self.dataSet[:, -1]
        lables = np.zeros(len(queries))
        for i, sample in enumerate(queries):
            # argsort yields the training indices ordered by increasing
            # distance; slicing keeps the k closest (or every point when k
            # exceeds the training set size).
            nearest = np.argsort(self.distance(sample))[:self.k]
            # Count votes per distinct label instead of indexing a k-sized
            # array by label, which failed whenever a label was >= k.
            classes, votes = np.unique(
                train_labels[nearest], return_counts=True
            )
            lables[i] = classes[np.argmax(votes)]
        return lables
def main():
    """Run a 3-nearest-neighbour demo on the iris dataset.

    Splits the iris samples into 75% training and 25% test data, appends the
    training targets as the last column of the training features, classifies
    the test samples and prints the predictions followed by the true labels.
    """
    iris = load_iris()
    split = train_test_split(iris.data, iris.target, test_size=0.25)
    features_train, features_test, labels_train, labels_test = split

    # KNN expects the class label as the last column of every training row.
    labelled_train = np.column_stack((features_train, labels_train))

    classifier = KNN(3, labelled_train)
    predictions = classifier.knn(features_test)
    print(predictions)
    print(labels_test)


main()
'''
[2. 0. 2. 2. 1. 0. 2. 1. 1. 1. 2. 1. 2. 0. 2. 1. 1. 2. 2. 2. 0. 1. 2. 0.
2. 1. 1. 2. 0. 1. 2. 2. 1. 1. 2. 2. 0. 1.]
[2 0 2 2 1 0 2 2 1 1 2 1 2 0 1 1 1 2 2 2 0 1 2 0 2 1 1 2 0 1 2 2 1 1 2 2 0
1]
'''