KNN(K近邻)算法是机器学习中用于解决分类问题的一种监督学习算法。
KNN的原理是:当预测一个新的点x时,根据距离它最近的K个点的类别来判断x属于哪个类别。
步骤:
1.计算已知类别数据集中的每个点与当前点之间的距离
2.按照距离递增排序
3.选取与当前点距离最小的K个点
4.统计前K个点所属类别的出现频率
5.返回出现频率最高的类别作为当前点的预测类别
from numpy import *
import math
def get_dataset():
    """Return the built-in toy training set.

    Returns:
        A ``(group, label)`` pair: ``group`` is a 4x2 numpy array holding
        four 2-D sample points, and ``label`` is a list with the class
        name of each point, in the same order.
    """
    samples = [
        [1, 1],
        [0, 0],
        [5, 5],
        [4, 10],
    ]
    names = list('ABCD')
    return array(samples), names
def get_class(group, label, data, k):
    """Classify ``data`` by majority vote among its ``k`` nearest points.

    Args:
        group: Training points, one row per sample (2-D array-like).
        label: Class labels; ``label[i]`` is the class of ``group[i]``.
        data: Query point with as many coordinates as one row of ``group``.
        k: Number of nearest neighbours taking part in the vote. When ``k``
            exceeds the number of training points, every point votes.

    Returns:
        The label occurring most often among the ``k`` nearest neighbours.
        When several labels share the highest count, the smallest label
        wins (for labels 'A'..'D' this means A before B before C before D).

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``group`` has no points.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    points = array(group, dtype=float)
    if points.size == 0:
        raise ValueError("group must contain at least one point")

    # Squared Euclidean distance: it ranks points exactly like the true
    # distance, and summing over axis 1 works for any number of coordinates.
    offsets = points - array(data, dtype=float)
    distances = (offsets ** 2).sum(axis=1)

    # argsort orders ascending (closest first); a stable sort keeps
    # equidistant points in their original order so results are repeatable.
    nearest = distances.argsort(kind='stable')[:k]

    # Count how often each class appears among the selected neighbours.
    votes = {}
    for position in nearest:
        name = label[position]
        votes[name] = votes.get(name, 0) + 1

    # Highest count first; on equal counts fall back to the smallest label.
    return min(votes, key=lambda name: (-votes[name], name))
if __name__ == '__main__':
    # Demo run: load the sample data set and print the classification
    # result for the query point (2, 10) with k = 1.
    group, label = get_dataset()
    print(get_class(group, label, data=[2, 10], k=1))