from collections import Counter
import numpy as np
# Labelled training set used as the default data of KNNPredict.
# Each row of knownData is (gender, height, weight); knownTarget[i] is the
# class label that belongs to knownData[i].
knownData = (
    (1, 180, 85),
    (1, 180, 86),
    (1, 180, 90),
    (1, 180, 100),
    (1, 185, 120),
    (1, 175, 80),
    (1, 175, 60),
    (1, 170, 60),
    (1, 175, 90),
    (1, 175, 100),
    (1, 185, 90),
    (1, 185, 80),
)
# Rough gloss of the labels: 偏瘦 = underweight, 正常 = normal,
# 稍胖 = slightly overweight, 过胖 = overweight, 太胖 = obese.
# The literals must use ASCII quotes; typographic quotes are a SyntaxError.
knownTarget = (
    '稍胖', '稍胖', '稍胖', '过胖',
    '太胖', '正常', '偏瘦', '正常',
    '过胖', '太胖', '正常', '偏瘦',
)
def KNNPredict(current, knownData=knownData, knownTarget=knownTarget, k=3):
    """Predict the class label of ``current`` by k-nearest-neighbour voting.

    Args:
        current: Unlabelled sample in the form (gender, height, weight).
        knownData: Labelled samples, each in the same form as ``current``.
        knownTarget: Class labels, paired with ``knownData`` by position.
        k: Number of nearest neighbours that take part in the vote. It is
            only consulted when no known sample matches ``current``
            exactly, and must then be at least 1.

    Returns:
        The label of an exactly matching known sample if one exists (the
        most frequent label if several identical samples match).
        Otherwise the most frequent label among the ``k`` known samples of
        the same gender that are closest to ``current`` in Euclidean
        (height, weight) distance; on a tie the label seen first among the
        nearest samples wins.

    Raises:
        ValueError: If a neighbour vote is needed and ``k`` is smaller
            than 1, or no known sample has the same gender as ``current``.
    """
    current = tuple(current)
    # Keep (features, label) pairs in a list instead of a dict: a dict
    # collapses repeated feature tuples into a single entry (losing their
    # votes and overwriting conflicting labels) and rejects list samples.
    labelled = [(tuple(features), label)
                for features, label in zip(knownData, knownTarget)]

    # An exact match with a known sample decides the result immediately.
    exact = [label for features, label in labelled if features == current]
    if exact:
        return Counter(exact).most_common(1)[0][0]

    if k < 1:
        # A non-positive k would either select nothing or, when negative,
        # silently slice "all but the last |k|" neighbours.
        raise ValueError("k must be at least 1, got %r" % (k,))

    # Only samples with the same gender as `current` are candidates.
    candidates = [pair for pair in labelled if pair[0][0] == current[0]]
    if not candidates:
        raise ValueError(
            "no known sample has the same gender as %r" % (current,))

    def distance(pair):
        # Euclidean distance over height and weight; gender is excluded
        # because every candidate already shares it.
        features = pair[0]
        return ((features[1] - current[1]) ** 2
                + (features[2] - current[2]) ** 2) ** 0.5

    # sorted() is stable, so equally distant samples keep their input order.
    nearest = sorted(candidates, key=distance)[:k]

    # Majority vote among the k nearest neighbours.
    votes = Counter(label for _, label in nearest)
    return votes.most_common(1)[0][0]
# Classify a few unlabelled samples and print each one with its prediction.
unKnownData = [(1, 180, 70), (1, 160, 90), (1, 170, 85)]
for current in unKnownData:
    # The separator must be an ASCII-quoted literal; typographic quotes
    # are a SyntaxError.
    print(current, ':', KNNPredict(current))