# Reference data set: each movie title maps to a list of three numeric
# features followed by its genre label (the last element).
# NOTE(review): the three numbers look like per-film counts of three kinds of
# scenes -- confirm their meaning against the data source.
movie_data = {
    # 喜剧片 (comedy)
    "宝贝当家": [45, 2, 9, "喜剧片"],
    "美人鱼": [21, 17, 5, "喜剧片"],
    "澳门风云3": [54, 9, 11, "喜剧片"],
    "功夫熊猫3": [39, 0, 31, "喜剧片"],
    # 动作片 (action)
    "谍影重重": [5, 2, 57, "动作片"],
    "叶问3": [3, 2, 65, "动作片"],
    "伦敦陷落": [2, 3, 55, "动作片"],
    "我的特工爷爷": [6, 4, 21, "动作片"],
    # 爱情片 (romance)
    "奔爱": [7, 46, 4, "爱情片"],
    "夜孔雀": [9, 39, 8, "爱情片"],
    "代理情人": [9, 38, 2, "爱情片"],
    "新步步惊心": [8, 34, 17, "爱情片"],
}
# Samples to classify, laid out like the movie_data entries: numeric features
# first, then a placeholder string occupying the label slot.
goo = {
    "哔哩哔哩": [21, 4, 17, "type"],
}
from numpy import array as ar
# Classify a sample by majority vote among its nearest reference entries (KNN).
def get_type(kw, k=5, samples=None, references=None):
    """Predict the label of a sample with k-nearest-neighbours voting.

    The sample's feature vector is compared with every reference entry by
    Euclidean distance.  The ``k`` closest entries each vote with their label
    and the most frequent label wins.  The winner is printed and returned.

    Args:
        kw: Key of the sample to classify in ``samples``.
        k: Number of nearest neighbours that vote.  Defaults to 5; an odd
            number makes tied votes less likely.
        samples: Mapping of sample name to ``[feature, ..., placeholder]``.
            Defaults to the module-level ``goo`` table.
        references: Mapping of name to ``[feature, ..., label]``.
            Defaults to the module-level ``movie_data`` table.

    Returns:
        The winning label.  When several labels share the highest vote
        count, the one that appears first among the neighbours (nearest
        first) is chosen.

    Raises:
        KeyError: If ``kw`` is not a key of ``samples``.
        ValueError: If ``k`` is smaller than 1 or ``references`` is empty.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    # Resolve the defaults lazily so the module-level tables are only needed
    # when the caller does not supply their own.
    if samples is None:
        samples = goo
    if references is None:
        references = movie_data
    if not references:
        raise ValueError("references must not be empty")

    # The last element is the label slot, not a feature.
    goal_ = ar(samples[kw][:-1])

    # (Euclidean distance to the sample, label) for every reference entry.
    scored = [
        ((((ar(msg[:-1]) - goal_) ** 2).sum()) ** 0.5, msg[-1])
        for msg in references.values()
    ]
    # sorted() is stable, so equally distant entries keep their table order.
    nearest = sorted(scored, key=lambda item: item[0])[:k]

    votes = {}
    for _, label in nearest:
        votes[label] = votes.get(label, 0) + 1

    # max() returns the first maximal item, i.e. the tied label seen earliest.
    movie_type = max(votes.items(), key=lambda item: item[1])[0]
    print(movie_type)
    return movie_type
# Classify the '哔哩哔哩' sample stored in goo; get_type prints the predicted label.
get_type('哔哩哔哩')
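# KNN算法实现 (KNN algorithm implementation)
# 最新推荐文章于 2024-04-25 22:35:04 发布 (web-page footer: "latest recommended article published 2024-04-25 22:35:04")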