# Requirement: using the categories on the left side of the table, classify the movie 唐人街探案.
# 分类算法 KNN
# 1.计算每个样本和我新样本的相似度是多高?
# 2.排序
# 3.选取K值
import pandas as pd
import numpy as np
# Feature columns used as the coordinates of every sample.
FEATURE_COLUMNS = ["搞笑镜头", "拥抱镜头", "打斗镜头"]
# Header positions whose *labels* supply the new sample's value for each
# entry of FEATURE_COLUMNS, in the same order.
# NOTE(review): presumably the workbook stores the new movie's shot counts in
# its header row, so pandas exposes them as column labels -- confirm against
# the spreadsheet layout.
QUERY_LABEL_POSITIONS = (7, 8, 9)
LABEL_COLUMN = "电影类型"
# Despite its name ("similarity") this column holds a distance:
# smaller values mean a closer match.
DISTANCE_COLUMN = "相似度"
# Number of neighbours that vote; 5 matches the DataFrame.head() default
# the script relied on implicitly.
DEFAULT_K = 5


def euclidean_distances(frame, feature_columns, query):
    """Return the Euclidean distance from every row of *frame* to *query*.

    Args:
        frame: DataFrame containing the numeric columns in *feature_columns*.
        feature_columns: Column names used as coordinates.
        query: One numeric value per feature column, in the same order.

    Returns:
        A Series aligned with ``frame.index`` holding the distances.

    Raises:
        ValueError: If no features are given or the lengths differ.
    """
    feature_columns = list(feature_columns)
    query = list(query)
    if not feature_columns or len(feature_columns) != len(query):
        raise ValueError(
            "feature_columns and query must be non-empty and of equal length"
        )
    squared = sum(
        (frame[column] - value) ** 2
        for column, value in zip(feature_columns, query)
    )
    return np.sqrt(squared)


def k_nearest(frame, k=DEFAULT_K):
    """Return the label and distance of the *k* closest rows, nearest first.

    Args:
        frame: DataFrame holding LABEL_COLUMN and DISTANCE_COLUMN.
        k: Number of neighbours to keep; must be at least 1.

    Raises:
        ValueError: If *k* is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    ranked = frame.loc[:, [LABEL_COLUMN, DISTANCE_COLUMN]].sort_values(
        by=DISTANCE_COLUMN,
        ascending=True,
        kind="mergesort",  # stable: equal distances keep their row order
    )
    return ranked.head(k)


def majority_label(neighbors):
    """Return the most frequent LABEL_COLUMN value among *neighbors*.

    Ties are resolved by taking the first of the sorted modes.

    Raises:
        ValueError: If there is no non-missing label to vote with.
    """
    modes = neighbors[LABEL_COLUMN].mode()
    if modes.empty:
        raise ValueError("no labelled neighbours to vote with")
    return modes.iloc[0]


if __name__ == "__main__":
    movies = pd.read_excel("./电影分类数据.xlsx")
    print(movies.columns)
    # The new sample's feature values are read from the header labels.
    query = [movies.columns[position] for position in QUERY_LABEL_POSITIONS]
    list1 = euclidean_distances(movies, FEATURE_COLUMNS, query)
    print(list1)
    # Add the distances as a new column.
    movies[DISTANCE_COLUMN] = list1
    print(movies.columns)
    # Rank once and reuse the result for both the table and the vote.
    neighbors = k_nearest(movies, DEFAULT_K)
    print(neighbors)
    print("这部电影的类型是: ", majority_label(neighbors))