import numpy as np
import pandas as pd
class KNN(object):
    """Tiny k-nearest-neighbours classifier over a tabular movie data set.

    The table is expected to contain the numeric columns listed in
    ``FEATURE_COLUMNS`` and the label column ``LABEL_COLUMN``.

    Typical use: ``readData()`` -> ``clearData()`` -> ``distance()`` ->
    ``KNN_way()``.

    Args:
        path: Location of the data file (CSV, with an Excel fallback).
        testdata: Sequence holding the query sample's feature values, in the
            same order as ``FEATURE_COLUMNS``.
        k: Number of nearest rows that take part in the vote.
    """

    # Feature columns (funny / hugging / fighting scene counts), in the same
    # order as the values of ``testdata``.
    FEATURE_COLUMNS = ('搞笑镜头', '拥抱镜头', '打斗镜头')
    # Column holding the class label (movie type).
    LABEL_COLUMN = '电影类型'

    def __init__(self, path, testdata, k):
        self.path = path
        self.testData = testdata
        self.k = k
        # Filled in by readData(); None until then.
        self.data = None

    def readData(self):
        """Load the table at ``self.path`` into ``self.data``.

        The file is first parsed as a GBK-encoded, comma-separated CSV; if
        that fails with a decoding/parsing error it is read as an Excel
        workbook instead.
        """
        try:
            self.data = pd.read_csv(self.path, sep=',', encoding='gbk')
        except ValueError:
            # UnicodeDecodeError and the pandas parser errors are ValueError
            # subclasses. Catching only these keeps KeyboardInterrupt and
            # SystemExit from being swallowed as a bare ``except`` would.
            self.data = pd.read_excel(self.path)

    def clearData(self):
        """Drop columns, then rows, that consist entirely of missing values."""
        self.data.dropna(axis=1, how='all', inplace=True)
        self.data.dropna(axis=0, how='all', inplace=True)

    def distance(self):
        """Store each row's Euclidean distance to the query sample.

        The result is written to a new ``'distance'`` column of ``self.data``.
        """
        squared = sum(
            (self.data[column] - self.testData[position]) ** 2
            for position, column in enumerate(self.FEATURE_COLUMNS)
        )
        self.data['distance'] = np.sqrt(squared)

    def KNN_way(self):
        """Print and return the majority label among the ``k`` nearest rows.

        Requires ``distance()`` to have been called first. When several labels
        are equally frequent, the first one in sorted order is chosen.

        Returns:
            The winning label.

        Raises:
            ValueError: If there is no neighbour to vote (empty data, ``k``
                smaller than 1, or every neighbour's label is missing).
        """
        # nsmallest picks exactly k rows even when distances tie; matching
        # rows back by distance value would count tied rows more than once.
        nearest = self.data.nsmallest(self.k, 'distance')
        winners = nearest[self.LABEL_COLUMN].mode()
        if winners.empty:
            raise ValueError('no labelled neighbours to vote: check data and k')
        top_type = winners.iloc[0]
        print(top_type)
        return top_type
if __name__ == "__main__":
    # Demo: classify one sample (funny=23, hugging=3, fighting=17 scenes)
    # with the hand-written KNN class, using the 5 nearest rows.
    test = KNN(r'C:\Users\Administrator\Desktop\电影分类数据.xlsx', [23, 3, 17], 5)
    test.readData()
    test.clearData()
    test.distance()
    test.KNN_way()

    # Cross-check the result with scikit-learn's classifier.
    from sklearn.neighbors import KNeighborsClassifier

    # Select the feature columns explicitly: test.data also carries the
    # 'distance' column added above, which must not be used for training.
    data = test.data[['搞笑镜头', '拥抱镜头', '打斗镜头']].values  # features
    labels = test.data['电影类型'].values  # class labels
    # Only KNeighborsClassifier is imported, so call it directly.
    knn = KNeighborsClassifier(n_neighbors=test.k)
    knn.fit(data, labels)
    print('预测类型为:', knn.predict([[23,3,17]]))
# Source: blog post "[Untitled] kNN algorithm study" (【无标题】kNN算法学习),
# first published 2022-07-09 11:43:36.