import pandas as pd
import numpy as np
def distance(v1, v2):
    """
    Compute the Euclidean distance between two points.

    Both inputs are converted to float NumPy arrays before subtracting, so
    plain lists, tuples, NumPy arrays and pandas Series are all accepted and
    the subtraction is always positional (pandas index labels are ignored
    instead of being aligned).

    :param v1: coordinates of point v1 (array-like of numbers)
    :param v2: coordinates of point v2 (array-like, same length as v1)
    :return: the Euclidean distance between v1 and v2
    """
    p1 = np.asarray(v1, dtype=float)
    p2 = np.asarray(v2, dtype=float)
    # Square the element-wise differences, add them up, take the root.
    dist = np.sqrt(np.sum(np.power(p1 - p2, 2)))
    return dist
def knn(train, k, test):
    """
    Hand-written k-nearest-neighbours classifier.

    The feature values are read from the columns at positions 1, 2 and 3 of
    ``train``; the query features are ``test[1:]`` (the first element of
    ``test`` is skipped). The Euclidean distance from every training row to
    the query is computed, the ``k`` closest rows are kept, and the most
    frequent value of their '电影类型' column is returned.

    The caller's DataFrame is not modified: the distance column is added to
    a private copy.

    :param train: training set (DataFrame with features in columns 1..3 and
                  a '电影类型' label column)
    :param k: number of nearest rows that take part in the vote (>= 1)
    :param test: query sample; elements 1.. are its feature values
    :return: the predicted movie category
    :raises ValueError: if k is smaller than 1
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    # Similarity is measured as Euclidean distance, computed for all rows at
    # once instead of looping with the removed ``DataFrame.ix`` indexer.
    features = train.iloc[:, 1:4].to_numpy(dtype=float)
    query = np.asarray(test[1:], dtype=float)
    dists = np.sqrt(np.sum(np.power(features - query, 2), axis=1))

    # Work on a copy so the caller's frame does not gain an extra column.
    ranked = train.copy()
    ranked['欧氏距离'] = dists

    # Stable sort keeps the original row order among equal distances.
    nearest = ranked.sort_values(by='欧氏距离', kind='mergesort').iloc[:k, :]

    # Majority vote over the k nearest rows.
    return nearest['电影类型'].mode()[0]
# Load the movie data set from the Excel workbook.
mov = pd.read_excel('电影分类数据.xlsx')

# Columns at positions 1..5 form the training table passed to knn().
train = mov.iloc[:, 1:6]

# The query sample is built from the last four column labels of the sheet.
test = np.array(mov.columns[-4:])

# Number of nearest samples that take part in the vote.
k = 5

# Run the hand-written kNN and print the prediction.
y_predict = knn(train, k, test)
print('knn算法预测值:', y_predict)
# knn算法原理
# 最新推荐文章于 2023-11-12 20:36:50 发布