参考文献:
- 应用多元统计分析,高惠璇. 北京大学出版社
- 统计学习方法第二版,李航
import math
import numpy as np
class KNN_Model:
def __init__(self, X_train, y_train, n_neighbors=3, p=2, method="Minkowski"):
"""
param: n_neighbors 临近点个数
param: p 距离度量
param: method 度量距离的方法 又闵可夫斯基距离,标准欧式距离,兰士距离
距离度量方法还有相似系数,斜交距离,余弦夹角,详见应用多元统计分析课本聚类分析的章节
这里只写了三种距离
"""
self.n = n_neighbors
self.p = p
self.X_train = X_train
self.y_train = y_train
self.var = np.var(X_train, axis=0)
self.method = method
self.pred_list = []
# Majority vote: find the most frequent class among the nearest neighbours
def __func(self, dic, max_count):
for keys, values in dic.items():
if values == max_count:
return keys
def __class_count(self, new_knn_list):
new_knn_list1 = list(dict(new_knn_list).values())
from collections import Counter
dic = Counter(new_knn_list1)
max_count = max(dic.values())
ret = self.__func(dic, max_count)
return ret
# 闵可夫斯基距离
def __Minkowski(self, x, y, p):
if len(x) == len(y) and len(x) > 1:
sum_ = 0
for i in