class KNeighborsClassifier(object):
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    ``fit`` only stores the training data; all of the work happens in
    ``predict``, which compares every query sample against every stored
    training sample and takes a majority vote among the closest ones.

    Args:
        n_neighbors: Number of nearest training samples that vote on the
            label of each query sample. Defaults to 5.
    """

    def __init__(self, n_neighbors=5):
        self.n_neighbors = n_neighbors

    def fit(self, data_x, data_y):
        """Store the training set (k-NN has no real training step).

        Args:
            data_x: Training samples, one row per sample. Anything
                ``np.asarray`` accepts (ndarray, nested list, ...).
            data_y: Labels, one per row of ``data_x``.

        Raises:
            ValueError: If ``data_x`` and ``data_y`` do not contain the same
                number of samples.
        """
        data_x = np.asarray(data_x)
        data_y = np.asarray(data_y)
        # Validate before assigning so that a failed fit() never leaves the
        # instance holding mismatched samples and labels.
        if data_x.shape[0] != data_y.shape[0]:
            raise ValueError('数据集与输入数据维度不同')
        self.x = data_x
        self.y = data_y

    def predict(self, x):
        """Predict a label for every row of ``x``.

        Args:
            x: Query samples as a 2-D array-like with one row per sample and
                the same number of columns as the training data.

        Returns:
            A list with one predicted label per query row. When several
            classes receive the same number of votes, the class that appears
            first among the nearest neighbours (i.e. the closest one) wins.

        Raises:
            ValueError: If ``x`` is not 2-D, if its number of columns differs
                from the training data, or if ``n_neighbors`` is not between
                1 and the number of training samples.
        """
        x = np.asarray(x)
        # Without this check a query of the wrong width can silently
        # broadcast against the training matrix and yield meaningless labels.
        if x.ndim != 2 or x.shape[1] != self.x.shape[1]:
            raise ValueError('数据集与输入数据维度不同')

        k = self.n_neighbors
        n_train = self.x.shape[0]
        # Asking for more neighbours than there are training samples (or for
        # none at all) used to surface as a bare IndexError deep in the loop.
        if not 1 <= k <= n_train:
            raise ValueError(
                'n_neighbors 必须在 1 和训练样本数之间 '
                '(n_neighbors=%s, 训练样本数=%s)' % (k, n_train)
            )

        result = []
        for sample in x:
            # Euclidean distance from this query to every training sample.
            distance = np.sqrt(np.sum((self.x - sample) ** 2, axis=1))
            # A stable sort keeps equidistant training samples in their
            # original order, so the chosen neighbours are deterministic.
            nearest = np.argsort(distance, kind='stable')[:k]

            # Tally the labels of the k nearest neighbours, closest first.
            votes = {}
            for label in self.y[nearest]:
                votes[label] = votes.get(label, 0) + 1

            # max() returns the first key with the highest count, and dicts
            # preserve insertion order, so ties go to the closest class.
            result.append(max(votes, key=votes.get))
        return result
接下来使用 sklearn 自带的鸢尾花(iris)数据集,对 KNeighborsClassifier 进行试验。
# Demo: evaluate KNeighborsClassifier on the iris dataset loaded via sklearn.
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

iris = load_iris()
x, y = iris.data, iris.target

# Hold out 30% of the samples for testing. No random_state is passed, so the
# split (and therefore the printed accuracy) can differ from run to run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

knn = KNeighborsClassifier()
knn.fit(x_train, y_train)
result = knn.predict(x_test)

# Print the accuracy of the predictions against the held-out labels.
print(accuracy_score(y_test, result))
运行一下,某次运行得到的准确率是 0.977777777778(train_test_split 没有指定 random_state,每次划分都是随机的,因此结果会略有浮动)。