"""
Two distance formulas are commonly used:
1. Manhattan distance
   |x1 - x2| + |y1 - y2|
2. Euclidean distance
   sqrt((x1 - x2)**2 + (y1 - y2)**2)
"""
import matplotlib.pyplot as plt
import numpy as np
def CreateDataset():
    """Build the small hard-coded 2-D sample set used by the demo.

    Returns:
        A tuple ``(group, labels)``: ``group`` is an (8, 2) float array of
        points and ``labels`` is a length-8 array of 'A'/'B' class labels,
        where ``labels[i]`` is the class of ``group[i]``.
    """
    group = np.array([
        [1.0, 2.0],
        [1.2, 0.1],
        [0.1, 1.4],
        [3.2, 2.1],
        [0.3, 3.5],
        [5.2, 3.1],
        [1.0, 1.1],
        [0.5, 1.5],
    ])
    labels = np.array(['B', 'A', 'B', 'A', 'B', 'A', 'B', 'B'])
    return group, labels
def KNN_classify(k, dis, X_train, x_train, Y_test):
    """Classify each test point by majority vote among its k nearest neighbours.

    Args:
        k: Number of nearest training points that vote; must be at least 1.
            If k exceeds the number of training points, all of them vote.
        dis: Distance metric, 'E' for Euclidean or 'M' for Manhattan.
        X_train: Training features, array of shape (n_train, n_features).
        x_train: Training labels, one per row of X_train (despite the name,
            these are labels, not features).
        Y_test: Points to classify, array of shape (n_test, n_features).

    Returns:
        A NumPy array holding one predicted label per row of Y_test.

    Raises:
        AssertionError: If dis is neither 'E' nor 'M'.
        ValueError: If k is smaller than 1.
    """
    if dis not in ('E', 'M'):
        # Raised explicitly (rather than via `assert`) so the check survives
        # `python -O`; the exception type and message are kept for callers.
        raise AssertionError('dis must E or M,E代表欧式举例,M代表曼哈顿距离 ')
    if k < 1:
        raise ValueError('k must be at least 1')

    predictions = []
    for point in Y_test:
        # Broadcasting subtracts the query point from every training row.
        offsets = X_train - point
        if dis == 'E':
            distances = np.sqrt(np.sum(offsets ** 2, axis=1))
        else:
            distances = np.sum(np.abs(offsets), axis=1)

        # A stable sort keeps equidistant neighbours in training order, so
        # the result is deterministic.
        nearest = np.argsort(distances, kind='stable')[:k]

        # Count the votes per label, visiting neighbours nearest-first.
        votes = {}
        for index in nearest:
            label = x_train[index]
            votes[label] = votes.get(label, 0) + 1

        # Pick the label with the most votes. On a tie, max() returns the
        # first such key in insertion order, i.e. the label whose closest
        # representative is nearest to the query point.
        predictions.append(max(votes, key=votes.get))
    return np.array(predictions)
if __name__ == '__main__':
    groups, labels = CreateDataset()

    # Plot the sample points: class 'A' as red stars, class 'B' as green plus signs.
    plt.scatter(groups[labels == 'A', 0], groups[labels == 'A', 1], color="r", marker="*")
    plt.scatter(groups[labels == 'B', 0], groups[labels == 'B', 1], color="g", marker="+")
    plt.show()

    # Classify two query points with 1-nearest-neighbour, Euclidean distance.
    y_test_pred = KNN_classify(1, 'E', groups, labels, np.array([[0.0, 0.1], [0.4, 2.0]]))
    print(y_test_pred)