# kNN 算法的原理和实现流程 (How the kNN algorithm works and how to implement it step by step)
import numpy as np
from matplotlib import pyplot as plt
# Ten training samples with two features each, one [feature_0, feature_1]
# pair per row.
raw_data_X = [
    [3.3935, 2.3312],
    [3.1101, 1.7815],
    [1.3438, 3.3684],
    [3.5823, 4.6792],
    [2.2804, 2.8670],
    [7.4234, 4.6965],
    [5.7451, 3.5340],
    [9.1722, 2.5111],
    [7.7928, 3.4241],
    [7.9398, 0.7916],
]

# Class label of each row above: the first five rows are class 0 and the
# last five are class 1.
raw_data_y = [0] * 5 + [1] * 5

# Array versions of the raw lists, used for boolean-mask indexing and
# arithmetic further down.
X_train, y_train = np.array(raw_data_X), np.array(raw_data_y)
# Boolean masks selecting the rows of each class; they are reused for both
# figures below.
class0_mask = y_train == 0
class1_mask = y_train == 1

# Figure 1: the training samples only (class 0 in red, class 1 in blue).
plt.scatter(X_train[class0_mask, 0], X_train[class0_mask, 1], color='r')
plt.scatter(X_train[class1_mask, 0], X_train[class1_mask, 1], color='b')
plt.show()

# The query point whose class is to be predicted.
x = np.array([8.0936, 3.3657])

# Figure 2: the same training samples plus the query point in green.
plt.scatter(X_train[class0_mask, 0], X_train[class0_mask, 1], color='r')
plt.scatter(X_train[class1_mask, 0], X_train[class1_mask, 1], color='b')
plt.scatter(x[0], x[1], color='g')
plt.show()
from collections import Counter
from math import sqrt

# Euclidean distance from the query point x to every training sample.
# Summing the squared differences over all coordinates keeps this correct
# for any number of features, not only for two-dimensional data.
# (Equivalent one-liner:
#  distance = [sqrt(np.sum((x_train - x) ** 2)) for x_train in X_train])
distance = []
for x_train in X_train:
    distance.append(sqrt(np.sum((x_train - x) ** 2)))

# Number of nearest neighbours that take part in the vote.
k = 3

# Indices of the training samples, ordered from nearest to farthest.
nearest = np.argsort(distance)

# All labels in nearest-first order (no effect in a script; shown as output
# when run interactively).
y_train[nearest]

# Labels of the k closest samples only. Voting over every sample would
# ignore the distances and simply reproduce the overall class frequencies.
top_K = list(y_train[nearest[:k]])

# Count how many of the k neighbours belong to each class.
votes = Counter(top_K)

# Small standalone Counter example, kept to illustrate what it produces.
votes_test = Counter([1, 1, 0, 2, 2, 2])
votes_test

# most_common(1) returns [(label, count)] for the most frequent label;
# the label itself is the prediction.
y_predict = votes.most_common(1)[0][0]
# 在 sklearn 中调用 kNN 算法 (Using the kNN algorithm through scikit-learn)
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
# Classifier created with no arguments, so every setting (including the
# number of neighbours) is left at the library default.
knn_clf = KNeighborsClassifier()

# Same ten two-feature training samples as in the manual implementation,
# one sample per row.
raw_data_X = [
    [3.3935, 2.3312],
    [3.1101, 1.7815],
    [1.3438, 3.3684],
    [3.5823, 4.6792],
    [2.2804, 2.8670],
    [7.4234, 4.6965],
    [5.7451, 3.5340],
    [9.1722, 2.5111],
    [7.7928, 3.4241],
    [7.9398, 0.7916],
]
# Class label for each row above.
raw_data_y = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

X_train = np.array(raw_data_X)
y_train = np.array(raw_data_y)

# Fit the classifier on the training samples and their labels.
knn_clf.fit(X_train, y_train)

# The query point, reshaped into a single-row 2-D array (one sample with
# two features) before it is handed to predict().
query_point = np.array([8.0936, 3.3657])
x = np.reshape(query_point, (1, -1))

# Predicted label for the query point (result is not stored; shown as
# output when run interactively).
knn_clf.predict(x)