import numpy as np
from sklearn.neighbors import KNeighborsClassifier #KNN库
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
# Class labels: action movie = 0, romance movie = 1.
X = np.array((
    (78, 30), (50, 48), (76, 27), (30, 78),
    (45, 30), (45, 63), (18, 51), (89, 30),
    (54, 50), (24, 16), (46, 44), (51, 47),
    (23, 16), (28, 41), (51, 43), (27, 23),
))
Y = np.array([0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1])

# Split ONCE and reuse the same partition for both models below, so the
# raw-vs-standardized comparison is made on identical train/test samples.
# The split is still random on every run; pass random_state=<int> to
# train_test_split if reproducible output is wanted.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.4)

# KNeighborsClassifier parameters:
#   n_neighbors: number of neighbouring samples consulted (library default
#                is used here).
#   weights:     weight function used in prediction (default 'uniform').
#                'uniform'  - every point in a neighbourhood counts equally.
#                'distance' - points are weighted by the inverse of their
#                             distance, so closer neighbours have more
#                             influence on the prediction.
#   p:           power parameter of the Minkowski metric (default 2);
#                p = 1 is the Manhattan distance, p = 2 the Euclidean one.
Knn_model = KNeighborsClassifier(weights='distance', p=2)
Knn_model.fit(x_train, y_train)
print(Knn_model.predict(x_test))  # predicted classes
print(Knn_model.predict_proba(x_test))  # predicted class probabilities
print(Knn_model.score(x_test, y_test))  # mean accuracy on the test set
print("=======================================================================================")

# Standardization (zero mean, unit variance per feature) - note this is not
# min-max normalization.
# The scaler is fitted on the training portion only and then applied to the
# test portion, so no statistics from the test samples leak into the
# features the model is trained on. The raw X is left untouched.
scaler = preprocessing.StandardScaler().fit(x_train)
x_pro_train = scaler.transform(x_train)
x_pro_test = scaler.transform(x_test)
# Labels are unaffected by feature scaling; reuse the same partition.
y_pro_train, y_pro_test = y_train, y_test

Knn_pro_model = KNeighborsClassifier(weights='distance', p=2)
Knn_pro_model.fit(x_pro_train, y_pro_train)
print(Knn_pro_model.predict(x_pro_test))  # predicted classes
print(Knn_pro_model.predict_proba(x_pro_test))  # predicted class probabilities
print(Knn_pro_model.score(x_pro_test, y_pro_test))  # mean accuracy on the test set
# Sample output recorded from one run of this script (values vary between
# runs because the train/test split is random):
#
# [1 0 0 0 0 0 1]
# [[0.05644996 0.94355004]
# [0.6359091 0.3640909 ]
# [0.75152106 0.24847894]
# [0.76645895 0.23354105]
# [0.88754633 0.11245367]
# [0.95165356 0.04834644]
# [0.2755666 0.7244334 ]]
# 0.7142857142857143
# =======================================================================================
# [0 0 0 0 0 0 0]
# [[0.52317298 0.47682702]
# [0.85325474 0.14674526]
# [0.947617 0.052383 ]
# [0.93843376 0.06156624]
# [0.59623344 0.40376656]
# [0.87175927 0.12824073]
# [0.61669927 0.38330073]]
# 0.5714285714285714