import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets
# Load the Iris dataset bundled with scikit-learn and pull out the feature
# matrix and the integer class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target
# Notebook display: dimensions of the feature matrix (samples, features).
X.shape
(150, 4)
# One integer position per sample. The length is taken from X itself rather
# than a hard-coded 150 so the index always stays aligned with the data.
index = np.arange(X.shape[0])
# Notebook display: the ordered positions before shuffling.
index
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
143, 144, 145, 146, 147, 148, 149])
# Randomly permute the sample positions in place; the shuffled order is what
# the later train/test slicing relies on.
# NOTE(review): no seed is set in the visible code, so the permutation (and
# everything derived from it) presumably differs from run to run.
np.random.shuffle(index)
# Notebook display: the positions after shuffling.
index
array([ 1, 21, 38, 118, 72, 114, 83, 29, 53, 122, 25, 149, 35,
50, 140, 143, 56, 0, 138, 17, 11, 10, 33, 61, 144, 19,
8, 88, 48, 85, 70, 112, 128, 127, 96, 137, 28, 107, 82,
77, 78, 79, 15, 146, 49, 12, 135, 145, 55, 2, 47, 80,
3, 141, 30, 111, 43, 133, 66, 13, 91, 54, 110, 124, 104,
106, 32, 52, 7, 120, 148, 123, 97, 18, 39, 126, 92, 60,
74, 46, 4, 23, 57, 58, 36, 113, 41, 44, 27, 142, 134,
98, 131, 64, 59, 94, 115, 42, 103, 5, 130, 102, 108, 117,
100, 105, 75, 65, 40, 139, 125, 84, 22, 109, 51, 63, 62,
99, 101, 73, 6, 9, 69, 67, 121, 20, 87, 147, 71, 14,
81, 16, 68, 86, 37, 95, 90, 34, 129, 136, 116, 93, 26,
132, 119, 45, 24, 89, 76, 31])
# Hold-out split: the first n_train shuffled positions are used for training,
# the remainder for testing. Features and labels are sliced with the SAME
# index arrays so they cannot drift out of alignment (previously X used
# index[100:] while y used index[-50:], which only agree for 150 samples).
n_train = 100
train_idx, test_idx = index[:n_train], index[n_train:]
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# 5-nearest-neighbours classifier: neighbours vote with weights inversely
# proportional to their distance, p=1 selects the Manhattan (L1) metric, and
# the neighbour search may use up to 4 parallel jobs.
knn = KNeighborsClassifier(n_neighbors=5, weights='distance', p=1, n_jobs=4)
knn.fit(X_train, y_train)

# Predicted labels for the held-out samples.
y_ = knn.predict(X_test)
# Notebook display: mean accuracy on the held-out samples.
knn.score(X_test, y_test)
1.0
# Notebook display: the labels predicted for the held-out samples.
y_
array([2, 2, 2, 2, 2, 2, 1, 1, 0, 2, 2, 1, 0, 2, 1, 1, 1, 1, 2, 1, 0, 0, 1,
1, 2, 0, 1, 2, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 2, 1, 0, 2, 2, 0,
0, 1, 1, 0])
# Per-class probability estimates for every held-out sample: one row per
# sample, one column per class.
proba_ = knn.predict_proba(X_test)
# Notebook display: the probability matrix.
proba_
array([[ 0. , 0. , 1. ],
[ 0. , 0. , 1. ],
[ 0. , 0. , 1. ],
[ 0. , 0. , 1. ],
[ 0. , 0. , 1. ],
[ 0. , 0. , 1. ],
[ 0. , 1. , 0. ],
[ 0. , 1. , 0. ],
[ 1. , 0. , 0. ],
[ 0. , 0. , 1. ],
[ 0. , 0. , 1. ],
[ 0. , 1. , 0. ],
[ 1. , 0. , 0. ],
[ 0. , 0. , 1. ],
[ 0. , 1. , 0. ],
[ 0. , 0.6, 0.4],
[ 0. , 1. , 0. ],
[ 0. , 1. , 0. ],
[ 0. , 0.2, 0.8],
[ 0. , 1. , 0. ],
[ 1. , 0. , 0. ],
[ 1. , 0. , 0. ],
[ 0. , 1. , 0. ],
[ 0. , 1. , 0. ],
[ 0. , 0. , 1. ],
[ 1. , 0. , 0. ],
[ 0. , 1. , 0. ],
[ 0. , 0. , 1. ],
[ 0. , 1. , 0. ],
[ 1. , 0. , 0. ],
[ 0. , 1. , 0. ],
[ 1. , 0. , 0. ],
[ 0. , 0.6, 0.4],
[ 0. , 1. , 0. ],
[ 1. , 0. , 0. ],
[ 0. , 1. , 0. ],
[ 0. , 1. , 0. ],
[ 1. , 0. , 0. ],
[ 0. , 0. , 1. ],
[ 0. , 0. , 1. ],
[ 0. , 0. , 1. ],
[ 0. , 1. , 0. ],
[ 1. , 0. , 0. ],
[ 0. , 0. , 1. ],
[ 0. , 0.4, 0.6],
[ 1. , 0. , 0. ],
[ 1. , 0. , 0. ],
[ 0. , 1. , 0. ],
[ 0. , 1. , 0. ],
[ 1. , 0. , 0. ]])
# Notebook display: the species names that accompany the integer class labels.
iris.target_names
array(['setosa', 'versicolor', 'virginica'],
dtype='<U10')
# Notebook display: for each row of the probability matrix, the column holding
# the largest probability, i.e. the most likely class per test sample.
np.argmax(proba_, axis=1)
array([2, 2, 2, 2, 2, 2, 1, 1, 0, 2, 2, 1, 0, 2, 1, 1, 1, 1, 2, 1, 0, 0, 1,
1, 2, 0, 1, 2, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 2, 1, 0, 2, 2, 0,
0, 1, 1, 0], dtype=int64)
# Notebook display: mean accuracy of the fitted classifier on the held-out
# samples (same call as performed right after fitting).
knn.score(X=X_test, y=y_test)
1.0
# Print predictions and ground-truth labels one above the other, separated by
# a dashed rule, for a visual side-by-side comparison.
print(y_, '-----------', y_test, sep='\n')
[2 2 2 2 2 2 1 1 0 2 2 1 0 2 1 1 1 1 2 1 0 0 1 1 2 0 1 2 1 0 1 0 1 1 0 1 1
0 2 2 2 1 0 2 2 0 0 1 1 0]
-----------
[2 2 2 2 2 2 1 1 0 2 2 1 0 2 1 1 1 1 2 1 0 0 1 1 2 0 1 2 1 0 1 0 1 1 0 1 1
0 2 2 2 1 0 2 2 0 0 1 1 0]
# Notebook display: accuracy computed by hand as the fraction of predictions
# that match the true labels. Taking the mean of the boolean mask avoids the
# hard-coded test-set size (50), so it stays correct if the split changes.
(y_ == y_test).mean()
1.0