import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from math import sqrt
from collections import Counter
class kNNClassfier:
    """Brute-force k-nearest-neighbours classifier.

    Distances are Euclidean and the predicted label is the most common label
    among the ``k`` closest training samples.
    """

    def __init__(self, k):
        """Initialise the kNN classifier.

        Args:
            k: Number of neighbours that take part in the vote; must be >= 1.

        Raises:
            AssertionError: If ``k`` is smaller than 1.
        """
        assert k >= 1, "k must be valid"
        self.k = k
        self._X_train = None
        # Must be the same attribute name that fit()/predict() use; otherwise
        # predict() on an unfitted instance dies with AttributeError instead
        # of reaching the "must fit before predict" check.
        self._y_train = None

    def fit(self, X_train, y_train):
        """Store the training set ``X_train`` / ``y_train`` for later queries.

        Args:
            X_train: 2-D array, one row per training sample.
            y_train: 1-D array with one label per row of ``X_train``.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            AssertionError: If the sample counts differ or there are fewer
                training samples than ``k``.
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"
        assert self.k <= X_train.shape[0], \
            "the size of X_train must be at least k."
        self._X_train = X_train
        self._y_train = y_train
        return self

    def predict(self, X_predict):
        """Return the vector of predicted labels for the rows of ``X_predict``.

        Args:
            X_predict: 2-D array with the same number of columns as the
                training data.

        Returns:
            ``numpy.ndarray`` holding one predicted label per input row.

        Raises:
            AssertionError: If ``fit`` has not been called or the feature
                counts do not match.
        """
        assert self._X_train is not None and self._y_train is not None, \
            "must fit before predict"
        assert X_predict.shape[1] == self._X_train.shape[1], \
            "the feature number of X_predict must be equal to x_train"
        y_predict = [self._predict(x) for x in X_predict]
        return np.array(y_predict)

    def _predict(self, x):
        """Return the predicted label for a single sample ``x``.

        Args:
            x: 1-D array with one value per training feature.

        Raises:
            AssertionError: If ``x`` has the wrong number of features.
        """
        assert x.shape[0] == self._X_train.shape[1], \
            "must feature number of x must be equal to X_train"
        # One vectorised pass over all training rows instead of a Python-level
        # loop that calls sqrt/np.sum once per row.
        distances = np.sqrt(np.sum((self._X_train - x) ** 2, axis=1))
        nearest = np.argsort(distances)
        topK_y = [self._y_train[i] for i in nearest[:self.k]]
        votes = Counter(topK_y)
        return votes.most_common(1)[0][0]

    def score(self, X_test, y_test):
        """Return the accuracy of the model on ``X_test`` / ``y_test``.

        The predictions for ``X_test`` are compared against ``y_test`` with
        the module-level ``accuracy_score`` function.
        """
        y_predict = self.predict(X_test)
        return accuracy_score(y_test, y_predict)

    def __repr__(self):
        """Return a short description that includes ``k``."""
        return "KNN(k=%d)" % self.k
def accuracy_score(y_true, y_predict):
    """Return the fraction of entries where ``y_true`` and ``y_predict`` agree.

    Args:
        y_true: Array-like of reference labels.
        y_predict: Array-like of predicted labels with the same length.

    Returns:
        Number of matching entries divided by ``len(y_true)``. The count is
        taken along the first axis, so 1-D input yields a scalar.

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    # Coerce first so plain lists/tuples work as well as ndarrays.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    assert y_true.shape[0] == y_predict.shape[0], \
        "the size of y_true must be equal to the size of y_predict"
    # np.sum over axis 0 matches what builtin sum() computes on an ndarray,
    # but without iterating element by element in Python.
    return np.sum(y_true == y_predict, axis=0) / len(y_true)
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the scikit-learn digits dataset and print its keys, description and
# the shapes of the feature matrix and target vector.
digits = datasets.load_digits()
print(digits.keys())
print(digits.DESCR)
x = digits.data
print(x.shape)
y = digits.target
print(y.shape)

# Display one sample as an 8x8 image.
some_digit = x[666]
some_digit_image = some_digit.reshape(8, 8)
plt.imshow(some_digit_image)
plt.show()

# Hold out 20% of the samples for testing.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# First approach: the hand-written classifier, accuracy computed by hand.
my_knn_clf = kNNClassfier(k=3)
my_knn_clf.fit(x_train, y_train)
y_predict = my_knn_clf.predict(x_test)
print(sum(y_predict == y_test) / len(y_test))

# Second approach: let the classifiers compute the accuracy via score().
knn_clf = KNeighborsClassifier(n_neighbors=3)
knn_clf.fit(x_train, y_train)
# Keep the scores in their own names so the predictions in y_predict are not
# overwritten by a scalar.
my_knn_score = my_knn_clf.score(x_test, y_test)
print(my_knn_score)
# The scikit-learn model was fitted above but never evaluated; score it too so
# both implementations can be compared on the same split.
sklearn_knn_score = knn_clf.score(x_test, y_test)
print(sklearn_knn_score)