'''
k近邻(kNN)算法的工作机制比较简单:根据某种距离测度,找出与给定待测样本距离最小的k个训练样本,再根据这k个训练样本进行预测。
分类问题:k个点中出现频率最高的类别作为待测样本的类别
回归问题:通常以k个训练样本的平均值作为待测样本的预测值
kNN模型三要素:距离测度、k值的选择、分类或回归决策方式
'''
from collections import Counter

import numpy as np


class KNNClassfier:
    """Brute-force k-nearest-neighbours classifier.

    Each test sample is assigned the label that occurs most often among
    its ``k`` closest training samples. When several labels are tied for
    the highest count, the tied label whose first occurrence is nearest
    to the test sample wins.

    Parameters
    ----------
    k : int, default 5
        Number of neighbours that take part in the vote.
    distance : str, default 'euc'
        Distance measure. Only ``'euc'`` (Euclidean) is implemented.
    """

    def __init__(self, k=5, distance='euc'):
        self.k = k
        self.distance = distance
        # Training data; stays None until fit() is called.
        self.x = None
        self.y = None

    def fit(self, X, Y):
        """Memorise the training set (kNN has no real training phase).

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
        Y : array-like, shape (n_samples,) or (n_samples, 1)
        """
        # np.asarray lets plain nested lists be used as well as ndarrays
        # and does not copy when the input is already an ndarray.
        self.x = np.asarray(X)
        self.y = np.asarray(Y)

    def _distances(self, sample, train_x):
        """Return the distance from ``sample`` to every row of ``train_x``."""
        if self.distance == 'euc':
            return np.linalg.norm(train_x - sample, axis=1)
        raise ValueError(
            "unsupported distance %r; only 'euc' is implemented"
            % (self.distance,)
        )

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``.

        Parameters
        ----------
        X_test : array-like, shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray, shape (n_samples, 1)
            Float array holding the predicted labels, so labels must be
            numeric.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``distance`` is not supported, or ``k`` is not in
            ``1..n_training_samples``.
        """
        if self.x is None or self.y is None:
            raise RuntimeError("fit() must be called before predict()")
        if self.distance != 'euc':
            # Fail early with a clear message, even for empty X_test.
            raise ValueError(
                "unsupported distance %r; only 'euc' is implemented"
                % (self.distance,)
            )

        train_x = np.asarray(self.x)
        # Flatten so that (n,) and (n, 1) label arrays behave the same.
        train_y = np.asarray(self.y).ravel()
        n_train = train_x.shape[0]
        if not 1 <= self.k <= n_train:
            raise ValueError(
                "k must be between 1 and the number of training samples "
                "(%d), got %r" % (n_train, self.k)
            )

        X_test = np.asarray(X_test)
        output = np.zeros((X_test.shape[0], 1))
        for i, sample in enumerate(X_test):
            dists = self._distances(sample, train_x)
            # A stable sort keeps equidistant neighbours in training order.
            nearest = np.argsort(dists, kind='stable')[:self.k]
            # Counter keeps first-seen order, so most_common(1) resolves a
            # tied vote in favour of the label encountered nearest-first.
            votes = Counter(train_y[nearest].tolist())
            output[i] = votes.most_common(1)[0][0]
        return output

    def score(self, x, y):
        """Return the classification accuracy on ``(x, y)``.

        Parameters
        ----------
        x : array-like, shape (n_samples, n_features)
        y : array-like, shape (n_samples,) or (n_samples, 1)

        Returns
        -------
        float
            Fraction of samples whose predicted label equals ``y``.

        Raises
        ------
        ValueError
            If ``x`` and ``y`` hold a different number of samples.
        ZeroDivisionError
            If ``x`` contains no samples.
        """
        pred = self.predict(x).ravel()
        truth = np.asarray(y).ravel()
        n_samples = pred.shape[0]
        if truth.shape[0] != n_samples:
            raise ValueError(
                "x and y have inconsistent numbers of samples: %d != %d"
                % (n_samples, truth.shape[0])
            )
        errors = int(np.count_nonzero(pred != truth))
        return 1 - errors / n_samples


if __name__ == '__main__':
    # scikit-learn is only needed for this demo / sanity comparison.
    from sklearn import datasets
    from sklearn.neighbors import KNeighborsClassifier

    iris = datasets.load_iris()
    x = iris.data
    y = iris.target
    # Alternative toy dataset for quick manual checks:
    # x = np.array([[0.5,0.4],[0.1,0.2],[0.7,0.8],[0.2,0.1],[0.4,0.6],[0.9,0.9],[1,1]]).reshape(-1,2)
    # y = np.array([0,1,0,1,0,1,1]).reshape(-1,1)

    clf = KNNClassfier(k=3)
    clf.fit(x, y)
    print('myknn score:', clf.score(x, y))

    clf_sklearn = KNeighborsClassifier(n_neighbors=3)
    clf_sklearn.fit(x, y)
    print('sklearn score:', clf_sklearn.score(x, y))