knn算法:在训练集中取前n个距离目标最近的点,统计这些点的类别,哪个类别出现的次数最多,则数据属于哪一类
代码:
from __future__ import print_function from numpy import * import operator import csv from sklearn import datasets from sklearn.cross_validation import train_test_split from sklearn.neighbors import KNeighborsClassifier def file2matrix(filename): Mat = zeros((1000,20)) label = [] index = 0 t = 0 with open(filename,"r",encoding="utf-8") as csvfile: read = csv.reader(csvfile) for i in read: if t ==0 : t=t+1 continue else: Mat[index,:] = i[0:20] label.append(i[20]) index = index + 1 return Mat,label normMat,datingLabels = file2matrix("1.csv") errorcount = 0; X_train, X_test, y_train, y_test = train_test_split(normMat,datingLabels, test_size=0.3) knn = KNeighborsClassifier() knn.fit(X_train,y_train) for i in range(300): if(knn.predict(X_test)[i]!=y_test[i]): errorcount=errorcount+1 print(errorcount/300)