K近邻的详细算法，建议参考李航老师的《统计学习方法》。下面是 Python 3 的代码：
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

# Read the iris data set from a local text file: one comma-separated record
# per line (4 feature columns followed by the class-name column).
data = []
with open('E:\python文件\python机器学习实践指南/iris数据.txt') as txtData:
    # Iterate the file object directly instead of materializing every line
    # with readlines(); strip() removes the trailing newline before splitting.
    for line in txtData:
        data.append(line.strip().split(','))
虽然sklearn里面已经有iris的数据
,但是我还是用python读取txt文件中的数据,因为实际工作中数据的预处理往往要花费更多的时间,用这种方式正好可以练习数据的读取和预处理。
# Drop the header row (column-name text), keeping only the data records.
data.pop(0)
# print(data)
arr = np.array(data)  # list of 5-element string rows -> (n, 5) string array
# Bug fix: the original called np.reshape(arr, -1, 5), which passes 5 as the
# `order` argument (invalid — order must be 'C'/'F'/'A'/'K') and discarded the
# return value anyway.  Reshape to n rows x 5 columns and keep the result.
arr = np.reshape(arr, (-1, 5))
# print(arr)
iris_X = arr[:, :4]    # first four columns: the numeric feature inputs
iris_y0 = arr[:, 4:]   # last column: the class-name strings

# Encode the three class names as integer labels 0/1/2.
labels = []
for name in iris_y0:
    if name == 'Iris-setosa':
        labels.append(0)
    elif name == 'Iris-versicolor':
        labels.append(1)
    else:
        labels.append(2)
iris_y = np.array(labels)
# Bug fix: the original then called np.reshape(iris_y, -1, 1), which passes 1
# as the invalid `order` argument and discards the result; iris_y is already
# the 1-D label vector train_test_split expects, so the call is removed.

# Hold out 30% of the samples as a test set.
X_train, X_test, y_train, y_test = train_test_split(iris_X, iris_y,
                                                    test_size=0.3)

knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
print(y_pred)
print(y_test)

# Bug fix: the original used a bare `if` instead of a loop, so only the first
# test sample was ever compared and the printed rate was meaningless.
# Count every misclassified test sample.
count = 0
for i in range(len(y_test)):
    if y_pred[i] != y_test[i]:
        count += 1
c = count / len(y_test)  # error rate: fraction of misclassified test samples
print(c)