# Run output:
# Full code:
#导入K近邻算法所使用的包
from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.spatial import distance
import numpy as np
import operator
#计算欧氏距离
def my_matEuclidean(row, Matrix):
    """Return the Euclidean distance from ``row`` to every row of ``Matrix``.

    ``row`` is repeated once per row of ``Matrix`` so the two can be
    subtracted element-wise; the squared differences are then summed along
    axis 1 and the square root of each sum is taken.

    Parameters:
        row: the query sample.
        Matrix: an array exposing ``.shape``; its first dimension is the
            number of samples to compare against.

    Returns:
        An array with one distance per row of ``Matrix``.
    """
    n_rows = Matrix.shape[0]
    # Stack n_rows copies of the query so it lines up with Matrix.
    deltas = np.tile(row, (n_rows, 1)) - Matrix
    squared_sums = (deltas ** 2).sum(axis=1)
    return squared_sums ** 0.5
#KNN算法实现
class MyKNN:
    """Minimal k-nearest-neighbours classifier.

    Each prediction is the majority label among the ``n_neighbors`` training
    samples closest (by ``my_matEuclidean``) to the query row. When several
    labels share the highest vote count, the label encountered first while
    walking the neighbours from nearest to farthest wins.
    """

    def __init__(self, n_neighbors):
        """Store the neighbour count; training data is supplied via ``fit``."""
        self.n_neighbors = n_neighbors
        self.X_train = None
        self.y_train = None

    def fit(self, X_train, y_train):
        """Memorise the training samples and their labels.

        Inputs are coerced with ``np.asarray`` so plain nested lists work and
        labels are always looked up by position.
        """
        self.X_train = np.asarray(X_train)
        self.y_train = np.asarray(y_train)

    def predict(self, X_test):
        """Return a list holding one predicted label per row of ``X_test``."""
        return [self.__closest(row) for row in X_test]

    def __closest(self, row):
        """Return the majority label among the nearest stored samples.

        Raises:
            ValueError: if ``n_neighbors`` is smaller than 1.
        """
        if self.n_neighbors < 1:
            raise ValueError("n_neighbors must be at least 1")
        # Use the data stored by fit(), not module-level globals, so the
        # classifier works with whatever training set it was given.
        distances = my_matEuclidean(row, self.X_train)
        # Slicing caps the neighbour count at the number of stored samples.
        nearest = distances.argsort()[:self.n_neighbors]
        votes = {}
        for idx in nearest:
            label = self.y_train[idx]
            votes[label] = votes.get(label, 0) + 1
        # max() returns the first key with the top count; dict insertion
        # order therefore breaks ties in favour of the nearer neighbour.
        return max(votes, key=votes.get)
#加载数据集并测试KNN分类器
iris = datasets.load_iris()
X = iris.data
Y = iris.target
# Randomly split into training and test sets (test_size=0.3).
X_pretrain, X_pretest, y_train, y_test = train_test_split(X, Y, test_size=0.3)
# Min-max normalisation: fit the scaler on the training split only, then
# apply that same scaling to the test split. Re-fitting on the test data
# would scale the two splits differently and leak test statistics.
minMax = MinMaxScaler()
X_train = minMax.fit_transform(X_pretrain)
X_test = minMax.transform(X_pretest)
# Try k = 30 down to 2 and remember the k with the highest test accuracy.
best_score = 0.0
best_k = 0
for k in range(30, 1, -1):
    my_classification = MyKNN(k)
    my_classification.fit(X_train, y_train)
    predictions = my_classification.predict(X_test)
    score = accuracy_score(y_test, predictions)
    # Strict '>' keeps the first (largest) k among equal scores.
    if score > best_score:
        best_score = score
        best_k = k
print("Best score is: ", best_score)
print("Best k is: ", best_k)