欧氏距离(n维空间中两点 a、b 之间的距离):$d(a, b) = \sqrt{\sum_{i=1}^{n} (a_i - b_i)^2}$
代码:
# k近邻算法(简化版)
from scipy.spatial import distance
def euc(a, b):
    """Return the Euclidean distance between points ``a`` and ``b``.

    Thin wrapper around ``scipy.spatial.distance.euclidean``.

    Args:
        a: First point, a 1-D sequence of numbers.
        b: Second point, a 1-D sequence of numbers with the same length
            as ``a``.

    Returns:
        The straight-line distance between the two points.
    """
    return distance.euclidean(a, b)
class Lfw():
    """Simplified k-nearest-neighbours classifier with k fixed at 1.

    ``fit`` only stores the training data; ``predict`` labels each test row
    with the label of the single closest training point, where closeness is
    measured by the module-level ``euc`` function.
    """

    def fit(self, x_train, y_train):
        """Remember the training features and their labels.

        Args:
            x_train: Indexable collection of training points.
            y_train: Labels, indexed in parallel with ``x_train``.
        """
        self.x_train = x_train
        self.y_train = y_train

    def predict(self, x_test):
        """Return a list holding one predicted label per row of ``x_test``."""
        # k is 1: only the single nearest training point is consulted.
        return [self.closest(row) for row in x_test]

    def closest(self, row):
        """Return the label of the training point nearest to ``row``.

        The comparison is strict, so when several training points are
        equally close the one with the lowest index wins.
        """
        best_dist = euc(row, self.x_train[0])
        best_index = 0
        for i in range(1, len(self.x_train)):
            dist = euc(row, self.x_train[i])
            if dist < best_dist:
                best_dist = dist
                best_index = i
        return self.y_train[best_index]
from sklearn import datasets
from sklearn.metrics import accuracy_score
# ``sklearn.cross_validation`` was removed in scikit-learn 0.20;
# ``train_test_split`` is provided by ``sklearn.model_selection``.
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()  # load the iris flower dataset
x = iris.data  # feature data
y = iris.target  # label data

# Split the data into training and test halves (x: features, y: labels).
# No random_state is passed, so the split differs from run to run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.5)

classifier = Lfw()
classifier.fit(x_train, y_train)  # fit the classifier on the training data
predictions = classifier.predict(x_test)  # predict labels for the test rows

print(accuracy_score(y_test, predictions))  # prints the accuracy
结果:准确率通常大于0.9(训练集和测试集是随机划分的,因此每次运行得到的具体数值会略有不同)。