numpy 支持矩阵减向量(广播机制:矩阵的每一行都减去该向量);若要让每一列减去某个向量,可以先转置矩阵再相减。
R实现:
library(class)

# k-nearest-neighbour classification of the iris data with class::knn().
# `iris` ships with base R (datasets package), so its columns are accessed
# explicitly instead of attach()-ing the data frame onto the search path.

# Features: the first two columns of iris (Sepal.Length, Sepal.Width).
train <- iris[, 1:2]

# Class labels encoded as the integer codes of the Species factor (1, 2, 3).
y <- as.numeric(iris$Species)
x <- train

# knn() defaults to k = 1. The training set is also used as the test set, so
# every query point has itself as a zero-distance neighbour; misclassification
# can only come from tied neighbours, which knn() resolves by vote / at random.
fit <- knn(x, x, y)

# Training error rate: fraction of predictions that differ from the label.
print(mean(fit != y))
利用ndarray的诸多特性,可以实现非常简短的(DIY)KNN:
import numpy as np
from sklearn.datasets import load_iris
# Load the iris data set bundled with scikit-learn.
iris = load_iris()
# print(iris.DESCR)  # uncomment to show the data set description
# Feature matrix: the first two columns (sepal length, sepal width).
# numpy slices are 0-based and end-exclusive, so 0:2 selects the same two
# columns as the 1-based, end-inclusive R expression iris[, 1:2].
X = iris.data[:, 0:2]
# Integer class label for every row of X.
y = iris.target
def predict_y(x, k=1, data=None, labels=None, exclude_self=True):
    """Predict the class of ``x`` by majority vote among its k nearest rows.

    Parameters
    ----------
    x : array-like
        Query point; must broadcast against the rows of ``data``.
    k : int
        Number of neighbours that take part in the vote.
    data : array-like, optional
        2-D feature matrix, one sample per row. Defaults to the module-level
        ``X``.
    labels : array-like, optional
        Class label for every row of ``data``. Defaults to the module-level
        ``y``.
    exclude_self : bool
        When True (the default, matching the original behaviour) the single
        closest row is skipped. This is meant for queries taken from ``data``
        itself, where the closest row is the query at distance 0
        (leave-one-out). Pass False when ``x`` is not a row of ``data``.

    Returns
    -------
    The label with the most votes; on a tie the smallest label wins because
    ``np.unique`` returns labels sorted and ``np.argmax`` picks the first
    maximum.
    """
    if data is None:
        data = X
    if labels is None:
        labels = y
    data = np.asarray(data)
    labels = np.asarray(labels)

    # Broadcasting subtracts x from every row; the norm along axis 1 gives
    # one Euclidean distance per row.
    distance_array = np.linalg.norm(data - x, axis=1)

    # Row indices ordered by increasing distance, keeping k of them and
    # optionally dropping the closest one.
    first = 1 if exclude_self else 0
    distance_indexs = np.argsort(distance_array)[first:first + k]

    classes, counts = np.unique(labels[distance_indexs], return_counts=True)
    return classes[np.argmax(counts)]
# Classify every sample with predict_y (default k = 1) and count the
# predictions that disagree with the known labels.
# A list comprehension is used instead of map() so that len() and indexing
# work on Python 3 as well, where map() returns a lazy iterator.
predict = [predict_y(sample) for sample in X]
error_num = sum(1 for guess, truth in zip(predict, y) if guess != truth)

# Single-argument print(...) produces the same output on Python 2 and 3.
print("error_error :")
print(float(error_num) / len(predict))