# 利用 kNN 的定义构造函数，并通过十折交叉验证获取最优 k 值，最后计算算法精度（以 iris 数据为例）
## 1、利用 k 近邻的定义，构造 knn 函数
#' Classify test samples with a k-nearest-neighbour majority vote.
#'
#' Features are read from columns 1:4 of both data frames and class labels
#' from `train.data$Species` (the column layout of the built-in `iris` data).
#'
#' @param train.data Data frame of labelled samples: numeric features in
#'   columns 1:4 and the class label in a `Species` column. Needs >= 1 row.
#' @param test.data Data frame of samples to classify: numeric features in
#'   columns 1:4.
#' @param k1 Number of nearest neighbours that vote (a single number >= 1).
#' @return `test.data` with the predicted label of every row written to
#'   column 6. A zero-row `test.data` is returned unchanged.
knn_define <- function(train.data, test.data, k1) {
  stopifnot(is.numeric(k1), length(k1) == 1L, k1 >= 1)
  train.len <- nrow(train.data)
  test.len <- nrow(test.data)
  stopifnot(train.len >= 1L)

  # Euclidean distances between all stacked rows (test rows first).
  all.dist <- as.matrix(
    dist(rbind(test.data[, 1:4], train.data[, 1:4]), method = "euclidean")
  )
  # Keep only the test-by-train block. drop = FALSE keeps it a matrix even
  # when there is a single test row or a single training row.
  dist.mat <- all.dist[
    seq_len(test.len),
    test.len + seq_len(train.len),
    drop = FALSE
  ]

  for (i in seq_len(test.len)) {
    # Training rows ordered from nearest to farthest; take the first k1.
    # (If k1 exceeds the number of training rows the extra picks are NA and
    # are ignored by table() below.)
    nn <- order(dist.mat[i, ], na.last = NA)[1:k1]
    neighbour.labels <- train.data$Species[nn]
    # Majority vote: the most frequent label among the neighbours; ties are
    # resolved by the order sort() leaves them in.
    votes <- sort(table(neighbour.labels), decreasing = TRUE)
    test.data[i, 6] <- data.frame(votes)[1, 1]
  }
  test.data
}
## 2、利用十折交叉验证获取最优 k
for (k1 in 3:15) {
for (n in 1:10) {
test.idx= seq(n,dim(iris)[1],by = 10)
train.idx = setdiff(1:150,test.i