k-近邻算法(R语言实现)及10折交叉验证K近邻算法

一、k-近邻

# Remove every object listed by ls() so the script starts from an empty
# workspace. NOTE(review): this also deletes anything the caller had defined.
rm(list=ls())
# Step 1: data-loading helper and the k-nearest-neighbour classifier.
# Load a digit data set from a whitespace-separated text file.
#
# Args:
#   file: path of the file handed to read.table().
#
# Returns:
#   A data frame whose first column is renamed "zipcode" and converted to a
#   factor (the class label); all other columns are left as read.
read_digits <- function(file) {
  digits <- read.table(file)
  names(digits)[1] <- "zipcode"
  # The label column is categorical, so store it as a factor.
  digits[[1]] <- factor(digits[[1]])
  digits
}


# Load the training and test digit sets from the working directory.
# Forward slashes are used on purpose: inside an R string literal "\t" is the
# TAB escape, so a path written as ".\train.txt" does not name the intended
# file. "./train.txt" works on every platform, including Windows.
file <- "train.txt"
train <- read_digits("./train.txt")
test <- read_digits("./test.txt")
# Classify every row of `test` by majority vote among its k nearest rows of
# `train`.
#
# Args:
#   train: data frame (or matrix) whose first column holds the class labels
#          and whose remaining columns hold the features.
#   test:  data with the same column layout as `train`; its first column is
#          not used for prediction.
#   k:     number of neighbours that vote (default 1). Values larger than
#          nrow(train) are treated as nrow(train).
#   d:     distance method forwarded to dist() (default "euclidean").
#
# Returns:
#   Numeric vector with one predicted label per row of `test` (numeric(0) for
#   an empty test set). Labels are converted with as.numeric(), so they must
#   look like numbers. Voting ties go to the label that comes first in the
#   table() ordering, because which.max() returns the first maximum.
predict_knn <- function(train, test, k = 1, d = "euclidean") {
  n_test <- nrow(test)
  n_train <- nrow(train)
  if (n_test == 0L) {
    return(numeric(0))
  }
  stopifnot(n_train >= 1L, k >= 1)

  # Convert the features to matrices once. drop = FALSE keeps a single
  # feature column two-dimensional, and matrix rows are far cheaper to
  # rbind() than data-frame rows inside the double loop below.
  train_x <- as.matrix(train[, -1, drop = FALSE])
  test_x <- as.matrix(test[, -1, drop = FALSE])
  labels <- train[, 1]
  k <- min(k, n_train)

  # distances[i, j] is the distance between test row i and training row j.
  distances <- matrix(0, n_test, n_train)
  for (i in seq_len(n_test)) {
    for (j in seq_len(n_train)) {
      distances[i, j] <- dist(rbind(test_x[i, ], train_x[j, ]), method = d)
    }
  }

  p <- numeric(n_test)
  for (i in seq_len(n_test)) {
    nearest <- order(distances[i, ])[seq_len(k)]
    tabs <- table(labels[nearest])
    p[i] <- as.numeric(names(tabs))[which.max(tabs)]
  }
  p
}

二、10折交叉验证K近邻

# Pairwise distances between all training rows (label column excluded),
# expanded to full square matrices so they can be indexed by row and column.
# dist() uses the Euclidean metric by default.
alldistance1 <- as.matrix(dist(train[, -1]))

# The same pairwise distances under the Manhattan metric.
alldistance2 <- as.matrix(dist(train[, -1], method = "manhattan"))

# Class labels of the training rows (first column of `train`).
zipcode <- train[, 1]
# Predict labels from a precomputed distance matrix by k-nearest-neighbour
# majority vote.
#
# Args:
#   x:     distance matrix with one row per observation to classify and one
#          column per candidate neighbour. A plain vector is accepted and
#          treated as a single row (this is what matrix subsetting yields
#          when only one row is selected without drop = FALSE).
#   k:     number of neighbours that vote; values above ncol(x) are treated
#          as ncol(x).
#   label: labels of the candidate neighbours, in the column order of `x`.
#
# Returns:
#   Numeric vector with one predicted label per row of `x` (numeric(0) when
#   `x` has no rows). Labels are parsed with as.integer(), so they must look
#   like integers. Ties go to the first label in table() order.
make_pred <- function(x, k, label) {
  if (is.null(dim(x))) {
    x <- matrix(x, nrow = 1L)
  }
  stopifnot(k >= 1)
  n_vote <- min(k, ncol(x))

  p <- numeric(nrow(x))
  for (i in seq_len(nrow(x))) {
    nearest <- order(x[i, ])[seq_len(n_vote)]
    tbl <- table(label[nearest])
    p[i] <- as.integer(names(tbl)[which.max(tbl)])
  }
  p
}


# Estimate the k-NN misclassification rate on the training data by
# cross-validation, using the precomputed global distance matrices.
#
# Relies on objects defined elsewhere in this script: `train`, `zipcode`
# (labels of the rows of `train`), `alldistance1`, `alldistance2` and
# make_pred().
#
# Args:
#   k:       number of neighbours that vote.
#   d:       which distance matrix to use: 1 selects `alldistance1`,
#            2 selects `alldistance2`.
#   n_folds: number of folds (default 10). Row r is assigned to fold
#            (r %% n_folds) + 1.
#
# Returns:
#   An unnamed list: element 1 is the overall error rate, element 2 is the
#   confusion table of predicted (rows) versus true (columns) labels.
cv_error_knn <- function(k, d, n_folds = 10) {
  if (d == 1) {
    alldistance <- alldistance1
  } else if (d == 2) {
    alldistance <- alldistance2
  } else {
    stop("`d` must be 1 or 2", call. = FALSE)
  }

  n_obs <- nrow(train)
  stopifnot(n_folds >= 2, length(zipcode) == n_obs)

  splits <- seq_len(n_obs) %% n_folds + 1
  preds <- numeric(n_obs)
  for (i in seq_len(n_folds)) {
    tests <- which(splits == i)
    # With fewer rows than folds some folds are empty; nothing to predict.
    if (length(tests) == 0L) {
      next
    }
    trains <- which(splits != i)
    # drop = FALSE keeps a one-row fold as a matrix for make_pred().
    sub <- alldistance[tests, trains, drop = FALSE]
    preds[tests] <- make_pred(sub, k, label = zipcode[trains])
  }

  # True labels as numbers, aligned row-by-row with `preds`.
  dd <- as.numeric(as.vector(zipcode))
  err_overall <- mean(preds != dd)
  cm <- table(preds, dd)
  list(err_overall, cm)
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值