用R解析mahout用户推荐协同过滤算法


##建立模型

FileDataModel=function(file){
  data=read.csv(file,header=F)
  names(data)=c('uid','iid','pref')
  user=unique(data$uid)
  item=unique(sort(data$iid))
  uidx=match(data$uid,user)
  iidx=match(data$iid,item)
  M=matrix(0,length(user),length(item))
  i=cbind(uidx,iidx,pref=data$pref)
  for(n in 1:nrow(i)){
    M[i[n,][1],i[n,][2]]=i[n,][3]

  }
  dimnames(M)[[2]]=item
  M
}


##欧式距离相似度算法
EuclideanDistanceSimilarity=function(M){
  row=nrow(M)
  s=matrix(0,row,row)
  for(z1 in 1:row){
    for(z2 in 1:row){
      if(z1<z2){
        <span="">
          num=intersect(which(M[z1,]!=0),which(M[z2,]!=0)) #可计算的列

        sum=0
        for(z3 in num ){
          sum<-sum+(M[z1,][z3]-M[z2,][z3])^2
        }
        s[z2,z1]<-length(num)/(1+sqrt(sum))

        if(s[z2,z1]>1) s[z2,z1]<-1 #标准化
        if(s[z2,z1]< -1) s[z2,z1]<- -1 #标准化

        #print(paste(z1,z2));print(num);print(sum)


      }
    }
  }
  ts<-t(s)
  w<-which(upper.tri(ts))
  s[w]<-ts[w]
  s
}




##最紧邻算法


NearestNUserNeighborhood<-function(S,n){
  row<-nrow(S)
  neighbor<-matrix(0, row, n)
  for(z1 in 1:row){
    for(z2 in 1:n){
      m<-which.max(S[,z1])
      #       print(paste(z1,z2,m,'\n'))
      neighbor[z1,][z2]<-m
      S[,z1][m]=0
    }
  }
  neighbor
}


###4). 推荐算法


UserBasedRecommender<-function(uid,n,M,S,N){
  row<-ncol(N)
  col<-ncol(M)
  r<-matrix(0, row, col)
  N1<-N[uid,]
  for(z1 in 1:length(N1)){
    num<-intersect(which(M[uid,]==0),which(M[N1[z1],]!=0)) #可计算的列
    #     print(num)

    for(z2 in num){
      #       print(paste("for:",z1,N1[z1],z2,M[N1[z1],z2],S[uid,N1[z1]]))
      r[z1,z2]=M[N1[z1],z2]*S[uid,N1[z1]]
    }
  }

  sum<-colSums(r)
  s2<-matrix(0, 2, col)
  for(z1 in 1:length(N1)){
    num<-intersect(which(colSums(r)!=0),which(M[N1[z1],]!=0))
    for(z2 in num){
      s2[1,][z2]<-s2[1,][z2]+S[uid,N1[z1]]
      s2[2,][z2]<-s2[2,][z2]+1
    }
  }

  s2[,which(s2[2,]==1)]=10000
  s2<-s2[-2,]

  r2<-matrix(0, n, 2)
  rr<-sum/s2
  item <-dimnames(M)[[2]]
  for(z1 in 1:n){
    w<-which.max(rr)
    if(rr[w]>0.5){
      r2[z1,1]<-item[which.max(rr)]
      r2[z1,2]<-as.double(rr[w])
      rr[w]=0
    }
  }
  r2
}
5). 运行程序


FILE<-"testCF.csv"
NEIGHBORHOOD_NUM<-2
RECOMMENDER_NUM<-3

M<-FileDataModel(FILE)
S<-EuclideanDistanceSimilarity(M)
N<-NearestNUserNeighborhood(S,NEIGHBORHOOD_NUM)

R1<-UserBasedRecommender(1,RECOMMENDER_NUM,M,S,N);R1
##      [,1]  [,2]  
## [1,] "104" "4.25"
## [2,] "106" "4"   
## [3,] "0"   "0" 

R2<-UserBasedRecommender(2,RECOMMENDER_NUM,M,S,N);R2
##      [,1]  [,2]
## [1,] "105" "3.95699903407931"
## [2,] "0"   "0"
## [3,] "0"   "0"

R3<-UserBasedRecommender(3,RECOMMENDER_NUM,M,S,N);R3
##      [,1]  [,2]
## [1,] "103" "3.18540697329411"
## [2,] "102" "2.80243217111765"
## [3,] "0"   "0"

R4<-UserBasedRecommender(4,RECOMMENDER_NUM,M,S,N);R4
##      [,1]  [,2]
## [1,] "102" "3" 
## [2,] "0"   "0" 
## [3,] "0"   "0"

R5<-UserBasedRecommender(5,RECOMMENDER_NUM,M,S,N);R5
##      [,1] [,2]
## [1,]    0    0
## [2,]    0    0
## [3,]    0    0
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值