如何将自己的推荐函数注册到recommenderlab包中

https://github.com/mhahsler/recommenderlab/blob/master/R/RECOM_IBCF.R
官方案例学习,仔细分析,确实能学到很多东西。
两个基于用户推荐的案例分析:

 

#案例一:
## item-based top N recomender (see Karypis 2001)
##
##getParameters函数目的:用parameter来更新p中的值(用户推荐时可以采用该函数,调整参数)
getParameters<- function(p, parameter) {
  if(!is.null(parameter) && length(parameter) != 0) {
    o <- pmatch(names(parameter), names(p))
    #pmatch匹配,返回names(parameter)在names(p)中的位置,匹配失败返回NA
    if(any(is.na(o)))
      stop(sprintf(ngettext(length(is.na(o)),
                            "Unknown option: %s",
                            "Unknown options: %s"),
                   paste(names(parameter)[is.na(o)],
                         collapse = " ")))
    
    p[o] <- parameter
  }
  
  p
}

#定义需要的参数,可以根据需要增加删除,名称也没有要求
.BIN_IBCF_params <- list(
    #设置项目的邻域个数
    k = 30,
    #定义求物品相似矩阵时,使用Jaccard(可选pearson、Cosine)
    method="Jaccard",
    #定义一个逻辑值,用于判断条件
    normalize_sim_matrix = FALSE,
    alpha = 0.5
)


#开始定义,推荐函数
BIN_IBCF <- function(data, parameter= NULL) {
    #获取定义好的参数
    p <- getParameters(.BIN_IBCF_params, parameter)
    
    ## this might not fit into memory! Maybe use a sample?
    ##计算相似矩阵
    sim <- as.matrix(similarity(data, method=p$method, which="items", args=list(alpha=p$alpha)))

    ## reduce similarity matrix to keep only the k highest similarities
    #相同项目的相似度修改为0    
    diag(sim) <- 0
    ##sim[!is.finite(sim)] <- 0
    
    ## normalize rows to 1
    ##是否需要将数据标准化
    if(p$normalize_sim_matrix) sim <- sim/rowSums(sim, na.rm=TRUE)
    #除了选择最相似的k个项目,与其他项目的相似度复制为0,,保持矩阵的稀疏性
    for(i in 1:nrow(sim))
        sim[i,head(order(sim[i,], decreasing=FALSE, na.last=FALSE), ncol(sim) - p$k)] <- 0

    ## make sparse##节省空间
    sim <- as(sim, "dgCMatrix")
    
    #将相似矩阵存放在model中,,然后同个参数model传递给预测函数predict
    model <- c(list(
        description = "IBCF: Reduced similarity matrix",
        sim = sim
        ), p
    )

    predict <- function(model, newdata, n = 10, data=NULL, type=c("topNList", "ratings", "ratingMatrix"), ...) {
        #匹配type是c("topNList", "ratings", "ratingMatrix")中的哪一个
        type <- match.arg(type)

        ## newdata are userid#这里的newdata是传的用户的id
        if(is.numeric(newdata)) {
            if(is.null(data) || !is(data, "ratingMatrix"))
                stop("If newdata is a user id then data needes to be the training dataset.")
        newdata <- data[newdata,]
        }
    
        #如果需要预测的数据集的列数和相似矩阵的行数(或者列数)不相等,则报错。。无法预测
        if(ncol(newdata) != nrow(model$sim))
            stop("number of items in newdata does not match model.")

        n <- as.integer(n)
        sim <- model$sim
        u <- as(newdata, "dgCMatrix")

        ## predict all ratings (average similarity)
        #ratings <- tcrossprod(sim,u)
        #tcrossprod函数用于计算向量与向量之间或矩阵与矩阵之间的内积
        ratings <- t(as(tcrossprod(sim,u) / tcrossprod(sim!=0, u!=0), "matrix"))
        #预测评分矩阵和输入数据newdata坐标名称相同
        dimnames(ratings) <- dimnames(newdata)
        
        #返回评分(自认为这里有一些问题,可用下边注释掉的内容替换)
        returnRatings(ratings, newdata, type, n)
        
        #ratings <- as(ratings, "realRatingMatrix")
         #返回的结果一定的预测的:用户对项目的评分矩阵
         #if(type=="ratings") return(ratings)
         #getTopNLists(ratings, n=n, minRating=model$minRating)
         #
    }

    ## construct recommender object
    ##生成推荐推向
    new("Recommender", method = "IBCF", dataType = class(data),
        ntrain = nrow(data), model = model, predict = predict)
}



## 将子集编写的推荐函数注册到recommenderlab包中
recommenderRegistry$set_entry(
    method="IBCF", dataType = "binaryRatingMatrix", fun=BIN_IBCF,
    description="Recommender based on item-based collaborative filtering (binary rating data).",    
    parameters=.BIN_IBCF_params)
#method自己随意起个名称
#fun对应自己编写的推荐函数
#description自己随便写
#parameters可不要

 

案例二:

#案例二
#预定义一些变量
.REAL_IBCF_params <- list(
    k = 30,
    method="Cosine",
    normalize = "center",
    normalize_sim_matrix = FALSE,
    alpha = 0.5,
    na_as_zero = FALSE
)


#编写推荐函数
REAL_IBCF <- function(data, parameter= NULL) {
    p <- getParameters(.REAL_IBCF_params, parameter)

    if(!is.null(p$normalize))
        data <- normalize(data, method=p$normalize)

    ## this might not fit into memory! Maybe use a sample?
    sim <- as.matrix(similarity(data, method=p$method, which="items",

    args=list(alpha=p$alpha, na_as_zero=p$na_as_zero)))

    ## normalize rows to 1
    if(p$normalize_sim_matrix) sim <- sim/rowSums(sim, na.rm=TRUE)

    ## reduce similarity matrix to keep only the k highest similarities
    diag(sim) <- NA
    ##sim[!is.finite(sim)] <- NA

    for(i in 1:nrow(sim))
        sim[i,head(order(sim[i,], decreasing=FALSE, na.last=FALSE),
                ncol(sim) - p$k)] <- NA

    ## make sparse
    sim <- dropNA(sim)

    model <- c(list(
        description = "IBCF: Reduced similarity matrix",
        sim = sim
        ), p
    )

    predict <- function(model, newdata, n = 10,
            data=NULL, type=c("topNList", "ratings", "ratingMatrix"), ...) {
        
        type <- match.arg(type)

        ## newdata are userid
        if(is.numeric(newdata))
            if(is.null(data) || !is(data, "ratingMatrix"))
                stop("If newdata is a user id then data needes to be the training dataset.")
            newdata <- data[newdata,]
        }

        if(ncol(newdata) != nrow(model$sim)) stop("number of items in newdata does not match model.")

        n <- as.integer(n)
        if(!is.null(model$normalize))
            newdata <- normalize(newdata, method=model$normalize)

        ## predict all ratings
        sim <- model$sim
        u <- as(newdata, "dgCMatrix")

        ratings <- t(as(tcrossprod(sim,u) / tcrossprod(sim, u!=0), "matrix"))
        ratings <- new("realRatingMatrix", data=dropNA(ratings),
        normalize = getNormalize(newdata))
        ratings <- denormalize(ratings)

        returnRatings(ratings, newdata, type, n)
    }

    ## construct recommender object
    new("Recommender", method = "IBCF", dataType = class(data),
        ntrain = nrow(data), model = model, predict = predict)
}

## register recommender
recommenderRegistry$set_entry(
    method="IBCF", dataType = "realRatingMatrix", fun=REAL_IBCF,
    description="Recommender based on item-based collaborative filtering.",
    parameters=.REAL_IBCF_params)

评价模型相关命令:

algorithms <- list(
  "random items" = list(name="RANDOM", param=list(normalize = "Z-score")),
  "popular items" = list(name="POPULAR", param=list(normalize = "Z-score")),
  "user-based CF" = list(name="UBCF", param=list(normalize = "Z-score",
                                                   method="Cosine",
                                                   nn=50, minRating=3)),
    "item-based CF" = list(name="IBCF", param=list(normalize = "Z-score"
                                                   )),
    #把注册的自己的推荐引擎,列在小边,和其他推荐模型比较
    #例如
    #"Liu_Item CF" = list(name="Liu_Item", param=list(normalize = "Z-score"))
)


scheme <- evaluationScheme(data_train[1:200,], method = "split", train = .8,
                           k = 1, given = 10, goodRating = 4)
scheme

# run algorithms, predict next n movies
results <- evaluate(scheme, algorithms, n=c(1, 3, 5, 10, 15, 20))

# run algorithms, predict next n movies
results <- evaluate(scheme, algorithms, n=c(1, 3, 5, 10, 15, 20))

#执行单个推荐函数

#如下图所示:将整个推荐函数分为3步。Recommender命令执行 步骤1   步骤3  
#(如果说执行步骤2,个人认为仅声明预测函数predict,不执行内部业务逻辑)
#执行步骤3,返回:Recommender对象
rec <- Recommender(data_train[1:100], method = "Liu_Item")
rec

#predict命令,调用推荐函数中预测函数predict
#执行步骤2
pre <- predict(rec, data_train[101:102], n = 10)
pre

as(pre, "list")

 

 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值