https://github.com/mhahsler/recommenderlab/blob/master/R/RECOM_IBCF.R
官方案例学习,仔细分析,确实能学到很多东西。
两个基于用户推荐的案例分析:
#案例一:
## item-based top N recomender (see Karypis 2001)
##
##getParameters函数目的:用parameter来更新p中的值(用户推荐时可以采用该函数,调整参数)
getParameters<- function(p, parameter) {
if(!is.null(parameter) && length(parameter) != 0) {
o <- pmatch(names(parameter), names(p))
#pmatch匹配,返回names(parameter)在names(p)中的位置,匹配失败返回NA
if(any(is.na(o)))
stop(sprintf(ngettext(length(is.na(o)),
"Unknown option: %s",
"Unknown options: %s"),
paste(names(parameter)[is.na(o)],
collapse = " ")))
p[o] <- parameter
}
p
}
#定义需要的参数,可以根据需要增加删除,名称也没有要求
.BIN_IBCF_params <- list(
#设置项目的邻域个数
k = 30,
#定义求物品相似矩阵时,使用Jaccard(可选pearson、Cosine)
method="Jaccard",
#定义一个逻辑值,用于判断条件
normalize_sim_matrix = FALSE,
alpha = 0.5
)
#开始定义,推荐函数
BIN_IBCF <- function(data, parameter= NULL) {
#获取定义好的参数
p <- getParameters(.BIN_IBCF_params, parameter)
## this might not fit into memory! Maybe use a sample?
##计算相似矩阵
sim <- as.matrix(similarity(data, method=p$method, which="items", args=list(alpha=p$alpha)))
## reduce similarity matrix to keep only the k highest similarities
#相同项目的相似度修改为0
diag(sim) <- 0
##sim[!is.finite(sim)] <- 0
## normalize rows to 1
##是否需要将数据标准化
if(p$normalize_sim_matrix) sim <- sim/rowSums(sim, na.rm=TRUE)
#除了选择最相似的k个项目,与其他项目的相似度复制为0,,保持矩阵的稀疏性
for(i in 1:nrow(sim))
sim[i,head(order(sim[i,], decreasing=FALSE, na.last=FALSE), ncol(sim) - p$k)] <- 0
## make sparse##节省空间
sim <- as(sim, "dgCMatrix")
#将相似矩阵存放在model中,,然后同个参数model传递给预测函数predict
model <- c(list(
description = "IBCF: Reduced similarity matrix",
sim = sim
), p
)
predict <- function(model, newdata, n = 10, data=NULL, type=c("topNList", "ratings", "ratingMatrix"), ...) {
#匹配type是c("topNList", "ratings", "ratingMatrix")中的哪一个
type <- match.arg(type)
## newdata are userid#这里的newdata是传的用户的id
if(is.numeric(newdata)) {
if(is.null(data) || !is(data, "ratingMatrix"))
stop("If newdata is a user id then data needes to be the training dataset.")
newdata <- data[newdata,]
}
#如果需要预测的数据集的列数和相似矩阵的行数(或者列数)不相等,则报错。。无法预测
if(ncol(newdata) != nrow(model$sim))
stop("number of items in newdata does not match model.")
n <- as.integer(n)
sim <- model$sim
u <- as(newdata, "dgCMatrix")
## predict all ratings (average similarity)
#ratings <- tcrossprod(sim,u)
#tcrossprod函数用于计算向量与向量之间或矩阵与矩阵之间的内积
ratings <- t(as(tcrossprod(sim,u) / tcrossprod(sim!=0, u!=0), "matrix"))
#预测评分矩阵和输入数据newdata坐标名称相同
dimnames(ratings) <- dimnames(newdata)
#返回评分(自认为这里有一些问题,可用下边注释掉的内容替换)
returnRatings(ratings, newdata, type, n)
#ratings <- as(ratings, "realRatingMatrix")
#返回的结果一定的预测的:用户对项目的评分矩阵
#if(type=="ratings") return(ratings)
#getTopNLists(ratings, n=n, minRating=model$minRating)
#
}
## construct recommender object
##生成推荐推向
new("Recommender", method = "IBCF", dataType = class(data),
ntrain = nrow(data), model = model, predict = predict)
}
## 将子集编写的推荐函数注册到recommenderlab包中
recommenderRegistry$set_entry(
method="IBCF", dataType = "binaryRatingMatrix", fun=BIN_IBCF,
description="Recommender based on item-based collaborative filtering (binary rating data).",
parameters=.BIN_IBCF_params)
#method自己随意起个名称
#fun对应自己编写的推荐函数
#description自己随便写
#parameters可不要
案例二:
#案例二
#预定义一些变量
.REAL_IBCF_params <- list(
k = 30,
method="Cosine",
normalize = "center",
normalize_sim_matrix = FALSE,
alpha = 0.5,
na_as_zero = FALSE
)
#编写推荐函数
REAL_IBCF <- function(data, parameter= NULL) {
p <- getParameters(.REAL_IBCF_params, parameter)
if(!is.null(p$normalize))
data <- normalize(data, method=p$normalize)
## this might not fit into memory! Maybe use a sample?
sim <- as.matrix(similarity(data, method=p$method, which="items",
args=list(alpha=p$alpha, na_as_zero=p$na_as_zero)))
## normalize rows to 1
if(p$normalize_sim_matrix) sim <- sim/rowSums(sim, na.rm=TRUE)
## reduce similarity matrix to keep only the k highest similarities
diag(sim) <- NA
##sim[!is.finite(sim)] <- NA
for(i in 1:nrow(sim))
sim[i,head(order(sim[i,], decreasing=FALSE, na.last=FALSE),
ncol(sim) - p$k)] <- NA
## make sparse
sim <- dropNA(sim)
model <- c(list(
description = "IBCF: Reduced similarity matrix",
sim = sim
), p
)
predict <- function(model, newdata, n = 10,
data=NULL, type=c("topNList", "ratings", "ratingMatrix"), ...) {
type <- match.arg(type)
## newdata are userid
if(is.numeric(newdata))
if(is.null(data) || !is(data, "ratingMatrix"))
stop("If newdata is a user id then data needes to be the training dataset.")
newdata <- data[newdata,]
}
if(ncol(newdata) != nrow(model$sim)) stop("number of items in newdata does not match model.")
n <- as.integer(n)
if(!is.null(model$normalize))
newdata <- normalize(newdata, method=model$normalize)
## predict all ratings
sim <- model$sim
u <- as(newdata, "dgCMatrix")
ratings <- t(as(tcrossprod(sim,u) / tcrossprod(sim, u!=0), "matrix"))
ratings <- new("realRatingMatrix", data=dropNA(ratings),
normalize = getNormalize(newdata))
ratings <- denormalize(ratings)
returnRatings(ratings, newdata, type, n)
}
## construct recommender object
new("Recommender", method = "IBCF", dataType = class(data),
ntrain = nrow(data), model = model, predict = predict)
}
## register recommender
recommenderRegistry$set_entry(
method="IBCF", dataType = "realRatingMatrix", fun=REAL_IBCF,
description="Recommender based on item-based collaborative filtering.",
parameters=.REAL_IBCF_params)
评价模型相关命令:
algorithms <- list(
"random items" = list(name="RANDOM", param=list(normalize = "Z-score")),
"popular items" = list(name="POPULAR", param=list(normalize = "Z-score")),
"user-based CF" = list(name="UBCF", param=list(normalize = "Z-score",
method="Cosine",
nn=50, minRating=3)),
"item-based CF" = list(name="IBCF", param=list(normalize = "Z-score"
)),
#把注册的自己的推荐引擎,列在小边,和其他推荐模型比较
#例如
#"Liu_Item CF" = list(name="Liu_Item", param=list(normalize = "Z-score"))
)
scheme <- evaluationScheme(data_train[1:200,], method = "split", train = .8,
k = 1, given = 10, goodRating = 4)
scheme
# run algorithms, predict next n movies
results <- evaluate(scheme, algorithms, n=c(1, 3, 5, 10, 15, 20))
# run algorithms, predict next n movies
results <- evaluate(scheme, algorithms, n=c(1, 3, 5, 10, 15, 20))
#执行单个推荐函数
#如下图所示:将整个推荐函数分为3步。Recommender命令执行 步骤1 步骤3
#(如果说执行步骤2,个人认为仅声明预测函数predict,不执行内部业务逻辑)
#执行步骤3,返回:Recommender对象
rec <- Recommender(data_train[1:100], method = "Liu_Item")
rec
#predict命令,调用推荐函数中预测函数predict
#执行步骤2
pre <- predict(rec, data_train[101:102], n = 10)
pre
as(pre, "list")