// ALS (Alternating Least Squares) algorithm

// Load the viewer rating data set; the leading fields of each line are (user ID, movie ID, rating).
val rawData = sc.textFile("u.data")
// Peek at the first record to check the line format.
rawData.first()
// Split each line on tabs and keep only the first three fields.
val rawRating = rawData.map { line =>
  line.split("\t").take(3)
}
 
import  org.apache.spark.mllib.recommendation.ALS
 
import  org.apache.spark.mllib.recommendation.Rating
// Convert every String array in rawRating into a Rating(user, product, rating) record.
val rating = rawRating.map {
  case Array(userField, movieField, scoreField) =>
    Rating(userField.toInt, movieField.toInt, scoreField.toDouble)
}
// Train the model; the rank, iterations and lambda arguments are 50, 10 and 0.01.
val model = ALS.train(rating, 50, 10, 0.01)
// User-based recommendation: the user to inspect and how many recommendations to return.
val userId = 789
val K = 10
// Predicted rating of that user for movie 123.
val productRating = model.predict(userId, 123)
// Top-K recommended movies for the user.
val topKRecs = model.recommendProducts(userId, K)
print(topKRecs.mkString("\n"))
// Load the movie data set; each line is pipe-separated and starts with (id, title (release year), ...).
val movies = sc.textFile("u.item")
// Keep only (id, title) and collect the pairs as an id -> title map.
val titles = movies.map { line =>
  val fields = line.split("\\|").take(2)
  (fields(0).toInt, fields(1))
}.collectAsMap()
titles(123)
// All ratings given by the user (userId is defined earlier in the script).
val moviesForUser = rating.keyBy(_.user).lookup(userId)
println(moviesForUser.size)
// The K movies this user rated highest, with movie ids resolved to titles.
moviesForUser.sortBy(-_.rating).take(K).map(r => (titles(r.product), r.rating)).foreach(println)
// The K movies recommended to this user, with movie ids resolved to titles.
// Printed right after the list above so the two can be compared.
topKRecs.map(r => (titles(r.product), r.rating)).foreach(println)
 
//物品推荐
//导入jblas包创建向量
import org.jblas.DoubleMatrix
// Sample jblas vector built from a plain array of doubles.
val aMatrix = new DoubleMatrix(Array[Double](1.0, 2.0, 3.0))
/**
 * Cosine similarity of two vectors: their dot product divided by the
 * product of their norms (as returned by `norm2()`).
 *
 * No guard is applied for zero-norm inputs; in that case the division is by zero.
 *
 * @param vec1 first vector
 * @param vec2 second vector
 * @return the cosine similarity of `vec1` and `vec2`
 */
def consineSimilarity(vec1: DoubleMatrix, vec2: DoubleMatrix): Double = {
  val dotProduct = vec1.dot(vec2)
  val normProduct = vec1.norm2() * vec2.norm2()
  dotProduct / normProduct
}
 
// Item-to-item similarity: compare one movie's factor vector with every other movie's.
val itemId = 567
// lookup returns all values stored under the key; take the first one as this item's factor array.
val itemFactor = model.productFeatures.lookup(itemId).head
val itemVector = new DoubleMatrix(itemFactor)
// Sanity check: similarity of the item's vector with itself.
consineSimilarity(itemVector, itemVector)
// Similarity between every product's factor vector and the chosen item's vector.
val sims = model.productFeatures.map { case (id, factor) =>
  (id, consineSimilarity(new DoubleMatrix(factor), itemVector))
}
val K = 10
// The K entries with the highest similarity (top already returns at most K elements).
val sortedSims = sims.top(K)(Ordering.by[(Int, Double), Double](_._2))
println(sortedSims.mkString("\n"))

println(titles(itemId))

// Fetch K + 1 entries and skip the first one, which is expected to be the query item itself,
// then resolve the remaining K movie ids to titles.
val sortedSims2 = sims.top(K + 1)(Ordering.by[(Int, Double), Double](_._2))
sortedSims2.slice(1, K + 1).map { case (id, sim) =>
  (titles(id), sim)
}.mkString("\n")
// Extract the (user, product) pair of every rating.
val temp = rating.map { case Rating(user, product, _) => (user, product) }
 
 
// Evaluating the quality of the recommendations.
// MSE (mean squared error).
// For a single rating of one user: compare the first rating in moviesForUser with the model's
// prediction (moviesForUser was looked up for user 789, so actualRating.user is that user).
val actualRating = moviesForUser.take(1)(0)
val predictedRating = model.predict(actualRating.user, actualRating.product)
val squaredError = math.pow(actualRating.rating - predictedRating, 2.0)
// For all users: predict a rating for every (user, product) pair present in the ratings.
val usersProducts = rating.map { r => (r.user, r.product) }
val predictions = model.predict(usersProducts).map { p =>
  ((p.user, p.product), p.rating)
}
// Join actual ratings with predictions on (user, product); the joined value is (actual, predicted).
val ratingsAndPredictions = rating.map { r =>
  ((r.user, r.product), r.rating)
}.join(predictions)
// Sum of squared errors divided by the number of joined pairs.
val MSE = {
  val squaredErrors = ratingsAndPredictions.map { case (_, (actual, predicted)) =>
    math.pow(actual - predicted, 2)
  }
  squaredErrors.reduce(_ + _) / ratingsAndPredictions.count
}
// Root mean squared error.
val RMSE = math.sqrt(MSE)
 
// Compute MSE and RMSE with MLlib's built-in RegressionMetrics.
import org.apache.spark.mllib.evaluation.RegressionMetrics
// ratingsAndPredictions holds (actual, predicted) values because the actual ratings are the
// left side of the join. RegressionMetrics takes (prediction, observation) pairs, so swap them.
val predictedAndTrue = ratingsAndPredictions.map { case (_, (actual, predicted)) =>
  (predicted, actual)
}
val regressionMetrics = new RegressionMetrics(predictedAndTrue)
println(s"MSE:${regressionMetrics.meanSquaredError}")
println(s"RMSE:${regressionMetrics.rootMeanSquaredError}")
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值