1.训练数据
数据格式为:用户id,物品id,评分
1,1,5.0
1,2,1.0
1,3,5.0
1,4,1.0
2,1,5.0
2,2,1.0
2,3,5.0
2,4,1.0
3,1,1.0
3,2,5.0
3,3,1.0
3,4,5.0
4,1,1.0
4,2,5.0
4,3,1.0
4,4,5.0
2.实例代码
package com.XX.mllib
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.log4j.{Level, Logger}
import org.apache.spark.mllib.recommendation.{ALS, Rating}
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
/**
* Created by XX on 2016/12/23.
*/
/**
 * Minimal ALS (alternating least squares) collaborative-filtering example
 * built on Spark MLlib.
 *
 * The program reads `user,item,rating` lines from a text file, trains a
 * matrix-factorization model, prints the predicted rating for every
 * (user, product) pair present in the training data, prints the mean squared
 * error between the actual and predicted ratings, and finally saves the model
 * and loads it back.
 */
object ALSTest {

  /**
   * Parses one `user,item,rating` line into a [[Rating]].
   *
   * Fields are trimmed before conversion so stray spaces around the commas
   * are tolerated.
   *
   * @param line a single non-blank line of the input file
   * @return the parsed rating
   * @throws IllegalArgumentException if the line does not have exactly three
   *                                  comma-separated fields (a non-numeric
   *                                  field raises `NumberFormatException`,
   *                                  which is a subclass of it)
   */
  private def parseRating(line: String): Rating =
    line.split(',').map(_.trim) match {
      case Array(user, item, rate) => Rating(user.toInt, item.toInt, rate.toDouble)
      case _ =>
        // Fail with the offending line instead of an opaque scala.MatchError.
        throw new IllegalArgumentException(s"Expected 'user,item,rating' but got: '$line'")
    }

  /**
   * Entry point: trains, evaluates, saves and reloads the ALS model.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // 1. Build the Spark context (local mode, one worker thread per core).
    val conf = new SparkConf().setAppName("ALS").setMaster("local[*]")
    val sc = new SparkContext(conf)
    Logger.getRootLogger.setLevel(Level.WARN)

    try {
      // 2. Load the sample data. Blank lines (e.g. a trailing newline) are
      //    skipped. The RDD is cached because it is reused by the iterative
      //    training, by the prediction input and by the evaluation join.
      val data = sc.textFile("C:\\Users\\Desktop\\recommendation\\test.data")
      val ratings = data.filter(_.trim.nonEmpty).map(parseRating).cache()

      // 3. Train the model.
      val rank = 10          // number of latent factors
      val numIterations = 20 // number of ALS iterations
      val lambda = 0.01      // regularization parameter
      val model = ALS.train(ratings, rank, numIterations, lambda)

      // 4. Predict a rating for every (user, product) pair seen in training.
      val userProducts = ratings.map { case Rating(user, product, _) => (user, product) }
      // Cached because it is both collected for printing and joined below.
      val predictions = model
        .predict(userProducts)
        .map { case Rating(user, product, rate) => ((user, product), rate) }
        .cache()
      // Explicit tuple: prints "(user,product,rate)" without relying on
      // deprecated argument auto-tupling.
      predictions.collect().foreach { case ((user, product), rate) =>
        println((user, product, rate))
      }

      // Pair each actual rating with its prediction and compute the MSE.
      val ratesAndPreds = ratings
        .map { case Rating(user, product, rate) => ((user, product), rate) }
        .join(predictions)
      val MSE = ratesAndPreds.map { case (_, (actual, predicted)) =>
        val err = actual - predicted
        err * err
      }.mean()
      println("Mean Squared Error = " + MSE)

      // Save the model, then load it back to demonstrate the round trip.
      // NOTE(review): save presumably fails if "myModelPath" already exists
      // from a previous run -- confirm, and clear the directory between runs.
      model.save(sc, "myModelPath")
      MatrixFactorizationModel.load(sc, "myModelPath")
    } finally {
      // Always release the context, even when a step above throws.
      sc.stop()
    }
  }
}
3.运行结果
(4,4,4.995180973310869)
(4,1,1.000734490472519)
(4,2,4.995180973310869)
(4,3,1.000734490472519)
(1,4,1.0007290221482479)
(1,1,4.995208326066093)
(1,2,1.0007290221482479)
(1,3,4.995208326066093)
(2,4,1.0007290221482479)
(2,1,4.995208326066093)
(2,2,1.0007290221482479)
(2,3,4.995208326066093)
(3,4,4.995180973310869)
(3,1,1.000734490472519)
(3,2,4.995180973310869)
(3,3,1.000734490472519)
Mean Squared Error = 1.1813526716574833E-5