Spark MLlib ALS交替最小二乘法 实例

31 篇文章 0 订阅

1.训练数据

数据格式为:用户id,物品id,评分(每行一条记录,字段之间用英文逗号分隔)

1,1,5.0
1,2,1.0
1,3,5.0
1,4,1.0
2,1,5.0
2,2,1.0
2,3,5.0
2,4,1.0
3,1,1.0
3,2,5.0
3,3,1.0
3,4,5.0
4,1,1.0
4,2,5.0
4,3,1.0
4,4,5.0


2.实例代码

package com.XX.mllib

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.log4j.{Level, Logger}
import org.apache.spark.mllib.recommendation.{ALS, Rating}
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel


/**
  * Created by XX on 2016/12/23.
  *
  * Minimal Spark MLlib ALS (alternating least squares) example: reads
  * `user,item,rating` lines from a text file, trains a matrix-factorization
  * model, prints the predictions for the training pairs together with the
  * mean squared error, then saves the model and loads it back.
  */
object ALSTest {

  /** Ratings file used when no path is passed on the command line. */
  private val DefaultDataPath = "C:\\Users\\Desktop\\recommendation\\test.data"

  /** Model directory used when no path is passed on the command line. */
  private val DefaultModelPath = "myModelPath"

  /**
    * Runs the example end to end.
    *
    * @param args optional overrides: `args(0)` is the ratings file path and
    *             `args(1)` is the directory the model is saved to; each falls
    *             back to its default when absent.
    * @throws IllegalArgumentException (raised inside the Spark job) when a
    *         non-blank input line does not have exactly three comma-separated fields.
    */
  def main(args: Array[String]): Unit = {
    val dataPath = args.headOption.getOrElse(DefaultDataPath)
    val modelPath = args.lift(1).getOrElse(DefaultModelPath)

    // 1. Build the Spark context.
    val conf = new SparkConf().setAppName("ALS").setMaster("local[*]")
    val sc = new SparkContext(conf)
    Logger.getRootLogger.setLevel(Level.WARN)

    // Stop the context even when a step below fails, so it is not leaked.
    try {
      // 2. Read the sample data. Blank lines (e.g. a trailing newline) are
      //    skipped; anything else that is not `user,item,rating` is reported
      //    with the offending line instead of an opaque MatchError.
      val ratings = sc.textFile(dataPath)
        .map(_.trim)
        .filter(_.nonEmpty)
        .map { line =>
          line.split(',') match {
            case Array(user, item, rate) =>
              Rating(user.trim.toInt, item.trim.toInt, rate.trim.toDouble)
            case _ =>
              throw new IllegalArgumentException(
                s"Malformed rating line (expected user,item,rating): '$line'")
          }
        }
        .cache() // reused by training, prediction input and the MSE join

      // 3. Train the model.
      val rank = 10
      val numIterations = 20
      val lambda = 0.01
      val model = ALS.train(ratings, rank, numIterations, lambda)

      // 4. Predict a rating for every (user, product) pair seen in training.
      val userProducts = ratings.map { case Rating(user, product, _) => (user, product) }
      val predictions = model.predict(userProducts).map {
        case Rating(user, product, rate) => ((user, product), rate)
      }

      // Print as an explicit tuple: same "(user,product,rating)" text as before,
      // without relying on argument auto-tupling in println.
      predictions.collect().foreach {
        case ((user, product), rate) => println((user, product, rate))
      }

      // Join actual and predicted ratings on (user, product) and average the
      // squared differences.
      val ratesAndPreds = ratings
        .map { case Rating(user, product, rate) => ((user, product), rate) }
        .join(predictions)
      val mse = ratesAndPreds.map { case (_, (actual, predicted)) =>
        val err = actual - predicted
        err * err
      }.mean()
      println(s"Mean Squared Error = $mse")

      // 5. Save the model and load it back.
      // NOTE(review): save is expected to fail if modelPath already exists —
      // remove the directory (or pass a fresh path) before re-running.
      model.save(sc, modelPath)
      val sameModel = MatrixFactorizationModel.load(sc, modelPath)
      require(sameModel.rank == model.rank,
        s"Reloaded model rank ${sameModel.rank} differs from trained rank ${model.rank}")
    } finally {
      sc.stop()
    }
  }

}

3.运行结果

(4,4,4.995180973310869)
(4,1,1.000734490472519)
(4,2,4.995180973310869)
(4,3,1.000734490472519)
(1,4,1.0007290221482479)
(1,1,4.995208326066093)
(1,2,1.0007290221482479)
(1,3,4.995208326066093)
(2,4,1.0007290221482479)
(2,1,4.995208326066093)
(2,2,1.0007290221482479)
(2,3,4.995208326066093)
(3,4,4.995180973310869)
(3,1,1.000734490472519)
(3,2,4.995180973310869)
(3,3,1.000734490472519)
Mean Squared Error = 1.1813526716574833E-5

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值