Spark MLlib 机器学习之六:ALS

协同过滤采用音乐推荐的数据

http://www.iro.umontreal.ca/~lisa/datasets/profiledata_06-May-2005.tar.gz  


package com.agm.practice



import java.io.File
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.log4j.{ Level, Logger }


/**
 * Music-recommendation demo using the implicit-feedback ALS trainer from Spark MLlib.
 *
 * The program reads three text files from a hard-coded local directory:
 *  - `artist_data.txt`      : one `id<TAB>name` pair per line,
 *  - `artist_alias.txt`     : one `aliasId<TAB>canonicalId` pair per line,
 *  - `user_artist_data.txt` : one `userID artistID count` triple per line (space separated).
 *
 * It prints the names of two sample artists, trains an ALS model on the play counts
 * (with alias IDs replaced by their canonical IDs), prints the artists the target user
 * already has in the raw data, and finally prints the top 5 recommendations for that user.
 */
object adviceMusic {

  /**
   * Program entry point.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    import org.apache.spark.mllib.recommendation._

    // Silence everything below ERROR from loggers under "org".
    Logger.getLogger("org").setLevel(Level.ERROR)
    val conf = new SparkConf().setAppName("Simple Application") // name the application
    conf.setMaster("local[2]")
    val sc = new SparkContext(conf)

    // Always stop the context, even when a job fails, so it is not leaked.
    try {
      val dataDir = "D://Spark//文档//profiledata_06-May-2005//"
      // User whose listening history and recommendations are printed.
      val targetUser = 2093760

      // (artistId, artistName) pairs; lines without a name part or with a
      // non-numeric ID are dropped.
      val rawArtistData = sc.textFile(dataDir + "artist_data.txt")
      val artistByID = rawArtistData.flatMap { line =>
        // `name` still starts with the tab separator here; `trim` removes it.
        val (id, name) = line.span(_ != '\t')
        if (name.isEmpty) {
          None
        } else {
          try {
            Some((id.toInt, name.trim))
          } catch {
            case _: NumberFormatException => None
          }
        }
      }

      // aliasId -> canonicalId, collected to the driver. Lines with fewer than two
      // fields, an empty first field, or non-numeric IDs are skipped, mirroring the
      // tolerant parsing used for the artist file.
      val rawArtistAlias = sc.textFile(dataDir + "artist_alias.txt")
      val artistAlias = rawArtistAlias.flatMap { line =>
        val tokens = line.split('\t')
        if (tokens.length < 2 || tokens(0).isEmpty) {
          None
        } else {
          try {
            Some((tokens(0).toInt, tokens(1).toInt))
          } catch {
            case _: NumberFormatException => None
          }
        }
      }.collectAsMap()

      // Print the names of two sample artist IDs; report a placeholder instead of
      // throwing when an ID is not present in the artist file.
      Seq(6803336, 1000010).foreach { id =>
        println(artistByID.lookup(id).headOption.getOrElse(s"<no artist name for ID $id>"))
      }

      val rawUserArtistData = sc.textFile(dataDir + "user_artist_data.txt")

      // Broadcast the alias map so the map closure below reads it through one shared value.
      val bArtistAlias = sc.broadcast(artistAlias)
      val trainData = rawUserArtistData.map { line =>
        val Array(userID, artistID, count) = line.split(' ').map(_.toInt)
        // Replace an alias ID by its canonical ID; IDs without an alias entry are kept.
        val finalArtistID =
          bArtistAlias.value.getOrElse(artistID, artistID)
        Rating(userID, finalArtistID, count)
      }.cache()

      // ALS hyper-parameters, in the positional order expected by `trainImplicit`.
      val rank = 10
      val iterations = 5
      val lambda = 0.01
      val alpha = 1.0
      val model = ALS.trainImplicit(trainData, rank, iterations, lambda, alpha)

      // Artist IDs the target user appears with in the raw (not alias-resolved) data.
      val rawArtistsForUser = rawUserArtistData.map(_.split(' ')).
        filter { case Array(user, _, _) => user.toInt == targetUser }
      val existingProducts =
        rawArtistsForUser.map { case Array(_, artist, _) => artist.toInt }.
          collect().toSet
      artistByID.filter {
        case (id, _) =>
          existingProducts.contains(id)
      }.values.collect().foreach(println)

      // Top 5 recommendations for the target user, printed as `Rating` values.
      val recommendations = model.recommendProducts(targetUser, 5)
      recommendations.foreach(println)
    } finally {
      sc.stop()
    }
  }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值