【Data Algorithms: Recipes for Scaling Up with Hadoop and Spark】Chapter 9: Recommending Friends

Scala implementation of the algorithm. For each user, every pair of that user's friends is emitted as a candidate pair sharing that user as a common friend; existing friendships are tagged with "-1" so they can be filtered out of the recommendations later.

package com.bbw5.dataalgorithms.spark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import scala.collection.mutable.ArrayBuffer

/**
 * friends.txt
 * 1 2,3,4,5,6,7,8
 * 2 1,3,4,5,7
 * 3 1,2
 * 4 1,2,6
 * 5 1,2
 * 6 1,4
 * 7 1,2
 * 8 1
 * author:baibaiw5
 */
object SparkRecommendFriend {

  def main(args: Array[String]) {
    val sparkConf = new SparkConf().setAppName("SparkRecommendFriend")
    val sc = new SparkContext(sparkConf)

    val textFiles = sc.textFile("G:/temp/data/friends.txt")
    val friends = textFiles.map(_.split(" ")).map { a => (a(0), a(1).split(",").sorted.toList) }.flatMap {
      case (user, items) =>
        val buf = new ArrayBuffer[(String, (String, String))]
        //mark existing friendships with the sentinel "-1" so they can be filtered out later
        buf ++= items.map { f => (user, (f, "-1")) }
        //every pair of this user's friends has the user as a common friend; emit both directions
        buf ++= items.combinations(2).flatMap { pair =>
          Array((pair(0), (pair(1), user)), (pair(1), (pair(0), user)))
        }
        buf.toList
    }.groupByKey()

    friends.collect().foreach(println)

    val friends2 = friends.mapValues { iter =>
      //group candidates by id, keep the list of common friends for each,
      //and drop candidates who are already friends of this user (marked with "-1")
      iter.groupBy(_._1).mapValues(_.map(_._2)).filter { case (_, sources) => !sources.exists(_ == "-1") }
    }.mapValues(_.toList.sorted)

    friends2.collect().foreach(println)
  }
}
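
The listing stops at grouping and filtering; it does not order the suggestions. A natural follow-up, sketched below under the assumption that it is appended inside main after friends2 is computed (the ranked name is mine, not from the book), is to rank each user's candidates by how many common friends they share:

    //hypothetical extension (not in the original listing): rank each user's candidates
    //by the number of common friends, strongest suggestions first
    val ranked = friends2.mapValues { candidates =>
      candidates
        .map { case (candidate, commonFriends) => (candidate, commonFriends.size) }
        .sortBy { case (_, count) => -count }
    }
    ranked.collect().foreach(println)

For the sample friends.txt above, user 3's line would come out roughly as (3, List((4,2), (5,2), (7,2), (6,1), (8,1))): users 4, 5, and 7 each share two common friends with user 3, while 6 and 8 share one, and the existing friends 1 and 2 have already been filtered out.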

