Scala version of the friend-recommendation algorithm:
package com.bbw5.dataalgorithms.spark
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import scala.collection.mutable.ArrayBuffer
/**
* friends.txt
* 1 2,3,4,5,6,7,8
* 2 1,3,4,5,7
* 3 1,2
* 4 1,2,6
* 5 1,2
* 6 1,4
* 7 1,2
* 8 1
* author:baibaiw5
*/
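/**
 * For each user the job recommends candidates who are not yet direct friends,
 * together with the mutual friends connecting them; e.g. with the sample data
 * above, user 2 should get user 6 recommended via mutual friends 1 and 4.
 */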
object SparkRecommendFriend {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("SparkRecommendFriend")
    val sc = new SparkContext(sparkConf)
    val textFiles = sc.textFile("G:/temp/data/friends.txt")
    val friends = textFiles.map(_.split(" ")).map { a => (a(0), a(1).split(",").sorted.toList) }.flatMap {
      case (user, items) =>
        val buf = new ArrayBuffer[(String, (String, String))]
        // mark each existing friendship of `user` with "-1"
        buf ++= items.map { f => (user, (f, "-1")) }
        // every pair of `user`'s friends shares `user` as a common friend
        buf ++= items.combinations(2).flatMap { pair =>
          Array((pair(0), (pair(1), user)), (pair(1), (pair(0), user)))
        }
        buf.toList
    }.groupByKey()
    friends.collect().foreach(println)
    val friends2 = friends.mapValues { iter =>
      // group by candidate, keep the list of common friends,
      // and drop candidates that are already direct friends (marked "-1")
      iter.groupBy(_._1)
        .mapValues(_.map(_._2).toList.sorted)
        .filter { case (_, commons) => !commons.contains("-1") }
        .toList
        .sortBy(_._1) // sort candidates by id for stable output
    }
    friends2.collect().foreach(println)
  }
}
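To see what the flatMap stage emits without starting Spark, here is a minimal, self-contained sketch in plain Scala, using the record for user 4 from the sample file ("4 1,2,6"); the object name PairEmitDemo is just for illustration:
object PairEmitDemo {
  def main(args: Array[String]): Unit = {
    val (user, items) = ("4", List("1", "2", "6"))
    // existing friendships of `user`, marked with "-1"
    val already = items.map(f => (user, (f, "-1")))
    // each pair of `user`'s friends shares `user` as a common friend
    val common = items.combinations(2).toList.flatMap { pair =>
      List((pair(0), (pair(1), user)), (pair(1), (pair(0), user)))
    }
    (already ++ common).foreach(println)
    // prints, among others, (2,(6,4)) and (6,(2,4)):
    // users 2 and 6 are connected through their common friend 4
  }
}
After groupByKey, these tuples are what the driver groups per user: candidates whose list of marks contains "-1" are dropped as existing friends, and the rest become recommendations with their mutual-friend lists.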