Scala version of the friend-recommendation algorithm:
package com.bbw5.dataalgorithms.spark
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import scala.collection.mutable.ArrayBuffer
/**
* friends.txt
* 1 2,3,4,5,6,7,8
* 2 1,3,4,5,7
* 3 1,2
* 4 1,2,6
* 5 1,2
* 6 1,4
* 7 1,2
* 8 1
* author:baibaiw5
*/
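/**
 * For each user the job recommends candidates who are not yet direct friends,
 * together with the mutual friends connecting them; e.g. with the sample data
 * above, user 2 should get user 6 recommended via mutual friends 1 and 4.
 */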
object SparkRecommendFriend {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("SparkRecommendFriend")
    val sc = new SparkContext(sparkConf)
    val textFiles = sc.textFile("G:/temp/data/friends.txt")
    val friends = textFiles.map(_.split(" ")).map { a => (a(0), a(1).split(",").sorted.toList) }.flatMap {
      case (user, items) =>
        val buf = new ArrayBuffer[(String, (String, String))]
        // mark each existing friendship of `user` with "-1"
        buf ++= items.map { f => (user, (f, "-1")) }
        // every pair of `user`'s friends shares `user` as a common friend
        buf ++= items.combinations(2).flatMap { pair =>
          Array((pair(0), (pair(1), user)), (pair(1), (pair(0), user)))
        }
        buf.toList
    }.groupByKey()
    friends.collect().foreach(println)
    val friends2 = friends.mapValues { iter =>
      // group by candidate, keep the list of common friends,
      // and drop candidates that are already direct friends (marked "-1")
      iter.groupBy(_._1)
        .mapValues(_.map(_._2).toList.sorted)
        .filter { case (_, commons) => !commons.contains("-1") }
        .toList
        .sortBy(_._1) // sort candidates by id for stable output
    }
    friends2.collect().foreach(println)
  }
}
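To see what the flatMap stage emits without starting Spark, here is a minimal, self-contained sketch in plain Scala, using the record for user 4 from the sample file ("4 1,2,6"); the object name PairEmitDemo is just for illustration:
object PairEmitDemo {
  def main(args: Array[String]): Unit = {
    val (user, items) = ("4", List("1", "2", "6"))
    // existing friendships of `user`, marked with "-1"
    val already = items.map(f => (user, (f, "-1")))
    // each pair of `user`'s friends shares `user` as a common friend
    val common = items.combinations(2).toList.flatMap { pair =>
      List((pair(0), (pair(1), user)), (pair(1), (pair(0), user)))
    }
    (already ++ common).foreach(println)
    // prints, among others, (2,(6,4)) and (6,(2,4)):
    // users 2 and 6 are connected through their common friend 4
  }
}
After groupByKey, these tuples are what the driver groups per user: candidates whose list of marks contains "-1" are dropped as existing friends, and the rest become recommendations with their mutual-friend lists.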