关于查找共同好友的原理,请参见《MapReduce之共同好友》一文;下面是将其改写为 Spark 程序之后的代码。
输入数据
100,200 300 400 500 600
200,100 300 400
300,100 200 400 500
400,100 200 300
500,100 300
600,100
Scala程序
package CommonFriends
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Spark job that computes, for every pair of users that appear together in a
 * friend list, the friends the two users have in common.
 *
 * Input format (one user per line): `person,friend1 friend2 ...`
 * e.g. `100,200 300 400 500 600`.
 */
object CommonFriends {

  /**
   * Turns one input line into `((smallerId, largerId), friendsOfPerson)` records,
   * one per friend listed on the line.
   *
   * The key is ordered so that the record emitted from A's line for friend B and
   * the record emitted from B's line for friend A end up under the same key.
   *
   * Lines without a friend section (blank line, `"700"`, `"700,"`) yield no
   * records instead of throwing. Non-numeric ids still raise
   * `NumberFormatException` so corrupt data is not silently ignored.
   *
   * @param line raw text line from the input file
   * @return the keyed friend lists derived from this line (possibly empty)
   */
  private def friendPairs(line: String): List[((Long, Long), List[Long])] =
    line.split(",") match {
      case Array(personField, friendsField, _*) =>
        val person = personField.trim.toLong
        // `distinct` keeps a friend id repeated on one line from being counted
        // twice later, which would wrongly make it look like a common friend.
        val friends = friendsField.trim
          .split("\\s+")
          .filter(_.nonEmpty)
          .map(_.toLong)
          .toList
          .distinct
        friends.map { friend =>
          val key = if (person < friend) (person, friend) else (friend, person)
          key -> friends
        }
      case _ =>
        Nil
    }

  /**
   * Intersects the friend lists collected for one user pair.
   *
   * A friend id that occurs in more than one of the (duplicate-free) lists is
   * reported as common. The result is materialized as a sorted `List` so that
   * it is strict and its order is deterministic.
   *
   * @param friendLists the friend lists grouped under one pair key
   * @return ids occurring in more than one list, ascending
   */
  private def commonFriendsOf(friendLists: Iterable[List[Long]]): List[Long] =
    friendLists.flatten
      .groupBy(identity)
      .iterator
      .collect { case (friend, occurrences) if occurrences.size > 1 => friend }
      .toList
      .sorted

  /**
   * Entry point.
   *
   * @param args optional overrides: `args(0)` = input path
   *             (default `input/CommonFriends.txt`), `args(1)` = output
   *             directory (default `output`)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("FindCommonFriends").setMaster("local")
    val sc = new SparkContext(sparkConf)
    try {
      val input = args.lift(0).getOrElse("input/CommonFriends.txt")
      val output = args.lift(1).getOrElse("output")
      val records = sc.textFile(input)

      // Map phase: emit ((person1, person2), friends) with the key ids ordered.
      val pairs = records.flatMap(line => friendPairs(line))

      // Reduce phase: group the friend lists by pair and intersect them.
      // Cached because two actions (save + print) consume it below.
      val commonFriends = pairs
        .groupByKey()
        .mapValues(lists => commonFriendsOf(lists))
        .cache()

      val formatedResult = commonFriends.map { case ((first, second), friends) =>
        s"($first, $second)\t${friends.mkString("[", ", ", "]")}"
      }

      commonFriends.saveAsTextFile(output)
      formatedResult.foreach(println)
    } finally {
      // Always release the SparkContext, even when the job fails.
      sc.stop()
    }
  }
}