Spark实现之查找共同好友

最新推荐文章于 2022-06-24 16:07:12 发布

古巴与八股

最新推荐文章于 2022-06-24 16:07:12 发布

阅读量3k

点赞数 2

分类专栏： Spark

本文链接：https://blog.csdn.net/xuedingkai/article/details/78998123

版权

Spark 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

输入：邻接表

100, 200 300 400 500 600
200, 100 300 400
300, 100 200 400 500
400, 100 200 300
500, 100 300
600, 100

每行逗号前的第一列表示用户，逗号后以空格分隔的各项表示该用户的好友。

需求：查找两两用户的共同好友。

思路：1、key为两两用户，value为其中一个用户的所有好友

2、求两个用户所有好友的交集

步骤：1、map：取每一行，将user与其每一个好友分别组合为key（key中的两个字段按字典序排列），user的所有好友列表为value

2、reduce：按key分组后，对同一key下两个用户各自的好友列表求交集，结果即为这两个用户的共同好友

package dabook

import org.apache.spark.{SparkConf, SparkContext}

object CommFriend {


  /**
   * Computes the elements shared by two sets of strings.
   *
   * The smaller set is walked and each member is looked up in the larger one,
   * so the number of lookups is bounded by the size of the smaller input.
   *
   * @param set1 first set; may be null
   * @param set2 second set; may be null
   * @return the members present in both sets, or null when either argument is null
   */
  def intersection(set1: Set[String], set2: Set[String]):Set[String]={
    if (set1 != null && set2 != null) {
      // set1 is iterated only when it is strictly smaller; on a size tie set2 is iterated.
      val (probe, lookup) = if (set1.size < set2.size) (set1, set2) else (set2, set1)
      probe.foldLeft(Set[String]()) { (common, item) =>
        if (lookup.contains(item)) common + item else common
      }
    } else {
      null
    }
  }

  /**
   * Expands one adjacency-list line into (user pair -> friend list) entries.
   *
   * A line has the form "user, friend1 friend2 ...". For every friend one entry is
   * produced. Its key is the (user, friend) pair ordered by String.compareTo, so that
   * both members of a pair produce the same key. Its value is the user's whole friend
   * list, joined by single spaces.
   *
   * A line that does not split into exactly two comma-separated fields, has a blank
   * user, or has a blank friend list yields an empty map instead of null, so the
   * result can be passed directly to flatMap.
   *
   * @param x one line of input
   * @return the pair -> friend-list entries for the line; empty for unusable lines
   */
  def mymap(x:String): Map[Tuple2[String, String], String] ={
    x.split(",") match {
      case Array(rawHost, rawFriends) if rawHost.trim.nonEmpty =>
        val host = rawHost.trim
        // Split on whitespace runs and drop empty tokens so repeated spaces or a
        // blank friend list never produce an empty-string "friend".
        val friendList = rawFriends.trim.split("\\s+").filter(_.nonEmpty)
        // Normalised to single spaces so a consumer splitting on " " gets clean tokens.
        val friends = friendList.mkString(" ")
        friendList.map { friend =>
          val pair = if (host.compareTo(friend) < 0) Tuple2(host, friend) else Tuple2(friend, host)
          pair -> friends
        }.toMap
      case _ =>
        Map.empty[Tuple2[String, String], String]
    }
  }

  /**
   * Derives the common friends of a user pair from the friend lists grouped under its key.
   *
   * @param x the friend lists collected for one pair, each a space-separated string
   * @return the intersection of the two lists, or null unless exactly two lists are present
   */
  def commfri(x:Iterable[String]): Set[String] ={
    if (x.size == 2) {
      val lists = x.toArray
      val firstTokens = lists(0).split(" ")
      val secondTokens = lists(1).split(" ")
      // Each token array is folded into an immutable set, one element at a time.
      val firstFriends = firstTokens.foldLeft(Set[String]())(_ + _)
      val secondFriends = secondTokens.foldLeft(Set[String]())(_ + _)
      intersection(firstFriends, secondFriends)
    } else {
      null
    }
  }

  /**
   * Runs the common-friends job on a local Spark master and prints one
   * (user pair, common friends) tuple per pair.
   *
   * @param args optional; when present, args(0) is the input path. Without it the
   *             built-in default path is used.
   */
  def main(args: Array[String]): Unit = {
    // Alternative entry point via SparkSession, kept for reference:
    //val sparkContext = SparkSession.builder().master("local[2]").appName("common friend").getOrCreate()
    //val input = sparkContext.read.textFile("hdfs://127.0.0.1:9000/user/root/dabook/commfriend/cofriend").rdd

    val inputPath = Option(args).flatMap(_.headOption).getOrElse("/home/xdk/file/dabook/cofriend")

    val sparkConf = new SparkConf().setAppName("commfri").setMaster("local[2]")
    val sc = new SparkContext(sparkConf)
    try {
      val input1 = sc.textFile(inputPath)

      val comm = input1
        // mymap may hand back null for a line it cannot parse; treat that as "no entries"
        // so flatMap never iterates a null collection.
        .flatMap(x => Option(mymap(x)).getOrElse(Map.empty[Tuple2[String, String], String]))
        .groupByKey()
        .map(x => (x._1, commfri(x._2)))
      comm.foreach(println)
    } finally {
      // Release the context even when the job fails.
      sc.stop()
    }
  }

  /**
   * Manual smoke check for intersection: prints, one per line, each element
   * shared by two literal sets.
   */
  def test(): Unit ={
    val larger = Set("1", "2", "s", "s", "d", "3")
    val smaller = Set("3", "2", "1")

    intersection(smaller, larger).foreach(println)
  }

}

输出：

((200,300),Set(100, 400))
((100,600),Set())
((300,400),Set(100, 200))
((100,500),Set(300))
((200,400),Set(100, 300))
((100,200),Set(300, 400))
((100,300),Set(200, 400, 500))
((100,400),Set(200, 300))
((300,500),Set(100))