Spark GraphX 中的PageRank算法、pregel函数、航班飞行网图分析

PageRank算法 

PageRank算法原理剖析及Spark实现 - 简书 (jianshu.com)

import org.apache.spark.SparkContext
import org.apache.spark.graphx.{Edge, Graph, VertexId, VertexRDD}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession

/**
  * Vertex attribute used by the degree demo: a person plus the degree counters
  * that are filled in from the graph's in-degree and out-degree tables.
  *
  * @param name   display name of the person
  * @param age    age of the person
  * @param inDeg  number of incoming edges of the vertex
  * @param outDeg number of outgoing edges of the vertex
  */
case class User(
  name: String,
  age: Int,
  inDeg: Int,
  outDeg: Int
)

/**
  * GraphX basics demonstrated on a small directed graph of people.
  *
  * The program builds a property graph (vertex attribute = (name, age), edge
  * attribute = Int weight) and then:
  *   1. maps the vertices to [[User]] and fills in the in/out-degrees with
  *      `outerJoinVertices`;
  *   2. runs PageRank with a convergence tolerance of 0.0001;
  *   3. computes the connected components;
  *   4. builds graphs enriched with e-mail / phone side tables via
  *      `joinVertices` and `outerJoinVertices`.
  *
  * Every result is collected to the driver before it is printed, so the output
  * shows up on the driver console regardless of the master the job runs on.
  */
object GraphDemo4 {
  def main(args: Array[String]): Unit = {

    val spark: SparkSession = SparkSession.builder().appName("sparkgraph")
      .master("local[*]")
      .getOrCreate()
    val sc: SparkContext = spark.sparkContext

    try {
      // Vertices: (id, (name, age)). The attribute tuple may hold values of any type.
      val users: RDD[(Long, (String, Int))] = sc.makeRDD(
        Array(
          (1L, ("Alice", 28)),
          (2L, ("Bob", 27)),
          (3L, ("Charlie", 65)),
          (4L, ("David", 42)),
          (5L, ("Ed", 55)),
          (6L, ("Fran", 50)),
          (7L, ("KB11", 8)),
          (8L, ("KB12", 7)),
          (9L, ("KB13", 9))
        )
      )
      // Edges: Edge(srcId, dstId, weight). Vertices 7-9 form their own cycle
      // (7 -> 8 -> 9 -> 7) with no edge to or from vertices 1-6.
      val edges: RDD[Edge[Int]] = sc.makeRDD(
        Array(
          Edge(2L, 1L, 7),
          Edge(3L, 2L, 4),
          Edge(4L, 1L, 1),
          Edge(2L, 4L, 2),
          Edge(5L, 2L, 2),
          Edge(5L, 3L, 8),
          Edge(3L, 6L, 3),
          Edge(5L, 6L, 3),

          Edge(7L, 8L, 12),
          Edge(8L, 9L, 32),
          Edge(9L, 7L, 35)
        )
      )
      val graph: Graph[(String, Int), Int] = Graph(users, edges)

      // Start every user with zero degrees; the two joins below fill them in.
      val graph1: Graph[User, Int] = graph.mapVertices { case (_, (name, age)) => User(name, age, 0, 0) }
      graph1.vertices.collect().foreach(println)

      println("---------------------------------")

      val inDegrees: VertexRDD[Int] = graph.inDegrees
      inDegrees.collect().foreach(println)

      // The degree table is joined as an Option; a vertex without an entry gets 0.
      val graph2: Graph[User, Int] =
        graph1.outerJoinVertices(inDegrees)((_, user, degOpt) => user.copy(inDeg = degOpt.getOrElse(0)))
      graph2.vertices.collect().foreach(println)

      val outDegrees: VertexRDD[Int] = graph.outDegrees
      println("-------------------------")
      outDegrees.collect().foreach(println)

      val graph3: Graph[User, Int] =
        graph2.outerJoinVertices(outDegrees)((_, user, degOpt) => user.copy(outDeg = degOpt.getOrElse(0)))

      println("-------------------------")
      // Prints, per user, how many people they like (out-degree) and how many like them (in-degree).
      graph3.vertices.collect().foreach { case (_, user) =>
        println(s"${user.name}喜欢 ${user.outDeg}人,被 ${user.inDeg}人喜欢。")
      }

      println("---------------pageRank-----------------------")
      // pageRank(tol) iterates until the ranks change by less than `tol`.
      val rankGraph: Graph[Double, Double] = graph.pageRank(0.0001)
      rankGraph.vertices.collect().foreach(println)

      /** Sample output for tol = 0.0001. It lists vertices 1-6 only, so it was
        * presumably captured before vertices 7-9 were added -- TODO re-capture.
        * (6,0.9969646507526427)
        * (2,0.9969646507526427)
        * (1,1.7924127957615184)
        * (5,0.5451618049228395)
        * (3,0.6996243163176441)
        * (4,0.9688717814927127)
        */
      println("---------connectedComponents------------")

      val graphConn: Graph[VertexId, Int] = graph.connectedComponents()
      graphConn.triplets.collect().foreach(println)

      // Side tables keyed by vertex id; only some vertices have an entry.
      val emailRDD: RDD[(Long, String)] = sc.parallelize(
        Array(
          (1L, "qq.com"),
          (3L, "163.com"),
          (6L, "souhu.com"),
          (7L, "fox.com")
        )
      )

      val phoneRDD: RDD[(Long, String)] = sc.parallelize(
        Array(
          (1L, "12345678922"),
          (3L, "22232342433"),
          (6L, "23543652577")
        )
      )

      // NOTE: the three joined graphs below are never materialised, because the
      // print statements that would trigger them are disabled. Re-enable the
      // commented lines to see their output.

      // joinVertices: the function receives a plain value, not an Option.
      val graphjoin: Graph[(String, Int), Int] =
        graph.joinVertices(emailRDD)((_, attr, domain) => (s"${attr._1}@$domain", attr._2))
      //    graph.vertices.collect().foreach(println)
      println("----------------------------------------------")
      //    graphjoin.vertices.collect().foreach(println)
      println("-----------------------------------")

      // outerJoinVertices: the function receives an Option and supplies a default domain.
      val graphjoin2: Graph[(String, Int), Int] =
        graph.outerJoinVertices(emailRDD)((_, attr, domain) => (s"${attr._1}@${domain.getOrElse("tao.com")}", attr._2))
      //    graph.vertices.collect().foreach(println)

      /** Sample output of graphjoin2.vertices (vertices 1-6 only, see note above):
        * (4,(David@tao.com,42))
        * (1,(Alice@qq.com,28))
        * (5,(Ed@tao.com,55))
        * (6,(Fran@souhu.com,50))
        * (2,(Bob@tao.com,27))
        * (3,(Charlie@163.com,65))
        */
      println("----------------------------------------------")
      //    graphjoin2.vertices.collect().foreach(println)
      println("--------------------------------------------------")

      /** Sample output, presumably of graph.vertices (the source graph's own attributes):
        * (4,(David,42))
        * (1,(Alice,28))
        * (5,(Ed,55))
        * (6,(Fran,50))
        * (2,(Bob,27))
        * (3,(Charlie,65))
        */

      val graphjoin3: Graph[(String, Int), Int] =
        graph.outerJoinVertices(phoneRDD)((_, attr, phone) => (s"${attr._1}:${phone.getOrElse("13611112222")}", attr._2))
      //    graph.vertices.collect().foreach(println)
      println("----------------------------------------------")
      //    graphjoin3.vertices.collect().foreach(println)

      /** Sample output of graphjoin3.vertices (vertices 1-6 only, see note above):
        * (4,(David:13611112222,42))
        * (1,(Alice:12345678922,28))
        * (5,(Ed:13611112222,55))
        * (6,(Fran:23543652577,50))
        * (2,(Bob:13611112222,27))
        * (3,(Charlie:22232342433,65))
        */
    } finally {
      // Release the local Spark resources even if one of the jobs above fails.
      spark.stop()
    }
  }

}

pregel 函数

Spark GraphX 中的 pregel函数_Bamdli-CSDN博客

import org.apache.spark.SparkContext
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession

/**
  * Single-source shortest paths computed with the GraphX `pregel` operator.
  *
  * Every vertex holds its best known distance from `startVertexId`
  * (0.0 for the start vertex, Double.MaxValue for all others). In each
  * superstep a vertex keeps the minimum of its own value and the incoming
  * message, and offers `srcDistance + edgeWeight` to a neighbour only when
  * that improves the neighbour's current value.
  *
  * The vertex program and the send function print a trace line on every
  * invocation. They run inside Spark tasks, so the trace appears on the console
  * only because the master is local.
  */
object GraphDemo5 {
  def main(args: Array[String]): Unit = {

    val spark: SparkSession = SparkSession.builder().appName("sparkgraph")
      .master("local[*]")
      .getOrCreate()
    val sc: SparkContext = spark.sparkContext

    try {
      // Vertices: (id, (name, age)). The attribute tuple may hold values of any type.
      val users: RDD[(Long, (String, Int))] = sc.makeRDD(
        Array(
          (1L, ("Alice", 28)),
          (2L, ("Bob", 27)),
          (3L, ("Charlie", 65)),
          (4L, ("David", 42)),
          (5L, ("Ed", 55)),
          (6L, ("Fran", 50))
        )
      )
      // Edges: Edge(srcId, dstId, weight); the weight is used as the edge length.
      val edges: RDD[Edge[Int]] = sc.makeRDD(
        Array(
          Edge(2L, 1L, 7),
          Edge(3L, 2L, 4),
          Edge(4L, 1L, 1),
          Edge(2L, 4L, 2),
          Edge(5L, 2L, 2),
          Edge(5L, 3L, 8),
          Edge(3L, 6L, 3),
          Edge(5L, 6L, 3)
        )
      )

      val graph: Graph[(String, Int), Int] = Graph(users, edges)

      val startVertexId = 5L

      // Distance 0 for the start vertex, "unreached" (Double.MaxValue) for the rest.
      val initGraph: Graph[Double, Int] = graph.mapVertices((vid, _) =>
        if (vid == startVertexId) 0.0 else Double.MaxValue
      )
      initGraph.vertices.collect().foreach(println)

      /** Sample output:
        * (4,1.7976931348623157E308)
        * (6,1.7976931348623157E308)
        * (1,1.7976931348623157E308)
        * (2,1.7976931348623157E308)
        * (3,1.7976931348623157E308)
        * (5,0.0)
        */
      println("------------------------------------------------")
      val pregelGraph: Graph[Double, Int] = initGraph.pregel(
        Double.PositiveInfinity, // initial message, delivered to every vertex first
        10,                      // upper bound on the number of iterations
        EdgeDirection.Out
      )(
        // vprog: (VertexId, VD, A) => VD -- keep the smaller of the current value and the message.
        (vid: VertexId, vd: Double, disMsg: Double) => {
          val min: Double = math.min(vd, disMsg)
          println(s"顶点$vid, 属性$vd 收到的消息$disMsg 属性值与收到的消息比较后结果取最小值 $min")
          min
        },
        // sendMsg: offer the path through the source only if it beats the destination's value.
        (triplet: EdgeTriplet[Double, Int]) => {
          val candidate: Double = triplet.srcAttr + triplet.attr
          println(s"顶点${triplet.srcId} 给${triplet.dstId} 发送消息:${triplet.srcAttr} " +
            s"与${triplet.attr} 相加结果: ${candidate}")
          if (candidate < triplet.dstAttr)
            Iterator[(VertexId, Double)]((triplet.dstId, candidate))
          else
            Iterator.empty
        },
        // mergeMsg: several offers for the same vertex collapse to the shortest one.
        (msg1: Double, msg2: Double) => math.min(msg1, msg2)
      )
      pregelGraph.vertices.collect().foreach(println)

      /** Trace of a sample run with start vertex 5, summarised from the printed log:
        *   - every vertex first receives Infinity and keeps its own value;
        *   - vertex 5 offers 2.0 to vertex 2, 8.0 to vertex 3 and 3.0 to vertex 6;
        *     unreached vertices offer Double.MaxValue, which is never an improvement;
        *   - vertices 2, 3 and 6 adopt 2.0, 8.0 and 3.0;
        *   - vertex 2 offers 4.0 to vertex 4 and 9.0 to vertex 1; the offers of
        *     vertex 3 (11.0 to vertex 6, 12.0 to vertex 2) are not improvements;
        *   - vertices 4 and 1 adopt 4.0 and 9.0;
        *   - vertex 4 offers 5.0 to vertex 1, which lowers its value from 9.0 to 5.0.
        *
        * Final vertices:
        * (1,5.0)
        * (5,0.0)
        * (4,4.0)
        * (6,3.0)
        * (2,2.0)
        * (3,8.0)
        */
    } finally {
      // Release the local Spark resources even if the job above fails.
      spark.stop()
    }
  }

}

航班飞行网图分析

import org.apache.spark.SparkContext
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession

/**
  * Flight-network analysis with GraphX.
  *
  * Reads `in/flight.csv` and builds a graph whose vertices are airports
  * (id from columns 5 / 7, name from columns 6 / 8) and whose edges are the
  * distinct (origin, destination, distance) routes, with the distance taken
  * from column 16. It then prints:
  *   - the number of airports and routes;
  *   - the three longest and the three shortest routes;
  *   - the airports with the highest in-degree and out-degree;
  *   - the three airports with the highest PageRank (tolerance 0.05);
  *   - the cheapest fare from airport 10397 to airport 12892, where one leg
  *     costs `180 + distance * 0.15`.
  *
  * NOTE(review): rows are split on "," and indexed directly, so the file is
  * assumed to have no header row and no quoted commas -- confirm against the data.
  */
object FlightDemo {
  def main(args: Array[String]): Unit = {

    val spark: SparkSession = SparkSession.builder().appName("flightdemo")
      .master("local[*]")
      .getOrCreate()
    val sc: SparkContext = spark.sparkContext

    try {
      val flightRDD: RDD[String] = sc.textFile("in/flight.csv")
      //    flightRDD.collect().foreach(println)

      // Split every line once and cache it: both the vertex RDD and the edge RDD derive from it.
      val rows: RDD[Array[String]] = flightRDD.map(_.split(",")).cache()

      // All airport ids with their names, e.g. (12953,LGA).
      val airPort: RDD[(VertexId, String)] = rows
        .flatMap(cols => Array((cols(5).toLong, cols(6)), (cols(7).toLong, cols(8))))
        .distinct()
      // All distinct routes with their distance.
      val lines: RDD[Edge[Int]] = rows
        .map(cols => (cols(5).toLong, cols(7).toLong, cols(16).toInt))
        .distinct()
        .map { case (src, dst, distance) => Edge(src, dst, distance) }

      //    lines.collect().foreach(println)

      // The graph is traversed by several independent actions below, so keep it cached.
      val graph: Graph[String, Int] = Graph(airPort, lines).cache()
      //    graph.triplets.collect().foreach(println)

      // Number of airports (vertices) and routes (edges); both are plain counts.
      val airportCount: Long = graph.numVertices
      val routeCount: Long = graph.numEdges
      println(s"机场数量:${airportCount}航线数量:${routeCount}")

      // Three longest routes (largest edge attribute).
      graph.triplets.sortBy(_.attr, ascending = false).take(3)
        .foreach(t => println(s"${t.srcAttr}到达${t.dstAttr}距离是${t.attr}"))

      // Three shortest routes (smallest edge attribute).
      graph.triplets.sortBy(_.attr, ascending = true).take(3)
        .foreach(t => println(s"${t.srcAttr}到达${t.dstAttr}距离是${t.attr}"))

      // Airport with the most incoming routes. headOption keeps an empty graph from crashing.
      val byInDegree: RDD[(VertexId, Int)] = graph.inDegrees.sortBy(_._2, ascending = false)
      byInDegree.take(1).headOption
        .foreach(busiest => println(s"进场航班最多的机场${busiest}"))

      // Airport with the most outgoing routes.
      val byOutDegree: RDD[(VertexId, Int)] = graph.outDegrees.sortBy(_._2, ascending = false)
      byOutDegree.take(1).headOption
        .foreach(busiest => println(s"出场航班最多的机场${busiest}"))

      // Three most important airports by PageRank.
      graph.pageRank(0.05).vertices.sortBy(_._2, ascending = false).take(3).foreach(println)

      println("--------------------------------------")

      // Cheapest fare from the start airport to every other airport.
      val startAirPort = 10397L
      // Destination whose cheapest fare is printed at the end.
      val targetAirPort = 12892L

      // Vertex value = best known fare from the start airport (0 for the start
      // itself); edge value = fare of that leg, derived from its distance.
      val init_graph: Graph[Double, Double] = graph
        .mapVertices((id, _) => if (id == startAirPort) 0.0 else Double.MaxValue)
        .mapEdges(e => 180 + e.attr.toDouble * 0.15)
      //    init_graph.triplets.take(3).foreach(println)

      val pregel_graph: Graph[Double, Double] = init_graph.pregel(
        Double.MaxValue, // initial message: leaves every vertex value unchanged
        Int.MaxValue,    // no practical limit on the number of iterations
        EdgeDirection.Out
      )(
        // Vertex program: keep the cheaper of the current fare and the offered fare.
        (id, dist, new_dist) => math.min(dist, new_dist),
        // Send: offer the fare through the source only if it is cheaper for the destination.
        triple => {
          val offer = triple.attr + triple.srcAttr
          if (offer < triple.dstAttr) Iterator((triple.dstId, offer))
          else Iterator.empty
        },
        // Merge: of several offers to one airport keep the cheapest.
        (new_dist1, new_dist2) => math.min(new_dist1, new_dist2)
      )
      pregel_graph.vertices.filter(_._1 == targetAirPort).collect().foreach(println)

      /** Results of a sample run, summarised from the printed output:
        *   airports: 301, routes: 4088
        *   longest routes:  HNL -> JFK 4983, JFK -> HNL 4983, HNL -> EWR 4963
        *   shortest routes: PSG -> WRG 31, WRG -> PSG 31, ACV -> CEC 56
        *   highest in-degree:  (10397,152)
        *   highest out-degree: (10397,153)
        *   PageRank top 3:
        *     (10397,11.060247708032241)
        *     (13930,10.805558753161533)
        *     (11298,10.652656481033038)
        *   cheapest fare 10397 -> 12892: (12892,472.05)
        */
    } finally {
      // Release the local Spark resources even if one of the jobs above fails.
      spark.stop()
    }
  }

}

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值