sparkGraphx-航班飞行网图分析

最新推荐文章于 2021-12-21 22:49:13 发布

qzc_root

最新推荐文章于 2021-12-21 22:49:13 发布

阅读量137

点赞数

本文链接：https://blog.csdn.net/qzc_root/article/details/117041414

版权

文章目录

数据格式
构建航班飞行网图

数据格式

文件格式为CSV，字段之间分隔符为“,”
依次为（共17个字段，下标从0开始）：日、周、航空公司、飞机注册号、航班号、起飞机场编号、起飞机场、到达机场编号、到达机场、预计起飞时间（时分）、起飞时间、起飞延迟（分钟）、到达预计时间、到达时间、到达延迟（分钟）、预计飞行时间、飞行距离

构建航班飞行网图

装载CSV为RDD，每个机场作为顶点。关键字段：起飞机场编号、起飞机场、到达机场编号、到达机场、飞行距离
初始化顶点集airports:RDD[(VertexId,String)]，顶点属性为机场名称
初始化边集lines:RDD[Edge]，边属性为飞行距离

package flight

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
/**
 * Flight-network analysis demo built on Spark GraphX.
 *
 * Reads comma-separated flight records from `in/flight.csv`, builds a graph whose
 * vertices are airports (attribute: airport name) and whose edges are routes
 * (attribute: flight distance), and prints a handful of analyses: graph size,
 * longest routes, busiest airports by in/out degree, PageRank ranking, and the
 * cheapest fares reachable from one sampled source airport.
 *
 * Column positions used (0-based): 5 = origin airport id, 6 = origin airport name,
 * 7 = destination airport id, 8 = destination airport name, 16 = flight distance.
 */
object jFlightDemo {

  /** Flat part of the fare model `price = 180.0 + distance * 0.15`. */
  private val BaseFare: Double = 180.0

  /** Distance-proportional part of the fare model. */
  private val FarePerDistance: Double = 0.15

  /**
   * Entry point: creates a local Spark session, runs the analysis and always
   * stops the session afterwards so the local Spark resources are released.
   */
  def main(args: Array[String]): Unit = {
    // Master and application name come from `conf`. The builder used to set its own
    // master/appName before `.config(conf)`, which then overwrote them anyway.
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("etldemo")
    val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate()
    try analyse(spark.sparkContext)
    finally spark.stop()
  }

  /** Builds the airport/route graph from the CSV file and prints every analysis. */
  private def analyse(sc: SparkContext): Unit = {
    // Split each record once; both the vertex set and the edge set derive from it.
    val rows: RDD[Array[String]] = sc.textFile("in/flight.csv").map(_.split(","))

    // Vertices: (airport id, airport name) for both ends of every flight.
    val airPort: RDD[(VertexId, String)] = rows
      .flatMap(f => Seq((f(5).toLong, f(6)), (f(7).toLong, f(8))))
      .distinct()

    // Edges: one per distinct (origin id, destination id, distance) combination.
    val lines: RDD[Edge[Int]] = rows
      .map(f => (f(5).toLong, f(7).toLong, f(16).toInt))
      .distinct()
      .map { case (origin, destination, distance) => Edge(origin, destination, distance) }

    val graph: Graph[String, Int] = Graph(airPort, lines)

    // Number of airports and number of routes.
    val airportCount: Long = graph.numVertices
    val routeCount: Long = graph.numEdges
    println(s"airports: $airportCount, routes: $routeCount")

    if (airportCount == 0L) {
      // Every later step needs at least one airport (ranking, sampling a source vertex).
      println("no airports found in in/flight.csv, nothing to analyse")
    } else {
      printLongestRoutes(graph)
      printBusiestAirports(graph)
      printMostImportantAirports(graph)
      printCheapestFares(graph, airPort)
    }
  }

  /** Prints the three routes with the largest distance as "origin destination distance". */
  private def printLongestRoutes(graph: Graph[String, Int]): Unit = {
    val longest: Array[String] = graph.triplets
      .sortBy(_.attr, ascending = false)
      .map(t => t.srcAttr + " " + t.dstAttr + " " + t.attr)
      .take(3)
    longest.foreach(println)
  }

  /**
   * Prints the airport with the highest in-degree (most incoming routes) and then
   * the airport with the highest out-degree, each as an `(airportId, degree)` pair.
   * Nothing is printed for a ranking that has no entries.
   */
  private def printBusiestAirports(graph: Graph[String, Int]): Unit = {
    val byArrivals: Option[(VertexId, Int)] =
      graph.inDegrees.sortBy(_._2, ascending = false).take(1).headOption
    byArrivals.foreach(println)

    val byDepartures: Option[(VertexId, Int)] =
      graph.outDegrees.sortBy(_._2, ascending = false).take(1).headOption
    byDepartures.foreach(println)
  }

  /** Prints every airport with its PageRank score (tolerance 0.05), highest first. */
  private def printMostImportantAirports(graph: Graph[String, Int]): Unit = {
    val ranks: VertexRDD[Double] = graph.pageRank(0.05).vertices
    ranks.sortBy(_._2, ascending = false).collect().foreach(println)
  }

  /**
   * Computes, with Pregel, the cheapest fare from one source airport to every other
   * airport and prints the three smallest `(airportId, fare)` pairs (the source
   * itself has fare 0.0 and is therefore included).
   *
   * @param graph   airport graph with distance on the edges
   * @param airPort non-empty vertex set from which the source airport is drawn
   */
  private def printCheapestFares(graph: Graph[String, Int],
                                 airPort: RDD[(VertexId, String)]): Unit = {
    // Bernoulli sampling with fraction 1/count can legitimately yield no rows, so the
    // sample is evaluated once and the first airport is used as a fallback instead of
    // failing on an empty result.
    val sampled: Array[(VertexId, String)] =
      airPort.sample(withReplacement = false, 1.0 / airPort.count(), 1).take(1)
    val (srcAirportId, srcAirportName) = sampled.headOption.getOrElse(airPort.first())

    val source_id: Long = 10268L // fixed id, only echoed next to the sampled source
    println((source_id, srcAirportId, srcAirportName))

    // Source starts at cost 0, everything else at +infinity; edges carry the fare.
    val initial: Graph[Double, Double] = graph
      .mapVertices((id, _) => if (id == srcAirportId) 0.0 else Double.PositiveInfinity)
      .mapEdges(e => e.attr.toDouble * FarePerDistance + BaseFare)

    val cheapest: Graph[Double, Double] = initial.pregel(
      Double.PositiveInfinity, // initial message
      Int.MaxValue,            // maximum number of iterations
      EdgeDirection.Out        // direction in which messages are sent
    )(
      // vprog: merge the aggregated message into the vertex's current cost.
      (_, current, incoming) => math.min(current, incoming),
      // sendMsg: offer the destination a cheaper fare when going through the source.
      triplet => {
        val candidate = triplet.attr + triplet.srcAttr
        if (candidate < triplet.dstAttr) Iterator((triplet.dstId, candidate))
        else Iterator.empty
      },
      // mergeMsg: when several messages reach one vertex, keep the cheapest.
      (a, b) => math.min(a, b)
    )

    // Sample figures noted by the original author for source 13296:
    //   13296 -> 15919 costs 558.75, 13296 -> 11618 costs 211.35,
    //   15919 -> 11618 costs 349.65, i.e. triplet ((15919,558.75),(11618,211.35),349.65).
    val tuples: Array[(VertexId, Double)] = cheapest.vertices.sortBy(_._2).take(3)
    println(tuples.toList)
  }
}