基于Spark GraphX的图形数据分析

本文通过两个实例介绍了Spark图计算组件GraphX的使用,包括顶点、边的创建,以及对图进行的操作(如查找最长、最短航线)。在第一个实例中,展示了如何构建用户关系图并打印顶点和边;第二个实例则利用航班数据构建图,统计机场数量、航线数量,并求解最长和最短飞行路线。此外,还涉及了PageRank算法在寻找重要机场中的应用。
摘要由CSDN通过智能技术生成

图(Graph)是由顶点集合(vertex)及顶点间的关系集合(边 edge)组成的一种网状数据结构

通常表示为二元组:Graph=(V,E),其中V为顶点集合,E为边集合

可以对事物之间的关系建模

/**
 * Minimal GraphX example: builds a small property graph of users and the
 * relationships between them, then prints its triplets, vertices and edges.
 */
object GraphDemo2 {

  /**
   * Entry point. Runs on a local Spark master and writes the graph contents
   * to standard output.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession =
      SparkSession.builder().master("local[*]").appName("GraphDemo2").getOrCreate()

    try {
      val sc: SparkContext = spark.sparkContext

      // Vertices: (vertex id, (user name, role)).
      val users: RDD[(Long, (String, String))] = sc.makeRDD(
        Array(
          (3L, ("rxin", "student")),
          (7L, ("jgonzal", "postdoc")),
          (5L, ("franklin", "professor")),
          (2L, ("istoica", "professor"))
        )
      )

      // Directed edges: Edge(source id, destination id, relationship label).
      val relations: RDD[Edge[String]] = sc.makeRDD(
        Array(
          Edge(3L, 7L, "Collaborator"),
          Edge(5L, 3L, "Advisor"),
          Edge(2L, 5L, "Colleague"),
          Edge(5L, 7L, "PI")
        )
      )

      val graph: Graph[(String, String), String] = Graph(users, relations)

      // Bring the (tiny) result sets to the driver before printing: calling
      // foreach(println) directly on an RDD runs println inside the executor
      // tasks, so lines from different partitions interleave and, on a real
      // cluster, never reach the driver's console.
      graph.triplets.collect().foreach(println) // (source vertex, destination vertex, edge attribute)
      graph.vertices.collect().foreach(println) // vertices
      graph.edges.collect().foreach(println)    // edges
    } finally {
      // Release the local Spark resources even if a job above fails.
      spark.stop()
    }
  }
}

((3,(rxin,student)),(7,(jgonzal,postdoc)),Collaborator)

((5,(franklin,professor)),(3,(rxin,student)),Advisor)

((5,(franklin,professor)),(7,(jgonzal,postdoc)),PI)

((2,(istoica,professor)),(5,(franklin,professor)),Colleague)

(2,(istoica,professor))

(7,(jgonzal,postdoc))

(5,(franklin,professor)) 

(3,(rxin,student))

Edge(3,7,Collaborator)

Edge(5,3,Advisor)

Edge(2,5,Colleague)

Edge(5,7,PI)

实例

/**
 * GraphX example on flight data: airports are vertices, routes are edges
 * weighted by distance. Reports basic graph statistics, the longest and
 * shortest routes, the busiest airports by in/out degree, and finally runs a
 * Pregel single-source shortest-path computation over a derived edge cost.
 */
object FightDemo {

  /**
   * Entry point. Reads `in/flight.csv` and prints the results to standard output.
   *
   * Assumes every line of the CSV is a data row with at least 17
   * comma-separated columns and numeric values in columns 5, 7 and 16; a
   * header row or a malformed line will fail the job with an exception
   * (NOTE(review): confirm against the actual file).
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession =
      SparkSession.builder().master("local[*]").appName("FightDemo").getOrCreate()

    try {
      val sc: SparkContext = spark.sparkContext

      // Split each line once and cache the result: both the vertex and the
      // edge RDD are derived from the same parsed rows.
      val rows: RDD[Array[String]] =
        sc.textFile("in/flight.csv").map(_.split(",")).cache()

      // All airports as (airport id, airport code); each row contributes its
      // origin and its destination, e.g. (10397,ATL) (12953,LGA).
      val airport: RDD[(VertexId, String)] = rows
        .flatMap(cols => Array((cols(5).toLong, cols(6)), (cols(7).toLong, cols(8))))
        .distinct()

      // All distinct routes with their distance, e.g. (11618,12441,1874).
      val lines: RDD[Edge[Int]] = rows
        .map(cols => (cols(5).toLong, cols(7).toLong, cols(16).toInt))
        .distinct()
        .map { case (src, dst, distance) => Edge(src, dst, distance) }

      val graph: Graph[String, Int] = Graph(airport, lines)

      // Number of airports (vertices) and routes (edges). These are counts,
      // not vertex ids, so they are typed as Long rather than VertexId.
      val airportCount: Long = graph.numVertices
      val routeCount: Long = graph.numEdges
      println(s"机场数量:${airportCount},航线数量:${routeCount}")

      // (origin code, destination code, distance) for every route. Reused by
      // the two rankings below, hence cached.
      val routes: RDD[(String, String, Int)] =
        graph.triplets.map(t => (t.srcAttr, t.dstAttr, t.attr)).cache()
      val byDistance: Ordering[(String, String, Int)] =
        Ordering.by[(String, String, Int), Int](_._3)

      // Three longest routes. top/takeOrdered only keep the best N per
      // partition instead of sorting the whole RDD, and the result is now
      // printed instead of being computed and thrown away.
      routes.top(3)(byDistance).foreach { case (from, to, distance) =>
        println(s"${from}到达${to}距离是:${distance}")
      }

      // Three shortest routes.
      routes.takeOrdered(3)(byDistance).foreach { case (from, to, distance) =>
        println(s"${from}到达${to}距离是:${distance}")
      }

      val byDegree: Ordering[(VertexId, Int)] =
        Ordering.by[(VertexId, Int), Int](_._2)

      // In-degree = number of inbound routes; report the airport with the most.
      // Iterating over the (possibly empty) result avoids indexing element 0.
      graph.inDegrees.top(1)(byDegree).foreach { busiest =>
        println("进场航班最多的机场" + busiest)
      }

      // Out-degree = number of outbound routes; report the airport with the most.
      graph.outDegrees.top(1)(byDegree).foreach { busiest =>
        println("出场航班最多的机场" + busiest)
      }

      // Three most important airports by PageRank (left disabled, as in the original):
      // graph.pageRank(0.05).vertices.sortBy(x => x._2, false).take(3).foreach(println)

      // ---- Single-source shortest path from one airport, using Pregel ----
      val startVertexId: Long = 10397L

      // Vertex value = best known cost from the start airport: 0 for the start
      // itself, +Infinity (unreached) for all others.
      // Edge value = 180 + 0.15 * distance (presumably a fixed charge plus a
      // per-distance rate -- confirm the intended cost model).
      val initGraph: Graph[Double, Double] = graph
        .mapVertices { (id, _) =>
          if (id == startVertexId) 0.0 else Double.PositiveInfinity
        }
        .mapEdges(e => 180 + e.attr.toDouble * 0.15)

      // initGraph.triplets.take(10).foreach(println)
      // ((10135,Infinity),(10397,0.0),283.8)
      // ((10135,Infinity),(13930,Infinity),278.1)

      val pregel_graph: Graph[Double, Double] = initGraph.pregel(
        Double.PositiveInfinity, // initial message: leaves every vertex value unchanged under min
        Int.MaxValue,            // no practical iteration cap; stops when no messages are sent
        EdgeDirection.Out        // messages travel along outgoing edges
      )(
        // Vertex program: keep the cheaper of the current and the incoming cost.
        (_, dist, newDist) => math.min(dist, newDist),
        // Send message: offer the destination a cheaper cost via this edge, if
        // any. An unreached source (Infinity) never satisfies the comparison.
        triplet => {
          if (triplet.attr + triplet.srcAttr < triplet.dstAttr)
            Iterator((triplet.dstId, triplet.attr + triplet.srcAttr))
          else
            Iterator.empty
        },
        // Merge messages: several offers to the same vertex collapse to the cheapest.
        (costA, costB) => math.min(costA, costB)
      )

      // pregel_graph.vertices.take(10).foreach(println)
      pregel_graph.triplets.take(10).foreach(println)
    } finally {
      // Release the local Spark resources even if a job above fails.
      spark.stop()
    }
  }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

s_schen

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值