图(Graph)是由顶点集合(vertex)及顶点间的关系集合(边 edge)组成的一种网状数据结构;GraphX 是 Spark 中用于图计算的组件
通常表示为二元组:Graph=(V,E)
可以对事物之间的关系建模
/**
 * Minimal GraphX example: builds a small property graph of four users
 * (vertex attribute = (name, role)) connected by four labelled relations
 * (edge attribute = relation name), then prints its triplets, vertices and edges.
 */
object GraphDemo2 {
  /**
   * Entry point. Runs on a local Spark master and always stops the session on exit.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder().master("local[*]").appName("GraphDemo2").getOrCreate()
    try {
      val sc: SparkContext = spark.sparkContext

      // Vertices: (vertex id, (user name, role)).
      val users: RDD[(Long, (String, String))] = sc.makeRDD(
        Array(
          (3L, ("rxin", "student")),
          (7L, ("jgonzal", "postdoc")),
          (5L, ("franklin", "professor")),
          (2L, ("istoica", "professor"))
        )
      )

      // Edges: Edge(source vertex id, destination vertex id, relation label).
      val relations: RDD[Edge[String]] = sc.makeRDD(Array(
        Edge(3L, 7L, "Collaborator"),
        Edge(5L, 3L, "Advisor"),
        Edge(2L, 5L, "Colleague"),
        Edge(5L, 7L, "PI")
      ))

      val graph: Graph[(String, String), String] = Graph(users, relations)

      // Collect to the driver before printing: RDD.foreach(println) runs inside the
      // tasks, so lines from different partitions interleave and, on a real cluster,
      // end up in the executors' stdout instead of the driver's. The data set is tiny,
      // so collect() is safe here.
      graph.triplets.collect().foreach(println) // (source vertex, destination vertex, edge attribute)
      graph.vertices.collect().foreach(println) // vertices
      graph.edges.collect().foreach(println)    // edges
    } finally {
      // Release the local Spark context even if one of the actions above fails.
      spark.stop()
    }
  }
}
((3,(rxin,student)),(7,(jgonzal,postdoc)),Collaborator)
((5,(franklin,professor)),(3,(rxin,student)),Advisor)
((5,(franklin,professor)),(7,(jgonzal,postdoc)),PI)
((2,(istoica,professor)),(5,(franklin,professor)),Colleague)
(2,(istoica,professor))
(7,(jgonzal,postdoc))
(5,(franklin,professor))
(3,(rxin,student))
Edge(3,7,Collaborator)
Edge(5,3,Advisor)
Edge(2,5,Colleague)
Edge(5,7,PI)
实例
/**
 * Flight-network analysis demo built on Spark GraphX.
 *
 * Reads "in/flight.csv", builds a graph whose vertices are airports
 * (vertex id = airport id, attribute = airport name) and whose edges are routes
 * (attribute = route distance), then reports:
 *  - the number of airports and routes,
 *  - the three longest and three shortest routes,
 *  - the airports with the most inbound / outbound routes,
 *  - the cheapest-path cost from a fixed start airport, computed with Pregel.
 */
object FightDemo {
  /**
   * Entry point. Runs on a local Spark master and always stops the session on exit.
   *
   * NOTE(review): every CSV line is assumed to be a data row with numeric values in
   * columns 5, 7 and 16 (no header row) - a non-conforming line fails the job with a
   * NumberFormatException / ArrayIndexOutOfBoundsException. Confirm against the data file.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder().master("local[*]").
      appName("FightDemo").getOrCreate()
    try {
      val sc: SparkContext = spark.sparkContext

      // Split every line once and cache the result: both the airport and the route
      // extraction below read the same parsed rows.
      val rows: RDD[Array[String]] = sc.textFile("in/flight.csv").map(_.split(",")).cache()

      // All airports as (airport id, airport name). Each row contributes its origin
      // (columns 5/6) and destination (columns 7/8), e.g. (10397,ATL) (12953,LGA).
      val airport: RDD[(VertexId, String)] = rows
        .flatMap(r => Array((r(5).toLong, r(6)), (r(7).toLong, r(8))))
        .distinct()

      // All distinct routes with their distance (column 16), e.g. (11618,12441,1874).
      val lines: RDD[Edge[Int]] = rows
        .map(r => (r(5).toLong, r(7).toLong, r(16).toInt))
        .distinct()
        .map { case (src, dst, distance) => Edge(src, dst, distance) }

      // The graph is queried by several independent actions below, so keep it cached.
      val graph: Graph[String, Int] = Graph(airport, lines).cache()

      // Number of airports (vertices) and routes (edges). These are counts, not vertex ids.
      val airportCount: Long = graph.numVertices
      val routeCount: Long = graph.numEdges
      println(s"机场数量:${airportCount},航线数量:${routeCount}")

      // Routes flattened to (origin name, destination name, distance) for ranking.
      val routes: RDD[(String, String, Int)] =
        graph.triplets.map(t => (t.srcAttr, t.dstAttr, t.attr))
      val byDistance: Ordering[(String, String, Int)] =
        Ordering.by[(String, String, Int), Int](_._3)

      // Three longest routes. top(n) keeps only n candidates per partition instead of
      // sorting (and shuffling) the whole RDD as sortBy(...).take(n) does.
      routes.top(3)(byDistance).foreach { case (src, dst, distance) =>
        println(s"${src}到达${dst}距离是:${distance}")
      }

      // Three shortest routes.
      routes.takeOrdered(3)(byDistance).foreach { case (src, dst, distance) =>
        println(s"${src}到达${dst}距离是:${distance}")
      }

      val byDegree: Ordering[(VertexId, Int)] = Ordering.by[(VertexId, Int), Int](_._2)

      // In-degree = number of inbound routes; report the airport with the most.
      // headOption keeps this safe when the graph has no edges.
      graph.inDegrees.top(1)(byDegree).headOption.foreach { busiest =>
        println(s"进场航班最多的机场${busiest}")
      }

      // Out-degree = number of outbound routes; report the airport with the most.
      graph.outDegrees.top(1)(byDegree).headOption.foreach { busiest =>
        println(s"出场航班最多的机场${busiest}")
      }

      // Three most important airports by PageRank (left disabled, as in the original):
      // graph.pageRank(0.05).vertices.sortBy(x => x._2, false).take(3).foreach(println)

      // ---- Cheapest path from a fixed start airport, via Pregel ----

      // Start airport id; presumably ATL, per the sample row (10397,ATL) above.
      val startVertexId: Long = 10397L
      // Edge cost model: a fixed base cost plus a cost proportional to the distance.
      val baseRouteCost: Double = 180.0
      val costPerDistanceUnit: Double = 0.15

      // Vertex attribute = best known cost from the start (0 at the start, infinity
      // elsewhere); edge attribute = cost of flying that route.
      // e.g. ((10135,Infinity),(10397,0.0),283.8)
      val initGraph: Graph[Double, Double] = graph
        .mapVertices((id, _) => if (id == startVertexId) 0.0 else Double.PositiveInfinity)
        .mapEdges(e => baseRouteCost + e.attr * costPerDistanceUnit)

      val pregel_graph: Graph[Double, Double] = initGraph.pregel(
        Double.PositiveInfinity, // initial message: leaves every vertex value unchanged under min
        Int.MaxValue,            // no iteration cap: stop when no messages are sent
        EdgeDirection.Out
      )(
        // Vertex program: keep the cheaper of the current and the incoming cost.
        (_, dist, newDist) => math.min(dist, newDist),
        // Send message: propose a new cost to the destination only if going through
        // the source is strictly cheaper than what the destination already has.
        triple => {
          val candidate = triple.srcAttr + triple.attr
          if (candidate < triple.dstAttr) Iterator((triple.dstId, candidate))
          else Iterator.empty
        },
        // Merge messages: of several proposals for one vertex, keep the cheapest.
        (a, b) => math.min(a, b)
      )

      // pregel_graph.vertices.take(10).foreach(println)
      pregel_graph.triplets.take(10).foreach(println)
    } finally {
      // Release the local Spark context even if one of the jobs above fails.
      spark.stop()
    }
  }
}