关于如何使用Pregel(分布式图计算框架),这里我们可以用一个小例题来说明。
需求说明
求从0到任意点的最短路径(SSSP)
实现思路
初始化:将每个顶点(Vertex)的属性值(即到源点的当前最短距离)设为正无穷(Double.PositiveInfinity)
将源点(0)的属性值设为 0
每一步中,每个节点把自己当前的距离加上出边的权值,作为消息(Message)发送给相邻节点;每个节点对收到的所有消息取最小值,并与自身当前距离比较后保留较小者
如果某一步中算出的新距离并不比目标节点当前的距离更小,则不再向该节点发送消息;当整个图中不再有任何消息时,迭代结束
具体实现
// Build (or reuse) a SparkSession that runs locally with the "local[4]" master.
val spark: SparkSession = SparkSession.builder()
  .appName(this.getClass.getName)
  .master("local[4]")
  .getOrCreate()
val sc: SparkContext = spark.sparkContext

// Single-source shortest paths (SSSP): shortest distance from vertex 0 to every vertex.
// The initial vertex attributes (1.0) are placeholders; mapVertices below overwrites them.
// Vertex 4 is declared explicitly because the edges 0->4 and 4->3 refer to it; otherwise
// the graph would depend on Graph's implicit default attribute for undeclared vertices.
val vertices: RDD[(VertexId, Double)] =
  sc.makeRDD(Seq((0L, 1.0), (1L, 1.0), (2L, 1.0), (3L, 1.0), (4L, 1.0)))

// Directed edges (srcId, dstId, weight); the Int attribute is the edge weight.
val edges: RDD[Edge[Int]] = sc.makeRDD(Seq(
  Edge(0L, 1L, 100),
  Edge(0L, 2L, 30),
  Edge(0L, 4L, 10),
  Edge(2L, 1L, 60),
  Edge(2L, 3L, 60),
  Edge(3L, 1L, 10),
  Edge(4L, 3L, 50)
))
val graph = Graph(vertices, edges)

val sourceId: VertexId = 0L
// Distance 0.0 at the source and +infinity everywhere else. A Double literal is used so
// the vertex attribute type is Double without relying on Int-to-Double widening.
val initGraph = graph.mapVertices((id, _) =>
  if (id == sourceId) 0.0 else Double.PositiveInfinity)

// The initial message is +infinity, so the first vertex-program call leaves every
// distance unchanged (min(dist, +infinity) == dist).
val sssp = initGraph.pregel(Double.PositiveInfinity)(
  // Vertex program: keep the smaller of the current distance and the incoming one.
  (id, dist, newDist) => math.min(dist, newDist),
  // Send message: propagate along an edge only when it would shorten the
  // destination's distance; otherwise send nothing.
  triplet => {
    val candidate = triplet.srcAttr + triplet.attr
    if (candidate < triplet.dstAttr)
      Iterator((triplet.dstId, candidate))
    else
      Iterator.empty
  },
  // Merge messages: of several candidate distances for one vertex, keep the smallest.
  (dist1, dist2) => math.min(dist1, dist2)
)
// Bring the (vertexId, distance) pairs to the driver and print one per line.
println(sssp.vertices.collect().mkString("\n"))
}
//-----------------------------------------第二种-------------------------------------------------
// Source vertex of the shortest-path search.
val sourceId: VertexId = 0L
// Distance 0.0 at the source and +infinity everywhere else. The comparison is against
// sourceId; the previous `initialMsg` name is not defined anywhere in this snippet.
val initGraph = graph.mapVertices((id, _) =>
  if (id == sourceId) 0.0 else Double.PositiveInfinity)
/** Vertex program: returns the smaller of the vertex's current distance and the incoming one.
  *
  * @param id      id of the vertex being updated (not used in the computation)
  * @param dist    distance currently stored on the vertex
  * @param newDist distance carried by the merged incoming message
  * @return the minimum of `dist` and `newDist`
  */
def vprog(id: VertexId, dist: Double, newDist: Double): Double =
  dist min newDist
/** Message generator for one edge triplet.
  *
  * Emits the candidate distance (source distance plus edge weight) to the destination
  * vertex only when it is strictly smaller than the destination's current distance;
  * otherwise emits nothing.
  *
  * @param triplet edge with Double vertex attributes (distances) and an Int edge weight
  * @return an iterator holding at most one (destination id, candidate distance) pair
  */
def sendMsg(triplet: EdgeTriplet[Double, Int]): Iterator[(VertexId, Double)] = {
  val candidate = triplet.srcAttr + triplet.attr
  val improves = candidate < triplet.dstAttr
  if (improves) Iterator((triplet.dstId, candidate)) else Iterator.empty
}
/** Combines two messages addressed to the same vertex by keeping the smaller distance. */
def mergeMsg(a: Double, b: Double): Double = a min b
// Run Pregel with the named vertex-program / send / merge functions, then bring the
// (vertexId, distance) pairs to the driver before printing. Printing inside RDD.foreach
// happens on the executors, so on a cluster the output would not appear on the driver;
// collecting first also removes the need for the repartition(1) shuffle.
initGraph.pregel(Double.PositiveInfinity)(vprog, sendMsg, mergeMsg)
  .vertices
  .collect()
  .foreach(println)