Computation process: straightforward. It's just that I'm not sure how much of an efficiency improvement method three actually gives.
Method four may be posted later.
author:castomere——ztw
Three approaches:
Method one: the join approach
package aggregate

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx.{Edge, EdgeContext, Graph, VertexId, VertexRDD}

/**
 * date: 4.13
 * Aggregation operations on a graph:
 * compute each vertex's distance (number of edges) from the root vertex.
 * g2.vertices.join(g.vertices) joins the new graph's vertex values with
 * the old graph's; sum(new distance - old distance) == 0 is used as the
 * termination condition.
 */
object Demo2 {

  // Send the source vertex's current distance + 1 to the destination vertex.
  def sendMsg(ec: EdgeContext[Int, String, Int]): Unit = {
    ec.sendToDst(ec.srcAttr + 1)
  }

  // Keep the larger of two candidate distances for the same vertex.
  def mergeMsg(a: Int, b: Int): Int = {
    math.max(a, b)
  }

  def sumEdgeCount(g: Graph[Int, String]): Graph[Int, String] = {
    val verts: VertexRDD[Int] = g.aggregateMessages[Int](sendMsg, mergeMsg)
    val g2 = Graph(verts, g.edges)

    println("dfs")
    verts.collect.foreach(println(_))
    println()
    g2.vertices.join(g.vertices).collect.foreach(println(_))
    // val value: RDD[(VertexId, (Int, Int))] = g2.vertices.join(g.vertices)

    // Total change in distances between iterations; 0 means nothing moved.
    val check = g2.vertices.join(g.vertices)
      .map(x => x._2._1 - x._2._2)
      .reduce(_ + _)
    if (check > 0) sumEdgeCount(g2) else g
  }

  def main(args: Array[String]): Unit
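The post breaks off at the main declaration above. As a rough sketch of what the driver might look like (the object name Demo2Driver and the graph data below are my own illustration, not from the original post), sumEdgeCount can be tested on a small chain graph:

package aggregate

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx.{Edge, Graph}

// Hypothetical driver for Demo2.sumEdgeCount; the vertices and edges
// here are invented for illustration only.
object Demo2Driver {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Demo2").setMaster("local[*]")
    val sc = new SparkContext(conf)
    sc.setLogLevel("ERROR")

    // A simple chain 1 -> 2 -> 3 -> 4, with every distance initialized to 0.
    val vertices = sc.makeRDD(Seq((1L, 0), (2L, 0), (3L, 0), (4L, 0)))
    val edges = sc.makeRDD(Seq(
      Edge(1L, 2L, "a"),
      Edge(2L, 3L, "b"),
      Edge(3L, 4L, "c")
    ))
    val g = Graph(vertices, edges)

    // Expected final distances: 1 -> 0, 2 -> 1, 3 -> 2, 4 -> 3.
    val result = Demo2.sumEdgeCount(g)
    result.vertices.collect.foreach(println(_))

    sc.stop()
  }
}

Termination relies on the distances stabilizing: once no vertex's value grows from one iteration to the next, the summed difference check is 0 and the recursion stops. Note that sendMsg fires on every edge in every round, so this only terminates on acyclic graphs; on a graph with a cycle the distances along the cycle would keep increasing and the recursion would never end.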