设计思路:
- 聚合节点的所有邻居
- 对每条边,求源节点与目标节点的共同邻居(集合中包含两个端点自身);当该集合的节点数不少于3时,由该集合生成极大团的ID,并发送至源节点和目标节点
- 获取收到极大团ID的节点
代码如下:
package mu.atlas.graph.community
import mu.atlas.graph.utils.BaseTool._
import org.apache.spark.graphx.{EdgeContext, Graph, VertexId}
import scala.reflect.ClassTag
/**
  * Clique mining on a GraphX graph.
  * Created by zhoujiamu on 2019/8/28.
  */
object Clique {

  /**
    * Labels vertices with the ids of the cliques they were found in.
    *
    * Steps:
    *  1. every vertex collects the ids of all its neighbours (plus its own id);
    *  2. for every edge, the neighbour sets of both endpoints are intersected; when the
    *     intersection holds at least `minSize` vertices, an id derived from the sorted
    *     member list is sent to the source and the destination vertex;
    *  3. vertices that received no id are dropped from the result.
    *
    * NOTE(review): the members of an intersection are not checked for pairwise adjacency,
    * and only the two endpoints of the edge receive the label.
    *
    * @param graph   input graph; the original vertex attributes are discarded
    * @param minSize minimum number of vertices (both edge endpoints included) the
    *                intersection must contain before a clique id is emitted; defaults to 3
    * @tparam VD vertex attribute type of the input graph
    * @tparam ED edge attribute type, carried over unchanged
    * @return sub-graph of the vertices that received at least one clique id; each vertex
    *         attribute is the set of clique ids it belongs to (ids are opaque hash values)
    */
  def run[VD, ED: ClassTag](graph: Graph[VD, ED], minSize: Int = 3): Graph[Set[VertexId], ED] = {
    // Seed every vertex with a singleton set holding its own id.
    val initGraph = graph.mapVertices { case (vid, _) => Set(vid) }

    // Exchange ids across every edge so each vertex gathers the ids of its neighbours.
    val neighbourIds = initGraph.aggregateMessages[Set[VertexId]](
      ctx => {
        ctx.sendToSrc(ctx.dstAttr)
        ctx.sendToDst(ctx.srcAttr)
      },
      _ ++ _
    )

    // Vertex attribute becomes: own id + ids of all neighbours.
    val graphWithNeigs =
      initGraph.joinVertices(neighbourIds)((_, self, neighbours) => self ++ neighbours)

    val vertexWithLabel = graphWithNeigs.aggregateMessages[Set[VertexId]](
      ctx => {
        val members = ctx.srcAttr.intersect(ctx.dstAttr)
        // Hash only when a label is actually emitted.
        if (members.size >= minSize) {
          // Sorting makes the key independent of set iteration order; the separator keeps
          // distinct member lists distinct (without it 1,2,345 and 1,23,45 both read "12345").
          val label: Set[VertexId] = Set(md5ToLong(md5(members.toSeq.sorted.mkString(","))))
          ctx.sendToSrc(label)
          ctx.sendToDst(label)
        }
      },
      _ ++ _
    )

    graphWithNeigs
      .outerJoinVertices(vertexWithLabel)((_, _, labels) => labels.getOrElse(Set.empty[VertexId]))
      .subgraph(vpred = (_, labels) => labels.nonEmpty)
  }
}
测试代码:
package mu.atlas.graph.community
import org.junit._
import Assert._
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx.Graph
/**
* Created by zhoujiamu on 2019/8/28.
*/
@Test
class CliqueTest {

  /**
    * Runs Clique.run on a small 7-vertex graph and checks that exactly two distinct
    * clique ids are produced.
    */
  @Test
  def testRun(): Unit = {
    Logger.getLogger("org").setLevel(Level.WARN)
    val conf = new SparkConf().setMaster("local").setAppName("CliqueTest")
    val sc = new SparkContext(conf)

    try {
      val rdd = sc.makeRDD(Array(
        1L -> 2L,
        1L -> 4L,
        2L -> 3L,
        2L -> 4L,
        2L -> 5L,
        2L -> 6L,
        3L -> 5L,
        4L -> 7L
      ))

      // The vertex attribute is discarded by Clique.run, so any placeholder value works.
      val graph = Graph.fromEdgeTuples(rdd, 0)
      val clique = Clique.run(graph)

      println("-" * 30)
      // Collect first so the output is printed by the driver.
      clique.vertices.collect().foreach(println)

      // Number of distinct clique ids across all labelled vertices.
      val res = clique.vertices.flatMap { case (_, ids) => ids }.distinct().count()
      assertEquals(2L, res)
    } finally {
      // Always release the local Spark context, even when an assertion fails.
      sc.stop()
    }
  }
}
测试结果:
(4,Set(-3963192249907337487))
(1,Set(-3963192249907337487))
(3,Set(6304741602190573741))
(5,Set(6304741602190573741))
(2,Set(-3963192249907337487, 6304741602190573741))
结果说明:节点2、3、5构成一个极大团,节点1、2、4构成另一个极大团,这两个极大团有公共的节点2。输出中的长整型数值是由团成员集合哈希得到的极大团ID,具体数值取决于哈希实现,仅用于区分不同的极大团;节点6、7没有收到任何极大团ID,因此不在结果中。