平均连通性算法
算法描述:
具有K度节点的平均最近邻度,用作考察度值相近的节点是否倾向于相互连接,在社交网络中,节点更倾向于与度数相近的节点连接;
计算思路
- 计算图中每个顶点的sourceDegree,targetDegree(根据指定的source|target的方向);
- 根据指定的sourceDirection方向,计算每个节点的邻点集合;
- 根据是否指定待计算的节点列,从sourceDegree中过滤指定的节点:sourceNodes;
- 对sourceNodes每个节点进行转换;
4.1 计算每个节点的邻点度数之和;
4.2 计算后数据格式为:(节点数据,(节点度数,节点的邻点的度数之和)) - 聚合计算:计算相同度数的节点的度数之和,相同度数的节点的邻点的度数之和;
- 计算平均最近邻度;
GraphX实现逻辑
/**
 * Computes the average nearest-neighbour degree for each degree value k
 * present in the graph (average degree connectivity):
 *   knn(k) = (sum of neighbour degrees over all k-degree nodes) / (k * N_k)
 *
 * @param g               the input graph
 * @param sourceDirection direction used both for a vertex's own degree and
 *                        for collecting its neighbour set
 * @param targetDirection direction used for the neighbours' degrees
 * @param nodeList        optional list of vertex ids to restrict the
 *                        computation to; pass null to use every vertex
 * @return an RDD of (degree k, average nearest-neighbour degree of k-degree nodes)
 */
def averageDegreeConnectivity[VD: ClassTag, ED: ClassTag](g: Graph[VD, ED],
sourceDirection: EdgeDirection = EdgeDirection.Either,
targetDirection: EdgeDirection = EdgeDirection.Either,
nodeList: List[VertexId]): RDD[(Long, Double)] = {
val graph = g.cache()
// Degree of every vertex on the source side (in/out/total per sourceDirection).
val sourceDegree: VertexRDD[Int] = degreesRDD(graph, sourceDirection)
// Target-side degrees, collected to the driver and broadcast as a Map so each
// task can look up a neighbour's degree in O(1) instead of linearly scanning
// an Array for every single vertex (the original was O(V) per vertex).
val targetDegree: Map[VertexId, Int] = degreesRDD(graph, targetDirection).collect().toMap
val sparkSession = SparkSession.builder().getOrCreate()
val sc = sparkSession.sparkContext
val broadcast = sc.broadcast(targetDegree)
// Neighbour-id set of every vertex along sourceDirection.
val neighbor: VertexRDD[Set[VertexId]] = neighbors(graph, sourceDirection)
// Restrict the computation to nodeList when one was supplied; a Set makes the
// membership test O(1) instead of O(|nodeList|) per vertex.
val sourceNodes: VertexRDD[Int] = if (nodeList != null) {
val wanted = nodeList.toSet
sourceDegree.filter(item => wanted.contains(item._1))
} else {
sourceDegree
}
val vertexNeighborsRdd: RDD[(VertexId, (Int, Option[Set[VertexId]]))] = sourceNodes.leftOuterJoin(neighbor)
// Re-key every vertex by its degree k; the value is (k, sum of the degrees of
// its neighbours). Vertices with no neighbours contribute a sum of 0.
val resultRdd: RDD[(Long, Double)] = vertexNeighborsRdd.map(item => {
val degree = item._2._1
val neighborSet = item._2._2.getOrElse(Set.empty[VertexId])
if (neighborSet.nonEmpty) {
val degreeLookup = broadcast.value
// Missing ids (possible when targetDirection differs from sourceDirection,
// e.g. a neighbour with no out-edges) count as degree 0.
val neighborDegree: Double = neighborSet.iterator.map(id => degreeLookup.getOrElse(id, 0)).sum
(degree, (degree, neighborDegree))
} else {
(degree, (degree, 0.0))
}
// Aggregate per degree k: total degree (= k * N_k) and total neighbour-degree sum.
}).reduceByKey((x, y) => (x._1 + y._1, x._2 + y._2))
.filter(item => {
item._2._1 > 0 && item._2._2 > 0
// Average nearest-neighbour degree of the k-degree nodes:
//   numerator   = sum of the neighbour degrees of all k-degree nodes
//   denominator = sum of the degrees of all k-degree nodes (= k * N_k)
}).map(item => {
val avg = item._2._2 / item._2._1
(item._1.toLong, avg)
})
resultRdd
}
/**
 * Collects, for every vertex, the set of neighbour ids reachable along the
 * requested edge direction.
 *
 * @param graph         the input graph
 * @param edgeDirection In  -> for each vertex, the sources of its in-edges;
 *                      Out -> the targets of its out-edges;
 *                      anything else -> both endpoints of every incident edge
 * @return a VertexRDD mapping each vertex id to its neighbour-id set
 */
def neighbors[VD, ED: ClassTag](graph: Graph[VD, ED],
edgeDirection: EdgeDirection = EdgeDirection.Either): VertexRDD[Set[VertexId]] = {
// Vertex ids are always available on the context, so no triplet fields
// (attributes) need to be shipped.
edgeDirection match {
case EdgeDirection.In =>
graph.aggregateMessages[Set[VertexId]](
ctx => ctx.sendToDst(Set(ctx.srcId)), _ ++ _, TripletFields.None)
case EdgeDirection.Out =>
graph.aggregateMessages[Set[VertexId]](
ctx => ctx.sendToSrc(Set(ctx.dstId)), _ ++ _, TripletFields.None)
case _ =>
graph.aggregateMessages[Set[VertexId]](ctx => {
ctx.sendToDst(Set(ctx.srcId))
ctx.sendToSrc(Set(ctx.dstId))
}, _ ++ _, TripletFields.None)
}
}
/**
 * Returns the degree of every vertex for the requested edge direction.
 *
 * @param g         the input graph
 * @param direction In -> in-degrees, Out -> out-degrees, Either -> total degrees
 * @return a VertexRDD mapping each vertex id to its degree
 * @throws SparkException if direction is EdgeDirection.Both, which GraphX's
 *                        degree accessors do not support
 */
def degreesRDD[VD: ClassTag, ED: ClassTag](g: Graph[VD, ED],
direction: EdgeDirection = EdgeDirection.Either): VertexRDD[Int] = {
direction match {
case EdgeDirection.In => g.inDegrees
case EdgeDirection.Out => g.outDegrees
case EdgeDirection.Either => g.degrees
case EdgeDirection.Both =>
// Original message named "collectEdges" (copy-paste from GraphX); name this method.
throw new SparkException("degreesRDD does not support EdgeDirection.Both. Use EdgeDirection.Either instead.")
}
}