算法描述:
计算每个节点的邻域平均度数,即其邻居节点度数的平均值。该指标用于考察度数相近的节点是否倾向于相互连接:在社交网络中,节点更倾向于与度数相近的节点相连。
实现逻辑
/**
 * Computes the average neighbor degree of the vertices of a graph.
 *
 * For each analysed vertex, the degrees of its neighbors (measured along
 * `targetDirection`) are summed and divided by the vertex's own degree
 * (measured along `sourceDirection`). A vertex without neighbors gets 0.0.
 *
 * The degrees of all vertices along `targetDirection` are collected to the driver
 * and broadcast, so that table has to fit in driver and executor memory.
 *
 * NOTE(review): the neighbor set is collected along `targetDirection`, while the
 * divisor uses `sourceDirection`; confirm this is the intended definition.
 *
 * @param g               graph to analyse (GraphX graph); it is cached by this call
 * @param sourceDirection direction used for the degree of the analysed vertex itself:
 *                        In | Out, default Out
 * @param targetDirection direction used for the neighbor set and the neighbor degrees:
 *                        In | Out, default Out
 * @param nodeList        vertices to analyse; `null` (the default) analyses the whole graph
 * @return one `(vertexId, averageNeighborDegree)` pair per analysed vertex that has a
 *         degree entry along `sourceDirection`
 */
def averageNeighborsDegree[VD: ClassTag, ED: ClassTag](g: Graph[VD, ED],
    sourceDirection: EdgeDirection = EdgeDirection.Out,
    targetDirection: EdgeDirection = EdgeDirection.Out,
    nodeList: List[VertexId] = null): RDD[(VertexId, Double)] = {
  val graph = g.cache()

  // Degree of every vertex along the source / target direction.
  val sourceDegree: VertexRDD[Int] = degreesRDD(graph, sourceDirection)
  // A map (instead of an array) lets each neighbor degree be looked up directly
  // rather than scanning the whole table once per vertex.
  val targetDegree: Map[VertexId, Int] = degreesRDD(graph, targetDirection).collect().toMap
  // Reuse the SparkContext the graph already lives in.
  val degreeLookup = graph.vertices.sparkContext.broadcast(targetDegree)

  // Restrict to the requested vertices only when a list was actually supplied.
  val sourceNodes: VertexRDD[Int] = Option(nodeList) match {
    case Some(nodes) =>
      val wanted: Set[VertexId] = nodes.toSet
      sourceDegree.filter { case (id, _) => wanted.contains(id) }
    case None =>
      sourceDegree
  }

  // Neighbor id set of every vertex.
  val neighborSets: VertexRDD[Set[VertexId]] = neighbors(graph, targetDirection)

  sourceNodes.leftOuterJoin(neighborSets).map { case (id, (degree, neighborOpt)) =>
    // Defensive guard against a division by zero.
    val divisor = if (degree == 0) 1 else degree
    // Sum as Long so that many high-degree neighbors cannot overflow an Int.
    // Neighbors without a degree entry contribute 0; an empty set yields 0.0.
    val degreeSum: Long = neighborOpt
      .getOrElse(Set.empty[VertexId])
      .iterator
      .map(neighborId => degreeLookup.value.getOrElse(neighborId, 0).toLong)
      .sum
    (id, degreeSum.toDouble / divisor)
  }
}
/**
 * Collects the set of neighbor ids of the vertices of a graph.
 *
 * - `EdgeDirection.In`: each edge contributes its source id to the destination vertex.
 * - `EdgeDirection.Out`: each edge contributes its destination id to the source vertex.
 * - any other direction: each edge contributes in both ways.
 *
 * @param graph         graph whose neighbor sets are computed
 * @param edgeDirection which edge end receives the opposite end's id; default Either
 * @return the merged neighbor id set for each vertex that received at least one id
 */
def neighbors[VD, ED: ClassTag](graph: Graph[VD, ED],
    edgeDirection: EdgeDirection = EdgeDirection.Either): VertexRDD[Set[VertexId]] =
  edgeDirection match {
    case EdgeDirection.In =>
      // Only vertex ids are read, so no triplet attributes need to be shipped.
      graph.aggregateMessages[Set[VertexId]](
        edge => edge.sendToDst(Set(edge.srcId)),
        (left, right) => left ++ right,
        TripletFields.None)
    case EdgeDirection.Out =>
      graph.aggregateMessages[Set[VertexId]](
        edge => edge.sendToSrc(Set(edge.dstId)),
        (left, right) => left ++ right,
        TripletFields.None)
    case _ =>
      graph.aggregateMessages[Set[VertexId]](
        edge => {
          edge.sendToDst(Set(edge.srcId))
          edge.sendToSrc(Set(edge.dstId))
        },
        (left, right) => left ++ right,
        TripletFields.None)
  }
/**
 * Returns the per-vertex degree of `g` measured along `direction`.
 *
 * Delegates to GraphX's `inDegrees`, `outDegrees` or `degrees`. Per the GraphX
 * documentation of those members, vertices with no edge in the requested
 * direction are not included in the result.
 *
 * @param g         graph whose degrees are computed
 * @param direction In, Out or Either (default Either)
 * @return the degree of each vertex along `direction`
 * @throws SparkException if `direction` is `EdgeDirection.Both`
 */
def degreesRDD[VD: ClassTag, ED: ClassTag](g: Graph[VD, ED],
    direction: EdgeDirection = EdgeDirection.Either): VertexRDD[Int] =
  direction match {
    case EdgeDirection.In => g.inDegrees
    case EdgeDirection.Out => g.outDegrees
    case EdgeDirection.Either => g.degrees
    case EdgeDirection.Both =>
      // The message names this function, not GraphX's collectEdges.
      throw new SparkException(
        "degreesRDD does not support EdgeDirection.Both. Use EdgeDirection.Either instead.")
  }