// Example: Spark GraphX connected-components demo (introductory sample).
import com.hm.util.SparkHelper
import org.apache.spark.graphx.{Edge, Graph, VertexId, VertexRDD}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
/**
 * GraphX demo: builds a small bipartite-style graph of people linked to hub
 * vertices (phone brands: 小米 / Nokia / 坚果) and groups the people by the
 * connected component they fall into.
 *
 * Vertex attribute: (name, age). Edge attribute: Int (unused, always 0).
 * Everyone attached to the same hub vertex ends up in one component.
 */
object GraphTest {

  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkHelper.getSparkSession
    try {
      // (vertexId, (name, age)) — VertexId is GraphX's alias for Long.
      val vertexRDD: RDD[(VertexId, (String, Int))] = spark.sparkContext.makeRDD(Seq(
        (1L, ("李白", 44)),
        (2L, ("李四", 23)),
        (6L, ("王五", 26)),
        (9L, ("赵六", 66)),
        (16L, ("吕布", 44)),
        (21L, ("刘备", 34)),
        (44L, ("张飞", 24)),
        (5L, ("宋江", 44)),
        (7L, ("李逵", 30)),
        (133L, ("小米", 24)),
        (138L, ("Nokia", 22)),
        (158L, ("坚果", 29))
      ))

      // Each edge attaches a person to a hub vertex (133 / 138 / 158).
      val edgeRDD: RDD[Edge[Int]] = spark.sparkContext.makeRDD(Seq(
        Edge(1L, 133L, 0),
        Edge(9L, 133L, 0),
        Edge(6L, 133L, 0),
        Edge(2L, 133L, 0),
        Edge(6L, 138L, 0),
        Edge(16L, 138L, 0),
        Edge(21L, 138L, 0),
        Edge(44L, 138L, 0),
        Edge(5L, 158L, 0),
        Edge(7L, 158L, 0)
      ))

      val graph: Graph[(String, Int), Int] = Graph(vertexRDD, edgeRDD)

      // connectedComponents() labels every vertex with the smallest VertexId
      // reachable from it: (vertexId, componentId).
      val components: VertexRDD[VertexId] = graph.connectedComponents().vertices

      // collect() first so println runs on the driver; a bare rdd.foreach(println)
      // would print into executor logs on a real cluster.
      components.collect().foreach(println)

      // Attach each person's (name, age) to their component label:
      // (vertexId, (componentId, (name, age)))
      val joined: RDD[(VertexId, (VertexId, (String, Int)))] = components.join(vertexRDD)
      joined.collect().foreach(println)

      // Group members by component id. aggregateByKey with list-prepend avoids
      // the quadratic cost of repeated List concatenation (reduceByKey(_ ++ _)).
      joined
        .map { case (_, (componentId, person)) => (componentId, person) }
        .aggregateByKey(List.empty[(String, Int)])(
          (acc, person) => person :: acc, // merge within a partition
          _ ::: _                         // merge across partitions
        )
        .collect()
        .foreach(println)
    } finally {
      // Always release the SparkSession, even if the job above fails.
      spark.stop()
    }
  }
}