Spark 图计算简单来说就是构建边集合和点集合，然后把它们交给 GraphX 构建属性图并进行计算。
下面我用 Scala 写一个 Spark 图计算的 demo。
package sparksql
import org.apache.spark.graphx.{Edge, Graph}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Spark GraphX demo: builds vertex and edge RDDs, assembles a property graph,
 * and runs three simple queries (vertex filter, edge filter, triplet traversal).
 *
 * Vertices carry a (name, age) tuple; edges carry an Int attribute.
 */
object sparkgraphx {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("test").setMaster("local[*]")
    val sc = new SparkContext(conf)
    // Ensure the SparkContext is always released, even if a query throws.
    try {
      // Vertices: (vertexId, (name, age))
      val vertexArray = Array(
        (1L, ("Alice", 38)),
        (2L, ("Henry", 27)),
        (3L, ("Charlie", 55)),
        (4L, ("Peter", 32)),
        (5L, ("Mike", 35)),
        (6L, ("Kate", 23))
      )
      // Edges: Edge(srcId, dstId, attr) — attr is an Int weight
      val edgeArray = Array(
        Edge(2L, 1L, 5),
        Edge(2L, 4L, 2),
        Edge(3L, 2L, 7),
        Edge(3L, 6L, 3),
        Edge(4L, 1L, 1),
        Edge(5L, 2L, 3),
        Edge(5L, 3L, 8),
        Edge(5L, 6L, 8)
      )
      val vertexRdd = sc.parallelize(vertexArray)
      val edgeRdd = sc.parallelize(edgeArray)
      val graph = Graph(vertexRdd, edgeRdd)

      // Vertices whose age attribute exceeds 20.
      println("年龄大于20的人")
      graph.vertices
        .filter { case (_, (_, age)) => age > 20 }
        .collect()
        .foreach { case (_, (name, age)) => println(s"$name is $age") }

      // Edges whose attribute exceeds 3.
      println("图中属性大于3的边")
      graph.edges
        .filter(_.attr > 3)
        .collect()
        .foreach(e => println(s"${e.srcId} to ${e.dstId} att ${e.attr}"))

      // Triplets (src vertex, edge, dst vertex) whose edge attribute exceeds 3.
      println("triplet操作")
      graph.triplets
        .filter(_.attr > 3)
        .collect()
        .foreach(t => println(s"${t.srcAttr._1} likes ${t.dstAttr._1}"))
    } finally {
      sc.stop() // release local cluster resources
    }
  }
}