// Spark GraphX: user relationships (用户关系)
import org.apache.spark.graphx.{VertexRDD, VertexId, Graph, Edge}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * GraphX walkthrough over a small, hard-coded user graph.
 *
 * Vertices are `(id, (name, age))` and every edge carries an `Int` attribute.
 * `main` builds the graph and prints, in order: vertices with age > 30, edges and
 * triplets with attribute > 5, the maximum out-/in-/total degree, the vertices with
 * age + 10, the edges with the attribute doubled, an age-filtered subgraph, and
 * finally each user together with its in- and out-degree.
 */
object UserRelationship {

  /**
   * Vertex payload that carries the degree information next to name and age.
   * Declared at object level rather than inside `main` so it is an ordinary member
   * class instead of a method-local one.
   */
  private final case class User(name: String, age: Int, inDeg: Int, outDeg: Int)

  /**
   * Reducer for `(vertexId, degree)` pairs.
   *
   * @return `a` when its degree is strictly greater than `b`'s, otherwise `b`
   */
  private def max(a: (VertexId, Int), b: (VertexId, Int)): (VertexId, Int) =
    if (a._2 > b._2) a else b

  /**
   * Entry point: creates the SparkContext, runs the demo and always stops the context.
   *
   * @param args command-line arguments (not read)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName(" user relationship ")
      // Fall back to a single local thread only when no master was supplied
      // (e.g. by spark-submit), instead of unconditionally overriding it.
      .setIfMissing("spark.master", "local[1]")
    val sc = new SparkContext(conf)
    try run(sc)
    finally sc.stop() // release the context even if one of the jobs fails
  }

  /** Builds the sample graph on `sc` and prints every query of the walkthrough. */
  private def run(sc: SparkContext): Unit = {
    // id , name , age
    val vertexArray = Array(
      (1L, ("Alice", 28)),
      (2L, ("Bob", 27)),
      (3L, ("Charlie", 65)),
      (4L, ("David", 42)),
      (5L, ("Ed", 55)),
      (6L, ("Fran", 50)))
    // source id , destination id , edge attribute
    val edgeArray = Array(
      Edge(2L, 1L, 7),
      Edge(2L, 4L, 2),
      Edge(3L, 2L, 4),
      Edge(3L, 6L, 3),
      Edge(4L, 1L, 1),
      Edge(5L, 2L, 2),
      Edge(5L, 3L, 8),
      Edge(5L, 6L, 3))
    val vertexRDD: RDD[(Long, (String, Int))] = sc.parallelize(vertexArray)
    val edgeRDD: RDD[Edge[Int]] = sc.parallelize(edgeArray)
    val graph: Graph[(String, Int), Int] = Graph(vertexRDD, edgeRDD)

    println("find whose age > 30")
    // find whose age > 30
    graph.vertices.filter {
      case (_, (_, age)) => age > 30
    }.collect.foreach {
      case (_, (name, age)) => println(s"$name is $age")
    }

    println(" find the attribute > 5 ")
    // find the attribute > 5
    // Member access inside an interpolated string needs the ${...} form; a bare
    // `$e.srcId` would interpolate the whole edge and print ".srcId" literally.
    graph.edges.filter(e => e.attr > 5)
      .collect.foreach(e => println(s"${e.srcId} to ${e.dstId} att ${e.attr}"))

    println("edge attr >5")
    // edge attr >5
    for (triple <- graph.triplets.filter(t => t.attr > 5).collect) {
      println(s"${triple.srcAttr._1} likes ${triple.dstAttr._1}")
    }

    // Degree operations: find the maximum out-degree, in-degree and total degree.
    println("the max outDegrees and inDegress Degress is :")
    println("max of outDegrees:" + graph.outDegrees.reduce(max) +
      " max of inDegrees:" + graph.inDegrees.reduce(max) +
      " max of Degrees:" + graph.degrees.reduce(max))

    // Vertex transformation: age + 10.
    // The function returns only the new attribute (name, age + 10); the vertex id is
    // kept by mapVertices itself and must not be nested into the attribute.
    graph.mapVertices {
      case (_, (name, age)) => (name, age + 10)
    }.vertices.collect.foreach(v => println(s"${v._2._1} is ${v._2._2}"))

    // Edge transformation: attribute * 2.
    graph.mapEdges(e => e.attr * 2).edges.collect
      .foreach(e => println(s"${e.srcId} to ${e.dstId} attr ${e.attr}"))

    println("顶点年纪>30的子图:")
    // NOTE(review): the banner above says ">30" but the predicate is ">= 30"; the two
    // differ only for an age of exactly 30 — confirm which one is intended.
    val subGraph = graph.subgraph(vpred = (id, vd) => vd._2 >= 30)
    println("子图所有顶点:")
    subGraph.vertices.collect.foreach(v => println(s"${v._2._1} is ${v._2._2}"))
    println("subGraph edges:")
    subGraph.edges.collect.foreach(e => println(s"${e.srcId} to ${e.dstId} att ${e.attr}"))

    println("collection:")
    // Create a new graph whose vertex type is User, converted from `graph`;
    // both degrees start at 0 and are filled in by the joins below.
    val initialUserGraph: Graph[User, Int] = graph.mapVertices {
      case (_, (name, age)) => User(name, age, 0, 0)
    }
    // Fill in the degree information. inDegrees/outDegrees contain no entry for a
    // vertex without such edges, hence the getOrElse(0) on the joined Option.
    val userGraph = initialUserGraph.outerJoinVertices(initialUserGraph.inDegrees) {
      case (_, u, inDegOpt) => User(u.name, u.age, inDegOpt.getOrElse(0), u.outDeg)
    }.outerJoinVertices(initialUserGraph.outDegrees) {
      case (_, u, outDegOpt) => User(u.name, u.age, u.inDeg, outDegOpt.getOrElse(0))
    }
    // Materialize and print the joined graph so the "collection:" section has output.
    userGraph.vertices.collect.foreach {
      case (_, u) => println(s"${u.name} inDeg: ${u.inDeg} outDeg: ${u.outDeg}")
    }
  }
}