本文记录 Spark GraphX 中 Graph.mask 函数的用法,以方便日后查阅和记忆
创建用于实验的图
import org.apache.spark._
import org.apache.spark.graphx._
// To make some of the examples work we will also need RDD
import org.apache.spark.rdd.RDD
// Vertex RDD: each element is (vertex id, (user name, role)).
val users: RDD[(VertexId, (String, String))] =
  sc.parallelize(Seq(
    (3L, ("rxin", "student")),
    (7L, ("jgonzal", "postdoc")),
    (5L, ("franklin", "prof")),
    (2L, ("istoica", "prof")),
    (4L, ("peter", "student"))
  ))

// Edge RDD: each edge carries a string describing the relationship.
// Two edges point at vertex 0, which has no entry in `users`.
val relationships: RDD[Edge[String]] =
  sc.parallelize(Seq(
    Edge(3L, 7L, "collab"),
    Edge(5L, 3L, "advisor"),
    Edge(2L, 5L, "colleague"),
    Edge(5L, 7L, "pi"),
    Edge(4L, 0L, "student"),
    Edge(5L, 0L, "colleague")
  ))

// Attribute given to any vertex that is referenced by an edge but absent from `users`.
val defaultUser: (String, String) = ("John Doe", "Missing")

// Assemble the property graph from the vertex RDD, the edge RDD and the default attribute.
val graph: Graph[(String, String), String] =
  Graph(users, relationships, defaultUser)
// 将生成的图的顶点输出展示一下
scala> graph.vertices.collect.foreach(println(_))
(0,(John Doe,Missing))
(2,(istoica,prof))
(3,(rxin,student))
(4,(peter,student))
(5,(franklin,prof))
(7,(jgonzal,postdoc))
// 将生成的图的边输出展示一下
scala> graph.edges.collect.foreach(println(_))
Edge(3,7,collab)
Edge(5,3,advisor)
Edge(2,5,colleague)
Edge(5,7,pi)
Edge(4,0,student)
Edge(5,0,colleague)
接下来进行mask操作
// Compute connected components on the full graph (missing vertex 0 included).
// The vertex attribute becomes a VertexId, so the (name, role) tuple,
// and with it the "Missing" marker, is no longer available on this graph.
val ccGraph: Graph[VertexId, String] =
  graph.connectedComponents()
scala> ccGraph.vertices.collect.foreach(println(_))
(0,0)
(2,0)
(3,0)
(4,0)
(5,0)
(7,0)
scala> ccGraph.edges.collect.foreach(println(_))
Edge(3,7,collab)
Edge(5,3,advisor)
Edge(2,5,colleague)
Edge(5,7,pi)
Edge(4,0,student)
Edge(5,0,colleague)
// Drop the placeholder ("Missing") vertices together with the edges connected to them.
val validGraph: Graph[(String, String), String] =
  graph.subgraph(vpred = (_, profile) => {
    val (_, role) = profile
    role != "Missing"
  })
scala> validGraph.vertices.collect.foreach(println(_))
(2,(istoica,prof))
(3,(rxin,student))
(4,(peter,student))
(5,(franklin,prof))
(7,(jgonzal,postdoc))
scala> validGraph.edges.collect.foreach(println(_))
Edge(3,7,collab)
Edge(5,3,advisor)
Edge(2,5,colleague)
Edge(5,7,pi)
// Keep only the part of the connected-components result that also exists in validGraph;
// the attributes of the result are those of ccGraph (the receiver of mask).
val validCCGraph: Graph[VertexId, String] =
  ccGraph.mask(validGraph)
scala> validCCGraph.vertices.collect.foreach(println(_))//由下面输出可以看出:结果中顶点的属性取自ccGraph(调用mask的图),而保留哪些顶点和边则由两图的交集决定,因此缺失的顶点0被去掉了
(2,0)
(3,0)
(4,0)
(5,0)
(7,0)
scala> validCCGraph.edges.collect.foreach(println(_))
Edge(3,7,collab)
Edge(5,3,advisor)
Edge(2,5,colleague)
Edge(5,7,pi)
// Now apply mask in the opposite direction: validGraph is the receiver, ccGraph the argument.
// NOTE(review): the name looks like a typo for "ccValidGraph"; kept as-is because later lines use it.
val ccvalidGrap: Graph[(String, String), String] =
  validGraph.mask(ccGraph)
scala> ccvalidGrap.vertices.collect.foreach(println(_))//由输出可看出:这次结果中顶点的属性取自validGraph(调用mask的图),保留的顶点和边仍然是两图的交集
(2,(istoica,prof))
(3,(rxin,student))
(4,(peter,student))
(5,(franklin,prof))
(7,(jgonzal,postdoc))
scala> ccvalidGrap.edges.collect.foreach(println(_))
Edge(3,7,collab)
Edge(5,3,advisor)
Edge(2,5,colleague)
Edge(5,7,pi)
记录完成