GraphX中的mask函数理解

做了一个关于mask函数的使用记录,以方便记忆

创建用于实验的图

import org.apache.spark._
import org.apache.spark.graphx._
// To make some of the examples work we will also need RDD
import org.apache.spark.rdd.RDD

// Vertex data, one entry per user: (vertex id, (user name, role)).
val users: RDD[(VertexId, (String, String))] = sc.parallelize(Seq(
  (3L, ("rxin", "student")),
  (7L, ("jgonzal", "postdoc")),
  (5L, ("franklin", "prof")),
  (2L, ("istoica", "prof")),
  (4L, ("peter", "student"))
))

// Edge data: Edge(source id, destination id, relationship label).
// The last two edges point at vertex 0, which has no entry in `users`.
val relationships: RDD[Edge[String]] = sc.parallelize(Seq(
  Edge(3L, 7L, "collab"),
  Edge(5L, 3L, "advisor"),
  Edge(2L, 5L, "colleague"),
  Edge(5L, 7L, "pi"),
  Edge(4L, 0L, "student"),
  Edge(5L, 0L, "colleague")
))

// Attribute given to any vertex that appears in an edge but not in `users`.
val defaultUser: (String, String) = ("John Doe", "Missing")

// Assemble the property graph; vertex 0 is created with `defaultUser` as its attribute.
val graph: Graph[(String, String), String] = Graph(users, relationships, defaultUser)
//将生成的图的顶点输出展示一下
scala> graph.vertices.collect.foreach(println(_))
(0,(John Doe,Missing))
(2,(istoica,prof))
(3,(rxin,student))
(4,(peter,student))
(5,(franklin,prof))
(7,(jgonzal,postdoc))
//将生成的图的边输出展示一下
scala> graph.edges.collect.foreach(println(_))
Edge(3,7,collab)
Edge(5,3,advisor)
Edge(2,5,colleague)
Edge(5,7,pi)
Edge(4,0,student)
Edge(5,0,colleague)

接下来进行mask操作。mask会返回一个子图:只保留当前图中同时也存在于参数图里的顶点和边,而顶点和边的属性仍然取自当前图(即调用mask的那个图)。

// Run connected components: every vertex attribute is replaced by the lowest
// vertex id in its component, so the original (name, role) data, including
// the "Missing" marker, is no longer available on this graph.
val ccGraph: Graph[VertexId, String] = graph.connectedComponents()
scala> ccGraph.vertices.collect.foreach(println(_))
(0,0)
(2,0)
(3,0)
(4,0)
(5,0)
(7,0)
scala> ccGraph.edges.collect.foreach(println(_))
Edge(3,7,collab)
Edge(5,3,advisor)
Edge(2,5,colleague)
Edge(5,7,pi)
Edge(4,0,student)
Edge(5,0,colleague)

// Drop the placeholder vertices (role "Missing"); subgraph also removes
// every edge that is connected to a dropped vertex.
val validGraph: Graph[(String, String), String] =
  graph.subgraph(vpred = (_, attr) => attr._2 != "Missing")
scala> validGraph.vertices.collect.foreach(println(_))
(2,(istoica,prof))                                                              
(3,(rxin,student))
(4,(peter,student))
(5,(franklin,prof))
(7,(jgonzal,postdoc))
scala> validGraph.edges.collect.foreach(println(_))
Edge(3,7,collab)
Edge(5,3,advisor)
Edge(2,5,colleague)
Edge(5,7,pi)

// Restrict the connected-components result to the valid subgraph: keep only the
// vertices and edges of ccGraph that also exist in validGraph. The attributes
// (component ids) still come from ccGraph.
val validCCGraph: Graph[VertexId, String] = ccGraph.mask(validGraph)
scala> validCCGraph.vertices.collect.foreach(println(_))//由下面输出可以看出,结果中顶点的属性取自ccGraph(调用mask的图),而顶点集合被限制为validGraph中也存在的顶点
(2,0)
(3,0)
(4,0)
(5,0)
(7,0)
scala> validCCGraph.edges.collect.foreach(println(_))
Edge(3,7,collab)
Edge(5,3,advisor)
Edge(2,5,colleague)
Edge(5,7,pi)

//咱们进行相反的mask操作

// Reverse direction: mask validGraph by ccGraph. The result keeps the vertices
// and edges of validGraph that also exist in ccGraph, with validGraph's attributes.
val ccValidGraph = validGraph.mask(ccGraph)
scala> ccValidGraph.vertices.collect.foreach(println(_)) // the output shows that this time the vertex attributes come from validGraph
(2,(istoica,prof))
(3,(rxin,student))
(4,(peter,student))
(5,(franklin,prof))
(7,(jgonzal,postdoc))
scala> ccValidGraph.edges.collect.foreach(println(_))
Edge(3,7,collab)
Edge(5,3,advisor)
Edge(2,5,colleague)
Edge(5,7,pi)

记录完成

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值