实验十 Spark-GraphX
用不同的方法创建图并查看图的3种视图
// 导入包
scala> import org.apache.spark._
import org.apache.spark._
scala> import org.apache.spark.rdd.RDD
import org.apache.spark.rdd.RDD
scala> import org.apache.spark.graphx._
import org.apache.spark.graphx._
// 加载处理数据
scala> val IDWeb = sc.textFile("./web-Google.txt")
IDWeb: org.apache.spark.rdd.RDD[String] = ./web-Google.txt MapPartitionsRDD[10] at textFile at <console>:37
scala> val edges = IDWeb.map{x=>val line=x.split("\t");Edge(line(0).toLong,line(1).toLong,1)}
edges: org.apache.spark.rdd.RDD[org.apache.spark.graphx.Edge[Int]] = MapPartitionsRDD[11] at map at <console>:38
scala> val rawEdges = IDWeb.map(x => {val line = x.split("\t");(line(0).toLong, line(1).toLong)})
rawEdges: org.apache.spark.rdd.RDD[(Long, Long)] = MapPartitionsRDD[12] at map at <console>:38
// 创建图
scala> val graphFromEdges = Graph.fromEdges(edges, 1L)
graphFromEdges: org.apache.spark.graphx.Graph[Long,Int] = org.apache.spark.graphx.impl.GraphImpl@7bec1342
scala> val graphFromEdgesTuple = Graph.fromEdgeTuples(rawEdges, 1)
graphFromEdgesTuple: org.apache.spark.graphx.Graph[Int,Int] = org.apache.spark.graphx.impl.GraphImpl@6f39c6a8
// 查看图的三种视图
scala> graphFromEdges.vertices.collect.foreach(println(_))
scala> graphFromEdges.edges.collect.foreach(println(_))
scala> graphFromEdges.triplets.collect.foreach(println(_))
查询顶点和边的个数
scala> graphFromEdges.numVertices
res5: Long = 875713
scala> graphFromEdges.numEdges
res6: Long = 5105039
通过aggregateMessages向每条边的目标顶点发送值1,并按顶点对收到的消息求和(_+_),返回各个顶点的统计结果,从而查看每个网页被链接的次数(即顶点的入度)
scala> graphFromEdges.aggregateMessages[Int](_.sendToDst(1),_+_).collect().take(10).foreach(println)
(266991,17)
(846729,1)
(627804,2)
(831957,22)
(512760,2)
(307248,2)
(449586,2)
(857454,15)
(32676,3)
(213045,10)
利用outerJoinVertices将图中顶点的属性更新为该顶点的出度(没有出度的顶点取0),再用subgraph将顶点ID大于1的顶点组成的子图取出来
scala> val outDegrees = graphFromEdges.outDegrees
outDegrees: org.apache.spark.graphx.VertexRDD[Int] = VertexRDDImpl[46] at RDD at VertexRDD.scala:57
scala> val graph = graphFromEdges.outerJoinVertices(outDegrees){(vid, data, optDegree) => optDegree.getOrElse(0)}
graph: org.apache.spark.graphx.Graph[Int,Int] = org.apache.spark.graphx.impl.GraphImpl@4aeda4a2
scala> val subGraph = graph.subgraph(epred = (ed) => (ed.srcId > 1L || ed.dstId > 1L), vpred = (id, attr) => id > 1L)
subGraph: org.apache.spark.graphx.Graph[Int,Int] = org.apache.spark.graphx.impl.GraphImpl@df90a0a
学会使用什么做什么事情
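学会了使用 Spark GraphX 创建图(Graph.fromEdges、Graph.fromEdgeTuples),查看图的三种视图(vertices、edges、triplets),统计顶点和边的个数,并使用 aggregateMessages、outerJoinVertices 和 subgraph 进行图的聚合、属性更新和子图提取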
在实验过程中遇到了什么问题?是如何解决的?
暂无
还有什么问题尚未解决?可能是什么原因导致的?
暂无