初识Spark GraphX

该博客展示了如何使用 Apache Spark 的 GraphX 模块构建和操作图数据结构。通过三个示例,分别创建了简单的整数属性图(并从边列表文件加载图)、用户合作关系属性图和用户社交网络关系图,并演示了顶点过滤、三元组过滤、边数与顶点数统计,以及度、入度、出度的计算等操作,例如找出年龄大于30岁的用户。
摘要由CSDN通过智能技术生成
package cn.kgc.graphxdemo

import org.apache.spark.SparkContext
import org.apache.spark.graphx.{Edge, EdgeTriplet, Graph, GraphLoader}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession

/**
  * Introductory GraphX demo.
  *
  * Builds a three-vertex graph from in-memory RDDs and prints its triplets,
  * then loads a second graph from the edge-list file `in/graph.txt` and prints
  * its triplets as well.
  */
object GraphDemo1 {
  /**
    * Entry point.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {

    val spark: SparkSession = SparkSession.builder().appName("sparkgraph")
      .master("local[*]")
      .getOrCreate()
    val sc: SparkContext = spark.sparkContext

    // try/finally so the session is always stopped, even when a job fails
    // (for example because in/graph.txt does not exist).
    try {
      // Vertices: (vertex id, vertex attribute)
      val vertices: RDD[(Long, Int)] = sc.makeRDD(Seq((1L,1),(2L,2),(3L,3)))
      // Edges: Edge(source id, destination id, edge attribute)
      val edges: RDD[Edge[Int]] = sc.makeRDD(Seq(Edge(1L,2L,1),Edge(2L,3L,1)))

      val graph: Graph[Int, Int] = Graph(vertices,edges)

      // Prints the default toString of the graph implementation, not its contents.
      println(graph)
      // collect() brings the triplets to the driver before printing;
      // rdd.foreach(println) would print on the executors instead.
      graph.triplets.collect().foreach(println)

      println("----------------------------------------")

      // Path is relative to the working directory of the driver.
      val graph2: Graph[Int, Int] = GraphLoader.edgeListFile(sc,"in/graph.txt")
      graph2.triplets.collect().foreach(println)

      /**
        * Sample output recorded by the author (the object hash differs per run,
        * and the second section depends on the contents of in/graph.txt):
        *
        * org.apache.spark.graphx.impl.GraphImpl@1fde4f40
        * ((1,1),(2,2),1)
        * ((2,2),(3,3),1)
        * ----------------------------------------
        * ((1,1),(2,1),1)
        * ((2,1),(3,1),1)
        * ((3,1),(4,1),1)
        */
    } finally {
      spark.stop()
    }
  }

}


构建用户合作关系属性图 

顶点属性:用户名、职业

边属性:合作关系

package cn.kgc.graphxdemo

import org.apache.spark.SparkContext
import org.apache.spark.graphx.{Edge, Graph}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession

/**
  * Property-graph demo: users connected by collaboration relationships.
  *
  * Each vertex carries a (user name, occupation) pair and each edge carries a
  * relationship label. The program prints the triplets, then the vertices,
  * then the edges.
  */
object GraphDemo2 {
  /**
    * Entry point.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {

    val spark: SparkSession = SparkSession.builder().appName("sparkgraph")
      .master("local[*]")
      .getOrCreate()
    val sc: SparkContext = spark.sparkContext

    // try/finally so the session is always stopped, even when a job fails.
    try {
      // Vertices: (vertex id, (user name, occupation)) -- the attribute is a 2-tuple.
      val users: RDD[(Long, (String, String))] = sc.makeRDD(
        Array(
          (3L, ("rxin", "student")),
          (7L, ("jgonzal", "postdoc")),
          (5L, ("franklin", "professor")),
          (2L, ("istoica", "professor"))
        )
      )
      // Edges: Edge(source id, destination id, relationship) -- four rows, four edges.
      val relations: RDD[Edge[String]] = sc.makeRDD(
        Array(
          Edge(3L, 7L, "Collaborator"),
          Edge(5L, 3L, "Advisior"),
          Edge(2L, 5L, "Colleague"),
          Edge(5L, 7L, "PI")
        )
      )
      val graph: Graph[(String, String), String] = Graph(users,relations)

      // collect() brings the data to the driver before printing;
      // rdd.foreach(println) would print on the executors instead.
      graph.triplets.collect().foreach(println)
      println("--------------------------------")
      graph.vertices.collect().foreach(println)
      graph.edges.collect().foreach(println)

      /**
        * Sample output recorded by the author (line order within each section
        * may differ between runs and partitionings):
        *
        * ((2,(istoica,professor)),(5,(franklin,professor)),Colleague)
        * ((5,(franklin,professor)),(7,(jgonzal,postdoc)),PI)
        * ((5,(franklin,professor)),(3,(rxin,student)),Advisior)
        * ((3,(rxin,student)),(7,(jgonzal,postdoc)),Collaborator)
        * --------------------------------
        * (2,(istoica,professor))
        * (3,(rxin,student))
        * (7,(jgonzal,postdoc))
        * (5,(franklin,professor))
        * Edge(5,7,PI)
        * Edge(5,3,Advisior)
        * Edge(3,7,Collaborator)
        * Edge(2,5,Colleague)
        */
    } finally {
      spark.stop()
    }
  }

}

构建用户社交网络关系 

顶点:用户名、年龄

边:打call次数

找出年龄大于30岁的用户

package cn.kgc.graphxdemo

import org.apache.spark.SparkContext
import org.apache.spark.graphx.{Edge, Graph, VertexRDD}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession

/**
  * Social-network demo.
  *
  * Each vertex carries a (user name, age) pair and each edge carries an Int
  * weight. The program prints the users older than 30, the triplets whose edge
  * weight is greater than 5, the edge and vertex counts, and finally the
  * total, in- and out-degree of the vertices.
  */
object GraphDemo3 {
  /**
    * Entry point.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder().appName("sparkgraph")
      .master("local[*]")
      .getOrCreate()
    val sc: SparkContext = spark.sparkContext

    // try/finally so the session is always stopped, even when a job fails.
    try {
      // Vertices: (vertex id, (user name, age)); tuple elements may have different types.
      val users: RDD[(Long, (String, Int))] = sc.makeRDD(
        Array(
          (1L, ("Alice", 28)),
          (2L, ("Bob", 27)),
          (3L, ("Charlie", 65)),
          (4L, ("David", 42)),
          (5L, ("Ed", 55)),
          (6L, ("Fran", 50))
        )
      )
      // Edges: Edge(source id, destination id, weight)
      val edges: RDD[Edge[Int]] = sc.makeRDD(
        Array(
          Edge(2L, 1L, 7),
          Edge(3L, 2L, 4),
          Edge(4L, 1L, 1),
          Edge(2L, 4L, 2),
          Edge(5L, 2L, 2),
          Edge(5L, 3L, 8),
          Edge(3L, 6L, 3),
          Edge(5L, 6L, 3)
        )
      )
      val graph: Graph[(String, Int), Int] = Graph(users,edges)

      // Users older than 30. The pattern match names the age field; it is
      // equivalent to filtering on x._2._2 > 30.
      val over30: VertexRDD[(String, Int)] =
        graph.vertices.filter { case (_, (_, age)) => age > 30 }
      // collect() brings the data to the driver before printing;
      // rdd.foreach(println) would print on the executors instead.
      over30.collect().foreach(println)
      println("-----------------------------")

      // A triplet combines an edge with both endpoints: srcAttr / dstAttr are the
      // attributes of the source / destination vertex, attr is the edge weight.
      graph.triplets
        .filter(_.attr > 5)
        .collect()
        .foreach(t => println(s"${t.srcAttr._1}喜欢 ${t.dstAttr._1} 爱的有多深: ${t.attr}"))

      val edgesNum: Long = graph.numEdges
      val verticesNum: Long = graph.numVertices

      // Explicit tuple instead of relying on argument auto-tupling; prints e.g. (8,6).
      println((edgesNum, verticesNum))

      println("----------------度-------------------")
      // Total degree per vertex (in the sample output it equals in-degree + out-degree).
      val degrees: VertexRDD[Int] = graph.degrees
      degrees.collect().foreach(println)
      println("----------入度----------")
      // In-degree; in the sample output vertex 5, which has no incoming edge, is absent.
      val degreesin: VertexRDD[Int] = graph.inDegrees
      degreesin.collect().foreach(println)
      println("------------出度---------------")
      // Out-degree; in the sample output vertices 1 and 6, which have no outgoing edge, are absent.
      val degreesout: VertexRDD[Int] = graph.outDegrees
      degreesout.collect().foreach(println)

      /**
        * Sample output recorded by the author (line order within each section
        * may differ between runs and partitionings):
        *
        * (3,(Charlie,65))
        * (5,(Ed,55))
        * (6,(Fran,50))
        * (4,(David,42))
        * -----------------------------
        * Ed喜欢 Charlie 爱的有多深: 8
        * Bob喜欢 Alice 爱的有多深: 7
        * (8,6)
        * ----------------度-------------------
        * (4,2)
        * (3,3)
        * (6,2)
        * (2,4)
        * (1,2)
        * (5,3)
        * ----------入度----------
        * (4,1)
        * (1,2)
        * (3,1)
        * (6,2)
        * (2,2)
        * ------------出度---------------
        * (5,3)
        * (4,1)
        * (2,2)
        * (3,2)
        */
    } finally {
      spark.stop()
    }
  }

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值