计算用户上传好友PR分(scala版)

SparkPageRank.scala

import org.apache.spark.sql.SparkSession

/** Naive PageRank over a whitespace-separated edge list, executed on Spark.
  *
  * Each usable input line supplies two tokens: the first is used as the source
  * node and the second as one of its outgoing neighbours. Every source node
  * starts with rank 1.0; on each iteration a node splits its current rank
  * evenly among its neighbours and the new rank of a node is
  * `0.15 + 0.85 * (sum of received contributions)`.
  *
  * Usage: `SparkPageRank <file> <iter>` (`<iter>` is optional).
  */
object SparkPageRank {

    /** Iteration count used when `<iter>` is not supplied. */
    private val DefaultIterations = 10

    /** Weight applied to the summed contributions; the remainder is the constant term. */
    private val Damping = 0.85

    /** Prints a notice to stderr that this is an example implementation only. */
    def showWarning(): Unit = {
        System.err.println(
            """WARN: This is a naive implementation of PageRank and is given as an example!
                |Please use the PageRank implementation found in org.apache.spark.graphx.lib.PageRank
                |for more conventional use.
            """.stripMargin)
    }

    /** Program entry point.
      *
      * @param args `args(0)` is the path of the edge-list text file; the optional
      *             `args(1)` is the number of iterations (defaults to 10).
      * @throws NumberFormatException if `args(1)` is present but not an integer
      */
    def main(args: Array[String]): Unit = {
        if (args.length < 1) {
            System.err.println("Usage: SparkPageRank <file> <iter>")
            System.exit(1)
        }
        showWarning()

        // Parse the iteration count before a session exists, so a bad value
        // fails without leaving a SparkSession behind.
        val iters = if (args.length > 1) args(1).toInt else DefaultIterations

        val spark = SparkSession.builder.appName("SparkPageRank").getOrCreate()
        try {
            val lines = spark.read.textFile(args(0)).rdd

            // Trim first so leading whitespace does not yield an empty first token,
            // and skip lines with fewer than two tokens (e.g. blank lines) instead
            // of failing the whole job on parts(1).
            val links = lines
                .map(_.trim.split("\\s+"))
                .filter(_.length >= 2)
                .map(parts => (parts(0), parts(1)))
                .distinct()
                .groupByKey()
                .cache() // reused by the join in every iteration

            val initialRanks = links.mapValues(_ => 1.0)

            val ranks = (1 to iters).foldLeft(initialRanks) { (current, _) =>
                val contribs = links.join(current).values.flatMap {
                    case (urls, rank) =>
                        val size = urls.size
                        urls.map(url => (url, rank / size))
                }
                contribs.reduceByKey(_ + _).mapValues((1.0 - Damping) + Damping * _)
            }

            ranks.collect().foreach(tup => println(s"${tup._1} has rank:  ${tup._2} ."))
        } finally {
            // Stop the session even when reading or computing fails.
            spark.stop()
        }
    }
}

调用:

spark-shell 执行scala脚本

spark-shell -i SparkPageRank.scala

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值