package com.sdcet

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Iterative PageRank over a small hard-coded 4-node link graph (A, B, C, D),
 * run locally for 10 iterations with damping factor 0.85, then saved to HDFS.
 *
 * Created by Administrator on 2017/2/10.
 */
object PageRank {

  def main(args: Array[String]): Unit = {
    // Windows-only workaround so Hadoop can find winutils.exe locally.
    System.setProperty("hadoop.home.dir", "E:\\winutils-hadoop-2.6.4\\hadoop-2.6.4")

    val conf = new SparkConf().setAppName("PageRank").setMaster("local")
    val sc = new SparkContext(conf)

    // Adjacency list: page -> pages it links to.
    // BUG FIX: the original had ("A", Array("d")) — lowercase "d" is not a node,
    // so A's contribution leaked to a phantom page and "D" never received rank.
    val links: RDD[(String, Array[String])] = sc.parallelize(Array(
      ("A", Array("D")),
      ("B", Array("A")),
      ("C", Array("A", "B")),
      ("D", Array("A", "C"))), 2).cache() // joined on every iteration — cache it once

    // Initial rank of 1.0 for every page; reassigned each iteration.
    var ranks: RDD[(String, Double)] = sc.parallelize(Array(
      ("A", 1.0), ("B", 1.0), ("C", 1.0), ("D", 1.0)), 2)

    // 10 rounds of the damped update: rank' = 0.15 + 0.85 * sum(incoming contribs).
    for (_ <- 1 to 10) {
      // Each page divides its current rank evenly among its outgoing links.
      // (Renamed pattern variables — the original shadowed the outer `links`/`rank`.)
      val contribs = links.join(ranks, 2).flatMap {
        case (_, (outLinks, rank)) =>
          outLinks.map(dest => (dest, rank / outLinks.length))
      }
      ranks = contribs.reduceByKey(_ + _, 2).mapValues(0.15 + 0.85 * _)
    }

    // BUG FIX: printing the RDD itself only shows its toString (e.g.
    // "MapPartitionsRDD[...]"); collect() materializes the actual ranks.
    println("rank:" + ranks.collect().mkString(", "))
    ranks.saveAsTextFile("hdfs://hadoop1:9000/pageRank")

    sc.stop() // release the local Spark context cleanly
  }
}