package com.scala.test
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
/**
 * Spark job that prints two reports for a space-separated text file:
 *
 *  1. the number of occurrences of every space-separated token, and
 *  2. for every key (first field of a line) its three highest scores
 *     (second field, parsed as a `Double`), in descending order.
 *
 * @author 零落尘土
 * @since 2019-10-10
 */
object ScoreCount {

  /** Input file that is read when no path is passed on the command line. */
  private val DefaultInputPath = "D:\\scalaProjects\\scalademo\\score.txt"

  /** Number of highest scores kept per key. */
  private val TopN = 3

  /**
   * Runs both reports on a local master and prints every result with `println`.
   *
   * @param args optional; when present, `args(0)` is used as the input path
   *             instead of `DefaultInputPath`
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("ScoreCount").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      // Path of the file to read.
      val inputPath = args.headOption.getOrElse(DefaultInputPath)
      // Cached because the two independent jobs below both read these lines.
      val lines: RDD[String] = sc.textFile(inputPath).cache()

      // Report 1: occurrences of each space-separated token.
      lines
        .flatMap(_.split(" "))
        .map(word => (word, 1))
        .reduceByKey(_ + _)
        .foreach(println)

      // Report 2: the TopN highest scores per key, highest first.
      lines
        .map(_.split(" ")) // split each line only once
        // Lines with fewer than two fields (e.g. blank lines) are skipped
        // instead of failing the job with ArrayIndexOutOfBoundsException.
        // A non-numeric score still fails with NumberFormatException.
        .collect { case Array(name, score, _*) => (name, score.toDouble) }
        .groupByKey()
        .map { case (name, scores) => (name, scores.toList.sortWith(_ > _).take(TopN)) }
        .foreach(println)
    } finally {
      // Always release the context, even when one of the jobs throws.
      sc.stop()
    }
  }
}
// score.txt的文件如下:
// 运行结果如下: