数据(每行格式:姓名 科目 分数,以空格分隔):
zhangsan chinese 80
zhangsan math 90
zhangsan english 85
lisi chinese 90
lisi math 80
lisi english 90
wangwu chinese 84
wangwu math 89
wangwu english 70
maliu chinese 82
maliu math 75
maliu english 100
结果(每个科目分数最高的前三名,按降序排列):
math:
90
89
80
chinese:
90
84
82
english:
100
90
85
package com.spark.core
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Spark job that prints the three highest scores recorded for each subject.
 *
 * Input: a text file (path given as the first program argument) whose lines have the
 * form `name subject score`, e.g. `zhangsan math 90`. For every distinct subject the job
 * prints the subject followed by a colon, then its top scores in descending order, one
 * score per line.
 */
object GroupTopN {

  // Runs once when this object is initialised.
  // NOTE(review): presumably points Hadoop at a local Windows installation on the
  // author's machine -- confirm before running this anywhere else.
  System.setProperty("hadoop.home.dir","D:\\soft\\hadoop\\hadoop-2.7.3")

  /** Number of highest scores kept per subject. */
  private val TopN = 3

  /**
   * Entry point.
   *
   * @param args `args(0)` is the path of the input text file
   * @throws IllegalArgumentException if no input path is supplied
   */
  def main(args: Array[String]): Unit = {
    // Fail fast, before a SparkContext is created, when the input path is missing.
    require(args.nonEmpty, "usage: GroupTopN <input-path>")

    val conf = new SparkConf().setMaster("local").setAppName("group-topn")
    val sc = new SparkContext(conf)
    try {
      // Local copy so the closure below captures a plain Int.
      val limit = TopN

      val top3Score = sc.textFile(args(0))
        .filter(line => line.trim.nonEmpty) // blank lines carry no record
        .map(line => parseRecord(line))
        .groupByKey()
        .mapValues(scores => topScores(scores, limit))

      // The result holds at most `limit` scores per subject, so collecting it is cheap.
      // Printing on the driver keeps each subject's header and its scores together.
      top3Score.collect().foreach { case (subject, scores) =>
        println(s"$subject:")
        scores.foreach(score => println(score))
      }
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }

  /**
   * Parses one `name subject score` line into a `(subject, score)` pair.
   *
   * Fields may be separated by any run of whitespace; fields after the third are ignored.
   *
   * @throws IllegalArgumentException if the line has fewer than three fields
   * @throws NumberFormatException    if the score field is not an integer
   */
  private def parseRecord(line: String): (String, Int) = {
    val fields = line.trim.split("\\s+")
    require(fields.length >= 3, s"malformed record (expected 'name subject score'): '$line'")
    (fields(1), fields(2).toInt)
  }

  /**
   * Returns the `n` largest values of `scores`, highest first.
   *
   * sortWith(_ > _) sorts in descending order; sortWith(_ < _) would sort ascending.
   */
  private def topScores(scores: Iterable[Int], n: Int): List[Int] =
    scores.toList.sortWith(_ > _).take(n)
}