package interview
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object GroupSort {

  /**
   * Reads whitespace-separated "key value" lines from
   * `data/input/groupsorts.txt`, groups the integer values by key, sorts each
   * group's values in descending order, and prints every resulting
   * (key, value) pair.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf()
      .setMaster("local[*]")
      .setAppName(this.getClass.getCanonicalName)
      // A single partition keeps the printed output in one deterministic stream.
      .set("spark.default.parallelism", "1")
      .set("spark.sql.shuffle.partitions", "1")
    val sc = new SparkContext(conf)
    try {
      val dataRDD: RDD[String] = sc.textFile("data/input/groupsorts.txt")
      dataRDD
        .map(_.split(" "))
        // Each line is expected to be "key value"; value must parse as an Int.
        .map { case arr => arr(0) -> arr(1).toInt }
        .groupByKey()
        // Within each key, emit the values largest-first as (key, value) pairs.
        .flatMap { case (key, values) =>
          values.toList.sorted(Ordering[Int].reverse).map(key -> _)
        }
        .foreach(println)
    } finally {
      // Always release the SparkContext, even if the job above fails.
      sc.stop()
    }
  }
}
Sample input data (contents of data/input/groupsorts.txt):
aa 21
bb 51
cc 54
aa 51
bb 53
cc 58
aa 61
bb 54
cc 57
aa 58
bb 61
cc 54
aa 57
bb 58
cc 55