Spark 二次排序
废话不多说，直接上代码。
定义二次排序的key
/**
 * Composite key for secondary sort: orders by `first`, breaking ties
 * with `second`. Must be Serializable because Spark ships keys between
 * driver and executors during the shuffle.
 */
class SecondSortKey(val first: Int, val second: Int)
extends Ordered[SecondSortKey] with Serializable {
  /**
   * Compares this key to `that`: negative / zero / positive per the
   * `Ordered` contract.
   *
   * Uses `Integer.compare` instead of subtraction: `a - b` overflows for
   * operands of opposite signs near Int.MinValue/Int.MaxValue, which would
   * silently invert the ordering.
   */
  def compare(that: SecondSortKey): Int = {
    val byFirst = Integer.compare(this.first, that.first)
    if (byFirst != 0) byFirst
    else Integer.compare(this.second, that.second)
  }
}
二次排序处理过程
/**
 * Driver for the secondary-sort example: sorts lines of the form
 * "<first> <second>" by the first number, then by the second number
 * within equal firsts, and prints the sorted original lines.
 */
object SecondSort4LXG {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("SecondSort")
      .setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val testData = Seq(
        "40 20", "40 10", "40 30", "40 5 ",
        "30 30", "30 20", "30 10", "30 40",
        "50 20", "50 50", "50 10", "50 60")
      val testRdd: RDD[String] = sc.parallelize(testData, 4)
      // Key each line with the composite sort key. Split each line ONCE
      // (the original split twice per record); trim tolerates stray
      // whitespace such as in "40 5 ".
      val pairs = testRdd.map { line =>
        val fields = line.split(" ")
        (new SecondSortKey(fields(0).trim.toInt, fields(1).trim.toInt), line)
      }
      // sortByKey uses SecondSortKey's Ordered[SecondSortKey] ordering.
      val sortedPairs = pairs.sortByKey()
      val sortedLines = sortedPairs.map(_._2)
      // NOTE(review): foreach(println) prints on the driver only in local
      // mode; on a cluster the output lands on executor stdout.
      sortedLines.foreach(println)
    } finally {
      // Always release the SparkContext, even if an action above throws
      // (the original leaked it on failure).
      sc.stop()
    }
  }
}