Secondary sort means ordering by more than one field: records are sorted by a primary field first, and ties are broken by a secondary field.
package test1

import org.apache.spark.{SparkConf, SparkContext}

// Composite key: orders by `first`, then by `second` when `first` is equal.
// Extending Ordered supplies the implicit Ordering that sortByKey needs;
// Serializable is required because keys are shuffled across the cluster.
class SecondSort(val first: Int, val second: Int) extends Ordered[SecondSort] with Serializable {
  override def compare(that: SecondSort): Int = {
    if (this.first != that.first) {
      this.first.compare(that.first)   // compare avoids Int-subtraction overflow
    } else {
      this.second.compare(that.second)
    }
  }
}

object Test {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("second").setMaster("local")
    val sc = new SparkContext(conf)
    val arr = Array((2, 1), (2, 4), (2, 3), (5, 2), (5, 4), (5, 2), (4, 3), (4, 2), (4, 1))
    sc.makeRDD(arr)
      .map(a => (new SecondSort(a._1, a._2), a))  // wrap each pair in the composite key
      .sortByKey()                                // sorts via SecondSort's compare
      .foreach(a => println(a._2 + "\t"))
    // The same logic, reading "first,second" lines from a text file:
    // sc.textFile("D:\\a\\a.txt")
    //   .map(a => (new SecondSort(a.split(",")(0).toInt, a.split(",")(1).toInt), a))
    //   .sortByKey()
    //   .foreach(a => println(a._2 + "\t"))
    sc.stop()
  }
}
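For plain ascending-then-ascending order, the custom key class is not strictly necessary: Scala already provides an implicit Ordering for (Int, Int) tuples that compares the first element, then the second. A minimal sketch, reusing the SparkContext sc from the example above:

    sc.makeRDD(Array((2, 1), (2, 4), (5, 2), (4, 3)))
      .sortBy(identity)   // implicit tuple Ordering: _1 first, then _2
      .foreach(println)

The custom Ordered key earns its keep when the two fields need different directions (e.g. first ascending, second descending), since the comparison logic then lives in one place.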
This is similar to the grouping + sorting implementation of secondary sort in MapReduce, where a composite key, a custom partitioner, and a sort comparator combine to the same effect.
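For a closer match to that MapReduce pattern, Spark's repartitionAndSortWithinPartitions lets you partition by the first field (the "grouping" step) while sorting each partition by the full composite key. A minimal sketch under assumed names (FirstFieldPartitioner and MrStyleSecondarySort are hypothetical, chosen for illustration):

    import org.apache.spark.{Partitioner, SparkConf, SparkContext}

    // Partition by the first field only, like a MapReduce custom partitioner.
    class FirstFieldPartitioner(partitions: Int) extends Partitioner {
      override def numPartitions: Int = partitions
      override def getPartition(key: Any): Int = key match {
        case (first: Int, _) => math.abs(first.hashCode) % partitions
        case _               => 0
      }
    }

    object MrStyleSecondarySort {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(new SparkConf().setAppName("mr-style").setMaster("local"))
        val data = sc.makeRDD(Array((2, 1), (2, 4), (5, 2), (4, 3), (4, 1)))
        data.map(p => (p, ()))  // the whole pair is the key; the value is unused
          .repartitionAndSortWithinPartitions(new FirstFieldPartitioner(2))
          .keys
          .foreachPartition(it => println(it.mkString(" ")))  // each partition is sorted
        sc.stop()
      }
    }

Unlike a global sortByKey, this only guarantees order within each partition, which is exactly the contract a MapReduce reducer sees for its key group.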