首先需要定义一个自定义的key类型,混入Scala提供的Ordered特质(trait)并实现其compare方法,该方法就是排序的逻辑。这里我对两列数字进行二次排序:先按第一列排序,第一列相同时再按第二列排序。代码如下:
package cn.spark.study.core
/**
 * Composite sort key made of two integer columns.
 *
 * Keys are ordered by `first`; ties on `first` are broken by `second`.
 * The class is `Serializable` so instances can be used as keys in
 * distributed collections.
 *
 * @param first  primary sort column
 * @param second secondary sort column, consulted only when `first` is equal
 */
class SecondarySortKey(val first: Int, val second: Int) extends Ordered[SecondarySortKey] with Serializable {

  /**
   * Compares this key with `that`.
   *
   * Uses `Integer.compare` rather than subtraction: `a - b` overflows when the
   * operands are far apart (e.g. `Int.MinValue - Int.MaxValue`) and would then
   * report the wrong sign.
   *
   * @param that the key to compare against
   * @return a negative value if this key sorts before `that`, a positive value
   *         if it sorts after, and 0 if both columns are equal
   */
  override def compare(that: SecondarySortKey): Int = {
    val byFirst = Integer.compare(this.first, that.first)
    if (byFirst != 0) byFirst
    else Integer.compare(this.second, that.second)
  }
}
package cn.spark.study.core
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
/**
 * Spark driver that sorts the lines of a text file by their first two
 * space-separated integer columns, using `SecondarySortKey` as the sort key.
 */
object SecondarySort {

  /**
   * Reads the input file, keys each line by its first two columns, sorts by
   * that key and prints the original lines in sorted order.
   *
   * Each line is expected to contain at least two space-separated integers;
   * a line that does not will fail in `toInt` or on the column index.
   *
   * @param args optional; `args(0)` is used as the input path when present,
   *             otherwise the original empty placeholder path `""` is kept
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("SecondarySort")
      .setMaster("local")
    val sc = new SparkContext(conf)
    try {
      // Prefer a path from the command line; fall back to the placeholder
      // that was hard-coded before so argument-less runs behave the same.
      val inputPath = args.headOption.getOrElse("")
      val lines = sc.textFile(inputPath, 1)

      // Split each line once on a single space and build (key, originalLine).
      val pairs = lines.map { line =>
        val columns = line.split(" ")
        (new SecondarySortKey(columns(0).toInt, columns(1).toInt), line)
      }

      val sortedPairs = pairs.sortByKey()
      // Drop the key; only the original line text is printed.
      val sortedLines = sortedPairs.map(_._2)
      sortedLines.foreach(sortedLine => println(sortedLine))
    } finally {
      // Always release the context, even if the job fails.
      sc.stop()
    }
  }
}