// Spark three-column (tertiary) sort example
package com.dtspark
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Three-column sort, implemented in four steps:
 * Step 1: define a custom sort key implementing the Ordered and Serializable interfaces
 * Step 2: load the file to be sorted into a <Key, Value> RDD keyed by the custom key
 * Step 3: sort with sortByKey using the custom key
 * Step 4: drop the sort key, keeping only the sorted lines
 * Created by huangle on 2016/9/30.
 */
/**
 * Sort key for a three-column sort: orders by `first`, then `second`, then `third`.
 *
 * Extends Serializable because Spark ships keys across the cluster during the
 * sortByKey shuffle.
 *
 * @param first  primary sort column
 * @param second secondary sort column (tie-breaker for equal `first`)
 * @param third  tertiary sort column (tie-breaker for equal `second`)
 */
class ThirdSortKey(val first: Int, val second: Int, val third: Int) extends Ordered[ThirdSortKey] with Serializable{
  def compare(other: ThirdSortKey): Int = {
    // Use compareTo instead of subtraction: `a - b` overflows Int for operands
    // far apart (e.g. Int.MinValue vs 1) and then reports the WRONG ordering.
    if (this.first != other.first) {
      this.first.compareTo(other.first)
    } else if (this.second != other.second) {
      this.second.compareTo(other.second)
    } else {
      this.third.compareTo(other.third)
    }
  }
}
object SecondarySortKeyApp {

  /**
   * Driver entry point: loads a file of space-separated three-integer-column
   * lines, sorts the lines by (col1, col2, col3) using [[ThirdSortKey]], then
   * prints the keyed pairs followed by the bare sorted lines.
   */
  def main(args: Array[String]): Unit = {
    val sc = sparkContext("Secondary Sort") // create the SparkContext
    try {
      val lines = sc.textFile("E://SOFTLEARN//BOOKDATA//dataset//1test.txt", 1)
      // Split each line ONCE (the original split three times per line), then
      // pair it with its composite sort key.
      val pairWithSortKey = lines.map { line =>
        val fields = line.split(" ")
        (new ThirdSortKey(fields(0).toInt, fields(1).toInt, fields(2).toInt), line)
      }
      val sorted = pairWithSortKey.sortByKey(true)
      sorted.collect().foreach(println)
      // Drop the sort key, keeping only the original (now sorted) lines.
      val sortedResult = sorted.map(sortedLine => sortedLine._2)
      sortedResult.collect().foreach(println)
    } finally {
      // Release cluster resources even if the job throws (the original leaked
      // the SparkContext by never calling stop()).
      sc.stop()
    }
  }

  /**
   * Builds a local-mode SparkContext with WARN-level logging.
   *
   * @param name application name shown in the Spark UI
   * @return a started SparkContext; caller is responsible for stopping it
   */
  def sparkContext(name: String): SparkContext = {
    val conf = new SparkConf().setAppName(name).setMaster("local")
    val sc = new SparkContext(conf)
    sc.setLogLevel("WARN")
    sc
  }
}