数据源:secondSort.txt(每行两个整数,以单个空格分隔)
3 1
5 2
6 5
8 123
1 4
4 123
5 432
3 54
5 121
8 654
3 98
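使用 Scala 实现:过程较简单。首先创建一个用于二次排序的 key 类(继承 Ordered:先比较第一列,第一列相等时再比较第二列);然后在 map 阶段把每行数据按空格切分,封装成该类的对象作为 key,原始行作为 value;最后调用 sortByKey(false) 按 key 降序排序,并输出 value。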
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Composite key for the secondary-sort example.
 *
 * Keys are ordered by `first`; when two keys have the same `first`,
 * they are ordered by `second`.
 *
 * @param first  primary sort field
 * @param second secondary sort field, consulted only when `first` is equal
 */
case class SecondSortKey(first: Int, second: Int) extends Ordered[SecondSortKey] {

  /**
   * Compares this key with `that`.
   *
   * @param that the key to compare against
   * @return a negative value, zero, or a positive value when this key is
   *         respectively less than, equal to, or greater than `that`
   */
  def compare(that: SecondSortKey): Int = {
    // Integer.compare rather than `a - b`: the subtraction overflows for
    // operands that are far apart (e.g. Int.MinValue - 1) and then reports
    // the wrong sign.
    val byFirst = Integer.compare(this.first, that.first)
    if (byFirst != 0) byFirst
    else Integer.compare(this.second, that.second)
  }
}
/**
 * Secondary-sort example.
 *
 * Reads `./data/secondSort.txt`, where each non-blank line holds two
 * whitespace-separated integers, sorts the lines in descending order by the
 * first integer and then by the second, and prints the original lines.
 */
object SecondSort {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setAppName("secondarySort")
    conf.setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val lines = sc.textFile("./data/secondSort.txt")
      val transRDD: RDD[(SecondSortKey, String)] = lines
        // A blank line would otherwise fail in toInt below.
        .filter(_.trim.nonEmpty)
        .map { line =>
          // Split once and reuse the fields instead of splitting per column.
          val fields = line.trim.split("\\s+")
          (SecondSortKey(fields(0).toInt, fields(1).toInt), line)
        }
      // Collect before printing: RDD.foreach runs on the executors and gives
      // no ordering guarantee across partitions, which would defeat the sort.
      transRDD
        .sortByKey(ascending = false)
        .map(_._2)
        .collect()
        .foreach(println)
    } finally {
      // Release the context even when the job fails.
      sc.stop()
    }
  }
}
使用 Java 实现:过程与 Scala 版本基本相同,对于长期使用 Java 的开发者来说更容易理解。注意:MySort.compareTo 返回的是取反后的比较结果,再配合 sortByKey(false),最终输出为升序,与 Scala 版本的降序输出相反。
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
import java.io.Serializable;
/**
 * Composite key for the secondary-sort example.
 *
 * The natural ordering defined by {@link #compareTo(MySort)} is reversed:
 * keys with a larger {@code first} sort before keys with a smaller one, and
 * for equal {@code first} a larger {@code second} sorts first.
 */
class MySort implements Comparable<MySort>, Serializable {
    /** Primary sort field. */
    Integer first;
    /** Secondary sort field, consulted only when {@code first} is equal. */
    Integer second;

    /**
     * @param first  primary sort field
     * @param second secondary sort field
     */
    public MySort(Integer first, Integer second) {
        this.first = first;
        this.second = second;
    }

    /**
     * Compares this key with another key in reversed (descending) order.
     *
     * @param mysort the key to compare against
     * @return a negative value, zero, or a positive value when this key sorts
     *         respectively before, equal to, or after {@code mysort}
     */
    @Override
    public int compareTo(MySort mysort) {
        // Compare by value. `==` on Integer compares object references, which
        // only happens to work for small cached values, and subtraction can
        // overflow. Operands are swapped to keep the reversed ordering.
        int byFirst = mysort.first.compareTo(this.first);
        if (byFirst != 0) {
            return byFirst;
        }
        return mysort.second.compareTo(this.second);
    }
}
/**
 * Secondary-sort example.
 *
 * Reads {@code ./data/secondSort.txt}, where each line holds two
 * whitespace-separated integers, keys every line by a {@code MySort} built
 * from those integers, sorts by key and prints the original lines.
 */
public class SecondSort {

    /**
     * Program entry point.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf();
        conf.setMaster("local");
        // NOTE(review): the app name looks copy-pasted from a top-N example;
        // left unchanged here, confirm before renaming.
        conf.setAppName("topn");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            JavaRDD<String> lines = sc.textFile("./data/secondSort.txt");
            JavaPairRDD<MySort, String> mapToPair = lines.mapToPair(new PairFunction<String, MySort, String>() {
                @Override
                public Tuple2<MySort, String> call(String line) throws Exception {
                    // Split once and reuse the fields instead of splitting per column.
                    String[] fields = line.trim().split("\\s+");
                    MySort mysort = new MySort(Integer.valueOf(fields[0]), Integer.valueOf(fields[1]));
                    return new Tuple2<MySort, String>(mysort, line);
                }
            });
            // Collect before printing: foreach on an RDD runs on the executors
            // and gives no ordering guarantee across partitions.
            for (String line : mapToPair.sortByKey(false).values().collect()) {
                System.out.println(line);
            }
        } finally {
            // Release the context even when the job fails.
            sc.stop();
        }
    }
}