Secondary Sort

A secondary sort orders records by one field and breaks ties with another. Here each input line holds two space-separated integers; the lines are sorted by the first number, then the second, in descending order via sortByKey(false).
Java
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

SparkConf conf = new SparkConf();
conf.setAppName("SecondSort");
conf.setMaster("local");
JavaSparkContext context = new JavaSparkContext(conf);
JavaRDD<String> textFile = context.textFile("./data/secondSort");
// Key each line by a MySort object holding both numbers; keep the raw line as the value.
JavaPairRDD<MySort, String> mySortRDD = textFile.mapToPair(new PairFunction<String, MySort, String>() {
    @Override
    public Tuple2<MySort, String> call(String s) throws Exception {
        String[] fields = s.split(" ");
        Integer first = Integer.valueOf(fields[0]);
        Integer second = Integer.valueOf(fields[1]);
        return new Tuple2<>(new MySort(first, second), s);
    }
});
// sortByKey(false) sorts descending using MySort's comparison logic.
mySortRDD.sortByKey(false).foreach(new VoidFunction<Tuple2<MySort, String>>() {
    @Override
    public void call(Tuple2<MySort, String> tuple2) throws Exception {
        System.out.println(tuple2._2);
    }
});
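The Java snippet relies on a MySort key class that the post doesn't show. For sortByKey to work, the key type must be comparable and serializable; below is a minimal sketch, with the field names and comparison order assumed to mirror the Scala case class that follows:

import java.io.Serializable;

// Hypothetical reconstruction of the MySort key used above: compare by first,
// break ties with second, so sortByKey yields the secondary-sort order.
public class MySort implements Comparable<MySort>, Serializable {
    private final Integer first;
    private final Integer second;

    public MySort(Integer first, Integer second) {
        this.first = first;
        this.second = second;
    }

    @Override
    public int compareTo(MySort that) {
        if (!this.first.equals(that.first)) {
            return Integer.compare(this.first, that.first);
        }
        return Integer.compare(this.second, that.second);
    }
}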
Scala
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

// Key class for the secondary sort: order by first, break ties with second.
case class MySort(first: Int, second: Int) extends Ordered[MySort] {
  override def compare(that: MySort): Int = {
    if (this.first == that.first) {
      this.second.compareTo(that.second) // compareTo avoids the overflow raw subtraction can hit
    } else {
      this.first.compareTo(that.first)
    }
  }
}
def main(args: Array[String]): Unit = {
  val conf = new SparkConf()
  conf.setAppName("SecondSort")
  conf.setMaster("local")
  val context = new SparkContext(conf)
  val textFile: RDD[String] = context.textFile("./data/secondSort")
  textFile.map(line => {
    // Key each line by MySort so sortByKey can order by first, then second.
    val fields = line.split(" ")
    (MySort(fields(0).toInt, fields(1).toInt), line)
  }).sortByKey(false).foreach(one => {
    println(one._2) // print the original line in descending secondary-sort order
  })
}
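For example, given a hypothetical input file containing the lines `4 2`, `1 3`, `4 5`, and `1 1`, sortByKey(false) prints `4 5`, `4 2`, `1 3`, `1 1`: descending by the first number, with ties broken by the second.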
TopN

Given lines of the form "class score", find the three highest scores in each class.
Java
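The snippet starts from a `scores` RDD that the post doesn't show; presumably it is read the same way as above, reusing the JavaSparkContext from the previous snippet (the path here is a hypothetical placeholder):

// Hypothetical setup: the post doesn't show where `scores` comes from.
JavaRDD<String> scores = context.textFile("./data/scores"); // assumed path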
import java.util.Iterator;

JavaPairRDD<String, Integer> scoreToPair = scores.mapToPair(new PairFunction<String, String, Integer>() {
    @Override
    public Tuple2<String, Integer> call(String s) throws Exception {
        String[] fields = s.split(" ");
        return new Tuple2<>(fields[0], Integer.valueOf(fields[1]));
    }
});
scoreToPair.groupByKey().sortByKey().foreach(new VoidFunction<Tuple2<String, Iterable<Integer>>>() {
    @Override
    public void call(Tuple2<String, Iterable<Integer>> tuple2) throws Exception {
        String cls = tuple2._1;
        Iterator<Integer> iterator = tuple2._2.iterator();
        // Fixed-size insertion sort: top3 stays sorted in descending order.
        // A slot holding 0 counts as empty, so this assumes all scores are positive.
        int[] top3 = new int[3];
        while (iterator.hasNext()) {
            int score = iterator.next();
            for (int i = 0; i < top3.length; i++) {
                if (top3[i] == 0) {
                    top3[i] = score; // fill the first empty slot
                    break;
                } else if (score > top3[i]) {
                    // Shift the smaller entries down one slot and insert here.
                    for (int j = 2; j > i; j--) {
                        top3[j] = top3[j - 1];
                    }
                    top3[i] = score;
                    break;
                }
            }
        }
        for (int i : top3) {
            System.out.println("class: " + cls + ", score: " + i);
        }
    }
});
Scala
import scala.collection.mutable
import scala.util.control.Breaks

// `scores` is the same raw RDD[String] as in the Java version (not shown in the post).
val scoresRDD = scores.map(line => {
  val fields = line.split(" ")
  (fields(0), fields(1).toInt) // (class, score)
})
val result: RDD[(String, mutable.Buffer[Int])] = scoresRDD.groupByKey().map(one => {
  val cls = one._1
  val iterator = one._2.iterator
  // Fixed-size insertion sort: top3 stays sorted in descending order.
  // A slot holding 0 counts as empty, so this assumes all scores are positive.
  val top3 = new Array[Int](3)
  val loop: Breaks = new Breaks
  while (iterator.hasNext) {
    val score = iterator.next()
    loop.breakable {
      for (i <- 0 until top3.length) {
        if (top3(i) == 0) {
          top3(i) = score // fill the first empty slot
          loop.break()
        } else if (score > top3(i)) {
          // Shift the smaller entries down one slot and insert here.
          for (j <- 2 until (i, -1)) {
            top3(j) = top3(j - 1)
          }
          top3(i) = score
          loop.break()
        }
      }
    }
  }
  (cls, top3.toBuffer)
})
result.foreach(one => {
  val iterator = one._2.iterator
  while (iterator.hasNext) {
    println(s"class: ${one._1}, score: ${iterator.next()}")
  }
})
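Note that groupByKey ships every score for a class across the shuffle even though only three survive. As a sketch of an alternative (assuming Java 8 lambdas and the same scoreToPair RDD as above, not the post's own code), aggregateByKey can keep at most three scores per key while aggregating:

import java.util.PriorityQueue;

// Keep a bounded min-heap of the three largest scores per class, merging
// per-partition heaps instead of grouping every score for a key.
JavaPairRDD<String, PriorityQueue<Integer>> top3PerClass = scoreToPair.aggregateByKey(
        new PriorityQueue<Integer>(),           // zero value: empty min-heap
        (heap, score) -> {                      // fold one score into a partition-local heap
            heap.offer(score);
            if (heap.size() > 3) heap.poll();   // evict the smallest once we exceed three
            return heap;
        },
        (h1, h2) -> {                           // merge heaps from different partitions
            for (Integer s : h2) {
                h1.offer(s);
                if (h1.size() > 3) h1.poll();
            }
            return h1;
        });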
SparkSQL: https://blog.csdn.net/happiless/article/details/107307874