数据源(./data/scores.txt,每行一条记录,格式为"班级名<Tab>分数",字段之间以制表符分隔):
class1 100
class2 86
class3 70
class1 102
class2 65
class1 45
class2 85
class3 70
class1 16
class2 88
class1 95
class2 37
class3 98
class1 99
class2 23
使用Scala实现:
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable
import scala.util.control._
/**
 * Grouped top-N: for every class name, keep the highest scores found in
 * `./data/scores.txt`.
 *
 * Each input line is expected to be `className<TAB>score`. The scores of one
 * class are scanned once and the best ones are kept in a small fixed-length
 * array, so the values of a group never have to be sorted as a whole.
 */
object TopN2 {

  /** How many of the highest scores are reported for each class. */
  private val TopCount = 3

  /**
   * Selects the `n` largest values of `scores`, in descending order.
   *
   * The number of occupied slots is tracked explicitly instead of treating
   * the value `0` as "empty", so zero and negative scores are ranked
   * correctly. When fewer than `n` scores are supplied the result contains
   * only those scores (no zero padding).
   *
   * @param scores the scores of a single group; consumed by this call
   * @param n      maximum number of scores to keep
   * @return at most `n` scores, highest first; equal scores keep arrival order
   */
  private def topN(scores: Iterator[Int], n: Int): Array[Int] = {
    val top = new Array[Int](n)
    var filled = 0
    scores.foreach { score =>
      // First slot whose value is strictly smaller than `score`, or `filled`
      // when every kept value is at least as large.
      var pos = 0
      while (pos < filled && top(pos) >= score) pos += 1
      if (pos < n) {
        // Shift the tail one slot to the right; once the array is full the
        // smallest kept value falls off the end.
        var j = math.min(filled, n - 1)
        while (j > pos) {
          top(j) = top(j - 1)
          j -= 1
        }
        top(pos) = score
        if (filled < n) filled += 1
      }
    }
    top.take(filled)
  }

  /**
   * Runs the job on a local Spark master and prints one
   * `(className, topScores)` pair per class.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local")
    conf.setAppName("topN")
    val sc = new SparkContext(conf)
    try {
      val infos = sc.textFile("./data/scores.txt")
      // Split each line only once and reuse the fields.
      val pairInfo = infos.map { line =>
        val fields = line.split("\t")
        (fields(0), fields(1).toInt)
      }
      val result: Array[(String, mutable.Buffer[Int])] = pairInfo
        .groupByKey()
        .map { case (className, scores) =>
          (className, topN(scores.iterator, TopCount).toBuffer)
        }
        .collect()
      result.foreach(println)
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }
}
使用Java实现:
import org.apache.commons.collections.IteratorUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
/**
 * Grouped top-N: for every class name, prints the highest scores found in
 * {@code ./data/scores.txt}.
 *
 * Each input line is expected to be {@code className<TAB>score}. The scores of
 * one class are scanned once and the best ones are kept in a small
 * fixed-length array, so the values of a group never have to be sorted.
 */
public class TopN2 {

    /** How many of the highest scores are reported for each class. */
    private static final int TOP_COUNT = 3;

    /**
     * Runs the job on a local Spark master and prints, for every class, its
     * name followed by its highest scores in descending order.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf();
        conf.setMaster("local");
        conf.setAppName("topn");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            JavaRDD<String> lines = sc.textFile("./data/scores.txt");
            JavaPairRDD<String, Integer> pairInfo = lines.mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String line) throws Exception {
                    // Split each line only once and reuse the fields.
                    String[] fields = line.split("\t");
                    return new Tuple2<String, Integer>(fields[0], Integer.valueOf(fields[1]));
                }
            });
            // NOTE: foreach runs inside the Spark tasks, so the output appears
            // wherever those tasks execute (the driver JVM for the "local" master).
            pairInfo.groupByKey().foreach(new VoidFunction<Tuple2<String, Iterable<Integer>>>() {
                @Override
                public void call(Tuple2<String, Iterable<Integer>> tp) throws Exception {
                    System.out.println("className = " + tp._1);
                    for (Integer score : topScores(tp._2.iterator(), TOP_COUNT)) {
                        System.out.println("score = " + score);
                    }
                }
            });
        } finally {
            // Always release the context, even when the job fails.
            sc.stop();
        }
    }

    /**
     * Selects the {@code n} largest values of {@code scores}, in descending order.
     *
     * When fewer than {@code n} scores are supplied the result contains only
     * those scores, so callers never see {@code null} placeholders.
     *
     * @param scores the scores of a single group; consumed by this call
     * @param n      maximum number of scores to keep
     * @return at most {@code n} scores, highest first; equal scores keep arrival order
     */
    private static Integer[] topScores(Iterator<Integer> scores, int n) {
        Integer[] top = new Integer[n];
        int filled = 0;
        while (scores.hasNext()) {
            Integer score = scores.next();
            // First slot whose value is strictly smaller than the new score,
            // or `filled` when every kept value is at least as large.
            int pos = 0;
            while (pos < filled && top[pos] >= score) {
                pos++;
            }
            if (pos < n) {
                // Shift the tail one slot to the right; once the array is
                // full the smallest kept value falls off the end.
                for (int j = Math.min(filled, n - 1); j > pos; j--) {
                    top[j] = top[j - 1];
                }
                top[pos] = score;
                if (filled < n) {
                    filled++;
                }
            }
        }
        return java.util.Arrays.copyOf(top, filled);
    }
}