一、案例需求:
1、对文本文件内的数字,取最大的前3个
2、对每个班级内的学生成绩,取出前3名(分组取topn)
二、top(n)
- java
/**
* 案例需求
* 对文本文件内的数字,取最大的前3个
*/
public class Top_N {
public static void main(String[] args) {
SparkConf conf = new SparkConf().setAppName("Top_N").setMaster("local");
JavaSparkContext sc = new JavaSparkContext(conf);
JavaRDD<String> lines = sc.textFile("./top.txt");
//将String映射为Tuple2(Integer,String)
JavaPairRDD<Integer,String> pairs = lines.mapToPair(new PairFunction<String, Integer, String>() {
public Tuple2<Integer, String> call(String t) throws Exception {
return new Tuple2<Integer, String>(Integer.valueOf(t),t);
}
});
//按照key排序
JavaPairRDD<Integer, String> sortedPairs = pairs.sortByKey(false);
//对排序结果进行映射,仅留下第一列
JavaRDD<Integer> sortedNumbers = sortedPairs.map(new Function<Tuple2<Integer, String>, Integer>() {
public Integer call(Tuple2<Integer, String> v1) throws Exception {
return v1._1;
}
});
//取出排序后前3个数字
List<Integer> sortedNumberList = sortedNumbers.take(3);
//打印结果
for (Integer num : sortedNumberList){
System.out.println(num);
}
sc.close();
}
}
/**
 * Case study: print the three largest numbers found in ./top.txt.
 *
 * Every non-blank line of the file is parsed as one integer; the values are sorted in
 * descending order with Spark and the first three are printed, one per line.
 */
object Top_N_scala {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Top_N_scala").setMaster("local")
    val sc = new SparkContext(conf)
    // try/finally so the context is stopped even when the job fails (e.g. on a malformed line).
    try {
      val lines = sc.textFile("./top.txt")
      // Trim and drop blank lines first: toInt throws on empty or whitespace-padded input.
      val pairs = lines.map(_.trim).filter(_.nonEmpty).map(num => (num.toInt, num))
      // Sort by the numeric key, largest first.
      val sortedPairs = pairs.sortByKey(ascending = false)
      // Keep only the key and take the first three (fewer if the file is shorter).
      val top3Numbers = sortedPairs.map(_._1).take(3)
      top3Numbers.foreach(println)
    } finally {
      sc.stop()
    }
  }
}
三、groupTop(n)
- java
/**
* 案例需求
* 对每个班级内的学生成绩,取出前3名(分组取topn)
*/
public class GroupTop_N {
public static void main(String[] args) {
SparkConf conf = new SparkConf().setAppName("Top_N").setMaster("local");
JavaSparkContext sc = new JavaSparkContext(conf);
JavaRDD<String> lines = sc.textFile("./score.txt");
JavaPairRDD<String,Integer> pairs = lines.mapToPair(new PairFunction<String, String, Integer>() {
public Tuple2<String, Integer> call(String line) throws Exception {
String[] lineSplited = line.split(" ");
return new Tuple2<String, Integer>(lineSplited[0],Integer.valueOf(lineSplited[1]));
}
});
JavaPairRDD<String,Iterable<Integer>> groupedParis = pairs.groupByKey();
JavaPairRDD<String,Iterable<Integer>> top3Score = groupedParis.mapToPair(
new PairFunction<Tuple2<String, Iterable<Integer>>, String, Iterable<Integer>>() {
public Tuple2<String, Iterable<Integer>> call(Tuple2<String, Iterable<Integer>> classScores) throws Exception {
Integer[] top3 = new Integer[3];
String className = classScores._1;
Iterator<Integer> scores = classScores._2.iterator();
while (scores.hasNext()) {
Integer score = scores.next();
for (int i = 0; i < 3; i++) {
if(top3[i] == null){
top3[i] = score;
break;
}else if(score > top3[i]){
for(int j=2;j > i;j--){
top3[j] = top3[j-1];
}
top3[i] = score;
break;
}
}
}
return new Tuple2<String, Iterable<Integer>>(className, Arrays.asList(top3));
}
}
);
top3Score.foreach(new VoidFunction<Tuple2<String, Iterable<Integer>>>() {
public void call(Tuple2<String, Iterable<Integer>> v1) throws Exception {
System.out.println("class: " + v1._1);
Iterator<Integer> scoreIterator = v1._2.iterator();
while(scoreIterator.hasNext()){
Integer score = scoreIterator.next();
System.out.println(score);
}
System.out.println("=======================");
}
});
sc.close();
}
}
868

被折叠的 条评论
为什么被折叠?



