需求1
对文本文件内的数字(每行一个整数),取最大的前3个
Java实现
/**
 * Prints the three largest numbers found in a text file that holds one
 * integer per line.
 */
public class Top3 {
    /**
     * Runs the job on a local Spark master and prints each of the top three
     * values as {@code string = <value>}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("Top3Java").setMaster("local");
        JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
        try {
            JavaRDD<String> lines = sparkContext.textFile("E:\\testdata\\wordcount\\input\\top.txt");

            // Strip surrounding whitespace and drop blank lines so that
            // Integer.parseInt does not fail on them.
            JavaRDD<String> numbers = lines.map(new Function<String, String>() {
                @Override
                public String call(String s) throws Exception {
                    return s.trim();
                }
            }).filter(new Function<String, Boolean>() {
                @Override
                public Boolean call(String s) throws Exception {
                    return !s.isEmpty();
                }
            });

            // Key every line by its numeric value so it can be sorted numerically.
            JavaPairRDD<Integer, String> pairs = numbers.mapToPair(new PairFunction<String, Integer, String>() {
                @Override
                public Tuple2<Integer, String> call(String s) throws Exception {
                    return new Tuple2<>(Integer.parseInt(s), s);
                }
            });

            // false = descending order, so the largest values come first.
            JavaPairRDD<Integer, String> sorted = pairs.sortByKey(false);
            JavaRDD<String> result = sorted.map(new Function<Tuple2<Integer, String>, String>() {
                @Override
                public String call(Tuple2<Integer, String> pair) throws Exception {
                    return pair._2;
                }
            });

            List<String> strings = result.take(3);
            for (String string : strings) {
                System.out.println("string = " + string);
            }
        } finally {
            // Release the context even when the job above throws.
            sparkContext.close();
        }
    }
}
Scala实现
/** Prints the three largest numbers found in a text file that holds one integer per line. */
object Top3 {
  /**
   * Runs the job on a local Spark master and prints each of the top three
   * values as `string = <value>`.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Top3Scala").setMaster("local")
    val context = new SparkContext(conf)
    try {
      val linesRDD = context.textFile("E:\\testdata\\wordcount\\input\\top.txt")
      // Strip surrounding whitespace and drop blank lines so that toInt does not fail on them.
      val numbers = linesRDD.map(_.trim).filter(_.nonEmpty)
      // Key every line by its numeric value so it can be sorted numerically.
      val pairs = numbers.map(line => (line.toInt, line))
      // ascending = false puts the largest values first.
      val sorted = pairs.sortByKey(ascending = false)
      val strings = sorted.map(_._2).take(3)
      strings.foreach(string => println("string = " + string))
    } finally {
      // Release the context even when the job above throws.
      context.stop()
    }
  }
}
需求2
对每个班级内的学生成绩(每行格式为“班级 成绩”,以空格分隔),取出成绩最高的前3名。(分组取 Top N)
Java实现
/**
 * Grouped top-N: prints the three highest scores of every class.
 * Each input line is expected to be "<class> <score>" separated by one space.
 */
public class GroupTop3 {
    /** Number of scores reported per class. */
    private static final int TOP_N = 3;

    /**
     * Runs the job on a local Spark master and prints one line per class.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("GroupTop3Java").setMaster("local");
        JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
        try {
            JavaRDD<String> lines = sparkContext.textFile("E:\\testdata\\wordcount\\input\\score.txt");

            // (class, score) pairs; the line is split once and both fields reused.
            JavaPairRDD<String, Integer> pairs = lines.mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) throws Exception {
                    String[] fields = s.split(" ");
                    return new Tuple2<>(fields[0], Integer.parseInt(fields[1]));
                }
            });

            JavaPairRDD<String, Iterable<Integer>> studentScore = pairs.groupByKey();

            JavaPairRDD<String, Iterable<Integer>> top3Score = studentScore.mapToPair(
                    new PairFunction<Tuple2<String, Iterable<Integer>>, String, Iterable<Integer>>() {
                @Override
                public Tuple2<String, Iterable<Integer>> call(Tuple2<String, Iterable<Integer>> group) throws Exception {
                    return new Tuple2<String, Iterable<Integer>>(group._1, Arrays.asList(topScores(group._2, TOP_N)));
                }
            });

            top3Score.foreach(new VoidFunction<Tuple2<String, Iterable<Integer>>>() {
                @Override
                public void call(Tuple2<String, Iterable<Integer>> group) throws Exception {
                    // Build the whole line first so it is written with a single println.
                    StringBuilder line = new StringBuilder(group._1).append("班级,前三名成绩为 ");
                    boolean first = true;
                    for (Integer score : group._2) {
                        if (!first) {
                            line.append(",");
                        }
                        line.append(score);
                        first = false;
                    }
                    System.out.println(line);
                }
            });
        } finally {
            // Release the context even when the job above throws.
            sparkContext.close();
        }
    }

    /**
     * Selects the highest scores in descending order.
     *
     * @param scores scores of one group
     * @param limit  maximum number of scores to keep
     * @return at most {@code limit} scores, highest first; shorter when the
     *         group has fewer scores (no null padding)
     */
    private static Integer[] topScores(Iterable<Integer> scores, int limit) {
        Integer[] best = new Integer[limit];
        int filled = 0;
        for (Integer score : scores) {
            // Find the first slot holding a strictly smaller score.
            int pos = 0;
            while (pos < filled && best[pos] >= score) {
                pos++;
            }
            if (pos == limit) {
                continue; // not among the current top scores
            }
            if (filled < limit) {
                filled++;
            }
            // Shift the tail right by one; the last entry falls off when full.
            for (int j = filled - 1; j > pos; j--) {
                best[j] = best[j - 1];
            }
            best[pos] = score;
        }
        return Arrays.copyOf(best, filled);
    }
}
Scala实现
/**
 * Grouped top-N: prints the three highest scores of every class.
 * Each input line is expected to be "<class> <score>" separated by one space.
 */
object GroupTop3 {
  /**
   * Runs the job on a local Spark master and prints one line per class.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("GroupTop3Scala").setMaster("local")
    val context = new SparkContext(conf)
    try {
      val linesRDD = context.textFile("E:\\testdata\\wordcount\\input\\score.txt")
      // (class, score) pairs; the line is split once and both fields reused.
      val studentScores = linesRDD.map { line =>
        val fields = line.split(" ")
        (fields(0), fields(1).toInt)
      }
      val groupStudentScores = studentScores.groupByKey()
      // Sort each group's scores in descending order and keep at most three.
      // Groups with fewer than three scores yield a shorter array, with no padding.
      val result = groupStudentScores.map { case (className, scores) =>
        (className, scores.toArray.sorted(Ordering[Int].reverse).take(3))
      }
      result.foreach { case (className, topScores) =>
        println(className + "班级前三名成绩为" + topScores.mkString(","))
      }
    } finally {
      // Release the context even when the job above throws.
      context.stop()
    }
  }
}