29、Spark核心编程之高级编程之topn

需求1

对文本文件内的数字(每行一个整数),取出最大的前3个。

Java实现

/**
 * Prints the three largest numbers found in a text file.
 *
 * <p>Every non-blank line is parsed as an integer; the lines are then sorted by
 * that value in descending order and the first three are printed.
 */
public class Top3 {
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("Top3Java").setMaster("local");
        JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
        // try/finally: the context must be released even when a line fails to parse.
        try {
            JavaRDD<String> lines = sparkContext.textFile("E:\\testdata\\wordcount\\input\\top.txt");

            // Drop blank lines up front; Integer.parseInt would throw on them.
            JavaRDD<String> numbers = lines.filter(new Function<String, Boolean>() {
                @Override
                public Boolean call(String line) throws Exception {
                    return !line.trim().isEmpty();
                }
            });

            // Key every line by its numeric value so that sortByKey orders numerically,
            // not lexicographically.
            JavaPairRDD<Integer, String> pairs = numbers.mapToPair(new PairFunction<String, Integer, String>() {
                @Override
                public Tuple2<Integer, String> call(String line) throws Exception {
                    String text = line.trim();
                    return new Tuple2<>(Integer.parseInt(text), text);
                }
            });

            // false = descending, so the largest values come first.
            JavaPairRDD<Integer, String> sorted = pairs.sortByKey(false);
            List<String> top3 = sorted.values().take(3);
            for (String value : top3) {
                System.out.println("string = " + value);
            }
        } finally {
            sparkContext.close();
        }
    }
}

Scala实现

/**
 * Prints the three largest numbers found in a text file.
 *
 * Every non-blank line is parsed as an integer; the lines are then sorted by
 * that value in descending order and the first three are printed.
 */
object Top3 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Top3Scala").setMaster("local")
    val context = new SparkContext(conf)
    // try/finally: stop the context even when a job fails, so it is always released.
    try {
      val top3 = context
        .textFile("E:\\testdata\\wordcount\\input\\top.txt")
        .map(_.trim)
        .filter(_.nonEmpty) // a blank line would make toInt throw
        .map(line => (line.toInt, line)) // numeric key => numeric (not lexicographic) ordering
        .sortByKey(ascending = false)
        .values
        .take(3)

      top3.foreach(value => println("string = " + value))
    } finally {
      context.stop()
    }
  }
}

需求2

对每个班级内的学生成绩,取出前3名(分组取TopN)。输入文件每行的格式为“班级 成绩”,以空格分隔。

Java实现

/**
 * Prints the three highest scores of every class (grouped top-N).
 *
 * <p>Each input line is split on a single space: the first field is used as the
 * class name and the second field is parsed as an integer score.
 */
public class GroupTop3 {

    /** Number of scores kept per class. */
    private static final int TOP_N = 3;

    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("GroupTop3Java").setMaster("local");
        JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
        // try/finally: the context must be released even when a line fails to parse.
        try {
            JavaRDD<String> lines = sparkContext.textFile("E:\\testdata\\wordcount\\input\\score.txt");

            JavaPairRDD<String, Integer> pairs = lines.mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String line) throws Exception {
                    // Split once and reuse both fields.
                    String[] fields = line.split(" ");
                    return new Tuple2<>(fields[0], Integer.parseInt(fields[1]));
                }
            });

            JavaPairRDD<String, Iterable<Integer>> studentScore = pairs.groupByKey();

            JavaPairRDD<String, Iterable<Integer>> top3Score = studentScore.mapToPair(new PairFunction<Tuple2<String, Iterable<Integer>>, String, Iterable<Integer>>() {
                @Override
                public Tuple2<String, Iterable<Integer>> call(Tuple2<String, Iterable<Integer>> classScores) throws Exception {
                    return new Tuple2<String, Iterable<Integer>>(classScores._1, topScores(classScores._2));
                }
            });

            top3Score.foreach(new VoidFunction<Tuple2<String, Iterable<Integer>>>() {
                @Override
                public void call(Tuple2<String, Iterable<Integer>> classTop) throws Exception {
                    // Build the whole line first so it is emitted with a single println.
                    StringBuilder line = new StringBuilder(classTop._1 + "班级,前三名成绩为 ");
                    boolean first = true;
                    for (Integer score : classTop._2) {
                        if (!first) {
                            line.append(",");
                        }
                        line.append(score);
                        first = false;
                    }
                    System.out.println(line);
                }
            });
        } finally {
            sparkContext.close();
        }
    }

    /**
     * Selects the largest scores, at most {@link #TOP_N} of them, in descending order.
     *
     * <p>When fewer than {@code TOP_N} scores are supplied only those scores are
     * returned, so the result never contains {@code null} placeholders.
     *
     * @param scores all scores of one class
     * @return the highest scores, largest first
     */
    private static Iterable<Integer> topScores(Iterable<Integer> scores) {
        Integer[] best = new Integer[TOP_N];
        int filled = 0; // number of leading slots of 'best' that hold a real score
        for (Integer score : scores) {
            // Skip past every kept score that is >= the candidate (ties keep arrival order).
            int slot = 0;
            while (slot < filled && best[slot] >= score) {
                slot++;
            }
            if (slot == TOP_N) {
                continue; // smaller than everything already kept
            }
            // Shift the tail down by one; the last kept score falls off once the array is full.
            for (int j = Math.min(filled, TOP_N - 1); j > slot; j--) {
                best[j] = best[j - 1];
            }
            best[slot] = score;
            if (filled < TOP_N) {
                filled++;
            }
        }
        return Arrays.asList(Arrays.copyOf(best, filled));
    }
}

Scala实现

/**
 * Prints the three highest scores of every class (grouped top-N).
 *
 * Each input line is split on a single space: the first field is used as the
 * class name and the second field is parsed as an integer score.
 */
object GroupTop3 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("GroupTop3Scala").setMaster("local")
    val context = new SparkContext(conf)
    // try/finally: stop the context even when a job fails, so it is always released.
    try {
      val classScores = context
        .textFile("E:\\testdata\\wordcount\\input\\score.txt")
        .map { line =>
          // Split once and reuse both fields.
          val fields = line.split(" ")
          (fields(0), fields(1).toInt)
        }

      // Sort each class's scores in descending order and keep at most three.
      // Unlike a pre-sized Array[Int](3), this neither pads short groups with 0
      // nor drops negative scores.
      val top3ByClass = classScores
        .groupByKey()
        .mapValues(scores => scores.toArray.sorted(Ordering[Int].reverse).take(3))

      top3ByClass.foreach { case (className, top) =>
        println(className + "班级前三名成绩为" + top.mkString(","))
      }
    } finally {
      context.stop()
    }
  }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值