Spark从入门到精通(四)--二次排序和TopN问题

二次排序

java
SparkConf conf = new SparkConf();
conf.setAppName("SecondSort");
conf.setMaster("local");
JavaSparkContext context = new JavaSparkContext(conf);
JavaRDD<String> textFile = context.textFile("./data/secondSort");
// Pair each line with a MySort key built from its first two space-separated
// columns, keeping the original line as the value so it can be printed later.
JavaPairRDD<MySort, String> mySortRDD = textFile.mapToPair(new PairFunction<String, MySort, String>() {
    @Override
    public Tuple2<MySort, String> call(String s) throws Exception {
        // Split once per record instead of twice.
        String[] parts = s.split(" ");
        Integer first = Integer.valueOf(parts[0]);
        Integer second = Integer.valueOf(parts[1]);
        return new Tuple2<>(new MySort(first, second), s);
    }
});
mySortRDD.sortByKey(false).foreach(new VoidFunction<Tuple2<MySort, String>>() {
    @Override
    public void call(Tuple2<MySort, String> tuple2) throws Exception {
        System.out.println(tuple2._2);
    }
});
scala
/**
 * Composite sort key: orders by `first`, breaking ties with `second`.
 *
 * Uses Integer.compare instead of subtraction: `this.first - that.first`
 * overflows for large-magnitude values (e.g. Int.MinValue - 1 is positive),
 * which would silently produce a wrong ordering.
 */
case class MySort(first:Int, second:Int) extends Ordered[MySort]{
  override def compare(that: MySort): Int = {
    if(this.first == that.first){
      Integer.compare(this.second, that.second)
    }else{
      Integer.compare(this.first, that.first)
    }
  }
}

def main(args: Array[String]): Unit = {
  val conf = new SparkConf()
  conf.setAppName("SecondSort")
  conf.setMaster("local")
  val context = new SparkContext(conf)
  val textFile: RDD[String] = context.textFile("./data/secondSort")
  // Key each line by MySort(first, second) so sortByKey orders on both
  // fields; sortByKey(false) sorts descending, then the original line prints.
  textFile.map(line=>{
    // Split once per line instead of twice.
    val parts = line.split(" ")
    (new MySort(parts(0).toInt, parts(1).toInt), line)
  }).sortByKey(false).foreach(one=>{
    println(one._2)
  })
}

TopN

java
// Map each "class score" line to a (class, score) pair for grouping.
JavaPairRDD<String, Integer> scoreToPair = scores.mapToPair(new PairFunction<String, String, Integer>() {
    @Override
    public Tuple2<String, Integer> call(String s) throws Exception {
        // Split once per record instead of twice.
        String[] parts = s.split(" ");
        String cls = parts[0];
        Integer score = Integer.valueOf(parts[1]);
        return new Tuple2<>(cls, score);
    }
});
scoreToPair.groupByKey().sortByKey().foreach(new VoidFunction<Tuple2<String, Iterable<Integer>>>() {
    @Override
    public void call(Tuple2<String, Iterable<Integer>> tuple2) throws Exception {
        String cls = tuple2._1;
        // Collect all scores, sort descending, and keep the top 3.
        // The original manual insertion used 0 as an "empty slot" sentinel,
        // which mishandles legitimate scores of 0 (or negative scores).
        java.util.List<Integer> all = new java.util.ArrayList<>();
        for (Integer score : tuple2._2) {
            all.add(score);
        }
        all.sort(java.util.Collections.reverseOrder());

        // Preserve the original output format: always print exactly 3 lines,
        // padding with 0 when a class has fewer than 3 scores.
        for (int i = 0; i < 3; i++) {
            int score = i < all.size() ? all.get(i) : 0;
            System.out.println("班级: "+cls+", 成绩: "+score);
        }
    }
});
scala
// Map each "class score" line to a (class, score-string) pair for grouping.
val scoresRDD = scores.map(line => {
  // Split once per line instead of twice.
  val parts = line.split(" ")
  (parts(0), parts(1))
})
// Per class, keep the top-3 scores in descending order.
// The original manual insertion (with Breaks) treated 0 as an "empty slot"
// sentinel, which mishandles legitimate scores of 0 (or negative scores);
// sorting descending and taking 3 is both correct and simpler.
val result: RDD[(String, mutable.Buffer[Int])] = scoresRDD.groupByKey().map(one => {
  val cls = one._1
  val sorted = one._2.map(_.toInt).toSeq.sortBy(score => -score)
  // Pad with 0 to preserve the original fixed-size-3 result shape.
  val top3 = (sorted.take(3) ++ Seq.fill(3)(0)).take(3)
  (cls, top3.toBuffer)
})
// Print one line per retained score for each class.
result.foreach(one => {
  one._2.foreach(score => println(s"班级: ${one._1}, 成绩: ${score}"))
})

SparkSQL: https://blog.csdn.net/happiless/article/details/107307874

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值