Spark使用Scala和Java实现分组TopN

数据源(保存为 ./data/scores.txt,每行格式为"班级<Tab>分数"):

class1	100
class2	86
class3	70
class1	102
class2	65
class1	45
class2	85
class3	70
class1	16
class2	88
class1	95
class2	37
class3	98
class1	99
class2	23

使用Scala实现:

import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable
import scala.util.control._
/**
  * Grouped top-N: for every class, report its highest scores.
  *
  * Reads tab-separated `className<TAB>score` lines from `./data/scores.txt`,
  * groups the scores by class name and prints one `(className, scores)` pair
  * per class, with the scores in descending order.
  */
object TopN2 {

  /** Number of highest scores kept for each class. */
  private val TopCount = 3

  /**
    * Selects the `n` largest values of `scores`, in descending order.
    *
    * Only a window of at most `n` values is kept while scanning, so the whole
    * group is never sorted. No sentinel value marks an "empty" slot, which
    * means zero and negative scores are treated like any other score, and a
    * group with fewer than `n` scores yields a shorter result rather than a
    * padded one.
    *
    * @param scores all scores of one group, in any order
    * @param n      maximum number of scores to return
    * @return at most `n` scores, largest first; equal scores keep arrival order
    */
  private def topN(scores: Iterable[Int], n: Int): mutable.Buffer[Int] = {
    val kept = scores.foldLeft(Vector.empty[Int]) { (best, score) =>
      // `best` is always descending: insert behind every value that is >= score,
      // then drop whatever falls out of the window.
      val (higherOrEqual, lower) = best.span(_ >= score)
      (higherOrEqual ++ (score +: lower)).take(n)
    }
    kept.toBuffer
  }

  /**
    * Runs the job on a local Spark master and prints the result on the driver.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local")
    conf.setAppName("topN")
    val sc = new SparkContext(conf)
    try {
      val infos = sc.textFile("./data/scores.txt")
      val pairInfo = infos.map { line =>
        // Split once and reuse both fields.
        val fields = line.split("\t")
        (fields(0), fields(1).toInt)
      }
      val result: Array[(String, mutable.Buffer[Int])] = pairInfo
        .groupByKey()
        .map { case (className, scores) => (className, topN(scores, TopCount)) }
        .collect()
      result.foreach(println)
    } finally {
      // Release the context even when the job fails.
      sc.stop()
    }
  }
}

使用Java实现:

import org.apache.commons.collections.IteratorUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;

/**
 * Grouped top-N: for every class, print its highest scores.
 *
 * Reads tab-separated {@code className<TAB>score} lines from
 * {@code ./data/scores.txt}, groups the scores by class name and prints the
 * class name followed by its top scores in descending order.
 */
public class TopN2 {
    /** Number of highest scores printed for each class. */
    private static final int TOP_COUNT = 3;

    /**
     * Runs the job on a local Spark master.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf =new SparkConf();
        conf.setMaster("local");
        conf.setAppName("topn");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            JavaRDD<String> lines = sc.textFile("./data/scores.txt");
            JavaPairRDD<String, Integer> pairInfo = lines.mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String line) throws Exception {
                    // Split once and reuse both fields.
                    String[] fields = line.split("\t");
                    return new Tuple2<String, Integer>(fields[0], Integer.valueOf(fields[1]));
                }
            });

            pairInfo.groupByKey().foreach(new VoidFunction<Tuple2<String, Iterable<Integer>>>() {
                @Override
                public void call(Tuple2<String, Iterable<Integer>> tp) throws Exception {
                    String className = tp._1;

                    // top[0..filled) holds the best scores seen so far, largest first.
                    int[] top = new int[TOP_COUNT];
                    int filled = 0;
                    for (Integer boxed : tp._2) {
                        int score = boxed;

                        // Find the first kept score that is strictly smaller.
                        int pos = 0;
                        while (pos < filled && score <= top[pos]) {
                            pos++;
                        }
                        if (pos == TOP_COUNT) {
                            continue; // not better than anything already kept
                        }

                        // Shift the tail one slot right, dropping the last score when full.
                        for (int j = Math.min(filled, TOP_COUNT - 1); j > pos; j--) {
                            top[j] = top[j - 1];
                        }
                        top[pos] = score;
                        if (filled < TOP_COUNT) {
                            filled++;
                        }
                    }

                    System.out.println("className = "+className);
                    // Print only the slots that were actually filled.
                    for (int i = 0; i < filled; i++) {
                        System.out.println("score = "+top[i]);
                    }
                }
            });
        } finally {
            // Release the context even when the job fails.
            sc.stop();
        }
    }
}

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值