28、Spark核心编程之高级编程之二次排序

需求

  1. 按照文件中的第一列排序。
  2. 如果第一列相同,则按照第二列排序。

文件内容

2 5
3 6
2 4
1 3
1 5

Java实现

自定义的二次排序key

/**
 * Composite key for a secondary sort: orders by {@code first}, and by
 * {@code second} when the {@code first} values are equal.
 *
 * <p>Implements Scala's {@code Ordered} so that it can be used as the key of
 * {@code sortByKey}, and {@code Serializable} so that it can be shipped
 * between Spark tasks. The {@code $less}, {@code $greater}, {@code $less$eq}
 * and {@code $greater$eq} methods are the JVM-encoded names of Scala's
 * {@code <}, {@code >}, {@code <=} and {@code >=} operators.
 *
 * <p>All relational methods delegate to {@link #compare(SecondarySortKey)} so
 * the ordering is defined in exactly one place and stays consistent with
 * {@link #equals(Object)}.
 */
public class SecondarySortKey implements Ordered<SecondarySortKey>,Serializable {

    // The columns that take part in the ordering.
    private int first;
    private int second;

    public SecondarySortKey(int first, int second) {
        this.first = first;
        this.second = second;
    }

    /**
     * Compares this key with {@code that}, first column first.
     *
     * <p>Uses {@link Integer#compare(int, int)} rather than subtraction:
     * {@code a - b} overflows when the operands are far apart (for example
     * {@code Integer.MIN_VALUE - 1}) and would report the wrong sign.
     *
     * @param that the key to compare against
     * @return a negative value, zero, or a positive value when this key is
     *         less than, equal to, or greater than {@code that}
     */
    @Override
    public int compare(SecondarySortKey that) {
        int byFirst = Integer.compare(this.first, that.first);
        if (byFirst != 0) {
            return byFirst;
        }
        return Integer.compare(this.second, that.second);
    }

    /** Scala {@code <}: true when this key sorts strictly before {@code that}. */
    @Override
    public boolean $less(SecondarySortKey that) {
        return compare(that) < 0;
    }

    /** Scala {@code >}: true when this key sorts strictly after {@code that}. */
    @Override
    public boolean $greater(SecondarySortKey that) {
        return compare(that) > 0;
    }

    /** Scala {@code <=}: true when this key sorts before or equal to {@code that}. */
    @Override
    public boolean $less$eq(SecondarySortKey that) {
        return compare(that) <= 0;
    }

    /** Scala {@code >=}: true when this key sorts after or equal to {@code that}. */
    @Override
    public boolean $greater$eq(SecondarySortKey that) {
        return compare(that) >= 0;
    }

    /** {@code Comparable} entry point; same ordering as {@link #compare(SecondarySortKey)}. */
    @Override
    public int compareTo(SecondarySortKey that) {
        return compare(that);
    }

    // Getters and setters for the sort columns, plus equals and hashCode.

    public int getFirst() {
        return first;
    }

    public void setFirst(int first) {
        this.first = first;
    }

    public int getSecond() {
        return second;
    }

    public void setSecond(int second) {
        this.second = second;
    }

    /** Two keys are equal when both columns are equal. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        SecondarySortKey that = (SecondarySortKey) o;
        return first == that.first &&
                second == that.second;
    }

    /** Hash derived from both columns, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return Objects.hash(first, second);
    }
}

二次排序

/**
 * Secondary sort driver.
 *
 * <ol>
 *   <li>Define a custom key that implements Ordered and Serializable and
 *       encodes the multi-column ordering.</li>
 *   <li>Map the text RDD to a JavaPairRDD whose key is the custom key and
 *       whose value is the original line.</li>
 *   <li>Sort with sortByKey using the custom key.</li>
 *   <li>Map again to drop the key and keep only the text line.</li>
 * </ol>
 */
public class SecondarySort {

    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("SecondarySortJava").setMaster("local");
        JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
        // Close the context even when the job fails, so it is never leaked.
        try {
            JavaRDD<String> numsRDD = sparkContext.textFile("E:\\testdata\\wordcount\\input\\sort.txt");
            JavaPairRDD<SecondarySortKey, String> pairs = numsRDD.mapToPair(new PairFunction<String, SecondarySortKey, String>() {
                @Override
                public Tuple2<SecondarySortKey, String> call(String s) throws Exception {
                    // Split once and reuse the columns for both key fields.
                    String[] columns = s.split(" ");
                    SecondarySortKey key = new SecondarySortKey(
                            Integer.parseInt(columns[0]),
                            Integer.parseInt(columns[1]));
                    return new Tuple2<>(key, s);
                }
            });
            JavaPairRDD<SecondarySortKey, String> sortedPairs = pairs.sortByKey();
            // Drop the sort key; only the original line is needed for output.
            JavaRDD<String> result = sortedPairs.map(new Function<Tuple2<SecondarySortKey, String>, String>() {
                @Override
                public String call(Tuple2<SecondarySortKey, String> keyAndLine) throws Exception {
                    return keyAndLine._2;
                }
            });

            result.foreach(new VoidFunction<String>() {
                @Override
                public void call(String s) throws Exception {
                    System.out.println("s = " + s);
                }
            });
        } finally {
            sparkContext.close();
        }
    }
}

Scala实现

SecondarySortKey

/**
 * Composite key for a secondary sort: orders by `first`, and by `second`
 * when the `first` values are equal.
 *
 * Extends `Ordered` so it can be used as the key of `sortByKey`, and
 * `Serializable` so it can be shipped between Spark tasks.
 *
 * @param first  primary sort column
 * @param second secondary sort column, used only to break ties on `first`
 */
class SecondarySortKey(val first:Int, val second:Int) extends Ordered[SecondarySortKey] with Serializable {
  /**
   * Compares this key with `that`, first column first.
   *
   * Uses `Integer.compare` instead of subtraction: `a - b` overflows when the
   * operands are far apart (e.g. `Int.MinValue - 1`) and would yield the
   * wrong sign.
   *
   * @return a negative value, zero, or a positive value when this key is less
   *         than, equal to, or greater than `that`
   */
  override def compare(that: SecondarySortKey): Int = {
    val byFirst = Integer.compare(this.first, that.first)
    if (byFirst != 0) byFirst else Integer.compare(this.second, that.second)
  }
}

SecondarySort

/**
 * Secondary sort driver: reads the input file, keys every line by a
 * `SecondarySortKey` built from its first two space-separated columns, sorts
 * by that key, and prints the original lines in sorted order.
 */
object SecondarySort {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("SecondarySortScala").setMaster("local")
    val sparkContext = new SparkContext(conf)

    // Stop the context even when the job fails, so it is never leaked.
    try {
      val linesRDD = sparkContext.textFile("E:\\testdata\\wordcount\\input\\sort.txt")
      val keyLineRDD = linesRDD.map { line =>
        // Split once and reuse the columns for both key fields.
        val columns = line.split(" ")
        (new SecondarySortKey(columns(0).toInt, columns(1).toInt), line)
      }
      val sortedKeyLine = keyLineRDD.sortByKey()
      // Drop the sort key; only the original line is needed for output.
      val result = sortedKeyLine.map(keyLine => keyLine._2)
      result.foreach(line => println(line))
    } finally {
      sparkContext.stop()
    }
  }
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值