二次排序,Spark,Hadoop实现_spark 数字二次排序 hadoop-CSDN博客

本文链接：https://blog.csdn.net/never_compromise2580/article/details/86670258

踏踏实实积累，不能浮躁！！！

需求：先根据输入数据的第一列进行降序排序；第一列相等时，再根据第二列进行降序排序。最后效果如下：

3 5
2 8
2 4
1 7
1 5

原理：根据自定义的 key 来实现二次排序，其实不难。下面是 Spark 代码的实现逻辑，供大家参考。

1：自定义key

package cn.spark.java.sparkcore;

import scala.math.Ordered;

import java.io.Serializable;

/**
 * Composite key used for secondary sorting.
 *
 * <p>Keys are ordered by {@code first}; when the {@code first} components are equal they are
 * ordered by {@code second}. All ordering methods ({@link #compare}, {@link #compareTo} and the
 * Scala operator methods) share a single definition so they can never disagree.
 */
public class Sort_2_key implements Ordered<Sort_2_key>,Serializable{

    private int first;
    private int second;

    /** Creates a key with both components at their default value (0); fill it via the setters. */
    public Sort_2_key(){

    }

    /** @return the primary sort component */
    public int getFirst() {
        return first;
    }

    /** @return the secondary sort component */
    public int getSecond() {
        return second;
    }

    /** @param first the primary sort component */
    public void setFirst(int first) {
        this.first = first;
    }

    /** @param second the secondary sort component */
    public void setSecond(int second) {
        this.second = second;
    }

    /**
     * Two keys are equal when they are of the same class and both components match.
     *
     * @param o the object to compare with
     * @return {@code true} if {@code o} is a key with the same {@code first} and {@code second}
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        Sort_2_key sort_2 = (Sort_2_key) o;

        if (first != sort_2.first) return false;
        return second == sort_2.second;
    }

    /** @return a hash derived from both components, consistent with {@link #equals(Object)} */
    @Override
    public int hashCode() {
        int result = first;
        result = 31 * result + second;
        return result;
    }

    /**
     * Compares this key with {@code that}, by {@code first} and then by {@code second}.
     *
     * @param that the key to compare against
     * @return a negative value, zero, or a positive value when this key is less than, equal to,
     *         or greater than {@code that}
     */
    @Override
    public int compare(Sort_2_key that) {
        // Integer.compare is used instead of subtraction: "a - b" overflows for operands of
        // opposite sign and large magnitude, which would yield a result with the wrong sign.
        int byFirst = Integer.compare(this.getFirst(), that.getFirst());
        if (byFirst != 0) {
            return byFirst;
        }
        return Integer.compare(this.getSecond(), that.getSecond());
    }

    /** @return {@code true} if this key sorts strictly before {@code that} */
    @Override
    public boolean $less(Sort_2_key that) {
        return compare(that) < 0;
    }

    /** @return {@code true} if this key sorts strictly after {@code that} */
    @Override
    public boolean $greater(Sort_2_key that) {
        return compare(that) > 0;
    }

    /** @return {@code true} if this key sorts before {@code that} or both components match */
    @Override
    public boolean $less$eq(Sort_2_key that) {
        return compare(that) <= 0;
    }

    /** @return {@code true} if this key sorts after {@code that} or both components match */
    @Override
    public boolean $greater$eq(Sort_2_key that) {
        return compare(that) >= 0;
    }

    /**
     * Same ordering as {@link #compare(Sort_2_key)}.
     *
     * @param that the key to compare against
     * @return a negative value, zero, or a positive value, as for {@link #compare(Sort_2_key)}
     */
    @Override
    public int compareTo(Sort_2_key that) {
        return compare(that);
    }
}

2：排序的逻辑实现

package cn.spark.java.sparkcore;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

import java.util.Arrays;

/**
 * Secondary-sort example.
 *
 * <p>Each input line holds two space-separated integers. Every line is paired with a
 * {@link Sort_2_key} built from those two integers, and the pairs are then sorted by that key in
 * descending order. Output for the sample data:
 *
 * <pre>
 * 3 5
 * 2 8
 * 2 4
 * 1 7
 * 1 5
 * </pre>
 */
public class Sort_2class {
    /**
     * Reads the input file, sorts its lines by (first column, second column) in descending
     * order and prints each line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("Sort_2")
                .setMaster("local");

        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            JavaRDD<String> lines = sc.textFile("f://scala/sort.txt");

            // Key every line by its two numeric columns; the original line is kept as the value.
            JavaPairRDD<Sort_2_key,String> words = lines.mapToPair(new PairFunction<String,Sort_2_key,String>() {
                @Override
                public Tuple2<Sort_2_key,String> call(String line) throws Exception {
                    // Split once and reuse the parts instead of splitting the line per column.
                    String[] columns = line.split(" ");
                    Sort_2_key key = new Sort_2_key();
                    key.setFirst(Integer.parseInt(columns[0]));
                    key.setSecond(Integer.parseInt(columns[1]));
                    return new Tuple2<Sort_2_key,String>(key,line);
                }
            });

            // false => descending order of the composite key.
            JavaPairRDD<Sort_2_key,String> rel = words.sortByKey(false);

            // NOTE(review): foreach prints wherever the task runs; with the "local" master set
            // above that is this JVM. Confirm before running against a cluster master.
            rel.foreach(new VoidFunction<Tuple2<Sort_2_key,String>>() {
                @Override
                public void call(Tuple2<Sort_2_key,String> tuple2) throws Exception {
                    System.out.println(tuple2._2);
                }
            });
        } finally {
            // Release the context even when the job fails.
            sc.close();
        }
    }
}

后续会补充 Hadoop 的实现逻辑代码。