spark(10)-spark高级排序(course19)

    1.内容
    基础排序算法实战
    二次排序算法实战
    更高级排序算法
    排序算法内幕解密

    //修改一下log级别
    scala> sc.setLogLevel("WARN")

    2.二次排序:排序时同时考虑两个维度——先按第一个维度排序,当第一个维度相同时,再按第二个维度排序。

    例如数据源:
    SecondSort.txt

    2 3 
    4 1 
    3 2 
    4 3 
    9 7 
    2 1

    3.定义一个同时实现 scala.math.Ordered(定义排序规则)和 java.io.Serializable 接口的 JavaBean,作为排序用的 key:

    /*
    SecondSortKey.java
    */
    package cn.whbing.spark.SparkApps.cores;
    
    import java.io.Serializable;
    
    import scala.math.Ordered;
    
    /**
     * Composite key for a two-level ("secondary") sort: keys are ordered by
     * {@code first}, and ties on {@code first} are broken by {@code second}.
     *
     * <p>The ordering contract implemented here is Scala's
     * {@code scala.math.Ordered}, not one of the Java sorting interfaces.
     * The class is also {@link Serializable}.
     *
     * <p>Every relational operator and {@link #compareTo} delegates to
     * {@link #compare}, so the ordering is defined in exactly one place and
     * stays consistent with {@link #equals(Object)}.
     */
    public class SecondSortKey implements Ordered<SecondSortKey>,Serializable{

        // The two components of the sort key, in order of significance.
        private int first;
        private int second;

        /**
         * Creates a key from its two components.
         *
         * @param first  the primary sort component
         * @param second the secondary sort component, used when {@code first} ties
         */
        public SecondSortKey(int first,int second) {
            this.first = first;
            this.second = second;
        }

        /** @return the primary sort component */
        public int getFirst() {
            return first;
        }

        /** @param first the new primary sort component */
        public void setFirst(int first) {
            this.first = first;
        }

        /** @return the secondary sort component */
        public int getSecond() {
            return second;
        }

        /** @param second the new secondary sort component */
        public void setSecond(int second) {
            this.second = second;
        }

        /** @return {@code true} if this key sorts strictly after {@code other} */
        @Override
        public boolean $greater(SecondSortKey other) {
            return compare(other) > 0;
        }

        /** @return {@code true} if this key sorts after, or is equal to, {@code other} */
        @Override
        public boolean $greater$eq(SecondSortKey other) {
            return compare(other) >= 0;
        }

        /** @return {@code true} if this key sorts strictly before {@code other} */
        @Override
        public boolean $less(SecondSortKey other) {
            return compare(other) < 0;
        }

        /**
         * @return {@code true} if this key sorts before, or is equal to, {@code other}
         */
        @Override
        public boolean $less$eq(SecondSortKey other) {
            // The previous hand-written version compared this.first against
            // other.getSecond() in the equality branch, so equal keys were not
            // reported as "<=". Delegating to compare() removes that bug.
            return compare(other) <= 0;
        }

        /**
         * Compares by {@code first}, then by {@code second}.
         *
         * <p>Uses {@link Integer#compare(int, int)} rather than subtraction:
         * a difference of two ints can overflow and yield the wrong sign.
         * Only the sign of the result is meaningful.
         *
         * @param other the key to compare against
         * @return a negative value, zero, or a positive value when this key is
         *         less than, equal to, or greater than {@code other}
         */
        @Override
        public int compare(SecondSortKey other) {
            int byFirst = Integer.compare(this.first, other.getFirst());
            if (byFirst != 0) {
                return byFirst;
            }
            return Integer.compare(this.second, other.getSecond());
        }

        /**
         * Same ordering as {@link #compare(SecondSortKey)}.
         *
         * @param other the key to compare against
         * @return the result of {@code compare(other)}
         */
        @Override
        public int compareTo(SecondSortKey other) {
            return compare(other);
        }

        /** Hash combining both components; equal keys produce equal hashes. */
        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + first;
            result = prime * result + second;
            return result;
        }

        /**
         * Two keys are equal when they are of the same class and both
         * components match.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            SecondSortKey other = (SecondSortKey) obj;
            return first == other.first && second == other.second;
        }
    }
    
    /*SecondSortApp.java*/
    
    package cn.whbing.spark.SparkApps.cores;
    
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.Function;
    import org.apache.spark.api.java.function.PairFunction;
    import org.apache.spark.api.java.function.VoidFunction;
    
    import scala.Tuple2;
    
    /**
     * Secondary-sort driver.
     *
     * <ol>
     *   <li>Define a custom key ({@code SecondSortKey}) that implements
     *       {@code Ordered} and {@code Serializable}.</li>
     *   <li>Load the input file into a {@code <key, value>} pair RDD, where
     *       the key is built from the two numbers on each line and the value
     *       is the original line.</li>
     *   <li>Call {@code sortByKey} so the custom key ordering performs the
     *       secondary sort.</li>
     *   <li>Drop the key and keep only the sorted original lines.</li>
     * </ol>
     */
    public class SecondSortApp {

        public static void main(String[] args) {
            SparkConf conf = new SparkConf();
            conf.setAppName("SecondSort").setMaster("local");

            JavaSparkContext sc = new JavaSparkContext(conf);
            try {
                sc.setLogLevel("WARN");
                JavaRDD<String> lines = sc.textFile("D://javaTools//EclipseWork2//SparkApps//SecondSort.txt");

                // Input: one text line. Output: (SecondSortKey, original line).
                JavaPairRDD<SecondSortKey, String> pairs = lines.mapToPair(new PairFunction<String, SecondSortKey, String>() {

                    private static final long serialVersionUID = 1L;

                    @Override
                    public Tuple2<SecondSortKey, String> call(String line) throws Exception {
                        // Trim and split on runs of whitespace so leading,
                        // trailing, or repeated blanks do not produce empty tokens.
                        String[] splited = line.trim().split("\\s+");
                        SecondSortKey key = new SecondSortKey(
                                Integer.parseInt(splited[0]), Integer.parseInt(splited[1]));
                        // Explicit type arguments instead of the raw Tuple2 type.
                        return new Tuple2<SecondSortKey, String>(key, line);
                    }
                });

                // The secondary sort itself: ordering comes from SecondSortKey.
                JavaPairRDD<SecondSortKey, String> sorted = pairs.sortByKey();

                // Discard the sort key and keep the original line.
                JavaRDD<String> secondSorted = sorted.map(new Function<Tuple2<SecondSortKey,String>, String>() {

                    private static final long serialVersionUID = 1L;

                    @Override
                    public String call(Tuple2<SecondSortKey, String> sortedContent) throws Exception {
                        return sortedContent._2;
                    }
                });

                secondSorted.foreach(new VoidFunction<String>() {

                    private static final long serialVersionUID = 1L;

                    @Override
                    public void call(String sorted) throws Exception {
                        System.out.println(sorted);
                    }
                });
            } finally {
                // Always release the Spark context, even if the job fails.
                sc.stop();
            }
        }
    }
    

    结果:

    2 1
    2 3 
    3 2 
    4 1 
    4 3 
    9 7 

    4.小结:
    对于待排序的每一行原数据,我们进行封装成javabean,并实现Ordered接口,将其作为key;
    原来的行作为value;
    对上述(k,v)进行sortByKey操作即进行自定义的排序操作;
    最后去掉自定义的key,只保留排序后的原始行即可。

    • 0
      点赞
    • 0
      收藏
      觉得还不错? 一键收藏
    • 0
      评论
    评论
    添加红包

    请填写红包祝福语或标题

    红包个数最小为10个

    红包金额最低5元

    当前余额3.43前往充值 >
    需支付:10.00
    成就一亿技术人!
    领取后你会自动成为博主和红包主的粉丝 规则
    hope_wisdom
    发出的红包
    实付
    使用余额支付
    点击重新获取
    扫码支付
    钱包余额 0

    抵扣说明:

    1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
    2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

    余额充值