Hadoop (13) MR Sorting

      Building on the mobile-user traffic statistics from the previous article, this post sorts the results.
      To take part in sorting, the key class must implement WritableComparable.

      When the map and reduce phases sort, they compare k2; v2 never takes part in the comparison. If v2 should also influence the order, assemble k2 and v2 into a new class and use that class as k2 so it can be compared.

      Grouping is likewise done by comparing k2.
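      For illustration, below is a minimal sketch (not part of the original job) of a custom grouping comparator for the DataInfo key shown later in this post; the class name and the choice to group by total traffic are assumptions:

package com.zz.hadoop.dc.po;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Groups reduce input by total traffic: records whose totalPayLoad is equal
// reach the same reduce() call even if their upstream traffic differs.
public class TotalGroupingComparator extends WritableComparator {

    public TotalGroupingComparator() {
        super(DataInfo.class, true); // true: deserialize keys into DataInfo instances for comparison
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        DataInfo d1 = (DataInfo) a;
        DataInfo d2 = (DataInfo) b;
        return Long.compare(d2.getTotalPayLoad(), d1.getTotalPayLoad());
    }
}

      It would be registered in the driver with job.setGroupingComparatorClass(TotalGroupingComparator.class); since the keys are already sorted by total traffic, records with the same total then arrive in a single reduce() call.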

Sample code:

package com.zz.hadoop.dc.mr;

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.zz.hadoop.dc.factory.Factory;
import com.zz.hadoop.dc.po.DataInfo;

public class DataSort {

    public static void main(String[] args) throws IllegalArgumentException,
            IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(DataSort.class);

        job.setMapperClass(SortMapper.class);
        job.setMapOutputKeyClass(DataInfo.class);
        job.setMapOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        job.setReducerClass(SortReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DataInfo.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setPartitionerClass(SortPartitioner.class);
        job.setNumReduceTasks(Integer.parseInt(args[2]));

        job.waitForCompletion(true);
    }

    /**
     * Sorting is done by the framework: whatever should be sorted is made k2,
     * and the framework orders the records by the comparison rules defined in that class.
     * 
     * When sorting whole objects and the v2 value is not needed, set v2 to NullWritable.
     * 
     * The Mapper and Reducer are static nested classes so the framework can instantiate them.
     */
    public static class SortMapper
            extends Mapper<LongWritable, Text, DataInfo, NullWritable> {

        private DataInfo k2 = new DataInfo();

        @Override
        protected void map(LongWritable k1, Text v1, Context context)
                throws IOException, InterruptedException {
            String line = v1.toString();
            // fields[1] = phone number, fields[8]/fields[9] = upstream/downstream traffic
            String[] fields = line.split("\t");
            k2.set(fields[1], Long.parseLong(fields[8]),
                    Long.parseLong(fields[9]));
            context.write(k2, NullWritable.get());
        }
    }

    // Reduce
    public static class SortReduce
            extends Reducer<DataInfo, NullWritable, Text, DataInfo> {
        private Text k3 = new Text();

        @Override
        protected void reduce(DataInfo k2, Iterable<NullWritable> v2,
                Context context) throws IOException, InterruptedException {
            this.k3.set(k2.getTel());
            context.write(k3, k2);
        }
    }

    // Store the data in separate partitions (one per carrier / output file).
    // The partitioner runs on the map output, so its type parameters must match k2/v2: <DataInfo, NullWritable>.
    public static class SortPartitioner extends Partitioner<DataInfo, NullWritable> {
        /** Maps a phone-number prefix to a partition code */
        private static Map<String, Integer> provider = Factory.getMap();
        // Simulated partition codes for the China Mobile, China Unicom and China Telecom number ranges
        static {
            provider.put("138", 1);
            provider.put("139", 1);
            provider.put("155", 2);
            provider.put("156", 2);
            provider.put("180", 3);
            provider.put("181", 3);
        }

        @Override
        public int getPartition(DataInfo k2, NullWritable v2, int numPartition) {
            String sub = k2.getTel().substring(0, 3);
            Integer code = provider.get(sub);
            if (null == code) {
                code = 0; // unknown prefixes fall back to partition 0
            }

            return code;
        }
    }
}
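      Note that SortPartitioner can return partition codes 0 through 3, so the job should be launched with at least 4 reduce tasks (args[2] >= 4); otherwise the map tasks fail with an "Illegal partition" error. A typical submission (jar name and paths are placeholders) would be: hadoop jar data-sort.jar com.zz.hadoop.dc.mr.DataSort /dc/input /dc/sortout 4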

The bean used as k2 (DataInfo):

package com.zz.hadoop.dc.po;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class DataInfo implements WritableComparable<DataInfo> {

    private String tel;
    private long upPayLoad;
    private long downPayLoad;
    private long totalPayLoad;

    public void set(String tel, long upPayLoad, long downPayLoad) {
        this.tel = tel;
        this.upPayLoad = upPayLoad;
        this.downPayLoad = downPayLoad;
        this.totalPayLoad = upPayLoad + downPayLoad;
    }

    // ... get/set

    @Override
    public void readFields(DataInput in) throws IOException {
        this.tel = in.readUTF();
        this.upPayLoad = in.readLong();
        this.downPayLoad = in.readLong();
        this.totalPayLoad = in.readLong();
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(this.tel);
        out.writeLong(this.upPayLoad);
        out.writeLong(this.downPayLoad);
        out.writeLong(this.totalPayLoad);
    }

    /**
     * Sort by total traffic; if totals are equal, sort by upstream traffic.
     * (Both comparisons are descending: larger values come first.)
     * 
     * When two objects are compared (this vs. o):
     *    return 0  if they are equal;
     *    return <0 to place this before the other object;
     *    return >0 to place this after the other object.
     */
    @Override
    public int compareTo(DataInfo o) {
        if (this.totalPayLoad == o.getTotalPayLoad()) {
            // Totals tie: fall back to upstream traffic, descending
            return Long.compare(o.getUpPayLoad(), this.getUpPayLoad());
        } else {
            // Descending by total traffic
            return Long.compare(o.getTotalPayLoad(), this.getTotalPayLoad());
        }
    }
}
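      As a quick local sanity check of the comparison rule (a standalone sketch; the class name, phone numbers and byte counts below are made up), sorting a few DataInfo objects should yield descending total traffic, with ties broken by descending upstream traffic:

package com.zz.hadoop.dc.po;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

// Standalone check of DataInfo.compareTo; all values below are invented.
public class DataInfoSortDemo {

    public static void main(String[] args) {
        DataInfo a = new DataInfo();
        a.set("13800000001", 100L, 200L); // total 300
        DataInfo b = new DataInfo();
        b.set("15500000002", 400L, 200L); // total 600
        DataInfo c = new DataInfo();
        c.set("18000000003", 500L, 100L); // total 600, larger upPayLoad than b

        List<DataInfo> list = Arrays.asList(a, b, c);
        Collections.sort(list); // uses DataInfo.compareTo

        // Expected order: 18000000003, 15500000002, 13800000001
        for (DataInfo info : list) {
            System.out.println(info.getTel());
        }
    }
}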