MR基础案例(四)二次排序

本文介绍了MapReduce中的二次排序案例,包括两种实现方式:一种是利用shuffle阶段的排序特性进行升序排序,另一种是通过自定义数据类型实现更灵活的排序功能。
摘要由CSDN通过智能技术生成

二次排序

输入

20 21
50 51
50 53
50 52
50 54
60 51
60 53
60 52
60 56
60 57
70 58
60 61
70 54
70 55
70 56
70 57
70 58

结果:

20      21
50      51
50      52
50      53
50      54
60      51
60      52
60      53
60      56
60      57
60      61
70      54
70      55
70      56
70      57
70      58
70      58

方法1:
第一列作为key,利用shuffle阶段的排序特点完成排序(但只能升序);第二列的值在reduce端放到list中,对list排序后遍历输出
方法2:
自定义数据类型

SortSecondaryDemo.java

package MR;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

/**
 * Driver, mapper and reducer for the secondary-sort example.
 *
 * <p>Each input line holds two whitespace-separated integers. The mapper emits a
 * {@link SecondarySortWritable} key built from both columns, with the second column
 * as the value; the reducer writes every (key, value) pair it receives.
 */
public class SortSecondaryDemo implements Tool {

    /** Configuration handed over through {@link #setConf}; created lazily if none was set. */
    private Configuration conf;

    /**
     * Map phase: parses one input line into a composite key and its second-column value.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, SecondarySortWritable, IntWritable> {

        // Reused across calls; context.write is called before the next record overwrites them.
        private final SecondarySortWritable outKey = new SecondarySortWritable();
        private final IntWritable outValue = new IntWritable();

        /**
         * Emits {@code (first, second) -> second} for a line of the form {@code "first second"}.
         * Blank lines and lines with fewer than two fields are skipped.
         *
         * @param key     input key supplied by the framework (not used)
         * @param value   one line of input text
         * @param context output collector
         * @throws NumberFormatException if either of the first two fields is not an integer
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                return;
            }
            // Split on any run of whitespace so tabs and repeated spaces are accepted.
            String[] fields = line.split("\\s+");
            if (fields.length < 2) {
                return;
            }
            int first = Integer.parseInt(fields[0]);
            int second = Integer.parseInt(fields[1]);
            outKey.setFirst(first);
            outKey.setSecond(second);
            outValue.set(second);
            context.write(outKey, outValue);
        }
    }

    /**
     * Reduce phase: writes every value it receives together with the key of the call.
     */
    public static class MyReducer extends Reducer<SecondarySortWritable, IntWritable, SecondarySortWritable, IntWritable> {

        /**
         * Writes one output record per incoming value.
         *
         * @param key     key of this reduce call
         * @param values  values delivered for that key
         * @param context output collector
         */
        @Override
        protected void reduce(SecondarySortWritable key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            for (IntWritable value : values) {
                context.write(key, value);
            }
        }
    }

    /**
     * Stores the configuration so that {@link #run} uses it when creating the job.
     *
     * @param conf configuration to use for the job
     */
    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    /**
     * Returns the configuration previously passed to {@link #setConf}, creating a
     * default one on first use if none was set.
     *
     * @return the configuration used by this tool
     */
    @Override
    public Configuration getConf() {
        if (conf == null) {
            conf = new Configuration();
        }
        return conf;
    }

    /**
     * Configures and submits the job, then waits for it to finish.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are missing
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: SortSecondaryDemo <input path> <output path>");
            return 2;
        }

        Configuration jobConf = getConf();
        Job job = Job.getInstance(jobConf, "ssjob");
        job.setJarByClass(SortSecondaryDemo.class);

        // set inputpath and outputpath
        setInputAndOutput(job, jobConf, args);

        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(SecondarySortWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(SecondarySortWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // submit and block until completion
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Entry point: runs this tool through {@link ToolRunner} and exits with its status.
     */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new SortSecondaryDemo(), args);
        System.exit(exitCode);
    }

    /**
     * Registers the input and output paths on the job.
     *
     * <p>The output path is not cleaned up here.
     * NOTE(review): presumably the job fails if the output directory already exists — confirm
     * and delete it beforehand if overwriting is wanted.
     *
     * @param job  job to configure
     * @param conf job configuration (currently unused; kept for interface compatibility)
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws IllegalStateException    if the input path cannot be added to the job
     */
    public static void setInputAndOutput(Job job, Configuration conf, String[] args) {
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException("expected arguments: <input path> <output path>");
        }
        try {
            FileInputFormat.addInputPath(job, new Path(args[0]));
        } catch (IOException e) {
            // Fail loudly instead of continuing with a job that has no input path.
            throw new IllegalStateException("cannot add input path: " + args[0], e);
        }
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
    }

}


SecondarySortWritable.java

package MR;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Objects;

/**
 *
 */
public class SecondarySortWritable implements WritableComparable<SecondarySortWritable> {
    private int first;
    private int second;

    public void write(DataOutput out) throws IOException {
        out.writeInt(this.first);
        out.writeInt(this.second);
    }

    public void readFields(DataInput in) throws IOException {
        this.first = in.readInt();
        this.second = in.readInt();
    }

    int getFirst() {
        return first;
    }

    void setFirst(int first) {
        this.first = first;
    }

    public int getSecond() {
        return second;
    }

    void setSecond(int second) {
        this.second = second;
    }


    public int compareTo(SecondarySortWritable o) {
        return this.first - o.first;
        //return o.second - this.second; //降序
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        SecondarySortWritable that = (SecondarySortWritable) o;
        return first == that.first &&
                second == that.second;
    }

    @Override
    public int hashCode() {

        return Objects.hash(first, second);
    }

    @Override
    public String toString() {
        return "first=" + first +
                ", second=" + second;
    }
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值