Secondary Sort

  • The MapReduce framework automatically sorts the keys generated by mappers. This means that, before the reducers start, all intermediate key-value pairs
     generated by mappers must be sorted by key (the values themselves are not sorted). A secondary sort therefore has to decide three things:
    • How to sort reducer keys
    • How to partition keys passed to reducers
    • How to group data that has arrived at each reducer

  • We need to define a proper data structure for holding our key and value, while also providing the sort order of intermediate keys. In Hadoop, for custom data types (such as DateTemperaturePair) to be persisted, they have to implement the Writable interface; and if we are going to compare custom data types, then they have to implement an additional interface called WritableComparable.
  • DateTemperaturePair
/**
 * Composite key holding a year-month, a day and a temperature reading.
 *
 * <p>The three fields are serialized in the order yearMonth, day, temperature
 * (see {@link #write(DataOutput)} / {@link #readFields(DataInput)}).
 *
 * <p>Ordering, equality and hashing all use only {@code yearMonth} and
 * {@code temperature}; {@code day} is carried along but ignored. The natural
 * ordering is <em>descending</em>: first by yearMonth, then by temperature.
 *
 * <p>Instances are mutable (the setters overwrite the wrapped Hadoop
 * writables in place), so one instance can be reused for many records.
 */
public class DateTemperaturePair implements Writable,WritableComparable<DateTemperaturePair> {
    private final Text yearMonth = new Text();
    private final Text day = new Text();
    private final IntWritable temperature = new IntWritable();

    /** Creates an empty pair; fields are populated later via setters or {@link #readFields(DataInput)}. */
    public DateTemperaturePair() {
    }

    /**
     * Creates a fully populated pair.
     *
     * @param yearMonth   year-month portion of the key
     * @param day         day portion of the key
     * @param temperature temperature reading
     */
    public DateTemperaturePair(String yearMonth,String day,int temperature) {
        this.yearMonth.set(yearMonth);
        this.day.set(day);
        this.temperature.set(temperature);
    }

    /**
     * Convenience factory: allocates a new pair and fills it from {@code in}.
     *
     * @param in source positioned at a serialized pair
     * @return the deserialized pair
     * @throws IOException if reading any field fails
     */
    public static DateTemperaturePair read(DataInput in) throws IOException {
        DateTemperaturePair pair = new DateTemperaturePair();
        pair.readFields(in);
        return pair;
    }

    /**
     * Serializes yearMonth, day and temperature, in that order.
     *
     * @throws IOException if writing any field fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        yearMonth.write(out);
        day.write(out);
        temperature.write(out);
    }

    /**
     * Deserializes the fields in the same order {@link #write(DataOutput)} emits them.
     *
     * @throws IOException if reading any field fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        yearMonth.readFields(in);
        day.readFields(in);
        temperature.readFields(in);
    }

    /**
     * Orders pairs by yearMonth and, for equal yearMonth, by temperature.
     * The result is negated, so the ordering is descending on both fields.
     *
     * @param pair the pair to compare against
     * @return negative, zero or positive per the descending order described above
     */
    @Override
    public int compareTo(DateTemperaturePair pair) {
        int compareValue = this.yearMonth.compareTo(pair.getYearMonth());
        if (compareValue == 0) {
            compareValue = temperature.compareTo(pair.getTemperature());
        }
        // Negate to sort descending; return compareValue unchanged for ascending.
        return -compareValue;
    }

    /**
     * Two pairs are equal when their yearMonth and temperature are equal,
     * which keeps {@code equals} consistent with {@link #compareTo} and
     * {@link #hashCode()}. The day is intentionally not considered.
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (other == null || getClass() != other.getClass()) {
            return false;
        }
        DateTemperaturePair that = (DateTemperaturePair) other;
        return yearMonth.equals(that.yearMonth) && temperature.equals(that.temperature);
    }

    /**
     * Hash built from yearMonth and temperature only, matching {@link #equals(Object)}.
     * Both fields are final and always initialised, so no null handling is needed.
     */
    @Override
    public int hashCode() {
        int result = yearMonth.hashCode();
        result = 31 * result + temperature.hashCode();
        return result;
    }

    /** @return a new {@code Text} holding yearMonth immediately followed by day */
    public Text getYearMonthDay() {
        return new Text(yearMonth.toString() + day.toString());
    }

    /** @return the live (mutable) yearMonth field, not a copy */
    public Text getYearMonth() {
        return yearMonth;
    }

    /** @return the live (mutable) day field, not a copy */
    public Text getDay() {
        return day;
    }

    /** @return the live (mutable) temperature field, not a copy */
    public IntWritable getTemperature() {
        return temperature;
    }

    /** Overwrites the yearMonth field in place. */
    public void setYearMonth(String yearMonthAsString) {
        yearMonth.set(yearMonthAsString);
    }

    /** Overwrites the day field in place. */
    public void setDay(String dayAsString) {
        day.set(dayAsString);
    }

    /** Overwrites the temperature field in place. */
    public void setTemperature(int temp) {
        temperature.set(temp);
    }
}

 

/**
 * Mapper that turns one comma-separated input line into a
 * ({@link DateTemperaturePair}, temperature-as-text) record.
 *
 * <p>The line is split on {@code ","}; tokens 0 and 1 are concatenated into
 * the yearMonth, token 2 is the day and token 3 is the temperature.
 * NOTE(review): this presumably corresponds to a "year,month,day,temperature"
 * layout - confirm against the real input files.
 *
 * <p>The key and value objects are reused across calls to avoid allocating
 * new writables for every record.
 */
public class SecondarySortMapper extends Mapper<LongWritable,Text,DateTemperaturePair,Text> {
    private final Text theTemperature = new Text();
    private final DateTemperaturePair pair = new DateTemperaturePair();

    /**
     * Parses one input line and emits the composite key with the temperature as value.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param value   one comma-separated input line
     * @param context sink for the emitted record
     * @throws IOException           if writing to the context fails
     * @throws InterruptedException  if writing to the context is interrupted
     * @throws NumberFormatException if token 3 is not a parsable integer
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {

        String line = value.toString();
        String[] tokens = line.split(",");
        String yearMonth = tokens[0] + tokens[1];
        String day = tokens[2];
        int temperature = Integer.parseInt(tokens[3]);

        pair.setYearMonth(yearMonth);
        pair.setDay(day);
        pair.setTemperature(temperature);
        // The temperature travels both inside the key (for sorting) and as the value.
        theTemperature.set(tokens[3]);

        context.write(pair, theTemperature);
    }
}
/**
 * Partitioner that routes records by the yearMonth part of the composite key
 * only, so every record with the same yearMonth lands in the same partition
 * regardless of its temperature.
 */
public class DateTemperaturePartitioner extends Partitioner<DateTemperaturePair,Text> {
    /**
     * @param pair               composite key; only its yearMonth is hashed
     * @param text               the record value (ignored)
     * @param numberOfPartitions total number of partitions
     * @return a partition index in {@code [0, numberOfPartitions)}
     */
    @Override
    public int getPartition(DateTemperaturePair pair,Text text,int numberOfPartitions) {
        // Take the remainder first, then abs: the remainder lies strictly between
        // -numberOfPartitions and numberOfPartitions, so abs cannot overflow.
        return Math.abs(pair.getYearMonth().hashCode() % numberOfPartitions);
    }
}

 

public class DataTemperatureGroupingComparator extends WritableComparator {
    public DateTemperatureGroupingComparator() {
        super(DateTemperaturePair.class,true);
    }

    @Override
    public int compare(WritableComparable wc1,WritableComparable wc2) {
        DateTemperaturePair pair = (DateTemperaturePair) wc1;
        DateTemperaturePair pair2 = (DateTemperaturePair) wc2;
        return pair.getYearMonth().compareTo(pair2.getYearMonth());
    }

}
public class SecondarySortReducer extends Reducer<DateTemperaturePair,Text,Text,Text> {
    @Override
    protected void reduce(DateTemperaturePair key , Iterable<Text> values,Context context)
    throws IOException,InterruptedException {
        StringBuilder builder = new StringBuilder();
        for(Text value:values) {
            builder.append(value.toString));
            builder.append(",");
        }
        context.write(key.getYearMonth(),new Text(builder.toString()));
    }
}


public class SecondarySortDriver extends Configured implements Tool {
    private static Logger theLogger = Logger.getLogger(SecondarySortDriver.class);
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        Job job = new Job(conf);
        job.setJarByClass(SecondarySortDriver.class);
        job.setJobName("SecondarySortDriver");
        FileInputFormat.setInputPaths(job,new Path(args[0]));
        FileOutputFormat.setOutputPath(job,new Path(args[1]));
        job.setOutputKeyClass(DateTemperaturePair.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(SecondarySortMapper.class);
        job.setReducerClass(SecondarySortReducer.class);
        job.setPartitionerClass(DateTemperaturePartitioner.class);
        job.setGroupingComparator(DateTemperatureGroupingComparator.class);

        boolean status = job.waitForCompletion(true);
        theLogger.info("run()")
        return status?0:1;
         }
}

 

转载于:https://www.cnblogs.com/dalu610/p/5997819.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值