- The MapReduce framework automatically sorts the keys generated by mappers. This means that, before starting reducers, all intermediate key-value pairs
generated by mappers must be sorted by key.
- How to sort reducer keys
- How to partition keys passed to reducers
- How to group data that has arrived at each reducer
- We need to define a proper data structure for holding our key and value, while also providing the sort order of intermediate keys. In Hadoop, for custom data types (such as DateTemperaturePair) to be persisted, they have to implement the Writable interface; and if we are going to compare custom data types, then they have to implement an additional interface called WritableComparable.
- DateTemperaturePair
public class DateTemperaturePair implements Writable,WritableComparable<DateTemperaturePair> { private final Text yearMonth = new Text(); private final Text day = new Text(); private final IntWritable temperature = new IntWritable(); public DateTemperaturePair() { } public DateTemperaturePair(String yearMonth,String day,int temperature) { this.yearMonth.set(yearMonth); this.day.set(day); this.temperature.set(temperature); } public static DateTemperaturePair read(DataInput in) throws IOException { DateTemperaturePair pair = new DateTemperaturePair(); pair.readFields(in); return pair; } @Override public void write(DataOutput out) throws IOException { yearMonth.write(out); day.write(out); temperature.write(out); } @Override public void readFields(DataInput in) throws IOException { yearMonth.readFields(in); day.readFields(in); temperature.readFields(in); } @Override public int compareTo(DateTemperaturePair pair) { int compareValue = this.yearMonth.compareTo(pair.getYearMonth()); if(compareValue==0) { compareValue = temperature.compareTo(pair.getTemperature()); } return -1 * compareValue; } public Text getYearMonthDay() { return new Text(yearMonth.toString() + day.toString()); } public Text getYearMonth() { return yearMonth; } public Text getDay() { return day; } public IntWritable getTemperature() { return temperature; } public void setYearMonth(String yearMonthAsString) { yearMonth.set(yearMonthAsString); } public void setDay(String dayAsString) { day.set(dayAsString); } public void setTemperature(int temp) { temperature.set(temp); } @Override public int hashcode() { int result = yearMonth!=null?yearMonth.hashcode():0; result = 31 * result + (temperature !=null ? temperature.hashcode() :0); return result; } }
public class SecondarySortMapper extends Mapper<LongWritable,Text,DateTemperaturePair,Text> { private final Text theTemperature = new Text(); private final DateTemperaturePair pair = new DateTemperaturePair(); @Override protected void map(LongWritable key, Text value, Context Context) throws IOException,InterruptedException{ String line = value.toString(); String[] tokens = line.split(","); String yearMonth = tokens[0] + tokens[1]; String day = tokens[2]; int temperature = Interger.parseInt(tokens[3]); pair.setYearMonth(yearMonth); pair.setDay(day); pair.setTemperature(temperature); theTemperature.set(tokens[3]); context.write(pair,theTemperature); } }
/**
 * Chooses a partition from the year-month part of the key only, so that all
 * pairs sharing a year-month receive the same partition number.
 */
public class DateTemperaturePartitioner extends Partitioner<DateTemperaturePair, Text> {

    /**
     * @param pair               composite key whose year-month selects the partition
     * @param text               value associated with the key; not used here
     * @param numberOfPartitions total number of partitions
     * @return a partition index in the range {@code [0, numberOfPartitions)}
     */
    @Override
    public int getPartition(DateTemperaturePair pair, Text text, int numberOfPartitions) {
        // Take the remainder first: its magnitude is below numberOfPartitions, so
        // Math.abs can never be handed Integer.MIN_VALUE.
        return Math.abs(pair.getYearMonth().hashCode() % numberOfPartitions);
    }
}
public class DataTemperatureGroupingComparator extends WritableComparator { public DateTemperatureGroupingComparator() { super(DateTemperaturePair.class,true); } @Override public int compare(WritableComparable wc1,WritableComparable wc2) { DateTemperaturePair pair = (DateTemperaturePair) wc1; DateTemperaturePair pair2 = (DateTemperaturePair) wc2; return pair.getYearMonth().compareTo(pair2.getYearMonth()); } }
public class SecondarySortReducer extends Reducer<DateTemperaturePair,Text,Text,Text> { @Override protected void reduce(DateTemperaturePair key , Iterable<Text> values,Context context) throws IOException,InterruptedException { StringBuilder builder = new StringBuilder(); for(Text value:values) { builder.append(value.toString)); builder.append(","); } context.write(key.getYearMonth(),new Text(builder.toString())); } } public class SecondarySortDriver extends Configured implements Tool { private static Logger theLogger = Logger.getLogger(SecondarySortDriver.class); @Override public int run(String[] args) throws Exception { Configuration conf = getConf(); Job job = new Job(conf); job.setJarByClass(SecondarySortDriver.class); job.setJobName("SecondarySortDriver"); FileInputFormat.setInputPaths(job,new Path(args[0])); FileOutputFormat.setOutputPath(job,new Path(args[1])); job.setOutputKeyClass(DateTemperaturePair.class); job.setOutputValueClass(Text.class); job.setMapperClass(SecondarySortMapper.class); job.setReducerClass(SecondarySortReducer.class); job.setPartitionerClass(DateTemperaturePartitioner.class); job.setGroupingComparator(DateTemperatureGroupingComparator.class); boolean status = job.waitForCompletion(true); theLogger.info("run()") return status?0:1; } }