目录
- 需求:MR中自定义bean作为key,输出某组排序中最大值。
- 方案:自定义 MR 中的 GroupingComparator(分组比较器)
1.需求:MR中自定义bean作为key,输出某组排序中最大值。
场景:求出每个订单中金额最大的商品价格。
2.方案:自定义 MR 中的 GroupingComparator(分组比较器)
- 定义排序:OrderBean 类,重写 compareTo 方法:订单 id 相同时比较金额,并按金额降序(desc)排列
public class OrderBean implements WritableComparable<OrderBean>{
private Text itemid; private DoubleWritable amount;
public OrderBean() { }
public OrderBean(Text itemid, DoubleWritable amount) { set(itemid, amount);
}
public void set(Text itemid, DoubleWritable amount) {
this.itemid = itemid; this.amount = amount;
}
public Text getItemid() { return itemid; }
public DoubleWritable getAmount() { return amount; }
@Override public int compareTo(OrderBean o) { int cmp = this.itemid.compareTo(o.getItemid()); if (cmp == 0) { cmp = -this.amount.compareTo(o.getAmount()); } return cmp; }
@Override public void write(DataOutput out) throws IOException { out.writeUTF(itemid.toString()); out.writeDouble(amount.get());
}
@Override public void readFields(DataInput in) throws IOException { String readUTF = in.readUTF(); double readDouble = in.readDouble();
this.itemid = new Text(readUTF); this.amount= new DoubleWritable(readDouble); }
@Override public String toString() {
return itemid.toString() + "\t" + amount.get();
}
} |
- 定义分区:ItemIdPartitioner 类,按照订单id分区
public class ItemIdPartitioner extends Partitioner<OrderBean, NullWritable>{ @Override public int getPartition(OrderBean bean, NullWritable value, int numReduceTasks) { //相同id的订单bean,会发往相同的partition //而且,产生的分区数,是会跟用户设置的reduce task数保持一致 return (bean.getItemid().hashCode() & Integer.MAX_VALUE) % numReduceTasks; } }
|
- 定义groupingComparator,利用reduce中该组件将订单相同的bean组成一组
public class ItemidGroupingComparator extends WritableComparator {
//传入作为key的bean的class类型,以及制定需要让框架做反射获取实例对象 protected ItemidGroupingComparator() { super(OrderBean.class, true); }
@Override public int compare(WritableComparable a, WritableComparable b) { OrderBean abean = (OrderBean) a; OrderBean bbean = (OrderBean) b;
//比较两个bean时,指定只比较bean中的orderid return abean.getItemid().compareTo(bbean.getItemid());
}
} |
运行类SecondarySort
public class SecondarySort {
static class SecondarySortMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable>{
OrderBean bean = new OrderBean();
@Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString(); String[] fields = StringUtils.split(line, ",");
bean.set(new Text(fields[0]), new DoubleWritable(Double.parseDouble(fields[2])));
context.write(bean, NullWritable.get());
}
}
static class SecondarySortReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable>{
//到达reduce时,相同id的所有bean已经被看成一组,且金额最大的那个一排在第一位 @Override protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException { context.write(key, NullWritable.get()); } }
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration(); Job job = Job.getInstance(conf);
job.setJarByClass(SecondarySort.class);
job.setMapperClass(SecondarySortMapper.class); job.setReducerClass(SecondarySortReducer.class);
job.setOutputKeyClass(OrderBean.class); job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("hdfs://shizhan01:9000/secondarysort/input")); FileOutputFormat.setOutputPath(job, new Path("hdfs://shizhan01:9000/secondarysort/output3"));
//在此设置自定义的Groupingcomparator类 job.setGroupingComparatorClass(ItemidGroupingComparator.class); //在此设置自定义的partitioner类 job.setPartitionerClass(ItemIdPartitioner.class);
job.setNumReduceTasks(1);
job.waitForCompletion(true);
}
} |
预处理数据:
处理后结果: