自定义GroupingComparator

//OrderBean:将需要组合在一起的字段组合成一个bean类。其中字段itemId用来分区,amount用来排序

/**
 * Composite map-output key holding an item id and an amount.
 *
 * <p>Natural ordering (see {@link #compareTo(OrderBean)}) is ascending by
 * {@code itemId} and, within the same {@code itemId}, descending by
 * {@code amount}.
 *
 * <p>Serialized form: the item id via {@code writeUTF}, followed by the amount
 * via {@code writeDouble}. {@link #readFields(DataInput)} reads them back in
 * the same order.
 */
public class OrderBean implements WritableComparable<OrderBean> {

    private Text itemId;
    private DoubleWritable amount;

    /**
     * No-arg constructor, needed so the framework can instantiate the key and
     * then populate it through {@link #readFields(DataInput)}. The original
     * author notes that omitting it fails with
     * "Unable to initialize any output collector".
     *
     * <p>Fields are initialised to empty values so that a freshly created bean
     * can be written, compared and printed without a NullPointerException.
     */
    public OrderBean() {
        this.itemId = new Text();
        this.amount = new DoubleWritable();
    }

    /**
     * @param itemId item identifier
     * @param amount amount associated with the item
     */
    public OrderBean(Text itemId, DoubleWritable amount) {
        this.itemId = itemId;
        this.amount = amount;
    }

    public Text getItemId() {
        return itemId;
    }

    public void setItemId(Text itemId) {
        this.itemId = itemId;
    }

    public DoubleWritable getAmount() {
        return amount;
    }

    public void setAmount(DoubleWritable amount) {
        this.amount = amount;
    }

    /** Writes the item id (as UTF) followed by the amount (as double). */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(itemId.toString());
        out.writeDouble(amount.get());
    }

    /** Reads the fields in the same order {@link #write(DataOutput)} emits them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        String item = in.readUTF();
        double value = in.readDouble();
        this.itemId = new Text(item);
        this.amount = new DoubleWritable(value);
    }

    /**
     * Orders by {@code itemId} ascending; ties are broken by {@code amount}
     * from high to low.
     */
    @Override
    public int compareTo(OrderBean o) {
        int byItem = this.itemId.compareTo(o.getItemId());
        if (byItem != 0) {
            return byItem;
        }
        // Operands are swapped (instead of negating the result) to get the
        // descending order without relying on the magnitude of the result.
        return o.getAmount().compareTo(this.getAmount());
    }

    /** Two beans are equal when both their item id and amount are equal. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof OrderBean)) {
            return false;
        }
        OrderBean other = (OrderBean) obj;
        return java.util.Objects.equals(itemId, other.itemId)
                && java.util.Objects.equals(amount, other.amount);
    }

    @Override
    public int hashCode() {
        return java.util.Objects.hash(itemId, amount);
    }

    /** @return {@code itemId}, a tab, then the amount */
    @Override
    public String toString() {
        return itemId.toString() + "\t" + amount.get();
    }
}



//写一个继承Partitioner的类,重写getPartition(被分区的键,被分区的值,分区的数量)方法

/**
 * Routes every {@link OrderBean} with the same item id to the same reducer.
 */
public class OrderPartition extends Partitioner<OrderBean, NullWritable> {

    /**
     * @param bean          key being partitioned; only its item id is used
     * @param arg1          value being partitioned (ignored)
     * @param numReduceTask number of partitions
     * @return a partition index in {@code [0, numReduceTask)}
     */
    @Override
    public int getPartition(OrderBean bean, NullWritable arg1, int numReduceTask) {
        // hashCode() may be negative, and a negative remainder is not a valid
        // partition number. Clearing the sign bit keeps the result non-negative
        // while leaving the outcome for non-negative hashes unchanged.
        return (bean.getItemId().hashCode() & Integer.MAX_VALUE) % numReduceTask;
    }
}


//分组的类继承WritableComparator,

/**
 * Grouping comparator that treats two {@link OrderBean} keys as the same group
 * whenever their item ids compare equal; the amount is ignored.
 */
public class ItemGroupingComparator extends WritableComparator {

    /**
     * The super constructor must be called with the key class. The second
     * argument is {@code true} (presumably asking the parent to create key
     * instances for deserialization; confirm against the WritableComparator
     * documentation).
     */
    public ItemGroupingComparator() {
        super(OrderBean.class, true);
    }

    /**
     * Compares two keys by item id only.
     *
     * <p>Author's note: the parameters must be declared as
     * {@code WritableComparable}. If they are declared as {@code Object}, the
     * reduce side outputs every sorted record instead of one record per group.
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        return ((OrderBean) a).getItemId().compareTo(((OrderBean) b).getItemId());
    }
}



//mapreduce主方法:分组时只比较bean的itemId,itemId相同的bean被视作同一组。

//根据reduce机制,每组取第一个bean作为key,其余的作为values迭代值;由于每个分区中的bean已按amount从高到低排序,每组的第一个key就是该itemId下amount最大的记录

/**
 * Job driver: for each item id, emits the record with the largest amount.
 *
 * <p>The mapper turns each comma-separated input line into an
 * {@link OrderBean} key. The job is configured with {@link OrderPartition} as
 * partitioner and {@link ItemGroupingComparator} as grouping comparator, and
 * the reducer writes only the key it is handed for each group.
 */
public class SortPartition {

    /** Parses one input line into an {@link OrderBean} key with a null value. */
    static class SortPartitionMap extends Mapper<LongWritable, Text, OrderBean, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = StringUtils.split(value.toString(), ",");
            // Field 0 is the item id and field 2 is the amount. Field 1 is not
            // used by this job.
            Text itemId = new Text(fields[0]);
            DoubleWritable amount = new DoubleWritable(Double.parseDouble(fields[2]));
            context.write(new OrderBean(itemId, amount), NullWritable.get());
        }
    }

    /** Writes the key received for each group once; the values are ignored. */
    static class SortPartitionReduce
            extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {
        @Override
        protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 2) {
            System.err.println("Usage: SortPartition <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(SortPartition.class);
        job.setMapperClass(SortPartitionMap.class);
        job.setReducerClass(SortPartitionReduce.class);
        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Custom grouping comparator: keys with the same item id form one group.
        job.setGroupingComparatorClass(ItemGroupingComparator.class);

        // Custom partitioner: keys with the same item id go to the same reducer.
        job.setPartitionerClass(OrderPartition.class);

        job.setNumReduceTasks(3);

        // Propagate the job outcome so a failed job does not exit with status 0.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}


  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值