自定义groupingcomparator

最新推荐文章于 2021-04-28 23:16:49 发布

林森见鹿

最新推荐文章于 2021-04-28 23:16:49 发布

阅读量366

点赞数 1

本文链接：https://blog.csdn.net/qianyuanruqu/article/details/53783430

版权

//OrderBean：将需要组合在一起的字段组合成一个bean类。其中字段itemId用来分区，amount用来排序

/**
 * Composite MapReduce key holding an item id and an amount.
 *
 * <p>Natural ordering is by {@code itemId} ascending and, for equal item ids,
 * by {@code amount} descending, so the largest amount of each item sorts first.
 * {@link #equals(Object)} and {@link #hashCode()} use the same two fields.
 */
public class OrderBean implements WritableComparable<OrderBean> {

    private Text itemId;
    private DoubleWritable amount;

    /**
     * No-arg constructor required so the framework can instantiate the key and
     * then populate it through {@link #readFields(DataInput)}; without it the job
     * fails with "Unable to initialize any output collector".
     *
     * <p>Fields start out as empty writables rather than {@code null} so that a
     * freshly created bean can be compared, printed and serialized safely.
     */
    public OrderBean() {
        this.itemId = new Text();
        this.amount = new DoubleWritable();
    }

    /**
     * Creates a bean from the given item id and amount.
     *
     * @param itemId item identifier
     * @param amount amount associated with the item
     */
    public OrderBean(Text itemId, DoubleWritable amount) {
        this.itemId = itemId;
        this.amount = amount;
    }

    /** @return the item identifier */
    public Text getItemId() {
        return itemId;
    }

    /** @param itemId the item identifier to set */
    public void setItemId(Text itemId) {
        this.itemId = itemId;
    }

    /** @return the amount */
    public DoubleWritable getAmount() {
        return amount;
    }

    /** @param amount the amount to set */
    public void setAmount(DoubleWritable amount) {
        this.amount = amount;
    }

    /**
     * Serializes the bean as a UTF string (item id) followed by a double (amount).
     *
     * @param out sink to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(itemId.toString());
        out.writeDouble(amount.get());
    }

    /**
     * Restores the bean from the layout produced by {@link #write(DataOutput)}.
     *
     * @param in source to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        String item = in.readUTF();
        double value = in.readDouble();
        this.itemId = new Text(item);
        this.amount = new DoubleWritable(value);
    }

    /**
     * Orders by item id ascending, then by amount descending.
     *
     * @param o the bean to compare against
     * @return negative, zero or positive per the {@link Comparable} contract
     */
    @Override
    public int compareTo(OrderBean o) {
        int byItem = this.itemId.compareTo(o.getItemId());
        if (byItem != 0) {
            return byItem;
        }
        // Operands are swapped (instead of negating the result) to get descending order.
        return o.getAmount().compareTo(this.getAmount());
    }

    /** Two beans are equal when both their item id and amount are equal. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof OrderBean)) {
            return false;
        }
        OrderBean other = (OrderBean) obj;
        return itemId.equals(other.itemId) && amount.equals(other.amount);
    }

    /** Hash derived from the same fields that {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        return 31 * itemId.hashCode() + amount.hashCode();
    }

    /** @return the item id and the amount separated by a tab */
    @Override
    public String toString() {
        return itemId.toString() + "\t" + amount.get();
    }

}

//写一个继承Partitioner类的类，重写getPartition(被分区的键，被分区的值，需要分区的数量)方法

/**
 * Routes every {@link OrderBean} with the same item id to the same reduce task.
 */
public class OrderPartition extends Partitioner<OrderBean, NullWritable> {

    /**
     * Chooses the partition from the hash of the bean's item id.
     *
     * @param bean          key being partitioned
     * @param arg1          value being partitioned (unused)
     * @param numReduceTask total number of partitions
     * @return a partition index in {@code [0, numReduceTask)}
     */
    @Override
    public int getPartition(OrderBean bean, NullWritable arg1, int numReduceTask) {
        // hashCode() may be negative, and a negative remainder is not a valid
        // partition index; clearing the sign bit keeps the result non-negative.
        return (bean.getItemId().hashCode() & Integer.MAX_VALUE) % numReduceTask;
    }
}

//分组的类继承WritableComparator,

/**
 * Grouping comparator that treats two {@link OrderBean} keys as belonging to the
 * same reduce group whenever their item ids compare equal; the amount is ignored.
 */
public class ItemGroupingComparator extends WritableComparator {

    /**
     * Registers {@link OrderBean} as the key class. The call to {@code super} is
     * mandatory; the {@code true} flag is passed through to the parent constructor.
     */
    public ItemGroupingComparator() {
        super(OrderBean.class, true);
    }

    /**
     * Compares two keys by item id only.
     *
     * <p>The parameters must be declared as {@code WritableComparable}. Per the
     * original author's note, declaring them as {@code Object} makes the reducer
     * emit every sorted record instead of one (the largest) per group.
     *
     * @param a first key, expected to be an {@link OrderBean}
     * @param b second key, expected to be an {@link OrderBean}
     * @return the result of comparing the two item ids
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        return ((OrderBean) a).getItemId().compareTo(((OrderBean) b).getItemId());
    }
}

//mapreduce主方法：通过分组之后，itemId相同的bean被视作一组（分组时只比较bean的itemId，以此判断两个bean是否属于同一组）。

//根据reduce机制，每组的第一个bean作为key，其余的通过values迭代；由于同一itemId内已按amount从高到低排序，所以在每个分区中，每组的key就是该itemId下amount最大的bean。

/**
 * Job driver: for every item id, outputs the record with the largest amount.
 *
 * <p>Keys are partitioned by item id ({@code OrderPartition}), sorted by item id
 * and then amount descending ({@code OrderBean#compareTo}), and grouped by item
 * id ({@code ItemGroupingComparator}). The reducer writes only the key it is
 * handed for each group, which is the first one in sort order.
 */
public class SortPartition {

    /**
     * Parses comma-separated lines into {@code OrderBean} keys. Field 0 is the
     * item id and field 2 the amount; field 1 is not used.
     */
    static class SortPartitionMap extends Mapper<LongWritable, Text, OrderBean, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = StringUtils.split(value.toString(), ",");
            // Blank or truncated lines (e.g. a trailing empty line) carry no usable
            // record; skip them instead of failing the task with an index error.
            if (fields == null || fields.length < 3) {
                return;
            }
            Text itemId = new Text(fields[0]);
            // A non-numeric amount still fails loudly with NumberFormatException.
            DoubleWritable amount = new DoubleWritable(Double.parseDouble(fields[2]));
            context.write(new OrderBean(itemId, amount), NullWritable.get());
        }
    }

    /**
     * Emits one record per group: the key passed in, without iterating the values.
     */
    static class SortPartitionReduce extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {
        @Override
        protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IOException            if job setup or submission fails
     * @throws ClassNotFoundException if a configured class cannot be loaded
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(SortPartition.class);
        job.setMapperClass(SortPartitionMap.class);
        job.setReducerClass(SortPartitionReduce.class);
        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Custom grouping comparator: group keys by item id.
        job.setGroupingComparatorClass(ItemGroupingComparator.class);
        // Custom partitioner: partition keys by item id.
        job.setPartitionerClass(OrderPartition.class);
        job.setNumReduceTasks(3);
        // Propagate job success/failure to the caller through the exit status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

林森见鹿

关注

1
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
自定义groupingcomparator

//OrderBean：将需要组合在一起的字段组合成一个bean类。其中字段itemId用来分区，amount用来排序public class OrderBean implements WritableComparable {private Text itemId;private DoubleWritable amount;//必须有默认的构造器皿，这样Mapreduce方法才
复制链接

扫一扫