mapreduce自定义GroupingComparator
1.他的作用:先看图
GroupingComparator 作用在 reduce 阶段之前(即 reduce 端对已排序的记录进行分组的时候)。如果没有自定义 GroupingComparator,那么当 key 为 bean 的时候,只有两个 bean 参与排序的成员变量都相等,才会被归入同一组交给 reduce。
而自定义 GroupingComparator 可以在两个 bean 的成员变量不完全相等的时候做一些手脚,"欺骗" reduce,让它认为两个 bean 是相同的 key,从而归入同一组。
那么这有什么好处呢?
很简单:把一部分筛选工作提前做了,减少 reduce 的压力。
2.排序规则和分组规则
2.1如果不自定义public class MyGroupingComparator extends WritableComparator 的话,排序规则和分组规则是一样的
2.2当我们自定义public class MyGroupingComparator extends WritableComparator 的话,分组规则就是在排序规则上减少要求:
2.3区别
看个代码片段
@Override
public int compareTo(Shopping o) {
return this.p_id-o.p_id==0?o.money-this.money:this.p_id-o.p_id;
}
这个要求是如果p_id相同的,并不是就都进一个组了,还要money相同才能进一个组里边,
当我们自定义分组规则时候
现在就是p_id相同就进一个组了
2.4细节说明
分析
上图中compareTo规则依次是A->B->C->D
而compare的规则只能从上边的规则后边减少
可能是 ABC
可能是AB
可能是A
也就是说,相当于把一个水管的闸慢慢打开,让更多的元素被当成同一组。
3.看个案例
TopN:order_id p_id money
1 1 222
1 5 25
2 2 2000
2 4 122
2 5 722
3 1 222
1 1 1000
1 5 5000
2 3 3000
2 4 4000
2 2 722
3 1 221
1 5 25
2 2 2000
2 4 122
2 5 722
3 1 222
1 1 1000
1 5 5000
2 3 3000
2 4 4000
2 2 722
3 1 221
需求:求第二个字段中最大的前二个值
shopp
package cn.yzx.bigdata.mr.groupingcomparator;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
/*
 * 需求: 求出每一个订单中成交金额最大的一笔交易 (原注释因编码损坏, 此处按上下文重建)
* Order_0000001 Pdt_01 222.8
Order_0000001 Pdt_05 25.8
Order_0000002 Pdt_03 522.8
Order_0000002 Pdt_04 122.4
Order_0000002 Pdt_05 722.4
Order_0000003 Pdt_01 222.8
*/
/**
 * MapReduce composite key: one shopping record (order_id, p_id, money).
 *
 * Sort order ({@link #compareTo}): ascending p_id, then descending money,
 * so the highest-money record of each p_id group arrives at the reducer first.
 * equals/hashCode are defined consistently with compareTo so that the default
 * HashPartitioner sends all records of one p_id to the same reducer.
 */
public class Shopping implements WritableComparable<Shopping> {

    private int order_id; // order identifier
    private int p_id;     // product identifier (grouping field)
    private int money;    // transaction amount

    /** No-arg constructor required by Hadoop's Writable deserialization. */
    public Shopping() {
    }

    public Shopping(int order_id, int p_id, int money) {
        this.order_id = order_id;
        this.p_id = p_id;
        this.money = money;
    }

    public int getOrder_id() {
        return order_id;
    }

    public void setOrder_id(int order_id) {
        this.order_id = order_id;
    }

    public int getP_id() {
        return p_id;
    }

    public void setP_id(int p_id) {
        this.p_id = p_id;
    }

    public int getMoney() {
        return money;
    }

    public void setMoney(int money) {
        this.money = money;
    }

    @Override
    public String toString() {
        return order_id + "," + p_id + "," + money;
    }

    /** Serializes the three fields; order must mirror {@link #readFields}. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(order_id);
        out.writeInt(p_id);
        out.writeInt(money);
    }

    /** Deserializes the three fields in the same order as {@link #write}. */
    @Override
    public void readFields(DataInput in) throws IOException {
        order_id = in.readInt();
        p_id = in.readInt();
        money = in.readInt();
    }

    /**
     * Ascending by p_id, then descending by money.
     * Integer.compare is used instead of subtraction, which can overflow
     * (e.g. Integer.MIN_VALUE - 1) and yield a wrong sign.
     */
    @Override
    public int compareTo(Shopping o) {
        int byPid = Integer.compare(this.p_id, o.p_id);
        return byPid != 0 ? byPid : Integer.compare(o.money, this.money);
    }

    /**
     * Hash on p_id only so all records of one product land in the same
     * partition; equal objects (same p_id and money) share this hash.
     */
    @Override
    public int hashCode() {
        return Integer.hashCode(p_id);
    }

    /** Consistent with compareTo: equal when p_id and money both match. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Shopping)) {
            return false;
        }
        Shopping other = (Shopping) obj;
        return p_id == other.p_id && money == other.money;
    }
}
MyGroupingComparator
package cn.yzx.bigdata.mr.groupingcomparator;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Reduce-side grouping comparator: treats two Shopping keys as belonging to
 * the same reduce group whenever their p_id matches, ignoring money. Combined
 * with Shopping's sort order (p_id asc, money desc) this lets the reducer read
 * the top-N money records of each product from the front of the group.
 */
public class MyGroupingComparator extends WritableComparator {

    protected MyGroupingComparator() {
        // true => create key instances so the object-based compare() below is used
        super(Shopping.class, true);
    }

    /**
     * Compares only p_id. Integer.compare avoids the sign error that
     * int subtraction produces on overflow.
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        Shopping left = (Shopping) a;
        Shopping right = (Shopping) b;
        return Integer.compare(left.getP_id(), right.getP_id());
    }
}
GroupingComparator
package cn.yzx.bigdata.mr.groupingcomparator;
import java.io.File;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/*
 * 输入文件格式 (tab 分隔): order_id p_id money
 * 示例:
1 1 222
1 5 25
2 3 522
*/
/**
 * Driver for the "top-N money per p_id" job. The mapper emits each input line
 * as a Shopping key; sorting (p_id asc, money desc) plus MyGroupingComparator
 * (group by p_id) means the reducer sees each product's records ordered by
 * money descending and only has to take the first TOP_N of them.
 */
public class GroupingComparator {

    /** Parses one tab-separated line "order_id\tp_id\tmoney" into a Shopping key. */
    static class GroupingComparatorMapper extends Mapper<LongWritable, Text, Shopping, NullWritable> {

        // Reused across map() calls to avoid per-record allocation; safe because
        // the framework serializes the key before the next call mutates it.
        Shopping s = new Shopping();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] fields = line.split("\t");
            s.setOrder_id(Integer.parseInt(fields[0]));
            s.setP_id(Integer.parseInt(fields[1]));
            s.setMoney(Integer.parseInt(fields[2]));
            context.write(s, NullWritable.get());
        }
    }

    /** Writes the first TOP_N keys of each p_id group (already sorted by money desc). */
    static class GroupingComparatorReducer extends Reducer<Shopping, NullWritable, Shopping, NullWritable> {

        private static final int TOP_N = 2;

        @Override
        protected void reduce(Shopping key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            int written = 0;
            // Iterating values advances the grouped iterator; the key object is
            // updated in place to the current record, so writing it here emits
            // the record currently under the iterator.
            for (NullWritable value : values) {
                context.write(key, NullWritable.get());
                if (++written >= TOP_N) {
                    break; // top-N reached; skip the rest of this group
                }
            }
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Run locally against the local filesystem (no cluster needed).
        conf.set("mapreduce.framework.name", "local");
        conf.set("fs.defaultFS", "file:///");
        conf.set("mapred.textoutputformat.separator", " ");

        Job job = Job.getInstance(conf);
        job.setJarByClass(GroupingComparator.class);
        job.setMapperClass(GroupingComparatorMapper.class);
        job.setReducerClass(GroupingComparatorReducer.class);
        // Group reduce input by p_id only (see MyGroupingComparator).
        job.setGroupingComparatorClass(MyGroupingComparator.class);
        // Declare map output types explicitly; they match the final output types.
        job.setMapOutputKeyClass(Shopping.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Shopping.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.setInputPaths(job, new Path("C:/mapreduce/Shoppinginput"));
        Path outpath = new Path("C:/mapreduce/Shoppingoutput");
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outpath)) {
            // Remove a stale output dir; otherwise the job fails on startup.
            fs.delete(outpath, true);
        }
        FileOutputFormat.setOutputPath(job, outpath);

        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}