mapreduce GroupingComparator mapreduce排序规则和分组规则

mapreduce自定义GroupingComparator

1.他的作用:先看图


GroupingComparator作用在reduce端,在调用reduce()之前对key进行分组。如果没有自定义GroupingComparator,那么当key为bean时,只有两个bean的所有成员变量都相等时,才会被分到同一组交给reduce处理。
而自定义GroupingComparator之后,即使两个bean有成员变量不相等,它也可以"做手脚"欺骗reduce,让reduce认为两个bean是相同的key。
那么这样做有什么好处呢?
很简单:把一些工作提前做了(利用排序阶段的结果直接取组内前几条),从而减少reduce的压力。

2.排序规则和分组规则

2.1如果不自定义public class MyGroupingComparator  extends WritableComparator 的话,排序规则和分组规则是一样的

2.2当我们自定义public class MyGroupingComparator  extends WritableComparator 的话,分组规则就是在排序规则上减少要求:

2.3区别

看个代码片段
	@Override
	public int compareTo(Shopping o) {
		
		return this.p_id-o.p_id==0?o.money-this.money:this.p_id-o.p_id;
	}
这个要求是如果p_id相同的,并不是就都进一个组了,还要money相同才能进一个组里边,
当我们自定义分组规则时候


现在就是p_id相同就进一个组了

2.4细节说明




分析


上图中compareTo规则依次是A->B->C->D

而compare的规则只能从上边的规则后边减少
可能是 ABC
可能是AB
可能是A
也就是说,相当于把水管的闸门慢慢打开,让更多的元素被当成同一组。

3.看个案例

TopN:order_id    p_id    money 
1 1 222
1 5 25
2 2 2000
2 4 122
2 5 722
3 1 222
1 1 1000
1 5 5000
2 3 3000
2 4 4000
2 2 722
3 1 221
需求:求第二个字段中最大的前二个值
shopp
package cn.yzx.bigdata.mr.groupingcomparator;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;

/*
 * Requirement: for each order, find the transaction with the largest amount.
 * Sample input (order_id, product_id, amount):
 * Order_0000001	Pdt_01	222.8
Order_0000001	Pdt_05	25.8
Order_0000002	Pdt_03	522.8
Order_0000002	Pdt_04	122.4
Order_0000002	Pdt_05	722.4
Order_0000003	Pdt_01	222.8
 */
/**
 * Composite MapReduce key: one shopping record (order_id, p_id, money).
 *
 * Sort order ({@link #compareTo}): ascending p_id, then descending money,
 * so within each product the highest amounts reach the reducer first.
 * Serialization order in {@link #write} and {@link #readFields} must match.
 */
public class Shopping implements WritableComparable<Shopping>{
	private int order_id;
	private int p_id;
	private int money;

	/** No-arg constructor required by Hadoop serialization (readFields). */
	public Shopping() {

	}

	public Shopping(int order_id, int p_id, int money) {
		this.order_id = order_id;
		this.p_id = p_id;
		this.money = money;
	}

	public int getOrder_id() {
		return order_id;
	}

	public void setOrder_id(int order_id) {
		this.order_id = order_id;
	}

	public int getP_id() {
		return p_id;
	}

	public void setP_id(int p_id) {
		this.p_id = p_id;
	}

	public int getMoney() {
		return money;
	}

	public void setMoney(int money) {
		this.money = money;
	}

	@Override
	public String toString() {
		return order_id + "," + p_id + "," + money;
	}

	/** Serializes fields; order must mirror {@link #readFields}. */
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeInt(order_id);
		out.writeInt(p_id);
		out.writeInt(money);
	}

	/** Deserializes fields in the exact order written by {@link #write}. */
	@Override
	public void readFields(DataInput in) throws IOException {
		order_id = in.readInt();
		p_id = in.readInt();
		money = in.readInt();
	}

	/**
	 * Sort comparator: p_id ascending, then money descending.
	 *
	 * Uses {@link Integer#compare} instead of subtraction: {@code a - b}
	 * overflows when the operands are far apart (e.g. a large positive and
	 * a negative value), which silently breaks the sort contract.
	 */
	@Override
	public int compareTo(Shopping o) {
		int byProduct = Integer.compare(this.p_id, o.p_id);
		// Descending money: compare the other operand first.
		return byProduct != 0 ? byProduct : Integer.compare(o.money, this.money);
	}
}

MyGroupingComparator


package cn.yzx.bigdata.mr.groupingcomparator;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Reduce-side grouping comparator: treats two {@link Shopping} keys as the
 * same group when their p_id values are equal.
 *
 * This is a deliberate relaxation of {@code Shopping.compareTo} (which also
 * orders by money), so all records of one product fall into a single
 * reduce() call while still arriving sorted by money descending.
 */
public class MyGroupingComparator extends WritableComparator {

	protected MyGroupingComparator() {
		// 'true' asks WritableComparator to instantiate Shopping keys so the
		// object-based compare(WritableComparable, WritableComparable) is used.
		super(Shopping.class, true);
	}

	@Override
	public int compare(WritableComparable a, WritableComparable b) {
		Shopping left = (Shopping) a;
		Shopping right = (Shopping) b;
		// Integer.compare avoids the overflow risk of subtraction-based compares.
		return Integer.compare(left.getP_id(), right.getP_id());
	}
}

GroupingComparator


package cn.yzx.bigdata.mr.groupingcomparator;


import java.io.File;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/*
 * Input format (tab-separated):
 * order_id		p_id		money
 * 1			1			222
 * 1			5			25
 * 2			3			522
 */


/**
 * Driver for the TopN-per-product job: with Shopping.compareTo sorting by
 * (p_id asc, money desc) and MyGroupingComparator grouping by p_id only,
 * the first TOP_N records of each reduce group are the N largest amounts
 * for that product.
 */
public class GroupingComparator {

	/** Parses one tab-separated line "order_id\tp_id\tmoney" into a Shopping key. */
	static class GroupingComparatorMapper extends Mapper<LongWritable, Text, Shopping, NullWritable> {
		// Reused across map() calls; Hadoop serializes the value on write(),
		// so mutating it afterwards is safe.
		private final Shopping record = new Shopping();

		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			String[] fields = value.toString().split("\t");
			record.setOrder_id(Integer.parseInt(fields[0]));
			record.setP_id(Integer.parseInt(fields[1]));
			record.setMoney(Integer.parseInt(fields[2]));
			context.write(record, NullWritable.get());
		}
	}

	/**
	 * Emits the first TOP_N keys of each group. Because keys within a group
	 * arrive sorted by money descending, these are the N largest amounts.
	 */
	static class GroupingComparatorReducer extends Reducer<Shopping, NullWritable, Shopping, NullWritable> {
		private static final int TOP_N = 2;

		@Override
		protected void reduce(Shopping key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
			int emitted = 0;
			// Iterating the values advances the key: Hadoop re-fills 'key'
			// with each record of the group.
			for (NullWritable ignored : values) {
				context.write(key, NullWritable.get());
				if (++emitted >= TOP_N) {
					break; // stop early instead of draining the rest of the group
				}
			}
		}
	}

	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		Configuration conf = new Configuration();
		// Run locally against the local filesystem (no cluster needed).
		conf.set("mapreduce.framework.name", "local");
		conf.set("fs.defaultFS", "file:///");
		// Non-deprecated key; the old "mapred.textoutputformat.separator"
		// is mapped to this one by Hadoop's config deprecation table.
		conf.set("mapreduce.output.textoutputformat.separator", " ");
		Job job = Job.getInstance(conf);

		job.setJarByClass(GroupingComparator.class);
		job.setMapperClass(GroupingComparatorMapper.class);
		job.setReducerClass(GroupingComparatorReducer.class);

		// Grouping (not sorting) rule: group keys by p_id only.
		job.setGroupingComparatorClass(MyGroupingComparator.class);

		// Map output types equal the final output types, so setting the
		// output classes once covers both stages.
		job.setOutputKeyClass(Shopping.class);
		job.setOutputValueClass(NullWritable.class);

		FileInputFormat.setInputPaths(job, new Path("C:/mapreduce/Shoppinginput"));

		// Delete a stale output directory so the job does not fail on rerun.
		Path outpath = new Path("C:/mapreduce/Shoppingoutput");
		FileSystem fs = FileSystem.get(conf);
		if (fs.exists(outpath)) {
			fs.delete(outpath, true);
		}
		FileOutputFormat.setOutputPath(job, outpath);

		boolean res = job.waitForCompletion(true);
		System.exit(res ? 0 : 1);
	}
}




  • 3
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值