使用groupingcomparator求同一订单中最大金额的订单

有如下订单数据,现在需要求出每一个订单中成交金额最大的一笔交易

订单id

商品id

成交金额

Order_0000001

Pdt_01

222.8

Order_0000001

Pdt_05

25.8

Order_0000002

Pdt_03

522.8

Order_0000002

Pdt_04

122.4

Order_0000002

Pdt_05

722.4

Order_0000003

Pdt_01

222.8

 

groupingcomparator作用:对mapTash的输出数据进行分组

测试数据

Order_0000001,Pdt_01,222.8
Order_0000001,Pdt_05,25.8
Order_0000002,Pdt_05,325.8
Order_0000002,Pdt_03,522.8
Order_0000002,Pdt_04,122.4
Order_0000003,Pdt_01,222.8

 

代码

package cn.feizhou.secondarysort;


import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * 利用reduce端的GroupingComparator来实现将一组bean看成相同的key,本质是相同的Id分为一组 
 *
 */
public class ItemidGroupingComparator extends WritableComparator {

	//传入作为key的bean的class类型,以及制定需要让框架做反射获取实例对象
	protected ItemidGroupingComparator() {
		super(OrderBean.class, true);
	}
	

	@Override
	public int compare(WritableComparable a, WritableComparable b) {
		OrderBean abean = (OrderBean) a;
		OrderBean bbean = (OrderBean) b;
		
		//比较两个bean时,指定只比较bean中的orderid
		return abean.getItemid().compareTo(bbean.getItemid());
		
	}

}
----------------------------------------------
/**
 * 分区
 *
 */
public class ItemIdPartitioner extends Partitioner<OrderBean, NullWritable>{

	@Override
	public int getPartition(OrderBean bean, NullWritable value, int numReduceTasks) {
		//相同id的订单bean,会发往相同的partition
		//而且,产生的分区数,是会跟用户设置的reduce task数保持一致
		//假如numReduceTasks=2,那么ID是奇数的分为一区,偶数的分为一区
		return (bean.getItemid().hashCode() & Integer.MAX_VALUE) % numReduceTasks;
		
	}

}
----------------------------------------------
/**
 * 订单类
 */
public class OrderBean implements WritableComparable<OrderBean>{

	private Text itemid;//ID
	private DoubleWritable amount;//价格

	public OrderBean() {
	}

	public OrderBean(Text itemid, DoubleWritable amount) {
		set(itemid, amount);

	}

	public void set(Text itemid, DoubleWritable amount) {
		this.itemid = itemid;
		this.amount = amount;

	}



	public Text getItemid() {
		return itemid;
	}

	public DoubleWritable getAmount() {
		return amount;
	}



	@Override
	public int compareTo(OrderBean o) {
		//如果ID相同,按价格降序
		int cmp = this.itemid.compareTo(o.getItemid());
		if (cmp == 0) {
			cmp = -this.amount.compareTo(o.getAmount());
		}
		return cmp;
	}

	@Override
	public void write(DataOutput out) throws IOException {
		out.writeUTF(itemid.toString());
		out.writeDouble(amount.get());
		
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		String readUTF = in.readUTF();
		double readDouble = in.readDouble();
		
		this.itemid = new Text(readUTF);
		this.amount= new DoubleWritable(readDouble);
	}


	@Override
	public String toString() {

		return itemid.toString() + "\t" + amount.get();
		
	}

}

----------------------------------------------
/**
 * 
 *
 */
public class SecondarySort {
	
	static class SecondarySortMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable>{
		
		OrderBean bean = new OrderBean();
		
		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

			String line = value.toString();
			String[] fields = StringUtils.split(line, ",");
			
			bean.set(new Text(fields[0]), new DoubleWritable(Double.parseDouble(fields[2])));
			
			context.write(bean, NullWritable.get());
			
		}
		
	}
	
	static class SecondarySortReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable>{
		
		
		//到达reduce时,相同id的所有bean已经被看成一组,且金额最大的那个一排在第一位,这边只要第一个
		@Override
		protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
			context.write(key, NullWritable.get());
		}
	}
	
	
	public static void main(String[] args) throws Exception {
		
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);
		
		job.setJarByClass(SecondarySort.class);
		
		job.setMapperClass(SecondarySortMapper.class);
		job.setReducerClass(SecondarySortReducer.class);
		
		
		job.setOutputKeyClass(OrderBean.class);
		job.setOutputValueClass(NullWritable.class);
		
		FileInputFormat.setInputPaths(job, new Path("H:/test"));
		FileOutputFormat.setOutputPath(job, new Path("H:/out"));
		
		//在此设置自定义的Groupingcomparator类 
		job.setGroupingComparatorClass(ItemidGroupingComparator.class);
		//定义分区算法
		job.setPartitionerClass(ItemIdPartitioner.class);
		
		//定义分区参数
		job.setNumReduceTasks(2);
		
		job.waitForCompletion(true);
		
	}

}

----------------------------------------------

测试结果

 

 

 

 

 

 

 

 

 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值