二次排序(GroupingComparator)

1)需求

有如下订单数据

订单id

商品id

成交金额

0000001

Pdt_01

222.8

0000001

Pdt_06

25.8

0000002

Pdt_03

522.8

0000002

Pdt_04

122.4

0000002

Pdt_05

722.4

0000003

Pdt_01

222.8

0000003

Pdt_02

33.8

现在需要求出每一个订单中最贵的商品。

2)输入数据:                                                                                输出数据预期:

3)分析

(1)利用“订单id和成交金额”作为key,可以将map阶段读取到的所有订单数据按照id分区,按照金额排序,发送到reduce。

(2)在reduce端利用groupingcomparator将订单id相同的kv聚合成组,然后取第一个即是最大值。

4)代码实现

(1)定义订单信息OrderBean

package com.lzz.twoOrder;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;

/**
 * Composite map-output key for the secondary sort: records are partitioned
 * and grouped by {@code orderId} and sorted by {@code orderPrice} in
 * descending order within each order, so the first record a reducer sees
 * for an order is the most expensive item.
 */
public class OrderBean implements WritableComparable<OrderBean>{
	private long orderId;
	private double orderPrice;

	/** No-arg constructor required by Hadoop for deserialization. */
	public OrderBean() {
		super();
	}

	public OrderBean(long orderId, double orderPrice) {
		super();
		this.orderId = orderId;
		this.orderPrice = orderPrice;
	}

	@Override
	public String toString() {
		return orderId + "\t" + orderPrice;
	}

	public long getOrderId() {
		return orderId;
	}

	public void setOrderId(long orderId) {
		this.orderId = orderId;
	}

	public double getOrderPrice() {
		return orderPrice;
	}

	public void setOrderPrice(double orderPrice) {
		this.orderPrice = orderPrice;
	}

	/** Serializes the key; field order must match {@link #readFields}. */
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeLong(orderId);
		out.writeDouble(orderPrice);
	}

	/** Deserializes the key; field order must match {@link #write}. */
	@Override
	public void readFields(DataInput in) throws IOException {
		this.orderId = in.readLong();
		this.orderPrice = in.readDouble();
	}

	/**
	 * Secondary sort: orderId ascending, then price descending.
	 *
	 * <p>Bug fix: the original returned 1 when the two prices were equal,
	 * breaking the Comparable contract (both {@code a.compareTo(b)} and
	 * {@code b.compareTo(a)} yielded 1). {@code Double.compare} returns 0
	 * for equal prices and also handles NaN consistently.
	 */
	@Override
	public int compareTo(OrderBean orderBean) {
		int res = Long.compare(orderId, orderBean.getOrderId());
		if (res == 0) {
			// Reverse the operand order to sort prices descending.
			res = Double.compare(orderBean.getOrderPrice(), orderPrice);
		}
		return res;
	}
}

(2)编写TwoOrderMapper

package com.lzz.twoOrder;


import java.io.IOException;

import org.apache.commons.lang.ObjectUtils.Null;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Parses one order record per input line and emits an {@link OrderBean}
 * composite key with a {@link NullWritable} value.
 *
 * <p>Expected line format (tab-separated): orderId, productId, price.
 */
public class TwoOrderMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable>{
	// Reused across map() calls; safe because Hadoop serializes the key
	// on each context.write() before this instance is mutated again.
	private final OrderBean outKey = new OrderBean();

	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		String[] fields = value.toString().split("\t");

		outKey.setOrderId(Long.parseLong(fields[0]));
		// fields[1] is the product id; it is not needed for this job.
		outKey.setOrderPrice(Double.parseDouble(fields[2]));

		context.write(outKey, NullWritable.get());
	}
}

(3)编写TwoOrderPartitioner

提示:分区逻辑可参考默认的 HashPartitioner(在 Eclipse 中按 Ctrl+T 查看其实现),它同样采用 hashCode 与 Integer.MAX_VALUE 按位与后再对 reduce 数取模的方式。

package com.lzz.twoOrder;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Partitions map output by {@code orderId} only, so that every record of
 * the same order lands on the same reducer regardless of its price.
 */
public class TwoOrderPartitioner extends Partitioner<OrderBean, NullWritable>{

	@Override
	public int getPartition(OrderBean key, NullWritable value, int numPartitions) {
		// Mask the sign bit so the modulo operand is non-negative,
		// then bucket the id across the available partitions.
		long nonNegativeId = key.getOrderId() & Integer.MAX_VALUE;
		return (int) nonNegativeId % numPartitions;
		// At this point sorting and partitioning are done, e.g.:
		//   1  222.8        3  222.8
		//   1  33.8         3  33.8
		//   1  25.8
	}

}

(4)编写OrderGroupingComparator

package com.lzz.twoOrder;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Groups reducer input by {@code orderId} only, ignoring the price half of
 * the composite key, so all records of one order are delivered to a single
 * reduce() call; the first value in the group is then the maximum price.
 */
public class OrderGroupingComparator extends WritableComparator{

	protected OrderGroupingComparator() {
		// Register the key class and let the comparator create
		// OrderBean instances for deserialized comparison.
		super(OrderBean.class, true);
	}

	@Override
	public int compare(WritableComparable a, WritableComparable b) {
		OrderBean left = (OrderBean) a;
		OrderBean right = (OrderBean) b;
		// Long.compare yields exactly -1/0/1, matching the original
		// hand-written if/else chain.
		return Long.compare(left.getOrderId(), right.getOrderId());
	}
}

(5)编写驱动类TwoOrderDriver

package com.lzz.twoOrder;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.lzz.order.OrderDriver;
import com.lzz.order.OrderMapper;
import com.lzz.order.OrderReducer;

/**
 * Driver for the secondary-sort job.
 *
 * <p>Usage: args[0] = input path, args[1] = output path.
 * Exits with 0 on success, 1 on failure.
 */
public class TwoOrderDriver {
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		Job job = Job.getInstance(new Configuration());

		job.setJarByClass(TwoOrderDriver.class);
		job.setMapperClass(TwoOrderMapper.class);
		job.setReducerClass(TwoOrderReducer.class);

		// Map and final output share the same types: composite key, no value.
		job.setMapOutputKeyClass(OrderBean.class);
		job.setMapOutputValueClass(NullWritable.class);
		job.setOutputKeyClass(OrderBean.class);
		job.setOutputValueClass(NullWritable.class);

		// Group reduce input by orderId only (price is ignored when grouping).
		job.setGroupingComparatorClass(OrderGroupingComparator.class);

		// Route identical orderIds to the same one of three reducers.
		job.setPartitionerClass(TwoOrderPartitioner.class);
		job.setNumReduceTasks(3);

		FileInputFormat.setInputPaths(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}

 

已标记关键词 清除标记
©️2020 CSDN 皮肤主题: 像素格子 设计师:CSDN官方博客 返回首页