一、数据部分如下:
1001 Tmall_01 998
1001 Tmall_06 88.8
1001 Tmall_03 522.8
1002 Tmall_03 522.8
1002 Tmall_04 132.4
1002 Tmall_05 372.4
1003 Tmall_01 998
1003 Tmall_02 8.5
1003 Tmall_04 132.4
需求:
订单id正序,成交金额倒序。
上面的数据共有三个订单id,结果按订单id分成三个文件,每个结果文件只保留该订单成交金额最高的一条记录。
我的思路是:map阶段按订单id分区,并按"id正序、金额倒序"排序;reduce阶段使用辅助排序(分组比较器)把同一订单id的记录归为一组,只输出每组的第一条(即金额最高的一条)。
代码如下
OrderBean
/**
 * Composite MapReduce key holding an order id and a price.
 *
 * <p>Natural ordering: ascending order id, then descending price, so that for
 * a given order the most expensive record sorts first.
 */
public class OrderBean implements WritableComparable<OrderBean> {
    /** Order id. */
    private int order_id;
    /** Transaction price. */
    private double price;

    /** No-arg constructor; fields are filled in later by {@link #readFields(DataInput)} or the setters. */
    public OrderBean() {}

    /**
     * @param order_id the order id
     * @param price    the transaction price
     */
    public OrderBean(int order_id, double price) {
        this.order_id = order_id;
        this.price = price;
    }

    /** Serializes the order id followed by the price. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(order_id);
        out.writeDouble(price);
    }

    /** Deserializes the fields in the same order {@link #write(DataOutput)} emits them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        order_id = in.readInt();
        price = in.readDouble();
    }

    /** @return the order id and the price separated by a tab */
    @Override
    public String toString() {
        return order_id + "\t" + price;
    }

    public int getOrder_id() {
        return order_id;
    }

    public void setOrder_id(int order_id) {
        this.order_id = order_id;
    }

    public double getPrice() {
        return price;
    }

    public void setPrice(double price) {
        this.price = price;
    }

    /**
     * Orders by id ascending, then by price descending.
     *
     * <p>Returns 0 when both id and price are equal, as the {@link Comparable}
     * contract requires; the previous implementation returned 1 in that case,
     * which made {@code a.compareTo(b)} and {@code b.compareTo(a)} both positive.
     *
     * @param o the bean to compare against
     * @return negative, zero or positive as this bean sorts before, equal to or after {@code o}
     */
    @Override
    public int compareTo(OrderBean o) {
        int byId = Integer.compare(order_id, o.order_id);
        if (byId != 0) {
            return byId;
        }
        // Arguments are swapped on purpose: a higher price must sort first.
        return Double.compare(o.price, price);
    }

    /** Two beans are equal when both the order id and the price match, consistent with {@link #compareTo}. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof OrderBean)) {
            return false;
        }
        OrderBean other = (OrderBean) obj;
        return order_id == other.order_id && Double.compare(price, other.price) == 0;
    }

    /** Hash derived from the same fields {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        long priceBits = Double.doubleToLongBits(price);
        return 31 * order_id + (int) (priceBits ^ (priceBits >>> 32));
    }
}
OrderMapper
public class OrderMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> {
protected void map(LongWritable key, Text value,Context context)
throws java.io.IOException ,InterruptedException {
//1.获取每行数据
String line = value.toString();
//2.切分数据
String[] fields = line.split("\t");
//3.取出字段
Integer order_id = Integer.parseInt(fields[0]);
Double price = Double.parseDouble(fields[2]);
OrderBean ob = new OrderBean(order_id , price);
//4.输出
context.write(ob, NullWritable.get());
}
}
OrderPartitioner
/**
*
* @author chengguo
* @version 1.0
*/
public class OrderPartitioner extends Partitioner<OrderBean, NullWritable> {
@Override
public int getPartition(OrderBean key, NullWritable value, int numPartitions) {
return (key.getOrder_id() & Integer.MAX_VALUE) % numPartitions;
}
}
OrderGroupingComparator
//辅助排序类
public class OrderGroupingComparator extends WritableComparator {
protected OrderGroupingComparator() {
super(OrderBean.class,true);
}
//重写比较,已经倒序,直接返回第一个
public int compare(WritableComparable a,WritableComparable b) {
return 0;
}
}
OrderReducer
/**
 * Writes exactly one record per {@code reduce} call: the key it was invoked with.
 *
 * <p>The values are never iterated. The job relies on the sort order placing the
 * highest-priced record of a group first, so that this key is the one to keep.
 */
public class OrderReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {
    /**
     * @param key     the key passed in for this group
     * @param values  unused
     * @param context sink for the emitted record
     */
    @Override
    protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        final NullWritable noValue = NullWritable.get();
        // Emit the key as received; the value slot carries nothing.
        context.write(key, noValue);
    }
}
OrderDriver
/**
 * Configures and submits the order job: mapper, reducer, partitioner,
 * grouping comparator, three reduce tasks, and the input/output paths.
 */
public class OrderDriver {
    /** Input directory used when no command-line argument is given. */
    private static final String DEFAULT_INPUT = "C:\\BIGDATA\\Test\\in";
    /** Output directory used when fewer than two command-line arguments are given. */
    private static final String DEFAULT_OUTPUT = "C:\\BIGDATA\\Test\\out";

    /**
     * @param args optional: {@code args[0]} overrides the input path and
     *             {@code args[1]} overrides the output path; the defaults are
     *             used for whichever is absent
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        // 1. Create the job.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // 2. Locate the jar through the driver class.
        job.setJarByClass(OrderDriver.class);
        // 3. Mapper and reducer.
        job.setMapperClass(OrderMapper.class);
        job.setReducerClass(OrderReducer.class);
        // 4. Mapper output types. The value type was previously set through
        //    setOutputValueClass, which configures the reducer output instead.
        job.setMapOutputKeyClass(OrderBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        // 5. Reducer output types.
        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);
        // 6. Reduce-side grouping comparator (secondary sort).
        job.setGroupingComparatorClass(OrderGroupingComparator.class);
        // 7. Partitioner.
        job.setPartitionerClass(OrderPartitioner.class);
        // 8. Number of reduce tasks.
        job.setNumReduceTasks(3);
        // 9. Input and output locations.
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        // 10. Submit and wait for completion.
        boolean rs = job.waitForCompletion(true);
        int exitCode = rs ? 0 : 1;
        // Still printed as before; additionally reported as the process exit
        // status so that calling scripts can detect a failed job.
        System.out.println(exitCode);
        System.exit(exitCode);
    }
}