把上面的文件输出成下面的三个文件,按订单id划分输出每个订单中最贵的商品.
需求分析
Bean对象
package com.buba.mapreduce.order;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Composite map-output key: (orderId, price).
 *
 * Sorted ascending by order id, then DESCENDING by price, so within each
 * order group the highest-priced record arrives first at the reducer.
 */
public class OrderBean implements WritableComparable<OrderBean> {
    private String orderId; // order id (primary sort key)
    private Double price;   // item price (secondary sort key, descending)

    public String getOrderId() {
        return orderId;
    }

    public void setOrderId(String orderId) {
        this.orderId = orderId;
    }

    public Double getPrice() {
        return price;
    }

    public void setPrice(Double price) {
        this.price = price;
    }

    /** No-arg constructor required by Hadoop serialization. */
    public OrderBean() {
    }

    public OrderBean(String orderId, Double price) {
        this.orderId = orderId;
        this.price = price;
    }

    @Override
    public String toString() {
        return this.orderId + "\t" + this.price;
    }

    /**
     * Two-level sort: ascending by order id, then descending by price.
     *
     * FIX: the original ternary {@code this.price > o.getPrice() ? -1 : 1}
     * never returned 0 and returned 1 for BOTH a.compareTo(b) and
     * b.compareTo(a) when prices were equal, violating the Comparable
     * contract (sgn(a.compareTo(b)) must equal -sgn(b.compareTo(a))).
     * Double.compare with the operands swapped gives a contract-consistent
     * descending order and handles equal prices correctly.
     */
    @Override
    public int compareTo(OrderBean o) {
        // 1. primary: ascending by order id
        int comResult = this.orderId.compareTo(o.getOrderId());
        if (comResult == 0) {
            // 2. secondary: descending by price (arguments swapped on purpose)
            comResult = Double.compare(o.getPrice(), this.price);
        }
        return comResult;
    }

    /** Serialization — field order must match {@link #readFields}. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(orderId);
        dataOutput.writeDouble(price);
    }

    /** Deserialization — field order must match {@link #write}. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.orderId = dataInput.readUTF();
        this.price = dataInput.readDouble();
    }
}
mapper代码,把文件输出成下面这种格式
package com.buba.mapreduce.order;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Parses each tab-separated input line into an {@link OrderBean} key.
 * Column 0 is the order id; the last column is the item price.
 * Emits (OrderBean, NullWritable) — the bean itself carries all the data.
 */
public class OrderMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> {

    // Single reusable key object — Hadoop serializes it on each write,
    // so mutating it between writes is safe and avoids per-record allocation.
    OrderBean bean = new OrderBean();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] cols = value.toString().split("\t");
        bean.setOrderId(cols[0]);
        bean.setPrice(Double.valueOf(cols[cols.length - 1]));
        context.write(bean, NullWritable.get());
    }
}
按照订单id进行分区
package com.buba.mapreduce.order;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Routes every record of the same order id to the same reducer, so each
 * output file holds complete order groups.
 */
public class OrderPartitioner extends Partitioner<OrderBean, NullWritable> {
    @Override
    public int getPartition(OrderBean orderBean, NullWritable nullWritable, int i) {
        // Mask off the sign bit (hashCode may be negative), then bucket
        // by the number of reduce tasks.
        int nonNegativeHash = orderBean.getOrderId().hashCode() & Integer.MAX_VALUE;
        return nonNegativeHash % i;
    }
}
reduce阶段
package com.buba.mapreduce.order;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Emits one record per reduce group. With the grouping comparator comparing
 * only order ids, each reduce() call covers one whole order; because keys
 * are sorted by price descending within an order (see OrderBean.compareTo),
 * the key passed in is the highest-priced item of that order.
 */
public class OrderReducer extends Reducer<OrderBean, NullWritable,OrderBean, NullWritable> {
@Override
protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
// Write only the group's first key — the per-order maximum price record.
context.write(key,NullWritable.get());
}
}
Driver阶段
package com.buba.mapreduce.order;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Job driver: wires mapper, reducer, partitioner and grouping comparator.
 * args[0] = input path, args[1] = output path (must not exist).
 */
public class OrderDriver {
    public static void main(String[] args) throws Exception {
        // 1. job setup
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        // 2. locate the jar containing this driver
        job.setJarByClass(OrderDriver.class);
        // 3. wire up mapper and reducer
        job.setMapperClass(OrderMapper.class);
        job.setReducerClass(OrderReducer.class);
        // 4. map-phase output key/value types
        job.setMapOutputKeyClass(OrderBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        // 5. final output key/value types
        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);
        // Partition by order id into 3 reducers => 3 output files
        job.setPartitionerClass(OrderPartitioner.class);
        job.setNumReduceTasks(3);
        // FIX: this registration was commented out, so every record reached
        // reduce() as its own group and ALL records were emitted instead of
        // only the per-order maximum (as the notes below the driver observe).
        // Grouping by order id makes each reduce() call see one whole order,
        // whose first key — thanks to the descending price sort — is the max.
        job.setGroupingComparatorClass(OrderGroupingCompartor.class);
        // 6. input and output paths
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // 7. submit and wait for completion
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
没设置分组前,执行完是下面这样的,虽然分区了也排序了,但是没把最大值取出来
编写分组代码,根据订单id判断是否为一组
package com.buba.mapreduce.order;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Grouping comparator: two keys belong to the same reduce group iff their
 * order ids match. Price is deliberately ignored here, so all records of an
 * order — already sorted by price descending — enter a single reduce() call.
 */
public class OrderGroupingCompartor extends WritableComparator {

    /** Tell the parent which key class to instantiate for comparison. */
    public OrderGroupingCompartor() {
        super(OrderBean.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        // Group membership is decided by order id alone.
        String left = ((OrderBean) a).getOrderId();
        String right = ((OrderBean) b).getOrderId();
        return left.compareTo(right);
    }
}