需求:将下面数据,按照订单号分组,并按照价格倒序排列
订单号 商品 价格
0000001 Pdt_01 222.8
0000002 Pdt_06 722.4
0000001 Pdt_05 25.8
0000003 Pdt_01 222.8
0000003 Pdt_01 33.8
0000002 Pdt_03 522.8
0000002 Pdt_04 122.4
......
1:创建OrderBean类
package GroupingComparator;
import lombok.AllArgsConstructor;
import lombok.NoArgsConstructor;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
@AllArgsConstructor
@NoArgsConstructor
public class OrderBean implements WritableComparable<OrderBean> {
    // Order id (first column of the input line).
    private int order_id;
    // Item price (third column of the input line).
    private double price;

    public int getOrder_id() {
        return order_id;
    }

    public void setOrder_id(int order_id) {
        this.order_id = order_id;
    }

    public double getPrice() {
        return price;
    }

    public void setPrice(double price) {
        this.price = price;
    }

    @Override
    public String toString() {
        return order_id + "\t" + price;
    }

    /**
     * Serialization: field order must match {@link #readFields}.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(order_id);
        out.writeDouble(price);
    }

    /**
     * Deserialization: reads fields in the exact order written by {@link #write}.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.order_id = in.readInt();
        this.price = in.readDouble();
    }

    /**
     * Secondary sort: order_id ascending, then price descending.
     *
     * <p>Fix over the original: the old price branch returned {@code -1}/{@code 1}
     * only and never {@code 0}, which breaks the Comparable contract
     * (sgn(x.compareTo(y)) must equal -sgn(y.compareTo(x))) and can make
     * sorting unstable. {@link Double#compare} also handles NaN consistently,
     * unlike a raw {@code >} comparison.
     */
    @Override
    public int compareTo(OrderBean o) {
        int byId = Integer.compare(this.order_id, o.order_id);
        if (byId != 0) {
            return byId;
        }
        // Arguments swapped to get descending price order.
        return Double.compare(o.price, this.price);
    }
}
2:创建OrderMap类
package GroupingComparator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
//<id & price>
// Emits each input record as an OrderBean key (id + price) with a null value.
public class OrderMap extends Mapper<LongWritable, Text, OrderBean, NullWritable> {
    // Reused across map() calls — safe because context.write() serializes
    // the key immediately, so the next mutation cannot corrupt prior output.
    OrderBean k = new OrderBean();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Expected line layout: orderId \t product \t price
        String[] cols = value.toString().split("\t");
        k.setOrder_id(Integer.parseInt(cols[0]));
        k.setPrice(Double.parseDouble(cols[2]));
        context.write(k, NullWritable.get());
    }
}
3:创建OrderReducer类
package GroupingComparator;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
// Writes each incoming group's key once; with a grouping comparator in place,
// the key seen here is the first (sort-leading) key of the group.
public class OrderReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {
    @Override
    protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        NullWritable nothing = NullWritable.get();
        context.write(key, nothing);
    }
}
4:创建OrderGroupingComparator类
package GroupingComparator;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Groups reduce input by order_id only.
 *
 * <p>Fix over the original: a grouping comparator must return 0 for any two
 * keys that belong to the same reduce group. The old implementation also
 * compared price and returned only -1/1 in the equal-id branch, so it NEVER
 * returned 0 — meaning no two records were ever grouped together and the
 * comparator was effectively a no-op. Comparing order_id alone makes all
 * records of one order arrive in a single reduce() call (already sorted by
 * price descending via OrderBean.compareTo).
 */
public class OrderGroupingComparator extends WritableComparator {
    public OrderGroupingComparator() {
        // 'true' tells WritableComparator to instantiate OrderBean keys
        // so the object-based compare() below is used.
        super(OrderBean.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        OrderBean left = (OrderBean) a;
        OrderBean right = (OrderBean) b;
        // Equal ids => same group (0); Integer.compare avoids subtraction overflow.
        return Integer.compare(left.getOrder_id(), right.getOrder_id());
    }
}
5:创建OrderPartition类
package GroupingComparator;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;
// Routes records by order id so every record of one order lands in the
// same partition (a prerequisite for the reduce-side grouping to work).
public class OrderPartition extends Partitioner<OrderBean, NullWritable> {
    @Override
    public int getPartition(OrderBean key, NullWritable value, int numPartitions) {
        // Mask the sign bit to keep the dividend non-negative (handles a
        // negative id without producing a negative partition index).
        int nonNegativeId = key.getOrder_id() & Integer.MAX_VALUE;
        return nonNegativeId % numPartitions;
    }
}
6:创建OrderDriver类,这里同时写了Map和Reduce的压缩,在实际中可以分别选择使用,也可同时使用
package GroupingComparator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Job driver: wires mapper, reducer, partitioner and grouping comparator,
 * and enables bzip2 compression on both map output and final job output.
 */
public class OrderDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Fall back to local test paths only when no CLI arguments are given
        // (the original unconditionally clobbered whatever the user passed).
        if (args.length < 2) {
            args = new String[]{"E:\\bigdata_code\\GroupingComparator.txt", "E:\\bigdata_code\\out"};
        }
        // 1. Build the configuration and set map-side compression BEFORE
        //    creating the Job: Job.getInstance(conf) takes a COPY of the
        //    Configuration, so mutations made afterwards (as in the original
        //    code) are silently ignored by the job.
        Configuration conf = new Configuration();
        conf.setBoolean("mapreduce.map.output.compress", true);
        conf.setClass("mapreduce.map.output.compress.codec", BZip2Codec.class, CompressionCodec.class);
        Job job = Job.getInstance(conf);
        // 2. Jar lookup by driver class.
        job.setJarByClass(OrderDriver.class);
        // 3. Mapper and its output key/value types.
        job.setMapperClass(OrderMap.class);
        job.setMapOutputKeyClass(OrderBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        // 4. Reducer and the job's final output key/value types.
        job.setReducerClass(OrderReducer.class);
        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);
        // 5. Reduce-side grouping (by order id) and custom partitioning.
        job.setGroupingComparatorClass(OrderGroupingComparator.class);
        job.setPartitionerClass(OrderPartition.class);
        job.setNumReduceTasks(3);
        // Input/output paths.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Compress the final (reduce) output with bzip2.
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
        // Submit and propagate success/failure as the process exit code so
        // calling scripts can detect a failed job.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}