需求:
在输入文件中,求出每一个订单Id花费金额的最大值!!
输入文件:GoodsMax.txt
订单ID 商品ID 金额
Order_0000001 Pdt_01 222.8
Order_0000001 Pdt_05 25.8
Order_0000002 Pdt_05 325.8
Order_0000002 Pdt_03 522.8
Order_0000002 Pdt_04 122.4
Order_0000003 Pdt_01 222.8
Order_0000003 Pdt_01 322.8
输出结果:
Order_0000001 Pdt_01 222.8
Order_0000002 Pdt_03 522.8
Order_0000003 Pdt_01 322.8
分析: 主要考察两个知识点:
1.自定义Writable,实现compareTo;
2.分组:
我们可以利用分组,来帮助我们求出它的最大金额数,
什么是分组?即: key值相同的记录会被分到同一个组。进而,我们可以间接地对想要的字段进行排序。
也就是说,当订单ID相同时,compareTo 再比较cost,最终return一个值;这个返回值的正负(而不是数值大小)决定排序是升序还是降序。
接下来进行代码阶段:
1.先写Mapper.class
/**
 * Mapper that parses whitespace-separated {@code orderId goodsId cost} records
 * and emits each one as a {@link TextGoods} composite key with a
 * {@link NullWritable} value.
 *
 * <p>All information travels in the key so that the shuffle can sort records
 * by order ID and cost; the value carries nothing.
 */
public class GoodsMapper extends Mapper<LongWritable, Text, TextGoods, NullWritable> {
    /** Number of tokens that make up one record: order ID, goods ID, cost. */
    private static final int FIELDS_PER_RECORD = 3;

    /**
     * Emits one {@link TextGoods} key for every complete record found in the line.
     *
     * @param key     offset of the line in the input (unused)
     * @param value   the text of the line
     * @param context used to emit output and to count skipped records
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer tokens = new StringTokenizer(value.toString());
        // Only consume complete records: a blank or truncated line must not
        // make nextToken() throw NoSuchElementException and fail the task.
        while (tokens.countTokens() >= FIELDS_PER_RECORD) {
            String orderId = tokens.nextToken();
            String goodsId = tokens.nextToken();
            String costText = tokens.nextToken();

            float cost;
            try {
                cost = Float.parseFloat(costText);
            } catch (NumberFormatException e) {
                // A non-numeric cost (for example a column-header row) is
                // skipped, but counted so the drop is visible in the job report.
                context.getCounter("GoodsMapper", "MALFORMED_RECORDS").increment(1);
                continue;
            }

            // Wrap the three fields in the custom Writable key.
            context.write(new TextGoods(orderId, goodsId, cost), NullWritable.get());
        }
    }
}
2.自定义Writable, TextGoods.class
/**
 * Composite MapReduce key holding one order line: order ID, goods ID and cost.
 *
 * <p>Natural ordering is by order ID ascending, then cost descending, then
 * goods ID ascending, so within one order the most expensive line sorts first.
 * {@link #hashCode()} depends on the order ID only, so that hash-based
 * partitioning keeps every line of an order in the same partition.
 */
public class TextGoods implements WritableComparable<TextGoods> {
    private String orderId; // order number
    private String goodsId; // goods (product) number
    private float cost;     // amount

    /**
     * No-argument constructor; fields keep their default values until
     * {@link #readFields(DataInput)} or the setters populate them.
     */
    public TextGoods() {
    }

    /**
     * Creates a fully populated key.
     *
     * @param orderId order number
     * @param goodsId goods number
     * @param cost    amount
     */
    public TextGoods(String orderId, String goodsId, float cost) {
        this.orderId = orderId;
        this.goodsId = goodsId;
        this.cost = cost;
    }

    /**
     * Serializes the three fields in the order orderId, goodsId, cost.
     *
     * @param out destination stream
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(orderId);
        out.writeUTF(goodsId);
        out.writeFloat(cost);
    }

    /**
     * Deserializes the fields in the same order {@link #write(DataOutput)} wrote them.
     *
     * @param in source stream
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        orderId = in.readUTF();
        goodsId = in.readUTF();
        cost = in.readFloat();
    }

    /**
     * Orders by order ID ascending, then cost descending, then goods ID ascending.
     *
     * <p>Returns 0 only when all three fields match, which keeps the ordering
     * antisymmetric and consistent with {@link #equals(Object)}.
     *
     * @param o the key to compare against
     * @return negative, zero or positive as this key sorts before, with or after {@code o}
     */
    @Override
    public int compareTo(TextGoods o) {
        int byOrder = compareStrings(orderId, o.orderId);
        if (byOrder != 0) {
            return byOrder;
        }
        // Arguments are swapped on purpose: higher cost sorts first (descending).
        int byCostDescending = Float.compare(o.cost, cost);
        if (byCostDescending != 0) {
            return byCostDescending;
        }
        // Deterministic tie-breaker for lines with identical order and cost.
        return compareStrings(goodsId, o.goodsId);
    }

    /** Compares two strings lexicographically, ordering {@code null} before any non-null value. */
    private static int compareStrings(String left, String right) {
        if (left == null) {
            return right == null ? 0 : -1;
        }
        if (right == null) {
            return 1;
        }
        return left.compareTo(right);
    }

    /**
     * Two keys are equal when order ID, goods ID and cost are all equal.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is a {@code TextGoods} with the same field values
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof TextGoods)) {
            return false;
        }
        TextGoods other = (TextGoods) obj;
        return compareStrings(orderId, other.orderId) == 0
                && compareStrings(goodsId, other.goodsId) == 0
                && Float.compare(cost, other.cost) == 0;
    }

    /**
     * Hash of the order ID only.
     *
     * <p>Equal keys still produce equal hashes, and keys that share an order ID
     * always hash alike, so a hash-based partitioner sends them to the same reducer.
     *
     * @return hash code derived from {@code orderId}, or 0 when it is {@code null}
     */
    @Override
    public int hashCode() {
        return orderId == null ? 0 : orderId.hashCode();
    }

    public String getOrderId() {
        return orderId;
    }

    public void setOrderId(String orderId) {
        this.orderId = orderId;
    }

    public String getGoodsId() {
        return goodsId;
    }

    public void setGoodsId(String goodsId) {
        this.goodsId = goodsId;
    }

    public float getCost() {
        return cost;
    }

    public void setCost(float cost) {
        this.cost = cost;
    }

    /** @return the three fields joined by tab characters, as written to the job output */
    @Override
    public String toString() {
        return orderId + "\t" + goodsId + "\t" + cost;
    }
}
3.自定义分组,继承WritableComparator
/**
 * Grouping comparator that treats two {@link TextGoods} keys as belonging to
 * the same reduce group whenever their order IDs are equal, regardless of
 * goods ID or cost.
 */
public class GoodsGrouping extends WritableComparator {

    /** Registers {@link TextGoods} as the key class and asks the base class to create key instances. */
    public GoodsGrouping() {
        super(TextGoods.class, true);
    }

    /**
     * Compares two keys by order ID only.
     *
     * @param a first key; must be a {@link TextGoods}
     * @param b second key; must be a {@link TextGoods}
     * @return result of comparing the two order ID strings lexicographically
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        String leftOrderId = ((TextGoods) a).getOrderId();
        String rightOrderId = ((TextGoods) b).getOrderId();
        // The order ID alone decides group membership.
        return leftOrderId.compareTo(rightOrderId);
    }
}
4.Reducer阶段:直接输出即可,之前已经分组排过序了
public class GoodsReducer extends Reducer<TextGoods, NullWritable,TextGoods, NullWritable> {
private String goodsId;
protected void reduce(TextGoods key, Iterable<NullWritable> values, Context context)
throws IOException, InterruptedException {
for(NullWritable v2:values){
// 因为是按升序,所以write第一个就得到最大值
context.write(key,NullWritable.get());
break;//默认是升序
}
}
}
5.测试阶段,主类Driver.class
/**
 * Driver that configures and submits the per-order maximum-cost job.
 *
 * <p>Usage: {@code GoodsDriver [inputPath [outputPath]]}. Either argument may
 * be omitted, in which case the built-in default location is used (for
 * example, pass {@code /MaxID/out} as the second argument to write there).
 */
public class GoodsDriver {
    /** Input used when no first argument is given. */
    private static final String DEFAULT_INPUT = "file:///D:/测试数据/GoodsMax.txt";
    /** Output directory used when no second argument is given. */
    private static final String DEFAULT_OUTPUT = "file:///D:/输出结果/goodsmax";

    /**
     * Builds the job, removes any previous output directory, runs the job and
     * exits with status 0 on success or 1 on failure.
     *
     * @param args optional input path followed by optional output path
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        String inputLocation = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputLocation = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        Configuration conf = new Configuration();
        conf.set("mapred.job.queue.name", "order");

        // The output directory is deleted recursively if it already exists,
        // so a rerun does not fail on a leftover directory. Be careful which
        // path is passed as the second argument.
        Path outfile = new Path(outputLocation);
        FileSystem fs = outfile.getFileSystem(conf);
        if (fs.exists(outfile)) {
            fs.delete(outfile, true);
        }

        Job job = Job.getInstance(conf);
        job.setJarByClass(GoodsDriver.class);
        job.setJobName("Sencondary Sort");

        job.setMapperClass(GoodsMapper.class);
        job.setReducerClass(GoodsReducer.class);

        // Mapper and reducer emit the same key/value types; state both explicitly.
        job.setMapOutputKeyClass(TextGoods.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(TextGoods.class);
        job.setOutputValueClass(NullWritable.class);

        // Group reduce input by order ID only, so one reduce call sees a whole order.
        job.setGroupingComparatorClass(GoodsGrouping.class);

        FileInputFormat.addInputPath(job, new Path(inputLocation));
        FileOutputFormat.setOutputPath(job, outfile);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
然后运行即可!!