MR实现TOPN
需求:
求一个文件中金额前三的数据
分析:
首先实现序列化,重写compareTo方法使key按金额降序排序,再根据TopN在reduce中取前N条输出
数据格式:
1AA,5
2BB,10
3CC,20
4DD,20
5EE,15
1package com.ruozedata.bigdata.hadoop.mapreduce.TopN;
2
3import com.ruozedata.bigdata.hadoop.utils.FileUtils;
4import org.apache.hadoop.conf.Configuration;
5import org.apache.hadoop.fs.Path;
6import org.apache.hadoop.io.IntWritable;
7import org.apache.hadoop.io.LongWritable;
8import org.apache.hadoop.io.Text;
9import org.apache.hadoop.mapreduce.Job;
10import org.apache.hadoop.mapreduce.Mapper;
11import org.apache.hadoop.mapreduce.Reducer;
12import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
13import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
14
15import java.io.IOException;
16import java.util.TreeMap;
17
18public class TopNAppV2 {
19
20 public static void main(String[] args) throws Exception{
21 //获取job对象
22 Configuration configuration = new Configuration();
23 Job job = Job.getInstance(configuration);
24
25 String input="data/top.data";
26 String output="out/";
27 FileUtils.deleteTarget(output,configuration);
28
29 //设置jar相关信息
30 job.setJarByClass(TopNAppV2.class);
31
32 //设置自定义的mapper跟Reducer
33 job.setMapperClass(MyMapper.class);
34 job.setReducerClass(MyReducer.class);
35
36 //设置mapper阶段输出的key跟value类型
37 job.setMapOutputKeyClass(RuozedataInt.class);
38 job.setMapOutputValueClass(Text.class);
39
40 //设置reducer阶段输出的key跟vlaue类型
41 job.setOutputKeyClass(Text.class);
42 job.setOutputValueClass(RuozedataInt.class);
43
44 //设置输入输出路径
45 FileInputFormat.setInputPaths(job,new Path(input));
46 FileOutputFormat.setOutputPath(job,new Path(output));
47
48 //提交job
49 boolean result = job.waitForCompletion(true);
50 System.exit(result?0:1);
51 }
52
53 public static final int topn=3;
54
55 public static class RuozedataInt extends IntWritable{
56 public RuozedataInt(){}
57 public RuozedataInt(int vlaue){
58 super(vlaue);
59 }
60
61 @Override
62 public int compareTo(IntWritable o) {
63 return -super.compareTo(o);
64 }
65 }
66
67 public static class MyMapper extends Mapper<LongWritable, Text,RuozedataInt,Text >{
68 @Override
69 protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
70 String[] splits = value.toString().split(",");
71 int price = Integer.parseInt(splits[1]);
72 String produce = splits[0];
73 context.write(new RuozedataInt(price),new Text(produce));
74 }
75 }
76 public static class MyReducer extends Reducer<RuozedataInt,Text,Text,RuozedataInt>{
77 int index = 0;
78 @Override
79 protected void reduce(RuozedataInt key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
80
81 for(Text vlaue:values){
82 if (index < topn){
83 context.write(vlaue,key);
84 }
85 index ++;
86 }
87 }
88 }
89}
识别下方二维码,即可关注公众号获取最新大厂技术