/**
 * MapReduce job that emits the {@link #TOPN} highest-priced records from a
 * comma-separated {@code product,price} input file.
 *
 * <p>How it works: the mapper emits the price as the key, wrapped in
 * {@link MyIntwritable} whose {@code compareTo} is reversed, so keys are meant
 * to arrive at the reducer in descending order. The reducer then writes only
 * the first {@link #TOPN} values it sees. This only yields a <em>global</em>
 * result when exactly one reduce task runs, which {@link #main} enforces.
 */
public class TopN {

    /** Number of highest-priced records written to the output. */
    public static final int TOPN = 3;

    /**
     * Configures and submits the job, then exits the JVM with status 0 on
     * success and 1 on failure.
     *
     * @param args unused; input and output paths are hard-coded
     * @throws Exception if job setup or submission fails
     */
    public static void main(String[] args) throws Exception {
        // 1. Obtain the job object.
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        String input = "data/top.data";
        String output = "out/";
        // NOTE(review): FileUtils is a project helper not visible here; presumably it
        // removes an existing output directory so the job can be re-run - confirm.
        FileUtils.deleteTarget(output, configuration);

        // 2. Job identity and the jar that carries these classes.
        job.setJobName("TopN");
        job.setJarByClass(TopN.class);

        // 3. Custom Mapper and Reducer.
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        // A global top-N needs every key to pass through one reducer in sorted order.
        // Pin this explicitly instead of relying on the cluster's configured default.
        job.setNumReduceTasks(1);

        // 4. Mapper output types.
        job.setMapOutputKeyClass(MyIntwritable.class);
        job.setMapOutputValueClass(Text.class);

        // 5. Reducer output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(MyIntwritable.class);

        // 6. Input and output paths.
        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        // 7. Submit the job and wait for it to finish.
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }

    /**
     * An {@link IntWritable} whose natural ordering is descending.
     *
     * <p>The stock {@code IntWritable} sorts ascending; this subclass exists
     * solely to reverse that order so the largest prices come first.
     */
    public static class MyIntwritable extends IntWritable {

        /** No-arg constructor, required so the framework can instantiate the key. */
        public MyIntwritable() {
        }

        /**
         * Creates a key holding the given value.
         *
         * @param value the int value to wrap
         */
        public MyIntwritable(int value) {
            super(value);
        }

        /**
         * Compares in descending numeric order.
         *
         * @param o the other value
         * @return a negative number if this value is greater than {@code o},
         *         zero if equal, a positive number if it is smaller
         */
        @Override
        public int compareTo(IntWritable o) {
            // Operands are swapped relative to the natural order; Integer.compare
            // avoids depending on the exact values returned by the parent class.
            return Integer.compare(o.get(), get());
        }
    }

    /**
     * Parses {@code product,price} lines and emits {@code (price, product)}.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, MyIntwritable, Text> {

        // Reused across map() calls; context.write is expected to serialize the
        // contents immediately, so per-record allocation is unnecessary.
        private final MyIntwritable outKey = new MyIntwritable();
        private final Text outValue = new Text();

        /**
         * Emits one {@code (price, product)} pair per well-formed input line.
         * Lines with fewer than two comma-separated fields, or whose second
         * field is not an integer, are skipped and tallied in the
         * {@code TopN/MALFORMED_RECORDS} counter instead of failing the job.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of input text
         * @param context task context used to emit output and update counters
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] splits = value.toString().split(",");
            if (splits.length < 2) {
                context.getCounter("TopN", "MALFORMED_RECORDS").increment(1);
                return;
            }

            int price;
            try {
                // Tolerate whitespace around the number, e.g. "apple, 5".
                price = Integer.parseInt(splits[1].trim());
            } catch (NumberFormatException e) {
                context.getCounter("TopN", "MALFORMED_RECORDS").increment(1);
                return;
            }

            outKey.set(price);
            outValue.set(splits[0]);
            context.write(outKey, outValue);
        }
    }

    /**
     * Writes the first {@link #TOPN} values it receives as {@code (product, price)}.
     *
     * <p>The running count lives in an instance field so it carries over
     * between {@code reduce} calls for different keys; the result is therefore
     * only a global top-N if a single reducer instance sees all keys.
     */
    public static class MyReducer extends Reducer<MyIntwritable, Text, Text, MyIntwritable> {

        /** Number of records emitted so far by this reducer instance. */
        int index = 0;

        /**
         * Emits values for {@code key} until {@link #TOPN} records in total
         * have been written by this reducer; later calls emit nothing.
         *
         * @param key     the price shared by all {@code values}
         * @param values  products that have this price
         * @param context task context used to emit output
         */
        @Override
        protected void reduce(MyIntwritable key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Quota already filled: skip the group without iterating it. The counter
            // stops at TOPN, so it cannot overflow no matter how many records follow.
            if (index >= TOPN) {
                return;
            }
            for (Text value : values) {
                context.write(value, key);
                index++;
                if (index >= TOPN) {
                    return;
                }
            }
        }
    }
}
// A simple global Top-N implementation with MapReduce.
// (Page note: latest recommended article published 2023-02-02 16:29:29.)