Hadoop_MapReduce_topN示例
倒序输出测试数据中的5个最大的数字
测试文件1:D:\data\topN\topN.txt
3 9 3 7 5 6 2 85
4 5 101 1 6
1
1 0 2 82 5 90
测试文件2:D:\data\topN\topN2.txt
3 93 3 7 100
4 5 1 6 10
1 80
1
99
- TopNMapper
package com.blu.topN;
import java.io.IOException;
import java.util.TreeMap;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class TopNMapper extends Mapper<LongWritable, Text, NullWritable, IntWritable> {

    /** Number of largest values each mapper forwards to the reducer. */
    private static final int TOP_N = 5;

    // TreeMap keeps its keys in ascending order, so firstKey() is always the
    // smallest retained value. Duplicate numbers collapse onto one key, i.e.
    // this job emits the top-N *distinct* values.
    private TreeMap<Integer, String> treemap = new TreeMap<Integer, String>();
    private IntWritable iw = new IntWritable();

    /**
     * Collects the TOP_N largest distinct integers seen in this mapper's
     * input split. Each input line holds whitespace-separated integers.
     */
    @Override
    protected void map(LongWritable key, Text value,
            Mapper<LongWritable, Text, NullWritable, IntWritable>.Context context)
            throws IOException, InterruptedException {
        // Split on runs of whitespace: a plain split(" ") yields empty tokens
        // for repeated or leading spaces, and Integer.parseInt("") throws.
        String[] vals = value.toString().trim().split("\\s+");
        for (String v : vals) {
            if (v.isEmpty()) {
                continue; // blank line or stray whitespace
            }
            treemap.put(Integer.parseInt(v), v);
            if (treemap.size() > TOP_N) {
                // Evict the smallest key so only the TOP_N largest remain.
                treemap.remove(treemap.firstKey());
            }
        }
    }

    /**
     * Emits the retained values once, after all records of the split were
     * mapped. Keys are NullWritable so every value reaches one reduce call.
     */
    @Override
    protected void cleanup(Mapper<LongWritable, Text, NullWritable, IntWritable>.Context context)
            throws IOException, InterruptedException {
        for (Integer i : treemap.keySet()) {
            iw.set(i);
            context.write(NullWritable.get(), iw);
        }
    }
}
- TopNReduce
package com.blu.topN;
import java.io.IOException;
import java.util.Comparator;
import java.util.TreeMap;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
public class TopNReduce extends Reducer<NullWritable, IntWritable, NullWritable, IntWritable> {

    /** Number of largest values to emit; must match the mapper's limit. */
    private static final int TOP_N = 5;

    private IntWritable iw = new IntWritable();

    // Descending-order TreeMap: firstKey() is the largest retained value and
    // lastKey() the smallest. Integer.compare is used instead of "o2 - o1",
    // which overflows when the operands have opposite signs (e.g.
    // Integer.MIN_VALUE compared against any positive number).
    private TreeMap<Integer, String> treemap = new TreeMap<Integer, String>(new Comparator<Integer>() {
        public int compare(Integer o1, Integer o2) {
            return Integer.compare(o2, o1);
        }
    });

    /**
     * Merges the per-mapper candidates (all under the single NullWritable
     * key) and keeps only the TOP_N largest distinct values.
     */
    @Override
    protected void reduce(NullWritable key, Iterable<IntWritable> value,
            Reducer<NullWritable, IntWritable, NullWritable, IntWritable>.Context context)
            throws IOException, InterruptedException {
        for (IntWritable candidate : value) {
            // The map value is never read; store an empty placeholder.
            treemap.put(candidate.get(), "");
            if (treemap.size() > TOP_N) {
                // In a descending map the lastKey() is the smallest value.
                treemap.remove(treemap.lastKey());
            }
        }
    }

    /**
     * Writes the final top-N values in descending order (the map iterates
     * keys in comparator order).
     */
    @Override
    protected void cleanup(Reducer<NullWritable, IntWritable, NullWritable, IntWritable>.Context context)
            throws IOException, InterruptedException {
        for (Integer i : treemap.keySet()) {
            iw.set(i);
            context.write(NullWritable.get(), iw);
        }
    }
}
- TopNJob
package com.blu.topN;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class TopNJob {

    /**
     * Configures and submits the top-N job.
     * Usage: TopNJob &lt;input path&gt; &lt;output path&gt;
     * Exits 0 on success, 1 on job failure, 2 on bad arguments.
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            // Fail fast with a usage message instead of an
            // ArrayIndexOutOfBoundsException from args[0]/args[1].
            System.err.println("Usage: TopNJob <input path> <output path>");
            System.exit(2);
        }
        Job job = Job.getInstance();
        job.setJarByClass(TopNJob.class);
        job.setMapperClass(TopNMapper.class);
        job.setReducerClass(TopNReduce.class);
        // The global top-N is only correct when a single reducer sees every
        // mapper's candidates; make the (default) single reducer explicit.
        job.setNumReduceTasks(1);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        boolean flag = job.waitForCompletion(true);
        System.exit(flag ? 0 : 1);
    }
}
- 运行参数:
D:\data\topN\ D:\data\output
- 运行结果:
101
100
99
93
90