本次改进在 map 端引入了一个自定义的定长 list 容器,使每个 map 任务结束时只输出自己的前几名,而不是输出全部记录。
下面直接给出程序代码(背景介绍可参考上一篇):
package test;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class TopK2 {
//改进型map
public static class Map extends Mapper<Object, Text, MyKey, NullWritable>{
private static MyList list = null;
//初始化list,使用配置容量
protected void setup(Context context)
throws IOException ,InterruptedException {
list = new MyList(Integer.parseInt(context.getConfiguration().get("top_num")));
};
protected void map(Object key, Text value, Context context)
throws java.io.IOException ,InterruptedException {
try {
list.add(Integer.parseInt(value.toString()));
} catch (Exception e) {
// TODO: handle exception
return ;
}
};
//Map任务结束时执行
protected void cleanup(Context context)
throws IOException ,InterruptedException {
for (Integer item : list) {
context.write(new MyKey(item), NullWritable.get());
}
list.clear();
};
}
public static class Reduce extends Reducer<MyKey, NullWritable, Text, NullWritable>{
private static Text k = new Text();
private static MyList list = null;
//初始化list,使用配置容量
protected void setup(Context context)
throws IOException ,InterruptedException {
list = new MyList(Integer.parseInt(context.getConfiguration().get("top_num")));
};
protected void reduce(MyKey key, Iterable<NullWritable> values, Context context)
throws IOException ,InterruptedException {
//所得到的key是降序输出的,因为是自定义的key
try {
list.add(key.getNum());
} catch (Exception e) {
// TODO: handle exception
return ;
}
};
protected void cleanup(Context context)
throws IOException ,InterruptedException {
for (int i=0; i<list.size(); i++) {
k.set(list.get(i)+"\t"+(i+1));
context.write(k, NullWritable.get());
}
list.clear();
};
}
/**
 * Configures and submits the TopK2 job, then exits with 0 on success or 1 on failure.
 *
 * <p>After Hadoop generic options are stripped, exactly three arguments are expected:
 * input path, output path, and the number of top values to keep.
 *
 * @param args command-line arguments, optionally including Hadoop generic options
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: TopK2 <in> <out> <top_num>");
        System.exit(2);
    }
    // Read from otherArgs, not args: generic options such as "-D k=v" shift the raw indices.
    // Third argument: how many of the largest values to output. Must be set before the Job
    // is created from this configuration.
    conf.set("top_num", otherArgs[2]);
    Job job = new Job(conf, "TopK2");
    job.setJarByClass(TopK2.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setMapOutputKeyClass(MyKey.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    // A global top-K is only correct when a single reducer sees every candidate.
    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
/**
 * Map-output key wrapping a single {@code int}. Its natural ordering is DESCENDING by number,
 * so the largest values sort first.
 */
private static class MyKey implements WritableComparable<MyKey> {
    private int num;

    /** Returns the wrapped number. */
    public int getNum() {
        return num;
    }

    /** Creates an empty key whose number is filled in later by {@link #readFields}. */
    public MyKey() {
    }

    /**
     * Creates a key wrapping the given number.
     *
     * @param num value to wrap
     */
    public MyKey(int num) {
        super();
        this.num = num;
    }

    /** Restores the number from its 4-byte serialized form. */
    @Override
    public void readFields(DataInput in) throws IOException {
        num = in.readInt();
    }

    /** Serializes the number as a 4-byte int. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(num);
    }

    /**
     * Compares in reverse numeric order: a larger number sorts before a smaller one.
     *
     * @return negative if this number is greater than {@code o}'s, zero if equal, positive if smaller
     */
    @Override
    public int compareTo(MyKey o) {
        // Explicit comparison instead of "o.num - this.num", which overflows when the operands
        // have opposite signs and a large magnitude (e.g. Integer.MIN_VALUE vs. 1).
        return (o.num < this.num) ? -1 : ((o.num == this.num) ? 0 : 1);
    }

    /** Two keys are equal exactly when they wrap the same number (consistent with compareTo). */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof MyKey)) {
            return false;
        }
        return this.num == ((MyKey) other).num;
    }

    /** Hash derived from the number only, so equal keys always hash alike. */
    @Override
    public int hashCode() {
        return num;
    }
}
private static class MyList extends ArrayList<Integer>{
//默认容量为5
private int cont = 5;
public MyList(int num){
super();
this.cont = num;
}
public void add(int value){
//添加前判断,如果<cont 直接添加,不用判断
if(super.size() < cont){
super.add(value);
}else{
//此处还可以进行优化,可以采用动态链表的形式
Collections.sort(this);
if(value > this.get(0)){
this.set(0, value);
}
}
}
}
}
计算结果:
与上一篇的结果相同,此处不再重复。