测试数据:
key 1
value 3
aa 4
deng 5
haha 8
tt 8
1、使用TreeMap实现topN
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.TreeMap;
/**
 * Top-N via per-task TreeMaps: each Mapper keeps its local top K, the single
 * Reducer merges the candidates and keeps the global top K.
 *
 * Known limitation (by design of this variant): the TreeMap is keyed by score,
 * so records with EQUAL scores overwrite each other. See TopN_2 for the fix.
 */
public class TopN {
    /** Number of top records to keep. */
    public static final int K = 3;

    /**
     * Mapper: maintains this split's top K in a TreeMap keyed by score
     * (ascending). Whenever the map grows past K, the smallest key is evicted.
     * Nothing is emitted per record; the candidates are flushed in cleanup().
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final TreeMap<Integer, String> map = new TreeMap<>();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                return; // skip blank lines instead of throwing ArrayIndexOutOfBoundsException
            }
            // "\\s+" tolerates multiple spaces / tabs between name and score
            String[] arr = line.split("\\s+");
            int score = Integer.parseInt(arr[1]);
            map.put(score, arr[0]);
            if (map.size() > K) {
                map.remove(map.firstKey()); // evict the current smallest score, keeping top K
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Flush this map task's top K (ascending by score) once the split is done.
            for (Integer score : map.keySet()) {
                context.write(new Text(map.get(score)), new IntWritable(score));
            }
        }
    }

    /**
     * Reducer: merges candidates from all map tasks into one TreeMap and keeps
     * only the global top K.
     *
     * BUG FIX: the original never trimmed this map, so with more than one map
     * task (each contributing up to K candidates) the job emitted more than K
     * records. The same evict-smallest rule as the Mapper is applied here.
     */
    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final TreeMap<Integer, String> map = new TreeMap<>();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Each name arrives with a single score, so the first value suffices.
            map.put(values.iterator().next().get(), key.toString());
            if (map.size() > K) {
                map.remove(map.firstKey()); // keep only the global top K
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Emit the global top K (ascending by score).
            for (Integer score : map.keySet()) {
                context.write(new Text(map.get(score)), new IntWritable(score));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(TopN.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // exit(arg): a non-zero arg signals abnormal JVM termination
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
如果有相同大小的值的话,会被覆盖掉(TreeMap 以 score 作为 key,相同 score 的记录互相覆盖,只保留最后一条)。
2、使用自定义 Writable(重写 IntWritable 的 compareTo)实现 topN
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.TreeMap;
/**
 * Top-N via the shuffle's own sort: a custom key type reverses IntWritable's
 * natural (ascending) order so scores arrive at the reducer in descending
 * order, and the reducer simply emits the first K values it sees.
 *
 * Unlike TopN, equal scores are NOT lost here: they are distinct (key, value)
 * pairs in the shuffle, not colliding TreeMap keys.
 */
public class TopN_2 {
    /** Number of top records to keep. */
    public static final int K = 3;

    /**
     * IntWritable with reversed ordering (descending instead of the default
     * ascending). No raw comparator is registered for this class, so Hadoop
     * falls back to deserializing keys and calling this compareTo for the sort.
     */
    public static class MyIntWritable extends IntWritable {
        public MyIntWritable() {
        }

        public MyIntWritable(int value) {
            super(value);
        }

        @Override
        public int compareTo(IntWritable o) {
            return -super.compareTo(o); // negate to turn the default ascending order into descending
        }
    }

    /**
     * Mapper: emits (score, name) so the framework sorts by score for us.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, MyIntWritable, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                return; // skip blank lines instead of throwing ArrayIndexOutOfBoundsException
            }
            // "\\s+" tolerates multiple spaces / tabs between name and score
            String[] arr = line.split("\\s+");
            int score = Integer.parseInt(arr[1]);
            context.write(new MyIntWritable(score), new Text(arr[0]));
        }
    }

    /**
     * Reducer: keys arrive in descending score order, so the first K values
     * across all reduce() calls are the global top K. The counter is an
     * instance field because it must persist across calls for different keys.
     */
    public static class MyReducer extends Reducer<MyIntWritable, Text, Text, MyIntWritable> {
        private int num = 0;

        @Override
        protected void reduce(MyIntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            for (Text text : values) {
                if (num >= K) {
                    break; // top K already written; no need to drain the rest
                }
                context.write(text, key);
                num++;
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(TopN_2.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setMapOutputKeyClass(MyIntWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(MyIntWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        // BUG FIX: the original called fileSystem.deleteOnExit(outputPath), which
        // schedules the OUTPUT directory for deletion when the JVM exits — i.e. it
        // deletes the job's own results after a successful run. The intent is to
        // remove a leftover output dir BEFORE the job starts, so delete it now.
        Path outputPath = new Path(args[1]);
        FileSystem fileSystem = FileSystem.get(conf);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true); // recursive delete of the stale output dir
        }
        FileOutputFormat.setOutputPath(job, outputPath);

        // exit(arg): a non-zero arg signals abnormal JVM termination
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
测试结果:
tt 8
haha 8
deng 5
相同score不会被覆盖