package oldapi;
import java.io.IOException;
import java.util.ArrayList;
import java.util.TreeSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
// Use MapReduce to find the K largest numbers in a massive data set.
public class Top_k_new {

    /**
     * Mapper: reads one integer per input line and keeps only the K largest
     * values seen by this map task. Emitting at most K candidates per mapper
     * keeps the data shuffled to the reducer small.
     */
    public static class MapClass extends Mapper<LongWritable, Text, NullWritable, IntWritable> {
        /** Number of top values to keep. */
        public static final int K = 5;
        // TreeSet keeps elements sorted ascending; first() is the smallest.
        // NOTE: TreeSet de-duplicates, so repeated input values count once.
        private final TreeSet<Integer> set = new TreeSet<Integer>();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                return; // skip blank lines instead of crashing in parseInt
            }
            set.add(Integer.parseInt(line));
            if (set.size() > K) {
                // Evict the smallest so the set always holds the K largest.
                set.remove(set.first());
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Emit the surviving values (ascending) once all input is mapped.
            for (Integer v : set) {
                context.write(NullWritable.get(), new IntWritable(v));
            }
            set.clear();
        }
    }

    /**
     * Reducer: merges the per-mapper candidates and again keeps only the K
     * largest. Without this second pruning the job would emit up to
     * (numMappers * K) values instead of the global top K.
     */
    public static class Reduce extends Reducer<NullWritable, IntWritable, NullWritable, IntWritable> {
        /** Must match MapClass.K. */
        public static final int K = 5;

        @Override
        public void reduce(NullWritable key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Every mapper emits under the single NullWritable key, so this
            // method is invoked once with all candidates.
            TreeSet<Integer> top = new TreeSet<Integer>();
            for (IntWritable val : values) {
                top.add(val.get());
                if (top.size() > K) {
                    top.remove(top.first());
                }
            }
            // Emit the global top K in ascending order.
            for (Integer v : top) {
                context.write(NullWritable.get(), new IntWritable(v));
            }
        }
    }

    // HDFS locations for job input and output.
    public static final String HOST_PATH = "hdfs://test:9000";
    public static final String INPUT_PATH = HOST_PATH + "/number";
    public static final String OUTPUT_PATH = HOST_PATH + "/out";

    /**
     * Configures and submits the top-K job; exits non-zero if the job fails.
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance replaces the deprecated new Job(conf, name) ctor.
        Job job = Job.getInstance(conf, "TopKNum");
        // Required so the cluster can locate the classes when submitted as a jar.
        job.setJarByClass(Top_k_new.class);
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);
        // A single reducer must see every mapper's candidates to pick the
        // global top K; all keys are NullWritable so they form one group.
        job.setNumReduceTasks(1);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(IntWritable.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}