【Hadoop】51-MapReduce找出Top N

/**
 * Pairs a numeric count with a content string.
 *
 * <p>The natural ordering compares items by {@code count} only, in ascending
 * order, so the head of a {@code PriorityQueue<Item>} is the item with the
 * smallest count. {@code content} does not take part in the comparison.
 */
public class Item implements Comparable<Item>{

    /** Value the item is ordered by. */
    private Long count;

    /** Payload carried alongside the count. */
    private String content;

    /** Creates an item with both fields unset ({@code null}). */
    public Item() {
    }

    /**
     * Creates a fully populated item.
     *
     * @param count   value used for ordering
     * @param content payload carried alongside the count
     */
    public Item(Long count, String content) {
        this.content = content;
        this.count = count;
    }

    /** @return the payload carried by this item */
    public String getContent() {
        return this.content;
    }

    /** @param content the new payload */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the value this item is ordered by */
    public Long getCount() {
        return this.count;
    }

    /** @param count the new ordering value */
    public void setCount(Long count) {
        this.count = count;
    }

    /**
     * Orders items by ascending count.
     *
     * @param o the item to compare against
     * @return a negative number, zero, or a positive number when this item's
     *         count is less than, equal to, or greater than {@code o}'s count
     * @throws NullPointerException if {@code o} is null or either count is null
     */
    @Override
    public int compareTo(Item o) {
        final Long otherCount = o.getCount();
        return this.count.compareTo(otherCount);
    }
}
/**
 * Map stage of the Top-N job.
 *
 * <p>Each input line is split on tab characters and the second field is parsed
 * as a {@code long} count. The mapper keeps only the N lines with the largest
 * counts seen by this task and writes them, under a {@link NullWritable} key,
 * when the task finishes in {@link #cleanup(Context)}.
 */
public class TopNMapper extends Mapper<Object, Text, NullWritable, Text> {

    /** Number of records to retain; read from the "N" configuration key (default 5). */
    private int N;

    /** Current candidates; the head is the retained item with the smallest count. */
    private PriorityQueue<Item> priorityQueue = new PriorityQueue<>();

    private static Logger LOGGER = Logger.getLogger(TopNMapper.class);

    /**
     * Reads the requested N from the job configuration.
     *
     * @param context task context supplying the configuration
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        this.N = context.getConfiguration().getInt("N", 5);
    }

    /**
     * Considers one input line as a Top-N candidate.
     *
     * <p>Lines with fewer than two tab-separated fields, or whose second field
     * is not a valid {@code long}, are skipped. Nothing is written here; the
     * retained candidates are emitted in {@link #cleanup(Context)}.
     *
     * @param key     input key (unused)
     * @param value   the input line
     * @param context task context (unused in this method)
     */
    @Override
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // With N <= 0 nothing can be retained; returning early also avoids
        // calling peek() on an empty queue, which returns null.
        if (N <= 0) {
            return;
        }

        String line = value.toString();
        String[] words = line.split("\t");
        if (words.length < 2) {
            return;
        }

        // This runs once per input record, so keep it off the INFO level and
        // avoid building the message unless it will actually be logged.
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Debug: Map get text = [" + line + "]");
        }

        long count;
        try {
            count = Long.parseLong(words[1]);
        } catch (NumberFormatException e) {
            // Treat a non-numeric count like any other malformed line: skip it
            // rather than failing the whole task.
            LOGGER.warn("Skipping record with non-numeric count: [" + line + "]");
            return;
        }

        if (priorityQueue.size() < N) {
            priorityQueue.offer(new Item(count, line));
        } else if (count > priorityQueue.peek().getCount()) {
            // Queue is full: evict the current minimum to make room. Ties with
            // the minimum are not admitted.
            priorityQueue.poll();
            priorityQueue.offer(new Item(count, line));
        }
    }

    /**
     * Emits the retained candidates once all input for this task has been seen.
     *
     * <p>Items are written in the queue's iteration order, which is not sorted.
     *
     * @param context task context used to write the output
     */
    @Override
    public void cleanup(Context context) throws IOException, InterruptedException {
        for (Item item : priorityQueue) {
            LOGGER.info("Debug: Map write to context = [" + item.getContent() + "]");
            context.write(NullWritable.get(), new Text(item.getContent()));
        }
    }

    /**
     * Empty entry point kept from the original class.
     * NOTE(review): appears unused — confirm before removing.
     */
    public static void main(String[] args) {

    }

}
/**
 * Reduce stage of the Top-N job.
 *
 * <p>Receives candidate lines under a {@link NullWritable} key, keeps the N
 * with the largest counts (second tab-separated field), and writes them in
 * descending order of count.
 */
public class TopNReducer extends Reducer<NullWritable, Text, NullWritable, Text> {

    /** Number of records to emit; read from the "N" configuration key (default 5). */
    private int N;

    /** Current candidates; the head is the retained item with the smallest count. */
    private PriorityQueue<Item> priorityQueue = new PriorityQueue<>();

    private static Logger LOGGER = Logger.getLogger(TopNReducer.class);

    /**
     * Reads the requested N from the job configuration.
     *
     * @param context task context supplying the configuration
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        this.N = context.getConfiguration().getInt("N", 5);
    }

    /**
     * Selects the N values with the largest counts and writes them, largest first.
     *
     * <p>Values with fewer than two tab-separated fields, or whose second field
     * is not a valid {@code long}, are skipped with a warning.
     *
     * @param key     the grouping key (always the NullWritable singleton here)
     * @param values  candidate lines for this key
     * @param context task context used to write the output
     */
    @Override
    protected void reduce(NullWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // With N <= 0 there is nothing to emit; returning early also avoids
        // calling peek() on an empty queue, which returns null.
        if (N <= 0) {
            return;
        }

        for (Text value : values) {
            String line = value.toString();
            LOGGER.info("Debug: text = [" + line + "]");

            String[] words = line.split("\t");
            if (words.length < 2) {
                LOGGER.warn("Skipping record without a count field: [" + line + "]");
                continue;
            }

            long count;
            try {
                count = Long.parseLong(words[1]);
            } catch (NumberFormatException e) {
                LOGGER.warn("Skipping record with non-numeric count: [" + line + "]");
                continue;
            }

            if (priorityQueue.size() < N) {
                priorityQueue.offer(new Item(count, line));
            } else if (count > priorityQueue.peek().getCount()) {
                // Queue is full: evict the current minimum to make room. Ties
                // with the minimum are not admitted.
                priorityQueue.poll();
                priorityQueue.offer(new Item(count, line));
            }
        }

        // Iterating a PriorityQueue does not visit elements in sorted order;
        // poll() does (smallest first). Fill the array back-to-front so the
        // output runs from the largest count to the smallest. Draining also
        // leaves the queue empty for any later reduce() call.
        Item[] ranked = new Item[priorityQueue.size()];
        for (int i = ranked.length - 1; i >= 0; i--) {
            ranked[i] = priorityQueue.poll();
        }
        for (Item topN : ranked) {
            context.write(NullWritable.get(), new Text(topN.getContent()));
        }
    }

}
public class TopNDriver extends Configured implements Tool {

    private static Logger LOGGER = Logger.getLogger(TopNDriver.class);

    public int run(String[] args) throws Exception {
        Job job = new Job(getConf());

        job.setJarByClass(TopNDriver.class);

        int N = Integer.parseInt(args[0]); // top N
        job.getConfiguration().setInt("N", N);
        job.setJobName("TopNDriver");

        job.setMapperClass(TopNMapper.class);
        job.setReducerClass(TopNReducer.class);
        job.setNumReduceTasks(1);

        // map() output (K,V)
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        // reduce() output (K,V)
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        // args[1] = input directory
        // args[2] = output directory
        FileInputFormat.setInputPaths(job, new Path(args[1]));
        FileOutputFormat.setOutputPath(job, new Path(args[2]));

        boolean status = job.waitForCompletion(true);
        LOGGER.info("run(): status="+status);
        return status ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        if (args.length != 3) {
            LOGGER.warn("usage TopNDriver <N> <input> <output>");
            System.exit(1);
        }

        LOGGER.info("N=" + args[0]);
        LOGGER.info("inputDir=" + args[1]);
        LOGGER.info("outputDir=" + args[2]);
        int returnStatus = ToolRunner.run(new TopNDriver(), args);
        System.exit(returnStatus);
    }

}

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值