求亿级记录中搜索次数Top N的搜索词(MapReduce实现)

程序示例:

日志信息(每行格式为"搜索词<Tab>搜索次数"): 二手车 1345、二手房 3416、洗衣机 2789
输入: N=2 输出: 二手房 洗衣机

map函数如下:

import java.io.IOException;
import java.util.Map;
import java.util.TreeMap;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Map-side stage of a top-N job: keeps the N records with the highest counts
 * seen by this map task and emits them once, in {@link #cleanup}.
 *
 * <p>Each input line is expected to be {@code word<TAB>count}. Each output
 * value is {@code count<SPACE>word}, written under a {@link NullWritable} key.
 *
 * <p>NOTE(review): candidates are keyed by count, so two words with the same
 * count overwrite one another and only the most recently seen one is kept.
 */
public class TopNMapper extends Mapper<Object, Text, NullWritable, Text> {
    /** Maximum number of candidates retained by this map task. */
    private final int N = 10;

    /** Candidates ordered by ascending count, so the first entry is the one to evict. */
    private final TreeMap<IntWritable, Text> tm = new TreeMap<IntWritable, Text>();

    /**
     * Parses one {@code word<TAB>count} line and offers it to the candidate set.
     *
     * @param key     input key; not used
     * @param value   one input line in the form {@code word<TAB>count}
     * @param context task context; nothing is written here, output happens in cleanup
     * @throws NumberFormatException          if the count field is not an integer
     * @throws ArrayIndexOutOfBoundsException if the line has no tab-separated count field
     */
    @Override
    protected void map(Object key, Text value,
            Mapper<Object, Text, NullWritable, Text>.Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");
        String word = fields[0];
        int num = Integer.parseInt(fields[1]);

        // The map stores references, so every entry needs its own key and value
        // objects. Re-using one mutable IntWritable/Text pair would make every
        // put() hit the same key and leave a single entry holding the last record.
        tm.put(new IntWritable(num), new Text(word));

        if (tm.size() > N) {
            // Evict the smallest count to keep at most N candidates.
            tm.pollFirstEntry();
        }
    }

    /**
     * Emits the retained candidates, one {@code count<SPACE>word} value per entry.
     *
     * @param context task context used to write the output records
     */
    @Override
    protected void cleanup(
            Mapper<Object, Text, NullWritable, Text>.Context context)
            throws IOException, InterruptedException {
        for (Map.Entry<IntWritable, Text> entry : tm.entrySet()) {
            Text value = new Text(entry.getKey() + " " + entry.getValue());
            context.write(NullWritable.get(), value);
        }
    }
}

Reduce函数如下:

import java.io.IOException;
import java.util.TreeMap;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reduce-side stage of a top-N job: merges {@code count<SPACE>word} values
 * and writes the words of the N highest counts, highest count first.
 *
 * <p>NOTE(review): candidates are keyed by count, so two words with the same
 * count overwrite one another and only the most recently seen one is kept.
 */
public class TopNReducer extends Reducer<NullWritable, Text, NullWritable, Text>{
    /** Maximum number of results written. */
    private final int N = 10;

    /** Candidates ordered by ascending count, so the first entry is the one to evict. */
    private final TreeMap<IntWritable, Text> tm = new TreeMap<IntWritable, Text>();

    /**
     * Selects the N largest counts among {@code values} and writes their words
     * in descending count order.
     *
     * @param key     the shared {@link NullWritable} key
     * @param values  values in the form {@code count<SPACE>word}
     * @param context task context used to write the output records
     * @throws NumberFormatException          if a count field is not an integer
     * @throws ArrayIndexOutOfBoundsException if a value contains no space separator
     */
    @Override
    protected void reduce(NullWritable key, Iterable<Text> values,
            Reducer<NullWritable, Text, NullWritable, Text>.Context context)
            throws IOException, InterruptedException {
        for (Text val : values) {
            // Limit the split to 2 so a word that itself contains spaces stays whole.
            String[] tmp = val.toString().split(" ", 2);

            // The map stores references, so every entry needs its own key and value
            // objects. Re-using one mutable IntWritable/Text pair would make every
            // put() hit the same key and leave a single entry holding the last value.
            tm.put(new IntWritable(Integer.parseInt(tmp[0])), new Text(tmp[1]));

            if (tm.size() > N) {
                // Evict the smallest count to keep at most N candidates.
                tm.pollFirstEntry();
            }
        }
        for (Text res : tm.descendingMap().values()) {
            context.write(NullWritable.get(), res);
        }
    }
}

 

转载于:https://www.cnblogs.com/lasclocker/p/4819655.html

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值