先自定义一个UDAF。由于UDAF是多行输入、单行输出的聚合函数,所以把结果拼接成一个字符串输出,代码如下:
public class Top4GroupBy extends UDAF {
// Holder object for the data being aggregated: a per-key count map plus a limit value.
public static class State {
// Count accumulated for each distinct key. Has no initializer here; increment()
// creates the map on first use if it is still null.
private Map<Text, IntWritable> counts;
// NOTE(review): presumably the maximum number of entries to emit; it is not
// read or written in the visible code — confirm against the evaluator.
private int limit;
}
/**
 * Adds {@code i} to the count recorded for {@code o} in the given state.
 *
 * <p>If the state's map has not been created yet, it is created first. When the
 * map already holds an entry for {@code o}, that entry's value is increased in
 * place; otherwise a new entry is inserted with {@code i} as its initial count.
 *
 * @param s the state whose count map is updated
 * @param o the key whose count should grow
 * @param i the amount to add (or the initial count for a new key)
 */
private static void increment(State s, Text o, int i) {
    // Create the map lazily the first time anything is counted.
    if (s.counts == null) {
        s.counts = new HashMap<Text, IntWritable>();
    }
    final Map<Text, IntWritable> tally = s.counts;

    final IntWritable existing = tally.get(o);
    if (existing != null) {
        // Known key: bump the stored counter in place.
        existing.set(existing.get() + i);
        return;
    }

    // New key: store a private copy rather than the caller's object
    // (presumably because the caller may reuse the same Text instance — confirm).
    tally.put(new Text(o), new IntWritable(i));
}
public static class Top4GroupByEvaluator implements UDAFEvaluator {
private final State state;