hive udaf_Hive自定义函数UDAF开发 | 学步园

package hive.udaf;

import org.apache.hadoop.hive.ql.exec.UDAF;

import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;

/**
 * A simple Hive UDAF that sums {@code act_times} over the rows whose
 * {@code act_code} equals {@code act_type}.
 *
 * It should be very easy to follow and can be used as an example for writing
 * new UDAFs.
 *
 * Note that Hive internally uses a different mechanism (called GenericUDAF) to
 * implement built-in aggregation functions, which are harder to program but
 * more efficient.
 */

public final class ActionCount extends UDAF {

  /**
   * The internal state of the aggregation: how many rows matched and the
   * running sum of their {@code act_times}.
   *
   * Note that this is only needed if the internal state cannot be represented
   * by a primitive.
   *
   * The internal state can also contain fields with types like
   * ArrayList and HashMap if needed.
   */
  public static class UDAFState {

    /** Number of rows whose act_code matched act_type. */
    private long mCount;

    /** Sum of act_times over the matching rows. */
    private long mSum;
  }

  /**
   * The actual class for doing the aggregation. Hive will automatically look
   * for all internal classes of the UDAF that implements UDAFEvaluator.
   */
  public static class UDAFExampleAvgEvaluator implements UDAFEvaluator {

    UDAFState state;

    /** Creates an evaluator with an empty (zeroed) aggregation state. */
    public UDAFExampleAvgEvaluator() {
      state = new UDAFState();
      init();
    }

    /**
     * Reset the state of the aggregation.
     */
    public void init() {
      state.mSum = 0;
      state.mCount = 0;
    }

    /**
     * Iterate through one row of original data.
     *
     * The number and type of arguments need to be the same as when this UDAF
     * is called from the Hive command line.
     *
     * The row contributes to the aggregation only when {@code act_code} is
     * non-null and equal to {@code act_type}.
     *
     * @param act_code  action code of the current row; may be null
     * @param act_times value added to the sum when the row matches
     * @param act_type  action type the code is compared against; may be null
     * @return always {@code true}
     */
    public boolean iterate(String act_code, long act_times, String act_type) {
      // A row has arrived. Guard against a null act_code (presumably how a SQL
      // NULL column value reaches this method -- verify against the Hive
      // version in use): calling equals() on it would throw a
      // NullPointerException, so a null code is treated as "no match". A null
      // act_type needs no guard because String.equals(null) is false.
      if (act_code != null && act_code.equals(act_type)) {
        state.mSum += act_times;
        state.mCount++;
      }
      return true;
    }

    /**
     * Terminate a partial aggregation and return the state. If the state is a
     * primitive, just return primitive Java classes like Integer or String.
     *
     * @return the current state, or {@code null} when no row has matched
     */
    public UDAFState terminatePartial() {
      // Hand the partial state on to the merge step. With no matching rows
      // there is nothing to contribute, and merge() skips null partials.
      return state.mCount == 0 ? null : state;
    }

    /**
     * Merge with a partial aggregation.
     *
     * This function should always have a single argument which has the same
     * type as the return value of terminatePartial().
     *
     * @param o partial state to fold into this one; {@code null} is ignored
     * @return always {@code true}
     */
    public boolean merge(UDAFState o) {
      // Combine the result of another sub-task into this evaluator.
      if (o != null) {
        state.mSum += o.mSum;
        state.mCount += o.mCount;
      }
      return true;
    }

    /**
     * Terminates the aggregation and returns the final result.
     *
     * @return the sum of {@code act_times} over all matching rows, or 0 when
     *         no row matched
     */
    public long terminate() {
      // The return type is a primitive long, so "no matching rows" is
      // reported as 0 rather than null.
      return state.mCount == 0 ? 0 : state.mSum;
    }
  }

  private ActionCount() {
    // prevent instantiation
  }
}

关键还是要深刻理解 MapReduce 的工作模型，才能更好地驾驭 Hive。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值