package comxxx.hive;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import java.text.DecimalFormat;
import java.util.*;
/**
 * 1. Writing the resolver -- 负责解析函数的元数据,对函数传入参数做类型检查,并说明函数的返回值类型等
 * 2. Writing the evaluator -- 负责实际计算,需实现以下方法:
 *    2.1 getNewAggregationBuffer
 *    2.2 iterate
 *    2.3 terminatePartial
 *    2.4 merge
 *    2.5 terminate
 *
 * 3. UDAF 的运行原理:
 *    ① 在 group by 分组后运行
 *    ② 运行的范围是分组内的一组数据
 *    ③ 依次对组中的每一行进行计算,最终得到一行结果
 * 4. 函数如何使用? -- 分组后直接调用函数,传入 spu_name:
 *    select
 *      coupon_id, myudaf(spu_name)
 *    from test6
 *    group by coupon_id
 */
public class MyUDAF extends Abstr
// NOTE(review): the following lines are blog-page metadata captured during extraction,
// not Java source — preserved as a comment so the text is not lost:
//   离线数仓项目:自定义UDAF函数
//   最新推荐文章于 2024-04-26 15:25:50 发布