package com.cc.hiveudf;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
/**
 * Hive generic UDF implementing {@code nvl(value, defaultValue)}: yields {@code value} unless it
 * evaluates to null, in which case {@code defaultValue} is yielded instead.
 *
 * <p>The function takes exactly two arguments whose types must be reconcilable by
 * {@link GenericUDFUtils.ReturnObjectInspectorResolver}; {@link #initialize} rejects the call
 * otherwise.
 */
@Description(name="nvl",value="_FUNC_(value,defaultValue) -Returns default value if value is null else return value",extended="select _FUNC_(null,val) from src limit 1;")
public class UdfT1 extends GenericUDF {

    /**
     * Tracks the object inspectors of both arguments, derives the common return inspector from
     * them and converts argument values to that return type.
     */
    private GenericUDFUtils.ReturnObjectInspectorResolver returnObjectInspectorResolver;

    /** Object inspectors of the call's arguments, captured in {@link #initialize}. */
    private ObjectInspector[] argumentOIs;

    /**
     * Returns the first argument converted to the return type, or the converted second argument
     * when the first one is null.
     *
     * @param arg0 the two lazily evaluated arguments (value, defaultValue)
     * @return the converted value, or the converted default when the value is null
     * @throws HiveException if evaluating a deferred argument fails
     */
    @Override
    public Object evaluate(DeferredObject[] arg0) throws HiveException {
        // Unwrap the deferred argument first: the resolver converts values, not the wrapper,
        // and the null check below must look at the actual value.
        Object returnVal =
                returnObjectInspectorResolver.convertIfNecessary(arg0[0].get(), argumentOIs[0]);
        if (returnVal == null) {
            returnVal =
                    returnObjectInspectorResolver.convertIfNecessary(arg0[1].get(), argumentOIs[1]);
        }
        return returnVal;
    }

    /**
     * Builds the human-readable description of this call that Hive shows for debugging
     * (for example inside Hadoop tasks).
     *
     * @param arg0 string renderings of the two argument expressions
     * @return text of the form {@code if <value> is null returns <defaultValue>}
     */
    @Override
    public String getDisplayString(String[] arg0) {
        StringBuilder sb = new StringBuilder();
        sb.append("if ");
        sb.append(arg0[0]);
        sb.append(" is null returns ");
        sb.append(arg0[1]);
        return sb.toString();
    }

    /**
     * Validates the argument list and determines the return type of the function.
     *
     * @param arg0 object inspectors describing the arguments of the call
     * @return the object inspector for the common type of both arguments
     * @throws UDFArgumentLengthException if the call does not have exactly two arguments
     * @throws UDFArgumentTypeException if the two argument types cannot be reconciled
     * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDF#initialize(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector[])
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arg0) throws UDFArgumentException {
        if (arg0.length != 2) {
            throw new UDFArgumentLengthException("The operate 'nvl' accept 2 arguments!");
        }
        // Only keep the inspectors once the arity is known to be valid.
        argumentOIs = arg0;

        // 'true' is the resolver's type-conversion flag, letting it settle on a common type
        // for arguments whose types differ but are compatible.
        returnObjectInspectorResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
        boolean compatible = returnObjectInspectorResolver.update(argumentOIs[0])
                && returnObjectInspectorResolver.update(argumentOIs[1]);
        if (!compatible) {
            // NOTE(review): argument ids are taken to be zero-based, so the second argument
            // is 1 -- confirm against the Hive version in use.
            throw new UDFArgumentTypeException(1,
                    "1st and 2nd args of func NVL should have same type but 1st type: "
                            + argumentOIs[0].getTypeName()
                            + " 2nd type: " + argumentOIs[1].getTypeName());
        }
        return returnObjectInspectorResolver.get();
    }
}
添加 jar 包到 classpath,注册临时函数并进行测试:
hive> add jar /home/hadoop/funNvl.jar;
hive> create temporary function nvl as 'org.apache.hadoop.hive.ql.udf.generic.UdfT1';
hive> select nvl(null,1) from china limit 1;
OK
1
Time taken: 1.578 seconds, Fetched: 1 row(s)
hive> select nvl(1,"nihao") from china limit 1;
OK
1
Time taken: 0.112 seconds, Fetched: 1 row(s)
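注意:create temporary function 中填写的必须是 jar 包内该类的全限定名(包名 + 类名)。上面的示例使用的是 org.apache.hadoop.hive.ql.udf.generic.UdfT1,因此只有当类的 package 声明为 org.apache.hadoop.hive.ql.udf.generic 时才能加载成功;包名本身并不需要固定为这个值,若按本文件的 package com.cc.hiveudf 编译,则应写成 'com.cc.hiveudf.UdfT1'。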