Hive 自定义函数 UDF
当 Hive 提供的内置函数无法满足业务处理需要时,此时就可以考虑使用用户自定义函数
UDF(user-defined function)作用于单个数据行,产生一个数据行作为输出。(数学函数,字符串函数)
UDAF(用户定义聚集函数 User-Defined Aggregation Function):接收多个输入数据行,并产生一个输出数据行。(count,max)
UDTF(表格生成函数 User-Defined Table Functions):接收一行输入,输出多行(explode)
简单的 UDF 示例
/**
*Hive 自定义函数 UDF
*@author Lv_Hulk
*/
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
1.自定义NVL函数
@Description(name = "nvl",
value = "_FUNC_(value,default_value) - Returns default value if value"
+" is null else returns value",
extended = "Example:\n"
+ " > SELECT _FUNC_(null,'bla') FROM src LIMIT 1;\n")
/**
 * NVL: returns the first argument when it is non-null, otherwise the second.
 * Mirrors the classic SQL NVL(value, default_value) behavior.
 */
public class GenericUDFNvl extends GenericUDF {
// Resolves a single common return type for both arguments and converts values to it.
private GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
// Object inspectors for the two arguments, captured at initialize() time.
private ObjectInspector[] argumentOIs;
/**
 * Validates the arguments and determines the common return type.
 *
 * @throws UDFArgumentException if there are not exactly 2 arguments,
 *         or the two arguments have no common type
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
throws UDFArgumentException {
argumentOIs = arguments;
// NVL takes exactly two arguments: the value and its fallback.
if (arguments.length != 2) {
throw new UDFArgumentLengthException(
"The operator 'NVL' accepts 2 arguments.");
}
returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
// Both arguments must be convertible to a single common type.
if (!(returnOIResolver.update(arguments[0]) && returnOIResolver
.update(arguments[1]))) {
// Fixed typo: the original message said "function NLV".
throw new UDFArgumentTypeException(2,
"The 1st and 2nd args of function NVL should have the same type, "
+ "but they are different: \"" + arguments[0].getTypeName()
+ "\" and \"" + arguments[1].getTypeName() + "\"");
}
return returnOIResolver.get();
}
/** Returns the first argument if non-null, otherwise the second, converted to the common type. */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
Object retVal = returnOIResolver.convertIfNecessary(arguments[0].get(),
argumentOIs[0]);
if (retVal == null) {
retVal = returnOIResolver.convertIfNecessary(arguments[1].get(),
argumentOIs[1]);
}
return retVal;
}
/** Human-readable form of the call, shown e.g. in EXPLAIN output. */
@Override
public String getDisplayString(String[] children) {
// The original concatenated "returns" and children[1] with no separating
// spaces, producing e.g. "if x is null returnsy".
return "if " + children[0] + " is null returns " + children[1];
}
}
2.打包
$hive> ADD JAR /path/to/jar.jar;
3.注册临时函数(ADD JAR 已将 jar 加入类路径,此步将类注册为可调用的函数)
$hive> CREATE TEMPORARY FUNCTION nvl AS 'ts.demo.hive.GenericUDFNvl';
4.使用
$hive> select nvl( NULL, 5 );
$hive> select nvl( 1, 5 );
2. 自定义日期函数
1.将日期转成字符串进行输出
@Description(name = "ToChar",
value = "使用方式如下:toChar()",
extended = "toChar_xxxx-ext")
public class ToCharUDF extends GenericUDF {
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
return null;
}
public Object evaluate(DeferredObject[] args) throws HiveException {
//有参数
if(args != null && args.length != 0){
//指定日志对象的格式化串
if(args.length == 1){
SimpleDateFormat sdf = new SimpleDateFormat();
sdf.applyPattern("yyyy/MM/dd hh:mm:ss");
return sdf.format((Date)(args[0].get()));
}
//两个参数,Date date,String frt
else{
SimpleDateFormat sdf = new SimpleDateFormat();
sdf.applyPattern((String)args[1].get());
return sdf.format(args[0].get());
}
}
//无参,返回系统时间的格式化串
else{
Date date = new Date();
SimpleDateFormat sdf = new SimpleDateFormat();
sdf.applyPattern("yyyy/MM/dd hh:mm:ss");
return sdf.format(date);
}
}
public String getDisplayString(String[] children) {
return "toChar_xxx";
}
}
2.将字符串转成日期进行输出
@Description(name = "ToDate",
value = "toDate()",
extended = "toDate_xxxx-ext")
public class ToDateUDF extends GenericUDF {
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
return null;
}
public Object evaluate(DeferredObject[] args) throws HiveException {
//有参数
if(args != null && args.length != 0){
//指定日志对象的格式化串
if(args.length == 1){
SimpleDateFormat sdf = new SimpleDateFormat();
sdf.applyPattern("yyyy/MM/dd hh:mm:ss");
try {
return sdf.parse((String)(args[0].get()));
} catch (ParseException e) {
e.printStackTrace();
}
}
//两个参数,Date date,String frt
else{
SimpleDateFormat sdf = new SimpleDateFormat();
sdf.applyPattern((String)args[1].get());
try {
return sdf.parse((String)args[0].get());
} catch (ParseException e) {
e.printStackTrace();
}
}
}
//无参,返回系统时间对象
else{
return new Date();
}
return null ;
}
public String getDisplayString(String[] children) {
return "toChar_xxx";
}
}
3.打包
$hive> ADD JAR /path/to/jar.jar;
4.注册临时函数(ADD JAR 已将 jar 加入类路径,此步将类注册为可调用的函数)
$hive> CREATE TEMPORARY FUNCTION tochar AS 'ts.demo.hive.ToCharUDF';
5.使用
$hive> select ***;
$hive> select ***;