1、自定义函数概念
a、UDF(User-Defined Function)
一进一出
b、UDAF(User-Defined Aggregation Function)
多进一出
c、UDTF(User-Defined Table-Generating Functions)
一进多出——例如explode(),通常配合lateral view使用
2、案例
需求
编写UDF函数,返回输入字符串的长度
代码
父模块添加依赖
<!-- Dependencies declared in the parent module so that all child modules share them -->
<dependencies>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>3.1.2</version>
</dependency>
</dependencies>
子模块编写UDF函数
package com.atguigu.udf;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
/**
 * Hive generic UDF that returns the length of its single argument.
 *
 * <p>The argument is converted to text with {@code toString()} and the number
 * of Java {@code char}s in that text is returned as an {@code int}. A SQL
 * {@code NULL} argument yields {@code 0}.
 */
public class MyUDF extends GenericUDF {

    /** Function name used when rendering this call in a query plan. */
    private static final String FUNCTION_NAME = "my_len2";

    /**
     * Validates the call signature and declares the return type.
     *
     * @param arguments inspectors describing the arguments of the call
     * @return the inspector for a Java {@code Integer} result (the string length)
     * @throws UDFArgumentException if the call does not have exactly one argument
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        // Validate the number of arguments.
        if (arguments.length != 1) {
            throw new UDFArgumentException("参数个数不为1");
        }
        // The result is an int: the length of the string.
        return PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    }

    /**
     * Computes the length of the argument for one row.
     *
     * @param arguments deferred argument values; only the first one is read
     * @return {@code 0} when the argument is {@code NULL}, otherwise the length
     *         of the argument's string form
     * @throws HiveException if the deferred argument cannot be evaluated
     */
    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        // DeferredObject is only a lazy wrapper: the real value must be fetched
        // with get(). Calling toString() on the wrapper itself would measure the
        // wrapper's own string form and could never observe a NULL input.
        Object value = arguments[0].get();
        if (value == null) {
            return 0;
        }
        // NOTE(review): assumes the value's toString() yields its text (true for
        // String and Hadoop Text) — confirm for other input types.
        return value.toString().length();
    }

    /**
     * Renders this function call for display in the execution plan.
     *
     * @param children display strings of the argument expressions
     * @return a string of the form {@code my_len2(arg)}
     */
    @Override
    public String getDisplayString(String[] children) {
        // Return a readable expression instead of null so plan output stays meaningful.
        return getStandardDisplayString(FUNCTION_NAME, children);
    }
}
打包放到服务器上
添加jar包并创建临时函数
add jar /opt/module/hive/lib/hive-demo-1.0-SNAPSHOT.jar;
create temporary function my_len2 as "com.atguigu.udf.MyUDF";
使用自定义函数
select my_len2("12345");
结果