导入依赖
<dependencies>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>3.1.2</version>
</dependency>
</dependencies>
代码部分
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
public class UDFTest01 extends GenericUDF {
@Override
public ObjectInspector initialize(ObjectInspector[] objectInspectors) throws UDFArgumentException {
if (objectInspectors.length!=1){
throw new UDFArgumentException("参数个数不为一");
}
return PrimitiveObjectInspectorFactory.javaIntObjectInspector;
}
@Override
public Object evaluate(DeferredObject[] deferredObjects) throws HiveException {
String input = deferredObjects[0].get().toString();
if (input==null){
return 0;
}
return input.length();
}
@Override
public String getDisplayString(String[] strings) {
return "";
}
}
创建函数
- 将jar包传入集群
- 打开hive并且导入jar包(如果将jar包直接放入hive的lib目录的话,这一步可忽略)
add jar jar_path
- 在hive中创建函数
create function function_name as 'class_name';
function_name是为你自定义的函数取名,class_name是UDF类的全限定类名(即"包名.类名",不是class文件的路径,例如本例中为 UDFTest01)。
- 如果不想使用可以删除函数
drop function function_name