导入依赖
<dependencies>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>3.1.2</version>
</dependency>
</dependencies>
代码部分
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * Example Hive UDTF: splits its first string argument by the separator given as
 * the second argument and emits one row per resulting token, with a single
 * output column named "word".
 *
 * Usage in HiveQL: {@code SELECT udtf_name(col, ',') FROM t;}
 */
public class UDTFTest01 extends GenericUDTF {
    // Reusable single-column output row; cleared before every forward() call.
    private final ArrayList<String> list = new ArrayList<>();

    /**
     * Declares the output schema (one string column, "word") and validates
     * the argument count.
     *
     * @param argOIs inspector describing the actual call-site arguments
     * @return inspector for the single-column output struct
     * @throws UDFArgumentException if the UDTF is not called with exactly two arguments
     */
    @Override
    public StructObjectInspector initialize(StructObjectInspector argOIs) throws UDFArgumentException {
        // process() unconditionally reads objects[1], so fail fast at plan time
        // instead of with an ArrayIndexOutOfBoundsException at runtime.
        if (argOIs.getAllStructFieldRefs().size() != 2) {
            throw new UDFArgumentException(
                    "UDTFTest01 takes exactly 2 arguments: the input string and the separator");
        }
        List<String> names = new ArrayList<>();
        names.add("word");
        List<ObjectInspector> fieldOIs = new ArrayList<>();
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(names, fieldOIs);
    }

    /**
     * Splits the input by the separator and forwards one row per token.
     * Rows where either argument is NULL are skipped (previously this threw
     * a NullPointerException).
     *
     * @param objects [0] = input string, [1] = separator
     */
    @Override
    public void process(Object[] objects) throws HiveException {
        // Hive passes SQL NULLs as Java nulls; emit nothing for such rows.
        if (objects[0] == null || objects[1] == null) {
            return;
        }
        String input = objects[0].toString();
        // NOTE: String.split treats the separator as a regular expression, so
        // separators such as "." or "|" are regex metacharacters here.
        String separator = objects[1].toString();
        for (String word : input.split(separator)) {
            list.clear();
            list.add(word);
            forward(list);
        }
    }

    /** No buffered state or resources to release. */
    @Override
    public void close() throws HiveException {
    }
}
创建函数
- 将jar包传入集群
- 打开hive并且导入jar包(如果将jar包直接放入hive的lib目录的话,这一步可忽略)
add jar jar_path;
- 在hive中创建函数
create function function_name as 'class_path'
function_name是为你自定义的函数取名,class_path是类的全限定名(包名.类名,例如 com.example.UDTFTest01),而不是class文件在磁盘上的路径 - 如果不想使用可以删除函数
drop function function_name