UDTF逻辑代码步骤
类 extends GenericUDTF{
initialize()//初始化
process() //业务处理
close() //关闭资源
}
例子:splToLines():按指定的分隔符切分字符串,每个片段输出一行
select splToLines("a,b,c",",");
结果:
a
b
c
代码
package HiveUDF1.udtf;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
/**
* @Author shall潇
* @Date 2021/4/7
* @Description
*/
public class TestUDTF extends GenericUDTF {
@Description(
name = "splToLines",
value = "call to the split character,to split the str to lines",
extended = "select splToLines("a,b,c",","); result is :"+
"a\n"+"b\n"+"c\n"
)
List<String> outlist = new ArrayList();
@Override
public StructObjectInspector initialize(StructObjectInspector argOIs) throws UDFArgumentException {
List<String> name = new ArrayList<>(); //定义输出列名,为什么是List,因为返回值要求是两个List
List<ObjectInspector> listOis = new ArrayList<>(); //定义输出类型
name.add("values"); //最后的输出列名为value
listOis.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
return ObjectInspectorFactory.getStandardStructObjectInspector(name,listOis);
}
@Override
public void process(Object[] o) throws HiveException {
String arg = o[0].toString(); //第一个参数:字符串
String splitKey = o[1].toString(); //第二个参数:切分符号
String[] splits = arg.split(splitKey);
for (String split : splits) {
outlist.clear();
outlist.add(split);
forward(outlist);
}
}
@Override
public void close() throws HiveException {
}
}