用户自定义函数
1 UDF函数的创建 一进一出
1 添加maven依赖
<dependencies>
<!-- hive-exec provides the org.apache.hadoop.hive.ql.* classes (GenericUDF, GenericUDTF, ...) imported by the implementations below -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>3.1.2</version>
</dependency>
</dependencies>
2 实现类
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
public class MyStringLength extends GenericUDF {
public ObjectInspector initialize(ObjectInspector[] objectInspectors) throws UDFArgumentException {
if (objectInspectors.length != 1)
throw new UDFArgumentLengthException("Input Args Length Error");
if (!objectInspectors[0].getCategory().equals(ObjectInspector.Category.PRIMITIVE)) {
throw new UDFArgumentTypeException(0, "Input Args Type Error !");
}
return PrimitiveObjectInspectorFactory.javaIntObjectInspector;
}
public Object evaluate(DeferredObject[] deferredObjects) throws HiveException {
if(deferredObjects[0].get() == null)
return 0;
return deferredObjects[0].get().toString().length();
}
public String getDisplayString(String[] strings) {
return "";
}
}
3 打jar包上传到服务器上
rz xxx
4 添加jar包
add jar /opt/module/data/udf.jar;
5 创建函数关联jar包中的函数实现类
create temporary function my_len as "MyStringLength";
6 使用自定义函数
select my_len("hello");
2 UDTF函数的创建使用 一进多出
1 实现代码
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * Hive generic UDTF (one row in, many rows out) that splits a string into separate rows.
 *
 * <p>It takes two arguments: the text to split and the separator. Each piece produced by
 * {@link String#split(String)} is forwarded as one output row with a single string column
 * named {@code lineToWord}. The separator is therefore interpreted as a regular expression.
 */
public class MyUDTF extends GenericUDTF {

    /** Reused single-column output row; cleared and refilled before every forward call. */
    private final List<String> outList = new ArrayList<>();

    /**
     * Validates the call signature and declares the output schema.
     *
     * @param argOIs inspectors describing the arguments of the call
     * @return a struct inspector with one string column named {@code lineToWord}
     * @throws UDFArgumentException if the number of arguments is not exactly two
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        // Fail at query compile time instead of with an ArrayIndexOutOfBoundsException per row.
        if (argOIs.length != 2) {
            throw new UDFArgumentException(
                    "Expected exactly 2 arguments (text, separator) but got " + argOIs.length);
        }

        List<String> fieldNames = new ArrayList<>();
        List<ObjectInspector> fieldOIs = new ArrayList<>();
        fieldNames.add("lineToWord");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /**
     * Splits the first argument by the second and forwards one row per piece.
     *
     * @param objects the argument values of the current input row: text, then separator
     * @throws HiveException if forwarding a row fails
     */
    @Override
    public void process(Object[] objects) throws HiveException {
        // A NULL text or NULL separator produces no output rows instead of a NullPointerException.
        if (objects[0] == null || objects[1] == null) {
            return;
        }

        String text = objects[0].toString();
        String separator = objects[1].toString();
        for (String piece : text.split(separator)) {
            outList.clear();
            outList.add(piece);
            forward(outList);
        }
    }

    /**
     * Called when there are no more rows to process; nothing needs to be released here.
     *
     * @throws HiveException never thrown by this implementation
     */
    @Override
    public void close() throws HiveException {
        // No resources to clean up.
    }
}
2 打包上传服务器
3 添加jar包
add jar /opt/module/data/udtf.jar;
4 创建函数关联jar包中的函数实现类
create temporary function my_udtf as "MyUDTF";
5 使用自定义UDTF函数