package hive;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.ArrayList;
import java.util.List;
/*
 * Table-generating function (UDTF): turns one "k:v;k:v;..." string into multiple (key, value) rows.
 */
public class MyUDTF extends GenericUDTF {
    @Override
    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        // Define the list of output column names and add the names of the columns to emit
        List<String> structFieldNames = new ArrayList<>();
        structFieldNames.add("key");
        structFieldNames.add("value");
        // Define the list of output column types and add one ObjectInspector per output column
        List<ObjectInspector> objectInspectorList = new ArrayList<>();
        objectInspectorList.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        objectInspectorList.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, objectInspectorList);
    }
    /*
     * Core processing method, called once per input row.
     */
    @Override
    public void process(Object[] objects) throws HiveException {
        // Take the first argument and convert it to a string, e.g. "name:zhang;age:30;address:shenzhen"
        String inputString = objects[0].toString();
        // Split the example string above into an array on the semicolon separator
        String[] split = inputString.split(";");
        // Each element looks like s = "name:zhang"
        for (String s : split) {
            // Split each element into its key part and value part
            String[] kvArray = s.split(":");
            // To emit multiple columns, put the column values into one array and pass that array to forward()
            forward(kvArray);
        }
    }
    // Clean-up hook; nothing to release here
    @Override
    public void close() throws HiveException {
    }
}
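Once the class is packaged into a jar, it can be registered and called from Hive. Below is a minimal usage sketch; the jar path and the function name my_udtf are placeholders, and the LATERAL VIEW example assumes a hypothetical table t with columns id and info:

-- Load the jar into the Hive session (placeholder path)
ADD JAR /path/to/your-udtf.jar;
-- Register the UDTF under an assumed name; the class name matches the code above
CREATE TEMPORARY FUNCTION my_udtf AS 'hive.MyUDTF';

-- Each k:v pair in the argument becomes one (key, value) output row
SELECT my_udtf('name:zhang;age:30;address:shenzhen');

-- Typical use alongside other columns (hypothetical table t)
SELECT t.id, kv.key, kv.value
FROM t LATERAL VIEW my_udtf(t.info) kv AS key, value;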
Configure pom.xml by referring to the earlier Hive custom-class (UDF) setup.
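For reference, the build needs the Hive UDF API on the compile classpath. A minimal sketch of the relevant pom.xml dependency; the version is an assumption and should match your cluster's Hive release:

<dependency>
    <!-- provides GenericUDTF and the ObjectInspector classes used above -->
    <groupId>org.apache.hadoop.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <!-- assumption: pick the version matching your Hive installation -->
    <version>3.1.2</version>
    <!-- provided scope: the cluster already ships hive-exec at runtime -->
    <scope>provided</scope>
</dependency>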