1、继承GenericUDTF
2、实现:
initialize():调一次
process ():一行数据,调一次
close():调一次
过程
package udtf;//udtf.MyUDTFimport org.apache.hadoop.hive.ql.exec.UDFArgumentException;import org.apache.hadoop.hive.ql.metadata.HiveException;import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;import java.util.ArrayList;import java.util.List;//publicclassMyUDTFextendsGenericUDTF{private List<String> dataList =newArrayList<String>();// 定义输出数据的列名和数据类型@Overridepublic StructObjectInspector initialize(StructObjectInspector argOIs)throws UDFArgumentException {//定义输出数据的列名
List<String> fieldNames =newArrayList<String>();
fieldNames.add("word");//定义输出数据的类型 函数的放回值
List<ObjectInspector> fieldOIs =newArrayList<ObjectInspector>();
fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);}/**
* Give a set of arguments for the UDTF to process.
*
* @param args object array of arguments
*/publicvoidprocess(Object[] args)throws HiveException {//1、获取数据
String data = args[0].toString();//2、获取分隔符
String splitKey=args[1].toString();//3、切分数据
String[] words = data.split(splitKey);//4、遍历写出for(String word : words){//5、将数据放置集合
dataList.clear();
dataList.add(word);//6、写出数据forward(dataList);}}/**
* Called to notify the UDTF that there are no more rows to process.
* Clean up code or additional forward() calls can be made here.
*/publicvoidclose()throws HiveException {}}