之前在博客中分享了个生成从1到n的连续数字的transform或map方法,最近研究了一下UDTF,发现用UDTF写出来的函数用起来更方便,语法更接近于普通的函数。而且在JAVA中对参数的个数、类型等进行校验也更容易。
ORACLE中生成从1到n的连续数字的方法很多,最简单的一种是:
select level from dual connect by level<=5;
----------
使用UDTF写出来的函数进行查询:
hive> select serial(5) as col1 from dual;
OK
1
2
3
4
5
1
2
3
4
5
或者使用lateral view进行查询:
hive> select T.col1 from dual lateral view serial(5) T as col1;
OK
1
2
3
4
5
1
2
3
4
5
提供一下java代码,仅供参考:
package com.hadoopbook.hive;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
public class UDTFSerial extends GenericUDTF {
Object[] result = new Object[1];
@Override
public void close() throws HiveException {
}
@Override
public StructObjectInspector initialize(ObjectInspector[] args)
throws UDFArgumentException {
if (args.length != 1) {
throw new UDFArgumentLengthExcepti
on("UDTFSerial takes only one argument");
}
if (!args[0].getTypeName().equals("int")) {
throw new UDFArgumentException("UDTFSerial only takes an integer as a parameter");
}
ArrayList<String> fieldNames = new ArrayList<String>();
ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
fieldNames.add("col1");
fieldOIs.add(PrimitiveObjectInspector
Factory.javaIntObjectInspector);
return ObjectInspectorFactory.getStandardStructObjectI
nspector(fieldNames,fieldOIs);
}
@Override
public void process(Object[] args) throws HiveException {
try
{
int n = Integer.parseInt(args[0].toString());
for (int i=0;i<n;i++)
{
result[0] = i+1;
forward(result);
}
}
catch (Exception e) {
throw new HiveException("UDTFSerial has an exception");
}
}
}
}