第一次写UDAF,拿中位数来练手。
看下中位数定义:
MEDIAN 中位数(一组数据按从小到大的顺序依次排列,处在中间位置的一个数或最中间两个数据的平均数)
写成genericUDAF的形式
1 2 3 4 中位数 (2+3)/2=2.5
1 2 3 中位数 2
附上代码:
package org.apache.hadoop.hive.ql.udf.generic;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.util.StringUtils;
@Description(name="median",value=""
+ "_FUNC_(x) return the median number of a number array. eg: median(x)")
public class GenericUDAFMedian extends AbstractGenericUDAFResolver {
static final Log LOG = LogFactory.getLog(GenericUDAFMedian.class.getName());
/**
 * Picks the median evaluator that matches the type of the single argument.
 *
 * <p>BYTE, SHORT and INT map to {@code GenericUDAFMedianEvaluatorInt}, LONG maps to
 * {@code GenericUDAFMedianEvaluatorLong}, and FLOAT and DOUBLE map to
 * {@code GenericUDAFMedianEvaluatorDouble}. Every other type is rejected.
 *
 * @param parameters type information of the arguments passed to the function;
 *                   exactly one argument is expected
 * @return the evaluator for the argument's primitive category
 * @throws SemanticException (as {@link UDFArgumentTypeException}) when the argument
 *                           count is not 1, when the argument type does not support
 *                           comparison, or when the argument is not one of the
 *                           accepted numeric primitive types
 */
@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
        throws SemanticException {
    if (parameters.length != 1) {
        throw new UDFArgumentTypeException(parameters.length - 1, "Only 1 parameter is accepted!");
    }

    ObjectInspector objectInspector =
            TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
    if (!ObjectInspectorUtils.compareSupported(objectInspector)) {
        throw new UDFArgumentTypeException(parameters.length - 1,
                "Cannot support comparison of map<> type or complex type containing map<>.");
    }

    // Guard the PrimitiveTypeInfo cast below: a comparable but non-primitive argument
    // (for example a list type) would otherwise fail with a ClassCastException instead
    // of a proper argument-type error.
    if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
        throw new UDFArgumentTypeException(0,
                "Only numeric type(int long double) arguments are accepted but "
                + parameters[0].getTypeName() + " was passed as parameter of index->1.");
    }

    switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
    case BYTE:
    case SHORT:
    case INT:
        return new GenericUDAFMedianEvaluatorInt();
    case LONG:
        return new GenericUDAFMedianEvaluatorLong();
    case FLOAT:
    case DOUBLE:
        return new GenericUDAFMedianEvaluatorDouble();
    case STRING:
    case BOOLEAN:
    default:
        // Non-numeric primitives are rejected with the same message as complex types.
        throw new UDFArgumentTypeException(0,
                "Only numeric type(int long double) arguments are accepted but "
                + parameters[0].getTypeName() + " was passed as parameter of index->1.");
    }
}
public static class GenericUDAFMedianEvaluatorInt extends GenericUDAFEvaluator {
private DoubleWritable result = new DoubleWritable() ;
PrimitiveObjectInspector inputOI;
StructObjectInspector structOI;
StandardListObjectInspector listOI;
StructField listField;
Object[] partialResult;
ListObjectInspector listFieldOI;
@Override
public ObjectIns