HiveUDF自定义函数

使用步骤

/*
继承UDF类或GenericUDF类
重写evaluate()方法并实现函数逻辑 
编译并打包为jar文件
复制到正确的HDFS路径 
使用jar创建临时/永久函数 
调用函数
*/

UDF函数

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import java.util.Arrays;

/**
* @DATE 2021/4/7
* @Description
*/
@Description(
        name = "arrContains",
        value = "look around a value is one element of an array or not.result is true or false",
        extended = "select arrContains(array('a','b','c'),'c');" +
                "result is true"
)
public class TestGenericUDF extends GenericUDF {//extends GenericUDF
    ListObjectInspector loi;       // inspector for the array argument
    StringObjectInspector valueOi; // inspector for the value argument
    ObjectInspector paramOi;       // inspector for the array's element type

    /**
     * Validates the argument list and fixes the input/output ObjectInspectors.
     * Called once per query, before any rows are evaluated.
     *
     * @param ois inspectors for the call-site arguments: (array, string)
     * @return boolean inspector describing the function's result type
     * @throws UDFArgumentException if the arity or argument types are wrong
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] ois) throws UDFArgumentException {
        if (ois.length != 2) {
            throw new UDFArgumentException("arrContains expects exactly 2 arguments: (array, value)");
        }
        if (!(ois[0] instanceof ListObjectInspector) || !(ois[1] instanceof StringObjectInspector)) {
            throw new UDFArgumentException("arrContains expects (array<string>, string)");
        }
        loi = (ListObjectInspector) ois[0];
        valueOi = (StringObjectInspector) ois[1];
        paramOi = loi.getListElementObjectInspector();
        return PrimitiveObjectInspectorFactory.javaBooleanObjectInspector;
    }

    /**
     * Returns true iff the second argument equals some element of the first
     * (array) argument. Called once per input row.
     *
     * BUG FIX: the original stored the answer in a member field that was set
     * to true on the first match and never reset, so every later row returned
     * true regardless of its contents. Using a local return makes the result
     * per-row.
     */
    @Override
    public Object evaluate(DeferredObject[] dos) throws HiveException {
        Object array = dos[0].get();
        Object value = dos[1].get();
        if (array == null || value == null) {
            return false; // NULL array or NULL needle can never match
        }
        int length = loi.getListLength(array);
        for (int i = 0; i < length; i++) {
            // NOTE(review): assumes elements deserialize to objects whose
            // equals() matches the needle's runtime type — confirm with the
            // table's serde; a PrimitiveObjectInspector conversion would be
            // more robust.
            if (value.equals(loi.getListElement(array, i))) {
                return true;
            }
        }
        return false;
    }

    /** Text shown in EXPLAIN output for this UDF invocation. */
    @Override
    public String getDisplayString(String[] chis) {
        return "This is arrContains,Hello there!" + Arrays.toString(chis);
    }
}

UDAF 自定义聚合函数

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver2;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.DoubleWritable;
import java.util.ArrayList;
import java.util.List;

/**
* @DATE 2021/4/7
* @Description
*/

public class TestAVG implements GenericUDAFResolver2 {//implements GenericUDAFResolver2

    /**
     * Resolver2 entry point used by modern Hive planners.
     * BUG FIX: the original returned null, which crashed any caller that
     * resolved the evaluator through this overload.
     */
    @Override
    public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info) throws SemanticException {
        ObjectInspector[] paramOis = info.getParameterObjectInspectors();
        if (paramOis.length != 1) {
            throw new SemanticException("this avg takes exactly one numeric argument");
        }
        return new AvgTest();
    }

    /** Legacy resolver path; delegates to Hive's built-in double average evaluator. */
    @Override
    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
        return new GenericUDAFAverage.GenericUDAFAverageEvaluatorDouble();
    }

    /** Average evaluator: accumulates (sum, count) and returns sum/count. */
    public static class AvgTest extends GenericUDAFEvaluator {

        DoubleObjectInspector ooi; // input inspector
        DoubleWritable dw;         // reusable output holder

        @Override
        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
            AvgAgg agg = new AvgAgg();
            reset(agg);
            return agg;
        }

        /** Clears the buffer so it can be reused for a new group. */
        @Override
        public void reset(AggregationBuffer agg) throws HiveException {
            AvgAgg a = (AvgAgg) agg;
            a.value = 0.0;
            a.sum = 0.0;
            a.count = 0;
        }

        /**
         * Declares this evaluator's output type for the given mode.
         * BUG FIX: the original returned the {sum,count} struct inspector in
         * ALL modes, but terminate() produces a DoubleWritable — FINAL and
         * COMPLETE must advertise a double, not a struct.
         */
        @Override
        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
            super.init(m, parameters);
            ooi = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
            dw = new DoubleWritable(0.0);
            if (m == Mode.PARTIAL1 || m == Mode.PARTIAL2) {
                // Partial output is a struct {sum: double, count: int}.
                List<String> fieldNames = new ArrayList<>();
                fieldNames.add("sum");
                fieldNames.add("count");
                List<ObjectInspector> structFieldOis = new ArrayList<>();
                structFieldOis.add(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
                structFieldOis.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
                return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, structFieldOis);
            }
            // FINAL / COMPLETE: the finished average as a double.
            return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
        }

        /**
         * Consumes one raw input row: adds the value to the running sum and
         * bumps the count. NULL column values are skipped (the original
         * NPE'd on them during unboxing).
         */
        @Override
        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
            if (parameters == null || parameters.length == 0 || parameters[0] == null) {
                return;
            }
            AvgAgg a = (AvgAgg) agg;
            // NOTE(review): assumes the value arrives as a java Double —
            // confirm against the table serde; a writable input would need
            // ooi.get(parameters[0]) instead.
            a.sum += (Double) parameters[0];
            a.count++;
        }

        /**
         * Emits the partial aggregation for shipping between stages.
         * BUG FIX: must produce the {sum,count} struct declared by init()
         * for PARTIAL modes — the original returned terminate()'s final
         * DoubleWritable, which does not match the advertised inspector.
         */
        @Override
        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
            AvgAgg a = (AvgAgg) agg;
            return new Object[]{a.sum, a.count};
        }

        /**
         * Folds a partial {sum,count} result into the buffer. Handles both
         * the serialized (LazyBinaryStruct) and in-memory (Object[]) forms;
         * debug printlns from the original were removed.
         */
        @Override
        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
            if (partial == null) {
                return;
            }
            AvgAgg a = (AvgAgg) agg;
            if (partial instanceof LazyBinaryStruct) {
                LazyBinaryStruct lbs = (LazyBinaryStruct) partial;
                // NOTE(review): assumes fields deserialize to Double/Integer — confirm with serde.
                a.sum += (Double) lbs.getField(0);
                a.count += (Integer) lbs.getField(1);
            } else if (partial instanceof Object[]) {
                Object[] fields = (Object[]) partial;
                a.sum += (Double) fields[0];
                a.count += (Integer) fields[1];
            }
        }

        /**
         * Produces the final average. Returns NULL for an empty group
         * (the original divided by zero and returned NaN).
         */
        @Override
        public Object terminate(AggregationBuffer agg) throws HiveException {
            AvgAgg a = (AvgAgg) agg;
            if (a.count == 0) {
                return null;
            }
            return new DoubleWritable(a.sum / a.count);
        }

        /** Aggregation state: running sum and row count. */
        @AggregationType(estimable = true)
        static class AvgAgg extends AbstractAggregationBuffer {
            Double sum = 0.0;
            Integer count = 0;
            double value;
            public int estimate() {
                return 16; // rough fixed-size estimate: one double + one int + padding
            }
        }
    }
}

UDTF 自定义表生成函数(table-generating function,一行输入产生多行输出)

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;


import java.util.ArrayList;
import java.util.List;


/**
* @DATE 2021/4/7
* @Description
*/
@Description(
        name = "split to lines",
        value = "call to the split character,to split the str to lines",
        extended = "select splToLines(array('a','b','c'));result is:"+
                "a\n"+
                "b\n"+
                "c"
)
public class TestUDTF extends GenericUDTF {//extends GenericUDTF

    StringObjectInspector strOi;         // inspector for the input string
    StringObjectInspector separatorChar; // inspector for the separator argument

    /**
     * Validates the two string arguments and declares the single-column
     * ("values") output row schema.
     *
     * BUG FIX: the original never assigned {@code separatorChar}, so every
     * call to process() threw a NullPointerException. It also ignored the
     * actual input inspectors; we now take them from {@code argOIs} so the
     * UDTF works regardless of how the arguments are delivered.
     *
     * @throws UDFArgumentException if arity or argument types are wrong
     */
    @Override
    public StructObjectInspector initialize(StructObjectInspector argOIs) throws UDFArgumentException {
        if (argOIs.getAllStructFieldRefs().size() != 2) {
            throw new UDFArgumentException("splToLines expects 2 arguments: (string, separator)");
        }
        ObjectInspector arg0 = argOIs.getAllStructFieldRefs().get(0).getFieldObjectInspector();
        ObjectInspector arg1 = argOIs.getAllStructFieldRefs().get(1).getFieldObjectInspector();
        if (!(arg0 instanceof StringObjectInspector) || !(arg1 instanceof StringObjectInspector)) {
            throw new UDFArgumentException("splToLines: both arguments must be strings");
        }
        strOi = (StringObjectInspector) arg0;
        separatorChar = (StringObjectInspector) arg1;

        List<String> name = new ArrayList<>();
        name.add("values");
        List<ObjectInspector> listOis = new ArrayList<>();
        listOis.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(name, listOis);
    }

    /**
     * Splits the first argument on the second (a regex, per String.split)
     * and forwards one output row per piece. NULL inputs produce no rows.
     */
    @Override
    public void process(Object[] args) throws HiveException {
        String str = strOi.getPrimitiveJavaObject(args[0]);
        String separator = separatorChar.getPrimitiveJavaObject(args[1]);
        if (str == null || separator == null) {
            return;
        }
        for (String line : str.split(separator)) {
            forward(new Object[]{line});
        }
    }

    /** Nothing to release: this UDTF holds no external resources. */
    @Override
    public void close() throws HiveException {
        // no-op
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值