hive中udf、udtf、udaf函数说明和写法(直接上代码)

下面给出hive中几种自定义函数(udf、udtf、udaf)的示例代码。这部分比较容易理解,所以直接上代码。首先导入maven依赖:

       <!-- hive-exec supplies the UDF / GenericUDTF / GenericUDAF base classes used below. -->
       <!-- Scope "provided": needed at compile time only; Maven does not package it into the built jar. -->
       <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>1.1.0</version>
            <scope>provided</scope>
        </dependency>

1、udf函数,实现传入一个字符串,返回其长度


/**
 * Simple Hive UDF that returns the length of a string.
 */
public class TestLength extends UDF {

    /**
     * Returns the length of {@code s} as reported by {@link String#length()}
     * (a count of UTF-16 code units).
     *
     * <p>The return type is the boxed {@code Integer} so that a NULL input can be
     * answered with NULL instead of failing with a {@link NullPointerException}.
     * Callers that assign the result to an {@code int} keep compiling through
     * auto-unboxing, and results for non-null input are unchanged.
     *
     * @param s the input string; may be {@code null}
     * @return the length of {@code s}, or {@code null} when {@code s} is {@code null}
     */
    public static Integer evaluate(String s) {
        if (s == null) {
            return null;
        }
        return s.length();
    }
}

2、udtf,实现传入一个JSON数组字符串,将数组中的每个JSON对象作为一行字符串输出,结合lateral view调用

public class JsonArrayToString extends GenericUDTF {
    @Override
    public void process(Object[] args) throws HiveException {
        String input = (String) args[0];
        JSONArray json = JSONArray.fromObject(input );
//        JSONArray parse = JSONObject.parseArray(input);
        for (int j = 0; j < json.size(); j++) {
            JSONObject result1JSONObject = json.getJSONObject(j);
            String s = result1JSONObject.toString();
            forward(s);
        }
    }

    @Override
    public void close() throws HiveException {

    }

3、udaf,实现多行数据聚合成为一行。这个函数目前工作中未用到,在此贴一个官方的代码(GenericUDAFMin),需要实现时参考即可:主要实现iterate、terminatePartial、merge,最后实现terminate即可

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.udf.generic;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ptf.BoundaryDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax.MaxStreamingFixedWindow;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

/**
 * Resolver for the {@code min} aggregate. It validates the argument list and
 * supplies an evaluator that keeps the smallest value it has been given.
 */
@Description(name = "min", value = "_FUNC_(expr) - Returns the minimum value of expr")
public class GenericUDAFMin extends AbstractGenericUDAFResolver {

  static final Log LOG = LogFactory.getLog(GenericUDAFMin.class.getName());

  /**
   * Checks that exactly one argument of a comparable type was supplied and
   * returns a new {@link GenericUDAFMinEvaluator}.
   *
   * @param parameters type information of the call's arguments
   * @return a fresh evaluator instance
   * @throws SemanticException if the argument count is not one, or the argument
   *         type is not supported for comparison
   */
  @Override
  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
    throws SemanticException {
    final int lastArgIndex = parameters.length - 1;
    if (parameters.length != 1) {
      throw new UDFArgumentTypeException(lastArgIndex,
          "Exactly one argument is expected.");
    }
    ObjectInspector argInspector =
        TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
    boolean comparable = ObjectInspectorUtils.compareSupported(argInspector);
    if (!comparable) {
      throw new UDFArgumentTypeException(lastArgIndex,
          "Cannot support comparison of map<> type or complex type containing map<>.");
    }
    return new GenericUDAFMinEvaluator();
  }

  /**
   * Evaluator that tracks the minimum. Raw rows and partial results both go
   * through {@link #merge}, and the partial result is the same object as the
   * final result.
   */
  @UDFType(distinctLike=true)
  public static class GenericUDAFMinEvaluator extends GenericUDAFEvaluator {

    private transient ObjectInspector inputOI;
    private transient ObjectInspector outputOI;

    /**
     * Records the inspector of the single input and derives the output
     * inspector as its standard Java-object counterpart.
     *
     * @param m evaluation mode, passed through to the superclass
     * @param parameters inspectors of the inputs; exactly one is expected
     * @return the inspector describing the values this evaluator returns
     */
    @Override
    public ObjectInspector init(Mode m, ObjectInspector[] parameters)
        throws HiveException {
      assert (parameters.length == 1);
      super.init(m, parameters);
      inputOI = parameters[0];
      // The buffer holds a standard Java copy of the current minimum, so the
      // output inspector is the Java-object form of the input inspector.
      outputOI = ObjectInspectorUtils.getStandardObjectInspector(inputOI,
          ObjectInspectorCopyOption.JAVA);
      return outputOI;
    }

    /** Aggregation buffer holding the current minimum value ({@code null} until one is seen). */
    static class MinAgg extends AbstractAggregationBuffer {
      Object o;
    }

    /** Creates an empty buffer. */
    @Override
    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
      return new MinAgg();
    }

    /** Clears the stored minimum so the buffer can be reused. */
    @Override
    public void reset(AggregationBuffer agg) throws HiveException {
      ((MinAgg) agg).o = null;
    }

    boolean warned = false;

    /** Folds one input row into the buffer by delegating to {@link #merge}. */
    @Override
    public void iterate(AggregationBuffer agg, Object[] parameters)
        throws HiveException {
      assert (parameters.length == 1);
      merge(agg, parameters[0]);
    }

    /** The partial result is simply the current minimum. */
    @Override
    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
      return terminate(agg);
    }

    /**
     * Replaces the buffered value with a copy of {@code partial} when the buffer
     * is empty or {@code partial} compares lower. A {@code null} partial is ignored.
     */
    @Override
    public void merge(AggregationBuffer agg, Object partial)
        throws HiveException {
      if (partial == null) {
        return;
      }
      MinAgg buffer = (MinAgg) agg;
      int cmp = ObjectInspectorUtils.compare(buffer.o, outputOI, partial, inputOI);
      boolean candidateWins = buffer.o == null || cmp > 0;
      if (candidateWins) {
        buffer.o = ObjectInspectorUtils.copyToStandardObject(partial, inputOI,
            ObjectInspectorCopyOption.JAVA);
      }
    }

    /** Returns the buffered minimum, or {@code null} if nothing was merged. */
    @Override
    public Object terminate(AggregationBuffer agg) throws HiveException {
      return ((MinAgg) agg).o;
    }

    /** Builds the streaming evaluator for a window frame from the frame's start and end amounts. */
    @Override
    public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) {
      BoundaryDef frameStart = wFrmDef.getStart();
      BoundaryDef frameEnd = wFrmDef.getEnd();
      int preceding = frameStart.getAmt();
      int following = frameEnd.getAmt();
      return new MinStreamingFixedWindow(this, preceding, following);
    }

  }

  /**
   * Streaming variant for fixed windows. It reuses {@code MaxStreamingFixedWindow}
   * and supplies a "less than" test in {@code removeLast}.
   */
  static class MinStreamingFixedWindow extends MaxStreamingFixedWindow {

    public MinStreamingFixedWindow(GenericUDAFEvaluator wrappedEval,
        int numPreceding, int numFollowing) {
      super(wrappedEval, numPreceding, numFollowing);
    }

    /** Input inspector of the wrapped min evaluator. */
    protected ObjectInspector inputOI() {
      GenericUDAFMinEvaluator minEval = (GenericUDAFMinEvaluator) wrappedEval;
      return minEval.inputOI;
    }

    /** Output inspector of the wrapped min evaluator. */
    protected ObjectInspector outputOI() {
      GenericUDAFMinEvaluator minEval = (GenericUDAFMinEvaluator) wrappedEval;
      return minEval.outputOI;
    }

    /**
     * Returns true when {@code in} is non-null and either {@code last} is null
     * or {@code in} compares strictly lower than {@code last}.
     */
    protected boolean removeLast(Object in, Object last) {
      if (in == null) {
        return false;
      }
      return last == null
          || ObjectInspectorUtils.compare(in, inputOI(), last, outputOI()) < 0;
    }

  }

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值