hive自定义函数

  1. UDF
    UDF是一种常见的自定义函数,用于对单个输入参数进行计算。以下是一个求平方的UDF例子:
package com.example.hive.udf;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;

@Description(name = "square", value = "_FUNC_(x) - Returns the square of x",
        extended = "Example:\n > SELECT square(3) FROM src LIMIT 1;\n 9")
public class Square extends UDF {

    /**
     * Returns {@code n} squared, or null when the input is null
     * (SQL NULL-in, NULL-out semantics).
     */
    public DoubleWritable evaluate(final DoubleWritable n) {
        if (n == null) return null;
        final double v = n.get();
        return new DoubleWritable(v * v);
    }

    /**
     * String overload: parses the text as a double and returns its square.
     * Returns null for a null input or unparseable text, mirroring Hive's
     * lenient NULL-on-bad-value behavior instead of failing the query.
     */
    public DoubleWritable evaluate(final Text str) {
        if (str == null) return null;
        try {
            // Parse once; the original parsed the same string twice.
            final double v = Double.parseDouble(str.toString());
            return new DoubleWritable(v * v);
        } catch (NumberFormatException e) {
            return null;
        }
    }
}

<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>3.2.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>3.1.2</version>
    </dependency>
</dependencies>

  2. UDAF

UDAF是一种用于聚合计算的自定义函数,例如求平均值、最大值等。以下是一个求平均值的UDAF例子:
package com.example.hive.udaf;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;

@Description(name = "my_avg", value = "_FUNC_(x) - Returns the average of a set of numbers",
        extended = "Example:\n > SELECT my_avg(column_name) FROM table_name;")
public class MyAvg extends UDAF {

    /**
     * Partial aggregation state shipped between map and reduce stages.
     * Public no-arg class with public fields so Hive's reflection-based
     * UDAF bridge can inspect and serialize it.
     */
    public static class PartialResult {
        public double sum;
        public long count;
    }

    public static class AvgEvaluator implements UDAFEvaluator {
        private double sum = 0;
        private long count = 0;

        /** Resets the running state before the evaluator is (re)used. */
        public void init() {
            sum = 0;
            count = 0;
        }

        /** Accumulates one input row; NULLs are skipped per SQL AVG semantics. */
        public boolean iterate(DoubleWritable value) {
            if (value != null) {
                sum += value.get();
                count++;
            }
            return true;
        }

        /**
         * Emits the partial state for the combiner/reducer.
         *
         * BUG FIX: the original returned the partial *average* and merge()
         * then counted it as a single data point, which gives a wrong result
         * whenever partials cover different numbers of rows. Averages are not
         * mergeable; (sum, count) pairs are, so we ship those instead.
         */
        public PartialResult terminatePartial() {
            if (count == 0) return null;
            PartialResult part = new PartialResult();
            part.sum = sum;
            part.count = count;
            return part;
        }

        /** Folds another partial (sum, count) into this evaluator's state. */
        public boolean merge(PartialResult other) {
            if (other != null) {
                sum += other.sum;
                count += other.count;
            }
            return true;
        }

        /** Final average, or NULL when no non-NULL rows were seen. */
        public DoubleWritable terminate() {
            if (count == 0) return null;
            return new DoubleWritable(sum / count);
        }
    }
}

  3. UDTF
UDTF是一种用户自定义的表生成函数,用于从单个输入行中生成多个输出行。以下是一个将一行文本按逗号分隔并输出的UDTF例子:

package com.example.hive.udtf;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

import java.util.ArrayList;
import java.util.List;

@Description(name = "split_rows",
        value = "_FUNC_(str) - Explode a string into multiple rows",
        extended = "Example:\n > SELECT * FROM split_rows('a,b,c');\n a\n b\n c")
public class SplitRowsUDTF extends GenericUDTF {
    /** Inspector for the single string argument, captured in initialize(). */
    private PrimitiveObjectInspector inputOI = null;

    @Override
    public void close() throws HiveException {
        // No buffered rows to flush.
    }

    @Override
    public StructObjectInspector initialize(StructObjectInspector argOIs) throws UDFArgumentException {
        if (argOIs.getAllStructFieldRefs().size() != 1) {
            throw new UDFArgumentLengthException("The function split_rows() takes exactly one argument.");
        }
        ObjectInspector oi = argOIs.getAllStructFieldRefs().get(0).getObjectInspector();
        if (oi.getCategory() != ObjectInspector.Category.PRIMITIVE) {
            throw new UDFArgumentTypeException(0, "The function split_rows() takes a string as parameter.");
        }
        inputOI = (PrimitiveObjectInspector) oi;

        // Output schema: one string column named "col1".
        List<String> fieldNames = new ArrayList<>();
        List<ObjectInspector> fieldOIs = new ArrayList<>();
        fieldNames.add("col1");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    @Override
    public void process(Object[] args) throws HiveException {
        // BUG FIX: getPrimitiveJavaObject() returns java.lang.String for a
        // string inspector, so the original cast to Text failed with
        // ClassCastException at runtime. Also skip NULL input rows instead
        // of dereferencing them.
        Object raw = inputOI.getPrimitiveJavaObject(args[0]);
        if (raw == null) {
            return;
        }
        String[] values = raw.toString().split(",");
        for (String value : values) {
            forward(new Object[]{value});
        }
    }
}

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值