Presto UDF自定义函数开发

目录

Presto函数大致有三种:

1.scalar函数

2.aggregation函数

3.window函数


Presto-udf开源项目地址

https://github.com/qubole/presto-udfs

presto推荐jdk版本:Zulu OpenJDK11,相比较openjdk,oracle jdk,jvm占用内存更少。Github的版本也是基于Zulu OpenJDK开发。

Zulu OpenJDK11下载地址

https://www.azul.com/downloads/zulu-community/?version=java-11-lts&os=windows&architecture=x86-64-bit&package=jdk

presto官方文档:

https://prestosql.io/docs/current/index.html

presto function文档:

https://prestosql.io/docs/current/develop/functions.html

Presto函数大致有三种:

1.scalar函数

标量函数,类似于Java中的静态方法,无状态

scalar函数常用注解

@Description()  //函数的描述
@ScalarFunction()// 函数名称
@SqlType() //指定数据类型,修饰方法或参数
@SqlNullable //允许为null  修饰方法或参数
最近在将hive里面的自定义udf函数转换成presto里面的自定义udf。
举例:有如下需求

说明:该函数的作用是将输入格式的日期转化为输出格式的日期。
写法:dateformat(输入日期字段值, 输入日期格式, 输出日期格式)

hive->presto

POST格式表示某个日期到1900年1月1日的天数
UNIX格式表示某个时间到1970年1月1日0点0分0秒纪元(UTC)经过的秒数

需求:
1、输入格式POST、UNIX类型,输出yyyyMMdd格式
SELECT UDF_DATEFORMAT(42351,'POST','yyyyMMdd');       --输出结果:20151214	
SELECT UDF_DATEFORMAT(1503417600,'UNIX','yyyyMMdd');  --输出结果:20170823
select udf_dateformat(1503417600,'UNIX','yyyyMMddHHmmss'); 
  
2、输入格式DATE、TIMESTAMP类型,输出yyyyMMdd格式
SELECT UDF_DATEFORMAT('2015-12-30 12:12:12','DATE','yyyyMMdd');  		 --输出结果:20151230
SELECT UDF_DATEFORMAT('2015-12-30 12:12:12.203','TIMESTAMP','yyyyMMdd'); --输出结果:20151230

3、输入格式yyyyMMdd、yyyy-MM-dd类型,输出POST,UNIX格式
SELECT UDF_DATEFORMAT('20151214','yyyyMMdd','POST');  --输出结果:42351
SELECT UDF_DATEFORMAT('2017-08-23','yyyy-MM-dd','UNIX');  --输出结果:1503417600

select udf_dateformat(1503417600,'UNIX','yyyyMMddHHmmss');
等同于(输出结果一样)hive中:select from_unixtime(1503417600,'yyyyMMdd');  --输出结果:20170823

代码实现:

import io.airlift.slice.Slice;
import io.prestosql.spi.function.Description;
import io.prestosql.spi.function.ScalarFunction;
import io.prestosql.spi.function.SqlType;
import io.prestosql.spi.type.StandardTypes;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import static io.airlift.slice.Slices.utf8Slice;

public class UdfDateformat {

    private static final String POST_BASE_DAY = "19000101";//hive的post日期以19000101为基准
    private static final String JAVA_BASE_DAY = "19700101";//Java日期以19700101为基准
    private static final long DAY_OF_TIMESTAMP = 86400000;//一天的毫秒值

    private static final String INPUT_TYPE_POST = "POST";
    private static final String INPUT_TYPE_UNIX = "UNIX";
    private static final String INPUT_TYPE_DATE = "DATE";
    private static final String INPUT_TYPE_TIMESTAMP = "TIMESTAMP";
    private static final String OUTPUT_TYPE_POST = "POST";
    private static final String OUTPUT_TYPE_UNIX = "UNIX";
    private static final String FORMAT_TYPE_YMDHMS = "yyyyMMddHHmmss";
    private static final String FORMAT_TYPE_YMD = "yyyyMMdd";
    private static final String FORMAT_TYPE_Y_M_D = "yyyy-MM-dd";

    @Description("converts the date of the input format to the date of the output format")
    @ScalarFunction("udf_dateformat")
    @SqlType(StandardTypes.VARCHAR)
    public static Slice evaluate(@SqlType(StandardTypes.VARCHAR) Slice dateVal, @SqlType(StandardTypes.VARCHAR) Slice inputFmtStr, @SqlType(StandardTypes.VARCHAR) Slice outputFmtStr){
        String returnDt = "";
        if ((dateVal == null) || (inputFmtStr == null) || (outputFmtStr == null)) {
            return null;
        }
        if (("".equals(dateVal.toStringUtf8()) || ("".equals(inputFmtStr.toStringUtf8())) || ("".equals(outputFmtStr.toStringUtf8())))) {
            return null;
        }
        if(INPUT_TYPE_DATE.equals(inputFmtStr.toStringUtf8()) || INPUT_TYPE_TIMESTAMP.equals(inputFmtStr.toStringUtf8())){
            returnDt = InputFormatDateOrTimestamp(dateVal.toStringUtf8(),inputFmtStr.toStringUtf8(),outputFmtStr.toStringUtf8());
        }else if(OUTPUT_TYPE_POST.equals(outputFmtStr.toStringUtf8()) || OUTPUT_TYPE_UNIX.equals(outputFmtStr.toStringUtf8())){
            returnDt = OutputFormatPostOrUnix(dateVal.toStringUtf8(),inputFmtStr.toStringUtf8(),outputFmtStr.toStringUtf8());
        }

        return utf8Slice(returnDt);
    }

    @Description("converts the date of the input format to the date of the output format")
    @ScalarFunction("udf_dateformat")
    @SqlType(StandardTypes.VARCHAR)
    public static Slice evaluate(@SqlType(StandardTypes.BIGINT) long dateVal, @SqlType(StandardTypes.VARCHAR) Slice inputFmtStr, @SqlType(StandardTypes.VARCHAR) Slice outputFmtStr){
        String returnDt;
        if ((inputFmtStr == null) || (outputFmtStr == null)) {
            return null;
        }
        if (("".equals(inputFmtStr.toStringUtf8())) || ("".equals(outputFmtStr.toStringUtf8()))) {
            return null;
        }
        returnDt = InputFormatPostOrUnix((int) dateVal,inputFmtStr.toStringUtf8(),outputFmtStr.toStringUtf8());
        if(returnDt != null){
            return utf8Slice(returnDt);
        }
        return null;
    }

    private static String InputFormatPostOrUnix(int dateVal, String inputFmtStr,String outputFmtStr) {
        String returnDt = "";
        try {
            SimpleDateFormat formatYMD = new SimpleDateFormat(FORMAT_TYPE_YMD);//默认格式方式
            SimpleDateFormat format = new SimpleDateFormat(outputFmtStr);//输出日期格式
            Date date;
            if ((INPUT_TYPE_POST.equals(inputFmtStr))) {
                date = formatYMD.parse(POST_BASE_DAY);
                Calendar ca = Calendar.getInstance();
                ca.setTime(date);
                ca.add(Calendar.DAY_OF_YEAR, dateVal);
                date = ca.getTime();
                if(date.after(formatYMD.parse(JAVA_BASE_DAY))){
                    date.setTime(date.getTime() - DAY_OF_TIMESTAMP);
                }

                //输入默认按照yyyyMMdd格式化 若输入格式为其他则按照对应方式格式化输入日期值
                if(!(FORMAT_TYPE_YMD.equals(outputFmtStr))){
                    returnDt = format.format(date);
                }else{
                    returnDt = formatYMD.format(date);
                }
            }else if ((INPUT_TYPE_UNIX.equals(inputFmtStr))){
                if(FORMAT_TYPE_YMDHMS.equals(outputFmtStr)){
                    format = new SimpleDateFormat(FORMAT_TYPE_YMD);
                }
                date = new Date(Long.parseLong(dateVal+"000"));//秒转换为毫秒
                returnDt = format.format(date);
            }else {
                return "";
            }

        } catch (ParseException parseException) {
            System.out.println(parseException.getMessage());
        }

        return returnDt;
    }

    private static String OutputFormatPostOrUnix(String dateVal, String inputFmtStr, String outputFmtStr) {
        String returnDt = "";
        try {
            SimpleDateFormat formatYMD = new SimpleDateFormat(FORMAT_TYPE_YMD);
            SimpleDateFormat format = new SimpleDateFormat(inputFmtStr);
            Date date ;
            if ((OUTPUT_TYPE_POST.equals(outputFmtStr))) {
                //输入默认按照yyyyMMdd格式化 若输入格式为其他则按照对应方式格式化输入日期值
                if(!(FORMAT_TYPE_YMD.equals(inputFmtStr))){
                    date = format.parse(dateVal);
                }else{
                    date = formatYMD.parse(dateVal);
                }
                Date basedate = formatYMD.parse(POST_BASE_DAY);

                if(date.after(formatYMD.parse(JAVA_BASE_DAY))){
                    date.setTime(date.getTime() + DAY_OF_TIMESTAMP);
                }
                returnDt = String.valueOf((date.getTime() - basedate.getTime()) / DAY_OF_TIMESTAMP);
            }else if ((OUTPUT_TYPE_UNIX.equals(outputFmtStr))){
                date = format.parse(dateVal);
                returnDt =String.valueOf(date.getTime()/1000);//毫秒转换为秒
            }
        } catch (ParseException parseException) {
            System.out.println(parseException.getMessage());
        }

        return returnDt;
    }

    private static String InputFormatDateOrTimestamp(String dateVal, String inputFmtStr, String outputFmtStr) {
        String returnDt = "";
        if (UdfDateformat.INPUT_TYPE_DATE.equals(inputFmtStr) || UdfDateformat.INPUT_TYPE_TIMESTAMP.equals(inputFmtStr)) {
            try {
                SimpleDateFormat formatY_M_D = new SimpleDateFormat(FORMAT_TYPE_Y_M_D);
                SimpleDateFormat format = new SimpleDateFormat(outputFmtStr);
                Date date = formatY_M_D.parse(dateVal);
                returnDt = format.format(date);
            } catch (ParseException e) {
                System.out.println(e.getMessage());
            }
        }

        return returnDt;
    }
}

插件注册:

public class UdfPlugin implements Plugin
{
    @Override
    public Set<Class<?>> getFunctions()
    {
        return ImmutableSet.<Class<?>>builder()
                .add(UdfDateformat.class)
                .build();
    }
}

加载plugin资源 :

在目录src/main/resources/META-INF/services 中新建名为 com.facebook.presto.spi.Plugin 的文本文件,内容为插件注册类的全类名,如com.xxxx.xxx.xxx.UdfPlugin

pom文件

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

    <modelVersion>4.0.0</modelVersion>

    <!-- if this lives as a subdirectory of your own project, then you'll need to do do the following workaround:
         1. Create a file:  src/main/resources/META-INF/services/io.prestosql.spi.Plugin
         2. Put the value presto.udfs.UdfPlugin in it
    -->

    <properties>
        <presto.version>346</presto.version>
        <slice.version>0.36</slice.version>
        <guava.version>26.0-jre</guava.version>
        <junit.version>4.12</junit.version>
        <javax.inject.version>1</javax.inject.version>
        <io.airlift.log.version>0.183</io.airlift.log.version>
        <jodatime.version>2.8.1</jodatime.version>
        <hive.version>2.3.2</hive.version>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>

    <build>
       <plugins> 
           <plugin>
               <groupId>org.apache.maven.plugins</groupId>
               <artifactId>maven-release-plugin</artifactId>
               <version>2.5.1</version>
           </plugin>

           <plugin>
               <groupId>org.apache.maven.plugins</groupId>
               <artifactId>maven-shade-plugin</artifactId>
               <version>2.3</version>
               <executions>
                   <execution>
                       <phase>package</phase>
                       <goals>
                           <goal>shade</goal>
                       </goals>
                       <configuration>
                           <artifactSet>
                               <!-- dont need to add airlift.slice, presto provides it to all plugins -->
                               <includes>
                                   <include>com.google.guava:guava</include>
                                   <include>io.airlift:log</include>
                                   <include>joda-time:joda-time</include>
                               </includes>
                           </artifactSet>
                           <relocations>
                               <relocation>
                                   <pattern>org.joda</pattern>
                                   <shadedPattern>org.joda.shaded</shadedPattern>
                               </relocation>
                               <relocation>
                                   <pattern>com.google</pattern>
                                   <shadedPattern>com.google.shaded</shadedPattern>
                               </relocation>
                               <relocation>
                                   <pattern>io.airlift.log</pattern>
                                   <shadedPattern>io.airlift.log.shaded</shadedPattern>
                               </relocation>
                           </relocations>

                       </configuration>
                   </execution>
               </executions>
           </plugin>
      </plugins>
    </build>

    <groupId>com.cplc.presto</groupId>
    <artifactId>udfs</artifactId>
    <version>3.0.0</version>
    <name>PrestoUDFs</name>
    <description>Common Functions for the Facebook Presto SQL Engine</description>

    <dependencies>

        <!-- https://mvnrepository.com/artifact/org.apache.hive/hive-exec -->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
        </dependency>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>${junit.version}</version>
        </dependency>

        <dependency>
            <groupId>io.prestosql</groupId>
            <artifactId>presto-spi</artifactId>
            <version>${presto.version}</version>
        </dependency>

        <dependency>
            <groupId>io.prestosql</groupId>
            <artifactId>presto-main</artifactId>
            <version>${presto.version}</version>
        </dependency>

        <dependency>
            <groupId>io.airlift</groupId>
            <artifactId>slice</artifactId>
            <version>${slice.version}</version>
        </dependency>

        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>${guava.version}</version>
        </dependency>

        <dependency>
            <groupId>javax.inject</groupId>
            <artifactId>javax.inject</artifactId>
            <version>${javax.inject.version}</version>
        </dependency>

        <dependency>
            <groupId>io.airlift</groupId>
            <artifactId>log</artifactId>
            <version>${io.airlift.log.version}</version>
        </dependency>

        <dependency>
            <groupId>joda-time</groupId>
            <artifactId>joda-time</artifactId>
            <version>${jodatime.version}</version>
        </dependency>
    </dependencies>

</project>

maven打包项目放到集群就可以跑起来了。

2.aggregation函数

聚合函数,如avg函数,count函数,它是一个需要累积状态的函数,开发的难点在于维护状态

3.window函数

窗口函数

 

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值