FlinkSQL User-Defined Functions Explained


Custom Functions

In addition to its built-in functions, FlinkSQL also supports user-defined functions (UDFs), which let us extend the available function library with our own logic. UDFs in FlinkSQL fall into four main categories:

1. ScalarFunction: scalar function
Characteristics: takes one row at a time and produces a single value (one row, one column). Typical examples: upper(str), lower(str), abs(salary).
2. TableFunction: table-generating function
Characteristics: at runtime, for each input row (one or more fields) it can emit multiple rows and multiple columns. Typical examples: explode(), unnest().
3. AggregateFunction: aggregate function
Characteristics: continuously aggregates a group of input rows and finally emits one row (with one or more columns) per group. Typical examples: sum(), max().
4. TableAggregateFunction: table aggregate function
Characteristics: continuously aggregates a group of input rows and finally emits one or more rows (with one or more columns) per group. A minimal sketch is given after Example 3 below.

Example 1: ScalarFunction

input/userbase.json

{"date_time":"2022-10-04 08:01:48","email":"kyzqcd0686@vjikg.tng","id":0,"name":"郑潮"}
{"date_time":"2022-10-04 08:06:31","email":"bvkqwbmgwi@qq.com","id":1,"name":"图丘喜造"}
{"date_time":"2022-10-04 08:04:39","email":"axvcbj7vbo@ecyi1.4gw","id":2,"name":"王联介"}
{"date_time":"2022-10-04 08:00:19","email":"ew1qu5sunz@caxtg.vtn","id":3,"name":"赖溯"}
{"date_time":"2022-10-04 08:04:51","email":"50xdhnfppw@vwreu.kxk","id":4,"name":"钱泼奎"}
{"date_time":"2022-10-04 08:02:25","email":"h8ist2s54kalorkp,79s","id":5,"name":"紧迟亏"}
{"date_time":"2022-10-04 08:05:22","email":"hnzfdmnjgo@rsiq9.syx","id":6,"name":"贾监"}
{"date_time":"2022-10-04 08:03:53","email":"apjlg5pyuo@hs6l0j4","id":7,"name":"蔡"}
{"date_time":"2022-10-04 08:05:35","email":"cpqofnn5xd@7iknhqc5","id":8,"name":"蔡不"}
{"date_time":"2022-10-04 08:05:03","email":"wg3nfjdv9@fomvu2kb","id":9,"name":"赖妖"}
{"date_time":"2022-10-04 08:06:37","email":"1kkaib5ie@ecvb86cs","id":10,"name":"毛溜拳"}
package org.example.udf;

import com.alibaba.fastjson.JSONObject;
import org.apache.flink.table.functions.FunctionContext;
import org.apache.flink.table.functions.ScalarFunction;

/**
 * A ScalarFunction that extracts the value for a given key from a JSON line.
 */
public class JsonParseFunction extends ScalarFunction {
    @Override
    public void open(FunctionContext context) throws Exception {
        // called once before evaluation; a good place to initialize resources
        super.open(context);
    }

    @Override
    public void close() throws Exception {
        super.close();
    }

    // invoked once per row: parse the JSON and return the value for the key, or "" if absent
    public String eval(String jsonLine, String key) {
        JSONObject jsonObject = JSONObject.parseObject(jsonLine);
        if (jsonObject.containsKey(key)) {
            return jsonObject.getString(key);
        } else {
            return "";
        }
    }
}
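Flink derives the function's argument and result types from the eval signature by reflection. When the default mapping is not what you want (for example a DECIMAL with fixed precision and scale, which reflection cannot infer), the @DataTypeHint annotation can override it. A minimal illustrative sketch (RatioFunction is a hypothetical name, not part of the example above):

import java.math.BigDecimal;
import java.math.RoundingMode;

import org.apache.flink.table.annotation.DataTypeHint;
import org.apache.flink.table.functions.ScalarFunction;

public class RatioFunction extends ScalarFunction {
    // the hint fixes the result to DECIMAL(12, 3); reflection alone cannot choose precision/scale
    public @DataTypeHint("DECIMAL(12, 3)") BigDecimal eval(Double a, Double b) {
        if (a == null || b == null || b == 0.0) {
            return null;
        }
        return BigDecimal.valueOf(a / b).setScale(3, RoundingMode.HALF_UP);
    }
}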

package org.example;

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.example.udf.JsonParseFunction;

public class FlinkSqlScalarFunction {
    public static void main(String[] args) {
        // create the TableEnvironment
        EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance().build();

        TableEnvironment tableEnvironment = TableEnvironment.create(environmentSettings);

        tableEnvironment.executeSql("CREATE TABLE userbase (" +
                "line String  " +
                ")WITH(" +
                "'connector' = 'filesystem'," +
                "'path' = 'input/userbase.json'," +
                "'format' = 'raw'" + //定义数据内容为raw,表述一正好数据作为一条
                ")");

        tableEnvironment.sqlQuery("select * from userbase ").execute().print();
        tableEnvironment.createTemporarySystemFunction("JsonParse", JsonParseFunction.class);
        tableEnvironment.sqlQuery("select JsonParse(line,'date_time'),JsonParse(line,'email'),JsonParse(line,'id'),JsonParse(line,'name')  from userbase").execute().print();
    }
}
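For reference, registering a SQL name is not the only way to invoke the function; the Table API can also call the class inline. A minimal sketch against the same userbase table ($, lit and call come from org.apache.flink.table.api.Expressions):

import static org.apache.flink.table.api.Expressions.$;
import static org.apache.flink.table.api.Expressions.call;
import static org.apache.flink.table.api.Expressions.lit;

// call the UDF inline, without registering it under a SQL name
tableEnvironment.from("userbase")
        .select(
                call(JsonParseFunction.class, $("line"), lit("id")),
                call(JsonParseFunction.class, $("line"), lit("name")))
        .execute()
        .print();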

Example 2: TableFunction

input/produce_user.json

{"begin_time":1665145907806,"price":258.7,"producetId":920956185,"userBaseList":[{"date_time":"2022-10-07 08:01:48","email":"kyzqcd0686@vjikg.tng","id":0,"name":"郑潮"},{"date_time":"2022-10-07 08:06:31","email":"bvkqwbmgwi@qq.com","id":1,"name":"图丘喜造"},{"date_time":"2022-10-07 08:04:39","email":"axvcbj7vbo@ecyi1.4gw","id":2,"name":"王联介"},{"date_time":"2022-10-07 08:00:19","email":"ew1qu5sunz@caxtg.vtn","id":3,"name":"赖溯"},{"date_time":"2022-10-07 08:04:51","email":"50xdhnfppw@vwreu.kxk","id":4,"name":"钱泼奎"}]}
{"begin_time":1665145915652,"price":258.7,"producetId":-786075263,"userBaseList":[{"date_time":"2022-10-07 08:02:25","email":"h8ist2s54kalorkp,79s","id":5,"name":"紧迟亏"},{"date_time":"2022-10-07 08:05:22","email":"hnzfdmnjgo@rsiq9.syx","id":6,"name":"贾监"},{"date_time":"2022-10-07 08:03:53","email":"apjlg5pyuo@hs6l0j4","id":7,"name":"蔡"},{"date_time":"2022-10-07 08:05:35","email":"cpqofnn5xd@7iknhqc5","id":8,"name":"蔡不"},{"date_time":"2022-10-07 08:05:03","email":"wg3nfjdv9@fomvu2kb","id":9,"name":"赖妖"}]}
{"begin_time":1665145927285,"price":258.7,"producetId":-988723330,"userBaseList":[{"date_time":"2022-10-07 09:01:48","email":"kyzqcd03486@vjikg.tng","id":11,"name":"郑潮1"},{"date_time":"2022-10-07 09:06:31","email":"bvkqwbm1gwi@qq.com","id":12,"name":"图丘喜造1"},{"date_time":"2022-10-07 10:04:39","email":"axvcbj71vbo@ecyi1.4gw","id":13,"name":"王联介3"},{"date_time":"2022-10-07 09:00:19","email":"ew11qu5sunz@caxtg.vtn","id":14,"name":"赖溯1"},{"date_time":"2022-10-07 09:04:51","email":"50xdhnfppw@vwre1u.kxk","id":15,"name":"钱泼奎1"}]}
package org.example.udtf;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.table.annotation.DataTypeHint;
import org.apache.flink.table.annotation.FunctionHint;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.functions.ScalarFunction;
import org.apache.flink.table.functions.TableFunction;
import org.apache.flink.types.Row;

public class FlinkSQLTableFunction {
    public static void main(String[] args) {

        // create the TableEnvironment
        EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance().build();

        TableEnvironment tableEnvironment = TableEnvironment.create(environmentSettings);

        // create the source table
        tableEnvironment.executeSql("CREATE TABLE userbase (" +
                "line String  " +
                ")WITH(" +
                "'connector' = 'filesystem'," +
                "'path' = 'input/produce_user.json'," +
                "'format' = 'raw'" + //定义数据内容为raw,表述一正好数据作为一条
                ")");
        // register the functions
        tableEnvironment.createTemporarySystemFunction("JsonFunc",JsonFunction.class);
        tableEnvironment.createTemporarySystemFunction("explodeFunc",ExplodeFunc.class);

        // query: parse top-level fields with the scalar UDF, explode userBaseList with the table UDF
        tableEnvironment.sqlQuery("select JsonFunc(line,'begin_time'),JsonFunc(line,'price'),JsonFunc(line,'producetId'),id,name,date_time,email from userbase,lateral table(explodeFunc(line,'userBaseList'))")
                .execute().print();

    }

    /**
     * A custom ScalarFunction that parses a key out of a JSON-formatted line.
     */
    public static class JsonFunction extends ScalarFunction {
        public String eval(String jsonLine, String key) {
            JSONObject jsonObject = JSONObject.parseObject(jsonLine);
            if(jsonObject.containsKey(key)){
                return jsonObject.getString(key);
            }else {
                return "";
            }
        }
    }
    /**
     * A TableFunction: takes one row containing a JSON array and emits one row per array element.
     */
    @FunctionHint(output = @DataTypeHint("ROW<id String,name String,date_time String,email String>"))
    public static class ExplodeFunc extends TableFunction<Row> {
        public void eval(String line, String key) {
            JSONObject jsonObject = JSONObject.parseObject(line);
            JSONArray jsonArray = jsonObject.getJSONArray(key);
            for (int i = 0; i < jsonArray.size(); i++) {
                String date_time = jsonArray.getJSONObject(i).getString("date_time");
                String id = jsonArray.getJSONObject(i).getString("id");
                String name = jsonArray.getJSONObject(i).getString("name");
                String email = jsonArray.getJSONObject(i).getString("email");

                // use collect() to emit each parsed row downstream
                collect(Row.of(id,name,date_time,email));
            }
        }
    }
}
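One caveat about the lateral join above: the comma (implicit CROSS JOIN) form drops any input row for which the table function emits no rows at all. If such rows should be kept, with NULLs in the generated columns, Flink SQL also supports LEFT JOIN LATERAL with a literal TRUE join condition. A sketch of the alternative query:

// keep input rows even when explodeFunc emits nothing (generated columns become NULL)
tableEnvironment.sqlQuery("select JsonFunc(line,'begin_time'),id,name,date_time,email " +
        "from userbase " +
        "left join lateral table(explodeFunc(line,'userBaseList')) on true")
        .execute().print();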

Example 3: AggregateFunction

input/score.csv

1,zhangsan,Chinese,80
1,zhangsan,Math,76
1,zhangsan,Science,84
1,zhangsan,Art,90
2,lisi,Chinese,60
2,lisi,Math,78
2,lisi,Science,86
2,lisi,Art,88
package org.example.udaf;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.functions.AggregateFunction;

public class FlinkSQLAggregateFunction {
    public static void main(String[] args) {
        // create the TableEnvironment
        EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance().build();

        TableEnvironment tableEnvironment = TableEnvironment.create(environmentSettings);

        tableEnvironment.executeSql("CREATE TABLE source_score (" +
                "id int," +
                "name STRING," +
                "course STRING," +
                "score Double" +
                ")WITH(" +
                "'connector' = 'filesystem'," +
                "'path' = 'input/score.csv'," +
                "'format' = 'csv'" +
                ")");

        // register the function
        tableEnvironment.createTemporarySystemFunction("AvgFunc",AvgFunc.class);

        tableEnvironment.executeSql("select course,AvgFunc(score) as avg_score from source_score group by course").print();
    }

    /**
     * A custom AggregateFunction that computes the average of a Double column.
     */
    public static class AvgFunc extends AggregateFunction<Double, AvgAccumulator> {

        // returns the current result for a group from its accumulator
        @Override
        public Double getValue(AvgAccumulator accumulator) {
            if (accumulator.count == 0) {
                return null;
            } else {
                return accumulator.sum / accumulator.count;
            }
        }

        @Override
        public AvgAccumulator createAccumulator() {
            return new AvgAccumulator();
        }

        // called once per input row to fold the score into the accumulator
        public void accumulate(AvgAccumulator acc, Double score) {
            acc.setSum(acc.sum + score);
            acc.setCount(acc.count + 1);
        }
    }

    @Data
    @NoArgsConstructor
    @AllArgsConstructor
    public static class AvgAccumulator {
        public double sum = 0.0;
        public int count = 0;
    }
}
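For completeness: the fourth category from the overview, TableAggregateFunction, cannot be invoked from SQL in this Flink version and is used through the Table API's flatAggregate instead. Below is a minimal sketch reusing the source_score table from this example; the Top2 function, its accumulator, and the org.example.udtaf package are illustrative names. It emits the two highest scores per course.

package org.example.udtaf;

import org.apache.flink.table.annotation.DataTypeHint;
import org.apache.flink.table.annotation.FunctionHint;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.functions.TableAggregateFunction;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;

import static org.apache.flink.table.api.Expressions.$;
import static org.apache.flink.table.api.Expressions.call;

public class FlinkSQLTableAggregateFunction {
    public static void main(String[] args) {
        EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance().build();
        TableEnvironment tableEnvironment = TableEnvironment.create(environmentSettings);

        tableEnvironment.executeSql("CREATE TABLE source_score (" +
                "id int," +
                "name STRING," +
                "course STRING," +
                "score Double" +
                ")WITH(" +
                "'connector' = 'filesystem'," +
                "'path' = 'input/score.csv'," +
                "'format' = 'csv'" +
                ")");

        // table aggregates have no SQL syntax: group with the Table API and call flatAggregate
        tableEnvironment.from("source_score")
                .groupBy($("course"))
                .flatAggregate(call(Top2.class, $("score")))
                .select($("course"), $("score"), $("rank_no"))
                .execute().print();
    }

    /**
     * Emits up to two rows per group: the highest and second-highest score.
     */
    @FunctionHint(output = @DataTypeHint("ROW<score DOUBLE, rank_no INT>"))
    public static class Top2 extends TableAggregateFunction<Row, Top2Accumulator> {

        @Override
        public Top2Accumulator createAccumulator() {
            return new Top2Accumulator();
        }

        // called once per input row
        public void accumulate(Top2Accumulator acc, Double score) {
            if (score == null) {
                return;
            }
            if (acc.first == null || score > acc.first) {
                acc.second = acc.first;
                acc.first = score;
            } else if (acc.second == null || score > acc.second) {
                acc.second = score;
            }
        }

        // called when results are needed; emits the current top two
        public void emitValue(Top2Accumulator acc, Collector<Row> out) {
            if (acc.first != null) {
                out.collect(Row.of(acc.first, 1));
            }
            if (acc.second != null) {
                out.collect(Row.of(acc.second, 2));
            }
        }
    }

    public static class Top2Accumulator {
        public Double first;   // highest score seen so far, null if none
        public Double second;  // second-highest score seen so far, null if none
    }
}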

pom.xml dependencies

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>flinkSql</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <scala.binary.version>2.12</scala.binary.version>
        <flink.version>1.14.3</flink.version>
        <hadoop.version>3.1.4</hadoop.version>
        <hbase.version>2.2.7</hbase.version>
        <hive.version>3.1.2</hive.version>
    </properties>

    <dependencies>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-common</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-csv</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.22</version>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.73</version>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
            <version>1.7.15</version>
        </dependency>

        <!--flink  读写HDFS需要用到的jar包-->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-hadoop-compatibility_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.commons</groupId>
                    <artifactId>commons-math3</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.commons</groupId>
                    <artifactId>commons-compress</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-auth</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-math3</artifactId>
            <version>3.5</version>
        </dependency>

        <!--flink 整合HBASE 需要用到的jar包-->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-hbase-2.2_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>${hbase.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.commons</groupId>
                    <artifactId>commons-math3</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>${hbase.version}</version>
        </dependency>

        <!--flink 整合kafka需要用到的jar包-->

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.38</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-hive_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.hadoop</groupId>
                    <artifactId>hadoop-hdfs</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.commons</groupId>
                    <artifactId>commons-compress</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.11.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.thrift</groupId>
            <artifactId>libfb303</artifactId>
            <version>0.9.3</version>
        </dependency>



    </dependencies>


     <build>
         <plugins>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-compiler-plugin</artifactId>
                 <version>3.1</version>
                 <configuration>
                     <source>1.8</source>
                     <target>1.8</target>
                 </configuration>
             </plugin>

             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-shade-plugin</artifactId>
                 <version>3.1.1</version>
                 <configuration>
                 </configuration>
                 <executions>
                     <execution>
                         <phase>package</phase>
                         <goals><goal>shade</goal></goals>
                     </execution>
                 </executions>
             </plugin>

             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-dependency-plugin</artifactId>
                 <version>2.10</version>
                 <executions>
                     <execution>
                         <id>copy-dependencies</id>
                         <phase>package</phase>
                         <goals><goal>copy-dependencies</goal></goals>
                         <configuration>
                             <outputDirectory>${project.build.directory}/lib</outputDirectory>
                         </configuration>
                     </execution>
                 </executions>
             </plugin>


             <!--<plugin>
                 <groupId>org.scala.tools</groupId>
                 <artifactId>maven-scala-plugin</artifactId>
                 <version>2.15.2</version>
                 <executions>
                     <execution>
                         <goals>
                             <goal>compile</goal>
                             <goal>testCompile</goal>
                         </goals>
                     </execution>
                 </executions>
             </plugin>-->


         </plugins>
     </build>

</project>