Flink Table API and SQL: Complete Usage, Part 2 (Chapter 11)

I. Working Examples

1. Quick Start

package com.example.chapter11;

import com.example.chapter05.ClickSource;
import com.example.chapter05.Event;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.time.Duration;

import static org.apache.flink.table.api.Expressions.$;

/**
 * Quick start: SQL and the Table API
 */
public class SimpleTableExample {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Read data from the source to get a DataStream
        SingleOutputStreamOperator<Event> eventStream = env.addSource(new ClickSource())
                .assignTimestampsAndWatermarks(WatermarkStrategy.<Event>forBoundedOutOfOrderness(Duration.ZERO)
                        .withTimestampAssigner(new SerializableTimestampAssigner<Event>() {
                            @Override
                            public long extractTimestamp(Event element, long recordTimestamp) {
                                return element.timestamp;
                            }
                        })
                );

        // Create the table environment
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Convert the DataStream into a Table
        Table eventTable = tableEnv.fromDataStream(eventStream);

        // Transform by writing SQL directly
        Table resultTable = tableEnv.sqlQuery("select user, url, `timestamp` from " + eventTable);

        // The same kind of transformation with the Table API
        Table resultTable2 = eventTable.select($("user"), $("url"))
                .where($("user").isEqual("Alice"));

        // A Table cannot be printed directly; the simplest option is to convert it to a DataStream
        tableEnv.toDataStream(resultTable).print("result");
        tableEnv.toDataStream(resultTable2).print("result2");


        // Register the Table as a temporary view so SQL can reference it by name
        tableEnv.createTemporaryView("clickTable", eventTable);
        Table aggResult = tableEnv.sqlQuery("select user,COUNT(url) as cnt from clickTable group by user");

        // The aggregation result is an updating table, so convert it with toChangelogStream
        tableEnv.toChangelogStream(aggResult).print("agg");

        env.execute();

    }
}
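This example depends on the Event POJO and ClickSource from chapter 5, which are not shown in this chapter. A minimal sketch of the assumed shape (an illustration, not the original chapter-05 code; the field names must match the queries above):

public class Event {
    public String user;     // referenced above as $("user")
    public String url;
    public Long timestamp;  // event time in epoch milliseconds

    public Event() {}       // Flink POJOs need a public no-arg constructor

    public Event(String user, String url, Long timestamp) {
        this.user = user;
        this.url = url;
        this.timestamp = timestamp;
    }
}

ClickSource is assumed to be a SourceFunction<Event> that keeps emitting random click events until cancelled.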

2. Creating the Table Environment

package com.example.chapter11;

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableEnvironment;

import static org.apache.flink.table.api.Expressions.$;


/**
 * 1. Create the table environment
 * 2. Register a table
 * 3. Convert the registered table into a Table object
 * 4. Add query conditions to the Table object
 * 5. Produce a new Table object
 * 6. Optionally register the result back into the environment for later SQL
 *
 * ----- Both the Table API and SQL yield Table objects
 *
 * -------- use the Table API
 * -------- use SQL
 * -------- or mix the two
 */
public class CommonApiTest {
    public static void main(String[] args) {


        /**
         * Option 1
         */
//        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//        env.setParallelism(1);
//        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);  // defaults to streaming mode when no settings are passed


        /**
         * Option 2
         * Streaming mode via explicit EnvironmentSettings
         */
        // 1. Define environment settings to create the table environment
        EnvironmentSettings settings = EnvironmentSettings.newInstance()
                .inStreamingMode() // streaming mode
                .useBlinkPlanner()
                .build();

        TableEnvironment tableEnv = TableEnvironment.create(settings);


        /**
         * Option 3
         * Streaming mode with the legacy planner
         */
//        EnvironmentSettings settings1 = EnvironmentSettings.newInstance()
//                .inStreamingMode() // streaming mode
//                .useOldPlanner()
//                .build();
//
//        TableEnvironment tableEnv1 = TableEnvironment.create(settings1);


        /**
         * Option 3.1
         * Batch processing with the legacy planner
         */
//        ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();
//        BatchTableEnvironment batchTableEnvironment = BatchTableEnvironment.create(batchEnv);


        /**
         * Option 3.2
         * Batch processing with the Blink planner
         */
//        EnvironmentSettings settings3 = EnvironmentSettings.newInstance()
//                .inBatchMode()
//                .useBlinkPlanner()
//                .build();
//
//        TableEnvironment tableEnv3 = TableEnvironment.create(settings3);


        // 2. Create the input table with DDL
        String createDDL = "CREATE TABLE clickTable (" +
                "user_name STRING, " +
                "url STRING, " +
                "ts BIGINT " +
                ") WITH (" +
                " 'connector'= 'filesystem'," +
                " 'path' = 'input/clicks.txt'," +
                " 'format' = 'csv'" +
                ")";


        // executeSql registers the table in the environment
        tableEnv.executeSql(createDDL);

        // Get a Table object (a Java handle)
        // "clickTable" is the name actually registered in the table environment
        Table clickTable = tableEnv.from("clickTable");

        Table resultTable = clickTable.where($("user_name").isEqual("Bob"))
                .select($("user_name"), $("url"));

        // To reuse this Table in later SQL statements, register it back into the environment as a view
        tableEnv.createTemporaryView("Result2", resultTable);


        // Query the registered view with SQL
        Table resultTable2 = tableEnv.sqlQuery("select user_name,url from Result2");

        // Create an output table backed by the filesystem connector
        String createOutDDL = "CREATE TABLE outTable (" +
                "url STRING, " +
                "user_name STRING " +
                ") WITH (" +
                "'connector'= 'filesystem'," +
                "'path' = 'output'," +
                "'format' = 'csv'" +
                ")";


        tableEnv.executeSql(createOutDDL);



        // Create a table that prints to the console
        String createPrintOutDDL = "CREATE TABLE printOutTable (" +
                "url STRING, " +
                "user_name STRING " +
                ") WITH (" +
                "'connector'= 'print'" +
                ")";

        tableEnv.executeSql(createPrintOutDDL);


        // Write the result to the filesystem table
        resultTable.executeInsert("outTable");

        // Write the result to the console
        resultTable2.executeInsert("printOutTable");

    }
}
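The filesystem connector reads input/clicks.txt as CSV rows matching the declared schema (user_name STRING, url STRING, ts BIGINT). A hypothetical sample file:

Mary,./home,1000
Alice,./cart,2000
Bob,./prod?id=100,3000
Bob,./home,3300
Mary,./prod?id=7,3800

Note that each executeInsert call above launches its own job over this input.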

3. Counting Each User's PV

package com.example.chapter11;

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableEnvironment;


/**
 * TODO Count each user's PV (page views)
 * 1. Create the table environment
 * 2. Register the table
 * 3. Convert the registered table into a Table object
 * 4. Add query conditions to the Table object
 * 5. Produce a new Table object
 * 6. Output
 */
public class CommonApiTestCount {
    public static void main(String[] args) {


        /**
         * Streaming-mode table environment
         */
        // 1. Define environment settings to create the table environment
        EnvironmentSettings settings = EnvironmentSettings.newInstance()
                .inStreamingMode() // streaming mode
                .useBlinkPlanner()
                .build();

        TableEnvironment tableEnv = TableEnvironment.create(settings);


        // 2. Create the input table with DDL
        String createDDL = "CREATE TABLE clickTable (" +
                "user_name STRING, " +
                "url STRING, " +
                "ts BIGINT " +
                ") WITH (" +
                " 'connector'= 'filesystem'," +
                " 'path' = 'input/clicks.txt'," +
                " 'format' = 'csv'" +
                ")";

        // Register the table
        tableEnv.executeSql(createDDL);

        // For each user, count how many URLs they have clicked so far (the user's PV)
        Table aggResult = tableEnv.sqlQuery("select user_name,count(url) as cnt from clickTable group by user_name");

        // Create a print table for outputting each user's PV to the console
        String creatPrintOutDDLUserPV = "CREATE TABLE printOutTable (" +
                " user_name STRING, " +
                " cnt BIGINT " +
                ") WITH (" +
                " 'connector'= 'print'" +
                ")";

        tableEnv.executeSql(creatPrintOutDDLUserPV);

        // Print each user's PV to the console
        aggResult.executeInsert("printOutTable");

    }
}
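Since the query is a continuous grouped aggregation, the print sink receives a changelog rather than a single final result: each new click retracts a user's old count and inserts the new one. With the hypothetical sample input shown earlier, the console output would look roughly like:

+I[Mary, 1]
+I[Alice, 1]
+I[Bob, 1]
-U[Bob, 1]
+U[Bob, 2]
-U[Mary, 1]
+U[Mary, 2]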

4. Window Top-N: The Two Most Active Users in a Time Window

package com.example.chapter11;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

public class TopNExample {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // 1. Define the time attribute directly in the CREATE TABLE DDL
        // FROM_UNIXTIME converts the long epoch value into a 'yyyy-MM-dd HH:mm:ss' string (UTC); TO_TIMESTAMP parses it into a TIMESTAMP
        String createDDL = "CREATE TABLE clickTable (" +
                "user_name STRING, " +
                "url STRING, " +
                "ts BIGINT, " +
                "et AS TO_TIMESTAMP( FROM_UNIXTIME(ts / 1000) ), " +
                "WATERMARK FOR et AS et - INTERVAL '1' SECOND " +
                ") WITH (" +
                " 'connector'= 'filesystem'," +
                " 'path' = 'input/clicks.txt'," +
                " 'format' = 'csv'" +
                ")";

        // Register the table
        tableEnv.executeSql(createDDL);


        /**
         * Plain Top-N: select the 2 users with the largest click counts overall
         *     cnt     : click count
         *     row_num : rank position
         *
         * The ROW_NUMBER() function inside an OVER window adds a new field:
         * the row number of each record after sorting by cnt in descending order
         */
        Table topNResultTable = tableEnv.sqlQuery("SELECT user_name, cnt, row_num " +
                "FROM (" +
                "  SELECT *, ROW_NUMBER() OVER (" +
                "    ORDER BY cnt DESC" +
                "  ) AS row_num " +
                "  FROM (SELECT user_name, COUNT(url) AS cnt FROM clickTable GROUP BY user_name)" +
                ") WHERE row_num <= 2");


//        tableEnv.toChangelogStream(topNResultTable).print("top 2 :");


        // Window Top-N: the two most active users within each 10-second window
        String subQuery = "SELECT user_name, COUNT(url) AS cnt, window_start, window_end " +
                "FROM TABLE(" +
                "  TUMBLE(TABLE clickTable, DESCRIPTOR(et), INTERVAL '10' SECOND)" +
                ") " +
                "GROUP BY user_name, window_start, window_end";

        Table windowTopN = tableEnv.sqlQuery("SELECT user_name, cnt, row_num, window_end " +
                "FROM (" +
                "  SELECT *, ROW_NUMBER() OVER (" +
                "    PARTITION BY window_start, window_end " +
                "    ORDER BY cnt DESC" +
                "  ) AS row_num " +
                "  FROM ( " + subQuery + " )) WHERE row_num <= 2");

        tableEnv.toChangelogStream(windowTopN).print("window TOP N: ");

        env.execute();
    }


}
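For readability, the statement assembled from subQuery is equivalent to the following SQL:

SELECT user_name, cnt, row_num, window_end
FROM (
    SELECT *, ROW_NUMBER() OVER (
        PARTITION BY window_start, window_end
        ORDER BY cnt DESC
    ) AS row_num
    FROM (
        SELECT user_name, COUNT(url) AS cnt, window_start, window_end
        FROM TABLE(
            TUMBLE(TABLE clickTable, DESCRIPTOR(et), INTERVAL '10' SECOND)
        )
        GROUP BY user_name, window_start, window_end
    )
) WHERE row_num <= 2

Compared with the plain Top-N above, the OVER clause is additionally partitioned by window, and the ranking runs over windowed counts, so each window's top two are emitted once when the window fires.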

5. Aggregation Queries and Windows

package com.example.chapter11;

import com.example.chapter05.ClickSource;
import com.example.chapter05.Event;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.time.Duration;

import static org.apache.flink.table.api.Expressions.$;

public class TimeAndWindowsTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // 1. Define the time attribute directly in the CREATE TABLE DDL
        // FROM_UNIXTIME converts the long epoch value into a 'yyyy-MM-dd HH:mm:ss' string (UTC); TO_TIMESTAMP parses it into a TIMESTAMP
        String createDDL = "CREATE TABLE clickTable (" +
                "user_name STRING, " +
                "url STRING, " +
                "ts BIGINT, " +
                "et AS TO_TIMESTAMP( FROM_UNIXTIME(ts / 1000) ), " +
                "WATERMARK FOR et AS et - INTERVAL '1' SECOND " +
                ") WITH (" +
                " 'connector'= 'filesystem'," +
                " 'path' = 'input/clicks.txt'," +
                " 'format' = 'csv'" +
                ")";

        // Register the table
        tableEnv.executeSql(createDDL);


        // 2. Alternatively, define the time attribute when converting a stream into a Table
        SingleOutputStreamOperator<Event> clickStream = env.addSource(new ClickSource())
                .assignTimestampsAndWatermarks(WatermarkStrategy.<Event>forBoundedOutOfOrderness(Duration.ZERO)
                        .withTimestampAssigner(new SerializableTimestampAssigner<Event>() {
                            @Override
                            public long extractTimestamp(Event element, long recordTimestamp) {
                                return element.timestamp;
                            }
                        })
                );


        Table clickTable = tableEnv.fromDataStream(clickStream, $("user"), $("url"), $("timestamp"),
                $("et").rowtime());


        // Aggregation queries
        // 1. Group aggregation
        Table aggTable = tableEnv.sqlQuery("select user_name,count(1) from clickTable group by user_name");


        // 2. Group window aggregation (legacy syntax)
        Table groupWindowResultTable = tableEnv.sqlQuery("select " +
                "user_name,count(1) as cnt, " +
                "TUMBLE_END(et,INTERVAL '10' SECOND) AS endT " +
                "FROM clickTable " +
                "group by " +
                "user_name, " +
                "TUMBLE(et,INTERVAL '10' SECOND )"
        );


//        clickTable.printSchema();


        /**
         * 3. Window aggregation with windowing TVFs
         * 3.1 Tumbling window: TUMBLE
         *      arg 1: the table
         *      arg 2: the time attribute column
         *      arg 3: the window size (the key parameter)
         *
         *      The window columns must appear in the GROUP BY clause
         */

        Table tumbleWindowResultTable = tableEnv.sqlQuery(
                "select user_name,COUNT(1) as cnt, " +
                        " window_end as endT " +
                        "from TABLE( " +
                        "TUMBLE(TABLE clickTable,DESCRIPTOR(et),INTERVAL '10' SECOND )" +
                        ")" +
                        "GROUP BY user_name,window_end,window_start");


        /**
         * 3.2 Sliding window: HOP
         *      arg 1: the table
         *      arg 2: the time attribute column
         *      arg 3: the slide
         *      arg 4: the window size
         *
         *      The window columns must appear in the GROUP BY clause
         */

        Table hopWindowResultTable = tableEnv.sqlQuery(
                "select user_name,COUNT(1) as cnt, " +
                        " window_end as endT " +
                        "from TABLE( " +
                        "HOP(TABLE clickTable,DESCRIPTOR(et),INTERVAL '5' SECOND ,INTERVAL '10' SECOND )" +
                        ")" +
                        "GROUP BY user_name,window_end,window_start");


        /**
         * 3.3 Cumulative window: CUMULATE
         *      arg 1: the table
         *      arg 2: the time attribute column
         *      arg 3: the step, i.e. how often an updated count is emitted
         *      arg 4: the maximum window size, i.e. the overall statistics period
         *
         *      The window columns must appear in the GROUP BY clause
         */

        Table cumulateWindowResultTable = tableEnv.sqlQuery(
                "select user_name,COUNT(1) as cnt, " +
                        " window_end as endT " +
                        "from TABLE( " +
                        "CUMULATE(TABLE clickTable,DESCRIPTOR(et),INTERVAL '5' SECOND ,INTERVAL '10' SECOND )" +
                        ")" +
                        "GROUP BY user_name,window_end,window_start");


        /**
         * 4. Over window
         * For each user: the average timestamp over the current visit and the previous three visits
         */

        Table overWindowResultTable = tableEnv.sqlQuery("select user_name," +
                " avg(ts) OVER(" +
                " PARTITION BY user_name " +
                "ORDER BY et " +
                "ROWS BETWEEN 3 PRECEDING AND CURRENT ROW" +
                ") AS avg_ts " +
                "from clickTable"
        );

//        tableEnv.toChangelogStream(aggTable).print("agg");
//        tableEnv.toDataStream(groupWindowResultTable).print("group windows");
//        tableEnv.toDataStream(tumbleWindowResultTable).print("tumble window");

//        tableEnv.toDataStream(hopWindowResultTable).print("hop window");
//        tableEnv.toDataStream(cumulateWindowResultTable).print("cumulate window");

        tableEnv.toDataStream(overWindowResultTable).print("over window: ");

        env.execute();
    }
}
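The over window at the end can also be written with the Table API. A minimal sketch, assuming the stream-derived clickTable defined above (fields user, url, timestamp and the rowtime attribute et), plus two extra imports:

import org.apache.flink.table.api.Over;
import static org.apache.flink.table.api.Expressions.rowInterval;

Table overResult = clickTable
        // over window: per user, ordered by event time, current row plus the 3 preceding rows
        .window(Over.partitionBy($("user")).orderBy($("et"))
                .preceding(rowInterval(3L)).as("w"))
        .select($("user"),
                $("timestamp").avg().over($("w")).as("avg_ts"));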

6. Aggregate Functions: UdfTest_AggregateFunction

package com.example.chapter11;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.functions.AggregateFunction;

public class UdfTest_AggregateFunction {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // 1. Define the time attribute directly in the CREATE TABLE DDL
        // FROM_UNIXTIME converts the long epoch value into a 'yyyy-MM-dd HH:mm:ss' string (UTC); TO_TIMESTAMP parses it into a TIMESTAMP
        String createDDL = "CREATE TABLE clickTable (" +
                "user_name STRING, " +
                "url STRING, " +
                "ts BIGINT, " +
                "et AS TO_TIMESTAMP( FROM_UNIXTIME(ts / 1000) ), " +
                "WATERMARK FOR et AS et - INTERVAL '1' SECOND " +
                ") WITH (" +
                " 'connector'= 'filesystem'," +
                " 'path' = 'input/clicks.txt'," +
                " 'format' = 'csv'" +
                ")";

        // Register the table
        tableEnv.executeSql(createDDL);

        // 2. Register the custom aggregate function
        tableEnv.createTemporarySystemFunction("WeightedAverage", WeightedAverage.class);

        // 3. Call the UDF in a query
        Table resultTable = tableEnv.sqlQuery("select user_name, WeightedAverage(ts, 1) as w_avg " +
                " from clickTable group by user_name");

        // 4. The grouped aggregation produces an updating table, so convert it to a changelog stream and print
        tableEnv.toChangelogStream(resultTable).print();

        env.execute();
    }

    // Accumulator type for the aggregate function
    public static class WeightedAvgAccumulator {
        public long sum = 0;
        public int count = 0;
    }

    // Custom aggregate function computing a weighted average
    public static class WeightedAverage extends AggregateFunction<Long, WeightedAvgAccumulator> {

        @Override
        public Long getValue(WeightedAvgAccumulator accumulator) {
            if (accumulator.count == 0)
                return null;
            else
                return accumulator.sum / accumulator.count;
        }

        @Override
        public WeightedAvgAccumulator createAccumulator() {
            return new WeightedAvgAccumulator();
        }

        // accumulate() is called once per input row (resolved by name, not an override)
        public void accumulate(WeightedAvgAccumulator accumulator, Long iValue, Integer iWeight) {
            accumulator.sum += iValue * iWeight;
            accumulator.count += iWeight;
        }

    }

}
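The registered function can be used from the Table API as well. A sketch, assuming the static imports $ and call from org.apache.flink.table.api.Expressions:

Table apiResult = tableEnv.from("clickTable")
        .groupBy($("user_name"))
        .select($("user_name"),
                call("WeightedAverage", $("ts"), 1).as("w_avg"));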

7. Scalar Functions: UdfTest_ScalarFunction

package com.example.chapter11;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.functions.ScalarFunction;

public class UdfTest_ScalarFunction {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // 1. Define the time attribute directly in the CREATE TABLE DDL
        // FROM_UNIXTIME converts the long epoch value into a 'yyyy-MM-dd HH:mm:ss' string (UTC); TO_TIMESTAMP parses it into a TIMESTAMP
        String createDDL = "CREATE TABLE clickTable (" +
                "user_name STRING, " +
                "url STRING, " +
                "ts BIGINT, " +
                "et AS TO_TIMESTAMP( FROM_UNIXTIME(ts / 1000) ), " +
                "WATERMARK FOR et AS et - INTERVAL '1' SECOND " +
                ") WITH (" +
                " 'connector'= 'filesystem'," +
                " 'path' = 'input/clicks.txt'," +
                " 'format' = 'csv'" +
                ")";

        // Register the table
        tableEnv.executeSql(createDDL);

        // 2. Register the custom scalar function
        tableEnv.createTemporaryFunction("MyHash", MyHashFunction.class);

        // 3. Call the UDF in a query
        Table resultTable = tableEnv.sqlQuery("select user_name, MyHash(user_name) from clickTable");

        // 4. Convert to a stream and print
        tableEnv.toDataStream(resultTable).print();

        env.execute();
    }


    public static class MyHashFunction extends ScalarFunction {
        public int eval(String str) {
            return str.hashCode();
        }
    }
}
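Registration is only required when the function is used in SQL; in the Table API it can be called inline by class. A sketch, assuming the static imports $ and call from org.apache.flink.table.api.Expressions:

Table inlineResult = tableEnv.from("clickTable")
        .select($("user_name"),
                call(MyHashFunction.class, $("user_name")).as("hash_value"));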

8. Table Aggregate Functions

package com.example.chapter11;

/**
 * Table aggregate function example
 * <p>
 * Project:  FlinkTutorial
 * <p>
 * Created by  wushengran
 */


import com.example.chapter05.ClickSource;
import com.example.chapter05.Event;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.functions.TableAggregateFunction;
import org.apache.flink.util.Collector;

import java.time.Duration;

import static org.apache.flink.table.api.Expressions.$;
import static org.apache.flink.table.api.Expressions.call;

public class UdfTest_TableAggregateFunction {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // 1. Read from the custom source and assign timestamps/watermarks
        SingleOutputStreamOperator<Event> eventStream = env.addSource(new ClickSource())
                .assignTimestampsAndWatermarks(WatermarkStrategy.<Event>forBoundedOutOfOrderness(Duration.ZERO)
                        .withTimestampAssigner(new SerializableTimestampAssigner<Event>() {
                            @Override
                            public long extractTimestamp(Event element, long recordTimestamp) {
                                return element.timestamp;
                            }
                        })
                );

        // 2. Convert the stream into a table
        Table eventTable = tableEnv.fromDataStream(eventStream,
                $("user"),
                $("url"),
                $("timestamp").as("ts"),
                $("rt").rowtime());
        tableEnv.createTemporaryView("EventTable", eventTable);

        // 3. Tumbling-window aggregation: each user's click count per window
        Table windowAggTable = tableEnv.sqlQuery("select user, count(url) as cnt, " +
                "window_end " +
                "from TABLE(" +
                "  TUMBLE( TABLE EventTable, DESCRIPTOR(rt), INTERVAL '10' SECOND )" +
                ")" +
                "group by user," +
                "  window_start," +
                "  window_end");
        tableEnv.createTemporaryView("AggTable", windowAggTable);

        // 4. Register the table aggregate function
        tableEnv.createTemporarySystemFunction("Top2", Top2.class);

        // 5. Call the function in the Table API (table aggregate functions cannot be called from SQL)
        Table resultTable = tableEnv.from("AggTable")
                .groupBy($("window_end"))
                .flatAggregate(call("Top2", $("cnt")).as("value", "rank"))
                .select($("window_end"), $("value"), $("rank"));

        // 6. Print to the console as a changelog stream
        tableEnv.toChangelogStream(resultTable).print();

        env.execute();
    }
    // Accumulator type holding the largest and second-largest values seen so far
    public static class Top2Accumulator {
        public Long first;
        public Long second;
    }

    // Custom table aggregate function: find the two largest values in a group, emitted as (value, rank) tuples
    public static class Top2 extends TableAggregateFunction<Tuple2<Long, Integer>, Top2Accumulator> {

        @Override
        public Top2Accumulator createAccumulator() {
            Top2Accumulator acc = new Top2Accumulator();
            acc.first = Long.MIN_VALUE;    // initialize with the minimum value so any input replaces it
            acc.second = Long.MIN_VALUE;
            return acc;
        }

        // called once per input row; updates the accumulator when a larger value arrives
        public void accumulate(Top2Accumulator acc, Long value) {
            if (value > acc.first) {
                acc.second = acc.first;
                acc.first = value;
            } else if (value > acc.second) {
                acc.second = value;
            }
        }

        // emit up to two rows of (value, rank) tuples
        public void emitValue(Top2Accumulator acc, Collector<Tuple2<Long, Integer>> out) {
            if (acc.first != Long.MIN_VALUE) {
                out.collect(Tuple2.of(acc.first, 1));
            }
            if (acc.second != Long.MIN_VALUE) {
                out.collect(Tuple2.of(acc.second, 2));
            }
        }
    }

}
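The function could equally be called inline without prior registration; a sketch of the alternative to step 5 above:

Table inlineTop2 = tableEnv.from("AggTable")
        .groupBy($("window_end"))
        .flatAggregate(call(Top2.class, $("cnt")).as("value", "rank"))
        .select($("window_end"), $("value"), $("rank"));

For a window whose per-user counts are 3, 5 and 4, Top2 emits (5, 1) and (4, 2); as rows for the same window arrive one by one, the changelog stream retracts the previously emitted top two and emits the updated ranking.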

9. Table Functions: UdfTest_TableFunction

package com.example.chapter11;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.functions.TableFunction;

public class UdfTest_TableFunction {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // 1. Define the time attribute directly in the CREATE TABLE DDL
        // FROM_UNIXTIME converts the long epoch value into a 'yyyy-MM-dd HH:mm:ss' string (UTC); TO_TIMESTAMP parses it into a TIMESTAMP
        String createDDL = "CREATE TABLE clickTable (" +
                "user_name STRING, " +
                "url STRING, " +
                "ts BIGINT, " +
                "et AS TO_TIMESTAMP( FROM_UNIXTIME(ts / 1000) ), " +
                "WATERMARK FOR et AS et - INTERVAL '1' SECOND " +
                ") WITH (" +
                " 'connector'= 'filesystem'," +
                " 'path' = 'input/clicks.txt'," +
                " 'format' = 'csv'" +
                ")";

        // Register the table
        tableEnv.executeSql(createDDL);

        // 2. Register the custom table function
        tableEnv.createTemporaryFunction("MySplit", MySplit.class);

        // 3. Call the UDF via a lateral table join
        Table resultTable = tableEnv.sqlQuery("select user_name, url, word, length" +
                " from clickTable, " +
                "LATERAL TABLE( MySplit(url) ) AS T(word, length)");

        // 4. Convert to a stream and print
        tableEnv.toDataStream(resultTable).print();

        env.execute();
    }

    // Custom table function: split the url on '?' and emit a (word, length) row per fragment
    public static class MySplit extends TableFunction<Tuple2<String, Integer>> {
        public void eval(String str) {
            String[] fields = str.split("\\?");
            for (String field : fields) {
                collect(Tuple2.of(field, field.length()));
            }
        }
    }
}
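The lateral join can also be expressed in the Table API with joinLateral. A sketch, assuming the static imports $ and call from org.apache.flink.table.api.Expressions:

Table lateralResult = tableEnv.from("clickTable")
        .joinLateral(call("MySplit", $("url")).as("word", "length"))
        .select($("user_name"), $("url"), $("word"), $("length"));

For an input url like ./prod?id=100, MySplit emits the rows (./prod, 6) and (id=100, 6).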
