Getting Started with Flink: Real-Time Reporting with Table SQL and the Table API

Scenario

Official example:

Real Time Reporting with the Table API | Apache Flink

The official scenario reads data from Kafka, aggregates it into hourly reports, and writes the results to a JDBC database.

To make the simulation easier to run, we instead generate mock data with a custom in-memory connector and compute sum(amount) over 1-minute Tumble (tumbling) windows.

The code below shows the window implemented first with Table SQL and then with the Table API.

Main program

package cin.examples.table;

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.Tumble;

import static org.apache.flink.table.api.Expressions.$;
import static org.apache.flink.table.api.Expressions.lit;

public class RealTimeReportExamples {

    public static void main(String[] args) {

        EnvironmentSettings settings = EnvironmentSettings.inStreamingMode();
        settings.getConfiguration().setString("parallelism.default","1");
        TableEnvironment tEnv = TableEnvironment.create(settings);


        tEnv.executeSql("CREATE TABLE transactions (\n" +
                "    account_id  BIGINT,\n" +
                "    amount      BIGINT,\n" +
                "    transaction_time TIMESTAMP(3),\n" +
                "    WATERMARK FOR transaction_time AS transaction_time - INTERVAL '5' SECOND\n" +
                ") WITH (\n" +
                "    'connector' = 'transaction',\n" +
                "    'source.parallelism'  = '1'" +
                ")");

        tEnv.executeSql("CREATE TABLE print (\n" +
                "    account_id BIGINT,\n" +
                "    log_ts     TIMESTAMP(3),\n" +
                "    amount     BIGINT\n," +
                "    PRIMARY KEY (account_id, log_ts) NOT ENFORCED" +
                ") WITH (\n" +
                "    'connector'  = 'print',\n" +
                "    'sink.parallelism'  = '1'" +
                ")");
        // 1) SQL approach
        tEnv.executeSql("insert into print " +
                "select account_id," +
                "FLOOR(transaction_time TO MINUTE) as log_ts," +
                "sum(amount) as amount " +
                "from transactions " +
                " GROUP BY account_id,FLOOR(transaction_time TO MINUTE),TUMBLE(transaction_time,INTERVAL '60' SECONDS)");
        // 2) Table API approach (uncomment the two lines below to run it instead of the SQL version above)
//        Table transactions = tEnv.from("transactions");
//        report(transactions).executeInsert("print");


    }


    public static Table report(Table transactions) {
        return transactions
                .window(Tumble.over(lit(60).second()).on($("transaction_time")).as("log_ts"))
                .groupBy($("account_id"), $("log_ts"))
                .select(
                        $("account_id"),
                        $("log_ts").start().as("log_ts"),
                        $("amount").sum().as("amount"));
    }
}
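For reference, the same 1-minute aggregation can also be expressed with the windowing TVF syntax that newer Flink versions recommend over the GROUP BY TUMBLE form. A minimal sketch, reusing the transactions and print tables defined above; it would replace the INSERT statement inside main() and is not part of the original example:

        tEnv.executeSql("insert into print " +
                "select account_id, window_start as log_ts, sum(amount) as amount " +
                "from TABLE(TUMBLE(TABLE transactions, DESCRIPTOR(transaction_time), INTERVAL '1' MINUTE)) " +
                "group by account_id, window_start, window_end");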

Mock data source code

The in-memory 'transaction' connector consists of three classes: a DynamicTableSourceFactory that Flink looks up by the identifier 'transaction', a ScanTableSource that describes the source, and a SourceFunction that emits one random row roughly every second.

package cin.examples.table.transaction.table;


import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.factories.DynamicTableSourceFactory;

import java.util.HashSet;
import java.util.Set;


public class TransactionTableSourceFactory implements DynamicTableSourceFactory {
    @Override
    public DynamicTableSource createDynamicTableSource(Context context) {
        return new TransactionTableSource();
    }

    @Override
    public String factoryIdentifier() {
        return "transaction";
    }

    @Override
    public Set<ConfigOption<?>> requiredOptions() {
        return new HashSet<>();
    }

    @Override
    public Set<ConfigOption<?>> optionalOptions() {
        return new HashSet<>();
    }
}
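For Flink to discover this factory by the identifier 'transaction' used in the WITH clause, it must be registered through Java SPI. Assuming the package names above, add a resource file

        src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory

containing a single line with the fully qualified factory class name:

        cin.examples.table.transaction.table.TransactionTableSourceFactory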


package cin.examples.table.transaction.table;


import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.connector.source.SourceFunctionProvider;


public class TransactionTableSource implements ScanTableSource {

    @Override
    public ChangelogMode getChangelogMode() {
        return ChangelogMode.insertOnly();
    }

    @Override
    public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderContext) {
        // This mock source never finishes, so it is unbounded (isBounded = false).
        return SourceFunctionProvider.of(new TransactionTableSourceFunction(), false);
    }

    @Override
    public DynamicTableSource copy() {
        return new TransactionTableSource();
    }

    @Override
    public String asSummaryString() {
        return this.getClass().getSimpleName();
    }
}


package cin.examples.table.transaction.table;

import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.TimestampData;

import java.time.LocalDateTime;
import java.util.Random;


public class TransactionTableSourceFunction implements SourceFunction<RowData> {

    private volatile boolean running = true;

    @Override
    public void run(SourceContext<RowData> ctx) throws Exception {
        /*
         * Emit one mock transaction roughly every second to drive the tumbling window.
         * The row layout matches the DDL of the transactions table:
         *   account_id        BIGINT
         *   amount            BIGINT
         *   transaction_time  TIMESTAMP(3)
         */
        Random random = new Random();

        while (running) {
            GenericRowData row = new GenericRowData(3);
            row.setField(0, Long.valueOf(random.nextInt(5)));   // account_id in [0, 4]
            row.setField(1, Long.valueOf(10));                  // fixed amount of 10
            row.setField(2, TimestampData.fromLocalDateTime(
                    LocalDateTime.now()));                      // event time = now
            ctx.collect(row);
            Thread.sleep(1000);
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}

Sample output

Each record is an insert-only changelog row (+I): account_id, the window's minute (log_ts), and the summed amount for that account in that minute.

+I[2, 2024-09-02T17:03, 10]
+I[1, 2024-09-02T17:03, 10]
+I[0, 2024-09-02T17:03, 10]
+I[2, 2024-09-02T17:04, 110]
+I[1, 2024-09-02T17:04, 130]
+I[4, 2024-09-02T17:04, 110]
+I[3, 2024-09-02T17:04, 140]
+I[0, 2024-09-02T17:04, 110]
+I[2, 2024-09-02T17:05, 80]
+I[1, 2024-09-02T17:05, 110]
+I[4, 2024-09-02T17:05, 150]
+I[0, 2024-09-02T17:05, 130]
+I[3, 2024-09-02T17:05, 130]

Maven dependencies

<properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <flink.version>1.19.0</flink.version>
        <scala.binary.version>2.12</scala.binary.version>
    </properties>

    <dependencies>
        <!-- Flink core -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Table ecosystem -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- The following two dependencies are not required to define a SQL job pipeline,
        but only to execute it.

        Here flink-table-planner_${scala.binary.version} is used instead of
        flink-table-planner-loader so that the job can be run directly from the IDE
        (see https://youtrack.jetbrains.com/issue/IDEA-93855).

        In a real environment, you need flink-table-runtime and flink-table-planner-loader either
        at test scope, for executing tests, or at provided scope, to run the main directly.
         -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-runtime</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Table connectors and formats -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-files</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-csv</artifactId>
            <version>${flink.version}</version>
        </dependency>

    </dependencies>
