Flink Series: Using the CUMULATE Window

Background:

  A simple scenario: we want to count how many records are ingested through Flink per day, but we also want to check the running total every 10 seconds.

  Flink's CUMULATE window solves exactly this kind of problem, and it is widely used for real-time dashboards.

Tip: window output is triggered by watermark advancement; the watermark can be driven by processing time or by event time.
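For the processing-time case, the watermark column can be replaced by a computed PROCTIME() attribute. A minimal sketch (the table, column, and connector names here are illustrative and not part of the demo below):

// Hypothetical processing-time variant: no WATERMARK clause is needed;
// the CUMULATE descriptor can reference the proctime attribute directly.
tEnv.executeSql("CREATE TABLE consumer_proctime (" +
        "    product STRING," +
        "    price DOUBLE," +
        "    pt AS PROCTIME()" +
        ") WITH ('connector' = 'datagen')");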

Demo

We simulate the input data, then use CUMULATE to compute the cumulative sales over a 1-minute window, emitting the running total every 10 seconds. CUMULATE takes the source table, a time-attribute descriptor, a step (10 seconds), and a maximum window size (1 minute); at every step it emits the aggregate accumulated since the window start.

package com.test.flink.table.cumulate.producer.consumer;

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

/**
 * Example scenario:
 * 1. Compute real-time daily revenue for a live dashboard, updating the result every second.
 * Extended scenarios:
 * 1. Windows spanning 23:00 to 01:00 of the next day (window offset)
 * 2. Other aggregations
 */
public class TestCumulate {


    public static void main(String[] args) {
        EnvironmentSettings settings = EnvironmentSettings.inStreamingMode();
        TableEnvironment tEnv = TableEnvironment.create(settings);
        // Run the demo with parallelism 1 so the printed output stays ordered.
        // (addJobParameter only sets pipeline.global-job-parameters and would
        // not change the parallelism.)
        tEnv.getConfig().getConfiguration().setString("parallelism.default", "1");

        // Source table. The watermark equals the event time (0s delay), so a
        // window fires as soon as an event time past the window end is seen.
        tEnv.executeSql("CREATE TABLE consumer (\n" +
                "    consumerTimestamp  TIMESTAMP(3),\n" +
                "    product      STRING,\n" +
                "    price DOUBLE" +
                ",\n" +
                "    WATERMARK FOR consumerTimestamp AS consumerTimestamp - INTERVAL '0' SECOND\n" +
                ") WITH (\n" +
                "    'connector' = 'consumer'" +
                ")");



        tEnv.executeSql("CREATE TABLE print1 (\n" +
                "    window_start  TIMESTAMP(3),\n" +
                "    window_end  TIMESTAMP(3),\n" +
                "    sumPrice     Double ,\n" +
                "    total BIGINT" +
                ") WITH (\n" +
                "   'connector'  = 'print',\n" +
                "   'sink.parallelism'='1'\n" +
                ")");

        tEnv.executeSql("insert into print1 " +
                "select " +
                " window_start," +
                " window_end," +
                // TRUNCATE(sum(price), 2) keeps two decimal places, e.g. 42.01323 -> 42.01;
                // the commented CAST(... AS BIGINT) variant would truncate it further to 42.
//                "CAST(TRUNCATE(sum(price),2) as BIGINT )as sumPrice," +
                "TRUNCATE(sum(price),2) as sumPrice," +
                "count(product) as total " +
                "from TABLE(CUMULATE(" +
                "TABLE consumer," +
                "DESCRIPTOR(consumerTimestamp)," +
                "INTERVAL '10' SECOND," +
                "INTERVAL '1' MINUTES" +
                "))"
               + " group by window_start,window_end"
        );


        // Alternative (commented out): emit the window boundaries as epoch
        // milliseconds, which is often what a dashboard frontend expects.
//        tEnv.executeSql("CREATE TABLE print (\n" +
//                "    window_start  BIGINT,\n" +
//                "    window_end  BIGINT,\n" +
//                "    sumPrice     DOUBLE ,\n" +
//                "    total BIGINT" +
//                ") WITH (\n" +
//                "   'connector'  = 'print',\n" +
//                "   'sink.parallelism'='1'\n" +
//                ")");
//
//
//
//        tEnv.executeSql("insert into print " +
//                "select " +
//                "UNIX_TIMESTAMP(CAST(window_start AS STRING)) * 1000 as window_start," +
//                "UNIX_TIMESTAMP(CAST(window_end AS STRING)) * 1000 as window_end," +
//                "TRUNCATE(sum(price),0) as sumPrice," +
//                "count(product) as total " +
//                "from TABLE(CUMULATE(" +
//                "TABLE consumer," +
//                "DESCRIPTOR(consumerTimestamp)," +
//                "INTERVAL '10' SECOND," +
//                "INTERVAL '1' MINUTES" +
//                ")) group by window_start,window_end");
    }
}
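With roughly one record per second, the print sink emits one row per 10-second step. The output looks roughly like the following (the +I row-kind prefix comes from the print connector; the values are illustrative, not captured from a real run):

+I[2024-01-01T12:00, 2024-01-01T12:00:10, 270.09, 9]
+I[2024-01-01T12:00, 2024-01-01T12:00:20, 610.20, 20]
+I[2024-01-01T12:00, 2024-01-01T12:00:30, 880.29, 29]

Note that window_start stays pinned to the 1-minute window boundary while window_end advances by the 10-second step.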

Data source implementation

import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.factories.DynamicTableSourceFactory;

import java.util.HashSet;
import java.util.Set;


/** Factory discovered via SPI; its identifier maps 'connector' = 'consumer' to this source. */
public class ConsumerDataGenFactory implements DynamicTableSourceFactory {
    @Override
    public DynamicTableSource createDynamicTableSource(Context context) {
        return new ConsumerDataTableSource();
    }

    @Override
    public String factoryIdentifier() {
        return "consumer";
    }

    @Override
    public Set<ConfigOption<?>> requiredOptions() {
        return new HashSet<>();
    }

    @Override
    public Set<ConfigOption<?>> optionalOptions() {
        return new HashSet<>();
    }
}

import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.data.TimestampData;

import java.time.LocalDateTime;
import java.util.Random;


public class ConsumerDataSourceFunction implements SourceFunction<RowData> {

    // Flipped by cancel() so the emit loop terminates cleanly.
    private volatile boolean running = true;

    private final String[] product = new String[]{
            "A", "B", "C", "D", "E"
    };

    private final Double[] price = new Double[]{
            10.01d,20.01d,30.01d,40.01d,50.01d
    };

    @Override
    public void run(SourceContext<RowData> ctx) throws Exception {
        /*
         * Emit one test record roughly every second for the CUMULATE demo.
         * Record layout: consumption time, product name, price.
         */
        Random random = new Random();
        long startTime = System.currentTimeMillis();
        while (running) {
            if ((System.currentTimeMillis() - startTime) > 1000) {
                startTime = System.currentTimeMillis();
                GenericRowData row = new GenericRowData(3);
                // Table sources emit Flink's internal data structures, hence
                // TimestampData/StringData instead of LocalDateTime/String.
                row.setField(0, TimestampData.fromLocalDateTime(LocalDateTime.now()));
                row.setField(1, StringData.fromString(product[random.nextInt(product.length)]));
                row.setField(2, price[random.nextInt(price.length)]);
                ctx.collect(row);
            }
            Thread.sleep(100); // avoid busy-spinning between emissions
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}
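Note: SourceFunction is Flink's legacy source API (deprecated in newer releases in favor of the unified Source interface); it is used here only to keep the demo short.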


import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.connector.source.SourceFunctionProvider;


public class ConsumerDataTableSource implements ScanTableSource {

    @Override
    public ChangelogMode getChangelogMode() {
        return ChangelogMode.insertOnly();
    }

    @Override
    public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderContext) {
        // The generator never terminates, so the source must be declared unbounded.
        return SourceFunctionProvider.of(new ConsumerDataSourceFunction(), false);
    }

    @Override
    public DynamicTableSource copy() {
        return new ConsumerDataTableSource();
    }

    @Override
    public String asSummaryString() {
        return this.getClass().getSimpleName();
    }
}

Maven dependencies

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>FlinkTableApiTest</artifactId>
        <groupId>org.example</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>cumulate</artifactId>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
        </dependency>
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter-api</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-statebackend-rocksdb</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web</artifactId>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java</artifactId>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-base</artifactId>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_2.12</artifactId>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-state-processor-api</artifactId>
        </dependency>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
        </dependency>

    </dependencies>
</project>
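Dependency versions are inherited from the parent module (FlinkTableApiTest), whose dependencyManagement section is not shown here.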

SPI

The custom connector is discovered through Java SPI: the factory class has to be registered in a service file on the classpath, otherwise 'connector' = 'consumer' cannot be resolved. A minimal sketch follows (the package name is an assumption based on the demo class's package):
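# src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory
# (fully qualified factory class; package assumed from the demo code)
com.test.flink.table.cumulate.producer.consumer.ConsumerDataGenFactory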
