Background:
A simple scenario: we want to count how much data is ingested through Flink over one day, but we also want to check the running total every 10 seconds.
Flink's CUMULATE (cumulative) window solves exactly this kind of problem, and it is widely used for real-time dashboards.
Tip: window output is triggered by watermark advancement; the watermark can be driven by processing time or by event time.
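As a minimal sketch of the processing-time flavor (my addition; the demo below uses event time): declare a computed PROCTIME() column instead of a watermark and point the CUMULATE descriptor at it. The table name and the built-in datagen source here are illustrative, not part of the original demo.
// Processing-time variant (sketch): no watermark needed, the wall clock drives the window.
tEnv.executeSql("CREATE TABLE sales_proctime (\n" +
" product STRING,\n" +
" price DOUBLE,\n" +
" proc AS PROCTIME()\n" +        // processing-time attribute
") WITH (\n" +
" 'connector' = 'datagen',\n" +  // built-in test source, illustrative only
" 'rows-per-second' = '1'\n" +
")");
// The CUMULATE call stays the same, only the descriptor column changes:
// CUMULATE(TABLE sales_proctime, DESCRIPTOR(proc), INTERVAL '10' SECOND, INTERVAL '1' MINUTES)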
Demo
Simulate an input stream, then use CUMULATE to compute the cumulative sales over 1 minute, emitting a result every 10 seconds.
package com.test.flink.table.cumulate.producer.consumer;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
/**
 * Example scenario:
 * 1. Compute the day's revenue in real time for a live dashboard, refreshing the result periodically (every 10 seconds in this demo).
 * Extended scenarios:
 * 1. Windows with an offset, e.g. 23:00 ~ 01:00 of the next day
 * 2. Other aggregation scenarios
 */
public class TestCumulate {
public static void main(String[] args) {
EnvironmentSettings settings = EnvironmentSettings.inStreamingMode();
TableEnvironment tEnv = TableEnvironment.create(settings);
// Run the whole job with parallelism 1 so the printed results arrive in order.
tEnv.getConfig().getConfiguration().setString("parallelism.default", "1");
tEnv.executeSql("CREATE TABLE consumer (\n" +
" consumerTimestamp TIMESTAMP(3),\n" +
" product STRING,\n" +
" price DOUBLE" +
",\n" +
" WATERMARK FOR consumerTimestamp AS consumerTimestamp - INTERVAL '0' SECOND\n" +
") WITH (\n" +
" 'connector' = 'consumer'" +
")");
tEnv.executeSql("CREATE TABLE print1 (\n" +
" window_start TIMESTAMP(3),\n" +
" window_end TIMESTAMP(3),\n" +
" sumPrice Double ,\n" +
" total BIGINT" +
") WITH (\n" +
" 'connector' = 'print',\n" +
" 'sink.parallelism'='1'\n" +
")");
tEnv.executeSql("insert into print1 " +
"select " +
" window_start," +
" window_end," +
// TRUNCATE(sum(price), 2) keeps two decimal places, e.g. 42.01323 -> 42.01;
// the commented-out CAST variant would further truncate to a BIGINT, e.g. 42.01 -> 42.
// "CAST(TRUNCATE(sum(price),2) as BIGINT) as sumPrice," +
"TRUNCATE(sum(price),2) as sumPrice," +
"count(product) as total " +
"from TABLE(CUMULATE(" +
"TABLE consumer," +
"DESCRIPTOR(consumerTimestamp)," +
"INTERVAL '10' SECOND," +
"INTERVAL '1' MINUTES" +
"))"
+ " group by window_start,window_end"
);
// Alternative sink (kept commented out): identical aggregation, but converts the
// window bounds to epoch milliseconds for consumers that expect BIGINT timestamps.
// tEnv.executeSql("CREATE TABLE print (\n" +
// " window_start BIGINT,\n" +
// " window_end BIGINT,\n" +
// " sumPrice DOUBLE ,\n" +
// " total BIGINT" +
// ") WITH (\n" +
// " 'connector' = 'print',\n" +
// " 'sink.parallelism'='1'\n" +
// ")");
// tEnv.executeSql("insert into print " +
// "select " +
// "UNIX_TIMESTAMP(CAST(window_start AS STRING)) * 1000 as window_start," +
// "UNIX_TIMESTAMP(CAST(window_end AS STRING)) * 1000 as window_end," +
// "TRUNCATE(sum(price),0) as sumPrice," +
// "count(product) as total " +
// "from TABLE(CUMULATE(" +
// "TABLE consumer," +
// "DESCRIPTOR(consumerTimestamp)," +
// "INTERVAL '10' SECOND," +
// "INTERVAL '1' MINUTES" +
// ")) group by window_start,window_end");
}
}
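To match the opening scenario (a daily running total refreshed every 10 seconds), only the two INTERVAL arguments change. A sketch of that query, reusing the consumer and print1 tables defined above (my variant, not part of the original demo):
// Daily dashboard variant (sketch): cumulative total since midnight, emitted every 10 seconds.
tEnv.executeSql("insert into print1 " +
"select window_start, window_end, " +
"TRUNCATE(sum(price),2) as sumPrice, " +
"count(product) as total " +
"from TABLE(CUMULATE(" +
"TABLE consumer," +
"DESCRIPTOR(consumerTimestamp)," +
"INTERVAL '10' SECOND," + // step
"INTERVAL '1' DAY" +      // max window size: one day
")) group by window_start, window_end");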
Data source implementation
The custom 'consumer' connector consists of three classes: a factory (discovered via SPI, see the end of this post), a dynamic table source, and a SourceFunction that generates the test rows.
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.factories.DynamicTableSourceFactory;
import java.util.HashSet;
import java.util.Set;
public class ConsumerDataGenFactory implements DynamicTableSourceFactory {
@Override
public DynamicTableSource createDynamicTableSource(Context context) {
return new ConsumerDataTableSource();
}
@Override
public String factoryIdentifier() {
// Matches the 'connector' = 'consumer' option in the CREATE TABLE statement.
return "consumer";
}
@Override
public Set<ConfigOption<?>> requiredOptions() {
return new HashSet<>();
}
@Override
public Set<ConfigOption<?>> optionalOptions() {
return new HashSet<>();
}
}
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.data.TimestampData;
import java.time.LocalDateTime;
import java.util.Random;
public class ConsumerDataSourceFunction implements SourceFunction<RowData> {
private volatile boolean running = true;
private final String[] product = new String[]{
"A", "B", "C", "D", "E"
};
private final Double[] price = new Double[]{
10.01d,20.01d,30.01d,40.01d,50.01d
};
@Override
public void run(SourceContext<RowData> ctx) throws Exception {
// Emit test records for the cumulate window demo.
// Record layout: consume timestamp | product name | amount
Random random = new Random();
while (running) {
GenericRowData row = new GenericRowData(3);
row.setField(0, TimestampData.fromLocalDateTime(
LocalDateTime.now()));
row.setField(1, StringData.fromString(product[random.nextInt(product.length)]));
row.setField(2, price[random.nextInt(price.length)]);
ctx.collect(row);
Thread.sleep(1000L); // roughly one record per second, without busy-waiting
}
}
@Override
public void cancel() {
running = false; // lets run() exit its loop when the job is cancelled
}
}
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.connector.source.SourceFunctionProvider;
public class ConsumerDataTableSource implements ScanTableSource {
@Override
public ChangelogMode getChangelogMode() {
return ChangelogMode.insertOnly();
}
@Override
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderContext) {
// isBounded = false: the SourceFunction loops until cancelled, so this source is unbounded.
return SourceFunctionProvider.of(new ConsumerDataSourceFunction(), false);
}
@Override
public DynamicTableSource copy() {
return new ConsumerDataTableSource();
}
@Override
public String asSummaryString() {
return this.getClass().getSimpleName();
}
}
Maven dependencies (versions are inherited from the parent pom):
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>FlinkTableApiTest</artifactId>
<groupId>org.example</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>cumulate</artifactId>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-statebackend-rocksdb</artifactId>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime</artifactId>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java</artifactId>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients</artifactId>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime-web</artifactId>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka</artifactId>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java</artifactId>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-base</artifactId>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_2.12</artifactId>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-state-processor-api</artifactId>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-core</artifactId>
</dependency>
</dependencies>
</project>
SPI registration
Flink discovers ConsumerDataGenFactory through Java's ServiceLoader, so the factory's fully qualified class name must be listed in a service file on the classpath:
src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory
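The file contains a single line, the factory's fully qualified name. Assuming the factory sits in the same package as TestCumulate (the listings above omit its package declaration, so adjust to your actual layout):
com.test.flink.table.cumulate.producer.consumer.ConsumerDataGenFactory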