近期在学习 Flink。Flink 抽象出了一种使用 SQL 来处理数据的模型,让使用者非常容易上手。下面的 Flink SQL 语句只用到了 Flink 内置的 datagen 和 print 两个 connector,无需依赖任何外部系统,非常适合初学者入门。
-- Input table: synthetic rows produced by Flink's built-in datagen connector.
CREATE TABLE source_table (
    sku_id   STRING,
    price    BIGINT,
    -- Computed column that serves as the event-time attribute (see WATERMARK below).
    row_time AS CAST(CURRENT_TIMESTAMP AS TIMESTAMP_LTZ(3)),
    -- The watermark trails row_time by 5 seconds.
    WATERMARK FOR row_time AS row_time - INTERVAL '5' SECOND
) WITH (
    'connector' = 'datagen',         -- rows are generated automatically
    'rows-per-second' = '1',         -- one row per second
    'fields.sku_id.length' = '1',    -- length of the generated sku_id
    'fields.price.min' = '1',        -- minimum generated price
    'fields.price.max' = '1000000'   -- maximum generated price
);
-- Output table: per-window, per-sku aggregates written through the print connector.
CREATE TABLE sink_table (
    window_start TIMESTAMP(3),   -- window start
    window_end   TIMESTAMP(3),   -- window end
    sku_id       STRING,
    count_result BIGINT,
    sum_result   BIGINT,
    avg_result   DOUBLE,
    min_result   BIGINT,
    max_result   BIGINT,
    -- The INSERT in this file groups by sku_id, window_start and window_end, so a
    -- row is identified by the window together with the sku; sku_id alone repeats
    -- across windows and is not a valid key.
    PRIMARY KEY (window_start, window_end, sku_id) NOT ENFORCED
) WITH (
    'connector' = 'print'        -- rows are printed to stdout
);
-- Aggregate prices per sku_id over cumulating windows and write the results to sink_table.
-- CUMULATE is the cumulating-window TVF (not a sliding/HOP window); its arguments are
--   CUMULATE(TABLE data, DESCRIPTOR(time_attr), step, max_size)
-- Here the windows grow in 5-second steps up to a maximum size of 1 day.
INSERT INTO sink_table (
    window_start,
    window_end,
    sku_id,
    count_result,
    sum_result,
    avg_result,
    min_result,
    max_result
)
SELECT
    window_start,
    window_end,
    sku_id,
    COUNT(*)                   AS count_result,
    SUM(price)                 AS sum_result,
    -- AVG over a BIGINT input yields a BIGINT (integer) result in Flink; cast the
    -- input so the DOUBLE sink column keeps the fractional part of the average.
    AVG(CAST(price AS DOUBLE)) AS avg_result,
    MIN(price)                 AS min_result,
    MAX(price)                 AS max_result
FROM TABLE(
    CUMULATE(TABLE source_table, DESCRIPTOR(row_time), INTERVAL '5' SECOND, INTERVAL '1' DAY)
)
GROUP BY
    sku_id,
    window_start,
    window_end;
Flink 会把 source_table 作为输入、sink_table 作为输出,INSERT INTO 语句则定义了具体的聚合计算逻辑和窗口规则,这样就实现了“输入-计算-输出”的模式。