flink sql表查询详解
1、高阶聚合
group by cube(维度1, 维度2, 维度3)
group by grouping sets( (维度1,维度2),(维度1,维度3),() )
group by rollup(省,市,区)
语法示例:
select
privince,
city,
region,
count(distinct uid) as u_cnt
from t
group by cube(province,city,region)
select
privince,
city,
region,
count(distinct uid) as u_cnt
from t
group by rollup(province,city,region)
select
privince,
city,
region,
count(distinct uid) as u_cnt
from t
group by grouping sets( (province,city), (province,city,region) )
2、时间窗口 TVF(表值函数)
flink从1.13开始,提供了时间窗口聚合计算的TVF语法。
表值函数的使用约束:
-
(1)在窗口上做分组聚合,必须带上window_start 和 window_end 作为分组的key;
-
(2)在窗口上做topn计算,必须带上window_start 和 window_end 作为partition的key;
-
(3)带条件的join,必须包含2个输入表的window_start 和 window_end 等值条件。
select
…
from table(
tumble (table t ,descriptor(rt),interval ‘10’ minutes)
)
(1) 支持的时间窗口类型
1、滚动窗口(Tumble Windows)
TUMBLE (TABLE t_action,descriptor(时间属性字段),INTERVAL '10' SECOND[ 窗口长度 ] )
2、滑动窗口(Hop Windows)
HOP (TABLE t_action,descriptor(时间属性字段),INTERVAL '5' SECONDS[ 滑动步长 ] , INTERVAL '10' SECOND[ 窗口长度 ] )
3、累计窗口(Cumulate Windows)
CUMULATE (TABLE t_action,descriptor(时间属性字段),INTERVAL '5' SECONDS[ 更新最大步长 ] , INTERVAL '10' SECOND[ 窗口最大长度 ] )
4、会话窗口(Session Windows)
暂不支持!
(2) 语法示例
select
window_start,
window_end,
channel,
count(distinct guid) as uv
from table (
tumble(table t_applog,descriptor(rt),interval '5' minute ) --滚动窗口
)
group by window_start,window_end,channel
3、窗口topn
-- bidtime,price,item,supplier_id
2020-04-15 08:05:00.000,4.00,C,supplier1
2020-04-15 08:07:00.000,2.00,A,supplier1
2020-04-15 08:09:00.000,5.00,D,supplier2
2020-04-15 08:11:00.000,3.00,B,supplier2
2020-04-15 08:09:00.000,5.00,D,supplier3
2020-04-15 08:11:00.000,6.00,B,supplier3
2020-04-15 08:11:00.000,6.00,B,supplier3
/**
* 10分钟滚动窗口中的交易金额最大的前2笔订单
*/
public class _02_Window_Topn_V2 {
public static void main(String[] args) {
// 创建表的执行环境
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
// 从kafka中读取数据
String sourceTable = "CREATE TABLE source_table (
" +
" bidtime string ,
" +
" `price` double,
" +
" `item` STRING,
" +
" `supplier_id` STRING,
" +
" `rt` as cast( bidtime as timestamp(3) ),
" +
" watermark for rt as rt - interval '5' second
" +
") WITH (
" +
" 'connector' = 'kafka',
" +
" 'topic' = 'topn1',
" +
" 'properties.bootstrap.servers' = 'hadoop01:9092',
" +
" 'properties.group.id' = 'testGroup',
" +
" 'scan.startup.mode' = 'earliest-offset',
" +
" 'format' = 'csv'
" +
")";
tenv.executeSql(sourceTable);
// 10分钟滚动窗口中的交易金额最大的前2笔订单
tenv.executeSql("select
" +
" *
" +
"from(
" +
" select window_start,window_end,
" +
" bidtime,
" +
" price,
" +
" item,
" +
" supplier_id,
" +
" row_number() over(partition by window_start,window_end order by price desc ) as rn
" +
" from table(
" +
" tumble(table source_table,descriptor(rt),interval '10' minute)
" +
" )
" +
") t1 where rn <= 2 ").print();
}
}
## 结果如下
+----+-------------------------+-------------------------+-------------------------+-------+---------+--------------+-------+
| op | window_start | window_end | bidtime | price | item | supplier_id | rn |
+----&#