①滚动窗口
// Tumbling-window aggregation with the Table API: rows are assigned to
// 10-second tumbling windows on the time attribute 'ts, grouped per window and
// per id, and each group yields the id, the number of rows and the average temp.
// The time attribute is passed as the expression 'ts (not the string "ts"),
// matching the sliding and session window snippets in this file.
val resultTable = tableFromStream
  .window(Tumble over 10.seconds on 'ts as 'tw)
  .groupBy('tw, 'id)
  .select('id, 'id.count, 'temp.avg)
// Tumbling-window aggregation expressed in SQL: rows of input_stream_table are
// grouped by id and by a 10-second tumble window on ts. Each result row carries
// the id, the row count (cnt), the average temp (avg_temp) and the window end
// returned by tumble_end, which must use the same arguments as tumble in the
// group by clause.
val resultSqlTable = tabEnv.sqlQuery(
"""
|select id
|,count(*) as cnt
|,avg(temp) as avg_temp
|,tumble_end(ts,interval '10' second)
|from input_stream_table
|group by tumble(ts,interval '10' second),id
|""".stripMargin)
②滑动窗口
// Sliding-window aggregation with the Table API: windows are 10 seconds long
// and start every 5 seconds on the time attribute 'ts. Rows are grouped per
// window and per id; each group yields the id, the row count and the average temp.
val resultSlideTable = tableFromStream
  .window(Slide over 10.seconds every 5.seconds on 'ts as 'sw)
  .groupBy('sw, 'id)
  .select('id, 'id.count, 'temp.avg)
// Sliding-window aggregation expressed in SQL: rows of input_stream_table are
// grouped by id and by hop(ts, 5 seconds, 10 seconds). Each result row carries
// the id, the row count (cnt), the average temp (avg_temp) and the window end
// returned by hop_end, called with the same arguments as hop in the group by.
val resultSqlSlideTable = tabEnv.sqlQuery(
"""
|select id
|,count(*) as cnt
|,avg(temp) as avg_temp
|,hop_end(ts,interval '5' second,interval '10' second)
|from input_stream_table
|group by id,hop(ts,interval '5' second,interval '10' second)
|""".stripMargin)
③会话窗口
// Session-window aggregation with the Table API: session windows on the time
// attribute 'ts are defined with a gap of 10 seconds. Rows are grouped per
// window and per id; each group yields the id, the row count and the average temp.
val resultSessionTable = tableFromStream
  .window(Session withGap 10.seconds on 'ts as 'sew)
  .groupBy('sew, 'id)
  .select('id, 'id.count, 'temp.avg)
// Session-window aggregation expressed in SQL: rows of input_stream_table are
// grouped by id and by session(ts, 10 seconds). Each result row carries the id,
// the row count (cnt), the average temp (avg_temp) and the window end returned
// by session_end, called with the same arguments as session in the group by.
val resultSqlSessionTable = tabEnv.sqlQuery(
"""
|select id
|,count(*) as cnt
|,avg(temp) as avg_temp
|,session_end(ts,interval '10' second)
|from input_stream_table
|group by session(ts,interval '10' second),id
|""".stripMargin)
另外还有一些辅助函数,可以用来选择 Group Window 的开始和结束时间戳,以及时间
属性。
这里只写 TUMBLE_*,滑动和会话窗口是类似的(HOP_*,SESSION_*)。
TUMBLE_START(time_attr, interval)
TUMBLE_END(time_attr, interval)
TUMBLE_ROWTIME(time_attr, interval)
TUMBLE_PROCTIME(time_attr, interval)
Over窗口
无界over window
// Unbounded event-time over window (preceding UNBOUNDED_RANGE)
.window(Over partitionBy 'a orderBy 'rowtime preceding UNBOUNDED_RANGE as 'w)
// Unbounded event-time row-count over window (preceding UNBOUNDED_ROW)
.window(Over partitionBy 'a orderBy 'rowtime preceding UNBOUNDED_ROW as 'w)
有界over window
// Bounded event-time over window (preceding 1.minutes)
.window(Over partitionBy 'a orderBy 'rowtime preceding 1.minutes as 'w)
// Bounded event-time row-count over window (preceding 10.rows)
.window(Over partitionBy 'a orderBy 'rowtime preceding 10.rows as 'w)
Table API 实现
// Over-window aggregation with the Table API: for every input row, the count of
// id and the average temp are computed over the window 'ow, which is partitioned
// by 'id, ordered by the time attribute 'rt and bounded by `preceding 3.rows`.
// Field references use symbol expressions ('id, 'rt); the typographic quotes in
// the original snippet were not valid Scala.
// NOTE(review): the SQL variant in these notes uses
// `rows between 2 PRECEDING and current row`; confirm whether 3.rows or 2.rows
// is intended so that both variants describe the same window.
val resultTable = table
  .window(Over partitionBy 'id orderBy 'rt preceding 3.rows as 'ow)
  .select('id, 'id.count over 'ow, 'temp.avg over 'ow)
Flink SQL 实现
// Over-window aggregation expressed in SQL: for every row of input_table, the
// count of id and the average temp are evaluated over the named window `ow`
// (partitioned by id, ordered by rt, covering the 2 preceding rows plus the
// current row). The aggregates must reference the window with `over ow`; a
// `group by` would turn the query into a plain group aggregation and leave the
// declared window unused, so it is not part of this query.
val resultSqlTable = tabEnv.sqlQuery(
"""
|select id
|,count(id) over ow
|,avg(temp) over ow
|from input_table
|window ow as (partition by id order by rt rows between 2 PRECEDING and current row)
|""".stripMargin)