详解 Flink Table API 和 Flink SQL 之窗口操作

时间语义,要配合窗口操作才能发挥作用

一、Group Window

分组窗口(Group Windows)会根据时间或行计数间隔,将行聚合到有限的组(Group)中,并对每个组的数据执行一次聚合函数。主要分为滚动窗口、滑动窗口和会话窗口三种。

/**
 * Demonstrates group windows (tumbling, sliding, session) over an event-time
 * sensor stream, once with the Table API and once with SQL.
 *
 * <p>Input is read from {@code ./sensor.txt}; each line is split on {@code ","}
 * into an id, a timestamp and a temperature.
 */
public class TestGroupWindow {
    /**
     * Builds the pipeline, registers the {@code sensor} view, runs one tumbling
     * window query per API and prints both results.
     *
     * @param args unused
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // Enable event-time semantics.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        DataStream<String> inputStream = env.readTextFile("./sensor.txt");
        DataStream<SensorReading> dataStream = inputStream
            .map(line -> {
                String[] fields = line.split(",");
                // valueOf replaces the deprecated new Long(String) / new Double(String) constructors.
                return new SensorReading(fields[0], Long.valueOf(fields[1]), Double.valueOf(fields[2]));
            })
            // Extract the event timestamp and emit watermarks that tolerate 2 seconds of disorder.
            .assignTimestampsAndWatermarks(
                new BoundedOutOfOrdernessTimestampExtractor<SensorReading>(Time.seconds(2)) {
                    @Override
                    public long extractTimestamp(SensorReading element) {
                        // Scaled by 1000: presumably the file stores seconds and Flink expects milliseconds.
                        return element.getTimestamp() * 1000L;
                    }
                });

        // "rt.rowtime" appends an event-time attribute named rt to the table schema.
        Table sensorTable = tableEnv.fromDataStream(dataStream, "id, timestamp as ts, temperature as temp, rt.rowtime");

        tableEnv.createTemporaryView("sensor", sensorTable);

        // ---- Group window operations ----
        // 1. Table API
        // 1.1 Tumbling window (time-based or count-based)
        Table resultTable = sensorTable
            .window(Tumble.over("10.minutes").on("rt").as("tw"))
            .groupBy("id, tw")
            .select("id, id.count as cnt, temp.avg as avgTemp, tw.start, tw.end");

        // Tumbling count window:
        //sensorTable.window(Tumble.over("10.rows").on("rt").as("trw"));

        // 1.2 Sliding window (time-based or count-based)
        //sensorTable.window(Slide.over("10.minutes").every("5.minutes").on("rt").as("sw"));
        //sensorTable.window(Slide.over("10.rows").every("5.rows").on("rt").as("srw"));

        // 1.3 Session window
        //sensorTable.window(Session.withGap("10.minutes").on("rt").as("sew"));

        // 2. SQL
        // 2.1 Tumbling window
        /*
            TUMBLE(time_attr, interval): defines a tumbling window; the first argument is the
            time attribute, the second is the window size.
        */
        String sql = "select id, count(id) as cnt, avg(temp) as avgTemp, tumble_end(rt, interval '10' second) as wEnd from sensor group by id, tumble(rt, interval '10' second)";
        Table resultSqlTable = tableEnv.sqlQuery(sql);

        // 2.2 Sliding window
        /*
            HOP(time_attr, interval, interval): defines a sliding window; the first argument is the
            time attribute, the second is the slide step, the third is the window size.
        */
        //String sql = "select id, count(id) as cnt, avg(temp) as avgTemp, hop_end(rt, interval '2' second, interval '10' second) as wEnd from sensor group by id, hop(rt, interval '2' second, interval '10' second)";
        //Table resultSqlTable = tableEnv.sqlQuery(sql);

        // 2.3 Session window
        /*
            SESSION(time_attr, interval): defines a session window; the first argument is the
            time attribute, the second is the session gap.
        */
        //String sql = "select id, count(id) as cnt, avg(temp) as avgTemp, session_end(rt, interval '10' second) as wEnd from sensor group by id, session(rt, interval '10' second)";
        //Table resultSqlTable = tableEnv.sqlQuery(sql);

        // Window-based aggregation results can be emitted as an append stream.
        tableEnv.toAppendStream(resultTable, Row.class).print("result");
        tableEnv.toRetractStream(resultSqlTable, Row.class).print("sql");

        env.execute();
    }
}

二、Over Window

开窗窗口(Over Window)是标准 SQL 中已有的(over 子句),可以在查询的 SELECT 子句中定义,会针对每个输入行,计算其相邻行范围内的聚合结果。

public class TestOverWindow {
    public static void main(String[] args) throw Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
        
        DataStream<String> inputStream = env.readTextFile("./sensor.txt");
        DataStream<SensorReading> dataStream = InputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0], new Long(fields[1]), new Double(fields[2]));
        }).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<SensorReading>(Time.seconds(2)) {
            @Override
            public long extractTimestamp(SensorReading element) {
                return element.getTimestamp() * 1000L;
            }
        });
        
        Table sensorTable = tableEnv.fromDataStream(dataStream, "id, timestamp as ts, temperature as temp, rt.rowtime");
        
        tableEnv.createTemporaryView("sensor", dataTable);
        
        //开窗窗口
        //1. TableAPI
        /*
        	基本语法:table.window(Over[.partitionBy("field")].orderBy("rowtime/proctime").preceding(...).as("alias"))
        	窗口范围的指定:preceding(...)
        		1.无界窗口:preceding(UNBOUNDED_RANGE|UNBOUNDED_ROW),表示从最开始的时间|行到当前数据
        		2.有界窗口:preceding("1.minutes|10.rows"),表示从当前时间的前一分钟到当前时间数据|从当前行的前10行到当前数据
        */
        Table overTable = sensorTable.window(Over.partitionBy("id").orderBy("rt").preceding("2.rows").as("ow")).select("id, rt, id.count over ow, temp.avg over ow");
        
        //2. SQL
        /*
        	基本语法:
        		1. count()/sum()... over(partition by field order by rowtime|proctime range|rows between interval '1' minute|2 preceding and current row) from table
        		2. count() over ow/sum() over ow... from table window ow as (partition by field order by rowtime|proctime range|rows between interval '1' minute|2 preceding and current row)
        */
        Table overSqlTable = tableEnv.sqlQuery("select id, rt, count(id) over ow, avg(temp) over ow from sensor window ow as (partition by id order by rt rows between 2 preceding and current row)");
        
        tableEnv.toAppendStream(overTable, Row.class).print("result");
        tableEnv.toRetractStream(overSqlTable, Row.class).print("sql");
        
        env.execute();
    }
}
  • 3
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值