flink 1.10.1 java版本sql GroupWindow示例(关窗出结果)

本文的基础环境可以参考flink 1.10.1 java版本wordcount演示 (nc + socket)

groupwindow的处理方式是当窗口(不管是时间窗口,还是计数窗口)关闭的时候,才进行一次计算输出。

以事件时间为参考,以groupwindow的滚动窗口的方式,统计窗口范围内的数据,包括数据个数,平均值等。

1. 添加依赖

<dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_2.11</artifactId>
            <version>1.10.1</version>
</dependency>

不同版本,这里可能需要添加的依赖包不同。

2. 程序代码

package com.demo.sql;

import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.Tumble;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;


public class FlinkSqlGroupWindow {

    /**
     * Reads sensor readings from a text file, assigns event-time timestamps with a
     * 2-second bounded out-of-orderness watermark, then computes per-sensor record
     * counts and average temperatures over 10-second tumbling event-time windows,
     * using both the Table API and an equivalent SQL query. Group-window results are
     * emitted once, when each window closes.
     */
    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Use event time: timestamps come from the records, not the wall clock.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // Parallelism 1 keeps the printed output in a deterministic order.
        env.setParallelism(1);

        // 1. Read raw CSV lines of the form "id,epochSeconds,temperature".
        DataStream<String> inputStream = env.readTextFile("data/sensor.txt");

        // 2. Convert to POJOs and assign watermarks tolerating 2s of out-of-orderness.
        DataStream<SensorData> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            // Long.parseLong / Double.parseDouble instead of the deprecated
            // boxing constructors new Long(...) / new Double(...).
            return new SensorData(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
        }).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<SensorData>(Time.seconds(2)) {
            @Override
            public long extractTimestamp(SensorData element) {
                // dt is in epoch seconds; Flink expects epoch milliseconds.
                return element.getDt() * 1000L;
            }
        });

        // 3. Create the table environment.
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // 4. Create a table from the stream; "rt.rowtime" exposes the event-time
        //    attribute as a queryable rowtime column.
        Table dataTable = tableEnv.fromDataStream(dataStream, "id, dt, temperature, rt.rowtime");

        tableEnv.createTemporaryView("sensor", dataTable);

        tableEnv.toAppendStream(dataTable, Row.class).print();

        // 5. Window aggregation
        // 5.1 Group Window via the Table API: 10-second tumbling window on rowtime.
        Table resultTable = dataTable.window(Tumble.over("10.seconds").on("rt").as("tw"))
                .groupBy("id, tw")
                .select("id, id.count, temperature.avg, tw.end");

        // 5.2 The equivalent SQL query.
        Table resultSqlTable = tableEnv.sqlQuery("select id, count(id) as cnt, avg(temperature) as avgTemp, tumble_end(rt, interval '10' second) " +
                "from sensor group by id, tumble(rt, interval '10' second)");

        dataTable.printSchema();
        // Group-window aggregates emit exactly once per closed window, so an
        // append stream suffices; the retract stream is shown for comparison
        // (no retractions are actually produced by this query).
        tableEnv.toAppendStream(resultTable, Row.class).print("result");
        tableEnv.toRetractStream(resultSqlTable, Row.class).print("sql");

        env.execute();

    }
}

3. 辅助代码SensorData

package com.demo.sql;

/**
 * Plain data holder for a single sensor reading: sensor id, reading time in
 * epoch seconds, and temperature. Deliberately kept mutable with a public
 * no-arg constructor and getter/setter pairs so Flink recognizes it as a POJO.
 */
public class SensorData {

    private String id;          // sensor identifier, e.g. "sensor_1"
    private Long dt;            // reading time in epoch seconds
    private Double temperature; // measured temperature

    /** No-arg constructor required for POJO (de)serialization. */
    public SensorData() {
    }

    /** Fully-initializing constructor. */
    public SensorData(String id, Long dt, Double temperature) {
        this.id = id;
        this.dt = dt;
        this.temperature = temperature;
    }

    public String getId() {
        return id;
    }

    public Long getDt() {
        return dt;
    }

    public Double getTemperature() {
        return temperature;
    }

    public void setId(String id) {
        this.id = id;
    }

    public void setDt(Long dt) {
        this.dt = dt;
    }

    public void setTemperature(Double temperature) {
        this.temperature = temperature;
    }

    @Override
    public String toString() {
        // NOTE: the dt field is rendered under the label "time" (kept as-is to
        // preserve the original output format).
        StringBuilder sb = new StringBuilder("SensorData{");
        sb.append("id='").append(id).append('\'');
        sb.append(", time=").append(dt);
        sb.append(", temperature=").append(temperature);
        sb.append('}');
        return sb.toString();
    }
}

4. 测试数据

sensor_1,1547718199,35.8
sensor_6,1547718201,15.4
sensor_7,1547718202,6.7
sensor_10,1547718205,38.1
sensor_1,1547718207,36.3
sensor_1,1547718209,32.8
sensor_1,1547718212,37.1

5. 执行程序输出结果

root
 |-- id: STRING
 |-- dt: BIGINT
 |-- temperature: DOUBLE
 |-- rt: TIMESTAMP(3) *ROWTIME*

sensor_1,1547718199,35.8,2019-01-17 09:43:19.0
sensor_6,1547718201,15.4,2019-01-17 09:43:21.0
sensor_7,1547718202,6.7,2019-01-17 09:43:22.0
sensor_10,1547718205,38.1,2019-01-17 09:43:25.0
sensor_1,1547718207,36.3,2019-01-17 09:43:27.0
sensor_1,1547718209,32.8,2019-01-17 09:43:29.0
sensor_1,1547718212,37.1,2019-01-17 09:43:32.0

result> sensor_1,1,35.8,2019-01-17 09:43:20.0
sql> (true,sensor_1,1,35.8,2019-01-17 09:43:20.0)
result> sensor_6,1,15.4,2019-01-17 09:43:30.0
sql> (true,sensor_6,1,15.4,2019-01-17 09:43:30.0)
result> sensor_1,2,34.55,2019-01-17 09:43:30.0
sql> (true,sensor_1,2,34.55,2019-01-17 09:43:30.0)
result> sensor_10,1,38.1,2019-01-17 09:43:30.0
sql> (true,sensor_10,1,38.1,2019-01-17 09:43:30.0)
result> sensor_7,1,6.7,2019-01-17 09:43:30.0
sql> (true,sensor_7,1,6.7,2019-01-17 09:43:30.0)
result> sensor_1,1,37.1,2019-01-17 09:43:40.0
sql> (true,sensor_1,1,37.1,2019-01-17 09:43:40.0)

可以看到按id分组后的10秒滚动窗口,窗口结束时间是每个整数10秒。比如sensor_1,1547718207,36.3,2019-01-17 09:43:27.0
sensor_1,1547718209,32.8,2019-01-17 09:43:29.0

这两条数据的id都是sensor_1,时间窗口结束时间都是2019-01-17 09:43:30.0,所以会进行合并统计,得到记录条数和平均值。

另外可以看出,采用Table API进行窗口统计和sql进行窗口统计的结果是完全一样的。

在统计结果输出时,toAppendStream只支持新增数据方式的统计输出,toRetractStream则支持新增和修改数据方式的统计输出。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

程序猿20

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值