【Flink SQL】【第四章时间属性】

败给你的黑色幽默丶

已于 2022-03-05 18:12:04 修改

阅读量1.7k

点赞数

分类专栏： # Flink 文章标签： flink sql 大数据

于 2022-01-27 20:21:40 首次发布

本文链接：https://blog.csdn.net/weixin_43589563/article/details/122705393

版权

Flink 专栏收录该内容

24 篇文章 6 订阅

订阅专栏

本章内容对应官网：
https://nightlies.apache.org/flink/flink-docs-release-1.12/dev/table/streaming/time_attributes.html
在这里插入图片描述

1.时间属性介绍

Time attributes can be part of every table schema.
They are defined when creating a table from a CREATE TABLE DDL or a DataStream.
Once a time attribute is defined, it can be referenced as a field and used in time-based operations.
As long as a time attribute is not modified, and is simply forwarded from one part of a query to another, it remains a valid time attribute. Time attributes behave like regular timestamps, and are accessible for calculations.
When used in calculations, time attributes are materialized and act as standard timestamps. However, ordinary timestamps cannot be used in place of, or be converted to, time attributes.

重点：

时间属性可以在用DDL创建Table或者从DataStream转Table的时候指定
时间属性可以像普通字段一样被引用；参与计算时会被物化（materialized），表现为标准时间戳类型
普通的时间戳和时间属性不同：普通时间戳不能替代时间属性，也不能被转换为时间属性

2.处理时间

2.1 DataStream到Table转换

https://www.bilibili.com/video/BV1oF411v79N?p=3
在这里插入图片描述

package No11_FlinkSQL.Time;


import Bean.WaterSensor;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.Csv;
import org.apache.flink.table.descriptors.FileSystem;
import org.apache.flink.table.descriptors.Schema;
import org.apache.flink.types.Row;

import java.text.SimpleDateFormat;

import static org.apache.flink.table.api.Expressions.$;

/**
 * Demonstrates how to introduce a processing-time attribute while converting a
 * DataStream into a Table.
 *
 * <p>Each line of the input text file is split on commas and mapped to a
 * {@code WaterSensor}; the extra logical column {@code pt} is declared as the
 * processing-time attribute during the stream-to-table conversion.
 */
public class FlinkSQL_ProcessTime_StreamToTable {

    /**
     * Parses one comma-separated line into a {@code WaterSensor} built from a
     * string, a long and an int field (in that order).
     */
    private static final class SensorLineParser implements MapFunction<String, WaterSensor> {
        @Override
        public WaterSensor map(String line) throws Exception {
            String[] fields = line.split(",");
            String first = fields[0];
            long second = Long.parseLong(fields[1]);
            int third = Integer.parseInt(fields[2]);
            return new WaterSensor(first, second, third);
        }
    }

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        DataStreamSource<String> lines = env.readTextFile("E:\\work\\bigdata\\flink\\src\\main\\resources\\sensort.txt");
        SingleOutputStreamOperator<WaterSensor> sensors = lines.map(new SensorLineParser());

        // Introduce the time semantics: "pt" is appended as a processing-time attribute.
        Table sensorTable = tableEnv.fromDataStream(
                sensors,
                $("id"),
                $("ts"),
                $("vc"),
                $("pt").proctime());

        sensorTable.printSchema();
        // Schema printed by the original author:
        // root
        //  |-- id: STRING
        //  |-- ts: BIGINT
        //  |-- vc: INT
        //  |-- pt: TIMESTAMP(3) *PROCTIME*

        // Convert back to an append-only stream and print every row.
        tableEnv.toAppendStream(sensorTable, Row.class).print();

        // The DataStream sink above is only run once the environment is executed.
        env.execute();
    }
}

2.2 在创建表的DDL中定义

https://www.bilibili.com/video/BV1oF411v79N?p=4&spm_id_from=pageDriver

在这里插入图片描述

package No11_FlinkSQL.Time;


import Bean.WaterSensor;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

import static org.apache.flink.table.api.Expressions.$;

/**
 * Demonstrates how to declare a processing-time attribute directly in a
 * {@code CREATE TABLE} DDL statement.
 *
 * <p>The {@code sensor} table is backed by a CSV file through the filesystem
 * connector; the computed column {@code pt_time AS PROCTIME()} is its
 * processing-time attribute.
 */
public class FlinkSQL_ProcessTime_DDL {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // The table reads its file through the filesystem connector, so no
        // DataStream source has to be created for it.
        tableEnv.executeSql(
                "create table sensor(" +
                          "id String," +
                          "ts bigint," +
                          "vc int," +
                          "pt_time as proctime())" +
                    "with(" +
                    "'connector' = 'filesystem'," +
                    "'path' = 'E:\\work\\bigdata\\flink\\src\\main\\resources\\sensort2.txt'," +
                    "'format' = 'csv')" );

        // Build the query once and reuse it for both the schema and the result.
        Table table = tableEnv.sqlQuery("select * from sensor where id = 'ws_001'");
        table.printSchema();
        // Expected columns (exact type rendering may differ between Flink versions):
        // root
        //  |-- id: STRING
        //  |-- ts: BIGINT
        //  |-- vc: INT
        //  |-- pt_time: TIMESTAMP(3) *PROCTIME*

        // Table.execute() submits the job itself; print() then collects and prints the rows.
        TableResult tableResult = table.execute();
        tableResult.print();

        // No env.execute() here: the query has already been submitted above and
        // no DataStream operators are defined on the environment.
    }
}

3.事件时间

3.1 DataStream到Table转换

When converting a DataStream to a table, an event time attribute can be defined with the .rowtime property during schema definition.
Timestamps and watermarks must have already been assigned in the DataStream being converted.
There are two ways of defining the time attribute when converting a DataStream into a Table. Depending on whether the specified .rowtime field name exists in the schema of the DataStream, the timestamp is either (1) appended as a new column, or it (2) replaces an existing column.

In either case, the event time timestamp field will hold the value of the DataStream event time timestamp.

// Option 1: the .rowtime field name does not exist in the stream's schema,
// so the event-time attribute is appended as a new column.

// extract timestamp and assign watermarks based on knowledge of the stream
DataStream<Tuple2<String, String>> stream = inputStream.assignTimestampsAndWatermarks(...);

// declare an additional logical field as an event time attribute
Table table = tEnv.fromDataStream(stream, $("user_name"), $("data"), $("user_action_time").rowtime());


// Option 2: the .rowtime field takes the place of an existing column.
// (Alternative to Option 1 - the two snippets are not meant to be combined.)

// extract timestamp from first field, and assign watermarks based on knowledge of the stream
DataStream<Tuple3<Long, String, String>> stream = inputStream.assignTimestampsAndWatermarks(...);

// the first field has been used for timestamp extraction, and is no longer necessary
// replace first field with a logical event time attribute
Table table = tEnv.fromDataStream(stream, $("user_action_time").rowtime(), $("user_name"), $("data"));

// Usage: the event time attribute is referenced like a regular field,
// here to define a 10-minute tumbling window.

WindowedTable windowedTable = table.window(Tumble
       .over(lit(10).minutes())
       .on($("user_action_time"))
       .as("userActionWindow"));

package No11_FlinkSQL.Time;


import Bean.WaterSensor;
import org.apache.flink.api.common.eventtime.*;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

import java.time.Duration;

import static org.apache.flink.table.api.Expressions.$;

/**
 * Demonstrates how to introduce an event-time attribute while converting a
 * DataStream into a Table.
 *
 * <p>Timestamps and watermarks are assigned on the DataStream first; the extra
 * logical column {@code et} declared with {@code rowtime()} then carries the
 * extracted event-time timestamp.
 */
public class FlinkSQL_EventTime_StreamToTable {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        DataStreamSource<String> sourceDS = env.readTextFile("D:\\IdeaProjects\\bigdata\\flink\\src\\main\\resources\\sensort.txt");


        // To use event time in a stream-to-table conversion, timestamps and
        // watermarks must be assigned on the stream beforehand.
        // Watermarks tolerate up to 2 seconds of out-of-orderness.
        WatermarkStrategy<WaterSensor> waterSensorWatermarkStrategy = WatermarkStrategy.<WaterSensor>forBoundedOutOfOrderness(Duration.ofSeconds(2))
                .withTimestampAssigner(new SerializableTimestampAssigner<WaterSensor>() {
                    @Override
                    public long extractTimestamp(WaterSensor waterSensor, long l) {
                        // NOTE(review): presumably ts is in seconds and is scaled to
                        // the milliseconds Flink expects - confirm against the data.
                        return waterSensor.getTs() * 1000;
                    }
                });

        // Parse each comma-separated line into a WaterSensor, then attach the strategy.
        SingleOutputStreamOperator<WaterSensor> sensorDS = sourceDS.map(new MapFunction<String, WaterSensor>() {
            @Override
            public WaterSensor map(String s) throws Exception {
                String[] split = s.split(",");
                return new WaterSensor(split[0], Long.parseLong(split[1]), Integer.parseInt(split[2]));
            }
        }).assignTimestampsAndWatermarks(waterSensorWatermarkStrategy);



        Table table = tableEnv.fromDataStream(sensorDS,
                $("id"),
                $("ts"),
                $("vc"),
                $("et").rowtime()); // filled with the timestamp extracted on the stream


        table.printSchema();

        // Concatenating the Table object into the SQL text relies on its
        // string form to reference the table in the query.
        TableResult tableResult = tableEnv.executeSql("select * from " + table + " where id = 'ws_001'");

        tableResult.print();

        // No env.execute() here: executeSql() has already submitted the query,
        // and the DataStream pipeline has no sink of its own to run.
    }
}

3.2 在创建表的DDL中定义

CREATE TABLE user_actions (
  user_name STRING,
  data STRING,
  user_action_time TIMESTAMP(3), -- the time column; its type is TIMESTAMP(3)
  -- define the watermark on top of the time column (time value minus 5 seconds)
  WATERMARK FOR user_action_time AS user_action_time - INTERVAL '5' SECOND
) WITH (
  ...
);

-- Count distinct users per 10-minute tumbling window over the event-time attribute.
SELECT TUMBLE_START(user_action_time, INTERVAL '10' MINUTE), COUNT(DISTINCT user_name)
FROM user_actions
GROUP BY TUMBLE(user_action_time, INTERVAL '10' MINUTE);

package No11_FlinkSQL.Time;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

/**
 * Demonstrates how to declare an event-time attribute in a {@code CREATE TABLE}
 * DDL statement.
 *
 * <p>The computed column {@code rt} is derived from the {@code ts} column via
 * {@code from_unixtime} / {@code to_timestamp}, and a watermark of
 * {@code rt - INTERVAL '5' SECOND} is declared on it.
 */
public class FlinkSQL_EventTimeDDL {

    /** DDL of the CSV-backed source table, including the watermark definition. */
    private static final String CREATE_SOURCE_SENSOR =
            " CREATE TABLE source_sensor (\n"
                    + "                id String,\n"
                    + "                ts bigint,\n"
                    + "                vc int,\n"
                    + "                rt as to_timestamp(from_unixtime(ts,'yyyy-MM-dd HH:mm:ss')),"
                    + "                WATERMARK FOR rt AS rt - INTERVAL '5' SECOND\n"
                    + ") WITH (\n"
                    + "'connector' = 'filesystem',"
                    + "'path' = 'E:\\work\\bigdata\\flink\\src\\main\\resources\\sensort.txt',"
                    + "'format' = 'csv')";

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Register the source table together with its event-time attribute.
        tableEnv.executeSql(CREATE_SOURCE_SENSOR);

        Table sensorTable = tableEnv.from("source_sensor");
        sensorTable.printSchema();

        // Convert to an append-only stream and print every row.
        tableEnv.toAppendStream(sensorTable, Row.class).print();

        // The DataStream sink above is only run once the environment is executed.
        env.execute();
    }
}

败给你的黑色幽默丶

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
【Flink SQL】【第四章时间属性】

本章内容对应官网：https://nightlies.apache.org/flink/flink-docs-release-1.12/dev/table/streaming/time_attributes.html1.时间属性介绍Time attributes can be part of every table schema.They are defined when creating a table from a CREATE TABLE DDL or a DataStream.Once
复制链接

扫一扫