【Flink SQL】【Chapter 4: Time Attributes】

This chapter follows the official documentation:
https://nightlies.apache.org/flink/flink-docs-release-1.12/dev/table/streaming/time_attributes.html

1. Introduction to Time Attributes

  • Time attributes can be part of every table schema.
  • They are defined when creating a table from a CREATE TABLE DDL or a DataStream.
  • Once a time attribute is defined, it can be referenced as a field and used in time-based operations.
  • As long as a time attribute is not modified, and is simply forwarded from one part of a query to another, it remains a valid time attribute. Time attributes behave like regular timestamps, and are accessible for calculations.
  • When used in calculations, time attributes are materialized and act as standard timestamps. However, ordinary timestamps cannot be used in place of, or be converted to, time attributes.

Key points:

  • A time attribute can be declared either in a CREATE TABLE DDL or when converting a DataStream to a Table.
  • Once declared, a time attribute is used like an ordinary field and has a standard timestamp type.
  • An ordinary timestamp column is not the same thing as a time attribute.
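To make the last two points concrete, here is a minimal sketch (not from the original post; it assumes a StreamTableEnvironment tableEnv and a registered table sensor whose time attribute column pt_time is declared as shown in the DDL example later in this chapter). A time attribute can be read like any other field, but as soon as it is used in a calculation it is materialized into an ordinary value:

// Sketch only: `sensor` and its declared time attribute `pt_time` come from the DDL example below.
Table t = tableEnv.sqlQuery(
        "SELECT id, " +
        "       pt_time, " +                                               // forwarded unchanged: still a time attribute
        "       DATE_FORMAT(pt_time, 'yyyy-MM-dd HH:mm:ss') AS pt_str " +  // materialized: just a STRING now
        "FROM sensor");
// pt_time can still feed time-based operations downstream (e.g. group windows),
// while pt_str, or any plain timestamp column, cannot take its place.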

2. Processing Time

2.1 DataStream-to-Table Conversion

https://www.bilibili.com/video/BV1oF411v79N?p=3

package No11_FlinkSQL.Time;


import Bean.WaterSensor;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

import static org.apache.flink.table.api.Expressions.$;

/**
 *  Introduce a processing-time attribute when converting a DataStream to a Table
 */
public class FlinkSQL_ProcessTime_StreamToTable {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);


        DataStreamSource<String> sourceDS = env.readTextFile("E:\\work\\bigdata\\flink\\src\\main\\resources\\sensort.txt");
        SingleOutputStreamOperator<WaterSensor> sensorDS = sourceDS.map(new MapFunction<String, WaterSensor>() {
            @Override
            public WaterSensor map(String s) throws Exception {
                String[] split = s.split(",");
                return new WaterSensor(split[0], Long.parseLong(split[1]), Integer.parseInt(split[2]));
            }
        });

        // append a processing-time attribute as an extra column during the conversion
        Table table = tableEnv.fromDataStream(sensorDS, $("id"), $("ts"), $("vc"), $("pt").proctime());

        table.printSchema();
        //root
        // |-- id: STRING
        // |-- ts: BIGINT
        // |-- vc: INT
        // |-- pt: TIMESTAMP(3) *PROCTIME*

        DataStream<Row> res = tableEnv.toAppendStream(table, Row.class);

        res.print();

        env.execute();
    }
}
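As a usage follow-up (a sketch, not part of the original example), the pt attribute defined above can drive a processing-time group window directly in the Table API. Two extra imports would be needed: org.apache.flink.table.api.Tumble and the static org.apache.flink.table.api.Expressions.lit.

// Sketch: continues from the `table` variable in the example above.
Table windowedCounts = table
        .window(Tumble.over(lit(10).seconds()).on($("pt")).as("w"))   // 10-second processing-time window
        .groupBy($("id"), $("w"))
        .select($("id"), $("w").start().as("w_start"), $("vc").count().as("cnt"));

tableEnv.toAppendStream(windowedCounts, Row.class).print();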

2.2 Defined in a CREATE TABLE DDL

https://www.bilibili.com/video/BV1oF411v79N?p=4&spm_id_from=pageDriver


package No11_FlinkSQL.Time;


import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

/**
 *  Define a processing-time attribute in a CREATE TABLE DDL
 */
public class FlinkSQL_ProcessTime_DDL {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);


        tableEnv.executeSql(
                "create table sensor(" +
                          "id String," +
                          "ts bigint," +
                          "vc int," +
                          "pt_time as proctime())" +
                    "with(" +
                    "'connector' = 'filesystem'," +
                    "'path' = 'E:\\work\\bigdata\\flink\\src\\main\\resources\\sensort2.txt'," +
                    "'format' = 'csv')" );


        TableResult tableResult = tableEnv.executeSql("select * from sensor where id = 'ws_001'");

        Table table = tableEnv.sqlQuery("select * from sensor where id = 'ws_001'");
        table.printSchema();
        //root
        // |-- id: STRING
        // |-- ts: BIGINT
        // |-- vc: INT
        // |-- pt_time: TIMESTAMP(3) *PROCTIME*
        tableResult.print();

        // no env.execute() needed here: executeSql() submits and runs its own job
    }
}
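The pt_time attribute declared in the DDL is also accepted as the ORDER BY column of an OVER window, which in streaming mode must be a time attribute. A hedged sketch (not from the original post), reusing the sensor table registered above:

// Sketch: row-count OVER window ordered by the processing-time attribute pt_time.
Table overWindow = tableEnv.sqlQuery(
        "SELECT id, vc, " +
        "       SUM(vc) OVER (" +
        "           PARTITION BY id " +
        "           ORDER BY pt_time " +
        "           ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS vc_sum " +
        "FROM sensor");
overWindow.execute().print();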

3. Event Time

3.1 DataStream-to-Table Conversion

  • When converting a DataStream to a table, an event time attribute can be defined with the .rowtime property during schema definition.

  • Timestamps and watermarks must have already been assigned in the DataStream being converted.

  • There are two ways of defining the time attribute when converting a DataStream into a Table. Depending on whether the specified .rowtime field name exists in the schema of the DataStream, the timestamp is either (1) appended as a new column, or it (2) replaces an existing column.

In either case, the event time timestamp field will hold the value of the DataStream event time timestamp.

// Option 1:

// extract timestamp and assign watermarks based on knowledge of the stream
DataStream<Tuple2<String, String>> stream = inputStream.assignTimestampsAndWatermarks(...);

// declare an additional logical field as an event time attribute
Table table = tEnv.fromDataStream(stream, $("user_name"), $("data"), $("user_action_time").rowtime());


// Option 2:

// extract timestamp from first field, and assign watermarks based on knowledge of the stream
DataStream<Tuple3<Long, String, String>> stream = inputStream.assignTimestampsAndWatermarks(...);

// the first field has been used for timestamp extraction, and is no longer necessary
// replace first field with a logical event time attribute
Table table = tEnv.fromDataStream(stream, $("user_action_time").rowtime(), $("user_name"), $("data"));

// Usage:

WindowedTable windowedTable = table.window(Tumble
       .over(lit(10).minutes())
       .on($("user_action_time"))
       .as("userActionWindow"));
package No11_FlinkSQL.Time;


import Bean.WaterSensor;
import org.apache.flink.api.common.eventtime.*;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.time.Duration;

import static org.apache.flink.table.api.Expressions.$;

/**
 *  Introduce an event-time attribute when converting a DataStream to a Table
 */
public class FlinkSQL_EventTime_StreamToTable {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        DataStreamSource<String> sourceDS = env.readTextFile("D:\\IdeaProjects\\bigdata\\flink\\src\\main\\resources\\sensort.txt");


        // NOTE: to define an event-time attribute during stream-to-table conversion,
        // timestamps and watermarks must first be assigned on the DataStream
        WatermarkStrategy<WaterSensor> waterSensorWatermarkStrategy = WatermarkStrategy.<WaterSensor>forBoundedOutOfOrderness(Duration.ofSeconds(2))
                .withTimestampAssigner(new SerializableTimestampAssigner<WaterSensor>() {
                    @Override
                    public long extractTimestamp(WaterSensor waterSensor, long l) {
                        return waterSensor.getTs() * 1000;
                    }
                });

        SingleOutputStreamOperator<WaterSensor> sensorDS = sourceDS.map(new MapFunction<String, WaterSensor>() {
            @Override
            public WaterSensor map(String s) throws Exception {
                String[] split = s.split(",");
                return new WaterSensor(split[0], Long.parseLong(split[1]), Integer.parseInt(split[2]));
            }
        }).assignTimestampsAndWatermarks(waterSensorWatermarkStrategy);



        Table table = tableEnv.fromDataStream(sensorDS,
                $("id"),
                $("ts"),
                $("vc"),
                $("et").rowtime()); //会用提取的时间作为该字段


        table.printSchema();

        TableResult tableResult = tableEnv.executeSql("select * from " + table + " where id = 'ws_001'");

        tableResult.print();

        // no env.execute() needed: the query above runs via executeSql(), and there is no DataStream sink
    }
}
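As a follow-up usage sketch (not in the original post), the et rowtime attribute can now drive an event-time tumbling window; concatenating the Table into the SQL string registers it under a generated name, just as the query above does:

// Sketch: 10-second event-time tumbling window on the rowtime attribute `et`.
Table windowed = tableEnv.sqlQuery(
        "SELECT id, " +
        "       TUMBLE_START(et, INTERVAL '10' SECOND) AS w_start, " +
        "       TUMBLE_END(et, INTERVAL '10' SECOND) AS w_end, " +
        "       COUNT(*) AS cnt " +
        "FROM " + table + " " +
        "GROUP BY id, TUMBLE(et, INTERVAL '10' SECOND)");
windowed.execute().print();
// Each window is emitted once the watermark (max event time - 2s here) passes the window end;
// a bounded file source emits a final watermark at end of input, closing any remaining windows.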

3.2 Defined in a CREATE TABLE DDL

CREATE TABLE user_actions (
  user_name STRING,
  data STRING,
  user_action_time TIMESTAMP(3), -- the time column, of type TIMESTAMP(3)
  -- declare the watermark on top of that time column
  WATERMARK FOR user_action_time AS user_action_time - INTERVAL '5' SECOND
) WITH (
  ...
);

SELECT TUMBLE_START(user_action_time, INTERVAL '10' MINUTE), COUNT(DISTINCT user_name)
FROM user_actions
GROUP BY TUMBLE(user_action_time, INTERVAL '10' MINUTE);
package No11_FlinkSQL.Time;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

public class FlinkSQL_EventTimeDDL {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        tableEnv.executeSql(" CREATE TABLE source_sensor (\n" +
                "                id String,\n" +
                "                ts bigint,\n" +
                "                vc int,\n" +
                "                rt as to_timestamp(from_unixtime(ts,'yyyy-MM-dd HH:mm:ss'))," +
                "                WATERMARK FOR rt AS rt - INTERVAL '5' SECOND\n" +
                ") WITH (\n" +
                "'connector' = 'filesystem'," +
                "'path' = 'E:\\work\\bigdata\\flink\\src\\main\\resources\\sensort.txt'," +
                "'format' = 'csv')" );

        Table source_sensor = tableEnv.from("source_sensor");
        source_sensor.printSchema();

        DataStream<Row> rowDataStream = tableEnv.toAppendStream(source_sensor, Row.class);
        rowDataStream.print();

        env.execute();


    }
}
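One hedged note on the computed column above (not from the original post): from_unixtime expects epoch seconds, which matches the ts values in this example. If ts held epoch milliseconds instead, the conversion would need to divide first, roughly like this (table name and path are illustrative only):

// Sketch: same idea as above, but for a BIGINT `ts` column holding epoch MILLISECONDS.
tableEnv.executeSql(" CREATE TABLE source_sensor_ms (\n" +
        "                id String,\n" +
        "                ts bigint,\n" +
        "                vc int,\n" +
        "                rt as to_timestamp(from_unixtime(ts / 1000, 'yyyy-MM-dd HH:mm:ss')),\n" +
        "                WATERMARK FOR rt AS rt - INTERVAL '5' SECOND\n" +
        ") WITH (\n" +
        "'connector' = 'filesystem'," +
        "'path' = '/path/to/sensor_ms.csv'," +   // illustrative path
        "'format' = 'csv')");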
