package com.ygy.gmall.realtime.test3;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
/**
 * Demo Flink SQL job: joins a Kafka stream of strings against a MySQL
 * dimension table with a processing-time lookup join, then counts rows per
 * ({@code str}, {@code b_str}) in 5-second tumbling processing-time windows
 * and prints the result to stdout.
 */
public class Flink_SQL_join {

    /**
     * DDL for the Kafka source table. Each record is parsed as CSV with a tab
     * field delimiter into a single {@code str} column; {@code proctime} is a
     * computed processing-time attribute used by the lookup join and windows.
     */
    private static final String KAFKA_SOURCE_DDL =
            "CREATE TABLE kafka_source_ygy_str_test (\n" +
            " `str` string,\n" +
            " proctime as PROCTIME()\n" +
            ") WITH (\n" +
            " 'connector' = 'kafka',\n" +
            " 'topic' = 'ygy_str_test',\n" +
            // Fix: the original omitted the comma after this option, which made the DDL invalid.
            " 'properties.bootstrap.servers' = '***:9092',\n" +
            " 'properties.group.id' = 'ygy_str_test_group',\n" +
            " 'scan.startup.mode' = 'latest-offset',\n" +
            " 'format' = 'csv',\n" +
            " 'csv.ignore-parse-errors' = 'true',\n" +
            " 'csv.field-delimiter' = '\\t'\n" +
            ")";

    /**
     * DDL for the MySQL-backed dimension table mapping {@code a_str} to
     * {@code b_str} (in the sample data, a lowercase letter to its uppercase
     * form). Lookup results are cached: at most 5000 rows, 5 minute TTL.
     */
    private static final String JDBC_DIM_DDL =
            "CREATE TABLE dim_ygy_str_test (\n" +
            " `a_str` string,\n" +
            " `b_str` string\n" +
            ") WITH (\n" +
            " 'connector' = 'jdbc',\n" +
            " 'url' = 'jdbc:mysql://***:3306/adverdb',\n" +
            " 'table-name' = 'dim_ygy_str_test',\n" +
            " 'driver' = 'com.mysql.jdbc.Driver',\n" +
            " 'username' = '***',\n" +
            " 'password' = '***',\n" +
            " 'lookup.cache.max-rows' = '5000',\n" +
            " 'lookup.cache.ttl' = '5min'\n" +
            ")";

    /**
     * View that enriches every Kafka row with {@code b_str} via a
     * processing-time lookup join. It is a LEFT JOIN, so rows without a match
     * in the dimension table are kept with a NULL {@code b_str}.
     */
    private static final String JOIN_VIEW_DDL =
            " create view dw_ygy_str as\n" +
            "select\n" +
            " str," +
            " proctime,\n" +
            " b_str\n" +
            "from\n" +
            " kafka_source_ygy_str_test a\n" +
            " LEFT JOIN dim_ygy_str_test FOR SYSTEM_TIME AS OF a.proctime as b on a.str = b.a_str";

    /**
     * Counts rows per (str, b_str) in 5-second tumbling windows. Window
     * start/end are formatted as strings and converted from UTC to GMT+8:00.
     */
    private static final String WINDOW_COUNT_QUERY =
            "select str,b_str,CONVERT_TZ(\n" +
            " DATE_FORMAT(\n" +
            " tumble_start(proctime, INTERVAL '5' second),\n" +
            " 'yyyy-MM-dd HH:mm:ss'\n" +
            " ),\n" +
            " 'UTC',\n" +
            " 'GMT+8:00'\n" +
            ") as start_time,\n" +
            "CONVERT_TZ(\n" +
            " DATE_FORMAT(\n" +
            " tumble_end(proctime, INTERVAL '5' second),\n" +
            " 'yyyy-MM-dd HH:mm:ss'\n" +
            " ),\n" +
            " 'UTC',\n" +
            " 'GMT+8:00'\n" +
            ") as end_time,count(1) as ct from dw_ygy_str group by str,b_str,tumble(proctime, INTERVAL '5' second) ";

    /**
     * Entry point: registers the source table, the dimension table and the
     * join view, then runs the windowed count and prints its rows.
     *
     * @param args command-line arguments (unused)
     * @throws Exception kept in the signature for compatibility with the original declaration
     */
    public static void main(String[] args) throws Exception {
        // Execution environment; parallelism 1 keeps the printed demo output in a single task.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Table environment bridged onto the streaming environment.
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Register the Kafka test stream.
        tableEnv.executeSql(KAFKA_SOURCE_DDL);

        // Register the MySQL dimension table used for the join.
        tableEnv.executeSql(JDBC_DIM_DDL);

        // Lookup join: stream rows enriched with the dimension value.
        tableEnv.executeSql(JOIN_VIEW_DDL);

        // Run the windowed aggregation and print the results.
        tableEnv.sqlQuery(WINDOW_COUNT_QUERY).execute().print();

        // NOTE: no env.execute() here. The Table API execute() call above submits
        // the job itself, and no DataStream operators are defined on env, so
        // env.execute() would fail with "No operators defined in streaming topology".
    }
}
维表数据
a_str b_str
a A
b B
c C
数据输出
维表关联时,Flink 会把从 MySQL 查到的数据缓存起来;缓存多久过期并重新查询(lookup.cache.ttl)、最多缓存多少行(lookup.cache.max-rows)都由建表时的配置决定。
+----+--------------------------------+--------------------------------+--------------------------------+--------------------------------+----------------------+
| op | str | b_str | start_time | end_time | ct |
+----+--------------------------------+--------------------------------+--------------------------------+--------------------------------+----------------------+
| +I | a | A | 2021-06-18 15:02:40 | 2021-06-18 15:02:45 | 1 |
| +I | a | A | 2021-06-18 15:02:45 | 2021-06-18 15:02:50 | 3 |
| +I | aa | (NULL) | 2021-06-18 15:02:50 | 2021-06-18 15:02:55 | 1 |
| +I | a | A | 2021-06-18 15:02:50 | 2021-06-18 15:02:55 | 3 |