package com.ygy.gmall.realtime.test3;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
public class Flink_SQL_stream_join {
public static void main(String[] args) throws Exception {
//环境
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//创建tableEnv
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//测试A流
tableEnv.executeSql("CREATE TABLE kafka_source_ygy_str_test (\n" +
" `str` string,\n" +
" proctime as PROCTIME()\n" +
") WITH (\n" +
" 'connector' = 'kafka',\n" +
" 'topic' = 'ygy_str_test',\n" +
" 'properties.bootstrap.servers' = '***:9092',\n" +
" 'properties.group.id' = 'ygy_str_test_group',\n" +
" 'scan.startup.mode' = 'latest-offset',\n" +
" 'format' = 'csv',\n" +
" 'csv.ignore-parse-errors' = 'true',\n" +
" 'csv.field-delimiter' = '\\t'\n" +
")");
//测试B流
tableEnv.executeSql("CREATE TABLE kafka_source_ygy_str_test2 (\n" +
" `a_str` string,\n" +
" `b_str` string,\n" +
" proctime as PROCTIME()\n" +
") WITH (\n" +
" 'connector' = 'kafka',\n" +
" 'topic' = 'ygy_str_test2',\n" +
" 'properties.bootstrap.servers' = '***:9092,\n" +
" 'properties.group.id' = 'ygy_str_test_group',\n" +
" 'scan.startup.mode' = 'latest-offset',\n" +
" 'format' = 'csv',\n" +
" 'csv.ignore-parse-errors' = 'true',\n" +
" 'csv.field-delimiter' = ','\n" +
")");
//join操作
tableEnv.executeSql(" create view dw_ygy_str as\n" +
"select\n" +
" a.*," +
" b.*\n" +
"from\n" +
" kafka_source_ygy_str_test a\n" +
" JOIN kafka_source_ygy_str_test2 as b on a.str = b.a_str");
//输出
tableEnv.sqlQuery("select * from dw_ygy_str ").execute().print();
env.execute();
}
}
常规联接是最通用的联接类型,其中任何新记录或联接任一侧的更改都是可见的,并且会影响整个联接结果。例如,如果左侧有一条新记录,它将与右侧所有以前和未来的记录连接起来。
保存的数据太大,
左侧输入a 那么就会出现三条数据,A1,A2,A3
+----+--------------------------------+-------------------------+--------------------------------+--------------------------------+-------------------------+
| op | str | proctime | a_str | b_str | proctime0 |
+----+--------------------------------+-------------------------+--------------------------------+--------------------------------+-------------------------+
| +I | a | 2021-06-18T09:18:26.300 | a | A1 | 2021-06-18T09:18:26.300 |
| +I | a | 2021-06-18T09:18:28.493 | a | A2 | 2021-06-18T09:18:28.493 |
| +I | a | 2021-06-18T09:18:30.464 | a | A3 | 2021-06-18T09:18:30.464 |
| +I | a | 2021-06-18T09:18:36.768 | a | A1 | 2021-06-18T09:18:36.768 |
| +I | a | 2021-06-18T09:18:36.768 | a | A2 | 2021-06-18T09:18:36.768 |
| +I | a | 2021-06-18T09:18:36.769 | a | A3 | 2021-06-18T09:18:36.769 |
上面的数据是没有撤回的,都是insert,刚开始左侧有数据,右侧匹配上才进行输出数据
下面看left join的情况
tableEnv.executeSql(" create view dw_ygy_str as\n" +
"select\n" +
" a.*," +
" b.*\n" +
"from\n" +
" kafka_source_ygy_str_test a\n" +
" left JOIN kafka_source_ygy_str_test2 as b on a.str = b.a_str");
输出结果
+----+--------------------------------+-------------------------+--------------------------------+--------------------------------+-------------------------+
| op | str | proctime | a_str | b_str | proctime0 |
+----+--------------------------------+-------------------------+--------------------------------+--------------------------------+-------------------------+
| +I | a | 2021-06-18T09:32:51.820 | (NULL) | (NULL) | 2021-06-18T09:32:51.820 |
| -D | a | 2021-06-18T09:32:56.936 | (NULL) | (NULL) | 2021-06-18T09:32:56.936 |
| +I | a | 2021-06-18T09:32:56.937 | a | A | 2021-06-18T09:32:56.937 |
左侧有数据,没有匹配上也会先输出数据,在右侧数据到达的时候会有 -D(删除)和-In(添加新的)
右侧数据关联上了,把之前的删掉,然后再插入。
下面看full outer join的情况
tableEnv.executeSql(" create view dw_ygy_str as\n" +
"select\n" +
" a.*," +
" b.*\n" +
"from\n" +
" kafka_source_ygy_str_test a\n" +
" full outerJOIN kafka_source_ygy_str_test2 as b on a.str = b.a_str");
输出结果
+----+--------------------------------+-------------------------+--------------------------------+--------------------------------+-------------------------+
| op | str | proctime | a_str | b_str | proctime0 |
+----+--------------------------------+-------------------------+--------------------------------+--------------------------------+-------------------------+
| +I | (NULL) | 2021-06-18T09:38:18.916 | a | A | 2021-06-18T09:38:18.916 |
| -D | (NULL) | 2021-06-18T09:38:23.168 | a | A | 2021-06-18T09:38:23.168 |
| +I | a | 2021-06-18T09:38:23.168 | a | A | 2021-06-18T09:38:23.168 |
除了inner join 其他的都会发生撤回的情况