1.33.Flink CDC Examples
1.33.1.Official Introduction
1.33.1.1.Table/SQL API Syntax
1.33.1.2.DataStream API Usage
1.33.1.3.Building from source
1.33.2.Flink CDC Examples
1.33.2.1.pom.xml Definition
1.33.2.2.Example 1
1.33.2.3.Example 2
1.33.2.4.Example 3
1.33.Flink CDC Examples
1.33.1.Official Introduction
Flink CDC Connectors is a set of source connectors for Apache Flink that ingest changes from different databases using change data capture (CDC). The Flink CDC Connectors integrate Debezium as the engine for capturing data changes, so they can fully leverage Debezium's capabilities. See the Debezium documentation for more details.
This README is a brief introduction to the core features of the Flink CDC Connectors. For complete and detailed documentation, see the project documentation.
Key features:
Supports reading a database snapshot and continuing to read the binlog, with exactly-once processing even when failures happen.
CDC connectors for the DataStream API: users can consume changes from multiple databases and tables in a single job, without deploying Debezium or Kafka.
CDC connectors for the Table/SQL API: users can create a CDC source with SQL DDL to monitor changes on a single table.
1.33.1.1.Table/SQL API Syntax
A few steps are needed to set up a Flink cluster with the provided connectors.
1. Prepare a Flink cluster with Flink 1.12+ and Java 8+ installed.
2. Download the jar for the connector: https://github.com/ververica/flink-cdc-connectors/wiki/Downloads
3. Put the downloaded jar under FLINK_HOME/lib/.
4. Restart the Flink cluster.
The example below shows how to create a MySQL CDC source in the Flink SQL Client and run queries against it.
-- creates a mysql cdc table source
CREATE TABLE mysql_binlog (
  id INT NOT NULL,
  name STRING,
  description STRING,
  weight DECIMAL(10,3)
) WITH (
  'connector' = 'mysql-cdc',
  'hostname' = 'localhost',
  'port' = '3306',
  'username' = 'flinkuser',
  'password' = 'flinkpw',
  'database-name' = 'inventory',
  'table-name' = 'products'
);
-- read snapshot and binlog data from mysql, do some transformation, and show it on the client
SELECT id, UPPER(name), description, weight FROM mysql_binlog;
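When the query runs, the connector first emits the rows of the initial snapshot and then keeps streaming changes from the binlog, so the result shown in the client is continuously updated.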
1.33.1.2.DataStream API Usage
Include the following Maven dependency:
<dependency>
  <groupId>com.alibaba.ververica</groupId>
  <!-- add the dependency matching your database -->
  <artifactId>flink-connector-mysql-cdc</artifactId>
  <version>1.2.0</version>
</dependency>
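(For the SQL Client setup described earlier, the self-contained fat jar flink-sql-connector-mysql-cdc is typically used instead of this artifact; it also appears in the remote-submission example later in this section.)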
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import com.alibaba.ververica.cdc.debezium.StringDebeziumDeserializationSchema;
import com.alibaba.ververica.cdc.connectors.mysql.MySQLSource;

public class MySqlBinlogSourceExample {
    public static void main(String[] args) throws Exception {
        SourceFunction<String> sourceFunction = MySQLSource.<String>builder()
                .hostname("localhost")
                .port(3306)
                .databaseList("inventory") // monitor all tables under inventory database
                .username("flinkuser")
                .password("flinkpw")
                .deserializer(new StringDebeziumDeserializationSchema()) // converts SourceRecord to String
                .build();

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.addSource(sourceFunction)
           .print().setParallelism(1); // use parallelism 1 for sink to keep message ordering

        env.execute();
    }
}
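StringDebeziumDeserializationSchema simply turns each Debezium SourceRecord into its toString() form. If you need a different output shape, you can supply your own DebeziumDeserializationSchema. The sketch below is a hypothetical variant against the same 1.2.0 API (the class name and output format are our own choices, not part of the library): it emits the record's topic, which for data-change events encodes server name, database, and table, together with the raw value.

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.source.SourceRecord;
import com.alibaba.ververica.cdc.debezium.DebeziumDeserializationSchema;

// hypothetical example: emit "topic => value" strings instead of plain toString()
public class TopicAndValueDeserializationSchema implements DebeziumDeserializationSchema<String> {
    @Override
    public void deserialize(SourceRecord record, Collector<String> out) throws Exception {
        // topic is "<server-name>.<database>.<table>" for data-change events
        out.collect(record.topic() + " => " + record.value());
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return TypeInformation.of(String.class);
    }
}

An instance of this class would then be passed to .deserializer(...) in the builder above instead of StringDebeziumDeserializationSchema.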
1.33.1.3.Building from source
git clone https://github.com/ververica/flink-cdc-connectors.git
cd flink-cdc-connectors
mvn clean install -DskipTests
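The build installs the connector jars into the local Maven repository; each module also leaves its jar under its target/ directory, from where it can be copied into FLINK_HOME/lib/.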
1.33.2.Flink CDC Examples
1.33.2.1.pom.xml Definition
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.xxxx.flink</groupId>
<artifactId>flink-table-api-sql</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!--maven properties -->
<maven.test.skip>true</maven.test.skip>
<maven.javadoc.skip>true</maven.javadoc.skip>
<!-- compiler settings properties -->
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<commons-lang.version>2.5</commons-lang.version>
<junit.version>4.12</junit.version>
<slf4j.version>1.7.25</slf4j.version>
<fastjson.version>1.2.73</fastjson.version>
<joda-time.version>2.9.4</joda-time.version>
<fileName>issue-handler</fileName>
<flink.version>1.11.1</flink.version>
<scala.version>2.12</scala.version>
</properties>
<dependencies>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.4</version>
</dependency>
<!-- <!– log4j2 packages: version 2.6+ recommended (begin) –>-->
<!-- <dependency>-->
<!-- <groupId>org.apache.logging.log4j</groupId>-->
<!-- <artifactId>log4j-api</artifactId>-->
<!-- <version>2.8.2</version>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>org.apache.logging.log4j</groupId>-->
<!-- <artifactId>log4j-core</artifactId>-->
<!-- <version>2.8.2</version>-->
<!-- </dependency>-->
<!-- <!– log4j2 packages: version 2.6+ recommended (end) –>-->
<!-- <!– log4j2-to-slf4j bridge dependency https://mvnrepository.com/artifact/org.apache.logging.log4j/log4j-slf4j-impl –>-->
<!-- <dependency>-->
<!-- <groupId>org.apache.logging.log4j</groupId>-->
<!-- <artifactId>log4j-slf4j-impl</artifactId>-->
<!-- <version>2.8.2</version>-->
<!-- </dependency>-->
<!-- <!– slf4j dependency https://mvnrepository.com/artifact/org.slf4j/slf4j-api –>-->
<!-- <dependency>-->
<!-- <groupId>org.slf4j</groupId>-->
<!-- <artifactId>slf4j-api</artifactId>-->
<!-- <version>1.7.25</version>-->
<!-- </dependency>-->
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.2.3</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>${slf4j.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>${commons-lang.version}</version>
</dependency>
<!--test -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
<version>${junit.version}</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${fastjson.version}</version>
</dependency>
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
<version>${joda-time.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-jdbc_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.34</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-json</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>com.alibaba.ververica</groupId>
<artifactId>flink-connector-mysql-cdc</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>com.alibaba.ververica</groupId>
<artifactId>flink-format-changelog-json</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-sql-connector-elasticsearch6_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>flink-connector-clickhouse</artifactId>
<version>1.11.0</version>
</dependency>
<dependency>
<groupId>ru.yandex.clickhouse</groupId>
<artifactId>clickhouse-jdbc</artifactId>
<version>0.2.4</version>
</dependency>
</dependencies>
<distributionManagement>
<repository>
<id>releases</id>
<layout>default</layout>
<url>http://xxx.xxx.xxx/nexus/content/repositories/releases/</url>
</repository>
<snapshotRepository>
<id>snapshots</id>
<name>snapshots</name>
<url>http://xxx.xxx.xxx/nexus/content/repositories/snapshots/</url>
</snapshotRepository>
</distributionManagement>
<repositories>
<repository>
<id>releases</id>
<layout>default</layout>
<url>http://xxx.xxx.xxx/nexus/content/repositories/releases/</url>
</repository>
<repository>
<id>snapshots</id>
<name>snapshots</name>
<url>http://xxx.xxx.xxx/nexus/content/repositories/snapshots/</url>
<snapshots>
<enabled>true</enabled>
<updatePolicy>always</updatePolicy>
<checksumPolicy>warn</checksumPolicy>
</snapshots>
</repository>
<repository>
<id>xxxx</id>
<name>xxxx</name>
<url>http://xxx.xxx.xxx/nexus/content/repositories/xxxx/</url>
</repository>
<repository>
<id>public</id>
<name>public</name>
<url>http://xxx.xxx.xxx/nexus/content/groups/public/</url>
</repository>
<!-- newly added -->
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
<build>
<finalName>${fileName}</finalName>
<plugins>
<!-- compiler plugin -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.6.0</version>
<configuration>
<source>${maven.compiler.source}</source>
<target>${maven.compiler.target}</target>
<encoding>${project.build.sourceEncoding}</encoding>
<compilerVersion>${maven.compiler.source}</compilerVersion>
<showDeprecation>true</showDeprecation>
<showWarnings>true</showWarnings>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.12.4</version>
<configuration>
<skipTests>${maven.test.skip}</skipTests>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<version>0.12</version>
<configuration>
<excludes>
<exclude>README.md</exclude>
</excludes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version>
<configuration>
<aggregate>true</aggregate>
<reportOutputDirectory>javadocs</reportOutputDirectory>
<locale>en</locale>
</configuration>
</plugin>
<!-- scala compiler plugin -->
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.1.6</version>
<configuration>
<scalaCompatVersion>2.11</scalaCompatVersion>
<scalaVersion>2.11.12</scalaVersion>
<encoding>UTF-8</encoding>
</configuration>
<executions>
<execution>
<id>compile-scala</id>
<phase>compile</phase>
<goals>
<goal>add-source</goal>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>test-compile-scala</id>
<phase>test-compile</phase>
<goals>
<goal>add-source</goal>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- jar packaging plugin (bundles all dependencies) -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.6</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<!-- optionally set the jar's main class -->
<mainClass>${mainClass}</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
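Note on version alignment: the com.alibaba.ververica CDC artifacts in the 1.1.x line (flink-connector-mysql-cdc, flink-format-changelog-json) are built against Flink 1.11.x, which matches the flink.version property above; if you raise flink.version, the CDC connector version generally needs to move with it (for example, the 1.2.x connectors target Flink 1.12).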
1.33.2.2.Example 1
package com.xxxx.issue.flink.sql;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
public class LocalFlinkSql {
public static void main(String[] args) {
//Make sure to pick the specific planner (BatchTableEnvironment/StreamTableEnvironment) that matches your programming language.
//If the jars of both planners are on the classpath (the default behavior), you should explicitly set the planner to use in the current program.
//BLINK BATCH QUERY
// EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build();
// TableEnvironment tableEnv = TableEnvironment.create(settings);
//BLINK STREAMING QUERY
//getExecutionEnvironment() picks the right environment from how the Flink application is submitted: a local environment for local runs, a cluster environment for standalone submission, and (presumably) a YARN cluster environment for YARN submission.
StreamExecutionEnvironment bsEnv = StreamExecutionEnvironment.createLocalEnvironment();
EnvironmentSettings bsSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(bsEnv, bsSettings);
// access flink configuration
Configuration configuration = bsTableEnv.getConfig().getConfiguration();
// set low-level key-value options
// configuration.setString("table.exec.mini-batch.enabled", "true");
// configuration.setString("table.exec.mini-batch.allow-latency", "5 s");
// configuration.setString("table.exec.mini-batch.size", "5000");
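// enable the /*+ OPTIONS(...) */ table hints used in the queries below (e.g. to override server-id)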
configuration.setString("table.dynamic-table-options.enabled", "true");
// Run SQL queries against the registered tables
// Register the JDBC source table my_user_role_relation_table
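// note: this table uses the legacy connector.* option style (pre-FLIP-122); the mysql-cdc table below uses the newer 'connector' = '...' style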
bsTableEnv.executeSql(
"CREATE TABLE my_user_role_relation_table (" +
"`id` STRING, " +
"user_id STRING, " +
"role_id STRING) " +
"WITH ("
+ "'connector.type' = 'jdbc',"
+ "'connector.url' = 'jdbc:mysql://xxx.xxx.xxx.xxx:3306/xxxxxxxxxx',"
+ "'connector.table' = 'uc_sys_user_role_relation_copy1',"
+ "'connector.driver' = 'com.mysql.jdbc.Driver',"
+ "'connector.username' = 'xxxxxxx',"
+ "'connector.password' = 'xxxxxxx',"
+ "'connector.read.fetch-size' = '100'" +
")");
bsTableEnv.executeSql(
"CREATE TABLE my_user_table " +
"("
+ " id STRING,"
+ " user_name STRING,"
+ " password STRING,"
+ " proc_time AS PROCTIME() "
+ ") WITH ("
+ " 'connector' = 'mysql-cdc',"
+ " 'hostname' = 'xxx.xxx.xxx.xxx',"
+ " 'port' = '3306',"
+ " 'username' = 'root',"
+ " 'password' = '123456',"
+ " 'database-name' = 'xxxxxx',"
+ " 'table-name' = 'uc_sys_user_copy1'"
+ ")");
bsTableEnv.executeSql(
"CREATE TABLE user_role_relation_data (" +
" id STRING," +
" user_name STRING," +
" password STRING," +
" user_id STRING," +
" role_id STRING," +
" PRIMARY KEY (id,user_name,password,user_id,role_id) NOT ENFORCED" +
") WITH (" +
" 'connector.type' = 'elasticsearch'," +
" 'connector.version' = '6'," +
" 'connector.hosts' = 'http://xxx.xxx.xxx.xxx:9200'," +
" 'connector.index' = 'user_role_relation_data12'," +
" 'connector.document-type' = '_doc'," +
" 'update-mode' = 'upsert'," +
" 'connector.flush-on-checkpoint' = 'true'," +
" 'connector.bulk-flush.max-actions' = '2'," +
" 'connector.bulk-flush.interval' = '3000'," +
" 'connector.bulk-flush.backoff.max-retries' = '3'," +
" 'connector.bulk-flush.backoff.delay' = '3000', " +
" 'format.type' = 'json'" +
")");
//Execute and return the result
// try {
// // execute SELECT statement
// TableResult tableResult1 = bsTableEnv.executeSql("SELECT * FROM my_user_table");
// // use try-with-resources statement to make sure the iterator will be closed automatically
// try (CloseableIterator<Row> it = tableResult1.collect()) {
// while(it.hasNext()) {
// Row row = it.next();
// // handle row
// System.out.println(row.toString());
// }
// }
// }
// catch (Exception e){
// e.printStackTrace();
// }
// // Execute and return the result
// TableResult tableResult2 = bsTableEnv.sqlQuery("SELECT * FROM my_user_table").execute();
// tableResult2.print();
//Convert the Table into a DataStream or DataSet
System.out.println("------------------------------------");
try {
// Table table1 = bsTableEnv.from("my_user_table");
// //Note: sqlQuery only defines the query and does not execute it
// Table table = bsTableEnv.sqlQuery(
// "SELECT id, user_name, password FROM my_user_table ");
// DataStream<Row> dsRow = bsTableEnv.toAppendStream(table, Row.class);
// try (CloseableIterator<Row> it = dsRow.executeAndCollect()) {
// while(it.hasNext()) {
// Row row = it.next();
// // handle row
// System.out.println(row.toString());
// }
// }
// // convert the Table into an append DataStream of Tuple2<String, Integer>
// // via a TypeInformation
// TupleTypeInfo<Tuple3<String, String, String>> tupleType = new TupleTypeInfo<>(
// Types.STRING(),
// Types.STRING(),
// Types.STRING());
// DataStream<Tuple3<String, String, String>> dsTuple =
// bsTableEnv.toAppendStream(table1, tupleType);
// try (CloseableIterator<Tuple3<String, String, String>> it = dsTuple.executeAndCollect()) {
// while(it.hasNext()) {
// Tuple3<String, String, String> tuple3 = it.next();
// // handle row
// System.out.println(tuple3.f0+","+tuple3.f1+","+tuple3.f2);
// }
// }
// convert the Table into a retract DataStream of Row.
// A retract stream of type X is a DataStream<Tuple2<Boolean, X>>.
// The boolean field indicates the type of the change.
// True is INSERT, false is DELETE.
// DataStream<Tuple2<Boolean, Row>> retractStream =
// bsTableEnv.toRetractStream(table1, Row.class);
// try (CloseableIterator<Tuple2<Boolean, Row>> it = retractStream.executeAndCollect()) {
// while(it.hasNext()) {
// Tuple2<Boolean, Row> tuple3 = it.next();
// // handle row
// Row row = tuple3.f1;
// String rowStr = row.getField(0).toString()+","+row.getField(1).toString()+","+row.getField(2).toString();
// System.out.println((tuple3.f0?"INSERT":"DELETE")+","+rowStr);
// }
// }
// Table my_user_tableTable1 = bsTableEnv.from("my_user_role_relation_table");
// DataStream<Tuple2<Boolean, Row>> my_user_tableRetractStream =
// bsTableEnv.toRetractStream(my_user_tableTable1, Row.class);
// try (CloseableIterator<Tuple2<Boolean, Row>> it = my_user_tableRetractStream.executeAndCollect()) {
// while(it.hasNext()) {
// Tuple2<Boolean, Row> tuple3 = it.next();
// // handle row
// Row row = tuple3.f1;
// String rowStr = row.getField(0).toString()+","+row.getField(1).toString()+","+row.getField(2).toString();
// System.out.println((tuple3.f0?"INSERT":"DELETE")+","+rowStr);
// }
// }
// catch (Exception e){
// e.printStackTrace();
// }
//Map by field name or by position
// Table table = bsTableEnv.fromDataStream(retractStream, $("myLong"));
// Table table = bsTableEnv.fromDataStream(retractStream, $("f1"));
}
catch (Exception e){
e.printStackTrace();
}
//Insert data into Elasticsearch
try {
//execute SELECT statement
// TableResult tableResult1 = bsTableEnv.executeSql("select b.id as id, b.user_name as user_name, b.password as password, s.user_id as user_id, s.role_id as role_id " +
// "from my_user_table /*+ OPTIONS('server-id'='123456') */ as b " +
// "join my_user_role_relation_table for system_time as of b.proc_time s on b.id = s.user_id " +
// "group by b.id,b.user_name,b.password,s.user_id,s.role_id ");
// // use try-with-resources statement to make sure the iterator will be closed automatically
// try (CloseableIterator<Row> it = tableResult1.collect()) {
// while(it.hasNext()) {
// Row row = it.next();
// // handle row
// System.out.println(row.toString());
// }
// }
// TableResult tableResult3 = bsTableEnv.executeSql("insert into user_role_relation_data" +
// " select id, user_name, password, user_id, role_id from (" +
// " select '1' as id, '1' as user_name, '1' as password, '1' as user_id, '1' as role_id " +
// ") temple" +
// " group by id,user_name,password,user_id,role_id ");
TableResult tableResult3 = bsTableEnv.executeSql(
"insert into user_role_relation_data " +
"select " +
"b.id as id, " +
"b.user_name as user_name, " +
"b.password as password, " +
"s.user_id as user_id, " +
"s.role_id as role_id " +
"from my_user_table /*+ OPTIONS('server-id'='123456') */ as b " +
"join my_user_role_relation_table for system_time as of b.proc_time s on b.id = s.user_id " +
"group by b.id,b.user_name,b.password,s.user_id,s.role_id");
// Table table3 = bsTableEnv.sqlQuery("select b.id as id, b.user_name as user_name, b.password as password, s.user_id as user_id, s.role_id as role_id " +
// "from my_user_table /*+ OPTIONS('server-id'='260') */ as b " +
// "join my_user_role_relation_table for system_time as of b.proc_time s on b.id = s.user_id " +
// "group by b.id,b.user_name,b.password,s.user_id,s.role_id ");
// TableResult tableResult3 = table3.executeInsert("user_role_relation_data ");
}
catch (Exception e){
e.printStackTrace();
}
// //Insert data into ClickHouse
// bsTableEnv.executeSql("CREATE TABLE sink_table1 (" +
// "\tid VARCHAR," +
// " name VARCHAR," +
// " phone VARCHAR," +
// " PRIMARY KEY (id) NOT ENFORCED" +
// ") WITH (" +
// " 'connector' = 'clickhouse'," +
// " 'url' = 'clickhouse://xxx.xxx.xxx.xxx:8123'," +
// " 'username' = 'default'," +
// " 'password' = '123456'," +
// " 'database-name' = 'test'," +
// " 'table-name' = 'd_sink_table'," +
// " 'sink.batch-size' = '1000'," +
// " 'sink.flush-interval' = '0'," +
// " 'sink.max-retries' = '3'," +
// " 'sink.partition-strategy' = 'hash'," +
// " 'sink.partition-key' = 'name'," +
// " 'sink.ignore-delete' = 'true'" +
// ")");
//
// try {
// // execute SELECT statement
// //TableResult tableResult1 = bsTableEnv.executeSql("insert into sink_table select '2' as id, '2' as name,'2' as phone");
// //execute SELECT statement
// TableResult tableResult1 = bsTableEnv.executeSql("insert into sink_table1 select id as id, user_name as name, password as phone " +
// "from my_user_table /*+ OPTIONS('server-id'='123457') */");
// }
// catch (Exception e){
// e.printStackTrace();
// }
}
}
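Example 1 joins the CDC-backed my_user_table against the JDBC table my_user_role_relation_table with a processing-time temporal join (FOR SYSTEM_TIME AS OF b.proc_time) and upserts the result into the Elasticsearch index, so every change captured from MySQL keeps the index in sync.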
1.33.2.3.Example 2
package com.xxxx.issue.flink.sql;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
public class MysqlCdcDataTest {
public static void main(String[] args) {
System.out.println("--------------------------------------------");
StreamExecutionEnvironment bsEnv = StreamExecutionEnvironment.getExecutionEnvironment();
EnvironmentSettings bsSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(bsEnv, bsSettings);
bsTableEnv.executeSql("CREATE TABLE my_user_table (" +
" id STRING," +
" user_name STRING," +
" password STRING," +
" proc_time AS PROCTIME() " +
") WITH (" +
" 'connector' = 'mysql-cdc'," +
" 'hostname' = 'xxx.xxx.xxx.xxx'," +
" 'port' = '3306'," +
" 'username' = 'root'," +
" 'password' = 'xxxxxx'," +
" 'database-name' = 'xxxxxxxxxx'," +
" 'table-name' = 'uc_sys_user_copy1'," +
" 'server-id' = '5500'," +
" 'debezium.snapshot.mode' = 'schema_only'" +
")");
// Table my_user_tableTable1 = bsTableEnv.from("my_user_table");
// DataStream<Tuple2<Boolean, Row>> my_user_tableRetractStream =
// bsTableEnv.toRetractStream(my_user_tableTable1, Row.class);
// try (CloseableIterator<Tuple2<Boolean, Row>> it = my_user_tableRetractStream.executeAndCollect()) {
// while(it.hasNext()) {
// Tuple2<Boolean, Row> tuple3 = it.next();
// // handle row
// Row row = tuple3.f1;
// String rowStr = row.getField(0).toString()+","+row.getField(1).toString()+","+row.getField(2).toString();
// System.out.println((tuple3.f0?"INSERT":"DELETE")+","+rowStr);
// }
// }
// catch (Exception e){
// e.printStackTrace();
// }
}
}
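Two options in this DDL are worth noting: 'server-id' gives the connector a unique ID among the MySQL replication clients (each CDC job needs its own), and 'debezium.snapshot.mode' = 'schema_only' skips the initial data snapshot, so the job captures only the schema plus changes written after startup.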
1.33.2.4.Example 3
package com.xxxx.issue.flink.sql;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.CloseableIterator;
public class RemoteFlinkSql {
public static void main(String[] args) throws Exception {
//Make sure to pick the specific planner (BatchTableEnvironment/StreamTableEnvironment) that matches your programming language.
//If the jars of both planners are on the classpath (the default behavior), you should explicitly set the planner to use in the current program.
//BLINK BATCH QUERY
// EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build();
// TableEnvironment tableEnv = TableEnvironment.create(settings);
//BLINK STREAMING QUERY
//getExecutionEnvironment() picks the right environment from how the Flink application is submitted: a local environment for local runs, a cluster environment for standalone submission, and (presumably) a YARN cluster environment for YARN submission.
//Configuration configuration = new Configuration();
//configuration.setString("table.dynamic-table-options.enabled", "true");
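//createRemoteEnvironment(host, port, jarFiles...) submits the job to the given remote cluster and ships the listed connector jars along with it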
StreamExecutionEnvironment bsEnv = StreamExecutionEnvironment.createRemoteEnvironment("xxx.xxx.xxx.xxx",8081,
"F:\\xxxx\\used\\flink-connector-clickhouse-1.11.0.jar",
"F:\\xxxx\\used\\flink-connector-jdbc_2.11-1.11.1.jar",
"F:\\xxxx\\used\\flink-format-changelog-json-1.1.1.jar",
"F:\\xxxx\\used\\flink-sql-connector-elasticsearch6_2.11-1.11.1.jar",
"F:\\xxxx\\used\\flink-sql-connector-mysql-cdc-1.1.1.jar",
"F:\\xxxx\\used\\flink-table-common-1.11.1.jar",
"F:\\xxxx\\used\\mysql-connector-java-5.1.34.jar");
//Set the checkpointing mode to exactly-once (this is the default)
//env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
EnvironmentSettings bsSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(bsEnv, bsSettings);
// set low-level key-value options
// configuration.setString("table.exec.mini-batch.enabled", "true");
// configuration.setString("table.exec.mini-batch.allow-latency", "5 s");
// configuration.setString("table.exec.mini-batch.size", "5000");
Configuration configuration = bsTableEnv.getConfig().getConfiguration();
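// enable /*+ OPTIONS(...) */ hints so the queries below can override table options such as server-id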
configuration.setString("table.dynamic-table-options.enabled", "true");
// Run SQL queries against the registered tables
// Register the JDBC source table my_user_role_relation_table
bsTableEnv.executeSql(
"CREATE TABLE my_user_role_relation_table "
+ "(`id` STRING, user_id STRING, role_id STRING) "
+ " WITH ("
+ "'connector.type' = 'jdbc',"
+ "'connector.url' = 'jdbc:mysql://xxx.xxx.xxx.xxx:3306/xxxxxxxxxx',"
+ "'connector.table' = 'uc_sys_user_role_relation_copy1',"
+ "'connector.driver' = 'com.mysql.jdbc.Driver',"
+ "'connector.username' = 'xxxxxxxxx',"
+ "'connector.password' = 'xxxxxxxxx@123',"
+ "'connector.read.fetch-size' = '100'"
+")");
bsTableEnv.executeSql(
"CREATE TABLE my_user_table "
+ "("
+ " id STRING,"
+ " user_name STRING,"
+ " password STRING,"
+ " proc_time AS PROCTIME() "
+ ") WITH ( "
+ " 'connector' = 'mysql-cdc',"
+ " 'hostname' = 'xxx.xxx.xxx.xxx',"
+ " 'port' = '3306',"
+ " 'username' = 'root',"
+ " 'password' = '123456',"
+ " 'database-name' = 'xxxxxxxxxx',"
+ " 'table-name' = 'uc_sys_user_copy1' "
+ ")");
bsTableEnv.executeSql("CREATE TABLE user_role_relation_data (" +
" id STRING," +
" user_name STRING," +
" password STRING," +
" user_id STRING," +
" role_id STRING," +
" PRIMARY KEY (id,user_name,password,user_id,role_id) NOT ENFORCED" +
") WITH (" +
" 'connector.type' = 'elasticsearch'," +
" 'connector.version' = '6'," +
" 'connector.hosts' = 'http://xxx.xxx.xxx.xxx:9200'," +
" 'connector.index' = 'user_role_relation_data11'," +
" 'connector.document-type' = '_doc'," +
" 'update-mode' = 'upsert'," +
" 'connector.flush-on-checkpoint' = 'true'," +
" 'connector.bulk-flush.max-actions' = '2'," +
" 'connector.bulk-flush.interval' = '3000'," +
" 'connector.bulk-flush.backoff.max-retries' = '3'," +
" 'connector.bulk-flush.backoff.delay' = '3000'," +
" 'format.type' = 'json'" +
")");
//Insert data into Elasticsearch
// try {
// //execute SELECT statement
TableResult tableResult1 = bsTableEnv.executeSql("select b.id as id, b.user_name as user_name, b.password as password, s.user_id as user_id, s.role_id as role_id " +
"from my_user_table /*+ OPTIONS('server-id'='200') */ as b " +
"join my_user_role_relation_table for system_time as of b.proc_time s on b.id = s.user_id " +
"group by b.id,b.user_name,b.password,s.user_id,s.role_id ");
// use try-with-resources statement to make sure the iterator will be closed automatically
try (CloseableIterator<Row> it = tableResult1.collect()) {
while(it.hasNext()) {
Row row = it.next();
// handle row
System.out.println(row.toString());
}
}
TableResult tableResult3 = bsTableEnv.executeSql("insert into user_role_relation_data" +
" select id, user_name, password, user_id, role_id from (" +
" select '1' as id, '1' as user_name, '1' as password, '1' as user_id, '1' as role_id " +
") temple " +
" group by id,user_name,password,user_id,role_id ");
// TableResult tableResult3 = bsTableEnv.executeSql("insert into user_role_relation_data " +
// "select b.id as id, b.user_name as user_name, b.password as password, s.user_id as user_id, s.role_id as role_id " +
// "from my_user_table as b " +
// "join my_user_role_relation_table for system_time as of b.proc_time s on b.id = s.user_id" +
// "group by b.id,b.user_name,b.password,s.user_id,s.role_id");
Table table3 = bsTableEnv.sqlQuery("select b.id as id, b.user_name as user_name, b.password as password, s.user_id as user_id, s.role_id as role_id " +
"from my_user_table /*+ OPTIONS('server-id'='123456') */ as b " +
"join my_user_role_relation_table for system_time as of b.proc_time s on b.id = s.user_id " +
"group by b.id,b.user_name,b.password,s.user_id,s.role_id");
TableResult tableResult4 = table3.executeInsert("user_role_relation_data");
// }
// catch (Exception e){
// e.printStackTrace();
// }
//Insert data into ClickHouse
bsTableEnv.executeSql("CREATE TABLE sink_table1 (" +
" id VARCHAR," +
" name VARCHAR," +
" phone VARCHAR," +
" PRIMARY KEY (id) NOT ENFORCED" +
") WITH (" +
" 'connector' = 'clickhouse'," +
" 'url' = 'clickhouse://xxx.xxx.xxx.xxx:8123'," +
" 'username' = 'default'," +
" 'password' = ''," +
" 'database-name' = 'default'," +
" 'table-name' = 'd_sink_table'," +
" 'sink.batch-size' = '1000'," +
" 'sink.flush-interval' = '0'," +
" 'sink.max-retries' = '3'," +
" 'sink.partition-strategy' = 'hash'," +
" 'sink.partition-key' = 'name'," +
" 'sink.ignore-delete' = 'true'" +
")");
try {
// execute SELECT statement
//TableResult tableResult1 = bsTableEnv.executeSql("insert into sink_table select '2' as id, '2' as name,'2' as phone");
//execute SELECT statement
TableResult tableResult5 = bsTableEnv.executeSql(
"insert into sink_table1 " +
"select id as id, user_name as name, password as phone " +
"from my_user_table /*+ OPTIONS('server-id'='123457') */ ");
} catch (Exception e) {
e.printStackTrace();
}
}
}